python-爬虫练习1-urllib的使用：urllib.request.urlopen() 模拟浏览器向服务器发送请求

urllib.request.urlopen() 模拟浏览器向服务器发送请求；response 是服务器返回的数据，其数据类型是 HTTPResponse。字节 → 字符串：解码 decode；字符串 → 字节：编码 encode。read() 以字节形式读取二进制数据，扩展：read(5) 返回前 5 个字节；readline() 读取一行；readlines() 一行一行读取直至结束；getcode() 获取状态码；geturl() 获取 url；getheaders() 获取 headers。urllib.request.urlretrieve() 用于请求（下载）页面、图片、视频。

# Fetch the source of the Baidu home page with urllib.
import urllib.request


# Address to request.
url = 'http://www.baidu.com'

# urlopen() sends the request the way a browser would. The response carries
# the page source, the status code, the headers, etc. Using it as a context
# manager closes the underlying connection as soon as the body has been read,
# instead of leaving it open until the interpreter exits.
with urllib.request.urlopen(url) as response:
    # read() returns the body as raw bytes; decode() turns them into a str.
    content = response.read().decode('utf-8')

# Print the page source.
print(content)

# One type and six methods of the object returned by urlopen().
import urllib.request

url = 'http://www.baidu.com'

# First request: show the response type and read the whole body.
with urllib.request.urlopen(url) as response:
    # The one type: urlopen() returns an http.client.HTTPResponse for http URLs.
    print(type(response))

    # read(n) reads bytes; n is optional and limits how many bytes are read.
    # Without it the whole body is consumed.
    content = response.read().decode('utf-8')

# read() above drained the body, so any further readline()/readlines() call on
# that same response would only return empty results. A fresh response is
# opened so the line-oriented methods have data to show.
with urllib.request.urlopen(url) as response:
    # readline() reads exactly one line.
    contentline = response.readline().decode('utf-8')

    # readlines() reads the lines that are left (everything after the line
    # consumed by readline() above) and returns them as a list of bytes.
    contentlines = response.readlines()

    # getcode() returns the HTTP status code; 200 means success.
    code = response.getcode()

    # geturl() returns the URL that was actually retrieved. This rebinds
    # `url`, which is why it is done after the requests have been issued.
    url = response.geturl()

    # getheaders() returns the response headers as (name, value) pairs.
    header = response.getheaders()


print(f" 内容= {content},状态吗= {code} , 读取一行={contentline},  url={url}，读取多行={contentlines}，请求头和状态信息 {header} ")

# Downloading data with urlretrieve().
import urllib.request

# Address of a web page that could be downloaded.
url_page = 'http://www.baidu.com'

# urlretrieve() takes two arguments: (address to download, file name to save as)
# urllib.request.urlretrieve(url_page,'baidu.html')


# Download an image
# url_image='https://s3.ifanr.com/wp-content/uploads/2021/04/tiga.jpg'
# urllib.request.urlretrieve(url_image,filename='light')

# Download a video: whatever the server returns for the address is written to
# the local file 'eat.mp4'.
# NOTE(review): this address looks like a video *page* rather than a media
# file, so 'eat.mp4' presumably will not contain playable video -- confirm
# and replace with the direct media URL if so.
url_video = 'https://www.bilibili.com/video/BV1Ss411g7W6/'
urllib.request.urlretrieve(url=url_video, filename='eat.mp4')