前几天学习的,忘记做笔记了,今天给它发出来
# -*- coding: utf-8 -*-
# @Time : 2022/9/28 0028 21:20
# @Author : IT球球
# @File : package.py
# @Software : PyCharm
import urllib.request
# get请求
# response = urllib.request.urlopen('http://www.baidu.com') # 打开某个网页
# print(response.read().decode('utf-8')) # 对获取网页源码进行utf-8解码
# import urllib.parse
# # 获取post请求
# data = bytes(urllib.parse.urlencode({'avatar_id' : 1}),encoding="utf-8") #bytes把所有的数据转为2进制数据包
# response = urllib.request.urlopen('http://wx.hyxzkj.cn/api/video.Creatorcenter/view',data=data)
# print(response.read().decode('utf-8'))
# 设置请求时长
# try:
#     response = urllib.request.urlopen('http://httpbin.org/get',timeout=0.01)
#     print(response.read().decode('utf-8'))
# except Exception as e:
#     print("超时")
# response = urllib.request.urlopen('http://www.baidu.com')
# print(response.status) #如果报错418表示你被发现是爬虫了
# print(response.getheaders()) #获取header全部内容
# print(response.getheader('Bdpagetype')) #获取响应头中某个字段的内容
# response = urllib.request.urlopen('https://www.douban.com')
# print(response.status) #urllib.error.HTTPError: HTTP Error 418: 表示你被发现是爬虫了
#模拟浏览器发送数据,避免被识别为爬虫而报错418
import urllib.parse
# Fetch the douban.com front page while presenting a browser-like User-Agent.
# Per the author's notes, a bare urlopen() against this site was rejected
# with HTTP 418, so the request carries explicit headers instead.
url = "https://www.douban.com"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36",
}
# POST variant, kept for reference (form data must be url-encoded bytes):
# data = bytes(urllib.parse.urlencode({'test':'aaaa'}),encoding = 'utf-8')
# req = urllib.request.Request(url=url,data =data,headers=headers,method='post')
req = urllib.request.Request(url=url, headers=headers)
# Use the response as a context manager so the underlying connection is
# closed even if reading or decoding the body raises.
with urllib.request.urlopen(req) as response:
    # The body arrives as bytes; decode it as UTF-8 before printing.
    print(response.read().decode('utf-8'))
新手学习请勿喷!
欢迎各位小伙伴来我的QQ交流群一起学习 :842167453