Python urllib补充

72 阅读1分钟

前几天学习的,忘记做笔记了,今天给它发出来

# -*- coding: utf-8 -*-
# @Time : 2022/9/28 0028 21:20
# @Author : IT球球
# @File : package.py
# @Software : PyCharm

import urllib.request


# get请求
# response = urllib.request.urlopen('http://www.baidu.com')  # 打开某个网页
# print(response.read().decode('utf-8'))     # 对获取网页源码进行utf-8解码


# import urllib.parse
# # 获取post请求
# data = bytes(urllib.parse.urlencode({'avatar_id' : 1}),encoding="utf-8") #bytes把所有的数据转为2进制数据包
# response = urllib.request.urlopen('http://wx.hyxzkj.cn/api/video.Creatorcenter/view',data=data)
# print(response.read().decode('utf-8'))


# 设置请求时长
# try:
#     response = urllib.request.urlopen('http://httpbin.org/get',timeout=0.01)
#     print(response.read().decode('utf-8'))
# except Exception as e:
#     print("超时")

# response = urllib.request.urlopen('http://www.baidu.com')
# print(response.status)  #如果报错418表示你被发现是爬虫了
# print(response.getheaders())  #获取header全部内容
# print(response.getheader('Bdpagetype'))  #获取响应头中某个字段的内容


# response = urllib.request.urlopen('https://www.douban.com')
# print(response.status)  #urllib.error.HTTPError: HTTP Error 418:  表示你被发现是爬虫了


# Send the request with a browser-like User-Agent header so the site does not
# identify the client as a crawler and reject it with HTTP Error 418.
import urllib.parse
url = "https://www.douban.com"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36"
}

# POST variant: url-encode the form fields and pass them as bytes via `data`.
# HTTP method names are case-sensitive, so the method is spelled 'POST'.
# data = bytes(urllib.parse.urlencode({'test':'aaaa'}),encoding = 'utf-8')
# req = urllib.request.Request(url=url,data =data,headers=headers,method='POST')

# Build a GET request that carries the custom headers.
req = urllib.request.Request(url=url, headers=headers)

# The context manager closes the HTTP response (and its socket) even when
# reading or decoding raises, instead of leaving the connection open.
with urllib.request.urlopen(req) as response:
    # Read the whole body and decode it as UTF-8 text before printing.
    print(response.read().decode('utf-8'))

新手学习请勿喷!

欢迎各位小伙伴来我的QQ交流群一起学习 :842167453