Requests使用技巧

111 阅读2分钟

将cookie转为字典

# -*- coding: UTF-8 -*-
# Requests tip: converting between a CookieJar and a plain dict.
# (Originally: requests技巧.py, project 网络爬虫, 2024/10/24)


import requests

response = requests.get('https://www.baidu.com')

# The raw RequestsCookieJar returned by the server.
print(response.cookies)

# CookieJar -> plain dict, e.g.
# {'BD_NOT_HTTPS': '1', 'PSTM': '1729754274', 'H_PS_PSSID': '60449_60839_60851_60886_60875_60934', 'BAIDUID': '3A1E2F1C84792B2CD3AB547D72AC93E4:FG=1', 'BAIDUID_BFESS': '3A1E2F1C84792B2CD3AB547D72AC93E4:FG=1'}
cookie_dict = requests.utils.dict_from_cookiejar(response.cookies)
print(cookie_dict)

# Plain dict -> CookieJar (the reverse direction).
jar = requests.utils.cookiejar_from_dict({'BD_NOT_HTTPS': '1', 'PSTM': '1729754274', 'H_PS_PSSID': '60449_60839_60851_60886_60875_60934', 'BAIDUID': '3A1E2F1C84792B2CD3AB547D72AC93E4:FG=1', 'BAIDUID_BFESS': '3A1E2F1C84792B2CD3AB547D72AC93E4:FG=1'})
print(jar)

请求SSL证书验证

（此处原有截图「图片.png」，导出时未能加载。）

SSL证书验证: HTTPS = HTTP + SSL

# verify=False skips SSL certificate verification — use only for sites with
# broken/self-signed certificates. It makes urllib3 emit an
# InsecureRequestWarning on every request, so silence that explicitly.
requests.packages.urllib3.disable_warnings()

response = requests.get('https://www.12306.cn', verify=False)

print(response)

设置超时

# timeout=3: give up (raise requests.exceptions.Timeout) if the server has
# not responded within 3 seconds, instead of blocking indefinitely.
r = requests.get(
    'https://www.google.com.hk/',
    timeout=3,
)

print(r)

（此处原有截图「图片.png」，导出时未能加载。）

配合状态码判断是否请求成功

# A status code of 200 confirms the previous request succeeded.
assert r.status_code == 200

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36'
}


def _parse_url(url):
    """Fetch *url* and return the decoded response body; raise on failure."""
    response = requests.get(url, headers=headers, timeout=3)
    # NOTE: assert is stripped under `python -O`; acceptable for a demo script.
    assert response.status_code == 200
    return response.content.decode()


def parse_url(url):
    """Fetch *url*, returning the page text or None on any request failure."""
    try:
        html_str = _parse_url(url)
    except (requests.RequestException, AssertionError):
        # Narrowed from a bare `except:` so genuine bugs (NameError,
        # KeyboardInterrupt, ...) are no longer silently swallowed.
        html_str = None
    return html_str


if __name__ == '__main__':
    url = 'https://www.baidu.com'
    print(parse_url(url))

编解码

# URL percent-encoding round trip (requests re-exports urllib.parse helpers).
encoded = '%E7%BD%91%E7%BB%9C%E7%88%AC%E8%99%AB'
print(requests.utils.unquote(encoded))  # decode back to the original text
print(requests.utils.quote('网络爬虫'))  # percent-encode non-ASCII text

retrying

# -*- coding: UTF-8 -*-
# Retry a flaky request up to 3 times before giving up (retrying package).
# (Originally: 超时重发.py, project 网络爬虫, 2024/10/24)

import requests
from retrying import retry

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36'
}


@retry(stop_max_attempt_number=3)
def _parse_url(url):
    """Fetch *url* once; any exception triggers a retry (max 3 attempts)."""
    print('代码执行了几次')
    response = requests.get(url, headers=headers, timeout=3)
    assert response.status_code == 200
    return response.content.decode()


def parse_url(url):
    """Return the page text, or None once all retry attempts are exhausted."""
    try:
        html_str = _parse_url(url)
    except (requests.RequestException, AssertionError):
        # Narrowed from a bare `except:`; this branch is reached only after
        # @retry has exhausted its 3 attempts.
        html_str = None
    return html_str


if __name__ == '__main__':
    url = 'https://www.google.com.hk/'
    print(parse_url(url))

（此处原有截图「图片.png」，导出时未能加载。）