Requests
requests 用 Python 语言编写,基于 urllib,是采用 Apache2 Licensed 开源协议的 HTTP 库。它比 urllib 更加方便,可以节约大量的工作,完全满足 HTTP 测试需求。
Python实现的简单易用的HTTP库
使用
import requests

# Fetch a page and look at the basic pieces of the Response object.
resp = requests.get("https://www.baidu.com/")
body = resp.text

print(type(resp))
print(resp.status_code)
print(type(body))
print(body)
print(resp.cookies)
请求方式
import requests

# Each HTTP verb has a helper of the same name on the `requests` module.
# The calls must go through `requests`; the bare name `request` is not
# defined by this import and would raise NameError.
requests.post('http://httpbin.org/post')
requests.put('http://httpbin.org/put')
requests.delete('http://httpbin.org/delete')
requests.head('http://httpbin.org/get')
requests.options('http://httpbin.org/get')
请求
GET请求
import requests

# Plain GET request.
plain = requests.get("http://httpbin.org/get")
print(plain)

# GET request with the parameters written directly into the query string.
inline = requests.get("http://httpbin.org/get?name=germey&age=22")
print(inline)

# GET request with the same parameters passed through `params` instead.
query = dict(name='germey', age=22)
with_params = requests.get("http://httpbin.org/get", params=query)
print(with_params.text)
解析json
import json

import requests

response = requests.get("http://httpbin.org/get")
print(type(response.text))

# Two ways to decode the JSON body: the Response helper and the stdlib.
print(response.json())
# json.loads() parses a str; json.load() expects a file-like object and
# would fail on response.text.
print(json.loads(response.text))

print(response.content)
获取二进制文件
import requests

# The module is `requests`; `request` is undefined and would raise NameError.
response = requests.get('https://github.com/favicon.ico')

# Show the body both as `.text` and as `.content`, together with their types.
print(type(response.text), type(response.content))
print(response.text)
print(response.content)

# Write the body in binary mode; the `with` block closes the file afterwards.
with open('favicon.ico', 'wb') as f:
    f.write(response.content)
添加headers
import requests

EXPLORE_URL = 'https://www.zhihu.com/explore'

# First attempt: no custom headers.
bare = requests.get(EXPLORE_URL)
print(bare.text)  # original note: without a header the site answers 500

# Second attempt: send a browser-like User-Agent header.
browser_headers = dict([
    ("User-Agent", "Mozilla/5.0(Macintosh;Intel Mac OS X 10_11_4) AppleWebKit/537.36(KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36"),
])
with_headers = requests.get(EXPLORE_URL, headers=browser_headers)
print(with_headers.text)
POST
import requests

# Form fields to send in the POST body.
data = {'name': 'Germey', 'age': 22}

response = requests.post('http://httpbin.org/post', data=data)
# Print the body of the response object; `request` is not a defined name.
print(response.text)

# Same POST, this time with a custom User-Agent header.
headers = {
    "User-Agent": "Mozilla/5.0(Macintosh;Intel Mac OS X 10_11_4) AppleWebKit/537.36(KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36"
}
response = requests.post('http://httpbin.org/post', data=data, headers=headers)
print(response.text)
响应
response属性
import requests

response = requests.get("http://www.baidu.com")

# Print the type and value of each attribute, in the same order as before.
for attr_name in ("status_code", "headers", "cookies", "url", "history"):
    attr_value = getattr(response, attr_name)
    print(type(attr_value), attr_value)
状态码判断
import requests

# Compare against the named constant from requests.codes.
response = requests.get("http://www.baidu.com")
if response.status_code == requests.codes.ok:
    print("Request Successfully!")
else:
    exit()

# Compare against the literal numeric status code.
response = requests.get("http://www.baidu.com")
if response.status_code == 200:
    print("Request Successfully")
else:
    exit()
# HTTP status codes mapped to their symbolic names.
# The names appear to be transcribed from the lookup table behind
# `requests.codes` (e.g. `requests.codes.ok`); obvious transcription typos
# have been corrected so that the names match that table.
# (Original note: Chinese annotations for each code are still to be added.)
http_status_code = {
    # Informational
    100: ("continue",),
    101: ("switching_protocols",),
    102: ("processing",),
    103: ("checkpoint",),
    122: ("uri_too_long", "request_uri_too_long"),
    # Success
    200: ("ok", "okay", "all_ok", "all_good", "\\o/", "√"),
    201: ("created",),
    202: ("accepted",),
    203: ("non_authoritative_info", "non_authoritative_information"),
    204: ("no_content",),
    205: ("reset_content", "reset"),
    # The client sent a GET request with a Range header and the server
    # fulfilled it (HTTP/1.1).
    206: ("partial_content", "partial"),
    207: ("multi_status", "multiple_status", "multi_stati", "multiple_stati"),
    208: ("already_reported",),
    226: ("im_used",),
    # Redirection
    300: ("multiple_choices",),
    301: ("moved_permanently", "moved", "\\o-"),
    302: ("found",),
    303: ("see_other", "other"),
    304: ("not_modified",),
    305: ("use_proxy",),
    306: ("switch_proxy",),
    307: ("temporary_redirect", "temporary_moved", "temporary"),
    308: ("permanent_redirect", "resume_incomplete", "resume"),
    # Client Error
    400: ("bad_request", "bad"),
    401: ("unauthorized",),
    402: ("payment_required", "payment"),
    403: ("forbidden",),
    404: ("not_found", "-o-"),
    405: ("method_not_allowed", "not_allowed"),
    406: ("not_acceptable",),
    407: ("proxy_authentication_required", "proxy_auth", "proxy_authentication"),
    408: ("request_timeout", "timeout"),
    409: ("conflict",),
    410: ("gone",),
    411: ("length_required",),
    412: ("precondition_failed", "precondition"),
    413: ("request_entity_too_large",),
    414: ("request_uri_too_large",),
    415: ("unsupported_media_type", "unsupported_media", "media_type"),
    416: ("requested_range_not_satisfiable", "request_range", "range_not_satisfiable"),
    417: ("expectation_failed",),
    418: ("im_a_teapot", "teapot", "i_am_a_teapot"),
    421: ("misdirected_request",),
    422: ("unprocessable_entity", "unprocessable"),
    423: ("locked",),
    424: ("failed_dependency", "dependency"),
    425: ("unordered_collection", "unordered"),
    426: ("upgrade_required", "upgrade"),
    428: ("precondition_required", "precondition"),
    429: ("too_many_requests", "too_many"),
    431: ("header_fields_too_large", "fields_too_large"),
    444: ("no_response", "none"),
    449: ("retry_with", "retry"),
    450: ("blocked_by_windows_parental_controls", "parental_controls"),
    451: ("unavailable_for_legal_reasons", "legal_reasons"),
    499: ("client_closed_request",),
    # Server Error
    500: ("internal_server_error", "server_error", "/o\\", "x"),
    501: ("not_implemented",),
    502: ("bad_gateway",),
    503: ("service_unavailable", "unavailable"),
    504: ("gateway_timeout",),
    505: ("http_version_not_supported", "http_version"),
    506: ("variant_also_negotiates",),
    507: ("insufficient_storage",),
    509: ("bandwidth_limit_exceeded", "bandwidth"),
    510: ("not_extended",),
    511: ("network_authentication_required", "network_auth", "network_authentication"),
}
# See the official documentation for detailed explanations of each code.
高级操作
文件上传
import requests

# Open the file inside a `with` block so the handle is closed once the
# upload has been sent, instead of leaking it for the life of the process.
with open("favicon.ico", "rb") as upload:
    files = {"file": upload}
    response = requests.post("http://httpbin.org/post", files=files)

print(response.text)
获取cookie
import requests

response = requests.get("http://www.baidu.com")
jar = response.cookies
print(jar)

# Print every cookie as name=value, one per line.
for name, val in jar.items():
    print("=".join((name, val)))
会话维持
import requests

SET_COOKIE_URL = "http://httpbin.org/cookies/set/number/123456789"
SHOW_COOKIES_URL = "http://httpbin.org/cookies"

# Variant 1: two separate module-level requests.get() calls.
requests.get(SET_COOKIE_URL)
standalone_reply = requests.get(SHOW_COOKIES_URL)
print(standalone_reply.text)

# Variant 2: both requests go through one Session object.
session = requests.Session()
session.get(SET_COOKIE_URL)
session_reply = session.get(SHOW_COOKIES_URL)
print(session_reply.text)
证书验证
import requests
from requests.packages import urllib3

# Default behaviour: request an HTTPS site without extra options.
# The module is `requests`; `request` is undefined and would raise NameError.
response = requests.get("https://www.12306.cn")
print(response.status_code)

# Pass verify=False and silence the warnings urllib3 would otherwise emit.
urllib3.disable_warnings()
response = requests.get("https://www.12306.cn", verify=False)
print(response.status_code)

# Supply a client-side certificate as a (cert file, key file) pair.
response = requests.get("https://www.12306.cn", cert=('/path/servr.crt', '/path/key'))
print(response.status_code)
代理设置
import requests

# One proxy URL per scheme.
proxy_map = dict(
    http="http://127.0.0.1:9743",
    https="https://127.0.0.1:9743",
)

reply = requests.get("https://www.taobao.com", proxies=proxy_map)
print(reply.status_code)
pip install 'requests[socks]'
import requests

# Both schemes use the same SOCKS5 proxy URL.
SOCKS_PROXY = 'socks5://127.0.0.1:9742'
socks_map = {scheme: SOCKS_PROXY for scheme in ('http', 'https')}

response = requests.get("https://www.taobao.com", proxies=socks_map)
超时设置
import requests
from requests.exceptions import ReadTimeout

URL = "https://httpbin.org/get"

# Request with a one-second timeout and no error handling.
first = requests.get(URL, timeout=1)
print(first.status_code)

# Request with a shorter timeout, reporting a read timeout instead of crashing.
try:
    second = requests.get(URL, timeout=0.5)
except ReadTimeout:
    print("Timeout")
else:
    print(second.status_code)
认证设置
import requests
from requests.auth import HTTPBasicAuth

TARGET = "http://120.27.34.24:9001"

# Credentials passed as an explicit HTTPBasicAuth object.
explicit = requests.get(TARGET, auth=HTTPBasicAuth('user', '123'))
print(explicit.status_code)

# Credentials passed as a plain (user, password) tuple.
shorthand = requests.get(TARGET, auth=('user', '123'))
print(shorthand.status_code)
异常处理
import requests
# The exception classes live in `requests.exceptions` (plural).
from requests.exceptions import ReadTimeout, HTTPError, RequestException

# Handlers are listed with the two specific exceptions first and the
# general RequestException last, so the specific ones are matched first.
try:
    response = requests.get("http://httpbin.org/get", timeout=0.5)
    print(response.status_code)
except ReadTimeout:
    print("Timeout")
except HTTPError:
    print("Http error")
except RequestException:
    print("Error")
学习笔记来源崔庆才 python3网络爬虫