库简介
requests 是 Python 中最流行、最易用的 HTTP 库之一,让发送 HTTP 请求变得非常简单。它基于 urllib3 构建,提供了更人性化的 API 接口,支持连接保持(Keep-Alive)与连接池、SSL 证书验证、文件上传、自动解压、连接超时等高级功能。
主要特点:
- 简单易用的API,让HTTP请求变得直观
- 自动处理连接池和持久连接
- 支持SSL证书验证
- 支持文件上传和流式下载
- 自动处理gzip和deflate压缩
- 支持国际域名和URL
- 支持Cookie持久化
应用场景:
- 调用RESTful API接口
- 网页数据爬取
- 文件下载和上传
- 微服务间通信
- 自动化测试
安装方法
pip install requests
版本要求:
- 较新的 requests 版本仅支持 Python 3(自 2.28 起不再支持 Python 2.7,具体最低版本请以 PyPI 页面为准);Python 2.7 或 Python 3.5 只能安装早已停止维护的旧版本
- 建议使用最新版本以获得最佳性能和安全性
验证安装:
import requests
# Show which Requests release is installed; reaching this line at all
# confirms that the package imported successfully.
installed_version = requests.__version__
print(installed_version)
入门示例
基本GET请求
import requests
# Issue a plain GET request against the GitHub API root.
response = requests.get('https://api.github.com')

# Report the HTTP status code.
status_code = response.status_code
print(f"状态码:{status_code}")

# Show the response body, truncated to its first 100 characters.
body_preview = response.text[:100]
print(f"响应内容:{body_preview}...")

# Read a single response header.
content_type = response.headers['Content-Type']
print(f"内容类型:{content_type}")
带参数的GET请求
import requests
# Query-string parameters, passed to Requests via ``params=``.
params = {'q': 'python', 'page': 1, 'sort': 'stars'}

# Search GitHub repositories with the parameters above. The timeout keeps the
# script from hanging indefinitely if the server never answers.
response = requests.get(
    'https://api.github.com/search/repositories',
    params=params,
    timeout=10,
)

# Fail loudly on an HTTP error status (for example when rate-limited) instead
# of hitting a confusing KeyError on the error payload below.
response.raise_for_status()

# Decode the JSON body.
data = response.json()
print(f"找到 {data['total_count']} 个仓库")

# A search may legitimately match nothing, so only index a non-empty list.
items = data['items']
if items:
    print(f"第一个仓库:{items[0]['full_name']}")
POST请求
import requests
import json
# Both requests below target the same echo endpoint.
post_url = 'https://httpbin.org/post'

# Variant 1: submit the dict through ``data=``.
data = dict(username='testuser', password='testpass')
response = requests.post(post_url, data=data)

# Variant 2: submit a dict through ``json=`` instead.
json_data = dict(key='value')
response = requests.post(post_url, json=json_data)

# Only the second response is inspected; it replaced the first one above.
status_code = response.status_code
print(f"响应状态码:{status_code}")
decoded_body = response.json()
print(f"响应内容:{decoded_body}")
处理响应
import requests
response = requests.get('https://api.github.com')

# Anything other than 200 is reported as a failure; on success, walk through
# the most commonly used attributes of the response object.
request_succeeded = response.status_code == 200
if not request_succeeded:
    print(f"请求失败,状态码:{response.status_code}")
else:
    print("请求成功!")
    text_content = response.text          # body decoded to str
    json_content = response.json()        # body parsed as JSON (JSON responses only)
    binary_content = response.content     # body as raw bytes
    raw_response = response.raw           # underlying raw response object
    headers = response.headers            # response headers
    cookies = response.cookies            # cookies carried by the response
    url = response.url                    # URL of the response
    encoding = response.encoding          # text encoding of the response
    history = response.history            # redirect history
进阶实战
完整的API调用示例
import requests
import json
from typing import List, Dict, Optional
class GitHubAPI:
    """Small client wrapping a handful of GitHub REST API operations.

    Each method performs a single HTTP call through ``requests`` with a
    10-second timeout. Non-success status codes, timeouts and other
    ``requests`` exceptions are reported with ``print`` and turned into a
    fallback return value (``None`` or an empty list).
    """

    def __init__(self, token: Optional[str] = None):
        """Prepare the base URL and the headers sent with every request.

        Args:
            token: Optional GitHub personal access token. When provided it is
                sent in the ``Authorization`` header, which raises the API
                rate limit.
        """
        self.base_url = 'https://api.github.com'
        default_headers = {
            'Accept': 'application/vnd.github.v3+json',
            'User-Agent': 'Python-Requests-GitHub-Client',
        }
        if token:
            default_headers['Authorization'] = f'token {token}'
        self.headers = default_headers

    def get_user_info(self, username: str) -> Optional[Dict]:
        """Fetch the profile of a GitHub user.

        Args:
            username: GitHub login name.

        Returns:
            The decoded JSON profile, or ``None`` if the request failed.
        """
        endpoint = f'{self.base_url}/users/{username}'
        try:
            resp = requests.get(endpoint, headers=self.headers, timeout=10)
            if resp.status_code != 200:
                print(f"获取用户信息失败:{resp.status_code}")
                return None
            return resp.json()
        except requests.exceptions.Timeout:
            print("请求超时")
            return None
        except requests.exceptions.RequestException as exc:
            print(f"请求异常:{exc}")
            return None

    def get_user_repos(self, username: str, per_page: int = 30) -> List[str]:
        """List the names of a user's repositories.

        Args:
            username: GitHub login name.
            per_page: Page size sent as the ``per_page`` query parameter.

        Returns:
            Repository names from the response, or ``[]`` if the request
            failed.
        """
        endpoint = f'{self.base_url}/users/{username}/repos'
        query_params = {'per_page': per_page}
        try:
            resp = requests.get(
                endpoint, headers=self.headers, params=query_params, timeout=10
            )
            if resp.status_code != 200:
                print(f"获取仓库列表失败:{resp.status_code}")
                return []
            repo_names = []
            for entry in resp.json():
                repo_names.append(entry['name'])
            return repo_names
        except requests.exceptions.Timeout:
            print("请求超时")
            return []
        except requests.exceptions.RequestException as exc:
            print(f"请求异常:{exc}")
            return []

    def create_issue(self, owner: str, repo: str, title: str,
                     body: Optional[str] = None,
                     labels: Optional[List[str]] = None) -> Optional[Dict]:
        """Create an issue in ``owner/repo``.

        Args:
            owner: Repository owner.
            repo: Repository name.
            title: Issue title.
            body: Optional issue text; omitted from the payload when falsy.
            labels: Optional list of labels; omitted from the payload when
                falsy.

        Returns:
            The decoded JSON of the created issue (HTTP 201), or ``None`` if
            the request failed.
        """
        endpoint = f'{self.base_url}/repos/{owner}/{repo}/issues'
        payload = {'title': title}
        # Only include the optional fields that were actually supplied.
        for field_name, field_value in (('body', body), ('labels', labels)):
            if field_value:
                payload[field_name] = field_value
        try:
            resp = requests.post(
                endpoint, headers=self.headers, json=payload, timeout=10
            )
            if resp.status_code != 201:
                print(f"创建issue失败:{resp.status_code}")
                print(f"错误信息:{resp.text}")
                return None
            print(f"成功创建issue:{title}")
            return resp.json()
        except requests.exceptions.Timeout:
            print("请求超时")
            return None
        except requests.exceptions.RequestException as exc:
            print(f"请求异常:{exc}")
            return None

    def search_repositories(self, query: str, sort: str = 'stars',
                            order: str = 'desc', per_page: int = 30) -> List[Dict]:
        """Search repositories.

        Args:
            query: Search query string.
            sort: Sort key (stars, forks, updated).
            order: Sort direction (asc, desc).
            per_page: Page size sent as the ``per_page`` query parameter.

        Returns:
            The ``items`` list of the search response, or ``[]`` if the
            request failed.
        """
        endpoint = f'{self.base_url}/search/repositories'
        search_params = {
            'q': query,
            'sort': sort,
            'order': order,
            'per_page': per_page,
        }
        try:
            resp = requests.get(
                endpoint, headers=self.headers, params=search_params, timeout=10
            )
            if resp.status_code != 200:
                print(f"搜索仓库失败:{resp.status_code}")
                return []
            return resp.json()['items']
        except requests.exceptions.Timeout:
            print("请求超时")
            return []
        except requests.exceptions.RequestException as exc:
            print(f"请求异常:{exc}")
            return []
# Example usage
if __name__ == '__main__':
    # No token is supplied, so the API rate limit for anonymous calls applies.
    api = GitHubAPI()

    # Look up a user profile and print a few of its fields.
    user_info = api.get_user_info('torvalds')
    if user_info:
        profile_fields = (
            ("用户", 'name'),
            ("关注者", 'followers'),
            ("仓库数", 'public_repos'),
        )
        for label, key in profile_fields:
            print(f"{label}:{user_info[key]}")

    # List the user's repositories, five per page.
    repos = api.get_user_repos('torvalds', per_page=5)
    print(f"Linus Torvalds的前5个仓库:{repos}")

    # Search for Python-related repositories.
    python_repos = api.search_repositories('python language:python', per_page=3)
    print("搜索到的Python仓库:")
    for repo in python_repos:
        full_name = repo['full_name']
        stars = repo['stargazers_count']
        print(f" - {full_name} ({stars} stars)")
高级功能示例
import requests
import time
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
class AdvancedHTTPClient:
    """HTTP client built on a ``requests.Session`` with automatic retries.

    The session gets an ``HTTPAdapter`` carrying a urllib3 ``Retry`` policy,
    mounted for both ``http://`` and ``https://``. On top of that the class
    offers a chunked file download and an application-level retry loop with
    exponential backoff.
    """

    def __init__(self, max_retries: int = 3, backoff_factor: float = 0.5):
        """Create the session and mount the retrying adapter.

        Args:
            max_retries: Maximum number of retries, passed to ``Retry`` as
                ``total``.
            backoff_factor: Backoff factor passed to ``Retry``.
        """
        self.session = requests.Session()

        # Retry policy applied by the adapter to every request on the session.
        retry_strategy = Retry(
            total=max_retries,
            backoff_factor=backoff_factor,
            status_forcelist=[429, 500, 502, 503, 504],
            # NOTE(review): POST is listed here, so a non-idempotent request
            # may be re-sent on retry -- confirm this is intended.
            allowed_methods=["HEAD", "GET", "OPTIONS", "POST", "PUT", "DELETE"],
        )
        adapter = HTTPAdapter(max_retries=retry_strategy)
        self.session.mount("http://", adapter)
        self.session.mount("https://", adapter)

    def download_file(self, url: str, filepath: str, chunk_size: int = 8192) -> bool:
        """Download ``url`` to ``filepath`` in chunks, printing progress.

        A HEAD request is issued first to read the file size and to report
        whether the server advertises byte-range support. This method only
        *reports* that capability: the file is always downloaded from the
        beginning, and partial downloads are not resumed.

        Args:
            url: URL of the file.
            filepath: Destination path; opened in ``'wb'`` mode.
            chunk_size: Number of bytes requested per chunk.

        Returns:
            ``True`` when the file was written, ``False`` on a non-200 status,
            a timeout, or any other ``requests`` exception.
        """
        try:
            # requests does not follow redirects for HEAD by default; follow
            # them so a redirected download URL is not reported as a failure.
            head_response = self.session.head(url, timeout=10, allow_redirects=True)
            if head_response.status_code != 200:
                print(f"无法获取文件信息:{head_response.status_code}")
                return False

            # A missing Content-Length is treated as an unknown size (0).
            file_size = int(head_response.headers.get('Content-Length', 0))
            accept_ranges = head_response.headers.get('Accept-Ranges', 'none')
            print(f"文件大小:{file_size / 1024 / 1024:.2f} MB")
            print(f"支持断点续传:{accept_ranges == 'bytes'}")

            # The context manager closes the streamed response (releasing its
            # connection) even if writing the file fails part-way through.
            with self.session.get(url, stream=True, timeout=30) as response:
                if response.status_code != 200:
                    print(f"下载失败:{response.status_code}")
                    return False
                with open(filepath, 'wb') as f:
                    downloaded = 0
                    for chunk in response.iter_content(chunk_size=chunk_size):
                        if chunk:
                            f.write(chunk)
                            downloaded += len(chunk)
                            # Progress is only meaningful when the size is known.
                            if file_size > 0:
                                progress = (downloaded / file_size) * 100
                                print(f"下载进度:{progress:.1f}%", end='\r')
                print(f"\n文件下载完成:{filepath}")
                return True
        except requests.exceptions.Timeout:
            print("请求超时")
            return False
        except requests.exceptions.RequestException as e:
            print(f"请求异常:{e}")
            return False

    # The return annotation is quoted so it is not evaluated when the class is
    # defined; it therefore does not require ``Optional`` to be imported.
    def make_request_with_retry(self, method: str, url: str, **kwargs) -> "Optional[requests.Response]":
        """Send a request, retrying server errors with exponential backoff.

        This loop runs in addition to the retry policy mounted on the session
        in ``__init__``.

        Args:
            method: HTTP method.
            url: Request URL.
            **kwargs: Forwarded to ``Session.request``. The extra keyword
                ``max_attempts`` (default 3) is consumed here and sets the
                number of attempts made by this loop.

        Returns:
            The response for any status below 500 (4xx responses are returned
            without retrying), or ``None`` when every attempt failed.
        """
        max_attempts = kwargs.pop('max_attempts', 3)
        for attempt in range(max_attempts):
            try:
                response = self.session.request(method, url, **kwargs)
                if response.status_code < 400:
                    return response
                elif 400 <= response.status_code < 500:
                    # Client errors will not be fixed by retrying.
                    print(f"客户端错误:{response.status_code}")
                    return response
                else:
                    # Server error: fall through to the backoff below.
                    print(f"服务器错误:{response.status_code},第{attempt + 1}次重试")
            except (requests.exceptions.Timeout,
                    requests.exceptions.ConnectionError,
                    # Raised when the mounted Retry policy gives up on a
                    # status in status_forcelist; treat it as a failed attempt
                    # so the documented "return None" contract holds.
                    requests.exceptions.RetryError) as e:
                print(f"连接错误:{e},第{attempt + 1}次重试")

            # Back off before the next attempt; no wait after the last one.
            if attempt < max_attempts - 1:
                wait_time = 2 ** attempt  # exponential backoff: 1s, 2s, 4s, ...
                print(f"等待{wait_time}秒后重试...")
                time.sleep(wait_time)

        print(f"所有{max_attempts}次尝试都失败")
        return None
# Example usage
if __name__ == '__main__':
    # Build the client with up to three retries.
    client = AdvancedHTTPClient(max_retries=3)

    # Download a file to the current directory.
    download_url = 'https://example.com/largefile.zip'
    target_path = 'largefile.zip'
    success = client.download_file(download_url, target_path)
    if success:
        print("文件下载成功")

    # Issue a GET through the retry loop.
    response = client.make_request_with_retry(
        'GET', 'https://api.example.com/data', timeout=10
    )
    if response:
        if response.status_code == 200:
            print("请求成功")
            data = response.json()
            print(f"获取到{len(data)}条数据")
高级功能
1. 会话管理
import requests
# Create the session in a ``with`` block so it is closed on exit, even if one
# of the requests below raises.
with requests.Session() as session:
    # Headers configured here are sent with every request made via the session.
    session.headers.update({'User-Agent': 'MyApp/1.0'})

    # Requests sessions have no session-wide timeout: assigning
    # ``session.timeout`` is silently ignored, so the timeout has to be passed
    # on every call.
    timeout = 10
    response1 = session.get('https://api.example.com/login', timeout=timeout)
    response2 = session.get('https://api.example.com/data', timeout=timeout)

    # The session handles cookies automatically across requests.
    print(f"会话Cookie:{session.cookies}")
2. 代理设置
import requests
# Proxy to use for each URL scheme.
proxies = dict(
    http='http://10.10.1.10:3128',
    https='http://10.10.1.10:1080',
)

# Route this request through the proxies above.
target_url = 'https://api.example.com'
response = requests.get(target_url, proxies=proxies)

# Alternatively, configure the proxies through environment variables:
# export HTTP_PROXY="http://10.10.1.10:3128"
# export HTTPS_PROXY="http://10.10.1.10:1080"
3. SSL证书验证
import requests
target_url = 'https://api.example.com'

# Disable certificate verification (not recommended in production).
response = requests.get(target_url, verify=False)

# Verify against a custom CA certificate file.
ca_bundle_path = '/path/to/cert.pem'
response = requests.get(target_url, verify=ca_bundle_path)

# Present a client certificate as a (certificate, private key) pair.
client_cert = ('/path/client.cert', '/path/client.key')
response = requests.get(target_url, cert=client_cert)
4. 超时设置
import requests
# The timeout can be given as a pair: timeout = (connect timeout, read timeout)
#   connect timeout: maximum time allowed for establishing the connection
#   read timeout:    time allowed for the server to send data
connect_timeout, read_timeout = 3.05, 27
try:
    response = requests.get(
        'https://api.example.com',
        timeout=(connect_timeout, read_timeout),
    )
except requests.exceptions.Timeout:
    print("请求超时")
5. 文件上传
import requests
# Every file is opened in a ``with`` block so its handle is closed as soon as
# the upload finishes, even if the request raises.

# Upload a single file.
with open('report.xls', 'rb') as report_file:
    files = {'file': report_file}
    response = requests.post('https://httpbin.org/post', files=files)

# Upload several files under the same form field name.
with open('foo.png', 'rb') as foo_file, open('bar.png', 'rb') as bar_file:
    files = [
        ('images', ('foo.png', foo_file, 'image/png')),
        ('images', ('bar.png', bar_file, 'image/png')),
    ]
    response = requests.post('https://httpbin.org/post', files=files)

# Upload a file together with ordinary form data.
data = {'name': 'John Doe'}
with open('report.xls', 'rb') as report_file:
    files = {'file': report_file}
    response = requests.post('https://httpbin.org/post', data=data, files=files)
6. 流式请求
import requests
# Streaming download: request with stream=True and read the body line by line.
response = requests.get('https://httpbin.org/stream/20', stream=True)
# filter(None, ...) drops the empty lines, leaving only lines with content.
for raw_line in filter(None, response.iter_lines()):
    print(raw_line.decode('utf-8'))
# 流式上传
def generate_data():
for i in range(10):
yield f'data chunk {i}\n'.encode()
response = requests.post('https://httpbin.org/post', data