【Python好用到哭的库】requests-HTTP请求库

48 阅读7分钟

库简介

requests是Python中最简单易用的HTTP库,让发送HTTP请求变得非常简单。它基于urllib3构建,提供了更人性化的API接口,支持HTTP连接保持和连接池、SSL证书验证、文件上传、自动解压、连接超时等高级功能。

主要特点

  • 简单易用的API,让HTTP请求变得直观
  • 自动处理连接池和持久连接
  • 支持SSL证书验证
  • 支持文件上传和流式下载
  • 自动处理gzip和deflate压缩
  • 支持国际域名和URL
  • 支持Cookie持久化

应用场景

  • 调用RESTful API接口
  • 网页数据爬取
  • 文件下载和上传
  • 微服务间通信
  • 自动化测试

安装方法

pip install requests

版本要求

  • Python 3(requests 自 2.28 起不再支持 Python 2.7;具体最低版本以 PyPI 页面说明为准)
  • 建议使用最新版本以获得最佳性能和安全性

验证安装

import requests
# Print the installed requests version to confirm the package imports
installed_version = requests.__version__
print(installed_version)

入门示例

基本GET请求

import requests

# Send a GET request. A timeout is passed explicitly because requests
# waits indefinitely by default when the server never answers.
response = requests.get('https://api.github.com', timeout=10)

# Inspect the HTTP status code
print(f"状态码:{response.status_code}")

# Show only the first 100 characters of the body
print(f"响应内容:{response.text[:100]}...")

# Read a response header; .get() returns None instead of raising
# KeyError when the server did not send that header
print(f"内容类型:{response.headers.get('Content-Type')}")

带参数的GET请求

import requests

# Query-string parameters; requests encodes them into the URL
params = {'q': 'python', 'page': 1, 'sort': 'stars'}

# Send the GET request with the parameters and a timeout so the call
# cannot block forever
response = requests.get(
    'https://api.github.com/search/repositories',
    params=params,
    timeout=10,
)

# Raise on 4xx/5xx instead of parsing an error payload as if it were a
# search result (an error body is not expected to carry the keys read below)
response.raise_for_status()

# Parse the JSON body
data = response.json()
print(f"找到 {data['total_count']} 个仓库")

# A search can legitimately match nothing, so guard the first-item access
items = data['items']
if items:
    print(f"第一个仓库:{items[0]['full_name']}")

POST请求

import requests
import json

# Fields to send in the POST body
data = {
    'username': 'testuser',
    'password': 'testpass'
}

# data= sends the dict as a form-encoded body; the timeout keeps the call
# from waiting forever on an unresponsive server
response = requests.post('https://httpbin.org/post', data=data, timeout=10)

# json= serializes the dict as a JSON body instead.
# Note: this second call replaces the previous `response`.
json_data = {'key': 'value'}
response = requests.post('https://httpbin.org/post', json=json_data, timeout=10)

print(f"响应状态码:{response.status_code}")
print(f"响应内容:{response.json()}")

处理响应

import requests

# Always pass a timeout: without one requests can wait indefinitely
response = requests.get('https://api.github.com', timeout=10)

# Check whether the request succeeded
if response.status_code == 200:
    print("请求成功!")

    # Body decoded to text
    text_content = response.text

    # Body parsed as JSON. json() raises a ValueError subclass when the
    # body is not valid JSON, so fall back to None instead of crashing.
    try:
        json_content = response.json()
    except ValueError:
        json_content = None

    # Body as raw bytes
    binary_content = response.content

    # Underlying raw response object; per the requests documentation it is
    # only meant to be read from when the request was made with stream=True
    raw_response = response.raw

    # Response headers
    headers = response.headers

    # Cookies set by the server
    cookies = response.cookies

    # Final URL of the response
    url = response.url

    # Encoding used to decode response.text
    encoding = response.encoding

    # Redirect history (earlier responses in the redirect chain)
    history = response.history
else:
    print(f"请求失败,状态码:{response.status_code}")

进阶实战

完整的API调用示例

import requests
import json
from typing import List, Dict, Optional

class GitHubAPI:
    """Small client for a handful of GitHub REST API endpoints.

    Every public method performs one HTTP call with a 10 second timeout.
    Failures are reported with ``print`` and signalled to the caller by an
    "empty" return value (``None`` or ``[]``) rather than an exception.
    """

    def __init__(self, token: Optional[str] = None):
        """Prepare the base URL and the default request headers.

        Args:
            token: Optional GitHub personal access token. When given, it is
                sent in the ``Authorization`` header of every request.
        """
        self.base_url = 'https://api.github.com'
        self.headers = {
            'Accept': 'application/vnd.github.v3+json',
            'User-Agent': 'Python-Requests-GitHub-Client'
        }

        if token:
            self.headers['Authorization'] = f'token {token}'

    def _request_json(self, method: str, path: str, failure_label: str,
                      expected_status: int = 200,
                      show_error_body: bool = False, **request_kwargs):
        """Send one request and return its decoded JSON body, or None on failure.

        This is the single place that holds the timeout and the error
        reporting previously duplicated in every public method.

        Args:
            method: HTTP method name, e.g. ``'GET'`` or ``'POST'``.
            path: Path appended to ``self.base_url`` (starts with ``/``).
            failure_label: Text printed in front of the status code when the
                response status differs from ``expected_status``.
            expected_status: Status code that counts as success.
            show_error_body: Also print the response text on a status mismatch.
            **request_kwargs: Extra arguments forwarded to ``requests.request``
                (``params``, ``json``, ...).

        Returns:
            The decoded JSON body on success, otherwise None.
        """
        url = f'{self.base_url}{path}'

        try:
            response = requests.request(
                method, url, headers=self.headers, timeout=10, **request_kwargs
            )

            if response.status_code != expected_status:
                print(f"{failure_label}:{response.status_code}")
                if show_error_body:
                    print(f"错误信息:{response.text}")
                return None

            return response.json()

        except requests.exceptions.Timeout:
            print("请求超时")
        except requests.exceptions.RequestException as e:
            print(f"请求异常:{e}")
        return None

    def get_user_info(self, username: str) -> Optional[Dict]:
        """Fetch the public profile of a user.

        Args:
            username: GitHub login name.

        Returns:
            The user information dict, or None when the request failed.
        """
        return self._request_json('GET', f'/users/{username}', "获取用户信息失败")

    def get_user_repos(self, username: str, per_page: int = 30) -> List[str]:
        """List the repository names of a user.

        Args:
            username: GitHub login name.
            per_page: Page size sent as the ``per_page`` query parameter.

        Returns:
            Repository names from the response; an empty list on failure.
        """
        repos = self._request_json(
            'GET', f'/users/{username}/repos', "获取仓库列表失败",
            params={'per_page': per_page},
        )
        if repos is None:
            return []
        return [repo['name'] for repo in repos]

    def create_issue(self, owner: str, repo: str, title: str, 
                    body: Optional[str] = None, labels: Optional[List[str]] = None) -> Optional[Dict]:
        """Create an issue in a repository.

        Args:
            owner: Repository owner.
            repo: Repository name.
            title: Issue title.
            body: Issue text; omitted from the payload when empty.
            labels: Label names; omitted from the payload when empty.

        Returns:
            The created issue as a dict, or None when the request failed
            (any status other than 201 counts as a failure).
        """
        data = {'title': title}

        if body:
            data['body'] = body

        if labels:
            data['labels'] = labels

        issue = self._request_json(
            'POST', f'/repos/{owner}/{repo}/issues', "创建issue失败",
            expected_status=201, show_error_body=True, json=data,
        )
        if issue is not None:
            print(f"成功创建issue:{title}")
        return issue

    def search_repositories(self, query: str, sort: str = 'stars', 
                           order: str = 'desc', per_page: int = 30) -> List[Dict]:
        """Search repositories.

        Args:
            query: Search query string.
            sort: Sort key (stars, forks, updated).
            order: Sort direction (asc, desc).
            per_page: Page size sent as the ``per_page`` query parameter.

        Returns:
            The ``items`` list of the search response; an empty list on
            failure.
        """
        params = {
            'q': query,
            'sort': sort,
            'order': order,
            'per_page': per_page
        }

        result = self._request_json(
            'GET', '/search/repositories', "搜索仓库失败", params=params
        )
        if result is None:
            return []
        return result['items']

# Usage example
if __name__ == '__main__':
    # Client without a token (subject to API call limits)
    github = GitHubAPI()

    # Look up a user profile; a falsy result means the lookup failed
    profile = github.get_user_info('torvalds')
    if profile:
        print(f"用户:{profile['name']}")
        print(f"关注者:{profile['followers']}")
        print(f"仓库数:{profile['public_repos']}")

    # List the user's repositories, five per page
    repo_names = github.get_user_repos('torvalds', per_page=5)
    print(f"Linus Torvalds的前5个仓库:{repo_names}")

    # Search for Python-related repositories
    matches = github.search_repositories('python language:python', per_page=3)
    print("搜索到的Python仓库:")
    for match in matches:
        print(f"  - {match['full_name']} ({match['stargazers_count']} stars)")

高级功能示例

import time
from typing import Optional

import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

class AdvancedHTTPClient:
    """HTTP client built around one shared ``requests.Session``.

    The session is mounted with an adapter-level retry policy. On top of
    that the class offers a streaming file download and an explicit retry
    loop with exponential backoff.
    """

    def __init__(self, max_retries: int = 3, backoff_factor: float = 0.5):
        """Create the session and mount the retrying adapter.

        Args:
            max_retries: Total number of retries passed to ``Retry``.
            backoff_factor: Backoff factor passed to ``Retry``.
        """
        self.session = requests.Session()

        # Adapter-level retry policy.
        # NOTE(review): POST is listed in allowed_methods, so a POST may be
        # sent more than once when the server answers with one of the
        # status codes below — confirm that is acceptable for the target API.
        retry_strategy = Retry(
            total=max_retries,
            backoff_factor=backoff_factor,
            status_forcelist=[429, 500, 502, 503, 504],
            allowed_methods=["HEAD", "GET", "OPTIONS", "POST", "PUT", "DELETE"]
        )

        adapter = HTTPAdapter(max_retries=retry_strategy)

        # Use the same adapter for both schemes
        self.session.mount("http://", adapter)
        self.session.mount("https://", adapter)

    def download_file(self, url: str, filepath: str, chunk_size: int = 8192) -> bool:
        """Stream ``url`` into ``filepath`` and print the progress.

        A HEAD request is sent first to read the file size and whether the
        server advertises byte ranges. That information is only printed:
        the download always starts from the beginning, no resume is done.

        Args:
            url: URL of the file.
            filepath: Destination path; opened in ``'wb'`` mode.
            chunk_size: Number of bytes requested per streamed chunk.

        Returns:
            True after the response body has been written, False when the
            HEAD or GET status is not 200 or a requests exception occurs.
        """
        try:
            # Session.head() does not follow redirects unless asked to, so
            # request it explicitly; the GET below follows them by default.
            head_response = self.session.head(url, timeout=10, allow_redirects=True)

            if head_response.status_code != 200:
                print(f"无法获取文件信息:{head_response.status_code}")
                return False

            # 0 means the size is unknown; progress output is skipped then
            file_size = int(head_response.headers.get('Content-Length', 0))

            accept_ranges = head_response.headers.get('Accept-Ranges', 'none')

            print(f"文件大小:{file_size / 1024 / 1024:.2f} MB")
            print(f"支持断点续传:{accept_ranges == 'bytes'}")

            # With stream=True the connection stays checked out until the
            # response is closed; the with-block guarantees that on every
            # exit path, including early returns and exceptions.
            with self.session.get(url, stream=True, timeout=30) as response:
                if response.status_code != 200:
                    print(f"下载失败:{response.status_code}")
                    return False

                downloaded = 0
                with open(filepath, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=chunk_size):
                        if chunk:
                            f.write(chunk)
                            downloaded += len(chunk)

                            # Progress needs a known total size
                            if file_size > 0:
                                progress = (downloaded / file_size) * 100
                                print(f"下载进度:{progress:.1f}%", end='\r')

            print(f"\n文件下载完成:{filepath}")
            return True

        except requests.exceptions.Timeout:
            print("请求超时")
            return False
        except requests.exceptions.RequestException as e:
            print(f"请求异常:{e}")
            return False

    def make_request_with_retry(self, method: str, url: str, **kwargs) -> Optional[requests.Response]:
        """Send a request, retrying on server errors and connection problems.

        Args:
            method: HTTP method.
            url: Request URL.
            **kwargs: Forwarded to ``Session.request``. The extra key
                ``max_attempts`` (default 3) is consumed here and sets the
                number of attempts of this loop.

        Returns:
            The response when its status is below 500 (4xx responses are
            returned without retrying), or None when every attempt failed.
        """
        max_attempts = kwargs.pop('max_attempts', 3)

        for attempt in range(max_attempts):
            try:
                response = self.session.request(method, url, **kwargs)

                if response.status_code < 400:
                    return response
                elif 400 <= response.status_code < 500:
                    # Client error: retrying would not help
                    print(f"客户端错误:{response.status_code}")
                    return response
                else:
                    # Server error: fall through to the backoff below
                    print(f"服务器错误:{response.status_code},第{attempt + 1}次重试")

            except (requests.exceptions.Timeout,
                    requests.exceptions.ConnectionError,
                    # Raised when the mounted Retry policy gives up on a
                    # status from status_forcelist; without catching it the
                    # exception would escape instead of returning None.
                    requests.exceptions.RetryError) as e:
                print(f"连接错误:{e},第{attempt + 1}次重试")

            # Exponential backoff, skipped after the last attempt
            if attempt < max_attempts - 1:
                wait_time = 2 ** attempt
                print(f"等待{wait_time}秒后重试...")
                time.sleep(wait_time)

        print(f"所有{max_attempts}次尝试都失败")
        return None

# Usage example
if __name__ == '__main__':
    # Build the client with three adapter-level retries
    http = AdvancedHTTPClient(max_retries=3)

    # Download a file and report success
    if http.download_file('https://example.com/largefile.zip', 'largefile.zip'):
        print("文件下载成功")

    # Request through the explicit retry loop
    reply = http.make_request_with_retry(
        'GET',
        'https://api.example.com/data',
        timeout=10
    )

    # None means every attempt failed; otherwise only a 200 is processed
    if reply is not None and reply.status_code == 200:
        print("请求成功")
        payload = reply.json()
        print(f"获取到{len(payload)}条数据")

高级功能

1. 会话管理

import requests

# A Session reuses connections and keeps cookies between requests. The
# with-block closes it even when one of the requests raises.
with requests.Session() as session:
    # Headers set here are sent with every request made through the session
    session.headers.update({'User-Agent': 'MyApp/1.0'})

    # requests has no session-wide timeout setting (assigning
    # session.timeout is silently ignored), so pass it on each call
    response1 = session.get('https://api.example.com/login', timeout=10)
    response2 = session.get('https://api.example.com/data', timeout=10)

    # Cookies received so far are stored on the session
    print(f"会话Cookie:{session.cookies}")

2. 代理设置

import requests

# Map each URL scheme to the proxy that should carry it
proxies = {
    'http': 'http://10.10.1.10:3128',
    'https': 'http://10.10.1.10:1080',
}

# Send the request through the proxy; the timeout keeps an unreachable
# proxy from blocking the call forever
response = requests.get('https://api.example.com', proxies=proxies, timeout=10)

# Alternatively, configure the proxies through environment variables
# export HTTP_PROXY="http://10.10.1.10:3128"
# export HTTPS_PROXY="http://10.10.1.10:1080"

3. SSL证书验证

import requests

# Disable certificate verification (not recommended in production)
response = requests.get('https://api.example.com', verify=False, timeout=10)

# Verify against a custom CA certificate file
response = requests.get('https://api.example.com', verify='/path/to/cert.pem', timeout=10)

# Present a client certificate as a (certificate, key) pair
response = requests.get('https://api.example.com', 
                       cert=('/path/client.cert', '/path/client.key'),
                       timeout=10)

4. 超时设置

import requests

# timeout = (connect timeout, read timeout)
# connect timeout: maximum time allowed for establishing the connection
# read timeout: time allowed for the server to send data
connect_timeout, read_timeout = 3.05, 27

try:
    response = requests.get(
        'https://api.example.com',
        timeout=(connect_timeout, read_timeout),
    )
except requests.exceptions.Timeout:
    print("请求超时")

5. 文件上传

import requests

# Upload a single file. The with-block closes the handle once the request
# has been sent; a bare open() call would leave it open.
with open('report.xls', 'rb') as report:
    files = {'file': report}
    response = requests.post('https://httpbin.org/post', files=files, timeout=30)

# Upload several files under the same form field name
with open('foo.png', 'rb') as foo, open('bar.png', 'rb') as bar:
    files = [
        ('images', ('foo.png', foo, 'image/png')),
        ('images', ('bar.png', bar, 'image/png'))
    ]
    response = requests.post('https://httpbin.org/post', files=files, timeout=30)

# Upload a file together with ordinary form fields
data = {'name': 'John Doe'}
with open('report.xls', 'rb') as report:
    files = {'file': report}
    response = requests.post('https://httpbin.org/post', data=data, files=files, timeout=30)

6. 流式请求

import requests

# Streamed download. With stream=True the connection is held until the
# response is closed, so the with-block releases it when the loop is done
# or fails; the timeout keeps a stalled server from blocking forever.
with requests.get('https://httpbin.org/stream/20', stream=True, timeout=10) as response:
    for line in response.iter_lines():
        # Skip empty lines
        if line:
            print(line.decode('utf-8'))

# 流式上传
def generate_data():
    """Yield ten encoded chunks, ``data chunk 0`` through ``data chunk 9``.

    Each chunk is a bytes object that ends with a newline.
    """
    yield from (f'data chunk {index}\n'.encode() for index in range(10))

response = requests.post('https://httpbin.org/post', data