Python Concurrent HTTP Request Examples

Request example

The script below compares three approaches against the same 10 URLs: a ThreadPoolExecutor sharing a requests.Session, aiohttp with a semaphore capping in-flight requests, and aiohttp firing every request at once via asyncio.gather.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Python concurrent HTTP request examples.
Includes:
1. Thread-based concurrency (threading / concurrent.futures)
2. asyncio + aiohttp asynchronous requests against 10 URLs
"""

import asyncio
import aiohttp
import threading
import requests
import time
import json
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import List, Dict
import urllib3

# Suppress InsecureRequestWarning for unverified HTTPS requests
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

class ConcurrentHttpTester:
    """并发HTTP请求测试器"""
    
    def __init__(self, max_workers: int = 10, timeout: int = 10):
        """
        Initialize the tester.

        Args:
            max_workers: maximum number of concurrent requests
            timeout: request timeout in seconds
        """
        self.max_workers = max_workers
        self.timeout = timeout
        self.results = []
        self.lock = threading.Lock()
    
    def get_test_urls(self) -> List[str]:
        """获取测试用的10个网址"""
        return [
            "https://httpbin.org/delay/1",
            "https://jsonplaceholder.typicode.com/posts/1",
            "https://httpbin.org/json",
            "https://api.github.com/users/github",
            "https://httpbin.org/uuid",
            "https://httpbin.org/headers",
            "https://jsonplaceholder.typicode.com/users/1",
            "https://httpbin.org/user-agent",
            "https://jsonplaceholder.typicode.com/comments/1",
            "https://httpbin.org/ip"
        ]
    
    # === Thread-based concurrency ===
    def make_request(self, url: str, session: requests.Session) -> Dict:
        """发送单个HTTP请求(多线程版本)"""
        start_time = time.time()
        thread_name = threading.current_thread().name
        
        try:
            response = session.get(url, timeout=self.timeout, verify=False)
            end_time = time.time()
            
            result = {
                'url': url,
                'status_code': response.status_code,
                'response_time': round(end_time - start_time, 3),
                'content_length': len(response.content),
                'thread': thread_name,
                'method': 'threading',
                'success': True
            }
            
        except Exception as e:
            end_time = time.time()
            result = {
                'url': url,
                'status_code': None,
                'response_time': round(end_time - start_time, 3),
                'error': str(e),
                'thread': thread_name,
                'method': 'threading',
                'success': False
            }
        
        return result
    
    def run_thread_pool(self, urls: List[str]) -> List[Dict]:
        """使用ThreadPoolExecutor进行并发请求"""
        results = []
        
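        # Note: requests.Session is not documented as fully thread-safe; sharing one
        # session keeps this example short, but production code may prefer one
        # Session per worker thread.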
        with requests.Session() as session:
            with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
                future_to_url = {
                    executor.submit(self.make_request, url, session): url 
                    for url in urls
                }
                
                for future in as_completed(future_to_url):
                    result = future.result()
                    results.append(result)
        
        return results
    
    # === aiohttp asynchronous concurrency ===
    async def async_make_request(self, url: str, session: aiohttp.ClientSession) -> Dict:
        """发送单个异步HTTP请求"""
        start_time = time.time()
        
        try:
            async with session.get(url, timeout=aiohttp.ClientTimeout(total=self.timeout)) as response:
                content = await response.read()
                end_time = time.time()
                
                result = {
                    'url': url,
                    'status_code': response.status,
                    'response_time': round(end_time - start_time, 3),
                    'content_length': len(content),
                    'method': 'aiohttp',
                    'success': True
                }
                
        except Exception as e:
            end_time = time.time()
            result = {
                'url': url,
                'status_code': None,
                'response_time': round(end_time - start_time, 3),
                'error': str(e),
                'method': 'aiohttp',
                'success': False
            }
        
        return result
    
    async def run_aiohttp_sem(self, urls: List[str]) -> List[Dict]:
        """使用信号量控制并发量的aiohttp请求"""
        semaphore = asyncio.Semaphore(self.max_workers)
        # Disable SSL certificate verification to avoid CERTIFICATE_VERIFY_FAILED errors
        connector = aiohttp.TCPConnector(ssl=False)
        async with aiohttp.ClientSession(connector=connector) as session:
            async def fetch_with_semaphore(url):
                async with semaphore:
                    return await self.async_make_request(url, session)
            
            # Run all requests concurrently
            tasks = [fetch_with_semaphore(url) for url in urls]
            results = await asyncio.gather(*tasks, return_exceptions=True)
            
            # Turn any raised exceptions into failure records
            final_results = []
            for i, result in enumerate(results):
                if isinstance(result, Exception):
                    final_results.append({
                        'url': urls[i],
                        'status_code': None,
                        'response_time': 0,
                        'error': str(result),
                        'method': 'aiohttp',
                        'success': False
                    })
                else:
                    final_results.append(result)
            
            return final_results
    
    async def run_aiohttp_gather(self, urls: List[str]) -> List[Dict]:
        """使用asyncio.gather进行全部并发"""
        async with aiohttp.ClientSession() as session:
            tasks = [self.async_make_request(url, session) for url in urls]
            results = await asyncio.gather(*tasks, return_exceptions=True)
            
            final_results = []
            for i, result in enumerate(results):
                if isinstance(result, Exception):
                    final_results.append({
                        'url': urls[i],
                        'status_code': None,
                        'response_time': 0,
                        'error': str(result),
                        'method': 'aiohttp',
                        'success': False
                    })
                else:
                    final_results.append(result)
            
            return final_results
    
    def print_results(self, results: List[Dict], title: str = "Summary"):
        """Print a summary of the results."""
        print(f"\n{'='*60}")
        print(f"{title}")
        print('='*60)
        
        successful = [r for r in results if r['success']]
        failed = [r for r in results if not r['success']]
        
        print(f"总请求数: {len(results)}")
        print(f"成功: {len(successful)}")
        print(f"失败: {len(failed)}")
        
        if successful:
            avg_response_time = sum(r['response_time'] for r in successful) / len(successful)
            print(f"平均响应时间: {avg_response_time:.3f}秒")
        
        print("\n详细结果:")
        for result in results:
            if result['success']:
                print(f"✓ {result['url']} | 状态: {result['status_code']} | "
                      f"耗时: {result['response_time']}s | 方法: {result['method']}")
            else:
                print(f"✗ {result['url']} | 错误: {result.get('error', 'Unknown')} | "
                      f"耗时: {result['response_time']}s | 方法: {result['method']}")


class AsyncHttpTester:
    """aiohttp异步并发测试器"""
    
    @staticmethod
    async def demo_advanced_async(urls: List[str]):
        """高级异步用法演示"""
        print("\n=== aiohttp高级功能演示 ===")
        
        # Configure the connection pool
        connector = aiohttp.TCPConnector(
            limit=100,            # total size of the connection pool
            limit_per_host=30,    # per-host connection limit
            ttl_dns_cache=300,    # DNS cache TTL in seconds
            use_dns_cache=True,
        )
        
        timeout = aiohttp.ClientTimeout(total=10, connect=5)
        
        async with aiohttp.ClientSession(
            connector=connector,
            timeout=timeout,
            headers={'User-Agent': 'Python aiohttp demo'}
        ) as session:
            
            # Cap concurrency with a semaphore
            semaphore = asyncio.Semaphore(10)
            
            async def fetch_with_session(url):
                async with semaphore:
                    try:
                        async with session.get(url) as response:
                            data = await response.json() if response.content_type == 'application/json' else await response.text()
                            return {
                                'url': url,
                                'status': response.status,
                                'content_type': response.content_type,
                                'content_length': len(str(data)),
                                'success': True
                            }
                    except Exception as e:
                        return {
                            'url': url,
                            'error': str(e),
                            'success': False
                        }
            
            # Collect the results
            tasks = [fetch_with_session(url) for url in urls]
            results = await asyncio.gather(*tasks)
            
            print(f"高级异步完成,成功: {sum(1 for r in results if r['success'])}/" + 
                  f"{len(results)}")
            
            return results


def main():
    """主函数"""
    print("Python并发HTTP请求性能对比测试")
    print("="*50)
    
    # Create the tester
    tester = ConcurrentHttpTester(max_workers=10)
    
    # Get the test URLs
    urls = tester.get_test_urls()
    print(f"Number of test URLs: {len(urls)}")
    
    # Method 1: thread-pool concurrency
    print("\n1. Thread-pool concurrent requests...")
    start_time = time.time()
    results_thread = tester.run_thread_pool(urls)
    thread_time = time.time() - start_time
    tester.print_results(results_thread, f"Thread-pool results (total: {thread_time:.3f}s)")
    
    # Method 2: aiohttp async concurrency (semaphore-limited)
    print("\n2. aiohttp async concurrency (semaphore-limited)...")
    start_time = time.time()
    results_aiohttp_sem = asyncio.run(tester.run_aiohttp_sem(urls))
    aio_sem_time = time.time() - start_time
    tester.print_results(results_aiohttp_sem, f"aiohttp semaphore results (total: {aio_sem_time:.3f}s)")
    
    # Method 3: aiohttp async concurrency (everything at once)
    print("\n3. aiohttp async concurrency (everything at once)...")
    start_time = time.time()
    results_aiohttp_all = asyncio.run(tester.run_aiohttp_gather(urls))
    aio_all_time = time.time() - start_time
    tester.print_results(results_aiohttp_all, f"aiohttp full-concurrency results (total: {aio_all_time:.3f}s)")
    
    # Advanced async demo
    print("\n4. aiohttp advanced features demo...")
    asyncio.run(AsyncHttpTester.demo_advanced_async(urls))
    
    # Performance comparison
    print("\n" + "="*60)
    print("Performance comparison:")
    print(f"Thread pool: {thread_time:.3f}s")
    print(f"aiohttp (semaphore): {aio_sem_time:.3f}s")
    print(f"aiohttp (all at once): {aio_all_time:.3f}s")
    
    # Relative speedup
    if thread_time > 0:
        improvement1 = (thread_time - aio_sem_time) / thread_time * 100
        improvement2 = (thread_time - aio_all_time) / thread_time * 100
        print("\nSpeedup versus the thread pool:")
        print(f"aiohttp (semaphore): {improvement1:.1f}%")
        print(f"aiohttp (all at once): {improvement2:.1f}%")


if __name__ == "__main__":
    main()
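
To reuse the tester outside of main(), its methods can be called directly. A minimal sketch, assuming the script above is saved as an importable module named concurrent_http (a filename chosen here purely for illustration):

import asyncio
from concurrent_http import ConcurrentHttpTester  # hypothetical module name for the script above

# Run only the semaphore-limited aiohttp path against custom URLs
tester = ConcurrentHttpTester(max_workers=5, timeout=15)
urls = ["https://httpbin.org/get", "https://httpbin.org/uuid"]
results = asyncio.run(tester.run_aiohttp_sem(urls))
tester.print_results(results, "Custom run")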

Simple aiohttp usage

A smaller, reusable wrapper: a synchronous gets() helper that fans URLs out through aiohttp, caps concurrency with a semaphore, and returns lightweight Res objects.

import aiohttp
import asyncio
import json

class Res:
    """Lightweight response wrapper built from keyword arguments."""
    def __init__(self, **kwargs) -> None:
        self.url = kwargs['url']
        self.status = kwargs.get('status')      # None when the request failed
        self.content = kwargs.get('content')
        self.error = kwargs.get('error')
        self.success = kwargs.get('success', False)

    def json(self) -> dict[str, str | dict | list] | None:
        return json.loads(self.content) if self.content is not None else None

    def __str__(self) -> str:
        return json.dumps(self.__dict__, indent=4, sort_keys=True)

async def __doGet(url: str, session: aiohttp.ClientSession, semaphore: asyncio.Semaphore) -> Res:
    async with semaphore:
        async with session.get(url, timeout=aiohttp.ClientTimeout(total=30)) as resp:
            content = await resp.read()
            result = {
                'url': url,
                'status': resp.status,
                'content': content.decode('utf-8'),
                'success': True
            }
        return Res(**result)

async def get(urls: list[str]) -> list[Res]:
    semaphore = asyncio.Semaphore(10)            # cap concurrency at 10 in-flight requests
    connector = aiohttp.TCPConnector(ssl=False)  # skip certificate verification
    async with aiohttp.ClientSession(connector=connector) as session:
        tasks = [__doGet(url, session, semaphore) for url in urls]
        results = await asyncio.gather(*tasks, return_exceptions=True)
        final_results = []
        for i, result in enumerate(results):
            if isinstance(result, Exception):
                # A raised exception becomes a failed Res instead of crashing the batch
                final_results.append(Res(**{
                    'url': urls[i],
                    'error': str(result),
                    'success': False
                }))
            else:
                final_results.append(result)
        return final_results

def gets(urls: list[str]) -> list[Res]:
    # Synchronous wrapper: drive the async get() with asyncio.run
    return asyncio.run(get(urls))

if __name__ == '__main__':
    res = gets(['https://httpbin.org/json', 'https://httpbin.org/uuid'])
    for i in res:
        print(i)
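
For endpoints that return JSON, the Res.json() helper parses the stored body. A small sketch building on the example above (same gets() helper, same httpbin URLs):

# Fetch two JSON endpoints and work with the parsed bodies
responses = gets(['https://httpbin.org/json', 'https://httpbin.org/uuid'])
for r in responses:
    if r.success:
        print(r.url, r.json())          # parsed dict from the response body
    else:
        print(r.url, 'failed:', r.error)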