# 线程池嵌套协程 (学习使用,请勿用于商业行为)
import asyncio
import threading
import aiohttp
import logging
import parsel
import concurrent.futures
from jsonpath import jsonpath
# Module-level lock; main() holds it for the duration of each batch.
lock = threading.Lock()
# Set the root logger's threshold to INFO.
logging.getLogger().setLevel(logging.INFO)
async def init_request(code):
    """Call the search endpoint for ``code`` and return its decoded JSON body.

    Args:
        code: Value sent as the ``input`` query parameter.

    Returns:
        Whatever ``response.json()`` yields for the search reply.
    """
    # Browser-style request headers, including an eastmoney Referer.
    request_headers = {
        'Accept': '*/*',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Referer': 'https://www.eastmoney.com/',
        'Sec-Fetch-Dest': 'script',
        'Sec-Fetch-Mode': 'no-cors',
        'Sec-Fetch-Site': 'same-site',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36',
        'sec-ch-ua': '"Google Chrome";v="105", "Not)A;Brand";v="8", "Chromium";v="105"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    }
    query = {'input': code, 'type': '14'}

    # The connector is created with ssl=False, i.e. certificate checks are off.
    connector = aiohttp.TCPConnector(ssl=False)
    async with aiohttp.ClientSession(connector=connector) as session:
        pending_reply = session.get(
            'https://searchapi.eastmoney.com/***/get',
            params=query,
            cookies=None,
            headers=request_headers,
        )
        async with pending_reply as reply:
            return await reply.json()
async def get_overview_url_code(response):
    """Resolve the code used by the company-overview page for a search result.

    Reads the ``QuoteID`` of the first entry in
    ``response['QuotationCodeTable']['Data']``, downloads the quote page for
    it, locates the anchor whose text is "公司概况" (company overview) and
    returns the part of its ``href`` after the last ``=``.

    Args:
        response: Decoded JSON returned by the search endpoint.

    Returns:
        str: The trailing ``=``-separated segment of the overview link.

    Raises:
        ValueError: If the search response carries no usable ``QuoteID`` or
            the quote page has no company-overview link.
    """
    # Fail early with a readable message instead of an opaque
    # KeyError/IndexError/TypeError when the search returned nothing.
    try:
        quote_id = response['QuotationCodeTable']['Data'][0]['QuoteID']
    except (KeyError, IndexError, TypeError) as exc:
        raise ValueError('search response does not contain a QuoteID') from exc

    async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) as session:
        async with session.get(f'http://quote.eastmoney.com/***/{quote_id}') as res:
            # Undecodable bytes are dropped ("ignore") rather than raising.
            page_text = await res.text("utf-8", "ignore")

    selector = parsel.Selector(page_text)
    overview_url = selector.xpath('//a[text()="公司概况"]/@href').get()
    if overview_url is None:
        # .get() yields None when the XPath matches nothing; calling .split
        # on it would raise AttributeError.
        raise ValueError(f'no company-overview link found on quote page for {quote_id!r}')
    return overview_url.split('=')[-1]
async def request_url(code):
    """Fetch the PageAjax endpoint for ``code`` and return its decoded JSON.

    Args:
        code: Value formatted into the ``code`` query parameter.

    Returns:
        Whatever ``res.json()`` yields for the reply.
    """
    # Headers mark the call as an XMLHttpRequest from a desktop browser.
    request_headers = {
        'Accept': '*/*',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
    }
    query = {'code': f'{code}'}

    connector = aiohttp.TCPConnector(ssl=False)
    async with aiohttp.ClientSession(connector=connector) as session:
        pending_reply = session.get(
            'http://emweb.securities.eastmoney.com/***/PageAjax',
            params=query,
            headers=request_headers,
            ssl=False,
        )
        async with pending_reply as reply:
            return await reply.json()
async def parse_response(response):
    """Pull the company fields out of the overview JSON and print them.

    Each output key is filled with the first value matched by its JSONPath
    expression, or ``None`` when the expression matches nothing.

    Args:
        response: Decoded JSON returned by the PageAjax endpoint.

    Returns:
        None. The assembled dict is written to stdout with ``print``.
    """
    # (output key, JSONPath) pairs; the order fixes the printed key order.
    field_paths = (
        ('companyName', "$..ORG_NAME"),
        ('numberOfEmployees', "$..EMP_NUM"),
        ('companyProfile', "$..ORG_PROFILE"),
        ('sfcIndustry', "$..INDUSTRYCSRC1"),
        ('dateOfEstablishment', "$..FOUND_DATE"),
    )
    item = {}
    for key, path in field_paths:
        matches = jsonpath(response, path)
        # jsonpath() returns False (not an empty list) when nothing matches,
        # so indexing it unconditionally would raise TypeError.
        item[key] = matches[0] if matches else None
    print(item)
async def do_work(code):
    """Run the full lookup pipeline for a single code.

    Steps: search for the code, resolve the overview-page code from the
    search result, fetch the overview JSON, then parse and print it.

    Args:
        code: The code handed to ``init_request``.
    """
    search_result = await init_request(code)
    overview_code = await get_overview_url_code(search_result)
    await parse_response(await request_url(overview_code))
async def run(code_list):
    """Process every code in ``code_list`` concurrently and wait for all of them.

    One task running ``do_work`` is created per code. A failure in one task
    does not stop the others; each failure is logged with the code that
    caused it.

    Args:
        code_list: Iterable of codes to process. May be empty.

    Returns:
        None.
    """
    codes = list(code_list)
    if not codes:
        # asyncio.wait() raises ValueError on an empty collection; with
        # nothing to process, simply return.
        return
    task_list = [asyncio.create_task(do_work(code)) for code in codes]
    # gather(..., return_exceptions=True) waits for every task, as
    # asyncio.wait did, but hands back each task's exception so it can be
    # reported instead of being left unretrieved.
    results = await asyncio.gather(*task_list, return_exceptions=True)
    for code, result in zip(codes, results):
        if isinstance(result, BaseException):
            logging.error("do_work(%r) failed: %r", code, result)
def task(code_list):
    """Drive ``run(code_list)`` to completion on a fresh event loop.

    Args:
        code_list: Codes forwarded unchanged to ``run``.
    """
    batch_coroutine = run(code_list)
    asyncio.run(batch_coroutine)
def main(code_list):
    """Process one batch of codes while holding the module-level ``lock``.

    Args:
        code_list: Batch of codes forwarded to ``task``.
    """
    # `with` releases the lock even when task() raises. A bare
    # acquire()/release() pair would leave it held and block every other
    # worker thread forever.
    with lock:
        task(code_list)
if __name__ == '__main__':
    # Three batches of (masked) codes; each batch is submitted to the pool as
    # one unit of work.
    code_list = [
        [
            "300***", "688***", "300***", "603***", "300***",
            "688***", "300***", "300***", "600***", "002***",
            "300***", "601***", "836***", "688***", "301***",
        ],
        [
            "002***", "871***", "300***", "688***", "300***",
            "301***", "688***", "688***", "603***", "688***",
            "300***", "300***", "603***", "300***", "002***",
        ],
        [
            "002***", "301***", "300***", "688***", "688***",
            "603***", "300***", "688***", "603***", "002***",
            "300***", "603***", "688***", "002***", "301***",
            "603***", "300***", "601***", "688***", "688***",
            "688***", "688***", "688***", "603***", "300***",
            "300***", "301***", "300***", "603***",
        ],
    ]
    with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
        futures = [executor.submit(main, batch) for batch in code_list]
        # A Future stores any exception raised by its callable; unless it is
        # inspected, a failed batch disappears without a trace.
        for future in concurrent.futures.as_completed(futures):
            error = future.exception()
            if error is not None:
                logging.error("batch failed: %r", error)
# 以上均为学习分享,可能存在不足或者还有其他更优雅的方法,欢迎评论区留言交流!