东方财富升级之线程池

2,549 阅读1分钟

东方财富之队列与线程池搭配 (学习使用,请勿用于商业行为)

前言:具体的分析过程请移步我的另一篇文章:juejin.cn/post/715271…

import concurrent.futures
import logging
import threading
import time
import requests
import parsel
import queue
from jsonpath import jsonpath
# Set the root logger's threshold to INFO.
logging.getLogger().setLevel(logging.INFO)
# Create a lock; run() holds it while printing a result.
lock = threading.Lock()
​
​
def init_request(code, timeout=10):
    """Resolve a stock code to the code used by the company-overview link.

    Queries the search endpoint with ``code``, takes the ``QuoteID`` of the
    first entry in ``QuotationCodeTable.Data``, downloads the quote page for
    that id and reads the ``href`` of the link whose text is "公司概况".

    Args:
        code: Stock code sent as the ``input`` search parameter.
        timeout: Seconds passed to every ``requests.get`` call so a stalled
            connection cannot block the calling thread forever.

    Returns:
        The part of the overview link's ``href`` after its last ``=``.

    Raises:
        ValueError: If the search yields no entry, or the quote page has no
            "公司概况" link.
        requests.RequestException: On connection failure or timeout.
    """
    headers = {
        'Accept': '*/*',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Referer': 'https://www.eastmoney.com/',
        'Sec-Fetch-Dest': 'script',
        'Sec-Fetch-Mode': 'no-cors',
        'Sec-Fetch-Site': 'same-site',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36',
        'sec-ch-ua': '"Google Chrome";v="105", "Not)A;Brand";v="8", "Chromium";v="105"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    }
    params = {
        'input': code,
        'type': '14',
    }
    search_result = requests.get(
        'https://searchapi.eastmoney.com/*****/get',
        params=params,
        headers=headers,
        timeout=timeout,
    ).json()

    # Fail with a readable message instead of an IndexError/TypeError when
    # the search has no hit for this code.
    candidates = search_result['QuotationCodeTable']['Data']
    if not candidates:
        raise ValueError(f'no search result for code {code!r}')
    quote_id = candidates[0]['QuoteID']

    page_html = requests.get(
        f'http://quote.eastmoney.com/****/{quote_id}',
        timeout=timeout,
    ).text
    selector = parsel.Selector(page_html)
    overview_url = selector.xpath('//a[text()="公司概况"]/@href').get()
    # .get() returns None when the link is absent; report that explicitly
    # rather than crashing on None.split().
    if overview_url is None:
        raise ValueError(f'no company-overview link found for code {code!r}')

    return overview_url.split('=')[-1]
​
def request_url(code, timeout=10):
    """Fetch the company-overview JSON for ``code``.

    Args:
        code: Value sent as the ``code`` query parameter (converted to str).
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot block the calling thread forever.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    headers = {
        'Accept': '*/*',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
    }
    params = {
        'code': str(code),
    }
    # NOTE(review): verify=False turns off TLS certificate verification. The
    # URL is plain http, so it presumably only matters if the server
    # redirects to https - confirm whether it is still needed.
    response = requests.get(
        'http://emweb.securities.eastmoney.com/******/PageAjax',
        params=params,
        headers=headers,
        verify=False,
        timeout=timeout,
    )
    return response.json()
​
def parse_response(response):
    """Extract the company fields of interest from the overview JSON.

    Each field is looked up anywhere in ``response`` with a recursive
    JSONPath expression and the first match is kept.

    Args:
        response: Decoded JSON object returned by the overview endpoint.

    Returns:
        A dict with the keys ``companyName``, ``numberOfEmployees``,
        ``companyProfile``, ``sfcIndustry`` and ``dateOfEstablishment``.
        A field that is absent from ``response`` is stored as ``None``.
    """
    field_paths = (
        ('companyName', "$..ORG_NAME"),  # company name
        ('numberOfEmployees', "$..EMP_NUM"),  # number of employees
        ('companyProfile', "$..ORG_PROFILE"),  # company profile
        ('sfcIndustry', "$..INDUSTRYCSRC1"),  # CSRC industry classification
        ('dateOfEstablishment', "$..FOUND_DATE"),  # date of establishment
    )
    item = {}
    for key, path in field_paths:
        matches = jsonpath(response, path)
        # A falsy result means "no match" (the classic jsonpath helper
        # returns False); indexing it would raise TypeError, so map a
        # missing field to None.
        item[key] = matches[0] if matches else None
    return item
​
def run(code):
    """Look up one stock code and print its company information.

    Resolves ``code`` with ``init_request``, downloads the overview data
    with ``request_url``, extracts the fields with ``parse_response``, adds
    the resolved code under ``'end_code'`` and prints the resulting dict.

    Args:
        code: Stock code to look up.
    """
    end_code = init_request(code)
    response = request_url(end_code)
    item = parse_response(response)
    item['end_code'] = end_code
    # Hold the lock only while printing. The context manager releases it
    # even if print() raises, which a bare acquire()/release() pair does not.
    with lock:
        print(item)
​
​
​
​
if __name__ == '__main__':
    # Demo: fill a bounded queue with stock codes, then drain it into a
    # thread pool that runs `run` for every code and report the elapsed time.
    code_list = [
        "300***",
        "688***",
        "300***",
        "603***",
        "300***",
        "688***",
        "300***",
        "300***",
        "600***",
        "002***",
        "300***",
        "601***",
        "836***",
        "688***",
        "301***",
    ]
    start = time.time()
    # maxsize=30: the queue holds at most 30 items; omit it for no limit.
    task_queue = queue.Queue(30)
    for index, code in enumerate(code_list):
        if task_queue.full():
            # Codes that do not fit are skipped; say so instead of dropping
            # them silently.
            logging.warning('task queue is full; skipping %d remaining code(s)',
                            len(code_list) - index)
            break
        task_queue.put(code)
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        # Keep every future: an exception raised inside a worker is stored
        # on its future and is lost unless somebody asks for it.
        futures = {}
        while not task_queue.empty():
            code = task_queue.get()
            futures[executor.submit(run, code)] = code
        for future in concurrent.futures.as_completed(futures):
            error = future.exception()
            if error is not None:
                logging.error('code %s failed: %r', futures[future], error)
    end = time.time()
    print(f'总用时:{end-start}!')
​

以上内容仅供学习分享,代码可能存在不足,或者还有更优雅的写法,欢迎在评论区留言交流!