线程池ThreadPoolExecutor源码分析

1,220 阅读3分钟

需求来源:项目接口需要异步处理数据

  • python threading 模块源码解析
  • 执行过程解析
  • python threading 模块简单使用
  • ThreadPoolExecutor 源码解析
  • 执行过程解析
  • ThreadPoolExecutor 简单使用

1.python threading 模块源码解析

  • 我们看一下模块的源代码
class Thread:
    """A class that represents a thread of control.
    This class can be safely subclassed in a limited fashion. There are two ways
    to specify the activity: by passing a callable object to the constructor, or
    by overriding the run() method in a subclass.
    """
    
    # Class-level default for the _initialized flag.
    # NOTE(review): presumably set to True by the constructor body, which is
    # not reproduced in this excerpt -- confirm against the full source.
    _initialized = False
    # Alias for exc_info on the module bound to the name _sys; that import
    # lives outside this excerpt.
    _exc_info = _sys.exc_info
    
    def __init__(self, group=None, target=None, name=None,
        args=(), kwargs=None, *, daemon=None):
        """
        This constructor should always be called with keyword arguments. Arguments are:
        *group* should be None; reserved for future extension when a ThreadGroup
        class is implemented.
        *target* is the callable object to be invoked by the run()
        method. Defaults to None, meaning nothing is called.
        *name* is the thread name. By default, a unique name is constructed of
        the form "Thread-N" where N is a small decimal number.
        *args* is the argument tuple for the target invocation. Defaults to ().
        *kwargs* is a dictionary of keyword arguments for the target
        invocation. Defaults to {}.
        If a subclass overrides the constructor, it must make sure to invoke
        the base class constructor (Thread.__init__()) before doing anything
        else to the thread.
        """
        # NOTE: only the signature and docstring are reproduced here; the
        # constructor body is omitted from this excerpt. `daemon` is
        # keyword-only (it follows the bare `*`) and is not described by the
        # docstring above.
  • Thread 是一个表示控制线程的类,这个类可以被(有限地)安全继承,从而实现多态。指定线程要执行的活动有两种方式:一种是向构造函数传入一个 callable 对象(即可调用对象,通过 target 参数传入);另一种是在子类中重写 Thread 类的 run() 方法。
def test():
    """Do nothing; exists only so it can be checked with callable()."""
    pass


# callable() is a builtin function, not a module, so it needs no import
# (`import callable` raises ModuleNotFoundError). It returns True for any
# object that can be called, such as a function.
print(callable(test))  # True

demo.py 线程的简单使用

import threading


def test1():
    """First demo function; it does nothing."""
    return None


def test2():
    """Second demo function; it does nothing."""
    return None


# Run each demo function in its own thread, in the same order as before.
for demo_target in (test1, test2):
    threading.Thread(target=demo_target).start()

2.python ThreadPoolExecutor

  • 看一下这个模块的源代码
class ThreadPoolExecutor(_base.Executor):
    """Executor that runs submitted callables on a pool of worker threads.

    Work items are placed on an internal queue by submit(). Worker threads
    are started lazily, one per submit() call, until max_workers threads
    exist. The worker loop itself (_worker) is defined outside this excerpt.
    """

    # Used to assign unique thread names when thread_name_prefix is not supplied.
    _counter = itertools.count().__next__

    def __init__(self, max_workers=None, thread_name_prefix='',
                 initializer=None, initargs=()):
        """Initializes a new ThreadPoolExecutor instance.

        Args:
            max_workers: The maximum number of threads that can be used to
                execute the given calls.
            thread_name_prefix: An optional name prefix to give our threads.
            initializer: A callable used to initialize worker threads.
            initargs: A tuple of arguments to pass to the initializer.

        Raises:
            ValueError: If max_workers is less than or equal to 0.
            TypeError: If initializer is given but is not callable.
        """
        if max_workers is None:
            # Use this number because ThreadPoolExecutor is often
            # used to overlap I/O instead of CPU work.
            max_workers = (os.cpu_count() or 1) * 5
        if max_workers <= 0:
            raise ValueError("max_workers must be greater than 0")

        if initializer is not None and not callable(initializer):
            raise TypeError("initializer must be a callable")

        self._max_workers = max_workers
        # Queue of pending _WorkItem objects (and None entries, see shutdown()).
        self._work_queue = queue.SimpleQueue()
        # Worker threads started so far.
        self._threads = set()
        # False while healthy; replaced by an error message string in
        # _initializer_failed().
        self._broken = False
        self._shutdown = False
        # Guards _broken/_shutdown and serializes submit() against shutdown().
        self._shutdown_lock = threading.Lock()
        self._thread_name_prefix = (thread_name_prefix or
                                    ("ThreadPoolExecutor-%d" % self._counter()))
        self._initializer = initializer
        self._initargs = initargs

    # The signature is (*args, **kwargs) rather than (self, fn, *args, **kwargs)
    # and self/fn are unpacked by hand below. With fn passed positionally, any
    # keyword arguments (even ones named 'self' or 'fn') are forwarded to fn.
    def submit(*args, **kwargs):
        if len(args) >= 2:
            # Normal call: executor.submit(fn, ...).
            self, fn, *args = args
        elif not args:
            # Called on the class with no instance at all.
            raise TypeError("descriptor 'submit' of 'ThreadPoolExecutor' object "
                            "needs an argument")
        elif 'fn' in kwargs:
            # Only self was positional; accept fn passed as a keyword.
            fn = kwargs.pop('fn')
            self, *args = args
        else:
            raise TypeError('submit expected at least 1 positional argument, '
                            'got %d' % (len(args)-1))

        with self._shutdown_lock:
            if self._broken:
                raise BrokenThreadPool(self._broken)

            if self._shutdown:
                raise RuntimeError('cannot schedule new futures after shutdown')
            # _shutdown (no self) is a module-level flag defined outside this
            # excerpt; per the message below it signals interpreter shutdown.
            if _shutdown:
                raise RuntimeError('cannot schedule new futures after '
                                   'interpreter shutdown')

            f = _base.Future()
            w = _WorkItem(f, fn, args, kwargs)

            # Enqueue first, then make sure a thread exists to pick it up.
            self._work_queue.put(w)
            self._adjust_thread_count()
            return f
    # Reuse the base class docstring instead of duplicating it here.
    submit.__doc__ = _base.Executor.submit.__doc__

    def _adjust_thread_count(self):
        """Start one more worker thread if fewer than max_workers exist."""
        # When the executor gets lost, the weakref callback will wake up
        # the worker threads.
        def weakref_cb(_, q=self._work_queue):
            q.put(None)
        # TODO(bquinlan): Should avoid creating new threads if there are more
        # idle threads than items in the work queue.
        num_threads = len(self._threads)
        if num_threads < self._max_workers:
            thread_name = '%s_%d' % (self._thread_name_prefix or self,
                                     num_threads)
            # The worker receives only a weak reference to this executor, so
            # the thread does not keep the executor alive.
            t = threading.Thread(name=thread_name, target=_worker,
                                 args=(weakref.ref(self, weakref_cb),
                                       self._work_queue,
                                       self._initializer,
                                       self._initargs))
            t.daemon = True
            t.start()
            self._threads.add(t)
            # _threads_queues is a module-level mapping defined outside this
            # excerpt; it records which queue each worker thread reads from.
            _threads_queues[t] = self._work_queue

    def _initializer_failed(self):
        """Mark the pool as broken and fail every work item still queued.

        NOTE(review): presumably invoked by _worker when the initializer
        raises -- the caller is not visible in this excerpt.
        """
        with self._shutdown_lock:
            self._broken = ('A thread initializer failed, the thread pool '
                            'is not usable anymore')
            # Drain work queue and mark pending futures failed
            while True:
                try:
                    work_item = self._work_queue.get_nowait()
                except queue.Empty:
                    break
                # None entries are not work items (see shutdown() and
                # weakref_cb), so they carry no future to fail.
                if work_item is not None:
                    work_item.future.set_exception(BrokenThreadPool(self._broken))

    def shutdown(self, wait=True):
        with self._shutdown_lock:
            # From now on submit() raises RuntimeError.
            self._shutdown = True
            # Put a None entry on the queue.
            # NOTE(review): presumably a wake-up/stop signal for _worker,
            # which is defined outside this excerpt.
            self._work_queue.put(None)
        if wait:
            # Block until every worker thread started so far has finished.
            for t in self._threads:
                t.join()
    # Reuse the base class docstring instead of duplicating it here.
    shutdown.__doc__ = _base.Executor.shutdown.__doc__

  • 线程池 带参数
  • 轮询获取数据脚本
import time
from concurrent.futures.thread import ThreadPoolExecutor

def get_modbus(id):
    """Poll one Modbus device (placeholder body for the example).

    Args:
        id: Identifier of the device to poll. The name shadows the ``id``
            builtin; it is kept so that keyword callers keep working.
    """
    pass


def poll_devices(device_ids=(1, 2, 3, 4, 5), interval=5, max_workers=5,
                 max_cycles=None):
    """Submit get_modbus for every device, sleep, and repeat.

    A single pool is created once and reused for every cycle. Building a new
    ThreadPoolExecutor on each pass of the loop would start a fresh set of
    threads every ``interval`` seconds without ever shutting the old pool
    down.

    Args:
        device_ids: Device identifiers passed to get_modbus, one call each.
        interval: Seconds to sleep between polling cycles.
        max_workers: Size of the thread pool.
        max_cycles: Number of cycles to run, or None to poll forever.
    """
    # The with-block shuts the pool down (and joins its threads) on exit,
    # including when the loop is interrupted by an exception.
    with ThreadPoolExecutor(max_workers) as pool:
        cycle = 0
        while max_cycles is None or cycle < max_cycles:
            for device_id in device_ids:
                pool.submit(get_modbus, device_id)
            time.sleep(interval)
            cycle += 1


# Guarded so that importing this module does not start an endless loop.
if __name__ == "__main__":
    poll_devices()
  • 线程池 队列
  • 异步, 并发处理
import time
from queue import Queue
from concurrent.futures import ThreadPoolExecutor

# Two queues: q1 feeds worker1, q2 carries worker1's output to worker2.
# Putting None on a queue tells the worker reading it to stop.
q1 = Queue()
q2 = Queue()


def worker1():
    """Square every item taken from q1 and put the result on q2.

    Runs until it takes ``None`` from q1. On the way out, whether normally
    or because of an exception, it puts ``None`` on q2 so that worker2 stops
    too instead of blocking forever on an empty queue.
    """
    try:
        while True:
            item = q1.get()
            try:
                if item is None:
                    return
                print('get item from q1...', f'item = {item}')
                time.sleep(0.1)  # simulated processing time
                q2.put(item ** 2)
            finally:
                # Balance every get(), even on error, so q1.join() stays usable.
                q1.task_done()
    finally:
        q2.put(None)


def worker2():
    """Consume items from q2 until ``None`` is taken from it."""
    while True:
        item = q2.get()
        try:
            if item is None:
                return
            print('get item from q2...', f'item = {item}')
            time.sleep(0.1)  # simulated processing time
        finally:
            # Balance every get(), even on error, so q2.join() stays usable.
            q2.task_done()


def run_pipeline(items=range(6)):
    """Feed ``items`` through worker1 and worker2 and wait for both.

    Args:
        items: Values put on q1; each must support ``** 2`` and must not be
            None, which is reserved as the stop signal.

    Raises:
        Exception: Whatever a worker raised, re-raised by Future.result().
    """
    # Two long-running tasks need two threads; the with-block shuts the pool
    # down so the program can exit.
    with ThreadPoolExecutor(2) as pool:
        p1 = pool.submit(worker1)
        p2 = pool.submit(worker2)

        for item in items:
            q1.put(item)
        q1.put(None)  # no more input: lets worker1, then worker2, finish

        # result() returns once the worker has drained its queue and
        # re-raises any exception from the worker, rather than hanging as a
        # bare q1.join() would.
        p1.result()
        p2.result()


# Guarded so that importing this module does not run the pipeline.
if __name__ == "__main__":
    run_pipeline()