python源码剖析7~10章

686 阅读6分钟

7.python的编译结果

1.python程序的执行过程

以java为例,program.java 对应 program.py;program.class 对应 program.pyc。pyc文件中的code对象是类似汇编语言的中间语言,之后虚拟机会将其翻译成c的操作,通过c模拟栈帧等一些操作。

2.python编译器的编译结果-PyCodeObject对象

  • PyCodeObject对象是真正的编译结果,pyc是这个对象在硬盘上的表现形式
  • python编译器在对源码进行编译时,对代码中的每个code_block会创建一个PyCodeObject
  • 每进入一个新的作用域会认为是一个code_block
  • 被import的文件才会生成pyc;如果被import的非内置模块的pyc文件不存在,会先创建pyc文件再import
/* Definitions for bytecode python2.7 */
/* Bytecode object: the in-memory result of compiling one code block.
 * The fields above the "doesn't count for hash/cmp" marker describe the
 * code itself; the fields below it carry origin/debugging information
 * (file name, name, line-number table) and bookkeeping.
 *
 * NOTE(review): the comments on co_consts and co_names say "list"; in
 * CPython these are believed to be tuples, like the other name
 * collections below -- confirm against PyCode_New before relying on it.
 */
typedef struct {
    PyObject_HEAD
    int co_argcount;		/* #arguments, except *args */
    int co_nlocals;		/* #local variables */
    int co_stacksize;		/* #entries needed for evaluation stack */
    int co_flags;		/* CO_..., see below */
    PyObject *co_code;		/* instruction opcodes */
    PyObject *co_consts;	/* list (constants used) */
    PyObject *co_names;		/* list of strings (names used) */
    PyObject *co_varnames;	/* tuple of strings (local variable names) */
    PyObject *co_freevars;	/* tuple of strings (free variable names) */
    PyObject *co_cellvars;      /* tuple of strings (cell variable names) */
    /* The rest doesn't count for hash/cmp */
    PyObject *co_filename;	/* string (where it was loaded from) */
    PyObject *co_name;		/* string (name, for reference) */
    int co_firstlineno;		/* first source line number */
    PyObject *co_lnotab;	/* string (encoding addr<->lineno mapping) See
				   Objects/lnotab_notes.txt for details. */
    void *co_zombieframe;     /* for optimization only (see frameobject.c) */
    PyObject *co_weakreflist;   /* to support weakrefs to code objects */
} PyCodeObject;
/* Routine that generates the .pyc file.
 *
 * Serialises the code object `co` into the file named `cpathname`.
 * The file's permission bits are derived from the source file's mode
 * (`srcstat`) with the execute bits removed, and `mtime` is stored in
 * the header.  The function returns nothing: failures are only reported
 * on stderr when Py_VerboseFlag is set, and a partially written file is
 * removed.
 *
 * Layout written here, in order: pyc_magic, the mtime field, the
 * marshalled code object.
 */
static void
write_compiled_module(PyCodeObject *co, char *cpathname, struct stat *srcstat, time_t mtime)
{
    FILE *fp;
    mode_t mode = srcstat->st_mode;

#ifdef MS_WINDOWS   /* since Windows uses different permissions  */
    mode &= ~S_IEXEC;
    /* Issue #6074: keep the file user-writable so that it can be deleted
     * later when the source changes.  On POSIX, write access to the
     * directory is enough for that; on Windows the file itself must be
     * writable too.
     */
    mode |= _S_IWRITE;
#else
    /* Strip the execute bit for user, group and others in one step. */
    mode &= ~(S_IXUSR | S_IXGRP | S_IXOTH);
#endif

    fp = open_exclusive(cpathname, mode);
    if (!fp) {
        if (Py_VerboseFlag) {
            PySys_WriteStderr("# can't create %s\n", cpathname);
        }
        return;
    }

    /* Header field 1: the magic number (the article notes it serves
     * compatibility between Python versions). */
    PyMarshal_WriteLongToFile(pyc_magic, fp, Py_MARSHAL_VERSION);
    /* Header field 2: a placeholder 0 for mtime, patched further down
     * once the body is known to have been written. */
    PyMarshal_WriteLongToFile(0L, fp, Py_MARSHAL_VERSION);
    /* Body: the marshalled code object. */
    PyMarshal_WriteObjectToFile((PyObject *)co, fp, Py_MARSHAL_VERSION);

    if (fflush(fp) || ferror(fp)) {
        if (Py_VerboseFlag) {
            PySys_WriteStderr("# can't write %s\n", cpathname);
        }
        /* Never leave a truncated file behind. */
        fclose(fp);
        (void) unlink(cpathname);
        return;
    }

    /* Go back to offset 4 (where the placeholder was written) and store
     * the real mtime as a 32-bit field.
     * Per the article, this timestamp is what gets compared with the .py
     * file to decide whether the .pyc is up to date.
     * NOTE(review): the article also claims Python 2.5 wrote the time at
     * the placeholder step instead -- unverified.
     */
    fseek(fp, 4L, 0);
    assert(mtime <= 0xFFFFFFFF);
    PyMarshal_WriteLongToFile((long)mtime, fp, Py_MARSHAL_VERSION);
    fflush(fp);
    fclose(fp);

    if (Py_VerboseFlag) {
        PySys_WriteStderr("# wrote %s\n", cpathname);
    }
}

相关写入的细节略

8.python虚拟机框架

python虚拟机实际上是对cpu的抽象。python虚拟机会从编译得到的PyCodeObject对象中依次读入每一条字节码指令,在当前上下文执行。

8.1 python虚拟机中的执行环境

  • python通过PyFrameObject维护栈帧
  • PyFrameObject实际是对物理机栈帧机制的一层实现(调用相关的PyFrameObject的地址并不会像实际的物理机一样会连续)
/* Frame object: the execution record for one running code object.
 * It links back to the previous frame (f_back), references the code
 * being run (f_code), holds the three symbol tables (builtins, globals,
 * locals), and ends with f_localsplus, a trailing array that stores the
 * locals followed by the value stack (sized when the frame is created).
 */
typedef struct _frame {
    PyObject_VAR_HEAD
    struct _frame *f_back;	/* previous frame, or NULL */
    PyCodeObject *f_code;	/* code segment */
    PyObject *f_builtins;	/* builtin symbol table (PyDictObject) */
    PyObject *f_globals;	/* global symbol table (PyDictObject) */
    PyObject *f_locals;		/* local symbol table (any mapping) */
    PyObject **f_valuestack;	/* points after the last local */
    /* Next free slot in f_valuestack.  Frame creation sets to f_valuestack.
       Frame evaluation usually NULLs it, but a frame that yields sets it
       to the current stack top. */
    PyObject **f_stacktop;
    PyObject *f_trace;		/* Trace function */

    /* If an exception is raised in this frame, the next three are used to
     * record the exception info (if any) originally in the thread state.  See
     * comments before set_exc_info() -- it's not obvious.
     * Invariant:  if _type is NULL, then so are _value and _traceback.
     * Desired invariant:  all three are NULL, or all three are non-NULL.  That
     * one isn't currently true, but "should be".
     */
    PyObject *f_exc_type, *f_exc_value, *f_exc_traceback;

    PyThreadState *f_tstate;	/* thread state associated with this frame */
    int f_lasti;		/* Last instruction if called */
    /* Call PyFrame_GetLineNumber() instead of reading this field
       directly.  As of 2.3 f_lineno is only valid when tracing is
       active (i.e. when f_trace is set).  At other times we use
       PyCode_Addr2Line to calculate the line from the current
       bytecode index. */
    int f_lineno;		/* Current line number */
    int f_iblock;		/* index in f_blockstack */
    PyTryBlock f_blockstack[CO_MAXBLOCKS]; /* for try and loop blocks */
    PyObject *f_localsplus[1];	/* locals+stack, dynamically sized */
} PyFrameObject;

// 创建一个frame通过`PyFrame_New`实现
/* Create a new frame for running `code` (abridged excerpt).
 *
 * The article elides large parts of this function, including the
 * condition of the `if`, the allocation of `f`, and the end of the
 * function; only the sizing and initialisation of the frame's trailing
 * storage is shown.  `back` is taken from the thread state's current
 * frame.
 */
PyFrameObject *
PyFrame_New(PyThreadState *tstate, PyCodeObject *code, PyObject *globals,
            PyObject *locals)
{
    PyFrameObject *back = tstate->frame;
    PyFrameObject *f;
    PyObject *builtins;
    Py_ssize_t i;

    if // elided by the article: debug-mode logic, argument checks, zombie-frame handling, etc.; per the author, 2.7 and 2.5 are about the same below
    else {
        Py_ssize_t extras, ncells, nfrees;
        ncells = PyTuple_GET_SIZE(code->co_cellvars);
        nfrees = PyTuple_GET_SIZE(code->co_freevars);
        /* First computation of extras: value stack + locals + cell and
           free variable slots. */
        extras = code->co_stacksize + code->co_nlocals + ncells +
            nfrees;
        // elided by the article: the free-list optimisation
        f->f_code = code;
        extras = code->co_nlocals + ncells + nfrees; // extras now counts only the locals plus the objects needed for closures (cell and free variables)
        /* The value stack begins right after those slots. */
        f->f_valuestack = f->f_localsplus + extras;
        /* Clear every local/cell/free slot. */
        for (i=0; i<extras; i++)
            f->f_localsplus[i] = NULL;
        f->f_locals = NULL;
        f->f_trace = NULL;
        f->f_exc_type = f->f_exc_value = f->f_exc_traceback = NULL;
    }

8.2 namespace 等等

  • import abc,class A, 参数传递, a=1这些都是赋值语句,都执行创建一个obj,并给这个obj一个name,如果name在module中,每个name都是module的属性。
  • python的作用域是静态代码决定而不是运行时决定的,python是具有静态作用域,在编译时就确定了name的引用策略(所谓的有global a和 local a,在local a赋值前调用a会报错)
  • LEGB:local、enclosing、global、builtin;属性引用没有LEGB规则

8.3 python虚拟机运行框架

  • PyEval_EvalFrameEx是虚拟机的入口函数:PyObject * PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
  • 主要逻辑是一个switch语句,根据code的字节码进行相应的处理,同时处理的过程中会改变一个why变量的值,会返回exception, return的执行结果

8.4 python运行时环境初探

  • 进程状态对象PyInterpreterState
  • 线程状态对象PyThreadState,(python的线程是直接调用的内核原生线程)。
// Include/pystate.h
// Article author's note: this is probably the first struct that is not a
// PyObject "subclass" (it does not begin with PyObject_HEAD).
/* Interpreter-level state: a node in a linked list (next) that owns a
 * list of thread states (tstate_head) plus interpreter-wide objects
 * such as the module table, the sys dict, builtins and codec registries.
 */
typedef struct _is {

    struct _is *next;       // article author's guess: the next "process" (interpreter state) in the list?
    struct _ts *tstate_head; // the set of threads belonging to this interpreter (head of the thread-state list)

    PyObject *modules;
    PyObject *sysdict;
    PyObject *builtins;
    PyObject *modules_reloading;

    PyObject *codec_search_path;
    PyObject *codec_search_cache;
    PyObject *codec_error_registry;

#ifdef HAVE_DLOPEN
    int dlopenflags;
#endif
#ifdef WITH_TSC
    int tscdump;
#endif

} PyInterpreterState;

/* Per-thread state: a node in its interpreter's list of thread states
 * (next / interp), holding the thread's current frame, recursion and
 * tracing bookkeeping, the current and handled exception triples, and
 * assorted per-thread counters.
 */
typedef struct _ts {
    /* See Python/ceval.c for comments explaining most fields */

    struct _ts *next;
    PyInterpreterState *interp;

    struct _frame *frame; /* The call stack of this thread (its current frame).
    ** Per the article: PyEval_EvalFrameEx (see the previous section)
    ** begins every call by making its argument f the thread's current
    ** frame, while PyFrame_New records the thread's current state into
    ** the new frame's f_back, f_tstate, and so on.
    */
    int recursion_depth;
    /* 'tracing' keeps track of the execution depth when tracing/profiling.
       This is to prevent the actual trace/profile code from being recorded in
       the trace/profile. */
    int tracing;
    int use_tracing;

    Py_tracefunc c_profilefunc;
    Py_tracefunc c_tracefunc;
    PyObject *c_profileobj;
    PyObject *c_traceobj;

    PyObject *curexc_type;
    PyObject *curexc_value;
    PyObject *curexc_traceback;

    PyObject *exc_type;
    PyObject *exc_value;
    PyObject *exc_traceback;

    PyObject *dict;  /* Stores per-thread state */

    /* tick_counter is incremented whenever the check_interval ticker
     * reaches zero. The purpose is to give a useful measure of the number
     * of interpreted bytecode instructions in a given thread.  This
     * extremely lightweight statistic collector may be of interest to
     * profilers (like psyco.jit()), although nothing in the core uses it.
     */
    int tick_counter;

    int gilstate_counter;

    PyObject *async_exc; /* Asynchronous exception to raise */
    long thread_id; /* Thread id where this tstate was created */

    int trash_delete_nesting;
    PyObject *trash_delete_later;

    /* XXX signal handlers should also be here */

} PyThreadState;

9~10.python虚拟机的一般表达式和控制流(静态部分)

  • 总体就是介绍dis解释出的指令的具体操作逻辑,类似汇编语言。但是python会把这些通过c语言替换。
    • 引用计数的维护是在这一步进行的
    • 同时加入了f_locals和汇编的单纯栈帧操作又不一样
  • PyEval_EvalFrameEx定义了处理字节码指令序列co_code的宏,op相关宏的定义在Include/opcode.h
// opcode.h
// Abridged excerpt: the include guard and the extern "C" block opened
// below are closed in the part of the header that the article omits.
#ifndef Py_OPCODE_H
#define Py_OPCODE_H
#ifdef __cplusplus
extern "C" {
#endif


/* Instruction opcodes for compiled code */

/* Each macro maps an instruction name to its numeric opcode.  The
   numbering shown here is not contiguous (e.g. nothing is defined for
   6-8, 14 or 16-18 in this excerpt). */
#define STOP_CODE	0
#define POP_TOP		1
#define ROT_TWO		2
#define ROT_THREE	3
#define DUP_TOP		4
#define ROT_FOUR	5
#define NOP		9

#define UNARY_POSITIVE	10
#define UNARY_NEGATIVE	11
#define UNARY_NOT	12
#define UNARY_CONVERT	13

#define UNARY_INVERT	15

#define BINARY_POWER	19

#define BINARY_MULTIPLY	20
// ... (remaining opcodes omitted by the article)
// ceval.c
// Abridged excerpt of the opcode dispatch switch; the enclosing function
// and the macros used here (TARGET, FAST_DISPATCH, GETLOCAL, PUSH, ...)
// are defined outside this excerpt.
 switch (opcode) {

        /* BEWARE!
           It is essential that any operation that fails sets either
           x to NULL, err to nonzero, or why to anything but WHY_NOT,
           and that no operation that succeeds does this! */

        /* case STOP_CODE: this is an error! */

        /* NOP: nothing to do, go straight to the next dispatch. */
        TARGET_NOARG(NOP)
        {
            FAST_DISPATCH();
        }

        /* LOAD_FAST: read local slot `oparg`.  If it holds a value, take
           a new reference, push it and dispatch.  Otherwise report an
           UnboundLocalError naming the variable (looked up in
           co_varnames) and leave the switch via break. */
        TARGET(LOAD_FAST)
        {
            x = GETLOCAL(oparg);
            if (x != NULL) {
                Py_INCREF(x);
                PUSH(x);
                FAST_DISPATCH();
            }
            format_exc_check_arg(PyExc_UnboundLocalError,
                UNBOUNDLOCAL_ERROR_MSG,
                PyTuple_GetItem(co->co_varnames, oparg));
            break;
        }

        /* LOAD_CONST: fetch item `oparg` from consts, take a new
           reference and push it. */
        TARGET(LOAD_CONST)
        {
            x = GETITEM(consts, oparg);
            Py_INCREF(x);
            PUSH(x);
            FAST_DISPATCH();
        }

        /* STORE_FAST: pop the top of the stack and store it into local
           slot `oparg`.  No Py_INCREF here: the popped reference is
           handed to SETLOCAL (presumably SETLOCAL also releases the old
           value -- macro not shown, confirm). */
        PREDICTED_WITH_ARG(STORE_FAST);
        TARGET(STORE_FAST)
        {
            v = POP();
            SETLOCAL(oparg, v);
            FAST_DISPATCH();
        }
// ... (remaining opcodes omitted by the article)

如果要看相应操作的具体逻辑,可以python -m dis a.py 根据code码在PyEval_EvalFrameEx看对应逻辑。 特殊一点的有迭代器的实现