7.python的编译结果
1.python程序的执行过程
以java为例,program.java 对应 program.py,program.class 对应 program.pyc。pyc文件中的code对象是类似汇编语言的中间语言,之后虚拟机会将其翻译成对应的c操作,通过c模拟栈帧等操作。
2.python编译器的编译结果-PyCodeObject对象
- PyCodeObject对象是真正的编译结果,pyc文件是这个对象在硬盘上的表现形式
- python编译器在对源码进行编译时,对代码中的每个code_block会创建一个PyCodeObject
- 每进入一个新的作用域会认为是一个code_block
- 只有被import的文件才会生成pyc;import非内置模块时,如果对应的pyc文件不存在,会先编译并创建pyc文件,再import
/* Definitions for bytecode python2.7 */
/* Bytecode object */
/*
 * PyCodeObject holds the result of compiling one code block: the opcode
 * string, the constants and names it refers to, and bookkeeping such as the
 * source file name and first line number.
 * Fields listed before the "rest doesn't count" marker below take part in
 * hash/cmp; the fields after it do not.
 */
typedef struct {
PyObject_HEAD
int co_argcount; /* #arguments, except *args */
int co_nlocals; /* #local variables */
int co_stacksize; /* #entries needed for evaluation stack */
int co_flags; /* CO_..., see below */
PyObject *co_code; /* instruction opcodes */
PyObject *co_consts; /* list (constants used) */
PyObject *co_names; /* list of strings (names used) */
PyObject *co_varnames; /* tuple of strings (local variable names) */
PyObject *co_freevars; /* tuple of strings (free variable names) */
PyObject *co_cellvars; /* tuple of strings (cell variable names) */
/* The rest doesn't count for hash/cmp */
PyObject *co_filename; /* string (where it was loaded from) */
PyObject *co_name; /* string (name, for reference) */
int co_firstlineno; /* first source line number */
PyObject *co_lnotab; /* string (encoding addr<->lineno mapping) See
Objects/lnotab_notes.txt for details. */
void *co_zombieframe; /* for optimization only (see frameobject.c) */
PyObject *co_weakreflist; /* to support weakrefs to code objects */
} PyCodeObject;
// How a .pyc file is generated
/*
 * Write the compiled code object `co` to the file `cpathname`.
 *
 *   co        - code object that gets marshalled into the file
 *   cpathname - path of the file to create (opened through open_exclusive)
 *   srcstat   - stat data of the source file; its st_mode with the execute
 *               bits cleared is used as the mode of the new file
 *   mtime     - source modification time, stored as a 32-bit field at byte
 *               offset 4 of the file
 *
 * Data is written in this order: pyc_magic, a 0 placeholder for the mtime,
 * the marshalled code object; afterwards the placeholder is overwritten with
 * the real mtime.
 *
 * Returns nothing. Failure to create or write the file is reported only via
 * PySys_WriteStderr when Py_VerboseFlag is set; a partially written file is
 * unlinked.
 */
static void
write_compiled_module(PyCodeObject *co, char *cpathname, struct stat *srcstat, time_t mtime)
{
FILE *fp;
#ifdef MS_WINDOWS /* since Windows uses different permissions */
mode_t mode = srcstat->st_mode & ~S_IEXEC;
/* Issue #6074: We ensure user write access, so we can delete it later
* when the source file changes. (On POSIX, this only requires write
* access to the directory, on Windows, we need write access to the file
* as well)
*/
mode |= _S_IWRITE;
#else
mode_t mode = srcstat->st_mode & ~S_IXUSR & ~S_IXGRP & ~S_IXOTH;
#endif
fp = open_exclusive(cpathname, mode);
if (fp == NULL) {
if (Py_VerboseFlag)
PySys_WriteStderr(
"# can't create %s\n", cpathname);
return;
}
PyMarshal_WriteLongToFile(pyc_magic, fp, Py_MARSHAL_VERSION); // magic number, for compatibility between Python source versions (per the note author)
/* First write a 0 for mtime */
PyMarshal_WriteLongToFile(0L, fp, Py_MARSHAL_VERSION);
PyMarshal_WriteObjectToFile((PyObject *)co, fp, Py_MARSHAL_VERSION);
if (fflush(fp) != 0 || ferror(fp)) {
if (Py_VerboseFlag)
PySys_WriteStderr("# can't write %s\n", cpathname);
/* Don't keep partial file */
fclose(fp);
(void) unlink(cpathname);
return;
}
/* Now write the true mtime (as a 32-bit field) */
/* NOTE(review): the results of this fseek and of the final fflush/fclose are
 * not checked; the whence argument 0 is presumably SEEK_SET -- confirm. */
fseek(fp, 4L, 0);
assert(mtime <= 0xFFFFFFFF);
PyMarshal_WriteLongToFile((long)mtime, fp, Py_MARSHAL_VERSION); // Note author: differs from 2.5, which wrote the time at the point where the 0 is written above; the mtime is mainly compared against the .py file to decide whether the .pyc is up to date
fflush(fp);
fclose(fp);
if (Py_VerboseFlag)
PySys_WriteStderr("# wrote %s\n", cpathname);
}
相关写入的细节略
8.python虚拟机框架
python虚拟机实际上是对cpu的抽象。python虚拟机会从编译得到的PyCodeObject对象中依次读入每一条字节码指令,并在当前上下文中执行。
8.1 python虚拟机中的执行环境
- python通过PyFrameObject维护栈帧
- PyFrameObject实际是对物理机栈帧机制的一层模拟实现(与物理机不同,调用链上各个PyFrameObject的地址并不会像实际的物理机那样连续)
/*
 * PyFrameObject: the frame record the notes describe as Python's stack
 * frame. It links to the previous frame (f_back), points at the code being
 * run (f_code), holds the builtin/global/local symbol tables, and ends with
 * f_localsplus, the dynamically sized storage for locals plus the value
 * stack.
 */
typedef struct _frame {
PyObject_VAR_HEAD
struct _frame *f_back; /* previous frame, or NULL */
PyCodeObject *f_code; /* code segment */
PyObject *f_builtins; /* builtin symbol table (PyDictObject) */
PyObject *f_globals; /* global symbol table (PyDictObject) */
PyObject *f_locals; /* local symbol table (any mapping) */
PyObject **f_valuestack; /* points after the last local */
/* Next free slot in f_valuestack. Frame creation sets to f_valuestack.
Frame evaluation usually NULLs it, but a frame that yields sets it
to the current stack top. */
PyObject **f_stacktop;
PyObject *f_trace; /* Trace function */
/* If an exception is raised in this frame, the next three are used to
* record the exception info (if any) originally in the thread state. See
* comments before set_exc_info() -- it's not obvious.
* Invariant: if _type is NULL, then so are _value and _traceback.
* Desired invariant: all three are NULL, or all three are non-NULL. That
* one isn't currently true, but "should be".
*/
PyObject *f_exc_type, *f_exc_value, *f_exc_traceback;
PyThreadState *f_tstate;
int f_lasti; /* Last instruction if called */
/* Call PyFrame_GetLineNumber() instead of reading this field
directly. As of 2.3 f_lineno is only valid when tracing is
active (i.e. when f_trace is set). At other times we use
PyCode_Addr2Line to calculate the line from the current
bytecode index. */
int f_lineno; /* Current line number */
int f_iblock; /* index in f_blockstack */
PyTryBlock f_blockstack[CO_MAXBLOCKS]; /* for try and loop blocks */
PyObject *f_localsplus[1]; /* locals+stack, dynamically sized */
} PyFrameObject;
// 通过`PyFrame_New`创建一个frame
/*
 * Abridged excerpt of PyFrame_New: creates the frame `f` for running `code`
 * on thread `tstate`. Only the part that sizes and clears the locals area is
 * shown; the omitted pieces are marked by comments, so this fragment does
 * not compile as-is.
 */
PyFrameObject *
PyFrame_New(PyThreadState *tstate, PyCodeObject *code, PyObject *globals,
PyObject *locals)
{
PyFrameObject *back = tstate->frame;
PyFrameObject *f;
PyObject *builtins;
Py_ssize_t i;
if // (elided: debug-mode checks, argument validation, zombie-frame handling, etc.; the 2.7 code below is about the same as 2.5)
else {
Py_ssize_t extras, ncells, nfrees;
ncells = PyTuple_GET_SIZE(code->co_cellvars);
nfrees = PyTuple_GET_SIZE(code->co_freevars);
/* Slot count including the evaluation stack; presumably used by the elided
 * allocation code -- confirm against the full source. */
extras = code->co_stacksize + code->co_nlocals + ncells +
nfrees;
// (free-list optimisation elided)
f->f_code = code;
extras = code->co_nlocals + ncells + nfrees; // extras now counts the local variables plus the cell/free slots that closures need
/* The value stack begins right after those slots inside f_localsplus. */
f->f_valuestack = f->f_localsplus + extras;
/* Start with every local/cell/free slot set to NULL. */
for (i=0; i<extras; i++)
f->f_localsplus[i] = NULL;
f->f_locals = NULL;
f->f_trace = NULL;
f->f_exc_type = f->f_exc_value = f->f_exc_traceback = NULL;
}
8.2 namespace 等等
import abc、class A、参数传递、a=1 这些都是赋值语句:都会创建一个obj,并给这个obj绑定一个name;如果name位于module中,那么每个name都是该module的属性。
- python的作用域由静态代码决定,而不是运行时决定:python具有静态作用域,在编译时就确定了name的引用策略(例如同时存在global a和local a时,在local a赋值之前引用a会报错)
- LEGB:local、enclosing、global、builtin;属性引用不遵循LEGB规则
8.3 python虚拟机运行框架
PyEval_EvalFrameEx是虚拟机的入口函数:
PyObject * PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
- 主要逻辑是一个switch语句,根据code中的字节码进行相应的处理;处理的过程中会改变why变量的值,用来表示执行结果是exception还是return等
8.4 python运行时环境初探
- 进程状态对象:PyInterpreterState
- 线程状态对象:PyThreadState(python的线程直接使用内核原生线程)。
// Include/pystate.h
// Note author: probably the first struct in these notes that is not a PyObject subtype
/*
 * PyInterpreterState: what the notes call the "process state object". It
 * links to the next such record, to the head of its thread-state list, and
 * holds interpreter-wide objects (modules, sysdict, builtins, codec tables).
 */
typedef struct _is {
struct _is *next; // next process? (note author's guess)
struct _ts *tstate_head; // the set of threads inside this process (per the note author)
PyObject *modules;
PyObject *sysdict;
PyObject *builtins;
PyObject *modules_reloading;
PyObject *codec_search_path;
PyObject *codec_search_cache;
PyObject *codec_error_registry;
#ifdef HAVE_DLOPEN
int dlopenflags;
#endif
#ifdef WITH_TSC
int tscdump;
#endif
} PyInterpreterState;
/*
 * PyThreadState: what the notes call the "thread state object". It links to
 * the next thread state, to its owning PyInterpreterState, and to the frame
 * at the top of this thread's call stack, and carries tracing/profiling
 * hooks and exception state.
 */
typedef struct _ts {
/* See Python/ceval.c for comments explaining most fields */
struct _ts *next;
PyInterpreterState *interp;
struct _frame *frame; /* Call stack of this thread.
** PyEval_EvalFrameEx (see the previous section),
** at the start of every call, updates the current thread's frame to its argument f,
** while PyFrame_New records the current thread's state into f_back, f_tstate, etc. (per the note author)
*/
int recursion_depth;
/* 'tracing' keeps track of the execution depth when tracing/profiling.
This is to prevent the actual trace/profile code from being recorded in
the trace/profile. */
int tracing;
int use_tracing;
Py_tracefunc c_profilefunc;
Py_tracefunc c_tracefunc;
PyObject *c_profileobj;
PyObject *c_traceobj;
PyObject *curexc_type;
PyObject *curexc_value;
PyObject *curexc_traceback;
PyObject *exc_type;
PyObject *exc_value;
PyObject *exc_traceback;
PyObject *dict; /* Stores per-thread state */
/* tick_counter is incremented whenever the check_interval ticker
* reaches zero. The purpose is to give a useful measure of the number
* of interpreted bytecode instructions in a given thread. This
* extremely lightweight statistic collector may be of interest to
* profilers (like psyco.jit()), although nothing in the core uses it.
*/
int tick_counter;
int gilstate_counter;
PyObject *async_exc; /* Asynchronous exception to raise */
long thread_id; /* Thread id where this tstate was created */
int trash_delete_nesting;
PyObject *trash_delete_later;
/* XXX signal handlers should also be here */
} PyThreadState;
9~10.python虚拟机的一般表达式和控制流(静态部分)
- 总体就是介绍dis反汇编出的指令的具体操作逻辑,类似汇编语言,但python虚拟机是用c语言来实现这些指令的。
- 引用计数的维护是在这一步进行的
- 同时由于加入了f_locals,和汇编中单纯的栈帧操作又不一样
PyEval_EvalFrameEx定义了处理字节码指令序列co_code的宏,op相关宏的定义在Include/opcode.h中
// opcode.h
// Excerpt only: the include guard and the extern "C" block opened below are
// not closed here because the rest of the header is elided (see the trailing
// "// ..."). Each macro maps an opcode name to its numeric value; the
// numbering has gaps (e.g. 5 -> 9, 13 -> 15, 15 -> 19).
#ifndef Py_OPCODE_H
#define Py_OPCODE_H
#ifdef __cplusplus
extern "C" {
#endif
/* Instruction opcodes for compiled code */
#define STOP_CODE 0
#define POP_TOP 1
#define ROT_TWO 2
#define ROT_THREE 3
#define DUP_TOP 4
#define ROT_FOUR 5
#define NOP 9
#define UNARY_POSITIVE 10
#define UNARY_NEGATIVE 11
#define UNARY_NOT 12
#define UNARY_CONVERT 13
#define UNARY_INVERT 15
#define BINARY_POWER 19
#define BINARY_MULTIPLY 20
// ...
// ceval.c
// Excerpt of the opcode dispatch switch; the remaining cases and the closing
// brace are elided (see the trailing "// ...").
switch (opcode) {
/* BEWARE!
It is essential that any operation that fails sets either
x to NULL, err to nonzero, or why to anything but WHY_NOT,
and that no operation that succeeds does this! */
/* case STOP_CODE: this is an error! */
/* NOP: no work, go straight to the next instruction. */
TARGET_NOARG(NOP)
{
FAST_DISPATCH();
}
/* LOAD_FAST: read local slot `oparg`; if it is set, take a new reference and
 * push it. Otherwise report PyExc_UnboundLocalError, passing the entry of
 * co_varnames at index oparg, and leave the switch via break. */
TARGET(LOAD_FAST)
{
x = GETLOCAL(oparg);
if (x != NULL) {
Py_INCREF(x);
PUSH(x);
FAST_DISPATCH();
}
format_exc_check_arg(PyExc_UnboundLocalError,
UNBOUNDLOCAL_ERROR_MSG,
PyTuple_GetItem(co->co_varnames, oparg));
break;
}
/* LOAD_CONST: fetch item `oparg` of consts, take a new reference, push it. */
TARGET(LOAD_CONST)
{
x = GETITEM(consts, oparg);
Py_INCREF(x);
PUSH(x);
FAST_DISPATCH();
}
/* STORE_FAST: pop the top of the value stack and hand it to SETLOCAL for
 * slot `oparg` (no Py_INCREF here; presumably the popped reference is
 * transferred to the slot -- confirm against the SETLOCAL macro). */
PREDICTED_WITH_ARG(STORE_FAST);
TARGET(STORE_FAST)
{
v = POP();
SETLOCAL(oparg, v);
FAST_DISPATCH();
}
// ...
如果要看相应操作的具体逻辑,可以先用 python -m dis a.py 得到字节码,再根据opcode在PyEval_EvalFrameEx中查看对应的处理逻辑。比较特殊的有迭代器的实现。