举例说明Python中的回溯

231 阅读10分钟

当Python程序中发生异常时,往往会打印出一个回溯。知道如何阅读回溯可以帮助你轻松地识别错误并进行修复。在本教程中,我们将看到回溯可以告诉你什么。

完成本教程后,你将了解:

  • 如何阅读回溯信息
  • 如何在没有异常的情况下打印调用堆栈
  • 回溯中不显示的内容

让我们开始吧。

了解Python中的回溯

教程概述

本教程分为4个部分,它们是:

  1. 一个简单程序的调用层次结构
  2. 异常时的回溯
  3. 手动触发回溯
  4. 模型训练中的一个例子

一个简单程序的调用层次结构

让我们考虑一个简单的程序。

def indentprint(x, indent=0, prefix="", suffix=""):
    """Print ``x`` with indentation, picking a printer by the type of ``x``.

    Args:
        x: The value to print.
        indent: Number of spaces placed in front of the printed text.
        prefix: Text printed right after the indentation (printdict()
            passes ``"<key>: "`` here).
        suffix: Text printed at the end of the value (callers pass ``","``
            or ``""``).
    """
    if isinstance(x, dict):
        printdict(x, indent, prefix, suffix)
    elif isinstance(x, list):
        printlist(x, indent, prefix, suffix)
    elif isinstance(x, str):
        printstring(x, indent, prefix, suffix)
    else:
        # Anything that is not a dict, list or str (numbers, None, ...)
        # is handed to printnumber(), which prints str(x).
        printnumber(x, indent, prefix, suffix)

def printdict(x, indent, prefix, suffix):
    """Print the dict ``x`` as a brace-delimited block.

    The opening line is ``indent`` spaces, ``prefix`` and ``{``. Each value
    is printed through indentprint() two spaces deeper, with ``"<key>: "``
    as its prefix. The closing line is ``indent`` spaces, ``}`` and
    ``suffix``.
    """
    spaces = " " * indent
    print(spaces + prefix + "{")
    for n, key in enumerate(x):
        # Every entry except the last one is followed by a comma.
        comma = "," if n!=len(x)-1 else ""
        indentprint(x[key], indent+2, str(key)+": ", comma)
    print(spaces + "}" + suffix)

def printlist(x, indent, prefix, suffix):
    """Print the list ``x`` as a bracket-delimited block.

    The opening line is ``indent`` spaces, ``prefix`` and ``[``. Each item
    is printed through indentprint() two spaces deeper with an empty prefix.
    The closing line is ``indent`` spaces, ``]`` and ``suffix``.
    """
    spaces = " " * indent
    print(spaces + prefix + "[")
    for n, item in enumerate(x):
        # Every item except the last one is followed by a comma.
        comma = "," if n!=len(x)-1 else ""
        indentprint(item, indent+2, "", comma)
    print(spaces + "]" + suffix)

def printstring(x, indent, prefix, suffix):
    """Print ``x`` wrapped in double quotes on a single indented line.

    The line consists of ``indent`` spaces, ``prefix``, the quoted text of
    ``x`` and finally ``suffix``.
    """
    line = " " * indent + prefix + '"' + str(x) + '"' + suffix
    print(line)

def printnumber(x, indent, prefix, suffix):
    """Print ``str(x)`` on a single indented line, without quotes.

    The line consists of ``indent`` spaces, ``prefix``, the text of ``x``
    and finally ``suffix``.
    """
    line = " " * indent + prefix + str(x) + suffix
    print(line)

# Sample nested structure mixing dicts, lists, strings, numbers and None.
data = {
    "a": [
        {
            "p": 3,
            "q": 4,
            "r": [3, 4, 5],
        },
        {
            "f": "foo",
            "g": 2.71,
        },
        {
            "u": None,
            "v": "bar",
        },
    ],
    "c": {
        "s": ["fizz", 2, 1.1],
        "t": [],
    },
}

# Print the whole structure, starting with no indentation.
indentprint(data)

这个程序的作用是带缩进地打印 Python 字典 data。它的输出结果如下。

{
  a: [
    {
      p: 3,
      q: 4,
      r: [
        3,
        4,
        5
      ]
    },
    {
      f: "foo",
      g: 2.71
    },
    {
      u: None,
      v: "bar"
    }
  ],
  c: {
    s: [
      "fizz",
      2,
      1.1
    ],
    t: [
    ]
  }
}

这是一个很短的程序,但是函数之间在互相调用。如果我们在每个函数的开头加一行,我们可以揭示输出是如何随着控制流产生的。

def indentprint(x, indent=0, prefix="", suffix=""):
    """Log this call, then print ``x`` using the printer for its type.

    A trace line naming the function and its ``indent``, ``prefix`` and
    ``suffix`` arguments is printed first. Dicts go to printdict(), lists to
    printlist(), strings to printstring() and everything else to
    printnumber().
    """
    print(f'indentprint(x, {indent}, "{prefix}", "{suffix}")')
    # Select the printer first, then make a single call at the end.
    if isinstance(x, dict):
        handler = printdict
    elif isinstance(x, list):
        handler = printlist
    elif isinstance(x, str):
        handler = printstring
    else:
        handler = printnumber
    handler(x, indent, prefix, suffix)
def printdict(x, indent, prefix, suffix):
    """Log this call, then print the dict ``x`` as a brace-delimited block.

    Each value is printed through indentprint() two spaces deeper, with
    ``"<key>: "`` as its prefix and a trailing comma on all but the last
    entry.
    """
    print(f'printdict(x, {indent}, "{prefix}", "{suffix}")')
    pad = " " * indent
    print(pad + prefix + "{")
    for position, key in enumerate(x):
        if position == len(x) - 1:
            separator = ""
        else:
            separator = ","
        indentprint(x[key], indent + 2, str(key) + ": ", separator)
    print(pad + "}" + suffix)

def printlist(x, indent, prefix, suffix):
    """Log this call, then print the list ``x`` as a bracket-delimited block.

    Each item is printed through indentprint() two spaces deeper, with an
    empty prefix and a trailing comma on all but the last item.
    """
    print(f'printlist(x, {indent}, "{prefix}", "{suffix}")')
    pad = " " * indent
    print(pad + prefix + "[")
    for position, element in enumerate(x):
        if position == len(x) - 1:
            separator = ""
        else:
            separator = ","
        indentprint(element, indent + 2, "", separator)
    print(pad + "]" + suffix)

def printstring(x, indent, prefix, suffix):
    """Log this call, then print ``x`` in double quotes on an indented line."""
    print(f'printstring(x, {indent}, "{prefix}", "{suffix}")')
    line = " " * indent + prefix + '"' + str(x) + '"' + suffix
    print(line)

def printnumber(x, indent, prefix, suffix):
    """Log this call, then print ``str(x)`` unquoted on an indented line."""
    print(f'printnumber(x, {indent}, "{prefix}", "{suffix}")')
    line = " " * indent + prefix + str(x) + suffix
    print(line)

这样一来,输出中就会夹杂更多的信息。

indentprint(x, 0, "", "")
printdict(x, 0, "", "")
{
indentprint(x, 2, "a: ", ",")
printlist(x, 2, "a: ", ",")
  a: [
indentprint(x, 4, "", ",")
printdict(x, 4, "", ",")
    {
indentprint(x, 6, "p: ", ",")
printnumber(x, 6, "p: ", ",")
      p: 3,
indentprint(x, 6, "q: ", ",")
printnumber(x, 6, "q: ", ",")
      q: 4,
indentprint(x, 6, "r: ", "")
printlist(x, 6, "r: ", "")
      r: [
indentprint(x, 8, "", ",")
printnumber(x, 8, "", ",")
        3,
indentprint(x, 8, "", ",")
printnumber(x, 8, "", ",")
        4,
indentprint(x, 8, "", "")
printnumber(x, 8, "", "")
        5
      ]
    },
indentprint(x, 4, "", ",")
printdict(x, 4, "", ",")
    {
indentprint(x, 6, "f: ", ",")
printstring(x, 6, "f: ", ",")
      f: "foo",
indentprint(x, 6, "g: ", "")
printnumber(x, 6, "g: ", "")
      g: 2.71
    },
indentprint(x, 4, "", "")
printdict(x, 4, "", "")
    {
indentprint(x, 6, "u: ", ",")
printnumber(x, 6, "u: ", ",")
      u: None,
indentprint(x, 6, "v: ", "")
printstring(x, 6, "v: ", "")
      v: "bar"
    }
  ],
indentprint(x, 2, "c: ", "")
printdict(x, 2, "c: ", "")
  c: {
indentprint(x, 4, "s: ", ",")
printlist(x, 4, "s: ", ",")
    s: [
indentprint(x, 6, "", ",")
printstring(x, 6, "", ",")
      "fizz",
indentprint(x, 6, "", ",")
printnumber(x, 6, "", ",")
      2,
indentprint(x, 6, "", "")
printnumber(x, 6, "", "")
      1.1
    ],
indentprint(x, 4, "t: ", "")
printlist(x, 4, "t: ", "")
    t: [
    ]
  }
}

所以现在我们知道了各个函数被调用的顺序。这就是调用栈的概念。在任何时候,当我们在一个函数中运行一行代码时,我们都想知道是谁调用了这个函数。

异常后的回溯

如果我们在代码中犯了一个像下面这样的错字。

def printdict(x, indent, prefix, suffix):
    """Deliberately broken variant of printdict() used to provoke an error.

    Identical to the working version except for the last line.
    """
    spaces = " " * indent
    print(spaces + prefix + "{")
    for n, key in enumerate(x):
        comma = "," if n!=len(x)-1 else ""
        indentprint(x[key], indent+2, str(key)+": ", comma)
    # NOTE: intentional typo -- the ")" closes print() before "+ suffix", so
    # the None returned by print() is added to a str and raises TypeError.
    print(spaces + "}") + suffix

错字在最后一行:右括号应该在行末,而不是在 + 之前。print() 函数的返回值是 Python 的 None 对象,而把某个东西与 None 相加会触发异常。

如果你用Python解释器运行这个程序,你会看到这个情况。

{
  a: [
    {
      p: 3,
      q: 4,
      r: [
        3,
        4,
        5
      ]
    }
Traceback (most recent call last):
  File "tb.py", line 52, in <module>
    indentprint(data)
  File "tb.py", line 3, in indentprint
    printdict(x, indent, prefix, suffix)
  File "tb.py", line 16, in printdict
    indentprint(x[key], indent+2, str(key)+": ", comma)
  File "tb.py", line 5, in indentprint
    printlist(x, indent, prefix, suffix)
  File "tb.py", line 24, in printlist
    indentprint(item, indent+2, "", comma)
  File "tb.py", line 3, in indentprint
    printdict(x, indent, prefix, suffix)
  File "tb.py", line 17, in printdict
    print(spaces + "}") + suffix
TypeError: unsupported operand type(s) for +: 'NoneType' and 'str'

以 "Traceback (most recent call last):" 开头的这些行就是回溯。它是你的程序在遇到异常时的调用堆栈。在上面的例子中,回溯按照"最近的调用排在最后"的顺序排列。因此,你的主函数在顶部,而触发异常的函数在底部。所以我们知道问题出在函数 printdict() 内部。

通常情况下,你会在回溯的最后看到错误信息。在这个例子中,它是由添加None 和 string 触发的TypeError 。但回溯的帮助到此为止。你需要弄清楚哪一个是None ,哪一个是string。通过阅读回溯,我们还知道触发异常的函数printdict() 是由indentprint() 调用的,而它又被printlist() 调用,以此类推。

如果你在Jupyter笔记本中运行这个,下面是输出结果。

{
  a: [
    {
      p: 3,
      q: 4,
      r: [
        3,
        4,
        5
      ]
    }
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
/var/folders/6z/w0ltb1ss08l593y5xt9jyl1w0000gn/T/ipykernel_37031/2508041071.py in <module>
----> 1 indentprint(x)

/var/folders/6z/w0ltb1ss08l593y5xt9jyl1w0000gn/T/ipykernel_37031/2327707064.py in indentprint(x, indent, prefix, suffix)
      1 def indentprint(x, indent=0, prefix="", suffix=""):
      2     if isinstance(x, dict):
----> 3         printdict(x, indent, prefix, suffix)
      4     elif isinstance(x, list):
      5         printlist(x, indent, prefix, suffix)

/var/folders/6z/w0ltb1ss08l593y5xt9jyl1w0000gn/T/ipykernel_37031/2327707064.py in printdict(x, indent, prefix, suffix)
     14     for n, key in enumerate(x):
     15         comma = "," if n!=len(x)-1 else ""
---> 16         indentprint(x[key], indent+2, str(key)+": ", comma)
     17     print(spaces + "}") + suffix
     18 

/var/folders/6z/w0ltb1ss08l593y5xt9jyl1w0000gn/T/ipykernel_37031/2327707064.py in indentprint(x, indent, prefix, suffix)
      3         printdict(x, indent, prefix, suffix)
      4     elif isinstance(x, list):
----> 5         printlist(x, indent, prefix, suffix)
      6     elif isinstance(x, str):
      7         printstring(x, indent, prefix, suffix)

/var/folders/6z/w0ltb1ss08l593y5xt9jyl1w0000gn/T/ipykernel_37031/2327707064.py in printlist(x, indent, prefix, suffix)
     22     for n, item in enumerate(x):
     23         comma = "," if n!=len(x)-1 else ""
---> 24         indentprint(item, indent+2, "", comma)
     25     print(spaces + "]" + suffix)
     26 

/var/folders/6z/w0ltb1ss08l593y5xt9jyl1w0000gn/T/ipykernel_37031/2327707064.py in indentprint(x, indent, prefix, suffix)
      1 def indentprint(x, indent=0, prefix="", suffix=""):
      2     if isinstance(x, dict):
----> 3         printdict(x, indent, prefix, suffix)
      4     elif isinstance(x, list):
      5         printlist(x, indent, prefix, suffix)

/var/folders/6z/w0ltb1ss08l593y5xt9jyl1w0000gn/T/ipykernel_37031/2327707064.py in printdict(x, indent, prefix, suffix)
     15         comma = "," if n!=len(x)-1 else ""
     16         indentprint(x[key], indent+2, str(key)+": ", comma)
---> 17     print(spaces + "}") + suffix
     18 
     19 def printlist(x, indent, prefix, suffix):

TypeError: unsupported operand type(s) for +: 'NoneType' and 'str'

信息基本上是一样的,但它还给出了每个函数调用之前和之后的几行代码。

手动触发回溯

打印回溯的最简单方法是添加一个raise 语句来手动创建一个异常。但这也会终止你的程序。如果我们想在任何时候打印堆栈,即使没有任何异常,我们可以像下面这样做。

import traceback

def printdict(x, indent, prefix, suffix):
    """Print the dict ``x`` as a brace-delimited block and dump the call stack.

    Works like the plain printdict(), but calls traceback.print_stack()
    after all entries have been printed and before the closing brace.
    """
    pad = " " * indent
    print(pad + prefix + "{")
    for position, key in enumerate(x):
        if position == len(x) - 1:
            separator = ""
        else:
            separator = ","
        indentprint(x[key], indent + 2, str(key) + ": ", separator)
    # Show how execution reached this point; no exception is involved.
    traceback.print_stack()
    print(pad + "}" + suffix)

这一行traceback.print_stack() ,将打印出当前的调用堆栈。

但事实上,我们常常只想在出错的时候打印堆栈(这样我们就能更清楚为什么会出错)。更常见的用法如下。

import traceback
import random

def compute():
    """Return the quotient of two random integers drawn from 0..10.

    The divisor can come out as 0, in which case the division raises
    ZeroDivisionError.
    """
    numerator = random.randint(0, 10)
    denominator = random.randint(0, 10)
    return numerator / denominator

def compute_many(n_times):
    """Call compute() ``n_times`` times, reporting an error instead of crashing.

    When every call succeeds, a completion message is printed. When a call
    raises, the loop stops, "Something wrong" is printed, the traceback of
    the error is printed with traceback.print_exc(), and the function
    returns normally.

    Args:
        n_times: How many times to call compute().
    """
    try:
        for _ in range(n_times):
            compute()
        print(f"Completed {n_times} times")
    except Exception:
        # Not a bare ``except:`` -- that would also swallow KeyboardInterrupt
        # and SystemExit, so a long run could not be interrupted cleanly.
        print("Something wrong")
        traceback.print_exc()

compute_many(100)  # 100 trials; any error inside compute() is reported with its traceback

这是反复计算一个函数的典型模式,比如蒙特卡洛模拟。但如果我们不够小心,就可能遇到一些错误,比如在上面的例子中,可能会出现除以 0 的情况。问题是,如果计算比较复杂,你不容易发现其中的缺陷。比如上面的例子,问题就埋藏在对 compute() 的调用中。因此,了解错误是如何发生的很有帮助。但同时,我们希望处理错误的情况,而不是让整个程序终止。如果我们使用 try-except 结构,默认情况下不会打印回溯,因此我们需要调用 traceback.print_exc() 来手动打印。

实际上,我们可以对回溯进行更详细的说明。因为回溯是调用堆栈,实际上我们可以检查调用堆栈中的每个函数,并检查每一层的变量。在复杂的情况下,我通常用这个函数来做更详细的跟踪。

def print_tb_with_local():
    """Print the current exception's traceback plus the locals of each frame.

    Intended to be called from inside an ``except`` block. The regular
    traceback is printed first with ``traceback.print_exc()``. It is followed
    by every frame of that traceback, most recent call first, each with its
    local variables. All output goes to ``sys.stderr``.

    When no exception is being handled there is no traceback to walk, so
    nothing is printed after the header line.
    """
    import sys
    import traceback

    tb = sys.exc_info()[2]
    frames = []
    while tb is not None:
        # tb_lineno is the line that was running when the exception passed
        # through this frame; frame.f_lineno may have moved on since then.
        frames.append((tb.tb_frame, tb.tb_lineno))
        tb = tb.tb_next  # tb_next is an attribute, not a method
    traceback.print_exc()
    print("Locals by frame, most recent call first", file=sys.stderr)
    # The traceback chain runs from the outermost call to the innermost one,
    # so walk it backwards to honour the "most recent call first" header.
    for frame, lineno in reversed(frames):
        print("Frame {0} in {1} at line {2}".format(
            frame.f_code.co_name,
            frame.f_code.co_filename,
            lineno), file=sys.stderr)
        for key, value in frame.f_locals.items():
            try:
                text = repr(value)
            except Exception:
                # A broken __repr__ must not abort the whole report.
                text = "<ERROR PRINTING VALUE>"
            # Name and value share one line.
            print("\t%20s = %s" % (key, text), file=sys.stderr)

模型训练中的一个例子

回溯中的调用栈有一个限制:你只能看到 Python 函数。对于你自己写的程序来说,这通常没有问题,但是 Python 中很多大型库有一部分是用另一种语言编写并编译成二进制的。一个例子是 Tensorflow。为了提高性能,所有的底层操作都是二进制的。因此,如果你运行下面的代码,你会看到不一样的东西。

import numpy as np

sequence = np.arange(0.1, 1.0, 0.1)  # values from 0.1 to 0.9 in steps of 0.1
n_in = len(sequence)  # length of the sequence
sequence = sequence.reshape((1, n_in, 1))  # make it 3-D: shape (1, n_in, 1)

# define the model (an LSTM stack that is trained to reproduce its own input)
import tensorflow as tf
from tensorflow.keras.layers import LSTM, RepeatVector, Dense, TimeDistributed, Input
from tensorflow.keras import Sequential, Model

model = Sequential([
    LSTM(100, activation="relu", input_shape=(n_in+1, 1)),  # deliberate bug: should be (n_in, 1) to match the data
    RepeatVector(n_in),
    LSTM(100, activation="relu", return_sequences=True),
    TimeDistributed(Dense(1))
])
model.compile(optimizer="adam", loss="mse")

model.fit(sequence, sequence, epochs=300, verbose=0)

模型中第一个LSTM层的input_shape 参数应该是(n_in, 1) ,以匹配输入数据,而不是(n_in+1, 1) 。一旦你调用了最后一行,这段代码将打印出以下错误。

Traceback (most recent call last):
  File "trback3.py", line 20, in <module>
    model.fit(sequence, sequence, epochs=300, verbose=0)
  File "/usr/local/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "/usr/local/lib/python3.9/site-packages/tensorflow/python/framework/func_graph.py", line 1129, in autograph_handler
    raise e.ag_error_metadata.to_exception(e)
ValueError: in user code:

    File "/usr/local/lib/python3.9/site-packages/keras/engine/training.py", line 878, in train_function  *
        return step_function(self, iterator)
    File "/usr/local/lib/python3.9/site-packages/keras/engine/training.py", line 867, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.9/site-packages/keras/engine/training.py", line 860, in run_step  **
        outputs = model.train_step(data)
    File "/usr/local/lib/python3.9/site-packages/keras/engine/training.py", line 808, in train_step
        y_pred = self(x, training=True)
    File "/usr/local/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/usr/local/lib/python3.9/site-packages/keras/engine/input_spec.py", line 263, in assert_input_compatibility
        raise ValueError(f'Input {input_index} of layer "{layer_name}" is '

    ValueError: Input 0 of layer "sequential" is incompatible with the layer: expected shape=(None, 10, 1), found shape=(None, 9, 1)

如果你看一下回溯,你并不能看到完整的调用堆栈。例如,从最上面一帧你知道自己调用了 model.fit(),但第二帧却来自一个名为 error_handler() 的函数,从中你无法看出 fit() 函数是如何触发它的。这是因为 Tensorflow 是高度优化的,很多东西都隐藏在编译后的代码中,Python 解释器看不到。

在这种情况下,必须耐心地阅读回溯,找到原因的线索。当然,通常错误信息也应该给你一些有用的提示。

摘要

在本教程中,你发现了如何从一个 Python 程序中读取和打印回溯。

具体来说,你学到了

  • 回溯告诉你什么信息
  • 如何在程序的任何一点上打印回溯,而不引发一个异常