1. C++ side
cout << Tensor;
On the C++ side, the ostream overload that prints a tensor lives at /home/pytorch/aten/src/ATen/core/Formatting.cpp:260:
std::ostream& print(std::ostream& stream, const Tensor & tensor_, int64_t linesize) {
  FormatGuard guard(stream);
  if(!tensor_.defined()) {
    stream << "[ Tensor (undefined) ]";
  } else if (tensor_.is_sparse()) {
    // Sparse tensor: print indices, values and sizes separately
    stream << "[ " << tensor_.toString() << "{}\n";
    stream << "indices:\n" << tensor_._indices() << "\n";
    stream << "values:\n" << tensor_._values() << "\n";
    stream << "size:\n" << tensor_.sizes() << "\n";
    stream << "]";
  } else {
    // Non-sparse tensor: normalize to a CPU double contiguous tensor first
    Tensor tensor;
    if (tensor_.is_quantized()) {
      // Quantized: dequantize before converting
      tensor = tensor_.dequantize().to(kCPU, kDouble).contiguous();
    } else if (tensor_.is_mkldnn()) {
      // MKLDNN: convert to a dense tensor before converting
      stream << "MKLDNN Tensor: ";
      tensor = tensor_.to_dense().to(kCPU, kDouble).contiguous();
    } else if (tensor_.is_mps()) {
      // MPS does not support double tensors, so first copy then convert
      tensor = tensor_.to(kCPU).to(kDouble).contiguous();
    } else {
      // e.g. a CUDA tensor: copy to CPU and convert to double, then make contiguous
      tensor = tensor_.to(kCPU, kDouble).contiguous();
    }
    if(tensor.ndimension() == 0) {
      stream << defaultfloat << tensor.data_ptr<double>()[0] << std::endl;
      stream << "[ " << tensor_.toString() << "{}";
    } else if(tensor.ndimension() == 1) {
      if (tensor.numel() > 0) {
        double scale = 0.0;
        int64_t sz = 0;
        std::tie(scale, sz) = __printFormat(stream, tensor);
        if(scale != 1) {
          printScale(stream, scale);
        }
        double* tensor_p = tensor.data_ptr<double>();
        for (const auto i : c10::irange(tensor.size(0))) {
          stream << std::setw(sz) << tensor_p[i]/scale << std::endl;
        }
      }
      stream << "[ " << tensor_.toString() << "{" << tensor.size(0) << "}";
    } else if(tensor.ndimension() == 2) {
      if (tensor.numel() > 0) {
        __printMatrix(stream, tensor, linesize, 0);
      }
      stream << "[ " << tensor_.toString() << "{" << tensor.size(0) << "," << tensor.size(1) << "}";
    } else {
      if (tensor.numel() > 0) {
        __printTensor(stream, tensor, linesize);
      }
      stream << "[ " << tensor_.toString() << "{" << tensor.size(0);
      for (const auto i : c10::irange(1, tensor.ndimension())) {
        stream << "," << tensor.size(i);
      }
      stream << "}";
    }
    if (tensor_.is_quantized()) {
      stream << ", qscheme: " << toString(tensor_.qscheme());
      if (tensor_.qscheme() == c10::kPerTensorAffine) {
        stream << ", scale: " << tensor_.q_scale();
        stream << ", zero_point: " << tensor_.q_zero_point();
      } else if (tensor_.qscheme() == c10::kPerChannelAffine ||
          tensor_.qscheme() == c10::kPerChannelAffineFloatQParams) {
        stream << ", scales: ";
        Tensor scales = tensor_.q_per_channel_scales();
        print(stream, scales, linesize);
        stream << ", zero_points: ";
        Tensor zero_points = tensor_.q_per_channel_zero_points();
        print(stream, zero_points, linesize);
        stream << ", axis: " << tensor_.q_per_channel_axis();
      }
    }
    // Proxy check for if autograd was built
    if (tensor.getIntrusivePtr()->autograd_meta()) {
      auto& fw_grad = tensor._fw_grad(/* level */ 0);
      if (fw_grad.defined()) {
        stream << ", tangent:" << std::endl << fw_grad;
      }
    }
    stream << " ]";
  }
  return stream;
}
As the code shows, an ordinary (dense, non-quantized) tensor is first copied to the CPU and converted to double, and then contiguous() is called; all of the subsequent formatting reads only from that normalized CPU buffer.
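For intuition, the dense branch boils down to the following Python sketch. The helper name cpp_print_normalize is ours, not a PyTorch API; the point is that only the very basic to and contiguous operators (plus dequantize for quantized tensors) are involved:

import torch

def cpp_print_normalize(t: torch.Tensor) -> torch.Tensor:
    # Hypothetical helper mirroring Formatting.cpp: reduce any tensor to a
    # CPU double contiguous tensor before any formatting happens.
    if t.is_quantized:
        t = t.dequantize()
    return t.to("cpu", torch.double).contiguous()

x = torch.randn(2, 3)
if torch.cuda.is_available():
    x = x.to("cuda")
y = cpp_print_normalize(x)
print(y.dtype, y.device, y.is_contiguous())  # torch.float64 cpu True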
2. Python side
print(Tensor)
On the Python side, print(tensor) ends up in Tensor.__repr__, located at /opt/conda/envs/py38/lib/python3.8/site-packages/torch/_tensor.py, line 420 (__repr__):
def __repr__(self, *, tensor_contents=None):
    if has_torch_function_unary(self):
        return handle_torch_function(
            Tensor.__repr__, (self,), self, tensor_contents=tensor_contents
        )
    # All strings are unicode in Python 3.
    return torch._tensor_str._str(self, tensor_contents=tensor_contents)
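Note the has_torch_function_unary guard: a tensor subclass can take over printing entirely through __torch_function__, in which case none of the code below runs. A minimal sketch of that dispatch (TaggedTensor is a hypothetical class for illustration, not part of PyTorch):

import torch

class TaggedTensor(torch.Tensor):
    # Hypothetical subclass: __torch_function__ intercepts Tensor.__repr__,
    # so handle_torch_function() above returns our string instead of going
    # through torch._tensor_str._str.
    @classmethod
    def __torch_function__(cls, func, types, args=(), kwargs=None):
        kwargs = kwargs or {}
        if func is torch.Tensor.__repr__:
            return f"TaggedTensor(shape={tuple(args[0].shape)})"
        return super().__torch_function__(func, types, args, kwargs)

t = torch.zeros(2, 3).as_subclass(TaggedTensor)
print(t)  # TaggedTensor(shape=(2, 3))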
When no override is present, __repr__ delegates to torch._tensor_str._str, and the function that ultimately builds the string is _str_intern, located at /opt/conda/envs/py38/lib/python3.8/site-packages/torch/_tensor_str.py, line 366:
def _str_intern(inp, *, tensor_contents=None):
    if torch._C._functorch.is_functorch_wrapped_tensor(inp):
        return _functorch_wrapper_str_intern(inp, tensor_contents=tensor_contents)
    is_plain_tensor = type(inp) is torch.Tensor or type(inp) is torch.nn.Parameter
    if inp.is_nested:
        prefix = "nested_tensor("
    elif is_plain_tensor:
        prefix = "tensor("
    else:
        prefix = f"{type(inp).__name__}("
    indent = len(prefix)
    suffixes = []
    custom_contents_provided = tensor_contents is not None
    if custom_contents_provided:
        tensor_str = tensor_contents

    # This is used to extract the primal value and thus disable the forward AD
    # within this function.
    # TODO(albanD) This needs to be updated when more than one level is supported
    self, tangent = torch.autograd.forward_ad.unpack_dual(inp)

    # Note [Print tensor device]:
    # A general logic here is we only print device when it doesn't match
    # the device specified in default tensor type.
    # Currently torch.set_default_tensor_type() only supports CPU/CUDA, thus
    # torch._C._get_default_device() only returns either cpu or cuda.
    # In other cases, we don't have a way to set them as default yet,
    # and we should always print out device for them.
    if (
        self.device.type != torch._C._get_default_device()
        or (
            self.device.type == "cuda"
            and torch.cuda.current_device() != self.device.index
        )
        or (self.device.type == "mps")
    ):
        suffixes.append("device='" + str(self.device) + "'")

    # Tensor printing performs tensor operations like slice, indexing, etc to make it in a
    # representable format. These operations on ipu/xla/lazy tensor results in compilations. Hence,
    # to avoid compilations, copying the tensor to cpu before printing.
    if self.device.type in ["xla", "lazy", "ipu"]:
        self = self.to("cpu")

    # TODO: add an API to map real -> complex dtypes
    _default_complex_dtype = (
        torch.cdouble if torch.get_default_dtype() == torch.double else torch.cfloat
    )
    has_default_dtype = self.dtype in (
        torch.get_default_dtype(),
        _default_complex_dtype,
        torch.int64,
        torch.bool,
    )
    if self.is_sparse:
        suffixes.append("size=" + str(tuple(self.shape)))
        from torch._subclasses.fake_tensor import FakeTensor

        if not self.is_meta and not isinstance(self, FakeTensor):
            suffixes.append("nnz=" + str(self._nnz()))
        if not has_default_dtype:
            suffixes.append("dtype=" + str(self.dtype))
        if not custom_contents_provided:
            indices_prefix = "indices=tensor("
            indices = self._indices().detach()
            indices_str = _tensor_str(indices, indent + len(indices_prefix))
            if indices.numel() == 0:
                indices_str += ", size=" + str(tuple(indices.shape))
            values_prefix = "values=tensor("
            values = self._values().detach()
            values_str = _tensor_str(values, indent + len(values_prefix))
            if values.numel() == 0:
                values_str += ", size=" + str(tuple(values.shape))
            tensor_str = (
                indices_prefix
                + indices_str
                + "),\n"
                + " " * indent
                + values_prefix
                + values_str
                + ")"
            )
    elif self.layout in {
        torch.sparse_csr,
        torch.sparse_csc,
        torch.sparse_bsr,
        torch.sparse_bsc,
    }:
        suffixes.append("size=" + str(tuple(self.shape)))
        suffixes.append("nnz=" + str(self._nnz()))
        if not has_default_dtype:
            suffixes.append("dtype=" + str(self.dtype))
        if not custom_contents_provided:
            compressed_indices_method, plain_indices_method = {
                torch.sparse_csr: (torch.Tensor.crow_indices, torch.Tensor.col_indices),
                torch.sparse_csc: (torch.Tensor.ccol_indices, torch.Tensor.row_indices),
                torch.sparse_bsr: (torch.Tensor.crow_indices, torch.Tensor.col_indices),
                torch.sparse_bsc: (torch.Tensor.ccol_indices, torch.Tensor.row_indices),
            }[self.layout]
            if self.layout in {torch.sparse_csr, torch.sparse_bsr}:
                cdimname, pdimname = "row", "column"
            else:
                cdimname, pdimname = "column", "row"
            compressed_indices_prefix = f"c{cdimname[:3]}_indices=tensor("
            compressed_indices = compressed_indices_method(self).detach()
            compressed_indices_str = _tensor_str(
                compressed_indices, indent + len(compressed_indices_prefix)
            )
            if compressed_indices.numel() == 0:
                compressed_indices_str += ", size=" + str(
                    tuple(compressed_indices.shape)
                )
            plain_indices_prefix = f"{pdimname[:3]}_indices=tensor("
            plain_indices = plain_indices_method(self).detach()
            plain_indices_str = _tensor_str(
                plain_indices, indent + len(plain_indices_prefix)
            )
            if plain_indices.numel() == 0:
                plain_indices_str += ", size=" + str(tuple(plain_indices.shape))
            values_prefix = "values=tensor("
            values = self.values().detach()
            values_str = _tensor_str(values, indent + len(values_prefix))
            if values.numel() == 0:
                values_str += ", size=" + str(tuple(values.shape))
            tensor_str = (
                compressed_indices_prefix
                + compressed_indices_str
                + "),\n"
                + " " * indent
                + plain_indices_prefix
                + plain_indices_str
                + "),\n"
                + " " * indent
                + values_prefix
                + values_str
                + ")"
            )
    elif self.is_quantized:
        suffixes.append("size=" + str(tuple(self.shape)))
        if not has_default_dtype:
            suffixes.append("dtype=" + str(self.dtype))
        suffixes.append("quantization_scheme=" + str(self.qscheme()))
        if (
            self.qscheme() == torch.per_tensor_affine
            or self.qscheme() == torch.per_tensor_symmetric
        ):
            suffixes.append("scale=" + str(self.q_scale()))
            suffixes.append("zero_point=" + str(self.q_zero_point()))
        elif (
            self.qscheme() == torch.per_channel_affine
            or self.qscheme() == torch.per_channel_symmetric
            or self.qscheme() == torch.per_channel_affine_float_qparams
        ):
            suffixes.append("scale=" + str(self.q_per_channel_scales()))
            suffixes.append("zero_point=" + str(self.q_per_channel_zero_points()))
            suffixes.append("axis=" + str(self.q_per_channel_axis()))
        if not custom_contents_provided:
            tensor_str = _tensor_str(self.dequantize(), indent)
    elif self.is_nested:
        if not custom_contents_provided:

            def indented_str(s, indent):
                return "\n".join(f" {line}" for line in s.split("\n"))

            strs = ",\n".join(
                indented_str(str(t), indent + 1)
                for t in torch.ops.aten.unbind.int(self, 0)
            )
            tensor_str = f"[\n{strs}\n]"
    elif torch._is_functional_tensor(self):
        prefix = "_to_functional_tensor("
        tensor_str = repr(torch._from_functional_tensor(self))
    else:
        if self.is_meta:
            suffixes.append("size=" + str(tuple(self.shape)))
            if self.dtype != torch.get_default_dtype():
                suffixes.append("dtype=" + str(self.dtype))
            # TODO: This implies that ellipses is valid syntax for allocating
            # a meta tensor, which it could be, but it isn't right now
            if not custom_contents_provided:
                tensor_str = "..."
        else:
            if self.numel() == 0 and not self.is_sparse:
                # Explicitly print the shape if it is not (0,), to match NumPy behavior
                if self.dim() != 1:
                    suffixes.append("size=" + str(tuple(self.shape)))
                # In an empty tensor, there are no elements to infer if the dtype
                # should be int64, so it must be shown explicitly.
                if self.dtype != torch.get_default_dtype():
                    suffixes.append("dtype=" + str(self.dtype))
                if not custom_contents_provided:
                    tensor_str = "[]"
            else:
                if not has_default_dtype:
                    suffixes.append("dtype=" + str(self.dtype))
                if not custom_contents_provided:
                    if self.layout != torch.strided:
                        tensor_str = _tensor_str(self.to_dense(), indent)
                    else:
                        tensor_str = _tensor_str(self, indent)

    if self.layout != torch.strided:
        suffixes.append("layout=" + str(self.layout))

    # Use inp here to get the original grad_fn and not the one generated by the forward grad
    # unpacking.
    if inp.grad_fn is not None:
        name = type(inp.grad_fn).__name__
        if name == "CppFunction":
            name = inp.grad_fn.name().rsplit("::", 1)[-1]
        suffixes.append("grad_fn=<{}>".format(name))
    elif inp.requires_grad:
        suffixes.append("requires_grad=True")

    if self.has_names():
        suffixes.append("names={}".format(self.names))

    if tangent is not None:
        suffixes.append("tangent={}".format(tangent))

    string_repr = _add_suffixes(
        prefix + tensor_str, suffixes, indent, force_newline=self.is_sparse
    )

    # Check if this instance is flagged as a parameter and change the repr accordingly.
    # Unfortunately, this function has to be aware of this detail.
    # NB: This is currently skipped for plain tensor parameters to maintain BC. In the future,
    # this should be done for those as well to produce a valid repr.
    if isinstance(self, torch.nn.Parameter) and not is_plain_tensor:
        string_repr = f"Parameter({string_repr})"

    return string_repr
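Most branches in _str_intern only decide which suffixes get appended after the value string. Two quick illustrations of that suffix logic (a sketch; exact formatting may vary slightly across versions):

import torch

# int32 is not in the "default dtype" set, so a dtype= suffix is appended
print(torch.zeros(2, 2, dtype=torch.int32))
# tensor([[0, 0],
#         [0, 0]], dtype=torch.int32)

# requires_grad=True comes from the `elif inp.requires_grad:` branch
print(torch.nn.Parameter(torch.ones(2)))
# Parameter containing:
# tensor([1., 1.], requires_grad=True)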
In summary, the C++-side cout and the Python-side print invoke different sets of operators. If a backend leaves some of the operators used by the Python formatter unsupported or broken, print on the Python side can produce wrong output even though the underlying data is fine. Because the C++-side cout relies only on the extremely basic to (and contiguous) operators, as in the normalization sketch in section 1, a good first step when a Python print looks wrong is to print the same tensor with cout on the C++ side.