第3章: 数据结构 — Python的"瑞士军刀"
Java/Kotlin 开发者习惯了"一个需求一个类"——要排序就 Collections.sort(),要计数就写个 HashMap 循环。Python 的内置数据结构自带大量操作语法,很多你在 JVM 里需要工具类才能做的事,Python 一行搞定。本章逐个拆解四大核心结构 + 切片 + 解包 + collections + 数据类 + 枚举 + 类型注解。
3.1 list vs ArrayList/MutableList
Java/Kotlin 对比
// Java: ArrayList basics, shown for comparison with Python's list.
List<String> names = new ArrayList<>();
names.add("Alice"); // append at the end
names.add(0, "Bob"); // insert at index 0
names.addAll(List.of("C", "D")); // bulk append
names.remove(0); // int argument -> remove by index
names.remove("Alice"); // Object argument -> remove first equal element
Collections.reverse(names); // in-place reverse
Collections.sort(names); // in-place natural-order sort
String first = names.get(0);
int size = names.size();
names.sort(Comparator.comparingInt(String::length)); // in-place sort by string length
// Kotlin: MutableList counterpart of the Java snippet.
val names = mutableListOf("Alice", "Bob")
names.add("Charlie")
names.addAll(listOf("D", "E"))
names.removeAt(0) // remove by index
names.reverse() // in-place reverse
names.sortBy { it.length } // in-place sort by string length
val first = names[0] // index operator instead of get(0)
Python 实现
# --- Creating lists ---
names = ["Alice", "Bob", "Charlie"]
empty = []
from_range = list(range(5))    # [0, 1, 2, 3, 4]
from_string = list("hello")    # ['h', 'e', 'l', 'l', 'o']

# --- Adding elements ---
names.append("Dave")             # append one element at the end
names.insert(0, "Zoe")           # insert before index 0
names.extend(["Eve", "Frank"])   # append every element of an iterable
names += ["Grace"]               # same effect as extend()

# --- Removing elements ---
removed = names.pop()     # removes and returns the last element ("Grace")
removed = names.pop(0)    # removes and returns the element at index 0 ("Zoe")
names.remove("Bob")       # removes the first element equal to "Bob"
del names[0]              # deletes by index; this drops "Alice"

# --- Access and search ---
# Start again from a known list: "Alice" was deleted above, and
# list.index() raises ValueError for a value that is not present.
names = ["Alice", "Bob", "Charlie"]
first = names[0]                # "Alice"
last = names[-1]                # negative index counts from the end: "Charlie"
idx = names.index("Alice")      # position of the first match: 0
count = names.count("Alice")    # number of occurrences: 1
has = "Alice" in names          # membership test: True
# --- Sorting ---
numbers = [3, 1, 4, 1, 5, 9]
numbers.sort()  # in place, ascending -> [1, 1, 3, 4, 5, 9]; returns None
numbers.sort(reverse=True)  # in place, descending -> [9, 5, 4, 3, 1, 1]
numbers.sort(key=abs)  # in place, ordered by absolute value
sorted_nums = sorted([3, 1, 4])  # sorted() builds a new list: [1, 3, 4]
# NOTE: despite the variable name, this orders by length (shortest first);
# nothing is reversed.
names_reversed = sorted(names, key=len)
numbers.reverse()  # in-place reversal
reversed_nums = list(reversed(numbers))  # reversed() is lazy; list() materializes it
# --- List comprehensions ---
squares = [x ** 2 for x in range(10)]  # 0, 1, 4, ..., 81
evens = [x for x in range(20) if x % 2 == 0]  # trailing `if` filters
matrix = [[i * 3 + j for j in range(3)] for i in range(3)]  # [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
words = ["hello", "world", "", "python"]
non_empty = [w for w in words if w]  # the empty string is falsy and is dropped
upper = [w.upper() for w in words if w]  # filter and transform in one pass
# --- Built-in functions that accept a list ---
numbers = [1, 2, 3, 4, 5]
print(len(numbers))  # 5
print(sum(numbers))  # 15
print(min(numbers))  # 1
print(max(numbers))  # 5
print(any([0, 0, 1]))  # True: at least one truthy element
print(all([1, 2, 3]))  # True: every element is truthy
print("->".join(["a","b"]))  # a->b (join is a str method, not a list method)
# --- Shallow vs. deep copy ---
a = [[1, 2], [3, 4]]
b = a.copy()  # shallow: b is a new outer list holding the same inner lists
b[0][0] = 99  # therefore a[0][0] is now 99 as well
import copy
c = copy.deepcopy(a)  # deep: the inner lists are copied too, so c is independent of a
核心差异
| 特性 | Java ArrayList | Kotlin MutableList | Python list |
|---|---|---|---|
| 类型 | 泛型 List<String> | 泛型 MutableList<String> | 无类型约束,可混合类型 [1, "a", True] |
| 追加 | add(e) | add(e) | append(e) |
| 批量追加 | addAll(list) | addAll(list) | extend(list) 或 += list |
| 负索引 | 不支持 | 不支持 | lst[-1] 取最后一个 |
| 排序 | Collections.sort() | sortBy{} | lst.sort() 原地 / sorted(lst) 新建 |
| 推导式 | 无 | 无 | [expr for x in iterable if cond] |
| 切片 | subList(from, to) | subList(from, to) | lst[from:to] 语法糖 |
常见陷阱
# Pitfall: list.sort() sorts in place and returns None.
nums = [3, 1, 2]
result = nums.sort()  # result is None, not the sorted list
print(result)  # None
print(nums)  # [1, 2, 3] -- the list itself was reordered
result = sorted(nums)  # sorted() is the call that returns a new sorted list
def add(item, lst=[]):
    """Append ``item`` to ``lst`` and return the list.

    PITFALL -- kept on purpose as the "wrong" version: the default ``[]`` is
    built once, when the ``def`` statement executes, and that single list is
    shared by every call that omits ``lst``. Items therefore accumulate
    across calls.

    Args:
        item: Value to append.
        lst: Target list; when omitted, the shared default list is used.

    Returns:
        The list that was appended to.
    """
    lst.append(item)
    return lst
def add(item, lst=None):
    """Append ``item`` to ``lst`` and return the list.

    Uses ``None`` as the default and creates a fresh list inside the call, so
    calls that omit ``lst`` never share state with each other.

    Args:
        item: Value to append.
        lst: Target list; a new empty list is created when omitted or None.

    Returns:
        The list that was appended to (the caller's list if one was passed).
    """
    if lst is None:
        lst = []
    lst.append(item)
    return lst
# A list places no constraint on element types.
mixed = [1, "two", [3, 4]]
# == compares contents; `is` compares object identity.
a = [1, 2, 3]
b = [1, 2, 3]
print(a == b)  # True: same elements in the same order
print(a is b)  # False: two separate list objects
何时使用
- 用 list: 几乎所有有序、可变集合场景。Python 的 list 就是默认选择,不需要像 Java 那样在 ArrayList/LinkedList 之间纠结。
- 不用 list 的场景: 需要不可变性时用 tuple(3.2),需要快速查找时用 dict/set(3.3/3.4),需要频繁头插时用 deque(3.7)。
3.2 tuple vs 不可变List/Pair/Triple
Java/Kotlin 对比
// Java: a record is the closest equivalent of a tuple with named fields.
record Point(int x, int y) {}
Point p = new Point(1, 2);
int x = p.x(); // accessor method generated by the record
// NOTE(review): Pair is not a JDK class; Pair.of(...) presumably comes from a
// third-party library -- confirm which one the chapter intends.
Pair<String, Integer> pair = Pair.of("age", 25);
public record Result(String name, int score) {}
// Kotlin: data classes, Pair and Triple all support destructuring declarations.
data class Point(val x: Int, val y: Int)
val (x, y) = Point(1, 2) // destructure a data class
val pair = Pair("name", "Alice")
val (key, value) = pair // destructure a Pair
val triple = Triple(1, "hello", true)
val (a, b, c) = triple // destructure a Triple
val list = listOf(1, 2, 3) // read-only list
Python 实现
# --- Creating tuples ---
point = (1, 2)
single = (42,)  # the trailing comma is what makes a one-element tuple
empty = ()
no_parens = 1, 2, 3  # parentheses are optional; the commas build the tuple
nested = ((1, 2), (3, 4))
# --- Indexing and slicing work as on lists ---
point = (10, 20, 30, 40)
print(point[0])  # 10
print(point[-1])  # 40
print(point[1:3])  # (20, 30) -- slicing a tuple yields a tuple
# --- Unpacking ---
x, y = (1, 2)
r, g, b, a = (255, 128, 0, 255)
a, b = 1, 2
a, b = b, a  # swap without a temporary variable
print(a, b)  # 2 1
def min_max(numbers):
    """Return the smallest and largest element of ``numbers`` as a tuple.

    Args:
        numbers: A non-empty iterable of mutually comparable values.

    Returns:
        ``(minimum, maximum)``; ``return a, b`` builds the tuple implicitly.

    Raises:
        ValueError: If ``numbers`` is empty (raised by ``min``).
    """
    return min(numbers), max(numbers)


# The returned tuple is unpacked straight into two names.
lo, hi = min_max([3, 1, 4, 1, 5])
print(lo, hi)  # 1 5
# --- Star unpacking: the starred name collects the leftovers ---
first, *rest = (1, 2, 3, 4, 5)
print(first)  # 1
print(rest)  # [2, 3, 4, 5] -- always a list, even when the source is a tuple
*head, last = (1, 2, 3, 4, 5)
print(head)  # [1, 2, 3, 4]
print(last)  # 5
first, *middle, last = (1, 2, 3, 4, 5)
print(middle)  # [2, 3, 4]
# --- Immutability is shallow ---
point = (1, 2)  # items of a tuple cannot be reassigned
mutable_in_tuple = ([1, 2], [3, 4])
mutable_in_tuple[0].append(3)  # allowed: the tuple is fixed, the list inside it is not
# collections.namedtuple: a tuple subclass whose positions also have names.
from collections import namedtuple
Point = namedtuple('Point', ['x', 'y'])
p = Point(1, 2)
print(p.x, p.y)  # 1 2 -- access by field name
print(p[0], p[1])  # 1 2 -- still indexable like a plain tuple
x, y = p  # and still unpackable
from typing import NamedTuple


class Point(NamedTuple):
    """Immutable 2-D point with an optional label (class-style named tuple)."""

    x: int
    y: int
    label: str = "origin"  # fields with defaults must come after those without

    def distance_to(self, other):
        """Return the Euclidean distance between this point and ``other``."""
        return ((self.x - other.x) ** 2 + (self.y - other.y) ** 2) ** 0.5


p = Point(3, 4)
print(p.label)                       # origin
print(p.distance_to(Point(0, 0)))    # 5.0
p = Point(1, 2)
print(p._asdict())        # {'x': 1, 'y': 2, 'label': 'origin'}
print(p._replace(x=10))   # a new Point with x=10; p itself is unchanged
核心差异
| 特性 | Java record | Kotlin data class / Pair | Python tuple |
|---|---|---|---|
| 定义 | record Point(int x, int y){} | data class Point(val x, val y) | point = (1, 2) 无需定义 |
| 字段访问 | p.x() | p.x | p[0] 或命名元组 p.x |
| 解构 | 无原生支持 | val (x, y) = p | x, y = p |
| 多返回值 | 需要 record 包装 | 需要 data class 包装 | return a, b 天然支持 |
| 不可变保证 | 编译器强制 | val 属性强制 | 浅不可变(内部可变对象可变) |
| 哈希 | 自动实现 | 自动实现 | 自动实现(可用作 dict key) |
常见陷阱
# Pitfall: parentheses alone do not create a tuple -- the comma does.
not_a_tuple = (42)  # just the int 42 in grouping parentheses
is_a_tuple = (42,)
print(type(not_a_tuple))  # <class 'int'>
print(type(is_a_tuple))  # <class 'tuple'>
a = 1, 2, 3  # tuple
b = (1)  # int
c = (1,)  # tuple
# Pitfall: a tuple holding a mutable object is only shallowly immutable.
t = ([1, 2], "hello")
t[0].append(3)  # works; t is now ([1, 2, 3], "hello")
def return_one():
    """Return the plain integer 42 (no trailing comma, so not a tuple)."""
    return 42
def return_tuple():
    """Return the one-element tuple ``(42,)``.

    The trailing comma after ``42`` is easy to miss, and it changes the
    return type from ``int`` to ``tuple``.
    """
    return 42,
何时使用
- 用 tuple: 函数多返回值、数据不需要修改、作为 dict 的 key、结构固定的记录(如坐标、RGB)。
- 用 namedtuple/NamedTuple: 需要字段名访问时(替代简单的 data class,更轻量)。
- 不用 tuple: 数据需要增删改时用 list。
3.3 dict vs HashMap/Map
Java/Kotlin 对比
// Java: HashMap basics, shown for comparison with Python's dict.
Map<String, Integer> ages = new HashMap<>();
ages.put("Alice", 30);
ages.put("Bob", 25);
int age = ages.get("Alice"); // auto-unboxing; a missing key would yield null and throw NullPointerException here
int unknown = ages.getOrDefault("Eve", 0); // 0: "Eve" is absent
ages.putIfAbsent("Alice", 31); // no effect: "Alice" is already mapped
ages.replace("Alice", 31); // replaces only because the key exists
ages.remove("Bob");
// Iterate over key/value pairs.
for (Map.Entry<String, Integer> e : ages.entrySet()) {
System.out.println(e.getKey() + ": " + e.getValue());
}
ages.merge("Alice", 1, Integer::sum); // adds 1 to the existing value
ages.computeIfAbsent("Charlie", k -> k.length()); // computes and stores a value only when the key is absent
// Kotlin: MutableMap counterpart of the Java snippet.
val ages = mutableMapOf("Alice" to 30, "Bob" to 25)
ages["Charlie"] = 28 // index-operator assignment instead of put()
val age = ages["Alice"] // nullable Int?: null when the key is absent
val unknown = ages.getOrDefault("Eve", 0)
ages.getOrPut("Dave") { 20 } // the lambda runs only when the key is missing
ages.remove("Bob")
// Destructure each entry into (name, age) while iterating.
for ((name, age) in ages) {
println("$name: $age")
}
Python 实现
# --- Creating dicts ---
d = {"name": "Alice", "age": 30}
empty = {}
from_pairs = dict([("x", 1), ("y", 2)])  # from an iterable of (key, value) pairs
from_kwargs = dict(name="Bob", age=25)  # keyword names become string keys
keys_only = dict.fromkeys(["a", "b", "c"], 0)  # every key mapped to the same value 0
# --- Insert / read ---
d = {}
d["name"] = "Alice"
d["age"] = 30
name = d["name"]  # raises KeyError when the key is missing
age = d.get("age")  # 30; get() returns None instead of raising
age = d.get("salary", 0)  # 0: the key is absent, so the fallback is returned
value = d.setdefault("city", "Beijing")  # key absent -> inserted; returns "Beijing"
value = d.setdefault("name", "Bob")  # key present -> untouched; returns "Alice"
# --- Delete ---
removed = d.pop("age")  # removes the key and returns its value (30)
d.pop("nonexistent", None)  # a default suppresses KeyError for a missing key
del d["name"]
removed = d.popitem()  # removes and returns the last inserted pair: ("city", "Beijing")
# --- Merging: four equivalent spellings, later values win ---
defaults = {"theme": "dark", "lang": "en"}
user_config = {"lang": "zh", "font": "14px"}
config = defaults | user_config  # new dict (Python 3.9+)
config = defaults.copy()
config |= user_config  # in-place merge (Python 3.9+)
config = {**defaults, **user_config}  # unpacking into a new dict literal
config = defaults.copy()
config.update(user_config)  # in-place merge
scores = {"Alice": 90, "Bob": 85, "Charlie": 92}

# Iterating a dict directly yields its keys.
for key in scores:
    print(key)

# .keys() yields the same keys; spelled out here to show the equivalence.
for key in scores.keys():
    print(key)

# .values() yields only the values.
for value in scores.values():
    print(value)

# .items() yields (key, value) pairs, unpacked in the loop header.
for key, value in scores.items():
    print(f"{key}: {value}")
# --- Dict comprehensions ---
squares = {x: x ** 2 for x in range(5)}  # {0: 0, 1: 1, 2: 4, 3: 9, 4: 16}
original = {"a": 1, "b": 2, "c": 3}
flipped = {v: k for k, v in original.items()}  # swap keys and values: {1: "a", 2: "b", 3: "c"}
scores = {"Alice": 90, "Bob": 55, "Charlie": 72}
passed = {k: v for k, v in scores.items() if v >= 60}  # keeps Alice and Charlie
# --- Nested dicts ---
data = {"user": {"profile": {"name": "Alice"}}}
name = data["user"]["profile"]["name"]  # chained indexing; any missing level raises KeyError
def get_nested(d, *keys, default=None):
    """Safely read a value from nested dictionaries.

    ``get_nested(d, "a", "b")`` is the non-raising counterpart of
    ``d["a"]["b"]``.

    Args:
        d: The outermost dictionary.
        *keys: Keys to follow, outermost first. With no keys, ``d`` itself is
            returned.
        default: Value returned when the path cannot be followed.

    Returns:
        The value at the end of the path, or ``default`` when any step meets
        a non-dict, a missing key, or a stored ``None``.
    """
    current = d
    for key in keys:
        # Only dicts can be descended into.
        if not isinstance(current, dict):
            return default
        current = current.get(key)
        # A stored None cannot be told apart from a missing key here; both
        # end the walk with ``default``.
        if current is None:
            return default
    return current
# Using the get_nested helper on the nested `data` dict.
name = get_nested(data, "user", "profile", "name")  # "Alice"
missing = get_nested(data, "user", "settings", "theme", default="dark")  # "dark": no "settings" level
# dicts keep insertion order (guaranteed since Python 3.7); keys are not sorted.
d = {"c": 3, "a": 1, "b": 2}
print(list(d.keys()))  # ['c', 'a', 'b']
核心差异
| 特性 | Java HashMap | Kotlin mapOf | Python dict |
|---|---|---|---|
| 有序性 | 无序(LinkedHashMap 才保持插入顺序) | mapOf/mutableMapOf 默认保持插入顺序(底层是 LinkedHashMap) | 3.7+ 保持插入顺序 |
| 安全取值 | getOrDefault() | getOrDefault() | d.get(key, default) |
| 不存在时计算 | computeIfAbsent() | getOrPut() | setdefault() 或 defaultdict |
| 合并 | putAll() | + 运算符 | a \| b(3.9+)或 {**a, **b} |
| 推导式 | 无 | 无 | {k: v for k, v in ...} |
| 遍历 | entrySet() | for ((k,v) in map) | for k, v in d.items() |
常见陷阱
# Pitfall: get() never raises; a missing key silently yields None.
d = {"a": 1}
d.get("b")       # None
d.get("b", 0)    # 0 -- pass an explicit fallback when None is not acceptable

# Pitfall: changing a dict's size while iterating over it raises RuntimeError.
# Iterate over a snapshot of the keys instead.
d = {"a": 1, "b": 2, "c": 3}
for k in list(d.keys()):
    if k == "b":
        del d[k]

# Pitfall: keys must be hashable. A list cannot be a key; convert it to a tuple.
d = {}
key = [1, 2]
d[tuple(key)] = "value"
# Check membership first so that expensive_function() runs only when the key
# is really missing. d.setdefault("key", expensive_function()) would evaluate
# the call on every execution, because arguments are evaluated before the call.
if "key" not in d:
    d["key"] = expensive_function()
何时使用
- 用 dict: 键值映射、配置、JSON 数据、计数器(或用 Counter)、缓存。
- 用 defaultdict: 当缺失 key 需要自动初始化时(3.7 详述)。
- 不用 dict: 需要调整元素顺序(move_to_end)或顺序敏感的相等比较时考虑 OrderedDict(它只维护插入顺序,不会按 key 排序;要排序请用 sorted(d.items()));需要只读视图时考虑 types.MappingProxyType。
3.4 set vs HashSet/Set
Java/Kotlin 对比
// Java: HashSet basics and set algebra via mutating bulk operations.
Set<String> names = new HashSet<>();
names.add("Alice");
names.add("Bob");
names.add("Alice"); // duplicate: the set is unchanged
boolean has = names.contains("Alice");
names.remove("Bob");
Set<String> a = Set.of("a", "b", "c"); // immutable sets (Java 9+)
Set<String> b = Set.of("b", "c", "d");
// Each operation copies `a` first because the bulk methods mutate the receiver.
Set<String> union = new HashSet<>(a);
union.addAll(b); // union
Set<String> intersection = new HashSet<>(a);
intersection.retainAll(b); // intersection
Set<String> diff = new HashSet<>(a);
diff.removeAll(b); // difference a - b
// Kotlin: set algebra through infix functions and operators.
val setA = setOf("a", "b", "c")
val setB = setOf("b", "c", "d")
val union = setA union setB          // [a, b, c, d]
val intersect = setA intersect setB  // [b, c]
val diff = setA - setB               // [a]
// The standard library has no `xor` for sets (`xor` exists only for Boolean
// and integer types), so the symmetric difference is composed from the
// operations above.
val symmetric = (setA union setB) - (setA intersect setB)  // [a, d]
Python 实现
# --- Creating sets ---
s = {1, 2, 3}
empty = set()  # {} would create an empty dict, not a set
from_list = set([1, 2, 2, 3])  # duplicates collapse: {1, 2, 3}
from_string = set("hello")  # unique characters: {'h', 'e', 'l', 'o'}
# --- Mutating ---
s = {1, 2, 3}
s.add(4)
s.discard(4)  # removes if present; silent when absent
s.remove(3)  # removes; raises KeyError when absent
s.pop()  # removes and returns an arbitrary element
s.clear()  # empties the set
a = {1, 2, 3, 4}
b = {3, 4, 5, 6}
# --- Operator forms ---
print(a | b)  # union: {1, 2, 3, 4, 5, 6}
print(a & b)  # intersection: {3, 4}
print(a - b)  # difference: {1, 2}
print(a ^ b)  # symmetric difference: {1, 2, 5, 6}
print(a <= b)  # subset test: False
print(a >= b)  # superset test: False
print({3, 4} <= a)  # True
print(a < a)  # proper subset: False, a set is never a proper subset of itself
print(a == {1, 2, 3, 4})  # True: equality ignores order
# --- Method forms of the same operations ---
print(a.union(b))
print(a.intersection(b))
print(a.difference(b))
print(a.symmetric_difference(b))
print(a.issubset(b))  # False
print(a.issuperset(b))  # False
# --- Set comprehensions ---
squares = {x ** 2 for x in range(10)}
evens = {x for x in range(20) if x % 2 == 0}
# --- frozenset: immutable and hashable ---
fs = frozenset([1, 2, 3])
print(fs | {4})  # operators still work and return a new frozenset
d = {frozenset([1, 2]): "pair"}  # hashable, so usable as a dict key
nested_set = {frozenset([1, 2]), frozenset([2, 3])}  # ...and as a set element
# --- Typical uses ---
names = ["Alice", "Bob", "Alice", "Charlie", "Bob"]
unique = list(set(names))  # de-duplicates; the resulting order is not guaranteed
big_set = set(range(1_000_000))
print(999999 in big_set)  # True; membership is a hash lookup, not a scan
common = {1, 2, 3} & {2, 3, 4}  # {2, 3}
核心差异
| 特性 | Java HashSet | Kotlin setOf | Python set |
|---|---|---|---|
| 空集合 | new HashSet<>() | emptySet() | set() — 注意不是 {} |
| 并集 | addAll() | union 中缀函数 | \| 运算符 |
| 交集 | retainAll() | intersect 中缀函数 | & 运算符 |
| 差集 | removeAll() | - 运算符 | - 运算符 |
| 对称差 | 无原生支持 | 无原生支持(可写成 (a union b) - (a intersect b)) | ^ 运算符 |
| 不可变 | Set.of() [Java 9+] | setOf() | frozenset() |
| 推导式 | 无 | 无 | {x for x in ...} |
常见陷阱
# Pitfall: {} is an empty dict; the empty set is spelled set().
empty_set = set()
print(type({}))  # <class 'dict'>
# Pitfall: elements must be hashable -- use tuples or frozensets, not lists or sets.
s = {(1, 2)}
s = {frozenset([1, 2])}
# Pitfall: sets are unordered and do not support indexing.
s = {1, 2, 3}
何时使用
- 用 set: 去重、成员检测(in 操作频繁时)、集合运算(交并差)。
- 用 frozenset: 需要不可变集合、作为 dict key 或 set 元素时。
- 不用 set: 需要保持顺序时用 list;需要键值映射时用 dict。
3.5 切片 [start:stop:step]
Python 独有核心概念。Java 的 subList()、Kotlin 的 slice() 只是方法调用,Python 把切片做成了语言级语法,无处不在。
Java/Kotlin 对比
// Java: subList(from, to) with an exclusive end index.
List<String> list = List.of("a", "b", "c", "d", "e");
List<String> sub = list.subList(1, 3); // [b, c]
// Kotlin: slicing is done with ordinary library functions.
val list = listOf("a", "b", "c", "d", "e")
val sub = list.slice(1..2) // inclusive range -> [b, c]
val drop = list.drop(2) // skip the first two -> [c, d, e]
val take = list.take(3) // first three -> [a, b, c]
val reversed = list.reversed() // new reversed list
Python 实现
# --- Basic slices: start is inclusive, stop is exclusive ---
s = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
print(s[2:5])  # [2, 3, 4]
print(s[:5])  # [0, 1, 2, 3, 4]
print(s[5:])  # [5, 6, 7, 8, 9]
print(s[:])  # full shallow copy
print(s[-3:])  # last three: [7, 8, 9]
print(s[:-3])  # all but the last three: [0, 1, 2, 3, 4, 5, 6]
# --- Step ---
print(s[::2])  # [0, 2, 4, 6, 8]
print(s[1::2])  # [1, 3, 5, 7, 9]
print(s[::3])  # [0, 3, 6, 9]
print(s[::-1])  # reversed copy
print(s[::-2])  # [9, 7, 5, 3, 1]
print(s[8:2:-2])  # [8, 6, 4] -- with a negative step, start must be right of stop
# --- Negative indices ---
s = ['a', 'b', 'c', 'd', 'e']
print(s[-1])  # 'e'
print(s[-2])  # 'd'
print(s[-3:])  # ['c', 'd', 'e']
print(s[:-1])  # ['a', 'b', 'c', 'd']
print(s[1:-1])  # ['b', 'c', 'd'] -- drop first and last
# --- Out-of-range bounds are clamped; slicing never raises IndexError ---
s = [0, 1, 2, 3]
print(s[1:100])  # [1, 2, 3]
print(s[-100:2])  # [0, 1]
print(s[10:20])  # []
# --- Slice assignment (lists only) ---
s = [0, 1, 2, 3, 4, 5]
s[1:4] = [10, 20]  # replace three items with two -> [0, 10, 20, 4, 5]
s[1:3] = []  # delete a range -> [0, 4, 5]
s = [0, 1, 2]
s[1:1] = [10, 20]  # empty slice = insertion point -> [0, 10, 20, 1, 2]
s = [0, 1, 2, 3, 4, 5]
s[::2] = [10, 20, 30]  # stepped slice: lengths must match -> [10, 1, 20, 3, 30, 5]
# --- slice objects: a reusable, named slice ---
first_three = slice(3)  # same as [:3]
every_other = slice(None, None, 2)  # same as [::2]
last_two = slice(-2, None)  # same as [-2:]
s = [0, 1, 2, 3, 4, 5, 6, 7]
print(s[first_three])  # [0, 1, 2]
print(s[every_other])  # [0, 2, 4, 6]
print(s[last_two])  # [6, 7]
class MyList:
    """Minimal class showing what ``obj[...]`` hands to ``__getitem__``."""

    def __getitem__(self, key):
        """Describe the subscript that was used.

        Args:
            key: An index for ``obj[i]``, or a ``slice`` object for
                ``obj[a:b:c]`` (omitted parts arrive as ``None``).

        Returns:
            ``"slice(start, stop, step)"`` for a slice, otherwise
            ``"index(key)"``.
        """
        if isinstance(key, slice):
            return f"slice({key.start}, {key.stop}, {key.step})"
        return f"index({key})"


ml = MyList()
print(ml[3])        # index(3)
print(ml[1:5:2])    # slice(1, 5, 2)
# Slicing works on every sequence type, not only lists.
text = "Hello, World!"
print(text[7:12])  # World
print(text[::-1])  # !dlroW ,olleH
t = (0, 1, 2, 3, 4)
print(t[1:4])  # (1, 2, 3) -- a tuple slice is a tuple
核心差异
| 特性 | Java subList | Kotlin slice | Python 切片 |
|---|---|---|---|
| 语法 | list.subList(1, 3) | list.slice(1..2) | list[1:3] |
| 返回类型 | 视图(修改影响原列表) | 新列表 | 新列表(浅拷贝) |
| 负索引 | 不支持 | 不支持 | list[-1] 取最后 |
| 步长 | 不支持 | 不支持 | list[::2] |
| 反转 | Collections.reverse() | reversed() | list[::-1] |
| 越界 | 抛异常 | 抛异常 | 自动截断,不报错 |
常见陷阱
# Pitfall: a[:] is a shallow copy; the inner lists are shared.
a = [[1, 2], [3, 4]]
b = a[:]
b[0][0] = 99  # a[0][0] becomes 99 too
# Pitfall: assigning to a stepped slice requires exactly as many items as slots.
s = [0, 1, 2, 3, 4]
s[::2] = [10, 20, 30]  # three slots (indices 0, 2, 4), three values: OK
# Pitfall: an out-of-range slice yields an empty list rather than an error.
s = [0, 1, 2]
print(s[5:10])  # []
# Pitfall: with a negative step, stop is still exclusive.
s = [0, 1, 2, 3, 4, 5]
print(s[5:1:-1])  # [5, 4, 3, 2]
print(s[5:1:-2])  # [5, 3]
何时使用
- 切片无处不在: 取子序列、反转、分页、跳步——只要操作序列就优先考虑切片。
- slice 对象: 当同一切片模式需要在多处复用时。
3.6 解包与星号表达式
Python 的解包(unpacking)远比 Java/Kotlin 的解构声明强大。它是语言级特性,不仅用于赋值,还用于函数调用、集合构造等场景。
Java/Kotlin 对比
// Java: no destructuring for arrays; elements are read one by one.
int[] arr = {1, 2, 3};
int a = arr[0], b = arr[1], c = arr[2];
// Record pattern in instanceof: binds x and y when obj is a Point.
// `obj` is assumed to be declared elsewhere; it is not defined in this snippet.
record Point(int x, int y) {}
if (obj instanceof Point(int x, int y)) {
System.out.println(x + ", " + y);
}
// Kotlin: destructuring declarations. Each line is an independent example;
// the repeated names would clash if they were all placed in one scope.
val (name, age) = Pair("Alice", 30)
val (x, y, z) = Triple(1, 2, 3)
data class Person(val name: String, val age: Int)
val (name, age) = Person("Bob", 25)
val (_, age) = Pair("Alice", 30) // underscore skips a component
val map = mapOf("a" to 1, "b" to 2)
for ((key, value) in map) { } // destructure map entries in a loop
Python 实现
# --- Any iterable can be unpacked ---
a, b, c = [1, 2, 3]
a, b, c = (1, 2, 3)
a, b, c = "abc"  # 'a', 'b', 'c'
x, y, z = range(3)  # 0, 1, 2
a, b = 1, 2
a, b = b, a  # swap
# --- Ignoring values: `_` is an ordinary name used by convention ---
a, _, c = [1, 2, 3]
a, *_, c = [1, 2, 3, 4, 5]  # keep the ends, discard the middle
# --- Starred target collects the rest as a list ---
first, *rest = [1, 2, 3, 4, 5]
print(first)  # 1
print(rest)  # [2, 3, 4, 5]
*head, last = [1, 2, 3, 4, 5]
print(head)  # [1, 2, 3, 4]
print(last)  # 5
first, *middle, last = [1, 2, 3, 4, 5]
print(middle)  # [2, 3, 4]
# Starred unpacking in a loop header: rows may have different lengths.
for first, *rest in [[1, 2, 3], [4, 5, 6, 7]]:
    print(first, rest)    # 1 [2, 3]  then  4 [5, 6, 7]

# Nested unpacking: the target mirrors the shape of the value.
(a, b), (c, d) = (1, 2), (3, 4)
print(a, b, c, d)         # 1 2 3 4

# enumerate() yields (index, row); each row is unpacked in the same header.
matrix = [[1, 2], [3, 4], [5, 6]]
for row_idx, (x, y) in enumerate(matrix):
    print(row_idx, x, y)

# Nesting and a starred target can be combined.
first, (second, *rest), last = [1, (2, 3, 4), 5]
print(first, second, rest, last)    # 1 2 [3, 4] 5
def add(a, b, c):
    """Return the sum of the three arguments."""
    return a + b + c


nums = [1, 2, 3]
print(add(*nums))        # 6 -- * spreads an iterable into positional arguments
print(add(*range(3)))    # 3 -- any iterable works, not only lists
def greet(name, age):
    """Return ``"<name>, <age>"``."""
    return f"{name}, {age}"


info = {"name": "Alice", "age": 30}
print(greet(**info))    # Alice, 30 -- ** spreads a dict into keyword arguments
# Unpacking inside list / set / dict literals merges collections.
a = [1, 2, 3]
b = [4, 5]
c = [*a, *b, 6]  # [1, 2, 3, 4, 5, 6]
s1 = {1, 2}
s2 = {3, 4}
s3 = {*s1, *s2}  # {1, 2, 3, 4}
d1 = {"a": 1}
d2 = {"b": 2}
d3 = {**d1, **d2, "c": 3}  # {"a": 1, "b": 2, "c": 3}
def config(**kwargs):
    """Print every keyword argument as `` key = value``, one per line.

    ``**kwargs`` gathers all keyword arguments into a dict.
    """
    for key, value in kwargs.items():
        print(f" {key} = {value}")


config(host="localhost", port=8080, debug=True)
def func(a, b, *args, key="default", **kwargs):
    """Print how the call's arguments were distributed.

    Args:
        a: First positional argument.
        b: Second positional argument.
        *args: Any further positional arguments, gathered into a tuple.
        key: Keyword-only argument (it follows ``*args``).
        **kwargs: Any further keyword arguments, gathered into a dict.
    """
    print(f"a={a}, b={b}, args={args}, key={key}, kwargs={kwargs}")


# a=1, b=2, args=(3, 4), key=custom, kwargs={'extra': 'data'}
func(1, 2, 3, 4, key="custom", extra="data")
# One starred target may sit anywhere among the plain targets.
first, *middle, second_last, last = range(10)
print(first, second_last, last)    # 0 8 9

nested = [[1, 2], [3, 4], [5, 6]]
# Starred unpacking is not allowed as the element expression of a
# comprehension; the next line is a SyntaxError, so it is left commented out:
# flat = [*inner for inner in nested]
# Flatten with a nested comprehension instead.
flat = [item for inner in nested for item in inner]    # [1, 2, 3, 4, 5, 6]
def wrapper(*args, **kwargs):
    """Forward any call to ``target``, printing a marker before and after.

    ``*args`` / ``**kwargs`` accept whatever the caller passes, and the same
    syntax at the call site hands everything on unchanged.
    NOTE(review): ``target`` is not defined in this snippet; it is assumed to
    be a callable available in the enclosing scope.

    Returns:
        Whatever ``target`` returns.
    """
    print("before")
    result = target(*args, **kwargs)
    print("after")
    return result
核心差异
| 特性 | Kotlin 解构 | Java 21+ 模式匹配 | Python 解包 |
|---|---|---|---|
| 语法 | val (a, b) = pair | if (obj instanceof R(int a, int b)) | a, b = pair |
| 收集剩余 | 不支持 | 不支持 | first, *rest = seq |
| 忽略值 | val (_, b) = pair | _ 占位符 | a, _, c = seq |
| 交换变量 | 不支持 | 不支持 | a, b = b, a |
| 函数参数展开 | spread operator * | 不支持 | *args, **kwargs |
| 集合展开 | 不支持 | 不支持 | [*a, *b], {**d1, **d2} |
| 嵌套解包 | 有限支持 | 有限支持 | 完全支持 |
常见陷阱
# Pitfall: a starred target is always a list, even when unpacking a tuple.
first, *rest = (1, 2, 3)
print(type(rest))  # <class 'list'>
# Without a starred target the counts must match exactly; *_ absorbs any extras.
a, b, *_ = [1, 2, 3]
def func(*args):
    """Print the type of ``args``.

    In a function signature ``*args`` gathers into a tuple, unlike a starred
    assignment target, which gathers into a list.
    """
    print(type(args))
何时使用
- 基本解包: 函数多返回值、交换变量、遍历时解构。
- 星号解包: 处理不定长序列、分割头尾。
- *args/**kwargs: 装饰器、参数转发、灵活 API。
- 集合展开: 合并多个集合/字典。
3.7 collections 模块
Java 需要引入 Guava 或 Apache Commons 才能获得的高级集合工具,Python 标准库直接提供。
Java/Kotlin 对比
// Java: insertion-ordered map and double-ended queue.
Map<String, Integer> ordered = new LinkedHashMap<>(); // iterates in insertion order
Deque<String> deque = new ArrayDeque<>();
deque.addFirst("a"); // push at the head
deque.addLast("b"); // push at the tail
// Kotlin: the same two structures.
val ordered = linkedMapOf("a" to 1, "b" to 2) // insertion-ordered map
val deque = ArrayDeque(listOf("a", "b"))
deque.addFirst("c") // push at the head
deque.addLast("d") // push at the tail
Python 实现
from collections import defaultdict, Counter, OrderedDict, deque, ChainMap, namedtuple
words = ["apple", "banana", "avocado", "blueberry", "cherry"]

# Grouping: a missing key is created on first access by calling list().
by_letter = defaultdict(list)
for word in words:
    by_letter[word[0]].append(word)
print(dict(by_letter))
# {'a': ['apple', 'avocado'], 'b': ['banana', 'blueberry'], 'c': ['cherry']}

# Counting: int() returns 0, so += 1 works on keys never seen before.
counts = defaultdict(int)
for word in words:
    counts[len(word)] += 1
print(dict(counts))    # {5: 1, 6: 2, 7: 1, 9: 1}

# Nesting: the factory is a lambda that builds another defaultdict.
nested = defaultdict(lambda: defaultdict(int))
nested["user"]["clicks"] += 1
nested["user"]["clicks"] += 1
nested["admin"]["clicks"] += 1
print(dict(nested))    # the inner values are still defaultdict objects
# --- Counter: a dict subclass for counting hashable items ---
text = "the quick brown fox jumps over the lazy dog"
word_counts = Counter(text.split())
print(word_counts.most_common(3))  # "the" (count 2) comes first
c = Counter("abracadabra")  # counts characters
print(c)  # a: 5, b: 2, r: 2, c: 1, d: 1
print(c['a'])  # 5
print(c['z'])  # 0 -- a missing key reads as 0 instead of raising KeyError
c['a'] -= 1  # counts are ordinary values and can be adjusted
c.update("xyz")  # update() adds to existing counts; it does not replace them
# --- Multiset arithmetic ---
c1 = Counter([1, 2, 2, 3])
c2 = Counter([2, 2, 4])
print(c1 + c2)  # counts added: {2: 4, 1: 1, 3: 1, 4: 1}
print(c1 - c2)  # counts subtracted, non-positive results dropped: {1: 1, 3: 1}
print(c1 & c2)  # minimum of each count: {2: 2}
print(c1 | c2)  # maximum of each count: {1: 1, 2: 2, 3: 1, 4: 1}
top3 = word_counts.most_common(3)  # list of (item, count) pairs, highest first
# --- OrderedDict: adds reordering operations on top of dict ---
od = OrderedDict([("a", 1), ("b", 2), ("c", 3)])
od.move_to_end("a")  # order: b, c, a
od.move_to_end("a", last=False)  # move to the front: a, b, c
od.popitem(last=False)  # pops from the front: ("a", 1)
od.popitem(last=True)  # pops from the back: ("c", 3)
# --- deque: double-ended queue ---
d = deque([1, 2, 3])
d.append(4)  # add on the right
d.pop()  # remove from the right -> 4
d.appendleft(0)  # add on the left
d.popleft()  # remove from the left -> 0
d = deque([1, 2, 3, 4, 5])
d.rotate(2)  # rotate right by 2 -> [4, 5, 1, 2, 3]
d.rotate(-1)  # rotate left by 1 -> [5, 1, 2, 3, 4]
# A bounded deque keeps only the newest `maxlen` items; appending to a full
# deque silently discards the item at the opposite end.
recent = deque(maxlen=3)
for i in range(5):
    recent.append(i)
print(recent)    # deque([2, 3, 4], maxlen=3)
# --- ChainMap: look keys up through several dicts, first match wins ---
defaults = {"theme": "dark", "lang": "en", "font": "14px"}
user_config = {"lang": "zh"}
env_config = {"font": "16px"}
config = ChainMap(env_config, user_config, defaults)  # highest priority first
print(config["lang"])  # zh -- found in user_config
print(config["theme"])  # dark -- only defaults has it
print(config["font"])  # 16px -- env_config shadows defaults
config["theme"] = "light"  # writes always go to the FIRST mapping
print(env_config)  # {'font': '16px', 'theme': 'light'}; defaults is untouched
config["debug"] = True  # new keys also land in env_config
# --- namedtuple extras ---
Point = namedtuple('Point', ['x', 'y'], defaults=[0, 0])
p = Point()  # Point(x=0, y=0)
p = Point(1)  # Point(x=1, y=0)
Point = namedtuple('Point', ['x', 'y'])
p = Point(1, 2)
print(p._asdict())  # {'x': 1, 'y': 2}
print(p._fields)  # ('x', 'y')
print(p._make([3, 4]))  # Point(x=3, y=4) -- build an instance from an iterable
核心差异
| 工具 | Java 对应 | Kotlin 对应 | Python collections |
|---|---|---|---|
| defaultdict | computeIfAbsent() | getOrPut() | defaultdict |
| Counter | Guava Multiset | 无 | Counter |
| OrderedDict | LinkedHashMap | linkedMapOf() | OrderedDict(3.7+ 普通 dict 也有序) |
| deque | ArrayDeque | ArrayDeque | deque |
| ChainMap | 无直接对应 | 无直接对应 | ChainMap |
| namedtuple | record | data class | namedtuple / NamedTuple |
常见陷阱
# Reading a missing key from a Counter returns 0 and does NOT insert the key.
c = Counter(["a", "b", "a"])
print(c["z"])  # 0
print(list(c.keys()))  # ['a', 'b'] -- no 'z'
d = deque([1, 2, 3, 4])
# ChainMap stores references to the underlying dicts, not copies.
defaults = {"x": 1}
user = {}
chain = ChainMap(user, defaults)
print(chain["x"])  # 1
defaults["x"] = 2  # a later change to an underlying dict...
print(chain["x"])  # 2 -- ...is visible through the chain
何时使用
- defaultdict: 分组、嵌套字典、计数——任何需要"key 不存在时自动初始化"的场景。
- Counter: 词频统计、投票计数、top-N。
- OrderedDict: LRU 缓存(配合 move_to_end)、需要 popitem(last=False) 的场景。
- deque: 队列、栈、滑动窗口、最近 N 条记录。
- ChainMap: 配置层级覆盖(默认配置 < 用户配置 < 环境变量 < 命令行参数)。
3.8 数据类: dataclasses, NamedTuple, TypedDict
Java/Kotlin 对比
// Java: three ways to write a data carrier.
// 1) record: constructor, accessors, equals/hashCode/toString are generated.
public record Point(int x, int y) {
}
// 2) plain class: only the fields are shown; the remaining members would be written by hand.
public class Person {
private String name;
private int age;
}
// 3) NOTE(review): @Data is presumably Lombok's annotation, which generates that boilerplate -- confirm.
@Data
public class Person {
private String name;
private int age;
}
// Kotlin: a data class with a default value, copy() and destructuring.
data class Person(val name: String, val age: Int = 0)
val p1 = Person("Alice", 30)
val p2 = p1.copy(age = 31) // new instance with one property changed
val (name, age) = p1 // destructuring declaration
Python 实现
from dataclasses import dataclass, field, FrozenInstanceError
from typing import NamedTuple, TypedDict
import json
@dataclass
class Point:
    """2-D point; ``@dataclass`` generates ``__init__``, ``__repr__`` and ``__eq__``."""

    x: float
    y: float


p = Point(1.0, 2.0)
print(p)                       # Point(x=1.0, y=2.0) -- generated __repr__
print(p.x, p.y)                # 1.0 2.0
print(p == Point(1.0, 2.0))    # True -- generated __eq__ compares field by field
@dataclass
class Person:
    """Person record; ``age`` and ``email`` are optional."""

    name: str
    age: int = 0       # fields with defaults must follow fields without
    email: str = ""


p = Person("Alice")                                # age=0, email=""
p = Person("Bob", age=25, email="bob@test.com")    # keyword arguments work as usual
@dataclass(frozen=True)
class ImmutablePoint:
    """Immutable 2-D point.

    ``frozen=True`` makes attribute assignment raise ``FrozenInstanceError``
    and, together with the generated ``__eq__``, makes instances hashable.
    """

    x: float
    y: float


p = ImmutablePoint(1.0, 2.0)
# Equal frozen instances hash alike, so the set keeps only one of them.
points = {ImmutablePoint(1, 2), ImmutablePoint(1, 2)}
print(len(points))    # 1
@dataclass
class Team:
    """Team record demonstrating the options of ``field()``."""

    name: str
    # default_factory is called once per instance, so lists are never shared.
    members: list[str] = field(default_factory=list)
    scores: list[int] = field(default_factory=list)
    # init=False: not a constructor parameter; always starts at the default.
    id: int = field(default=0, init=False)
    # Left out of repr() and of == comparisons.
    _internal: dict = field(default_factory=dict, repr=False, compare=False)


t = Team("Alpha")
t.members.append("Alice")
t2 = Team("Beta")    # gets its own, still empty, members list
print(t.members)     # ['Alice']
from dataclasses import replace


@dataclass
class Config:
    """Server settings; every field has a default."""

    host: str = "localhost"
    port: int = 8080
    debug: bool = False


c = Config()
# Dataclass instances have no .replace() method. Copy-with-changes is the
# module-level function dataclasses.replace(obj, **changes).
c2 = replace(c, port=9090, debug=True)
print(c)     # Config(host='localhost', port=8080, debug=False) -- unchanged
print(c2)    # Config(host='localhost', port=9090, debug=True)
class Point(NamedTuple):
    """Immutable 2-D point with an optional label (typed named tuple)."""

    x: float
    y: float
    label: str = "origin"

    def distance_to(self, other):
        """Return the Euclidean distance between this point and ``other``."""
        return ((self.x - other.x) ** 2 + (self.y - other.y) ** 2) ** 0.5


p = Point(3, 4)
print(p.label)                       # origin
print(p.distance_to(Point(0, 0)))    # 5.0
print(p._asdict())                   # {'x': 3, 'y': 4, 'label': 'origin'}
class PersonDict(TypedDict):
    """Shape of a person dict: every key is required (``total=True`` default)."""

    name: str
    age: int
    email: str


def greet(person: PersonDict) -> str:
    """Return a greeting that uses ``person["name"]``."""
    return f"Hello, {person['name']}"


# At runtime a TypedDict value is an ordinary dict.
p: PersonDict = {"name": "Alice", "age": 30, "email": "a@b.com"}
print(greet(p))    # Hello, Alice
# The wrong value type is NOT rejected at runtime; only a static type checker
# reports it.
p2: PersonDict = {"name": "Bob", "age": "not a number", "email": "b@c.com"}
class MovieDict(TypedDict, total=False):
    """Shape of a movie dict; ``total=False`` makes every key optional."""

    title: str
    year: int
    rating: float


m: MovieDict = {"title": "Inception"}    # valid for a type checker: keys may be omitted
核心差异
| 特性 | Java record | Kotlin data class | Python @dataclass | Python NamedTuple |
|---|---|---|---|---|
| 定义 | record P(int x, int y){} | data class P(val x, val y) | @dataclass class P: | class P(NamedTuple): |
| 不可变 | 天然不可变 | val 属性 | frozen=True | 天然不可变 |
| 可变默认值 | N/A | N/A | field(default_factory=...) | 不支持可变默认值 |
| 不可变拷贝 | 无(构造新实例) | copy(age=31) | dataclasses.replace(p, age=31) | p._replace(age=31) |
| 解构 | 无 | val (x,y) = p | 不支持(需手动) | x, y = p |
| 继承 | 可实现接口 | 可继承非 data class | 可继承 | 有限 |
| 类型检查 | 编译期 | 编译期 | mypy(可选) | mypy(可选) |
常见陷阱
from dataclasses import dataclass, field
# Pitfall: a mutable default value. dataclass() rejects it while the class is
# being created (ValueError), so the definition is wrapped to show the error
# without stopping the rest of the listing.
try:
    @dataclass
    class Bad:
        items: list = []
except ValueError as exc:
    print(exc)    # the message tells you to use default_factory instead
@dataclass
class Good:
    """Correct form: ``default_factory`` builds a new list for every instance."""

    items: list = field(default_factory=list)
@dataclass
class Good:
    """Correct field order: fields without defaults come first.

    Putting ``name: str = ""`` before ``age: int`` would fail at class
    creation with "non-default argument 'age' follows default argument".
    """

    age: int
    name: str = ""
@dataclass(frozen=True)
class Bad:
    """Frozen dataclass with a ``__post_init__`` hook."""

    x: int
    y: int

    def __post_init__(self):
        # Pitfall: the instance is already frozen when this hook runs. A plain
        # ``self.x = ...`` here raises FrozenInstanceError; a derived value
        # has to be stored with ``object.__setattr__(self, "x", value)``.
        pass
何时使用
- @dataclass: 通用数据类,大部分场景的首选。需要可变性、默认值、方法时使用。
- NamedTuple: 轻量不可变数据、需要元组解包、作为函数返回值。
- TypedDict: 处理 JSON/字典数据时提供类型提示,配合 mypy 使用。
3.9 enum.Enum
Java/Kotlin 对比
public enum Direction {
    NORTH, SOUTH, EAST, WEST;

    /** Returns the compass bearing of this direction in radians, measured clockwise from north. */
    public double toRadians() {
        // A switch *expression* over an enum must cover every constant (or
        // have a default branch); otherwise it does not compile.
        return switch (this) {
            case NORTH -> 0.0;
            case EAST -> Math.PI / 2;
            case SOUTH -> Math.PI;
            case WEST -> 3 * Math.PI / 2;
        };
    }
}
// Java: an enum whose constants carry a value through a constructor.
public enum Status {
OK(200), NOT_FOUND(404), ERROR(500);
private final int code;
Status(int code) { this.code = code; } // enum constructors are implicitly private
public int getCode() { return code; }
}
// Kotlin: an enum class with a constructor property and a method.
enum class Direction(val degrees: Double) {
NORTH(0.0), EAST(90.0), SOUTH(180.0), WEST(270.0);
fun toRadians() = Math.toRadians(degrees)
}
val d = Direction.NORTH
println(d.name) // NORTH
println(d.ordinal) // 0 -- position in declaration order
Python 实现
from enum import Enum, IntEnum, Flag, IntFlag, auto
class Direction(Enum):
    """Compass direction; each member's value is its one-letter code."""

    NORTH = "N"
    SOUTH = "S"
    EAST = "E"
    WEST = "W"


print(Direction.NORTH)                       # Direction.NORTH
print(Direction.NORTH.value)                 # N
print(Direction.NORTH.name)                  # NORTH
print(Direction.NORTH is Direction.NORTH)    # True -- members are singletons
print(Direction.NORTH == Direction.NORTH)    # True

# Iterating the class yields the members in definition order.
for d in Direction:
    print(f"{d.name} = {d.value}")

d = Direction("N")         # look up by value (call syntax)
d = Direction["NORTH"]     # look up by name (subscript syntax)
class Status(Enum):
    """HTTP status codes; enum members can carry properties and methods."""

    OK = 200
    NOT_FOUND = 404
    ERROR = 500

    @property
    def is_success(self):
        """True when the status code is below 400."""
        return self.value < 400

    def describe(self):
        """Return ``"<NAME> (HTTP <code>)"``."""
        return f"{self.name} (HTTP {self.value})"


print(Status.OK.is_success)       # True
print(Status.ERROR.describe())    # ERROR (HTTP 500)
class Color(Enum):
    """Colors whose values are assigned by ``auto()`` (1, 2, 3, ...)."""

    RED = auto()
    GREEN = auto()
    BLUE = auto()


print(list(Color))        # all three members, in definition order
print(Color.RED.value)    # 1 -- auto() starts counting at 1
class Planet(Enum):
    """Planets whose ``auto()`` values are the capitalized member names."""

    # This hook must be defined BEFORE the first member that uses auto();
    # defining it afterwards raises TypeError while the class is created.
    def _generate_next_value_(name, start, count, last_values):
        """Return the value ``auto()`` should produce for member ``name``."""
        return name.capitalize()

    MERCURY = auto()
    VENUS = auto()
    EARTH = auto()


print(Planet.EARTH.value)    # Earth
class Priority(IntEnum):
    """Priority levels; ``IntEnum`` members are real ints."""

    LOW = 1
    MEDIUM = 2
    HIGH = 3


print(Priority.HIGH > Priority.LOW)    # True -- ordering works because members are ints
print(Priority.MEDIUM == 2)            # True -- and they compare equal to plain ints
class Permission(Flag):
    """Permission bits; for a ``Flag``, ``auto()`` yields 1, 2, 4, ..."""

    READ = auto()
    WRITE = auto()
    EXECUTE = auto()


p = Permission.READ | Permission.WRITE    # combine with bitwise OR
print(p)
print(Permission.READ in p)       # True -- `in` tests whether the bit is set
print(Permission.EXECUTE in p)    # False
print(p & Permission.READ)        # Permission.READ -- bitwise AND extracts a flag
class Mode(IntFlag):
    """File-mode bits; ``IntFlag`` members are flags that are also ints."""

    R = 1
    W = 2
    X = 4


m = Mode.R | Mode.W
print(m)
print(m == 3)        # True -- compares equal to the plain int
print(m | Mode.X)    # all three bits set (7)
print(m & 3)         # mixing with a plain int still yields a Mode
class HTTPStatus(Enum):
    """HTTP statuses whose tuple value is unpacked into named attributes."""

    OK = (200, "OK")
    NOT_FOUND = (404, "Not Found")
    ERROR = (500, "Internal Server Error")

    def __init__(self, code, reason):
        # Enum calls __init__ with the member's tuple value unpacked.
        self.code = code
        self.reason = reason


print(HTTPStatus.OK.code)      # 200
print(HTTPStatus.OK.reason)    # OK


def handle_status(status: HTTPStatus):
    """Return ``"Success"`` for OK, otherwise ``"Error: <reason>"``."""
    if status == HTTPStatus.OK:
        return "Success"
    return f"Error: {status.reason}"
class Color(Enum):
    """Alias demonstration: two names bound to the same value."""

    RED = 1
    CRIMSON = 1    # same value as RED, so CRIMSON becomes an alias of RED


print(Color.RED is Color.CRIMSON)    # True -- both names refer to one member
print(list(Color))                   # [<Color.RED: 1>] -- aliases are skipped in iteration
print(Color.CRIMSON)                 # Color.RED -- an alias displays as the canonical member
核心差异
| 特性 | Java enum | Kotlin enum class | Python Enum |
|---|---|---|---|
| 定义 | enum Direction { NORTH } | enum class Direction { NORTH } | class Direction(Enum): NORTH = "N" |
| 值 | 序数 ordinal | 构造参数 | 显式赋值或 auto() |
| 比较 | 可用 == | 可用 == | 用 is 或 ==,不支持 <(IntEnum 除外) |
| 位标志 | EnumSet | 无原生支持 | Flag / IntFlag |
| 别名 | 无 | 无 | 支持(值相同的后续成员自动成为别名) |
| 方法 | 完整支持 | 完整支持 | 支持 @property 和方法 |
| 类型安全 | 编译期 | 编译期 | 运行时(mypy 可检查) |
常见陷阱
class BadHabit(Enum):
    """Anti-example: mutable objects used as member values.

    This is accepted at runtime, but the list and dict values can be mutated
    in place afterwards, which defeats the point of a constant. Prefer
    immutable values (str, int, tuple, frozenset).
    """

    A = "hello"
    B = [1, 2]
    C = {"key": "val"}
class Status(Enum):
    """Plain ``Enum``: members never compare equal to their raw value."""

    OK = 200    # Status.OK == 200 is False; compare Status.OK.value instead
class Status(IntEnum):
    """``IntEnum`` variant: members are ints, so they equal the raw number."""

    OK = 200
    ERROR = 500


print(Status.OK == 200)          # True
# Pitfall, left commented out: `is` tests object identity, not equality.
# Comparing against an int literal with `is` triggers a SyntaxWarning and its
# result depends on interpreter caching details.
# print(Status.OK.value is 200)
print(Status.OK.value == 200)    # True -- always compare values with ==
何时使用
- Enum: 有限状态集合、配置选项、错误码——任何需要"有限命名常量"的场景。
- IntEnum: 需要和整数互操作时(如协议码、优先级)。
- Flag/IntFlag: 权限、选项组合——需要位运算的场景。
- 不用枚举: 值集合不固定时用常量模块或配置文件。
3.10 typing 模块核心类型
Java/Kotlin 的类型系统是编译期强制的。Python 的类型注解是提示,运行时默认不检查。理解这一点是关键。
Java/Kotlin 对比
// Java: generic types are checked by the compiler.
List<String> names = new ArrayList<>();
names.add("Alice"); // adding a non-String would be a compile-time error
Map<String, Integer> map = new HashMap<>();
Optional<String> maybe = Optional.ofNullable(null); // yields an empty Optional
// Bounded generic method; the body is elided in the original.
public <T extends Comparable<T>> T max(List<T> list) { ... }
// Kotlin: nullability is part of the type.
val names: List<String> = listOf("Alice")
val nullable: String? = null // `?` marks a nullable type
val result: Int? = nullable?.length // safe call: null instead of an exception
// Bounded generic function; the body is elided in the original.
fun <T : Comparable<T>> max(list: List<T>): T { ... }
Python 实现
from typing import (
List, Dict, Set, Tuple,
Optional, Union, Literal,
Callable, Any, NoReturn,
TypeAlias
)
# --- Generic aliases from typing (capitalized) ---
names: List[str] = ["Alice", "Bob"]
scores: Dict[str, int] = {"Alice": 90, "Bob": 85}
unique: Set[int] = {1, 2, 3}
point: Tuple[int, int] = (1, 2)  # fixed length: exactly two ints
matrix: List[List[int]] = [[1, 2], [3, 4]]  # annotations nest
tree: Dict[str, List[str]] = {
"fruits": ["apple", "banana"],
"veggies": ["carrot"]
}
# --- Built-in generics (Python 3.9+): the same meaning, no import needed ---
names: list[str] = ["Alice", "Bob"]
scores: dict[str, int] = {"Alice": 90}
point: tuple[int, int] = (1, 2)
from typing import Optional


def find_user(user_id: int) -> Optional[str]:
    """Return the user's name, or None when no user has that id."""
    if user_id == 1:
        return "Alice"
    return None


name: Optional[str] = find_user(1)
# Narrow the Optional before use: after this check the value is a str.
if name is not None:
    print(name.upper())    # ALICE
from typing import Union


def process(value: Union[int, str]) -> str:
    """Describe ``value`` as ``"Number: ..."`` for an int, else ``"String: ..."``."""
    if isinstance(value, int):
        return f"Number: {value}"
    return f"String: {value}"
def process(value: int | str) -> str:
if isinstance(value, int):
return f"Number: {value}"
return f"String: {value}"
from typing import Literal


def set_level(level: Literal["debug", "info", "warn", "error"]) -> None:
    """Print the chosen log level.

    ``Literal`` restricts the argument to the listed strings for a type
    checker; nothing is enforced at runtime.
    """
    print(f"Level: {level}")


set_level("debug")    # Level: debug
def http_get(url: str, status_code: Literal[200, 301, 404]) -> str:
    """Signature-only stub showing that ``Literal`` also accepts ints.

    The body is a placeholder (``...``), so calling the stub returns None.
    """
    ...
from typing import Callable


def apply(func: Callable[[int, int], int], a: int, b: int) -> int:
    """Call ``func(a, b)`` and return its result.

    ``Callable[[int, int], int]`` reads: takes two ints, returns an int.
    """
    return func(a, b)


result = apply(lambda x, y: x + y, 1, 2)    # 3
from typing import Protocol


class Handler(Protocol):
    """Structural type for any callable taking a ``str`` and returning a ``dict``.

    A Protocol with ``__call__`` can describe signatures that ``Callable``
    cannot express (keyword names, overloads, ...).
    """

    def __call__(self, request: str) -> dict: ...


def use_handler(handler: Handler) -> None:
    """Invoke ``handler`` with the request string ``"test"``."""
    result = handler("test")
from typing import Any, NoReturn


def process(data: Any) -> None:
    """Accept a value of any type; ``Any`` switches type checking off for it."""
    pass
def fail(message: str) -> NoReturn:
    """Always raise; ``NoReturn`` marks a function that never returns normally.

    Raises:
        ValueError: Always, carrying ``message``.
    """
    raise ValueError(message)
from typing import TypeAlias
Vector: TypeAlias = list[float]
Matrix: TypeAlias = list[Vector]
UserId: TypeAlias = int
Config: TypeAlias = dict[str, Any]
def dot_product(a: Vector, b: Vector) -> float:
return sum(x * y for x, y in zip(a, b))
# Annotations are not enforced at runtime: both "wrong" calls below run fine.
names: list[str] = ["Alice", "Bob"]
names.append(42)    # no error; only a static type checker flags this


def greet(name: str) -> str:
    """Return ``"Hello, <name>"``."""
    return f"Hello, {name}"


greet(42)    # no error either; returns "Hello, 42"
from typing import Optional
def search_users(
name: Optional[str] = None,
min_age: int = 0,
max_age: int = 150,
tags: list[str] | None = None,
) -> list[dict[str, Any]]:
"""搜索用户
Args:
name: 用户名(模糊匹配),None 表示不限制
min_age: 最小年龄
max_age: 最大年龄
tags: 标签列表,None 表示不限制
Returns:
匹配的用户列表
"""
if tags is None:
tags = []
return [{"name": "Alice", "age": 30, "tags": ["dev"]}]
核心差异
| 特性 | Java/Kotlin | Python typing |
|---|---|---|
| 类型检查 | 编译期强制 | 运行时忽略,mypy 可选检查 |
| 可空类型 | String? / String | Optional[str] / str \| None |
| 联合类型 | 无(用继承或重载) | Union[int, str] / int \| str |
| 字面量类型 | 无 | Literal["a", "b"] |
| 函数类型 | lambda 类型推断 | Callable[[int], str] |
| 泛型 | List<T> | list[T](Python 3.9+) |
| 类型别名 | typealias | type X = ...(3.12+)或 TypeAlias |
| 不可达 | Java 无;Kotlin 用 Nothing | NoReturn |
常见陷阱
def get_name() -> Optional[str]:
    """Return a name or None; this demo always returns None."""
    return None


name = get_name()
# Pitfall: an Optional result must be checked before use. Calling
# name.upper() directly would raise AttributeError when name is None.
if name is not None:
    name.upper()
def f(x: int | str) -> str:
return str(x)
何时使用
- 始终加类型注解: 函数参数和返回值——这是现代 Python 的最佳实践,即使运行时不检查。
- 复杂项目用 mypy: mypy --strict your_project/ 可以发现大量潜在 bug。
- Optional vs 不加: 参数可能为 None 时必须用 Optional;不会为 None 时不要加。
- Literal vs Enum: 少量固定值用 Literal;需要方法和逻辑时用 Enum。
- Any: 尽量避免。Any 会关闭该值的类型检查,相当于"逃逸"了类型系统。
本章速查表
| Python 类型 | JVM 对应 | 不可变版本 | 推导式 | 核心操作 |
|---|---|---|---|---|
| list | ArrayList/MutableList | tuple | [x for x in ...] | append, extend, sort, 切片 |
| tuple | record/Pair | 自身 | 无 | 解包, 索引, 作为 dict key |
| dict | HashMap/Map | MappingProxyType | {k:v for ...} | get, setdefault, \| 合并 |
| set | HashSet/Set | frozenset | {x for x in ...} | \| & - ^ 集合运算 |
| deque | ArrayDeque | 无 | 无 | appendleft, popleft, rotate |
| Counter | Guava Multiset | 无 | 无 | most_common, + - & \| 运算 |
| defaultdict | computeIfAbsent | 无 | 无 | 自动初始化缺失 key |
| @dataclass | data class/record | frozen=True | 无 | dataclasses.replace, field() |
| NamedTuple | data class | 自身 | 无 | 解包, _asdict, _replace |
| Enum | enum/enum class | 自身 | 无 | value, name, Flag 位运算 |