表格见 腾讯位置服务 WebService API
import pandas as pd
import json
import os
import sys
# 增加递归深度限制,防止深层嵌套导致崩溃
sys.setrecursionlimit(5000)
def parse_district_tree(file_path, sheet_name=1, output_file='tree.json', minify_json=False):
"""
解析行政区划Excel并生成树形JSON结构
参数:
file_path: 输入Excel文件路径
sheet_name: Sheet名称或索引
output_file: 输出文件路径
minify_json:
True -> 紧凑模式 (去除换行和空格,体积小,适合程序读取/网络传输)
False -> 美化模式 (带缩进换行,体积大,适合人工查看)
"""
print(f"开始任务...")
print(f"输入文件: {file_path}")
print(f"Sheet索引/名称: {sheet_name}")
print(f"输出文件: {output_file}")
print(f"模式: {'紧凑压缩 (Minify)' if minify_json else '美化格式 (Pretty)'}")
print("-" * 50)
# 1. 读取数据
if not os.path.exists(file_path):
print(f"错误:找不到文件 '{file_path}'")
return
try:
df = pd.read_excel(file_path, sheet_name=sheet_name)
except Exception as e:
print(f"读取Excel失败: {e}")
return
if len(df.columns) < 2:
print("错误:表格列数不足2列。")
return
# 标准化列名
df = df.iloc[:, :2].copy()
df.columns = ['code', 'full_path']
# 数据清洗
df = df.dropna(subset=['code', 'full_path'])
df['code'] = pd.to_numeric(df['code'], errors='coerce').astype('Int64').astype(str)
df['full_path'] = df['full_path'].astype(str).str.strip()
# 过滤9位代码
initial_count = len(df)
df = df[df['code'].str.len() == 9]
print(f"原始行数: {initial_count} | 有效9位代码行数: {len(df)}")
if len(df) == 0:
print("警告:未找到有效的9位行政区划代码。")
return
tree_map = {}
def get_node(code, name):
if code not in tree_map:
tree_map[code] = {
"value": code,
"text": name,
"children": []
}
else:
if name and (not tree_map[code]['text'] or tree_map[code]['text'] != name):
tree_map[code]['text'] = name
return tree_map[code]
print("正在构建拓扑树...")
for _, row in df.iterrows():
code_9 = row['code']
full_path = row['full_path']
parts = [p.strip() for p in full_path.split(',') if p.strip()]
town_name = parts[-1] if parts else "未知街道"
province_name = city_name = district_name = ""
province_code = city_code = ""
district_code = code_9[:6]
# 直辖市 vs 普通省 判断逻辑
if len(parts) == 4:
# 直辖市: 中国, 北京市, 东城区, 街道
province_name = parts[1]
city_name = parts[1]
district_name = parts[2]
p_prefix = code_9[:2]
province_code = p_prefix + "0000"
city_code = province_code
elif len(parts) >= 5:
# 普通省: 中国, 河北省, 石家庄市, 赞皇县, 镇
province_name = parts[1]
city_name = parts[2]
district_name = parts[3]
p_prefix = code_9[:2]
c_prefix = code_9[:4]
province_code = p_prefix + "0000"
city_code = c_prefix + "00"
else:
province_name = "未知省"
city_name = "未知市"
district_name = "未知区"
province_code = code_9[:2] + "0000"
city_code = code_9[:4] + "00"
# --- 节点挂载 ---
node_prov = get_node(province_code, province_name)
if city_code == province_code:
node_city = node_prov
else:
node_city = get_node(city_code, city_name)
if not any(c['value'] == city_code for c in node_prov['children']):
node_prov['children'].append(node_city)
node_dist = get_node(district_code, district_name)
if district_code != city_code:
if not any(c['value'] == district_code for c in node_city['children']):
node_city['children'].append(node_dist)
else:
node_dist = node_city
node_town = get_node(code_9, town_name)
if code_9 != district_code:
if not any(c['value'] == code_9 for c in node_dist['children']):
node_dist['children'].append(node_town)
# 排序
root_nodes = [node for code, node in tree_map.items() if code.endswith('0000')]
root_nodes.sort(key=lambda x: x['value'])
def sort_children(n):
if not n.get('children'):
return
n['children'].sort(key=lambda x: x['value'])
for child in n['children']:
if child['value'] == n['value']:
continue
sort_children(child)
print("正在排序节点...")
for r in root_nodes:
sort_children(r)
# 写入文件
print(f"正在写入文件 {output_file} ...")
try:
# 核心修改:根据 minify_json 决定 indent 参数
# None 表示紧凑模式 (无换行无缩进)
# 2 表示美化模式 (2空格缩进)
indent_val = None if minify_json else 2
with open(output_file, 'w', encoding='utf-8') as f:
json.dump(root_nodes, f, ensure_ascii=False, indent=indent_val)
# 统计大小
file_size = os.path.getsize(output_file)
size_mb = file_size / (1024 * 1024)
print("-" * 50)
print(f"成功完成!")
print(f"省级节点数: {len(root_nodes)}")
print(f"总节点数: {len(tree_map)}")
print(f"文件大小: {size_mb:.2f} MB")
if minify_json:
print(f"已启用紧凑模式 (去除了换行和空格),文件体积已优化。")
else:
print(f"已启用美化模式 (包含换行和缩进),方便人工阅读。")
except Exception as e:
print(f"写入文件失败: {e}")
if __name__ == '__main__':
# ================= 配置区域 =================
# 1. 输入文件
INPUT_FILE = '行政区划编码表_20251127.xlsx'
# 2. Sheet 索引 (0是第一个, 1是第二个)
SHEET_INDEX = 1
# 3. 输出文件名
OUTPUT_FILE = 'district_tree.json'
# 4. 是否压缩 JSON (去除换行和空格)
# True -> 紧凑模式
# False -> 美化模式
MINIFY_JSON = False
# ===========================================
# 自动查找文件逻辑
if not os.path.exists(INPUT_FILE):
xlsx_files = [f for f in os.listdir('.') if f.endswith('.xlsx')]
if xlsx_files:
print(f"未找到 '{INPUT_FILE}',自动使用: {xlsx_files[0]}")
INPUT_FILE = xlsx_files[0]
else:
print("当前目录下未找到 Excel 文件")
sys.exit(1)
parse_district_tree(
file_path=INPUT_FILE,
sheet_name=SHEET_INDEX,
output_file=OUTPUT_FILE,
minify_json=MINIFY_JSON
)