table_sql=[]
for i in range(len(table_name)):
table_sql.append('''select t1.table_name as table_name,t1.column_name as column_name,t1.column_type as column_type,
t1.column_comment as column_comment ,t2.table_rows as table_rows
from information_schema.columns as t1,information_schema.`tables` as t2
where t1.table_schema=t2.table_schema and t1.table_name=t2.table_name and t1.table_name='{}' and t1.table_schema='test_titanic'
'''.format(table_name[i]))
接下来,就是根据这个SQL列表进行查询获得数据,然后继续对字段进行循环查询。
writer=pd.ExcelWriter("table_des.xlsx")#为了导出数据
table_col_name=[]
row=0
for j in range(len(table_sql)): #对表SQL进行循环
table_col_name.append(sql_inquiry(table_sql[j])) #将每个表查询得到的数据框组成列表
table_col_stat=[]
col_sql=[]
for s in range(len(table_col_name[j])): #对每个表信息的字段进行循环
col_sql.append('''select min({1}) as value_min, max({1}) as value_max,sum(case when {1} is not null then 1 end ) as num_classification,
sum(case when {1} is null then num else 0 end) as null_num,sum(num) as rows_num
from
(select {1} ,count(1) as num
from
{0} group by {1})t1'''.format(table_name[j],table_col_name[j].iloc[s,1]))
# 根据字段的SQL语句进行查询并转换成列表
col=sql_inquiry(col_sql[s]).iloc[0,:].tolist()
#将字段名称拼接进列表中
table_col_stat.append([table_col_name[j].iloc[s,1]]+col)
#转换为数据框
table_des=pd.DataFrame(table_col_stat,columns=["name","value_min","value_max","num_classification","null_num","rows_num"])
#计算字段的空值率并以百分比形式显示
table_des["null_rate"]=table_des["null_num"]/table_des["rows_num"]
table_des["null_rate"]=table_des["null_rate"].apply(lambda x :format(x,".2%"))
#合并数据框
table_des=pd.merge(table_col_name[j],table_des,how="inner",left_on="column_name",right_on="name")
table_des.drop(["table_rows","name"],axis=1,inplace=True)
#写入Excel文档
table_des.to_excel(writer,startcol=1,startrow=row,index=False)
row+=table_des.shape[0]+4
#保存文档并计算程序用时
writer.save()
elapsed = (dt.datetime.now()- start)
print("Time used:",elapsed)
print ("\a")