数据库间表复制、从excel快速导入数据库
具体请见代码:
db_config.py:
### 数据库链接参数 ###
DB = {
'host': '192.168.0.20', # 数据库链接地址'127.0.0.1'
'port': 5432, # 数据库端口
'database': 'simple_db', # 数据库名称
'user': 'pg', # 数据库账号
'password': 'asdf1234' # 数据库登录密码
}
MSDB = {
'server': '192.168.0.21', # 数据库链接地址'127.0.0.1'host
'port': 1433, # 数据库端口
'database': 'db_test', # 数据库名称
'user': 'dl', # 数据库账号
'password': 'test@2000' # 数据库登录密码
}
main.py:
import io
import urllib
import pandas as pd
from sqlalchemy import create_engine
import chardet
import datetime
import pymssql
import db_config
import psycopg2
def getFileChar(filename):
f=open(filename,'rb')
data=f.read(200)
f.close()
#print(chardet.detect(data))
#print(chardet.detect(data).get('encoding'))
return chardet.detect(data).get('encoding')
def str_tablenm(table_name,is_return_null=False):
'''
sql字符串拼接时 表名 专用函数,在字符串两边添加 " 双引号 ,否则 英文中文混合表名出错
'''
if not table_name is None and table_name!='':
return '"'+str(table_name)+'"'
elif not is_return_null:
return '""'
else:
return 'null'
def write_to_table(df, table_name, if_exists='fail',cols_name=None):#append/fail/replace .cols_name 元组
dbcfg=db_config.DB
usr=dbcfg.get('user', 'bt')
pwd=dbcfg.get('password', 'a123')#'qais@123'
host=dbcfg.get('host', 'localhost')
port=dbcfg.get('port', '1433')
database=dbcfg.get('database', 'simple_db')
#conn_str="postgresql://qais:%s@localhost/db_qais"%urllib.parse.quote_plus(pwd)#?charset=utf8
conn_str="postgresql://{}:{}@{}:{}/{}".format(usr,urllib.parse.quote_plus(pwd),host,port,database)
db_engine = create_engine(conn_str,encoding='utf-8',echo=False)#
string_data_io = io.StringIO()
df.to_csv(string_data_io, sep='|', index=False,header=False)
pd_sql_engine = pd.io.sql.pandasSQL_builder(db_engine)
table = pd.io.sql.SQLTable(table_name, pd_sql_engine, frame=df,
index=False, if_exists=if_exists,schema = 'public')#goods_code
table.create()
string_data_io.seek(0)
#string_data_io.readline() # remove header
with db_engine.connect() as connection:
with connection.connection.cursor() as cursor:
#copy_cmd = "COPY public.%s FROM STDIN HEADER DELIMITER '|' CSV" %(str_tablenm(table_name))#goods_code
#copy_cmd = "COPY public.%s (%s) FROM STDIN HEADER DELIMITER '|' CSV" %(table_name,','.join(k for k in cols_name))#goods_code
#cursor.copy_expert(copy_cmd, string_data_io)
cursor.copy_from(string_data_io,str_tablenm(table_name),null='',sep='|',columns=cols_name)#此行更易懂
connection.connection.commit()
# 复制表_大数据分组复制,本次未使用
def copystations():
dbcfg=db_config.MSDB
'''
#sqlalchemy方式:
usr=dbcfg.get('user', 'bt')
pwd=dbcfg.get('password', 'a123')#'qais@123'
host=dbcfg.get('server', 'localhost')
port=dbcfg.get('port', '1433')
database=dbcfg.get('database', 'simple_db')
conn_str="mssql+pymssql://{}:{}@{}:{}/{}".format(usr,urllib.parse.quote_plus(pwd),host,port,database)
print(conn_str)
db_engine = create_engine(conn_str,encoding='utf-8',echo=False)#
'''
connect = pymssql.connect(database=dbcfg.get('database', 'simple_db'), user=dbcfg.get('user', 'bt'), password=dbcfg.get('password', 'a123'),host=dbcfg.get('server', 'localhost'), port=dbcfg.get('port', '1433'))
trunk_size=1000
trunk_count=0
col_names=('stationid,stationname','updatetime')
while 1:#数据量大时分组进行
#sqlstr='select StationId,StationName,UpdateTime from TBL_Station limit {} offset {}'.format(trunk_size,trunk_count*trunk_size)#postgresql语法
sqlstr='select StationId,StationName,UpdateTime from TBL_Station where StationName NOT LIKE \'%_Q%\' ORDER BY StationId offset {} rows fetch next {} rows only'.format(trunk_count*trunk_size,trunk_size)#mssql 必须有order by
#print(sqlstr)
try:
df = pd.read_sql_query(sqlstr, con=connect)#db_engine.connect()
if not df.empty:
#df.rename(str.lower,axis='columns',inplace=True)
df.rename(columns={'StationId':'stationid','StationName':'stationname','UpdateTime':'updatetime'},inplace=True)
print(df)
if trunk_count==0:
write_to_table(df,'tbl_stations',if_exists='replace')#,cols_name=col_names
else:
write_to_table(df,'tbl_stations',if_exists='append')
trunk_count+=1
else:
print('tbl_stations导入完成')
break
except Exception as e:
print('tbl_stations错误',e)
break
def copy_cards():#复制card表
dbcfg=db_config.MSDB
connect = pymssql.connect(database=dbcfg.get('database', 'simple_db'), user=dbcfg.get('user', 'bt'), password=dbcfg.get('password', 'a123'),host=dbcfg.get('server', 'localhost'), port=dbcfg.get('port', '1433'))
try:
sqlstr='select CardId,CardCode,CardName,UserId from TBL_Card where CardGroup=8 and CardStatus=1'
df = pd.read_sql_query(sqlstr, con=connect)
df.rename(str.lower,axis='columns',inplace=True)
#print(df)
write_to_table(df,'tbl_cards',if_exists='replace')
print('导入表OK')
except Exception as e:
print('导入表错误',e)
#复制所有表,暂未用
def copyalltables():
#=========(一)、获取需要迁移数据库中模式下的所有表名
# 连接数据库
conn = psycopg2.connect(database='5TC',
user='postgres',
password='123456',
host='127.0.0.1',
port=5432)
# 获取模式下的所有表的名字
tables = pd.read_sql_query("select * from information_schema.tables table_schema = 'public' and table_catalog='simple_db'",con=conn)
tables.head()
#当前模式下的所有表
table_list = tables['table_name']
#====================(二)、DataFrame中的数据写入postgresql
#到此为止,基本工作完成,最后就是调用函数,执行迁移
for city_table in table_list:
# 需要迁移的城市列表
df = pd.read_sql_query('select * from "2020_03_02"."%s"' % city_table, con=conn)
try:
write_to_table(df,city_table)
except Exception as e:
print('城市:',city_table,'错误',e)
print(city_table,'导入完成')
def excel_to_tbl():
#filename='D:/vs/tmp/账号.csv'# test2020-218new.csv
filename='D:/vs/tmp/0-3月份投诉明细.xlsx'
ecding=getFileChar.getFileChar(filename)
#df=pd.read_csv(filename,encoding=ecding)#skiprows=需要忽略的行数,nrows=要读取的行数
df=pd.read_excel(filename)
print(datetime.datetime.now())
write_to_table(df,'3月份投诉明细','append')#,('流水号','接触流水号')
print(datetime.datetime.now())
if __name__=='__main__':
copy_stationstuctures()
copy_cards()