postgresql表复制及快速excel导入

993 阅读3分钟

数据库间表复制、从excel快速导入数据库

具体请见代码:

db_config.py:

### 数据库链接参数 ###
DB = {
    'host': '192.168.0.20',     # 数据库链接地址'127.0.0.1'
    'port': 5432,             # 数据库端口
    'database': 'simple_db',     # 数据库名称
    'user': 'pg',      # 数据库账号
    'password': 'asdf1234'         # 数据库登录密码
}

MSDB = {
    'server': '192.168.0.21',     # 数据库链接地址'127.0.0.1'host
    'port': 1433,             # 数据库端口
    'database': 'db_test',     # 数据库名称
    'user': 'dl',      # 数据库账号
    'password': 'test@2000'         # 数据库登录密码
}

main.py:

import io
import urllib
import pandas as pd
from sqlalchemy import create_engine
import chardet
import datetime

import pymssql
import db_config

import psycopg2

def getFileChar(filename):
    f=open(filename,'rb')
    data=f.read(200)
    f.close()
    #print(chardet.detect(data))
    #print(chardet.detect(data).get('encoding'))
    return chardet.detect(data).get('encoding')
def str_tablenm(table_name,is_return_null=False):
    '''
    sql字符串拼接时 表名 专用函数,在字符串两边添加 " 双引号 ,否则 英文中文混合表名出错
    '''
    if not table_name is None and table_name!='':
        return '"'+str(table_name)+'"'
    elif not is_return_null:
        return '""'
    else:
        return 'null'
def write_to_table(df, table_name, if_exists='fail',cols_name=None):#append/fail/replace .cols_name 元组
    dbcfg=db_config.DB
    usr=dbcfg.get('user', 'bt')
    pwd=dbcfg.get('password', 'a123')#'qais@123'
    host=dbcfg.get('host', 'localhost')
    port=dbcfg.get('port', '1433')
    database=dbcfg.get('database', 'simple_db')
    
    #conn_str="postgresql://qais:%s@localhost/db_qais"%urllib.parse.quote_plus(pwd)#?charset=utf8
    conn_str="postgresql://{}:{}@{}:{}/{}".format(usr,urllib.parse.quote_plus(pwd),host,port,database)
    db_engine = create_engine(conn_str,encoding='utf-8',echo=False)#
    
    string_data_io = io.StringIO()
    df.to_csv(string_data_io, sep='|', index=False,header=False)
    pd_sql_engine = pd.io.sql.pandasSQL_builder(db_engine)
    table = pd.io.sql.SQLTable(table_name, pd_sql_engine, frame=df,
                               index=False, if_exists=if_exists,schema = 'public')#goods_code
    table.create()
    string_data_io.seek(0)
    #string_data_io.readline()  # remove header
    with db_engine.connect() as connection:
        with connection.connection.cursor() as cursor:
            #copy_cmd = "COPY public.%s FROM STDIN HEADER DELIMITER '|' CSV" %(str_tablenm(table_name))#goods_code
            #copy_cmd = "COPY public.%s (%s) FROM STDIN HEADER DELIMITER '|' CSV" %(table_name,','.join(k for k in cols_name))#goods_code
            #cursor.copy_expert(copy_cmd, string_data_io)
            cursor.copy_from(string_data_io,str_tablenm(table_name),null='',sep='|',columns=cols_name)#此行更易懂
        connection.connection.commit()

# 复制表_大数据分组复制,本次未使用
def copystations():
    dbcfg=db_config.MSDB
    
    '''
    #sqlalchemy方式:
    usr=dbcfg.get('user', 'bt')
    pwd=dbcfg.get('password', 'a123')#'qais@123'
    host=dbcfg.get('server', 'localhost')
    port=dbcfg.get('port', '1433')
    database=dbcfg.get('database', 'simple_db')
    
    conn_str="mssql+pymssql://{}:{}@{}:{}/{}".format(usr,urllib.parse.quote_plus(pwd),host,port,database)
    print(conn_str)
    db_engine = create_engine(conn_str,encoding='utf-8',echo=False)#
    '''
    connect = pymssql.connect(database=dbcfg.get('database', 'simple_db'), user=dbcfg.get('user', 'bt'), password=dbcfg.get('password', 'a123'),host=dbcfg.get('server', 'localhost'), port=dbcfg.get('port', '1433'))
    trunk_size=1000
    trunk_count=0
    col_names=('stationid,stationname','updatetime')
    while 1:#数据量大时分组进行
        #sqlstr='select StationId,StationName,UpdateTime from TBL_Station limit {} offset {}'.format(trunk_size,trunk_count*trunk_size)#postgresql语法
        sqlstr='select StationId,StationName,UpdateTime from TBL_Station where StationName NOT LIKE \'%_Q%\' ORDER BY StationId offset {} rows fetch next {} rows only'.format(trunk_count*trunk_size,trunk_size)#mssql 必须有order by
        #print(sqlstr)
        try:
            df = pd.read_sql_query(sqlstr, con=connect)#db_engine.connect()
            if not df.empty:
                #df.rename(str.lower,axis='columns',inplace=True)
                df.rename(columns={'StationId':'stationid','StationName':'stationname','UpdateTime':'updatetime'},inplace=True)
                print(df)
                if trunk_count==0:
                    write_to_table(df,'tbl_stations',if_exists='replace')#,cols_name=col_names
                else:
                    write_to_table(df,'tbl_stations',if_exists='append')
                trunk_count+=1
                
            else:
                print('tbl_stations导入完成')
                break
        except Exception as e:
            print('tbl_stations错误',e)
            break
def copy_cards():#复制card表
    dbcfg=db_config.MSDB
    connect = pymssql.connect(database=dbcfg.get('database', 'simple_db'), user=dbcfg.get('user', 'bt'), password=dbcfg.get('password', 'a123'),host=dbcfg.get('server', 'localhost'), port=dbcfg.get('port', '1433'))
    try:
        
        sqlstr='select CardId,CardCode,CardName,UserId from TBL_Card where CardGroup=8 and CardStatus=1'
        df = pd.read_sql_query(sqlstr, con=connect)
        df.rename(str.lower,axis='columns',inplace=True)
        #print(df)
        write_to_table(df,'tbl_cards',if_exists='replace')
        print('导入表OK')
    except Exception as e:
        print('导入表错误',e)    
#复制所有表,暂未用
def copyalltables():
    #=========(一)、获取需要迁移数据库中模式下的所有表名
# 连接数据库
    conn = psycopg2.connect(database='5TC',
                        user='postgres',
                        password='123456',
                        host='127.0.0.1',
                        port=5432)
    # 获取模式下的所有表的名字
    tables = pd.read_sql_query("select * from information_schema.tables table_schema = 'public' and table_catalog='simple_db'",con=conn)
    tables.head()
    #当前模式下的所有表
    table_list = tables['table_name']
    #====================(二)、DataFrame中的数据写入postgresql
    #到此为止,基本工作完成,最后就是调用函数,执行迁移
    for city_table in table_list:
        # 需要迁移的城市列表
        df = pd.read_sql_query('select * from "2020_03_02"."%s"' % city_table, con=conn)
    
        try:
            write_to_table(df,city_table)
        except Exception as e:
            print('城市:',city_table,'错误',e)
        print(city_table,'导入完成')
def excel_to_tbl():
    #filename='D:/vs/tmp/账号.csv'# test2020-218new.csv
    filename='D:/vs/tmp/0-3月份投诉明细.xlsx'
    ecding=getFileChar.getFileChar(filename)

    #df=pd.read_csv(filename,encoding=ecding)#skiprows=需要忽略的行数,nrows=要读取的行数
    df=pd.read_excel(filename)
    print(datetime.datetime.now())

    write_to_table(df,'3月份投诉明细','append')#,('流水号','接触流水号')

    print(datetime.datetime.now())
if __name__=='__main__':
    copy_stationstuctures()
    copy_cards()