填补csv里的空值

297 阅读1分钟

ntable.csv如下:

w_id,d_id,c_id
1,2,3
2,,3
5,6,7
8,8,
8,0,
0,,4
0,0,

将ntable.csv里的空值都填补成0,存储到numbers.csv里。numbers.csv如下:

w_id,d_id,c_id
1,2,3
2,0,3
5,6,7
8,8,0
8,0,0
0,0,4
0,0,0

import csv

f = open('ntable.csv')
ls = []
with f:
    reader = csv.reader(f)
    for r in reader:
        rls = []
        for e in r:
            if e == '':
                rls.append('0')
            else:
                rls.append(e)
        ls.append(rls)

with open('numbers.csv','w',newline="") as f:
    writer = csv.writer(f)
    writer.writerows(ls)
import csv
import random
import psycopg2
import pandas as pd

op = []
column = ['c_id', 'c_discount', 'c_credit', 'c_last', 'c_first', 'c_credit_lim', 'c_balance', 'c_ytd_payment', 'c_payment_cnt', 'c_delivery_cnt',\
     'c_street_1', 'c_street_2', 'c_city', 'c_state', 'c_zip', 'c_phone', 'c_since', 'c_middle', 'c_data', 'd_id', \
    'd_ytd', 'd_tax', 'd_next_o_id', 'd_name', 'd_street_1', 'd_street_2', 'd_city', 'd_state', 'd_zip', 'w_id', \
    'w_ytd', 'w_tax', 'w_name', 'w_street_1', 'w_street_2', 'w_city', 'w_state', 'w_zip', 'h_d_id', 'h_date', \
    'h_amount', 'h_data', 'o_id', 'o_carrier_id', 'o_ol_cnt', 'o_all_local', 'o_entry_d', 'ol_number', 'ol_delivery_d', 'ol_amount',\
     'ol_quantity', 'ol_dist_info', 's_quantity', 's_ytd', 's_order_cnt', 's_remote_cnt', 's_data', 's_dist_01', 's_dist_02', 's_dist_03',\
     's_dist_04', 's_dist_05', 's_dist_06', 's_dist_07', 's_dist_08', 's_dist_09', 's_dist_10', 'i_id', 'i_name', 'i_price',\
     'i_data', 'i_im_id']
user = "postgres"
database = "tpccc"
port = "5432"
password = "102041"
connect = psycopg2.connect(database=database,
    user=user,password=password,port=port)
cur = connect.cursor()

for i in range(72):
    try:
        res = pd.read_sql("select distinct {} from all_tpcc_no_repeat where {} is not null limit 1000;".format(column[i],column[i]),con=connect)
        op.append([w[0] for w in res.values])
        print("{}已完成".format(column[i]))
    except Exception:
        print('err')




f = open('D:\\tpcc_dataset\\all_tpcc_no_repeat_col.csv')
ls = []

with f:
    reader = csv.reader(f)
    for r in reader:
        rls = []
        for i,e in enumerate(r):
            if e == '':
                rls.append(random.choice(op[i]))
            else:
                rls.append(e)
        ls.append(rls)

with open('D:\\tpcc_dataset\\null_not_eq.csv','w',newline="") as f:
    writer = csv.writer(f)
    writer.writerows(ls)