zz 用 pandas 读取 CSV 文件

168 阅读1分钟
import pandas as pd
import numpy as np

def csvtotxt1():
    """Convert the rating triples in ``data.csv`` into a dense matrix file.

    Reads ``data.csv`` (no header row; the columns are taken as ``user_id``,
    ``movie_id`` and ``rate``), casts all three columns to ``int`` and builds
    a zero-filled integer matrix with ``max(user_id)`` rows and
    ``max(movie_id)`` columns.  Each rating is stored at position
    ``[user_id - 1, movie_id - 1]``.  When the same (user, movie) pair occurs
    more than once, the last record in the file wins.

    Prints the number of columns, the number of rows, the matrix without its
    all-zero rows and the matrix without its all-zero columns, then writes
    the full matrix to ``test4.txt`` as comma-separated values without index
    or header.

    Raises:
        ValueError: If any ``user_id`` or ``movie_id`` is smaller than 1;
            ids are used as 1-based positions, so such values would wrap
            around to the end of the matrix.
    """
    data = pd.read_csv('data.csv', header=None, names=['user_id', 'movie_id', 'rate'])

    # Resolve duplicates explicitly: NumPy gives no ordering guarantee when
    # the same cell is targeted several times in one fancy-index assignment.
    ratings = data.astype(int).drop_duplicates(
        subset=['user_id', 'movie_id'], keep='last'
    )
    user_idx = ratings['user_id'].to_numpy()
    movie_idx = ratings['movie_id'].to_numpy()
    if user_idx.min() < 1 or movie_idx.min() < 1:
        raise ValueError('user_id and movie_id must be >= 1')

    n_movies = int(movie_idx.max())
    n_users = int(user_idx.max())
    print(n_movies)
    print(n_users)

    # Fill every cell in one vectorised assignment instead of chained
    # ``res.iloc[a][b] = c`` writes, which may land on a temporary copy.
    matrix = np.zeros((n_users, n_movies), dtype=int)
    matrix[user_idx - 1, movie_idx - 1] = ratings['rate'].to_numpy()
    res = pd.DataFrame(matrix)

    is_zero = res == 0
    res2 = res.loc[~is_zero.all(axis=1), :]  # drop users without any rating
    res3 = res.loc[:, ~is_zero.all(axis=0)]  # drop movies without any rating
    print(res2)
    print(res3)
    res.to_csv('test4.txt', sep=',', index=False, header=False)

def csvtotxt2():
    """Pivot the rating triples in ``data.csv`` into a labelled user x movie table.

    Reads ``data.csv`` (no header row; the columns are taken as ``user_id``,
    ``movie_id`` and ``rate``) and builds a table with one row per distinct
    user id and one column per distinct movie id, both in order of first
    appearance in the file.  Ratings are cast to ``int``.  When the same
    (user, movie) pair occurs more than once, the last record in the file
    wins.

    Prints the table with missing pairs as NaN, then the table with missing
    pairs replaced by 0, and writes the latter to ``test4.txt`` including the
    user-id index and the movie-id header row.
    """
    data = pd.read_csv('data.csv', header=None, names=['user_id', 'movie_id', 'rate'])
    users = data['user_id'].unique()
    movies = data['movie_id'].unique()

    # ``pivot`` rejects duplicate (index, column) pairs, so keep only the
    # last rating per pair.
    latest = data.drop_duplicates(subset=['user_id', 'movie_id'], keep='last')
    latest = latest.assign(rate=latest['rate'].astype(int))

    # ``pivot`` sorts both axes; ``reindex`` restores first-appearance order.
    # This replaces chained ``res.loc[a][b] = c`` writes, which may land on a
    # temporary copy and leave the table empty.
    res = latest.pivot(index='user_id', columns='movie_id', values='rate')
    res = res.reindex(index=users, columns=movies)

    print(res)
    # NaN forces a float table; cast back so the file holds plain integers.
    res2 = res.fillna(0).astype(int)
    print(res2)
    res2.to_csv('test4.txt')