# 1. Reading a txt file with pandas: line-by-line input, line-by-line output (pandas读取txt---按行输入按行输出)
import pandas as pd
# Sample contents of test1.txt; the first line is the header row.
# NOTE(review): read_table splits on tabs, so the real file presumably uses
# tab separators even though the sample below shows spaces -- confirm.
'''
id name score
1 张三 100
2 李四 99
3 王五 98
'''
# The tab separator is read_table's default; it is only spelled out here.
test1 = pd.read_table("test1.txt", sep="\t")
# Select the "name" column by its header label.
names = test1.loc[:, "name"]
print(names)
# Values expected in the printed column.
'''
张三
李四
王五
'''
# Sample contents of test2.txt; this file has no header row.
'''
4 Allen 100
5 Bob 99
6 Candy 98
'''
# header=None stops pandas from using the first line as column names, so the
# columns are labelled with integers starting at 0.
test2 = pd.read_table("test2.txt", sep="\t", header=None)
# Column label 1 is the second column.
names = test2.loc[:, 1]
print(names)
# Values expected in the printed column.
'''
Allen
Bob
Candy
'''
import pandas as pd
from paddlenlp import Taskflow
import json
# Text file that is read line by line and passed to the extraction task.
path = "nlp测试体育类文本.txt"
def get_textLine(path, header="infer"):
    """Return the first column of a tab-separated text file as a list.

    Args:
        path: Path of the UTF-8 encoded text file to read.
        header: Forwarded to ``pandas.read_table``. The default ``"infer"``
            keeps the historical behaviour: the first line of the file is
            consumed as the column header and is NOT part of the result.
            Pass ``header=None`` to keep every line of a header-less file.

    Returns:
        list: One entry per data row, holding that row's first-column value.
    """
    file_data = pd.read_table(path, encoding="UTF-8", header=header)
    # Select the column by position. The column label is whatever the header
    # line contained, and positional ``row[0]`` lookups on such label-indexed
    # rows are deprecated in recent pandas releases.
    return file_data.iloc[:, 0].tolist()
# Read the input lines, run information extraction over them and dump the
# results to a tab-separated text file.
data_input = get_textLine(path)
# Entity types to extract: time, contestant, event name.
schema = ['时间', '赛手', '赛事名称']
few_ie = Taskflow('information_extraction', schema=schema)
results = few_ie(data_input)
test = pd.DataFrame(data=results)
# ``index=False`` was previously passed twice, which Python rejects with
# "SyntaxError: keyword argument repeated"; it is now given once.
test.to_csv('excel2txt.txt', sep='\t', index=False, header=False)
print("数据已导出")
# 2. Using the `with open` approach (with open的方式)
import pandas as pd
from paddlenlp import Taskflow
import json
def openreadtxt(file_name):
    """Read a UTF-8 text file and return its lines.

    Args:
        file_name: Path of the file to read.

    Returns:
        list[str]: The lines of the file in order. Each line keeps its
        trailing newline when the file has one; an empty file yields ``[]``.
    """
    # The context manager closes the handle even if reading fails; the
    # previous version opened the file and never closed it.
    with open(file_name, 'r', encoding='UTF-8') as file:
        return file.readlines()
# Read the input lines, run information extraction over them and write one
# JSON document per line to test.txt.
data_input = openreadtxt("nlp测试体育类文本.txt")
# Entity types to extract: time, contestant, event name.
schema = ['时间', '赛手', '赛事名称']
few_ie = Taskflow('information_extraction', schema=schema)
results = few_ie(data_input)
# ensure_ascii=False emits raw non-ASCII characters, so the output encoding
# is pinned to UTF-8 instead of relying on the platform default, which may be
# unable to encode them.
with open("test.txt", "w+", encoding="UTF-8") as f:
    for result in results:
        line = json.dumps(result, ensure_ascii=False)
        f.write(line + "\n")
print("数据已导出")