xg

150 阅读1分钟
def preprocessData(dataFilePath, mode):
    """Load a tab-separated conversation file into NumPy arrays.

    The first line of the file is read and discarded. Every remaining
    non-blank line is stripped and split on tabs; columns 1, 2 and 3 are each
    passed through ``tokenize`` and stored together as one conversation.
    When ``mode`` is ``"train"``, column 4 is looked up in ``emotion2label``
    and collected as that conversation's label.

    Args:
        dataFilePath: Path of the UTF-8 encoded, tab-separated input file.
        mode: ``"train"`` to also collect labels from column 4; any other
            value collects the conversations only.

    Returns:
        ``(conversations, labels)`` as two NumPy arrays when ``mode`` is
        ``"train"``; otherwise only the ``conversations`` array.

    Raises:
        IndexError: If a non-blank row has fewer columns than required.
    """
    conversations = []
    labels = []
    with_labels = mode == "train"
    with io.open(dataFilePath, encoding="utf8") as finput:
        finput.readline()  # first line is discarded (presumably a header row)
        for raw_line in finput:
            stripped = raw_line.strip()
            if not stripped:
                # A blank line (e.g. stray trailing newline) splits to ['']
                # and would otherwise crash on the column lookups below.
                continue
            fields = stripped.split('\t')
            # Explicit indexing (not slicing) so a short row still fails
            # loudly instead of silently yielding fewer than three turns.
            turns = [tokenize(fields[i]) for i in range(1, 4)]
            if with_labels:
                labels.append(emotion2label[fields[4]])
            conversations.append(turns)
    if with_labels:
        return np.array(conversations), np.array(labels)
    return np.array(conversations)
# Load the train, dev and test splits. Each call passes mode="train" so that
# preprocessData returns the labels together with the conversations.
texts_train, labels_train = preprocessData(
    dataFilePath='./starterkitdata/train.txt', mode="train")
texts_dev, labels_dev = preprocessData(
    dataFilePath='./starterkitdata/dev.txt', mode="train")
texts_test, labels_test = preprocessData(
    dataFilePath='./starterkitdata/test.txt', mode="train")