def preprocessData(dataFilePath: str, mode: str):
    """Load a tab-separated conversation file and tokenize its three turns.

    The first line of the file is read and discarded (presumably a header
    row -- confirm against the data files). Every remaining non-blank line
    is stripped and split on tabs; columns 1-3 are each passed through
    ``tokenize`` and kept together as one conversation. When
    ``mode == "train"``, column 4 is looked up in ``emotion2label`` and
    collected as the label for that conversation.

    Args:
        dataFilePath: Path to the UTF-8 encoded, tab-separated input file.
        mode: ``"train"`` to also read a label from column 4; any other
            value yields conversations only.

    Returns:
        ``(conversations, labels)`` as two NumPy arrays when
        ``mode == "train"``; otherwise only the ``conversations`` array.

    Raises:
        IndexError: If a non-blank line has fewer tab-separated columns
            than the selected mode needs.
    """
    conversations = []
    labels = []
    with_labels = mode == "train"

    with io.open(dataFilePath, encoding="utf8") as finput:
        finput.readline()  # discard the first line of the file
        for raw_line in finput:
            stripped = raw_line.strip()
            if not stripped:
                # A blank line (e.g. stray trailing newlines) splits into
                # [''] and would crash on the column lookups below.
                continue
            fields = stripped.split('\t')
            # Index explicitly (rather than slicing) so a short row still
            # fails loudly instead of producing a truncated conversation.
            turns = [tokenize(fields[i]) for i in range(1, 4)]
            if with_labels:
                labels.append(emotion2label[fields[4]])
            conversations.append(turns)

    if with_labels:
        return np.array(conversations), np.array(labels)
    return np.array(conversations)
|
|
# Load the train, dev and test splits in that order; each split is read in
# "train" mode, so every call returns a (texts, labels) pair.
(
    (texts_train, labels_train),
    (texts_dev, labels_dev),
    (texts_test, labels_test),
) = (
    preprocessData(split_path, mode="train")
    for split_path in (
        './starterkitdata/train.txt',
        './starterkitdata/dev.txt',
        './starterkitdata/test.txt',
    )
)