doccano接口批量导入数据

220 阅读1分钟

1、下载client包

git clone https://github.com/doccano/doccano-client.git

2、进入目录,并编写上传脚本

# 进入目录
cd doccano-client/
# 创建文件
vim upload_to_doccano.py
from doccano_api_client import DoccanoClient
import glob
import time
import os

# Bulk-import script: every direct sub-directory of `path` becomes one doccano
# image-classification project, and the *.jpg files inside it are uploaded in
# batches.

# Connect to the doccano server; replace the URL and credentials with your own.
doccano_client = DoccanoClient(
    'http://localhost:8080',
    'admin',
    'password'
)
# Names of the projects that already exist, so they are not created twice.
exist_name = {pro["name"] for pro in doccano_client.get_project_list()}


path = "/home/data/test"  # root directory to search for image folders
batch_size = 500  # number of images sent per upload request

# Only the direct sub-directories of `path` are used. Files are looked up as
# <path>/<dirname>/*.jpg below, so directory names from deeper levels would
# point at the wrong location and only produce empty or duplicate projects.
# The default keeps the script a no-op when `path` cannot be walked.
_, dirnames, _ = next(os.walk(path), (path, [], []))

for fname in dirnames:
    if fname in exist_name:
        continue

    # Escape the directory part so names containing glob metacharacters
    # ([, ], *, ?) are matched literally.
    files = glob.glob(os.path.join(glob.escape(os.path.join(path, fname)), "*.jpg"))
    res = doccano_client.create_project(
        name=fname,
        description="test",
        project_type="ImageClassification",
        resourcetype="ImageClassificationProject")
    # Three labels per project: the folder name itself, "other" and "unknown".
    doccano_client.create_label(res["id"], fname, "#ffffff", "#F44336", None, "a")
    doccano_client.create_label(res["id"], "其他", "#ffffff", "#2196F3", None, "b")
    doccano_client.create_label(res["id"], "未知", "#ffffff", "#4CAF50", None, "c")

    total = len(files)
    for start in range(0, total, batch_size):
        end = min(start + batch_size, total)
        print(total, start, end)
        # Open only the files of the current batch and always close them
        # afterwards, so a large folder cannot exhaust the process's file
        # descriptors.
        images = []
        try:
            for f in files[start:end]:
                images.append(open(f, 'rb'))
            a = doccano_client.post_doc_upload_binary(res["id"], images, "", "", "", "image/jpeg",
                                                      format='ImageFile')
        finally:
            for image in images:
                image.close()
        print(a)
    print(fname)

print("finished!")

3、执行

python upload_to_doccano.py