PaddleNLP Taskflow 的使用：实体抽取

123 阅读1分钟

from pprint import pprint
from paddlenlp import Taskflow
# Entity extraction
from paddlenlp import Taskflow

# Entity labels handed to the information-extraction task as its schema.
schema = [
    '时间',
    '组织地市',
    '指标',
    '特殊查询方式',
    '维度',
    '客户群',
    '行业',
    '营业厅',
    '终端厂家',
    '手机型号',
    '营销活动',
]

# Label subsets; they are defined here but not referenced by the code below.
UIE_CLASS = ['指标', '营销活动', '组织地市', '行业']
UIE_TIME_CLASS = ['时间']

# Build the extractor with the schema, then run it on a single sentence
# and print whatever the task returns.
ie3 = Taskflow('information_extraction', schema=schema)
result = ie3('今天天气怎么样')
print(f'result = {result}')
[2025-03-08 22:25:16,231] [    INFO] - Downloading model_state.pdparams from https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_base_v1.0/model_state.pdparams
100%|██████████| 450M/450M [00:05<00:00, 89.7MB/s] 
[2025-03-08 22:25:22,539] [    INFO] - Downloading model_config.json from https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_base/model_config.json
100%|██████████| 377/377 [00:00<00:00, 464kB/s]
[2025-03-08 22:25:22,658] [    INFO] - Downloading vocab.txt from https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_base/vocab.txt
100%|██████████| 182k/182k [00:00<00:00, 52.6MB/s]
[2025-03-08 22:25:22,736] [    INFO] - Downloading special_tokens_map.json from https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_base/special_tokens_map.json
100%|██████████| 112/112 [00:00<00:00, 158kB/s]
[2025-03-08 22:25:22,777] [    INFO] - Downloading tokenizer_config.json from https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_base/tokenizer_config.json
100%|██████████| 172/172 [00:00<00:00, 219kB/s]
[2025-03-08 22:25:30,934] [    INFO] - Converting to the inference model cost a little time.
[2025-03-08 22:25:38,019] [    INFO] - The inference model save in the path:/home/aistudio/.paddlenlp/taskflow/information_extraction/uie-base/static/inference
[2025-03-08 22:25:38,781] [    INFO] - We are using <class 'paddlenlp.transformers.ernie.tokenizer.ErnieTokenizer'> to load '/home/aistudio/.paddlenlp/taskflow/information_extraction/uie-base'.
 
result = [{
'时间': [{'text': '今天', 'start': 0, 'end': 2, 'probability': 0.924547453668616}],
'指标': [{'text': '天气', 'start': 2, 'end': 4, 'probability': 0.8828416936535461}], 
'维度': [{'text': '天气', 'start': 2, 'end': 4, 'probability': 0.8364995743092578}]}]