# 1、Case-insensitive tokenization (不区分大小写分词)
"""
分词器的配置:
PUT http://127.0.0.1:9200/upper2_index
{
"settings": {
"analysis": {
"analyzer": {
"icu_analyzer": {
"tokenizer": "standard",
"filter": [ "uppercase" ]
}
}
}
}
}
"filter": [ "uppercase" ] -- 大写
"filter": [ "lowercase" ] -- 小写
"filter": [] -- 敏感
"""
# Tokenize `text` through the Elasticsearch _analyze API using the custom
# "icu_analyzer" (standard tokenizer + "uppercase" filter per the config
# above), so tokens come back case-folded for case-insensitive matching.
# On success: worldList holds the analyzed tokens, statusCode the HTTP status.
# On failure: neither is assigned; the caller's fallback tokenizer takes over.
data = {
    "text": text,
    "analyzer": "icu_analyzer"
}
headers = {
    "Content-Type": "application/json"
}
url = "http://127.0.0.1:9200/upper2_index/_analyze"
try:
    res = requests.post(url=url, data=json.dumps(data), headers=headers)
    if res.status_code == 200:
        print('------elsticsearch分词处理中----------')
        # res.json() is equivalent to json.loads(res.text); each entry of
        # "tokens" carries the analyzed term under its "token" key.
        jsonText = res.json()
        worldList = [word['token'] for word in jsonText['tokens']]
        statusCode = res.status_code
except (requests.RequestException, ValueError, KeyError):
    # Narrowed from a bare `except:` (which also swallowed SystemExit and
    # KeyboardInterrupt). Connection/timeout errors, a non-JSON body, or a
    # response without a "tokens" key all trigger the local fallback below.
    print('------elsticsearch分词处理失败,启用备用分词程序----------')