Elasticsearch 分词器的一些配置及请求方式(Python)

411 阅读1分钟

1、不区分大小写分词

"""
    分词器的配置(创建索引时在 settings 中定义自定义分词器):
    PUT http://127.0.0.1:9200/upper1_index
    {
      "settings": {
        "analysis": {
          "analyzer": {
            "icu_analyzer": {
              "tokenizer": "standard",
              "filter":  [ "uppercase" ]
            }
          }
        }
      }
    }
    "filter":  [ "uppercase" ] -- 分词结果统一转为大写(不区分大小写)
    "filter":  [ "lowercase" ] -- 分词结果统一转为小写(不区分大小写)
    "filter":  [] -- 不做大小写转换(大小写敏感)
    """
    # Languages that should be tokenized through Elasticsearch.
    # The gate below is commented out, so within this excerpt the request
    # is built and sent unconditionally.
    # if country in ES_CN:
    # Request body for the _analyze endpoint: the text to tokenize and the
    # name of the analyzer to run it through.
    data = {
        "text": text,
        "analyzer": "icu_analyzer"
    }
    headers = {
        "Content-Type": "application/json"
    }
    # NOTE(review): this URL targets "upper2_index", while the settings
    # example in the docstring creates "upper1_index" — confirm which index
    # actually defines "icu_analyzer".
    url = "http://127.0.0.1:9200/upper2_index/_analyze"

    # Ask the local Elasticsearch instance for the tokenization result;
    # Elasticsearch must already be running.
    try:
        # NOTE(review): no timeout is passed to requests.post — presumably
        # the call can block if the server never answers; confirm and
        # consider adding one.
        res = requests.post(url=url, data=json.dumps(data), headers=headers)

        if res.status_code == 200:
            print('------elsticsearch分词处理中----------')
            jsonText = json.loads(res.text)
            # Keep only the 'token' value of each entry in the 'tokens' list.
            worldList = [word['token'] for word in jsonText['tokens']]
            statusCode = res.status_code
        # NOTE(review): on a non-200 response neither worldList nor
        # statusCode is assigned here and no message is printed — verify
        # that later code does not rely on them in that case.
    except:
        # NOTE(review): the bare except catches every exception, including
        # KeyboardInterrupt and programming errors such as a missing 'tokens'
        # key. The handler only prints; the fallback tokenizer mentioned in
        # the message is not visible in this excerpt.
        print('------elsticsearch分词处理失败,启用备用分词程序----------')