Term

240 阅读1分钟

Term

  • Term的重要性
    • Term是表达语义的最小单位。搜索和利用统计语言模型进行自然语言处理都需要处理Term
  • 特点
    • Term Level Query: Term Query / Range Query / Exists Query / Prefix Query / Wildcard Query
    • 在ES中,Term查询,对输入不做分词。会将输入作为一个整体,在倒排索引中查找准确的词项,并且使用相关度算分公式为每个包含该词项的文档进行相关度算分
    • 可以通过Constant Score将查询转换成一个Filtering,避免算分,并利用缓存,提高性能

示例:

GET products/_mapping

{
  "products" : {
    "mappings" : {
      "properties" : {
        "desc" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "productId" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        }
      }
    }
  }
}
POST /products/_bulk
{"index":{"_id":1}}
{"productId":"XHDK-A-1293-#fJ3", "desc":"iPhone"}
{"index":{"_id":2}}
{"productId":"KDKE-A-9947-#kL5", "desc":"iPad"}
{"index":{"_id":3}}
{"productId":"JODL-A-1937-#pV7", "desc":"MBP"}
GET products

POST products/_search
{
  "query": {
    "term": {
      "desc": {
       // "value": "iphone"
       "value": "iPhone"  //搜不到
      }
    }
  }
}
POST products/_search
{
  "query": {
    "term": {
      "productId": {
        "value": "xhdk"
       //"value": "XHDK-A-1293-#fJ3"  //搜不到
        
      }
    }
  }
}

POST _analyze
{
  "analyzer": "standard",
  "text": ["XHDK-A-1293-#fJ3"]
}

怎么做精确匹配,多字段Mapping和Term查询

POST products/_search
{
  "explain": true,//算分过程
  "query": {
    "term": {
      "productId.keyword": {
        //"value": "xhdk"  //搜不到
        "value": "XHDK-A-1293-#fJ3" 
       // "value": "xhdk-a-1293-#fj3" //搜不到
      }
    }
  }
}

"hits" : {
    "total" : {
      "value" : 1,
      "relation" : "eq"
    },
    "max_score" : 0.9808291, //有算分,消耗性能
    "hits" : [
      {
        "_index" : "products",
        "_type" : "_doc",
        "_id" : "1",
        "_score" : 0.9808291,
        "_source" : {
          "productId" : "XHDK-A-1293-#fJ3",
          "desc" : "iPhone"
        },
        "_explanation" : xxxxxxxxxxxx
      }
    ]
  }

term查询有算分,可通过constant_score跳过算分过程

复合查询,Constant Score转为Filter

  • 将Query转成Filter,忽略TF-IDF/BM25等相关性算分计算,避免相关性算分开销
  • Filter可以有效利用缓存
POST products/_search
{
  "explain": true,//算分过程
  "query": {
    "constant_score": {
      "filter": {
        "term": {
          "productId.keyword": {
            "value": "XHDK-A-1293-#fJ3" 
          }
        }
      }
    }
  }
}


"hits" : {
    "total" : {
      "value" : 1,
      "relation" : "eq"
    },
    "max_score" : 1.0,
    "hits" : [
      {
        "_shard" : "[products][0]",
        "_node" : "woTllO_3QdKQHYJBEWGbjw",
        "_index" : "products",
        "_type" : "_doc",
        "_id" : "1",
        "_score" : 1.0,
        "_source" : {
          "productId" : "XHDK-A-1293-#fJ3",
          "desc" : "iPhone"
        },
        "_explanation" : {
          "value" : 1.0,
          "description" : "ConstantScore(productId.keyword:XHDK-A-1293-#fJ3)",
          "details" : [ ]
        }
      }
    ]
  }