ElasticSearch 7.5.1+ 指定分词器查询一次完整搜索debug过程 (2020-10)

848 阅读3分钟

// 调试是学习源码的一个途径

org.elasticsearch.client.node.NodeClient#executeLocally(org.elasticsearch.action.ActionType, Request, org.elasticsearch.action.ActionListener)

org.elasticsearch.client.node.NodeClient#transportAction

--- value transportAction = org.elasticsearch.action.search.TransportSearchAction

org.elasticsearch.action.support.TransportAction#execute(Request, org.elasticsearch.action.ActionListener)

org.elasticsearch.action.support.TransportAction#execute(org.elasticsearch.tasks.Task, Request, org.elasticsearch.action.ActionListener)

org.elasticsearch.action.support.TransportAction.RequestFilterChain#RequestFilterChain // 构建

org.elasticsearch.action.support.TransportAction.RequestFilterChain#proceed // 处理请求链

org.elasticsearch.action.search.TransportSearchAction#doExecute // 最终处理do

// searchRequest 参数

SearchRequest{searchType=QUERY_THEN_FETCH, indices=[habit_search_release], indicesOptions=IndicesOptions[ignore_unavailable=false, allow_no_indices=true, expand_wildcards_open=true, expand_wildcards_closed=false, allow_aliases_to_multiple_indices=true, forbid_closed_indices=true, ignore_aliases=false, ignore_throttled=true], types=[], routing='null', preference='null', requestCache=null, scroll=null, maxConcurrentShardRequests=0, batchedReduceSize=512, preFilterShardSize=128, allowPartialSearchResults=null, localClusterAlias=null, getOrCreateAbsoluteStartMillis=-1, ccsMinimizeRoundtrips=true, source={"query":{"bool":{"must":[{"match":{"title":{"query":"张柏芝,","operator":"OR","prefix_length":0,"max_expansions":50,"fuzzy_transpositions":true,"lenient":false,"zero_terms_query":"NONE","auto_generate_synonyms_phrase_query":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}}}

// local执行

org.elasticsearch.action.search.TransportSearchAction#executeLocalSearch

org.elasticsearch.action.search.TransportSearchAction#executeSearch(org.elasticsearch.action.search.SearchTask, org.elasticsearch.action.search.TransportSearchAction.SearchTimeProvider, org.elasticsearch.action.search.SearchRequest, org.elasticsearch.action.OriginalIndices, java.util.List, java.util.function.BiFunction, org.elasticsearch.cluster.ClusterState, java.util.Map, org.elasticsearch.action.ActionListener, org.elasticsearch.action.search.SearchResponse.Clusters)

org.elasticsearch.action.search.TransportSearchAction#executeSearch(org.elasticsearch.action.search.SearchTask, org.elasticsearch.action.search.TransportSearchAction.SearchTimeProvider, org.elasticsearch.action.search.SearchRequest, org.elasticsearch.action.OriginalIndices, java.lang.String[], java.util.Map, java.util.Map, java.util.Map, java.util.List, java.util.function.BiFunction, org.elasticsearch.cluster.ClusterState, org.elasticsearch.action.ActionListener, org.elasticsearch.action.search.SearchResponse.Clusters)

org.elasticsearch.cluster.node.DiscoveryNodes--org.elasticsearch.cluster.ClusterState#nodes

nodes=

[ox-whc-dQ6a5NY424Q-8JQ=>{node-1}{ox-whc-dQ6a5NY424Q-8JQ}{9efezmJPS0WHrqIwFS_j9Q}{172.16.218.164}{172.16.218.164:9300}{dilm}{ml.machine_memory=16362606592, xpack.installed=true, ml.max_open_jobs=20}]

data=

[ox-whc-dQ6a5NY424Q-8JQ=>{node-1}{ox-whc-dQ6a5NY424Q-8JQ}{9efezmJPS0WHrqIwFS_j9Q}{172.16.218.164}{172.16.218.164:9300}{dilm}{ml.machine_memory=16362606592, xpack.installed=true, ml.max_open_jobs=20}]

// 提交到线程池执行

org.elasticsearch.action.search.TransportSearchAction#searchAsyncAction

org.elasticsearch.action.search.SearchQueryThenFetchAsyncAction#SearchQueryThenFetchAsyncAction 构造

// start

org.elasticsearch.action.search.AbstractSearchAsyncAction#start

// run

org.elasticsearch.action.search.AbstractSearchAsyncAction#run

遍历每一个分片

// 单个分片执行

org.elasticsearch.action.search.AbstractSearchAsyncAction#performPhaseOnShard

// 单个分片上do执行逻辑

org.elasticsearch.action.search.SearchQueryThenFetchAsyncAction#executePhaseOnShard

// 构建发往单个分片的请求--对象

org.elasticsearch.action.search.AbstractSearchAsyncAction#buildShardSearchRequest

org.elasticsearch.search.internal.ShardSearchRequest#ShardSearchRequest(org.elasticsearch.action.OriginalIndices, org.elasticsearch.action.search.SearchRequest, org.elasticsearch.index.shard.ShardId, int, org.elasticsearch.search.internal.AliasFilter, float, long, java.lang.String, java.lang.String[])

// 发射请求

org.elasticsearch.action.search.SearchTransportService#sendExecuteQuery(org.elasticsearch.transport.Transport.Connection, org.elasticsearch.search.internal.ShardSearchRequest, org.elasticsearch.action.search.SearchTask, org.elasticsearch.action.search.SearchActionListener)

---org.elasticsearch.transport.TransportService#sendChildRequest(org.elasticsearch.transport.Transport.Connection, java.lang.String, org.elasticsearch.transport.TransportRequest, org.elasticsearch.tasks.Task, org.elasticsearch.transport.TransportRequestOptions, org.elasticsearch.transport.TransportResponseHandler)

------org.elasticsearch.transport.TransportService#sendRequest(org.elasticsearch.transport.Transport.Connection, java.lang.String, org.elasticsearch.transport.TransportRequest, org.elasticsearch.transport.TransportRequestOptions, org.elasticsearch.transport.TransportResponseHandler)

// 继续

-----------org.elasticsearch.transport.TransportInterceptor.AsyncSender#sendRequest

// 发送请求

org.elasticsearch.transport.TransportService#sendRequestInternal

org.elasticsearch.transport.Transport.Connection#sendRequest 向数据节点的 9300 端口发送请求

// 查询时分词

GET /habit_search_release/_search { "query": { "bool": { "must": [ { "match": { "title": { "query": "张柏芝,张柏芝,张柏芝,张柏芝,", "analyzer": "my_hanlp_analyzer" } } } ] } } }

// lucene

org.elasticsearch.action.ActionListener#onResponse(Response) 监听节点

org.elasticsearch.search.SearchService#executeQueryPhase(org.elasticsearch.search.internal.ShardSearchRequest, org.elasticsearch.action.search.SearchTask) // 查询任务

// 解析查询请求source

org.elasticsearch.search.SearchService#parseSource

// 解析查询

org.elasticsearch.index.query.QueryShardContext#toQuery(org.elasticsearch.index.query.QueryBuilder, org.elasticsearch.common.CheckedFunction)

// bool解析

// org.elasticsearch.index.search.MatchQuery#parse

org.elasticsearch.index.search.MatchQuery#setAnalyzer(java.lang.String) 解析器设定

立 flag,后面 ----- org.elasticsearch.index.search.MatchQuery#getAnalyzer 获取解析器时直接使用

....

最终配置完MatchQuery

// MatchQuery执行解析,用了上面立的flag 解析器(因为查询语句中指定解析器)

// org.elasticsearch.index.search.MatchQuery.MatchQueryBuilder 构建完成 MatchQueryBuilder

短语 slop(phraseSlop,短语匹配时允许的词项位置偏差)

// org.elasticsearch.index.search.MatchQuery.MatchQueryBuilder#createQuery 构建查询

org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String, java.lang.String) 开始执行分词了!!!

// org.elasticsearch.index.search.MatchQuery.MatchQueryBuilder#createFieldQuery(org.apache.lucene.analysis.TokenStream, org.elasticsearch.index.search.MatchQuery.Type, org.apache.lucene.search.BooleanClause.Occur, java.lang.String, int) 查询文本分词

即 analyzing query text

// org.apache.lucene.analysis.CachingTokenFilter#fillCache 分词fill

// org.elasticsearch.index.search.MatchQuery.MatchQueryBuilder#analyzeMultiBoolean 复杂情况(complex case: multiple positions)

// boolean -- should 查询

// 分词执行完回到 org.elasticsearch.search.SearchService#parseSource 继续解析 排序 聚合 版本 高亮等各种查询属性

// org.elasticsearch.search.SearchService#createAndPutContext 搜索上下文

// 根据创建的上下文 回到查询处

org.elasticsearch.search.SearchService#executeQueryPhase(org.elasticsearch.search.internal.ShardSearchRequest, org.elasticsearch.action.search.SearchTask)

// org.elasticsearch.search.SearchService#loadOrExecuteQueryPhase 按分片逐个执行;如果是单机,断点拦截到的请求个数和分片数一致

// 执行查询

org.elasticsearch.search.query.QueryPhase#execute(org.elasticsearch.search.internal.SearchContext, org.apache.lucene.search.IndexSearcher, java.util.function.Consumer)

// internal 查询

org.elasticsearch.search.internal.ContextIndexSearcher#searchInternal