如何解决ES深度分页的问题

232 阅读6分钟

from+ size分页,带来深度分页的问题

POST master_st_rms_td_wm_tsk_new_tbl/_doc/_search
{
  "from": 1000,
  "size": 10,
  "query": {
    "match_all": {}
  },
  "sort": [
    {
      "Dat_Dt": "asc"
    }
  ]
}

from+size每次都会从master_st_rms_td_wm_tsk_new_tbl这个索引的所以分片查询(from+1)*size 条数据,然后将数据进行合并,取出size条数据,返回给客户端,随着from变大,查询的数据越来越大,导致查询缓慢,非常消耗资源,应避免使用。

scroll滚动搜索

POST master_st_rms_td_wm_tsk_new_tbl/_doc/_search?scroll=5m
{
  
  "size": 2,
  "query": {
    "match_all": {}
  },
  "sort": [
    {
      "Dat_Dt": "asc"
    }
  ]
}

第一次搜索的时候在url里面带上scroll=5m 这个5m是有效时间,返回结果里面会带上一个_scroll_id,如下图所示

{
    "_scroll_id": "ZWNtLWgtc2stbTM=!DnF1ZXJ5VGhlbkZldGNoBwAAAAAIXq7LFlgwOUFwUF9wUUlTVXV0UGRNNWFpekEAAAAAEK_PlhZPbmVwT3dSdFE5MnhSQ1pXaEVrNERBAAAAAAlvykQWRW5RS1UwdERUZlNGdFQ3ckNQSG5EUQAAAAAJHcCTFkpBbk1uREFtU1FLelg2V0RHb2RZSGcAAAAADxMM6xZSUWd1WVJkNlFhU0RPckNzYnRsZkl3AAAAAAVKmUMWZF95eXAzS3JRSmFkZ2hxWUd2QXVoQQAAAAAQkqe0FlJEc2oxWFZxU0VXVHlXdllEQ3hkd2c=",
    "took": 7,
    "timed_out": false,
    "_shards": {
        "total": 7,
        "successful": 7,
        "failed": 0
    },
    "hits": {
        "total": 13588,
        "max_score": 0,
        "hits": [
            {
                "_index": "master_st_rms_td_wm_tsk_new_tbl_2022-04",
                "_type": "_doc",
                "_id": "291396214ad39022PNCI3",
                "_score": 0,
                "_source": {
                    "Pln_Cont_Dt": null,
                    "BBK_Org_Id": "898",
                    "Exe_Psn_Id": "80074266",
                    "Tsk_Deal_Rsl_Cd": "06",
                    "Tsk_Cmt": null,
                    "Bch_Id": null,
                    "Cust_UID": "PNCI****88896",
                    "Tsk_Typ_Cd": "BUSOPP",
                    "Clu_ID": null,
                    "Cust_Nm": "崔**",
                    "Rtl_Pst_Id": "RB0101",
                    "BRN_Org_Id": "755550",
                    "Mnt_Usr_Id": "BUSOPPLOAD_SYS",
                    "Tsk_Sts_Cd": "N",
                    "Tsk_Srl_Id": "291396214ad39022PNCI3",
                    "Pln_Orig_Deal_Tm": null,
                    "Eve_Sts_Chg_Tm": null,
                    "Tsk_Cntnt": "非货基金实时赎回",
                    "Mnt_Tm": "2022-03-30 10:22:55",
                    "Tsk_Prio_Cd": "BUSOPP",
                    "Acmp_Intvw_Ind": null,
                    "Dat_Dt": "2022-03-30",
                    "Eve_Sts_Cd": null,
                    "Tsk_Crt_Tm": "2022-03-30 10:22:55",
                    "Exe_Mth_Cd": "08",
                    "Afl_Line_Of_Bus_Cd": null,
                    "Cust_Id": null,
                    "Tsk_Puh_Dt": "2022-03-30",
                    "Pln_Cont_Mth_Cd": null,
                    "Tsk_Deal_Stop_Dt": "2022-04-01",
                    "Eve_Sts_Desc": null,
                    "Tsk_Sub_Cls_Cd": "291396214ad39022",
                    "Crt_Psn_Id": "BUSOPPLOAD_SYS",
                    "Cust_Intent": null
                },
                "sort": [
                    1648598400000
                ]
            },
            {
                "_index": "master_st_rms_td_wm_tsk_new_tbl_2022-04",
                "_type": "_doc",
                "_id": "291396214ad39022PNCI1",
                "_score": 0,
                "_source": {
                    "Pln_Cont_Dt": null,
                    "BBK_Org_Id": "898",
                    "Exe_Psn_Id": "80074266",
                    "Tsk_Deal_Rsl_Cd": "06",
                    "Tsk_Cmt": null,
                    "Bch_Id": null,
                    "Cust_UID": "PNCI****88896",
                    "Tsk_Typ_Cd": "BUSOPP",
                    "Clu_ID": null,
                    "Cust_Nm": "崔**",
                    "Rtl_Pst_Id": "RB0101",
                    "BRN_Org_Id": "755550",
                    "Mnt_Usr_Id": "BUSOPPLOAD_SYS",
                    "Tsk_Sts_Cd": "N",
                    "Tsk_Srl_Id": "291396214ad39022PNCI1",
                    "Pln_Orig_Deal_Tm": null,
                    "Eve_Sts_Chg_Tm": null,
                    "Tsk_Cntnt": "非货基金实时赎回",
                    "Mnt_Tm": "2022-03-30 10:22:55",
                    "Tsk_Prio_Cd": "BUSOPP",
                    "Acmp_Intvw_Ind": null,
                    "Dat_Dt": "2022-03-30",
                    "Eve_Sts_Cd": null,
                    "Tsk_Crt_Tm": "2022-03-30 10:22:55",
                    "Exe_Mth_Cd": "08",
                    "Afl_Line_Of_Bus_Cd": null,
                    "Cust_Id": null,
                    "Tsk_Puh_Dt": "2022-03-30",
                    "Pln_Cont_Mth_Cd": null,
                    "Tsk_Deal_Stop_Dt": "2022-04-01",
                    "Tsk_Psn_List": [],
                    "Eve_Sts_Desc": null,
                    "Tsk_Sub_Cls_Cd": "291396214ad39022",
                    "Crt_Psn_Id": "BUSOPPLOAD_SYS",
                    "Cust_Intent": null
                },
                "sort": [
                    1648598400000
                ]
            }
        ]
    }
}

然后再拿着上次返回的scroll_id,进行请求,可以得到下一页的数据。


GET /_search/scroll
{
  
  "scroll":"5m",
  "scroll_id":"ZWNtLWgtc2stbTM=!DnF1ZXJ5VGhlbkZldGNoBwAAAAAIXqSPFlgwOUFwUF9wUUlTVXV0UGRNNWFpekEAAAAAEK-NdhZPbmVwT3dSdFE5MnhSQ1pXaEVrNERBAAAAAAlvvuEWRW5RS1UwdERUZlNGdFQ3ckNQSG5EUQAAAAAJHaTEFkpBbk1uREFtU1FLelg2V0RHb2RZSGcAAAAADxL9dBZSUWd1WVJkNlFhU0RPckNzYnRsZkl3AAAAAAVKgOUWZF95eXAzS3JRSmFkZ2hxWUd2QXVoQQAAAAAQkpg8FlJEc2oxWFZxU0VXVHlXdllEQ3hkd2c="
}

返回结果如下:

{
  "_scroll_id": "ZWNtLWgtc2stbTM=!DnF1ZXJ5VGhlbkZldGNoBwAAAAAQkq00FlJEc2oxWFZxU0VXVHlXdllEQ3hkd2cAAAAADLIKpRZacUlGOFYzSVJxMlJFRDZSQ2dRSzJ3AAAAAAbb30kWRndWRExkVkdSVjZXUTJXX2Q0NlhoUQAAAAAIXrWEFlgwOUFwUF9wUUlTVXV0UGRNNWFpekEAAAAAEJKtNRZSRHNqMVhWcVNFV1R5V3ZZREN4ZHdnAAAAABCwDgMWT25lcE93UnRROTJ4UkNaV2hFazREQQAAAAARh-wMFnhkY3d5a1NSU0plSTNKTl85Q2diRlE=",
  "took": 3,
  "timed_out": false,
  "_shards": {
    "total": 7,
    "successful": 7,
    "failed": 0
  },
  "hits": {
    "total": 13588,
    "max_score": 0,
    "hits": [
      {
        "_index": "master_st_rms_td_wm_tsk_new_tbl_2022-04",
        "_type": "_doc",
        "_id": "291396214ad39022PNCI4",
        "_score": 0,
        "_source": {
          "Pln_Cont_Dt": null,
          "BBK_Org_Id": "898",
          "Exe_Psn_Id": "80074266",
          "Tsk_Deal_Rsl_Cd": "06",
          "Tsk_Cmt": null,
          "Bch_Id": null,
          "Cust_UID": "PNCI****88896",
          "Tsk_Typ_Cd": "BUSOPP",
          "Clu_ID": null,
          "Cust_Nm": "崔**",
          "Rtl_Pst_Id": "RB0101",
          "BRN_Org_Id": "755550",
          "Mnt_Usr_Id": "BUSOPPLOAD_SYS",
          "Tsk_Sts_Cd": "N",
          "Tsk_Srl_Id": "291396214ad39022PNCI4",
          "Pln_Orig_Deal_Tm": null,
          "Eve_Sts_Chg_Tm": null,
          "Tsk_Cntnt": "非货基金实时赎回",
          "Mnt_Tm": "2022-03-30 10:22:55",
          "Tsk_Prio_Cd": "BUSOPP",
          "Acmp_Intvw_Ind": null,
          "Dat_Dt": "2022-03-30",
          "Eve_Sts_Cd": null,
          "Tsk_Crt_Tm": "2022-03-30 10:22:55",
          "Exe_Mth_Cd": "08",
          "Afl_Line_Of_Bus_Cd": null,
          "Cust_Id": null,
          "Tsk_Ext": """{"TRIGGER_DATE":"2022-03-30 10:22:53","FND_CD":"008457","TRX_AMT":"3.72","PROD_NM":"008457/招商瑞阳股债配置混合型证券投资基金"}""",
          "Tsk_Puh_Dt": "2022-03-30",
          "Pln_Cont_Mth_Cd": null,
          "Tsk_Deal_Stop_Dt": "2022-04-01",
          "Tsk_Psn_List": [],
          "Eve_Sts_Desc": null,
          "Tsk_Sub_Cls_Cd": "291396214ad39022",
          "Crt_Psn_Id": "BUSOPPLOAD_SYS",
          "Cust_Intent": null
        },
        "sort": [
          1648598400000
        ]
      },
      {
        "_index": "master_st_rms_td_wm_tsk_new_tbl_2022-04",
        "_type": "_doc",
        "_id": "291396214ad39022PNCI",
        "_score": 0,
        "_source": {
          "Pln_Cont_Dt": null,
          "BBK_Org_Id": "898",
          "Exe_Psn_Id": "80074266",
          "Tsk_Deal_Rsl_Cd": "06",
          "Tsk_Cmt": null,
          "Bch_Id": null,
          "Cust_UID": "PNCI****88896",
          "Tsk_Typ_Cd": "BUSOPP",
          "Clu_ID": null,
          "Cust_Nm": "崔**",
          "Rtl_Pst_Id": "RB0101",
          "BRN_Org_Id": "755550",
          "Mnt_Usr_Id": "BUSOPPLOAD_SYS",
          "Tsk_Sts_Cd": "N",
          "Tsk_Srl_Id": "291396214ad39022PNCI",
          "Pln_Orig_Deal_Tm": null,
          "Eve_Sts_Chg_Tm": null,
          "Tsk_Cntnt": "非货基金实时赎回",
          "Mnt_Tm": "2022-03-30 10:22:55",
          "Tsk_Prio_Cd": "BUSOPP",
          "Acmp_Intvw_Ind": null,
          "Dat_Dt": "2022-03-30",
          "Eve_Sts_Cd": null,
          "Tsk_Crt_Tm": "2022-03-30 10:22:55",
          "Exe_Mth_Cd": "08",
          "Afl_Line_Of_Bus_Cd": null,
          "Cust_Id": null,
          "Tsk_Ext": """{"TRIGGER_DATE":"2022-03-30 10:22:53","FND_CD":"008457","TRX_AMT":"3.72","PROD_NM":"008457/招商瑞阳股债配置混合型证券投资基金"}""",
          "Tsk_Puh_Dt": "2022-03-30",
          "Pln_Cont_Mth_Cd": null,
          "Tsk_Deal_Stop_Dt": "2022-04-01",
          "Tsk_Psn_List": [],
          "Eve_Sts_Desc": null,
          "Tsk_Sub_Cls_Cd": "291396214ad39022",
          "Crt_Psn_Id": "BUSOPPLOAD_SYS",
          "Cust_Intent": null
        },
        "sort": [
          1648598400000
        ]
      }
    ]
  }
}

search_after 假分页性能高

初次搜索

POST master_st_rms_td_wm_tsk_new_tbl/_doc/_search
{
  
  "size": 2,
  "query": {
    "match_all": {}
  },
  "sort": [
    {
      "Dat_Dt": "asc"
    }
  ]
}

返回结果中会有sort

{
  "took": 4,
  "timed_out": false,
  "_shards": {
    "total": 7,
    "successful": 7,
    "failed": 0
  },
  "hits": {
    "total": 13588,
    "max_score": 0,
    "hits": [
      {
        "_index": "master_st_rms_td_wm_tsk_new_tbl_2022-04",
        "_type": "_doc",
        "_id": "291396214ad39022PNCI3",
        "_score": 0,
        "_source": {
          "Pln_Cont_Dt": null,
          "BBK_Org_Id": "898",
          "Exe_Psn_Id": "80074266",
          "Tsk_Deal_Rsl_Cd": "06",
          "Tsk_Cmt": null,
          "Bch_Id": null,
          "Cust_UID": "PNCI****88896",
          "Tsk_Typ_Cd": "BUSOPP",
          "Clu_ID": null,
          "Cust_Nm": "崔**",
          "Rtl_Pst_Id": "RB0101",
          "BRN_Org_Id": "755550",
          "Mnt_Usr_Id": "BUSOPPLOAD_SYS",
          "Tsk_Sts_Cd": "N",
          "Tsk_Srl_Id": "291396214ad39022PNCI3",
          "Pln_Orig_Deal_Tm": null,
          "Eve_Sts_Chg_Tm": null,
          "Tsk_Cntnt": "非货基金实时赎回",
          "Mnt_Tm": "2022-03-30 10:22:55",
          "Tsk_Prio_Cd": "BUSOPP",
          "Acmp_Intvw_Ind": null,
          "Dat_Dt": "2022-03-30",
          "Eve_Sts_Cd": null,
          "Tsk_Crt_Tm": "2022-03-30 10:22:55",
          "Exe_Mth_Cd": "08",
          "Afl_Line_Of_Bus_Cd": null,
          "Cust_Id": null,
          "Tsk_Ext": """{"TRIGGER_DATE":"2022-03-30 10:22:53","FND_CD":"008457","TRX_AMT":"3.72","PROD_NM":"008457/招商瑞阳股债配置混合型证券投资基金"}""",
          "Tsk_Puh_Dt": "2022-03-30",
          "Pln_Cont_Mth_Cd": null,
          "Tsk_Deal_Stop_Dt": "2022-04-01",
          "Tsk_Psn_List": [
            {
              "Bch_Id": "",
              "Psn_Fld_Cd": "TRIGGER_DATE",
              "Trc_Cd": "291396214ad39022",
              "Crt_Tm": "2022-03-30 10:22:55",
              "Afl_Line_Of_Bus_Cd": "",
              "Psn_Fld_Id": "",
              "Psn_Fld_Val": "2022-03-30 10:22:53",
              "Tsk_Deal_Stop_Dt": "2022-04-01",
              "BBK_Org_Id": "898",
              "Dat_Dt": "2022-03-30",
              "Tsk_Srl_Id": "291396214ad39022PNCI3"
            },
            {
              "Bch_Id": "",
              "Psn_Fld_Cd": "FND_CD",
              "Trc_Cd": "291396214ad39022",
              "Crt_Tm": "2022-03-30 10:22:55",
              "Afl_Line_Of_Bus_Cd": "",
              "Psn_Fld_Id": "",
              "Psn_Fld_Val": "008457",
              "Tsk_Deal_Stop_Dt": "2022-04-01",
              "BBK_Org_Id": "898",
              "Dat_Dt": "2022-03-30",
              "Tsk_Srl_Id": "291396214ad39022PNCI3"
            },
            {
              "Bch_Id": "",
              "Psn_Fld_Cd": "TRX_AMT",
              "Trc_Cd": "291396214ad39022",
              "Crt_Tm": "2022-03-30 10:22:55",
              "Afl_Line_Of_Bus_Cd": "",
              "Psn_Fld_Id": "",
              "Psn_Fld_Val": "3.72",
              "Tsk_Deal_Stop_Dt": "2022-04-01",
              "BBK_Org_Id": "898",
              "Dat_Dt": "2022-03-30",
              "Tsk_Srl_Id": "291396214ad39022PNCI3"
            },
            {
              "Bch_Id": "",
              "Psn_Fld_Cd": "PROD_NM",
              "Trc_Cd": "291396214ad39022",
              "Crt_Tm": "2022-03-30 10:22:55",
              "Afl_Line_Of_Bus_Cd": "",
              "Psn_Fld_Id": "",
              "Psn_Fld_Val": "008457/招商瑞阳股债配置混合型证券投资基金",
              "Tsk_Deal_Stop_Dt": "2022-04-01",
              "BBK_Org_Id": "898",
              "Dat_Dt": "2022-03-30",
              "Tsk_Srl_Id": "291396214ad39022PNCI3"
            }
          ],
          "Eve_Sts_Desc": null,
          "Tsk_Sub_Cls_Cd": "291396214ad39022",
          "Crt_Psn_Id": "BUSOPPLOAD_SYS",
          "Cust_Intent": null
        },
        "sort": [
          1648598400000
        ]
      }
    ]
  }
}

下次请求时,在入参里面加上search_after,带上次返回的最后一个sort 里面的值

POST master_st_rms_td_wm_tsk_new_tbl/_doc/_search
{
  
  "size": 2,
  "query": {
    "match_all": {}
  },
 "search_after": [1650585600000],
  "sort": [
    {
      "Dat_Dt": "asc"
    }
  ]
}

总结

from+ size分页、scroll滚动搜索、search_after 假分页比较

1.from + size分页性能低;优点是灵活性好,实现简单;缺点是存在深分页问题;适用于数据量比较小,能容忍深度分页存在的场景

2.scroll滚动搜索性能中等;优点解决了深度分页问题;缺点是无法反应数据的实时性(快照版本),需要维护一个 scroll_id,成本高;适用于需要查询海量结果集场景

3.search_after 假分页性能高;优点性能最好,不存在深度分页问题,能够反映数据的实时变更;缺点是实现复杂,需要有一个全局唯一的字段,连续分页的时每一次查询都需要上次查询的结果;适用于需海量数据的分页的场景

参考:

jingyan.baidu.com/article/363… www.jianshu.com/p/78ae25f4c…