Elasticsearch笔记第六篇

223 阅读4分钟

Elasticsearch核心知识篇(8)

这一部分主要讲解Elasticsearch的聚合分析

第一个分析需求:计算每个tag下的商品数量

GET /ecommerce/product/_search
{
  "aggs": {                           # aggs 要完成聚合操作
    "group_by_tags": {                # 给聚合取一个名字
      "terms": { "field": "tags" }    # terms 按照指定field的值进行分组
    }
  }
}

遇到了一个错误,我们来分析一下

{
  "error": {
    "root_cause": [
      {
        "type": "illegal_argument_exception",
        # 默认情况下,text类型的字段禁用了fielddata。可以在[tags]上设置fielddata=true,通过对倒排索引做反向处理(uninvert)把fielddata加载到内存中。但请注意,这可能会占用大量内存。
        "reason": "Fielddata is disabled on text fields by default. Set fielddata=true on [tags] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory."
      }
    ],
    "type": "search_phase_execution_exception",
    "reason": "all shards failed",
    "phase": "query",
    "grouped": true,
    "failed_shards": [
      {
        "shard": 0,
        "index": "ecommerce",
        "node": "ACfxlp60TqykD51C8SgMFQ",
        "reason": {
          "type": "illegal_argument_exception",
          "reason": "Fielddata is disabled on text fields by default. Set fielddata=true on [tags] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory."
        }
      }
    ],
    "caused_by": {
      "type": "illegal_argument_exception",
      "reason": "Fielddata is disabled on text fields by default. Set fielddata=true on [tags] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory."
    }
  },
  "status": 400
}

解决办法:

将文本field的fielddata属性设置为true

PUT /ecommerce/_mapping/product
{
  "properties": {
    "tags": {
      "type": "text",
      "fielddata": true
    }
  }
}

{
  "acknowledged": true
}

重新运行上面的聚合操作,完成聚合查询

GET /ecommerce/product/_search
{
  "aggs": {
    "group_by_tags": {
      "terms": { "field": "tags" }
    }
  }
}

{
  "took": 20,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 5,
    "max_score": 1,
    "hits": [
      {
        "_index": "ecommerce",
        "_type": "product",
        "_id": "5",
        "_score": 1,
        "_source": {
          "name": "waws520 yagao",
          "desc": "caoben zhiwu",
          "price": 520,
          "producer": "waws producer",
          "tags": [
            "qingxin"
          ]
        }
      },
      {
        "_index": "ecommerce",
        "_type": "product",
        "_id": "2",
        "_score": 1,
        "_source": {
          "name": "jiajieshi yagao",
          "desc": "youxiao fangzhu",
          "price": 25,
          "producer": "jiajieshi producer",
          "tags": [
            "fangzhu"
          ]
        }
      },
      {
        "_index": "ecommerce",
        "_type": "product",
        "_id": "4",
        "_score": 1,
        "_source": {
          "name": "waws512 yagao",
          "desc": "bohe zhiwu",
          "price": 521,
          "producer": "waws producer",
          "tags": [
            "relax",
            "quwu"
          ]
        }
      },
      {
        "_index": "ecommerce",
        "_type": "product",
        "_id": "1",
        "_score": 1,
        "_source": {
          "name": "jiaqiangban gaolujie yagao",
          "desc": "gaoxiao meibai",
          "price": 30,
          "producer": "yagao producer",
          "tags": [
            "meibai",
            "fangzhu"
          ]
        }
      },
      {
        "_index": "ecommerce",
        "_type": "product",
        "_id": "3",
        "_score": 1,
        "_source": {
          "name": "zhonghua yagao",
          "desc": "caoben zhiwu",
          "price": 40,
          "producer": "zhonghua producer",
          "tags": [
            "qingxin"
          ]
        }
      }
    ]
  },
    
  # 从这里开始,我们可以看到聚合的信息,对于tags中的标签进行统计计数
  "aggregations": {                          # 聚合的结果
    "group_by_tags": {                       # 聚合结果的名字
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [                           # bucket 分桶 进行展示聚合的结果
        {
          "key": "fangzhu",
          "doc_count": 2
        },
        {
          "key": "qingxin",
          "doc_count": 2
        },
        {
          "key": "meibai",
          "doc_count": 1
        },
        {
          "key": "quwu",
          "doc_count": 1
        },
        {
          "key": "relax",
          "doc_count": 1
        }
      ]
    }
  }
}

第二个聚合分析的需求:对名称中包含yagao的商品,计算每个tag下的商品数量

GET /ecommerce/product/_search
{
  "size": 0,
  "query": {
    "match": {
      "name": "yagao"
    }
  },
  "aggs": {
    "all_tags": {
      "terms": {
        "field": "tags"
      }
    }
  }
}

{
  "took": 9,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 5,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "all_tags": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "fangzhu",
          "doc_count": 2
        },
        {
          "key": "qingxin",
          "doc_count": 2
        },
        {
          "key": "meibai",
          "doc_count": 1
        },
        {
          "key": "quwu",
          "doc_count": 1
        },
        {
          "key": "relax",
          "doc_count": 1
        }
      ]
    }
  }
}

第三个聚合分析的需求:先分组,再算每组的平均值,计算每个tag下的商品的平均价格

GET /ecommerce/product/_search
{
    "size": 0,
    "aggs" : {                                       # 分组
        "group_by_tags" : {
            "terms" : { "field" : "tags" },
            "aggs" : {                               # 在上面每个分组内再做聚合计算(这里是求平均值)
                "avg_price" : {
                    "avg" : { "field" : "price" }
                }
            }
        }
    }
}

{
  "took": 3,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 5,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "group_by_tags": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {                            # 首先先按照tags进行分组
          "key": "fangzhu",
          "doc_count": 2,
          "avg_price": {             # 对于根据tags进行分组后的数据计算价格平均值
            "value": 27.5
          }
        },
        {
          "key": "qingxin",
          "doc_count": 2,
          "avg_price": {
            "value": 280
          }
        },
        {
          "key": "meibai",
          "doc_count": 1,
          "avg_price": {
            "value": 30
          }
        },
        {
          "key": "quwu",
          "doc_count": 1,
          "avg_price": {
            "value": 521
          }
        },
        {
          "key": "relax",
          "doc_count": 1,
          "avg_price": {
            "value": 521
          }
        }
      ]
    }
  }
}

第四个数据分析需求:计算每个tag下的商品的平均价格,并且按照平均价格降序排序

GET /ecommerce/product/_search
{
    "size": 0,
    "aggs" : {
        "all_tags" : {                                    
            "terms" : { "field" : "tags", "order": { "avg_price": "desc" } },  # 多增加一个排序
            "aggs" : {
                "avg_price" : {
                    "avg" : { "field" : "price" }
                }
            }
        }
    }
}



{
  "took": 2,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 5,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "all_tags": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "quwu",
          "doc_count": 1,
          "avg_price": {
            "value": 521
          }
        },
        {
          "key": "relax",
          "doc_count": 1,
          "avg_price": {
            "value": 521
          }
        },
        {
          "key": "qingxin",
          "doc_count": 2,
          "avg_price": {
            "value": 280
          }
        },
        {
          "key": "meibai",
          "doc_count": 1,
          "avg_price": {
            "value": 30
          }
        },
        {
          "key": "fangzhu",
          "doc_count": 2,
          "avg_price": {
            "value": 27.5
          }
        }
      ]
    }
  }
}

我们现在全部都是用es的restful api来学习和讲解es的所有知识点和功能点,而没有使用某种编程语言(比如java)去讲解,原因有以下几点:

  1. es最重要的api,让我们进行各种尝试、学习甚至在某些环境下进行使用的api,就是restful api。如果学习时不用es restful api,比如我上来就用java api来讲es,也是可以的,但这样就漏掉了es知识的一大块,你都不知道它最重要的restful api是怎么用的
  2. 讲知识点,用es restful api,更加方便,快捷,不用每次都写大量的java代码,能加快讲课的效率和速度,也更便于同学们专注于es本身的知识和功能的学习
  3. 我们通常会讲完es知识点后,开始详细讲解java api,如何用java api执行各种操作
  4. 我们每个篇章都会搭配一个项目实战,项目实战是完全基于java去开发的真实项目和系统

第五个数据分析需求:按照指定的价格范围区间进行分组,然后在每组内再按照tag进行分组,最后再计算每组的平均价格

GET /ecommerce/product/_search
{
  "size": 0,
  "aggs": {
    "group_by_price": {
      "range": {
        "field": "price",
        "ranges": [
          {
            "from": 0,         # range 进行区间分组
            "to": 20             
          },
          {
            "from": 20,
            "to": 40
          },
          {
            "from": 40,
            "to": 50
          }
        ]
      },
      "aggs": {
        "group_by_tags": {
          "terms": {
            "field": "tags"
          },
          "aggs": {
            "average_price": {
              "avg": {
                "field": "price"
              }
            }
          }
        }
      }
    }
  }
}


{
  "took": 3,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 5,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "group_by_price": {
      "buckets": [
        {
          "key": "0.0-20.0",
          "from": 0,
          "to": 20,
          "doc_count": 0,
          "group_by_tags": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": []
          }
        },
        {
          "key": "20.0-40.0",
          "from": 20,
          "to": 40,
          "doc_count": 2,
          "group_by_tags": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "fangzhu",
                "doc_count": 2,
                "average_price": {
                  "value": 27.5
                }
              },
              {
                "key": "meibai",
                "doc_count": 1,
                "average_price": {
                  "value": 30
                }
              }
            ]
          }
        },
        {
          "key": "40.0-50.0",
          "from": 40,
          "to": 50,
          "doc_count": 1,
          "group_by_tags": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "qingxin",
                "doc_count": 1,
                "average_price": {
                  "value": 40
                }
              }
            ]
          }
        }
      ]
    }
  }
}