Elasticsearch核心知识篇(8)
这个部分主要是对于Elasticsearch的聚合分析进行讲解说明
第一个分析需求:计算每个tag下的商品数量
GET /ecommerce/product/_search
{
"aggs": { # aggs 要完成聚合操作
"group_by_tags": { # 给聚合取一个名字
"terms": { "field": "tags" } # terms 按照指定的field进行分组完成操作
}
}
}
遇到了一个错误,我们来分析一下
{
"error": {
"root_cause": [
{
"type": "illegal_argument_exception",
# 默认情况下,text 字段上禁用 fielddata。在 [tags] 上设置 fielddata=true,即可通过对倒排索引做反向转换(uninvert)将 fielddata 加载到内存中。请注意,这可能会占用大量内存。
"reason": "Fielddata is disabled on text fields by default. Set fielddata=true on [tags] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory."
}
],
"type": "search_phase_execution_exception",
"reason": "all shards failed",
"phase": "query",
"grouped": true,
"failed_shards": [
{
"shard": 0,
"index": "ecommerce",
"node": "ACfxlp60TqykD51C8SgMFQ",
"reason": {
"type": "illegal_argument_exception",
"reason": "Fielddata is disabled on text fields by default. Set fielddata=true on [tags] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory."
}
}
],
"caused_by": {
"type": "illegal_argument_exception",
"reason": "Fielddata is disabled on text fields by default. Set fielddata=true on [tags] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory."
}
},
"status": 400
}
解决办法:
将文本field的fielddata属性设置为true
PUT /ecommerce/_mapping/product
{
"properties": {
"tags": {
"type": "text",
"fielddata": true
}
}
}
{
"acknowledged": true
}
重新运行上面的聚合操作,完成聚合查询
GET /ecommerce/product/_search
{
"aggs": {
"group_by_tags": {
"terms": { "field": "tags" }
}
}
}
{
"took": 20,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 5,
"max_score": 1,
"hits": [
{
"_index": "ecommerce",
"_type": "product",
"_id": "5",
"_score": 1,
"_source": {
"name": "waws520 yagao",
"desc": "caoben zhiwu",
"price": 520,
"producer": "waws producer",
"tags": [
"qingxin"
]
}
},
{
"_index": "ecommerce",
"_type": "product",
"_id": "2",
"_score": 1,
"_source": {
"name": "jiajieshi yagao",
"desc": "youxiao fangzhu",
"price": 25,
"producer": "jiajieshi producer",
"tags": [
"fangzhu"
]
}
},
{
"_index": "ecommerce",
"_type": "product",
"_id": "4",
"_score": 1,
"_source": {
"name": "waws512 yagao",
"desc": "bohe zhiwu",
"price": 521,
"producer": "waws producer",
"tags": [
"relax",
"quwu"
]
}
},
{
"_index": "ecommerce",
"_type": "product",
"_id": "1",
"_score": 1,
"_source": {
"name": "jiaqiangban gaolujie yagao",
"desc": "gaoxiao meibai",
"price": 30,
"producer": "yagao producer",
"tags": [
"meibai",
"fangzhu"
]
}
},
{
"_index": "ecommerce",
"_type": "product",
"_id": "3",
"_score": 1,
"_source": {
"name": "zhonghua yagao",
"desc": "caoben zhiwu",
"price": 40,
"producer": "zhonghua producer",
"tags": [
"qingxin"
]
}
}
]
},
# 从这里开始,我们可以看到聚合的信息,对于tags中的标签进行统计计数
"aggregations": { # 聚合的结果
"group_by_tags": { # 聚合结果的名字
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [ # bucket 分桶 进行展示聚合的结果
{
"key": "fangzhu",
"doc_count": 2
},
{
"key": "qingxin",
"doc_count": 2
},
{
"key": "meibai",
"doc_count": 1
},
{
"key": "quwu",
"doc_count": 1
},
{
"key": "relax",
"doc_count": 1
}
]
}
}
}
第二个聚合分析的需求:对名称中包含yagao的商品,计算每个tag下的商品数量
GET /ecommerce/product/_search
{
"size": 0,
"query": {
"match": {
"name": "yagao"
}
},
"aggs": {
"all_tags": {
"terms": {
"field": "tags"
}
}
}
}
{
"took": 9,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 5,
"max_score": 0,
"hits": []
},
"aggregations": {
"all_tags": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "fangzhu",
"doc_count": 2
},
{
"key": "qingxin",
"doc_count": 2
},
{
"key": "meibai",
"doc_count": 1
},
{
"key": "quwu",
"doc_count": 1
},
{
"key": "relax",
"doc_count": 1
}
]
}
}
}
第三个聚合分析的需求:先分组,再算每组的平均值,计算每个tag下的商品的平均价格
GET /ecommerce/product/_search
{
"size": 0,
"aggs" : { # 分组
"group_by_tags" : {
"terms" : { "field" : "tags" },
"aggs" : { # 在上面每个分组(bucket)内部,再计算该组商品的平均价格
"avg_price" : {
"avg" : { "field" : "price" }
}
}
}
}
}
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 5,
"max_score": 0,
"hits": []
},
"aggregations": {
"group_by_tags": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{ # 首先按照tags进行分组
"key": "fangzhu",
"doc_count": 2,
"avg_price": { # 对于根据tags进行分组后的数据计算价格平均值
"value": 27.5
}
},
{
"key": "qingxin",
"doc_count": 2,
"avg_price": {
"value": 280
}
},
{
"key": "meibai",
"doc_count": 1,
"avg_price": {
"value": 30
}
},
{
"key": "quwu",
"doc_count": 1,
"avg_price": {
"value": 521
}
},
{
"key": "relax",
"doc_count": 1,
"avg_price": {
"value": 521
}
}
]
}
}
}
第四个数据分析需求:计算每个tag下的商品的平均价格,并且按照平均价格降序排序
GET /ecommerce/product/_search
{
"size": 0,
"aggs" : {
"all_tags" : {
"terms" : { "field" : "tags", "order": { "avg_price": "desc" } }, # 多增加一个排序
"aggs" : {
"avg_price" : {
"avg" : { "field" : "price" }
}
}
}
}
}
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 5,
"max_score": 0,
"hits": []
},
"aggregations": {
"all_tags": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "quwu",
"doc_count": 1,
"avg_price": {
"value": 521
}
},
{
"key": "relax",
"doc_count": 1,
"avg_price": {
"value": 521
}
},
{
"key": "qingxin",
"doc_count": 2,
"avg_price": {
"value": 280
}
},
{
"key": "meibai",
"doc_count": 1,
"avg_price": {
"value": 30
}
},
{
"key": "fangzhu",
"doc_count": 2,
"avg_price": {
"value": 27.5
}
}
]
}
}
}
我们现在全部都是用es的restful api在学习和讲解es的所有知识点和功能点,但是没有使用一些编程语言去讲解(比如java),原因有以下几点:
- es最重要的api,也就是让我们进行各种尝试、学习,甚至在某些环境下直接使用的api,就是restful api。如果学习时不用es restful api,比如一上来就用java api来讲es,也是可以的,但是这样就漏掉了es知识的一大块,你都不知道它最重要的restful api是怎么用的
- 讲知识点,用es restful api,更加方便,快捷,不用每次都写大量的java代码,能加快讲课的效率和速度,更加易于同学们关注es本身的知识和功能的学习
- 我们通常会讲完es知识点后,开始详细讲解java api,如何用java api执行各种操作
- 我们每个篇章都会搭配一个项目实战,项目实战是完全基于java去开发的真实项目和系统
第五个数据分析需求:按照指定的价格范围区间进行分组,然后在每组内再按照tag进行分组,最后再计算每组的平均价格
GET /ecommerce/product/_search
{
"size": 0,
"aggs": {
"group_by_price": {
"range": {
"field": "price",
"ranges": [
{
"from": 0, # range 进行区间分组
"to": 20
},
{
"from": 20,
"to": 40
},
{
"from": 40,
"to": 50
}
]
},
"aggs": {
"group_by_tags": {
"terms": {
"field": "tags"
},
"aggs": {
"average_price": {
"avg": {
"field": "price"
}
}
}
}
}
}
}
}
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 5,
"max_score": 0,
"hits": []
},
"aggregations": {
"group_by_price": {
"buckets": [
{
"key": "0.0-20.0",
"from": 0,
"to": 20,
"doc_count": 0,
"group_by_tags": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key": "20.0-40.0",
"from": 20,
"to": 40,
"doc_count": 2,
"group_by_tags": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "fangzhu",
"doc_count": 2,
"average_price": {
"value": 27.5
}
},
{
"key": "meibai",
"doc_count": 1,
"average_price": {
"value": 30
}
}
]
}
},
{
"key": "40.0-50.0",
"from": 40,
"to": 50,
"doc_count": 1,
"group_by_tags": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "qingxin",
"doc_count": 1,
"average_price": {
"value": 40
}
}
]
}
}
]
}
}
}