ES常用操作

682 阅读5分钟

创建索引

  1. 设置mapping:使用PUT创建索引,指定分片数7,副本数1,刷新时间60秒,禁用“_all”字段,通过"_routing"要求写入时必须指定routing,并设置节点离开后的延迟分配超时时间(30分钟)。
PUT index_name
{
    "settings": {
        "number_of_shards": 7,
        "number_of_replicas": 1,
        "refresh_interval": "60s",
        "index.unassigned.node_left.delayed_timeout":"30m"
    },
    "mappings": {
        "tel": {
            "_all": {
                "enabled": false
            },
            "_routing":{
                "required": true
            },
            "properties": {
                "id": {
                    "type": "keyword"
                },
                "type": {
                    "type": "keyword"
                },
                "data": {
                    "type": "text"
                },
                "create_time": {
                    "type": "date",
                    "format": "yyyy-MM-dd HH:mm:ss"
                }
            }
        }
    }
}

查询

//根据id查询
GET index_name/type/id

//查询所有数据
{
  "query": {
    "match_all": {}
  }
}

//根据字段查询:match会先对查询词分词再匹配;term不分词,按精确值匹配(适用于keyword类型字段)
{
    "query":{
        "match":{
            "city":"北京"
        }
    }
}
{
    "query":{
        "term":{
            "city":"北京"
        }
    }
}

//单字段多值查询
{
  "query": {
    "terms": {
      "city": ["北京","上海","广州","深圳"]
    }
  }
}

//随机查询20条
{
  "size": 20,
  "sort": {
    "_script": {
      "script": "Math.random()",
      "type": "number",
      "order": "asc"
    }
  }
}

//多条件查询
{
  "query": {
    "bool": {
      "must": [
        {
          "terms": {
            "city": [
              "北京",
              "上海"
            ]
          }
        },
        {
          "terms": {
            "type": [
              "景点",
              "公园"
            ]
          }
        }
      ]
    }
  }
}
{
  "query": {
    "bool": {
      "must": [
        {
          "term": {
            "city": "北京"
          }
        },
        {
          "term": {
            "type": "景点"
          }
        }
      ]
    }
  }
}

//查询结果过滤字段,选择指定字段输出
{
    "_source": {
        "includes": [ "obj1.*", "obj2.*" ],
        "excludes": [ "*.description" ]
    },
    "query" : {
        "term" : { "city" : "北京" }
    }
}

//时间范围查询
{
    "query":{
        "range":{
            "create_time":{
                "gt":"2019-10-13 00:00:00",
                "lte":"2019-10-17 00:00:00"
            }
        }
    }
}

//多条件、多字段、非、或查询
//should  等同于或or
//must  等同于并and
//must_not  等同于非!
{
    "query": {
        "bool": {
            "should": [{
                    "term": {
                        "city": "北京"
                    }
                }, {
                    "term": {
                        "type": "景点"
                    }
                }
            ],
            "must_not": [{
                    "range": {
                        "rank": {
                            "gt": 4
                        }
                    }
                }
            ]
        }
    }
}

//ES字段折叠。即每个分组取一个值。
{
  "query": {
    "term": {
      "city": "北京"
    }
  },
  "collapse": {
    "field": "type"
  }
}

//字段间比较查询(使用脚本)
{
  "query": {
    "bool": {
      "must": {
        "script": {
          "script": {
            "source": "doc['score1'].value+200<doc['score2'].value && doc['score3'].value>0"
          }
        }
      }
    }
  }
}

//单条件filter查询
{
  "query": {
    "bool": {
      "filter": {
        "term": {
          "city": "北京"
        }
      }
    }
  }
}

// 多条件filter查询
{
  "query": {
    "bool": {
      "filter": [
        {
          "term": {
            "city": "北京"
          }
        },
        {
          "term": {
            "type": "公园"
          }
        }
      ]
    }
  }
}

//查询文档数不少于1000的城市及其文档数量,按数量倒序返回前10条
{
  "size": 0,
  "aggs": {
    "city_count": {
      "terms": {
        "field": "city",
        "min_doc_count": 1000,
        "size": 10
      }
    }
  }
}

//按照省份分组统计城市的平均gdp和人口
{
  "size": 0,
  "aggs": {
    "group_by_prov": {
      "terms": {
        "field": "prov",  //分组统计的字段
        "size": 100  //指定返回数量,不然默认返回只有10条
      },
      "aggs": {
        "gdp": {
          "avg": {
            "field": "gdp"
          }
        },
        "pop": {
          "avg": {
            "field": "pop"
          }
        }
      }
    }
  }
}
// 正则查询城市长度大于等于3的文档
{
  "query": {
    "bool": {
      "filter": {
        "regexp": {
          "city": {
            "value": ".{3,}"
          }
        }
      }
    }
  }
}

数据量太大使用scroll进行查询

def search_by_scroll(self, body, size=10000, scroll='3m'):
    """Run a search and collect every matching hit using the scroll API.

    Args:
        body: Query DSL body passed to the initial search request.
        size: Number of hits requested per page (defaults to the original
            hard-coded value of 10000).
        scroll: Keep-alive for the scroll context, renewed on every page
            request (defaults to the original '3m').

    Returns:
        A list with the hit objects from all pages, in the order returned.
    """
    res = self.es.search(index=self.index, body=body, size=size, scroll=scroll, timeout='5s')
    scroll_id = res['_scroll_id']
    result = []
    try:
        hits = res['hits']['hits']
        # Page until an empty batch comes back instead of deriving the page
        # count from hits.total: that field is an object (not a number) on
        # Elasticsearch 7+, and may be a lower bound rather than exact.
        while hits:
            result.extend(hits)
            res = self.es.scroll(scroll_id=scroll_id, scroll=scroll)
            # The scroll id may change between pages; always use the latest.
            scroll_id = res['_scroll_id']
            hits = res['hits']['hits']
    finally:
        # Release the server-side scroll context right away instead of
        # waiting for the keep-alive to expire. Cleanup is best-effort so a
        # failure here never masks an error raised while paging.
        try:
            self.es.clear_scroll(scroll_id=scroll_id)
        except Exception:
            pass
    return result

使用sql语句查询

POST _xpack/sql?format=json  //ES 6.x的接口;7.x及以上版本使用 POST _sql?format=json
//查询十条数据
{
  "query": "select * from indexName limit 10"
}
//查看所有的索引
{
  "query": "show tables"
}
//统计索引文档数据
{
  "query": "select count(1) from indexName"
}

更新

  1. 覆盖更新,根据id,put插入数据
  2. 局部更新,使用_update方法
POST indexName/type/id/_update
{
   "doc" : {
      "field1" : "value1",
      "field2": 1
   }
}
  3. 批量更新,_update_by_query方法
POST index_name/type/_update_by_query
{
    "script": {
        "source": "ctx._source['end_aoi_code']='022DA000083'"
    },
    "query": {
        "match": {
            "end_aoi_guid": "BC4C85EC58DE45A48DDD32161DF7A739"
        }
    }
}

删除

//根据id删除
DELETE index_name/type/doc_id

//根据查询结果删除:使用_delete_by_query方法,遇到版本冲突时使用conflicts=proceed,避免请求中止。
POST indexName/_delete_by_query?conflicts=proceed 
{
  "query": {
    "range": {
      "create_time": {
        "lte": "2019-10-01 00:00:00"
      }
    }
  }
}

参数设置

//修改查询输出最大结果数(默认值1万),设置max_result_window(更新索引设置需使用PUT方法)
PUT index_name/_settings?preserve_existing=true
{
    "max_result_window": "100000"
}

bulk操作,插入时_source表示插入内容,更新时doc表示更新的内容,而删除只需要_id

index操作

  • 重建索引 【调优】副本数设置为0,批大小设置合理约5-15M,refresh_interval设置为-1
//将旧索引的数据按城市导入新索引
POST _reindex
{
  "source": {
    "index": "old_index",
    "query": {
      "term": {
        "city": "010"
      }
    },
    "size": 5000
  },
  "dest": {
    "index": "new_index_010",
    "op_type": "create"
  }
}
//跨集群reIndex,需要在目标集群上的elasticsearch.yml添加白名单。
//reindex.remote.whitelist: 127.0.0.1:9200
{
  "source": {
    "remote": {
      "host": "http://127.0.0.1:9200",
      "username": "user",
      "password": "pass"
    },
    "index": "old_index_name",
    "query": {
      "match_all": {}
    }
  },
  "dest": {
    "index": "new_index_name"
  }
}
  • 索引配置项 静态配置只能在创建索引时或者关闭索引后进行配置,动态配置可以在索引运行中通过_settings接口进行更改
//使用_all会更改所有索引的动态设置选项
PUT /_all/_settings

//使用索引名只更改该索引的动态设置选项
PUT /index_name/_settings

//设置刷新时间,默认1秒,设置-1可提高插入速度
PUT index_name/_settings
{
  "index": {
    "refresh_interval": "30s"
  }
}

//将index.translog.durability设置为async:translog按index.translog.sync_interval(默认5秒)异步刷盘,而不是每次请求都刷盘(默认值为request);可提高写入速度,但节点宕机时可能丢失最近几秒的数据
PUT index_name/_settings?preserve_existing=true
{
  "index.translog.durability": "async"
}

//恢复: 去掉preserve_existing=true
PUT index_name/_settings
{
  "index.translog.durability" : "request"
}

//更改副本数
{
  "index": {
    "number_of_replicas": "0"
  }
}

//延迟分片,允许离开节点的时间
{
  "settings": {
    "index.unassigned.node_left.delayed_timeout": "30m"
  }
}

//设置节点离开后的延迟分配时间(节点宕机后,在该时间内不会重新分配分片;超时后才开始重新分配。若重新分配完成前节点重新加入且其分片数据没有变化,则取消重新分配、直接使用该节点上的分片,已复制到其他节点的部分数据会被丢弃!)
PUT /_all/_settings  //更改所有索引的配置
{
  "settings": {
    "index.unassigned.node_left.delayed_timeout": "5m" 
  }
}

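//unassigned shards重新分配
//根据节点名和shard强制分配一个空的主分片。注意:allocate_empty_primary会清空该分片原有的数据,仅在确认数据无法恢复时使用;若节点上仍保留旧的分片数据,可改用allocate_stale_primary
POST  _cluster/reroute?pretty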
{
    "commands" : [
        {
          "allocate_empty_primary" : {
                "index" : "index_name", 
                "shard" : 8,
                "node" : "node-6", 
                "accept_data_loss" : "true"
          }
        }
    ]
}

//段合并释放磁盘空间
POST index_name/_forcemerge?only_expunge_deletes=true
//查询forcemerge任务详情
GET _tasks?detailed=true&actions=*forcemerge
//查看各个节点forceMerge的线程数
GET _cat/thread_pool/force_merge?v&s=name
  • 别名设置
// 新建别名
POST  _aliases
{
  "actions": [
    {
      "add": {
        "index": "my_index",
        "alias": "my_index_alias"
      }
    }
  ]
}

// 删除别名
DELETE /{index}/_alias/{name}
  • 索引打开和关闭
//关闭索引
POST index_name/_close

//打开索引
POST index_name/_open

索引模板

//查看模板
GET _template                // 查看所有模板
GET _template/temp*          // 查看与通配符相匹配的模板
GET _template/temp1,temp2    // 查看多个模板
GET _template/shop_template  // 查看指定模板
//删除模板
DELETE _template/shop_template
//判断模板是否存在
HEAD _template/shop_template
a) 如果存在, 响应结果是: 200 - OK
b) 如果不存在, 响应结果是: 404 - Not Found

//建立索引模板
PUT _template/shop_template
{
    "index_patterns": ["shop*", "bar*"],       // 可以通过"shop*"和"bar*"来适配, template字段已过期
    "version": 4,              // 可选的模板版本号, 仅供外部管理模板时区分版本, ES本身不使用该值
    "order": 0,                // 模板的权重, 多个模板的时候优先匹配用, 值越大, 权重越高
    "settings": {
        "number_of_shards": 1  // 分片数量, 可以定义其他配置项
    },
    "aliases": {
        "alias_1": {}          // 索引对应的别名
    },
    "mappings": {
        // ES 6.x每个索引只支持一种type(推荐命名为“_doc”), 7.0起mapping中不再指定type
        "_doc": {
            "_source": {            // 是否保存字段的原始值
                "enabled": false
            },
            "properties": {        // 字段的映射
                "@timestamp": {    // 具体的字段映射
                    "type": "date",           
                    "format": "yyyy-MM-dd HH:mm:ss"
                },
                "@version": {
                    "doc_values": true,
                    "index": "false",   // 设置为false, 不索引
                    "type": "text"      // text类型
                },
                "logLevel": {
                    "type": "long"
                }
            }
        }
    }
}

//根据模板创建索引,匹配"shop*"
PUT  shop_20201101

任务管理

//查询任务
GET _tasks
GET _tasks?detailed=true
GET /_tasks/taskId1
GET /_tasks?parent_task_id=parentTaskId1

//取消任务
POST /_tasks/taskId1/_cancel
POST /_tasks/_cancel?node_id=nodeId1,nodeId2&actions=*reindex

curl请求

curl -k -u admin:admin -XPOST http://10.202.1.1:9200/index_name/_search -H "Content-Type:application/json" -d "{}"