创建索引
- 设置mapping 使用put创建索引,指定分片数7,副本数1,刷新时间60秒,禁用“_all”字段,通过"_routing"强制要求读写文档时必须指定routing,并设置节点离开后的延迟分配超时时间(30分钟)。
PUT index_name
{
"settings": {
"number_of_shards": 7,
"number_of_replicas": 1,
"refresh_interval": "60s",
"index.unassigned.node_left.delayed_timeout":"30m"
},
"mappings": {
"tel": {
"_all": {
"enabled": false
},
"_routing":{
"required": true
},
"properties": {
"id": {
"type": "keyword"
},
"type": {
"type": "keyword"
},
"data": {
"type": "text"
},
"create_time": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
}
}
}
}
}
查询
//根据id查询
GET index_name/type/id
//查询所有数据
{
"query": {
"match_all": {}
}
}
//根据字段查询:match会先对查询词分词再匹配;term不分词,直接对字段值做精确匹配(适合keyword类型字段)
{
"query":{
"match":{
"city":"北京"
}
}
}
{
"query":{
"term":{
"city":"北京"
}
}
}
//单字段多值查询
{
"query": {
"terms": {
"city": ["北京","上海","广州","深圳"]
}
}
}
//随机查询20条
{
"size": 20,
"sort": {
"_script": {
"script": "Math.random()",
"type": "number",
"order": "asc"
}
}
}
//多条件查询
{
"query": {
"bool": {
"must": [
{
"terms": {
"city": [
"北京",
"上海"
]
}
},
{
"terms": {
"type": [
"景点",
"公园"
]
}
}
]
}
}
}
{
"query": {
"bool": {
"must": [
{
"term": {
"city": "北京"
}
},
{
"term": {
"type": "景点"
}
}
]
}
}
}
//查询结果过滤字段,选择指定字段输出
{
"_source": {
"includes": [ "obj1.*", "obj2.*" ],
"excludes": [ "*.description" ]
},
"query" : {
"term" : { "city" : "北京" }
}
}
//时间范围查询
{
"query":{
"range":{
"create_time":{
"gt":"2019-10-13 00:00:00",
"lte":"2019-10-17 00:00:00"
}
}
}
}
//多条件、多字段、非、或查询
//should 等同于或or
//must 等同于并and
//must_not 等同于非!
{
"query": {
"bool": {
"should": [{
"term": {
"city": "北京"
}
}, {
"term": {
"type": "景点"
}
}
],
"must_not": [{
"range": {
"rank": {
"gt": 4
}
}
}
]
}
}
}
//ES字段折叠。即每个分组取一个值。
{
"query": {
"term": {
"city": "北京"
}
},
"collapse": {
"field": "type"
}
}
//字段间比较查询(使用脚本)
{
"query": {
"bool": {
"must": {
"script": {
"script": {
"source": "doc['score1'].value+200<doc['score2'].value && doc['score3'].value>0"
}
}
}
}
}
}
//单条件filter查询
{
"query": {
"bool": {
"filter": {
"term": {
"city": "北京"
}
}
}
}
}
// 多条件filter查询
{
"query": {
"bool": {
"filter": [
{
"term": {
"city": "北京"
}
},
{
"term": {
"type": "公园"
}
}
]
}
}
}
//查询文档数不少于1000的城市及其文档数量,按数量倒序返回前10条
{
"size": 0,
"aggs": {
"city_count": {
"terms": {
"field": "city",
"min_doc_count": 1000,
"size": 10
}
}
}
}
//按照省份分组统计城市的平均gdp和人口
{
"size": 0,
"aggs": {
"group_by_prov": {
"terms": {
"field": "prov", //分组统计的字段
"size": 100 //指定返回数量,不然默认返回只有10条
},
"aggs": {
"gdp": {
"avg": {
"field": "gdp"
}
},
"pop": {
"avg": {
"field": "pop"
}
}
}
}
}
}
// 正则查询城市长度大于等于3的文档
{
"query": {
"bool": {
"filter": {
"regexp": {
"city": {
"value": ".{3,}"
}
}
}
}
}
}
数据量太大使用scroll进行查询
def search_by_scroll(self, body):
    """Return every hit matching ``body`` by paging through the scroll API.

    Args:
        body: Elasticsearch query DSL dict passed to the initial search.

    Returns:
        A list with the ``hits.hits`` entries of all pages, in the order
        the pages were returned.

    Raises:
        Whatever the underlying client raises for a failed ``search`` or
        ``scroll`` request; the scroll context is still released first.
    """
    page = self.es.search(index=self.index, body=body, size=10000, scroll='3m', timeout='5s')
    scroll_id = page['_scroll_id']
    result = []
    try:
        while True:
            hits = page['hits']['hits']
            # An empty page is the end-of-scroll signal. Relying on it avoids
            # reading hits.total, which is an object (not a number) on ES 7+.
            if not hits:
                break
            result.extend(hits)
            page = self.es.scroll(scroll_id=scroll_id, scroll='3m')
            # The scroll id may change between pages; always use the newest.
            scroll_id = page['_scroll_id']
    finally:
        # Release the server-side scroll context instead of letting it linger
        # until the 3m keep-alive expires. Cleanup is best effort: a failure
        # here must not hide the result or the original error.
        try:
            self.es.clear_scroll(scroll_id=scroll_id)
        except Exception:
            pass
    return result
使用sql语句查询
_xpack/sql?format=json
//查询十条数据
{
"query": "select * from indexName limit 10"
}
//查看所有的索引
{
"query": "show tables"
}
//统计索引文档数据
{
"query": "select count(1) from indexName"
}
更新
- 覆盖更新,根据id,put插入数据
- 局部更新,使用_update方法
POST indexName/type/id/_update
{
"doc" : {
"field1" : "value1",
"field2": 1
}
}
- 批量更新,_update_by_query方法
POST index_name/type/_update_by_query
{
"script": {
"source": "ctx._source['end_aoi_code']='022DA000083'"
},
"query": {
"match": {
"end_aoi_guid": "BC4C85EC58DE45A48DDD32161DF7A739"
}
}
}
删除
//根据id删除
DELETE index_name/type/doc_id
//根据查询结果删除:使用_delete_by_query方法,遇到版本冲突时使用conflicts=proceed,避免请求中止。
POST indexName/_delete_by_query?conflicts=proceed
{
"query": {
"range": {
"create_time": {
"lte": "2019-10-01 00:00:00"
}
}
}
}
参数设置
//修改查询输出最大结果数(默认值1万),设置max_result_window
POST index_name/_settings?preserve_existing=true
{
"max_result_window": "100000"
}
bulk操作,插入时_source表示插入内容,更新时doc表示更新的内容,而删除只需要_id
index操作
- 重建索引 【调优】副本数设置为0,批大小设置合理约5-15M,refresh_interval设置为-1
//将旧索引的数据按城市导入新索引
POST _reindex
{
"source": {
"index": "old_index",
"query": {
"term": {
"city": "010"
}
},
"size": 5000
},
"dest": {
"index": "new_index_010",
"op_type": "create"
}
}
//跨集群reIndex,需要在目标集群上的elasticsearch.yml添加白名单。
//reindex.remote.whitelist: 127.0.0.1:9200
{
"source": {
"remote": {
"host": "http://127.0.0.1:9200",
"username": "user",
"password": "pass"
},
"index": "old_index_name",
"query": {
"match_all": {}
}
},
"dest": {
"index": "new_index_name"
}
}
- 索引配置项 静态配置只能在创建索引时或关闭索引后进行设置,动态配置可以在索引运行中通过_settings API进行更改
//使用_all会更改所有索引的动态设置选项
PUT /_all/_settings
//使用索引名只更改该索引的动态设置选项
PUT /index_name/_settings
//设置刷新时间,默认1秒,设置-1可提高插入速度
PUT index_name/_settings
{
"index": {
"refresh_interval": "30s"
}
}
//将index.translog.durability设置为async:translog按sync_interval(默认5秒)异步刷盘,而不是每次写入请求都持久化到硬盘(默认值为request);可提高写入速度,但节点宕机时可能丢失最近一个周期内的数据
PUT index_name/_settings?preserve_existing=true
{
"index.translog.durability": "async"
}
//恢复: 去掉preserve_existing=true
PUT index_name/_settings
{
"index.translog.durability" : "request"
}
//更改副本数
{
"index": {
"number_of_replicas": "0"
}
}
//延迟分片,允许离开节点的时间
{
"settings": {
"index.unassigned.node_left.delayed_timeout": "30m"
}
}
//设置节点离开延时分片时间(当节点宕机,在该时间内不会重新分片。超出会重新分片,但未完成前,如果节点返回且数据没有变化可用则不重新分片,否则重分片已有的数据将会被丢弃!)
PUT /_all/_settings // 更改所有索引的配置
{
"settings": {
"index.unassigned.node_left.delayed_timeout": "5m"
}
}
//unassigned shards重新找回
//根据节点名和shard编号强制分配一个空的主分片(注意:allocate_empty_primary会丢弃该分片原有的全部数据,仅在确认数据无法恢复时使用)
PUT _cluster/reroute?pretty
{
"commands" : [
{
"allocate_empty_primary" : {
"index" : "index_name",
"shard" : 8,
"node" : "node-6",
"accept_data_loss" : "true"
}
}
]
}
//段合并释放磁盘空间
POST index_name/_forcemerge?only_expunge_deletes=true
//查询forcemerge任务详情
GET _tasks?detailed=true&actions=*forcemerge
//查看各个节点forceMerge的线程数
GET _cat/thread_pool/force_merge?v&s=name
- 别名设置
// 新建别名
POST _aliases
{
"actions": [
{
"add": {
"index": "my_index",
"alias": "my_index_alias"
}
}
]
}
// 删除别名
DELETE /{index}/_alias/{name}
- 索引打开和关闭
//关闭索引
POST index_name/_close
//打开索引
POST index_name/_open
索引模板
//查看模板
GET _template // 查看所有模板
GET _template/temp* // 查看与通配符相匹配的模板
GET _template/temp1,temp2 // 查看多个模板
GET _template/shop_template // 查看指定模板
//删除模板
DELETE _template/shop_template
//判断模板是否存在
HEAD _template/shop_template
a) 如果存在, 响应结果是: 200 - OK
b) 如果不存在, 响应结果是: 404 - Not Found
//建立索引模板
PUT _template/shop_template
{
"index_patterns": ["shop*", "bar*"], // 可以通过"shop*"和"bar*"来适配, template字段已过期
"version": 4, // 可选的模板版本号,仅供外部管理和区分模板版本使用,ES本身不会根据它覆盖或选择模板
"order": 0, // 模板的权重, 多个模板的时候优先匹配用, 值越大, 权重越高
"settings": {
"number_of_shards": 1 // 分片数量, 可以定义其他配置项
},
"aliases": {
"alias_1": {} // 索引对应的别名
},
"mappings": {
// ES 6.x开始每个索引只支持一种type(推荐命名为“_doc”), 7.x起mapping中的type已被废弃
"_doc": {
"_source": { // 是否保存字段的原始值
"enabled": false
},
"properties": { // 字段的映射
"@timestamp": { // 具体的字段映射
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
},
"@version": {
"doc_values": true,
"index": "false", // 设置为false, 不索引
"type": "text" // text类型
},
"logLevel": {
"type": "long"
}
}
}
}
}
//根据模板创建索引,匹配"shop*"
PUT shop_20201101
任务管理
//查询任务
GET _tasks
GET _tasks?detailed=true
GET /_tasks/taskId1
GET /_tasks?parent_task_id=parentTaskId1
//取消任务
POST /_tasks/taskId1/_cancel
POST /_tasks/_cancel?node_id=nodeId1,nodeId2&actions=*reindex
curl请求
curl -k -u admin:admin -XPOST http://10.202.1.1:9200/index_name/_search -H "Content-Type:application/json" -d "{}"