Elasticsearch 7.X DSL 入门教程
目录
基础概念
什么是 DSL
DSL (Domain Specific Language) 是 Elasticsearch 的查询语言,基于 JSON 格式,用于执行各种数据操作。
核心概念
{
"index": "索引名", // 类似数据库
"type": "类型名", // 7.x 已废弃,统一为 _doc
"id": "文档ID", // 类似主键
"document": "文档内容" // 类似记录
}
索引操作
创建索引
PUT /products
{
"settings": {
"number_of_shards": 3,
"number_of_replicas": 1
},
"mappings": {
"properties": {
"name": {
"type": "text",
"analyzer": "ik_max_word"
},
"price": {
"type": "double"
},
"category": {
"type": "keyword"
},
"description": {
"type": "text",
"analyzer": "ik_max_word"
},
"stock": {
"type": "integer"
},
"created_at": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
},
"is_active": {
"type": "boolean"
}
}
}
}
查看索引
GET /products
GET /products/_mapping
GET /_cat/indices?v
删除索引
DELETE /products
文档操作
创建文档
POST /products/_doc
{
"name": "iPhone 15 Pro",
"price": 7999,
"category": "手机",
"description": "苹果最新款智能手机",
"stock": 100,
"created_at": "2024-01-15 10:00:00",
"is_active": true
}
创建指定 ID 文档
PUT /products/_doc/1
{
"name": "MacBook Pro",
"price": 12999,
"category": "电脑",
"description": "苹果专业笔记本电脑",
"stock": 50,
"created_at": "2024-01-15 11:00:00",
"is_active": true
}
批量创建文档
POST /_bulk
{ "index": { "_index": "products", "_id": "2" } }
{ "name": "iPad Air", "price": 4399, "category": "平板", "description": "轻薄平板电脑", "stock": 80, "created_at": "2024-01-15 12:00:00", "is_active": true }
{ "index": { "_index": "products", "_id": "3" } }
{ "name": "AirPods Pro", "price": 1899, "category": "耳机", "description": "无线降噪耳机", "stock": 200, "created_at": "2024-01-15 13:00:00", "is_active": true }
{ "index": { "_index": "products", "_id": "4" } }
{ "name": "Apple Watch", "price": 2999, "category": "手表", "description": "智能手表", "stock": 150, "created_at": "2024-01-15 14:00:00", "is_active": true }
查询文档
GET /products/_doc/1
更新文档
POST /products/_update/1
{
"doc": {
"price": 11999,
"stock": 45
}
}
删除文档
DELETE /products/_doc/1
基础查询
查询所有文档
GET /products/_search
{
"query": {
"match_all": {}
}
}
分页查询
GET /products/_search
{
"query": {
"match_all": {}
},
"from": 0,
"size": 10
}
指定返回字段
GET /products/_search
{
"query": {
"match_all": {}
},
"_source": ["name", "price", "category"]
}
排序
GET /products/_search
{
"query": {
"match_all": {}
},
"sort": [
{
"price": {
"order": "desc"
}
}
]
}
查询类型详解
1. Match Query(全文搜索)
GET /products/_search
{
"query": {
"match": {
"name": "苹果 手机"
}
}
}
2. Term Query(精确匹配)
GET /products/_search
{
"query": {
"term": {
"category": "手机"
}
}
}
3. Terms Query(多值精确匹配)
GET /products/_search
{
"query": {
"terms": {
"category": ["手机", "电脑"]
}
}
}
4. Range Query(范围查询)
GET /products/_search
{
"query": {
"range": {
"price": {
"gte": 1000,
"lte": 10000
}
}
}
}
范围操作符:
- gt: 大于
- gte: 大于等于
- lt: 小于
- lte: 小于等于
5. Prefix Query(前缀匹配)
注意:prefix 查询不会对查询词进行分析;text 字段的词项在索引时通常已被分析器转为小写,因此前缀应使用小写。
GET /products/_search
{
"query": {
"prefix": {
"name": "iph"
}
}
}
6. Wildcard Query(通配符匹配)
注意:wildcard 查询同样不会对查询词进行分析;text 字段的词项在索引时通常已被转为小写,因此通配符模式应使用小写。
GET /products/_search
{
"query": {
"wildcard": {
"name": "*pro*"
}
}
}
7. Fuzzy Query(模糊查询)
GET /products/_search
{
"query": {
"fuzzy": {
"name": {
"value": "iPone",
"fuzziness": 2
}
}
}
}
8. Bool Query(布尔查询)
GET /products/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"name": "苹果"
}
}
],
"must_not": [
{
"term": {
"category": "耳机"
}
}
],
"should": [
{
"range": {
"price": {
"lte": 5000
}
}
}
],
"filter": [
{
"term": {
"is_active": true
}
}
]
}
}
}
Bool 子句说明:
- must: 必须匹配(影响得分)
- must_not: 必须不匹配(不影响得分)
- should: 应该匹配(影响得分)
- filter: 必须匹配(不影响得分)
复合查询
1. Multi Match Query(多字段搜索)
GET /products/_search
{
"query": {
"multi_match": {
"query": "苹果",
"fields": ["name", "description"]
}
}
}
2. Query String Query(查询字符串)
GET /products/_search
{
"query": {
"query_string": {
"fields": ["name", "description"],
"query": "(苹果 AND 手机) OR (iPad)"
}
}
}
3. Nested Bool Query(嵌套布尔查询)
GET /products/_search
{
"query": {
"bool": {
"must": [
{
"bool": {
"should": [
{
"match": {
"name": "苹果"
}
},
{
"match": {
"description": "苹果"
}
}
]
}
},
{
"range": {
"price": {
"gte": 1000
}
}
}
]
}
}
}
聚合查询
1. Terms Aggregation(词项聚合)
GET /products/_search
{
"size": 0,
"aggs": {
"category_count": {
"terms": {
"field": "category",
"size": 10
}
}
}
}
2. Stats Aggregation(统计聚合)
GET /products/_search
{
"size": 0,
"aggs": {
"price_stats": {
"stats": {
"field": "price"
}
}
}
}
返回:count、min、max、avg、sum
3. Range Aggregation(范围聚合)
GET /products/_search
{
"size": 0,
"aggs": {
"price_ranges": {
"range": {
"field": "price",
"ranges": [
{
"to": 2000
},
{
"from": 2000,
"to": 5000
},
{
"from": 5000
}
]
}
}
}
}
4. Date Histogram Aggregation(日期直方图)
GET /products/_search
{
"size": 0,
"aggs": {
"products_over_time": {
"date_histogram": {
"field": "created_at",
"calendar_interval": "day",
"format": "yyyy-MM-dd"
}
}
}
}
5. Bucket Aggregation + Metric Aggregation(桶聚合 + 指标聚合)
GET /products/_search
{
"size": 0,
"aggs": {
"by_category": {
"terms": {
"field": "category"
},
"aggs": {
"average_price": {
"avg": {
"field": "price"
}
},
"max_price": {
"max": {
"field": "price"
}
}
}
}
}
}
6. Filter Aggregation(过滤聚合)
GET /products/_search
{
"size": 0,
"aggs": {
"active_products": {
"filter": {
"term": {
"is_active": true
}
},
"aggs": {
"avg_price": {
"avg": {
"field": "price"
}
}
}
}
}
}
高级功能
1. Highlight(高亮显示)
GET /products/_search
{
"query": {
"match": {
"name": "苹果"
}
},
"highlight": {
"fields": {
"name": {},
"description": {}
}
}
}
2. Script Fields(脚本字段)
GET /products/_search
{
"query": {
"match_all": {}
},
"script_fields": {
"discount_price": {
"script": {
"source": "doc['price'].value * 0.9"
}
}
}
}
3. Source Filtering(源过滤)
GET /products/_search
{
"query": {
"match_all": {}
},
"_source": {
"includes": ["name", "price"],
"excludes": ["description"]
}
}
4. Explain(解释查询)
GET /products/_search
{
"query": {
"match": {
"name": "苹果"
}
},
"explain": true
}
5. Profile(性能分析)
GET /products/_search
{
"profile": true,
"query": {
"match": {
"name": "苹果"
}
}
}
实战示例
示例 1: 电商商品搜索
GET /products/_search
{
"query": {
"bool": {
"must": [
{
"multi_match": {
"query": "苹果",
"fields": ["name^2", "description"],
"type": "best_fields"
}
}
],
"filter": [
{
"term": {
"is_active": true
}
},
{
"range": {
"price": {
"gte": 1000,
"lte": 15000
}
}
}
]
}
},
"aggs": {
"categories": {
"terms": {
"field": "category",
"size": 10
}
},
"price_ranges": {
"range": {
"field": "price",
"ranges": [
{ "to": 2000, "key": "低价" },
{ "from": 2000, "to": 5000, "key": "中价" },
{ "from": 5000, "key": "高价" }
]
}
}
},
"sort": [
{
"_score": {
"order": "desc"
}
}
],
"from": 0,
"size": 20,
"highlight": {
"fields": {
"name": {},
"description": {}
},
"pre_tags": ["<em>"],
"post_tags": ["</em>"]
}
}
示例 2: 日志分析
GET /logs/_search
{
"query": {
"bool": {
"must": [
{
"range": {
"@timestamp": {
"gte": "now-24h",
"lte": "now"
}
}
}
],
"filter": [
{
"term": {
"level": "ERROR"
}
}
]
}
},
"aggs": {
"errors_by_service": {
"terms": {
"field": "service_name",
"size": 20
},
"aggs": {
"error_types": {
"terms": {
"field": "error_type",
"size": 10
}
},
"timeline": {
"date_histogram": {
"field": "@timestamp",
"calendar_interval": "1h"
}
}
}
}
},
"sort": [
{
"@timestamp": {
"order": "desc"
}
}
],
"size": 100
}
示例 3: 用户行为分析
GET /user_actions/_search
{
"query": {
"bool": {
"must": [
{
"range": {
"timestamp": {
"gte": "now-7d/d",
"lte": "now/d"
}
}
}
]
}
},
"aggs": {
"daily_stats": {
"date_histogram": {
"field": "timestamp",
"calendar_interval": "day"
},
"aggs": {
"unique_users": {
"cardinality": {
"field": "user_id"
}
},
"action_types": {
"terms": {
"field": "action_type"
}
}
}
},
"top_users": {
"terms": {
"field": "user_id",
"size": 10,
"order": {
"action_count": "desc"
}
},
"aggs": {
"action_count": {
"value_count": {
"field": "user_id"
}
}
}
}
}
}
示例 4: 复杂的多条件查询
GET /products/_search
{
"query": {
"bool": {
"must": [
{
"bool": {
"should": [
{
"match": {
"name": "手机"
}
},
{
"match": {
"description": "手机"
}
}
],
"minimum_should_match": 1
}
}
],
"must_not": [
{
"term": {
"category": "配件"
}
}
],
"should": [
{
"range": {
"price": {
"lte": 3000
}
}
},
{
"term": {
"stock": {
"value": 0
}
}
}
],
"filter": [
{
"term": {
"is_active": true
}
},
{
"range": {
"created_at": {
"gte": "now-30d"
}
}
}
]
}
},
"aggs": {
"category_stats": {
"terms": {
"field": "category"
},
"aggs": {
"price_stats": {
"stats": {
"field": "price"
}
},
"stock_stats": {
"stats": {
"field": "stock"
}
}
}
}
},
"sort": [
{
"price": {
"order": "asc"
}
},
{
"_score": {
"order": "desc"
}
}
],
"from": 0,
"size": 20
}
常用操作符和参数
查询参数
| 参数 | 说明 |
|---|---|
| from | 起始位置 |
| size | 返回数量 |
| timeout | 超时时间 |
| track_total_hits | 跟踪总命中数 |
| request_cache | 请求缓存 |
排序参数
| 参数 | 说明 |
|---|---|
| order | 排序方向(asc/desc) |
| mode | 排序模式(min/max/sum/avg/median) |
| missing | 缺失值处理(_last/_first) |
聚合参数
| 参数 | 说明 |
|---|---|
| size | 返回桶的数量 |
| order | 排序方式 |
| min_doc_count | 最小文档数 |
性能优化建议
1. 使用 filter 而非 query
// 不推荐
GET /products/_search
{
"query": {
"term": {
"is_active": true
}
}
}
// 推荐
GET /products/_search
{
"query": {
"bool": {
"filter": [
{
"term": {
"is_active": true
}
}
]
}
}
}
2. 限制返回字段
GET /products/_search
{
"_source": ["name", "price"],
"query": {
"match_all": {}
}
}
3. 使用 scroll 处理大量数据
GET /products/_search?scroll=1m
{
"size": 1000,
"query": {
"match_all": {}
}
}
4. 避免深度分页
// 使用 search_after 代替 from/size
GET /products/_search
{
"size": 100,
"query": {
"match_all": {}
},
"sort": [
{
"_id": "asc"
}
],
"search_after": ["last_document_id"]
}
总结
Elasticsearch DSL 的核心要点:
- 查询类型:match、term、range、bool 等
- 复合查询:bool 查询的 must、must_not、should、filter
- 聚合分析:terms、stats、range、date_histogram 等
- 高级功能:highlight、script、explain、profile
- 性能优化:使用 filter、限制字段、避免深度分页
通过本教程,你应该能够:
- 创建和管理索引
- 执行各种类型的查询
- 进行数据聚合分析
- 优化查询性能
继续实践和探索,你会发现 Elasticsearch DSL 的强大之处!