Elasticsearch NativeSearchQuery 使用教程

阅读量 86 · 预计阅读 6 分钟

Elasticsearch NativeSearchQuery 使用教程

NativeSearchQuery 是 Spring Data Elasticsearch 提供的一个功能强大的查询类(通常通过 NativeSearchQueryBuilder 构建),它允许开发者使用 Elasticsearch 原生的查询语法来构建复杂查询。本文将详细介绍 NativeSearchQuery 的使用方法、高级功能和最佳实践。

1. 基本概念

1.1 NativeSearchQuery 与 NativeSearchQueryBuilder

  • NativeSearchQuery:表示一个完整的 Elasticsearch 查询,包含查询条件、排序、分页、高亮等配置
  • NativeSearchQueryBuilder:用于构建 NativeSearchQuery 的构建器类,提供流式 API 来简化查询构建过程

1.2 依赖配置

在使用 NativeSearchQuery 之前,需要添加 Spring Data Elasticsearch 依赖:

<!-- Maven dependency: Spring Boot starter for Spring Data Elasticsearch -->
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
</dependency>

2. 基本使用

2.1 简单查询示例

import org.elasticsearch.index.query.QueryBuilders;
import org.springframework.data.elasticsearch.core.ElasticsearchRestTemplate;
import org.springframework.data.elasticsearch.core.SearchHits;
import org.springframework.data.elasticsearch.core.query.NativeSearchQuery;
import org.springframework.data.elasticsearch.core.query.NativeSearchQueryBuilder;

@Service
public class ProductService {

    @Autowired
    private ElasticsearchRestTemplate elasticsearchRestTemplate;

    /**
     * Finds products whose {@code name} field matches the given text.
     *
     * @param name text used in a match query against the {@code name} field
     * @return the content of every hit returned by the search
     */
    public List<Product> findProductsByName(String name) {
        // Assemble the query step by step instead of one fluent chain.
        NativeSearchQueryBuilder queryBuilder = new NativeSearchQueryBuilder();
        queryBuilder.withQuery(QueryBuilders.matchQuery("name", name));
        NativeSearchQuery nameQuery = queryBuilder.build();

        SearchHits<Product> hits = elasticsearchRestTemplate.search(nameQuery, Product.class);

        // Unwrap each hit into the mapped entity.
        return hits.stream()
            .map(productHit -> productHit.getContent())
            .collect(Collectors.toList());
    }
}

3. 高级功能

3.1 分页查询

/**
 * Returns one page of the products that belong to the given category.
 *
 * @param category exact category value to match
 * @param page zero-based page index
 * @param size number of products per page
 * @return the requested page, carrying the total hit count reported by the search
 */
public Page<Product> findProductsByCategory(String category, int page, int size) {
    // Build the page request once; the query and the returned Page share it.
    PageRequest pageRequest = PageRequest.of(page, size);

    // A term query is not analyzed, so it targets the keyword sub-field, the same field the
    // category aggregation example uses.
    // NOTE(review): assumes the default text + keyword multi-field mapping; confirm against the index.
    NativeSearchQuery searchQuery = new NativeSearchQueryBuilder()
        .withQuery(QueryBuilders.termQuery("category.keyword", category))
        .withPageable(pageRequest)
        .build();

    SearchHits<Product> searchHits = elasticsearchRestTemplate.search(searchQuery, Product.class);

    // Unwrap the hits of this page into entities.
    List<Product> products = searchHits.stream()
        .map(hit -> hit.getContent())
        .collect(Collectors.toList());

    return new PageImpl<>(products, pageRequest, searchHits.getTotalHits());
}

3.2 排序查询

/**
 * Searches products by description and returns them ordered by price, then rating.
 *
 * @param keyword text used in a match query against the {@code description} field
 * @return the content of every hit, in the order produced by the search
 */
public List<Product> findProductsWithSorting(String keyword) {
    NativeSearchQueryBuilder queryBuilder = new NativeSearchQueryBuilder();
    queryBuilder.withQuery(QueryBuilders.matchQuery("description", keyword));
    // Price ascending is the primary key; rating descending breaks ties between equal prices.
    queryBuilder.withSorts(
        SortBuilders.fieldSort("price").order(SortOrder.ASC),
        SortBuilders.fieldSort("rating").order(SortOrder.DESC));
    NativeSearchQuery sortedQuery = queryBuilder.build();

    SearchHits<Product> hits = elasticsearchRestTemplate.search(sortedQuery, Product.class);

    return hits.stream()
        .map(productHit -> productHit.getContent())
        .collect(Collectors.toList());
}

3.3 高亮显示

/**
 * Searches {@code name} and {@code description} for the keyword and returns DTOs that carry
 * the first highlighted fragment of the {@code name} field when one is present.
 *
 * @param keyword text used in the multi-match query
 * @return one DTO per hit
 */
public List<ProductDTO> findProductsWithHighlight(String keyword) {
    // Highlight configuration: wrap matches in the name field with an <em> tag.
    HighlightBuilder nameHighlight = new HighlightBuilder()
        .field("name")
        .preTags("<em class='highlight'>")
        .postTags("</em>")
        .fragmentSize(100);

    NativeSearchQueryBuilder queryBuilder = new NativeSearchQueryBuilder();
    queryBuilder.withQuery(QueryBuilders.multiMatchQuery(keyword, "name", "description"));
    queryBuilder.withHighlightBuilder(nameHighlight);
    NativeSearchQuery highlightQuery = queryBuilder.build();

    SearchHits<Product> hits = elasticsearchRestTemplate.search(highlightQuery, Product.class);

    return hits.stream()
        .map(productHit -> {
            ProductDTO productDto = new ProductDTO(productHit.getContent());

            // Copy the first highlighted name fragment, if the hit has one.
            Map<String, List<String>> fragmentsByField = productHit.getHighlightFields();
            if (fragmentsByField.containsKey("name")) {
                String firstFragment = fragmentsByField.get("name").get(0);
                productDto.setHighlightName(firstFragment);
            }

            return productDto;
        })
        .collect(Collectors.toList());
}

3.4 聚合查询

/**
 * Counts products per category with a terms aggregation.
 *
 * @return map from category key to the document count of its bucket
 */
public Map<String, Long> aggregateByCategory() {
    // Terms aggregation on the keyword sub-field of category.
    TermsAggregationBuilder categoryAgg = AggregationBuilders.terms("by_category")
        .field("category.keyword");

    // Only the aggregation is needed, so request zero documents. PageRequest.of(0, 0) cannot be
    // used for this: Spring Data rejects a page size below one with IllegalArgumentException.
    NativeSearchQuery searchQuery = new NativeSearchQueryBuilder()
        .withQuery(QueryBuilders.matchAllQuery())
        .addAggregation(categoryAgg)
        .withMaxResults(0)
        .build();

    SearchHits<Product> searchHits = elasticsearchRestTemplate.search(searchQuery, Product.class);

    // Flatten the buckets into key -> count.
    Terms terms = searchHits.getAggregations().get("by_category");
    Map<String, Long> categoryCountMap = new HashMap<>();

    for (Terms.Bucket bucket : terms.getBuckets()) {
        categoryCountMap.put(bucket.getKeyAsString(), bucket.getDocCount());
    }

    return categoryCountMap;
}

4. 复杂查询构建

4.1 布尔组合查询

/**
 * Searches products with an optional keyword plus optional category, price-range and brand
 * filters, sorted by ascending price and paged as requested by the criteria.
 *
 * @param criteria search criteria; every individual condition is applied only when it is set
 * @return the content of the hits on the requested page
 */
public List<Product> findProductsByComplexCriteria(ProductSearchCriteria criteria) {
    BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();

    // Scored condition: the keyword must match the product name.
    if (StringUtils.hasText(criteria.getKeyword())) {
        boolQuery.must(QueryBuilders.matchQuery("name", criteria.getKeyword()));
    }

    // Filter conditions do not contribute to the score.
    // Exact-value filters target the keyword sub-fields, the same fields the aggregation
    // examples use.
    // NOTE(review): assumes the default text + keyword multi-field mapping; confirm against the index.
    if (StringUtils.hasText(criteria.getCategory())) {
        boolQuery.filter(QueryBuilders.termQuery("category.keyword", criteria.getCategory()));
    }

    if (criteria.getMinPrice() != null) {
        boolQuery.filter(QueryBuilders.rangeQuery("price").gte(criteria.getMinPrice()));
    }

    if (criteria.getMaxPrice() != null) {
        boolQuery.filter(QueryBuilders.rangeQuery("price").lte(criteria.getMaxPrice()));
    }

    // "Any of these brands": a single terms query replaces the hand-built should-clauses.
    // Results are sorted by price, so moving this condition into filter context does not
    // change which documents are returned or their order.
    if (CollectionUtils.isNotEmpty(criteria.getBrands())) {
        boolQuery.filter(QueryBuilders.termsQuery("brand.keyword", criteria.getBrands()));
    }

    NativeSearchQuery searchQuery = new NativeSearchQueryBuilder()
        .withQuery(boolQuery)
        .withSorts(SortBuilders.fieldSort("price").order(SortOrder.ASC))
        .withPageable(PageRequest.of(criteria.getPage(), criteria.getSize()))
        .build();

    SearchHits<Product> searchHits = elasticsearchRestTemplate.search(searchQuery, Product.class);

    return searchHits.stream()
        .map(hit -> hit.getContent())
        .collect(Collectors.toList());
}

4.2 嵌套聚合查询

/**
 * Computes the average rating per price range within each brand using nested aggregations
 * (brand terms -> price ranges -> average rating).
 *
 * @return map from brand key to a map from price-range key to the average rating of that range
 */
public Map<String, Map<String, Double>> aggregateByBrandAndPriceRange() {
    // 1. Group by brand (at most 10 buckets).
    TermsAggregationBuilder brandAgg = AggregationBuilders.terms("by_brand")
        .field("brand.keyword")
        .size(10);

    // 2. Inside each brand, group by price range.
    RangeAggregationBuilder priceRangeAgg = AggregationBuilders.range("by_price_range")
        .field("price")
        .addUnboundedTo(1000)
        .addRange(1000, 3000)
        .addRange(3000, 5000)
        .addUnboundedFrom(5000);

    // 3. Inside each price range, compute the average rating.
    AvgAggregationBuilder avgRatingAgg = AggregationBuilders.avg("avg_rating")
        .field("rating");

    // 4. Nest the aggregations.
    priceRangeAgg.subAggregation(avgRatingAgg);
    brandAgg.subAggregation(priceRangeAgg);

    // 5. Only the aggregation is needed, so request zero documents. PageRequest.of(0, 0) cannot
    //    be used for this: Spring Data rejects a page size below one with IllegalArgumentException.
    NativeSearchQuery searchQuery = new NativeSearchQueryBuilder()
        .withQuery(QueryBuilders.matchAllQuery())
        .addAggregation(brandAgg)
        .withMaxResults(0)
        .build();

    // 6. Execute.
    SearchHits<Product> searchHits = elasticsearchRestTemplate.search(searchQuery, Product.class);

    // 7. Walk brand buckets, then the range buckets nested in each of them.
    Map<String, Map<String, Double>> result = new HashMap<>();
    Terms brands = searchHits.getAggregations().get("by_brand");

    for (Terms.Bucket brandBucket : brands.getBuckets()) {
        Map<String, Double> priceRangeMap = new HashMap<>();

        Range priceRanges = brandBucket.getAggregations().get("by_price_range");
        for (Range.Bucket rangeBucket : priceRanges.getBuckets()) {
            Avg avgRating = rangeBucket.getAggregations().get("avg_rating");
            priceRangeMap.put(rangeBucket.getKeyAsString(), avgRating.getValue());
        }

        result.put(brandBucket.getKeyAsString(), priceRangeMap);
    }

    return result;
}

5. 性能优化

5.1 字段选择

// Return only the fields that are needed, reducing network transfer and memory use.
String[] includedFields = {"id", "name", "price", "imageUrl"};
FetchSourceFilter sourceFilter = new FetchSourceFilter(includedFields, null); // null: no excluded fields
NativeSearchQuery searchQuery = new NativeSearchQueryBuilder()
    .withQuery(QueryBuilders.matchQuery("name", keyword))
    .withSourceFilter(sourceFilter)
    .build();

5.2 查询超时设置

// 设置查询超时时间,避免长时间运行的查询占用资源
NativeSearchQuery searchQuery = new NativeSearchQueryBuilder()
    .withQuery(QueryBuilders.matchAllQuery())
    .withTimeout(TimeValue.timeValueSeconds(5))
    .build();

5.3 缓存设置

// 设置查询缓存
NativeSearchQuery searchQuery = new NativeSearchQueryBuilder()
    .withQuery(QueryBuilders.boolQuery()
        .filter(QueryBuilders.termQuery("status", "active")))
    .withRequestCacheEnabled(true) // 启用请求缓存
    .build();

5.4 深度分页优化

/**
 * Fetches one page of 100 products with search_after paging, which avoids the cost of deep
 * from/size paging.
 *
 * <p>The declared return type is {@code List<Object>} because the method returns the sort
 * values of the last hit (the cursor for the next page), not the products themselves.
 *
 * @param keyword text used in a match query against the {@code name} field
 * @param searchAfterValues sort values of the last hit of the previous page, or {@code null}
 *     to fetch the first page
 * @return the sort values of the last hit on this page, to be passed to the next call, or
 *     {@code null} when this page contains no hits
 */
public List<Object> searchWithSearchAfter(String keyword, Object[] searchAfterValues) {
    // First and later pages share everything except the search_after cursor.
    NativeSearchQueryBuilder queryBuilder = new NativeSearchQueryBuilder()
        .withQuery(QueryBuilders.matchQuery("name", keyword))
        .withSorts(
            SortBuilders.fieldSort("price").order(SortOrder.ASC),
            SortBuilders.fieldSort("_id").order(SortOrder.ASC)) // tie-breaker for a unique order
        .withPageable(PageRequest.of(0, 100)); // the page index always stays 0 with search_after

    if (searchAfterValues != null) {
        // Not the first page: continue after the previous page's last hit.
        queryBuilder.withSearchAfter(java.util.Arrays.asList(searchAfterValues));
    }

    SearchHits<Product> searchHits =
        elasticsearchRestTemplate.search(queryBuilder.build(), Product.class);

    // Process the hits of this page here ...

    if (!searchHits.hasSearchHits()) {
        return null;
    }

    // The last hit's sort values are the cursor for the next page.
    List<SearchHit<Product>> pageHits = searchHits.getSearchHits();
    return pageHits.get(pageHits.size() - 1).getSortValues();
}

6. 高级配置

6.1 自定义结果映射

/**
 * Custom result mapper that deserializes the JSON source of each hit into the target class.
 *
 * NOTE(review): confirm that SearchResultMapper and AggregatedPage are available in the
 * Spring Data Elasticsearch version in use.
 */
@Component
public class CustomResultMapper implements SearchResultMapper {

    /**
     * Maps every non-null hit of the response to an instance of {@code clazz}.
     *
     * @param response raw search response
     * @param clazz target type of the deserialized documents
     * @param pageable paging information passed through to the returned page
     * @return a page holding one entry per non-null hit; the entry is {@code null} when the
     *     hit has no source text
     */
    @Override
    public <T> AggregatedPage<T> mapResults(SearchResponse response, Class<T> clazz, Pageable pageable) {
        List<T> mappedResults = new ArrayList<>();

        for (SearchHit rawHit : response.getHits()) {
            if (rawHit == null) {
                continue;
            }

            // Deserialize the document; a hit without source text yields a null entry.
            // Highlight or aggregation handling could be added here.
            String source = rawHit.getSourceAsString();
            T mapped = StringUtils.hasText(source) ? JSON.parseObject(source, clazz) : null;
            mappedResults.add(mapped);
        }

        long total = response.getHits().getTotalHits().value;
        return new AggregatedPageImpl<>(mappedResults, pageable, total);
    }
}

// Inject the custom result mapper so search methods can use it.
@Autowired
private CustomResultMapper customResultMapper;

/**
 * Searches products by name and maps the response with the custom result mapper.
 *
 * @param keyword text used in a match query against the {@code name} field
 * @return the content of every hit
 */
public List<Product> searchWithCustomMapper(String keyword) {
    NativeSearchQueryBuilder queryBuilder = new NativeSearchQueryBuilder();
    queryBuilder.withQuery(QueryBuilders.matchQuery("name", keyword));
    NativeSearchQuery nameQuery = queryBuilder.build();

    // NOTE(review): elasticsearchOperations is not declared in this snippet; presumably an
    // injected ElasticsearchOperations field. Confirm it and the three-argument search overload.
    return elasticsearchOperations.search(nameQuery, Product.class, customResultMapper)
        .stream()
        .map(productHit -> productHit.getContent())
        .collect(Collectors.toList());
}

6.2 滚动查询

/**
 * Reads every document of an index with the scroll API, 100 documents per batch.
 *
 * <p>The scroll context is released in a {@code finally} block, so it is cleared even when a
 * scroll request fails midway.
 *
 * @param indexName name of the index to read
 * @return the content of all hits collected across the scroll batches
 */
public List<Product> scrollSearch(String indexName) {
    // One keep-alive (1 minute) shared by the initial request and every continuation.
    long scrollTimeInMillis = 60_000L;
    IndexCoordinates index = IndexCoordinates.of(indexName);
    List<Product> allProducts = new ArrayList<>();

    NativeSearchQuery searchQuery = new NativeSearchQueryBuilder()
        .withQuery(QueryBuilders.matchAllQuery())
        .withPageable(PageRequest.of(0, 100))
        .build();

    // Open the scroll.
    SearchScrollHits<Product> searchScrollHits = elasticsearchRestTemplate.searchScrollStart(
        scrollTimeInMillis, searchQuery, Product.class, index);
    String scrollId = searchScrollHits.getScrollId();

    try {
        // Keep scrolling until a batch comes back empty.
        while (searchScrollHits.hasSearchHits()) {
            for (SearchHit<Product> hit : searchScrollHits.getSearchHits()) {
                allProducts.add(hit.getContent());
            }

            searchScrollHits = elasticsearchRestTemplate.searchScrollContinue(
                scrollId, scrollTimeInMillis, Product.class, index);
            scrollId = searchScrollHits.getScrollId();
        }
    } finally {
        // Always release the server-side scroll context.
        if (scrollId != null) {
            elasticsearchRestTemplate.searchScrollClear(scrollId);
        }
    }

    return allProducts;
}

7. 实际业务场景综合示例

/**
 * Article search service combining multi-condition filtering, paging, sorting, highlighting
 * and aggregation statistics in a single query.
 */
@Service
public class AdvancedSearchService {

    @Autowired
    private ElasticsearchRestTemplate elasticsearchRestTemplate;

    /**
     * Runs the combined search described by the request.
     *
     * @param request keyword, category, date range, sort and paging options; optional
     *     conditions are applied only when set
     * @return the articles of the requested page with their highlights, the total hit count,
     *     the paging values of the request, and per-category and per-month statistics
     */
    public SearchResultDTO advancedSearch(SearchRequestDTO request) {
        // Aggregations: top 20 categories and a per-month histogram of the publish date.
        TermsAggregationBuilder categoryAgg = AggregationBuilders.terms("category_stats")
            .field("category.keyword")
            .size(20);

        DateHistogramAggregationBuilder dateAgg = AggregationBuilders.dateHistogram("date_stats")
            .field("publishDate")
            .calendarInterval(DateHistogramInterval.MONTH)
            .format("yyyy-MM");

        NativeSearchQueryBuilder queryBuilder = new NativeSearchQueryBuilder()
            .withQuery(buildQuery(request))
            .withHighlightBuilder(buildHighlight())
            .addAggregation(categoryAgg)
            .addAggregation(dateAgg)
            .withPageable(PageRequest.of(request.getPage(), request.getSize()))
            .withTimeout(TimeValue.timeValueSeconds(10))
            .withSourceFilter(new FetchSourceFilter(
                new String[]{"id", "title", "description", "category", "publishDate", "author"},
                null
            ));
        applySorting(queryBuilder, request);

        SearchHits<Article> searchHits =
            elasticsearchRestTemplate.search(queryBuilder.build(), Article.class);

        SearchResultDTO result = new SearchResultDTO();
        result.setArticles(toArticleDtos(searchHits));
        result.setTotalHits(searchHits.getTotalHits());
        result.setCurrentPage(request.getPage());
        result.setPageSize(request.getSize());

        // Category statistics.
        Terms categoryTerms = searchHits.getAggregations().get("category_stats");
        List<CategoryStatDTO> categoryStats = categoryTerms.getBuckets().stream()
            .map(bucket -> new CategoryStatDTO(bucket.getKeyAsString(), bucket.getDocCount()))
            .collect(Collectors.toList());
        result.setCategoryStats(categoryStats);

        // Per-month statistics.
        ParsedDateHistogram dateHistogram = searchHits.getAggregations().get("date_stats");
        List<DateStatDTO> dateStats = dateHistogram.getBuckets().stream()
            .map(bucket -> new DateStatDTO(bucket.getKeyAsString(), bucket.getDocCount()))
            .collect(Collectors.toList());
        result.setDateStats(dateStats);

        return result;
    }

    /** Builds the bool query: scored keyword match plus unscored category and date filters. */
    private BoolQueryBuilder buildQuery(SearchRequestDTO request) {
        BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();

        if (StringUtils.hasText(request.getKeyword())) {
            boolQuery.must(QueryBuilders.multiMatchQuery(
                request.getKeyword(),
                "title", "description", "tags"
            ).minimumShouldMatch("70%"));
        }

        if (StringUtils.hasText(request.getCategory())) {
            boolQuery.filter(QueryBuilders.termQuery("category.keyword", request.getCategory()));
        }

        // A single range query carries whichever date bounds are present.
        if (request.getStartDate() != null || request.getEndDate() != null) {
            RangeQueryBuilder dateRange = QueryBuilders.rangeQuery("publishDate");
            if (request.getStartDate() != null) {
                dateRange.gte(request.getStartDate());
            }
            if (request.getEndDate() != null) {
                dateRange.lte(request.getEndDate());
            }
            boolQuery.filter(dateRange);
        }

        return boolQuery;
    }

    /**
     * Adds the requested sort, or the default (score, then publish date, both descending).
     * Sort builders are passed through the varargs overload so no typed list has to match the
     * builder's collection parameter.
     */
    private void applySorting(NativeSearchQueryBuilder queryBuilder, SearchRequestDTO request) {
        if (StringUtils.hasText(request.getSortBy())) {
            SortOrder order = request.isAscending() ? SortOrder.ASC : SortOrder.DESC;
            queryBuilder.withSorts(SortBuilders.fieldSort(request.getSortBy()).order(order));
        } else {
            queryBuilder.withSorts(
                SortBuilders.fieldSort("_score").order(SortOrder.DESC),
                SortBuilders.fieldSort("publishDate").order(SortOrder.DESC));
        }
    }

    /**
     * Builds the highlight configuration. Fragment settings are placed on explicit
     * {@code HighlightBuilder.Field} objects: {@code HighlightBuilder.field(String)} returns the
     * builder itself, so chaining them after it would set global values and the description
     * settings would overwrite the title settings.
     */
    private HighlightBuilder buildHighlight() {
        HighlightBuilder.Field titleField = new HighlightBuilder.Field("title")
            .fragmentSize(100)
            .numOfFragments(1);

        HighlightBuilder.Field descriptionField = new HighlightBuilder.Field("description")
            .fragmentSize(200)
            .numOfFragments(3);

        // The tags are identical for both fields, so they are set once on the builder.
        return new HighlightBuilder()
            .preTags("<em>")
            .postTags("</em>")
            .field(titleField)
            .field(descriptionField);
    }

    /** Converts hits to DTOs, copying the title and description highlights when present. */
    private List<ArticleDTO> toArticleDtos(SearchHits<Article> searchHits) {
        List<ArticleDTO> articles = new ArrayList<>();
        for (SearchHit<Article> hit : searchHits) {
            ArticleDTO dto = new ArticleDTO(hit.getContent());

            Map<String, List<String>> highlightFields = hit.getHighlightFields();
            if (highlightFields.containsKey("title")) {
                dto.setHighlightTitle(highlightFields.get("title").get(0));
            }
            if (highlightFields.containsKey("description")) {
                dto.setHighlightDescriptions(highlightFields.get("description"));
            }

            articles.add(dto);
        }
        return articles;
    }
}

8. 注意事项与最佳实践

  1. 版本兼容性

    • 确保 Spring Data Elasticsearch 版本与 Elasticsearch 服务器版本兼容
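    • 不同版本间的 API 可能有所差异(例如 NativeSearchQuery 基于 RestHighLevelClient,属于 Spring Data Elasticsearch 4.x 的用法;5.x 起推荐使用基于新 Java 客户端的 NativeQuery),请参考对应版本的官方文档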
  2. 性能优化

    • 使用 filter 上下文进行过滤操作,可提高性能并启用缓存
    • 合理设置分页大小,避免一次返回过多数据
    • 深度分页时使用 searchAfter 替代 from/size
    • 只查询需要的字段,减少数据传输
  3. 内存管理

    • 聚合查询可能消耗大量内存,特别是高基数字段的聚合
    • 对大数据量操作时使用滚动查询(Scroll API)
  4. 错误处理

    • 设置合理的超时时间,避免长时间运行的查询
    • 添加异常处理逻辑,特别是网络相关异常
  5. 监控与调优

    • 使用 Elasticsearch 的监控工具监控查询性能
    • 根据查询性能调整索引设计和查询语句

通过合理使用 NativeSearchQuery,您可以充分利用 Elasticsearch 的强大功能,构建高性能、可扩展的搜索应用。在实际开发中,应根据具体业务需求和数据特点,选择合适的查询方式和优化策略。