es新增&高亮实战

43 阅读5分钟

一、新增题目同步到es

1.1 es文档实体类

/**
 * Elasticsearch document entity for a subject (question).
 *
 * <p>Besides the stored document fields it also carries the search keyword and the
 * relevance score. Paging fields presumably come from {@code PageInfo} — confirm there.
 */
@Data
public class SubjectInfoEs extends PageInfo implements Serializable {
    /**
     * Subject id
     */
    private Long subjectId;

    /**
     * Elasticsearch document id
     */
    private Long docId;

    /**
     * Subject name
     */
    private String subjectName;

    /**
     * Subject answer
     */
    private String subjectAnswer;

    /**
     * Subject type
     */
    private String subjectType;

    /**
     * Creator
     */
    private String createUser;

    /**
     * Creation time (timestamp)
     */
    private Long createTime;

    /**
     * Keyword to search for
     */
    private String keyword;

    /**
     * Relevance score
     */
    private BigDecimal score;
}

1.2 Es文档实体常量类

/**
 * Field-name constants of the subject document stored in Elasticsearch.
 *
 * <p>Pure constants holder: the class is final and cannot be instantiated.
 */
public final class EsSubjectFields {
    public static final String DOC_ID = "doc_id";

    public static final String SUBJECT_ID = "subject_id";

    public static final String SUBJECT_NAME = "subject_name";

    public static final String SUBJECT_ANSWER = "subject_answer";

    public static final String SUBJECT_TYPE = "subject_type";

    public static final String CREATE_USER = "create_user";

    public static final String CREATE_TIME = "create_time";

    /**
     * Fields that can be used in queries.
     *
     * <p>The array is shared and mutable; callers must treat it as read-only.
     *
     * <p>NOTE(review): {@link #DOC_ID} is not listed here although the search-result
     * conversion reads it from the hit source — confirm whether it should be added.
     */
    public static final String[] FIELD_QUERY = {
            SUBJECT_ID, SUBJECT_NAME, SUBJECT_ANSWER, SUBJECT_TYPE, CREATE_USER, CREATE_TIME
    };

    /** Not instantiable: this class only exposes constants. */
    private EsSubjectFields() {
    }
}

1.3 es的service层

/**
 * Service for writing subject documents to Elasticsearch and searching them.
 */
public interface SubjectEsService {
    /**
     * Inserts a subject document.
     *
     * @param subjectInfoEs the subject document to add
     * @return boolean result of the insert (presumably {@code true} on success — confirm in the implementation)
     */
    boolean insert(SubjectInfoEs subjectInfoEs);

    /**
     * Paged query of subject documents.
     *
     * @param subjectInfoEs the query object
     * @return one page of matching subject documents
     */
    PageResult<SubjectInfoEs> querySubjectList(SubjectInfoEs subjectInfoEs);

}
/**
 * {@link SubjectEsService} implementation that talks to Elasticsearch through {@code EsRestClient}.
 */
@Service
@Log4j2
public class SubjectEsServiceImpl implements SubjectEsService {

    /**
     * Inserts the subject as a document whose id is the subject's docId.
     *
     * @param subjectInfoEs the subject to index
     * @return whatever {@code EsRestClient.insertDoc} reports
     */
    @Override
    public boolean insert(SubjectInfoEs subjectInfoEs) {
        Map<String, Object> source = convert2EsSourceData(subjectInfoEs);

        EsSourceData document = new EsSourceData();
        // Use the entity's docId as the explicit document id.
        document.setDocId(String.valueOf(subjectInfoEs.getDocId()));
        document.setData(source);

        return EsRestClient.insertDoc(getEsIndexInfo(), document);
    }

    /**
     * Paged query; not implemented in this version and always returns {@code null}.
     */
    @Override
    public PageResult<SubjectInfoEs> querySubjectList(SubjectInfoEs subjectInfoEs) {
        return null;
    }

    /**
     * Builds the cluster/index descriptor that decides which cluster and index are operated on.
     */
    private EsIndexInfo getEsIndexInfo() {
        EsIndexInfo target = new EsIndexInfo();
        target.setClusterName("cf441bd98016");
        target.setIndexName("subject_index");
        return target;
    }

    /**
     * Flattens the entity into a field-name to value map keyed by {@link EsSubjectFields}.
     */
    private Map<String, Object> convert2EsSourceData(SubjectInfoEs subjectInfoEs) {
        Map<String, Object> fields = new HashMap<>();
        fields.put(EsSubjectFields.DOC_ID, subjectInfoEs.getDocId());
        fields.put(EsSubjectFields.SUBJECT_ID, subjectInfoEs.getSubjectId());
        fields.put(EsSubjectFields.SUBJECT_TYPE, subjectInfoEs.getSubjectType());
        fields.put(EsSubjectFields.SUBJECT_NAME, subjectInfoEs.getSubjectName());
        fields.put(EsSubjectFields.SUBJECT_ANSWER, subjectInfoEs.getSubjectAnswer());
        fields.put(EsSubjectFields.CREATE_TIME, subjectInfoEs.getCreateTime());
        fields.put(EsSubjectFields.CREATE_USER, subjectInfoEs.getCreateUser());
        return fields;
    }
}

1.4 雪花算法Util

/**
 * Snowflake-style 64-bit id generator.
 *
 * <p>An id packs, from high to low bits: milliseconds since a custom epoch, a 5-bit
 * datacenter id, a 5-bit worker id and a 12-bit per-millisecond sequence.
 * {@link #nextId()} is synchronized, so one instance can be shared between threads.
 */
public class IdWorkerUtil {

    /** Custom epoch in milliseconds, subtracted from the current time before packing. */
    private static final long EPOCH = 1585644268888L;

    private static final long WORKER_ID_BITS = 5L;
    private static final long DATACENTER_ID_BITS = 5L;
    private static final long SEQUENCE_BITS = 12L;

    /** Largest value representable in the worker-id bits. */
    private static final long MAX_WORKER_ID = ~(-1L << WORKER_ID_BITS);
    /** Largest value representable in the datacenter-id bits. */
    private static final long MAX_DATACENTER_ID = ~(-1L << DATACENTER_ID_BITS);
    /** Mask that wraps the sequence back to 0 once its bits overflow. */
    private static final long SEQUENCE_MASK = ~(-1L << SEQUENCE_BITS);

    private static final long WORKER_ID_SHIFT = SEQUENCE_BITS;
    private static final long DATACENTER_ID_SHIFT = SEQUENCE_BITS + WORKER_ID_BITS;
    private static final long TIMESTAMP_LEFT_SHIFT = SEQUENCE_BITS + WORKER_ID_BITS + DATACENTER_ID_BITS;

    private final long workerId;
    private final long datacenterId;
    private long sequence;
    /** Timestamp used for the previously generated id; -1 until the first id is produced. */
    private long lastTimestamp = -1L;

    /**
     * @param workerId     worker id, 0 to 31 inclusive
     * @param datacenterId datacenter id, 0 to 31 inclusive
     * @param sequence     initial sequence value
     * @throws IllegalArgumentException if workerId or datacenterId is out of range
     */
    public IdWorkerUtil(long workerId, long datacenterId, long sequence) {
        boolean workerOutOfRange = workerId < 0 || workerId > MAX_WORKER_ID;
        if (workerOutOfRange) {
            String message = String.format(
                    "worker Id can't be greater than %d or less than 0", MAX_WORKER_ID);
            throw new IllegalArgumentException(message);
        }
        boolean datacenterOutOfRange = datacenterId < 0 || datacenterId > MAX_DATACENTER_ID;
        if (datacenterOutOfRange) {
            String message = String.format(
                    "datacenter Id can't be greater than %d or less than 0", MAX_DATACENTER_ID);
            throw new IllegalArgumentException(message);
        }
        this.workerId = workerId;
        this.datacenterId = datacenterId;
        this.sequence = sequence;
    }

    public long getWorkerId() {
        return workerId;
    }

    public long getDatacenterId() {
        return datacenterId;
    }

    /** @return the current wall-clock time in milliseconds */
    public long getTimestamp() {
        return System.currentTimeMillis();
    }

    /**
     * Produces the next id.
     *
     * @return the packed id
     * @throws RuntimeException if the clock is behind the timestamp of the previous id
     */
    public synchronized long nextId() {
        long now = timeGen();
        if (now < lastTimestamp) {
            System.err.printf(
                    "clock is moving backwards. Rejecting requests until %d.", lastTimestamp);
            throw new RuntimeException(
                    String.format("Clock moved backwards. Refusing to generate id for %d milliseconds",
                            lastTimestamp - now));
        }

        if (now != lastTimestamp) {
            // First id of a new millisecond: restart the sequence.
            sequence = 0;
        } else {
            sequence = (sequence + 1) & SEQUENCE_MASK;
            if (sequence == 0) {
                // Sequence exhausted for this millisecond: wait for the next one.
                now = tilNextMillis(lastTimestamp);
            }
        }
        lastTimestamp = now;

        long timePart = (now - EPOCH) << TIMESTAMP_LEFT_SHIFT;
        long datacenterPart = datacenterId << DATACENTER_ID_SHIFT;
        long workerPart = workerId << WORKER_ID_SHIFT;
        return timePart | datacenterPart | workerPart | sequence;
    }

    /** Spins until the clock reports a millisecond strictly after {@code lastTimestamp}. */
    private long tilNextMillis(long lastTimestamp) {
        long candidate;
        do {
            candidate = timeGen();
        } while (candidate <= lastTimestamp);
        return candidate;
    }

    /** Current time in milliseconds. */
    private long timeGen() {
        return System.currentTimeMillis();
    }

}

1.5 新增题目同步es

在domain层的新增题目方法最后把题目数据同步至es

// Sync the newly created subject to Elasticsearch.
// NOTE(review): a new IdWorkerUtil(1, 1, 1) is built on every call, so its lastTimestamp/sequence
// state is never shared; two inserts within the same millisecond would get the same id.
// Consider reusing a single shared generator instance.
SubjectInfoEs subjectInfoEs = new SubjectInfoEs();
subjectInfoEs.setDocId(new IdWorkerUtil(1, 1, 1).nextId()); // id generated by the snowflake algorithm
subjectInfoEs.setSubjectId(subjectInfoBO.getId()); // the id is populated on both the info entity and the BO
subjectInfoEs.setSubjectName(subjectInfoBO.getSubjectName());
subjectInfoEs.setSubjectAnswer(subjectInfoBO.getSubjectAnswer());
subjectInfoEs.setSubjectType(subjectInfoBO.getSubjectType().toString());
subjectInfoEs.setCreateUser("鸡翅");
subjectInfoEs.setCreateTime(new Date().getTime());
subjectEsService.insert(subjectInfoEs);

二、带高亮的全文检索

2.1 service层整体流程

  • 1.根据传入的subjectInfoEs对象转化为EsSearchRequest查询请求类,可使EsRestClient发送请求
  • 2.拿到es的返回结果,若未命中为空,则组装pageResult返回空对象
  • 3.若命中,获取文档数据集合,遍历集合把每一个文档转为subjectInfoEs对象,最后组装为pageResult
/**
 * Runs the paged full-text search and maps every hit to a {@link SubjectInfoEs}.
 *
 * <p>A missing response or missing hits produces an empty page with total 0 instead of a
 * {@code NullPointerException}.
 *
 * @param subjectInfoEs carries the keyword and the requested pageNo/pageSize
 * @return page result echoing pageNo/pageSize, with the converted hits and the total hit count
 */
@Override
public PageResult<SubjectInfoEs> querySubjectList(SubjectInfoEs subjectInfoEs) {
    EsSearchRequest esSearchRequest = createSearchListQuery(subjectInfoEs);
    SearchResponse searchResponse = EsRestClient.searchWithTermQuery(getEsIndexInfo(), esSearchRequest);

    List<SubjectInfoEs> subjectInfoEsList = new ArrayList<>();
    int total = 0;

    // Guard the response itself as well as its hits; both are treated as "nothing matched".
    SearchHits searchHits = searchResponse == null ? null : searchResponse.getHits();
    if (searchHits != null && searchHits.getHits() != null) {
        for (SearchHit hit : searchHits.getHits()) {
            SubjectInfoEs infoEs = convertResult(hit); // null when the hit carries no source
            if (Objects.nonNull(infoEs)) {
                subjectInfoEsList.add(infoEs);
            }
        }
        // getTotalHits() is null when total-hit tracking is disabled on the request;
        // fall back to the number of records on this page in that case.
        total = searchHits.getTotalHits() == null
                ? subjectInfoEsList.size()
                : (int) searchHits.getTotalHits().value;
    }

    // Single assembly point for both the empty and the non-empty page.
    PageResult<SubjectInfoEs> pageResult = new PageResult<>();
    pageResult.setPageNo(subjectInfoEs.getPageNo());
    pageResult.setPageSize(subjectInfoEs.getPageSize());
    pageResult.setRecords(subjectInfoEsList);
    pageResult.setTotal(total);
    return pageResult;
}

2.2 根据传入的subjectInfoEs对象转化为EsSearchRequest查询请求类

private EsSearchRequest createSearchListQuery(SubjectInfoEs subjectInfoEs) {
    EsSearchRequest esSearchRequest = new EsSearchRequest();
    BoolQueryBuilder bq = new BoolQueryBuilder();

    MatchQueryBuilder subjectNameQueryBuilder =
            QueryBuilders.matchQuery(EsSubjectFields.SUBJECT_NAME, subjectInfoEs.getKeyword());
    bq.should(subjectNameQueryBuilder); //将查询条件添加到布尔查询构造器中,should 表示或关系,为false时返回null
    bq.boost(2); //题目名称的相关性算分权重加2,优先展示题目名称匹配度高的。

    MatchQueryBuilder subjectAnswerQueryBuilder =
            QueryBuilders.matchQuery(EsSubjectFields.SUBJECT_ANSWER, subjectInfoEs.getKeyword());
    bq.should(subjectAnswerQueryBuilder);

    //搜索只返回简答题,构造简答题查询条件
    MatchQueryBuilder subjectTypeQueryBuilder =
            QueryBuilders.matchQuery(EsSubjectFields.SUBJECT_TYPE, SubjectInfoTypeEnum.BRIEF.getCode());
    bq.must(subjectTypeQueryBuilder); //必须为简答题,为must
    bq.minimumShouldMatch(1); //设置在should中至少需要匹配的条件数量,为1即至少匹配一个条件才返回结果。

    /**
     * 高亮构造器
     * field:* 对所有字段进行高亮处理
     * requireFieldMatch:false表示一个字段可以匹配多个高亮片段,默认为true即一个字段只能匹配一个高亮片段
     * preTags:为搜索结果的高亮部分设置了前后标签
     */
    HighlightBuilder highlightBuilder = new HighlightBuilder().field("*").requireFieldMatch(false);
    highlightBuilder.preTags("<span style = "color:red">");
    highlightBuilder.postTags("</span>");



    esSearchRequest.setBq(bq);
    esSearchRequest.setHighlightBuilder(highlightBuilder);
    esSearchRequest.setFields(EsSubjectFields.FIELD_QUERY); //指定搜索结果中包含的字段信息
    esSearchRequest.setFrom((subjectInfoEs.getPageNo() - 1) * subjectInfoEs.getPageSize()); //起始文档的位置(起始页数 - 1)*页容量,第一条文档为0
    esSearchRequest.setSize(subjectInfoEs.getPageSize());
    esSearchRequest.setNeedScroll(false); //不需要快照缓存
    return esSearchRequest;
}

2.3 把每一个文档转为subjectInfoEs对象

/**
 * Converts one search hit into a {@link SubjectInfoEs}.
 *
 * <p>Subject name and answer are replaced by their highlighted versions when the hit carries
 * highlight fragments for them; otherwise the plain source values are used.
 *
 * @param hit the search hit to convert
 * @return the converted entity, or {@code null} when the hit has no source data
 */
private SubjectInfoEs convertResult(SearchHit hit) {
    Map<String, Object> sourceAsMap = hit.getSourceAsMap(); // document fields as a map
    if (CollectionUtils.isEmpty(sourceAsMap)) {
        return null;
    }

    SubjectInfoEs result = new SubjectInfoEs();
    result.setSubjectId(MapUtils.getLong(sourceAsMap, EsSubjectFields.SUBJECT_ID));
    result.setDocId(MapUtils.getLong(sourceAsMap, EsSubjectFields.DOC_ID));
    result.setSubjectType(MapUtils.getString(sourceAsMap, EsSubjectFields.SUBJECT_TYPE));
    // create_user / create_time are part of the requested fields, so map them as well.
    result.setCreateUser(MapUtils.getString(sourceAsMap, EsSubjectFields.CREATE_USER));
    result.setCreateTime(MapUtils.getLong(sourceAsMap, EsSubjectFields.CREATE_TIME));

    // Score is multiplied by 100 and rounded half-up to two decimals.
    // NOTE(review): Elasticsearch scores are not bounded to 1, so this is not a true percentage.
    float score = hit.getScore();
    if (!Float.isNaN(score) && !Float.isInfinite(score)) { // BigDecimal cannot parse NaN/Infinity
        result.setScore(new BigDecimal(String.valueOf(score))
                .multiply(new BigDecimal("100.00"))
                .setScale(2, java.math.RoundingMode.HALF_UP));
    }

    // Highlights are returned separately from the source; overlay them on name and answer.
    Map<String, HighlightField> highlightFields = hit.getHighlightFields();
    result.setSubjectName(highlightedOrPlain(highlightFields, EsSubjectFields.SUBJECT_NAME,
            MapUtils.getString(sourceAsMap, EsSubjectFields.SUBJECT_NAME)));
    result.setSubjectAnswer(highlightedOrPlain(highlightFields, EsSubjectFields.SUBJECT_ANSWER,
            MapUtils.getString(sourceAsMap, EsSubjectFields.SUBJECT_ANSWER)));
    return result;
}

/**
 * Returns the concatenated highlight fragments of {@code fieldName}, or {@code plainValue}
 * when the hit has no highlight (or no fragments) for that field.
 */
private String highlightedOrPlain(Map<String, HighlightField> highlightFields,
                                  String fieldName, String plainValue) {
    HighlightField highlightField = highlightFields == null ? null : highlightFields.get(fieldName);
    if (highlightField == null || highlightField.getFragments() == null) {
        return plainValue;
    }
    StringBuilder joined = new StringBuilder();
    for (Text fragment : highlightField.getFragments()) {
        joined.append(fragment.string());
    }
    return joined.toString();
}

高亮的结果与查询的文档结果默认是分离的,并不在一起。

因此解析高亮的代码需要额外处理:

image.png

字段中每一个需要高亮显示的语句,都会以数组形式逐条返回。如上图所示:highlight.name 属性为集合,存储了 name 字段中需要高亮显示的片段,所以上述代码采用 StringBuilder 进行拼接,最后再把 subjectInfoEs 实体类中对应的字段覆盖为高亮后的内容。

2.4 完善controller和domain层

在SubjectInfoDTO和BO中添加keyWord字段;subjectInfoEs中的pageNo、pageSize均由前端传递。

/**
 * Full-text search endpoint for subjects.
 *
 * <p>Requires a non-empty keyword; pageNo and pageSize are taken from the request body.
 *
 * @param subjectInfoDTO request body with keyWord, pageNo and pageSize
 * @return the page of matching subjects, or a failure result when validation or the search fails
 */
@PostMapping("/getSubjectPageBySearch")
public Result<PageResult<SubjectInfoEs>> querySubjectByKeyword(@RequestBody SubjectInfoDTO subjectInfoDTO) {
    try {
        if (log.isInfoEnabled()) {
            log.info("SubjectController.getSubjectPageBySearch.dto:{}", JSON.toJSONString(subjectInfoDTO));
        }
        Preconditions.checkArgument(!StringUtils.isEmpty(subjectInfoDTO.getKeyWord()), "关键词不能为空");
        SubjectInfoBO subjectInfoBO = SubjectInfoDTOConvert.INSTANCE.subjectInfoDtoToBo(subjectInfoDTO);
        subjectInfoBO.setPageNo(subjectInfoDTO.getPageNo());
        subjectInfoBO.setPageSize(subjectInfoDTO.getPageSize());
        PageResult<SubjectInfoEs> boPageResult = subjectInfoDomainService.getSubjectPageBySearch(subjectInfoBO);
        return Result.ok(boPageResult);
    } catch (Exception e) {
        // Failures are logged at error level (with the stack trace) so they are not lost among info logs.
        log.error("SubjectController.getSubjectPageBySearch.error:{}", e.getMessage(), e);
        return Result.fail("全文检索失败");
    }
}
/**
 * Full-text search at the domain layer: copies keyword and paging from the BO into an
 * Elasticsearch query object and delegates to the ES service.
 *
 * @param subjectInfoBO carries keyWord, pageNo and pageSize
 * @return the page returned by {@code subjectEsService.querySubjectList}
 */
@Override
public PageResult<SubjectInfoEs> getSubjectPageBySearch(SubjectInfoBO subjectInfoBO) {
    SubjectInfoEs query = new SubjectInfoEs();
    query.setKeyword(subjectInfoBO.getKeyWord());
    query.setPageNo(subjectInfoBO.getPageNo());
    query.setPageSize(subjectInfoBO.getPageSize());
    return subjectEsService.querySubjectList(query);
}