一、新增题目同步到es
1.1 es文档实体类
/**
 * Elasticsearch document entity for a subject (question).
 * Holds the indexed document fields together with the search input (keyword) and
 * search output (relevance score); paging accessors are inherited from PageInfo.
 */
@Data
public class SubjectInfoEs extends PageInfo implements Serializable {
/**
* Subject id
*/
private Long subjectId;
/**
* Elasticsearch document id
*/
private Long docId;
/**
* Subject name
*/
private String subjectName;
/**
* Subject answer
*/
private String subjectAnswer;
/**
* Subject type
*/
private String subjectType;
/**
* Creator
*/
private String createUser;
/**
* Creation time (timestamp)
*/
private Long createTime;
/**
* Keyword to search for
*/
private String keyword;
/**
* Relevance score
*/
private BigDecimal score;
}
1.2 Es文档实体常量类
/**
 * Field-name constants of the subject document stored in Elasticsearch.
 *
 * <p>This is a pure constants holder: it is final and cannot be instantiated.
 */
public final class EsSubjectFields {

    public static final String DOC_ID = "doc_id";
    public static final String SUBJECT_ID = "subject_id";
    public static final String SUBJECT_NAME = "subject_name";
    public static final String SUBJECT_ANSWER = "subject_answer";
    public static final String SUBJECT_TYPE = "subject_type";
    public static final String CREATE_USER = "create_user";
    public static final String CREATE_TIME = "create_time";

    /**
     * Fields that can be queried.
     *
     * <p>NOTE(review): DOC_ID is not part of this list although the search-result conversion
     * reads it from the hit source; confirm whether it should be included.
     */
    public static final String[] FIELD_QUERY = {
        SUBJECT_ID, SUBJECT_NAME, SUBJECT_ANSWER, SUBJECT_TYPE, CREATE_USER, CREATE_TIME
    };

    /** Constants holder; not meant to be instantiated. */
    private EsSubjectFields() {
    }
}
1.3 es的service层
/**
 * Elasticsearch-facing service for subject documents: insert and paged query.
 */
public interface SubjectEsService {
/**
* Inserts a subject document.
*/
boolean insert(SubjectInfoEs subjectInfoEs);
/**
* Paged query of subject documents.
*/
PageResult<SubjectInfoEs> querySubjectList(SubjectInfoEs subjectInfoEs);
}
/**
 * {@link SubjectEsService} implementation that writes subject documents through EsRestClient.
 */
@Service
@Log4j2
public class SubjectEsServiceImpl implements SubjectEsService {

    /** Name of the Elasticsearch cluster the subject index lives in. */
    private static final String CLUSTER_NAME = "cf441bd98016";

    /** Name of the index that stores subject documents. */
    private static final String INDEX_NAME = "subject_index";

    /**
     * Inserts the given subject as a document whose id is the subject's docId.
     *
     * @param subjectInfoEs subject to index; must carry a non-null docId
     * @return the result reported by EsRestClient.insertDoc, or {@code false} when the
     *         argument or its docId is null
     */
    @Override
    public boolean insert(SubjectInfoEs subjectInfoEs) {
        // String.valueOf(null) would yield the literal id "null", so every id-less document
        // would silently land on the same Elasticsearch document. Reject such input instead.
        if (subjectInfoEs == null || subjectInfoEs.getDocId() == null) {
            log.warn("SubjectEsServiceImpl.insert.skipped: subject or docId is null");
            return false;
        }
        EsSourceData esSourceData = new EsSourceData();
        esSourceData.setDocId(String.valueOf(subjectInfoEs.getDocId())); // explicit document id
        esSourceData.setData(convert2EsSourceData(subjectInfoEs));
        return EsRestClient.insertDoc(getEsIndexInfo(), esSourceData);
    }

    /**
     * Paged query. Placeholder: this version of the class does not implement the search yet
     * and returns {@code null}.
     */
    @Override
    public PageResult<SubjectInfoEs> querySubjectList(SubjectInfoEs subjectInfoEs) {
        return null;
    }

    /**
     * Builds the cluster/index descriptor that decides which cluster and index are operated on.
     */
    private EsIndexInfo getEsIndexInfo() {
        EsIndexInfo esIndexInfo = new EsIndexInfo();
        esIndexInfo.setClusterName(CLUSTER_NAME);
        esIndexInfo.setIndexName(INDEX_NAME);
        return esIndexInfo;
    }

    /**
     * Converts the entity into the field-name to value map used as the document source.
     */
    private Map<String, Object> convert2EsSourceData(SubjectInfoEs subjectInfoEs) {
        Map<String, Object> data = new HashMap<>();
        data.put(EsSubjectFields.SUBJECT_ID, subjectInfoEs.getSubjectId());
        data.put(EsSubjectFields.DOC_ID, subjectInfoEs.getDocId());
        data.put(EsSubjectFields.SUBJECT_NAME, subjectInfoEs.getSubjectName());
        data.put(EsSubjectFields.SUBJECT_ANSWER, subjectInfoEs.getSubjectAnswer());
        data.put(EsSubjectFields.SUBJECT_TYPE, subjectInfoEs.getSubjectType());
        data.put(EsSubjectFields.CREATE_USER, subjectInfoEs.getCreateUser());
        data.put(EsSubjectFields.CREATE_TIME, subjectInfoEs.getCreateTime());
        return data;
    }
}
1.4 雪花算法Util
/**
 * Snowflake-style generator of 64-bit ids.
 *
 * <p>Bit layout, high to low: milliseconds elapsed since {@code EPOCH}, a 5-bit datacenter id,
 * a 5-bit worker id and a 12-bit per-millisecond sequence.
 *
 * <p>{@link #nextId()} is synchronized on the instance. The sequence state lives in the
 * instance, so ids are only guaranteed distinct among calls made on the same instance;
 * share one instance instead of creating a new one per id.
 */
public class IdWorkerUtil {

    /** Custom epoch (milliseconds) subtracted from the current time. */
    private static final long EPOCH = 1585644268888L;

    private static final long WORKER_ID_BITS = 5L;
    private static final long DATACENTER_ID_BITS = 5L;
    private static final long SEQUENCE_BITS = 12L;

    /** Largest value that fits in the worker-id bits (31). */
    private static final long MAX_WORKER_ID = ~(-1L << WORKER_ID_BITS);
    /** Largest value that fits in the datacenter-id bits (31). */
    private static final long MAX_DATACENTER_ID = ~(-1L << DATACENTER_ID_BITS);
    /** Mask that keeps the sequence within its bits (4095). */
    private static final long SEQUENCE_MASK = ~(-1L << SEQUENCE_BITS);

    private static final long WORKER_ID_SHIFT = SEQUENCE_BITS;
    private static final long DATACENTER_ID_SHIFT = SEQUENCE_BITS + WORKER_ID_BITS;
    private static final long TIMESTAMP_LEFT_SHIFT =
            SEQUENCE_BITS + WORKER_ID_BITS + DATACENTER_ID_BITS;

    private final long workerId;
    private final long datacenterId;
    private long sequence;
    private long lastTimestamp = -1L;

    /**
     * Creates a generator.
     *
     * @param workerId     worker id, 0 to 31 inclusive
     * @param datacenterId datacenter id, 0 to 31 inclusive
     * @param sequence     initial sequence value; it does not influence the generated ids
     *                     because the first call to {@link #nextId()} resets the sequence to 0
     * @throws IllegalArgumentException if workerId or datacenterId is out of range
     */
    public IdWorkerUtil(long workerId, long datacenterId, long sequence) {
        if (workerId > MAX_WORKER_ID || workerId < 0) {
            throw new IllegalArgumentException(
                    String.format("worker Id can't be greater than %d or less than 0", MAX_WORKER_ID));
        }
        if (datacenterId > MAX_DATACENTER_ID || datacenterId < 0) {
            throw new IllegalArgumentException(
                    String.format("datacenter Id can't be greater than %d or less than 0",
                            MAX_DATACENTER_ID));
        }
        this.workerId = workerId;
        this.datacenterId = datacenterId;
        this.sequence = sequence;
    }

    public long getWorkerId() {
        return workerId;
    }

    public long getDatacenterId() {
        return datacenterId;
    }

    /** Returns the current wall-clock time in milliseconds. */
    public long getTimestamp() {
        return System.currentTimeMillis();
    }

    /**
     * Produces the next id of this generator.
     *
     * @return the next id
     * @throws IllegalStateException if the system clock is behind the time of the previous id
     */
    public synchronized long nextId() {
        long timestamp = timeGen();
        if (timestamp < lastTimestamp) {
            throw new IllegalStateException(
                    String.format("Clock moved backwards. Refusing to generate id for %d milliseconds",
                            lastTimestamp - timestamp));
        }
        if (lastTimestamp == timestamp) {
            sequence = (sequence + 1) & SEQUENCE_MASK;
            if (sequence == 0) {
                // Sequence exhausted within this millisecond: wait for the next one.
                timestamp = tilNextMillis(lastTimestamp);
            }
        } else {
            sequence = 0;
        }
        lastTimestamp = timestamp;
        return ((timestamp - EPOCH) << TIMESTAMP_LEFT_SHIFT)
                | (datacenterId << DATACENTER_ID_SHIFT)
                | (workerId << WORKER_ID_SHIFT)
                | sequence;
    }

    /** Busy-waits until the clock has moved past {@code lastTimestamp} and returns that time. */
    private long tilNextMillis(long lastTimestamp) {
        long timestamp = timeGen();
        while (timestamp <= lastTimestamp) {
            timestamp = timeGen();
        }
        return timestamp;
    }

    /** Current time in milliseconds. */
    private long timeGen() {
        return System.currentTimeMillis();
    }
}
1.5 新增题目同步es
在domain层的新增题目方法最后把题目数据同步至es
// Sync the newly created subject to Elasticsearch.
// NOTE(review): a fresh IdWorkerUtil is created for every call, so its sequence always starts
// over; two inserts within the same millisecond would receive the same docId. Consider sharing
// a single generator instance.
SubjectInfoEs subjectInfoEs = new SubjectInfoEs();
subjectInfoEs.setDocId(new IdWorkerUtil(1, 1, 1).nextId()); // snowflake-generated document id
subjectInfoEs.setSubjectId(subjectInfoBO.getId()); // the id is populated on both the info entity and the BO
subjectInfoEs.setSubjectName(subjectInfoBO.getSubjectName());
subjectInfoEs.setSubjectAnswer(subjectInfoBO.getSubjectAnswer());
subjectInfoEs.setSubjectType(subjectInfoBO.getSubjectType().toString());
subjectInfoEs.setCreateUser("鸡翅");
// Epoch milliseconds; same value as new Date().getTime() without the legacy Date object.
subjectInfoEs.setCreateTime(System.currentTimeMillis());
subjectEsService.insert(subjectInfoEs);
二、带高亮的全文检索
2.1 service层整体流程
- 1.根据传入的subjectInfoEs对象转化为EsSearchRequest查询请求类,可使EsRestClient发送请求
- 2.拿到es的返回结果,若未命中(命中结果为空),则组装一个记录为空、总数为0的pageResult返回
- 3.若命中,获取文档数据集合,遍历集合把每一个文档转为subjectInfoEs对象,最后组装为pageResult
/**
 * Searches subjects by the keyword carried in {@code subjectInfoEs} and returns one page.
 *
 * <p>Flow: build the search request, execute it through EsRestClient, convert every hit into
 * a SubjectInfoEs and wrap the converted hits in a PageResult. When nothing is hit (or no
 * response is available) the page has no records and a total of 0.
 *
 * @param subjectInfoEs search condition carrying keyword, pageNo and pageSize
 * @return the page of matching subjects, never {@code null}
 */
@Override
public PageResult<SubjectInfoEs> querySubjectList(SubjectInfoEs subjectInfoEs) {
    EsSearchRequest esSearchRequest = createSearchListQuery(subjectInfoEs);
    SearchResponse searchResponse =
            EsRestClient.searchWithTermQuery(getEsIndexInfo(), esSearchRequest);
    List<SubjectInfoEs> subjectInfoEsList = new ArrayList<>();

    // A missing response is treated like "nothing hit" instead of failing with an NPE.
    SearchHits searchHits = searchResponse == null ? null : searchResponse.getHits();
    if (searchHits == null || searchHits.getHits() == null) {
        return buildPageResult(subjectInfoEs, subjectInfoEsList, 0);
    }

    for (SearchHit hit : searchHits.getHits()) {
        SubjectInfoEs infoEs = convertResult(hit);
        if (Objects.nonNull(infoEs)) { // hits that cannot be converted are skipped
            subjectInfoEsList.add(infoEs);
        }
    }

    // Total hit count may be absent; fall back to 0 in that case.
    int total = searchHits.getTotalHits() == null ? 0 : (int) searchHits.getTotalHits().value;
    return buildPageResult(subjectInfoEs, subjectInfoEsList, total);
}

/**
 * Assembles the page. The setter order (pageNo, pageSize, records, total) matches the
 * original code on purpose, in case the setters of PageResult derive values from each other.
 */
private PageResult<SubjectInfoEs> buildPageResult(
        SubjectInfoEs request, List<SubjectInfoEs> records, int total) {
    PageResult<SubjectInfoEs> pageResult = new PageResult<>();
    pageResult.setPageNo(request.getPageNo());
    pageResult.setPageSize(request.getPageSize());
    pageResult.setRecords(records);
    pageResult.setTotal(total);
    return pageResult;
}
2.2 将传入的subjectInfoEs对象转化为EsSearchRequest查询请求类
private EsSearchRequest createSearchListQuery(SubjectInfoEs subjectInfoEs) {
EsSearchRequest esSearchRequest = new EsSearchRequest();
BoolQueryBuilder bq = new BoolQueryBuilder();
MatchQueryBuilder subjectNameQueryBuilder =
QueryBuilders.matchQuery(EsSubjectFields.SUBJECT_NAME, subjectInfoEs.getKeyword());
bq.should(subjectNameQueryBuilder); //将查询条件添加到布尔查询构造器中,should 表示或关系,为false时返回null
bq.boost(2); //题目名称的相关性算分权重加2,优先展示题目名称匹配度高的。
MatchQueryBuilder subjectAnswerQueryBuilder =
QueryBuilders.matchQuery(EsSubjectFields.SUBJECT_ANSWER, subjectInfoEs.getKeyword());
bq.should(subjectAnswerQueryBuilder);
//搜索只返回简答题,构造简答题查询条件
MatchQueryBuilder subjectTypeQueryBuilder =
QueryBuilders.matchQuery(EsSubjectFields.SUBJECT_TYPE, SubjectInfoTypeEnum.BRIEF.getCode());
bq.must(subjectTypeQueryBuilder); //必须为简答题,为must
bq.minimumShouldMatch(1); //设置在should中至少需要匹配的条件数量,为1即至少匹配一个条件才返回结果。
/**
* 高亮构造器
* field:* 对所有字段进行高亮处理
* requireFieldMatch:false表示一个字段可以匹配多个高亮片段,默认为true即一个字段只能匹配一个高亮片段
* preTags:为搜索结果的高亮部分设置了前后标签
*/
HighlightBuilder highlightBuilder = new HighlightBuilder().field("*").requireFieldMatch(false);
highlightBuilder.preTags("<span style = "color:red">");
highlightBuilder.postTags("</span>");
esSearchRequest.setBq(bq);
esSearchRequest.setHighlightBuilder(highlightBuilder);
esSearchRequest.setFields(EsSubjectFields.FIELD_QUERY); //指定搜索结果中包含的字段信息
esSearchRequest.setFrom((subjectInfoEs.getPageNo() - 1) * subjectInfoEs.getPageSize()); //起始文档的位置(起始页数 - 1)*页容量,第一条文档为0
esSearchRequest.setSize(subjectInfoEs.getPageSize());
esSearchRequest.setNeedScroll(false); //不需要快照缓存
return esSearchRequest;
}
2.3 把每一个文档转为subjectInfoEs对象
/**
 * Converts one search hit into a SubjectInfoEs.
 *
 * <p>Plain field values come from the hit's source map. When highlight fragments exist for the
 * subject name or answer, they replace the plain value. The hit score is multiplied by 100 and
 * rounded half-up to two decimals.
 *
 * @param hit search hit to convert
 * @return the converted subject, or {@code null} when the hit has no source data
 */
private SubjectInfoEs convertResult(SearchHit hit) {
    Map<String, Object> sourceAsMap = hit.getSourceAsMap();
    if (CollectionUtils.isEmpty(sourceAsMap)) {
        return null;
    }
    SubjectInfoEs result = new SubjectInfoEs();
    result.setSubjectId(MapUtils.getLong(sourceAsMap, EsSubjectFields.SUBJECT_ID));
    result.setDocId(MapUtils.getLong(sourceAsMap, EsSubjectFields.DOC_ID));
    result.setSubjectName(MapUtils.getString(sourceAsMap, EsSubjectFields.SUBJECT_NAME));
    result.setSubjectAnswer(MapUtils.getString(sourceAsMap, EsSubjectFields.SUBJECT_ANSWER));
    result.setSubjectType(MapUtils.getString(sourceAsMap, EsSubjectFields.SUBJECT_TYPE));

    // Score scaled by 100 with two decimals, rounded half-up.
    // NOTE(review): the original comment assumed scores lie within 1; nothing here enforces it.
    float score = hit.getScore();
    if (!Float.isNaN(score) && !Float.isInfinite(score)) {
        // BigDecimal cannot represent NaN/Infinity (NumberFormatException), so such scores
        // leave the field unset.
        result.setScore(new BigDecimal(String.valueOf(score))
                .multiply(new BigDecimal("100.00"))
                .setScale(2, java.math.RoundingMode.HALF_UP));
    }

    // Highlight data is delivered separately from the source; overwrite name and answer
    // with their highlighted versions when present.
    Map<String, HighlightField> highlightFields = hit.getHighlightFields();
    String highlightedName = joinFragments(highlightFields.get(EsSubjectFields.SUBJECT_NAME));
    if (highlightedName != null) {
        result.setSubjectName(highlightedName);
    }
    String highlightedAnswer = joinFragments(highlightFields.get(EsSubjectFields.SUBJECT_ANSWER));
    if (highlightedAnswer != null) {
        result.setSubjectAnswer(highlightedAnswer);
    }
    return result;
}

/**
 * Concatenates all highlight fragments of a field; returns {@code null} when the field has no
 * highlight data.
 */
private String joinFragments(HighlightField highlightField) {
    if (highlightField == null || highlightField.getFragments() == null) {
        return null;
    }
    StringBuilder joined = new StringBuilder();
    for (Text fragment : highlightField.getFragments()) {
        joined.append(fragment.string());
    }
    return joined.toString();
}
高亮的结果与查询的文档结果默认是分离的,并不在一起。
因此解析高亮的代码需要额外处理:
字段中每一个需要高亮显示的语句,都会以数组元素的形式逐条返回,如上图显示:highlight.name属性为集合,存储了name字段中需要高亮显示的语句,所以上述代码中采用了StringBuilder进行拼接,最后再把subjectInfoEs实体类中的字段信息覆盖为高亮后的内容
2.4 完善controller和domain层
在SubjectInfoDTO和SubjectInfoBO中添加keyWord字段,subjectInfoEs中的pageNo、pageSize均由前端传递
/**
 * Full-text search of subjects by keyword, with paging.
 *
 * <p>The keyword must not be empty. Any failure, including a failed keyword check, is logged
 * and turned into a failure result.
 *
 * @param subjectInfoDTO request body carrying keyWord, pageNo and pageSize
 * @return the page of matching subjects, or a failure result
 */
@PostMapping("/getSubjectPageBySearch")
public Result<PageResult<SubjectInfoEs>> querySubjectByKeyword(@RequestBody SubjectInfoDTO subjectInfoDTO) {
    try {
        if (log.isInfoEnabled()) {
            log.info("SubjectController.getSubjectPageBySearch.dto:{}", JSON.toJSONString(subjectInfoDTO));
        }
        Preconditions.checkArgument(!StringUtils.isEmpty(subjectInfoDTO.getKeyWord()), "关键词不能为空");
        SubjectInfoBO subjectInfoBO = SubjectInfoDTOConvert.INSTANCE.subjectInfoDtoToBo(subjectInfoDTO);
        subjectInfoBO.setPageNo(subjectInfoDTO.getPageNo());
        subjectInfoBO.setPageSize(subjectInfoDTO.getPageSize());
        PageResult<SubjectInfoEs> boPageResult = subjectInfoDomainService.getSubjectPageBySearch(subjectInfoBO);
        return Result.ok(boPageResult);
    } catch (Exception e) {
        // Failures belong at ERROR level so they are not lost among INFO request logs.
        log.error("SubjectController.getSubjectPageBySearch.error:{}", e.getMessage(), e);
        return Result.fail("全文检索失败");
    }
}
/**
 * Domain-layer keyword search: copies keyword and paging values from the BO into an
 * Elasticsearch search condition and delegates to the ES service.
 *
 * @param subjectInfoBO carries keyWord, pageNo and pageSize
 * @return whatever page the ES service returns
 */
@Override
public PageResult<SubjectInfoEs> getSubjectPageBySearch(SubjectInfoBO subjectInfoBO) {
    final SubjectInfoEs searchCondition = new SubjectInfoEs();
    searchCondition.setKeyword(subjectInfoBO.getKeyWord());
    searchCondition.setPageNo(subjectInfoBO.getPageNo());
    searchCondition.setPageSize(subjectInfoBO.getPageSize());
    return subjectEsService.querySubjectList(searchCondition);
}