Elasticsearch京东搜索实例

·  阅读 819

环境搭建

创建一个SpringBoot项目。

配置

image-20201118161046183

<properties>
    <java.version>1.8</java.version>
    <!-- Custom Elasticsearch version for the dependency; keep it consistent with the locally installed Elasticsearch -->
    <elasticsearch.version>7.6.2</elasticsearch.version>
</properties>

<dependencies>
    <!-- Web page (HTML) parser; it can only parse web pages -->
    <dependency>
        <groupId>org.jsoup</groupId>
        <artifactId>jsoup</artifactId>
        <version>1.10.2</version>
    </dependency>
    <!-- fastjson: JSON serialization -->
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.62</version>
    </dependency>
    <!-- Elasticsearch starter -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
    </dependency>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-thymeleaf</artifactId>
    </dependency>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-web</artifactId>
    </dependency>

    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-devtools</artifactId>
        <scope>runtime</scope>
        <optional>true</optional>
    </dependency>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-configuration-processor</artifactId>
        <optional>true</optional>
    </dependency>
    <dependency>
        <groupId>org.projectlombok</groupId>
        <artifactId>lombok</artifactId>
        <optional>true</optional>
    </dependency>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-test</artifactId>
        <scope>test</scope>
        <exclusions>
            <exclusion>
                <groupId>org.junit.vintage</groupId>
                <artifactId>junit-vintage-engine</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
</dependencies>
复制代码

image-20201118161058938

## Disable the Thymeleaf cache
spring.thymeleaf.cache=false
复制代码

image-20201118161405426

导入页面资料。这个在Elasticsearch概述中留有百度云链接

controller

image-20201118161556687

/**
 * MVC controller that maps the site root to the index view.
 */
@Controller
public class IndexController {

    /** View name returned for the landing page. */
    private static final String INDEX_VIEW = "index";

    /**
     * Handles requests for {@code /} and {@code /index}.
     *
     * @return the view name {@code "index"}
     */
    @RequestMapping(value = {"/", "/index"})
    public String index() {
        return INDEX_VIEW;
    }
}
复制代码

启动项目查看效果~ http://localhost:8080/

image-20201118161733077

Jsoup解析

<!-- Web page (HTML) parser; it can only parse web pages -->
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.10.2</version>
</dependency>
复制代码

image-20201118162431516

image-20201118162754511

image-20201118165224792

创建utils包,创建HtmlParseUtil类

通过对其网站的分析,就应该能看懂下面的代码了

image-20201118164733249

/**
 * Demo scraper that fetches a JD search result page with Jsoup and prints the
 * image URL, price and title of every listed product.
 *
 * <p>Annotated with {@code @Component} so it can be injected via {@code @Autowired};
 * when it is not managed by Spring it can simply be created with {@code new}.
 */
@Component
public class HtmlParseUtil {

    /**
     * Fetches the search page for the keyword "java" and prints the parsed products.
     *
     * @param args unused
     * @throws IOException if the page cannot be fetched or parsed
     */
    public static void main(String[] args) throws IOException {
        // Page to request; this needs network access.
        String url = "https://search.jd.com/Search?keyword=java";
        // Fetch and parse the page (30 s timeout). The returned Document supports
        // DOM-style lookups similar to the document object available in browser JS.
        Document document = Jsoup.parse(new URL(url), 30000);
        Element element = document.getElementById("J_goodsList");
        // getElementById returns null when the id is absent (for example when the
        // site answers with a different page), so guard before dereferencing.
        if (element == null) {
            System.err.println("Element #J_goodsList not found in " + url);
            return;
        }
        // Print the HTML inside the J_goodsList element.
        System.out.println(element.html());
        // Every product is rendered as an <li> element.
        Elements elements = element.getElementsByTag("li");
        for (Element e1 : elements) {
            // Images are lazy-loaded, so the URL is read from data-lazy-img.
            String img = e1.getElementsByTag("img").eq(0).attr("data-lazy-img");
            String price = e1.getElementsByClass("p-price").eq(0).text();
            String title = e1.getElementsByClass("p-name").eq(0).text();
            System.out.println("========================================");
            System.out.println(img);
            System.out.println(price);
            System.out.println(title);
        }
    }
}
复制代码

image-20201118164644237

成功获取到相应信息。接着对其进行封装。首先创建一个pojo Content对象

image-20201118164634402

然后封装成一个parseJD方法。

image-20201118165533861

image-20201118165602244

/**
 * Scrapes JD search result pages with Jsoup and converts each listed product
 * into a {@code Content} object.
 *
 * <p>Annotated with {@code @Component} so it can be injected via {@code @Autowired};
 * when it is not managed by Spring it can simply be created with {@code new}.
 */
@Component
public class HtmlParseUtil {

    /**
     * Manual smoke test: parses the results for "Vue" and prints them.
     *
     * @param args unused
     * @throws IOException if the page cannot be fetched or parsed
     */
    public static void main(String[] args) throws IOException {
        new HtmlParseUtil().parseJD("Vue").forEach(System.out::println);
    }

    /**
     * Fetches the JD search page for the given keyword and extracts the products.
     *
     * @param keywords search keyword; it is URL-encoded before being sent
     * @return the parsed products; empty when the result container is missing
     * @throws IOException if the page cannot be fetched or parsed
     */
    public ArrayList<Content> parseJD(String keywords) throws IOException {
        // Encode the keyword so spaces, non-ASCII text and characters such as '&'
        // cannot corrupt the query string. Requires network access.
        String url = "https://search.jd.com/Search?keyword="
                + java.net.URLEncoder.encode(keywords, "UTF-8");
        System.out.println(url);
        // Fetch and parse the page (30 s timeout). The returned Document supports
        // DOM-style lookups similar to the document object available in browser JS.
        Document document = Jsoup.parse(new URL(url), 30000);

        ArrayList<Content> goodsList = new ArrayList<>();
        Element element = document.getElementById("J_goodsList");
        // getElementById returns null when the id is absent; report "no products"
        // instead of failing with a NullPointerException.
        if (element == null) {
            return goodsList;
        }

        // Every product is rendered as an <li> element.
        Elements elements = element.getElementsByTag("li");
        for (Element e1 : elements) {
            // Images are lazy-loaded, so the URL is read from data-lazy-img.
            String img = e1.getElementsByTag("img").eq(0).attr("data-lazy-img");
            String price = e1.getElementsByClass("p-price").eq(0).text();
            String title = e1.getElementsByClass("p-name").eq(0).text();

            Content content = new Content();
            content.setImg(img);
            content.setTitle(title);
            content.setPrice(price);
            goodsList.add(content);
        }
        return goodsList;
    }
}
复制代码

业务编写

首先照样配置Elasticsearch的配置类。

image-20201118170012108

/**
 * Spring configuration that exposes the Elasticsearch high-level REST client as a bean.
 */
@Configuration
public class ElasticSearchClientConfig {

    /**
     * Builds a client that talks to a single node at http://localhost:9200.
     *
     * @return the configured {@code RestHighLevelClient}
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        HttpHost node = new HttpHost("localhost", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(node));
    }
}
复制代码

编写service业务类

image-20201118170455428

/**
 * Business service that scrapes JD products and stores them in Elasticsearch.
 */
@Service
public class ContentService {

    @Autowired
    RestHighLevelClient restHighLevelClient;

    /**
     * Scrapes the products for the given keyword and bulk-indexes them into
     * the {@code jd_goods} index.
     *
     * @param keywords search keyword passed to the JD scraper
     * @return {@code true} when every document was indexed without failures;
     *         {@code false} when nothing was scraped or any item failed
     * @throws IOException if scraping or the Elasticsearch request fails
     */
    public Boolean parseContent(String keywords) throws IOException {
        ArrayList<Content> contents = new HtmlParseUtil().parseJD(keywords);
        // A bulk request without any action is rejected by the client's request
        // validation, so report "nothing inserted" instead of sending it.
        if (contents.isEmpty()) {
            return false;
        }

        BulkRequest bulkRequest = new BulkRequest();
        bulkRequest.timeout("2m"); // timeout of two minutes for the bulk request

        for (Content content : contents) {
            // Serialize once and reuse the JSON for both logging and indexing.
            String json = JSON.toJSONString(content);
            System.out.println(json);
            bulkRequest.add(new IndexRequest("jd_goods").source(json, XContentType.JSON));
        }
        BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        return !bulk.hasFailures(); // true when the insert succeeded
    }
}
复制代码

编写controller

image-20201118170914091

/**
 * REST endpoint that triggers scraping and indexing of JD products.
 */
@RestController
public class ContentController {

    @Autowired
    ContentService contentService;

    /**
     * Scrapes the products for the keyword in the path and stores them via the service.
     *
     * @param keyword search keyword taken from the URL path
     * @return the result reported by {@code ContentService.parseContent}
     * @throws IOException propagated from the service
     */
    @GetMapping("/parse/{keyword}")
    public Boolean parse(@PathVariable("keyword") String keyword) throws IOException {
        return contentService.parseContent(keyword);
    }
}
复制代码

启动项目进行测试http://localhost:8080/parse/java

image-20201118170930108

image-20201118171017200

成功添加相关java商品资料。

接着我们继续编写service,添加分页搜索ES中的数据。

image-20201118171855808

/**
 * Business service that scrapes JD products into Elasticsearch and offers
 * a paged search over them.
 */
@Service
public class ContentService {

    @Autowired
    RestHighLevelClient restHighLevelClient;

    /**
     * Scrapes the products for the given keyword and bulk-indexes them into
     * the {@code jd_goods} index.
     *
     * @param keywords search keyword passed to the JD scraper
     * @return {@code true} when every document was indexed without failures;
     *         {@code false} when nothing was scraped or any item failed
     * @throws IOException if scraping or the Elasticsearch request fails
     */
    public Boolean parseContent(String keywords) throws IOException {
        ArrayList<Content> contents = new HtmlParseUtil().parseJD(keywords);
        // A bulk request without any action is rejected by the client's request
        // validation, so report "nothing inserted" instead of sending it.
        if (contents.isEmpty()) {
            return false;
        }

        BulkRequest bulkRequest = new BulkRequest();
        bulkRequest.timeout("2m"); // timeout of two minutes for the bulk request

        for (Content content : contents) {
            // Serialize once and reuse the JSON for both logging and indexing.
            String json = JSON.toJSONString(content);
            System.out.println(json);
            bulkRequest.add(new IndexRequest("jd_goods").source(json, XContentType.JSON));
        }
        BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        return !bulk.hasFailures(); // true when the insert succeeded
    }

    /**
     * Runs a term query on the {@code title} field of {@code jd_goods} and
     * returns one page of hits.
     *
     * @param keyword  term to match against {@code title}
     * @param pageNo   1-based page number; values below 1 are treated as 1
     * @param pageSize number of hits per page
     * @return the {@code _source} map of every hit on the requested page
     * @throws IOException if the Elasticsearch request fails
     */
    public List<Map<String,Object>> searchPage(String keyword,int pageNo,int pageSize) throws IOException {
        if (pageNo <= 1) {
            pageNo = 1;
        }

        SearchRequest searchRequest = new SearchRequest("jd_goods");
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();

        // Paging: "from" is a document offset, not a page number, so convert
        // the 1-based page into the index of its first hit.
        sourceBuilder.from((pageNo - 1) * pageSize);
        sourceBuilder.size(pageSize);

        // Exact term match on the title field.
        TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title", keyword);
        sourceBuilder.query(termQueryBuilder);
        sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS)); // search timeout

        // Execute the search.
        searchRequest.source(sourceBuilder);
        SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        List<Map<String,Object>> list = new ArrayList<>();
        for (SearchHit hit : searchResponse.getHits().getHits()) {
            list.add(hit.getSourceAsMap());
        }
        return list;
    }
}
复制代码

接着在controller中添加一个请求

image-20201118171942279

/**
 * REST endpoints for scraping JD products into Elasticsearch and searching them.
 */
@RestController
public class ContentController {

    @Autowired
    ContentService contentService;

    /**
     * Scrapes the products for the keyword in the path and stores them via the service.
     *
     * @param keyword search keyword taken from the URL path
     * @return the result reported by {@code ContentService.parseContent}
     * @throws IOException propagated from the service
     */
    @GetMapping("/parse/{keyword}")
    public Boolean parse(@PathVariable("keyword") String keyword) throws IOException {
        return contentService.parseContent(keyword);
    }

    /**
     * Returns one page of search results for the keyword.
     *
     * @param keyword  search keyword taken from the URL path
     * @param pageNo   page number taken from the URL path
     * @param pageSize page size taken from the URL path
     * @return the list produced by {@code ContentService.searchPage}
     * @throws IOException propagated from the service
     */
    @GetMapping("/search/{keyword}/{pageNo}/{pageSize}")
    public List<Map<String,Object>> search(@PathVariable("keyword") String keyword,
                                           @PathVariable("pageNo") int pageNo,
                                           @PathVariable("pageSize") int pageSize) throws IOException {
        return contentService.searchPage(keyword, pageNo, pageSize);
    }
}
复制代码

启动项目进行测试http://localhost:8080/search/java/1/20

image-20201118172147380

前端页面

导入vue 和 axios,我这里使用的是在线版的

<script src="https://cdn.staticfile.org/vue/2.6.2/vue.min.js"></script>

<script src="https://unpkg.com/axios/dist/axios.min.js"></script>

修改我们的index页面。

CleanShot 2020-11-18 at 18.02.39

启动项目查看效果。(我已经解析过了vue数据添加到了es中)

CleanShot 2020-11-18 at 18.09.20

高亮功能

我们在业务类service中修改一下代码。

image-20201118183919496

将高亮中的字段替换添加到_source中的title

image-20201118134906443

/**
 * Business service that scrapes JD products into Elasticsearch and offers
 * a paged search with highlighted titles.
 */
@Service
public class ContentService {

    @Autowired
    RestHighLevelClient restHighLevelClient;

    /**
     * Scrapes the products for the given keyword and bulk-indexes them into
     * the {@code jd_goods} index.
     *
     * @param keywords search keyword passed to the JD scraper
     * @return {@code true} when every document was indexed without failures;
     *         {@code false} when nothing was scraped or any item failed
     * @throws IOException if scraping or the Elasticsearch request fails
     */
    public Boolean parseContent(String keywords) throws IOException {
        ArrayList<Content> contents = new HtmlParseUtil().parseJD(keywords);
        // A bulk request without any action is rejected by the client's request
        // validation, so report "nothing inserted" instead of sending it.
        if (contents.isEmpty()) {
            return false;
        }

        BulkRequest bulkRequest = new BulkRequest();
        bulkRequest.timeout("2m"); // timeout of two minutes for the bulk request

        for (Content content : contents) {
            // Serialize once and reuse the JSON for both logging and indexing.
            String json = JSON.toJSONString(content);
            System.out.println(json);
            bulkRequest.add(new IndexRequest("jd_goods").source(json, XContentType.JSON));
        }
        BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        return !bulk.hasFailures(); // true when the insert succeeded
    }

    /**
     * Runs a term query on the {@code title} field of {@code jd_goods} and
     * returns one page of hits, with the title replaced by its highlighted
     * version when Elasticsearch returned one.
     *
     * @param keyword  term to match against {@code title}
     * @param pageNo   1-based page number; values below 1 are treated as 1
     * @param pageSize number of hits per page
     * @return the {@code _source} map of every hit on the requested page
     * @throws IOException if the Elasticsearch request fails
     */
    public List<Map<String,Object>> searchPage(String keyword,int pageNo,int pageSize) throws IOException {
        if (pageNo <= 1) {
            pageNo = 1;
        }

        SearchRequest searchRequest = new SearchRequest("jd_goods");
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();

        // Paging: "from" is a document offset, not a page number, so convert
        // the 1-based page into the index of its first hit.
        sourceBuilder.from((pageNo - 1) * pageSize);
        sourceBuilder.size(pageSize);

        // Exact term match on the title field.
        TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title", keyword);
        sourceBuilder.query(termQueryBuilder);
        sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS)); // search timeout

        // Highlighting: wrap matches in the title with a red <span>.
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.field("title");
        // false = a field may be highlighted even if the query did not match
        // on that field (the default only highlights fields the query matched).
        highlightBuilder.requireFieldMatch(false);
        highlightBuilder.preTags("<span style='color:red'>");
        highlightBuilder.postTags("</span>");
        sourceBuilder.highlighter(highlightBuilder);

        // Execute the search.
        searchRequest.source(sourceBuilder);
        SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        List<Map<String,Object>> list = new ArrayList<>();
        for (SearchHit hit : searchResponse.getHits().getHits()) {
            Map<String, HighlightField> highlightFields = hit.getHighlightFields();
            Map<String, Object> sourceAsMap = hit.getSourceAsMap(); // original document
            HighlightField title = highlightFields.get("title");
            // When a highlighted title exists, join its fragments and use the
            // result in place of the plain title.
            if (title != null) {
                StringBuilder hTitle = new StringBuilder();
                for (Text text : title.fragments()) {
                    hTitle.append(text);
                }
                sourceAsMap.put("title", hTitle.toString());
            }
            list.add(sourceAsMap);
        }
        return list;
    }
}
复制代码

image-20201118184209351

<!-- v-html inserts result.title as raw HTML, so the highlight span tags returned in the title are rendered instead of shown as text -->
<p class="productTitle">
    <a v-html="result.title">  </a>
</p>
复制代码

重启服务,访问测试。http://localhost:8080/

CleanShot 2020-11-18 at 18.44.34

完成!

分类:
后端
标签:
分类:
后端
标签:
收藏成功!
已添加到「」, 点击更改