ES-搜索实战

259 阅读1分钟

成果

YZV46GM9.png

环境搭建

1. 导入依赖

<dependencies>

		<!-- Spring MVC + embedded server.
		     NOTE(review): this is the only starter with an explicit version; the other
		     starters declare none, so they presumably inherit one from a parent POM/BOM
		     that is not shown here. Confirm the versions line up. -->
		<dependency>
			<groupId>org.springframework.boot</groupId>
			<artifactId>spring-boot-starter-web</artifactId>
			<version>2.3.5.RELEASE</version>
		</dependency>
		<!-- Spring Data Elasticsearch starter. -->
		<dependency>
			<groupId>org.springframework.boot</groupId>
			<artifactId>spring-boot-starter-data-elasticsearch</artifactId>
		</dependency>

		<!-- Thymeleaf template engine starter. -->
		<dependency>
			<groupId>org.springframework.boot</groupId>
			<artifactId>spring-boot-starter-thymeleaf</artifactId>
		</dependency>

		<!-- Lombok annotations; optional, so it is not passed on to dependents. -->
		<dependency>
			<groupId>org.projectlombok</groupId>
			<artifactId>lombok</artifactId>
			<optional>true</optional>
		</dependency>

		<!-- Test support, test scope only. -->
		<dependency>
			<groupId>org.springframework.boot</groupId>
			<artifactId>spring-boot-starter-test</artifactId>
			<scope>test</scope>
		</dependency>

		<!-- JSON serialization.
		     NOTE(review): the groupId is com.rover12421 rather than the commonly used
		     com.alibaba; confirm this is the intended artifact, and check version 1.2.47
		     against the published fastjson security advisories before reuse. -->
		<dependency>
			<groupId>com.rover12421</groupId>
			<artifactId>fastjson</artifactId>
			<version>1.2.47</version>
		</dependency>

		<!-- HTML parser used for scraping. -->
		<!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
		<dependency>
			<groupId>org.jsoup</groupId>
			<artifactId>jsoup</artifactId>
			<version>1.13.1</version>
		</dependency>

	</dependencies>

2. 编写配置文件

# HTTP port the embedded server listens on.
server.port=9090
# Turn off Thymeleaf template caching so template edits show up without a restart.
spring.thymeleaf.cache=false

3. 创建配置类与实体类

/**
 * Spring configuration that registers the Elasticsearch high-level REST client as a bean.
 */
@Configuration
public class ElasticSearch_Config {

    /**
     * Creates a client bound to a single node at {@code http://localhost:9200}.
     *
     * @return the newly built {@link RestHighLevelClient}
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        HttpHost node = new HttpHost("localhost", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(node));
    }
}
/**
 * One scraped product entry: image reference, price text and title.
 * All three values are kept as plain strings.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class Content {
    // Image reference of the product.
    private String img;
    // Price as displayed text, not a numeric value.
    private String price;
    // Product title text.
    private String title;
}

爬取数据

1.编写工具类

/**
 * Scrapes product listings from the JD search result page.
 */
public class HtmlPraseUtil {

    /**
     * Fetches the JD search page for {@code keyword} and builds one {@link Content}
     * per {@code <li>} found under the element with id {@code J_goodsList}.
     *
     * @param keyword search term; it is URL-encoded (UTF-8) before being placed in the query string
     * @return the scraped items; empty when the page has no {@code J_goodsList} element
     *         or that element contains no {@code <li>}
     * @throws IOException if the page cannot be fetched or parsed
     */
    public List<Content> parseJD(String keyword) throws IOException {
        List<Content> list = new ArrayList<>();

        // Encode the keyword so reserved characters ('&', '#', '+', spaces, ...) cannot
        // alter or truncate the query string.
        String url = "https://search.jd.com/Search?keyword="
                + java.net.URLEncoder.encode(keyword, "UTF-8");

        // Parse the page; 'document' represents the whole HTML document (30 s timeout).
        Document document = Jsoup.parse(new URL(url), 30000);

        Element element = document.getElementById("J_goodsList");
        if (element == null) {
            // The result container is missing (different page layout, redirect, ...):
            // report "nothing found" instead of failing with a NullPointerException.
            return list;
        }

        Elements li_elements = element.getElementsByTag("li");
        for (Element e : li_elements) {
            // Images are lazy-loaded, so "src" is not usable here; the image reference
            // is carried by the "data-lazy-img" attribute instead.
            String img = e.getElementsByTag("img").eq(0).attr("data-lazy-img");
            String price = e.getElementsByClass("p-price").eq(0).text();
            String title = e.getElementsByClass("p-name").eq(0).text();

            list.add(new Content(img, price, title));
        }
        return list;
    }
}

2.在Service层调用

/**
 * Service that scrapes products for a keyword and stores them in Elasticsearch.
 */
@Service
public class ContentService {
    @Resource(name = "restHighLevelClient")
    private RestHighLevelClient client;

    /**
     * Scrapes the products for {@code key} and bulk-indexes them into the
     * {@code jd_goods} index.
     *
     * @param key search keyword passed to the scraper
     * @return {@code true} when every document was indexed without failure;
     *         {@code false} when nothing was scraped or the bulk response reports failures
     * @throws IOException if scraping or the Elasticsearch call fails
     */
    public Boolean parseContent(String key) throws IOException {
        List<Content> list = new HtmlPraseUtil().parseJD(key);
        if (list.isEmpty()) {
            // A bulk request without any action is rejected by request validation,
            // so report "nothing indexed" instead of sending it.
            return false;
        }

        BulkRequest bulkRequest = new BulkRequest();
        bulkRequest.timeout("2m");
        // Queue one index action per scraped item, serialized as JSON.
        for (Content content : list) {
            bulkRequest.add(new IndexRequest("jd_goods")
                    .source(JSON.toJSONString(content), XContentType.JSON));
        }
        return !client.bulk(bulkRequest, RequestOptions.DEFAULT).hasFailures();
    }
}

3. 在Controller层提供接口

/**
 * REST endpoint that triggers scraping and indexing for a keyword.
 */
@RestController
public class ContentController {

    @Autowired
    ContentService service;

    /**
     * Delegates to {@link ContentService#parseContent(String)} for the keyword in the path.
     *
     * @param keyword keyword taken from the URL path
     * @return the value returned by the service
     * @throws IOException propagated from the service
     */
    @RequestMapping("/prase/{keyword}")
    public Boolean prase(@PathVariable String keyword) throws IOException {
        Boolean outcome = service.parseContent(keyword);
        return outcome;
    }
}

功能实现

Service层

在Service层添加方法

/**
 * Searches the {@code jd_goods} index for documents whose {@code title} matches
 * {@code keyword} and returns one page of hits with the title highlighted.
 *
 * @param keyword  value used in the term query on {@code title}
 * @param pageNo   1-based page number; values below 1 are treated as 1
 * @param pageSize number of hits per page
 * @return the source map of each hit on the page; when a highlight exists for
 *         {@code title}, the map's {@code title} entry is replaced by the highlighted text
 * @throws IOException if the Elasticsearch request fails
 */
public List<Map<String,Object>> searchpage(String keyword,int pageNo,int pageSize) throws IOException {
    if (pageNo < 1) {
        pageNo = 1;
    }
    SearchRequest searchRequest = new SearchRequest("jd_goods");
    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();

    // Paging: from() expects the zero-based offset of the first hit, not a page number,
    // so page 1 starts at offset 0, page 2 at pageSize, and so on.
    sourceBuilder.from((pageNo - 1) * pageSize);
    sourceBuilder.size(pageSize);

    // Query condition.
    // NOTE(review): a term query is not analyzed; if "title" is an analyzed text field,
    // multi-word or mixed-case keywords may not match. Confirm against the index mapping.
    QueryBuilder term = QueryBuilders.termQuery("title", keyword);
    sourceBuilder.query(term);
    sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));

    // Highlighting: wrap matches in the title with a red <span>.
    HighlightBuilder highlightBuilder = new HighlightBuilder();
    highlightBuilder.requireFieldMatch(true)
            .field("title")
            .preTags("<span style='color:red'>")
            .postTags("</span>");
    sourceBuilder.highlighter(highlightBuilder);

    // Execute the request.
    searchRequest.source(sourceBuilder);
    SearchResponse search = client.search(searchRequest, RequestOptions.DEFAULT);

    List<Map<String,Object>> list = new ArrayList<>();
    for (SearchHit searchHit : search.getHits().getHits()) {
        Map<String, Object> sourceAsMap = searchHit.getSourceAsMap();
        HighlightField title = searchHit.getHighlightFields().get("title");
        if (title != null) {
            // Join all highlight fragments and use the result as the displayed title.
            StringBuilder highlighted = new StringBuilder();
            for (Text fragment : title.fragments()) {
                highlighted.append(fragment);
            }
            sourceAsMap.put("title", highlighted.toString());
        }
        list.add(sourceAsMap);
    }

    return list;
}

Controller层

Controller层调用Service,并以JSON返回搜索结果

/**
 * REST endpoints for scraping/indexing products and for paged search.
 */
@RestController
public class ContentController {

    @Autowired
    ContentService service;

    /**
     * Delegates to {@link ContentService#parseContent(String)} for the keyword in the path.
     *
     * @param keyword keyword taken from the URL path
     * @return the value returned by the service
     * @throws IOException propagated from the service
     */
    @RequestMapping("/prase/{keyword}")
    public Boolean prase(@PathVariable String keyword) throws IOException {
        Boolean outcome = service.parseContent(keyword);
        return outcome;
    }

    /**
     * Delegates to {@code ContentService.searchpage} with the three path values.
     *
     * @param keyword  search keyword from the path
     * @param pageNo   page number from the path
     * @param pageSize page size from the path
     * @return the list of result maps returned by the service
     * @throws IOException propagated from the service
     */
    @RequestMapping("/prase/{keyword}/{pageNo}/{pageSize}")
    public List<Map<String,Object>> search(
            @PathVariable("keyword") String keyword,
            @PathVariable("pageNo") int pageNo,
            @PathVariable("pageSize") int pageSize) throws IOException {
        List<Map<String,Object>> hits = service.searchpage(keyword, pageNo, pageSize);
        return hits;
    }
}

前后端交互

简单的界面,没有排版

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
<!-- Root element the Vue instance is mounted on. -->
<div id="page">
<div id="search">
    <input v-model="keyword" type="text">
    <button @click.prevent="searchKey" >搜索</button>
</div>
<div id="content">
    <!-- One entry per search result; :key lets Vue track the rendered items. -->
    <div class="product" v-for="(result, index) in results" :key="index">
        <ul>
            <li>
                <div class="">
                    <div class="p-img">
                        <a>
                            <img :src="result.img">
                        </a>
                    </div>

                    <div class="p-price">
                        <a>{{ result.price }}</a>
                    </div>
                    <div class="p-name">
                        <!-- The title carries highlight markup from the server, hence v-html. -->
                        <a v-html="result.title"></a>
                    </div>
                </div>
            </li>
        </ul>
    </div>
</div>
</div>

<script src="https://cdn.staticfile.org/vue/2.2.2/vue.min.js"></script>
<script src="https://cdn.staticfile.org/axios/0.18.0/axios.min.js"></script>
<script>
    new Vue({
        el:'#page',
        data:{
            keyword:'',
            results:[]
        },
        methods:{
          // Requests the first page (10 hits) for the current keyword and shows the result.
          searchKey(){
              var key=this.keyword.trim();
              if (!key) {
                  // An empty keyword would produce the path "/prase//1/10", which matches no route.
                  return;
              }
              // Encode the keyword so characters such as '/', '?', '#' or spaces stay
              // inside a single path segment.
              axios.get('/prase/'+encodeURIComponent(key)+'/1/10').then(response=>{
                  this.results=response.data;
              }).catch(error=>{
                  // Surface request failures instead of dropping them silently.
                  console.error(error);
              });
          }
        }
    })
</script>
</body>
</html>