成果
环境搭建
1. 导入依赖
<!-- Dependencies for the demo: Spring Boot starters (web, Elasticsearch, Thymeleaf, test), Lombok, fastjson and jsoup. -->
<dependencies>
    <!-- Spring MVC starter. This is the only starter here that pins an explicit version. -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-web</artifactId>
        <version>2.3.5.RELEASE</version>
    </dependency>
    <!-- Spring Data Elasticsearch starter. -->
    <!-- NOTE(review): no <version> here or on the other starters below; presumably inherited from a parent POM or dependencyManagement section not shown here (confirm). -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
    </dependency>
    <!-- Thymeleaf template engine starter. -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-thymeleaf</artifactId>
    </dependency>
    <!-- Lombok, declared optional so it is not passed on to projects that depend on this one. -->
    <dependency>
        <groupId>org.projectlombok</groupId>
        <artifactId>lombok</artifactId>
        <optional>true</optional>
    </dependency>
    <!-- Spring Boot test starter, limited to the test scope. -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-test</artifactId>
        <scope>test</scope>
    </dependency>
    <!-- fastjson, used to serialize scraped objects to JSON. -->
    <!-- NOTE(review): the groupId is com.rover12421 rather than the commonly used com.alibaba; confirm this coordinate is intended. -->
    <!-- NOTE(review): 1.2.47 is an old fastjson release; check published security advisories before using it outside a demo. -->
    <dependency>
        <groupId>com.rover12421</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.47</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
    <!-- jsoup HTML parser, used to fetch and parse the scraped page. -->
    <dependency>
        <groupId>org.jsoup</groupId>
        <artifactId>jsoup</artifactId>
        <version>1.13.1</version>
    </dependency>
</dependencies>
2. 编写配置文件
# Port the embedded web server listens on.
server.port=9090
# Turn off Thymeleaf template caching so template edits are picked up without a restart.
spring.thymeleaf.cache=false
3. 创建配置类和实体类
/**
 * Spring configuration that registers the Elasticsearch high-level REST client as a bean.
 */
@Configuration
public class ElasticSearch_Config {

    /**
     * Builds a client pointed at a single node, {@code http://localhost:9200}.
     *
     * @return the client, registered under the bean name {@code restHighLevelClient}
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        HttpHost node = new HttpHost("localhost", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(node));
    }
}
/**
 * One product entry scraped from a search result page.
 *
 * <p>Lombok generates the accessors, {@code equals}/{@code hashCode}/{@code toString},
 * a no-argument constructor and an all-arguments constructor. The all-arguments
 * constructor takes its parameters in field declaration order: {@code img},
 * {@code price}, {@code title}.
 */
@NoArgsConstructor
@AllArgsConstructor
@Data
public class Content {

    /** Image address of the product. */
    private String img;

    /** Price text as it appears on the page. */
    private String price;

    /** Product title text. */
    private String title;
}
爬取数据
1. 编写工具类
/**
 * Scrapes product listings from the JD search result page using jsoup.
 */
public class HtmlPraseUtil {

    /**
     * Fetches the JD search page for {@code keyword} and builds one {@link Content}
     * per {@code <li>} element found under the element with id {@code J_goodsList}.
     *
     * @param keyword search keyword, must not be {@code null}; it is URL-encoded as
     *                UTF-8 before being appended to the query string
     * @return the scraped products; an empty list when the page contains no
     *         {@code J_goodsList} element
     * @throws IOException if the page cannot be fetched or parsed
     */
    public List<Content> parseJD(String keyword) throws IOException {
        List<Content> list = new ArrayList<>();
        // Encode the keyword so that non-ASCII text, spaces, '&' or '#' cannot
        // corrupt the query string of the target URL.
        String url = "https://search.jd.com/Search?keyword="
                + java.net.URLEncoder.encode(keyword, "UTF-8");
        // Fetch and parse the page with a 30 second timeout.
        Document document = Jsoup.parse(new URL(url), 30000);
        Element goodsList = document.getElementById("J_goodsList");
        if (goodsList == null) {
            // The result container is missing from the returned page; report
            // "nothing found" instead of failing with a NullPointerException.
            return list;
        }
        for (Element item : goodsList.getElementsByTag("li")) {
            // Images are lazy-loaded, so the address is read from the
            // data-lazy-img attribute rather than from src.
            String img = item.getElementsByTag("img").eq(0).attr("data-lazy-img");
            String price = item.getElementsByClass("p-price").eq(0).text();
            String title = item.getElementsByClass("p-name").eq(0).text();
            list.add(new Content(img, price, title));
        }
        return list;
    }
}
2. 在 Service 层调用
/**
 * Service that scrapes JD search results and stores them in the
 * {@code jd_goods} Elasticsearch index.
 */
@Service
public class ContentService {

    /** Elasticsearch client, injected by bean name. */
    @Resource(name = "restHighLevelClient")
    private RestHighLevelClient client;

    /**
     * Scrapes the products for {@code key} and indexes them with one bulk request.
     *
     * @param key search keyword handed to the scraper
     * @return {@code true} when at least one product was scraped and the bulk
     *         request reported no failures; {@code false} otherwise
     * @throws IOException if scraping or the bulk request fails
     */
    public Boolean parseContent(String key) throws IOException {
        List<Content> list = new HtmlPraseUtil().parseJD(key);
        if (list.isEmpty()) {
            // A bulk request without any action is rejected by request validation
            // before it is sent, so report "nothing indexed" instead of throwing.
            return false;
        }
        BulkRequest bulkRequest = new BulkRequest();
        bulkRequest.timeout("2m");
        // Add one index action per scraped product, serialized as JSON.
        for (Content content : list) {
            bulkRequest.add(new IndexRequest("jd_goods")
                    .source(JSON.toJSONString(content), XContentType.JSON));
        }
        return !client.bulk(bulkRequest, RequestOptions.DEFAULT).hasFailures();
    }
}
3. 在 Controller 层暴露接口(@RestController 直接返回 JSON 数据,而不是页面跳转)
/**
 * REST controller exposing the scraping endpoint.
 */
@RestController
public class ContentController {

    /** Service that performs the scraping and indexing. */
    @Autowired
    ContentService service;

    /**
     * Triggers scraping and indexing for the keyword taken from the request path.
     *
     * @param keyword search keyword from the {@code /prase/{keyword}} path
     * @return the flag returned by {@code ContentService#parseContent}
     * @throws IOException propagated from the service
     */
    @RequestMapping("/prase/{keyword}")
    public Boolean prase(@PathVariable String keyword) throws IOException {
        Boolean indexed = service.parseContent(keyword);
        return indexed;
    }
}
功能实现
Service层
在Service层添加方法
/**
 * Runs a paged term query against the {@code title} field of the {@code jd_goods}
 * index and returns the source of every hit, with {@code title} replaced by its
 * highlighted form when a highlight is available.
 *
 * @param keyword  term to look up in {@code title}
 * @param pageNo   1-based page number; values below 1 are treated as 1
 * @param pageSize number of hits per page
 * @return one source map per hit, in the order returned by Elasticsearch
 * @throws IOException if the search request fails
 */
public List<Map<String,Object>> searchpage(String keyword,int pageNo,int pageSize) throws IOException {
    if (pageNo < 1) {
        pageNo = 1;
    }
    SearchRequest searchRequest = new SearchRequest("jd_goods");
    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
    // Paging: from() expects the offset of the first hit, not a page number,
    // so page 1 starts at offset 0, page 2 at offset pageSize, and so on.
    sourceBuilder.from((pageNo - 1) * pageSize);
    sourceBuilder.size(pageSize);
    // Query: exact term lookup on the title field.
    // NOTE(review): a term query does not analyze the keyword; whether multi-character
    // keywords match depends on how "title" is mapped. Confirm against the index mapping.
    QueryBuilder term = QueryBuilders.termQuery("title", keyword);
    sourceBuilder.query(term);
    sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));
    // Highlighting: wrap matches in the title with a red <span>.
    HighlightBuilder highlightBuilder = new HighlightBuilder();
    highlightBuilder.requireFieldMatch(true)
            .field("title")
            .preTags("<span style='color:red'>")
            .postTags("</span>");
    sourceBuilder.highlighter(highlightBuilder);
    // Execute the request.
    searchRequest.source(sourceBuilder);
    SearchResponse search = client.search(searchRequest, RequestOptions.DEFAULT);
    List<Map<String,Object>> list = new ArrayList<>();
    for (SearchHit searchHit : search.getHits().getHits()) {
        Map<String, Object> sourceAsMap = searchHit.getSourceAsMap();
        HighlightField title = searchHit.getHighlightFields().get("title");
        if (title != null) {
            // Join all highlight fragments and use the result as the title.
            StringBuilder highlighted = new StringBuilder();
            for (Text fragment : title.fragments()) {
                highlighted.append(fragment);
            }
            sourceAsMap.put("title", highlighted.toString());
        }
        list.add(sourceAsMap);
    }
    return list;
}
Controller层
在 Controller 层调用 Service 的分页搜索方法,并将结果以 JSON 形式返回
/**
 * REST controller exposing the scraping endpoint and the paged search endpoint.
 */
@RestController
public class ContentController {

    /** Service that performs scraping, indexing and searching. */
    @Autowired
    ContentService service;

    /**
     * Triggers scraping and indexing for the keyword taken from the request path.
     *
     * @param keyword search keyword from the {@code /prase/{keyword}} path
     * @return the flag returned by {@code ContentService#parseContent}
     * @throws IOException propagated from the service
     */
    @RequestMapping("/prase/{keyword}")
    public Boolean prase(@PathVariable String keyword) throws IOException {
        Boolean indexed = service.parseContent(keyword);
        return indexed;
    }

    /**
     * Searches indexed products; all three arguments come from the request path.
     *
     * @param keyword  keyword to search for
     * @param pageNo   page number passed through to the service
     * @param pageSize page size passed through to the service
     * @return the list returned by {@code ContentService#searchpage}
     * @throws IOException propagated from the service
     */
    @RequestMapping("/prase/{keyword}/{pageNo}/{pageSize}")
    public List<Map<String,Object>> search(@PathVariable("keyword") String keyword,
                                           @PathVariable("pageNo") int pageNo,
                                           @PathVariable("pageSize") int pageSize) throws IOException {
        List<Map<String,Object>> hits = service.searchpage(keyword, pageNo, pageSize);
        return hits;
    }
}
前后端交互
简单的界面,没有排版
<!DOCTYPE html>
<!-- Minimal search page: a keyword box plus a list of results rendered with Vue. -->
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
<div id="page">
    <div id="search">
        <input v-model="keyword" type="text">
        <button @click.prevent="searchKey" >搜索</button>
    </div>
    <div id="content">
        <!-- One entry per search result; keyed by position because results carry no id. -->
        <div class="product" v-for="(result, index) in results" :key="index">
            <ul>
                <li>
                    <div class="">
                        <div class="p-img">
                            <a>
                                <img :src="result.img">
                            </a>
                        </div>
                        <div class="p-price">
                            <a>{{ result.price }}</a>
                        </div>
                        <div class="p-name">
                            <!-- The title carries <span> highlight markup from the server, so it is rendered as HTML. -->
                            <!-- NOTE(review): the title text comes from a scraped third-party page and is inserted unsanitized; consider sanitizing it on the server. -->
                            <a v-html="result.title"></a>
                        </div>
                    </div>
                </li>
            </ul>
        </div>
    </div>
</div>
<script src="https://cdn.staticfile.org/vue/2.2.2/vue.min.js"></script>
<script src="https://cdn.staticfile.org/axios/0.18.0/axios.min.js"></script>
<script>
    new Vue({
        el:'#page',
        data:{
            keyword:'',
            results:[]
        },
        methods:{
            // Requests the first page (10 results) for the current keyword.
            searchKey(){
                var key=this.keyword.trim();
                if (!key) {
                    // An empty keyword would produce the path /prase//1/10, which matches no route.
                    this.results=[];
                    return;
                }
                // Encode the keyword so characters such as '/', '?', '#' or '%' stay inside one path segment.
                axios.get('/prase/'+encodeURIComponent(key)+'/1/10').then(response=>{
                    this.results=response.data;
                }).catch(error=>{
                    // Clear stale results and surface the failure instead of leaving an unhandled rejection.
                    this.results=[];
                    console.error(error);
                });
            }
        }
    })
</script>
</body>
</html>