爬取百度百科词条头部视频的 Java 代码实现

777 阅读1分钟

仅供参考，由此产生的问题本人概不负责，也不做讲解（因为这是作业）。

package Example.TestAnnotation;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import lombok.SneakyThrows;

import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.util.HashMap;

/**
 * Looks up the MP4 URL published for a Baidu Baike entry (identified by its
 * lemma id) and downloads that video to a local file.
 *
 * <p>Dependencies (Maven):
 * <pre>{@code
 * fastjson
 * <dependency>
 *     <groupId>com.alibaba</groupId>
 *     <artifactId>fastjson</artifactId>
 *     <version>1.2.62</version>
 * </dependency>
 * lombok
 * <dependency>
 *     <groupId>org.projectlombok</groupId>
 *     <artifactId>lombok</artifactId>
 *     <version>1.18.10</version>
 *     <scope>provided</scope>
 * </dependency>
 * }</pre>
 */
public class BaiduWiki {
    /** Lemma lookup endpoint; the lemma id is appended as the query value. */
    private static final String LEMMA_API_URL =
            "https://baike.baidu.com/api/wikisecond/lemmasecond?lemmaId=";

    /** One client reused for every request instead of building a new one per call. */
    private static final HttpClient HTTP_CLIENT = HttpClient.newHttpClient();

    private final File file;
    private final long lemmaId;

    /**
     * Creates a downloader for one entry.
     *
     * @param fileName target path of the MP4 file, including the file name,
     *                 e.g. {@code C:\Users\dreamlike\Desktop\test.mp4}
     * @param lemmaId  numeric id of the entry; e.g. in
     *                 {@code https://baike.baidu.com/item/网络爬虫/5162711?fromtitle=%E7%88%AC%E8%99%AB&fromid=22046949}
     *                 the lemma id is {@code 5162711}
     */
    public BaiduWiki(String fileName, long lemmaId) {
        this.file = new File(fileName);
        this.lemmaId = lemmaId;
    }

    /**
     * Queries the lemma API and extracts the {@code playMp4Url} of the first
     * item in the {@code list -> 同词条} array of the JSON response.
     *
     * <p>Checked exceptions from the HTTP call ({@code IOException},
     * {@code InterruptedException}) are rethrown unchanged via
     * {@code @SneakyThrows}.
     *
     * @return the MP4 URL, or {@code null} if the first item has no
     *         {@code playMp4Url} value
     * @throws IllegalStateException if the API answers with a non-2xx status or
     *                               the response lacks the expected structure
     */
    @SneakyThrows
    public String getMp4Uri() {
        HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create(LEMMA_API_URL + lemmaId))
                .build();
        HttpResponse<String> response =
                HTTP_CLIENT.send(request, HttpResponse.BodyHandlers.ofString());
        requireSuccess(response, "lemma lookup");

        // Walk the JSON level by level so a missing node is reported by name
        // instead of surfacing as an anonymous NullPointerException.
        JSONObject root = JSON.parseObject(response.body());
        JSONObject list = root == null ? null : root.getJSONObject("list");
        JSONArray sameEntry = list == null ? null : list.getJSONArray("同词条");
        if (sameEntry == null || sameEntry.isEmpty()) {
            throw new IllegalStateException(
                    "Lemma API response for lemmaId " + lemmaId + " has no list/同词条 items");
        }
        JSONObject first = sameEntry.getJSONObject(0);
        if (first == null) {
            throw new IllegalStateException(
                    "Lemma API response for lemmaId " + lemmaId + " has a null first 同词条 item");
        }
        return first.getString("playMp4Url");
    }

    /**
     * Downloads the video returned by {@link #getMp4Uri()} into the file given
     * at construction time, replacing any existing content.
     *
     * <p>Checked exceptions from the HTTP call and the file write are rethrown
     * unchanged via {@code @SneakyThrows}.
     *
     * @throws IllegalStateException if no MP4 URL is available or the download
     *                               answers with a non-2xx status
     */
    @SneakyThrows
    public void getMp4File() {
        String mp4Uri = getMp4Uri();
        if (mp4Uri == null) {
            throw new IllegalStateException("No playMp4Url found for lemmaId " + lemmaId);
        }
        HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create(mp4Uri))
                .build();
        HttpResponse<InputStream> response =
                HTTP_CLIENT.send(request, HttpResponse.BodyHandlers.ofInputStream());

        // The body stream must be closed even on failure, otherwise the
        // underlying connection is not released.
        try (InputStream in = response.body()) {
            // Check the status before opening the file so that a failed request
            // does not create or truncate the target.
            requireSuccess(response, "video download");
            try (FileOutputStream out = new FileOutputStream(file)) {
                in.transferTo(out);
            }
        }
    }

    /** Throws {@link IllegalStateException} unless the response status is 2xx. */
    private static void requireSuccess(HttpResponse<?> response, String what) {
        int status = response.statusCode();
        if (status / 100 != 2) {
            throw new IllegalStateException(
                    what + " failed with HTTP status " + status + " for " + response.uri());
        }
    }

}