统计局街道信息jsoup方式获取

279 阅读1分钟
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.*;
import java.nio.charset.StandardCharsets;

public class hwztest {
    public static void main(String[] args) throws IOException {
        //6.Jsoup解析html
        String url = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2020/41/";
        Document document ;
        FileOutputStream fos=new FileOutputStream("henanstreet.txt");
        for(int i=90;i<91;i++){
            document = Jsoup.connect(url+"41"+i+".html").get();
//        System.out.println(document.getElementsByClass("towntr"));
            Elements eles = document.getElementsByClass("countytr");
//        Document containerDoc = Jsoup.parse(eles.toString());
//        System.out.println(containerDoc);



            for(Element countytr :eles){
                Element street = countytr.getElementsByAttribute("href").first();
                Elements countytds  = countytr.getElementsByTag("td");
                String areaCode = countytds.get(0).text().substring(0,6);
                String streeturl = countytr.getElementsByTag("td").first().getElementsByTag("a").attr("href");
                System.out.println(streeturl);
                if(StringUtils.isNotBlank(streeturl)){
                    Document document1 = Jsoup.connect(url+"/"+streeturl).get();
                    Elements elestreets = document1.getElementsByClass("towntr");
                    for(Element towntr :elestreets){
                        Elements tds = towntr.getElementsByTag("td");
                        //area的街道插入sql
                        fos.write(("INSERT INTO `business_user_group`(`user_group_id`, `parent_id`, `user_group_name`, `user_group_level`) VALUES ('"+tds.get(0).text()+"', '"+areaCode+"', '"+tds.get(1).text()+"', 5);\n").getBytes(StandardCharsets.UTF_8));
//                    fos.write(("DELETE FROM `business_user_group` WHERE `user_group_id` = '"+tds.get(0).text()+"';\n").getBytes(StandardCharsets.UTF_8));                    }
                }

            }
        }

    }
}

将国家统计局河南省下的街道信息读取后,生成对应的mysql插入语句,批量一次性插入数据库。

注:读取完成之后,请检查一遍生成的sql语句;有些生僻字会出现字符编码乱码的问题,需要自己手动重新编辑一下。