import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class Test14 {
public static void main(String[] args) throws IOException {
String surnameNet = "https://hanyu.baidu.com/shici/detail?pid=0b2f26d4c0ddb3ee693fdb1137ee1b0d&from=kg0";
String boyNet = "http://www.haoming8.cn/baobao/10881.html";
String girlNet = "http://www.haoming8.cn/baobao/7641.html";
String surnameContent = getUrlContent(surnameNet);
ArrayList<String> surnameList = regularCompilation(surnameContent, "(.{4})(,|。)", 1, "姓氏");
System.out.println();
String boyContent = getUrlContent(boyNet);
ArrayList<String> boyNameList = regularCompilation(boyContent, "([\\u4e00-\\u9fa5·]{2})(、)", 1, "男生名字");
System.out.println();
String girlContent = getUrlContent(girlNet);
ArrayList<String> girlNameList = regularCompilation(girlContent, "(.. ){4}..", 0, "女生名字");
System.out.println();
combinationName(surnameList, boyNameList, girlNameList, 30, false);
BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter("E:\\Code\\JavaDemo\\src\\CrawlData"));
for (String s : linkedHashSet) {
bufferedWriter.write(s);
bufferedWriter.newLine();
}
bufferedWriter.close();
}
public static String getUrlContent(String net) throws IOException {
StringBuilder stringBuilder = new StringBuilder();
URL url = new URL(net);
URLConnection urlConnection = url.openConnection();
InputStreamReader inputStreamReader = new InputStreamReader(urlConnection.getInputStream());
int c;
while ((c = inputStreamReader.read()) != -1) {
stringBuilder.append((char) c);
}
inputStreamReader.close();
return stringBuilder.toString();
}
private static ArrayList<String> regularCompilation(String str, String regex, int index, String logTest) {
ArrayList<String> arrayList = new ArrayList<>();
Pattern pattern = Pattern.compile(regex);
Matcher matcher = pattern.matcher(str);
while (matcher.find()) {
arrayList.add(matcher.group(index));
}
System.out.println("该" + logTest + "为:");
System.out.println(arrayList);
return arrayList;
}
private static LinkedHashSet<String> combinationName(ArrayList<String> surnameList, ArrayList<String> boyNameList, ArrayList<String> girlNameList, int nameQuantity, boolean isBoy) {
LinkedHashSet<String> linkedHashSet = new LinkedHashSet<>();
ArrayList<String> singleSurnameList = new ArrayList<>();
for (String s : surnameList) {
for (int i = 0; i < s.length(); i++) {
singleSurnameList.add(String.valueOf(s.charAt(i)));
}
}
ArrayList<String> singleGirlNameList = new ArrayList<>();
for (String s : girlNameList) {
String[] names = s.split(" ");
for (String name : names) {
singleGirlNameList.add(name);
}
}
while (linkedHashSet.size() < nameQuantity) {
Collections.shuffle(singleSurnameList);
if (isBoy) {
Collections.shuffle(boyNameList);
linkedHashSet.add(singleSurnameList.get(0) + boyNameList.get(0));
} else {
Collections.shuffle(singleGirlNameList);
linkedHashSet.add(singleSurnameList.get(0) + singleGirlNameList.get(0));
}
}
System.out.println(linkedHashSet);
return linkedHashSet;
}
}