在同步数据、处理爬虫抓取的数据或接收上游 API 传来的数据时,经常会遇到地址格式不统一的问题,因此写了一个简单的地址解析器。为了兼容一些老系统,代码采用 JDK 1.7 编写。
import com.xxx.Address;
import org.apache.shiro.util.CollectionUtils;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
 * Splits an address string into province, city, district and a free-form detail part by
 * looking for well-known administrative suffixes (for example 省 / 市 / 区).
 *
 * <p>The parser is purely positional: it expects the input to start with the province,
 * followed by the city and the district, e.g. {@code 山东省青岛市黄岛区}. Input that does not
 * follow that order will be split at whatever suffix happens to occur first.
 */
public class AddressResolutionUtil {

    /**
     * Suffixes recognised at each administrative level, indexed province (0), city (1),
     * district (2). These are plain substrings matched with {@code indexOf}, not regular
     * expressions.
     */
    private static final String[][] LEVEL_SUFFIXES = {
        {"省", "自治区", "行政区", "市"}, // province level
        {"市", "自治州", "地区", "行政单位", "盟", "县"}, // city level
        {"市", "县", "区", "旗", "海域", "岛"} // district level
    };

    /**
     * Parses an address into an {@link Address} and strips repeated province/city/district
     * names from the detail part.
     *
     * <p>Processing steps:
     * <ol>
     *   <li>All ASCII space characters are removed from the input.</li>
     *   <li>For each level (province, city, district) the earliest qualifying suffix in the
     *       remaining text is located; the text up to and including that suffix becomes the
     *       name for that level and is cut off the front of the remaining text. A level with
     *       no qualifying suffix is left as {@code null} and consumes nothing.</li>
     *   <li>Any further occurrences of the suffixed names inside the remaining text are
     *       removed, and what is left is stored as the detail.</li>
     * </ol>
     *
     * @param add        source address string; {@code null} is treated like an empty string
     * @param withSuffix whether the returned names keep their suffix, e.g. {@code 山东省}
     *                   ({@code true}) versus {@code 山东} ({@code false})
     * @return the parsed address; levels that could not be matched are passed to
     *         {@code Address.of} as {@code null}
     * @see AddressMatchIndexAndKey
     */
    public static Address resolveAddress(String add, boolean withSuffix) {
        // Null input previously failed with a NullPointerException; handle it like "".
        // String.replace is a literal replacement, so no regex is compiled for a plain space.
        String remaining = (add == null) ? "" : add.replace(" ", "");

        String[] fullNames = new String[LEVEL_SUFFIXES.length]; // always include the suffix
        String[] names = new String[LEVEL_SUFFIXES.length];     // honour withSuffix

        for (int level = 0; level < LEVEL_SUFFIXES.length; level++) {
            AddressMatchIndexAndKey nearest = findNearestSuffix(remaining, LEVEL_SUFFIXES[level]);
            if (nearest == null) {
                continue; // nothing recognisable at this level
            }
            int nameEnd = nearest.getIndex();
            int fullEnd = nameEnd + nearest.getKey().length();

            fullNames[level] = remaining.substring(0, fullEnd);
            names[level] = withSuffix ? fullNames[level] : remaining.substring(0, nameEnd);

            // Consume the matched prefix so the next level starts right after it.
            remaining = remaining.substring(fullEnd);
        }

        Address address = Address.of(names[0], names[1], names[2]);

        // De-duplicate: the detail often repeats the province/city/district. The suffixed
        // form is what gets removed regardless of withSuffix.
        for (String fullName : fullNames) {
            if (fullName != null) {
                remaining = remaining.replace(fullName, "");
            }
        }
        address.setDetail(remaining);
        return address;
    }

    /**
     * Finds the suffix whose first occurrence in {@code text} is earliest, considering only
     * first occurrences at index 2 or later (so the name in front of the suffix has at
     * least two characters).
     *
     * @param text     text to search
     * @param suffixes candidate suffixes for one administrative level
     * @return the earliest qualifying match, or {@code null} when none qualifies
     */
    private static AddressMatchIndexAndKey findNearestSuffix(String text, String[] suffixes) {
        AddressMatchIndexAndKey nearest = null;
        for (String suffix : suffixes) {
            int index = text.indexOf(suffix); // searched once per suffix
            if (index <= 1) {
                continue; // not found (-1) or too close to the start
            }
            AddressMatchIndexAndKey candidate = new AddressMatchIndexAndKey(suffix, index);
            // Strict "<" keeps the first suffix on a tie, matching Collections.min.
            if (nearest == null || candidate.compareTo(nearest) < 0) {
                nearest = candidate;
            }
        }
        return nearest;
    }

    /**
     * A matched suffix together with the index at which it was found. Instances order by
     * index, so the smallest one is the match closest to the start of the text.
     */
    static class AddressMatchIndexAndKey implements Comparable<AddressMatchIndexAndKey> {
        private String key;
        private Integer index;

        /**
         * @param key   the matched suffix
         * @param index position of the suffix in the searched text
         */
        public AddressMatchIndexAndKey(String key, Integer index) {
            this.key = key;
            this.index = index;
        }

        /** @return the matched suffix */
        public String getKey() {
            return key;
        }

        /** @param key the matched suffix */
        public void setKey(String key) {
            this.key = key;
        }

        /** @return position of the suffix in the searched text */
        public Integer getIndex() {
            return index;
        }

        /** @param index position of the suffix in the searched text */
        public void setIndex(Integer index) {
            this.index = index;
        }

        /** Orders matches by ascending index. */
        @Override
        public int compareTo(AddressMatchIndexAndKey o) {
            return Integer.compare(index, o.index);
        }
    }
}
Address 类
public class Address {
private String province;
private String city;
private String region;
private String detail;
/**
 * Builds an address from its three administrative parts. The detail field is not
 * touched by this factory.
 *
 * @param province value stored via {@code setProvince}
 * @param city     value stored via {@code setCity}
 * @param region   value stored via {@code setRegion}
 * @return a newly created address carrying the given parts
 */
public static Address of(String province,String city,String region){
    final Address created = new Address();
    created.setProvince(province);
    created.setCity(city);
    created.setRegion(region);
    return created;
}
/**
 * Builds an address whose province, city, region and detail are all empty strings.
 *
 * @return a newly created address with every field set to {@code ""}
 */
public static Address empty(){
    // of(...) creates the instance and fills province/city/region; only detail remains.
    final Address blank = of("", "", "");
    blank.setDetail("");
    return blank;
}
/** @return the current value of the {@code detail} field */
public String getDetail() {
return detail;
}
/** @param detail new value for the {@code detail} field */
public void setDetail(String detail) {
this.detail = detail;
}
/** @return the current value of the {@code province} field */
public String getProvince() {
return province;
}
/** @param province new value for the {@code province} field */
public void setProvince(String province) {
this.province = province;
}
/** @return the current value of the {@code city} field */
public String getCity() {
return city;
}
/** @param city new value for the {@code city} field */
public void setCity(String city) {
this.city = city;
}
/** @return the current value of the {@code region} field */
public String getRegion() {
return region;
}
/** @param region new value for the {@code region} field */
public void setRegion(String region) {
this.region = region;
}