/**
 * Fetches the kuaidi100 network list page, follows every company link found
 * on it, and prints a single JSON object that maps each company's "key"
 * value to the item returned by {@link #parse(String)}.
 *
 * <p>Links for which {@code parse} returns {@code null} are printed to
 * standard output on their own line instead of being added to the result.
 *
 * @param args command-line arguments (not used)
 * @throws ClientProtocolException propagated from the HTTP helper
 * @throws IOException propagated from the HTTP helper
 */
public static void main(String[] args) throws ClientProtocolException, IOException {
    String listPage = SimpleHttpClient.get("https://www.kuaidi100.com/network/plist.shtml");
    // Group 1 is the link target; group 2 (the <b> text) is captured but not used here.
    Matcher links = Pattern
            .compile("<a href=\"(.*?)\" target=\"_blank\"><h4>.*?</h4><b>(.*?)</b>")
            .matcher(listPage);
    JSONObject json = new JSONObject();
    while (links.find()) {
        String link = links.group(1);
        JSONObject company = parse(link);
        if (company == null) {
            // Detail page did not match the expected layout: report the URL and move on.
            System.out.println(link);
            continue;
        }
        json.put(company.getString("key"), company);
    }
    System.out.println(json);
}
/**
 * Regex for a company detail page. Capture groups, in order:
 * 1 = text of the {@code <h1>} inside the "ex-title" div,
 * 2 = value of the hidden "companyCode" input,
 * 3 = text of the "allcompanytel" element,
 * 4 = href of the "allcompanyurl" link.
 * Compiled once because {@link #parse(String)} is called once per link.
 */
private static final Pattern COMPANY_PAGE_PATTERN = Pattern.compile(
        "<div class=\"ex-title\">[\\s\\S]*?<h1>(.*?)</h1>[\\s\\S]*?"
        + "<input type=\"hidden\" id=\"companyCode\" value=\"(.*?)\" />[\\s\\S]*?"
        + "<font id=\"allcompanytel\" class=\"tel-icon\" title=\"拨打客服电话\">(.*?)</font> "
        + "<a target=\"_blank\" rel=\"nofollow\" id=\"allcompanyurl\" class=\"url-icon\" title=\"访问官网\" href=\"(.*?)\">.*?</a> "
        + "<a target=\"_blank\" class=\"net-icon\" rel=\"nofollow\" id=\"serversite\" title=\"查看快递网点\" href=\".*?\">服务网点</a>"
        + "[\\s\\S]*?</div>");

/**
 * Downloads the page at {@code url} and extracts the company fields from it.
 *
 * @param url address of the company detail page to fetch
 * @return a JSON object with the entries "key", "name", "tel" and "site",
 *         or {@code null} when the page content does not match the expected
 *         markup (or no content was obtained)
 * @throws ClientProtocolException propagated from the HTTP helper
 * @throws IOException propagated from the HTTP helper
 */
public static JSONObject parse(String url) throws ClientProtocolException, IOException{
    String html = SimpleHttpClient.get(url);
    // NOTE(review): SimpleHttpClient is not visible here; if it can return null,
    // Pattern.matcher(null) would throw an NPE, so report "no match" instead.
    if (html == null) {
        return null;
    }
    Matcher matcher = COMPANY_PAGE_PATTERN.matcher(html);
    if (!matcher.find()) {
        return null;
    }
    JSONObject item = new JSONObject();
    item.put("key", matcher.group(2));
    item.put("name", matcher.group(1));
    item.put("tel", matcher.group(3));
    item.put("site", matcher.group(4));
    return item;
}