package zhou;
import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Ad-hoc jsoup scraping routines. Each public method downloads pages from a
 * hard-coded site and prints what it extracts to standard output.
 */
public class JsoupTest {

    /** Marker that precedes each person's name in the forum thread titles. */
    private static final String NAME_MARKER = "姓名";

    /** Marker that separates the name from the ID-card number in a thread title. */
    private static final String CARD_ID_MARKER = "身份证号";

    /** Last forum list page (inclusive) visited by {@link #souLaoLai()}. */
    private static final int FORUM_LAST_PAGE = 132;

    /** Last blacklist page (inclusive) visited by {@link #xiaokeai()}. */
    private static final int BLACKLIST_LAST_PAGE = 164;

    /**
     * Prints the link and title of every article listed on the blog home page.
     * For each element whose {@code class} attribute is exactly {@code postTitle},
     * every nested {@code <a>} contributes two lines: its {@code href}, then its
     * trimmed text. An {@link IOException} is reported via its stack trace.
     */
    public void article() {
        try {
            Document doc = Jsoup.connect("http://www.cnblogs.com/zyw-205520/").get();
            // Exact match on the class attribute value, not a class-token match.
            Elements titleBlocks = doc.getElementsByAttributeValue("class", "postTitle");
            for (Element titleBlock : titleBlocks) {
                for (Element anchor : titleBlock.getElementsByTag("a")) {
                    System.out.println(anchor.attr("href"));
                    System.out.println(anchor.text().trim());
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints the inner HTML of every element whose {@code class} attribute is
     * exactly {@code postBody} on one fixed blog article page. An
     * {@link IOException} is reported via its stack trace.
     */
    public void Blog() {
        try {
            Document doc = Jsoup
                    .connect("http://www.cnblogs.com/zyw-205520/archive/2012/12/20/2826402.html")
                    .get();
            Elements bodies = doc.getElementsByAttributeValue("class", "postBody");
            for (Element body : bodies) {
                System.out.println(body.html());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Walks forum list pages 1..{@value #FORUM_LAST_PAGE}, extracts name / ID-card
     * pairs from the thread-title links and prints one INSERT statement for
     * {@code BLACKLIST_INFO} per pair, followed by the total count.
     *
     * <p>The output file {@code D:\tempSql.sql} is opened (and therefore created or
     * truncated) exactly as before, and is now always closed. Any exception stops
     * the crawl; the count reached so far and the stack trace are printed, and
     * nothing is propagated to the caller.
     */
    public void souLaoLai() {
        int count = 0;
        OutputStream out = null;
        BufferedWriter writer = null;
        try {
            out = new FileOutputStream("D:\\tempSql.sql");
            writer = new BufferedWriter(new OutputStreamWriter(out, "GBK"));
            StringBuilder allSql = new StringBuilder();
            System.out.println("开始");
            for (int page = 1; page <= FORUM_LAST_PAGE; page++) {
                String pageUrl =
                        "http://123.57.67.94:8080/lxth/forum.php?mod=forumdisplay&fid=36&page=" + page;
                Document doc = Jsoup.connect(pageUrl).timeout(5000).get();

                // getElementById returns null when the id is absent; skip such a page
                // instead of failing the whole run with a NullPointerException.
                Element wrapper = doc.getElementById("wp");
                Element threadList = wrapper == null ? null : wrapper.getElementById("threadlist");
                if (threadList == null) {
                    System.err.println("thread list not found, skipping " + pageUrl);
                    continue;
                }

                // All link texts of the page are joined into one string and then cut
                // apart again at every name marker.
                String titles = threadList.getElementsByTag("th").select("a").text().replace("New", "");
                String[] entries = titles.split(NAME_MARKER);
                // entries[0] is whatever precedes the first name marker, so start at 1.
                for (int j = 1; j < entries.length; j++) {
                    String entry = entries[j];
                    // "> 0" (not ">= 0") is kept from the original: an entry that starts
                    // with the card-id marker has no name text and is ignored.
                    if (entry.indexOf(CARD_ID_MARKER) <= 0) {
                        continue;
                    }
                    String[] parts = entry.split(CARD_ID_MARKER);
                    // split() drops trailing empty strings, so an entry that ends with
                    // the marker yields a single part and has no card id to read.
                    if (parts.length < 2) {
                        continue;
                    }
                    String sql = buildInsertSql(cleanField(parts[0]), cleanField(parts[1]), pageUrl);
                    allSql.append(sql).append("\r\n");
                    count++;
                    System.out.println(sql);
                }
            }
            // NOTE(review): writing the collected statements to the file was disabled
            // in the original source and is left disabled here; re-enable with
            // writer.write(allSql.toString()) if the script file is actually wanted.
            System.out.println("共:" + count + "条");
        } catch (Exception e) {
            // Deliberately broad: this method has never propagated failures to callers.
            System.out.println(count);
            e.printStackTrace();
        } finally {
            // try/finally (rather than try-with-resources) keeps this compilable at
            // pre-Java-7 source levels. Closing the writer also closes the wrapped
            // stream; fall back to the raw stream if the writer was never created.
            try {
                if (writer != null) {
                    writer.close();
                } else if (out != null) {
                    out.close();
                }
            } catch (IOException closeError) {
                closeError.printStackTrace();
            }
        }
    }

    /**
     * Walks blacklist pages 1..{@value #BLACKLIST_LAST_PAGE} and, for every element
     * whose {@code class} attribute is exactly {@code black_item}, prints four lines
     * in this order: name, identity, phone, address. Each value is the text between
     * two labels of the element's text; a value whose labels are missing or out of
     * order is printed as an empty line. An {@link IOException} stops the crawl and
     * is reported via its stack trace.
     */
    public void xiaokeai() {
        try {
            for (int page = 1; page <= BLACKLIST_LAST_PAGE; page++) {
                Document doc = Jsoup.connect("http://www.jiedai.cn/blacklist/" + page + ".html").get();
                Elements items = doc.getElementsByAttributeValue("class", "black_item");
                for (Element item : items) {
                    String text = item.text().trim();
                    System.out.println(between(text, "姓名:", "手机:"));
                    System.out.println(between(text, "身份:", "地址:"));
                    System.out.println(between(text, "手机:", "身份:"));
                    System.out.println(between(text, "地址:", "申报情况:"));
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Removes ASCII colons from a scraped field and trims surrounding whitespace. */
    private static String cleanField(String raw) {
        // NOTE(review): the original ran replace(":", "") twice, identically in both
        // arms of an if/else on the page number. A second colon variant may have been
        // intended - confirm against the site before adding one.
        return raw.replace(":", "").trim();
    }

    /** Doubles single quotes so scraped text cannot terminate a SQL string literal. */
    private static String escapeSqlLiteral(String value) {
        return value.replace("'", "''");
    }

    /**
     * Builds the INSERT statement for one name / card-id pair found on {@code pageUrl}.
     * Every {@code &} in the finished statement is doubled, as in the original
     * (presumably for the SQL client's substitution-variable handling - confirm).
     */
    private static String buildInsertSql(String name, String cardId, String pageUrl) {
        String sql = "insert into BLACKLIST_INFO (ID, BLACKLIST_USER, CARD_ID, PROVINCE, ADDRESS, WORK_ADDRESS, USER_PHONE, LOAN_DATE, PAY_DATE, OVER_TIME, "
                + "UPDATE_DATE, STATISTICAL_DATE, OVER_DAYS, OVER_NUMBER, OVER_MONEY, PAY_CAPITAL, OVER_TYPE, DATA_SOURCE, LOAN_CLIENT, LOAN_TYPE,"
                + " NETWORK_LINK, REMARK1, REMARK2, EMAIL, LELIEVE, CREATE_DEPT_ID, CREATE_DEPT_NAME, CREATE_NAME_ID, CREATE_NAME, SOURCE_TYPE)"
                + " values (SEQ_BLACKLIST_INFO.Nextval, '" + escapeSqlLiteral(name) + "', '" + escapeSqlLiteral(cardId) + "', null, null, null, null, null, null, null, to_date(to_char(sysdate,'dd-mm-yyyy hh24:mi:ss'), 'dd-mm-yyyy hh24:mi:ss'), null, null, null, null, null, '0', 3, '老赖网', '不详', '" + pageUrl + "', null, null, null, null, null, null,null, null, null);";
        return sql.replace("&", "&&");
    }

    /**
     * Returns the trimmed text between the first occurrence of {@code startLabel}
     * and the first occurrence of {@code endLabel}, or an empty string when either
     * label is missing or the end label does not come after the start label.
     */
    private static String between(String text, String startLabel, String endLabel) {
        int labelAt = text.indexOf(startLabel);
        int end = text.indexOf(endLabel);
        if (labelAt < 0 || end < 0) {
            return "";
        }
        int begin = labelAt + startLabel.length();
        if (begin > end) {
            return "";
        }
        return text.substring(begin, end).trim();
    }
}