1 import java.io.IOException;
2
3 import org.jsoup.Jsoup;
4 import org.jsoup.nodes.Document;
5
6 /**
7 * 解析知网文章的页面内容的代码
8 *
9 */
10 public class Kns50onepage {
11 public static void main(String[] args) throws IOException {
12
13 String url="http://www.cfed.cnki.net/kns50/detail.aspx?filename=GLXB201301003&dbname=CFJD2013&filetitle=%E7%9B%8A%E7%BB%84%E7%BB%87%E8%A1%8C%E4%B8%BA%E4%B8%8E%E6%8D%9F%E7%BB%84%E7%BB%87%E8%A1%8C%E4%B8%BA%3a%E4%B8%AD%E5%9B%BD%E7%89%B9%E5%BE%81%E7%9A%84%E8%A7%92%E8%89%B2%E5%A4%96%E8%A1%8C%E4%B8%BA%E6%A8%A1%E5%9E%8B%E5%8F%8A%E5%85%B6%E7%BB%8F%E9%AA%8C%E5%AE%9E%E8%AF%81";
14 getContentByJsoup(url);
15 //getLinksByJsoup(divContent);
16
17 }
18 public static void getContentByJsoup(String url){
19 //解析整个网页
20 String content="";
21 try {
22 Document doc=Jsoup.connect(url)
23 .data("jquery", "java")
24 .userAgent("Mozilla")
25 .cookie("auth", "token")
26 .timeout(50000)
27 .get();
28
29 content=doc.toString();
30 } catch (IOException e) {
31 e.printStackTrace();
32 }
33
34 Document doc=Jsoup.parse(content);
35
36 String title=doc.select("span.datatitle").get(1).text();
37 System.out.println("标题:"+title);
38
39 String author=doc.select("td").text().split("【作者】")[1].split("【")[0];
40 System.out.println("作者:"+author);
41
42 String summary=doc.select("td").text().split("【中文摘要】")[1].split("【")[0];
43 System.out.println("中文摘要:"+summary);
44
45
46 }
47
48
49 }