1 package util;
2
3 import java.io.BufferedReader;
4 import java.io.File;
5 import java.io.FileNotFoundException;
6 import java.io.FileOutputStream;
7 import java.io.IOException;
8 import java.io.InputStreamReader;
9 import java.net.MalformedURLException;
10 import java.net.URL;
11 import java.net.URLConnection;
12
13 import org.jsoup.Jsoup;
14 import org.jsoup.nodes.Document;
15 import org.jsoup.nodes.Element;
16 import org.jsoup.select.Elements;
17
18 public class DataDownUtil {
19 /**
20 * @author UPO
21 * @param url
22 * @param encoding
23 * @return String 网页的源代码
24 * <a href="http://www.baidu.com">百度</a>
25 */
26 public static String getHtmlResourceByUrl(String url,String encoding){
27 StringBuffer buffer=new StringBuffer();
28 URL urlobj=null;
29 URLConnection uc=null;
30 InputStreamReader isr=null;
31 BufferedReader reader=null;
32 try {
33 //建立网络连接
34 urlobj=new URL(url);
35 //打开网络
36 uc=urlobj.openConnection();
37 //建立文件输入流的对象
38 isr=new InputStreamReader(uc.getInputStream(), encoding);
39 //建立文件缓冲写入流(相当于ctrl+v放入内存中)
40 reader=new BufferedReader(isr);
41
42 //建立临时变量
43 String temp=null;
44 while((temp=reader.readLine())!=null){
45 buffer.append(temp);
46 buffer.append("\n");
47 }
48
49 } catch (MalformedURLException e) {
50 // TODO Auto-generated catch block
51 e.printStackTrace();
52 System.out.println("网络连接不可用");
53 }catch (IOException e) {
54 // TODO Auto-generated catch block
55 e.printStackTrace();
56 System.out.println("网络连接失败");
57 }finally {
58 if(isr!=null){
59 try {
60 isr.close();
61 } catch (IOException e) {
62 // TODO Auto-generated catch block
63 e.printStackTrace();
64 }
65 }
66 }
67 return buffer.toString();
68 }
69 public static String getContext(){
70
71 String context=null;
72 int start=0;
73 while(start>=0&&start<=20){
74
75
76 String url="https://movie.douban.com/subject/3168101/comments?start="+start+"&limit=20&sort=new_score&status=P";
77 String encoding="utf-8";
78 start=start+20;
79 //1.获取网页源代码
80 String html=getHtmlResourceByUrl(url, encoding);
81 //System.out.println(html);
82 //2.
83 Document document=Jsoup.parse(html);
84 Element element=document.getElementById("comments");
85 Elements elements=element.getElementsByClass("comment-item");
86 for (Element ele : elements) {
87 //https://movie.douban.com/subject/3168101/comments?start=20&limit=20&sort=new_score&status=P
88 String name=ele.getElementsByTag("a").last().text();
89 String desc=ele.getElementsByClass("short").text();
90 String time=ele.getElementsByClass("comment-time").text();
91 String votes=ele.getElementsByClass("votes").text();
92 System.out.println("\nname:"+name+"\ndesc:"+desc+"\ntime:"+time+"\nvotes:"+votes);
93 context="\nname:"+name+"\ndesc:"+desc+"\ntime:"+time+"\nvotes:"+votes;
94 }
95 }
96 return context;
97 }
98
99 public static void writeFileByLine(String content,String filePath){
100 File file=new File(filePath);
101 try {
102 FileOutputStream out=new FileOutputStream(file);
103 out.write(content.getBytes());
104 } catch (FileNotFoundException e) {
105 // TODO Auto-generated catch block
106 e.printStackTrace();
107 } catch (IOException e) {
108 // TODO Auto-generated catch block
109 e.printStackTrace();
110 }
111 }
112
113 public static void main(String[] args) {
114 System.out.println("你好阿泡");
115 System.out.println(getContext());
116
117
118
119 }
120
121 }