1 package demo;
2
3 import java.io.BufferedReader;
4 import java.io.IOException;
5 import java.io.InputStream;
6 import java.io.InputStreamReader;
7
/**
 * Fetches the textual content of a web page.
 *
 * @author sy
 */
public class GrabWebHtml {

    /** Maximum time to wait for the connection to be established, in milliseconds. */
    private static final int CONNECT_TIMEOUT_MS = 10000;

    /** Maximum time to wait for a single read from the remote side, in milliseconds. */
    private static final int READ_TIMEOUT_MS = 30000;

    /**
     * Demo entry point: downloads a fixed page and prints it to standard output.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String url = "http://www.baidu.com";
        System.out.println(getWebHtml(url));
    }

    /**
     * Downloads the resource at the given URL and returns it as text decoded as UTF-8.
     *
     * <p>The content is read line by line and every line is terminated with a single
     * {@code '\n'}, so the original line separators are normalized and a non-empty
     * result always ends with a newline.
     *
     * <p>I/O failures (malformed URL, connection error, timeout, ...) are not propagated:
     * the stack trace is printed to standard error and whatever was read up to that point
     * is returned, which is the empty string if nothing could be read.
     *
     * @param domain the absolute URL to fetch, e.g. {@code "http://www.baidu.com"}
     * @return the decoded content read so far; never {@code null}
     */
    public static String getWebHtml(String domain) {
        // The accumulator is method-local, so the unsynchronized StringBuilder is sufficient.
        StringBuilder content = new StringBuilder();
        try {
            java.net.URLConnection connection = new java.net.URL(domain).openConnection();
            // Without explicit timeouts an unresponsive server would block the caller forever.
            connection.setConnectTimeout(CONNECT_TIMEOUT_MS);
            connection.setReadTimeout(READ_TIMEOUT_MS);

            // try-with-resources closes the reader and the underlying stream on every path,
            // even when one of the close() calls itself throws.
            try (InputStream is = connection.getInputStream();
                 BufferedReader reader = new BufferedReader(
                         new InputStreamReader(is, java.nio.charset.StandardCharsets.UTF_8))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    content.append(line).append('\n');
                }
            }
        } catch (IOException e) {
            // Best-effort contract kept from the original: report the failure and
            // fall through to return the partial (possibly empty) content.
            e.printStackTrace();
        }
        return content.toString();
    }

}