// 网页爬虫 (web crawler)

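/*
    Web crawler: reads the content behind a URL line by line and prints
    every substring that matches a regular expression.
*/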

import java.io.*;
import java.net.*;
import java.util.regex.*;

/**
 * Small web-scraping demo: opens a URL, reads the response line by line and
 * prints every substring that matches a regular expression.
 *
 * <p>The URL and the regular expression are placeholder literals inside
 * {@link #getMails()} and must be replaced with real values before use.
 */
class RegexTest2
{
    /**
     * Program entry point; simply delegates to {@link #getMails()}.
     *
     * @param args command-line arguments (not used)
     * @throws Exception anything propagated from {@link #getMails()}
     */
    public static void main(String[] args) throws Exception
    {
        getMails();
    }

    /**
     * Reads the resource behind the configured URL and prints each regex
     * match to standard output, one match per line.
     *
     * <p>Matching is done per input line, so a match can never span a line
     * break. The reader is closed when the method exits, whether it returns
     * normally or throws.
     *
     * @throws Exception a {@link MalformedURLException} if the URL literal is
     *         not a valid URL, an {@link IOException} if connecting or reading
     *         fails, or a {@link PatternSyntaxException} if the regex literal
     *         is invalid
     */
    public static void getMails() throws Exception
    {
        URL url = new URL("要抓取的内容");
        URLConnection conn = url.openConnection();

        // try-with-resources guarantees the reader (and the connection's
        // input stream it wraps) is closed even if compiling the pattern or
        // reading a line throws.
        // NOTE(review): no charset is passed to InputStreamReader, so the
        // default charset is used for decoding — confirm that matches the
        // encoding of the target page.
        try (BufferedReader bufr = new BufferedReader(new InputStreamReader(conn.getInputStream()))) {
            String mailReg = "正则表达式";

            // Compile once, outside the read loop, and reuse for every line.
            Pattern p = Pattern.compile(mailReg);

            String line;
            while ((line = bufr.readLine()) != null) {
                Matcher m = p.matcher(line);
                // A single line may contain several matches; print them all.
                while (m.find()) {
                    System.out.println(m.group());
                }
            }
        }
    }
}
// posted @ 2014-07-23 18:37  Nophy  阅读(396)  评论(0)    收藏  举报