import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Simple web scraper: downloads a page line by line and prints every substring that
 * matches a regular expression (here, an e-mail address pattern) to standard output.
 */
public class RegexWeb
{
    /** Timeout, in milliseconds, used both for connecting and for each blocking read. */
    private static final int TIMEOUT_MILLIS = 1000 * 10;

    /**
     * Entry point: scans a fixed forum thread page for e-mail-like strings.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the URL is malformed, the pattern is invalid, or the page
     *                   cannot be fetched or read
     */
    public static void main(String[] args) throws Exception
    {
        // Page to scan.
        String str_url = "http://tieba.baidu.com/p/2314539885";
        // Matching rule. A simpler alternative would be: \w+@\w+\.[a-zA-Z]{2,3}
        // The pattern below also accepts dotted local parts and one to three
        // dot-separated domain suffixes of two or three word characters each.
        String regex = "(\\w)+(\\.\\w+)*@(\\w)+((\\.\\w{2,3}){1,3})";
        regexForWeb(str_url, regex);
    }

    /**
     * Fetches the resource at {@code str_url} and prints, one per line, every substring
     * of each input line that matches {@code regex}.
     *
     * <p>Matching is done per line, so a match can never span a line break.
     *
     * @param str_url address of the resource to download
     * @param regex   regular expression to search for in each line
     * @throws Exception if the URL is malformed, the connection or a read fails or times
     *                   out, or {@code regex} is not a valid pattern
     */
    private static void regexForWeb(String str_url, String regex) throws Exception
    {
        URL url = new URL(str_url);
        // Open the connection (nothing is sent until the input stream is requested).
        URLConnection conn = url.openConnection();
        // Bound both the connect phase and every blocking read, so a server that
        // accepts the connection but then stalls cannot hang the program forever.
        conn.setConnectTimeout(TIMEOUT_MILLIS);
        conn.setReadTimeout(TIMEOUT_MILLIS);

        // try-with-resources guarantees the reader (and the underlying stream) is
        // closed even when reading or matching throws.
        // NOTE: the bytes are decoded with the platform default charset, because the
        // InputStreamReader is created without an explicit charset.
        try (BufferedReader buf =
                new BufferedReader(new InputStreamReader(conn.getInputStream()))) {
            // Compile once and reuse the Pattern for every line.
            Pattern p = Pattern.compile(regex);
            String line;
            while ((line = buf.readLine()) != null) {
                Matcher m = p.matcher(line);
                // find() performs partial matching: it locates each successive
                // occurrence inside the line rather than requiring a full-line match.
                while (m.find()) {
                    // Print the text of the current match.
                    System.out.println(m.group());
                }
            }
        }
    }
}