import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* 爬取网站上的邮箱
* https://book.douban.com/subject/24753651/discussion/58975313
* @author He
*/
public class GetEmail {
public static void main(String[] args) throws Exception {
//实例化URL类
URL url=new URL("https://book.douban.com/subject/24753651/discussion/58975313");
//取得链接
URLConnection conn = url.openConnection();
//取得网页数据
BufferedReader bufIn = new BufferedReader(new InputStreamReader(conn.getInputStream()));
//声明循环结束标记
String line=null;
//声明正则
String emailReg="\\w+@\\w+(\\.\\w+)+";
//将正则表达式封装成对象patttern
Pattern p = Pattern.compile(emailReg);
//循环读取网页数据
while ((line=bufIn.readLine())!=null){
//让正则对象和要操作的数据相关联,获取正则匹配引擎。
Matcher m = p.matcher(line);
//循环查询匹配
while (m.find()){
//打印匹配后的结果
System.out.println(m.group());
}
}
}
}