package com.imooc.regex;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Sample program that reads an HTML file and prints every list item matching
 * {@link #LIST_ITEM}, together with its two capture groups.
 */
public class RegexSample {

    /** File that is read when no path is supplied on the command line. */
    private static final String DEFAULT_HTML_PATH =
            "D:\\eclipse-workspace\\regex\\WebContent\\sample.html";

    /**
     * Matches an {@code <li>} element whose text is 2 to 8 characters in the
     * range U+4E00 to U+9FA5 (group 1) immediately followed by one or more
     * ASCII letters (group 2). Compiled once because a Pattern is immutable
     * and reusable.
     */
    private static final Pattern LIST_ITEM =
            Pattern.compile("<li>([\\u4e00-\\u9fa5]{2,8})([a-zA-Z]+)</li>");

    /**
     * Reads the HTML file and prints, for every match, the whole match followed
     * by group 1 and group 2, each on its own line of standard output.
     *
     * <p>If the file cannot be read, a message is written to standard error and
     * nothing is printed to standard output.
     *
     * @param args optional; {@code args[0]} is the path of the file to scan.
     *             When absent, {@link #DEFAULT_HTML_PATH} is used.
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_HTML_PATH;

        String content;
        try {
            content = readAll(path);
        } catch (IOException e) {
            // Matching against missing or partially read content would be
            // misleading, so report the failure and stop.
            System.err.println("Failed to read " + path + ": " + e);
            return;
        }

        // Find each successive match in the file content.
        Matcher matcher = LIST_ITEM.matcher(content);
        while (matcher.find()) {
            System.out.println(matcher.group(0)); // the whole matched element
            System.out.println(matcher.group(1)); // first capture group
            System.out.println(matcher.group(2)); // second capture group
        }
    }

    /**
     * Reads the file at {@code path} as UTF-8 text, terminating every line
     * with a single {@code '\n'} regardless of the line ending in the file.
     *
     * @param path path of the file to read
     * @return the file content with normalized line terminators
     * @throws IOException if the file cannot be opened or read
     */
    private static String readAll(String path) throws IOException {
        StringBuilder content = new StringBuilder();
        // try-with-resources closes the reader (and the underlying stream)
        // even when readLine() throws.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(path), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line).append('\n');
            }
        }
        return content.toString();
    }
}