//自己看教程写的代码,感觉涵盖了正则大部分的知识点,基本够用于平常工作(一些地方的注释写给自己看的,可以略过:D)
package basic;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.Arrays;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Regular-expression demos: validation, splitting, replacing, matching and extraction.
 *
 * <p>Predefined character classes:
 * <ul>
 * <li>{@code \d} digit, {@code \D} non-digit</li>
 * <li>{@code \s} whitespace, {@code \S} non-whitespace</li>
 * <li>{@code \w} word character, {@code \W} non-word character</li>
 * <li>{@code .} any character</li>
 * </ul>
 *
 * <p>Quantifiers:
 * <ul>
 * <li>{@code ?} zero or one</li>
 * <li>{@code +} one or more</li>
 * <li>{@code *} zero or more</li>
 * <li>{@code {n,m}} at least n and at most m</li>
 * </ul>
 *
 * <p>Back-references are shown in {@link #splitDemo()}, {@link #replaceDemo()},
 * {@link #demo1()} and {@link #demo2()}. Another typical use:
 * {@code content.replaceAll("<courseId:([0-9]+)>", "<cms:course st='name' uid='$1'/>")}.
 *
 * @author Tank
 * @date 2016-03-10
 */
public class RegexDemo {
    /**
     * Email pattern source: one or more letters/digits/underscores, an {@code @},
     * one or more letters/digits, then one to three {@code .letters} groups.
     */
    private static final String EMAIL_REGEX = "[a-zA-Z0-9_]+@[a-zA-Z0-9]+(\\.[a-zA-Z]+){1,3}";

    // Compiled once so the validators and demo3() do not recompile on every call.
    private static final Pattern EMAIL_PATTERN = Pattern.compile(EMAIL_REGEX);
    private static final Pattern QQ_PATTERN = Pattern.compile("[1-9]\\d{4,14}");
    private static final Pattern TEL_PATTERN = Pattern.compile("1[358]\\d{9}");

    /**
     * Runs one of the demos; uncomment the call you want to try.
     *
     * @param args unused
     * @throws IOException if {@link #demo3()} cannot read its input file
     */
    public static void main(String[] args) throws IOException {
        // System.out.println(isQQ("0124309"));
        // System.out.println(isEmail("asdf@q1q.com.cn"));
        // System.out.println(isTel("1321234123a"));
        // splitDemo();
        // replaceDemo();
        // matcherDemo();
        // demo1();
        // demo2();
        demo3();
    }

    /**
     * Validates a QQ number: 5 to 15 digits, the first of which is not 0.
     *
     * @param str the candidate string; may be {@code null}
     * @return {@code true} if the whole string is a valid QQ number,
     *         {@code false} otherwise (including for {@code null})
     */
    public static boolean isQQ(String str) {
        // Equivalent character-class form: "[1-9][0-9]{4,14}"
        return str != null && QQ_PATTERN.matcher(str).matches();
    }

    /**
     * Validates an email address against {@code EMAIL_REGEX}: a local part of
     * letters, digits or underscores, an {@code @}, an alphanumeric label, and
     * one to three dot-separated alphabetic labels.
     *
     * @param str the candidate string; may be {@code null}
     * @return {@code true} if the whole string matches,
     *         {@code false} otherwise (including for {@code null})
     */
    public static boolean isEmail(String str) {
        // matches() requires the entire input to match, not just a substring.
        return str != null && EMAIL_PATTERN.matcher(str).matches();
    }

    /**
     * Validates a mobile phone number: 11 digits starting with 13, 15 or 18.
     *
     * @param str the candidate string; may be {@code null}
     * @return {@code true} if the whole string matches,
     *         {@code false} otherwise (including for {@code null})
     */
    public static boolean isTel(String str) {
        return str != null && TEL_PATTERN.matcher(str).matches();
    }

    /**
     * Splits a string on runs of a repeated character and prints the pieces.
     *
     * <p>Other delimiters worth trying with {@code split}:
     * <ul>
     * <li>runs of spaces: regex {@code " +"}</li>
     * <li>a literal dot: regex {@code "\\."} (a bare {@code .} means any character)</li>
     * <li>a literal backslash, e.g. in {@code "c:\\abc\\a.txt"}: regex {@code "\\\\"}</li>
     * </ul>
     */
    public static void splitDemo() {
        String str = "asdffasddfzxxxc";
        // (.) captures one character; \1+ requires that same character one or more times.
        String regex = "(.)\\1+";
        String[] parts = str.split(regex);
        System.out.println(Arrays.toString(parts));
    }

    /**
     * Collapses every run of a repeated character into a single occurrence
     * using a back-reference in the replacement, then prints the result.
     */
    public static void replaceDemo() {
        String str = "asdffasddfzxxxc";
        String regex = "(.)\\1+";
        // $1 in the replacement refers to the character captured by group 1.
        String replacement = "$1";
        str = str.replaceAll(regex, replacement);
        System.out.println(str);
    }

    /**
     * Prints every three-character word in a sample text together with its
     * start and end offsets.
     *
     * <p>Shows the Pattern / Matcher / find() workflow; {@code \b} is a word boundary.
     */
    public static void matcherDemo() {
        String str = "adsf sdf zzxjaj. zjd jsh saa, sdf asdf(lei)";
        String regex = "\\b\\w{3}\\b";
        // Wrap the regex in a reusable Pattern object.
        Pattern p = Pattern.compile(regex);
        // Bind the pattern to the input text.
        Matcher m = p.matcher(str);
        // Each find() advances to the next match; group()/start()/end() describe it.
        while (m.find()) {
            System.out.println(m.group());
            System.out.println(m.start() + "..." + m.end());
        }
    }

    /**
     * Cleans up a stuttered sentence so that it reads 我要学编程, and prints it.
     */
    public static void demo1() {
        String str = "我我...我我我...要要要...学学......编....编编编.....程程程....程程";
        // Step 1: drop every literal dot.
        str = str.replaceAll("\\.", "");
        // Step 2: collapse each run of a repeated character to one occurrence.
        str = str.replaceAll("(.)\\1+", "$1");
        System.out.println(str);
    }

    /**
     * Sorts a list of IPv4 addresses by their numeric segments and prints them.
     *
     * <p>Each segment is zero-padded to three digits so that plain string
     * ordering agrees with numeric ordering; the padding is stripped again
     * before printing. Because a {@link TreeSet} is used for sorting,
     * duplicate addresses would be printed only once.
     */
    public static void demo2() {
        String str = "192.68.1.254 102.49.23.13 10.10.10.10 10.101.88.95 2.2.2.2 8.109.90.30";
        // Alternative padding: prefix one-digit segments with "00" and two-digit ones with "0":
        //   str = str.replaceAll("(\\b\\d{1}\\b)", "00$1");
        //   str = str.replaceAll("(\\b\\d{2}\\b)", "0$1");
        // Used here: prefix every segment with two zeros (max segment length minus one),
        // then keep only the last three digits of each segment.
        str = str.replaceAll("(\\d+)", "00$1");
        str = str.replaceAll("0*(\\d{3})", "$1");
        System.out.println(str);

        String regex = "(\\d{3}\\.){3}\\d{3}";
        Pattern p = Pattern.compile(regex);
        Matcher m = p.matcher(str);
        // TreeSet keeps the padded addresses in natural (lexicographic) order.
        Set<String> sorted = new TreeSet<String>();
        while (m.find()) {
            sorted.add(m.group());
        }
        for (String padded : sorted) {
            // Strip the leading zeros that were added for sorting.
            String restored = padded.replaceAll("0*(\\d+)", "$1");
            System.out.println(restored);
        }
    }

    /**
     * Mini "crawler": prints every email address found in a text file.
     *
     * <p>The file read is {@code IODemo.ROOT_PATH + "mail.txt"} (presumably
     * {@code ROOT_PATH} is a directory prefix ending in a separator; verify in
     * {@code IODemo}). The reader is closed even if reading fails.
     *
     * @throws IOException if the file cannot be opened or read
     */
    public static void demo3() throws IOException {
        // To scan a live web page instead, read from a URL connection:
        //   URL url = new URL("http://www.baidu.com");
        //   URLConnection conn = url.openConnection();
        //   BufferedReader urlBr = new BufferedReader(new InputStreamReader(conn.getInputStream()));
        // The local-file variant below works offline.
        try (BufferedReader br = new BufferedReader(new FileReader(IODemo.ROOT_PATH + "mail.txt"))) {
            String line;
            while ((line = br.readLine()) != null) {
                // find() locates addresses anywhere in the line, unlike matches().
                Matcher m = EMAIL_PATTERN.matcher(line);
                while (m.find()) {
                    System.out.println(m.group());
                }
            }
        }
    }
}