Java 匹配 http 或 https 的 URL 的正则表达式(2018-09-12)

package org.jimmy.autosearch20180821.test;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Small demo that checks two sample URLs against hand-written regular
 * expressions for http/https URLs and prints whether each one matches.
 *
 * <p>Each check uses {@link Matcher#matches()}, so the whole URL must be
 * consumed by the expression, not just a prefix of it.
 */
public class TestUrlRegularExpression {

    /**
     * Runs both checks and prints one result line per check.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Quantifiers are greedy by default. A single expression that covers
        // every URL shape has not been attempted yet, hence two expressions.

        // First draft (superseded by the assignments below; kept to show how
        // the expression evolved).
        String urlRegex = "https?://(\\w|-)+(\\.(\\w|-)+)+(/(\\w|-)+(/((\\w|-)+\\.(\\w|-)+)|/(\\w|-)*)|(/((\\w|-)+\\.(\\w|-)+)|/(\\w|-)+)|/?)/(((\\w|-)+\\.(\\w|-)+)|(\\w|-)+(\\?\\w+=(\\w|-|%|[\u4e00-\u9fa5])+(\\&\\w+=(\\w|-|%|[\u4e00-\u9fa5])+)*)?)";

        // Revised to: the segment before the query string may now be empty
        // ("*" instead of "+"). Also superseded.
        urlRegex = "https?://(\\w|-)+(\\.(\\w|-)+)+(/(\\w|-)+(/((\\w|-)+\\.(\\w|-)+)|/(\\w|-)*)|(/((\\w|-)+\\.(\\w|-)+)|/(\\w|-)+)|/?)/(((\\w|-)+\\.(\\w|-)+)|(\\w|-)*(\\?\\w+=(\\w|-|%|[\u4e00-\u9fa5])+(\\&\\w+=(\\w|-|%|[\u4e00-\u9fa5])+)*)?)";

        // Revised version actually used below: scheme, dotted host, then one
        // or more "/"-led segments, each optionally followed by a
        // "?key=value&key=value" query string.
        urlRegex = "https?://(\\w|-)+(\\.(\\w|-)+)+(/(\\w+(\\?(\\w+=(\\w|%|-)*(\\&\\w+=(\\w|%|-)*)*)?)?)?)+";

        // Expression for URLs without query parameters; its last alternative
        // ("/?") also accepts a bare host with no path at all.
        String urlRegex2 = "https?://(\\w|-)+(\\.(\\w|-)+)+(/(\\w|-)+(/((\\w|-)+\\.(\\w|-)+)|/(\\w|-)*)|(/((\\w|-)+\\.(\\w|-)+)|/(\\w|-)+)|/?)";

        // Address-bar URL copied after searching Baidu for "java 正则表达式".
        String url = "https://www.baidu.com/s?wd=java%20%E6%AD%A3%E5%88%99%E8%A1%A8%E8%BE%BE%E5%BC%8F&rsv_spt=1&rsv_iqid=0xf233885e000326c0&issp=1&f=8&rsv_bp=0&rsv_idx=2&ie=utf-8&tn=baiduhome_pg&rsv_enter=1&rsv_sug3=24&rsv_sug1=8&rsv_sug7=100&rsv_t=0d31XJ5IR0T98Bv150wUMKQHirYYsh2IgKsJFk0FH4wGur10ND3LypRnWtdrcFCsDH%2F3&rsv_sug2=0&inputT=6942&rsv_sug4=6942";

        // A URL without any query parameters.
        String url2 = "https://www.baidu.com";

        System.out.println(describeMatch(urlRegex, url));
        System.out.println(describeMatch(urlRegex2, url2));
    }

    /**
     * Reports whether {@code input} is matched in full by {@code regex}.
     *
     * @param regex the regular expression to compile
     * @param input the text that must match from start to end
     * @return "匹配" when the whole input matches, otherwise "不匹配"
     */
    private static String describeMatch(String regex, String input) {
        Matcher matcher = Pattern.compile(regex).matcher(input);
        return matcher.matches() ? "匹配" : "不匹配";
    }
}

还是直接上代码.

运行结果见下图:

修复了之前不能匹配微软必应搜索 URL 地址的问题。

posted @ 2018-09-12 11:34  ラピスラズリ(Dawn)  阅读(8112)  评论(0)  编辑  收藏  举报