一次算法实践：笛卡尔乘积和模式匹配 - ayaov

公告

一次算法实践：笛卡尔乘积和模式匹配

问题一：

穷尽集合之间的元素组合。

input: list of list

[ [‘a’, ‘b’, ‘c’], [‘1’, ‘2’, ‘3’, ‘4’, ‘5’, ‘6’] , [‘A’, ‘B’, ‘C’, ‘D’] ]

output:

[

[‘a’, ‘1’, ‘A’],

[‘a’, ‘1’, ‘B’],

[‘a’, ‘1’, ‘C’],

[‘a’, ‘1’, ‘D’],

...

[‘c’, ‘6’, ‘D’]

]

实现下面的方法, 要求不要用递归的方法。

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class CombinationProblem {

    /**
     * Computes the Cartesian product of the given sets with plain loops (no streams, no recursion).
     *
     * <p>The result starts as a single empty tuple and is extended by one set at a time, so
     * elements are kept as whole strings: multi-character and empty elements are preserved,
     * and a single input set yields one 1-element tuple per element.
     *
     * @param input the sets to combine, in order; inner lists must not be {@code null}
     * @return every combination in the order of nested iteration over the sets;
     *         {@code null} if {@code input} is {@code null}, an empty list if it is empty
     */
    public static ArrayList<ArrayList<String>> getSetCombination(ArrayList<ArrayList<String>> input) {
        if (input == null) {
            return null;
        }
        ArrayList<ArrayList<String>> output = new ArrayList<>();
        if (input.isEmpty()) {
            return output;
        }
        // Seed with the empty tuple so the first set is handled like every other one.
        output.add(new ArrayList<String>());
        for (ArrayList<String> set : input) {
            ArrayList<ArrayList<String>> extended = new ArrayList<>();
            for (ArrayList<String> prefix : output) {
                for (String element : set) {
                    extended.add(append(prefix, element));
                }
            }
            output = extended;
        }
        return output;
    }

    /**
     * Concatenates every element of {@code a1} with every element of {@code a2}.
     *
     * <p>Not used by the combination methods of this class (joining elements into one string
     * loses the element boundaries); kept as a public helper.
     *
     * @param a1 left operands
     * @param a2 right operands
     * @return the concatenations {@code s + m}, iterating {@code a1} in the outer loop
     */
    public static ArrayList<String> combineTwo(List<String> a1, List<String> a2) {
        ArrayList<String> combine = new ArrayList<>();
        for (String s : a1) {
            for (String m : a2) {
                combine.add(s + m);
            }
        }
        return combine;
    }

    /**
     * Computes the Cartesian product as one stream pipeline: a {@code flatMap} stage is
     * chained for each set and the pipeline is collected once at the end.
     *
     * @param input the sets to combine, in order; inner lists must not be {@code null}
     * @return the same result as {@link #getSetCombination(ArrayList)}
     */
    public static ArrayList<ArrayList<String>> getSetCombination1(ArrayList<ArrayList<String>> input) {
        if (input == null) {
            return null;
        }
        if (input.isEmpty()) {
            return new ArrayList<>();
        }
        Stream<ArrayList<String>> tuples = Stream.of(new ArrayList<String>());
        for (ArrayList<String> set : input) {
            tuples = tuples.flatMap(prefix -> set.stream().map(element -> append(prefix, element)));
        }
        return tuples.collect(Collectors.toCollection(ArrayList::new));
    }

    /**
     * Computes the Cartesian product with streams, collecting the intermediate product into
     * a list after each set has been applied.
     *
     * @param input the sets to combine, in order; inner lists must not be {@code null}
     * @return the same result as {@link #getSetCombination(ArrayList)}
     */
    public static ArrayList<ArrayList<String>> getSetCombination2(ArrayList<ArrayList<String>> input) {
        if (input == null) {
            return null;
        }
        ArrayList<ArrayList<String>> output = new ArrayList<>();
        if (input.isEmpty()) {
            return output;
        }
        // Seeding with the empty tuple removes the need to special-case the first pair of sets.
        output.add(new ArrayList<String>());
        for (ArrayList<String> set : input) {
            output = output.stream()
                    .flatMap(prefix -> set.stream().map(element -> append(prefix, element)))
                    .collect(Collectors.toCollection(ArrayList::new));
        }
        return output;
    }

    /** Returns a new list holding the elements of {@code prefix} followed by {@code element}. */
    private static ArrayList<String> append(List<String> prefix, String element) {
        ArrayList<String> tuple = new ArrayList<>(prefix.size() + 1);
        tuple.addAll(prefix);
        tuple.add(element);
        return tuple;
    }

    /**
     * Demo: prints the product of three sample sets with each approach, followed by the
     * elapsed wall-clock milliseconds of that approach (computation plus printing).
     */
    public static void main(String[] args) {
        ArrayList<String> a1 = new ArrayList<>(Arrays.asList("a", "b", "c"));
        ArrayList<String> a2 = new ArrayList<>(Arrays.asList("1", "2", "3", "4", "5", "6"));
        ArrayList<String> a3 = new ArrayList<>(Arrays.asList("A", "B", "C"));
        ArrayList<ArrayList<String>> input = new ArrayList<>();
        input.add(a1);
        input.add(a2);
        input.add(a3);
        // Hard-coded three-level stream; the timer starts before the work so it is measured too.
        long method1 = System.currentTimeMillis();
        List<List<String>> list = a1.stream()
                .flatMap(x -> a2.stream().flatMap(y -> a3.stream().map(z -> Arrays.asList(x, y, z))))
                .collect(Collectors.toList());
        System.out.println("output1::" + list);
        System.out.println("耗时::" + (System.currentTimeMillis() - method1));
        // Plain loops, no streams.
        long method2 = System.currentTimeMillis();
        System.out.println("output2::" + getSetCombination(input));
        System.out.println("耗时::" + (System.currentTimeMillis() - method2));
        // Single stream pipeline with one flatMap stage per set.
        long method3 = System.currentTimeMillis();
        System.out.println("output3::" + getSetCombination1(input));
        System.out.println("耗时::" + (System.currentTimeMillis() - method3));
        // Streams, materializing the intermediate product after every set.
        long method4 = System.currentTimeMillis();
        System.out.println("output4::" + getSetCombination2(input));
        System.out.println("耗时::" + (System.currentTimeMillis() - method4));
    }

}

参考文档及扩展:

穷尽集合之间的元素组合 https://blog.csdn.net/weixin_44690103/article/details/106763626
笛卡尔积算法JAVA实现（递归） https://www.cnblogs.com/tusheng/articles/8109068.html
Java Stream 菜鸟教程 https://www.runoob.com/java/java8-streams.html

问题二:

字典查找的算法。

input:

1. input_file

每一行有一个词汇，如“浙江”, “浙江大学”, “美国”, “美国政府”。该文件可能有100万词

2. a document，字符串。一般有2000字左右。如 “美国规划协会中国办公室揭牌仪式及美国规划领域合作研讨会在浙江大学城乡规划设计研究院208会议室举行。美国规划协会CEO James Drinan，国际项目及外联主任Jeffrey Soule先生，浙江大学党委副书记任少波，浙江大学控股集团领导杨其和，西湖区政府代表应权英副主任....”

output:

输出document中出现的词汇,以及其位置列表。如

{

“美国” : [ 0,16, ....],

“浙江”: [28, ...],

“浙江大学”: [28, ...]

}

实现下面的方法，尽量优化算法的时间复杂度：

import java.util.ArrayList;
import java.util.HashMap;

/**
 * Finds which dictionary words occur in a document and at which character offsets.
 *
 * <p>Two strategies are offered: {@link #search(String)} relies on {@link String#indexOf},
 * {@link #search2()} runs a KMP matcher over the document given at construction time.
 * Both skip empty dictionary entries and report every occurrence, including overlapping ones.
 */
public class DictionarySearcher {
    /** Dictionary words, obtained by splitting the constructor argument on commas. */
    String[] words;
    /** The document split into one-character strings; {@code null} when no document was given. */
    String[] documents;

    /**
     * Creates a searcher for use with {@link #search(String)}.
     *
     * @param filename despite its name, the dictionary itself: words separated by commas
     */
    public DictionarySearcher(String filename) {
        this.words = filename.split(",");
    }

    /**
     * Creates a searcher that also holds the document, as required by {@link #search2()}.
     *
     * @param filename despite its name, the dictionary itself: words separated by commas
     * @param document the text to search
     */
    public DictionarySearcher(String filename, String document) {
        this.words = filename.split(",");
        this.documents = document.split("");
    }

    /**
     * Looks up every dictionary word in {@code document} using {@link String#indexOf}.
     *
     * @param document the text to search
     * @return for each word that occurs, the ascending start offsets of all its occurrences;
     *         words that do not occur, and empty words, are absent from the map
     */
    public HashMap<String, ArrayList<Integer>> search(String document) {
        HashMap<String, ArrayList<Integer>> output = new HashMap<>();
        for (String word : words) {
            // An empty word matches at every offset and would never let the scan terminate.
            if (word.isEmpty()) {
                continue;
            }
            ArrayList<Integer> positions = new ArrayList<>();
            // Resume one character after each hit so overlapping occurrences are found too.
            for (int pos = document.indexOf(word); pos != -1; pos = document.indexOf(word, pos + 1)) {
                positions.add(pos);
            }
            if (!positions.isEmpty()) {
                output.put(word, positions);
            }
        }
        return output;
    }

    /**
     * Looks up every dictionary word in the document supplied to the two-argument
     * constructor, using KMP matching.
     *
     * @return for each word that occurs, the ascending start offsets of all its occurrences;
     *         words that do not occur, and empty words, are absent from the map
     * @throws IllegalStateException if this instance was created without a document
     */
    public HashMap<String, ArrayList<Integer>> search2() {
        if (documents == null) {
            throw new IllegalStateException(
                    "search2() requires a document; use DictionarySearcher(String, String)");
        }
        // Re-join the stored pieces once so the matcher can compare chars directly.
        String text = String.join("", documents);
        HashMap<String, ArrayList<Integer>> output = new HashMap<>();
        for (String word : words) {
            if (word.isEmpty()) {
                continue;
            }
            ArrayList<Integer> positions = kmpSearch(text, word);
            if (!positions.isEmpty()) {
                output.put(word, positions);
            }
        }
        return output;
    }

    /** Returns the start offsets of all occurrences of the non-empty {@code pattern} in {@code text}. */
    private static ArrayList<Integer> kmpSearch(String text, String pattern) {
        ArrayList<Integer> positions = new ArrayList<>();
        int m = pattern.length();
        int[] fail = buildFailureTable(pattern);
        int j = 0;
        for (int i = 0; i < text.length(); i++) {
            while (j >= 0 && text.charAt(i) != pattern.charAt(j)) {
                j = fail[j];
            }
            j++;
            if (j == m) {
                positions.add(i - m + 1);
                // Fall back to the longest border of the whole pattern to catch overlaps.
                j = fail[m];
            }
        }
        return positions;
    }

    /**
     * Builds a table of length {@code pattern.length() + 1} where entry {@code j} is the length
     * of the longest proper prefix of the first {@code j} characters that is also their suffix;
     * entry 0 is -1 as a sentinel.
     */
    private static int[] buildFailureTable(String pattern) {
        int m = pattern.length();
        int[] fail = new int[m + 1];
        fail[0] = -1;
        int k = -1;
        for (int j = 0; j < m; j++) {
            while (k >= 0 && pattern.charAt(j) != pattern.charAt(k)) {
                k = fail[k];
            }
            k++;
            fail[j + 1] = k;
        }
        return fail;
    }

    /**
     * Computes the optimized KMP "next" table of a pattern.
     *
     * <p>Kept as a public helper; the matcher in this class uses its own table because it
     * also needs the fallback position after a complete match.
     *
     * @param ps the pattern
     * @return an array of length {@code ps.length()}; entry 0 is -1, and an entry is replaced
     *         by the fallback's own entry when both positions hold the same character;
     *         an empty array for an empty pattern
     */
    public static int[] getNext(String ps) {
        int length = ps.length();
        int[] next = new int[length];
        if (length == 0) {
            return next;
        }
        next[0] = -1;
        int j = 0;
        int k = -1;
        while (j < length - 1) {
            if (k == -1 || ps.charAt(j) == ps.charAt(k)) {
                j++;
                k++;
                next[j] = ps.charAt(j) != ps.charAt(k) ? k : next[k];
            } else {
                k = next[k];
            }
        }
        return next;
    }

    /** Demo: runs both search strategies on a sample dictionary and document and prints timings. */
    public static void main(String[] args) {
        String filename = "浙江,浙江大学,美国,美国政府,Jeffrey";
        String document = "美国规划协会中国办公室揭牌仪式及美国规划领域合作研讨会在浙江大学城乡规划设计研究院208会议室举行。" +
                "美国规划协会CEO James Drinan，国际项目及外联主任Jeffrey Soule先生，浙江大学党委副书记任少波，" +
                "浙江大学控股集团领导杨其和，西湖区政府代表应权英副主任....";
        long method1 = System.currentTimeMillis();
        DictionarySearcher ds = new DictionarySearcher(filename);
        System.out.println("Java字符串匹配" + ds.search(document));
        System.out.println("Java字符串匹配耗时::" + (System.currentTimeMillis() - method1));
        long method2 = System.currentTimeMillis();
        DictionarySearcher ds1 = new DictionarySearcher(filename, document);
        System.out.println("KMP模式匹配" + ds1.search2());
        System.out.println("KMP模式匹配耗时::" + (System.currentTimeMillis() - method2));
    }
}

参考文档及扩展:

字典查找的算法 https://blog.csdn.net/weixin_44690103/article/details/106767109
（原创）详解KMP算法 https://www.cnblogs.com/yjiyjige/p/3263858.html
字符串匹配算法的比较（BF算法/KMP算法/jdk自带的indexOf方法）https://www.cnblogs.com/maoerbao/p/14690335.html

posted on 2021-11-07 20:36 ayaov 阅读(419) 评论(0) 收藏举报

刷新页面返回顶部