FP-Tree Java实现(一):FP-Tree创建

原始数据

Line1: C A B

Line2: A B D

Line3: A B

Line4: C E

1、  按照单词数量逆排序(小于支撑度的单词舍弃)

Line1: A B C

Line2: A B D

Line3: A B

Line4: C E

2、  构建FP树

Root: -1

--A: 3

  --B: 3

    --C: 1

    --D: 1

--C: 1

  --E: 1

3、构建包含线索的FP树

 

 代码如下

package com.coshaho.fptree;

import java.util.HashMap;
import java.util.Map;

/**
 * FP树节点:仅考虑算法
 * @author coshaho
 * @since 2020/1/5
 */
public class FPNode {
    // 单词
    private String word;
    // 单词出现次数
    private int count = 1;
    // 子节点
    Map<String, FPNode> children = new HashMap<>();
    // 父节点
    private FPNode father;
    // 线索:指向下一个相同单词节点
    private FPNode next;
    // 是否有线索指向自己
    private boolean visited = false;

    public FPNode(String word, int count) {
        this.word = word;
        this.count = count;
    }

    public void increase() {
        count++;
    }

    public void print(int n) {
        for(int i = 0; i < n; i++) {
            if(i == n - 1) {
                System.out.print("--");
            } else {
                System.out.print("  ");
            }
        }
        System.out.println(word + ": " + count);
        for(FPNode child : children.values()) {
            child.print(n + 1);
        }
    }

    public String getWord() {
        return word;
    }

    public void setWord(String word) {
        this.word = word;
    }

    public int getCount() {
        return count;
    }

    public void setCount(int count) {
        this.count = count;
    }

    public Map<String, FPNode> getChildren() {
        return children;
    }

    public void setChildren(Map<String, FPNode> children) {
        this.children = children;
    }

    public FPNode getFather() {
        return father;
    }

    public void setFather(FPNode father) {
        this.father = father;
    }

    public FPNode getNext() {
        return next;
    }

    public void setNext(FPNode next) {
        this.next = next;
    }

    public boolean isVisited() {
        return visited;
    }

    public void setVisited(boolean visited) {
        this.visited = visited;
    }
}

  

package com.coshaho.fptree;

import java.util.*;
import java.util.stream.Collectors;

/**
 * FP树:仅考虑算法
 * @author coshaho
 * @since 2020/1/5
 */
public class FPTree {
    // FP树根节点
    FPNode root = new FPNode("Root", -1);
    // FP树节点线索头
    Map<String, FPNode> firstNodeTable = new HashMap<>();
    // FP树节点线索尾
    Map<String, FPNode> lastNodeTable = new HashMap<>();
    // 支持度
    private int support = 1;

    public FPTree(List<List<String>> data, int support) {
        this.support = support;
        data = sort(data);
        // line为一行日志
        for(List<String> line : data) {
            FPNode curNode = root;
            for(String word : line) {
                if(curNode.getChildren().containsKey(word)) {
                    // 子节点存在则访问次数加一
                    curNode.getChildren().get(word).increase();
                } else {
                    // 子节点不存在则新增子节点
                    FPNode child = new FPNode(word, 1);
                    curNode.getChildren().put(word, child);
                    child.setFather(curNode);
                }
                curNode = curNode.getChildren().get(word);

                // 当前节点有线索指向,则不必重复建立线索
                if(curNode.isVisited()) {
                    continue;
                }

                // 创建线索
                if(firstNodeTable.containsKey(word)) {
                    lastNodeTable.get(word).setNext(curNode);
                } else {
                    firstNodeTable.put(word, curNode);
                }
                lastNodeTable.put(word, curNode);
                curNode.setVisited(true);
            }
        }
    }

    private List<List<String>> sort(List<List<String>> data) {
        Map<String, Integer> wordCount = new HashMap<>();
        // 统计单词出现的次数
        for(List<String> line : data) {
            for(String word : line) {
                if(wordCount.containsKey(word)) {
                    wordCount.put(word, wordCount.get(word) + 1);
                } else {
                    wordCount.put(word, 1);
                }
            }
        }

        List<List<String>> result = new ArrayList<>();
        // 单词排序
        for(List<String> line : data) {
            List<String> newLine = line.stream().filter(word -> wordCount.get(word) >= support)
                    .sorted(Comparator.comparing(word -> wordCount.get(word)).reversed()).collect(Collectors.toList());
            if(null != newLine && 0 != newLine.size()) {
                result.add(newLine);
            }
        }
        return result;
    }

    public void print() {
        root.print(0);
    }

    public static void main(String[] args) {
        List<String> line1 = new ArrayList<>();
        line1.add("C");
        line1.add("A");
        line1.add("B");
        List<String> line2 = new ArrayList<>();
        line2.add("A");
        line2.add("B");
        line2.add("D");
        List<String> line3 = new ArrayList<>();
        line3.add("A");
        line3.add("B");
        List<String> line4 = new ArrayList<>();
        line4.add("C");
        line4.add("E");
        List<List<String>> data = new ArrayList<>();
        data.add(line1);
        data.add(line2);
        data.add(line3);
        data.add(line4);

        FPTree tree = new FPTree(data, 1);
        tree.print();
    }
}

  

 

posted @ 2020-01-05 18:47  coshaho  阅读(589)  评论(0编辑  收藏  举报