AC自动机, 字符串匹配算法

package utils

import java.util.HashMap
import java.util.LinkedList
import util.control.Breaks._
import collection.JavaConversions._

/**
 * One node of the character trie used by the Aho–Corasick matcher.
 *
 * Every field is mutable; a freshly constructed node has no children,
 * is not marked as the end of a word, and has no failure link.
 */
class NodeAc {
  /** Character stored on this node; a single blank until it is assigned. */
  var value: Char = ' '

  /** Set to true on the node where an inserted dictionary word ends. */
  var isLeaf: Boolean = false

  /** Failure link; stays null until the automaton builder assigns it. */
  var fail: NodeAc = null

  /** Outgoing edges, keyed by the character that leads to each child. */
  var children: HashMap[Char, NodeAc] = new HashMap[Char, NodeAc]()
}

/**
 * Aho–Corasick automaton over `Char`s, backed by one shared trie.
 *
 * Usage: call [[buildTrie]] once per dictionary word, then [[buildACFromTrie]]
 * to compute the failure links, then query with [[containDictionaryWord]].
 * Words added after `buildACFromTrie` have no failure links until it is run again.
 */
object AhoCorasickAutomation {
  /** Root of the shared trie. Its `fail` link is never assigned and stays null. */
  var root: Option[NodeAc] = Option(new NodeAc())

  /**
   * Inserts one dictionary word into the trie.
   *
   * @param words the word to insert; null or empty input is ignored
   */
  def buildTrie(words: String): Unit = {
    if (words != null && !words.isEmpty) {
      var current: NodeAc = root.get
      for (c <- words) {
        var next = current.children.get(c)
        if (next == null) {
          next = new NodeAc
          next.value = c
          current.children.put(c, next)
        }
        current = next
      }
      // `words` is non-empty, so `current` is the node of its last character.
      current.isLeaf = true
    }
  }

  /**
   * Computes the failure link of every node by a breadth-first walk from the root.
   *
   * Children of the root fail to the root. Any other node fails to the first node,
   * following its parent's failure chain, that has a child with the same character;
   * if the chain runs out, it fails to the root.
   */
  def buildACFromTrie: Unit = {
    val rootNode = root.get
    val queue = new LinkedList[NodeAc]()
    queue.add(rootNode)
    while (!queue.isEmpty) {
      val parent = queue.poll()
      // Plain Java iterator: no implicit Java-to-Scala collection conversion needed.
      val it = parent.children.values().iterator()
      while (it.hasNext) {
        val child = it.next()
        child.fail =
          if (parent eq rootNode) rootNode
          else {
            var candidate = parent.fail
            var target: NodeAc = null
            // The root's own fail link is null, which terminates the walk.
            while (target == null && candidate != null) {
              target = candidate.children.get(child.value)
              candidate = candidate.fail
            }
            if (target != null) target else rootNode
          }
        queue.add(child)
      }
    }
  }

  /**
   * Tells whether the text contains at least one dictionary word as a substring.
   *
   * Requires [[buildACFromTrie]] to have run after the last [[buildTrie]] call:
   * a node without a failure link is dereferenced as null on a mismatch below the root.
   *
   * @param words the text to scan; null or empty text yields false
   * @return true as soon as any inserted word is found inside the text
   */
  def containDictionaryWord(words: String): Boolean = {
    if (words == null || words.isEmpty) false
    else {
      val rootNode = root.get
      var current = rootNode
      var found = false
      var i = 0
      while (!found && i < words.length) {
        val c = words.charAt(i)
        // Fall back along failure links until some node can consume `c`.
        while ((current ne rootNode) && current.children.get(c) == null) {
          current = current.fail
        }
        val next = current.children.get(c)
        current = if (next == null) rootNode else next
        found = endsWithDictionaryWord(current, rootNode)
        i += 1
      }
      found
    }
  }

  /**
   * Checks the node and every node on its failure chain for an end-of-word mark.
   *
   * Checking only the node itself would miss a dictionary word that is a proper
   * suffix of the path matched so far (e.g. "bc" while standing on "abc").
   */
  private def endsWithDictionaryWord(node: NodeAc, rootNode: NodeAc): Boolean = {
    var probe = node
    var hit = false
    while (!hit && probe != null && (probe ne rootNode)) {
      hit = probe.isLeaf
      probe = probe.fail
    }
    hit
  }

}

posted on 2019-01-21 11:11  斜杆青年  阅读(146)  评论(0)    收藏  举报

导航