package com.app;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
/**
 * Classification rules loaded from an XML rule file.
 * <p>
 * The file is read as a tree: every element that has child elements is a
 * category level and must carry the attributes {@code name}, {@code feature}
 * (whitespace-separated keywords) and {@code weight} (a number). Every element
 * without child elements is a keyword leaf whose text holds
 * whitespace-separated keywords and which must carry a {@code weight}
 * attribute.
 */
public class Rule {
    /**
     * Rule content in the order it was read. The key is the category path with
     * the levels joined by '#' (major#middle#minor); the value is the list of
     * keyword groups for that path: one group per category level on the path,
     * followed by the keyword leaves found directly below the last level.
     */
    private final LinkedHashMap<String, ArrayList<KeyWords>> rule = new LinkedHashMap<String, ArrayList<KeyWords>>();

    /**
     * Creates a rule set from the given rule file.
     * <p>
     * Loading is best effort with respect to I/O and XML syntax: when dom4j
     * reports a {@link DocumentException} the failure is written to standard
     * error and the rule set stays empty. Unchecked exceptions raised while
     * interpreting the document (see {@link #iteElement}) are not caught.
     *
     * @param path
     *            location of the rule file, passed on to {@link #loadXml}
     */
    public Rule(String path) {
        try {
            loadXml(path);
        } catch (DocumentException e) {
            // Name the file that failed; the stack trace alone does not
            // always include it.
            System.err.println("Failed to load rule file '" + path + "'");
            e.printStackTrace();
        }
    }

    /**
     * Parses the rule file with dom4j's {@link SAXReader} and adds every
     * category found below the root element to this rule set.
     *
     * @param rulePath
     *            location of the rule file, handed unchanged to
     *            {@code SAXReader.read(String)}
     * @throws DocumentException
     *             if dom4j cannot read or parse the file
     * @throws IllegalArgumentException
     *             if an element lacks a required attribute or a weight is not
     *             a valid number
     */
    public void loadXml(String rulePath) throws DocumentException {
        SAXReader reader = new SAXReader();
        Document doc = reader.read(rulePath);
        Element root = doc.getRootElement();
        // The root itself is not a category: start with an empty path and no
        // inherited keyword groups.
        iteElement(root, "", new ArrayList<KeyWords>());
    }

    /**
     * Walks the child elements of {@code element} recursively and records the
     * rules found below it.
     * <ul>
     * <li>A child without child elements is a keyword leaf. All leaf children
     * of {@code element} are stored together under {@code className}, after a
     * copy of the inherited {@code keyWords}. An entry already stored under
     * the same key by an earlier element is replaced.</li>
     * <li>A child with child elements is a sub-category. Its {@code feature}
     * keywords are appended to a copy of {@code keyWords} and the walk
     * continues below it with the path {@code className#name}, or just
     * {@code name} when {@code className} is empty.</li>
     * </ul>
     * The list passed in as {@code keyWords} is never modified.
     *
     * @param element
     *            element whose children are visited
     * @param className
     *            category path of {@code element}; empty for the root
     * @param keyWords
     *            keyword groups inherited from the category levels above
     * @throws IllegalArgumentException
     *             if a required {@code feature} or {@code weight} attribute is
     *             missing, or a weight is not a valid number
     */
    public void iteElement(Element element, String className,
            ArrayList<KeyWords> keyWords) {
        // Rule list shared by all leaf children of this element. Created on
        // the first leaf so that categories without direct keyword leaves add
        // no entry to the map.
        ArrayList<KeyWords> leafRule = null;
        Iterator<?> children = element.elementIterator();
        while (children.hasNext()) {
            Element child = (Element) children.next();
            if (child.elements().isEmpty()) {
                // Keyword leaf: keep iterating afterwards so that further
                // leaves and sub-categories of the same parent are not lost.
                KeyWords words = new KeyWords(splitKeywords(child.getText()),
                        parseWeight(child));
                if (leafRule == null) {
                    leafRule = new ArrayList<KeyWords>(keyWords);
                    rule.put(className, leafRule);
                }
                leafRule.add(words);
            } else {
                String name = child.attributeValue("name");
                KeyWords featureWords = new KeyWords(
                        splitKeywords(requiredAttribute(child, "feature")),
                        parseWeight(child));
                // Work on a copy so sibling categories do not see each
                // other's feature keywords.
                ArrayList<KeyWords> childKeyWords = new ArrayList<KeyWords>(
                        keyWords);
                childKeyWords.add(featureWords);
                String childClass;
                if (className.isEmpty()) {
                    childClass = className + name;
                } else {
                    childClass = className + "#" + name;
                }
                iteElement(child, childClass, childKeyWords);
            }
        }
    }

    /**
     * Splits whitespace-separated keywords into a set. Blank text yields an
     * empty set.
     */
    private static HashSet<String> splitKeywords(String text) {
        HashSet<String> keywords = new HashSet<String>(
                Arrays.asList(text.split("\\s+")));
        // split() produces a leading "" when the text starts with whitespace
        // (usual for indented XML) or is empty; "" is not a keyword.
        keywords.remove("");
        return keywords;
    }

    /** Reads the mandatory {@code weight} attribute of an element as a number. */
    private static double parseWeight(Element element) {
        return Double.parseDouble(requiredAttribute(element, "weight"));
    }

    /**
     * Returns the value of an attribute, failing with a message that names
     * the element when the attribute is absent.
     */
    private static String requiredAttribute(Element element, String attribute) {
        String value = element.attributeValue(attribute);
        if (value == null) {
            throw new IllegalArgumentException("Rule element "
                    + element.getPath() + " is missing required attribute '"
                    + attribute + "'");
        }
        return value;
    }

    /**
     * One group of rule keywords together with its weight.
     */
    class KeyWords {
        /**
         * The keywords of this group.
         */
        HashSet<String> value;
        /**
         * The weight of this group.
         */
        double weight;

        /**
         * @param value
         *            the keywords of this group
         * @param weight
         *            the weight of this group
         */
        public KeyWords(HashSet<String> value, double weight) {
            this.value = value;
            this.weight = weight;
        }

        /**
         * @return the keywords
         */
        public HashSet<String> getValue() {
            return value;
        }

        /**
         * @param value
         *            the keywords to set
         */
        public void setValue(HashSet<String> value) {
            this.value = value;
        }

        /**
         * @return the weight
         */
        public double getWeight() {
            return weight;
        }

        /**
         * @param weight
         *            the weight to set
         */
        public void setWeight(double weight) {
            this.weight = weight;
        }
    }

    /**
     * @return the rule map (category path to keyword groups); this is the
     *         internal map itself, not a copy
     */
    public LinkedHashMap<String, ArrayList<KeyWords>> getRule() {
        return rule;
    }

    /**
     * Loads {@code rule2.xml} and prints every category path followed by the
     * keyword sets of its groups.
     */
    public static void main(String[] args) {
        Rule r = new Rule("rule2.xml");
        LinkedHashMap<String, ArrayList<KeyWords>> rule = r.getRule();
        for (String className : rule.keySet()) {
            System.out.println(className + "---------------");
            for (KeyWords words : rule.get(className)) {
                System.out.println(words.getValue().toString());
            }
        }
    }
}