/**
 * Word utilities: loads the synonym dictionary from the classpath once, when the
 * class is initialized, and exposes the resulting lookup tables.
 *
 * <p>Dictionary format, as parsed by this class: one entry per line, tokens separated
 * by whitespace. The first token is the property key, the second the property value,
 * and every following token is a synonym of that property. Lines with fewer than
 * three tokens are ignored.
 */
public class WordUtil {
    private final static Logger logger = LoggerFactory.getLogger(WordUtil.class);

    /** Classpath location of the synonym dictionary. */
    private static final String SYNONYM_DIC_PATH = "/dic/synonym.txt";

    /** Property -> synonyms listed on that property's dictionary line. */
    public static Map<Property,List<String>> synonymMap = Maps.newHashMap();
    /** "key:value" of a property -> the same synonym list that is stored in {@link #synonymMap}. */
    public static Map<String,List<String>> propertySynonymMap = Maps.newHashMap();
    /** Synonym -> "key:value" identifiers of every property whose line lists that synonym. */
    public static Map<String,List<String>> synnonymPropertyMap = Maps.newHashMap();
    /** Synonym -> number of properties listing it; only synonyms listed by more than one property. */
    public static Map<String,Integer> duplicateSyn = Maps.newHashMap();
    /** After loading: synonym -> descriptor, for synonyms listed by more than one property. */
    public static Map<String,Synonym> duplicateSynonymMap = Maps.newHashMap();
    /** After loading: synonym -> descriptor, for synonyms listed by exactly one property. */
    public static Map<String,Synonym> unDuplicateSynonymMap = Maps.newHashMap();

    static {
        loadSynonymDic();
    }

    /**
     * Reads the dictionary and builds every lookup table of this class.
     *
     * <p>If the dictionary cannot be read, the failure is logged and the derived
     * tables are left as they are; the class still initializes.
     */
    private static void loadSynonymDic(){
        if (readDictionary()) {
            indexPropertiesBySynonym();
            collectSynonymDescriptors();
            logger.info("同义词总数:{}",duplicateSynonymMap.size());
            separateUniqueSynonyms();
        }
        logger.info("Synonym map obtained.size is {}",synonymMap.size());
    }

    /**
     * Parses the dictionary resource into {@link #synonymMap} and {@link #propertySynonymMap}.
     *
     * @return {@code true} if the whole resource was read, {@code false} if it is
     *         missing or an I/O error occurred (entries read so far are kept)
     */
    private static boolean readDictionary() {
        // getResourceAsStream signals a missing resource with null rather than an exception.
        java.io.InputStream in = WordUtil.class.getResourceAsStream(SYNONYM_DIC_PATH);
        if (in == null) {
            logger.error("Synonym dictionary {} was not found on the classpath", SYNONYM_DIC_PATH);
            return false;
        }
        try (BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"))) {
            String line;
            while ((line = br.readLine()) != null) {
                // Trim first: a leading separator would otherwise produce an empty first token
                // and shift the key/value columns.
                String[] data = line.trim().split("\\s+");
                if (data.length > 2) {
                    Property p = new Property();
                    p.setKey(data[0]);
                    p.setValue(data[1]);
                    // Take the synonyms straight from the token array. Stripping key and value
                    // out of the raw line would also damage synonyms that contain them and
                    // would interpret them as regular expressions.
                    List<String> value = Lists.newArrayList();
                    for (int i = 2; i < data.length; i++) {
                        value.add(data[i]);
                    }
                    synonymMap.put(p, value);
                    propertySynonymMap.put(data[0]+":"+data[1], value);
                }
            }
            return true;
        } catch (IOException e) {
            logger.error("Failed to read synonym dictionary {}", SYNONYM_DIC_PATH, e);
            return false;
        }
    }

    /** Fills {@link #synnonymPropertyMap}: for each synonym, the "key:value" of every property listing it. */
    private static void indexPropertiesBySynonym() {
        for (Map.Entry<Property, List<String>> entry : synonymMap.entrySet()) {
            Property property = entry.getKey();
            String propertyId = property.getKey()+":"+property.getValue();
            for (String synonym : entry.getValue()) {
                List<String> owners = synnonymPropertyMap.get(synonym);
                if (owners == null) {
                    owners = Lists.newArrayList();
                    synnonymPropertyMap.put(synonym, owners);
                }
                owners.add(propertyId);
            }
        }
    }

    /**
     * Fills {@link #duplicateSynonymMap} with one descriptor per synonym, holding the
     * properties that list it and their count. Stops early, after logging, if an entry
     * already stored under a synonym carries a different name.
     */
    private static void collectSynonymDescriptors() {
        for (Map.Entry<Property, List<String>> entry : synonymMap.entrySet()) {
            Property property = entry.getKey();
            for (String synonym : entry.getValue()) {
                Synonym existing = duplicateSynonymMap.get(synonym);
                boolean existingHasName = existing != null && StringUtils.isNotEmpty(existing.getName());
                if (existingHasName && !existing.getName().equals(synonym)) {
                    // Sanity check: the map is keyed by synonym name, so this indicates corrupted state.
                    logger.info("出现错误bug~~~~~~~~~~,两同义词:{},{}",existing.getName(),synonym);
                    logger.info("Warning~~~~~~~~~~~~");
                    return;
                }
                List<Property> properties;
                if (existingHasName) {
                    // Seen before: extend the property list of the previous descriptor.
                    properties = existing.getProperty();
                } else {
                    properties = Lists.newArrayList();
                }
                properties.add(property);

                Synonym syn = new Synonym();
                syn.setName(synonym);
                syn.setProperty(properties);
                syn.setNum(properties.size());
                duplicateSynonymMap.put(synonym, syn);
            }
        }
    }

    /**
     * Records the count of every synonym listed by more than one property in
     * {@link #duplicateSyn} and moves all other descriptors from
     * {@link #duplicateSynonymMap} to {@link #unDuplicateSynonymMap}.
     */
    private static void separateUniqueSynonyms() {
        // Collect first, move afterwards: the map must not be modified while it is iterated.
        List<String> uniqueSynonyms = Lists.newArrayList();
        for (Map.Entry<String, Synonym> entry : duplicateSynonymMap.entrySet()) {
            Synonym descriptor = entry.getValue();
            if (descriptor.getNum() > 1) {
                duplicateSyn.put(entry.getKey(), descriptor.getNum());
            } else {
                uniqueSynonyms.add(entry.getKey());
            }
        }
        for (String synonym : uniqueSynonyms) {
            unDuplicateSynonymMap.put(synonym, duplicateSynonymMap.remove(synonym));
        }
    }

    /**
     * Returns the synonyms of a property.
     *
     * <p>NOTE(review): the lookup is a hash-map lookup keyed by {@code Property}, so it
     * relies on {@code Property} implementing {@code equals}/{@code hashCode} — confirm.
     *
     * @param p the property to look up; may be {@code null}
     * @return the synonyms stored for {@code p}, or an empty list if {@code p} is
     *         {@code null} or no entry exists for it; never {@code null}
     */
    public static List<String> getSynonym(Property p){
        if (p == null) {
            return Collections.emptyList();
        }
        List<String> synonyms = synonymMap.get(p);
        if (synonyms == null) {
            return Collections.emptyList();
        }
        return synonyms;
    }
}