敏感词判断二:敏感词任意排列组合都过滤(scala版本)

    // Demo: flag a text when every word of at least one keyword group occurs in it, in any order.
    val mottoWord = "骂人风波胡歌"
    val mottoWord2 = "草木皆兵"
    // All contiguous character n-grams of the text, from length 1 up to the full text length.
    val mottoWordSet = uptoNGramSeg(mottoWord, mottoWord.length)
    println(s"==mottoWordSet: $mottoWordSet") //Set(波胡, 人风, 歌, 波, 骂人风波胡, 骂人风波, 骂, 人风波胡, 胡歌, 人风波胡歌, 风波胡, 骂人风, 风波, 人风波, 人, 风波胡歌, 风, 骂人风波胡歌, 波胡歌, 骂人, 胡)
    // Each entry is one keyword group; its space-separated words must all be present to trigger.
    val dropWordList = List("胡歌 陷 门", "胡歌 骂人", "笨蛋", "草").map(_.split(" ").toList)
    val dropWordMap = toSetTree(dropWordList)
    println(s"==dropWordMap: $dropWordMap") //Map(胡歌 -> Map(骂人 -> Map( -> ), 陷 -> Map(门 -> Map( -> ))), 笨蛋 -> Map( -> ), 草 -> Map( -> ))
    val isflag = testFunction(mottoWordSet, dropWordMap)
    println(s"isflag: $isflag") //true


/**
 * Tells whether the word set `set1` covers at least one complete keyword path of the tree `map1`.
 *
 * The tree maps a word to either the leaf marker `Map("" -> "")` (a keyword group ends at that
 * word) or to a nested `Map[String, Any]` holding the words that may follow it. A path matches
 * when every word on it, from the root down to a leaf marker, is contained in `set1`.
 *
 * @param set1 candidate words (e.g. the n-grams of a text)
 * @param map1 keyword tree in the nested-map shape described above
 * @return true if the text contains a sensitive keyword group, false otherwise
 */
def testFunction(set1:Set[String],map1:Map[String,Any]):Boolean ={
  // Only words present both in the text and at this tree level can extend a match.
  // `exists` stops at the first matching branch, so one non-matching sibling can no longer
  // overwrite an earlier positive result (the outcome used to depend on iteration order).
  set1.intersect(map1.keySet).exists { word =>
    map1(word) match {
      // Leaf marker: the whole keyword group has been found.
      case leaf if leaf == Map("" -> "") => true
      // Inner node: the remaining words of the group must be found as well.
      case child: Map[String, Any] @unchecked => testFunction(set1, child)
      // Anything else (e.g. the "" value inside a leaf marker) is not a subtree.
      case _ => false
    }
  }
}
  /**
    * Splits a sentence into the set of its contiguous character n-grams.
    *
    * Spaces are removed first. Single characters are always included; n-grams of
    * length 2 up to `senLength` are added when `senLength` is greater than 1.
    *
    * @param sentence  text to segment
    * @param senLength maximum n-gram length to generate
    * @return the distinct n-grams, or an empty set if an Exception is raised while
    *         segmenting (for instance when `sentence` is null)
    */
  def uptoNGramSeg(sentence: String, senLength: Int): Set[String] =
    try {
      val chars = sentence.replaceAll(" ", "").toCharArray.toList.map(_.toString)
      // The range is empty when senLength < 2, leaving only the single characters.
      val longerGrams = (2 to senLength).toList.flatMap { n =>
        chars.sliding(n).toList.map(_.mkString(""))
      }
      chars.toSet ++ longerGrams
    } catch {
      case _: Exception => Set.empty[String]
    }

  /**
    * Builds a nested-map prefix tree from keyword groups.
    *
    * Each inner list is one keyword group (an ordered list of words). The result maps the first
    * word of every group to a subtree built from the remaining words; a group that ends at a
    * word is represented by the leaf marker `Map("" -> "")` as that word's value.
    *
    * If a group ends at a node, that node is a leaf even when longer groups share the same
    * prefix: the shorter group already triggers on its own, so the longer ones are subsumed.
    * This also keeps duplicated groups from producing an empty subtree that could never match.
    * Blank words (e.g. produced by splitting on consecutive spaces) are ignored.
    *
    * @param keyWordList keyword groups, each already split into words
    * @return the keyword tree; `Map("" -> "")` for the input `List(List())`, and an empty map
    *         when there is no non-blank word at all
    */
  def toSetTree(keyWordList:List[List[String]]): Map[String,Any] ={
    // Leaf marker: "a keyword group ends here".
    val leaf: Map[String, Any] = Map("" -> "")

    // Groups the paths by their first word and recurses on the tails.
    def subtree(paths: List[List[String]]): Map[String, Any] =
      paths
        .collect { case word :: rest => (word, rest) }
        .groupBy(_._1)
        .map { case (word, entries) =>
          val tails = entries.map(_._2)
          // Any exhausted tail means a complete group ends at `word`.
          word -> (if (tails.exists(_.isEmpty)) leaf else subtree(tails))
        }

    if (keyWordList == List(List())) leaf // kept for callers relying on the historical base case
    else subtree(keyWordList.map(_.filter(_.nonEmpty)).filter(_.nonEmpty))
  }

 

posted @ 2019-02-28 20:20  等木鱼的猫  阅读(601)  评论(0)    收藏  举报