Mixed Java and Scala Programming

1. Project directory structure
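Inferred from the package declarations in the code below; the two helper file names (config.scala, delFilePath.scala) are assumptions:

<project source root>
└── main
    ├── java
    │   └── work
    │       └── callScala.java
    └── scala
        └── core
            ├── config.scala        // assumed: defines the shared SparkContext `sc`
            ├── delFilePath.scala   // assumed: defines the `delFPath` helper
            └── wc_count.scala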

 

2. Java code

package main.java.work;

import main.scala.core.wc_count;

public class callScala {

    public static void main(String[] args){

        System.out.println("Hello world!");

        // Call the Scala class method from Java: first create an instance, then call the method
        wc_count model = new wc_count();
        model.wcCount();

    }

}
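For callScala to resolve the Scala class wc_count, the Java and Scala sources have to be compiled together. sbt handles this mixed compilation out of the box, and Maven can do the same via scala-maven-plugin. A minimal build.sbt sketch, assuming Spark 2.4.x on Scala 2.12 (the project name and versions are placeholders, adjust to your environment):

// minimal build.sbt sketch; name and versions are assumptions
name := "java-scala-mix"

scalaVersion := "2.12.15"

libraryDependencies += "org.apache.spark" %% "spark-core" % "2.4.8"

Note that sbt's standard layout would put the sources under src/main/java and src/main/scala with packages work and core; the main.java.work / main.scala.core packages here suggest the project root itself is marked as the source root, as in a plain IntelliJ project.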

  

3. Scala code

package main.scala.core
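// `config` and `delFilePath` are helper objects not shown in the post; a sketch is given after this class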
import main.scala.core.config.sc
import main.scala.core.delFilePath.delFPath
import org.apache.spark.rdd.RDD


class wc_count {

  def delete(master: String, path: String): Unit = {
    println("Begin delete! -- " + master + path)
    val output = new org.apache.hadoop.fs.Path(master + path)
    val hdfs = org.apache.hadoop.fs.FileSystem.get(
      new java.net.URI(master), new org.apache.hadoop.conf.Configuration())
    // delete the output directory if it already exists
    if (hdfs.exists(output)) {
      hdfs.delete(output, true)
      println("Deleted! -- " + master + path)
    }
  }


  def wcCount(): Unit = {

    // if the input lives on HDFS, upload it first:  hdfs dfs -put words.txt /user/root/

    val worddata: RDD[String] = sc.textFile("data/wc.txt")
    val worddata1: RDD[String] = worddata.flatMap(x => x.split(" "))
    val worddata2: RDD[(String, Int)] = worddata1.map(x => (x, 1))
    val worddata3: RDD[(String, Int)] = worddata2.reduceByKey((x, y) => x + y)

    delFPath("data/out")
    worddata3.repartition(1).saveAsTextFile("data/out")

    // keep only the words that occur more than 3 times
    val worddata4: RDD[String] = worddata3.filter(x => x._2 > 3).map(x => x._1)

    delFPath("data/out1")
    worddata4.repartition(1).saveAsTextFile("data/out1")

    val cnt: Long = worddata4.count()

    println(s"Number of words occurring more than 3 times: $cnt")
    sc.stop()
  }
}
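The two imports at the top, config.sc and delFilePath.delFPath, refer to helper objects the post does not show. A minimal sketch of what they plausibly look like (the object and method names follow the imports; the bodies are assumptions):

package main.scala.core

import org.apache.spark.{SparkConf, SparkContext}

// Assumed helper: a shared SparkContext, imported above as `config.sc`.
// `local[*]` is for running locally; on a cluster the master would come from spark-submit.
object config {
  private val conf = new SparkConf().setAppName("wc_count").setMaster("local[*]")
  val sc = new SparkContext(conf)
}

// Assumed helper: `delFPath` removes an output directory before saveAsTextFile,
// mirroring the `delete` method defined in wc_count.
object delFilePath {
  def delFPath(path: String): Unit = {
    val p = new org.apache.hadoop.fs.Path(path)
    val fs = p.getFileSystem(new org.apache.hadoop.conf.Configuration())
    if (fs.exists(p)) {
      fs.delete(p, true)
      println("Deleted output path: " + path)
    }
  }
}

With these two objects in place, running callScala.main compiles and executes the whole pipeline end to end.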

  
