wordCount
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>spark_yarn</groupId>
    <artifactId>yarn</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <scala.version>2.11</scala.version>
        <scala.compat.version>2.11</scala.compat.version>
        <spark.version>2.2.0</spark.version>
        <hadoop.version>2.7.2</hadoop.version>
        <hbase.version>1.0</hbase.version>
    </properties>

    <!--<properties>-->
    <!--<scala.version>2.11</scala.version>-->
    <!--<scala.compat.version>2.11</scala.compat.version>-->
    <!--<spark.version>1.6.1</spark.version>-->
    <!--<hadoop.version>2.7.2</hadoop.version>-->
    <!--</properties>-->

    <repositories>
        <repository>
            <id>nexus-aliyun</id>
            <name>Nexus aliyun</name>
            <url>http://maven.aliyun.com/nexus/content/groups/public</url>
        </repository>
    </repositories>

    <dependencies>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>2.11.8</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_${scala.version}</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_${scala.version}</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-yarn_${scala.version}</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-launcher_${scala.version}</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>com.databricks</groupId>
            <artifactId>spark-csv_${scala.version}</artifactId>
            <version>1.4.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <!-- Hadoop client, used to access HDFS -->
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-auth</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-yarn-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-yarn-api</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-yarn-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-yarn-server-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-yarn-server-web-proxy</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>com.typesafe</groupId>
            <artifactId>config</artifactId>
            <version>1.3.3</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.scala-tools</groupId>
                <artifactId>maven-scala-plugin</artifactId>
                <version>2.15.2</version>
                <executions>
                    <execution>
                        <id>scala-compile-first</id>
                        <goals>
                            <goal>compile</goal>
                        </goals>
                        <configuration>
                            <includes>
                                <include>**/*.scala</include>
                            </includes>
                        </configuration>
                    </execution>
                    <execution>
                        <id>scala-test-compile</id>
                        <goals>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
package atguigu

import org.apache.spark.SparkContext
import org.apache.spark.SparkConf

object WordCount {
  // main method
  def main(args: Array[String]): Unit = {
    // Create the SparkConf object.
    // If the master is "local", the job runs in local mode and can be started directly from the IDE.
    // When the job is submitted to a cluster, do not set the master here.
    // local mode
    val conf = new SparkConf().setAppName("My Scala Word Count").setMaster("local")

    // Create the SparkContext object
    val sc = new SparkContext(conf)

    // variant that takes the input and output paths from the command line
    // val result = sc.textFile(args(0))
    //   .flatMap(_.split(" "))
    //   .map((_, 1))
    //   .reduceByKey(_ + _).saveAsTextFile(args(1))

    val result = sc.textFile("in")
      .flatMap(_.split(" "))
      .map((_, 1))
      .reduceByKey(_ + _).saveAsTextFile("out")

    sc.stop()
  }
}
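For a quick check while developing, a small variation of the local-mode job can collect and print the counts instead of writing an "out" directory. This is only an illustrative sketch; the object name WordCountPrint and the sort step are additions, not part of the original post:

package atguigu

import org.apache.spark.{SparkConf, SparkContext}

object WordCountPrint {
  def main(args: Array[String]): Unit = {
    // Same local-mode setup as WordCount above
    val conf = new SparkConf().setAppName("My Scala Word Count").setMaster("local")
    val sc = new SparkContext(conf)

    sc.textFile("in")
      .flatMap(_.split(" "))
      .map((_, 1))
      .reduceByKey(_ + _)
      .sortBy(_._2, ascending = false) // most frequent words first
      .collect()                       // bring the small result set to the driver
      .foreach(println)

    sc.stop()
  }
}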
package atguigu

import org.apache.spark.sql.SparkSession

object WordCount2 {
  // main method
  def main(args: Array[String]): Unit = {
    // Cluster mode: the master is supplied by spark-submit, so it is not set here
    val spark: SparkSession = SparkSession.builder().appName("My Scala Word Count").getOrCreate()

    // Use the underlying SparkContext; input and output paths come from the command line
    val result = spark.sparkContext.textFile(args(0))
      .flatMap(_.split(" "))
      .map((_, 1))
      .reduceByKey(_ + _).saveAsTextFile(args(1))

    spark.sparkContext.stop()
  }
}
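WordCount2 only uses SparkSession to reach the underlying SparkContext. For comparison, roughly the same job can also be expressed with the Dataset API of Spark 2.x; the sketch below is an illustrative alternative (the object name WordCountDataset is made up here), not code from the original post:

package atguigu

import org.apache.spark.sql.SparkSession

object WordCountDataset {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("My Scala Word Count DS").getOrCreate()
    import spark.implicits._ // encoders for the flatMap below

    // Read lines as Dataset[String], split into words, count occurrences per word
    spark.read.textFile(args(0))
      .flatMap(_.split(" "))
      .groupBy("value")   // the single column of a Dataset[String] is named "value"
      .count()
      .write.csv(args(1)) // write (word, count) pairs as CSV

    spark.stop()
  }
}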
bin/spark-submit --class atguigu.WordCount2 --master yarn --deploy-mode cluster session.jar in out2
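Because the pom already declares the spark-launcher dependency, the same submission can also be triggered from code instead of the shell. A minimal sketch, assuming SPARK_HOME points at the Spark 2.2.0 installation and that session.jar and the in/out2 paths are valid for the cluster (the object name SubmitWordCount is made up here):

package atguigu

import org.apache.spark.launcher.SparkLauncher

object SubmitWordCount {
  def main(args: Array[String]): Unit = {
    // Programmatic equivalent of the spark-submit command above.
    // SPARK_HOME must be set in the environment (or supplied via setSparkHome).
    val sparkSubmit = new SparkLauncher()
      .setAppResource("session.jar")        // the packaged application jar
      .setMainClass("atguigu.WordCount2")   // cluster-mode entry point
      .setMaster("yarn")
      .setDeployMode("cluster")
      .addAppArgs("in", "out2")             // HDFS input and output paths
      .launch()

    // Wait for the spark-submit child process to finish handing the job to YARN
    sparkSubmit.waitFor()
  }
}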
posted on 2020-09-18 10:10 by happygril3