01. Spark notes and references

Reference links:

http://blog.csdn.net/yirenboy/article/details/47291765

https://databricks.gitbooks.io/databricks-spark-knowledge-base/content/best_practices/prefer_reducebykey_over_groupbykey.html (Databricks knowledge base: prefer reduceByKey over groupByKey)
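
The second link's advice, sketched here in Java for the same Spark 2.2 setup (the WordCounts class name and the README input path are illustrative, not taken from the link): reduceByKey combines the values for each key on every partition before the shuffle, while groupByKey ships every raw (word, 1) pair across the network first.

import java.util.Arrays;

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;

import scala.Tuple2;

public class WordCounts {
      public static void main(String[] args) {
            SparkSession spark = SparkSession.builder().appName("Word Counts").getOrCreate();
            JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());

            JavaRDD<String> words = jsc.textFile("/home/makexu/ops/spark-2.2.0/README.md")
                        .flatMap(line -> Arrays.asList(line.split(" ")).iterator());
            JavaPairRDD<String, Integer> pairs = words.mapToPair(w -> new Tuple2<>(w, 1));

            // Preferred: partial sums are computed on each partition before the shuffle.
            JavaPairRDD<String, Integer> counts = pairs.reduceByKey((a, b) -> a + b);

            // Also correct, but every (word, 1) pair is shuffled before anything is summed:
            // pairs.groupByKey().mapValues(vs -> { int sum = 0; for (int v : vs) sum += v; return sum; });

            counts.take(10).forEach(System.out::println);
            spark.stop();
      }
}

Both versions give the same counts; on a large dataset the reduceByKey version simply moves far less data.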

 

Run the bundled SparkPi example with spark-submit (local mode, 8 worker threads):

/home/makexu/ops/spark-2.2.0/bin/spark-submit \
  --class org.apache.spark.examples.SparkPi \
  --master local[8] \
  /home/makexu/ops/spark-2.2.0/examples/jars/spark-examples_2.11-2.2.0.jar
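
SparkPi estimates pi by Monte Carlo sampling. A rough Java sketch of the same idea (the bundled example is written in Scala; the MonteCarloPi class name, the sample count, and the 8 partitions below are assumptions, not the shipped code):

import java.util.ArrayList;
import java.util.List;

import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;

public class MonteCarloPi {
      public static void main(String[] args) {
            SparkSession spark = SparkSession.builder().appName("Monte Carlo Pi").getOrCreate();
            JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());

            int n = 1000000; // number of random points to sample
            List<Integer> seq = new ArrayList<>();
            for (int i = 0; i < n; i++) {
                  seq.add(i);
            }

            // Count how many random points in the 2x2 square fall inside the unit circle;
            // that fraction approaches pi/4 as n grows.
            long inside = jsc.parallelize(seq, 8).filter(i -> {
                  double x = Math.random() * 2 - 1;
                  double y = Math.random() * 2 - 1;
                  return x * x + y * y <= 1;
            }).count();

            System.out.println("Pi is roughly " + 4.0 * inside / n);
            spark.stop();
      }
}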

A simple application (App.java, from the simple-project Maven project referenced below) that counts README lines containing "a" and "b":

package edu.berkeley;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SparkSession;

/**
 * Counts how many lines of the Spark README contain the letter "a" and how many contain "b".
 */
public class App {
      public static void main(String[] args) {
            String logFile = "/home/makexu/ops/spark-2.2.0/README.md"; // Should be some file on your system
            SparkSession spark = SparkSession.builder().appName("Simple Application").getOrCreate();
            Dataset<String> logData = spark.read().textFile(logFile).cache(); // cached because it is scanned twice

            long numAs = logData.filter(s -> s.contains("a")).count();
            long numBs = logData.filter(s -> s.contains("b")).count();

            System.out.println("Lines with a: " + numAs + ", lines with b: " + numBs);

            spark.stop();
      }
}

Package the application with Maven (the pom needs a spark-sql_2.11 2.2.0 dependency for the imports above), then submit the jar:

$ YOUR_SPARK_HOME/bin/spark-submit \
  --class "edu.berkeley.App" \
  --master local[4] \
  /home/makexu/mvnpro/simple-project/target/simple-project.jar