01.spark资料
http://blog.csdn.net/yirenboy/article/details/47291765
https://databricks.gitbooks.io/databricks-spark-knowledge-base/content/best_practices/prefer_reducebykey_over_groupbykey.html
/home/makexu/ops/spark-2.2.0/bin/spark-submit \
  --class org.apache.spark.examples.SparkPi \
  --master local[8] \
  /home/makexu/ops/spark-2.2.0/examples/jars/spark-examples_2.11-2.2.0.jar
package edu.berkeley;
/**
* Hello world!
*
*/
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.Dataset;
/**
 * Minimal Spark application: reads a text file as a dataset of lines and
 * prints how many lines contain the letter "a" and how many contain "b".
 *
 * <p>No master URL is set on the session builder here; it is expected to be
 * supplied externally (for example via {@code spark-submit --master ...}).
 */
public class App {
    /**
     * Runs the two line counts against a hard-coded input file and prints the result.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        String logFile = "/home/makexu/ops/spark-2.2.0/README.md"; // Should be some file on your system
        SparkSession spark = SparkSession.builder().appName("Simple Application").getOrCreate();
        try {
            // Dataset<String> (not the raw Dataset type) so that the filter lambdas
            // receive String elements and s.contains(...) type-checks.
            // The dataset is cached because it is scanned twice below.
            Dataset<String> logData = spark.read().textFile(logFile).cache();

            long numAs = logData.filter(s -> s.contains("a")).count();
            long numBs = logData.filter(s -> s.contains("b")).count();

            System.out.println("Lines with a: " + numAs + ", lines with b: " + numBs);
        } finally {
            // Stop the session even when reading or counting fails.
            spark.stop();
        }
    }
}
$ YOUR_SPARK_HOME/bin/spark-submit \
  --class "edu.berkeley.App" \
  --master local[4] \
  /home/makexu/mvnpro/simple-project/target/simple-project.jar
http://www.cnblogs.com/makexu/

浙公网安备 33010602011771号