1)java(App.java)
package com.ejiajie.bi.hello;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.SparkConf;
/**
* Hello world!
*
*/
public class App
{
/**
 * Entry point: creates a Spark context from a default {@link SparkConf},
 * prints a greeting, and stops the context before returning.
 *
 * @param args command-line arguments (unused)
 */
public static void main( String[] args )
{
    // No master or application name is set here.
    // NOTE(review): presumably these are supplied by spark-submit; confirm.
    SparkConf conf = new SparkConf();
    JavaSparkContext sc = new JavaSparkContext(conf);
    try {
        System.out.println( "Hello World!" );
    } finally {
        // Stop the context explicitly so it is shut down even if the body throws.
        sc.stop();
    }
}
}
2)python(PyHelloWorld.py)
# PyHelloWorld.py: start a SparkContext and build a few MLlib local vectors
# and labeled points, then stop the context.
import numpy as np
import scipy.sparse as sps
from pyspark import SparkConf, SparkContext
from pyspark.mllib.linalg import SparseVector, Vectors
from pyspark.mllib.regression import LabeledPoint

# No master or app name is set on the conf here.
# NOTE(review): presumably supplied by spark-submit; confirm.
conf = SparkConf()
sc = SparkContext(conf=conf)

try:
    # Use a NumPy array as a dense vector.
    dv1 = np.array([1.0, 0.0, 3.0])

    # Use a Python list as a dense vector.
    dv2 = [1.0, 0.0, 3.0]

    # Create a SparseVector: size 3, non-zero entries at indices 0 and 2.
    sv1 = Vectors.sparse(3, [0, 2], [1.0, 3.0])

    # Use a single-column SciPy csc_matrix as a sparse vector.
    # Arguments are (data, row indices, column pointers).
    sv2 = sps.csc_matrix(
        (np.array([1.0, 3.0]), np.array([0, 2]), np.array([0, 2])),
        shape=(3, 1),
    )

    # Create a labeled point with a positive label and a dense feature vector.
    pos = LabeledPoint(1.0, [1.0, 0.0, 3.0])

    # Create a labeled point with a negative label and a sparse feature vector.
    neg = LabeledPoint(0.0, SparseVector(3, [0, 2], [1.0, 3.0]))
finally:
    # Stop the context explicitly, even if one of the steps above raises.
    sc.stop()
3)执行命令
java
# spark-submit --class com.ejiajie.bi.hello.App --master yarn --deploy-mode client ./hello-1.0-SNAPSHOT-jar-with-dependencies.jar
# spark-submit --class com.ejiajie.bi.hello.App --master yarn --deploy-mode cluster ./hello-1.0-SNAPSHOT-jar-with-dependencies.jar
python
# spark-submit --master yarn --deploy-mode client /home/lihanhui/work/spark-example/PyHelloWorld.py
# spark-submit --master yarn --deploy-mode cluster /home/lihanhui/work/spark-example/PyHelloWorld.py
4)hadoop查看任务状态

浙公网安备 33010602011771号