From 68cebd0959e1d6ae271274656e0196871c85f213 Mon Sep 17 00:00:00 2001
From: Sandeep Singh
Date: Fri, 13 May 2016 18:40:14 +0530
Subject: [PATCH 01/27] ML Classification done

---
 .../org/apache/spark/SharedSparkSession.java | 32 +++++++++++++++++++
 .../apache/spark/ml/JavaPipelineSuite.java | 23 ++-----------
 .../JavaDecisionTreeClassifierSuite.java | 25 ++-------------
 .../JavaGBTClassifierSuite.java | 26 ++-------------
 .../JavaLogisticRegressionSuite.java | 24 ++------------
 ...vaMultilayerPerceptronClassifierSuite.java | 23 ++-----------
 .../classification/JavaNaiveBayesSuite.java | 22 ++-----------
 .../ml/classification/JavaOneVsRestSuite.java | 24 ++------------
 .../JavaRandomForestClassifierSuite.java | 26 ++-------------
 9 files changed, 52 insertions(+), 173 deletions(-)
 create mode 100644 mllib/src/test/java/org/apache/spark/SharedSparkSession.java

diff --git a/mllib/src/test/java/org/apache/spark/SharedSparkSession.java b/mllib/src/test/java/org/apache/spark/SharedSparkSession.java
new file mode 100644
index 0000000000000..68166ff7b6968
--- /dev/null
+++ b/mllib/src/test/java/org/apache/spark/SharedSparkSession.java
@@ -0,0 +1,32 @@
+package org.apache.spark;
+
+import org.junit.After;
+import org.junit.Before;
+
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.SparkSession;
+
+public class SharedSparkSession {
+
+  public transient SparkSession spark;
+  public transient JavaSparkContext jsc;
+
+  @Before
+  public void setUp() {
+    spark = SparkSession.builder()
+      .master("local")
+      .appName("shared-spark-session")
+      .getOrCreate();
+    jsc = new JavaSparkContext(spark.sparkContext());
+
+    customSetUp();
+  }
+
+  public void customSetUp() {}
+
+  @After
+  public void tearDown() {
+    spark.stop();
+    spark = null;
+  }
+}
diff --git a/mllib/src/test/java/org/apache/spark/ml/JavaPipelineSuite.java b/mllib/src/test/java/org/apache/spark/ml/JavaPipelineSuite.java
index e0c4363597da4..6a1fe97f856b5 100644
--- a/mllib/src/test/java/org/apache/spark/ml/JavaPipelineSuite.java
+++ b/mllib/src/test/java/org/apache/spark/ml/JavaPipelineSuite.java
@@ -17,47 +17,30 @@
 
 package org.apache.spark.ml;
 
-import org.junit.After;
-import org.junit.Before;
 import org.junit.Test;
 
+import org.apache.spark.SharedSparkSession;
 import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.ml.classification.LogisticRegression;
 import org.apache.spark.ml.feature.StandardScaler;
 import org.apache.spark.mllib.regression.LabeledPoint;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Row;
-import org.apache.spark.sql.SparkSession;
 
 import static org.apache.spark.mllib.classification.LogisticRegressionSuite.generateLogisticInputAsList;
 
 /**
  * Test Pipeline construction and fitting in Java.
*/ -public class JavaPipelineSuite { +public class JavaPipelineSuite extends SharedSparkSession { - private transient SparkSession spark; - private transient JavaSparkContext jsc; private transient Dataset dataset; - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaPipelineSuite") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); + public void customSetUp() { JavaRDD points = jsc.parallelize(generateLogisticInputAsList(1.0, 1.0, 100, 42), 2); dataset = spark.createDataFrame(points, LabeledPoint.class); } - @After - public void tearDown() { - spark.stop(); - spark = null; - } - @Test public void pipeline() { StandardScaler scaler = new StandardScaler() diff --git a/mllib/src/test/java/org/apache/spark/ml/classification/JavaDecisionTreeClassifierSuite.java b/mllib/src/test/java/org/apache/spark/ml/classification/JavaDecisionTreeClassifierSuite.java index 8b899913277fc..6abc7ab541e4a 100644 --- a/mllib/src/test/java/org/apache/spark/ml/classification/JavaDecisionTreeClassifierSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/classification/JavaDecisionTreeClassifierSuite.java @@ -21,38 +21,17 @@ import java.util.HashMap; import java.util.Map; -import org.junit.After; -import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.ml.tree.impl.TreeTests; import org.apache.spark.mllib.classification.LogisticRegressionSuite; import org.apache.spark.mllib.regression.LabeledPoint; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -import org.apache.spark.sql.SparkSession; -public class JavaDecisionTreeClassifierSuite implements Serializable { - - private transient SparkSession spark; - private transient JavaSparkContext jsc; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaDecisionTreeClassifierSuite") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaDecisionTreeClassifierSuite extends SharedSparkSession implements Serializable { @Test public void runDT() { diff --git a/mllib/src/test/java/org/apache/spark/ml/classification/JavaGBTClassifierSuite.java b/mllib/src/test/java/org/apache/spark/ml/classification/JavaGBTClassifierSuite.java index 682371eb9e4d5..c7fe70c157f1b 100644 --- a/mllib/src/test/java/org/apache/spark/ml/classification/JavaGBTClassifierSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/classification/JavaGBTClassifierSuite.java @@ -21,39 +21,17 @@ import java.util.HashMap; import java.util.Map; -import org.junit.After; -import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.ml.tree.impl.TreeTests; import org.apache.spark.mllib.classification.LogisticRegressionSuite; import org.apache.spark.mllib.regression.LabeledPoint; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -import org.apache.spark.sql.SparkSession; - -public class JavaGBTClassifierSuite implements Serializable { - - private transient SparkSession spark; - private transient JavaSparkContext jsc; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaGBTClassifierSuite") - 
.getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaGBTClassifierSuite extends SharedSparkSession implements Serializable { @Test public void runDT() { diff --git a/mllib/src/test/java/org/apache/spark/ml/classification/JavaLogisticRegressionSuite.java b/mllib/src/test/java/org/apache/spark/ml/classification/JavaLogisticRegressionSuite.java index e3ff68364e690..128405288ba42 100644 --- a/mllib/src/test/java/org/apache/spark/ml/classification/JavaLogisticRegressionSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/classification/JavaLogisticRegressionSuite.java @@ -20,49 +20,31 @@ import java.io.Serializable; import java.util.List; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.mllib.linalg.Vector; import org.apache.spark.mllib.regression.LabeledPoint; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -import org.apache.spark.sql.SparkSession; import static org.apache.spark.mllib.classification.LogisticRegressionSuite.generateLogisticInputAsList; -public class JavaLogisticRegressionSuite implements Serializable { +public class JavaLogisticRegressionSuite extends SharedSparkSession implements Serializable { - private transient SparkSession spark; - private transient JavaSparkContext jsc; private transient Dataset dataset; private transient JavaRDD datasetRDD; private double eps = 1e-5; - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaLogisticRegressionSuite") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - + public void customSetUp() { List points = generateLogisticInputAsList(1.0, 1.0, 100, 42); datasetRDD = jsc.parallelize(points, 2); dataset = spark.createDataFrame(datasetRDD, LabeledPoint.class); dataset.registerTempTable("dataset"); } - @After - public void tearDown() { - spark.stop(); - spark = null; - } - @Test public void logisticRegressionDefaultParams() { LogisticRegression lr = new LogisticRegression(); diff --git a/mllib/src/test/java/org/apache/spark/ml/classification/JavaMultilayerPerceptronClassifierSuite.java b/mllib/src/test/java/org/apache/spark/ml/classification/JavaMultilayerPerceptronClassifierSuite.java index b0624cea3ecd7..ea2d53f6a888f 100644 --- a/mllib/src/test/java/org/apache/spark/ml/classification/JavaMultilayerPerceptronClassifierSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/classification/JavaMultilayerPerceptronClassifierSuite.java @@ -21,34 +21,17 @@ import java.util.Arrays; import java.util.List; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.mllib.linalg.Vectors; import org.apache.spark.mllib.regression.LabeledPoint; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -import org.apache.spark.sql.SparkSession; -public class JavaMultilayerPerceptronClassifierSuite implements Serializable { - - private transient SparkSession spark; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaLogisticRegressionSuite") - .getOrCreate(); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class 
JavaMultilayerPerceptronClassifierSuite + extends SharedSparkSession implements Serializable { @Test public void testMLPC() { diff --git a/mllib/src/test/java/org/apache/spark/ml/classification/JavaNaiveBayesSuite.java b/mllib/src/test/java/org/apache/spark/ml/classification/JavaNaiveBayesSuite.java index 3fc3648627153..e6a397c06344a 100644 --- a/mllib/src/test/java/org/apache/spark/ml/classification/JavaNaiveBayesSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/classification/JavaNaiveBayesSuite.java @@ -21,39 +21,21 @@ import java.util.Arrays; import java.util.List; -import org.junit.After; -import org.junit.Before; import org.junit.Test; import static org.junit.Assert.assertEquals; +import org.apache.spark.SharedSparkSession; import org.apache.spark.mllib.linalg.VectorUDT; import org.apache.spark.mllib.linalg.Vectors; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.RowFactory; -import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.types.DataTypes; import org.apache.spark.sql.types.Metadata; import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; -public class JavaNaiveBayesSuite implements Serializable { - - private transient SparkSession spark; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaLogisticRegressionSuite") - .getOrCreate(); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaNaiveBayesSuite extends SharedSparkSession implements Serializable { public void validatePrediction(Dataset predictionAndLabels) { for (Row r : predictionAndLabels.collectAsList()) { diff --git a/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java b/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java index 486fbbd58c179..fe11e4c65f27a 100644 --- a/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java @@ -22,34 +22,22 @@ import scala.collection.JavaConverters; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.mllib.regression.LabeledPoint; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -import org.apache.spark.sql.SparkSession; import static org.apache.spark.mllib.classification.LogisticRegressionSuite.generateMultinomialLogisticInput; -public class JavaOneVsRestSuite implements Serializable { +public class JavaOneVsRestSuite extends SharedSparkSession implements Serializable { - private transient SparkSession spark; - private transient JavaSparkContext jsc; private transient Dataset dataset; private transient JavaRDD datasetRDD; - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaLOneVsRestSuite") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - + public void customSetUp() { int nPoints = 3; // The following coefficients and xMean/xVariance are computed from iris dataset with @@ -68,12 +56,6 @@ public void setUp() { dataset = spark.createDataFrame(datasetRDD, LabeledPoint.class); } - @After - public void tearDown() { - spark.stop(); - spark = null; - } - @Test public void oneVsRestDefaultParams() { OneVsRest ova = new OneVsRest(); 
diff --git a/mllib/src/test/java/org/apache/spark/ml/classification/JavaRandomForestClassifierSuite.java b/mllib/src/test/java/org/apache/spark/ml/classification/JavaRandomForestClassifierSuite.java index e3855662fb6de..3f62c4b66ce91 100644 --- a/mllib/src/test/java/org/apache/spark/ml/classification/JavaRandomForestClassifierSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/classification/JavaRandomForestClassifierSuite.java @@ -21,41 +21,19 @@ import java.util.HashMap; import java.util.Map; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.ml.tree.impl.TreeTests; import org.apache.spark.mllib.classification.LogisticRegressionSuite; import org.apache.spark.mllib.linalg.Vector; import org.apache.spark.mllib.regression.LabeledPoint; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -import org.apache.spark.sql.SparkSession; - -public class JavaRandomForestClassifierSuite implements Serializable { - - private transient SparkSession spark; - private transient JavaSparkContext jsc; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaRandomForestClassifierSuite") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaRandomForestClassifierSuite extends SharedSparkSession implements Serializable { @Test public void runDT() { From e8bae89020c1db8573b76add89765f378c8a044b Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Fri, 13 May 2016 19:46:19 +0530 Subject: [PATCH 02/27] ml clustering done --- .../spark/ml/clustering/JavaKMeansSuite.java | 23 ++++--------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/mllib/src/test/java/org/apache/spark/ml/clustering/JavaKMeansSuite.java b/mllib/src/test/java/org/apache/spark/ml/clustering/JavaKMeansSuite.java index 3ab09ac27d306..d1cacb71b33af 100644 --- a/mllib/src/test/java/org/apache/spark/ml/clustering/JavaKMeansSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/clustering/JavaKMeansSuite.java @@ -21,39 +21,24 @@ import java.util.Arrays; import java.util.List; +import org.junit.Test; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - +import org.apache.spark.SharedSparkSession; import org.apache.spark.mllib.linalg.Vector; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -import org.apache.spark.sql.SparkSession; -public class JavaKMeansSuite implements Serializable { +public class JavaKMeansSuite extends SharedSparkSession implements Serializable { private transient int k = 5; private transient Dataset dataset; - private transient SparkSession spark; - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaKMeansSuite") - .getOrCreate(); + public void customSetUp() { dataset = KMeansSuite.generateKMeansData(spark, 50, 3, k); } - @After - public void tearDown() { - spark.stop(); - spark = null; - } - @Test public void fitAndTransform() { KMeans kmeans = new KMeans().setK(k).setSeed(1); From afe620a34e219c7cb7c4a286cbfb17305b8517fa Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Fri, 13 May 2016 20:07:05 +0530 Subject: [PATCH 03/27] ML Feature done --- 
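Note: the feature suites below are converted the same way as the classification suites in PATCH 01: the per-suite SparkSession/JavaSparkContext boilerplate (setUp/tearDown) is deleted and each suite extends SharedSparkSession, overriding customSetUp() only when it needs extra fixtures. A rough sketch of the resulting shape follows; the class name and the toy dataset are illustrative only and are not part of this patch, while SharedSparkSession, customSetUp(), spark, and jsc come from PATCH 01.

import org.junit.Assert;
import org.junit.Test;

import org.apache.spark.SharedSparkSession;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

public class JavaExampleSuite extends SharedSparkSession {

  // Suite-specific state, created from the shared `spark` / `jsc` members.
  private transient Dataset<Row> dataset;

  // Called by SharedSparkSession.setUp() once the shared session exists.
  @Override
  public void customSetUp() {
    dataset = spark.range(0, 10).toDF("id");
  }

  @Test
  public void usesSharedSession() {
    Assert.assertEquals(10, dataset.count());
  }
}
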
.../spark/ml/feature/JavaBucketizerSuite.java | 18 ++--------------- .../apache/spark/ml/feature/JavaDCTSuite.java | 18 ++--------------- .../spark/ml/feature/JavaHashingTFSuite.java | 18 ++--------------- .../spark/ml/feature/JavaNormalizerSuite.java | 20 ++----------------- .../apache/spark/ml/feature/JavaPCASuite.java | 20 ++----------------- .../feature/JavaPolynomialExpansionSuite.java | 20 ++----------------- .../ml/feature/JavaStandardScalerSuite.java | 20 ++----------------- .../ml/feature/JavaStopWordsRemoverSuite.java | 19 ++---------------- .../ml/feature/JavaStringIndexerSuite.java | 19 ++---------------- .../spark/ml/feature/JavaTokenizerSuite.java | 20 ++----------------- .../ml/feature/JavaVectorAssemblerSuite.java | 19 ++---------------- .../ml/feature/JavaVectorIndexerSuite.java | 20 ++----------------- .../ml/feature/JavaVectorSlicerSuite.java | 18 ++--------------- .../spark/ml/feature/JavaWord2VecSuite.java | 18 ++--------------- 14 files changed, 28 insertions(+), 239 deletions(-) diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaBucketizerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaBucketizerSuite.java index a96b43de15779..854abd05e221f 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaBucketizerSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaBucketizerSuite.java @@ -25,6 +25,7 @@ import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.RowFactory; @@ -34,22 +35,7 @@ import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; -public class JavaBucketizerSuite { - private transient SparkSession spark; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaBucketizerSuite") - .getOrCreate(); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaBucketizerSuite extends SharedSparkSession { @Test public void bucketizerTest() { diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaDCTSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaDCTSuite.java index 06482d8f0dcd1..4a5ba362f4639 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaDCTSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaDCTSuite.java @@ -27,6 +27,7 @@ import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.mllib.linalg.Vector; import org.apache.spark.mllib.linalg.VectorUDT; import org.apache.spark.mllib.linalg.Vectors; @@ -38,22 +39,7 @@ import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; -public class JavaDCTSuite { - private transient SparkSession spark; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaDCTSuite") - .getOrCreate(); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaDCTSuite extends SharedSparkSession { @Test public void javaCompatibilityTest() { diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaHashingTFSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaHashingTFSuite.java index 0e21d4a94f24b..11e7bee832e14 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaHashingTFSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaHashingTFSuite.java @@ -25,6 +25,7 @@ 
import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.mllib.linalg.Vector; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; @@ -36,22 +37,7 @@ import org.apache.spark.sql.types.StructType; -public class JavaHashingTFSuite { - private transient SparkSession spark; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaHashingTFSuite") - .getOrCreate(); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaHashingTFSuite extends SharedSparkSession { @Test public void hashingTF() { diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaNormalizerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaNormalizerSuite.java index 04b2897b18117..582bc99e0c784 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaNormalizerSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaNormalizerSuite.java @@ -23,6 +23,7 @@ import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.mllib.linalg.Vectors; @@ -30,24 +31,7 @@ import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; -public class JavaNormalizerSuite { - private transient SparkSession spark; - private transient JavaSparkContext jsc; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaNormalizerSuite") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaNormalizerSuite extends SharedSparkSession { @Test public void normalizer() { diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java index 32f6b4375ee72..4709c92adacd4 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java @@ -28,6 +28,7 @@ import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; @@ -39,24 +40,7 @@ import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; -public class JavaPCASuite implements Serializable { - private transient SparkSession spark; - private transient JavaSparkContext jsc; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaPCASuite") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaPCASuite extends SharedSparkSession implements Serializable { public static class VectorPair implements Serializable { private Vector features = Vectors.dense(0.0); diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaPolynomialExpansionSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaPolynomialExpansionSuite.java index 8f726077a2490..ff119392785ad 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaPolynomialExpansionSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaPolynomialExpansionSuite.java @@ -25,6 +25,7 @@ import org.junit.Before; 
import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.mllib.linalg.Vector; import org.apache.spark.mllib.linalg.VectorUDT; @@ -37,24 +38,7 @@ import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; -public class JavaPolynomialExpansionSuite { - private transient SparkSession spark; - private transient JavaSparkContext jsc; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaPolynomialExpansionSuite") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - } - - @After - public void tearDown() { - jsc.stop(); - jsc = null; - } +public class JavaPolynomialExpansionSuite extends SharedSparkSession { @Test public void polynomialExpansionTest() { diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaStandardScalerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaStandardScalerSuite.java index c7397bdd6878d..5006f52618356 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaStandardScalerSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaStandardScalerSuite.java @@ -24,30 +24,14 @@ import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.mllib.linalg.Vectors; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; -public class JavaStandardScalerSuite { - private transient SparkSession spark; - private transient JavaSparkContext jsc; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaStandardScalerSuite") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaStandardScalerSuite extends SharedSparkSession { @Test public void standardScaler() { diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaStopWordsRemoverSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaStopWordsRemoverSuite.java index 2b156f3bca5b1..f25c0a7e204ff 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaStopWordsRemoverSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaStopWordsRemoverSuite.java @@ -24,6 +24,7 @@ import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.RowFactory; @@ -34,23 +35,7 @@ import org.apache.spark.sql.types.StructType; -public class JavaStopWordsRemoverSuite { - - private transient SparkSession spark; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaStopWordsRemoverSuite") - .getOrCreate(); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaStopWordsRemoverSuite extends SharedSparkSession { @Test public void javaCompatibilityTest() { diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaStringIndexerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaStringIndexerSuite.java index 52c0bde8f3677..e75d53e585048 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaStringIndexerSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaStringIndexerSuite.java @@ -25,6 +25,7 @@ import org.junit.Before; 
import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.SparkConf; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; @@ -34,23 +35,7 @@ import org.apache.spark.sql.types.StructType; import static org.apache.spark.sql.types.DataTypes.*; -public class JavaStringIndexerSuite { - private transient SparkSession spark; - - @Before - public void setUp() { - SparkConf sparkConf = new SparkConf(); - sparkConf.setMaster("local"); - sparkConf.setAppName("JavaStringIndexerSuite"); - - spark = SparkSession.builder().config(sparkConf).getOrCreate(); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaStringIndexerSuite extends SharedSparkSession { @Test public void testStringIndexer() { diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaTokenizerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaTokenizerSuite.java index 0bac2839e179d..9bba059c4bebd 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaTokenizerSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaTokenizerSuite.java @@ -25,30 +25,14 @@ import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; -public class JavaTokenizerSuite { - private transient SparkSession spark; - private transient JavaSparkContext jsc; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaTokenizerSuite") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaTokenizerSuite extends SharedSparkSession { @Test public void regexTokenizer() { diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorAssemblerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorAssemblerSuite.java index 8774cd0c69f19..adb455ee34a5a 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorAssemblerSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorAssemblerSuite.java @@ -24,6 +24,7 @@ import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.SparkConf; import org.apache.spark.mllib.linalg.Vector; import org.apache.spark.mllib.linalg.VectorUDT; @@ -36,23 +37,7 @@ import org.apache.spark.sql.types.StructType; import static org.apache.spark.sql.types.DataTypes.*; -public class JavaVectorAssemblerSuite { - private transient SparkSession spark; - - @Before - public void setUp() { - SparkConf sparkConf = new SparkConf(); - sparkConf.setMaster("local"); - sparkConf.setAppName("JavaVectorAssemblerSuite"); - - spark = SparkSession.builder().config(sparkConf).getOrCreate(); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaVectorAssemblerSuite extends SharedSparkSession { @Test public void testVectorAssembler() { diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorIndexerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorIndexerSuite.java index c386c9a45b099..afa4d1b46665a 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorIndexerSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorIndexerSuite.java @@ 
-27,6 +27,7 @@ import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.ml.feature.VectorIndexerSuite.FeatureData; import org.apache.spark.mllib.linalg.Vectors; @@ -35,24 +36,7 @@ import org.apache.spark.sql.SparkSession; -public class JavaVectorIndexerSuite implements Serializable { - private transient SparkSession spark; - private JavaSparkContext jsc; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaVectorIndexerSuite") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaVectorIndexerSuite extends SharedSparkSession implements Serializable { @Test public void vectorIndexerAPI() { diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorSlicerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorSlicerSuite.java index 59ad3c2f61e85..7c485cff41141 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorSlicerSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorSlicerSuite.java @@ -25,6 +25,7 @@ import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.ml.attribute.Attribute; import org.apache.spark.ml.attribute.AttributeGroup; import org.apache.spark.ml.attribute.NumericAttribute; @@ -37,22 +38,7 @@ import org.apache.spark.sql.types.StructType; -public class JavaVectorSlicerSuite { - private transient SparkSession spark; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaVectorSlicerSuite") - .getOrCreate(); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaVectorSlicerSuite extends SharedSparkSession { @Test public void vectorSlice() { diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaWord2VecSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaWord2VecSuite.java index 392aabc96d372..0f06bf665158a 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaWord2VecSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaWord2VecSuite.java @@ -24,6 +24,7 @@ import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.mllib.linalg.Vector; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; @@ -31,22 +32,7 @@ import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.types.*; -public class JavaWord2VecSuite { - private transient SparkSession spark; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaWord2VecSuite") - .getOrCreate(); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaWord2VecSuite extends SharedSparkSession { @Test public void testJavaWord2Vec() { From f3a2244a5bab178c223ceaa7adeed7728629fdbf Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Fri, 13 May 2016 20:11:06 +0530 Subject: [PATCH 04/27] ML Feature remove unused imports --- .../org/apache/spark/ml/feature/JavaBucketizerSuite.java | 3 --- .../java/org/apache/spark/ml/feature/JavaDCTSuite.java | 3 --- .../org/apache/spark/ml/feature/JavaHashingTFSuite.java | 3 --- .../org/apache/spark/ml/feature/JavaNormalizerSuite.java | 4 ---- .../java/org/apache/spark/ml/feature/JavaPCASuite.java | 4 ---- 
.../spark/ml/feature/JavaPolynomialExpansionSuite.java | 4 ---- .../apache/spark/ml/feature/JavaStandardScalerSuite.java | 4 ---- .../apache/spark/ml/feature/JavaStopWordsRemoverSuite.java | 3 --- .../apache/spark/ml/feature/JavaStringIndexerSuite.java | 7 ++----- .../org/apache/spark/ml/feature/JavaTokenizerSuite.java | 4 ---- .../apache/spark/ml/feature/JavaVectorAssemblerSuite.java | 7 ++----- .../apache/spark/ml/feature/JavaVectorIndexerSuite.java | 4 ---- .../org/apache/spark/ml/feature/JavaVectorSlicerSuite.java | 3 --- .../org/apache/spark/ml/feature/JavaWord2VecSuite.java | 3 --- 14 files changed, 4 insertions(+), 52 deletions(-) diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaBucketizerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaBucketizerSuite.java index 854abd05e221f..87639380bdcf4 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaBucketizerSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaBucketizerSuite.java @@ -20,16 +20,13 @@ import java.util.Arrays; import java.util.List; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; import org.apache.spark.SharedSparkSession; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.RowFactory; -import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.types.DataTypes; import org.apache.spark.sql.types.Metadata; import org.apache.spark.sql.types.StructField; diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaDCTSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaDCTSuite.java index 4a5ba362f4639..7fe5d7d7f1f54 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaDCTSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaDCTSuite.java @@ -22,9 +22,7 @@ import edu.emory.mathcs.jtransforms.dct.DoubleDCT_1D; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; import org.apache.spark.SharedSparkSession; @@ -34,7 +32,6 @@ import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.RowFactory; -import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.types.Metadata; import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaHashingTFSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaHashingTFSuite.java index 11e7bee832e14..863a1e275a5d1 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaHashingTFSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaHashingTFSuite.java @@ -20,9 +20,7 @@ import java.util.Arrays; import java.util.List; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; import org.apache.spark.SharedSparkSession; @@ -30,7 +28,6 @@ import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.RowFactory; -import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.types.DataTypes; import org.apache.spark.sql.types.Metadata; import org.apache.spark.sql.types.StructField; diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaNormalizerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaNormalizerSuite.java index 582bc99e0c784..819cab6b7f847 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaNormalizerSuite.java +++ 
b/mllib/src/test/java/org/apache/spark/ml/feature/JavaNormalizerSuite.java @@ -19,17 +19,13 @@ import java.util.Arrays; -import org.junit.After; -import org.junit.Before; import org.junit.Test; import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.mllib.linalg.Vectors; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -import org.apache.spark.sql.SparkSession; public class JavaNormalizerSuite extends SharedSparkSession { diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java index 4709c92adacd4..b000c6374ba82 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java @@ -23,14 +23,11 @@ import scala.Tuple2; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; import org.apache.spark.mllib.linalg.Matrix; import org.apache.spark.mllib.linalg.Vector; @@ -38,7 +35,6 @@ import org.apache.spark.mllib.linalg.distributed.RowMatrix; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -import org.apache.spark.sql.SparkSession; public class JavaPCASuite extends SharedSparkSession implements Serializable { diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaPolynomialExpansionSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaPolynomialExpansionSuite.java index ff119392785ad..14712e424b39a 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaPolynomialExpansionSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaPolynomialExpansionSuite.java @@ -20,20 +20,16 @@ import java.util.Arrays; import java.util.List; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; import org.apache.spark.SharedSparkSession; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.mllib.linalg.Vector; import org.apache.spark.mllib.linalg.VectorUDT; import org.apache.spark.mllib.linalg.Vectors; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.RowFactory; -import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.types.Metadata; import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaStandardScalerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaStandardScalerSuite.java index 5006f52618356..fdb9cc88cb7ef 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaStandardScalerSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaStandardScalerSuite.java @@ -20,16 +20,12 @@ import java.util.Arrays; import java.util.List; -import org.junit.After; -import org.junit.Before; import org.junit.Test; import org.apache.spark.SharedSparkSession; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.mllib.linalg.Vectors; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -import org.apache.spark.sql.SparkSession; public class JavaStandardScalerSuite extends SharedSparkSession { diff --git 
a/mllib/src/test/java/org/apache/spark/ml/feature/JavaStopWordsRemoverSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaStopWordsRemoverSuite.java index f25c0a7e204ff..6480b57e1f796 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaStopWordsRemoverSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaStopWordsRemoverSuite.java @@ -20,15 +20,12 @@ import java.util.Arrays; import java.util.List; -import org.junit.After; -import org.junit.Before; import org.junit.Test; import org.apache.spark.SharedSparkSession; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.RowFactory; -import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.types.DataTypes; import org.apache.spark.sql.types.Metadata; import org.apache.spark.sql.types.StructField; diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaStringIndexerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaStringIndexerSuite.java index e75d53e585048..c1928a26b609e 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaStringIndexerSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaStringIndexerSuite.java @@ -20,20 +20,17 @@ import java.util.Arrays; import java.util.List; -import org.junit.After; +import static org.apache.spark.sql.types.DataTypes.*; + import org.junit.Assert; -import org.junit.Before; import org.junit.Test; import org.apache.spark.SharedSparkSession; -import org.apache.spark.SparkConf; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.RowFactory; -import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; -import static org.apache.spark.sql.types.DataTypes.*; public class JavaStringIndexerSuite extends SharedSparkSession { diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaTokenizerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaTokenizerSuite.java index 9bba059c4bebd..27550a3d5c373 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaTokenizerSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaTokenizerSuite.java @@ -20,17 +20,13 @@ import java.util.Arrays; import java.util.List; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -import org.apache.spark.sql.SparkSession; public class JavaTokenizerSuite extends SharedSparkSession { diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorAssemblerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorAssemblerSuite.java index adb455ee34a5a..382e4ffe75b4e 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorAssemblerSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorAssemblerSuite.java @@ -19,23 +19,20 @@ import java.util.Arrays; -import org.junit.After; +import static org.apache.spark.sql.types.DataTypes.*; + import org.junit.Assert; -import org.junit.Before; import org.junit.Test; import org.apache.spark.SharedSparkSession; -import org.apache.spark.SparkConf; import org.apache.spark.mllib.linalg.Vector; import org.apache.spark.mllib.linalg.VectorUDT; import org.apache.spark.mllib.linalg.Vectors; import 
org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.RowFactory; -import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; -import static org.apache.spark.sql.types.DataTypes.*; public class JavaVectorAssemblerSuite extends SharedSparkSession { diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorIndexerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorIndexerSuite.java index afa4d1b46665a..c77e29342c615 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorIndexerSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorIndexerSuite.java @@ -22,18 +22,14 @@ import java.util.List; import java.util.Map; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; import org.apache.spark.SharedSparkSession; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.ml.feature.VectorIndexerSuite.FeatureData; import org.apache.spark.mllib.linalg.Vectors; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -import org.apache.spark.sql.SparkSession; public class JavaVectorIndexerSuite extends SharedSparkSession implements Serializable { diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorSlicerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorSlicerSuite.java index 7c485cff41141..8172fb081ea2d 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorSlicerSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorSlicerSuite.java @@ -20,9 +20,7 @@ import java.util.Arrays; import java.util.List; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; import org.apache.spark.SharedSparkSession; @@ -34,7 +32,6 @@ import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.RowFactory; -import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.types.StructType; diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaWord2VecSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaWord2VecSuite.java index 0f06bf665158a..ade87952bd242 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaWord2VecSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaWord2VecSuite.java @@ -19,9 +19,7 @@ import java.util.Arrays; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; import org.apache.spark.SharedSparkSession; @@ -29,7 +27,6 @@ import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.RowFactory; -import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.types.*; public class JavaWord2VecSuite extends SharedSparkSession { From 9e419362e487f04d1001a05904afa58fee88ead3 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Fri, 13 May 2016 20:11:36 +0530 Subject: [PATCH 05/27] ML Param Done - Remove SparkSession, esc since not used --- .../spark/ml/param/JavaParamsSuite.java | 23 ------------------- 1 file changed, 23 deletions(-) diff --git a/mllib/src/test/java/org/apache/spark/ml/param/JavaParamsSuite.java b/mllib/src/test/java/org/apache/spark/ml/param/JavaParamsSuite.java index a5b5dd4088ff8..1077e103a3b89 100644 --- a/mllib/src/test/java/org/apache/spark/ml/param/JavaParamsSuite.java +++ 
b/mllib/src/test/java/org/apache/spark/ml/param/JavaParamsSuite.java @@ -19,37 +19,14 @@ import java.util.Arrays; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.sql.SparkSession; - /** * Test Param and related classes in Java */ public class JavaParamsSuite { - private transient SparkSession spark; - private transient JavaSparkContext jsc; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaParamsSuite") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } - @Test public void testParams() { JavaTestParams testParams = new JavaTestParams(); From b97da5be1517c9989b752c11ec61a36b28d61258 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Fri, 13 May 2016 20:46:07 +0530 Subject: [PATCH 06/27] ml regression done --- .../JavaDecisionTreeRegressorSuite.java | 25 ++----------------- .../ml/regression/JavaGBTRegressorSuite.java | 25 ++----------------- .../regression/JavaLinearRegressionSuite.java | 23 +++-------------- .../JavaRandomForestRegressorSuite.java | 25 ++----------------- 4 files changed, 9 insertions(+), 89 deletions(-) diff --git a/mllib/src/test/java/org/apache/spark/ml/regression/JavaDecisionTreeRegressorSuite.java b/mllib/src/test/java/org/apache/spark/ml/regression/JavaDecisionTreeRegressorSuite.java index bbd59a04ec867..a8c6c46de4504 100644 --- a/mllib/src/test/java/org/apache/spark/ml/regression/JavaDecisionTreeRegressorSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/regression/JavaDecisionTreeRegressorSuite.java @@ -21,39 +21,18 @@ import java.util.HashMap; import java.util.Map; -import org.junit.After; -import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.ml.tree.impl.TreeTests; import org.apache.spark.mllib.classification.LogisticRegressionSuite; import org.apache.spark.mllib.regression.LabeledPoint; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -import org.apache.spark.sql.SparkSession; -public class JavaDecisionTreeRegressorSuite implements Serializable { - - private transient SparkSession spark; - private transient JavaSparkContext jsc; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaDecisionTreeRegressorSuite") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaDecisionTreeRegressorSuite extends SharedSparkSession implements Serializable { @Test public void runDT() { diff --git a/mllib/src/test/java/org/apache/spark/ml/regression/JavaGBTRegressorSuite.java b/mllib/src/test/java/org/apache/spark/ml/regression/JavaGBTRegressorSuite.java index 5370b58e8fda5..f243723ca206b 100644 --- a/mllib/src/test/java/org/apache/spark/ml/regression/JavaGBTRegressorSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/regression/JavaGBTRegressorSuite.java @@ -21,39 +21,18 @@ import java.util.HashMap; import java.util.Map; -import org.junit.After; -import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import 
org.apache.spark.ml.tree.impl.TreeTests; import org.apache.spark.mllib.classification.LogisticRegressionSuite; import org.apache.spark.mllib.regression.LabeledPoint; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -import org.apache.spark.sql.SparkSession; -public class JavaGBTRegressorSuite implements Serializable { - - private transient SparkSession spark; - private transient JavaSparkContext jsc; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaGBTRegressorSuite") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaGBTRegressorSuite extends SharedSparkSession implements Serializable { @Test public void runDT() { diff --git a/mllib/src/test/java/org/apache/spark/ml/regression/JavaLinearRegressionSuite.java b/mllib/src/test/java/org/apache/spark/ml/regression/JavaLinearRegressionSuite.java index 00c59f08b679a..a85d626d9c0d2 100644 --- a/mllib/src/test/java/org/apache/spark/ml/regression/JavaLinearRegressionSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/regression/JavaLinearRegressionSuite.java @@ -20,45 +20,28 @@ import java.io.Serializable; import java.util.List; -import org.junit.After; import org.junit.Before; import org.junit.Test; import static org.junit.Assert.assertEquals; +import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.mllib.regression.LabeledPoint; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -import org.apache.spark.sql.SparkSession; import static org.apache.spark.mllib.classification.LogisticRegressionSuite.generateLogisticInputAsList; -public class JavaLinearRegressionSuite implements Serializable { - - private transient SparkSession spark; - private transient JavaSparkContext jsc; +public class JavaLinearRegressionSuite extends SharedSparkSession implements Serializable { private transient Dataset dataset; private transient JavaRDD datasetRDD; - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaLinearRegressionSuite") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); + public void customSetUp() { List points = generateLogisticInputAsList(1.0, 1.0, 100, 42); datasetRDD = jsc.parallelize(points, 2); dataset = spark.createDataFrame(datasetRDD, LabeledPoint.class); dataset.registerTempTable("dataset"); } - @After - public void tearDown() { - jsc.stop(); - jsc = null; - } - @Test public void linearRegressionDefaultParams() { LinearRegression lr = new LinearRegression(); diff --git a/mllib/src/test/java/org/apache/spark/ml/regression/JavaRandomForestRegressorSuite.java b/mllib/src/test/java/org/apache/spark/ml/regression/JavaRandomForestRegressorSuite.java index fdb41ffc10388..30837420ee57c 100644 --- a/mllib/src/test/java/org/apache/spark/ml/regression/JavaRandomForestRegressorSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/regression/JavaRandomForestRegressorSuite.java @@ -21,41 +21,20 @@ import java.util.HashMap; import java.util.Map; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.ml.tree.impl.TreeTests; import 
org.apache.spark.mllib.classification.LogisticRegressionSuite; import org.apache.spark.mllib.linalg.Vector; import org.apache.spark.mllib.regression.LabeledPoint; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -import org.apache.spark.sql.SparkSession; -public class JavaRandomForestRegressorSuite implements Serializable { - - private transient SparkSession spark; - private transient JavaSparkContext jsc; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaRandomForestRegressorSuite") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaRandomForestRegressorSuite extends SharedSparkSession implements Serializable { @Test public void runDT() { From a96fa3a34a6d3c6d687d8a9b15f95a40e91bf929 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Fri, 13 May 2016 20:55:38 +0530 Subject: [PATCH 07/27] ml libsvm done --- .../org/apache/spark/SharedSparkSession.java | 12 ++++++++++- .../libsvm/JavaLibSVMRelationSuite.java | 20 ++++--------------- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/mllib/src/test/java/org/apache/spark/SharedSparkSession.java b/mllib/src/test/java/org/apache/spark/SharedSparkSession.java index 68166ff7b6968..08d0defb64112 100644 --- a/mllib/src/test/java/org/apache/spark/SharedSparkSession.java +++ b/mllib/src/test/java/org/apache/spark/SharedSparkSession.java @@ -1,5 +1,7 @@ package org.apache.spark; +import java.io.IOException; + import org.junit.After; import org.junit.Before; @@ -12,7 +14,7 @@ public class SharedSparkSession { public transient JavaSparkContext jsc; @Before - public void setUp() { + public void setUp() throws IOException { spark = SparkSession.builder() .master("local") .appName("shared-spark-session") @@ -20,13 +22,21 @@ public void setUp() { jsc = new JavaSparkContext(spark.sparkContext()); customSetUp(); + customSetUpWithException(); } public void customSetUp() {} + // TODO: Remove this once we have a way to use customSetUp that Exception + public void customSetUpWithException() throws IOException {} + @After public void tearDown() { spark.stop(); spark = null; + + customTearDown(); } + + public void customTearDown() {} } diff --git a/mllib/src/test/java/org/apache/spark/ml/source/libsvm/JavaLibSVMRelationSuite.java b/mllib/src/test/java/org/apache/spark/ml/source/libsvm/JavaLibSVMRelationSuite.java index 058f2ddafd866..fb7905ce87a69 100644 --- a/mllib/src/test/java/org/apache/spark/ml/source/libsvm/JavaLibSVMRelationSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/source/libsvm/JavaLibSVMRelationSuite.java @@ -23,35 +23,26 @@ import com.google.common.io.Files; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.mllib.linalg.DenseVector; import org.apache.spark.mllib.linalg.Vectors; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -import org.apache.spark.sql.SparkSession; import org.apache.spark.util.Utils; /** * Test LibSVMRelation in Java. 
*/ -public class JavaLibSVMRelationSuite { - private transient SparkSession spark; +public class JavaLibSVMRelationSuite extends SharedSparkSession { private File tempDir; private String path; - @Before - public void setUp() throws IOException { - spark = SparkSession.builder() - .master("local") - .appName("JavaLibSVMRelationSuite") - .getOrCreate(); - + public void customSetUpWithException() throws IOException { tempDir = Utils.createTempDir(System.getProperty("java.io.tmpdir"), "datasource"); File file = new File(tempDir, "part-00000"); String s = "1 1:1.0 3:2.0 5:3.0\n0\n0 2:4.0 4:5.0 6:6.0"; @@ -59,10 +50,7 @@ public void setUp() throws IOException { path = tempDir.toURI().toString(); } - @After - public void tearDown() { - spark.stop(); - spark = null; + public void customTearDown() { Utils.deleteRecursively(tempDir); } From 29a1194326cb8bb6bd3b5eb2ee665156b17fe773 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Fri, 13 May 2016 20:59:44 +0530 Subject: [PATCH 08/27] ml tuning, util done --- .../ml/tuning/JavaCrossValidatorSuite.java | 24 ++------------- .../ml/util/JavaDefaultReadWriteSuite.java | 29 +++---------------- 2 files changed, 7 insertions(+), 46 deletions(-) diff --git a/mllib/src/test/java/org/apache/spark/ml/tuning/JavaCrossValidatorSuite.java b/mllib/src/test/java/org/apache/spark/ml/tuning/JavaCrossValidatorSuite.java index 8b4d034ffea02..9d1ec1f5af173 100644 --- a/mllib/src/test/java/org/apache/spark/ml/tuning/JavaCrossValidatorSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/tuning/JavaCrossValidatorSuite.java @@ -20,45 +20,27 @@ import java.io.Serializable; import java.util.List; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; -import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.SharedSparkSession; import org.apache.spark.ml.classification.LogisticRegression; import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator; import org.apache.spark.ml.param.ParamMap; import org.apache.spark.mllib.regression.LabeledPoint; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -import org.apache.spark.sql.SparkSession; import static org.apache.spark.mllib.classification.LogisticRegressionSuite.generateLogisticInputAsList; -public class JavaCrossValidatorSuite implements Serializable { +public class JavaCrossValidatorSuite extends SharedSparkSession implements Serializable { - private transient SparkSession spark; - private transient JavaSparkContext jsc; private transient Dataset dataset; - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaCrossValidatorSuite") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - + public void customSetUp() { List points = generateLogisticInputAsList(1.0, 1.0, 100, 42); dataset = spark.createDataFrame(jsc.parallelize(points, 2), LabeledPoint.class); } - @After - public void tearDown() { - jsc.stop(); - jsc = null; - } - @Test public void crossValidationWithLogisticRegression() { LogisticRegression lr = new LogisticRegression(); diff --git a/mllib/src/test/java/org/apache/spark/ml/util/JavaDefaultReadWriteSuite.java b/mllib/src/test/java/org/apache/spark/ml/util/JavaDefaultReadWriteSuite.java index 7151e27cde839..b6b975632e2c3 100644 --- a/mllib/src/test/java/org/apache/spark/ml/util/JavaDefaultReadWriteSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/util/JavaDefaultReadWriteSuite.java @@ -20,42 +20,21 @@ import java.io.File; import java.io.IOException; 
-import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.sql.SQLContext; -import org.apache.spark.sql.SparkSession; +import org.apache.spark.SharedSparkSession; import org.apache.spark.util.Utils; -public class JavaDefaultReadWriteSuite { - - JavaSparkContext jsc = null; - SparkSession spark = null; +public class JavaDefaultReadWriteSuite extends SharedSparkSession { File tempDir = null; - @Before - public void setUp() { - SQLContext.clearActive(); - spark = SparkSession.builder() - .master("local[2]") - .appName("JavaDefaultReadWriteSuite") - .getOrCreate(); - SQLContext.setActive(spark.wrapped()); - + public void customSetUp() { tempDir = Utils.createTempDir( System.getProperty("java.io.tmpdir"), "JavaDefaultReadWriteSuite"); } - @After - public void tearDown() { - SQLContext.clearActive(); - if (spark != null) { - spark.stop(); - spark = null; - } + public void customTearDown() { Utils.deleteRecursively(tempDir); } From 8ad34a0d753c9b0a1c187b7d1ac565674486db1c Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Fri, 13 May 2016 21:12:26 +0530 Subject: [PATCH 09/27] mllib classification --- .../JavaLogisticRegressionSuite.java | 24 ++----------------- .../classification/JavaNaiveBayesSuite.java | 24 ++----------------- .../mllib/classification/JavaSVMSuite.java | 24 ++----------------- 3 files changed, 6 insertions(+), 66 deletions(-) diff --git a/mllib/src/test/java/org/apache/spark/mllib/classification/JavaLogisticRegressionSuite.java b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaLogisticRegressionSuite.java index 2f10d14da5e4a..0bae289d37392 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/classification/JavaLogisticRegressionSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaLogisticRegressionSuite.java @@ -20,34 +20,14 @@ import java.io.Serializable; import java.util.List; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.mllib.regression.LabeledPoint; -import org.apache.spark.sql.SparkSession; -public class JavaLogisticRegressionSuite implements Serializable { - private transient SparkSession spark; - private transient JavaSparkContext jsc; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaLogisticRegressionSuite") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaLogisticRegressionSuite extends SharedSparkSession implements Serializable { int validatePrediction(List validationData, LogisticRegressionModel model) { int numAccurate = 0; diff --git a/mllib/src/test/java/org/apache/spark/mllib/classification/JavaNaiveBayesSuite.java b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaNaiveBayesSuite.java index 5e212e2fc5b3b..722f6b525845a 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/classification/JavaNaiveBayesSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaNaiveBayesSuite.java @@ -21,38 +21,18 @@ import java.util.Arrays; import java.util.List; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; 
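[Editor's note, not part of the patch series: the conversions in these commits all follow the same shape. Each suite drops its per-class SparkSession/JavaSparkContext boilerplate, extends org.apache.spark.SharedSparkSession, and, when it needs extra initialization, overrides the customSetUp()/customTearDown() hooks that the shared @Before/@After methods invoke. A minimal sketch of that pattern as it stands at this point in the series; the suite name and the assertion are hypothetical, while the hooks, fields, and imports mirror the converted suites:

  import java.io.Serializable;
  import java.util.List;

  import org.junit.Assert;
  import org.junit.Test;

  import org.apache.spark.SharedSparkSession;
  import org.apache.spark.api.java.JavaRDD;
  import org.apache.spark.mllib.regression.LabeledPoint;

  import static org.apache.spark.mllib.classification.LogisticRegressionSuite.generateLogisticInputAsList;

  public class JavaExampleSuite extends SharedSparkSession implements Serializable {
    private transient JavaRDD<LabeledPoint> data;

    public void customSetUp() {
      // spark and jsc are created by SharedSparkSession.setUp() before this hook runs
      List<LabeledPoint> points = generateLogisticInputAsList(1.0, 1.0, 100, 42);
      data = jsc.parallelize(points, 2);
    }

    @Test
    public void countsGeneratedPoints() {
      Assert.assertEquals(100, data.count());
    }
  }

End of editor's note.]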
import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; import org.apache.spark.mllib.linalg.Vector; import org.apache.spark.mllib.linalg.Vectors; import org.apache.spark.mllib.regression.LabeledPoint; -import org.apache.spark.sql.SparkSession; -public class JavaNaiveBayesSuite implements Serializable { - private transient SparkSession spark; - private transient JavaSparkContext jsc; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaNaiveBayesSuite") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaNaiveBayesSuite extends SharedSparkSession implements Serializable { private static final List POINTS = Arrays.asList( new LabeledPoint(0, Vectors.dense(1.0, 0.0, 0.0)), diff --git a/mllib/src/test/java/org/apache/spark/mllib/classification/JavaSVMSuite.java b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaSVMSuite.java index 2a090c054fe2b..fea41ba24ab52 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/classification/JavaSVMSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaSVMSuite.java @@ -20,34 +20,14 @@ import java.io.Serializable; import java.util.List; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.mllib.regression.LabeledPoint; -import org.apache.spark.sql.SparkSession; -public class JavaSVMSuite implements Serializable { - private transient SparkSession spark; - private transient JavaSparkContext jsc; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaSVMSuite") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaSVMSuite extends SharedSparkSession implements Serializable { int validatePrediction(List validationData, SVMModel model) { int numAccurate = 0; From 940d56468fb4078125b4e0967502a1bb4e7e5a1c Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Fri, 13 May 2016 21:15:43 +0530 Subject: [PATCH 10/27] mllib clustering --- .../clustering/JavaBisectingKMeansSuite.java | 24 ++---------------- .../clustering/JavaGaussianMixtureSuite.java | 24 ++---------------- .../mllib/clustering/JavaKMeansSuite.java | 24 ++---------------- .../spark/mllib/clustering/JavaLDASuite.java | 25 +++---------------- 4 files changed, 9 insertions(+), 88 deletions(-) diff --git a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaBisectingKMeansSuite.java b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaBisectingKMeansSuite.java index 7f29b050479fc..c714144093250 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaBisectingKMeansSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaBisectingKMeansSuite.java @@ -21,35 +21,15 @@ import com.google.common.collect.Lists; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.mllib.linalg.Vector; import 
org.apache.spark.mllib.linalg.Vectors; -import org.apache.spark.sql.SparkSession; -public class JavaBisectingKMeansSuite implements Serializable { - private transient SparkSession spark; - private transient JavaSparkContext jsc; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaBisectingKMeansSuite") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaBisectingKMeansSuite extends SharedSparkSession implements Serializable { @Test public void twoDimensionalData() { diff --git a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaGaussianMixtureSuite.java b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaGaussianMixtureSuite.java index 20edd08a2172d..12cf153739c31 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaGaussianMixtureSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaGaussianMixtureSuite.java @@ -23,34 +23,14 @@ import static org.junit.Assert.assertEquals; -import org.junit.After; -import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.mllib.linalg.Vector; import org.apache.spark.mllib.linalg.Vectors; -import org.apache.spark.sql.SparkSession; -public class JavaGaussianMixtureSuite implements Serializable { - private transient SparkSession spark; - private transient JavaSparkContext jsc; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaGaussianMixture") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaGaussianMixtureSuite extends SharedSparkSession implements Serializable { @Test public void runGaussianMixture() { diff --git a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaKMeansSuite.java b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaKMeansSuite.java index 4e5b87f588e3d..9e8b65659431c 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaKMeansSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaKMeansSuite.java @@ -23,34 +23,14 @@ import static org.junit.Assert.assertEquals; -import org.junit.After; -import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.mllib.linalg.Vector; import org.apache.spark.mllib.linalg.Vectors; -import org.apache.spark.sql.SparkSession; -public class JavaKMeansSuite implements Serializable { - private transient SparkSession spark; - private transient JavaSparkContext jsc; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaKMeans") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaKMeansSuite extends SharedSparkSession implements Serializable { @Test public void runKMeansUsingStaticMethods() { diff --git a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java index f16585aff4f5d..35bb577223f6d 100644 --- 
a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java @@ -24,32 +24,19 @@ import scala.Tuple2; import scala.Tuple3; -import org.junit.After; -import org.junit.Before; import org.junit.Test; import static org.junit.Assert.*; +import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; import org.apache.spark.mllib.linalg.Matrix; import org.apache.spark.mllib.linalg.Vector; import org.apache.spark.mllib.linalg.Vectors; -import org.apache.spark.sql.SparkSession; - -public class JavaLDASuite implements Serializable { - private transient SparkSession spark; - private transient JavaSparkContext jsc; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaLDASuite") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); +public class JavaLDASuite extends SharedSparkSession implements Serializable { + public void customSetUp() { ArrayList> tinyCorpus = new ArrayList<>(); for (int i = 0; i < LDASuite.tinyCorpus().length; i++) { tinyCorpus.add(new Tuple2<>((Long) LDASuite.tinyCorpus()[i]._1(), @@ -59,12 +46,6 @@ public void setUp() { corpus = JavaPairRDD.fromJavaRDD(tmpCorpus); } - @After - public void tearDown() { - spark.stop(); - spark = null; - } - @Test public void localLDAModel() { Matrix topics = LDASuite.tinyTopics(); From 9d4d015cdb2f50c14859e0574d59d7809eb97bc1 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Fri, 13 May 2016 21:19:48 +0530 Subject: [PATCH 11/27] mllib evaluation and feature --- .../evaluation/JavaRankingMetricsSuite.java | 22 +++-------------- .../spark/mllib/feature/JavaTfIdfSuite.java | 24 ++----------------- .../mllib/feature/JavaWord2VecSuite.java | 24 ++----------------- 3 files changed, 7 insertions(+), 63 deletions(-) diff --git a/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java b/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java index 6a096d6386550..ecb077a280a83 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java @@ -24,28 +24,18 @@ import scala.Tuple2; import scala.Tuple2$; -import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.sql.SparkSession; -public class JavaRankingMetricsSuite implements Serializable { - private transient SparkSession spark; - private transient JavaSparkContext jsc; +public class JavaRankingMetricsSuite extends SharedSparkSession implements Serializable { private transient JavaRDD, List>> predictionAndLabels; @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaPCASuite") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - + public void customSetUp() { predictionAndLabels = jsc.parallelize(Arrays.asList( Tuple2$.MODULE$.apply( Arrays.asList(1, 6, 2, 7, 8, 3, 9, 10, 4, 5), Arrays.asList(1, 2, 3, 4, 5)), @@ -55,12 +45,6 @@ public void setUp() { Arrays.asList(1, 2, 3, 4, 5), Arrays.asList())), 2); } - @After - public void tearDown() { - 
spark.stop(); - spark = null; - } - @Test public void rankingMetrics() { @SuppressWarnings("unchecked") diff --git a/mllib/src/test/java/org/apache/spark/mllib/feature/JavaTfIdfSuite.java b/mllib/src/test/java/org/apache/spark/mllib/feature/JavaTfIdfSuite.java index de50fb8c4fdb2..aab196065df90 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/feature/JavaTfIdfSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/feature/JavaTfIdfSuite.java @@ -21,34 +21,14 @@ import java.util.Arrays; import java.util.List; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.mllib.linalg.Vector; -import org.apache.spark.sql.SparkSession; -public class JavaTfIdfSuite implements Serializable { - private transient SparkSession spark; - private transient JavaSparkContext jsc; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaPCASuite") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaTfIdfSuite extends SharedSparkSession implements Serializable { @Test public void tfIdf() { diff --git a/mllib/src/test/java/org/apache/spark/mllib/feature/JavaWord2VecSuite.java b/mllib/src/test/java/org/apache/spark/mllib/feature/JavaWord2VecSuite.java index 64885cc8425d3..24d9ccfe15df9 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/feature/JavaWord2VecSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/feature/JavaWord2VecSuite.java @@ -25,33 +25,13 @@ import scala.Tuple2; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.sql.SparkSession; -public class JavaWord2VecSuite implements Serializable { - private transient SparkSession spark; - private transient JavaSparkContext jsc; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaPCASuite") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaWord2VecSuite extends SharedSparkSession implements Serializable { @Test @SuppressWarnings("unchecked") From e033253871621eba088972c919d75379059f9f40 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Fri, 13 May 2016 21:25:40 +0530 Subject: [PATCH 12/27] mllib fpm --- .../mllib/fpm/JavaAssociationRulesSuite.java | 24 ++----------------- .../spark/mllib/fpm/JavaFPGrowthSuite.java | 24 ++----------------- .../spark/mllib/fpm/JavaPrefixSpanSuite.java | 24 ++----------------- 3 files changed, 6 insertions(+), 66 deletions(-) diff --git a/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaAssociationRulesSuite.java b/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaAssociationRulesSuite.java index fdc19a5b3dc47..ddc48af85dea6 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaAssociationRulesSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaAssociationRulesSuite.java @@ -19,33 +19,13 @@ import java.io.Serializable; import java.util.Arrays; -import org.junit.After; -import org.junit.Before; import org.junit.Test; +import 
org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.mllib.fpm.FPGrowth.FreqItemset; -import org.apache.spark.sql.SparkSession; -public class JavaAssociationRulesSuite implements Serializable { - private transient SparkSession spark; - private transient JavaSparkContext jsc; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaAssociationRulesSuite") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaAssociationRulesSuite extends SharedSparkSession implements Serializable { @Test public void runAssociationRules() { diff --git a/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java b/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java index f235251e61d42..b337d7bd0f8c0 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java @@ -24,33 +24,13 @@ import static org.junit.Assert.assertEquals; -import org.junit.After; -import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.sql.SparkSession; import org.apache.spark.util.Utils; -public class JavaFPGrowthSuite implements Serializable { - private transient SparkSession spark; - private transient JavaSparkContext jsc; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaFPGrowth") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaFPGrowthSuite extends SharedSparkSession implements Serializable { @Test public void runFPGrowth() { diff --git a/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaPrefixSpanSuite.java b/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaPrefixSpanSuite.java index bf7f1fc71b08e..75b0ec6480196 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaPrefixSpanSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaPrefixSpanSuite.java @@ -21,35 +21,15 @@ import java.util.Arrays; import java.util.List; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.mllib.fpm.PrefixSpan.FreqSequence; -import org.apache.spark.sql.SparkSession; import org.apache.spark.util.Utils; -public class JavaPrefixSpanSuite { - private transient SparkSession spark; - private transient JavaSparkContext jsc; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaPrefixSpan") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaPrefixSpanSuite extends SharedSparkSession { @Test public void runPrefixSpan() { From 3aa61df7ec666ee53f2ad86ad6dd92c9c8dcb51d Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Fri, 13 May 2016 21:26:50 +0530 Subject: [PATCH 13/27] mllib random --- .../mllib/random/JavaRandomRDDsSuite.java | 24 ++----------------- 1 file changed, 2 
insertions(+), 22 deletions(-) diff --git a/mllib/src/test/java/org/apache/spark/mllib/random/JavaRandomRDDsSuite.java b/mllib/src/test/java/org/apache/spark/mllib/random/JavaRandomRDDsSuite.java index b449108a9b83e..6d114024c31be 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/random/JavaRandomRDDsSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/random/JavaRandomRDDsSuite.java @@ -20,36 +20,16 @@ import java.io.Serializable; import java.util.Arrays; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaDoubleRDD; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.mllib.linalg.Vector; -import org.apache.spark.sql.SparkSession; import static org.apache.spark.mllib.random.RandomRDDs.*; -public class JavaRandomRDDsSuite { - private transient SparkSession spark; - private transient JavaSparkContext jsc; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaRandomRDDsSuite") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaRandomRDDsSuite extends SharedSparkSession { @Test public void testUniformRDD() { From ddf68dad16c34fbaf890e797ed2c77bafb2501df Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Fri, 13 May 2016 21:28:14 +0530 Subject: [PATCH 14/27] mllib recommendation --- .../mllib/recommendation/JavaALSSuite.java | 24 ++----------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java b/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java index aa784054d551e..64c4810eabf80 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java @@ -24,34 +24,14 @@ import scala.Tuple2; import scala.Tuple3; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.sql.SparkSession; - -public class JavaALSSuite implements Serializable { - private transient SparkSession spark; - private transient JavaSparkContext jsc; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaALS") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - } - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaALSSuite extends SharedSparkSession implements Serializable { private void validatePrediction( MatrixFactorizationModel model, From 90b048acf514907ff9029c5bc2e6b785aa62a2bf Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Fri, 13 May 2016 21:30:47 +0530 Subject: [PATCH 15/27] mllib regression --- .../JavaIsotonicRegressionSuite.java | 24 ++----------------- .../mllib/regression/JavaLassoSuite.java | 24 ++----------------- .../regression/JavaLinearRegressionSuite.java | 24 ++----------------- .../regression/JavaRidgeRegressionSuite.java | 24 ++----------------- 4 files changed, 8 insertions(+), 88 deletions(-) diff --git 
a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaIsotonicRegressionSuite.java b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaIsotonicRegressionSuite.java index 8b05675d65a1d..d63bb1bb096d5 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaIsotonicRegressionSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaIsotonicRegressionSuite.java @@ -24,19 +24,14 @@ import scala.Tuple3; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaDoubleRDD; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.sql.SparkSession; -public class JavaIsotonicRegressionSuite implements Serializable { - private transient SparkSession spark; - private transient JavaSparkContext jsc; +public class JavaIsotonicRegressionSuite extends SharedSparkSession implements Serializable { private static List> generateIsotonicInput(double[] labels) { List> input = new ArrayList<>(labels.length); @@ -55,21 +50,6 @@ private IsotonicRegressionModel runIsotonicRegression(double[] labels) { return new IsotonicRegression().run(trainRDD); } - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaLinearRegressionSuite") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } - @Test public void testIsotonicRegressionJavaRDD() { IsotonicRegressionModel model = diff --git a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLassoSuite.java b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLassoSuite.java index 098bac3bedfff..3b7b4629d8170 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLassoSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLassoSuite.java @@ -20,34 +20,14 @@ import java.io.Serializable; import java.util.List; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.mllib.util.LinearDataGenerator; -import org.apache.spark.sql.SparkSession; -public class JavaLassoSuite implements Serializable { - private transient SparkSession spark; - private transient JavaSparkContext jsc; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaLassoSuite") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaLassoSuite extends SharedSparkSession implements Serializable { int validatePrediction(List validationData, LassoModel model) { int numAccurate = 0; diff --git a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLinearRegressionSuite.java b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLinearRegressionSuite.java index 35087a5e461df..f712521f3db46 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLinearRegressionSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLinearRegressionSuite.java @@ -20,36 +20,16 @@ import java.io.Serializable; import java.util.List; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; 
+import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; import org.apache.spark.mllib.linalg.Vector; import org.apache.spark.mllib.util.LinearDataGenerator; -import org.apache.spark.sql.SparkSession; -public class JavaLinearRegressionSuite implements Serializable { - private transient SparkSession spark; - private transient JavaSparkContext jsc; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaLinearRegressionSuite") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaLinearRegressionSuite extends SharedSparkSession implements Serializable { int validatePrediction(List validationData, LinearRegressionModel model) { int numAccurate = 0; diff --git a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java index b2efb2e72e374..43fc60e9fd151 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java @@ -21,34 +21,14 @@ import java.util.List; import java.util.Random; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.mllib.util.LinearDataGenerator; -import org.apache.spark.sql.SparkSession; - -public class JavaRidgeRegressionSuite implements Serializable { - private transient SparkSession spark; - private transient JavaSparkContext jsc; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaRidgeRegressionSuite") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - } - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaRidgeRegressionSuite extends SharedSparkSession implements Serializable { private static double predictionError(List validationData, RidgeRegressionModel model) { From c3f166d85e192878288d0d3bb9eb2640d2d1e1e5 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Fri, 13 May 2016 21:34:37 +0530 Subject: [PATCH 16/27] mllib tree --- .../mllib/tree/JavaDecisionTreeSuite.java | 25 ++----------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/mllib/src/test/java/org/apache/spark/mllib/tree/JavaDecisionTreeSuite.java b/mllib/src/test/java/org/apache/spark/mllib/tree/JavaDecisionTreeSuite.java index 5b464a4722d92..e9c4f80b44c9b 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/tree/JavaDecisionTreeSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/tree/JavaDecisionTreeSuite.java @@ -21,13 +21,11 @@ import java.util.HashMap; import java.util.List; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; +import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; import org.apache.spark.mllib.linalg.Vector; import org.apache.spark.mllib.regression.LabeledPoint; @@ -35,27 +33,8 @@ import org.apache.spark.mllib.tree.configuration.Strategy; 
import org.apache.spark.mllib.tree.impurity.Gini; import org.apache.spark.mllib.tree.model.DecisionTreeModel; -import org.apache.spark.sql.SparkSession; - -public class JavaDecisionTreeSuite implements Serializable { - private transient SparkSession spark; - private transient JavaSparkContext jsc; - - @Before - public void setUp() { - spark = SparkSession.builder() - .master("local") - .appName("JavaDecisionTreeSuite") - .getOrCreate(); - jsc = new JavaSparkContext(spark.sparkContext()); - } - - @After - public void tearDown() { - spark.stop(); - spark = null; - } +public class JavaDecisionTreeSuite extends SharedSparkSession implements Serializable { int validatePrediction(List validationData, DecisionTreeModel model) { int numCorrect = 0; From 36ce8d26723333487fc0983b128dc0a8beb30aa0 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Fri, 13 May 2016 21:40:17 +0530 Subject: [PATCH 17/27] fix javastyle --- .../apache/spark/ml/regression/JavaLinearRegressionSuite.java | 1 - 1 file changed, 1 deletion(-) diff --git a/mllib/src/test/java/org/apache/spark/ml/regression/JavaLinearRegressionSuite.java b/mllib/src/test/java/org/apache/spark/ml/regression/JavaLinearRegressionSuite.java index a85d626d9c0d2..7ac68e3399a9a 100644 --- a/mllib/src/test/java/org/apache/spark/ml/regression/JavaLinearRegressionSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/regression/JavaLinearRegressionSuite.java @@ -20,7 +20,6 @@ import java.io.Serializable; import java.util.List; -import org.junit.Before; import org.junit.Test; import static org.junit.Assert.assertEquals; From 12ba02811d375d8360bbb6d71631f111a279ce67 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Fri, 13 May 2016 21:47:11 +0530 Subject: [PATCH 18/27] add license to SharedSparkSession --- .../org/apache/spark/SharedSparkSession.java | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/mllib/src/test/java/org/apache/spark/SharedSparkSession.java b/mllib/src/test/java/org/apache/spark/SharedSparkSession.java index 08d0defb64112..95ee18fd58d00 100644 --- a/mllib/src/test/java/org/apache/spark/SharedSparkSession.java +++ b/mllib/src/test/java/org/apache/spark/SharedSparkSession.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package org.apache.spark; import java.io.IOException; From f3fa1f5b84873eead74157dacd53a2765a957245 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Wed, 18 May 2016 08:11:41 +0530 Subject: [PATCH 19/27] fix import --- .../org/apache/spark/ml/tuning/JavaCrossValidatorSuite.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib/src/test/java/org/apache/spark/ml/tuning/JavaCrossValidatorSuite.java b/mllib/src/test/java/org/apache/spark/ml/tuning/JavaCrossValidatorSuite.java index 15e0f7ce86c1d..e1c77f34cca6b 100644 --- a/mllib/src/test/java/org/apache/spark/ml/tuning/JavaCrossValidatorSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/tuning/JavaCrossValidatorSuite.java @@ -30,7 +30,7 @@ import org.apache.spark.ml.param.ParamMap; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -import static org.apache.spark.mllib.classification.LogisticRegressionSuite.generateLogisticInputAsList; +import static org.apache.spark.ml.classification.LogisticRegressionSuite.generateLogisticInputAsList; public class JavaCrossValidatorSuite extends SharedSparkSession implements Serializable { From e4117f34c554618a14fbe1a3a9fa3d8d9bd33b9e Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Fri, 20 May 2016 01:03:06 +0530 Subject: [PATCH 20/27] Mark custom methods as protected and add override --- .../test/java/org/apache/spark/SharedSparkSession.java | 8 ++------ .../test/java/org/apache/spark/ml/JavaPipelineSuite.java | 2 +- .../ml/classification/JavaLogisticRegressionSuite.java | 2 +- .../spark/ml/classification/JavaOneVsRestSuite.java | 2 +- .../org/apache/spark/ml/clustering/JavaKMeansSuite.java | 2 +- .../spark/ml/regression/JavaLinearRegressionSuite.java | 2 +- .../spark/ml/source/libsvm/JavaLibSVMRelationSuite.java | 4 ++-- .../apache/spark/ml/tuning/JavaCrossValidatorSuite.java | 2 +- .../apache/spark/ml/util/JavaDefaultReadWriteSuite.java | 4 ++-- .../org/apache/spark/mllib/clustering/JavaLDASuite.java | 2 +- .../spark/mllib/evaluation/JavaRankingMetricsSuite.java | 3 +-- 11 files changed, 14 insertions(+), 19 deletions(-) diff --git a/mllib/src/test/java/org/apache/spark/SharedSparkSession.java b/mllib/src/test/java/org/apache/spark/SharedSparkSession.java index 95ee18fd58d00..4a8ad7c6dad15 100644 --- a/mllib/src/test/java/org/apache/spark/SharedSparkSession.java +++ b/mllib/src/test/java/org/apache/spark/SharedSparkSession.java @@ -39,13 +39,9 @@ public void setUp() throws IOException { jsc = new JavaSparkContext(spark.sparkContext()); customSetUp(); - customSetUpWithException(); } - public void customSetUp() {} - - // TODO: Remove this once we have a way to use customSetUp that Exception - public void customSetUpWithException() throws IOException {} + protected void customSetUp() throws IOException {} @After public void tearDown() { @@ -55,5 +51,5 @@ public void tearDown() { customTearDown(); } - public void customTearDown() {} + protected void customTearDown() {} } diff --git a/mllib/src/test/java/org/apache/spark/ml/JavaPipelineSuite.java b/mllib/src/test/java/org/apache/spark/ml/JavaPipelineSuite.java index 5018cf1d7a0a5..c3cab2f86a376 100644 --- a/mllib/src/test/java/org/apache/spark/ml/JavaPipelineSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/JavaPipelineSuite.java @@ -35,7 +35,7 @@ public class JavaPipelineSuite extends SharedSparkSession { private transient Dataset dataset; - public void customSetUp() { + @Override protected void customSetUp() { JavaRDD points = jsc.parallelize(generateLogisticInputAsList(1.0, 1.0, 100, 42), 2); 
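[Editor's note, not part of the patch: from this commit onward the hooks are protected, subclasses annotate their implementations with @Override, and the separate customSetUpWithException() is folded away because customSetUp() itself now declares throws IOException. A suite whose setup performs I/O, such as JavaLibSVMRelationSuite (converted just below), therefore overrides the two hooks roughly as follows; the temp-directory name is illustrative, tempDir stands for the suite's java.io.File field, and Utils is org.apache.spark.util.Utils as used in that suite:

  @Override
  protected void customSetUp() throws IOException {
    tempDir = Utils.createTempDir(System.getProperty("java.io.tmpdir"), "example-suite");
    // write any fixture files into tempDir here before the tests run
  }

  @Override
  protected void customTearDown() {
    Utils.deleteRecursively(tempDir);
  }

End of editor's note.]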
dataset = spark.createDataFrame(points, LabeledPoint.class); diff --git a/mllib/src/test/java/org/apache/spark/ml/classification/JavaLogisticRegressionSuite.java b/mllib/src/test/java/org/apache/spark/ml/classification/JavaLogisticRegressionSuite.java index 949b82cce6365..8c703c4d3195e 100644 --- a/mllib/src/test/java/org/apache/spark/ml/classification/JavaLogisticRegressionSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/classification/JavaLogisticRegressionSuite.java @@ -38,7 +38,7 @@ public class JavaLogisticRegressionSuite extends SharedSparkSession implements S private transient JavaRDD datasetRDD; private double eps = 1e-5; - public void customSetUp() { + @Override protected void customSetUp() { List points = generateLogisticInputAsList(1.0, 1.0, 100, 42); datasetRDD = jsc.parallelize(points, 2); dataset = spark.createDataFrame(datasetRDD, LabeledPoint.class); diff --git a/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java b/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java index 9ccdd333c84b5..9849bef5adc67 100644 --- a/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java @@ -37,7 +37,7 @@ public class JavaOneVsRestSuite extends SharedSparkSession implements Serializab private transient Dataset dataset; private transient JavaRDD datasetRDD; - public void customSetUp() { + @Override protected void customSetUp() { int nPoints = 3; // The following coefficients and xMean/xVariance are computed from iris dataset with diff --git a/mllib/src/test/java/org/apache/spark/ml/clustering/JavaKMeansSuite.java b/mllib/src/test/java/org/apache/spark/ml/clustering/JavaKMeansSuite.java index 6d0432cb38965..69266a24e83e8 100644 --- a/mllib/src/test/java/org/apache/spark/ml/clustering/JavaKMeansSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/clustering/JavaKMeansSuite.java @@ -35,7 +35,7 @@ public class JavaKMeansSuite extends SharedSparkSession implements Serializable private transient int k = 5; private transient Dataset dataset; - public void customSetUp() { + @Override protected void customSetUp() { dataset = KMeansSuite.generateKMeansData(spark, 50, 3, k); } diff --git a/mllib/src/test/java/org/apache/spark/ml/regression/JavaLinearRegressionSuite.java b/mllib/src/test/java/org/apache/spark/ml/regression/JavaLinearRegressionSuite.java index 24db702326b2d..9498525ae99b9 100644 --- a/mllib/src/test/java/org/apache/spark/ml/regression/JavaLinearRegressionSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/regression/JavaLinearRegressionSuite.java @@ -34,7 +34,7 @@ public class JavaLinearRegressionSuite extends SharedSparkSession implements Ser private transient Dataset dataset; private transient JavaRDD datasetRDD; - public void customSetUp() { + @Override protected void customSetUp() { List points = generateLogisticInputAsList(1.0, 1.0, 100, 42); datasetRDD = jsc.parallelize(points, 2); dataset = spark.createDataFrame(datasetRDD, LabeledPoint.class); diff --git a/mllib/src/test/java/org/apache/spark/ml/source/libsvm/JavaLibSVMRelationSuite.java b/mllib/src/test/java/org/apache/spark/ml/source/libsvm/JavaLibSVMRelationSuite.java index 97926b8fc404c..d18ae1e0e58ac 100644 --- a/mllib/src/test/java/org/apache/spark/ml/source/libsvm/JavaLibSVMRelationSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/source/libsvm/JavaLibSVMRelationSuite.java @@ -42,7 +42,7 @@ public class JavaLibSVMRelationSuite extends 
SharedSparkSession { private File tempDir; private String path; - public void customSetUpWithException() throws IOException { + @Override protected void customSetUp() throws IOException { tempDir = Utils.createTempDir(System.getProperty("java.io.tmpdir"), "datasource"); File file = new File(tempDir, "part-00000"); String s = "1 1:1.0 3:2.0 5:3.0\n0\n0 2:4.0 4:5.0 6:6.0"; @@ -50,7 +50,7 @@ public void customSetUpWithException() throws IOException { path = tempDir.toURI().toString(); } - public void customTearDown() { + @Override protected void customTearDown() { Utils.deleteRecursively(tempDir); } diff --git a/mllib/src/test/java/org/apache/spark/ml/tuning/JavaCrossValidatorSuite.java b/mllib/src/test/java/org/apache/spark/ml/tuning/JavaCrossValidatorSuite.java index e1c77f34cca6b..21cafc7dd076b 100644 --- a/mllib/src/test/java/org/apache/spark/ml/tuning/JavaCrossValidatorSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/tuning/JavaCrossValidatorSuite.java @@ -37,7 +37,7 @@ public class JavaCrossValidatorSuite extends SharedSparkSession implements Seria private transient Dataset dataset; - public void customSetUp() { + @Override protected void customSetUp() { List points = generateLogisticInputAsList(1.0, 1.0, 100, 42); dataset = spark.createDataFrame(jsc.parallelize(points, 2), LabeledPoint.class); } diff --git a/mllib/src/test/java/org/apache/spark/ml/util/JavaDefaultReadWriteSuite.java b/mllib/src/test/java/org/apache/spark/ml/util/JavaDefaultReadWriteSuite.java index b6b975632e2c3..59eee5a6d18eb 100644 --- a/mllib/src/test/java/org/apache/spark/ml/util/JavaDefaultReadWriteSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/util/JavaDefaultReadWriteSuite.java @@ -29,12 +29,12 @@ public class JavaDefaultReadWriteSuite extends SharedSparkSession { File tempDir = null; - public void customSetUp() { + @Override protected void customSetUp() { tempDir = Utils.createTempDir( System.getProperty("java.io.tmpdir"), "JavaDefaultReadWriteSuite"); } - public void customTearDown() { + @Override protected void customTearDown() { Utils.deleteRecursively(tempDir); } diff --git a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java index 35bb577223f6d..37ace1c6ec2f5 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java @@ -36,7 +36,7 @@ import org.apache.spark.mllib.linalg.Vectors; public class JavaLDASuite extends SharedSparkSession implements Serializable { - public void customSetUp() { + @Override protected void customSetUp() { ArrayList> tinyCorpus = new ArrayList<>(); for (int i = 0; i < LDASuite.tinyCorpus().length; i++) { tinyCorpus.add(new Tuple2<>((Long) LDASuite.tinyCorpus()[i]._1(), diff --git a/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java b/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java index ecb077a280a83..4fe5e79efeafe 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java @@ -34,8 +34,7 @@ public class JavaRankingMetricsSuite extends SharedSparkSession implements Serializable { private transient JavaRDD, List>> predictionAndLabels; - @Before - public void customSetUp() { + @Override protected void customSetUp() { predictionAndLabels = jsc.parallelize(Arrays.asList( 
Tuple2$.MODULE$.apply( Arrays.asList(1, 6, 2, 7, 8, 3, 9, 10, 4, 5), Arrays.asList(1, 2, 3, 4, 5)), From 874eddb78a8cba58e7a758b9543d7d67f3e1ff05 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Fri, 20 May 2016 01:05:53 +0530 Subject: [PATCH 21/27] fix java lint errors --- .../apache/spark/examples/ml/JavaGaussianMixtureExample.java | 2 +- .../apache/spark/ml/classification/JavaGBTClassifierSuite.java | 1 - .../ml/classification/JavaRandomForestClassifierSuite.java | 2 -- .../src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java | 2 +- .../apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java | 1 - 5 files changed, 2 insertions(+), 6 deletions(-) diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaGaussianMixtureExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaGaussianMixtureExample.java index 79b99095815a1..526bed93fbd24 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaGaussianMixtureExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaGaussianMixtureExample.java @@ -37,7 +37,7 @@ public class JavaGaussianMixtureExample { public static void main(String[] args) { - // Creates a SparkSession + // Creates a SparkSession SparkSession spark = SparkSession .builder() .appName("JavaGaussianMixtureExample") diff --git a/mllib/src/test/java/org/apache/spark/ml/classification/JavaGBTClassifierSuite.java b/mllib/src/test/java/org/apache/spark/ml/classification/JavaGBTClassifierSuite.java index 5db8878907d79..2ebe650e01ac0 100644 --- a/mllib/src/test/java/org/apache/spark/ml/classification/JavaGBTClassifierSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/classification/JavaGBTClassifierSuite.java @@ -25,7 +25,6 @@ import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.ml.classification.LogisticRegressionSuite; import org.apache.spark.ml.feature.LabeledPoint; import org.apache.spark.ml.tree.impl.TreeTests; import org.apache.spark.sql.Dataset; diff --git a/mllib/src/test/java/org/apache/spark/ml/classification/JavaRandomForestClassifierSuite.java b/mllib/src/test/java/org/apache/spark/ml/classification/JavaRandomForestClassifierSuite.java index 0d08785b50612..f25c91be2b8d9 100644 --- a/mllib/src/test/java/org/apache/spark/ml/classification/JavaRandomForestClassifierSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/classification/JavaRandomForestClassifierSuite.java @@ -26,8 +26,6 @@ import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.ml.classification.LogisticRegressionSuite; import org.apache.spark.ml.feature.LabeledPoint; import org.apache.spark.ml.linalg.Vector; import org.apache.spark.ml.tree.impl.TreeTests; diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java index d38e5386ad9be..07c928fd54389 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java @@ -75,7 +75,7 @@ public org.apache.spark.mllib.linalg.Vector call(Vector vector) { } } ).rdd()); - + Matrix pc = mat.computePrincipalComponents(3); mat.multiply(pc).rows().toJavaRDD(); diff --git a/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java b/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java index 
4fe5e79efeafe..59e5ded2399f6 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java @@ -25,7 +25,6 @@ import scala.Tuple2$; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; import org.apache.spark.SharedSparkSession; From 8e264ea2cd69ae1fa9a404b2f3494604bce60056 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Fri, 20 May 2016 01:08:17 +0530 Subject: [PATCH 22/27] fix style --- .../test/java/org/apache/spark/ml/JavaPipelineSuite.java | 3 ++- .../ml/classification/JavaLogisticRegressionSuite.java | 3 ++- .../apache/spark/ml/classification/JavaOneVsRestSuite.java | 3 ++- .../org/apache/spark/ml/clustering/JavaKMeansSuite.java | 3 ++- .../spark/ml/regression/JavaLinearRegressionSuite.java | 3 ++- .../spark/ml/source/libsvm/JavaLibSVMRelationSuite.java | 6 ++++-- .../org/apache/spark/ml/tuning/JavaCrossValidatorSuite.java | 3 ++- .../org/apache/spark/ml/util/JavaDefaultReadWriteSuite.java | 6 ++++-- .../org/apache/spark/mllib/clustering/JavaLDASuite.java | 3 ++- .../spark/mllib/evaluation/JavaRankingMetricsSuite.java | 3 ++- 10 files changed, 24 insertions(+), 12 deletions(-) diff --git a/mllib/src/test/java/org/apache/spark/ml/JavaPipelineSuite.java b/mllib/src/test/java/org/apache/spark/ml/JavaPipelineSuite.java index c3cab2f86a376..9281fe0431b4d 100644 --- a/mllib/src/test/java/org/apache/spark/ml/JavaPipelineSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/JavaPipelineSuite.java @@ -35,7 +35,8 @@ public class JavaPipelineSuite extends SharedSparkSession { private transient Dataset dataset; - @Override protected void customSetUp() { + @Override + protected void customSetUp() { JavaRDD points = jsc.parallelize(generateLogisticInputAsList(1.0, 1.0, 100, 42), 2); dataset = spark.createDataFrame(points, LabeledPoint.class); diff --git a/mllib/src/test/java/org/apache/spark/ml/classification/JavaLogisticRegressionSuite.java b/mllib/src/test/java/org/apache/spark/ml/classification/JavaLogisticRegressionSuite.java index 8c703c4d3195e..94ced64728544 100644 --- a/mllib/src/test/java/org/apache/spark/ml/classification/JavaLogisticRegressionSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/classification/JavaLogisticRegressionSuite.java @@ -38,7 +38,8 @@ public class JavaLogisticRegressionSuite extends SharedSparkSession implements S private transient JavaRDD datasetRDD; private double eps = 1e-5; - @Override protected void customSetUp() { + @Override + protected void customSetUp() { List points = generateLogisticInputAsList(1.0, 1.0, 100, 42); datasetRDD = jsc.parallelize(points, 2); dataset = spark.createDataFrame(datasetRDD, LabeledPoint.class); diff --git a/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java b/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java index 9849bef5adc67..78da9dc201287 100644 --- a/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java @@ -37,7 +37,8 @@ public class JavaOneVsRestSuite extends SharedSparkSession implements Serializab private transient Dataset dataset; private transient JavaRDD datasetRDD; - @Override protected void customSetUp() { + @Override + protected void customSetUp() { int nPoints = 3; // The following coefficients and xMean/xVariance are computed from iris dataset with diff --git 
a/mllib/src/test/java/org/apache/spark/ml/clustering/JavaKMeansSuite.java b/mllib/src/test/java/org/apache/spark/ml/clustering/JavaKMeansSuite.java index 69266a24e83e8..524773807069c 100644 --- a/mllib/src/test/java/org/apache/spark/ml/clustering/JavaKMeansSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/clustering/JavaKMeansSuite.java @@ -35,7 +35,8 @@ public class JavaKMeansSuite extends SharedSparkSession implements Serializable private transient int k = 5; private transient Dataset dataset; - @Override protected void customSetUp() { + @Override + protected void customSetUp() { dataset = KMeansSuite.generateKMeansData(spark, 50, 3, k); } diff --git a/mllib/src/test/java/org/apache/spark/ml/regression/JavaLinearRegressionSuite.java b/mllib/src/test/java/org/apache/spark/ml/regression/JavaLinearRegressionSuite.java index 9498525ae99b9..ca949a10b3867 100644 --- a/mllib/src/test/java/org/apache/spark/ml/regression/JavaLinearRegressionSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/regression/JavaLinearRegressionSuite.java @@ -34,7 +34,8 @@ public class JavaLinearRegressionSuite extends SharedSparkSession implements Ser private transient Dataset dataset; private transient JavaRDD datasetRDD; - @Override protected void customSetUp() { + @Override + protected void customSetUp() { List points = generateLogisticInputAsList(1.0, 1.0, 100, 42); datasetRDD = jsc.parallelize(points, 2); dataset = spark.createDataFrame(datasetRDD, LabeledPoint.class); diff --git a/mllib/src/test/java/org/apache/spark/ml/source/libsvm/JavaLibSVMRelationSuite.java b/mllib/src/test/java/org/apache/spark/ml/source/libsvm/JavaLibSVMRelationSuite.java index d18ae1e0e58ac..49429a84754dd 100644 --- a/mllib/src/test/java/org/apache/spark/ml/source/libsvm/JavaLibSVMRelationSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/source/libsvm/JavaLibSVMRelationSuite.java @@ -42,7 +42,8 @@ public class JavaLibSVMRelationSuite extends SharedSparkSession { private File tempDir; private String path; - @Override protected void customSetUp() throws IOException { + @Override + protected void customSetUp() throws IOException { tempDir = Utils.createTempDir(System.getProperty("java.io.tmpdir"), "datasource"); File file = new File(tempDir, "part-00000"); String s = "1 1:1.0 3:2.0 5:3.0\n0\n0 2:4.0 4:5.0 6:6.0"; @@ -50,7 +51,8 @@ public class JavaLibSVMRelationSuite extends SharedSparkSession { path = tempDir.toURI().toString(); } - @Override protected void customTearDown() { + @Override + protected void customTearDown() { Utils.deleteRecursively(tempDir); } diff --git a/mllib/src/test/java/org/apache/spark/ml/tuning/JavaCrossValidatorSuite.java b/mllib/src/test/java/org/apache/spark/ml/tuning/JavaCrossValidatorSuite.java index 21cafc7dd076b..bea199564a36a 100644 --- a/mllib/src/test/java/org/apache/spark/ml/tuning/JavaCrossValidatorSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/tuning/JavaCrossValidatorSuite.java @@ -37,7 +37,8 @@ public class JavaCrossValidatorSuite extends SharedSparkSession implements Seria private transient Dataset dataset; - @Override protected void customSetUp() { + @Override + protected void customSetUp() { List points = generateLogisticInputAsList(1.0, 1.0, 100, 42); dataset = spark.createDataFrame(jsc.parallelize(points, 2), LabeledPoint.class); } diff --git a/mllib/src/test/java/org/apache/spark/ml/util/JavaDefaultReadWriteSuite.java b/mllib/src/test/java/org/apache/spark/ml/util/JavaDefaultReadWriteSuite.java index 59eee5a6d18eb..317d30deaabe8 100644 --- 
a/mllib/src/test/java/org/apache/spark/ml/util/JavaDefaultReadWriteSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/util/JavaDefaultReadWriteSuite.java @@ -29,12 +29,14 @@ public class JavaDefaultReadWriteSuite extends SharedSparkSession { File tempDir = null; - @Override protected void customSetUp() { + @Override + protected void customSetUp() { tempDir = Utils.createTempDir( System.getProperty("java.io.tmpdir"), "JavaDefaultReadWriteSuite"); } - @Override protected void customTearDown() { + @Override + protected void customTearDown() { Utils.deleteRecursively(tempDir); } diff --git a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java index 37ace1c6ec2f5..2601d66503824 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java @@ -36,7 +36,8 @@ import org.apache.spark.mllib.linalg.Vectors; public class JavaLDASuite extends SharedSparkSession implements Serializable { - @Override protected void customSetUp() { + @Override + protected void customSetUp() { ArrayList> tinyCorpus = new ArrayList<>(); for (int i = 0; i < LDASuite.tinyCorpus().length; i++) { tinyCorpus.add(new Tuple2<>((Long) LDASuite.tinyCorpus()[i]._1(), diff --git a/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java b/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java index 59e5ded2399f6..9f58d6cdb9584 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java @@ -33,7 +33,8 @@ public class JavaRankingMetricsSuite extends SharedSparkSession implements Serializable { private transient JavaRDD, List>> predictionAndLabels; - @Override protected void customSetUp() { + @Override + protected void customSetUp() { predictionAndLabels = jsc.parallelize(Arrays.asList( Tuple2$.MODULE$.apply( Arrays.asList(1, 6, 2, 7, 8, 3, 9, 10, 4, 5), Arrays.asList(1, 2, 3, 4, 5)), From 40edaad3f12425303489ff618458bf8545f030a0 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Fri, 20 May 2016 06:08:03 +0530 Subject: [PATCH 23/27] remove customSetUp() and customTearDown() --- .../test/java/org/apache/spark/SharedSparkSession.java | 10 +--------- .../java/org/apache/spark/ml/JavaPipelineSuite.java | 5 ++++- .../ml/classification/JavaLogisticRegressionSuite.java | 4 +++- .../spark/ml/classification/JavaOneVsRestSuite.java | 4 +++- .../apache/spark/ml/clustering/JavaKMeansSuite.java | 4 +++- .../spark/ml/regression/JavaLinearRegressionSuite.java | 4 +++- .../ml/source/libsvm/JavaLibSVMRelationSuite.java | 6 ++++-- .../spark/ml/tuning/JavaCrossValidatorSuite.java | 4 +++- .../spark/ml/util/JavaDefaultReadWriteSuite.java | 6 ++++-- .../apache/spark/mllib/clustering/JavaLDASuite.java | 4 +++- .../mllib/evaluation/JavaRankingMetricsSuite.java | 4 +++- 11 files changed, 34 insertions(+), 21 deletions(-) diff --git a/mllib/src/test/java/org/apache/spark/SharedSparkSession.java b/mllib/src/test/java/org/apache/spark/SharedSparkSession.java index 4a8ad7c6dad15..0202e3897e5b1 100644 --- a/mllib/src/test/java/org/apache/spark/SharedSparkSession.java +++ b/mllib/src/test/java/org/apache/spark/SharedSparkSession.java @@ -25,7 +25,7 @@ import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SparkSession; -public class SharedSparkSession { +public abstract 
class SharedSparkSession { public transient SparkSession spark; public transient JavaSparkContext jsc; @@ -37,19 +37,11 @@ public void setUp() throws IOException { .appName("shared-spark-session") .getOrCreate(); jsc = new JavaSparkContext(spark.sparkContext()); - - customSetUp(); } - protected void customSetUp() throws IOException {} - @After public void tearDown() { spark.stop(); spark = null; - - customTearDown(); } - - protected void customTearDown() {} } diff --git a/mllib/src/test/java/org/apache/spark/ml/JavaPipelineSuite.java b/mllib/src/test/java/org/apache/spark/ml/JavaPipelineSuite.java index 9281fe0431b4d..9b209006bc369 100644 --- a/mllib/src/test/java/org/apache/spark/ml/JavaPipelineSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/JavaPipelineSuite.java @@ -17,6 +17,8 @@ package org.apache.spark.ml; +import java.io.IOException; + import org.junit.Test; import org.apache.spark.SharedSparkSession; @@ -36,7 +38,8 @@ public class JavaPipelineSuite extends SharedSparkSession { private transient Dataset dataset; @Override - protected void customSetUp() { + public void setUp() throws IOException { + super.setUp(); JavaRDD points = jsc.parallelize(generateLogisticInputAsList(1.0, 1.0, 100, 42), 2); dataset = spark.createDataFrame(points, LabeledPoint.class); diff --git a/mllib/src/test/java/org/apache/spark/ml/classification/JavaLogisticRegressionSuite.java b/mllib/src/test/java/org/apache/spark/ml/classification/JavaLogisticRegressionSuite.java index 94ced64728544..d8d0050752cc4 100644 --- a/mllib/src/test/java/org/apache/spark/ml/classification/JavaLogisticRegressionSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/classification/JavaLogisticRegressionSuite.java @@ -17,6 +17,7 @@ package org.apache.spark.ml.classification; +import java.io.IOException; import java.io.Serializable; import java.util.List; @@ -39,7 +40,8 @@ public class JavaLogisticRegressionSuite extends SharedSparkSession implements S private double eps = 1e-5; @Override - protected void customSetUp() { + public void setUp() throws IOException { + super.setUp(); List points = generateLogisticInputAsList(1.0, 1.0, 100, 42); datasetRDD = jsc.parallelize(points, 2); dataset = spark.createDataFrame(datasetRDD, LabeledPoint.class); diff --git a/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java b/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java index 78da9dc201287..1a581503cdf17 100644 --- a/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java @@ -17,6 +17,7 @@ package org.apache.spark.ml.classification; +import java.io.IOException; import java.io.Serializable; import java.util.List; @@ -38,7 +39,8 @@ public class JavaOneVsRestSuite extends SharedSparkSession implements Serializab private transient JavaRDD datasetRDD; @Override - protected void customSetUp() { + public void setUp() throws IOException { + super.setUp(); int nPoints = 3; // The following coefficients and xMean/xVariance are computed from iris dataset with diff --git a/mllib/src/test/java/org/apache/spark/ml/clustering/JavaKMeansSuite.java b/mllib/src/test/java/org/apache/spark/ml/clustering/JavaKMeansSuite.java index 524773807069c..caab73b0b00d4 100644 --- a/mllib/src/test/java/org/apache/spark/ml/clustering/JavaKMeansSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/clustering/JavaKMeansSuite.java @@ -17,6 +17,7 @@ package org.apache.spark.ml.clustering; +import 
java.io.IOException; import java.io.Serializable; import java.util.Arrays; import java.util.List; @@ -36,7 +37,8 @@ public class JavaKMeansSuite extends SharedSparkSession implements Serializable private transient Dataset dataset; @Override - protected void customSetUp() { + public void setUp() throws IOException { + super.setUp(); dataset = KMeansSuite.generateKMeansData(spark, 50, 3, k); } diff --git a/mllib/src/test/java/org/apache/spark/ml/regression/JavaLinearRegressionSuite.java b/mllib/src/test/java/org/apache/spark/ml/regression/JavaLinearRegressionSuite.java index ca949a10b3867..25a59e7f25d6d 100644 --- a/mllib/src/test/java/org/apache/spark/ml/regression/JavaLinearRegressionSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/regression/JavaLinearRegressionSuite.java @@ -17,6 +17,7 @@ package org.apache.spark.ml.regression; +import java.io.IOException; import java.io.Serializable; import java.util.List; @@ -35,7 +36,8 @@ public class JavaLinearRegressionSuite extends SharedSparkSession implements Ser private transient JavaRDD datasetRDD; @Override - protected void customSetUp() { + public void setUp() throws IOException { + super.setUp(); List points = generateLogisticInputAsList(1.0, 1.0, 100, 42); datasetRDD = jsc.parallelize(points, 2); dataset = spark.createDataFrame(datasetRDD, LabeledPoint.class); diff --git a/mllib/src/test/java/org/apache/spark/ml/source/libsvm/JavaLibSVMRelationSuite.java b/mllib/src/test/java/org/apache/spark/ml/source/libsvm/JavaLibSVMRelationSuite.java index 49429a84754dd..fa39f4560c8aa 100644 --- a/mllib/src/test/java/org/apache/spark/ml/source/libsvm/JavaLibSVMRelationSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/source/libsvm/JavaLibSVMRelationSuite.java @@ -43,7 +43,8 @@ public class JavaLibSVMRelationSuite extends SharedSparkSession { private String path; @Override - protected void customSetUp() throws IOException { + public void setUp() throws IOException { + super.setUp(); tempDir = Utils.createTempDir(System.getProperty("java.io.tmpdir"), "datasource"); File file = new File(tempDir, "part-00000"); String s = "1 1:1.0 3:2.0 5:3.0\n0\n0 2:4.0 4:5.0 6:6.0"; @@ -52,7 +53,8 @@ protected void customSetUp() throws IOException { } @Override - protected void customTearDown() { + public void tearDown() { + super.tearDown(); Utils.deleteRecursively(tempDir); } diff --git a/mllib/src/test/java/org/apache/spark/ml/tuning/JavaCrossValidatorSuite.java b/mllib/src/test/java/org/apache/spark/ml/tuning/JavaCrossValidatorSuite.java index bea199564a36a..4aea021373051 100644 --- a/mllib/src/test/java/org/apache/spark/ml/tuning/JavaCrossValidatorSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/tuning/JavaCrossValidatorSuite.java @@ -17,6 +17,7 @@ package org.apache.spark.ml.tuning; +import java.io.IOException; import java.io.Serializable; import java.util.List; @@ -38,7 +39,8 @@ public class JavaCrossValidatorSuite extends SharedSparkSession implements Seria private transient Dataset dataset; @Override - protected void customSetUp() { + public void setUp() throws IOException { + super.setUp(); List points = generateLogisticInputAsList(1.0, 1.0, 100, 42); dataset = spark.createDataFrame(jsc.parallelize(points, 2), LabeledPoint.class); } diff --git a/mllib/src/test/java/org/apache/spark/ml/util/JavaDefaultReadWriteSuite.java b/mllib/src/test/java/org/apache/spark/ml/util/JavaDefaultReadWriteSuite.java index 317d30deaabe8..da623d1d15702 100644 --- a/mllib/src/test/java/org/apache/spark/ml/util/JavaDefaultReadWriteSuite.java +++ 
b/mllib/src/test/java/org/apache/spark/ml/util/JavaDefaultReadWriteSuite.java @@ -30,13 +30,15 @@ public class JavaDefaultReadWriteSuite extends SharedSparkSession { File tempDir = null; @Override - protected void customSetUp() { + public void setUp() throws IOException { + super.setUp(); tempDir = Utils.createTempDir( System.getProperty("java.io.tmpdir"), "JavaDefaultReadWriteSuite"); } @Override - protected void customTearDown() { + public void tearDown() { + super.tearDown(); Utils.deleteRecursively(tempDir); } diff --git a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java index 2601d66503824..4f02ab789e537 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java @@ -17,6 +17,7 @@ package org.apache.spark.mllib.clustering; +import java.io.IOException; import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; @@ -37,7 +38,8 @@ public class JavaLDASuite extends SharedSparkSession implements Serializable { @Override - protected void customSetUp() { + public void setUp() throws IOException { + super.setUp(); ArrayList> tinyCorpus = new ArrayList<>(); for (int i = 0; i < LDASuite.tinyCorpus().length; i++) { tinyCorpus.add(new Tuple2<>((Long) LDASuite.tinyCorpus()[i]._1(), diff --git a/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java b/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java index 9f58d6cdb9584..21e3404096441 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java @@ -17,6 +17,7 @@ package org.apache.spark.mllib.evaluation; +import java.io.IOException; import java.io.Serializable; import java.util.Arrays; import java.util.List; @@ -34,7 +35,8 @@ public class JavaRankingMetricsSuite extends SharedSparkSession implements Seria private transient JavaRDD, List>> predictionAndLabels; @Override - protected void customSetUp() { + public void setUp() throws IOException { + super.setUp(); predictionAndLabels = jsc.parallelize(Arrays.asList( Tuple2$.MODULE$.apply( Arrays.asList(1, 6, 2, 7, 8, 3, 9, 10, 4, 5), Arrays.asList(1, 2, 3, 4, 5)), From 9b340fed085c38a7510d78311d4c480b4ac2106e Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Fri, 20 May 2016 06:23:19 +0530 Subject: [PATCH 24/27] SparkSession appName should be class simple name --- mllib/src/test/java/org/apache/spark/SharedSparkSession.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib/src/test/java/org/apache/spark/SharedSparkSession.java b/mllib/src/test/java/org/apache/spark/SharedSparkSession.java index 0202e3897e5b1..56d0c33de263b 100644 --- a/mllib/src/test/java/org/apache/spark/SharedSparkSession.java +++ b/mllib/src/test/java/org/apache/spark/SharedSparkSession.java @@ -34,7 +34,7 @@ public abstract class SharedSparkSession { public void setUp() throws IOException { spark = SparkSession.builder() .master("local") - .appName("shared-spark-session") + .appName(getClass().getSimpleName()) .getOrCreate(); jsc = new JavaSparkContext(spark.sparkContext()); } From c1ce08ec46f8623c6311dc6f7cbc866c9cc3fd4f Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Fri, 20 May 2016 07:02:34 +0530 Subject: [PATCH 25/27] SharedSparkSession should implement Serializable --- 
.../test/java/org/apache/spark/SharedSparkSession.java | 9 +++++---- .../classification/JavaDecisionTreeClassifierSuite.java | 3 +-- .../spark/ml/classification/JavaGBTClassifierSuite.java | 3 +-- .../ml/classification/JavaLogisticRegressionSuite.java | 3 +-- .../JavaMultilayerPerceptronClassifierSuite.java | 3 +-- .../spark/ml/classification/JavaNaiveBayesSuite.java | 3 +-- .../spark/ml/classification/JavaOneVsRestSuite.java | 5 ++--- .../classification/JavaRandomForestClassifierSuite.java | 3 +-- .../org/apache/spark/ml/clustering/JavaKMeansSuite.java | 6 +++--- .../java/org/apache/spark/ml/feature/JavaPCASuite.java | 2 +- .../apache/spark/ml/feature/JavaVectorIndexerSuite.java | 3 +-- .../ml/regression/JavaDecisionTreeRegressorSuite.java | 3 +-- .../spark/ml/regression/JavaGBTRegressorSuite.java | 3 +-- .../spark/ml/regression/JavaLinearRegressionSuite.java | 3 +-- .../ml/regression/JavaRandomForestRegressorSuite.java | 3 +-- .../apache/spark/ml/tuning/JavaCrossValidatorSuite.java | 5 ++--- .../classification/JavaLogisticRegressionSuite.java | 3 +-- .../spark/mllib/classification/JavaNaiveBayesSuite.java | 3 +-- .../apache/spark/mllib/classification/JavaSVMSuite.java | 3 +-- .../JavaStreamingLogisticRegressionSuite.java | 3 +-- .../spark/mllib/clustering/JavaBisectingKMeansSuite.java | 4 +--- .../spark/mllib/clustering/JavaGaussianMixtureSuite.java | 3 +-- .../apache/spark/mllib/clustering/JavaKMeansSuite.java | 3 +-- .../org/apache/spark/mllib/clustering/JavaLDASuite.java | 3 +-- .../spark/mllib/clustering/JavaStreamingKMeansSuite.java | 3 +-- .../spark/mllib/evaluation/JavaRankingMetricsSuite.java | 3 +-- .../org/apache/spark/mllib/feature/JavaTfIdfSuite.java | 3 +-- .../apache/spark/mllib/feature/JavaWord2VecSuite.java | 3 +-- .../spark/mllib/fpm/JavaAssociationRulesSuite.java | 3 +-- .../org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java | 3 +-- .../org/apache/spark/mllib/linalg/JavaMatricesSuite.java | 3 +-- .../org/apache/spark/mllib/linalg/JavaVectorsSuite.java | 3 +-- .../apache/spark/mllib/recommendation/JavaALSSuite.java | 3 +-- .../mllib/regression/JavaIsotonicRegressionSuite.java | 3 +-- .../apache/spark/mllib/regression/JavaLassoSuite.java | 3 +-- .../mllib/regression/JavaLinearRegressionSuite.java | 3 +-- .../spark/mllib/regression/JavaRidgeRegressionSuite.java | 3 +-- .../regression/JavaStreamingLinearRegressionSuite.java | 3 +-- .../org/apache/spark/mllib/stat/JavaStatisticsSuite.java | 3 +-- .../apache/spark/mllib/tree/JavaDecisionTreeSuite.java | 3 +-- 40 files changed, 48 insertions(+), 85 deletions(-) diff --git a/mllib/src/test/java/org/apache/spark/SharedSparkSession.java b/mllib/src/test/java/org/apache/spark/SharedSparkSession.java index 56d0c33de263b..43779878890db 100644 --- a/mllib/src/test/java/org/apache/spark/SharedSparkSession.java +++ b/mllib/src/test/java/org/apache/spark/SharedSparkSession.java @@ -18,6 +18,7 @@ package org.apache.spark; import java.io.IOException; +import java.io.Serializable; import org.junit.After; import org.junit.Before; @@ -25,15 +26,15 @@ import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SparkSession; -public abstract class SharedSparkSession { +public abstract class SharedSparkSession implements Serializable { - public transient SparkSession spark; - public transient JavaSparkContext jsc; + protected transient SparkSession spark; + protected transient JavaSparkContext jsc; @Before public void setUp() throws IOException { spark = SparkSession.builder() - .master("local") + .master("local[2]") 
.appName(getClass().getSimpleName()) .getOrCreate(); jsc = new JavaSparkContext(spark.sparkContext()); diff --git a/mllib/src/test/java/org/apache/spark/ml/classification/JavaDecisionTreeClassifierSuite.java b/mllib/src/test/java/org/apache/spark/ml/classification/JavaDecisionTreeClassifierSuite.java index d4ccf7ce957ce..5aba4e8f7de07 100644 --- a/mllib/src/test/java/org/apache/spark/ml/classification/JavaDecisionTreeClassifierSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/classification/JavaDecisionTreeClassifierSuite.java @@ -17,7 +17,6 @@ package org.apache.spark.ml.classification; -import java.io.Serializable; import java.util.HashMap; import java.util.Map; @@ -30,7 +29,7 @@ import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -public class JavaDecisionTreeClassifierSuite extends SharedSparkSession implements Serializable { +public class JavaDecisionTreeClassifierSuite extends SharedSparkSession { @Test public void runDT() { diff --git a/mllib/src/test/java/org/apache/spark/ml/classification/JavaGBTClassifierSuite.java b/mllib/src/test/java/org/apache/spark/ml/classification/JavaGBTClassifierSuite.java index 2ebe650e01ac0..74bb46bd217a9 100644 --- a/mllib/src/test/java/org/apache/spark/ml/classification/JavaGBTClassifierSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/classification/JavaGBTClassifierSuite.java @@ -17,7 +17,6 @@ package org.apache.spark.ml.classification; -import java.io.Serializable; import java.util.HashMap; import java.util.Map; @@ -30,7 +29,7 @@ import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -public class JavaGBTClassifierSuite extends SharedSparkSession implements Serializable { +public class JavaGBTClassifierSuite extends SharedSparkSession { @Test public void runDT() { diff --git a/mllib/src/test/java/org/apache/spark/ml/classification/JavaLogisticRegressionSuite.java b/mllib/src/test/java/org/apache/spark/ml/classification/JavaLogisticRegressionSuite.java index d8d0050752cc4..004102103d52c 100644 --- a/mllib/src/test/java/org/apache/spark/ml/classification/JavaLogisticRegressionSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/classification/JavaLogisticRegressionSuite.java @@ -18,7 +18,6 @@ package org.apache.spark.ml.classification; import java.io.IOException; -import java.io.Serializable; import java.util.List; import org.junit.Assert; @@ -32,7 +31,7 @@ import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -public class JavaLogisticRegressionSuite extends SharedSparkSession implements Serializable { +public class JavaLogisticRegressionSuite extends SharedSparkSession { private transient Dataset dataset; diff --git a/mllib/src/test/java/org/apache/spark/ml/classification/JavaMultilayerPerceptronClassifierSuite.java b/mllib/src/test/java/org/apache/spark/ml/classification/JavaMultilayerPerceptronClassifierSuite.java index 7ee1a6f3af7c7..e5f3161b9d78c 100644 --- a/mllib/src/test/java/org/apache/spark/ml/classification/JavaMultilayerPerceptronClassifierSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/classification/JavaMultilayerPerceptronClassifierSuite.java @@ -17,7 +17,6 @@ package org.apache.spark.ml.classification; -import java.io.Serializable; import java.util.Arrays; import java.util.List; @@ -31,7 +30,7 @@ import org.apache.spark.sql.Row; public class JavaMultilayerPerceptronClassifierSuite - extends SharedSparkSession implements Serializable { + extends SharedSparkSession { @Test public void testMLPC() { diff --git 
a/mllib/src/test/java/org/apache/spark/ml/classification/JavaNaiveBayesSuite.java b/mllib/src/test/java/org/apache/spark/ml/classification/JavaNaiveBayesSuite.java index 948ef8a3d5d92..c2a9e7b58b470 100644 --- a/mllib/src/test/java/org/apache/spark/ml/classification/JavaNaiveBayesSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/classification/JavaNaiveBayesSuite.java @@ -17,7 +17,6 @@ package org.apache.spark.ml.classification; -import java.io.Serializable; import java.util.Arrays; import java.util.List; @@ -35,7 +34,7 @@ import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; -public class JavaNaiveBayesSuite extends SharedSparkSession implements Serializable { +public class JavaNaiveBayesSuite extends SharedSparkSession { public void validatePrediction(Dataset predictionAndLabels) { for (Row r : predictionAndLabels.collectAsList()) { diff --git a/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java b/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java index 1a581503cdf17..6194167bda354 100644 --- a/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java @@ -18,7 +18,6 @@ package org.apache.spark.ml.classification; import java.io.IOException; -import java.io.Serializable; import java.util.List; import scala.collection.JavaConverters; @@ -28,12 +27,12 @@ import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.ml.feature.LabeledPoint; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -import org.apache.spark.ml.feature.LabeledPoint; import static org.apache.spark.ml.classification.LogisticRegressionSuite.generateMultinomialLogisticInput; -public class JavaOneVsRestSuite extends SharedSparkSession implements Serializable { +public class JavaOneVsRestSuite extends SharedSparkSession { private transient Dataset dataset; private transient JavaRDD datasetRDD; diff --git a/mllib/src/test/java/org/apache/spark/ml/classification/JavaRandomForestClassifierSuite.java b/mllib/src/test/java/org/apache/spark/ml/classification/JavaRandomForestClassifierSuite.java index f25c91be2b8d9..dd98513f37ecf 100644 --- a/mllib/src/test/java/org/apache/spark/ml/classification/JavaRandomForestClassifierSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/classification/JavaRandomForestClassifierSuite.java @@ -17,7 +17,6 @@ package org.apache.spark.ml.classification; -import java.io.Serializable; import java.util.HashMap; import java.util.Map; @@ -32,7 +31,7 @@ import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -public class JavaRandomForestClassifierSuite extends SharedSparkSession implements Serializable { +public class JavaRandomForestClassifierSuite extends SharedSparkSession { @Test public void runDT() { diff --git a/mllib/src/test/java/org/apache/spark/ml/clustering/JavaKMeansSuite.java b/mllib/src/test/java/org/apache/spark/ml/clustering/JavaKMeansSuite.java index caab73b0b00d4..f3a281906fff7 100644 --- a/mllib/src/test/java/org/apache/spark/ml/clustering/JavaKMeansSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/clustering/JavaKMeansSuite.java @@ -18,20 +18,20 @@ package org.apache.spark.ml.clustering; import java.io.IOException; -import java.io.Serializable; import java.util.Arrays; import java.util.List; -import org.junit.Test; import static org.junit.Assert.assertEquals; import static 
org.junit.Assert.assertTrue; +import org.junit.Test; + import org.apache.spark.SharedSparkSession; import org.apache.spark.ml.linalg.Vector; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -public class JavaKMeansSuite extends SharedSparkSession implements Serializable { +public class JavaKMeansSuite extends SharedSparkSession { private transient int k = 5; private transient Dataset dataset; diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java index 07c928fd54389..ac479c08418ce 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java @@ -36,7 +36,7 @@ import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -public class JavaPCASuite extends SharedSparkSession implements Serializable { +public class JavaPCASuite extends SharedSparkSession { public static class VectorPair implements Serializable { private Vector features = Vectors.dense(0.0); diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorIndexerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorIndexerSuite.java index 51a5f97c55036..ca8fae3a48b9d 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorIndexerSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorIndexerSuite.java @@ -17,7 +17,6 @@ package org.apache.spark.ml.feature; -import java.io.Serializable; import java.util.Arrays; import java.util.List; import java.util.Map; @@ -32,7 +31,7 @@ import org.apache.spark.sql.Row; -public class JavaVectorIndexerSuite extends SharedSparkSession implements Serializable { +public class JavaVectorIndexerSuite extends SharedSparkSession { @Test public void vectorIndexerAPI() { diff --git a/mllib/src/test/java/org/apache/spark/ml/regression/JavaDecisionTreeRegressorSuite.java b/mllib/src/test/java/org/apache/spark/ml/regression/JavaDecisionTreeRegressorSuite.java index 6cbd4315fc310..1da85ed9dab4e 100644 --- a/mllib/src/test/java/org/apache/spark/ml/regression/JavaDecisionTreeRegressorSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/regression/JavaDecisionTreeRegressorSuite.java @@ -17,7 +17,6 @@ package org.apache.spark.ml.regression; -import java.io.Serializable; import java.util.HashMap; import java.util.Map; @@ -32,7 +31,7 @@ import org.apache.spark.sql.Row; -public class JavaDecisionTreeRegressorSuite extends SharedSparkSession implements Serializable { +public class JavaDecisionTreeRegressorSuite extends SharedSparkSession { @Test public void runDT() { diff --git a/mllib/src/test/java/org/apache/spark/ml/regression/JavaGBTRegressorSuite.java b/mllib/src/test/java/org/apache/spark/ml/regression/JavaGBTRegressorSuite.java index 33cfe798fbd65..7fd9b1feb7f83 100644 --- a/mllib/src/test/java/org/apache/spark/ml/regression/JavaGBTRegressorSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/regression/JavaGBTRegressorSuite.java @@ -17,7 +17,6 @@ package org.apache.spark.ml.regression; -import java.io.Serializable; import java.util.HashMap; import java.util.Map; @@ -32,7 +31,7 @@ import org.apache.spark.sql.Row; -public class JavaGBTRegressorSuite extends SharedSparkSession implements Serializable { +public class JavaGBTRegressorSuite extends SharedSparkSession { @Test public void runDT() { diff --git a/mllib/src/test/java/org/apache/spark/ml/regression/JavaLinearRegressionSuite.java 
b/mllib/src/test/java/org/apache/spark/ml/regression/JavaLinearRegressionSuite.java index 25a59e7f25d6d..6cdcdda1a6480 100644 --- a/mllib/src/test/java/org/apache/spark/ml/regression/JavaLinearRegressionSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/regression/JavaLinearRegressionSuite.java @@ -18,7 +18,6 @@ package org.apache.spark.ml.regression; import java.io.IOException; -import java.io.Serializable; import java.util.List; import org.junit.Test; @@ -31,7 +30,7 @@ import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -public class JavaLinearRegressionSuite extends SharedSparkSession implements Serializable { +public class JavaLinearRegressionSuite extends SharedSparkSession { private transient Dataset dataset; private transient JavaRDD datasetRDD; diff --git a/mllib/src/test/java/org/apache/spark/ml/regression/JavaRandomForestRegressorSuite.java b/mllib/src/test/java/org/apache/spark/ml/regression/JavaRandomForestRegressorSuite.java index 368d4b5e1185d..4ba13e2e06c8d 100644 --- a/mllib/src/test/java/org/apache/spark/ml/regression/JavaRandomForestRegressorSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/regression/JavaRandomForestRegressorSuite.java @@ -17,7 +17,6 @@ package org.apache.spark.ml.regression; -import java.io.Serializable; import java.util.HashMap; import java.util.Map; @@ -34,7 +33,7 @@ import org.apache.spark.sql.Row; -public class JavaRandomForestRegressorSuite extends SharedSparkSession implements Serializable { +public class JavaRandomForestRegressorSuite extends SharedSparkSession { @Test public void runDT() { diff --git a/mllib/src/test/java/org/apache/spark/ml/tuning/JavaCrossValidatorSuite.java b/mllib/src/test/java/org/apache/spark/ml/tuning/JavaCrossValidatorSuite.java index 4aea021373051..692d5ad591e84 100644 --- a/mllib/src/test/java/org/apache/spark/ml/tuning/JavaCrossValidatorSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/tuning/JavaCrossValidatorSuite.java @@ -18,7 +18,6 @@ package org.apache.spark.ml.tuning; import java.io.IOException; -import java.io.Serializable; import java.util.List; import org.junit.Assert; @@ -26,15 +25,15 @@ import org.apache.spark.SharedSparkSession; import org.apache.spark.ml.classification.LogisticRegression; -import org.apache.spark.ml.feature.LabeledPoint; import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator; +import org.apache.spark.ml.feature.LabeledPoint; import org.apache.spark.ml.param.ParamMap; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import static org.apache.spark.ml.classification.LogisticRegressionSuite.generateLogisticInputAsList; -public class JavaCrossValidatorSuite extends SharedSparkSession implements Serializable { +public class JavaCrossValidatorSuite extends SharedSparkSession { private transient Dataset dataset; diff --git a/mllib/src/test/java/org/apache/spark/mllib/classification/JavaLogisticRegressionSuite.java b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaLogisticRegressionSuite.java index 0bae289d37392..c04e2e69541ba 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/classification/JavaLogisticRegressionSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaLogisticRegressionSuite.java @@ -17,7 +17,6 @@ package org.apache.spark.mllib.classification; -import java.io.Serializable; import java.util.List; import org.junit.Assert; @@ -27,7 +26,7 @@ import org.apache.spark.api.java.JavaRDD; import org.apache.spark.mllib.regression.LabeledPoint; -public class 
JavaLogisticRegressionSuite extends SharedSparkSession implements Serializable { +public class JavaLogisticRegressionSuite extends SharedSparkSession { int validatePrediction(List validationData, LogisticRegressionModel model) { int numAccurate = 0; diff --git a/mllib/src/test/java/org/apache/spark/mllib/classification/JavaNaiveBayesSuite.java b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaNaiveBayesSuite.java index 722f6b525845a..6ded42e928250 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/classification/JavaNaiveBayesSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaNaiveBayesSuite.java @@ -17,7 +17,6 @@ package org.apache.spark.mllib.classification; -import java.io.Serializable; import java.util.Arrays; import java.util.List; @@ -32,7 +31,7 @@ import org.apache.spark.mllib.regression.LabeledPoint; -public class JavaNaiveBayesSuite extends SharedSparkSession implements Serializable { +public class JavaNaiveBayesSuite extends SharedSparkSession { private static final List POINTS = Arrays.asList( new LabeledPoint(0, Vectors.dense(1.0, 0.0, 0.0)), diff --git a/mllib/src/test/java/org/apache/spark/mllib/classification/JavaSVMSuite.java b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaSVMSuite.java index fea41ba24ab52..0f54e684e447d 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/classification/JavaSVMSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaSVMSuite.java @@ -17,7 +17,6 @@ package org.apache.spark.mllib.classification; -import java.io.Serializable; import java.util.List; import org.junit.Assert; @@ -27,7 +26,7 @@ import org.apache.spark.api.java.JavaRDD; import org.apache.spark.mllib.regression.LabeledPoint; -public class JavaSVMSuite extends SharedSparkSession implements Serializable { +public class JavaSVMSuite extends SharedSparkSession { int validatePrediction(List validationData, SVMModel model) { int numAccurate = 0; diff --git a/mllib/src/test/java/org/apache/spark/mllib/classification/JavaStreamingLogisticRegressionSuite.java b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaStreamingLogisticRegressionSuite.java index 62c6d9b7e390a..8c6bced52dd74 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/classification/JavaStreamingLogisticRegressionSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaStreamingLogisticRegressionSuite.java @@ -17,7 +17,6 @@ package org.apache.spark.mllib.classification; -import java.io.Serializable; import java.util.Arrays; import java.util.List; @@ -37,7 +36,7 @@ import org.apache.spark.streaming.api.java.JavaStreamingContext; import static org.apache.spark.streaming.JavaTestUtils.*; -public class JavaStreamingLogisticRegressionSuite implements Serializable { +public class JavaStreamingLogisticRegressionSuite { protected transient JavaStreamingContext ssc; diff --git a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaBisectingKMeansSuite.java b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaBisectingKMeansSuite.java index c714144093250..3d62b273d2210 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaBisectingKMeansSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaBisectingKMeansSuite.java @@ -17,8 +17,6 @@ package org.apache.spark.mllib.clustering; -import java.io.Serializable; - import com.google.common.collect.Lists; import org.junit.Assert; @@ -29,7 +27,7 @@ import org.apache.spark.mllib.linalg.Vector; import 
org.apache.spark.mllib.linalg.Vectors; -public class JavaBisectingKMeansSuite extends SharedSparkSession implements Serializable { +public class JavaBisectingKMeansSuite extends SharedSparkSession { @Test public void twoDimensionalData() { diff --git a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaGaussianMixtureSuite.java b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaGaussianMixtureSuite.java index 12cf153739c31..bf76719937772 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaGaussianMixtureSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaGaussianMixtureSuite.java @@ -17,7 +17,6 @@ package org.apache.spark.mllib.clustering; -import java.io.Serializable; import java.util.Arrays; import java.util.List; @@ -30,7 +29,7 @@ import org.apache.spark.mllib.linalg.Vector; import org.apache.spark.mllib.linalg.Vectors; -public class JavaGaussianMixtureSuite extends SharedSparkSession implements Serializable { +public class JavaGaussianMixtureSuite extends SharedSparkSession { @Test public void runGaussianMixture() { diff --git a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaKMeansSuite.java b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaKMeansSuite.java index 9e8b65659431c..270e636f82117 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaKMeansSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaKMeansSuite.java @@ -17,7 +17,6 @@ package org.apache.spark.mllib.clustering; -import java.io.Serializable; import java.util.Arrays; import java.util.List; @@ -30,7 +29,7 @@ import org.apache.spark.mllib.linalg.Vector; import org.apache.spark.mllib.linalg.Vectors; -public class JavaKMeansSuite extends SharedSparkSession implements Serializable { +public class JavaKMeansSuite extends SharedSparkSession { @Test public void runKMeansUsingStaticMethods() { diff --git a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java index 4f02ab789e537..08d6713ab2bc3 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java @@ -18,7 +18,6 @@ package org.apache.spark.mllib.clustering; import java.io.IOException; -import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; @@ -36,7 +35,7 @@ import org.apache.spark.mllib.linalg.Vector; import org.apache.spark.mllib.linalg.Vectors; -public class JavaLDASuite extends SharedSparkSession implements Serializable { +public class JavaLDASuite extends SharedSparkSession { @Override public void setUp() throws IOException { super.setUp(); diff --git a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaStreamingKMeansSuite.java b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaStreamingKMeansSuite.java index d1d618f7de2d8..d41fc0e4dca96 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaStreamingKMeansSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaStreamingKMeansSuite.java @@ -17,7 +17,6 @@ package org.apache.spark.mllib.clustering; -import java.io.Serializable; import java.util.Arrays; import java.util.List; @@ -36,7 +35,7 @@ import org.apache.spark.streaming.api.java.JavaStreamingContext; import static org.apache.spark.streaming.JavaTestUtils.*; -public class JavaStreamingKMeansSuite implements Serializable { +public class JavaStreamingKMeansSuite { protected transient 
JavaStreamingContext ssc; diff --git a/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java b/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java index 21e3404096441..e9d7e4fdbe8ce 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java @@ -18,7 +18,6 @@ package org.apache.spark.mllib.evaluation; import java.io.IOException; -import java.io.Serializable; import java.util.Arrays; import java.util.List; @@ -31,7 +30,7 @@ import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -public class JavaRankingMetricsSuite extends SharedSparkSession implements Serializable { +public class JavaRankingMetricsSuite extends SharedSparkSession { private transient JavaRDD, List>> predictionAndLabels; @Override diff --git a/mllib/src/test/java/org/apache/spark/mllib/feature/JavaTfIdfSuite.java b/mllib/src/test/java/org/apache/spark/mllib/feature/JavaTfIdfSuite.java index aab196065df90..05128ea343420 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/feature/JavaTfIdfSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/feature/JavaTfIdfSuite.java @@ -17,7 +17,6 @@ package org.apache.spark.mllib.feature; -import java.io.Serializable; import java.util.Arrays; import java.util.List; @@ -28,7 +27,7 @@ import org.apache.spark.api.java.JavaRDD; import org.apache.spark.mllib.linalg.Vector; -public class JavaTfIdfSuite extends SharedSparkSession implements Serializable { +public class JavaTfIdfSuite extends SharedSparkSession { @Test public void tfIdf() { diff --git a/mllib/src/test/java/org/apache/spark/mllib/feature/JavaWord2VecSuite.java b/mllib/src/test/java/org/apache/spark/mllib/feature/JavaWord2VecSuite.java index 24d9ccfe15df9..3e3abddbee638 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/feature/JavaWord2VecSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/feature/JavaWord2VecSuite.java @@ -17,7 +17,6 @@ package org.apache.spark.mllib.feature; -import java.io.Serializable; import java.util.Arrays; import java.util.List; @@ -31,7 +30,7 @@ import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -public class JavaWord2VecSuite extends SharedSparkSession implements Serializable { +public class JavaWord2VecSuite extends SharedSparkSession { @Test @SuppressWarnings("unchecked") diff --git a/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaAssociationRulesSuite.java b/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaAssociationRulesSuite.java index ddc48af85dea6..3451e0773759b 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaAssociationRulesSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaAssociationRulesSuite.java @@ -16,7 +16,6 @@ */ package org.apache.spark.mllib.fpm; -import java.io.Serializable; import java.util.Arrays; import org.junit.Test; @@ -25,7 +24,7 @@ import org.apache.spark.api.java.JavaRDD; import org.apache.spark.mllib.fpm.FPGrowth.FreqItemset; -public class JavaAssociationRulesSuite extends SharedSparkSession implements Serializable { +public class JavaAssociationRulesSuite extends SharedSparkSession { @Test public void runAssociationRules() { diff --git a/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java b/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java index b337d7bd0f8c0..46e9dd8b59828 100644 --- 
a/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java @@ -18,7 +18,6 @@ package org.apache.spark.mllib.fpm; import java.io.File; -import java.io.Serializable; import java.util.Arrays; import java.util.List; @@ -30,7 +29,7 @@ import org.apache.spark.api.java.JavaRDD; import org.apache.spark.util.Utils; -public class JavaFPGrowthSuite extends SharedSparkSession implements Serializable { +public class JavaFPGrowthSuite extends SharedSparkSession { @Test public void runFPGrowth() { diff --git a/mllib/src/test/java/org/apache/spark/mllib/linalg/JavaMatricesSuite.java b/mllib/src/test/java/org/apache/spark/mllib/linalg/JavaMatricesSuite.java index 92fc57871cdbf..f427846b9ad10 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/linalg/JavaMatricesSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/linalg/JavaMatricesSuite.java @@ -17,7 +17,6 @@ package org.apache.spark.mllib.linalg; -import java.io.Serializable; import java.util.Random; import static org.junit.Assert.assertArrayEquals; @@ -25,7 +24,7 @@ import org.junit.Test; -public class JavaMatricesSuite implements Serializable { +public class JavaMatricesSuite { @Test public void randMatrixConstruction() { diff --git a/mllib/src/test/java/org/apache/spark/mllib/linalg/JavaVectorsSuite.java b/mllib/src/test/java/org/apache/spark/mllib/linalg/JavaVectorsSuite.java index 817b962c75007..f67f555e418a7 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/linalg/JavaVectorsSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/linalg/JavaVectorsSuite.java @@ -17,7 +17,6 @@ package org.apache.spark.mllib.linalg; -import java.io.Serializable; import java.util.Arrays; import static org.junit.Assert.assertArrayEquals; @@ -26,7 +25,7 @@ import org.junit.Test; -public class JavaVectorsSuite implements Serializable { +public class JavaVectorsSuite { @Test public void denseArrayConstruction() { diff --git a/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java b/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java index 64c4810eabf80..363ab42546d11 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java @@ -17,7 +17,6 @@ package org.apache.spark.mllib.recommendation; -import java.io.Serializable; import java.util.ArrayList; import java.util.List; @@ -31,7 +30,7 @@ import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; -public class JavaALSSuite extends SharedSparkSession implements Serializable { +public class JavaALSSuite extends SharedSparkSession { private void validatePrediction( MatrixFactorizationModel model, diff --git a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaIsotonicRegressionSuite.java b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaIsotonicRegressionSuite.java index d63bb1bb096d5..dbd4cbfd2b746 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaIsotonicRegressionSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaIsotonicRegressionSuite.java @@ -17,7 +17,6 @@ package org.apache.spark.mllib.regression; -import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -31,7 +30,7 @@ import org.apache.spark.api.java.JavaDoubleRDD; import org.apache.spark.api.java.JavaRDD; -public class JavaIsotonicRegressionSuite extends 
SharedSparkSession implements Serializable { +public class JavaIsotonicRegressionSuite extends SharedSparkSession { private static List> generateIsotonicInput(double[] labels) { List> input = new ArrayList<>(labels.length); diff --git a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLassoSuite.java b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLassoSuite.java index 3b7b4629d8170..1458cc72bc17f 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLassoSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLassoSuite.java @@ -17,7 +17,6 @@ package org.apache.spark.mllib.regression; -import java.io.Serializable; import java.util.List; import org.junit.Assert; @@ -27,7 +26,7 @@ import org.apache.spark.api.java.JavaRDD; import org.apache.spark.mllib.util.LinearDataGenerator; -public class JavaLassoSuite extends SharedSparkSession implements Serializable { +public class JavaLassoSuite extends SharedSparkSession { int validatePrediction(List validationData, LassoModel model) { int numAccurate = 0; diff --git a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLinearRegressionSuite.java b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLinearRegressionSuite.java index f712521f3db46..a46b1321b3ca2 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLinearRegressionSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLinearRegressionSuite.java @@ -17,7 +17,6 @@ package org.apache.spark.mllib.regression; -import java.io.Serializable; import java.util.List; import org.junit.Assert; @@ -29,7 +28,7 @@ import org.apache.spark.mllib.linalg.Vector; import org.apache.spark.mllib.util.LinearDataGenerator; -public class JavaLinearRegressionSuite extends SharedSparkSession implements Serializable { +public class JavaLinearRegressionSuite extends SharedSparkSession { int validatePrediction(List validationData, LinearRegressionModel model) { int numAccurate = 0; diff --git a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java index 43fc60e9fd151..cb00977412345 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java @@ -17,7 +17,6 @@ package org.apache.spark.mllib.regression; -import java.io.Serializable; import java.util.List; import java.util.Random; @@ -28,7 +27,7 @@ import org.apache.spark.api.java.JavaRDD; import org.apache.spark.mllib.util.LinearDataGenerator; -public class JavaRidgeRegressionSuite extends SharedSparkSession implements Serializable { +public class JavaRidgeRegressionSuite extends SharedSparkSession { private static double predictionError(List validationData, RidgeRegressionModel model) { diff --git a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaStreamingLinearRegressionSuite.java b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaStreamingLinearRegressionSuite.java index ea0ccd7448986..ab554475d59a1 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaStreamingLinearRegressionSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaStreamingLinearRegressionSuite.java @@ -17,7 +17,6 @@ package org.apache.spark.mllib.regression; -import java.io.Serializable; import java.util.Arrays; import java.util.List; @@ -36,7 +35,7 @@ import 
org.apache.spark.streaming.api.java.JavaStreamingContext; import static org.apache.spark.streaming.JavaTestUtils.*; -public class JavaStreamingLinearRegressionSuite implements Serializable { +public class JavaStreamingLinearRegressionSuite { protected transient JavaStreamingContext ssc; diff --git a/mllib/src/test/java/org/apache/spark/mllib/stat/JavaStatisticsSuite.java b/mllib/src/test/java/org/apache/spark/mllib/stat/JavaStatisticsSuite.java index 373417d3ba7c0..1abaa39eadc22 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/stat/JavaStatisticsSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/stat/JavaStatisticsSuite.java @@ -17,7 +17,6 @@ package org.apache.spark.mllib.stat; -import java.io.Serializable; import java.util.Arrays; import java.util.List; @@ -42,7 +41,7 @@ import org.apache.spark.streaming.api.java.JavaStreamingContext; import static org.apache.spark.streaming.JavaTestUtils.*; -public class JavaStatisticsSuite implements Serializable { +public class JavaStatisticsSuite { private transient SparkSession spark; private transient JavaSparkContext jsc; private transient JavaStreamingContext ssc; diff --git a/mllib/src/test/java/org/apache/spark/mllib/tree/JavaDecisionTreeSuite.java b/mllib/src/test/java/org/apache/spark/mllib/tree/JavaDecisionTreeSuite.java index e9c4f80b44c9b..1dcbbcaa0223c 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/tree/JavaDecisionTreeSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/tree/JavaDecisionTreeSuite.java @@ -17,7 +17,6 @@ package org.apache.spark.mllib.tree; -import java.io.Serializable; import java.util.HashMap; import java.util.List; @@ -34,7 +33,7 @@ import org.apache.spark.mllib.tree.impurity.Gini; import org.apache.spark.mllib.tree.model.DecisionTreeModel; -public class JavaDecisionTreeSuite extends SharedSparkSession implements Serializable { +public class JavaDecisionTreeSuite extends SharedSparkSession { int validatePrediction(List validationData, DecisionTreeModel model) { int numCorrect = 0; From 2bcfdd77471780b405345c2cc67a77e851e1dead Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Fri, 20 May 2016 07:04:51 +0530 Subject: [PATCH 26/27] fix imports --- .../java/org/apache/spark/ml/clustering/JavaKMeansSuite.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mllib/src/test/java/org/apache/spark/ml/clustering/JavaKMeansSuite.java b/mllib/src/test/java/org/apache/spark/ml/clustering/JavaKMeansSuite.java index f3a281906fff7..1be6f96f4c942 100644 --- a/mllib/src/test/java/org/apache/spark/ml/clustering/JavaKMeansSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/clustering/JavaKMeansSuite.java @@ -21,11 +21,10 @@ import java.util.Arrays; import java.util.List; +import org.junit.Test; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; -import org.junit.Test; - import org.apache.spark.SharedSparkSession; import org.apache.spark.ml.linalg.Vector; import org.apache.spark.sql.Dataset; From 138818bc1aba76bc17738612e5bd98538856fef1 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Fri, 20 May 2016 07:08:14 +0530 Subject: [PATCH 27/27] fix --- .../JavaMultilayerPerceptronClassifierSuite.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mllib/src/test/java/org/apache/spark/ml/classification/JavaMultilayerPerceptronClassifierSuite.java b/mllib/src/test/java/org/apache/spark/ml/classification/JavaMultilayerPerceptronClassifierSuite.java index e5f3161b9d78c..6d0604d8f9a5a 100644 --- 
a/mllib/src/test/java/org/apache/spark/ml/classification/JavaMultilayerPerceptronClassifierSuite.java
+++ b/mllib/src/test/java/org/apache/spark/ml/classification/JavaMultilayerPerceptronClassifierSuite.java
@@ -29,8 +29,7 @@
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Row;
 
-public class JavaMultilayerPerceptronClassifierSuite
-  extends SharedSparkSession {
+public class JavaMultilayerPerceptronClassifierSuite extends SharedSparkSession {
 
   @Test
   public void testMLPC() {
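
After this series, every Java ML test suite follows the same pattern: extend SharedSparkSession, override setUp() and call super.setUp() before building suite-specific data, and rely on the inherited spark/jsc fields plus the automatic tearDown(). The sketch below is illustrative only and not part of the patches; it assumes the final SharedSparkSession shown above, and the suite name JavaExampleSuite with its tiny two-row DataFrame is hypothetical.

package org.apache.spark.ml;

import java.io.IOException;
import java.util.Arrays;

import org.junit.Assert;
import org.junit.Test;

import org.apache.spark.SharedSparkSession;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;

// Hypothetical suite, for illustration only: shows the post-refactor pattern of
// extending SharedSparkSession instead of building a SparkSession in each suite.
public class JavaExampleSuite extends SharedSparkSession {

  private transient Dataset<Row> dataset;

  @Override
  public void setUp() throws IOException {
    // Let the shared base class create spark and jsc first.
    super.setUp();
    StructType schema = new StructType().add("id", DataTypes.IntegerType);
    dataset = spark.createDataFrame(
      jsc.parallelize(Arrays.asList(RowFactory.create(1), RowFactory.create(2))),
      schema);
  }

  @Test
  public void sharedSessionIsUsable() {
    // spark is stopped automatically by SharedSparkSession.tearDown().
    Assert.assertEquals(2, dataset.count());
  }
}

Suites that create extra resources (temp directories, streaming contexts) still override tearDown(), calling super.tearDown() alongside their own cleanup, as JavaLibSVMRelationSuite and JavaDefaultReadWriteSuite do in the patches above.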