@@ -646,30 +646,30 @@ test_that("spark.isotonicRegression", {
646646
647647test_that(" spark.logit" , {
648648 # test binary logistic regression
649- label <- c(1 .0 , 1 .0 , 1 .0 , 0 .0 , 0 .0 )
649+ label <- c(0 .0 , 0 .0 , 0 .0 , 1 .0 , 1 .0 )
650650 feature <- c(1.1419053 , 0.9194079 , - 0.9498666 , - 1.1069903 , 0.2809776 )
651651 binary_data <- as.data.frame(cbind(label , feature ))
652652 binary_df <- createDataFrame(binary_data )
653653
654654 blr_model <- spark.logit(binary_df , label ~ feature , thresholds = 1.0 )
655655 blr_predict <- collect(select(predict(blr_model , binary_df ), " prediction" ))
656- expect_equal(blr_predict $ prediction , c(0 , 0 , 0 , 0 , 0 ))
656+ expect_equal(blr_predict $ prediction , c(" 0.0 " , " 0.0 " , " 0.0 " , " 0.0 " , " 0.0 " ))
657657 blr_model1 <- spark.logit(binary_df , label ~ feature , thresholds = 0.0 )
658658 blr_predict1 <- collect(select(predict(blr_model1 , binary_df ), " prediction" ))
659- expect_equal(blr_predict1 $ prediction , c(1 , 1 , 1 , 1 , 1 ))
659+ expect_equal(blr_predict1 $ prediction , c(" 1.0 " , " 1.0 " , " 1.0 " , " 1.0 " , " 1.0 " ))
660660
661661 # test summary of binary logistic regression
662662 blr_summary <- summary(blr_model )
663663 blr_fmeasure <- collect(select(blr_summary $ fMeasureByThreshold , " threshold" , " F-Measure" ))
664- expect_equal(blr_fmeasure $ threshold , c(0.8221347 , 0.7884005 , 0.6674709 , 0.3785437 , 0.3434487 ),
664+ expect_equal(blr_fmeasure $ threshold , c(0.6565513 , 0.6214563 , 0.3325291 , 0.2115995 , 0.1778653 ),
665665 tolerance = 1e-4 )
666- expect_equal(blr_fmeasure $ " F-Measure" , c(0.5000000 , 0.8000000 , 0.6666667 , 0.8571429 , 0.7500000 ),
666+ expect_equal(blr_fmeasure $ " F-Measure" , c(0.6666667 , 0.5000000 , 0.8000000 , 0.6666667 , 0.5714286 ),
667667 tolerance = 1e-4 )
668668 blr_precision <- collect(select(blr_summary $ precisionByThreshold , " threshold" , " precision" ))
669- expect_equal(blr_precision $ precision , c(1.0000000 , 1.0000000 , 0.6666667 , 0.7500000 , 0.6000000 ),
669+ expect_equal(blr_precision $ precision , c(1.0000000 , 0.5000000 , 0.6666667 , 0.5000000 , 0.4000000 ),
670670 tolerance = 1e-4 )
671671 blr_recall <- collect(select(blr_summary $ recallByThreshold , " threshold" , " recall" ))
672- expect_equal(blr_recall $ recall , c(0.3333333 , 0.6666667 , 0.6666667 , 1.0000000 , 1.0000000 ),
672+ expect_equal(blr_recall $ recall , c(0.5000000 , 0.5000000 , 1.0000000 , 1.0000000 , 1.0000000 ),
673673 tolerance = 1e-4 )
674674
675675 # test model save and read
@@ -683,6 +683,16 @@ test_that("spark.logit", {
683683 expect_error(summary(blr_model2 ))
684684 unlink(modelPath )
685685
686+ # test prediction label as text
687+ training <- suppressWarnings(createDataFrame(iris ))
688+ binomial_training <- training [training $ Species %in% c(" versicolor" , " virginica" ), ]
689+ binomial_model <- spark.logit(binomial_training , Species ~ Sepal_Length + Sepal_Width )
690+ prediction <- predict(binomial_model , binomial_training )
691+ expect_equal(typeof(take(select(prediction , " prediction" ), 1 )$ prediction ), " character" )
692+ expected <- c(" virginica" , " virginica" , " virginica" , " versicolor" , " virginica" ,
693+ " versicolor" , " virginica" , " versicolor" , " virginica" , " versicolor" )
694+ expect_equal(as.list(take(select(prediction , " prediction" ), 10 ))[[1 ]], expected )
695+
686696 # test multinomial logistic regression
687697 label <- c(0.0 , 1.0 , 2.0 , 0.0 , 0.0 )
688698 feature1 <- c(4.845940 , 5.64480 , 7.430381 , 6.464263 , 5.555667 )
@@ -694,7 +704,7 @@ test_that("spark.logit", {
694704
695705 model <- spark.logit(df , label ~ . , family = " multinomial" , thresholds = c(0 , 1 , 1 ))
696706 predict1 <- collect(select(predict(model , df ), " prediction" ))
697- expect_equal(predict1 $ prediction , c(0 , 0 , 0 , 0 , 0 ))
707+ expect_equal(predict1 $ prediction , c(" 0.0 " , " 0.0 " , " 0.0 " , " 0.0 " , " 0.0 " ))
698708 # Summary of multinomial logistic regression is not implemented yet
699709 expect_error(summary(model ))
700710})
0 commit comments