Skip to content

Commit a99703a

Browse files
committed
group glm methods
1 parent f78cb67 commit a99703a

1 file changed

Lines changed: 31 additions & 46 deletions

File tree

R/pkg/R/mllib.R

Lines changed: 31 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,8 @@ setClass("KMeansModel", representation(jobj = "jobj"))
5555

5656
#' Generalized Linear Models
5757
#'
58-
#' Fits a generalized linear model against a Spark DataFrame.
58+
#' Fits a generalized linear model against a SparkDataFrame. The produced model can be
59+
#' summarized, printed, used to make predictions, and saved to the input path.
5960
#'
6061
#' @param data SparkDataFrame for training.
6162
#' @param formula A symbolic description of the model to be fitted. Currently only a few formula
@@ -66,7 +67,7 @@ setClass("KMeansModel", representation(jobj = "jobj"))
6667
#' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}.
6768
#' @param tol Positive convergence tolerance of iterations.
6869
#' @param maxIter Integer giving the maximal number of IRLS iterations.
69-
#' @return a fitted generalized linear model
70+
#' @return \code{spark.glm} returns a fitted generalized linear model
7071
#' @rdname spark.glm
7172
#' @name spark.glm
7273
#' @export
@@ -77,7 +78,21 @@ setClass("KMeansModel", representation(jobj = "jobj"))
7778
#' df <- createDataFrame(iris)
7879
#' model <- spark.glm(df, Sepal_Length ~ Sepal_Width, family = "gaussian")
7980
#' summary(model)
81+
#'
82+
#' # fitted values on training data
83+
#' fitted <- predict(model, df)
84+
#' head(select(fitted, "Sepal_Length", "prediction"))
85+
#'
86+
#' # save fitted model to input path
87+
#' path <- "path/to/model"
88+
#' write.ml(model, path)
89+
#'
90+
#' # can also read back the saved model and print
91+
#' savedModel <- read.ml(path)
92+
#' summary(savedModel)
8093
#' }
94+
95+
8196
#' @note spark.glm since 2.0.0
8297
setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"),
8398
function(data, formula, family = gaussian, tol = 1e-6, maxIter = 25) {
@@ -99,10 +114,8 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"),
99114
tol, as.integer(maxIter))
100115
return(new("GeneralizedLinearRegressionModel", jobj = jobj))
101116
})
102-
#' Fits a generalized linear model (R-compliant).
103-
#'
104-
#' Fits a generalized linear model, similarly to R's glm().
105-
#'
117+
118+
#' @title Fit a generalized linear model
106119
#' @param formula A symbolic description of the model to be fitted. Currently only a few formula
107120
#' operators are supported, including '~', '.', ':', '+', and '-'.
108121
#' @param data SparkDataFrame for training.
@@ -112,35 +125,23 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"),
112125
#' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}.
113126
#' @param epsilon Positive convergence tolerance of iterations.
114127
#' @param maxit Integer giving the maximal number of IRLS iterations.
115-
#' @return a fitted generalized linear model
128+
#' @return \code{glm} returns a fitted generalized linear model.
116129
#' @rdname glm
117130
#' @export
118-
#' @examples
119-
#' \dontrun{
120-
#' sparkR.session()
121-
#' data(iris)
122-
#' df <- createDataFrame(iris)
123-
#' model <- glm(Sepal_Length ~ Sepal_Width, df, family = "gaussian")
124-
#' summary(model)
125-
#' }
126131
#' @note glm since 1.5.0
127132
setMethod("glm", signature(formula = "formula", family = "ANY", data = "SparkDataFrame"),
128133
function(formula, family = gaussian, data, epsilon = 1e-6, maxit = 25) {
129134
spark.glm(data, formula, family, tol = epsilon, maxIter = maxit)
130135
})
131136

132-
133-
#' Returns the summary of a model produced by glm() or spark.glm(), similarly to R's summary().
134-
#'
137+
#' @title Return a summary of the produced generalized linear model
135138
#' @param object A fitted generalized linear model
136-
#' @return coefficients the model's coefficients, intercept
139+
#' @return \code{summary} returns a summary object of the fitted model, a list of components
140+
#' including at least the coefficients, null/residual deviance, null/residual degrees
141+
#' of freedom, AIC and number of iterations IRLS takes.
142+
#'
137143
#' @rdname spark.glm
138144
#' @export
139-
#' @examples
140-
#' \dontrun{
141-
#' model <- glm(y ~ x, trainingData)
142-
#' summary(model)
143-
#' }
144145
#' @note summary(GeneralizedLinearRegressionModel) since 2.0.0
145146
setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"),
146147
function(object, ...) {
@@ -172,9 +173,9 @@ setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"),
172173
return(ans)
173174
})
174175

175-
#' Print the summary of GeneralizedLinearRegressionModel
176-
#'
176+
#' @title Print the summary of the produced generalized linear model
177177
#' @rdname spark.glm
178+
#' @param x Summary object of fitted generalized linear model returned by \code{summary} function
178179
#' @export
179180
#' @note print.summary.GeneralizedLinearRegressionModel since 2.0.0
180181
print.summary.GeneralizedLinearRegressionModel <- function(x, ...) {
@@ -203,21 +204,11 @@ print.summary.GeneralizedLinearRegressionModel <- function(x, ...) {
203204
invisible(x)
204205
}
205206

206-
207-
#' Makes predictions from a generalized linear model produced by glm() or spark.glm(),
208-
#' similarly to R's predict().
209-
#'
210-
#' @param object A fitted generalized linear model
207+
#' @title Make predictions using the produced generalized linear model
211208
#' @param newData SparkDataFrame for testing
212-
#' @return SparkDataFrame containing predicted labels in a column named "prediction"
209+
#' @return \code{predict} returns a SparkDataFrame containing predicted labels in a column named
#'         "prediction"
213210
#' @rdname spark.glm
214211
#' @export
215-
#' @examples
216-
#' \dontrun{
217-
#' model <- glm(y ~ x, trainingData)
218-
#' predicted <- predict(model, testData)
219-
#' showDF(predicted)
220-
#' }
221212
#' @note predict(GeneralizedLinearRegressionModel) since 1.5.0
222213
setMethod("predict", signature(object = "GeneralizedLinearRegressionModel"),
223214
function(object, newData) {
@@ -468,22 +459,16 @@ setMethod("write.ml", signature(object = "AFTSurvivalRegressionModel", path = "c
468459
invisible(callJMethod(writer, "save", path))
469460
})
470461

471-
#' Save the generalized linear model to the input path.
462+
#' @title Save fitted generalized linear model to the input path
472463
#'
473-
#' @param object A fitted generalized linear model
474464
#' @param path The directory where the model is saved
475465
#' @param overwrite Overwrites or not if the output path already exists. Default is FALSE
476466
#' which means throw exception if the output path exists.
477467
#'
478468
#' @rdname spark.glm
479469
#' @export
480-
#' @examples
481-
#' \dontrun{
482-
#' model <- glm(y ~ x, trainingData)
483-
#' path <- "path/to/model"
484-
#' write.ml(model, path)
485-
#' }
486470
#' @note write.ml(GeneralizedLinearRegressionModel, character) since 2.0.0
471+
#' @seealso \link{read.ml}
487472
setMethod("write.ml", signature(object = "GeneralizedLinearRegressionModel", path = "character"),
488473
function(object, path, overwrite = FALSE) {
489474
writer <- callJMethod(object@jobj, "write")

0 commit comments

Comments
 (0)