@@ -55,7 +55,8 @@ setClass("KMeansModel", representation(jobj = "jobj"))
5555
5656# ' Generalized Linear Models
5757# '
58- # ' Fits a generalized linear model against a Spark DataFrame.
58+ # ' Fits a generalized linear model against a SparkDataFrame. Users can print a summary of the
59+ # ' fitted model, make predictions with it, and save it to the input path.
5960# '
6061# ' @param data SparkDataFrame for training.
6162# ' @param formula A symbolic description of the model to be fitted. Currently only a few formula
@@ -66,7 +67,7 @@ setClass("KMeansModel", representation(jobj = "jobj"))
6667# ' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}.
6768# ' @param tol Positive convergence tolerance of iterations.
6869# ' @param maxIter Integer giving the maximal number of IRLS iterations.
69- # ' @return a fitted generalized linear model
70+ # ' @return \code{spark.glm} returns a fitted generalized linear model.
7071# ' @rdname spark.glm
7172# ' @name spark.glm
7273# ' @export
@@ -77,7 +78,21 @@ setClass("KMeansModel", representation(jobj = "jobj"))
7778# ' df <- createDataFrame(iris)
7879# ' model <- spark.glm(df, Sepal_Length ~ Sepal_Width, family = "gaussian")
7980# ' summary(model)
81+ # '
82+ # ' # fitted values on training data
83+ # ' fitted <- predict(model, df)
84+ # ' head(select(fitted, "Sepal_Length", "prediction"))
85+ # '
86+ # ' # save fitted model to input path
87+ # ' path <- "path/to/model"
88+ # ' write.ml(model, path)
89+ # '
90+ # ' # can also read back the saved model and print
91+ # ' savedModel <- read.ml(path)
92+ # ' summary(savedModel)
8093# ' }
8196# ' @note spark.glm since 2.0.0
8297setMethod ("spark.glm ", signature(data = "SparkDataFrame", formula = "formula"),
8398 function (data , formula , family = gaussian , tol = 1e-6 , maxIter = 25 ) {
@@ -99,10 +114,8 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"),
99114 tol , as.integer(maxIter ))
100115 return (new(" GeneralizedLinearRegressionModel" , jobj = jobj ))
101116 })
102- # ' Fits a generalized linear model (R-compliant).
103- # '
104- # ' Fits a generalized linear model, similarly to R's glm().
105- # '
117+
118+ # ' @title Fit a generalized linear model
106119# ' @param formula A symbolic description of the model to be fitted. Currently only a few formula
107120# ' operators are supported, including '~', '.', ':', '+', and '-'.
108121# ' @param data SparkDataFrame for training.
@@ -112,35 +125,23 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"),
112125# ' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}.
113126# ' @param epsilon Positive convergence tolerance of iterations.
114127# ' @param maxit Integer giving the maximal number of IRLS iterations.
115- # ' @return a fitted generalized linear model
128+ # ' @return \code{glm} returns a fitted generalized linear model.
116129# ' @rdname glm
117130# ' @export
118- # ' @examples
119- # ' \dontrun{
120- # ' sparkR.session()
121- # ' data(iris)
122- # ' df <- createDataFrame(iris)
123- # ' model <- glm(Sepal_Length ~ Sepal_Width, df, family = "gaussian")
124- # ' summary(model)
125- # ' }
126131# ' @note glm since 1.5.0
127132setMethod ("glm ", signature(formula = "formula", family = "ANY", data = "SparkDataFrame"),
128133 function (formula , family = gaussian , data , epsilon = 1e-6 , maxit = 25 ) {
129134 spark.glm(data , formula , family , tol = epsilon , maxIter = maxit )
130135 })
131136
132-
133- # ' Returns the summary of a model produced by glm() or spark.glm(), similarly to R's summary().
134- # '
137+ # ' @title Return a summary of the produced generalized linear model
135138# ' @param object A fitted generalized linear model
136- # ' @return coefficients the model's coefficients, intercept
139+ # ' @return \code{summary} returns a summary object of the fitted model, a list of components
140+ # ' including at least the coefficients, null/residual deviance, null/residual degrees
141+ # ' of freedom, AIC and number of iterations IRLS takes.
142+ # '
137143# ' @rdname spark.glm
138144# ' @export
139- # ' @examples
140- # ' \dontrun{
141- # ' model <- glm(y ~ x, trainingData)
142- # ' summary(model)
143- # ' }
144145# ' @note summary(GeneralizedLinearRegressionModel) since 2.0.0
145146setMethod ("summary ", signature(object = "GeneralizedLinearRegressionModel"),
146147 function (object , ... ) {
@@ -172,9 +173,9 @@ setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"),
172173 return (ans )
173174 })
174175
175- # ' Print the summary of GeneralizedLinearRegressionModel
176- # '
176+ # ' @title Print the summary of the produced generalized linear model
177177# ' @rdname spark.glm
178+ # ' @param x Summary object of fitted generalized linear model returned by \code{summary} function
178179# ' @export
179180# ' @note print.summary.GeneralizedLinearRegressionModel since 2.0.0
180181print.summary.GeneralizedLinearRegressionModel <- function (x , ... ) {
@@ -203,21 +204,11 @@ print.summary.GeneralizedLinearRegressionModel <- function(x, ...) {
203204 invisible (x )
204205 }
205206
206-
207- # ' Makes predictions from a generalized linear model produced by glm() or spark.glm(),
208- # ' similarly to R's predict().
209- # '
210- # ' @param object A fitted generalized linear model
207+ # ' @title Make predictions using the produced generalized linear model
211208# ' @param newData SparkDataFrame for testing
212- # ' @return SparkDataFrame containing predicted labels in a column named "prediction"
209+ # ' @return \code{predict} returns a SparkDataFrame containing predicted labels in a column named "prediction"
213210# ' @rdname spark.glm
214211# ' @export
215- # ' @examples
216- # ' \dontrun{
217- # ' model <- glm(y ~ x, trainingData)
218- # ' predicted <- predict(model, testData)
219- # ' showDF(predicted)
220- # ' }
221212# ' @note predict(GeneralizedLinearRegressionModel) since 1.5.0
222213setMethod ("predict ", signature(object = "GeneralizedLinearRegressionModel"),
223214 function (object , newData ) {
@@ -468,22 +459,16 @@ setMethod("write.ml", signature(object = "AFTSurvivalRegressionModel", path = "c
468459 invisible (callJMethod(writer , " save" , path ))
469460 })
470461
471- # ' Save the generalized linear model to the input path.
462+ # ' @title Save fitted generalized linear model to the input path
472463# '
473- # ' @param object A fitted generalized linear model
474464# ' @param path The directory where the model is saved
475465# ' @param overwrite Whether to overwrite if the output path already exists. Default is FALSE,
476466# ' which means an exception is thrown if the output path exists.
477467# '
478468# ' @rdname spark.glm
479469# ' @export
480- # ' @examples
481- # ' \dontrun{
482- # ' model <- glm(y ~ x, trainingData)
483- # ' path <- "path/to/model"
484- # ' write.ml(model, path)
485- # ' }
486470# ' @note write.ml(GeneralizedLinearRegressionModel, character) since 2.0.0
471+ # ' @seealso \link{read.ml}
487472setMethod ("write.ml ", signature(object = "GeneralizedLinearRegressionModel", path = "character"),
488473 function (object , path , overwrite = FALSE ) {
489474 writer <- callJMethod(object @ jobj , " write" )
0 commit comments