Merge pull request #13 from m-clark/fix-gam-cat-re

m-clark · web-flow · commit e93c902b552d · 2020-06-24T08:58:33.000-04:00
issue #12 (but opens up issue with new R CMD Checks for R development 4.1)
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: mixedup
 Title: Miscellaneous functions for mixed models
-Version: 0.3.6
+Version: 0.3.7
 Authors@R: 
     person(given = "Michael",
            family = "Clark",
diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,7 @@
+# mixedup 0.3.7
+
+* Extend exponentiate to summarize_model, extend extract_random_effects to multivariate models, minor fixes (e.g. issue #12).
+
 # mixedup 0.3.6
 
 * Add group counts for extract_random_effects.
diff --git a/R/extract_random_coefs.R b/R/extract_random_coefs.R
@@ -25,12 +25,19 @@
 #'   \href{https://bbolker.github.io/mixedmodels-misc/glmmFAQ.html#confidence-intervals-on-conditional-meansblupsrandom-effects}{GLMM
 #'   FAQ}. As noted there, this assumption may not be appropriate, and if you
 #'   are really interested in an accurate uncertainty estimate you should
-#'   probably use brms.
+#'   probably use `brms`.
 #'
-#'   The nlme package only provides the coefficients no estimated variance, so this
+#'   For more complex models that include multiple outcomes/categories or have
+#'   other anomalies, this function likely will not work at present, even if the
+#'   underlying `extract_fixed_effects` and `extract_random_effects` do, as
+#'   naming conventions are not consistent enough to deal with this without a
+#'   lot of tedium that still may not satisfy every situation. I will possibly
+#'   be able to update this in the future.
+#'
+#'   The `nlme` package only provides the coefficients, with no estimated variance, so this
 #'   function doesn't add to what you get from basic functionality for those
-#'   models.  In addition, nlme adds all random effects to the fixed effects,
-#'   whereas lme4 and others only add the effects requested.
+#'   models.  In addition, `nlme` adds all random effects to the fixed effects,
+#'   whereas `lme4` and others only add the effects requested.
 #'
 #' @return A data frame of the random coefficients and their standard errors.
 #'
diff --git a/R/extract_random_effects.R b/R/extract_random_effects.R
@@ -575,16 +575,19 @@ extract_random_effects.gam <- function(
     }
   }
 
-
   if (purrr::is_empty(re_levels) | all(purrr::map_lgl(re_levels, is.null))) {
     stop('No factor random effects.')
   }
 
+  # can put an re smooth on a continuous covariate for penalization, but don't
+  # want that in output
   non_factors <- purrr::map_lgl(re_levels, is.null)
+  re_idx  <- which(!non_factors)
 
   # this test is covered but covr ignores for some reason
   if (any(non_factors)) {
     re_terms[non_factors] <- FALSE
+    re_names  <- re_names[re_idx]
   }
 
   if (!is.null(re) && !re %in% re_names)
@@ -594,37 +597,58 @@ extract_random_effects.gam <- function(
       )
     )
 
-  re_labels <- purrr::map(model$smooth[re_terms], function(x) x$label)
+  re_labels <- purrr::map(model$smooth[re_idx], function(x) x$label)
 
   gam_coef <- stats::coef(model)
 
-  # issue, parenthesis in the names means problematic regex matching so remove
+  # issue, parenthesis in the names means problematic regex matching, so remove
   # all but key part of pattern
   re_label_base <- gsub(re_labels, pattern = "s\\(", replacement = '') # remove first s
   re_label_base <- gsub(re_label_base, pattern = "\\(|\\)", replacement = '') # remove parenthesis
 
-  re_coef <- grepl(names(gam_coef), pattern = paste0('^s\\(', re_label_base, collapse = "|"))
+  re_coef  <- vector('list', length = length(re_idx))
+  coef_idx <- vector('list', length = length(re_idx))
+
+  for (i in re_idx) {
+    first_para <- model$smooth[[i]]$first.para
+    last_para  <- model$smooth[[i]]$last.para
 
-  re0 <- gam_coef[re_coef]
+    coef_idx[[i]] <- first_para:last_para
+    re_coef[[i]]  <- gam_coef[coef_idx[[i]]]
+  }
 
-  gam_se <- sqrt(diag(model$Vp)) # no names
-  gam_se <- gam_se[names(gam_coef) %in% names(re0)]
+  # extract coefs and se
+  re0 <- unlist(re_coef)
+  gam_se <- sqrt(diag(model$Vp))[unlist(coef_idx)]
 
-  # clean up names
+  # clean up and gather names
   names(re0) <- gsub(names(re0), pattern = "s\\(|\\)", replacement = '')
   names(re0) <- gsub(names(re0), pattern = "\\.[0-9]+", replacement = '')
 
-  re_n <- dplyr::n_distinct(names(re0)) # possible use later
-  re_names <- names(re0)
+  re_names   <- names(re0)
+  re_effects <- purrr::map_chr(model$smooth, function(x) x$term[1])
+  re_effects <- rep(re_effects, times = purrr::map_int(re_coef, length))
+
+  # check to see if factors are the smooth terms (i.e. random cat slope), and
+  # repeat levels of grouping variable the number of levels in the factor
+  for (i in re_idx) {
+    smooth_vars <-  model$smooth[[i]]$term
+    smooth_term <- model$model[[smooth_vars[1]]] # it will be the first term, 2nd term is the RE var
+
+    if (length(smooth_vars) > 1 & (is.factor(smooth_term) | is.character(smooth_term))) {
+      n_levs <- dplyr::n_distinct(smooth_term)
+      re_levels[[i]] <- rep(re_levels[[i]], each = n_levs) # note the each, this is how mgcv orders and confirmed via lme4
+    }
+  }
 
-  random_effects <- dplyr::tibble(effect = re_names) %>%
+  random_effects <- dplyr::tibble(group_var = re_names) %>%
     dplyr::mutate(
-      group_var = split_group_effect(effect, which = 2),
-      effect = split_group_effect(effect, which = 1),
+      group_var = split_group_effect(group_var, which = 2),
+      effect = re_effects,
       effect = ifelse(effect ==  group_var, 'Intercept', effect),
-      group = unlist(re_levels),
-      value = re0,
-      se = gam_se
+      group  = unlist(re_levels),
+      value  = re0,
+      se     = gam_se
     )
 
   if (add_group_N) {
diff --git a/codemeta.json b/codemeta.json
@@ -10,7 +10,7 @@
   "codeRepository": "https://m-clark.github.io/mixedup",
   "issueTracker": "https://github.com/m-clark/mixedup/issues",
   "license": "https://spdx.org/licenses/MIT",
-  "version": "0.3.6",
+  "version": "0.3.7",
   "programmingLanguage": {
     "@type": "ComputerLanguage",
     "name": "R",
@@ -218,7 +218,7 @@
     }
   ],
   "releaseNotes": "https://github.com/m-clark/mixedup/blob/master/NEWS.md",
-  "fileSize": "31823.321KB",
+  "fileSize": "37466.782KB",
   "contIntegration": [
     "https://travis-ci.org/m-clark/mixedup",
     "https://ci.appveyor.com/project/m-clark/mixedup",
diff --git a/man/extract_random_coefs.Rd b/man/extract_random_coefs.Rd
diff --git a/tests/testthat/brm_extended_results.RData b/tests/testthat/brm_extended_results.RData
diff --git a/tests/testthat/helper-load_data.R b/tests/testthat/helper-load_data.R
@@ -252,6 +252,7 @@ brm_corCAR <- readRDS('brm_car_results.rds')
 #
 # bprior1 <- prior(student_t(5,0,10), class = b) +
 #   prior(cauchy(0,2), class = sd)
+# pr = prior(normal(0, 10), class = b)
 #
 # brm_glm <- brm(
 #   count ~ zAge + zBase * Trt + (1 | patient),
@@ -262,7 +263,7 @@ brm_corCAR <- readRDS('brm_car_results.rds')
 #   thin  = 40
 # )
 #
-# pr = prior(normal(0, 10), class = b)
+
 #
 # brm_0 <-
 #   brm(
@@ -321,9 +322,11 @@ brm_corCAR <- readRDS('brm_car_results.rds')
 #   )
 #
 # probably problematic models but fine for testing
-
+#
+#
+#
 ### standard autocor
-
+#
 # brm_corAR <- update(
 #   brm_2,
 #   autocor = cor_ar( ~ Days | Subject),
@@ -470,8 +473,27 @@ brm_corCAR <- readRDS('brm_car_results.rds')
 #   cores = 4,
 #   thin = 40,
 # )
+# brm_ordinal <- brm(
+#   rating ~ period + carry + cs(treat) + (1|subject),
+#   data  = inhaler,
+#   family = sratio("cloglog"),
+#   prior = pr,
+#   cores = 4,
+#   thin  = 40
+# )
+#
+# brm_categorical <- brm(
+#   rating ~ period + carry + treat + (1|subject),
+#   data  = inhaler,
+#   family = categorical,
+#   prior = pr,
+#   cores = 4,
+#   thin  = 40
+# )
 #
 # save(
+#   brm_ordinal,
+#   brm_categorical,
 #   brm_sigma_simple,
 #   brm_sigma,
 #   brm_zi,
@@ -616,6 +638,17 @@ load('mgcv_results.RData')
 #   data = glmmTMB::Salamanders
 # )
 #
+# gam_cat_slope <-
+#   gam(
+#     Reaction ~ Days + s(Subject, bs = "re") + s(Days, Subject, bs = "re"),
+#     data = lme4::sleepstudy %>%
+#       mutate(Days = factor(
+#         case_when(Days < 2 ~ "x",
+#                   Days < 5 ~ "y",
+#                   TRUE ~ "z")
+#       )
+#       )
+#   )
 #
 # bam objects are very large to save even for small models
 # bam_1 = bam(
@@ -632,6 +665,7 @@ load('mgcv_results.RData')
 #   gam_1,
 #   gam_2,
 #   gam_3,
+#   gam_cat_slope,
 #   gam_glm,
 #   bam_1,
 #   file = 'tests/testthat/mgcv_results.RData'
diff --git a/tests/testthat/mgcv_results.RData b/tests/testthat/mgcv_results.RData
diff --git a/tests/testthat/test-extract_random_effects.R b/tests/testthat/test-extract_random_effects.R
@@ -347,6 +347,14 @@ test_that('extract_random_effects.gam fails if no factors', {
 
 })
 
+test_that('extract_random_effects.gam can handle categorical slopes', {
+  # gam_cat_slope discretizes Days into 3 levels: expect 1 intercept + 3 slope rows per Subject
+  expect_equal(
+    nrow(extract_random_effects(gam_cat_slope, re = 'Subject', add_group_N = TRUE, ci_level = .9)),
+    nlevels(sleepstudy$Subject)*1 + nlevels(sleepstudy$Subject)*3
+  )
+})
+
 test_that('extract_random_effects.gam correct output', {
   expect_equal(
     nrow(extract_random_effects(gam_2, re = 'Subject')),