-
Notifications
You must be signed in to change notification settings - Fork 1k
dcast only computes default fill if necessary #5549
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 24 commits
2886c4f
90f0647
26745f4
03dc91d
258befb
360ba9d
75102bf
6225799
5055306
6a93cb1
a40d969
2019a5c
c46cfaa
1a8ba9c
47d735e
7198d08
02f2c3a
fc542ec
6ae4c76
eb95ab8
6d8f614
3c7fb24
dcb51ed
83b0cf5
6f4b711
ee93c5f
747c76c
07c6838
4d6c0e1
359c3c3
4b96d35
4ca3736
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -152,23 +152,22 @@ dcast.data.table = function(data, formula, fun.aggregate = NULL, sep = "_", ..., | |
| dat = .Call(CsubsetDT, dat, idx, seq_along(dat)) | ||
| } | ||
| fun.call = m[["fun.aggregate"]] | ||
| fill.default = NULL | ||
| if (is.null(fun.call)) { | ||
| oo = forderv(dat, by=varnames, retGrp=TRUE) | ||
| if (attr(oo, 'maxgrpn', exact=TRUE) > 1L) { | ||
| messagef("'fun.aggregate' is NULL, but found duplicate row/column combinations, so defaulting to length(). That is, the variables %s used in 'formula' do not uniquely identify rows in the input 'data'. In such cases, 'fun.aggregate' is used to derive a single representative value for each combination in the output data.table, for example by summing or averaging (fun.aggregate=sum or fun.aggregate=mean, respectively). Check the resulting table for values larger than 1 to see which combinations were not unique. See ?dcast.data.table for more details.", brackify(varnames)) | ||
| fun.call = quote(length) | ||
| } | ||
| } | ||
| if (!is.null(fun.call)) { | ||
| dat_for_default_fill = dat | ||
tdhock marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| run_agg_funs = !is.null(fun.call) | ||
| if (run_agg_funs) { | ||
| fun.call = aggregate_funs(fun.call, lvals, sep, ...) | ||
| errmsg = gettext("Aggregating function(s) should take vector inputs and return a single value (length=1). However, function(s) returns length!=1. This value will have to be used to fill any missing combinations, and therefore must be length=1. Either override by setting the 'fill' argument explicitly or modify your function to handle this case appropriately.") | ||
| if (is.null(fill)) { | ||
| fill.default = suppressWarnings(dat[0L][, eval(fun.call)]) | ||
| # tryCatch(fill.default <- dat[0L][, eval(fun.call)], error = function(x) stopf(errmsg)) | ||
| if (nrow(fill.default) != 1L) stopf(errmsg) | ||
| maybe_err = function(list.of.columns) { | ||
| if (any(sapply(list.of.columns, length) != 1L)) stopf("Aggregating function(s) should take vector inputs and return a single value (length=1). However, function(s) returns length!=1. This value will have to be used to fill any missing combinations, and therefore must be length=1. Either override by setting the 'fill' argument explicitly or modify your function to handle this case appropriately.") | ||
tdhock marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| list.of.columns | ||
| } | ||
| dat = dat[, eval(fun.call), by=c(varnames)] | ||
| dat = dat[, maybe_err(eval(fun.call)), by=c(varnames)] | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this could possibly affect the code path in `[`.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Are you thinking of efficiency? Otherwise, passing tests ensure your concern is moot, right?
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Possibly for efficiency, but just touching the edge like that raises some concerns. |
||
| } | ||
| order_ = function(x) { | ||
| o = forderv(x, retGrp=TRUE, sort=TRUE) | ||
|
|
@@ -211,7 +210,12 @@ dcast.data.table = function(data, formula, fun.aggregate = NULL, sep = "_", ..., | |
| } | ||
| maplen = vapply_1i(mapunique, length) | ||
| idx = do.call("CJ", mapunique)[map, 'I' := .I][["I"]] # TO DO: move this to C and avoid materialising the Cross Join. | ||
| ans = .Call(Cfcast, lhs, val, maplen[[1L]], maplen[[2L]], idx, fill, fill.default, is.null(fun.call)) | ||
| fill.default = NULL | ||
| some_fill = anyNA(idx) | ||
| if (run_agg_funs && is.null(fill) && some_fill) { | ||
| fill.default = dat_for_default_fill[0L][, maybe_err(eval(fun.call))] | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same here. |
||
| } | ||
tdhock marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| ans = .Call(Cfcast, lhs, val, maplen[[1L]], maplen[[2L]], idx, fill, fill.default, is.null(fun.call), some_fill) | ||
MichaelChirico marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| allcols = do.call("paste", c(rhs, sep=sep)) | ||
| if (length(valnames) > 1L) | ||
| allcols = do.call("paste", if (identical(".", allcols)) list(valnames, sep=sep) | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.