@@ -41,20 +41,16 @@ import kotlin.reflect.KProperty
4141 * defines the group consisting of all rows where the column(s) contain that value combination.
4242 *
4343 * Returns a [GroupBy] — a dataframe-like structure that contains all unique combinations of key values
44- * along with the corresponding groups of rows (each represented as a [DataFrame]).
44+ * along with the corresponding groups of rows (each represented as a [DataFrame]) as rows.
4545 *
4646 * A [GroupBy] can then be:
4747 * * [transformed][Transformation] into a new [GroupBy];
4848 * * [reduced][Reducing] into a [DataFrame], where each group is collapsed into a single representative row;
4949 * * [aggregated][Aggregation] into a [DataFrame], where each group is transformed into one or more rows of derived values;
50- * * [pivoted][Pivoting] into a [PivotGroupBy] structure, which combines [pivot] and [groupBy] operations.
50+ * * [pivoted][Pivoting] into a [PivotGroupBy] structure, which combines [pivot] and [groupBy] operations
51+ * and then reduced or aggregated into a [DataFrame].
5152 *
52- * Grouping keys can also be created inline:
53- * ```kotlin
54- * // Create a new column "newName" based on existing "oldName" values
55- * // and use it as a grouping key:
56- * df.groupBy { expr("newName") { oldName.drop(5) } }
57- * ```
53+ * @include [GroupingKeysInline]
5854 *
5955 * Check out [Grammar].
6056 *
@@ -66,6 +62,8 @@ import kotlin.reflect.KProperty
6662 *
6763 * Don't confuse this with [group], which groups column into
6864 * [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup].
65+ *
66+ * See also [pivot][DataFrame.pivot], which groups rows of a [DataFrame] horizontally.
6967 */
7068internal interface GroupByDocs {
7169 /* *
@@ -160,16 +158,16 @@ internal interface GroupByDocs {
160158 * ### Pivot [GroupBy] into [PivotGroupBy] and reduce / aggregate it
161159 *
162160 * {@include [Indent]}
163- * `| `__`.`__ [**`pivot`**][GroupBy.pivot]**` { `**`columns: `[`ColumnsSelector`][ColumnsSelector]**` }`**
161+ * [GroupBy][GroupBy]`.` [**`pivot`**][GroupBy.pivot]**` { `**`columns: `[`ColumnsSelector`][ColumnsSelector]**` }`**
164162 *
165163 * {@include [Indent]}
166164 * ` \[ `__`.`__[**`default`**][PivotGroupBy.default]**`(`**`defaultValue`**`) `**`]`
167165 *
168166 * {@include [Indent]}
169- * `| ` __`.`__[<pivot_reducer >][PivotGroupByDocs.Reducing]
167+ * __`.`__[<pivot_groupBy_reducer >][PivotGroupByDocs.Reducing]
170168 *
171169 * {@include [Indent]}
172- * `| `__`.`__[<pivot_aggregator >][PivotGroupByDocs.Aggregation]
170+ * `| `__`.`__[<pivot_groupBy_aggregator >][PivotGroupByDocs.Aggregation]
173171 *
174172 * Check out [PivotGroupBy Grammar][PivotGroupByDocs.Grammar] for more information.
175173 */
@@ -183,14 +181,37 @@ internal interface GroupByDocs {
183181
184182 /* *
185183 * ### [GroupBy] aggregation statistics
186- * * [count][Grouped.count]
187- * * [max][Grouped.max]/[maxOf][Grouped.maxOf]/[maxFor][Grouped.maxFor]
188- * * [min][Grouped.min]/[minOf][Grouped.minOf]/[minFor][Grouped.minFor]
189- * * [sum][Grouped.sum]/[sumOf][Grouped.sumOf]/[sumFor][Grouped.sumFor]
190- * * [mean][Grouped.mean]/[meanOf][Grouped.meanOf]/[meanFor][Grouped.meanFor]
191- * * [std][Grouped.std]/[stdOf][Grouped.stdOf]/[stdFor][Grouped.stdFor]
192- * * [median][Grouped.median]/[medianOf][Grouped.medianOf]/[medianFor][Grouped.medianFor]
193- * * [percentile][Grouped.percentile]/[percentileOf][Grouped.percentileOf]/[percentileFor][Grouped.percentileFor]
184+ *
185+ * Provides predefined shortcuts for the most common statistical aggregation operations
186+ * that can be applied to each group within a [GroupBy].
187+ *
188+ * Each function computes a statistic across the rows of a group and returns the result as
189+ * a new column (or several columns) in the resulting [DataFrame].
190+ *
191+ * * [count][Grouped.count] — calculate the number of rows in each group;
192+ * * [max][Grouped.max] / [maxOf][Grouped.maxOf] / [maxFor][Grouped.maxFor] —
193+ * calculate the maximum of all values on the selected columns / by a row expression /
194+ * for each of the selected columns within each group;
195+ * * [min][Grouped.min] / [minOf][Grouped.minOf] / [minFor][Grouped.minFor] —
196+ * calculate the minimum of all values on the selected columns / by a row expression /
197+ * for each of the selected columns within each group;
198+ * * [sum][Grouped.sum] / [sumOf][Grouped.sumOf] / [sumFor][Grouped.sumFor] —
199+ * calculate the sum of all values on the selected columns / by a row expression /
200+ * for each of the selected columns within each group;
201+ * * [mean][Grouped.mean] / [meanOf][Grouped.meanOf] / [meanFor][Grouped.meanFor] —
202+ * calculate the mean (average) of all values on the selected columns / by a row expression /
203+ * for each of the selected columns within each group;
204+ * * [std][Grouped.std] / [stdOf][Grouped.stdOf] / [stdFor][Grouped.stdFor] —
205+ * calculate the standard deviation of all values on the selected columns / by a row expression /
206+ * for each of the selected columns within each group;
207+ * * [median][Grouped.median] / [medianOf][Grouped.medianOf] / [medianFor][Grouped.medianFor] —
208+ * calculate the median of all values on the selected columns / by a row expression /
209+ * for each of the selected columns within each group;
210+ * * [percentile][Grouped.percentile] / [percentileOf][Grouped.percentileOf] / [percentileFor][Grouped.percentileFor] —
211+ * calculate a specified percentile of all values on the selected columns / by a row expression /
212+ * for each of the selected columns within each group.
213+ *
214+ * For more information: {@include [DocumentationUrls.GroupByStatistics]}
194215 */
195216 interface AggregationStatistics
196217
@@ -235,8 +256,8 @@ internal interface GroupByDocs {
235256 * These functions return a [ReducedGroupBy], which can then be transformed into a new [DataFrame]
236257 * containing the reduced rows (either original or transformed) using one of the following methods:
237258 * * [concat][ReducedGroupBy.concat] — simply concatenates all reduced rows;
238- * * [values][ReducedGroupBy.values] — creates a [DataFrame] with new rows by transforming each reduced row
239- * using [ColumnsForAggregateSelectionDsl];
259+ * * [values][ReducedGroupBy.values] — creates a [DataFrame] containing the values
260+ * from the reduced rows in the selected columns;
240261 * * [into][ReducedGroupBy.into] — creates a new column with values computed with [RowExpression] on each row,
241262 * or a new [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup]
242263 * containing each group reduced to a single row;
@@ -262,14 +283,16 @@ internal interface GroupByDocs {
262283 * The following aggregation methods are available:
263284 * * [concat][GroupBy.concat] — concatenates all rows from all groups into a single [DataFrame],
264285 * without preserving grouping keys;
286+ * * [toDataFrame][GroupBy.toDataFrame] — returns this [GroupBy] as a [DataFrame] with the grouping keys and
287+ * the corresponding groups in a [FrameColumn];
265288 * * [concatWithKeys][GroupBy.concatWithKeys] — a variant of [concat][GroupBy.concat] that also includes
266289 * grouping keys that were not present in the original [DataFrame];
267290 * * [into][GroupBy.into] — creates a new column containing a list of values computed with a [RowExpression]
268291 * for each group, or a new [frame column][org.jetbrains.kotlinx.dataframe.columns.FrameColumn]
269292 * containing the groups themselves;
270- * * [values][ReducedGroupBy .values] — creates a [DataFrame] with new rows produced by transforming
271- * each group using [ColumnsForAggregateSelectionDsl];
272- * * [count][Grouped.count] — returns a [DataFrame] containing the grouping key columns and an additional column
293+ * * [values][Grouped.values] — creates a [DataFrame] containing values collected into a single [List]
294+ * from all rows of each group for the selected columns;
295+ * * [count][Grouped.count] — creates a [DataFrame] containing the grouping key columns and an additional column
273296 * with the number of rows in each corresponding group;
274297 * * [aggregate][Grouped.aggregate] — performs a set of custom aggregations using [AggregateDsl],
275298 * allowing you to compute one or more derived values per group;
@@ -295,6 +318,19 @@ internal interface GroupByDocs {
295318 * @include [PivotGroupByDocs.CommonDescription]
296319 */
297320 interface Pivoting
321+
322+ /* *
323+ * Grouping keys can also be created inline
324+ * (e.g. by creating a new column using [expr] or simply renaming the old one
325+ * using [named]):
326+ * ```kotlin
327+ * // Create a new column "newName" based on existing "oldName" values
328+ * // and use it as a grouping key:
329+ * df.groupBy { expr("newName") { oldName.drop(5) } }
330+ * ```
331+ */
332+ @ExcludeFromSources
333+ interface GroupingKeysInline
298334}
299335
300336/* * {@set [SelectingColumns.OPERATION] [groupBy][groupBy]} */
@@ -348,19 +384,13 @@ public fun <T> DataFrame<T>.groupBy(vararg cols: AnyColumnReference, moveToTop:
348384// endregion
349385
350386/* *
351- * Groups the rows of this [Pivot] into [PivotGroupBy]
387+ * Groups the rows within the groups of this [Pivot]
352388 * based on the values in one or more specified [key columns][\columns].
353- *
354- * Works like regular [DataFrame.groupBy] on pivot groups.
355- *
356- * Grouping keys can also be created inline:
357- * ```kotlin
358- * // Create a new column "newName" based on existing "oldName" values
359- * // and use it as a grouping key:
360- * pivot.groupBy { expr("newName") { oldName.drop(5) } }
361- * ```
389+ * Returns a [PivotGroupBy].
362390 *
363391 * @include [PivotGroupByDocs.CommonDescription]
392+ *
393+ * @include [GroupByDocs.GroupingKeysInline]
364394 */
365395@ExcludeFromSources
366396private interface GroupByForPivotDocs
@@ -376,17 +406,8 @@ private interface CommonGroupByForPivotDocs
376406
377407/* *
378408 * {@include [CommonGroupByForPivotDocs]}
379- * @include [SelectingColumns.Dsl]
380- *
381- * #### For example:
409+ * @include [SelectingColumns.Dsl.WithExample] {@include [SetGroupByOperationArg] {@set [SelectingColumns.RECEIVER] <code>`pivot`</code>}}
382410 *
383- * `pivot.`{@get [OPERATION]}` { length `[and][ColumnsSelectionDsl.and]` age }`
384- *
385- * `pivot.`{@get [OPERATION]}` { `[cols][ColumnsSelectionDsl.cols]`(1..5) }`
386- *
387- * `pivot.`{@get [OPERATION]}` { `[colsOf][ColumnsSelectionDsl.colsOf]`<`[Double][Double]`>() }`
388- *
389- * {@include [SetGroupByOperationArg]}
390411 * @param moveToTop Specifies whether nested grouping columns should be moved to the top level
391412 * or kept inside a [ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup].
392413 * Defaults to `true`.
0 commit comments