Skip to content

Commit 21144a0

Browse files
pivot Count/Matches better description
1 parent 771ccc6 commit 21144a0

File tree

1 file changed

+45
-48
lines changed
  • core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api

1 file changed

+45
-48
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt

Lines changed: 45 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -445,23 +445,23 @@ public fun <T> DataFrame<T>.pivot(vararg columns: KProperty<*>, inward: Boolean?
445445
// region pivotMatches
446446

447447
/**
448-
* * Cell values are [Boolean] indicators showing whether matching rows exist
449-
* for each pivoting/grouping key combination.
448+
* Computes a **presence matrix** (similar to one-hot encoding) for the values in the
449+
* specified [\columns] of this [DataFrame], returning a new [DataFrame] where:
450450
*/
451451
@ExcludeFromSources
452-
internal interface PivotMatchesResultDescription
452+
internal interface PivotMatchesCommonDescription
453453

454454
/**
455-
* Computes whether matching rows exist in this [DataFrame] for all unique values of the
456-
* selected [\columns] across all possible combinations
457-
* of values in the remaining columns (all expecting selected).
458-
*
459-
* Performs a [pivot] operation on the specified [\columns] of this [DataFrame],
460-
* then [groups it by][Pivot.groupByOther] the remaining columns,
461-
* and produces a new [Boolean] matrix (in the form of a [DataFrame]).
462-
*
463-
* @include [PivotGroupByDocs.ResultingMatrixCommonDescription]
464-
* @include [PivotMatchesResultDescription]
455+
* * **Cells** contain a [Boolean] value indicating whether a row with the corresponding
456+
* combination of values (horizontal and vertical) exists in the [DataFrame].
457+
*/
458+
@ExcludeFromSources
459+
internal interface PivotMatchesResultCellDescription
460+
461+
/**
462+
* {@include [PivotMatchesCommonDescription]}
463+
* @include [PivotGroupByDocs.ResultingMatrixShortcutDescription] {@set [PivotGroupByDocs.GroupingColumns] remaining}
464+
* @include [PivotMatchesResultCellDescription]
465465
*
466466
* This function combines [pivot][DataFrame.pivot], [groupByOther][Pivot.groupByOther],
467467
* and [matches][PivotGroupBy.matches] operations into a single call.
@@ -538,23 +538,23 @@ public fun <T> DataFrame<T>.pivotMatches(vararg columns: KProperty<*>, inward: B
538538
// region pivotCounts
539539

540540
/**
541-
* * Cell values represent the number of matching rows
542-
* for each pivoting/grouping key combination.
541+
* Computes a **count matrix** (similar to frequency encoding) for the values in the
542+
* specified [\columns] of this [DataFrame], returning a new [DataFrame] where:
543543
*/
544544
@ExcludeFromSources
545-
internal interface PivotCountsResultDescription
545+
internal interface PivotCountsCommonDescription
546546

547547
/**
548-
* Computes number of matching rows in this [DataFrame] for all unique values of the
549-
* selected [\columns] (independently) across all possible combinations
550-
* of values in the remaining columns (all expecting selected).
551-
*
552-
* Performs a [pivot] operation on the specified [\columns] of this [DataFrame],
553-
* then [groups it by][Pivot.groupByOther] the remaining columns,
554-
* and produces a new count matrix (in the form of a [DataFrame]).
555-
*
556-
* @include [PivotGroupByDocs.ResultingMatrixCommonDescription]
557-
* @include [PivotCountsResultDescription]
548+
* * **Cells** contain an [Int] value indicating the number of rows with the corresponding
549+
* combination of values (horizontal and vertical) in the [DataFrame].
550+
*/
551+
@ExcludeFromSources
552+
internal interface PivotCountsResultCellDescription
553+
554+
/**
555+
* {@include [PivotCountsCommonDescription]}
556+
* @include [PivotGroupByDocs.ResultingMatrixShortcutDescription] {@set [PivotGroupByDocs.GroupingColumns] remaining}
557+
* @include [PivotCountsResultCellDescription]
558558
*
559559
* This function combines [pivot][DataFrame.pivot], [groupByOther][Pivot.groupByOther],
560560
* and [count][PivotGroupBy.count] operations into a single call.
@@ -686,14 +686,10 @@ public fun <G> GroupBy<*, G>.pivot(vararg columns: KProperty<*>, inward: Boolean
686686
// region pivotMatches
687687

688688
/**
689-
* Computes whether matching rows exist in groups of this [GroupBy] for all unique values of the
690-
* selected columns (independently) across all [groupBy] key combinations.
691-
*
692-
* Performs a [pivot][GroupBy.pivot] operation on the specified [\columns] of this [GroupBy] groups,
693-
* and produces a new matrix-like [DataFrame].
694-
*
695-
* @include [PivotGroupByDocs.ResultingMatrixCommonDescription]
696-
* @include [PivotMatchesResultDescription]
689+
* Computes a **presence matrix** (similar to one-hot encoding) for the values in the
690+
* specified [\columns] within each group of this [GroupBy], returning a new [DataFrame] where:
691+
* @include [PivotGroupByDocs.ResultingMatrixShortcutDescription]
692+
* @include [PivotMatchesResultCellDescription]
697693
*
698694
* This function combines [pivot][GroupBy.pivot]
699695
* and [matches][PivotGroupBy.matches] operations into a single call.
@@ -764,14 +760,10 @@ public fun <G> GroupBy<*, G>.pivotMatches(vararg columns: KProperty<*>, inward:
764760
// region pivotCounts
765761

766762
/**
767-
* Computes number of matching rows in groups of this [GroupBy] for all unique values of the
768-
* selected [\columns] (independently) across all [groupBy] key combinations.
769-
*
770-
* Performs a [pivot] operation on the specified [\columns] of this [DataFrame]
771-
* and produces a new matrix-like [DataFrame].
772-
*
773-
* @include [PivotGroupByDocs.ResultingMatrixCommonDescription]
774-
* @include [PivotCountsResultDescription]
763+
* Computes a **count matrix** (similar to frequency encoding) for the values in the
764+
* specified [\columns] within each group of this [GroupBy], returning a new [DataFrame] where:
765+
* @include [PivotGroupByDocs.ResultingMatrixShortcutDescription]
766+
* @include [PivotCountsResultCellDescription]
775767
*
776768
* This function combines [pivot][GroupBy.pivot]
777769
* and [count][PivotGroupBy.count] operations into a single call.
@@ -1202,14 +1194,19 @@ internal inline fun <T> Pivot<T>.delegate(crossinline body: PivotGroupBy<T>.() -
12021194
*/
12031195
internal interface PivotGroupByDocs {
12041196

1197+
interface GroupingColumns
1198+
12051199
/**
1206-
* In the resulting [DataFrame]:
1207-
* * Pivoted columns are displayed vertically — as [column groups][ColumnGroup] for each pivoted column,
1208-
* with subcolumns corresponding to their unique values;
1209-
* * Grouping key columns are displayed horizontally — as columns representing
1210-
* unique combinations of grouping key values;
1200+
* * **Columns** represent all unique values from the selected [\columns]
1201+
* (they become [column groups][ColumnGroup]
1202+
* corresponding to value combinations when using [then][PivotDsl.then],
1203+
* similar to [pivot]);
1204+
* * **Rows** correspond to all unique combinations of values from the {@get [GroupingColumns] grouping} columns;
1205+
* each combination is represented in dedicated key columns that store
1206+
* a distinct set of values for each row
1207+
* (similar to [keys][GroupBy.keys] in [GroupBy]).
12111208
*/
1212-
interface ResultingMatrixCommonDescription
1209+
interface ResultingMatrixShortcutDescription
12131210

12141211
/**
12151212
* [PivotGroupBy] is a dataframe-like structure that combines [Pivot] and [GroupBy],

0 commit comments

Comments
 (0)