|
20 | 20 |
|
21 | 21 | import org.apache.hudi.common.model.HoodieRecord; |
22 | 22 | import org.apache.hudi.common.testutils.HoodieTestDataGenerator; |
| 23 | +import org.apache.hudi.common.util.Option; |
| 24 | +import org.apache.hudi.config.HoodieWriteConfig; |
23 | 25 | import org.apache.hudi.table.BulkInsertPartitioner; |
24 | 26 | import org.apache.hudi.testutils.HoodieClientTestHarness; |
25 | 27 | import org.apache.hudi.testutils.SparkDatasetTestUtils; |
|
29 | 31 | import org.apache.spark.sql.Row; |
30 | 32 | import org.junit.jupiter.api.AfterEach; |
31 | 33 | import org.junit.jupiter.api.BeforeEach; |
| 34 | +import org.junit.jupiter.api.Test; |
32 | 35 | import org.junit.jupiter.params.ParameterizedTest; |
33 | 36 | import org.junit.jupiter.params.provider.Arguments; |
34 | 37 | import org.junit.jupiter.params.provider.MethodSource; |
|
48 | 51 | */ |
49 | 52 | public class TestBulkInsertInternalPartitionerForRows extends HoodieClientTestHarness { |
50 | 53 |
|
| 54 | + private static final Comparator<Row> KEY_COMPARATOR = |
| 55 | + Comparator.comparing(o -> (o.getAs(HoodieRecord.PARTITION_PATH_METADATA_FIELD) + "+" + o.getAs(HoodieRecord.RECORD_KEY_METADATA_FIELD))); |
51 | 56 | @BeforeEach |
52 | 57 | public void setUp() throws Exception { |
53 | 58 | initSparkContexts("TestBulkInsertInternalPartitionerForRows"); |
@@ -77,29 +82,55 @@ public void testBulkInsertInternalPartitioner(BulkInsertSortMode sortMode, |
77 | 82 | Dataset<Row> records1 = generateTestRecords(); |
78 | 83 | Dataset<Row> records2 = generateTestRecords(); |
79 | 84 | testBulkInsertInternalPartitioner(BulkInsertInternalPartitionerWithRowsFactory.get(sortMode), |
80 | | - records1, isGloballySorted, isLocallySorted, generateExpectedPartitionNumRecords(records1)); |
| 85 | + records1, isGloballySorted, isLocallySorted, generateExpectedPartitionNumRecords(records1), Option.empty()); |
81 | 86 | testBulkInsertInternalPartitioner(BulkInsertInternalPartitionerWithRowsFactory.get(sortMode), |
82 | | - records2, isGloballySorted, isLocallySorted, generateExpectedPartitionNumRecords(records2)); |
| 87 | + records2, isGloballySorted, isLocallySorted, generateExpectedPartitionNumRecords(records2), Option.empty()); |
| 88 | + } |
| 89 | + |
| 90 | + @Test |
| 91 | + public void testCustomColumnSortPartitionerWithRows() { |
| 92 | + Dataset<Row> records1 = generateTestRecords(); |
| 93 | + Dataset<Row> records2 = generateTestRecords(); |
| 94 | + String sortColumnString = records1.columns()[5]; |
| 95 | + String[] sortColumns = sortColumnString.split(","); |
| 96 | + Comparator<Row> comparator = getCustomColumnComparator(sortColumns); |
| 97 | + |
| 98 | + testBulkInsertInternalPartitioner(new RowCustomColumnsSortPartitioner(sortColumns), |
| 99 | + records1, false, true, generateExpectedPartitionNumRecords(records1), Option.of(comparator)); |
| 100 | + testBulkInsertInternalPartitioner(new RowCustomColumnsSortPartitioner(sortColumns), |
| 101 | + records2, false, true, generateExpectedPartitionNumRecords(records2), Option.of(comparator)); |
| 102 | + |
| 103 | + HoodieWriteConfig config = HoodieWriteConfig |
| 104 | + .newBuilder() |
| 105 | + .withPath("/") |
| 106 | + .withUserDefinedBulkInsertPartitionerClass(RowCustomColumnsSortPartitioner.class.getName()) |
| 107 | + .withUserDefinedBulkInsertPartitionerSortColumns(sortColumnString) |
| 108 | + .build(); |
| 109 | + testBulkInsertInternalPartitioner(new RowCustomColumnsSortPartitioner(config), |
| 110 | + records1, false, true, generateExpectedPartitionNumRecords(records1), Option.of(comparator)); |
| 111 | + testBulkInsertInternalPartitioner(new RowCustomColumnsSortPartitioner(config), |
| 112 | + records2, false, true, generateExpectedPartitionNumRecords(records2), Option.of(comparator)); |
83 | 113 | } |
84 | 114 |
|
85 | 115 | private void testBulkInsertInternalPartitioner(BulkInsertPartitioner partitioner, |
86 | 116 | Dataset<Row> rows, |
87 | 117 | boolean isGloballySorted, boolean isLocallySorted, |
88 | | - Map<String, Long> expectedPartitionNumRecords) { |
| 118 | + Map<String, Long> expectedPartitionNumRecords, |
| 119 | + Option<Comparator<Row>> comparator) { |
89 | 120 | int numPartitions = 2; |
90 | 121 | Dataset<Row> actualRecords = (Dataset<Row>) partitioner.repartitionRecords(rows, numPartitions); |
91 | 122 | List<Row> collectedActualRecords = actualRecords.collectAsList(); |
92 | 123 | if (isGloballySorted) { |
93 | 124 | // Verify global order |
94 | | - verifyRowsAscendingOrder(collectedActualRecords); |
| 125 | + verifyRowsAscendingOrder(collectedActualRecords, comparator); |
95 | 126 | } else if (isLocallySorted) { |
96 | 127 | // Verify local order |
97 | 128 | actualRecords.mapPartitions((MapPartitionsFunction<Row, Object>) input -> { |
98 | 129 | List<Row> partitionRows = new ArrayList<>(); |
99 | 130 | while (input.hasNext()) { |
100 | 131 | partitionRows.add(input.next()); |
101 | 132 | } |
102 | | - verifyRowsAscendingOrder(partitionRows); |
| 133 | + verifyRowsAscendingOrder(partitionRows, comparator); |
103 | 134 | return Collections.emptyList().iterator(); |
104 | 135 | }, SparkDatasetTestUtils.ENCODER); |
105 | 136 | } |
@@ -130,10 +161,20 @@ public Dataset<Row> generateTestRecords() { |
130 | 161 | return rowsPart1.union(rowsPart2); |
131 | 162 | } |
132 | 163 |
|
133 | | - private void verifyRowsAscendingOrder(List<Row> records) { |
| 164 | + private void verifyRowsAscendingOrder(List<Row> records, Option<Comparator<Row>> comparator) { |
134 | 165 | List<Row> expectedRecords = new ArrayList<>(records); |
135 | | - Collections.sort(expectedRecords, Comparator.comparing(o -> (o.getAs(HoodieRecord.PARTITION_PATH_METADATA_FIELD) + "+" + o.getAs(HoodieRecord.RECORD_KEY_METADATA_FIELD)))); |
| 166 | + Collections.sort(expectedRecords,comparator.orElse(KEY_COMPARATOR)); |
136 | 167 | assertEquals(expectedRecords, records); |
137 | 168 | } |
138 | 169 |
|
| 170 | + private Comparator<Row> getCustomColumnComparator(String[] sortColumns) { |
| 171 | + Comparator<Row> comparator = Comparator.comparing(row -> { |
| 172 | + StringBuilder sb = new StringBuilder(); |
| 173 | + for (String col : sortColumns) { |
| 174 | + sb.append(row.getAs(col).toString()); |
| 175 | + } |
| 176 | + return sb.toString(); |
| 177 | + }); |
| 178 | + return comparator; |
| 179 | + } |
139 | 180 | } |
0 commit comments