@@ -68,6 +68,7 @@
import java.util.Set;
import java.util.stream.IntStream;

import static org.apache.flink.table.types.logical.LogicalTypeRoot.TIMESTAMP_WITHOUT_TIME_ZONE;
import static org.apache.hudi.hadoop.utils.HoodieInputFormatUtils.HOODIE_COMMIT_TIME_COL_POS;
import static org.apache.hudi.hadoop.utils.HoodieInputFormatUtils.HOODIE_RECORD_KEY_COL_POS;
import static org.apache.hudi.table.format.FormatUtils.buildAvroRecordBySchema;
@@ -298,10 +299,21 @@ private ParquetColumnarRowSplitReader getReader(String path, int[] requiredPos)
new org.apache.hadoop.fs.Path(path).getParent(),
this.conf.getBoolean(FlinkOptions.HIVE_STYLE_PARTITIONING),
FilePathUtils.extractPartitionKeys(this.conf));

Contributor:
Thanks, why not fix the partObjects directly when generating it?

Contributor (author):
Looks like a good way! Modified.

LinkedHashMap<String, Object> partObjects = new LinkedHashMap<>();
partSpec.forEach((k, v) -> partObjects.put(k, DataTypeUtils.resolvePartition(
defaultPartName.equals(v) ? null : v,
fieldTypes.get(fieldNames.indexOf(k)))));

      // can't convert the data by partition value if a timestamp is used as the partition key.
partSpec.entrySet().stream()
.filter(entry -> fieldTypes
.get(fieldNames.indexOf(entry.getKey()))
.getLogicalType().getTypeRoot() != TIMESTAMP_WITHOUT_TIME_ZONE)
Contributor @danny0405, Aug 15, 2022:
Why ignore the timestamp partition field directly? The current code may generate a long number string as the partition path field. Do you mean DataTypeUtils.resolvePartition cannot handle such a field? Then we should fix DataTypeUtils.resolvePartition first, and we should handle different precisions for the timestamp type.

.forEach(entry -> {
String k = entry.getKey();
String v = entry.getValue();
partObjects.put(k, DataTypeUtils.resolvePartition(
defaultPartName.equals(v) ? null : v,
fieldTypes.get(fieldNames.indexOf(k))));
});

return ParquetSplitReaderUtil.genPartColumnarRowReader(
this.conf.getBoolean(FlinkOptions.UTC_TIMEZONE),
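The review comment above suggests fixing DataTypeUtils.resolvePartition so that timestamp partition values can be converted like any other field instead of being filtered out. Below is a minimal sketch of that idea, assuming the partition value arrives as an epoch number whose unit depends on the declared TIMESTAMP(p) precision; the class and method are hypothetical helpers for illustration, not existing Hudi APIs.

import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneOffset;

// Hypothetical helper, not an existing Hudi API: converts an epoch-based partition
// string into a LocalDateTime, scaling by the declared TIMESTAMP(p) precision.
public final class TimestampPartitionSketch {

  public static LocalDateTime resolveTimestampPartition(String partitionValue, int precision) {
    long raw = Long.parseLong(partitionValue);
    final long epochMillis;
    switch (precision) {
      case 0: // seconds
        epochMillis = raw * 1000L;
        break;
      case 3: // milliseconds
        epochMillis = raw;
        break;
      case 6: // microseconds
        epochMillis = raw / 1000L;
        break;
      default:
        throw new IllegalArgumentException("Unsupported timestamp precision: " + precision);
    }
    return LocalDateTime.ofInstant(Instant.ofEpochMilli(epochMillis), ZoneOffset.UTC);
  }
}

With handling along these lines in resolvePartition, the filter on TIMESTAMP_WITHOUT_TIME_ZONE above would no longer be needed and the timestamp entry could flow through the same code path as every other partition field.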
@@ -18,11 +18,16 @@

package org.apache.hudi.table;

import org.apache.flink.table.types.DataType;
import org.apache.flink.table.types.logical.RowType;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.adapter.TestTableEnvs;
import org.apache.hudi.common.model.DefaultHoodieRecordPayload;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.configuration.FlinkOptions;
import org.apache.hudi.source.FileIndex;
import org.apache.hudi.table.catalog.HoodieHiveCatalog;
import org.apache.hudi.table.catalog.HoodieCatalogTestUtils;
import org.apache.hudi.util.StreamerUtil;
@@ -56,7 +61,9 @@
import org.junit.jupiter.params.provider.ValueSource;

import java.io.File;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -66,9 +73,11 @@
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static org.apache.hudi.configuration.FlinkOptions.PARTITION_DEFAULT_NAME;
import static org.apache.hudi.utils.TestConfigurations.catalog;
import static org.apache.hudi.utils.TestConfigurations.sql;
import static org.apache.hudi.utils.TestData.assertRowsEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

@@ -1395,6 +1404,51 @@ void testWriteReadWithComputedColumns() {
assertRowsEquals(result2, "[+I[3]]");
}

@Test
public void testReadMorTableWithCompactionAndTimestampPartition() throws TableNotExistException, InterruptedException {
TableEnvironment tableEnv = batchTableEnv;
String createSql = sql("t1")
.field("id int")
.field("ts timestamp(3)")
.pkField("id")
.partitionField("ts")
.option(FlinkOptions.PATH, tempFile.getAbsolutePath())
.option(FlinkOptions.TABLE_TYPE, FlinkOptions.TABLE_TYPE_MERGE_ON_READ)
.option(FlinkOptions.PARTITION_PATH_FIELD, "ts")
.option("hoodie.compact.inline", true)
.option(FlinkOptions.COMPACTION_DELTA_COMMITS, 2)
.option(FlinkOptions.PARTITION_FORMAT, FlinkOptions.PARTITION_FORMAT_DASHED_DAY)
.end();
tableEnv.executeSql(createSql);

execInsertSql(tableEnv, "insert into t1 values (1, TIMESTAMP '2022-08-11 10:05:59')");
List<Row> result1 = CollectionUtil.iterableToList(() -> tableEnv.sqlQuery("select * from t1").execute().collect());

execInsertSql(tableEnv, "insert into t1 values (2, TIMESTAMP '2022-08-11 11:05:59')");
List<Row> result2 = CollectionUtil.iterableToList(() -> tableEnv.sqlQuery("select * from t1").execute().collect());

ObjectPath objectPath = ObjectPath.fromString(tableEnv.getCurrentDatabase() + ".t1");
String currentCatalog = tableEnv.getCurrentCatalog();
DataType dataType = tableEnv.getCatalog(currentCatalog).get().getTable(objectPath).getSchema().toRowDataType();
RowType rowType = (RowType) dataType.getLogicalType();
FileIndex fileIndex = FileIndex.instance(
new Path(tempFile.getAbsolutePath()), tableEnv.getConfig().getConfiguration(), rowType);
List<Map<String, String>> partitions =
fileIndex.getPartitions(Collections.singletonList("ts"), PARTITION_DEFAULT_NAME.defaultValue(), false);
assertEquals(1, partitions.size());
assertEquals("2022-08-11", partitions.get(0).get("ts"));

FileStatus[] fileStatuses = fileIndex.getFilesInPartitions();
// should have two log files and one parquet file
assertEquals(3, fileStatuses.length);
assertEquals(1,
Arrays.stream(fileStatuses).filter(file -> file.getPath().getName().endsWith("parquet")).count()
);

assertRowsEquals(result1, "[+I[1, 2022-08-11T10:05:59]]");
assertRowsEquals(result2, "[+I[1, 2022-08-11T10:05:59], +I[2, 2022-08-11T11:05:59]]");
}
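For reference, the test above depends on day-level partition formatting: with FlinkOptions.PARTITION_FORMAT_DASHED_DAY, both inserted rows should land in the same partition path "2022-08-11". A small sketch of that mapping, assuming the dashed-day format corresponds to a yyyy-MM-dd pattern (the class below is illustrative only, not part of the test suite):

import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;

// Illustrative only: shows why both test rows fall into the single "2022-08-11" partition
// when a dashed-day (yyyy-MM-dd) partition format is used.
public final class PartitionFormatSketch {
  public static void main(String[] args) {
    DateTimeFormatter dashedDay = DateTimeFormatter.ofPattern("yyyy-MM-dd");
    System.out.println(LocalDateTime.parse("2022-08-11T10:05:59").format(dashedDay)); // 2022-08-11
    System.out.println(LocalDateTime.parse("2022-08-11T11:05:59").format(dashedDay)); // 2022-08-11
  }
}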

// -------------------------------------------------------------------------
// Utilities
// -------------------------------------------------------------------------