[HUDI-6635] Hudi Spark Integration Redesign MOR and Bootstrap reading #9276
```diff
@@ -17,4 +17,4 @@
 org.apache.hudi.DefaultSource
-org.apache.spark.sql.execution.datasources.parquet.HoodieParquetFileFormat
+org.apache.spark.sql.execution.datasources.parquet.LegacyHoodieParquetFileFormat
```
Contributor: When switching to the new file format with the config, should the …

Contributor (PR author): Maybe? I'm not sure. What benefit does it give us?

Contributor: Just curious, I don't have a clear answer. Since …
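For context on the registration change above: Spark discovers classes listed in this kind of service file through the JDK ServiceLoader mechanism. The sketch below is only an illustration and is not part of the PR; it assumes the edited file is the standard META-INF/services/org.apache.spark.sql.sources.DataSourceRegister entry and that the listed format classes implement DataSourceRegister.

```scala
// Minimal sketch (not part of this PR): enumerate the DataSourceRegister
// providers visible on the classpath. Assumption: the edited file is the
// standard META-INF/services/org.apache.spark.sql.sources.DataSourceRegister entry.
import java.util.ServiceLoader

import org.apache.spark.sql.sources.DataSourceRegister

object ListRegisteredProviders {
  def main(args: Array[String]): Unit = {
    val providers = ServiceLoader.load(classOf[DataSourceRegister]).iterator()
    while (providers.hasNext) {
      val p = providers.next()
      // With a Hudi jar built from this PR, the Hudi parquet entry is expected to
      // resolve to LegacyHoodieParquetFileFormat rather than HoodieParquetFileFormat.
      println(s"${p.shortName()} -> ${p.getClass.getName}")
    }
  }
}
```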
```diff
@@ -246,6 +246,16 @@ object DefaultSource {
     } else if (isCdcQuery) {
       CDCRelation.getCDCRelation(sqlContext, metaClient, parameters)
     } else {
+      lazy val newHudiFileFormatUtils = if (parameters.getOrElse(USE_NEW_HUDI_PARQUET_FILE_FORMAT.key,
+        USE_NEW_HUDI_PARQUET_FILE_FORMAT.defaultValue).toBoolean && (globPaths == null || globPaths.isEmpty)
+        && parameters.getOrElse(REALTIME_MERGE.key(), REALTIME_MERGE.defaultValue())
+        .equalsIgnoreCase(REALTIME_PAYLOAD_COMBINE_OPT_VAL)) {
```
Contributor: Is there any issue with …

Contributor (PR author): Yes. I wasn't able to get it to work correctly before the code freeze.
```diff
+        val formatUtils = new NewHoodieParquetFileFormatUtils(sqlContext, metaClient, parameters, userSchema)
+        if (formatUtils.hasSchemaOnRead) Option.empty else Some(formatUtils)
+      } else {
+        Option.empty
+      }
+
       (tableType, queryType, isBootstrappedTable) match {
         case (COPY_ON_WRITE, QUERY_TYPE_SNAPSHOT_OPT_VAL, false) |
              (COPY_ON_WRITE, QUERY_TYPE_READ_OPTIMIZED_OPT_VAL, false) |
```
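To make the control flow in this hunk easier to follow, here is a self-contained sketch of the gate-and-fallback pattern: the lazy Option is Some only when the new-format config is enabled, no glob paths are supplied, payload-combine merging is requested, and schema-on-read is not in play; each relation case then falls back to the legacy path when it is None. The names in the sketch (FormatUtilsStub, resolveRelation, and the boolean parameters) are hypothetical stand-ins, not Hudi APIs.

```scala
// Hypothetical, simplified re-statement of the gating pattern used in
// DefaultSource above; none of these names are real Hudi APIs.
object GateAndFallbackSketch {

  // Stand-in for NewHoodieParquetFileFormatUtils.
  final case class FormatUtilsStub(hasSchemaOnRead: Boolean)

  def resolveRelation(newFormatEnabled: Boolean,
                      globPathsEmpty: Boolean,
                      payloadCombine: Boolean,
                      schemaOnRead: Boolean): String = {
    // Lazy so the check only runs if a case below actually consults it.
    lazy val newFormatUtils: Option[FormatUtilsStub] =
      if (newFormatEnabled && globPathsEmpty && payloadCombine) {
        val utils = FormatUtilsStub(schemaOnRead)
        // Per the review thread, schema-on-read is not handled by the new path yet.
        if (utils.hasSchemaOnRead) None else Some(utils)
      } else {
        None
      }

    newFormatUtils match {
      case Some(_) => "HadoopFsRelation backed by the new parquet file format"
      case None    => "legacy Hudi relation (MergeOnReadSnapshotRelation, bootstrap relations, ...)"
    }
  }

  def main(args: Array[String]): Unit = {
    println(resolveRelation(newFormatEnabled = true, globPathsEmpty = true,
      payloadCombine = true, schemaOnRead = false)) // takes the new path
    println(resolveRelation(newFormatEnabled = true, globPathsEmpty = true,
      payloadCombine = true, schemaOnRead = true))  // falls back: schema on read
  }
}
```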
```diff
@@ -256,16 +266,28 @@ object DefaultSource {
         new IncrementalRelation(sqlContext, parameters, userSchema, metaClient)

       case (MERGE_ON_READ, QUERY_TYPE_SNAPSHOT_OPT_VAL, false) =>
-        new MergeOnReadSnapshotRelation(sqlContext, parameters, metaClient, globPaths, userSchema)
+        if (newHudiFileFormatUtils.isEmpty) {
+          new MergeOnReadSnapshotRelation(sqlContext, parameters, metaClient, globPaths, userSchema)
+        } else {
+          newHudiFileFormatUtils.get.getHadoopFsRelation(isMOR = true, isBootstrap = false)
+        }

       case (MERGE_ON_READ, QUERY_TYPE_INCREMENTAL_OPT_VAL, _) =>
         new MergeOnReadIncrementalRelation(sqlContext, parameters, metaClient, userSchema)

       case (MERGE_ON_READ, QUERY_TYPE_SNAPSHOT_OPT_VAL, true) =>
-        new HoodieBootstrapMORRelation(sqlContext, userSchema, globPaths, metaClient, parameters)
+        if (newHudiFileFormatUtils.isEmpty) {
+          new HoodieBootstrapMORRelation(sqlContext, userSchema, globPaths, metaClient, parameters)
+        } else {
+          newHudiFileFormatUtils.get.getHadoopFsRelation(isMOR = true, isBootstrap = true)
+        }

       case (_, _, true) =>
-        resolveHoodieBootstrapRelation(sqlContext, globPaths, userSchema, metaClient, parameters)
+        if (newHudiFileFormatUtils.isEmpty) {
+          resolveHoodieBootstrapRelation(sqlContext, globPaths, userSchema, metaClient, parameters)
+        } else {
+          newHudiFileFormatUtils.get.getHadoopFsRelation(isMOR = false, isBootstrap = true)
+        }

       case (_, _, _) =>
         throw new HoodieException(s"Invalid query type : $queryType for tableType: $tableType," +
```
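As a usage-level illustration of what the dispatch above enables, the hedged sketch below reads a MOR table with the new file format path turned on. It assumes the config constant used in the diff (USE_NEW_HUDI_PARQUET_FILE_FORMAT) is exposed on DataSourceReadOptions and that a Hudi table exists at the given path; neither is confirmed by this page.

```scala
// Hedged usage sketch, not taken from the PR. Assumptions: the constant
// USE_NEW_HUDI_PARQUET_FILE_FORMAT lives on DataSourceReadOptions, and a MOR
// table exists at the (hypothetical) path below.
import org.apache.hudi.DataSourceReadOptions
import org.apache.spark.sql.SparkSession

object NewFormatReadSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("hudi-new-parquet-file-format-read")
      .master("local[*]")
      .getOrCreate()

    val df = spark.read
      .format("hudi")
      // Assumed to default to the legacy relation path when this option is unset.
      .option(DataSourceReadOptions.USE_NEW_HUDI_PARQUET_FILE_FORMAT.key(), "true")
      .load("/tmp/hudi/mor_table") // hypothetical table path

    df.show(10)
    spark.stop()
  }
}
```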