@@ -23,40 +23,40 @@ import java.util
2323import java .util .concurrent .ConcurrentHashMap
2424import org .apache .avro .Schema
2525import org .apache .hudi .AvroConversionUtils
26+ import org .apache .hudi .avro .HoodieAvroUtils
2627import org .apache .hudi .avro .HoodieAvroUtils .{createFullName , fromJavaDate , toJavaDate }
2728import org .apache .hudi .common .model .HoodieRecord .HoodieMetadataField
29+ import org .apache .hudi .common .util .ValidationUtils
2830import org .apache .hudi .exception .HoodieException
2931import org .apache .spark .sql .catalyst .InternalRow
3032import org .apache .spark .sql .catalyst .expressions .{GenericInternalRow , JoinedRow , MutableProjection , Projection }
3133import org .apache .spark .sql .catalyst .util .{ArrayBasedMapData , ArrayData , GenericArrayData , MapData }
3234import org .apache .spark .sql .hudi .ColumnStatsExpressionUtils .AllowedTransformationExpression .exprUtils .generateMutableProjection
3335import org .apache .spark .sql .types ._
34- import scala .collection .mutable
3536
37+ import scala .collection .mutable
3638
39+ /**
40+ * Utility object with common operations on Spark [[InternalRow]] instances.
41+ * Provides methods similar to those in [[HoodieAvroUtils]].
42+ */
3743object HoodieInternalRowUtils {
3844
// Process-wide caches consulted by the getCached* accessors of this object.
// NOTE(review): those accessors test membership with ConcurrentHashMap.contains, which the JDK
// documents as a legacy equivalent of containsValue (not containsKey) - confirm the caches are hit.
// (source schema, target schema) -> projection between the two.
3945 val projectionMap = new ConcurrentHashMap [(StructType , StructType ), MutableProjection ]
// Avro schema -> Spark StructType (presumably its converted equivalent - TODO confirm).
4046 val schemaMap = new ConcurrentHashMap [Schema , StructType ]
// StructType -> (field name -> (field, Int)); the Int is presumably the field's position - TODO confirm.
4147 val SchemaPosMap = new ConcurrentHashMap [StructType , Map [String , (StructField , Int )]]
4248
43- /**
44- * @see org.apache.hudi.avro.HoodieAvroUtils#stitchRecords(org.apache.avro.generic.GenericRecord, org.apache.avro.generic.GenericRecord, org.apache.avro.Schema)
45- */
/**
 * Combines `left` and `right` into one row and projects it onto `stitchedSchema`.
 *
 * The two rows are viewed side by side (fields of `left` first, then fields of `right`); the
 * projection from that merged layout to `stitchedSchema` is obtained from `getCachedProjection`.
 * Counterpart of `HoodieAvroUtils#stitchRecords` for Spark rows.
 *
 * NOTE(review): the projection cache holds `MutableProjection` instances, so the returned row is
 * presumably reused across calls - confirm before retaining references to it.
 *
 * @param left           row supplying the leading fields
 * @param leftSchema     schema describing `left`
 * @param right          row supplying the trailing fields
 * @param rightSchema    schema describing `right`
 * @param stitchedSchema schema of the row to produce
 * @return the result of applying the projection to the joined row
 */
def stitchRecords(left: InternalRow, leftSchema: StructType, right: InternalRow, rightSchema: StructType, stitchedSchema: StructType): InternalRow = {
  val joinedRow = new JoinedRow(left, right)
  val joinedSchema = StructType(leftSchema.fields ++ rightSchema.fields)
  getCachedProjection(joinedSchema, stitchedSchema).apply(joinedRow)
}
5255
53- /**
54- * @see org.apache.hudi.avro.HoodieAvroUtils#rewriteRecord(org.apache.avro.generic.GenericRecord, org.apache.avro.Schema)
55- */
5656 def rewriteRecord (oldRecord : InternalRow , oldSchema : StructType , newSchema : StructType ): InternalRow = {
5757 val newRow = new GenericInternalRow (Array .fill(newSchema.fields.length)(null ).asInstanceOf [Array [Any ]])
5858
59- val oldFieldMap = getCacheSchemaPosMap (oldSchema)
59+ val oldFieldMap = getCachedSchemaPosMap (oldSchema)
6060 for ((field, pos) <- newSchema.fields.zipWithIndex) {
6161 var oldValue : AnyRef = null
6262 if (oldFieldMap.contains(field.name)) {
@@ -87,29 +87,21 @@ object HoodieInternalRowUtils {
8787 newRow
8888 }
8989
90- /**
91- * @see org.apache.hudi.avro.HoodieAvroUtils#rewriteRecordWithNewSchema(org.apache.avro.generic.IndexedRecord, org.apache.avro.Schema, java.util.Map)
92- */
/**
 * Rewrites `oldRecord` from `oldSchema` into `newSchema`, taking column renames into account.
 *
 * Entry point for the recursive overload of the same name: it starts the traversal with an
 * empty stack of field names and casts the result back to [[InternalRow]].
 * Counterpart of `HoodieAvroUtils#rewriteRecordWithNewSchema` for Spark rows.
 *
 * @param oldRecord  row to rewrite
 * @param oldSchema  schema describing `oldRecord`
 * @param newSchema  schema of the row to produce
 * @param renameCols column rename mapping handed to the recursive overload
 * @return the rewritten row
 */
def rewriteRecordWithNewSchema(oldRecord: InternalRow, oldSchema: StructType, newSchema: StructType, renameCols: util.Map[String, String]): InternalRow = {
  // Field-name stack that the recursive overload pushes onto while descending into nested fields.
  val fieldNameStack = new util.LinkedList[String]
  val rewritten = rewriteRecordWithNewSchema(oldRecord, oldSchema, newSchema, renameCols, fieldNameStack)
  rewritten.asInstanceOf[InternalRow]
}
9693
97- /**
98- * @see org.apache.hudi.avro.HoodieAvroUtils#rewriteRecordWithNewSchema(java.lang.Object, org.apache.avro.Schema, org.apache.avro.Schema, java.util.Map, java.util.Deque)
99- */
10094 private def rewriteRecordWithNewSchema (oldRecord : Any , oldSchema : DataType , newSchema : DataType , renameCols : util.Map [String , String ], fieldNames : util.Deque [String ]): Any = {
10195 if (oldRecord == null ) {
10296 null
10397 } else {
10498 newSchema match {
10599 case targetSchema : StructType =>
106- if (! oldRecord.isInstanceOf [InternalRow ]) {
107- throw new IllegalArgumentException (" cannot rewrite record with different type" )
108- }
100+ ValidationUtils .checkArgument(oldRecord.isInstanceOf [InternalRow ], " cannot rewrite record with different type" )
109101 val oldRow = oldRecord.asInstanceOf [InternalRow ]
110102 val helper = mutable.Map [Integer , Any ]()
111103
112- val oldSchemaPos = getCacheSchemaPosMap (oldSchema.asInstanceOf [StructType ])
104+ val oldSchemaPos = getCachedSchemaPosMap (oldSchema.asInstanceOf [StructType ])
113105 targetSchema.fields.zipWithIndex.foreach { case (field, i) =>
114106 fieldNames.push(field.name)
115107 if (oldSchemaPos.contains(field.name)) {
@@ -140,9 +132,7 @@ object HoodieInternalRowUtils {
140132
141133 newRow
142134 case targetSchema : ArrayType =>
143- if (! oldRecord.isInstanceOf [ArrayData ]) {
144- throw new IllegalArgumentException (" cannot rewrite record with different type" )
145- }
135+ ValidationUtils .checkArgument(oldRecord.isInstanceOf [ArrayData ], " cannot rewrite record with different type" )
146136 val oldElementType = oldSchema.asInstanceOf [ArrayType ].elementType
147137 val oldArray = oldRecord.asInstanceOf [ArrayData ]
148138 val newElementType = targetSchema.elementType
@@ -153,9 +143,7 @@ object HoodieInternalRowUtils {
153143
154144 newArray
155145 case targetSchema : MapType =>
156- if (! oldRecord.isInstanceOf [MapData ]) {
157- throw new IllegalArgumentException (" cannot rewrite record with different type" )
158- }
146+ ValidationUtils .checkArgument(oldRecord.isInstanceOf [MapData ], " cannot rewrite record with different type" )
159147 val oldValueType = oldSchema.asInstanceOf [MapType ].valueType
160148 val oldKeyType = oldSchema.asInstanceOf [MapType ].keyType
161149 val oldMap = oldRecord.asInstanceOf [MapData ]
@@ -174,27 +162,21 @@ object HoodieInternalRowUtils {
174162 }
175163 }
176164
177- /**
178- * @see org.apache.hudi.avro.HoodieAvroUtils#rewriteRecordWithMetadata(org.apache.avro.generic.GenericRecord, org.apache.avro.Schema, java.lang.String)
179- */
/**
 * Rewrites `record` into `newSchema` and stamps the file-name metadata field with `fileName`.
 * Counterpart of `HoodieAvroUtils#rewriteRecordWithMetadata` for Spark rows.
 *
 * The file name is stored as a Catalyst `UTF8String`: `InternalRow` keeps string columns in that
 * representation, so writing a plain `java.lang.String` would fail later when the value is read
 * back as a string column.
 *
 * @param record    row to rewrite
 * @param oldSchema schema describing `record`
 * @param newSchema schema of the row to produce; the file-name metadata field is expected at
 *                  ordinal `HoodieMetadataField.FILENAME_METADATA_FIELD.ordinal`
 * @param fileName  value for the file-name metadata field (a `null` is stored as `null`)
 * @return the rewritten row carrying the file name
 */
def rewriteRecordWithMetadata(record: InternalRow, oldSchema: StructType, newSchema: StructType, fileName: String): InternalRow = {
  val newRecord = rewriteRecord(record, oldSchema, newSchema)
  // Convert to Catalyst's internal string type; fromString passes null through unchanged.
  val catalystFileName = org.apache.spark.unsafe.types.UTF8String.fromString(fileName)
  newRecord.update(HoodieMetadataField.FILENAME_METADATA_FIELD.ordinal, catalystFileName)

  newRecord
}
186171
187- /**
188- * @see org.apache.hudi.avro.HoodieAvroUtils#rewriteEvolutionRecordWithMetadata(org.apache.avro.generic.GenericRecord, org.apache.avro.Schema, java.lang.String)
189- */
/**
 * Rewrites `record` into `newSchema` through the schema-evolution path (no column renames) and
 * stamps the file-name metadata field with `fileName`.
 * Counterpart of `HoodieAvroUtils#rewriteEvolutionRecordWithMetadata` for Spark rows.
 *
 * The file name is stored as a Catalyst `UTF8String`: `InternalRow` keeps string columns in that
 * representation, so writing a plain `java.lang.String` would fail later when the value is read
 * back as a string column.
 *
 * @param record    row to rewrite
 * @param oldSchema schema describing `record`
 * @param newSchema schema of the row to produce; the file-name metadata field is expected at
 *                  ordinal `HoodieMetadataField.FILENAME_METADATA_FIELD.ordinal`
 * @param fileName  value for the file-name metadata field (a `null` is stored as `null`)
 * @return the rewritten row carrying the file name
 */
def rewriteEvolutionRecordWithMetadata(record: InternalRow, oldSchema: StructType, newSchema: StructType, fileName: String): InternalRow = {
  // No renames apply on this path, so an empty mapping is supplied.
  val noRenames = new util.HashMap[String, String]()
  val newRecord = rewriteRecordWithNewSchema(record, oldSchema, newSchema, noRenames)
  // Convert to Catalyst's internal string type; fromString passes null through unchanged.
  val catalystFileName = org.apache.spark.unsafe.types.UTF8String.fromString(fileName)
  newRecord.update(HoodieMetadataField.FILENAME_METADATA_FIELD.ordinal, catalystFileName)

  newRecord
}
196178
197- def getCacheSchema (schema : Schema ): StructType = {
179+ def getCachedSchema (schema : Schema ): StructType = {
198180 if (! schemaMap.contains(schema)) {
199181 schemaMap.synchronized {
200182 if (! schemaMap.contains(schema)) {
@@ -206,7 +188,7 @@ object HoodieInternalRowUtils {
206188 schemaMap.get(schema)
207189 }
208190
209- private def getCacheProjection (from : StructType , to : StructType ): Projection = {
191+ private def getCachedProjection (from : StructType , to : StructType ): Projection = {
210192 val schemaPair = (from, to)
211193 if (! projectionMap.contains(schemaPair)) {
212194 projectionMap.synchronized {
@@ -219,7 +201,7 @@ object HoodieInternalRowUtils {
219201 projectionMap.get(schemaPair)
220202 }
221203
222- def getCacheSchemaPosMap (schema : StructType ): Map [String , (StructField , Int )] = {
204+ def getCachedSchemaPosMap (schema : StructType ): Map [String , (StructField , Int )] = {
223205 if (! SchemaPosMap .contains(schema)) {
224206 SchemaPosMap .synchronized {
225207 if (! SchemaPosMap .contains(schema)) {
0 commit comments