7070import java .util .Iterator ;
7171import java .util .List ;
7272import java .util .Map ;
73+ import java .util .Deque ;
74+ import java .util .LinkedList ;
7375import java .util .TimeZone ;
7476import java .util .stream .Collectors ;
7577
@@ -405,6 +407,14 @@ public static GenericRecord rewriteRecordWithMetadata(GenericRecord genericRecor
405407 return newRecord ;
406408 }
407409
410+ // TODO Unify the logical of rewriteRecordWithMetadata and rewriteEvolutionRecordWithMetadata, and delete this function.
411+ public static GenericRecord rewriteEvolutionRecordWithMetadata (GenericRecord genericRecord , Schema newSchema , String fileName ) {
412+ GenericRecord newRecord = HoodieAvroUtils .rewriteRecordWithNewSchema (genericRecord , newSchema , new HashMap <>());
413+ // do not preserve FILENAME_METADATA_FIELD
414+ newRecord .put (HoodieRecord .FILENAME_METADATA_FIELD_POS , fileName );
415+ return newRecord ;
416+ }
417+
408418 /**
409419 * Converts list of {@link GenericRecord} provided into the {@link GenericRecord} adhering to the
410420 * provided {@code newSchema}.
@@ -719,14 +729,28 @@ public static Object getRecordColumnValues(HoodieRecord<? extends HoodieRecordPa
719729 *
720730 * @param oldRecord oldRecord to be rewritten
721731 * @param newSchema newSchema used to rewrite oldRecord
732+ * @param renameCols a map storing all renamed columns, (k, v) -> (colNameFromNewSchema, colNameFromOldSchema)
722733 * @return newRecord for new Schema
723734 */
724- public static GenericRecord rewriteRecordWithNewSchema (IndexedRecord oldRecord , Schema newSchema ) {
725- Object newRecord = rewriteRecordWithNewSchema (oldRecord , oldRecord .getSchema (), newSchema );
735+ public static GenericRecord rewriteRecordWithNewSchema (IndexedRecord oldRecord , Schema newSchema , Map < String , String > renameCols ) {
736+ Object newRecord = rewriteRecordWithNewSchema (oldRecord , oldRecord .getSchema (), newSchema , renameCols , new LinkedList <>() );
726737 return (GenericData .Record ) newRecord ;
727738 }
728739
729- private static Object rewriteRecordWithNewSchema (Object oldRecord , Schema oldSchema , Schema newSchema ) {
740+ /**
741+ * Given an Avro record with a given schema, rewrites it into the new schema while setting fields only from the new schema.
742+ * Supports deep rewriting of nested records and resolves renamed columns.
743+ * This particular method does the following things :
744+ * a) Create a new empty GenericRecord with the new schema.
745+ * b) For GenericRecord, copy over the data from the old schema to the new schema or set default values for all fields of this transformed schema
746+ *
747+ * @param oldRecord oldRecord to be rewritten
748+ * @param newSchema newSchema used to rewrite oldRecord
749+ * @param renameCols a map storing all renamed columns, (k, v) -> (colNameFromNewSchema, colNameFromOldSchema)
750+ * @param fieldNames tracks the full name of the field being visited while traversing the new schema.
751+ * @return newRecord for new Schema
752+ */
753+ private static Object rewriteRecordWithNewSchema (Object oldRecord , Schema oldSchema , Schema newSchema , Map <String , String > renameCols , Deque <String > fieldNames ) {
730754 if (oldRecord == null ) {
731755 return null ;
732756 }
@@ -741,10 +765,23 @@ private static Object rewriteRecordWithNewSchema(Object oldRecord, Schema oldSch
741765
742766 for (int i = 0 ; i < fields .size (); i ++) {
743767 Schema .Field field = fields .get (i );
768+ String fieldName = field .name ();
769+ fieldNames .push (fieldName );
744770 if (oldSchema .getField (field .name ()) != null ) {
745771 Schema .Field oldField = oldSchema .getField (field .name ());
746- helper .put (i , rewriteRecordWithNewSchema (indexedRecord .get (oldField .pos ()), oldField .schema (), fields .get (i ).schema ()));
772+ helper .put (i , rewriteRecordWithNewSchema (indexedRecord .get (oldField .pos ()), oldField .schema (), fields .get (i ).schema (), renameCols , fieldNames ));
773+ } else {
774+ String fieldFullName = createFullName (fieldNames );
775+ String [] colNamePartsFromOldSchema = renameCols .getOrDefault (fieldFullName , "" ).split ("\\ ." );
776+ String lastColNameFromOldSchema = colNamePartsFromOldSchema [colNamePartsFromOldSchema .length - 1 ];
777+ // deal with rename
778+ if (oldSchema .getField (field .name ()) == null && oldSchema .getField (lastColNameFromOldSchema ) != null ) {
779+ // find rename
780+ Schema .Field oldField = oldSchema .getField (lastColNameFromOldSchema );
781+ helper .put (i , rewriteRecordWithNewSchema (indexedRecord .get (oldField .pos ()), oldField .schema (), fields .get (i ).schema (), renameCols , fieldNames ));
782+ }
747783 }
784+ fieldNames .pop ();
748785 }
749786 GenericData .Record newRecord = new GenericData .Record (newSchema );
750787 for (int i = 0 ; i < fields .size (); i ++) {
@@ -765,27 +802,41 @@ private static Object rewriteRecordWithNewSchema(Object oldRecord, Schema oldSch
765802 }
766803 Collection array = (Collection )oldRecord ;
767804 List <Object > newArray = new ArrayList ();
805+ fieldNames .push ("element" );
768806 for (Object element : array ) {
769- newArray .add (rewriteRecordWithNewSchema (element , oldSchema .getElementType (), newSchema .getElementType ()));
807+ newArray .add (rewriteRecordWithNewSchema (element , oldSchema .getElementType (), newSchema .getElementType (), renameCols , fieldNames ));
770808 }
809+ fieldNames .pop ();
771810 return newArray ;
772811 case MAP :
773812 if (!(oldRecord instanceof Map )) {
774813 throw new IllegalArgumentException ("cannot rewrite record with different type" );
775814 }
776815 Map <Object , Object > map = (Map <Object , Object >) oldRecord ;
777816 Map <Object , Object > newMap = new HashMap <>();
817+ fieldNames .push ("value" );
778818 for (Map .Entry <Object , Object > entry : map .entrySet ()) {
779- newMap .put (entry .getKey (), rewriteRecordWithNewSchema (entry .getValue (), oldSchema .getValueType (), newSchema .getValueType ()));
819+ newMap .put (entry .getKey (), rewriteRecordWithNewSchema (entry .getValue (), oldSchema .getValueType (), newSchema .getValueType (), renameCols , fieldNames ));
780820 }
821+ fieldNames .pop ();
781822 return newMap ;
782823 case UNION :
783- return rewriteRecordWithNewSchema (oldRecord , getActualSchemaFromUnion (oldSchema , oldRecord ), getActualSchemaFromUnion (newSchema , oldRecord ));
824+ return rewriteRecordWithNewSchema (oldRecord , getActualSchemaFromUnion (oldSchema , oldRecord ), getActualSchemaFromUnion (newSchema , oldRecord ), renameCols , fieldNames );
784825 default :
785826 return rewritePrimaryType (oldRecord , oldSchema , newSchema );
786827 }
787828 }
788829
830+ private static String createFullName (Deque <String > fieldNames ) {
831+ String result = "" ;
832+ if (!fieldNames .isEmpty ()) {
833+ List <String > parentNames = new ArrayList <>();
834+ fieldNames .descendingIterator ().forEachRemaining (parentNames ::add );
835+ result = parentNames .stream ().collect (Collectors .joining ("." ));
836+ }
837+ return result ;
838+ }
839+
789840 private static Object rewritePrimaryType (Object oldValue , Schema oldSchema , Schema newSchema ) {
790841 Schema realOldSchema = oldSchema ;
791842 if (realOldSchema .getType () == UNION ) {
@@ -958,9 +1009,10 @@ private static Schema getActualSchemaFromUnion(Schema schema, Object data) {
9581009 *
9591010 * @param oldRecords oldRecords to be rewritten
9601011 * @param newSchema newSchema used to rewrite oldRecord
1012+ * @param renameCols a map storing all renamed columns, (k, v) -> (colNameFromNewSchema, colNameFromOldSchema)
9611013 * @return an iterator of rewritten GenericRecords
9621014 */
963- public static Iterator <GenericRecord > rewriteRecordWithNewSchema (Iterator <GenericRecord > oldRecords , Schema newSchema ) {
1015+ public static Iterator <GenericRecord > rewriteRecordWithNewSchema (Iterator <GenericRecord > oldRecords , Schema newSchema , Map < String , String > renameCols ) {
9641016 if (oldRecords == null || newSchema == null ) {
9651017 return Collections .emptyIterator ();
9661018 }
@@ -972,7 +1024,7 @@ public boolean hasNext() {
9721024
9731025 @ Override
9741026 public GenericRecord next () {
975- return rewriteRecordWithNewSchema (oldRecords .next (), newSchema );
1027+ return rewriteRecordWithNewSchema (oldRecords .next (), newSchema , renameCols );
9761028 }
9771029 };
9781030 }
0 commit comments