2020
2121import org .apache .hudi .common .config .TypedProperties ;
2222import org .apache .hudi .utilities .exception .HoodieSchemaPostProcessException ;
23- import org .apache .hudi .utilities .schema .DeleteSupportSchemaPostProcessor ;
24- import org .apache .hudi .utilities .schema .DropColumnSchemaPostProcessor ;
23+ import org .apache .hudi .utilities .schema .postprocessor .add .AddPrimitiveColumnSchemaPostProcessor ;
24+ import org .apache .hudi .utilities .schema .postprocessor .DeleteSupportSchemaPostProcessor ;
25+ import org .apache .hudi .utilities .schema .postprocessor .DropColumnSchemaPostProcessor ;
2526import org .apache .hudi .utilities .schema .SchemaPostProcessor ;
2627import org .apache .hudi .utilities .schema .SchemaPostProcessor .Config ;
2728import org .apache .hudi .utilities .schema .SchemaProvider ;
2829import org .apache .hudi .utilities .schema .SparkAvroPostProcessor ;
30+ import org .apache .hudi .utilities .schema .postprocessor .add .BaseSchemaPostProcessorConfig ;
2931import org .apache .hudi .utilities .testutils .UtilitiesTestBase ;
3032import org .apache .hudi .utilities .transform .FlatteningTransformer ;
3133
3234import org .apache .avro .Schema ;
3335import org .apache .avro .Schema .Type ;
3436import org .junit .jupiter .api .Assertions ;
3537import org .junit .jupiter .api .Test ;
38+ import org .junit .jupiter .params .ParameterizedTest ;
39+ import org .junit .jupiter .params .provider .Arguments ;
40+ import org .junit .jupiter .params .provider .MethodSource ;
3641
3742import java .io .IOException ;
3843import java .util .ArrayList ;
3944import java .util .List ;
45+ import java .util .stream .Stream ;
4046
4147import static org .junit .jupiter .api .Assertions .assertEquals ;
48+ import static org .junit .jupiter .api .Assertions .assertNotEquals ;
4249import static org .junit .jupiter .api .Assertions .assertNotNull ;
4350import static org .junit .jupiter .api .Assertions .assertNull ;
4451
@@ -55,13 +62,18 @@ public class TestSchemaPostProcessor extends UtilitiesTestBase {
5562 + "{\" name\" :\" _row_key\" ,\" type\" :\" string\" },{\" name\" :\" rider\" ,\" type\" :\" string\" },{\" name\" :\" driver\" ,"
5663 + "\" type\" :\" string\" },{\" name\" :\" fare\" ,\" type\" :\" double\" }]}" ;
5764
65+ private static Stream <Arguments > configParams () {
66+ String [] types = {"bytes" , "string" , "int" , "long" , "float" , "double" , "boolean" };
67+ return Stream .of (types ).map (Arguments ::of );
68+ }
69+
5870 @ Test
5971 public void testPostProcessor () throws IOException {
6072 properties .put (Config .SCHEMA_POST_PROCESSOR_PROP , DummySchemaPostProcessor .class .getName ());
6173 SchemaProvider provider =
6274 UtilHelpers .wrapSchemaProviderWithPostProcessor (
63- UtilHelpers .createSchemaProvider (DummySchemaProvider .class .getName (), properties , jsc ),
64- properties , jsc ,null );
75+ UtilHelpers .createSchemaProvider (DummySchemaProvider .class .getName (), properties , jsc ),
76+ properties , jsc , null );
6577
6678 Schema schema = provider .getSourceSchema ();
6779 assertEquals (schema .getType (), Type .RECORD );
@@ -76,9 +88,9 @@ public void testSparkAvro() throws IOException {
7688 transformerClassNames .add (FlatteningTransformer .class .getName ());
7789
7890 SchemaProvider provider =
79- UtilHelpers .wrapSchemaProviderWithPostProcessor (
80- UtilHelpers .createSchemaProvider (SparkAvroSchemaProvider .class .getName (), properties , jsc ),
81- properties , jsc , transformerClassNames );
91+ UtilHelpers .wrapSchemaProviderWithPostProcessor (
92+ UtilHelpers .createSchemaProvider (SparkAvroSchemaProvider .class .getName (), properties , jsc ),
93+ properties , jsc , transformerClassNames );
8294
8395 Schema schema = provider .getSourceSchema ();
8496 assertEquals (schema .getType (), Type .RECORD );
@@ -99,7 +111,7 @@ public void testDeleteSupport() {
99111 public void testChainedSchemaPostProcessor () {
100112 // DeleteSupportSchemaPostProcessor first, DummySchemaPostProcessor second
101113 properties .put (Config .SCHEMA_POST_PROCESSOR_PROP ,
102- "org.apache.hudi.utilities.schema.DeleteSupportSchemaPostProcessor,org.apache.hudi.utilities.DummySchemaPostProcessor" );
114+ "org.apache.hudi.utilities.schema.postprocessor. DeleteSupportSchemaPostProcessor,org.apache.hudi.utilities.DummySchemaPostProcessor" );
103115
104116 SchemaPostProcessor processor = UtilHelpers .createSchemaPostProcessor (properties .getString (Config .SCHEMA_POST_PROCESSOR_PROP ), properties , jsc );
105117 Schema schema = new Schema .Parser ().parse (ORIGINAL_SCHEMA );
@@ -111,7 +123,7 @@ public void testChainedSchemaPostProcessor() {
111123
112124 // DummySchemaPostProcessor first, DeleteSupportSchemaPostProcessor second
113125 properties .put (Config .SCHEMA_POST_PROCESSOR_PROP ,
114- "org.apache.hudi.utilities.DummySchemaPostProcessor,org.apache.hudi.utilities.schema.DeleteSupportSchemaPostProcessor" );
126+ "org.apache.hudi.utilities.DummySchemaPostProcessor,org.apache.hudi.utilities.schema.postprocessor. DeleteSupportSchemaPostProcessor" );
115127
116128 processor = UtilHelpers .createSchemaPostProcessor (properties .getString (Config .SCHEMA_POST_PROCESSOR_PROP ), properties , jsc );
117129 schema = new Schema .Parser ().parse (ORIGINAL_SCHEMA );
@@ -144,6 +156,32 @@ public void testDeleteColumnThrows() {
144156 Assertions .assertThrows (HoodieSchemaPostProcessException .class , () -> processor .processSchema (schema ));
145157 }
146158
159+ @ ParameterizedTest
160+ @ MethodSource ("configParams" )
161+ public void testAddPrimitiveTypeColumn (String type ) {
162+ properties .put (BaseSchemaPostProcessorConfig .SCHEMA_POST_PROCESSOR_ADD_COLUMN_NAME_PROP .key (), "primitive_column" );
163+ properties .put (BaseSchemaPostProcessorConfig .SCHEMA_POST_PROCESSOR_ADD_COLUMN_TYPE_PROP .key (), type );
164+ properties .put (BaseSchemaPostProcessorConfig .SCHEMA_POST_PROCESSOR_ADD_COLUMN_DOC_PROP .key (), "primitive column test" );
165+
166+ AddPrimitiveColumnSchemaPostProcessor processor = new AddPrimitiveColumnSchemaPostProcessor (properties , null );
167+ Schema schema = new Schema .Parser ().parse (ORIGINAL_SCHEMA );
168+ Schema targetSchema = processor .processSchema (schema );
169+
170+ Schema .Field newColumn = targetSchema .getField ("primitive_column" );
171+
172+ assertNotNull (newColumn );
173+ assertEquals ("primitive column test" , newColumn .doc ());
174+ // nullable by default, so new column is union type
175+ assertNotEquals (type , newColumn .schema ().getType ().getName ());
176+
177+ // test not nullable
178+ properties .put (BaseSchemaPostProcessorConfig .SCHEMA_POST_PROCESSOR_ADD_COLUMN_NULLABLE_PROP .key (), false );
179+ targetSchema = processor .processSchema (schema );
180+ newColumn = targetSchema .getField ("primitive_column" );
181+ assertEquals (type , newColumn .schema ().getType ().getName ());
182+
183+ }
184+
147185 @ Test
148186 public void testSparkAvroSchema () throws IOException {
149187 SparkAvroPostProcessor processor = new SparkAvroPostProcessor (properties , null );
0 commit comments