Skip to content

Commit 3cfe485

Browse files
authored
[HUDI-4825] Remove redundant fields in serialized commit metadata in JSON (#6646)
1 parent dc5ec0c commit 3cfe485

3 files changed

Lines changed: 82 additions & 0 deletions

File tree

hudi-common/src/main/java/org/apache/hudi/common/util/JsonUtils.java

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,13 @@ public class JsonUtils {
2929
// ObjectMapper held as a class-level constant; it is configured in the static initializer below.
private static final ObjectMapper MAPPER = new ObjectMapper();
3030
static {
3131
// Turn off FAIL_ON_UNKNOWN_PROPERTIES (presumably so that JSON carrying extra,
// unrecognized fields can still be deserialized -- confirm against callers).
MAPPER.disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES);
32+
// We need to exclude custom getters, setters and creators which can use member fields
33+
// to derive new fields, so that they are not included in the serialization
3234
// Fields of any visibility are detected as properties ...
MAPPER.setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.ANY);
35+
// ... while getters, is-getters, setters and creators are never auto-detected.
MAPPER.setVisibility(PropertyAccessor.GETTER, JsonAutoDetect.Visibility.NONE);
36+
MAPPER.setVisibility(PropertyAccessor.IS_GETTER, JsonAutoDetect.Visibility.NONE);
37+
MAPPER.setVisibility(PropertyAccessor.SETTER, JsonAutoDetect.Visibility.NONE);
38+
MAPPER.setVisibility(PropertyAccessor.CREATOR, JsonAutoDetect.Visibility.NONE);
3339
}
3440

3541
public static ObjectMapper getObjectMapper() {

hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodieCommitMetadata.java

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,11 +19,16 @@
1919
package org.apache.hudi.common.model;
2020

2121
import org.apache.hudi.common.testutils.HoodieTestUtils;
22+
import org.apache.hudi.common.util.CollectionUtils;
2223
import org.apache.hudi.common.util.FileIOUtils;
24+
import org.apache.hudi.common.util.JsonUtils;
2325

2426
import org.junit.jupiter.api.Test;
2527

28+
import java.io.IOException;
29+
import java.util.Arrays;
2630
import java.util.List;
31+
import java.util.stream.Collectors;
2732

2833
import static org.junit.jupiter.api.Assertions.assertEquals;
2934
import static org.junit.jupiter.api.Assertions.assertSame;
@@ -34,6 +39,30 @@
3439
*/
3540
public class TestHoodieCommitMetadata {
3641

42+
// Field names expected in the JSON produced by HoodieCommitMetadata#toJsonString();
// compared order-insensitively by verifyMetadataFieldNames.
private static final List<String> EXPECTED_FIELD_NAMES = Arrays.asList(
43+
"partitionToWriteStats", "compacted", "extraMetadata", "operationType");
44+
45+
/**
 * Serializes the given commit metadata with {@code toJsonString()}, parses the result with
 * the mapper from {@code JsonUtils.getObjectMapper()}, and asserts that the field names of
 * the parsed root node equal {@code expectedFieldNameList}, ignoring order.
 *
 * @param commitMetadata        commit metadata to serialize and inspect
 * @param expectedFieldNameList field names expected in the serialized JSON, in any order
 * @throws IOException declared by the signature; presumably raised when serialization or
 *                     parsing fails
 */
public static void verifyMetadataFieldNames(
46+
HoodieCommitMetadata commitMetadata, List<String> expectedFieldNameList)
47+
throws IOException {
48+
String serializedCommitMetadata = commitMetadata.toJsonString();
49+
// Collect the field names of the parsed JSON root into a list.
List<String> actualFieldNameList = CollectionUtils.toStream(
50+
JsonUtils.getObjectMapper().readTree(serializedCommitMetadata).fieldNames())
51+
.collect(Collectors.toList());
52+
// Both sides are sorted first so that the comparison does not depend on field order.
assertEquals(
53+
expectedFieldNameList.stream().sorted().collect(Collectors.toList()),
54+
actualFieldNameList.stream().sorted().collect(Collectors.toList())
55+
);
56+
}
57+
58+
/**
 * Builds a {@code HoodieCommitMetadata} from the write stats returned by
 * {@code HoodieTestUtils.generateFakeHoodieWriteStat(10)} and checks that its serialized
 * JSON contains exactly the field names in {@code EXPECTED_FIELD_NAMES}.
 */
@Test
59+
public void verifyFieldNamesInCommitMetadata() throws IOException {
60+
List<HoodieWriteStat> fakeHoodieWriteStats = HoodieTestUtils.generateFakeHoodieWriteStat(10);
61+
HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
62+
// Register each generated stat under its own partition path.
fakeHoodieWriteStats.forEach(stat -> commitMetadata.addWriteStat(stat.getPartitionPath(), stat));
63+
verifyMetadataFieldNames(commitMetadata, EXPECTED_FIELD_NAMES);
64+
}
65+
3766
@Test
3867
public void testPerfStatPresenceInHoodieMetadata() throws Exception {
3968

Lines changed: 47 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,47 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.hudi.common.model;
21+
22+
import org.apache.hudi.common.testutils.HoodieTestUtils;
23+
24+
import org.junit.jupiter.api.Test;
25+
26+
import java.io.IOException;
27+
import java.util.Arrays;
28+
import java.util.List;
29+
30+
import static org.apache.hudi.common.model.TestHoodieCommitMetadata.verifyMetadataFieldNames;
31+
32+
/**
 * Checks which field names appear in the serialized JSON of
 * {@code HoodieReplaceCommitMetadata}, reusing
 * {@code TestHoodieCommitMetadata.verifyMetadataFieldNames} for the comparison.
 */
public class TestHoodieReplaceCommitMetadata {
33+
34+
// Field names expected in the serialized replace-commit metadata; this list adds
// "partitionToReplaceFileIds" to the names expected for plain commit metadata.
private static final List<String> EXPECTED_FIELD_NAMES = Arrays.asList(
35+
"partitionToWriteStats", "partitionToReplaceFileIds", "compacted", "extraMetadata", "operationType");
36+
37+
/**
 * Populates a {@code HoodieReplaceCommitMetadata} with generated write stats and
 * replace file ids, then verifies the field names of its serialized JSON.
 */
@Test
38+
public void verifyFieldNamesInReplaceCommitMetadata() throws IOException {
39+
List<HoodieWriteStat> fakeHoodieWriteStats = HoodieTestUtils.generateFakeHoodieWriteStat(10);
40+
HoodieReplaceCommitMetadata commitMetadata = new HoodieReplaceCommitMetadata();
41+
// For every generated stat, record both the write stat and a replaced file id
// under the stat's partition path.
fakeHoodieWriteStats.forEach(stat -> {
42+
commitMetadata.addWriteStat(stat.getPartitionPath(), stat);
43+
commitMetadata.addReplaceFileId(stat.getPartitionPath(), stat.getFileId());
44+
});
45+
verifyMetadataFieldNames(commitMetadata, EXPECTED_FIELD_NAMES);
46+
}
47+
}

0 commit comments

Comments
 (0)