Skip to content

Commit

Permalink
Fix NPE excpetion when reading dataset with nested array
Browse files Browse the repository at this point in the history
  • Loading branch information
770120041 committed Nov 29, 2023
1 parent 563e7d9 commit 8bbf1d1
Show file tree
Hide file tree
Showing 2 changed files with 100 additions and 1 deletion.
33 changes: 32 additions & 1 deletion core/src/main/java/org/apache/iceberg/avro/TypeToSchema.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@

package org.apache.iceberg.avro;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Deque;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -103,7 +105,8 @@ public Schema struct(Types.StructType struct, List<Schema> fieldSchemas) {
String fieldName = isValidFieldName ? origFieldName : AvroSchemaUtil.sanitize(origFieldName);
Object defaultValue = structField.hasDefaultValue() ? structField.getDefaultValue() :
(structField.isOptional() ? JsonProperties.NULL_VALUE : null);
Schema.Field field = new Schema.Field(fieldName, fieldSchemas.get(i), structField.doc(), defaultValue);
Schema.Field field = new Schema.Field(fieldName, fieldSchemas.get(i), structField.doc(),
convertToJsonNull(defaultValue));
if (!isValidFieldName) {
field.addProp(AvroSchemaUtil.ICEBERG_FIELD_NAME_PROP, origFieldName);
}
Expand Down Expand Up @@ -232,4 +235,32 @@ public Schema primitive(Type.PrimitiveType primitive) {

return primitiveSchema;
}

// This function ensures that all nested null are converted to JsonProperties.NULL_VALUE
// to make sure JacksonUtils.toJsonNode() converts them properly.
private Object convertToJsonNull(Object defaultValue) {
if (defaultValue instanceof Map) {
for (Map.Entry<Object, Object> entry : ((Map<Object, Object>) defaultValue).entrySet()) {
if (entry.getValue() instanceof Map || entry.getValue() instanceof Collection) {
entry.setValue(convertToJsonNull(entry.getValue()));
} else {
entry.setValue(JsonProperties.NULL_VALUE);
}
}
return defaultValue;
} else if (defaultValue instanceof List) {
List<Object> originalList = (List<Object>) defaultValue;
List<Object> copiedList = new ArrayList<>();

for (Object element : originalList) {
if (element instanceof Map || element instanceof Collection) {
copiedList.add(convertToJsonNull(element));
} else {
copiedList.add(JsonProperties.NULL_VALUE);
}
}
return copiedList;
}
return defaultValue;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -516,4 +516,72 @@ public void testVariousTypesDefaultValues() {
Assert.assertTrue(IntStream.range(0, roundTripiSchema.columns().size())
.allMatch(i -> roundTripiSchema.columns().get(i).equals(iSchema.columns().get(i))));
}

@Test
public void testConversionOfRecordWithNestedSubElement() {
String schemaString = "{\n" +
" \"type\": \"record\",\n" +
" \"name\": \"Root\",\n" +
" \"fields\": [\n" +
" {\n" +
" \"name\": \"OuterRecord1\",\n" +
" \"type\": [\n" +
" \"null\",\n" +
" {\n" +
" \"type\": \"record\",\n" +
" \"name\": \"InnerElement\",\n" +
" \"fields\": [\n" +
" {\n" +
" \"name\": \"InnerField\",\n" +
" \"type\": {\n" +
" \"type\": \"record\",\n" +
" \"name\": \"InnerField1\",\n" +
" \"fields\": [\n" +
" {\n" +
" \"name\": \"InnerField1Param\",\n" +
" \"type\": [\n" +
" \"null\",\n" +
" \"string\"\n" +
" ],\n" +
" \"default\": null\n" +
" }\n" +
" ]\n" +
" },\n" +
" \"default\": {\n" +
" \"InnerField1Param\": null\n" +
" }\n" +
" }\n" +
" ]\n" +
" }\n" +
" ],\n" +
" \"default\": null\n" +
" },\n" +
" {\n" +
" \"name\": \"InnerElementV2\",\n" +
" \"type\": [\n" +
" \"null\",\n" +
" {\n" +
" \"type\": \"record\",\n" +
" \"name\": \"InnerElementV2\",\n" +
" \"fields\": [\n" +
" {\n" +
" \"name\": \"InnerField2\",\n" +
" \"type\": {\n" +
" \"type\": \"array\",\n" +
" \"items\": \"InnerElement\"\n" +
" },\n" +
" \"default\": []\n" +
" }\n" +
" ]\n" +
" }\n" +
" ],\n" +
" \"default\": null\n" +
" }\n" +
" ]\n" +
"}";
Schema schema = new Schema.Parser().parse(schemaString);
org.apache.iceberg.Schema iSchema = AvroSchemaUtil.toIceberg(schema);
String jSchema = SchemaParser.toJson(iSchema);
org.apache.iceberg.Schema roundTripiSchema = SchemaParser.fromJson(jSchema);
}
}

0 comments on commit 8bbf1d1

Please sign in to comment.