Skip to content

Commit 8953c16

Browse files
Blizzarajonahgao
authored andcommitted
More properly handle nullability of types/literals in Substrait (apache#10640)
* More properly handle nullability of types/literals in Substrait This isn't perfect; some things are still assumed to just always be nullable (e.g. Literal list elements). But it's still giving a closer match than just assuming everything is nullable. * Avoid cloning and creating DataFusionError Co-authored-by: Jonah Gao <jonahgao@msn.com> * simplify Literal/ScalarValue null handling --------- Co-authored-by: Jonah Gao <jonahgao@msn.com>
1 parent e8f2c97 commit 8953c16

2 files changed

Lines changed: 122 additions & 220 deletions

File tree

datafusion/substrait/src/logical_plan/consumer.rs

Lines changed: 53 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1136,11 +1136,13 @@ pub(crate) fn from_substrait_type(dt: &substrait::proto::Type) -> Result<DataTyp
11361136
),
11371137
},
11381138
r#type::Kind::List(list) => {
1139-
let inner_type =
1140-
from_substrait_type(list.r#type.as_ref().ok_or_else(|| {
1141-
substrait_datafusion_err!("List type must have inner type")
1142-
})?)?;
1143-
let field = Arc::new(Field::new_list_field(inner_type, true));
1139+
let inner_type = list.r#type.as_ref().ok_or_else(|| {
1140+
substrait_datafusion_err!("List type must have inner type")
1141+
})?;
1142+
let field = Arc::new(Field::new_list_field(
1143+
from_substrait_type(inner_type)?,
1144+
is_substrait_type_nullable(inner_type)?,
1145+
));
11441146
match list.type_variation_reference {
11451147
DEFAULT_CONTAINER_TYPE_REF => Ok(DataType::List(field)),
11461148
LARGE_CONTAINER_TYPE_REF => Ok(DataType::LargeList(field)),
@@ -1163,8 +1165,11 @@ pub(crate) fn from_substrait_type(dt: &substrait::proto::Type) -> Result<DataTyp
11631165
r#type::Kind::Struct(s) => {
11641166
let mut fields = vec![];
11651167
for (i, f) in s.types.iter().enumerate() {
1166-
let field =
1167-
Field::new(&format!("c{i}"), from_substrait_type(f)?, true);
1168+
let field = Field::new(
1169+
&format!("c{i}"),
1170+
from_substrait_type(f)?,
1171+
is_substrait_type_nullable(f)?,
1172+
);
11681173
fields.push(field);
11691174
}
11701175
Ok(DataType::Struct(fields.into()))
@@ -1175,6 +1180,47 @@ pub(crate) fn from_substrait_type(dt: &substrait::proto::Type) -> Result<DataTyp
11751180
}
11761181
}
11771182

1183+
fn is_substrait_type_nullable(dtype: &Type) -> Result<bool> {
1184+
fn is_nullable(nullability: i32) -> bool {
1185+
nullability != substrait::proto::r#type::Nullability::Required as i32
1186+
}
1187+
1188+
let nullable = match dtype
1189+
.kind
1190+
.as_ref()
1191+
.ok_or_else(|| substrait_datafusion_err!("Type must contain Kind"))?
1192+
{
1193+
r#type::Kind::Bool(val) => is_nullable(val.nullability),
1194+
r#type::Kind::I8(val) => is_nullable(val.nullability),
1195+
r#type::Kind::I16(val) => is_nullable(val.nullability),
1196+
r#type::Kind::I32(val) => is_nullable(val.nullability),
1197+
r#type::Kind::I64(val) => is_nullable(val.nullability),
1198+
r#type::Kind::Fp32(val) => is_nullable(val.nullability),
1199+
r#type::Kind::Fp64(val) => is_nullable(val.nullability),
1200+
r#type::Kind::String(val) => is_nullable(val.nullability),
1201+
r#type::Kind::Binary(val) => is_nullable(val.nullability),
1202+
r#type::Kind::Timestamp(val) => is_nullable(val.nullability),
1203+
r#type::Kind::Date(val) => is_nullable(val.nullability),
1204+
r#type::Kind::Time(val) => is_nullable(val.nullability),
1205+
r#type::Kind::IntervalYear(val) => is_nullable(val.nullability),
1206+
r#type::Kind::IntervalDay(val) => is_nullable(val.nullability),
1207+
r#type::Kind::TimestampTz(val) => is_nullable(val.nullability),
1208+
r#type::Kind::Uuid(val) => is_nullable(val.nullability),
1209+
r#type::Kind::FixedChar(val) => is_nullable(val.nullability),
1210+
r#type::Kind::Varchar(val) => is_nullable(val.nullability),
1211+
r#type::Kind::FixedBinary(val) => is_nullable(val.nullability),
1212+
r#type::Kind::Decimal(val) => is_nullable(val.nullability),
1213+
r#type::Kind::PrecisionTimestamp(val) => is_nullable(val.nullability),
1214+
r#type::Kind::PrecisionTimestampTz(val) => is_nullable(val.nullability),
1215+
r#type::Kind::Struct(val) => is_nullable(val.nullability),
1216+
r#type::Kind::List(val) => is_nullable(val.nullability),
1217+
r#type::Kind::Map(val) => is_nullable(val.nullability),
1218+
r#type::Kind::UserDefined(val) => is_nullable(val.nullability),
1219+
r#type::Kind::UserDefinedTypeReference(_) => true, // not implemented, assume nullable
1220+
};
1221+
Ok(nullable)
1222+
}
1223+
11781224
fn from_substrait_bound(
11791225
bound: &Option<Bound>,
11801226
is_lower: bool,

0 commit comments

Comments
 (0)