Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 120 additions & 0 deletions datafusion/core/tests/sql/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1158,3 +1158,123 @@ async fn nested_subquery() -> Result<()> {
assert_batches_eq!(expected, &actual);
Ok(())
}

#[tokio::test]
async fn comparisons_with_null() -> Result<()> {
let ctx = SessionContext::new();
// 1. Numeric comparison with NULL
let sql = "select column1 < NULL from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4)) as t";
let actual = execute_to_batches(&ctx, sql).await;
let expected = vec![
"+-------------------+",
"| t.column1 Lt NULL |",
"+-------------------+",
"| |",
"| |",
"+-------------------+",
];
assert_batches_eq!(expected, &actual);

let sql =
"select column1 <= NULL from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4)) as t";
let actual = execute_to_batches(&ctx, sql).await;
let expected = vec![
"+---------------------+",
"| t.column1 LtEq NULL |",
"+---------------------+",
"| |",
"| |",
"+---------------------+",
];
assert_batches_eq!(expected, &actual);

let sql = "select column1 > NULL from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4)) as t";
let actual = execute_to_batches(&ctx, sql).await;
let expected = vec![
"+-------------------+",
"| t.column1 Gt NULL |",
"+-------------------+",
"| |",
"| |",
"+-------------------+",
];
assert_batches_eq!(expected, &actual);

let sql =
"select column1 >= NULL from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4)) as t";
let actual = execute_to_batches(&ctx, sql).await;
let expected = vec![
"+---------------------+",
"| t.column1 GtEq NULL |",
"+---------------------+",
"| |",
"| |",
"+---------------------+",
];
assert_batches_eq!(expected, &actual);

let sql = "select column1 = NULL from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4)) as t";
let actual = execute_to_batches(&ctx, sql).await;
let expected = vec![
"+-------------------+",
"| t.column1 Eq NULL |",
"+-------------------+",
"| |",
"| |",
"+-------------------+",
];
assert_batches_eq!(expected, &actual);

let sql =
"select column1 != NULL from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4)) as t";
let actual = execute_to_batches(&ctx, sql).await;
let expected = vec![
"+----------------------+",
"| t.column1 NotEq NULL |",
"+----------------------+",
"| |",
"| |",
"+----------------------+",
];
assert_batches_eq!(expected, &actual);

// 1.1 Float value comparison with NULL
let sql = "select column3 < NULL from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4)) as t";
let actual = execute_to_batches(&ctx, sql).await;
let expected = vec![
"+-------------------+",
"| t.column3 Lt NULL |",
"+-------------------+",
"| |",
"| |",
"+-------------------+",
];
assert_batches_eq!(expected, &actual);

// String comparison with NULL
let sql = "select column2 < NULL from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4)) as t";
let actual = execute_to_batches(&ctx, sql).await;
let expected = vec![
"+-------------------+",
"| t.column2 Lt NULL |",
"+-------------------+",
"| |",
"| |",
"+-------------------+",
];
assert_batches_eq!(expected, &actual);

// Boolean comparison with NULL
let sql = "select column1 < NULL from (VALUES (true), (false)) as t";
let actual = execute_to_batches(&ctx, sql).await;
let expected = vec![
"+-------------------+",
"| t.column1 Lt NULL |",
"+-------------------+",
"| |",
"| |",
"+-------------------+",
];
assert_batches_eq!(expected, &actual);
Ok(())
}
2 changes: 2 additions & 0 deletions datafusion/expr/src/binary_rule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ fn comparison_eq_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<Da
.or_else(|| dictionary_coercion(lhs_type, rhs_type))
.or_else(|| temporal_coercion(lhs_type, rhs_type))
.or_else(|| string_coercion(lhs_type, rhs_type))
.or_else(|| null_coercion(lhs_type, rhs_type))
}

fn comparison_order_coercion(
Expand All @@ -177,6 +178,7 @@ fn comparison_order_coercion(
.or_else(|| string_coercion(lhs_type, rhs_type))
.or_else(|| dictionary_coercion(lhs_type, rhs_type))
.or_else(|| temporal_coercion(lhs_type, rhs_type))
.or_else(|| null_coercion(lhs_type, rhs_type))
}

fn comparison_binary_numeric_coercion(
Expand Down
80 changes: 45 additions & 35 deletions datafusion/physical-expr/src/expressions/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -657,17 +657,15 @@ macro_rules! compute_utf8_op_scalar {

/// Invoke a compute kernel on a data array and a scalar value
macro_rules! compute_utf8_op_dyn_scalar {
($LEFT:expr, $RIGHT:expr, $OP:ident) => {{
($LEFT:expr, $RIGHT:expr, $OP:ident, $OP_TYPE:expr) => {{
if let Some(string_value) = $RIGHT {
Ok(Arc::new(paste::expr! {[<$OP _dyn_utf8_scalar>]}(
$LEFT,
&string_value,
)?))
} else {
Err(DataFusionError::Internal(format!(
"compute_utf8_op_scalar for '{}' failed with literal 'none' value",
stringify!($OP),
)))
// when the $RIGHT is a NULL, generate a NULL array of $OP_TYPE
Ok(Arc::new(new_null_array($OP_TYPE, $LEFT.len())))
Copy link
Copy Markdown
Member

@yjshen yjshen May 9, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. I think we should respect SortOptions here for ScalaValue.
  2. Question: how about the NULL != column case? Do we have an expression normalizer that asserts Nulls often coming as the right-hand operand for a binary comparison?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@yjshen what do you mean SortOptions? I don't think these expressions are used directly in ORDER BY clauses -- they are used instead for expressions like a < b

Adding some more tests in sql/expr.rs for NULL < column and NULL > column sounds like a good idea.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After checking Postgres and MySQL, I found I'm wrong on the expected behavior while comparing columns with NULL using < > ..., they will return NULL in all cases. I was expecting to apply null first or null last while comparing column with NULL, so always true or always false at the first time.

Copy link
Copy Markdown
Contributor Author

@WinkerDu WinkerDu May 10, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @alamb @yjshen
I agree this macro is just used in expression evaluate, maybe we can ignore SortOptions in this pr.
I'll look into this case NULL != column carefully and add more cases in ut.
Besides, I am super busy these days, I'll do it ASAP 🤟

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will spend some time writing the NULL != column

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Follow on PR: #2510 turns out there was a bug!

}
}};
}
Expand All @@ -691,7 +689,7 @@ macro_rules! compute_bool_op_scalar {

/// Invoke a compute kernel on a boolean data array and a scalar value
macro_rules! compute_bool_op_dyn_scalar {
($LEFT:expr, $RIGHT:expr, $OP:ident) => {{
($LEFT:expr, $RIGHT:expr, $OP:ident, $OP_TYPE:expr) => {{
// generate the scalar function name, such as lt_dyn_bool_scalar, from the $OP parameter
// (which could have a value of lt) and the suffix _scalar
if let Some(b) = $RIGHT {
Expand All @@ -700,10 +698,8 @@ macro_rules! compute_bool_op_dyn_scalar {
b,
)?))
} else {
Err(DataFusionError::Internal(format!(
"compute_utf8_op_scalar for '{}' failed with literal 'none' value",
stringify!($OP),
)))
// when the $RIGHT is a NULL, generate a NULL array of $OP_TYPE
Ok(Arc::new(new_null_array($OP_TYPE, $LEFT.len())))
}
}};
}
Expand Down Expand Up @@ -751,8 +747,9 @@ macro_rules! compute_op_scalar {

/// Invoke a dyn compute kernel on a data array and a scalar value
/// LEFT is Primitive or Dictionart array of numeric values, RIGHT is scalar value
/// OP_TYPE is the return type of scalar function
macro_rules! compute_op_dyn_scalar {
($LEFT:expr, $RIGHT:expr, $OP:ident) => {{
($LEFT:expr, $RIGHT:expr, $OP:ident, $OP_TYPE:expr) => {{
// generate the scalar function name, such as lt_dyn_scalar, from the $OP parameter
// (which could have a value of lt_dyn) and the suffix _scalar
if let Some(value) = $RIGHT {
Expand All @@ -761,10 +758,8 @@ macro_rules! compute_op_dyn_scalar {
value,
)?))
} else {
Err(DataFusionError::Internal(format!(
"compute_utf8_op_scalar for '{}' failed with literal 'none' value",
stringify!($OP),
)))
// when the $RIGHT is a NULL, generate a NULL array of $OP_TYPE
Ok(Arc::new(new_null_array($OP_TYPE, $LEFT.len())))
}
}};
}
Expand Down Expand Up @@ -1125,22 +1120,22 @@ impl PhysicalExpr for BinaryExpr {
/// such as Utf8 strings.
#[macro_export]
macro_rules! binary_array_op_dyn_scalar {
($LEFT:expr, $RIGHT:expr, $OP:ident) => {{
($LEFT:expr, $RIGHT:expr, $OP:ident, $OP_TYPE:expr) => {{
let result: Result<Arc<dyn Array>> = match $RIGHT {
ScalarValue::Boolean(b) => compute_bool_op_dyn_scalar!($LEFT, b, $OP),
ScalarValue::Boolean(b) => compute_bool_op_dyn_scalar!($LEFT, b, $OP, $OP_TYPE),
ScalarValue::Decimal128(..) => compute_decimal_op_scalar!($LEFT, $RIGHT, $OP, DecimalArray),
ScalarValue::Utf8(v) => compute_utf8_op_dyn_scalar!($LEFT, v, $OP),
ScalarValue::LargeUtf8(v) => compute_utf8_op_dyn_scalar!($LEFT, v, $OP),
ScalarValue::Int8(v) => compute_op_dyn_scalar!($LEFT, v, $OP),
ScalarValue::Int16(v) => compute_op_dyn_scalar!($LEFT, v, $OP),
ScalarValue::Int32(v) => compute_op_dyn_scalar!($LEFT, v, $OP),
ScalarValue::Int64(v) => compute_op_dyn_scalar!($LEFT, v, $OP),
ScalarValue::UInt8(v) => compute_op_dyn_scalar!($LEFT, v, $OP),
ScalarValue::UInt16(v) => compute_op_dyn_scalar!($LEFT, v, $OP),
ScalarValue::UInt32(v) => compute_op_dyn_scalar!($LEFT, v, $OP),
ScalarValue::UInt64(v) => compute_op_dyn_scalar!($LEFT, v, $OP),
ScalarValue::Float32(_) => compute_op_scalar!($LEFT, $RIGHT, $OP, Float32Array),
ScalarValue::Float64(_) => compute_op_scalar!($LEFT, $RIGHT, $OP, Float64Array),
ScalarValue::Utf8(v) => compute_utf8_op_dyn_scalar!($LEFT, v, $OP, $OP_TYPE),
ScalarValue::LargeUtf8(v) => compute_utf8_op_dyn_scalar!($LEFT, v, $OP, $OP_TYPE),
ScalarValue::Int8(v) => compute_op_dyn_scalar!($LEFT, v, $OP, $OP_TYPE),
ScalarValue::Int16(v) => compute_op_dyn_scalar!($LEFT, v, $OP, $OP_TYPE),
ScalarValue::Int32(v) => compute_op_dyn_scalar!($LEFT, v, $OP, $OP_TYPE),
ScalarValue::Int64(v) => compute_op_dyn_scalar!($LEFT, v, $OP, $OP_TYPE),
ScalarValue::UInt8(v) => compute_op_dyn_scalar!($LEFT, v, $OP, $OP_TYPE),
ScalarValue::UInt16(v) => compute_op_dyn_scalar!($LEFT, v, $OP, $OP_TYPE),
ScalarValue::UInt32(v) => compute_op_dyn_scalar!($LEFT, v, $OP, $OP_TYPE),
ScalarValue::UInt64(v) => compute_op_dyn_scalar!($LEFT, v, $OP, $OP_TYPE),
ScalarValue::Float32(v) => compute_op_dyn_scalar!($LEFT, v, $OP, $OP_TYPE),
ScalarValue::Float64(v) => compute_op_dyn_scalar!($LEFT, v, $OP, $OP_TYPE),
ScalarValue::Date32(_) => compute_op_scalar!($LEFT, $RIGHT, $OP, Date32Array),
ScalarValue::Date64(_) => compute_op_scalar!($LEFT, $RIGHT, $OP, Date64Array),
ScalarValue::TimestampSecond(..) => compute_op_scalar!($LEFT, $RIGHT, $OP, TimestampSecondArray),
Expand All @@ -1163,22 +1158,37 @@ impl BinaryExpr {
) -> Result<Option<Result<ArrayRef>>> {
let scalar_result = match &self.op {
Operator::Lt => {
binary_array_op_dyn_scalar!(array, scalar.clone(), lt)
binary_array_op_dyn_scalar!(array, scalar.clone(), lt, &DataType::Boolean)
}
Operator::LtEq => {
binary_array_op_dyn_scalar!(array, scalar.clone(), lt_eq)
binary_array_op_dyn_scalar!(
array,
scalar.clone(),
lt_eq,
&DataType::Boolean
)
}
Operator::Gt => {
binary_array_op_dyn_scalar!(array, scalar.clone(), gt)
binary_array_op_dyn_scalar!(array, scalar.clone(), gt, &DataType::Boolean)
}
Operator::GtEq => {
binary_array_op_dyn_scalar!(array, scalar.clone(), gt_eq)
binary_array_op_dyn_scalar!(
array,
scalar.clone(),
gt_eq,
&DataType::Boolean
)
}
Operator::Eq => {
binary_array_op_dyn_scalar!(array, scalar.clone(), eq)
binary_array_op_dyn_scalar!(array, scalar.clone(), eq, &DataType::Boolean)
}
Operator::NotEq => {
binary_array_op_dyn_scalar!(array, scalar.clone(), neq)
binary_array_op_dyn_scalar!(
array,
scalar.clone(),
neq,
&DataType::Boolean
)
}
Operator::Like => {
binary_string_array_op_scalar!(array, scalar.clone(), like)
Expand Down