Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions datafusion/core/src/physical_plan/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2093,10 +2093,7 @@ mod tests {
];
for case in cases {
let logical_plan = test_csv_scan().await?.project(vec![case.clone()]);
let message = format!(
"Expression {case:?} expected to error due to impossible coercion"
);
assert!(logical_plan.is_err(), "{}", message);
Comment on lines -2096 to -2099
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Type checking is done in the Type coercion analyzer rule rather than directly in the build

assert!(logical_plan.is_ok());
}
Ok(())
}
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/tests/sqllogictests/test_files/dates.slt
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ select i_item_desc from test
where d3_date > now() + '5 days';

# DATE minus DATE
query error DataFusion error: Error during planning: Date32 \- Date32 can't be evaluated because there isn't a common type to coerce the types to
query error DataFusion error: Error during planning: Unsupported argument types\. Can not evaluate Date32 \- Date32
SELECT DATE '2023-04-09' - DATE '2023-04-02';

# DATE minus Timestamp
Expand Down
5 changes: 2 additions & 3 deletions datafusion/core/tests/sqllogictests/test_files/timestamps.slt
Original file line number Diff line number Diff line change
Expand Up @@ -1014,7 +1014,7 @@ SELECT ts1 + i FROM foo;
2003-07-12T01:31:15.000123463

# Timestamp + Timestamp => error
statement error DataFusion error: Error during planning: Timestamp\(Nanosecond, None\) \+ Timestamp\(Nanosecond, None\) can't be evaluated because there isn't a common type to coerce the types to
statement error DataFusion error: Error during planning: Unsupported argument types\. Can not evaluate Timestamp\(Nanosecond, None\) \+ Timestamp\(Nanosecond, None\)
SELECT ts1 + ts2
FROM foo;

Expand All @@ -1031,8 +1031,7 @@ SELECT '2000-01-01T00:00:00'::timestamp - '2010-01-01T00:00:00'::timestamp;
0 years 0 mons -3653 days 0 hours 0 mins 0.000000000 secs

# Interval - Timestamp => error
statement error DataFusion error: Error during planning: interval can't subtract timestamp/date

statement error DataFusion error: type_coercion\ncaused by\nError during planning: interval can't subtract timestamp/date
SELECT i - ts1 from FOO;

statement ok
Expand Down
65 changes: 53 additions & 12 deletions datafusion/expr/src/type_coercion/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ pub fn binary_operator_data_type(
let result_type = if !any_decimal(lhs_type, rhs_type) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I als find binary_oprator_data_type to be confusing and redundant with get_result_type but we can fix that in a follow on PR perhaps

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great suggestion, I will do in following PR

// validate that it is possible to perform the operation on incoming types.
// (or the return datatype cannot be inferred)
coerce_types(lhs_type, op, rhs_type)?
get_result_type(lhs_type, op, rhs_type)?
} else {
let (coerced_lhs_type, coerced_rhs_type) =
math_decimal_coercion(lhs_type, rhs_type);
Expand Down Expand Up @@ -106,19 +106,60 @@ pub fn binary_operator_data_type(
}
}

/// Coercion rules for all binary operators. Returns the output type
/// of applying `op` to an argument of `lhs_type` and `rhs_type`.
/// returns the resulting type of a binary expression evaluating the `op` with the left and right hand types
pub fn get_result_type(
lhs_type: &DataType,
op: &Operator,
rhs_type: &DataType,
) -> Result<DataType> {
let result = match op {
Operator::And
| Operator::Or
| Operator::Eq
| Operator::NotEq
| Operator::Lt
| Operator::Gt
| Operator::GtEq
| Operator::LtEq
| Operator::IsDistinctFrom
Comment on lines +119 to +124
Copy link
Copy Markdown
Member Author

@jackwener jackwener May 4, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Original, when we call get_data_type, it will return common type instead of result type (Boolean) in coerce_type. So original it's wrong.

| Operator::IsNotDistinctFrom => Some(DataType::Boolean),
Operator::Plus | Operator::Minus
if is_datetime(lhs_type)
|| is_datetime(rhs_type)
|| is_interval(lhs_type)
|| is_interval(rhs_type) =>
{
temporal_add_sub_coercion(lhs_type, rhs_type, op)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Eventually I think it would make the code easier to understand and maintain if we can disentangle coerce_types from get_result_types. To start in this PR, calling into coerce_types is fine.

Eventually I think it would keep the logic much simpler if get_result_type is only called on expressions that have their inputs properly coerced.

}
Operator::BitwiseAnd
| Operator::BitwiseOr
| Operator::BitwiseXor
| Operator::BitwiseShiftRight
| Operator::BitwiseShiftLeft
| Operator::Plus
| Operator::Minus
| Operator::Modulo
| Operator::Divide
| Operator::Multiply
| Operator::RegexMatch
| Operator::RegexIMatch
| Operator::RegexNotMatch
| Operator::RegexNotIMatch
| Operator::StringConcat => coerce_types(lhs_type, op, rhs_type).ok(),
};

match result {
None => Err(DataFusionError::Plan(format!(
"Unsupported argument types. Can not evaluate {lhs_type:?} {op} {rhs_type:?}"
))),
Some(t) => Ok(t),
}
}

/// Coercion rules for all binary operators. Returns the 'coerce_types'
/// is returns the type the arguments should be coerced to
///
/// Returns None if no suitable type can be found.
///
/// TODO this function is trying to serve two purposes at once; it
/// determines the result type of the binary operation and also
/// determines how the inputs can be coerced but this results in
/// inconsistencies in some cases (particular around date + interval)
/// when the input argument types do not match the output argument
/// types
///
/// Tracking issue is <https://github.com/apache/arrow-datafusion/issues/3419>
pub fn coerce_types(
lhs_type: &DataType,
op: &Operator,
Expand Down
4 changes: 2 additions & 2 deletions datafusion/physical-expr/src/expressions/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ use arrow::datatypes::{DataType, Schema};
use arrow::record_batch::RecordBatch;

use datafusion_common::{DataFusionError, Result};
use datafusion_expr::type_coercion::binary::coerce_types;
use datafusion_expr::type_coercion::binary::get_result_type;
use datafusion_expr::{ColumnarValue, Operator};
use std::any::Any;
use std::fmt::{Display, Formatter};
Expand Down Expand Up @@ -77,7 +77,7 @@ impl PhysicalExpr for DateTimeIntervalExpr {
}

fn data_type(&self, input_schema: &Schema) -> Result<DataType> {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would also love to remove DateTimeIntervalExpr out of its own thing and into binary.rs with the other operators, but for now 👍

coerce_types(
get_result_type(
&self.lhs.data_type(input_schema)?,
&Operator::Minus,
&self.rhs.data_type(input_schema)?,
Expand Down
4 changes: 2 additions & 2 deletions datafusion/physical-expr/src/intervals/cp_solver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ use std::sync::Arc;

use arrow_schema::DataType;
use datafusion_common::{Result, ScalarValue};
use datafusion_expr::type_coercion::binary::coerce_types;
use datafusion_expr::type_coercion::binary::get_result_type;
use datafusion_expr::Operator;
use petgraph::graph::NodeIndex;
use petgraph::stable_graph::{DefaultIx, StableGraph};
Expand Down Expand Up @@ -260,7 +260,7 @@ fn comparison_operator_target(
op: &Operator,
right_datatype: &DataType,
) -> Result<Interval> {
let datatype = coerce_types(left_datatype, &Operator::Minus, right_datatype)?;
let datatype = get_result_type(left_datatype, &Operator::Minus, right_datatype)?;
let unbounded = IntervalBound::make_unbounded(&datatype)?;
let zero = ScalarValue::new_zero(&datatype)?;
Ok(match *op {
Expand Down
6 changes: 3 additions & 3 deletions datafusion/physical-expr/src/intervals/interval_aritmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use std::fmt::{Display, Formatter};
use arrow::compute::{cast_with_options, CastOptions};
use arrow::datatypes::DataType;
use datafusion_common::{DataFusionError, Result, ScalarValue};
use datafusion_expr::type_coercion::binary::coerce_types;
use datafusion_expr::type_coercion::binary::get_result_type;
use datafusion_expr::Operator;

use crate::aggregate::min_max::{max, min};
Expand Down Expand Up @@ -82,7 +82,7 @@ impl IntervalBound {
) -> Result<IntervalBound> {
let rhs = other.borrow();
if self.is_unbounded() || rhs.is_unbounded() {
return IntervalBound::make_unbounded(coerce_types(
return IntervalBound::make_unbounded(get_result_type(
&self.get_datatype(),
&Operator::Plus,
&rhs.get_datatype(),
Expand All @@ -109,7 +109,7 @@ impl IntervalBound {
) -> Result<IntervalBound> {
let rhs = other.borrow();
if self.is_unbounded() || rhs.is_unbounded() {
return IntervalBound::make_unbounded(coerce_types(
return IntervalBound::make_unbounded(get_result_type(
&self.get_datatype(),
&Operator::Minus,
&rhs.get_datatype(),
Expand Down