-
Notifications
You must be signed in to change notification settings - Fork 2k
refactor: separate get_result_type from coerce_type
#6221
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -42,7 +42,7 @@ pub fn binary_operator_data_type( | |
| let result_type = if !any_decimal(lhs_type, rhs_type) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I als find
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Great suggestion, I will do in following PR |
||
| // validate that it is possible to perform the operation on incoming types. | ||
| // (or the return datatype cannot be inferred) | ||
| coerce_types(lhs_type, op, rhs_type)? | ||
| get_result_type(lhs_type, op, rhs_type)? | ||
| } else { | ||
| let (coerced_lhs_type, coerced_rhs_type) = | ||
| math_decimal_coercion(lhs_type, rhs_type); | ||
|
|
@@ -106,19 +106,60 @@ pub fn binary_operator_data_type( | |
| } | ||
| } | ||
|
|
||
| /// Coercion rules for all binary operators. Returns the output type | ||
| /// of applying `op` to an argument of `lhs_type` and `rhs_type`. | ||
| /// returns the resulting type of a binary expression evaluating the `op` with the left and right hand types | ||
| pub fn get_result_type( | ||
jackwener marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| lhs_type: &DataType, | ||
| op: &Operator, | ||
| rhs_type: &DataType, | ||
| ) -> Result<DataType> { | ||
| let result = match op { | ||
| Operator::And | ||
| | Operator::Or | ||
| | Operator::Eq | ||
| | Operator::NotEq | ||
| | Operator::Lt | ||
| | Operator::Gt | ||
| | Operator::GtEq | ||
| | Operator::LtEq | ||
| | Operator::IsDistinctFrom | ||
|
Comment on lines
+119
to
+124
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Original, when we call |
||
| | Operator::IsNotDistinctFrom => Some(DataType::Boolean), | ||
| Operator::Plus | Operator::Minus | ||
| if is_datetime(lhs_type) | ||
| || is_datetime(rhs_type) | ||
| || is_interval(lhs_type) | ||
| || is_interval(rhs_type) => | ||
| { | ||
| temporal_add_sub_coercion(lhs_type, rhs_type, op) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Eventually I think it would make the code easier to understand and maintain if we can disentangle Eventually I think it would keep the logic much simpler if |
||
| } | ||
| Operator::BitwiseAnd | ||
| | Operator::BitwiseOr | ||
| | Operator::BitwiseXor | ||
| | Operator::BitwiseShiftRight | ||
| | Operator::BitwiseShiftLeft | ||
| | Operator::Plus | ||
| | Operator::Minus | ||
| | Operator::Modulo | ||
| | Operator::Divide | ||
| | Operator::Multiply | ||
| | Operator::RegexMatch | ||
| | Operator::RegexIMatch | ||
| | Operator::RegexNotMatch | ||
| | Operator::RegexNotIMatch | ||
| | Operator::StringConcat => coerce_types(lhs_type, op, rhs_type).ok(), | ||
| }; | ||
|
|
||
| match result { | ||
| None => Err(DataFusionError::Plan(format!( | ||
| "Unsupported argument types. Can not evaluate {lhs_type:?} {op} {rhs_type:?}" | ||
| ))), | ||
| Some(t) => Ok(t), | ||
| } | ||
| } | ||
|
|
||
| /// Coercion rules for all binary operators. Returns the 'coerce_types' | ||
| /// is returns the type the arguments should be coerced to | ||
| /// | ||
| /// Returns None if no suitable type can be found. | ||
| /// | ||
| /// TODO this function is trying to serve two purposes at once; it | ||
| /// determines the result type of the binary operation and also | ||
| /// determines how the inputs can be coerced but this results in | ||
| /// inconsistencies in some cases (particular around date + interval) | ||
| /// when the input argument types do not match the output argument | ||
| /// types | ||
| /// | ||
| /// Tracking issue is <https://github.com/apache/arrow-datafusion/issues/3419> | ||
| pub fn coerce_types( | ||
| lhs_type: &DataType, | ||
| op: &Operator, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,7 +23,7 @@ use arrow::datatypes::{DataType, Schema}; | |
| use arrow::record_batch::RecordBatch; | ||
|
|
||
| use datafusion_common::{DataFusionError, Result}; | ||
| use datafusion_expr::type_coercion::binary::coerce_types; | ||
| use datafusion_expr::type_coercion::binary::get_result_type; | ||
| use datafusion_expr::{ColumnarValue, Operator}; | ||
| use std::any::Any; | ||
| use std::fmt::{Display, Formatter}; | ||
|
|
@@ -77,7 +77,7 @@ impl PhysicalExpr for DateTimeIntervalExpr { | |
| } | ||
|
|
||
| fn data_type(&self, input_schema: &Schema) -> Result<DataType> { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would also love to remove |
||
| coerce_types( | ||
| get_result_type( | ||
| &self.lhs.data_type(input_schema)?, | ||
| &Operator::Minus, | ||
| &self.rhs.data_type(input_schema)?, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Type checking is done in the
Type coercion analyzer rulerather than directly in the build