-
Notifications
You must be signed in to change notification settings - Fork 2k
Date to Timestamp cast #5140
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Date to Timestamp cast #5140
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1683,7 +1683,6 @@ async fn test_current_time() -> Result<()> { | |
| #[tokio::test] | ||
| async fn test_ts_dt_binary_ops() -> Result<()> { | ||
| let ctx = SessionContext::new(); | ||
|
|
||
| // test cast in where clause | ||
| let sql = | ||
| "select count(1) result from (select now() as n) a where n = '2000-01-01'::date"; | ||
|
|
@@ -1742,5 +1741,55 @@ async fn test_ts_dt_binary_ops() -> Result<()> { | |
|
|
||
| assert_batches_eq!(expected, &results); | ||
|
|
||
| //test cast path timestamp date using literals | ||
| let sql = "select '2000-01-01'::timestamp >= '2000-01-01'::date"; | ||
| let df = ctx.sql(sql).await.unwrap(); | ||
|
|
||
| let plan = df.explain(true, false)?.collect().await?; | ||
| let batch = &plan[0]; | ||
| let mut res: Option<String> = None; | ||
| for row in 0..batch.num_rows() { | ||
| if &array_value_to_string(batch.column(0), row)? | ||
| == "logical_plan after type_coercion" | ||
| { | ||
| res = Some(array_value_to_string(batch.column(1), row)?); | ||
| break; | ||
| } | ||
| } | ||
| assert_eq!(res, Some("Projection: CAST(Utf8(\"2000-01-01\") AS Timestamp(Nanosecond, None)) >= CAST(CAST(Utf8(\"2000-01-01\") AS Date32) AS Timestamp(Nanosecond, None))\n EmptyRelation".to_string())); | ||
|
|
||
| //test cast path timestamp date using function | ||
| let sql = "select now() >= '2000-01-01'::date"; | ||
| let df = ctx.sql(sql).await.unwrap(); | ||
|
|
||
| let plan = df.explain(true, false)?.collect().await?; | ||
| let batch = &plan[0]; | ||
| let mut res: Option<String> = None; | ||
| for row in 0..batch.num_rows() { | ||
| if &array_value_to_string(batch.column(0), row)? | ||
| == "logical_plan after type_coercion" | ||
| { | ||
| res = Some(array_value_to_string(batch.column(1), row)?); | ||
| break; | ||
| } | ||
| } | ||
| assert_eq!(res, Some("Projection: CAST(now() AS Timestamp(Nanosecond, None)) >= CAST(CAST(Utf8(\"2000-01-01\") AS Date32) AS Timestamp(Nanosecond, None))\n EmptyRelation".to_string())); | ||
|
|
||
| let sql = "select now() = current_date()"; | ||
| let df = ctx.sql(sql).await.unwrap(); | ||
|
|
||
| let plan = df.explain(true, false)?.collect().await?; | ||
| let batch = &plan[0]; | ||
| let mut res: Option<String> = None; | ||
| for row in 0..batch.num_rows() { | ||
| if &array_value_to_string(batch.column(0), row)? | ||
| == "logical_plan after type_coercion" | ||
| { | ||
| res = Some(array_value_to_string(batch.column(1), row)?); | ||
| break; | ||
| } | ||
| } | ||
| assert_eq!(res, Some("Projection: CAST(now() AS Timestamp(Nanosecond, None)) = CAST(currentdate() AS Timestamp(Nanosecond, None))\n EmptyRelation".to_string())); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 |
||
|
|
||
| Ok(()) | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -541,33 +541,30 @@ fn is_time_with_valid_unit(datatype: DataType) -> bool { | |
| fn temporal_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> { | ||
| use arrow::datatypes::DataType::*; | ||
| match (lhs_type, rhs_type) { | ||
| (Date64, Date32) => Some(Date64), | ||
| (Date32, Date64) => Some(Date64), | ||
| (Utf8, Date32) => Some(Date32), | ||
| (Date32, Utf8) => Some(Date32), | ||
| (Utf8, Date64) => Some(Date64), | ||
| (Date64, Utf8) => Some(Date64), | ||
| (Utf8, Time32(unit)) => match is_time_with_valid_unit(Time32(unit.clone())) { | ||
| false => None, | ||
| true => Some(Time32(unit.clone())), | ||
| }, | ||
| (Time32(unit), Utf8) => match is_time_with_valid_unit(Time32(unit.clone())) { | ||
| false => None, | ||
| true => Some(Time32(unit.clone())), | ||
| }, | ||
| (Utf8, Time64(unit)) => match is_time_with_valid_unit(Time64(unit.clone())) { | ||
| false => None, | ||
| true => Some(Time64(unit.clone())), | ||
| }, | ||
| (Time64(unit), Utf8) => match is_time_with_valid_unit(Time64(unit.clone())) { | ||
| false => None, | ||
| true => Some(Time64(unit.clone())), | ||
| }, | ||
| (Timestamp(_, tz), Utf8) => Some(Timestamp(TimeUnit::Nanosecond, tz.clone())), | ||
| (Utf8, Timestamp(_, tz)) => Some(Timestamp(TimeUnit::Nanosecond, tz.clone())), | ||
| // TODO: need to investigate the result type for the comparison between timestamp and date | ||
| (Timestamp(_, _), Date32) => Some(Date32), | ||
| (Timestamp(_, _), Date64) => Some(Date64), | ||
| (Date64, Date32) | (Date32, Date64) => Some(Date64), | ||
| (Utf8, Date32) | (Date32, Utf8) => Some(Date32), | ||
| (Utf8, Date64) | (Date64, Utf8) => Some(Date64), | ||
| (Utf8, Time32(unit)) | (Time32(unit), Utf8) => { | ||
| match is_time_with_valid_unit(Time32(unit.clone())) { | ||
| false => None, | ||
| true => Some(Time32(unit.clone())), | ||
| } | ||
| } | ||
| (Utf8, Time64(unit)) | (Time64(unit), Utf8) => { | ||
| match is_time_with_valid_unit(Time64(unit.clone())) { | ||
| false => None, | ||
| true => Some(Time64(unit.clone())), | ||
| } | ||
| } | ||
| (Timestamp(_, tz), Utf8) | (Utf8, Timestamp(_, tz)) => { | ||
| Some(Timestamp(TimeUnit::Nanosecond, tz.clone())) | ||
| } | ||
|
Comment on lines
+559
to
+561
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i tried this, but it didn't work ❯ select '2000-01-01T00:00:00'::timestamp::timestamptz = '2000-01-01T00:00:00';
Internal("The type of Timestamp(Nanosecond, Some(\"+00:00\")) Eq Utf8 of binary physical should be same")
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We need another cast to add Utf <-> Timestamp #5117 probably addresses this cast. If not I'll create another ticket
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Even after #5117 this still fails. I will file a follow on ticket ❯ select '2000-01-01T00:00:00'::timestamp::timestamptz = '2000-01-01T00:00:00';
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Filed #5164 |
||
| (Timestamp(_, None), Date32) | (Date32, Timestamp(_, None)) => { | ||
| Some(Timestamp(TimeUnit::Nanosecond, None)) | ||
| } | ||
| (Timestamp(_, _tz), Date32) | (Date32, Timestamp(_, _tz)) => { | ||
| Some(Timestamp(TimeUnit::Nanosecond, None)) | ||
| } | ||
|
Comment on lines
+562
to
+567
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. does it mean that we convert Timestamptz to Timestamp and then compare?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thats correct, we lose TZ for now, we need some extra efforts to make arrow-rs work with TimestampTZ,planning to do so just after this PR merged. |
||
| (Timestamp(lhs_unit, lhs_tz), Timestamp(rhs_unit, rhs_tz)) => { | ||
| let tz = match (lhs_tz, rhs_tz) { | ||
| // can't cast across timezones | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -174,8 +174,8 @@ pub fn can_coerce_from(type_into: &DataType, type_from: &DataType) -> bool { | |
| | Float64 | ||
| | Decimal128(_, _) | ||
| ), | ||
| Timestamp(TimeUnit::Nanosecond, None) => { | ||
| matches!(type_from, Null | Timestamp(_, None)) | ||
| Timestamp(TimeUnit::Nanosecond, _) => { | ||
| matches!(type_from, Null | Timestamp(_, _) | Date32) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 (I have a draft PR in the works to support coercing from |
||
| } | ||
| Utf8 | LargeUtf8 => true, | ||
| Null => can_cast_types(type_from, type_into), | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You might be able to add these tests to sqllogictest if you wanted -- https://github.com/apache/arrow-datafusion/blob/master/datafusion/core/tests/sqllogictests/test_files/timestamps.slt
Not required but I find the sqllogictest framework much easier to work with
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks @alamb I will create a separate ticket to move all timestamp tests from timestamp.rs to sqllogictest, all that can be moved of course
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you very much -- that would be great
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Filed #5165