Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 50 additions & 1 deletion datafusion/core/tests/sql/timestamp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1683,7 +1683,6 @@ async fn test_current_time() -> Result<()> {
#[tokio::test]
async fn test_ts_dt_binary_ops() -> Result<()> {
let ctx = SessionContext::new();

// test cast in where clause
let sql =
"select count(1) result from (select now() as n) a where n = '2000-01-01'::date";
Expand Down Expand Up @@ -1742,5 +1741,55 @@ async fn test_ts_dt_binary_ops() -> Result<()> {

assert_batches_eq!(expected, &results);

//test cast path timestamp date using literals
let sql = "select '2000-01-01'::timestamp >= '2000-01-01'::date";
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You might be able to add these tests to sqllogictest if you wanted -- https://github.com/apache/arrow-datafusion/blob/master/datafusion/core/tests/sqllogictests/test_files/timestamps.slt

Not required but I find the sqllogictest framework much easier to work with

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @alamb I will create a separate ticket to move all timestamp tests from timestamp.rs to sqllogictest, all that can be moved of course

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you very much -- that would be great

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Filed #5165

let df = ctx.sql(sql).await.unwrap();

let plan = df.explain(true, false)?.collect().await?;
let batch = &plan[0];
let mut res: Option<String> = None;
for row in 0..batch.num_rows() {
if &array_value_to_string(batch.column(0), row)?
== "logical_plan after type_coercion"
{
res = Some(array_value_to_string(batch.column(1), row)?);
break;
}
}
assert_eq!(res, Some("Projection: CAST(Utf8(\"2000-01-01\") AS Timestamp(Nanosecond, None)) >= CAST(CAST(Utf8(\"2000-01-01\") AS Date32) AS Timestamp(Nanosecond, None))\n EmptyRelation".to_string()));

//test cast path timestamp date using function
let sql = "select now() >= '2000-01-01'::date";
let df = ctx.sql(sql).await.unwrap();

let plan = df.explain(true, false)?.collect().await?;
let batch = &plan[0];
let mut res: Option<String> = None;
for row in 0..batch.num_rows() {
if &array_value_to_string(batch.column(0), row)?
== "logical_plan after type_coercion"
{
res = Some(array_value_to_string(batch.column(1), row)?);
break;
}
}
assert_eq!(res, Some("Projection: CAST(now() AS Timestamp(Nanosecond, None)) >= CAST(CAST(Utf8(\"2000-01-01\") AS Date32) AS Timestamp(Nanosecond, None))\n EmptyRelation".to_string()));

let sql = "select now() = current_date()";
let df = ctx.sql(sql).await.unwrap();

let plan = df.explain(true, false)?.collect().await?;
let batch = &plan[0];
let mut res: Option<String> = None;
for row in 0..batch.num_rows() {
if &array_value_to_string(batch.column(0), row)?
== "logical_plan after type_coercion"
{
res = Some(array_value_to_string(batch.column(1), row)?);
break;
}
}
assert_eq!(res, Some("Projection: CAST(now() AS Timestamp(Nanosecond, None)) = CAST(currentdate() AS Timestamp(Nanosecond, None))\n EmptyRelation".to_string()));
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍


Ok(())
}
51 changes: 24 additions & 27 deletions datafusion/expr/src/type_coercion/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -541,33 +541,30 @@ fn is_time_with_valid_unit(datatype: DataType) -> bool {
fn temporal_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
use arrow::datatypes::DataType::*;
match (lhs_type, rhs_type) {
(Date64, Date32) => Some(Date64),
(Date32, Date64) => Some(Date64),
(Utf8, Date32) => Some(Date32),
(Date32, Utf8) => Some(Date32),
(Utf8, Date64) => Some(Date64),
(Date64, Utf8) => Some(Date64),
(Utf8, Time32(unit)) => match is_time_with_valid_unit(Time32(unit.clone())) {
false => None,
true => Some(Time32(unit.clone())),
},
(Time32(unit), Utf8) => match is_time_with_valid_unit(Time32(unit.clone())) {
false => None,
true => Some(Time32(unit.clone())),
},
(Utf8, Time64(unit)) => match is_time_with_valid_unit(Time64(unit.clone())) {
false => None,
true => Some(Time64(unit.clone())),
},
(Time64(unit), Utf8) => match is_time_with_valid_unit(Time64(unit.clone())) {
false => None,
true => Some(Time64(unit.clone())),
},
(Timestamp(_, tz), Utf8) => Some(Timestamp(TimeUnit::Nanosecond, tz.clone())),
(Utf8, Timestamp(_, tz)) => Some(Timestamp(TimeUnit::Nanosecond, tz.clone())),
// TODO: need to investigate the result type for the comparison between timestamp and date
(Timestamp(_, _), Date32) => Some(Date32),
(Timestamp(_, _), Date64) => Some(Date64),
(Date64, Date32) | (Date32, Date64) => Some(Date64),
(Utf8, Date32) | (Date32, Utf8) => Some(Date32),
(Utf8, Date64) | (Date64, Utf8) => Some(Date64),
(Utf8, Time32(unit)) | (Time32(unit), Utf8) => {
match is_time_with_valid_unit(Time32(unit.clone())) {
false => None,
true => Some(Time32(unit.clone())),
}
}
(Utf8, Time64(unit)) | (Time64(unit), Utf8) => {
match is_time_with_valid_unit(Time64(unit.clone())) {
false => None,
true => Some(Time64(unit.clone())),
}
}
(Timestamp(_, tz), Utf8) | (Utf8, Timestamp(_, tz)) => {
Some(Timestamp(TimeUnit::Nanosecond, tz.clone()))
}
Comment on lines +559 to +561
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i tried this, but it didn't work

select '2000-01-01T00:00:00'::timestamp::timestamptz = '2000-01-01T00:00:00';
Internal("The type of Timestamp(Nanosecond, Some(\"+00:00\")) Eq Utf8 of binary physical should be same")

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need another cast to add Utf <-> Timestamp #5117 probably addresses this cast. If not I'll create another ticket

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Even after #5117 this still fails. I will file a follow on ticket

❯ select '2000-01-01T00:00:00'::timestamp::timestamptz = '2000-01-01T00:00:00';
Internal("The type of Timestamp(Nanosecond, Some("+00:00")) Eq Utf8 of binary physical should be same")

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Filed #5164

(Timestamp(_, None), Date32) | (Date32, Timestamp(_, None)) => {
Some(Timestamp(TimeUnit::Nanosecond, None))
}
(Timestamp(_, _tz), Date32) | (Date32, Timestamp(_, _tz)) => {
Some(Timestamp(TimeUnit::Nanosecond, None))
}
Comment on lines +562 to +567
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does it mean that we convert Timestamptz to Timestamp and then compare?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thats correct, we lose TZ for now, we need some extra efforts to make arrow-rs work with TimestampTZ,planning to do so just after this PR merged.

(Timestamp(lhs_unit, lhs_tz), Timestamp(rhs_unit, rhs_tz)) => {
let tz = match (lhs_tz, rhs_tz) {
// can't cast across timezones
Expand Down
4 changes: 2 additions & 2 deletions datafusion/expr/src/type_coercion/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,8 @@ pub fn can_coerce_from(type_into: &DataType, type_from: &DataType) -> bool {
| Float64
| Decimal128(_, _)
),
Timestamp(TimeUnit::Nanosecond, None) => {
matches!(type_from, Null | Timestamp(_, None))
Timestamp(TimeUnit::Nanosecond, _) => {
matches!(type_from, Null | Timestamp(_, _) | Date32)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 (I have a draft PR in the works to support coercing from utf8 as well #5117)

}
Utf8 | LargeUtf8 => true,
Null => can_cast_types(type_from, type_into),
Expand Down
19 changes: 19 additions & 0 deletions datafusion/optimizer/src/type_coercion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1038,4 +1038,23 @@ mod test {
Ok(())
// TODO add more test for this
}

#[test]
fn binary_op_date32_eq_ts() -> Result<()> {
let expr = cast(
lit("1998-03-18"),
DataType::Timestamp(arrow::datatypes::TimeUnit::Nanosecond, None),
)
.eq(cast(lit("1998-03-18"), DataType::Date32));
let empty = Arc::new(LogicalPlan::EmptyRelation(EmptyRelation {
produce_one_row: false,
schema: Arc::new(DFSchema::empty()),
}));
let plan = LogicalPlan::Projection(Projection::try_new(vec![expr], empty)?);
dbg!(&plan);
let expected =
"Projection: CAST(Utf8(\"1998-03-18\") AS Timestamp(Nanosecond, None)) = CAST(CAST(Utf8(\"1998-03-18\") AS Date32) AS Timestamp(Nanosecond, None))\n EmptyRelation";
assert_optimized_plan_eq(&plan, expected)?;
Ok(())
}
}