From 887c8f6f65adf2ac8fc614bcb74d475815035415 Mon Sep 17 00:00:00 2001 From: comphead Date: Tue, 24 Jan 2023 17:16:38 -0800 Subject: [PATCH 1/2] ts to date cast --- datafusion/core/tests/sql/timestamp.rs | 83 +++++++++++++++++++ datafusion/expr/src/type_coercion/binary.rs | 32 +++---- .../expr/src/type_coercion/functions.rs | 4 +- datafusion/optimizer/src/type_coercion.rs | 16 ++++ 4 files changed, 113 insertions(+), 22 deletions(-) diff --git a/datafusion/core/tests/sql/timestamp.rs b/datafusion/core/tests/sql/timestamp.rs index c7dce1b00aa07..4ffcc3522ec90 100644 --- a/datafusion/core/tests/sql/timestamp.rs +++ b/datafusion/core/tests/sql/timestamp.rs @@ -1680,10 +1680,43 @@ async fn test_current_time() -> Result<()> { Ok(()) } + +#[tokio::test] +async fn test_ts_ms_fail() -> Result<()> { + let ctx = SessionContext::new(); + + let sql = + "select to_timestamp_millis(1926632005177685123)"; + let results = execute_to_batches(&ctx, sql).await; + + let expected = vec![ + "+--------+", + "| result |", + "+--------+", + "| 0 |", + "+--------+", + ]; + + assert_batches_eq!(expected, &results); + Ok(()) + +} + #[tokio::test] async fn test_ts_dt_binary_ops() -> Result<()> { let ctx = SessionContext::new(); + //test cast path + //let sql = "explain verbose select now() = '2021-1-1'::date"; + //let sql = "explain verbose select '1'::int = 2::string"; + // let sql = "explain verbose select '1998-03-18'::timestamp = '1998-03-18'::date"; + + // let df = ctx.sql(sql).await.unwrap(); + // let plan = df.collect().await?; + // let formatted = arrow::util::pretty::pretty_format_batches(&plan) + // .unwrap() + // .to_string(); + // println!("{}", formatted); // test cast in where clause let sql = "select count(1) result from (select now() as n) a where n = '2000-01-01'::date"; @@ -1742,5 +1775,55 @@ async fn test_ts_dt_binary_ops() -> Result<()> { assert_batches_eq!(expected, &results); + //test cast path timestamp date using literals + let sql = "select '2000-01-01'::timestamp >= '2000-01-01'::date"; + let df = ctx.sql(sql).await.unwrap(); + + let plan = df.explain(true, false)?.collect().await?; + let batch = &plan[0]; + let mut res: Option = None; + for row in 0..batch.num_rows() { + if &array_value_to_string(batch.column(0), row)? + == "logical_plan after type_coercion" + { + res = Some(array_value_to_string(batch.column(1), row)?); + break; + } + } + assert_eq!(res, Some("Projection: CAST(Utf8(\"2000-01-01\") AS Timestamp(Nanosecond, None)) = CAST(CAST(Utf8(\"2000-01-01\") AS Date32) AS Timestamp(Nanosecond, None))\n EmptyRelation".to_string())); + + //test cast path timestamp date using function + let sql = "select now() = '2000-01-01'::date"; + let df = ctx.sql(sql).await.unwrap(); + + let plan = df.explain(true, false)?.collect().await?; + let batch = &plan[0]; + let mut res: Option = None; + for row in 0..batch.num_rows() { + if &array_value_to_string(batch.column(0), row)? + == "logical_plan after type_coercion" + { + res = Some(array_value_to_string(batch.column(1), row)?); + break; + } + } + assert_eq!(res, Some("Projection: now() = CAST(CAST(Utf8(\"2000-01-01\") AS Date32) AS Timestamp(Nanosecond, Some(\"+00:00\")))\n EmptyRelation".to_string())); + + let sql = "select now() = current_date()"; + let df = ctx.sql(sql).await.unwrap(); + + let plan = df.explain(true, false)?.collect().await?; + let batch = &plan[0]; + let mut res: Option = None; + for row in 0..batch.num_rows() { + if &array_value_to_string(batch.column(0), row)? + == "logical_plan after type_coercion" + { + res = Some(array_value_to_string(batch.column(1), row)?); + break; + } + } + assert_eq!(res, Some("Projection: now() = CAST(currentdate() AS Timestamp(Nanosecond, Some(\"+00:00\")))\n EmptyRelation".to_string())); + Ok(()) } diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs index d2923c8dbfae7..f505e619e31fe 100644 --- a/datafusion/expr/src/type_coercion/binary.rs +++ b/datafusion/expr/src/type_coercion/binary.rs @@ -541,33 +541,25 @@ fn is_time_with_valid_unit(datatype: DataType) -> bool { fn temporal_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { use arrow::datatypes::DataType::*; match (lhs_type, rhs_type) { - (Date64, Date32) => Some(Date64), - (Date32, Date64) => Some(Date64), - (Utf8, Date32) => Some(Date32), - (Date32, Utf8) => Some(Date32), - (Utf8, Date64) => Some(Date64), - (Date64, Utf8) => Some(Date64), - (Utf8, Time32(unit)) => match is_time_with_valid_unit(Time32(unit.clone())) { + (Date64, Date32) | (Date32, Date64) => Some(Date64), + (Utf8, Date32) | (Date32, Utf8) => Some(Date32), + (Utf8, Date64) | (Date64, Utf8) => Some(Date64), + (Utf8, Time32(unit)) | (Time32(unit), Utf8) => match is_time_with_valid_unit(Time32(unit.clone())) { false => None, true => Some(Time32(unit.clone())), }, - (Time32(unit), Utf8) => match is_time_with_valid_unit(Time32(unit.clone())) { - false => None, - true => Some(Time32(unit.clone())), - }, - (Utf8, Time64(unit)) => match is_time_with_valid_unit(Time64(unit.clone())) { - false => None, - true => Some(Time64(unit.clone())), - }, - (Time64(unit), Utf8) => match is_time_with_valid_unit(Time64(unit.clone())) { + (Utf8, Time64(unit)) | (Time64(unit), Utf8) => match is_time_with_valid_unit(Time64(unit.clone())) { false => None, true => Some(Time64(unit.clone())), }, - (Timestamp(_, tz), Utf8) => Some(Timestamp(TimeUnit::Nanosecond, tz.clone())), - (Utf8, Timestamp(_, tz)) => Some(Timestamp(TimeUnit::Nanosecond, tz.clone())), + (Timestamp(_, tz), Utf8) | (Utf8, Timestamp(_, tz)) => Some(Timestamp(TimeUnit::Nanosecond, tz.clone())), // TODO: need to investigate the result type for the comparison between timestamp and date - (Timestamp(_, _), Date32) => Some(Date32), - (Timestamp(_, _), Date64) => Some(Date64), + //(Date32, Timestamp(tu, tz)) => Some(Timestamp(tu.clo, tz.clone())), + //(Timestamp(tu, tz), Date32) => Some(Timestamp(tu.clone(), tz.clone())), + //(Date32, Timestamp(tu, tz)) => Some(Timestamp(tu.clone(), tz.clone())), + (Timestamp(_, None), Date32) | (Date32, Timestamp(_, None)) => Some(Timestamp(TimeUnit::Nanosecond, None)), + //(Timestamp(tu, tz), Date32) | (Date32, Timestamp(tu, tz)) => Some(Date32), + //(Timestamp(_, _), Date64) => Some(Date64), (Timestamp(lhs_unit, lhs_tz), Timestamp(rhs_unit, rhs_tz)) => { let tz = match (lhs_tz, rhs_tz) { // can't cast across timezones diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index f58050eb9415d..a732c552bee7f 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -174,8 +174,8 @@ pub fn can_coerce_from(type_into: &DataType, type_from: &DataType) -> bool { | Float64 | Decimal128(_, _) ), - Timestamp(TimeUnit::Nanosecond, None) => { - matches!(type_from, Null | Timestamp(_, None)) + Timestamp(TimeUnit::Nanosecond, _) => { + matches!(type_from, Null | Timestamp(_, _) | Date32) } Utf8 | LargeUtf8 => true, Null => can_cast_types(type_from, type_into), diff --git a/datafusion/optimizer/src/type_coercion.rs b/datafusion/optimizer/src/type_coercion.rs index e335c9ff5f851..c56b2347ef330 100644 --- a/datafusion/optimizer/src/type_coercion.rs +++ b/datafusion/optimizer/src/type_coercion.rs @@ -1038,4 +1038,20 @@ mod test { Ok(()) // TODO add more test for this } + + #[test] + fn binary_op_date32_eq_ts() -> Result<()> { + let expr = cast(lit("1998-03-18"), DataType::Timestamp(arrow::datatypes::TimeUnit::Nanosecond, None)).eq( + cast(lit("1998-03-18"), DataType::Date32)); + let empty = Arc::new(LogicalPlan::EmptyRelation(EmptyRelation { + produce_one_row: false, + schema: Arc::new(DFSchema::empty()), + })); + let plan = LogicalPlan::Projection(Projection::try_new(vec![expr], empty)?); + dbg!(&plan); + let expected = + "Projection: CAST(Utf8(\"1998-03-18\") AS Timestamp(Nanosecond, None)) = CAST(CAST(Utf8(\"1998-03-18\") AS Date32) AS Timestamp(Nanosecond, None))\n EmptyRelation"; + assert_optimized_plan_eq(&plan, expected)?; + Ok(()) + } } From ca350c15d5c8d6600c87e520a651704d9c6de573 Mon Sep 17 00:00:00 2001 From: comphead Date: Tue, 31 Jan 2023 16:50:21 -0800 Subject: [PATCH 2/2] Date to Timestamp Cast --- datafusion/core/tests/sql/timestamp.rs | 100 +++++++------------- datafusion/expr/src/type_coercion/binary.rs | 37 ++++---- datafusion/optimizer/src/type_coercion.rs | 7 +- 3 files changed, 59 insertions(+), 85 deletions(-) diff --git a/datafusion/core/tests/sql/timestamp.rs b/datafusion/core/tests/sql/timestamp.rs index aed39135a1834..c547f82e3b63c 100644 --- a/datafusion/core/tests/sql/timestamp.rs +++ b/datafusion/core/tests/sql/timestamp.rs @@ -1680,43 +1680,9 @@ async fn test_current_time() -> Result<()> { Ok(()) } - -#[tokio::test] -async fn test_ts_ms_fail() -> Result<()> { - let ctx = SessionContext::new(); - - let sql = - "select to_timestamp_millis(1926632005177685123)"; - let results = execute_to_batches(&ctx, sql).await; - - let expected = vec![ - "+--------+", - "| result |", - "+--------+", - "| 0 |", - "+--------+", - ]; - - assert_batches_eq!(expected, &results); - Ok(()) - -} - #[tokio::test] async fn test_ts_dt_binary_ops() -> Result<()> { let ctx = SessionContext::new(); - - //test cast path - //let sql = "explain verbose select now() = '2021-1-1'::date"; - //let sql = "explain verbose select '1'::int = 2::string"; - // let sql = "explain verbose select '1998-03-18'::timestamp = '1998-03-18'::date"; - - // let df = ctx.sql(sql).await.unwrap(); - // let plan = df.collect().await?; - // let formatted = arrow::util::pretty::pretty_format_batches(&plan) - // .unwrap() - // .to_string(); - // println!("{}", formatted); // test cast in where clause let sql = "select count(1) result from (select now() as n) a where n = '2000-01-01'::date"; @@ -1790,40 +1756,40 @@ async fn test_ts_dt_binary_ops() -> Result<()> { break; } } - assert_eq!(res, Some("Projection: CAST(Utf8(\"2000-01-01\") AS Timestamp(Nanosecond, None)) = CAST(CAST(Utf8(\"2000-01-01\") AS Date32) AS Timestamp(Nanosecond, None))\n EmptyRelation".to_string())); - - //test cast path timestamp date using function - let sql = "select now() = '2000-01-01'::date"; - let df = ctx.sql(sql).await.unwrap(); - - let plan = df.explain(true, false)?.collect().await?; - let batch = &plan[0]; - let mut res: Option = None; - for row in 0..batch.num_rows() { - if &array_value_to_string(batch.column(0), row)? - == "logical_plan after type_coercion" - { - res = Some(array_value_to_string(batch.column(1), row)?); - break; - } + assert_eq!(res, Some("Projection: CAST(Utf8(\"2000-01-01\") AS Timestamp(Nanosecond, None)) >= CAST(CAST(Utf8(\"2000-01-01\") AS Date32) AS Timestamp(Nanosecond, None))\n EmptyRelation".to_string())); + + //test cast path timestamp date using function + let sql = "select now() >= '2000-01-01'::date"; + let df = ctx.sql(sql).await.unwrap(); + + let plan = df.explain(true, false)?.collect().await?; + let batch = &plan[0]; + let mut res: Option = None; + for row in 0..batch.num_rows() { + if &array_value_to_string(batch.column(0), row)? + == "logical_plan after type_coercion" + { + res = Some(array_value_to_string(batch.column(1), row)?); + break; } - assert_eq!(res, Some("Projection: now() = CAST(CAST(Utf8(\"2000-01-01\") AS Date32) AS Timestamp(Nanosecond, Some(\"+00:00\")))\n EmptyRelation".to_string())); - - let sql = "select now() = current_date()"; - let df = ctx.sql(sql).await.unwrap(); - - let plan = df.explain(true, false)?.collect().await?; - let batch = &plan[0]; - let mut res: Option = None; - for row in 0..batch.num_rows() { - if &array_value_to_string(batch.column(0), row)? - == "logical_plan after type_coercion" - { - res = Some(array_value_to_string(batch.column(1), row)?); - break; - } + } + assert_eq!(res, Some("Projection: CAST(now() AS Timestamp(Nanosecond, None)) >= CAST(CAST(Utf8(\"2000-01-01\") AS Date32) AS Timestamp(Nanosecond, None))\n EmptyRelation".to_string())); + + let sql = "select now() = current_date()"; + let df = ctx.sql(sql).await.unwrap(); + + let plan = df.explain(true, false)?.collect().await?; + let batch = &plan[0]; + let mut res: Option = None; + for row in 0..batch.num_rows() { + if &array_value_to_string(batch.column(0), row)? + == "logical_plan after type_coercion" + { + res = Some(array_value_to_string(batch.column(1), row)?); + break; } - assert_eq!(res, Some("Projection: now() = CAST(currentdate() AS Timestamp(Nanosecond, Some(\"+00:00\")))\n EmptyRelation".to_string())); - + } + assert_eq!(res, Some("Projection: CAST(now() AS Timestamp(Nanosecond, None)) = CAST(currentdate() AS Timestamp(Nanosecond, None))\n EmptyRelation".to_string())); + Ok(()) } diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs index f505e619e31fe..d942d1cf1cbec 100644 --- a/datafusion/expr/src/type_coercion/binary.rs +++ b/datafusion/expr/src/type_coercion/binary.rs @@ -544,22 +544,27 @@ fn temporal_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option Some(Date64), (Utf8, Date32) | (Date32, Utf8) => Some(Date32), (Utf8, Date64) | (Date64, Utf8) => Some(Date64), - (Utf8, Time32(unit)) | (Time32(unit), Utf8) => match is_time_with_valid_unit(Time32(unit.clone())) { - false => None, - true => Some(Time32(unit.clone())), - }, - (Utf8, Time64(unit)) | (Time64(unit), Utf8) => match is_time_with_valid_unit(Time64(unit.clone())) { - false => None, - true => Some(Time64(unit.clone())), - }, - (Timestamp(_, tz), Utf8) | (Utf8, Timestamp(_, tz)) => Some(Timestamp(TimeUnit::Nanosecond, tz.clone())), - // TODO: need to investigate the result type for the comparison between timestamp and date - //(Date32, Timestamp(tu, tz)) => Some(Timestamp(tu.clo, tz.clone())), - //(Timestamp(tu, tz), Date32) => Some(Timestamp(tu.clone(), tz.clone())), - //(Date32, Timestamp(tu, tz)) => Some(Timestamp(tu.clone(), tz.clone())), - (Timestamp(_, None), Date32) | (Date32, Timestamp(_, None)) => Some(Timestamp(TimeUnit::Nanosecond, None)), - //(Timestamp(tu, tz), Date32) | (Date32, Timestamp(tu, tz)) => Some(Date32), - //(Timestamp(_, _), Date64) => Some(Date64), + (Utf8, Time32(unit)) | (Time32(unit), Utf8) => { + match is_time_with_valid_unit(Time32(unit.clone())) { + false => None, + true => Some(Time32(unit.clone())), + } + } + (Utf8, Time64(unit)) | (Time64(unit), Utf8) => { + match is_time_with_valid_unit(Time64(unit.clone())) { + false => None, + true => Some(Time64(unit.clone())), + } + } + (Timestamp(_, tz), Utf8) | (Utf8, Timestamp(_, tz)) => { + Some(Timestamp(TimeUnit::Nanosecond, tz.clone())) + } + (Timestamp(_, None), Date32) | (Date32, Timestamp(_, None)) => { + Some(Timestamp(TimeUnit::Nanosecond, None)) + } + (Timestamp(_, _tz), Date32) | (Date32, Timestamp(_, _tz)) => { + Some(Timestamp(TimeUnit::Nanosecond, None)) + } (Timestamp(lhs_unit, lhs_tz), Timestamp(rhs_unit, rhs_tz)) => { let tz = match (lhs_tz, rhs_tz) { // can't cast across timezones diff --git a/datafusion/optimizer/src/type_coercion.rs b/datafusion/optimizer/src/type_coercion.rs index c56b2347ef330..96dacab512efe 100644 --- a/datafusion/optimizer/src/type_coercion.rs +++ b/datafusion/optimizer/src/type_coercion.rs @@ -1041,8 +1041,11 @@ mod test { #[test] fn binary_op_date32_eq_ts() -> Result<()> { - let expr = cast(lit("1998-03-18"), DataType::Timestamp(arrow::datatypes::TimeUnit::Nanosecond, None)).eq( - cast(lit("1998-03-18"), DataType::Date32)); + let expr = cast( + lit("1998-03-18"), + DataType::Timestamp(arrow::datatypes::TimeUnit::Nanosecond, None), + ) + .eq(cast(lit("1998-03-18"), DataType::Date32)); let empty = Arc::new(LogicalPlan::EmptyRelation(EmptyRelation { produce_one_row: false, schema: Arc::new(DFSchema::empty()),