From 9e5d869619521ee92fd0cb3adbe2e364b69c4a19 Mon Sep 17 00:00:00 2001 From: PHILO-HE Date: Wed, 25 Dec 2024 16:35:08 +0800 Subject: [PATCH 1/4] Initial --- velox/functions/sparksql/specialforms/SparkCastExpr.cpp | 8 ++++---- velox/functions/sparksql/specialforms/SparkCastHooks.cpp | 7 ++++++- velox/functions/sparksql/specialforms/SparkCastHooks.h | 6 ++++++ 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/velox/functions/sparksql/specialforms/SparkCastExpr.cpp b/velox/functions/sparksql/specialforms/SparkCastExpr.cpp index 2fbff4a669bf..f9c991f74d82 100644 --- a/velox/functions/sparksql/specialforms/SparkCastExpr.cpp +++ b/velox/functions/sparksql/specialforms/SparkCastExpr.cpp @@ -22,7 +22,7 @@ exec::ExprPtr SparkCastCallToSpecialForm::constructSpecialForm( const TypePtr& type, std::vector&& compiledChildren, bool trackCpuUsage, - const core::QueryConfig& /*config*/) { + const core::QueryConfig& config) { VELOX_CHECK_EQ( compiledChildren.size(), 1, @@ -33,14 +33,14 @@ exec::ExprPtr SparkCastCallToSpecialForm::constructSpecialForm( std::move(compiledChildren[0]), trackCpuUsage, false, - std::make_shared()); + std::make_shared(config)); } exec::ExprPtr SparkTryCastCallToSpecialForm::constructSpecialForm( const TypePtr& type, std::vector&& compiledChildren, bool trackCpuUsage, - const core::QueryConfig& /*config*/) { + const core::QueryConfig& config) { VELOX_CHECK_EQ( compiledChildren.size(), 1, @@ -51,6 +51,6 @@ exec::ExprPtr SparkTryCastCallToSpecialForm::constructSpecialForm( std::move(compiledChildren[0]), trackCpuUsage, true, - std::make_shared()); + std::make_shared(config)); } } // namespace facebook::velox::functions::sparksql diff --git a/velox/functions/sparksql/specialforms/SparkCastHooks.cpp b/velox/functions/sparksql/specialforms/SparkCastHooks.cpp index fcab616e1b07..3c07b149fb20 100644 --- a/velox/functions/sparksql/specialforms/SparkCastHooks.cpp +++ b/velox/functions/sparksql/specialforms/SparkCastHooks.cpp @@ -17,9 +17,13 @@ #include "velox/functions/sparksql/specialforms/SparkCastHooks.h" #include "velox/functions/lib/string/StringImpl.h" #include "velox/type/TimestampConversion.h" +#include "velox/type/tz/TimeZoneMap.h" namespace facebook::velox::functions::sparksql { +SparkCastHooks::SparkCastHooks(const velox::core::QueryConfig& config) + : config_(config) {} + Expected SparkCastHooks::castStringToTimestamp( const StringView& view) const { return util::fromTimestampString( @@ -76,12 +80,13 @@ StringView SparkCastHooks::removeWhiteSpaces(const StringView& view) const { const TimestampToStringOptions& SparkCastHooks::timestampToStringOptions() const { - static constexpr TimestampToStringOptions options = { + static TimestampToStringOptions options = { .precision = TimestampToStringOptions::Precision::kMicroseconds, .leadingPositiveSign = true, .skipTrailingZeros = true, .zeroPaddingYear = true, .dateTimeSeparator = ' ', + .timeZone = tz::locateZone(config_.sessionTimezone()), }; return options; } diff --git a/velox/functions/sparksql/specialforms/SparkCastHooks.h b/velox/functions/sparksql/specialforms/SparkCastHooks.h index c7d298a0ba4e..f30cb8af5a6f 100644 --- a/velox/functions/sparksql/specialforms/SparkCastHooks.h +++ b/velox/functions/sparksql/specialforms/SparkCastHooks.h @@ -16,6 +16,7 @@ #pragma once +#include "velox/core/QueryCtx.h" #include "velox/expression/CastHooks.h" namespace facebook::velox::functions::sparksql { @@ -23,6 +24,8 @@ namespace facebook::velox::functions::sparksql { // This class provides cast hooks following Spark semantics. class SparkCastHooks : public exec::CastHooks { public: + explicit SparkCastHooks(const velox::core::QueryConfig& config); + // TODO: Spark hook allows more string patterns than Presto. Expected castStringToTimestamp( const StringView& view) const override; @@ -59,5 +62,8 @@ class SparkCastHooks : public exec::CastHooks { } exec::PolicyType getPolicy() const override; + + private: + const core::QueryConfig& config_; }; } // namespace facebook::velox::functions::sparksql From 6015eafdae62da51a0c645e77e043db6a1b54e46 Mon Sep 17 00:00:00 2001 From: PHILO-HE Date: Wed, 25 Dec 2024 20:37:27 +0800 Subject: [PATCH 2/4] Fix empty time zone --- .../sparksql/specialforms/SparkCastHooks.cpp | 17 +++++++---------- .../sparksql/specialforms/SparkCastHooks.h | 6 ++++++ 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/velox/functions/sparksql/specialforms/SparkCastHooks.cpp b/velox/functions/sparksql/specialforms/SparkCastHooks.cpp index 3c07b149fb20..05cd28fdd68d 100644 --- a/velox/functions/sparksql/specialforms/SparkCastHooks.cpp +++ b/velox/functions/sparksql/specialforms/SparkCastHooks.cpp @@ -22,7 +22,12 @@ namespace facebook::velox::functions::sparksql { SparkCastHooks::SparkCastHooks(const velox::core::QueryConfig& config) - : config_(config) {} + : config_(config) { + const auto sessionTzName = config.sessionTimezone(); + if (!sessionTzName.empty()) { + timestampToStringOptions_.timeZone = tz::locateZone(sessionTzName); + } +} Expected SparkCastHooks::castStringToTimestamp( const StringView& view) const { @@ -80,15 +85,7 @@ StringView SparkCastHooks::removeWhiteSpaces(const StringView& view) const { const TimestampToStringOptions& SparkCastHooks::timestampToStringOptions() const { - static TimestampToStringOptions options = { - .precision = TimestampToStringOptions::Precision::kMicroseconds, - .leadingPositiveSign = true, - .skipTrailingZeros = true, - .zeroPaddingYear = true, - .dateTimeSeparator = ' ', - .timeZone = tz::locateZone(config_.sessionTimezone()), - }; - return options; + return timestampToStringOptions_; } exec::PolicyType SparkCastHooks::getPolicy() const { diff --git a/velox/functions/sparksql/specialforms/SparkCastHooks.h b/velox/functions/sparksql/specialforms/SparkCastHooks.h index f30cb8af5a6f..060d82ef5214 100644 --- a/velox/functions/sparksql/specialforms/SparkCastHooks.h +++ b/velox/functions/sparksql/specialforms/SparkCastHooks.h @@ -65,5 +65,11 @@ class SparkCastHooks : public exec::CastHooks { private: const core::QueryConfig& config_; + TimestampToStringOptions timestampToStringOptions_ = { + .precision = TimestampToStringOptions::Precision::kMicroseconds, + .leadingPositiveSign = true, + .skipTrailingZeros = true, + .zeroPaddingYear = true, + .dateTimeSeparator = ' '}; }; } // namespace facebook::velox::functions::sparksql From 2aa126257f45e5564636b7b8e159cdafb8192124 Mon Sep 17 00:00:00 2001 From: PHILO-HE Date: Wed, 25 Dec 2024 21:42:59 +0800 Subject: [PATCH 3/4] Add test --- .../sparksql/tests/SparkCastExprTest.cpp | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/velox/functions/sparksql/tests/SparkCastExprTest.cpp b/velox/functions/sparksql/tests/SparkCastExprTest.cpp index 4e820a2af15a..bf681829180b 100644 --- a/velox/functions/sparksql/tests/SparkCastExprTest.cpp +++ b/velox/functions/sparksql/tests/SparkCastExprTest.cpp @@ -580,6 +580,33 @@ TEST_F(SparkCastExprTest, timestampToString) { "-0010-02-01 10:00:00", std::nullopt, }); + + setTimezone("America/Los_Angeles"); + testCast( + "string", + { + Timestamp(0, 0), + Timestamp(3600, 0), + Timestamp(61, 10), + }, + { + "1969-12-31 16:00:00", + "1969-12-31 17:00:00", + "1969-12-31 16:01:01", + }); + setTimezone("Asia/Shanghai"); + testCast( + "string", + { + Timestamp(0, 0), + Timestamp(3600, 0), + Timestamp(61, 10), + }, + { + "1970-01-01 08:00:00", + "1970-01-01 09:00:00", + "1970-01-01 08:01:01", + }); } TEST_F(SparkCastExprTest, fromString) { From 6b1dcd201e64d5fb1858fa3301b09421a3f7c0aa Mon Sep 17 00:00:00 2001 From: PHILO-HE Date: Fri, 27 Dec 2024 15:33:24 +0800 Subject: [PATCH 4/4] Fix comment --- .../sparksql/specialforms/SparkCastHooks.cpp | 5 -- .../sparksql/specialforms/SparkCastHooks.h | 11 ++-- .../sparksql/tests/SparkCastExprTest.cpp | 53 ++++++++++++++----- 3 files changed, 48 insertions(+), 21 deletions(-) diff --git a/velox/functions/sparksql/specialforms/SparkCastHooks.cpp b/velox/functions/sparksql/specialforms/SparkCastHooks.cpp index 05cd28fdd68d..37649fbf6747 100644 --- a/velox/functions/sparksql/specialforms/SparkCastHooks.cpp +++ b/velox/functions/sparksql/specialforms/SparkCastHooks.cpp @@ -83,11 +83,6 @@ StringView SparkCastHooks::removeWhiteSpaces(const StringView& view) const { return output; } -const TimestampToStringOptions& SparkCastHooks::timestampToStringOptions() - const { - return timestampToStringOptions_; -} - exec::PolicyType SparkCastHooks::getPolicy() const { return exec::PolicyType::SparkCastPolicy; } diff --git a/velox/functions/sparksql/specialforms/SparkCastHooks.h b/velox/functions/sparksql/specialforms/SparkCastHooks.h index 060d82ef5214..309c22ab5b9f 100644 --- a/velox/functions/sparksql/specialforms/SparkCastHooks.h +++ b/velox/functions/sparksql/specialforms/SparkCastHooks.h @@ -52,10 +52,9 @@ class SparkCastHooks : public exec::CastHooks { /// whitespaces before cast. StringView removeWhiteSpaces(const StringView& view) const override; - /// 1) Does not follow 'isLegacyCast' and session timezone. 2) The conversion - /// precision is microsecond. 3) Does not append trailing zeros. 4) Adds a - /// positive sign at first if the year exceeds 9999. - const TimestampToStringOptions& timestampToStringOptions() const override; + const TimestampToStringOptions& timestampToStringOptions() const override { + return timestampToStringOptions_; + } bool truncate() const override { return true; @@ -65,6 +64,10 @@ class SparkCastHooks : public exec::CastHooks { private: const core::QueryConfig& config_; + /// 1) Does not follow 'isLegacyCast' and session timezone. 2) The conversion + /// precision is microsecond. 3) Does not append trailing zeros. 4) Adds a + /// positive sign at first if the year exceeds 9999. 5) The configured session + /// timezone is respected. TimestampToStringOptions timestampToStringOptions_ = { .precision = TimestampToStringOptions::Precision::kMicroseconds, .leadingPositiveSign = true, diff --git a/velox/functions/sparksql/tests/SparkCastExprTest.cpp b/velox/functions/sparksql/tests/SparkCastExprTest.cpp index bf681829180b..ec7fb3034574 100644 --- a/velox/functions/sparksql/tests/SparkCastExprTest.cpp +++ b/velox/functions/sparksql/tests/SparkCastExprTest.cpp @@ -581,31 +581,60 @@ TEST_F(SparkCastExprTest, timestampToString) { std::nullopt, }); + std::vector> input = { + Timestamp(-946684800, 0), + Timestamp(-7266, 0), + Timestamp(0, 0), + Timestamp(61, 10), + Timestamp(3600, 0), + Timestamp(946684800, 0), + + Timestamp(946729316, 0), + Timestamp(946729316, 123), + Timestamp(946729316, 100000000), + Timestamp(946729316, 129900000), + Timestamp(946729316, 123456789), + Timestamp(7266, 0), + std::nullopt, + }; + setTimezone("America/Los_Angeles"); testCast( "string", + input, { - Timestamp(0, 0), - Timestamp(3600, 0), - Timestamp(61, 10), - }, - { + "1940-01-01 16:00:00", + "1969-12-31 13:58:54", "1969-12-31 16:00:00", - "1969-12-31 17:00:00", "1969-12-31 16:01:01", + "1969-12-31 17:00:00", + "1999-12-31 16:00:00", + "2000-01-01 04:21:56", + "2000-01-01 04:21:56", + "2000-01-01 04:21:56.1", + "2000-01-01 04:21:56.1299", + "2000-01-01 04:21:56.123456", + "1969-12-31 18:01:06", + std::nullopt, }); setTimezone("Asia/Shanghai"); testCast( "string", + input, { - Timestamp(0, 0), - Timestamp(3600, 0), - Timestamp(61, 10), - }, - { + "1940-01-02 08:00:00", + "1970-01-01 05:58:54", "1970-01-01 08:00:00", - "1970-01-01 09:00:00", "1970-01-01 08:01:01", + "1970-01-01 09:00:00", + "2000-01-01 08:00:00", + "2000-01-01 20:21:56", + "2000-01-01 20:21:56", + "2000-01-01 20:21:56.1", + "2000-01-01 20:21:56.1299", + "2000-01-01 20:21:56.123456", + "1970-01-01 10:01:06", + std::nullopt, }); }