Skip to content

Commit

Permalink
Exposed stream-ordering to datetime API (#16774)
Browse files Browse the repository at this point in the history
This merge request exposes stream-ordering (an explicit `stream` parameter) in the following public-facing datetime APIs:

- `extract_year`
- `extract_month`
- `extract_day`
- `extract_weekday`
- `extract_hour`
- `extract_minute`
- `extract_second`
- `extract_millisecond_fraction`
- `extract_microsecond_fraction`
- `extract_nanosecond_fraction`
- `last_day_of_month`
- `day_of_year`
- `add_calendrical_months`
- `is_leap_year`
- `days_in_month`
- `extract_quarter`
- `ceil_datetimes`
- `floor_datetimes`
- `round_datetimes`



Follows up #13744
Closes #16775

Authors:
  - Basit Ayantunde (https://github.com/lamarrr)

Approvers:
  - Karthikeyan (https://github.com/karthikeyann)
  - Yunsong Wang (https://github.com/PointKernel)

URL: #16774
  • Loading branch information
lamarrr authored Sep 21, 2024
1 parent 96d2f81 commit 9b4c4c7
Show file tree
Hide file tree
Showing 8 changed files with 276 additions and 68 deletions.
43 changes: 43 additions & 0 deletions cpp/include/cudf/datetime.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,12 @@
#pragma once

#include <cudf/types.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/export.hpp>
#include <cudf/utilities/memory_resource.hpp>

#include <rmm/cuda_stream_view.hpp>

#include <memory>

/**
Expand All @@ -40,97 +43,111 @@ namespace datetime {
* cudf::column.
*
* @param column cudf::column_view of the input datetime values
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate device memory of the returned column
*
* @returns cudf::column of the extracted int16_t years
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
*/
std::unique_ptr<cudf::column> extract_year(
cudf::column_view const& column,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
* @brief Extracts the month from any datetime type and returns an int16_t
* cudf::column.
*
* @param column cudf::column_view of the input datetime values
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate device memory of the returned column
*
* @returns cudf::column of the extracted int16_t months
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
*/
std::unique_ptr<cudf::column> extract_month(
cudf::column_view const& column,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
* @brief Extracts the day from any datetime type and returns an int16_t
* cudf::column.
*
* @param column cudf::column_view of the input datetime values
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate device memory of the returned column
*
* @returns cudf::column of the extracted int16_t days
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
*/
std::unique_ptr<cudf::column> extract_day(
cudf::column_view const& column,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
* @brief Extracts the weekday from any datetime type and returns an int16_t
* cudf::column.
*
* @param column cudf::column_view of the input datetime values
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate device memory of the returned column
*
* @returns cudf::column of the extracted int16_t weekdays
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
*/
std::unique_ptr<cudf::column> extract_weekday(
cudf::column_view const& column,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
* @brief Extracts the hour from any datetime type and returns an int16_t
* cudf::column.
*
* @param column cudf::column_view of the input datetime values
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate device memory of the returned column
*
* @returns cudf::column of the extracted int16_t hours
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
*/
std::unique_ptr<cudf::column> extract_hour(
cudf::column_view const& column,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
* @brief Extracts the minute from any datetime type and returns an int16_t
* cudf::column.
*
* @param column cudf::column_view of the input datetime values
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate device memory of the returned column
*
* @returns cudf::column of the extracted int16_t minutes
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
*/
std::unique_ptr<cudf::column> extract_minute(
cudf::column_view const& column,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
* @brief Extracts the second from any datetime type and returns an int16_t
* cudf::column.
*
* @param column cudf::column_view of the input datetime values
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate device memory of the returned column
*
* @returns cudf::column of the extracted int16_t seconds
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
*/
std::unique_ptr<cudf::column> extract_second(
cudf::column_view const& column,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
Expand All @@ -141,13 +158,15 @@ std::unique_ptr<cudf::column> extract_second(
* For example, the millisecond fraction of 1.234567890 seconds is 234.
*
* @param column cudf::column_view of the input datetime values
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate device memory of the returned column
*
* @returns cudf::column of the extracted int16_t milliseconds
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
*/
std::unique_ptr<cudf::column> extract_millisecond_fraction(
cudf::column_view const& column,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
Expand All @@ -158,13 +177,15 @@ std::unique_ptr<cudf::column> extract_millisecond_fraction(
* For example, the microsecond fraction of 1.234567890 seconds is 567.
*
* @param column cudf::column_view of the input datetime values
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate device memory of the returned column
*
* @returns cudf::column of the extracted int16_t microseconds
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
*/
std::unique_ptr<cudf::column> extract_microsecond_fraction(
cudf::column_view const& column,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
Expand All @@ -175,13 +196,15 @@ std::unique_ptr<cudf::column> extract_microsecond_fraction(
* For example, the nanosecond fraction of 1.234567890 seconds is 890.
*
* @param column cudf::column_view of the input datetime values
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate device memory of the returned column
*
* @returns cudf::column of the extracted int16_t nanoseconds
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
*/
std::unique_ptr<cudf::column> extract_nanosecond_fraction(
cudf::column_view const& column,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/** @} */ // end of group
Expand All @@ -196,27 +219,31 @@ std::unique_ptr<cudf::column> extract_nanosecond_fraction(
* cudf::column.
*
* @param column cudf::column_view of the input datetime values
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate device memory of the returned column
*
* @returns cudf::column containing last day of the month as TIMESTAMP_DAYS
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
*/
std::unique_ptr<cudf::column> last_day_of_month(
cudf::column_view const& column,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
* @brief Computes the day number since the start of the year from the datetime and
* returns an int16_t cudf::column.
*
* The value is in the range [1, 365], or [1, 366] for a leap year.
*
* @param column cudf::column_view of the input datetime values
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate device memory of the returned column
*
* @returns cudf::column of datatype INT16 containing the day number since the start of the year
* @throw cudf::logic_error if input column datatype is not a TIMESTAMP
*/
std::unique_ptr<cudf::column> day_of_year(
cudf::column_view const& column,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
Expand Down Expand Up @@ -245,13 +272,15 @@ std::unique_ptr<cudf::column> day_of_year(
*
* @param timestamps cudf::column_view of timestamp type
* @param months cudf::column_view of integer type containing the number of months to add
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate device memory of the returned column
*
* @returns cudf::column of timestamp type containing the computed timestamps
*/
std::unique_ptr<cudf::column> add_calendrical_months(
cudf::column_view const& timestamps,
cudf::column_view const& months,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
Expand Down Expand Up @@ -280,13 +309,15 @@ std::unique_ptr<cudf::column> add_calendrical_months(
*
* @param timestamps cudf::column_view of timestamp type
* @param months cudf::scalar of integer type containing the number of months to add
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate device memory of the returned column
*
* @return cudf::column of timestamp type containing the computed timestamps
*/
std::unique_ptr<cudf::column> add_calendrical_months(
cudf::column_view const& timestamps,
cudf::scalar const& months,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
Expand All @@ -297,13 +328,15 @@ std::unique_ptr<cudf::column> add_calendrical_months(
* `output[i] is null` if `column[i]` is null
*
* @param column cudf::column_view of the input datetime values
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate device memory of the returned column
*
* @returns cudf::column of datatype BOOL8 truth value of the corresponding date
* @throw cudf::logic_error if input column datatype is not a TIMESTAMP
*/
std::unique_ptr<cudf::column> is_leap_year(
cudf::column_view const& column,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
Expand All @@ -315,11 +348,13 @@ std::unique_ptr<cudf::column> is_leap_year(
* @throw cudf::logic_error if input column datatype is not a TIMESTAMP
*
* @param column cudf::column_view of the input datetime values
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate device memory of the returned column
* @return cudf::column of datatype INT16 of days in month of the corresponding date
*/
std::unique_ptr<cudf::column> days_in_month(
cudf::column_view const& column,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
Expand All @@ -331,11 +366,13 @@ std::unique_ptr<cudf::column> days_in_month(
* @throw cudf::logic_error if input column datatype is not a TIMESTAMP
*
* @param column The input column containing datetime values
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate device memory of the returned column
* @return A column of INT16 type indicating which quarter the date is in
*/
std::unique_ptr<cudf::column> extract_quarter(
cudf::column_view const& column,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
Expand All @@ -357,6 +394,7 @@ enum class rounding_frequency : int32_t {
*
* @param column cudf::column_view of the input datetime values
* @param freq rounding_frequency indicating the frequency to round up to
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate device memory of the returned column
*
* @throw cudf::logic_error if input column datatype is not TIMESTAMP.
Expand All @@ -365,13 +403,15 @@ enum class rounding_frequency : int32_t {
std::unique_ptr<cudf::column> ceil_datetimes(
cudf::column_view const& column,
rounding_frequency freq,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
* @brief Round datetimes down to the nearest multiple of the given frequency.
*
* @param column cudf::column_view of the input datetime values
* @param freq rounding_frequency indicating the frequency to round down to
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate device memory of the returned column
*
* @throw cudf::logic_error if input column datatype is not TIMESTAMP.
Expand All @@ -380,13 +420,15 @@ std::unique_ptr<cudf::column> ceil_datetimes(
std::unique_ptr<cudf::column> floor_datetimes(
cudf::column_view const& column,
rounding_frequency freq,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
* @brief Round datetimes to the nearest multiple of the given frequency.
*
* @param column cudf::column_view of the input datetime values
* @param freq rounding_frequency indicating the frequency to round to
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate device memory of the returned column
*
* @throw cudf::logic_error if input column datatype is not TIMESTAMP.
Expand All @@ -395,6 +437,7 @@ std::unique_ptr<cudf::column> floor_datetimes(
std::unique_ptr<cudf::column> round_datetimes(
cudf::column_view const& column,
rounding_frequency freq,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/** @} */ // end of group
Expand Down
Loading

0 comments on commit 9b4c4c7

Please sign in to comment.