Skip to content

Commit

Permalink
Add missing stream param to dictionary factory APIs (#16319)
Browse files Browse the repository at this point in the history
Add `stream` param to dictionary column factory functions. Partially solves #13744.

Authors:
  - Jayjeet Chakraborty (https://github.com/JayjeetAtGithub)

Approvers:
  - Mark Harris (https://github.com/harrism)
  - Yunsong Wang (https://github.com/PointKernel)

URL: #16319
  • Loading branch information
JayjeetAtGithub authored Jul 22, 2024
1 parent 135c995 commit 3053f42
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 8 deletions.
13 changes: 9 additions & 4 deletions cpp/include/cudf/dictionary/dictionary_factories.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,12 +87,17 @@ std::unique_ptr<column> make_dictionary_column(
* @param indices_column Indices to use for the new dictionary column.
* @param null_mask Null mask for the output column.
* @param null_count Number of nulls for the output column.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New dictionary column.
*/
std::unique_ptr<column> make_dictionary_column(std::unique_ptr<column> keys_column,
std::unique_ptr<column> indices_column,
rmm::device_buffer&& null_mask,
size_type null_count);
std::unique_ptr<column> make_dictionary_column(
std::unique_ptr<column> keys_column,
std::unique_ptr<column> indices_column,
rmm::device_buffer&& null_mask,
size_type null_count,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());

/**
* @brief Construct a dictionary column by taking ownership of the provided keys
Expand Down
13 changes: 9 additions & 4 deletions cpp/src/dictionary/dictionary_factories.cu
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,9 @@ std::unique_ptr<column> make_dictionary_column(column_view const& keys_column,
std::unique_ptr<column> make_dictionary_column(std::unique_ptr<column> keys_column,
std::unique_ptr<column> indices_column,
rmm::device_buffer&& null_mask,
size_type null_count)
size_type null_count,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
CUDF_EXPECTS(!keys_column->has_nulls(), "keys column must not have nulls");
CUDF_EXPECTS(!indices_column->has_nulls(), "indices column must not have nulls");
Expand All @@ -89,7 +91,7 @@ std::unique_ptr<column> make_dictionary_column(std::unique_ptr<column> keys_colu
children.emplace_back(std::move(keys_column));
return std::make_unique<column>(data_type{type_id::DICTIONARY32},
count,
rmm::device_buffer{},
rmm::device_buffer{0, stream, mr},
std::move(null_mask),
null_count,
std::move(children));
Expand Down Expand Up @@ -134,8 +136,11 @@ std::unique_ptr<column> make_dictionary_column(std::unique_ptr<column> keys,
auto indices_column = [&] {
// If the types match, then just commandeer the column's data buffer.
if (new_type.id() == indices_type) {
return std::make_unique<column>(
new_type, indices_size, std::move(*(contents.data.release())), rmm::device_buffer{}, 0);
return std::make_unique<column>(new_type,
indices_size,
std::move(*(contents.data.release())),
rmm::device_buffer{0, stream, mr},
0);
}
// If the new type does not match, then convert the data.
cudf::column_view cast_view{
Expand Down
46 changes: 46 additions & 0 deletions cpp/tests/streams/dictionary_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,52 @@

class DictionaryTest : public cudf::test::BaseFixture {};

// Builds a dictionary column from a keys wrapper and an indices wrapper,
// passing the stream returned by cudf::test::get_default_stream() explicitly,
// then checks that the dictionary's keys and indices compare equal to the inputs.
TEST_F(DictionaryTest, FactoryColumnViews)
{
  auto const stream = cudf::test::get_default_stream();

  cudf::test::strings_column_wrapper key_strings({"aaa", "ccc", "ddd", "www"});
  cudf::test::fixed_width_column_wrapper<uint8_t> index_values{2, 0, 3, 1, 2, 2, 2, 3, 0};

  // The wrappers are handed to the factory directly; they remain usable for
  // the comparisons below.
  auto dict_col  = cudf::make_dictionary_column(key_strings, index_values, stream);
  auto dict_view = cudf::dictionary_column_view(dict_col->view());

  CUDF_TEST_EXPECT_COLUMNS_EQUAL(dict_view.keys(), key_strings);
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(dict_view.indices(), index_values);
}

// Builds a dictionary column from released keys/indices columns, passing the
// stream returned by cudf::test::get_default_stream() explicitly. Because the
// input wrappers are released into the factory call, fresh wrappers are built
// from the same host data to serve as the expected values.
TEST_F(DictionaryTest, FactoryColumns)
{
  auto const stream = cudf::test::get_default_stream();

  std::vector<std::string> host_keys{"aaa", "ccc", "ddd", "www"};
  std::vector<uint8_t> host_indices{2, 0, 3, 1, 2, 2, 2, 3, 0};

  cudf::test::strings_column_wrapper keys_input(host_keys.begin(), host_keys.end());
  cudf::test::fixed_width_column_wrapper<uint8_t> indices_input(host_indices.begin(),
                                                                host_indices.end());

  auto dict_col =
    cudf::make_dictionary_column(keys_input.release(), indices_input.release(), stream);
  auto dict_view = cudf::dictionary_column_view(dict_col->view());

  // Rebuild the expected columns from the host vectors.
  cudf::test::strings_column_wrapper expected_keys(host_keys.begin(), host_keys.end());
  cudf::test::fixed_width_column_wrapper<uint8_t> expected_indices(host_indices.begin(),
                                                                   host_indices.end());
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(dict_view.keys(), expected_keys);
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(dict_view.indices(), expected_indices);
}

// Builds a dictionary column through the factory overload that also takes a
// null mask and null count, here a default-constructed rmm::device_buffer and 0,
// passing the stream returned by cudf::test::get_default_stream() explicitly.
// Expected keys/indices are rebuilt from the host data because the input
// wrappers are released into the factory call.
TEST_F(DictionaryTest, FactoryColumnsNullMaskCount)
{
  auto const stream = cudf::test::get_default_stream();

  std::vector<std::string> host_keys{"aaa", "ccc", "ddd", "www"};
  std::vector<uint8_t> host_indices{2, 0, 3, 1, 2, 2, 2, 3, 0};

  cudf::test::strings_column_wrapper keys_input(host_keys.begin(), host_keys.end());
  cudf::test::fixed_width_column_wrapper<uint8_t> indices_input(host_indices.begin(),
                                                                host_indices.end());

  auto dict_col = cudf::make_dictionary_column(
    keys_input.release(), indices_input.release(), rmm::device_buffer{}, 0, stream);
  auto dict_view = cudf::dictionary_column_view(dict_col->view());

  // Rebuild the expected columns from the host vectors.
  cudf::test::strings_column_wrapper expected_keys(host_keys.begin(), host_keys.end());
  cudf::test::fixed_width_column_wrapper<uint8_t> expected_indices(host_indices.begin(),
                                                                   host_indices.end());
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(dict_view.keys(), expected_keys);
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(dict_view.indices(), expected_indices);
}

TEST_F(DictionaryTest, Encode)
{
cudf::test::fixed_width_column_wrapper<int> col({1, 2, 3, 4, 5});
Expand Down

0 comments on commit 3053f42

Please sign in to comment.