Skip to content

Commit

Permalink
formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
shrshi committed Sep 23, 2024
1 parent 529e88e commit 545359d
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 54 deletions.
77 changes: 40 additions & 37 deletions cpp/src/io/json/column_tree_construction.cu
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ using row_offset_t = size_type;

// debug printing
#ifndef CSR_DEBUG_PRINT
//#define CSR_DEBUG_PRINT
// #define CSR_DEBUG_PRINT
#endif

#ifdef CSR_DEBUG_PRINT
Expand Down Expand Up @@ -143,13 +143,13 @@ std::tuple<compressed_sparse_row, column_tree_properties> reduce_to_column_tree(

auto [unpermuted_tree, unpermuted_col_ids, unpermuted_max_row_offsets] =
cudf::io::json::detail::reduce_to_column_tree(node_tree,
original_col_ids,
sorted_col_ids,
ordered_node_ids,
row_offsets,
is_array_of_arrays,
row_array_parent_col_id,
stream);
original_col_ids,
sorted_col_ids,
ordered_node_ids,
row_offsets,
is_array_of_arrays,
row_array_parent_col_id,
stream);

NodeIndexT num_columns = unpermuted_col_ids.size();

Expand All @@ -158,38 +158,41 @@ std::tuple<compressed_sparse_row, column_tree_properties> reduce_to_column_tree(
rmm::device_uvector<NodeIndexT> rev_mapped_col_ids(num_columns, stream);
rmm::device_uvector<NodeIndexT> reordering_index(unpermuted_col_ids.size(), stream);

thrust::sequence(rmm::exec_policy_nosync(stream),
reordering_index.begin(),
reordering_index.end());
thrust::sequence(
rmm::exec_policy_nosync(stream), reordering_index.begin(), reordering_index.end());
// Reorder nodes and column ids in level-wise fashion
thrust::sort_by_key(rmm::exec_policy_nosync(stream),
reordering_index.begin(),
reordering_index.end(),
mapped_col_ids.begin(),
level_ordering{unpermuted_tree.node_levels, unpermuted_col_ids, unpermuted_tree.parent_node_ids});
thrust::sort_by_key(
rmm::exec_policy_nosync(stream),
reordering_index.begin(),
reordering_index.end(),
mapped_col_ids.begin(),
level_ordering{
unpermuted_tree.node_levels, unpermuted_col_ids, unpermuted_tree.parent_node_ids});

{
auto mapped_col_ids_copy = cudf::detail::make_device_uvector_async(
mapped_col_ids, stream, cudf::get_current_device_resource_ref());
thrust::sequence(
rmm::exec_policy_nosync(stream), rev_mapped_col_ids.begin(), rev_mapped_col_ids.end());
thrust::sort_by_key(rmm::exec_policy_nosync(stream),
mapped_col_ids_copy.begin(),
mapped_col_ids_copy.end(),
rev_mapped_col_ids.begin());
mapped_col_ids, stream, cudf::get_current_device_resource_ref());
thrust::sequence(
rmm::exec_policy_nosync(stream), rev_mapped_col_ids.begin(), rev_mapped_col_ids.end());
thrust::sort_by_key(rmm::exec_policy_nosync(stream),
mapped_col_ids_copy.begin(),
mapped_col_ids_copy.end(),
rev_mapped_col_ids.begin());
}

rmm::device_uvector<NodeIndexT> parent_col_ids(num_columns, stream);
thrust::transform_output_iterator parent_col_ids_it(
parent_col_ids.begin(), parent_nodeids_to_colids{rev_mapped_col_ids});
thrust::transform_output_iterator parent_col_ids_it(parent_col_ids.begin(),
parent_nodeids_to_colids{rev_mapped_col_ids});
rmm::device_uvector<row_offset_t> max_row_offsets(num_columns, stream);
rmm::device_uvector<NodeT> column_categories(num_columns, stream);
thrust::copy_n(
rmm::exec_policy_nosync(stream),
thrust::make_zip_iterator(
thrust::make_permutation_iterator(unpermuted_tree.parent_node_ids.begin(), reordering_index.begin()),
thrust::make_permutation_iterator(unpermuted_max_row_offsets.begin(), reordering_index.begin()),
thrust::make_permutation_iterator(unpermuted_tree.node_categories.begin(), reordering_index.begin())),
thrust::make_zip_iterator(thrust::make_permutation_iterator(
unpermuted_tree.parent_node_ids.begin(), reordering_index.begin()),
thrust::make_permutation_iterator(unpermuted_max_row_offsets.begin(),
reordering_index.begin()),
thrust::make_permutation_iterator(
unpermuted_tree.node_categories.begin(), reordering_index.begin())),
num_columns,
thrust::make_zip_iterator(
parent_col_ids_it, max_row_offsets.begin(), column_categories.begin()));
Expand Down Expand Up @@ -256,8 +259,8 @@ std::tuple<compressed_sparse_row, column_tree_properties> reduce_to_column_tree(
};

auto construct_colidx = [&stream](NodeIndexT num_columns,
device_span<NodeIndexT const> parent_col_ids,
device_span<NodeIndexT const> rowidx) {
device_span<NodeIndexT const> parent_col_ids,
device_span<NodeIndexT const> rowidx) {
rmm::device_uvector<NodeIndexT> colidx((num_columns - 1) * 2, stream);
thrust::fill(rmm::exec_policy_nosync(stream), colidx.begin(), colidx.end(), -1);
// excluding root node, construct scatter map
Expand Down Expand Up @@ -289,12 +292,12 @@ std::tuple<compressed_sparse_row, column_tree_properties> reduce_to_column_tree(
print<NodeIndexT>(colidx, "h_colidx", stream);
#endif

// Skip the parent of root node
thrust::scatter(rmm::exec_policy_nosync(stream),
parent_col_ids.begin() + 1,
parent_col_ids.end(),
rowidx.begin() + 1,
colidx.begin());
// Skip the parent of root node
thrust::scatter(rmm::exec_policy_nosync(stream),
parent_col_ids.begin() + 1,
parent_col_ids.end(),
rowidx.begin() + 1,
colidx.begin());

return colidx;
};
Expand Down
10 changes: 6 additions & 4 deletions cpp/src/io/json/json_column.cu
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ namespace cudf::io::json::detail {
#define CSR_DEBUG_EQ
#endif
#ifndef CSR_DEBUG_PRINT
//#define CSR_DEBUG_PRINT
// #define CSR_DEBUG_PRINT
#endif

#ifdef CSR_DEBUG_PRINT
Expand Down Expand Up @@ -137,7 +137,7 @@ reduce_to_column_tree(tree_meta_t& tree,
rmm::cuda_stream_view stream)
{
CUDF_FUNC_RANGE();

// 1. column count for allocation
auto const num_columns = thrust::unique_count(
rmm::exec_policy_nosync(stream), sorted_col_ids.begin(), sorted_col_ids.end());
Expand Down Expand Up @@ -205,8 +205,10 @@ reduce_to_column_tree(tree_meta_t& tree,
thrust::make_permutation_iterator(tree.node_range_begin.begin(), unique_node_ids.begin()),
thrust::make_permutation_iterator(tree.node_range_end.begin(), unique_node_ids.begin())),
unique_node_ids.size(),
thrust::make_zip_iterator(
column_levels.begin(), parent_col_ids.begin(), col_range_begin.begin(), col_range_end.begin()));
thrust::make_zip_iterator(column_levels.begin(),
parent_col_ids.begin(),
col_range_begin.begin(),
col_range_end.begin()));

// convert parent_node_ids to parent_col_ids
thrust::transform(
Expand Down
44 changes: 31 additions & 13 deletions cpp/tests/io/json/json_tree_csr.cu
Original file line number Diff line number Diff line change
Expand Up @@ -92,26 +92,44 @@ bool check_equality(cuio_json::tree_meta_t& d_a,

auto num_nodes = a.parent_node_ids.size();
if (num_nodes > 1) {
if (b.rowidx.size() != num_nodes + 1) { std::cout << "1\n"; return false; }
if (b.rowidx.size() != num_nodes + 1) {
std::cout << "1\n";
return false;
}

for (auto pos = b.rowidx[0]; pos < b.rowidx[1]; pos++) {
auto v = b.colidx[pos];
if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[0]) { std::cout << "2\n"; return false; }
if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[0]) {
std::cout << "2\n";
return false;
}
}
for (size_t u = 1; u < num_nodes; u++) {
auto v = b.colidx[b.rowidx[u]];
if (a.parent_node_ids[b.column_ids[u]] != b.column_ids[v]) { std::cout << "3\n"; return false; }
if (a.parent_node_ids[b.column_ids[u]] != b.column_ids[v]) {
std::cout << "3\n";
return false;
}

for (auto pos = b.rowidx[u] + 1; pos < b.rowidx[u + 1]; pos++) {
v = b.colidx[pos];
if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[u]) { std::cout << "4\n"; return false; }
if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[u]) {
std::cout << "4\n";
return false;
}
}
}
for (size_t u = 0; u < num_nodes; u++) {
if (a.node_categories[b.column_ids[u]] != b.categories[u]) { std::cout << "5\n"; return false; }
if (a.node_categories[b.column_ids[u]] != b.categories[u]) {
std::cout << "5\n";
return false;
}
}
for (size_t u = 0; u < num_nodes; u++) {
if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) { std::cout << "6\n"; return false; }
if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) {
std::cout << "6\n";
return false;
}
}
} else if (num_nodes == 1) {
if (b.rowidx.size() != num_nodes + 1) { return false; }
Expand Down Expand Up @@ -211,13 +229,13 @@ void run_test(std::string const& input, bool enable_lines = true)

auto [d_column_tree_csr, d_column_tree_properties] =
cudf::io::json::experimental::detail::reduce_to_column_tree(gpu_tree,
gpu_col_id,
sorted_col_ids,
node_ids,
gpu_row_offsets,
is_array_of_arrays,
row_array_parent_col_id,
stream);
gpu_col_id,
sorted_col_ids,
node_ids,
gpu_row_offsets,
is_array_of_arrays,
row_array_parent_col_id,
stream);

auto iseq = check_equality(
d_column_tree, d_max_row_offsets, d_column_tree_csr, d_column_tree_properties, stream);
Expand Down

0 comments on commit 545359d

Please sign in to comment.