Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions python/cugraph/cugraph/gnn/data_loading/bulk_sampler_io.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -181,7 +181,9 @@ def _write_samples_to_parquet_csr(
[
cudf.Series(minors_array[results_start:results_end], name="minors"),
cudf.Series(
-renumber_map.map.values[renumber_map_start:renumber_map_end],
+renumber_map.renumber_map.values[
+renumber_map_start:renumber_map_end
+],
name="map",
),
label_hop_offsets_current_partition,
Expand Down Expand Up @@ -299,7 +301,7 @@ def _write_samples_to_parquet_coo(
else:
renumber_map_end_ix = offsets_z.renumber_map_offsets.iloc[0]

-renumber_map_p = renumber_map.map.iloc[
+renumber_map_p = renumber_map.renumber_map.iloc[
renumber_map_start_ix:renumber_map_end_ix
]

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -120,9 +120,9 @@ def sample_neighbors(
return self._get_edgeid_type_d(sampled_df)
else:
return (
-sampled_df[src_n].values,
-sampled_df[dst_n].values,
-sampled_df["indices"].values,
+sampled_df[src_n].astype("float").values,
+sampled_df[dst_n].astype("float").values,
+sampled_df["indices"].astype("float").values,
)

def _get_edgeid_type_d(self, df):
Expand Down
4 changes: 2 additions & 2 deletions python/cugraph/cugraph/sampling/sampling_utilities.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -74,7 +74,7 @@ def sampling_results_from_cupy_array_dict(
if renumber:
renumber_df = cudf.DataFrame(
{
-"map": cupy_array_dict["renumber_map"],
+"renumber_map": cupy_array_dict["renumber_map"],
}
)

Expand Down
4 changes: 2 additions & 2 deletions python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -222,7 +222,7 @@ def test_bulk_sampler_partitions(scratch_dir):
]

recovered_samples = cudf.read_parquet(os.path.join(samples_path, file))
-recovered_map = recovered_samples.map
+recovered_map = recovered_samples["map"]
recovered_samples = recovered_samples.drop("map", axis=1).dropna()

for current_batch_id in range(start_batch_id, end_batch_id + 1):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -166,7 +166,7 @@ def test_bulk_sampler_partitions(dask_client, scratch_dir, mg_input):
]

recovered_samples = cudf.read_parquet(os.path.join(samples_path, file))
-recovered_map = recovered_samples.map
+recovered_map = recovered_samples["map"]
recovered_samples = recovered_samples.drop("map", axis=1).dropna()

for current_batch_id in range(start_batch_id, end_batch_id + 1):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -796,7 +796,9 @@ def test_uniform_neighbor_sample_renumber(hops):
expected_renumber_map = cudf.concat([sources_hop_0, destinations_hop]).unique()

assert sorted(expected_renumber_map.values_host.tolist()) == sorted(
-renumber_map.map[0 : len(expected_renumber_map)].values_host.tolist()
+renumber_map.renumber_map[
+0 : len(expected_renumber_map)
+].values_host.tolist()
)
assert (renumber_map.batch_id == 0).all()

Expand Down Expand Up @@ -854,7 +856,9 @@ def test_uniform_neighbor_sample_offset_renumber(hops):
expected_renumber_map = cudf.concat([sources_hop_0, destinations_hop]).unique()

assert sorted(expected_renumber_map.values_host.tolist()) == sorted(
-renumber_map.map[0 : len(expected_renumber_map)].values_host.tolist()
+renumber_map.renumber_map[
+0 : len(expected_renumber_map)
+].values_host.tolist()
)

renumber_map_offsets = offsets_renumbered.renumber_map_offsets.dropna()
Expand Down Expand Up @@ -902,8 +906,8 @@ def test_uniform_neighbor_sample_csr_csc_global(hops, seed):
minors = sampling_results["minors"].dropna()
assert len(majors) == len(minors)

-majors = renumber_map.map.iloc[majors]
-minors = renumber_map.map.iloc[minors]
+majors = renumber_map.renumber_map.iloc[majors]
+minors = renumber_map.renumber_map.iloc[minors]

for i in range(len(majors)):
assert 1 == len(el[(el.src == majors.iloc[i]) & (el.dst == minors.iloc[i])])
Expand Down Expand Up @@ -952,8 +956,8 @@ def test_uniform_neighbor_sample_csr_csc_local(hops, seed):
majors = cudf.Series(cupy.arange(len(major_offsets) - 1))
majors = majors.repeat(cupy.diff(major_offsets))

-majors = renumber_map.map.iloc[majors]
-minors = renumber_map.map.iloc[minors]
+majors = renumber_map.renumber_map.iloc[majors]
+minors = renumber_map.renumber_map.iloc[minors]

for i in range(len(majors)):
assert 1 == len(el[(el.src == majors.iloc[i]) & (el.dst == minors.iloc[i])])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1015,7 +1015,7 @@ def test_uniform_neighbor_sample_renumber(dask_client, hops):

assert (renumber_map.batch_id == 0).all()
assert (
-renumber_map.map.nunique()
+renumber_map.renumber_map.nunique()
== cudf.concat(
[sources_hop_0, sampling_results_renumbered.destinations]
).nunique()
Expand Down Expand Up @@ -1091,7 +1091,9 @@ def test_uniform_neighbor_sample_offset_renumber(dask_client, hops):
expected_renumber_map = cudf.concat([sources_hop_0, destinations_hop]).unique()

assert sorted(expected_renumber_map.values_host.tolist()) == sorted(
-renumber_map.map[0 : len(expected_renumber_map)].values_host.tolist()
+renumber_map.renumber_map[
+0 : len(expected_renumber_map)
+].values_host.tolist()
)

renumber_map_offsets = offsets_renumbered.renumber_map_offsets.dropna()
Expand Down Expand Up @@ -1153,8 +1155,8 @@ def test_uniform_neighbor_sample_csr_csc_global(dask_client, hops, seed):
minors = sampling_results["minors"].dropna()
assert len(majors) == len(minors)

-majors = renumber_map.map.iloc[majors]
-minors = renumber_map.map.iloc[minors]
+majors = renumber_map.renumber_map.iloc[majors]
+minors = renumber_map.renumber_map.iloc[minors]

for i in range(len(majors)):
assert 1 == len(el[(el.src == majors.iloc[i]) & (el.dst == minors.iloc[i])])
Expand Down Expand Up @@ -1221,8 +1223,8 @@ def test_uniform_neighbor_sample_csr_csc_local(dask_client, hops, seed):
majors = cudf.Series(cupy.arange(len(major_offsets) - 1))
majors = majors.repeat(cupy.diff(major_offsets))

-majors = renumber_map.map.iloc[majors]
-minors = renumber_map.map.iloc[minors]
+majors = renumber_map.renumber_map.iloc[majors]
+minors = renumber_map.renumber_map.iloc[minors]

for i in range(len(majors)):
assert 1 == len(el[(el.src == majors.iloc[i]) & (el.dst == minors.iloc[i])])
Expand Down