Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 12 additions & 5 deletions pygmt/clib/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -1747,6 +1747,8 @@ def virtualfile_to_dataset(
vfname: str,
output_type: Literal["pandas", "numpy", "file"] = "pandas",
column_names: list[str] | None = None,
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In Session.virtualfile_to_dataset, do we want to rename column_names to names?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done in f211b7d.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I kinda prefer column_names; we don't necessarily need to follow pd.read_csv here.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mean using column_names in both GMT_DATASET.to_dataframe and Session.virtualfile_to_dataset?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, i.e. revert f211b7d

dtype: type | dict[str, type] | None = None,
index_col: str | int | None = None,
) -> pd.DataFrame | np.ndarray | None:
"""
Output a tabular dataset stored in a virtual file to a different format.
Expand All @@ -1766,6 +1768,11 @@ def virtualfile_to_dataset(
- ``"file"`` means the result was saved to a file and ``None`` is returned.
column_names
The column names for the :class:`pandas.DataFrame` output.
dtype
Data type for the columns of the :class:`pandas.DataFrame` output. Can be a
single type for all columns or a dictionary mapping column names to types.
index_col
Column to set as the index of the :class:`pandas.DataFrame` output.

Returns
-------
Expand Down Expand Up @@ -1854,13 +1861,13 @@ def virtualfile_to_dataset(
return None

# Read the virtual file as a GMT dataset and convert to pandas.DataFrame
result = self.read_virtualfile(vfname, kind="dataset").contents.to_dataframe()
result = self.read_virtualfile(vfname, kind="dataset").contents.to_dataframe(
column_names=column_names,
dtype=dtype,
index_col=index_col,
)
if output_type == "numpy": # numpy.ndarray output
return result.to_numpy()

# Assign column names
if column_names is not None:
result.columns = column_names
return result # pandas.DataFrame output

def extract_region(self):
Expand Down
25 changes: 23 additions & 2 deletions pygmt/datatypes/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,14 +143,29 @@ class _GMT_DATASEGMENT(ctp.Structure): # noqa: N801
("hidden", ctp.c_void_p),
]

def to_dataframe(self) -> pd.DataFrame:
def to_dataframe(
self,
column_names: list[str] | None = None,
dtype: type | dict[str, type] | None = None,
index_col: str | int | None = None,
) -> pd.DataFrame:
"""
Convert a _GMT_DATASET object to a :class:`pandas.DataFrame` object.

Currently, the number of columns in all segments of all tables is assumed to be
the same. The same column in all segments of all tables is concatenated. The
trailing text column is also concatenated as a single string column.

Parameters
----------
column_names
A list of column names.
dtype
Data type. Can be a single type for all columns or a dictionary mapping
column names to types.
index_col
Column to set as index.

Returns
-------
df
Expand Down Expand Up @@ -211,5 +226,11 @@ def to_dataframe(self) -> pd.DataFrame:
pd.Series(data=np.char.decode(textvector), dtype=pd.StringDtype())
)

df = pd.concat(objs=vectors, axis=1)
df = pd.concat(objs=vectors, axis="columns")
if column_names is not None: # Assign column names
df.columns = column_names
if dtype is not None:
df = df.astype(dtype)
if index_col is not None:
df = df.set_index(index_col)
return df
18 changes: 7 additions & 11 deletions pygmt/src/grdhisteq.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,18 +238,14 @@ def compute_bins(
module="grdhisteq", args=build_arg_string(kwargs, infile=vingrd)
)

result = lib.virtualfile_to_dataset(
return lib.virtualfile_to_dataset(
vfname=vouttbl,
output_type=output_type,
column_names=["start", "stop", "bin_id"],
dtype={
"start": np.float32,
"stop": np.float32,
"bin_id": np.uint32,
},
index_col="bin_id" if output_type == "pandas" else None,
)
if output_type == "pandas":
result = result.astype(
{
"start": np.float32,
"stop": np.float32,
"bin_id": np.uint32,
}
)
return result.set_index("bin_id")
return result