Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 15 additions & 8 deletions pygmt/clib/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -1746,7 +1746,9 @@ def virtualfile_to_dataset(
self,
vfname: str,
output_type: Literal["pandas", "numpy", "file"] = "pandas",
column_names: list[str] | None = None,
names: list[str] | None = None,
dtype: type | dict[str, type] | None = None,
index_col: str | int | None = None,
) -> pd.DataFrame | np.ndarray | None:
"""
Output a tabular dataset stored in a virtual file to a different format.
Expand All @@ -1764,8 +1766,13 @@ def virtualfile_to_dataset(
- ``"pandas"`` will return a :class:`pandas.DataFrame` object.
- ``"numpy"`` will return a :class:`numpy.ndarray` object.
- ``"file"`` means the result was saved to a file and will return ``None``.
column_names
names
The column names for the :class:`pandas.DataFrame` output.
dtype
Data type for the columns of the :class:`pandas.DataFrame` output. Can be a
single type for all columns or a dictionary mapping column names to types.
index_col
Column to set as the index of the :class:`pandas.DataFrame` output.

Returns
-------
Expand Down Expand Up @@ -1829,7 +1836,7 @@ def virtualfile_to_dataset(
... outpd2 = lib.virtualfile_to_dataset(
... vfname=vouttbl,
... output_type="pandas",
... column_names=["col1", "col2", "col3", "coltext"],
... names=["col1", "col2", "col3", "coltext"],
... )
... assert isinstance(outpd2, pd.DataFrame)
>>> outnp
Expand All @@ -1854,13 +1861,13 @@ def virtualfile_to_dataset(
return None

# Read the virtual file as a GMT dataset and convert to pandas.DataFrame
result = self.read_virtualfile(vfname, kind="dataset").contents.to_dataframe()
result = self.read_virtualfile(vfname, kind="dataset").contents.to_dataframe(
names=names,
dtype=dtype,
index_col=index_col,
)
if output_type == "numpy": # numpy.ndarray output
return result.to_numpy()

# Assign column names
if column_names is not None:
result.columns = column_names
return result # pandas.DataFrame output

def extract_region(self):
Expand Down
25 changes: 23 additions & 2 deletions pygmt/datatypes/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,14 +143,29 @@ class _GMT_DATASEGMENT(ctp.Structure): # noqa: N801
("hidden", ctp.c_void_p),
]

def to_dataframe(self) -> pd.DataFrame:
def to_dataframe(
self,
names: list[str] | None = None,
dtype: type | dict[str, type] | None = None,
index_col: str | int | None = None,
) -> pd.DataFrame:
"""
Convert a _GMT_DATASET object to a :class:`pandas.DataFrame` object.

Currently, the number of columns in all segments of all tables is assumed to be
the same. The same column in all segments of all tables is concatenated. The
trailing text column is also concatenated as a single string column.

Parameters
----------
names
A list of column names.
dtype
Data type. Can be a single type for all columns or a dictionary mapping
column names to types.
index_col
Column to set as index.

Returns
-------
df
Expand Down Expand Up @@ -211,5 +226,11 @@ def to_dataframe(self) -> pd.DataFrame:
pd.Series(data=np.char.decode(textvector), dtype=pd.StringDtype())
)

df = pd.concat(objs=vectors, axis=1)
df = pd.concat(objs=vectors, axis="columns")
if names is not None: # Assign column names
df.columns = names
if dtype is not None:
df = df.astype(dtype)
if index_col is not None:
df = df.set_index(index_col)
return df
2 changes: 1 addition & 1 deletion pygmt/src/blockm.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def _blockm(
args=build_arg_string(kwargs, infile=vintbl, outfile=vouttbl),
)
return lib.virtualfile_to_dataset(
vfname=vouttbl, output_type=output_type, column_names=column_names
vfname=vouttbl, output_type=output_type, names=column_names
)


Expand Down
2 changes: 1 addition & 1 deletion pygmt/src/grd2xyz.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,5 +168,5 @@ def grd2xyz(
args=build_arg_string(kwargs, infile=vingrd, outfile=vouttbl),
)
return lib.virtualfile_to_dataset(
vfname=vouttbl, output_type=output_type, column_names=column_names
vfname=vouttbl, output_type=output_type, names=column_names
)
20 changes: 8 additions & 12 deletions pygmt/src/grdhisteq.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,18 +238,14 @@ def compute_bins(
module="grdhisteq", args=build_arg_string(kwargs, infile=vingrd)
)

result = lib.virtualfile_to_dataset(
return lib.virtualfile_to_dataset(
vfname=vouttbl,
output_type=output_type,
column_names=["start", "stop", "bin_id"],
names=["start", "stop", "bin_id"],
dtype={
"start": np.float32,
"stop": np.float32,
"bin_id": np.uint32,
},
index_col="bin_id" if output_type == "pandas" else None,
)
if output_type == "pandas":
result = result.astype(
{
"start": np.float32,
"stop": np.float32,
"bin_id": np.uint32,
}
)
return result.set_index("bin_id")
return result
2 changes: 1 addition & 1 deletion pygmt/src/grdtrack.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,5 +320,5 @@ def grdtrack(
return lib.virtualfile_to_dataset(
vfname=vouttbl,
output_type=output_type,
column_names=column_names,
names=column_names,
)
2 changes: 1 addition & 1 deletion pygmt/src/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,5 +258,5 @@ def project(
return lib.virtualfile_to_dataset(
vfname=vouttbl,
output_type=output_type,
column_names=column_names,
names=column_names,
)
2 changes: 1 addition & 1 deletion pygmt/src/select.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,5 +223,5 @@ def select(
return lib.virtualfile_to_dataset(
vfname=vouttbl,
output_type=output_type,
column_names=column_names,
names=column_names,
)