GenericMappingTools · seisman · Mar 27, 2024 · Mar 26, 2024 · Mar 26, 2024 · Mar 26, 2024
diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py
@@ -1746,7 +1746,9 @@ def virtualfile_to_dataset(
         self,
         vfname: str,
         output_type: Literal["pandas", "numpy", "file"] = "pandas",
-        column_names: list[str] | None = None,
+        names: list[str] | None = None,
+        dtype: type | dict[str, type] | None = None,
+        index_col: str | int | None = None,
     ) -> pd.DataFrame | np.ndarray | None:
         """
         Output a tabular dataset stored in a virtual file to a different format.
@@ -1764,8 +1766,13 @@ def virtualfile_to_dataset(
             - ``"pandas"`` will return a :class:`pandas.DataFrame` object.
             - ``"numpy"`` will return a :class:`numpy.ndarray` object.
             - ``"file"`` means the result was saved to a file and will return ``None``.
-        column_names
+        names
             The column names for the :class:`pandas.DataFrame` output.
+        dtype
+            Data type for the columns of the :class:`pandas.DataFrame` output. Can be a
+            single type for all columns or a dictionary mapping column names to types.
+        index_col
+            Column to set as the index of the :class:`pandas.DataFrame` output.
 
         Returns
         -------
@@ -1829,7 +1836,7 @@ def virtualfile_to_dataset(
         ...             outpd2 = lib.virtualfile_to_dataset(
         ...                 vfname=vouttbl,
         ...                 output_type="pandas",
-        ...                 column_names=["col1", "col2", "col3", "coltext"],
+        ...                 names=["col1", "col2", "col3", "coltext"],
         ...             )
         ...     assert isinstance(outpd2, pd.DataFrame)
         >>> outnp
@@ -1854,13 +1861,13 @@ def virtualfile_to_dataset(
             return None
 
         # Read the virtual file as a GMT dataset and convert to pandas.DataFrame
-        result = self.read_virtualfile(vfname, kind="dataset").contents.to_dataframe()
+        result = self.read_virtualfile(vfname, kind="dataset").contents.to_dataframe(
+            names=names,
+            dtype=dtype,
+            index_col=index_col,
+        )
         if output_type == "numpy":  # numpy.ndarray output
             return result.to_numpy()
-
-        # Assign column names
-        if column_names is not None:
-            result.columns = column_names
         return result  # pandas.DataFrame output
 
     def extract_region(self):

diff --git a/pygmt/datatypes/dataset.py b/pygmt/datatypes/dataset.py
@@ -143,14 +143,29 @@ class _GMT_DATASEGMENT(ctp.Structure):  # noqa: N801
         ("hidden", ctp.c_void_p),
     ]
 
-    def to_dataframe(self) -> pd.DataFrame:
+    def to_dataframe(
+        self,
+        names: list[str] | None = None,
+        dtype: type | dict[str, type] | None = None,
+        index_col: str | int | None = None,
+    ) -> pd.DataFrame:
         """
         Convert a _GMT_DATASET object to a :class:`pandas.DataFrame` object.
 
         Currently, the number of columns in all segments of all tables are assumed to be
         the same. The same column in all segments of all tables are concatenated. The
         trailing text column is also concatenated as a single string column.
 
+        Parameters
+        ----------
+        names
+            A list of column names.
+        dtype
+            Data type. Can be a single type for all columns or a dictionary mapping
+            column names to types.
+        index_col
+            Column to set as index.
+
         Returns
         -------
         df
@@ -211,5 +226,11 @@ def to_dataframe(self) -> pd.DataFrame:
                 pd.Series(data=np.char.decode(textvector), dtype=pd.StringDtype())
             )
 
-        df = pd.concat(objs=vectors, axis=1)
+        df = pd.concat(objs=vectors, axis="columns")
+        if names is not None:  # Assign column names
+            df.columns = names
+        if dtype is not None:
+            df = df.astype(dtype)
+        if index_col is not None:
+            df = df.set_index(index_col)
         return df
diff --git a/pygmt/src/blockm.py b/pygmt/src/blockm.py
@@ -64,7 +64,7 @@ def _blockm(
                 args=build_arg_string(kwargs, infile=vintbl, outfile=vouttbl),
             )
             return lib.virtualfile_to_dataset(
-                vfname=vouttbl, output_type=output_type, column_names=column_names
+                vfname=vouttbl, output_type=output_type, names=column_names
             )
 
 

diff --git a/pygmt/src/grd2xyz.py b/pygmt/src/grd2xyz.py
@@ -168,5 +168,5 @@ def grd2xyz(
                 args=build_arg_string(kwargs, infile=vingrd, outfile=vouttbl),
             )
             return lib.virtualfile_to_dataset(
-                vfname=vouttbl, output_type=output_type, column_names=column_names
+                vfname=vouttbl, output_type=output_type, names=column_names
             )
diff --git a/pygmt/src/grdhisteq.py b/pygmt/src/grdhisteq.py
@@ -238,18 +238,14 @@ def compute_bins(
                     module="grdhisteq", args=build_arg_string(kwargs, infile=vingrd)
                 )
 
-            result = lib.virtualfile_to_dataset(
+            return lib.virtualfile_to_dataset(
                 vfname=vouttbl,
                 output_type=output_type,
-                column_names=["start", "stop", "bin_id"],
+                names=["start", "stop", "bin_id"],
+                dtype={
+                    "start": np.float32,
+                    "stop": np.float32,
+                    "bin_id": np.uint32,
+                },
+                index_col="bin_id" if output_type == "pandas" else None,
             )
-            if output_type == "pandas":
-                result = result.astype(
-                    {
-                        "start": np.float32,
-                        "stop": np.float32,
-                        "bin_id": np.uint32,
-                    }
-                )
-                return result.set_index("bin_id")
-            return result
diff --git a/pygmt/src/grdtrack.py b/pygmt/src/grdtrack.py
@@ -320,5 +320,5 @@ def grdtrack(
         return lib.virtualfile_to_dataset(
             vfname=vouttbl,
             output_type=output_type,
-            column_names=column_names,
+            names=column_names,
         )
diff --git a/pygmt/src/project.py b/pygmt/src/project.py
@@ -258,5 +258,5 @@ def project(
         return lib.virtualfile_to_dataset(
             vfname=vouttbl,
             output_type=output_type,
-            column_names=column_names,
+            names=column_names,
         )
diff --git a/pygmt/src/select.py b/pygmt/src/select.py
@@ -223,5 +223,5 @@ def select(
         return lib.virtualfile_to_dataset(
             vfname=vouttbl,
             output_type=output_type,
-            column_names=column_names,
+            names=column_names,
         )