-
Notifications
You must be signed in to change notification settings - Fork 234
clib.Session.virtualfile_from_vectors: Now takes a sequence of vectors as its single argument (Passing multiple arguments will be unsupported in v0.16.0) #3522
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
29bd3f8
f5b0e8e
361d837
f786b8e
3893211
b7b87c6
6481df2
089ef3f
1a43e38
697d66b
7ef4e01
b2032c8
c41806e
61f323c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1329,37 +1329,36 @@ def open_virtual_file(self, family, geometry, direction, data): | |
| return self.open_virtualfile(family, geometry, direction, data) | ||
|
|
||
| @contextlib.contextmanager | ||
| def virtualfile_from_vectors(self, *vectors): | ||
| def virtualfile_from_vectors( | ||
| self, vectors: Sequence, *args | ||
| ) -> Generator[str, None, None]: | ||
| """ | ||
| Store 1-D arrays as columns of a table inside a virtual file. | ||
| Store a sequence of 1-D vectors as columns of a dataset inside a virtual file. | ||
|
|
||
| Use the virtual file name to pass in the data in your vectors to a GMT | ||
| module. | ||
| Use the virtual file name to pass the dataset with your vectors to a GMT module. | ||
|
|
||
| Context manager (use in a ``with`` block). Yields the virtual file name | ||
| that you can pass as an argument to a GMT module call. Closes the | ||
| virtual file upon exit of the ``with`` block. | ||
| Context manager (use in a ``with`` block). Yields the virtual file name that you | ||
| can pass as an argument to a GMT module call. Closes the virtual file upon exit | ||
| of the ``with`` block. | ||
|
|
||
| Use this instead of creating the data container and virtual file by | ||
| hand with :meth:`pygmt.clib.Session.create_data`, | ||
| :meth:`pygmt.clib.Session.put_vector`, and | ||
| :meth:`pygmt.clib.Session.open_virtualfile`. | ||
| Use this instead of creating the data container and virtual file by hand with | ||
| :meth:`pygmt.clib.Session.create_data`, :meth:`pygmt.clib.Session.put_vector`, | ||
| and :meth:`pygmt.clib.Session.open_virtualfile`. | ||
|
|
||
| If the arrays are C contiguous blocks of memory, they will be passed | ||
| without copying to GMT. If they are not (e.g., they are columns of a | ||
| 2-D array), they will need to be copied to a contiguous block. | ||
| If the arrays are C contiguous blocks of memory, they will be passed without | ||
| copying to GMT. If they are not (e.g., they are columns of a 2-D array), they | ||
| will need to be copied to a contiguous block. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| vectors : 1-D arrays | ||
| The vectors that will be included in the array. All must be of the | ||
| vectors | ||
| A sequence of vectors that will be stored in the dataset. All must be of the | ||
| same size. | ||
|
|
||
| Yields | ||
| ------ | ||
| fname : str | ||
| The name of virtual file. Pass this as a file name argument to a | ||
| GMT module. | ||
| fname | ||
| The name of virtual file. Pass this as a file name argument to a GMT module. | ||
|
|
||
| Examples | ||
| -------- | ||
|
|
@@ -1371,34 +1370,49 @@ def virtualfile_from_vectors(self, *vectors): | |
| >>> y = np.array([4, 5, 6]) | ||
| >>> z = pd.Series([7, 8, 9]) | ||
| >>> with Session() as ses: | ||
| ... with ses.virtualfile_from_vectors(x, y, z) as fin: | ||
| ... with ses.virtualfile_from_vectors((x, y, z)) as fin: | ||
| ... # Send the output to a file so that we can read it | ||
| ... with GMTTempFile() as fout: | ||
| ... ses.call_module("info", [fin, f"->{fout.name}"]) | ||
| ... print(fout.read().strip()) | ||
| <vector memory>: N = 3 <1/3> <4/6> <7/9> | ||
| """ | ||
| # Conversion to a C-contiguous array needs to be done here and not in | ||
| # put_vector or put_strings because we need to maintain a reference to | ||
| # the copy while it is being used by the C API. Otherwise, the array | ||
| # would be garbage collected and the memory freed. Creating it in this | ||
| # context manager guarantees that the copy will be around until the | ||
| # virtual file is closed. The conversion is implicit in | ||
| # "*args" is added in v0.14.0 for backward-compatibility with the deprecated | ||
| # syntax of passing multiple vectors as positional arguments. | ||
| # Remove it in v0.16.0. | ||
| if len(args) > 0: | ||
| msg = ( | ||
| "Passing multiple arguments to Session.virtualfile_from_vectors is " | ||
| "deprecated since v0.14.0 and will be unsupported in v0.16.0. " | ||
| "Put all vectors in a sequence (a tuple or a list) instead and pass " | ||
| "the sequence as the single argument to this function. " | ||
| "e.g., use `with lib.virtualfile_from_vectors((x, y, z)) as vfile` " | ||
seisman marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| "instead of `with lib.virtualfile_from_vectors(x, y, z) as vfile`." | ||
| ) | ||
| warnings.warn(message=msg, category=FutureWarning, stacklevel=3) | ||
| vectors = (vectors, *args) | ||
|
|
||
| # Conversion to a C-contiguous array needs to be done here and not in put_vector | ||
| # or put_strings because we need to maintain a reference to the copy while it is | ||
| # being used by the C API. Otherwise, the array would be garbage collected and | ||
| # the memory freed. Creating it in this context manager guarantees that the copy | ||
| # will be around until the virtual file is closed. The conversion is implicit in | ||
| # vectors_to_arrays. | ||
| arrays = vectors_to_arrays(vectors) | ||
|
|
||
| columns = len(arrays) | ||
| # Find arrays that are of string dtype from column 3 onwards | ||
| # Assumes that first 2 columns contains coordinates like longitude | ||
| # latitude, or datetime string types. | ||
| # Find arrays that are of string dtype from column 3 onwards. Assumes that the first | ||
| # 2 columns contain coordinates like longitude, latitude, or datetime string | ||
seisman marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| # types. | ||
| for col, array in enumerate(arrays[2:]): | ||
| if pd.api.types.is_string_dtype(array.dtype): | ||
| columns = col + 2 | ||
| break | ||
|
|
||
| rows = len(arrays[0]) | ||
| if not all(len(i) == rows for i in arrays): | ||
| raise GMTInvalidInput("All arrays must have same size.") | ||
| msg = "All arrays must have same size." | ||
| raise GMTInvalidInput(msg) | ||
|
|
||
| family = "GMT_IS_DATASET|GMT_VIA_VECTOR" | ||
| geometry = "GMT_IS_POINT" | ||
|
|
@@ -1411,8 +1425,8 @@ def virtualfile_from_vectors(self, *vectors): | |
| for col, array in enumerate(arrays[:columns]): | ||
| self.put_vector(dataset, column=col, vector=array) | ||
|
|
||
| # Use put_strings for last column(s) with string type data | ||
| # Have to use modifier "GMT_IS_DUPLICATE" to duplicate the strings | ||
| # Use put_strings for last column(s) with string type data. | ||
| # Have to use modifier "GMT_IS_DUPLICATE" to duplicate the strings. | ||
| string_arrays = arrays[columns:] | ||
| if string_arrays: | ||
| if len(string_arrays) == 1: | ||
|
|
@@ -1691,7 +1705,7 @@ def virtualfile_from_stringio(self, stringio: io.StringIO): | |
| seg.header = None | ||
| seg.text = None | ||
|
|
||
| def virtualfile_in( # noqa: PLR0912 | ||
| def virtualfile_in( | ||
| self, | ||
| check_kind=None, | ||
| data=None, | ||
|
|
@@ -1790,19 +1804,18 @@ def virtualfile_in( # noqa: PLR0912 | |
| "vectors": self.virtualfile_from_vectors, | ||
| }[kind] | ||
|
|
||
| # Ensure the data is an iterable (Python list or tuple). | ||
| # "_data" is the data that will be passed to the _virtualfile_from function. | ||
| # "_data" defaults to "data" but should be adjusted for some cases. | ||
| _data = data | ||
| match kind: | ||
| case "arg" | "file" | "geojson" | "grid" | "image" | "stringio": | ||
|
Comment on lines
+1798
to
-1786
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe just put [code snippet omitted] So that if there's a new [code snippet omitted]
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
We can't do it like this, because we're currently writing case statements like [code snippet omitted]. We can refactor the code to: [code snippet omitted] or [code snippet omitted]. I still prefer the current code, which set [code snippet omitted]
If there is a new
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah ok, that makes sense, let's keep |
||
| _data = (data,) | ||
| if kind == "image" and data.dtype != "uint8": | ||
| msg = ( | ||
| f"Input image has dtype: {data.dtype} which is unsupported, " | ||
| "and may result in an incorrect output. Please recast image " | ||
| "to a uint8 dtype and/or scale to 0-255 range, e.g. " | ||
| "using a histogram equalization function like " | ||
| "skimage.exposure.equalize_hist." | ||
| ) | ||
| warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2) | ||
| case "image" if data.dtype != "uint8": | ||
| msg = ( | ||
| f"Input image has dtype: {data.dtype} which is unsupported, and " | ||
| "may result in an incorrect output. Please recast image to a uint8 " | ||
| "dtype and/or scale to 0-255 range, e.g. using a histogram " | ||
| "equalization function like skimage.exposure.equalize_hist." | ||
| ) | ||
| warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2) | ||
| case "empty": # data is None, so data must be given via x/y/z. | ||
| _data = [x, y] | ||
| if z is not None: | ||
|
|
@@ -1817,19 +1830,17 @@ def virtualfile_in( # noqa: PLR0912 | |
| else: | ||
| # Python list, tuple, numpy.ndarray, and pandas.Series types | ||
| _data = np.atleast_2d(np.asanyarray(data).T) | ||
| case "matrix": | ||
| case "matrix" if data.dtype.kind not in "iuf": | ||
| # GMT can only accept a 2-D matrix which are signed integer (i), | ||
| # unsigned integer (u) or floating point (f) types. For other data | ||
| # types, we need to use virtualfile_from_vectors instead, which turns | ||
| # the matrix into a list of vectors and allows for better handling of | ||
| # non-integer/float type inputs (e.g. for string or datetime data types) | ||
| _data = (data,) | ||
| if data.dtype.kind not in "iuf": | ||
| _virtualfile_from = self.virtualfile_from_vectors | ||
| _data = data.T | ||
| _virtualfile_from = self.virtualfile_from_vectors | ||
| _data = data.T | ||
|
|
||
| # Finally create the virtualfile from the data, to be passed into GMT | ||
| file_context = _virtualfile_from(*_data) | ||
| file_context = _virtualfile_from(_data) | ||
| return file_context | ||
|
|
||
| def virtualfile_from_data( | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.