88
99import difflib
1010import warnings
11+ from collections .abc import Iterable
1112from json import JSONDecodeError
1213from pathlib import Path
1314from urllib .parse import quote
2122from astroquery import log
2223from astroquery .utils import commons , async_to_sync
2324from astroquery .utils .class_or_instance import class_or_instance
24- from astroquery .exceptions import InvalidQueryError , MaxResultsWarning , NoResultsWarning
25+ from astroquery .exceptions import InputWarning , InvalidQueryError , MaxResultsWarning , NoResultsWarning
2526
2627from astroquery .mast import utils
2728from astroquery .mast .core import MastQueryWithLogin
@@ -43,7 +44,7 @@ class MastMissionsClass(MastQueryWithLogin):
4344 _list_products = 'post_list_products'
4445
4546 # Workaround so that observation_id is returned in ULLYSES queries that do not specify columns
46- _default_ullyses_cols = ['target_name_ulysses ' , 'target_classification' , 'targ_ra' , 'targ_dec' , 'host_galaxy_name' ,
47+ _default_ullyses_cols = ['target_name_ullyses ' , 'target_classification' , 'targ_ra' , 'targ_dec' , 'host_galaxy_name' ,
4748 'spectral_type' , 'bmv0_mag' , 'u_mag' , 'b_mag' , 'v_mag' , 'gaia_g_mean_mag' , 'star_mass' ,
4849 'instrument' , 'grating' , 'filter' , 'observation_id' ]
4950
@@ -197,6 +198,71 @@ def _build_params_from_criteria(self, params, **criteria):
197198 value = [value ]
198199 params [prop ] = value
199200
def _parse_select_cols(self, select_cols):
    """
    Normalize and validate the ``select_cols`` argument of a query.

    Parameters
    ----------
    select_cols : iterable or str or None
        The columns requested by the caller. May be an iterable of column names,
        a comma-separated string of column names, ``'all'`` / ``'*'`` to request
        every available column, or None to use the mission's default columns.

    Returns
    -------
    list or None
        The column names to request. None is returned when ``select_cols`` is None
        and the mission is not ``'ullyses'``. Otherwise the result is a list: a copy
        of the ULLYSES default columns, every column reported by ``get_column_list``
        for ``'all'`` / ``'*'``, or the requested columns that exist (whitespace
        stripped, blanks and duplicates dropped, original order kept) followed by
        the mission's dataset ID column if one is configured and not already present.

    Raises
    ------
    InvalidQueryError
        If ``select_cols`` is not an iterable of strings, a comma-separated string,
        'all', or '*'.
        If any individual column name is not a string.

    Warns
    -----
    InputWarning
        For each requested column that is not in the mission's column list. The
        closest matching column name is suggested when one is found.
    """
    if select_cols is None:
        if self.mission == 'ullyses':
            # Return a copy so callers cannot mutate the shared class-level defaults.
            return list(self._default_ullyses_cols)
        return None

    # Validate the container type first so an invalid argument is rejected
    # before the column list is looked up.
    if isinstance(select_cols, str):
        keyword = select_cols.strip()
        if keyword.lower() == 'all' or keyword == '*':
            return self.get_column_list()['name'].value.tolist()
        # Comma-separated string
        requested = select_cols.split(',')
    elif isinstance(select_cols, Iterable):
        # Materialize so one-shot iterators (e.g. generators) are consumed exactly once
        requested = list(select_cols)
    else:
        raise InvalidQueryError(
            "`select_cols` must be an iterable of column names, a comma-separated string, "
            "'all', or '*'."
        )

    all_columns = self.get_column_list()['name'].value.tolist()
    known_columns = set(all_columns)  # constant-time membership checks

    valid_select_cols = []
    seen = set()
    for col in requested:
        if not isinstance(col, str):
            raise InvalidQueryError(
                "`select_cols` must contain only strings (column names)."
            )
        col = col.strip()
        # Skip blanks (e.g. from a trailing comma) and names already handled, so each
        # column is requested, or warned about, at most once.
        if not col or col in seen:
            continue
        seen.add(col)

        if col in known_columns:
            valid_select_cols.append(col)
            continue

        closest_match = difflib.get_close_matches(col, all_columns, n=1)
        suggestion = f' Did you mean "{closest_match[0]}"?' if closest_match else ''
        warnings.warn(f"Column '{col}' not found.{suggestion}", InputWarning)

    # Dataset ID column should always be returned
    dataset_col = self.dataset_kwds.get(self.mission)
    if dataset_col and dataset_col not in valid_select_cols:
        valid_select_cols.append(dataset_col)
    return valid_select_cols
265+
200266 @class_or_instance
201267 def query_region_async (self , coordinates , * , radius = 3 * u .arcmin , limit = 5000 , offset = 0 ,
202268 select_cols = None , ** criteria ):
@@ -217,9 +283,11 @@ def query_region_async(self, coordinates, *, radius=3*u.arcmin, limit=5000, offs
217283 Default is 5000. The maximum number of dataset IDs in the results.
218284 offset : int
219285 Default is 0. The number of records you wish to skip before selecting records.
220- select_cols: list , optional
286+ select_cols: iterable or str or None , optional
221287 Default is None. Names of columns that will be included in the result table.
222288 If None, a default set of columns will be returned.
289+ Can either be an iterable of column names, a comma-separated string of column names,
290+ or 'all'/'*' to return all available columns.
223291 **criteria
224292 Other mission-specific criteria arguments.
225293 All valid filters can be found using `~astroquery.mast.missions.MastMissionsClass.get_column_list`
@@ -255,19 +323,13 @@ def query_region_async(self, coordinates, *, radius=3*u.arcmin, limit=5000, offs
255323 f"Query radius too large. Must be ≤{ self ._max_query_radius } , got { radius } ."
256324 )
257325
258- # Dataset ID column should always be returned
259- if select_cols :
260- select_cols .append (self .dataset_kwds .get (self .mission , None ))
261- elif self .mission == 'ullyses' :
262- select_cols = self ._default_ullyses_cols
263-
264326 # Basic params
265327 params = {'target' : [f"{ coordinates .ra .deg } { coordinates .dec .deg } " ],
266328 'radius' : radius .arcsec ,
267329 'radius_units' : 'arcseconds' ,
268330 'limit' : limit ,
269331 'offset' : offset ,
270- 'select_cols' : select_cols }
332+ 'select_cols' : self . _parse_select_cols ( select_cols ) }
271333
272334 self ._build_params_from_criteria (params , ** criteria )
273335
@@ -295,9 +357,11 @@ def query_criteria_async(self, *, coordinates=None, objectname=None, radius=3*u.
295357 Default is 5000. The maximum number of dataset IDs in the results.
296358 offset : int
297359 Default is 0. The number of records you wish to skip before selecting records.
298- select_cols: list , optional
360+ select_cols: iterable or str or None , optional
299361 Default is None. Names of columns that will be included in the result table.
300362 If None, a default set of columns will be returned.
363+ Can either be an iterable of column names, a comma-separated string of column names,
364+ or 'all'/'*' to return all available columns.
301365 resolver : str, optional
302366 Default is None. The resolver to use when resolving a named target into coordinates. Valid options are
303367 "SIMBAD" and "NED". If not specified, the default resolver order will be used. Please see the
@@ -344,14 +408,8 @@ def query_criteria_async(self, *, coordinates=None, objectname=None, radius=3*u.
344408 f"Query radius too large. Must be ≤{ self ._max_query_radius } , got { radius } ."
345409 )
346410
347- # Dataset ID column should always be returned
348- if select_cols :
349- select_cols .append (self .dataset_kwds .get (self .mission , None ))
350- elif self .mission == 'ullyses' :
351- select_cols = self ._default_ullyses_cols
352-
353411 # build query
354- params = {"limit" : self .limit , "offset" : offset , 'select_cols' : select_cols }
412+ params = {"limit" : self .limit , "offset" : offset , 'select_cols' : self . _parse_select_cols ( select_cols ) }
355413 if coordinates :
356414 params ["target" ] = [f"{ coordinates .ra .deg } { coordinates .dec .deg } " ]
357415 params ["radius" ] = radius .arcsec
@@ -382,9 +440,11 @@ def query_object_async(self, objectname, *, radius=3*u.arcmin, limit=5000, offse
382440 Default is 5000. The maximum number of dataset IDs in the results.
383441 offset : int
384442 Default is 0. The number of records you wish to skip before selecting records.
385- select_cols: list , optional
443+ select_cols: iterable or str or None , optional
386444 Default is None. Names of columns that will be included in the result table.
387445 If None, a default set of columns will be returned.
446+ Can either be an iterable of column names, a comma-separated string of column names,
447+ or 'all'/'*' to return all available columns.
388448 resolver : str, optional
389449 Default is None. The resolver to use when resolving a named target into coordinates. Valid options are
390450 "SIMBAD" and "NED". If not specified, the default resolver order will be used. Please see the
0 commit comments