Skip to content

Commit 2ac2ee8

Browse files
feat: add CLI commands for browsing OpenML flows and datasets (#1486)
1 parent 7feb2a3 commit 2ac2ee8

File tree

2 files changed

+270
-2
lines changed

2 files changed

+270
-2
lines changed

openml/cli.py

Lines changed: 178 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
"""Command Line Interface for `openml` to configure its settings."""
1+
"""Command Line Interface for `openml` to configure its settings and browse resources."""
22

33
from __future__ import annotations
44

@@ -9,6 +9,7 @@
99
from pathlib import Path
1010
from urllib.parse import urlparse
1111

12+
import openml
1213
from openml import config
1314
from openml.__version__ import __version__
1415

@@ -300,6 +301,79 @@ def configure_field( # noqa: PLR0913
300301
verbose_set(field, value)
301302

302303

304+
def list_flows_cli(args: argparse.Namespace) -> None:
    """Print a table of OpenML flows selected by the CLI filter options.

    The ``offset``, ``size``, ``tag`` and ``uploader`` attributes of ``args`` are
    forwarded unchanged to ``openml.flows.list_flows``. The result is printed in
    full via ``to_string()``, or a short notice is printed when it is empty.
    """
    flows = openml.flows.list_flows(
        offset=args.offset,
        size=args.size,
        tag=args.tag,
        uploader=args.uploader,
    )
    # `to_string()` is only evaluated when there is something to show.
    output = "No flows found matching the given criteria." if flows.empty else flows.to_string()
    print(output)
316+
317+
318+
def info_flow_cli(args: argparse.Namespace) -> None:
    """Fetch the flow identified by ``args.flow_id`` and print it.

    The flow is retrieved with ``openml.flows.get_flow`` and written to stdout
    using the object's own string representation.
    """
    requested_flow = openml.flows.get_flow(args.flow_id)
    print(requested_flow)
322+
323+
324+
def handle_flows(args: argparse.Namespace) -> None:
    """Route an ``openml flows`` invocation to the matching subcommand handler.

    The subcommand name is read from ``args.flows_action``. When that attribute
    is missing or ``None``, the help text of the parser stored on
    ``args._parser_flows`` is printed instead of running a handler.
    """
    dispatch = {"list": list_flows_cli, "info": info_flow_cli}
    chosen = getattr(args, "flows_action", None)
    if chosen is None:
        # No subcommand was given: show usage rather than failing.
        args._parser_flows.print_help()
        return
    dispatch[chosen](args)
336+
337+
338+
def list_datasets_cli(args: argparse.Namespace) -> None:
    """Print a table of OpenML datasets selected by the CLI filter options.

    The ``offset``, ``size``, ``tag``, ``status`` and ``data_name`` attributes of
    ``args`` are forwarded unchanged to ``openml.datasets.list_datasets``. The
    result is printed in full via ``to_string()``, or a short notice is printed
    when it is empty.
    """
    datasets = openml.datasets.list_datasets(
        offset=args.offset,
        size=args.size,
        tag=args.tag,
        status=args.status,
        data_name=args.data_name,
    )
    # `to_string()` is only evaluated when there is something to show.
    output = (
        "No datasets found matching the given criteria."
        if datasets.empty
        else datasets.to_string()
    )
    print(output)
351+
352+
353+
def info_dataset_cli(args: argparse.Namespace) -> None:
    """Fetch the dataset identified by ``args.dataset_id`` and print it.

    The dataset is requested through ``openml.datasets.get_dataset`` with
    ``download_data`` disabled and the qualities / feature meta-data downloads
    enabled, then written to stdout using the object's string representation.
    """
    download_flags = {
        "download_data": False,
        "download_qualities": True,
        "download_features_meta_data": True,
    }
    requested_dataset = openml.datasets.get_dataset(args.dataset_id, **download_flags)
    print(requested_dataset)
362+
363+
364+
def handle_datasets(args: argparse.Namespace) -> None:
    """Route an ``openml datasets`` invocation to the matching subcommand handler.

    The subcommand name is read from ``args.datasets_action``. When that
    attribute is missing or ``None``, the help text of the parser stored on
    ``args._parser_datasets`` is printed instead of running a handler.
    """
    dispatch = {"list": list_datasets_cli, "info": info_dataset_cli}
    chosen = getattr(args, "datasets_action", None)
    if chosen is None:
        # No subcommand was given: show usage rather than failing.
        args._parser_datasets.print_help()
        return
    dispatch[chosen](args)
375+
376+
303377
def configure(args: argparse.Namespace) -> None:
304378
"""Calls the right submenu(s) to edit `args.field` in the configuration file."""
305379
set_functions = {
@@ -329,7 +403,7 @@ def not_supported_yet(_: str) -> None:
329403

330404

331405
def main() -> None:
332-
subroutines = {"configure": configure}
406+
subroutines = {"configure": configure, "flows": handle_flows, "datasets": handle_datasets}
333407

334408
parser = argparse.ArgumentParser()
335409
# Add a global --version flag to display installed version and exit
@@ -368,7 +442,109 @@ def main() -> None:
368442
help="The value to set the FIELD to.",
369443
)
370444

445+
# --- flows subcommand ---
446+
parser_flows = subparsers.add_parser(
447+
"flows",
448+
description="Browse and search OpenML flows (models).",
449+
)
450+
flows_subparsers = parser_flows.add_subparsers(dest="flows_action")
451+
452+
parser_flows_list = flows_subparsers.add_parser(
453+
"list",
454+
description="List OpenML flows with optional filtering.",
455+
)
456+
parser_flows_list.add_argument(
457+
"--size",
458+
type=int,
459+
default=10,
460+
help="Maximum number of flows to return (default: 10).",
461+
)
462+
parser_flows_list.add_argument(
463+
"--offset",
464+
type=int,
465+
default=None,
466+
help="Number of flows to skip, for pagination.",
467+
)
468+
parser_flows_list.add_argument(
469+
"--tag",
470+
type=str,
471+
default=None,
472+
help="Only list flows with this tag.",
473+
)
474+
parser_flows_list.add_argument(
475+
"--uploader",
476+
type=str,
477+
default=None,
478+
help="Only list flows uploaded by this user.",
479+
)
480+
481+
parser_flows_info = flows_subparsers.add_parser(
482+
"info",
483+
description="Display detailed information about a specific flow.",
484+
)
485+
parser_flows_info.add_argument(
486+
"flow_id",
487+
type=int,
488+
help="The ID of the flow to display.",
489+
)
490+
491+
# --- datasets subcommand ---
492+
parser_datasets = subparsers.add_parser(
493+
"datasets",
494+
description="Browse and search OpenML datasets.",
495+
)
496+
datasets_subparsers = parser_datasets.add_subparsers(dest="datasets_action")
497+
498+
parser_datasets_list = datasets_subparsers.add_parser(
499+
"list",
500+
description="List OpenML datasets with optional filtering.",
501+
)
502+
parser_datasets_list.add_argument(
503+
"--size",
504+
type=int,
505+
default=10,
506+
help="Maximum number of datasets to return (default: 10).",
507+
)
508+
parser_datasets_list.add_argument(
509+
"--offset",
510+
type=int,
511+
default=None,
512+
help="Number of datasets to skip, for pagination.",
513+
)
514+
parser_datasets_list.add_argument(
515+
"--tag",
516+
type=str,
517+
default=None,
518+
help="Only list datasets with this tag.",
519+
)
520+
parser_datasets_list.add_argument(
521+
"--status",
522+
type=str,
523+
default=None,
524+
choices=["active", "in_preparation", "deactivated"],
525+
help="Filter by dataset status (default: active).",
526+
)
527+
parser_datasets_list.add_argument(
528+
"--data-name",
529+
type=str,
530+
default=None,
531+
help="Filter by dataset name.",
532+
)
533+
534+
parser_datasets_info = datasets_subparsers.add_parser(
535+
"info",
536+
description="Display detailed information about a specific dataset.",
537+
)
538+
parser_datasets_info.add_argument(
539+
"dataset_id",
540+
type=int,
541+
help="The ID of the dataset to display.",
542+
)
543+
371544
args = parser.parse_args()
545+
# Attach subparsers so handlers can print help when no action is given
546+
args._parser_flows = parser_flows
547+
args._parser_datasets = parser_datasets
372548
subroutines.get(args.subroutine, lambda _: parser.print_help())(args)
373549

374550

tests/test_openml/test_cli.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,95 @@ def test_console_script_version_prints_package_version():
4242
assert result.returncode == 0
4343
assert result.stderr == ""
4444
assert openml.__version__ in result.stdout
45+
46+
47+
@pytest.mark.production_server()
def test_cli_flows_list():
    """Test that 'openml flows list --size 5' prints a table of flows."""
    result = subprocess.run(
        [sys.executable, "-m", "openml.cli", "flows", "list", "--size", "5"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        check=False,
    )

    # Surface stderr in the failure message so a crash in the CLI is diagnosable.
    assert result.returncode == 0, result.stderr
    # Require the table header explicitly: accepting any non-empty output would
    # also accept the "No flows found ..." notice and hide a broken listing.
    assert "name" in result.stdout.lower()
61+
62+
63+
@pytest.mark.production_server()
def test_cli_flows_info():
    """Test that 'openml flows info <id>' prints flow details."""
    result = subprocess.run(
        [sys.executable, "-m", "openml.cli", "flows", "info", "5"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        check=False,
    )

    # Surface stderr in the failure message so a crash in the CLI is diagnosable.
    assert result.returncode == 0, result.stderr
    # Check for the labelled field only: a bare "5" can appear in any unrelated
    # output (dates, versions), which would make the assertion meaningless.
    assert "Flow Name" in result.stdout
77+
78+
79+
def test_cli_flows_no_action_prints_help():
    """Check that running 'openml flows' without a subcommand shows the help text."""
    command = [sys.executable, "-m", "openml.cli", "flows"]
    result = subprocess.run(command, capture_output=True, text=True, check=False)

    assert result.returncode == 0
    # The help output is expected to name the available subcommands.
    assert any(subcommand in result.stdout for subcommand in ("list", "info"))
92+
93+
94+
@pytest.mark.production_server()
def test_cli_datasets_list():
    """Test that 'openml datasets list --size 5' prints a table of datasets."""
    result = subprocess.run(
        [sys.executable, "-m", "openml.cli", "datasets", "list", "--size", "5"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        check=False,
    )

    # Surface stderr in the failure message so a crash in the CLI is diagnosable.
    assert result.returncode == 0, result.stderr
    # Require the table header explicitly: accepting any non-empty output would
    # also accept the "No datasets found ..." notice and hide a broken listing.
    assert "name" in result.stdout.lower()
107+
108+
109+
@pytest.mark.production_server()
def test_cli_datasets_info():
    """Test that 'openml datasets info <id>' prints dataset details."""
    result = subprocess.run(
        [sys.executable, "-m", "openml.cli", "datasets", "info", "61"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        check=False,
    )

    # Surface stderr in the failure message so a crash in the CLI is diagnosable.
    assert result.returncode == 0, result.stderr
    # Dataset 61 is the iris dataset; check for its name rather than the digits
    # "61", which could match unrelated output.
    assert "iris" in result.stdout.lower()
123+
124+
125+
def test_cli_datasets_no_action_prints_help():
    """Check that running 'openml datasets' without a subcommand shows the help text."""
    command = [sys.executable, "-m", "openml.cli", "datasets"]
    result = subprocess.run(command, capture_output=True, text=True, check=False)

    assert result.returncode == 0
    # The help output is expected to name the available subcommands.
    assert any(subcommand in result.stdout for subcommand in ("list", "info"))

0 commit comments

Comments
 (0)