feat: add CLI commands for browsing OpenML flows and datasets (#1486)

KanishqGandharv219 · KanishqGandharv219 · commit 2ac2ee8f2f19 · 2026-02-28T18:58:24.000+05:30
diff --git a/openml/cli.py b/openml/cli.py
@@ -1,4 +1,4 @@
-"""Command Line Interface for `openml` to configure its settings."""
+"""Command Line Interface for `openml` to configure its settings and browse resources."""
 
 from __future__ import annotations
 
@@ -9,6 +9,7 @@
 from pathlib import Path
 from urllib.parse import urlparse
 
+import openml
 from openml import config
 from openml.__version__ import __version__
 
@@ -300,6 +301,79 @@ def configure_field(  # noqa: PLR0913
     verbose_set(field, value)
 
 
+def list_flows_cli(args: argparse.Namespace) -> None:
+    """Print a table of OpenML flows selected by the parsed CLI options.
+
+    ``args`` must carry ``offset``, ``size``, ``tag`` and ``uploader``; they
+    are forwarded unchanged to ``openml.flows.list_flows``.
+    """
+    flows = openml.flows.list_flows(
+        offset=args.offset, size=args.size, tag=args.tag, uploader=args.uploader
+    )
+    # Report an empty result in words rather than printing the empty frame.
+    message = "No flows found matching the given criteria."
+    print(message if flows.empty else flows.to_string())
+
+
+def info_flow_cli(args: argparse.Namespace) -> None:
+    """Fetch the flow identified by ``args.flow_id`` and print it."""
+    # ``print`` relies on the flow object's own string representation.
+    print(openml.flows.get_flow(args.flow_id))
+
+
+def handle_flows(args: argparse.Namespace) -> None:
+    """Route ``openml flows <action>`` to the matching handler.
+
+    The action name is read from ``args.flows_action``. When it is missing or
+    ``None``, the help of the parser stored on ``args._parser_flows`` is printed.
+    """
+    selected = getattr(args, "flows_action", None)
+    if selected is None:
+        args._parser_flows.print_help()
+        return
+    handlers = {"list": list_flows_cli, "info": info_flow_cli}
+    handlers[selected](args)
+
+
+def list_datasets_cli(args: argparse.Namespace) -> None:
+    """Print a table of OpenML datasets selected by the parsed CLI options.
+
+    ``args`` must carry ``offset``, ``size``, ``tag``, ``status`` and ``data_name``.
+    """
+    status, name = args.status, args.data_name
+    datasets = openml.datasets.list_datasets(
+        offset=args.offset, size=args.size, tag=args.tag, status=status, data_name=name
+    )
+    if not datasets.empty:
+        print(datasets.to_string())
+        return
+    print("No datasets found matching the given criteria.")
+
+
+def info_dataset_cli(args: argparse.Namespace) -> None:
+    """Fetch the dataset identified by ``args.dataset_id`` and print it.
+
+    Qualities and feature metadata are requested; the data itself is not.
+    """
+    fetch = openml.datasets.get_dataset
+    flags = {"download_qualities": True, "download_features_meta_data": True}
+    # ``download_data=False`` keeps this a metadata-only request.
+    print(fetch(args.dataset_id, download_data=False, **flags))
+
+
+def handle_datasets(args: argparse.Namespace) -> None:
+    """Route ``openml datasets <action>`` to the matching handler.
+
+    Without an action, the help of ``args._parser_datasets`` is printed instead.
+    """
+    selected = getattr(args, "datasets_action", None)
+    if selected is None:
+        args._parser_datasets.print_help()
+        return
+    handlers = {"list": list_datasets_cli, "info": info_dataset_cli}
+    handlers[selected](args)
+
+
 def configure(args: argparse.Namespace) -> None:
     """Calls the right submenu(s) to edit `args.field` in the configuration file."""
     set_functions = {
@@ -329,7 +403,7 @@ def not_supported_yet(_: str) -> None:
 
 
 def main() -> None:
-    subroutines = {"configure": configure}
+    subroutines = {"configure": configure, "flows": handle_flows, "datasets": handle_datasets}
 
     parser = argparse.ArgumentParser()
     # Add a global --version flag to display installed version and exit
@@ -368,7 +442,109 @@ def main() -> None:
         help="The value to set the FIELD to.",
     )
 
+    # --- flows subcommand ---
+    parser_flows = subparsers.add_parser(
+        "flows",
+        description="Browse and search OpenML flows (models).",
+    )
+    flows_subparsers = parser_flows.add_subparsers(dest="flows_action")
+
+    parser_flows_list = flows_subparsers.add_parser(
+        "list",
+        description="List OpenML flows with optional filtering.",
+    )
+    parser_flows_list.add_argument(
+        "--size",
+        type=int,
+        default=10,
+        help="Maximum number of flows to return (default: 10).",
+    )
+    parser_flows_list.add_argument(
+        "--offset",
+        type=int,
+        default=None,
+        help="Number of flows to skip, for pagination.",
+    )
+    parser_flows_list.add_argument(
+        "--tag",
+        type=str,
+        default=None,
+        help="Only list flows with this tag.",
+    )
+    parser_flows_list.add_argument(
+        "--uploader",
+        type=str,
+        default=None,
+        help="Only list flows uploaded by this user.",
+    )
+
+    parser_flows_info = flows_subparsers.add_parser(
+        "info",
+        description="Display detailed information about a specific flow.",
+    )
+    parser_flows_info.add_argument(
+        "flow_id",
+        type=int,
+        help="The ID of the flow to display.",
+    )
+
+    # --- datasets subcommand ---
+    parser_datasets = subparsers.add_parser(
+        "datasets",
+        description="Browse and search OpenML datasets.",
+    )
+    datasets_subparsers = parser_datasets.add_subparsers(dest="datasets_action")
+
+    parser_datasets_list = datasets_subparsers.add_parser(
+        "list",
+        description="List OpenML datasets with optional filtering.",
+    )
+    parser_datasets_list.add_argument(
+        "--size",
+        type=int,
+        default=10,
+        help="Maximum number of datasets to return (default: 10).",
+    )
+    parser_datasets_list.add_argument(
+        "--offset",
+        type=int,
+        default=None,
+        help="Number of datasets to skip, for pagination.",
+    )
+    parser_datasets_list.add_argument(
+        "--tag",
+        type=str,
+        default=None,
+        help="Only list datasets with this tag.",
+    )
+    parser_datasets_list.add_argument(
+        "--status",
+        type=str,
+        default=None,
+        choices=["active", "in_preparation", "deactivated"],
+        help="Filter by dataset status (default: active).",
+    )
+    parser_datasets_list.add_argument(
+        "--data-name",
+        type=str,
+        default=None,
+        help="Filter by dataset name.",
+    )
+
+    parser_datasets_info = datasets_subparsers.add_parser(
+        "info",
+        description="Display detailed information about a specific dataset.",
+    )
+    parser_datasets_info.add_argument(
+        "dataset_id",
+        type=int,
+        help="The ID of the dataset to display.",
+    )
+
     args = parser.parse_args()
+    # Attach subparsers so handlers can print help when no action is given
+    args._parser_flows = parser_flows
+    args._parser_datasets = parser_datasets
     subroutines.get(args.subroutine, lambda _: parser.print_help())(args)
 
 
diff --git a/tests/test_openml/test_cli.py b/tests/test_openml/test_cli.py
@@ -42,3 +42,95 @@ def test_console_script_version_prints_package_version():
     assert result.returncode == 0
     assert result.stderr == ""
     assert openml.__version__ in result.stdout
+
+
+@pytest.mark.production_server()
+def test_cli_flows_list():
+    """'openml flows list --size 5' exits cleanly and prints a flow table."""
+    result = subprocess.run(
+        [sys.executable, "-m", "openml.cli", "flows", "list", "--size", "5"],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        text=True,
+        check=False,
+    )
+
+    assert result.returncode == 0, result.stderr
+    # Require the table header; "any output" would also accept the empty-result message.
+    assert "name" in result.stdout.lower()
+
+
+@pytest.mark.production_server()
+def test_cli_flows_info():
+    """'openml flows info <id>' exits cleanly and prints the flow details."""
+    result = subprocess.run(
+        [sys.executable, "-m", "openml.cli", "flows", "info", "5"],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        text=True,
+        check=False,
+    )
+
+    assert result.returncode == 0, result.stderr
+    # A bare "5" matches almost any output, so require the labelled name field instead.
+    assert "Flow Name" in result.stdout
+
+
+def test_cli_flows_no_action_prints_help():
+    """'openml flows' without a subcommand prints help listing every action."""
+    result = subprocess.run(
+        [sys.executable, "-m", "openml.cli", "flows"],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        text=True,
+        check=False,
+    )
+
+    assert result.returncode == 0, result.stderr
+    # Both subcommands must be advertised; checking either one alone hides a missing action.
+    assert "list" in result.stdout and "info" in result.stdout
+
+
+@pytest.mark.production_server()
+def test_cli_datasets_list():
+    """'openml datasets list --size 5' exits cleanly and prints a table with a name column."""
+    result = subprocess.run(
+        [sys.executable, "-m", "openml.cli", "datasets", "list", "--size", "5"],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        text=True,
+        check=False,
+    )
+
+    assert result.returncode == 0, result.stderr
+    assert "name" in result.stdout.lower()
+
+
+@pytest.mark.production_server()
+def test_cli_datasets_info():
+    """'openml datasets info <id>' exits cleanly and prints the dataset details."""
+    result = subprocess.run(
+        [sys.executable, "-m", "openml.cli", "datasets", "info", "61"],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        text=True,
+        check=False,
+    )
+
+    assert result.returncode == 0, result.stderr
+    # Dataset 61 is the iris dataset; a bare "61" could match unrelated output.
+    assert "iris" in result.stdout.lower()
+
+
+def test_cli_datasets_no_action_prints_help():
+    """'openml datasets' without a subcommand prints help listing both 'list' and 'info'."""
+    result = subprocess.run(
+        [sys.executable, "-m", "openml.cli", "datasets"],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        text=True,
+        check=False,
+    )
+
+    assert result.returncode == 0, result.stderr
+    assert "list" in result.stdout and "info" in result.stdout