libpython: Support benchmarks of non-parallel runs better (#1733)

wenzeslaus · web-flow · commit 7b09143baf55 · 2021-07-19T21:44:51.000-04:00
* Function for possibly non-parallel repeated runs for writing benchmark scripts.
* Better documentation of non-parallel runs in resolution-changing benchmark.
* CLI for joining JSON result files from multiple benchmarks and plotting from a file.
* The CLI uses argparse with subcommands (subparsers), so it is extensible and more subcommands can be added in the future.
diff --git a/python/grass/benchmark/Makefile b/python/grass/benchmark/Makefile
@@ -5,7 +5,7 @@ include $(MODULE_TOPDIR)/include/Make/Python.make
 
 DSTDIR = $(ETC)/python/grass/benchmark
 
-MODULES = plots results runners
+MODULES = app plots results runners __main__
 
 PYFILES := $(patsubst %,$(DSTDIR)/%.py,$(MODULES) __init__)
 PYCFILES := $(patsubst %,$(DSTDIR)/%.pyc,$(MODULES) __init__)
diff --git a/python/grass/benchmark/__init__.py b/python/grass/benchmark/__init__.py
@@ -1,16 +1,36 @@
+# MODULE:    grass.benchmark
+#
+# AUTHOR(S): Vaclav Petras <wenzeslaus gmail com>
+#
+# PURPOSE:   Benchmarking for GRASS GIS modules
+#
+# COPYRIGHT: (C) 2021 Vaclav Petras, and by the GRASS Development Team
+#
+#            This program is free software under the GNU General Public
+#            License (>=v2). Read the file COPYING that comes with GRASS
+#            for details.
+
 """Benchmarking for GRASS GIS modules
 
 This subpackage of the grass package is experimental and the API can change anytime.
 The API of the package is defined by what is imported in the top-level ``__init__.py``
 file of the subpackage.
+
+The functions in the Python API raise exceptions, although calls of other functions from
+the grass package may call grass.script.fatal and exit
+(see :func:`grass.script.core.set_raise_on_error` for changing the behavior).
+This applies to the CLI of this subpackage too, except that usage exceptions raised
+in the CLI code result in *sys.exit* with an error message rather than a traceback.
+Messages and other user-visible texts in this package are not translatable.
 """
 
 from .plots import nprocs_plot, num_cells_plot
 from .results import (
     join_results,
+    join_results_from_files,
     load_results,
     load_results_from_file,
     save_results,
     save_results_to_file,
 )
-from .runners import benchmark_nprocs, benchmark_resolutions
+from .runners import benchmark_nprocs, benchmark_resolutions, benchmark_single
diff --git a/python/grass/benchmark/__main__.py b/python/grass/benchmark/__main__.py
@@ -0,0 +1,19 @@
+# MODULE:    grass.benchmark
+#
+# AUTHOR(S): Vaclav Petras <wenzeslaus gmail com>
+#
+# PURPOSE:   Benchmarking for GRASS GIS modules
+#
+# COPYRIGHT: (C) 2021 Vaclav Petras, and by the GRASS Development Team
+#
+#            This program is free software under the GNU General Public
+#            License (>=v2). Read the file COPYING that comes with GRASS
+#            for details.
+
+
+"""The main file for executing using python -m"""
+
+from grass.benchmark.app import main
+
+if __name__ == "__main__":
+    main()
diff --git a/python/grass/benchmark/app.py b/python/grass/benchmark/app.py
@@ -0,0 +1,181 @@
+# MODULE:    grass.benchmark
+#
+# AUTHOR(S): Vaclav Petras <wenzeslaus gmail com>
+#
+# PURPOSE:   Benchmarking for GRASS GIS modules
+#
+# COPYRIGHT: (C) 2021 Vaclav Petras, and by the GRASS Development Team
+#
+#            This program is free software under the GNU General Public
+#            License (>=v2). Read the file COPYING that comes with GRASS
+#            for details.
+
+
+"""CLI for the benchmark package"""
+
+import argparse
+import sys
+from pathlib import Path
+
+from grass.benchmark import (
+    join_results_from_files,
+    load_results_from_file,
+    num_cells_plot,
+    save_results_to_file,
+)
+
+
+class CliUsageError(ValueError):
+    """Raised when error is in the command line arguments.
+
+    Used when the error is discovered only after argparse parsed the arguments.
+    """
+
+    # ArgumentError from argparse may work too, but it is not documented and
+    # takes a reference argument which we don't have access to after the parse step.
+    pass
+
+
+def join_results_cli(args):
+    """Translate CLI parser result to API calls."""
+    if args.prefixes and len(args.results) != len(args.prefixes):
+        raise CliUsageError(
+            f"Number of prefixes ({len(args.prefixes)}) needs to be the same"
+            f" as the number of input result files ({len(args.results)})"
+        )
+    results = join_results_from_files(
+        source_filenames=args.results,
+        prefixes=args.prefixes,
+    )
+    save_results_to_file(results, args.output)
+
+
+def plot_cells_cli(args):
+    """Translate CLI parser result to API calls."""
+    results = load_results_from_file(args.input)
+    num_cells_plot(
+        results.results,
+        filename=args.output,
+        title=args.title,
+        show_resolution=args.resolutions,
+    )
+
+
+def get_executable_name():
+    """Get name of the executable and module.
+
+    This is a workaround for Python issue:
+    argparse support for "python -m module" in help
+    https://bugs.python.org/issue22240
+    """
+    executable = Path(sys.executable).stem
+    return f"{executable} -m grass.benchmark"
+
+
+class ExtendAction(argparse.Action):
+    """Support for argparse action="extend" before Python 3.8
+
+    Each parser instance needs the action to be registered.
+    """
+
+    # pylint: disable=too-few-public-methods
+    def __call__(self, parser, namespace, values, option_string=None):
+        items = getattr(namespace, self.dest) or []
+        items.extend(values)
+        setattr(namespace, self.dest, items)
+
+
+def add_subcommand_parser(subparsers, name, description):
+    """Add parser for a subcommand into subparsers."""
+    # help is in parent's help, description in subcommand's help.
+    return subparsers.add_parser(name, help=description, description=description)
+
+
+def add_subparsers(parser, dest):
+    """Add subparsers in a unified way.
+
+    Uses title 'subcommands' for the list of commands
+    (instead of the 'positional' which is the default).
+
+    The *dest* should be 'command', 'subcommand', etc. with appropriate nesting.
+    """
+    if sys.version_info < (3, 7):
+        # The 'required' parameter of add_subparsers is only available in Python >=3.7.
+        return parser.add_subparsers(title="subcommands", dest=dest)
+    return parser.add_subparsers(title="subcommands", required=True, dest=dest)
+
+
+def add_results_subcommand(parent_subparsers):
+    """Add results subcommand."""
+    main_parser = add_subcommand_parser(
+        parent_subparsers, "results", description="Manipulate results"
+    )
+    main_subparsers = add_subparsers(main_parser, dest="subcommand")
+
+    join = main_subparsers.add_parser("join", help="Join results")
+    join.add_argument("results", help="Files with results", nargs="*", metavar="file")
+    join.add_argument("output", help="Output file", metavar="output_file")
+    if sys.version_info < (3, 8):
+        join.register("action", "extend", ExtendAction)
+    join.add_argument(
+        "--prefixes",
+        help="Add prefixes to result labels per file",
+        action="extend",
+        nargs="*",
+        metavar="text",
+    )
+    join.set_defaults(handler=join_results_cli)
+
+
+def add_plot_subcommand(parent_subparsers):
+    """Add plot subcommand."""
+    main_parser = add_subcommand_parser(
+        parent_subparsers, "plot", description="Plot results"
+    )
+    main_subparsers = add_subparsers(main_parser, dest="subcommand")
+
+    join = main_subparsers.add_parser("cells", help="Plot for variable number of cells")
+    join.add_argument("input", help="file with results (JSON)", metavar="input_file")
+    join.add_argument(
+        "output", help="output file (e.g., PNG)", nargs="?", metavar="output_file"
+    )
+    join.add_argument(
+        "--title",
+        help="Title for the plot",
+        metavar="text",
+    )
+    join.add_argument(
+        "--resolutions",
+        help="Use resolutions for x axis instead of cell count",
+        action="store_true",
+    )
+    join.set_defaults(handler=plot_cells_cli)
+
+
+def define_arguments():
+    """Define top level parser and create subparsers."""
+    parser = argparse.ArgumentParser(
+        description="Process results from module benchmarks.",
+        prog=get_executable_name(),
+    )
+    subparsers = add_subparsers(parser, dest="command")
+
+    add_results_subcommand(subparsers)
+    add_plot_subcommand(subparsers)
+
+    return parser
+
+
+def main(args=None):
+    """Define and parse command line parameters then run the appropriate handler."""
+    parser = define_arguments()
+    args = parser.parse_args(args)
+    try:
+        args.handler(args)
+    except CliUsageError as error:
+        # Report a usage error and exit.
+        sys.exit(f"ERROR: {error}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/python/grass/benchmark/plots.py b/python/grass/benchmark/plots.py
@@ -40,7 +40,7 @@ def get_pyplot(to_file):
 def nprocs_plot(results, filename=None):
     """Plot results from a multiple nprocs (thread) benchmarks.
 
-    *results* is a list of individual results from separate benchmars.
+    *results* is a list of individual results from separate benchmarks.
     One result is required to have attributes: *nprocs*, *times*, *label*.
     The *nprocs* attribute is a list of all processing elements
     (cores, threads, processes) used in the benchmark.
@@ -76,10 +76,10 @@ def nprocs_plot(results, filename=None):
         plt.show()
 
 
-def num_cells_plot(results, filename=None, show_resolution=False):
+def num_cells_plot(results, filename=None, title=None, show_resolution=False):
     """Plot results from a multiple raster grid size benchmarks.
 
-    *results* is a list of individual results from separate benchmars
+    *results* is a list of individual results from separate benchmarks
     with one result being similar to the :func:`nprocs_plot` function.
     The result is required to have *times* and *label* attributes
     and may have an *all_times* attribute.
@@ -116,6 +116,12 @@ def num_cells_plot(results, filename=None, show_resolution=False):
     else:
         plt.xlabel("Number of cells")
     plt.ylabel("Time [s]")
+    if title:
+        plt.title(title)
+    elif show_resolution:
+        plt.title("Execution time by resolution")
+    else:
+        plt.title("Execution time by cell count")
     if filename:
         plt.savefig(filename)
     else:
diff --git a/python/grass/benchmark/results.py b/python/grass/benchmark/results.py
@@ -93,3 +93,11 @@ def join_results(results, prefixes=None):
                 result.label = f"{prefix}: {result.label}"
             joined.append(result)
     return joined
+
+
+def join_results_from_files(source_filenames, prefixes):
+    """Join multiple files into one results object."""
+    to_merge = []
+    for result_file in source_filenames:
+        to_merge.append(load_results_from_file(result_file))
+    return join_results(to_merge, prefixes=prefixes)
diff --git a/python/grass/benchmark/runners.py b/python/grass/benchmark/runners.py
diff --git a/python/grass/benchmark/testsuite/test_benchmark.py b/python/grass/benchmark/testsuite/test_benchmark.py
diff --git a/python/grass/benchmark/testsuite/test_benchmark_cli.py b/python/grass/benchmark/testsuite/test_benchmark_cli.py