Source code for cldfbench.cli_util

import json
import typing
from time import time
import argparse

from clldutils.clilib import ParserError
import termcolor

import pycldf

import cldfbench
from cldfbench import ENTRY_POINT
from cldfbench import get_dataset as _get
from cldfbench import get_datasets as _gets

# Names exported by `from cldfbench.cli_util import *`.
__all__ = ['DatasetNotFoundException',
           'add_entry_point', 'add_dataset_spec', 'add_catalog_spec',
           'get_dataset', 'get_datasets', 'get_cldf_dataset',
           'with_dataset', 'with_datasets']

# Marker value for the `default` of :func:`add_catalog_spec`: per that function's docstring, a
# catalog option left at this value is set to `None` if no user-supplied value is found.
IGNORE_MISSING = '-'


class DatasetNotFoundException(Exception):
    """
    Exception type for signalling that a dataset could not be found.

    Exported via `__all__`; none of the functions visible in this module raise it themselves.
    """


def add_entry_point(parser: argparse.ArgumentParser, ep: str = ENTRY_POINT):
    """
    Register the `--entry-point` option on `parser`.

    :param parser: Parser (or subparser) the option is added to.
    :param ep: Value used as the option's default.
    """
    option = dict(
        help='Name of entry_points to identify datasets',
        default=ep,
    )
    parser.add_argument('--entry-point', **option)


def add_dataset_spec(
        parser: argparse.ArgumentParser, ep: str = ENTRY_POINT, multiple: bool = False):
    """
    Add arguments and options to specify `cldfbench` Datasets to the CLI.

    Registers the positional `DATASET` argument and the `--entry-point` option and, if
    `multiple` is set, the `--glob` flag.

    :param parser: Parser (or subparser) the arguments are added to.
    :param ep: Default value for the `--entry-point` option.
    :param multiple: Flag signaling whether selection of multiple datasets should be allowed.

    .. note::

        This function is supposed to be used in tandem with :func:`get_dataset`, called in a
        command's `run` function.
    """
    help_parts = ["Dataset spec, either ID of installed dataset or path to python module"]
    if multiple:
        # Only advertise the glob syntax when the `--glob` flag is actually available.
        help_parts.extend([
            " or simplified glob pattern (where _ is understood as *) ",
            "specifying python modules (requires --glob option!)",
            " or just _, which will match all datasets of the given --entry-point",
        ])
    help_parts.append('.')

    parser.add_argument('dataset', metavar='DATASET', help=''.join(help_parts))
    add_entry_point(parser, ep=ep)
    if multiple:
        parser.add_argument(
            '--glob',
            action='store_true',
            default=False,
            help="Interpret DATASET as simplified glob pattern relative to cwd.")


def get_dataset(args: argparse.Namespace) -> cldfbench.Dataset:
    """
    Get the `cldfbench.Dataset` specified by `args`.

    :param args: Parsed CLI arguments; `args.dataset` and `args.entry_point` are read.
    :return: The dataset object returned by the lookup.
    :raises ParserError: If no matching dataset was found.
    """
    dataset = _get(args.dataset, ep=args.entry_point)
    if not dataset:
        message = '\nInvalid dataset spec: <{0}> {1}\n'.format(args.entry_point, args.dataset)
        raise ParserError(termcolor.colored(message, "red"))
    return dataset


def get_datasets(args: argparse.Namespace) -> typing.List[cldfbench.Dataset]:
    """
    Get the `cldfbench.Dataset` s specified by `args`.

    If globbing is requested or the spec is exactly `_`, every `_` in `args.dataset` is replaced
    by `*` and the rewritten spec is stored back on `args.dataset`.

    :param args: Parsed CLI arguments; `args.dataset`, `args.entry_point` and (if present) \
    `args.glob` are read.
    :return: The non-empty result of the dataset lookup.
    :raises ParserError: If no matching datasets were found.
    """
    # In this module `--glob` is only registered by `add_dataset_spec(..., multiple=True)`, so a
    # namespace may lack the attribute; treat that the same as the flag not being given.
    glob = getattr(args, 'glob', False)
    if glob or args.dataset == '_':
        args.dataset = args.dataset.replace('_', '*')

    datasets = _gets(args.dataset, ep=args.entry_point, glob=glob)
    if not datasets:
        message = '\nInvalid dataset spec: <{0}> {1}\n'.format(args.entry_point, args.dataset)
        raise ParserError(termcolor.colored(message, "red"))
    return datasets


def get_cldf_dataset(args: argparse.Namespace, cldf_spec=None) -> pycldf.Dataset:
    """
    Get the `pycldf.Dataset` specified by `cldf_spec` for the `cldfbench.Dataset` specified by
    `args`.

    If resolving the `cldfbench` dataset or its CLDF reader raises `ParserError` or
    `ModuleNotFoundError`, `args.dataset` is handed to `pycldf.Dataset.from_metadata` instead;
    if that raises `json.JSONDecodeError`, it is handed to `pycldf.Dataset.from_data`.

    :param args: Parsed CLI arguments, see :func:`get_dataset`.
    :param cldf_spec: Passed through as `cldf_spec` to the dataset's `cldf_reader` method.
    """
    try:
        bench_dataset = get_dataset(args)
        return bench_dataset.cldf_reader(cldf_spec=cldf_spec)
    except (ParserError, ModuleNotFoundError):
        # Try to load plain (i.e. non-cldfbench-enabled) CLDF dataset.
        spec = args.dataset
        try:
            return pycldf.Dataset.from_metadata(spec)
        except json.JSONDecodeError:
            # Not parseable as JSON metadata, so hand the same spec to `from_data`.
            return pycldf.Dataset.from_data(spec)


def add_catalog_spec(
        parser: argparse.ArgumentParser,
        name: str,
        with_version: bool = True,
        default=None):
    """
    Add an option for a reference catalog (at a specific version tag) to the CLI.

    Registers `--<name>` and, if `with_version` is set, `--<name>-version` (default `None`).

    :param parser: Subparser for the subcommand.
    :param name: Option name to use for the catalog.
    :param with_version: Flag signaling whether an option to select a version tag for the \
    catalog should be added.
    :param default: The default value for the argument. `None` will trigger config lookup, \
    `IGNORE_MISSING` will set the argument to `None` if no user-supplied value is found.

    .. note::

        If one of the `cldfbench.catalogs.BUILTIN_CATALOGS` is added (using its name as `name`),
        `cldfbench` will add an initialized `cldfcatalog.Catalog` object (with entered context,
        if a particular version was requested) as `name` to the `argparse.Namespace` passed to the
        command's `run` function.
    """
    # Capitalized catalog name as it appears in both help texts.
    label = name.capitalize()

    parser.add_argument(
        f'--{name}',
        metavar=name.upper(),
        help=f'Path to repository clone of {label} data',
        default=default)
    if not with_version:
        return
    parser.add_argument(
        f'--{name}-version',
        help=f'Version of {label} data to checkout',
        default=None)


def with_dataset(
        args: argparse.Namespace,
        func: typing.Union[typing.Callable, str],
        dataset=None) -> typing.Any:
    """
    Run a callable, passing a dataset and `args` as arguments, returning its result.

    Start and completion (with elapsed seconds) are logged via `args.log.info`.

    :param args: CLI arguments
    :param func: Callable with suitable signature or `str`, in which case a method `_cmd_<name>` \
    (or, failing that, `cmd_<name>`) will be looked up on the dataset and run with `args` as \
    its only argument.
    :param dataset: `cldfbench.Dataset` instance or `None`, in which case a dataset will be \
    retrieved as specified by `args`.
    :return: Whatever the callable returns.
    :raises ParserError: If `func` is a `str` and the dataset has no matching command method.
    """
    dataset = dataset or get_dataset(args)
    started = time()

    if isinstance(func, str):
        # `_cmd_<name>` takes precedence over `cmd_<name>`.
        method = getattr(dataset, '_cmd_' + func, getattr(dataset, 'cmd_' + func, None))
        if not method:
            raise ParserError('Dataset {0} has no {1} command'.format(dataset.id, func))
        # The method was looked up on the dataset, so the dataset is not passed again.
        func, call_args = method, (args,)
    else:
        call_args = (dataset, args)

    args.log.info('running {0} on {1} ...'.format(getattr(func, '__name__', func), dataset.id))
    result = func(*call_args)
    args.log.info('... done %s [%.1f secs]' % (dataset.id, time() - started))
    return result


def with_datasets(
        args: argparse.Namespace,
        func: typing.Union[typing.Callable, str]) -> typing.List[typing.Any]:
    """
    Run `func` on all datasets specified by `args`.

    See :func:`with_dataset` for details.

    :param args: CLI arguments, used both to select the datasets and as argument for `func`.
    :param func: Callable or command name, handled as in :func:`with_dataset`.
    :return: One result per dataset, in the order the datasets were returned by \
    :func:`get_datasets`.
    """
    return [with_dataset(args, func, dataset=ds) for ds in get_datasets(args)]