Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
c712bcb
Enable flag on spacy.load: foundation for include, enable arguments.
rmitsch May 10, 2022
210fe46
Enable flag on spacy.load: fixed tests.
rmitsch May 11, 2022
f1dd7c6
Enable flag on spacy.load: switched from pretrained model to empty mo…
rmitsch May 11, 2022
9ae4189
Enable flag on spacy.load: switched to more consistent error on missp…
rmitsch May 13, 2022
53ba6a6
Enable flag on spacy.load: added support for fields not in pipeline.
rmitsch May 16, 2022
5750b82
Enable flag on spacy.load: removed serialization fields from supporte…
rmitsch May 16, 2022
e4c2c13
Enable flag on spacy.load: removed 'enable' from config again.
rmitsch May 16, 2022
60e47ff
Enable flag on spacy.load: relaxed checks in _resolve_component_activ…
rmitsch May 18, 2022
d5d91e6
Enable flag on spacy.load: fixed relaxed checks for _resolve_componen…
rmitsch May 18, 2022
1c81701
Enable flag on spacy.load: comments w.r.t. resolution workarounds.
rmitsch May 23, 2022
6226b04
Enable flag on spacy.load: remove include fields. Update website docs.
rmitsch May 25, 2022
8e7042d
Enable flag on spacy.load: updates w.r.t. changes in master.
rmitsch May 25, 2022
59c400e
Merge branch 'master' into feat/inclusive-spacy-load-flags
rmitsch May 25, 2022
f6ffe68
Implement Doc.from_json(): update docstrings.
rmitsch May 25, 2022
c7eaef6
Implement Doc.from_json(): remove newline.
rmitsch May 25, 2022
3230f9c
Implement Doc.from_json(): change error message for E1038.
rmitsch May 25, 2022
79787b7
Enable flag on spacy.load: wrapped docstring for _resolve_component_s…
rmitsch May 25, 2022
d00d525
Enable flag on spacy.load: changed examples for enable flag.
rmitsch May 25, 2022
e319c45
Remove newline.
rmitsch Jun 9, 2022
bdc9955
Fix docstring for Language._resolve_component_status().
rmitsch Jun 9, 2022
928caec
Merge branch 'feat/inclusive-spacy-load-flags' of github.com:rmitsch/…
rmitsch Jun 9, 2022
dd5f92d
Rename E1038 to E1042.
rmitsch Jun 9, 2022
dc6c194
Merge master.
rmitsch Jun 9, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion spacy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def load(
*,
vocab: Union[Vocab, bool] = True,
disable: Iterable[str] = util.SimpleFrozenList(),
enable: Iterable[str] = util.SimpleFrozenList(),
exclude: Iterable[str] = util.SimpleFrozenList(),
config: Union[Dict[str, Any], Config] = util.SimpleFrozenDict(),
) -> Language:
Expand All @@ -42,14 +43,21 @@ def load(
disable (Iterable[str]): Names of pipeline components to disable. Disabled
pipes will be loaded but they won't be run unless you explicitly
enable them by calling nlp.enable_pipe.
enable (Iterable[str]): Names of pipeline components to enable. All other
pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
exclude (Iterable[str]): Names of pipeline components to exclude. Excluded
components won't be loaded.
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
keyed by section values in dot notation.
RETURNS (Language): The loaded nlp object.
"""
return util.load_model(
name, vocab=vocab, disable=disable, exclude=exclude, config=config
name,
vocab=vocab,
disable=disable,
enable=enable,
exclude=exclude,
config=config,
)


Expand Down
2 changes: 2 additions & 0 deletions spacy/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -919,6 +919,8 @@ class Errors(metaclass=ErrorsWithCodes):
E1035 = ("Token index {i} out of bounds ({length})")
E1036 = ("Cannot index into NoneNode")
E1037 = ("Invalid attribute value '{attr}'.")
E1038 = ("Function was called with `{arg1}`={arg1_values} and `{arg2}`={arg2_values} but that information is "
"conflicting.")


# Deprecated model shortcuts, only used in errors and warnings
Expand Down
54 changes: 52 additions & 2 deletions spacy/language.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Iterator, Optional, Any, Dict, Callable, Iterable
from typing import Iterator, Optional, Any, Dict, Callable, Iterable, Collection
from typing import Union, Tuple, List, Set, Pattern, Sequence
from typing import NoReturn, TYPE_CHECKING, TypeVar, cast, overload

Expand Down Expand Up @@ -1671,6 +1671,7 @@ def from_config(
*,
vocab: Union[Vocab, bool] = True,
disable: Iterable[str] = SimpleFrozenList(),
enable: Iterable[str] = SimpleFrozenList(),
exclude: Iterable[str] = SimpleFrozenList(),
meta: Dict[str, Any] = SimpleFrozenDict(),
auto_fill: bool = True,
Expand All @@ -1685,6 +1686,8 @@ def from_config(
disable (Iterable[str]): Names of pipeline components to disable.
Disabled pipes will be loaded but they won't be run unless you
explicitly enable them by calling nlp.enable_pipe.
enable (Iterable[str]): Names of pipeline components to enable. All other
pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
exclude (Iterable[str]): Names of pipeline components to exclude.
Excluded components won't be loaded.
meta (Dict[str, Any]): Meta overrides for nlp.meta.
Expand Down Expand Up @@ -1838,8 +1841,15 @@ def from_config(
# Restore the original vocab after sourcing if necessary
if vocab_b is not None:
nlp.vocab.from_bytes(vocab_b)
disabled_pipes = [*config["nlp"]["disabled"], *disable]

# Resolve disabled/enabled settings.
disabled_pipes = cls._resolve_component_status(
[*config["nlp"]["disabled"], *disable],
[*config["nlp"].get("enabled", []), *enable],
config["nlp"]["pipeline"],
)
nlp._disabled = set(p for p in disabled_pipes if p not in exclude)

nlp.batch_size = config["nlp"]["batch_size"]
nlp.config = filled if auto_fill else config
if after_pipeline_creation is not None:
Expand Down Expand Up @@ -1975,6 +1985,7 @@ def to_disk(

DOCS: https://spacy.io/api/language#to_disk
"""

path = util.ensure_path(path)
serializers = {}
serializers["tokenizer"] = lambda p: self.tokenizer.to_disk( # type: ignore[union-attr]
Expand All @@ -1991,6 +2002,44 @@ def to_disk(
serializers["vocab"] = lambda p: self.vocab.to_disk(p, exclude=exclude)
util.to_disk(path, serializers, exclude)

@staticmethod
def _resolve_component_status(
    disable: Iterable[str], enable: Iterable[str], pipe_names: Collection[str]
) -> Tuple[str, ...]:
    """
    Derives whether the `disable` and `enable` values are consistent and
    resolves them to a single set of disabled components.

    If `enable` is non-empty, every pipeline component NOT listed in it is
    disabled, in addition to anything explicitly listed in `disable`. A
    component that ends up both enabled and disabled (i.e. appears in
    `enable` as well as in the resolved disabled set) is a conflict and
    raises E1038. If `enable` is empty, exactly the components named in
    `disable` are disabled.

    disable (Iterable[str]): Names of components or serialization fields to
        disable. A bare string is accepted and treated as a one-element list.
    enable (Iterable[str]): Names of pipeline components to enable.
    pipe_names (Collection[str]): Names of all pipeline components.

    RETURNS (Tuple[str, ...]): Names of components to disable in the pipeline.

    RAISES (ValueError): If a component is explicitly both enabled and
        disabled.
    """
    # Accept a bare string for convenience. isinstance(None, str) is False,
    # so no separate None check is needed.
    if isinstance(disable, str):
        disable = [disable]

    if not enable:
        # Nothing explicitly enabled: only the `disable` entries are disabled.
        return tuple(disable)

    # Everything not explicitly enabled is disabled, on top of whatever was
    # explicitly disabled. This makes e.g. enable=["tagger"],
    # disable=["parser"] equivalent to enable=["tagger"] alone, as documented.
    to_disable = {pipe_name for pipe_name in pipe_names if pipe_name not in enable}
    to_disable.update(disable)
    # A component that is both enabled and disabled is a genuine conflict.
    if to_disable.intersection(enable):
        raise ValueError(
            Errors.E1038.format(
                arg1="enable",
                arg2="disable",
                arg1_values=enable,
                arg2_values=disable,
            )
        )

    return tuple(to_disable)

def from_disk(
self,
path: Union[str, Path],
Expand All @@ -2004,6 +2053,7 @@ def from_disk(

path (str / Path): A path to a directory.
exclude (Iterable[str]): Names of components or serialization fields to exclude.

RETURNS (Language): The modified `Language` object.

DOCS: https://spacy.io/api/language#from_disk
Expand Down
52 changes: 51 additions & 1 deletion spacy/tests/pipeline/test_pipe_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@
import pytest
from thinc.api import get_current_ops

import spacy
from spacy.lang.en import English
from spacy.lang.en.syntax_iterators import noun_chunks
from spacy.language import Language
from spacy.pipeline import TrainablePipe
from spacy.tokens import Doc
from spacy.training import Example
from spacy.util import SimpleFrozenList, get_arg_names
from spacy.util import SimpleFrozenList, get_arg_names, make_tempdir
from spacy.vocab import Vocab


Expand Down Expand Up @@ -602,3 +603,52 @@ def component(doc):
assert results[component] == "".join(eg.predicted.text for eg in examples)
for component in components - set(components_to_annotate):
assert results[component] == ""


def test_load_disable_enable() -> None:
    """
    Tests spacy.load() with dis-/enabling components.

    Covers four scenarios: setting only `disable`, setting only `enable`,
    a consistent combination of both, and a conflicting combination
    (which must raise a ValueError).
    """
    base_nlp = English()
    for pipe in ("sentencizer", "tagger", "parser"):
        base_nlp.add_pipe(pipe)

    with make_tempdir() as tmp_dir:
        base_nlp.to_disk(tmp_dir)
        to_disable = ["parser", "tagger"]
        to_enable = ["tagger", "parser"]

        # Setting only `disable`: exactly the listed components are disabled.
        nlp = spacy.load(tmp_dir, disable=to_disable)
        assert all(comp_name in nlp.disabled for comp_name in to_disable)

        # Setting only `enable`: every component not listed is disabled.
        nlp = spacy.load(tmp_dir, enable=to_enable)
        assert all(
            (comp_name in nlp.disabled) is (comp_name not in to_enable)
            for comp_name in nlp.component_names
        )

        # A consistent enable/disable combination (disable is the complement
        # of enable) behaves like `enable` alone.
        nlp = spacy.load(
            tmp_dir,
            enable=to_enable,
            disable=[
                comp_name
                for comp_name in nlp.component_names
                if comp_name not in to_enable
            ],
        )
        assert all(
            (comp_name in nlp.disabled) is (comp_name not in to_enable)
            for comp_name in nlp.component_names
        )

        # Inconsistent enable/disable combination ("parser" is in both the
        # enabled and disabled sets) must raise.
        with pytest.raises(ValueError):
            spacy.load(tmp_dir, enable=to_enable, disable=["parser"])
35 changes: 31 additions & 4 deletions spacy/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from thinc.api import ConfigValidationError, Model
import functools
import itertools
import numpy.random
import numpy
import srsly
import catalogue
Expand Down Expand Up @@ -400,6 +399,7 @@ def load_model(
*,
vocab: Union["Vocab", bool] = True,
disable: Iterable[str] = SimpleFrozenList(),
enable: Iterable[str] = SimpleFrozenList(),
exclude: Iterable[str] = SimpleFrozenList(),
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
) -> "Language":
Expand All @@ -409,11 +409,19 @@ def load_model(
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
a new Vocab object will be created.
disable (Iterable[str]): Names of pipeline components to disable.
enable (Iterable[str]): Names of pipeline components to enable. All others will be disabled.
exclude (Iterable[str]): Names of pipeline components to exclude.
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
keyed by section values in dot notation.
RETURNS (Language): The loaded nlp object.
"""
kwargs = {"vocab": vocab, "disable": disable, "exclude": exclude, "config": config}
kwargs = {
"vocab": vocab,
"disable": disable,
"enable": enable,
"exclude": exclude,
"config": config,
}
if isinstance(name, str): # name or string path
if name.startswith("blank:"): # shortcut for blank model
return get_lang_class(name.replace("blank:", ""))()
Expand All @@ -433,6 +441,7 @@ def load_model_from_package(
*,
vocab: Union["Vocab", bool] = True,
disable: Iterable[str] = SimpleFrozenList(),
enable: Iterable[str] = SimpleFrozenList(),
exclude: Iterable[str] = SimpleFrozenList(),
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
) -> "Language":
Expand All @@ -444,14 +453,16 @@ def load_model_from_package(
disable (Iterable[str]): Names of pipeline components to disable. Disabled
pipes will be loaded but they won't be run unless you explicitly
enable them by calling nlp.enable_pipe.
enable (Iterable[str]): Names of pipeline components to enable. All other
pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
exclude (Iterable[str]): Names of pipeline components to exclude. Excluded
components won't be loaded.
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
keyed by section values in dot notation.
RETURNS (Language): The loaded nlp object.
"""
cls = importlib.import_module(name)
return cls.load(vocab=vocab, disable=disable, exclude=exclude, config=config) # type: ignore[attr-defined]
return cls.load(vocab=vocab, disable=disable, enable=enable, exclude=exclude, config=config) # type: ignore[attr-defined]


def load_model_from_path(
Expand All @@ -460,6 +471,7 @@ def load_model_from_path(
meta: Optional[Dict[str, Any]] = None,
vocab: Union["Vocab", bool] = True,
disable: Iterable[str] = SimpleFrozenList(),
enable: Iterable[str] = SimpleFrozenList(),
exclude: Iterable[str] = SimpleFrozenList(),
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
) -> "Language":
Expand All @@ -473,6 +485,8 @@ def load_model_from_path(
disable (Iterable[str]): Names of pipeline components to disable. Disabled
pipes will be loaded but they won't be run unless you explicitly
enable them by calling nlp.enable_pipe.
enable (Iterable[str]): Names of pipeline components to enable. All other
pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
exclude (Iterable[str]): Names of pipeline components to exclude. Excluded
components won't be loaded.
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
Expand All @@ -487,7 +501,12 @@ def load_model_from_path(
overrides = dict_to_dot(config)
config = load_config(config_path, overrides=overrides)
nlp = load_model_from_config(
config, vocab=vocab, disable=disable, exclude=exclude, meta=meta
config,
vocab=vocab,
disable=disable,
enable=enable,
exclude=exclude,
meta=meta,
)
return nlp.from_disk(model_path, exclude=exclude, overrides=overrides)

Expand All @@ -498,6 +517,7 @@ def load_model_from_config(
meta: Dict[str, Any] = SimpleFrozenDict(),
vocab: Union["Vocab", bool] = True,
disable: Iterable[str] = SimpleFrozenList(),
enable: Iterable[str] = SimpleFrozenList(),
exclude: Iterable[str] = SimpleFrozenList(),
auto_fill: bool = False,
validate: bool = True,
Expand All @@ -512,6 +532,8 @@ def load_model_from_config(
disable (Iterable[str]): Names of pipeline components to disable. Disabled
pipes will be loaded but they won't be run unless you explicitly
enable them by calling nlp.enable_pipe.
enable (Iterable[str]): Names of pipeline components to enable. All other
pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
exclude (Iterable[str]): Names of pipeline components to exclude. Excluded
components won't be loaded.
auto_fill (bool): Whether to auto-fill config with missing defaults.
Expand All @@ -530,6 +552,7 @@ def load_model_from_config(
config,
vocab=vocab,
disable=disable,
enable=enable,
exclude=exclude,
auto_fill=auto_fill,
validate=validate,
Expand Down Expand Up @@ -594,6 +617,7 @@ def load_model_from_init_py(
*,
vocab: Union["Vocab", bool] = True,
disable: Iterable[str] = SimpleFrozenList(),
enable: Iterable[str] = SimpleFrozenList(),
exclude: Iterable[str] = SimpleFrozenList(),
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
) -> "Language":
Expand All @@ -605,6 +629,8 @@ def load_model_from_init_py(
disable (Iterable[str]): Names of pipeline components to disable. Disabled
pipes will be loaded but they won't be run unless you explicitly
enable them by calling nlp.enable_pipe.
enable (Iterable[str]): Names of pipeline components to enable. All other
pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
exclude (Iterable[str]): Names of pipeline components to exclude. Excluded
components won't be loaded.
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
Expand All @@ -622,6 +648,7 @@ def load_model_from_init_py(
vocab=vocab,
meta=meta,
disable=disable,
enable=enable,
exclude=exclude,
config=config,
)
Expand Down
1 change: 1 addition & 0 deletions website/docs/api/top-level.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ specified separately using the new `exclude` keyword argument.
| _keyword-only_ | |
| `vocab` | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~ |
| `disable` | Names of pipeline components to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [nlp.enable_pipe](/api/language#enable_pipe). ~~List[str]~~ |
| `enable` | Names of pipeline components to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled. ~~List[str]~~ |
| `exclude` <Tag variant="new">3</Tag> | Names of pipeline components to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~List[str]~~ |
| `config` <Tag variant="new">3</Tag> | Optional config overrides, either as nested dict or dict keyed by section value in dot notation, e.g. `"components.name.value"`. ~~Union[Dict[str, Any], Config]~~ |
| **RETURNS** | A `Language` object with the loaded pipeline. ~~Language~~ |
Expand Down
12 changes: 12 additions & 0 deletions website/docs/usage/processing-pipelines.md
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,18 @@ nlp = spacy.load("en_core_web_sm", disable=["tagger", "parser"])
nlp.enable_pipe("tagger")
```

In addition to `disable`, `spacy.load()` also accepts `enable`. If `enable` is
set, all components except for those in `enable` are disabled.

```python
# Load the complete pipeline, but disable all components except the tagger
nlp = spacy.load("en_core_web_sm", enable=["tagger"])
# Has the same effect, as the parser is already not part of the enabled set of components
nlp = spacy.load("en_core_web_sm", enable=["tagger"], disable=["parser"])
# Will raise an error, as the sets of enabled and disabled components are conflicting
nlp = spacy.load("en_core_web_sm", enable=["tagger"], disable=["tagger"])
```

<Infobox variant="warning" title="Changed in v3.0">

As of v3.0, the `disable` keyword argument specifies components to load but
Expand Down