Skip to content

Commit

Permalink
PEP585 updates.
Browse files Browse the repository at this point in the history
  • Loading branch information
gandersen101 committed Jan 30, 2021
1 parent 9564b0a commit 1b01e4f
Show file tree
Hide file tree
Showing 18 changed files with 135 additions and 141 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
*v0.4.1 Release Notes:*
- *Spaczz's phrase searching algorithm has been further optimized so both the `FuzzyMatcher` and `SimilarityMatcher` should run considerably faster.*
- *The `FuzzyMatcher` and `SimilarityMatcher` now include a `thresh` parameter that defaults to `100`. When matching, if `flex > 0` and the match ratio is >= `thresh` during the initial scan of the document, no optimization will be attempted. By default perfect matches don't need to be run through match optimization.*
- *PEP585 code updates.*

*v0.4.0 Release Notes:*
- *Spaczz now includes a `TokenMatcher` that provides token pattern support like spaCy's `Matcher`. It provides all the same functionality as spaCy's `Matcher` but adds fuzzy and fuzzy-regex support. However, it adds additional overhead to its spaCy counterpart so it should only be used as needed for fuzzy matching purposes.*
- *Spaczz's custom attributes have been reworked and now initialize within spaczz's root `__init__`. These are set via spaczz pipeline components (currently just the `SpaczzRuler`). The only downside is that I had to remove the `attr` parameter from the `SpaczzRuler` to enable this.*
Expand Down
19 changes: 10 additions & 9 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@


package = "spaczz"
nox.options.sessions = "lint", "mypy", "safety", "tests", "typeguard"
nox.options.sessions = "lint", "mypy", "safety", "tests"
locations = "src", "tests", "noxfile.py", "docs/conf.py"


Expand Down Expand Up @@ -151,14 +151,15 @@ def tests(session: Session) -> None:
session.run("pytest", *args)


@nox.session(python=["3.9", "3.8", "3.7"])
def typeguard(session: Session) -> None:
    """Runtime type checking using Typeguard.

    Installs the package itself without dev dependencies, then adds the
    pinned test tooling (pytest, pytest-mock, typeguard) via
    install_with_constraints, downloads the spaCy model the test suite
    uses, and finally runs pytest with Typeguard instrumenting the
    package under test.
    """
    # Extra command-line arguments passed to the nox session are
    # forwarded verbatim to pytest below.
    args = session.posargs
    session.run("poetry", "install", "--no-dev", external=True)
    install_with_constraints(session, "pytest", "pytest-mock", "typeguard")
    # The test suite exercises spaCy pipelines, so the medium English
    # model must be present before pytest runs.
    session.run("python", "-m", "spacy", "download", "en_core_web_md")
    session.run("pytest", f"--typeguard-packages={package}", *args)
# Typeguard does not seem to currently work with PEP585
# @nox.session(python=["3.9", "3.8", "3.7"])
# def typeguard(session: Session) -> None:
# """Runtime type checking using Typeguard."""
# args = session.posargs
# session.run("poetry", "install", "--no-dev", external=True)
# install_with_constraints(session, "pytest", "pytest-mock", "typeguard")
# session.run("python", "-m", "spacy", "download", "en_core_web_md")
# session.run("pytest", f"--typeguard-packages={package}", *args)


@nox.session(python=["3.9", "3.8", "3.7"])
Expand Down
10 changes: 5 additions & 5 deletions src/spaczz/attrs.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Custom spaCy attributes for spaczz."""
from __future__ import annotations

from typing import Iterable, Optional, Set, Tuple, Type
from typing import Iterable, Optional, Type
import warnings

from spacy.tokens import Doc, Span, Token
Expand Down Expand Up @@ -69,7 +69,7 @@ def get_spaczz_span(span: Span) -> bool:
return all([token._.spaczz_token for token in span])

@staticmethod
def get_token_types(token: Token) -> Set[str]:
def get_token_types(token: Token) -> set[str]:
"""Getter for spaczz_types `Token` attribute."""
types = set()
if token._.spaczz_ratio:
Expand All @@ -81,7 +81,7 @@ def get_token_types(token: Token) -> Set[str]:
return types

@classmethod
def get_span_types(cls: Type[SpaczzAttrs], span: Span) -> Set[str]:
def get_span_types(cls: Type[SpaczzAttrs], span: Span) -> set[str]:
"""Getter for spaczz_types `Span` attribute."""
types = set()
if cls.get_ratio(span):
Expand All @@ -103,7 +103,7 @@ def get_ratio(cls: Type[SpaczzAttrs], span: Span) -> Optional[int]:
@classmethod
def get_counts(
cls: Type[SpaczzAttrs], span: Span
) -> Optional[Tuple[int, int, int]]:
) -> Optional[tuple[int, int, int]]:
"""Getter for spaczz_counts `Span` attribute."""
if cls._all_equal([token._.spaczz_counts for token in span]):
return span[0]._.spaczz_counts
Expand All @@ -124,7 +124,7 @@ def get_spaczz_doc(doc: Doc) -> bool:
return any([token._.spaczz_token for token in doc])

@staticmethod
def get_doc_types(doc: Doc) -> Set[str]:
def get_doc_types(doc: Doc) -> set[str]:
"""Getter for spaczz_types `Doc` attribute."""
types = set()
for token in doc:
Expand Down
23 changes: 10 additions & 13 deletions src/spaczz/matcher/_phrasematcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,9 @@
Any,
Callable,
DefaultDict,
Dict,
Generator,
Iterable,
List,
Optional,
Tuple,
)
import warnings

Expand Down Expand Up @@ -61,16 +58,16 @@ def __init__(self: _PhraseMatcher, vocab: Vocab, **defaults: Any) -> None:
"""
self.defaults = defaults
self.type = "_phrase"
self._callbacks: Dict[
self._callbacks: dict[
str,
Optional[Callable[[Any, Doc, int, List[Tuple[str, int, int, int]]], None]],
Optional[Callable[[Any, Doc, int, list[tuple[str, int, int, int]]], None]],
] = {} # Any type due to inheritence typing issue.
self._patterns: DefaultDict[str, DefaultDict[str, Any]] = defaultdict(
lambda: defaultdict(list)
)
self._searcher = _PhraseSearcher(vocab=vocab)

def __call__(self: _PhraseMatcher, doc: Doc) -> List[Tuple[str, int, int, int]]:
def __call__(self: _PhraseMatcher, doc: Doc) -> list[tuple[str, int, int, int]]:
"""Find all sequences matching the supplied patterns in the doc.
Args:
Expand Down Expand Up @@ -120,7 +117,7 @@ def __len__(self: _PhraseMatcher) -> int:
return len(self._patterns)

@property
def labels(self: _PhraseMatcher) -> Tuple[str, ...]:
def labels(self: _PhraseMatcher) -> tuple[str, ...]:
"""All labels present in the matcher.
Returns:
Expand All @@ -138,7 +135,7 @@ def labels(self: _PhraseMatcher) -> Tuple[str, ...]:
return tuple(self._patterns.keys())

@property
def patterns(self: _PhraseMatcher) -> List[Dict[str, Any]]:
def patterns(self: _PhraseMatcher) -> list[dict[str, Any]]:
"""Get all patterns and kwargs that were added to the matcher.
Returns:
Expand Down Expand Up @@ -179,10 +176,10 @@ def vocab(self: _PhraseMatcher) -> Vocab:
def add(
self: _PhraseMatcher,
label: str,
patterns: List[Doc],
kwargs: Optional[List[Dict[str, Any]]] = None,
patterns: list[Doc],
kwargs: Optional[list[dict[str, Any]]] = None,
on_match: Optional[
Callable[[Any, Doc, int, List[Tuple[str, int, int, int]]], None]
Callable[[Any, Doc, int, list[tuple[str, int, int, int]]], None]
] = None, # Any type due to inheritence typing issue.
) -> None:
"""Add a rule to the matcher, consisting of a label and one or more patterns.
Expand All @@ -207,11 +204,11 @@ def add(
TypeError: If kwargs is not an iterable dictionaries.
Warnings:
UserWarning:
KwargsWarning:
If there are more patterns than kwargs
default matching settings will be used
for extra patterns.
UserWarning:
KwargsWarning:
If there are more kwargs dicts than patterns,
the extra kwargs will be ignored.
Expand Down
24 changes: 10 additions & 14 deletions src/spaczz/matcher/regexmatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,9 @@
Any,
Callable,
DefaultDict,
Dict,
Generator,
Iterable,
List,
Optional,
Sequence,
Tuple,
Union,
)
import warnings
Expand Down Expand Up @@ -73,15 +69,15 @@ def __init__(
"""
self.defaults = defaults
self.type = "regex"
self._callbacks: Dict[
self._callbacks: dict[
str,
Optional[
Callable[
[
RegexMatcher,
Doc,
int,
List[Tuple[str, int, int, Tuple[int, int, int]]],
list[tuple[str, int, int, tuple[int, int, int]]],
],
None,
],
Expand All @@ -94,7 +90,7 @@ def __init__(

def __call__(
self: RegexMatcher, doc: Doc
) -> List[Tuple[str, int, int, Tuple[int, int, int]]]:
) -> list[tuple[str, int, int, tuple[int, int, int]]]:
r"""Find all sequences matching the supplied patterns in the doc.
Args:
Expand Down Expand Up @@ -145,7 +141,7 @@ def __len__(self: RegexMatcher) -> int:
return len(self._patterns)

@property
def labels(self: RegexMatcher) -> Tuple[str, ...]:
def labels(self: RegexMatcher) -> tuple[str, ...]:
"""All labels present in the matcher.
Returns:
Expand All @@ -163,7 +159,7 @@ def labels(self: RegexMatcher) -> Tuple[str, ...]:
return tuple(self._patterns.keys())

@property
def patterns(self: RegexMatcher) -> List[Dict[str, Any]]:
def patterns(self: RegexMatcher) -> list[dict[str, Any]]:
"""Get all patterns and kwargs that were added to the matcher.
Returns:
Expand Down Expand Up @@ -203,15 +199,15 @@ def vocab(self: RegexMatcher) -> Vocab:
def add(
self: RegexMatcher,
label: str,
patterns: Sequence[str],
kwargs: Optional[List[Dict[str, Any]]] = None,
patterns: list[str],
kwargs: Optional[list[dict[str, Any]]] = None,
on_match: Optional[
Callable[
[
RegexMatcher,
Doc,
int,
List[Tuple[str, int, int, Tuple[int, int, int]]],
list[tuple[str, int, int, tuple[int, int, int]]],
],
None,
]
Expand Down Expand Up @@ -240,11 +236,11 @@ def add(
TypeError: If kwargs is not a iterable of dictionaries.
Warnings:
UserWarning:
KwargsWarning:
If there are more patterns than kwargs
default regex matching settings will be used
for extra patterns.
UserWarning:
KwargsWarning:
If there are more kwargs dictionaries than patterns,
the extra kwargs will be ignored.
Expand Down
4 changes: 0 additions & 4 deletions src/spaczz/matcher/similaritymatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,6 @@ def __init__(self: SimilarityMatcher, vocab: Vocab, **defaults: Any) -> None:
be used as default matching settings.
These arguments will become the new defaults for matching.
See `SimilaritySearcher` documentation for details.
Warnings:
UserWarning:
If vocab does not contain any word vectors.
"""
super().__init__(vocab=vocab, **defaults)
self.type = "similarity"
Expand Down
23 changes: 10 additions & 13 deletions src/spaczz/matcher/tokenmatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,9 @@
Any,
Callable,
DefaultDict,
Dict,
Generator,
Iterable,
List,
Optional,
Tuple,
Union,
)

Expand Down Expand Up @@ -71,19 +68,19 @@ def __init__(self: TokenMatcher, vocab: Vocab, **defaults: Any) -> None:
"""
self.defaults = defaults
self.type = "token"
self._callbacks: Dict[
self._callbacks: dict[
str,
Union[
Callable[
[TokenMatcher, Doc, int, List[Tuple[str, int, int, None]]], None
[TokenMatcher, Doc, int, list[tuple[str, int, int, None]]], None
],
None,
],
] = {}
self._patterns: DefaultDict[str, List[List[Dict[str, Any]]]] = defaultdict(list)
self._patterns: DefaultDict[str, list[list[dict[str, Any]]]] = defaultdict(list)
self._searcher = TokenSearcher(vocab=vocab)

def __call__(self: TokenMatcher, doc: Doc) -> List[Tuple[str, int, int, None]]:
def __call__(self: TokenMatcher, doc: Doc) -> list[tuple[str, int, int, None]]:
"""Find all sequences matching the supplied patterns in the doc.
Args:
Expand Down Expand Up @@ -135,7 +132,7 @@ def __len__(self: TokenMatcher) -> int:
return len(self._patterns)

@property
def labels(self: TokenMatcher) -> Tuple[str, ...]:
def labels(self: TokenMatcher) -> tuple[str, ...]:
"""All labels present in the matcher.
Returns:
Expand All @@ -153,7 +150,7 @@ def labels(self: TokenMatcher) -> Tuple[str, ...]:
return tuple(self._patterns.keys())

@property
def patterns(self: TokenMatcher) -> List[Dict[str, Any]]:
def patterns(self: TokenMatcher) -> list[dict[str, Any]]:
"""Get all patterns that were added to the matcher.
Returns:
Expand Down Expand Up @@ -189,9 +186,9 @@ def vocab(self: TokenMatcher) -> Vocab:
def add(
self: TokenMatcher,
label: str,
patterns: List[List[Dict[str, Any]]],
patterns: list[list[dict[str, Any]]],
on_match: Optional[
Callable[[TokenMatcher, Doc, int, List[Tuple[str, int, int, None]]], None]
Callable[[TokenMatcher, Doc, int, list[tuple[str, int, int, None]]], None]
] = None,
) -> None:
"""Add a rule to the matcher, consisting of a label and one or more patterns.
Expand Down Expand Up @@ -322,8 +319,8 @@ def pipe(


def _spacyfy(
matches: List[List[Optional[Tuple[str, str]]]], pattern: List[Dict[str, Any]]
) -> List[List[Dict[str, Any]]]:
matches: list[list[Optional[tuple[str, str]]]], pattern: list[dict[str, Any]]
) -> list[list[dict[str, Any]]]:
"""Turns token searcher matches into spaCy `Matcher` compatible patterns."""
new_patterns = []
if matches:
Expand Down
Loading

0 comments on commit 1b01e4f

Please sign in to comment.