Coverage for whole_app/spell.py: 100%
39 statements
« prev ^ index » next coverage.py v7.10.4, created at 2025-08-21 23:45 +0000
1import re
2import typing
4import cachebox
5import urlextract
6from enchant.checker import SpellChecker
8from . import models
9from .settings import SETTINGS
# Module-level memo of suggestion lists, keyed by the word they were produced for.
# A positive ``SETTINGS.cache_size`` selects a cachebox LRU cache constructed with
# that size; any other value falls back to a plain dict.
_MISSPELED_CACHE: typing.Final[
    cachebox.LRUCache[str, list[str]] | dict[str, list[str]]
] = (
    typing.cast("dict[str, list[str]]", {})
    if SETTINGS.cache_size <= 0
    else cachebox.LRUCache[str, list[str]](SETTINGS.cache_size)
)
# Delimiters used to break a URL into word-like fragments: ".", ":", "//", "/",
# "?", "&", "=", "+", "#" and "-". "//" is listed before "/" so a double slash
# is consumed as a single separator.
SEPARATORS_TO_SPLIT_URL_BY_WORDS: typing.Final[re.Pattern[str]] = re.compile(
    r"\.|\:|\/\/|\/|\?|\&|\=|\+|\#|\-",
)
class SpellCheckService:
    """Spell-check a text with pyenchant, honouring exclusion words and URLs.

    Typical usage is ``SpellCheckService().prepare(payload, words).run_check()``.
    ``prepare`` must be called before ``run_check``: it is what populates the
    instance attributes that ``run_check`` reads.
    """

    __slots__ = ("_exclusion_words", "_input_text", "_spellcheck_engine")
    _input_text: str
    _spellcheck_engine: SpellChecker
    _exclusion_words: list[str]
    # Deliberately absent from ``__slots__``: this is a class attribute, built
    # once when the class is defined and shared by every instance.
    _url_extractor: urlextract.URLExtract = urlextract.URLExtract()

    def prepare(
        self: "SpellCheckService",
        request_payload: models.SpellCheckRequest,
        exclusion_words: list[str] | None = None,
    ) -> "SpellCheckService":
        """Initialize machinery.

        Args:
            request_payload: Request whose ``text``, ``language`` and
                ``exclude_urls`` fields configure this check.
            exclusion_words: Extra words to skip. The list is copied, so the
                caller's object is never modified.

        Returns:
            ``self``, so the call can be chained with ``run_check``.
        """
        self._input_text = request_payload.text
        # Copy before extending: extending the caller's list in place would leak
        # settings words and URL fragments back into the caller's data and make
        # it grow on every call.
        self._exclusion_words = list(exclusion_words) if exclusion_words else []
        self._exclusion_words.extend(typing.cast("set[str]", SETTINGS.exclusion_words_set))

        if request_payload.exclude_urls:
            for one_url in self._url_extractor.find_urls(self._input_text):
                # Every fragment of a URL is treated as an excluded word;
                # lower-cased because run_check compares lower-cased words.
                self._exclusion_words.extend(
                    {one_word.lower() for one_word in SEPARATORS_TO_SPLIT_URL_BY_WORDS.split(one_url)}
                )
        self._spellcheck_engine = SpellChecker(request_payload.language)
        return self

    @staticmethod
    def get_memorized_suggestions(word_spellcheck_result: SpellChecker) -> list[str]:
        """Return suggestions for the checker's current word, using the module cache.

        The full suggestion list is stored in ``_MISSPELED_CACHE`` under the word;
        the ``SETTINGS.max_suggestions`` limit is applied only to the returned
        value, and only when that setting is positive.

        Args:
            word_spellcheck_result: Checker whose ``word`` attribute is the word
                to look up and whose ``suggest()`` produces its suggestions.

        Returns:
            The suggestion list, truncated to ``SETTINGS.max_suggestions`` items
            when that setting is positive. Without a limit the cached list object
            itself is returned, so callers should not mutate it.
        """
        misspelled_suggestions: list[str]
        if word_spellcheck_result.word in _MISSPELED_CACHE:
            misspelled_suggestions = _MISSPELED_CACHE[word_spellcheck_result.word]
        else:
            misspelled_suggestions = word_spellcheck_result.suggest()
            _MISSPELED_CACHE[word_spellcheck_result.word] = misspelled_suggestions
        return (
            misspelled_suggestions[: SETTINGS.max_suggestions]
            if SETTINGS.max_suggestions > 0
            else misspelled_suggestions
        )

    def run_check(self: "SpellCheckService") -> list[models.OneCorrection]:
        """Run the checker over the prepared text and collect corrections.

        Each item yielded by iterating the checker becomes one
        ``models.OneCorrection``, unless its lower-cased word is among the
        exclusion words gathered by ``prepare``.

        Returns:
            Corrections in the order the checker yields them. ``first_position``
            is the checker's ``wordpos`` and ``last_position`` is that offset
            plus the word length.
        """
        corrections_output: list[models.OneCorrection] = []
        # Built once so each flagged word costs a hash lookup instead of a list scan.
        excluded_words = frozenset(self._exclusion_words)
        self._spellcheck_engine.set_text(self._input_text)
        for one_result in self._spellcheck_engine:
            if one_result.word.lower() in excluded_words:
                continue
            corrections_output.append(
                models.OneCorrection(
                    first_position=one_result.wordpos,
                    last_position=one_result.wordpos + len(one_result.word),
                    word=one_result.word,
                    suggestions=self.get_memorized_suggestions(one_result),
                ),
            )
        return corrections_output