Coverage for whole_app/spell.py: 100%

39 statements  

« prev     ^ index     » next       coverage.py v7.10.4, created at 2025-08-21 23:45 +0000

1import re 

2import typing 

3 

4import cachebox 

5import urlextract 

6from enchant.checker import SpellChecker 

7 

8from . import models 

9from .settings import SETTINGS 

10 

11 

# Process-wide memo of suggestions per misspelled word.
# A positive ``SETTINGS.cache_size`` selects a bounded LRU cache of that size;
# any other value falls back to a plain dict, which never evicts entries.
_MISSPELED_CACHE: typing.Final[cachebox.LRUCache[str, list[str]] | dict[str, list[str]]] = (
    typing.cast("dict[str, list[str]]", {})
    if SETTINGS.cache_size <= 0
    else cachebox.LRUCache[str, list[str]](SETTINGS.cache_size)
)

# Delimiters used to break a URL into word-like fragments. ``//`` is listed
# before ``/`` so that a double slash is consumed as a single separator.
SEPARATORS_TO_SPLIT_URL_BY_WORDS: typing.Final[re.Pattern[str]] = re.compile(
    r"\.|\:|\/\/|\/|\?|\&|\=|\+|\#|\-"
)

21 

22 

class SpellCheckService:
    """Spell-check the text of one request with an enchant ``SpellChecker``.

    Usage is two-step: ``prepare()`` stores the request text, collects the
    words to ignore and creates the engine; ``run_check()`` then walks the
    engine's results and returns the corrections.
    """

    __slots__ = ("_exclusion_words", "_input_text", "_spellcheck_engine")
    _input_text: str
    _spellcheck_engine: SpellChecker
    _exclusion_words: list[str]
    # Created once at class-definition time and shared by every instance.
    _url_extractor: urlextract.URLExtract = urlextract.URLExtract()

    def prepare(
        self: "SpellCheckService",
        request_payload: models.SpellCheckRequest,
        exclusion_words: list[str] | None = None,
    ) -> "SpellCheckService":
        """Initialize machinery.

        Args:
            request_payload: Request carrying ``text``, ``language`` and the
                ``exclude_urls`` flag.
            exclusion_words: Optional extra words to ignore. The list is
                copied, never modified.

        Returns:
            ``self``, so the call can be chained with ``run_check()``.
        """
        self._input_text = request_payload.text

        # Work on a copy: extending the caller's list in place would make a
        # list that is reused between requests grow on every call.
        collected_words: list[str] = list(exclusion_words) if exclusion_words else []
        collected_words.extend(typing.cast("set[str]", SETTINGS.exclusion_words_set))

        if request_payload.exclude_urls:
            # Every fragment of every URL found in the text is ignored, so the
            # pieces of a link are not reported as misspellings.
            for one_url in self._url_extractor.find_urls(self._input_text):
                collected_words.extend(
                    {one_word.lower() for one_word in re.split(SEPARATORS_TO_SPLIT_URL_BY_WORDS, one_url)}
                )

        self._exclusion_words = collected_words
        self._spellcheck_engine = SpellChecker(request_payload.language)
        return self

    @staticmethod
    def get_memorized_suggestions(word_spellcheck_result: SpellChecker) -> list[str]:
        """Return suggestions for the checker's current word, using the module cache.

        Args:
            word_spellcheck_result: Checker positioned on a misspelled word;
                its ``word`` attribute and ``suggest()`` method are used.

        Returns:
            The suggestions, truncated to ``SETTINGS.max_suggestions`` items
            when that setting is positive, otherwise the complete list.
        """
        # The cache is shared by all requests while the language is chosen per
        # request, so the key includes the language: suggestions produced by
        # one dictionary must not be served for another.
        checker_language: str = getattr(word_spellcheck_result, "lang", "") or ""
        cache_key: str = f"{checker_language}:{word_spellcheck_result.word}"

        misspelled_suggestions: list[str]
        if cache_key in _MISSPELED_CACHE:
            misspelled_suggestions = _MISSPELED_CACHE[cache_key]
        else:
            misspelled_suggestions = word_spellcheck_result.suggest()
            # The full list is stored; truncation happens on the way out.
            _MISSPELED_CACHE[cache_key] = misspelled_suggestions

        if SETTINGS.max_suggestions > 0:
            return misspelled_suggestions[: SETTINGS.max_suggestions]
        return misspelled_suggestions

    def run_check(self: "SpellCheckService") -> list[models.OneCorrection]:
        """Run the prepared engine over the text and collect corrections.

        ``prepare()`` must have been called first.

        Returns:
            One ``OneCorrection`` per reported word whose lowercase form is
            not among the exclusion words. ``last_position`` is
            ``first_position`` plus the word length.
        """
        # Build the lookup set once instead of scanning the list per word.
        excluded_words: frozenset[str] = frozenset(self._exclusion_words)

        corrections_output: list[models.OneCorrection] = []
        self._spellcheck_engine.set_text(self._input_text)
        for one_result in self._spellcheck_engine:
            if one_result.word.lower() in excluded_words:
                continue
            corrections_output.append(
                models.OneCorrection(
                    first_position=one_result.wordpos,
                    last_position=one_result.wordpos + len(one_result.word),
                    word=one_result.word,
                    suggestions=self.get_memorized_suggestions(one_result),
                ),
            )
        return corrections_output