Coverage for meta_tags_parser/parse.py: 100%
62 statements
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-21 00:51 +0000
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-21 00:51 +0000
1import typing
3from selectolax.lexbor import LexborHTMLParser
5from . import settings, structs
8if typing.TYPE_CHECKING:
9 from collections.abc import KeysView
def _extract_social_tags_from_precursor(
    all_tech_attrs: list[dict[str, structs.ValuesGroup]],
    media_type: typing.Literal[structs.WhatToParse.OPEN_GRAPH, structs.WhatToParse.TWITTER],
) -> list[structs.OneMetaTag]:
    """Collect the meta tags that belong to one social-media namespace.

    Args:
        all_tech_attrs: Attribute groups to inspect; each maps an attribute
            name to a ``ValuesGroup`` carrying ``original`` and ``normalized``
            values.
        media_type: Key into ``settings.SETTINGS_FOR_SOCIAL_MEDIA`` selecting
            the configuration (``"prop"`` and ``"prefix"`` entries) to apply.

    Returns:
        One ``OneMetaTag`` per attribute group whose naming attribute starts
        with the configured prefix and whose ``content`` is non-empty. The tag
        name is the normalized attribute value without the leading prefix; the
        value is the original ``content``.
    """
    possible_settings_for_parsing: typing.Final[typing.Mapping[str, str | tuple[str, ...]]] = (
        settings.SETTINGS_FOR_SOCIAL_MEDIA[media_type]
    )
    naming_attrs = possible_settings_for_parsing["prop"]
    prefix = possible_settings_for_parsing["prefix"]
    output_buffer: typing.Final[list[structs.OneMetaTag]] = []
    for one_attr_group in all_tech_attrs:
        for attr_name in naming_attrs:
            if attr_name not in one_attr_group:
                continue
            normalized_name = one_attr_group[attr_name].normalized
            if not normalized_name.startswith(prefix):
                continue
            # Strip only the leading prefix: ``str.replace`` would also delete later
            # occurrences, e.g. "og:blog:tag" -> "bltag" for a prefix such as "og:".
            tag_name = normalized_name.removeprefix(str(prefix))
            if tag_name and "content" in one_attr_group and one_attr_group["content"].original:
                output_buffer.append(
                    structs.OneMetaTag(name=tag_name, value=one_attr_group["content"].original)
                )
                # At most one tag per attribute group: stop at the first usable attribute.
                break
    return output_buffer
def _extract_basic_tags_from_precursor(
    all_tech_attrs: list[dict[str, structs.ValuesGroup]],
) -> list[structs.OneMetaTag]:
    """Collect the meta tags whose name is listed in ``settings.BASIC_META_TAGS``.

    Args:
        all_tech_attrs: Attribute groups to inspect; each maps an attribute
            name to a ``ValuesGroup`` carrying ``original`` and ``normalized``
            values.

    Returns:
        ``OneMetaTag`` items named after the matching basic tag, with the
        original ``content`` as value. Groups without a ``name`` or with a
        missing or empty ``content`` are ignored.
    """
    found: list[structs.OneMetaTag] = []
    for attrs in all_tech_attrs:
        # Stop scanning once as many tags were collected as there are basic names.
        # NOTE(review): repeated tags count toward this limit as well - confirm
        # that this is the intended cap.
        if len(found) == len(settings.BASIC_META_TAGS):
            break
        if "name" not in attrs or "content" not in attrs:
            continue
        content_value = attrs["content"].original
        if not content_value:
            continue
        normalized_name = attrs["name"].normalized
        for basic_name in settings.BASIC_META_TAGS:
            if normalized_name == basic_name:
                found.append(structs.OneMetaTag(name=basic_name, value=content_value))
    return found
def _extract_all_other_tags_from_precursor(
    all_tech_attrs: list[dict[str, structs.ValuesGroup]],
) -> list[structs.OneMetaTag]:
    """Collect named meta tags that are neither social-media nor basic tags.

    Args:
        all_tech_attrs: Attribute groups to inspect; each maps an attribute
            name to a ``ValuesGroup`` carrying ``original`` and ``normalized``
            values.

    Returns:
        ``OneMetaTag`` items built from the normalized ``name`` and the
        original ``content`` of every remaining group that has a non-empty
        ``content``.
    """
    leftovers: list[structs.OneMetaTag] = []
    for attrs in all_tech_attrs:
        # A group is "social" when any configured naming attribute carries the
        # prefix of any entry in SETTINGS_FOR_SOCIAL_MEDIA.
        is_social = any(
            prop_name in attrs and attrs[prop_name].normalized.startswith(social_config["prefix"])
            for social_config in settings.SETTINGS_FOR_SOCIAL_MEDIA.values()
            for prop_name in social_config["prop"]
        )
        if is_social or "name" not in attrs:
            continue

        tag_name = attrs["name"].normalized
        if tag_name in settings.BASIC_META_TAGS:
            continue

        if "content" in attrs and attrs["content"].original:
            leftovers.append(structs.OneMetaTag(name=tag_name, value=attrs["content"].original))
    return leftovers
def _prepare_normalized_meta_attrs(
    html_tree: LexborHTMLParser,
) -> list[dict[str, structs.ValuesGroup]]:
    """Turn every ``meta`` node of the tree into a dict of normalized attributes.

    Attribute names are lower-cased and stripped. Each value is stored as a
    ``ValuesGroup`` holding the raw text (``original``) and its lower-cased,
    stripped form (``normalized``); a falsy raw value is replaced by ``""``.

    Args:
        html_tree: Parsed document that is queried with the ``meta`` selector.

    Returns:
        One dict per matched node, in the order the nodes are returned.
    """
    return [
        {
            raw_name.lower().strip(): structs.ValuesGroup(
                original=raw_value or "",
                normalized=(raw_value or "").lower().strip(),
            )
            for raw_name, raw_value in meta_element.attributes.items()
        }
        for meta_element in html_tree.css("meta")
    ]
def parse_meta_tags_from_source(
    source_code: str,
    what_to_parse: tuple[structs.WhatToParse, ...] = settings.DEFAULT_PARSE_GROUP,
) -> structs.TagsGroup:
    """Parse the requested groups of meta information out of HTML source code.

    Args:
        source_code: HTML markup to analyse.
        what_to_parse: Groups to extract. Groups that are not listed come back
            empty (``""`` for the title, ``[]`` for the tag lists).

    Returns:
        A ``TagsGroup`` with the page title and the basic, Open Graph, Twitter
        and other meta tags.
    """
    document = LexborHTMLParser(source_code)

    title_text = ""
    if structs.WhatToParse.TITLE in what_to_parse:
        title_element = document.css_first("title")
        if title_element:
            title_text = title_element.text().strip()

    # The <meta> nodes are only walked when at least one meta-based group is wanted.
    meta_based_groups = (
        structs.WhatToParse.OPEN_GRAPH,
        structs.WhatToParse.TWITTER,
        structs.WhatToParse.BASIC,
        structs.WhatToParse.OTHER,
    )
    meta_attrs: list[dict[str, structs.ValuesGroup]] = []
    if any(group in what_to_parse for group in meta_based_groups):
        meta_attrs = _prepare_normalized_meta_attrs(document)

    open_graph_tags: list[structs.OneMetaTag] = []
    if structs.WhatToParse.OPEN_GRAPH in what_to_parse:
        open_graph_tags = _extract_social_tags_from_precursor(meta_attrs, structs.WhatToParse.OPEN_GRAPH)

    twitter_tags: list[structs.OneMetaTag] = []
    if structs.WhatToParse.TWITTER in what_to_parse:
        twitter_tags = _extract_social_tags_from_precursor(meta_attrs, structs.WhatToParse.TWITTER)

    basic_tags: list[structs.OneMetaTag] = []
    if structs.WhatToParse.BASIC in what_to_parse:
        basic_tags = _extract_basic_tags_from_precursor(meta_attrs)

    other_tags: list[structs.OneMetaTag] = []
    if structs.WhatToParse.OTHER in what_to_parse:
        other_tags = _extract_all_other_tags_from_precursor(meta_attrs)

    return structs.TagsGroup(
        title=title_text,
        basic=basic_tags,
        open_graph=open_graph_tags,
        twitter=twitter_tags,
        other=other_tags,
    )