Coverage for meta_tags_parser/parse.py: 100%

62 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2025-08-21 00:51 +0000

1import typing 

2 

3from selectolax.lexbor import LexborHTMLParser 

4 

5from . import settings, structs 

6 

7 

8if typing.TYPE_CHECKING: 

9 from collections.abc import KeysView 

10 

11 

def _extract_social_tags_from_precursor(
    all_tech_attrs: list[dict[str, structs.ValuesGroup]],
    media_type: typing.Literal[structs.WhatToParse.OPEN_GRAPH, structs.WhatToParse.TWITTER],
) -> list[structs.OneMetaTag]:
    """Collect the meta tags that belong to one social-media family.

    The configuration is read from ``settings.SETTINGS_FOR_SOCIAL_MEDIA[media_type]``.
    For every attribute group, the attribute names listed under ``"prop"`` are
    tried in order.  The first one whose normalized value starts with the
    configured ``"prefix"`` and still has a non-empty remainder once that
    leading prefix is removed names the tag.  The tag value is the original
    (non-normalized) value of the ``content`` attribute.

    Args:
        all_tech_attrs: Attribute groups, each mapping an attribute name to its
            ``ValuesGroup`` (``original`` / ``normalized`` values).
        media_type: Key into ``settings.SETTINGS_FOR_SOCIAL_MEDIA`` selecting
            which ``"prop"`` / ``"prefix"`` configuration to apply.

    Returns:
        One ``OneMetaTag`` per group that has a matching attribute and a
        non-empty ``content`` value, in the order the groups were given.
    """
    parsing_config: typing.Final[typing.Mapping[str, str | tuple[str, ...]]] = (
        settings.SETTINGS_FOR_SOCIAL_MEDIA[media_type]
    )
    candidate_attrs = parsing_config["prop"]
    raw_prefix = parsing_config["prefix"]
    # ``removeprefix`` needs a plain string; the config value is typed as ``str | tuple``.
    name_prefix: typing.Final[str] = str(raw_prefix)

    output_buffer: typing.Final[list[structs.OneMetaTag]] = []
    for one_attr_group in all_tech_attrs:
        # A group without a usable ``content`` can never produce a tag, so skip it up front.
        if "content" not in one_attr_group:
            continue
        content_value = one_attr_group["content"].original
        if not content_value:
            continue

        for attr_name in candidate_attrs:
            if attr_name not in one_attr_group:
                continue
            normalized_value = one_attr_group[attr_name].normalized
            if not normalized_value.startswith(raw_prefix):
                continue
            # Strip only the leading prefix: ``str.replace`` would also delete
            # later occurrences of the prefix inside the tag name.
            tag_name = normalized_value.removeprefix(name_prefix)
            if tag_name:
                output_buffer.append(structs.OneMetaTag(name=tag_name, value=content_value))
                break
    return output_buffer

34 

35 

def _extract_basic_tags_from_precursor(
    all_tech_attrs: list[dict[str, structs.ValuesGroup]],
) -> list[structs.OneMetaTag]:
    """Pick out the meta tags whose ``name`` is listed in ``settings.BASIC_META_TAGS``.

    A group contributes a tag only when it has a ``name`` attribute whose
    normalized value equals one of the configured basic tag names and a
    ``content`` attribute with a non-empty original value.

    Args:
        all_tech_attrs: Attribute groups, each mapping an attribute name to its
            ``ValuesGroup`` (``original`` / ``normalized`` values).

    Returns:
        The matching tags in input order; the tag name is the configured basic
        tag name and the value is the original ``content`` value.
    """
    collected_tags: typing.Final[list[structs.OneMetaTag]] = []
    for attr_group in all_tech_attrs:
        # Early exit is based on the number of collected tags, checked before each group.
        if len(collected_tags) == len(settings.BASIC_META_TAGS):
            break
        if "name" not in attr_group or "content" not in attr_group:
            continue
        content_value = attr_group["content"].original
        if not content_value:
            continue
        normalized_name = attr_group["name"].normalized
        for basic_tag_name in settings.BASIC_META_TAGS:
            if normalized_name == basic_tag_name:
                collected_tags.append(structs.OneMetaTag(name=basic_tag_name, value=content_value))
    return collected_tags

59 

60 

def _extract_all_other_tags_from_precursor(
    all_tech_attrs: list[dict[str, structs.ValuesGroup]],
) -> list[structs.OneMetaTag]:
    """Collect named meta tags that are neither social-media tags nor basic tags.

    A group is dropped when any attribute listed under ``"prop"`` in any entry
    of ``settings.SETTINGS_FOR_SOCIAL_MEDIA`` has a normalized value starting
    with that entry's ``"prefix"``, or when its normalized ``name`` is in
    ``settings.BASIC_META_TAGS``.  Of the remaining groups, only those with a
    ``name`` attribute and a non-empty original ``content`` value are kept.

    Args:
        all_tech_attrs: Attribute groups, each mapping an attribute name to its
            ``ValuesGroup`` (``original`` / ``normalized`` values).

    Returns:
        The remaining tags in input order; the tag name is the normalized
        ``name`` value and the tag value is the original ``content`` value.
    """

    def _looks_like_social_tag(attr_group: dict[str, structs.ValuesGroup]) -> bool:
        # True as soon as one configured attribute carries a social-media prefix.
        return any(
            attr_name in attr_group and attr_group[attr_name].normalized.startswith(one_config["prefix"])
            for one_config in settings.SETTINGS_FOR_SOCIAL_MEDIA.values()
            for attr_name in one_config["prop"]
        )

    leftover_tags: typing.Final[list[structs.OneMetaTag]] = []
    for attr_group in all_tech_attrs:
        if _looks_like_social_tag(attr_group):
            continue
        if "name" not in attr_group:
            continue
        tag_name = attr_group["name"].normalized
        if tag_name in settings.BASIC_META_TAGS:
            continue
        if "content" in attr_group and attr_group["content"].original:
            leftover_tags.append(
                structs.OneMetaTag(
                    name=tag_name,
                    value=attr_group["content"].original,
                )
            )
    return leftover_tags

88 

89 

def _prepare_normalized_meta_attrs(
    html_tree: LexborHTMLParser,
) -> list[dict[str, structs.ValuesGroup]]:
    """Turn every ``meta`` element of the tree into a dict of attribute value groups.

    Attribute names are lower-cased and stripped.  Each attribute value is
    stored twice in a ``ValuesGroup``: unchanged as ``original`` and
    lower-cased/stripped as ``normalized``.  A falsy attribute value is
    replaced by an empty string before either form is stored.

    Args:
        html_tree: Parsed document that is queried with the ``meta`` CSS selector.

    Returns:
        One dict per matched element, in the order returned by the selector.
    """

    def _to_values_group(raw_value: str | None) -> structs.ValuesGroup:
        # Falsy attribute values are stored as an empty string.
        text: str = raw_value or ""
        return structs.ValuesGroup(original=text, normalized=text.lower().strip())

    return [
        {
            attr_name.lower().strip(): _to_values_group(raw_value)
            for attr_name, raw_value in meta_node.attributes.items()
        }
        for meta_node in html_tree.css("meta")
    ]

104 

105 

def parse_meta_tags_from_source(
    source_code: str,
    what_to_parse: tuple[structs.WhatToParse, ...] = settings.DEFAULT_PARSE_GROUP,
) -> structs.TagsGroup:
    """Parse the title and meta tags out of HTML source code.

    Only the groups named in ``what_to_parse`` are extracted.  A group that was
    not requested comes back empty: ``""`` for the title, ``[]`` for each tag list.

    Args:
        source_code: HTML text handed to ``LexborHTMLParser``.
        what_to_parse: Groups to extract; defaults to ``settings.DEFAULT_PARSE_GROUP``.

    Returns:
        A ``TagsGroup`` with the fields ``title``, ``basic``, ``open_graph``,
        ``twitter`` and ``other``.
    """
    # The document is parsed unconditionally, even when no group is requested.
    html_tree: typing.Final[LexborHTMLParser] = LexborHTMLParser(source_code)

    page_title: str = ""
    if structs.WhatToParse.TITLE in what_to_parse:
        title_node = html_tree.css_first("title")
        if title_node:
            page_title = title_node.text().strip()

    wants_open_graph: typing.Final[bool] = structs.WhatToParse.OPEN_GRAPH in what_to_parse
    wants_twitter: typing.Final[bool] = structs.WhatToParse.TWITTER in what_to_parse
    wants_basic: typing.Final[bool] = structs.WhatToParse.BASIC in what_to_parse
    wants_other: typing.Final[bool] = structs.WhatToParse.OTHER in what_to_parse

    # The meta elements are only walked when at least one meta group was requested.
    normalized_meta_attrs: list[dict[str, structs.ValuesGroup]] = []
    if wants_open_graph or wants_twitter or wants_basic or wants_other:
        normalized_meta_attrs = _prepare_normalized_meta_attrs(html_tree)

    open_graph_meta_tags: list[structs.OneMetaTag] = []
    if wants_open_graph:
        open_graph_meta_tags = _extract_social_tags_from_precursor(
            normalized_meta_attrs, structs.WhatToParse.OPEN_GRAPH
        )

    twitter_meta_tags: list[structs.OneMetaTag] = []
    if wants_twitter:
        twitter_meta_tags = _extract_social_tags_from_precursor(normalized_meta_attrs, structs.WhatToParse.TWITTER)

    basic_meta_tags: list[structs.OneMetaTag] = []
    if wants_basic:
        basic_meta_tags = _extract_basic_tags_from_precursor(normalized_meta_attrs)

    other_meta_tags: list[structs.OneMetaTag] = []
    if wants_other:
        other_meta_tags = _extract_all_other_tags_from_precursor(normalized_meta_attrs)

    return structs.TagsGroup(
        title=page_title,
        basic=basic_meta_tags,
        open_graph=open_graph_meta_tags,
        twitter=twitter_meta_tags,
        other=other_meta_tags,
    )

160 

161