misc-csechet/frontools/theme_index.py

146 lines
4.8 KiB
Python

"""Store themes and associated urls, providing ways to load them from several sources."""
from logging import getLogger
from pathlib import Path
from re import compile as re_compile
from ssl import CERT_NONE, create_default_context
from typing import Iterable, Optional
from aiohttp import ClientSession
from bs4 import BeautifulSoup
from xdg import xdg_cache_home
from yaml import Loader, dump
from yaml import load as load_yaml
from frontools.utils import TaskListType, report_progress
# Module-level logger used by the theme lookup helpers of this module.
# NOTE(review): the logger is named after the file path (__file__); loggers are
# conventionally named with __name__ - confirm before changing.
_LOGGER = getLogger(__file__)
# Theme name -> {url -> list of tags} mapping, as stored by ThemeIndex.
ThemeIndexData = dict[str, dict[str, list[str]]]
UrlEntry = tuple[str, list[str], Optional[str]] # (url, tags, theme name) tuples
class _Inputs:
    """Sources queued on a ThemeIndex until its ``load`` method processes them."""

    def __init__(self) -> None:
        # The lists are created per instance: as class attributes they would be
        # shared by every ThemeIndex living in the same process.
        self.urls: list[UrlEntry] = []
        self.yaml_files: list[Path] = []


class ThemeIndex:
    """Store themes and associated urls, providing ways to load them from several sources."""

    def __init__(self) -> None:
        # Theme name -> {url -> tags}.
        self._themes: ThemeIndexData = {}
        self._inputs = _Inputs()
        # (url, tags) pairs that were registered without a theme name.
        self._unknown_themes: list[tuple[str, Optional[list[str]]]] = []

    @property
    def urls(self) -> Iterable[tuple[str, str, set[str]]]:
        """Yield a ``(theme name, url, tags)`` tuple for every registered url."""
        for theme_name, theme in self._themes.items():
            for url, tags in theme.items():
                yield theme_name, url, set(tags)

    def add_urls(self, *urls: UrlEntry) -> None:
        """Queue ``(url, tags, theme name)`` entries to be processed by ``load``.

        Entries whose theme name is ``None`` get their theme detected by
        downloading the page when ``load`` rebuilds the cache.
        """
        self._inputs.urls.extend(urls)

    def add_yaml(self, yaml_path: Path) -> None:
        """Queue a yaml file containing a dictionary of urls to add as themes.

        The document maps theme names to ``{url: tags}`` dictionaries.
        """
        self._inputs.yaml_files.append(yaml_path)

    async def load(self, update_cache: bool) -> None:
        """Fill the index from every queued source.

        When the cache file exists and ``update_cache`` is false, the cache is
        read like any other yaml input. Otherwise the themes of the urls queued
        without a theme name are looked up and the result is written to the
        cache file. Urls queued with an explicit theme and queued yaml files
        are registered in both cases.

        :param update_cache: force the theme lookup even if a cache file exists.
        """
        index_cache = xdg_cache_home() / "frontools" / "index-cache.yaml"
        if index_cache.is_file() and not update_cache:
            # Guard against queuing the cache twice when load() is called again.
            if index_cache not in self._inputs.yaml_files:
                self._inputs.yaml_files.append(index_cache)
        else:
            await self._load_urls_without_theme()
            # The cache directory does not exist on a first run.
            index_cache.parent.mkdir(parents=True, exist_ok=True)
            # Written as utf-8 because _load_yaml_files reads it back as utf-8.
            with open(index_cache, "w", encoding="utf-8") as index_cache_handle:
                dump(self._themes, index_cache_handle)

        # Registered after the cache dump so the cache only holds detected themes.
        await self._load_urls_with_theme()
        self._load_yaml_files()

    async def _load_urls_without_theme(self) -> None:
        """Detect and register the theme of every queued url that has none."""

        async def _load(url: str, tags: list[str]) -> None:
            theme = await _get_theme(url)
            if theme is None:
                return
            self._register(url, tags, theme)

        # One (label, coroutine) pair per url, the label being the url itself.
        tasks: TaskListType = [
            (url, _load(url, tags))
            for (url, tags, theme) in self._inputs.urls
            if theme is None
        ]
        # NOTE(review): 10 is presumably a concurrency limit - confirm against
        # frontools.utils.report_progress.
        await report_progress(
            "Gathering themes from IMIO sites",
            tasks,
            10,
        )

    async def _load_urls_with_theme(self) -> None:
        """Register every queued url that came with an explicit theme name."""
        for url, tags, theme in self._inputs.urls:
            if theme is not None:
                self._register(url, tags, theme)

    def _load_yaml_files(self) -> None:
        """Register the content of every queued yaml file."""
        for yaml_path in self._inputs.yaml_files:
            with open(yaml_path, "r", encoding="utf-8") as yaml_file:
                # NOTE(review): the full Loader can instantiate arbitrary python
                # objects; only feed it trusted files (or switch to SafeLoader).
                yaml_document = load_yaml(yaml_file, Loader)
            # An empty file, or a theme without urls, is parsed as None.
            for theme_name, urls in (yaml_document or {}).items():
                for url, tags in (urls or {}).items():
                    self._register(url, tags, theme_name)

    def _register(
        self,
        url: str,
        new_tags: Optional[list[str]],
        theme_name: Optional[str],
    ) -> None:
        """Add an url for a theme, merging ``new_tags`` into its existing tags.

        Urls without a theme name are kept aside in ``_unknown_themes``.
        """
        if theme_name is None:
            self._unknown_themes.append((url, new_tags))
            return

        tags = self._themes.setdefault(theme_name, {}).setdefault(url, [])
        for tag in new_tags or ():
            if tag not in tags:
                tags.append(tag)
# Matches stylesheet urls of the form ".../static/<theme>/style.css..." and
# captures the <theme> path segment.
THEME_CSS_PATH_PATTERN = re_compile(r".*static/(?P<theme>[\w-]*)/style\.css.*")


async def _get_theme(url: str) -> Optional[str]:
    """Download ``url`` and return the theme named by its stylesheet links.

    The first ``<link rel="stylesheet">`` whose ``href`` matches
    ``THEME_CSS_PATH_PATTERN`` gives the theme name.

    :param url: address of the page to inspect.
    :return: the theme name, or ``None`` when the page could not be loaded or
        no stylesheet link matches (an error is logged in both cases).
    """
    try:
        # Hostname and certificate checks are disabled and the OpenSSL security
        # level is lowered to 1.
        # NOTE(review): presumably some target sites have invalid certificates
        # or legacy TLS setups - confirm this is still required.
        ssl_context = create_default_context()
        ssl_context.check_hostname = False
        ssl_context.verify_mode = CERT_NONE
        ssl_context.set_ciphers("DEFAULT@SECLEVEL=1")

        async with ClientSession() as session:
            # "ssl" replaces the deprecated "ssl_context" keyword of aiohttp.
            async with session.get(url, ssl=ssl_context) as response:
                page_content = await response.content.read()

        page_html = BeautifulSoup(page_content, features="html5lib")
        for link in page_html.find_all("link"):
            if "stylesheet" not in link.get("rel", []):
                continue

            # A stylesheet link without href must not abort the whole lookup.
            href = link.get("href")
            if not href:
                continue

            theme_match = THEME_CSS_PATH_PATTERN.match(href)
            if theme_match:
                return theme_match["theme"]
    except Exception as ex:
        # Best effort: a site that cannot be fetched or parsed is skipped.
        _LOGGER.error("Error while loading %s : %s skipping", url, ex)
        return None

    _LOGGER.error("No theme found for url %s", url)
    return None