common: add theme_index class to handle config loading and caching

This commit is contained in:
Corentin Sechet 2022-04-12 12:55:01 +02:00
parent 4cb58652d9
commit 8cad4c798f
3 changed files with 158 additions and 54 deletions

View File

@ -4,13 +4,12 @@ from importlib.util import module_from_spec, spec_from_file_location
from pathlib import Path
from re import Pattern
from re import compile as re_compile
from typing import Any, Iterable, Optional, Type
from typing import Any, Iterable, Optional, Type, Union
from xdg import xdg_config_dirs, xdg_config_home
from yaml import Loader
from yaml import load as load_yaml
from frontools.sources import CachedSource, OverrideSource, Source
from frontools.theme_index import ThemeIndex, UrlEntry
REMOTE_SOURCE_NAME = "remote"
@ -19,20 +18,6 @@ class ConfigError(Exception):
"""Error raised on config error"""
class UrlConfig:
    """Settings attached to a single url: the set of tags it carries."""

    def __init__(self) -> None:
        # Every url starts out without any tag.
        initial_tags: set[str] = set()
        self.tags = initial_tags
class ThemeConfig:
    """Settings of one webtheme: a mapping from url to its UrlConfig."""

    def __init__(self) -> None:
        # No url is known for a freshly created theme.
        url_configs: dict[str, UrlConfig] = dict()
        self.urls = url_configs
class Config:
"""Configuration object"""
@ -46,7 +31,7 @@ class Config:
exclude_tags: list[str],
):
self._sources: dict[str, Source] = {}
self._themes: dict[str, ThemeConfig] = {}
self._theme_index = ThemeIndex()
self._block_urls: list[Pattern[str]] = []
self._add_source(
@ -86,6 +71,7 @@ class Config:
config_loader = getattr(config_module, "CONFIG")
await config_loader(config)
await config._theme_index.load()
return config
@ -97,40 +83,17 @@ class Config:
@property
def urls(self) -> Iterable[tuple[str, str]]:
"""Return themes configured for this context"""
for theme_name, theme in self._themes.items():
for url, config in theme.urls.items():
if self._filter(url, config.tags):
yield theme_name, url
for theme, url, tags in self._theme_index.urls:
if self._filter(url, tags):
yield theme, url
def add_url(
self, theme_name: str, url: str, tags: Optional[Iterable[str]] = None
) -> None:
def add_urls(self, *urls: UrlEntry) -> None:
"""Add an url for a theme"""
self._theme_index.add_urls(*urls)
theme = self._themes.get(theme_name, None)
if theme is None:
theme = ThemeConfig()
self._themes[theme_name] = theme
if tags is None:
new_tags = set()
else:
new_tags = set(tags)
url_config = theme.urls.get(url, None)
if url_config is None:
url_config = UrlConfig()
theme.urls[url] = url_config
url_config.tags.update(new_tags)
def load_urls(self, yaml_path: str) -> None:
def add_yaml(self, yaml_path: Union[str, Path]) -> None:
"""Load a yaml file containing dictionnary of urls to add as themes."""
with open(yaml_path, "r", encoding="utf-8") as yaml_file:
yaml_document = load_yaml(yaml_file, Loader)
for theme_name, urls in yaml_document.items():
for url, tags in urls.items():
self.add_url(theme_name, url, tags)
self._theme_index.add_yaml(Path(yaml_path))
def block_urls(self, *patterns: str) -> None:
"""Will return 500 error for urls matching this pattern."""

146
frontools/theme_index.py Normal file
View File

@ -0,0 +1,146 @@
"""Store themes and associated urls, providing ways to load them from several sources."""
from logging import getLogger
from pathlib import Path
from re import compile as re_compile
from ssl import CERT_NONE, create_default_context
from typing import Iterable, Optional
from aiohttp import ClientSession
from bs4 import BeautifulSoup
from xdg import xdg_cache_home
from yaml import Loader, dump
from yaml import load as load_yaml
from frontools.utils import TaskListType, report_progress
# Named after the module (not its file path) so the logger takes part in the
# dotted logger hierarchy and can be configured per package.
_LOGGER = getLogger(__name__)

# Maps a theme name to its urls, each url mapping to its list of tags.
ThemeIndexData = dict[str, dict[str, list[str]]]

# (url, tags, theme name) tuples; the theme name may be None.
UrlEntry = tuple[str, list[str], Optional[str]]
class _Inputs:
    """Raw inputs queued on a ThemeIndex until it is loaded."""

    def __init__(self) -> None:
        # Instance attributes on purpose: class-level lists would be shared by
        # every ThemeIndex, leaking urls and yaml files between instances.
        self.urls: list[UrlEntry] = []
        self.yaml_files: list[Path] = []


class ThemeIndex:
    """Store themes and associated urls, providing ways to load them from several sources."""

    def __init__(self) -> None:
        self._themes: ThemeIndexData = {}
        self._inputs = _Inputs()
        # (url, tags) pairs that were registered without a theme name.
        self._unknown_themes: list[tuple[str, list[str]]] = []

    @property
    def urls(self) -> Iterable[tuple[str, str, set[str]]]:
        """Yield a (theme name, url, tags) tuple for every registered url."""
        for theme_name, theme in self._themes.items():
            for url, tags in theme.items():
                yield theme_name, url, set(tags)

    def add_urls(self, *urls: UrlEntry) -> None:
        """Queue (url, tags, theme name) entries; they are registered by load()."""
        self._inputs.urls.extend(urls)

    def add_yaml(self, yaml_path: Path) -> None:
        """Queue a yaml file containing a dictionary of urls to add as themes."""
        self._inputs.yaml_files.append(yaml_path)

    async def load(self) -> None:
        """Resolve every queued input into the index.

        When the index cache file exists it is queued as one more yaml file
        and theme discovery is skipped. Otherwise the theme of each url queued
        without one is looked up and the result is written to the cache file.
        Urls queued with an explicit theme and queued yaml files are then
        registered.
        """
        index_cache = xdg_cache_home() / "frontools" / "index-cache.yaml"

        if index_cache.is_file():
            # Avoid queueing the cache twice when load() is called again.
            if index_cache not in self._inputs.yaml_files:
                self._inputs.yaml_files.append(index_cache)
        else:
            await self._load_urls_without_theme()
            # The cache directory does not exist on a first run.
            index_cache.parent.mkdir(parents=True, exist_ok=True)
            with open(index_cache, "w", encoding="utf-8") as index_cache_handle:
                dump(self._themes, index_cache_handle)

        # Registered after the cache is written so that the cache only holds
        # the themes that had to be discovered.
        await self._load_urls_with_theme()
        self._load_yaml_files()

    async def _load_urls_without_theme(self) -> None:
        """Look up and register the theme of every url queued without one."""

        async def _load(url: str, tags: list[str]) -> None:
            theme = await _get_theme(url)
            if theme is not None:
                self._register(url, tags, theme)

        tasks: TaskListType = [
            (url, _load(url, tags))
            for (url, tags, theme) in self._inputs.urls
            if theme is None
        ]
        await report_progress(
            "Gathering themes from IMIO sites",
            tasks,
            10,
        )

    async def _load_urls_with_theme(self) -> None:
        """Register every queued url that came with an explicit theme name."""
        for url, tags, theme in self._inputs.urls:
            if theme is not None:
                self._register(url, tags, theme)

    def _load_yaml_files(self) -> None:
        """Register the urls of every queued yaml file.

        Each document is read as a mapping of theme name to a mapping of url
        to tags.
        """
        for yaml_path in self._inputs.yaml_files:
            with open(yaml_path, "r", encoding="utf-8") as yaml_file:
                # NOTE(review): the full ``Loader`` can build arbitrary Python
                # objects; switch to a safe loader if these files can come
                # from an untrusted source.
                yaml_document = load_yaml(yaml_file, Loader)

            # An empty file (or an empty theme entry) loads as None.
            for theme_name, urls in (yaml_document or {}).items():
                for url, tags in (urls or {}).items():
                    self._register(url, tags, theme_name)

    def _register(
        self,
        url: str,
        new_tags: Optional[list[str]],
        theme_name: Optional[str],
    ) -> None:
        """Add an url to a theme, merging its tags with the ones already known.

        Urls given without a theme name are kept aside in
        ``self._unknown_themes`` instead of being indexed.
        """
        if isinstance(new_tags, str):
            # A single scalar tag must not be iterated character by character.
            new_tags = [new_tags]

        if theme_name is None:
            self._unknown_themes.append((url, list(new_tags or [])))
            return

        tags = self._themes.setdefault(theme_name, {}).setdefault(url, [])
        for tag in new_tags or []:
            # Keep first-seen order and drop duplicates.
            if tag not in tags:
                tags.append(tag)
# Captures the directory name that sits between "static/" and "/style.css".
THEME_CSS_PATH_PATTERN = re_compile(r".*static/(?P<theme>[\w-]*)/style\.css.*")


async def _get_theme(url: str) -> Optional[str]:
    """Return the theme name used by the page at *url*, or None.

    The page is downloaded and its ``<link>`` elements whose ``rel`` contains
    "stylesheet" are scanned; the first ``href`` matching
    THEME_CSS_PATH_PATTERN gives the theme name. Any error raised while
    downloading or parsing is logged and results in None, as does a page
    without a matching stylesheet.
    """
    try:
        # Certificate and hostname verification are turned off and the cipher
        # security level is lowered.
        # NOTE(review): presumably to reach sites with broken TLS setups - confirm.
        ssl_context = create_default_context()
        ssl_context.check_hostname = False
        ssl_context.verify_mode = CERT_NONE
        ssl_context.set_ciphers("DEFAULT@SECLEVEL=1")

        async with ClientSession() as session:
            # `ssl=` replaces the deprecated `ssl_context=` keyword.
            async with session.get(url, ssl=ssl_context) as response:
                page_content = await response.content.read()

        page_html = BeautifulSoup(page_content, features="html5lib")
        for link in page_html.find_all("link"):
            if "stylesheet" not in link.get("rel", []):
                continue
            # A stylesheet link without href is skipped instead of aborting
            # the whole page with a KeyError.
            theme_match = THEME_CSS_PATH_PATTERN.match(link.get("href", ""))
            if theme_match:
                return theme_match["theme"]
    except Exception as ex:  # best-effort: one failing site must not stop the others
        _LOGGER.error(f"Error while loading {url} : {ex} skipping")
        return None

    _LOGGER.error(f"No theme found for url {url}")
    return None

View File

@ -1,13 +1,8 @@
"""Common utilities"""
from asyncio import gather
from os.path import expandvars
from pathlib import Path
from re import compile as re_compile
from typing import Awaitable, cast
from click import echo, progressbar
from xdg import xdg_config_home
from click import progressbar
# A list of (str, awaitable) pairs.
TaskListType = list[tuple[str, Awaitable[None]]]