common: add theme_index class to handle config loading and caching
This commit is contained in:
parent
4cb58652d9
commit
8cad4c798f
|
@ -4,13 +4,12 @@ from importlib.util import module_from_spec, spec_from_file_location
|
|||
from pathlib import Path
|
||||
from re import Pattern
|
||||
from re import compile as re_compile
|
||||
from typing import Any, Iterable, Optional, Type
|
||||
from typing import Any, Iterable, Optional, Type, Union
|
||||
|
||||
from xdg import xdg_config_dirs, xdg_config_home
|
||||
from yaml import Loader
|
||||
from yaml import load as load_yaml
|
||||
|
||||
from frontools.sources import CachedSource, OverrideSource, Source
|
||||
from frontools.theme_index import ThemeIndex, UrlEntry
|
||||
|
||||
REMOTE_SOURCE_NAME = "remote"
|
||||
|
||||
|
@ -19,20 +18,6 @@ class ConfigError(Exception):
|
|||
"""Error raised on config error"""
|
||||
|
||||
|
||||
class UrlConfig:
    """Per-url settings: currently only the set of tags attached to the url."""

    def __init__(self) -> None:
        # Each instance owns its own (initially empty) tag set.
        self.tags: set[str] = set()
|
||||
|
||||
|
||||
class ThemeConfig:
    """Settings of one webtheme: a mapping from url to its ``UrlConfig``."""

    def __init__(self) -> None:
        # Each instance owns its own (initially empty) url mapping.
        self.urls: dict[str, UrlConfig] = {}
|
||||
|
||||
|
||||
class Config:
|
||||
"""Configuration object"""
|
||||
|
||||
|
@ -46,7 +31,7 @@ class Config:
|
|||
exclude_tags: list[str],
|
||||
):
|
||||
self._sources: dict[str, Source] = {}
|
||||
self._themes: dict[str, ThemeConfig] = {}
|
||||
self._theme_index = ThemeIndex()
|
||||
self._block_urls: list[Pattern[str]] = []
|
||||
|
||||
self._add_source(
|
||||
|
@ -86,6 +71,7 @@ class Config:
|
|||
|
||||
config_loader = getattr(config_module, "CONFIG")
|
||||
await config_loader(config)
|
||||
await config._theme_index.load()
|
||||
|
||||
return config
|
||||
|
||||
|
@ -97,40 +83,17 @@ class Config:
|
|||
@property
|
||||
def urls(self) -> Iterable[tuple[str, str]]:
|
||||
"""Return themes configured for this context"""
|
||||
for theme_name, theme in self._themes.items():
|
||||
for url, config in theme.urls.items():
|
||||
if self._filter(url, config.tags):
|
||||
yield theme_name, url
|
||||
for theme, url, tags in self._theme_index.urls:
|
||||
if self._filter(url, tags):
|
||||
yield theme, url
|
||||
|
||||
def add_url(
|
||||
self, theme_name: str, url: str, tags: Optional[Iterable[str]] = None
|
||||
) -> None:
|
||||
def add_urls(self, *urls: UrlEntry) -> None:
|
||||
"""Add an url for a theme"""
|
||||
self._theme_index.add_urls(*urls)
|
||||
|
||||
theme = self._themes.get(theme_name, None)
|
||||
if theme is None:
|
||||
theme = ThemeConfig()
|
||||
self._themes[theme_name] = theme
|
||||
|
||||
if tags is None:
|
||||
new_tags = set()
|
||||
else:
|
||||
new_tags = set(tags)
|
||||
|
||||
url_config = theme.urls.get(url, None)
|
||||
if url_config is None:
|
||||
url_config = UrlConfig()
|
||||
theme.urls[url] = url_config
|
||||
|
||||
url_config.tags.update(new_tags)
|
||||
|
||||
def load_urls(self, yaml_path: str) -> None:
|
||||
def add_yaml(self, yaml_path: Union[str, Path]) -> None:
|
||||
"""Load a yaml file containing dictionnary of urls to add as themes."""
|
||||
with open(yaml_path, "r", encoding="utf-8") as yaml_file:
|
||||
yaml_document = load_yaml(yaml_file, Loader)
|
||||
for theme_name, urls in yaml_document.items():
|
||||
for url, tags in urls.items():
|
||||
self.add_url(theme_name, url, tags)
|
||||
self._theme_index.add_yaml(Path(yaml_path))
|
||||
|
||||
def block_urls(self, *patterns: str) -> None:
|
||||
"""Will return 500 error for urls matching this pattern."""
|
||||
|
|
|
@ -0,0 +1,146 @@
|
|||
"""Store themes and associated urls, providing ways to load them from several sources."""
|
||||
from logging import getLogger
|
||||
from pathlib import Path
|
||||
from re import compile as re_compile
|
||||
from ssl import CERT_NONE, create_default_context
|
||||
from typing import Iterable, Optional
|
||||
|
||||
from aiohttp import ClientSession
|
||||
from bs4 import BeautifulSoup
|
||||
from xdg import xdg_cache_home
|
||||
from yaml import Loader, dump
|
||||
from yaml import load as load_yaml
|
||||
|
||||
from frontools.utils import TaskListType, report_progress
|
||||
|
||||
# Name the logger after the module (not the file path) so it takes part in the
# normal dotted logger hierarchy and can be configured per package.
_LOGGER = getLogger(__name__)
|
||||
|
||||
# Mapping of theme name -> {url -> ordered list of tags}.
ThemeIndexData = dict[str, dict[str, list[str]]]
# (url, tags, theme name) tuples; a ``None`` theme name means the theme has to
# be discovered by fetching the url.
UrlEntry = tuple[str, list[str], Optional[str]]


class _Inputs:
    """Sources registered on a ``ThemeIndex`` and consumed by ``ThemeIndex.load``."""

    def __init__(self) -> None:
        # Instance attributes on purpose: class-level lists would be shared by
        # every ThemeIndex and leak inputs from one index into another.
        self.urls: list[UrlEntry] = []
        self.yaml_files: list[Path] = []


class ThemeIndex:
    """Store themes and associated urls, providing ways to load them from several sources."""

    def __init__(self) -> None:
        self._themes: ThemeIndexData = {}
        self._inputs = _Inputs()
        # (url, tags) pairs that were registered without any theme name.
        self._unknown_themes: list[tuple[str, list[str]]] = []

    @property
    def urls(self) -> Iterable[tuple[str, str, set[str]]]:
        """Yield ``(theme name, url, tags)`` for every url registered so far."""
        for theme_name, theme in self._themes.items():
            for url, tags in theme.items():
                # Hand out a copy so callers cannot mutate the index.
                yield theme_name, url, set(tags)

    def add_urls(self, *urls: UrlEntry) -> None:
        """Queue ``(url, tags, theme name)`` entries; they are resolved by ``load``."""
        self._inputs.urls.extend(urls)

    def add_yaml(self, yaml_path: Path) -> None:
        """Queue a yaml file mapping theme names to ``{url: tags}``; read by ``load``."""
        self._inputs.yaml_files.append(yaml_path)

    async def load(self) -> None:
        """Resolve every queued input into the index.

        Urls queued without a theme are resolved through ``_get_theme`` and the
        result is written to ``<xdg cache>/frontools/index-cache.yaml``. When
        that cache file already exists it is read instead and nothing is
        fetched. Urls queued with an explicit theme and queued yaml files are
        always registered.
        """
        index_cache = xdg_cache_home() / "frontools" / "index-cache.yaml"
        cache_hit = index_cache.is_file()

        if not cache_hit:
            await self._load_urls_without_theme()
            # The cache directory may not exist yet on a first run.
            index_cache.parent.mkdir(parents=True, exist_ok=True)
            with open(index_cache, "w", encoding="utf-8") as index_cache_handle:
                dump(self._themes, index_cache_handle)

        # Explicitly themed urls are registered after the cache is written, so
        # the cache only ever contains discovered themes.
        await self._load_urls_with_theme()
        self._load_yaml_files()

        if cache_hit:
            self._load_yaml_file(index_cache)

    async def _load_urls_without_theme(self) -> None:
        """Discover and register the theme of every queued url that has none."""

        async def _load(url: str, tags: list[str]) -> None:
            theme = await _get_theme(url)
            if theme is None:
                return

            self._register(url, tags, theme)

        tasks: TaskListType = [
            (url, _load(url, tags))
            for (url, tags, theme) in self._inputs.urls
            if theme is None
        ]
        # NOTE(review): the trailing 10 is presumably a concurrency limit of
        # report_progress -- confirm against frontools.utils.
        await report_progress(
            "Gathering themes from IMIO sites",
            tasks,
            10,
        )

    async def _load_urls_with_theme(self) -> None:
        """Register every queued url whose theme name was given explicitly."""
        for url, tags, theme in self._inputs.urls:
            if theme is not None:
                self._register(url, tags, theme)

    def _load_yaml_files(self) -> None:
        """Register the content of every queued yaml file, in queue order."""
        for yaml_path in self._inputs.yaml_files:
            self._load_yaml_file(yaml_path)

    def _load_yaml_file(self, yaml_path: Path) -> None:
        """Register the ``{theme: {url: tags}}`` mapping stored in one yaml file."""
        with open(yaml_path, "r", encoding="utf-8") as yaml_file:
            # NOTE(review): the full ``Loader`` can build arbitrary Python
            # objects; prefer ``SafeLoader`` if these files can be untrusted.
            yaml_document = load_yaml(yaml_file, Loader)

        # An empty document loads as None and a theme without urls maps to
        # None: treat both as "nothing to register".
        for theme_name, urls in (yaml_document or {}).items():
            for url, tags in (urls or {}).items():
                self._register(url, tags, theme_name)

    def _register(
        self,
        url: str,
        new_tags: Optional[Iterable[str]],
        theme_name: Optional[str],
    ) -> None:
        """Add ``url`` to ``theme_name`` and merge ``new_tags`` into its tags.

        Tags already present are kept once, in first-seen order. A ``None``
        ``new_tags`` registers the url without adding tags. A ``None``
        ``theme_name`` records the url in ``_unknown_themes`` instead.
        """
        if theme_name is None:
            self._unknown_themes.append((url, new_tags))
            return

        tags = self._themes.setdefault(theme_name, {}).setdefault(url, [])

        for tag in new_tags or ():
            if tag not in tags:
                tags.append(tag)
|
||||
|
||||
|
||||
# Matches stylesheet hrefs of the form ".../static/<theme>/style.css..." and
# captures the theme directory name. The dot is escaped so that only a literal
# "style.css" matches, and the theme segment must not be empty.
THEME_CSS_PATH_PATTERN = re_compile(r".*static/(?P<theme>[\w-]+)/style\.css.*")


def _theme_from_links(links: Iterable) -> Optional[str]:
    """Return the theme named by the first matching stylesheet link, or None."""
    for link in links:
        if "stylesheet" not in link.get("rel", []):
            continue

        # A stylesheet link without href must not abort the whole lookup.
        href = link.get("href")
        if not href:
            continue

        theme_match = THEME_CSS_PATH_PATTERN.match(href)
        if theme_match:
            return theme_match["theme"]

    return None


async def _get_theme(url: str) -> Optional[str]:
    """Fetch ``url`` and return the theme name found in its stylesheet links.

    Returns None, after logging an error, when the page cannot be fetched or
    parsed, or when no stylesheet link matches ``THEME_CSS_PATH_PATTERN``.
    """
    # Certificate and hostname checks are turned off and the OpenSSL security
    # level is lowered to 1.
    # NOTE(review): presumably needed to reach sites with legacy TLS setups --
    # confirm this is still required.
    ssl_context = create_default_context()
    ssl_context.check_hostname = False
    ssl_context.verify_mode = CERT_NONE
    ssl_context.set_ciphers("DEFAULT@SECLEVEL=1")

    try:
        async with ClientSession() as session:
            # ``ssl=`` replaces the deprecated ``ssl_context=`` keyword.
            async with session.get(url, ssl=ssl_context) as response:
                page_content = await response.content.read()

        page_html = BeautifulSoup(page_content, features="html5lib")
        theme = _theme_from_links(page_html.find_all("link"))
    except Exception as ex:  # best effort: one bad site must not stop the run
        _LOGGER.error("Error while loading %s : %s skipping", url, ex)
        return None

    if theme is None:
        _LOGGER.error("No theme found for url %s", url)

    return theme
|
|
@ -1,13 +1,8 @@
|
|||
"""Common utilities"""
|
||||
from asyncio import gather
|
||||
from os.path import expandvars
|
||||
from pathlib import Path
|
||||
from re import compile as re_compile
|
||||
from typing import Awaitable, cast
|
||||
|
||||
from click import echo, progressbar
|
||||
from xdg import xdg_config_home
|
||||
|
||||
from click import progressbar
|
||||
|
||||
TaskListType = list[tuple[str, Awaitable[None]]]
|
||||
|
||||
|
|
Loading…
Reference in New Issue