diff --git a/frontools/cache.py b/frontools/cache.py deleted file mode 100644 index dd80877..0000000 --- a/frontools/cache.py +++ /dev/null @@ -1,108 +0,0 @@ -"""Cache management""" -from abc import ABC, abstractmethod -from pathlib import Path -from shutil import rmtree -from typing import Awaitable, Callable, Union, Optional - -from click import echo -from xdg import xdg_cache_home - -CacheFallback = Union[bytes, Callable[[str], Awaitable[Optional[bytes]]]] - - -class Cache(ABC): - """Base class for caches""" - - @abstractmethod - async def get(self, key: str, fallback: CacheFallback) -> Optional[bytes]: - """Get an item in the cache, call fallback if it's not present""" - - @abstractmethod - def set(self, key: str, data: bytes) -> None: - """Set a content in the cache""" - - @staticmethod - async def _get_fallback_value(key: str, fallback: CacheFallback) -> Optional[bytes]: - if callable(fallback): - result = await fallback(key) - else: - result = fallback - - return result - - -class NullCache(Cache): - """Disabled cache""" - - async def get(self, key: str, fallback: CacheFallback) -> Optional[bytes]: - return await self._get_fallback_value(key, fallback) - - def set(self, key: str, data: bytes) -> None: - pass - - -class FileCache(Cache): - """Cache on the local filesystem""" - - cache_base = xdg_cache_home() / "frontools" - - def __init__(self, name: str) -> None: - self._name = name - - async def get( - self, - key: str, - fallback: CacheFallback, - ) -> Optional[bytes]: - """Get an item in the cache, call fallback if it's not present""" - cache_file_path = self._get_cache_file_path(key) - if not cache_file_path.is_file(): - content = await self._get_fallback_value(key, fallback) - if content is not None: - self.set(key, content) - else: - with open(cache_file_path, "rb") as cache_file: - content = cache_file.read() - - return content - - def set(self, key: str, data: bytes) -> None: - """Set content in the cache""" - cache_file_path = self._get_cache_file_path(key) - with open(cache_file_path, "wb") as cache_file: - cache_file.write(data) - - @staticmethod - def prune(cache_names: list[str]) -> None: - """Remove caches from filesystem. - - If empty list is provided, all caches will be cleaned - """ - if not cache_names: - cache_names = [ - it.name for it in FileCache.cache_base.iterdir() if it.is_dir() - ] - for cache_name in cache_names: - cache_path: Path = FileCache.cache_base / cache_name - if not cache_path.is_dir(): - echo(f"{cache_path} isn't a chache directory", err=True) - continue - echo(f"Removing {cache_path}") - rmtree(cache_path) - - def _get_cache_file_path(self, key: str) -> Path: - key_slug = _get_key_slug(key) - cache_directory = self.cache_base / self._name - file_path = cache_directory.joinpath(*key_slug.split("&")) - file_path = file_path.parent / (file_path.name[:254] + "_") - file_directory = file_path.parent - - if not file_directory.is_dir(): - file_directory.mkdir(parents=True) - - return file_path - - -def _get_key_slug(url: str) -> str: - """Return an unique slug usable as a path name for a given url.""" - return url.replace("_", "___").replace("/", "__").replace(":", "_") diff --git a/frontools/cli.py b/frontools/cli.py index 1dc34f6..474409a 100644 --- a/frontools/cli.py +++ b/frontools/cli.py @@ -8,8 +8,8 @@ from click import Context as ClickContext from click import Path as PathArgument from click import argument, group, option, pass_context, pass_obj -from frontools.cache import FileCache from frontools.config import Config +from frontools.sources import CachedSource from frontools.screenshot import screenshot_diff @@ -92,7 +92,7 @@ async def main( @argument("cache_names", nargs=-1) def prune_caches(cache_names: list[str]) -> None: """Prune frontools caches""" - FileCache.prune(cache_names) + CachedSource.prune(cache_names) @main.command(name="screenshot-diff") diff --git a/frontools/config.py b/frontools/config.py index 0705755..e52da06 100644 --- a/frontools/config.py +++ b/frontools/config.py @@ -10,7 +10,6 @@ from xdg import xdg_config_dirs, xdg_config_home from yaml import Loader from yaml import load as load_yaml -from frontools.cache import FileCache, NullCache from frontools.sources import CachedSource, OverrideSource, Source REMOTE_SOURCE_NAME = "remote" @@ -51,9 +50,7 @@ class Config: self._block_urls: list[Pattern[str]] = [] self._add_source( - REMOTE_SOURCE_NAME, - CachedSource, - FileCache(REMOTE_SOURCE_NAME) if use_cache else NullCache(), + REMOTE_SOURCE_NAME, CachedSource, REMOTE_SOURCE_NAME, not use_cache ) self._source_name = source_name if source_name else REMOTE_SOURCE_NAME diff --git a/frontools/sources.py b/frontools/sources.py index 6d2a258..9673bf2 100644 --- a/frontools/sources.py +++ b/frontools/sources.py @@ -6,14 +6,15 @@ from logging import getLogger from pathlib import Path from re import Pattern from re import compile as re_compile +from shutil import rmtree from typing import AsyncGenerator, Optional, cast +from frontools.utils import get_url_slug from aiohttp import ClientConnectionError, ClientPayloadError, ClientSession from playwright.async_api import BrowserContext, Error, Page, Route from playwright.async_api import TimeoutError as PlaywrightTimeoutError from playwright.async_api import ViewportSize, async_playwright - -from frontools.cache import Cache +from xdg import xdg_cache_home _LOGGER = getLogger("frontools") @@ -106,17 +107,29 @@ class Source(ABC): class CachedSource(Source): """Source loading urls from the internet.""" - def __init__( - self, - block_urls: list[Pattern[str]], - cache: Cache, - ) -> None: + cache_base = xdg_cache_home() / "frontools" + + def __init__(self, block_urls: list[Pattern[str]], name: str, disabled: bool = False) -> None: super().__init__(block_urls) - self._cache = cache + self._name = name + self._disabled = disabled async def get_url(self, url: str) -> Optional[bytes]: """Get a page content from the local or remote cache.""" - return await self._cache.get(url, self._load_url) + if self._disabled: + return await self._load_url(url) + + cache_file_path = self._get_cache_file_path(url) + if not cache_file_path.is_file(): + content = await self._load_url(url) + if content is not None: + with open(cache_file_path, "wb") as cache_file: + cache_file.write(content) + else: + with open(cache_file_path, "rb") as cache_file: + content = cache_file.read() + + return content async def _load_url(self, url: str) -> Optional[bytes]: try: @@ -128,6 +141,36 @@ class CachedSource(Source): return None + @staticmethod + def prune(cache_names: list[str]) -> None: + """Remove caches from filesystem. + + If empty list is provided, all caches will be cleaned + """ + if not cache_names: + cache_names = [ + it.name for it in CachedSource.cache_base.iterdir() if it.is_dir() + ] + for cache_name in cache_names: + cache_path: Path = CachedSource.cache_base / cache_name + if not cache_path.is_dir(): + _LOGGER.error(f"{cache_path} isn't a chache directory") + continue + _LOGGER.info(f"Removing {cache_path}") + rmtree(cache_path) + + def _get_cache_file_path(self, url: str) -> Path: + key_slug = get_url_slug(url) + cache_directory = self.cache_base / self._name + file_path = cache_directory.joinpath(*key_slug.split("&")) + file_path = file_path.parent / (file_path.name[:254] + "_") + file_directory = file_path.parent + + if not file_directory.is_dir(): + file_directory.mkdir(parents=True) + + return file_path + class OverrideSource(Source): """Source overriding paths matching patterns with local files"""