caches: remove overengineered cache code

This commit is contained in:
Corentin Sechet 2022-04-12 11:52:17 +02:00
parent b3f1e4823e
commit 4cb58652d9
4 changed files with 55 additions and 123 deletions

View File

@ -1,108 +0,0 @@
"""Cache management"""
from abc import ABC, abstractmethod
from pathlib import Path
from shutil import rmtree
from typing import Awaitable, Callable, Union, Optional
from click import echo
from xdg import xdg_cache_home
# A fallback is either the bytes to hand back directly, or an async callable
# that receives the key and produces the bytes (or None).
CacheFallback = Union[bytes, Callable[[str], Awaitable[Optional[bytes]]]]


class Cache(ABC):
    """Abstract interface implemented by every cache."""

    @abstractmethod
    async def get(self, key: str, fallback: CacheFallback) -> Optional[bytes]:
        """Return the entry for ``key``, resorting to ``fallback`` if it is absent."""

    @abstractmethod
    def set(self, key: str, data: bytes) -> None:
        """Store ``data`` under ``key``."""

    @staticmethod
    async def _get_fallback_value(key: str, fallback: CacheFallback) -> Optional[bytes]:
        """Resolve ``fallback`` to a value.

        A callable fallback is called with ``key`` and awaited; anything else
        is returned unchanged.
        """
        if not callable(fallback):
            return fallback
        return await fallback(key)
class NullCache(Cache):
    """Cache that stores nothing: every lookup is answered by the fallback."""

    async def get(self, key: str, fallback: CacheFallback) -> Optional[bytes]:
        """Resolve ``fallback`` for ``key``; no stored entry is ever consulted."""
        value = await self._get_fallback_value(key, fallback)
        return value

    def set(self, key: str, data: bytes) -> None:
        """Ignore ``data``; this cache keeps no entries."""
        return None
class FileCache(Cache):
    """Cache storing each entry as a file on the local filesystem.

    Every instance works inside the sub-directory ``cache_base / name``;
    entries are stored there under a path derived from their key.
    """

    # Root directory under which every named cache gets its own sub-directory.
    cache_base = xdg_cache_home() / "frontools"

    def __init__(self, name: str) -> None:
        """Create a cache living in the ``name`` sub-directory of ``cache_base``."""
        self._name = name

    async def get(
        self,
        key: str,
        fallback: CacheFallback,
    ) -> Optional[bytes]:
        """Get an item in the cache, call fallback if it's not present.

        On a miss the fallback is resolved and, unless it yields ``None``,
        its result is written to the cache before being returned.
        """
        cache_file_path = self._get_cache_file_path(key)
        if cache_file_path.is_file():
            return cache_file_path.read_bytes()

        content = await self._get_fallback_value(key, fallback)
        if content is not None:
            self.set(key, content)
        return content

    def set(self, key: str, data: bytes) -> None:
        """Write ``data`` to the cache file associated with ``key``."""
        self._get_cache_file_path(key).write_bytes(data)

    @staticmethod
    def prune(cache_names: list[str]) -> None:
        """Remove caches from filesystem.

        If empty list is provided, all caches will be cleaned. Names that do
        not designate a directory under ``cache_base`` are reported on stderr
        and skipped.
        """
        base: Path = FileCache.cache_base
        if not cache_names:
            if not base.is_dir():
                # Nothing was ever cached: iterdir() would raise
                # FileNotFoundError, and there is nothing to remove anyway.
                return
            cache_names = [it.name for it in base.iterdir() if it.is_dir()]

        for cache_name in cache_names:
            cache_path = base / cache_name
            if not cache_path.is_dir():
                echo(f"{cache_path} isn't a cache directory", err=True)
                continue

            echo(f"Removing {cache_path}")
            rmtree(cache_path)

    def _get_cache_file_path(self, key: str) -> Path:
        """Return the file path storing ``key``, creating its parent directory.

        The key slug is split on ``&`` and each part becomes one path
        component below ``cache_base / name``.
        """
        key_slug = _get_key_slug(key)
        cache_directory = self.cache_base / self._name
        file_path = cache_directory.joinpath(*key_slug.split("&"))
        # The last component is cut to 254 characters and suffixed with "_" —
        # presumably to respect the usual 255-character file name limit and to
        # keep entry files distinct from directory names; confirm before
        # changing.
        file_path = file_path.parent / (file_path.name[:254] + "_")
        # exist_ok avoids failing when the directory appears between a check
        # and the creation (e.g. another process filling the same cache).
        file_path.parent.mkdir(parents=True, exist_ok=True)
        return file_path
# Per-character substitutions applied to a url to obtain its slug.
_SLUG_TRANSLATION = str.maketrans({"_": "___", "/": "__", ":": "_"})


def _get_key_slug(url: str) -> str:
    """Return a slug of ``url`` usable as a path name.

    ``_`` becomes ``___``, ``/`` becomes ``__`` and ``:`` becomes ``_``; every
    other character is kept. Note that distinct urls can share a slug
    (``"a:/"`` and ``"a_"`` both give ``"a___"``).
    """
    return url.translate(_SLUG_TRANSLATION)

View File

@ -8,8 +8,8 @@ from click import Context as ClickContext
from click import Path as PathArgument
from click import argument, group, option, pass_context, pass_obj
from frontools.cache import FileCache
from frontools.config import Config
from frontools.sources import CachedSource
from frontools.screenshot import screenshot_diff
@ -92,7 +92,7 @@ async def main(
@argument("cache_names", nargs=-1)
def prune_caches(cache_names: list[str]) -> None:
"""Prune frontools caches"""
FileCache.prune(cache_names)
CachedSource.prune(cache_names)
@main.command(name="screenshot-diff")

View File

@ -10,7 +10,6 @@ from xdg import xdg_config_dirs, xdg_config_home
from yaml import Loader
from yaml import load as load_yaml
from frontools.cache import FileCache, NullCache
from frontools.sources import CachedSource, OverrideSource, Source
REMOTE_SOURCE_NAME = "remote"
@ -51,9 +50,7 @@ class Config:
self._block_urls: list[Pattern[str]] = []
self._add_source(
REMOTE_SOURCE_NAME,
CachedSource,
FileCache(REMOTE_SOURCE_NAME) if use_cache else NullCache(),
REMOTE_SOURCE_NAME, CachedSource, REMOTE_SOURCE_NAME, not use_cache
)
self._source_name = source_name if source_name else REMOTE_SOURCE_NAME

View File

@ -6,14 +6,15 @@ from logging import getLogger
from pathlib import Path
from re import Pattern
from re import compile as re_compile
from shutil import rmtree
from typing import AsyncGenerator, Optional, cast
from frontools.utils import get_url_slug
from aiohttp import ClientConnectionError, ClientPayloadError, ClientSession
from playwright.async_api import BrowserContext, Error, Page, Route
from playwright.async_api import TimeoutError as PlaywrightTimeoutError
from playwright.async_api import ViewportSize, async_playwright
from frontools.cache import Cache
from xdg import xdg_cache_home
_LOGGER = getLogger("frontools")
@ -106,17 +107,29 @@ class Source(ABC):
class CachedSource(Source):
"""Source loading urls from the internet."""
def __init__(
self,
block_urls: list[Pattern[str]],
cache: Cache,
) -> None:
cache_base = xdg_cache_home() / "frontools"
def __init__(self, block_urls: list[Pattern[str]], name: str, disabled: bool = False) -> None:
super().__init__(block_urls)
self._cache = cache
self._name = name
self._disabled = disabled
async def get_url(self, url: str) -> Optional[bytes]:
"""Get a page content from the local or remote cache."""
return await self._cache.get(url, self._load_url)
if self._disabled:
return await self._load_url(url)
cache_file_path = self._get_cache_file_path(url)
if not cache_file_path.is_file():
content = await self._load_url(url)
if content is not None:
with open(cache_file_path, "wb") as cache_file:
cache_file.write(content)
else:
with open(cache_file_path, "rb") as cache_file:
content = cache_file.read()
return content
async def _load_url(self, url: str) -> Optional[bytes]:
try:
@ -128,6 +141,36 @@ class CachedSource(Source):
return None
@staticmethod
def prune(cache_names: list[str]) -> None:
    """Remove caches from filesystem.

    If empty list is provided, all caches will be cleaned. Names that do not
    designate a directory under ``cache_base`` are logged as errors and
    skipped.
    """
    base: Path = CachedSource.cache_base
    if not cache_names:
        if not base.is_dir():
            # Nothing was ever cached: iterdir() would raise
            # FileNotFoundError, and there is nothing to remove anyway.
            return
        cache_names = [it.name for it in base.iterdir() if it.is_dir()]

    for cache_name in cache_names:
        cache_path = base / cache_name
        if not cache_path.is_dir():
            _LOGGER.error("%s isn't a cache directory", cache_path)
            continue

        _LOGGER.info("Removing %s", cache_path)
        rmtree(cache_path)
def _get_cache_file_path(self, url: str) -> Path:
    """Return the file path caching ``url``, creating its parent directory.

    The url slug is split on ``&`` and each part becomes one path component
    below ``cache_base / name``.
    """
    key_slug = get_url_slug(url)
    cache_directory = self.cache_base / self._name
    file_path = cache_directory.joinpath(*key_slug.split("&"))
    # The last component is cut to 254 characters and suffixed with "_" —
    # presumably to respect the usual 255-character file name limit and to
    # keep entry files distinct from directory names; confirm before changing.
    file_path = file_path.parent / (file_path.name[:254] + "_")
    # exist_ok avoids failing when the directory appears between a check and
    # the creation (e.g. another process filling the same cache).
    file_path.parent.mkdir(parents=True, exist_ok=True)
    return file_path
class OverrideSource(Source):
"""Source overriding paths matching patterns with local files"""