From 98fbfe63abfe5129bc2ab6469ecc2b3c1729c833 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Corentin=20S=C3=A9chet?= Date: Tue, 12 Apr 2022 10:36:14 +0200 Subject: [PATCH] common: fix lint errors --- frontools/cache.py | 33 ++++++++++++++-------------- frontools/config.py | 5 +++-- frontools/css.py | 9 ++++++-- frontools/sources.py | 48 +++++++++++++++++++++++++---------------- frontools/url_gather.py | 1 + 5 files changed, 57 insertions(+), 39 deletions(-) create mode 100644 frontools/url_gather.py diff --git a/frontools/cache.py b/frontools/cache.py index 82e928d..dd80877 100644 --- a/frontools/cache.py +++ b/frontools/cache.py @@ -2,27 +2,27 @@ from abc import ABC, abstractmethod from pathlib import Path from shutil import rmtree -from typing import Awaitable, Callable, Union +from typing import Awaitable, Callable, Union, Optional from click import echo from xdg import xdg_cache_home -CacheFallback = Union[bytes, Callable[[str], Awaitable[bytes]]] +CacheFallback = Union[bytes, Callable[[str], Awaitable[Optional[bytes]]]] class Cache(ABC): """Base class for caches""" @abstractmethod - async def get(self, key: str, fallback: CacheFallback) -> bytes: + async def get(self, key: str, fallback: CacheFallback) -> Optional[bytes]: """Get an item in the cache, call fallback if it's not present""" @abstractmethod def set(self, key: str, data: bytes) -> None: - """Set a resource in the cache""" + """Set a content in the cache""" @staticmethod - async def _get_fallback_value(key: str, fallback: CacheFallback) -> bytes: + async def _get_fallback_value(key: str, fallback: CacheFallback) -> Optional[bytes]: if callable(fallback): result = await fallback(key) else: @@ -34,10 +34,10 @@ class Cache(ABC): class NullCache(Cache): """Disabled cache""" - async def get(self, key: str, fallback: CacheFallback) -> bytes: + async def get(self, key: str, fallback: CacheFallback) -> Optional[bytes]: return await self._get_fallback_value(key, fallback) - def set(self, key: str, resource: bytes) -> None: + def set(self, key: str, data: bytes) -> None: pass @@ -53,23 +53,24 @@ class FileCache(Cache): self, key: str, fallback: CacheFallback, - ) -> bytes: + ) -> Optional[bytes]: """Get an item in the cache, call fallback if it's not present""" cache_file_path = self._get_cache_file_path(key) if not cache_file_path.is_file(): - resource = await self._get_fallback_value(key, fallback) - self.set(key, resource) + content = await self._get_fallback_value(key, fallback) + if content is not None: + self.set(key, content) else: with open(cache_file_path, "rb") as cache_file: - resource = cache_file.read() + content = cache_file.read() - return resource + return content - def set(self, key: str, resource: bytes) -> None: - """Set a resource in the cache""" + def set(self, key: str, data: bytes) -> None: + """Set content in the cache""" cache_file_path = self._get_cache_file_path(key) with open(cache_file_path, "wb") as cache_file: - cache_file.write(resource) + cache_file.write(data) @staticmethod def prune(cache_names: list[str]) -> None: @@ -77,7 +78,7 @@ class FileCache(Cache): If empty list is provided, all caches will be cleaned """ - if not len(cache_names): + if not cache_names: cache_names = [ it.name for it in FileCache.cache_base.iterdir() if it.is_dir() ] diff --git a/frontools/config.py b/frontools/config.py index 62a2dad..b3a570a 100644 --- a/frontools/config.py +++ b/frontools/config.py @@ -185,6 +185,7 @@ class Config: return self._sources[name] def echo_error_summary(self) -> None: + """Echo the error summary to the user.""" self._error_summary.echo() def _add_source( @@ -198,11 +199,11 @@ class Config: def _filter(self, url: str, tags: set[str]) -> bool: if self._include_urls: - if all([not it.match(url) for it in self._include_urls]): + if all(not it.match(url) for it in self._include_urls): return False if self._exclude_urls: - if any([it.match(url) for it in self._exclude_urls]): + if any(it.match(url) for it in self._exclude_urls): return False if self._include_tags and not self._include_tags & tags: diff --git a/frontools/css.py b/frontools/css.py index fe0f03e..c1c2ce5 100644 --- a/frontools/css.py +++ b/frontools/css.py @@ -1,6 +1,6 @@ """Css related functions""" from enum import Enum -from typing import AsyncIterable, Iterable, Iterator +from typing import AsyncIterable, Iterator from urllib.parse import urljoin from bs4 import BeautifulSoup @@ -20,10 +20,15 @@ from frontools.sources import Source async def css_diff(url: str, left_source: Source, right_source: Source) -> None: """Print different stylesheets in the given url""" async for css_url in _get_stylesheets(url, left_source): - left_stylesheet_content = await left_source.get_url(css_url) if css_url is None: continue + + left_stylesheet_content = await left_source.get_url(css_url) + assert left_stylesheet_content is not None + right_stylesheet_content = await right_source.get_url(urljoin(url, css_url)) + assert right_stylesheet_content is not None + left_sheet = parse_stylesheet( left_stylesheet_content.decode("utf-8"), True, True ) diff --git a/frontools/sources.py b/frontools/sources.py index d3e8282..1a8dcb9 100644 --- a/frontools/sources.py +++ b/frontools/sources.py @@ -1,28 +1,29 @@ """Source for remote files""" from abc import ABC, abstractmethod +from asyncio import TimeoutError as AIOTimeoutError from contextlib import asynccontextmanager from pathlib import Path from re import Pattern from re import compile as re_compile from typing import AsyncGenerator, AsyncIterable, Optional, cast -from aiohttp import ClientConnectionError, ClientSession +from aiohttp import ClientConnectionError, ClientPayloadError, ClientSession from bs4 import BeautifulSoup -from playwright.async_api import ( - BrowserContext, - Error, - Page, - Route, - TimeoutError, - ViewportSize, - async_playwright, -) +from playwright.async_api import BrowserContext, Error, Page, Route +from playwright.async_api import TimeoutError as PlaywrightTimeoutError +from playwright.async_api import ViewportSize, async_playwright from frontools.cache import Cache from frontools.utils import ErrorSummary class Browser: + """Wrapper around Playwright BrowserContext. + + We need that to set routing on page, and not on browser context, due to a Playwright bug spamming output + with error when setting route directly on the context. + """ + def __init__(self, source: "Source", browser_context: BrowserContext) -> None: """Wraps a browser instance, with helpers methods to load pages.""" self._source = source @@ -30,6 +31,12 @@ class Browser: @asynccontextmanager async def load_page(self, url: str) -> AsyncGenerator[Page, None]: + """Retrieve a page and wait for it to be fully loaded. + + @param url The url to load + + @return A Playwright page, fully loaded. + """ page = await self._browser_context.new_page() await page.route("*", self._source.route) for retry in range(0, 3): @@ -37,8 +44,8 @@ class Browser: await page.goto(url) await page.wait_for_load_state("networkidle") break - except TimeoutError: - if retry == 3: + except PlaywrightTimeoutError: + if retry == 2: self._source._error_summary.add_error( f"Timeout while loading {url} : retried 3 times, skipping" ) @@ -60,7 +67,7 @@ class Source(ABC): self._block_urls = block_urls @abstractmethod - async def get_url(self, url: str) -> bytes: + async def get_url(self, url: str) -> Optional[bytes]: """Retrieve the given url content""" @asynccontextmanager @@ -94,7 +101,10 @@ class Source(ABC): await route.fulfill(status=500) else: content = await self.get_url(url) - await route.fulfill(body=content, status=200) + if content is None: + await route.abort("connectionfailed") + else: + await route.fulfill(body=content, status=200) class CachedSource(Source): @@ -109,19 +119,19 @@ class CachedSource(Source): super().__init__(error_summary, block_urls) self._cache = cache - async def get_url(self, url: str) -> bytes: + async def get_url(self, url: str) -> Optional[bytes]: """Get a page content from the local or remote cache.""" return await self._cache.get(url, self._load_url) - async def _load_url(self, url: str) -> bytes: + async def _load_url(self, url: str) -> Optional[bytes]: try: async with ClientSession() as session: async with session.get(url) as response: return await response.content.read() - except ClientConnectionError as ex: + except (ClientConnectionError, ClientPayloadError, AIOTimeoutError) as ex: self._error_summary.add_error(f"error while loading {url} : {ex}") - return b"" + return None class OverrideSource(Source): @@ -141,7 +151,7 @@ class OverrideSource(Source): for pattern, replace in mappings: self._mappings.append((re_compile(pattern), replace)) - async def get_url(self, url: str) -> bytes: + async def get_url(self, url: str) -> Optional[bytes]: """Return local stylesheet""" for pattern, replace in self._mappings: diff --git a/frontools/url_gather.py b/frontools/url_gather.py new file mode 100644 index 0000000..d9587d9 --- /dev/null +++ b/frontools/url_gather.py @@ -0,0 +1 @@ +"""Gather theme urls from various sources"""