common: fix lint errors

This commit is contained in:
Corentin Sechet 2022-04-12 10:36:14 +02:00
parent 4f9dd27045
commit 98fbfe63ab
5 changed files with 57 additions and 39 deletions

View File

@ -2,27 +2,27 @@
from abc import ABC, abstractmethod
from pathlib import Path
from shutil import rmtree
from typing import Awaitable, Callable, Union
from typing import Awaitable, Callable, Union, Optional
from click import echo
from xdg import xdg_cache_home
CacheFallback = Union[bytes, Callable[[str], Awaitable[bytes]]]
CacheFallback = Union[bytes, Callable[[str], Awaitable[Optional[bytes]]]]
class Cache(ABC):
"""Base class for caches"""
@abstractmethod
async def get(self, key: str, fallback: CacheFallback) -> bytes:
async def get(self, key: str, fallback: CacheFallback) -> Optional[bytes]:
"""Get an item in the cache, call fallback if it's not present"""
@abstractmethod
def set(self, key: str, data: bytes) -> None:
"""Set a resource in the cache"""
"""Set a content in the cache"""
@staticmethod
async def _get_fallback_value(key: str, fallback: CacheFallback) -> bytes:
async def _get_fallback_value(key: str, fallback: CacheFallback) -> Optional[bytes]:
if callable(fallback):
result = await fallback(key)
else:
@ -34,10 +34,10 @@ class Cache(ABC):
class NullCache(Cache):
"""Disabled cache"""
async def get(self, key: str, fallback: CacheFallback) -> bytes:
async def get(self, key: str, fallback: CacheFallback) -> Optional[bytes]:
return await self._get_fallback_value(key, fallback)
def set(self, key: str, resource: bytes) -> None:
def set(self, key: str, data: bytes) -> None:
pass
@ -53,23 +53,24 @@ class FileCache(Cache):
self,
key: str,
fallback: CacheFallback,
) -> bytes:
) -> Optional[bytes]:
"""Get an item in the cache, call fallback if it's not present"""
cache_file_path = self._get_cache_file_path(key)
if not cache_file_path.is_file():
resource = await self._get_fallback_value(key, fallback)
self.set(key, resource)
content = await self._get_fallback_value(key, fallback)
if content is not None:
self.set(key, content)
else:
with open(cache_file_path, "rb") as cache_file:
resource = cache_file.read()
content = cache_file.read()
return resource
return content
def set(self, key: str, resource: bytes) -> None:
"""Set a resource in the cache"""
def set(self, key: str, data: bytes) -> None:
"""Set content in the cache"""
cache_file_path = self._get_cache_file_path(key)
with open(cache_file_path, "wb") as cache_file:
cache_file.write(resource)
cache_file.write(data)
@staticmethod
def prune(cache_names: list[str]) -> None:
@ -77,7 +78,7 @@ class FileCache(Cache):
If empty list is provided, all caches will be cleaned
"""
if not len(cache_names):
if not cache_names:
cache_names = [
it.name for it in FileCache.cache_base.iterdir() if it.is_dir()
]

View File

@ -185,6 +185,7 @@ class Config:
return self._sources[name]
def echo_error_summary(self) -> None:
"""Echo the error summary to the user."""
self._error_summary.echo()
def _add_source(
@ -198,11 +199,11 @@ class Config:
def _filter(self, url: str, tags: set[str]) -> bool:
if self._include_urls:
if all([not it.match(url) for it in self._include_urls]):
if all(not it.match(url) for it in self._include_urls):
return False
if self._exclude_urls:
if any([it.match(url) for it in self._exclude_urls]):
if any(it.match(url) for it in self._exclude_urls):
return False
if self._include_tags and not self._include_tags & tags:

View File

@ -1,6 +1,6 @@
"""Css related functions"""
from enum import Enum
from typing import AsyncIterable, Iterable, Iterator
from typing import AsyncIterable, Iterator
from urllib.parse import urljoin
from bs4 import BeautifulSoup
@ -20,10 +20,15 @@ from frontools.sources import Source
async def css_diff(url: str, left_source: Source, right_source: Source) -> None:
"""Print different stylesheets in the given url"""
async for css_url in _get_stylesheets(url, left_source):
left_stylesheet_content = await left_source.get_url(css_url)
if css_url is None:
continue
left_stylesheet_content = await left_source.get_url(css_url)
assert left_stylesheet_content is not None
right_stylesheet_content = await right_source.get_url(urljoin(url, css_url))
assert right_stylesheet_content is not None
left_sheet = parse_stylesheet(
left_stylesheet_content.decode("utf-8"), True, True
)

View File

@ -1,28 +1,29 @@
"""Source for remote files"""
from abc import ABC, abstractmethod
from asyncio import TimeoutError as AIOTimeoutError
from contextlib import asynccontextmanager
from pathlib import Path
from re import Pattern
from re import compile as re_compile
from typing import AsyncGenerator, AsyncIterable, Optional, cast
from aiohttp import ClientConnectionError, ClientSession
from aiohttp import ClientConnectionError, ClientPayloadError, ClientSession
from bs4 import BeautifulSoup
from playwright.async_api import (
BrowserContext,
Error,
Page,
Route,
TimeoutError,
ViewportSize,
async_playwright,
)
from playwright.async_api import BrowserContext, Error, Page, Route
from playwright.async_api import TimeoutError as PlaywrightTimeoutError
from playwright.async_api import ViewportSize, async_playwright
from frontools.cache import Cache
from frontools.utils import ErrorSummary
class Browser:
"""Wrapper around Playwright BrowserContext.
We need that to set routing on page, and not on browser context, due to a Playwright bug spamming output
with error when setting route directly on the context.
"""
def __init__(self, source: "Source", browser_context: BrowserContext) -> None:
"""Wraps a browser instance, with helpers methods to load pages."""
self._source = source
@ -30,6 +31,12 @@ class Browser:
@asynccontextmanager
async def load_page(self, url: str) -> AsyncGenerator[Page, None]:
"""Retrieve a page and wait for it to be fully loaded.
@param url The url to load
@return A Playwright page, fully loaded.
"""
page = await self._browser_context.new_page()
await page.route("*", self._source.route)
for retry in range(0, 3):
@ -37,8 +44,8 @@ class Browser:
await page.goto(url)
await page.wait_for_load_state("networkidle")
break
except TimeoutError:
if retry == 3:
except PlaywrightTimeoutError:
if retry == 2:
self._source._error_summary.add_error(
f"Timeout while loading {url} : retried 3 times, skipping"
)
@ -60,7 +67,7 @@ class Source(ABC):
self._block_urls = block_urls
@abstractmethod
async def get_url(self, url: str) -> bytes:
async def get_url(self, url: str) -> Optional[bytes]:
"""Retrieve the given url content"""
@asynccontextmanager
@ -94,7 +101,10 @@ class Source(ABC):
await route.fulfill(status=500)
else:
content = await self.get_url(url)
await route.fulfill(body=content, status=200)
if content is None:
await route.abort("connectionfailed")
else:
await route.fulfill(body=content, status=200)
class CachedSource(Source):
@ -109,19 +119,19 @@ class CachedSource(Source):
super().__init__(error_summary, block_urls)
self._cache = cache
async def get_url(self, url: str) -> bytes:
async def get_url(self, url: str) -> Optional[bytes]:
"""Get a page content from the local or remote cache."""
return await self._cache.get(url, self._load_url)
async def _load_url(self, url: str) -> bytes:
async def _load_url(self, url: str) -> Optional[bytes]:
try:
async with ClientSession() as session:
async with session.get(url) as response:
return await response.content.read()
except ClientConnectionError as ex:
except (ClientConnectionError, ClientPayloadError, AIOTimeoutError) as ex:
self._error_summary.add_error(f"error while loading {url} : {ex}")
return b""
return None
class OverrideSource(Source):
@ -141,7 +151,7 @@ class OverrideSource(Source):
for pattern, replace in mappings:
self._mappings.append((re_compile(pattern), replace))
async def get_url(self, url: str) -> bytes:
async def get_url(self, url: str) -> Optional[bytes]:
"""Return local stylesheet"""
for pattern, replace in self._mappings:

1
frontools/url_gather.py Normal file
View File

@ -0,0 +1 @@
"""Gather theme urls from various sources"""