2022-03-28 18:26:50 +02:00
|
|
|
"""Source for remote files"""
|
|
|
|
from abc import ABC, abstractmethod
|
2022-04-05 16:34:45 +02:00
|
|
|
from contextlib import asynccontextmanager
|
2022-03-28 18:26:50 +02:00
|
|
|
from re import Pattern
|
|
|
|
from re import compile as re_compile
|
2022-04-08 14:07:42 +02:00
|
|
|
from typing import AsyncGenerator, Optional, cast
|
2022-03-28 18:26:50 +02:00
|
|
|
|
2022-03-30 02:19:02 +02:00
|
|
|
from aiohttp import ClientSession
|
2022-04-11 12:22:33 +02:00
|
|
|
from playwright.async_api import BrowserContext, Route, ViewportSize, async_playwright, Page
|
2022-03-30 02:19:02 +02:00
|
|
|
|
|
|
|
from frontools.cache import Cache
|
2022-03-28 18:26:50 +02:00
|
|
|
|
|
|
|
|
2022-04-11 12:22:33 +02:00
|
|
|
class Browser:
    """Wraps a browser context, with helper methods to load pages."""

    def __init__(self, source: 'Source', browser_context: BrowserContext) -> None:
        """Store the source answering routed requests and the browser context.

        :param source: object whose ``route`` coroutine is registered as the
            request handler of every page opened by :meth:`load_page`.
        :param browser_context: context used to open new pages.
        """
        self._source = source
        self._browser_context = browser_context

    @asynccontextmanager
    async def load_page(self, url: str) -> AsyncGenerator[Page, None]:
        """Open a new page on ``url`` and yield it once the network is idle.

        The page is closed when the ``async with`` block exits, including when
        navigation or the body of the block raises.

        :param url: address to navigate to.
        """
        page = await self._browser_context.new_page()
        try:
            # NOTE(review): in Playwright globs "*" is documented as not
            # matching "/", so "**" may have been intended here - confirm.
            await page.route("*", self._source.route)
            await page.goto(url)
            await page.wait_for_load_state("networkidle")
            yield page
        finally:
            # Without this, an exception thrown into the generator at the
            # yield point would skip the close and leak the page.
            await page.close()
|
|
|
|
|
|
|
|
|
2022-03-28 18:26:50 +02:00
|
|
|
class Source(ABC):
    """Base class for sources"""

    @abstractmethod
    async def get_url(self, url: str) -> bytes:
        """Retrieve the given url content"""

    @asynccontextmanager
    async def get_browser(
        self, width: Optional[int] = None, height: Optional[int] = None
    ) -> AsyncGenerator[Browser, None]:
        """Yield a headless Firefox :class:`Browser` bound to this source.

        Pages opened through the yielded object have their requests handed to
        :meth:`route`, and therefore answered by :meth:`get_url`.

        :param width: viewport width; when given, ``height`` is required too.
        :param height: viewport height; ignored when ``width`` is ``None``.
        :raises ValueError: if ``width`` is given without ``height``.
        """
        if width is None:
            # No explicit size requested: pass None through to Playwright.
            viewport: ViewportSize = cast(
                ViewportSize, None
            )  # playwright typings are broken
        else:
            # Explicit check rather than ``assert``, which is stripped under
            # ``python -O`` and would let a None height reach Playwright.
            if height is None:
                raise ValueError("height is required when width is given")
            viewport = {"width": width, "height": height}

        async with async_playwright() as pwright:
            browser = await pwright.firefox.launch(headless=True)
            try:
                context = await browser.new_context(viewport=viewport)
                yield Browser(self, context)
            finally:
                # Close the browser even when the caller's block raises.
                await browser.close()

    async def route(self, route: Route) -> None:
        """Fulfill a routed request with the content returned by ``get_url``.

        The response is always sent with status 200.
        """
        content = await self.get_url(route.request.url)
        await route.fulfill(body=content, status=200)
|
|
|
|
|
2022-03-28 18:26:50 +02:00
|
|
|
|
|
|
|
class CachedSource(Source):
    """Source fetching urls over HTTP, going through a cache."""

    def __init__(self, cache: Cache[bytes]) -> None:
        """Keep the cache that ``get_url`` queries."""
        self._cache = cache

    async def get_url(self, url: str) -> bytes:
        """Return the content for ``url`` as provided by the cache.

        The cache is handed ``_load_url`` as loader; presumably it calls it
        when the url is not cached yet (verify against ``Cache.get``).
        """
        content = await self._cache.get(url, self._load_url)
        return content

    @staticmethod
    async def _load_url(url: str) -> bytes:
        """Download ``url`` with a fresh HTTP session and return the raw body."""
        async with ClientSession() as http, http.get(url) as reply:
            body = await reply.content.read()
        return body
|
2022-03-28 18:26:50 +02:00
|
|
|
|
|
|
|
|
2022-03-29 11:03:48 +02:00
|
|
|
class OverrideSource(Source):
    """Source serving local files for urls matching configured patterns."""

    def __init__(
        self,
        mappings: list[tuple[str, str]],
        next_source: Source,
    ):
        """Compile the override patterns and remember the fallback source.

        :param mappings: ``(regex, replacement)`` pairs; the replacement is
            applied to a matching url to obtain a local file path.
        :param next_source: source queried when no pattern matches.
        """
        self._mappings: list[tuple[Pattern[str], str]] = [
            (re_compile(regex), target) for regex, target in mappings
        ]
        self._next_source = next_source

    async def get_url(self, url: str) -> bytes:
        """Return the mapped local file content, or defer to the next source.

        Patterns are tried in order and matched from the start of ``url``; the
        first match wins.
        """
        for regex, target in self._mappings:
            if regex.match(url) is None:
                continue
            local_path = regex.sub(target, url)
            with open(local_path, "rb") as local_file:
                return local_file.read()

        return await self._next_source.get_url(url)
|