diff --git a/frontools/sources.py b/frontools/sources.py index eee4db0..5d2df72 100644 --- a/frontools/sources.py +++ b/frontools/sources.py @@ -5,12 +5,13 @@ from re import Pattern from re import compile as re_compile from typing import AsyncGenerator, AsyncIterable, Optional, cast -from aiohttp import ClientSession, ClientConnectionError +from aiohttp import ClientConnectionError, ClientSession from bs4 import BeautifulSoup from playwright.async_api import ( BrowserContext, Page, Route, + TimeoutError, ViewportSize, async_playwright, ) @@ -30,7 +31,15 @@ class Browser: page = await self._browser_context.new_page() await page.route("*", self._source.route) await page.goto(url) - await page.wait_for_load_state("networkidle", timeout=1000 * 60 * 2) + for retry in range(0, 3): + try: + await page.wait_for_load_state("networkidle") + break + except TimeoutError: + if retry == 3: + self._source._error_summary.add_error( + f"Error while loading {url} : timeout, retried 3 times" + ) yield page await page.close() @@ -51,13 +60,16 @@ class Source(ABC): ) -> AsyncGenerator[Browser, None]: """Return a Playwright browser that will eventually get files from local cache""" - viewport: ViewportSize = cast( + viewport: Optional[ViewportSize] = cast( ViewportSize, None - ) # playwright typings are broken + ) # Playwright typings are broken if width is not None: - assert height is not None - viewport = dict(width=width, height=height) + viewport = dict( + # height is not used, as screenshot are taken full page + width=width, + height=600, + ) async with async_playwright() as pwright: browser = await pwright.firefox.launch(headless=True) @@ -89,9 +101,9 @@ class CachedSource(Source): async with session.get(url) as response: return await response.content.read() except ClientConnectionError as ex: - self._error_summary.add_error(f'error while loading {url} : {ex}') + self._error_summary.add_error(f"error while loading {url} : {ex}") - return b'' + return b"" class OverrideSource(Source):