sources: Retrying in case of a timeout error

Corentin Sechet 2022-04-11 14:56:45 +02:00
parent 9afe5d3701
commit 171b570e12
1 changed file with 20 additions and 8 deletions
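
The change below wraps Playwright's wait_for_load_state call in a retry loop and only reports a timeout after the last attempt fails. A minimal standalone sketch of that pattern, assuming a Playwright Page object; the helper name, retry count, and print-based reporting are illustrative, not this project's API:

from playwright.async_api import Page, TimeoutError as PlaywrightTimeoutError

async def wait_until_idle(page: Page, url: str, retries: int = 3) -> None:
    # Retry the "networkidle" wait a fixed number of times before giving up.
    for attempt in range(retries):
        try:
            await page.wait_for_load_state("networkidle")
            return
        except PlaywrightTimeoutError:
            if attempt == retries - 1:
                # Final attempt failed: report instead of raising, as the commit does.
                print(f"Error while loading {url}: timeout, retried {retries} times")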


@@ -5,12 +5,13 @@ from re import Pattern
 from re import compile as re_compile
 from typing import AsyncGenerator, AsyncIterable, Optional, cast
-from aiohttp import ClientSession, ClientConnectionError
+from aiohttp import ClientConnectionError, ClientSession
 from bs4 import BeautifulSoup
 from playwright.async_api import (
     BrowserContext,
     Page,
     Route,
+    TimeoutError,
     ViewportSize,
     async_playwright,
 )
@@ -30,7 +31,15 @@ class Browser:
         page = await self._browser_context.new_page()
         await page.route("*", self._source.route)
         await page.goto(url)
-        await page.wait_for_load_state("networkidle", timeout=1000 * 60 * 2)
+        for retry in range(0, 3):
+            try:
+                await page.wait_for_load_state("networkidle")
+                break
+            except TimeoutError:
+                if retry == 2:  # last of the 3 attempts
+                    self._source._error_summary.add_error(
+                        f"Error while loading {url} : timeout, retried 3 times"
+                    )
         yield page
         await page.close()
@@ -51,13 +60,16 @@ class Source(ABC):
     ) -> AsyncGenerator[Browser, None]:
         """Return a Playwright browser that will eventually get files from local cache"""
-        viewport: ViewportSize = cast(
+        viewport: Optional[ViewportSize] = cast(
             ViewportSize, None
-        ) # playwright typings are broken
+        ) # Playwright typings are broken
         if width is not None:
-            assert height is not None
-            viewport = dict(width=width, height=height)
+            viewport = dict(
+                # height is not used, as screenshots are taken full page
+                width=width,
+                height=600,
+            )
         async with async_playwright() as pwright:
             browser = await pwright.firefox.launch(headless=True)
@@ -89,9 +101,9 @@ class CachedSource(Source):
             async with session.get(url) as response:
                 return await response.content.read()
         except ClientConnectionError as ex:
-            self._error_summary.add_error(f'error while loading {url} : {ex}')
+            self._error_summary.add_error(f"error while loading {url} : {ex}")
-            return b''
+            return b""
 class OverrideSource(Source):
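
The height=600 placeholder in the viewport change above relies on screenshots being taken full page, so the viewport height does not limit what ends up in the image. A minimal sketch of that Playwright call; the viewport size and function name are illustrative, not this project's Source/Browser wiring:

from playwright.async_api import async_playwright

async def capture_full_page(url: str) -> bytes:
    async with async_playwright() as pwright:
        browser = await pwright.firefox.launch(headless=True)
        page = await browser.new_page(viewport={"width": 1280, "height": 600})
        await page.goto(url)
        # full_page=True captures the entire scrollable page, regardless of viewport height.
        image = await page.screenshot(full_page=True)
        await browser.close()
        return image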