sources: Retrying in case of a timeout error
This commit is contained in:
parent
9afe5d3701
commit
171b570e12
|
@ -5,12 +5,13 @@ from re import Pattern
|
|||
from re import compile as re_compile
|
||||
from typing import AsyncGenerator, AsyncIterable, Optional, cast
|
||||
|
||||
from aiohttp import ClientSession, ClientConnectionError
|
||||
from aiohttp import ClientConnectionError, ClientSession
|
||||
from bs4 import BeautifulSoup
|
||||
from playwright.async_api import (
|
||||
BrowserContext,
|
||||
Page,
|
||||
Route,
|
||||
TimeoutError,
|
||||
ViewportSize,
|
||||
async_playwright,
|
||||
)
|
||||
|
@ -30,7 +31,15 @@ class Browser:
|
|||
page = await self._browser_context.new_page()
|
||||
await page.route("*", self._source.route)
|
||||
await page.goto(url)
|
||||
await page.wait_for_load_state("networkidle", timeout=1000 * 60 * 2)
|
||||
for retry in range(0, 3):
|
||||
try:
|
||||
await page.wait_for_load_state("networkidle")
|
||||
break
|
||||
except TimeoutError:
|
||||
if retry == 3:
|
||||
self._source._error_summary.add_error(
|
||||
f"Error while loading {url} : timeout, retried 3 times"
|
||||
)
|
||||
yield page
|
||||
await page.close()
|
||||
|
||||
|
@ -51,13 +60,16 @@ class Source(ABC):
|
|||
) -> AsyncGenerator[Browser, None]:
|
||||
"""Return a Playwright browser that will eventually get files from local cache"""
|
||||
|
||||
viewport: ViewportSize = cast(
|
||||
viewport: Optional[ViewportSize] = cast(
|
||||
ViewportSize, None
|
||||
) # playwright typings are broken
|
||||
) # Playwright typings are broken
|
||||
|
||||
if width is not None:
|
||||
assert height is not None
|
||||
viewport = dict(width=width, height=height)
|
||||
viewport = dict(
|
||||
# height is not used, as screenshots are taken full page
|
||||
width=width,
|
||||
height=600,
|
||||
)
|
||||
|
||||
async with async_playwright() as pwright:
|
||||
browser = await pwright.firefox.launch(headless=True)
|
||||
|
@ -89,9 +101,9 @@ class CachedSource(Source):
|
|||
async with session.get(url) as response:
|
||||
return await response.content.read()
|
||||
except ClientConnectionError as ex:
|
||||
self._error_summary.add_error(f'error while loading {url} : {ex}')
|
||||
self._error_summary.add_error(f"error while loading {url} : {ex}")
|
||||
|
||||
return b''
|
||||
return b""
|
||||
|
||||
|
||||
class OverrideSource(Source):
|
||||
|
|
Loading…
Reference in New Issue