From fc3b44b620c2ab6eb589ce8e640c48a3dfd5423c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Corentin=20S=C3=A9chet?= Date: Mon, 11 Apr 2022 13:35:24 +0200 Subject: [PATCH] common: add ability to include / exclude url patterns --- frontools/cli.py | 23 +++++++++++++++++++++-- frontools/config.py | 27 ++++++++++++++++++++++++--- 2 files changed, 45 insertions(+), 5 deletions(-) diff --git a/frontools/cli.py b/frontools/cli.py index 5f2eea0..8992b89 100644 --- a/frontools/cli.py +++ b/frontools/cli.py @@ -40,13 +40,32 @@ def _async_command(function: Any) -> Any: help="Source to use (configured in config file)", ) @option("--no-cache", type=bool, help="Disable caching", count=True) +@option( + "--include-urls", + type=str, + multiple=True, + help="Take into account only urls matching these patterns", +) +@option( + "--exclude-urls", + type=str, + multiple=True, + help="Patterns of urls to ignore", +) @_async_command async def main( - ctx: ClickContext, config_file: Optional[Path], source: str, no_cache: bool + ctx: ClickContext, + config_file: Optional[Path], + source: str, + no_cache: bool, + include_urls: list[str], + exclude_urls: list[str], ) -> None: """Utilities for EO frontend development.""" - ctx.obj = await Config.load(config_file, source, not no_cache) + ctx.obj = await Config.load( + config_file, source, not no_cache, include_urls, exclude_urls + ) @main.command(name="prune-caches") diff --git a/frontools/config.py b/frontools/config.py index 807010e..0bd3a81 100644 --- a/frontools/config.py +++ b/frontools/config.py @@ -2,6 +2,7 @@ from gettext import gettext as _ from importlib.util import module_from_spec, spec_from_file_location from pathlib import Path +from re import compile as re_compile from typing import Iterable, Optional from xdg import xdg_config_dirs @@ -28,7 +29,13 @@ class SiteConfig: class Config: """Configuration object""" - def __init__(self, use_cache: bool, default_source_name: Optional[str]): + def __init__( + self, + use_cache: bool, + default_source_name: Optional[str], + include_urls: list[str], + exclude_urls: list[str], + ): self._use_cache = use_cache self._sources: dict[str, Source] = {} self._sites: dict[str, SiteConfig] = {} @@ -40,13 +47,19 @@ class Config: remote_cache = self.get_data_cache(REMOTE_SOURCE_NAME) self._add_source(REMOTE_SOURCE_NAME, CachedSource(remote_cache)) + self._include_urls = [re_compile(it) for it in include_urls] + self._exclude_urls = [re_compile(it) for it in exclude_urls] @staticmethod async def load( - config_path: Optional[Path], default_source_name: Optional[str], use_cache: bool + config_path: Optional[Path], + default_source_name: Optional[str], + use_cache: bool, + include_urls: list[str], + exclude_urls: list[str], ) -> "Config": """Load config from the given path""" - config = Config(use_cache, default_source_name) + config = Config(use_cache, default_source_name, include_urls, exclude_urls) if config_path is None: config_path = _find_config() @@ -93,6 +106,14 @@ class Config: def add_site_url(self, name: str, url: str) -> None: """Add an url for a site""" + if len(self._include_urls): + if all([not it.match(url) for it in self._include_urls]): + return + + if len(self._exclude_urls): + if any([it.match(url) for it in self._exclude_urls]): + return + if name not in self._sites: self._sites[name] = SiteConfig([])