# misc-csechet/frontools/css.py

"""Css related functions"""
from enum import Enum
from typing import AsyncIterable, Iterable, Iterator
from urllib.parse import urljoin

from bs4 import BeautifulSoup
from tinycss2 import parse_stylesheet
from tinycss2.ast import (
    IdentToken,
    LiteralToken,
    Node,
    SquareBracketsBlock,
    WhitespaceToken,
)
from tinycss2.parser import QualifiedRule

from frontools.sources import Source


async def css_diff(url: str, left_source: Source, right_source: Source) -> None:
    """Print the rules of a page's stylesheets that are missing on the right.

    Every stylesheet linked from the page at ``url`` (as served by
    ``left_source``) is fetched from both sources and parsed. Each rule that
    is indexed for the left copy and has no entry under the same key in the
    right copy is printed as CSS text. Rules that exist only in the right
    copy are not reported.

    Args:
        url: Address of the page whose stylesheets are compared.
        left_source: Source used to list the stylesheets and to fetch the
            left copy of each one.
        right_source: Source used to fetch the right copy of each one.
    """
    async for css_url in _get_stylesheets(url, left_source):
        # Validate the address before it is used for any request.
        if css_url is None:
            continue

        # css_url has already been resolved against url by _get_stylesheets,
        # so both sources are queried with exactly the same address.
        left_stylesheet_content = await left_source.get_url(css_url)
        right_stylesheet_content = await right_source.get_url(css_url)

        left_sheet = parse_stylesheet(
            left_stylesheet_content.decode("utf-8"),
            skip_comments=True,
            skip_whitespace=True,
        )
        right_sheet = parse_stylesheet(
            right_stylesheet_content.decode("utf-8"),
            skip_comments=True,
            skip_whitespace=True,
        )

        left_selector_index = _get_selector_index(left_sheet)
        right_selector_index = _get_selector_index(right_sheet)

        for selector, rule in left_selector_index.items():
            if selector not in right_selector_index:
                # The repr of a tinycss2 rule is only a placeholder, so print
                # the rule serialized back to CSS instead.
                print(rule.serialize())


def _get_selector_index(rule_list: list[QualifiedRule]) -> dict[str, QualifiedRule]:
    """Index the qualified rules of a parsed stylesheet by selector text.

    The key of an entry is the rule's prelude serialized back to CSS with
    the surrounding whitespace removed (for instance ``"ul > li"``). Nodes
    that are not qualified rules are ignored. When several rules share the
    same selector text, the last one wins.

    Args:
        rule_list: Nodes of a parsed stylesheet.

    Returns:
        Mapping from selector text to the rule declaring it; empty when
        ``rule_list`` holds no qualified rule.
    """
    result: dict[str, QualifiedRule] = {}
    for rule in rule_list:
        # A parsed stylesheet can also hold at-rules and parse errors; the
        # latter have no prelude at all, so filter on the node type.
        if getattr(rule, "type", None) != "qualified-rule":
            continue
        selector = "".join(token.serialize() for token in rule.prelude).strip()
        result[selector] = rule

    return result


class Combinator(Enum):
    """Relationship linking two parts of a CSS selector."""

    # NOTE(review): presumably the descendant (whitespace) combinator; no
    # token is mapped to it in this module -- confirm.
    CHILD = 0

    # Selected by the ">" token.
    DIRECT_CHILD = 1

    # Selected by the "+" token.
    ADJACENT_SIBLING = 2

    # Selected by the "~" token.
    GENERAL_SIBLING = 3


class Selector:
    """Structured representation of a CSS selector.

    A new instance starts empty: it has no class names and no combined
    selectors.
    """

    def __init__(self) -> None:
        # Class names attached to this selector.
        self._classes: set[str] = set()
        # Selectors linked to this one, keyed by the combinator linking them.
        # This must be a fresh dict per instance; binding the bare
        # ``dict[...]`` alias here would store a type, not a mapping.
        self._combined: dict[Combinator, "Selector"] = {}


def _parse_selector(token_iterator: Iterator[Node]) -> Selector:
    """Build a ``Selector`` from the tokens of a rule prelude.

    Two constructs are recorded on the result:

    * an identifier directly following a ``.`` is added to ``_classes``;
    * when ``>``, ``+`` or ``~`` is met, all remaining tokens are parsed
      recursively and the resulting selector is stored in ``_combined``
      under the matching ``Combinator``.

    Every other token (whitespace, type selectors, attribute blocks, ids,
    pseudo-classes, commas, ...) is skipped, since ``Selector`` only stores
    classes and combined selectors. A ``.`` that is not followed by an
    identifier is ignored rather than treated as an error.

    Args:
        token_iterator: Iterator over the prelude tokens. It is consumed
            up to its end.

    Returns:
        The populated selector; it is empty when nothing was recognised.
    """
    combinator_mappings = {
        "+": Combinator.ADJACENT_SIBLING,
        ">": Combinator.DIRECT_CHILD,
        "~": Combinator.GENERAL_SIBLING,
    }
    classes: set[str] = set()
    child_selectors: dict[Combinator, Selector] = {}

    # Iterating with ``for`` guarantees that each pass consumes a token, so
    # unsupported tokens can never stall the parser.
    after_dot = False
    for token in token_iterator:
        if after_dot:
            after_dot = False
            if isinstance(token, IdentToken):
                classes.add(token.value)
                continue
            # Malformed class selector: handle the token like any other.

        if not isinstance(token, LiteralToken):
            continue

        if token.value == ".":
            after_dot = True
        elif token.value in combinator_mappings:
            combinator = combinator_mappings[token.value]
            child_selectors[combinator] = _parse_selector(token_iterator)
            # The recursive call drained the iterator; nothing is left.
            break

    selector = Selector()
    selector._classes = classes
    selector._combined = child_selectors
    return selector


async def _get_stylesheets(url: str, source: Source) -> AsyncIterable[str]:
    """Yield the address of every stylesheet linked from a page.

    The page is fetched from ``source`` and parsed as HTML. For each
    ``<link>`` element whose ``rel`` contains ``stylesheet``, the ``href``
    is resolved against ``url`` and yielded. Links without an ``href``, or
    with an empty one, are skipped.

    Args:
        url: Address of the page to inspect.
        source: Source used to fetch the page.

    Yields:
        The stylesheet addresses, resolved against ``url``.
    """
    page = await source.get_url(url)
    page_html = BeautifulSoup(page, features="html5lib")
    for link in page_html.find_all("link"):
        if "stylesheet" not in link.get("rel", []):
            continue

        href = link.get("href")
        if not href:
            # Nothing to fetch: a missing href would raise KeyError and an
            # empty one would resolve to the page itself.
            continue

        yield urljoin(url, href)