misc-csechet/frontools/css.py

113 lines
3.7 KiB
Python

"""Css related functions"""
from enum import Enum
from typing import AsyncIterable, Iterable, Iterator
from urllib.parse import urljoin

from bs4 import BeautifulSoup
from tinycss2 import parse_stylesheet, serialize
from tinycss2.ast import (
    IdentToken,
    LiteralToken,
    Node,
    SquareBracketsBlock,
    WhitespaceToken,
)
from tinycss2.parser import QualifiedRule

from frontools.sources import Source
async def css_diff(url: str, left_source: Source, right_source: Source) -> None:
    """Print the left-hand stylesheet rules that are missing on the right.

    The stylesheets linked from the page at ``url`` are discovered through
    ``left_source``. Each one is fetched from both sources, parsed with
    tinycss2 and indexed by selector; every rule of the left stylesheet whose
    selector is absent from the right index is printed.

    Args:
        url: Address of the page whose stylesheets are compared.
        left_source: Source used to list the stylesheets and fetch the
            reference version of each.
        right_source: Source used to fetch the version compared against.
    """
    async for css_url in _get_stylesheets(url, left_source):
        # Check before fetching, so a missing URL is never handed to a source.
        if css_url is None:
            continue

        left_stylesheet_content = await left_source.get_url(css_url)
        right_stylesheet_content = await right_source.get_url(urljoin(url, css_url))

        # NOTE(review): assumes Source.get_url returns UTF-8 encoded bytes.
        left_sheet = parse_stylesheet(
            left_stylesheet_content.decode("utf-8"),
            skip_comments=True,
            skip_whitespace=True,
        )
        right_sheet = parse_stylesheet(
            right_stylesheet_content.decode("utf-8"),
            skip_comments=True,
            skip_whitespace=True,
        )

        left_selector_index = _get_selector_index(left_sheet)
        right_selector_index = _get_selector_index(right_sheet)

        for selector, rule in left_selector_index.items():
            if selector not in right_selector_index:
                print(rule)
def _get_selector_index(rule_list: list[QualifiedRule]) -> dict[str, QualifiedRule]:
    """Index the qualified rules of a parsed stylesheet by selector text.

    The key is the rule's prelude serialized back to CSS, with surrounding
    whitespace removed. Nodes that are not qualified rules (for instance
    at-rules or parse errors) are skipped. When the same selector text occurs
    several times, the last rule wins.

    Args:
        rule_list: Nodes of a parsed stylesheet.

    Returns:
        Mapping from selector text to the rule declared with that selector.
    """
    return {
        serialize(rule.prelude).strip(): rule
        for rule in rule_list
        if isinstance(rule, QualifiedRule)
    }
class Combinator(Enum):
    """Kinds of relationship that can link two parts of a CSS selector."""

    # NOTE(review): presumably the descendant (whitespace) combinator; confirm.
    CHILD = 0
    # Written ">" in CSS.
    DIRECT_CHILD = 1
    # Written "+" in CSS.
    ADJACENT_SIBLING = 2
    # Written "~" in CSS.
    GENERAL_SIBLING = 3
class Selector:
    """Parsed representation of a CSS selector."""

    def __init__(self) -> None:
        """Create a selector with no classes and no combined selectors."""
        # Class names the selector requires.
        self._classes: set[str] = set()
        # Selectors linked to this one, keyed by the combinator linking them.
        # This must be a real dict instance: assigning the subscripted type
        # itself would store a generic alias that cannot hold any entry.
        self._combined: dict[Combinator, "Selector"] = {}
def _parse_selector(token_iterator: Iterator[Node]) -> Selector:
    """Build a ``Selector`` from the tokens of a rule prelude.

    Class names (``.name``) are collected, and the ``+``, ``>`` and ``~``
    combinators start a nested selector that is parsed recursively from the
    remaining tokens. Every other token (whitespace, tag names, attribute
    blocks, ids, commas, pseudo-classes, ...) is skipped: ``Selector`` has no
    field to store them.

    The iterator is always consumed up to its end, so the function terminates
    on any finite token stream.

    Args:
        token_iterator: Iterator over the component values of a prelude.

    Returns:
        The selector holding the classes and combined selectors found.
    """
    combinator_mappings = {
        "+": Combinator.ADJACENT_SIBLING,
        ">": Combinator.DIRECT_CHILD,
        "~": Combinator.GENERAL_SIBLING,
    }
    classes: set[str] = set()
    child_selectors: dict[Combinator, Selector] = {}

    try:
        token = next(token_iterator)
        while True:
            if isinstance(token, LiteralToken) and token.value == ".":
                token = next(token_iterator)
                if isinstance(token, IdentToken):
                    classes.add(token.value)
                    token = next(token_iterator)
                # Otherwise the "." was stray: the token that follows it is
                # handled by the next iteration instead of failing.
                continue

            if isinstance(token, LiteralToken) and token.value in combinator_mappings:
                combinator = combinator_mappings[token.value]
                child_selectors[combinator] = _parse_selector(token_iterator)
                # The recursive call only returns once the iterator is
                # exhausted, so nothing is left to parse at this level.
                break

            # Unsupported or irrelevant token: always advance, otherwise the
            # loop would spin forever on the same token.
            token = next(token_iterator)
    except StopIteration:
        # End of the prelude: the normal way out of the loop.
        pass

    selector = Selector()
    selector._classes = classes
    selector._combined = child_selectors
    return selector
async def _get_stylesheets(url: str, source: Source) -> AsyncIterable[str]:
    """Yield the absolute URL of each stylesheet linked from the page at *url*.

    The page is fetched through ``source`` and parsed with BeautifulSoup
    (html5lib parser). Every ``<link>`` element whose ``rel`` attribute
    contains ``stylesheet`` contributes its ``href``, resolved against
    ``url``. Links without an ``href`` are skipped.

    Args:
        url: Address of the page to inspect.
        source: Source used to fetch the page.

    Yields:
        Absolute stylesheet URLs, in document order.
    """
    page = await source.get_url(url)
    page_html = BeautifulSoup(page, features="html5lib")

    for link in page_html.find_all("link"):
        if "stylesheet" not in link.get("rel", []):
            continue

        href = link.get("href")
        if not href:
            # Nothing to fetch; indexing link["href"] would raise KeyError
            # when the attribute is missing.
            continue

        yield urljoin(url, href)