"""Css related functions"""
|
|
from enum import Enum
|
|
from typing import AsyncIterable, Iterable, Iterator
|
|
from urllib.parse import urljoin
|
|
|
|
from bs4 import BeautifulSoup
|
|
from tinycss2 import parse_stylesheet
|
|
from tinycss2.ast import (
|
|
IdentToken,
|
|
LiteralToken,
|
|
Node,
|
|
SquareBracketsBlock,
|
|
WhitespaceToken,
|
|
)
|
|
from tinycss2.parser import QualifiedRule
|
|
|
|
from frontools.sources import Source
|
|
|
|
|
|
async def css_diff(url: str, left_source: Source, right_source: Source) -> None:
    """Print stylesheet rules present in *left_source* but missing from *right_source*.

    Fetches every stylesheet linked from *url*, parses both versions, and
    prints each rule whose selector exists only on the left side.

    :param url: page whose linked stylesheets are compared.
    :param left_source: source treated as the reference version.
    :param right_source: source compared against the reference.
    """
    async for css_url in _get_stylesheets(url, left_source):
        # Guard BEFORE any network access — the original fetched the URL
        # first and only then checked it for None, so the guard could
        # never prevent the bad call.
        if css_url is None:
            continue
        # _get_stylesheets already yields absolute URLs (it urljoins every
        # href against the page URL), so both sides fetch css_url directly;
        # the urljoin(url, css_url) previously applied only to the right
        # side was a no-op for absolute URLs and made the two sides
        # needlessly asymmetric.
        left_stylesheet_content = await left_source.get_url(css_url)
        right_stylesheet_content = await right_source.get_url(css_url)

        # parse_stylesheet(source, True, True): skip comments and
        # whitespace tokens so only meaningful rules remain.
        left_sheet = parse_stylesheet(
            left_stylesheet_content.decode("utf-8"), True, True
        )
        right_sheet = parse_stylesheet(
            right_stylesheet_content.decode("utf-8"), True, True
        )

        left_selector_index = _get_selector_index(left_sheet)
        right_selector_index = _get_selector_index(right_sheet)

        # Report left-only selectors; rules added on the right are ignored.
        for selector, rule in left_selector_index.items():
            if selector not in right_selector_index:
                print(rule)
|
|
|
|
|
|
def _get_selector_index(rule_list: list[QualifiedRule]) -> dict[str, QualifiedRule]:
    """Index the given qualified rules by their selector string.

    NOTE(review): work in progress — ``Selector`` has no canonical string
    form yet, so ``result`` is never populated and the function currently
    always returns an empty dict; the ``_parse_selector`` call only
    exercises the parser.  The leftover debug ``print(selector)`` has been
    removed.

    :param rule_list: rules as produced by ``tinycss2.parse_stylesheet``.
    :return: mapping of selector string to its rule (currently empty).
    """
    result: dict[str, QualifiedRule] = {}
    for rule in rule_list:
        # The prelude is everything before the "{" — i.e. the selector.
        _selector = _parse_selector(iter(rule.prelude))
        # TODO: derive a canonical string key from _selector and store
        # result[key] = rule once Selector can render itself.
    return result
|
|
|
|
|
|
class Combinator(Enum):
    """Relationship between two compound selectors in a complex selector."""

    # Descendant (whitespace) combinator.
    CHILD = 0
    # ">" combinator.
    DIRECT_CHILD = 1
    # "+" combinator.
    ADJACENT_SIBLING = 2
    # "~" combinator.
    GENERAL_SIBLING = 3
|
|
|
|
|
|
class Selector:
    """Structured representation of a single parsed CSS selector."""

    def __init__(self) -> None:
        # Class names attached to this compound selector (".foo").
        self._classes: set[str] = set()
        # Selectors reached through a combinator (">", "+", "~").
        # Bug fix: the original assigned the *type expression* itself
        # (``self._combined = dict[Combinator, "Selector"]``) instead of
        # an annotated empty dict.
        self._combined: dict[Combinator, "Selector"] = {}
|
|
|
|
|
|
def _parse_selector(token_iterator: Iterator[Node]) -> Selector:
    """Consume tokens from *token_iterator* and parse one CSS selector.

    Recognises class chains (".a.b"), type selectors ("div"), the
    ">", "+" and "~" combinators (via recursion on the same iterator),
    and skips attribute blocks ("[...]").

    NOTE(review): work in progress — everything parsed below (``classes``,
    ``tag``, ``child_selectors``, ``attributes``) is collected into locals
    and then discarded: the function always returns an empty ``Selector()``.
    """
    try:
        classes: set[str] = set()
        child_selectors: dict[Combinator, Selector] = {}
        token = next(token_iterator)
        tag = ""
        attributes: dict[str, str] = {}
        while True:
            # Skip insignificant whitespace between selector parts.
            while isinstance(token, WhitespaceToken):
                token = next(token_iterator)
            if isinstance(token, LiteralToken):
                if token.value == ".":
                    # Class selector chain: ".a.b.c" -> {"a", "b", "c"}.
                    while True:
                        token = next(token_iterator)
                        assert isinstance(token, IdentToken)
                        classes.add(token.value)
                        token = next(token_iterator)

                        # Stop unless another "." immediately follows.
                        if not isinstance(token, LiteralToken) or token.value != ".":
                            break
                else:
                    combinator_mappings = {
                        "+": Combinator.ADJACENT_SIBLING,
                        ">": Combinator.DIRECT_CHILD,
                        "~": Combinator.GENERAL_SIBLING,
                    }
                    if token.value in combinator_mappings:
                        combinator = combinator_mappings[token.value]
                        # Everything after the combinator belongs to the
                        # combined selector; recurse on the same iterator.
                        child_selectors[combinator] = _parse_selector(token_iterator)
                    # NOTE(review): a LiteralToken that is neither "." nor a
                    # known combinator (e.g. ",") is never advanced past, and
                    # after a combinator the recursion exhausts the iterator
                    # but this loop keeps the same token — both paths look
                    # like infinite loops; confirm intended input.
            elif isinstance(token, IdentToken):
                # Type (tag) selector, e.g. "div".
                tag = token.value
                token = next(token_iterator)
            elif isinstance(token, SquareBracketsBlock):
                # parse attributes
                # TODO: attribute selectors ("[href=...]") are currently
                # skipped, not parsed into ``attributes``.
                token = next(token_iterator)
    except StopIteration:
        # End of the prelude terminates parsing normally.
        pass

    return Selector()
|
|
|
|
|
|
async def _get_stylesheets(url: str, source: Source) -> AsyncIterable[str]:
    """Yield the absolute URL of every stylesheet linked from *url*.

    Fetches the page through *source*, parses it with html5lib, and yields
    the resolved ``href`` of each ``<link rel="stylesheet">`` element.
    """
    markup = await source.get_url(url)
    document = BeautifulSoup(markup, features="html5lib")
    for node in document.find_all("link"):
        # A <link> may carry several rel values; only stylesheets matter.
        rel_values = node.get("rel", [])
        if "stylesheet" in rel_values:
            yield urljoin(url, node["href"])
|