misc: add diff method

This commit is contained in:
Corentin Sechet 2023-01-17 00:48:02 +01:00
parent fa8956f848
commit 1d80371945
17 changed files with 387 additions and 53 deletions

View File

@ -2,6 +2,8 @@
ignore =
# at least two spaces before inline comment
E261
# Line break before binary operator (black uses it)
W503
max-complexity = 10
max-line-length = 110
exclude = .nox,.venv

View File

@ -22,5 +22,5 @@ ignore_missing_imports = true
max-line-length = 110
[tool.pytest.ini_options]
asyncio_mode = "auto"
log_cli=1

View File

@ -1,20 +0,0 @@
from pathlib import Path
class SourcePosition:
    """Read-only location (source file, line, column) of a piece of text."""

    def __init__(self, source: Path, line: int, column: int) -> None:
        """Store the location.

        :param source: path of the file the position refers to.
        :param line: line number inside ``source``.
        :param column: column number inside ``source``.
        """
        self._source = source
        self._line = line
        self._column = column

    @property
    def source(self) -> Path:
        """Path of the file the position refers to."""
        return self._source

    @property
    def line(self) -> int:
        """Line number given at construction."""
        return self._line

    @property
    def column(self) -> int:
        """Column number given at construction."""
        return self._column

    def __str__(self) -> str:
        """Format the position as ``source:line:column`` so it reads well in log messages."""
        return f"{self._source}:{self._line}:{self._column}"

    def __repr__(self) -> str:
        """Return an unambiguous representation for debugging."""
        return f"{type(self).__name__}({self._source!r}, {self._line!r}, {self._column!r})"

30
stylo/diff.py Normal file
View File

@ -0,0 +1,30 @@
from typing import Iterable
from cssselect2 import ElementWrapper
from stylo.nodes import Declaration
from stylo.page_set import PageSet
from stylo.stylesheet import Stylesheet
def diff(
    page_set: PageSet, left: Stylesheet, right: Stylesheet
) -> Iterable[tuple[ElementWrapper, Declaration | None, Declaration | None]]:
    """Yield the style differences between two stylesheets for every element of a page set.

    For each element of ``page_set.elements`` the styles computed by ``left`` and ``right`` are
    compared by declaration name. Each difference is yielded as ``(element, left, right)``:

    * ``(element, declaration, None)`` when only ``left`` sets the declaration,
    * ``(element, left_declaration, right_declaration)`` when both set it but their string
      forms differ,
    * ``(element, None, declaration)`` when only ``right`` sets the declaration.

    Within each of these three groups the declarations are emitted sorted by name, so the
    output is reproducible from one run to the next.

    :param page_set: provider of the elements to inspect.
    :param left: stylesheet used as the reference side.
    :param right: stylesheet compared against ``left``.
    """
    for element in page_set.elements:
        left_style = left.style(element)
        right_style = right.style(element)
        left_names = set(left_style)
        right_names = set(right_style)

        # Sets of strings iterate in an order that varies between interpreter runs
        # (hash randomization), hence the explicit sort in each group.
        for name in sorted(left_names - right_names):
            yield (element, left_style[name], None)

        for name in sorted(left_names & right_names):
            left_declaration = left_style[name]
            right_declaration = right_style[name]
            if str(left_declaration) != str(right_declaration):
                yield (element, left_declaration, right_declaration)

        for name in sorted(right_names - left_names):
            yield (element, None, right_style[name])

89
stylo/nodes.py Normal file
View File

@ -0,0 +1,89 @@
from functools import cached_property
from logging import getLogger
from typing import Any, Generic, Iterable, TypeVar, cast
from cssselect2.parser import parse
from tinycss2 import serialize
from tinycss2.ast import Declaration as TCSSDeclaration
from tinycss2.ast import Node as TCSSNode
from tinycss2.ast import ParseError as TCSSParseError
from tinycss2.ast import QualifiedRule as TCSSQualifiedRule
from tinycss2.ast import WhitespaceToken as TCSSWhitespaceToken
from tinycss2.parser import parse_declaration_list
from stylo.selector import Selector
from stylo.source_map import SourceMap, SourcePosition
TWrapped = TypeVar("TWrapped", bound=TCSSNode)
_LOG = getLogger(__name__)
def _check_error(node: TCSSNode, source_map: SourceMap) -> bool:
    """Tell whether ``node`` is usable, logging a warning when it is a parse error.

    :param node: node to check.
    :param source_map: mapping used to resolve the node position shown in the warning.
    :return: ``False`` when ``node`` is a parse error (a warning is logged), ``True`` otherwise.
    """
    if not isinstance(node, TCSSParseError):
        return True

    # The position is only needed for the warning, so it is resolved on the error path only.
    position = source_map[node.source_line, node.source_column]
    _LOG.warning("%s: %s", position, node.message)
    return False
class Node(Generic[TWrapped]):
    """Wrapper pairing a parsed node with the source map of the stylesheet it comes from."""

    def __init__(self, wrapped_node: TWrapped, source_map: SourceMap):
        """Keep a reference to the wrapped node and to the source map.

        :param wrapped_node: the parsed node being wrapped.
        :param source_map: mapping used to resolve the node position.
        """
        self._source_map = source_map
        self._wrapped_node = wrapped_node

    @cached_property
    def source_position(self) -> SourcePosition:
        """Position of the wrapped node as resolved by the source map (computed once)."""
        return self._source_map[self._wrapped_node.source_line, self._wrapped_node.source_column]

    @staticmethod
    def wrap(nodes: Iterable[TWrapped], source_map: SourceMap) -> "Iterable[Node[TWrapped]]":
        """Lazily wrap parsed nodes into their ``Node`` subclasses.

        Parse errors are reported through ``_check_error`` and skipped. Qualified rules become
        ``QualifiedRule`` and whitespace tokens become ``Whitespace``.

        :param nodes: parsed nodes to wrap.
        :param source_map: source map handed to every created wrapper.
        :raises AssertionError: when a node is of a type that has no wrapper.
        """
        for node in nodes:
            if not _check_error(node, source_map):
                continue
            if isinstance(node, TCSSQualifiedRule):
                yield QualifiedRule(node, source_map)
            elif isinstance(node, TCSSWhitespaceToken):
                yield Whitespace(node, source_map)
            else:
                # An explicit raise survives ``python -O``, which strips ``assert`` statements
                # and would otherwise silently drop the node.
                raise AssertionError(f"Unsupported node type: {type(node).__name__}")

    def __str__(self) -> str:
        """Serialize the wrapped node back to text."""
        return cast(str, serialize([self._wrapped_node]))
class Whitespace(Node[TCSSWhitespaceToken]):
    """Wrapper for a whitespace token; it adds nothing on top of ``Node``."""
class Declaration(Node[TCSSDeclaration]):
    """Wrapper exposing the name, value and importance of a parsed declaration."""

    @property
    def name(self) -> str:
        """The ``lower_name`` of the wrapped declaration."""
        lower_name = self._wrapped_node.lower_name
        return cast(str, lower_name)

    @property
    def value(self) -> str:
        """The ``value`` of the wrapped declaration, serialized to text."""
        value_tokens = self._wrapped_node.value
        return cast(str, serialize(value_tokens))

    @property
    def important(self) -> bool:
        """The ``important`` flag of the wrapped declaration."""
        is_important = self._wrapped_node.important
        return cast(bool, is_important)
class QualifiedRule(Node[TCSSQualifiedRule]):
    """Wrapper for a qualified rule, giving access to its selectors and declarations."""

    @cached_property
    def selectors(self) -> Iterable[Selector[Any]]:
        """Selectors parsed from the rule prelude, each wrapped in a ``Selector`` (computed once)."""
        return [Selector.wrap(it.parsed_tree) for it in parse(self._wrapped_node.prelude)]

    @cached_property
    def declarations(self) -> Iterable[Declaration]:
        """Declarations parsed from the rule content, in source order (computed once).

        Parse errors are reported through ``_check_error`` and skipped. Anything else that is
        not an actual declaration is skipped too, because ``Declaration`` reads attributes
        such as ``lower_name`` that only declaration nodes provide.
        """
        return [
            Declaration(node, self._source_map)
            for node in parse_declaration_list(self._wrapped_node.content)
            # _check_error runs first so that every parse error is still logged.
            if _check_error(node, self._source_map) and isinstance(node, TCSSDeclaration)
        ]

View File

@ -8,8 +8,7 @@ from html5lib import parse as parse_html
from tinycss2 import parse_rule_list, parse_stylesheet
from tinycss2.ast import AtRule, Node, QualifiedRule
from stylo.common import SourcePosition
from stylo.source_map import SourceMap
from stylo.source_map import SourceMap, SourcePosition
class Page:

20
stylo/page_set.py Normal file
View File

@ -0,0 +1,20 @@
from typing import Iterable
from urllib.request import urlopen
from cssselect2 import ElementWrapper
from html5lib import parse
def _load_page(url: str) -> ElementWrapper:
    """Open ``url``, parse the response and wrap the result with ``ElementWrapper.from_html_root``."""
    with urlopen(url) as response:
        document = parse(response)
        return ElementWrapper.from_html_root(document)
class PageSet:
    """Set of pages, each loaded from its URL when the set is created."""

    def __init__(self, urls: Iterable[str]):
        """Load every page of ``urls`` right away and keep the resulting roots."""
        self._roots = [_load_page(url) for url in urls]

    @property
    def elements(self) -> Iterable[ElementWrapper]:
        """Iterate over ``iter_subtree()`` of each loaded root, one page after the other."""
        for page_root in self._roots:
            for element in page_root.iter_subtree():
                yield element

34
stylo/selector.py Normal file
View File

@ -0,0 +1,34 @@
from functools import cached_property
from typing import Any, Generic, TypeVar
from cssselect2.compiler import CompiledSelector
from cssselect2.parser import CombinedSelector as WrappedCombinedSelector
from cssselect2.parser import CompoundSelector as WrappedCompoundSelector
from cssselect2.parser import Selector as WrappedSelector
TWrapped = TypeVar("TWrapped")
class Selector(Generic[TWrapped]):
    """Wrapper around a parsed selector that can provide its compiled form."""

    def __init__(self, wrapped_selector: TWrapped) -> None:
        """Keep a reference to the parsed selector.

        :param wrapped_selector: the parsed selector being wrapped.
        """
        self._wrapped_selector = wrapped_selector

    @staticmethod
    def wrap(selector: Any) -> "Selector[TWrapped]":
        """Wrap a parsed selector in the matching ``Selector`` subclass.

        :param selector: a parsed combined or compound selector.
        :return: a ``CombinedSelector`` or a ``CompoundSelector`` wrapping ``selector``.
        :raises AssertionError: when ``selector`` is of neither supported type.
        """
        if isinstance(selector, WrappedCombinedSelector):
            return CombinedSelector(selector)
        if isinstance(selector, WrappedCompoundSelector):
            return CompoundSelector(selector)
        # An explicit raise survives ``python -O``, which strips ``assert`` statements and
        # would otherwise make this function return ``None``.
        raise AssertionError(f"Unsupported selector type: {type(selector).__name__}")

    @cached_property
    def compiled(self) -> CompiledSelector:
        """Compiled form of the wrapped selector (computed once)."""
        return CompiledSelector(WrappedSelector(self._wrapped_selector))
class CompoundSelector(Selector[WrappedCompoundSelector]):
    """Wrapper for a parsed compound selector; it adds nothing on top of ``Selector``."""
class CombinedSelector(Selector[WrappedCombinedSelector]):
    """Wrapper for a parsed combined selector; it adds nothing on top of ``Selector``."""

View File

@ -1,17 +1,72 @@
"""Decode source maps"""
from abc import ABC, abstractmethod
from bisect import bisect
from json import loads
from pathlib import Path
from re import compile as re_compile
from typing import Final, Iterable, Optional
from stylo.common import SourcePosition
class SourcePosition:
    """Read-only location (source file, line, column) of a piece of text.

    Two positions are equal when their source, line and column are equal, and equal positions
    share the same hash.
    """

    def __init__(self, source: Path, line: int, column: int) -> None:
        """Store the location.

        :param source: path of the file the position refers to.
        :param line: line number inside ``source``.
        :param column: column number inside ``source``.
        """
        self._source = source
        self._line = line
        self._column = column

    @property
    def source(self) -> Path:
        """Path of the file the position refers to."""
        return self._source

    @property
    def line(self) -> int:
        """Line number given at construction."""
        return self._line

    @property
    def column(self) -> int:
        """Column number given at construction."""
        return self._column

    def __str__(self) -> str:
        """Format the position as ``source:line:column``."""
        return f"{self._source}:{self._line}:{self._column}"

    def __repr__(self) -> str:
        """Return an unambiguous representation for debugging."""
        return f"{type(self).__name__}({self._source!r}, {self._line!r}, {self._column!r})"

    def __eq__(self, other: object) -> bool:
        """Compare by value; defer to the other operand when it is not a ``SourcePosition``."""
        if not isinstance(other, SourcePosition):
            return NotImplemented
        return (self._source, self._line, self._column) == (other._source, other._line, other._column)

    def __hash__(self) -> int:
        """Hash consistently with ``__eq__``."""
        return hash((self._source, self._line, self._column))
class SourceMap(ABC):
    """Lookup from a ``(line, column)`` key to a ``SourcePosition``."""

    @staticmethod
    def load(source_path: Path) -> "SourceMap":
        """Build the source map to use for ``source_path``.

        A ``_NullSourceMap`` is returned when ``_get_map_path`` gives no path or when that path
        is not a file. Otherwise the map file is decoded as JSON and turned into a
        ``_JsonSourceMap``.

        :param source_path: path of the file whose source map is wanted.
        :raises ValueError: when the ``version`` entry of the map is not 3.
        """
        map_path = _get_map_path(source_path)
        if map_path is None or not map_path.is_file():
            return _NullSourceMap(source_path)

        with open(map_path, "r", encoding="utf-8") as map_file:
            raw_map = map_file.read()
        json_map = loads(raw_map)

        if json_map["version"] != 3:
            raise ValueError("Only version 3 sourcemaps are supported")

        # Entries of "sources" are resolved against the directory of the source file.
        base_directory = source_path.parent
        sources = [(base_directory / entry).resolve() for entry in json_map["sources"]]
        index, bisect_index = _load_indices(sources, json_map.get("mappings", ""))
        return _JsonSourceMap(index, bisect_index)

    @abstractmethod
    def __getitem__(self, key: tuple[int, int]) -> SourcePosition:
        """Return the position associated with ``key``; implemented by subclasses."""
class _NullSourceMap(SourceMap):
    """Source map that maps every key onto the same file, keeping the key's two values as is."""

    def __init__(self, source: Path) -> None:
        """Remember the file that every returned position points to."""
        self._source = source

    def __getitem__(self, key: tuple[int, int]) -> SourcePosition:
        """Return a position in the stored file built from ``key[0]`` and ``key[1]``."""
        line = key[0]
        column = key[1]
        return SourcePosition(self._source, line, column)
PositionIndex = dict[tuple[int, int], SourcePosition]
BisectIndex = list[list[int]]
class SourceMap:
class _JsonSourceMap(SourceMap):
def __init__(
self,
index: PositionIndex,
@ -32,28 +87,6 @@ class SourceMap:
column = columns[column_index and column_index - 1]
return self._index[line, column]
@staticmethod
def load(source_path: Path) -> "SourceMap | None":
    """Load the source map of ``source_path``.

    :return: ``None`` when ``_get_map_path`` gives no path or when that path is not a file,
        otherwise a ``SourceMap`` built from the decoded JSON map.
    :raises ValueError: when the ``version`` entry of the map is not 3.
    """
    map_path = _get_map_path(source_path)
    if map_path is None or not map_path.is_file():
        return None

    with open(map_path, "r", encoding="utf-8") as map_file:
        raw_map = map_file.read()
    json_map = loads(raw_map)

    if json_map["version"] != 3:
        raise ValueError("Only version 3 sourcemaps are supported")

    # Entries of "sources" are resolved against the directory of the source file.
    base_directory = source_path.parent
    sources = [(base_directory / entry).resolve() for entry in json_map["sources"]]
    index, bisect_index = _load_indices(sources, json_map.get("mappings", ""))
    return SourceMap(index, bisect_index)
B64CHARS: Final[bytes] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
B64TABLE: Final[list[Optional[int]]] = [None] * (max(B64CHARS) + 1)

55
stylo/stylesheet.py Normal file
View File

@ -0,0 +1,55 @@
from functools import cached_property
from pathlib import Path
from typing import Any, Iterable, cast
from cssselect2 import ElementWrapper, Matcher
from tinycss2 import parse_stylesheet
from stylo.nodes import Declaration, Node, QualifiedRule
from stylo.selector import Selector
from stylo.source_map import SourceMap
class Stylesheet:
    """A stylesheet loaded from a file, able to compute the declarations applying to an element."""

    def __init__(self, path: Path) -> None:
        """Read and parse the stylesheet at ``path`` and register its selectors in a matcher.

        :param path: path of the stylesheet file, read as UTF-8.
        """
        with open(path, "r", encoding="utf-8") as stylesheet:
            content = stylesheet.read()

        source_map = SourceMap.load(path)
        # Node.wrap is a generator: it is materialized so that the nodes can be walked more
        # than once instead of being exhausted by the first consumer.
        self._nodes = list(Node.wrap(parse_stylesheet(content), source_map))

        self._matcher = Matcher()
        for rule in self.qualified_rules:
            for selector in rule.selectors:
                # The payload lets match() hand back both the selector and its rule.
                self._matcher.add_selector(selector.compiled, (selector, rule))

    @cached_property
    def qualified_rules(self) -> Iterable[QualifiedRule]:
        """Qualified rules of the stylesheet, in node order (computed once)."""
        return [node for node in self._nodes if isinstance(node, QualifiedRule)]

    def match(self, node: ElementWrapper) -> Iterable[tuple[Selector[Any], QualifiedRule]]:
        """Yield a ``(selector, rule)`` pair for every match the matcher reports for ``node``.

        :param node: element to match against the registered selectors.
        """
        for matched in self._matcher.match(node):
            # Index 3 of a match holds the payload registered in __init__.
            selector, rule = matched[3]
            yield cast(Selector[Any], selector), cast(QualifiedRule, rule)

    def style(self, node: ElementWrapper) -> dict[str, Declaration]:
        """Compute the declarations applying to ``node``, keyed by declaration name.

        Matching rules are processed in the order given by ``match``. A later declaration
        replaces an earlier one of the same name, unless the earlier one is important and the
        later one is not.

        :param node: element to compute the style of.
        :return: the winning declaration for each declaration name.
        """
        declarations: dict[str, Declaration] = {}
        for _, rule in self.match(node):
            for declaration in rule.declarations:
                name = declaration.name
                previous_declaration = declarations.get(name)
                if (
                    previous_declaration is not None
                    and previous_declaration.important
                    and not declaration.important
                ):
                    continue
                declarations[name] = declaration
        return declarations

16
tests/data/page.html Normal file
View File

@ -0,0 +1,16 @@
<html>
<body>
<div class="list">
<div class="list--item">
<a class="list--link">Parks</a>
<div class="list--description">
<a class="list--description-link"></a>
</div>
</div>
<span class="list--item">
<a class="list--link">Krascuky</a>
<div class="list--description"></div>
</span>
</div>
</body>
</html>

20
tests/data/style.css Normal file
View File

@ -0,0 +1,20 @@
.list--item > a {
display: none;
}
.list a {
display: none;
}
.list > div {
display: none;
}
.list > div a {
display: none;
}
.list--item {
color: red;
background: blue;
}

20
tests/data/style_diff.css Normal file
View File

@ -0,0 +1,20 @@
.list--item > a {
display: none;
}
.list a {
display: none;
}
.list > div {
display: none;
}
.list > div a {
display: none;
}
.list--item {
background: black;
margin: 1rem 0 0 1rem;
}

View File

@ -1,13 +1,16 @@
.list--item > a {
display: none; }
display: none;
}
.list a {
display: none; }
display: none;
}
.list > div {
display: none; }
display: none;
}
.list > div a {
display: none; }
display: none;
}
/*# sourceMappingURL=style.css.map */

View File

@ -0,0 +1,21 @@
.list--item > a {
display: none;
}
.list a {
display: none;
}
.list > div {
display: none;
}
.list > div a {
display: none;
}
.list--item {
color: red;
font-family: Arial;
}

View File

@ -5,7 +5,7 @@ from stylo.filters.overlaps import Overlap, OverlapType, get_overlaps
def test_overlaps(shared_datadir: Path) -> None:
page_path = shared_datadir.absolute() / "page.html"
page_path = shared_datadir / "page.html"
css_path = shared_datadir / "style.css"
def _get_content(overlap: Overlap) -> tuple[str, str, OverlapType]:

12
tests/test_diff.py Normal file
View File

@ -0,0 +1,12 @@
from pathlib import Path
from stylo.diff import diff
from stylo.page_set import PageSet
from stylo.stylesheet import Stylesheet
def test_diff(shared_datadir: Path) -> None:
    """Diff ``style.css`` against ``style_diff.css`` on ``page.html`` and check what changed.

    The two stylesheets only differ in their ``.list--item`` rule: ``color`` is removed,
    ``background`` is changed and ``margin`` is added.
    """
    # as_uri() needs an absolute path; it percent-encodes it and yields a valid file URL on
    # every platform, which plain string concatenation does not.
    page_url = (shared_datadir.absolute() / "page.html").as_uri()
    page_set = PageSet([page_url])
    left_stylesheet = Stylesheet(shared_datadir / "style.css")
    right_stylesheet = Stylesheet(shared_datadir / "style_diff.css")

    changes = list(diff(page_set, left_stylesheet, right_stylesheet))

    changed_names = set()
    for _, left, right in changes:
        declaration = left if left is not None else right
        assert declaration is not None
        changed_names.add(declaration.name)
    assert changed_names == {"color", "background", "margin"}