misc: add overlaps command

This commit is contained in:
Corentin Sechet 2023-01-11 18:22:40 +01:00
parent 0b5d05411a
commit fa8956f848
15 changed files with 512 additions and 37 deletions

View File

@ -16,9 +16,11 @@ src_paths = ["stylo", "tests", "noxfile.py"]
[tool.mypy]
strict = true
files = "stylo/**/*.py,tests/**/*.py,noxfile.py"
ignore_missing_imports = true
[tool.pylint]
max-line-length = 110
[tool.pytest.ini_options]
asyncio_mode = "auto"

View File

@ -26,12 +26,12 @@ setup(
"Programming Language :: Python",
"Programming Language :: Python :: 3.8",
],
install_requires=["click"],
install_requires=["click", "tinycss2", "cssselect2", "html5lib"],
extras_require={
"dev": [
"nox",
"pytest",
"pytest-asyncio",
"pytest-datadir",
]
},
author="Entr'ouvert",

49
stylo/__main__.py Normal file
View File

@ -0,0 +1,49 @@
"""frontools main module"""
from logging import INFO, basicConfig, getLogger
from pathlib import Path
from typing import Iterable
from click import Path as ClickPath
from click import argument, group
from stylo.filters.overlaps import OverlapType, get_overlaps
# Module logger, named after the module rather than the file path: a filesystem path contains dots
# and separators that the logging hierarchy would misinterpret as nested logger names.
_LOG = getLogger(__name__)
# Root click command group; sub-commands attach to it through ``@main.command``.
# The docstring below doubles as the command-line help text, so it is left untouched.
@group()
def main() -> None:
    """Utilities for EO frontend development."""
    # Log records are emitted as bare messages, from INFO level upwards.
    basicConfig(format="%(message)s", level=INFO)
# Text logged between the two selectors of an overlap by the ``overlaps`` command,
# keyed by the kind of overlap that was detected.
_OVERLAP_TYPE_TEXT = {
    OverlapType.EQUALS: "EQUALS",
    OverlapType.CONTAINS: "CONTAINS",
    OverlapType.CONTAINED: "IS CONTAINED BY",
    OverlapType.INTERSECTS: "INTERSECTS",
}
@main.command(name="overlaps")
@argument("selector", type=str)
@argument("url", type=str, required=True)
@argument("stylesheets", type=ClickPath(exists=True), nargs=-1)
def list_aliases(selector: str, url: str, stylesheets: Iterable[str]) -> None:
    """Find all selectors overlapping SELECTOR applying STYLESHEETS to URL content."""
    # click hands the stylesheet arguments over as strings; the overlap search works on Path objects.
    paths = list(map(Path, stylesheets))

    # One log line per overlap: "<source>:<line>:<column> <selector> <relation> <other selector>".
    for found in get_overlaps(selector, url, paths):
        where = found.page.get_position(found.rule)
        relation = _OVERLAP_TYPE_TEXT[found.type]
        _LOG.info(
            "%s:%i:%i %s %s %s",
            where.source,
            where.line,
            where.column,
            found.selector,
            relation,
            found.overlapping_selector,
        )
# Script entry point: run the click group when the module is executed directly.
if __name__ == "__main__":
    main()  # pylint: disable=no-value-for-parameter

View File

@ -1,35 +0,0 @@
"""frontools main module"""
from asyncio import run
from functools import update_wrapper
from logging import INFO, basicConfig, getLogger
from typing import Any
from click import Context as ClickContext
from click import group, pass_context
def _async_command(function: Any) -> Any:
    """Turn a coroutine function into a plain callable that click can invoke.

    The returned callable forwards its arguments to ``function`` and drives the
    resulting coroutine to completion with ``asyncio.run``. The metadata of
    ``function`` (name, docstring, ...) is copied onto the returned callable.
    """

    def run_to_completion(*args: Any, **kwargs: Any) -> Any:
        coroutine = function(*args, **kwargs)
        return run(coroutine)

    update_wrapper(run_to_completion, function)
    return run_to_completion
# Module logger, named after the module rather than the file path so that it fits
# the dotted logger hierarchy.
_LOGGER = getLogger(__name__)
# Root click command group, written as a coroutine. Decorators apply bottom-up:
# ``_async_command`` wraps the coroutine function so that calling it runs it through
# ``asyncio.run``, ``pass_context`` supplies the click context as first argument, and
# ``group`` registers the result as the command group.
# The docstring below doubles as the command-line help text.
@group()
@pass_context
@_async_command
async def main(_: ClickContext) -> None:
    """Utilities for EO frontend development."""
    # Log records are emitted as bare messages, from INFO level upwards.
    basicConfig(format="%(message)s", level=INFO)
# Sub-command registered as "lint" on the ``main`` group. Its body consists of the
# docstring only (used by click as help text), so invoking it performs no work.
@main.command(name="lint")
def lint() -> None:
    """Lint CSS files frontools caches"""
# Script entry point: run the click group when the module is executed directly.
if __name__ == "__main__":
    main()  # pylint: disable=no-value-for-parameter

20
stylo/common.py Normal file
View File

@ -0,0 +1,20 @@
from pathlib import Path
class SourcePosition:
    """Read-only location inside a source file: the file, a line and a column.

    Instances compare and hash by value, and have a readable ``repr``, so that
    positions can be asserted on in tests and inspected in logs.
    """

    def __init__(self, source: Path, line: int, column: int) -> None:
        self._source = source
        self._line = line
        self._column = column

    @property
    def source(self) -> Path:
        """Path of the file the position points into."""
        return self._source

    @property
    def line(self) -> int:
        """Line number of the position."""
        return self._line

    @property
    def column(self) -> int:
        """Column number of the position."""
        return self._column

    def _key(self) -> tuple[Path, int, int]:
        """Return the tuple of values that identifies this position."""
        return (self._source, self._line, self._column)

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, SourcePosition):
            return NotImplemented
        return self._key() == other._key()

    # Defined together with __eq__ so that equal positions hash alike.
    def __hash__(self) -> int:
        return hash(self._key())

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}"
            f"(source={self._source!r}, line={self._line!r}, column={self._column!r})"
        )

View File

165
stylo/filters/overlaps.py Normal file
View File

@ -0,0 +1,165 @@
"""Access CSS and source HTML for themes."""
from enum import Enum
from itertools import product
from logging import getLogger
from pathlib import Path
from typing import Any, Iterable
from cssselect2 import ElementWrapper
from cssselect2.compiler import CompiledSelector
from cssselect2.parser import CombinedSelector, Selector, SelectorError
from cssselect2.parser import parse as css_parse
from tinycss2.ast import QualifiedRule
from stylo.page import Page
_LOG = getLogger(__name__)
class OverlapType(Enum):
    """Relation between the nodes matched by a selector and by an overlapping selector."""

    # Both selectors match exactly the same nodes.
    EQUALS = 1
    # Every node matched by the overlapping selector is also matched by the original one.
    CONTAINS = 2
    # Every node matched by the original selector is also matched by the overlapping one.
    CONTAINED = 3
    # Neither set of matched nodes includes the other.
    INTERSECTS = 4
class Overlap:
    """Read-only record describing one overlap found between two selectors.

    It keeps the page and rule the overlap was found in, the two selectors that
    were compared and the kind of overlap between them.
    """

    def __init__(  # pylint: disable=too-many-arguments
        self,
        page: Page,
        rule: QualifiedRule,
        selector: CombinedSelector,
        overlapping_selector: CombinedSelector,
        type_: OverlapType,
    ):
        self._page, self._rule = page, rule
        self._selector, self._overlapping_selector = selector, overlapping_selector
        self._type = type_

    @property
    def page(self) -> Page:
        """Page on which the overlap was detected."""
        return self._page

    @property
    def rule(self) -> QualifiedRule:
        """Stylesheet rule the overlapping selector comes from."""
        return self._rule

    @property
    def selector(self) -> CombinedSelector:
        """Selector the search was made for."""
        return self._selector

    @property
    def overlapping_selector(self) -> CombinedSelector:
        """Selector found to overlap ``selector``."""
        return self._overlapping_selector

    @property
    def type(self) -> OverlapType:
        """Kind of overlap between the two selectors."""
        return self._type
def _parse(content: Any) -> Iterable[CombinedSelector]:
    """Parse ``content`` in forgiving mode and return the parsed tree of each selector found."""
    return [parsed.parsed_tree for parsed in css_parse(content, forgiving=True)]
def _query(page: Page, selector: CombinedSelector) -> set[ElementWrapper]:
    """Return the set of elements of ``page`` that ``selector`` matches."""
    compiled = CompiledSelector(Selector(selector))
    matches = page.root.query_all(compiled)
    return set(matches)
def get_overlaps(base_selector: str, url: str, stylesheets: Iterable[Path]) -> Iterable[Overlap]:
    """Yield the overlaps of every selector in ``base_selector`` with the rules of ``stylesheets``.

    The document at ``url`` is loaded once per stylesheet as a page; each parsed
    selector is then checked against each page, selector by selector.
    """
    selectors = _parse(base_selector)
    # Pages come from a generator: materialise them so they can be walked once per selector.
    pages = list(Page.load(url, stylesheets))
    for selector in selectors:
        for page in pages:
            yield from _get_selector_overlaps(selector, page)
def _get_selector_overlaps(selector: CombinedSelector, page: Page) -> Iterable[Overlap]:
    """Yield the overlaps between ``selector`` and the qualified rules of ``page``.

    For each candidate reported by ``_get_rule_overlaps`` the trailing part of the
    rule selector (``right``), if any, is appended both to ``selector`` and to the
    matching part of the rule (``left``). The nodes matched by the two resulting
    selectors are then compared to classify the overlap.

    Candidates whose text equals ``selector`` are skipped, as are pairs of
    selectors that end up sharing no node at all.
    """
    matching_nodes = _query(page, selector)

    for rule in page.qualified_rules:
        for left, combinator, right in _get_rule_overlaps(matching_nodes, rule, page):
            if str(left) == str(selector):
                continue

            if right is None:
                original_selector = selector
                overlapping_selector = left
                # Same selector as above: reuse its result instead of querying the page again.
                original_nodes = matching_nodes
            else:
                original_selector = _combine(selector, combinator, right)
                overlapping_selector = _combine(left, combinator, right)
                original_nodes = _query(page, original_selector)

            overlapping_nodes = _query(page, overlapping_selector)

            if not original_nodes and not overlapping_nodes:
                continue

            if original_nodes == overlapping_nodes:
                overlap_type = OverlapType.EQUALS
            elif original_nodes.issuperset(overlapping_nodes):
                overlap_type = OverlapType.CONTAINS
            elif overlapping_nodes.issuperset(original_nodes):
                overlap_type = OverlapType.CONTAINED
            elif original_nodes.isdisjoint(overlapping_nodes):
                # ``left`` matched one of the base nodes, but once ``right`` is appended the two
                # selectors can still target unrelated nodes: that is not an overlap, and must not
                # abort the whole run as an assertion would.
                continue
            else:
                overlap_type = OverlapType.INTERSECTS

            yield Overlap(page, rule, original_selector, overlapping_selector, overlap_type)
def _match_any(selector: CombinedSelector, *nodes: ElementWrapper) -> bool:
    """Tell whether ``selector`` matches at least one of ``nodes``.

    The selector is compiled before any node is looked at, so a selector that
    cannot be compiled raises even when no node is given.
    """
    compiled = CompiledSelector(Selector(selector))
    for node in nodes:
        if node.matches(compiled):
            return True
    return False
def _combine(left: CombinedSelector, combinator: str, right: CombinedSelector | None) -> CombinedSelector:
    """Return the selector ``left <combinator> right``.

    ``right`` may be ``None`` (``left`` is returned unchanged), a single compound
    selector, or a chain of compounds stored as nested ``CombinedSelector`` nodes
    whose ``left`` attribute holds the beginning of the chain.

    ``left`` is attached in front of the first compound of ``right`` by descending
    along the ``left`` attributes. The result therefore never carries a
    ``CombinedSelector`` in a ``right`` slot, which would anchor ``left`` to the
    wrong compound when ``right`` is made of three compounds or more.
    """
    if right is None:
        return left
    if not isinstance(right, CombinedSelector):
        return CombinedSelector(left, combinator, right)
    # Prepend to the head of the chain, then re-attach the last compound of ``right``.
    head = _combine(left, combinator, right.left)
    return CombinedSelector(head, right.combinator, right.right)
def _get_rule_overlaps(
    matching_nodes: set[ElementWrapper], rule: QualifiedRule, page: Page
) -> Iterable[tuple[CombinedSelector, str, CombinedSelector | None]]:
    """Yield the parts of the selectors of ``rule`` that match one of ``matching_nodes``.

    Each selector of the rule prelude is shortened from its right-hand end until what is
    left matches at least one of ``matching_nodes``. At most one ``(left, combinator, right)``
    tuple is yielded per selector: ``left`` is the part that matched, ``right`` the part that
    was stripped (``None`` when the whole selector matched) and ``combinator`` the combinator
    that stood between them (``""`` when nothing was stripped).

    A selector that raises ``SelectorError`` is reported with a warning giving the position
    of the rule, then abandoned.
    """
    for selector in _parse(rule.prelude):
        left = selector
        combinator: str = ""
        right: CombinedSelector | None = None
        # Leftmost CombinedSelector node built inside ``right``; newly stripped parts are
        # spliced in through its ``left`` attribute.
        right_left_leaf: CombinedSelector | None = None
        while True:
            try:
                if _match_any(left, *matching_nodes):
                    yield left, combinator, right
                    break
            except SelectorError as ex:
                position = page.get_position(rule)
                _LOG.warning(
                    "%s:%i:%i : Error parsing selector : %s",
                    position.source,
                    position.line,
                    position.column,
                    ex,
                )
                break
            # Nothing left to strip: no part of this selector matches.
            if not isinstance(left, CombinedSelector):
                break
            # Move the last part of ``left`` to the front of ``right``.
            if right is None:
                right = left.right
            elif right_left_leaf is None:
                right = CombinedSelector(left.right, combinator, right)
                right_left_leaf = right
            else:
                right_left_leaf.left = CombinedSelector(left.right, combinator, right_left_leaf.left)
                right_left_leaf = right_left_leaf.left
            combinator = left.combinator
            left = left.left

65
stylo/page.py Normal file
View File

@ -0,0 +1,65 @@
"""Access CSS and source HTML for themes."""
from pathlib import Path
from typing import Iterable
from urllib.request import urlopen
from cssselect2 import ElementWrapper
from html5lib import parse as parse_html
from tinycss2 import parse_rule_list, parse_stylesheet
from tinycss2.ast import AtRule, Node, QualifiedRule
from stylo.common import SourcePosition
from stylo.source_map import SourceMap
class Page:
    """An HTML document paired with the parsed rules of one stylesheet.

    Optionally carries the source map of the stylesheet, used to report positions
    in the original source rather than in the generated CSS.
    """

    def __init__(
        self,
        stylesheet_path: Path,
        root: ElementWrapper,
        rules: Iterable[Node],
        source_map: SourceMap | None = None,
    ) -> None:
        self._stylesheet_path = stylesheet_path
        self._root = root
        self._rules = list(rules)
        self._source_map = source_map

    @staticmethod
    def load(url: str, stylesheets: Iterable[Path]) -> "Iterable[Page]":
        """Yield one page per stylesheet, all sharing the document fetched from ``url``.

        The document is fetched and parsed once, when iteration starts. Each
        stylesheet is read as UTF-8 and parsed without whitespace and comment nodes;
        its source map is loaded with ``SourceMap.load``.
        """
        with urlopen(url) as page_content:
            page_root = ElementWrapper.from_html_root(parse_html(page_content))

        for stylesheet_path in stylesheets:
            # Only reading needs the file: close it before parsing and before handing
            # control back to the consumer.
            with open(stylesheet_path, "r", encoding="utf-8") as stylesheet:
                content = stylesheet.read()
            rules = parse_stylesheet(content, skip_whitespace=True, skip_comments=True)
            source_map = SourceMap.load(stylesheet_path)
            yield Page(stylesheet_path, page_root, rules, source_map)

    @property
    def root(self) -> ElementWrapper:
        """Root element of the HTML document."""
        return self._root

    @property
    def rules(self) -> Iterable[Node]:
        """Top-level nodes parsed from the stylesheet."""
        return self._rules

    @property
    def qualified_rules(self) -> Iterable[QualifiedRule]:
        """Yield the qualified rules of the stylesheet.

        Top-level qualified rules are yielded as they are. ``@media`` rules are
        replaced by the qualified rules found directly in their content; every other
        at-rule, and every other kind of node, is skipped.
        """

        def _expand(rule: Node) -> Iterable[QualifiedRule]:
            if isinstance(rule, AtRule):
                if rule.at_keyword != "media":
                    return
                rules = parse_rule_list(rule.content, skip_comments=True, skip_whitespace=True)
                yield from (it for it in rules if isinstance(it, QualifiedRule))
            elif isinstance(rule, QualifiedRule):
                yield rule

        for rule in self.rules:
            yield from _expand(rule)

    def get_position(self, node: Node) -> SourcePosition:
        """Return the position of ``node``, with line and column counted from 1.

        Without a source map this is the position in the stylesheet itself. With a
        source map it is the position the map gives in the original source.
        """
        if self._source_map is None:
            return SourcePosition(self._stylesheet_path, node.source_line, node.source_column)

        # tinycss2 counts lines and columns from 1 whereas source maps count them from 0:
        # shift before the lookup, and shift the result back so that both branches report
        # positions the same way.
        mapped = self._source_map[node.source_line - 1, node.source_column - 1]
        return SourcePosition(mapped.source, mapped.line + 1, mapped.column + 1)

125
stylo/source_map.py Normal file
View File

@ -0,0 +1,125 @@
"""Decode source maps"""
from bisect import bisect
from json import loads
from pathlib import Path
from re import compile as re_compile
from typing import Final, Iterable, Optional
from stylo.common import SourcePosition
# Maps a (line, column) pair of the generated file to the matching position in its source.
PositionIndex = dict[tuple[int, int], SourcePosition]
# For each line of the generated file, the columns that have an entry in the position index.
BisectIndex = list[list[int]]
class SourceMap:
    """Lookup table from positions in a generated file to positions in its sources."""

    def __init__(
        self,
        index: PositionIndex,
        bisect_index: BisectIndex,
    ):
        self._index = index
        self._bisect_index = bisect_index

    def __getitem__(self, key: tuple[int, int]) -> SourcePosition:
        """Return the source position recorded for ``key``, a ``(line, column)`` pair.

        When the exact pair is not recorded, the closest recorded column at or before
        ``column`` on the same line is used, or the first recorded column of the line
        if ``column`` comes before all of them.
        """
        exact = self._index.get(key)
        if exact is not None:
            return exact

        line, column = key
        recorded = self._bisect_index[line]
        insertion_point = bisect(recorded, column)
        if insertion_point:
            closest = recorded[insertion_point - 1]
        else:
            closest = recorded[0]
        return self._index[line, closest]

    @staticmethod
    def load(source_path: Path) -> "SourceMap | None":
        """Load the source map referenced by the file at ``source_path``.

        Returns ``None`` when the file references no source map or when the referenced
        map is not an existing file. Source paths listed in the map are resolved
        relative to the directory of ``source_path``.

        Raises:
            ValueError: if the map is not a version 3 source map.
        """
        base_directory = source_path.parent
        map_path = _get_map_path(source_path)
        if map_path is None or not map_path.is_file():
            return None

        with open(map_path, "r", encoding="utf-8") as map_file:
            json_map = loads(map_file.read())

        if json_map["version"] != 3:
            raise ValueError("Only version 3 sourcemaps are supported")

        sources = [(base_directory / name).resolve() for name in json_map["sources"]]
        index, bisect_index = _load_indices(sources, json_map.get("mappings", ""))
        return SourceMap(index, bisect_index)
# Base64 alphabet used by the VLQ encoding of source maps.
B64CHARS: Final[bytes] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
# Value of each base64 digit, indexed by character code; None for codes outside the alphabet.
B64TABLE: Final[list[Optional[int]]] = [None] * (max(B64CHARS) + 1)
for i, b in enumerate(B64CHARS):
    B64TABLE[b] = i
# Each base64 digit carries 5 bits of payload; its sixth bit flags a continuation.
SHIFTSIZE: Final[int] = 5
FLAG: Final[int] = 1 << SHIFTSIZE
MASK: Final[int] = FLAG - 1


def _base64vlq_decode(vlqval: str) -> Iterable[int]:
    """Yield the signed integers encoded in the base64 VLQ string ``vlqval``.

    Every base64 digit contributes 5 bits, least significant group first, and has its
    sixth bit set when more digits follow for the same number. The lowest bit of the
    assembled value is the sign.

    Raises:
        ValueError: if ``vlqval`` contains a character outside the base64 alphabet,
            or ends in the middle of a number.
    """
    shift = value = 0
    for char in vlqval:
        code = ord(char)
        digit = B64TABLE[code] if code < len(B64TABLE) else None
        if digit is None:
            raise ValueError(f"Invalid base64 VLQ character {char!r} in {vlqval!r}")
        value += (digit & MASK) << shift
        if digit & FLAG:
            shift += SHIFTSIZE
            continue
        # determine sign and add to results
        yield (value >> 1) * (-1 if value & 1 else 1)
        shift = value = 0
    if shift:
        # The last digit announced a continuation that never came.
        raise ValueError(f"Truncated base64 VLQ value {vlqval!r}")
def _get_map_path(source_path: Path) -> Path | None:
map_url_regex = re_compile(r"\/[\/|\*][@|#]\s*sourceMappingURL=([^\*\/]*)\s*(\*\/)?")
with open(source_path, "r", encoding="utf-8") as source:
for line in source.readlines():
match = map_url_regex.match(line)
if match:
source_map_path = match.group(1).strip()
return source_path.parent / source_map_path
return None
def _load_indices(  # pylint: disable = too-many-locals
    sources: list[Path], mappings: str
) -> tuple[PositionIndex, BisectIndex]:
    """Decode the ``mappings`` string of a source map into lookup structures.

    ``mappings`` holds one ``;``-separated group per line of the generated file, each
    group being a ``,``-separated list of base64 VLQ segments. The first field of a
    segment is a column offset within the line; the next three are offsets applied to
    the running source index, source line and source column.

    Returns the position index, keyed by ``(line, column)`` of the generated file, and
    for each generated line the list of columns present in that index.
    """
    index: PositionIndex = {}
    bisect_index: BisectIndex = []
    # Source fields are relative to the previous segment, across line boundaries.
    source_id = source_line = source_column = 0

    for target_line, encoded_line in enumerate(mappings.split(";")):
        line_columns: list[int] = []
        # Generated columns are relative to the previous segment of the same line only.
        target_column = 0
        encoded_segments = encoded_line.split(",") if encoded_line else []
        for encoded_segment in encoded_segments:
            target_column_offset, *source_fields = _base64vlq_decode(encoded_segment)
            target_column += target_column_offset
            # NOTE(review): indentation was lost in the reviewed copy of this function; it is
            # assumed that only segments carrying source fields are recorded - confirm.
            if len(source_fields) < 3:
                continue
            source_id += source_fields[0]
            source_line += source_fields[1]
            source_column += source_fields[2]
            index[(target_line, target_column)] = SourcePosition(
                sources[source_id], source_line, source_column
            )
            line_columns.append(target_column)
        bisect_index.append(line_columns)

    return index, bisect_index

View File

View File

@ -0,0 +1,16 @@
<html>
<body>
<div class="list">
<div class="list--item">
<a class="list--link">Parks</a>
<div class="list--description">
<a class="list--description-link"></a>
</div>
</div>
<span class="list--item">
<a class="list--link">Krascuky</a>
<div class="list--description"></div>
</span>
</div>
</body>
</html>

View File

@ -0,0 +1,13 @@
.list--item > a {
display: none; }
.list a {
display: none; }
.list > div {
display: none; }
.list > div a {
display: none; }
/*# sourceMappingURL=style.css.map */

View File

@ -0,0 +1,9 @@
{
"version": 3,
"file": "style.css",
"sources": [
"style.scss"
],
"names": [],
"mappings": "AAAA,AAAA,WAAW,GAAG,CAAC,CAAC;EACf,OAAO,EAAE,IAAI,GACb;;AAED,AAAA,KAAK,CAAC,CAAC,CAAC;EACP,OAAO,EAAE,IAAI,GACb;;AAED,AAAA,KAAK,GAAG,GAAG,CAAC;EACX,OAAO,EAAE,IAAI,GACb;;AAED,AAAA,KAAK,GAAG,GAAG,CAAC,CAAC,CAAC;EACb,OAAO,EAAE,IAAI,GACb"
}

View File

@ -0,0 +1,16 @@
.list--item > a { // equals list--link
display: none;
}
.list a { // contains list--link
display: none;
}
.list > div a { // intersects list--link
display: none;
}
.list > div { // contained by list--item
display: none;
}

View File

@ -0,0 +1,30 @@
from pathlib import Path
from typing import Any
from stylo.filters.overlaps import Overlap, OverlapType, get_overlaps
def test_overlaps(shared_datadir: Path) -> None:
    """Check the overlaps reported for the fixture page and stylesheet."""
    # ``as_uri`` quotes the path as needed, unlike a hand-built "file://" string which breaks
    # when the data directory contains spaces or other characters reserved in URLs.
    page_url = (shared_datadir.absolute() / "page.html").as_uri()
    css_path = shared_datadir / "style.css"

    def _get_content(overlap: Overlap) -> tuple[str, str, OverlapType]:
        return (str(overlap.selector), str(overlap.overlapping_selector), overlap.type)

    def _(selector: Any) -> list[tuple[str, str, OverlapType]]:
        return [_get_content(it) for it in get_overlaps(selector, page_url, [css_path])]

    assert _(".list--item") == [
        (".list--item", ".list>div", OverlapType.CONTAINS),
        (".list--item a", ".list>div a", OverlapType.CONTAINS),
    ]
    assert _(".list--link") == [
        (".list--link", ".list--item>a", OverlapType.EQUALS),
        (".list--link", ".list a", OverlapType.CONTAINED),
        (".list--link", ".list>div a", OverlapType.INTERSECTS),
    ]