1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664
|
from __future__ import print_function
import re
from warnings import warn
from scss.cssdefs import CSS2_PSEUDO_ELEMENTS
# Super dumb little selector parser.
# Yes, yes, this is a regex tokenizer. The actual meaning of the
# selector doesn't matter; the parts are just important for matching up
# during @extend.
# Selectors have three levels: simple, combinator, comma-delimited.
# Each combinator can only appear once as a delimiter between simple
# selectors, so it can be thought of as a prefix.
# So this:
# a.b + c, d#e
# parses into two Selectors with these structures:
# [[' ', 'a', '.b'], ['+', 'c']]
# [[' ', 'd', '#e']]
# Note that the first simple selector has an implied descendant
# combinator -- i.e., it is a descendant of the root element.
# TODO `*html` is incorrectly parsed as a single selector
# TODO this oughta be touched up for css4 selectors
SELECTOR_TOKENIZER = re.compile(r'''
# Colons introduce pseudo-selectors, sometimes with parens
# TODO doesn't handle quoted )
[:]+ [-\w]+ (?: [(] .+? [)] )?
# These guys are combinators -- note that a single space counts too
| \s* [ +>~,] \s*
# Square brackets are attribute tests
# TODO: this doesn't handle ] within a string
| \[ .+? \]
# Dot and pound start class/id selectors. Percent starts a Sass
# extend-target faux selector.
| [.#%] [-\w]+
# Percentages are used for @keyframes
| [-.\d]+ [%]
# Plain identifiers, or single asterisks, are element names
| [-\w]+
| [*]
# & is the sass replacement token
| [&]
# And as a last-ditch effort, just eat up to whitespace
| (\S+)
''', re.VERBOSE | re.MULTILINE)
# Set of starting squiggles that are known to mean "this is a general
# commutative token", i.e., not an element
BODY_TOKEN_SIGILS = frozenset('#.[:%')
def _is_combinator_subset_of(specific, general, is_first=True):
"""Return whether `specific` matches a non-strict subset of what `general`
matches.
"""
if is_first and general == ' ':
# First selector always has a space to mean "descendent of root", which
# still holds if any other selector appears above it
return True
if specific == general:
return True
if specific == '>' and general == ' ':
return True
if specific == '+' and general == '~':
return True
return False
class SimpleSelector(object):
"""A simple selector, by CSS 2.1 terminology: a combination of element
name, class selectors, id selectors, and other criteria that all apply to a
single element.
Note that CSS 3 considers EACH of those parts to be a "simple selector",
and calls a group of them a "sequence of simple selectors". That's a
terrible class name, so we're going with 2.1 here.
For lack of a better name, each of the individual parts is merely called a
"token".
Note that it's possible to have zero tokens. This isn't legal CSS, but
it's perfectly legal Sass, since you might nest blocks like so:
body > {
div {
...
}
}
"""
def __init__(self, combinator, tokens):
self.combinator = combinator
# TODO enforce that only one element name (including *) appears in a
# selector. only one pseudo, too.
# TODO remove duplicates?
self.tokens = tuple(tokens)
def __repr__(self):
return "<%s: %r>" % (type(self).__name__, self.render())
def __hash__(self):
return hash((self.combinator, self.tokens))
def __eq__(self, other):
if not isinstance(other, SimpleSelector):
return NotImplemented
return (
self.combinator == other.combinator and
self.tokens == other.tokens)
@property
def has_parent_reference(self):
return '&' in self.tokens or 'self' in self.tokens
@property
def has_placeholder(self):
for token in self.tokens:
if token.startswith('%'):
return True
return False
def is_superset_of(self, other, soft_combinator=False):
"""Return True iff this selector matches the same elements as `other`,
and perhaps others.
That is, ``.foo`` is a superset of ``.foo.bar``, because the latter is
more specific.
Set `soft_combinator` true to ignore the specific case of this selector
having a descendent combinator and `other` having anything else. This
is for superset checking for ``@extend``, where a space combinator
really means "none".
"""
# Combinators must match, OR be compatible -- space is a superset of >,
# ~ is a superset of +
if soft_combinator and self.combinator == ' ':
combinator_superset = True
else:
combinator_superset = (
self.combinator == other.combinator or
(self.combinator == ' ' and other.combinator == '>') or
(self.combinator == '~' and other.combinator == '+'))
return (
combinator_superset and
set(self.tokens) <= set(other.tokens))
def replace_parent(self, parent_simples):
"""If ``&`` (or the legacy xCSS equivalent ``self``) appears in this
selector, replace it with the given iterable of parent selectors.
Returns a tuple of simple selectors.
"""
assert parent_simples
ancestors = parent_simples[:-1]
parent = parent_simples[-1]
did_replace = False
new_tokens = []
for token in self.tokens:
if not did_replace and token in ('&', 'self'):
did_replace = True
new_tokens.extend(parent.tokens)
if token == 'self':
warn(FutureWarning(
"The xCSS 'self' selector is deprecated and will be "
"removed in 2.0. Use & instead. ({0!r})"
.format(self)
))
else:
new_tokens.append(token)
if not did_replace:
# This simple selector doesn't contain a parent reference so just
# stick it on the end
return parent_simples + (self,)
# This simple selector was merged into the direct parent.
merged_self = type(self)(parent.combinator, new_tokens)
selector = ancestors + (merged_self,)
# Our combinator goes on the first ancestor, i.e., substituting "foo
# bar baz" into "+ &.quux" produces "+ foo bar baz.quux". This means a
# potential conflict with the first ancestor's combinator!
root = selector[0]
if not _is_combinator_subset_of(self.combinator, root.combinator):
raise ValueError(
"Can't sub parent {0!r} into {1!r}: "
"combinators {2!r} and {3!r} conflict!"
.format(
parent_simples, self, self.combinator, root.combinator))
root = type(self)(self.combinator, root.tokens)
selector = (root,) + selector[1:]
return tuple(selector)
def merge_into(self, other):
"""Merge two simple selectors together. This is expected to be the
selector being injected into `other` -- that is, `other` is the
selector for a block using ``@extend``, and `self` is a selector being
extended.
Element tokens must come first, and pseudo-element tokens must come
last, and there can only be one of each. The final selector thus looks
something like::
[element] [misc self tokens] [misc other tokens] [pseudo-element]
This method does not check for duplicate tokens; those are assumed to
have been removed earlier, during the search for a hinge.
"""
# TODO it shouldn't be possible to merge two elements or two pseudo
# elements, /but/ it shouldn't just be a fatal error here -- it
# shouldn't even be considered a candidate for extending!
# TODO this is slightly inconsistent with ruby, which treats a trailing
# set of self tokens like ':before.foo' as a single unit to be stuck at
# the end. but that's completely bogus anyway.
element = []
middle = []
pseudo = []
for token in self.tokens + other.tokens:
if token in CSS2_PSEUDO_ELEMENTS or token.startswith('::'):
pseudo.append(token)
elif token[0] in BODY_TOKEN_SIGILS:
middle.append(token)
else:
element.append(token)
new_tokens = element + middle + pseudo
if self.combinator == ' ' or self.combinator == other.combinator:
combinator = other.combinator
elif other.combinator == ' ':
combinator = self.combinator
else:
raise ValueError(
"Don't know how to merge conflicting combinators: "
"{0!r} and {1!r}"
.format(self, other))
return type(self)(combinator, new_tokens)
def difference(self, other):
new_tokens = tuple(token for token in self.tokens if token not in set(other.tokens))
return type(self)(self.combinator, new_tokens)
def render(self):
# TODO fail if there are no tokens, or if one is a placeholder?
rendered = ''.join(self.tokens)
if self.combinator == ' ':
return rendered
elif rendered:
return self.combinator + ' ' + rendered
else:
return self.combinator
class Selector(object):
"""A single CSS selector."""
def __init__(self, simples):
"""Return a selector containing a sequence of `SimpleSelector`s.
You probably want to use `parse_many` or `parse_one` instead.
"""
# TODO enforce uniqueness
self.simple_selectors = tuple(simples)
@classmethod
def parse_many(cls, selector):
selector = selector.strip()
ret = []
pending = dict(
simples=[],
combinator=' ',
tokens=[],
)
def promote_simple():
if pending['tokens']:
pending['simples'].append(
SimpleSelector(pending['combinator'], pending['tokens']))
pending['combinator'] = ' '
pending['tokens'] = []
def promote_selector():
promote_simple()
if pending['combinator'] != ' ':
pending['simples'].append(
SimpleSelector(pending['combinator'], []))
pending['combinator'] = ' '
if pending['simples']:
ret.append(cls(pending['simples']))
pending['simples'] = []
pos = 0
while pos < len(selector):
# TODO i don't think this deals with " + " correctly. anywhere.
# TODO this used to turn "1.5%" into empty string; why does error
# not work?
m = SELECTOR_TOKENIZER.match(selector, pos)
if not m:
# TODO prettify me
raise SyntaxError("Couldn't parse selector: %r" % (selector,))
token = m.group(0)
pos += len(token)
# Kill any extraneous space, BUT make sure not to turn a lone space
# into an empty string
token = token.strip() or ' '
if token == ',':
# End current selector
promote_selector()
elif token in ' +>~':
# End current simple selector
promote_simple()
pending['combinator'] = token
else:
# Add to pending simple selector
pending['tokens'].append(token)
# Deal with any remaining pending bits
promote_selector()
return ret
@classmethod
def parse_one(cls, selector_string):
selectors = cls.parse_many(selector_string)
if len(selectors) != 1:
# TODO better error
raise ValueError
return selectors[0]
def __repr__(self):
return "<%s: %r>" % (type(self).__name__, self.render())
def __hash__(self):
return hash(self.simple_selectors)
def __eq__(self, other):
if not isinstance(other, Selector):
return NotImplemented
return self.simple_selectors == other.simple_selectors
@property
def has_parent_reference(self):
for simple in self.simple_selectors:
if simple.has_parent_reference:
return True
return False
@property
def has_placeholder(self):
for simple in self.simple_selectors:
if simple.has_placeholder:
return True
return False
def with_parent(self, parent):
saw_parent_ref = False
new_simples = []
for simple in self.simple_selectors:
if simple.has_parent_reference:
new_simples.extend(simple.replace_parent(parent.simple_selectors))
saw_parent_ref = True
else:
new_simples.append(simple)
if not saw_parent_ref:
new_simples = parent.simple_selectors + tuple(new_simples)
return type(self)(new_simples)
def lookup_key(self):
"""Build a key from the "important" parts of a selector: elements,
classes, ids.
"""
parts = set()
for node in self.simple_selectors:
for token in node.tokens:
if token[0] not in ':[':
parts.add(token)
if not parts:
# Should always have at least ONE key; selectors with no elements,
# no classes, and no ids can be indexed as None to avoid a scan of
# every selector in the entire document
parts.add(None)
return frozenset(parts)
def is_superset_of(self, other):
assert isinstance(other, Selector)
idx = 0
for other_node in other.simple_selectors:
if idx >= len(self.simple_selectors):
return False
while idx < len(self.simple_selectors):
node = self.simple_selectors[idx]
idx += 1
if node.is_superset_of(other_node):
break
return True
def substitute(self, target, replacement):
"""Return a list of selectors obtained by replacing the `target`
selector with `replacement`.
Herein lie the guts of the Sass @extend directive.
In general, for a selector ``a X b Y c``, a target ``X Y``, and a
replacement ``q Z``, return the selectors ``a q X b Z c`` and ``q a X b
Z c``. Note in particular that no more than two selectors will be
returned, and the permutation of ancestors will never insert new simple
selectors "inside" the target selector.
"""
# Find the target in the parent selector, and split it into
# before/after
p_before, p_extras, p_after = self.break_around(target.simple_selectors)
# The replacement has no hinge; it only has the most specific simple
# selector (which is the part that replaces "self" in the parent) and
# whatever preceding simple selectors there may be
r_trail = replacement.simple_selectors[:-1]
r_extras = replacement.simple_selectors[-1]
# TODO what if the prefix doesn't match? who wins? should we even get
# this far?
focal_nodes = (p_extras.merge_into(r_extras),)
befores = _merge_selectors(p_before, r_trail)
cls = type(self)
return [
cls(before + focal_nodes + p_after)
for before in befores]
def break_around(self, hinge):
"""Given a simple selector node contained within this one (a "hinge"),
break it in half and return a parent selector, extra specifiers for the
hinge, and a child selector.
That is, given a hinge X, break the selector A + X.y B into A, + .y,
and B.
"""
hinge_start = hinge[0]
for i, node in enumerate(self.simple_selectors):
# In this particular case, a ' ' combinator actually means "no" (or
# any) combinator, so it should be ignored
if hinge_start.is_superset_of(node, soft_combinator=True):
start_idx = i
break
else:
raise ValueError(
"Couldn't find hinge %r in compound selector %r" %
(hinge_start, self.simple_selectors))
for i, hinge_node in enumerate(hinge):
if i == 0:
# We just did this
continue
self_node = self.simple_selectors[start_idx + i]
if hinge_node.is_superset_of(self_node):
continue
# TODO this isn't true; consider finding `a b` in `a c a b`
raise ValueError(
"Couldn't find hinge %r in compound selector %r" %
(hinge_node, self.simple_selectors))
end_idx = start_idx + len(hinge) - 1
focal_node = self.simple_selectors[end_idx]
extras = focal_node.difference(hinge[-1])
return (
self.simple_selectors[:start_idx],
extras,
self.simple_selectors[end_idx + 1:])
def render(self):
return ' '.join(simple.render() for simple in self.simple_selectors)
def _merge_selectors(left, right):
"""Given two selector chains (lists of simple selectors), return a list of
selector chains representing elements matched by both of them.
This operation is not exact, and involves some degree of fudging -- the
wackier and more divergent the input, the more fudging. It's meant to be
what a human might expect rather than a precise covering of all possible
cases. Most notably, when the two input chains have absolutely nothing in
common, the output is merely ``left + right`` and ``right + left`` rather
than all possible interleavings.
"""
if not left or not right:
# At least one is empty, so there are no conflicts; just return
# whichever isn't empty. Remember to return a LIST, though
return [left or right]
lcs = longest_common_subsequence(left, right, _merge_simple_selectors)
ret = [()] # start with a dummy empty chain or weaving won't work
left_last = 0
right_last = 0
for left_next, right_next, merged in lcs:
ret = _weave_conflicting_selectors(
ret,
left[left_last:left_next],
right[right_last:right_next],
(merged,))
left_last = left_next + 1
right_last = right_next + 1
ret = _weave_conflicting_selectors(
ret,
left[left_last:],
right[right_last:])
return ret
def _weave_conflicting_selectors(prefixes, a, b, suffix=()):
"""Part of the selector merge algorithm above. Not useful on its own. Pay
no attention to the man behind the curtain.
"""
# OK, what this actually does: given a list of selector chains, two
# "conflicting" selector chains, and an optional suffix, return a new list
# of chains like this:
# prefix[0] + a + b + suffix,
# prefix[0] + b + a + suffix,
# prefix[1] + a + b + suffix,
# ...
# In other words, this just appends a new chain to each of a list of given
# chains, except that the new chain might be the superposition of two
# other incompatible chains.
both = a and b
for prefix in prefixes:
yield prefix + a + b + suffix
if both:
# Only use both orderings if there's an actual conflict!
yield prefix + b + a + suffix
def _merge_simple_selectors(a, b):
"""Merge two simple selectors, for the purposes of the LCS algorithm below.
In practice this returns the more specific selector if one is a subset of
the other, else it returns None.
"""
# TODO what about combinators
if a.is_superset_of(b):
return b
elif b.is_superset_of(a):
return a
else:
return None
def longest_common_subsequence(a, b, mergefunc=None):
"""Find the longest common subsequence between two iterables.
The longest common subsequence is the core of any diff algorithm: it's the
longest sequence of elements that appears in both parent sequences in the
same order, but NOT necessarily consecutively.
Original algorithm borrowed from Wikipedia:
http://en.wikipedia.org/wiki/Longest_common_subsequence_problem#Code_for_the_dynamic_programming_solution
This function is used only to implement @extend, largely because that's
what the Ruby implementation does. Thus it's been extended slightly from
the simple diff-friendly algorithm given above.
What @extend wants to know is whether two simple selectors are compatible,
not just equal. To that end, you must pass in a "merge" function to
compare a pair of elements manually. It should return `None` if they are
incompatible, and a MERGED element if they are compatible -- in the case of
selectors, this is whichever one is more specific.
Because of this fuzzier notion of equality, the return value is a list of
``(a_index, b_index, value)`` tuples rather than items alone.
"""
if mergefunc is None:
# Stupid default, just in case
def mergefunc(a, b):
if a == b:
return a
return None
# Precalculate equality, since it can be a tad expensive and every pair is
# compared at least once
eq = {}
for ai, aval in enumerate(a):
for bi, bval in enumerate(b):
eq[ai, bi] = mergefunc(aval, bval)
# Build the "length" matrix, which provides the length of the LCS for
# arbitrary-length prefixes. -1 exists only to support the base case
prefix_lcs_length = {}
for ai in range(-1, len(a)):
for bi in range(-1, len(b)):
if ai == -1 or bi == -1:
l = 0
elif eq[ai, bi]:
l = prefix_lcs_length[ai - 1, bi - 1] + 1
else:
l = max(
prefix_lcs_length[ai, bi - 1],
prefix_lcs_length[ai - 1, bi])
prefix_lcs_length[ai, bi] = l
# The interesting part. The key insight is that the bottom-right value in
# the length matrix must be the length of the LCS because of how the matrix
# is defined, so all that's left to do is backtrack from the ends of both
# sequences in whatever way keeps the LCS as long as possible, and keep
# track of the equal pairs of elements we see along the way.
# Wikipedia does this with recursion, but the algorithm is trivial to
# rewrite as a loop, as below.
ai = len(a) - 1
bi = len(b) - 1
ret = []
while ai >= 0 and bi >= 0:
merged = eq[ai, bi]
if merged is not None:
ret.append((ai, bi, merged))
ai -= 1
bi -= 1
elif prefix_lcs_length[ai, bi - 1] > prefix_lcs_length[ai - 1, bi]:
bi -= 1
else:
ai -= 1
# ret has the latest items first, which is backwards
ret.reverse()
return ret
|