Package cssutils :: Module util
[hide private]
[frames | no frames]

Source Code for Module cssutils.util

  1  """base classes for css and stylesheets packages 
  2   
  3  **this test class does not run standalone!** 
  4  see _readURL() to fix this temporarily 
  5   
  6  """ 
  7  __all__ = [] 
  8  __docformat__ = 'restructuredtext' 
  9  __version__ = '$Id: util.py 1159 2008-03-19 20:23:13Z cthedot $' 
 10   
 11   
 12  import codecs 
 13  from itertools import ifilter 
 14  import re 
 15  import types 
 16  import urllib2 
 17  import xml.dom 
 18  import cssutils  
 19  import encutils # this test class does not run standalone! 
 20  from tokenize2 import Tokenizer 
21 22 -class Base(object):
23 """ 24 Base class for most CSS and StyleSheets classes 25 26 **Superceded by Base2 which is used for new seq handling class.** 27 See cssutils.util.Base2 28 29 Contains helper methods for inheriting classes helping parsing 30 31 ``_normalize`` is static as used by Preferences. 32 """ 33 __tokenizer2 = Tokenizer() 34 35 _log = cssutils.log 36 _prods = cssutils.tokenize2.CSSProductions 37 38 # for more on shorthand properties see 39 # http://www.dustindiaz.com/css-shorthand/ 40 # format: shorthand: [(propname, mandatorycheck?)*] 41 _SHORTHANDPROPERTIES = { 42 u'background': [], 43 u'border': [], 44 u'border-left': [], 45 u'border-right': [], 46 u'border-top': [], 47 u'border-bottom': [], 48 u'border-color': [], 49 u'border-style': [], 50 u'border-width': [], 51 u'cue': [], 52 u'font': [], 53 # [('font-weight', True), 54 # ('font-size', True), 55 # ('line-height', False), 56 # ('font-family', True)], 57 u'list-style': [], 58 u'margin': [], 59 u'outline': [], 60 u'padding': [], 61 u'pause': [] 62 } 63 64 # simple escapes, all non unicodes 65 __escapes = re.compile(ur'(\\[^0-9a-fA-F])').sub 66 # all unicode (see cssproductions "unicode") 67 __unicodes = re.compile(ur'\\[0-9a-fA-F]{1,6}[\t|\r|\n|\f|\x20]?').sub 68 69 @staticmethod
70 - def _normalize(x):
71 """ 72 normalizes x, namely: 73 74 - remove any \ before non unicode sequences (0-9a-zA-Z) so for 75 x=="c\olor\" return "color" (unicode escape sequences should have 76 been resolved by the tokenizer already) 77 - lowercase 78 """ 79 if x: 80 def removeescape(matchobj): 81 return matchobj.group(0)[1:]
82 x = Base.__escapes(removeescape, x) 83 return x.lower() 84 else: 85 return x
86
87 - def _checkReadonly(self):
88 "raises xml.dom.NoModificationAllowedErr if rule/... is readonly" 89 if hasattr(self, '_readonly') and self._readonly: 90 raise xml.dom.NoModificationAllowedErr( 91 u'%s is readonly.' % self.__class__) 92 return True 93 return False
94
95 - def _splitNamespacesOff(self, text_namespaces_tuple):
96 """ 97 returns tuple (text, dict-of-namespaces) or if no namespaces are 98 in cssText returns (cssText, {}) 99 100 used in Selector, SelectorList, CSSStyleRule, CSSMediaRule and 101 CSSStyleSheet 102 """ 103 if isinstance(text_namespaces_tuple, tuple): 104 return text_namespaces_tuple[0], _SimpleNamespaces( 105 text_namespaces_tuple[1]) 106 else: 107 return text_namespaces_tuple, _SimpleNamespaces()
108
109 - def _tokenize2(self, textortokens):
110 """ 111 returns tokens of textortokens which may already be tokens in which 112 case simply returns input 113 """ 114 if not textortokens: 115 return None 116 elif isinstance(textortokens, basestring): 117 # needs to be tokenized 118 return self.__tokenizer2.tokenize( 119 textortokens) 120 elif types.GeneratorType == type(textortokens): 121 # already tokenized 122 return textortokens 123 elif isinstance(textortokens, tuple): 124 # a single token (like a comment) 125 return [textortokens] 126 else: 127 # already tokenized but return generator 128 return (x for x in textortokens)
129
130 - def _nexttoken(self, tokenizer, default=None):
131 "returns next token in generator tokenizer or the default value" 132 try: 133 return tokenizer.next() 134 except (StopIteration, AttributeError): 135 return default
136
137 - def _type(self, token):
138 "returns type of Tokenizer token" 139 if token: 140 return token[0] 141 else: 142 return None
143
144 - def _tokenvalue(self, token, normalize=False):
145 "returns value of Tokenizer token" 146 if token and normalize: 147 return Base._normalize(token[1]) 148 elif token: 149 return token[1] 150 else: 151 return None
152
153 - def _stringtokenvalue(self, token):
154 """ 155 for STRING returns the actual content without surrounding "" or '' 156 and without respective escapes, e.g.:: 157 158 "with \" char" => with " char 159 """ 160 if token: 161 value = token[1] 162 return value.replace('\\'+value[0], value[0])[1:-1] 163 else: 164 return None
165
166 - def _uritokenvalue(self, token):
167 """ 168 for URI returns the actual content without surrounding url() 169 or url(""), url('') and without respective escapes, e.g.:: 170 171 url("\"") => " 172 """ 173 if token: 174 value = token[1][4:-1].strip() 175 if (value[0] in '\'"') and (value[0] == value[-1]): 176 # a string "..." or '...' 177 value = value.replace('\\'+value[0], value[0])[1:-1] 178 return value 179 else: 180 return None
181
182 - def _tokensupto2(self, 183 tokenizer, 184 starttoken=None, 185 blockstartonly=False, # { 186 blockendonly=False, # } 187 mediaendonly=False, 188 importmediaqueryendonly=False, # ; or STRING 189 mediaqueryendonly=False, # { or STRING 190 semicolon=False, # ; 191 propertynameendonly=False, # : 192 propertyvalueendonly=False, # ! ; } 193 propertypriorityendonly=False, # ; } 194 selectorattendonly=False, # ] 195 funcendonly=False, # ) 196 listseponly=False, # , 197 separateEnd=False # returns (resulttokens, endtoken) 198 ):
199 """ 200 returns tokens upto end of atrule and end index 201 end is defined by parameters, might be ; } ) or other 202 203 default looks for ending "}" and ";" 204 """ 205 ends = u';}' 206 endtypes = () 207 brace = bracket = parant = 0 # {}, [], () 208 209 if blockstartonly: # { 210 ends = u'{' 211 brace = -1 # set to 0 with first { 212 elif blockendonly: # } 213 ends = u'}' 214 brace = 1 215 elif mediaendonly: # } 216 ends = u'}' 217 brace = 1 # rules } and mediarules } 218 elif importmediaqueryendonly: 219 # end of mediaquery which may be ; or STRING 220 ends = u';' 221 endtypes = ('STRING',) 222 elif mediaqueryendonly: 223 # end of mediaquery which may be { or STRING 224 # special case, see below 225 ends = u'{' 226 brace = -1 # set to 0 with first { 227 endtypes = ('STRING',) 228 elif semicolon: 229 ends = u';' 230 elif propertynameendonly: # : and ; in case of an error 231 ends = u':;' 232 elif propertyvalueendonly: # ; or !important 233 ends = u';!' 234 elif propertypriorityendonly: # ; 235 ends = u';' 236 elif selectorattendonly: # ] 237 ends = u']' 238 if starttoken and self._tokenvalue(starttoken) == u'[': 239 bracket = 1 240 elif funcendonly: # ) 241 ends = u')' 242 parant = 1 243 elif listseponly: # , 244 ends = u',' 245 246 resulttokens = [] 247 if starttoken: 248 resulttokens.append(starttoken) 249 if tokenizer: 250 for token in tokenizer: 251 typ, val, line, col = token 252 if 'EOF' == typ: 253 resulttokens.append(token) 254 break 255 if u'{' == val: 256 brace += 1 257 elif u'}' == val: 258 brace -= 1 259 elif u'[' == val: 260 bracket += 1 261 elif u']' == val: 262 bracket -= 1 263 # function( or single ( 264 elif u'(' == val or \ 265 Base._prods.FUNCTION == typ: 266 parant += 1 267 elif u')' == val: 268 parant -= 1 269 270 resulttokens.append(token) 271 272 if (brace == bracket == parant == 0) and ( 273 val in ends or typ in endtypes): 274 break 275 elif mediaqueryendonly and brace == -1 and ( 276 bracket == parant == 0) and typ in endtypes: 277 # 
mediaqueryendonly with STRING 278 break 279 280 if separateEnd: 281 # TODO: use this method as generator, then this makes sense 282 if resulttokens: 283 return resulttokens[:-1], resulttokens[-1] 284 else: 285 return resulttokens, None 286 else: 287 return resulttokens
288
289 - def _valuestr(self, t):
290 """ 291 returns string value of t (t may be a string, a list of token tuples 292 or a single tuple in format (type, value, line, col). 293 Mainly used to get a string value of t for error messages. 294 """ 295 if not t: 296 return u'' 297 elif isinstance(t, basestring): 298 return t 299 else: 300 return u''.join([x[1] for x in t])
301
302 - def _adddefaultproductions(self, productions, new=None):
303 """ 304 adds default productions if not already present, used by 305 _parse only 306 307 each production should return the next expected token 308 normaly a name like "uri" or "EOF" 309 some have no expectation like S or COMMENT, so simply return 310 the current value of self.__expected 311 """ 312 def ATKEYWORD(expected, seq, token, tokenizer=None): 313 "TODO: add default impl for unexpected @rule?" 314 return expected
315 316 def COMMENT(expected, seq, token, tokenizer=None): 317 "default implementation for COMMENT token adds CSSCommentRule" 318 seq.append(cssutils.css.CSSComment([token])) 319 return expected 320 321 def S(expected, seq, token, tokenizer=None): 322 "default implementation for S token, does nothing" 323 return expected 324 325 def EOF(expected=None, seq=None, token=None, tokenizer=None): 326 "default implementation for EOF token" 327 return 'EOF' 328 329 p = {'ATKEYWORD': ATKEYWORD, 330 'COMMENT': COMMENT, 331 'S': S, 332 'EOF': EOF # only available if fullsheet 333 } 334 p.update(productions) 335 return p 336
337 - def _parse(self, expected, seq, tokenizer, productions, default=None, 338 new=None):
339 """ 340 puts parsed tokens in seq by calling a production with 341 (seq, tokenizer, token) 342 343 expected 344 a name what token or value is expected next, e.g. 'uri' 345 seq 346 to add rules etc to 347 tokenizer 348 call tokenizer.next() to get next token 349 productions 350 callbacks {tokentype: callback} 351 default 352 default callback if tokentype not in productions 353 new 354 used to init default productions 355 356 returns (wellformed, expected) which the last prod might have set 357 """ 358 wellformed = True 359 if tokenizer: 360 prods = self._adddefaultproductions(productions, new) 361 for token in tokenizer: 362 p = prods.get(token[0], default) 363 if p: 364 expected = p(expected, seq, token, tokenizer) 365 else: 366 wellformed = False 367 self._log.error(u'Unexpected token (%s, %s, %s, %s)' % token) 368 return wellformed, expected
369
class Base2(Base):
    """
    Base class for new seq handling, used by Selector for now only
    """
    def __init__(self):
        self._seq = Seq()

    def _setSeq(self, newseq):
        """
        Set ``newseq`` as the current seq after marking it readonly.
        """
        newseq._readonly = True
        self._seq = newseq

    seq = property(lambda self: self._seq, doc="seq for most classes")

    def _tempSeq(self, readonly=False):
        "Return a new (by default writable) Seq() which is added later."
        return Seq(readonly=readonly)

    def _adddefaultproductions(self, productions, new=None):
        """
        Return a dict of the default productions for ATKEYWORD, COMMENT, S
        and EOF updated with (and so overridden by) ``productions``.
        Used by ``_parse`` only.

        Each production returns the next expected token, normally a name
        like "uri" or "EOF". Productions without an expectation of their
        own like S or COMMENT simply return the ``expected`` given.

        new
            a dict in which ``new['wellformed']`` is set to False if a
            token is found although EOF is expected. May be None (the
            default) in which case the error is logged only.
        """
        def notwellformed():
            # ``new`` is optional, without the check the assignment would
            # raise a TypeError instead of reporting the actual problem
            if new is not None:
                new['wellformed'] = False

        def ATKEYWORD(expected, seq, token, tokenizer=None):
            "default impl for unexpected @rule"
            if expected == 'EOF':
                notwellformed()
                self._log.error(u'Expected EOF.', token=token)
                return expected

            # TODO: parentStyleSheet=self
            rule = cssutils.css.CSSUnknownRule()
            rule.cssText = self._tokensupto2(tokenizer, token)
            if rule.wellformed:
                seq.append(rule, cssutils.css.CSSRule.UNKNOWN_RULE,
                           line=token[2], col=token[3])
            return expected

        def COMMENT(expected, seq, token, tokenizer=None):
            "default impl, adds CSSCommentRule, an error if EOF is expected"
            if expected == 'EOF':
                notwellformed()
                self._log.error(u'Expected EOF but found comment.',
                                token=token)
            # the comment is kept even if it was not expected
            seq.append(cssutils.css.CSSComment([token]), 'COMMENT')
            return expected

        def S(expected, seq, token, tokenizer=None):
            "default impl, does nothing but an error if EOF is expected"
            if expected == 'EOF':
                notwellformed()
                self._log.error(u'Expected EOF but found whitespace.',
                                token=token)
            return expected

        def EOF(expected=None, seq=None, token=None, tokenizer=None):
            "default implementation for EOF token"
            return 'EOF'

        defaultproductions = {'ATKEYWORD': ATKEYWORD,
                              'COMMENT': COMMENT,
                              'S': S,
                              'EOF': EOF  # only available if fullsheet
                              }
        defaultproductions.update(productions)
        return defaultproductions
class Seq(object):
    """
    property seq of Base2 inheriting classes, holds a list of Item objects.

    used only by Selector for now

    is normally readonly, only writable during parsing. The readonly flag
    is checked by ``append`` and ``replace`` only, setting or deleting
    via index is not guarded.
    """
    def __init__(self, readonly=True):
        """
        Start with an empty list of items.

        readonly
            if true (the default) ``append`` and ``replace`` raise an
            AttributeError
        """
        self._seq = []
        self._readonly = readonly

    def __delitem__(self, i):
        del self._seq[i]

    def __getitem__(self, i):
        return self._seq[i]

    def __setitem__(self, i, item):
        """
        Replace the item at ``i`` with a new Item built from ``item`` which
        must be a sequence of exactly (val, typ, line, col).
        """
        # unpacked here instead of in the signature as tuple parameters
        # are not available in all Python versions
        val, typ, line, col = item
        self._seq[i] = Item(val, typ, line, col)

    def __iter__(self):
        return iter(self._seq)

    def __len__(self):
        return len(self._seq)

    def append(self, val, typ, line=None, col=None):
        "if not readonly add new Item(), else raise AttributeError"
        if self._readonly:
            raise AttributeError('Seq is readonly.')
        self._seq.append(Item(val, typ, line, col))

    def replace(self, index=-1, val=None, typ=None, line=None, col=None):
        """
        if not readonly replace the Item at ``index`` (default: the last
        one) with a completely new Item, else raise AttributeError.

        Nothing of the old Item is kept, arguments left out are set to
        None in the new Item.
        """
        if self._readonly:
            raise AttributeError('Seq is readonly.')
        self._seq[index] = Item(val, typ, line, col)

    def __repr__(self):
        "returns a repr which lists a tuple (type, value) for each item"
        # NOTE(review): whitespace inside these two literals is kept as
        # found in the reviewed listing, confirm the intended indent
        items = u',\n '.join([u'(%r, %r)' % (item.type, item.value)
                              for item in self._seq])
        return u'cssutils.%s.%s([\n %s])' % (self.__module__,
                                             self.__class__.__name__,
                                             items)

    def __str__(self):
        return "<cssutils.%s.%s object length=%r at 0x%x>" % (
            self.__module__, self.__class__.__name__, len(self), id(self))
502
class Item(object):
    """
    A single entry of a seq list (successor to the tuple items of the old
    seq). All attributes are read-only properties:

    type
        a semantic type like "element", "attribute"
    value
        the actual value which may be a string, number etc or an instance
        of e.g. a CSSComment
    line, col
        stored as given to the constructor, both default to None
    """
    def __init__(self, value, type, line=None, col=None):
        self.__value = value
        self.__type = type
        self.__line = line
        self.__col = col

    # read-only views on the values given to the constructor

    @property
    def type(self):
        return self.__type

    @property
    def value(self):
        return self.__value

    @property
    def line(self):
        return self.__line

    @property
    def col(self):
        return self.__col

    def __repr__(self):
        details = (self.__module__, self.__class__.__name__,
                   self.__value, self.__type, self.__line, self.__col)
        return "%s.%s(value=%r, type=%r, line=%r, col=%r)" % details
532
class ListSeq(object):
    """
    (EXPERIMENTAL)
    A base class used for list classes like css.SelectorList or
    stylesheets.MediaList

    Adds list like behaviour working on the attribute ``seq`` of the
    inheriting class:

    - item in x => bool
    - len(x) => integer
    - get, set and del x[i]
    - for item in x
    - append(item)

    Setting an item and ``append`` raise NotImplementedError here and must
    be overwritten in the inheriting class.
    """
    def __init__(self):
        # does not need to use ``Seq`` as simple list only
        self.seq = []

    def __contains__(self, item):
        return item in self.seq

    def __delitem__(self, index):
        del self.seq[index]

    def __getitem__(self, index):
        return self.seq[index]

    def __iter__(self):
        # a generator, so ``self.seq`` is looked up when iteration starts
        def items():
            for entry in self.seq:
                yield entry
        return items()

    def __len__(self):
        return len(self.seq)

    def __setitem__(self, index, item):
        "must be overwritten"
        raise NotImplementedError

    def append(self, item):
        "must be overwritten"
        raise NotImplementedError
578
class Deprecated(object):
    """
    Decorator to mark functions or methods as deprecated: each call of
    the decorated function emits a DeprecationWarning before the call is
    passed on.

    Takes a single parameter ``msg`` which is appended to the warning
    text. It should tell which function or method to use instead.
    """
    def __init__(self, msg):
        self.msg = msg

    def __call__(self, func):
        def deprecated_call(*args, **kwargs):
            import warnings
            text = "Call to deprecated method %r. %s" % (func.__name__,
                                                         self.msg)
            # stacklevel 2 reports the caller, not this wrapper
            warnings.warn(text, category=DeprecationWarning, stacklevel=2)
            return func(*args, **kwargs)

        # make the wrapper look like the function it wraps
        deprecated_call.__name__ = func.__name__
        deprecated_call.__doc__ = func.__doc__
        deprecated_call.__dict__.update(func.__dict__)
        return deprecated_call
603
class _Namespaces(object):
    """
    A dictionary like wrapper for @namespace rules used in a CSSStyleSheet.
    Works on effective namespaces, so e.g. if::

        @namespace p1 "uri";
        @namespace p2 "uri";

    only the second rule is effective and kept.

    namespaces
        a dictionary {prefix: namespaceURI} containing the effective
        namespaces only. These are the latest set in the CSSStyleSheet.
        It is built from the rules of the sheet on each access.
    parentStyleSheet
        the parent CSSStyleSheet
    """
    def __init__(self, parentStyleSheet, *args):
        "no initial values are set, only the relevant sheet is"
        self.parentStyleSheet = parentStyleSheet

    def __contains__(self, prefix):
        return prefix in self.namespaces

    def __delitem__(self, prefix):
        """
        Delete the last CSSNamespaceRule with rule.prefix == prefix from
        the parent sheet.

        prefix '' and None are handled the same.

        Raises xml.dom.NamespaceErr if no such rule is found.
        """
        if not prefix:
            prefix = u''
        delrule = self.__findrule(prefix)
        # deleteRule addresses a rule by its position in the complete rule
        # list (see DOM CSSStyleSheet.deleteRule), so the index must be
        # counted over all rules, not over the namespace rules only
        for i, rule in enumerate(self.parentStyleSheet.cssRules):
            if rule.type == rule.NAMESPACE_RULE and rule == delrule:
                self.parentStyleSheet.deleteRule(i)
                return

        raise xml.dom.NamespaceErr('Prefix %r not found.' % prefix)

    def __getitem__(self, prefix):
        "Return namespaceURI of prefix or raise xml.dom.NamespaceErr."
        try:
            return self.namespaces[prefix]
        except KeyError:
            raise xml.dom.NamespaceErr('Prefix %r not found.' % prefix)

    def __iter__(self):
        return self.namespaces.__iter__()

    def __len__(self):
        return len(self.namespaces)

    def __setitem__(self, prefix, namespaceURI):
        "replaces prefix or sets new rule, may raise NoModificationAllowedErr"
        if not prefix:
            prefix = u''  # None or ''
        rule = self.__findrule(prefix)
        if not rule:
            # no rule for this prefix yet
            self.parentStyleSheet.insertRule(
                cssutils.css.CSSNamespaceRule(prefix=prefix,
                                              namespaceURI=namespaceURI),
                inOrder=True)
        else:
            if prefix in self.namespaces:
                # raises NoModificationAllowedErr
                rule.namespaceURI = namespaceURI
            if namespaceURI in self.namespaces.values():
                rule.prefix = prefix

    def __findrule(self, prefix):
        # returns the last namespace rule where prefix == key, else None
        for rule in reversed(self.parentStyleSheet.cssRules):
            if rule.type == rule.NAMESPACE_RULE and rule.prefix == prefix:
                return rule

    def __getNamespaces(self):
        # later rules win: walk backwards and skip any rule whose
        # namespaceURI has been collected already
        namespaces = {}
        for rule in reversed(self.parentStyleSheet.cssRules):
            if rule.type != rule.NAMESPACE_RULE:
                continue
            if rule.namespaceURI not in namespaces.values():
                namespaces[rule.prefix] = rule.namespaceURI
        return namespaces

    namespaces = property(__getNamespaces,
        doc=u'Holds only effective @namespace rules in self.parentStyleSheets'
             '@namespace rules.')

    def get(self, prefix, default):
        return self.namespaces.get(prefix, default)

    def items(self):
        return self.namespaces.items()

    def keys(self):
        return self.namespaces.keys()

    def values(self):
        return self.namespaces.values()

    def prefixForNamespaceURI(self, namespaceURI):
        """
        returns effective prefix for given namespaceURI or raises IndexError
        if this cannot be found"""
        for prefix, uri in self.namespaces.items():
            if uri == namespaceURI:
                return prefix
        raise IndexError(u'NamespaceURI %r not found.' % namespaceURI)

    def __str__(self):
        return u"<cssutils.util.%s object parentStyleSheet=%r namespaces=%r "\
               u"at 0x%x>" % (
                self.__class__.__name__, str(self.parentStyleSheet),
                self.namespaces, id(self))
717
class _SimpleNamespaces(_Namespaces):
    """
    Namespaces used in objects like Selector as long as they are not
    connected to a CSSStyleSheet. The prefixes are kept in a plain dict
    instead of being read from the rules of a sheet.
    """
    def __init__(self, *args):
        # takes anything ``dict()`` takes, no parent sheet is set
        self.__namespaces = dict(*args)

    def __setitem__(self, prefix, namespaceURI):
        # stored as given, the prefix is not normalized here
        self.__namespaces[prefix] = namespaceURI

    namespaces = property(lambda self: self.__namespaces,
                          doc=u'Dict Wrapper for self.sheets @namespace rules.')

    def __str__(self):
        details = (self.__class__.__name__, self.namespaces, id(self))
        return u"<cssutils.util.%s object namespaces=%r at 0x%x>" % details
736
737 738 -def _readURL(url, encoding=None):
739 """Retrieve text from url using explicit or detected encoding via encutils 740 """ 741 try: 742 req = urllib2.Request(url) 743 res = urllib2.urlopen(req) 744 except ValueError, e: 745 # invalid url 746 cssutils.log.warn(u'Error opening url=%r: %s' % (url, e.message), 747 error=ValueError) 748 except urllib2.HTTPError, e: 749 # http error 750 cssutils.log.warn(u'Error opening url=%r: %s %s' % (url, e.code, e.msg), 751 error=e) # special case error=e! 752 except urllib2.URLError, e: 753 # urlerror like mailto: 754 cssutils.log.warn(u'Error opening url=%r: %s' % (url, e.reason), 755 error=e) # special case error=e! 756 else: 757 if res: 758 # get real URL, may have been redirected 759 url = res.geturl() 760 if not encoding: 761 # COMMENT OUT IF RUNNING THIS TEST STANDALONE! 762 media_type, encoding = encutils.getHTTPInfo(res) 763 if media_type != u'text/css': 764 self._log.warn(u'Unexpected media type opening url=%s: %r != "text/css"' % 765 (url, media_type)) 766 try: 767 return codecs.getreader(encoding)(res).read() 768 except urllib2.HTTPError, e: 769 # http error 770 cssutils.log.warn(u'Error reading url=%r: %s %s' % (url, e.code, e.msg), 771 error=e) # special case error=e! 772 except urllib2.URLError, e: 773 cssutils.log.warn(u'Error reading url=%r: %s' % (url, e.reason), 774 error=e) # special case error=e! 775 except Exception, e: 776 cssutils.log.warn(u'Error reading url=%r: %r' % (url, e))
777