Package cssutils :: Module util
[hide private]
[frames | no frames]

Source Code for Module cssutils.util

  1  """base classes and helper functions for css and stylesheets packages 
  2  """ 
  3  __all__ = [] 
  4  __docformat__ = 'restructuredtext' 
  5  __version__ = '$Id: util.py 1284 2008-06-05 16:29:17Z cthedot $' 
  6   
  7  import codecs 
  8  from itertools import ifilter 
  9  import re 
 10  import types 
 11  import urllib2 
 12  import xml.dom 
 13  import cssutils 
 14  from tokenize2 import Tokenizer 
15 # COMMENT OUT IF RUNNING THIS TEST STANDALONE! 16 import encutils 17 18 -class Base(object):
19 """ 20 Base class for most CSS and StyleSheets classes 21 22 **Superceded by Base2 which is used for new seq handling class.** 23 See cssutils.util.Base2 24 25 Contains helper methods for inheriting classes helping parsing 26 27 ``_normalize`` is static as used by Preferences. 28 """ 29 __tokenizer2 = Tokenizer() 30 31 _log = cssutils.log 32 _prods = cssutils.tokenize2.CSSProductions 33 34 # for more on shorthand properties see 35 # http://www.dustindiaz.com/css-shorthand/ 36 # format: shorthand: [(propname, mandatorycheck?)*] 37 _SHORTHANDPROPERTIES = { 38 u'background': [], 39 u'border': [], 40 u'border-left': [], 41 u'border-right': [], 42 u'border-top': [], 43 u'border-bottom': [], 44 u'border-color': [], 45 u'border-style': [], 46 u'border-width': [], 47 u'cue': [], 48 u'font': [], 49 # [('font-weight', True), 50 # ('font-size', True), 51 # ('line-height', False), 52 # ('font-family', True)], 53 u'list-style': [], 54 u'margin': [], 55 u'outline': [], 56 u'padding': [], 57 u'pause': [] 58 } 59 60 # simple escapes, all non unicodes 61 __escapes = re.compile(ur'(\\[^0-9a-fA-F])').sub 62 # all unicode (see cssproductions "unicode") 63 __unicodes = re.compile(ur'\\[0-9a-fA-F]{1,6}[\t|\r|\n|\f|\x20]?').sub 64 65 @staticmethod
66 - def _normalize(x):
67 """ 68 normalizes x, namely: 69 70 - remove any \ before non unicode sequences (0-9a-zA-Z) so for 71 x=="c\olor\" return "color" (unicode escape sequences should have 72 been resolved by the tokenizer already) 73 - lowercase 74 """ 75 if x: 76 def removeescape(matchobj): 77 return matchobj.group(0)[1:]
78 x = Base.__escapes(removeescape, x) 79 return x.lower() 80 else: 81 return x
82
83 - def _checkReadonly(self):
84 "raises xml.dom.NoModificationAllowedErr if rule/... is readonly" 85 if hasattr(self, '_readonly') and self._readonly: 86 raise xml.dom.NoModificationAllowedErr( 87 u'%s is readonly.' % self.__class__) 88 return True 89 return False
90
91 - def _splitNamespacesOff(self, text_namespaces_tuple):
92 """ 93 returns tuple (text, dict-of-namespaces) or if no namespaces are 94 in cssText returns (cssText, {}) 95 96 used in Selector, SelectorList, CSSStyleRule, CSSMediaRule and 97 CSSStyleSheet 98 """ 99 if isinstance(text_namespaces_tuple, tuple): 100 return text_namespaces_tuple[0], _SimpleNamespaces( 101 text_namespaces_tuple[1]) 102 else: 103 return text_namespaces_tuple, _SimpleNamespaces()
104
105 - def _tokenize2(self, textortokens):
106 """ 107 returns tokens of textortokens which may already be tokens in which 108 case simply returns input 109 """ 110 if not textortokens: 111 return None 112 elif isinstance(textortokens, basestring): 113 # needs to be tokenized 114 return self.__tokenizer2.tokenize( 115 textortokens) 116 elif types.GeneratorType == type(textortokens): 117 # already tokenized 118 return textortokens 119 elif isinstance(textortokens, tuple): 120 # a single token (like a comment) 121 return [textortokens] 122 else: 123 # already tokenized but return generator 124 return (x for x in textortokens)
125
126 - def _nexttoken(self, tokenizer, default=None):
127 "returns next token in generator tokenizer or the default value" 128 try: 129 return tokenizer.next() 130 except (StopIteration, AttributeError): 131 return default
132
133 - def _type(self, token):
134 "returns type of Tokenizer token" 135 if token: 136 return token[0] 137 else: 138 return None
139
140 - def _tokenvalue(self, token, normalize=False):
141 "returns value of Tokenizer token" 142 if token and normalize: 143 return Base._normalize(token[1]) 144 elif token: 145 return token[1] 146 else: 147 return None
148
149 - def _stringtokenvalue(self, token):
150 """ 151 for STRING returns the actual content without surrounding "" or '' 152 and without respective escapes, e.g.:: 153 154 "with \" char" => with " char 155 """ 156 if token: 157 value = token[1] 158 return value.replace('\\'+value[0], value[0])[1:-1] 159 else: 160 return None
161
162 - def _uritokenvalue(self, token):
163 """ 164 for URI returns the actual content without surrounding url() 165 or url(""), url('') and without respective escapes, e.g.:: 166 167 url("\"") => " 168 """ 169 if token: 170 value = token[1][4:-1].strip() 171 if (value[0] in '\'"') and (value[0] == value[-1]): 172 # a string "..." or '...' 173 value = value.replace('\\'+value[0], value[0])[1:-1] 174 return value 175 else: 176 return None
177
178 - def _tokensupto2(self, 179 tokenizer, 180 starttoken=None, 181 blockstartonly=False, # { 182 blockendonly=False, # } 183 mediaendonly=False, 184 importmediaqueryendonly=False, # ; or STRING 185 mediaqueryendonly=False, # { or STRING 186 semicolon=False, # ; 187 propertynameendonly=False, # : 188 propertyvalueendonly=False, # ! ; } 189 propertypriorityendonly=False, # ; } 190 selectorattendonly=False, # ] 191 funcendonly=False, # ) 192 listseponly=False, # , 193 separateEnd=False # returns (resulttokens, endtoken) 194 ):
195 """ 196 returns tokens upto end of atrule and end index 197 end is defined by parameters, might be ; } ) or other 198 199 default looks for ending "}" and ";" 200 """ 201 ends = u';}' 202 endtypes = () 203 brace = bracket = parant = 0 # {}, [], () 204 205 if blockstartonly: # { 206 ends = u'{' 207 brace = -1 # set to 0 with first { 208 elif blockendonly: # } 209 ends = u'}' 210 brace = 1 211 elif mediaendonly: # } 212 ends = u'}' 213 brace = 1 # rules } and mediarules } 214 elif importmediaqueryendonly: 215 # end of mediaquery which may be ; or STRING 216 ends = u';' 217 endtypes = ('STRING',) 218 elif mediaqueryendonly: 219 # end of mediaquery which may be { or STRING 220 # special case, see below 221 ends = u'{' 222 brace = -1 # set to 0 with first { 223 endtypes = ('STRING',) 224 elif semicolon: 225 ends = u';' 226 elif propertynameendonly: # : and ; in case of an error 227 ends = u':;' 228 elif propertyvalueendonly: # ; or !important 229 ends = u';!' 230 elif propertypriorityendonly: # ; 231 ends = u';' 232 elif selectorattendonly: # ] 233 ends = u']' 234 if starttoken and self._tokenvalue(starttoken) == u'[': 235 bracket = 1 236 elif funcendonly: # ) 237 ends = u')' 238 parant = 1 239 elif listseponly: # , 240 ends = u',' 241 242 resulttokens = [] 243 if starttoken: 244 resulttokens.append(starttoken) 245 if tokenizer: 246 for token in tokenizer: 247 typ, val, line, col = token 248 if 'EOF' == typ: 249 resulttokens.append(token) 250 break 251 if u'{' == val: 252 brace += 1 253 elif u'}' == val: 254 brace -= 1 255 elif u'[' == val: 256 bracket += 1 257 elif u']' == val: 258 bracket -= 1 259 # function( or single ( 260 elif u'(' == val or \ 261 Base._prods.FUNCTION == typ: 262 parant += 1 263 elif u')' == val: 264 parant -= 1 265 266 resulttokens.append(token) 267 268 if (brace == bracket == parant == 0) and ( 269 val in ends or typ in endtypes): 270 break 271 elif mediaqueryendonly and brace == -1 and ( 272 bracket == parant == 0) and typ in endtypes: 273 # 
mediaqueryendonly with STRING 274 break 275 276 if separateEnd: 277 # TODO: use this method as generator, then this makes sense 278 if resulttokens: 279 return resulttokens[:-1], resulttokens[-1] 280 else: 281 return resulttokens, None 282 else: 283 return resulttokens
284
285 - def _valuestr(self, t):
286 """ 287 returns string value of t (t may be a string, a list of token tuples 288 or a single tuple in format (type, value, line, col). 289 Mainly used to get a string value of t for error messages. 290 """ 291 if not t: 292 return u'' 293 elif isinstance(t, basestring): 294 return t 295 else: 296 return u''.join([x[1] for x in t])
297
298 - def _adddefaultproductions(self, productions, new=None):
299 """ 300 adds default productions if not already present, used by 301 _parse only 302 303 each production should return the next expected token 304 normaly a name like "uri" or "EOF" 305 some have no expectation like S or COMMENT, so simply return 306 the current value of self.__expected 307 """ 308 def ATKEYWORD(expected, seq, token, tokenizer=None): 309 "TODO: add default impl for unexpected @rule?" 310 if expected != 'EOF': 311 # TODO: parentStyleSheet=self 312 rule = cssutils.css.CSSUnknownRule() 313 rule.cssText = self._tokensupto2(tokenizer, token) 314 if rule.wellformed: 315 seq.append(rule) 316 return expected 317 else: 318 new['wellformed'] = False 319 self._log.error(u'Expected EOF.', token=token) 320 return expected
321 322 def COMMENT(expected, seq, token, tokenizer=None): 323 "default implementation for COMMENT token adds CSSCommentRule" 324 seq.append(cssutils.css.CSSComment([token])) 325 return expected 326 327 def S(expected, seq, token, tokenizer=None): 328 "default implementation for S token, does nothing" 329 return expected 330 331 def EOF(expected=None, seq=None, token=None, tokenizer=None): 332 "default implementation for EOF token" 333 return 'EOF' 334 335 p = {'ATKEYWORD': ATKEYWORD, 336 'COMMENT': COMMENT, 337 'S': S, 338 'EOF': EOF # only available if fullsheet 339 } 340 p.update(productions) 341 return p 342
343 - def _parse(self, expected, seq, tokenizer, productions, default=None, 344 new=None):
345 """ 346 puts parsed tokens in seq by calling a production with 347 (seq, tokenizer, token) 348 349 expected 350 a name what token or value is expected next, e.g. 'uri' 351 seq 352 to add rules etc to 353 tokenizer 354 call tokenizer.next() to get next token 355 productions 356 callbacks {tokentype: callback} 357 default 358 default callback if tokentype not in productions 359 new 360 used to init default productions 361 362 returns (wellformed, expected) which the last prod might have set 363 """ 364 wellformed = True 365 if tokenizer: 366 prods = self._adddefaultproductions(productions, new) 367 for token in tokenizer: 368 p = prods.get(token[0], default) 369 if p: 370 expected = p(expected, seq, token, tokenizer) 371 else: 372 wellformed = False 373 self._log.error(u'Unexpected token (%s, %s, %s, %s)' % token) 374 return wellformed, expected
375
class Base2(Base):
    """
    Base class for new seq handling, used by Selector for now only
    """
    def __init__(self):
        self._seq = Seq()

    def _setSeq(self, newseq):
        """
        sets newseq and makes it readonly
        """
        newseq._readonly = True
        self._seq = newseq

    seq = property(lambda self: self._seq, doc="seq for most classes")

    def _tempSeq(self, readonly=False):
        "get a writeable Seq() which is added later"
        return Seq(readonly=readonly)

    def _adddefaultproductions(self, productions, new=None):
        """
        returns a dict of the default productions for ATKEYWORD, COMMENT,
        S and EOF updated with (and so overwritten by) the given
        productions, used by _parse only

        each production is called with (expected, seq, token, tokenizer)
        and should return the next expected token, normally a name like
        "uri" or "EOF". Some have no expectation like S or COMMENT, so
        they simply return the expected value they were given.

        new
            optional dict, its key ``wellformed`` is set to False if an
            @rule, a comment or whitespace is found while EOF is expected
        """
        def notwellformed():
            # new is optional, do not fail before the error is logged
            if new is not None:
                new['wellformed'] = False

        def ATKEYWORD(expected, seq, token, tokenizer=None):
            "default impl for unexpected @rule"
            if expected != 'EOF':
                # TODO: parentStyleSheet=self
                rule = cssutils.css.CSSUnknownRule()
                rule.cssText = self._tokensupto2(tokenizer, token)
                if rule.wellformed:
                    seq.append(rule, cssutils.css.CSSRule.UNKNOWN_RULE,
                               line=token[2], col=token[3])
                return expected
            else:
                notwellformed()
                self._log.error(u'Expected EOF.', token=token)
                return expected

        def COMMENT(expected, seq, token, tokenizer=None):
            """
            default impl, always adds a CSSComment to seq, additionally
            reports an error if EOF was expected
            """
            if expected == 'EOF':
                notwellformed()
                self._log.error(u'Expected EOF but found comment.', token=token)
            seq.append(cssutils.css.CSSComment([token]), 'COMMENT')
            return expected

        def S(expected, seq, token, tokenizer=None):
            "default impl, only reports an error if EOF was expected"
            if expected == 'EOF':
                notwellformed()
                self._log.error(u'Expected EOF but found whitespace.', token=token)
            return expected

        def EOF(expected=None, seq=None, token=None, tokenizer=None):
            "default implementation for EOF token"
            return 'EOF'

        defaultproductions = {'ATKEYWORD': ATKEYWORD,
                              'COMMENT': COMMENT,
                              'S': S,
                              'EOF': EOF  # only available if fullsheet
                              }
        defaultproductions.update(productions)
        return defaultproductions
class Seq(object):
    """
    property seq of Base2 inheriting classes, holds a list of Item objects.

    used only by Selector for now

    is normally readonly, only writable during parsing
    """
    def __init__(self, readonly=True):
        """
        starts with an empty list of items

        readonly
            if true ``append``, ``appendItem`` and ``replace`` raise
            AttributeError
        """
        self._seq = []
        self._readonly = readonly

    def __delitem__(self, i):
        del self._seq[i]

    def __getitem__(self, i):
        return self._seq[i]

    def __setitem__(self, i, item):
        """
        sets position i to a new Item built from ``item`` which must be
        a sequence of exactly (val, typ, line, col)
        """
        # unpacked here instead of in the signature (tuple parameters
        # are no longer supported by newer Python versions)
        val, typ, line, col = item
        self._seq[i] = Item(val, typ, line, col)

    def __iter__(self):
        return iter(self._seq)

    def __len__(self):
        return len(self._seq)

    def append(self, val, typ, line=None, col=None):
        "if not readonly add new Item(), else raise AttributeError"
        if self._readonly:
            raise AttributeError('Seq is readonly.')
        else:
            self._seq.append(Item(val, typ, line, col))

    def appendItem(self, item):
        """
        if not readonly add item as is (it is expected to be an Item but
        this is not checked), else raise AttributeError
        """
        if self._readonly:
            raise AttributeError('Seq is readonly.')
        else:
            self._seq.append(item)

    def replace(self, index=-1, val=None, typ=None, line=None, col=None):
        """
        if not readonly replace the item at index (default: the last
        one) with a new Item built from the given arguments, else raise
        AttributeError
        """
        if self._readonly:
            raise AttributeError('Seq is readonly.')
        else:
            self._seq[index] = Item(val, typ, line, col)

    def __repr__(self):
        "returns a repr listing each item as a tuple of (type, value)"
        return u'cssutils.%s.%s([\n %s])' % (self.__module__,
                                          self.__class__.__name__,
            u',\n '.join([u'(%r, %r)' % (item.type, item.value)
                          for item in self._seq]
            ))

    def __str__(self):
        return "<cssutils.%s.%s object length=%r at 0x%x>" % (
                self.__module__, self.__class__.__name__, len(self), id(self))
515
class Item(object):
    """
    a single entry of a seq list (successor to tuple items in old seq)

    all attributes are set once on creation and are read only afterwards:

    type
        a semantic type like "element", "attribute"
    value
        the actual value which may be a string, number etc or an instance
        of e.g. a CSSComment
    line, col
        optional position information, None if not given
    """
    def __init__(self, value, type, line=None, col=None):
        self.__value = value
        self.__type = type
        self.__line = line
        self.__col = col

    @property
    def type(self):
        return self.__type

    @property
    def value(self):
        return self.__value

    @property
    def line(self):
        return self.__line

    @property
    def col(self):
        return self.__col

    def __repr__(self):
        # module.Class(value=..., type=..., line=..., col=...)
        details = (self.__module__, self.__class__.__name__,
                   self.__value, self.__type, self.__line, self.__col)
        return "%s.%s(value=%r, type=%r, line=%r, col=%r)" % details
545
class ListSeq(object):
    """
    (EXPERIMENTAL)
    Mixin like base for list classes such as css.SelectorList or
    stylesheets.MediaList

    offers list like access to the attribute ``seq`` of the inheriting
    class:

    - item in x => bool
    - len(x) => integer
    - get and del x[i]
    - for item in x

    ``x[i] = item`` and ``append(item)`` raise NotImplementedError here
    and must be overwritten in the inheriting class
    """
    def __init__(self):
        # a plain list is sufficient, ``Seq`` is not needed here
        self.seq = []

    def __contains__(self, item):
        return item in self.seq

    def __delitem__(self, index):
        del self.seq[index]

    def __getitem__(self, index):
        return self.seq[index]

    def __iter__(self):
        def items():
            for entry in self.seq:
                yield entry
        return items()

    def __len__(self):
        return len(self.seq)

    def __setitem__(self, index, item):
        "must be overwritten"
        raise NotImplementedError

    def append(self, item):
        "must be overwritten"
        raise NotImplementedError
591
class Deprecated(object):
    """Decorator marking a function or method as deprecated.

    Each call of the decorated function emits a DeprecationWarning
    before the original function is called.

    It accepts a single parameter ``msg`` which is appended to the
    warning. It should name the function or method to use instead.
    """
    def __init__(self, msg):
        self.msg = msg

    def __call__(self, func):
        def wrapper(*args, **kwargs):
            import warnings
            text = "Call to deprecated method %r. %s" % (
                func.__name__, self.msg)
            # stacklevel=2 reports the caller, not this wrapper
            warnings.warn(text, category=DeprecationWarning, stacklevel=2)
            return func(*args, **kwargs)

        # make the wrapper look like the decorated function
        wrapper.__name__ = func.__name__
        wrapper.__doc__ = func.__doc__
        wrapper.__dict__.update(func.__dict__)
        return wrapper
616
617 618 -class _Namespaces(object):
619 """ 620 A dictionary like wrapper for @namespace rules used in a CSSStyleSheet. 621 Works on effective namespaces, so e.g. if:: 622 623 @namespace p1 "uri"; 624 @namespace p2 "uri"; 625 626 only the second rule is effective and kept. 627 628 namespaces 629 a dictionary {prefix: namespaceURI} containing the effective namespaces 630 only. These are the latest set in the CSSStyleSheet. 631 parentStyleSheet 632 the parent CSSStyleSheet 633 """
634 - def __init__(self, parentStyleSheet, *args):
635 "no initial values are set, only the relevant sheet is" 636 self.parentStyleSheet = parentStyleSheet
637
638 - def __contains__(self, prefix):
639 return prefix in self.namespaces
640
641 - def __delitem__(self, prefix):
642 """deletes CSSNamespaceRule(s) with rule.prefix == prefix 643 644 prefix '' and None are handled the same 645 """ 646 if not prefix: 647 prefix = u'' 648 delrule = self.__findrule(prefix) 649 for i, rule in enumerate(ifilter(lambda r: r.type == r.NAMESPACE_RULE, 650 self.parentStyleSheet.cssRules)): 651 if rule == delrule: 652 self.parentStyleSheet.deleteRule(i) 653 return 654 655 raise xml.dom.NamespaceErr('Prefix %r not found.' % prefix)
656
657 - def __getitem__(self, prefix):
658 try: 659 return self.namespaces[prefix] 660 except KeyError, e: 661 raise xml.dom.NamespaceErr('Prefix %r not found.' % prefix)
662
663 - def __iter__(self):
664 return self.namespaces.__iter__()
665
666 - def __len__(self):
667 return len(self.namespaces)
668
669 - def __setitem__(self, prefix, namespaceURI):
670 "replaces prefix or sets new rule, may raise NoModificationAllowedErr" 671 if not prefix: 672 prefix = u'' # None or '' 673 rule = self.__findrule(prefix) 674 if not rule: 675 self.parentStyleSheet.insertRule(cssutils.css.CSSNamespaceRule( 676 prefix=prefix, 677 namespaceURI=namespaceURI), 678 inOrder=True) 679 else: 680 if prefix in self.namespaces: 681 rule.namespaceURI = namespaceURI # raises NoModificationAllowedErr 682 if namespaceURI in self.namespaces.values(): 683 rule.prefix = prefix
684
685 - def __findrule(self, prefix):
686 # returns namespace rule where prefix == key 687 for rule in ifilter(lambda r: r.type == r.NAMESPACE_RULE, 688 reversed(self.parentStyleSheet.cssRules)): 689 if rule.prefix == prefix: 690 return rule
691
692 - def __getNamespaces(self):
693 namespaces = {} 694 for rule in ifilter(lambda r: r.type == r.NAMESPACE_RULE, 695 reversed(self.parentStyleSheet.cssRules)): 696 if rule.namespaceURI not in namespaces.values(): 697 namespaces[rule.prefix] = rule.namespaceURI 698 return namespaces
699 700 namespaces = property(__getNamespaces, 701 doc=u'Holds only effective @namespace rules in self.parentStyleSheets' 702 '@namespace rules.') 703
704 - def get(self, prefix, default):
705 return self.namespaces.get(prefix, default)
706
707 - def items(self):
708 return self.namespaces.items()
709
710 - def keys(self):
711 return self.namespaces.keys()
712
713 - def values(self):
714 return self.namespaces.values()
715
716 - def prefixForNamespaceURI(self, namespaceURI):
717 """ 718 returns effective prefix for given namespaceURI or raises IndexError 719 if this cannot be found""" 720 for prefix, uri in self.namespaces.items(): 721 if uri == namespaceURI: 722 return prefix 723 raise IndexError(u'NamespaceURI %r not found.' % namespaceURI)
724
725 - def __str__(self):
726 return u"<cssutils.util.%s object parentStyleSheet=%r namespaces=%r "\ 727 u"at 0x%x>" % ( 728 self.__class__.__name__, str(self.parentStyleSheet), 729 self.namespaces, id(self))
730
class _SimpleNamespaces(_Namespaces):
    """
    namespaces used in objects like Selector as long as they are not connected
    to a CSSStyleSheet

    the prefixes and namespaceURIs are kept in a plain private dict
    """
    def __init__(self, *args):
        # args are handed to dict() unchanged
        self.__namespaces = dict(*args)

    def __setitem__(self, prefix, namespaceURI):
        self.__namespaces[prefix] = namespaceURI

    def __wrapped(self):
        return self.__namespaces

    namespaces = property(__wrapped,
                          doc=u'Dict Wrapper for self.sheets @namespace rules.')

    def __str__(self):
        name = self.__class__.__name__
        return u"<cssutils.util.%s object namespaces=%r at 0x%x>" % (
                name, self.namespaces, id(self))

    def __repr__(self):
        name = self.__class__.__name__
        return u"cssutils.util.%s(%r)" % (name, self.namespaces)
753
754 755 -def _defaultFetcher(url):
756 """Retrieve data from ``url``. cssutils default implementation of fetch 757 URL function. 758 759 Returns ``(encoding, string)`` or ``None`` 760 """ 761 try: 762 res = urllib2.urlopen(url) 763 except ValueError, e: 764 # invalid url, e.g. "1" 765 cssutils.log.warn(u'ValueError, %s' % e.message, error=ValueError) 766 except urllib2.HTTPError, e: 767 # http error, e.g. 404, e can be raised 768 cssutils.log.warn(u'HTTPError opening url=%r: %s %s' % 769 (url, e.code, e.msg), error=e) 770 except urllib2.URLError, e: 771 # URLError like mailto: or other IO errors, e can be raised 772 cssutils.log.warn(u'URLError, %s' % e.reason, error=e) 773 else: 774 if res: 775 mimeType, encoding = encutils.getHTTPInfo(res) 776 if mimeType != u'text/css': 777 cssutils.log.warn(u'Expected "text/css" mime type for url=%s but found: %r' % 778 (url, mimeType), error=ValueError) 779 return encoding, res.read()
780
def _readUrl(url, fetcher=None, overrideEncoding=None, parentEncoding=None):
    """
    Read cssText from url and decode it using all relevant methods (HTTP
    header, BOM, @charset). Returns a tuple ``(encoding, decodedContent)``:
    the encoding (which is needed to set encoding of stylesheet properly)
    and the decoded text. ``decodedContent`` is None if decoding raised
    UnicodeDecodeError. Returns ``(None, None)`` if the fetcher returned
    nothing usable (not a 2-tuple or content None).

    ``fetcher``
        see cssutils.registerFetchUrl for details, called with ``url`` and
        expected to return ``(encoding, content)``; defaults to
        ``_defaultFetcher``
    ``overrideEncoding``
        If given this encoding is used and all other encoding information is
        ignored (HTTP, BOM etc)
    ``parentEncoding``
        Encoding of parent stylesheet (while e.g. reading @import references sheets)
        or document if available.

    Priority of encoding information
    --------------------------------

    0. **cssutils only**: overrideEncoding
    1. An HTTP "charset" parameter in a "Content-Type" field (or similar parameters in other protocols)
    2. BOM and/or @charset (see below)
    3. <link charset=""> or other metadata from the linking mechanism (if any)
    4. charset of referring style sheet or document (if any)
    5. Assume UTF-8

    """
    if not fetcher:
        fetcher = _defaultFetcher
    r = fetcher(url)
    if r and len(r) == 2 and r[1] is not None:
        httpEncoding, content = r
        # NOTE(review): this is a unicode literal of three code points, not
        # the 3-byte BOM; presumably ``content`` is a byte string here so
        # the BOM comparison below may never match -- confirm
        UTF8_BOM = u'\xEF\xBB\xBF'

        if overrideEncoding:
            # 0. override encoding
            encoding = overrideEncoding
        elif httpEncoding:
            # 1. HTTP
            encoding = httpEncoding
        else:
            try:
                if content.startswith(u'@charset "utf-8";') or \
                   content.startswith(UTF8_BOM + u'@charset "utf-8";'):
                    # 2. BOM/@charset: explicitly UTF-8
                    contentEncoding = 'utf-8'
                else:
                    # other encoding with ascii content as not UnicodeDecodeError
                    contentEncoding = False
            except UnicodeDecodeError, e:
                # other encoding in any way (with other than ascii content)
                contentEncoding = False

            if contentEncoding:
                encoding = contentEncoding
            else:
                # contentEncoding may be UTF-8 but this may not be explicit
                contentEncoding = cssutils.codec._detectencoding_str(content)
                # contentEncoding may be None for empty string!
                if contentEncoding and contentEncoding != 'utf-8':
                    # 2. BOM/@charset: explicitly not UTF-8
                    encoding = contentEncoding
                else:
                    # 4. parent stylesheet or document
                    # may also be None in which case 5. is used in next step anyway
                    encoding = parentEncoding
        try:
            # encoding may still be wrong if encoding *is lying*!
            # index 1 of the codec lookup result is the decode function,
            # index 0 of its result is the decoded text
            decodedContent = codecs.lookup("css")[1](content, encoding=encoding)[0]
        except UnicodeDecodeError, e:
            decodedContent = None

        return encoding, decodedContent
    else:
        return None, None
855