1 """base classes for css and stylesheets packages
2
3 **this test class does not run standalone!**
4 see _readURL() to fix this temporarily
5
6 """
7 __all__ = []
8 __docformat__ = 'restructuredtext'
9 __version__ = '$Id: util.py 1159 2008-03-19 20:23:13Z cthedot $'
10
11
12 import codecs
13 from itertools import ifilter
14 import re
15 import types
16 import urllib2
17 import xml.dom
18 import cssutils
19 import encutils
20 from tokenize2 import Tokenizer
21
22 -class Base(object):
23 """
24 Base class for most CSS and StyleSheets classes
25
26 **Superseded by Base2 which is used for new seq handling class.**
27 See cssutils.util.Base2
28
29 Contains helper methods for inheriting classes helping parsing
30
31 ``_normalize`` is static as used by Preferences.
32 """
33 __tokenizer2 = Tokenizer()
34
35 _log = cssutils.log
36 _prods = cssutils.tokenize2.CSSProductions
37
38
39
40
41 _SHORTHANDPROPERTIES = {
42 u'background': [],
43 u'border': [],
44 u'border-left': [],
45 u'border-right': [],
46 u'border-top': [],
47 u'border-bottom': [],
48 u'border-color': [],
49 u'border-style': [],
50 u'border-width': [],
51 u'cue': [],
52 u'font': [],
53
54
55
56
57 u'list-style': [],
58 u'margin': [],
59 u'outline': [],
60 u'padding': [],
61 u'pause': []
62 }
63
64
65 __escapes = re.compile(ur'(\\[^0-9a-fA-F])').sub
66
67 __unicodes = re.compile(ur'\\[0-9a-fA-F]{1,6}[\t|\r|\n|\f|\x20]?').sub
68
69 @staticmethod
71 """
72 normalizes x, namely:
73
74 - remove any \ before non unicode sequences (0-9a-zA-Z) so for
75 x=="c\olor\" return "color" (unicode escape sequences should have
76 been resolved by the tokenizer already)
77 - lowercase
78 """
79 if x:
80 def removeescape(matchobj):
81 return matchobj.group(0)[1:]
82 x = Base.__escapes(removeescape, x)
83 return x.lower()
84 else:
85 return x
86
88 "raises xml.dom.NoModificationAllowedErr if rule/... is readonly"
89 if hasattr(self, '_readonly') and self._readonly:
90 raise xml.dom.NoModificationAllowedErr(
91 u'%s is readonly.' % self.__class__)
92 return True
93 return False
94
96 """
97 returns tuple (text, dict-of-namespaces) or if no namespaces are
98 in cssText returns (cssText, {})
99
100 used in Selector, SelectorList, CSSStyleRule, CSSMediaRule and
101 CSSStyleSheet
102 """
103 if isinstance(text_namespaces_tuple, tuple):
104 return text_namespaces_tuple[0], _SimpleNamespaces(
105 text_namespaces_tuple[1])
106 else:
107 return text_namespaces_tuple, _SimpleNamespaces()
108
110 """
111 returns tokens of textortokens which may already be tokens in which
112 case simply returns input
113 """
114 if not textortokens:
115 return None
116 elif isinstance(textortokens, basestring):
117
118 return self.__tokenizer2.tokenize(
119 textortokens)
120 elif types.GeneratorType == type(textortokens):
121
122 return textortokens
123 elif isinstance(textortokens, tuple):
124
125 return [textortokens]
126 else:
127
128 return (x for x in textortokens)
129
131 "returns next token in generator tokenizer or the default value"
132 try:
133 return tokenizer.next()
134 except (StopIteration, AttributeError):
135 return default
136
138 "returns type of Tokenizer token"
139 if token:
140 return token[0]
141 else:
142 return None
143
145 "returns value of Tokenizer token"
146 if token and normalize:
147 return Base._normalize(token[1])
148 elif token:
149 return token[1]
150 else:
151 return None
152
154 """
155 for STRING returns the actual content without surrounding "" or ''
156 and without respective escapes, e.g.::
157
158 "with \" char" => with " char
159 """
160 if token:
161 value = token[1]
162 return value.replace('\\'+value[0], value[0])[1:-1]
163 else:
164 return None
165
167 """
168 for URI returns the actual content without surrounding url()
169 or url(""), url('') and without respective escapes, e.g.::
170
171 url("\"") => "
172 """
173 if token:
174 value = token[1][4:-1].strip()
175 if (value[0] in '\'"') and (value[0] == value[-1]):
176
177 value = value.replace('\\'+value[0], value[0])[1:-1]
178 return value
179 else:
180 return None
181
def _tokensupto2(self,
                 tokenizer,
                 starttoken=None,
                 blockstartonly=False,
                 blockendonly=False,
                 mediaendonly=False,
                 importmediaqueryendonly=False,
                 mediaqueryendonly=False,
                 semicolon=False,
                 propertynameendonly=False,
                 propertyvalueendonly=False,
                 propertypriorityendonly=False,
                 selectorattendonly=False,
                 funcendonly=False,
                 listseponly=False,
                 separateEnd=False
                 ):
    """
    returns the tokens consumed from tokenizer up to and including the
    first end token (or an EOF token, or until tokenizer is exhausted)

    tokenizer
        iterable of tokens, each a 4-tuple (type, value, line, col);
        may be None or empty in which case nothing is consumed
    starttoken
        optional token which is put first into the result, it is not
        checked against the end condition
    blockstartonly ... listseponly
        select what counts as end token; only the first true flag (in
        signature order) is used. If none is set an end token is ``;``
        or ``}``:

        - blockstartonly: ``{``
        - blockendonly, mediaendonly: the ``}`` closing an already open
          block
        - importmediaqueryendonly: ``;`` or a token of type STRING
        - mediaqueryendonly: ``{`` or a token of type STRING
        - semicolon, propertypriorityendonly: ``;``
        - propertynameendonly: ``:`` or ``;``
        - propertyvalueendonly: ``;`` or ``!``
        - selectorattendonly: ``]``
        - funcendonly: the ``)`` closing an already open function
        - listseponly: ``,``

        An end token only counts while no ``{}``, ``[]`` or ``()`` opened
        during the scan is still open.
    separateEnd
        if True return a tuple (tokens-without-last, last-token) or
        ([], None) if no tokens were collected at all

    returns a list of tokens (or the tuple described for separateEnd)
    """
    # ``ends`` holds complete token values, not a string of characters:
    # ``val in u';}'`` would be a substring test, which e.g. an empty
    # token value always passes
    ends = (u';', u'}')
    endtypes = ()
    # nesting counters; a negative or positive start value means "the end
    # token itself brings the counter back to 0"
    brace = bracket = parant = 0

    if blockstartonly:
        ends = (u'{',)
        brace = -1  # set to 0 with first {
    elif blockendonly:
        ends = (u'}',)
        brace = 1
    elif mediaendonly:
        ends = (u'}',)
        brace = 1
    elif importmediaqueryendonly:
        ends = (u';',)
        endtypes = ('STRING',)
    elif mediaqueryendonly:
        ends = (u'{',)
        brace = -1  # set to 0 with first {
        endtypes = ('STRING',)
    elif semicolon:
        ends = (u';',)
    elif propertynameendonly:
        ends = (u':', u';')
    elif propertyvalueendonly:
        ends = (u';', u'!')
    elif propertypriorityendonly:
        ends = (u';',)
    elif selectorattendonly:
        ends = (u']',)
        if starttoken and self._tokenvalue(starttoken) == u'[':
            bracket = 1
    elif funcendonly:
        ends = (u')',)
        parant = 1
    elif listseponly:
        ends = (u',',)

    resulttokens = []
    if starttoken:
        resulttokens.append(starttoken)
    if tokenizer:
        for token in tokenizer:
            # unpacking (instead of indexing) keeps the check that a token
            # really has exactly four fields
            typ, val, _line, _col = token
            if 'EOF' == typ:
                resulttokens.append(token)
                break
            if u'{' == val:
                brace += 1
            elif u'}' == val:
                brace -= 1
            elif u'[' == val:
                bracket += 1
            elif u']' == val:
                bracket -= 1
            # a FUNCTION token is counted like an opening "("
            elif u'(' == val or \
                 Base._prods.FUNCTION == typ:
                parant += 1
            elif u')' == val:
                parant -= 1

            resulttokens.append(token)

            if (brace == bracket == parant == 0) and (
                    val in ends or typ in endtypes):
                break
            elif mediaqueryendonly and brace == -1 and (
                    bracket == parant == 0) and typ in endtypes:
                # STRING found before the block of the media rule started
                break

    if separateEnd:
        # the last token (normally the end token) is returned separately
        if resulttokens:
            return resulttokens[:-1], resulttokens[-1]
        else:
            return resulttokens, None
    else:
        return resulttokens
288
290 """
291 returns string value of t (t may be a string, a list of token tuples
292 or a single tuple in format (type, value, line, col).
293 Mainly used to get a string value of t for error messages.
294 """
295 if not t:
296 return u''
297 elif isinstance(t, basestring):
298 return t
299 else:
300 return u''.join([x[1] for x in t])
301
303 """
304 adds default productions if not already present, used by
305 _parse only
306
307 each production should return the next expected token
308 normally a name like "uri" or "EOF"
309 some have no expectation like S or COMMENT, so simply return
310 the current value of self.__expected
311 """
312 def ATKEYWORD(expected, seq, token, tokenizer=None):
313 "TODO: add default impl for unexpected @rule?"
314 return expected
315
316 def COMMENT(expected, seq, token, tokenizer=None):
317 "default implementation for COMMENT token adds CSSCommentRule"
318 seq.append(cssutils.css.CSSComment([token]))
319 return expected
320
321 def S(expected, seq, token, tokenizer=None):
322 "default implementation for S token, does nothing"
323 return expected
324
325 def EOF(expected=None, seq=None, token=None, tokenizer=None):
326 "default implementation for EOF token"
327 return 'EOF'
328
329 p = {'ATKEYWORD': ATKEYWORD,
330 'COMMENT': COMMENT,
331 'S': S,
332 'EOF': EOF
333 }
334 p.update(productions)
335 return p
336
def _parse(self, expected, seq, tokenizer, productions, default=None,
           new=None):
    """
    puts parsed tokens in seq by calling a production with
    (expected, seq, token, tokenizer) for each token of tokenizer

    expected
        a name what token or value is expected next, e.g. 'uri'
    seq
        to add rules etc to
    tokenizer
        iterable of tokens, each production also gets it and may consume
        further tokens from it; if None or empty nothing is parsed
    productions
        callbacks {tokentype: callback}, each callback returns the new
        value of expected
    default
        default callback if tokentype not in productions
    new
        used to init default productions

    returns (wellformed, expected) which the last prod might have set;
    wellformed is False if a token had neither a production nor a
    default callback (this is also logged as error)
    """
    wellformed = True
    if tokenizer:
        prods = self._adddefaultproductions(productions, new)
        for token in tokenizer:
            p = prods.get(token[0], default)
            if p:
                expected = p(expected, seq, token, tokenizer)
            else:
                wellformed = False
                # tuple() so a token given as list is reported too, the
                # % operator only unpacks real tuples
                self._log.error(
                    u'Unexpected token (%s, %s, %s, %s)' % tuple(token))
    return wellformed, expected
369
372 """
373 Base class for new seq handling, used by Selector for now only
374 """
377
379 """
380 sets newseq and makes it readonly
381 """
382 newseq._readonly = True
383 self._seq = newseq
384
385 seq = property(lambda self: self._seq, doc="seq for most classes")
386
388 "get a writeable Seq() which is added later"
389 return Seq(readonly=readonly)
390
392 """
393 adds default productions if not already present, used by
394 _parse only
395
396 each production should return the next expected token
397 normally a name like "uri" or "EOF"
398 some have no expectation like S or COMMENT, so simply return
399 the current value of self.__expected
400 """
401 def ATKEYWORD(expected, seq, token, tokenizer=None):
402 "default impl for unexpected @rule"
403 if expected != 'EOF':
404
405 rule = cssutils.css.CSSUnknownRule()
406 rule.cssText = self._tokensupto2(tokenizer, token)
407 if rule.wellformed:
408 seq.append(rule, cssutils.css.CSSRule.UNKNOWN_RULE,
409 line=token[2], col=token[3])
410 return expected
411 else:
412 new['wellformed'] = False
413 self._log.error(u'Expected EOF.', token=token)
414 return expected
415
416 def COMMENT(expected, seq, token, tokenizer=None):
417 "default impl, adds CSSCommentRule if not token == EOF"
418 if expected == 'EOF':
419 new['wellformed'] = False
420 self._log.error(u'Expected EOF but found comment.', token=token)
421 seq.append(cssutils.css.CSSComment([token]), 'COMMENT')
422 return expected
423
424 def S(expected, seq, token, tokenizer=None):
425 "default impl, does nothing if not token == EOF"
426 if expected == 'EOF':
427 new['wellformed'] = False
428 self._log.error(u'Expected EOF but found whitespace.', token=token)
429 return expected
430
431 def EOF(expected=None, seq=None, token=None, tokenizer=None):
432 "default implementation for EOF token"
433 return 'EOF'
434
435 defaultproductions = {'ATKEYWORD': ATKEYWORD,
436 'COMMENT': COMMENT,
437 'S': S,
438 'EOF': EOF
439 }
440 defaultproductions.update(productions)
441 return defaultproductions
442
443
444 -class Seq(object):
445 """
446 property seq of Base2 inheriting classes, holds a list of Item objects.
447
448 used only by Selector for now
449
450 is normally readonly, only writable during parsing
451 """
453 """
454 only way to write to a Seq is to initialize it with new items
455 each itemtuple has (value, type, line) where line is optional
456 """
457 self._seq = []
458 self._readonly = readonly
459
462
465
468
470 return iter(self._seq)
471
473 return len(self._seq)
474
def append(self, val, typ, line=None, col=None):
    """
    adds a new Item(val, typ, line, col) at the end of the sequence

    raises AttributeError if this Seq is readonly
    """
    if not self._readonly:
        item = Item(val, typ, line, col)
        self._seq.append(item)
        return
    raise AttributeError('Seq is readonly.')
481
def replace(self, index=-1, val=None, typ=None, line=None, col=None):
    """
    puts a new Item(val, typ, line, col) at position index of the
    sequence (default is the last position), the Item stored there
    before is dropped completely

    raises AttributeError if this Seq is readonly
    """
    if not self._readonly:
        item = Item(val, typ, line, col)
        self._seq[index] = item
        return
    raise AttributeError('Seq is readonly.')
491
493 "returns a repr same as a list of tuples of (value, type)"
494 return u'cssutils.%s.%s([\n %s])' % (self.__module__,
495 self.__class__.__name__,
496 u',\n '.join([u'(%r, %r)' % (item.type, item.value)
497 for item in self._seq]
498 ))
500 return "<cssutils.%s.%s object length=%r at 0x%x>" % (
501 self.__module__, self.__class__.__name__, len(self), id(self))
502
504 """
505 an item in the seq list of classes (successor to tuple items in old seq)
506
507 each item has attributes:
508
509 type
510 a semantic type like "element", "attribute"
511 value
512 the actual value which may be a string, number etc or an instance
513 of e.g. a CSSComment
514 *line*
515 **NOT IMPLEMENTED YET, may contain the line in the source later**
516 """
def __init__(self, value, type, line=None, col=None):
    """
    stores value, type, line and col in private attributes which are
    exposed via the read-only properties of the same names
    """
    self.__value, self.__type = value, type
    self.__line, self.__col = line, col
522
523 type = property(lambda self: self.__type)
524 value = property(lambda self: self.__value)
525 line = property(lambda self: self.__line)
526 col = property(lambda self: self.__col)
527
529 return "%s.%s(value=%r, type=%r, line=%r, col=%r)" % (
530 self.__module__, self.__class__.__name__,
531 self.__value, self.__type, self.__line, self.__col)
532
535 """
536 (EXPERIMENTAL)
537 A base class used for list classes like css.SelectorList or
538 stylesheets.MediaList
539
540 adds list like behaviour running on inheriting class' property ``seq``
541
542 - item in x => bool
543 - len(x) => integer
544 - get, set and del x[i]
545 - for item in x
546 - append(item)
547
548 some methods must be overwritten in inheriting class
549 """
552
555
558
560 return self.seq[index]
561
563 def gen():
564 for x in self.seq:
565 yield x
566 return gen()
567
570
572 "must be overwritten"
573 raise NotImplementedError
574
576 "must be overwritten"
577 raise NotImplementedError
578
581 """This is a decorator which can be used to mark functions
582 as deprecated. It will result in a warning being emitted
583 when the function is used.
584
585 It accepts a single parameter ``msg`` which is shown with the warning.
586 It should contain information which function or method to use instead.
587 """
590
592 def newFunc(*args, **kwargs):
593 import warnings
594 warnings.warn("Call to deprecated method %r. %s" %
595 (func.__name__, self.msg),
596 category=DeprecationWarning,
597 stacklevel=2)
598 return func(*args, **kwargs)
599 newFunc.__name__ = func.__name__
600 newFunc.__doc__ = func.__doc__
601 newFunc.__dict__.update(func.__dict__)
602 return newFunc
603
606 """
607 A dictionary like wrapper for @namespace rules used in a CSSStyleSheet.
608 Works on effective namespaces, so e.g. if::
609
610 @namespace p1 "uri";
611 @namespace p2 "uri";
612
613 only the second rule is effective and kept.
614
615 namespaces
616 a dictionary {prefix: namespaceURI} containing the effective namespaces
617 only. These are the latest set in the CSSStyleSheet.
618 parentStyleSheet
619 the parent CSSStyleSheet
620 """
621 - def __init__(self, parentStyleSheet, *args):
624
627
643
645 try:
646 return self.namespaces[prefix]
647 except KeyError, e:
648 raise xml.dom.NamespaceErr('Prefix %r not found.' % prefix)
649
652
655
671
678
686
687 namespaces = property(__getNamespaces,
688 doc=u'Holds only effective @namespace rules in self.parentStyleSheets'
689 '@namespace rules.')
690
691 - def get(self, prefix, default):
693
696
699
702
704 """
705 returns effective prefix for given namespaceURI or raises IndexError
706 if this cannot be found"""
707 for prefix, uri in self.namespaces.items():
708 if uri == namespaceURI:
709 return prefix
710 raise IndexError(u'NamespaceURI %r not found.' % namespaceURI)
711
713 return u"<cssutils.util.%s object parentStyleSheet=%r namespaces=%r "\
714 u"at 0x%x>" % (
715 self.__class__.__name__, str(self.parentStyleSheet),
716 self.namespaces, id(self))
717
720 """
721 namespaces used in objects like Selector as long as they are not connected
722 to a CSSStyleSheet
723 """
725 self.__namespaces = dict(*args)
726
729
730 namespaces = property(lambda self: self.__namespaces,
731 doc=u'Dict Wrapper for self.sheets @namespace rules.')
732
734 return u"<cssutils.util.%s object namespaces=%r at 0x%x>" % (
735 self.__class__.__name__, self.namespaces, id(self))
736
739 """Retrieve text from url using explicit or detected encoding via encutils
740 """
741 try:
742 req = urllib2.Request(url)
743 res = urllib2.urlopen(req)
744 except ValueError, e:
745
746 cssutils.log.warn(u'Error opening url=%r: %s' % (url, e.message),
747 error=ValueError)
748 except urllib2.HTTPError, e:
749
750 cssutils.log.warn(u'Error opening url=%r: %s %s' % (url, e.code, e.msg),
751 error=e)
752 except urllib2.URLError, e:
753
754 cssutils.log.warn(u'Error opening url=%r: %s' % (url, e.reason),
755 error=e)
756 else:
757 if res:
758
759 url = res.geturl()
760 if not encoding:
761
762 media_type, encoding = encutils.getHTTPInfo(res)
763 if media_type != u'text/css':
764 self._log.warn(u'Unexpected media type opening url=%s: %r != "text/css"' %
765 (url, media_type))
766 try:
767 return codecs.getreader(encoding)(res).read()
768 except urllib2.HTTPError, e:
769
770 cssutils.log.warn(u'Error reading url=%r: %s %s' % (url, e.code, e.msg),
771 error=e)
772 except urllib2.URLError, e:
773 cssutils.log.warn(u'Error reading url=%r: %s' % (url, e.reason),
774 error=e)
775 except Exception, e:
776 cssutils.log.warn(u'Error reading url=%r: %r' % (url, e))
777