Package cssutils :: Package tests :: Module test_tokenize2
[hide private]
[frames | no frames]

Source Code for Module cssutils.tests.test_tokenize2

  1  # -*- coding: utf-8 -*- 
  2  """testcases for new cssutils.tokenize.Tokenizer 
  3   
  4  TODO:: 
  5   
  6      - escape ends with explicit space but \r\n as single space 
  7      - ur'"\""': [('STRING', ur'"\""', 1, 1)], 
  8      - font-face with escaped "-" 
  9   
 10  + old tests as new ones are **not complete**! 
 11  """ 
 12  __author__ = '$LastChangedBy: cthedot $' 
 13  __date__ = '$LastChangedDate: 2007-09-01 15:56:36 +0200 (Sa, 01 Sep 2007) $' 
 14  __version__ = '$LastChangedRevision: 302 $' 
 15   
 16  import xml.dom 
 17  import basetest 
 18  from cssutils.tokenize2 import * 
 19   
20 -class TokenizerTestCase(basetest.BaseTestCase):
21 22 testsall = { 23 # IDENT 24 u'äöü߀': [('IDENT', u'äöü߀', 1, 1)], 25 u' a ': [('S', u' ', 1, 1), 26 ('IDENT', u'a', 1, 2), 27 ('S', u' ', 1, 3)], 28 u'_a': [('IDENT', u'_a', 1, 1)], 29 u'-a': [('IDENT', u'-a', 1, 1)], 30 u'aA-_\200\377': [('IDENT', u'aA-_\200\377', 1, 1)], 31 u'a1': [('IDENT', u'a1', 1, 1)], 32 # escapes must end with S or max 6 digits: 33 u'\\44 b': [('IDENT', u'Db', 1, 1)], 34 u'\\44 b': [('IDENT', u'D', 1, 1), 35 ('S', u' ', 1, 5), 36 ('IDENT', u'b', 1, 6)], 37 u'\\44\nb': [('IDENT', u'Db', 1, 1)], 38 u'\\44\rb': [('IDENT', u'Db', 1, 1)], 39 u'\\44\fb': [('IDENT', u'Db', 1, 1)], 40 u'\\44\n*': [('IDENT', u'D', 1, 1), 41 ('CHAR', u'*', 2, 1)], 42 u'\\44 a': [('IDENT', u'D', 1, 1), 43 ('S', u' ', 1, 5), 44 ('IDENT', u'a', 1, 6)], 45 # TODO: 46 # Note that this means that a "real" space after the escape sequence 47 # must itself either be escaped or doubled: 48 u'\\44\ x': [('IDENT', u'D\\ x', 1, 1)], 49 u'\\44 ': [('IDENT', u'D', 1, 1), 50 ('S', u' ', 1, 5)], 51 52 ur'\44': [('IDENT', u'D', 1, 1)], 53 ur'\\': [('IDENT', ur'\\', 1, 1)], 54 ur'\{': [('IDENT', ur'\{', 1, 1)], 55 ur'\"': [('IDENT', ur'\"', 1, 1)], 56 ur'\(': [('IDENT', ur'\(', 1, 1)], 57 ur'\1 \22 \333 \4444 \55555 \666666 \777777 7 \7777777': [ 58 ('IDENT', u'\x01"\u0333\u4444\\55555 \\666666 \\777777 7', 1, 1), 59 ('S', ' ', 1, 43), 60 ('IDENT', '\\7777777', 1, 44)], 61 62 63 u'\\1 b': [('IDENT', u'\x01b', 1, 1)], 64 u'\\44 b': [('IDENT', u'Db', 1, 1)], 65 u'\\123 b': [('IDENT', u'\u0123b', 1, 1)], 66 u'\\1234 b': [('IDENT', u'\u1234b', 1, 1)], 67 u'\\12345 b': [('IDENT', u'\\12345 b', 1, 1)], 68 u'\\123456 b': [('IDENT', u'\\123456 b', 1, 1)], 69 u'\\1234567 b': [('IDENT', u'\\1234567', 1, 1), 70 ('S', u' ', 1, 9), 71 ('IDENT', u'b', 1, 10)], 72 u'\\{\\}\\(\\)\\[\\]\\#\\@\\.\\,': 73 [('IDENT', u'\\{\\}\\(\\)\\[\\]\\#\\@\\.\\,', 1, 1)], 74 75 # STRING 76 u' "" ': [('S', u' ', 1, 1), 77 ('STRING', u'""', 1, 2), 78 ('S', u' ', 1, 4)], 79 u' "\'" ': [('S', u' ', 1, 1), 80 
('STRING', u'"\'"', 1, 2), 81 ('S', u' ', 1, 5)], 82 u" '' ": [('S', u' ', 1, 1), 83 ('STRING', u"''", 1, 2), 84 ('S', u' ', 1, 4)], 85 u" '' ": [('S', u' ', 1, 1), 86 ('STRING', u"''", 1, 2), 87 ('S', u' ', 1, 4)], 88 u"'\\\n'": [('STRING', u"'\\\n'", 1, 1)], 89 u"'\\\n\\\n\\\n'": [('STRING', u"'\\\n\\\n\\\n'", 1, 1)], 90 u"'\\\f'": [('STRING', u"'\\\f'", 1, 1)], 91 u"'\\\r'": [('STRING', u"'\\\r'", 1, 1)], 92 u"'\\\r\n'": [('STRING', u"'\\\r\n'", 1, 1)], 93 u"'1\\\n2'": [('STRING', u"'1\\\n2'", 1, 1)], 94 95 # HASH 96 u' #a ': [('S', u' ', 1, 1), 97 ('HASH', u'#a', 1, 2), 98 ('S', u' ', 1, 4)], 99 100 u'#ccc': [('HASH', u'#ccc', 1, 1)], 101 u'#111': [('HASH', u'#111', 1, 1)], 102 u'#a1a1a1': [('HASH', u'#a1a1a1', 1, 1)], 103 u'#1a1a1a': [('HASH', u'#1a1a1a', 1, 1)], 104 105 # NUMBER, for plus see CSS3 106 u' 0 ': [('S', u' ', 1, 1), 107 ('NUMBER', u'0', 1, 2), 108 ('S', u' ', 1, 3)], 109 u' 0.1 ': [('S', u' ', 1, 1), 110 ('NUMBER', u'0.1', 1, 2), 111 ('S', u' ', 1, 5)], 112 u' .0 ': [('S', u' ', 1, 1), 113 ('NUMBER', u'.0', 1, 2), 114 ('S', u' ', 1, 4)], 115 116 u' -0 ': [('S', u' ', 1, 1), 117 ('CHAR', u'-', 1, 2), 118 ('NUMBER', u'0', 1, 3), 119 ('S', u' ', 1, 4)], 120 121 # PERCENTAGE 122 u' 0% ': [('S', u' ', 1, 1), 123 ('PERCENTAGE', u'0%', 1, 2), 124 ('S', u' ', 1, 4)], 125 u' .5% ': [('S', u' ', 1, 1), 126 ('PERCENTAGE', u'.5%', 1, 2), 127 ('S', u' ', 1, 5)], 128 129 # URI 130 u' url() ': [('S', u' ', 1, 1), 131 ('URI', u'url()', 1, 2), 132 ('S', u' ', 1, 7)], 133 u' url(a) ': [('S', u' ', 1, 1), 134 ('URI', u'url(a)', 1, 2), 135 ('S', u' ', 1, 8)], 136 u' url("a") ': [('S', u' ', 1, 1), 137 ('URI', u'url("a")', 1, 2), 138 ('S', u' ', 1, 10)], 139 u' url( a ) ': [('S', u' ', 1, 1), 140 ('URI', u'url( a )', 1, 2), 141 ('S', u' ', 1, 10)], 142 u'ur\\l(': [('FUNCTION', u'ur\\l(', 1, 1)], 143 144 # UNICODE-RANGE 145 146 # CDO 147 u' <!-- ': [('S', u' ', 1, 1), 148 ('CDO', u'<!--', 1, 2), 149 ('S', u' ', 1, 6)], 150 u'"<!--""-->"': [('STRING', u'"<!--"', 1, 1), 
151 ('STRING', u'"-->"', 1, 7)], 152 153 # CDC 154 u' --> ': [('S', u' ', 1, 1), 155 ('CDC', u'-->', 1, 2), 156 ('S', u' ', 1, 5)], 157 158 # S 159 u' ': [('S', u' ', 1, 1)], 160 u' ': [('S', u' ', 1, 1)], 161 u'\r': [('S', u'\r', 1, 1)], 162 u'\n': [('S', u'\n', 1, 1)], 163 u'\r\n': [('S', u'\r\n', 1, 1)], 164 u'\f': [('S', u'\f', 1, 1)], 165 u'\r': [('S', u'\r', 1, 1)], 166 u'\t': [('S', u'\t', 1, 1)], 167 u'\r\n\r\n\f\t ': [('S', u'\r\n\r\n\f\t ', 1, 1)], 168 169 # COMMENT, for incomplete see later 170 u'/*x*/ ': [('COMMENT', u'/*x*/', 1, 1), 171 ('S', u' ', 1, 6)], 172 173 # FUNCTION 174 u' x( ': [('S', u' ', 1, 1), 175 ('FUNCTION', u'x(', 1, 2), 176 ('S', u' ', 1, 4)], 177 # only url( is a valid URI so this must be a function 178 u'URL(': [('FUNCTION', u'URL(', 1, 1)], 179 u'uRl(': [('FUNCTION', u'uRl(', 1, 1)], 180 181 # INCLUDES 182 u' ~= ': [('S', u' ', 1, 1), 183 ('INCLUDES', u'~=', 1, 2), 184 ('S', u' ', 1, 4)], 185 u'~==': [('INCLUDES', u'~=', 1, 1), ('CHAR', u'=', 1, 3)], 186 187 # DASHMATCH 188 u' |= ': [('S', u' ', 1, 1), 189 ('DASHMATCH', u'|=', 1, 2), 190 ('S', u' ', 1, 4)], 191 u'|==': [('DASHMATCH', u'|=', 1, 1), ('CHAR', u'=', 1, 3)], 192 193 # CHAR 194 u' @ ': [('S', u' ', 1, 1), 195 ('CHAR', u'@', 1, 2), 196 ('S', u' ', 1, 3)], 197 198 # --- overwritten for CSS 2.1 --- 199 # LBRACE 200 u' { ': [('S', u' ', 1, 1), 201 ('CHAR', u'{', 1, 2), 202 ('S', u' ', 1, 3)], 203 # PLUS 204 u' + ': [('S', u' ', 1, 1), 205 ('CHAR', u'+', 1, 2), 206 ('S', u' ', 1, 3)], 207 # GREATER 208 u' > ': [('S', u' ', 1, 1), 209 ('CHAR', u'>', 1, 2), 210 ('S', u' ', 1, 3)], 211 # COMMA 212 u' , ': [('S', u' ', 1, 1), 213 ('CHAR', u',', 1, 2), 214 ('S', u' ', 1, 3)], 215 216 # class 217 u' . 
': [('S', u' ', 1, 1), 218 ('CHAR', u'.', 1, 2), 219 ('S', u' ', 1, 3)], 220 221 } 222 223 tests3 = { 224 # specials 225 u'c\\olor': [('IDENT', u'c\\olor', 1, 1)], 226 u'-1': [('CHAR', u'-', 1, 1), ('NUMBER', u'1', 1, 2)], 227 u'-1px': [('CHAR', u'-', 1, 1), ('DIMENSION', u'1px', 1, 2)], 228 229 # ATKEYWORD 230 u' @x ': [('S', u' ', 1, 1), 231 ('ATKEYWORD', u'@x', 1, 2), 232 ('S', u' ', 1, 4)], 233 u'@X': [('ATKEYWORD', u'@X', 1, 1)], 234 u'@\\x': [('ATKEYWORD', u'@\\x', 1, 1)], 235 # - 236 u'@1x': [('CHAR', u'@', 1, 1), 237 ('DIMENSION', u'1x', 1, 2)], 238 239 # DIMENSION 240 u' 0px ': [('S', u' ', 1, 1), 241 ('DIMENSION', u'0px', 1, 2), 242 ('S', u' ', 1, 5)], 243 u' 1s ': [('S', u' ', 1, 1), 244 ('DIMENSION', u'1s', 1, 2), 245 ('S', u' ', 1, 4)], 246 u'0.2EM': [('DIMENSION', u'0.2EM', 1, 1)], 247 u'1p\\x': [('DIMENSION', u'1p\\x', 1, 1)], 248 u'1PX': [('DIMENSION', u'1PX', 1, 1)], 249 250 # NUMBER 251 u' - 0 ': [('S', u' ', 1, 1), 252 ('CHAR', u'-', 1, 2), 253 ('S', u' ', 1, 3), 254 ('NUMBER', u'0', 1, 4), 255 ('S', u' ', 1, 5)], 256 u' + 0 ': [('S', u' ', 1, 1), 257 ('CHAR', u'+', 1, 2), 258 ('S', u' ', 1, 3), 259 ('NUMBER', u'0', 1, 4), 260 ('S', u' ', 1, 5)], 261 262 # PREFIXMATCH 263 u' ^= ': [('S', u' ', 1, 1), 264 ('PREFIXMATCH', u'^=', 1, 2), 265 ('S', u' ', 1, 4)], 266 u'^==': [('PREFIXMATCH', u'^=', 1, 1), ('CHAR', u'=', 1, 3)], 267 268 # SUFFIXMATCH 269 u' $= ': [('S', u' ', 1, 1), 270 ('SUFFIXMATCH', u'$=', 1, 2), 271 ('S', u' ', 1, 4)], 272 u'$==': [('SUFFIXMATCH', u'$=', 1, 1), ('CHAR', u'=', 1, 3)], 273 274 # SUBSTRINGMATCH 275 u' *= ': [('S', u' ', 1, 1), 276 ('SUBSTRINGMATCH', u'*=', 1, 2), 277 ('S', u' ', 1, 4)], 278 u'*==': [('SUBSTRINGMATCH', u'*=', 1, 1), ('CHAR', u'=', 1, 3)], 279 280 # BOM 281 u' \xFEFF ': [('S', u' ', 1, 1), 282 ('BOM', u'\xFEFF', 1, 2), # len=3 283 ('S', u' ', 1, 5)], 284 285 } 286 287 tests2 = { 288 # escapes work not for a-f! 
289 # IMPORT_SYM 290 u' @import ': [('S', u' ', 1, 1), 291 ('IMPORT_SYM', u'@import', 1, 2), 292 ('S', u' ', 1, 9)], 293 u'@IMPORT': [('IMPORT_SYM', u'@IMPORT', 1, 1)], 294 ur'@\i\m\p\o\r\t': [('IMPORT_SYM', ur'@\i\m\p\o\r\t', 1, 1)], 295 ur'@\I\M\P\O\R\T': [('IMPORT_SYM', ur'@\I\M\P\O\R\T', 1, 1)], 296 ur'@\49 \04d\0050\0004f\000052\54': [('IMPORT_SYM', 297 ur'@\49 \04d\0050\0004f\000052\54', 298 1, 1)], 299 ur'@\69 \06d\0070\0006f\000072\74': [('IMPORT_SYM', 300 ur'@\69 \06d\0070\0006f\000072\74', 301 1, 1)], 302 303 # PAGE_SYM 304 u' @page ': [('S', u' ', 1, 1), 305 ('PAGE_SYM', u'@page', 1, 2), 306 ('S', u' ', 1, 7)], 307 u'@PAGE': [('PAGE_SYM', u'@PAGE', 1, 1)], 308 ur'@\pa\ge': [('PAGE_SYM', ur'@\pa\ge', 1, 1)], 309 ur'@\PA\GE': [('PAGE_SYM', ur'@\PA\GE', 1, 1)], 310 ur'@\50\41\47\45': [('PAGE_SYM', ur'@\50\41\47\45', 1, 1)], 311 ur'@\70\61\67\65': [('PAGE_SYM', ur'@\70\61\67\65', 1, 1)], 312 313 # MEDIA_SYM 314 u' @media ': [('S', u' ', 1, 1), 315 ('MEDIA_SYM', u'@media', 1, 2), 316 ('S', u' ', 1, 8)], 317 u'@MEDIA': [('MEDIA_SYM', u'@MEDIA', 1, 1)], 318 ur'@\med\ia': [('MEDIA_SYM', ur'@\med\ia', 1, 1)], 319 ur'@\MED\IA': [('MEDIA_SYM', ur'@\MED\IA', 1, 1)], 320 u'@\\4d\n\\45\r\\44\t\\49\r\n\\41\f': [('MEDIA_SYM', 321 u'@\\4d\n\\45\r\\44\t\\49\r\n\\41\f', 322 1, 1)], 323 u'@\\6d\n\\65\r\\64\t\\69\r\n\\61\f': [('MEDIA_SYM', 324 u'@\\6d\n\\65\r\\64\t\\69\r\n\\61\f', 325 1, 1)], 326 327 # FONT_FACE_SYM 328 u' @font-face ': [('S', u' ', 1, 1), 329 ('FONT_FACE_SYM', u'@font-face', 1, 2), 330 ('S', u' ', 1, 12)], 331 u'@FONT-FACE': [('FONT_FACE_SYM', u'@FONT-FACE', 1, 1)], 332 ur'@f\o\n\t\-face': [('FONT_FACE_SYM', ur'@f\o\n\t\-face', 1, 1)], 333 ur'@F\O\N\T\-FACE': [('FONT_FACE_SYM', ur'@F\O\N\T\-FACE', 1, 1)], 334 # TODO: "-" as hex! 
335 ur'@\46\4f\4e\54\-\46\41\43\45': [('FONT_FACE_SYM', 336 ur'@\46\4f\4e\54\-\46\41\43\45', 1, 1)], 337 ur'@\66\6f\6e\74\-\66\61\63\65': [('FONT_FACE_SYM', 338 ur'@\66\6f\6e\74\-\66\61\63\65', 1, 1)], 339 340 # CHARSET_SYM only if "@charset "! 341 u' @charset ': [('S', u' ', 1, 1), 342 ('CHARSET_SYM', u'@charset ', 1, 2), 343 ('S', u' ', 1, 11)], 344 u'@charset': [('ATKEYWORD', u'@charset', 1, 1)], # no ending S 345 u'@CHARSET ': [('ATKEYWORD', u'@CHARSET', 1, 1),# uppercase 346 ('S', u' ', 1, 9)], 347 u'@cha\\rset ': [('ATKEYWORD', u'@cha\\rset', 1, 1), # not literal 348 ('S', u' ', 1, 10)], 349 350 # NAMESPACE_SYM 351 u' @namespace ': [('S', u' ', 1, 1), 352 ('NAMESPACE_SYM', u'@namespace', 1, 2), 353 ('S', u' ', 1, 12)], 354 ur'@NAMESPACE': [('NAMESPACE_SYM', ur'@NAMESPACE', 1, 1)], 355 ur'@\na\me\s\pace': [('NAMESPACE_SYM', ur'@\na\me\s\pace', 1, 1)], 356 ur'@\NA\ME\S\PACE': [('NAMESPACE_SYM', ur'@\NA\ME\S\PACE', 1, 1)], 357 ur'@\4e\41\4d\45\53\50\41\43\45': [('NAMESPACE_SYM', 358 ur'@\4e\41\4d\45\53\50\41\43\45', 1, 1)], 359 ur'@\6e\61\6d\65\73\70\61\63\65': [('NAMESPACE_SYM', 360 ur'@\6e\61\6d\65\73\70\61\63\65', 1, 1)], 361 362 # ATKEYWORD 363 u' @unknown ': [('S', u' ', 1, 1), 364 ('ATKEYWORD', u'@unknown', 1, 2), 365 ('S', u' ', 1, 10)], 366 367 # STRING 368 # strings with linebreak in it 369 u' "\\na"\na': [('S', u' ', 1, 1), 370 ('STRING', u'"\\na"', 1, 2), 371 ('S', u'\n', 1, 7), 372 ('IDENT', u'a', 2, 1)], 373 u" '\\na'\na": [('S', u' ', 1, 1), 374 ('STRING', u"'\\na'", 1, 2), 375 ('S', u'\n', 1, 7), 376 ('IDENT', u'a', 2, 1)], 377 u' "\\r\\n\\t\\n\\ra"a': [('S', u' ', 1, 1), 378 ('STRING', u'"\\r\\n\\t\\n\\ra"', 1, 2), 379 ('IDENT', u'a', 1, 15)], 380 381 # IMPORTANT_SYM is not IDENT!!! 382 u' !important ': [('S', u' ', 1, 1), 383 ('CHAR', u'!', 1, 2), 384 ('IDENT', u'important', 1, 3), 385 ('S', u' ', 1, 12)], 386 u'! 
/*1*/ important ': [ 387 ('CHAR', u'!', 1, 1), 388 ('S', u' ', 1, 2), 389 ('COMMENT', u'/*1*/', 1, 3), 390 ('S', u' ', 1, 8), 391 ('IDENT', u'important', 1, 9), 392 ('S', u' ', 1, 18)], 393 u'! important': [('CHAR', u'!', 1, 1), 394 ('S', u' ', 1, 2), 395 ('IDENT', u'important', 1, 3)], 396 u'!\n\timportant': [('CHAR', u'!', 1, 1), 397 ('S', u'\n\t', 1, 2), 398 ('IDENT', u'important', 2, 2)], 399 u'!IMPORTANT': [('CHAR', u'!', 1, 1), 400 ('IDENT', u'IMPORTANT', 1, 2)], 401 ur'!\i\m\p\o\r\ta\n\t': [('CHAR', u'!', 1, 1), 402 ('IDENT', 403 ur'\i\m\p\o\r\ta\n\t', 1, 2)], 404 ur'!\I\M\P\O\R\Ta\N\T': [('CHAR', u'!', 1, 1), 405 ('IDENT', 406 ur'\I\M\P\O\R\Ta\N\T', 1, 2)], 407 ur'!\49\4d\50\4f\52\54\41\4e\54': [('CHAR', u'!', 1, 1), 408 ('IDENT', 409 ur'IMPORTANT', 410 1, 2)], 411 ur'!\69\6d\70\6f\72\74\61\6e\74': [('CHAR', u'!', 1, 1), 412 ('IDENT', 413 ur'important', 414 1, 2)], 415 } 416 417 # overwriting tests in testsall 418 tests2only = { 419 # LBRACE 420 u' { ': [('S', u' ', 1, 1), 421 ('LBRACE', u'{', 1, 2), 422 ('S', u' ', 1, 3)], 423 # PLUS 424 u' + ': [('S', u' ', 1, 1), 425 ('PLUS', u'+', 1, 2), 426 ('S', u' ', 1, 3)], 427 # GREATER 428 u' > ': [('S', u' ', 1, 1), 429 ('GREATER', u'>', 1, 2), 430 ('S', u' ', 1, 3)], 431 # COMMA 432 u' , ': [('S', u' ', 1, 1), 433 ('COMMA', u',', 1, 2), 434 ('S', u' ', 1, 3)], 435 # class 436 u' . 
': [('S', u' ', 1, 1), 437 ('CLASS', u'.', 1, 2), 438 ('S', u' ', 1, 3)], 439 } 440 441 testsfullsheet = { 442 # TODO: escape ends with explicit space but \r\n as single space 443 #u'\\1\r\nb': [('IDENT', u'\\1\r', 1, 1), ('IDENT', u'b', 1, 4)], 444 445 # STRING 446 ur'"\" "': [('STRING', ur'"\" "', 1, 1)], 447 u"""'\\''""": [('STRING', u"""'\\''""", 1, 1)], 448 u'''"\\""''': [('STRING', u'''"\\""''', 1, 1)], 449 u' "\na': [('S', u' ', 1, 1), 450 ('INVALID', u'"', 1, 2), 451 ('S', u'\n', 1, 3), 452 ('IDENT', u'a', 2, 1)], 453 454 # strings with linebreak in it 455 u' "\\na\na': [('S', u' ', 1, 1), 456 ('INVALID', u'"\\na', 1, 2), 457 ('S', u'\n', 1, 6), 458 ('IDENT', u'a', 2, 1)], 459 u' "\\r\\n\\t\\n\\ra\na': [('S', u' ', 1, 1), 460 ('INVALID', u'"\\r\\n\\t\\n\\ra', 1, 2), 461 ('S', u'\n', 1, 14), 462 ('IDENT', u'a', 2, 1)], 463 } 464 465 # tests if fullsheet=False is set on tokenizer 466 testsfullsheetfalse = { 467 # COMMENT incomplete 468 u'/*': [('CHAR', u'/', 1, 1), 469 ('CHAR', u'*', 1, 2)], 470 471 # INVALID incomplete 472 u' " ': [('S', u' ', 1, 1), 473 ('INVALID', u'" ', 1, 2)], 474 u" 'abc\"with quote\" in it": [('S', u' ', 1, 1), 475 ('INVALID', u"'abc\"with quote\" in it", 1, 2)], 476 477 # URI incomplete 478 u'url(a': [('FUNCTION', u'url(', 1, 1), 479 ('IDENT', u'a', 1, 5)], 480 u'url("a': [('FUNCTION', u'url(', 1, 1), 481 ('INVALID', u'"a', 1, 5)], 482 u"url('a": [('FUNCTION', u'url(', 1, 1), 483 ('INVALID', u"'a", 1, 5)], 484 } 485 486 # tests if fullsheet=True is set on tokenizer 487 testsfullsheettrue = { 488 # COMMENT incomplete 489 u'/*': [('COMMENT', u'/**/', 1, 1)], 490 491 # INVALID incomplete => STRING 492 u' " ': [('S', u' ', 1, 1), 493 ('STRING', u'" "', 1, 2)], 494 u" 'abc\"with quote\" in it": [('S', u' ', 1, 1), 495 ('STRING', u"'abc\"with quote\" in it'", 1, 2)], 496 497 # URI incomplete FUNC => URI 498 u'url(a': [('URI', u'url(a)', 1, 1)], 499 u'url( a': [('URI', u'url( a)', 1, 1)], 500 u'url("a': [('URI', u'url("a")', 1, 1)], 501 
u'url( "a ': [('URI', u'url( "a ")', 1, 1)], 502 u"url('a": [('URI', u"url('a')", 1, 1)], 503 u'url("a"': [('URI', u'url("a")', 1, 1)], 504 u"url('a'": [('URI', u"url('a')", 1, 1)], 505 506 } 507
508 - def setUp(self):
509 #log = cssutils.errorhandler.ErrorHandler() 510 self.tokenizer = Tokenizer()
511
512 - def test_linenumbers(self):
513 "Tokenizer line + col" 514 pass
515
516 - def test_tokenize(self):
517 "cssutils Tokenizer().tokenize()" 518 import cssutils.cssproductions 519 tokenizer = Tokenizer(cssutils.cssproductions.MACROS, 520 cssutils.cssproductions.PRODUCTIONS) 521 tests = {} 522 tests.update(self.testsall) 523 tests.update(self.tests2) 524 tests.update(self.tests3) 525 tests.update(self.testsfullsheet) 526 tests.update(self.testsfullsheetfalse) 527 for css in tests: 528 # check token format 529 tokens = tokenizer.tokenize(css) 530 for i, actual in enumerate(tokens): 531 expected = tests[css][i] 532 self.assertEqual(expected, actual) 533 534 # check if all same number of tokens 535 tokens = [t for t in tokenizer.tokenize(css)] 536 self.assertEqual(len(tokens), len(tests[css]))
537
538 - def test_tokenizefullsheet(self):
539 "cssutils Tokenizer().tokenize(fullsheet=True)" 540 import cssutils.cssproductions 541 tokenizer = Tokenizer(cssutils.cssproductions.MACROS, 542 cssutils.cssproductions.PRODUCTIONS) 543 tests = {} 544 tests.update(self.testsall) 545 tests.update(self.tests2) 546 tests.update(self.tests3) 547 tests.update(self.testsfullsheet) 548 tests.update(self.testsfullsheettrue) 549 for css in tests: 550 # check token format 551 tokens = tokenizer.tokenize(css, fullsheet=True) 552 for i, actual in enumerate(tokens): 553 try: 554 expected = tests[css][i] 555 except IndexError: 556 # EOF is added 557 self.assertEqual(actual[0], 'EOF') 558 else: 559 self.assertEqual(expected, actual) 560 561 # check if all same number of tokens 562 tokens = [t for t in tokenizer.tokenize(css, fullsheet=True)] 563 # EOF is added so -1 564 self.assertEqual(len(tokens) - 1, len(tests[css]))
565 566 567 # not really needed
568 - def test_tokenizeCSS3(self):
569 "CSS3 Tokenizer().tokenize()" 570 import cssutils.css3productions 571 tokenizer = Tokenizer(cssutils.css3productions.MACROS, 572 cssutils.css3productions.PRODUCTIONS) 573 tests = {} 574 tests.update(self.testsall) 575 tests.update(self.tests3) 576 for css in tests: 577 tokens = tokenizer.tokenize(css) 578 for i, actual in enumerate(tokens): 579 expected = tests[css][i] 580 self.assertEqual(expected, actual)
581 582 # not really needed
583 - def test_tokenizeCSS2_1(self):
584 "CSS2 Tokenizer().tokenize()" 585 import cssutils.css2productions 586 tokenizer = Tokenizer(cssutils.css2productions.MACROS, 587 cssutils.css2productions.PRODUCTIONS) 588 tests = {} 589 tests.update(self.testsall) 590 #tests.update(self.tests2) 591 tests.update(self.tests2only) 592 for css in tests: 593 tokens = tokenizer.tokenize(css) 594 for i, actual in enumerate(tokens): 595 expected = tests[css][i] 596 self.assertEqual(expected, actual)
597 598 # -------------- 599
600 - def __old(self):
601 602 testsOLD = { 603 u'x x1 -x .-x #_x -': [(1, 1, tt.IDENT, u'x'), 604 (1, 2, 'S', u' '), 605 (1, 3, tt.IDENT, u'x1'), 606 (1, 5, 'S', u' '), 607 (1, 6, tt.IDENT, u'-x'), 608 (1, 8, 'S', u' '), 609 (1, 9, tt.CLASS, u'.'), 610 (1, 10, tt.IDENT, u'-x'), 611 (1, 12, 'S', u' '), 612 (1, 13, tt.HASH, u'#_x'), 613 (1, 16, 'S', u' '), 614 (1, 17, 'DELIM', u'-')], 615 616 # num 617 u'1 1.1 -1 -1.1 .1 -.1 1.': [(1, 1, tt.NUMBER, u'1'), 618 (1, 2, 'S', u' '), (1, 3, tt.NUMBER, u'1.1'), 619 (1, 6, 'S', u' '), (1, 7, tt.NUMBER, u'-1'), 620 (1, 9, 'S', u' '), (1, 10, tt.NUMBER, u'-1.1'), 621 (1, 14, 'S', u' '), (1, 15, tt.NUMBER, u'0.1'), 622 (1, 17, 'S', u' '), (1, 18, tt.NUMBER, u'-0.1'), 623 (1, 21, 'S', u' '), 624 (1, 22, tt.NUMBER, u'1'), (1, 23, tt.CLASS, u'.') 625 ], 626 # CSS3 pseudo 627 u'::': [(1, 1, tt.PSEUDO_ELEMENT, u'::')], 628 629 # SPECIALS 630 u'*+>~{},': [(1, 1, tt.UNIVERSAL, u'*'), 631 (1, 2, tt.PLUS, u'+'), 632 (1, 3, tt.GREATER, u'>'), 633 (1, 4, tt.TILDE, u'~'), 634 (1, 5, tt.LBRACE, u'{'), 635 (1, 6, tt.RBRACE, u'}'), 636 (1, 7, tt.COMMA, u',')], 637 638 # DELIM 639 u'!%:&$|': [(1, 1, 'DELIM', u'!'), 640 (1, 2, 'DELIM', u'%'), 641 (1, 3, 'DELIM', u':'), 642 (1, 4, 'DELIM', u'&'), 643 (1, 5, 'DELIM', u'$'), 644 (1, 6, 'DELIM', u'|')], 645 646 647 # DIMENSION 648 u'5em': [(1, 1, tt.DIMENSION, u'5em')], 649 u' 5em': [(1, 1, 'S', u' '), (1, 2, tt.DIMENSION, u'5em')], 650 u'5em ': [(1, 1, tt.DIMENSION, u'5em'), (1, 4, 'S', u' ')], 651 652 u'-5em': [(1, 1, tt.DIMENSION, u'-5em')], 653 u' -5em': [(1, 1, 'S', u' '), (1, 2, tt.DIMENSION, u'-5em')], 654 u'-5em ': [(1, 1, tt.DIMENSION, u'-5em'), (1, 5, 'S', u' ')], 655 656 u'.5em': [(1, 1, tt.DIMENSION, u'0.5em')], 657 u' .5em': [(1, 1, 'S', u' '), (1, 2, tt.DIMENSION, u'0.5em')], 658 u'.5em ': [(1, 1, tt.DIMENSION, u'0.5em'), (1, 5, 'S', u' ')], 659 660 u'-.5em': [(1, 1, tt.DIMENSION, u'-0.5em')], 661 u' -.5em': [(1, 1, 'S', u' '), (1, 2, tt.DIMENSION, u'-0.5em')], 662 u'-.5em ': [(1, 1, tt.DIMENSION, 
u'-0.5em'), (1, 6, 'S', u' ')], 663 664 u'5em5_-': [(1, 1, tt.DIMENSION, u'5em5_-')], 665 666 u'a a5 a5a 5 5a 5a5': [(1, 1, tt.IDENT, u'a'), 667 (1, 2, 'S', u' '), 668 (1, 3, tt.IDENT, u'a5'), 669 (1, 5, 'S', u' '), 670 (1, 6, tt.IDENT, u'a5a'), 671 (1, 9, 'S', u' '), 672 (1, 10, tt.NUMBER, u'5'), 673 (1, 11, 'S', u' '), 674 (1, 12, tt.DIMENSION, u'5a'), 675 (1, 14, 'S', u' '), 676 (1, 15, tt.DIMENSION, u'5a5')], 677 678 # URI 679 u'url()': [(1, 1, tt.URI, u'url()')], 680 u'url();': [(1, 1, tt.URI, u'url()'), (1, 6, tt.SEMICOLON, ';')], 681 u'url("x")': [(1, 1, tt.URI, u'url("x")')], 682 u'url( "x")': [(1, 1, tt.URI, u'url("x")')], 683 u'url("x" )': [(1, 1, tt.URI, u'url("x")')], 684 u'url( "x" )': [(1, 1, tt.URI, u'url("x")')], 685 u' url("x")': [ 686 (1, 1, 'S', u' '), 687 (1, 2, tt.URI, u'url("x")')], 688 u'url("x") ': [ 689 (1, 1, tt.URI, u'url("x")'), 690 (1, 9, 'S', u' '), 691 ], 692 u'url(ab)': [(1, 1, tt.URI, u'url(ab)')], 693 u'url($#/ab)': [(1, 1, tt.URI, u'url($#/ab)')], 694 u'url(\1233/a/b)': [(1, 1, tt.URI, u'url(\1233/a/b)')], 695 # not URI 696 u'url("1""2")': [ 697 (1, 1, tt.FUNCTION, u'url('), 698 (1, 5, tt.STRING, u'"1"'), 699 (1, 8, tt.STRING, u'"2"'), 700 (1, 11, tt.RPARANTHESIS, u')'), 701 ], 702 u'url(a"2")': [ 703 (1, 1, tt.FUNCTION, u'url('), 704 (1, 5, tt.IDENT, u'a'), 705 (1, 6, tt.STRING, u'"2"'), 706 (1, 9, tt.RPARANTHESIS, u')'), 707 ], 708 u'url(a b)': [ 709 (1, 1, tt.FUNCTION, u'url('), 710 (1, 5, tt.IDENT, u'a'), 711 (1, 6, 'S', u' '), 712 (1, 7, tt.IDENT, u'b'), 713 (1, 8, tt.RPARANTHESIS, u')'), 714 ], 715 716 # FUNCTION 717 u' counter("x")': [ 718 (1,1, 'S', u' '), 719 (1, 2, tt.FUNCTION, u'counter('), 720 (1, 10, tt.STRING, u'"x"'), 721 (1, 13, tt.RPARANTHESIS, u')')], 722 # HASH 723 u'# #a #_a #-a #1': [ 724 (1, 1, 'DELIM', u'#'), 725 (1, 2, 'S', u' '), 726 (1, 3, tt.HASH, u'#a'), 727 (1, 5, 'S', u' '), 728 (1, 6, tt.HASH, u'#_a'), 729 (1, 9, 'S', u' '), 730 (1, 10, tt.HASH, u'#-a'), 731 (1, 13, 'S', u' '), 732 (1, 14, tt.HASH, 
u'#1') 733 ], 734 u'#1a1 ': [ 735 (1, 1, tt.HASH, u'#1a1'), 736 (1, 5, 'S', u' '), 737 ], 738 u'#1a1\n': [ 739 (1, 1, tt.HASH, u'#1a1'), 740 (1, 5, 'S', u'\n'), 741 ], 742 u'#1a1{': [ 743 (1, 1, tt.HASH, u'#1a1'), 744 (1, 5, tt.LBRACE, u'{'), 745 ], 746 u'#1a1 {': [ 747 (1, 1, tt.HASH, u'#1a1'), 748 (1, 5, 'S', u' '), 749 (1, 6, tt.LBRACE, u'{'), 750 ], 751 u'#1a1\n{': [ 752 (1, 1, tt.HASH, u'#1a1'), 753 (1, 5, 'S', u'\n'), 754 (2, 1, tt.LBRACE, u'{'), 755 ], 756 u'#1a1\n {': [ 757 (1, 1, tt.HASH, u'#1a1'), 758 (1, 5, 'S', u'\n '), 759 (2, 2, tt.LBRACE, u'{'), 760 ], 761 u'#1a1 \n{': [ 762 (1, 1, tt.HASH, u'#1a1'), 763 (1, 5, 'S', u' \n'), 764 (2, 1, tt.LBRACE, u'{'), 765 ], 766 # STRINGS with NL 767 u'"x\n': [(1,1, tt.INVALID, u'"x\n')], 768 u'"x\r': [(1,1, tt.INVALID, u'"x\r')], 769 u'"x\f': [(1,1, tt.INVALID, u'"x\f')], 770 u'"x\n ': [ 771 (1,1, tt.INVALID, u'"x\n'), 772 (2,1, 'S', u' ') 773 ] 774 775 }
776 777 # tests = { 778 # u'/*a': xml.dom.SyntaxErr, 779 # u'"a': xml.dom.SyntaxErr, 780 # u"'a": xml.dom.SyntaxErr, 781 # u"\\0 a": xml.dom.SyntaxErr, 782 # u"\\00": xml.dom.SyntaxErr, 783 # u"\\000": xml.dom.SyntaxErr, 784 # u"\\0000": xml.dom.SyntaxErr, 785 # u"\\00000": xml.dom.SyntaxErr, 786 # u"\\000000": xml.dom.SyntaxErr, 787 # u"\\0000001": xml.dom.SyntaxErr 788 # } 789 # self.tokenizer.log.raiseExceptions = True #!! 790 # for css, exception in tests.items(): 791 # self.assertRaises(exception, self.tokenizer.tokenize, css) 792 793 794 if __name__ == '__main__': 795 import unittest 796 unittest.main() 797