1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 __doc__ = """
21 This module contains the supporting classes for the Two Step Analysis user agent
22 algorithm that is used as the primary way to match user agents with the Java API
23 for the WURFL.
24
25 A description of the way the following source is intended to work can be found
26 within the source for the original Java API implementation here:
27 http://sourceforge.net/projects/wurfl/files/WURFL Java API/
28
29 The original Java code is GPLd and Copyright (c) 2008-2009 WURFL-Pro srl
30 """
31
32 __author__ = "Armand Lynch <lyncha@users.sourceforge.net>"
33 __copyright__ = "Copyright 2010, Armand Lynch"
34 __license__ = "LGPL"
35 __url__ = "http://celljam.net/"
36 __version__ = "1.0.0"
37
38 import re
39
40
41
# Pre-compiled patterns shared by the user agent normalizer functions.
#
# Plain raw strings are used rather than the ``ur"..."`` prefix: ``ur`` only
# exists in Python 2 and is a SyntaxError on Python 3.  Every pattern is pure
# ASCII, so the compiled expressions match the same input either way.

# "(via babelfish.yahoo.com)" marker together with surrounding whitespace.
babel_fish_re = re.compile(r"\s*\(via babelfish.yahoo.com\)\s*", re.UNICODE)

# "UP.Link" and everything after it up to the end of the string.
uplink_re = re.compile(r"\s*UP\.Link.+$", re.UNICODE)

# "/SN" followed by digits (captured) and one whitespace character.
vodafone_re = re.compile(r"/SN(\d+)\s", re.UNICODE)

# YesWAP proxy marker, including any whitespace in front of it.
yeswap_re = re.compile(
    r"\s*Mozilla/4\.0 \(YesWAP mobile phone proxy\)", re.UNICODE)

# Three groups: the "Mozilla/5.0 ..." head, the "; U; ..." middle part and
# the "Safari/" token with at most three version digits.
safari_re = re.compile(
    r"(Mozilla\/5\.0.*)(\;\s*U\;.*?)(Safari\/\d{0,3})", re.UNICODE)

# "(via IBM WBI <major>.<minor>)" marker.
ibm_wbi_re = re.compile(r"\(via IBM WBI \d+\.\d+\)", re.UNICODE)

# "GMCC/<digit>.<digit>" token.
gmcc_re = re.compile(r"GMCC/\d\.\d")
51
52
54 """Replace the "via babelfish.yahoo.com" with ''"""
55
56 return babel_fish_re.sub('', user_agent)
57
58
60 """Strip everything that precedes the first "BlackBerry" in the user agent"""
61
62 if u"BlackBerry" in user_agent and not user_agent.startswith(u"BlackBerry"):
63 user_agent = user_agent[user_agent.index(u"BlackBerry"):]
64 return user_agent
65
66
68 """Replace the trailing UP.Link ... with ''"""
69
70 return uplink_re.sub('', user_agent)
71
72
74 """Normalize the "/SNnnnnnnnnnnnnnnnn" String."""
75
76 match = vodafone_re.search(user_agent)
77 if match:
78 grp_repl = u"/SN" + "X" * (len(match.group()) - 4) + " "
79 user_agent = vodafone_re.sub(grp_repl, user_agent)
80 return user_agent
81
82
84 """Replace the "YesWAP mobile phone proxy" with ''"""
85
86 return yeswap_re.sub('', user_agent)
87
88
92
93
def gmcc(user_agent):
    """Replace the "GMCC/n.n" token with ''"""

    cleaned = gmcc_re.sub('', user_agent)
    return cleaned
97
98
100 def normalizer(user_agent):
101
102 for f in funcs:
103 user_agent = f(user_agent)
104 return user_agent.replace(' ', ' ').strip()
105 return normalizer
106
# Generic normalizer built from the individual clean-up functions.
# NOTE(review): _combine_funcs is expected to apply them in the order given
# here -- confirm against its definition before reordering.
default_normalizer = _combine_funcs(
    vodafone,
    blackberry,
    uplink,
    yeswap,
    babelfish,
    ibm_wbi,
    gmcc)
109
110
111
112
113
115 if search_string in user_agent:
116 start = user_agent.index(search_string)
117 user_agent = user_agent[start:start + vsn_size]
118 return user_agent
119
120
124
125
129
130
134
135
136 -def msie(user_agent):
139
140
144
145
147 """
148 Return the safari user agent stripping out all the characters between
149 U; and Safari/xxx
150
151 e.g Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_4_11; fr) AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.1 Safari/525.18
152 becomes
153 Mozilla/5.0 (Macintosh Safari/525
154 """
155
156 match = safari_re.search(user_agent)
157 if match and len(match.groups()) >= 3:
158 user_agent = " ".join([match.group(1).strip(), match.group(3).strip()])
159 return user_agent
160