Package pywurfl :: Package algorithms :: Package wurfl :: Module normalizers
[hide private]
[frames | no frames]

Source Code for Module pywurfl.algorithms.wurfl.normalizers

  1  # pywurfl - Wireless Universal Resource File Tools in Python 
  2  # Copyright (C) 2006-2010 Armand Lynch 
  3  # 
  4  # This library is free software; you can redistribute it and/or modify it 
  5  # under the terms of the GNU Lesser General Public License as published by the 
  6  # Free Software Foundation; either version 2.1 of the License, or (at your 
  7  # option) any later version. 
  8  # 
  9  # This library is distributed in the hope that it will be useful, but WITHOUT 
 10  # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 
 11  # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 
 12  # details. 
 13  # 
 14  # You should have received a copy of the GNU Lesser General Public License 
 15  # along with this library; if not, write to the Free Software Foundation, Inc., 
 16  # 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 
 17  # 
 18  # Armand Lynch <lyncha@users.sourceforge.net> 
 19   
 20  __doc__ = """ 
 21  This module contains the supporting classes for the Two Step Analysis user agent 
 22  algorithm that is used as the primary way to match user agents with the Java API 
 23  for the WURFL. 
 24   
 25  A description of the way the following source is intended to work can be found 
 26  within the source for the original Java API implementation here: 
 27  http://sourceforge.net/projects/wurfl/files/WURFL Java API/ 
 28   
 29  The original Java code is GPLd and Copyright (c) 2008-2009 WURFL-Pro srl 
 30  """ 
 31   
 32  __author__ = "Armand Lynch <lyncha@users.sourceforge.net>" 
 33  __copyright__ = "Copyright 2010, Armand Lynch" 
 34  __license__ = "LGPL" 
 35  __url__ = "http://celljam.net/" 
 36  __version__ = "1.0.0" 
 37   
 38  import re 
 39   
 40  # generic user agent normalizers 
 41   
# Proxy marker "(via babelfish.yahoo.com)" together with any whitespace on
# either side of it.
# NOTE(review): the dots in the host name are unescaped and therefore match
# any character -- presumably harmless, confirm before tightening.
babel_fish_re = re.compile(ur"\s*\(via babelfish.yahoo.com\)\s*", re.UNICODE)
# "UP.Link" followed by at least one more character, through to the end of
# the string, plus any whitespace immediately before it.
uplink_re = re.compile(ur"\s*UP\.Link.+$", re.UNICODE)
# "/SN", a run of digits (captured as group 1) and one whitespace character.
vodafone_re = re.compile(ur"/SN(\d+)\s", re.UNICODE)
# The literal "Mozilla/4.0 (YesWAP mobile phone proxy)" token with optional
# leading whitespace.
yeswap_re = re.compile(ur"\s*Mozilla/4\.0 \(YesWAP mobile phone proxy\)",
                       re.UNICODE)
# Group 1: "Mozilla/5.0" and the text up to the "; U;" marker.
# Group 2: the "; U;" marker and the text up to "Safari/".
# Group 3: "Safari/" followed by at most three digits.
safari_re = re.compile(ur"(Mozilla\/5\.0.*)(\;\s*U\;.*?)(Safari\/\d{0,3})",
                       re.UNICODE)
# Proxy marker "(via IBM WBI <digits>.<digits>)".
ibm_wbi_re = re.compile(ur"\(via IBM WBI \d+\.\d+\)", re.UNICODE)
# Token "GMCC/<digit>.<digit>".
# NOTE(review): this is the only pattern compiled without re.UNICODE --
# confirm whether that is intentional.
gmcc_re = re.compile(ur"GMCC/\d\.\d")
 51   
 52   
def babelfish(user_agent):
    """Strip every "(via babelfish.yahoo.com)" marker from the user agent.

    All matches of ``babel_fish_re`` are replaced with the empty string; a
    user agent without the marker is returned unchanged.
    """
    cleaned = babel_fish_re.sub('', user_agent)
    return cleaned
57 58
def blackberry(user_agent):
    """Drop whatever precedes the first "BlackBerry" in the user agent.

    A user agent that already starts with "BlackBerry", or that does not
    contain it at all, is returned unchanged.
    """
    marker = u"BlackBerry"
    position = user_agent.find(marker)
    # -1 means the marker is absent, 0 means it already leads the string;
    # in both cases there is nothing to trim.
    if position <= 0:
        return user_agent
    return user_agent[position:]
65 66 71 72
def vodafone(user_agent):
    """Mask the digits of every "/SNnnnn" serial number with "X" characters.

    Each match of ``vodafone_re`` ("/SN", a run of digits and one whitespace
    character) is rewritten as "/SN", one "X" per digit and a single space.
    A user agent without such a serial number is returned unchanged.

    The mask is built separately for each match.  Sizing it from the first
    match and reusing it for every later one would give a second serial
    number of different length a mask of the wrong length.
    """
    def _mask(match):
        # group(1) holds only the digits, so its length is the mask width.
        return u"/SN" + u"X" * len(match.group(1)) + u" "

    return vodafone_re.sub(_mask, user_agent)
81 82
def yeswap(user_agent):
    """Strip the "Mozilla/4.0 (YesWAP mobile phone proxy)" token.

    Every match of ``yeswap_re``, including whitespace directly before the
    token, is replaced with the empty string.
    """
    cleaned = yeswap_re.sub('', user_agent)
    return cleaned
87 88
def ibm_wbi(user_agent):
    """Strip every "(via IBM WBI n.n)" marker from the user agent.

    All matches of ``ibm_wbi_re`` are replaced with the empty string.
    """
    cleaned = ibm_wbi_re.sub('', user_agent)
    return cleaned
92 93
def gmcc(user_agent):
    """Strip every "GMCC/n.n" token from the user agent.

    All matches of ``gmcc_re`` are replaced with the empty string.
    """
    cleaned = gmcc_re.sub('', user_agent)
    return cleaned
97 98
def _combine_funcs(*funcs):
    """Build a normalizer that applies ``funcs`` one after another.

    The returned callable takes a user agent, feeds it through each function
    in the order given (each receives the previous one's result), then
    replaces double spaces with single spaces and strips leading and
    trailing whitespace.
    """
    def normalizer(user_agent):
        for func in funcs:
            user_agent = func(user_agent)
        # Cutting a marker out of the middle of a user agent can leave two
        # adjacent spaces; fold each such pair into one.  Replacing a single
        # space with a single space, as this line used to, changes nothing.
        return user_agent.replace('  ', ' ').strip()
    return normalizer


# NOTE(review): ``uplink`` is not defined in the visible part of this module;
# confirm it exists before this assignment runs.
default_normalizer = _combine_funcs(vodafone, blackberry, uplink, yeswap,
                                    babelfish, ibm_wbi, gmcc)


# specific user agent normalizers
114 -def _specific_normalizer(user_agent, search_string, vsn_size):
115 if search_string in user_agent: 116 start = user_agent.index(search_string) 117 user_agent = user_agent[start:start + vsn_size] 118 return user_agent
119 120
def chrome(user_agent):
    """Keep the 8 characters starting at "Chrome" (e.g. "Chrome/5").

    Delegates to ``_specific_normalizer``.
    """
    return _specific_normalizer(user_agent, search_string=u"Chrome",
                                vsn_size=8)
124 125
def firefox(user_agent):
    """Keep the 11 characters starting at "Firefox" (e.g. "Firefox/3.6").

    Delegates to ``_specific_normalizer``.
    """
    return _specific_normalizer(user_agent, search_string=u"Firefox",
                                vsn_size=11)
129 130
def konqueror(user_agent):
    """Keep the 11 characters starting at "Konqueror" (e.g. "Konqueror/3").

    Delegates to ``_specific_normalizer``.
    """
    return _specific_normalizer(user_agent, search_string=u"Konqueror",
                                vsn_size=11)
134 135
def msie(user_agent):
    """Keep the 8 characters starting at "MSIE" (e.g. "MSIE 7.0").

    Delegates to ``_specific_normalizer``.
    """
    return _specific_normalizer(user_agent, search_string=u"MSIE",
                                vsn_size=8)
139 140
def opera(user_agent):
    """Keep the 7 characters starting at "Opera" (e.g. "Opera/9").

    Delegates to ``_specific_normalizer``.
    """
    return _specific_normalizer(user_agent, search_string=u"Opera",
                                vsn_size=7)
144 145
def safari(user_agent):
    """
    Collapse a Safari user agent to its leading part and its Safari token.

    Everything from the "; U;" marker up to "Safari/xxx" is removed, and the
    two remaining pieces are joined with a single space.

    e.g Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_4_11; fr) AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.1 Safari/525.18
    becomes
    Mozilla/5.0 (Macintosh Safari/525

    A user agent that ``safari_re`` does not match is returned unchanged.
    """
    found = safari_re.search(user_agent)
    if found is None:
        return user_agent
    # Group 2 (the "; U; ..." middle section) is deliberately discarded.
    leading = found.group(1).strip()
    safari_token = found.group(3).strip()
    return " ".join([leading, safari_token])
160