1
2
3
4
5 import base64
6 import errno
7 import logging
8 import os
9 import time
10 import socket
11 import traceback
12 import types
13 import urlparse
14
try:
    import ssl  # python 2.6+
    _ssl_wrapper = ssl.wrap_socket
    have_ssl = True
except ImportError:
    if hasattr(socket, "ssl"):
        from httplib import FakeSocket
        from .sock import trust_all_certificates

        @trust_all_certificates
        def _ssl_wrapper(sck, **kwargs):
            # python < 2.6 fallback: emulate ssl.wrap_socket with the
            # legacy socket.ssl API wrapped in httplib's FakeSocket.
            ssl_sck = socket.ssl(sck, **kwargs)
            return FakeSocket(sck, ssl_sck)
        have_ssl = True
    else:
        # no usable SSL implementation at all
        have_ssl = False
31
32 from . import __version__
33 from .conn import Connection
34 from .errors import RequestError, RequestTimeout, RedirectLimit, \
35 NoMoreData, ProxyError
36 from .globals import get_manager
37 from . import http
38
39 from .sock import close, send, sendfile, sendlines, send_chunk, \
40 validate_ssl_args
41 from .util import parse_netloc, rewrite_location
42 from .wrappers import Request, Response
43
44
# Client tuning knobs and defaults.
MAX_CLIENT_TIMEOUT = 300
MAX_CLIENT_CONNECTIONS = 5
MAX_CLIENT_TRIES = 5
CLIENT_WAIT_TRIES = 0.3
MAX_FOLLOW_REDIRECTS = 5
USER_AGENT = "restkit/%s" % __version__

log = logging.getLogger(__name__)
55
56 """ A client handle a connection at a time. A client is threadsafe,
57 but an handled shouldn't be shared between threads. All connections
58 are shared between threads via a pool.
59
60 >>> from restkit import *
61 >>> c = Client()
62 >>> r = c.request("http://google.com")
>>> r.status
64 '301 Moved Permanently'
65 >>> r.body_string()
66 '<HTML><HEAD><meta http-equiv="content-type" content="text/html;charset=utf-8">\n<TITLE>301 Moved</TITLE></HEAD><BODY>\n<H1>301 Moved</H1>\nThe document has moved\n<A HREF="http://www.google.com/">here</A>.\r\n</BODY></HTML>\r\n'
67 >>> c.follow_redirect = True
68 >>> r = c.request("http://google.com")
69 >>> r.status
70 '200 OK'
71
72 """
73
# default HTTP version used in the request line
version = (1, 1)
# response wrapper class; may be overridden via __init__
response_class = Response
76
def __init__(self,
        follow_redirect=False,
        force_follow_redirect=False,
        max_follow_redirect=MAX_FOLLOW_REDIRECTS,
        filters=None,
        decompress=True,
        max_status_line_garbage=None,
        max_header_count=0,
        manager=None,
        response_class=None,
        timeout=None,
        use_proxy=False,
        max_tries=5,
        wait_tries=1.0,
        **ssl_args):
    """
    Client parameters
    ~~~~~~~~~~~~~~~~~

    :param follow_redirect: follow redirection, by default False
    :param force_follow_redirect: follow redirects even for non
        GET/HEAD requests
    :param max_follow_redirect: number of redirections available
    :param filters: http filters to pass
    :param decompress: allows the client to decompress the response
        body
    :param max_status_line_garbage: defines the maximum number of ignorable
        lines before we expect a HTTP response's status line. With
        HTTP/1.1 persistent connections, the problem arises that broken
        scripts could return a wrong Content-Length (there are more
        bytes sent than specified). Unfortunately, in some cases, this
        cannot be detected after the bad response, but only before the
        next one. So the client is able to skip bad lines using this
        limit. 0 disables garbage collection, None means an unlimited
        number of tries.
    :param max_header_count: determines the maximum HTTP header count
        allowed. By default no limit.
    :param manager: the manager to use. By default we use the global
        one.
    :param response_class: the response class to use
    :param timeout: the default timeout of the connection
        (SO_TIMEOUT)
    :param max_tries: the number of tries before we give up a
        connection
    :param wait_tries: number of time we wait between each tries.
    :param ssl_args: named arguments, see the ssl module for more
        information
    """
    self.follow_redirect = follow_redirect
    self.force_follow_redirect = force_follow_redirect
    self.max_follow_redirect = max_follow_redirect
    self.decompress = decompress
    self.filters = filters or []
    self.max_status_line_garbage = max_status_line_garbage
    self.max_header_count = max_header_count
    self.use_proxy = use_proxy

    # split self.filters into request/response filter lists
    self.request_filters = []
    self.response_filters = []
    self.load_filters()

    if manager is None:
        manager = get_manager()
    self._manager = manager

    # allow overriding the response class per instance
    if response_class is not None:
        self.response_class = response_class

    self.max_tries = max_tries
    self.wait_tries = wait_tries
    self.timeout = timeout

    # per-request state, reset before each perform()
    self._nb_redirections = self.max_follow_redirect
    self._url = None
    self._initial_url = None
    self._write_cb = None
    self._headers = None
    self._sock_key = None
    self._sock = None
    self._original = None

    self.method = 'GET'
    self.body = None
    self.ssl_args = ssl_args or {}
163
165 """ Populate filters from self.filters.
166 Must be called each time self.filters is updated.
167 """
168 for f in self.filters:
169 if hasattr(f, "on_request"):
170 self.request_filters.append(f)
171 if hasattr(f, "on_response"):
172 self.response_filters.append(f)
173
175 """ create a socket """
176 if log.isEnabledFor(logging.DEBUG):
177 log.debug("create new connection")
178 for res in socket.getaddrinfo(addr[0], addr[1], 0,
179 socket.SOCK_STREAM):
180 af, socktype, proto, canonname, sa = res
181
182 try:
183 sck = socket.socket(af, socktype, proto)
184
185 if self.timeout is not None:
186 sck.settimeout(self.timeout)
187
188 sck.connect(sa)
189
190 if is_ssl:
191 if not have_ssl:
192 raise ValueError("https isn't supported. On python 2.5x,"
193 + " https support requires ssl module "
194 + "(http://pypi.python.org/pypi/ssl) "
195 + "to be intalled.")
196 validate_ssl_args(self.ssl_args)
197 sck = _ssl_wrapper(sck, **self.ssl_args)
198
199 return sck
200 except socket.error:
201 close(sck)
202 raise socket.error, "getaddrinfo returns an empty list"
203
205 """ get a connection from the pool or create new one. """
206
207 addr = parse_netloc(request.parsed_url)
208 is_ssl = request.is_ssl()
209
210 extra_headers = []
211 sck = None
212 if self.use_proxy:
213 sck, addr, extra_headers = self.proxy_connection(request, addr, ssl)
214 if not sck:
215 sck = self._manager.find_socket(addr, is_ssl)
216 if sck is None:
217 sck = self.connect(addr, is_ssl)
218
219
220 if self.timeout is not None:
221 sck.settimeout(self.timeout)
222
223 connection = Connection(sck, self._manager, addr,
224 ssl=is_ssl, extra_headers=extra_headers)
225 return connection
226
228 """ do the proxy connection """
229 proxy_settings = os.environ.get('%s_proxy' %
230 request.parsed_url.scheme)
231
232 if proxy_settings and proxy_settings is not None:
233 proxy_settings, proxy_auth = _get_proxy_auth(proxy_settings)
234 addr = parse_netloc(urlparse.urlparse(proxy_settings))
235
236 if ssl:
237 if proxy_auth:
238 proxy_auth = 'Proxy-authorization: %s' % proxy_auth
239 proxy_connect = 'CONNECT %s:%s HTTP/1.0\r\n' % req_addr
240
241 user_agent = request.headers.iget('user_agent')
242 if not user_agent:
243 user_agent = "User-Agent: restkit/%s\r\n" % __version__
244
245 proxy_pieces = '%s%s%s\r\n' % (proxy_connect, proxy_auth,
246 user_agent)
247
248 sck = self._manager.find_socket(addr, ssl)
249 if sck is None:
250 self = self.connect(addr, ssl)
251
252 send(sck, proxy_pieces)
253 unreader = http.Unreader(sck)
254 resp = http.Request(unreader)
255 body = resp.body.read()
256 if resp.status_int != 200:
257 raise ProxyError("Tunnel connection failed: %d %s" %
258 (resp.status_int, body))
259
260 return sck, addr, []
261 else:
262 headers = []
263 if proxy_auth:
264 headers = [('Proxy-authorization', proxy_auth)]
265
266 sck = self._manager.find_socket(addr, ssl)
267 if sck is None:
268 sck = self.connect(addr, ssl)
269 return sck, addr, headers
270 return None, req_addr, []
271
273 """ create final header string """
274 headers = request.headers.copy()
275 if extra_headers is not None:
276 for k, v in extra_headers:
277 headers[k] = v
278
279 if not request.body and request.method in ('POST', 'PUT',):
280 headers['Content-Length'] = 0
281
282 if self.version == (1,1):
283 httpver = "HTTP/1.1"
284 else:
285 httpver = "HTTP/1.0"
286
287 ua = headers.iget('user_agent')
288 if not ua:
289 ua = USER_AGENT
290 host = request.host
291
292 accept_encoding = headers.iget('accept-encoding')
293 if not accept_encoding:
294 accept_encoding = 'identity'
295
296 lheaders = [
297 "%s %s %s\r\n" % (request.method, request.path, httpver),
298 "Host: %s\r\n" % host,
299 "User-Agent: %s\r\n" % ua,
300 "Accept-Encoding: %s\r\n" % accept_encoding
301 ]
302
303 lheaders.extend(["%s: %s\r\n" % (k, str(v)) for k, v in \
304 headers.items() if k.lower() not in \
305 ('user-agent', 'host', 'accept-encoding',)])
306 if log.isEnabledFor(logging.DEBUG):
307 log.debug("Send headers: %s" % lheaders)
308 return "%s\r\n" % "".join(lheaders)
309
418
419
420 - def request(self, url, method='GET', body=None, headers=None):
438
def redirect(self, resp, location, request):
    """ reset request, set new url of request and perform it

    :raise RedirectLimit: when the redirection budget is exhausted.
    """
    if self._nb_redirections <= 0:
        raise RedirectLimit("Redirection limit is reached")

    if request.initial_url is None:
        # remember the very first url of the redirect chain
        request.initial_url = self.url

    # drain the current response body so the connection can be reused
    if hasattr(resp, "_body"):
        resp._body.discard()
    else:
        resp.body.discard()

    # resolve relative redirects against the current url
    location = rewrite_location(request.url, location)

    if log.isEnabledFor(logging.DEBUG):
        log.debug("Redirect to %s" % location)

    request.url = location

    self._nb_redirections -= 1

    return self.perform(request)
466
468 """ return final respons, it is only accessible via peform
469 method """
470 if log.isEnabledFor(logging.DEBUG):
471 log.debug("Start to parse response")
472
473 unreader = http.Unreader(connection.socket())
474 while True:
475 resp = http.Request(unreader, decompress=self.decompress,
476 max_status_line_garbage=self.max_status_line_garbage,
477 max_header_count=self.max_header_count)
478 if resp.status_int != 100:
479 break
480 resp.body.discard()
481
482 if log.isEnabledFor(logging.DEBUG):
483 log.debug("Got response: %s" % resp.status)
484 log.debug("headers: [%s]" % resp.headers)
485
486 location = resp.headers.iget('location')
487
488 if self.follow_redirect:
489 if resp.status_int in (301, 302, 307,):
490 if request.method in ('GET', 'HEAD',) or \
491 self.force_follow_redirect:
492 if hasattr(self.body, 'read'):
493 try:
494 self.body.seek(0)
495 except AttributeError:
496 connection.release()
497 raise RequestError("Can't redirect %s to %s "
498 "because body has already been read"
499 % (self.url, location))
500 connection.release()
501 return self.redirect(resp, location, request)
502
503 elif resp.status_int == 303 and self.method == "POST":
504 connection.release()
505 request.method = "GET"
506 request.body = None
507 return self.redirect(resp, location, request)
508
509
510 resp = self.response_class(connection, request, resp)
511
512
513 for f in self.response_filters:
514 f.on_response(resp, request)
515
516 if log.isEnabledFor(logging.DEBUG):
517 log.debug("return response class")
518
519
520 return resp
521
def _get_proxy_auth(proxy_settings):
    """ Extract proxy credentials from the environment
    (``proxy-username``/``proxy_username`` and the password
    equivalents) or, failing that, from the userinfo part of the
    proxy url itself.

    Returns ``(proxy_settings, auth)`` where credentials have been
    stripped from ``proxy_settings`` and ``auth`` is a
    ``'Basic ...\\r\\n'`` string, or ``''`` when no credentials exist.
    """
    proxy_username = os.environ.get('proxy-username')
    if not proxy_username:
        proxy_username = os.environ.get('proxy_username')
    proxy_password = os.environ.get('proxy-password')
    if not proxy_password:
        proxy_password = os.environ.get('proxy_password')

    proxy_password = proxy_password or ""

    if not proxy_username:
        u = urlparse.urlparse(proxy_settings)
        if u.username:
            # BUGFIX: the username parsed from the url was dropped, so
            # url-embedded credentials never produced an auth header.
            proxy_username = u.username
            proxy_password = u.password or proxy_password
            # strip the credentials from the url we connect to
            proxy_settings = urlparse.urlunparse((u.scheme,
                u.netloc.split("@")[-1], u.path, u.params, u.query,
                u.fragment))

    if proxy_username:
        user_auth = base64.encodestring('%s:%s' % (proxy_username,
                                        proxy_password))
        return proxy_settings, 'Basic %s\r\n' % (user_auth.strip())
    else:
        return proxy_settings, ''
547