diff options
Diffstat (limited to 'scripts/lib')
7 files changed, 0 insertions, 3688 deletions
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/__init__.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/__init__.py deleted file mode 100644 index 7bcd9d5541..0000000000 --- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/__init__.py +++ /dev/null @@ -1,53 +0,0 @@ -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Library General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - -# Copyright 2002-2006 Michael D. Stenner, Ryan Tomayko - -# $Id: __init__.py,v 1.20 2006/09/22 00:58:55 mstenner Exp $ - -"""A high-level cross-protocol url-grabber. - -Using urlgrabber, data can be fetched in three basic ways: - - urlgrab(url) copy the file to the local filesystem - urlopen(url) open the remote file and return a file object - (like urllib2.urlopen) - urlread(url) return the contents of the file as a string - -When using these functions (or methods), urlgrabber supports the -following features: - - * identical behavior for http://, ftp://, and file:// urls - * http keepalive - faster downloads of many files by using - only a single connection - * byte ranges - fetch only a portion of the file - * reget - for a urlgrab, resume a partial download - * progress meters - the ability to report download progress - automatically, even when using urlopen! - * throttling - restrict bandwidth usage - * retries - automatically retry a download if it fails. The - number of retries and failure types are configurable. - * authenticated server access for http and ftp - * proxy support - support for authenticated http and ftp proxies - * mirror groups - treat a list of mirrors as a single source, - automatically switching mirrors if there is a failure. -""" - -__version__ = '3.1.0' -__date__ = '2006/09/21' -__author__ = 'Michael D. Stenner <mstenner@linux.duke.edu>, ' \ - 'Ryan Tomayko <rtomayko@naeblis.cx>' -__url__ = 'http://linux.duke.edu/projects/urlgrabber/' - -from grabber import urlgrab, urlopen, urlread diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py deleted file mode 100644 index 001b4e32d6..0000000000 --- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py +++ /dev/null @@ -1,463 +0,0 @@ -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the -# Free Software Foundation, Inc., -# 59 Temple Place, Suite 330, -# Boston, MA 02111-1307 USA - -# This file is part of urlgrabber, a high-level cross-protocol url-grabber -# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko - -# $Id: byterange.py,v 1.12 2006/07/20 20:15:58 mstenner Exp $ - -import os -import stat -import urllib -import urllib2 -import rfc822 - -DEBUG = None - -try: - from cStringIO import StringIO -except ImportError, msg: - from StringIO import StringIO - -class RangeError(IOError): - """Error raised when an unsatisfiable range is requested.""" - pass - -class HTTPRangeHandler(urllib2.BaseHandler): - """Handler that enables HTTP Range headers. - - This was extremely simple. The Range header is a HTTP feature to - begin with so all this class does is tell urllib2 that the - "206 Partial Content" reponse from the HTTP server is what we - expected. - - Example: - import urllib2 - import byterange - - range_handler = range.HTTPRangeHandler() - opener = urllib2.build_opener(range_handler) - - # install it - urllib2.install_opener(opener) - - # create Request and set Range header - req = urllib2.Request('http://www.python.org/') - req.header['Range'] = 'bytes=30-50' - f = urllib2.urlopen(req) - """ - - def http_error_206(self, req, fp, code, msg, hdrs): - # 206 Partial Content Response - r = urllib.addinfourl(fp, hdrs, req.get_full_url()) - r.code = code - r.msg = msg - return r - - def http_error_416(self, req, fp, code, msg, hdrs): - # HTTP's Range Not Satisfiable error - raise RangeError('Requested Range Not Satisfiable') - -class HTTPSRangeHandler(HTTPRangeHandler): - """ Range Header support for HTTPS. """ - - def https_error_206(self, req, fp, code, msg, hdrs): - return self.http_error_206(req, fp, code, msg, hdrs) - - def https_error_416(self, req, fp, code, msg, hdrs): - self.https_error_416(req, fp, code, msg, hdrs) - -class RangeableFileObject: - """File object wrapper to enable raw range handling. - This was implemented primarilary for handling range - specifications for file:// urls. This object effectively makes - a file object look like it consists only of a range of bytes in - the stream. - - Examples: - # expose 10 bytes, starting at byte position 20, from - # /etc/aliases. - >>> fo = RangeableFileObject(file('/etc/passwd', 'r'), (20,30)) - # seek seeks within the range (to position 23 in this case) - >>> fo.seek(3) - # tell tells where your at _within the range_ (position 3 in - # this case) - >>> fo.tell() - # read EOFs if an attempt is made to read past the last - # byte in the range. the following will return only 7 bytes. - >>> fo.read(30) - """ - - def __init__(self, fo, rangetup): - """Create a RangeableFileObject. - fo -- a file like object. only the read() method need be - supported but supporting an optimized seek() is - preferable. - rangetup -- a (firstbyte,lastbyte) tuple specifying the range - to work over. - The file object provided is assumed to be at byte offset 0. - """ - self.fo = fo - (self.firstbyte, self.lastbyte) = range_tuple_normalize(rangetup) - self.realpos = 0 - self._do_seek(self.firstbyte) - - def __getattr__(self, name): - """This effectively allows us to wrap at the instance level. - Any attribute not found in _this_ object will be searched for - in self.fo. This includes methods.""" - if hasattr(self.fo, name): - return getattr(self.fo, name) - raise AttributeError, name - - def tell(self): - """Return the position within the range. - This is different from fo.seek in that position 0 is the - first byte position of the range tuple. For example, if - this object was created with a range tuple of (500,899), - tell() will return 0 when at byte position 500 of the file. - """ - return (self.realpos - self.firstbyte) - - def seek(self,offset,whence=0): - """Seek within the byte range. - Positioning is identical to that described under tell(). - """ - assert whence in (0, 1, 2) - if whence == 0: # absolute seek - realoffset = self.firstbyte + offset - elif whence == 1: # relative seek - realoffset = self.realpos + offset - elif whence == 2: # absolute from end of file - # XXX: are we raising the right Error here? - raise IOError('seek from end of file not supported.') - - # do not allow seek past lastbyte in range - if self.lastbyte and (realoffset >= self.lastbyte): - realoffset = self.lastbyte - - self._do_seek(realoffset - self.realpos) - - def read(self, size=-1): - """Read within the range. - This method will limit the size read based on the range. - """ - size = self._calc_read_size(size) - rslt = self.fo.read(size) - self.realpos += len(rslt) - return rslt - - def readline(self, size=-1): - """Read lines within the range. - This method will limit the size read based on the range. - """ - size = self._calc_read_size(size) - rslt = self.fo.readline(size) - self.realpos += len(rslt) - return rslt - - def _calc_read_size(self, size): - """Handles calculating the amount of data to read based on - the range. - """ - if self.lastbyte: - if size > -1: - if ((self.realpos + size) >= self.lastbyte): - size = (self.lastbyte - self.realpos) - else: - size = (self.lastbyte - self.realpos) - return size - - def _do_seek(self,offset): - """Seek based on whether wrapped object supports seek(). - offset is relative to the current position (self.realpos). - """ - assert offset >= 0 - if not hasattr(self.fo, 'seek'): - self._poor_mans_seek(offset) - else: - self.fo.seek(self.realpos + offset) - self.realpos+= offset - - def _poor_mans_seek(self,offset): - """Seek by calling the wrapped file objects read() method. - This is used for file like objects that do not have native - seek support. The wrapped objects read() method is called - to manually seek to the desired position. - offset -- read this number of bytes from the wrapped - file object. - raise RangeError if we encounter EOF before reaching the - specified offset. - """ - pos = 0 - bufsize = 1024 - while pos < offset: - if (pos + bufsize) > offset: - bufsize = offset - pos - buf = self.fo.read(bufsize) - if len(buf) != bufsize: - raise RangeError('Requested Range Not Satisfiable') - pos+= bufsize - -class FileRangeHandler(urllib2.FileHandler): - """FileHandler subclass that adds Range support. - This class handles Range headers exactly like an HTTP - server would. - """ - def open_local_file(self, req): - import mimetypes - import mimetools - host = req.get_host() - file = req.get_selector() - localfile = urllib.url2pathname(file) - stats = os.stat(localfile) - size = stats[stat.ST_SIZE] - modified = rfc822.formatdate(stats[stat.ST_MTIME]) - mtype = mimetypes.guess_type(file)[0] - if host: - host, port = urllib.splitport(host) - if port or socket.gethostbyname(host) not in self.get_names(): - raise urllib2.URLError('file not on local host') - fo = open(localfile,'rb') - brange = req.headers.get('Range',None) - brange = range_header_to_tuple(brange) - assert brange != () - if brange: - (fb,lb) = brange - if lb == '': lb = size - if fb < 0 or fb > size or lb > size: - raise RangeError('Requested Range Not Satisfiable') - size = (lb - fb) - fo = RangeableFileObject(fo, (fb,lb)) - headers = mimetools.Message(StringIO( - 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' % - (mtype or 'text/plain', size, modified))) - return urllib.addinfourl(fo, headers, 'file:'+file) - - -# FTP Range Support -# Unfortunately, a large amount of base FTP code had to be copied -# from urllib and urllib2 in order to insert the FTP REST command. -# Code modifications for range support have been commented as -# follows: -# -- range support modifications start/end here - -from urllib import splitport, splituser, splitpasswd, splitattr, \ - unquote, addclosehook, addinfourl -import ftplib -import socket -import sys -import ftplib -import mimetypes -import mimetools - -class FTPRangeHandler(urllib2.FTPHandler): - def ftp_open(self, req): - host = req.get_host() - if not host: - raise IOError, ('ftp error', 'no host given') - host, port = splitport(host) - if port is None: - port = ftplib.FTP_PORT - - # username/password handling - user, host = splituser(host) - if user: - user, passwd = splitpasswd(user) - else: - passwd = None - host = unquote(host) - user = unquote(user or '') - passwd = unquote(passwd or '') - - try: - host = socket.gethostbyname(host) - except socket.error, msg: - raise urllib2.URLError(msg) - path, attrs = splitattr(req.get_selector()) - dirs = path.split('/') - dirs = map(unquote, dirs) - dirs, file = dirs[:-1], dirs[-1] - if dirs and not dirs[0]: - dirs = dirs[1:] - try: - fw = self.connect_ftp(user, passwd, host, port, dirs) - type = file and 'I' or 'D' - for attr in attrs: - attr, value = splitattr(attr) - if attr.lower() == 'type' and \ - value in ('a', 'A', 'i', 'I', 'd', 'D'): - type = value.upper() - - # -- range support modifications start here - rest = None - range_tup = range_header_to_tuple(req.headers.get('Range',None)) - assert range_tup != () - if range_tup: - (fb,lb) = range_tup - if fb > 0: rest = fb - # -- range support modifications end here - - fp, retrlen = fw.retrfile(file, type, rest) - - # -- range support modifications start here - if range_tup: - (fb,lb) = range_tup - if lb == '': - if retrlen is None or retrlen == 0: - raise RangeError('Requested Range Not Satisfiable due to unobtainable file length.') - lb = retrlen - retrlen = lb - fb - if retrlen < 0: - # beginning of range is larger than file - raise RangeError('Requested Range Not Satisfiable') - else: - retrlen = lb - fb - fp = RangeableFileObject(fp, (0,retrlen)) - # -- range support modifications end here - - headers = "" - mtype = mimetypes.guess_type(req.get_full_url())[0] - if mtype: - headers += "Content-Type: %s\n" % mtype - if retrlen is not None and retrlen >= 0: - headers += "Content-Length: %d\n" % retrlen - sf = StringIO(headers) - headers = mimetools.Message(sf) - return addinfourl(fp, headers, req.get_full_url()) - except ftplib.all_errors, msg: - raise IOError, ('ftp error', msg), sys.exc_info()[2] - - def connect_ftp(self, user, passwd, host, port, dirs): - fw = ftpwrapper(user, passwd, host, port, dirs) - return fw - -class ftpwrapper(urllib.ftpwrapper): - # range support note: - # this ftpwrapper code is copied directly from - # urllib. The only enhancement is to add the rest - # argument and pass it on to ftp.ntransfercmd - def retrfile(self, file, type, rest=None): - self.endtransfer() - if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1 - else: cmd = 'TYPE ' + type; isdir = 0 - try: - self.ftp.voidcmd(cmd) - except ftplib.all_errors: - self.init() - self.ftp.voidcmd(cmd) - conn = None - if file and not isdir: - # Use nlst to see if the file exists at all - try: - self.ftp.nlst(file) - except ftplib.error_perm, reason: - raise IOError, ('ftp error', reason), sys.exc_info()[2] - # Restore the transfer mode! - self.ftp.voidcmd(cmd) - # Try to retrieve as a file - try: - cmd = 'RETR ' + file - conn = self.ftp.ntransfercmd(cmd, rest) - except ftplib.error_perm, reason: - if str(reason)[:3] == '501': - # workaround for REST not supported error - fp, retrlen = self.retrfile(file, type) - fp = RangeableFileObject(fp, (rest,'')) - return (fp, retrlen) - elif str(reason)[:3] != '550': - raise IOError, ('ftp error', reason), sys.exc_info()[2] - if not conn: - # Set transfer mode to ASCII! - self.ftp.voidcmd('TYPE A') - # Try a directory listing - if file: cmd = 'LIST ' + file - else: cmd = 'LIST' - conn = self.ftp.ntransfercmd(cmd) - self.busy = 1 - # Pass back both a suitably decorated object and a retrieval length - return (addclosehook(conn[0].makefile('rb'), - self.endtransfer), conn[1]) - - -#################################################################### -# Range Tuple Functions -# XXX: These range tuple functions might go better in a class. - -_rangere = None -def range_header_to_tuple(range_header): - """Get a (firstbyte,lastbyte) tuple from a Range header value. - - Range headers have the form "bytes=<firstbyte>-<lastbyte>". This - function pulls the firstbyte and lastbyte values and returns - a (firstbyte,lastbyte) tuple. If lastbyte is not specified in - the header value, it is returned as an empty string in the - tuple. - - Return None if range_header is None - Return () if range_header does not conform to the range spec - pattern. - - """ - global _rangere - if range_header is None: return None - if _rangere is None: - import re - _rangere = re.compile(r'^bytes=(\d{1,})-(\d*)') - match = _rangere.match(range_header) - if match: - tup = range_tuple_normalize(match.group(1,2)) - if tup and tup[1]: - tup = (tup[0],tup[1]+1) - return tup - return () - -def range_tuple_to_header(range_tup): - """Convert a range tuple to a Range header value. - Return a string of the form "bytes=<firstbyte>-<lastbyte>" or None - if no range is needed. - """ - if range_tup is None: return None - range_tup = range_tuple_normalize(range_tup) - if range_tup: - if range_tup[1]: - range_tup = (range_tup[0],range_tup[1] - 1) - return 'bytes=%s-%s' % range_tup - -def range_tuple_normalize(range_tup): - """Normalize a (first_byte,last_byte) range tuple. - Return a tuple whose first element is guaranteed to be an int - and whose second element will be '' (meaning: the last byte) or - an int. Finally, return None if the normalized tuple == (0,'') - as that is equivelant to retrieving the entire file. - """ - if range_tup is None: return None - # handle first byte - fb = range_tup[0] - if fb in (None,''): fb = 0 - else: fb = int(fb) - # handle last byte - try: lb = range_tup[1] - except IndexError: lb = '' - else: - if lb is None: lb = '' - elif lb != '': lb = int(lb) - # check if range is over the entire file - if (fb,lb) == (0,''): return None - # check that the range is valid - if lb < fb: raise RangeError('Invalid byte range: %s-%s' % (fb,lb)) - return (fb,lb) - diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/grabber.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/grabber.py deleted file mode 100644 index fefdab36f6..0000000000 --- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/grabber.py +++ /dev/null @@ -1,1477 +0,0 @@ -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the -# Free Software Foundation, Inc., -# 59 Temple Place, Suite 330, -# Boston, MA 02111-1307 USA - -# This file is part of urlgrabber, a high-level cross-protocol url-grabber -# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko - -"""A high-level cross-protocol url-grabber. - -GENERAL ARGUMENTS (kwargs) - - Where possible, the module-level default is indicated, and legal - values are provided. - - copy_local = 0 [0|1] - - ignored except for file:// urls, in which case it specifies - whether urlgrab should still make a copy of the file, or simply - point to the existing copy. The module level default for this - option is 0. - - close_connection = 0 [0|1] - - tells URLGrabber to close the connection after a file has been - transfered. This is ignored unless the download happens with the - http keepalive handler (keepalive=1). Otherwise, the connection - is left open for further use. The module level default for this - option is 0 (keepalive connections will not be closed). - - keepalive = 1 [0|1] - - specifies whether keepalive should be used for HTTP/1.1 servers - that support it. The module level default for this option is 1 - (keepalive is enabled). - - progress_obj = None - - a class instance that supports the following methods: - po.start(filename, url, basename, length, text) - # length will be None if unknown - po.update(read) # read == bytes read so far - po.end() - - text = None - - specifies an alternativ text item in the beginning of the progress - bar line. If not given, the basename of the file is used. - - throttle = 1.0 - - a number - if it's an int, it's the bytes/second throttle limit. - If it's a float, it is first multiplied by bandwidth. If throttle - == 0, throttling is disabled. If None, the module-level default - (which can be set on default_grabber.throttle) is used. See - BANDWIDTH THROTTLING for more information. - - timeout = None - - a positive float expressing the number of seconds to wait for socket - operations. If the value is None or 0.0, socket operations will block - forever. Setting this option causes urlgrabber to call the settimeout - method on the Socket object used for the request. See the Python - documentation on settimeout for more information. - http://www.python.org/doc/current/lib/socket-objects.html - - bandwidth = 0 - - the nominal max bandwidth in bytes/second. If throttle is a float - and bandwidth == 0, throttling is disabled. If None, the - module-level default (which can be set on - default_grabber.bandwidth) is used. See BANDWIDTH THROTTLING for - more information. - - range = None - - a tuple of the form (first_byte, last_byte) describing a byte - range to retrieve. Either or both of the values may set to - None. If first_byte is None, byte offset 0 is assumed. If - last_byte is None, the last byte available is assumed. Note that - the range specification is python-like in that (0,10) will yeild - the first 10 bytes of the file. - - If set to None, no range will be used. - - reget = None [None|'simple'|'check_timestamp'] - - whether to attempt to reget a partially-downloaded file. Reget - only applies to .urlgrab and (obviously) only if there is a - partially downloaded file. Reget has two modes: - - 'simple' -- the local file will always be trusted. If there - are 100 bytes in the local file, then the download will always - begin 100 bytes into the requested file. - - 'check_timestamp' -- the timestamp of the server file will be - compared to the timestamp of the local file. ONLY if the - local file is newer than or the same age as the server file - will reget be used. If the server file is newer, or the - timestamp is not returned, the entire file will be fetched. - - NOTE: urlgrabber can do very little to verify that the partial - file on disk is identical to the beginning of the remote file. - You may want to either employ a custom "checkfunc" or simply avoid - using reget in situations where corruption is a concern. - - user_agent = 'urlgrabber/VERSION' - - a string, usually of the form 'AGENT/VERSION' that is provided to - HTTP servers in the User-agent header. The module level default - for this option is "urlgrabber/VERSION". - - http_headers = None - - a tuple of 2-tuples, each containing a header and value. These - will be used for http and https requests only. For example, you - can do - http_headers = (('Pragma', 'no-cache'),) - - ftp_headers = None - - this is just like http_headers, but will be used for ftp requests. - - proxies = None - - a dictionary that maps protocol schemes to proxy hosts. For - example, to use a proxy server on host "foo" port 3128 for http - and https URLs: - proxies={ 'http' : 'http://foo:3128', 'https' : 'http://foo:3128' } - note that proxy authentication information may be provided using - normal URL constructs: - proxies={ 'http' : 'http://user:host@foo:3128' } - Lastly, if proxies is None, the default environment settings will - be used. - - prefix = None - - a url prefix that will be prepended to all requested urls. For - example: - g = URLGrabber(prefix='http://foo.com/mirror/') - g.urlgrab('some/file.txt') - ## this will fetch 'http://foo.com/mirror/some/file.txt' - This option exists primarily to allow identical behavior to - MirrorGroup (and derived) instances. Note: a '/' will be inserted - if necessary, so you cannot specify a prefix that ends with a - partial file or directory name. - - opener = None - - Overrides the default urllib2.OpenerDirector provided to urllib2 - when making requests. This option exists so that the urllib2 - handler chain may be customized. Note that the range, reget, - proxy, and keepalive features require that custom handlers be - provided to urllib2 in order to function properly. If an opener - option is provided, no attempt is made by urlgrabber to ensure - chain integrity. You are responsible for ensuring that any - extension handlers are present if said features are required. - - data = None - - Only relevant for the HTTP family (and ignored for other - protocols), this allows HTTP POSTs. When the data kwarg is - present (and not None), an HTTP request will automatically become - a POST rather than GET. This is done by direct passthrough to - urllib2. If you use this, you may also want to set the - 'Content-length' and 'Content-type' headers with the http_headers - option. Note that python 2.2 handles the case of these - badly and if you do not use the proper case (shown here), your - values will be overridden with the defaults. - - -RETRY RELATED ARGUMENTS - - retry = None - - the number of times to retry the grab before bailing. If this is - zero, it will retry forever. This was intentional... really, it - was :). If this value is not supplied or is supplied but is None - retrying does not occur. - - retrycodes = [-1,2,4,5,6,7] - - a sequence of errorcodes (values of e.errno) for which it should - retry. See the doc on URLGrabError for more details on this. You - might consider modifying a copy of the default codes rather than - building yours from scratch so that if the list is extended in the - future (or one code is split into two) you can still enjoy the - benefits of the default list. You can do that with something like - this: - - retrycodes = urlgrabber.grabber.URLGrabberOptions().retrycodes - if 12 not in retrycodes: - retrycodes.append(12) - - checkfunc = None - - a function to do additional checks. This defaults to None, which - means no additional checking. The function should simply return - on a successful check. It should raise URLGrabError on an - unsuccessful check. Raising of any other exception will be - considered immediate failure and no retries will occur. - - If it raises URLGrabError, the error code will determine the retry - behavior. Negative error numbers are reserved for use by these - passed in functions, so you can use many negative numbers for - different types of failure. By default, -1 results in a retry, - but this can be customized with retrycodes. - - If you simply pass in a function, it will be given exactly one - argument: a CallbackObject instance with the .url attribute - defined and either .filename (for urlgrab) or .data (for urlread). - For urlgrab, .filename is the name of the local file. For - urlread, .data is the actual string data. If you need other - arguments passed to the callback (program state of some sort), you - can do so like this: - - checkfunc=(function, ('arg1', 2), {'kwarg': 3}) - - if the downloaded file has filename /tmp/stuff, then this will - result in this call (for urlgrab): - - function(obj, 'arg1', 2, kwarg=3) - # obj.filename = '/tmp/stuff' - # obj.url = 'http://foo.com/stuff' - - NOTE: both the "args" tuple and "kwargs" dict must be present if - you use this syntax, but either (or both) can be empty. - - failure_callback = None - - The callback that gets called during retries when an attempt to - fetch a file fails. The syntax for specifying the callback is - identical to checkfunc, except for the attributes defined in the - CallbackObject instance. The attributes for failure_callback are: - - exception = the raised exception - url = the url we're trying to fetch - tries = the number of tries so far (including this one) - retry = the value of the retry option - - The callback is present primarily to inform the calling program of - the failure, but if it raises an exception (including the one it's - passed) that exception will NOT be caught and will therefore cause - future retries to be aborted. - - The callback is called for EVERY failure, including the last one. - On the last try, the callback can raise an alternate exception, - but it cannot (without severe trickiness) prevent the exception - from being raised. - - interrupt_callback = None - - This callback is called if KeyboardInterrupt is received at any - point in the transfer. Basically, this callback can have three - impacts on the fetch process based on the way it exits: - - 1) raise no exception: the current fetch will be aborted, but - any further retries will still take place - - 2) raise a URLGrabError: if you're using a MirrorGroup, then - this will prompt a failover to the next mirror according to - the behavior of the MirrorGroup subclass. It is recommended - that you raise URLGrabError with code 15, 'user abort'. If - you are NOT using a MirrorGroup subclass, then this is the - same as (3). - - 3) raise some other exception (such as KeyboardInterrupt), which - will not be caught at either the grabber or mirror levels. - That is, it will be raised up all the way to the caller. - - This callback is very similar to failure_callback. They are - passed the same arguments, so you could use the same function for - both. - - urlparser = URLParser() - - The URLParser class handles pre-processing of URLs, including - auth-handling for user/pass encoded in http urls, file handing - (that is, filenames not sent as a URL), and URL quoting. If you - want to override any of this behavior, you can pass in a - replacement instance. See also the 'quote' option. - - quote = None - - Whether or not to quote the path portion of a url. - quote = 1 -> quote the URLs (they're not quoted yet) - quote = 0 -> do not quote them (they're already quoted) - quote = None -> guess what to do - - This option only affects proper urls like 'file:///etc/passwd'; it - does not affect 'raw' filenames like '/etc/passwd'. The latter - will always be quoted as they are converted to URLs. Also, only - the path part of a url is quoted. If you need more fine-grained - control, you should probably subclass URLParser and pass it in via - the 'urlparser' option. - -BANDWIDTH THROTTLING - - urlgrabber supports throttling via two values: throttle and - bandwidth Between the two, you can either specify and absolute - throttle threshold or specify a theshold as a fraction of maximum - available bandwidth. - - throttle is a number - if it's an int, it's the bytes/second - throttle limit. If it's a float, it is first multiplied by - bandwidth. If throttle == 0, throttling is disabled. If None, the - module-level default (which can be set with set_throttle) is used. - - bandwidth is the nominal max bandwidth in bytes/second. If throttle - is a float and bandwidth == 0, throttling is disabled. If None, the - module-level default (which can be set with set_bandwidth) is used. - - THROTTLING EXAMPLES: - - Lets say you have a 100 Mbps connection. This is (about) 10^8 bits - per second, or 12,500,000 Bytes per second. You have a number of - throttling options: - - *) set_bandwidth(12500000); set_throttle(0.5) # throttle is a float - - This will limit urlgrab to use half of your available bandwidth. - - *) set_throttle(6250000) # throttle is an int - - This will also limit urlgrab to use half of your available - bandwidth, regardless of what bandwidth is set to. - - *) set_throttle(6250000); set_throttle(1.0) # float - - Use half your bandwidth - - *) set_throttle(6250000); set_throttle(2.0) # float - - Use up to 12,500,000 Bytes per second (your nominal max bandwidth) - - *) set_throttle(6250000); set_throttle(0) # throttle = 0 - - Disable throttling - this is more efficient than a very large - throttle setting. - - *) set_throttle(0); set_throttle(1.0) # throttle is float, bandwidth = 0 - - Disable throttling - this is the default when the module is loaded. - - SUGGESTED AUTHOR IMPLEMENTATION (THROTTLING) - - While this is flexible, it's not extremely obvious to the user. I - suggest you implement a float throttle as a percent to make the - distinction between absolute and relative throttling very explicit. - - Also, you may want to convert the units to something more convenient - than bytes/second, such as kbps or kB/s, etc. - -""" - -# $Id: grabber.py,v 1.48 2006/09/22 00:58:05 mstenner Exp $ - -import os -import os.path -import sys -import urlparse -import rfc822 -import time -import string -import urllib -import urllib2 -from stat import * # S_* and ST_* - -######################################################################## -# MODULE INITIALIZATION -######################################################################## -try: - exec('from ' + (__name__.split('.'))[0] + ' import __version__') -except: - __version__ = '???' - -import sslfactory - -auth_handler = urllib2.HTTPBasicAuthHandler( \ - urllib2.HTTPPasswordMgrWithDefaultRealm()) - -try: - from i18n import _ -except ImportError, msg: - def _(st): return st - -try: - from httplib import HTTPException -except ImportError, msg: - HTTPException = None - -try: - # This is a convenient way to make keepalive optional. - # Just rename the module so it can't be imported. - import keepalive - from keepalive import HTTPHandler, HTTPSHandler - have_keepalive = True -except ImportError, msg: - have_keepalive = False - -try: - # add in range support conditionally too - import byterange - from byterange import HTTPRangeHandler, HTTPSRangeHandler, \ - FileRangeHandler, FTPRangeHandler, range_tuple_normalize, \ - range_tuple_to_header, RangeError -except ImportError, msg: - range_handlers = () - RangeError = None - have_range = 0 -else: - range_handlers = (HTTPRangeHandler(), HTTPSRangeHandler(), - FileRangeHandler(), FTPRangeHandler()) - have_range = 1 - - -# check whether socket timeout support is available (Python >= 2.3) -import socket -try: - TimeoutError = socket.timeout - have_socket_timeout = True -except AttributeError: - TimeoutError = None - have_socket_timeout = False - -######################################################################## -# functions for debugging output. These functions are here because they -# are also part of the module initialization. -DEBUG = None -def set_logger(DBOBJ): - """Set the DEBUG object. This is called by _init_default_logger when - the environment variable URLGRABBER_DEBUG is set, but can also be - called by a calling program. Basically, if the calling program uses - the logging module and would like to incorporate urlgrabber logging, - then it can do so this way. It's probably not necessary as most - internal logging is only for debugging purposes. - - The passed-in object should be a logging.Logger instance. It will - be pushed into the keepalive and byterange modules if they're - being used. The mirror module pulls this object in on import, so - you will need to manually push into it. In fact, you may find it - tidier to simply push your logging object (or objects) into each - of these modules independently. - """ - - global DEBUG - DEBUG = DBOBJ - if have_keepalive and keepalive.DEBUG is None: - keepalive.DEBUG = DBOBJ - if have_range and byterange.DEBUG is None: - byterange.DEBUG = DBOBJ - if sslfactory.DEBUG is None: - sslfactory.DEBUG = DBOBJ - -def _init_default_logger(): - '''Examines the environment variable URLGRABBER_DEBUG and creates - a logging object (logging.logger) based on the contents. It takes - the form - - URLGRABBER_DEBUG=level,filename - - where "level" can be either an integer or a log level from the - logging module (DEBUG, INFO, etc). If the integer is zero or - less, logging will be disabled. Filename is the filename where - logs will be sent. If it is "-", then stdout will be used. If - the filename is empty or missing, stderr will be used. If the - variable cannot be processed or the logging module cannot be - imported (python < 2.3) then logging will be disabled. Here are - some examples: - - URLGRABBER_DEBUG=1,debug.txt # log everything to debug.txt - URLGRABBER_DEBUG=WARNING,- # log warning and higher to stdout - URLGRABBER_DEBUG=INFO # log info and higher to stderr - - This funtion is called during module initialization. It is not - intended to be called from outside. The only reason it is a - function at all is to keep the module-level namespace tidy and to - collect the code into a nice block.''' - - try: - dbinfo = os.environ['URLGRABBER_DEBUG'].split(',') - import logging - level = logging._levelNames.get(dbinfo[0], int(dbinfo[0])) - if level < 1: raise ValueError() - - formatter = logging.Formatter('%(asctime)s %(message)s') - if len(dbinfo) > 1: filename = dbinfo[1] - else: filename = '' - if filename == '': handler = logging.StreamHandler(sys.stderr) - elif filename == '-': handler = logging.StreamHandler(sys.stdout) - else: handler = logging.FileHandler(filename) - handler.setFormatter(formatter) - DBOBJ = logging.getLogger('urlgrabber') - DBOBJ.addHandler(handler) - DBOBJ.setLevel(level) - except (KeyError, ImportError, ValueError): - DBOBJ = None - set_logger(DBOBJ) - -_init_default_logger() -######################################################################## -# END MODULE INITIALIZATION -######################################################################## - - - -class URLGrabError(IOError): - """ - URLGrabError error codes: - - URLGrabber error codes (0 -- 255) - 0 - everything looks good (you should never see this) - 1 - malformed url - 2 - local file doesn't exist - 3 - request for non-file local file (dir, etc) - 4 - IOError on fetch - 5 - OSError on fetch - 6 - no content length header when we expected one - 7 - HTTPException - 8 - Exceeded read limit (for urlread) - 9 - Requested byte range not satisfiable. - 10 - Byte range requested, but range support unavailable - 11 - Illegal reget mode - 12 - Socket timeout - 13 - malformed proxy url - 14 - HTTPError (includes .code and .exception attributes) - 15 - user abort - - MirrorGroup error codes (256 -- 511) - 256 - No more mirrors left to try - - Custom (non-builtin) classes derived from MirrorGroup (512 -- 767) - [ this range reserved for application-specific error codes ] - - Retry codes (< 0) - -1 - retry the download, unknown reason - - Note: to test which group a code is in, you can simply do integer - division by 256: e.errno / 256 - - Negative codes are reserved for use by functions passed in to - retrygrab with checkfunc. The value -1 is built in as a generic - retry code and is already included in the retrycodes list. - Therefore, you can create a custom check function that simply - returns -1 and the fetch will be re-tried. For more customized - retries, you can use other negative number and include them in - retry-codes. This is nice for outputting useful messages about - what failed. - - You can use these error codes like so: - try: urlgrab(url) - except URLGrabError, e: - if e.errno == 3: ... - # or - print e.strerror - # or simply - print e #### print '[Errno %i] %s' % (e.errno, e.strerror) - """ - pass - -class CallbackObject: - """Container for returned callback data. - - This is currently a dummy class into which urlgrabber can stuff - information for passing to callbacks. This way, the prototype for - all callbacks is the same, regardless of the data that will be - passed back. Any function that accepts a callback function as an - argument SHOULD document what it will define in this object. - - It is possible that this class will have some greater - functionality in the future. - """ - def __init__(self, **kwargs): - self.__dict__.update(kwargs) - -def urlgrab(url, filename=None, **kwargs): - """grab the file at <url> and make a local copy at <filename> - If filename is none, the basename of the url is used. - urlgrab returns the filename of the local file, which may be different - from the passed-in filename if the copy_local kwarg == 0. - - See module documentation for a description of possible kwargs. - """ - return default_grabber.urlgrab(url, filename, **kwargs) - -def urlopen(url, **kwargs): - """open the url and return a file object - If a progress object or throttle specifications exist, then - a special file object will be returned that supports them. - The file object can be treated like any other file object. - - See module documentation for a description of possible kwargs. - """ - return default_grabber.urlopen(url, **kwargs) - -def urlread(url, limit=None, **kwargs): - """read the url into a string, up to 'limit' bytes - If the limit is exceeded, an exception will be thrown. Note that urlread - is NOT intended to be used as a way of saying "I want the first N bytes" - but rather 'read the whole file into memory, but don't use too much' - - See module documentation for a description of possible kwargs. - """ - return default_grabber.urlread(url, limit, **kwargs) - - -class URLParser: - """Process the URLs before passing them to urllib2. - - This class does several things: - - * add any prefix - * translate a "raw" file to a proper file: url - * handle any http or https auth that's encoded within the url - * quote the url - - Only the "parse" method is called directly, and it calls sub-methods. - - An instance of this class is held in the options object, which - means that it's easy to change the behavior by sub-classing and - passing the replacement in. It need only have a method like: - - url, parts = urlparser.parse(url, opts) - """ - - def parse(self, url, opts): - """parse the url and return the (modified) url and its parts - - Note: a raw file WILL be quoted when it's converted to a URL. - However, other urls (ones which come with a proper scheme) may - or may not be quoted according to opts.quote - - opts.quote = 1 --> quote it - opts.quote = 0 --> do not quote it - opts.quote = None --> guess - """ - quote = opts.quote - - if opts.prefix: - url = self.add_prefix(url, opts.prefix) - - parts = urlparse.urlparse(url) - (scheme, host, path, parm, query, frag) = parts - - if not scheme or (len(scheme) == 1 and scheme in string.letters): - # if a scheme isn't specified, we guess that it's "file:" - if url[0] not in '/\\': url = os.path.abspath(url) - url = 'file:' + urllib.pathname2url(url) - parts = urlparse.urlparse(url) - quote = 0 # pathname2url quotes, so we won't do it again - - if scheme in ['http', 'https']: - parts = self.process_http(parts) - - if quote is None: - quote = self.guess_should_quote(parts) - if quote: - parts = self.quote(parts) - - url = urlparse.urlunparse(parts) - return url, parts - - def add_prefix(self, url, prefix): - if prefix[-1] == '/' or url[0] == '/': - url = prefix + url - else: - url = prefix + '/' + url - return url - - def process_http(self, parts): - (scheme, host, path, parm, query, frag) = parts - - if '@' in host and auth_handler: - try: - user_pass, host = host.split('@', 1) - if ':' in user_pass: - user, password = user_pass.split(':', 1) - except ValueError, e: - raise URLGrabError(1, _('Bad URL: %s') % url) - if DEBUG: DEBUG.info('adding HTTP auth: %s, XXXXXXXX', user) - auth_handler.add_password(None, host, user, password) - - return (scheme, host, path, parm, query, frag) - - def quote(self, parts): - """quote the URL - - This method quotes ONLY the path part. If you need to quote - other parts, you should override this and pass in your derived - class. The other alternative is to quote other parts before - passing into urlgrabber. - """ - (scheme, host, path, parm, query, frag) = parts - path = urllib.quote(path) - return (scheme, host, path, parm, query, frag) - - hexvals = '0123456789ABCDEF' - def guess_should_quote(self, parts): - """ - Guess whether we should quote a path. This amounts to - guessing whether it's already quoted. - - find ' ' -> 1 - find '%' -> 1 - find '%XX' -> 0 - else -> 1 - """ - (scheme, host, path, parm, query, frag) = parts - if ' ' in path: - return 1 - ind = string.find(path, '%') - if ind > -1: - while ind > -1: - if len(path) < ind+3: - return 1 - code = path[ind+1:ind+3].upper() - if code[0] not in self.hexvals or \ - code[1] not in self.hexvals: - return 1 - ind = string.find(path, '%', ind+1) - return 0 - return 1 - -class URLGrabberOptions: - """Class to ease kwargs handling.""" - - def __init__(self, delegate=None, **kwargs): - """Initialize URLGrabberOptions object. - Set default values for all options and then update options specified - in kwargs. - """ - self.delegate = delegate - if delegate is None: - self._set_defaults() - self._set_attributes(**kwargs) - - def __getattr__(self, name): - if self.delegate and hasattr(self.delegate, name): - return getattr(self.delegate, name) - raise AttributeError, name - - def raw_throttle(self): - """Calculate raw throttle value from throttle and bandwidth - values. - """ - if self.throttle <= 0: - return 0 - elif type(self.throttle) == type(0): - return float(self.throttle) - else: # throttle is a float - return self.bandwidth * self.throttle - - def derive(self, **kwargs): - """Create a derived URLGrabberOptions instance. - This method creates a new instance and overrides the - options specified in kwargs. - """ - return URLGrabberOptions(delegate=self, **kwargs) - - def _set_attributes(self, **kwargs): - """Update object attributes with those provided in kwargs.""" - self.__dict__.update(kwargs) - if have_range and kwargs.has_key('range'): - # normalize the supplied range value - self.range = range_tuple_normalize(self.range) - if not self.reget in [None, 'simple', 'check_timestamp']: - raise URLGrabError(11, _('Illegal reget mode: %s') \ - % (self.reget, )) - - def _set_defaults(self): - """Set all options to their default values. - When adding new options, make sure a default is - provided here. - """ - self.progress_obj = None - self.throttle = 1.0 - self.bandwidth = 0 - self.retry = None - self.retrycodes = [-1,2,4,5,6,7] - self.checkfunc = None - self.copy_local = 0 - self.close_connection = 0 - self.range = None - self.user_agent = 'urlgrabber/%s' % __version__ - self.keepalive = 1 - self.proxies = None - self.reget = None - self.failure_callback = None - self.interrupt_callback = None - self.prefix = None - self.opener = None - self.cache_openers = True - self.timeout = None - self.text = None - self.http_headers = None - self.ftp_headers = None - self.data = None - self.urlparser = URLParser() - self.quote = None - self.ssl_ca_cert = None - self.ssl_context = None - -class URLGrabber: - """Provides easy opening of URLs with a variety of options. - - All options are specified as kwargs. Options may be specified when - the class is created and may be overridden on a per request basis. - - New objects inherit default values from default_grabber. - """ - - def __init__(self, **kwargs): - self.opts = URLGrabberOptions(**kwargs) - - def _retry(self, opts, func, *args): - tries = 0 - while 1: - # there are only two ways out of this loop. The second has - # several "sub-ways" - # 1) via the return in the "try" block - # 2) by some exception being raised - # a) an excepton is raised that we don't "except" - # b) a callback raises ANY exception - # c) we're not retry-ing or have run out of retries - # d) the URLGrabError code is not in retrycodes - # beware of infinite loops :) - tries = tries + 1 - exception = None - retrycode = None - callback = None - if DEBUG: DEBUG.info('attempt %i/%s: %s', - tries, opts.retry, args[0]) - try: - r = apply(func, (opts,) + args, {}) - if DEBUG: DEBUG.info('success') - return r - except URLGrabError, e: - exception = e - callback = opts.failure_callback - retrycode = e.errno - except KeyboardInterrupt, e: - exception = e - callback = opts.interrupt_callback - - if DEBUG: DEBUG.info('exception: %s', exception) - if callback: - if DEBUG: DEBUG.info('calling callback: %s', callback) - cb_func, cb_args, cb_kwargs = self._make_callback(callback) - obj = CallbackObject(exception=exception, url=args[0], - tries=tries, retry=opts.retry) - cb_func(obj, *cb_args, **cb_kwargs) - - if (opts.retry is None) or (tries == opts.retry): - if DEBUG: DEBUG.info('retries exceeded, re-raising') - raise - - if (retrycode is not None) and (retrycode not in opts.retrycodes): - if DEBUG: DEBUG.info('retrycode (%i) not in list %s, re-raising', - retrycode, opts.retrycodes) - raise - - def urlopen(self, url, **kwargs): - """open the url and return a file object - If a progress object or throttle value specified when this - object was created, then a special file object will be - returned that supports them. The file object can be treated - like any other file object. - """ - opts = self.opts.derive(**kwargs) - (url,parts) = opts.urlparser.parse(url, opts) - def retryfunc(opts, url): - return URLGrabberFileObject(url, filename=None, opts=opts) - return self._retry(opts, retryfunc, url) - - def urlgrab(self, url, filename=None, **kwargs): - """grab the file at <url> and make a local copy at <filename> - If filename is none, the basename of the url is used. - urlgrab returns the filename of the local file, which may be - different from the passed-in filename if copy_local == 0. - """ - opts = self.opts.derive(**kwargs) - (url,parts) = opts.urlparser.parse(url, opts) - (scheme, host, path, parm, query, frag) = parts - if filename is None: - filename = os.path.basename( urllib.unquote(path) ) - if scheme == 'file' and not opts.copy_local: - # just return the name of the local file - don't make a - # copy currently - path = urllib.url2pathname(path) - if host: - path = os.path.normpath('//' + host + path) - if not os.path.exists(path): - raise URLGrabError(2, - _('Local file does not exist: %s') % (path, )) - elif not os.path.isfile(path): - raise URLGrabError(3, - _('Not a normal file: %s') % (path, )) - elif not opts.range: - return path - - def retryfunc(opts, url, filename): - fo = URLGrabberFileObject(url, filename, opts) - try: - fo._do_grab() - if not opts.checkfunc is None: - cb_func, cb_args, cb_kwargs = \ - self._make_callback(opts.checkfunc) - obj = CallbackObject() - obj.filename = filename - obj.url = url - apply(cb_func, (obj, )+cb_args, cb_kwargs) - finally: - fo.close() - return filename - - return self._retry(opts, retryfunc, url, filename) - - def urlread(self, url, limit=None, **kwargs): - """read the url into a string, up to 'limit' bytes - If the limit is exceeded, an exception will be thrown. Note - that urlread is NOT intended to be used as a way of saying - "I want the first N bytes" but rather 'read the whole file - into memory, but don't use too much' - """ - opts = self.opts.derive(**kwargs) - (url,parts) = opts.urlparser.parse(url, opts) - if limit is not None: - limit = limit + 1 - - def retryfunc(opts, url, limit): - fo = URLGrabberFileObject(url, filename=None, opts=opts) - s = '' - try: - # this is an unfortunate thing. Some file-like objects - # have a default "limit" of None, while the built-in (real) - # file objects have -1. They each break the other, so for - # now, we just force the default if necessary. - if limit is None: s = fo.read() - else: s = fo.read(limit) - - if not opts.checkfunc is None: - cb_func, cb_args, cb_kwargs = \ - self._make_callback(opts.checkfunc) - obj = CallbackObject() - obj.data = s - obj.url = url - apply(cb_func, (obj, )+cb_args, cb_kwargs) - finally: - fo.close() - return s - - s = self._retry(opts, retryfunc, url, limit) - if limit and len(s) > limit: - raise URLGrabError(8, - _('Exceeded limit (%i): %s') % (limit, url)) - return s - - def _make_callback(self, callback_obj): - if callable(callback_obj): - return callback_obj, (), {} - else: - return callback_obj - -# create the default URLGrabber used by urlXXX functions. -# NOTE: actual defaults are set in URLGrabberOptions -default_grabber = URLGrabber() - -class URLGrabberFileObject: - """This is a file-object wrapper that supports progress objects - and throttling. - - This exists to solve the following problem: lets say you want to - drop-in replace a normal open with urlopen. You want to use a - progress meter and/or throttling, but how do you do that without - rewriting your code? Answer: urlopen will return a wrapped file - object that does the progress meter and-or throttling internally. - """ - - def __init__(self, url, filename, opts): - self.url = url - self.filename = filename - self.opts = opts - self.fo = None - self._rbuf = '' - self._rbufsize = 1024*8 - self._ttime = time.time() - self._tsize = 0 - self._amount_read = 0 - self._opener = None - self._do_open() - - def __getattr__(self, name): - """This effectively allows us to wrap at the instance level. - Any attribute not found in _this_ object will be searched for - in self.fo. This includes methods.""" - if hasattr(self.fo, name): - return getattr(self.fo, name) - raise AttributeError, name - - def _get_opener(self): - """Build a urllib2 OpenerDirector based on request options.""" - if self.opts.opener: - return self.opts.opener - elif self._opener is None: - handlers = [] - need_keepalive_handler = (have_keepalive and self.opts.keepalive) - need_range_handler = (range_handlers and \ - (self.opts.range or self.opts.reget)) - # if you specify a ProxyHandler when creating the opener - # it _must_ come before all other handlers in the list or urllib2 - # chokes. - if self.opts.proxies: - handlers.append( CachedProxyHandler(self.opts.proxies) ) - - # ------------------------------------------------------- - # OK, these next few lines are a serious kludge to get - # around what I think is a bug in python 2.2's - # urllib2. The basic idea is that default handlers - # get applied first. If you override one (like a - # proxy handler), then the default gets pulled, but - # the replacement goes on the end. In the case of - # proxies, this means the normal handler picks it up - # first and the proxy isn't used. Now, this probably - # only happened with ftp or non-keepalive http, so not - # many folks saw it. The simple approach to fixing it - # is just to make sure you override the other - # conflicting defaults as well. I would LOVE to see - # these go way or be dealt with more elegantly. The - # problem isn't there after 2.2. -MDS 2005/02/24 - if not need_keepalive_handler: - handlers.append( urllib2.HTTPHandler() ) - if not need_range_handler: - handlers.append( urllib2.FTPHandler() ) - # ------------------------------------------------------- - - ssl_factory = sslfactory.get_factory(self.opts.ssl_ca_cert, - self.opts.ssl_context) - - if need_keepalive_handler: - handlers.append(HTTPHandler()) - handlers.append(HTTPSHandler(ssl_factory)) - if need_range_handler: - handlers.extend( range_handlers ) - handlers.append( auth_handler ) - if self.opts.cache_openers: - self._opener = CachedOpenerDirector(ssl_factory, *handlers) - else: - self._opener = ssl_factory.create_opener(*handlers) - # OK, I don't like to do this, but otherwise, we end up with - # TWO user-agent headers. - self._opener.addheaders = [] - return self._opener - - def _do_open(self): - opener = self._get_opener() - - req = urllib2.Request(self.url, self.opts.data) # build request object - self._add_headers(req) # add misc headers that we need - self._build_range(req) # take care of reget and byterange stuff - - fo, hdr = self._make_request(req, opener) - if self.reget_time and self.opts.reget == 'check_timestamp': - # do this if we have a local file with known timestamp AND - # we're in check_timestamp reget mode. - fetch_again = 0 - try: - modified_tuple = hdr.getdate_tz('last-modified') - modified_stamp = rfc822.mktime_tz(modified_tuple) - if modified_stamp > self.reget_time: fetch_again = 1 - except (TypeError,): - fetch_again = 1 - - if fetch_again: - # the server version is newer than the (incomplete) local - # version, so we should abandon the version we're getting - # and fetch the whole thing again. - fo.close() - self.opts.reget = None - del req.headers['Range'] - self._build_range(req) - fo, hdr = self._make_request(req, opener) - - (scheme, host, path, parm, query, frag) = urlparse.urlparse(self.url) - path = urllib.unquote(path) - if not (self.opts.progress_obj or self.opts.raw_throttle() \ - or self.opts.timeout): - # if we're not using the progress_obj, throttling, or timeout - # we can get a performance boost by going directly to - # the underlying fileobject for reads. - self.read = fo.read - if hasattr(fo, 'readline'): - self.readline = fo.readline - elif self.opts.progress_obj: - try: - length = int(hdr['Content-Length']) - length = length + self._amount_read # Account for regets - except (KeyError, ValueError, TypeError): - length = None - - self.opts.progress_obj.start(str(self.filename), - urllib.unquote(self.url), - os.path.basename(path), - length, text=self.opts.text) - self.opts.progress_obj.update(0) - (self.fo, self.hdr) = (fo, hdr) - - def _add_headers(self, req): - if self.opts.user_agent: - req.add_header('User-agent', self.opts.user_agent) - try: req_type = req.get_type() - except ValueError: req_type = None - if self.opts.http_headers and req_type in ('http', 'https'): - for h, v in self.opts.http_headers: - req.add_header(h, v) - if self.opts.ftp_headers and req_type == 'ftp': - for h, v in self.opts.ftp_headers: - req.add_header(h, v) - - def _build_range(self, req): - self.reget_time = None - self.append = 0 - reget_length = 0 - rt = None - if have_range and self.opts.reget and type(self.filename) == type(''): - # we have reget turned on and we're dumping to a file - try: - s = os.stat(self.filename) - except OSError: - pass - else: - self.reget_time = s[ST_MTIME] - reget_length = s[ST_SIZE] - - # Set initial length when regetting - self._amount_read = reget_length - - rt = reget_length, '' - self.append = 1 - - if self.opts.range: - if not have_range: - raise URLGrabError(10, _('Byte range requested but range '\ - 'support unavailable')) - rt = self.opts.range - if rt[0]: rt = (rt[0] + reget_length, rt[1]) - - if rt: - header = range_tuple_to_header(rt) - if header: req.add_header('Range', header) - - def _make_request(self, req, opener): - try: - if have_socket_timeout and self.opts.timeout: - old_to = socket.getdefaulttimeout() - socket.setdefaulttimeout(self.opts.timeout) - try: - fo = opener.open(req) - finally: - socket.setdefaulttimeout(old_to) - else: - fo = opener.open(req) - hdr = fo.info() - except ValueError, e: - raise URLGrabError(1, _('Bad URL: %s') % (e, )) - except RangeError, e: - raise URLGrabError(9, str(e)) - except urllib2.HTTPError, e: - new_e = URLGrabError(14, str(e)) - new_e.code = e.code - new_e.exception = e - raise new_e - except IOError, e: - if hasattr(e, 'reason') and have_socket_timeout and \ - isinstance(e.reason, TimeoutError): - raise URLGrabError(12, _('Timeout: %s') % (e, )) - else: - raise URLGrabError(4, _('IOError: %s') % (e, )) - except OSError, e: - raise URLGrabError(5, _('OSError: %s') % (e, )) - except HTTPException, e: - raise URLGrabError(7, _('HTTP Exception (%s): %s') % \ - (e.__class__.__name__, e)) - else: - return (fo, hdr) - - def _do_grab(self): - """dump the file to self.filename.""" - if self.append: new_fo = open(self.filename, 'ab') - else: new_fo = open(self.filename, 'wb') - bs = 1024*8 - size = 0 - - block = self.read(bs) - size = size + len(block) - while block: - new_fo.write(block) - block = self.read(bs) - size = size + len(block) - - new_fo.close() - try: - modified_tuple = self.hdr.getdate_tz('last-modified') - modified_stamp = rfc822.mktime_tz(modified_tuple) - os.utime(self.filename, (modified_stamp, modified_stamp)) - except (TypeError,), e: pass - - return size - - def _fill_buffer(self, amt=None): - """fill the buffer to contain at least 'amt' bytes by reading - from the underlying file object. If amt is None, then it will - read until it gets nothing more. It updates the progress meter - and throttles after every self._rbufsize bytes.""" - # the _rbuf test is only in this first 'if' for speed. It's not - # logically necessary - if self._rbuf and not amt is None: - L = len(self._rbuf) - if amt > L: - amt = amt - L - else: - return - - # if we've made it here, then we don't have enough in the buffer - # and we need to read more. - - buf = [self._rbuf] - bufsize = len(self._rbuf) - while amt is None or amt: - # first, delay if necessary for throttling reasons - if self.opts.raw_throttle(): - diff = self._tsize/self.opts.raw_throttle() - \ - (time.time() - self._ttime) - if diff > 0: time.sleep(diff) - self._ttime = time.time() - - # now read some data, up to self._rbufsize - if amt is None: readamount = self._rbufsize - else: readamount = min(amt, self._rbufsize) - try: - new = self.fo.read(readamount) - except socket.error, e: - raise URLGrabError(4, _('Socket Error: %s') % (e, )) - except TimeoutError, e: - raise URLGrabError(12, _('Timeout: %s') % (e, )) - except IOError, e: - raise URLGrabError(4, _('IOError: %s') %(e,)) - newsize = len(new) - if not newsize: break # no more to read - - if amt: amt = amt - newsize - buf.append(new) - bufsize = bufsize + newsize - self._tsize = newsize - self._amount_read = self._amount_read + newsize - if self.opts.progress_obj: - self.opts.progress_obj.update(self._amount_read) - - self._rbuf = string.join(buf, '') - return - - def read(self, amt=None): - self._fill_buffer(amt) - if amt is None: - s, self._rbuf = self._rbuf, '' - else: - s, self._rbuf = self._rbuf[:amt], self._rbuf[amt:] - return s - - def readline(self, limit=-1): - i = string.find(self._rbuf, '\n') - while i < 0 and not (0 < limit <= len(self._rbuf)): - L = len(self._rbuf) - self._fill_buffer(L + self._rbufsize) - if not len(self._rbuf) > L: break - i = string.find(self._rbuf, '\n', L) - - if i < 0: i = len(self._rbuf) - else: i = i+1 - if 0 <= limit < len(self._rbuf): i = limit - - s, self._rbuf = self._rbuf[:i], self._rbuf[i:] - return s - - def close(self): - if self.opts.progress_obj: - self.opts.progress_obj.end(self._amount_read) - self.fo.close() - if self.opts.close_connection: - try: self.fo.close_connection() - except: pass - -_handler_cache = [] -def CachedOpenerDirector(ssl_factory = None, *handlers): - for (cached_handlers, opener) in _handler_cache: - if cached_handlers == handlers: - for handler in opener.handlers: - handler.add_parent(opener) - return opener - if not ssl_factory: - ssl_factory = sslfactory.get_factory() - opener = ssl_factory.create_opener(*handlers) - _handler_cache.append( (handlers, opener) ) - return opener - -_proxy_cache = [] -def CachedProxyHandler(proxies): - for (pdict, handler) in _proxy_cache: - if pdict == proxies: - if DEBUG: DEBUG.debug('re-using proxy settings: %s', proxies) - break - else: - for k, v in proxies.items(): - utype, url = urllib.splittype(v) - host, other = urllib.splithost(url) - if (utype is None) or (host is None): - raise URLGrabError(13, _('Bad proxy URL: %s') % v) - - if DEBUG: DEBUG.info('creating new proxy handler: %s', proxies) - handler = urllib2.ProxyHandler(proxies) - _proxy_cache.append( (proxies, handler) ) - return handler - -##################################################################### -# DEPRECATED FUNCTIONS -def set_throttle(new_throttle): - """Deprecated. Use: default_grabber.throttle = new_throttle""" - default_grabber.throttle = new_throttle - -def set_bandwidth(new_bandwidth): - """Deprecated. Use: default_grabber.bandwidth = new_bandwidth""" - default_grabber.bandwidth = new_bandwidth - -def set_progress_obj(new_progress_obj): - """Deprecated. Use: default_grabber.progress_obj = new_progress_obj""" - default_grabber.progress_obj = new_progress_obj - -def set_user_agent(new_user_agent): - """Deprecated. Use: default_grabber.user_agent = new_user_agent""" - default_grabber.user_agent = new_user_agent - -def retrygrab(url, filename=None, copy_local=0, close_connection=0, - progress_obj=None, throttle=None, bandwidth=None, - numtries=3, retrycodes=[-1,2,4,5,6,7], checkfunc=None): - """Deprecated. Use: urlgrab() with the retry arg instead""" - kwargs = {'copy_local' : copy_local, - 'close_connection' : close_connection, - 'progress_obj' : progress_obj, - 'throttle' : throttle, - 'bandwidth' : bandwidth, - 'retry' : numtries, - 'retrycodes' : retrycodes, - 'checkfunc' : checkfunc - } - return urlgrab(url, filename, **kwargs) - - -##################################################################### -# TESTING -def _main_test(): - import sys - try: url, filename = sys.argv[1:3] - except ValueError: - print 'usage:', sys.argv[0], \ - '<url> <filename> [copy_local=0|1] [close_connection=0|1]' - sys.exit() - - kwargs = {} - for a in sys.argv[3:]: - k, v = string.split(a, '=', 1) - kwargs[k] = int(v) - - set_throttle(1.0) - set_bandwidth(32 * 1024) - print "throttle: %s, throttle bandwidth: %s B/s" % (default_grabber.throttle, - default_grabber.bandwidth) - - try: from progress import text_progress_meter - except ImportError, e: pass - else: kwargs['progress_obj'] = text_progress_meter() - - try: name = apply(urlgrab, (url, filename), kwargs) - except URLGrabError, e: print e - else: print 'LOCAL FILE:', name - - -def _retry_test(): - import sys - try: url, filename = sys.argv[1:3] - except ValueError: - print 'usage:', sys.argv[0], \ - '<url> <filename> [copy_local=0|1] [close_connection=0|1]' - sys.exit() - - kwargs = {} - for a in sys.argv[3:]: - k, v = string.split(a, '=', 1) - kwargs[k] = int(v) - - try: from progress import text_progress_meter - except ImportError, e: pass - else: kwargs['progress_obj'] = text_progress_meter() - - def cfunc(filename, hello, there='foo'): - print hello, there - import random - rnum = random.random() - if rnum < .5: - print 'forcing retry' - raise URLGrabError(-1, 'forcing retry') - if rnum < .75: - print 'forcing failure' - raise URLGrabError(-2, 'forcing immediate failure') - print 'success' - return - - kwargs['checkfunc'] = (cfunc, ('hello',), {'there':'there'}) - try: name = apply(retrygrab, (url, filename), kwargs) - except URLGrabError, e: print e - else: print 'LOCAL FILE:', name - -def _file_object_test(filename=None): - import random, cStringIO, sys - if filename is None: - filename = __file__ - print 'using file "%s" for comparisons' % filename - fo = open(filename) - s_input = fo.read() - fo.close() - - for testfunc in [_test_file_object_smallread, - _test_file_object_readall, - _test_file_object_readline, - _test_file_object_readlines]: - fo_input = cStringIO.StringIO(s_input) - fo_output = cStringIO.StringIO() - wrapper = URLGrabberFileObject(fo_input, None, 0) - print 'testing %-30s ' % testfunc.__name__, - testfunc(wrapper, fo_output) - s_output = fo_output.getvalue() - if s_output == s_input: print 'passed' - else: print 'FAILED' - -def _test_file_object_smallread(wrapper, fo_output): - while 1: - s = wrapper.read(23) - fo_output.write(s) - if not s: return - -def _test_file_object_readall(wrapper, fo_output): - s = wrapper.read() - fo_output.write(s) - -def _test_file_object_readline(wrapper, fo_output): - while 1: - s = wrapper.readline() - fo_output.write(s) - if not s: return - -def _test_file_object_readlines(wrapper, fo_output): - li = wrapper.readlines() - fo_output.write(string.join(li, '')) - -if __name__ == '__main__': - _main_test() - _retry_test() - _file_object_test('test') diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/keepalive.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/keepalive.py deleted file mode 100644 index 71393e2b8d..0000000000 --- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/keepalive.py +++ /dev/null @@ -1,617 +0,0 @@ -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the -# Free Software Foundation, Inc., -# 59 Temple Place, Suite 330, -# Boston, MA 02111-1307 USA - -# This file is part of urlgrabber, a high-level cross-protocol url-grabber -# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko - -"""An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive. - ->>> import urllib2 ->>> from keepalive import HTTPHandler ->>> keepalive_handler = HTTPHandler() ->>> opener = urllib2.build_opener(keepalive_handler) ->>> urllib2.install_opener(opener) ->>> ->>> fo = urllib2.urlopen('http://www.python.org') - -If a connection to a given host is requested, and all of the existing -connections are still in use, another connection will be opened. If -the handler tries to use an existing connection but it fails in some -way, it will be closed and removed from the pool. - -To remove the handler, simply re-run build_opener with no arguments, and -install that opener. - -You can explicitly close connections by using the close_connection() -method of the returned file-like object (described below) or you can -use the handler methods: - - close_connection(host) - close_all() - open_connections() - -NOTE: using the close_connection and close_all methods of the handler -should be done with care when using multiple threads. - * there is nothing that prevents another thread from creating new - connections immediately after connections are closed - * no checks are done to prevent in-use connections from being closed - ->>> keepalive_handler.close_all() - -EXTRA ATTRIBUTES AND METHODS - - Upon a status of 200, the object returned has a few additional - attributes and methods, which should not be used if you want to - remain consistent with the normal urllib2-returned objects: - - close_connection() - close the connection to the host - readlines() - you know, readlines() - status - the return status (ie 404) - reason - english translation of status (ie 'File not found') - - If you want the best of both worlds, use this inside an - AttributeError-catching try: - - >>> try: status = fo.status - >>> except AttributeError: status = None - - Unfortunately, these are ONLY there if status == 200, so it's not - easy to distinguish between non-200 responses. The reason is that - urllib2 tries to do clever things with error codes 301, 302, 401, - and 407, and it wraps the object upon return. - - For python versions earlier than 2.4, you can avoid this fancy error - handling by setting the module-level global HANDLE_ERRORS to zero. - You see, prior to 2.4, it's the HTTP Handler's job to determine what - to handle specially, and what to just pass up. HANDLE_ERRORS == 0 - means "pass everything up". In python 2.4, however, this job no - longer belongs to the HTTP Handler and is now done by a NEW handler, - HTTPErrorProcessor. Here's the bottom line: - - python version < 2.4 - HANDLE_ERRORS == 1 (default) pass up 200, treat the rest as - errors - HANDLE_ERRORS == 0 pass everything up, error processing is - left to the calling code - python version >= 2.4 - HANDLE_ERRORS == 1 pass up 200, treat the rest as errors - HANDLE_ERRORS == 0 (default) pass everything up, let the - other handlers (specifically, - HTTPErrorProcessor) decide what to do - - In practice, setting the variable either way makes little difference - in python 2.4, so for the most consistent behavior across versions, - you probably just want to use the defaults, which will give you - exceptions on errors. - -""" - -# $Id: keepalive.py,v 1.16 2006/09/22 00:58:05 mstenner Exp $ - -import urllib2 -import httplib -import socket -import thread - -DEBUG = None - -import sslfactory - -import sys -if sys.version_info < (2, 4): HANDLE_ERRORS = 1 -else: HANDLE_ERRORS = 0 - -class ConnectionManager: - """ - The connection manager must be able to: - * keep track of all existing - """ - def __init__(self): - self._lock = thread.allocate_lock() - self._hostmap = {} # map hosts to a list of connections - self._connmap = {} # map connections to host - self._readymap = {} # map connection to ready state - - def add(self, host, connection, ready): - self._lock.acquire() - try: - if not self._hostmap.has_key(host): self._hostmap[host] = [] - self._hostmap[host].append(connection) - self._connmap[connection] = host - self._readymap[connection] = ready - finally: - self._lock.release() - - def remove(self, connection): - self._lock.acquire() - try: - try: - host = self._connmap[connection] - except KeyError: - pass - else: - del self._connmap[connection] - del self._readymap[connection] - self._hostmap[host].remove(connection) - if not self._hostmap[host]: del self._hostmap[host] - finally: - self._lock.release() - - def set_ready(self, connection, ready): - try: self._readymap[connection] = ready - except KeyError: pass - - def get_ready_conn(self, host): - conn = None - self._lock.acquire() - try: - if self._hostmap.has_key(host): - for c in self._hostmap[host]: - if self._readymap[c]: - self._readymap[c] = 0 - conn = c - break - finally: - self._lock.release() - return conn - - def get_all(self, host=None): - if host: - return list(self._hostmap.get(host, [])) - else: - return dict(self._hostmap) - -class KeepAliveHandler: - def __init__(self): - self._cm = ConnectionManager() - - #### Connection Management - def open_connections(self): - """return a list of connected hosts and the number of connections - to each. [('foo.com:80', 2), ('bar.org', 1)]""" - return [(host, len(li)) for (host, li) in self._cm.get_all().items()] - - def close_connection(self, host): - """close connection(s) to <host> - host is the host:port spec, as in 'www.cnn.com:8080' as passed in. - no error occurs if there is no connection to that host.""" - for h in self._cm.get_all(host): - self._cm.remove(h) - h.close() - - def close_all(self): - """close all open connections""" - for host, conns in self._cm.get_all().items(): - for h in conns: - self._cm.remove(h) - h.close() - - def _request_closed(self, request, host, connection): - """tells us that this request is now closed and the the - connection is ready for another request""" - self._cm.set_ready(connection, 1) - - def _remove_connection(self, host, connection, close=0): - if close: connection.close() - self._cm.remove(connection) - - #### Transaction Execution - def do_open(self, req): - host = req.get_host() - if not host: - raise urllib2.URLError('no host given') - - try: - h = self._cm.get_ready_conn(host) - while h: - r = self._reuse_connection(h, req, host) - - # if this response is non-None, then it worked and we're - # done. Break out, skipping the else block. - if r: break - - # connection is bad - possibly closed by server - # discard it and ask for the next free connection - h.close() - self._cm.remove(h) - h = self._cm.get_ready_conn(host) - else: - # no (working) free connections were found. Create a new one. - h = self._get_connection(host) - if DEBUG: DEBUG.info("creating new connection to %s (%d)", - host, id(h)) - self._cm.add(host, h, 0) - self._start_transaction(h, req) - r = h.getresponse() - except (socket.error, httplib.HTTPException), err: - raise urllib2.URLError(err) - - # if not a persistent connection, don't try to reuse it - if r.will_close: self._cm.remove(h) - - if DEBUG: DEBUG.info("STATUS: %s, %s", r.status, r.reason) - r._handler = self - r._host = host - r._url = req.get_full_url() - r._connection = h - r.code = r.status - r.headers = r.msg - r.msg = r.reason - - if r.status == 200 or not HANDLE_ERRORS: - return r - else: - return self.parent.error('http', req, r, - r.status, r.msg, r.headers) - - def _reuse_connection(self, h, req, host): - """start the transaction with a re-used connection - return a response object (r) upon success or None on failure. - This DOES not close or remove bad connections in cases where - it returns. However, if an unexpected exception occurs, it - will close and remove the connection before re-raising. - """ - try: - self._start_transaction(h, req) - r = h.getresponse() - # note: just because we got something back doesn't mean it - # worked. We'll check the version below, too. - except (socket.error, httplib.HTTPException): - r = None - except: - # adding this block just in case we've missed - # something we will still raise the exception, but - # lets try and close the connection and remove it - # first. We previously got into a nasty loop - # where an exception was uncaught, and so the - # connection stayed open. On the next try, the - # same exception was raised, etc. The tradeoff is - # that it's now possible this call will raise - # a DIFFERENT exception - if DEBUG: DEBUG.error("unexpected exception - closing " + \ - "connection to %s (%d)", host, id(h)) - self._cm.remove(h) - h.close() - raise - - if r is None or r.version == 9: - # httplib falls back to assuming HTTP 0.9 if it gets a - # bad header back. This is most likely to happen if - # the socket has been closed by the server since we - # last used the connection. - if DEBUG: DEBUG.info("failed to re-use connection to %s (%d)", - host, id(h)) - r = None - else: - if DEBUG: DEBUG.info("re-using connection to %s (%d)", host, id(h)) - - return r - - def _start_transaction(self, h, req): - try: - if req.has_data(): - data = req.get_data() - h.putrequest('POST', req.get_selector()) - if not req.headers.has_key('Content-type'): - h.putheader('Content-type', - 'application/x-www-form-urlencoded') - if not req.headers.has_key('Content-length'): - h.putheader('Content-length', '%d' % len(data)) - else: - h.putrequest('GET', req.get_selector()) - except (socket.error, httplib.HTTPException), err: - raise urllib2.URLError(err) - - for args in self.parent.addheaders: - h.putheader(*args) - for k, v in req.headers.items(): - h.putheader(k, v) - h.endheaders() - if req.has_data(): - h.send(data) - - def _get_connection(self, host): - return NotImplementedError - -class HTTPHandler(KeepAliveHandler, urllib2.HTTPHandler): - def __init__(self): - KeepAliveHandler.__init__(self) - - def http_open(self, req): - return self.do_open(req) - - def _get_connection(self, host): - return HTTPConnection(host) - -class HTTPSHandler(KeepAliveHandler, urllib2.HTTPSHandler): - def __init__(self, ssl_factory=None): - KeepAliveHandler.__init__(self) - if not ssl_factory: - ssl_factory = sslfactory.get_factory() - self._ssl_factory = ssl_factory - - def https_open(self, req): - return self.do_open(req) - - def _get_connection(self, host): - return self._ssl_factory.get_https_connection(host) - -class HTTPResponse(httplib.HTTPResponse): - # we need to subclass HTTPResponse in order to - # 1) add readline() and readlines() methods - # 2) add close_connection() methods - # 3) add info() and geturl() methods - - # in order to add readline(), read must be modified to deal with a - # buffer. example: readline must read a buffer and then spit back - # one line at a time. The only real alternative is to read one - # BYTE at a time (ick). Once something has been read, it can't be - # put back (ok, maybe it can, but that's even uglier than this), - # so if you THEN do a normal read, you must first take stuff from - # the buffer. - - # the read method wraps the original to accomodate buffering, - # although read() never adds to the buffer. - # Both readline and readlines have been stolen with almost no - # modification from socket.py - - - def __init__(self, sock, debuglevel=0, strict=0, method=None): - if method: # the httplib in python 2.3 uses the method arg - httplib.HTTPResponse.__init__(self, sock, debuglevel, method) - else: # 2.2 doesn't - httplib.HTTPResponse.__init__(self, sock, debuglevel) - self.fileno = sock.fileno - self.code = None - self._rbuf = '' - self._rbufsize = 8096 - self._handler = None # inserted by the handler later - self._host = None # (same) - self._url = None # (same) - self._connection = None # (same) - - _raw_read = httplib.HTTPResponse.read - - def close(self): - if self.fp: - self.fp.close() - self.fp = None - if self._handler: - self._handler._request_closed(self, self._host, - self._connection) - - def close_connection(self): - self._handler._remove_connection(self._host, self._connection, close=1) - self.close() - - def info(self): - return self.headers - - def geturl(self): - return self._url - - def read(self, amt=None): - # the _rbuf test is only in this first if for speed. It's not - # logically necessary - if self._rbuf and not amt is None: - L = len(self._rbuf) - if amt > L: - amt -= L - else: - s = self._rbuf[:amt] - self._rbuf = self._rbuf[amt:] - return s - - s = self._rbuf + self._raw_read(amt) - self._rbuf = '' - return s - - def readline(self, limit=-1): - data = "" - i = self._rbuf.find('\n') - while i < 0 and not (0 < limit <= len(self._rbuf)): - new = self._raw_read(self._rbufsize) - if not new: break - i = new.find('\n') - if i >= 0: i = i + len(self._rbuf) - self._rbuf = self._rbuf + new - if i < 0: i = len(self._rbuf) - else: i = i+1 - if 0 <= limit < len(self._rbuf): i = limit - data, self._rbuf = self._rbuf[:i], self._rbuf[i:] - return data - - def readlines(self, sizehint = 0): - total = 0 - list = [] - while 1: - line = self.readline() - if not line: break - list.append(line) - total += len(line) - if sizehint and total >= sizehint: - break - return list - - -class HTTPConnection(httplib.HTTPConnection): - # use the modified response class - response_class = HTTPResponse - -class HTTPSConnection(httplib.HTTPSConnection): - response_class = HTTPResponse - -######################################################################### -##### TEST FUNCTIONS -######################################################################### - -def error_handler(url): - global HANDLE_ERRORS - orig = HANDLE_ERRORS - keepalive_handler = HTTPHandler() - opener = urllib2.build_opener(keepalive_handler) - urllib2.install_opener(opener) - pos = {0: 'off', 1: 'on'} - for i in (0, 1): - print " fancy error handling %s (HANDLE_ERRORS = %i)" % (pos[i], i) - HANDLE_ERRORS = i - try: - fo = urllib2.urlopen(url) - foo = fo.read() - fo.close() - try: status, reason = fo.status, fo.reason - except AttributeError: status, reason = None, None - except IOError, e: - print " EXCEPTION: %s" % e - raise - else: - print " status = %s, reason = %s" % (status, reason) - HANDLE_ERRORS = orig - hosts = keepalive_handler.open_connections() - print "open connections:", hosts - keepalive_handler.close_all() - -def continuity(url): - import md5 - format = '%25s: %s' - - # first fetch the file with the normal http handler - opener = urllib2.build_opener() - urllib2.install_opener(opener) - fo = urllib2.urlopen(url) - foo = fo.read() - fo.close() - m = md5.new(foo) - print format % ('normal urllib', m.hexdigest()) - - # now install the keepalive handler and try again - opener = urllib2.build_opener(HTTPHandler()) - urllib2.install_opener(opener) - - fo = urllib2.urlopen(url) - foo = fo.read() - fo.close() - m = md5.new(foo) - print format % ('keepalive read', m.hexdigest()) - - fo = urllib2.urlopen(url) - foo = '' - while 1: - f = fo.readline() - if f: foo = foo + f - else: break - fo.close() - m = md5.new(foo) - print format % ('keepalive readline', m.hexdigest()) - -def comp(N, url): - print ' making %i connections to:\n %s' % (N, url) - - sys.stdout.write(' first using the normal urllib handlers') - # first use normal opener - opener = urllib2.build_opener() - urllib2.install_opener(opener) - t1 = fetch(N, url) - print ' TIME: %.3f s' % t1 - - sys.stdout.write(' now using the keepalive handler ') - # now install the keepalive handler and try again - opener = urllib2.build_opener(HTTPHandler()) - urllib2.install_opener(opener) - t2 = fetch(N, url) - print ' TIME: %.3f s' % t2 - print ' improvement factor: %.2f' % (t1/t2, ) - -def fetch(N, url, delay=0): - import time - lens = [] - starttime = time.time() - for i in range(N): - if delay and i > 0: time.sleep(delay) - fo = urllib2.urlopen(url) - foo = fo.read() - fo.close() - lens.append(len(foo)) - diff = time.time() - starttime - - j = 0 - for i in lens[1:]: - j = j + 1 - if not i == lens[0]: - print "WARNING: inconsistent length on read %i: %i" % (j, i) - - return diff - -def test_timeout(url): - global DEBUG - dbbackup = DEBUG - class FakeLogger: - def debug(self, msg, *args): print msg % args - info = warning = error = debug - DEBUG = FakeLogger() - print " fetching the file to establish a connection" - fo = urllib2.urlopen(url) - data1 = fo.read() - fo.close() - - i = 20 - print " waiting %i seconds for the server to close the connection" % i - while i > 0: - sys.stdout.write('\r %2i' % i) - sys.stdout.flush() - time.sleep(1) - i -= 1 - sys.stderr.write('\r') - - print " fetching the file a second time" - fo = urllib2.urlopen(url) - data2 = fo.read() - fo.close() - - if data1 == data2: - print ' data are identical' - else: - print ' ERROR: DATA DIFFER' - - DEBUG = dbbackup - - -def test(url, N=10): - print "checking error hander (do this on a non-200)" - try: error_handler(url) - except IOError, e: - print "exiting - exception will prevent further tests" - sys.exit() - print - print "performing continuity test (making sure stuff isn't corrupted)" - continuity(url) - print - print "performing speed comparison" - comp(N, url) - print - print "performing dropped-connection check" - test_timeout(url) - -if __name__ == '__main__': - import time - import sys - try: - N = int(sys.argv[1]) - url = sys.argv[2] - except: - print "%s <integer> <url>" % sys.argv[0] - else: - test(url, N) diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py deleted file mode 100644 index 9664c6b5c5..0000000000 --- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py +++ /dev/null @@ -1,458 +0,0 @@ -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the -# Free Software Foundation, Inc., -# 59 Temple Place, Suite 330, -# Boston, MA 02111-1307 USA - -# This file is part of urlgrabber, a high-level cross-protocol url-grabber -# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko - -"""Module for downloading files from a pool of mirrors - -DESCRIPTION - - This module provides support for downloading files from a pool of - mirrors with configurable failover policies. To a large extent, the - failover policy is chosen by using different classes derived from - the main class, MirrorGroup. - - Instances of MirrorGroup (and cousins) act very much like URLGrabber - instances in that they have urlread, urlgrab, and urlopen methods. - They can therefore, be used in very similar ways. - - from urlgrabber.grabber import URLGrabber - from urlgrabber.mirror import MirrorGroup - gr = URLGrabber() - mg = MirrorGroup(gr, ['http://foo.com/some/directory/', - 'http://bar.org/maybe/somewhere/else/', - 'ftp://baz.net/some/other/place/entirely/'] - mg.urlgrab('relative/path.zip') - - The assumption is that all mirrors are identical AFTER the base urls - specified, so that any mirror can be used to fetch any file. - -FAILOVER - - The failover mechanism is designed to be customized by subclassing - from MirrorGroup to change the details of the behavior. In general, - the classes maintain a master mirror list and a "current mirror" - index. When a download is initiated, a copy of this list and index - is created for that download only. The specific failover policy - depends on the class used, and so is documented in the class - documentation. Note that ANY behavior of the class can be - overridden, so any failover policy at all is possible (although - you may need to change the interface in extreme cases). - -CUSTOMIZATION - - Most customization of a MirrorGroup object is done at instantiation - time (or via subclassing). There are four major types of - customization: - - 1) Pass in a custom urlgrabber - The passed in urlgrabber will be - used (by default... see #2) for the grabs, so options to it - apply for the url-fetching - - 2) Custom mirror list - Mirror lists can simply be a list of - stings mirrors (as shown in the example above) but each can - also be a dict, allowing for more options. For example, the - first mirror in the list above could also have been: - - {'mirror': 'http://foo.com/some/directory/', - 'grabber': <a custom grabber to be used for this mirror>, - 'kwargs': { <a dict of arguments passed to the grabber> }} - - All mirrors are converted to this format internally. If - 'grabber' is omitted, the default grabber will be used. If - kwargs are omitted, then (duh) they will not be used. - - 3) Pass keyword arguments when instantiating the mirror group. - See, for example, the failure_callback argument. - - 4) Finally, any kwargs passed in for the specific file (to the - urlgrab method, for example) will be folded in. The options - passed into the grabber's urlXXX methods will override any - options specified in a custom mirror dict. - -""" - -# $Id: mirror.py,v 1.14 2006/02/22 18:26:46 mstenner Exp $ - -import random -import thread # needed for locking to make this threadsafe - -from grabber import URLGrabError, CallbackObject, DEBUG - -try: - from i18n import _ -except ImportError, msg: - def _(st): return st - -class GrabRequest: - """This is a dummy class used to hold information about the specific - request. For example, a single file. By maintaining this information - separately, we can accomplish two things: - - 1) make it a little easier to be threadsafe - 2) have request-specific parameters - """ - pass - -class MirrorGroup: - """Base Mirror class - - Instances of this class are built with a grabber object and a list - of mirrors. Then all calls to urlXXX should be passed relative urls. - The requested file will be searched for on the first mirror. If the - grabber raises an exception (possibly after some retries) then that - mirror will be removed from the list, and the next will be attempted. - If all mirrors are exhausted, then an exception will be raised. - - MirrorGroup has the following failover policy: - - * downloads begin with the first mirror - - * by default (see default_action below) a failure (after retries) - causes it to increment the local AND master indices. Also, - the current mirror is removed from the local list (but NOT the - master list - the mirror can potentially be used for other - files) - - * if the local list is ever exhausted, a URLGrabError will be - raised (errno=256, no more mirrors) - - OPTIONS - - In addition to the required arguments "grabber" and "mirrors", - MirrorGroup also takes the following optional arguments: - - default_action - - A dict that describes the actions to be taken upon failure - (after retries). default_action can contain any of the - following keys (shown here with their default values): - - default_action = {'increment': 1, - 'increment_master': 1, - 'remove': 1, - 'remove_master': 0, - 'fail': 0} - - In this context, 'increment' means "use the next mirror" and - 'remove' means "never use this mirror again". The two - 'master' values refer to the instance-level mirror list (used - for all files), whereas the non-master values refer to the - current download only. - - The 'fail' option will cause immediate failure by re-raising - the exception and no further attempts to get the current - download. - - This dict can be set at instantiation time, - mg = MirrorGroup(grabber, mirrors, default_action={'fail':1}) - at method-execution time (only applies to current fetch), - filename = mg.urlgrab(url, default_action={'increment': 0}) - or by returning an action dict from the failure_callback - return {'fail':0} - in increasing precedence. - - If all three of these were done, the net result would be: - {'increment': 0, # set in method - 'increment_master': 1, # class default - 'remove': 1, # class default - 'remove_master': 0, # class default - 'fail': 0} # set at instantiation, reset - # from callback - - failure_callback - - this is a callback that will be called when a mirror "fails", - meaning the grabber raises some URLGrabError. If this is a - tuple, it is interpreted to be of the form (cb, args, kwargs) - where cb is the actual callable object (function, method, - etc). Otherwise, it is assumed to be the callable object - itself. The callback will be passed a grabber.CallbackObject - instance along with args and kwargs (if present). The following - attributes are defined withing the instance: - - obj.exception = < exception that was raised > - obj.mirror = < the mirror that was tried > - obj.relative_url = < url relative to the mirror > - obj.url = < full url that failed > - # .url is just the combination of .mirror - # and .relative_url - - The failure callback can return an action dict, as described - above. - - Like default_action, the failure_callback can be set at - instantiation time or when the urlXXX method is called. In - the latter case, it applies only for that fetch. - - The callback can re-raise the exception quite easily. For - example, this is a perfectly adequate callback function: - - def callback(obj): raise obj.exception - - WARNING: do not save the exception object (or the - CallbackObject instance). As they contain stack frame - references, they can lead to circular references. - - Notes: - * The behavior can be customized by deriving and overriding the - 'CONFIGURATION METHODS' - * The 'grabber' instance is kept as a reference, not copied. - Therefore, the grabber instance can be modified externally - and changes will take effect immediately. - """ - - # notes on thread-safety: - - # A GrabRequest should never be shared by multiple threads because - # it's never saved inside the MG object and never returned outside it. - # therefore, it should be safe to access/modify grabrequest data - # without a lock. However, accessing the mirrors and _next attributes - # of the MG itself must be done when locked to prevent (for example) - # removal of the wrong mirror. - - ############################################################## - # CONFIGURATION METHODS - intended to be overridden to - # customize behavior - def __init__(self, grabber, mirrors, **kwargs): - """Initialize the MirrorGroup object. - - REQUIRED ARGUMENTS - - grabber - URLGrabber instance - mirrors - a list of mirrors - - OPTIONAL ARGUMENTS - - failure_callback - callback to be used when a mirror fails - default_action - dict of failure actions - - See the module-level and class level documentation for more - details. - """ - - # OVERRIDE IDEAS: - # shuffle the list to randomize order - self.grabber = grabber - self.mirrors = self._parse_mirrors(mirrors) - self._next = 0 - self._lock = thread.allocate_lock() - self.default_action = None - self._process_kwargs(kwargs) - - # if these values are found in **kwargs passed to one of the urlXXX - # methods, they will be stripped before getting passed on to the - # grabber - options = ['default_action', 'failure_callback'] - - def _process_kwargs(self, kwargs): - self.failure_callback = kwargs.get('failure_callback') - self.default_action = kwargs.get('default_action') - - def _parse_mirrors(self, mirrors): - parsed_mirrors = [] - for m in mirrors: - if type(m) == type(''): m = {'mirror': m} - parsed_mirrors.append(m) - return parsed_mirrors - - def _load_gr(self, gr): - # OVERRIDE IDEAS: - # shuffle gr list - self._lock.acquire() - gr.mirrors = list(self.mirrors) - gr._next = self._next - self._lock.release() - - def _get_mirror(self, gr): - # OVERRIDE IDEAS: - # return a random mirror so that multiple mirrors get used - # even without failures. - if not gr.mirrors: - raise URLGrabError(256, _('No more mirrors to try.')) - return gr.mirrors[gr._next] - - def _failure(self, gr, cb_obj): - # OVERRIDE IDEAS: - # inspect the error - remove=1 for 404, remove=2 for connection - # refused, etc. (this can also be done via - # the callback) - cb = gr.kw.get('failure_callback') or self.failure_callback - if cb: - if type(cb) == type( () ): - cb, args, kwargs = cb - else: - args, kwargs = (), {} - action = cb(cb_obj, *args, **kwargs) or {} - else: - action = {} - # XXXX - decide - there are two ways to do this - # the first is action-overriding as a whole - use the entire action - # or fall back on module level defaults - #action = action or gr.kw.get('default_action') or self.default_action - # the other is to fall through for each element in the action dict - a = dict(self.default_action or {}) - a.update(gr.kw.get('default_action', {})) - a.update(action) - action = a - self.increment_mirror(gr, action) - if action and action.get('fail', 0): raise - - def increment_mirror(self, gr, action={}): - """Tell the mirror object increment the mirror index - - This increments the mirror index, which amounts to telling the - mirror object to use a different mirror (for this and future - downloads). - - This is a SEMI-public method. It will be called internally, - and you may never need to call it. However, it is provided - (and is made public) so that the calling program can increment - the mirror choice for methods like urlopen. For example, with - urlopen, there's no good way for the mirror group to know that - an error occurs mid-download (it's already returned and given - you the file object). - - remove --- can have several values - 0 do not remove the mirror from the list - 1 remove the mirror for this download only - 2 remove the mirror permanently - - beware of remove=0 as it can lead to infinite loops - """ - badmirror = gr.mirrors[gr._next] - - self._lock.acquire() - try: - ind = self.mirrors.index(badmirror) - except ValueError: - pass - else: - if action.get('remove_master', 0): - del self.mirrors[ind] - elif self._next == ind and action.get('increment_master', 1): - self._next += 1 - if self._next >= len(self.mirrors): self._next = 0 - self._lock.release() - - if action.get('remove', 1): - del gr.mirrors[gr._next] - elif action.get('increment', 1): - gr._next += 1 - if gr._next >= len(gr.mirrors): gr._next = 0 - - if DEBUG: - grm = [m['mirror'] for m in gr.mirrors] - DEBUG.info('GR mirrors: [%s] %i', ' '.join(grm), gr._next) - selfm = [m['mirror'] for m in self.mirrors] - DEBUG.info('MAIN mirrors: [%s] %i', ' '.join(selfm), self._next) - - ##################################################################### - # NON-CONFIGURATION METHODS - # these methods are designed to be largely workhorse methods that - # are not intended to be overridden. That doesn't mean you can't; - # if you want to, feel free, but most things can be done by - # by overriding the configuration methods :) - - def _join_url(self, base_url, rel_url): - if base_url.endswith('/') or rel_url.startswith('/'): - return base_url + rel_url - else: - return base_url + '/' + rel_url - - def _mirror_try(self, func, url, kw): - gr = GrabRequest() - gr.func = func - gr.url = url - gr.kw = dict(kw) - self._load_gr(gr) - - for k in self.options: - try: del kw[k] - except KeyError: pass - - while 1: - mirrorchoice = self._get_mirror(gr) - fullurl = self._join_url(mirrorchoice['mirror'], gr.url) - kwargs = dict(mirrorchoice.get('kwargs', {})) - kwargs.update(kw) - grabber = mirrorchoice.get('grabber') or self.grabber - func_ref = getattr(grabber, func) - if DEBUG: DEBUG.info('MIRROR: trying %s -> %s', url, fullurl) - try: - return func_ref( *(fullurl,), **kwargs ) - except URLGrabError, e: - if DEBUG: DEBUG.info('MIRROR: failed') - obj = CallbackObject() - obj.exception = e - obj.mirror = mirrorchoice['mirror'] - obj.relative_url = gr.url - obj.url = fullurl - self._failure(gr, obj) - - def urlgrab(self, url, filename=None, **kwargs): - kw = dict(kwargs) - kw['filename'] = filename - func = 'urlgrab' - return self._mirror_try(func, url, kw) - - def urlopen(self, url, **kwargs): - kw = dict(kwargs) - func = 'urlopen' - return self._mirror_try(func, url, kw) - - def urlread(self, url, limit=None, **kwargs): - kw = dict(kwargs) - kw['limit'] = limit - func = 'urlread' - return self._mirror_try(func, url, kw) - - -class MGRandomStart(MirrorGroup): - """A mirror group that starts at a random mirror in the list. - - This behavior of this class is identical to MirrorGroup, except that - it starts at a random location in the mirror list. - """ - - def __init__(self, grabber, mirrors, **kwargs): - """Initialize the object - - The arguments for intialization are the same as for MirrorGroup - """ - MirrorGroup.__init__(self, grabber, mirrors, **kwargs) - self._next = random.randrange(len(mirrors)) - -class MGRandomOrder(MirrorGroup): - """A mirror group that uses mirrors in a random order. - - This behavior of this class is identical to MirrorGroup, except that - it uses the mirrors in a random order. Note that the order is set at - initialization time and fixed thereafter. That is, it does not pick a - random mirror after each failure. - """ - - def __init__(self, grabber, mirrors, **kwargs): - """Initialize the object - - The arguments for intialization are the same as for MirrorGroup - """ - MirrorGroup.__init__(self, grabber, mirrors, **kwargs) - random.shuffle(self.mirrors) - -if __name__ == '__main__': - pass diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/progress.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/progress.py deleted file mode 100644 index 02db524e76..0000000000 --- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/progress.py +++ /dev/null @@ -1,530 +0,0 @@ -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the -# Free Software Foundation, Inc., -# 59 Temple Place, Suite 330, -# Boston, MA 02111-1307 USA - -# This file is part of urlgrabber, a high-level cross-protocol url-grabber -# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko - -# $Id: progress.py,v 1.7 2005/08/19 21:59:07 mstenner Exp $ - -import sys -import time -import math -import thread - -class BaseMeter: - def __init__(self): - self.update_period = 0.3 # seconds - - self.filename = None - self.url = None - self.basename = None - self.text = None - self.size = None - self.start_time = None - self.last_amount_read = 0 - self.last_update_time = None - self.re = RateEstimator() - - def start(self, filename=None, url=None, basename=None, - size=None, now=None, text=None): - self.filename = filename - self.url = url - self.basename = basename - self.text = text - - #size = None ######### TESTING - self.size = size - if not size is None: self.fsize = format_number(size) + 'B' - - if now is None: now = time.time() - self.start_time = now - self.re.start(size, now) - self.last_amount_read = 0 - self.last_update_time = now - self._do_start(now) - - def _do_start(self, now=None): - pass - - def update(self, amount_read, now=None): - # for a real gui, you probably want to override and put a call - # to your mainloop iteration function here - if now is None: now = time.time() - if (now >= self.last_update_time + self.update_period) or \ - not self.last_update_time: - self.re.update(amount_read, now) - self.last_amount_read = amount_read - self.last_update_time = now - self._do_update(amount_read, now) - - def _do_update(self, amount_read, now=None): - pass - - def end(self, amount_read, now=None): - if now is None: now = time.time() - self.re.update(amount_read, now) - self.last_amount_read = amount_read - self.last_update_time = now - self._do_end(amount_read, now) - - def _do_end(self, amount_read, now=None): - pass - -class TextMeter(BaseMeter): - def __init__(self, fo=sys.stderr): - BaseMeter.__init__(self) - self.fo = fo - - def _do_update(self, amount_read, now=None): - etime = self.re.elapsed_time() - fetime = format_time(etime) - fread = format_number(amount_read) - #self.size = None - if self.text is not None: - text = self.text - else: - text = self.basename - if self.size is None: - out = '\r%-60.60s %5sB %s ' % \ - (text, fread, fetime) - else: - rtime = self.re.remaining_time() - frtime = format_time(rtime) - frac = self.re.fraction_read() - bar = '='*int(25 * frac) - - out = '\r%-25.25s %3i%% |%-25.25s| %5sB %8s ETA ' % \ - (text, frac*100, bar, fread, frtime) - - self.fo.write(out) - self.fo.flush() - - def _do_end(self, amount_read, now=None): - total_time = format_time(self.re.elapsed_time()) - total_size = format_number(amount_read) - if self.text is not None: - text = self.text - else: - text = self.basename - if self.size is None: - out = '\r%-60.60s %5sB %s ' % \ - (text, total_size, total_time) - else: - bar = '='*25 - out = '\r%-25.25s %3i%% |%-25.25s| %5sB %8s ' % \ - (text, 100, bar, total_size, total_time) - self.fo.write(out + '\n') - self.fo.flush() - -text_progress_meter = TextMeter - -class MultiFileHelper(BaseMeter): - def __init__(self, master): - BaseMeter.__init__(self) - self.master = master - - def _do_start(self, now): - self.master.start_meter(self, now) - - def _do_update(self, amount_read, now): - # elapsed time since last update - self.master.update_meter(self, now) - - def _do_end(self, amount_read, now): - self.ftotal_time = format_time(now - self.start_time) - self.ftotal_size = format_number(self.last_amount_read) - self.master.end_meter(self, now) - - def failure(self, message, now=None): - self.master.failure_meter(self, message, now) - - def message(self, message): - self.master.message_meter(self, message) - -class MultiFileMeter: - helperclass = MultiFileHelper - def __init__(self): - self.meters = [] - self.in_progress_meters = [] - self._lock = thread.allocate_lock() - self.update_period = 0.3 # seconds - - self.numfiles = None - self.finished_files = 0 - self.failed_files = 0 - self.open_files = 0 - self.total_size = None - self.failed_size = 0 - self.start_time = None - self.finished_file_size = 0 - self.last_update_time = None - self.re = RateEstimator() - - def start(self, numfiles=None, total_size=None, now=None): - if now is None: now = time.time() - self.numfiles = numfiles - self.finished_files = 0 - self.failed_files = 0 - self.open_files = 0 - self.total_size = total_size - self.failed_size = 0 - self.start_time = now - self.finished_file_size = 0 - self.last_update_time = now - self.re.start(total_size, now) - self._do_start(now) - - def _do_start(self, now): - pass - - def end(self, now=None): - if now is None: now = time.time() - self._do_end(now) - - def _do_end(self, now): - pass - - def lock(self): self._lock.acquire() - def unlock(self): self._lock.release() - - ########################################################### - # child meter creation and destruction - def newMeter(self): - newmeter = self.helperclass(self) - self.meters.append(newmeter) - return newmeter - - def removeMeter(self, meter): - self.meters.remove(meter) - - ########################################################### - # child functions - these should only be called by helpers - def start_meter(self, meter, now): - if not meter in self.meters: - raise ValueError('attempt to use orphaned meter') - self._lock.acquire() - try: - if not meter in self.in_progress_meters: - self.in_progress_meters.append(meter) - self.open_files += 1 - finally: - self._lock.release() - self._do_start_meter(meter, now) - - def _do_start_meter(self, meter, now): - pass - - def update_meter(self, meter, now): - if not meter in self.meters: - raise ValueError('attempt to use orphaned meter') - if (now >= self.last_update_time + self.update_period) or \ - not self.last_update_time: - self.re.update(self._amount_read(), now) - self.last_update_time = now - self._do_update_meter(meter, now) - - def _do_update_meter(self, meter, now): - pass - - def end_meter(self, meter, now): - if not meter in self.meters: - raise ValueError('attempt to use orphaned meter') - self._lock.acquire() - try: - try: self.in_progress_meters.remove(meter) - except ValueError: pass - self.open_files -= 1 - self.finished_files += 1 - self.finished_file_size += meter.last_amount_read - finally: - self._lock.release() - self._do_end_meter(meter, now) - - def _do_end_meter(self, meter, now): - pass - - def failure_meter(self, meter, message, now): - if not meter in self.meters: - raise ValueError('attempt to use orphaned meter') - self._lock.acquire() - try: - try: self.in_progress_meters.remove(meter) - except ValueError: pass - self.open_files -= 1 - self.failed_files += 1 - if meter.size and self.failed_size is not None: - self.failed_size += meter.size - else: - self.failed_size = None - finally: - self._lock.release() - self._do_failure_meter(meter, message, now) - - def _do_failure_meter(self, meter, message, now): - pass - - def message_meter(self, meter, message): - pass - - ######################################################## - # internal functions - def _amount_read(self): - tot = self.finished_file_size - for m in self.in_progress_meters: - tot += m.last_amount_read - return tot - - -class TextMultiFileMeter(MultiFileMeter): - def __init__(self, fo=sys.stderr): - self.fo = fo - MultiFileMeter.__init__(self) - - # files: ###/### ###% data: ######/###### ###% time: ##:##:##/##:##:## - def _do_update_meter(self, meter, now): - self._lock.acquire() - try: - format = "files: %3i/%-3i %3i%% data: %6.6s/%-6.6s %3i%% " \ - "time: %8.8s/%8.8s" - df = self.finished_files - tf = self.numfiles or 1 - pf = 100 * float(df)/tf + 0.49 - dd = self.re.last_amount_read - td = self.total_size - pd = 100 * (self.re.fraction_read() or 0) + 0.49 - dt = self.re.elapsed_time() - rt = self.re.remaining_time() - if rt is None: tt = None - else: tt = dt + rt - - fdd = format_number(dd) + 'B' - ftd = format_number(td) + 'B' - fdt = format_time(dt, 1) - ftt = format_time(tt, 1) - - out = '%-79.79s' % (format % (df, tf, pf, fdd, ftd, pd, fdt, ftt)) - self.fo.write('\r' + out) - self.fo.flush() - finally: - self._lock.release() - - def _do_end_meter(self, meter, now): - self._lock.acquire() - try: - format = "%-30.30s %6.6s %8.8s %9.9s" - fn = meter.basename - size = meter.last_amount_read - fsize = format_number(size) + 'B' - et = meter.re.elapsed_time() - fet = format_time(et, 1) - frate = format_number(size / et) + 'B/s' - - out = '%-79.79s' % (format % (fn, fsize, fet, frate)) - self.fo.write('\r' + out + '\n') - finally: - self._lock.release() - self._do_update_meter(meter, now) - - def _do_failure_meter(self, meter, message, now): - self._lock.acquire() - try: - format = "%-30.30s %6.6s %s" - fn = meter.basename - if type(message) in (type(''), type(u'')): - message = message.splitlines() - if not message: message = [''] - out = '%-79s' % (format % (fn, 'FAILED', message[0] or '')) - self.fo.write('\r' + out + '\n') - for m in message[1:]: self.fo.write(' ' + m + '\n') - self._lock.release() - finally: - self._do_update_meter(meter, now) - - def message_meter(self, meter, message): - self._lock.acquire() - try: - pass - finally: - self._lock.release() - - def _do_end(self, now): - self._do_update_meter(None, now) - self._lock.acquire() - try: - self.fo.write('\n') - self.fo.flush() - finally: - self._lock.release() - -###################################################################### -# support classes and functions - -class RateEstimator: - def __init__(self, timescale=5.0): - self.timescale = timescale - - def start(self, total=None, now=None): - if now is None: now = time.time() - self.total = total - self.start_time = now - self.last_update_time = now - self.last_amount_read = 0 - self.ave_rate = None - - def update(self, amount_read, now=None): - if now is None: now = time.time() - if amount_read == 0: - # if we just started this file, all bets are off - self.last_update_time = now - self.last_amount_read = 0 - self.ave_rate = None - return - - #print 'times', now, self.last_update_time - time_diff = now - self.last_update_time - read_diff = amount_read - self.last_amount_read - self.last_update_time = now - self.last_amount_read = amount_read - self.ave_rate = self._temporal_rolling_ave(\ - time_diff, read_diff, self.ave_rate, self.timescale) - #print 'results', time_diff, read_diff, self.ave_rate - - ##################################################################### - # result methods - def average_rate(self): - "get the average transfer rate (in bytes/second)" - return self.ave_rate - - def elapsed_time(self): - "the time between the start of the transfer and the most recent update" - return self.last_update_time - self.start_time - - def remaining_time(self): - "estimated time remaining" - if not self.ave_rate or not self.total: return None - return (self.total - self.last_amount_read) / self.ave_rate - - def fraction_read(self): - """the fraction of the data that has been read - (can be None for unknown transfer size)""" - if self.total is None: return None - elif self.total == 0: return 1.0 - else: return float(self.last_amount_read)/self.total - - ######################################################################### - # support methods - def _temporal_rolling_ave(self, time_diff, read_diff, last_ave, timescale): - """a temporal rolling average performs smooth averaging even when - updates come at irregular intervals. This is performed by scaling - the "epsilon" according to the time since the last update. - Specifically, epsilon = time_diff / timescale - - As a general rule, the average will take on a completely new value - after 'timescale' seconds.""" - epsilon = time_diff / timescale - if epsilon > 1: epsilon = 1.0 - return self._rolling_ave(time_diff, read_diff, last_ave, epsilon) - - def _rolling_ave(self, time_diff, read_diff, last_ave, epsilon): - """perform a "rolling average" iteration - a rolling average "folds" new data into an existing average with - some weight, epsilon. epsilon must be between 0.0 and 1.0 (inclusive) - a value of 0.0 means only the old value (initial value) counts, - and a value of 1.0 means only the newest value is considered.""" - - try: - recent_rate = read_diff / time_diff - except ZeroDivisionError: - recent_rate = None - if last_ave is None: return recent_rate - elif recent_rate is None: return last_ave - - # at this point, both last_ave and recent_rate are numbers - return epsilon * recent_rate + (1 - epsilon) * last_ave - - def _round_remaining_time(self, rt, start_time=15.0): - """round the remaining time, depending on its size - If rt is between n*start_time and (n+1)*start_time round downward - to the nearest multiple of n (for any counting number n). - If rt < start_time, round down to the nearest 1. - For example (for start_time = 15.0): - 2.7 -> 2.0 - 25.2 -> 25.0 - 26.4 -> 26.0 - 35.3 -> 34.0 - 63.6 -> 60.0 - """ - - if rt < 0: return 0.0 - shift = int(math.log(rt/start_time)/math.log(2)) - rt = int(rt) - if shift <= 0: return rt - return float(int(rt) >> shift << shift) - - -def format_time(seconds, use_hours=0): - if seconds is None or seconds < 0: - if use_hours: return '--:--:--' - else: return '--:--' - else: - seconds = int(seconds) - minutes = seconds / 60 - seconds = seconds % 60 - if use_hours: - hours = minutes / 60 - minutes = minutes % 60 - return '%02i:%02i:%02i' % (hours, minutes, seconds) - else: - return '%02i:%02i' % (minutes, seconds) - -def format_number(number, SI=0, space=' '): - """Turn numbers into human-readable metric-like numbers""" - symbols = ['', # (none) - 'k', # kilo - 'M', # mega - 'G', # giga - 'T', # tera - 'P', # peta - 'E', # exa - 'Z', # zetta - 'Y'] # yotta - - if SI: step = 1000.0 - else: step = 1024.0 - - thresh = 999 - depth = 0 - max_depth = len(symbols) - 1 - - # we want numbers between 0 and thresh, but don't exceed the length - # of our list. In that event, the formatting will be screwed up, - # but it'll still show the right number. - while number > thresh and depth < max_depth: - depth = depth + 1 - number = number / step - - if type(number) == type(1) or type(number) == type(1L): - # it's an int or a long, which means it didn't get divided, - # which means it's already short enough - format = '%i%s%s' - elif number < 9.95: - # must use 9.95 for proper sizing. For example, 9.99 will be - # rounded to 10.0 with the .1f format string (which is too long) - format = '%.1f%s%s' - else: - format = '%.0f%s%s' - - return(format % (float(number or 0), space, symbols[depth])) diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/sslfactory.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/sslfactory.py deleted file mode 100644 index 07848dac7c..0000000000 --- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/sslfactory.py +++ /dev/null @@ -1,90 +0,0 @@ -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the -# Free Software Foundation, Inc., -# 59 Temple Place, Suite 330, -# Boston, MA 02111-1307 USA - -# This file is part of urlgrabber, a high-level cross-protocol url-grabber - -import httplib -import urllib2 - -try: - from M2Crypto import SSL - from M2Crypto import httpslib - from M2Crypto import m2urllib2 - - SSL.Connection.clientPostConnectionCheck = None - have_m2crypto = True -except ImportError: - have_m2crypto = False - -DEBUG = None - -if have_m2crypto: - - class M2SSLFactory: - - def __init__(self, ssl_ca_cert, ssl_context): - self.ssl_context = self._get_ssl_context(ssl_ca_cert, ssl_context) - - def _get_ssl_context(self, ssl_ca_cert, ssl_context): - """ - Create an ssl context using the CA cert file or ssl context. - - The CA cert is used first if it was passed as an option. If not, - then the supplied ssl context is used. If no ssl context was supplied, - None is returned. - """ - if ssl_ca_cert: - context = SSL.Context() - context.load_verify_locations(ssl_ca_cert) - context.set_verify(SSL.verify_none, -1) - return context - else: - return ssl_context - - def create_https_connection(self, host, response_class = None): - connection = httplib.HTTPSConnection(host, self.ssl_context) - if response_class: - connection.response_class = response_class - return connection - - def create_opener(self, *handlers): - return m2urllib2.build_opener(self.ssl_context, *handlers) - - -class SSLFactory: - - def create_https_connection(self, host, response_class = None): - connection = httplib.HTTPSConnection(host) - if response_class: - connection.response_class = response_class - return connection - - def create_opener(self, *handlers): - return urllib2.build_opener(*handlers) - - - -def get_factory(ssl_ca_cert = None, ssl_context = None): - """ Return an SSLFactory, based on if M2Crypto is available. """ - if have_m2crypto: - return M2SSLFactory(ssl_ca_cert, ssl_context) - else: - # Log here if someone provides the args but we don't use them. - if ssl_ca_cert or ssl_context: - if DEBUG: - DEBUG.warning("SSL arguments supplied, but M2Crypto is not available. " - "Using Python SSL.") - return SSLFactory() |