7 files changed, 0 insertions, 3688 deletions
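For orientation before the file-by-file diff: the public surface of the removed package was the three functions re-exported by its __init__.py, shown below. A minimal usage sketch, assuming Python 2 and the deleted urlgrabber package still on sys.path (the URL is illustrative):

    from urlgrabber import urlgrab, urlopen, urlread

    # copy the remote file to the local filesystem; returns the local filename
    path = urlgrab('http://example.com/notes.txt')

    # open the remote file as a file-like object (like urllib2.urlopen)
    fo = urlopen('http://example.com/notes.txt')
    head = fo.read(512)
    fo.close()

    # slurp the whole contents into a string
    text = urlread('http://example.com/notes.txt')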
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/__init__.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/__init__.py
deleted file mode 100644
index 7bcd9d5541..0000000000
--- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/__init__.py
+++ /dev/null
@@ -1,53 +0,0 @@
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Library General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-
-# Copyright 2002-2006 Michael D. Stenner, Ryan Tomayko
-
-# $Id: __init__.py,v 1.20 2006/09/22 00:58:55 mstenner Exp $
-
-"""A high-level cross-protocol url-grabber.
-
-Using urlgrabber, data can be fetched in three basic ways:
-
-  urlgrab(url) copy the file to the local filesystem
-  urlopen(url) open the remote file and return a file object
-     (like urllib2.urlopen)
-  urlread(url) return the contents of the file as a string
-
-When using these functions (or methods), urlgrabber supports the
-following features:
-
-  * identical behavior for http://, ftp://, and file:// urls
-  * http keepalive - faster downloads of many files by using
-    only a single connection
-  * byte ranges - fetch only a portion of the file
-  * reget - for a urlgrab, resume a partial download
-  * progress meters - the ability to report download progress
-    automatically, even when using urlopen!
-  * throttling - restrict bandwidth usage
-  * retries - automatically retry a download if it fails. The
-    number of retries and failure types are configurable.
-  * authenticated server access for http and ftp
-  * proxy support - support for authenticated http and ftp proxies
-  * mirror groups - treat a list of mirrors as a single source,
-    automatically switching mirrors if there is a failure.
-"""
-
-__version__ = '3.1.0'
-__date__    = '2006/09/21'
-__author__  = 'Michael D. Stenner <mstenner@linux.duke.edu>, ' \
-              'Ryan Tomayko <rtomayko@naeblis.cx>'
-__url__     = 'http://linux.duke.edu/projects/urlgrabber/'
-
-from grabber import urlgrab, urlopen, urlread
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py
deleted file mode 100644
index 001b4e32d6..0000000000
--- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py
+++ /dev/null
@@ -1,463 +0,0 @@
-#   This library is free software; you can redistribute it and/or
-#   modify it under the terms of the GNU Lesser General Public
-#   License as published by the Free Software Foundation; either
-#   version 2.1 of the License, or (at your option) any later version.
-#
-#   This library is distributed in the hope that it will be useful,
-#   but WITHOUT ANY WARRANTY; without even the implied warranty of
-#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-#   Lesser General Public License for more details.
-# -#   You should have received a copy of the GNU Lesser General Public -#   License along with this library; if not, write to the  -#      Free Software Foundation, Inc.,  -#      59 Temple Place, Suite 330,  -#      Boston, MA  02111-1307  USA - -# This file is part of urlgrabber, a high-level cross-protocol url-grabber -# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko - -# $Id: byterange.py,v 1.12 2006/07/20 20:15:58 mstenner Exp $ - -import os -import stat -import urllib -import urllib2 -import rfc822 - -DEBUG = None - -try:     -    from cStringIO import StringIO -except ImportError, msg:  -    from StringIO import StringIO - -class RangeError(IOError): -    """Error raised when an unsatisfiable range is requested.""" -    pass -     -class HTTPRangeHandler(urllib2.BaseHandler): -    """Handler that enables HTTP Range headers. -     -    This was extremely simple. The Range header is a HTTP feature to -    begin with so all this class does is tell urllib2 that the  -    "206 Partial Content" reponse from the HTTP server is what we  -    expected. -     -    Example: -        import urllib2 -        import byterange -         -        range_handler = range.HTTPRangeHandler() -        opener = urllib2.build_opener(range_handler) -         -        # install it -        urllib2.install_opener(opener) -         -        # create Request and set Range header -        req = urllib2.Request('http://www.python.org/') -        req.header['Range'] = 'bytes=30-50' -        f = urllib2.urlopen(req) -    """ -     -    def http_error_206(self, req, fp, code, msg, hdrs): -        # 206 Partial Content Response -        r = urllib.addinfourl(fp, hdrs, req.get_full_url()) -        r.code = code -        r.msg = msg -        return r -     -    def http_error_416(self, req, fp, code, msg, hdrs): -        # HTTP's Range Not Satisfiable error -        raise RangeError('Requested Range Not Satisfiable') - -class HTTPSRangeHandler(HTTPRangeHandler): -    """ Range Header support for HTTPS. """ - -    def https_error_206(self, req, fp, code, msg, hdrs): -        return self.http_error_206(req, fp, code, msg, hdrs) - -    def https_error_416(self, req, fp, code, msg, hdrs): -        self.https_error_416(req, fp, code, msg, hdrs) - -class RangeableFileObject: -    """File object wrapper to enable raw range handling. -    This was implemented primarilary for handling range  -    specifications for file:// urls. This object effectively makes  -    a file object look like it consists only of a range of bytes in  -    the stream. -     -    Examples: -        # expose 10 bytes, starting at byte position 20, from  -        # /etc/aliases. -        >>> fo = RangeableFileObject(file('/etc/passwd', 'r'), (20,30)) -        # seek seeks within the range (to position 23 in this case) -        >>> fo.seek(3) -        # tell tells where your at _within the range_ (position 3 in -        # this case) -        >>> fo.tell() -        # read EOFs if an attempt is made to read past the last -        # byte in the range. the following will return only 7 bytes. -        >>> fo.read(30) -    """ -     -    def __init__(self, fo, rangetup): -        """Create a RangeableFileObject. -        fo       -- a file like object. only the read() method need be  -                    supported but supporting an optimized seek() is  -                    preferable. -        rangetup -- a (firstbyte,lastbyte) tuple specifying the range -                    to work over. 
-        The file object provided is assumed to be at byte offset 0. -        """ -        self.fo = fo -        (self.firstbyte, self.lastbyte) = range_tuple_normalize(rangetup) -        self.realpos = 0 -        self._do_seek(self.firstbyte) -         -    def __getattr__(self, name): -        """This effectively allows us to wrap at the instance level. -        Any attribute not found in _this_ object will be searched for -        in self.fo.  This includes methods.""" -        if hasattr(self.fo, name): -            return getattr(self.fo, name) -        raise AttributeError, name -     -    def tell(self): -        """Return the position within the range. -        This is different from fo.seek in that position 0 is the  -        first byte position of the range tuple. For example, if -        this object was created with a range tuple of (500,899), -        tell() will return 0 when at byte position 500 of the file. -        """ -        return (self.realpos - self.firstbyte) -     -    def seek(self,offset,whence=0): -        """Seek within the byte range. -        Positioning is identical to that described under tell(). -        """ -        assert whence in (0, 1, 2) -        if whence == 0:   # absolute seek -            realoffset = self.firstbyte + offset -        elif whence == 1: # relative seek -            realoffset = self.realpos + offset -        elif whence == 2: # absolute from end of file -            # XXX: are we raising the right Error here? -            raise IOError('seek from end of file not supported.') -         -        # do not allow seek past lastbyte in range -        if self.lastbyte and (realoffset >= self.lastbyte): -            realoffset = self.lastbyte -         -        self._do_seek(realoffset - self.realpos) -         -    def read(self, size=-1): -        """Read within the range. -        This method will limit the size read based on the range. -        """ -        size = self._calc_read_size(size) -        rslt = self.fo.read(size) -        self.realpos += len(rslt) -        return rslt -     -    def readline(self, size=-1): -        """Read lines within the range. -        This method will limit the size read based on the range. -        """ -        size = self._calc_read_size(size) -        rslt = self.fo.readline(size) -        self.realpos += len(rslt) -        return rslt -     -    def _calc_read_size(self, size): -        """Handles calculating the amount of data to read based on -        the range. -        """ -        if self.lastbyte: -            if size > -1: -                if ((self.realpos + size) >= self.lastbyte): -                    size = (self.lastbyte - self.realpos) -            else: -                size = (self.lastbyte - self.realpos) -        return size -         -    def _do_seek(self,offset): -        """Seek based on whether wrapped object supports seek(). -        offset is relative to the current position (self.realpos). -        """ -        assert offset >= 0 -        if not hasattr(self.fo, 'seek'): -            self._poor_mans_seek(offset) -        else: -            self.fo.seek(self.realpos + offset) -        self.realpos+= offset -         -    def _poor_mans_seek(self,offset): -        """Seek by calling the wrapped file objects read() method. -        This is used for file like objects that do not have native -        seek support. The wrapped objects read() method is called -        to manually seek to the desired position. 
-        offset -- read this number of bytes from the wrapped -                  file object. -        raise RangeError if we encounter EOF before reaching the  -        specified offset. -        """ -        pos = 0 -        bufsize = 1024 -        while pos < offset: -            if (pos + bufsize) > offset: -                bufsize = offset - pos -            buf = self.fo.read(bufsize) -            if len(buf) != bufsize: -                raise RangeError('Requested Range Not Satisfiable') -            pos+= bufsize - -class FileRangeHandler(urllib2.FileHandler): -    """FileHandler subclass that adds Range support. -    This class handles Range headers exactly like an HTTP -    server would. -    """ -    def open_local_file(self, req): -        import mimetypes -        import mimetools -        host = req.get_host() -        file = req.get_selector() -        localfile = urllib.url2pathname(file) -        stats = os.stat(localfile) -        size = stats[stat.ST_SIZE] -        modified = rfc822.formatdate(stats[stat.ST_MTIME]) -        mtype = mimetypes.guess_type(file)[0] -        if host: -            host, port = urllib.splitport(host) -            if port or socket.gethostbyname(host) not in self.get_names(): -                raise urllib2.URLError('file not on local host') -        fo = open(localfile,'rb') -        brange = req.headers.get('Range',None) -        brange = range_header_to_tuple(brange) -        assert brange != () -        if brange: -            (fb,lb) = brange -            if lb == '': lb = size -            if fb < 0 or fb > size or lb > size: -                raise RangeError('Requested Range Not Satisfiable') -            size = (lb - fb) -            fo = RangeableFileObject(fo, (fb,lb)) -        headers = mimetools.Message(StringIO( -            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' % -            (mtype or 'text/plain', size, modified))) -        return urllib.addinfourl(fo, headers, 'file:'+file) - - -# FTP Range Support  -# Unfortunately, a large amount of base FTP code had to be copied -# from urllib and urllib2 in order to insert the FTP REST command. 
-# Code modifications for range support have been commented as  -# follows: -# -- range support modifications start/end here - -from urllib import splitport, splituser, splitpasswd, splitattr, \ -                   unquote, addclosehook, addinfourl -import ftplib -import socket -import sys -import ftplib -import mimetypes -import mimetools - -class FTPRangeHandler(urllib2.FTPHandler): -    def ftp_open(self, req): -        host = req.get_host() -        if not host: -            raise IOError, ('ftp error', 'no host given') -        host, port = splitport(host) -        if port is None: -            port = ftplib.FTP_PORT - -        # username/password handling -        user, host = splituser(host) -        if user: -            user, passwd = splitpasswd(user) -        else: -            passwd = None -        host = unquote(host) -        user = unquote(user or '') -        passwd = unquote(passwd or '') -         -        try: -            host = socket.gethostbyname(host) -        except socket.error, msg: -            raise urllib2.URLError(msg) -        path, attrs = splitattr(req.get_selector()) -        dirs = path.split('/') -        dirs = map(unquote, dirs) -        dirs, file = dirs[:-1], dirs[-1] -        if dirs and not dirs[0]: -            dirs = dirs[1:] -        try: -            fw = self.connect_ftp(user, passwd, host, port, dirs) -            type = file and 'I' or 'D' -            for attr in attrs: -                attr, value = splitattr(attr) -                if attr.lower() == 'type' and \ -                   value in ('a', 'A', 'i', 'I', 'd', 'D'): -                    type = value.upper() -             -            # -- range support modifications start here -            rest = None -            range_tup = range_header_to_tuple(req.headers.get('Range',None))     -            assert range_tup != () -            if range_tup: -                (fb,lb) = range_tup -                if fb > 0: rest = fb -            # -- range support modifications end here -             -            fp, retrlen = fw.retrfile(file, type, rest) -             -            # -- range support modifications start here -            if range_tup: -                (fb,lb) = range_tup -                if lb == '':  -                    if retrlen is None or retrlen == 0: -                        raise RangeError('Requested Range Not Satisfiable due to unobtainable file length.') -                    lb = retrlen -                    retrlen = lb - fb -                    if retrlen < 0: -                        # beginning of range is larger than file -                        raise RangeError('Requested Range Not Satisfiable') -                else: -                    retrlen = lb - fb -                    fp = RangeableFileObject(fp, (0,retrlen)) -            # -- range support modifications end here -             -            headers = "" -            mtype = mimetypes.guess_type(req.get_full_url())[0] -            if mtype: -                headers += "Content-Type: %s\n" % mtype -            if retrlen is not None and retrlen >= 0: -                headers += "Content-Length: %d\n" % retrlen -            sf = StringIO(headers) -            headers = mimetools.Message(sf) -            return addinfourl(fp, headers, req.get_full_url()) -        except ftplib.all_errors, msg: -            raise IOError, ('ftp error', msg), sys.exc_info()[2] - -    def connect_ftp(self, user, passwd, host, port, dirs): -        fw = ftpwrapper(user, passwd, host, port, dirs) -        return fw - -class 
ftpwrapper(urllib.ftpwrapper): -    # range support note: -    # this ftpwrapper code is copied directly from -    # urllib. The only enhancement is to add the rest -    # argument and pass it on to ftp.ntransfercmd -    def retrfile(self, file, type, rest=None): -        self.endtransfer() -        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1 -        else: cmd = 'TYPE ' + type; isdir = 0 -        try: -            self.ftp.voidcmd(cmd) -        except ftplib.all_errors: -            self.init() -            self.ftp.voidcmd(cmd) -        conn = None -        if file and not isdir: -            # Use nlst to see if the file exists at all -            try: -                self.ftp.nlst(file) -            except ftplib.error_perm, reason: -                raise IOError, ('ftp error', reason), sys.exc_info()[2] -            # Restore the transfer mode! -            self.ftp.voidcmd(cmd) -            # Try to retrieve as a file -            try: -                cmd = 'RETR ' + file -                conn = self.ftp.ntransfercmd(cmd, rest) -            except ftplib.error_perm, reason: -                if str(reason)[:3] == '501': -                    # workaround for REST not supported error -                    fp, retrlen = self.retrfile(file, type) -                    fp = RangeableFileObject(fp, (rest,'')) -                    return (fp, retrlen) -                elif str(reason)[:3] != '550': -                    raise IOError, ('ftp error', reason), sys.exc_info()[2] -        if not conn: -            # Set transfer mode to ASCII! -            self.ftp.voidcmd('TYPE A') -            # Try a directory listing -            if file: cmd = 'LIST ' + file -            else: cmd = 'LIST' -            conn = self.ftp.ntransfercmd(cmd) -        self.busy = 1 -        # Pass back both a suitably decorated object and a retrieval length -        return (addclosehook(conn[0].makefile('rb'), -                            self.endtransfer), conn[1]) - - -#################################################################### -# Range Tuple Functions -# XXX: These range tuple functions might go better in a class. - -_rangere = None -def range_header_to_tuple(range_header): -    """Get a (firstbyte,lastbyte) tuple from a Range header value. -     -    Range headers have the form "bytes=<firstbyte>-<lastbyte>". This -    function pulls the firstbyte and lastbyte values and returns -    a (firstbyte,lastbyte) tuple. If lastbyte is not specified in -    the header value, it is returned as an empty string in the -    tuple. -     -    Return None if range_header is None -    Return () if range_header does not conform to the range spec  -    pattern. -     -    """ -    global _rangere -    if range_header is None: return None -    if _rangere is None: -        import re -        _rangere = re.compile(r'^bytes=(\d{1,})-(\d*)') -    match = _rangere.match(range_header) -    if match:  -        tup = range_tuple_normalize(match.group(1,2)) -        if tup and tup[1]:  -            tup = (tup[0],tup[1]+1) -        return tup -    return () - -def range_tuple_to_header(range_tup): -    """Convert a range tuple to a Range header value. -    Return a string of the form "bytes=<firstbyte>-<lastbyte>" or None -    if no range is needed. 
-    """ -    if range_tup is None: return None -    range_tup = range_tuple_normalize(range_tup) -    if range_tup: -        if range_tup[1]:  -            range_tup = (range_tup[0],range_tup[1] - 1) -        return 'bytes=%s-%s' % range_tup -     -def range_tuple_normalize(range_tup): -    """Normalize a (first_byte,last_byte) range tuple. -    Return a tuple whose first element is guaranteed to be an int -    and whose second element will be '' (meaning: the last byte) or  -    an int. Finally, return None if the normalized tuple == (0,'') -    as that is equivelant to retrieving the entire file. -    """ -    if range_tup is None: return None -    # handle first byte -    fb = range_tup[0] -    if fb in (None,''): fb = 0 -    else: fb = int(fb) -    # handle last byte -    try: lb = range_tup[1] -    except IndexError: lb = '' -    else:   -        if lb is None: lb = '' -        elif lb != '': lb = int(lb) -    # check if range is over the entire file -    if (fb,lb) == (0,''): return None -    # check that the range is valid -    if lb < fb: raise RangeError('Invalid byte range: %s-%s' % (fb,lb)) -    return (fb,lb) - diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/grabber.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/grabber.py deleted file mode 100644 index fefdab36f6..0000000000 --- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/grabber.py +++ /dev/null @@ -1,1477 +0,0 @@ -#   This library is free software; you can redistribute it and/or -#   modify it under the terms of the GNU Lesser General Public -#   License as published by the Free Software Foundation; either -#   version 2.1 of the License, or (at your option) any later version. -# -#   This library is distributed in the hope that it will be useful, -#   but WITHOUT ANY WARRANTY; without even the implied warranty of -#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU -#   Lesser General Public License for more details. -# -#   You should have received a copy of the GNU Lesser General Public -#   License along with this library; if not, write to the  -#      Free Software Foundation, Inc.,  -#      59 Temple Place, Suite 330,  -#      Boston, MA  02111-1307  USA - -# This file is part of urlgrabber, a high-level cross-protocol url-grabber -# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko - -"""A high-level cross-protocol url-grabber. - -GENERAL ARGUMENTS (kwargs) - -  Where possible, the module-level default is indicated, and legal -  values are provided. - -  copy_local = 0   [0|1] - -    ignored except for file:// urls, in which case it specifies -    whether urlgrab should still make a copy of the file, or simply -    point to the existing copy. The module level default for this -    option is 0. - -  close_connection = 0   [0|1] - -    tells URLGrabber to close the connection after a file has been -    transfered. This is ignored unless the download happens with the -    http keepalive handler (keepalive=1).  Otherwise, the connection -    is left open for further use. The module level default for this -    option is 0 (keepalive connections will not be closed). - -  keepalive = 1   [0|1] - -    specifies whether keepalive should be used for HTTP/1.1 servers -    that support it. The module level default for this option is 1 -    (keepalive is enabled). 
- -  progress_obj = None - -    a class instance that supports the following methods: -      po.start(filename, url, basename, length, text) -      # length will be None if unknown -      po.update(read) # read == bytes read so far -      po.end() - -  text = None -   -    specifies an alternativ text item in the beginning of the progress -    bar line. If not given, the basename of the file is used. - -  throttle = 1.0 - -    a number - if it's an int, it's the bytes/second throttle limit. -    If it's a float, it is first multiplied by bandwidth.  If throttle -    == 0, throttling is disabled.  If None, the module-level default -    (which can be set on default_grabber.throttle) is used. See -    BANDWIDTH THROTTLING for more information. - -  timeout = None - -    a positive float expressing the number of seconds to wait for socket -    operations. If the value is None or 0.0, socket operations will block -    forever. Setting this option causes urlgrabber to call the settimeout -    method on the Socket object used for the request. See the Python -    documentation on settimeout for more information. -    http://www.python.org/doc/current/lib/socket-objects.html - -  bandwidth = 0 - -    the nominal max bandwidth in bytes/second.  If throttle is a float -    and bandwidth == 0, throttling is disabled.  If None, the -    module-level default (which can be set on -    default_grabber.bandwidth) is used. See BANDWIDTH THROTTLING for -    more information. - -  range = None - -    a tuple of the form (first_byte, last_byte) describing a byte -    range to retrieve. Either or both of the values may set to -    None. If first_byte is None, byte offset 0 is assumed. If -    last_byte is None, the last byte available is assumed. Note that -    the range specification is python-like in that (0,10) will yeild -    the first 10 bytes of the file. - -    If set to None, no range will be used. -     -  reget = None   [None|'simple'|'check_timestamp'] - -    whether to attempt to reget a partially-downloaded file.  Reget -    only applies to .urlgrab and (obviously) only if there is a -    partially downloaded file.  Reget has two modes: - -      'simple' -- the local file will always be trusted.  If there -        are 100 bytes in the local file, then the download will always -        begin 100 bytes into the requested file. - -      'check_timestamp' -- the timestamp of the server file will be -        compared to the timestamp of the local file.  ONLY if the -        local file is newer than or the same age as the server file -        will reget be used.  If the server file is newer, or the -        timestamp is not returned, the entire file will be fetched. - -    NOTE: urlgrabber can do very little to verify that the partial -    file on disk is identical to the beginning of the remote file. -    You may want to either employ a custom "checkfunc" or simply avoid -    using reget in situations where corruption is a concern. - -  user_agent = 'urlgrabber/VERSION' - -    a string, usually of the form 'AGENT/VERSION' that is provided to -    HTTP servers in the User-agent header. The module level default -    for this option is "urlgrabber/VERSION". - -  http_headers = None - -    a tuple of 2-tuples, each containing a header and value.  These -    will be used for http and https requests only.  For example, you -    can do -      http_headers = (('Pragma', 'no-cache'),) - -  ftp_headers = None - -    this is just like http_headers, but will be used for ftp requests. 
- -  proxies = None - -    a dictionary that maps protocol schemes to proxy hosts. For -    example, to use a proxy server on host "foo" port 3128 for http -    and https URLs: -      proxies={ 'http' : 'http://foo:3128', 'https' : 'http://foo:3128' } -    note that proxy authentication information may be provided using -    normal URL constructs: -      proxies={ 'http' : 'http://user:host@foo:3128' } -    Lastly, if proxies is None, the default environment settings will -    be used. - -  prefix = None - -    a url prefix that will be prepended to all requested urls.  For -    example: -      g = URLGrabber(prefix='http://foo.com/mirror/') -      g.urlgrab('some/file.txt') -      ## this will fetch 'http://foo.com/mirror/some/file.txt' -    This option exists primarily to allow identical behavior to -    MirrorGroup (and derived) instances.  Note: a '/' will be inserted -    if necessary, so you cannot specify a prefix that ends with a -    partial file or directory name. - -  opener = None -   -    Overrides the default urllib2.OpenerDirector provided to urllib2 -    when making requests.  This option exists so that the urllib2 -    handler chain may be customized.  Note that the range, reget, -    proxy, and keepalive features require that custom handlers be -    provided to urllib2 in order to function properly.  If an opener -    option is provided, no attempt is made by urlgrabber to ensure -    chain integrity.  You are responsible for ensuring that any -    extension handlers are present if said features are required. -     -  data = None - -    Only relevant for the HTTP family (and ignored for other -    protocols), this allows HTTP POSTs.  When the data kwarg is -    present (and not None), an HTTP request will automatically become -    a POST rather than GET.  This is done by direct passthrough to -    urllib2.  If you use this, you may also want to set the -    'Content-length' and 'Content-type' headers with the http_headers -    option.  Note that python 2.2 handles the case of these -    badly and if you do not use the proper case (shown here), your -    values will be overridden with the defaults. -     - -RETRY RELATED ARGUMENTS - -  retry = None - -    the number of times to retry the grab before bailing.  If this is -    zero, it will retry forever. This was intentional... really, it -    was :). If this value is not supplied or is supplied but is None -    retrying does not occur. - -  retrycodes = [-1,2,4,5,6,7] - -    a sequence of errorcodes (values of e.errno) for which it should -    retry. See the doc on URLGrabError for more details on this.  You -    might consider modifying a copy of the default codes rather than -    building yours from scratch so that if the list is extended in the -    future (or one code is split into two) you can still enjoy the -    benefits of the default list.  You can do that with something like -    this: - -      retrycodes = urlgrabber.grabber.URLGrabberOptions().retrycodes -      if 12 not in retrycodes: -          retrycodes.append(12) -       -  checkfunc = None - -    a function to do additional checks. This defaults to None, which -    means no additional checking.  The function should simply return -    on a successful check.  It should raise URLGrabError on an -    unsuccessful check.  Raising of any other exception will be -    considered immediate failure and no retries will occur. - -    If it raises URLGrabError, the error code will determine the retry -    behavior.  
Negative error numbers are reserved for use by these -    passed in functions, so you can use many negative numbers for -    different types of failure.  By default, -1 results in a retry, -    but this can be customized with retrycodes. - -    If you simply pass in a function, it will be given exactly one -    argument: a CallbackObject instance with the .url attribute -    defined and either .filename (for urlgrab) or .data (for urlread). -    For urlgrab, .filename is the name of the local file.  For -    urlread, .data is the actual string data.  If you need other -    arguments passed to the callback (program state of some sort), you -    can do so like this: - -      checkfunc=(function, ('arg1', 2), {'kwarg': 3}) - -    if the downloaded file has filename /tmp/stuff, then this will -    result in this call (for urlgrab): - -      function(obj, 'arg1', 2, kwarg=3) -      # obj.filename = '/tmp/stuff' -      # obj.url = 'http://foo.com/stuff' -       -    NOTE: both the "args" tuple and "kwargs" dict must be present if -    you use this syntax, but either (or both) can be empty. - -  failure_callback = None - -    The callback that gets called during retries when an attempt to -    fetch a file fails.  The syntax for specifying the callback is -    identical to checkfunc, except for the attributes defined in the -    CallbackObject instance.  The attributes for failure_callback are: - -      exception = the raised exception -      url       = the url we're trying to fetch -      tries     = the number of tries so far (including this one) -      retry     = the value of the retry option - -    The callback is present primarily to inform the calling program of -    the failure, but if it raises an exception (including the one it's -    passed) that exception will NOT be caught and will therefore cause -    future retries to be aborted. - -    The callback is called for EVERY failure, including the last one. -    On the last try, the callback can raise an alternate exception, -    but it cannot (without severe trickiness) prevent the exception -    from being raised. - -  interrupt_callback = None - -    This callback is called if KeyboardInterrupt is received at any -    point in the transfer.  Basically, this callback can have three -    impacts on the fetch process based on the way it exits: - -      1) raise no exception: the current fetch will be aborted, but -         any further retries will still take place - -      2) raise a URLGrabError: if you're using a MirrorGroup, then -         this will prompt a failover to the next mirror according to -         the behavior of the MirrorGroup subclass.  It is recommended -         that you raise URLGrabError with code 15, 'user abort'.  If -         you are NOT using a MirrorGroup subclass, then this is the -         same as (3). - -      3) raise some other exception (such as KeyboardInterrupt), which -         will not be caught at either the grabber or mirror levels. -         That is, it will be raised up all the way to the caller. - -    This callback is very similar to failure_callback.  They are -    passed the same arguments, so you could use the same function for -    both. -       -  urlparser = URLParser() - -    The URLParser class handles pre-processing of URLs, including -    auth-handling for user/pass encoded in http urls, file handing -    (that is, filenames not sent as a URL), and URL quoting.  If you -    want to override any of this behavior, you can pass in a -    replacement instance.  See also the 'quote' option. 
- -  quote = None - -    Whether or not to quote the path portion of a url. -      quote = 1    ->  quote the URLs (they're not quoted yet) -      quote = 0    ->  do not quote them (they're already quoted) -      quote = None ->  guess what to do - -    This option only affects proper urls like 'file:///etc/passwd'; it -    does not affect 'raw' filenames like '/etc/passwd'.  The latter -    will always be quoted as they are converted to URLs.  Also, only -    the path part of a url is quoted.  If you need more fine-grained -    control, you should probably subclass URLParser and pass it in via -    the 'urlparser' option. - -BANDWIDTH THROTTLING - -  urlgrabber supports throttling via two values: throttle and -  bandwidth Between the two, you can either specify and absolute -  throttle threshold or specify a theshold as a fraction of maximum -  available bandwidth. - -  throttle is a number - if it's an int, it's the bytes/second -  throttle limit.  If it's a float, it is first multiplied by -  bandwidth.  If throttle == 0, throttling is disabled.  If None, the -  module-level default (which can be set with set_throttle) is used. - -  bandwidth is the nominal max bandwidth in bytes/second.  If throttle -  is a float and bandwidth == 0, throttling is disabled.  If None, the -  module-level default (which can be set with set_bandwidth) is used. - -  THROTTLING EXAMPLES: - -  Lets say you have a 100 Mbps connection.  This is (about) 10^8 bits -  per second, or 12,500,000 Bytes per second.  You have a number of -  throttling options: - -  *) set_bandwidth(12500000); set_throttle(0.5) # throttle is a float - -     This will limit urlgrab to use half of your available bandwidth. - -  *) set_throttle(6250000) # throttle is an int - -     This will also limit urlgrab to use half of your available -     bandwidth, regardless of what bandwidth is set to. - -  *) set_throttle(6250000); set_throttle(1.0) # float - -     Use half your bandwidth - -  *) set_throttle(6250000); set_throttle(2.0) # float - -    Use up to 12,500,000 Bytes per second (your nominal max bandwidth) - -  *) set_throttle(6250000); set_throttle(0) # throttle = 0 - -     Disable throttling - this is more efficient than a very large -     throttle setting. - -  *) set_throttle(0); set_throttle(1.0) # throttle is float, bandwidth = 0 - -     Disable throttling - this is the default when the module is loaded. - -  SUGGESTED AUTHOR IMPLEMENTATION (THROTTLING) - -  While this is flexible, it's not extremely obvious to the user.  I -  suggest you implement a float throttle as a percent to make the -  distinction between absolute and relative throttling very explicit. - -  Also, you may want to convert the units to something more convenient -  than bytes/second, such as kbps or kB/s, etc. - -""" - -# $Id: grabber.py,v 1.48 2006/09/22 00:58:05 mstenner Exp $ - -import os -import os.path -import sys -import urlparse -import rfc822 -import time -import string -import urllib -import urllib2 -from stat import *  # S_* and ST_* - -######################################################################## -#                     MODULE INITIALIZATION -######################################################################## -try: -    exec('from ' + (__name__.split('.'))[0] + ' import __version__') -except: -    __version__ = '???' 
- -import sslfactory - -auth_handler = urllib2.HTTPBasicAuthHandler( \ -     urllib2.HTTPPasswordMgrWithDefaultRealm()) - -try: -    from i18n import _ -except ImportError, msg: -    def _(st): return st - -try: -    from httplib import HTTPException -except ImportError, msg: -    HTTPException = None - -try: -    # This is a convenient way to make keepalive optional. -    # Just rename the module so it can't be imported. -    import keepalive -    from keepalive import HTTPHandler, HTTPSHandler -    have_keepalive = True -except ImportError, msg: -    have_keepalive = False - -try: -    # add in range support conditionally too -    import byterange -    from byterange import HTTPRangeHandler, HTTPSRangeHandler, \ -         FileRangeHandler, FTPRangeHandler, range_tuple_normalize, \ -         range_tuple_to_header, RangeError -except ImportError, msg: -    range_handlers = () -    RangeError = None -    have_range = 0 -else: -    range_handlers = (HTTPRangeHandler(), HTTPSRangeHandler(), -        FileRangeHandler(), FTPRangeHandler()) -    have_range = 1 - - -# check whether socket timeout support is available (Python >= 2.3) -import socket -try: -    TimeoutError = socket.timeout -    have_socket_timeout = True -except AttributeError: -    TimeoutError = None -    have_socket_timeout = False - -######################################################################## -# functions for debugging output.  These functions are here because they -# are also part of the module initialization. -DEBUG = None -def set_logger(DBOBJ): -    """Set the DEBUG object.  This is called by _init_default_logger when -    the environment variable URLGRABBER_DEBUG is set, but can also be -    called by a calling program.  Basically, if the calling program uses -    the logging module and would like to incorporate urlgrabber logging, -    then it can do so this way.  It's probably not necessary as most -    internal logging is only for debugging purposes. - -    The passed-in object should be a logging.Logger instance.  It will -    be pushed into the keepalive and byterange modules if they're -    being used.  The mirror module pulls this object in on import, so -    you will need to manually push into it.  In fact, you may find it -    tidier to simply push your logging object (or objects) into each -    of these modules independently. -    """ - -    global DEBUG -    DEBUG = DBOBJ -    if have_keepalive and keepalive.DEBUG is None: -        keepalive.DEBUG = DBOBJ -    if have_range and byterange.DEBUG is None: -        byterange.DEBUG = DBOBJ -    if sslfactory.DEBUG is None: -        sslfactory.DEBUG = DBOBJ - -def _init_default_logger(): -    '''Examines the environment variable URLGRABBER_DEBUG and creates -    a logging object (logging.logger) based on the contents.  It takes -    the form - -      URLGRABBER_DEBUG=level,filename -       -    where "level" can be either an integer or a log level from the -    logging module (DEBUG, INFO, etc).  If the integer is zero or -    less, logging will be disabled.  Filename is the filename where -    logs will be sent.  If it is "-", then stdout will be used.  If -    the filename is empty or missing, stderr will be used.  If the -    variable cannot be processed or the logging module cannot be -    imported (python < 2.3) then logging will be disabled.  
Here are -    some examples: - -      URLGRABBER_DEBUG=1,debug.txt   # log everything to debug.txt -      URLGRABBER_DEBUG=WARNING,-     # log warning and higher to stdout -      URLGRABBER_DEBUG=INFO          # log info and higher to stderr -       -    This funtion is called during module initialization.  It is not -    intended to be called from outside.  The only reason it is a -    function at all is to keep the module-level namespace tidy and to -    collect the code into a nice block.''' - -    try: -        dbinfo = os.environ['URLGRABBER_DEBUG'].split(',') -        import logging -        level = logging._levelNames.get(dbinfo[0], int(dbinfo[0])) -        if level < 1: raise ValueError() - -        formatter = logging.Formatter('%(asctime)s %(message)s') -        if len(dbinfo) > 1: filename = dbinfo[1] -        else: filename = '' -        if filename == '': handler = logging.StreamHandler(sys.stderr) -        elif filename == '-': handler = logging.StreamHandler(sys.stdout) -        else:  handler = logging.FileHandler(filename) -        handler.setFormatter(formatter) -        DBOBJ = logging.getLogger('urlgrabber') -        DBOBJ.addHandler(handler) -        DBOBJ.setLevel(level) -    except (KeyError, ImportError, ValueError): -        DBOBJ = None -    set_logger(DBOBJ) - -_init_default_logger() -######################################################################## -#                 END MODULE INITIALIZATION -######################################################################## - - - -class URLGrabError(IOError): -    """ -    URLGrabError error codes: - -      URLGrabber error codes (0 -- 255) -        0    - everything looks good (you should never see this) -        1    - malformed url -        2    - local file doesn't exist -        3    - request for non-file local file (dir, etc) -        4    - IOError on fetch -        5    - OSError on fetch -        6    - no content length header when we expected one -        7    - HTTPException -        8    - Exceeded read limit (for urlread) -        9    - Requested byte range not satisfiable. -        10   - Byte range requested, but range support unavailable -        11   - Illegal reget mode -        12   - Socket timeout -        13   - malformed proxy url -        14   - HTTPError (includes .code and .exception attributes) -        15   - user abort -         -      MirrorGroup error codes (256 -- 511) -        256  - No more mirrors left to try - -      Custom (non-builtin) classes derived from MirrorGroup (512 -- 767) -        [ this range reserved for application-specific error codes ] - -      Retry codes (< 0) -        -1   - retry the download, unknown reason - -    Note: to test which group a code is in, you can simply do integer -    division by 256: e.errno / 256 - -    Negative codes are reserved for use by functions passed in to -    retrygrab with checkfunc.  The value -1 is built in as a generic -    retry code and is already included in the retrycodes list. -    Therefore, you can create a custom check function that simply -    returns -1 and the fetch will be re-tried.  For more customized -    retries, you can use other negative number and include them in -    retry-codes.  This is nice for outputting useful messages about -    what failed. - -    You can use these error codes like so: -      try: urlgrab(url) -      except URLGrabError, e: -         if e.errno == 3: ... 
-           # or -         print e.strerror -           # or simply -         print e  #### print '[Errno %i] %s' % (e.errno, e.strerror) -    """ -    pass - -class CallbackObject: -    """Container for returned callback data. - -    This is currently a dummy class into which urlgrabber can stuff -    information for passing to callbacks.  This way, the prototype for -    all callbacks is the same, regardless of the data that will be -    passed back.  Any function that accepts a callback function as an -    argument SHOULD document what it will define in this object. - -    It is possible that this class will have some greater -    functionality in the future. -    """ -    def __init__(self, **kwargs): -        self.__dict__.update(kwargs) - -def urlgrab(url, filename=None, **kwargs): -    """grab the file at <url> and make a local copy at <filename> -    If filename is none, the basename of the url is used. -    urlgrab returns the filename of the local file, which may be different -    from the passed-in filename if the copy_local kwarg == 0. -     -    See module documentation for a description of possible kwargs. -    """ -    return default_grabber.urlgrab(url, filename, **kwargs) - -def urlopen(url, **kwargs): -    """open the url and return a file object -    If a progress object or throttle specifications exist, then -    a special file object will be returned that supports them. -    The file object can be treated like any other file object. -     -    See module documentation for a description of possible kwargs. -    """ -    return default_grabber.urlopen(url, **kwargs) - -def urlread(url, limit=None, **kwargs): -    """read the url into a string, up to 'limit' bytes -    If the limit is exceeded, an exception will be thrown.  Note that urlread -    is NOT intended to be used as a way of saying "I want the first N bytes" -    but rather 'read the whole file into memory, but don't use too much' -     -    See module documentation for a description of possible kwargs. -    """ -    return default_grabber.urlread(url, limit, **kwargs) - - -class URLParser: -    """Process the URLs before passing them to urllib2. - -    This class does several things: - -      * add any prefix -      * translate a "raw" file to a proper file: url -      * handle any http or https auth that's encoded within the url -      * quote the url - -    Only the "parse" method is called directly, and it calls sub-methods. - -    An instance of this class is held in the options object, which -    means that it's easy to change the behavior by sub-classing and -    passing the replacement in.  It need only have a method like: - -        url, parts = urlparser.parse(url, opts) -    """ - -    def parse(self, url, opts): -        """parse the url and return the (modified) url and its parts - -        Note: a raw file WILL be quoted when it's converted to a URL. 
-        However, other urls (ones which come with a proper scheme) may -        or may not be quoted according to opts.quote - -          opts.quote = 1     --> quote it -          opts.quote = 0     --> do not quote it -          opts.quote = None  --> guess -        """ -        quote = opts.quote -         -        if opts.prefix: -            url = self.add_prefix(url, opts.prefix) -             -        parts = urlparse.urlparse(url) -        (scheme, host, path, parm, query, frag) = parts - -        if not scheme or (len(scheme) == 1 and scheme in string.letters): -            # if a scheme isn't specified, we guess that it's "file:" -            if url[0] not in '/\\': url = os.path.abspath(url) -            url = 'file:' + urllib.pathname2url(url) -            parts = urlparse.urlparse(url) -            quote = 0 # pathname2url quotes, so we won't do it again -             -        if scheme in ['http', 'https']: -            parts = self.process_http(parts) -             -        if quote is None: -            quote = self.guess_should_quote(parts) -        if quote: -            parts = self.quote(parts) -         -        url = urlparse.urlunparse(parts) -        return url, parts - -    def add_prefix(self, url, prefix): -        if prefix[-1] == '/' or url[0] == '/': -            url = prefix + url -        else: -            url = prefix + '/' + url -        return url - -    def process_http(self, parts): -        (scheme, host, path, parm, query, frag) = parts - -        if '@' in host and auth_handler: -            try: -                user_pass, host = host.split('@', 1) -                if ':' in user_pass: -                    user, password = user_pass.split(':', 1) -            except ValueError, e: -                raise URLGrabError(1, _('Bad URL: %s') % url) -            if DEBUG: DEBUG.info('adding HTTP auth: %s, XXXXXXXX', user) -            auth_handler.add_password(None, host, user, password) - -        return (scheme, host, path, parm, query, frag) - -    def quote(self, parts): -        """quote the URL - -        This method quotes ONLY the path part.  If you need to quote -        other parts, you should override this and pass in your derived -        class.  The other alternative is to quote other parts before -        passing into urlgrabber. -        """ -        (scheme, host, path, parm, query, frag) = parts -        path = urllib.quote(path) -        return (scheme, host, path, parm, query, frag) - -    hexvals = '0123456789ABCDEF' -    def guess_should_quote(self, parts): -        """ -        Guess whether we should quote a path.  This amounts to -        guessing whether it's already quoted. - -        find ' '   ->  1 -        find '%'   ->  1 -        find '%XX' ->  0 -        else       ->  1 -        """ -        (scheme, host, path, parm, query, frag) = parts -        if ' ' in path: -            return 1 -        ind = string.find(path, '%') -        if ind > -1: -            while ind > -1: -                if len(path) < ind+3: -                    return 1 -                code = path[ind+1:ind+3].upper() -                if     code[0] not in self.hexvals or \ -                       code[1] not in self.hexvals: -                    return 1 -                ind = string.find(path, '%', ind+1) -            return 0 -        return 1 -     -class URLGrabberOptions: -    """Class to ease kwargs handling.""" - -    def __init__(self, delegate=None, **kwargs): -        """Initialize URLGrabberOptions object. 
-        Set default values for all options and then update options specified -        in kwargs. -        """ -        self.delegate = delegate -        if delegate is None: -            self._set_defaults() -        self._set_attributes(**kwargs) -     -    def __getattr__(self, name): -        if self.delegate and hasattr(self.delegate, name): -            return getattr(self.delegate, name) -        raise AttributeError, name -     -    def raw_throttle(self): -        """Calculate raw throttle value from throttle and bandwidth  -        values. -        """ -        if self.throttle <= 0:   -            return 0 -        elif type(self.throttle) == type(0):  -            return float(self.throttle) -        else: # throttle is a float -            return self.bandwidth * self.throttle -         -    def derive(self, **kwargs): -        """Create a derived URLGrabberOptions instance. -        This method creates a new instance and overrides the -        options specified in kwargs. -        """ -        return URLGrabberOptions(delegate=self, **kwargs) -         -    def _set_attributes(self, **kwargs): -        """Update object attributes with those provided in kwargs.""" -        self.__dict__.update(kwargs) -        if have_range and kwargs.has_key('range'): -            # normalize the supplied range value -            self.range = range_tuple_normalize(self.range) -        if not self.reget in [None, 'simple', 'check_timestamp']: -            raise URLGrabError(11, _('Illegal reget mode: %s') \ -                               % (self.reget, )) - -    def _set_defaults(self): -        """Set all options to their default values.  -        When adding new options, make sure a default is -        provided here. -        """ -        self.progress_obj = None -        self.throttle = 1.0 -        self.bandwidth = 0 -        self.retry = None -        self.retrycodes = [-1,2,4,5,6,7] -        self.checkfunc = None -        self.copy_local = 0 -        self.close_connection = 0 -        self.range = None -        self.user_agent = 'urlgrabber/%s' % __version__ -        self.keepalive = 1 -        self.proxies = None -        self.reget = None -        self.failure_callback = None -        self.interrupt_callback = None -        self.prefix = None -        self.opener = None -        self.cache_openers = True -        self.timeout = None -        self.text = None -        self.http_headers = None -        self.ftp_headers = None -        self.data = None -        self.urlparser = URLParser() -        self.quote = None -        self.ssl_ca_cert = None -        self.ssl_context = None - -class URLGrabber: -    """Provides easy opening of URLs with a variety of options. -     -    All options are specified as kwargs. Options may be specified when -    the class is created and may be overridden on a per request basis. -     -    New objects inherit default values from default_grabber. -    """ -     -    def __init__(self, **kwargs): -        self.opts = URLGrabberOptions(**kwargs) -     -    def _retry(self, opts, func, *args): -        tries = 0 -        while 1: -            # there are only two ways out of this loop.  
The second has -            # several "sub-ways" -            #   1) via the return in the "try" block -            #   2) by some exception being raised -            #      a) an excepton is raised that we don't "except" -            #      b) a callback raises ANY exception -            #      c) we're not retry-ing or have run out of retries -            #      d) the URLGrabError code is not in retrycodes -            # beware of infinite loops :) -            tries = tries + 1 -            exception = None -            retrycode = None -            callback  = None -            if DEBUG: DEBUG.info('attempt %i/%s: %s', -                                 tries, opts.retry, args[0]) -            try: -                r = apply(func, (opts,) + args, {}) -                if DEBUG: DEBUG.info('success') -                return r -            except URLGrabError, e: -                exception = e -                callback = opts.failure_callback -                retrycode = e.errno -            except KeyboardInterrupt, e: -                exception = e -                callback = opts.interrupt_callback - -            if DEBUG: DEBUG.info('exception: %s', exception) -            if callback: -                if DEBUG: DEBUG.info('calling callback: %s', callback) -                cb_func, cb_args, cb_kwargs = self._make_callback(callback) -                obj = CallbackObject(exception=exception, url=args[0], -                                     tries=tries, retry=opts.retry) -                cb_func(obj, *cb_args, **cb_kwargs) - -            if (opts.retry is None) or (tries == opts.retry): -                if DEBUG: DEBUG.info('retries exceeded, re-raising') -                raise - -            if (retrycode is not None) and (retrycode not in opts.retrycodes): -                if DEBUG: DEBUG.info('retrycode (%i) not in list %s, re-raising', -                                     retrycode, opts.retrycodes) -                raise -     -    def urlopen(self, url, **kwargs): -        """open the url and return a file object -        If a progress object or throttle value specified when this  -        object was created, then  a special file object will be  -        returned that supports them. The file object can be treated  -        like any other file object. -        """ -        opts = self.opts.derive(**kwargs) -        (url,parts) = opts.urlparser.parse(url, opts)  -        def retryfunc(opts, url): -            return URLGrabberFileObject(url, filename=None, opts=opts) -        return self._retry(opts, retryfunc, url) -     -    def urlgrab(self, url, filename=None, **kwargs): -        """grab the file at <url> and make a local copy at <filename> -        If filename is none, the basename of the url is used. -        urlgrab returns the filename of the local file, which may be  -        different from the passed-in filename if copy_local == 0. 
-        """ -        opts = self.opts.derive(**kwargs) -        (url,parts) = opts.urlparser.parse(url, opts)  -        (scheme, host, path, parm, query, frag) = parts -        if filename is None: -            filename = os.path.basename( urllib.unquote(path) ) -        if scheme == 'file' and not opts.copy_local: -            # just return the name of the local file - don't make a  -            # copy currently -            path = urllib.url2pathname(path) -            if host: -                path = os.path.normpath('//' + host + path) -            if not os.path.exists(path): -                raise URLGrabError(2,  -                      _('Local file does not exist: %s') % (path, )) -            elif not os.path.isfile(path): -                raise URLGrabError(3,  -                              _('Not a normal file: %s') % (path, )) -            elif not opts.range: -                return path -         -        def retryfunc(opts, url, filename): -            fo = URLGrabberFileObject(url, filename, opts) -            try: -                fo._do_grab() -                if not opts.checkfunc is None: -                    cb_func, cb_args, cb_kwargs = \ -                             self._make_callback(opts.checkfunc) -                    obj = CallbackObject() -                    obj.filename = filename -                    obj.url = url -                    apply(cb_func, (obj, )+cb_args, cb_kwargs) -            finally: -                fo.close() -            return filename -         -        return self._retry(opts, retryfunc, url, filename) -     -    def urlread(self, url, limit=None, **kwargs): -        """read the url into a string, up to 'limit' bytes -        If the limit is exceeded, an exception will be thrown.  Note -        that urlread is NOT intended to be used as a way of saying  -        "I want the first N bytes" but rather 'read the whole file  -        into memory, but don't use too much' -        """ -        opts = self.opts.derive(**kwargs) -        (url,parts) = opts.urlparser.parse(url, opts)  -        if limit is not None: -            limit = limit + 1 -             -        def retryfunc(opts, url, limit): -            fo = URLGrabberFileObject(url, filename=None, opts=opts) -            s = '' -            try: -                # this is an unfortunate thing.  Some file-like objects -                # have a default "limit" of None, while the built-in (real) -                # file objects have -1.  They each break the other, so for -                # now, we just force the default if necessary. 
-                if limit is None: s = fo.read() -                else: s = fo.read(limit) - -                if not opts.checkfunc is None: -                    cb_func, cb_args, cb_kwargs = \ -                             self._make_callback(opts.checkfunc) -                    obj = CallbackObject() -                    obj.data = s -                    obj.url = url -                    apply(cb_func, (obj, )+cb_args, cb_kwargs) -            finally: -                fo.close() -            return s -             -        s = self._retry(opts, retryfunc, url, limit) -        if limit and len(s) > limit: -            raise URLGrabError(8,  -                        _('Exceeded limit (%i): %s') % (limit, url)) -        return s -         -    def _make_callback(self, callback_obj): -        if callable(callback_obj): -            return callback_obj, (), {} -        else: -            return callback_obj - -# create the default URLGrabber used by urlXXX functions. -# NOTE: actual defaults are set in URLGrabberOptions -default_grabber = URLGrabber() - -class URLGrabberFileObject: -    """This is a file-object wrapper that supports progress objects  -    and throttling. - -    This exists to solve the following problem: lets say you want to -    drop-in replace a normal open with urlopen.  You want to use a -    progress meter and/or throttling, but how do you do that without -    rewriting your code?  Answer: urlopen will return a wrapped file -    object that does the progress meter and-or throttling internally. -    """ - -    def __init__(self, url, filename, opts): -        self.url = url -        self.filename = filename -        self.opts = opts -        self.fo = None -        self._rbuf = '' -        self._rbufsize = 1024*8 -        self._ttime = time.time() -        self._tsize = 0 -        self._amount_read = 0 -        self._opener = None -        self._do_open() -         -    def __getattr__(self, name): -        """This effectively allows us to wrap at the instance level. -        Any attribute not found in _this_ object will be searched for -        in self.fo.  This includes methods.""" -        if hasattr(self.fo, name): -            return getattr(self.fo, name) -        raise AttributeError, name -    -    def _get_opener(self): -        """Build a urllib2 OpenerDirector based on request options.""" -        if self.opts.opener: -            return self.opts.opener -        elif self._opener is None: -            handlers = [] -            need_keepalive_handler = (have_keepalive and self.opts.keepalive) -            need_range_handler = (range_handlers and \ -                                  (self.opts.range or self.opts.reget)) -            # if you specify a ProxyHandler when creating the opener -            # it _must_ come before all other handlers in the list or urllib2 -            # chokes. -            if self.opts.proxies: -                handlers.append( CachedProxyHandler(self.opts.proxies) ) - -                # ------------------------------------------------------- -                # OK, these next few lines are a serious kludge to get -                # around what I think is a bug in python 2.2's -                # urllib2.  The basic idea is that default handlers -                # get applied first.  If you override one (like a -                # proxy handler), then the default gets pulled, but -                # the replacement goes on the end.  
In the case of -                # proxies, this means the normal handler picks it up -                # first and the proxy isn't used.  Now, this probably -                # only happened with ftp or non-keepalive http, so not -                # many folks saw it.  The simple approach to fixing it -                # is just to make sure you override the other -                # conflicting defaults as well.  I would LOVE to see -                # these go way or be dealt with more elegantly.  The -                # problem isn't there after 2.2.  -MDS 2005/02/24 -                if not need_keepalive_handler: -                    handlers.append( urllib2.HTTPHandler() ) -                if not need_range_handler: -                    handlers.append( urllib2.FTPHandler() ) -                # ------------------------------------------------------- - -            ssl_factory = sslfactory.get_factory(self.opts.ssl_ca_cert, -                self.opts.ssl_context) - -            if need_keepalive_handler: -                handlers.append(HTTPHandler()) -                handlers.append(HTTPSHandler(ssl_factory)) -            if need_range_handler: -                handlers.extend( range_handlers ) -            handlers.append( auth_handler ) -            if self.opts.cache_openers: -                self._opener = CachedOpenerDirector(ssl_factory, *handlers) -            else: -                self._opener = ssl_factory.create_opener(*handlers) -            # OK, I don't like to do this, but otherwise, we end up with -            # TWO user-agent headers. -            self._opener.addheaders = [] -        return self._opener -         -    def _do_open(self): -        opener = self._get_opener() - -        req = urllib2.Request(self.url, self.opts.data) # build request object -        self._add_headers(req) # add misc headers that we need -        self._build_range(req) # take care of reget and byterange stuff - -        fo, hdr = self._make_request(req, opener) -        if self.reget_time and self.opts.reget == 'check_timestamp': -            # do this if we have a local file with known timestamp AND -            # we're in check_timestamp reget mode. -            fetch_again = 0 -            try: -                modified_tuple  = hdr.getdate_tz('last-modified') -                modified_stamp  = rfc822.mktime_tz(modified_tuple) -                if modified_stamp > self.reget_time: fetch_again = 1 -            except (TypeError,): -                fetch_again = 1 -             -            if fetch_again: -                # the server version is newer than the (incomplete) local -                # version, so we should abandon the version we're getting -                # and fetch the whole thing again. -                fo.close() -                self.opts.reget = None -                del req.headers['Range'] -                self._build_range(req) -                fo, hdr = self._make_request(req, opener) - -        (scheme, host, path, parm, query, frag) = urlparse.urlparse(self.url) -        path = urllib.unquote(path) -        if not (self.opts.progress_obj or self.opts.raw_throttle() \ -                or self.opts.timeout): -            # if we're not using the progress_obj, throttling, or timeout -            # we can get a performance boost by going directly to -            # the underlying fileobject for reads. 
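
# The same instance-level rebinding trick in isolation (a sketch, not
# part of the deleted file; FastFile is a hypothetical name):
class FastFile:
    def __init__(self, fo, need_hooks=0):
        self.fo = fo
        if not need_hooks:
            # the instance attribute shadows the class method below,
            # so each read skips one Python-level wrapper call
            self.read = fo.read

    def read(self, amt=-1):
        data = self.fo.read(amt)
        # ...progress/throttle bookkeeping would go here...
        return data
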
-            self.read = fo.read -            if hasattr(fo, 'readline'): -                self.readline = fo.readline -        elif self.opts.progress_obj: -            try:     -                length = int(hdr['Content-Length']) -                length = length + self._amount_read     # Account for regets -            except (KeyError, ValueError, TypeError):  -                length = None - -            self.opts.progress_obj.start(str(self.filename), -                                         urllib.unquote(self.url), -                                         os.path.basename(path),  -                                         length, text=self.opts.text) -            self.opts.progress_obj.update(0) -        (self.fo, self.hdr) = (fo, hdr) -     -    def _add_headers(self, req): -        if self.opts.user_agent: -            req.add_header('User-agent', self.opts.user_agent) -        try: req_type = req.get_type() -        except ValueError: req_type = None -        if self.opts.http_headers and req_type in ('http', 'https'): -            for h, v in self.opts.http_headers: -                req.add_header(h, v) -        if self.opts.ftp_headers and req_type == 'ftp': -            for h, v in self.opts.ftp_headers: -                req.add_header(h, v) - -    def _build_range(self, req): -        self.reget_time = None -        self.append = 0 -        reget_length = 0 -        rt = None -        if have_range and self.opts.reget and type(self.filename) == type(''): -            # we have reget turned on and we're dumping to a file -            try: -                s = os.stat(self.filename) -            except OSError: -                pass -            else: -                self.reget_time = s[ST_MTIME] -                reget_length = s[ST_SIZE] - -                # Set initial length when regetting -                self._amount_read = reget_length     - -                rt = reget_length, '' -                self.append = 1 -                 -        if self.opts.range: -            if not have_range: -                raise URLGrabError(10, _('Byte range requested but range '\ -                                         'support unavailable')) -            rt = self.opts.range -            if rt[0]: rt = (rt[0] + reget_length, rt[1]) - -        if rt: -            header = range_tuple_to_header(rt) -            if header: req.add_header('Range', header) - -    def _make_request(self, req, opener): -        try: -            if have_socket_timeout and self.opts.timeout: -                old_to = socket.getdefaulttimeout() -                socket.setdefaulttimeout(self.opts.timeout) -                try: -                    fo = opener.open(req) -                finally: -                    socket.setdefaulttimeout(old_to) -            else: -                fo = opener.open(req) -            hdr = fo.info() -        except ValueError, e: -            raise URLGrabError(1, _('Bad URL: %s') % (e, )) -        except RangeError, e: -            raise URLGrabError(9, str(e)) -        except urllib2.HTTPError, e: -            new_e = URLGrabError(14, str(e)) -            new_e.code = e.code -            new_e.exception = e -            raise new_e -        except IOError, e: -            if hasattr(e, 'reason') and have_socket_timeout and \ -                   isinstance(e.reason, TimeoutError): -                raise URLGrabError(12, _('Timeout: %s') % (e, )) -            else: -                raise URLGrabError(4, _('IOError: %s') % (e, )) -        except OSError, e: -            raise 
URLGrabError(5, _('OSError: %s') % (e, )) -        except HTTPException, e: -            raise URLGrabError(7, _('HTTP Exception (%s): %s') % \ -                            (e.__class__.__name__, e)) -        else: -            return (fo, hdr) -         -    def _do_grab(self): -        """dump the file to self.filename.""" -        if self.append: new_fo = open(self.filename, 'ab') -        else: new_fo = open(self.filename, 'wb') -        bs = 1024*8 -        size = 0 - -        block = self.read(bs) -        size = size + len(block) -        while block: -            new_fo.write(block) -            block = self.read(bs) -            size = size + len(block) - -        new_fo.close() -        try: -            modified_tuple  = self.hdr.getdate_tz('last-modified') -            modified_stamp  = rfc822.mktime_tz(modified_tuple) -            os.utime(self.filename, (modified_stamp, modified_stamp)) -        except (TypeError,), e: pass - -        return size -     -    def _fill_buffer(self, amt=None): -        """fill the buffer to contain at least 'amt' bytes by reading -        from the underlying file object.  If amt is None, then it will -        read until it gets nothing more.  It updates the progress meter -        and throttles after every self._rbufsize bytes.""" -        # the _rbuf test is only in this first 'if' for speed.  It's not -        # logically necessary -        if self._rbuf and not amt is None: -            L = len(self._rbuf) -            if amt > L: -                amt = amt - L -            else: -                return - -        # if we've made it here, then we don't have enough in the buffer -        # and we need to read more. - -        buf = [self._rbuf] -        bufsize = len(self._rbuf) -        while amt is None or amt: -            # first, delay if necessary for throttling reasons -            if self.opts.raw_throttle(): -                diff = self._tsize/self.opts.raw_throttle() - \ -                       (time.time() - self._ttime) -                if diff > 0: time.sleep(diff) -                self._ttime = time.time() -                 -            # now read some data, up to self._rbufsize -            if amt is None: readamount = self._rbufsize -            else:           readamount = min(amt, self._rbufsize) -            try: -                new = self.fo.read(readamount) -            except socket.error, e: -                raise URLGrabError(4, _('Socket Error: %s') % (e, )) -            except TimeoutError, e: -                raise URLGrabError(12, _('Timeout: %s') % (e, )) -            except IOError, e: -                raise URLGrabError(4, _('IOError: %s') %(e,)) -            newsize = len(new) -            if not newsize: break # no more to read - -            if amt: amt = amt - newsize -            buf.append(new) -            bufsize = bufsize + newsize -            self._tsize = newsize -            self._amount_read = self._amount_read + newsize -            if self.opts.progress_obj: -                self.opts.progress_obj.update(self._amount_read) - -        self._rbuf = string.join(buf, '') -        return - -    def read(self, amt=None): -        self._fill_buffer(amt) -        if amt is None: -            s, self._rbuf = self._rbuf, '' -        else: -            s, self._rbuf = self._rbuf[:amt], self._rbuf[amt:] -        return s - -    def readline(self, limit=-1): -        i = string.find(self._rbuf, '\n') -        while i < 0 and not (0 < limit <= len(self._rbuf)): -            L = len(self._rbuf) -            
self._fill_buffer(L + self._rbufsize) -            if not len(self._rbuf) > L: break -            i = string.find(self._rbuf, '\n', L) - -        if i < 0: i = len(self._rbuf) -        else: i = i+1 -        if 0 <= limit < len(self._rbuf): i = limit - -        s, self._rbuf = self._rbuf[:i], self._rbuf[i:] -        return s - -    def close(self): -        if self.opts.progress_obj: -            self.opts.progress_obj.end(self._amount_read) -        self.fo.close() -        if self.opts.close_connection: -            try: self.fo.close_connection() -            except: pass - -_handler_cache = [] -def CachedOpenerDirector(ssl_factory = None, *handlers): -    for (cached_handlers, opener) in _handler_cache: -        if cached_handlers == handlers: -            for handler in opener.handlers: -                handler.add_parent(opener) -            return opener -    if not ssl_factory: -        ssl_factory = sslfactory.get_factory() -    opener = ssl_factory.create_opener(*handlers) -    _handler_cache.append( (handlers, opener) ) -    return opener - -_proxy_cache = [] -def CachedProxyHandler(proxies): -    for (pdict, handler) in _proxy_cache: -        if pdict == proxies: -            if DEBUG: DEBUG.debug('re-using proxy settings: %s', proxies) -            break -    else: -        for k, v in proxies.items(): -            utype, url = urllib.splittype(v) -            host, other = urllib.splithost(url) -            if (utype is None) or (host is None): -                raise URLGrabError(13, _('Bad proxy URL: %s') % v) - -        if DEBUG: DEBUG.info('creating new proxy handler: %s', proxies) -        handler = urllib2.ProxyHandler(proxies) -        _proxy_cache.append( (proxies, handler) ) -    return handler - -##################################################################### -# DEPRECATED FUNCTIONS -def set_throttle(new_throttle): -    """Deprecated. Use: default_grabber.throttle = new_throttle""" -    default_grabber.throttle = new_throttle - -def set_bandwidth(new_bandwidth): -    """Deprecated. Use: default_grabber.bandwidth = new_bandwidth""" -    default_grabber.bandwidth = new_bandwidth - -def set_progress_obj(new_progress_obj): -    """Deprecated. Use: default_grabber.progress_obj = new_progress_obj""" -    default_grabber.progress_obj = new_progress_obj - -def set_user_agent(new_user_agent): -    """Deprecated. Use: default_grabber.user_agent = new_user_agent""" -    default_grabber.user_agent = new_user_agent -     -def retrygrab(url, filename=None, copy_local=0, close_connection=0, -              progress_obj=None, throttle=None, bandwidth=None, -              numtries=3, retrycodes=[-1,2,4,5,6,7], checkfunc=None): -    """Deprecated. 
Use: urlgrab() with the retry arg instead""" -    kwargs = {'copy_local' :  copy_local,  -              'close_connection' : close_connection, -              'progress_obj' : progress_obj,  -              'throttle' : throttle,  -              'bandwidth' : bandwidth, -              'retry' : numtries, -              'retrycodes' : retrycodes, -              'checkfunc' : checkfunc  -              } -    return urlgrab(url, filename, **kwargs) - -         -##################################################################### -#  TESTING -def _main_test(): -    import sys -    try: url, filename = sys.argv[1:3] -    except ValueError: -        print 'usage:', sys.argv[0], \ -              '<url> <filename> [copy_local=0|1] [close_connection=0|1]' -        sys.exit() - -    kwargs = {} -    for a in sys.argv[3:]: -        k, v = string.split(a, '=', 1) -        kwargs[k] = int(v) - -    set_throttle(1.0) -    set_bandwidth(32 * 1024) -    print "throttle: %s,  throttle bandwidth: %s B/s" % (default_grabber.throttle,  -                                                        default_grabber.bandwidth) - -    try: from progress import text_progress_meter -    except ImportError, e: pass -    else: kwargs['progress_obj'] = text_progress_meter() - -    try: name = apply(urlgrab, (url, filename), kwargs) -    except URLGrabError, e: print e -    else: print 'LOCAL FILE:', name - - -def _retry_test(): -    import sys -    try: url, filename = sys.argv[1:3] -    except ValueError: -        print 'usage:', sys.argv[0], \ -              '<url> <filename> [copy_local=0|1] [close_connection=0|1]' -        sys.exit() - -    kwargs = {} -    for a in sys.argv[3:]: -        k, v = string.split(a, '=', 1) -        kwargs[k] = int(v) - -    try: from progress import text_progress_meter -    except ImportError, e: pass -    else: kwargs['progress_obj'] = text_progress_meter() - -    def cfunc(filename, hello, there='foo'): -        print hello, there -        import random -        rnum = random.random() -        if rnum < .5: -            print 'forcing retry' -            raise URLGrabError(-1, 'forcing retry') -        if rnum < .75: -            print 'forcing failure' -            raise URLGrabError(-2, 'forcing immediate failure') -        print 'success' -        return -         -    kwargs['checkfunc'] = (cfunc, ('hello',), {'there':'there'}) -    try: name = apply(retrygrab, (url, filename), kwargs) -    except URLGrabError, e: print e -    else: print 'LOCAL FILE:', name - -def _file_object_test(filename=None): -    import random, cStringIO, sys -    if filename is None: -        filename = __file__ -    print 'using file "%s" for comparisons' % filename -    fo = open(filename) -    s_input = fo.read() -    fo.close() - -    for testfunc in [_test_file_object_smallread, -                     _test_file_object_readall, -                     _test_file_object_readline, -                     _test_file_object_readlines]: -        fo_input = cStringIO.StringIO(s_input) -        fo_output = cStringIO.StringIO() -        wrapper = URLGrabberFileObject(fo_input, None, 0) -        print 'testing %-30s ' % testfunc.__name__, -        testfunc(wrapper, fo_output) -        s_output = fo_output.getvalue() -        if s_output == s_input: print 'passed' -        else: print 'FAILED' -             -def _test_file_object_smallread(wrapper, fo_output): -    while 1: -        s = wrapper.read(23) -        fo_output.write(s) -        if not s: return - -def _test_file_object_readall(wrapper, fo_output): -    s = 
wrapper.read() -    fo_output.write(s) - -def _test_file_object_readline(wrapper, fo_output): -    while 1: -        s = wrapper.readline() -        fo_output.write(s) -        if not s: return - -def _test_file_object_readlines(wrapper, fo_output): -    li = wrapper.readlines() -    fo_output.write(string.join(li, '')) - -if __name__ == '__main__': -    _main_test() -    _retry_test() -    _file_object_test('test') diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/keepalive.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/keepalive.py deleted file mode 100644 index 71393e2b8d..0000000000 --- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/keepalive.py +++ /dev/null @@ -1,617 +0,0 @@ -#   This library is free software; you can redistribute it and/or -#   modify it under the terms of the GNU Lesser General Public -#   License as published by the Free Software Foundation; either -#   version 2.1 of the License, or (at your option) any later version. -# -#   This library is distributed in the hope that it will be useful, -#   but WITHOUT ANY WARRANTY; without even the implied warranty of -#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU -#   Lesser General Public License for more details. -# -#   You should have received a copy of the GNU Lesser General Public -#   License along with this library; if not, write to the  -#      Free Software Foundation, Inc.,  -#      59 Temple Place, Suite 330,  -#      Boston, MA  02111-1307  USA - -# This file is part of urlgrabber, a high-level cross-protocol url-grabber -# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko - -"""An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive. - ->>> import urllib2 ->>> from keepalive import HTTPHandler ->>> keepalive_handler = HTTPHandler() ->>> opener = urllib2.build_opener(keepalive_handler) ->>> urllib2.install_opener(opener) ->>>  ->>> fo = urllib2.urlopen('http://www.python.org') - -If a connection to a given host is requested, and all of the existing -connections are still in use, another connection will be opened.  If -the handler tries to use an existing connection but it fails in some -way, it will be closed and removed from the pool. - -To remove the handler, simply re-run build_opener with no arguments, and -install that opener. - -You can explicitly close connections by using the close_connection() -method of the returned file-like object (described below) or you can -use the handler methods: - -  close_connection(host) -  close_all() -  open_connections() - -NOTE: using the close_connection and close_all methods of the handler -should be done with care when using multiple threads. 
-  * there is nothing that prevents another thread from creating new -    connections immediately after connections are closed -  * no checks are done to prevent in-use connections from being closed - ->>> keepalive_handler.close_all() - -EXTRA ATTRIBUTES AND METHODS - -  Upon a status of 200, the object returned has a few additional -  attributes and methods, which should not be used if you want to -  remain consistent with the normal urllib2-returned objects: - -    close_connection()  -  close the connection to the host -    readlines()         -  you know, readlines() -    status              -  the return status (ie 404) -    reason              -  english translation of status (ie 'File not found') - -  If you want the best of both worlds, use this inside an -  AttributeError-catching try: - -  >>> try: status = fo.status -  >>> except AttributeError: status = None - -  Unfortunately, these are ONLY there if status == 200, so it's not -  easy to distinguish between non-200 responses.  The reason is that -  urllib2 tries to do clever things with error codes 301, 302, 401, -  and 407, and it wraps the object upon return. - -  For python versions earlier than 2.4, you can avoid this fancy error -  handling by setting the module-level global HANDLE_ERRORS to zero. -  You see, prior to 2.4, it's the HTTP Handler's job to determine what -  to handle specially, and what to just pass up.  HANDLE_ERRORS == 0 -  means "pass everything up".  In python 2.4, however, this job no -  longer belongs to the HTTP Handler and is now done by a NEW handler, -  HTTPErrorProcessor.  Here's the bottom line: - -    python version < 2.4 -        HANDLE_ERRORS == 1  (default) pass up 200, treat the rest as -                            errors -        HANDLE_ERRORS == 0  pass everything up, error processing is -                            left to the calling code -    python version >= 2.4 -        HANDLE_ERRORS == 1  pass up 200, treat the rest as errors -        HANDLE_ERRORS == 0  (default) pass everything up, let the -                            other handlers (specifically, -                            HTTPErrorProcessor) decide what to do - -  In practice, setting the variable either way makes little difference -  in python 2.4, so for the most consistent behavior across versions, -  you probably just want to use the defaults, which will give you -  exceptions on errors. 
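
  A compact sketch tying the pieces above together (Python 2, using
  only names introduced in this docstring; HANDLE_ERRORS is left at
  its module-level default):

  >>> import urllib2
  >>> from keepalive import HTTPHandler
  >>> keepalive_handler = HTTPHandler()
  >>> opener = urllib2.build_opener(keepalive_handler)
  >>> urllib2.install_opener(opener)
  >>> fo = urllib2.urlopen('http://www.python.org/')
  >>> try: status = fo.status
  >>> except AttributeError: status = None
  >>> # status is only present on the keepalive-wrapped 200 response
  >>> fo.close_connection()
  >>> keepalive_handler.close_all()
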
- -""" - -# $Id: keepalive.py,v 1.16 2006/09/22 00:58:05 mstenner Exp $ - -import urllib2 -import httplib -import socket -import thread - -DEBUG = None - -import sslfactory - -import sys -if sys.version_info < (2, 4): HANDLE_ERRORS = 1 -else: HANDLE_ERRORS = 0 -     -class ConnectionManager: -    """ -    The connection manager must be able to: -      * keep track of all existing -      """ -    def __init__(self): -        self._lock = thread.allocate_lock() -        self._hostmap = {} # map hosts to a list of connections -        self._connmap = {} # map connections to host -        self._readymap = {} # map connection to ready state - -    def add(self, host, connection, ready): -        self._lock.acquire() -        try: -            if not self._hostmap.has_key(host): self._hostmap[host] = [] -            self._hostmap[host].append(connection) -            self._connmap[connection] = host -            self._readymap[connection] = ready -        finally: -            self._lock.release() - -    def remove(self, connection): -        self._lock.acquire() -        try: -            try: -                host = self._connmap[connection] -            except KeyError: -                pass -            else: -                del self._connmap[connection] -                del self._readymap[connection] -                self._hostmap[host].remove(connection) -                if not self._hostmap[host]: del self._hostmap[host] -        finally: -            self._lock.release() - -    def set_ready(self, connection, ready): -        try: self._readymap[connection] = ready -        except KeyError: pass -         -    def get_ready_conn(self, host): -        conn = None -        self._lock.acquire() -        try: -            if self._hostmap.has_key(host): -                for c in self._hostmap[host]: -                    if self._readymap[c]: -                        self._readymap[c] = 0 -                        conn = c -                        break -        finally: -            self._lock.release() -        return conn - -    def get_all(self, host=None): -        if host: -            return list(self._hostmap.get(host, [])) -        else: -            return dict(self._hostmap) - -class KeepAliveHandler: -    def __init__(self): -        self._cm = ConnectionManager() -         -    #### Connection Management -    def open_connections(self): -        """return a list of connected hosts and the number of connections -        to each.  [('foo.com:80', 2), ('bar.org', 1)]""" -        return [(host, len(li)) for (host, li) in self._cm.get_all().items()] - -    def close_connection(self, host): -        """close connection(s) to <host> -        host is the host:port spec, as in 'www.cnn.com:8080' as passed in. 
-        no error occurs if there is no connection to that host.""" -        for h in self._cm.get_all(host): -            self._cm.remove(h) -            h.close() -         -    def close_all(self): -        """close all open connections""" -        for host, conns in self._cm.get_all().items(): -            for h in conns: -                self._cm.remove(h) -                h.close() -         -    def _request_closed(self, request, host, connection): -        """tells us that this request is now closed and the the -        connection is ready for another request""" -        self._cm.set_ready(connection, 1) - -    def _remove_connection(self, host, connection, close=0): -        if close: connection.close() -        self._cm.remove(connection) -         -    #### Transaction Execution -    def do_open(self, req): -        host = req.get_host() -        if not host: -            raise urllib2.URLError('no host given') - -        try: -            h = self._cm.get_ready_conn(host) -            while h: -                r = self._reuse_connection(h, req, host) - -                # if this response is non-None, then it worked and we're -                # done.  Break out, skipping the else block. -                if r: break - -                # connection is bad - possibly closed by server -                # discard it and ask for the next free connection -                h.close() -                self._cm.remove(h) -                h = self._cm.get_ready_conn(host) -            else: -                # no (working) free connections were found.  Create a new one. -                h = self._get_connection(host) -                if DEBUG: DEBUG.info("creating new connection to %s (%d)", -                                     host, id(h)) -                self._cm.add(host, h, 0) -                self._start_transaction(h, req) -                r = h.getresponse() -        except (socket.error, httplib.HTTPException), err: -            raise urllib2.URLError(err) -             -        # if not a persistent connection, don't try to reuse it -        if r.will_close: self._cm.remove(h) - -        if DEBUG: DEBUG.info("STATUS: %s, %s", r.status, r.reason) -        r._handler = self -        r._host = host -        r._url = req.get_full_url() -        r._connection = h -        r.code = r.status -        r.headers = r.msg -        r.msg = r.reason -         -        if r.status == 200 or not HANDLE_ERRORS: -            return r -        else: -            return self.parent.error('http', req, r, -                                     r.status, r.msg, r.headers) - -    def _reuse_connection(self, h, req, host): -        """start the transaction with a re-used connection -        return a response object (r) upon success or None on failure. -        This DOES not close or remove bad connections in cases where -        it returns.  However, if an unexpected exception occurs, it -        will close and remove the connection before re-raising. -        """ -        try: -            self._start_transaction(h, req) -            r = h.getresponse() -            # note: just because we got something back doesn't mean it -            # worked.  We'll check the version below, too. -        except (socket.error, httplib.HTTPException): -            r = None -        except: -            # adding this block just in case we've missed -            # something we will still raise the exception, but -            # lets try and close the connection and remove it -            # first.  
We previously got into a nasty loop
-            # where an exception was uncaught, and so the
-            # connection stayed open.  On the next try, the
-            # same exception was raised, etc.  The tradeoff is
-            # that it's now possible this call will raise
-            # a DIFFERENT exception
-            if DEBUG: DEBUG.error("unexpected exception - closing " + \
-                                  "connection to %s (%d)", host, id(h))
-            self._cm.remove(h)
-            h.close()
-            raise
-
-        if r is None or r.version == 9:
-            # httplib falls back to assuming HTTP 0.9 if it gets a
-            # bad header back.  This is most likely to happen if
-            # the socket has been closed by the server since we
-            # last used the connection.
-            if DEBUG: DEBUG.info("failed to re-use connection to %s (%d)",
-                                 host, id(h))
-            r = None
-        else:
-            if DEBUG: DEBUG.info("re-using connection to %s (%d)", host, id(h))
-
-        return r
-
-    def _start_transaction(self, h, req):
-        try:
-            if req.has_data():
-                data = req.get_data()
-                h.putrequest('POST', req.get_selector())
-                if not req.headers.has_key('Content-type'):
-                    h.putheader('Content-type',
-                                'application/x-www-form-urlencoded')
-                if not req.headers.has_key('Content-length'):
-                    h.putheader('Content-length', '%d' % len(data))
-            else:
-                h.putrequest('GET', req.get_selector())
-        except (socket.error, httplib.HTTPException), err:
-            raise urllib2.URLError(err)
-
-        for args in self.parent.addheaders:
-            h.putheader(*args)
-        for k, v in req.headers.items():
-            h.putheader(k, v)
-        h.endheaders()
-        if req.has_data():
-            h.send(data)
-
-    def _get_connection(self, host):
-        raise NotImplementedError
-
-class HTTPHandler(KeepAliveHandler, urllib2.HTTPHandler):
-    def __init__(self):
-        KeepAliveHandler.__init__(self)
-
-    def http_open(self, req):
-        return self.do_open(req)
-
-    def _get_connection(self, host):
-        return HTTPConnection(host)
-
-class HTTPSHandler(KeepAliveHandler, urllib2.HTTPSHandler):
-    def __init__(self, ssl_factory=None):
-        KeepAliveHandler.__init__(self)
-        if not ssl_factory:
-            ssl_factory = sslfactory.get_factory()
-        self._ssl_factory = ssl_factory
-
-    def https_open(self, req):
-        return self.do_open(req)
-
-    def _get_connection(self, host):
-        return self._ssl_factory.get_https_connection(host)
-
-class HTTPResponse(httplib.HTTPResponse):
-    # we need to subclass HTTPResponse in order to
-    # 1) add readline() and readlines() methods
-    # 2) add close_connection() method
-    # 3) add info() and geturl() methods
-
-    # in order to add readline(), read must be modified to deal with a
-    # buffer.  example: readline must read a buffer and then spit back
-    # one line at a time.  The only real alternative is to read one
-    # BYTE at a time (ick).  Once something has been read, it can't be
-    # put back (ok, maybe it can, but that's even uglier than this),
-    # so if you THEN do a normal read, you must first take stuff from
-    # the buffer.
- -    # the read method wraps the original to accomodate buffering, -    # although read() never adds to the buffer. -    # Both readline and readlines have been stolen with almost no -    # modification from socket.py -     - -    def __init__(self, sock, debuglevel=0, strict=0, method=None): -        if method: # the httplib in python 2.3 uses the method arg -            httplib.HTTPResponse.__init__(self, sock, debuglevel, method) -        else: # 2.2 doesn't -            httplib.HTTPResponse.__init__(self, sock, debuglevel) -        self.fileno = sock.fileno -        self.code = None -        self._rbuf = '' -        self._rbufsize = 8096 -        self._handler = None # inserted by the handler later -        self._host = None    # (same) -        self._url = None     # (same) -        self._connection = None # (same) - -    _raw_read = httplib.HTTPResponse.read - -    def close(self): -        if self.fp: -            self.fp.close() -            self.fp = None -            if self._handler: -                self._handler._request_closed(self, self._host, -                                              self._connection) - -    def close_connection(self): -        self._handler._remove_connection(self._host, self._connection, close=1) -        self.close() -         -    def info(self): -        return self.headers - -    def geturl(self): -        return self._url - -    def read(self, amt=None): -        # the _rbuf test is only in this first if for speed.  It's not -        # logically necessary -        if self._rbuf and not amt is None: -            L = len(self._rbuf) -            if amt > L: -                amt -= L -            else: -                s = self._rbuf[:amt] -                self._rbuf = self._rbuf[amt:] -                return s - -        s = self._rbuf + self._raw_read(amt) -        self._rbuf = '' -        return s - -    def readline(self, limit=-1): -        data = "" -        i = self._rbuf.find('\n') -        while i < 0 and not (0 < limit <= len(self._rbuf)): -            new = self._raw_read(self._rbufsize) -            if not new: break -            i = new.find('\n') -            if i >= 0: i = i + len(self._rbuf) -            self._rbuf = self._rbuf + new -        if i < 0: i = len(self._rbuf) -        else: i = i+1 -        if 0 <= limit < len(self._rbuf): i = limit -        data, self._rbuf = self._rbuf[:i], self._rbuf[i:] -        return data - -    def readlines(self, sizehint = 0): -        total = 0 -        list = [] -        while 1: -            line = self.readline() -            if not line: break -            list.append(line) -            total += len(line) -            if sizehint and total >= sizehint: -                break -        return list - - -class HTTPConnection(httplib.HTTPConnection): -    # use the modified response class -    response_class = HTTPResponse - -class HTTPSConnection(httplib.HTTPSConnection): -    response_class = HTTPResponse -     -######################################################################### -#####   TEST FUNCTIONS -######################################################################### - -def error_handler(url): -    global HANDLE_ERRORS -    orig = HANDLE_ERRORS -    keepalive_handler = HTTPHandler() -    opener = urllib2.build_opener(keepalive_handler) -    urllib2.install_opener(opener) -    pos = {0: 'off', 1: 'on'} -    for i in (0, 1): -        print "  fancy error handling %s (HANDLE_ERRORS = %i)" % (pos[i], i) -        HANDLE_ERRORS = i -        try: -            fo = 
urllib2.urlopen(url) -            foo = fo.read() -            fo.close() -            try: status, reason = fo.status, fo.reason -            except AttributeError: status, reason = None, None -        except IOError, e: -            print "  EXCEPTION: %s" % e -            raise -        else: -            print "  status = %s, reason = %s" % (status, reason) -    HANDLE_ERRORS = orig -    hosts = keepalive_handler.open_connections() -    print "open connections:", hosts -    keepalive_handler.close_all() - -def continuity(url): -    import md5 -    format = '%25s: %s' -     -    # first fetch the file with the normal http handler -    opener = urllib2.build_opener() -    urllib2.install_opener(opener) -    fo = urllib2.urlopen(url) -    foo = fo.read() -    fo.close() -    m = md5.new(foo) -    print format % ('normal urllib', m.hexdigest()) - -    # now install the keepalive handler and try again -    opener = urllib2.build_opener(HTTPHandler()) -    urllib2.install_opener(opener) - -    fo = urllib2.urlopen(url) -    foo = fo.read() -    fo.close() -    m = md5.new(foo) -    print format % ('keepalive read', m.hexdigest()) - -    fo = urllib2.urlopen(url) -    foo = '' -    while 1: -        f = fo.readline() -        if f: foo = foo + f -        else: break -    fo.close() -    m = md5.new(foo) -    print format % ('keepalive readline', m.hexdigest()) - -def comp(N, url): -    print '  making %i connections to:\n  %s' % (N, url) - -    sys.stdout.write('  first using the normal urllib handlers') -    # first use normal opener -    opener = urllib2.build_opener() -    urllib2.install_opener(opener) -    t1 = fetch(N, url) -    print '  TIME: %.3f s' % t1 - -    sys.stdout.write('  now using the keepalive handler       ') -    # now install the keepalive handler and try again -    opener = urllib2.build_opener(HTTPHandler()) -    urllib2.install_opener(opener) -    t2 = fetch(N, url) -    print '  TIME: %.3f s' % t2 -    print '  improvement factor: %.2f' % (t1/t2, ) -     -def fetch(N, url, delay=0): -    import time -    lens = [] -    starttime = time.time() -    for i in range(N): -        if delay and i > 0: time.sleep(delay) -        fo = urllib2.urlopen(url) -        foo = fo.read() -        fo.close() -        lens.append(len(foo)) -    diff = time.time() - starttime - -    j = 0 -    for i in lens[1:]: -        j = j + 1 -        if not i == lens[0]: -            print "WARNING: inconsistent length on read %i: %i" % (j, i) - -    return diff - -def test_timeout(url): -    global DEBUG -    dbbackup = DEBUG -    class FakeLogger: -        def debug(self, msg, *args): print msg % args -        info = warning = error = debug -    DEBUG = FakeLogger() -    print "  fetching the file to establish a connection" -    fo = urllib2.urlopen(url) -    data1 = fo.read() -    fo.close() -  -    i = 20 -    print "  waiting %i seconds for the server to close the connection" % i -    while i > 0: -        sys.stdout.write('\r  %2i' % i) -        sys.stdout.flush() -        time.sleep(1) -        i -= 1 -    sys.stderr.write('\r') - -    print "  fetching the file a second time" -    fo = urllib2.urlopen(url) -    data2 = fo.read() -    fo.close() - -    if data1 == data2: -        print '  data are identical' -    else: -        print '  ERROR: DATA DIFFER' - -    DEBUG = dbbackup - -     -def test(url, N=10): -    print "checking error hander (do this on a non-200)" -    try: error_handler(url) -    except IOError, e: -        print "exiting - exception will prevent further tests" -        
sys.exit() -    print -    print "performing continuity test (making sure stuff isn't corrupted)" -    continuity(url) -    print -    print "performing speed comparison" -    comp(N, url) -    print -    print "performing dropped-connection check" -    test_timeout(url) -     -if __name__ == '__main__': -    import time -    import sys -    try: -        N = int(sys.argv[1]) -        url = sys.argv[2] -    except: -        print "%s <integer> <url>" % sys.argv[0] -    else: -        test(url, N) diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py deleted file mode 100644 index 9664c6b5c5..0000000000 --- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py +++ /dev/null @@ -1,458 +0,0 @@ -#   This library is free software; you can redistribute it and/or -#   modify it under the terms of the GNU Lesser General Public -#   License as published by the Free Software Foundation; either -#   version 2.1 of the License, or (at your option) any later version. -# -#   This library is distributed in the hope that it will be useful, -#   but WITHOUT ANY WARRANTY; without even the implied warranty of -#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU -#   Lesser General Public License for more details. -# -#   You should have received a copy of the GNU Lesser General Public -#   License along with this library; if not, write to the  -#      Free Software Foundation, Inc.,  -#      59 Temple Place, Suite 330,  -#      Boston, MA  02111-1307  USA - -# This file is part of urlgrabber, a high-level cross-protocol url-grabber -# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko - -"""Module for downloading files from a pool of mirrors - -DESCRIPTION - -  This module provides support for downloading files from a pool of -  mirrors with configurable failover policies.  To a large extent, the -  failover policy is chosen by using different classes derived from -  the main class, MirrorGroup. - -  Instances of MirrorGroup (and cousins) act very much like URLGrabber -  instances in that they have urlread, urlgrab, and urlopen methods. -  They can therefore, be used in very similar ways. - -    from urlgrabber.grabber import URLGrabber -    from urlgrabber.mirror import MirrorGroup -    gr = URLGrabber() -    mg = MirrorGroup(gr, ['http://foo.com/some/directory/', -                          'http://bar.org/maybe/somewhere/else/', -                          'ftp://baz.net/some/other/place/entirely/'] -    mg.urlgrab('relative/path.zip') - -  The assumption is that all mirrors are identical AFTER the base urls -  specified, so that any mirror can be used to fetch any file. - -FAILOVER - -  The failover mechanism is designed to be customized by subclassing -  from MirrorGroup to change the details of the behavior.  In general, -  the classes maintain a master mirror list and a "current mirror" -  index.  When a download is initiated, a copy of this list and index -  is created for that download only.  The specific failover policy -  depends on the class used, and so is documented in the class -  documentation.  Note that ANY behavior of the class can be -  overridden, so any failover policy at all is possible (although -  you may need to change the interface in extreme cases). - -CUSTOMIZATION - -  Most customization of a MirrorGroup object is done at instantiation -  time (or via subclassing).  
There are four major types of -  customization: - -    1) Pass in a custom urlgrabber - The passed in urlgrabber will be -       used (by default... see #2) for the grabs, so options to it -       apply for the url-fetching - -    2) Custom mirror list - Mirror lists can simply be a list of -       stings mirrors (as shown in the example above) but each can -       also be a dict, allowing for more options.  For example, the -       first mirror in the list above could also have been: - -         {'mirror': 'http://foo.com/some/directory/', -          'grabber': <a custom grabber to be used for this mirror>, -          'kwargs': { <a dict of arguments passed to the grabber> }} - -       All mirrors are converted to this format internally.  If -       'grabber' is omitted, the default grabber will be used.  If -       kwargs are omitted, then (duh) they will not be used. - -    3) Pass keyword arguments when instantiating the mirror group. -       See, for example, the failure_callback argument. - -    4) Finally, any kwargs passed in for the specific file (to the -       urlgrab method, for example) will be folded in.  The options -       passed into the grabber's urlXXX methods will override any -       options specified in a custom mirror dict. - -""" - -# $Id: mirror.py,v 1.14 2006/02/22 18:26:46 mstenner Exp $ - -import random -import thread  # needed for locking to make this threadsafe - -from grabber import URLGrabError, CallbackObject, DEBUG - -try: -    from i18n import _ -except ImportError, msg: -    def _(st): return st - -class GrabRequest: -    """This is a dummy class used to hold information about the specific -    request.  For example, a single file.  By maintaining this information -    separately, we can accomplish two things: - -      1) make it a little easier to be threadsafe -      2) have request-specific parameters -    """ -    pass - -class MirrorGroup: -    """Base Mirror class - -    Instances of this class are built with a grabber object and a list -    of mirrors.  Then all calls to urlXXX should be passed relative urls. -    The requested file will be searched for on the first mirror.  If the -    grabber raises an exception (possibly after some retries) then that -    mirror will be removed from the list, and the next will be attempted. -    If all mirrors are exhausted, then an exception will be raised. - -    MirrorGroup has the following failover policy: - -      * downloads begin with the first mirror - -      * by default (see default_action below) a failure (after retries) -        causes it to increment the local AND master indices.  Also, -        the current mirror is removed from the local list (but NOT the -        master list - the mirror can potentially be used for other -        files) - -      * if the local list is ever exhausted, a URLGrabError will be -        raised (errno=256, no more mirrors) - -    OPTIONS - -      In addition to the required arguments "grabber" and "mirrors", -      MirrorGroup also takes the following optional arguments: -       -      default_action - -        A dict that describes the actions to be taken upon failure -        (after retries).  
default_action can contain any of the
-        following keys (shown here with their default values):
-
-          default_action = {'increment': 1,
-                            'increment_master': 1,
-                            'remove': 1,
-                            'remove_master': 0,
-                            'fail': 0}
-
-        In this context, 'increment' means "use the next mirror" and
-        'remove' means "never use this mirror again".  The two
-        'master' values refer to the instance-level mirror list (used
-        for all files), whereas the non-master values refer to the
-        current download only.
-
-        The 'fail' option will cause immediate failure by re-raising
-        the exception; no further attempts will be made to get the
-        current download.
-
-        This dict can be set at instantiation time,
-          mg = MirrorGroup(grabber, mirrors, default_action={'fail':1})
-        at method-execution time (only applies to current fetch),
-          filename = mg.urlgrab(url, default_action={'increment': 0})
-        or by returning an action dict from the failure_callback
-          return {'fail':0}
-        in increasing precedence.
-
-        If all three of these were done, the net result would be:
-              {'increment': 0,         # set in method
-               'increment_master': 1,  # class default
-               'remove': 1,            # class default
-               'remove_master': 0,     # class default
-               'fail': 0}              # set at instantiation, reset
-                                       # from callback
-
-      failure_callback
-
-        this is a callback that will be called when a mirror "fails",
-        meaning the grabber raises some URLGrabError.  If this is a
-        tuple, it is interpreted to be of the form (cb, args, kwargs)
-        where cb is the actual callable object (function, method,
-        etc).  Otherwise, it is assumed to be the callable object
-        itself.  The callback will be passed a grabber.CallbackObject
-        instance along with args and kwargs (if present).  The following
-        attributes are defined within the instance:
-
-           obj.exception    = < exception that was raised >
-           obj.mirror       = < the mirror that was tried >
-           obj.relative_url = < url relative to the mirror >
-           obj.url          = < full url that failed >
-                              # .url is just the combination of .mirror
-                              # and .relative_url
-
-        The failure callback can return an action dict, as described
-        above.
-
-        Like default_action, the failure_callback can be set at
-        instantiation time or when the urlXXX method is called.  In
-        the latter case, it applies only for that fetch.
-
-        The callback can re-raise the exception quite easily.  For
-        example, this is a perfectly adequate callback function:
-
-          def callback(obj): raise obj.exception
-
-        WARNING: do not save the exception object (or the
-        CallbackObject instance).  As they contain stack frame
-        references, they can lead to circular references.
-
-    Notes:
-      * The behavior can be customized by deriving and overriding the
-        'CONFIGURATION METHODS'
-      * The 'grabber' instance is kept as a reference, not copied.
-        Therefore, the grabber instance can be modified externally
-        and changes will take effect immediately.
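
      Pulling these options together into one sketch (every keyword
      below is documented above; errno 14 is the grabber's HTTP-error
      code, and the mirror URLs are the placeholders used earlier):

        from urlgrabber.grabber import URLGrabber
        from urlgrabber.mirror import MirrorGroup

        def cb(obj):
            # obj is a grabber.CallbackObject with .exception, .mirror,
            # .relative_url and .url, as described above
            if getattr(obj.exception, 'errno', None) == 14:
                return {'remove_master': 1}   # never retry this mirror
            return {}                         # otherwise keep defaults

        mg = MirrorGroup(URLGrabber(retry=2),
                         ['http://foo.com/some/directory/',
                          'http://bar.org/maybe/somewhere/else/'],
                         failure_callback=cb,
                         default_action={'increment': 1, 'remove': 1})
        filename = mg.urlgrab('relative/path.zip')
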
-    """ - -    # notes on thread-safety: - -    #   A GrabRequest should never be shared by multiple threads because -    #   it's never saved inside the MG object and never returned outside it. -    #   therefore, it should be safe to access/modify grabrequest data -    #   without a lock.  However, accessing the mirrors and _next attributes -    #   of the MG itself must be done when locked to prevent (for example) -    #   removal of the wrong mirror. - -    ############################################################## -    #  CONFIGURATION METHODS  -  intended to be overridden to -    #                            customize behavior -    def __init__(self, grabber, mirrors, **kwargs): -        """Initialize the MirrorGroup object. - -        REQUIRED ARGUMENTS - -          grabber  - URLGrabber instance -          mirrors  - a list of mirrors - -        OPTIONAL ARGUMENTS - -          failure_callback  - callback to be used when a mirror fails -          default_action    - dict of failure actions - -        See the module-level and class level documentation for more -        details. -        """ - -        # OVERRIDE IDEAS: -        #   shuffle the list to randomize order -        self.grabber = grabber -        self.mirrors = self._parse_mirrors(mirrors) -        self._next = 0 -        self._lock = thread.allocate_lock() -        self.default_action = None -        self._process_kwargs(kwargs) - -    # if these values are found in **kwargs passed to one of the urlXXX -    # methods, they will be stripped before getting passed on to the -    # grabber -    options = ['default_action', 'failure_callback'] -     -    def _process_kwargs(self, kwargs): -        self.failure_callback = kwargs.get('failure_callback') -        self.default_action   = kwargs.get('default_action') -        -    def _parse_mirrors(self, mirrors): -        parsed_mirrors = [] -        for m in mirrors: -            if type(m) == type(''): m = {'mirror': m} -            parsed_mirrors.append(m) -        return parsed_mirrors -     -    def _load_gr(self, gr): -        # OVERRIDE IDEAS: -        #   shuffle gr list -        self._lock.acquire() -        gr.mirrors = list(self.mirrors) -        gr._next = self._next -        self._lock.release() - -    def _get_mirror(self, gr): -        # OVERRIDE IDEAS: -        #   return a random mirror so that multiple mirrors get used -        #   even without failures. -        if not gr.mirrors: -            raise URLGrabError(256, _('No more mirrors to try.')) -        return gr.mirrors[gr._next] - -    def _failure(self, gr, cb_obj): -        # OVERRIDE IDEAS: -        #   inspect the error - remove=1 for 404, remove=2 for connection -        #                       refused, etc. 
(this can also be done via -        #                       the callback) -        cb = gr.kw.get('failure_callback') or self.failure_callback -        if cb: -            if type(cb) == type( () ): -                cb, args, kwargs = cb -            else: -                args, kwargs = (), {} -            action = cb(cb_obj, *args, **kwargs) or {} -        else: -            action = {} -        # XXXX - decide - there are two ways to do this -        # the first is action-overriding as a whole - use the entire action -        # or fall back on module level defaults -        #action = action or gr.kw.get('default_action') or self.default_action -        # the other is to fall through for each element in the action dict -        a = dict(self.default_action or {}) -        a.update(gr.kw.get('default_action', {})) -        a.update(action) -        action = a -        self.increment_mirror(gr, action) -        if action and action.get('fail', 0): raise - -    def increment_mirror(self, gr, action={}): -        """Tell the mirror object increment the mirror index - -        This increments the mirror index, which amounts to telling the -        mirror object to use a different mirror (for this and future -        downloads). - -        This is a SEMI-public method.  It will be called internally, -        and you may never need to call it.  However, it is provided -        (and is made public) so that the calling program can increment -        the mirror choice for methods like urlopen.  For example, with -        urlopen, there's no good way for the mirror group to know that -        an error occurs mid-download (it's already returned and given -        you the file object). -         -        remove  ---  can have several values -           0   do not remove the mirror from the list -           1   remove the mirror for this download only -           2   remove the mirror permanently - -        beware of remove=0 as it can lead to infinite loops -        """ -        badmirror = gr.mirrors[gr._next] - -        self._lock.acquire() -        try: -            ind = self.mirrors.index(badmirror) -        except ValueError: -            pass -        else: -            if action.get('remove_master', 0): -                del self.mirrors[ind] -            elif self._next == ind and action.get('increment_master', 1): -                self._next += 1 -            if self._next >= len(self.mirrors): self._next = 0 -        self._lock.release() -         -        if action.get('remove', 1): -            del gr.mirrors[gr._next] -        elif action.get('increment', 1): -            gr._next += 1 -        if gr._next >= len(gr.mirrors): gr._next = 0 - -        if DEBUG: -            grm = [m['mirror'] for m in gr.mirrors] -            DEBUG.info('GR   mirrors: [%s] %i', ' '.join(grm), gr._next) -            selfm = [m['mirror'] for m in self.mirrors] -            DEBUG.info('MAIN mirrors: [%s] %i', ' '.join(selfm), self._next) - -    ##################################################################### -    # NON-CONFIGURATION METHODS -    # these methods are designed to be largely workhorse methods that -    # are not intended to be overridden.  
-    #####################################################################
-    # NON-CONFIGURATION METHODS
-    # these methods are designed to be largely workhorse methods that
-    # are not intended to be overridden.  That doesn't mean you can't;
-    # if you want to, feel free, but most things can be done by
-    # overriding the configuration methods :)
-
-    def _join_url(self, base_url, rel_url):
-        if base_url.endswith('/') or rel_url.startswith('/'):
-            return base_url + rel_url
-        else:
-            return base_url + '/' + rel_url
-
-    def _mirror_try(self, func, url, kw):
-        gr = GrabRequest()
-        gr.func = func
-        gr.url  = url
-        gr.kw   = dict(kw)
-        self._load_gr(gr)
-
-        # strip mirror-group-only options before they reach the grabber
-        for k in self.options:
-            try: del kw[k]
-            except KeyError: pass
-
-        while 1:
-            mirrorchoice = self._get_mirror(gr)
-            fullurl = self._join_url(mirrorchoice['mirror'], gr.url)
-            kwargs = dict(mirrorchoice.get('kwargs', {}))
-            kwargs.update(kw)
-            grabber = mirrorchoice.get('grabber') or self.grabber
-            func_ref = getattr(grabber, func)
-            if DEBUG: DEBUG.info('MIRROR: trying %s -> %s', url, fullurl)
-            try:
-                return func_ref(fullurl, **kwargs)
-            except URLGrabError, e:
-                if DEBUG: DEBUG.info('MIRROR: failed')
-                obj = CallbackObject()
-                obj.exception = e
-                obj.mirror = mirrorchoice['mirror']
-                obj.relative_url = gr.url
-                obj.url = fullurl
-                self._failure(gr, obj)
-
-    def urlgrab(self, url, filename=None, **kwargs):
-        kw = dict(kwargs)
-        kw['filename'] = filename
-        func = 'urlgrab'
-        return self._mirror_try(func, url, kw)
-
-    def urlopen(self, url, **kwargs):
-        kw = dict(kwargs)
-        func = 'urlopen'
-        return self._mirror_try(func, url, kw)
-
-    def urlread(self, url, limit=None, **kwargs):
-        kw = dict(kwargs)
-        kw['limit'] = limit
-        func = 'urlread'
-        return self._mirror_try(func, url, kw)
-
-
-class MGRandomStart(MirrorGroup):
-    """A mirror group that starts at a random mirror in the list.
-
-    The behavior of this class is identical to MirrorGroup, except that
-    it starts at a random location in the mirror list.
-    """
-
-    def __init__(self, grabber, mirrors, **kwargs):
-        """Initialize the object.
-
-        The arguments for initialization are the same as for MirrorGroup.
-        """
-        MirrorGroup.__init__(self, grabber, mirrors, **kwargs)
-        self._next = random.randrange(len(mirrors))
-
-class MGRandomOrder(MirrorGroup):
-    """A mirror group that uses mirrors in a random order.
-
-    The behavior of this class is identical to MirrorGroup, except that
-    it uses the mirrors in a random order.  Note that the order is set at
-    initialization time and fixed thereafter.  That is, it does not pick a
-    random mirror after each failure.
-    """
-
-    def __init__(self, grabber, mirrors, **kwargs):
-        """Initialize the object.
-
-        The arguments for initialization are the same as for MirrorGroup.
-        """
-        MirrorGroup.__init__(self, grabber, mirrors, **kwargs)
-        random.shuffle(self.mirrors)
-
-if __name__ == '__main__':
-    pass
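Tying the pieces together, a hedged end-to-end sketch of the classes this diff removes — the mirror URLs and file names are placeholders, and the imports assume the package is importable as urlgrabber:

    from urlgrabber.grabber import URLGrabber
    from urlgrabber.mirror import MirrorGroup, MGRandomOrder

    g = URLGrabber()
    mg = MirrorGroup(g, ['http://a.example.com/pub',
                         'http://b.example.com/pub'])
    # the relative url is joined (via _join_url) to whichever mirror is
    # current; on URLGrabError the group fails over per the action dict
    mg.urlgrab('dists/files.txt', filename='files.txt')

    # same API, but the mirror list is shuffled once at init time
    mg2 = MGRandomOrder(g, ['http://a.example.com/pub',
                            'http://b.example.com/pub'])
    data = mg2.urlread('dists/files.txt')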
-    """ - -    def __init__(self, grabber, mirrors, **kwargs): -        """Initialize the object - -        The arguments for intialization are the same as for MirrorGroup -        """ -        MirrorGroup.__init__(self, grabber, mirrors, **kwargs) -        random.shuffle(self.mirrors) - -if __name__ == '__main__': -    pass diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/progress.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/progress.py deleted file mode 100644 index 02db524e76..0000000000 --- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/progress.py +++ /dev/null @@ -1,530 +0,0 @@ -#   This library is free software; you can redistribute it and/or -#   modify it under the terms of the GNU Lesser General Public -#   License as published by the Free Software Foundation; either -#   version 2.1 of the License, or (at your option) any later version. -# -#   This library is distributed in the hope that it will be useful, -#   but WITHOUT ANY WARRANTY; without even the implied warranty of -#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU -#   Lesser General Public License for more details. -# -#   You should have received a copy of the GNU Lesser General Public -#   License along with this library; if not, write to the  -#      Free Software Foundation, Inc.,  -#      59 Temple Place, Suite 330,  -#      Boston, MA  02111-1307  USA - -# This file is part of urlgrabber, a high-level cross-protocol url-grabber -# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko - -# $Id: progress.py,v 1.7 2005/08/19 21:59:07 mstenner Exp $ - -import sys -import time -import math -import thread -     -class BaseMeter: -    def __init__(self): -        self.update_period = 0.3 # seconds - -        self.filename   = None -        self.url        = None -        self.basename   = None -        self.text       = None -        self.size       = None -        self.start_time = None -        self.last_amount_read = 0 -        self.last_update_time = None -        self.re = RateEstimator() -         -    def start(self, filename=None, url=None, basename=None, -              size=None, now=None, text=None): -        self.filename = filename -        self.url      = url -        self.basename = basename -        self.text     = text - -        #size = None #########  TESTING -        self.size = size -        if not size is None: self.fsize = format_number(size) + 'B' - -        if now is None: now = time.time() -        self.start_time = now -        self.re.start(size, now) -        self.last_amount_read = 0 -        self.last_update_time = now -        self._do_start(now) -         -    def _do_start(self, now=None): -        pass - -    def update(self, amount_read, now=None): -        # for a real gui, you probably want to override and put a call -        # to your mainloop iteration function here -        if now is None: now = time.time() -        if (now >= self.last_update_time + self.update_period) or \ -               not self.last_update_time: -            self.re.update(amount_read, now) -            self.last_amount_read = amount_read -            self.last_update_time = now -            self._do_update(amount_read, now) - -    def _do_update(self, amount_read, now=None): -        pass - -    def end(self, amount_read, now=None): -        if now is None: now = time.time() -        self.re.update(amount_read, now) -        self.last_amount_read = amount_read -        self.last_update_time = now -        self._do_end(amount_read, now) - -    def _do_end(self, 
-class TextMeter(BaseMeter):
-    def __init__(self, fo=sys.stderr):
-        BaseMeter.__init__(self)
-        self.fo = fo
-
-    def _do_update(self, amount_read, now=None):
-        etime = self.re.elapsed_time()
-        fetime = format_time(etime)
-        fread = format_number(amount_read)
-        if self.text is not None:
-            text = self.text
-        else:
-            text = self.basename
-        if self.size is None:
-            out = '\r%-60.60s    %5sB %s ' % \
-                  (text, fread, fetime)
-        else:
-            rtime = self.re.remaining_time()
-            frtime = format_time(rtime)
-            frac = self.re.fraction_read()
-            bar = '='*int(25 * frac)
-
-            out = '\r%-25.25s %3i%% |%-25.25s| %5sB %8s ETA ' % \
-                  (text, frac*100, bar, fread, frtime)
-
-        self.fo.write(out)
-        self.fo.flush()
-
-    def _do_end(self, amount_read, now=None):
-        total_time = format_time(self.re.elapsed_time())
-        total_size = format_number(amount_read)
-        if self.text is not None:
-            text = self.text
-        else:
-            text = self.basename
-        if self.size is None:
-            out = '\r%-60.60s    %5sB %s ' % \
-                  (text, total_size, total_time)
-        else:
-            bar = '='*25
-            out = '\r%-25.25s %3i%% |%-25.25s| %5sB %8s     ' % \
-                  (text, 100, bar, total_size, total_time)
-        self.fo.write(out + '\n')
-        self.fo.flush()
-
-text_progress_meter = TextMeter
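Driving TextMeter by hand shows the protocol; in real use the grabber makes these calls for you. The sizes and delays below are invented:

    import time
    m = TextMeter()
    m.start(filename='foo.bin', text='foo.bin', size=10000)
    for got in (2500, 5000, 7500):
        time.sleep(0.4)         # longer than update_period, so it redraws
        m.update(got)
    m.end(10000)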
-class MultiFileHelper(BaseMeter):
-    def __init__(self, master):
-        BaseMeter.__init__(self)
-        self.master = master
-
-    def _do_start(self, now):
-        self.master.start_meter(self, now)
-
-    def _do_update(self, amount_read, now):
-        # elapsed time since last update
-        self.master.update_meter(self, now)
-
-    def _do_end(self, amount_read, now):
-        self.ftotal_time = format_time(now - self.start_time)
-        self.ftotal_size = format_number(self.last_amount_read)
-        self.master.end_meter(self, now)
-
-    def failure(self, message, now=None):
-        self.master.failure_meter(self, message, now)
-
-    def message(self, message):
-        self.master.message_meter(self, message)
-
-class MultiFileMeter:
-    helperclass = MultiFileHelper
-    def __init__(self):
-        self.meters = []
-        self.in_progress_meters = []
-        self._lock = thread.allocate_lock()
-        self.update_period = 0.3 # seconds
-
-        self.numfiles         = None
-        self.finished_files   = 0
-        self.failed_files     = 0
-        self.open_files       = 0
-        self.total_size       = None
-        self.failed_size      = 0
-        self.start_time       = None
-        self.finished_file_size = 0
-        self.last_update_time = None
-        self.re = RateEstimator()
-
-    def start(self, numfiles=None, total_size=None, now=None):
-        if now is None: now = time.time()
-        self.numfiles         = numfiles
-        self.finished_files   = 0
-        self.failed_files     = 0
-        self.open_files       = 0
-        self.total_size       = total_size
-        self.failed_size      = 0
-        self.start_time       = now
-        self.finished_file_size = 0
-        self.last_update_time = now
-        self.re.start(total_size, now)
-        self._do_start(now)
-
-    def _do_start(self, now):
-        pass
-
-    def end(self, now=None):
-        if now is None: now = time.time()
-        self._do_end(now)
-
-    def _do_end(self, now):
-        pass
-
-    def lock(self): self._lock.acquire()
-    def unlock(self): self._lock.release()
-
-    ###########################################################
-    # child meter creation and destruction
-    def newMeter(self):
-        newmeter = self.helperclass(self)
-        self.meters.append(newmeter)
-        return newmeter
-
-    def removeMeter(self, meter):
-        self.meters.remove(meter)
-
-    ###########################################################
-    # child functions - these should only be called by helpers
-    def start_meter(self, meter, now):
-        if meter not in self.meters:
-            raise ValueError('attempt to use orphaned meter')
-        self._lock.acquire()
-        try:
-            if meter not in self.in_progress_meters:
-                self.in_progress_meters.append(meter)
-                self.open_files += 1
-        finally:
-            self._lock.release()
-        self._do_start_meter(meter, now)
-
-    def _do_start_meter(self, meter, now):
-        pass
-
-    def update_meter(self, meter, now):
-        if meter not in self.meters:
-            raise ValueError('attempt to use orphaned meter')
-        if (now >= self.last_update_time + self.update_period) or \
-               not self.last_update_time:
-            self.re.update(self._amount_read(), now)
-            self.last_update_time = now
-            self._do_update_meter(meter, now)
-
-    def _do_update_meter(self, meter, now):
-        pass
-
-    def end_meter(self, meter, now):
-        if meter not in self.meters:
-            raise ValueError('attempt to use orphaned meter')
-        self._lock.acquire()
-        try:
-            try: self.in_progress_meters.remove(meter)
-            except ValueError: pass
-            self.open_files     -= 1
-            self.finished_files += 1
-            self.finished_file_size += meter.last_amount_read
-        finally:
-            self._lock.release()
-        self._do_end_meter(meter, now)
-
-    def _do_end_meter(self, meter, now):
-        pass
-
-    def failure_meter(self, meter, message, now):
-        if meter not in self.meters:
-            raise ValueError('attempt to use orphaned meter')
-        self._lock.acquire()
-        try:
-            try: self.in_progress_meters.remove(meter)
-            except ValueError: pass
-            self.open_files     -= 1
-            self.failed_files   += 1
-            if meter.size and self.failed_size is not None:
-                self.failed_size += meter.size
-            else:
-                self.failed_size = None
-        finally:
-            self._lock.release()
-        self._do_failure_meter(meter, message, now)
-
-    def _do_failure_meter(self, meter, message, now):
-        pass
-
-    def message_meter(self, meter, message):
-        pass
-
-    ########################################################
-    # internal functions
-    def _amount_read(self):
-        tot = self.finished_file_size
-        for m in self.in_progress_meters:
-            tot += m.last_amount_read
-        return tot
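The master/helper split works as follows: the master aggregates, each download gets its own helper from newMeter(), and the helpers report back through start_meter/update_meter/end_meter. A hedged sketch using TextMultiFileMeter (defined just below) with two invented downloads:

    mm = TextMultiFileMeter()
    mm.start(numfiles=2, total_size=3000)
    m1 = mm.newMeter()
    m2 = mm.newMeter()
    m1.start(basename='a.bin', size=1000)
    m2.start(basename='b.bin', size=2000)
    m1.update(1000); m1.end(1000)   # helper pushes totals to the master
    m2.update(2000); m2.end(2000)
    mm.end()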
-class TextMultiFileMeter(MultiFileMeter):
-    def __init__(self, fo=sys.stderr):
-        self.fo = fo
-        MultiFileMeter.__init__(self)
-
-    # files: ###/### ###%  data: ######/###### ###%  time: ##:##:##/##:##:##
-    def _do_update_meter(self, meter, now):
-        self._lock.acquire()
-        try:
-            format = "files: %3i/%-3i %3i%%   data: %6.6s/%-6.6s %3i%%   " \
-                     "time: %8.8s/%8.8s"
-            df = self.finished_files
-            tf = self.numfiles or 1
-            pf = 100 * float(df)/tf + 0.49
-            dd = self.re.last_amount_read
-            td = self.total_size
-            pd = 100 * (self.re.fraction_read() or 0) + 0.49
-            dt = self.re.elapsed_time()
-            rt = self.re.remaining_time()
-            if rt is None: tt = None
-            else: tt = dt + rt
-
-            fdd = format_number(dd) + 'B'
-            ftd = format_number(td) + 'B'
-            fdt = format_time(dt, 1)
-            ftt = format_time(tt, 1)
-
-            out = '%-79.79s' % (format % (df, tf, pf, fdd, ftd, pd, fdt, ftt))
-            self.fo.write('\r' + out)
-            self.fo.flush()
-        finally:
-            self._lock.release()
-
-    def _do_end_meter(self, meter, now):
-        self._lock.acquire()
-        try:
-            format = "%-30.30s %6.6s    %8.8s    %9.9s"
-            fn = meter.basename
-            size = meter.last_amount_read
-            fsize = format_number(size) + 'B'
-            et = meter.re.elapsed_time()
-            fet = format_time(et, 1)
-            frate = format_number(size / et) + 'B/s'
-
-            out = '%-79.79s' % (format % (fn, fsize, fet, frate))
-            self.fo.write('\r' + out + '\n')
-        finally:
-            self._lock.release()
-        self._do_update_meter(meter, now)
-
-    def _do_failure_meter(self, meter, message, now):
-        self._lock.acquire()
-        try:
-            format = "%-30.30s %6.6s %s"
-            fn = meter.basename
-            if type(message) in (type(''), type(u'')):
-                message = message.splitlines()
-            if not message: message = ['']
-            out = '%-79s' % (format % (fn, 'FAILED', message[0] or ''))
-            self.fo.write('\r' + out + '\n')
-            for m in message[1:]: self.fo.write('  ' + m + '\n')
-        finally:
-            # release before _do_update_meter, which re-acquires the lock
-            self._lock.release()
-        self._do_update_meter(meter, now)
-
-    def message_meter(self, meter, message):
-        self._lock.acquire()
-        try:
-            pass
-        finally:
-            self._lock.release()
-
-    def _do_end(self, now):
-        self._do_update_meter(None, now)
-        self._lock.acquire()
-        try:
-            self.fo.write('\n')
-            self.fo.flush()
-        finally:
-            self._lock.release()
-
-######################################################################
-# support classes and functions
-
-class RateEstimator:
-    def __init__(self, timescale=5.0):
-        self.timescale = timescale
-
-    def start(self, total=None, now=None):
-        if now is None: now = time.time()
-        self.total = total
-        self.start_time = now
-        self.last_update_time = now
-        self.last_amount_read = 0
-        self.ave_rate = None
-
-    def update(self, amount_read, now=None):
-        if now is None: now = time.time()
-        if amount_read == 0:
-            # if we just started this file, all bets are off
-            self.last_update_time = now
-            self.last_amount_read = 0
-            self.ave_rate = None
-            return
-
-        time_diff = now         - self.last_update_time
-        read_diff = amount_read - self.last_amount_read
-        self.last_update_time = now
-        self.last_amount_read = amount_read
-        self.ave_rate = self._temporal_rolling_ave(
-            time_diff, read_diff, self.ave_rate, self.timescale)
-
-    #####################################################################
-    # result methods
-    def average_rate(self):
-        "get the average transfer rate (in bytes/second)"
-        return self.ave_rate
-
-    def elapsed_time(self):
-        "the time between the start of the transfer and the most recent update"
-        return self.last_update_time - self.start_time
-
-    def remaining_time(self):
-        "estimated time remaining"
-        if not self.ave_rate or not self.total: return None
-        return (self.total - self.last_amount_read) / self.ave_rate
-
-    def fraction_read(self):
-        """the fraction of the data that has been read
-        (can be None for unknown transfer size)"""
-        if self.total is None: return None
-        elif self.total == 0: return 1.0
-        else: return float(self.last_amount_read)/self.total
-
-    #########################################################################
-    # support methods
-    def _temporal_rolling_ave(self, time_diff, read_diff, last_ave, timescale):
-        """a temporal rolling average performs smooth averaging even when
-        updates come at irregular intervals.  This is performed by scaling
-        the "epsilon" according to the time since the last update.
-        Specifically, epsilon = time_diff / timescale
-
-        As a general rule, the average will take on a completely new value
-        after 'timescale' seconds."""
-        epsilon = time_diff / timescale
-        if epsilon > 1: epsilon = 1.0
-        return self._rolling_ave(time_diff, read_diff, last_ave, epsilon)
-
-    def _rolling_ave(self, time_diff, read_diff, last_ave, epsilon):
-        """perform a "rolling average" iteration
-        a rolling average "folds" new data into an existing average with
-        some weight, epsilon.  epsilon must be between 0.0 and 1.0 (inclusive)
-        a value of 0.0 means only the old value (initial value) counts,
-        and a value of 1.0 means only the newest value is considered."""
-
-        try:
-            recent_rate = read_diff / time_diff
-        except ZeroDivisionError:
-            recent_rate = None
-        if last_ave is None: return recent_rate
-        elif recent_rate is None: return last_ave
-
-        # at this point, both last_ave and recent_rate are numbers
-        return epsilon * recent_rate  +  (1 - epsilon) * last_ave
-
-    def _round_remaining_time(self, rt, start_time=15.0):
-        """round the remaining time, depending on its size
-        If rt is between (2**n)*start_time and (2**(n+1))*start_time, round
-        down to the nearest multiple of 2**n (for any counting number n).
-        If rt < start_time, round down to the nearest 1.
-
-        For example (for start_time = 15.0):
-         2.7  -> 2.0
-         25.2 -> 25.0
-         26.4 -> 26.0
-         35.3 -> 34.0
-         63.6 -> 60.0
-        """
-
-        if rt <= 0: return 0.0
-        shift = int(math.log(rt/start_time)/math.log(2))
-        rt = int(rt)
-        if shift <= 0: return rt
-        return float(int(rt) >> shift << shift)
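A quick numeric check of the estimator (all values invented): with timescale=5.0, an update arriving one second after the previous one gives epsilon = 0.2, so the new average is 0.2*recent + 0.8*old:

    re = RateEstimator(timescale=5.0)
    re.start(total=1000, now=0.0)
    re.update(100, now=1.0)    # first sample: average rate 100.0 B/s
    re.update(300, now=2.0)    # recent rate 200 B/s -> 0.2*200 + 0.8*100
    print re.average_rate()    # 120.0
    print re.remaining_time()  # (1000 - 300) / 120.0, about 5.8 s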
-
-def format_time(seconds, use_hours=0):
-    if seconds is None or seconds < 0:
-        if use_hours: return '--:--:--'
-        else:         return '--:--'
-    else:
-        seconds = int(seconds)
-        minutes = seconds / 60
-        seconds = seconds % 60
-        if use_hours:
-            hours = minutes / 60
-            minutes = minutes % 60
-            return '%02i:%02i:%02i' % (hours, minutes, seconds)
-        else:
-            return '%02i:%02i' % (minutes, seconds)
-
-def format_number(number, SI=0, space=' '):
-    """Turn numbers into human-readable metric-like numbers"""
-    symbols = ['',  # (none)
-               'k', # kilo
-               'M', # mega
-               'G', # giga
-               'T', # tera
-               'P', # peta
-               'E', # exa
-               'Z', # zetta
-               'Y'] # yotta
-
-    if SI: step = 1000.0
-    else: step = 1024.0
-
-    thresh = 999
-    depth = 0
-    max_depth = len(symbols) - 1
-
-    # we want numbers between 0 and thresh, but don't exceed the length
-    # of our list.  In that event, the formatting will be screwed up,
-    # but it'll still show the right number.
-    while number > thresh and depth < max_depth:
-        depth  = depth + 1
-        number = number / step
-
-    if type(number) == type(1) or type(number) == type(1L):
-        # it's an int or a long, which means it didn't get divided,
-        # which means it's already short enough
-        format = '%i%s%s'
-    elif number < 9.95:
-        # must use 9.95 for proper sizing.  For example, 9.99 will be
-        # rounded to 10.0 with the .1f format string (which is too long)
-        format = '%.1f%s%s'
-    else:
-        format = '%.0f%s%s'
-
-    return format % (float(number or 0), space, symbols[depth])
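For reference, a few invented inputs and the strings these helpers produce (binary 1024-steps unless SI=1):

    print format_number(10)               # '10 '   (int passed through)
    print format_number(1024)             # '1.0 k'
    print format_number(976563)           # '954 k' (divided by 1024)
    print format_number(1000000, SI=1)    # '1.0 M' (divided by 1000)
    print format_time(75)                 # '01:15'
    print format_time(3661, use_hours=1)  # '01:01:01'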
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/sslfactory.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/sslfactory.py
deleted file mode 100644
index 07848dac7c..0000000000
--- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/sslfactory.py
+++ /dev/null
@@ -1,90 +0,0 @@
-#   This library is free software; you can redistribute it and/or
-#   modify it under the terms of the GNU Lesser General Public
-#   License as published by the Free Software Foundation; either
-#   version 2.1 of the License, or (at your option) any later version.
-#
-#   This library is distributed in the hope that it will be useful,
-#   but WITHOUT ANY WARRANTY; without even the implied warranty of
-#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-#   Lesser General Public License for more details.
-#
-#   You should have received a copy of the GNU Lesser General Public
-#   License along with this library; if not, write to the
-#      Free Software Foundation, Inc.,
-#      59 Temple Place, Suite 330,
-#      Boston, MA  02111-1307  USA
-
-# This file is part of urlgrabber, a high-level cross-protocol url-grabber
-
-import httplib
-import urllib2
-
-try:
-    from M2Crypto import SSL
-    from M2Crypto import httpslib
-    from M2Crypto import m2urllib2
-
-    SSL.Connection.clientPostConnectionCheck = None
-    have_m2crypto = True
-except ImportError:
-    have_m2crypto = False
-
-DEBUG = None
-
-if have_m2crypto:
-
-    class M2SSLFactory:
-
-        def __init__(self, ssl_ca_cert, ssl_context):
-            self.ssl_context = self._get_ssl_context(ssl_ca_cert, ssl_context)
-
-        def _get_ssl_context(self, ssl_ca_cert, ssl_context):
-            """
-            Create an SSL context using the CA cert file or SSL context.
-
-            The CA cert is used first if it was passed as an option.  If not,
-            the supplied SSL context is used.  If no SSL context was supplied,
-            None is returned.
-            """
-            if ssl_ca_cert:
-                context = SSL.Context()
-                context.load_verify_locations(ssl_ca_cert)
-                context.set_verify(SSL.verify_none, -1)
-                return context
-            else:
-                return ssl_context
-
-        def create_https_connection(self, host, response_class=None):
-            # use M2Crypto's httpslib so the SSL context is actually honored
-            connection = httpslib.HTTPSConnection(host, ssl_context=self.ssl_context)
-            if response_class:
-                connection.response_class = response_class
-            return connection
-
-        def create_opener(self, *handlers):
-            return m2urllib2.build_opener(self.ssl_context, *handlers)
-
-
-class SSLFactory:
-
-    def create_https_connection(self, host, response_class=None):
-        connection = httplib.HTTPSConnection(host)
-        if response_class:
-            connection.response_class = response_class
-        return connection
-
-    def create_opener(self, *handlers):
-        return urllib2.build_opener(*handlers)
-
-
-def get_factory(ssl_ca_cert=None, ssl_context=None):
-    """ Return an SSLFactory, based on whether M2Crypto is available. """
-    if have_m2crypto:
-        return M2SSLFactory(ssl_ca_cert, ssl_context)
-    else:
-        # Log here if someone provides the args but we don't use them.
-        if ssl_ca_cert or ssl_context:
-            if DEBUG:
-                DEBUG.warning("SSL arguments supplied, but M2Crypto is not available. "
-                        "Using Python SSL.")
-        return SSLFactory()
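Finally, a hedged sketch of how a caller would use the factory this file provided — the CA bundle path is a placeholder, and the plain-Python fallback follows get_factory above:

    factory = get_factory(ssl_ca_cert='/path/to/ca-bundle.crt')
    opener = factory.create_opener()        # urllib2-style opener
    conn = factory.create_https_connection('www.example.com')
    conn.request('GET', '/')
    print conn.getresponse().status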
