diff options
| author | Tom Zanussi <tom.zanussi@linux.intel.com> | 2014-08-04 07:55:07 -0500 |
|---|---|---|
| committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2014-08-11 10:52:16 +0100 |
| commit | 00dcdb29c89634ab267d328eb00f8eb70c696655 (patch) | |
| tree | 7394971204e43effc1ffdb227232dc45994762fa /scripts/lib | |
| parent | 84e1e13ce3af216e304f61c7ea6e5e9338f94bc6 (diff) | |
| download | openembedded-core-00dcdb29c89634ab267d328eb00f8eb70c696655.tar.gz openembedded-core-00dcdb29c89634ab267d328eb00f8eb70c696655.tar.bz2 openembedded-core-00dcdb29c89634ab267d328eb00f8eb70c696655.zip | |
wic: Remove 3rdparty/urlgrabber
wic doesn't use it, so remove it.
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Diffstat (limited to 'scripts/lib')
7 files changed, 0 insertions, 3688 deletions
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/__init__.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/__init__.py deleted file mode 100644 index 7bcd9d5541..0000000000 --- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/__init__.py +++ /dev/null @@ -1,53 +0,0 @@ -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Library General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - -# Copyright 2002-2006 Michael D. Stenner, Ryan Tomayko - -# $Id: __init__.py,v 1.20 2006/09/22 00:58:55 mstenner Exp $ - -"""A high-level cross-protocol url-grabber. - -Using urlgrabber, data can be fetched in three basic ways: - - urlgrab(url) copy the file to the local filesystem - urlopen(url) open the remote file and return a file object - (like urllib2.urlopen) - urlread(url) return the contents of the file as a string - -When using these functions (or methods), urlgrabber supports the -following features: - - * identical behavior for http://, ftp://, and file:// urls - * http keepalive - faster downloads of many files by using - only a single connection - * byte ranges - fetch only a portion of the file - * reget - for a urlgrab, resume a partial download - * progress meters - the ability to report download progress - automatically, even when using urlopen! - * throttling - restrict bandwidth usage - * retries - automatically retry a download if it fails. The - number of retries and failure types are configurable. - * authenticated server access for http and ftp - * proxy support - support for authenticated http and ftp proxies - * mirror groups - treat a list of mirrors as a single source, - automatically switching mirrors if there is a failure. -""" - -__version__ = '3.1.0' -__date__ = '2006/09/21' -__author__ = 'Michael D. Stenner <mstenner@linux.duke.edu>, ' \ - 'Ryan Tomayko <rtomayko@naeblis.cx>' -__url__ = 'http://linux.duke.edu/projects/urlgrabber/' - -from grabber import urlgrab, urlopen, urlread diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py deleted file mode 100644 index 001b4e32d6..0000000000 --- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py +++ /dev/null @@ -1,463 +0,0 @@ -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the -# Free Software Foundation, Inc., -# 59 Temple Place, Suite 330, -# Boston, MA 02111-1307 USA - -# This file is part of urlgrabber, a high-level cross-protocol url-grabber -# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko - -# $Id: byterange.py,v 1.12 2006/07/20 20:15:58 mstenner Exp $ - -import os -import stat -import urllib -import urllib2 -import rfc822 - -DEBUG = None - -try: - from cStringIO import StringIO -except ImportError, msg: - from StringIO import StringIO - -class RangeError(IOError): - """Error raised when an unsatisfiable range is requested.""" - pass - -class HTTPRangeHandler(urllib2.BaseHandler): - """Handler that enables HTTP Range headers. - - This was extremely simple. The Range header is a HTTP feature to - begin with so all this class does is tell urllib2 that the - "206 Partial Content" reponse from the HTTP server is what we - expected. - - Example: - import urllib2 - import byterange - - range_handler = range.HTTPRangeHandler() - opener = urllib2.build_opener(range_handler) - - # install it - urllib2.install_opener(opener) - - # create Request and set Range header - req = urllib2.Request('http://www.python.org/') - req.header['Range'] = 'bytes=30-50' - f = urllib2.urlopen(req) - """ - - def http_error_206(self, req, fp, code, msg, hdrs): - # 206 Partial Content Response - r = urllib.addinfourl(fp, hdrs, req.get_full_url()) - r.code = code - r.msg = msg - return r - - def http_error_416(self, req, fp, code, msg, hdrs): - # HTTP's Range Not Satisfiable error - raise RangeError('Requested Range Not Satisfiable') - -class HTTPSRangeHandler(HTTPRangeHandler): - """ Range Header support for HTTPS. """ - - def https_error_206(self, req, fp, code, msg, hdrs): - return self.http_error_206(req, fp, code, msg, hdrs) - - def https_error_416(self, req, fp, code, msg, hdrs): - self.https_error_416(req, fp, code, msg, hdrs) - -class RangeableFileObject: - """File object wrapper to enable raw range handling. - This was implemented primarilary for handling range - specifications for file:// urls. This object effectively makes - a file object look like it consists only of a range of bytes in - the stream. - - Examples: - # expose 10 bytes, starting at byte position 20, from - # /etc/aliases. - >>> fo = RangeableFileObject(file('/etc/passwd', 'r'), (20,30)) - # seek seeks within the range (to position 23 in this case) - >>> fo.seek(3) - # tell tells where your at _within the range_ (position 3 in - # this case) - >>> fo.tell() - # read EOFs if an attempt is made to read past the last - # byte in the range. the following will return only 7 bytes. - >>> fo.read(30) - """ - - def __init__(self, fo, rangetup): - """Create a RangeableFileObject. - fo -- a file like object. only the read() method need be - supported but supporting an optimized seek() is - preferable. - rangetup -- a (firstbyte,lastbyte) tuple specifying the range - to work over. - The file object provided is assumed to be at byte offset 0. - """ - self.fo = fo - (self.firstbyte, self.lastbyte) = range_tuple_normalize(rangetup) - self.realpos = 0 - self._do_seek(self.firstbyte) - - def __getattr__(self, name): - """This effectively allows us to wrap at the instance level. - Any attribute not found in _this_ object will be searched for - in self.fo. This includes methods.""" - if hasattr(self.fo, name): - return getattr(self.fo, name) - raise AttributeError, name - - def tell(self): - """Return the position within the range. - This is different from fo.seek in that position 0 is the - first byte position of the range tuple. For example, if - this object was created with a range tuple of (500,899), - tell() will return 0 when at byte position 500 of the file. - """ - return (self.realpos - self.firstbyte) - - def seek(self,offset,whence=0): - """Seek within the byte range. - Positioning is identical to that described under tell(). - """ - assert whence in (0, 1, 2) - if whence == 0: # absolute seek - realoffset = self.firstbyte + offset - elif whence == 1: # relative seek - realoffset = self.realpos + offset - elif whence == 2: # absolute from end of file - # XXX: are we raising the right Error here? - raise IOError('seek from end of file not supported.') - - # do not allow seek past lastbyte in range - if self.lastbyte and (realoffset >= self.lastbyte): - realoffset = self.lastbyte - - self._do_seek(realoffset - self.realpos) - - def read(self, size=-1): - """Read within the range. - This method will limit the size read based on the range. - """ - size = self._calc_read_size(size) - rslt = self.fo.read(size) - self.realpos += len(rslt) - return rslt - - def readline(self, size=-1): - """Read lines within the range. - This method will limit the size read based on the range. - """ - size = self._calc_read_size(size) - rslt = self.fo.readline(size) - self.realpos += len(rslt) - return rslt - - def _calc_read_size(self, size): - """Handles calculating the amount of data to read based on - the range. - """ - if self.lastbyte: - if size > -1: - if ((self.realpos + size) >= self.lastbyte): - size = (self.lastbyte - self.realpos) - else: - size = (self.lastbyte - self.realpos) - return size - - def _do_seek(self,offset): - """Seek based on whether wrapped object supports seek(). - offset is relative to the current position (self.realpos). - """ - assert offset >= 0 - if not hasattr(self.fo, 'seek'): - self._poor_mans_seek(offset) - else: - self.fo.seek(self.realpos + offset) - self.realpos+= offset - - def _poor_mans_seek(self,offset): - """Seek by calling the wrapped file objects read() method. - This is used for file like objects that do not have native - seek support. The wrapped objects read() method is called - to manually seek to the desired position. - offset -- read this number of bytes from the wrapped - file object. - raise RangeError if we encounter EOF before reaching the - specified offset. - """ - pos = 0 - bufsize = 1024 - while pos < offset: - if (pos + bufsize) > offset: - bufsize = offset - pos - buf = self.fo.read(bufsize) - if len(buf) != bufsize: - raise RangeError('Requested Range Not Satisfiable') - pos+= bufsize - -class FileRangeHandler(urllib2.FileHandler): - """FileHandler subclass that adds Range support. - This class handles Range headers exactly like an HTTP - server would. - """ - def open_local_file(self, req): - import mimetypes - import mimetools - host = req.get_host() - file = req.get_selector() - localfile = urllib.url2pathname(file) - stats = os.stat(localfile) - size = stats[stat.ST_SIZE] - modified = rfc822.formatdate(stats[stat.ST_MTIME]) - mtype = mimetypes.guess_type(file)[0] - if host: - host, port = urllib.splitport(host) - if port or socket.gethostbyname(host) not in self.get_names(): - raise urllib2.URLError('file not on local host') - fo = open(localfile,'rb') - brange = req.headers.get('Range',None) - brange = range_header_to_tuple(brange) - assert brange != () - if brange: - (fb,lb) = brange - if lb == '': lb = size - if fb < 0 or fb > size or lb > size: - raise RangeError('Requested Range Not Satisfiable') - size = (lb - fb) - fo = RangeableFileObject(fo, (fb,lb)) - headers = mimetools.Message(StringIO( - 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' % - (mtype or 'text/plain', size, modified))) - return urllib.addinfourl(fo, headers, 'file:'+file) - - -# FTP Range Support -# Unfortunately, a large amount of base FTP code had to be copied -# from urllib and urllib2 in order to insert the FTP REST command. -# Code modifications for range support have been commented as -# follows: -# -- range support modifications start/end here - -from urllib import splitport, splituser, splitpasswd, splitattr, \ - unquote, addclosehook, addinfourl -import ftplib -import socket -import sys -import ftplib -import mimetypes -import mimetools - -class FTPRangeHandler(urllib2.FTPHandler): - def ftp_open(self, req): - host = req.get_host() - if not host: - raise IOError, ('ftp error', 'no host given') - host, port = splitport(host) - if port is None: - port = ftplib.FTP_PORT - - # username/password handling - user, host = splituser(host) - if user: - user, passwd = splitpasswd(user) - else: - passwd = None - host = unquote(host) - user = unquote(user or '') - passwd = unquote(passwd or '') - - try: - host = socket.gethostbyname(host) - except socket.error, msg: - raise urllib2.URLError(msg) - path, attrs = splitattr(req.get_selector()) - dirs = path.split('/') - dirs = map(unquote, dirs) - dirs, file = dirs[:-1], dirs[-1] - if dirs and not dirs[0]: - dirs = dirs[1:] - try: - fw = self.connect_ftp(user, passwd, host, port, dirs) - type = file and 'I' or 'D' - for attr in attrs: - attr, value = splitattr(attr) - if attr.lower() == 'type' and \ - value in ('a', 'A', 'i', 'I', 'd', 'D'): - type = value.upper() - - # -- range support modifications start here - rest = None - range_tup = range_header_to_tuple(req.headers.get('Range',None)) - assert range_tup != () - if range_tup: - (fb,lb) = range_tup - if fb > 0: rest = fb - # -- range support modifications end here - - fp, retrlen = fw.retrfile(file, type, rest) - - # -- range support modifications start here - if range_tup: - (fb,lb) = range_tup - if lb == '': - if retrlen is None or retrlen == 0: - raise RangeError('Requested Range Not Satisfiable due to unobtainable file length.') - lb = retrlen - retrlen = lb - fb - if retrlen < 0: - # beginning of range is larger than file - raise RangeError('Requested Range Not Satisfiable') - else: - retrlen = lb - fb - fp = RangeableFileObject(fp, (0,retrlen)) - # -- range support modifications end here - - headers = "" - mtype = mimetypes.guess_type(req.get_full_url())[0] - if mtype: - headers += "Content-Type: %s\n" % mtype - if retrlen is not None and retrlen >= 0: - headers += "Content-Length: %d\n" % retrlen - sf = StringIO(headers) - headers = mimetools.Message(sf) - return addinfourl(fp, headers, req.get_full_url()) - except ftplib.all_errors, msg: - raise IOError, ('ftp error', msg), sys.exc_info()[2] - - def connect_ftp(self, user, passwd, host, port, dirs): - fw = ftpwrapper(user, passwd, host, port, dirs) - return fw - -class ftpwrapper(urllib.ftpwrapper): - # range support note: - # this ftpwrapper code is copied directly from - # urllib. The only enhancement is to add the rest - # argument and pass it on to ftp.ntransfercmd - def retrfile(self, file, type, rest=None): - self.endtransfer() - if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1 - else: cmd = 'TYPE ' + type; isdir = 0 - try: - self.ftp.voidcmd(cmd) - except ftplib.all_errors: - self.init() - self.ftp.voidcmd(cmd) - conn = None - if file and not isdir: - # Use nlst to see if the file exists at all - try: - self.ftp.nlst(file) - except ftplib.error_perm, reason: - raise IOError, ('ftp error', reason), sys.exc_info()[2] - # Restore the transfer mode! - self.ftp.voidcmd(cmd) - # Try to retrieve as a file - try: - cmd = 'RETR ' + file - conn = self.ftp.ntransfercmd(cmd, rest) - except ftplib.error_perm, reason: - if str(reason)[:3] == '501': - # workaround for REST not supported error - fp, retrlen = self.retrfile(file, type) - fp = RangeableFileObject(fp, (rest,'')) - return (fp, retrlen) - elif str(reason)[:3] != '550': - raise IOError, ('ftp error', reason), sys.exc_info()[2] - if not conn: - # Set transfer mode to ASCII! - self.ftp.voidcmd('TYPE A') - # Try a directory listing - if file: cmd = 'LIST ' + file - else: cmd = 'LIST' - conn = self.ftp.ntransfercmd(cmd) - self.busy = 1 - # Pass back both a suitably decorated object and a retrieval length - return (addclosehook(conn[0].makefile('rb'), - self.endtransfer), conn[1]) - - -#################################################################### -# Range Tuple Functions -# XXX: These range tuple functions might go better in a class. - -_rangere = None -def range_header_to_tuple(range_header): - """Get a (firstbyte,lastbyte) tuple from a Range header value. - - Range headers have the form "bytes=<firstbyte>-<lastbyte>". This - function pulls the firstbyte and lastbyte values and returns - a (firstbyte,lastbyte) tuple. If lastbyte is not specified in - the header value, it is returned as an empty string in the - tuple. - - Return None if range_header is None - Return () if range_header does not conform to the range spec - pattern. - - """ - global _rangere - if range_header is None: return None - if _rangere is None: - import re - _rangere = re.compile(r'^bytes=(\d{1,})-(\d*)') - match = _rangere.match(range_header) - if match: - tup = range_tuple_normalize(match.group(1,2)) - if tup and tup[1]: - tup = (tup[0],tup[1]+1) - return tup - return () - -def range_tuple_to_header(range_tup): - """Convert a range tuple to a Range header value. - Return a string of the form "bytes=<firstbyte>-<lastbyte>" or None - if no range is needed. - """ - if range_tup is None: return None - range_tup = range_tuple_normalize(range_tup) - if range_tup: - if range_tup[1]: - range_tup = (range_tup[0],range_tup[1] - 1) - return 'bytes=%s-%s' % range_tup - -def range_tuple_normalize(range_tup): - """Normalize a (first_byte,last_byte) range tuple. - Return a tuple whose first element is guaranteed to be an int - and whose second element will be '' (meaning: the last byte) or - an int. Finally, return None if the normalized tuple == (0,'') - as that is equivelant to retrieving the entire file. - """ - if range_tup is None: return None - # handle first byte - fb = range_tup[0] - if fb in (None,''): fb = 0 - else: fb = int(fb) - # handle last byte - try: lb = range_tup[1] - except IndexError: lb = '' - else: - if lb is None: lb = '' - elif lb != '': lb = int(lb) - # check if range is over the entire file - if (fb,lb) == (0,''): return None - # check that the range is valid - if lb < fb: raise RangeError('Invalid byte range: %s-%s' % (fb,lb)) - return (fb,lb) - diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/grabber.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/grabber.py deleted file mode 100644 index fefdab36f6..0000000000 --- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/grabber.py +++ /dev/null @@ -1,1477 +0,0 @@ -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the -# Free Software Foundation, Inc., -# 59 Temple Place, Suite 330, -# Boston, MA 02111-1307 USA - -# This file is part of urlgrabber, a high-level cross-protocol url-grabber -# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko - -"""A high-level cross-protocol url-grabber. - -GENERAL ARGUMENTS (kwargs) - - Where possible, the module-level default is indicated, and legal - values are provided. - - copy_local = 0 [0|1] - - ignored except for file:// urls, in which case it specifies - whether urlgrab should still make a copy of the file, or simply - point to the existing copy. The module level default for this - option is 0. - - close_connection = 0 [0|1] - - tells URLGrabber to close the connection after a file has been - transfered. This is ignored unless the download happens with the - http keepalive handler (keepalive=1). Otherwise, the connection - is left open for further use. The module level default for this - option is 0 (keepalive connections will not be closed). - - keepalive = 1 [0|1] - - specifies whether keepalive should be used for HTTP/1.1 servers - that support it. The module level default for this option is 1 - (keepalive is enabled). - - progress_obj = None - - a class instance that supports the following methods: - po.start(filename, url, basename, length, text) - # length will be None if unknown - po.update(read) # read == bytes read so far - po.end() - - text = None - - specifies an alternativ text item in the beginning of the progress - bar line. If not given, the basename of the file is used. - - throttle = 1.0 - - a number - if it's an int, it's the bytes/second throttle limit. - If it's a float, it is first multiplied by bandwidth. If throttle - == 0, throttling is disabled. If None, the module-level default - (which can be set on default_grabber.throttle) is used. See - BANDWIDTH THROTTLING for more information. - - timeout = None - - a positive float expressing the number of seconds to wait for socket - operations. If the value is None or 0.0, socket operations will block - forever. Setting this option causes urlgrabber to call the settimeout - method on the Socket object used for the request. See the Python - documentation on settimeout for more information. - http://www.python.org/doc/current/lib/socket-objects.html - - bandwidth = 0 - - the nominal max bandwidth in bytes/second. If throttle is a float - and bandwidth == 0, throttling is disabled. If None, the - module-level default (which can be set on - default_grabber.bandwidth) is used. See BANDWIDTH THROTTLING for - more information. - - range = None - - a tuple of the form (first_byte, last_byte) describing a byte - range to retrieve. Either or both of the values may set to - None. If first_byte is None, byte offset 0 is assumed. If - last_byte is None, the last byte available is assumed. Note that - the range specification is python-like in that (0,10) will yeild - the first 10 bytes of the file. - - If set to None, no range will be used. - - reget = None [None|'simple'|'check_timestamp'] - - whether to attempt to reget a partially-downloaded file. Reget - only applies to .urlgrab and (obviously) only if there is a - partially downloaded file. Reget has two modes: - - 'simple' -- the local file will always be trusted. If there - are 100 bytes in the local file, then the download will always - begin 100 bytes into the requested file. - - 'check_timestamp' -- the timestamp of the server file will be - compared to the timestamp of the local file. ONLY if the - local file is newer than or the same age as the server file - will reget be used. If the server file is newer, or the - timestamp is not returned, the entire file will be fetched. - - NOTE: urlgrabber can do very little to verify that the partial - file on disk is identical to the beginning of the remote file. - You may want to either employ a custom "checkfunc" or simply avoid - using reget in situations where corruption is a concern. - - user_agent = 'urlgrabber/VERSION' - - a string, usually of the form 'AGENT/VERSION' that is provided to - HTTP servers in the User-agent header. The module level default - for this option is "urlgrabber/VERSION". - - http_headers = None - - a tuple of 2-tuples, each containing a header and value. These - will be used for http and https requests only. For example, you - can do - http_headers = (('Pragma', 'no-cache'),) - - ftp_headers = None - - this is just like http_headers, but will be used for ftp requests. - - proxies = None - - a dictionary that maps protocol schemes to proxy hosts. For - example, to use a proxy server on host "foo" port 3128 for http - and https URLs: - proxies={ 'http' : 'http://foo:3128', 'https' : 'http://foo:3128' } - note that proxy authentication information may be provided using - normal URL constructs: - proxies={ 'http' : 'http://user:host@foo:3128' } - Lastly, if proxies is None, the default environment settings will - be used. - - prefix = None - - a url prefix that will be prepended to all requested urls. For - example: - g = URLGrabber(prefix='http://foo.com/mirror/') - g.urlgrab('some/file.txt') - ## this will fetch 'http://foo.com/mirror/some/file.txt' - This option exists primarily to allow identical behavior to - MirrorGroup (and derived) instances. Note: a '/' will be inserted - if necessary, so you cannot specify a prefix that ends with a - partial file or directory name. - - opener = None - - Overrides the default urllib2.OpenerDirector provided to urllib2 - when making requests. This option exists so that the urllib2 - handler chain may be customized. Note that the range, reget, - proxy, and keepalive features require that custom handlers be - provided to urllib2 in order to function properly. If an opener - option is provided, no attempt is made by urlgrabber to ensure - chain integrity. You are responsible for ensuring that any - extension handlers are present if said features are required. - - data = None - - Only relevant for the HTTP family (and ignored for other - protocols), this allows HTTP POSTs. When the data kwarg is - present (and not None), an HTTP request will automatically become - a POST rather than GET. This is done by direct passthrough to - urllib2. If you use this, you may also want to set the - 'Content-length' and 'Content-type' headers with the http_headers - option. Note that python 2.2 handles the case of these - badly and if you do not use the proper case (shown here), your - values will be overridden with the defaults. - - -RETRY RELATED ARGUMENTS - - retry = None - - the number of times to retry the grab before bailing. If this is - zero, it will retry forever. This was intentional... really, it - was :). If this value is not supplied or is supplied but is None - retrying does not occur. - - retrycodes = [-1,2,4,5,6,7] - - a sequence of errorcodes (values of e.errno) for which it should - retry. See the doc on URLGrabError for more details on this. You - might consider modifying a copy of the default codes rather than - building yours from scratch so that if the list is extended in the - future (or one code is split into two) you can still enjoy the - benefits of the default list. You can do that with something like - this: - - retrycodes = urlgrabber.grabber.URLGrabberOptions().retrycodes - if 12 not in retrycodes: - retrycodes.append(12) - - checkfunc = None - - a function to do additional checks. This defaults to None, which - means no additional checking. The function should simply return - on a successful check. It should raise URLGrabError on an - unsuccessful check. Raising of any other exception will be - considered immediate failure and no retries will occur. - - If it raises URLGrabError, the error code will determine the retry - behavior. Negative error numbers are reserved for use by these - passed in functions, so you can use many negative numbers for - different types of failure. By default, -1 results in a retry, - but this can be customized with retrycodes. - - If you simply pass in a function, it will be given exactly one - argument: a CallbackObject instance with the .url attribute - defined and either .filename (for urlgrab) or .data (for urlread). - For urlgrab, .filename is the name of the local file. For - urlread, .data is the actual string data. If you need other - arguments passed to the callback (program state of some sort), you - can do so like this: - - checkfunc=(function, ('arg1', 2), {'kwarg': 3}) - - if the downloaded file has filename /tmp/stuff, then this will - result in this call (for urlgrab): - - function(obj, 'arg1', 2, kwarg=3) - # obj.filename = '/tmp/stuff' - # obj.url = 'http://foo.com/stuff' - - NOTE: both the "args" tuple and "kwargs" dict must be present if - you use this syntax, but either (or both) can be empty. - - failure_callback = None - - The callback that gets called during retries when an attempt to - fetch a file fails. The syntax for specifying the callback is - identical to checkfunc, except for the attributes defined in the - CallbackObject instance. The attributes for failure_callback are: - - exception = the raised exception - url = the url we're trying to fetch - tries = the number of tries so far (including this one) - retry = the value of the retry option - - The callback is present primarily to inform the calling program of - the failure, but if it raises an exception (including the one it's - passed) that exception will NOT be caught and will therefore cause - future retries to be aborted. - - The callback is called for EVERY failure, including the last one. - On the last try, the callback can raise an alternate exception, - but it cannot (without severe trickiness) prevent the exception - from being raised. - - interrupt_callback = None - - This callback is called if KeyboardInterrupt is received at any - point in the transfer. Basically, this callback can have three - impacts on the fetch process based on the way it exits: - - 1) raise no exception: the current fetch will be aborted, but - any further retries will still take place - - 2) raise a URLGrabError: if you're using a MirrorGroup, then - this will prompt a failover to the next mirror according to - the behavior of the MirrorGroup subclass. It is recommended - that you raise URLGrabError with code 15, 'user abort'. If - you are NOT using a MirrorGroup subclass, then this is the - same as (3). - - 3) raise some other exception (such as KeyboardInterrupt), which - will not be caught at either the grabber or mirror levels. - That is, it will be raised up all the way to the caller. - - This callback is very similar to failure_callback. They are - passed the same arguments, so you could use the same function for - both. - - urlparser = URLParser() - - The URLParser class handles pre-processing of URLs, including - auth-handling for user/pass encoded in http urls, file handing - (that is, filenames not sent as a URL), and URL quoting. If you - want to override any of this behavior, you can pass in a - replacement instance. See also the 'quote' option. - - quote = None - - Whether or not to quote the path portion of a url. - quote = 1 -> quote the URLs (they're not quoted yet) - quote = 0 -> do not quote them (they're already quoted) - quote = None -> guess what to do - - This option only affects proper urls like 'file:///etc/passwd'; it - does not affect 'raw' filenames like '/etc/passwd'. The latter - will always be quoted as they are converted to URLs. Also, only - the path part of a url is quoted. If you need more fine-grained - control, you should probably subclass URLParser and pass it in via - the 'urlparser' option. - -BANDWIDTH THROTTLING - - urlgrabber supports throttling via two values: throttle and - bandwidth Between the two, you can either specify and absolute - throttle threshold or specify a theshold as a fraction of maximum - available bandwidth. - - throttle is a number - if it's an int, it's the bytes/second - throttle limit. If it's a float, it is first multiplied by - bandwidth. If throttle == 0, throttling is disabled. If None, the - module-level default (which can be set with set_throttle) is used. - - bandwidth is the nominal max bandwidth in bytes/second. If throttle - is a float and bandwidth == 0, throttling is disabled. If None, the - module-level default (which can be set with set_bandwidth) is used. - - THROTTLING EXAMPLES: - - Lets say you have a 100 Mbps connection. This is (about) 10^8 bits - per second, or 12,500,000 Bytes per second. You have a number of - throttling options: - - *) set_bandwidth(12500000); set_throttle(0.5) # throttle is a float - - This will limit urlgrab to use half of your available bandwidth. - - *) set_throttle(6250000) # throttle is an int - - This will also limit urlgrab to use half of your available - bandwidth, regardless of what bandwidth is set to. |
