summary | refs | log | tree | commit | diff
path: root/scripts/lib
diff options
context:
space:
mode:
author: Tom Zanussi <tom.zanussi@linux.intel.com> 2014-08-04 07:55:07 -0500
committer: Richard Purdie <richard.purdie@linuxfoundation.org> 2014-08-11 10:52:16 +0100
commit: 00dcdb29c89634ab267d328eb00f8eb70c696655 (patch)
tree: 7394971204e43effc1ffdb227232dc45994762fa /scripts/lib
parent: 84e1e13ce3af216e304f61c7ea6e5e9338f94bc6 (diff)
download: openembedded-core-00dcdb29c89634ab267d328eb00f8eb70c696655.tar.gz
openembedded-core-00dcdb29c89634ab267d328eb00f8eb70c696655.tar.bz2
openembedded-core-00dcdb29c89634ab267d328eb00f8eb70c696655.zip
wic: Remove 3rdparty/urlgrabber

wic doesn't use it, so remove it.

Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Diffstat (limited to 'scripts/lib')
-rw-r--r-- scripts/lib/mic/3rdparty/pykickstart/urlgrabber/__init__.py 53
-rw-r--r-- scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py 463
-rw-r--r-- scripts/lib/mic/3rdparty/pykickstart/urlgrabber/grabber.py 1477
-rw-r--r-- scripts/lib/mic/3rdparty/pykickstart/urlgrabber/keepalive.py 617
-rw-r--r-- scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py 458
-rw-r--r-- scripts/lib/mic/3rdparty/pykickstart/urlgrabber/progress.py 530
-rw-r--r-- scripts/lib/mic/3rdparty/pykickstart/urlgrabber/sslfactory.py 90
7 files changed, 0 insertions, 3688 deletions
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/__init__.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/__init__.py
deleted file mode 100644
index 7bcd9d5541..0000000000
--- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/__init__.py
+++ /dev/null
@@ -1,53 +0,0 @@
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Library General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-
-# Copyright 2002-2006 Michael D. Stenner, Ryan Tomayko
-
-# $Id: __init__.py,v 1.20 2006/09/22 00:58:55 mstenner Exp $
-
-"""A high-level cross-protocol url-grabber.
-
-Using urlgrabber, data can be fetched in three basic ways:
-
- urlgrab(url) copy the file to the local filesystem
- urlopen(url) open the remote file and return a file object
- (like urllib2.urlopen)
- urlread(url) return the contents of the file as a string
-
-When using these functions (or methods), urlgrabber supports the
-following features:
-
- * identical behavior for http://, ftp://, and file:// urls
- * http keepalive - faster downloads of many files by using
- only a single connection
- * byte ranges - fetch only a portion of the file
- * reget - for a urlgrab, resume a partial download
- * progress meters - the ability to report download progress
- automatically, even when using urlopen!
- * throttling - restrict bandwidth usage
- * retries - automatically retry a download if it fails. The
- number of retries and failure types are configurable.
- * authenticated server access for http and ftp
- * proxy support - support for authenticated http and ftp proxies
- * mirror groups - treat a list of mirrors as a single source,
- automatically switching mirrors if there is a failure.
-"""
-
-__version__ = '3.1.0'
-__date__ = '2006/09/21'
-__author__ = 'Michael D. Stenner <mstenner@linux.duke.edu>, ' \
- 'Ryan Tomayko <rtomayko@naeblis.cx>'
-__url__ = 'http://linux.duke.edu/projects/urlgrabber/'
-
-from grabber import urlgrab, urlopen, urlread
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py
deleted file mode 100644
index 001b4e32d6..0000000000
--- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py
+++ /dev/null
@@ -1,463 +0,0 @@
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the
-# Free Software Foundation, Inc.,
-# 59 Temple Place, Suite 330,
-# Boston, MA 02111-1307 USA
-
-# This file is part of urlgrabber, a high-level cross-protocol url-grabber
-# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
-
-# $Id: byterange.py,v 1.12 2006/07/20 20:15:58 mstenner Exp $
-
-import os
-import stat
-import urllib
-import urllib2
-import rfc822
-
-DEBUG = None
-
-try:
- from cStringIO import StringIO
-except ImportError, msg:
- from StringIO import StringIO
-
-class RangeError(IOError):
- """Error raised when an unsatisfiable range is requested."""
- pass
-
-class HTTPRangeHandler(urllib2.BaseHandler):
- """Handler that enables HTTP Range headers.
-
- This was extremely simple. The Range header is a HTTP feature to
- begin with so all this class does is tell urllib2 that the
- "206 Partial Content" reponse from the HTTP server is what we
- expected.
-
- Example:
- import urllib2
- import byterange
-
- range_handler = range.HTTPRangeHandler()
- opener = urllib2.build_opener(range_handler)
-
- # install it
- urllib2.install_opener(opener)
-
- # create Request and set Range header
- req = urllib2.Request('http://www.python.org/')
- req.header['Range'] = 'bytes=30-50'
- f = urllib2.urlopen(req)
- """
-
- def http_error_206(self, req, fp, code, msg, hdrs):
- # 206 Partial Content Response
- r = urllib.addinfourl(fp, hdrs, req.get_full_url())
- r.code = code
- r.msg = msg
- return r
-
- def http_error_416(self, req, fp, code, msg, hdrs):
- # HTTP's Range Not Satisfiable error
- raise RangeError('Requested Range Not Satisfiable')
-
-class HTTPSRangeHandler(HTTPRangeHandler):
- """ Range Header support for HTTPS. """
-
- def https_error_206(self, req, fp, code, msg, hdrs):
- return self.http_error_206(req, fp, code, msg, hdrs)
-
- def https_error_416(self, req, fp, code, msg, hdrs):
- self.https_error_416(req, fp, code, msg, hdrs)
-
-class RangeableFileObject:
- """File object wrapper to enable raw range handling.
- This was implemented primarilary for handling range
- specifications for file:// urls. This object effectively makes
- a file object look like it consists only of a range of bytes in
- the stream.
-
- Examples:
- # expose 10 bytes, starting at byte position 20, from
- # /etc/aliases.
- >>> fo = RangeableFileObject(file('/etc/passwd', 'r'), (20,30))
- # seek seeks within the range (to position 23 in this case)
- >>> fo.seek(3)
- # tell tells where your at _within the range_ (position 3 in
- # this case)
- >>> fo.tell()
- # read EOFs if an attempt is made to read past the last
- # byte in the range. the following will return only 7 bytes.
- >>> fo.read(30)
- """
-
- def __init__(self, fo, rangetup):
- """Create a RangeableFileObject.
- fo -- a file like object. only the read() method need be
- supported but supporting an optimized seek() is
- preferable.
- rangetup -- a (firstbyte,lastbyte) tuple specifying the range
- to work over.
- The file object provided is assumed to be at byte offset 0.
- """
- self.fo = fo
- (self.firstbyte, self.lastbyte) = range_tuple_normalize(rangetup)
- self.realpos = 0
- self._do_seek(self.firstbyte)
-
- def __getattr__(self, name):
- """This effectively allows us to wrap at the instance level.
- Any attribute not found in _this_ object will be searched for
- in self.fo. This includes methods."""
- if hasattr(self.fo, name):
- return getattr(self.fo, name)
- raise AttributeError, name
-
- def tell(self):
- """Return the position within the range.
- This is different from fo.seek in that position 0 is the
- first byte position of the range tuple. For example, if
- this object was created with a range tuple of (500,899),
- tell() will return 0 when at byte position 500 of the file.
- """
- return (self.realpos - self.firstbyte)
-
- def seek(self,offset,whence=0):
- """Seek within the byte range.
- Positioning is identical to that described under tell().
- """
- assert whence in (0, 1, 2)
- if whence == 0: # absolute seek
- realoffset = self.firstbyte + offset
- elif whence == 1: # relative seek
- realoffset = self.realpos + offset
- elif whence == 2: # absolute from end of file
- # XXX: are we raising the right Error here?
- raise IOError('seek from end of file not supported.')
-
- # do not allow seek past lastbyte in range
- if self.lastbyte and (realoffset >= self.lastbyte):
- realoffset = self.lastbyte
-
- self._do_seek(realoffset - self.realpos)
-
- def read(self, size=-1):
- """Read within the range.
- This method will limit the size read based on the range.
- """
- size = self._calc_read_size(size)
- rslt = self.fo.read(size)
- self.realpos += len(rslt)
- return rslt
-
- def readline(self, size=-1):
- """Read lines within the range.
- This method will limit the size read based on the range.
- """
- size = self._calc_read_size(size)
- rslt = self.fo.readline(size)
- self.realpos += len(rslt)
- return rslt
-
- def _calc_read_size(self, size):
- """Handles calculating the amount of data to read based on
- the range.
- """
- if self.lastbyte:
- if size > -1:
- if ((self.realpos + size) >= self.lastbyte):
- size = (self.lastbyte - self.realpos)
- else:
- size = (self.lastbyte - self.realpos)
- return size
-
- def _do_seek(self,offset):
- """Seek based on whether wrapped object supports seek().
- offset is relative to the current position (self.realpos).
- """
- assert offset >= 0
- if not hasattr(self.fo, 'seek'):
- self._poor_mans_seek(offset)
- else:
- self.fo.seek(self.realpos + offset)
- self.realpos+= offset
-
- def _poor_mans_seek(self,offset):
- """Seek by calling the wrapped file objects read() method.
- This is used for file like objects that do not have native
- seek support. The wrapped objects read() method is called
- to manually seek to the desired position.
- offset -- read this number of bytes from the wrapped
- file object.
- raise RangeError if we encounter EOF before reaching the
- specified offset.
- """
- pos = 0
- bufsize = 1024
- while pos < offset:
- if (pos + bufsize) > offset:
- bufsize = offset - pos
- buf = self.fo.read(bufsize)
- if len(buf) != bufsize:
- raise RangeError('Requested Range Not Satisfiable')
- pos+= bufsize
-
-class FileRangeHandler(urllib2.FileHandler):
- """FileHandler subclass that adds Range support.
- This class handles Range headers exactly like an HTTP
- server would.
- """
- def open_local_file(self, req):
- import mimetypes
- import mimetools
- host = req.get_host()
- file = req.get_selector()
- localfile = urllib.url2pathname(file)
- stats = os.stat(localfile)
- size = stats[stat.ST_SIZE]
- modified = rfc822.formatdate(stats[stat.ST_MTIME])
- mtype = mimetypes.guess_type(file)[0]
- if host:
- host, port = urllib.splitport(host)
- if port or socket.gethostbyname(host) not in self.get_names():
- raise urllib2.URLError('file not on local host')
- fo = open(localfile,'rb')
- brange = req.headers.get('Range',None)
- brange = range_header_to_tuple(brange)
- assert brange != ()
- if brange:
- (fb,lb) = brange
- if lb == '': lb = size
- if fb < 0 or fb > size or lb > size:
- raise RangeError('Requested Range Not Satisfiable')
- size = (lb - fb)
- fo = RangeableFileObject(fo, (fb,lb))
- headers = mimetools.Message(StringIO(
- 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
- (mtype or 'text/plain', size, modified)))
- return urllib.addinfourl(fo, headers, 'file:'+file)
-
-
-# FTP Range Support
-# Unfortunately, a large amount of base FTP code had to be copied
-# from urllib and urllib2 in order to insert the FTP REST command.
-# Code modifications for range support have been commented as
-# follows:
-# -- range support modifications start/end here
-
-from urllib import splitport, splituser, splitpasswd, splitattr, \
- unquote, addclosehook, addinfourl
-import ftplib
-import socket
-import sys
-import ftplib
-import mimetypes
-import mimetools
-
-class FTPRangeHandler(urllib2.FTPHandler):
- def ftp_open(self, req):
- host = req.get_host()
- if not host:
- raise IOError, ('ftp error', 'no host given')
- host, port = splitport(host)
- if port is None:
- port = ftplib.FTP_PORT
-
- # username/password handling
- user, host = splituser(host)
- if user:
- user, passwd = splitpasswd(user)
- else:
- passwd = None
- host = unquote(host)
- user = unquote(user or '')
- passwd = unquote(passwd or '')
-
- try:
- host = socket.gethostbyname(host)
- except socket.error, msg:
- raise urllib2.URLError(msg)
- path, attrs = splitattr(req.get_selector())
- dirs = path.split('/')
- dirs = map(unquote, dirs)
- dirs, file = dirs[:-1], dirs[-1]
- if dirs and not dirs[0]:
- dirs = dirs[1:]
- try:
- fw = self.connect_ftp(user, passwd, host, port, dirs)
- type = file and 'I' or 'D'
- for attr in attrs:
- attr, value = splitattr(attr)
- if attr.lower() == 'type' and \
- value in ('a', 'A', 'i', 'I', 'd', 'D'):
- type = value.upper()
-
- # -- range support modifications start here
- rest = None
- range_tup = range_header_to_tuple(req.headers.get('Range',None))
- assert range_tup != ()
- if range_tup:
- (fb,lb) = range_tup
- if fb > 0: rest = fb
- # -- range support modifications end here
-
- fp, retrlen = fw.retrfile(file, type, rest)
-
- # -- range support modifications start here
- if range_tup:
- (fb,lb) = range_tup
- if lb == '':
- if retrlen is None or retrlen == 0:
- raise RangeError('Requested Range Not Satisfiable due to unobtainable file length.')
- lb = retrlen
- retrlen = lb - fb
- if retrlen < 0:
- # beginning of range is larger than file
- raise RangeError('Requested Range Not Satisfiable')
- else:
- retrlen = lb - fb
- fp = RangeableFileObject(fp, (0,retrlen))
- # -- range support modifications end here
-
- headers = ""
- mtype = mimetypes.guess_type(req.get_full_url())[0]
- if mtype:
- headers += "Content-Type: %s\n" % mtype
- if retrlen is not None and retrlen >= 0:
- headers += "Content-Length: %d\n" % retrlen
- sf = StringIO(headers)
- headers = mimetools.Message(sf)
- return addinfourl(fp, headers, req.get_full_url())
- except ftplib.all_errors, msg:
- raise IOError, ('ftp error', msg), sys.exc_info()[2]
-
- def connect_ftp(self, user, passwd, host, port, dirs):
- fw = ftpwrapper(user, passwd, host, port, dirs)
- return fw
-
-class ftpwrapper(urllib.ftpwrapper):
- # range support note:
- # this ftpwrapper code is copied directly from
- # urllib. The only enhancement is to add the rest
- # argument and pass it on to ftp.ntransfercmd
- def retrfile(self, file, type, rest=None):
- self.endtransfer()
- if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
- else: cmd = 'TYPE ' + type; isdir = 0
- try:
- self.ftp.voidcmd(cmd)
- except ftplib.all_errors:
- self.init()
- self.ftp.voidcmd(cmd)
- conn = None
- if file and not isdir:
- # Use nlst to see if the file exists at all
- try:
- self.ftp.nlst(file)
- except ftplib.error_perm, reason:
- raise IOError, ('ftp error', reason), sys.exc_info()[2]
- # Restore the transfer mode!
- self.ftp.voidcmd(cmd)
- # Try to retrieve as a file
- try:
- cmd = 'RETR ' + file
- conn = self.ftp.ntransfercmd(cmd, rest)
- except ftplib.error_perm, reason:
- if str(reason)[:3] == '501':
- # workaround for REST not supported error
- fp, retrlen = self.retrfile(file, type)
- fp = RangeableFileObject(fp, (rest,''))
- return (fp, retrlen)
- elif str(reason)[:3] != '550':
- raise IOError, ('ftp error', reason), sys.exc_info()[2]
- if not conn:
- # Set transfer mode to ASCII!
- self.ftp.voidcmd('TYPE A')
- # Try a directory listing
- if file: cmd = 'LIST ' + file
- else: cmd = 'LIST'
- conn = self.ftp.ntransfercmd(cmd)
- self.busy = 1
- # Pass back both a suitably decorated object and a retrieval length
- return (addclosehook(conn[0].makefile('rb'),
- self.endtransfer), conn[1])
-
-
-####################################################################
-# Range Tuple Functions
-# XXX: These range tuple functions might go better in a class.
-
-_rangere = None
-def range_header_to_tuple(range_header):
- """Get a (firstbyte,lastbyte) tuple from a Range header value.
-
- Range headers have the form "bytes=<firstbyte>-<lastbyte>". This
- function pulls the firstbyte and lastbyte values and returns
- a (firstbyte,lastbyte) tuple. If lastbyte is not specified in
- the header value, it is returned as an empty string in the
- tuple.
-
- Return None if range_header is None
- Return () if range_header does not conform to the range spec
- pattern.
-
- """
- global _rangere
- if range_header is None: return None
- if _rangere is None:
- import re
- _rangere = re.compile(r'^bytes=(\d{1,})-(\d*)')
- match = _rangere.match(range_header)
- if match:
- tup = range_tuple_normalize(match.group(1,2))
- if tup and tup[1]:
- tup = (tup[0],tup[1]+1)
- return tup
- return ()
-
-def range_tuple_to_header(range_tup):
- """Convert a range tuple to a Range header value.
- Return a string of the form "bytes=<firstbyte>-<lastbyte>" or None
- if no range is needed.
- """
- if range_tup is None: return None
- range_tup = range_tuple_normalize(range_tup)
- if range_tup:
- if range_tup[1]:
- range_tup = (range_tup[0],range_tup[1] - 1)
- return 'bytes=%s-%s' % range_tup
-
-def range_tuple_normalize(range_tup):
- """Normalize a (first_byte,last_byte) range tuple.
- Return a tuple whose first element is guaranteed to be an int
- and whose second element will be '' (meaning: the last byte) or
- an int. Finally, return None if the normalized tuple == (0,'')
- as that is equivelant to retrieving the entire file.
- """
- if range_tup is None: return None
- # handle first byte
- fb = range_tup[0]
- if fb in (None,''): fb = 0
- else: fb = int(fb)
- # handle last byte
- try: lb = range_tup[1]
- except IndexError: lb = ''
- else:
- if lb is None: lb = ''
- elif lb != '': lb = int(lb)
- # check if range is over the entire file
- if (fb,lb) == (0,''): return None
- # check that the range is valid
- if lb < fb: raise RangeError('Invalid byte range: %s-%s' % (fb,lb))
- return (fb,lb)
-
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/grabber.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/grabber.py
deleted file mode 100644
index fefdab36f6..0000000000
--- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/grabber.py
+++ /dev/null
@@ -1,1477 +0,0 @@
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the
-# Free Software Foundation, Inc.,
-# 59 Temple Place, Suite 330,
-# Boston, MA 02111-1307 USA
-
-# This file is part of urlgrabber, a high-level cross-protocol url-grabber
-# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
-
-"""A high-level cross-protocol url-grabber.
-
-GENERAL ARGUMENTS (kwargs)
-
- Where possible, the module-level default is indicated, and legal
- values are provided.
-
- copy_local = 0 [0|1]
-
- ignored except for file:// urls, in which case it specifies
- whether urlgrab should still make a copy of the file, or simply
- point to the existing copy. The module level default for this
- option is 0.
-
- close_connection = 0 [0|1]
-
- tells URLGrabber to close the connection after a file has been
- transfered. This is ignored unless the download happens with the
- http keepalive handler (keepalive=1). Otherwise, the connection
- is left open for further use. The module level default for this
- option is 0 (keepalive connections will not be closed).
-
- keepalive = 1 [0|1]
-
- specifies whether keepalive should be used for HTTP/1.1 servers
- that support it. The module level default for this option is 1
- (keepalive is enabled).
-
- progress_obj = None
-
- a class instance that supports the following methods:
- po.start(filename, url, basename, length, text)
- # length will be None if unknown
- po.update(read) # read == bytes read so far
- po.end()
-
- text = None
-
- specifies an alternativ text item in the beginning of the progress
- bar line. If not given, the basename of the file is used.
-
- throttle = 1.0
-
- a number - if it's an int, it's the bytes/second throttle limit.
- If it's a float, it is first multiplied by bandwidth. If throttle
- == 0, throttling is disabled. If None, the module-level default
- (which can be set on default_grabber.throttle) is used. See
- BANDWIDTH THROTTLING for more information.
-
- timeout = None
-
- a positive float expressing the number of seconds to wait for socket
- operations. If the value is None or 0.0, socket operations will block
- forever. Setting this option causes urlgrabber to call the settimeout
- method on the Socket object used for the request. See the Python
- documentation on settimeout for more information.
- http://www.python.org/doc/current/lib/socket-objects.html
-
- bandwidth = 0
-
- the nominal max bandwidth in bytes/second. If throttle is a float
- and bandwidth == 0, throttling is disabled. If None, the
- module-level default (which can be set on
- default_grabber.bandwidth) is used. See BANDWIDTH THROTTLING for
- more information.
-
- range = None
-
- a tuple of the form (first_byte, last_byte) describing a byte
- range to retrieve. Either or both of the values may set to
- None. If first_byte is None, byte offset 0 is assumed. If
- last_byte is None, the last byte available is assumed. Note that
- the range specification is python-like in that (0,10) will yeild
- the first 10 bytes of the file.
-
- If set to None, no range will be used.
-
- reget = None [None|'simple'|'check_timestamp']
-
- whether to attempt to reget a partially-downloaded file. Reget
- only applies to .urlgrab and (obviously) only if there is a
- partially downloaded file. Reget has two modes:
-
- 'simple' -- the local file will always be trusted. If there
- are 100 bytes in the local file, then the download will always
- begin 100 bytes into the requested file.
-
- 'check_timestamp' -- the timestamp of the server file will be
- compared to the timestamp of the local file. ONLY if the
- local file is newer than or the same age as the server file
- will reget be used. If the server file is newer, or the
- timestamp is not returned, the entire file will be fetched.
-
- NOTE: urlgrabber can do very little to verify that the partial
- file on disk is identical to the beginning of the remote file.
- You may want to either employ a custom "checkfunc" or simply avoid
- using reget in situations where corruption is a concern.
-
- user_agent = 'urlgrabber/VERSION'
-
- a string, usually of the form 'AGENT/VERSION' that is provided to
- HTTP servers in the User-agent header. The module level default
- for this option is "urlgrabber/VERSION".
-
- http_headers = None
-
- a tuple of 2-tuples, each containing a header and value. These
- will be used for http and https requests only. For example, you
- can do
- http_headers = (('Pragma', 'no-cache'),)
-
- ftp_headers = None
-
- this is just like http_headers, but will be used for ftp requests.
-
- proxies = None
-
- a dictionary that maps protocol schemes to proxy hosts. For
- example, to use a proxy server on host "foo" port 3128 for http
- and https URLs:
- proxies={ 'http' : 'http://foo:3128', 'https' : 'http://foo:3128' }
- note that proxy authentication information may be provided using
- normal URL constructs:
- proxies={ 'http' : 'http://user:host@foo:3128' }
- Lastly, if proxies is None, the default environment settings will
- be used.
-
- prefix = None
-
- a url prefix that will be prepended to all requested urls. For
- example:
- g = URLGrabber(prefix='http://foo.com/mirror/')
- g.urlgrab('some/file.txt')
- ## this will fetch 'http://foo.com/mirror/some/file.txt'
- This option exists primarily to allow identical behavior to
- MirrorGroup (and derived) instances. Note: a '/' will be inserted
- if necessary, so you cannot specify a prefix that ends with a
- partial file or directory name.
-
- opener = None
-
- Overrides the default urllib2.OpenerDirector provided to urllib2
- when making requests. This option exists so that the urllib2
- handler chain may be customized. Note that the range, reget,
- proxy, and keepalive features require that custom handlers be
- provided to urllib2 in order to function properly. If an opener
- option is provided, no attempt is made by urlgrabber to ensure
- chain integrity. You are responsible for ensuring that any
- extension handlers are present if said features are required.
-
- data = None
-
- Only relevant for the HTTP family (and ignored for other
- protocols), this allows HTTP POSTs. When the data kwarg is
- present (and not None), an HTTP request will automatically become
- a POST rather than GET. This is done by direct passthrough to
- urllib2. If you use this, you may also want to set the
- 'Content-length' and 'Content-type' headers with the http_headers
- option. Note that python 2.2 handles the case of these
- badly and if you do not use the proper case (shown here), your
- values will be overridden with the defaults.
-
-
-RETRY RELATED ARGUMENTS
-
- retry = None
-
- the number of times to retry the grab before bailing. If this is
- zero, it will retry forever. This was intentional... really, it
- was :). If this value is not supplied or is supplied but is None
- retrying does not occur.
-
- retrycodes = [-1,2,4,5,6,7]
-
- a sequence of errorcodes (values of e.errno) for which it should
- retry. See the doc on URLGrabError for more details on this. You
- might consider modifying a copy of the default codes rather than
- building yours from scratch so that if the list is extended in the
- future (or one code is split into two) you can still enjoy the
- benefits of the default list. You can do that with something like
- this:
-
- retrycodes = urlgrabber.grabber.URLGrabberOptions().retrycodes
- if 12 not in retrycodes:
- retrycodes.append(12)
-
- checkfunc = None
-
- a function to do additional checks. This defaults to None, which
- means no additional checking. The function should simply return
- on a successful check. It should raise URLGrabError on an
- unsuccessful check. Raising of any other exception will be
- considered immediate failure and no retries will occur.
-
- If it raises URLGrabError, the error code will determine the retry
- behavior. Negative error numbers are reserved for use by these
- passed in functions, so you can use many negative numbers for
- different types of failure. By default, -1 results in a retry,
- but this can be customized with retrycodes.
-
- If you simply pass in a function, it will be given exactly one
- argument: a CallbackObject instance with the .url attribute
- defined and either .filename (for urlgrab) or .data (for urlread).
- For urlgrab, .filename is the name of the local file. For
- urlread, .data is the actual string data. If you need other
- arguments passed to the callback (program state of some sort), you
- can do so like this:
-
- checkfunc=(function, ('arg1', 2), {'kwarg': 3})
-
- if the downloaded file has filename /tmp/stuff, then this will
- result in this call (for urlgrab):
-
- function(obj, 'arg1', 2, kwarg=3)
- # obj.filename = '/tmp/stuff'
- # obj.url = 'http://foo.com/stuff'
-
- NOTE: both the "args" tuple and "kwargs" dict must be present if
- you use this syntax, but either (or both) can be empty.
-
- failure_callback = None
-
- The callback that gets called during retries when an attempt to
- fetch a file fails. The syntax for specifying the callback is
- identical to checkfunc, except for the attributes defined in the
- CallbackObject instance. The attributes for failure_callback are:
-
- exception = the raised exception
- url = the url we're trying to fetch
- tries = the number of tries so far (including this one)
- retry = the value of the retry option
-
- The callback is present primarily to inform the calling program of
- the failure, but if it raises an exception (including the one it's
- passed) that exception will NOT be caught and will therefore cause
- future retries to be aborted.
-
- The callback is called for EVERY failure, including the last one.
- On the last try, the callback can raise an alternate exception,
- but it cannot (without severe trickiness) prevent the exception
- from being raised.
-
- interrupt_callback = None
-
- This callback is called if KeyboardInterrupt is received at any
- point in the transfer. Basically, this callback can have three
- impacts on the fetch process based on the way it exits:
-
- 1) raise no exception: the current fetch will be aborted, but
- any further retries will still take place
-
- 2) raise a URLGrabError: if you're using a MirrorGroup, then
- this will prompt a failover to the next mirror according to
- the behavior of the MirrorGroup subclass. It is recommended
- that you raise URLGrabError with code 15, 'user abort'. If
- you are NOT using a MirrorGroup subclass, then this is the
- same as (3).
-
- 3) raise some other exception (such as KeyboardInterrupt), which
- will not be caught at either the grabber or mirror levels.
- That is, it will be raised up all the way to the caller.
-
- This callback is very similar to failure_callback. They are
- passed the same arguments, so you could use the same function for
- both.
-
- urlparser = URLParser()
-
- The URLParser class handles pre-processing of URLs, including
- auth-handling for user/pass encoded in http urls, file handing
- (that is, filenames not sent as a URL), and URL quoting. If you
- want to override any of this behavior, you can pass in a
- replacement instance. See also the 'quote' option.
-
- quote = None
-
- Whether or not to quote the path portion of a url.
- quote = 1 -> quote the URLs (they're not quoted yet)
- quote = 0 -> do not quote them (they're already quoted)
- quote = None -> guess what to do
-
- This option only affects proper urls like 'file:///etc/passwd'; it
- does not affect 'raw' filenames like '/etc/passwd'. The latter
- will always be quoted as they are converted to URLs. Also, only
- the path part of a url is quoted. If you need more fine-grained
- control, you should probably subclass URLParser and pass it in via
- the 'urlparser' option.
-
-BANDWIDTH THROTTLING
-
- urlgrabber supports throttling via two values: throttle and
- bandwidth Between the two, you can either specify and absolute
- throttle threshold or specify a theshold as a fraction of maximum
- available bandwidth.
-
- throttle is a number - if it's an int, it's the bytes/second
- throttle limit. If it's a float, it is first multiplied by
- bandwidth. If throttle == 0, throttling is disabled. If None, the
- module-level default (which can be set with set_throttle) is used.
-
- bandwidth is the nominal max bandwidth in bytes/second. If throttle
- is a float and bandwidth == 0, throttling is disabled. If None, the
- module-level default (which can be set with set_bandwidth) is used.
-
- THROTTLING EXAMPLES:
-
- Lets say you have a 100 Mbps connection. This is (about) 10^8 bits
- per second, or 12,500,000 Bytes per second. You have a number of
- throttling options:
-
- *) set_bandwidth(12500000); set_throttle(0.5) # throttle is a float
-
- This will limit urlgrab to use half of your available bandwidth.
-
- *) set_throttle(6250000) # throttle is an int
-
- This will also limit urlgrab to use half of your available
- bandwidth, regardless of what bandwidth is set to.