diff options
-rw-r--r-- | scripts/lib/wic/filemap.py | 531 |
1 files changed, 531 insertions, 0 deletions
diff --git a/scripts/lib/wic/filemap.py b/scripts/lib/wic/filemap.py new file mode 100644 index 0000000000..1d04328322 --- /dev/null +++ b/scripts/lib/wic/filemap.py @@ -0,0 +1,531 @@ +# Copyright (c) 2012 Intel, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License, version 2, +# as published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. + +""" +This module implements python implements a way to get file block. Two methods +are supported - the FIEMAP ioctl and the 'SEEK_HOLE / SEEK_DATA' features of +the file seek syscall. The former is implemented by the 'FilemapFiemap' class, +the latter is implemented by the 'FilemapSeek' class. Both classes provide the +same API. The 'filemap' function automatically selects which class can be used +and returns an instance of the class. +""" + +# Disable the following pylint recommendations: +# * Too many instance attributes (R0902) +# pylint: disable=R0902 + +import os +import struct +import array +import fcntl +import tempfile +import logging + +def get_block_size(file_obj): + """ + Returns block size for file object 'file_obj'. Errors are indicated by the + 'IOError' exception. + """ + + from fcntl import ioctl + import struct + + # Get the block size of the host file-system for the image file by calling + # the FIGETBSZ ioctl (number 2). + binary_data = ioctl(file_obj, 2, struct.pack('I', 0)) + return struct.unpack('I', binary_data)[0] + +class ErrorNotSupp(Exception): + """ + An exception of this type is raised when the 'FIEMAP' or 'SEEK_HOLE' feature + is not supported either by the kernel or the file-system. + """ + pass + +class Error(Exception): + """A class for all the other exceptions raised by this module.""" + pass + + +class _FilemapBase(object): + """ + This is a base class for a couple of other classes in this module. This + class simply performs the common parts of the initialization process: opens + the image file, gets its size, etc. The 'log' parameter is the logger object + to use for printing messages. + """ + + def __init__(self, image, log=None): + """ + Initialize a class instance. The 'image' argument is full path to the + file or file object to operate on. + """ + + self._log = log + if self._log is None: + self._log = logging.getLogger(__name__) + + self._f_image_needs_close = False + + if hasattr(image, "fileno"): + self._f_image = image + self._image_path = image.name + else: + self._image_path = image + self._open_image_file() + + try: + self.image_size = os.fstat(self._f_image.fileno()).st_size + except IOError as err: + raise Error("cannot get information about file '%s': %s" + % (self._f_image.name, err)) + + try: + self.block_size = get_block_size(self._f_image) + except IOError as err: + raise Error("cannot get block size for '%s': %s" + % (self._image_path, err)) + + self.blocks_cnt = self.image_size + self.block_size - 1 + self.blocks_cnt /= self.block_size + + try: + self._f_image.flush() + except IOError as err: + raise Error("cannot flush image file '%s': %s" + % (self._image_path, err)) + + try: + os.fsync(self._f_image.fileno()), + except OSError as err: + raise Error("cannot synchronize image file '%s': %s " + % (self._image_path, err.strerror)) + + self._log.debug("opened image \"%s\"" % self._image_path) + self._log.debug("block size %d, blocks count %d, image size %d" + % (self.block_size, self.blocks_cnt, self.image_size)) + + def __del__(self): + """The class destructor which just closes the image file.""" + if self._f_image_needs_close: + self._f_image.close() + + def _open_image_file(self): + """Open the image file.""" + try: + self._f_image = open(self._image_path, 'rb') + except IOError as err: + raise Error("cannot open image file '%s': %s" + % (self._image_path, err)) + + self._f_image_needs_close = True + + def block_is_mapped(self, block): # pylint: disable=W0613,R0201 + """ + This method has has to be implemented by child classes. It returns + 'True' if block number 'block' of the image file is mapped and 'False' + otherwise. + """ + + raise Error("the method is not implemented") + + def block_is_unmapped(self, block): # pylint: disable=W0613,R0201 + """ + This method has has to be implemented by child classes. It returns + 'True' if block number 'block' of the image file is not mapped (hole) + and 'False' otherwise. + """ + + raise Error("the method is not implemented") + + def get_mapped_ranges(self, start, count): # pylint: disable=W0613,R0201 + """ + This method has has to be implemented by child classes. This is a + generator which yields ranges of mapped blocks in the file. The ranges + are tuples of 2 elements: [first, last], where 'first' is the first + mapped block and 'last' is the last mapped block. + + The ranges are yielded for the area of the file of size 'count' blocks, + starting from block 'start'. + """ + + raise Error("the method is not implemented") + + def get_unmapped_ranges(self, start, count): # pylint: disable=W0613,R0201 + """ + This method has has to be implemented by child classes. Just like + 'get_mapped_ranges()', but yields unmapped block ranges instead + (holes). + """ + + raise Error("the method is not implemented") + + +# The 'SEEK_HOLE' and 'SEEK_DATA' options of the file seek system call +_SEEK_DATA = 3 +_SEEK_HOLE = 4 + +def _lseek(file_obj, offset, whence): + """This is a helper function which invokes 'os.lseek' for file object + 'file_obj' and with specified 'offset' and 'whence'. The 'whence' + argument is supposed to be either '_SEEK_DATA' or '_SEEK_HOLE'. When + there is no more data or hole starting from 'offset', this function + returns '-1'. Otherwise the data or hole position is returned.""" + + try: + return os.lseek(file_obj.fileno(), offset, whence) + except OSError as err: + # The 'lseek' system call returns the ENXIO if there is no data or + # hole starting from the specified offset. + if err.errno == os.errno.ENXIO: + return -1 + elif err.errno == os.errno.EINVAL: + raise ErrorNotSupp("the kernel or file-system does not support " + "\"SEEK_HOLE\" and \"SEEK_DATA\"") + else: + raise + +class FilemapSeek(_FilemapBase): + """ + This class uses the 'SEEK_HOLE' and 'SEEK_DATA' to find file block mapping. + Unfortunately, the current implementation requires the caller to have write + access to the image file. + """ + + def __init__(self, image, log=None): + """Refer the '_FilemapBase' class for the documentation.""" + + # Call the base class constructor first + _FilemapBase.__init__(self, image, log) + self._log.debug("FilemapSeek: initializing") + + self._probe_seek_hole() + + def _probe_seek_hole(self): + """ + Check whether the system implements 'SEEK_HOLE' and 'SEEK_DATA'. + Unfortunately, there seems to be no clean way for detecting this, + because often the system just fakes them by just assuming that all + files are fully mapped, so 'SEEK_HOLE' always returns EOF and + 'SEEK_DATA' always returns the requested offset. + + I could not invent a better way of detecting the fake 'SEEK_HOLE' + implementation than just to create a temporary file in the same + directory where the image file resides. It would be nice to change this + to something better. + """ + + directory = os.path.dirname(self._image_path) + + try: + tmp_obj = tempfile.TemporaryFile("w+", dir=directory) + except IOError as err: + raise ErrorNotSupp("cannot create a temporary in \"%s\": %s" + % (directory, err)) + + try: + os.ftruncate(tmp_obj.fileno(), self.block_size) + except OSError as err: + raise ErrorNotSupp("cannot truncate temporary file in \"%s\": %s" + % (directory, err)) + + offs = _lseek(tmp_obj, 0, _SEEK_HOLE) + if offs != 0: + # We are dealing with the stub 'SEEK_HOLE' implementation which + # always returns EOF. + self._log.debug("lseek(0, SEEK_HOLE) returned %d" % offs) + raise ErrorNotSupp("the file-system does not support " + "\"SEEK_HOLE\" and \"SEEK_DATA\" but only " + "provides a stub implementation") + + tmp_obj.close() + + def block_is_mapped(self, block): + """Refer the '_FilemapBase' class for the documentation.""" + offs = _lseek(self._f_image, block * self.block_size, _SEEK_DATA) + if offs == -1: + result = False + else: + result = (offs / self.block_size == block) + + self._log.debug("FilemapSeek: block_is_mapped(%d) returns %s" + % (block, result)) + return result + + def block_is_unmapped(self, block): + """Refer the '_FilemapBase' class for the documentation.""" + return not self.block_is_mapped(block) + + def _get_ranges(self, start, count, whence1, whence2): + """ + This function implements 'get_mapped_ranges()' and + 'get_unmapped_ranges()' depending on what is passed in the 'whence1' + and 'whence2' arguments. + """ + + assert whence1 != whence2 + end = start * self.block_size + limit = end + count * self.block_size + + while True: + start = _lseek(self._f_image, end, whence1) + if start == -1 or start >= limit or start == self.image_size: + break + + end = _lseek(self._f_image, start, whence2) + if end == -1 or end == self.image_size: + end = self.blocks_cnt * self.block_size + if end > limit: + end = limit + + start_blk = start / self.block_size + end_blk = end / self.block_size - 1 + self._log.debug("FilemapSeek: yielding range (%d, %d)" + % (start_blk, end_blk)) + yield (start_blk, end_blk) + + def get_mapped_ranges(self, start, count): + """Refer the '_FilemapBase' class for the documentation.""" + self._log.debug("FilemapSeek: get_mapped_ranges(%d, %d(%d))" + % (start, count, start + count - 1)) + return self._get_ranges(start, count, _SEEK_DATA, _SEEK_HOLE) + + def get_unmapped_ranges(self, start, count): + """Refer the '_FilemapBase' class for the documentation.""" + self._log.debug("FilemapSeek: get_unmapped_ranges(%d, %d(%d))" + % (start, count, start + count - 1)) + return self._get_ranges(start, count, _SEEK_HOLE, _SEEK_DATA) + + +# Below goes the FIEMAP ioctl implementation, which is not very readable +# because it deals with the rather complex FIEMAP ioctl. To understand the +# code, you need to know the FIEMAP interface, which is documented in the +# "Documentation/filesystems/fiemap.txt" file in the Linux kernel sources. + +# Format string for 'struct fiemap' +_FIEMAP_FORMAT = "=QQLLLL" +# sizeof(struct fiemap) +_FIEMAP_SIZE = struct.calcsize(_FIEMAP_FORMAT) +# Format string for 'struct fiemap_extent' +_FIEMAP_EXTENT_FORMAT = "=QQQQQLLLL" +# sizeof(struct fiemap_extent) +_FIEMAP_EXTENT_SIZE = struct.calcsize(_FIEMAP_EXTENT_FORMAT) +# The FIEMAP ioctl number +_FIEMAP_IOCTL = 0xC020660B +# This FIEMAP ioctl flag which instructs the kernel to sync the file before +# reading the block map +_FIEMAP_FLAG_SYNC = 0x00000001 +# Size of the buffer for 'struct fiemap_extent' elements which will be used +# when invoking the FIEMAP ioctl. The larger is the buffer, the less times the +# FIEMAP ioctl will be invoked. +_FIEMAP_BUFFER_SIZE = 256 * 1024 + +class FilemapFiemap(_FilemapBase): + """ + This class provides API to the FIEMAP ioctl. Namely, it allows to iterate + over all mapped blocks and over all holes. + + This class synchronizes the image file every time it invokes the FIEMAP + ioctl in order to work-around early FIEMAP implementation kernel bugs. + """ + + def __init__(self, image, log=None): + """ + Initialize a class instance. The 'image' argument is full the file + object to operate on. + """ + + # Call the base class constructor first + _FilemapBase.__init__(self, image, log) + self._log.debug("FilemapFiemap: initializing") + + self._buf_size = _FIEMAP_BUFFER_SIZE + + # Calculate how many 'struct fiemap_extent' elements fit the buffer + self._buf_size -= _FIEMAP_SIZE + self._fiemap_extent_cnt = self._buf_size / _FIEMAP_EXTENT_SIZE + assert self._fiemap_extent_cnt > 0 + self._buf_size = self._fiemap_extent_cnt * _FIEMAP_EXTENT_SIZE + self._buf_size += _FIEMAP_SIZE + + # Allocate a mutable buffer for the FIEMAP ioctl + self._buf = array.array('B', [0] * self._buf_size) + + # Check if the FIEMAP ioctl is supported + self.block_is_mapped(0) + + def _invoke_fiemap(self, block, count): + """ + Invoke the FIEMAP ioctl for 'count' blocks of the file starting from + block number 'block'. + + The full result of the operation is stored in 'self._buf' on exit. + Returns the unpacked 'struct fiemap' data structure in form of a python + list (just like 'struct.upack()'). + """ + + if self.blocks_cnt != 0 and (block < 0 or block >= self.blocks_cnt): + raise Error("bad block number %d, should be within [0, %d]" + % (block, self.blocks_cnt)) + + # Initialize the 'struct fiemap' part of the buffer. We use the + # '_FIEMAP_FLAG_SYNC' flag in order to make sure the file is + # synchronized. The reason for this is that early FIEMAP + # implementations had many bugs related to cached dirty data, and + # synchronizing the file is a necessary work-around. + struct.pack_into(_FIEMAP_FORMAT, self._buf, 0, block * self.block_size, + count * self.block_size, _FIEMAP_FLAG_SYNC, 0, + self._fiemap_extent_cnt, 0) + + try: + fcntl.ioctl(self._f_image, _FIEMAP_IOCTL, self._buf, 1) + except IOError as err: + # Note, the FIEMAP ioctl is supported by the Linux kernel starting + # from version 2.6.28 (year 2008). + if err.errno == os.errno.EOPNOTSUPP: + errstr = "FilemapFiemap: the FIEMAP ioctl is not supported " \ + "by the file-system" + self._log.debug(errstr) + raise ErrorNotSupp(errstr) + if err.errno == os.errno.ENOTTY: + errstr = "FilemapFiemap: the FIEMAP ioctl is not supported " \ + "by the kernel" + self._log.debug(errstr) + raise ErrorNotSupp(errstr) + raise Error("the FIEMAP ioctl failed for '%s': %s" + % (self._image_path, err)) + + return struct.unpack(_FIEMAP_FORMAT, self._buf[:_FIEMAP_SIZE]) + + def block_is_mapped(self, block): + """Refer the '_FilemapBase' class for the documentation.""" + struct_fiemap = self._invoke_fiemap(block, 1) + + # The 3rd element of 'struct_fiemap' is the 'fm_mapped_extents' field. + # If it contains zero, the block is not mapped, otherwise it is + # mapped. + result = bool(struct_fiemap[3]) + self._log.debug("FilemapFiemap: block_is_mapped(%d) returns %s" + % (block, result)) + return result + + def block_is_unmapped(self, block): + """Refer the '_FilemapBase' class for the documentation.""" + return not self.block_is_mapped(block) + + def _unpack_fiemap_extent(self, index): + """ + Unpack a 'struct fiemap_extent' structure object number 'index' from + the internal 'self._buf' buffer. + """ + + offset = _FIEMAP_SIZE + _FIEMAP_EXTENT_SIZE * index + return struct.unpack(_FIEMAP_EXTENT_FORMAT, + self._buf[offset : offset + _FIEMAP_EXTENT_SIZE]) + + def _do_get_mapped_ranges(self, start, count): + """ + Implements most the functionality for the 'get_mapped_ranges()' + generator: invokes the FIEMAP ioctl, walks through the mapped extents + and yields mapped block ranges. However, the ranges may be consecutive + (e.g., (1, 100), (100, 200)) and 'get_mapped_ranges()' simply merges + them. + """ + + block = start + while block < start + count: + struct_fiemap = self._invoke_fiemap(block, count) + + mapped_extents = struct_fiemap[3] + if mapped_extents == 0: + # No more mapped blocks + return + + extent = 0 + while extent < mapped_extents: + fiemap_extent = self._unpack_fiemap_extent(extent) + + # Start of the extent + extent_start = fiemap_extent[0] + # Starting block number of the extent + extent_block = extent_start / self.block_size + # Length of the extent + extent_len = fiemap_extent[2] + # Count of blocks in the extent + extent_count = extent_len / self.block_size + + # Extent length and offset have to be block-aligned + assert extent_start % self.block_size == 0 + assert extent_len % self.block_size == 0 + + if extent_block > start + count - 1: + return + + first = max(extent_block, block) + last = min(extent_block + extent_count, start + count) - 1 + yield (first, last) + + extent += 1 + + block = extent_block + extent_count + + def get_mapped_ranges(self, start, count): + """Refer the '_FilemapBase' class for the documentation.""" + self._log.debug("FilemapFiemap: get_mapped_ranges(%d, %d(%d))" + % (start, count, start + count - 1)) + iterator = self._do_get_mapped_ranges(start, count) + first_prev, last_prev = iterator.next() + + for first, last in iterator: + if last_prev == first - 1: + last_prev = last + else: + self._log.debug("FilemapFiemap: yielding range (%d, %d)" + % (first_prev, last_prev)) + yield (first_prev, last_prev) + first_prev, last_prev = first, last + + self._log.debug("FilemapFiemap: yielding range (%d, %d)" + % (first_prev, last_prev)) + yield (first_prev, last_prev) + + def get_unmapped_ranges(self, start, count): + """Refer the '_FilemapBase' class for the documentation.""" + self._log.debug("FilemapFiemap: get_unmapped_ranges(%d, %d(%d))" + % (start, count, start + count - 1)) + hole_first = start + for first, last in self._do_get_mapped_ranges(start, count): + if first > hole_first: + self._log.debug("FilemapFiemap: yielding range (%d, %d)" + % (hole_first, first - 1)) + yield (hole_first, first - 1) + + hole_first = last + 1 + + if hole_first < start + count: + self._log.debug("FilemapFiemap: yielding range (%d, %d)" + % (hole_first, start + count - 1)) + yield (hole_first, start + count - 1) + +def filemap(image, log=None): + """ + Create and return an instance of a Filemap class - 'FilemapFiemap' or + 'FilemapSeek', depending on what the system we run on supports. If the + FIEMAP ioctl is supported, an instance of the 'FilemapFiemap' class is + returned. Otherwise, if 'SEEK_HOLE' is supported an instance of the + 'FilemapSeek' class is returned. If none of these are supported, the + function generates an 'Error' type exception. + """ + + try: + return FilemapFiemap(image, log) + except ErrorNotSupp: + return FilemapSeek(image, log) |