diff options
author | Richard Purdie <richard.purdie@linuxfoundation.org> | 2013-03-14 17:26:20 +0000 |
---|---|---|
committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2013-03-18 21:26:06 +0000 |
commit | 556dee0c4d6d8a87c0cddbd2f60fe5917d009f18 (patch) | |
tree | 0392d47555698777c94dda9a02fe1208e192f06d | |
parent | e0705e4801a9855a4289d18e653c35190021206e (diff) | |
download | openembedded-core-556dee0c4d6d8a87c0cddbd2f60fe5917d009f18.tar.gz openembedded-core-556dee0c4d6d8a87c0cddbd2f60fe5917d009f18.tar.bz2 openembedded-core-556dee0c4d6d8a87c0cddbd2f60fe5917d009f18.zip |
package: Add cachedpath optimisation
Currently, various standard library operations like os.walk(),
os.path.isdir() and os.path.islink() each call stat or lstat which
involves a syscall into the kernel. There is no caching since they could
conceivably have changed on disk. The result is that for something like
the do_package task of the kernel we're spending over two minutes making
868,000 individual stat calls for 23,000 files. This is suboptimal.
This patch adds lib/oe/cachedpath.py which are a set of replacement
functions for these operations which use cached stat data rather than
hitting the kernel each time. It gives a nice performance improvement
halving the build time of the kernel do_package.
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
-rw-r--r-- | meta/classes/base.bbclass | 2 | ||||
-rw-r--r-- | meta/classes/package.bbclass | 56 | ||||
-rw-r--r-- | meta/lib/oe/cachedpath.py | 235 |
3 files changed, 268 insertions, 25 deletions
diff --git a/meta/classes/base.bbclass b/meta/classes/base.bbclass index 4ec1eda6bc..5fe9a84efe 100644 --- a/meta/classes/base.bbclass +++ b/meta/classes/base.bbclass @@ -10,7 +10,7 @@ inherit utility-tasks inherit metadata_scm inherit logging -OE_IMPORTS += "os sys time oe.path oe.utils oe.data oe.package oe.packagegroup oe.sstatesig oe.lsb" +OE_IMPORTS += "os sys time oe.path oe.utils oe.data oe.package oe.packagegroup oe.sstatesig oe.lsb oe.cachedpath" OE_IMPORTS[type] = "list" def oe_import(d): diff --git a/meta/classes/package.bbclass b/meta/classes/package.bbclass index f3a6bc726d..b6f87674a0 100644 --- a/meta/classes/package.bbclass +++ b/meta/classes/package.bbclass @@ -288,7 +288,7 @@ def copydebugsources(debugsrcdir, d): basepath = dvar for p in debugsrcdir.split("/"): basepath = basepath + "/" + p - if not os.path.exists(basepath): + if not cpath.exists(basepath): nosuchdir.append(basepath) bb.utils.mkdirhier(basepath) @@ -388,7 +388,7 @@ python package_do_split_locales() { localedir = os.path.join(dvar + datadir, 'locale') - if not os.path.isdir(localedir): + if not cpath.isdir(localedir): bb.debug(1, "No locale files in this package") return @@ -628,7 +628,7 @@ python fixup_perms () { continue origin = dvar + dir - if not (os.path.exists(origin) and os.path.isdir(origin) and not os.path.islink(origin)): + if not (cpath.exists(origin) and cpath.isdir(origin) and not cpath.islink(origin)): continue link = fs_perms_table[dir].link @@ -654,7 +654,7 @@ python fixup_perms () { continue origin = dvar + dir - if not (os.path.exists(origin) and os.path.isdir(origin)): + if not (cpath.exists(origin) and cpath.isdir(origin)): continue fix_perms(origin, fs_perms_table[dir].mode, fs_perms_table[dir].uid, fs_perms_table[dir].gid, dir) @@ -735,7 +735,7 @@ python split_and_strip_files () { baselibdir = os.path.abspath(dvar + os.sep + d.getVar("base_libdir", True)) if (d.getVar('INHIBIT_PACKAGE_DEBUG_SPLIT', True) != '1') and \ (d.getVar('INHIBIT_PACKAGE_STRIP', True) != '1'): - for root, dirs, files in os.walk(dvar): + for root, dirs, files in cpath.walk(dvar): for f in files: file = os.path.join(root, f) if file.endswith(".ko") and file.find("/lib/modules/") != -1: @@ -749,18 +749,20 @@ python split_and_strip_files () { continue try: - ltarget = oe.path.realpath(file, dvar, False) - s = os.lstat(ltarget) + ltarget = cpath.realpath(file, dvar, False) + s = cpath.lstat(ltarget) except OSError, (err, strerror): if err != errno.ENOENT: raise # Skip broken symlinks continue + if not s: + continue # Check its an excutable if (s[stat.ST_MODE] & stat.S_IXUSR) or (s[stat.ST_MODE] & stat.S_IXGRP) or (s[stat.ST_MODE] & stat.S_IXOTH) \ or ((file.startswith(libdir) or file.startswith(baselibdir)) and ".so" in f): # If it's a symlink, and points to an ELF file, we capture the readlink target - if os.path.islink(file): + if cpath.islink(file): target = os.readlink(file) if isELF(ltarget): #bb.note("Sym: %s (%d)" % (ltarget, isELF(ltarget))) @@ -918,8 +920,8 @@ python populate_packages () { for file in files: if os.path.isabs(file): file = '.' + file - if not os.path.islink(file): - if os.path.isdir(file): + if not cpath.islink(file): + if cpath.isdir(file): newfiles = [ os.path.join(file,x) for x in os.listdir(file) ] if newfiles: files += newfiles @@ -929,7 +931,7 @@ python populate_packages () { if [ file ] != globbed: files += globbed continue - if (not os.path.islink(file)) and (not os.path.exists(file)): + if (not cpath.islink(file)) and (not cpath.exists(file)): continue if file in seen: continue @@ -938,33 +940,33 @@ python populate_packages () { def mkdir(src, dest, p): src = os.path.join(src, p) dest = os.path.join(dest, p) - bb.utils.mkdirhier(dest) - fstat = os.stat(src) - os.chmod(dest, fstat.st_mode) + fstat = cpath.stat(src) + os.mkdir(dest, fstat.st_mode) os.chown(dest, fstat.st_uid, fstat.st_gid) if p not in seen: seen.append(p) + cpath.updatecache(dest) def mkdir_recurse(src, dest, paths): - if os.path.exists(dest + '/' + paths): + if cpath.exists(dest + '/' + paths): return while paths.startswith("./"): paths = paths[2:] p = "." for c in paths.split("/"): p = os.path.join(p, c) - if not os.path.exists(os.path.join(dest, p)): + if not cpath.exists(os.path.join(dest, p)): mkdir(src, dest, p) - if os.path.isdir(file) and not os.path.islink(file): + if cpath.isdir(file) and not cpath.islink(file): mkdir_recurse(dvar, root, file) continue mkdir_recurse(dvar, root, os.path.dirname(file)) fpath = os.path.join(root,file) - if not os.path.islink(file): + if not cpath.islink(file): os.link(file, fpath) - fstat = os.stat(file) + fstat = cpath.stat(file) os.chmod(fpath, fstat.st_mode) os.chown(fpath, fstat.st_uid, fstat.st_gid) continue @@ -975,7 +977,7 @@ python populate_packages () { os.chdir(workdir) unshipped = [] - for root, dirs, files in os.walk(dvar): + for root, dirs, files in cpath.walk(dvar): dir = root[len(dvar):] if not dir: dir = os.sep @@ -1009,8 +1011,8 @@ python package_fixsymlinks () { for path in pkgfiles[pkg]: rpath = path[len(inst_root):] pkg_files[pkg].append(rpath) - rtarget = oe.path.realpath(path, inst_root, True, assume_dir = True) - if not os.path.lexists(rtarget): + rtarget = cpath.realpath(path, inst_root, True, assume_dir = True) + if not cpath.lexists(rtarget): dangling_links[pkg].append(os.path.normpath(rtarget[len(inst_root):])) newrdepends = {} @@ -1394,7 +1396,7 @@ python package_do_shlibs() { renames = list() for file in pkgfiles[pkg]: soname = None - if os.path.islink(file): + if cpath.islink(file): continue if targetos == "darwin" or targetos == "darwin8": darwin_so(file) @@ -1781,6 +1783,10 @@ python do_package () { # as any change to rpmdeps requires this to be rerun. # PACKAGE_BBCLASS_VERSION = "1" + # Init cachedpath + global cpath + cpath = oe.cachedpath.CachedPath() + ########################################################################### # Sanity test the setup ########################################################################### @@ -1827,6 +1833,8 @@ python do_package () { # Split up PKGD into PKGDEST ########################################################################### + cpath = oe.cachedpath.CachedPath() + for f in (d.getVar('PACKAGESPLITFUNCS', True) or '').split(): bb.build.exec_func(f, d) @@ -1841,7 +1849,7 @@ python do_package () { pkgdest = d.getVar('PKGDEST', True) for pkg in packages: pkgfiles[pkg] = [] - for walkroot, dirs, files in os.walk(pkgdest + "/" + pkg): + for walkroot, dirs, files in cpath.walk(pkgdest + "/" + pkg): for file in files: pkgfiles[pkg].append(walkroot + os.sep + file) diff --git a/meta/lib/oe/cachedpath.py b/meta/lib/oe/cachedpath.py new file mode 100644 index 0000000000..e350c8a70e --- /dev/null +++ b/meta/lib/oe/cachedpath.py @@ -0,0 +1,235 @@ +# +# Based on standard python library functions but avoid +# repeated stat calls. Its assumed the files will not change from under us +# so we can cache stat calls. +# + +import os +import errno +import stat as statmod + +class CachedPath(object): + def __init__(self): + self.statcache = {} + self.lstatcache = {} + self.normpathcache = {} + return + + def updatecache(self, x): + x = self.normpath(x) + if x in self.statcache: + del self.statcache[x] + if x in self.lstatcache: + del self.lstatcache[x] + + def normpath(self, path): + if path in self.normpathcache: + return self.normpathcache[path] + newpath = os.path.normpath(path) + self.normpathcache[path] = newpath + return newpath + + def _callstat(self, path): + if path in self.statcache: + return self.statcache[path] + try: + st = os.stat(path) + self.statcache[path] = st + return st + except os.error: + self.statcache[path] = False + return False + + # We might as well call lstat and then only + # call stat as well in the symbolic link case + # since this turns out to be much more optimal + # in real world usage of this cache + def callstat(self, path): + path = self.normpath(path) + self.calllstat(path) + return self.statcache[path] + + def calllstat(self, path): + path = self.normpath(path) + if path in self.lstatcache: + return self.lstatcache[path] + #bb.error("LStatpath:" + path) + try: + lst = os.lstat(path) + self.lstatcache[path] = lst + if not statmod.S_ISLNK(lst.st_mode): + self.statcache[path] = lst + else: + self._callstat(path) + return lst + except (os.error, AttributeError): + self.lstatcache[path] = False + self.statcache[path] = False + return False + + # This follows symbolic links, so both islink() and isdir() can be true + # for the same path ono systems that support symlinks + def isfile(self, path): + """Test whether a path is a regular file""" + st = self.callstat(path) + if not st: + return False + return statmod.S_ISREG(st.st_mode) + + # Is a path a directory? + # This follows symbolic links, so both islink() and isdir() + # can be true for the same path on systems that support symlinks + def isdir(self, s): + """Return true if the pathname refers to an existing directory.""" + st = self.callstat(s) + if not st: + return False + return statmod.S_ISDIR(st.st_mode) + + def islink(self, path): + """Test whether a path is a symbolic link""" + st = self.calllstat(path) + if not st: + return False + return statmod.S_ISLNK(st.st_mode) + + # Does a path exist? + # This is false for dangling symbolic links on systems that support them. + def exists(self, path): + """Test whether a path exists. Returns False for broken symbolic links""" + if self.callstat(path): + return True + return False + + def lexists(self, path): + """Test whether a path exists. Returns True for broken symbolic links""" + if self.calllstat(path): + return True + return False + + def stat(self, path): + return self.callstat(path) + + def lstat(self, path): + return self.calllstat(path) + + def walk(self, top, topdown=True, onerror=None, followlinks=False): + # Matches os.walk, not os.path.walk() + + # We may not have read permission for top, in which case we can't + # get a list of the files the directory contains. os.path.walk + # always suppressed the exception then, rather than blow up for a + # minor reason when (say) a thousand readable directories are still + # left to visit. That logic is copied here. + try: + # Note that listdir and error are globals in this module due + # to earlier import-*. + names = os.listdir(top) + except error, err: + if onerror is not None: + onerror(err) + return + + dirs, nondirs = [], [] + for name in names: + if self.isdir(os.path.join(top, name)): + dirs.append(name) + else: + nondirs.append(name) + + if topdown: + yield top, dirs, nondirs + for name in dirs: + new_path = os.path.join(top, name) + if followlinks or not self.islink(new_path): + for x in self.walk(new_path, topdown, onerror, followlinks): + yield x + if not topdown: + yield top, dirs, nondirs + + ## realpath() related functions + def __is_path_below(self, file, root): + return (file + os.path.sep).startswith(root) + + def __realpath_rel(self, start, rel_path, root, loop_cnt, assume_dir): + """Calculates real path of symlink 'start' + 'rel_path' below + 'root'; no part of 'start' below 'root' must contain symlinks. """ + have_dir = True + + for d in rel_path.split(os.path.sep): + if not have_dir and not assume_dir: + raise OSError(errno.ENOENT, "no such directory %s" % start) + + if d == os.path.pardir: # '..' + if len(start) >= len(root): + # do not follow '..' before root + start = os.path.dirname(start) + else: + # emit warning? + pass + else: + (start, have_dir) = self.__realpath(os.path.join(start, d), + root, loop_cnt, assume_dir) + + assert(self.__is_path_below(start, root)) + + return start + + def __realpath(self, file, root, loop_cnt, assume_dir): + while self.islink(file) and len(file) >= len(root): + if loop_cnt == 0: + raise OSError(errno.ELOOP, file) + + loop_cnt -= 1 + target = os.path.normpath(os.readlink(file)) + + if not os.path.isabs(target): + tdir = os.path.dirname(file) + assert(self.__is_path_below(tdir, root)) + else: + tdir = root + + file = self.__realpath_rel(tdir, target, root, loop_cnt, assume_dir) + + try: + is_dir = self.isdir(file) + except: + is_dir = False + + return (file, is_dir) + + def realpath(self, file, root, use_physdir = True, loop_cnt = 100, assume_dir = False): + """ Returns the canonical path of 'file' with assuming a + toplevel 'root' directory. When 'use_physdir' is set, all + preceding path components of 'file' will be resolved first; + this flag should be set unless it is guaranteed that there is + no symlink in the path. When 'assume_dir' is not set, missing + path components will raise an ENOENT error""" + + root = os.path.normpath(root) + file = os.path.normpath(file) + + if not root.endswith(os.path.sep): + # letting root end with '/' makes some things easier + root = root + os.path.sep + + if not self.__is_path_below(file, root): + raise OSError(errno.EINVAL, "file '%s' is not below root" % file) + + try: + if use_physdir: + file = self.__realpath_rel(root, file[(len(root) - 1):], root, loop_cnt, assume_dir) + else: + file = self.__realpath(file, root, loop_cnt, assume_dir)[0] + except OSError, e: + if e.errno == errno.ELOOP: + # make ELOOP more readable; without catching it, there will + # be printed a backtrace with 100s of OSError exceptions + # else + raise OSError(errno.ELOOP, + "too much recursions while resolving '%s'; loop in '%s'" % + (file, e.strerror)) + + raise + + return file |