123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518 |
- #! /usr/bin/env python
- # encoding: utf-8
- # Thomas Nagy, 2017-2018 (ita)
- """
- A system for fast partial rebuilds
- Creating a large amount of task objects up front can take some time.
- By making a few assumptions, it is possible to avoid posting task generators
- and creating task objects for targets that are already up-to-date.
- On a silly benchmark the gain observed for 1M tasks can be 5m->10s
- for a single file change.
- Usage::
- def options(opt):
- opt.load('fast_partial')
- Assumptions:
- * Mostly for C/C++/Fortran targets with link tasks (object-only targets are not handled)
- * For full project builds: no --targets and no pruning from subfolders
- * The installation phase is ignored
- * `use=` dependencies are specified up front even across build groups
- * Task generator source files are not obtained from globs
- Implementation details:
- * The first layer obtains file timestamps to recalculate file hashes only
- when necessary (similar to md5_tstamp); the timestamps are then stored
- in a dedicated pickle file
- * A second layer associates each task generator to a file set to help
- detecting changes. Task generators are to create their tasks only when
- the related files have been modified. A specific db file is created
- to store such data (5m -> 1m10)
- * A third layer binds build context proxies onto task generators, replacing
- the default context. While loading data for the full build uses more memory
- (4GB -> 9GB), partial builds are then much faster (1m10 -> 13s)
- * A fourth layer enables a 2-level cache on file signatures to
- reduce the size of the main pickle file (13s -> 10s)
- """
- import os
- from waflib import Build, Context, Errors, Logs, Task, TaskGen, Utils
- from waflib.TaskGen import feature, after_method, taskgen_method
- import waflib.Node
# Staleness states stored on task generators as `tg.staleness`
DONE, DIRTY, NEEDED = 0, 1, 2

# Link-producing features: task generators carrying one of these may skip
# their object-file tasks when their staleness is NEEDED
SKIPPABLE = 'cshlib cxxshlib cstlib cxxstlib cprogram cxxprogram'.split()

# File name of the pickle holding the timestamps and the per-task-generator file lists
TSTAMP_DB = '.wafpickle_tstamp_db_file'

# Build context attributes that each bld_proxy keeps for itself and persists
SAVED_ATTRS = ['root', 'node_sigs', 'task_sigs', 'imp_sigs', 'raw_deps', 'node_deps']
class bld_proxy(object):
    """
    Per-task-generator stand-in for the build context.

    A proxy owns a private Node subclass, a private Node tree (``root``) and
    private tables for the other names listed in SAVED_ATTRS; it can persist
    them to, and reload them from, its own pickle file. Every attribute that
    the proxy does not hold itself is read from, written to, or deleted on
    the wrapped build context.
    """

    def __init__(self, bld):
        """
        :param bld: the real build context to wrap
        """
        # object.__setattr__ is used for everything the proxy must own,
        # because plain assignments are forwarded to the wrapped context
        object.__setattr__(self, 'bld', bld)

        # private Node subclass; restore()/store() publish it as
        # waflib.Node.Nod3 while pickling, hence the module name set here
        object.__setattr__(self, 'node_class', type('Nod3', (waflib.Node.Node,), {}))
        self.node_class.__module__ = 'waflib.Node'
        self.node_class.ctx = self

        object.__setattr__(self, 'root', self.node_class('', None))
        for x in SAVED_ATTRS:
            if x != 'root':
                object.__setattr__(self, x, {})

        self.fix_nodes()

    def __setattr__(self, name, value):
        """Forward attribute assignment to the wrapped build context."""
        bld = object.__getattribute__(self, 'bld')
        setattr(bld, name, value)

    def __delattr__(self, name):
        """Forward attribute deletion to the wrapped build context."""
        bld = object.__getattribute__(self, 'bld')
        delattr(bld, name)

    def __getattribute__(self, name):
        """Return the proxy's own attribute if any, else the wrapped context's."""
        try:
            return object.__getattribute__(self, name)
        except AttributeError:
            bld = object.__getattribute__(self, 'bld')
            return getattr(bld, name)

    def __call__(self, *k, **kw):
        """Delegate calls to the wrapped build context."""
        return self.bld(*k, **kw)

    def fix_nodes(self):
        """
        Re-create ``srcnode``, ``path`` and ``bldnode`` in the proxy's own
        Node tree, from the absolute paths of the wrapped context's nodes.
        """
        for x in ('srcnode', 'path', 'bldnode'):
            node = self.root.find_dir(getattr(self.bld, x).abspath())
            object.__setattr__(self, x, node)

    def set_key(self, store_key):
        """
        Set the suffix appended to Context.DBFILE to name this proxy's pickle file.

        :param store_key: file name suffix
        """
        object.__setattr__(self, 'store_key', store_key)

    def fix_tg_path(self, *tgs):
        """
        Rebind the ``path`` of the given task generators onto this proxy's Node tree.

        :param tgs: task generators to update
        """
        # changing Node objects on task generators is possible
        # yet, all Node objects must belong to the same parent
        for tg in tgs:
            tg.path = self.root.make_node(tg.path.abspath())

    def restore(self):
        """
        Load the tables named in SAVED_ATTRS from this proxy's pickle file.

        A missing, empty or unreadable file is only reported through
        Logs.debug; the current tables are then kept. ``fix_nodes`` is
        called in every case.
        """
        dbfn = os.path.join(self.variant_dir, Context.DBFILE + self.store_key)
        Logs.debug('rev_use: reading %s', dbfn)
        try:
            data = Utils.readf(dbfn, 'rb')
        except (EnvironmentError, EOFError):
            # handle missing file/empty file
            Logs.debug('rev_use: Could not load the build cache %s (missing)', dbfn)
        else:
            # the lock protects the module-level waflib.Node.Nod3 binding,
            # which must point at this proxy's class while unpickling
            with waflib.Node.pickle_lock:
                waflib.Node.Nod3 = self.node_class
                try:
                    data = Build.cPickle.loads(data)
                except Exception as e:
                    Logs.debug('rev_use: Could not pickle the build cache %s: %r', dbfn, e)
                else:
                    for x in SAVED_ATTRS:
                        object.__setattr__(self, x, data.get(x, {}))
        self.fix_nodes()

    def store(self):
        """
        Pickle the tables named in SAVED_ATTRS into this proxy's file.

        The data is written to a ``.tmp`` file which then replaces the
        previous file; the previous owner is re-applied when possible.
        """
        # use the same attribute list as __init__/restore, so that only the
        # tables owned by the proxy are ever written
        data = {}
        for x in SAVED_ATTRS:
            data[x] = getattr(self, x)
        db = os.path.join(self.variant_dir, Context.DBFILE + self.store_key)

        # see restore() for the purpose of the lock
        with waflib.Node.pickle_lock:
            waflib.Node.Nod3 = self.node_class
            payload = Build.cPickle.dumps(data, Build.PROTOCOL)

        Logs.debug('rev_use: storing %s', db)
        Utils.writef(db + '.tmp', payload, m='wb')
        try:
            st = os.stat(db)
            os.remove(db)
            if not Utils.is_win32:
                os.chown(db + '.tmp', st.st_uid, st.st_gid)
        except (AttributeError, OSError):
            # best effort: no previous file, or ownership cannot be changed
            pass
        os.rename(db + '.tmp', db)
class bld(Build.BuildContext):
    """
    Build context that posts only the task generators affected by a change.

    Each task generator gets its own bld_proxy. After a build, the files
    used by each task generator are recorded with their timestamps in
    TSTAMP_DB; before the next build, these records decide which task
    generators are stale and which ones are needed through ``use``.
    """

    def __init__(self, **kw):
        super(bld, self).__init__(**kw)
        # file path -> (timestamp, hash); filled by the h_file function of this module
        self.hashes_md5_tstamp = {}

    def __call__(self, *k, **kw):
        """
        Create a task generator bound to a fresh bld_proxy and add it to a group.

        :return: the new task generator
        """
        # this is one way of doing it, one could use a task generator method too
        proxy = kw['bld'] = bld_proxy(self)
        ret = TaskGen.task_gen(*k, **kw)
        self.task_gen_cache_names = {}
        self.add_to_group(ret, group=kw.get('group'))
        ret.bld = proxy
        # the proxy's pickle file is named after the task generator folder and index
        proxy.set_key(ret.path.abspath().replace(os.sep, '') + str(ret.idx))
        return ret

    def is_dirty(self):
        """Always report True."""
        return True

    def store_tstamps(self):
        """
        Record, for each DIRTY task generator whose tasks all succeeded or
        were skipped, the list of files it depends on, then write these lists
        and the file timestamps to TSTAMP_DB if anything changed.

        The cached list of a task generator having a failed task is dropped.
        """
        # Called after a build is finished
        # For each task generator, record all files involved in task objects
        # optimization: done only if there was something built
        do_store = False
        try:
            f_deps = self.f_deps
        except AttributeError:
            f_deps = self.f_deps = {}
            self.f_tstamps = {}

        allfiles = set()
        for g in self.groups:
            for tg in g:
                try:
                    staleness = tg.staleness
                except AttributeError:
                    staleness = DIRTY

                if staleness != DIRTY:
                    # DONE case: there was nothing built
                    # NEEDED case: the tg was brought in because of 'use' propagation
                    # but nothing really changed for them, there may be incomplete
                    # tasks (object files) and in this case it is best to let the next build
                    # figure out if an input/output file changed
                    continue

                do_cache = False
                for tsk in tg.tasks:
                    if tsk.hasrun == Task.SUCCESS:
                        do_cache = True
                    elif tsk.hasrun == Task.SKIPPED:
                        pass
                    else:
                        # one failed task, clear the cache for this tg
                        try:
                            del f_deps[(tg.path.abspath(), tg.idx)]
                        except KeyError:
                            pass
                        else:
                            # just store the new state because there is a change
                            do_store = True

                        # skip the rest because there is no valid cache possible
                        break
                else:
                    # reached only when no task failed
                    if not do_cache:
                        # all skipped, but is there anything in cache?
                        try:
                            f_deps[(tg.path.abspath(), tg.idx)]
                        except KeyError:
                            # probably cleared because a wscript file changed
                            # store it
                            do_cache = True

                    if do_cache:
                        # there was a rebuild, store the data structure too
                        tg.bld.store()

                        # all tasks skipped but no cache
                        # or a successful task build
                        do_store = True
                        st = set()
                        for tsk in tg.tasks:
                            st.update(tsk.inputs)
                            # NOTE(review): node_deps is read from this shared context,
                            # while each bld_proxy keeps its own node_deps table;
                            # confirm this is the intended source
                            st.update(self.node_deps.get(tsk.uid(), []))

                        # TODO do last/when loading the tgs?
                        lst = []
                        for k in ('wscript', 'wscript_build'):
                            n = tg.path.find_node(k)
                            if n:
                                n.get_bld_sig()
                                lst.append(n.abspath())

                        lst.extend(sorted(x.abspath() for x in st))
                        allfiles.update(lst)
                        f_deps[(tg.path.abspath(), tg.idx)] = lst

        for x in allfiles:
            # f_tstamps has everything, while md5_tstamp can be relatively empty on partial builds
            self.f_tstamps[x] = self.hashes_md5_tstamp[x][0]

        if do_store:
            dbfn = os.path.join(self.variant_dir, TSTAMP_DB)
            Logs.debug('rev_use: storing %s', dbfn)
            dbfn_tmp = dbfn + '.tmp'
            x = Build.cPickle.dumps([self.f_tstamps, f_deps], Build.PROTOCOL)
            Utils.writef(dbfn_tmp, x, m='wb')
            os.rename(dbfn_tmp, dbfn)
            Logs.debug('rev_use: stored %s', dbfn)

    def store(self):
        """
        Store the timestamp data, then the regular build cache when
        ``self.producer.dirty`` is set.
        """
        self.store_tstamps()
        if self.producer.dirty:
            Build.BuildContext.store(self)

    def compute_needed_tgs(self):
        """
        Compute ``self.needed_tgs``, the set of task generators to post.

        Task generators reported stale by ``is_stale`` and all those using
        them (directly or not) are marked DIRTY; the task generators they
        use (directly or not) that were DONE are marked NEEDED. The proxy
        data of every needed task generator is then restored.
        """
        # assume the 'use' keys are not modified during the build phase
        dbfn = os.path.join(self.variant_dir, TSTAMP_DB)
        Logs.debug('rev_use: Loading %s', dbfn)
        try:
            data = Utils.readf(dbfn, 'rb')
        except (EnvironmentError, EOFError):
            Logs.debug('rev_use: Could not load the build cache %s (missing)', dbfn)
            self.f_deps = {}
            self.f_tstamps = {}
        else:
            try:
                self.f_tstamps, self.f_deps = Build.cPickle.loads(data)
            except Exception as e:
                Logs.debug('rev_use: Could not pickle the build cache %s: %r', dbfn, e)
                self.f_deps = {}
                self.f_tstamps = {}
            else:
                Logs.debug('rev_use: Loaded %s', dbfn)

        # 1. obtain task generators that contain rebuilds
        # 2. obtain the 'use' graph and its dual
        stales = set()
        reverse_use_map = Utils.defaultdict(list)
        use_map = Utils.defaultdict(list)

        for g in self.groups:
            for tg in g:
                if tg.is_stale():
                    stales.add(tg)

                try:
                    lst = tg.use = Utils.to_list(tg.use)
                except AttributeError:
                    # no 'use' attribute on this task generator
                    pass
                else:
                    for x in lst:
                        try:
                            xtg = self.get_tgen_by_name(x)
                        except Errors.WafError:
                            # names that do not resolve to a task generator are ignored
                            pass
                        else:
                            use_map[tg].append(xtg)
                            reverse_use_map[xtg].append(tg)

        Logs.debug('rev_use: found %r stale tgs', len(stales))

        # 3. dfs to post downstream tg as stale
        visited = set()

        def mark_down(tg):
            if tg in visited:
                return
            visited.add(tg)
            Logs.debug('rev_use: marking down %r as stale', tg.name)
            tg.staleness = DIRTY
            for x in reverse_use_map[tg]:
                mark_down(x)

        for tg in stales:
            mark_down(tg)

        # 4. dfs to find ancestors tg to mark as needed
        self.needed_tgs = needed_tgs = set()

        def mark_needed(tg):
            if tg in needed_tgs:
                return
            needed_tgs.add(tg)
            if tg.staleness == DONE:
                Logs.debug('rev_use: marking up %r as needed', tg.name)
                tg.staleness = NEEDED
            for x in use_map[tg]:
                mark_needed(x)

        for xx in visited:
            mark_needed(xx)

        # so we have the whole tg trees to post in the set "needed"
        # load their build trees
        for tg in needed_tgs:
            tg.bld.restore()
            tg.bld.fix_tg_path(tg)

        # the stale ones should be fully build, while the needed ones
        # may skip a few tasks, see create_compiled_task and apply_link_after below
        Logs.debug('rev_use: amount of needed task gens: %r', len(needed_tgs))

    def post_group(self):
        """
        Post the task generators of the current group that are in
        ``self.needed_tgs``; when specific targets are requested, defer to
        the default implementation instead.
        """
        # assumption: we can ignore the folder/subfolders cuts
        def tgpost(tg):
            try:
                f = tg.post
            except AttributeError:
                pass
            else:
                f()

        if not self.targets or self.targets == '*':
            for tg in self.groups[self.current_group]:
                # this can cut quite a lot of tg objects
                if tg in self.needed_tgs:
                    tgpost(tg)
        else:
            # default implementation; called on the class, so the instance
            # must be passed explicitly
            return Build.BuildContext.post_group(self)

    def get_build_iterator(self):
        """
        Compute the needed task generators for full builds (no specific
        targets requested), then return the default build iterator.
        """
        if not self.targets or self.targets == '*':
            self.compute_needed_tgs()
        return Build.BuildContext.get_build_iterator(self)
@taskgen_method
def is_stale(self):
    """
    Tell whether this task generator must be posted, and record the outcome
    in ``self.staleness``: DIRTY when True is returned, DONE otherwise.

    The task generator is stale when it sets ``always_stale``, when the
    build db file is missing, when the configuration file is newer than the
    db file, when no file list is cached for it, or when one of the cached
    files is unknown, missing, or has a different timestamp.

    :return: True if the task generator must be posted
    :rtype: bool
    """
    # assume no globs
    self.staleness = DIRTY

    # 1. the case of always stale targets
    if getattr(self, 'always_stale', False):
        return True

    # 2. check if the db file exists
    db = os.path.join(self.bld.variant_dir, Context.DBFILE)
    try:
        dbstat = os.stat(db).st_mtime
    except OSError:
        Logs.debug('rev_use: must post %r because this is a clean build', self.name)
        return True

    # 3. check if the configuration changed
    if os.stat(self.bld.bldnode.find_node('c4che/build.config.py').abspath()).st_mtime > dbstat:
        Logs.debug('rev_use: must post %r because the configuration has changed', self.name)
        return True

    # 3.a any tstamp data?
    try:
        f_deps = self.bld.f_deps
    except AttributeError:
        Logs.debug('rev_use: must post %r because there is no f_deps', self.name)
        return True

    # 4. check if this is the first build (no cache)
    try:
        lst = f_deps[(self.path.abspath(), self.idx)]
    except KeyError:
        Logs.debug('rev_use: must post %r because there it has no cached data', self.name)
        return True

    # file path -> current timestamp, so that each file is stat'ed only once
    try:
        cache = self.bld.cache_tstamp_rev_use
    except AttributeError:
        cache = self.bld.cache_tstamp_rev_use = {}

    # 5. check the timestamp of each dependency files listed is unchanged
    f_tstamps = self.bld.f_tstamps
    for x in lst:
        try:
            old_ts = f_tstamps[x]
        except KeyError:
            Logs.debug('rev_use: must post %r because %r is not in cache', self.name, x)
            return True

        try:
            try:
                ts = cache[x]
            except KeyError:
                ts = cache[x] = os.stat(x).st_mtime
        except OSError:
            # the cached file list is no longer valid, drop it
            del f_deps[(self.path.abspath(), self.idx)]
            Logs.debug('rev_use: must post %r because %r does not exist anymore', self.name, x)
            return True
        else:
            if ts != old_ts:
                Logs.debug('rev_use: must post %r because the timestamp on %r changed %r %r', self.name, x, old_ts, ts)
                return True

    self.staleness = DONE
    return False
@taskgen_method
def create_compiled_task(self, name, node):
    """
    Create the task compiling ``node`` into ``<node name>.<idx>.o`` and add
    it to ``self.compiled_tasks``.

    No task is created for a task generator that is only NEEDED and carries
    one of the SKIPPABLE features (assumption: object-only targets are not
    skippable).

    :param name: name of the task class to instantiate
    :param node: source file node
    :return: the new task, or None when the task is skipped
    """
    # only libraries/programs can skip object files
    if self.staleness == NEEDED and any(feat in self.features for feat in SKIPPABLE):
        return None

    obj_node = node.parent.find_or_declare('%s.%d.o' % (node.name, self.idx))
    task = self.create_task(name, node, obj_node)
    try:
        self.compiled_tasks.append(task)
    except AttributeError:
        # first compiled task of this task generator
        self.compiled_tasks = [task]
    return task
@feature(*SKIPPABLE)
@after_method('apply_link')
def apply_link_after(self):
    """
    Flag all tasks of a task generator that is only NEEDED as skipped:
    cprogram/cxxprogram might be unnecessary.
    """
    if self.staleness == NEEDED:
        for tsk in self.tasks:
            tsk.hasrun = Task.SKIPPED
def path_from(self, node):
    """
    Variant of the default ``path_from`` that handles nodes of distinct types.

    A node bound to another context is first re-created in this node's own
    tree from its absolute path; the result of ``default_path_from`` is
    then returned.

    :param node: node to compute the path from
    """
    ctx = self.ctx
    if node.ctx is ctx:
        same_tree_node = node
    else:
        same_tree_node = ctx.root.make_node(node.abspath())
    return self.default_path_from(same_tree_node)
# Keep the stock implementation reachable as default_path_from before
# installing the replacement, which delegates to it
setattr(waflib.Node.Node, 'default_path_from', waflib.Node.Node.path_from)
setattr(waflib.Node.Node, 'path_from', path_from)
def h_file(self):
    """
    Return the hash of this file node, using a 2-layer timestamp cache
    (similar to md5_tstamp.py).

    * shared cache: ``self.ctx.bld.hashes_md5_tstamp``, common to all task
      generators; it is not persistent
    * private cache: ``self.ctx.hashes_md5_tstamp``, the one of the build
      context proxy (one by task generator); it is persistent and meant for
      partial builds

    All calls are assumed to be made from a single thread.

    :return: the file hash
    """
    path = self.abspath()
    mtime = os.stat(path).st_mtime
    shared = self.ctx.bld.hashes_md5_tstamp
    private = self.ctx.hashes_md5_tstamp

    try:
        entry = shared[path]
    except KeyError:
        pass
    else:
        # already calculated in this build; the shared value is assumed to be
        # calculated once: reverifying it could cause task generators
        # to get distinct tstamp values, thus missing rebuilds
        private[path] = entry
        return entry[1]

    try:
        entry = private[path]
    except KeyError:
        pass
    else:
        if entry[0] == mtime:
            # correct value from a previous build, publish it in the shared cache
            shared[path] = entry
            return entry[1]

    digest = Utils.h_file(path)
    private[path] = shared[path] = (mtime, digest)
    return digest
# Make all Node objects hash files through the cached h_file defined in this module
setattr(waflib.Node.Node, 'h_file', h_file)
|