From 109e6d019c1880eda9e0eb11c59ae926c4188faa Mon Sep 17 00:00:00 2001 From: Holger Freyther Date: Tue, 9 Oct 2007 15:01:33 +0000 Subject: contrib/mtn2git: mtn add is not recursive by default, actually add files ;) --- contrib/mtn2git/git2mtn.py | 50 +++ contrib/mtn2git/mtn/.mtn2git_empty | 0 contrib/mtn2git/mtn/__init__.py | 6 + contrib/mtn2git/mtn/authors.py | 11 + contrib/mtn2git/mtn/common.py | 49 +++ contrib/mtn2git/mtn/genproxy.py | 25 ++ contrib/mtn2git/mtn/mtn.py | 419 +++++++++++++++++++++++++ contrib/mtn2git/mtn/utility.py | 100 ++++++ contrib/mtn2git/mtn2git.py | 610 +++++++++++++++++++++++++++++++++++++ contrib/mtn2git/status.py | 47 +++ 10 files changed, 1317 insertions(+) create mode 100755 contrib/mtn2git/git2mtn.py create mode 100644 contrib/mtn2git/mtn/.mtn2git_empty create mode 100644 contrib/mtn2git/mtn/__init__.py create mode 100644 contrib/mtn2git/mtn/authors.py create mode 100644 contrib/mtn2git/mtn/common.py create mode 100755 contrib/mtn2git/mtn/genproxy.py create mode 100644 contrib/mtn2git/mtn/mtn.py create mode 100644 contrib/mtn2git/mtn/utility.py create mode 100755 contrib/mtn2git/mtn2git.py create mode 100644 contrib/mtn2git/status.py (limited to 'contrib/mtn2git') diff --git a/contrib/mtn2git/git2mtn.py b/contrib/mtn2git/git2mtn.py new file mode 100755 index 0000000000..99681a9ca2 --- /dev/null +++ b/contrib/mtn2git/git2mtn.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python + +""" + Copyright (C) 2006, 2007 Holger Hans Peter Freyther + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be 
included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. +""" + +############# +# +# Use: This tool can merge one git-branch back to one branch in monotone +# +# Discussion: +# Merging from git to a monotone branch. Currently I see two modes which +# should be supported. +# +# a) linear development. Only a couple of changes are done on top of the +# branch and nothing get merged. In this case we can merge everything +# back and each rev gets a cert with the branch. +# This should be possible using programs like git-rebase. +# b) we have merges inside our git-rev-list history. This means we need to +# merge every revision and can't attach any branch certs to the revision. +# And once we are done with this we will create a propagate like commit +# entry and we can give that new revision a cert with the branch name. +# +# This means working in git is treated like a branch! +# +# One difficulty is with git. This propagate like commit will create a new revision +# in monotone but none in git as both trees/manifests are the same. So what we have +# to make sure is to use the latest mtn revision for a given mark/git revision. This +# is where mtn2git.py needs to help. We will save a list of mtn revisions that have the +# same git version and then will read every of them and check the branch certs and will +# use the one matching our target branch! 
def parse_timecert(value):
    """Parse the value of a monotone time cert into a ``datetime``.

    value -- string formatted as ``YYYY-MM-DDTHH:MM:SS``.

    Returns a naive ``datetime.datetime`` built from the year, month, day,
    hour, minute and second of ``value``.  ``time.strptime`` raises
    ``ValueError`` when ``value`` does not match the format.
    """
    # Only the first six struct_time fields (year .. second) are meaningful
    # to the datetime constructor.  Argument unpacking replaces the
    # deprecated ``apply`` builtin, which no longer exists in Python 3.
    return datetime.datetime(*time.strptime(value, "%Y-%m-%dT%H:%M:%S")[:6])
def ago(event, now=None):
    """Describe how long ago ``event`` happened, using one coarse unit.

    event -- naive ``datetime.datetime`` of the event.
    now   -- optional ``datetime.datetime`` to measure against; defaults to
             ``datetime.datetime.utcnow()``.

    Returns a string such as ``"1 day"``, ``"3 hours"`` or ``"12 minutes"``.
    Only the largest applicable unit is reported; smaller remainders are
    dropped.
    """
    def plural(count, singular, plural_form):
        # Pick the noun form matching ``count``.
        if count == 1:
            return "%d %s" % (count, singular)
        return "%d %s" % (count, plural_form)

    if now is None:
        now = datetime.datetime.utcnow()
    elapsed = now - event

    if elapsed.days > 0:
        return plural(elapsed.days, "day", "days")
    # ``>=`` so that exactly one hour reads "1 hour" instead of "60 minutes".
    if elapsed.seconds >= 3600:
        # Floor division keeps the result an integer on every Python version.
        return plural(elapsed.seconds // 3600, "hour", "hours")
    return plural(elapsed.seconds // 60, "minute", "minutes")
class Runner:
    """Base class holding the command prefix used to start monotone.

    ``base_command`` is a list of the form ``[monotone, "--db=<database>"]``;
    subclasses append the sub-command and its arguments to it.
    """
    def __init__(self, monotone, database):
        """
        monotone -- name or path of the monotone executable
        database -- path of the monotone database to operate on
        """
        # base_command is handed to popen2.Popen3 as a sequence, which runs
        # the program directly without a shell.  Shell-quoting the path here
        # would therefore pass literal quote characters to monotone for any
        # database path containing spaces or shell metacharacters.
        self.base_command = [monotone, "--db=%s" % database]
+ """ + def __init__(self, *args, **kwargs): + Runner.__init__(*[self] + list(args), **kwargs) + self.lock = threading.Lock() + self.process = None + + def stop(self): + if not self.process: + return + terminate_popen3(self.process) + self.process = None + + def __process_required(self): + if self.process != None: + return + to_run = self.base_command + ['automate', 'stdio'] + self.process = popen2.Popen3(to_run, capturestderr=True) + # breaks down with toposort and a lot of input + #map (set_nonblocking, [ self.process.fromchild, + # self.process.tochild, + # self.process.childerr ]) + map (set_nonblocking, [ self.process.fromchild, + self.process.childerr ]) + + def run(self, *args, **kwargs): + print >> sys.stderr, (("automate is running:", args, kwargs)) + + lock = self.lock + stop = self.stop + class CleanRequest(genproxy.GeneratorProxy): + def __init__(self, *args, **kwargs): + genproxy.GeneratorProxy.__init__(self, *args, **kwargs) + + # nb; this used to be False, but True seems to behave more sensibly. + # in particular, if someone holds down Refresh sometimes the code + # gets here before __del__ is called on the previous iterator, + # causing a pointless error to occur + if not lock.acquire(True): + # I've checked; this exception does _not_ cause __del__ to run, so + # we don't accidentally unlock a lock below + raise MonotoneException("Automate request cannot be called: it is already locked! This indicates a logic error in ViewMTN; please report.") + + def __del__(self): + def read_any_unread_output(): + try: + # this'll raise StopIteration if we're done + self.next() + # okay, we're not done.. 
+ print >> sys.stderr, ("warning: Automate output not completely read; reading manually.") + for stanza in self: + pass + except StopIteration: + pass + + try: + read_any_unread_output() + lock.release() + except: + print >> sys.stderr, ("exception cleaning up after Automation; calling stop()!") + stop() + + return CleanRequest(self.__run(*args, **kwargs)) + + def __run(self, command, args): + enc = "l%d:%s" % (len(command), command) + enc += ''.join(["%d:%s" % (len(x), x) for x in args]) + 'e' + + # number of tries to get a working mtn going.. + for i in xrange(2): + self.__process_required() + try: + self.process.tochild.write(enc) + self.process.tochild.flush() + break + except: + # mtn has died underneath the automate; restart it + print >> sys.stderr, ("exception writing to child process; attempting restart: %s" % format_exc()) + self.stop() + + import sys + def read_result_packets(): + buffer = "" + while True: + r_stdin, r_stdout, r_stderr = select.select([self.process.fromchild], [], [], None) + if not r_stdin and not r_stdout and not r_stderr: + break + + if self.process.fromchild in r_stdin: + data = self.process.fromchild.read() + if data == "": + break + buffer += data + + # loop, trying to get complete packets out of our buffer + complete, in_packet = False, False + while not complete and buffer != '': + if not in_packet: + m = packet_header_re.match(buffer) + if not m: + break + in_packet = True + cmdnum, errnum, pstate, length = m.groups() + errnum = int(errnum) + length = int(length) + header_length = m.end(m.lastindex) + 1 # the '1' is the colon + + if len(buffer) < length + header_length: + # not enough data read from client yet; go round + break + else: + result = buffer[header_length:header_length+length] + buffer = buffer[header_length+length:] + complete = pstate == 'l' + in_packet = False + yield errnum, complete, result + + if complete: + break + + # get our response, and yield() it back one line at a time + code_max = -1 + data_buf = '' + 
for code, is_last, data in read_result_packets(): + if code and code > code_max: + code_max = code + data_buf += data + while True: + nl_idx = data_buf.find('\n') + if nl_idx == -1: + break + yield data_buf[:nl_idx+1] + data_buf = data_buf[nl_idx+1:] + # left over data? + if data_buf: + yield data_buf + if code_max > 0: + raise MonotoneException("error code %d in automate packet." % (code_max)) + +class Standalone(Runner): + """Runs commands by running monotone. One monotone process + per command""" + + def run(self, command, args): + # as we pass popen3 as sequence, it executes monotone with these + # arguments - and does not pass them through the shell according + # to help(os.popen3) +# print(("standalone is running:", command, args)) + to_run = self.base_command + [command] + args + process = popen2.Popen3(to_run, capturestderr=True) + for line in process.fromchild: + yield line + stderr_data = process.childerr.read() + if len(stderr_data) > 0: + raise MonotoneException("data on stderr for command '%s': %s" % (command, + stderr_data)) + terminate_popen3(process) + +class MtnObject: + def __init__(self, obj_type): + self.obj_type = obj_type + +class Tag(MtnObject): + def __init__(self, name, revision, author, branches): + MtnObject.__init__(self, "tag") + self.name, self.revision, self.author, self.branches = name, Revision(revision), author, branches + +class Branch(MtnObject): + def __init__(self, name): + MtnObject.__init__(self, "branch") + self.name = name + +class File(MtnObject): + def __init__(self, name, in_revision): + MtnObject.__init__(self, "file") + self.name = name + self.in_revision = in_revision + +class Dir(MtnObject): + def __init__(self, name, in_revision): + MtnObject.__init__(self, "dir") + self.name = name + self.in_revision = in_revision + +basic_io_name_tok = re.compile(r'^(\S+)') + +def basic_io_from_stream(gen): + # all of these x_consume functions return parsed string + # token to add to stanza, name of next consume function to call + 
# new value of line (eg. with consumed tokens removed) + + def hex_consume(line): + m = hex_re_c.match(line[1:]) + if line[0] != '[' or not m: + raise MonotoneException("This is not a hex token: %s" % line) + end_of_match = m.end(m.lastindex) + if line[end_of_match+1] != ']': + raise MonotoneException("Hex token ends in character other than ']': %s" % line) + return Revision(m.groups()[0]), choose_consume, line[end_of_match+2:] + + def name_consume(line): + m = name_re_c.match(line) + if not m: + raise MonotoneException("Not a name: %s" % line) + end_of_match = m.end(m.lastindex) + return m.groups()[0], choose_consume, line[end_of_match:] + + def choose_consume(line): + line = line.lstrip() + if line == '': + consumer = choose_consume + elif line[0] == '[': + consumer = hex_consume + elif line[0] == '"': + consumer = string_consume + else: + consumer = name_consume + return None, consumer, line + + class StringState: + def __init__(self): + self.in_escape = False + self.has_started = False + self.has_ended = False + self.value = '' + + def string_consume(line, state=None): + if not state: + state = StringState() + + if not state.has_started: + if line[0] != '"': + raise MonotoneException("Not a string: %s" % line) + line = line[1:] + state.has_started = True + + idx = 0 + for idx, c in enumerate(line): + if state.in_escape: + if c != '\\' and c != '"': + raise MonotoneException("Invalid escape code: %s in %s\n" % (c, line)) + state.value += c + state.in_escape = False + else: + if c == '\\': + state.in_escape = True + elif c == '"': + state.has_ended = True + break + else: + state.value += c + + if state.has_ended: + return state.value, choose_consume, line[idx+1:] + else: + return (None, + lambda s: string_consume(s, state), + line[idx+1:]) + + consumer = choose_consume + current_stanza = [] + for line in gen: + # if we're not in an actual consumer (which we shouldn't be, unless + # we're parsing some sort of multi-line token) and we have a blank + # line, it 
class Operations:
    """Convenience wrapper around the monotone commands used by mtn2git.

    Every method is a generator.  Most commands go through a shared
    ``Automate`` runner; ``diff`` uses a ``Standalone`` runner.
    """
    def __init__(self, runner_args):
        """
        runner_args -- sequence of positional arguments passed to both the
                       ``Standalone`` and the ``Automate`` constructor
        """
        self.standalone = Standalone(*runner_args)
        self.automate = Automate(*runner_args)

    def _revisions(self, command, args):
        # Run an automate command and wrap each non-blank output line in
        # a Revision.
        for line in (t.strip() for t in self.automate.run(command, args)):
            if not line:
                continue
            yield Revision(line)

    def tags(self):
        """Yield a Tag for every 'tag' stanza of 'automate tags'."""
        for stanza in basic_io_from_stream(self.automate.run('tags', [])):
            if stanza[0] == 'tag':
                # Values sit at positions 1, 3 and 5; everything from
                # position 7 on is treated as a branch name.
                branches = [Branch(branch) for branch in stanza[7:]]
                yield Tag(stanza[1], stanza[3], stanza[5], branches)

    def branches(self):
        """Yield a Branch for every non-blank line of 'automate branches'."""
        for line in (t.strip() for t in self.automate.run('branches', [])):
            if not line:
                continue
            yield Branch(line)

    def graph(self):
        """Yield the raw output lines of 'automate graph'."""
        for line in self.automate.run('graph', []):
            yield line

    def parents(self, revision):
        """Yield the parent Revisions of revision (nothing for "")."""
        if revision != "":
            for rev in self._revisions('parents', [revision]):
                yield rev

    def ancestry_difference(self, new_rev, old_revs):
        """
        new_rev a single new revision number
        old_revs a list of revisions

        Yields the Revisions reported by 'automate ancestry_difference'
        (nothing when new_rev is "").
        """
        if new_rev != "":
            for rev in self._revisions('ancestry_difference', [new_rev]+old_revs):
                yield rev

    def children(self, revision):
        """Yield the child Revisions of revision (nothing for "")."""
        if revision != "":
            for rev in self._revisions('children', [revision]):
                yield rev

    def toposort(self, revisions):
        """Yield the given revisions in the order 'automate toposort' prints them."""
        for rev in self._revisions('toposort', revisions):
            yield rev

    def heads(self, branch):
        """Yield the head Revisions of branch."""
        for rev in self._revisions('heads', [branch]):
            yield rev

    def get_content_changed(self, revision, path):
        """Yield the parsed stanzas of 'automate get_content_changed'."""
        for stanza in basic_io_from_stream(self.automate.run('get_content_changed', [revision, path])):
            yield stanza

    def get_revision(self, revision):
        """Yield the parsed stanzas of 'automate get_revision'."""
        for stanza in basic_io_from_stream(self.automate.run('get_revision', [revision])):
            yield stanza

    def get_manifest_of(self, revision):
        """Yield the parsed stanzas of 'automate get_manifest_of'."""
        for stanza in basic_io_from_stream(self.automate.run('get_manifest_of', [revision])):
            yield stanza

    def get_file(self, fileid):
        """Yield the raw output of 'automate get_file' as the runner produces it."""
        for chunk in self.automate.run('get_file', [fileid]):
            yield chunk

    def certs(self, revision):
        """Yield the parsed stanzas of 'automate certs'."""
        for stanza in basic_io_from_stream(self.automate.run('certs', [revision])):
            yield stanza

    def diff(self, revision_from, revision_to, files=None):
        """Yield the output lines of 'mtn diff' between two revisions.

        files -- optional sequence of paths to restrict the diff to
        """
        # ``None`` instead of a shared mutable ``[]`` default.
        args = ['-r', revision_from, '-r', revision_to] + list(files or [])
        for line in self.standalone.run('diff', args):
            yield line
def iter_command(command, timeout=None):
    """Run command and yield its standard output one line at a time.

    command -- command handed to popen2.Popen3
    timeout -- seconds select() may wait for output, or None to wait forever

    Lines are yielded without their trailing newline, in the order the
    child produced them.  After the child's output is exhausted an
    Exception is raised if anything was read from its stderr, or if the
    wait for output timed out.
    """
    p = popen2.Popen3(command, capturestderr=True)
    set_nonblocking(p.fromchild)
    set_nonblocking(p.childerr)
    fromchild_read = ""
    childerr_read = ""
    was_timeout = False
    while 1:
        # NOTE(review): childerr is only watched for exceptional conditions,
        # not for readability, so stderr is effectively collected by the
        # catch-up read after the loop -- confirm this is intended.
        ro, rw, rx = select.select([p.fromchild], [], [p.childerr], timeout)
        if not ro and not rw and not rx:
            was_timeout = True
            break
        if p.fromchild in ro:
            recv = p.fromchild.read()
            if recv == "": break
            fromchild_read += recv
            # hand out every complete line received so far
            while 1:
                nl = fromchild_read.find('\n')
                if nl == -1: break
                yield fromchild_read[:nl]
                fromchild_read = fromchild_read[nl+1:]
        if p.childerr in rx:
            recv = p.childerr.read()
            if recv == "": break
            childerr_read += recv
    if not was_timeout:
        # check for any data we might have missed (due to a premature break)
        # (if there isn't anything we just get a IOError, which we don't mind)
        try: fromchild_read += p.fromchild.read()
        except IOError: pass
        try: childerr_read += p.childerr.read()
        except IOError: pass
    p.fromchild.close()
    p.tochild.close()
    # everything of interest has been read from stderr; don't leak the pipe
    p.childerr.close()
    # yield anything left over, oldest line first; a trailing newline must
    # not produce an extra empty line
    leftover = fromchild_read.split('\n')
    if leftover[-1] == "":
        leftover.pop()
    for line in leftover:
        yield line
    # call wait(); another thread may already have reaped the child
    try:
        if not was_timeout: p.wait()
    except OSError: pass
    if len(childerr_read): raise Exception("data on stderr (command is %s)" % command, childerr_read)
    if was_timeout: raise Exception("command timeout")
def get_mark(revision):
    """
    Return the mark that belongs to a revision.

    A revision that already has a mark keeps it.  For an unknown revision
    the mark counter is advanced, the new mark is remembered and a
    "<mark>: <revision>" line is written to the mark file.
    """
    known = status.marks
    if revision not in known:
        # first time we see this revision: hand out the next free mark
        status.last_mark += 1
        known[revision] = status.last_mark
        print >> status.mark_file, "%d: %s" % (status.last_mark, revision)
        return status.last_mark
    return known[revision]
def filter_renamed(manifest, renamed):
    """
    Drop the renames whose target already exists as a file in manifest.

    If we base from a revision that has already done the move,
    git-fast-import will complain that the file has already been moved.

    manifest -- iterable of manifest stanzas; for a "file" stanza the
                second element is the path
    renamed  -- sequence of rename entries whose first element is the new
                name

    Returns the entries of renamed whose new name is not a file in
    manifest.  An empty renamed is returned unchanged.
    """
    if len(renamed) == 0:
        return renamed

    # Collect the file paths once instead of re-filtering the whole rename
    # list for every file stanza of the manifest.
    existing_files = set(line[1] for line in manifest if line[0] == "file")
    return [entry for entry in renamed if entry[0] not in existing_files]
Basicly we will get all files and replace old_name with new_name but only: + + If the file of the old_manifest is not in our to be deleted list + """ + old_dir = old_name + "/" + for line in manifest: + if line[1].startswith(old_dir) or line[1] == old_name: + already_handled = False + for (deleted,_) in files_deleted: + if line[1] == deleted: + already_handled = True + break + + # Don't rename files that should be in the same directory + if line[1] in files_sticky: + already_handled = True + + if already_handled: + pass + elif line[0] == "file": + print >> sys.stderr, "Will add '%s' old: '%s' new: '%s' => result: '%s'" % (line[1], old_name, new_name, line[1].replace(old_name, new_name, 1)) + to_add_files.add((line[1].replace(old_name, new_name, 1), None, revision)) + elif line[0] == "dir": + to_add_dirs.add((line[1].replace(old_name, new_name, 1), revision)) + elif line[0] in ["format_version"]: + assert(line[1] == "1") + else: + print >> sys.stderr, line[0] + assert(False) + + return (to_add_files, to_add_dirs) + +# +# We need to recursively rename the directories. Now the difficult part is to undo certain operations. +# +# e.g we rename the whole dir and then rename a file back. We could revive a directory that was marked +# for deletion. +# +# rename "weird/two/three" +# to "unweird/four" +# +# rename "weird/two/three/four" +# to "weird/two/three" +# +# Here we would schedule weird/two/three for deletion but then revive it again. So three does not +# get copied to unweird/four/three +# """ +def recursively_rename_directories(ops, manifests, rename_commands, files_deleted, files_moved_sticky): + to_add_directories = set() + to_add_files = set() + to_remove_items = set() + + for (old_name, new_name, old_revision) in rename_commands: + # Check if we have the above case and rename a more specific directory + # and then we will alter the result... 
def build_tree(manifest):
    """
    Split the stanzas of a manifest into directories and files.

    Returns a (dirs, files) tuple.  dirs maps every "dir" path to 1.
    files maps every "file" path to a tuple of the stanza's fourth element
    and the list of everything after it (presumably the content id and the
    attributes -- verify against the manifest format).  "format_version"
    stanzas are ignored; any other stanza type is printed to stderr and
    trips an assertion.
    """
    directories, contents = {}, {}
    for stanza in manifest:
        kind = stanza[0]
        if kind == "format_version":
            continue
        if kind == "dir":
            directories[stanza[1]] = 1
            continue
        if kind == "file":
            contents[stanza[1]] = (stanza[3], stanza[4:])
            continue
        # unknown stanza type: report it and bail out
        print >> sys.stderr, kind
        assert(False)
    return (directories, contents)
for parent in old_manifests.keys(): + old_trees[parent] = build_tree(old_manifests[parent]) + + print >> sys.stderr, len(old_manifests) + +def fast_import(ops, revision): + """Import a revision into git using git-fast-import. + + First convert the revision to something git-fast-import + can understand + """ + assert("revision" in revision) + assert("author" in revision) + assert("committer" in revision) + assert("parent" in revision) + + + branch = get_branch_name(revision) + + # Okay: We sometimes have merged where the old manifest is the new one + # I have no idea how this can happen but there are at least two examples in the + # net.venge.monotone history. + # The problem ist git-fast-import will not let us create the same manifest again. + # So if we are in a merge, propagation and the old manifest is the new one we will + # do a git-reset. + # Examples in the mtn history: 6dc36d2cba722f500c06f33e225367461059d90e, dc661f0c25ee96a5a5cf5b5b60deafdf8ccaf286 + # and 7b8331681bf77cd8329662dbffed0311765e7547, 13b1a1e617a362c5735002937fead98d788737f7 + # aa05aa9171bac92766b769bbb703287f53e08693 is a merge of the same manifest... + # so we will just go with one of the two revisions.. + # We will have the same manifest if we propagate something from one branch to another. git does + # not have a special revision showing that copy but will only change the head. + # We will do the same and reset the branch to this revision. 
+ for parent in revision["parent"]: + manifest_version = parse_revision(ops, parent)["manifest"] + if manifest_version == revision["manifest"]: + mark_empty_revision(revision["revision"], parent) + reset_git(ops, revision) + return + + # Use the manifest to find dirs and files + manifest = [line for line in ops.get_manifest_of(revision["revision"])] + manifests = {} + dirs = {} + for parent in revision["parent"]: + manifests[parent] = [line for line in ops.get_manifest_of(parent)] + for line in manifests[parent]: + if line[0] == "dir": + if not parent in dirs: + dirs[parent] = {} + dirs[parent][line[1]] = 1 + + # We can not just change the mode of a file but we need to modifiy the whole file. We + # will simply add it to the modified list and ask to retrieve the status from the manifest + for (file, attribute, value, rev) in revision["set_attributes"]: + if attribute == "mtn:execute": + revision["modified"].append((file, None, rev)) + for (file, attribute, rev) in revision["clear_attributes"]: + if attribute == "mtn:execute": + revision["modified"].append((file, None, rev)) + + + + cmd = [] + cmd += ["commit refs/heads/%s" % branch] + cmd += ["mark :%s" % get_mark(revision["revision"])] + cmd += ["author <%s> %s" % (revision["author"], get_git_date(revision))] + cmd += ["committer <%s> %s" % (revision["committer"], get_git_date(revision))] + cmd += ["data %d" % len(revision["changelog"])] + cmd += ["%s" % revision["changelog"]] + + # Emulation for renaming. 
We will split them into two lists + file_renamed_del = set() + file_renamed_new = set() + file_moved_sticky = set() + + if len(revision["parent"]) != 0: + cmd += ["from :%s" % get_mark(revision["parent"][0])] + renamed = revision["renamed"] + + to_rename_directories = [] + for (new_name, old_name, old_revision) in renamed: + # 24cba5923360fef7c5cc81d51000e30b90355eb9 is a rev where src == dest but the + # directory got renamed, so this means this file got added to the new directory + # TODO, XXX, FIXME check if this can be done for directories as well + if new_name == old_name and not old_name in dirs[old_revision]: + print >> sys.stderr, "Bogus rename in %s (%s, %s)?" % (revision["revision"], new_name, old_name) + file_moved_sticky.add(old_name) + + # Check if the old_name was a directory in the old manifest + # If we rename a directory we will need to recursively remove and recursively + # add... + # Add the '/' otherwise we might rename the wrong directory which shares the + # same prefix. + # fca159c5c00ae4158c289f5aabce995378d4e41b is quite funny. It renames a directory + # and then renames another directory within the renamed one and in the worse case + # we will revive a deleted directory, file... + elif old_name in dirs[old_revision]: + print >> sys.stderr, "Detected directory rename '%s' => '%s'" % (old_name, new_name) + assert(old_revision in manifests) + to_rename_directories.append((old_name, new_name, old_revision)) + else: + print >> sys.stderr, "Renaming %s => %s" % (old_name, new_name) + file_renamed_new.add((new_name, None, revision["revision"])) + file_renamed_del.add((old_name, old_revision)) + + # The first parent is our from. 
+ for parent in revision["parent"][1:]: + cmd += ["merge :%s" % get_mark(parent)] + + # Do the renaming now + (renamed_dirs, renamed_new, renamed_old) = recursively_rename_directories(ops, manifests, to_rename_directories, file_renamed_del.union(set(revision["removed"])), file_moved_sticky) + + # Sanity check, don't remove anything we modify + all_added = set(revision["added_dirs"]).union(renamed_dirs) + all_modifications = set(revision["modified"]).union(set(revision["added_files"])).union(renamed_new).union(file_renamed_new) + all_deleted = set(revision["removed"]).union(renamed_old).union(file_renamed_del) + all_deleted_new = all_deleted + + # Check if we delete and add at the same time + for (deleted,rev) in all_deleted: + for (added,_) in all_added: + if added == deleted: + print >> sys.stderr, "Added and Deleted", added, deleted + all_deleted_new = set(filter(lambda (dele,_): dele != added, all_deleted_new)) + assert((added,rev) not in all_deleted_new) + + for (modified,_,_) in all_modifications: + if modified == deleted: + print >> sys.stderr, "Modified and Deleted", modified, deleted + all_deleted_new = set(filter(lambda (dele,_): dele != modified, all_deleted_new)) + assert((modified,rev) not in all_deleted_new) + + # Filtered list of to be deleted items + all_deleted = all_deleted_new + + # Check if we delete but the manifest has a file like this + for line in manifest: + if line[0] == "dir" or line[0] == "file": + for (deleted,rev) in all_deleted: + if line[1] == deleted: + # 91da98265a39c93946e00adf5d7bf92b341de847 of mtn has a delete + rename + print >> sys.stderr, "Trying to delete a file which is in the new manifest", line[1], deleted + assert(False) + + compare_with_manifest(all_added, all_modifications, all_deleted, manifest, manifests) + + for (dir_name, rev) in all_added: + cmd += ["M 644 inline %s" % os.path.join(dir_name, ".mtn2git_empty")] + cmd += ["data <> sys.stderr, "Cert untrusted?, this must be bad", cert + return False + return True + 
+def get_file_and_mode(operations, manifest, file_name, _file_revision, rev = None): + mode = 644 + + file_revision = None + for line in manifest: + if line[0] == "file" and line[1] == file_name: + assert(line[1] == file_name) + assert(line[2] == "content") + + if _file_revision: + assert(line[3] == _file_revision) + file_revision = line[3] + + attributes = line[4:] + assert(len(attributes) % 3 == 0) + if len(attributes) >= 3: + for i in range(0, len(attributes)%3+1): + if attributes[i] == "attr" and attributes[i+1] == "mtn:execute" and attributes[i+2] == "true": + mode = 755 + break + + assert(file_revision) + file = "".join([file for file in operations.get_file(file_revision)]) + return (mode, file) + + print >> sys.stderr, file_name, rev + assert(False) + + +def parse_revision(operations, revision): + """ + Parse a revision as of mtn automate get_revision + + Return a tuple with the current version, a list of parents, + a list of operations and their revision + """ + if not is_trusted(operations, revision): + raise Exception("Revision %s is not trusted!" 
% revision) + + # The order of certain operations, e.g rename matter so don't use a set + revision_description = {} + revision_description["revision"] = revision + revision_description["added_dirs"] = [] + revision_description["added_files"] = [] + revision_description["removed"] = [] + revision_description["modified"] = [] + revision_description["renamed"] = [] + revision_description["set_attributes"] = [] + revision_description["clear_attributes"] = [] + + old_rev = None + + for line in operations.get_revision(revision): + if line[0] == "format_version": + assert(line[1] == "1") + elif line[0] == "old_revision": + if not "parent" in revision_description: + revision_description["parent"] = [] + if len(line[1]) != 0: + revision_description["parent"].append(line[1]) + old_rev = line[1] + elif line[0] == "new_manifest": + revision_description["manifest"] = line[1] + elif line[0] == "rename": + revision_description["renamed"].append((line[3], line[1], old_rev)) + elif line[0] == "patch": + revision_description["modified"].append((line[1], line[5], old_rev)) + elif line[0] == "delete": + revision_description["removed"].append((line[1], old_rev)) + elif line[0] == "add_dir": + revision_description["added_dirs"].append((line[1], old_rev)) + elif line[0] == "add_file": + revision_description["added_files"].append((line[1], line[3], old_rev)) + elif line[0] == "clear": + revision_description["clear_attributes"].append((line[1], line[3], old_rev)) + elif line[0] == "set": + revision_description["set_attributes"].append((line[1], line[3], line[5], old_rev)) + else: + print >> sys.stderr, line + assert(False) + + for cert in operations.certs(revision): + # Known cert names used by mtn, we can ignore them as they can't be converted to git + if cert[5] in ["suspend", "testresult", "file-comment", "comment", "release-candidate"]: + pass + elif cert[5] in ["author", "changelog", "date", "branch", "tag"]: + revision_description[cert[5]] = cert[7] + if cert[5] == "author": + 
revision_description["committer"] = cert[1] + else: + print >> sys.stderr, "Unknown Cert: Ignoring", cert[5], cert[7] + assert(False) + + return revision_description + + +def tests(ops, revs): + """Load a bunch of revisions and exit""" + for rev in revs: + print >> sys.stderr, rev + fast_import(ops, parse_revision(ops, rev)) + + sys.exit() + +def main(mtn_cli, db, rev): + if not db: + print >> sys.stderr, "You need to specifiy a monotone db" + sys.exit() + + ops = mtn.Operations([mtn_cli, db]) + + # Double rename in mtn + #tests(ops, ["fca159c5c00ae4158c289f5aabce995378d4e41b"]) + + # Rename and remove in OE + #tests(ops, ["74db43a4ad2bccd5f2fd59339e4ece0092f8dcb0"]) + + # Rename + Dele + #tests(ops, ["91da98265a39c93946e00adf5d7bf92b341de847"]) + + # Issue with renaming in OE + #tests(ops, ["c81294b86c62ee21791776732f72f4646f402445"]) + + # Unterminated inner renames + #tests(ops, ["d813a779ef7157f88dade0b8ccef32f28ff34a6e", "4d027b6bcd69e7eb5b64b2e720c9953d5378d845", "af5ffd789f2852e635aa4af88b56a893b7a83a79"]) + + # Broken rename in OE. 
double replacing of the directory command + #tests(ops, ["11f85aab185581dcbff7dce29e44f7c1f0572a27"]) + + if rev: + tests(ops, [rev]) + sys.exit() + + branches = [branch.name for branch in ops.branches()] + ops.automate.stop() + + all_revs = [] + for branch in branches: + heads = [head for head in ops.heads(branch)] + if branch in status.former_heads: + old_heads = status.former_heads[branch] + else: + old_heads = [] + + for head in heads: + all_revs += ops.ancestry_difference(head, old_heads) + status.former_heads[branch] = heads + + sorted_revs = [rev for rev in ops.toposort(all_revs)] + for rev in sorted_revs: + if has_mark(rev): + print >> sys.stderr, "Already having commit '%s'" % rev + else: + print >> sys.stderr, "Going to import revision ", rev + fast_import(ops, parse_revision(ops, rev)) + + +if __name__ == "__main__": + import optparse + parser = optparse.OptionParser() + parser.add_option("-d", "--db", dest="database", + help="The monotone database to use") + parser.add_option("-m", "--marks", dest="marks", default="mtn2git-marks", + help="The marks allocated by the mtn2git command") + parser.add_option("-t", "--mtn", dest="mtn", default="mtn", + help="The name of the mtn command to use") + parser.add_option("-s", "--status", dest="status", default="mtn2git.status.v2", + help="The status file as used by %prog") + parser.add_option("-r", "--revision", dest="rev", default=None, + help="Import a single revision to help debugging.") + + (options,_) = parser.parse_args(sys.argv) + status.mark_file = file(options.marks, "a") + + try: + status.load(options.status) + except IOError: + print >> sys.stderr, "Failed to open the status file" + main(options.mtn, options.database, options.rev) + status.store(options.status) diff --git a/contrib/mtn2git/status.py b/contrib/mtn2git/status.py new file mode 100644 index 0000000000..47d06b51fd --- /dev/null +++ b/contrib/mtn2git/status.py @@ -0,0 +1,47 @@ +""" + Copyright (C) 2006, 2007 Holger Hans Peter Freyther + + 
Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. +""" + +import pickle + +marks = {} +last_mark = 0 +mark_file = None +former_heads = {} +same_revisions = {} + +def load(status_name): + global marks, last_mark, mark_file, former_heads, same_revisions + file = open(status_name, "rb") + marks = pickle.load(file) + last_mark = pickle.load(file) + former_heads = pickle.load(file) + same_revisions = pickle.load(file) + file.close() + +def store(status_name): + global marks, last_mark, mark_file, former_heads, same_revisions + file = open(status_name, "wb") + pickle.dump(marks, file) + pickle.dump(last_mark, file) + pickle.dump(former_heads, file) + pickle.dump(same_revisions, file) + file.close() -- cgit v1.2.3