contrib/mtn2git: mtn add is not recursive by default, actually add files ;)

author: Holger Freyther <zecke@selfish.org> 2007-10-09 15:01:33 +0000
committer: Holger Freyther <zecke@selfish.org> 2007-10-09 15:01:33 +0000
commit: 109e6d019c1880eda9e0eb11c59ae926c4188faa (patch)
tree: d315697759c832780ba81ebc3e830841926e4c8d /contrib/mtn2git
parent: dc11f7ab4057b0c334dac98773efa66e090cc6f6 (diff)
10 files changed, 1317 insertions, 0 deletions
diff --git a/contrib/mtn2git/git2mtn.py b/contrib/mtn2git/git2mtn.py
new file mode 100755
index 0000000000..99681a9ca2
--- /dev/null
+++ b/contrib/mtn2git/git2mtn.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+
+"""
+  Copyright (C) 2006, 2007 Holger Hans Peter Freyther
+
+  Permission is hereby granted, free of charge, to any person obtaining a copy
+  of this software and associated documentation files (the "Software"), to deal
+  in the Software without restriction, including without limitation the rights
+  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+  copies of the Software, and to permit persons to whom the Software is
+  furnished to do so, subject to the following conditions:
+
+  The above copyright notice and this permission notice shall be included in
+  all copies or substantial portions of the Software.
+
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+  THE SOFTWARE.
+"""
+
+#############
+#
+# Use: This tool can merge one git-branch back to one branch in monotone
+#
+# Discussion:
+#   Merging from git to a monotone branch. Currently I see two modes which
+#   should be supported.
+#
+#    a) linear development. Only a couple of changes are done on top of the
+#       branch and nothing gets merged. In this case we can merge everything
+#       back and each rev gets a cert with the branch.
+#       This should be possible using programs like git-rebase.
+#    b) we have merges inside our git-rev-list history. This means we need to
+#       merge every revision and can't attach any branch certs to the revision.
+#       And once we are done with this we will create a propagate like commit
+#       entry and we can give that new revision a cert with the branch name.
+#
+#       This means working in git is treated like a branch!
+#
+#       One difficulty is with git. This propagate like commit will create a new revision
+#       in monotone but none in git as both trees/manifests are the same. So what we have
+#       to make sure is to use the latest mtn revision for a given mark/git revision. This
+#       is where mtn2git.py needs to help. We will save a list of mtn revisions that have the
+#       same git version and then will read each of them and check the branch certs and will
+#       use the one matching our target branch!
+#############
diff --git a/contrib/mtn2git/mtn/.mtn2git_empty b/contrib/mtn2git/mtn/.mtn2git_empty
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/contrib/mtn2git/mtn/.mtn2git_empty
diff --git a/contrib/mtn2git/mtn/__init__.py b/contrib/mtn2git/mtn/__init__.py
new file mode 100644
index 0000000000..e7424ccd3d
--- /dev/null
+++ b/contrib/mtn2git/mtn/__init__.py
@@ -0,0 +1,6 @@
+from mtn import Automate, Operations
+
+__all__ = [
+    "Automate",
+    "Operations"
+    ]
diff --git a/contrib/mtn2git/mtn/authors.py b/contrib/mtn2git/mtn/authors.py
new file mode 100644
index 0000000000..90542a02da
--- /dev/null
+++ b/contrib/mtn2git/mtn/authors.py
@@ -0,0 +1,11 @@
+authors='''Authors:
+Grahame Bowland <grahame@angrygoats.net>
+
+Contributors:
+Matt Johnston <matt@ucc.asn.au>
+Nathaniel Smith <njs@pobox.com>
+Bruce Stephens <monotone@cenderis.demon.co.uk>
+Lapo Luchini <lapo@lapo.it>
+David Reiss <davidn@gmail.com>
+
+'''
diff --git a/contrib/mtn2git/mtn/common.py b/contrib/mtn2git/mtn/common.py
new file mode 100644
index 0000000000..1bbf6031c9
--- /dev/null
+++ b/contrib/mtn2git/mtn/common.py
@@ -0,0 +1,49 @@
+
+import datetime
+import time
+import fcntl
+import os
+import signal
+import traceback
+import sys
+
+def parse_timecert(value):
+    return apply(datetime.datetime, time.strptime(value, "%Y-%m-%dT%H:%M:%S")[:6])
+
+def set_nonblocking(fd):
+	fl = fcntl.fcntl(fd, fcntl.F_GETFL)
+	fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NDELAY)
+
+def terminate_popen3(process):
+        print >> sys.stderr, ("[%s] stopping process: %s" % (os.getpid(), process.pid))
+        try:
+            process.tochild.close()
+            process.fromchild.close()
+            process.childerr.close()
+            if process.poll() == -1:
+                # the process is still running, so kill it.
+                os.kill(process.pid, signal.SIGKILL)
+            process.wait()
+        except:
+            print >> sys.stderr, ("%s failed_to_stop %s (%s)" % (os.getpid(), process.pid, traceback.format_exc()))
+
+def ago(event):
+    """Describe how long ago the naive UTC datetime `event` was, as one whole unit (days, hours or minutes)."""
+    def plural(v, singular, plural):
+        if v == 1:
+            return "%d %s" % (v, singular)
+        else:
+            return "%d %s" % (v, plural)
+    now = datetime.datetime.utcnow()
+    ago = now - event
+    if ago.days > 0:
+        rv = "%s" % (plural(ago.days, "day", "days"))
+    elif ago.seconds >= 3600:
+        # >= so that exactly one hour reads "1 hour" rather than "60 minutes"
+        hours = ago.seconds / 3600
+        rv = "%s" % (plural(hours, "hour", "hours"))
+    else:
+        minutes = ago.seconds / 60
+        rv = "%s" % (plural(minutes, "minute", "minutes"))
+    return rv
+
diff --git a/contrib/mtn2git/mtn/genproxy.py b/contrib/mtn2git/mtn/genproxy.py
new file mode 100755
index 0000000000..5ba67eefd6
--- /dev/null
+++ b/contrib/mtn2git/mtn/genproxy.py
@@ -0,0 +1,25 @@
+#!/usr/bin/env python
+
+class GeneratorProxy(object):
+    def __init__(self, generator):
+        self.generator = generator
+    def __iter__(self):
+        return self
+    def next(self):
+        return self.generator.next()
+
+class Seedy(GeneratorProxy):
+    def __del__(self):
+        print "testing"
+        
+def test():
+    yield 2
+    yield 3
+    yield 4
+
+if __name__ == '__main__':
+    a = test()
+    b = Seedy(test())
+    for i in b:
+        print i
+    
+\ No newline at end of file
diff --git a/contrib/mtn2git/mtn/mtn.py b/contrib/mtn2git/mtn/mtn.py
new file mode 100644
index 0000000000..aa086d78d4
--- /dev/null
+++ b/contrib/mtn2git/mtn/mtn.py
@@ -0,0 +1,419 @@
+
+import os
+import re
+import fcntl
+import pipes
+import select
+import threading
+import popen2
+from common import set_nonblocking, terminate_popen3
+from traceback import format_exc
+import genproxy
+import sys
+
+# regular expressions that are of general use when 
+# validating monotone output
+def group_compile(r):
+    return re.compile('('+r+')')
+
+hex_re = r'[A-Fa-f0-9]*'
+hex_re_c = group_compile(hex_re)
+revision_re = r'[A-Fa-f0-9]{40}'
+revision_re_c = group_compile(revision_re)
+name_re = r'^[\S]+'
+name_re_c = group_compile(name_re)
+
+class MonotoneException(Exception):
+    pass
+
+class Revision(str):
+    def __init__(self, v):
+        # special case that must be handled: empty (initial) revision ID ''
+        str.__init__(v)
+        self.obj_type = "revision"
+        if v != '' and not revision_re_c.match(self):
+            raise MonotoneException("Not a valid revision ID: %s" % (v))
+    def abbrev(self):
+        return '[' + self[:8] + '..]'
+
+class Author(str):
+    def __init__(self, v):
+        str.__init__(v)
+        self.obj_type = "author"
+        
+class Runner:  # holds the argv prefix (monotone binary plus --db option) that subclasses extend per command
+    def __init__(self, monotone, database):
+        self.base_command = [monotone, "--db=%s" % database]  # run as an argv list without a shell, so shell-quoting would corrupt the path
+
+packet_header_re = re.compile(r'^(\d+):(\d+):([lm]):(\d+):')
+
+class Automate(Runner):
+    """Runs commands via a particular monotone process. This 
+       process is started the first time run() is called, and 
+       stopped when this class instance is deleted or the stop()
+       method is called.
+       
+       If an error occurs, the monotone process may need to be 
+       stopped and a new one created.
+       """
+    def __init__(self, *args, **kwargs):
+        Runner.__init__(*[self] + list(args), **kwargs)
+        self.lock = threading.Lock()
+        self.process = None
+
+    def stop(self):
+        if not self.process:
+            return
+        terminate_popen3(self.process)
+        self.process = None
+ 
+    def __process_required(self):
+        if self.process != None:
+            return
+        to_run = self.base_command + ['automate', 'stdio']
+        self.process = popen2.Popen3(to_run, capturestderr=True)
+        # breaks down with toposort and a lot of input
+        #map (set_nonblocking, [ self.process.fromchild,
+        #                        self.process.tochild,
+        #                        self.process.childerr ])
+        map (set_nonblocking, [ self.process.fromchild,
+                                self.process.childerr ])
+
+    def run(self, *args, **kwargs):
+        print >> sys.stderr, (("automate is running:", args, kwargs))
+
+        lock = self.lock
+        stop = self.stop
+        class CleanRequest(genproxy.GeneratorProxy):
+            def __init__(self, *args, **kwargs):
+                genproxy.GeneratorProxy.__init__(self, *args, **kwargs)
+
+                # nb; this used to be False, but True seems to behave more sensibly.
+                # in particular, if someone holds down Refresh sometimes the code 
+                # gets here before __del__ is called on the previous iterator, 
+                # causing a pointless error to occur
+                if not lock.acquire(True):
+                    # I've checked; this exception does _not_ cause __del__ to run, so 
+                    # we don't accidentally unlock a lock below
+                    raise MonotoneException("Automate request cannot be called: it is already locked! This indicates a logic error in ViewMTN; please report.")
+
+            def __del__(self):
+                def read_any_unread_output():
+                    try:
+                        # this'll raise StopIteration if we're done
+                        self.next()
+                        # okay, we're not done..
+                        print >> sys.stderr, ("warning: Automate output not completely read; reading manually.")
+                        for stanza in self:
+                            pass
+                    except StopIteration:
+                        pass
+
+                try:
+                    read_any_unread_output()
+                    lock.release()
+                except:
+                    print >> sys.stderr, ("exception cleaning up after Automation; calling stop()!")
+                    stop()
+
+        return CleanRequest(self.__run(*args, **kwargs))
+
+    def __run(self, command, args):
+        """Send one 'automate stdio' command and yield its output line by line (newlines kept); raises MonotoneException on failure."""
+        enc = "l%d:%s" % (len(command), command)
+        enc += ''.join(["%d:%s" % (len(x), x) for x in args]) + 'e'
+        # number of tries to get a working mtn going..
+        for i in xrange(2):
+            self.__process_required()
+            try:
+                self.process.tochild.write(enc)
+                self.process.tochild.flush()
+                break
+            except:
+                # mtn has died underneath the automate; restart it (sys is the module-level import)
+                print >> sys.stderr, ("exception writing to child process; attempting restart: %s" % format_exc())
+                self.stop()
+        else:  # no break: every attempt failed and stop() has left self.process as None
+            raise MonotoneException("unable to write to the monotone automate process")
+        def read_result_packets():
+            buffer = ""
+            while True:
+                r_stdin, r_stdout, r_stderr = select.select([self.process.fromchild], [], [], None)
+                if not r_stdin and not r_stdout and not r_stderr:
+                    break
+
+                if self.process.fromchild in r_stdin:
+                    data = self.process.fromchild.read()
+                    if data == "":
+                        break
+                    buffer += data
+
+                # loop, trying to get complete packets out of our buffer
+                complete, in_packet = False, False
+                while not complete and buffer != '':
+                    if not in_packet:
+                        m = packet_header_re.match(buffer)
+                        if not m:
+                            break
+                        in_packet = True
+                        cmdnum, errnum, pstate, length = m.groups()
+                        errnum = int(errnum)
+                        length = int(length)
+                        header_length = m.end(m.lastindex) + 1 # the '1' is the colon
+
+                    if len(buffer) < length + header_length:
+                        # not enough data read from client yet; go round
+                        break
+                    else:
+                        result = buffer[header_length:header_length+length]
+                        buffer = buffer[header_length+length:]
+                        complete = pstate == 'l'
+                        in_packet = False
+                        yield errnum, complete, result
+
+                if complete:
+                    break
+
+        # get our response, and yield() it back one line at a time
+        code_max = -1
+        data_buf = ''
+        for code, is_last, data in read_result_packets():
+            if code and code > code_max:
+                code_max = code
+            data_buf += data
+            while True:
+                nl_idx = data_buf.find('\n')
+                if nl_idx == -1:
+                    break
+                yield data_buf[:nl_idx+1]
+                data_buf = data_buf[nl_idx+1:]
+        # left over data?
+        if data_buf:
+            yield data_buf
+        if code_max > 0:
+            raise MonotoneException("error code %d in automate packet." % (code_max))
+
+class Standalone(Runner):
+    """Runs commands by running monotone. One monotone process 
+       per command"""
+
+    def run(self, command, args):
+        # as we pass popen3 as sequence, it executes monotone with these 
+        # arguments - and does not pass them through the shell according 
+        # to help(os.popen3)
+#       print(("standalone is running:", command, args))
+        to_run = self.base_command + [command] + args
+        process = popen2.Popen3(to_run, capturestderr=True)
+        for line in process.fromchild:
+            yield line
+        stderr_data = process.childerr.read()
+        if len(stderr_data) > 0:
+            raise MonotoneException("data on stderr for command '%s': %s" % (command, 
+                                                                             stderr_data))
+        terminate_popen3(process)
+
+class MtnObject:
+    def __init__(self, obj_type):
+        self.obj_type = obj_type
+
+class Tag(MtnObject):
+    def __init__(self, name, revision, author, branches):
+        MtnObject.__init__(self, "tag")
+        self.name, self.revision, self.author, self.branches = name, Revision(revision), author, branches
+
+class Branch(MtnObject):
+    def __init__(self, name):
+        MtnObject.__init__(self, "branch")
+        self.name = name
+
+class File(MtnObject):
+    def __init__(self, name, in_revision):
+        MtnObject.__init__(self, "file")
+        self.name = name
+        self.in_revision = in_revision
+
+class Dir(MtnObject):
+    def __init__(self, name, in_revision):
+        MtnObject.__init__(self, "dir")
+        self.name = name
+        self.in_revision = in_revision
+
+basic_io_name_tok = re.compile(r'^(\S+)')
+
+def basic_io_from_stream(gen):
+    # all of these x_consume functions return parsed string 
+    # token to add to stanza, name of next consume function to call
+    # new value of line (eg. with consumed tokens removed)
+
+    def hex_consume(line):
+        m = hex_re_c.match(line[1:])
+        if line[0] != '[' or not m:
+            raise MonotoneException("This is not a hex token: %s" % line)
+        end_of_match = m.end(m.lastindex)
+        if line[end_of_match+1] != ']':
+            raise MonotoneException("Hex token ends in character other than ']': %s" % line)
+        return Revision(m.groups()[0]), choose_consume, line[end_of_match+2:]
+
+    def name_consume(line):
+        m = name_re_c.match(line)
+        if not m:
+            raise MonotoneException("Not a name: %s" % line)
+        end_of_match = m.end(m.lastindex)
+        return m.groups()[0], choose_consume, line[end_of_match:]
+
+    def choose_consume(line):
+        line = line.lstrip()
+        if line == '':
+            consumer = choose_consume
+        elif line[0] == '[':
+            consumer = hex_consume
+        elif line[0] == '"':
+            consumer = string_consume
+        else:
+            consumer = name_consume
+        return None, consumer, line
+
+    class StringState:
+        def __init__(self):
+            self.in_escape = False
+            self.has_started = False
+            self.has_ended = False
+            self.value = ''
+
+    def string_consume(line, state=None):
+        if not state:
+            state = StringState()
+
+        if not state.has_started:
+            if line[0] != '"':
+                raise MonotoneException("Not a string: %s" % line)
+            line = line[1:]
+            state.has_started = True
+
+        idx = 0
+        for idx, c in enumerate(line):
+            if state.in_escape:
+                if c != '\\' and c != '"':
+                    raise MonotoneException("Invalid escape code: %s in %s\n" % (c, line))
+                state.value += c
+                state.in_escape = False
+            else:
+                if c == '\\':
+                    state.in_escape = True
+                elif c == '"':
+                    state.has_ended = True
+                    break
+                else:
+                    state.value += c
+
+        if state.has_ended:
+            return state.value, choose_consume, line[idx+1:]
+        else:
+            return (None, 
+                    lambda s: string_consume(s, state), 
+                    line[idx+1:])
+
+    consumer = choose_consume
+    current_stanza = []
+    for line in gen:
+        # if we're not in an actual consumer (which we shouldn't be, unless 
+        # we're parsing some sort of multi-line token) and we have a blank 
+        # line, it indicates the end of any current stanza
+        if (consumer == choose_consume) and (line == '' or line == '\n') and current_stanza:
+            yield current_stanza
+            current_stanza = []
+            continue
+
+        while line != '' and line != '\n':
+            new_token, consumer, line = consumer(line)
+            if new_token != None:
+                current_stanza.append(new_token)
+    if current_stanza:
+        yield current_stanza
+        
+class Operations:
+    def __init__(self, runner_args):
+        self.standalone = apply(Standalone, runner_args)
+        self.automate = apply(Automate, runner_args)
+
+    def tags(self):
+        for stanza in basic_io_from_stream(self.automate.run('tags', [])):
+            if stanza[0] == 'tag':
+                branches = []
+                for branch in stanza[7:]:
+                    branches.append(Branch(branch))
+                yield Tag(stanza[1], stanza[3], stanza[5], branches)
+
+    def branches(self):
+        for line in (t.strip() for t in self.automate.run('branches', [])):
+            if not line:
+                continue
+            yield apply(Branch, (line,))
+
+    def graph(self):
+        for line in self.automate.run('graph', []):
+            yield line
+
+    def parents(self, revision):
+        if revision != "":
+            for line in (t.strip() for t in self.automate.run('parents', [revision])):
+                if not line:
+                    continue
+                yield apply(Revision, (line,))
+
+    def ancestry_difference(self, new_rev, old_revs):
+	"""
+	new_rev a single new revision number
+	old_revs a list of revisions
+	"""
+	if new_rev != "":
+	    for line in (t.strip() for t in self.automate.run('ancestry_difference', [new_rev]+old_revs)):
+		if not line:
+		    continue
+		yield apply(Revision, (line,))
+
+    def children(self, revision):
+        if revision != "":
+            for line in (t.strip() for t in self.automate.run('children', [revision])):
+                if not line:
+                    continue
+                yield apply(Revision, (line,))
+
+    def toposort(self, revisions):
+        for line in (t.strip() for t in self.automate.run('toposort', revisions)):
+            if not line:
+                continue
+            yield apply(Revision, (line,))
+
+    def heads(self, branch):
+        for line in (t.strip() for t in self.automate.run('heads', [branch])):
+            if not line:
+                continue
+            yield apply(Revision, (line,))
+
+    def get_content_changed(self, revision, path):
+        for stanza in basic_io_from_stream(self.automate.run('get_content_changed', [revision, path])):
+            yield stanza
+
+    def get_revision(self, revision):
+        for stanza in basic_io_from_stream(self.automate.run('get_revision', [revision])):
+            yield stanza
+
+    def get_manifest_of(self, revision):
+        for stanza in basic_io_from_stream(self.automate.run('get_manifest_of', [revision])):
+            yield stanza
+
+    def get_file(self, fileid):
+        for stanza in self.automate.run('get_file', [fileid]):
+            yield stanza
+
+    def certs(self, revision):
+        for stanza in basic_io_from_stream(self.automate.run('certs', [revision])):
+            yield stanza
+
+    def diff(self, revision_from, revision_to, files=[]):
+        args = ['-r', revision_from, '-r', revision_to] + files
+        for line in self.standalone.run('diff', args):
+            yield line
+
+
diff --git a/contrib/mtn2git/mtn/utility.py b/contrib/mtn2git/mtn/utility.py
new file mode 100644
index 0000000000..c7345c5d1e
--- /dev/null
+++ b/contrib/mtn2git/mtn/utility.py
@@ -0,0 +1,100 @@
+
+import popen2
+import select
+import fcntl
+import os
+
+def set_nonblocking(fd):
+    fl = fcntl.fcntl(fd, fcntl.F_GETFL)
+    fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NDELAY)
+
+def run_command(command, timeout=None, to_child=None):
+    "returns a dict with the keys 'run_command', 'timeout', 'exitcode', 'fromchild' and 'childerr'"
+    p = popen2.Popen3(command, capturestderr=True)
+    set_nonblocking(p.fromchild)
+    set_nonblocking(p.childerr)
+    fromchild_read = ""
+    childerr_read = ""
+    was_timeout = False
+    if to_child != None:
+        p.tochild.write(to_child)
+    p.tochild.close()
+    while 1:
+        ro, rw, re = select.select([p.fromchild], [], [p.childerr], timeout)
+        if not ro and not rw and not re:
+            was_timeout = True
+            break
+        if p.fromchild in ro:
+            recv = p.fromchild.read()
+            if recv == "": break
+            fromchild_read += recv
+        if p.childerr in re:
+            recv = p.childerr.read()
+            if recv == "": break
+            childerr_read += recv
+    if not was_timeout:
+        # check for any data we might have missed (due to a premature break)
+        # (if there isn't anything we just get an IOError, which we don't mind)
+        try: fromchild_read += p.fromchild.read()
+        except IOError: pass
+        try: childerr_read += p.childerr.read()
+        except IOError: pass
+    p.fromchild.close()
+    # if there wasn't a timeout, the program should have exited; in which case we should wait() for it
+    # otherwise, it might be hung, so the parent should wait for it.
+    # (wrap in a try: except: just in case some other thread happens to wait() and grab ours; wrapping
+    # python around UNIX is horrible sometimes)
+    exitcode = None
+    try: 
+        if not was_timeout: exitcode = p.wait() >> 8
+    except: pass
+    return { 'run_command' : command,
+         'timeout' : was_timeout, 
+         'exitcode' : exitcode, 
+         'fromchild' : fromchild_read, 
+         'childerr' : childerr_read }
+
+def iter_command(command, timeout=None):
+    """Run command and yield its stdout line by line (newline removed); raises Exception on stderr data or on timeout."""
+    p = popen2.Popen3(command, capturestderr=True)
+    set_nonblocking(p.fromchild)
+    set_nonblocking(p.childerr)
+    fromchild_read = ""
+    childerr_read = ""
+    was_timeout = False
+    while 1:
+        ro, rw, re = select.select([p.fromchild], [], [p.childerr], timeout)
+        if not ro and not rw and not re:
+            was_timeout = True
+            break
+        if p.fromchild in ro:
+            recv = p.fromchild.read()
+            if recv == "": break
+            fromchild_read += recv
+            while 1:
+                nl = fromchild_read.find('\n')
+                if nl == -1: break
+                yield fromchild_read[:nl]
+                fromchild_read = fromchild_read[nl+1:]
+        if p.childerr in re:
+            recv = p.childerr.read()
+            if recv == "": break
+            childerr_read += recv
+    if not was_timeout:
+        # pick up any data missed due to a premature break (an IOError just means there was none)
+        try: fromchild_read += p.fromchild.read()
+        except IOError: pass
+        try: childerr_read += p.childerr.read()
+        except IOError: pass
+    p.fromchild.close()
+    p.tochild.close()
+    # yield anything left over, oldest first (pop() from the end handed the lines back reversed)
+    to_yield = fromchild_read.split('\n')
+    for leftover in to_yield: yield leftover
+    # call wait()
+    try:
+        if not was_timeout: p.wait()
+    except: pass
+    if len(childerr_read): raise Exception("data on stderr (command is %s)" % command, childerr_read)
+    if was_timeout: raise Exception("command timeout")
+
diff --git a/contrib/mtn2git/mtn2git.py b/contrib/mtn2git/mtn2git.py
new file mode 100755
index 0000000000..cda4f39fca
--- /dev/null
+++ b/contrib/mtn2git/mtn2git.py
@@ -0,0 +1,610 @@
+#!/usr/bin/env python
+
+"""
+  Copyright (C) 2006, 2007 Holger Hans Peter Freyther
+
+  Permission is hereby granted, free of charge, to any person obtaining a copy
+  of this software and associated documentation files (the "Software"), to deal
+  in the Software without restriction, including without limitation the rights
+  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+  copies of the Software, and to permit persons to whom the Software is
+  furnished to do so, subject to the following conditions:
+
+  The above copyright notice and this permission notice shall be included in
+  all copies or substantial portions of the Software.
+
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+  THE SOFTWARE.
+"""
+
+####
+# TODO:
+#   -tag handling
+#   -work with n-merges
+#
+# DISCUSSION:
+#   -For some reason the get_revision information might be inaccurate
+#    and I should consider just comparing the manifests.
+#    I would use the manifests of the parents and consider all files deleted
+#    and then remove every dir/file that is inside the new manifest from this
+#    list.
+#      Benefits:
+#         - 1:1 match of the manifest regardless of get_revision information
+#         - Renaming is handled by git anyway
+#      Downsides:
+#         - The size of the import will grow.
+#
+
+import mtn
+import os
+import sys
+import datetime
+import email.Utils
+
+import status
+
+
+def get_mark(revision):
+    """
+    Get a mark for a specific revision. If the revision is known the former
+    mark will be returned. Otherwise a new mark will be allocated and stored
+    to the mark file.
+    """
+    if revision in status.marks:
+        return status.marks[revision]
+    status.last_mark += 1
+    status.marks[revision] = status.last_mark
+    print >> status.mark_file, "%d: %s" % (status.last_mark, revision)
+    return status.last_mark
+
+def has_mark(revision):
+    return revision in status.marks
+
+
+def mark_empty_revision(revision, parent):
+    """Git does not like empty merges, just skip the revision"""
+    # TODO, FIXME, XXX, We might want to add a reset cmd here
+    print >> sys.stderr, "Found an empty revision, skipping '%s'" % revision
+    parent_mark = status.marks[parent]
+    status.marks[revision] = parent_mark
+
+    # There is another mtn revision that is using this mark!
+    if not parent_mark in status.same_revisions:
+        status.same_revisions[parent_mark] = []
+    status.same_revisions[parent_mark].append(revision)
+
+
+def get_branch_name(revision):
+    """
+    TODO for unnamed branches (e.g. as we lack the certs) we might want to follow
+    the parents until we end up at a item with a branch name and then use the last
+    item without a name...
+    """
+    if "branch" in revision:
+        branch = revision["branch"]
+    else:
+        #branch = "initial-%s" % revision["revision"]
+        branch = "mtn-unnamed-branch"
+    return branch
+
+def reset_git(ops, revision):
+    """
+    Print a 'reset refs/heads/<branch>' command whose 'from' line is the mark of this revision
+    """
+    branch = get_branch_name(revision)
+
+    cmd = []
+    cmd += ["reset refs/heads/%s" % branch]
+    cmd += ["from :%s" % get_mark(revision["revision"])]
+    cmd += [""]
+    print "\n".join(cmd)
+
+def filter_renamed(manifest, renamed):
+    """
+    If we base from a revision that has already done
+    the move, git-fast-import will complain that the file
+    has been already moved
+    """
+    if len(renamed) == 0:
+        return renamed
+
+    for line in manifest:
+        if line[0] == "file":
+            renamed = filter(lambda (to,from_,manifest): to != line[1], renamed)
+            
+    return renamed
+
+def get_git_date(revision):
+    """
+    Convert the "date" cert of monotone to a time understandable by git. No timezone
+    conversions are done.
+    """
+    dt = datetime.datetime.strptime(revision["date"], "%Y-%m-%dT%H:%M:%S").strftime("%a, %d %b %Y %H:%M:%S +0000")
+    return dt
+
+def recursively_delete(ops, manifest, revision, dir_name, to_delete):
+    """
+    Add every file and dir of the manifest whose path starts with dir_name to to_delete and return it
+    """
+    for line in manifest:
+        if line[0] == "dir" or line[0] == "file":
+            if line[1].startswith(dir_name):
+                print >> sys.stderr, "Deleting '%s'" % line[1]
+                to_delete.add((line[1], revision))
+        elif line[0] in ["format_version"]:
+            assert(line[1] == "1")
+        else:
+            print >> sys.stderr, line[0]
+            assert(False)
+
+    return to_delete
+
+def recursively_rename(ops, manifest, revision, old_name, new_name, to_add_dirs, to_add_files, to_remove_items, files_deleted, files_sticky):
+    """
+    mtn has a rename command and can rename entire directories. For git we will have to do the recursive renaming
+    ourselves. Basically we will take every manifest entry and replace old_name with new_name, but only:
+
+        If the file of the old manifest is neither in our to-be-deleted list nor in files_sticky
+    """
+    old_dir = old_name + "/"
+    for line in manifest:
+        if line[1].startswith(old_dir) or line[1] == old_name:
+            already_handled = False
+            for (deleted,_) in files_deleted:
+                if line[1] == deleted:
+                    already_handled = True
+                    break 
+
+            # Don't rename files that should be in the same directory
+            if line[1] in files_sticky:
+                already_handled = True
+
+            if already_handled:
+                pass
+            elif line[0] == "file":
+                print >> sys.stderr, "Will add '%s' old: '%s' new: '%s' => result: '%s'" % (line[1], old_name, new_name, line[1].replace(old_name, new_name, 1))
+                to_add_files.add((line[1].replace(old_name, new_name, 1), None, revision))
+            elif line[0] == "dir":
+                to_add_dirs.add((line[1].replace(old_name, new_name, 1), revision))
+            elif line[0] in ["format_version"]:
+                assert(line[1] == "1")
+            else:
+                print >> sys.stderr, line[0]
+                assert(False)
+
+    return (to_add_files, to_add_dirs)
+
+#
+#    We need to recursively rename the directories. Now the difficult part is to undo certain operations.
+#    
+#    e.g we rename the whole dir and then rename a file back. We could revive a directory that was marked
+#    for deletion.
+#
+#    rename "weird/two/three"
+#    to "unweird/four"
+#
+#    rename "weird/two/three/four"
+#    to "weird/two/three"
+#
+#    Here we would schedule weird/two/three for deletion but then revive it again. So three does not
+#    get copied to unweird/four/three
+#    """
+def recursively_rename_directories(ops, manifests, rename_commands, files_deleted, files_moved_sticky):
+    to_add_directories = set()
+    to_add_files = set()
+    to_remove_items = set()
+
+    for (old_name, new_name, old_revision) in rename_commands:
+        # Check if we have the above case and rename a more specific directory
+        # and then we will alter the result...
+        inner_rename = False
+        for (other_old_name, other_new_name, other_rev) in rename_commands:
+            if old_name.startswith(other_old_name + "/") and other_old_name != old_name:
+                inner_rename = True
+                print >> sys.stderr, "Inner rename detected", old_name, other_old_name
+                # Fixup the renaming
+                def rename(filename, filerev, rev, would_be_new_name):
+                    if filename.startswith(would_be_new_name + "/"):
+                        return filename.r
author	Holger Freyther <zecke@selfish.org>	2007-10-09 15:01:33 +0000
committer	Holger Freyther <zecke@selfish.org>	2007-10-09 15:01:33 +0000
commit	109e6d019c1880eda9e0eb11c59ae926c4188faa (patch)
tree	d315697759c832780ba81ebc3e830841926e4c8d /contrib/mtn2git
parent	dc11f7ab4057b0c334dac98773efa66e090cc6f6 (diff)