diff options
Diffstat (limited to 'contrib/mtn2git/mtn2git.py')
-rwxr-xr-x | contrib/mtn2git/mtn2git.py | 402 |
1 files changed, 123 insertions, 279 deletions
diff --git a/contrib/mtn2git/mtn2git.py b/contrib/mtn2git/mtn2git.py index a23e757936..1de3010756 100755 --- a/contrib/mtn2git/mtn2git.py +++ b/contrib/mtn2git/mtn2git.py @@ -1,7 +1,7 @@ #!/usr/bin/env python """ - Copyright (C) 2006, 2007 Holger Hans Peter Freyther + Copyright (C) 2006, 2007, 2008 Holger Hans Peter Freyther Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -27,18 +27,6 @@ # -tag handling # -work with n-merges # -# DISCUSSION: -# -For some reason the get_revision information might be inaccurate -# and I should consider just comparing the manifests. -# I would use the manifests of the parents and consider all files deleted -# and then remove every dir/file that is inside the new manifest from this -# list. -# Benefits: -# - 1:1 match of the manifest regardles of get_revision information -# - Renaming is handled by git anyway -# Downsides: -# - The size of the import will grow. -# import mtn import os @@ -48,6 +36,17 @@ import email.Utils import status +# Interesting revisions: +# Rename with dest==src: 24cba5923360fef7c5cc81d51000e30b90355eb9 +# Recursive rename: fca159c5c00ae4158c289f5aabce995378d4e41b +# Delete+Rename: 91da98265a39c93946e00adf5d7bf92b341de847 +# +# +# + +# Our manifest/tree fifo construct +cached_tree = {} +cached_fifo = [] def get_mark(revision): """ @@ -104,21 +103,6 @@ def reset_git(ops, revision): cmd += [""] print "\n".join(cmd) -def filter_renamed(manifest, renamed): - """ - If we base from a revision that has already done - the move, git-fast-import will complain that the file - has been already moved - """ - if len(renamed) == 0: - return renamed - - for line in manifest: - if line[0] == "file": - renamed = filter(lambda (to,from_,manifest): to != line[1], renamed) - - return renamed - def get_git_date(revision): """ Convert the "date" cert of monotone to a time understandable by git. No timezone @@ -127,135 +111,90 @@ def get_git_date(revision): dt = datetime.datetime.strptime(revision["date"], "%Y-%m-%dT%H:%M:%S").strftime("%a, %d %b %Y %H:%M:%S +0000") return dt -def recursively_delete(ops, manifest, revision, dir_name, to_delete): - """ - Recursively delete all files that dir_name inside the name - """ - for line in manifest: - if line[0] == "dir" or line[0] == "file": - if line[1].startswith(dir_name): - print >> sys.stderr, "Deleting '%s'" % line[1] - to_delete.add((line[1], revision)) - elif line[0] in ["format_version"]: - assert(line[1] == "1") - else: - print >> sys.stderr, line[0] - assert(False) +def is_executable_attribute_set(attributes, rev): + assert(len(attributes) % 3 == 0), rev - return to_delete + if len(attributes) >= 3: + for i in range(0, len(attributes)%3+1): + if attributes[i] == "attr" and attributes[i+1] == "mtn:execute" and attributes[i+2] == "true": + return True + return False -def recursively_rename(ops, manifest, revision, old_name, new_name, to_add_dirs, to_add_files, to_remove_items, files_deleted, files_sticky): - """ - mtn has a rename command and can rename entrie directories. For git we will have to do the recursive renaming - ourselves. Basicly we will get all files and replace old_name with new_name but only: - If the file of the old_manifest is not in our to be deleted list - """ - old_dir = old_name + "/" - for line in manifest: - if line[1].startswith(old_dir) or line[1] == old_name: - already_handled = False - for (deleted,_) in files_deleted: - if line[1] == deleted: - already_handled = True - break - - # Don't rename files that should be in the same directory - if line[1] in files_sticky: - already_handled = True - - if already_handled: - pass - elif line[0] == "file": - print >> sys.stderr, "Will add '%s' old: '%s' new: '%s' => result: '%s'" % (line[1], old_name, new_name, line[1].replace(old_name, new_name, 1)) - to_add_files.add((line[1].replace(old_name, new_name, 1), None, revision)) - elif line[0] == "dir": - to_add_dirs.add((line[1].replace(old_name, new_name, 1), revision)) - elif line[0] in ["format_version"]: - assert(line[1] == "1") - else: - print >> sys.stderr, line[0] - assert(False) - - return (to_add_files, to_add_dirs) +def build_tree(manifest, rev): + """Assemble a filesystem tree from a given manifest""" -# -# We need to recursively rename the directories. Now the difficult part is to undo certain operations. -# -# e.g we rename the whole dir and then rename a file back. We could revive a directory that was marked -# for deletion. -# -# rename "weird/two/three" -# to "unweird/four" -# -# rename "weird/two/three/four" -# to "weird/two/three" -# -# Here we would schedule weird/two/three for deletion but then revive it again. So three does not -# get copied to unweird/four/three -# """ -def recursively_rename_directories(ops, manifests, rename_commands, files_deleted, files_moved_sticky): - to_add_directories = set() - to_add_files = set() - to_remove_items = set() - - for (old_name, new_name, old_revision) in rename_commands: - # Check if we have the above case and rename a more specific directory - # and then we will alter the result... - inner_rename = False - for (other_old_name, other_new_name, other_rev) in rename_commands: - if old_name.startswith(other_old_name + "/") and other_old_name != old_name: - inner_rename = True - print >> sys.stderr, "Inner rename detected", old_name, other_old_name - # Fixup the renaming - def rename(filename, filerev, rev, would_be_new_name): - if filename.startswith(would_be_new_name + "/"): - return filename.replace(would_be_new_name, new_name, 1), filerev, rev - return filename, filerev, rev - - would_be_new_name = other_new_name + "/" + old_name[len(other_old_name)+1:] - to_remove_items = set(filter(lambda (item,_): item != new_name, to_remove_items)) - to_add_directories = set(filter(lambda (item,_): item != would_be_new_name, to_add_directories)) - to_add_directories.add((new_name, old_revision)) - to_add_files = set(map(lambda (fn, fr, r): rename(fn, fr, r, would_be_new_name), to_add_files)) - - if not inner_rename: - to_remove_items.add((old_name, old_revision)) - recursively_delete(ops, manifests[old_revision], old_revision, old_name + "/", to_remove_items) - recursively_rename(ops, manifests[old_revision], old_revision, old_name, new_name, to_add_directories, to_add_files, to_remove_items, files_deleted, files_moved_sticky) - - return (to_add_directories, to_add_files, to_remove_items) - - -def build_tree(manifest): - dirs = {} - files = {} + class tree: + def __init__(self): + self.dirs = {} + self.files= {} + + tree = tree() for line in manifest: if line[0] == "file": - files[line[1]] = (line[3],line[4:]) + tree.files[line[1]] = (line[3], is_executable_attribute_set(line[4:], rev)) elif line[0] == "dir": - dirs[line[1]] = 1 + tree.dirs[line[1]] = 1 elif line[0] != "format_version": - print >> sys.stderr, line[0] - assert(False) - return (dirs,files) + assert(False), "Rev: %s: Line[0]: '%s'" % (rev, line[0]) -def compare_with_manifest(all_added, all_modified, all_deleted, new_manifest, old_manifests): - """ - Sanity check that the difference between the old and the new manifest is the one - we have in all_added, all_modified, all_deleted - """ - old_trees = {} - really_added = {} - really_modified = {} - really_removed = {} + return tree - current_dirs, current_files = build_tree(new_manifest) +def get_and_cache_tree(ops, revision): + """Simple FIFO to cache a number of trees""" + global cached_tree, cached_fifo - for parent in old_manifests.keys(): - old_trees[parent] = build_tree(old_manifests[parent]) + if revision in cached_tree: + return cached_tree[revision] + + tree = build_tree([line for line in ops.get_manifest_of(revision)], revision) + cached_tree[revision] = tree + cached_fifo.append(revision) + + # Shrink + if len(cached_fifo) > 100: + old_name = cached_fifo[0] + cached_fifo = cached_fifo[1:] + del cached_tree[old_name] + + return tree + +def diff_manifest(old_tree, new_tree): + """Find additions, modifications and deletions""" + added = set() + modified = set() + deleted = set() + + # Removed dirs + for dir in old_tree.dirs.keys(): + if not dir in new_tree.dirs: + deleted.add((dir,True)) + + # New dirs + for dir in new_tree.dirs.keys(): + if not dir in old_tree.dirs: + added.add(dir) + + # Deleted files + for file in old_tree.files.keys(): + if not file in new_tree.files: + deleted.add((file,False)) + + # Added files, goes to modifications + for file in new_tree.files.keys(): + if not file in old_tree.files: + modified.add((file, new_tree.files[file][0])) + continue + + # The file changed, either contents or executable attribute + old = old_tree.files[file] + new = new_tree.files[file] + if old != new: + modified.add((file, new_tree.files[file][0])) + + + return (added, modified, deleted) - print >> sys.stderr, len(old_manifests) def fast_import(ops, revision): """Import a revision into git using git-fast-import. @@ -292,27 +231,28 @@ def fast_import(ops, revision): return # Use the manifest to find dirs and files - manifest = [line for line in ops.get_manifest_of(revision["revision"])] - manifests = {} - dirs = {} - for parent in revision["parent"]: - manifests[parent] = [line for line in ops.get_manifest_of(parent)] - for line in manifests[parent]: - if line[0] == "dir": - if not parent in dirs: - dirs[parent] = {} - dirs[parent][line[1]] = 1 + current_tree = get_and_cache_tree(ops, revision["revision"]) + + all_added = set() + all_modifications = set() + all_deleted = set() - # We can not just change the mode of a file but we need to modifiy the whole file. We - # will simply add it to the modified list and ask to retrieve the status from the manifest - for (file, attribute, value, rev) in revision["set_attributes"]: - if attribute == "mtn:execute": - revision["modified"].append((file, None, rev)) - for (file, attribute, rev) in revision["clear_attributes"]: - if attribute == "mtn:execute": - revision["modified"].append((file, None, rev)) + # Now diff the manifests + for parent in revision["parent"]: + (added, modified, deleted) = diff_manifest(get_and_cache_tree(ops, parent), current_tree) + all_added = all_added.union(added) + all_modifications = all_modifications.union(modified) + all_deleted = all_deleted.union(deleted) + if len(revision["parent"]) == 0: + (added, modified, deleted) = diff_manifest(build_tree([],""), current_tree) + all_added = all_added.union(added) + all_modifications = all_modifications.union(modified) + all_deleted = all_deleted.union(deleted) + # TODO: + # Readd the sanity check to see if we deleted and modified an entry. This + # could probably happen if we have more than one parent (on a merge)? cmd = [] cmd += ["commit refs/heads/%s" % branch] @@ -322,97 +262,28 @@ def fast_import(ops, revision): cmd += ["data %d" % len(revision["changelog"])] cmd += ["%s" % revision["changelog"]] - # Emulation for renaming. We will split them into two lists - file_renamed_del = set() - file_renamed_new = set() - file_moved_sticky = set() - if len(revision["parent"]) != 0: cmd += ["from :%s" % get_mark(revision["parent"][0])] - renamed = revision["renamed"] - - to_rename_directories = [] - for (new_name, old_name, old_revision) in renamed: - # 24cba5923360fef7c5cc81d51000e30b90355eb9 is a rev where src == dest but the - # directory got renamed, so this means this file got added to the new directory - # TODO, XXX, FIXME check if this can be done for directories as well - if new_name == old_name and not old_name in dirs[old_revision]: - print >> sys.stderr, "Bogus rename in %s (%s, %s)?" % (revision["revision"], new_name, old_name) - file_moved_sticky.add(old_name) - - # Check if the old_name was a directory in the old manifest - # If we rename a directory we will need to recursively remove and recursively - # add... - # Add the '/' otherwise we might rename the wrong directory which shares the - # same prefix. - # fca159c5c00ae4158c289f5aabce995378d4e41b is quite funny. It renames a directory - # and then renames another directory within the renamed one and in the worse case - # we will revive a deleted directory, file... - elif old_name in dirs[old_revision]: - print >> sys.stderr, "Detected directory rename '%s' => '%s'" % (old_name, new_name) - assert(old_revision in manifests) - to_rename_directories.append((old_name, new_name, old_revision)) - else: - print >> sys.stderr, "Renaming %s => %s" % (old_name, new_name) - file_renamed_new.add((new_name, None, revision["revision"])) - file_renamed_del.add((old_name, old_revision)) # The first parent is our from. for parent in revision["parent"][1:]: cmd += ["merge :%s" % get_mark(parent)] - # Do the renaming now - (renamed_dirs, renamed_new, renamed_old) = recursively_rename_directories(ops, manifests, to_rename_directories, file_renamed_del.union(set(revision["removed"])), file_moved_sticky) - - # Sanity check, don't remove anything we modify - all_added = set(revision["added_dirs"]).union(renamed_dirs) - all_modifications = set(revision["modified"]).union(set(revision["added_files"])).union(renamed_new).union(file_renamed_new) - all_deleted = set(revision["removed"]).union(renamed_old).union(file_renamed_del) - all_deleted_new = all_deleted - - # Check if we delete and add at the same time - for (deleted,rev) in all_deleted: - for (added,_) in all_added: - if added == deleted: - print >> sys.stderr, "Added and Deleted", added, deleted - all_deleted_new = set(filter(lambda (dele,_): dele != added, all_deleted_new)) - assert((added,rev) not in all_deleted_new) - - for (modified,_,_) in all_modifications: - if modified == deleted: - print >> sys.stderr, "Modified and Deleted", modified, deleted - all_deleted_new = set(filter(lambda (dele,_): dele != modified, all_deleted_new)) - assert((modified,rev) not in all_deleted_new) - - # Filtered list of to be deleted items - all_deleted = all_deleted_new - # Check if we delete but the manifest has a file like this - for line in manifest: - if line[0] == "dir" or line[0] == "file": - for (deleted,rev) in all_deleted: - if line[1] == deleted: - # 91da98265a39c93946e00adf5d7bf92b341de847 of mtn has a delete + rename - print >> sys.stderr, "Trying to delete a file which is in the new manifest", line[1], deleted - assert(False) - - compare_with_manifest(all_added, all_modifications, all_deleted, manifest, manifests) - - for (dir_name, rev) in all_added: + for dir_name in all_added: cmd += ["M 644 inline %s" % os.path.join(dir_name, ".mtn2git_empty")] cmd += ["data <<EOF"] cmd += ["EOF"] cmd += [""] - for (file_name, file_revision, rev) in all_modifications: - (mode, file) = get_file_and_mode(ops, manifest, file_name, file_revision, revision["revision"]) + for (file_name, file_revision) in all_modifications: + (mode, file) = get_file_and_mode(ops, current_tree, file_name, file_revision, revision["revision"]) cmd += ["M %d inline %s" % (mode, file_name)] cmd += ["data %d" % len(file)] cmd += ["%s" % file] - for (path, rev) in all_deleted: - assert(rev in dirs) - if path in dirs[rev]: + for (path, is_dir) in all_deleted: + if is_dir: cmd += ["D %s" % os.path.join(path, ".mtn2git_empty")] else: cmd += ["D %s" % path] @@ -428,33 +299,20 @@ def is_trusted(operations, revision): return False return True -def get_file_and_mode(operations, manifest, file_name, _file_revision, rev = None): - mode = 644 +def get_file_and_mode(operations, file_tree, file_name, _file_revision, rev = None): + assert file_name in file_tree.files, "get_file_and_mode: Revision '%s', file_name='%s' " % (rev, file_name) - file_revision = None - for line in manifest: - if line[0] == "file" and line[1] == file_name: - assert(line[1] == file_name) - assert(line[2] == "content") - - if _file_revision: - assert(line[3] == _file_revision) - file_revision = line[3] - - attributes = line[4:] - assert(len(attributes) % 3 == 0) - if len(attributes) >= 3: - for i in range(0, len(attributes)%3+1): - if attributes[i] == "attr" and attributes[i+1] == "mtn:execute" and attributes[i+2] == "true": - mode = 755 - break + (file_revision, executable) = file_tree.files[file_name] + if _file_revision: + assert _file_revision == file_revision, "Same filerevision for file_name='%s' in rev='%s' (%s,%s)" % (file_name, rev, file_revision, _file_revision) - assert(file_revision) - file = "".join([file for file in operations.get_file(file_revision)]) - return (mode, file) + if executable: + mode = 755 + else: + mode = 644 - print >> sys.stderr, file_name, rev - assert(False) + file = "".join([file for file in operations.get_file(file_revision)]) + return (mode, file) def parse_revision(operations, revision): @@ -491,20 +349,12 @@ def parse_revision(operations, revision): old_rev = line[1] elif line[0] == "new_manifest": revision_description["manifest"] = line[1] - elif line[0] == "rename": - revision_description["renamed"].append((line[3], line[1], old_rev)) - elif line[0] == "patch": - revision_description["modified"].append((line[1], line[5], old_rev)) - elif line[0] == "delete": - revision_description["removed"].append((line[1], old_rev)) - elif line[0] == "add_dir": - revision_description["added_dirs"].append((line[1], old_rev)) - elif line[0] == "add_file": - revision_description["added_files"].append((line[1], line[3], old_rev)) elif line[0] == "clear": revision_description["clear_attributes"].append((line[1], line[3], old_rev)) elif line[0] == "set": revision_description["set_attributes"].append((line[1], line[3], line[5], old_rev)) + elif line[0] in ["rename", "patch", "delete", "add_dir", "add_file"]: + pass else: print >> sys.stderr, line assert(False) @@ -580,10 +430,7 @@ def main(mtn_cli, db, rev): for head in heads: print >> sys.stderr, old_heads, head all_revs += ops.ancestry_difference(head, old_heads) - for rev in all_revs: - if not rev in branch_heads: - branch_heads[rev] = [] - branch_heads[rev].append(branch) + status.former_heads[branch] = heads sorted_revs = [rev for rev in ops.toposort(all_revs)] @@ -593,10 +440,6 @@ def main(mtn_cli, db, rev): else: print >> sys.stderr, "Going to import revision ", rev fast_import(ops, parse_revision(ops, rev)) - branches = branch_heads[rev] - for branch in branches: - status.former_heads[branch] = [rev] - if __name__ == "__main__": import optparse @@ -621,3 +464,4 @@ if __name__ == "__main__": print >> sys.stderr, "Failed to open the status file" main(options.mtn, options.database, options.rev) status.store(options.status) + |