File: git_revision_recorder.py

package info (click to toggle)
cvs2svn 2.3.0-2
links: PTS
area: main
in suites: squeeze
size: 3,528 kB
ctags: 2,828
sloc: python: 20,721; sh: 509; perl: 121; makefile: 79
file content (114 lines) | stat: -rw-r--r-- 4,196 bytes
parent folder | download | duplicates (2)
# (Be in -*- python -*- mode.)
#
# ====================================================================
# Copyright (c) 2007-2009 CollabNet.  All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution.  The terms
# are also available at http://subversion.tigris.org/license-1.html.
# If newer versions of this license are posted there, you may use a
# newer version instead, at your option.
#
# This software consists of voluntary contributions made by many
# individuals.  For exact contribution history, see the revision
# history and logs, available at http://cvs2svn.tigris.org/.
# ====================================================================

"""Write file contents to a stream of git-fast-import blobs."""

import itertools

from cvs2svn_lib.symbol import Trunk
from cvs2svn_lib.cvs_item import CVSRevisionDelete
from cvs2svn_lib.cvs_item import CVSSymbol
from cvs2svn_lib.fulltext_revision_recorder import FulltextRevisionRecorder
from cvs2svn_lib.key_generator import KeyGenerator


class GitRevisionRecorder(FulltextRevisionRecorder):
  """Record file revision contents as blobs in a git-fast-import stream.

  Each fulltext that is written gets a fresh mark from a KeyGenerator;
  that mark is what record_fulltext() hands back as the revision's
  token.  Revisions whose contents were already written under another
  revision reuse that revision's token instead of producing a new
  blob."""

  def __init__(self, blob_filename):
    """Remember BLOB_FILENAME, the path that start() will open."""

    self.blob_filename = blob_filename

  def start(self):
    """Open the blob file for binary writing and create the mark generator."""

    self.dump_file = open(self.blob_filename, 'wb')
    self._mark_generator = KeyGenerator()

  def start_file(self, cvs_file_items):
    """Keep CVS_FILE_ITEMS around until finish_file() is called.

    _get_original_source() needs it to walk deltatext ancestors."""

    self._cvs_file_items = cvs_file_items

  def _get_original_source(self, cvs_rev):
    """Return the revision whose blob holds the contents of CVS_REV.

    The candidates are CVS_REV itself followed by its deltatext
    ancestors (as yielded by iter_deltatext_ancestors()), up to and
    including the first candidate for which deltatext_exists is true.
    All of these share the same contents.

    Among the candidates, the last one visited that is not a
    CVSRevisionDelete is returned; that is the one that comes first
    in deltatext order (HEAD on trunk, then backwards to the root of
    a branch, then out to the tip of the branch).  Deletes are
    skipped because, although CVS records their contents, their
    fulltext cannot be extracted with commands like
    'cvs checkout -p'.

    If no other candidate qualifies and CVS_REV is not a delete,
    CVS_REV itself is returned.  If every candidate is a delete,
    None is returned."""

    candidates = itertools.chain(
        [cvs_rev], self._cvs_file_items.iter_deltatext_ancestors(cvs_rev)
        )

    # The most recently seen candidate that is not a delete:
    original = None

    for candidate in candidates:
      is_delete = isinstance(candidate, CVSRevisionDelete)
      if not is_delete:
        original = candidate

      if candidate.deltatext_exists:
        # This candidate closes the run of identical contents
        # (it is itself still a valid candidate, handled above).
        return original

    return original

  def record_fulltext(self, cvs_rev, log, fulltext):
    """Write FULLTEXT as a blob unless that would be redundant.

    Return the token to be associated with CVS_REV:

    - None if CVS_REV is a CVSRevisionDelete (nothing is written);

    - a newly generated mark if CVS_REV is its own original source,
      in which case a 'blob' record carrying that mark and FULLTEXT
      is appended to the dump file;

    - otherwise the revision_recorder_token of the original source
      revision (nothing is written).

    This avoids writing identical contents repeatedly for a string of
    revisions that have no deltatexts (as happens, for example, with
    dead revisions and imported revisions).  LOG is not used."""

    if isinstance(cvs_rev, CVSRevisionDelete):
      # Deletes are never recorded, and their token is never needed.
      return None

    original = self._get_original_source(cvs_rev)

    is_own_source = (original.id == cvs_rev.id)
    if not is_own_source:
      # The contents belong to another revision; share the token
      # (i.e., the blob mark) that was recorded for that revision.
      return original.revision_recorder_token

    # CVS_REV is its own source, so emit a blob record for it:
    mark = self._mark_generator.gen_id()
    write = self.dump_file.write
    write('blob\n')
    write('mark :%d\n' % (mark,))
    write('data %d\n' % (len(fulltext),))
    write(fulltext)
    write('\n')
    return mark

  def finish_file(self, cvs_file_items):
    """Propagate tokens to the symbols of the file, then drop its items.

    Every CVSSymbol in CVS_FILE_ITEMS receives the
    revision_recorder_token of the revision that
    get_cvs_revision_source() reports as its source."""

    for item in cvs_file_items.values():
      if not isinstance(item, CVSSymbol):
        continue

      source_rev = item.get_cvs_revision_source(cvs_file_items)
      item.revision_recorder_token = source_rev.revision_recorder_token

    # The reference stored by start_file() is no longer needed:
    del self._cvs_file_items

  def finish(self):
    """Close the blob file that start() opened."""

    self.dump_file.close()