#!/usr/bin/python
# (Be in -*- python -*- mode.)
#
# cvs2svn: ...
#
# ====================================================================
# Copyright (c) 2000-2004 CollabNet.  All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution.  The terms
# are also available at http://subversion.tigris.org/license-1.html.
# If newer versions of this license are posted there, you may use a
# newer version instead, at your option.
#
# This software consists of voluntary contributions made by many
# individuals.  For exact contribution history, see the revision
# history and logs, available at http://cvs2svn.tigris.org/.
# ====================================================================

# The string literal below is a Subversion keyword that is rewritten on
# commit.  The slice [22:-2] drops the 22-character keyword prefix and
# the trailing " $", leaving only the revision number.
VERSION = 'r' + "$LastChangedRevision: 1793 $"[22:-2]

import cvs2svn_rcsparse
import os
import sys
import sha
import re
import time
import fileinput
import fnmatch
import string
import getopt
import stat
import md5
import marshal
import errno
import popen2
import types
import ConfigParser
import UserDict
try:
  # Try to get access to a bunch of encodings for use with --encoding.
  # See http://cjkpython.i18n.org/ for details.
  import iconv_codec
except ImportError:
  pass

# Warnings and errors start with these strings.  They are typically
# followed by a colon and a space, as in "%s: " ==> "WARNING: ".
warning_prefix = "WARNING"
error_prefix = "ERROR"

# Make sure this Python is recent enough.  sys.hexversion packs the
# interpreter version into one integer; 0x02020000 is Python 2.2.
if sys.hexversion < 0x02020000:
  # The message starts directly with the prefix (no stray quote), like
  # every other error message in this file.
  sys.stderr.write("%s: Python 2.2 or higher required, "
                   "see www.python.org.\n" % error_prefix)
  sys.exit(1)

# Pretend we have true booleans on older python versions
try:
  True
except:
  True = 1
  False = 0

# Opening pipes was a mess before Python 2.4, because some methods did
# not exist on some platforms, and some behaved differently on others.
# Python 2.4 solved this by adding the subprocess module, but since we
# cannot require such a new version, we cannot use it directly, but
# must implement a simplified Popen using the best means necessary.
#
# The SimplePopen class only has the following members and methods, all
# behaving as documented in the subprocess.Popen class:
#     - stdin
#     - stdout
#     - stderr
#     - wait
try:
  # First try subprocess.Popen...
  import subprocess
  class SimplePopen:
    """Minimal Popen implemented on top of subprocess.Popen."""

    def __init__(self, cmd, capture_stderr):
      """Start CMD with its stdin and stdout connected to pipes.

      CMD is handed unchanged to subprocess.Popen.  If CAPTURE_STDERR
      is true the child's stderr is piped as well; otherwise
      self.stderr is None, exactly as for subprocess.Popen."""
      if capture_stderr:
        stderr = subprocess.PIPE
      else:
        stderr = None
      self._popen = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE, stderr=stderr)
      self.stdin = self._popen.stdin
      self.stdout = self._popen.stdout
      # Popen.stderr is None when stderr was not piped, so the attribute
      # can always be defined.
      self.stderr = self._popen.stderr
      self.wait = self._popen.wait
except ImportError:
  if hasattr(popen2, 'Popen3'):
    # ...then try popen2.Popen3...
    class SimplePopen:
      """Minimal Popen implemented on top of popen2.Popen3."""

      def __init__(self, cmd, capture_stderr):
        """Start CMD; capture its stderr only if CAPTURE_STDERR is true."""
        self._popen3 = popen2.Popen3(cmd, capture_stderr)
        self.stdin = self._popen3.tochild
        self.stdout = self._popen3.fromchild
        # Popen3.childerr is None when stderr is not captured, so the
        # attribute can always be defined.
        self.stderr = self._popen3.childerr
        self.wait = self._popen3.wait
  else:
    # ...and if all fails, use popen2.popen3...
    class SimplePopen:
      """Minimal Popen implemented on top of popen2.popen3()."""

      def __init__(self, cmd, capture_stderr):
        """Start CMD through popen2.popen3().

        A CMD that is not a string is flattened into a command string
        first.  CAPTURE_STDERR is not consulted here: popen3() always
        provides a stderr pipe."""
        if type(cmd) != types.StringType:
          cmd = argv_to_command_string(cmd)
        self.stdout, self.stdin, self.stderr = popen2.popen3(cmd, mode='b')

      def wait(self):
        """Close the three pipes and return the first true close()
        result, if any."""
        return self.stdout.close() or self.stdin.close() or \
               self.stderr.close()

# DBM module selection
#
# The steps below run at import time and decide which module anydbm
# uses as its default backend (anydbm._defaultmod).

# 1. If we have bsddb3, it is probably newer than bsddb.  Fake bsddb = bsddb3,
#    so that the dbhash module used by anydbm will use bsddb3.
#    This must happen before anydbm is imported below.
try:
  import bsddb3
  sys.modules['bsddb'] = sys.modules['bsddb3']
except ImportError:
  pass

# 2. These DBM modules are not good for cvs2svn.  If anydbm picked
#    dumbdbm or dbm as its default, give up immediately.
import anydbm
if (anydbm._defaultmod.__name__ == 'dumbdbm'
    or anydbm._defaultmod.__name__ == 'dbm'):
  sys.stderr.write(
    error_prefix
    + ': your installation of Python does not contain a suitable\n'
    + 'DBM module -- cvs2svn cannot continue.\n'
    + 'See http://python.org/doc/current/lib/module-anydbm.html to solve.\n')
  sys.exit(1)

# 3. If we are using the old bsddb185 module, then try to prefer gdbm
#    instead.  Unfortunately, gdbm appears not to be trouble free, either.
#    The old module is recognized by its bsddb attribute lacking a
#    __version__.  If gdbm cannot be imported we only warn and keep
#    the default module.
if hasattr(anydbm._defaultmod, 'bsddb') \
    and not hasattr(anydbm._defaultmod.bsddb, '__version__'):
  try:
    gdbm = __import__('gdbm')
  except ImportError:
    sys.stderr.write(warning_prefix +
        ': The version of the bsddb module found '
        'on your computer has been reported to malfunction on some datasets, '
        'causing KeyError exceptions. You may wish to upgrade your Python to '
        'version 2.3 or later.\n')
  else:
    anydbm._defaultmod = gdbm

# Matches a revision number with exactly two components, e.g. '1.7'.
trunk_rev = re.compile('^[0-9]+\\.[0-9]+$')
# Matches a number whose second-to-last component is 0, e.g. '1.3.0.2'.
# Group 1 is the leading part including its trailing dot ('1.3.') and
# group 2 is the last component ('2').
cvs_branch_tag = re.compile('^((?:[0-9]+\\.[0-9]+\\.)+)0\\.([0-9]+)$')
# Matches a number with an odd number (three or more) of components,
# e.g. '1.1.1'.
rcs_branch_tag = re.compile('^(?:[0-9]+\\.[0-9]+\\.)+[0-9]+$')

# NOTE(review): presumably the value given to the svn:keywords
# property -- confirm against the code that uses it.
SVN_KEYWORDS_VALUE = 'Author Date Id Revision'

# This really only matches standard '1.1.1.*'-style vendor revisions.
# One could conceivably have a file whose default branch is 1.1.3 or
# whatever, or was that at some point in time, with vendor revisions
# 1.1.3.1, 1.1.3.2, etc.  But with the default branch gone now (which
# is the only time this regexp gets used), we'd have no basis for
# assuming that the non-standard vendor branch had ever been the
# default branch anyway, so we don't want this to match them anyway.
#
# NOTE(review): the '+' sits outside the second group, so group 2
# holds only the last digit of the final component.  That is harmless
# as long as callers only test for a match -- confirm before relying
# on group 2.
vendor_revision = re.compile('^(1\\.1\\.1)\\.([0-9])+$')

# If this run's output is a repository, then (in the tmpdir) we use
# a dumpfile of this name for repository loads.
#
# If this run's output is a dumpfile, then this is default name of
# that dumpfile, but in the current directory (unless the user has
# specified a dumpfile path, of course, in which case it will be
# wherever the user said).
DUMPFILE = 'cvs2svn-dump'

# This file appears with different suffixes at different stages of
# processing.  CVS revisions are cleaned and sorted here, for commit
# grouping.  See design-notes.txt for details.
DATAFILE = 'cvs2svn-data'

# This file contains a marshalled copy of all the statistics that we
# gather throughout the various runs of cvs2svn.  The data is stored
# as a marshalled dictionary.
STATISTICS_FILE = 'cvs2svn-statistics'

# This text file contains records (1 per line) that describe svn
# filesystem paths that are the opening and closing source revisions
# for copies to tags and branches.  The format is as follows:
#
# SYMBOL_NAME SVN_REVNUM TYPE SVN_PATH
#
# Where type is either OPENING or CLOSING.  The SYMBOL_NAME and
# SVN_REVNUM are the primary and secondary sorting criteria for
# creating SYMBOL_OPENINGS_CLOSINGS_SORTED.
SYMBOL_OPENINGS_CLOSINGS = 'cvs2svn-symbolic-names.txt'
# A sorted version of the above file.
SYMBOL_OPENINGS_CLOSINGS_SORTED = 'cvs2svn-symbolic-names-s.txt'

# This file is a temporary file for storing symbolic_name -> closing
# CVSRevision until the end of our pass where we can look up the
# corresponding SVNRevNum for the closing revs and write these out to
# the SYMBOL_OPENINGS_CLOSINGS.
SYMBOL_CLOSINGS_TMP = 'cvs2svn-symbolic-names-closings-tmp.txt'

# Skeleton version of an svn filesystem.
# (These supersede and will eventually replace the two above.)
# See class SVNRepositoryMirror for how these work.
SVN_MIRROR_REVISIONS_DB = 'cvs2svn-svn-revisions.db'
SVN_MIRROR_NODES_DB = 'cvs2svn-svn-nodes.db'

# Offsets pointing to the beginning of each SYMBOLIC_NAME in
# SYMBOL_OPENINGS_CLOSINGS_SORTED
SYMBOL_OFFSETS_DB = 'cvs2svn-symbolic-name-offsets.db'

# Maps CVSRevision.unique_key()s to lists of symbolic names, where
# the CVSRevision is the last such that is a source for those symbolic
# names.  For example, if branch B's number is 1.3.0.2 in this CVS
# file, and this file's 1.3 is the latest (by date) revision among
# *all* CVS files that is a source for branch B, then the
# CVSRevision.unique_key() corresponding to this file at 1.3 would
# list at least B in its list.
SYMBOL_LAST_CVS_REVS_DB = 'cvs2svn-symbol-last-cvs-revs.db'

# Maps CVSRevision.unique_key() to corresponding line in s-revs.
###PERF Or, we could map to an offset into s-revs, instead of dup'ing
### the s-revs data in this database.
CVS_REVS_DB = 'cvs2svn-cvs-revs.db'

# Lists all symbolic names that are tags.  Keys are strings (symbolic
# names), values are ignorable.
TAGS_DB = 'cvs2svn-tags.db'

# A list of all tags.  Each line consists of the tag name and the
# number of files in which it exists, separated by a space.
TAGS_LIST = 'cvs2svn-tags.txt'

# A list of all branches.  The file is stored as a plain text file
# to make it easy to look at in an editor.  Each line contains the
# branch name, the number of files where the branch is created, the
# commit count, and a list of tags and branches that are defined on
# revisions in the branch.
BRANCHES_LIST = 'cvs2svn-branches.txt'

# These two databases provide a bidirectional mapping between
# CVSRevision.unique_key()s and Subversion revision numbers.
#
# The first maps CVSRevision.unique_key() to a number; the values are
# not unique.
#
# The second maps Subversion revision numbers to tuples (c_rev_keys,
# motivating_revnum, symbolic_name, date).
#
# c_rev_keys is a list of CVSRevision.unique_key()s.
#
# If the SVNCommit is a default branch synchronization,
# motivating_revnum is the svn_revnum of the primary SVNCommit that
# motivated it; otherwise it is None.  (NOTE: Secondary commits that
# fill branches and tags also have a motivating commit, but we do not
# record it because it is (currently) not needed for anything.)
# motivating_revnum is used when generating the log message for the
# commit that synchronizes the default branch with trunk.
#
# symbolic_name is the symbolic name associated with the commit (if it
# filled a symbolic name) or None otherwise.
#
# date is the date of the commit.
CVS_REVS_TO_SVN_REVNUMS = 'cvs2svn-cvs-revs-to-svn-revnums.db'
SVN_REVNUMS_TO_CVS_REVS = 'cvs2svn-svn-revnums-to-cvs-revs.db'

# How many bytes to read at a time from a pipe.  128 kiB should be
# large enough to be efficient without wasting too much memory.
PIPE_READ_SIZE = 128 * 1024

# Record the default RCS branches, if any, for CVS filepaths.
#
# The keys are CVS filepaths, relative to the top of the repository
# and with the ",v" stripped off, so they match the cvs paths used in
# Commit.commit().  The values are vendor branch revisions, such as
# '1.1.1.1', or '1.1.1.2', or '1.1.1.96'.  The vendor branch revision
# represents the highest vendor branch revision thought to have ever
# been head of the default branch.
#
# The reason we record a specific vendor revision, rather than a
# default branch number, is that there are two cases to handle:
#
# One case is simple.  The RCS file lists a default branch explicitly
# in its header, such as '1.1.1'.  In this case, we know that every
# revision on the vendor branch is to be treated as head of trunk at
# that point in time.
#
# But there's also a degenerate case.  The RCS file does not currently
# have a default branch, yet we can deduce that for some period in the
# past it probably *did* have one.  For example, the file has vendor
# revisions 1.1.1.1 -> 1.1.1.96, all of which are dated before 1.2,
# and then it has 1.1.1.97 -> 1.1.1.100 dated after 1.2.  In this
# case, we should record 1.1.1.96 as the last vendor revision to have
# been the head of the default branch.
DEFAULT_BRANCHES_DB = 'cvs2svn-default-branches.db'

# Records the author and log message for each changeset.
# The keys are author+log digests, the same kind used to identify
# unique revisions in the .revs, etc files.  Each value is a tuple
# of two elements: '(author logmessage)'.
METADATA_DB = "cvs2svn-metadata.db"

# A temporary on-disk hash that maps CVSRevision unique keys to a new
# timestamp for that CVSRevision.  These new timestamps are created in
# pass2, and this hash is used exclusively in pass2.
TWEAKED_TIMESTAMPS_DB = "cvs2svn-fixed-timestamps.db"

REVS_SUFFIX = '.revs'
CLEAN_REVS_SUFFIX = '.c-revs'
SORTED_REVS_SUFFIX = '.s-revs'
RESYNC_SUFFIX = '.resync'

SVN_INVALID_REVNUM = -1

COMMIT_THRESHOLD = 5 * 60	# flush a commit if a 5 minute gap occurs

# Things that can happen to a file.
OP_NOOP   = '-'
OP_ADD    = 'A'
OP_DELETE = 'D'
OP_CHANGE = 'C'

# A deltatext either does or doesn't represent some change.
DELTATEXT_NONEMPTY = 'N'
DELTATEXT_EMPTY    = 'E'

# sha.digestsize * 2 is the length of a SHA digest written in hex.
# NOTE(review): the leading 9 presumably covers a fixed-width field
# plus a separator that precede the digest in each record -- confirm
# against the code that writes and slices those lines.
DIGEST_END_IDX = 9 + (sha.digestsize * 2)

# Constants used in SYMBOL_OPENINGS_CLOSINGS
OPENING = 'O'
CLOSING = 'C'

class FatalException(Exception):
  """Raised when cvs2svn hits an error it cannot recover from.

  When main() lets this exception escape, the outermost layer of the
  program catches it, prints its string representation, and ends the
  program with an exit code of 1."""


class FatalError(FatalException):
  """A FatalException whose message is framed as an error line."""

  def __init__(self, msg):
    """Store error_prefix, a colon and a space, MSG, and a trailing
    newline as the exception message."""

    message = '%s: %s\n' % (error_prefix, msg)
    FatalException.__init__(self, message)


def temp(basename):
  """Return the path of BASENAME inside the temporary directory
  Ctx().tmpdir.  A short name for a very common expression."""
  tmpdir = Ctx().tmpdir
  return os.path.join(tmpdir, basename)

# Since the unofficial set also includes [/\] we need to translate those
# into ones that don't conflict with Subversion limitations.
def _clean_symbolic_name(name):
  """Return symbolic name NAME, translating characters that Subversion
  does not allow in a pathname."""
  name = name.replace('/','++')
  name = name.replace('\\','--')
  return name

def _path_join(*components):
  """Join two or more pathname COMPONENTS, inserting '/' as needed.
  Empty component are skipped."""
  return string.join(filter(None, components), '/')

def _path_split(path):
  """Split the svn pathname PATH into a pair, (HEAD, TAIL).

  This is similar to os.path.split(), but always uses '/' as path
  separator.  PATH is an svn path, which should not start with a '/'.
  HEAD is everything before the last slash, and TAIL is everything
  after.  If PATH ends in a slash, TAIL will be empty.  If there is no
  slash in PATH, HEAD will be empty.  If PATH is empty, both HEAD and
  TAIL are empty."""

  pos = path.rfind('/')
  if pos == -1:
    return ('', path,)
  else:
    return (path[:pos], path[pos+1:],)

def to_utf8(value, mode='replace'):
  """Return VALUE re-encoded as UTF-8.

  Each encoding in Ctx().encoding is tried in turn as the source
  encoding of VALUE, with MODE as the error handling mode of the
  decoding step.  The first one that decodes wins; every failure is
  logged at verbose level.  Raise UnicodeError if none works."""
  ### FIXME: The 'replace' default mode should be an option,
  ### like --encoding is.
  for source_encoding in Ctx().encoding:
    try:
      decoded = unicode(value, source_encoding, mode)
      return decoded.encode('utf8')
    except UnicodeError:
      Log().write(LOG_VERBOSE, "Encoding '%s' failed for string '%s'"
                  % (source_encoding, value))
  raise UnicodeError

def run_command(command):
  """Run COMMAND through os.system() and raise FatalError if it
  returns a non-zero status."""
  status = os.system(command)
  if status != 0:
    raise FatalError('Command failed: "%s"' % (command,))


class CommandFailedException(Exception):
  """Raised by check_command_runs() when the checked command cannot
  be run cleanly."""


def check_command_runs(cmd, cmdname):
  """Check whether the command CMD can be executed without errors.

  CMD is a list or string, as accepted by SimplePopen.  CMDNAME is the
  name of the command as it should be included in exception error
  messages.

  This function checks three things: (1) the command can be run
  without throwing an OSError; (2) it exits with status=0; (3) it
  doesn't output anything to stderr.  If any of these conditions is
  not met, raise a CommandFailedException describing the problem."""

  try:
    pipe = SimplePopen(cmd, True)
  except OSError, e:
    raise CommandFailedException('error executing %s: %s' % (cmdname, e,))
  pipe.stdin.close()
  pipe.stdout.read()
  errmsg = pipe.stderr.read()
  status = pipe.wait()
  if status or errmsg:
    msg = 'error executing %s: status %s' % (cmdname, status,)
    if errmsg:
      msg += ', error output:\n%s' % (errmsg,)
    raise CommandFailedException(msg)


class CVSRepository:
  """A CVS repository from which data can be extracted."""

  def __init__(self, cvs_repos_path):
    """CVS_REPOS_PATH is the top of the CVS repository (at least as
    far as this run is concerned).  Raise FatalError if it is not an
    existing directory."""

    if not os.path.isdir(cvs_repos_path):
      raise FatalError("The specified CVS repository path '%s' is not an "
                       "existing directory." % cvs_repos_path)

    self.cvs_repos_path = os.path.normpath(cvs_repos_path)
    # Matches the repository path at the start of a filename, but only
    # when it is followed by a path separator or the end of the string,
    # so that '/repo' does not count as a prefix of '/repository/x'.
    self.cvs_prefix_re = re.compile(
        r'^' + re.escape(self.cvs_repos_path)
        + r'(' + re.escape(os.sep) + r'|$)')

  def get_cvs_path(self, fname):
    """Return the path to FNAME relative to cvs_repos_path, with ',v' removed.

    FNAME is a filesystem name that has to be within
    self.cvs_repos_path.  Return the filename relative to
    self.cvs_repos_path, with ',v' stripped off if present, and with
    os.sep converted to '/'.  Raise FatalError if FNAME does not lie
    within self.cvs_repos_path."""

    (tail, n) = self.cvs_prefix_re.subn('', fname, 1)
    if n != 1:
      raise FatalError(
          "get_cvs_path: '%s' is not a sub-path of '%s'"
          % (fname, self.cvs_repos_path,))
    if tail.endswith(',v'):
      tail = tail[:-2]
    return tail.replace(os.sep, '/')

  def get_co_pipe(self, c_rev, suppress_keyword_substitution=False):
    """Return a command string, and the pipe created using that
    string.  C_REV is a CVSRevision.  If SUPPRESS_KEYWORD_SUBSTITUTION
    is True, then suppress the substitution of RCS/CVS keywords in the
    output.  The pipe returns the text of that CVS Revision.

    This base class only declares the interface; subclasses have to
    override it."""
    raise NotImplementedError


class CVSRepositoryViaRCS(CVSRepository):
  """A CVSRepository accessed via RCS."""

  def __init__(self, cvs_repos_path):
    CVSRepository.__init__(self, cvs_repos_path)
    try:
      check_command_runs([ 'co', '-V' ], 'co')
    except CommandFailedException, e:
      raise FatalError('%s\n'
                       'Please check that co is installed and in your PATH\n'
                       '(it is a part of the RCS software).' % (e,))

  def get_co_pipe(self, c_rev, suppress_keyword_substitution=False):
    pipe_cmd = [ 'co', '-q', '-x,v', '-p' + c_rev.rev ]
    if suppress_keyword_substitution:
      pipe_cmd.append('-kk')
    pipe_cmd.append(c_rev.rcs_path())
    pipe = SimplePopen(pipe_cmd, True)
    pipe.stdin.close()
    return pipe_cmd, pipe


class CVSRepositoryViaCVS(CVSRepository):
  """A CVSRepository accessed via CVS."""

  def __init__(self, cvs_repos_path):
    CVSRepository.__init__(self, cvs_repos_path)
    # Ascend above the specified root if necessary, to find the
    # cvs_repository_root (a directory containing a CVSROOT directory)
    # and the cvs_module (the path of the conversion root within the
    # cvs repository) NB: cvs_module must be seperated by '/' *not* by
    # os.sep .
    def is_cvs_repository_root(path):
      return os.path.isdir(os.path.join(path, 'CVSROOT'))

    self.cvs_repository_root = os.path.abspath(self.cvs_repos_path)
    self.cvs_module = ""
    while not is_cvs_repository_root(self.cvs_repository_root):
      # Step up one directory:
      prev_cvs_repository_root = self.cvs_repository_root
      self.cvs_repository_root, module_component = \
          os.path.split(self.cvs_repository_root)
      if self.cvs_repository_root == prev_cvs_repository_root:
        # Hit the root (of the drive, on Windows) without finding a
        # CVSROOT dir.
        raise FatalError(
            "the path '%s' is not a CVS repository, nor a path "
            "within a CVS repository.  A CVS repository contains "
            "a CVSROOT directory within its root directory."
            % (self.cvs_repos_path,))

      self.cvs_module = module_component + "/" + self.cvs_module

    os.environ['CVSROOT'] = self.cvs_repository_root

    def cvs_ok(global_arguments):
      check_command_runs(
          [ 'cvs' ] + global_arguments + [ '--version' ], 'cvs')

    self.global_arguments = [ "-q", "-R" ]
    try:
      cvs_ok(self.global_arguments)
    except CommandFailedException, e:
      self.global_arguments = [ "-q" ]
      try:
        cvs_ok(self.global_arguments)
      except CommandFailedException, e:
        raise FatalError(
            '%s\n'
            'Please check that cvs is installed and in your PATH.' % (e,))

  def get_co_pipe(self, c_rev, suppress_keyword_substitution=False):
    pipe_cmd = [ 'cvs' ] + self.global_arguments + \
               [ 'co', '-r' + c_rev.rev, '-p' ]
    if suppress_keyword_substitution:
      pipe_cmd.append('-kk')
    pipe_cmd.append(self.cvs_module + c_rev.cvs_path)
    pipe = SimplePopen(pipe_cmd, True)
    pipe.stdin.close()
    return pipe_cmd, pipe


def generate_ignores(c_rev):
  """Return the list of ignore patterns found in the text of C_REV.

  The text of the CVSRevision C_REV is read through the pipe provided
  by Ctx().cvs_repository.get_co_pipe().  It is split on whitespace
  into patterns; a pattern consisting of a lone '!' discards all
  patterns collected before it.  Raise FatalError if the checkout
  command exits with a non-zero status."""
  # Read in props.  The chunks are collected in a list and joined once,
  # which avoids re-copying the accumulated text for every chunk.
  pipe_cmd, pipe = Ctx().cvs_repository.get_co_pipe(c_rev)
  chunks = [ ]
  buf = pipe.stdout.read(PIPE_READ_SIZE)
  while buf:
    chunks.append(buf)
    buf = pipe.stdout.read(PIPE_READ_SIZE)
  pipe.stdout.close()
  error_output = pipe.stderr.read()
  exit_status = pipe.wait()
  if exit_status:
    raise FatalError("The command '%s' failed with exit status: %s\n"
                     "and the following output:\n"
                     "%s" % (pipe_cmd, exit_status, error_output))

  # Tweak props: split() without arguments separates on any run of
  # whitespace (spaces and newlines alike) and never yields an empty
  # string, so no separate empty-line check is needed.
  ignore_vals = [ ]
  for ignore in ''.join(chunks).split():
    if ignore == '!':
      # Reset the list if we encounter a '!'
      # See http://cvsbook.red-bean.com/cvsbook.html#cvsignore
      ignore_vals = [ ]
    else:
      ignore_vals.append(ignore)
  return ignore_vals

# Return a string that has not been returned by gen_key() before.
# The counter is a plain integer: from Python 2.2 on (the minimum this
# script accepts) integers are promoted automatically when they
# outgrow the machine word, so no long literal is needed.
gen_key_base = 0
def gen_key():
  """Return the current value of the module-level counter formatted
  in lowercase hexadecimal, then advance the counter by one."""
  global gen_key_base
  key = '%x' % gen_key_base
  gen_key_base = gen_key_base + 1
  return key

# ============================================================================
# This code is copied with a few modifications from:
#   subversion/subversion/bindings/swig/python/svn/core.py

if sys.platform == "win32":
  # One or more backslashes that are followed by a double quote or by
  # the end of the string.
  _escape_shell_arg_re = re.compile(r'(\\+)(\"|$)')

  def escape_shell_arg(arg):
    """Return ARG quoted for use in a Windows command string."""
    # The (very strange) parsing rules used by the C runtime library are
    # described at:
    # http://msdn.microsoft.com/library/en-us/vclang/html/_pluslang_Parsing_C.2b2b_.Command.2d.Line_Arguments.asp

    # double up slashes, but only if they are followed by a quote character
    arg = _escape_shell_arg_re.sub(r'\1\1\2', arg)

    # surround by quotes and escape quotes inside
    arg = '"' + arg.replace('"', '"^""') + '"'
    return arg


  def argv_to_command_string(argv):
    """Flatten a list of command line arguments into a command string.

    The resulting command string is expected to be passed to the system
    shell which os functions like popen() and system() invoke internally.
    """

    # According cmd's usage notes (cmd /?), it parses the command line by
    # "seeing if the first character is a quote character and if so, stripping
    # the leading character and removing the last quote character."
    # So to prevent the argument string from being changed we add an extra set
    # of quotes around it here.
    return '"' + " ".join(map(escape_shell_arg, argv)) + '"'

else:
  def escape_shell_arg(str):
    """Return STR wrapped in single quotes for a POSIX shell.  Each
    single quote inside STR is written as '\\'' (close the quotes, an
    escaped quote, reopen the quotes)."""
    return "'" + str.replace("'", "'\\''") + "'"

  def argv_to_command_string(argv):
    """Flatten a list of command line arguments into a command string.

    The resulting command string is expected to be passed to the system
    shell which os functions like popen() and system() invoke internally.
    """

    return " ".join(map(escape_shell_arg, argv))
# ============================================================================

def format_date(date):
  """Return DATE (seconds since the epoch) as an svn-compatible date
  string in UTC."""
  # A Subversion date looks like "2002-09-29T14:44:59.000000Z"
  utc_time = time.gmtime(date)
  return time.strftime("%Y-%m-%dT%H:%M:%S.000000Z", utc_time)

def sort_file(infile, outfile):
  """Sort the lines of INFILE into OUTFILE with the external 'sort'
  program, using Ctx().tmpdir for sort's temporary files.

  Raise FatalError (from run_command) if the command fails.  The
  LC_ALL environment variable is put back to its previous state even
  in that case."""

  # GNU sort will sort our dates differently (incorrectly!) if our
  # LC_ALL is anything but 'C', so force it to 'C' for the duration of
  # the sort and remember what it was before.
  lc_all_tmp = os.environ.get('LC_ALL', None)
  os.environ['LC_ALL'] = 'C'
  try:
    # The -T option to sort has a nice side effect.  The Win32 sort is
    # case insensitive and cannot be used, and since it does not
    # understand the -T option and dies if we try to use it, there is
    # no risk that we use that sort by accident.
    run_command('sort -T %s %s > %s' % (Ctx().tmpdir, infile, outfile))
  finally:
    # Restore the caller's environment whether or not the sort worked.
    if lc_all_tmp is None:
      del os.environ['LC_ALL']
    else:
      os.environ['LC_ALL'] = lc_all_tmp

def match_regexp_list(regexp_list, string):
  """Return True if at least one compiled regexp in REGEXP_LIST
  matches at the start of STRING, and False otherwise (including when
  REGEXP_LIST is empty)."""
  found = False
  for pattern in regexp_list:
    if pattern.match(string):
      found = True
      break
  return found

class LF_EOL_Filter:
  """Filter a stream and convert all end-of-line markers (CRLF, CR or LF)
  into LFs only."""
  def __init__(self, stream):
    """STREAM is the object whose read(SIZE) method supplies the
    unfiltered text."""
    self.stream = stream
    # True when the previous chunk ended in a CR that has been held
    # back because it may be the first half of a CRLF pair.
    self.carry_cr = False
    self.eof = False

  def read(self, size):
    """Read SIZE characters from the underlying stream and return them
    with every CRLF and every lone CR replaced by LF.

    The result can be longer or shorter than what was read, because a
    trailing CR is held back until the next call.  An empty string is
    returned only once the underlying stream is exhausted."""
    while True:
      buf = self.stream.read(size)
      self.eof = len(buf) == 0
      if self.carry_cr:
        # Put the CR held back by the previous iteration in front.
        buf = '\r' + buf
        self.carry_cr = False
      if not self.eof and buf[-1] == '\r':
        # Cannot tell yet whether a LF follows; decide next time.
        self.carry_cr = True
        buf = buf[:-1]
      # CRLF pairs first, so that the remaining CRs are all lone ones.
      buf = buf.replace('\r\n', '\n').replace('\r', '\n')
      # A chunk consisting only of a held-back CR leaves nothing to
      # return; read again rather than signal a false end of file.
      if len(buf) > 0 or self.eof:
        return buf


# These constants represent the log levels that this script supports
LOG_WARN = -1
LOG_QUIET = 0
LOG_NORMAL = 1
LOG_VERBOSE = 2
class Log:
  """A simple logging facility.  Each line will be timestamped if
  self.use_timestamps is true.  This class is a Borg, see
  http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""
  __shared_state = {}
  def __init__(self):
    # All instances share one attribute dictionary; only the first
    # instance ever created fills in the defaults.
    self.__dict__ = self.__shared_state
    if self.__dict__:
      return
    self.log_level = LOG_NORMAL
    # Set this to true if you want to see timestamps on each line output.
    self.use_timestamps = None
    self.logger = sys.stdout

  def _timestamp(self):
    """Output a detailed timestamp at the beginning of each line output."""
    # %M is the minute; %m (the month) would print the month number
    # between the hour and the seconds.
    self.logger.write(time.strftime('[%Y-%m-%d %I:%M:%S %Z] - '))

  def write(self, log_level, *args):
    """This is the public method to use for writing to a file.  Only
    messages whose LOG_LEVEL is <= self.log_level will be printed.  If
    there are multiple ARGS, they will be separated by a space."""
    if log_level > self.log_level:
      return
    if self.use_timestamps:
      self._timestamp()
    self.logger.write(' '.join(map(str,args)) + "\n")
    # Ensure that log output doesn't get out-of-order with respect to
    # stderr output.
    self.logger.flush()


class Cleanup:
  """This singleton class manages any files created by cvs2svn.  When
  you first create a file, call Cleanup.register, passing the
  filename, and the last pass that you need the file.  After the end
  of that pass, your file will be cleaned up after running an optional
  callback.  This class is a Borg, see
  http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""

  __shared_state = {}
  def __init__(self):
    # All instances share one attribute dictionary; only the first
    # instance ever created fills in the defaults.
    self.__dict__ = self.__shared_state
    if self.__dict__:
      return
    # Maps a pass to a dictionary whose keys are the files to remove
    # at the end of that pass.
    self._log = {}
    # Maps a file to the callback to run just before it is removed.
    self._callbacks = {}

  def register(self, file, which_pass, callback=None):
    """Register FILE for cleanup at the end of WHICH_PASS, running
    function CALLBACK prior to removal.  Registering a given FILE is
    idempotent; you may register as many times as you wish, but it
    will only be cleaned up once.

    Note that if a file is registered multiple times, only the first
    callback registered for that file will be called at cleanup
    time.  Also note that if you register a database file you must
    close the database before cleanup, e.g. using a callback."""
    self._log.setdefault(which_pass, {})[file] = 1
    if callback and file not in self._callbacks:
      self._callbacks[file] = callback

  def cleanup(self, which_pass):
    """Clean up all files, and invoke callbacks, for pass WHICH_PASS.
    Do nothing if no file was registered for WHICH_PASS."""
    if which_pass not in self._log:
      return
    for file in self._log[which_pass]:
      Log().write(LOG_VERBOSE, "Deleting", file)
      if file in self._callbacks:
        self._callbacks[file]()
      os.unlink(file)


# Always use these constants for opening databases.
DB_OPEN_READ = 'r'
DB_OPEN_NEW = 'n'


class AbstractDatabase(UserDict.DictMixin):
  """An abstract base class for anydbm-based databases.

  Subclasses supply __getitem__ and __setitem__, which decide how
  values are stored in the underlying dbm object self.db."""

  def __init__(self, filename, mode):
    """Open the anydbm database FILENAME with the anydbm open flag
    MODE and store it in self.db."""
    # pybsddb3 has a bug which prevents it from working with
    # Berkeley DB 4.2 if you open the db with 'n' ("new").  This
    # causes the DB_TRUNCATE flag to be passed, which is disallowed
    # for databases protected by lock and transaction support
    # (bsddb databases use locking from bsddb version 4.2.4 onwards).
    #
    # Therefore, manually perform the removal (we can do this, because
    # we know that for bsddb - but *not* anydbm in general - the database
    # consists of one file with the name we specify, rather than several
    # based on that name).
    if mode == 'n' and anydbm._defaultmod.__name__ == 'dbhash':
      if os.path.isfile(filename):
        os.unlink(filename)
      # The old file is gone, so "create if missing" now yields a new,
      # empty database.
      mode = 'c'

    self.db = anydbm.open(filename, mode)

    # Import implementations for many mapping interface methods.
    # Note that we specifically do not do this for any method which handles
    # *values*, because our derived classes may define __getitem__ and
    # __setitem__ to override the storage of values, and grabbing methods
    # directly from the dbm object would bypass this.
    # Methods that the dbm object lacks are simply skipped.
    for meth_name in ('__delitem__', 'keys',
        '__iter__', 'has_key', '__contains__', 'iterkeys', 'clear'):
      meth_ref = getattr(self.db, meth_name, None)
      if meth_ref:
        setattr(self, meth_name, meth_ref)

  def __delitem__(self, key):
    """Delete KEY from the underlying database.

    gdbm does not define a __delitem__ we can assign."""
    del self.db[key]


class SDatabase(AbstractDatabase):
  """A database that can only store strings."""

  def __getitem__(self, key):
    """Return the value stored under KEY, exactly as the underlying
    database holds it."""
    return self.db[key]

  def __setitem__(self, key, value):
    """Store VALUE under KEY without any conversion."""
    self.db[key] = value


class Database(AbstractDatabase):
  """A database that uses the marshal module to store built-in types."""

  def __getitem__(self, key):
    """Return the object obtained by unmarshalling the data stored
    under KEY."""
    return marshal.loads(self.db[key])

  def __setitem__(self, key, value):
    """Store the marshalled form of VALUE under KEY."""
    self.db[key] = marshal.dumps(value)


class StatsKeeper:
  """Gather statistics about the conversion and format them for display.

  Every instance shares a single attribute dictionary (__shared_state),
  so the state is set up only by the first instance created; later
  instances see the same data.  The counters live in self.data, which
  archive() and unarchive() save to and load from self.filename using
  the marshal module."""

  __shared_state = { }

  def __init__(self):
    self.__dict__ = self.__shared_state
    if self.__dict__:
      # The shared state was already set up by an earlier instance.
      return
    self.filename = temp(STATISTICS_FILE)
    Cleanup().register(self.filename, pass8)
    # This can get kinda large, so we don't store it in our data dict.
    self.repos_files = { }

    if os.path.exists(self.filename):
      self.unarchive()
    else:
      self.data = { 'cvs_revs_count' : 0,
                    'tags': { },
                    'branches' : { },
                    'repos_size' : 0,
                    'repos_file_count' : 0,
                    'svn_rev_count' : None,
                    # 2**32: starts above any timestamp we expect, so
                    # the first recorded revision always lowers it.
                    'first_rev_date' : 4294967296,
                    'last_rev_date' : 0,
                    'pass_timings' : { },
                    'start_time' : 0,
                    'end_time' : 0,
                    }

  def log_duration_for_pass(self, duration, pass_num):
    """Record DURATION (in seconds) as the time taken by pass PASS_NUM."""
    self.data['pass_timings'][pass_num] = duration

  def set_start_time(self, start):
    """Record START as the time at which the conversion began."""
    self.data['start_time'] = start

  def set_end_time(self, end):
    """Record END as the time at which the conversion finished."""
    self.data['end_time'] = end

  def _bump_item(self, key, amount=1):
    """Add AMOUNT to the counter stored in self.data under KEY."""
    self.data[key] = self.data[key] + amount

  def reset_c_rev_info(self):
    """Forget the revision count and the tag and branch names seen so far."""
    self.data['cvs_revs_count'] = 0
    self.data['tags'] = { }
    self.data['branches'] = { }

  def record_c_rev(self, c_rev):
    """Update the statistics with the CVS revision C_REV.

    Reads C_REV's tags, branches, timestamp, fname and file_size
    attributes."""
    self._bump_item('cvs_revs_count')

    # The tag and branch dictionaries are used as sets.
    for tag in c_rev.tags:
      self.data['tags'][tag] = None
    for branch in c_rev.branches:
      self.data['branches'][branch] = None

    if c_rev.timestamp < self.data['first_rev_date']:
      self.data['first_rev_date'] = c_rev.timestamp

    if c_rev.timestamp > self.data['last_rev_date']:
      self.data['last_rev_date'] = c_rev.timestamp

    # Only add the size if this is the first time we see the file.
    if c_rev.fname not in self.repos_files:
      self._bump_item('repos_size', c_rev.file_size)
    self.repos_files[c_rev.fname] = None

    self.data['repos_file_count'] = len(self.repos_files)

  def set_svn_rev_count(self, count):
    """Record COUNT as the number of Subversion revisions."""
    self.data['svn_rev_count'] = count

  def svn_rev_count(self):
    """Return the recorded number of Subversion revisions (or None)."""
    return self.data['svn_rev_count']

  def archive(self):
    """Write self.data to self.filename in marshal format."""
    # marshal output is binary, so the file must be opened in binary
    # mode; text mode would corrupt it wherever newlines are translated.
    f = open(self.filename, 'wb')
    try:
      f.write(marshal.dumps(self.data))
    finally:
      f.close()

  def unarchive(self):
    """Load self.data from the marshal data stored in self.filename."""
    f = open(self.filename, 'rb')
    try:
      self.data = marshal.loads(f.read())
    finally:
      f.close()

  def __str__(self):
    """Return the statistics as a multi-line, human-readable report."""
    svn_revs_str = ""
    if self.data['svn_rev_count'] is not None:
      svn_revs_str = ('Total SVN Commits:      %10s\n'
                      % self.data['svn_rev_count'])

    return ('\n'
            'cvs2svn Statistics:\n'
            '------------------\n'
            'Total CVS Files:        %10i\n'
            'Total CVS Revisions:    %10i\n'
            'Total Unique Tags:      %10i\n'
            'Total Unique Branches:  %10i\n'
            'CVS Repos Size in KB:   %10i\n'
            '%s'
            'First Revision Date:    %s\n'
            'Last Revision Date:     %s\n'
            '------------------'
            % (self.data['repos_file_count'],
               self.data['cvs_revs_count'],
               len(self.data['tags']),
               len(self.data['branches']),
               (self.data['repos_size'] // 1024),
               svn_revs_str,
               time.ctime(self.data['first_rev_date']),
               time.ctime(self.data['last_rev_date']),
               ))

  def timings(self):
    """Return a report of the time taken by each pass and in total."""
    def desc(val):
      if val == 1: return "second"
      return "seconds"

    passes = list(self.data['pass_timings'].keys())
    passes.sort()

    lines = ['Timings:\n------------------\n']
    for pass_num in passes:
      duration = int(self.data['pass_timings'][pass_num])
      lines.append('pass %d:%6d %s\n'
                   % (pass_num, duration, desc(duration)))

    total = int(self.data['end_time'] - self.data['start_time'])
    lines.append('total: %6d %s' % (total, desc(total)))
    return ''.join(lines)


class LastSymbolicNameDatabase:
  """Record the last CVS revision in which each symbolic name was seen.

  Passing every CVSRevision in s-revs to log_revision() and then
  calling create_database() results in a Database whose key is the
  unique key of the last CVS revision a symbolic name was seen in, and
  whose value is a list of all symbolic names that were last seen in
  that revision."""

  def __init__(self, mode):
    """Open the symbol database in MODE (as for Database)."""
    # Maps each symbolic name to the unique key of the most recently
    # logged revision that carried it.
    self.symbols = {}
    self.symbol_revs_db = Database(temp(SYMBOL_LAST_CVS_REVS_DB), mode)
    Cleanup().register(temp(SYMBOL_LAST_CVS_REVS_DB), pass5)

  def log_revision(self, c_rev):
    """Note C_REV as the latest revision for each symbol it carries.

    Once we've gone through all the revs, self.symbols.keys() will be
    a list of all tags and branches, and their corresponding values
    will be the key of the last CVS revision that they were used in."""
    # Gather last CVS Revision for symbolic name info and tag info
    for tag in c_rev.tags:
      self.symbols[tag] = c_rev.unique_key()
    # Compare by value, not identity: C_REV.op may be a string parsed
    # from a file rather than the OP_DELETE object itself.
    if c_rev.op != OP_DELETE:
      for branch in c_rev.branches:
        self.symbols[branch] = c_rev.unique_key()

  def create_database(self):
    """Write the inversion of self.symbols to self.symbol_revs_db.

    The result maps each CVS revision unique key to the list of
    symbols that close in that revision."""
    for sym, rev_unique_key in self.symbols.items():
      ary = self.symbol_revs_db.get(rev_unique_key, [])
      ary.append(sym)
      # Store the list back explicitly; the database does not keep a
      # reference to the list returned by get().
      self.symbol_revs_db[rev_unique_key] = ary


class CVSRevisionDatabase:
  """Store CVSRevision objects, keyed by their unique_key(), in a string
  database, and rebuild them on request."""

  def __init__(self, mode):
    """Open the revisions database in MODE (the same MODE values that
    Database and anydbm.open() accept)."""
    self.cvs_revs_db = SDatabase(temp(CVS_REVS_DB), mode)
    cleanup = Cleanup()
    cleanup.register(temp(CVS_REVS_DB), pass8)

  def log_revision(self, c_rev):
    """Store the string form of the CVSRevision C_REV under its
    unique key."""
    serialized = str(c_rev)
    self.cvs_revs_db[c_rev.unique_key()] = serialized

  def get_revision(self, unique_key):
    """Rebuild and return the CVSRevision that was stored under
    UNIQUE_KEY."""
    ctx = Ctx()
    return CVSRevision(ctx, self.cvs_revs_db[unique_key])


def TagsDatabase(mode):
  """Open, in MODE, the database recording which symbolic names are tags.

  Each key is a tag name.
  The value has no meaning, and should be set to None."""
  tags_db = SDatabase(temp(TAGS_DB), mode)
  cleanup = Cleanup()
  cleanup.register(temp(TAGS_DB), pass8)
  return tags_db


class Project:
  """A project within a CVS repository."""

  def __init__(self, project_cvs_repos_path,
               trunk_path, branches_path, tags_path):
    """Create a new Project record.

    PROJECT_CVS_REPOS_PATH is the main CVS directory for this project
    (within the filesystem).  TRUNK_PATH, BRANCHES_PATH, and TAGS_PATH
    are the full, normalized directory names in svn for the
    corresponding part of the repository.

    Raise FatalError if PROJECT_CVS_REPOS_PATH does not start with the
    CVS repository path found in Ctx()."""

    self.project_cvs_repos_path = project_cvs_repos_path
    prefix = Ctx().cvs_repository.cvs_repos_path
    if not self.project_cvs_repos_path.startswith(prefix):
      raise FatalError("Project '%s' must start with '%s'"
                       % (self.project_cvs_repos_path, prefix,))
    # The project's main directory as a cvs_path:
    self.project_cvs_path = self.project_cvs_repos_path[len(prefix):]
    if self.project_cvs_path.startswith(os.sep):
      self.project_cvs_path = self.project_cvs_path[1:]
    self.trunk_path = trunk_path
    self.branches_path = branches_path
    self.tags_path = tags_path
    verify_paths_disjoint(self.trunk_path, self.branches_path, self.tags_path)

  def is_source(self, svn_path):
    """Return True iff SVN_PATH is a legitimate source for this project.

    Legitimate paths are self.trunk_path or any directory directly
    under self.branches_path."""

    if svn_path == self.trunk_path:
      return True

    # Only the parent directory matters here; the last component is
    # the (arbitrary) branch directory name.
    (head, tail,) = _path_split(svn_path)
    if head == self.branches_path:
      return True

    return False

  def is_unremovable(self, svn_path):
    """Return True iff the specified path must not be removed."""

    return svn_path in [self.trunk_path, self.branches_path, self.tags_path]

  def get_branch_path(self, branch_name):
    """Return the svnpath for the branch named BRANCH_NAME."""

    return _path_join(self.branches_path, _clean_symbolic_name(branch_name))

  def get_tag_path(self, tag_name):
    """Return the svnpath for the tag named TAG_NAME."""

    return _path_join(self.tags_path, _clean_symbolic_name(tag_name))

  def _relative_name(self, cvs_path):
    """Convert CVS_PATH into a name relative to this project's root directory.

    CVS_PATH has to begin (textually) with self.project_cvs_path;
    raise FatalError otherwise.  Remove that prefix and one optional
    path separator following it.  If CVS_PATH is exactly
    self.project_cvs_path, return the empty string."""

    if not cvs_path.startswith(self.project_cvs_path):
      raise FatalError(
          "_relative_name: '%s' is not a sub-path of '%s'"
          % (cvs_path, self.project_cvs_path,))
    l = len(self.project_cvs_path)
    # Compare a slice rather than indexing, so that a CVS_PATH with
    # nothing after the prefix does not raise IndexError.
    if cvs_path[l:l + 1] == os.sep:
      l += 1
    return cvs_path[l:]

  def make_trunk_path(self, cvs_path):
    """Return the svn path for the file CVS_PATH on trunk."""

    return _path_join(self.trunk_path, self._relative_name(cvs_path))

  def make_branch_path(self, branch_name, cvs_path):
    """Return the svn path for CVS_PATH on branch BRANCH_NAME."""

    return _path_join(self.get_branch_path(branch_name),
                      self._relative_name(cvs_path))


class CVSRevision:
  """One revision of one RCS file, as recorded in a line of a revs file.

  str() of an instance yields the revs-file line; passing such a line
  back to the constructor recreates the revision."""

  def __init__(self, ctx, *args):
    """Initialize a new CVSRevision with Ctx object CTX, and ARGS.

    If CTX is None, the following members and methods of the
    instantiated CVSRevision class object will be unavailable (or
    simply will not work correctly, if at all):
       cvs_path
       svn_path
       is_default_branch_revision()

    (Note that this class treats CTX as const, because the caller
    likely passed in a Borg instance of a Ctx.  The reason this class
    takes CTX as a parameter, instead of just instantiating a Ctx
    itself, is that this class should be usable outside cvs2svn.)

    If there is one argument in ARGS, it is a string, in the format of
    a line from a revs file.  Do *not* include a trailing newline.

    If there are multiple ARGS, there must be 17 of them,
    comprising a parsed revs line:
       timestamp       -->  (int) date stamp for this cvs revision
       digest          -->  (string) digest of author+logmsg
       prev_timestamp  -->  (int) date stamp for the previous cvs revision
       next_timestamp  -->  (int) date stamp for the next cvs revision
       op              -->  (char) OP_ADD, OP_CHANGE, or OP_DELETE
       prev_rev        -->  (string or None) previous CVS rev, e.g., "1.2"
       rev             -->  (string) this CVS rev, e.g., "1.3"
       next_rev        -->  (string or None) next CVS rev, e.g., "1.4"
       file_in_attic   -->  (char or None) true if RCS file is in Attic
       file_executable -->  (char or None) true if RCS file has exec bit set.
       file_size       -->  (int) size of the RCS file
       deltatext_code  -->  (char) 'N' if non-empty deltatext, else 'E'
       fname           -->  (string) relative path of file in CVS repos
       mode            -->  (string or None) "kkv", "kb", etc.
       branch_name     -->  (string or None) branch on which this rev occurred
       tags            -->  (list of strings) all tags on this revision
       branches        -->  (list of strings) all branches rooted in this rev

    The two forms of initialization are equivalent.

    Raise TypeError if ARGS has neither 1 nor 17 elements.

    WARNING: Due to the resync process in pass2, prev_timestamp or
    next_timestamp may be incorrect in the c-revs or s-revs files."""

    self._ctx = ctx
    if len(args) == 17:
      (self.timestamp, self.digest, self.prev_timestamp, self.next_timestamp,
       self.op, self.prev_rev, self.rev, self.next_rev, self.file_in_attic,
       self.file_executable, self.file_size, self.deltatext_code,
       self.fname,
       self.mode, self.branch_name, self.tags, self.branches) = args
    elif len(args) == 1:
      self._parse_revs_line(args[0])
    else:
      # The counts in the message include CTX.
      raise TypeError('CVSRevision() takes 2 or 18 arguments (%d given)'
                      % (len(args) + 1))
    if ctx is not None:
      self.cvs_path = ctx.cvs_repository.get_cvs_path(self.fname)
      if self.branch_name:
        self.svn_path = ctx.project.make_branch_path(self.branch_name,
                                                     self.cvs_path)
      else:
        self.svn_path = ctx.project.make_trunk_path(self.cvs_path)

  def _parse_revs_line(self, line):
    """Set the attributes of SELF from LINE, a line in the format that
    __str__() produces (without a trailing newline)."""
    # The first 14 fields never contain spaces.  They are followed by
    # the tag count and a remainder holding the tags, the branch count,
    # the branches, and finally the file name.
    (self.timestamp, self.digest, self.prev_timestamp, self.next_timestamp,
     self.op, self.prev_rev, self.rev, self.next_rev, self.file_in_attic,
     self.file_executable, self.file_size, self.deltatext_code,
     self.mode, self.branch_name, numtags, remainder) = line.split(' ', 15)

    # Patch up data items which are not simple strings.  The revision's
    # own timestamp is written in hex; its neighbours' timestamps are
    # written in decimal, with "*" standing for 0.
    self.timestamp = int(self.timestamp, 16)
    if self.prev_timestamp == "*":
      self.prev_timestamp = 0
    else:
      self.prev_timestamp = int(self.prev_timestamp)
    if self.next_timestamp == "*":
      self.next_timestamp = 0
    else:
      self.next_timestamp = int(self.next_timestamp)
    self.file_size = int(self.file_size)

    # "*" marks a field whose value is None.
    for attr in ('prev_rev', 'next_rev', 'file_in_attic',
                 'file_executable', 'mode', 'branch_name'):
      if getattr(self, attr) == "*":
        setattr(self, attr, None)

    # Split off NUMTAGS tags and the branch count; whatever follows is
    # left in one piece, because the file name may contain spaces.
    numtags = int(numtags)
    tags_and_numbranches_and_remainder = remainder.split(' ', numtags + 1)
    self.tags = tags_and_numbranches_and_remainder[:-2]
    numbranches = int(tags_and_numbranches_and_remainder[-2])
    remainder = tags_and_numbranches_and_remainder[-1]
    branches_and_fname = remainder.split(' ', numbranches)
    self.branches = branches_and_fname[:-1]
    self.fname = branches_and_fname[-1]

  def unique_key(self, revnum="0"):
    """Return a key that is unique to revision REVNUM of this file.

    The 'primary key' of a CVS Revision is the revision number + the
    filename.  To provide a unique key (say, for a dict), we just glom
    them together in a string.  By passing in self.prev_rev or
    self.next_rev, you can get the unique key for their respective
    CVSRevisions.  Without REVNUM, return the key for this revision;
    if REVNUM is None, return None."""
    # "0" is the 'argument not given' marker.  Compare it by value:
    # identity of equal strings is an implementation detail.
    if revnum == "0":
      revnum = self.rev
    elif revnum is None:
      return None
    return revnum + "/" + self.fname

  def __str__(self):
    """Return this revision in the format of a line from a revs file
    (without a trailing newline)."""
    return ('%08lx %s %s %s %s %s %s %s %s %s %d %s %s %s %d%s%s %d%s%s %s'
            % (self.timestamp, self.digest, self.prev_timestamp or "*",
              self.next_timestamp or "*", self.op, (self.prev_rev or "*"),
              self.rev, (self.next_rev or "*"), (self.file_in_attic or "*"),
              (self.file_executable or "*"),
              self.file_size,
              self.deltatext_code, (self.mode or "*"),
              (self.branch_name or "*"),
              len(self.tags), self.tags and " " or "", " ".join(self.tags),
              len(self.branches), self.branches and " " or "",
              " ".join(self.branches),
              self.fname, ))

  def opens_symbolic_name(self, name):
    """Return 1 if this CVSRevision is the opening CVSRevision for
    NAME (for this RCS file), else return 0."""
    if name in self.tags:
      return 1
    if name in self.branches:
      # If this c_rev opens a branch and our op is OP_DELETE, then
      # that means that the file that this c_rev belongs to was
      # created on the branch, so for all intents and purposes, this
      # c_rev is *technically* not an opening.  See Issue #62 for more
      # information.
      if self.op != OP_DELETE:
        return 1
    return 0

  def is_default_branch_revision(self):
    """Return 1 if SELF.rev of SELF.cvs_path is a default branch
    revision according to DEFAULT_BRANCHES_DB (see the conditions
    documented there), else return None."""
    val = self._ctx._default_branches_db.get(self.cvs_path, None)
    if val is not None:
      # Split both revision numbers into branch number and final
      # component.  We qualify if we are on the same branch and no
      # later than the recorded revision.
      val_last_dot = val.rindex(".")
      our_last_dot = self.rev.rindex(".")
      default_branch = val[:val_last_dot]
      our_branch = self.rev[:our_last_dot]
      default_rev_component = int(val[val_last_dot + 1:])
      our_rev_component = int(self.rev[our_last_dot + 1:])
      if (default_branch == our_branch
          and our_rev_component <= default_rev_component):
        return 1
    # else
    return None

  def rcs_path(self):
    """Return the actual filesystem path to the RCS file of this
    CVSRevision: SELF.fname, with an 'Attic' directory inserted before
    the last path component if the file is in the Attic."""
    if self.file_in_attic is None:
      return self.fname
    else:
      basepath, filename = os.path.split(self.fname)
      return os.path.join(basepath, 'Attic', filename)

  def filename(self):
    "Return the last path component of self.fname, minus the ',v'"
    return os.path.split(self.fname)[-1][:-2]

class SymbolDatabase:
  """This database records information on all symbols in the RCS
  files.  It is created in pass 1 and it is used in pass 2."""

  def __init__(self):
    # A hash that maps tag names to commit counts
    self.tags = { }
    # A hash that maps branch names to lists of the format
    # [ create_count, commit_count, blockers ], where blockers
    # is a hash that lists the symbols that depend on the
    # branch.  The blockers hash is used as a set, so the
    # values are not used.
    self.branches = { }

  def register_tag_creation(self, name):
    """Register the creation of the tag NAME."""
    self.tags[name] = self.tags.get(name, 0) + 1

  def _branch(self, name):
    """Helper function to get a branch node that will create and
    initialize the node if it does not exist."""
    if name not in self.branches:
      self.branches[name] = [ 0, 0, { } ]
    return self.branches[name]

  def register_branch_creation(self, name):
    """Register the creation of the branch NAME."""
    self._branch(name)[0] += 1

  def register_branch_commit(self, name):
    """Register a commit on the branch NAME."""
    self._branch(name)[1] += 1

  def register_branch_blocker(self, name, blocker):
    """Register BLOCKER as a blocker on the branch NAME."""
    self._branch(name)[2][blocker] = None

  def branch_has_commit(self, name):
    """Return non-zero if NAME has commits.  Returns a false value if
    NAME is not a branch or if it has no commits."""
    return name in self.branches and self.branches[name][1]

  def find_excluded_symbols(self, regexp_list):
    """Returns a hash of all symbols that match the regexps in
    REGEXP_LIST.  The hash is used as a set so the values are
    not used."""
    excludes = { }
    for tag in self.tags:
      if match_regexp_list(regexp_list, tag):
        excludes[tag] = None
    for branch in self.branches:
      if match_regexp_list(regexp_list, branch):
        excludes[branch] = None
    return excludes

  def find_branch_exclude_blockers(self, branch, excludes):
    """Return a hash of the blockers of BRANCH that are not in the hash
    EXCLUDES.  The result is empty unless BRANCH itself is in
    EXCLUDES.  The hash is used as a set so the values are not used."""
    blockers = { }
    if branch in excludes:
      for blocker in self.branches[branch][2]:
        if blocker not in excludes:
          blockers[blocker] = None
    return blockers

  def find_blocked_excludes(self, excludes):
    """Find all branches in EXCLUDES that have blocking symbols that
    are not themselves excluded.  Return a hash that maps branch names
    to a hash of blockers.  The hash of blockers is used as a set so
    the values are not used."""
    blocked_branches = { }
    for branch in self.branches:
      blockers = self.find_branch_exclude_blockers(branch, excludes)
      if blockers:
        blocked_branches[branch] = blockers
    return blocked_branches

  def find_mismatches(self, excludes=None):
    """Find all symbols that are defined as both tags and branches,
    excluding the ones in EXCLUDES.  Returns a list of 4-tuples with
    the symbol name, tag count, branch count and commit count."""
    if excludes is None:
      excludes = { }
    mismatches = [ ]
    for branch in self.branches:
      if branch not in excludes and branch in self.tags:
        mismatches.append((branch,                    # name
                           self.tags[branch],         # tag count
                           self.branches[branch][0],  # branch count
                           self.branches[branch][1])) # commit count
    return mismatches

  def read(self):
    """Read the symbol database from files.

    The tags file holds one 'NAME COUNT' pair per line; the branches
    file holds 'NAME CREATE_COUNT COMMIT_COUNT [BLOCKER...]' per line
    (the formats produced by write())."""
    f = open(temp(TAGS_LIST))
    try:
      for line in f.readlines():
        tag, count = line.split()
        self.tags[tag] = int(count)
    finally:
      f.close()

    f = open(temp(BRANCHES_LIST))
    try:
      for line in f.readlines():
        words = line.split()
        node = [ int(words[1]), int(words[2]), { } ]
        for blocker in words[3:]:
          node[2][blocker] = None
        self.branches[words[0]] = node
    finally:
      f.close()

  def write(self):
    """Store the symbol database to files."""
    f = open(temp(TAGS_LIST), "w")
    try:
      Cleanup().register(temp(TAGS_LIST), pass2)
      for tag, count in self.tags.items():
        f.write("%s %d\n" % (tag, count))
    finally:
      # Close explicitly so the data is flushed to disk without
      # relying on garbage collection.
      f.close()

    f = open(temp(BRANCHES_LIST), "w")
    try:
      Cleanup().register(temp(BRANCHES_LIST), pass2)
      for branch, info in self.branches.items():
        f.write("%s %d %d" % (branch, info[0], info[1]))
        if info[2]:
          f.write(" ")
          f.write(" ".join(info[2].keys()))
        f.write("\n")
    finally:
      f.close()

class CollectData(cvs2svn_rcsparse.Sink):
  def __init__(self):
    """Create the output files and databases that data collection
    writes to, and reset the per-run bookkeeping.

    Each file or database created here is also registered with
    Cleanup() against a pass (NOTE(review): presumably the pass after
    which it is no longer needed -- confirm against Cleanup)."""
    # Plain files opened for writing: the revs and resync data files.
    self.revs = open(temp(DATAFILE + REVS_SUFFIX), 'w')
    Cleanup().register(temp(DATAFILE + REVS_SUFFIX), pass2)
    self.resync = open(temp(DATAFILE + RESYNC_SUFFIX), 'w')
    Cleanup().register(temp(DATAFILE + RESYNC_SUFFIX), pass2)
    # Freshly created databases (DB_OPEN_NEW); default_branches_db is
    # written by define_revision(), keyed on the file's cvs_path.
    self.default_branches_db = SDatabase(temp(DEFAULT_BRANCHES_DB),
                                         DB_OPEN_NEW)
    Cleanup().register(temp(DEFAULT_BRANCHES_DB), pass5)
    self.metadata_db = Database(temp(METADATA_DB), DB_OPEN_NEW)
    Cleanup().register(temp(METADATA_DB), pass8)
    # Error messages accumulated while collecting (see define_tag()).
    self.fatal_errors = []
    self.num_files = 0
    # Counts of tag/branch creations and branch commits for all files.
    self.symbol_db = SymbolDatabase()

    # 1 if we've collected data for at least one file, None otherwise.
    self.found_valid_file = None

    # See set_fname() for initializations of other variables.

  def set_fname(self, canonical_name, filename):
    """Get ready to collect data for the RCS file FILENAME.

    FILENAME is the absolute filesystem path of the file, and
    CANONICAL_NAME is the same path without the 'Attic' component (the
    two differ only when the file actually lives in the Attic).  All
    per-file state is reset here."""
    self.fname = canonical_name

    # File metadata is worked out here, once per file, so that it does
    # not have to be recomputed later for every CVS *revision*.
    self.cvs_path = Ctx().cvs_repository.get_cvs_path(self.fname)

    # A canonical name that differs from the real one means that an
    # 'Attic' component was stripped, i.e. the file is in the Attic.
    if canonical_name != filename:
      self.file_in_attic = 1
    else:
      self.file_in_attic = None

    file_stat = os.stat(filename)

    # Size of the file, in bytes.
    self.file_size = file_stat[stat.ST_SIZE]

    # 1 if the owner-executable bit is set, None otherwise.
    if file_stat[stat.ST_MODE] & stat.S_IXUSR:
      self.file_executable = 1
    else:
      self.file_executable = None

    # revision -> [timestamp, author, old-timestamp]
    self.rev_data = { }

    # revision number -> number of the previous revision along the
    # same line of development.  The first revision on a branch has
    # the revision it sprouted from as its 'previous'.  A revision
    # without a predecessor maps to None.
    #
    # (This cannot be derived arithmetically from the revision
    # numbers, because of cvsadmin -o; hence the explicit table.)
    self.prev_rev = { }

    # The reverse of self.prev_rev: revision number -> number of the
    # next revision.  Unlike self.prev_rev, a revision without a
    # successor simply has no entry.
    self.next_rev = { }

    # revision number -> RCS state.  In set_revision_info we need the
    # state of a revision's predecessor to tell whether the revision
    # is an add, a change or a delete; by then all revisions of the
    # file are known, so the lookup is always possible.
    self.rev_state = { }

    # branch number (like '1.7.2') -> branch name (like
    # 'Release_1_0_dev').
    self.branch_names = { }

    # RCS flags (used for keyword expansion).
    self.mode = None

    # revision number (like '1.7') -> list of the names of the
    # branches sprouting from it, like
    # ['Release_1_0_dev', 'experimental_driver', ...].
    self.branchlist = { }

    # revision number -> list of the names of the tags on it.
    self.taglist = { }

    # The RCS branch number of the default branch, if the file has
    # one.  rcsparse calls this the "principal branch" (hence the
    # setter name 'set_principal_branch'), but CVS and RCS say
    # "default branch", and so do we.
    self.default_branch = None

    # For a file that has vendor revisions but no default branch
    # anymore, we guess that the vendor revisions *were* the head of
    # the default branch until 1.2 was committed, at which point trunk
    # became the default.  This is the date of that 1.2 commit.
    self.first_non_vendor_revision_date = None

    # The symbols defined so far for this file (used as a set), kept
    # to detect multiple definitions of one symbol -- which happens
    # easily when --symbol-transform is in use.
    self.defined_symbols = { }

  def set_principal_branch(self, branch):
    """Record BRANCH as the default branch of the current file.

    The rcsparse API calls this the "principal branch"; this class
    stores it as self.default_branch."""
    self.default_branch = branch

  def set_expansion(self, mode):
    """Record MODE, the RCS flags used for keyword expansion, as
    self.mode for the current file."""
    self.mode = mode

  def set_branch_name(self, branch_number, name):
    """Give the branch numbered BRANCH_NUMBER the name NAME.

    BRANCH_NUMBER is an RCS branch number with an odd number of
    components, for example '1.7.2' (never '1.7.0.2').  NAME is also
    added to the list of branches sprouting from the revision that
    BRANCH_NUMBER is rooted in, and the branch creation is registered
    with the symbol database.

    If the branch already has a name, keep the existing name, write a
    warning to stderr and ignore NAME."""
    if branch_number in self.branch_names:
      sys.stderr.write("%s: in '%s':\n"
                       "   branch '%s' already has name '%s',\n"
                       "   cannot also have name '%s', ignoring the latter\n"
                       % (warning_prefix, self.fname, branch_number,
                          self.branch_names[branch_number], name))
      return

    self.branch_names[branch_number] = name
    # self.branchlist is keyed on the revision the branch sprouts
    # from, which is the branch number minus its odd last component.
    last_dot = branch_number.rfind(".")
    sprout_rev = branch_number[:last_dot]
    names_at_sprout = self.branchlist.setdefault(sprout_rev, [])
    names_at_sprout.append(name)
    self.symbol_db.register_branch_creation(name)

  def rev_to_branch_name(self, revision):
    """Return the name of the branch that REVISION is on.

    REVISION is a revision number with an even number of components,
    for example '1.7.2.1' (never '1.7.2' nor '1.7.0.2').  As a
    convenience to callers it may also be a trunk revision such as
    '1.2'; the result is then None.  The result is also None if no
    name is recorded for the branch."""
    if not trunk_rev.match(revision):
      # Dropping the last component leaves the branch number.
      branch_number = revision[:revision.rindex(".")]
      return self.branch_names.get(branch_number)
    return None

  def define_tag(self, name, revision):
    """Record the symbolic name NAME, which the RCS file attaches to
    REVISION.

    REVISION is an unprocessed revision number from the RCS file's
    header, for example: '1.7', '1.7.0.2', or '1.1.1' or '1.1.1.1'.
    Its form decides whether NAME is recorded as a branch name or as a
    tag.  NAME is first run through the symbol transforms found in
    Ctx().  A symbol defined more than once for the current file is
    reported on stderr and added to self.fatal_errors, but is still
    recorded."""
    for transform in Ctx().symbol_transforms:
      (regexp, substitution) = transform
      transformed = regexp.sub(substitution, name)
      if transformed == name:
        continue
      Log().write(LOG_WARN, "   symbol '%s' transformed to '%s'"
                  % (name, transformed))
      name = transformed

    if name in self.defined_symbols:
      err = "%s: Multiple definitions of the symbol '%s' in '%s'" \
                % (error_prefix, name, self.fname)
      sys.stderr.write(err + "\n")
      self.fatal_errors.append(err)
    self.defined_symbols[name] = None

    cvs_branch = cvs_branch_tag.match(revision)
    if cvs_branch:
      # Join the two matched groups to form the branch number.
      branch_number = cvs_branch.group(1) + cvs_branch.group(2)
      self.set_branch_name(branch_number, name)
    elif rcs_branch_tag.match(revision):
      # REVISION already is a branch number.
      self.set_branch_name(revision, name)
    else:
      # Anything else is an ordinary tag on REVISION.
      tags_on_revision = self.taglist.setdefault(revision, [])
      tags_on_revision.append(name)
      self.symbol_db.register_tag_creation(name)

  def define_revision(self, revision, timestamp, author, state,
                      branches, next):
    """Record the data for REVISION of the current file.

    Store STATE in self.rev_state and [int(TIMESTAMP), AUTHOR, None] in
    self.rev_data, link REVISION into self.prev_rev / self.next_rev
    using NEXT (the RCS 'next' field) and BRANCHES, update the entry
    for self.cvs_path in self.default_branches_db when REVISION
    qualifies as a default branch revision, and, for a non-trunk
    REVISION, register a commit on its branch with self.symbol_db
    (naming a branch not seen before 'unlabeled-<branch number>').

    NOTE(review): BRANCHES is presumably the list of revision numbers
    that start the branches sprouting from REVISION, as supplied
    through the rcsparse Sink interface -- confirm."""

    # Record the state of our revision for later calculations
    self.rev_state[revision] = state

    # store the rev_data as a list in case we have to jigger the timestamp
    self.rev_data[revision] = [int(timestamp), author, None]

    # When on trunk, the RCS 'next' revision number points to what
    # humans might consider to be the 'previous' revision number.  For
    # example, 1.3's RCS 'next' is 1.2.
    #
    # However, on a branch, the RCS 'next' revision number really does
    # point to what humans would consider to be the 'next' revision
    # number.  For example, 1.1.2.1's RCS 'next' would be 1.1.2.2.
    #
    # In other words, in RCS, 'next' always means "where to find the next
    # deltatext that you need this revision to retrieve".
    #
    # That said, we don't *want* RCS's behavior here, so we determine
    # whether we're on trunk or a branch and set self.prev_rev
    # accordingly.
    #
    # One last thing.  Note that if REVISION is a branch revision,
    # instead of mapping REVISION to NEXT, we instead map NEXT to
    # REVISION.  Since we loop over all revisions in the file before
    # doing anything with the data we gather here, this 'reverse
    # assignment' effectively does the following:
    #
    # 1. Gives us no 'prev' value for REVISION (in this
    # iteration... it may have been set in a previous iteration)
    #
    # 2. Sets the 'prev' value for the revision with number NEXT to
    # REVISION.  So when we come around to the branch revision whose
    # revision value is NEXT, its 'prev' and 'prev_rev' are already
    # set.
    if trunk_rev.match(revision):
      self.prev_rev[revision] = next
      self.next_rev[next] = revision
    elif next:
      self.prev_rev[next] = revision
      self.next_rev[revision] = next

    # Each revision listed in BRANCHES gets REVISION as its 'previous'.
    for b in branches:
      self.prev_rev[b] = revision

    # Ratchet up the highest vendor head revision, if necessary.
    if self.default_branch:
      default_branch_root = self.default_branch + "."
      # REVISION is directly on the default branch if it starts with
      # the branch number and has exactly one more component.
      if ((revision.find(default_branch_root) == 0)
          and (default_branch_root.count('.') == revision.count('.'))):
        # This revision is on the default branch, so record that it is
        # the new highest default branch head revision.
        self.default_branches_db[self.cvs_path] = revision
    else:
      # No default branch, so make an educated guess.
      if revision == '1.2':
        # This is probably the time when the file stopped having a
        # default branch, so make a note of it.
        self.first_non_vendor_revision_date = timestamp
      else:
        m = vendor_revision.match(revision)
        if m and ((not self.first_non_vendor_revision_date)
                  or (timestamp < self.first_non_vendor_revision_date)):
          # We're looking at a vendor revision, and it wasn't
          # committed after this file lost its default branch, so bump
          # the maximum trunk vendor revision in the permanent record.
          self.default_branches_db[self.cvs_path] = revision

    if not trunk_rev.match(revision):
      # Check for unlabeled branches, record them.  We tried to collect
      # all branch names when we parsed the symbolic name header
      # earlier, of course, but that didn't catch unlabeled branches.
      # If a branch is unlabeled, this is our first encounter with it,
      # so we have to record its data now.
      branch_number = revision[:revision.rindex(".")]
      if not self.branch_names.has_key(branch_number):
        branch_name = "unlabeled-" + branch_number
        self.set_branch_name(branch_number, branch_name)

      # Register the commit on this non-trunk branch
      branch_name = self.branch_names[branch_number]
      self.symbol_db.register_branch_commit(branch_name)

  def tree_completed(self):
    """The revision tree has been parsed.  Analyze it for consistency.

    Later passes rely on revision timestamps increasing along every
    line of development: rev 1.34 must be older than rev 1.35.  Visit
    each recorded 'previous revision' link and check that the previous
    revision's timestamp is strictly earlier than that of the revision
    pointing at it.  When it is not, push the previous revision (and,
    as far as needed, its own predecessors) back in time, storing the
    timestamp it had in slot 2 of its rev_data entry, and then scan
    all the links again from the start."""
    rev_data = self.rev_data
    prev_of = self.prev_rev

    # A resync invalidates the scan in progress, so keep scanning until
    # one complete pass over the links finds nothing to fix.
    rescan = 1
    while rescan:
      rescan = 0
      for current, prev in prev_of.items():
        if not prev:
          # CURRENT is an initial revision; nothing precedes it.
          continue
        t_c = rev_data[current][0]
        t_p = rev_data[prev][0]
        if t_p < t_c:
          # This link is already in chronological order.
          continue

        # PREV claims to be no older than CURRENT.  Shove PREV back in
        # time, and then any of its predecessors that the shift
        # overtakes.
        #
        # We move revisions backwards rather than forwards because a
        # CVS revision has exactly one previous revision, whereas it
        # may itself *be* the previous revision of many others (e.g.,
        # a revision from which several branches sprout).  That
        # matters for the secondary synchronization in pass 2: it is
        # easy to guarantee that a revision is not resynced to before
        # its previous revision, but it would be non-trivial to
        # guarantee that revision R is not resynced to *after* any of
        # the revisions that have R as their previous revision.
        while t_p >= t_c:
          shifted = t_c - 1
          rev_data[prev][0] = shifted     # new timestamp
          rev_data[prev][2] = t_p         # old timestamp
          delta = shifted - t_p
          message = "PASS1 RESYNC: '%s' (%s): old time='%s' delta=%ds" \
                    % (self.cvs_path, prev, time.ctime(t_p), delta)
          Log().write(LOG_VERBOSE, message)
          if (delta > COMMIT_THRESHOLD
              or delta < (COMMIT_THRESHOLD * -1)):
            warning = "%s: Significant timestamp change for '%s' (%d seconds)"
            Log().write(LOG_WARN,
                        warning % (warning_prefix, self.cvs_path, delta))

          # Step one link further back along the chain.
          current = prev
          prev = prev_of[current]
          if not prev:
            break
          t_c = shifted                   # i.e. rev_data[current][0]
          t_p = rev_data[prev][0]

        # Something was resynced: abandon this pass and start over.
        rescan = 1
        break

  def set_revision_info(self, revision, log, text):
    """Record everything known about REVISION of the current file.

    LOG is the revision's log message and TEXT its deltatext (only its
    truth value is used).  The revision is classified as an add, a
    change or a delete, wrapped in a CVSRevision, written as one line
    to self.revs and reported to the StatsKeeper.  Its (author, log)
    pair is stored in self.metadata_db under the digest of the two,
    unless that digest is already present.  If tree_completed() moved
    the revision's timestamp, a line describing the move is written to
    self.resync."""
    timestamp, author, old_ts = self.rev_data[revision]
    digest = sha.new(log + '\0' + author).hexdigest()
    if old_ts:
      # This revision's timestamp was changed.  Log the change so that
      # revisions of other files that share the old time and this log
      # message can be resynchronized later.
      self.resync.write('%08lx %s %08lx\n' % (old_ts, digest, timestamp))

    # "...Give back one kadam to honor the Hebrew God whose Ark this is."
    #       -- Imam to Indy and Sallah, in 'Raiders of the Lost Ark'
    #
    # A revision 1.1 that looks like the product of 'cvs add' rather
    # than 'cvs import' means the file probably never had a default
    # branch, so take back any record made for it in the default
    # branches db.  The test: for imports, CVS gives 1.1 the log
    # message "Initial revision\n" (no period).
    if revision == '1.1' and log != 'Initial revision\n':
      try:
        del self.default_branches_db[self.cvs_path]
      except KeyError:
        pass

    # Neighbouring revisions and their timestamps; a missing neighbour
    # contributes a timestamp of 0.
    no_data = [0, None, None]
    prev_rev = self.prev_rev[revision]
    next_rev = self.next_rev.get(revision)
    prev_timestamp = self.rev_data.get(prev_rev, no_data)[0]
    next_timestamp = self.rev_data.get(next_rev, no_data)[0]

    # Classify the revision:
    #
    #   delete -- its RCS state is 'dead';
    #   add    -- it is not dead, and either it has no previous
    #             revision or its previous revision is 'dead';
    #   change -- anything else.
    state = self.rev_state
    if state[revision] == 'dead':
      op = OP_DELETE
    elif prev_rev is None or state[prev_rev] == 'dead':
      op = OP_ADD
    else:
      op = OP_CHANGE

    # Issue #89.  Oddly, the tip of a branch can be alive while all of
    # its predecessors on the branch are 'dead' and yet the revision
    # the branch sprouts from is alive.  (This mirrors the more common
    # case of a file added on a branch, where the first branch
    # revision is alive and its sprout point is dead.)
    #
    # The first live revision on such a branch must be an OP_CHANGE,
    # not an OP_ADD, because it does reflect -- however indirectly -- a
    # change relative to the revision from which the branch sprouts.
    #
    # Only live revisions with at least three dots (i.e. not on trunk)
    # are examined.  Walk back through the predecessors looking for a
    # dead revision whose predecessor is alive and lies on a different
    # line of development.
    if revision.count('.') >= 3 and state[revision] != 'dead':
      cur_num = revision
      prev_num = self.prev_rev.get(cur_num, None)
      while cur_num and prev_num:
        # Same line of development: both on trunk, or both sharing
        # everything up to the final dot.
        same_line = ((cur_num.count('.') == 1 and prev_num.count('.') == 1)
                     or (cur_num[:cur_num.rfind('.')]
                         == prev_num[:prev_num.rfind('.')]))
        if (not same_line
            and state[cur_num] == 'dead'
            and state[prev_num] != 'dead'):
          op = OP_CHANGE
        cur_num = prev_num
        prev_num = self.prev_rev.get(cur_num, None)

    deltatext_code = DELTATEXT_EMPTY
    if text:
      deltatext_code = DELTATEXT_NONEMPTY

    c_rev = CVSRevision(Ctx(), timestamp, digest, prev_timestamp,
                        next_timestamp, op,
                        prev_rev, revision, next_rev,
                        self.file_in_attic, self.file_executable,
                        self.file_size,
                        deltatext_code, self.fname,
                        self.mode, self.rev_to_branch_name(revision),
                        self.taglist.get(revision, []),
                        self.branchlist.get(revision, []))
    self.revs.write(str(c_rev) + "\n")
    StatsKeeper().record_c_rev(c_rev)

    # has_key() is kept deliberately: only the mapping methods already
    # used on metadata_db are relied upon here.
    if not self.metadata_db.has_key(digest):
      self.metadata_db[digest] = (author, log)

  def parse_completed(self):
    """Finish processing of the current file.

    For every revision that carries tags or branches (self.taglist and
    self.branchlist), register each of those symbols in the symbol
    database as a 'branch blocker' of the branch on which the revision
    lies.  Revisions for which rev_to_branch_name() returns None are
    skipped.  Finally, count the file in self.num_files."""
    for revision, symbols in self.taglist.items() + self.branchlist.items():
      if not symbols:
        continue
      # The branch depends only on REVISION, so look it up once per
      # revision rather than once per symbol.  (Assumes
      # rev_to_branch_name() is a side-effect-free lookup.)
      name = self.rev_to_branch_name(revision)
      if name is None:
        continue
      for symbol in symbols:
        self.symbol_db.register_branch_blocker(name, symbol)

    self.num_files = self.num_files + 1

  def write_symbol_db(self):
    """Tell the symbol database to write itself out (delegates to
    self.symbol_db.write())."""
    symbol_db = self.symbol_db
    symbol_db.write()

class SymbolingsLogger:
  """Manage the file that contains lines for symbol openings and
  closings.

  This data will later be used to determine valid SVNRevision ranges
  from which a file can be copied when creating a branch or tag in
  Subversion.  Do this by finding "Openings" and "Closings" for each
  file copied onto a branch or tag.

  An "Opening" is the CVSRevision from which a given branch/tag
  sprouts on a path.

  The "Closing" for that branch/tag and path is the next CVSRevision
  on the same line of development as the opening.

  For example, on file 'foo.c', branch BEE has branch number 1.2.2 and
  obviously sprouts from revision 1.2.  Therefore, 1.2 is the opening
  for BEE on path 'foo.c', and 1.3 is the closing for BEE on path
  'foo.c'.  Note that there may be many revisions chronologically
  between 1.2 and 1.3, for example, revisions on branches of 'foo.c',
  perhaps even including on branch BEE itself.  But 1.3 is the next
  revision *on the same line* as 1.2, that is why it is the closing
  revision for those symbolic names of which 1.2 is the opening.

  The reason for doing all this hullabaloo is to make branch and tag
  creation as efficient as possible by minimizing the number of copies
  and deletes per creation.  For example, revisions 1.2 and 1.3 of
  foo.c might correspond to revisions 17 and 30 in Subversion.  That
  means that when creating branch BEE, there is some motivation to do
  the copy from one of 17-30.  Now if there were another file,
  'bar.c', whose opening and closing CVSRevisions for BEE corresponded
  to revisions 24 and 39 in Subversion, we would know that the ideal
  thing would be to copy the branch from somewhere between 24 and 29,
  inclusive.
  """
  def __init__(self):
    """Open the symbolings file and the temporary closings file for
    writing, and register both with Cleanup."""
    self.symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS), 'w')
    Cleanup().register(temp(SYMBOL_OPENINGS_CLOSINGS), pass6)
    self.closings = open(temp(SYMBOL_CLOSINGS_TMP), 'w')
    Cleanup().register(temp(SYMBOL_CLOSINGS_TMP), pass5)

    # The keys of this dictionary are *source* cvs_paths for which
    # log_revision() has noted an opening (see
    # _note_default_branch_opening).  The values are lists of the
    # (uncleaned) symbolic names that the path has opened.
    self.open_paths_with_default_branches = { }

  def log_revision(self, c_rev, svn_revnum):
    """Log any openings found in C_REV, and if C_REV.next_rev is not
    None, a closing.  The opening uses SVN_REVNUM, but the closing (if
    any) will have its revnum determined later.

    Nothing at all is logged if C_REV has neither tags nor branches,
    and no opening is logged if C_REV is a delete."""
    for name in c_rev.tags + c_rev.branches:
      self._note_default_branch_opening(c_rev, name)
      if c_rev.op != OP_DELETE:
        self._log(name, svn_revnum,
                  c_rev.cvs_path, c_rev.branch_name, OPENING)

      # If our c_rev has a next_rev, then that's the closing rev for
      # this source revision.  Log it to closings for later processing
      # since we don't know the svn_revnum yet.
      if c_rev.next_rev is not None:
        self.closings.write('%s %s\n' %
                            (name, c_rev.unique_key(c_rev.next_rev)))

  def _log(self, name, svn_revnum, cvs_path, branch_name, type):
    """Write out a single line to the symbol_openings_closings file
    representing that SVN_REVNUM of CVS_PATH on BRANCH_NAME is either
    the opening or closing (TYPE) of NAME (a symbolic name).

    A false BRANCH_NAME (e.g. None) is written as '*'.

    TYPE should only be one of the following global constants:
    OPENING or CLOSING."""
    # SVN_REVNUM is zero-padded to 8 digits, which is enough for
    # 99,999,999 SVN revs.  That *should* be enough.
    self.symbolings.write(
        '%s %.8d %s %s %s\n'
        % (name, svn_revnum, type, branch_name or '*', cvs_path))

  def close(self):
    """Iterate through the closings file, lookup the svn_revnum for
    each closing CVSRevision, and write a proper line out to the
    symbolings file.  Both files are closed by this method."""
    # Use this to get the c_rev of our rev_key
    cvs_revs_db = CVSRevisionDatabase(DB_OPEN_READ)

    self.closings.close()
    for line in fileinput.FileInput(temp(SYMBOL_CLOSINGS_TMP)):
      # Each line is "NAME REV_KEY"; only the first space separates
      # the two fields.
      (name, rev_key) = line.rstrip().split(" ", 1)
      svn_revnum = Ctx()._persistence_manager.get_svn_revnum(rev_key)

      c_rev = cvs_revs_db.get_revision(rev_key)
      self._log(name, svn_revnum, c_rev.cvs_path, c_rev.branch_name, CLOSING)

    self.symbolings.close()

  def _note_default_branch_opening(self, c_rev, symbolic_name):
    """Remember that C_REV.cvs_path has opened SYMBOLIC_NAME, so that
    log_default_branch_closing() can later log a closing for it.

    Note that this is done unconditionally: no check is made here
    that C_REV is actually a default branch revision."""
    self.open_paths_with_default_branches.setdefault(
        c_rev.cvs_path, []).append(symbolic_name)

  def log_default_branch_closing(self, c_rev, svn_revnum):
    """If self.open_paths_with_default_branches contains
    C_REV.cvs_path, then log each name in
    self.open_paths_with_default_branches[C_REV.cvs_path] as a closing
    with SVN_REVNUM as the closing revision number, and forget the
    path.  Otherwise do nothing."""
    path = c_rev.cvs_path
    if path in self.open_paths_with_default_branches:
      # log each symbol as a closing
      for name in self.open_paths_with_default_branches[path]:
        self._log(name, svn_revnum, path, None, CLOSING)
      # Remove them from the openings list as we're done with them.
      del self.open_paths_with_default_branches[path]


class PersistenceManager:
  """The PersistenceManager allows us to effectively store SVNCommits
  to disk and retrieve them later using only their subversion revision
  number as the key.  It also returns the subversion revision number
  for a given CVSRevision's unique key.

  All information pertinent to each SVNCommit is stored in a series of
  on-disk databases so that SVNCommits can be retrieved on-demand.

  MODE is one of the constants DB_OPEN_NEW or DB_OPEN_READ.
  In 'new' mode, PersistenceManager will initialize a new set of on-disk
  databases and be fully-featured.
  In 'read' mode, PersistenceManager will open existing on-disk databases
  and the set_* methods will be unavailable."""
  def __init__(self, mode):
    """Open the databases in MODE.  Raise RuntimeError if MODE is
    neither DB_OPEN_NEW nor DB_OPEN_READ."""
    self.mode = mode
    if mode not in (DB_OPEN_NEW, DB_OPEN_READ):
      raise RuntimeError("Invalid 'mode' argument to PersistenceManager")
    self.svn2cvs_db = Database(temp(SVN_REVNUMS_TO_CVS_REVS), mode)
    Cleanup().register(temp(SVN_REVNUMS_TO_CVS_REVS), pass8)
    self.cvs2svn_db = Database(temp(CVS_REVS_TO_SVN_REVNUMS), mode)
    Cleanup().register(temp(CVS_REVS_TO_SVN_REVNUMS), pass8)
    # The metadata and CVS revision databases are only ever read here,
    # whatever MODE is.
    self.svn_commit_metadata = Database(temp(METADATA_DB), DB_OPEN_READ)
    self.cvs_revisions = CVSRevisionDatabase(DB_OPEN_READ)
    ###PERF kff Elsewhere there are comments about sucking the tags db
    ### into memory.  That seems like a good idea.
    if not Ctx().trunk_only:
      # Note: self.tags_db does not exist in trunk-only mode.
      self.tags_db = TagsDatabase(DB_OPEN_READ)

    # "branch_name" -> svn_revnum in which branch was last filled.
    # This is used by CVSCommit._pre_commit, to prevent creating a fill
    # revision which would have nothing to do.
    self.last_filled = {}

  def get_svn_revnum(self, cvs_rev_unique_key):
    """Return the Subversion revision number in which
    CVS_REV_UNIQUE_KEY was committed, or SVN_INVALID_REVNUM if there
    is no mapping for CVS_REV_UNIQUE_KEY."""
    return int(self.cvs2svn_db.get(cvs_rev_unique_key, SVN_INVALID_REVNUM))

  def get_svn_commit(self, svn_revnum):
    """Return an SVNCommit that corresponds to SVN_REVNUM.

    If no SVNCommit exists for revnum SVN_REVNUM, then return None.

    This method can throw SVNCommitInternalInconsistencyError.
    """
    (c_rev_keys, motivating_revnum, name, date) = self.svn2cvs_db.get(
        str(svn_revnum), (None, None, None, None))
    if c_rev_keys is None:
      return None

    # Only build the SVNCommit once we know there is something to put
    # into it.  Constructing one for a revnum that has no record is
    # wasted work, and a false SVN_REVNUM would make the constructor
    # allocate a fresh revision number.
    svn_commit = SVNCommit("Retrieved from disk", svn_revnum)

    digest = None
    for key in c_rev_keys:
      c_rev = self.cvs_revisions.get_revision(key)
      svn_commit.add_revision(c_rev)
      # Set the author and log message for this commit by using
      # CVSRevision metadata, but only if haven't done so already.
      if digest is None:
        digest = c_rev.digest
        author, log_msg = self.svn_commit_metadata[digest]
        svn_commit.set_author(author)
        svn_commit.set_log_msg(log_msg)

    svn_commit.set_date(date)

    # If we're doing a trunk-only conversion, we don't need to do any more
    # work.
    if Ctx().trunk_only:
      return svn_commit

    if name:
      if svn_commit.cvs_revs:
        raise SVNCommit.SVNCommitInternalInconsistencyError(
            "An SVNCommit cannot have cvs_revisions *and* a corresponding\n"
            "symbolic name ('%s') to fill."
            % (_clean_symbolic_name(name),))
      svn_commit.set_symbolic_name(name)
      # has_key() is kept: tags_db is a database object, not
      # necessarily a plain dictionary.
      if self.tags_db.has_key(name):
        svn_commit.is_tag = 1

    if motivating_revnum is not None:
      svn_commit.set_motivating_revnum(motivating_revnum)

    return svn_commit

  def put_svn_commit(self, svn_revnum, cvs_revs,
                     date, name, motivating_revnum):
    """Record the bidirectional mapping between SVN_REVNUM and
    CVS_REVS and record associated attributes.

    Raise RuntimeError if this PersistenceManager was opened in
    DB_OPEN_READ mode."""
    if self.mode == DB_OPEN_READ:
      raise RuntimeError(
          'Write operation attempted on read-only PersistenceManager')

    for c_rev in cvs_revs:
      Log().write(LOG_VERBOSE, " ", c_rev.unique_key())

    # svn revnum -> (unique keys of its CVS revisions, attributes)
    self.svn2cvs_db[str(svn_revnum)] = ([x.unique_key() for x in cvs_revs],
                                        motivating_revnum, name, date)

    # CVS revision unique key -> svn revnum
    for c_rev in cvs_revs:
      self.cvs2svn_db[c_rev.unique_key()] = svn_revnum

    # If it is not a primary commit, then record last_filled.  name is
    # allowed to be None.
    if name or motivating_revnum:
      self.last_filled[name] = svn_revnum


class CVSCommit:
  """Each instance of this class contains a number of CVS Revisions
  that correspond to one or more Subversion Commits.  After all CVS
  Revisions are added to the grouping, calling process_revisions will
  generate a Subversion Commit (or Commits) for the set of CVS
  Revisions in the grouping.

  Revisions are added with add_revision().  process_revisions() then
  runs _pre_commit(), _commit() and _post_commit(), in that order
  (only _commit() in trunk-only mode)."""

  def __init__(self, digest, author, log):
    """Create an empty grouping.  DIGEST, AUTHOR and LOG are stored
    unchanged in the attributes of the same names."""
    self.digest = digest
    self.author = author
    self.log = log

    # Symbolic names for which the last source revision has already
    # been seen and for which the CVSRevisionAggregator has already
    # generated a fill SVNCommit.  See self.process_revisions().
    self.done_symbols = [ ]

    # Used as a set: the keys are the fnames of the CVSRevisions added
    # so far (the values are always 1).  See has_file().
    self.files = { }
    # Lists of CVSRevisions.  add_revision() puts OP_DELETE revisions
    # into self.deletes and all others into self.changes.
    self.changes = [ ]
    self.deletes = [ ]

    # Start out with a t_min higher than any incoming time T, and a
    # t_max lower than any incoming T.  This way the first T will
    # push t_min down to T, and t_max up to T, naturally (without any
    # special-casing), and successive times will then ratchet them
    # outward as appropriate.
    self.t_min = 1L<<32
    self.t_max = 0

    # This will be set to the SVNCommit that occurs in self._commit.
    self.motivating_commit = None

    # This is a list of all non-primary commits motivated by the main
    # commit.  We gather these so that we can set their dates to the
    # same date as the primary commit.
    self.secondary_commits = [ ]

    # State for handling default branches.
    #
    # Here is a tempting, but ultimately nugatory, bit of logic, which
    # I share with you so you may appreciate the less attractive, but
    # refreshingly non-nugatory, logic which follows it:
    #
    # If some of the commits in this txn happened on a non-trunk
    # default branch, then those files will have to be copied into
    # trunk manually after being changed on the branch (because the
    # RCS "default branch" appears as head, i.e., trunk, in practice).
    # As long as those copies don't overwrite any trunk paths that
    # were also changed in this commit, then we can do the copies in
    # the same revision, because they won't cover changes that don't
    # appear anywhere/anywhen else.  However, if some of the trunk dst
    # paths *did* change in this commit, then immediately copying the
    # branch changes would lose those trunk mods forever.  So in this
    # case, we need to do at least that copy in its own revision.  And
    # for simplicity's sake, if we're creating the new revision for
    # even one file, then we just do all such copies together in the
    # new revision.
    #
    # Doesn't that sound nice?
    #
    # Unfortunately, Subversion doesn't support copies with sources
    # in the current txn.  All copies must be based in committed
    # revisions.  Therefore, we generate the above-described new
    # revision unconditionally.
    #
    # This is a list of c_revs, and a c_rev is appended for each
    # default branch commit that will need to be copied to trunk (or
    # deleted from trunk) in some generated revision following the
    # "regular" revision.
    self.default_branch_cvs_revisions = [ ]

  def __cmp__(self, other):
    # Commits should be sorted by t_max.  If both self and other have
    # the same t_max, break the tie using t_min, and lastly, digest.
    # If all those are equal, then compare based on ids, to ensure
    # that no two instances compare equal.
    return (cmp(self.t_max, other.t_max) or cmp(self.t_min, other.t_min)
            or cmp(self.digest, other.digest) or cmp(id(self), id(other)))

  def __hash__(self):
    # Hash by identity, in keeping with __cmp__, which never reports
    # two distinct instances as equal.
    return id(self)

  def has_file(self, fname):
    """Return true if a CVSRevision whose fname is FNAME has already
    been added to this commit."""
    return self.files.has_key(fname)

  def revisions(self):
    """Return a new list containing all of this commit's
    CVSRevisions: the changes first, then the deletes."""
    return self.changes + self.deletes

  def opens_symbolic_name(self, name):
    """Return 1 if any CVSRevision in this commit reports, via its own
    opens_symbolic_name() method, that it opens the symbolic name
    NAME; otherwise return 0."""
    for c_rev in self.revisions():
      if c_rev.opens_symbolic_name(name):
        return 1
    return 0

  def add_revision(self, c_rev):
    """Add C_REV to this commit: widen the commit's time range to
    include C_REV.timestamp, file C_REV under deletes or changes
    according to its op, and note its fname."""
    # Record the time range of this commit.
    #
    # ### ISSUE: It's possible, though unlikely, that the time range
    # of a commit could get gradually expanded to be arbitrarily
    # longer than COMMIT_THRESHOLD.  I'm not sure this is a huge
    # problem, and anyway deciding where to break it up would be a
    # judgement call.  For now, we just print a warning in
    # process_revisions() if this happens.
    if c_rev.timestamp < self.t_min:
      self.t_min = c_rev.timestamp
    if c_rev.timestamp > self.t_max:
      self.t_max = c_rev.timestamp

    if c_rev.op == OP_DELETE:
      self.deletes.append(c_rev)
    else:
      # OP_CHANGE or OP_ADD
      self.changes.append(c_rev)

    self.files[c_rev.fname] = 1

  def _pre_commit(self):
    """Generates any SVNCommits that must exist before the main
    commit.  They are appended to self.secondary_commits; they are
    not flushed here."""

    # There may be multiple c_revs in this commit that would cause
    # branch B to be filled, but we only want to fill B once.  On the
    # other hand, there might be multiple branches committed on in
    # this commit.  Whatever the case, we should count exactly one
    # commit per branch, because we only fill a branch once per
    # CVSCommit.  This list tracks which branches we've already
    # counted.
    accounted_for_sym_names = [ ]

    def fill_needed(c_rev, pm):
      """Return 1 if this is the first commit on a new branch (for
      this file) and we need to fill the branch; else return 0
      (meaning that some other file's first commit on the branch has
      already done the fill for us).

      If C_REV.op is OP_ADD, only return 1 if the branch that this
      commit is on has no last filled revision.

      PM is a PersistenceManager to query.
      """

      # Different '.' counts indicate that c_rev is now on a different
      # line of development (and may need a fill)
      if c_rev.rev.count('.') != c_rev.prev_rev.count('.'):
        svn_revnum = pm.get_svn_revnum(c_rev.unique_key(c_rev.prev_rev))
        # It should be the case that when we have a file F that
        # is added on branch B (thus, F on trunk is in state
        # 'dead'), we generate an SVNCommit to fill B iff the branch
        # has never been filled before.
        #
        # If this c_rev.op == OP_ADD, *and* the branch has never
        # been filled before, then fill it now.  Otherwise, no need to
        # fill it.
        if c_rev.op == OP_ADD:
          if pm.last_filled.get(c_rev.branch_name, None) is None:
            return 1
        elif c_rev.op == OP_CHANGE:
          # Fill if the previous revision was committed after the
          # branch was last filled (or the branch was never filled).
          if svn_revnum > pm.last_filled.get(c_rev.branch_name, 0):
            return 1
        elif c_rev.op == OP_DELETE:
          if pm.last_filled.get(c_rev.branch_name, None) is None:
            return 1
      return 0

    for c_rev in self.changes + self.deletes:
      # If a commit is on a branch, we must ensure that the branch
      # path being committed exists (in HEAD of the Subversion
      # repository).  If it doesn't exist, we will need to fill the
      # branch.  After the fill, the path on which we're committing
      # will exist.
      if c_rev.branch_name \
          and c_rev.branch_name not in accounted_for_sym_names \
          and c_rev.branch_name not in self.done_symbols \
          and fill_needed(c_rev, Ctx()._persistence_manager):
        svn_commit = SVNCommit("pre-commit symbolic name '%s'"
                               % c_rev.branch_name)
        svn_commit.set_symbolic_name(c_rev.branch_name)
        self.secondary_commits.append(svn_commit)
        accounted_for_sym_names.append(c_rev.branch_name)

  def _commit(self):
    """Generates the primary SVNCommit that corresponds to this
    CVSCommit, and stores it in self.motivating_commit."""
    # Generate an SVNCommit unconditionally.  Even if the only change
    # in this CVSCommit is a deletion of an already-deleted file (that
    # is, a CVS revision in state 'dead' whose predecessor was also in
    # state 'dead'), the conversion will still generate a Subversion
    # revision containing the log message for the second dead
    # revision, because we don't want to lose that information.
    svn_commit = SVNCommit("commit")
    self.motivating_commit = svn_commit

    for c_rev in self.changes:
      svn_commit.add_revision(c_rev)
      # Only make a change if we need to.  When 1.1.1.1 has an empty
      # deltatext, the explanation is almost always that we're looking
      # at an imported file whose 1.1 and 1.1.1.1 are identical.  On
      # such imports, CVS creates an RCS file where 1.1 has the
      # content, and 1.1.1.1 has an empty deltatext, i.e, the same
      # content as 1.1.  There's no reason to reflect this non-change
      # in the repository, so we want to do nothing in this case.  (If
      # we were really paranoid, we could make sure 1.1's log message
      # is the CVS-generated "Initial revision\n", but I think the
      # conditions below are strict enough.)
      if not ((c_rev.deltatext_code == DELTATEXT_EMPTY)
              and (c_rev.rev == "1.1.1.1")):
        if c_rev.is_default_branch_revision():
          self.default_branch_cvs_revisions.append(c_rev)

    for c_rev in self.deletes:
      # When a file is added on a branch, CVS not only adds the file
      # on the branch, but generates a trunk revision (typically
      # 1.1) for that file in state 'dead'.  We only want to add
      # this revision if the log message is not the standard cvs
      # fabricated log message.
      if c_rev.prev_rev is None:
        # c_rev.branches may be empty if the originating branch
        # has been excluded.
        if not c_rev.branches:
          continue
        cvs_generated_msg = ('file %s was initially added on branch %s.\n'
                             % (c_rev.filename(),
                                c_rev.branches[0]))
        author, log_msg = \
            Ctx()._persistence_manager.svn_commit_metadata[c_rev.digest]
        if log_msg == cvs_generated_msg:
          continue

      svn_commit.add_revision(c_rev)
      if c_rev.is_default_branch_revision():
        self.default_branch_cvs_revisions.append(c_rev)

    # There is a slight chance that we didn't actually register any
    # CVSRevisions with our SVNCommit (see loop over self.deletes
    # above), so if we have no CVSRevisions, we don't flush the
    # svn_commit to disk and roll back our revnum.
    if len(svn_commit.cvs_revs) > 0:
      svn_commit.flush()
    else:
      # We will not be flushing this SVNCommit, so rollback the
      # SVNCommit revision counter.
      SVNCommit.revnum = SVNCommit.revnum - 1

    # Every revision of this CVSCommit is logged, including any
    # deletes that were skipped (not added to svn_commit) above.
    if not Ctx().trunk_only:
      for c_rev in self.revisions():
        Ctx()._symbolings_logger.log_revision(c_rev, svn_commit.revnum)

  def _post_commit(self):
    """Generates any SVNCommits that we can perform now that _commit
    has happened.  That is, handle non-trunk default branches.
    Sometimes an RCS file has a non-trunk default branch, so a commit
    on that default branch would be visible in a default CVS checkout
    of HEAD.  If we don't copy that commit over to Subversion's trunk,
    then there will be no Subversion tree which corresponds to that
    CVS checkout.  Of course, in order to copy the path over, we may
    first need to delete the existing trunk there.

    The generated SVNCommit (if any) is appended to
    self.secondary_commits; it is not flushed here."""

    # Only generate a commit if we have default branch revs
    if len(self.default_branch_cvs_revisions):
      # Generate an SVNCommit for all of our default branch c_revs.
      svn_commit = SVNCommit("post-commit default branch(es)")
      svn_commit.set_motivating_revnum(self.motivating_commit.revnum)
      for c_rev in self.default_branch_cvs_revisions:
        svn_commit.add_revision(c_rev)
        Ctx()._symbolings_logger.log_default_branch_closing(c_rev,
                                                            svn_commit.revnum)
      self.secondary_commits.append(svn_commit)

  def process_revisions(self, done_symbols):
    """Process all the CVSRevisions that this instance has, creating
    one or more SVNCommits in the process.  Generate fill SVNCommits
    only for symbols not in DONE_SYMBOLS (avoids unnecessary
    fills).

    Return the primary SVNCommit that corresponds to this CVSCommit.
    The returned SVNCommit is the commit that motivated any other
    SVNCommits generated in this CVSCommit.

    In trunk-only mode only the primary commit is generated."""
    self.done_symbols = done_symbols
    # Both end points count, so a grouping whose revisions all share
    # one timestamp lasts 1 second.
    seconds = self.t_max - self.t_min + 1

    Log().write(LOG_VERBOSE, '-' * 60)
    Log().write(LOG_VERBOSE, 'CVS Revision grouping:')
    if seconds == 1:
      Log().write(LOG_VERBOSE, '  Start time: %s (duration: 1 second)'
                  % time.ctime(self.t_max))
    else:
      Log().write(LOG_VERBOSE, '  Start time: %s' % time.ctime(self.t_min))
      Log().write(LOG_VERBOSE, '  End time:   %s (duration: %d seconds)'
                  % (time.ctime(self.t_max), seconds))

    if seconds > COMMIT_THRESHOLD + 1:
      Log().write(LOG_WARN, '%s: grouping spans more than %d seconds'
                  % (warning_prefix, COMMIT_THRESHOLD))

    if Ctx().trunk_only: # Only do the primary commit if we're trunk-only
      self._commit()
      return self.motivating_commit

    self._pre_commit()
    self._commit()
    self._post_commit()

    # The secondary commits get the date of the primary commit, and
    # are only flushed now, after the primary commit has been dealt
    # with.
    for svn_commit in self.secondary_commits:
      svn_commit.set_date(self.motivating_commit.get_date())
      svn_commit.flush()

    return self.motivating_commit


class SVNCommit:
  """A single commit to the Subversion repository.  Every SVNCommit
  is one of three kinds:

  1. It commits one or more CVSRevisions (and cannot fill a symbolic
     name).

  2. It creates or fills a symbolic name (and cannot commit
     CVSRevisions).

  3. It updates trunk to reflect the contents of a particular branch
     (needed to handle RCS default branches)."""

  # Revision number handed to the next SVNCommit that is created
  # without an explicit REVNUM.  Counting starts at 2 because
  # SVNRepositoryMirror uses the first commit to create trunk, tags,
  # and branches.
  revnum = 2

  class SVNCommitInternalInconsistencyError(Exception):
    """Raised when an impossible state is encountered in the SVNCommit
    databases."""
    pass

  def __init__(self, description="", revnum=None, cvs_revs=None):
    """Create an SVNCommit.

    DESCRIPTION is used for debugging output only.  If REVNUM is given
    (and true), this SVNCommit corresponds to that revision number;
    otherwise it takes the current value of the class counter
    SVNCommit.revnum, which is then incremented.  CVS_REVS, if given,
    must be the exact set of CVSRevisions for REVNUM.

    It is an error to pass CVS_REVS without REVNUM, but REVNUM may be
    passed without CVS_REVS, in which case revisions can be added one
    at a time through add_revision()."""
    self._description = description

    # Revprop metadata for this commit.  The values assigned here are
    # only placeholders; at least the log message and the date should
    # have been replaced by the time they are used.
    #
    # The attributes are private because they have to be handed out
    # encoded in UTF8 while callers are free to set them in any
    # encoding.  They are therefore written through the set_*()
    # methods and read, as a dictionary, through self.get_revprops().
    self._author = Ctx().username
    self._log_msg = "This log message means an SVNCommit was used too soon."
    # Latest date seen so far.
    self._max_date = 0

    self.cvs_revs = cvs_revs or []

    if not revnum:
      # No explicit number: claim the next one from the class counter.
      self.revnum = SVNCommit.revnum
      SVNCommit.revnum += 1
    else:
      self.revnum = revnum

    # The (uncleaned) symbolic name filled by this SVNCommit, if any.
    self.symbolic_name = None

    # For a default branch synchronization commit, the subversion
    # revision number of the *primary* commit in which the default
    # branch changes actually happened; None for any other commit.
    #
    # Several synchronization commits may refer to the same motivating
    # revision number, and one synchronization commit may contain
    # CVSRevisions from several different default branches.
    self.motivating_revnum = None

    # True only if this commit is a fill of a symbolic name that is a
    # tag; None in all other cases.
    self.is_tag = None

  def set_symbolic_name(self, symbolic_name):
    "Record SYMBOLIC_NAME as self.symbolic_name."
    self.symbolic_name = symbolic_name

  def set_motivating_revnum(self, revnum):
    "Record REVNUM as self.motivating_revnum."
    self.motivating_revnum = revnum

  def set_author(self, author):
    """Make AUTHOR (a locally-encoded string) the author of this
    SVNCommit.  There is no other way to set the author."""
    self._author = author

  def set_log_msg(self, msg):
    """Make MSG (a locally-encoded string) the log message of this
    SVNCommit.  There is no other way to set the log message."""
    self._log_msg = msg

  def set_date(self, date):
    """Make DATE (an integer) the date of this SVNCommit.

    self.add_revision() maintains the date automatically from the
    CVSRevisions it is given, so calling this may be unnecessary; and
    a value set here can still be replaced by a later call to
    self.add_revision()."""
    self._max_date = date

  def get_date(self):
    """Return the date of this SVNCommit as an integer."""
    return self._max_date

  def get_revprops(self):
    """Return the Subversion revprops of this SVNCommit as a
    dictionary with the keys 'svn:author', 'svn:log' and 'svn:date'.

    Author and log message are converted with to_utf8().  If that
    raises UnicodeError, warnings are logged and the unconverted
    values are returned instead."""
    date = format_date(self._max_date)
    try:
      if self._author is None:
        utf8_author = None
      else:
        utf8_author = to_utf8(self._author)
      utf8_log = to_utf8(self.get_log_msg())
    except UnicodeError:
      Log().write(LOG_WARN,
                  '%s: problem encoding author or log message:'
                  % warning_prefix)
      Log().write(LOG_WARN, "  author: '%s'" % self._author)
      Log().write(LOG_WARN, "  log:    '%s'" % self.get_log_msg().rstrip())
      Log().write(LOG_WARN, "  date:   '%s'" % date)
      Log().write(LOG_WARN,
                  "(subversion rev %s)  Related files:" % self.revnum)
      for c_rev in self.cvs_revs:
        Log().write(LOG_WARN, " ", c_rev.fname)

      Log().write(LOG_WARN, "Consider rerunning with one or more ",
                  "'--encoding' parameters.\n")
      # Handing back the original data (in its unknown encoding) beats
      # both quitting and recording nothing at all.
      return { 'svn:author' : self._author,
               'svn:log'    : self.get_log_msg(),
               'svn:date'   : date }

    return { 'svn:author' : utf8_author,
             'svn:log'    : utf8_log,
             'svn:date'   : date }

  def add_revision(self, cvs_rev):
    """Append CVS_REV to self.cvs_revs, and advance the date of this
    SVNCommit if the timestamp of CVS_REV is later than it."""
    self.cvs_revs.append(cvs_rev)
    timestamp = cvs_rev.timestamp
    if timestamp > self._max_date:
      self._max_date = timestamp

  def flush(self):
    """Log the creation of this revision and pass its revision number,
    CVSRevisions, date, symbolic name and motivating revision number
    to the persistence manager."""
    Log().write(LOG_NORMAL, "Creating Subversion r%d (%s)"
                % (self.revnum, self._description))
    Ctx()._persistence_manager.put_svn_commit(self.revnum,
                                              self.cvs_revs,
                                              self._max_date,
                                              self.symbolic_name,
                                              self.motivating_revnum)

  def __str__(self):
    """Return a human-readable description of this SVNCommit.  The
    format is not intended to be machine-parseable (although nobody
    will stop you from trying!)"""
    pieces = [ "SVNCommit #: " + str(self.revnum) + "\n" ]
    if not self.symbolic_name:
      pieces.append("   NO symbolic name\n")
    else:
      pieces.append("   symbolic name: "
                    + _clean_symbolic_name(self.symbolic_name) + "\n")
    pieces.append("   debug description: " + self._description + "\n")
    pieces.append("   cvs_revs:\n")
    for c_rev in self.cvs_revs:
      pieces.append("     " + c_rev.unique_key() + "\n")
    return "".join(pieces)

  def get_log_msg(self):
    """Return the log message of this commit: the stored message for
    a primary commit, or the appropriate manufactured message for a
    secondary commit (symbolic name fill or default branch
    synchronization)."""
    if self.symbolic_name is not None:
      return self._log_msg_for_symbolic_name_commit()
    if self.motivating_revnum is not None:
      return self._log_msg_for_default_branch_commit()
    return self._log_msg

  def _log_msg_for_symbolic_name_commit(self):
    """Return the log message for a manufactured commit that fills
    self.symbolic_name.  The message is worded for a tag if
    self.is_tag is true, and for a branch otherwise."""
    if self.is_tag:
      kind = 'tag'
    else:
      kind = 'branch'

    cleaned_name = _clean_symbolic_name(self.symbolic_name)

    # Poor man's textwrap.fill() (which is not available before Python
    # 2.3): names of 13 or more characters go onto a line of their own.
    if len(cleaned_name) < 13:
      separator = ' '
    else:
      separator = '\n'

    return ("This commit was manufactured by cvs2svn to create %s%s'%s'."
            % (kind, separator, cleaned_name))

  def _log_msg_for_default_branch_commit(self):
    """Return the log message for a manufactured commit that
    synchronizes a non-trunk default branch with trunk."""
    return ('This commit was generated by cvs2svn to compensate for '
            'changes in r%d,\n'
            'which included commits to RCS files with non-trunk default '
            'branches.\n' % self.motivating_revnum)

class CVSRevisionAggregator:
  """This class groups CVSRevisions into CVSCommits that represent
  at least one SVNCommit."""
  def __init__(self):
    self.metadata_db = Database(temp(METADATA_DB), DB_OPEN_READ)
    if not Ctx().trunk_only:
      self.last_revs_db = Database(temp(SYMBOL_LAST_CVS_REVS_DB),
                                   DB_OPEN_READ)

    # A map { key : CVSCommit } of CVS commits currently being
    # accumulated.  If the CVSCommit is still open to further
    # CVSRevisions, then key is CVSRevision.digest.  If not (because
    # an inbound commit wanted to affect a file that was already
    # within the CVSCommit), then key is CVSRevision.digest plus some
    # number of appended '-'.
    self.cvs_commits = {}

    # List of ready commits.
    self.ready_queue = [ ]

    # A map { symbol : None } of symbolic names for which the last
    # source CVSRevision has already been processed but which haven't
    # been closed yet.
    self.pending_symbols = {}

    # A list of closed symbols.  That is, we've already encountered
    # the last CVSRevision that is a source for that symbol, the final
    # fill for this symbol has been done, and we never need to fill it
    # again.
    self.done_symbols = [ ]

    # This variable holds the most recently created primary svn_commit
    # object.  CVSRevisionAggregator maintains this variable merely
    # for its date, so that it can set dates for the SVNCommits
    # created in self._attempt_to_commit_symbols().
    self.latest_primary_svn_commit = None

    Ctx()._symbolings_logger = SymbolingsLogger()
    Ctx()._persistence_manager = PersistenceManager(DB_OPEN_NEW)
    Ctx()._default_branches_db = SDatabase(temp(DEFAULT_BRANCHES_DB),
                                           DB_OPEN_READ)

  def _extract_ready_commits(self, timestamp):
    """Extract and return any active commits that expire by TIMESTAMP."""

    for digest_key, cvs_commit in self.cvs_commits.items():
      if cvs_commit.t_max + COMMIT_THRESHOLD < timestamp:
        self.ready_queue.append(cvs_commit)
        del self.cvs_commits[digest_key]

  def _commit_ready_commits(self):
    """Sort the commits from self.ready_queue by time, then process them."""
    self.ready_queue.sort()
    while self.ready_queue:
      cvs_commit = self.ready_queue[0]
      del self.ready_queue[0]
      self.latest_primary_svn_commit = \
          cvs_commit.process_revisions(self.done_symbols)
      self._attempt_to_commit_symbols()

  def process_revision(self, c_rev):
    # Each time we read a new line, scan the accumulating commits to
    # see if any are ready for processing.
    self._extract_ready_commits(c_rev.timestamp)

    for digest_key, cvs_commit in self.cvs_commits.items():
      # If the inbound commit is on the same file as a pending commit,
      # close the pending commit to further changes.  Don't flush it though,
      # as there may be other pending commits dated before this one.
      # ### ISSUE: the has_file() check below is not optimal.
      # It does fix the dataloss bug where revisions would get lost
      # if checked in too quickly, but it can also break apart the
      # commits.  The correct fix would require tracking the dependencies
      # between change sets and committing them in proper order.
      if cvs_commit.has_file(c_rev.fname):
        unused_id = digest_key + '-'
        # Find a string that does is not already a key in
        # the self.cvs_commits dict
        while self.cvs_commits.has_key(unused_id):
          unused_id = unused_id + '-'
        self.cvs_commits[unused_id] = cvs_commit
        del self.cvs_commits[digest_key]

    # Add this item into the set of still-available commits.
    if self.cvs_commits.has_key(c_rev.digest):
      cvs_commit = self.cvs_commits[c_rev.digest]
    else:
      author, log = self.metadata_db[c_rev.digest]
      cvs_commit = CVSCommit(c_rev.digest, author, log)
      self.cvs_commits[c_rev.digest] = cvs_commit
    cvs_commit.add_revision(c_rev)

    # Any elements in self.ready_queue at this point need to be
    # processed, because this latest rev couldn't possibly be part of
    # any of them.
    self._commit_ready_commits()

    self._add_pending_symbols(c_rev)

  def flush(self):
    """Commit anything left in self.cvs_commits.  Then inform the
    SymbolingsLogger that all commits are done."""

    self._extract_ready_commits(1L<<32)
    self._commit_ready_commits()

    if not Ctx().trunk_only:
      Ctx()._symbolings_logger.close()

  def _add_pending_symbols(self, c_rev):
    """Add to self.pending_symbols any symbols from C_REV for which
    C_REV is the last CVSRevision.

    If we're not doing a trunk-only conversion, get the symbolic names
    that this c_rev is the last *source* CVSRevision for and add them
    to those left over from previous passes through the aggregator."""

    if not Ctx().trunk_only:
      for sym in self.last_revs_db.get(c_rev.unique_key(), []):
        self.pending_symbols[sym] = None

  def _attempt_to_commit_symbols(self):
    """Generate one SVNCommit for each symbol in self.pending_symbols
    that doesn't have an opening CVSRevision in either self.ready_queue
    or self.cvs_commits.values()."""

    # Make a list of all symbols from self.pending_symbols that do not
    # have *source* CVSRevisions in the pending commit queues
    # (self.cvs_commits or self.ready_queue):
    closeable_symbols = []
    pending_commits = self.cvs_commits.values() + self.ready_queue
    for sym in self.pending_symbols:
      for cvs_commit in pending_commits:
        if cvs_commit.opens_symbolic_name(sym):
          break
      else:
        closeable_symbols.append(sym)

    # Sort the closeable symbols so that we will always process the
    # symbols in the same order, regardless of the order in which the
    # dict hashing algorithm hands them back to us.  We do this so
    # that our tests will get the same results on all platforms.
    closeable_symbols.sort()
    for sym in closeable_symbols:
      svn_commit = SVNCommit("closing tag/branch '%s'" % sym)
      svn_commit.set_symbolic_name(sym)
      svn_commit.set_date(self.latest_primary_svn_commit.get_date())
      svn_commit.flush()
      self.done_symbols.append(sym)
      del self.pending_symbols[sym]


class SymbolingsReader:
  """Interface to the SYMBOL_OPENINGS_CLOSINGS_SORTED file and the
  SYMBOL_OFFSETS_DB.  It does the heavy lifting of finding and
  returning the correct opening and closing Subversion revision
  numbers for a given symbolic name."""
  def __init__(self):
    """Open SYMBOL_OPENINGS_CLOSINGS_SORTED for reading, and read the
    whole offsets database into memory."""
    self.symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), 'r')
    # The offsets database is really small and is read and updated a
    # fair bit, so an in-memory copy is used from here on.
    offsets_db = Database(temp(SYMBOL_OFFSETS_DB), DB_OPEN_READ)
    self.offsets = dict([ (key, offsets_db[key]) for key in offsets_db ])

  def filling_guide_for_symbol(self, symbolic_name, svn_revnum):
    """Return a new SymbolicNameFillingGuide for SYMBOLIC_NAME that
    holds the openings and closings recorded for it up to and
    including revision SVN_REVNUM.

    If an opening is encountered in this fill whose corresponding
    closing takes place later than SVN_REVNUM, that closing is not
    passed to the SymbolicNameFillingGuide in this fill (and it is
    discarded when it is encountered in a later fill).  That is
    perfectly fine: a valid fill is possible without the closing, and
    the policy is to fill whatever can be filled as soon as
    possible."""

    openings_closings_map = OpeningsClosingsMap(symbolic_name)

    # There may be no offset for the symbol: a branch can start with a
    # file that was added on a branch.  The guide is then built from
    # the empty map.
    if symbolic_name not in self.offsets:
      return SymbolicNameFillingGuide(openings_closings_map)

    # Resume reading where the entries of this symbol begin.
    self.symbolings.seek(self.offsets[symbolic_name])

    while True:
      # Remember where the line starts, so that the line at which the
      # loop stops can be read again by the next fill.
      line_start = self.symbolings.tell()
      line = self.symbolings.readline().rstrip()
      if not line:
        break
      name, revnum, entry_type, branch_name, cvs_path = line.split(" ", 4)
      if branch_name != '*':
        svn_path = Ctx().project.make_branch_path(branch_name, cvs_path)
      else:
        svn_path = Ctx().project.make_trunk_path(cvs_path)
      revnum = int(revnum)
      # Stop at the first entry that is too new or that belongs to
      # another symbol.
      if name != symbolic_name or revnum > svn_revnum:
        break
      openings_closings_map.register(svn_path, revnum, entry_type)

    # If anything that was read got used, move the stored offset to
    # the beginning of the last line read.
    if not openings_closings_map.is_empty():
      self.offsets[symbolic_name] = line_start

    return SymbolicNameFillingGuide(openings_closings_map)


class SvnRevisionRange:
  """The range of subversion revision numbers from which a path can be
  copied.

  self.opening_revnum is the number of the earliest such revision.
  self.closing_revnum is one higher than the number of the last such
  revision, or None if no closing has been registered."""

  def __init__(self, opening_revnum):
    """Create a range that opens at OPENING_REVNUM and has no closing
    yet."""
    self.opening_revnum = opening_revnum
    self.closing_revnum = None

  def add_closing(self, closing_revnum):
    """Register CLOSING_REVNUM as the closing of this range, unless a
    closing has been registered before."""
    # A non-trunk default branch can produce several closings; only
    # the first one encountered counts.
    if self.closing_revnum is not None:
      return
    self.closing_revnum = closing_revnum

  def __str__(self):
    """Return '[OPENING:CLOSING]', or '[OPENING:]' if there is no
    closing."""
    if self.closing_revnum is not None:
      return '[%d:%d]' % (self.opening_revnum, self.closing_revnum)
    return '[%d:]' % (self.opening_revnum,)


class OpeningsClosingsMap:
  """A dictionary of openings and closings for a symbolic name in the
  current SVNCommit.

  The user should call self.register() for the openings and closings,
  and can then retrieve what has been collected with
  self.get_things()."""

  def __init__(self, symbolic_name):
    """Create an empty map for SYMBOLIC_NAME, ready to receive
    openings and closings."""

    self.name = symbolic_name

    # Maps each svn_path to its SvnRevisionRange object.
    self.things = { }

  def register(self, svn_path, svn_revnum, type):
    """Register an opening or closing revision for this symbolic name.

    SVN_PATH is the source path that needs to be copied into the
    symbolic name.  SVN_REVNUM is either the first svn revision number
    that can be copied from (the opening), or the last (not inclusive)
    svn revision number that can be copied from (the closing).  TYPE
    tells which of the two it is: OPENING or CLOSING.

    An opening always starts a new SvnRevisionRange for SVN_PATH,
    replacing any range registered for that path before.  A closing
    only has an effect if an opening has been registered for SVN_PATH
    already; otherwise it is discarded.  A TYPE that is neither
    OPENING nor CLOSING is ignored.

    It is not necessary to register a closing for every opening.
    """
    if type == OPENING:
      # Openings are always recorded.
      self.things[svn_path] = SvnRevisionRange(svn_revnum)
      return
    if type != CLOSING:
      return
    # Closings are recorded only for paths whose opening is known.
    if svn_path in self.things:
      self.things[svn_path].add_closing(svn_revnum)

  def is_empty(self):
    """Return true if nothing has been accumulated yet, false
    otherwise."""
    return len(self.things) == 0

  def get_things(self):
    """Return the (svn_path, SvnRevisionRange) pairs of all svn_paths
    that have been registered."""

    return self.things.items()


class SymbolicNameFillingGuide:
  """A node tree representing the source paths to be copied to fill
  self.symbolic_name in the current SVNCommit.

  self._node_tree is the root of the directory tree, in the form {
  path_component : subnode }.  Leaf nodes are instances of
  SvnRevisionRange.  Intermediate (directory) nodes are dictionaries
  mapping relative names to subnodes.

  By walking self._node_tree and calling self.get_best_revnum() on
  each node, the caller can determine what subversion revision number
  to copy the path corresponding to that node from.  self._node_tree
  should be treated as read-only.

  The caller can then descend to sub-nodes to see if their "best
  revnum" differs from their parents' and if it does, take appropriate
  actions to "patch up" the subtrees."""

  def __init__(self, openings_closings_map):
    """Initializes a SymbolicNameFillingGuide for SYMBOLIC_NAME and
    store into it the openings and closings from
    OPENINGS_CLOSINGS_MAP."""

    self.name = openings_closings_map.name

    # The dictionary that holds our node tree as a map { node_key :
    # node }.
    self._node_tree = { }

    for svn_path, svn_revision_range in openings_closings_map.get_things():
      (head, tail) = _path_split(svn_path)
      self._get_node_for_path(head)[tail] = svn_revision_range

    #self.print_node_tree(self._node_tree)

  def _get_node_for_path(self, svn_path):
    """Return the node key for svn_path, creating new nodes as needed."""
    # Walk down the path, one node at a time.
    node = self._node_tree
    for component in svn_path.split('/'):
      if node.has_key(component):
        node = node[component]
      else:
        old_node = node
        node = {}
        old_node[component] = node

    return node

  def get_best_revnum(self, node, preferred_revnum):
    """Determine the best subversion revision number to use when
    copying the source tree beginning at NODE.  Returns a
    subversion revision number.

    PREFERRED_REVNUM is passed to best_rev and used to calculate the
    best_revnum."""

    def score_revisions(svn_revision_ranges):
      """Return a list of revisions and scores based on
      SVN_REVISION_RANGES.  The returned list looks like:

         [(REV1 SCORE1), (REV2 SCORE2), ...]

      where the tuples are sorted by revision number.
      SVN_REVISION_RANGES is a list of SvnRevisionRange objects.

      For each svn revision that appears as either an opening_revnum
      or closing_revnum for one of the svn_revision_ranges, output a
      tuple indicating how many of the SvnRevisionRanges include that
      svn_revision in its range.  A score thus indicates that copying
      the corresponding revision (or any following revision up to the
      next revision in the list) of the object in question would yield
      that many correct paths at or underneath the object.  There may
      be other paths underneath it which are not correct and would
      need to be deleted or recopied; those can only be detected by
      descending and examining their scores.

      If OPENINGS is empty, return the empty list."""
      openings = [ x.opening_revnum
                   for x in svn_revision_ranges ]
      closings = [ x.closing_revnum
                   for x in svn_revision_ranges
                   if x.closing_revnum is not None ]

      # First look for easy out.
      if not openings:
        return []

      # Create a list with both openings (which increment the total)
      # and closings (which decrement the total):
      things = [(rev,1) for rev in openings] + [(rev,-1) for rev in closings]
      # Sort by revision number:
      things.sort()
      # Initialize output list with zeroth element of things.  This
      # element must exist, because it was already verified that
      # openings is not empty.
      scores = [ things[0] ]
      total = scores[-1][1]
      for (rev, change) in things[1:]:
        total += change
        if rev == scores[-1][0]:
          # Same revision as last entry; modify last entry:
          scores[-1] = (rev, total)
        else:
          # Previously-unseen revision; create new entry:
          scores.append((rev, total))
      return scores

    def best_rev(scores, preferred_rev):
      """Return the revision with the highest score from SCORES, a list
      returned by score_revisions().  When the maximum score is shared
      by multiple revisions, the oldest revision is selected, unless
      PREFERRED_REV is one of the possibilities, in which case, it is
      selected."""
      max_score = 0
      preferred_rev_score = -1
      rev = SVN_INVALID_REVNUM
      if preferred_rev is None:
        # Comparison order of different types is arbitrary.  Do not
        # expect None to compare less than int values below.
        preferred_rev = SVN_INVALID_REVNUM
      for revnum, count in scores:
        if count > max_score:
          max_score = count
          rev = revnum
        if revnum <= preferred_rev:
          preferred_rev_score = count
      if preferred_rev_score == max_score:
        rev = preferred_rev
      return rev, max_score

    # Aggregate openings and closings from the rev tree
    svn_revision_ranges = self._list_revnums(node)

    # Score the lists
    scores = score_revisions(svn_revision_ranges)

    revnum, max_score = best_rev(scores, preferred_revnum)

    if revnum == SVN_INVALID_REVNUM:
      raise FatalError("failed to find a revision "
                       + "to copy from when copying %s" % name)
    return revnum, max_score

  def _list_revnums(self, node):
    """Return a list of all the SvnRevisionRanges (including
    duplicates) for all leaf nodes at and under NODE."""

    if isinstance(node, SvnRevisionRange):
      # It is a leaf node.
      return [ node ]
    else:
      # It is an intermediate node.
      revnums = []
      for key, subnode in node.items():
        revnums.extend(self._list_revnums(subnode))
      return revnums

  def get_sources(self):
    """Return the list of sources for this symbolic name.

    The Project instance defines what are legitimate sources.  Raise
    an exception if a change occurred outside of the source
    directories."""

    return self._get_sub_sources('', self._node_tree)

  def _get_sub_sources(self, start_svn_path, start_node):
    """Return the list of sources for this symbolic name, starting the
    search at path START_SVN_PATH, which is node START_NODE.  This is
    a helper method, called by get_sources() (see)."""

    project = Ctx().project
    if isinstance(start_node, SvnRevisionRange):
      # This implies that a change was found outside of the
      # legitimate sources.  This should never happen.
      raise
    elif project.is_source(start_svn_path):
      # This is a legitimate source.  Add it to list.
      return [ FillSource(start_svn_path, start_node) ]
    else:
      # This is a directory that is not a legitimate source.  (That's
      # OK because it hasn't changed directly.)  But directories
      # within it have been changed, so we need to search recursively
      # to find their enclosing sources.
      sources = []
      for entry, node in start_node.items():
        svn_path = _path_join(start_svn_path, entry)
        sources.extend(self._get_sub_sources(svn_path, node))

    return sources

  def print_node_tree(self, node, name='/', indent_depth=0):
    """For debugging purposes.  Prints all nodes in TREE that are
    rooted at NODE.  INDENT_DEPTH is used to indent the output of
    recursive calls."""
    if not indent_depth:
      print "TREE", "=" * 75
    if isinstance(node, SvnRevisionRange):
      print "TREE:", " " * (indent_depth * 2), name, node
    else:
      print "TREE:", " " * (indent_depth * 2), name
      for key, value in node.items():
        self.print_node_tree(value, key, (indent_depth + 1))


class FillSource:
  """Representation of a fill source used by the symbol filler in
  SVNRepositoryMirror."""
  def __init__(self, prefix, node):
    """Create an unscored fill source from PREFIX and NODE, which are
    stored as self.prefix and self.node.  The score and revision
    number stay None until set_score() is called."""
    self.prefix = prefix
    self.node = node
    self.score = None
    self.revnum = None

  def set_score(self, score, revnum):
    """Set the SCORE and REVNUM."""
    self.score = score
    self.revnum = revnum

  def __cmp__(self, other):
    """Comparison operator used to sort FillSources in descending
    score order.

    Raise TypeError if either FillSource has not been scored yet."""
    if self.score is None or other.score is None:
      # Use the call form of raise; the 'raise Class, value' statement
      # form is deprecated.
      raise TypeError('Tried to compare unscored FillSource')
    # The operands are swapped on purpose: a higher score sorts first.
    return cmp(other.score, self.score)


class SVNRepositoryMirror:
  """Mirror a Subversion Repository as it is constructed, one
  SVNCommit at a time.  The mirror is skeletal; it does not contain
  file contents.  The creation of a dumpfile or Subversion repository
  is handled by delegates.  See self.add_delegate method for how to
  set delegates.

  The structure of the repository is kept in two databases and one
  hash.  The revs_db database maps revisions to root node keys, and
  the nodes_db database maps node keys to nodes.  A node is a hash
  from path component names to keys.  Both the revs_db and the
  nodes_db are stored on disk and each access is expensive.

  The nodes_db database only has the keys for old revisions.  The
  revision that is being constructed is kept in memory in the
  new_nodes hash which is cheap to access.

  You must invoke _start_commit between SVNCommits.

  *** WARNING *** All path arguments to methods in this class CANNOT
      have leading or trailing slashes.
  """

  class SVNRepositoryMirrorPathExistsError(Exception):
    """Exception raised if an attempt is made to add a path to the
    repository mirror and that path already exists in the youngest
    revision of the repository."""
    pass

  class SVNRepositoryMirrorUnexpectedOperationError(Exception):
    """Exception raised if a CVSRevision is found to have an unexpected
    operation (OP) value."""
    pass

  class SVNRepositoryMirrorInvalidFillOperationError(Exception):
    """Exception raised if an empty SymbolicNameFillingGuide is returned
    during a fill where the branch in question already exists."""
    pass

  def __init__(self):
    """Set up the SVNRepositoryMirror and prepare it for SVNCommits."""
    self.delegates = [ ]

    # This corresponds to the 'revisions' table in a Subversion fs.
    self.revs_db = SDatabase(temp(SVN_MIRROR_REVISIONS_DB), DB_OPEN_NEW)
    Cleanup().register(temp(SVN_MIRROR_REVISIONS_DB), pass8)

    # This corresponds to the 'nodes' table in a Subversion fs.  (We
    # don't need a 'representations' or 'strings' table because we
    # only track metadata, not file contents.)
    self.nodes_db = Database(temp(SVN_MIRROR_NODES_DB), DB_OPEN_NEW)
    Cleanup().register(temp(SVN_MIRROR_NODES_DB), pass8)

    # Start at revision 0 without a root node.  It will be created
    # by _open_writable_root_node.
    self.youngest = 0
    self.new_root_key = None
    self.new_nodes = { }

    if not Ctx().trunk_only:
      ###PERF IMPT: Suck this into memory.
      self.tags_db = TagsDatabase(DB_OPEN_READ)
      self.symbolings_reader = SymbolingsReader()

  def _initialize_repository(self, date):
    """Initialize the repository by creating the directories for
    trunk, tags, and branches (only trunk if Ctx().trunk_only is
    set).  DATE is the date given to the initializing commit.  This
    method should only be called after all delegates are added to the
    repository mirror."""
    # Make a 'fake' SVNCommit so we can take advantage of the revprops
    # magic therein
    svn_commit = SVNCommit("Initialization", 1)
    svn_commit.set_date(date)
    svn_commit.set_log_msg("New repository initialized by cvs2svn.")

    self._start_commit(svn_commit)
    self._mkdir(Ctx().project.trunk_path)
    if not Ctx().trunk_only:
      self._mkdir(Ctx().project.branches_path)
      self._mkdir(Ctx().project.tags_path)

  def _start_commit(self, svn_commit):
    """Start a new commit.  If a commit is already in progress, it is
    ended first.  The delegates' start_commit() method is invoked with
    SVN_COMMIT."""
    if self.youngest > 0:
      self._end_commit()

    self.youngest = svn_commit.revnum
    self.new_root_key = None
    self.new_nodes = { }

    self._invoke_delegates('start_commit', svn_commit)

  def _end_commit(self):
    """Called at the end of each commit.  This method copies the newly
    created nodes to the on-disk nodes db."""
    if self.new_root_key is None:
      # No changes were made in this revision, so we make the root node
      # of the new revision be the same as the last one.
      self.revs_db[str(self.youngest)] = self.revs_db[str(self.youngest - 1)]
    else:
      self.revs_db[str(self.youngest)] = self.new_root_key
      # Copy the new nodes to the nodes_db
      for key, value in self.new_nodes.items():
        self.nodes_db[key] = value

  def _get_node(self, key):
    """Returns the node contents for KEY which may refer to either
    self.nodes_db or self.new_nodes."""
    # Nodes of the revision under construction shadow the on-disk db.
    if key in self.new_nodes:
      return self.new_nodes[key]
    else:
      return self.nodes_db[key]

  def _open_readonly_node(self, path, revnum):
    """Look up PATH at revision REVNUM without modifying anything.
    Returns the node key if the path exists, else None.

    If REVNUM is self.youngest and no root node has been written for
    that revision yet, the root of the previous revision is used."""
    # Get the root key
    if revnum == self.youngest:
      if self.new_root_key is None:
        node_key = self.revs_db[str(self.youngest - 1)]
      else:
        node_key = self.new_root_key
    else:
      node_key = self.revs_db[str(revnum)]

    # Descend one path component at a time.
    for component in path.split('/'):
      node_contents = self._get_node(node_key)
      node_key = node_contents.get(component, None)
      if node_key is None:
        return None

    return node_key

  def _open_writable_root_node(self):
    """Open a writable root node.  The current root node is returned
    immediately if it is already writable.  If not, create a new one by
    copying the contents of the root node of the previous version.

    Returns the root node key and the root node contents."""
    if self.new_root_key is not None:
      return self.new_root_key, self.new_nodes[self.new_root_key]

    if self.youngest < 2:
      new_contents = { }
    else:
      new_contents = self.nodes_db[self.revs_db[str(self.youngest - 1)]]
    self.new_root_key = gen_key()
    self.new_nodes = { self.new_root_key: new_contents }

    return self.new_root_key, new_contents

  def _open_writable_node(self, svn_path, create):
    """Open a writable node for the path SVN_PATH, creating SVN_PATH
    and any missing directories if CREATE is True.

    Returns the node key and node contents of SVN_PATH, or
    (None, None) if a component is missing and CREATE is false.  The
    delegates' mkdir() method is invoked for every missing component
    that is created, except for the last component of SVN_PATH."""
    parent_key, parent_contents = self._open_writable_root_node()

    # Walk up the path, one node at a time.
    path_so_far = None
    components = svn_path.split('/')
    for i in range(len(components)):
      component = components[i]
      path_so_far = _path_join(path_so_far, component)
      this_key = parent_contents.get(component, None)
      if this_key is not None:
        # The component exists.
        this_contents = self.new_nodes.get(this_key, None)
        if this_contents is None:
          # Suck the node from the nodes_db, but update the key
          this_contents = self.nodes_db[this_key]
          this_key = gen_key()
          self.new_nodes[this_key] = this_contents
          parent_contents[component] = this_key
      elif create:
        # The component does not exist, so we create it.
        this_contents = { }
        this_key = gen_key()
        self.new_nodes[this_key] = this_contents
        parent_contents[component] = this_key
        # The last component is announced (if at all) by the caller.
        if i < len(components) - 1:
          self._invoke_delegates('mkdir', path_so_far)
      else:
        # The component does not exist and we are not instructed to
        # create it, so we give up.
        return None, None

      parent_key = this_key
      parent_contents = this_contents

    return this_key, this_contents

  def _path_exists(self, path):
    """If PATH exists in self.youngest of the svn repository mirror,
    return True, else return False.

    PATH must not start with '/'."""
    return self._open_readonly_node(path, self.youngest) is not None

  def _fast_delete_path(self, parent_path, parent_contents, component):
    """Delete COMPONENT from the parent directory PARENT_PATH with the
    contents PARENT_CONTENTS.  Do nothing if COMPONENT does not exist
    in PARENT_CONTENTS."""
    if component in parent_contents:
      del parent_contents[component]
      self._invoke_delegates('delete_path',
                             _path_join(parent_path, component))

  def _delete_path(self, svn_path, should_prune=False):
    """Delete SVN_PATH from the tree.  If SHOULD_PRUNE is true, then
    delete all ancestor directories that are made empty when SVN_PATH
    is deleted.  In other words, SHOULD_PRUNE is like the -P option to
    'cvs checkout'.

    NOTE: This function ignores requests to delete the root directory
    or any directory for which Ctx().project.is_unremovable() returns
    True, either directly or by pruning."""

    if svn_path == '' or Ctx().project.is_unremovable(svn_path):
      return

    (parent_path, entry,) = _path_split(svn_path)
    if parent_path:
      parent_key, parent_contents = \
          self._open_writable_node(parent_path, False)
    else:
      parent_key, parent_contents = self._open_writable_root_node()

    if parent_key is not None:
      self._fast_delete_path(parent_path, parent_contents, entry)
      # The following recursion makes pruning an O(n^2) operation in the
      # worst case (where n is the depth of SVN_PATH), but the worst case
      # is probably rare, and the constant cost is pretty low.  Another
      # drawback is that we issue a delete for each path and not just
      # a single delete for the topmost directory pruned.
      if should_prune and len(parent_contents) == 0:
        self._delete_path(parent_path, True)

  def _mkdir(self, path):
    """Create PATH in the repository mirror at the youngest revision."""
    self._open_writable_node(path, True)
    self._invoke_delegates('mkdir', path)

  def _change_path(self, cvs_rev):
    """Register a change in self.youngest for the CVS_REV's svn_path
    in the repository mirror."""
    # We do not have to update the nodes because our mirror is only
    # concerned with the presence or absence of paths, and a file
    # content change does not cause any path changes.
    self._invoke_delegates('change_path', SVNCommitItem(cvs_rev, False))

  def _add_path(self, cvs_rev):
    """Add the CVS_REV's svn_path to the repository mirror."""
    self._open_writable_node(cvs_rev.svn_path, True)
    self._invoke_delegates('add_path', SVNCommitItem(cvs_rev, True))

  def _copy_path(self, src_path, dest_path, src_revnum):
    """Copy SRC_PATH at subversion revision number SRC_REVNUM to
    DEST_PATH. In the youngest revision of the repository, DEST_PATH's
    parent *must* exist, but DEST_PATH *cannot* exist.

    Return the node key and the contents of the new node at DEST_PATH
    as a dictionary.

    Raise SVNRepositoryMirrorPathExistsError if DEST_PATH already
    exists in its parent."""
    # get the contents of the node of our src_path
    src_key = self._open_readonly_node(src_path, src_revnum)
    src_contents = self._get_node(src_key)

    # Get the parent path and the base path of the dest_path
    (dest_parent, dest_basename,) = _path_split(dest_path)
    if dest_parent:
      dest_parent_key, dest_parent_contents = \
          self._open_writable_node(dest_parent, False)
    else:
      # DEST_PATH sits directly under the root, so its parent is the
      # root node itself (same handling as in _delete_path).
      dest_parent_key, dest_parent_contents = \
          self._open_writable_root_node()

    if dest_basename in dest_parent_contents:
      msg = "Attempt to add path '%s' to repository mirror " % dest_path
      msg = msg + "when it already exists in the mirror."
      raise self.SVNRepositoryMirrorPathExistsError(msg)

    dest_parent_contents[dest_basename] = src_key
    self._invoke_delegates('copy_path', src_path, dest_path, src_revnum)

    # Yes sir, src_key and src_contents are also the contents of the
    # destination.  This is a cheap copy, remember!  :-)
    return src_key, src_contents

  def _fill_symbolic_name(self, svn_commit):
    """Performs all copies necessary to create as much of the tag
    or branch SVN_COMMIT.symbolic_name as possible given the current
    revision of the repository mirror.

    The symbolic name is guaranteed to exist in the Subversion
    repository by the end of this call, even if there are no paths
    under it.

    Raise SVNRepositoryMirrorInvalidFillOperationError if there are
    no sources for the symbol and its branch path already exists."""
    symbol_fill = self.symbolings_reader.filling_guide_for_symbol(
        svn_commit.symbolic_name, self.youngest)
    # Get the list of sources for the symbolic name.
    sources = symbol_fill.get_sources()

    if sources:
      # tags_db is a TagsDatabase rather than a plain dict, so its
      # has_key() method is used for the lookup.
      if self.tags_db.has_key(svn_commit.symbolic_name):
        dest_prefix = Ctx().project.get_tag_path(svn_commit.symbolic_name)
      else:
        dest_prefix = Ctx().project.get_branch_path(svn_commit.symbolic_name)

      dest_key = self._open_writable_node(dest_prefix, False)[0]
      self._fill(symbol_fill, dest_prefix, dest_key, sources)
    else:
      # We can only get here for a branch whose first commit is an add
      # (as opposed to a copy).
      dest_path = Ctx().project.get_branch_path(symbol_fill.name)
      if not self._path_exists(dest_path):
        # If our symbol_fill was empty, that means that our first
        # commit on the branch was to a file added on the branch, and
        # that this is our first fill of that branch.
        #
        # This case is covered by test 16.
        #
        # ...we create the branch by copying trunk from our
        # current revision number minus 1
        source_path = Ctx().project.trunk_path
        entries = self._copy_path(source_path, dest_path,
                                  svn_commit.revnum - 1)[1]
        # Now since we've just copied trunk to a branch that's
        # *supposed* to be empty, we delete any entries in the
        # copied directory.
        for entry in entries:
          del_path = dest_path + '/' + entry
          # Delete but don't prune.
          self._delete_path(del_path)
      else:
        msg = "Error filling branch '" \
              + _clean_symbolic_name(symbol_fill.name) + "'.\n"
        msg = msg + "Received an empty SymbolicNameFillingGuide and\n"
        msg = msg + "attempted to create a branch that already exists."
        raise self.SVNRepositoryMirrorInvalidFillOperationError(msg)

  def _fill(self, symbol_fill, dest_prefix, dest_key, sources,
            path = None, parent_source_prefix = None,
            preferred_revnum = None, prune_ok = None):
    """Fill the tag or branch at DEST_PREFIX + PATH with items from
    SOURCES, and recurse into the child items.

    DEST_PREFIX is the prefix of the destination directory, e.g.
    '/tags/my_tag' or '/branches/my_branch', and SOURCES is a list of
    FillSource classes that are candidates to be copied to the
    destination.  DEST_KEY is the key in self.nodes_db to the
    destination, or None if the destination does not yet exist.

    PATH is the path relative to DEST_PREFIX.  If PATH is None, we
    are at the top level, e.g. '/tags/my_tag'.

    PARENT_SOURCE_PREFIX is the source prefix that was used to copy
    the parent directory, and PREFERRED_REVNUM is an int which is the
    source revision number that the caller (who may have copied KEY's
    parent) used to perform its copy.  If PREFERRED_REVNUM is None,
    then no revision is preferable to any other (which probably means
    that no copies have happened yet).

    PRUNE_OK means that a copy has been made in this recursion, and
    it's safe to prune directories that are not in
    SYMBOL_FILL._node_tree, provided that said directory has a source
    prefix of one of the PARENT_SOURCE_PREFIX.

    PATH, PARENT_SOURCE_PREFIX, PRUNE_OK, and PREFERRED_REVNUM
    should only be passed in by recursive calls."""
    # Calculate scores and revnums for all sources
    for source in sources:
      src_revnum, score = symbol_fill.get_best_revnum(source.node,
                                                      preferred_revnum)
      source.set_score(score, src_revnum)

    # Sort the sources in descending score order so that we will make
    # an eventual copy from the source with the highest score.
    sources.sort()
    copy_source = sources[0]

    src_path = _path_join(copy_source.prefix, path)
    dest_path = _path_join(dest_prefix, path)

    # Figure out if we shall copy to this destination and delete any
    # destination path that is in the way.
    do_copy = 0
    if dest_key is None:
      do_copy = 1
    elif prune_ok and (parent_source_prefix != copy_source.prefix or
                       copy_source.revnum != preferred_revnum):
      # We are about to replace the destination, so we need to remove
      # it before we perform the copy.
      self._delete_path(dest_path)
      do_copy = 1

    if do_copy:
      dest_key, dest_entries = self._copy_path(src_path, dest_path,
                                               copy_source.revnum)
      prune_ok = 1
    else:
      dest_entries = self._get_node(dest_key)

    # Create the SRC_ENTRIES hash from SOURCES.  The keys are path
    # elements and the values are lists of FillSource classes where
    # this path element exists.
    src_entries = {}
    for source in sources:
      if isinstance(source.node, SvnRevisionRange):
        continue
      for entry, node in source.node.items():
        src_entries.setdefault(entry, []).append(
            FillSource(source.prefix, node))

    if prune_ok:
      # Delete the entries in DEST_ENTRIES that are not in src_entries.
      delete_list = [ ]
      for entry in dest_entries:
        if entry not in src_entries:
          delete_list.append(entry)
      if delete_list:
        # Make sure we modify a node of the revision being built, not
        # one that belongs to an older revision.
        if dest_key not in self.new_nodes:
          dest_key, dest_entries = self._open_writable_node(dest_path, True)
        # Sort the delete list to get "diffable" dumpfiles.
        delete_list.sort()
        for entry in delete_list:
          self._fast_delete_path(dest_path, dest_entries, entry)

    # Recurse into the SRC_ENTRIES keys sorted in alphabetical order.
    src_keys = src_entries.keys()
    src_keys.sort()
    for src_key in src_keys:
      next_dest_key = dest_entries.get(src_key, None)
      self._fill(symbol_fill, dest_prefix, next_dest_key,
                 src_entries[src_key], _path_join(path, src_key),
                 copy_source.prefix, sources[0].revnum, prune_ok)

  def _synchronize_default_branch(self, svn_commit):
    """Propagate any changes that happened on a non-trunk default
    branch to the trunk of the repository.  See
    CVSCommit._post_commit() for details on why this is necessary.

    Raise SVNRepositoryMirrorUnexpectedOperationError if a
    CVSRevision in SVN_COMMIT has an operation other than OP_ADD,
    OP_CHANGE or OP_DELETE."""
    for cvs_rev in svn_commit.cvs_revs:
      svn_trunk_path = Ctx().project.make_trunk_path(cvs_rev.cvs_path)
      if cvs_rev.op == OP_ADD or cvs_rev.op == OP_CHANGE:
        if self._path_exists(svn_trunk_path):
          # Delete the path on trunk...
          self._delete_path(svn_trunk_path)
        # ...and copy over from branch
        self._copy_path(cvs_rev.svn_path, svn_trunk_path,
                        svn_commit.motivating_revnum)
      elif cvs_rev.op == OP_DELETE:
        # delete trunk path
        self._delete_path(svn_trunk_path)
      else:
        msg = ("Unknown CVSRevision operation '%s' in default branch sync."
               % cvs_rev.op)
        raise self.SVNRepositoryMirrorUnexpectedOperationError(msg)

  def commit(self, svn_commit):
    """Add an SVNCommit to the SVNRepository, incrementing the
    Repository revision number, and changing the repository.  Invoke
    the delegates' start_commit() method.

    If SVN_COMMIT is revision 2, the repository is initialized first
    (see _initialize_repository)."""

    if svn_commit.revnum == 2:
      self._initialize_repository(svn_commit.get_date())

    self._start_commit(svn_commit)

    if svn_commit.symbolic_name:
      Log().write(LOG_VERBOSE, "Filling symbolic name:",
                  _clean_symbolic_name(svn_commit.symbolic_name))
      self._fill_symbolic_name(svn_commit)
    elif svn_commit.motivating_revnum:
      Log().write(LOG_VERBOSE, "Synchronizing default_branch motivated by %d"
                  % svn_commit.motivating_revnum)
      self._synchronize_default_branch(svn_commit)
    else: # This actually commits CVSRevisions
      if len(svn_commit.cvs_revs) > 1: plural = "s"
      else: plural = ""
      Log().write(LOG_VERBOSE, "Committing %d CVSRevision%s"
                  % (len(svn_commit.cvs_revs), plural))
      for cvs_rev in svn_commit.cvs_revs:
        # See comment in CVSCommit._commit() for what this is all
        # about.  Note that although asking self._path_exists() is
        # somewhat expensive, we only do it if the first two (cheap)
        # tests succeed first.
        if not ((cvs_rev.deltatext_code == DELTATEXT_EMPTY)
                and (cvs_rev.rev == "1.1.1.1")
                and self._path_exists(cvs_rev.svn_path)):
          if cvs_rev.op == OP_ADD:
            self._add_path(cvs_rev)
          elif cvs_rev.op == OP_CHANGE:
            # Fix for Issue #74:
            #
            # Here's the scenario.  You have file FOO that is imported
            # on a non-trunk vendor branch.  So in r1.1 and r1.1.1.1,
            # the file exists.
            #
            # Moving forward in time, FOO is deleted on the default
            # branch (r1.1.1.2).  cvs2svn determines that this delete
            # also needs to happen on trunk, so FOO is deleted on
            # trunk.
            #
            # Along come r1.2, whose op is OP_CHANGE (because r1.1 is
            # not 'dead', we assume it's a change).  However, since
            # our trunk file has been deleted, svnadmin blows up--you
            # can't change a file that doesn't exist!
            #
            # Soooo... we just check the path, and if it doesn't
            # exist, we do an add... if the path does exist, it's
            # business as usual.
            if not self._path_exists(cvs_rev.svn_path):
              self._add_path(cvs_rev)
            else:
              self._change_path(cvs_rev)

        if cvs_rev.op == OP_DELETE:
          self._delete_path(cvs_rev.svn_path, Ctx().prune)

  def cleanup(self):
    """Drop the references to the revs_db and nodes_db databases.
    Called by finish()."""
    self.revs_db = None
    self.nodes_db = None

  def add_delegate(self, delegate):
    """Adds DELEGATE to self.delegates.

    For every delegate you add, as soon as SVNRepositoryMirror
    performs a repository action method, SVNRepositoryMirror will call
    the delegate's corresponding repository action method.  Multiple
    delegates will be called in the order that they are added.  See
    SVNRepositoryMirrorDelegate for more information."""
    self.delegates.append(delegate)

  def _invoke_delegates(self, method, *args):
    """Iterate through each of our delegates, in the order that they
    were added, and call the delegate's method named METHOD with the
    arguments in ARGS."""
    for delegate in self.delegates:
      getattr(delegate, method)(*args)

  def finish(self):
    """End the commit in progress (if any), call the delegates'
    finish method, and release the databases."""
    # As in _start_commit, there is nothing to end if no commit was
    # ever started; _end_commit would look up a nonexistent revision.
    if self.youngest > 0:
      self._end_commit()
    self._invoke_delegates('finish')
    self.cleanup()


class SVNCommitItem:
  """Pairs a CVSRevision object with the Subversion-related data
  (such as properties) that goes along with it."""

  def __init__(self, c_rev, svn_props_changed):
    """Wrap C_REV and work out the properties for this file.

    SVN_PROPS_CHANGED says whether the svn: properties are known to
    have changed since the last revision.

    Every SVNPropertySetter in Ctx().svn_property_setters gets to set
    properties on this item; afterwards a couple of the resulting
    properties are read back and cached as attributes."""

    # Property map { key : value }.  A VALUE of None means that the
    # property should not be set.
    self.svn_props = { }

    self.c_rev = c_rev

    # True if the svn properties changed for this file, i.e. if they
    # have to be written to the dumpfile.
    self.svn_props_changed = svn_props_changed

    for setter in Ctx().svn_property_setters:
      setter.set_properties(self)

    props = self.svn_props

    self.has_keywords = props.get('svn:keywords') is not None

    # Remember whether the EOLs have to be filtered.  (self.svn_props
    # could be consulted directly instead, as it is initialized for
    # each revision.)
    self.needs_eol_filter = props.get('svn:eol-style') is not None


class SVNRepositoryMirrorDelegate:
  """Abstract base for the delegates of SVNRepositoryMirror.  Every
  method below raises NotImplementedError and has to be overridden by
  subclasses.

  Each method stands for the Subversion operation that its name
  suggests, and a subclass carries that operation out in its own way.
  Taking add_path as an example: the DumpfileDelegate would write out
  a "Node-add:" command to a Subversion dumpfile, the StdoutDelegate
  would merely print that the path is being added to the repository,
  and the RepositoryDelegate would actually cause the path to be added
  to the Subversion repository that it is creating.
  """

  def start_commit(self, svn_commit):
    """Do whatever is needed to begin SVNCommit SVN_COMMIT; the
    details are up to the subclass."""
    raise NotImplementedError()

  def mkdir(self, path):
    """PATH is a string; the details are up to the subclass."""
    raise NotImplementedError()

  def add_path(self, s_item):
    """S_ITEM is an SVNCommitItem; the details are up to the
    subclass."""
    raise NotImplementedError()

  def change_path(self, s_item):
    """S_ITEM is an SVNCommitItem; the details are up to the
    subclass."""
    raise NotImplementedError()

  def delete_path(self, path):
    """PATH is a string; the details are up to the subclass."""
    raise NotImplementedError()

  def copy_path(self, src_path, dest_path, src_revnum):
    """SRC_PATH and DEST_PATH are strings and SRC_REVNUM is a
    subversion revision number (int); the details are up to the
    subclass."""
    raise NotImplementedError()

  def finish(self):
    """Do any cleanup that is necessary once all revisions have been
    committed."""
    raise NotImplementedError()


class DumpfileDelegate(SVNRepositoryMirrorDelegate):
  """Create a Subversion dumpfile."""

  def __init__(self, dumpfile_path=None):
    """Open the dumpfile for writing and emit the dumpfile header.

    The file that is written is DUMPFILE_PATH or, if that is not
    given, Ctx().dumpfile."""
    self.dumpfile_path = dumpfile_path or Ctx().dumpfile

    target = open(self.dumpfile_path, 'wb')
    self.dumpfile = target
    self._write_dumpfile_header(target)

  def _write_dumpfile_header(self, dumpfile):
    # Initialize the dumpfile with the standard headers.
    #
    # Since the CVS repository doesn't have a UUID, and the Subversion
    # repository will be created with one anyway, we don't specify a
    # UUID in the dumpflie
    dumpfile.write('SVN-fs-dump-format-version: 2\n\n')

  def _utf8_path(self, path):
    """Return PATH re-encoded as UTF-8.

    Raise FatalError if some component of PATH cannot be converted."""
    # The components are converted one at a time, because each of
    # them may use a different encoding.
    converted = []
    for component in path.split('/'):
      try:
        # The 'replace' strategy is good enough for log messages, but
        # paths must be converted without any loss, hence 'strict'.
        converted.append(to_utf8(component, 'strict'))
      except UnicodeError:
        raise FatalError(
            "Unable to convert a path '%s' to internal encoding.\n"
            "Consider rerunning with one or more '--encoding' parameters."
            % (path,))
    return '/'.join(converted)

  def _string_for_prop(self, name, value):
    """Return a property in the form needed for the dumpfile."""

    return 'K %d\n%s\nV %d\n%s\n' % (len(name), name, len(value), value)

  def start_commit(self, svn_commit):
    """Emit the start of SVN_COMMIT (an SVNCommit)."""

    self.revision = svn_commit.revnum

    # The start of a new commit typically looks like this:
    #
    #   Revision-number: 1
    #   Prop-content-length: 129
    #   Content-length: 129
    #
    #   K 7
    #   svn:log
    #   V 27
    #   Log message for revision 1.
    #   K 10
    #   svn:author
    #   V 7
    #   jrandom
    #   K 8
    #   svn:date
    #   V 27
    #   2003-04-22T22:57:58.132837Z
    #   PROPS-END
    #
    # Notice that the length headers count everything -- not just the
    # length of the data but also the lengths of the lengths, including
    # the 'K ' or 'V ' prefixes.
    #
    # The reason there are both Prop-content-length and Content-length
    # is that the former includes just props, while the latter includes
    # everything.  That's the generic header form for any entity in a
    # dumpfile.  But since revisions only have props, the two lengths
    # are always the same for revisions.

    # Calculate the output needed for the property definitions.
    props = svn_commit.get_revprops()
    prop_names = props.keys()
    prop_names.sort()
    prop_strings = []
    for propname in prop_names:
      if props[propname] is not None:
        prop_strings.append(self._string_for_prop(propname, props[propname]))

    all_prop_strings = ''.join(prop_strings) + 'PROPS-END\n'
    total_len = len(all_prop_strings)

    # Print the revision header and props
    self.dumpfile.write('Revision-number: %d\n'
                        'Prop-content-length: %d\n'
                        'Content-length: %d\n'
                        '\n'
                        % (self.revision, total_len, total_len))

    self.dumpfile.write(all_prop_strings)
    self.dumpfile.write('\n')

  def mkdir(self, path):
    """Write the node record that adds the directory PATH to the
    dumpfile."""
    node_path = self._utf8_path(path)
    record = ("Node-path: %s\n"
              "Node-kind: dir\n"
              "Node-action: add\n"
              "\n"
              "\n" % node_path)
    self.dumpfile.write(record)

  def _add_or_change_path(self, s_item, op):
    """Emit the addition or change corresponding to S_ITEM.
    OP is either the constant OP_ADD or OP_CHANGE."""

    # Validation stuffs
    if op == OP_ADD:
      action = 'add'
    elif op == OP_CHANGE:
      action = 'change'
    else:
      raise FatalError("_add_or_change_path() called with bad op ('%s')"
                       % (op,))

    # Convenience variables
    c_rev = s_item.c_rev

    # The property handling here takes advantage of an undocumented
    # but IMHO consistent feature of the Subversion dumpfile-loading
    # code.  When a node's properties aren't mentioned (that is, the
    # "Prop-content-length:" header is absent, no properties are
    # listed at all, and there is no "PROPS-END\n" line) then no
    # change is made to the node's properties.
    #
    # This is consistent with the way dumpfiles behave w.r.t. text
    # content changes, so I'm comfortable relying on it.  If you
    # commit a change to *just* the properties of some node that
    # already has text contents from a previous revision, then in the
    # dumpfile output for the prop change, no "Text-content-length:"
    # nor "Text-content-md5:" header will be present, and the text of
    # the file will not be given.  But this does not cause the file's
    # text to be erased!  It simply remains unchanged.
    #
    # This works out great for cvs2svn, due to lucky coincidences:
    #
    # For files, the only properties we ever set are set in the first
    # revision; all other revisions (including on branches) inherit
    # from that.  After the first revision, we never change file
    # properties, therefore, there is no need to remember the full set
    # of properties on a given file once we've set it.
    #
    # For directories, the only property we set is "svn:ignore", and
    # while we may change it after the first revision, we always do so
    # based on the contents of a ".cvsignore" file -- in other words,
    # CVS is doing the remembering for us, so we still don't have to
    # preserve the previous value of the property ourselves.

    # Calculate the (sorted-by-name) property string and length, if any.
    if s_item.svn_props_changed:
      svn_props = s_item.svn_props
      prop_contents = ''
      prop_names = svn_props.keys()
      prop_names.sort()
      for pname in prop_names:
        pvalue = svn_props[pname]
        if pvalue is not None:
          prop_contents += self._string_for_prop(pname, pvalue)
      prop_contents += 'PROPS-END\n'
      props_header = 'Prop-content-length: %d\n' % len(prop_contents)
    else:
      prop_contents = ''
      props_header = ''

    # treat .cvsignore as a directory property
    dir_path, basename = os.path.split(c_rev.svn_path)
    if basename == ".cvsignore":
      ignore_vals = generate_ignores(c_rev)
      ignore_contents = '\n'.join(ignore_vals)
      ignore_contents = ('K 10\nsvn:ignore\nV %d\n%s\n' % \
                         (len(ignore_contents), ignore_contents))
      ignore_contents = ignore_contents + 'PROPS-END\n'
      ignore_len = len(ignore_contents)

      # write headers, then props
      self.dumpfile.write('Node-path: %s\n'
                          'Node-kind: dir\n'
                          'Node-action: change\n'
                          'Prop-content-length: %d\n'
                          'Content-length: %d\n'
                          '\n'
                          '%s'
                          % (self._utf8_path(dir_path), ignore_len,
                             ignore_len, ignore_contents))

    # If the file has keywords, we must prevent CVS/RCS from expanding
    # the keywords because they must be unexpanded in the repository,
    # or Subversion will get confused.
    pipe_cmd, pipe = Ctx().cvs_repository.get_co_pipe(
        c_rev, suppress_keyword_substitution=s_item.has_keywords)

    self.dumpfile.write('Node-path: %s\n'
                        'Node-kind: file\n'
                        'Node-action: %s\n'
                        '%s'  # no property header if no props
                        'Text-content-length: '
                        % (self._utf8_path(c_rev.svn_path),
                           action, props_header))

    pos = self.dumpfile.tell()

    self.dumpfile.write('0000000000000000\n'
                        'Text-content-md5: 00000000000000000000000000000000\n'
                        'Content-length: 0000000000000000\n'
                        '\n')

    if prop_contents:
      self.dumpfile.write(prop_contents)

    # Insert a filter to convert all EOLs to LFs if necessary
    if s_item.needs_eol_filter:
      data_reader = LF_EOL_Filter(pipe.stdout)
    else:
      data_reader = pipe.stdout

    # Insert the rev contents, calculating length and checksum as we go.
    checksum = md5.new()
    length = 0
    while True:
      buf = data_reader.read(PIPE_READ_SIZE)
      if buf == '':
        break
      checksum.update(buf)
      length = length + len(buf)
      self.dumpfile.write(buf)

    pipe.stdout.close()
    error_output = pipe.stderr.read()
    exit_status = pipe.wait()
    if exit_status:
      raise FatalError("The command '%s' failed with exit status: %s\n"
                       "and the following output:\n"
                       "%s" % (pipe_cmd, exit_status, error_output))

    # Go back to patch up the length and checksum headers:
    self.dumpfile.seek(pos, 0)
    # We left 16 zeros for the text length; replace them with the real
    # length, padded on the left with spaces:
    self.dumpfile.write('%16d' % length)
    # 16... + 1 newline + len('Text-content-md5: ') == 35
    self.dumpfile.seek(pos + 35, 0)
    self.dumpfile.write(checksum.hexdigest())
    # 35... + 32 bytes of checksum + 1 newline + len('Content-length: ') == 84
    self.dumpfile.seek(pos + 84, 0)
    # The content length is the length of property data, text data,
    # and any metadata around/inside them.
    self.dumpfile.write('%16d' % (length + len(prop_contents)))
    # Jump back to the end of the stream
    self.dumpfile.seek(0, 2)

    # This record is done (write two newlines -- one to terminate
    # contents that weren't themselves newline-terminated, one to
    # provide a blank line for readability).
    self.dumpfile.write('\n\n')

  def add_path(self, s_item):
    """Write the dumpfile record that adds S_ITEM (an SVNCommitItem).

    The work is delegated to _add_or_change_path() with the OP_ADD
    operation."""
    operation = OP_ADD
    self._add_or_change_path(s_item, operation)

  def change_path(self, s_item):
    """Write the dumpfile record that changes S_ITEM (an SVNCommitItem).

    The work is delegated to _add_or_change_path() with the OP_CHANGE
    operation."""
    operation = OP_CHANGE
    self._add_or_change_path(s_item, operation)

  def delete_path(self, path):
    """Emit the deletion of PATH."""
    self.dumpfile.write('Node-path: %s\n'
                        'Node-action: delete\n'
                        '\n' % self._utf8_path(path))

  def copy_path(self, src_path, dest_path, src_revnum):
    """Emit the copying of SRC_PATH at SRC_REV to DEST_PATH."""
    # We don't need to include "Node-kind:" for copies; the loader
    # ignores it anyway and just uses the source kind instead.
    self.dumpfile.write('Node-path: %s\n'
                        'Node-action: add\n'
                        'Node-copyfrom-rev: %d\n'
                        'Node-copyfrom-path: /%s\n'
                        '\n'
                        % (self._utf8_path(dest_path),
                           src_revnum,
                           self._utf8_path(src_path)))

  def finish(self):
    """Perform any cleanup necessary after all revisions have been
    committed."""
    self.dumpfile.close()


class RepositoryDelegate(DumpfileDelegate):
  """Creates a new Subversion Repository.  DumpfileDelegate does all
  of the heavy lifting.

  Each commit is written to a temporary dumpfile whose contents are
  then piped into a single long-running 'svnadmin load' process."""

  def __init__(self):
    """Create the target repository (unless Ctx().existing_svnrepos is
    set), open the temporary dumpfile and start 'svnadmin load'.

    Raises FatalError if writing the dumpfile header to svnadmin
    fails with an IOError."""
    self.svnadmin = Ctx().svnadmin
    self.target = Ctx().target
    if not Ctx().existing_svnrepos:
      Log().write(LOG_NORMAL,"Creating new repository '%s'" % (self.target))
      if not Ctx().fs_type:
        # User didn't say what kind repository (bdb, fsfs, etc).
        # We still pass --bdb-txn-nosync.  It's a no-op if the default
        # repository type doesn't support it, but we definitely want
        # it if BDB is the default.
        run_command('%s create %s "%s"' % (self.svnadmin,
                                           "--bdb-txn-nosync",
                                           self.target))
      elif Ctx().fs_type == 'bdb':
        # User explicitly specified bdb.
        #
        # Since this is a BDB repository, pass --bdb-txn-nosync,
        # because it gives us a 4-5x speed boost (if cvs2svn is
        # creating the repository, cvs2svn should be the only program
        # accessing the svn repository (until cvs is done, at least)).
        # But we'll turn no-sync off in self.finish(), unless
        # instructed otherwise.
        run_command('%s create %s %s "%s"' % (self.svnadmin,
                                              "--fs-type=bdb",
                                              "--bdb-txn-nosync",
                                              self.target))
      else:
        # User specified something other than bdb.
        run_command('%s create %s "%s"' % (self.svnadmin,
                                           "--fs-type=%s" % Ctx().fs_type,
                                           self.target))

    # Since the output of this run is a repository, not a dumpfile,
    # the temporary dumpfiles we create should go in the tmpdir.
    DumpfileDelegate.__init__(self, temp(Ctx().dumpfile))

    # This is 1 if a commit is in progress, otherwise None.
    self._commit_in_progress = None

    self.dumpfile = open(self.dumpfile_path, 'w+b')
    self.loader_pipe = SimplePopen([ self.svnadmin, 'load', '-q',
                                     self.target ], True)
    self.loader_pipe.stdout.close()
    try:
      self._write_dumpfile_header(self.loader_pipe.stdin)
    except IOError:
      raise FatalError("svnadmin failed with the following output while "
                       "loading the dumpfile:\n"
                       + self.loader_pipe.stderr.read())

  def _feed_pipe(self):
    """Feed the revision stored in the dumpfile to the svnadmin
    load pipe.

    Raises FatalError (including svnadmin's stderr output) if writing
    to the pipe fails with an IOError."""
    self.dumpfile.seek(0)
    while 1:
      data = self.dumpfile.read(128*1024) # Chunk size is arbitrary
      if not len(data):
        break
      try:
        self.loader_pipe.stdin.write(data)
      except IOError:
        raise FatalError("svnadmin failed with the following output "
                         "while loading the dumpfile:\n"
                         + self.loader_pipe.stderr.read())

  def start_commit(self, svn_commit):
    """Start a new commit.  If a commit is already in progress, load
    its dumpfile contents into the svn repository first.  The dumpfile
    is then truncated and the header for SVN_COMMIT is written into
    it."""
    if self._commit_in_progress:
      self._feed_pipe()
    self.dumpfile.seek(0)
    self.dumpfile.truncate()
    DumpfileDelegate.start_commit(self, svn_commit)
    self._commit_in_progress = 1

  def finish(self):
    """Loads the last commit into the repository, waits for svnadmin
    to exit and removes the temporary dumpfile.

    Raises FatalError if 'svnadmin load' exits with a non-zero
    status."""
    self._feed_pipe()
    self.dumpfile.close()
    self.loader_pipe.stdin.close()
    error_output = self.loader_pipe.stderr.read()
    exit_status = self.loader_pipe.wait()
    if exit_status:
      raise FatalError('svnadmin load failed with exit status: %s\n'
                       'and the following output:\n'
                       '%s' % (exit_status, error_output,))
    os.remove(self.dumpfile_path)

    # If this is a BDB repository, and we created the repository, and
    # Ctx().bdb_txn_nosync wasn't set, then comment out the
    # DB_TXN_NOSYNC line in the DB_CONFIG file, because txn syncing
    # should be on by default in BDB repositories.
    #
    # We determine if this is a BDB repository by looking for the
    # DB_CONFIG file, which doesn't exist in FSFS, rather than by
    # checking Ctx().fs_type.  That way this code will Do The Right
    # Thing in all circumstances.
    db_config = os.path.join(self.target, "db/DB_CONFIG")
    if (not Ctx().existing_svnrepos and not Ctx().bdb_txn_nosync
        and os.path.exists(db_config)):
      no_sync = 'set_flags DB_TXN_NOSYNC\n'

      # Close the file handles explicitly rather than relying on the
      # garbage collector to flush and release them.
      config_file = open(db_config, 'r')
      try:
        contents = config_file.readlines()
      finally:
        config_file.close()

      try:
        index = contents.index(no_sync)
      except ValueError:
        # The conversion itself has succeeded at this point, so a
        # DB_CONFIG without the expected line is not worth a crash.
        Log().write(LOG_WARN,
                    "%s: '%s' does not contain the line '%s'; "
                    "leaving it unchanged."
                    % (warning_prefix, db_config, no_sync.strip()))
      else:
        contents[index] = '# ' + no_sync
        config_file = open(db_config, 'w')
        try:
          config_file.writelines(contents)
        finally:
          config_file.close()


class StdoutDelegate(SVNRepositoryMirrorDelegate):
  """A delegate that touches nothing on disk: it only reports, through
  Log(), what the SVNRepositoryMirror is doing.  The messages claim
  that work is being done, although this class itself does nothing
  beyond printing them."""

  def __init__(self, total_revs):
    # TOTAL_REVS is shown as the denominator in the progress message
    # written by start_commit().
    self.total_revs = total_revs

  def _verbose(self, *words):
    """Write WORDS to the log at LOG_VERBOSE level."""
    Log().write(LOG_VERBOSE, *words)

  def start_commit(self, svn_commit):
    """Report the Subversion revision number of SVN_COMMIT, the commit
    that is being started."""
    self._verbose("=" * 60)
    progress = "Starting Subversion r%d / %d" % (svn_commit.revnum,
                                                 self.total_revs)
    Log().write(LOG_NORMAL, progress)

  def mkdir(self, path):
    """Report that directory PATH is being created."""
    self._verbose("  New Directory", path)

  def add_path(self, s_item):
    """Report that s_item.c_rev.svn_path is being added."""
    self._verbose("  Adding", s_item.c_rev.svn_path)

  def change_path(self, s_item):
    """Report that s_item.c_rev.svn_path is being changed."""
    self._verbose("  Changing", s_item.c_rev.svn_path)

  def delete_path(self, path):
    """Report that PATH is being deleted."""
    self._verbose("  Deleting", path)

  def copy_path(self, src_path, dest_path, src_revnum):
    """Report that revision SRC_REVNUM of SRC_PATH is being copied to
    DEST_PATH."""
    self._verbose("  Copying revision", src_revnum, "of", src_path)
    self._verbose("                to", dest_path)

  def finish(self):
    """Report that the repository creation is complete."""
    self._verbose("Finished creating Subversion repository.")
    Log().write(LOG_QUIET, "Done.")

def pass1():
  """Pass 1: parse every ',v' file found below the project's CVS
  repository path, feeding the parse results to a CollectData
  instance, then write the symbol database.

  Raises FatalException if any file caused a fatal error or if no
  ',v' file was found at all."""
  attic_suffix = os.sep + 'Attic'
  Log().write(LOG_QUIET, "Examining all CVS ',v' files...")
  cd = CollectData()

  def visit_file(collector, dirname, files):
    # Callback for os.path.walk(); COLLECTOR is the CollectData
    # instance passed as the walk argument.
    in_attic = (dirname[-6:] == attic_suffix)
    for fname in files:
      if not fname.endswith(',v'):
        continue
      collector.found_valid_file = 1
      pathname = os.path.join(dirname, fname)
      if in_attic:
        # The canonical name omits the 'Attic' portion of the path.
        canonical_name = os.path.join(dirname[:-6], fname)
      else:
        canonical_name = pathname
        # The same file living both here and in the Attic is fatal.
        attic_path = os.path.join(dirname, 'Attic', fname)
        if os.path.exists(attic_path):
          err = "%s: A CVS repository cannot contain both %s and %s" \
                % (error_prefix, pathname, attic_path)
          sys.stderr.write(err + '\n')
          collector.fatal_errors.append(err)
      collector.set_fname(canonical_name, pathname)
      Log().write(LOG_NORMAL, pathname)
      try:
        cvs2svn_rcsparse.parse(open(pathname, 'rb'), collector)
      except (cvs2svn_rcsparse.common.RCSParseError, ValueError,
              RuntimeError):
        err = "%s: '%s' is not a valid ,v file" \
              % (error_prefix, pathname)
        sys.stderr.write(err + '\n')
        collector.fatal_errors.append(err)
      except:
        # Anything unexpected: say which file triggered it, then let
        # the exception propagate.
        Log().write(LOG_WARN,
                    "Exception occurred while parsing %s" % pathname)
        raise

  os.path.walk(Ctx().project.project_cvs_repos_path, visit_file, cd)
  Log().write(LOG_VERBOSE, 'Processed', cd.num_files, 'files')

  cd.write_symbol_db()

  if cd.fatal_errors:
    summary_lines = (["Pass 1 complete.", "=" * 75, "Error summary:"]
                     + list(cd.fatal_errors)
                     + ["Exited due to fatal error(s).", ""])
    raise FatalException("\n".join(summary_lines))

  if cd.found_valid_file is None:
    raise FatalException(
        "\n"
        "No RCS files found in your CVS Repository!\n"
        "Are you absolutely certain you are pointing cvs2svn\n"
        "at a CVS repository?\n"
        "\n"
        "Exited due to fatal error(s).\n")

  StatsKeeper().reset_c_rev_info()
  StatsKeeper().archive()
  Log().write(LOG_QUIET, "Done")

def pass2():
  """Pass 2: clean up the revision information.

  First the symbol database is checked for blocked exclusions, forced
  tags that have commits, and tag/branch mismatches; if any of these
  is found the problems are written to stderr and the program exits
  with status 1.  Then the tags database is created, and every
  revision in the revisions file is filtered (excluded symbols are
  dropped, forced tags/branches are converted), has its timestamp
  resynchronized where a resync record applies, and is written to the
  clean-revisions file."""

  symbol_db = SymbolDatabase()
  symbol_db.read()

  # Convert the list of regexps to a list of strings
  excludes = symbol_db.find_excluded_symbols(Ctx().excludes)

  error_detected = 0

  Log().write(LOG_QUIET, "Checking for blocked exclusions...")
  blocked_excludes = symbol_db.find_blocked_excludes(excludes)
  if blocked_excludes:
    for branch, blockers in blocked_excludes.items():
      sys.stderr.write(error_prefix + ": The branch '%s' cannot be "
                       "excluded because the following symbols depend "
                       "on it:\n" % (branch))
      for blocker in blockers:
        sys.stderr.write("    '%s'\n" % (blocker))
    sys.stderr.write("\n")
    error_detected = 1

  Log().write(LOG_QUIET, "Checking for forced tags with commits...")
  invalid_forced_tags = [ ]
  for forced_tag in Ctx().forced_tags:
    if excludes.has_key(forced_tag):
      continue
    if symbol_db.branch_has_commit(forced_tag):
      invalid_forced_tags.append(forced_tag)
  if invalid_forced_tags:
    sys.stderr.write(error_prefix + ": The following branches cannot be "
                     "forced to be tags because they have commits:\n")
    for tag in invalid_forced_tags:
      sys.stderr.write("    '%s'\n" % (tag))
    sys.stderr.write("\n")
    error_detected = 1

  Log().write(LOG_QUIET, "Checking for tag/branch mismatches...")
  mismatches = symbol_db.find_mismatches(excludes)
  def is_not_forced(mismatch):
    name = mismatch[0]
    return not (name in Ctx().forced_tags or name in Ctx().forced_branches)
  mismatches = filter(is_not_forced, mismatches)
  if mismatches:
    sys.stderr.write(error_prefix + ": The following symbols are tags "
                     "in some files and branches in others.\nUse "
                     "--force-tag, --force-branch and/or --exclude to "
                     "resolve the symbols.\n")
    for name, tag_count, branch_count, commit_count in mismatches:
      sys.stderr.write("    '%s' is a tag in %d files, a branch in "
                       "%d files and has commits in %d files.\n"
                       % (name, tag_count, branch_count, commit_count))
    error_detected = 1

  # Bail out now if we found errors
  if error_detected:
    sys.exit(1)

  # Create the tags database
  tags_db = TagsDatabase(DB_OPEN_NEW)
  for tag in symbol_db.tags:
    if tag not in Ctx().forced_branches:
      tags_db[tag] = None
  for tag in Ctx().forced_tags:
    tags_db[tag] = None

  Log().write(LOG_QUIET, "Re-synchronizing CVS revision timestamps...")

  # We may have recorded some changes in revisions' timestamp.  We need to
  # scan for any other files which may have had the same log message and
  # occurred at "the same time" and change their timestamps, too.

  # read the resync data file
  def read_resync(fname):
    "Read the .resync file into memory."

    ### note that we assume that we can hold the entire resync file in
    ### memory. really large repositories with whacky timestamps could
    ### bust this assumption. should that ever happen, then it is possible
    ### to split the resync file into pieces and make multiple passes,
    ### using each piece.

    #
    # A digest maps to a sequence of lists which specify a lower and upper
    # time bound for matching up the commit.  We keep a sequence of these
    # because a number of checkins with the same log message (e.g. an empty
    # log message) could need to be remapped.  We also make them a list
    # because we will dynamically expand the lower/upper bound as we find
    # commits that fall into a particular msg and time range.
    #
    # resync == digest -> [ [old_time_lower, old_time_upper, new_time], ... ]
    #
    resync = { }

    for line in fileinput.FileInput(fname):
      t1 = int(line[:8], 16)
      digest = line[9:DIGEST_END_IDX]
      t2 = int(line[DIGEST_END_IDX+1:], 16)
      t1_l = t1 - COMMIT_THRESHOLD/2
      t1_u = t1 + COMMIT_THRESHOLD/2
      resync.setdefault(digest, []).append([t1_l, t1_u, t2])

    # For each digest, sort the resync items in it in increasing order,
    # based on the lower time bound.
    for val in resync.values():
      val.sort()

    return resync

  resync = read_resync(temp(DATAFILE + RESYNC_SUFFIX))

  output = open(temp(DATAFILE + CLEAN_REVS_SUFFIX), 'w')
  Cleanup().register(temp(DATAFILE + CLEAN_REVS_SUFFIX), pass3)

  tweaked_timestamps_db = Database(temp(TWEAKED_TIMESTAMPS_DB), DB_OPEN_NEW)
  Cleanup().register(temp(TWEAKED_TIMESTAMPS_DB), pass2)

  # Predicate used to remove all references to excluded tags and
  # branches.  It does not depend on the revision being processed, so
  # it is defined once rather than on every loop iteration.
  def not_excluded(symbol, excludes=excludes):
    return not excludes.has_key(symbol)

  # process the revisions file, looking for items to clean up
  for line in fileinput.FileInput(temp(DATAFILE + REVS_SUFFIX)):
    c_rev = CVSRevision(Ctx(), line[:-1])

    # Skip this entire revision if it's on an excluded branch
    if excludes.has_key(c_rev.branch_name):
      continue

    # Pick up timestamps of the neighbouring revisions that were
    # already tweaked earlier in this pass.
    new_prev_ts = None
    if c_rev.prev_rev is not None:
      new_prev_ts = tweaked_timestamps_db.get(
        c_rev.unique_key(c_rev.prev_rev), None)
    if new_prev_ts:
      c_rev.prev_timestamp = new_prev_ts

    new_next_ts = None
    if c_rev.next_rev is not None:
      new_next_ts = tweaked_timestamps_db.get(
        c_rev.unique_key(c_rev.next_rev), None)
    if new_next_ts:
      c_rev.next_timestamp = new_next_ts

    # Remove all references to excluded tags and branches
    c_rev.branches = filter(not_excluded, c_rev.branches)
    c_rev.tags = filter(not_excluded, c_rev.tags)

    # Convert all branches that are forced to be tags
    for forced_tag in Ctx().forced_tags:
      if forced_tag in c_rev.branches:
        c_rev.branches.remove(forced_tag)
        c_rev.tags.append(forced_tag)

    # Convert all tags that are forced to be branches
    for forced_branch in Ctx().forced_branches:
      if forced_branch in c_rev.tags:
        c_rev.tags.remove(forced_branch)
        c_rev.branches.append(forced_branch)

    # see if this is "near" any of the resync records we
    # have recorded for this digest [of the log message].
    for record in resync.get(c_rev.digest, []):
      if record[2] == c_rev.timestamp:
        # This means that either c_rev is the same revision that
        # caused the resync record to exist, or c_rev is a different
        # CVS revision that happens to have the same timestamp.  In
        # either case, we don't have to do anything, so we...
        continue

      if record[0] <= c_rev.timestamp <= record[1]:
        # bingo!  We probably want to remap the time on this c_rev,
        # unless the remapping would be useless because the new time
        # would fall outside the COMMIT_THRESHOLD window for this
        # commit group.
        new_timestamp = record[2]
        # If the new timestamp is earlier than that of our previous revision
        if new_timestamp < c_rev.prev_timestamp:
          desc = ("%s: Attempt to set timestamp of revision %s on file %s"
                  + " to time %s, which is before previous the time of"
                  + " revision %s (%s):")
          Log().write(LOG_WARN, desc % (warning_prefix, c_rev.rev,
                                        c_rev.cvs_path, new_timestamp,
                                        c_rev.prev_rev, c_rev.prev_timestamp))
          # If resyncing our rev to c_rev.prev_timestamp + 1 will place
          # the timestamp of c_rev within COMMIT_THRESHOLD of the
          # attempted resync time, then sync back to c_rev.prev_timestamp
          # + 1...
          if ((c_rev.prev_timestamp + 1) - new_timestamp) < COMMIT_THRESHOLD:
            new_timestamp = c_rev.prev_timestamp + 1
            Log().write(LOG_WARN, "%s: Time set to %s" % (warning_prefix,
                                                          new_timestamp))
          else:
            Log().write(LOG_WARN, "%s: Timestamp left untouched" %
                        warning_prefix)
            continue

        # If the new timestamp is later than that of our next revision
        elif c_rev.next_timestamp and new_timestamp > c_rev.next_timestamp:
          desc = ("%s: Attempt to set timestamp of revision %s on file %s"
                  + " to time %s, which is after time of next"
                  + " revision %s (%s):")
          # Report the *next* revision here: it is the one whose
          # timestamp (c_rev.next_timestamp) is being compared against.
          Log().write(LOG_WARN, desc % (warning_prefix, c_rev.rev,
                                        c_rev.cvs_path, new_timestamp,
                                        c_rev.next_rev, c_rev.next_timestamp))
          # If resyncing our rev to c_rev.next_timestamp - 1 will place
          # the timestamp of c_rev within COMMIT_THRESHOLD of the
          # attempted resync time, then sync forward to c_rev.next_timestamp
          # - 1...
          if (new_timestamp - (c_rev.next_timestamp - 1)) < COMMIT_THRESHOLD:
            new_timestamp = c_rev.next_timestamp - 1
            Log().write(LOG_WARN, "%s: Time set to %s" % (warning_prefix,
                                                          new_timestamp))
          else:
            Log().write(LOG_WARN, "%s: Timestamp left untouched" %
                        warning_prefix)
            continue

        # Fix for Issue #71: Avoid resyncing two consecutive revisions
        # to the same timestamp.
        elif (new_timestamp == c_rev.prev_timestamp
              or new_timestamp == c_rev.next_timestamp):
          continue

        # adjust the time range. we want the COMMIT_THRESHOLD from the
        # bounds of the earlier/latest commit in this group.
        record[0] = min(record[0], c_rev.timestamp - COMMIT_THRESHOLD/2)
        record[1] = max(record[1], c_rev.timestamp + COMMIT_THRESHOLD/2)

        msg = "PASS2 RESYNC: '%s' (%s): old time='%s' delta=%ds" \
              % (c_rev.cvs_path, c_rev.rev, time.ctime(c_rev.timestamp),
                 new_timestamp - c_rev.timestamp)
        Log().write(LOG_VERBOSE, msg)

        c_rev.timestamp = new_timestamp
        tweaked_timestamps_db[c_rev.unique_key()] = new_timestamp

        # stop looking for hits
        break

    output.write(str(c_rev) + "\n")

  # Make sure everything is flushed to disk before a later pass reads
  # the clean-revisions file.
  output.close()
  Log().write(LOG_QUIET, "Done")

def pass3():
  """Pass 3: sort the clean-revisions file into the sorted-revisions
  file and register the latter for cleanup."""
  Log().write(LOG_QUIET, "Sorting CVS revisions...")
  clean_revs = temp(DATAFILE + CLEAN_REVS_SUFFIX)
  sorted_revs = temp(DATAFILE + SORTED_REVS_SUFFIX)
  sort_file(clean_revs, sorted_revs)
  Cleanup().register(temp(DATAFILE + SORTED_REVS_SUFFIX), pass5)
  Log().write(LOG_QUIET, "Done")

def pass4():
  """Pass 4: load the sorted revisions into a CVSRevisionDatabase.

  Unless this is a trunk-only conversion, each revision is also
  logged to the LastSymbolicNameDatabase, which contains the last
  CVSRevision that is a source for each tag or branch.
  """
  Log().write(LOG_QUIET,
      "Copying CVS revision data from flat file to database...")
  cvs_revs_db = CVSRevisionDatabase(DB_OPEN_NEW)
  if Ctx().trunk_only:
    # A do-nothing stand-in, so that Ctx().trunk_only does not have to
    # be tested on every iteration of the loop below.
    class DummyLSNDB:
      def noop(*args):
        pass
      log_revision = noop
      create_database = noop
    last_sym_name_db = DummyLSNDB()
  else:
    Log().write(LOG_QUIET,
        "Finding last CVS revisions for all symbolic names...")
    last_sym_name_db = LastSymbolicNameDatabase(DB_OPEN_NEW)

  sorted_revs = fileinput.FileInput(temp(DATAFILE + SORTED_REVS_SUFFIX))
  for line in sorted_revs:
    # Each line, minus its trailing newline, describes one revision.
    c_rev = CVSRevision(Ctx(), line[:-1])
    cvs_revs_db.log_revision(c_rev)
    last_sym_name_db.log_revision(c_rev)
    StatsKeeper().record_c_rev(c_rev)

  last_sym_name_db.create_database()
  StatsKeeper().archive()
  Log().write(LOG_QUIET, "Done")

def pass5():
  """
  Pass 5: feed the sorted CVSRevisions to a CVSRevisionAggregator,
  which generates the SVNCommit <-> CVSRevision mapping databases.
  CVSCommit._commit also calls SymbolingsLogger to register
  CVSRevisions that represent an opening or closing for a path on a
  branch or tag.  See SymbolingsLogger for more details.
  """
  Log().write(LOG_QUIET, "Mapping CVS revisions to Subversion commits...")

  aggregator = CVSRevisionAggregator()
  sorted_revs = fileinput.FileInput(temp(DATAFILE + SORTED_REVS_SUFFIX))
  for line in sorted_revs:
    c_rev = CVSRevision(Ctx(), line[:-1])
    # In a trunk-only conversion, revisions on a branch are skipped.
    if not Ctx().trunk_only or c_rev.branch_name is None:
      aggregator.process_revision(c_rev)
  aggregator.flush()

  StatsKeeper().set_svn_rev_count(SVNCommit.revnum - 1)
  StatsKeeper().archive()
  Log().write(LOG_QUIET, "Done")

def pass6():
  """Pass 6: sort the symbol openings/closings file.  Nothing is
  sorted for a trunk-only conversion."""
  Log().write(LOG_QUIET, "Sorting symbolic name source revisions...")

  trunk_only = Ctx().trunk_only
  if not trunk_only:
    unsorted_path = temp(SYMBOL_OPENINGS_CLOSINGS)
    sorted_path = temp(SYMBOL_OPENINGS_CLOSINGS_SORTED)
    sort_file(unsorted_path, sorted_path)
    Cleanup().register(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), pass8)
  Log().write(LOG_QUIET, "Done")

def pass7():
  """Pass 7: record, for each symbolic name, the file offset at which
  it first appears in the sorted openings/closings file.  Nothing is
  done for a trunk-only conversion."""
  Log().write(LOG_QUIET, "Determining offsets for all symbolic names...")

  def generate_offsets_for_symbolings():
    """This function iterates through all the lines in
    SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
    SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
    where SYMBOLIC_NAME is first encountered.  This will allow us to
    seek to the various offsets in the file and sequentially read only
    the openings and closings that we need."""

    ###PERF This is a fine example of a db that can be in-memory and
    #just flushed to disk when we're done.  Later, it can just be sucked
    #back into memory.
    offsets_db = Database(temp(SYMBOL_OFFSETS_DB), DB_OPEN_NEW)
    Cleanup().register(temp(SYMBOL_OFFSETS_DB), pass8)

    symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), 'r')
    try:
      old_sym = ""
      while 1:
        # Remember where the line starts before reading it; that is
        # the offset stored for a newly encountered symbol.
        fpos = symbolings.tell()
        line = symbolings.readline()
        if not line:
          break
        sym, svn_revnum, cvs_rev_key = line.split(" ", 2)
        if sym != old_sym:
          Log().write(LOG_VERBOSE, " ", sym)
          old_sym = sym
          offsets_db[sym] = fpos
    finally:
      # Release the file handle even if a line cannot be parsed.
      symbolings.close()

  if not Ctx().trunk_only:
    generate_offsets_for_symbolings()
  Log().write(LOG_QUIET, "Done.")

def pass8():
  """Pass 8: replay the stored SVNCommits, starting with number 2,
  through an SVNRepositoryMirror.

  Unless this is a dry run, the mirror gets a RepositoryDelegate when
  Ctx().target is set and a DumpfileDelegate otherwise.  A
  StdoutDelegate is always added."""
  svncounter = 2 # Repository initialization is 1.
  repos = SVNRepositoryMirror()
  persistence_manager = PersistenceManager(DB_OPEN_READ)

  if Ctx().target:
    delegate_class = RepositoryDelegate
    banner = "Starting Subversion Repository."
  else:
    delegate_class = DumpfileDelegate
    banner = "Starting Subversion Dumpfile."
  if not Ctx().dry_run:
    repos.add_delegate(delegate_class())
  Log().write(LOG_QUIET, banner)

  repos.add_delegate(StdoutDelegate(StatsKeeper().svn_rev_count()))

  # Commit until the persistence manager has no commit for the
  # requested number.
  svn_commit = persistence_manager.get_svn_commit(svncounter)
  while svn_commit:
    repos.commit(svn_commit)
    svncounter += 1
    svn_commit = persistence_manager.get_svn_commit(svncounter)

  repos.finish()

# The pass functions, listed in numeric order.
_passes = [
  pass1, pass2, pass3, pass4,
  pass5, pass6, pass7, pass8,
  ]


class Ctx:
  """Session state for this run of cvs2svn, such as the run-time
  options.  This class is a Borg (all instances share one attribute
  dictionary), see
  http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531.
  """
  __shared_state = { }
  def __init__(self):
    state = self.__shared_state
    self.__dict__ = state
    if state:
      # The defaults were already installed by an earlier instance.
      return

    # Output destination and working files.
    self.target = None
    self.dumpfile = DUMPFILE
    self.tmpdir = '.'
    self.existing_svnrepos = 0
    self.dump_only = 0
    self.dry_run = 0
    self.svnadmin = "svnadmin"
    self.bdb_txn_nosync = 0
    self.fs_type = None

    # Logging and general behavior.
    self.verbose = 0
    self.quiet = 0
    self.print_help = 0
    self.skip_cleanup = 0
    self.use_cvs = None
    self.username = None
    self.encoding = ["ascii"]

    # Repository layout.
    self.prune = 1
    self.trunk_only = 0
    self.trunk_base = "trunk"
    self.tags_base = "tags"
    self.branches_base = "branches"

    # Symbol handling.
    self.forced_branches = []
    self.forced_tags = []
    self.excludes = []
    self.symbol_transforms = []

    # File properties.
    self.mime_types_file = None
    self.auto_props_file = None
    self.auto_props_ignore_case = False
    self.no_default_eol = 0
    self.eol_from_mime_type = 0
    self.keywords_off = 0
    self.svn_property_setters = []


class SVNPropertySetter:
  """Abstract base for objects that set properties on an
  SVNCommitItem."""

  def set_properties(self, s_item):
    """Set whatever properties can be determined for S_ITEM.

    Subclasses must override this; the base implementation always
    raises NotImplementedError."""
    raise NotImplementedError()


class CVSRevisionNumberSetter(SVNPropertySetter):
  """Record the CVS revision number in the cvs2svn:cvs-rev property."""

  def set_properties(self, s_item):
    """Store s_item.c_rev.rev as 'cvs2svn:cvs-rev' on S_ITEM and flag
    its properties as changed."""
    cvs_rev = s_item.c_rev.rev
    s_item.svn_props['cvs2svn:cvs-rev'] = cvs_rev
    s_item.svn_props_changed = True


class ExecutablePropertySetter(SVNPropertySetter):
  """Set svn:executable according to c_rev.file_executable."""

  def set_properties(self, s_item):
    """Set 'svn:executable' to '*' on S_ITEM if its CVS revision is
    marked executable; otherwise leave the properties untouched."""
    if not s_item.c_rev.file_executable:
      return
    s_item.svn_props['svn:executable'] = '*'


class BinaryFileEOLStyleSetter(SVNPropertySetter):
  """Set svn:eol-style to None for binary files."""

  def set_properties(self, s_item):
    """If the CVS revision's mode is 'b', set 'svn:eol-style' to None
    on S_ITEM; otherwise leave the properties untouched."""
    is_binary = (s_item.c_rev.mode == 'b')
    if is_binary:
      s_item.svn_props['svn:eol-style'] = None


class MimeMapper(SVNPropertySetter):
  """A class that provides mappings from file names to MIME types."""

  def __init__(self, mime_types_file):
    """Read the extension -> MIME type mappings from MIME_TYPES_FILE.

    Lines starting with '#' and lines with fewer than two fields are
    ignored.  If an extension is mapped to two different types, a
    warning is written to stderr and the later mapping wins."""
    self.mappings = { }

    # Use a private FileInput instance, like the rest of this file,
    # instead of the module-global state behind fileinput.input().
    for line in fileinput.FileInput(mime_types_file):
      if line.startswith("#"):
        continue

      # format of a line is something like
      # text/plain c h cpp
      fields = line.split()
      if len(fields) < 2:
        continue
      mime_type = fields[0]
      for ext in fields[1:]:
        previous = self.mappings.get(ext)
        if previous is not None and previous != mime_type:
          sys.stderr.write("%s: ambiguous MIME mapping for *.%s (%s or %s)\n"
                           % (warning_prefix, ext, previous, mime_type))
        self.mappings[ext] = mime_type

  def set_properties(self, s_item):
    """Set 'svn:mime-type' on S_ITEM if the extension (or, failing
    that, the base name) of its CVS path has a mapping."""
    basename, extension = os.path.splitext(
        os.path.basename(s_item.c_rev.cvs_path)
        )

    # Extension includes the dot, so strip it (will leave extension
    # empty if filename ends with a dot, which is ok):
    extension = extension[1:]

    # If there is no extension (or the file ends with a period), use
    # the base name for mapping.  This allows us to set mappings for
    # files such as README or Makefile:
    if not extension:
      extension = basename

    mime_type = self.mappings.get(extension, None)
    if mime_type is not None:
      s_item.svn_props['svn:mime-type'] = mime_type


class AutoPropsPropertySetter(SVNPropertySetter):
  """Set arbitrary svn properties based on an auto-props configuration.

  This class supports case-sensitive or case-insensitive pattern
  matching.  The 'correct' behavior is not quite clear, because
  subversion itself does an inconsistent job of handling case in
  auto-props patterns; see
  http://subversion.tigris.org/issues/show_bug.cgi?id=2036.

  If a property specified in auto-props has already been set to a
  different value, print a warning and leave the old property value
  unchanged."""

  class Pattern:
    """Describes the properties to be set for files matching a pattern."""
    def __init__(self, pattern, propdict):
      # A glob-like pattern:
      self.pattern = pattern
      # A dictionary of properties that should be set:
      self.propdict = propdict

    def match(self, basename):
      """Does the file with the specified basename match pattern?"""
      return fnmatch.fnmatch(basename, self.pattern)

  def __init__(self, configfilename, ignore_case):
    """Load the auto-props patterns from CONFIGFILENAME.

    CONFIGFILENAME is parsed with ConfigParser.  Every option with a
    non-empty value in a section named 'auto-props' becomes a pattern.
    If IGNORE_CASE is true, section names, patterns and file basenames
    are all lower-cased before they are compared; otherwise they are
    compared verbatim."""

    config = ConfigParser.ConfigParser()
    if ignore_case:
      self.transform_case = self.squash_case
    else:
      # ConfigParser lower-cases option names by default; switch that
      # off so that the patterns keep their case.
      config.optionxform = self.preserve_case
      self.transform_case = self.preserve_case

    # Close the configuration file explicitly instead of relying on
    # garbage collection.
    config_file = open(configfilename)
    try:
      config.readfp(config_file)
    finally:
      config_file.close()

    self.patterns = []
    for section in config.sections():
      if self.transform_case(section) == 'auto-props':
        for (pattern, value) in config.items(section):
          if value:
            self._add_pattern(pattern, value)

  def squash_case(self, s):
    """Return S converted to lower case."""
    return s.lower()

  def preserve_case(self, s):
    """Return S unchanged."""
    return s

  def _add_pattern(self, pattern, value):
    """Register PATTERN with the properties described by VALUE.

    VALUE is a ';'-separated list of 'name=value' or bare 'name'
    entries.  A bare name is recorded with the value None."""

    props = value.split(';')
    propdict = {}
    for prop in props:
      s = prop.split('=', 1)
      if len(s) == 1:
        propdict[s[0]] = None
      else:
        propdict[s[0]] = s[1]
    self.patterns.append(
        self.Pattern(self.transform_case(pattern), propdict))

  def get_propdict(self, path):
    """Return a dictionary of the properties to set for PATH.

    Only the basename of PATH is matched against the patterns.  If
    several matching patterns give different values for the same
    property, the value from the first such pattern is kept and a
    warning is logged."""

    basename = self.transform_case(os.path.basename(path))
    propdict = {}
    for pattern in self.patterns:
      if pattern.match(basename):
        for (key,value) in pattern.propdict.items():
          if propdict.has_key(key):
            if propdict[key] != value:
              Log().write(
                  LOG_WARN,
                  "Contradictory values set for property '%s' for file %s."
                  % (key, path,))
          else:
            propdict[key] = value

    return propdict

  def set_properties(self, s_item):
    """Apply the auto-props for S_ITEM's path to S_ITEM.svn_props.

    A property that is already present in S_ITEM.svn_props is never
    overwritten; if its existing value differs, a warning is logged."""

    propdict = self.get_propdict(s_item.c_rev.cvs_path)
    for (k,v) in propdict.items():
      if s_item.svn_props.has_key(k):
        if s_item.svn_props[k] != v:
          Log().write(
              LOG_WARN,
              "Property '%s' already set for file %s."
              % (k, s_item.c_rev.cvs_path,))
      else:
        s_item.svn_props[k] = v


class BinaryFileDefaultMimeTypeSetter(SVNPropertySetter):
  """Give binary files a fallback svn:mime-type of
  'application/octet-stream' when no svn:mime-type has been set."""

  def set_properties(self, s_item):
    """Set the fallback MIME type on S_ITEM if S_ITEM.c_rev.mode is
    'b' and S_ITEM.svn_props has no 'svn:mime-type' key yet."""

    if s_item.svn_props.has_key('svn:mime-type'):
      # Never override a MIME type chosen earlier.
      return
    if s_item.c_rev.mode != 'b':
      return
    s_item.svn_props['svn:mime-type'] = 'application/octet-stream'


class EOLStyleFromMimeTypeSetter(SVNPropertySetter):
  """Derive svn:eol-style from svn:mime-type.

  This only acts when svn:mime-type is known and svn:eol-style has
  not been decided yet.  A MIME type starting with 'text/' yields
  svn:eol-style 'native'; any other MIME type forces svn:eol-style to
  remain unset (recorded as None).  See also issue #39."""

  def set_properties(self, s_item):
    """Fill in S_ITEM's 'svn:eol-style' from its 'svn:mime-type'."""

    props = s_item.svn_props
    if props.has_key('svn:eol-style'):
      return
    if props.get('svn:mime-type', None) is None:
      return

    if props['svn:mime-type'].startswith("text/"):
      eol_style = 'native'
    else:
      eol_style = None
    props['svn:eol-style'] = eol_style


class DefaultEOLStyleSetter(SVNPropertySetter):
  """Supply a default svn:eol-style for items that have none yet."""

  def __init__(self, value):
    """Remember VALUE as the default to store under 'svn:eol-style'."""

    self.value = value

  def set_properties(self, s_item):
    """Store the default under 'svn:eol-style' in S_ITEM.svn_props
    unless that key is already present."""

    if s_item.svn_props.has_key('svn:eol-style'):
      return
    s_item.svn_props['svn:eol-style'] = self.value


class KeywordsPropertySetter(SVNPropertySetter):
  """Set svn:keywords, based on the file's CVS mode, for items on
  which it has not been set yet.  See issue #2."""

  def __init__(self, value):
    """Remember VALUE as the svn:keywords value to apply."""

    self.value = value

  def set_properties(self, s_item):
    """Store the configured value under 'svn:keywords' when that key
    is absent from S_ITEM.svn_props and S_ITEM.c_rev.mode is None,
    'kv' or 'kvl'."""

    if s_item.svn_props.has_key('svn:keywords'):
      return
    if s_item.c_rev.mode not in [None, 'kv', 'kvl']:
      return
    s_item.svn_props['svn:keywords'] = self.value


def convert(start_pass, end_pass):
  """Convert a CVS repository to an SVN repository.

  Run the passes numbered START_PASS through END_PASS (1-based,
  inclusive) from _passes, recording the duration of each one with
  StatsKeeper.  After each pass, drop the per-pass attributes of
  Ctx() and, unless Ctx().skip_cleanup is set, invoke the Cleanup
  object for that pass.  Finally log the statistics and timings."""

  cleanup = Cleanup()

  # timestamps[n] is the time at which pass n finished; the slot just
  # before the first pass holds the overall starting time.
  timestamps = [ None ] * (end_pass + 1)
  timestamps[start_pass - 1] = time.time()
  StatsKeeper().set_start_time(time.time())

  for pass_number in range(start_pass, end_pass + 1):
    index = pass_number - 1
    Log().write(LOG_QUIET, '----- pass %d -----' % pass_number)
    run_pass = _passes[index]
    run_pass()
    timestamps[pass_number] = time.time()
    elapsed = timestamps[pass_number] - timestamps[index]
    StatsKeeper().log_duration_for_pass(elapsed, pass_number)

    # Dispose of items in Ctx() not intended to live past the end of
    # the pass.  These are identified by exactly one leading
    # underscore; name-mangled private attributes of Ctx are kept.
    ctx = Ctx()
    for attr in dir(ctx):
      if len(attr) <= 2 or attr[0] != '_' or attr[1] == '_':
        continue
      if attr[:6] == "_Ctx__":
        continue
      delattr(ctx, attr)

    if not ctx.skip_cleanup:
      cleanup.cleanup(run_pass)
    StatsKeeper().set_end_time(time.time())

  Log().write(LOG_QUIET, StatsKeeper())
  if end_pass < 4:
    Log().write(LOG_QUIET,
                '(These are unaltered CVS repository stats and do not\n'
                ' reflect tags or branches excluded via --exclude)\n')
  Log().write(LOG_NORMAL, StatsKeeper().timings())


def normalize_ttb_path(opt, path):
  """Return PATH normalized for use with --trunk, --tags, or --branches.

  PATH is split on '/' and its components are recombined with
  _path_join().  OPT is the name of the option that PATH was passed
  to; it is used only in the error message.

  Raise FatalError if the normalized path is empty."""

  components = path.split('/')
  norm_path = _path_join(*components)
  if norm_path:
    return norm_path
  raise FatalError("cannot pass an empty path to %s." % (opt,))


def verify_paths_disjoint(*paths):
  """Check that none of PATHS is equal to, or nested inside, another.

  Nesting is meant in the sense that 'a/b/c/d' is nested in 'a/b'.
  Raise FatalError naming the offending pair if two paths are
  identical or one is nested in the other; otherwise return None."""

  decorated = []
  for path in paths:
    decorated.append((path.split('/'), path))

  # Component lists compare element by element, and a list that is a
  # prefix of another sorts before it.  So whenever some path is nested
  # in another, sorting puts at least one [nest, nestling] pair side by
  # side, and it is enough to compare neighbours.
  decorated.sort()

  for (outer_parts, outer), (inner_parts, inner) \
          in zip(decorated, decorated[1:]):
    if len(outer_parts) > len(inner_parts):
      continue
    if inner_parts[:len(outer_parts)] == outer_parts:
      raise FatalError("paths %s and %s are not disjoint." % (outer, inner,))


def usage():
  print 'USAGE: %s [-v] [-s svn-repos-path] [-p pass] cvs-repos-path' \
        % os.path.basename(sys.argv[0])
  print '  --help, -h           print this usage message and exit with success'
  print '  --version            print the version number'
  print '  -q                   quiet'
  print '  -v                   verbose'
  print '  -s PATH              path for SVN repos'
  print '  -p START[:END]       start at pass START, end at pass END of %d' \
        % len(_passes)
  print '                       If only START is given, run only pass START'
  print '                       (implicitly enables --skip-cleanup)'
  print '  --existing-svnrepos  load into existing SVN repository'
  print '  --dumpfile=PATH      name of intermediate svn dumpfile'
  print '  --tmpdir=PATH        directory to use for tmp data (default to cwd)'
  print '  --profile            profile with \'hotshot\' (into file cvs2svn.hotshot)'
  print '  --dry-run            do not create a repository or a dumpfile;'
  print '                       just print what would happen.'
  print '  --use-cvs            use CVS instead of RCS \'co\' to extract data'
  print '                       (only use this if having problems with RCS)'
  print '  --svnadmin=PATH      path to the svnadmin program'
  print '  --trunk-only         convert only trunk commits, not tags nor branches'
  print '  --trunk=PATH         path for trunk (default: %s)'    \
        % Ctx().trunk_base
  print '  --branches=PATH      path for branches (default: %s)' \
        % Ctx().branches_base
  print '  --tags=PATH          path for tags (default: %s)'     \
        % Ctx().tags_base
  print '  --no-prune           don\'t prune empty directories'
  print '  --dump-only          just produce a dumpfile, don\'t commit to a repos'
  print '  --encoding=ENC       encoding of paths and log messages in CVS repos'
  print '                       Multiple of these options may be passed, where they'
  print '                       will be treated as an ordered list of encodings to'
  print '                       attempt (with "ascii" as a hardcoded last resort)'
  print '  --force-branch=NAME  force NAME to be a branch'
  print '  --force-tag=NAME     force NAME to be a tag'
  print '  --exclude=REGEXP     exclude branches and tags matching REGEXP'
  print '  --symbol-transform=P:S transform symbol names from P to S where P and S'
  print '                       use Python regexp and reference syntax respectively'
  print '  --username=NAME      username for cvs2svn-synthesized commits'
  print '  --skip-cleanup       prevent the deletion of intermediate files'
  print '  --bdb-txn-nosync     pass --bdb-txn-nosync to "svnadmin create"'
  print '  --fs-type=TYPE       pass --fs-type=TYPE to "svnadmin create"'
  print '  --cvs-revnums        record CVS revision numbers as file properties'
  print '  --auto-props=FILE    set file properties from the auto-props section'
  print '                       of a file in svn config format'
  print '  --auto-props-ignore-case Ignore case when matching auto-props patterns'
  print '  --mime-types=FILE    specify an apache-style mime.types file for'
  print '                       setting svn:mime-type'
  print '  --eol-from-mime-type set svn:eol-style from mime type if known'
  print '  --no-default-eol     don\'t set svn:eol-style to \'native\' for'
  print '                       non-binary files with undetermined mime types'
  print '  --keywords-off       don\'t set svn:keywords on any files (by default,'
  print '                       cvs2svn sets svn:keywords on non-binary files to'
  print '                       "%s")' % SVN_KEYWORDS_VALUE

def main():
  # Convenience var, so we don't have to keep instantiating this Borg.
  ctx = Ctx()

  profiling = None
  start_pass = 1
  end_pass = len(_passes)

  try:
    opts, args = getopt.getopt(sys.argv[1:], 'p:s:qvh',
                               [ "help", "create", "trunk=",
                                 "username=", "existing-svnrepos",
                                 "branches=", "tags=", "encoding=",
                                 "force-branch=", "force-tag=", "exclude=",
                                 "use-cvs", "mime-types=",
                                 "auto-props=", "auto-props-ignore-case",
                                 "eol-from-mime-type", "no-default-eol",
                                 "trunk-only", "no-prune", "dry-run",
                                 "dump-only", "dumpfile=", "tmpdir=",
                                 "svnadmin=", "skip-cleanup", "cvs-revnums",
                                 "bdb-txn-nosync", "fs-type=",
                                 "version", "profile",
                                 "keywords-off", "symbol-transform="])
  except getopt.GetoptError, e:
    sys.stderr.write(error_prefix + ': ' + str(e) + '\n\n')
    usage()
    sys.exit(1)

  for opt, value in opts:
    if opt == '--version':
        print '%s version %s' % (os.path.basename(sys.argv[0]), VERSION)
        sys.exit(0)
    elif opt == '-p':
      # Don't cleanup if we're doing incrementals.
      ctx.skip_cleanup = 1
      if value.find(':') > 0:
        start_pass, end_pass = map(int, value.split(':'))
      else:
        end_pass = start_pass = int(value)
      if start_pass > len(_passes) or start_pass < 1:
        raise FatalError(
            'illegal value (%d) for starting pass.  Must be 1 through %d.'
            % (int(start_pass), len(_passes),))
      if end_pass < start_pass or end_pass > len(_passes):
        raise FatalError(
            'illegal value (%d) for ending pass.  Must be %d through %d.'
            % (int(end_pass), int(start_pass), len(_passes),))
    elif (opt == '--help') or (opt == '-h'):
      ctx.print_help = 1
    elif opt == '-v':
      Log().log_level = LOG_VERBOSE
      ctx.verbose = 1
    elif opt == '-q':
      Log().log_level = LOG_QUIET
      ctx.quiet = 1
    elif opt == '-s':
      ctx.target = value
    elif opt == '--existing-svnrepos':
      ctx.existing_svnrepos = 1
    elif opt == '--dumpfile':
      ctx.dumpfile = value
    elif opt == '--tmpdir':
      ctx.tmpdir = value
    elif opt == '--use-cvs':
      ctx.use_cvs = 1
    elif opt == '--svnadmin':
      ctx.svnadmin = value
    elif opt == '--trunk-only':
      ctx.trunk_only = 1
    elif opt == '--trunk':
      ctx.trunk_base = normalize_ttb_path(opt, value)
    elif opt == '--branches':
      ctx.branches_base = normalize_ttb_path(opt, value)
    elif opt == '--tags':
      ctx.tags_base = normalize_ttb_path(opt, value)
    elif opt == '--no-prune':
      ctx.prune = None
    elif opt == '--dump-only':
      ctx.dump_only = 1
    elif opt == '--dry-run':
      ctx.dry_run = 1
    elif opt == '--encoding':
      ctx.encoding.insert(-1, value)
    elif opt == '--force-branch':
      ctx.forced_branches.append(value)
    elif opt == '--force-tag':
      ctx.forced_tags.append(value)
    elif opt == '--exclude':
      try:
        ctx.excludes.append(re.compile('^' + value + '$'))
      except re.error, e:
        raise FatalError("'%s' is not a valid regexp." % (value,))
    elif opt == '--mime-types':
      ctx.mime_types_file = value
    elif opt == '--auto-props':
      ctx.auto_props_file = value
    elif opt == '--auto-props-ignore-case':
      ctx.auto_props_ignore_case = True
    elif opt == '--eol-from-mime-type':
      ctx.eol_from_mime_type = 1
    elif opt == '--no-default-eol':
      ctx.no_default_eol = 1
    elif opt == '--keywords-off':
      ctx.keywords_off = 1
    elif opt == '--username':
      ctx.username = value
    elif opt == '--skip-cleanup':
      ctx.skip_cleanup = 1
    elif opt == '--cvs-revnums':
      ctx.svn_property_setters.append(CVSRevisionNumberSetter())
    elif opt == '--bdb-txn-nosync':
      ctx.bdb_txn_nosync = 1
    elif opt == '--fs-type':
      ctx.fs_type = value
    elif opt == '--create':
      sys.stderr.write(warning_prefix +
          ': The behaviour produced by the --create option is now the '
          'default,\nand passing the option is deprecated.\n')
    elif opt == '--profile':
      profiling = 1
    elif opt == '--symbol-transform':
      [pattern, replacement] = value.split(":")
      try:
        pattern = re.compile(pattern)
      except re.error, e:
        raise FatalError("'%s' is not a valid regexp." % (pattern,))
      ctx.symbol_transforms.append((pattern, replacement,))

  if ctx.print_help:
    usage()
    sys.exit(0)

  # Consistency check for options and arguments.
  if len(args) == 0:
    usage()
    sys.exit(1)

  if len(args) > 1:
    sys.stderr.write(error_prefix +
                     ": must pass only one CVS repository.\n")
    usage()
    sys.exit(1)

  cvsroot = args[0]

  if ctx.use_cvs:
    ctx.cvs_repository = CVSRepositoryViaCVS(cvsroot)
  else:
    ctx.cvs_repository = CVSRepositoryViaRCS(cvsroot)

  if (not ctx.target) and (not ctx.dump_only) and (not ctx.dry_run):
    raise FatalError("must pass one of '-s' or '--dump-only'.")

  def not_both(opt1val, opt1name, opt2val, opt2name):
    if opt1val and opt2val:
      raise FatalError("cannot pass both '%s' and '%s'."
                       % (opt1name, opt2name,))

  not_both(ctx.target, '-s',
           ctx.dump_only, '--dump-only')

  not_both(ctx.dump_only, '--dump-only',
           ctx.existing_svnrepos, '--existing-svnrepos')

  not_both(ctx.bdb_txn_nosync, '--bdb-txn-nosync',
           ctx.existing_svnrepos, '--existing-svnrepos')

  not_both(ctx.dump_only, '--dump-only',
           ctx.bdb_txn_nosync, '--bdb-txn-nosync')

  not_both(ctx.quiet, '-q',
           ctx.verbose, '-v')

  not_both(ctx.fs_type, '--fs-type',
           ctx.existing_svnrepos, '--existing-svnrepos')

  if ctx.fs_type and ctx.fs_type != 'bdb' and ctx.bdb_txn_nosync:
    raise FatalError("cannot pass --bdb-txn-nosync with --fs-type=%s."
                     % ctx.fs_type)

  # Create the default project (using ctx.trunk, ctx.branches, and ctx.tags):
  ctx.project = Project(ctx.cvs_repository.cvs_repos_path,
                        ctx.trunk_base, ctx.branches_base, ctx.tags_base)

  if ctx.existing_svnrepos and not os.path.isdir(ctx.target):
    raise FatalError("the svn-repos-path '%s' is not an "
                     "existing directory." % ctx.target)

  if not ctx.dump_only and not ctx.existing_svnrepos \
     and (not ctx.dry_run) and os.path.exists(ctx.target):
    raise FatalError("the svn-repos-path '%s' exists.\n"
                     "Remove it, or pass '--existing-svnrepos'."
                     % ctx.target)

  if ctx.target and not ctx.dry_run:
    # Verify that svnadmin can be executed.  The 'help' subcommand
    # should be harmless.
    try:
      check_command_runs([ctx.svnadmin, 'help'], 'svnadmin')
    except CommandFailedException, e:
      raise FatalError(
          '%s\n'
          'svnadmin could not be executed.  Please ensure that it is\n'
          'installed and/or use the --svnadmin option.' % (e,))

  ctx.svn_property_setters.append(ExecutablePropertySetter())

  ctx.svn_property_setters.append(BinaryFileEOLStyleSetter())

  if ctx.mime_types_file:
    ctx.svn_property_setters.append(MimeMapper(ctx.mime_types_file))

  if ctx.auto_props_file:
    ctx.svn_property_setters.append(AutoPropsPropertySetter(
        ctx.auto_props_file, ctx.auto_props_ignore_case))

  ctx.svn_property_setters.append(BinaryFileDefaultMimeTypeSetter())

  if ctx.eol_from_mime_type:
    ctx.svn_property_setters.append(EOLStyleFromMimeTypeSetter())

  if ctx.no_default_eol:
    ctx.svn_property_setters.append(DefaultEOLStyleSetter(None))
  else:
    ctx.svn_property_setters.append(DefaultEOLStyleSetter('native'))

  if not ctx.keywords_off:
    ctx.svn_property_setters.append(
        KeywordsPropertySetter(SVN_KEYWORDS_VALUE))

  # Make sure the tmp directory exists.  Note that we don't check if
  # it's empty -- we want to be able to use, for example, "." to hold
  # tempfiles.  But if we *did* want check if it were empty, we'd do
  # something like os.stat(ctx.tmpdir)[stat.ST_NLINK], of course :-).
  if not os.path.exists(ctx.tmpdir):
    os.mkdir(ctx.tmpdir)
  elif not os.path.isdir(ctx.tmpdir):
    raise FatalError(
        "cvs2svn tried to use '%s' for temporary files, but that path\n"
        "  exists and is not a directory.  Please make it be a directory,\n"
        "  or specify some other directory for temporary files."
        % (ctx.tmpdir,))

  # But do lock the tmpdir, to avoid process clash.
  try:
    os.mkdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
  except OSError, e:
    if e.errno == errno.EACCES:
      raise FatalError("Permission denied:"
                       + " No write access to directory '%s'." % ctx.tmpdir)
    if e.errno == errno.EEXIST:
      raise FatalError(
          "cvs2svn is using directory '%s' for temporary files, but\n"
          "  subdirectory '%s/cvs2svn.lock' exists, indicating that another\n"
          "  cvs2svn process is currently using '%s' as its temporary\n"
          "  workspace.  If you are certain that is not the case,\n"
          "  then remove the '%s/cvs2svn.lock' subdirectory."
          % (ctx.tmpdir, ctx.tmpdir, ctx.tmpdir, ctx.tmpdir,))
    raise
  try:
    if profiling:
      import hotshot
      prof = hotshot.Profile('cvs2svn.hotshot')
      prof.runcall(convert, start_pass, end_pass)
      prof.close()
    else:
      convert(start_pass, end_pass)
  finally:
    try: os.rmdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
    except: pass


if __name__ == '__main__':
  try:
    main()
  except FatalException, e:
    sys.stderr.write(str(e))
    sys.exit(1)


