File: symbol_strategy.py

package info (click to toggle)
cvs2svn 2.4.0-4
  • links: PTS
  • area: main
  • in suites: stretch
  • size: 3,720 kB
  • sloc: python: 22,383; sh: 512; perl: 121; makefile: 84
file content (720 lines) | stat: -rw-r--r-- 22,072 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
# (Be in -*- python -*- mode.)
#
# ====================================================================
# Copyright (c) 2000-2008 CollabNet.  All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution.  The terms
# are also available at http://subversion.tigris.org/license-1.html.
# If newer versions of this license are posted there, you may use a
# newer version instead, at your option.
#
# This software consists of voluntary contributions made by many
# individuals.  For exact contribution history, see the revision
# history and logs, available at http://cvs2svn.tigris.org/.
# ====================================================================

"""SymbolStrategy classes determine how to convert symbols."""

import re

from cvs2svn_lib.common import FatalError
from cvs2svn_lib.common import path_join
from cvs2svn_lib.common import normalize_svn_path
from cvs2svn_lib.log import logger
from cvs2svn_lib.symbol import Trunk
from cvs2svn_lib.symbol import TypedSymbol
from cvs2svn_lib.symbol import Branch
from cvs2svn_lib.symbol import Tag
from cvs2svn_lib.symbol import ExcludedSymbol
from cvs2svn_lib.symbol_statistics import SymbolPlanError


class StrategyRule:
  """One rule in the chain that decides how a symbol is converted."""

  def start(self, symbol_statistics):
    """Hook invoked a single time, before any call to get_symbol().

    SYMBOL_STATISTICS is a SymbolStatistics instance holding the
    statistics for every symbol in every project.  Subclasses may
    override this hook to prepare themselves for work; the default
    implementation does nothing."""

    return None

  def get_symbol(self, symbol, stats):
    """Decide what to do with the symbol whose statistics are STATS.

    SYMBOL is the Trunk or Symbol object as determined so far; the
    hope is that one of the rules turns each naked Symbol into a
    TypedSymbol.

    A rule that applies to SYMBOL returns a new or modified
    AbstractSymbol; a rule that does not apply returns SYMBOL itself,
    unchanged.  Subclasses must implement this method; the base
    implementation raises NotImplementedError."""

    raise NotImplementedError()

  def finish(self):
    """Hook invoked a single time, after the last call to get_symbol().

    Subclasses may override this hook to release resources and the
    like; the default implementation does nothing."""

    return None


class _RegexpStrategyRule(StrategyRule):
  """A Strategy rule that bases its decisions on regexp matches.

  If self.regexp matches a symbol name, return self.action(symbol);
  otherwise, return the symbol unchanged.

  Subclasses must implement log(), which is called with the symbol
  each time the rule applies."""

  def __init__(self, pattern, action):
    """Initialize a _RegexpStrategyRule.

    PATTERN is a string that will be treated as a regexp pattern.
    PATTERN must match a full symbol name for the rule to apply (i.e.,
    it is anchored at the beginning and end of the symbol name).

    ACTION is the class representing how the symbol should be
    converted.  It should be one of the classes Branch, Tag, or
    ExcludedSymbol.

    If PATTERN matches a symbol name, then get_symbol() returns
    ACTION(symbol); otherwise it returns SYMBOL unchanged.

    Raise FatalError if PATTERN is not a valid regexp."""

    try:
      # Validate PATTERN by itself first, so that an unbalanced
      # pattern such as 'a)|(b' cannot accidentally become valid once
      # it is wrapped in the group below:
      re.compile(pattern)

      # Wrap PATTERN in a non-capturing group so that the anchors
      # apply to the pattern as a whole.  Without the group, a
      # top-level alternation escapes the anchors: '^a|b$' matches any
      # name that starts with 'a' or ends with 'b', whereas
      # '^(?:a|b)$' matches only 'a' and 'b'.
      self.regexp = re.compile('^(?:' + pattern + ')$')
    except re.error:
      raise FatalError("%r is not a valid regexp." % (pattern,))

    self.action = action

  def log(self, symbol):
    """Report that this rule applied to SYMBOL.

    Must be overridden by subclasses."""

    raise NotImplementedError()

  def get_symbol(self, symbol, stats):
    """Return self.action(SYMBOL) if the symbol's name matches.

    Trunk and already-typed symbols are returned unchanged, as are
    symbols whose names do not match self.regexp.  STATS is unused."""

    if isinstance(symbol, (Trunk, TypedSymbol)):
      return symbol
    elif self.regexp.match(symbol.name):
      self.log(symbol)
      return self.action(symbol)
    else:
      return symbol


class ForceBranchRegexpStrategyRule(_RegexpStrategyRule):
  """Convert every symbol whose name matches a pattern as a branch."""

  def __init__(self, pattern):
    _RegexpStrategyRule.__init__(self, pattern, action=Branch)

  def log(self, symbol):
    """Tell the verbose log why SYMBOL is being converted as a branch."""

    message = (
        'Converting symbol %s as a branch because it matches regexp "%s".'
        % (symbol, self.regexp.pattern)
        )
    logger.verbose(message)


class ForceTagRegexpStrategyRule(_RegexpStrategyRule):
  """Convert every symbol whose name matches a pattern as a tag."""

  def __init__(self, pattern):
    _RegexpStrategyRule.__init__(self, pattern, action=Tag)

  def log(self, symbol):
    """Tell the verbose log why SYMBOL is being converted as a tag."""

    message = (
        'Converting symbol %s as a tag because it matches regexp "%s".'
        % (symbol, self.regexp.pattern)
        )
    logger.verbose(message)


class ExcludeRegexpStrategyRule(_RegexpStrategyRule):
  """Exclude every symbol whose name matches a pattern."""

  def __init__(self, pattern):
    _RegexpStrategyRule.__init__(self, pattern, action=ExcludedSymbol)

  def log(self, symbol):
    """Tell the verbose log why SYMBOL is being excluded."""

    message = (
        'Excluding symbol %s because it matches regexp "%s".'
        % (symbol, self.regexp.pattern)
        )
    logger.verbose(message)


class ExcludeTrivialImportBranchRule(StrategyRule):
  """Exclude symbols that are trivial import branches.

  A branch counts as a trivial import branch when, in every file in
  which it appeared, it carried exactly one import and no other kind
  of commit.  Such branches are worthless in most cases."""

  def get_symbol(self, symbol, stats):
    """Return an ExcludedSymbol for a trivial import branch.

    Trunk and already-typed symbols are passed through untouched, as
    is any symbol that was ever created as a tag or whose branch
    creation count differs from its trivial import count."""

    if isinstance(symbol, (Trunk, TypedSymbol)):
      return symbol

    # Any use as a tag disqualifies the symbol:
    if stats.tag_create_count != 0:
      return symbol

    # Every branch creation must have been a trivial import:
    if stats.branch_create_count != stats.trivial_import_count:
      return symbol

    message = 'Excluding branch %s because it is a trivial import branch.'
    logger.verbose(message % (symbol,))
    return ExcludedSymbol(symbol)


class ExcludeVendorBranchRule(StrategyRule):
  """Exclude symbols that are pure vendor branches.

  A branch counts as a pure vendor branch when, in every file in
  which it appeared, it carried only imports and no other kind of
  commit."""

  def get_symbol(self, symbol, stats):
    """Return an ExcludedSymbol for a pure vendor branch.

    Trunk and already-typed symbols are passed through untouched, as
    is any symbol that was ever created as a tag or whose branch
    creation count differs from its pure_ntdb_count."""

    if isinstance(symbol, (Trunk, TypedSymbol)):
      return symbol

    # Any use as a tag disqualifies the symbol:
    if stats.tag_create_count != 0:
      return symbol

    # Every branch creation must have been a pure vendor branch:
    if stats.branch_create_count != stats.pure_ntdb_count:
      return symbol

    message = 'Excluding branch %s because it is a pure vendor branch.'
    logger.verbose(message % (symbol,))
    return ExcludedSymbol(symbol)


class UnambiguousUsageRule(StrategyRule):
  """Convert a symbol as a tag/branch if it is only ever used as one."""

  def get_symbol(self, symbol, stats):
    """Return a Branch or Tag if the usage in STATS is unambiguous.

    A symbol counts as a tag if it was ever created as a tag, and as a
    branch if it was ever created as a branch or ever had a commit on
    it.  If it counts as both, or as neither, no decision is made and
    SYMBOL is returned unchanged.  Trunk and already-typed symbols are
    always returned unchanged."""

    if isinstance(symbol, (Trunk, TypedSymbol)):
      return symbol

    used_as_tag = stats.tag_create_count > 0
    used_as_branch = (
        stats.branch_create_count > 0 or stats.branch_commit_count > 0
        )

    if used_as_tag == used_as_branch:
      # Either the symbol was used both ways (so we can't decide), or
      # it didn't appear at all:
      return symbol

    if used_as_branch:
      logger.verbose(
          'Converting symbol %s as a branch because it is always used as a '
          'branch.'
          % (symbol,)
          )
      return Branch(symbol)

    logger.verbose(
        'Converting symbol %s as a tag because it is always used as a '
        'tag.'
        % (symbol,)
        )
    return Tag(symbol)


class BranchIfCommitsRule(StrategyRule):
  """Convert a symbol as a branch if anything was ever committed on it."""

  def get_symbol(self, symbol, stats):
    """Return Branch(SYMBOL) if STATS records at least one commit.

    Trunk, already-typed symbols, and symbols without any commits are
    returned unchanged."""

    if isinstance(symbol, (Trunk, TypedSymbol)):
      return symbol

    if stats.branch_commit_count <= 0:
      return symbol

    message = (
        'Converting symbol %s as a branch because there are commits on it.'
        )
    logger.verbose(message % (symbol,))
    return Branch(symbol)


class HeuristicStrategyRule(StrategyRule):
  """Pick tag or branch according to which usage was more frequent.

  The symbol is converted in whichever way it was created more
  often."""

  def get_symbol(self, symbol, stats):
    """Return Tag(SYMBOL) or Branch(SYMBOL) based on creation counts.

    The symbol becomes a branch only if it was created as a branch
    strictly more often than as a tag; a tie is resolved in favor of a
    tag.  Trunk and already-typed symbols are returned unchanged."""

    if isinstance(symbol, (Trunk, TypedSymbol)):
      return symbol

    if stats.branch_create_count > stats.tag_create_count:
      logger.verbose(
          'Converting symbol %s as a branch because it is more often '
          'used as a branch.'
          % (symbol,)
          )
      return Branch(symbol)

    logger.verbose(
        'Converting symbol %s as a tag because it is more often '
        'used as a tag.'
        % (symbol,)
        )
    return Tag(symbol)


class _CatchAllRule(StrategyRule):
  """Common machinery for rules that apply to every remaining symbol.

  Such a rule is usually placed after a list of more careful rules
  (including a general rule like UnambiguousUsageRule), so that it
  only gets to handle the symbols that nothing earlier dealt with."""

  def __init__(self, action):
    """ACTION is called with the symbol to produce the converted symbol."""

    self._action = action

  def log(self, symbol):
    """Report that this rule applied to SYMBOL.

    Must be overridden by subclasses."""

    raise NotImplementedError()

  def get_symbol(self, symbol, stats):
    """Apply the action to SYMBOL unless it is Trunk or already typed."""

    already_decided = isinstance(symbol, (Trunk, TypedSymbol))
    if already_decided:
      return symbol

    self.log(symbol)
    return self._action(symbol)


class AllBranchRule(_CatchAllRule):
  """Catch-all rule that converts every symbol as a branch.

  This rule is usually placed after a list of more careful rules
  (including a general rule like UnambiguousUsageRule), so that it
  only gets to handle the symbols that nothing earlier dealt with."""

  def __init__(self):
    _CatchAllRule.__init__(self, action=Branch)

  def log(self, symbol):
    """Tell the verbose log that SYMBOL falls through to a branch."""

    message = (
        'Converting symbol %s as a branch because no other rules applied.'
        )
    logger.verbose(message % (symbol,))


class AllTagRule(_CatchAllRule):
  """Catch-all rule that converts every symbol as a tag.

  No attempt is made to detect conflicts here; those will be caught
  later by SymbolStatistics.check_consistency().

  This rule is usually placed after a list of more careful rules
  (including a general rule like UnambiguousUsageRule), so that it
  only gets to handle the symbols that nothing earlier dealt with."""

  def __init__(self):
    _CatchAllRule.__init__(self, action=Tag)

  def log(self, symbol):
    """Tell the verbose log that SYMBOL falls through to a tag."""

    message = 'Converting symbol %s as a tag because no other rules applied.'
    logger.verbose(message % (symbol,))


class AllExcludedRule(_CatchAllRule):
  """Catch-all rule that excludes every symbol.

  This rule is usually placed after a list of more careful rules
  (including a SymbolHintsFileRule or several ManualSymbolRules), so
  that it only gets to handle the symbols that nothing earlier dealt
  with."""

  def __init__(self):
    _CatchAllRule.__init__(self, action=ExcludedSymbol)

  def log(self, symbol):
    """Tell the verbose log that SYMBOL is excluded by this rule."""

    message = 'Excluding symbol %s by catch-all rule.'
    logger.verbose(message % (symbol,))


class TrunkPathRule(StrategyRule):
  """Assign a base path to Trunk objects that do not have one yet."""

  def __init__(self, trunk_path):
    """TRUNK_PATH is the base path to assign to Trunk."""

    self.trunk_path = trunk_path

  def get_symbol(self, symbol, stats):
    """Set SYMBOL.base_path if SYMBOL is a Trunk without a base path.

    SYMBOL itself is always returned; a base path that has already
    been set is left alone."""

    if not isinstance(symbol, Trunk):
      return symbol

    if symbol.base_path is None:
      symbol.base_path = self.trunk_path

    return symbol


class SymbolPathRule(StrategyRule):
  """Assign base paths to symbol LODs of one particular type."""

  def __init__(self, symbol_type, base_path):
    """Initialize a SymbolPathRule.

    SYMBOL_TYPE is the class of symbols that this rule handles.
    BASE_PATH is the path under which each such symbol is placed, in
    an entry named after the symbol."""

    self.symbol_type = symbol_type
    self.base_path = base_path

  def get_symbol(self, symbol, stats):
    """Set SYMBOL.base_path if SYMBOL has the right type and no path yet.

    The path assigned is self.base_path joined with the symbol's name.
    SYMBOL itself is always returned."""

    needs_path = (
        isinstance(symbol, self.symbol_type) and symbol.base_path is None
        )
    if needs_path:
      symbol.base_path = path_join(self.base_path, symbol.name)

    return symbol


class BranchesPathRule(SymbolPathRule):
  """Assign base paths to Branch LODs."""

  def __init__(self, branch_path):
    """BRANCH_PATH is the path under which branches are placed."""

    SymbolPathRule.__init__(
        self, symbol_type=Branch, base_path=branch_path
        )


class TagsPathRule(SymbolPathRule):
  """Assign base paths to Tag LODs."""

  def __init__(self, tag_path):
    """TAG_PATH is the path under which tags are placed."""

    SymbolPathRule.__init__(
        self, symbol_type=Tag, base_path=tag_path
        )


class HeuristicPreferredParentRule(StrategyRule):
  """Choose preferred parents heuristically.

  For each TypedSymbol that has no preferred parent yet, prefer the
  symbol that appeared most often as a possible parent of the symbol
  in question.  When several candidates are tied, the one that sorts
  first according to the symbols' "<" comparison wins."""

  def _get_preferred_parent(self, stats):
    """Return the LOD that is most often a possible parent in STATS.

    STATS.possible_parents maps each candidate parent to the number of
    times that it appeared as a possible parent.  Return the candidate
    with the highest count, breaking ties in favor of the candidate
    that compares smallest.  Return None if there are no candidates."""

    winner = None
    winning_count = -1
    for (candidate, count) in stats.possible_parents.items():
      if count > winning_count:
        is_better = True
      elif count == winning_count:
        # A tie; the candidate that sorts first is preferred:
        is_better = candidate < winner
      else:
        is_better = False

      if is_better:
        winner = candidate
        winning_count = count

    return winner

  def get_symbol(self, symbol, stats):
    """Fill in SYMBOL.preferred_parent_id if it is still unset.

    Only TypedSymbols whose preferred_parent_id is None are touched.
    SYMBOL itself is always returned."""

    if not isinstance(symbol, TypedSymbol):
      return symbol

    if symbol.preferred_parent_id is not None:
      return symbol

    parent = self._get_preferred_parent(stats)
    if parent is None:
      logger.verbose('%s has no preferred parent' % (symbol,))
    else:
      symbol.preferred_parent_id = parent.id
      message = 'The preferred parent of %s is %s' % (symbol, parent,)
      logger.verbose(message)

    return symbol


class ManualTrunkRule(StrategyRule):
  """Override the SVN path of Trunk LODs.

  Members:

    project_id -- (int or None) The id of the project whose trunk
        this rule affects.  None means that the rule applies
        regardless of project.

    svn_path -- (str) The SVN path to use as the base directory of
        the trunk, after normalization by normalize_svn_path().  This
        member must not be None, though it may be the empty string
        for a single-project, trunk-only conversion.

  """

  def __init__(self, project_id, svn_path):
    self.svn_path = normalize_svn_path(svn_path, allow_empty=True)
    self.project_id = project_id

  def get_symbol(self, symbol, stats):
    """Set SYMBOL.base_path if SYMBOL is a Trunk covered by this rule.

    The rule covers a symbol if the rule is not project-specific or if
    the symbol's project (taken from STATS) has the rule's project id.
    SYMBOL itself is always returned."""

    applies_to_project = (
        self.project_id is None
        or self.project_id == stats.lod.project.id
        )

    if applies_to_project and isinstance(symbol, Trunk):
      symbol.base_path = self.svn_path

    return symbol


def convert_as_branch(symbol):
  """Log the manual decision and return SYMBOL converted to a Branch."""

  message = 'Converting symbol %s as a branch because of manual setting.'
  logger.verbose(message % (symbol,))
  return Branch(symbol)


def convert_as_tag(symbol):
  """Log the manual decision and return SYMBOL converted to a Tag."""

  message = 'Converting symbol %s as a tag because of manual setting.'
  logger.verbose(message % (symbol,))
  return Tag(symbol)


def exclude(symbol):
  """Log the manual decision and return SYMBOL as an ExcludedSymbol."""

  message = 'Excluding symbol %s because of manual setting.'
  logger.verbose(message % (symbol,))
  return ExcludedSymbol(symbol)


class ManualSymbolRule(StrategyRule):
  """Change how particular symbols are converted.

  Members:

    project_id -- (int or None) The id of the project whose symbol
        should be affected by this rule.  If project_id is None, then
        the rule is not project-specific.

    symbol_name -- (str) The name of the symbol that should be
        affected by this rule.

    conversion -- (callable or None) A callable that converts the
        symbol to its preferred output type.  This should normally be
        one of (convert_as_branch, convert_as_tag, exclude).  If this
        member is None, then this rule does not affect the symbol's
        output type.

    svn_path -- (str or None) The SVN path that should be used as the
        base directory for this symbol, after normalization by
        normalize_svn_path().  If this member is None, then this rule
        does not affect the symbol's SVN path.

    parent_lod_name -- (str or None) The name of the line of
        development that should be preferred as the parent of this
        symbol.  (The preferred parent is the line of development from
        which the symbol should sprout.)  If this member is set to the
        string '.trunk.', then the symbol will be set to sprout
        directly from trunk.  If this member is set to None, then this
        rule won't affect the symbol's parent.

  """

  def __init__(
        self, project_id, symbol_name, conversion, svn_path, parent_lod_name
        ):
    self.project_id = project_id
    self.symbol_name = symbol_name
    self.conversion = conversion
    if svn_path is None:
      self.svn_path = None
    else:
      self.svn_path = normalize_svn_path(svn_path, allow_empty=True)
    self.parent_lod_name = parent_lod_name

  def _get_parent_by_id(self, parent_lod_name, stats):
    """Return the LOD object for the parent with name PARENT_LOD_NAME.

    (Despite the method's name, the lookup is done by name.)

    STATS is the _Stats object describing a symbol whose parent needs
    to be determined from its name.  Trunk is never matched here.  If
    none of its possible parents has name PARENT_LOD_NAME, raise a
    SymbolPlanError listing the possible parents and their counts,
    most frequent first."""

    for pp in stats.possible_parents.keys():
      # Check the type first; only non-Trunk parents are looked up by
      # name:
      if not isinstance(pp, Trunk) and pp.name == parent_lod_name:
        return pp

    # No match.  List the candidates by descending count; sorted() is
    # stable even with reverse=True, so candidates with equal counts
    # keep their original relative order:
    parent_counts = sorted(
        stats.possible_parents.items(),
        key=lambda item: item[1],
        reverse=True
        )
    lines = [
        '%s is not a valid parent for %s;'
            % (parent_lod_name, stats.lod,),
        '    possible parents (with counts):'
        ]
    for (symbol, count) in parent_counts:
      if isinstance(symbol, Trunk):
        lines.append('        .trunk. : %d' % count)
      else:
        lines.append('        %s : %d' % (symbol.name, count))
    raise SymbolPlanError('\n'.join(lines))

  def get_symbol(self, symbol, stats):
    """Apply this rule's settings to SYMBOL if the rule covers it.

    The rule covers SYMBOL if the rule is not project-specific or the
    project ids agree, SYMBOL is not a Trunk, and the name of the LOD
    in STATS equals self.symbol_name.  In that case the conversion,
    preferred parent, and SVN path are applied, in that order, each
    only if it was specified.  Otherwise SYMBOL is returned unchanged.

    Raise SymbolPlanError if the requested parent is not among the
    symbol's possible parents."""

    if (self.project_id is not None
        and self.project_id != stats.lod.project.id):
      return symbol

    elif isinstance(symbol, Trunk):
      return symbol

    elif self.symbol_name == stats.lod.name:
      if self.conversion is not None:
        symbol = self.conversion(symbol)

      if self.parent_lod_name is None:
        pass
      elif self.parent_lod_name == '.trunk.':
        symbol.preferred_parent_id = stats.lod.project.trunk_id
      else:
        symbol.preferred_parent_id = self._get_parent_by_id(
            self.parent_lod_name, stats
            ).id

      if self.svn_path is not None:
        symbol.base_path = self.svn_path

    return symbol


class SymbolHintsFileRule(StrategyRule):
  """Use manual symbol configurations read from a file.

  The input file is line-oriented with the following format:

      <project-id> <symbol-name> <conversion> [<svn-path> [<parent-lod-name>]]

  Where the fields are separated by whitespace and

      project-id -- the numerical id of the Project to which the
          symbol belongs (numbered starting with 0).  This field can
          be '.' if the rule is not project-specific.

      symbol-name -- the name of the symbol being specified, or
          '.trunk.' if the rule should apply to trunk.

      conversion -- how the symbol should be treated in the
          conversion.  This is one of the following values: 'branch',
          'tag', or 'exclude'.  This field can be '.' if the rule
          shouldn't affect how the symbol is treated in the
          conversion.  For '.trunk.', only '.' and 'trunk' are
          accepted.

      svn-path -- the SVN path that should serve as the root path of
          this LOD.  The path should be expressed as a path relative
          to the SVN root directory, with or without a leading '/'.
          This field can be omitted or '.' if the rule shouldn't
          affect the LOD's SVN path.

      parent-lod-name -- the name of the LOD that should serve as this
          symbol's parent.  This field can be omitted or '.'  if the
          rule shouldn't affect the symbol's parent, or it can be
          '.trunk.' to indicate that the symbol should sprout from the
          project's trunk.

  Lines that are empty (after stripping whitespace) or that start
  with '#' are ignored."""

  comment_re = re.compile(r'^(\#|$)')

  conversion_map = {
      'branch' : convert_as_branch,
      'tag' : convert_as_tag,
      'exclude' : exclude,
      '.' : None,
      }

  def __init__(self, filename):
    self.filename = filename

  def _parse_line(self, line):
    """Parse one LINE of the hints file.

    Return the ManualTrunkRule or ManualSymbolRule described by LINE,
    or None if LINE is blank, is a comment, or describes a rule that
    would not change anything.  Raise FatalError if LINE cannot be
    parsed or contains illegal values."""

    l = line.rstrip()
    s = l.lstrip()
    if self.comment_re.match(s):
      return None

    fields = s.split()
    if len(fields) < 3 or len(fields) > 5:
      raise FatalError(
          'The following line in "%s" cannot be parsed:\n    "%s"'
          % (self.filename, l,)
          )

    # The two optional trailing fields, when omitted, mean the same
    # thing as '.':
    fields.extend(['.'] * (5 - len(fields)))
    (project_id, symbol_name, conversion, svn_path, parent_lod_name) = fields

    if svn_path == '.':
      svn_path = None
    elif svn_path.startswith('/'):
      svn_path = svn_path[1:]

    if project_id == '.':
      project_id = None
    else:
      try:
        project_id = int(project_id)
      except ValueError:
        raise FatalError(
            'Illegal project_id in the following line:\n    "%s"' % (l,)
            )

    if symbol_name == '.trunk.':
      if conversion not in ['.', 'trunk']:
        raise FatalError('Trunk cannot be converted as a different type')

      if parent_lod_name != '.':
        raise FatalError('Trunk\'s parent cannot be set')

      if svn_path is None:
        # This rule doesn't do anything:
        return None

      return ManualTrunkRule(project_id, svn_path)

    try:
      conversion = self.conversion_map[conversion]
    except KeyError:
      raise FatalError(
          'Illegal conversion in the following line:\n    "%s"' % (l,)
          )

    if parent_lod_name == '.':
      parent_lod_name = None

    if conversion is None \
           and svn_path is None \
           and parent_lod_name is None:
      # There is nothing to be done:
      return None

    return ManualSymbolRule(
        project_id, symbol_name,
        conversion, svn_path, parent_lod_name
        )

  def start(self, symbol_statistics):
    """Read the hints file and start the rules that it defines.

    Raise FatalError if any line of the file is invalid."""

    self._rules = []

    f = open(self.filename, 'r')
    try:
      for l in f:
        rule = self._parse_line(l)
        if rule is not None:
          self._rules.append(rule)
    finally:
      # Close the file even if a line turned out to be invalid:
      f.close()

    for rule in self._rules:
      rule.start(symbol_statistics)

  def get_symbol(self, symbol, stats):
    """Pass SYMBOL through every rule from the file, in file order."""

    for rule in self._rules:
      symbol = rule.get_symbol(symbol, stats)

    return symbol

  def finish(self):
    """Finish every rule from the file, then discard the rules."""

    for rule in self._rules:
      rule.finish()

    del self._rules