File: linelog.py

package info (click to toggle)
mercurial 4.8.2-1%2Bdeb10u1
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 41,932 kB
  • sloc: python: 150,616; ansic: 39,675; tcl: 3,715; lisp: 1,448; sh: 1,285; makefile: 569; cpp: 291; xml: 36; sql: 30
file content (436 lines) | stat: -rw-r--r-- 14,885 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
# linelog - efficient cache for annotate data
#
# Copyright 2018 Google LLC.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""linelog is an efficient cache for annotate data inspired by SCCS Weaves.

SCCS Weaves are an implementation of
https://en.wikipedia.org/wiki/Interleaved_deltas. See
mercurial/help/internals/linelog.txt for an exploration of SCCS weaves
and how linelog works in detail.

Here's a hacker's summary: a linelog is a program which is executed in
the context of a revision. Executing the program emits information
about lines, including the revision that introduced them and the line
number in the file at the introducing revision. When an insertion or
deletion is performed on the file, a jump instruction is used to patch
in a new body of annotate information.
"""
from __future__ import absolute_import, print_function

import abc
import struct

from .thirdparty import (
    attr,
)
from . import (
    pycompat,
)

_llentry = struct.Struct('>II')

class LineLogError(Exception):
    """Exception type used for failures detected inside linelog itself."""
    pass

@attr.s
class lineinfo(object):
    """Annotation record for a single line emitted by a linelog program.

    Instances are created positionally as ``lineinfo(rev, linenum, offset)``
    by the LINE instruction and by ``linelog.replacelines``.
    """
    # Introducing revision of this line.
    rev = attr.ib()
    # Line number for this line in its introducing revision.
    linenum = attr.ib()
    # Private. Offset in the linelog program of this line. Used internally
    # (replacelines patches the instruction stored at this offset).
    _offset = attr.ib()

@attr.s
class annotateresult(object):
    """Outcome of running a linelog program for one revision.

    Built positionally by ``linelog.annotate`` as
    ``annotateresult(rev, lines, lastpc)``.
    """
    # Revision the program was executed for.
    rev = attr.ib()
    # List of lineinfo objects, in the order the program emitted them.
    lines = attr.ib()
    # Private. Program offset of the last instruction executed by annotate;
    # replacelines uses it as the address of the end-of-file instruction.
    _eof = attr.ib()

    def __iter__(self):
        """Iterate over the emitted lineinfo objects."""
        return iter(self.lines)

class _llinstruction(object):
    """Abstract interface implemented by every linelog instruction.

    Each concrete instruction is constructed from two operands and has to
    supply string formatting, equality, binary encoding and execution.
    """

    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def __init__(self, op1, op2):
        """Build the instruction from its two operands."""

    @abc.abstractmethod
    def encode(self):
        """Encode this instruction to the binary linelog format."""

    @abc.abstractmethod
    def execute(self, rev, pc, emit):
        """Execute this instruction.

        Args:
          rev: The revision we're annotating.
          pc: The current offset in the linelog program.
          emit: A function that accepts a single lineinfo object.

        Returns:
          The new value of pc. Returns None if execution should stop
          (that is, we've found the end of the file.)
        """

    @abc.abstractmethod
    def __eq__(self, other):
        """Tell whether ``other`` is an equivalent instruction."""

    @abc.abstractmethod
    def __str__(self):
        """Return the human-readable form of this instruction."""

    def __repr__(self):
        # The debugging representation is simply the readable form.
        text = str(self)
        return text

class _jge(_llinstruction):
    """Conditional jump: go to op2 when the current rev is >= op1."""

    def __init__(self, op1, op2):
        # op1 is the revision compared against, op2 the jump destination.
        self._cmprev, self._target = op1, op2

    def __str__(self):
        operands = (self._cmprev, self._target)
        return r'JGE %d %d' % operands

    def __eq__(self, other):
        if type(self) != type(other):
            return False
        return (self._cmprev == other._cmprev
                and self._target == other._target)

    def encode(self):
        # The opcode occupies the two low bits of the first word; for JGE
        # those bits are zero, so shifting the revision is all that's needed.
        firstword = self._cmprev << 2
        return _llentry.pack(firstword, self._target)

    def execute(self, rev, pc, emit):
        # Either take the jump or fall through to the next instruction.
        return self._target if rev >= self._cmprev else pc + 1

class _jump(_llinstruction):
    """Unconditional jump, encoded as a JGE whose first operand is 0."""

    def __init__(self, op1, op2):
        # A non-zero first operand would make this a real JGE.
        if op1 != 0:
            raise LineLogError("malformed JUMP, op1 must be 0, got %d" % op1)
        self._target = op2

    def __str__(self):
        destination = self._target
        return r'JUMP %d' % (destination)

    def __eq__(self, other):
        if type(self) != type(other):
            return False
        return self._target == other._target

    def encode(self):
        firstword = 0
        return _llentry.pack(firstword, self._target)

    def execute(self, rev, pc, emit):
        # Always taken, whatever revision is being annotated.
        destination = self._target
        return destination

class _eof(_llinstruction):
    """End-of-file marker, encoded as a JGE with both operands set to 0."""

    def __init__(self, op1, op2):
        # Both operands are checked, first operand first, so that a corrupt
        # entry is reported with the operand that is actually wrong.
        if op1 != 0:
            raise LineLogError("malformed EOF, op1 must be 0, got %d" % op1)
        if op2 != 0:
            raise LineLogError("malformed EOF, op2 must be 0, got %d" % op2)

    def __str__(self):
        return r'EOF'

    def __eq__(self, other):
        # EOF carries no state, so matching types is sufficient.
        sametype = type(self) == type(other)
        return sametype

    def encode(self):
        firstword, secondword = 0, 0
        return _llentry.pack(firstword, secondword)

    def execute(self, rev, pc, emit):
        # A pc of None tells the interpreter loop to stop.
        nextpc = None
        return nextpc

class _jl(_llinstruction):
    """Conditional jump: go to op2 when the current rev is < op1."""

    def __init__(self, op1, op2):
        # op1 is the revision compared against, op2 the jump destination.
        self._cmprev, self._target = op1, op2

    def __str__(self):
        operands = (self._cmprev, self._target)
        return r'JL %d %d' % operands

    def __eq__(self, other):
        if type(self) != type(other):
            return False
        return (self._cmprev == other._cmprev
                and self._target == other._target)

    def encode(self):
        # Opcode 1 goes in the two low bits, the revision in the bits above.
        firstword = 1 | (self._cmprev << 2)
        return _llentry.pack(firstword, self._target)

    def execute(self, rev, pc, emit):
        # Either take the jump or fall through to the next instruction.
        return self._target if rev < self._cmprev else pc + 1

class _line(_llinstruction):
    """Instruction that emits one line of annotate output."""

    def __init__(self, op1, op2):
        # op1: revision number that introduced the line.
        # op2: line number the line had in that introducing revision.
        self._rev, self._origlineno = op1, op2

    def __str__(self):
        operands = (self._rev, self._origlineno)
        return r'LINE %d %d' % operands

    def __eq__(self, other):
        if type(self) != type(other):
            return False
        return (self._rev == other._rev
                and self._origlineno == other._origlineno)

    def encode(self):
        # Opcode 2 goes in the two low bits, the revision in the bits above.
        firstword = 2 | (self._rev << 2)
        return _llentry.pack(firstword, self._origlineno)

    def execute(self, rev, pc, emit):
        # Report the line together with the offset it was found at, then
        # continue with the following instruction.
        info = lineinfo(self._rev, self._origlineno, pc)
        emit(info)
        return pc + 1

def _decodeone(data, offset):
    """Decode the linelog instruction stored at ``offset`` in ``data``.

    The first word packs the opcode in its two low bits and the first
    operand in the remaining bits; the second word is the second operand.

    Raises LineLogError when the entry cannot be read from the buffer and
    NotImplementedError for an opcode with no instruction class.
    """
    try:
        firstword, operand2 = _llentry.unpack_from(data, offset)
    except struct.error as e:
        raise LineLogError('reading an instruction failed: %r' % e)
    opcode = firstword & 0b11
    operand1 = firstword >> 2
    if opcode == 2:
        return _line(operand1, operand2)
    if opcode == 1:
        return _jl(operand1, operand2)
    if opcode != 0:
        raise NotImplementedError('Unimplemented opcode %r' % opcode)
    # Opcode 0 is shared by three instructions, told apart by which
    # operands are zero.
    if operand1 != 0:
        return _jge(operand1, operand2)
    if operand2 != 0:
        return _jump(operand1, operand2)
    return _eof(operand1, operand2)

class linelog(object):
    """Efficient cache for per-line history information.

    The state is a list of instructions (``_program``), the highest
    revision recorded so far (``_maxrev``) and the result of the most
    recent annotate run (``_lastannotate``), which several methods reuse.
    Instruction 0 is padding and is never executed; execution starts at
    offset 1.
    """

    def __init__(self, program=None, maxrev=0):
        """Create a linelog.

        Args:
          program: Optional list of instructions to adopt as-is. When
            omitted, an empty program is created.
          maxrev: The highest revision recorded in ``program``.
        """
        if program is None:
            # We pad the program with an extra leading EOF so that our
            # offsets will match the C code exactly. This means we can
            # interoperate with the C code.
            program = [_eof(0, 0), _eof(0, 0)]
        self._program = program
        self._lastannotate = None
        self._maxrev = maxrev

    def __eq__(self, other):
        """Two linelogs are equal when program and maxrev both match."""
        return (type(self) == type(other)
                and self._program == other._program
                and self._maxrev == other._maxrev)

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__, so spell it out to keep
        # the two operators consistent there.
        return not self.__eq__(other)

    def __repr__(self):
        return '<linelog at %s: maxrev=%d size=%d>' % (
            hex(id(self)), self._maxrev, len(self._program))

    def debugstr(self):
        """Return a listing of the program, one "offset instruction" per line.

        The padding instruction at offset 0 is left out.
        """
        fmt = r'%%%dd %%s' % len(str(len(self._program)))
        return pycompat.sysstr('\n').join(
            fmt % (idx, i) for idx, i in enumerate(self._program[1:], 1))

    @classmethod
    def fromdata(cls, buf):
        """Build a linelog from its binary encoding.

        The first entry of ``buf`` is a header shaped like a JGE: its
        revision operand is maxrev and its target operand is the total
        number of entries. It decodes as a JUMP when maxrev is 0.

        Raises:
          LineLogError: if the buffer size is not a whole number of
            entries, the header is not a JGE/JUMP, or the entry count in
            the header disagrees with the buffer size.
        """
        if len(buf) % _llentry.size != 0:
            raise LineLogError(
                "invalid linelog buffer size %d (must be a multiple of %d)" % (
                    len(buf), _llentry.size))
        # Floor division keeps the count an integer on Python 3 as well.
        expected = len(buf) // _llentry.size
        header = _decodeone(buf, 0)
        if isinstance(header, _jump):
            maxrev = 0
        elif isinstance(header, _jge):
            maxrev = header._cmprev
        else:
            # Anything else would otherwise surface as an AttributeError
            # (or be silently misread) further down.
            raise LineLogError(
                "corrupt linelog data: invalid header instruction %r" % header)
        numentries = header._target
        if expected != numentries:
            # The header states numentries; the buffer holds ``expected``.
            raise LineLogError("corrupt linelog data: claimed"
                               " %d entries but given data for %d entries" % (
                                   numentries, expected))
        instructions = [_eof(0, 0)]
        for offset in pycompat.xrange(1, numentries):
            instructions.append(_decodeone(buf, offset * _llentry.size))
        return cls(instructions, maxrev=maxrev)

    def encode(self):
        """Return the binary encoding of this linelog.

        The padding instruction at offset 0 is replaced by a JGE-shaped
        header carrying maxrev and the program length.
        """
        hdr = _jge(self._maxrev, len(self._program)).encode()
        # Instruction encodings are byte strings, so join them as bytes.
        return hdr + b''.join(i.encode() for i in self._program[1:])

    def clear(self):
        """Reset to the same empty state a fresh linelog() starts in."""
        # Keep the leading padding entry plus the real EOF: an entirely
        # empty list cannot be annotated, and its encoding would be
        # rejected by fromdata().
        self._program = [_eof(0, 0), _eof(0, 0)]
        self._maxrev = 0
        self._lastannotate = None

    def replacelines_vec(self, rev, a1, a2, blines):
        """Replace lines [a1, a2) with the lines described by ``blines``.

        ``blines`` is an indexable sequence of (rev, linenum) pairs; each
        pair is recorded verbatim for the corresponding inserted line.
        """
        return self.replacelines(rev, a1, a2, 0, len(blines),
                                 _internal_blines=blines)

    def replacelines(self, rev, a1, a2, b1, b2, _internal_blines=None):
        """Replace lines [a1, a2) with lines [b1, b2).

        Line numbers a1/a2 refer to the last annotate result if there is
        one, otherwise to a fresh annotate of ``rev``. New instructions are
        appended to the end of the program and the instruction at line a1
        is overwritten with a jump to them. The cached annotate result is
        updated in place and maxrev is raised to ``rev`` if needed.

        Args:
          rev: Revision performing the edit.
          a1, a2: Half-open range of existing lines to replace.
          b1, b2: Half-open range of new line numbers to insert.
          _internal_blines: Private. When given, (rev, linenum) pairs to
            record for the inserted lines instead of (rev, b-linenum).

        Raises:
          LineLogError: if a1 or a2 is past the end of the annotated file.
        """
        if self._lastannotate:
            # TODO(augie): make replacelines() accept a revision at
            # which we're editing as well as a revision to mark
            # responsible for the edits. In hg-experimental it's
            # stateful like this, so we're doing the same thing to
            # retain compatibility with absorb until that's imported.
            ar = self._lastannotate
        else:
            ar = self.annotate(rev)
        if a1 > len(ar.lines):
            raise LineLogError(
                '%d contains %d lines, tried to access line %d' % (
                    rev, len(ar.lines), a1))
        elif a1 == len(ar.lines):
            # Simulated EOF instruction since we're at EOF, which
            # doesn't have a "real" line.
            a1inst = _eof(0, 0)
            a1info = lineinfo(0, 0, ar._eof)
        else:
            a1info = ar.lines[a1]
            a1inst = self._program[a1info._offset]
        # Validate the end of the deleted range before touching the
        # program, so that a failed call leaves no orphan instructions.
        if a1 < a2 and a2 > len(ar.lines):
            raise LineLogError(
                '%d contains %d lines, tried to access line %d' % (
                    rev, len(ar.lines), a2))
        programlen = self._program.__len__
        oldproglen = programlen()
        appendinst = self._program.append

        # insert
        blineinfos = []
        bappend = blineinfos.append
        if b1 < b2:
            # Determine the jump target for the JL at the start of
            # the new block: just past the JL and the inserted lines.
            tgt = oldproglen + (b2 - b1 + 1)
            # Jump to skip the insert if we're at an older revision.
            appendinst(_jl(rev, tgt))
            for linenum in pycompat.xrange(b1, b2):
                if _internal_blines is None:
                    bappend(lineinfo(rev, linenum, programlen()))
                    appendinst(_line(rev, linenum))
                else:
                    newrev, newlinenum = _internal_blines[linenum]
                    bappend(lineinfo(newrev, newlinenum, programlen()))
                    appendinst(_line(newrev, newlinenum))
        # delete
        if a1 < a2:
            if a2 == len(ar.lines):
                endaddr = ar._eof
            else:
                endaddr = ar.lines[a2]._offset
            if a2 > 0 and rev < self._maxrev:
                # If we're here, we're deleting a chunk of an old
                # commit, so we need to be careful and not touch
                # invisible lines between a2-1 and a2 (IOW, lines that
                # are added later).
                endaddr = ar.lines[a2 - 1]._offset + 1
            appendinst(_jge(rev, endaddr))
        # copy instruction from a1
        a1instpc = programlen()
        appendinst(a1inst)
        # if a1inst isn't a jump or EOF, then we need to add an unconditional
        # jump back into the program here.
        if not isinstance(a1inst, (_jump, _eof)):
            appendinst(_jump(0, a1info._offset + 1))
        # Patch instruction at a1, which makes our patch live.
        self._program[a1info._offset] = _jump(0, oldproglen)

        # Update self._lastannotate in place. This serves as a cache to avoid
        # expensive "self.annotate" in this function, when "replacelines" is
        # used continuously.
        if len(self._lastannotate.lines) > a1:
            self._lastannotate.lines[a1]._offset = a1instpc
        else:
            assert isinstance(a1inst, _eof)
            self._lastannotate._eof = a1instpc
        self._lastannotate.lines[a1:a2] = blineinfos
        self._lastannotate.rev = max(self._lastannotate.rev, rev)

        if rev > self._maxrev:
            self._maxrev = rev

    def annotate(self, rev):
        """Run the program for ``rev`` and return an annotateresult.

        The result is also remembered as the last annotate result.

        Raises:
          LineLogError: if execution does not reach EOF within
            len(program) instructions.
        """
        pc = 1
        lines = []
        executed = 0
        # Sanity check: if instructions executed exceeds len(program), we
        # hit an infinite loop in the linelog program somehow and we
        # should stop.
        while pc is not None and executed < len(self._program):
            inst = self._program[pc]
            # Remember where we were: once pc becomes None this is the
            # offset of the instruction that ended execution.
            lastpc = pc
            pc = inst.execute(rev, pc, lines.append)
            executed += 1
        if pc is not None:
            # A non-raw literal so the listing starts on its own line
            # rather than after a literal backslash-n.
            raise LineLogError(
                pycompat.sysstr(
                    'Probably hit an infinite loop in linelog. Program:\n') +
                self.debugstr())
        ar = annotateresult(rev, lines, lastpc)
        self._lastannotate = ar
        return ar

    @property
    def maxrev(self):
        """The highest revision recorded in this linelog."""
        return self._maxrev

    # Stateful methods which depend on the value of the last
    # annotation run. This API is for compatibility with the original
    # linelog, and we should probably consider refactoring it.
    @property
    def annotateresult(self):
        """Return the last annotation result. C linelog code exposed this.

        The value is a list of (rev, linenum) pairs. An annotate run must
        have happened first.
        """
        return [(l.rev, l.linenum) for l in self._lastannotate.lines]

    def getoffset(self, line):
        """Return the program offset of ``line`` in the last annotate result."""
        return self._lastannotate.lines[line]._offset

    def getalllines(self, start=0, end=0):
        """Get all lines that ever occurred in [start, end).

        Passing start == end == 0 means "all lines ever".

        This works in terms of *internal* program offsets, not line numbers.

        Returns a list of (rev, linenum) pairs. Raises LineLogError if an
        unknown instruction is met or neither EOF nor ``end`` is reached.
        """
        pc = start or 1
        lines = []
        # only take as many steps as there are instructions in the
        # program - if we don't find an EOF or our stop-line before
        # then, something is badly broken.
        for _step in pycompat.xrange(len(self._program)):
            inst = self._program[pc]
            nextpc = pc + 1
            if isinstance(inst, _jump):
                nextpc = inst._target
            elif isinstance(inst, _eof):
                return lines
            elif isinstance(inst, (_jl, _jge)):
                # Conditional jumps are never taken here, so that lines
                # from every revision are visited.
                pass
            elif isinstance(inst, _line):
                lines.append((inst._rev, inst._origlineno))
            else:
                raise LineLogError("Illegal instruction %r" % inst)
            if nextpc == end:
                return lines
            pc = nextpc
        raise LineLogError("Failed to perform getalllines")