# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""
This script takes a log from the replace-malloc logalloc library on stdin
and munges it so that it can be used with the logalloc-replay tool.

Given the following output:
  13663 malloc(42)=0x7f0c33502040
  13663 malloc(24)=0x7f0c33503040
  13663 free(0x7f0c33502040)
The resulting output is:
  1 malloc(42)=#1
  1 malloc(24)=#2
  1 free(#1)

See README for more details.
"""

import sys
from collections import defaultdict, deque


class IdMapping:
    """Assign small integer ids to arbitrary hashable values.

    Ids are handed out in increasing order starting at 1. Deleting a value
    puts its id on a FIFO recycle queue; queued ids are reused (oldest
    first) before any new id is minted.

    The value 0 is special-cased: it is always reported as contained and
    deleting it is a no-op.
    """

    def __init__(self):
        # Next never-used id.
        self.id = 1
        # value -> id for every value currently tracked.
        self._values = {}
        # Ids released by __delitem__, waiting to be reused.
        self._recycle = deque()

    def __getitem__(self, value):
        """Return the id of value, assigning one first if it has none."""
        try:
            return self._values[value]
        except KeyError:
            pass

        if self._recycle:
            assigned = self._recycle.popleft()
        else:
            assigned = self.id
            self.id += 1
        self._values[value] = assigned
        return assigned

    def __delitem__(self, value):
        """Forget value and queue its id for reuse; deleting 0 does nothing.

        Raises KeyError if a non-zero value is not currently tracked.
        """
        if value == 0:
            return
        self._recycle.append(self._values.pop(value))

    def __contains__(self, value):
        """Return whether value is tracked; 0 always counts as contained."""
        if value == 0:
            return True
        return value in self._values


class Ignored(Exception):
    """Raised for a log line that is skipped instead of being munged."""


def split_log_line(line):
    """Split one raw log line into its components.

    Each line has the form:
        <pid> [<tid>] <function>([<args>])[=<result>]

    The tid is optional because the original log format did not include
    it; when absent, the pid is used as the tid.

    Returns a (pid, tid, func, args, result) tuple where every element is
    a string except args, which is a list of strings (empty when the call
    has no arguments). result is "" when the line has no "=<result>" part.

    Raises Ignored("Malformed input") for any line that cannot be parsed.
    """
    try:
        pid, remainder = line.split(" ", 1)
        # Exactly one ")" and one "(" are expected; anything else fails
        # the tuple unpacking and is reported as malformed below.
        before_close, result = remainder.split(")")
        prefix, raw_args = before_close.split("(")

        # The prefix is either "<tid> <function>" or just "<function>".
        if " " in prefix:
            tid, func = prefix.split(" ", 1)
        else:
            tid, func = pid, prefix

        args = raw_args.split(",") if raw_args else []

        if result:
            if not result.startswith("="):
                raise Ignored("Malformed input")
            result = result[1:]

        return pid, tid, func, args, result
    except Exception:
        raise Ignored("Malformed input")


# Number of comma-separated arguments expected for each recognized function.
# Lines naming a function that is not listed here, or carrying a different
# number of arguments, are ignored when the log is processed.
NUM_ARGUMENTS = {
    "jemalloc_stats": 0,
    "free": 1,
    "malloc": 1,
    "posix_memalign": 2,
    "aligned_alloc": 2,
    "calloc": 2,
    "realloc": 2,
    "memalign": 2,
    "valloc": 1,
}


def _munge_line(line, pids, processes):
    """Return the munged form of one raw log line.

    pids is the IdMapping for process ids; processes maps a munged pid to
    a dict holding that process's "pointers" and "tids" IdMappings. Both
    are updated in place.

    Raises Ignored (or ValueError for non-numeric fields) when the line
    must be skipped. The pointer mapping is only modified once the whole
    line has been validated, so a skipped line never changes which
    pointers are considered live.
    """
    pid, tid, func, args, result = split_log_line(line)

    # Replace pid and tid with ids.
    pid = pids[int(pid)]
    process = processes[pid]
    tid = process["tids"][int(tid)]
    pointers = process["pointers"]

    if func not in NUM_ARGUMENTS:
        raise Ignored("Unknown function")

    if len(args) != NUM_ARGUMENTS[func]:
        raise Ignored("Malformed input")

    if func in ("jemalloc_stats", "free") and result:
        raise Ignored("Malformed input")

    # Validation phase: nothing below may touch `pointers` until every
    # check has passed. In particular a realloc that returned NULL leaves
    # the original allocation alive, so its mapping must be kept for the
    # free that will eventually follow.
    releases_pointer = func in ("free", "realloc")
    if releases_pointer:
        ptr = int(args[0], 16)
        if ptr and ptr not in pointers:
            raise Ignored("Did not see an alloc for pointer")

    if result:
        result = int(result, 16)
        if not result:
            raise Ignored("Result is NULL")

    # Mutation phase: release the old pointer first so that its id is
    # queued for reuse before the result pointer is assigned an id.
    if releases_pointer:
        args[0] = "#%d" % pointers[ptr]
        del pointers[ptr]

    if result:
        result = "#%d" % pointers[result]

    return "%d %d %s(%s)%s" % (
        pid,
        tid,
        func,
        ",".join(args),
        "=%s" % result if result else "",
    )


def main():
    """Munge a logalloc log read from stdin and write the result to stdout.

    Process ids, thread ids and pointers are replaced with small ids (see
    the module docstring for an example). Thread ids and pointers are
    numbered independently for each process. Lines that cannot be
    processed are reported on stderr and otherwise skipped.
    """
    pids = IdMapping()
    processes = defaultdict(lambda: {"pointers": IdMapping(), "tids": IdMapping()})
    for line in sys.stdin:
        line = line.strip()

        try:
            print(_munge_line(line, pids, processes))
        except Exception as e:
            # Best effort: report the offending line and carry on with
            # the rest of the log.
            print('Ignored "%s": %s' % (line, e), file=sys.stderr)


# Only process stdin when executed as a script, not when imported.
if __name__ == "__main__":
    main()
