File: get_hints.py

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (191 lines) | stat: -rwxr-xr-x 7,834 bytes parent folder | download | duplicates (13)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
#!/usr/bin/env python3

# Copyright 2022 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can
# be found in the LICENSE file.
"""
This script generates the branch hints for profile-guided optimization of
the builtins in the following format:

block_hint,<builtin_name>,<basic_block_id_for_true_destination>,<basic_block_id_for_false_destination>,<hint>

where hint is an integer representation of the expected boolean result of the
branch condition. The expected boolean result is generated for a specific given
branch by comparing the counts of the two destination basic blocks. V8's
control flow graph is always in edge-split form, guaranteeing that each
destination block only has a single predecessor, and thus guaranteeing that the
execution counts of these basic blocks are equal to how many times the branch
condition is true or false.

Usage: get_hints.py [--min MIN] [--ratio RATIO] log_file output_file

where:
    1. log_file is the file produced after running v8 with the
       --turbo-profiling-output=log_file flag after building with
       v8_enable_builtins_profiling = true.
    2. output_file is the file which the hints and builtin hashes are written
       to.
    3. --min MIN provides the minimum count at which a basic block will be taken
       as a valid destination of a hinted branch decision.
    4. --ratio RATIO provides the ratio at which, when compared to the
       alternative destination's count, a branch destination's count is
       considered sufficient to require a branch hint to be produced.
"""

import argparse
import sys

PARSER = argparse.ArgumentParser(
    description="A script that generates the branch hints for profile-guided \
                optimization",
    epilog="Example:\n\tget_hints.py --min n1 --ratio n2 branches_file log_file output_file\""
)
PARSER.add_argument(
    '--min',
    type=int,
    default=1000,
    help="The minimum count at which a basic block will be taken as a valid \
          destination of a hinted branch decision")
PARSER.add_argument(
    '--ratio',
    type=int,
    default=40,
    help="The ratio at which, when compared to the alternative destination's \
          count,a branch destination's count is considered sufficient to \
          require a branch hint to be produced")
PARSER.add_argument(
    'log_file',
    help="The v8.log file produced after running v8 with the --turbo-profiling-output=log_file flag after building with v8_enable_builtins_profiling = true"
)
PARSER.add_argument(
    'output_file',
    help="The file which the hints and builtin hashes are written to")

ARGS = vars(PARSER.parse_args())

BLOCK_COUNT_MARKER = "block"
BRANCH_HINT_MARKER = "block_hint"
BUILTIN_HASH_MARKER = "builtin_hash"
NORMALIZED_BLOCK_COUNT_MARKER = "block_count"
NORMALIZED_BUILTIN_COUNT_MARKER = "builtin_count"

MAX_NORMALIZED_COUNT = 10000


def parse_log_file(log_file):
  block_counts = {}
  branches = []
  builtin_hashes = {}
  max_execution_count = 0
  try:
    with open(log_file, "r") as f:
      for line in f.readlines():
        fields = line.split('\t')
        if fields[0] == BLOCK_COUNT_MARKER:
          builtin_name = fields[1]
          block_id = int(fields[2])
          count = float(fields[3])
          if block_id == 0 and count > max_execution_count:
            max_execution_count = count
          if builtin_name not in block_counts:
            block_counts[builtin_name] = []
          while len(block_counts[builtin_name]) <= block_id:
            block_counts[builtin_name].append(0)
          block_counts[builtin_name][block_id] += count
        elif fields[0] == BUILTIN_HASH_MARKER:
          builtin_name = fields[1]
          builtin_hash = int(fields[2])
          if builtin_name in builtin_hashes:
            old_hash = builtin_hashes[builtin_name]
            assert old_hash == builtin_hash, (
                "Merged PGO file contains multiple incompatible builtin "
                "versions: {old_hash} != {builtin_hash}")
          else:
            builtin_hashes[builtin_name] = builtin_hash
        elif fields[0] == BRANCH_HINT_MARKER:
          builtin_name = fields[1]
          true_block_id = int(fields[2])
          false_block_id = int(fields[3])
          branches.append((builtin_name, true_block_id, false_block_id))
  except IOError as e:
    print(f"Cannot read from {log_file}. {e.strerror}.")
    sys.exit(1)
  return [block_counts, branches, builtin_hashes, max_execution_count]


def get_branch_hints(block_counts, branches, min_count, threshold_ratio):
  branch_hints = {}
  for (builtin_name, true_block_id, false_block_id) in branches:
    if builtin_name in block_counts:
      true_block_count = 0
      false_block_count = 0
      if true_block_id < len(block_counts[builtin_name]):
        true_block_count = block_counts[builtin_name][true_block_id]
      if false_block_id < len(block_counts[builtin_name]):
        false_block_count = block_counts[builtin_name][false_block_id]
      hint = -1
      if (true_block_count >= min_count) and (true_block_count / threshold_ratio
                                              >= false_block_count):
        hint = 1
      elif (false_block_count >= min_count) and (
          false_block_count / threshold_ratio >= true_block_count):
        hint = 0
      if hint >= 0:
        branch_hints[(builtin_name, true_block_id, false_block_id)] = hint
  return branch_hints


def normalize_count(block_counts, max_count):
  normalized_block_counts = {}

  for builtin in block_counts:
    for block, count in enumerate(block_counts[builtin]):
      block_count_normalized = int(count * MAX_NORMALIZED_COUNT / max_count)
      if builtin not in normalized_block_counts:
        normalized_block_counts[builtin] = {}
      normalized_block_counts[builtin][block] = block_count_normalized

  return normalized_block_counts


def write_hints_to_output(output_file, branch_hints, builtin_hashes,
                          block_counts):
  try:
    with open(output_file, "w") as f:
      for key in branch_hints:
        f.write("{},{},{},{},{}\n".format(BRANCH_HINT_MARKER, key[0], key[1],
                                          key[2], branch_hints[key]))
      # Put the NORMALIZED_BUILTIN_COUNT_MARKER before NORMALIZED_BLOCK_COUNT_MARKER,
      # because we will use them to calculate call probability.
      # The normalized count of a builtin/block means how much frequently the
      # builtin/block was executed.
      # We will take it as "density" of the builtin/block in post process, hence
      # the execution time could be "density * size".
      for builtin_name in block_counts.keys():
        f.write("{},{},{}\n".format(NORMALIZED_BUILTIN_COUNT_MARKER,
                                    builtin_name,
                                    block_counts[builtin_name][0]))

      for builtin_name in block_counts:
        for block_id in block_counts[builtin_name]:
          f.write("{},{},{},{}\n".format(NORMALIZED_BLOCK_COUNT_MARKER,
                                         builtin_name, block_id,
                                         block_counts[builtin_name][block_id]))

      for builtin_name in builtin_hashes:
        f.write("{},{},{}\n".format(BUILTIN_HASH_MARKER, builtin_name,
                                    builtin_hashes[builtin_name]))
  except IOError as e:
    print(f"Cannot read from {output_file}. {e.strerror}.")
    sys.exit(1)


[block_counts, branches, builtin_hashes,
 max_count] = parse_log_file(ARGS['log_file'])
branch_hints = get_branch_hints(block_counts, branches, ARGS['min'],
                                ARGS['ratio'])

block_counts = normalize_count(block_counts, max_count)

write_hints_to_output(ARGS['output_file'], branch_hints, builtin_hashes,
                      block_counts)