# Extracted from Debian package chromium 139.0.7258.127-2: pakfile.py
# (web-viewer header and line-number gutter removed during extraction cleanup).
# Copyright 2021 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Size analysis of .pak files."""

import collections
import logging
import os
import posixpath
import sys
import zipfile
import zlib

import archive_util
import file_format
import models
import path_util

sys.path.insert(1, path_util.FromToolsSrcRoot('tools', 'grit'))
from grit.format import data_pack

_UNCOMPRESSED_COMPRESSION_RATIO_THRESHOLD = 0.9


class PakIdMap:
  """Maps pak resource IDs to the (object_path, source_path) pairs using them."""

  def __init__(self):
    self._paths_by_id = collections.defaultdict(set)

  def Update(self, object_paths_by_name, ninja_source_mapper):
    # IDS_ macro usages result in templated function calls that contain the
    # resource ID in them. These names are collected along with all other
    # symbols by running "nm" on them. We just need to extract the values.
    prefix = 'void ui::AllowlistedResource<'
    suffix = '>()'
    for name, object_paths in object_paths_by_name.items():
      if not name.startswith(prefix):
        continue
      pak_id = int(name[len(prefix):-len(suffix)])
      pairs = ((path, ninja_source_mapper.FindSourceForPath(path))
               for path in object_paths)
      self._paths_by_id[pak_id].update(pairs)

  def Lookup(self, pak_id):
    """Returns sorted (object_path, source_path) tuples, or None if unknown."""
    entries = self._paths_by_id.get(pak_id)
    return sorted(entries) if entries else entries


def _IsPakContentUncompressed(content):
  """Returns whether |content| appears to be stored without compression.

  Heuristic: if a cheap zlib pass (level 1) shrinks the bytes below the
  threshold ratio, the original data was likely not already compressed.
  """
  raw_size = len(content)
  # Assume anything less than 100 bytes cannot be compressed.
  if raw_size < 100:
    return False

  ratio = len(zlib.compress(content, 1)) / float(raw_size)
  return ratio < _UNCOMPRESSED_COMPRESSION_RATIO_THRESHOLD


def _ParsePakInfoFile(pak_info_path):
  with open(pak_info_path, 'r') as info_file:
    res_info = {}
    for line in info_file.readlines():
      name, res_id, path = line.split(',')
      res_info[int(res_id)] = (name, path.strip())
  return res_info


def _CreateSymbolsFromFile(file_name, contents, res_info, symbols_by_id):
  """Creates/updates symbols for every resource in a single .pak file.

  Args:
    file_name: Path of the .pak file; its basename determines the section.
    contents: Parsed data pack exposing a .resources mapping of
        pak id -> resource bytes (aliased ids share the same bytes object).
    res_info: Mapping of pak id -> (resource name, source path), as produced
        by _ParsePakInfoFile().
    symbols_by_id: Mapping of pak id -> models.Symbol, mutated in place.
        Negative keys are reserved for per-file overhead symbols.

  Returns:
    The models.SECTION_PAK_* section name chosen for this file.
  """
  # Reversed so that aliases are clobbered by the entries they are aliases of.
  id_map = {id(v): k for k, v in reversed(contents.resources.items())}
  # Maps an aliased pak id to the canonical pak id whose bytes it shares.
  alias_map = {
      k: id_map[id(v)]
      for k, v in contents.resources.items() if id_map[id(v)] != k
  }
  name = posixpath.basename(file_name)
  # Hyphens used for language regions. E.g.: en-GB.pak, sr-Latn.pak, ...
  # Longest translated .pak file without hyphen: fil.pak
  if '-' in name or len(name) <= 7:
    section_name = models.SECTION_PAK_TRANSLATIONS
  else:
    # E.g.: resources.pak, chrome_100_percent.pak.
    section_name = models.SECTION_PAK_NONTRANSLATED
  overhead = 12 + 6  # Header size plus extra offset
  # Key just needs to be unique from other IDs and pak overhead symbols.
  symbols_by_id[-len(symbols_by_id) - 1] = models.Symbol(
      section_name, overhead, full_name='Overhead: {}'.format(file_name))
  for pak_id in sorted(contents.resources):
    aliased_pak_id = alias_map.get(pak_id)
    if aliased_pak_id is not None:
      # 4 extra bytes of metadata (2 16-bit ints)
      size = 4
      # Attribute this alias's cost to the canonical entry's symbol.
      pak_id = aliased_pak_id
    else:
      resource_data = contents.resources[pak_id]
      # 6 extra bytes of metadata (1 32-bit int, 1 16-bit int)
      size = len(resource_data) + 6
      name, source_path = res_info[pak_id]
      if pak_id not in symbols_by_id:
        full_name = '{}: {}'.format(source_path, name)
        new_symbol = models.Symbol(section_name,
                                   0,
                                   address=pak_id,
                                   full_name=full_name)
        if (section_name == models.SECTION_PAK_NONTRANSLATED
            and _IsPakContentUncompressed(resource_data)):
          new_symbol.flags |= models.FLAG_UNCOMPRESSED
        symbols_by_id[pak_id] = new_symbol

    # Sizes accumulate when the same resource appears in multiple .pak files.
    symbols_by_id[pak_id].size += size
  return section_name


def _FinalizeSymbols(symbols_by_id, pak_id_map):
  """Converts dict -> list, adds paths, and adds aliases."""
  raw_symbols = []
  for pak_id, symbol in symbols_by_id.items():
    raw_symbols.append(symbol)
    path_tuples = pak_id_map.Lookup(pak_id)
    if not path_tuples:
      continue
    # The first (object_path, source_path) pair goes on the existing symbol.
    symbol.object_path, symbol.source_path = path_tuples[0]
    remaining = path_tuples[1:]
    if not remaining:
      continue
    # Every extra pair becomes an alias symbol sharing one aliases list.
    alias_group = symbol.aliases or [symbol]
    symbol.aliases = alias_group
    for obj_path, src_path in remaining:
      clone = models.Symbol(symbol.section_name,
                            symbol.size,
                            address=symbol.address,
                            full_name=symbol.full_name,
                            object_path=obj_path,
                            source_path=src_path,
                            aliases=alias_group)
      alias_group.append(clone)
      raw_symbols.append(clone)

  # Pre-sort to make final sort faster.
  file_format.SortSymbols(raw_symbols)
  return raw_symbols


def CreatePakSymbolsFromApk(section_ranges, apk_path, apk_pak_paths,
                            pak_info_path, pak_id_map):
  """Uses files in apk to find and add pak symbols."""
  symbols_by_id = {}
  with zipfile.ZipFile(apk_path) as apk:
    infos = [apk.getinfo(p) for p in apk_pak_paths]
    res_info = _ParsePakInfoFile(pak_info_path)
    for info in infos:
      contents = data_pack.ReadDataPackFromString(apk.read(info))
      if info.compress_type != zipfile.ZIP_STORED:
        logging.warning(
            'Expected .pak files to be STORED, but this one is compressed: %s',
            info.filename)
      pak_name = archive_util.RemoveAssetSuffix(info.filename)
      section_name = _CreateSymbolsFromFile(pak_name, contents, res_info,
                                            symbols_by_id)
      # Size attributed is the stored (compressed) size within the apk.
      archive_util.ExtendSectionRange(section_ranges, section_name,
                                      info.compress_size)
  return _FinalizeSymbols(symbols_by_id, pak_id_map)


def CreatePakSymbolsFromFiles(section_ranges, pak_paths, pak_info_path,
                              output_directory, pak_id_map):
  """Uses files from --pak-file args to find and add pak symbols."""
  # A single shared .info file may cover all paks; otherwise each .pak has
  # a sibling "<pak>.info" file.
  shared_res_info = _ParsePakInfoFile(pak_info_path) if pak_info_path else None
  symbols_by_id = {}
  for pak_path in pak_paths:
    res_info = shared_res_info
    if res_info is None:
      res_info = _ParsePakInfoFile(pak_path + '.info')
    with open(pak_path, 'rb') as f:
      contents = data_pack.ReadDataPackFromString(f.read())
    rel_path = os.path.relpath(pak_path, output_directory)
    section_name = _CreateSymbolsFromFile(rel_path, contents, res_info,
                                          symbols_by_id)
    archive_util.ExtendSectionRange(section_ranges, section_name,
                                    os.path.getsize(pak_path))
  return _FinalizeSymbols(symbols_by_id, pak_id_map)