File: generate_ad_network_hashes.py

package info (click to toggle)
chromium-browser 41.0.2272.118-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie-kfreebsd
  • size: 2,189,132 kB
  • sloc: cpp: 9,691,462; ansic: 3,341,451; python: 712,689; asm: 518,779; xml: 208,926; java: 169,820; sh: 119,353; perl: 68,907; makefile: 28,311; yacc: 13,305; objc: 11,385; tcl: 3,186; cs: 2,225; sql: 2,217; lex: 2,215; lisp: 1,349; pascal: 1,256; awk: 407; ruby: 155; sed: 53; php: 14; exp: 11
file content (102 lines) | stat: -rw-r--r-- 3,200 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/usr/bin/env python
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# Generates the hashed_ad_networks.[h,cc] files. Takes as input the name of a
# file with all ad network host patterns, newline-separated. If given an
# optional root output file name, generates the files <root_output_name>.h and
# <root_output_name>.cc. If no output name is given, the output name is
# 'hashed_ad_networks'.
#
# These are found at chrome/browser/extensions/activity_log/hashed_ad_networks.*
# and are used by the ActivityLog and HashedAdNetworkDatabase for recognizing
# ad injection.

from hashlib import sha256
from argparse import ArgumentParser
import sys

# License banner and "generated file" warning written at the top of both
# generated C++ files.
_LICENSE = '''\
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file is automatically generated from the script in
// chrome/browser/extensions/activity_log/generate_ad_network_hashes.py.
// DO NOT MODIFY BY HAND!
'''

# Template for the generated header. The declaration of kHashedAdNetworks must
# carry the same const-qualification as the definition in _CC_TEMPLATE: the
# generated .cc includes this header, so a mismatch is a conflicting
# declaration and the generated sources fail to compile.
_H_TEMPLATE = '''\
%(license)s
#ifndef CHROME_BROWSER_EXTENSIONS_ACTIVITY_LOG_HASHED_AD_NETWORKS_H_
#define CHROME_BROWSER_EXTENSIONS_ACTIVITY_LOG_HASHED_AD_NETWORKS_H_

#include "base/basictypes.h"

namespace extensions {

extern const char* const kHashedAdNetworks[];
extern const int kNumHashedAdNetworks;

}  // namespace extensions

#endif  // CHROME_BROWSER_EXTENSIONS_ACTIVITY_LOG_HASHED_AD_NETWORKS_H_
'''

# Template for the generated source file. %(ad_networks)s is replaced with the
# quoted hash literals, joined by ',\n  ' so each lands on its own indented
# line inside the array initializer.
_CC_TEMPLATE = '''\
%(license)s
#include "chrome/browser/extensions/activity_log/hashed_ad_networks.h"

#include "base/basictypes.h"

namespace extensions {

const char* const kHashedAdNetworks[] = {
  %(ad_networks)s
};

const int kNumHashedAdNetworks = arraysize(kHashedAdNetworks);

}  // namespace extensions
'''


def Generate(input_filename, output_root_filename):
  '''Generate the .h and .cc files for the hashed_ad_network source files.

  Each non-blank input line is stripped of surrounding whitespace and hashed
  with SHA-256; the first 16 hex digits of the digest, upper-cased and wrapped
  in double quotes, form one array entry. The entries are sorted and
  substituted into the C++ templates.

  |input_filename|
      The name of the input file, which should have one host to be hashed per
      line. Blank lines are ignored.
  |output_root_filename|
      The root name of the output files. This will generate a .h and .cc file,
      like |output_root_filename|.[h,cc].
  '''
  # Read raw bytes: hashlib only accepts bytes on Python 3, and hashing the
  # stripped bytes yields the same digests Python 2 produced from str lines.
  with open(input_filename, 'rb') as input_file:
    hosts = [line.strip() for line in input_file]

  # A blank line (e.g. a trailing empty line at the end of the file) would
  # otherwise contribute the hash of the empty string as a bogus entry.
  hashes = ['"%s"' % sha256(host).hexdigest()[:16].upper()
            for host in hosts if host]

  # Hashes should be sorted in C++ so we can do a binary search over them.
  hashes.sort()
  substitutions = {
    'license': _LICENSE,
    'ad_networks': ',\n  '.join(hashes)
  }
  for ext, template in (('.h', _H_TEMPLATE),
                        ('.cc', _CC_TEMPLATE)):
    with open(output_root_filename + ext, 'w') as out:
      out.write(template % substitutions)


if __name__ == '__main__':
  # Command-line entry point: one required positional argument naming the
  # host list, plus an optional root name for the generated .h/.cc pair.
  cli = ArgumentParser(
      description='Generate hashed_ad_networks.[h,cc] source files')
  cli.add_argument(
      'input_file',
      help='The name of the input file with the hosts to be hashed')
  cli.add_argument(
      '-o', '--out',
      default='hashed_ad_networks',
      help='The root name of the output source file')

  options = cli.parse_args()
  Generate(input_filename=options.input_file,
           output_root_filename=options.out)