File: merge_data.py

package info (click to toggle)
chromium-browser 41.0.2272.118-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie-kfreebsd
  • size: 2,189,132 kB
  • sloc: cpp: 9,691,462; ansic: 3,341,451; python: 712,689; asm: 518,779; xml: 208,926; java: 169,820; sh: 119,353; perl: 68,907; makefile: 28,311; yacc: 13,305; objc: 11,385; tcl: 3,186; cs: 2,225; sql: 2,217; lex: 2,215; lisp: 1,349; pascal: 1,256; awk: 407; ruby: 155; sed: 53; php: 14; exp: 11
file content (150 lines) | stat: -rw-r--r-- 4,112 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# Copyright (c) 2014 The Native Client Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

from collections import defaultdict
import hashlib
import json
import sys

from print_data import PrintData


# Original data arrives as a dictionary of:
# name = { configs:[], type:'', properties: {}}
# where properties is a sub dictionary of the same type.
# So:
# {
#   'libnacl': {
#      'configs' : [ X1_Y1, X2_Y1, ...],
#      'type': 'source_set' | 'NaClSharedLibrary' | ....,
#      'properties' : {
#         'sources' : {
#             'source_file1.c' : {
#               'configs' : [ X1_Y1, X2_Y1, ...],
#             },
#             'source_file1.c' : {
#               'configs' : [ X1_Y1, X2_Y1, ...],
#             },
#


def HashKeys(key_a, key_b):
  m = hashlib.md5()
  if key_a:
    m.update(str(key_a))
  if key_b:
    m.update(str(key_b))
  return m.hexdigest()


def FindBsInConfigOfA(configs, setb, keya):
  bhits = []
  for config in configs:
    a, b = config.split('_')
    if a == keya:
      bhits.append(b)
  return bhits

#
# TransformRawToIndexed
#
# Transforms data from the original form shown above, to a dictionary of
# dictionaries with condition A as the key to the outer dictionary, and
# the set of B conditions for that key
#
# {
#   'keyA1' : {
#     'keyBx keyBy ...' : [ name1, name2 ]
#     'keyBz' : [ name3 ],
#   }
# }
#
def CreateIndexedLookup(tree, seta, setb):
  # Build dictionary of dictionaries with a key of seta
  out = {}
  for keya in seta:
    out[keya] = defaultdict(list)

  for name in tree:
    configs = sorted(tree[name].configs)
    for keya in seta:
      bhits = FindBsInConfigOfA(configs, setb, keya)
      if bhits:
        out[keya][' '.join(bhits)].append(name)
  return out


#
# MergeNodesToConditions
#
# Takes a dataset containing DICT[targets] = configs and generates a list of
# tupples containing the list of targets sharing a set of conditions.
# (name1, name2,...), (conda1, conda2), (condb1, condb2...)
#
def MergeRawTree(original, seta, availa, availb):
  merge_A = defaultdict(list)

  merge_arch = {}
  merge_data = {}

  # Build table of DATA[OS][CPU_SET] = List of NODES
  data = CreateIndexedLookup(original, seta, availb)

  # For every item in the data table, generate key based on CPUs and NODES
  # matching a configuration, and build tables indexed by that key.
  for key_a in data:
    for arches, hits in data[key_a].iteritems():
      keyval_hash = HashKeys(arches, hits)
      merge_A[keyval_hash].append(key_a)
      merge_arch[keyval_hash] = arches.split(' ')
      merge_data[keyval_hash] = hits

  # Merge all subsets
  keys = merge_A.keys()
  for i, i_key in enumerate(keys):
    # For each key, get the PLATFORMS, CPUS, and NODES
    i_oses = merge_A[i_key]
    i_cpus = merge_arch[i_key]
    i_hits = merge_data[i_key]

    for j, j_key in enumerate(keys):
      # For each other non-matching key, get PLATFORMS, CPUS, and NODES
      if i == j:
        continue
      j_oses = merge_A[j_key]
      j_cpus = merge_arch[j_key]
      j_hits = merge_data[j_key]

      # If the set of supported CPUS matches
      if i_cpus == j_cpus:
        i_set = set(i_hits)
        j_set = set(j_hits)
        # If the nodes in J are a subset of nodes in I
        # then move those nodes from I to J and add the OS
        if j_set.issubset(i_set):
          for hit in j_hits:
            merge_data[i_key].remove(hit)
          for os in i_oses:
            if os not in j_oses:
              merge_A[j_key].append(os)

  out_list = []
  keys = merge_A.keys()
  for i in range(len(keys)):
    hash_cpu_data = keys[i]
    os_use = merge_A[hash_cpu_data]
    cpus = merge_arch[hash_cpu_data]
    names = merge_data[hash_cpu_data]

    node = (names, os_use, cpus)
    out_list.append(node)

  def SortByEffect(x, y):
    # Sort by bigest combition first (# platforms * # cpu architectures)
    xval = len(x[1]) * len(x[2])
    yval = len(y[1]) * len(y[2])
    return  yval - xval

  out_list = sorted(out_list, cmp=SortByEffect)
  return out_list