File: update_annotations_sheet.py

package info (click to toggle)
chromium 138.0.7204.157-1
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 6,071,864 kB
  • sloc: cpp: 34,936,859; ansic: 7,176,967; javascript: 4,110,704; python: 1,419,953; asm: 946,768; xml: 739,967; pascal: 187,324; sh: 89,623; perl: 88,663; objc: 79,944; sql: 50,304; cs: 41,786; fortran: 24,137; makefile: 21,806; php: 13,980; tcl: 13,166; yacc: 8,925; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (496 lines) | stat: -rwxr-xr-x 17,370 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
#!/usr/bin/env vpython3
# Copyright 2017 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""This script updates network traffic annotations sheet. To run the script, you
should first generate annotations.tsv using traffic_annotation_auditor, and then
call:
update_annotations_sheet.py --config-file=[settings.json]
    --annotations-file=[path_to_annotations.tsv]

Run update_annotations_sheet.py --config-help for help on configuration file.

TODO(rhalavati): Add tests.
"""

from __future__ import print_function

import argparse
import csv
import datetime
import httplib2
import io
import json
import os
import re
import sys

from apiclient import discovery, http as googlehttp
from infra_libs import luci_auth
from oauth2client import client
from oauth2client import tools
from oauth2client.file import Storage
from generator_utils import load_tsv_file


# Directory containing this script, with symlinks resolved.
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
# Three directory levels above SCRIPT_DIR. GetCurrentChromeVersion() reads
# "chrome/VERSION" relative to this directory.
SRC_DIR = os.path.abspath(os.path.join(SCRIPT_DIR, "../../.."))


def GetCurrentChromeVersion():
  """Reads chrome/VERSION under SRC_DIR and returns the version as a tuple.

  Every non-empty line of the file is expected to start with KEY=<digits>.

  Returns:
    tuple of int (MAJOR, MINOR, BUILD, PATCH).
  """
  version_path = os.path.join(SRC_DIR, "chrome/VERSION")
  with io.open(version_path) as version_file:
    text = version_file.read()

  fields = {}
  for line in text.split("\n"):
    if not line:
      continue
    key, number = re.match(r"(\w+)=(\d+)", line).groups()
    fields[key] = number

  ordered_keys = ["MAJOR", "MINOR", "BUILD", "PATCH"]
  return tuple(int(fields[key]) for key in ordered_keys)


def VersionTupleToString(version_tuple):
  """Joins the parts of |version_tuple| with dots, e.g. (1, 2, 3) -> "1.2.3"."""
  return '.'.join(str(part) for part in version_tuple)


class HttpRequestWithRetries(googlehttp.HttpRequest):
  """HttpRequest variant whose execute() always asks for 5 retries."""

  def execute(self, http=None, num_retries=1):
    # The caller-supplied |num_retries| is accepted only to keep the parent's
    # signature; it is deliberately replaced by a fixed retry budget.
    forced_retries = 5
    return super().execute(http=http, num_retries=forced_retries)


class SheetEditor():
  """Loads and updates traffic annotation's sheet."""

  # If modifying these scopes, delete your previously saved credentials.
  SCOPES = "https://www.googleapis.com/auth/spreadsheets"
  APPLICATION_NAME = "Chrome Network Traffic Annotations Spreadsheet Updater"

  def __init__(self, spreadsheet_id, annotations_sheet_name,
               chrome_version_sheet_name, silent_change_columns,
               last_update_column_name, credentials_file_path,
               client_secret_file_path, verbose):
    """ Obtains credentials, builds the Sheets service and resets the pending
    update bookkeeping. See 'PrintConfigHelp' for the meaning of the
    configuration values.

    Args:
      spreadsheet_id: str
          ID of annotations spreadsheet.
      annotations_sheet_name: str
          Name of the sheet that contains the annotations.
      chrome_version_sheet_name: str
          Name of the sheet whose A1 cell holds the Chrome version.
      silent_change_columns: list of str
          Headers of the columns whose changes neither refresh the last update
          date nor count as a modified annotation.
      last_update_column_name: str
          Header of the column that keeps the latest update date.
      credentials_file_path: str
          Absolute path to read/save user credentials.
      client_secret_file_path: str
          Absolute path to read client_secret.json.
      verbose: bool
          Flag requesting dump of details of actions.
    """
    print("Getting credential to update annotations report.")
    credentials = self._GetCredentials(credentials_file_path,
                                       client_secret_file_path)
    self.service = self._InitializeService(credentials)
    print("Successfully got credential to update annotations report.")

    # Configuration.
    self.verbose = verbose
    self.spreadsheet_id = spreadsheet_id
    self.annotations_sheet_name = annotations_sheet_name
    self.chrome_version_sheet_name = chrome_version_sheet_name
    self.silent_change_columns = silent_change_columns
    self.last_update_column_name = last_update_column_name

    # Needs |service|, |spreadsheet_id| and |annotations_sheet_name| above.
    self.annotations_sheet_id = self._GetAnnotationsSheetId()

    # Pending requests and their statistics, filled by GenerateUpdates().
    self.required_row_updates = []
    self.required_cell_updates = []
    self.delete_count = self.insert_count = self.update_count = 0

    # Date written into the last update column, formatted as MM/DD/YYYY.
    self.today = datetime.datetime.now().strftime("%m/%d/%Y")


  def _InitializeService(self, credentials):
    """ Builds the Google Sheets v4 API service on top of an authorized HTTP
    client. Requests are created through HttpRequestWithRetries.

    Args:
      credentials: OAuth2Credentials user credentials.

    Returns:
      googleapiclient.discovery.Resource Spreadsheet API service.
    """
    authorized_http = credentials.authorize(httplib2.Http())
    build_options = {
        "http": authorized_http,
        "requestBuilder": HttpRequestWithRetries,
        "discoveryServiceUrl":
            "https://sheets.googleapis.com/$discovery/rest?version=v4",
    }
    return discovery.build("sheets", "v4", **build_options)


  def _GetCredentials(self, credentials_file_path, client_secret_file_path):
    """ Returns credentials usable with the spreadsheets scope.

    LUCI credentials are preferred whenever luci_auth reports them as
    available. Otherwise the credentials stored in |credentials_file_path| are
    used; if there are none, or they are invalid, the OAuth2 flow is run with
    the client secret to obtain new ones.

    Args:
      credentials_file_path: str Absolute path to read/save user credentials.
      client_secret_file_path: str Absolute path to read client_secret.json.

    Returns:
      OAuth2Credentials The obtained user credentials.
    """
    if luci_auth.available():
      return luci_auth.LUCICredentials(scopes=[self.SCOPES])

    storage = Storage(credentials_file_path)
    stored = storage.get()
    if stored and not stored.invalid:
      return stored

    # Nothing usable on disk: run the OAuth2 flow with default tool flags.
    oauth_flow = client.flow_from_clientsecrets(client_secret_file_path,
                                                self.SCOPES)
    oauth_flow.user_agent = self.APPLICATION_NAME
    default_flags = tools.argparser.parse_args([])
    fresh = tools.run_flow(oauth_flow, storage, default_flags)
    print("Storing credentials to " + credentials_file_path)
    return fresh


  def _GetAnnotationsSheetId(self):
    """ Gets the id of the sheet containing annotations table.

    Returns:
      int Id of the sheet.
    """
    response = self.service.spreadsheets().get(
        spreadsheetId=self.spreadsheet_id,
        ranges=self.annotations_sheet_name,
        includeGridData=False).execute()
    return response["sheets"][0]["properties"]["sheetId"]


  def LoadAnnotationsSheet(self):
    """ Loads the sheet's content.

    Returns:
      list of list Table of annotations loaded from the trix.
    """
    result = self.service.spreadsheets().values().get(
        spreadsheetId=self.spreadsheet_id,
        range=self.annotations_sheet_name).execute()
    return result.get("values", [])


  def _CreateInsertRequest(self, row):
    self.required_row_updates.append(
        { "insertDimension": {
            "range": {
              "sheetId": self.annotations_sheet_id,
              "dimension": "ROWS",
              "startIndex": row, # 0 index.
              "endIndex": row + 1
            }
          }
        })
    self.insert_count += 1


  def _CreateAppendRequest(self, row_count):
    self.required_row_updates.append(
        { "appendDimension": {
            "sheetId": self.annotations_sheet_id,
            "dimension": "ROWS",
            "length": row_count
          }
        })
    self.insert_count += row_count


  def _CreateDeleteRequest(self, row):
    self.required_row_updates.append(
        { "deleteDimension": {
            "range": {
              "sheetId": self.annotations_sheet_id,
              "dimension": "ROWS",
              "startIndex": row,
              "endIndex": row + 1
            }
          }
        })
    self.delete_count += 1


  def _CreateUpdateRequest(self, row, column, value):
    # If having more than 26 columns, update cell_name.
    assert(column < 26)
    cell_name = "%s%i" % (chr(65 + column), 1 + row)
    self.required_cell_updates.append(
        { "range": "%s!%s:%s" % (
              self.annotations_sheet_name, cell_name, cell_name),
          "values": [[value]] })


  def GenerateUpdates(self, file_contents):
    """ Generates required updates to refresh the sheet, using the input file
    contents.

    Args:
      file_contents: list of list Table of annotations read from file. Each item
          represents one row of the annotation table, and each row is presented
          as a list of its column values.

    Returns:
      bool Flag specifying if everything was OK or not.
    """
    print("Generating updates for report.")
    sheet_contents = self.LoadAnnotationsSheet()
    if not sheet_contents:
      print("Could not read previous content.")
      return False

    headers = file_contents[0]
    silent_change_column_indices = []
    for title in self.silent_change_columns:
      if title not in headers:
        print("ERROR: Could not find %s column." % title)
        return False
      silent_change_column_indices.append(headers.index(title))

    last_update_column = headers.index(self.last_update_column_name)

    # Step 1: Compare old and new contents, generate add/remove requests so that
    # both contents would become the same size with matching unique ids (at
    # column 0).
    # Ignores header row (row 0).
    old_set = set(row[0] for row in sheet_contents[1:])
    new_set = set(row[0] for row in file_contents[1:])
    removed_ids = old_set - new_set
    added_ids = list(new_set - old_set)
    added_ids.sort()
    if self.verbose:
      for id in removed_ids:
        print("Deleted: %s" % id)
      for id in added_ids:
        print("Added: %s" % id)

    empty_row = [''] * len(file_contents[0])
    # Skip first row (it's the header row).
    row = 1
    while row < len(sheet_contents):
      row_id = sheet_contents[row][0]
      # If a row is removed, remove it from previous sheet.
      if row_id in removed_ids:
        self._CreateDeleteRequest(row)
        sheet_contents.pop(row)
        continue
      # If there are rows to add, and they should be before current row, insert
      # an empty row before current row. The empty row will be filled later.
      if added_ids and added_ids[0] < row_id:
        self._CreateInsertRequest(row)
        sheet_contents.insert(row, empty_row[:])
        added_ids.pop(0)
      row += 1

    # If there are still rows to be added, they should come at the end.
    if added_ids:
      self._CreateAppendRequest(len(added_ids))
      while added_ids:
        sheet_contents.append(empty_row[:])
        added_ids.pop()

    assert(len(file_contents) == len(sheet_contents))

    # Step 2: Compare cells of old and new contents, issue requests to update
    # cells with different values. Ignore headers row.
    for row in range(1, len(file_contents)):
      file_row = file_contents[row]
      sheet_row = sheet_contents[row]

      major_update = False
      for col in range(len(file_row)):
        # Ignore 'Last Update' column for now.
        if col == last_update_column:
          continue
        if file_row[col] != sheet_row[col]:
          self._CreateUpdateRequest(row, col, file_row[col])
          if self.verbose and sheet_row[0]:
            print("Updating: %s - %s" % (file_row[0], file_contents[0][col]))
          if col not in silent_change_column_indices:
            major_update = True
      # If there has been a change in a column that is not silently updated,
      # update the date as well.
      if major_update:
        self._CreateUpdateRequest(row, last_update_column, self.today)
        # If the row is not entirely new, increase the update count.
        if sheet_row[0]:
          self.update_count += 1
    return True


  def ApplyUpdates(self):
    """ Applies the updates stored in |self.required_row_updates| and
    |self.required_cell_updates| to the sheet.
    """
    # Insert/Remove rows.
    print("Applying updates for the report.")
    if self.required_row_updates:
      self.service.spreadsheets().batchUpdate(
          spreadsheetId=self.spreadsheet_id,
          body={"requests": self.required_row_updates}).execute()

    # Refresh Cells.
    if self.required_cell_updates:
      batch_update_values_request_body = {
        "value_input_option": "RAW",
        "data": self.required_cell_updates
      }
      self.service.spreadsheets().values().batchUpdate(
          spreadsheetId=self.spreadsheet_id,
          body=batch_update_values_request_body).execute()


  def GiveUpdateSummary(self):
    return "New annotations: %s, Modified annotations: %s, " \
           "Removed annotations: %s" % (
                self.insert_count, self.update_count, self.delete_count)


  def UpdateChromeVersion(self, version_tuple):
    """ Writes |version_tuple|, formatted as a dotted string, into cell A1 of
    the Chrome version sheet as RAW input.

    Args:
      version_tuple: tuple of int Version to store.
    """
    version_cell = "%s!A1:A1" % self.chrome_version_sheet_name
    payload = {"values": [[VersionTupleToString(version_tuple)]]}
    request = self.service.spreadsheets().values().update(
        spreadsheetId=self.spreadsheet_id,
        range=version_cell,
        valueInputOption="RAW",
        body=payload)
    request.execute()

  def GetChromeVersionFromSheet(self):
    response = self.service.spreadsheets().values().get(
        spreadsheetId=self.spreadsheet_id,
        range="%s!A1:A1" % self.chrome_version_sheet_name).execute()
    version_string = response["values"][0][0]
    return tuple(int(part) for part in version_string.split('.'))


def PrintConfigHelp():
  """Prints the list of items expected in the JSON configuration file."""
  help_text = (
      "The config.json file should have the following items:\n"
      "spreadsheet_id:\n"
      "  ID of annotations spreadsheet.\n"
      "annotations_sheet_name:\n"
      "  Name of the sheet that contains the annotations.\n"
      "chrome_version_sheet_name:\n"
      "  Name of the sheet that contains the Chrome version.\n"
      "silent_change_columns:\n"
      "  List of the columns whose changes don't affect the Last Update "
      "column.\n"
      "last_update_column_name:\n"
      "  Header of the column that keeps the latest update date.\n"
      "credentials_file_path:\n"
      "  Absolute path of the file that keeps user credentials.\n"
      "client_secret_file_path:\n"
      "  Absolute path of the file that keeps client_secret.json. The file\n"
      "  can be created as specified in:\n"
      "  https://developers.google.com/sheets/api/quickstart/python")
  print(help_text)


def main():
  """Parses the command line and brings the annotations sheet up to date.

  Loads the JSON configuration and the TSV annotations file, compares the
  annotations with the sheet content and, after confirmation (unless --yes or
  --force is given), applies the differences and stores the current Chrome
  version in the version sheet. When the sheet already holds a newer Chrome
  version nothing is changed unless --force is given.

  Returns:
    int 0 when the sheet is up to date, was updated, or was skipped because it
    is newer; -1 when the input could not be processed or the user declined.
  """
  parser = argparse.ArgumentParser(
      description="Network Traffic Annotations Sheet Updater")
  parser.add_argument(
      "--config-file",
      help="Configurations file.")
  parser.add_argument(
      "--annotations-file",
      help="TSV annotations file exported from auditor.")
  parser.add_argument(
      '--verbose', action='store_true',
      help='Reports all updates.')
  parser.add_argument('--yes',
                      action='store_true',
                      help='Performs all actions without confirmation.')
  parser.add_argument(
      '--force',
      action='store_true',
      help='Performs all actions without confirmation, regardless of the '
      'sheet being older or newer than this version. Implies --yes.')
  parser.add_argument(
      '--config-help', action='store_true',
      help='Shows the configurations help.')
  args = parser.parse_args()
  if args.force:
    args.yes = True

  print("Updating annotations sheet.")
  if args.config_help:
    PrintConfigHelp()
    return 0

  # These flags cannot be marked required in argparse because --config-help
  # must work without them, so they are validated here instead of failing
  # later with an opaque error from open(None).
  missing_flags = [
      flag for flag, value in (("--config-file", args.config_file),
                               ("--annotations-file", args.annotations_file))
      if not value
  ]
  if missing_flags:
    parser.error("Missing required argument(s): %s" % ", ".join(missing_flags))

  # Load and parse config file.
  with open(args.config_file) as config_file:
    config = json.load(config_file)

  # Load and parse annotations file.
  file_content = load_tsv_file(args.annotations_file, args.verbose)
  if not file_content:
    print("Could not read annotations file.")
    return -1

  sheet_editor = SheetEditor(
      spreadsheet_id=config["spreadsheet_id"],
      annotations_sheet_name=config["annotations_sheet_name"],
      chrome_version_sheet_name=config["chrome_version_sheet_name"],
      silent_change_columns=config["silent_change_columns"],
      last_update_column_name=config["last_update_column_name"],
      credentials_file_path=config.get("credentials_file_path", None),
      client_secret_file_path=config.get("client_secret_file_path", None),
      verbose=args.verbose)

  current_version = GetCurrentChromeVersion()
  current_version_string = VersionTupleToString(current_version)
  print("This is Chrome version %s" % current_version_string)

  sheet_version = sheet_editor.GetChromeVersionFromSheet()
  sheet_version_string = VersionTupleToString(sheet_version)
  print("Sheet contains Chrome version %s" % sheet_version_string)

  if sheet_version > current_version and not args.force:
    print("Sheet is already newer than this Chrome version. Aborting.")
    return 0

  if not sheet_editor.GenerateUpdates(file_content):
    print("Error generating updates for file content.")
    return -1

  main_sheet_needs_update = (sheet_editor.required_cell_updates
                             or sheet_editor.required_row_updates)
  version_needs_update = current_version != sheet_version

  if not (main_sheet_needs_update or version_needs_update):
    print("No updates required.")
    return 0

  if main_sheet_needs_update:
    print("%s" % sheet_editor.GiveUpdateSummary())
  else:
    print("No updates to annotations required.")

  if version_needs_update:
    print("The '%s' sheet will be updated to '%s'." %
          (sheet_editor.chrome_version_sheet_name, current_version_string))

  if not args.yes:
    print("Proceed with update?")
    # Only an explicit "y" confirms; any other answer, including an empty
    # one, aborts. (Python 3: input() replaces the removed raw_input().)
    if input("(Y/n): ").strip().lower() != "y":
      return -1

  if main_sheet_needs_update:
    sheet_editor.ApplyUpdates()
  if version_needs_update:
    sheet_editor.UpdateChromeVersion(current_version)
  print("Updates applied.")

  return 0


# Run main() only when this file is executed as a script; the value returned
# by main() is passed to sys.exit() as the process exit status.
if __name__ == "__main__":
  sys.exit(main())