File: hashing_tools.py

package info (click to toggle)
chromium-browser 41.0.2272.118-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie-kfreebsd
  • size: 2,189,132 kB
  • sloc: cpp: 9,691,462; ansic: 3,341,451; python: 712,689; asm: 518,779; xml: 208,926; java: 169,820; sh: 119,353; perl: 68,907; makefile: 28,311; yacc: 13,305; objc: 11,385; tcl: 3,186; cs: 2,225; sql: 2,217; lex: 2,215; lisp: 1,349; pascal: 1,256; awk: 407; ruby: 155; sed: 53; php: 14; exp: 11
file content (74 lines) | stat: -rwxr-xr-x 1,863 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/usr/bin/python
# Copyright (c) 2012 The Native Client Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Hashing related operations.

Provides for hashing files or directory trees.
Timestamps, archive order, and dot files/directories are ignored to keep
hashes stable.
"""

import hashlib
import os


def HashFileContents(filename):
  """Return the hash (sha1) of the contents of a file.

  The file is opened in binary mode and read in 4096-byte chunks, so the
  whole file is never held in memory at once.

  Args:
    filename: Filename to read.
  Returns:
    The sha1 of the file contents, as a hex digest string.
  """
  hasher = hashlib.sha1()
  # The with-block closes the file even if a read raises.
  with open(filename, 'rb') as fh:
    # iter() with a sentinel keeps calling read() until it returns the
    # empty byte string that marks end of file.
    for data in iter(lambda: fh.read(4096), b''):
      hasher.update(data)
  return hasher.hexdigest()


def _ToBytes(text):
  """Return text as bytes so it can be fed to a hashlib hasher.

  Byte strings (which includes Python 2 str) are returned unchanged; text
  strings are encoded as UTF-8.
  """
  if isinstance(text, bytes):
    return text
  return text.encode('utf-8')


def StableHashPath(path):
  """Hash (sha1) everything in a path in a stable (reproducible) way.

  For a regular file the hash covers the file's content hash only. For
  anything else the path is walked with os.walk; at every level the
  directory and file names are sorted, names starting with '.' are dropped
  (dot directories are not descended into), and the remaining names plus
  the content hash of each file are fed to the hasher. Only names and file
  contents are read, so timestamps do not influence the result.

  All data is passed to the hasher as bytes, so the function works on
  Python 3 as well as Python 2.

  Args:
    path: Path to hash.
  Returns:
    The sha1 of the file/directory, as a hex digest string.
  """
  hasher = hashlib.sha1()

  if os.path.isfile(path):
    hasher.update(b'singlefile:' + _ToBytes(HashFileContents(path)))
    return hasher.hexdigest()

  def RemoveExcludedPaths(paths):
    # Slice-assign so the caller's list object is modified in place:
    # os.walk only skips a directory when its own `dirs` list is pruned.
    paths[:] = [p for p in paths if not _ToBytes(p).startswith(b'.')]

  for root, dirs, files in os.walk(path):
    # Sort so the hash does not depend on the order the OS lists entries.
    dirs.sort()
    files.sort()
    RemoveExcludedPaths(dirs)
    RemoveExcludedPaths(files)
    # Including directory names in the hash so that
    # empty directories do count.
    # Also, as a side effect, every name below `path` is incorporated.
    # Terminating with \x00 to avoid injection attacks.
    # NOTE(review): only the entry names at each level are hashed, not
    # `root`, so the stream does not record which directory a file sits
    # in. Left as is: changing it would alter every existing hash value.
    for d in dirs:
      hasher.update(b'dir:' + _ToBytes(d) + b'\x00')
    for f in files:
      hasher.update(b'filename:' + _ToBytes(f) + b'\x00')
      hasher.update(b'contents:' + _ToBytes(HashFileContents(
          os.path.join(root, f))))

  return hasher.hexdigest()