1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192
|
"""Simple benchmark to compare the speed of scandir.walk() with os.walk()."""
import optparse
import os
import stat
import sys
import timeit
import warnings
with warnings.catch_warnings(record=True):
import scandir
DEPTH = 4
NUM_DIRS = 5
NUM_FILES = 50
def os_walk_pre_35(top, topdown=True, onerror=None, followlinks=False):
"""Pre Python 3.5 implementation of os.walk() that doesn't use scandir."""
islink, join, isdir = os.path.islink, os.path.join, os.path.isdir
try:
names = os.listdir(top)
except OSError as err:
if onerror is not None:
onerror(err)
return
dirs, nondirs = [], []
for name in names:
if isdir(join(top, name)):
dirs.append(name)
else:
nondirs.append(name)
if topdown:
yield top, dirs, nondirs
for name in dirs:
new_path = join(top, name)
if followlinks or not islink(new_path):
for x in os_walk_pre_35(new_path, topdown, onerror, followlinks):
yield x
if not topdown:
yield top, dirs, nondirs
def create_tree(path, depth=DEPTH):
"""Create a directory tree at path with given depth, and NUM_DIRS and
NUM_FILES at each level.
"""
os.mkdir(path)
for i in range(NUM_FILES):
filename = os.path.join(path, 'file{0:03}.txt'.format(i))
with open(filename, 'wb') as f:
f.write(b'foo')
if depth <= 1:
return
for i in range(NUM_DIRS):
dirname = os.path.join(path, 'dir{0:03}'.format(i))
create_tree(dirname, depth - 1)
def get_tree_size(path):
"""Return total size of all files in directory tree at path."""
size = 0
try:
for entry in scandir.scandir(path):
if entry.is_symlink():
pass
elif entry.is_dir():
size += get_tree_size(os.path.join(path, entry.name))
else:
size += entry.stat().st_size
except OSError:
pass
return size
def benchmark(path, get_size=False):
sizes = {}
if get_size:
def do_os_walk():
size = 0
for root, dirs, files in os.walk(path):
for filename in files:
fullname = os.path.join(root, filename)
st = os.lstat(fullname)
if not stat.S_ISLNK(st.st_mode):
size += st.st_size
sizes['os_walk'] = size
def do_scandir_walk():
sizes['scandir_walk'] = get_tree_size(path)
else:
def do_os_walk():
for root, dirs, files in os.walk(path):
pass
def do_scandir_walk():
for root, dirs, files in scandir.walk(path):
pass
# Run this once first to cache things, so we're not benchmarking I/O
print("Priming the system's cache...")
do_scandir_walk()
# Use the best of 3 time for each of them to eliminate high outliers
os_walk_time = 1000000
scandir_walk_time = 1000000
N = 3
for i in range(N):
print('Benchmarking walks on {0}, repeat {1}/{2}...'.format(
path, i + 1, N))
os_walk_time = min(os_walk_time, timeit.timeit(do_os_walk, number=1))
scandir_walk_time = min(scandir_walk_time,
timeit.timeit(do_scandir_walk, number=1))
if get_size:
if sizes['os_walk'] == sizes['scandir_walk']:
equality = 'equal'
else:
equality = 'NOT EQUAL!'
print('os.walk size {0}, scandir.walk size {1} -- {2}'.format(
sizes['os_walk'], sizes['scandir_walk'], equality))
print('os.walk took {0:.3f}s, scandir.walk took {1:.3f}s -- {2:.1f}x as fast'.format(
os_walk_time, scandir_walk_time, os_walk_time / scandir_walk_time))
if __name__ == '__main__':
usage = """Usage: benchmark.py [-h] [tree_dir]
Create a large directory tree named "benchtree" (relative to this script) and
benchmark os.walk() versus scandir.walk(). If tree_dir is specified, benchmark
using it instead of creating a tree."""
parser = optparse.OptionParser(usage=usage)
parser.add_option('-s', '--size', action='store_true',
help='get size of directory tree while walking')
parser.add_option('-c', '--scandir', type='choice', choices=['best', 'generic', 'c', 'python', 'os'], default='best',
help='version of scandir() to use, default "%default"')
options, args = parser.parse_args()
if args:
tree_dir = args[0]
else:
tree_dir = os.path.join(os.path.dirname(__file__), 'benchtree')
if not os.path.exists(tree_dir):
print('Creating tree at {0}: depth={1}, num_dirs={2}, num_files={3}'.format(
tree_dir, DEPTH, NUM_DIRS, NUM_FILES))
create_tree(tree_dir)
if options.scandir == 'generic':
scandir.scandir = scandir.scandir_generic
elif options.scandir == 'c':
if scandir.scandir_c is None:
print("ERROR: Compiled C version of scandir not found!")
sys.exit(1)
scandir.scandir = scandir.scandir_c
elif options.scandir == 'python':
if scandir.scandir_python is None:
print("ERROR: Python version of scandir not found!")
sys.exit(1)
scandir.scandir = scandir.scandir_python
elif options.scandir == 'os':
if not hasattr(os, 'scandir'):
print("ERROR: Python 3.5's os.scandir() not found!")
sys.exit(1)
scandir.scandir = os.scandir
elif hasattr(os, 'scandir'):
scandir.scandir = os.scandir
if scandir.scandir == getattr(os, 'scandir', None):
print("Using Python 3.5's builtin os.scandir()")
elif scandir.scandir == scandir.scandir_c:
print('Using fast C version of scandir')
elif scandir.scandir == scandir.scandir_python:
print('Using slower ctypes version of scandir')
elif scandir.scandir == scandir.scandir_generic:
print('Using very slow generic version of scandir')
else:
print('ERROR: Unsure which version of scandir we are using!')
sys.exit(1)
if hasattr(os, 'scandir'):
os.walk = os_walk_pre_35
print('Comparing against pre-Python 3.5 version of os.walk()')
else:
print('Comparing against builtin version of os.walk()')
benchmark(tree_dir, get_size=options.size)
|