1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
|
#
# Copyright 2004-2006 Zuza Software Foundation
#
# This file is part of translate.
#
# translate is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# translate is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.
import argparse
import cProfile
import os
import pstats
import random
import sys
from importlib import import_module
from translate.storage import factory, placeables
class TranslateBenchmarker:
    """
    Helper for benchmarking Translate Toolkit stores.

    The benchmarker works inside ``<test_dir>/benchmark/zxx``: it can fill
    that directory with randomly generated sample files, parse files into
    memory with the configured store class, and run the placeables parsers
    over the parsed units.
    """

    def __init__(self, test_dir, storeclass) -> None:
        """
        Set up benchmarking on the test directory.

        Args:
            test_dir: directory used for generated files; stored as an
                absolute path.
            storeclass: store class to benchmark. It must provide an
                ``Extensions`` sequence (the first entry is used as the file
                extension) and be callable both without arguments and with a
                binary file object.

        """
        self.test_dir = os.path.abspath(test_dir)
        self.StoreClass = storeclass
        self.extension = self.StoreClass.Extensions[0]
        self.project_dir = os.path.join(self.test_dir, "benchmark")
        self.file_dir = os.path.join(self.project_dir, "zxx")
        # Stores loaded by parse_files(); consumed by parse_placeables().
        self.parsedfiles = []

    def clear_test_dir(self) -> None:
        """Remove the test directory and everything below it, if it exists."""
        # Imported here so the method does not depend on edits to the
        # module-level import block.
        import shutil

        if os.path.exists(self.test_dir):
            # rmtree raises if anything cannot be removed, so a separate
            # "directory is gone" check is not needed afterwards.
            shutil.rmtree(self.test_dir)

    def create_sample_files(
        self,
        num_dirs,
        files_per_dir,
        strings_per_file,
        source_words_per_string,
        target_words_per_string,
    ) -> None:
        """
        Create sample files for benchmarking.

        Args:
            num_dirs: number of directories to fill. With more than one,
                files go into ``sample_<n>`` subdirectories of the file
                directory; otherwise directly into the file directory.
            files_per_dir: number of files written to each directory.
            strings_per_file: number of units added to each file.
            source_words_per_string: number of words in each unit's source.
            target_words_per_string: number of words in each unit's target.

        """
        # Creates test_dir, project_dir and file_dir in one go and is a
        # no-op for the parts that already exist.
        os.makedirs(self.file_dir, exist_ok=True)
        for dirnum in range(num_dirs):
            if num_dirs > 1:
                dirname = os.path.join(self.file_dir, f"sample_{dirnum}")
                os.makedirs(dirname, exist_ok=True)
            else:
                dirname = self.file_dir
            for filenum in range(files_per_dir):
                sample_file = self.StoreClass()
                for _stringnum in range(strings_per_file):
                    # Word content is pseudo-random filler; it only needs to
                    # vary, not to be unpredictable.
                    source_string = " ".join(
                        f"word{random.randint(0, strings_per_file) * i}"  # noqa: S311
                        for i in range(source_words_per_string)
                    )
                    sample_unit = sample_file.addsourceunit(source_string)
                    sample_unit.target = " ".join(
                        f"drow{random.randint(0, strings_per_file) * i}"  # noqa: S311
                        for i in range(target_words_per_string)
                    )
                sample_file.savefile(
                    os.path.join(dirname, f"file_{filenum}.{self.extension}")
                )

    def parse_files(self, file_dir=None) -> None:
        """
        Parse all the files below a directory into memory.

        Args:
            file_dir: directory to walk; defaults to the benchmarker's own
                file directory when ``None``.

        The parsed stores replace ``self.parsedfiles`` and the total number
        of units is printed.

        """
        count = 0
        self.parsedfiles = []
        if file_dir is None:
            file_dir = self.file_dir
        for dirpath, _subdirs, filenames in os.walk(file_dir, topdown=False):
            for name in filenames:
                pofilename = os.path.join(dirpath, name)
                # Close each input file as soon as the store has been built
                # instead of leaking one open handle per parsed file.
                with open(pofilename, "rb") as inputfile:
                    parsedfile = self.StoreClass(inputfile)
                count += len(parsedfile.units)
                self.parsedfiles.append(parsedfile)
        print(f"counted {count} units")

    def parse_placeables(self) -> None:
        """
        Run the general placeables parsers over every parsed unit.

        Operates on the stores loaded by ``parse_files``; both the source and
        the target of each unit are parsed. The number of units processed is
        printed.
        """
        count = 0
        for parsedfile in self.parsedfiles:
            for unit in parsedfile.units:
                placeables.parse(unit.source, placeables.general.parsers)
                placeables.parse(unit.target, placeables.general.parsers)
            count += len(parsedfile.units)
        print(f"counted {count} units")
if __name__ == "__main__":
    # Command-line entry point: build (or reuse) a directory of store files,
    # profile the selected benchmark methods with cProfile, and print the 20
    # most expensive entries of each profile.
    parser = argparse.ArgumentParser(
        description="Benchmark parsing and placeables handling of Translate Toolkit stores."
    )
    parser.add_argument(
        "podir",
        metavar="DIR",
        type=str,
        nargs="?",
        help="PO dir to use (default: create sample files)",
    )
    # NOTE: store_const with const == default means the value is always "po",
    # whether or not the flag is given.
    parser.add_argument(
        "--store-type",
        dest="storetype",
        action="store_const",
        const="po",
        default="po",
        help="type of the store to benchmark (default: %(default)s)",
    )
    parser.add_argument(
        "--check-parsing",
        dest="check_parsing",
        action="store_true",
        help="benchmark parsing files",
    )
    parser.add_argument(
        "--check-placeables",
        dest="check_placeables",
        action="store_true",
        help="benchmark placeables",
    )
    args = parser.parse_args()

    storetype = args.storetype
    try:
        _module, _class = factory._classes_str[storetype]
    except KeyError:
        # A string argument makes sys.exit print to stderr and exit with a
        # non-zero status, so callers can detect the failure.
        sys.exit(
            f"StoreClass: '{storetype}' is not a base class that the class factory can load"
        )
    module = import_module(f"translate.storage.{_module}")
    storeclass = getattr(module, _class)

    sample_files = [
        # num_dirs, files_per_dir, strings_per_file, source_words_per_string, target_words_per_string
        # (1, 1, 2, 2, 2),
        (
            1,
            1,
            10000,
            5,
            10,
        ),  # Create one very large file with German-like ratios of source to target
        # (100, 10, 10, 5, 10),  # Create lots of directories and files with smaller than average size
        # (1, 5, 10, 10, 10),
        # (1, 10, 10, 10, 10),
        # (5, 10, 10, 10, 10),
        # (5, 10, 100, 20, 20),
        # (10, 20, 100, 10, 10),
        # (10, 20, 100, 10, 10),
        # (100, 2, 140, 3, 3),  # OpenOffice.org approximate ratios
    ]

    for sample_file_sizes in sample_files:
        benchmarker = TranslateBenchmarker("BenchmarkDir", storeclass)
        benchmarker.clear_test_dir()
        if args.podir is None:
            benchmarker.create_sample_files(*sample_file_sizes)

        # (method name, positional arguments) pairs to profile, in order.
        methods = []
        if args.check_parsing:
            methods.append(("parse_files", (args.podir,)))
        if args.check_placeables:
            if not args.check_parsing:
                # parse_placeables works on already-parsed stores; load them
                # outside the profile so there is something to measure.
                benchmarker.parse_files(args.podir)
            methods.append(("parse_placeables", ()))

        for methodname, methodargs in methods:
            print("_______________________________________________________")
            # e.g. parse_files_po_1_1_10000_5_10.stats
            statsfile = (
                "_".join([methodname, storetype, *map(str, sample_file_sizes)])
                + ".stats"
            )
            # Profile the bound method directly instead of building a source
            # string for cProfile.run to evaluate.
            profiler = cProfile.Profile()
            profiler.runcall(getattr(benchmarker, methodname), *methodargs)
            profiler.dump_stats(statsfile)
            stats = pstats.Stats(statsfile)
            stats.sort_stats("time").print_stats(20)
            print("_______________________________________________________")
        benchmarker.clear_test_dir()
|