1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177
|
#!/usr/bin/env python
# Copyright 2011 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""SiteCompare component to handle bulk scrapes.
Invokes a list of browsers and sends them to a list of URLs,
saving the rendered results to a specified directory, then
performs comparison operations on the resulting bitmaps and
saves the results
"""
from __future__ import print_function
# This line is necessary to work around a QEMU bug
import _imaging
import os # Functions for walking the directory tree
import types # Runtime type-checking
import command_line # command-line parsing
import drivers # Functions for driving keyboard/mouse/windows, OS-specific
import operators # Functions that, given two bitmaps as input, produce
# output depending on the performance of an operation
import scrapers # Functions that know how to capture a render from
# particular browsers
import commands.compare2 # compare one page in two versions of same browser
import commands.maskmaker # generate a mask based on repeated scrapes
import commands.measure # measure length of time a page takes to load
import commands.scrape # scrape a URL or series of URLs to a bitmap
# The timeload command is obsolete (too flaky); it may be reinstated
# later but for now it's been superceded by "measure"
# import commands.timeload # measure length of time a page takes to load
def Scrape(browsers, urls, window_size=(1024, 768),
window_pos=(0, 0), timeout=20, save_path=None, **kwargs):
"""Invoke one or more browsers over one or more URLs, scraping renders.
Args:
browsers: browsers to invoke with optional version strings
urls: URLs to visit
window_size: size of the browser window to display
window_pos: location of browser window
timeout: time (in seconds) to wait for page to load
save_path: root of save path, automatically appended with browser and
version
kwargs: miscellaneous keyword args, passed to scraper
Returns:
None
@TODO(jhaas): more parameters, or perhaps an indefinite dictionary
parameter, for things like length of time to wait for timeout, speed
of mouse clicks, etc. Possibly on a per-browser, per-URL, or
per-browser-per-URL basis
"""
if type(browsers) in types.StringTypes: browsers = [browsers]
if save_path is None:
# default save path is "scrapes" off the current root
save_path = os.path.join(os.path.split(__file__)[0], "Scrapes")
for browser in browsers:
# Browsers should be tuples of (browser, version)
if type(browser) in types.StringTypes: browser = (browser, None)
scraper = scrapers.GetScraper(browser)
full_path = os.path.join(save_path, browser[0], scraper.version)
drivers.windowing.PreparePath(full_path)
scraper.Scrape(urls, full_path, window_size, window_pos, timeout, kwargs)
def Compare(base, compare, ops, root_path=None, out_path=None):
"""Compares a series of scrapes using a series of operators.
Args:
base: (browser, version) tuple of version to consider the baseline
compare: (browser, version) tuple of version to compare to
ops: list of operators plus operator arguments
root_path: root of the scrapes
out_path: place to put any output from the operators
Returns:
None
@TODO(jhaas): this method will likely change, to provide a robust and
well-defined way of chaining operators, applying operators conditionally,
and full-featured scripting of the operator chain. There also needs
to be better definition of the output; right now it's to stdout and
a log.txt file, with operator-dependent images saved for error output
"""
if root_path is None:
# default save path is "scrapes" off the current root
root_path = os.path.join(os.path.split(__file__)[0], "Scrapes")
if out_path is None:
out_path = os.path.join(os.path.split(__file__)[0], "Compares")
if type(base) in types.StringTypes: base = (base, None)
if type(compare) in types.StringTypes: compare = (compare, None)
if type(ops) in types.StringTypes: ops = [ops]
base_dir = os.path.join(root_path, base[0])
compare_dir = os.path.join(root_path, compare[0])
if base[1] is None:
# base defaults to earliest capture
base = (base[0], max(os.listdir(base_dir)))
if compare[1] is None:
# compare defaults to latest capture
compare = (compare[0], min(os.listdir(compare_dir)))
out_path = os.path.join(out_path, base[0], base[1], compare[0], compare[1])
drivers.windowing.PreparePath(out_path)
# TODO(jhaas): right now we're just dumping output to a log file
# (and the console), which works as far as it goes but isn't nearly
# robust enough. Change this after deciding exactly what we want to
# change it to.
out_file = open(os.path.join(out_path, "log.txt"), "w")
description_string = ("Comparing %s %s to %s %s" %
(base[0], base[1], compare[0], compare[1]))
out_file.write(description_string)
print(description_string)
base_dir = os.path.join(base_dir, base[1])
compare_dir = os.path.join(compare_dir, compare[1])
for filename in os.listdir(base_dir):
out_file.write("%s: " % filename)
if not os.path.isfile(os.path.join(compare_dir, filename)):
out_file.write("Does not exist in target directory\n")
print("File %s does not exist in target directory" % filename)
continue
base_filename = os.path.join(base_dir, filename)
compare_filename = os.path.join(compare_dir, filename)
for op in ops:
if type(op) in types.StringTypes: op = (op, None)
module = operators.GetOperator(op[0])
ret = module.Compare(base_filename, compare_filename)
if ret is None:
print("%s: OK" % (filename,))
out_file.write("OK\n")
else:
print("%s: %s" % (filename, ret[0]))
out_file.write("%s\n" % (ret[0]))
ret[1].save(os.path.join(out_path, filename))
out_file.close()
def main():
"""Main executable. Parse the command line and invoke the command."""
cmdline = command_line.CommandLine()
# The below two commands are currently unstable so have been disabled
# commands.compare2.CreateCommand(cmdline)
# commands.maskmaker.CreateCommand(cmdline)
commands.measure.CreateCommand(cmdline)
commands.scrape.CreateCommand(cmdline)
cmdline.ParseCommandLine()
return 0
if __name__ == "__main__":
sys.exit(main())
|