1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
|
require 'set'
require 'yaml'
require_relative '../helper'
require_relative 'human_typo'
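# Statistical tests of the tree_spell algorithms: simulated typos (TreeSpell::HumanTypo)
# are fed to the spell checkers and summary statistics are printed.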
class ExploreTest < Test::Unit::TestCase
MINI_DIRECTORIES = YAML.load_file(File.expand_path('../fixtures/mini_dir.yml', __dir__))
RSPEC_DIRECTORIES = YAML.load_file(File.expand_path('../fixtures/rspec_dir.yml', __dir__))
# Runs the checker comparison (see many_typos) over the MINI_DIRECTORIES
# fixture with 10,000 trials; the summary is printed under the title
# 'Minitest'.
def test_checkers_with_many_typos_on_mini
  many_typos(10_000, MINI_DIRECTORIES, 'Minitest')
end
# Runs the checker comparison (see many_typos) over the RSPEC_DIRECTORIES
# fixture with 10,000 trials; the summary is printed under the title 'Rspec'.
def test_checkers_with_many_typos_on_rspec
  many_typos(10_000, RSPEC_DIRECTORIES, 'Rspec')
end
# Measures how many edits TreeSpell::HumanTypo introduces on average.
#
# A fixed 40-character word is corrupted 10,000 times and the Levenshtein
# distance between the word and each corrupted copy is averaged. The mean is
# printed next to the expected figure for manual comparison; nothing is
# asserted.
def test_human_typo
  n_repeat = 10_000
  word = 'any_string_that_is_40_characters_long_sp'
  total_changes = n_repeat.times.sum do
    DidYouMean::Levenshtein.distance(word, TreeSpell::HumanTypo.new(word).call)
  end
  mean_changes = (total_changes.to_f / n_repeat).round(2)
  puts '',
       "HumanTypo mean_changes: #{mean_changes} with n_repeat: #{n_repeat}",
       'Expected mean_changes: 2.1 with n_repeat: 10000, plus/minus 0.03',
       ''
end
# Prints the average correction time of three checker configurations:
# the standard SpellChecker, the TreeSpellChecker, and the TreeSpellChecker
# with augment: true. Each is timed by measure_execution_speed over 1,000
# iterations, preceded by a blank line and a heading.
def test_execution_speed
  n_repeat = 1_000
  # Heading suffix => factory building the checker for a given dictionary.
  checker_factories = {
    'Standard' => ->(files) { DidYouMean::SpellChecker.new(dictionary: files) },
    'Tree' => ->(files) { DidYouMean::TreeSpellChecker.new(dictionary: files) },
    'Augmented Tree' => ->(files) { DidYouMean::TreeSpellChecker.new(dictionary: files, augment: true) }
  }
  checker_factories.each do |label, build_checker|
    puts ''
    puts "Testing execution time of #{label}"
    measure_execution_speed(n_repeat) do |files, error|
      build_checker.call(files).correct error
    end
  end
end
private
# Times a spell-checking block against typos of RSPEC_DIRECTORIES entries.
#
# On each of the n_repeat iterations a random entry of RSPEC_DIRECTORIES is
# passed through TreeSpell::HumanTypo and the block is called with
# (RSPEC_DIRECTORIES, misspelled_word). The typo generation is part of the
# timed span. The average time per iteration, in milliseconds rounded to one
# decimal place, is printed.
#
# @param n_repeat [Integer] number of iterations to time
# @yield [files, error] the dictionary and the misspelled word to correct
def measure_execution_speed(n_repeat, &block)
  len = RSPEC_DIRECTORIES.length
  # Use the monotonic clock for the duration: unlike Time.now it is not
  # affected by system clock adjustments, which could skew or even negate
  # the measured interval.
  start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  n_repeat.times do
    word = RSPEC_DIRECTORIES[rand(len)]
    word_error = TreeSpell::HumanTypo.new(word).call
    block.call(RSPEC_DIRECTORIES, word_error)
  end
  elapsed_seconds = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
  time_ms = elapsed_seconds * 1000 / n_repeat
  puts "Average time (ms): #{time_ms.round(1)}"
end
# Runs n_repeat correction trials and prints a per-checker summary.
#
# Each trial picks a random entry of files, corrupts it with
# TreeSpell::HumanTypo, gathers the suggestion lists from group_suggestions
# and updates three tallies (one slot per suggestion list): how often the
# first suggestion was the original word, how many suggestions were
# returned, and how often the original word was missing from the list.
# The tallies are handed to print_results together with n_repeat and title.
def many_typos(n_repeat, files, title)
  first_times, total_suggestions, total_failures = Array.new(3) { [0, 0, 0] }
  file_count = files.length
  n_repeat.times do
    original = files[rand(file_count)]
    typo = TreeSpell::HumanTypo.new(original).call
    suggestion_lists = group_suggestions(typo, files)
    check_first_is_right(original, suggestion_lists, first_times)
    check_no_suggestions(suggestion_lists, total_suggestions)
    check_for_failure(original, suggestion_lists, total_failures)
  end
  print_results(first_times, total_suggestions, total_failures, n_repeat, title)
end
# Collects the suggestions of each checker variant for one misspelled word.
#
# Returns a three-element array:
#   [0] TreeSpellChecker suggestions,
#   [1] SpellChecker suggestions,
#   [2] the tree suggestions, or the standard ones when the tree found none.
def group_suggestions(word_error, files)
  tree = DidYouMean::TreeSpellChecker.new(dictionary: files).correct(word_error)
  standard = DidYouMean::SpellChecker.new(dictionary: files).correct(word_error)
  [tree, standard, tree.empty? ? standard : tree]
end
# Counts, per checker, the trials whose suggestions missed the intended word.
#
# @param word [String] the correctly spelled word
# @param suggestions_a [Array<Array>] one suggestion list per checker
# @param total_failures [Array<Integer>] per-checker counters, updated in
#   place: slot i is incremented when list i does not include word
# @return [Array<Integer>] total_failures
def check_for_failure(word, suggestions_a, total_failures)
  # Plain each_with_index: the loop only mutates the counters, so there is
  # no reason to build a result array with map.
  suggestions_a.each_with_index do |suggestions, i|
    total_failures[i] += 1 unless suggestions.include?(word)
  end
  total_failures
end
# Counts, per checker, the trials whose first suggestion is the intended word.
#
# @param word [String] the correctly spelled word
# @param suggestions_a [Array<Array>] one suggestion list per checker
# @param first_times [Array<Integer>] per-checker counters, updated in
#   place: slot i is incremented when the first element of list i equals
#   word (an empty list never matches)
# @return [Array<Integer>] first_times
def check_first_is_right(word, suggestions_a, first_times)
  # Plain each_with_index: the loop only mutates the counters, so there is
  # no reason to build a result array with map.
  suggestions_a.each_with_index do |suggestions, i|
    first_times[i] += 1 if word == suggestions.first
  end
  first_times
end
# Accumulates, per checker, the number of suggestions returned.
#
# Despite its name this does not test for "no suggestions": it adds the
# length of each suggestion list to the matching counter.
#
# @param suggestions_a [Array<Array>] one suggestion list per checker
# @param total_suggestions [Array<Integer>] per-checker running totals,
#   updated in place
# @return [Array<Integer>] total_suggestions
def check_no_suggestions(suggestions_a, total_suggestions)
  # Plain each_with_index: the loop only mutates the totals, so there is
  # no reason to build a result array with map.
  suggestions_a.each_with_index do |suggestions, i|
    total_suggestions[i] += suggestions.length
  end
  total_suggestions
end
# Prints the summary table for the three checker variants.
#
# After the header (print_header) one row is printed per variant with:
# the percentage of trials whose first suggestion was right, the number of
# suggestions divided by the number of non-failed trials, and the percentage
# of failed trials. All figures are rounded to one decimal place.
#
# @param first_times [Array<Integer>] per-variant first-suggestion hits
# @param total_suggestions [Array<Integer>] per-variant suggestion totals
# @param total_failures [Array<Integer>] per-variant failed trials
# @param n_repeat [Integer] number of trials
# @param title [String] title passed to print_header
def print_results(first_times, total_suggestions, total_failures, n_repeat, title)
  algorithms = %w[Tree Standard Augmented]
  # Pad every label to the widest one so the numeric columns line up.
  label_width = algorithms.map(&:length).max
  # A zero denominator would print NaN/Infinity; report 0.0 instead.
  percent = ->(count) { n_repeat.zero? ? 0.0 : (count.to_f / n_repeat * 100).round(1) }
  print_header title
  algorithms.each_with_index do |algorithm, i|
    successes = n_repeat - total_failures[i]
    ft = percent.call(first_times[i])
    mns = successes.zero? ? 0.0 : (total_suggestions[i].to_f / successes).round(1)
    f = percent.call(total_failures[i])
    label = algorithm.ljust(label_width)
    puts " #{label} #{' ' * 7} #{ft} #{' ' * 14} #{mns} #{' ' * 15} #{f} #{' ' * 16}"
  end
end
# Prints the summary table heading: a title line of the form
# "<title> Summary" padded with spaces, followed by the column captions
# framed between two 80-character dashed rules.
def print_header(title)
  rule = '-' * 80
  puts "#{' ' * 30} #{title} Summary #{' ' * 31}",
       rule,
       " Method | First Time (\%) Mean Suggestions Failures (\%) #{' ' * 13}",
       rule
end
end
|