1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52
|
# findDocWords.tcl --
#
# This script attempts to find all non-dictionary words in the Tcl or Tk
# documentation tree. It handles the fairly common compoundWord trick our
# docs use, and isn't confused by nroff formatting directives, so it isn't
# just a standard spell check.
#
# Arguments:
# 1: Directory to look for man source files in.
# 2: Path to a plain text dictionary. Try /usr/share/dict/words on Linux.
#
# Copyright © 2024 Donal K Fellows.
# See "license.terms" for the license.
lassign $argv dir dictionary
set f [open $dictionary]
while {[gets $f line] > 0} {
dict set realWord [string tolower $line] yes
}
close $f
puts "loaded [dict size $realWord] words from dictionary"
set files [glob -directory $dir {*.[13n]}]
set found {}
proc identifyWords {fragment filename} {
global realWord found
foreach frag [split [string map {\\fB "" \\fR "" \\fI "" \\fP "" \\0 _} $fragment] _] {
if {[string is entier $frag]} continue
set frag [string trim $frag "\\0123456789"]
if {$frag eq ""} continue
foreach word [regexp -all -inline {^[a-z]+|[A-Z][a-z]*} $frag] {
set word [string tolower $word]
if {![dict exists $realWord $word]} {
dict lappend found $word $filename
}
}
}
}
foreach fn $files {
set f [open $fn]
foreach word [regexp -all -inline {[\\\w]+} [read $f]] {
identifyWords $word $fn
}
close $f
}
set len [tcl::mathfunc::max {*}[lmap word [dict keys $found] {string length $word}]]
foreach word [lsort [dict keys $found]] {
puts [format "%-${len}s: %s" $word [lindex [dict get $found $word] 0]]
}
|