File: longwrds.awk

package info (click to toggle)
gawk 3.0.3-1
  • links: PTS
  • area: main
  • in suites: hamm, slink
  • size: 4,520 kB
  • ctags: 2,175
  • sloc: ansic: 20,481; awk: 2,476; yacc: 2,039; makefile: 877; sh: 409
file content (20 lines) | stat: -rw-r--r-- 455 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# From the Gawk manual, modified with a bug fix and the removal of punctuation
# Record every word which is used at least once
{
	for (i = 1; i <= NF; i++) {
		tmp = tolower($i)
		if (0 != (pos = match(tmp, /([a-z]|-)+/)))
			used[substr(tmp, pos, RLENGTH)] = 1
	}
}

#Find a number of distinct words longer than 10 characters
END {
	num_long_words = 0
	for (x in used) 
		if (length(x) > 10) {
			++num_long_words
			print x
		}
	print num_long_words, "long words"
}