File: wordfreq.awk

package info (click to toggle)
gawk 3.0.3-1
  • links: PTS
  • area: main
  • in suites: hamm, slink
  • size: 4,520 kB
  • ctags: 2,175
  • sloc: ansic: 20,481; awk: 2,476; yacc: 2,039; makefile: 877; sh: 409
file content (13 lines) | stat: -rw-r--r-- 316 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
# Print list of word frequencies
# Per-record rule: normalize the input line, then count each of its fields
# in the global array freq[] (read later by the END rule).
{
    # Fold to lower case; assigning to $0 makes awk re-split the fields.
    $0 = tolower($0)
    # Delete every character that is not a-z, 0-9, "_", space or tab.
    # With no third argument gsub() operates on $0.
    gsub(/[^a-z0-9_ \t]/, "")
    n = 0
    while (++n <= NF)
        freq[$n] += 1
}
# After all input has been read: write one "word<TAB>count" line per entry
# of freq[] through sort(1), so the most frequent words are listed first.
END {
    # "-k 2nr": sort key starts at field 2 (the count), compared numerically,
    # in reverse order.  This replaces the obsolescent "+1 -nr" spelling,
    # which was removed from POSIX; current sort implementations may treat
    # "+1" as a file name and fail.
    sort = "sort -k 2nr"
    for (word in freq)
        printf "%s\t%d\n", word, freq[word] | sort
    # Close the pipe so sort sees end-of-input and emits its output.
    close(sort)
}