File: wordfreq.awk

package info (click to toggle)
gawk 1%3A3.1.5.dfsg-4
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 9,144 kB
  • ctags: 3,545
  • sloc: ansic: 31,602; awk: 4,970; sh: 4,409; yacc: 2,634; makefile: 1,358; sed: 16
file content (20 lines) | stat: -rw-r--r-- 422 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# wordfreq.awk --- print list of word frequencies

{
    # Fold the whole record to lower case so "The" and "the" are
    # tallied as the same word.
    $0 = tolower($0)

    # Delete every character that is not alphanumeric, an underscore,
    # or a blank.  With no explicit target gsub() edits $0, which also
    # makes awk re-split the record into fields.
    gsub(/[^[:alnum:]_[:blank:]]/, "")

    # Bump the counter for each remaining field (word) of the record.
    i = 0
    while (++i <= NF)
        freq[$i]++
}

# Print the frequency table once, as "word<TAB>count" lines ordered by
# descending count.
#
# There must be only one printing END rule: awk executes every END rule
# in sequence, so an additional unsorted printing rule would make each
# word appear twice in the output (and, since one copy would go straight
# to stdout and the other through the sort child process, the two copies
# could interleave unpredictably).
END {
    # Numeric, reverse sort on the second field (the count).
    sort = "sort -k 2nr"
    for (word in freq)
        printf "%s\t%d\n", word, freq[word] | sort
    # Closing the pipe lets sort see end-of-input and flush its output
    # before awk exits.
    close(sort)
}