File: wordfreq.awk

package info (click to toggle)
gawk 1:4.1.1+dfsg-1
  • links: PTS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 15,604 kB
  • ctags: 5,728
  • sloc: ansic: 47,935; sh: 12,995; awk: 10,263; yacc: 5,815; makefile: 2,279; sed: 121
file content (16 lines) | stat: -rw-r--r-- 347 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# wordfreq.awk --- print list of word frequencies

{
    $0 = tolower($0)    # remove case distinctions
    # remove punctuation
    gsub(/[^[:alnum:]_[:blank:]]/, "", $0)
    for (i = 1; i <= NF; i++)
        freq[$i]++
}

END {
    sort = "sort -k 2nr"
    for (word in freq)
        printf "%s\t%d\n", word, freq[word] | sort
    close(sort)
}