File: wordfreq.awk

package info (click to toggle)
gawk 1:4.2.1+dfsg-1
  • links: PTS
  • area: main
  • in suites: buster
  • size: 20,880 kB
  • sloc: ansic: 50,919; awk: 12,043; yacc: 6,393; sh: 5,675; makefile: 2,856; sed: 128; csh: 6
file content (16 lines) | stat: -rw-r--r-- 347 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# wordfreq.awk --- print list of word frequencies

{
    $0 = tolower($0)    # remove case distinctions
    # remove punctuation
    gsub(/[^[:alnum:]_[:blank:]]/, "", $0)
    for (i = 1; i <= NF; i++)
        freq[$i]++
}

END {
    sort = "sort -k 2nr"
    for (word in freq)
        printf "%s\t%d\n", word, freq[word] | sort
    close(sort)
}