File: wordfreq.awk

package info (click to toggle)
gawk 1:4.1.4+dfsg-1
  • links: PTS
  • area: main
  • in suites: stretch
  • size: 16,836 kB
  • ctags: 5,783
  • sloc: ansic: 48,799; awk: 11,167; yacc: 5,939; sh: 5,579; makefile: 2,554; sed: 121
file content (16 lines) | stat: -rw-r--r-- 347 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# wordfreq.awk --- print list of word frequencies

# Per-record rule: normalise the line, then tally every word on it in freq[].
{
    # Fold to lower case so "The" and "the" are counted as the same word.
    $0 = tolower($0)

    # Delete every character that is not a letter, digit, underscore, or
    # blank.  gsub() targets $0 by default; modifying $0 re-splits the
    # record, so NF and the fields below reflect the cleaned text.
    gsub(/[^[:alnum:]_[:blank:]]/, "")

    # Walk the fields left to right, bumping each word's counter.
    pos = 1
    while (pos <= NF) {
        freq[$pos] += 1
        pos++
    }
}

# After all input is read: emit "word<TAB>count" lines through sort(1).
END {
    # External command: numeric, reverse sort keyed on the second field
    # (the count), so the most frequent words come out first.
    sorter = "sort -k 2nr"

    for (w in freq) {
        count = freq[w]
        printf "%s\t%d\n", w, count | sorter
    }

    # Close the pipe so sort receives end-of-input and awk waits for it
    # to finish writing its output.
    close(sorter)
}