File: urlchk.awk

package info (click to toggle)
gawk 1%3A4.1.1%2Bdfsg-1
  • links: PTS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 15,604 kB
  • ctags: 5,728
  • sloc: ansic: 47,935; sh: 12,995; awk: 10,263; yacc: 5,815; makefile: 2,279; sed: 121
file content (28 lines) | stat: -rw-r--r-- 1,053 bytes parent folder | download | duplicates (12)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
BEGIN {
  if (ARGC != 2) {
    print "URLCHK - check if URLs have changed"
    print "IN:\n    the file with URLs as a command-line parameter"
    print "    file contains URL, old length, new length"
    print "PARAMS:\n    -v Proxy=MyProxy -v ProxyPort=8080"
    print "OUT:\n    same as file with URLs"
    print "JK 02.03.1998"
    exit
  }
  URLfile = ARGV[1]; ARGV[1] = ""
  if (Proxy     != "") Proxy     = " -v Proxy="     Proxy
  if (ProxyPort != "") ProxyPort = " -v ProxyPort=" ProxyPort
  while ((getline < URLfile) > 0)
     Length[$1] = $3 + 0
  close(URLfile)      # now, URLfile is read in and can be updated
  GetHeader = "gawk " Proxy ProxyPort " -v Method=\"HEAD\" -f geturl.awk "
  for (i in Length) {
    GetThisHeader = GetHeader i " 2>&1"
    while ((GetThisHeader | getline) > 0)
      if (toupper($0) ~ /CONTENT-LENGTH/) NewLength = $2 + 0
    close(GetThisHeader)
    print i, Length[i], NewLength > URLfile
    if (Length[i] != NewLength)  # report only changed URLs
      print i, Length[i], NewLength
  }
  close(URLfile)
}