1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
|
#!/usr/bin/env python3
"""Scan the files named on the command line for HTTP URLs and verify each
unique URL, reporting moved (301/302), forbidden, broken, or unknown links."""
import http.client
import os
import re
import sys

# Compile regular expression to pull out URLs.  The excluded characters are
# common delimiters; this is deliberately looser than the URI standard.
http_re = re.compile(r'(http://[^\s<>\{\}\|\]\[\)\("]*)')

# Map each unique URL -> (one of) the file(s) it was found in.
http_dict = dict()
for arg in sys.argv[1:]:
    if not os.path.isfile(arg):
        continue
    # 'with' guarantees the file handle is closed even if reading raises.
    with open(arg) as f:
        for line in f:
            # finditer records every URL on the line, not just the first.
            for mo in http_re.finditer(line):
                http_dict[mo.group(1)] = arg

if len(http_dict) > 0:
    print("Found ", len(http_dict), " unique URLS.")

# Compile regular expression to pull out the server address and path.
server_re = re.compile(r"http://([^/]+)(/?[^\s]*)")

for url, filename in http_dict.items():
    mo = server_re.search(url)
    server = mo.group(1)
    path = mo.group(2)
    # Keep conn defined so the 'finally' clause below cannot hit a
    # NameError when HTTPConnection() itself raises.
    conn = None
    try:
        # Connect to the server and GET the path.
        conn = http.client.HTTPConnection(server)
        conn.request("GET", path)
        r1 = conn.getresponse()
        if r1.status == http.client.OK:
            # URL is OK; nothing to report.
            pass
        elif r1.status == http.client.MOVED_PERMANENTLY:
            print(filename, ": ERROR (URL needs update): ", url)
            print(r1.status, r1.reason, " to: ", r1.getheader("location"))
        elif r1.status == http.client.FOUND:
            print(
                filename,
                ": INFO URL: ",
                url,
                r1.status,
                r1.reason,
                " to: ",
                r1.getheader("location"),
            )
        elif r1.status == http.client.FORBIDDEN:
            print(filename, ": INFO URL: ", url, r1.status, r1.reason)
        elif r1.status == http.client.NOT_FOUND:
            print(filename, ": ERROR URL: ", url, r1.status, r1.reason)
        else:
            print(filename, ": UNKNOWN URL: ", url, '"', r1.status, '"', r1.reason)
    except Exception as e:
        # Network/DNS/protocol failure: report the URL and keep checking
        # the remaining ones (best-effort checker, do not abort the run).
        print()
        print(filename, ": ERROR (exception): ", url)
        print(e)
    finally:
        # conn is None only if HTTPConnection() raised before assignment.
        if conn is not None:
            conn.close()
|