1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
|
#!/usr/bin/env python3
"""Scan the files named on the command line for HTTP URLs and verify each
unique URL, reporting moved (301/302), forbidden, broken, or unknown links."""
import http.client
import os
import re
import sys

# Compile regular expression to pull out URLs.  The excluded characters are
# common delimiters; this is deliberately looser than the URI standard.
http_re = re.compile(r'(http://[^\s<>\{\}\|\]\[\)\("]*)')

# Map each unique URL -> (one of) the file(s) it was found in.
http_dict = dict()
for arg in sys.argv[1:]:
    if not os.path.isfile(arg):
        continue
    # 'with' guarantees the file handle is closed even if reading raises.
    with open(arg) as f:
        for line in f:
            # finditer records every URL on the line, not just the first.
            for mo in http_re.finditer(line):
                http_dict[mo.group(1)] = arg

if len(http_dict) > 0:
    print("Found ", len(http_dict), " unique URLS.")

# Compile regular expression to pull out the server address and path.
server_re = re.compile(r"http://([^/]+)(/?[^\s]*)")

for url, filename in http_dict.items():
    mo = server_re.search(url)
    server = mo.group(1)
    path = mo.group(2)
    # Keep conn defined so the 'finally' clause below cannot hit a
    # NameError when HTTPConnection() itself raises.
    conn = None
    try:
        # Connect to the server and GET the path.
        conn = http.client.HTTPConnection(server)
        conn.request("GET", path)
        r1 = conn.getresponse()
        if r1.status == http.client.OK:
            # URL is OK; nothing to report.
            pass
        elif r1.status == http.client.MOVED_PERMANENTLY:
            print(filename, ": ERROR (URL needs update): ", url)
            print(r1.status, r1.reason, " to: ", r1.getheader("location"))
        elif r1.status == http.client.FOUND:
            print(
                filename,
                ": INFO URL: ",
                url,
                r1.status,
                r1.reason,
                " to: ",
                r1.getheader("location"),
            )
        elif r1.status == http.client.FORBIDDEN:
            print(filename, ": INFO URL: ", url, r1.status, r1.reason)
        elif r1.status == http.client.NOT_FOUND:
            print(filename, ": ERROR URL: ", url, r1.status, r1.reason)
        else:
            print(filename, ": UNKNOWN URL: ", url, '"', r1.status, '"', r1.reason)
    except Exception as e:
        # Network/DNS/protocol failure: report the URL and keep checking
        # the remaining ones (best-effort checker, do not abort the run).
        print()
        print(filename, ": ERROR (exception): ", url)
        print(e)
    finally:
        # conn is None only if HTTPConnection() raised before assignment.
        if conn is not None:
            conn.close()
|