File: VerifyURLs.py

package info (click to toggle)
insighttoolkit4 4.13.3withdata-dfsg1-4
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 489,260 kB
  • sloc: cpp: 557,342; ansic: 146,850; fortran: 34,788; python: 16,572; sh: 2,187; lisp: 2,070; tcl: 993; java: 362; perl: 200; makefile: 129; csh: 81; pascal: 69; xml: 19; ruby: 10
file content (75 lines) | stat: -rwxr-xr-x 2,257 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/env python

from __future__ import print_function

import sys
import re
import os

# httplib was renamed to http.client in Python 3; try the Python 2 name
# first so the script runs under either interpreter.
try:
    import httplib
except ImportError:
    import http.client as httplib

# Compile a regular expression to pull out URLs in ITK source files.
# The excluded characters are common delimiters (whitespace, brackets,
# braces, pipes, parentheses, quotes) -- this is a heuristic, not a strict
# implementation of the URL standard.
# Raw string avoids invalid-escape warnings ("\s", "\{") on modern Python;
# the pattern itself is unchanged.
http_re = re.compile(r"(http://[^\s<>\{\}\|\]\[\)\(\"]*)")
# Maps each unique URL found to (one of) the file(s) it appeared in.
http_dict = dict()

# Scan each file named on the command line and record every URL found,
# mapping the URL back to (one of) the file(s) that contains it.
for arg in sys.argv[1:]:
    if not os.path.isfile(arg):
        # Silently skip directories and non-existent arguments.
        continue
    # 'with' guarantees the file is closed even if reading raises.
    with open(arg, "r") as f:
        for line in f:
            # finditer records every URL on the line, not just the first
            # (the original 'search' missed additional URLs on one line).
            for mo in http_re.finditer(line):
                http_dict[mo.group(1)] = arg

# Report how many distinct URLs were collected.
# NOTE: the original condition was '> 1', which silently skipped the
# summary when exactly one URL was found; '> 0' reports any non-empty set.
if len(http_dict) > 0:
    print("Found ", len(http_dict), " unique URLS.")

# Compile a regular expression splitting a URL into the server address
# (group 1, everything up to the first '/') and the request path (group 2).
# Raw string avoids the invalid-escape warning for "\s" on modern Python.
server_re = re.compile(r"http://([^/]+)(/?[^\s]*)")

# Verify each collected URL by issuing a GET request and classifying the
# HTTP response status; report redirects, 404s and connection failures.
for url, filename in http_dict.items():
    # url always starts with "http://" (it was produced by http_re),
    # so server_re is guaranteed to match.
    mo = server_re.search(url)
    server = mo.group(1)
    path = mo.group(2)

    # Initialize before the try so the finally clause can safely close the
    # connection even when HTTPConnection()/request() raises: the original
    # code hit a NameError on the first iteration (conn unbound) or closed
    # a stale connection from a previous iteration.
    conn = None
    try:
        # Connect to the server and fetch the path.
        conn = httplib.HTTPConnection(server)
        conn.request("GET", path)
        r1 = conn.getresponse()

        if r1.status == httplib.OK:
            # URL is fine -- stay quiet.
            pass
        elif r1.status == httplib.MOVED_PERMANENTLY:
            # 301: permanent redirect, the source file should be updated.
            print(filename, ": ERROR (URL needs update): ", url)
            print(r1.status, r1.reason, " to: ", r1.getheader("location"))
        elif r1.status == httplib.FOUND:
            # 302: temporary redirect, informational only.
            print(filename, ": INFO URL: ", url, r1.status, r1.reason, " to: ", r1.getheader("location"))
        elif r1.status == httplib.FORBIDDEN:
            # 403: the URL exists but cannot be verified anonymously.
            print(filename, ": INFO URL: ", url, r1.status, r1.reason)
        elif r1.status == httplib.NOT_FOUND:
            # 404: dead link.
            print(filename, ": ERROR URL: ", url, r1.status, r1.reason)
        else:
            print(filename, ": UNKNOWN URL: ", url, "\"", r1.status, "\"", r1.reason)
    except Exception as e:
        # Network-level failure (DNS error, refused connection, timeout...).
        print()
        print(filename, ": ERROR (exception): ", url)
        print(e)
    except:
        # Non-Exception interrupts (KeyboardInterrupt, SystemExit): report
        # and re-raise so the script actually stops.
        print(filename, ": ERROR (exception): ", url)
        print("Unexpected error:", sys.exc_info()[0])
        raise
    finally:
        # Only close a connection that was actually created.
        if conn is not None:
            conn.close()