File: get-pattern

package info (click to toggle)
whatweb 0.4.8~git20120606-1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 7,956 kB
  • sloc: ruby: 53,738; sh: 577; makefile: 34
file content (74 lines) | stat: -rwxr-xr-x 1,499 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/usr/bin/env ruby
# Page Pattern Generator
#  Aung Khant, http://yehg.net

# Feed it a URL and
# get back the page's tag pattern, ready to use in your plugin.

# Some code taken from Andrew Horton

require 'net/http'
require 'net/https'
require 'open-uri'

# Load the MD5 implementation used below (Digest::MD5).
# The previous check only matched Ruby 1.9.x, so every later Ruby fell through
# to `require 'md5'`, a library that is no longer shipped, and the script
# aborted with LoadError. Try the Digest library first and keep the legacy
# 'md5' library only as a fallback for very old interpreters.
begin
  require 'digest/md5'
rescue LoadError
  require 'md5'
end

# from tag_pattern.rb
def pg_tag_pattern(b)
  # Build the comma-separated, lower-cased list of tag names found in +b+.
  #
  # A "tag name" is whatever follows a '<' up to the next whitespace or '>'
  # (so closing tags appear as "/name", and a bare '<' yields an empty name).
  # Everything seen after an emitted "script" tag is skipped until a
  # "/script" tag shows up; the "script" and "/script" names themselves are
  # kept. Comments (!--) and noscript are not treated specially.
  #
  # b       - String holding the page body.
  # Returns the tag pattern as a String ("" when no '<' is present).
  inside_script = false
  kept = []

  b.scan(/<([^\s>]*)/).flatten.each do |raw|
    name = raw.downcase

    if name == "/script"
      # A closing script tag is always emitted and ends the skipped region.
      inside_script = false
      kept << name
    elsif !inside_script
      kept << name
      inside_script = true if name == "script"
    end
  end

  kept.join(",")
end

# Print a one-line usage hint to standard output.
# The script name comes from $0 so the hint matches however the file was
# actually invoked (the old text hard-coded "./page-pattern", which is not
# this file's name).
def usage
  puts "Usage: #{$0} http://www.example.com/\n"
end

   
# --- Script entry point ---
# Fetch the URL given as the first command-line argument and print two
# WhatWeb match entries for it: the HTML tag pattern and the MD5 hash of the
# response body.
if ARGV.empty?
  usage
  exit
end

url = ARGV[0].to_s

# Assume plain HTTP when the argument carries no scheme.
url = 'http://' + url unless url =~ %r{\Ahttps?://}
uri = URI.parse(url)
# A bare host parses to an empty path; request the root document instead.
uri.path += '/' if uri.path.empty?

http = Net::HTTP.new(uri.host, uri.port)
http.open_timeout = 180
http.read_timeout = 180
if uri.scheme == "https"
  http.use_ssl = true
  # NOTE: certificate verification is switched off, as in the original script,
  # so pages behind self-signed or expired certificates can still be fetched.
  http.verify_mode = OpenSSL::SSL::VERIFY_NONE
end

begin
  # request_uri is path plus query string; passing uri.path alone silently
  # dropped everything after '?'.
  response = http.get(uri.request_uri)
rescue => e
  # Keep the original message but append the underlying cause.
  raise "Cannot resolve or connect to #{uri.to_s}. (#{e.class}: #{e.message})"
end

# Net::HTTP#get returns only the response object; the old
# `req,body = http.get(...)` destructuring leaves body nil on current Rubies.
# to_s covers responses that carry no body at all.
body = response.body.to_s

puts "\n== Page Pattern Generator 0.1 for WhatWeb == \n\tby Aung Khant, http://yehg.net\n\n"

puts "URL: #{uri}\n\n"
puts "{:name=>'HTML Tag Pattern', :tagpattern=>'#{pg_tag_pattern(body)}'},\n\n"
puts "{:name=>'MD5 hash', :md5=>'#{Digest::MD5.hexdigest(body)}'}\n\n"