File: target.rb

package info (click to toggle)
whatweb 0.4.8~git20141014-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 8,456 kB
  • ctags: 706
  • sloc: ruby: 31,354; sh: 614; makefile: 39
file content (266 lines) | stat: -rw-r--r-- 7,177 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
# A single scan target: either an http(s) URL or a local file.
#
# A file target may hold plain content or a saved raw HTTP response
# ("HTTP/1.x <code> ..." status line, headers, blank line, body); in the
# latter case the headers, cookies and status are parsed out of the file.
#
# After #open the readers expose what was collected (body, headers,
# raw_headers, raw_response, status, md5sum, tag_pattern, ...).
#
# Relies on names defined elsewhere in the project: ExtendedHTTP, TLD,
# error, make_tag_pattern, decode_html_entities and several $GLOBAL settings.
class Target
	attr_reader :target
	attr_reader :uri, :status, :ip, :body, :headers, :raw_headers, :raw_response
	attr_reader :cookies
	attr_reader :md5sum
	attr_reader :tag_pattern
	attr_reader :is_url, :is_file
	attr_accessor :http_options

	# Matches a <meta http-equiv="refresh" ... content="N; url=..."> tag and
	# captures the redirect URL.
	@@meta_refresh_regex=/<meta[\s]+http\-equiv[\s]*=[\s]*['"]?refresh['"]?[^>]+content[\s]*=[^>]*[0-9]+;[\s]*url=['"]?([^"'>]+)['"]?[^>]*>/i

	# Multi-section debug dump of the collected state. Each section is a label,
	# a line of 40 asterisks and the value; sections are separated by a newline
	# so a value never runs into the next label.
	def inspect
		[
			["URI", @uri],
			["status", @status],
			["ip", @ip],
			["header", @headers],
			["cookies", @cookies],
			["raw_headers", @raw_headers],
			["raw_response", @raw_response],
			["body", @body],
			["md5sum", @md5sum],
			["tag_pattern", @tag_pattern],
			["is_url", @is_url],
			["is_file", @is_file]
		].map { |label, value| "#{label}\n#{'*' * 40}\n#{value}" }.join("\n")
	end

	# The target string exactly as it was given to the constructor.
	def to_s
		@target
	end

	# Class-level accessor for the meta refresh regular expression.
	def Target.meta_refresh_regex
		@@meta_refresh_regex
	end

	# True when the target names an existing local file.
	def is_file?
		@is_file
	end

	# True when the target starts with http:// or https://.
	def is_url?
		@is_url
	end

	# target - a URL ("http://..." / "https://...") or a path to a local file.
	#
	# Raises RuntimeError when the path is a directory or is not readable.
	def initialize(target=nil)
		@target=target
		@headers={}
		@http_options={:method=>"GET"}

		@is_url = (@target =~ /^http[s]?:\/\//) ? true : false

		# File.exist? is the supported spelling; File.exists? is a deprecated
		# alias that newer Ruby releases no longer provide.
		if File.exist?(@target)
			@is_file=true
			raise "Error: #{@target} is a directory" if File.directory?(@target)
			raise "Error: You do not have permission to view #{@target}" unless File.readable?(@target)
		else
			@is_file=false
		end

		if is_url?
			@uri=URI.parse(escape_uri(@target))

			# is this taking control away from the user?
			# [400] http://www.alexa.com  [200] http://www.alexa.com/
			@uri.path = "/" if @uri.path.empty?
		else
			# Non-URL targets get an empty URI so @uri is never nil.
			@uri=URI.parse("")
		end
	end

	# Fetches the target (from disk or over HTTP) and derives md5sum,
	# tag_pattern and raw_response from the body.
	def open
		if is_file?
			open_file
		else
			open_url(@http_options)
		end

		if @body.nil?
			# Initialize @body variable if the connection is terminated prematurely
			# This is usually caused by HTTP status codes: 101, 102, 204, 205, 305
			@body=""
		else
			@md5sum=Digest::MD5.hexdigest(@body)
			@tag_pattern = make_tag_pattern(@body)
			if @raw_headers
				@raw_response = @raw_headers + @body
			else
				# No headers were collected (plain file): the response is just the body.
				@raw_response = @body
				@raw_headers = ""
				@cookies=[]
			end
		end
	end

	# Reads the target file into @body. When the content looks like a saved
	# HTTP response, also fills @headers, @raw_headers, @status and @cookies,
	# and reduces @body to the part after the first blank line.
	#
	# I/O errors from reading the file propagate to the caller.
	def open_file
		# Block form closes the file handle as soon as the content is read.
		@body=File.open(@target) { |f| f.read }

		# target is a http packet file
		if @body =~ /^HTTP\/1\.\d [\d]{3} (.+)\r\n\r\n/m
			# extract http header
			@headers={}
			pageheaders = @body.split(/\r\n\r\n/).first.to_s.split(/\r\n/)
			@raw_headers = pageheaders.join("\n") + "\r\n\r\n"
			@status = pageheaders.first.to_s.scan(/^HTTP\/1\.\d ([\d]{3}) /).flatten.first.to_i
			@cookies=[]

			# Every line after the status line is a "Name: value" header.
			pageheaders.drop(1).each do |line|
				section=line.split(/:/).first.to_s.downcase
				value=line.scan(/:[\s]*(.+)$/).flatten.first
				if section =~ /^set-cookie$/i
					# Set-Cookie may repeat, so collect every occurrence.
					@cookies << value
				else
					@headers[section]=value
				end
			end
			@headers["set-cookie"] = @cookies.join("\n") unless @cookies.empty?

			# extract html source; (.*) so that a response with headers but an
			# empty body yields "" instead of leaving the whole packet in @body
			payload = @body[/^HTTP\/1\.\d [\d]{3} .+?\r\n\r\n(.*)/m, 1]
			@body = payload unless payload.nil?
		end
	end

	# Performs the HTTP request described by options and stores the response
	# in @raw_headers, @headers, @body and @status, then resolves @ip.
	#
	# options - Hash with :method ("GET", "HEAD" or "POST") and, for POST,
	#           :data (the form fields passed to set_form_data).
	#
	# Failures are reported through error() and the method returns early;
	# nothing is raised for StandardError-derived failures.
	def open_url(options)
		begin
			if $USE_PROXY == true
				http=ExtendedHTTP::Proxy($PROXY_HOST,$PROXY_PORT, $PROXY_USER, $PROXY_PASS).new(@uri.host,@uri.port)
			else
				http=ExtendedHTTP.new(@uri.host,@uri.port)
			end

			# set timeouts
			http.open_timeout = $HTTP_OPEN_TIMEOUT
			http.read_timeout = $HTTP_READ_TIMEOUT

			# if it's https://
			# i wont worry about certificates, verfication, etc
			if @uri.class == URI::HTTPS
				http.use_ssl = true
				http.verify_mode = OpenSSL::SSL::VERIFY_NONE
			end

			getthis = @uri.path + (@uri.query.nil? ? "" : "?" + @uri.query)

			case options[:method]
			when "GET"
				req=Net::HTTP::Get.new(getthis, $CUSTOM_HEADERS)
			when "HEAD"
				req=Net::HTTP::Head.new(getthis, $CUSTOM_HEADERS)
			when "POST"
				req=Net::HTTP::Post.new(getthis, $CUSTOM_HEADERS)
				req.set_form_data(options[:data])
			else
				# Report the real problem instead of a NoMethodError on a nil request.
				error(@target + " ERROR: Unsupported HTTP method #{options[:method].inspect}")
				return
			end

			if $BASIC_AUTH_USER
				req.basic_auth $BASIC_AUTH_USER, $BASIC_AUTH_PASS
			end
			res=http.request(req)
			# http.raw comes from ExtendedHTTP (defined elsewhere); presumably the
			# raw header lines of the response.
			@raw_headers=http.raw.join("\n")
			@headers={}; res.each_header {|x,y| @headers[x]=y }
			# Keep every Set-Cookie value, one per line.
			@headers["set-cookie"] = res.get_fields('set-cookie').join("\n") unless @headers["set-cookie"].nil?
			@body=res.body
			# body is ASCII-8BIT at this stage

			@status=res.code.to_i
			puts @uri.to_s + " [#{status}]" if  $verbose > 0

		rescue SocketError => err
			error(@target + " ERROR: Socket error #{err}")
			return
		rescue Timeout::Error => err
			# Timeout::Error replaces the top-level TimeoutError alias, which
			# newer Ruby releases no longer define.
			error(@target + " ERROR: Timed out #{err}")
			return
		rescue Errno::ETIMEDOUT	=>err # for ruby 1.8.7 patch level 249
			error(@target + " ERROR: Timed out (ETIMEDOUT) #{err}")
			return
		rescue EOFError => err
			error(@target + " ERROR: EOF error #{err}")
			return
		rescue StandardError => err
			err = "Not HTTP or cannot resolve hostname" if err.to_s == "undefined method `closed?' for nil:NilClass"
			error(@target + " ERROR: #{err}")
			return
		end

		begin
			@ip=IPSocket.getaddress(@uri.host)
		rescue StandardError => err
			err = "Cannot resolve hostname" if err.to_s == "undefined method `closed?' for nil:NilClass"
			error(@target + " ERROR: #{err}")
			return
		end
	end

	# Works out where this target redirects to, honouring $FOLLOW_REDIRECT
	# ("never", "http-only", "meta-only", "same-site", "same-domain", "always").
	#
	# Returns the new target URL as a String, or nil when there is no
	# redirect, the mode forbids it, or it points back at this same URI.
	def get_redirection_target
		newtarget_m=nil
		newtarget_h=nil
		newtarget=nil

		# Redirect requested by a <meta http-equiv="refresh"> tag in the body.
		if @@meta_refresh_regex =~ @body
			metarefresh=@body.scan(@@meta_refresh_regex).flatten.first
			metarefresh=decode_html_entities(metarefresh)
			newtarget_m=URI.join(@target,metarefresh).to_s # this works for relative and absolute
		end

		# Redirect requested by a 3xx status with a Location header.
		unless @status.nil? || @headers.nil?
			if (300..399) === @status && @headers["location"]
				newtarget_h=URI.join(@target,@headers["location"]).to_s
			end
		end

		# if both meta refresh location and HTTP location are set, then the HTTP location overrides
		candidate = newtarget_h || newtarget_m
		if candidate
			case $FOLLOW_REDIRECT
			when "never"
				# redirects are never followed; newtarget stays nil
			when "http-only"
				newtarget = newtarget_h
			when "meta-only"
				newtarget = newtarget_m
			when "same-site"
				newtarget = candidate if URI.parse(candidate).host == @uri.host
			when "same-domain"
				newtarget = candidate if TLD.same_domain?(@uri.host, URI.parse(candidate).host)
			when "always"
				newtarget = candidate
			else
				error("Error: Invalid REDIRECT mode")
			end
		end
		newtarget=nil if newtarget == @uri.to_s # circular redirection not allowed

		newtarget
	end

	private

	# Percent-escapes characters that are not allowed in a URI, using RFC 2396
	# rules (the behaviour of the old URI.encode). URI.encode itself is absent
	# from newer Ruby releases, so the parser object is used when available and
	# URI.encode only serves as a fallback for very old interpreters.
	def escape_uri(str)
		if defined?(URI::RFC2396_PARSER)
			URI::RFC2396_PARSER.escape(str)
		elsif defined?(URI::DEFAULT_PARSER)
			URI::DEFAULT_PARSER.escape(str)
		else
			URI.encode(str)
		end
	end

end