File: robots.txt.rb

##
# This file is part of WhatWeb and may be subject to
# redistribution and commercial restrictions. Please see the WhatWeb
# web site for more information on licensing and terms of use.
# https://morningstarsecurity.com/research/whatweb
##
Plugin.define do
name "robots_txt"
authors [
  "Brendan Coles <bcoles@gmail.com>", # 2010-10-22
  # v0.2 # Added aggressive `/robots.txt` retrieval.
  # v0.3 # 2011-03-23 # Removed aggressive section.
]
version "0.3"
description "This plugin identifies robots.txt files and extracts both allowed and disallowed directories. - More Info: http://www.robotstxt.org/"
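
# A robots.txt file is a series of "Field: value" lines. An illustrative
# sample (hypothetical values, not part of this plugin):
#
#   User-agent: *
#   Disallow: /admin/
#   Allow: /public/
#
# The passive matcher below keys off exactly these three field names.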

# Google results as at 2011-03-23 #
# 920 for inurl:robots.txt filetype:txt



# Passive #
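# Note: @body (the fetched HTTP response body) and @base_uri (the requested
# URL) are supplied to the plugin by the surrounding WhatWeb framework.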
passive do
	m=[]

	# Extract directories if current file is robots.txt
	if @base_uri.path == "/robots.txt" && @body =~ /^User-agent:/i

		# File Exists
		m << { :name=>"File Exists" }

		# Disallow (value must be on the same line; exclude the CR of CRLF
		# line endings so captures do not carry a trailing "\r")
		if @body =~ /^Disallow:[ \t]*([^\r\n]+)/i
			m << { :string=>@body.scan(/^Disallow:[ \t]*([^\r\n]+)/i) }
		end

		# Allow (same pattern as Disallow above)
		if @body =~ /^Allow:[ \t]*([^\r\n]+)/i
			m << { :string=>@body.scan(/^Allow:[ \t]*([^\r\n]+)/i) }
		end

	end

	# Return passive matches
	m
end
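
# A minimal sketch of the extraction behaviour, assuming a typical CRLF
# robots.txt body (hypothetical values; String#scan returns an array of
# one-element capture arrays):
#
#   body = "User-agent: *\r\nDisallow: /admin/\r\nAllow: /public/\r\n"
#   body.scan(/^Disallow:[ \t]*([^\r\n]+)/i) #=> [["/admin/"]]
#   body.scan(/^Allow:[ \t]*([^\r\n]+)/i)    #=> [["/public/"]]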

end