1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
|
# Directions for robots. See this URL:
# http://www.robotstxt.org/robotstxt.html
# for a description of the file format.
# The three user agents below share a single group: "Disallow: /" bars each
# of them from every path on the site.
User-agent: HTTrack
User-agent: puf
User-agent: MSIECrawler
Disallow: /
# The Krugle web crawler (though based on Nutch) is OK.
# The specific Disallow rules are listed before the catch-all "Allow: /" on
# purpose: parsers that apply the first matching rule would otherwise stop at
# "Allow: /" and never see the exclusions. Parsers that use longest-match
# precedence give the same result with either order.
User-agent: Krugle
Disallow: /~guido/orlijn/
Disallow: /webstats/
Allow: /
# No one should be crawling us with Nutch.
# "Disallow: /" shuts this user agent out of the entire site.
# NOTE(review): the Krugle group (a Nutch-based crawler that is allowed) is
# listed before this one; a parser that picks the first group whose name
# matches may depend on that order -- confirm before moving either group.
User-agent: Nutch
Disallow: /
# Hide old versions of the documentation and various large sets of files.
# The "*" group is the fallback: it applies to any crawler that is not named
# in a group of its own in this file.
User-agent: *
Disallow: /~guido/orlijn/
Disallow: /webstats/
|