1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
|
# robots.txt for http://clinicaltrials.gov
# $Revision: 187936 $
#
User-Agent: *
Disallow: /cs/ # obsolete
Disallow: /c/ # obsolete
## Keep /ct/ # it is obsolete, but we handle it well
Disallow: /cta/ # same info available elsewhere
Disallow: /beta/ # don't want persistent links to beta site
Disallow: /ct2/archive/ # leaving this site to archive page
Disallow: /ct2/bye/ # leaving this site to 3rd party page
Disallow: /ct2/helpdesk/ # helpdesk form
Disallow: /ct2/results/rss.xml # rss feed of current search results
Disallow: /*?displayxml=true # don't index xml files
Disallow: /*?resultsxml=true # don't index xml files
# Don't index the archive site; we don't want it found (and ranked highly) by the
# likes of Google, since we really want people to come to the main site.
Disallow: /archive/ # don't index archive site
# Some docs say the value must be a whole number of seconds.
# Crawlers differ on whether this means a delay between calls or a maximum number of requests per window.
# Google ignores this value, but it is supposedly honored by Yandex and Bing.
Crawl-delay: 1
# Crawlers are advised to avoid our browse hierarchy because
# it finds all studies many times, with slightly different URLs
# and term highlighting. It is much better if crawlers find the studies
# using the crawler link: /ct2/crawl
|