#
# robots.txt
#
# This file is used to prevent the crawling and indexing of certain parts
# of your site by web crawlers and spiders run by sites like Yahoo!
# and Google. By telling these "robots" where not to go on your site,
# you save bandwidth and server resources.
#
# This file will be ignored unless it is at the root of your host:
# Used: http://example.com/robots.txt
# Ignored: http://example.com/site/robots.txt
#
# For more information about the robots.txt standard, see:
# http://www.robotstxt.org/robotstxt.html
User-agent: *
# Directories
Disallow: */includes/
Disallow: */misc/
Disallow: */modules/
Disallow: */profiles/
Disallow: */scripts/
Disallow: */themes/
Disallow: */internaluseronly/
# Files
Disallow: */CHANGELOG.txt
Disallow: */cron.php
Disallow: */INSTALL.mysql.txt
Disallow: */INSTALL.pgsql.txt
Disallow: */INSTALL.sqlite.txt
Disallow: */install.php
Disallow: */INSTALL.txt
Disallow: */LICENSE.txt
Disallow: */MAINTAINERS.txt
Disallow: */update.php
Disallow: */UPGRADE.txt
Disallow: */xmlrpc.php
# Paths (clean URLs)
Disallow: */admin/
Disallow: */comment/reply/
Disallow: */content/
Disallow: */file/
Disallow: */filter/tips/
Disallow: */node/
Disallow: */search
Disallow: */user/register/
Disallow: */user/password/
Disallow: */user/login/
Disallow: */user/logout/
# Paths (no clean URLs)
Disallow: */?q=admin/
Disallow: */?q=comment/reply/
Disallow: */?q=content/
Disallow: */?q=file/
Disallow: */?q=filter/tips/
Disallow: */?q=node/
Disallow: */?q=search/
Disallow: */?q=user/password/
Disallow: */?q=user/register/
Disallow: */?q=user/login/
Disallow: */?q=user/logout/
# Sitemap details.
Sitemap: https://www.nbc.com/sitemap.xml
Sitemap: https://www.nbc.com/video_sitemap.xml
# Sitemap for the Google PlayGuide.
Sitemap: https://api.nbc.com/googlePlayGuide/feed.json