File: www.open.ac.uk

package info (click to toggle)
python-protego 0.5.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 30,052 kB
  • sloc: python: 1,579; perl: 190; cpp: 33; sh: 4; makefile: 3
file content (49 lines) | stat: -rw-r--r-- 1,487 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# 
# This file tells web crawlers and spiders, such as those run by sites
# like Google, which parts of our site they should not crawl or index.
# By telling these "robots" where not to go on the site,
# we save bandwidth and server resources.
#
# For more information about the robots.txt standard, see:
# http://www.robotstxt.org/wc/robots.html
# Single rule group. The two User-agent lines are consecutive, so every
# rule below applies both to the named crawler and to the "*" catch-all.
User-agent: rightnow_webindexer # RightNow # CUSTOM
User-agent: *    # applies to all robots
# Most patterns below use "*" as a wildcard for any run of characters, so
# "/*name*" is meant to match a URL path containing "name" anywhere.
# NOTE(review): wildcard support depends on the crawler; parsers without it
# will treat "*" as a literal character -- confirm for the crawlers targeted.
# Server-side script directories.
Disallow: /*cgi-bin*
Disallow: /*CFIDE*                 
# Feeds: feed endpoints and any URL carrying a feed path segment or
# "feed=" / "feed?" in it.
Disallow: /*feed-items*
Disallow: /*feed=*
Disallow: /library/news/feed*
Disallow: /libraryservices/feeds*
Disallow: /*feed?*
Disallow: /*Tooltip-feed-atom*      
# Search results and browse listings.
Disallow: /library/digital-archive/search*
Disallow: /Arts/reading/UK/search_basic_results*
Disallow: /Arts/reading/UK/browse_reader*
Disallow: /libraryservices/beta/search/*
Disallow: /outbound/article/*
# Specific paths.
Disallow: /author/admin/
Disallow: /libraryservices/feedback/poll/*
Disallow: /*hello-world
# Query-string parameters: block any URL containing one of these names.
# Both "URL=" and "url=" are listed; presumably because path matching is
# case-sensitive -- verify before merging them.
Disallow: /*sort=*
Disallow: /*URL=*
Disallow: /*url=*
Disallow: /*MEDIA=*
Disallow: /*KWCAMPAIGN=*
Disallow: /*CATCODE=*
Disallow: /*payments?rid=*
Disallow: /*replytocom*
Disallow: /*attachment_id=*
Disallow: /*ajaxCalendar=*
Disallow: /*timein=*
Disallow: /*field_category_value*
Disallow: /*pid=*
Disallow: /*tag=*
# Wikis: each wiki path is listed under both Disallow and NoIndex.
# NOTE(review): NoIndex is not a directive in the Robots Exclusion Protocol
# (RFC 9309); crawlers that do not recognise it are expected to skip the
# line. Confirm whether any targeted crawler still honours it.
Disallow: /wikis/PIRATE/*
NoIndex: /wikis/PIRATE/*
Disallow: /wikis/IET-Intranet*
NoIndex: /wikis/IET-Intranet*