File: alta

package info (click to toggle)
netcat 1.10-8
  • links: PTS
  • area: main
  • in suites: hamm, slink
  • size: 304 kB
  • ctags: 210
  • sloc: ansic: 1,667; sh: 737; makefile: 101
file content (33 lines) | stat: -rwxr-xr-x 1,140 bytes parent folder | download | duplicates (21)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#! /bin/sh
## special handler for altavista, since they only hand out chunks of 10 at
## a time.  Tries to isolate out results without the leading/trailing trash.
## multiword arguments are foo+bar, as usual.
## Second optional arg switches the "what" field, to e.g. "news"

# Argument handling.
#   $1 - search terms (required); without it the script aborts with status 1.
#   $2 - optional value for the "what=" query field; defaults to "web".
if [ -z "${1}" ]; then
  # Diagnostics go to stderr so they never mix with results on stdout.
  printf '%s\n' 'Needs an argument to search for!' >&2
  exit 1
fi
WHAT="${2:-web}"

# convert multiple args
# plus_join WORD...: print all arguments as a single '+'-separated string.
# The arguments are re-split on whitespace (so "foo bar" counts as two
# words), but with globbing switched off, so terms such as '*' or 'what?'
# are passed through literally instead of being expanded against the files
# in the current directory.  printf is used instead of echo so that a
# leading "-n" or embedded backslashes in the terms are not interpreted.
# The body runs in a subshell: the 'set -f' and IFS changes stay local.
plus_join() (
  set -f
  # Word-splitting of $* is intended here.
  # shellcheck disable=SC2048
  set -- $*
  IFS=+
  printf '%s\n' "$*"
)
# NOTE(review): "$@" still includes the optional second argument that also
# selects WHAT, so that word becomes part of the query too -- confirm whether
# that is intended.
PLUSARG="$(plus_join "$@")"

# Assemble the request line from WHAT and PLUSARG.  Only the simple query
# form (pg=q) is used for now; pg=aq would select the advanced query.
# The search terms are wrapped in literal double quotes so they are treated
# as a phrase; otherwise the engine goes wild on multi-word queries.
QB='GET /cgi-bin/query?pg=q&what='"${WHAT}"'&fmt=c&q="'"${PLUSARG}"'"'

# Warm-up: poke the server once (8 second timeout) before the real queries,
# to get the routing warm.  Error output is discarded and the exit status is
# not checked.  Then print the banner that heads the results.
nc -z -w 8 www.altavista.digital.com 24015 2>/dev/null
printf '%s\n' '=== Altavista ==='

# Fetch the result pages.  Results only come in chunks of 10, so request the
# offsets stq=0,10,...,350 (36 requests) and keep only the lines that begin
# with an absolute http:// link.
# Relies on QB, the request line assembled earlier in this script.
xx=0
while [ "${xx}" -le 350 ]; do
  # printf, unlike some echo implementations, passes backslashes in the
  # search terms through untouched.
  # grep -E replaces the obsolescent egrep (same regex dialect).
  printf '%s\n' "${QB}&stq=${xx}" |
    nc -w 15 www.altavista.digital.com 80 |
    grep -E '^<a href="http://'
  xx=$((xx + 10))
done

exit 0

# old filter stuff
# Never executed: the script has already exited on the line above.
# Kept for reference.  This sed prints only the lines from one matching
# "Documents ... matching ... query " through the next one matching
# "query?...stq=... Document", and deletes everything else.
  sed -e '/Documents .* matching .* query /,/query?.*stq=.* Document/p' \
  -e d