1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485
|
#!/bin/bash
# wgetter2.bash
# Author: Little Monster [monster@monstruum.co.uk]
# ==> Used in ABS Guide with permission of script author.
# ==> This script still needs debugging and fixups (exercise for reader).
# ==> It could also use some additional editing in the comments.
# This is wgetter2 --
#+ a Bash script to make wget a bit more friendly, and save typing.
# Carefully crafted by Little Monster.
# More or less complete on 02/02/2005.
# If you think this script can be improved,
#+ email me at: monster@monstruum.co.uk
# ==> and cc: to the author of the ABS Guide, please.
# This script is licenced under the GPL.
# You are free to copy, alter and re-use it,
#+ but please don't try to claim you wrote it.
# Log your changes here instead.
# =======================================================================
# changelog:
# 07/02/2005. Fixups by Little Monster.
# 02/02/2005. Minor additions by Little Monster.
# (See after # +++++++++++ )
# 29/01/2005. Minor stylistic edits and cleanups by author of ABS Guide.
# Added exit error codes.
# 22/11/2004. Finished initial version of second version of wgetter:
# wgetter2 is born.
# 01/12/2004. Changed 'runn' function so it can be run 2 ways --
# either ask for a file name or have one input on the CL.
# 01/12/2004. Made sensible handling of no URL's given.
# 01/12/2004. Made loop of main options, so you don't
# have to keep calling wgetter 2 all the time.
# Runs as a session instead.
# 01/12/2004. Added looping to 'runn' function.
# Simplified and improved.
# 01/12/2004. Added state to recursion setting.
# Enables re-use of previous value.
# 05/12/2004. Modified the file detection routine in the 'runn' function
# so it's not fooled by empty values, and is cleaner.
# 01/02/2004. Added cookie finding routine from later version (which
# isn't ready yet), so as not to have hard-coded paths.
# =======================================================================
# Error codes for abnormal exit.
E_USAGE=67 # Usage message, then quit.
E_NO_OPTS=68 # No command-line args entered.
E_NO_URLS=69 # No URLs passed to script.
E_NO_SAVEFILE=70 # No save filename passed to script.
E_USER_EXIT=71 # User decides to quit.
# Basic default wget command we want to use.
# This is the place to change it, if required.
# NB: if using a proxy, set http_proxy = yourproxy in .wgetrc.
# Otherwise delete --proxy=on, below.
# ====================================================================
CommandA="wget -nc -c -t 5 --progress=bar --random-wait --proxy=on -r"
# ====================================================================
# --------------------------------------------------------------------
# Set some other variables and explain them.
pattern=" -A .jpg,.JPG,.jpeg,.JPEG,.gif,.GIF,.htm,.html,.shtml,.php"
# wget's option to only get certain types of file.
# comment out if not using
today=`date +%F` # Used for a filename.
home=$HOME # Set HOME to an internal variable.
# In case some other path is used, change it here.
depthDefault=3 # Set a sensible default recursion.
Depth=$depthDefault # Otherwise user feedback doesn't tie in properly.
RefA="" # Set blank referring page.
Flag="" # Default to not saving anything,
#+ or whatever else might be wanted in future.
lister="" # Used for passing a list of urls directly to wget.
Woptions="" # Used for passing wget some options for itself.
inFile="" # Used for the run function.
newFile="" # Used for the run function.
savePath="$home/w-save"
Config="$home/.wgetter2rc"
# This is where some variables can be stored,
#+ if permanently changed from within the script.
Cookie_List="$home/.cookielist"
# So we know where the cookies are kept . . .
cFlag="" # Part of the cookie file selection routine.
# Define the options available. Easy to change letters here if needed.
# These are the optional options; you don't just wait to be asked.
save=s # Save command instead of executing it.
cook=c # Change cookie file for this session.
help=h # Usage guide.
list=l # Pass wget the -i option and URL list.
runn=r # Run saved commands as an argument to the option.
inpu=i # Run saved commands interactively.
wopt=w # Allow to enter options to pass directly to wget.
# --------------------------------------------------------------------
if [ -z "$1" ]; then # Make sure we get something for wget to eat.
echo "You must at least enter a URL or option!"
echo "-$help for usage."
exit $E_NO_OPTS
fi
# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# added added added added added added added added added added added added
if [ ! -e "$Config" ]; then # See if configuration file exists.
echo "Creating configuration file, $Config"
echo "# This is the configuration file for wgetter2" > "$Config"
echo "# Your customised settings will be saved in this file" >> "$Config"
else
source $Config # Import variables we set outside the script.
fi
if [ ! -e "$Cookie_List" ]; then
# Set up a list of cookie files, if there isn't one.
echo "Hunting for cookies . . ."
find -name cookies.txt >> $Cookie_List # Create the list of cookie files.
fi # Isolate this in its own 'if' statement,
#+ in case we got interrupted while searching.
if [ -z "$cFlag" ]; then # If we haven't already done this . . .
echo # Make a nice space after the command prompt.
echo "Looks like you haven't set up your source of cookies yet."
n=0 # Make sure the counter
#+ doesn't contain random values.
while read; do
Cookies[$n]=$REPLY # Put the cookie files we found into an array.
echo "$n) ${Cookies[$n]}" # Create a menu.
n=$(( n + 1 )) # Increment the counter.
done < $Cookie_List # Feed the read statement.
echo "Enter the number of the cookie file you want to use."
echo "If you won't be using cookies, just press RETURN."
echo
echo "I won't be asking this again. Edit $Config"
echo "If you decide to change at a later date"
echo "or use the -${cook} option for per session changes."
read
if [ ! -z $REPLY ]; then # User didn't just press return.
Cookie=" --load-cookies ${Cookies[$REPLY]}"
# Set the variable here as well as in the config file.
echo "Cookie=\" --load-cookies ${Cookies[$REPLY]}\"" >> $Config
fi
echo "cFlag=1" >> $Config # So we know not to ask again.
fi
# end added section end added section end added section end added section
# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Another variable.
# This one may or may not be subject to variation.
# A bit like the small print.
CookiesON=$Cookie
# echo "cookie file is $CookiesON" # For debugging.
# echo "home is ${home}" # For debugging.
# Got caught with this one!
wopts()
{
echo "Enter options to pass to wget."
echo "It is assumed you know what you're doing."
echo
echo "You can pass their arguments here too."
# That is to say, everything passed here is passed to wget.
read Wopts
# Read in the options to be passed to wget.
Woptions=" $Wopts"
# ^ Why the leading space?
# Assign to another variable.
# Just for fun, or something . . .
echo "passing options ${Wopts} to wget"
# Mainly for debugging.
# Is cute.
return
}
save_func()
{
echo "Settings will be saved."
if [ ! -d $savePath ]; then # See if directory exists.
mkdir $savePath # Create the directory to save things in
#+ if it isn't already there.
fi
Flag=S
# Tell the final bit of code what to do.
# Set a flag since stuff is done in main.
return
}
usage() # Tell them how it works.
{
echo "Welcome to wgetter. This is a front end to wget."
echo "It will always run wget with these options:"
echo "$CommandA"
echo "and the pattern to match: $pattern \
(which you can change at the top of this script)."
echo "It will also ask you for recursion depth, \
and if you want to use a referring page."
echo "Wgetter accepts the following options:"
echo ""
echo "-$help : Display this help."
echo "-$save : Save the command to a file $savePath/wget-($today) \
instead of running it."
echo "-$runn : Run saved wget commands instead of starting a new one -"
echo "Enter filename as argument to this option."
echo "-$inpu : Run saved wget commands interactively --"
echo "The script will ask you for the filename."
echo "-$cook : Change the cookies file for this session."
echo "-$list : Tell wget to use URL's from a list instead of \
from the command-line."
echo "-$wopt : Pass any other options direct to wget."
echo ""
echo "See the wget man page for additional options \
you can pass to wget."
echo ""
exit $E_USAGE # End here. Don't process anything else.
}
list_func() # Gives the user the option to use the -i option to wget,
#+ and a list of URLs.
{
while [ 1 ]; do
echo "Enter the name of the file containing URL's (press q to change
your mind)."
read urlfile
if [ ! -e "$urlfile" ] && [ "$urlfile" != q ]; then
# Look for a file, or the quit option.
echo "That file does not exist!"
elif [ "$urlfile" = q ]; then # Check quit option.
echo "Not using a url list."
return
else
echo "using $urlfile."
echo "If you gave url's on the command-line, I'll use those first."
# Report wget standard behaviour to the user.
lister=" -i $urlfile" # This is what we want to pass to wget.
return
fi
done
}
cookie_func() # Give the user the option to use a different cookie file.
{
while [ 1 ]; do
echo "Change the cookies file. Press return if you don't want to change
it."
read Cookies
# NB: this is not the same as Cookie, earlier.
# There is an 's' on the end.
# Bit like chocolate chips.
if [ -z "$Cookies" ]; then # Escape clause for wusses.
return
elif [ ! -e "$Cookies" ]; then
echo "File does not exist. Try again." # Keep em going . . .
else
CookiesON=" --load-cookies $Cookies" # File is good -- use it!
return
fi
done
}
run_func()
{
if [ -z "$OPTARG" ]; then
# Test to see if we used the in-line option or the query one.
if [ ! -d "$savePath" ]; then # If directory doesn't exist . . .
echo "$savePath does not appear to exist."
echo "Please supply path and filename of saved wget commands:"
read newFile
until [ -f "$newFile" ]; do # Keep going till we get something.
echo "Sorry, that file does not exist. Please try again."
# Try really hard to get something.
read newFile
done
# -----------------------------------------------------------------------
# if [ -z ( grep wget ${newfile} ) ]; then
# Assume they haven't got the right file and bail out.
# echo "Sorry, that file does not contain wget commands. Aborting."
# exit
# fi
#
# This is bogus code.
# It doesn't actually work.
# If anyone wants to fix it, feel free!
# -----------------------------------------------------------------------
filePath="${newFile}"
else
echo "Save path is $savePath"
echo "Please enter name of the file which you want to use."
echo "You have a choice of:"
ls $savePath # Give them a choice.
read inFile
until [ -f "$savePath/$inFile" ]; do # Keep going till
#+ we get something.
if [ ! -f "${savePath}/${inFile}" ]; then # If file doesn't exist.
echo "Sorry, that file does not exist. Please choose from:"
ls $savePath # If a mistake is made.
read inFile
fi
done
filePath="${savePath}/${inFile}" # Make one variable . . .
fi
else filePath="${savePath}/${OPTARG}" # Which can be many things . . .
fi
if [ ! -f "$filePath" ]; then # If a bogus file got through.
echo "You did not specify a suitable file."
echo "Run this script with the -${save} option first."
echo "Aborting."
exit $E_NO_SAVEFILE
fi
echo "Using: $filePath"
while read; do
eval $REPLY
echo "Completed: $REPLY"
done < $filePath # Feed the actual file we are using into a 'while' loop.
exit
}
# Fish out any options we are using for the script.
# This is based on the demo in "Learning The Bash Shell" (O'Reilly).
while getopts ":$save$cook$help$list$runn:$inpu$wopt" opt
do
case $opt in
$save) save_func;; # Save some wgetter sessions for later.
$cook) cookie_func;; # Change cookie file.
$help) usage;; # Get help.
$list) list_func;; # Allow wget to use a list of URLs.
$runn) run_func;; # Useful if you are calling wgetter from,
#+ for example, a cron script.
$inpu) run_func;; # When you don't know what your files are named.
$wopt) wopts;; # Pass options directly to wget.
\?) echo "Not a valid option."
echo "Use -${wopt} to pass options directly to wget,"
echo "or -${help} for help";; # Catch anything else.
esac
done
shift $((OPTIND - 1)) # Do funky magic stuff with $#.
if [ -z "$1" ] && [ -z "$lister" ]; then
# We should be left with at least one URL
#+ on the command-line, unless a list is
#+ being used -- catch empty CL's.
echo "No URL's given! You must enter them on the same line as wgetter2."
echo "E.g., wgetter2 http://somesite http://anothersite."
echo "Use $help option for more information."
exit $E_NO_URLS # Bail out, with appropriate error code.
fi
URLS=" $@"
# Use this so that URL list can be changed if we stay in the option loop.
while [ 1 ]; do
# This is where we ask for the most used options.
# (Mostly unchanged from version 1 of wgetter)
if [ -z $curDepth ]; then
Current=""
else Current=" Current value is $curDepth"
fi
echo "How deep should I go? \
(integer: Default is $depthDefault.$Current)"
read Depth # Recursion -- how far should we go?
inputB="" # Reset this to blank on each pass of the loop.
echo "Enter the name of the referring page (default is none)."
read inputB # Need this for some sites.
echo "Do you want to have the output logged to the terminal"
echo "(y/n, default is yes)?"
read noHide # Otherwise wget will just log it to a file.
case $noHide in # Now you see me, now you don't.
y|Y ) hide="";;
n|N ) hide=" -b";;
* ) hide="";;
esac
if [ -z ${Depth} ]; then
# User accepted either default or current depth,
#+ in which case Depth is now empty.
if [ -z ${curDepth} ]; then
# See if a depth was set on a previous iteration.
Depth="$depthDefault"
# Set the default recursion depth if nothing
#+ else to use.
else Depth="$curDepth" # Otherwise, set the one we used before.
fi
fi
Recurse=" -l $Depth" # Set how deep we want to go.
curDepth=$Depth # Remember setting for next time.
if [ ! -z $inputB ]; then
RefA=" --referer=$inputB" # Option to use referring page.
fi
WGETTER="${CommandA}${pattern}${hide}${RefA}${Recurse}\
${CookiesON}${lister}${Woptions}${URLS}"
# Just string the whole lot together . . .
# NB: no embedded spaces.
# They are in the individual elements so that if any are empty,
#+ we don't get an extra space.
if [ -z "${CookiesON}" ] && [ "$cFlag" = "1" ] ; then
echo "Warning -- can't find cookie file"
# This should be changed,
#+ in case the user has opted to not use cookies.
fi
if [ "$Flag" = "S" ]; then
echo "$WGETTER" >> $savePath/wget-${today}
# Create a unique filename for today, or append to it if it exists.
echo "$inputB" >> $savePath/site-list-${today}
# Make a list, so it's easy to refer back to,
#+ since the whole command is a bit confusing to look at.
echo "Command saved to the file $savePath/wget-${today}"
# Tell the user.
echo "Referring page URL saved to the file$ \
savePath/site-list-${today}"
# Tell the user.
Saver=" with save option"
# Stick this somewhere, so it appears in the loop if set.
else
echo "*****************"
echo "*****Getting*****"
echo "*****************"
echo ""
echo "$WGETTER"
echo ""
echo "*****************"
eval "$WGETTER"
fi
echo ""
echo "Starting over$Saver."
echo "If you want to stop, press q."
echo "Otherwise, enter some URL's:"
# Let them go again. Tell about save option being set.
read
case $REPLY in
# Need to change this to a 'trap' clause.
q|Q ) exit $E_USER_EXIT;; # Exercise for the reader?
* ) URLS=" $REPLY";;
esac
echo ""
done
exit 0
|