##############################################################################
# The Faq-O-Matic is Copyright 1997 by Jon Howell, all rights reserved.      #
#                                                                            #
# This program is free software; you can redistribute it and/or              #
# modify it under the terms of the GNU General Public License                #
# as published by the Free Software Foundation; either version 2             #
# of the License, or (at your option) any later version.                     #
#                                                                            #
# This program is distributed in the hope that it will be useful,            #
# but WITHOUT ANY WARRANTY; without even the implied warranty of             #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              #
# GNU General Public License for more details.                               #
#                                                                            #
# You should have received a copy of the GNU General Public License          #
# along with this program; if not, write to the Free Software                #
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.#
#                                                                            #
# Jon Howell can be contacted at:                                            #
# 6211 Sudikoff Lab, Dartmouth College                                       #
# Hanover, NH  03755-3510                                                    #
# jonh@cs.dartmouth.edu                                                      #
#                                                                            #
# An electronic copy of the GPL is available at:                             #
# http://www.gnu.org/copyleft/gpl.html                                       #
#                                                                            #
##############################################################################

use strict;

##
## maintenance.pm
##
## This module should be invoked periodically by cron.
## It can be given an argument to run a specific task, or it will
## automatically determine which tasks to run.
##
## FAQ::OMatic::maintenance::main() (via the dispatch.pm CGI mechanism) will
##	make this script do its thing.
##
## FAQ::OMatic::maintenance::invoke(host, port, url) will request the url from the host.
##	Cron scripts should invoke me that way, which will cause the above
##	invocation (and make maintenance do its thing).

package FAQ::OMatic::maintenance;

use CGI;
use Socket;

use FAQ::OMatic;
use FAQ::OMatic::Log;
use FAQ::OMatic::Auth;
use FAQ::OMatic::buildSearchDB;
use FAQ::OMatic::Versions;
use FAQ::OMatic::ImageRef;
use FAQ::OMatic::Slow;

my $badKeyMessage = 'Bad maintenance key.';

sub main {
	my $cgi = FAQ::OMatic::dispatch::cgi();

	## Demand a secret key from the caller, so that we don't have
	## Joe Q. Random firing up umpteen copies of the mainenance script
	## and slowing things down. With the hints that keep it from doing
	## much very often, this probably doesn't matter, but anyway.
	if ($cgi->param('secret') ne $FAQ::OMatic::Config::maintenanceSecret) {
		print FAQ::OMatic::header($cgi, '-type'=>"text/plain");
		print "$badKeyMessage\n";
		return;
	}

	my $slow = '';
	my $tasks = $cgi->param('tasks') || '';
#	if ($tasks ne 'mirrorClient'
#		and $tasks ne 'rebuildCache') {
#		# (don't force out the header for Slow processes, which need
#		# to be able to redirect.)
#		hprint(FAQ::OMatic::header($cgi, '-type'=>'text/html'));
#		hprint("<title>FAQ-O-Matic Maintenance</title>\n");
#		hflush();
#			# just in case some other junk sneaks out on the fd
#	} else {
#		$slow = '-slow';	# tell task to run interactively
#	}

	# do everything in "slow" mode -- fork a process (so the web
	# server won't kill it), and redirect all output to a Slow file.
	my $fh = FAQ::OMatic::Slow::split();
	hSetFilehandle($fh);
	hprint("This is process ID ".$$."<p>\n");
	hflush();

	my %schedules = ('month'=>1,'week'=>1,'day'=>1,'hour'=>1);
	my @tasks = split(',', ($cgi->param('tasks')||''));
	if ((@tasks == 0) or ((@tasks == 1) and $schedules{$tasks[0]})) {
		@tasks = periodicTasks($tasks[0] || '');
	}

	my %taskUntaint = map {$_=>$_}
		( 'writeMaintenanceHint', 'trimUHDB', 'trimSubmitTmps',
		 'buildSearchDB', 'trim', 'cookies', 'errors', 'logSummary',
		 'rebuildAllSummaries', 'rebuildCache', 'expireBags', 'bagAllImages',
		 'mirrorClient', 'trimSlowOutput');

	hprint("<pre>\n");
	foreach my $i (sort @tasks) {
		$i =~ s/\d+ //;
		if (defined $taskUntaint{$i}) {
			$i = $taskUntaint{$i};
			hprint("--- $i\n");
			hflush();
			if (not eval "$i(); return 1;") {
				hprint("*** Task $i failed\n    Error: $@\n");
				hprint(FAQ::OMatic::stackTrace('html'));
				hflush();
			}
			hflush();
		} else {
			hprint("*** Task $i undefined\n");
			hflush();
		}
	}

	# output results
	hprint("\n</pre>\n");

	# provide a link to the install page, just for kicks
	FAQ::OMatic::getParams($cgi);
	hprint(FAQ::OMatic::button(
			FAQ::OMatic::makeAref('install', {}, ''),
			"Go To Install/Configuration Page"));

	hflush();
}

sub hSetFilehandle {
	my $filehandle = shift;
	FAQ::OMatic::setLocal('maintenance-fh', $filehandle);
}

sub hprint {
	# I have no idea why I thought this should be a separate
	# function. I think because I wanted to have the output appear
	# as it was generated, but didn't want to have 'print' calls to
	# weed out if I ever changed how I did printing.
	my $html = FAQ::OMatic::getLocal('maintenance-html') || '';
	$html .= join('', @_);
	FAQ::OMatic::setLocal('maintenance-html', $html);
	# we don't flush until explicitly asked to. That way, if a routine
	# decides to Slow::split, it can get its redirect out before we
	# send out a header.
}

sub hflush {
	my $html = FAQ::OMatic::getLocal('maintenance-html') || '';
	my $filehandle = FAQ::OMatic::getLocal('maintenance-fh') || \*STDOUT;
	print $filehandle $html;
	FAQ::OMatic::flush($filehandle);
	FAQ::OMatic::setLocal('maintenance-html', '');
}

sub periodicTasks {
	my $arg = shift || '';
	my $lastMaintenance = readMaintenanceHint();

	my @thenTime = localtime($lastMaintenance);
	my @nowTime = localtime($^T);

	my $newYear =	($thenTime[5] != $nowTime[5])?1:0;
	my $newMonth =	(!defined($thenTime[4])
					or !defined($nowTime[4])
					or ($thenTime[4] != $nowTime[4])
					or $newYear
					or ($arg eq 'month'))
						?1:0;

	# a tricky case:
	my $newWeek = 	(($thenTime[6] > $nowTime[6])
						# it's now "earlier" in the week than then
					or ($lastMaintenance<($^T-(86400*7)))
					or ($arg eq 'week')) ? 1 : 0;
						# or it's at least a week later

	my $newDay =	(($thenTime[3] != $nowTime[3]) or $newMonth
					or ($arg eq 'day'))?1:0;
	my $newHour =	(($thenTime[2] != $nowTime[2]) or $newDay
					or ($arg eq 'hour'))?1:0;

	my @tasks = ();

	# The number in front of the task is the sort order
	push @tasks, (
		'10 buildSearchDB'
		) if $newHour;
	push @tasks, (
		'40 mirrorClient',	# if we're a mirror, this will do an update.
		'50 cookies',
		'60 logSummary',
		'80 trimUHDB',		# turns on a flag so trim() will trim uhdbs
		'81 trimSubmitTmps',# turns on a flag so trim() will trim submitTmps
		'82 trimSlowOutput',# turns on a flag so trim() will trim slow-outputs
		'89 trim' 		# traverse metadir (needed for trim() to do anything)
		) if $newDay;
	push @tasks, (
		'55 errors'
		) if $newWeek;
	push @tasks, '98 writeMaintenanceHint';

	hprint('Executing schedules:'
		.($newHour	? ' Hourly'	:'')
		.($newDay	? ' Daily'	:'')
		.($newWeek	? ' Weekly'	:'')
		."\n\n");

	my %tasks = map { ($_,1) } @tasks;
	return keys %tasks;
}

# sub runScript {
# 	my $script = shift;
# 	$html.= "    Executing $script...\n";
# 	if (system("$script")) {
# 		$html.= "   ...failed because $!\n";
# 	}
# }

sub readMaintenanceHint {
	my $lastMaintenance;
	if (open LMHINT, "$FAQ::OMatic::Config::metaDir/lastMaintenance") {
		<LMHINT> =~ m/^(\d+)/;
		$lastMaintenance = defined($1) ? int($1) : 0;
			# THANKS Michael Gerdts <gerdts@cae.wisc.edu> for the defined() test
		close LMHINT;
	} else {
		$lastMaintenance = 0;
	}
	return $lastMaintenance;
}

############################################################################
########  Task definitions  ################################################
############################################################################

sub writeMaintenanceHint {
	if (open LMHINT, ">$FAQ::OMatic::Config::metaDir/lastMaintenance") {
		print LMHINT $^T."  ".scalar localtime($^T)."\n";
		close LMHINT;
	}
	FAQ::OMatic::Versions::setVersion('MaintenanceInvoked');
}

sub trimUHDB {
	my $trimset = FAQ::OMatic::getLocal('trimset') || {};
	$trimset->{'uhdb'} = 1;
	FAQ::OMatic::setLocal('trimset', $trimset);
}

sub trimSubmitTmps {
	my $trimset = FAQ::OMatic::getLocal('trimset') || {};
	$trimset->{'submitTmp'} = 1;
	FAQ::OMatic::setLocal('trimset', $trimset);
}

sub trimSlowOutput {
	my $trimset = FAQ::OMatic::getLocal('trimset') || {};
	$trimset->{'slow'} = 1;
	FAQ::OMatic::setLocal('trimset', $trimset);
}

sub buildSearchDB {
	if ((not -f "$FAQ::OMatic::Config::metaDir/freshSearchDBHint")
		or (-M "$FAQ::OMatic::Config::metaDir/freshSearchDBHint" > 1/24)) {
		FAQ::OMatic::buildSearchDB::build();
	} else {
		hprint("    (not needed)\n");
	}
}

sub rebuildAllSummaries {
	FAQ::OMatic::Log::rebuildAllSummaries();
}

sub trim {
	if (not opendir NEWLOGDIR, $FAQ::OMatic::Config::metaDir) {
		hprint("*** Couldn't scan $FAQ::OMatic::Config::metaDir.");
		return;
	}

	my $trimset = FAQ::OMatic::getLocal('trimset') || {};
	hprint("trimming: ".join(' ', sort keys %{$trimset})."\n");

	my $daybefore = FAQ::OMatic::Log::adddays(FAQ::OMatic::Log::numericToday(), -2);
	while (defined(my $file = readdir(NEWLOGDIR))) {
		# untaint file -- we should be able to trust the operating system
		# to provide only reasonable filenames from a readdir().
		$file =~ m/^(.*)$/;
		$file = $1;

		# uhdb's (unique host databases, part of the access log)
		if ($trimset->{'uhdb'} and ($file =~ m/^[\d-]+.uhdb./)) {
			my @dates = ($file =~ m/^([\d-]+)/);
			my $date = $dates[0];

			# Delete all uhdb files before yesterday to save
			# space.  we save the day-before's log, since
			# we need it if we summarize yesterday's log (which
			# is another task on the maintenance agenda).
			# (uhdb = Unique Host Database files. Since no
			# "new" unique hosts will be joining logs from the
			# past, having the files around doesn't help
			# anymore.)
			# Notice we do these tests based on the name of the
			# file, not the mod date, since it could have been
			# generated today by a regenerateSmrys.
			if ($date lt $daybefore) {
				hprint("removing $file\n");
				unlink "$FAQ::OMatic::Config::metaDir/$file";
			}
		}

		# submitTmp
		if ($trimset->{'submitTmp'} and ($file =~ m/^submitTmp\./)) {
			# only trim files older than a day
			if (-M "$FAQ::OMatic::Config::metaDir/$file" > 1.0) {
				hprint("removing $file\n");
				unlink "$FAQ::OMatic::Config::metaDir/$file";
			}
		}

		# slow
		if ($trimset->{'slow'} and ($file =~ m/^slow-output\./)) {
			# only trim files older than a day
			if (-M "$FAQ::OMatic::Config::metaDir/$file" > 1.0) {
				hprint("removing $file\n");
				unlink "$FAQ::OMatic::Config::metaDir/$file";
			}
		}
	}
	close NEWLOGDIR;
}

sub cookies {
	# throw away old cookies
	if (not open COOKIES, "$FAQ::OMatic::Config::metaDir/cookies") {
		hprint("no cookie file.\n");
		return;
	}
	if (not open NEWCOOKIES, ">$FAQ::OMatic::Config::metaDir/cookies.new") {
		hprint("can't create $FAQ::OMatic::Config::metaDir/cookies.new.\n");
		return;
	}
	while (<COOKIES>) {
		my ($cookie,$id,$time) = split(' ');
		if ($time
				+($FAQ::OMatic::Config::cookieLife||3600)
				+$FAQ::OMatic::Auth::cookieExtra
					> $^T) {
			# TODO: that 3600 default should be encoded somewhere,
			# rather than being a magic number. It also appears in Auth.pm.
			print NEWCOOKIES $_;
		}
	}
	close COOKIES;
	close NEWCOOKIES;
	unlink "$FAQ::OMatic::Config::metaDir/cookies";
	if (not rename("$FAQ::OMatic::Config::metaDir/cookies.new","$FAQ::OMatic::Config::metaDir/cookies")) {
		hprint ("Couldn't rename $FAQ::OMatic::Config::metaDir/cookies.new "
			."to $FAQ::OMatic::Config::metaDir/cookies\n"
			."because $!\n");
	}
}

sub errors {
	if (not $FAQ::OMatic::Config::maintSendErrors) {
		hprint("    Config says don't mail errors.\n");
		return;
	}

	my $size = (-s "$FAQ::OMatic::Config::metaDir/errors") || 0;
	if ($size == 0) {
		hprint("    No ($size) errors to mail.\n");
		return;
	}

	my $msg = "Errors from ".FAQ::OMatic::fomTitle()
		." (v. $FAQ::OMatic::VERSION):\n\n";
	if (not open(ERRORS, "$FAQ::OMatic::Config::metaDir/errors")) {
		hprint("   Couldn't read $FAQ::OMatic::Config::metaDir/errors "
			."because $!; not truncating\n");
	} else {
		hprint("   Sending errors...\n");
		my @errs = <ERRORS>;
		close ERRORS;
		FAQ::OMatic::sendEmail($FAQ::OMatic::Config::adminEmail,
					"Faq-O-Matic Error Log",
					$msg.join('', @errs));

		# truncate errorfile
		open ERRORS, ">$FAQ::OMatic::Config::metaDir/errors";
		close ERRORS;
	}
}

sub logSummary {
	my $yesterday = FAQ::OMatic::Log::adddays(FAQ::OMatic::Log::numericToday(), -1);

	FAQ::OMatic::Log::summarizeDay($yesterday);
}

sub cronInvoke {
	# special version to be called from cron to alert user to errors
	my @reply = invoke(@_);
	my $reply = join('', @reply);

	# if the cron job fails to run, we should print a message out,
	# so that the user knows to fix it. (most crons mail the message
	# to the user.)
	my ($proto,$code,@rest) = split(' ', $reply[0]);
	# THANKS to Andrew W. Nosenko <awn@bcs.zp.ua>
	# for this patch to ignore 'Moved Temporarily' response.
	if ($code != '200' && $code != '302') {
		print "Unable to invoke maintenance task; HTTP reply follows:\n\n";
		print $reply;
	}
	if ($reply =~ m/$badKeyMessage/) {
		print "$badKeyMessage\n";
		print "Admin must access the install page and\n"
			."click \"Set up the maintenance cron job.\"\n";
	}
}

sub invoke {
	my $host = shift;
	my $port = shift;
	my $url = shift;
	my $verbose = shift;

	my $proto = getprotobyname('tcp');
	socket(HTTPSOCK, PF_INET, SOCK_STREAM, $proto);
	my $httpsock = \*HTTPSOCK;	# filehandles are such a nasty legacy in Perl
	my $sin = sockaddr_in($port, inet_aton($host));
	if (not connect(HTTPSOCK, $sin)) {
		die "maintenance::invoke can't connect(): $!, $@!\n"
	}
	print $httpsock "GET $url HTTP/1.0\nHost: $host\n\n";
		# Thanks to Gabor Melis <gabor.melis@essnet.se> for the "Host:"
		# header, which makes virtually hosted sites work right.
	FAQ::OMatic::flush($httpsock);
		# Thanks to Miro Jurisic <meeroh@MIT.EDU> for this fix.

	my @reply = <$httpsock>;
	close $httpsock;

	if ($verbose) {
		hprint(join('', @reply));
	}
	return @reply if wantarray();
}

# We used to have a verifyCache task that rebuilt cached files if
# they were older than their item file (or the config file).
# The former condition wasn't very useful, since caches are rewritten
# whenever items are. The latter shouldn't really have happened
# automatically, because it can be verrrry slow on a big FAQ.
# And in neither case could you refresh *every* item in the cache.
# The new task simply reads and writes every item in the item/ directory,
# which ensures that its cache and dependencies are up-to-date.
# (note also the 'updateAllDependencies' flag to saveToFile().)
#
sub rebuildCache {
	my $slow = shift || '';

	return if ((not defined $FAQ::OMatic::Config::cacheDir)
				or ($FAQ::OMatic::Config::cacheDir eq ''));
	
	if ($slow) {
		my $fh = FAQ::OMatic::Slow::split();
		hSetFilehandle($fh);
		# from now on, our output goes into the slow-output file
		# to be periodically loaded by the browser.
	}

	my $itemName;
	foreach $itemName (FAQ::OMatic::getAllItemNames()) {
		hprint( "<br>Updating $itemName\n");
		my $item = new FAQ::OMatic::Item($itemName);
		if (not eval {
			$item->saveToFile('', '', 'noChange', 'updateAllDependencies');
			1;
		}) {
			FAQ::OMatic::gripe('note', "save failed for ".$itemName);
		}


		# flush stdout
		hflush();
	}

	FAQ::OMatic::Versions::setVersion('CacheRebuild');
}

sub expireBags {
	return if ((not defined $FAQ::OMatic::Config::cacheDir)
				or ($FAQ::OMatic::Config::cacheDir eq '')
				or (not defined $FAQ::OMatic::Config::bagsDir)
				or ($FAQ::OMatic::Config::bagsDir eq ''));
	
	my $anyMessages = 0;
	my @bagList = grep { not m/\.desc$/ }
		FAQ::OMatic::getAllItemNames($FAQ::OMatic::Config::bagsDir);
	my $bagName;
	foreach $bagName (@bagList) {
		#hprint("<br>Checking $bagName\n");
		my @dependents = FAQ::OMatic::Item::getDependencies("bags.".$bagName);
		if (scalar(@dependents) == 0) {
			# don't declare system-supplied bags invalid
			# THANKS: to John Goerzen for pointing this out
			my ($prefix) = ($bagName =~ m/^(.*)\.[^\.]+$/);
			# THANKS: Edwin Chiu <edwinc@s-scape.ca> for fixing an
			# uninitialized value warning coming from here.
			next if (defined($prefix)
				&& FAQ::OMatic::ImageRef::validImage($prefix));

			if (not $anyMessages) {
				hprint("The following suggestion(s) are based on "
					."dependency files.\n You might run rebuildCache first "
					."if you want to be certain that they are valid.\n\n");
			}
			$anyMessages = 1;
			my $msg = "Consider removing bag $bagName; it is not linked from "
				."any item in the FAQ.";
			hprint "<br>$msg\n";
			# TODO: in a future version, we could actually just unlink
			# TODO: the unreferenced bags. (garbage collection).
			# The comment above about rebuildCache would apply.
			# Dependencies should stay current, but I'd sure hate
			# to accidentally blast your bag.
		}
	}
}

sub bagAllImages {
	return if ((not defined $FAQ::OMatic::Config::bagsDir)
				or ($FAQ::OMatic::Config::bagsDir eq ''));

	require FAQ::OMatic::ImageRef;
	FAQ::OMatic::ImageRef::bagAllImages();

	FAQ::OMatic::Versions::setVersion('SystemBags');
}

sub mirrorClient {
	my $slow = shift || '';
	my $url = $FAQ::OMatic::Config::mirrorURL;

	if ((not defined $url)
		or ($url eq '')) {
		hprint("mirrorClient() exiting silently because this is "
			."not a mirror.\n");
		return;
	}

	if ($slow) {
		my $fh = FAQ::OMatic::Slow::split();
		hSetFilehandle($fh);
		# from now on, our output goes into the slow-output file
		# to be periodically loaded by the browser.
	}

	hprint("<p>Querying master site for item and bag modification times.<p>\n");
	hflush();

	my $limit = -1;	# Set to a small number for debugging, so you don't
					# have to wait for the whole mirror to complete.
					# Set to -1 for normal operation.

	require FAQ::OMatic::install;

	# cheesily parse the URL. This seemed better than use'ing the
	# whole URL.pm kit. I had bad luck with it once. Maybe I'm irrational.
	my ($host, $port, $path) =
		$url =~ m#http://([^/:]+)(:\d+)?/(.*)$#;
	if (defined $port) {
		$port =~ s/^://;
	} else {
		$port = 80;
	}
	$path = "/$path?cmd=mirrorServer";

	my @reply = invoke($host, $port, $path);

	# chew HTTP headers until a blank line
	while (not $reply[0] =~ m/^[\r\n]*$/) {
		shift @reply;
	}
	shift @reply;	# chew off that blank line
	@reply = grep { not m/^#/ } @reply;		# chew off comment lines
	@reply = map { chomp $_; $_ } @reply;		# chomp LFs

	# first line of remaining content must be version number
	my $version = shift @reply;
	if ($version ne 'version 1.0') {
		die "This FAQ-O-Matic version $FAQ::OMatic::VERSION only understands "
			."mirrorServer data version 1.0; received $version.";
	}

	#hprint  join("\n", @reply);

	my @itemURL = grep { m/^itemURL\s/ } @reply;
	my $itemURL = ($itemURL[0] =~ m/^itemURL\s+(.*)$/)[0];
	if (not defined $itemURL) {
		die "master didn't send itemURL line.";
	}

	my @configs = grep { m/^config\s/ } @reply;
	my $config;
	my $map = FAQ::OMatic::install::readConfig();
	hprint("configs supplied: ".scalar(@configs)."\n");
	foreach $config (@configs) {
		my ($left,$right) =
			($config =~ m/config (\$\S+) = (.+)$/);
		if (defined $left and defined $right) {
			# unless the config line is buggy, as can happen when
			# the mirrorServer FAQ is running 2.618 :v(, update our config
			$map->{$left} = $right;
			hprint("<br>$left => $right\n");
		}
	}
	FAQ::OMatic::install::writeConfig($map);
	# now make sure that config takes effect for all the cache
	# files we're about to write
	FAQ::OMatic::install::rereadConfig();
	hflush();

	my $count = 0;
	my @items = grep { m/^item\s/ } @reply;
	my $line;
	foreach $line (@items) {
		my ($file,$lms) =
			($line =~ m/item\s+(\S+)\s+(\S+)/);
		if (not defined $file || $file eq '') {
			hprint("<br>Can't parse: $line\n");
			next;
		}
		my $item = new FAQ::OMatic::Item($file);
		my $existingLms = $item->{'LastModifiedSecs'} || -1;
		if ($lms != $existingLms) {
			hprint("<br>$file ".$item->getTitle().": item needs update\n");
			mirrorItem($host, $port, $itemURL."$file", $file, '');
			$count++;	# each net access counts 1, whether or not it takes
		} else {
			# hprint "<br>Already have: $file ".$item->getTitle()."\n";
			# Benign output supressed so you can see which items you
			# don't have.
		}

		if ($limit>=0 && $count >= $limit) {
			hprint("<p>stopping because count = limit ($limit)\n");
			return;
		}
		# flush output
		hflush();
	}

	my @bagsURL = grep { m/^bagsURL\s/ } @reply;
	my $bagsURL = ($bagsURL[0] =~ m/^bagsURL\s+(.*)$/)[0];
	if (not defined $bagsURL) {
		die "master didn't send bagsURL line.";
	}

	my @bags = grep { m/^bag\s/ } @reply;
	foreach $line (@bags) {
		my ($bagword,$file,$lms) = split(/\s+/, $line);
		if (not defined($bagword)
			|| ($bagword ne 'bag')
			|| not defined($file)) {
			hprint("bad line: $line");
			#continue; LEFT OFF
		}
		if ((not defined($lms)) || ($lms eq '')) {
			# some old mirrorServer forgot to send a date --- assume the
			# file is recently modified.
			$lms = time();
		}
		$file = FAQ::OMatic::Bags::untaintBagName($file);
		if ($file eq '') {
			hprint("<br>Tainted bag name in '$line'\n");
			next;
		}

		my $descFile = $file.".desc";
		my $descItem = new FAQ::OMatic::Item($descFile,
							$FAQ::OMatic::Config::bagsDir);
		my $existingLms = $descItem->{'LastModifiedSecs'} || -1;

		if ($lms != $existingLms) {
			hprint("<br>${file}: bag needs update\n");
			# transfer bag byte-for-byte to my bags dir
			mirrorBag($host, $port, $bagsURL."$file", $file);
			# transfer the .desc file, using same item mirroring code as above
			mirrorItem($host, $port, $bagsURL.$descFile,
				$descFile, $FAQ::OMatic::Config::bagsDir);
			# update the link in any items that point to this bag
			FAQ::OMatic::Bags::updateDependents($file);
			$count += 2;
		} else {
			# hprint "<br>Already have: $file\n";
		}

		if ($limit>=0 && $count >= $limit) {
			hprint("<p>stopping because count = limit ($limit)\n");
			return;
		}
		# flush output
		hflush();
	}
}

# a close relative of previous function invoke().
sub mirrorItem {
	my $host = shift;
	my $port = shift;
	my $path = shift;
	my $itemFilename = shift;
	my $itemDir = shift;

	my $proto = getprotobyname('tcp');
	my $sin = sockaddr_in($port, inet_aton($host));

	socket(HTTPSOCK, PF_INET, SOCK_STREAM, $proto);
	my $httpsock = \*HTTPSOCK;

	if (not connect($httpsock, $sin)) {
		die "mirrorItem can't connect(): $!, $@!\n"
	}
	my $request = "GET $path HTTP/1.0";
	print $httpsock "$request\n\n";
	FAQ::OMatic::flush($httpsock);
		# Thanks to Miro Jurisic <meeroh@MIT.EDU> for this fix.
	my $httpstatus = <$httpsock>;	# get initial result
	chomp $httpstatus;
	my ($statusNum) = ($httpstatus =~ m/\s(\d+)\s/);
	if ($statusNum != 200) {
		hprint("<br>Request '$request' for $itemFilename from "
			."$host:$port failed: ($statusNum) $httpstatus\n");
		close($httpsock);
		return;
	}
	while (<$httpsock>) {			# blow past HTTP headers
		last if ($_ =~ m/^[\r\n]*$/);
	}

	my $item = new FAQ::OMatic::Item();
	$item->{'filename'} = $itemFilename;
	$item->loadFromFileHandle($httpsock);
	close($httpsock);

	$item->{'titleChanged'} = 'true';
		# since we just mirrored this guy, the title may very well
		# have changed, so we need to be sure to rewrite dependent items.
	$item->saveToFile($itemFilename, $itemDir, 'noChange', 'updateAllDeps',
		'noRecomputeDependencies');
		# notice we're passing in a filename we got from that
		#	web server -- an insidious master might try to pass
		#	off bogus item filenames with '..'s in them. But saveToFile()
		#	has a taint-check to prevent that sort of thing.
		# 'noChange' keeps lastModified date intact, so we won't keep
		#	re-mirroring this item.
		# 'updateAllDependencies' is necessary, because otherwise
		#	saveToFile only adds those dependencies that are "new" to
		#	this item -- but we only have the item, not the .dep file,
		#	so we need to always regenerate all deps.
		# 'noRecomputeDependencies' prevents Item.pm from trying to
		#	resolve forward references to nonexistent items. For example, an
		#	item can have a parent that hasn't been reached yet in the
		#	mirroring.
	hprint("<br>$itemFilename (".$item->getTitle()."): "
		."item mirrored successfully\n");
}

sub mirrorBag {
	my $host = shift;
	my $port = shift;
	my $path = shift;
	my $bagFilename = shift;	# already untainted by caller

	my $proto = getprotobyname('tcp');
	my $sin = sockaddr_in($port, inet_aton($host));

	socket(HTTPSOCK, PF_INET, SOCK_STREAM, $proto);
	my $httpsock = \*HTTPSOCK;	# filehandles are such a nasty legacy in Perl

	if (not connect($httpsock, $sin)) {
		die "mirrorBag can't connect(): $!, $@!\n"
	}
	my $request = "GET $path HTTP/1.0";
	print $httpsock "$request\n\n";
	FAQ::OMatic::flush($httpsock);
		# Thanks to Miro Jurisic <meeroh@MIT.EDU> for this fix.
	my $httpstatus = <$httpsock>;	# get initial result
	chomp $httpstatus;
	my ($statusNum) = ($httpstatus =~ m/\s(\d+)\s/);
	if ($statusNum != 200) {
		hprint("<br>Request '$request' for $bagFilename from "
			."$host:$port failed: ($statusNum) $httpstatus\n");
		close($httpsock);
		return;
	}
	while (<$httpsock>) {			# blow past HTTP headers
		last if ($_ =~ m/^[\r\n]*$/);
	}

	# input looks good at this point -- open output bag file
	if (not open(BAGFILE, ">".$FAQ::OMatic::Config::bagsDir.$bagFilename)) {
		hprint("<br>open of $bagFilename failed: $!\n");
		close $httpsock;
		return;
	}

	my $sizeBytes = 0;
	my $buf;
	while (read($httpsock, $buf, 4096)) {
		print BAGFILE $buf;
		$sizeBytes += length($buf);
		# TODO: maybe have (mirror-site-admin)-configurable length limit here
	}
	close(BAGFILE);
	close($httpsock);

	if (not chmod(0644, $FAQ::OMatic::Config::bagsDir.$bagFilename)) {
		FAQ::OMatic::gripe('problem', "chmod("
			.$FAQ::OMatic::Config::bagsDir.$bagFilename
			." failed: $!");
	}

	if ($sizeBytes == 0) {
		hprint("<br><b>Uh oh, I read no bytes for $bagFilename.</b>\n");
		return;
	}

	hprint("<br>${bagFilename}: bag mirrored successfully\n");
}

sub mtime {
	my $filename = shift;
	return (stat($filename))[9] || 0;
}

1;
