# debpartial_mirror - partial debian mirror package tool
# (c) 2004 Otavio Salvador <otavio@debian.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA	02111-1307	USA

import StringIO
import threading
import Queue
import re
import os
import os.path
from stat import *
import pycurl
from string import split, join
from rfc822 import formatdate
from errno import EEXIST

from debpartial_mirror import DisplayStatus


class DownloadQueue(Queue.Queue):
	""" A FIFO queue that silently drops items it already holds. """

	counter = 0

	def _put(self, item):
		# Refuse duplicates so the same download is never queued twice.
		if item in self.queue:
			return
		self.queue.append(item)


class DownloadFileHandler:
	"""File-like writer that defers opening the target file until the
	first chunk of data actually arrives, so empty transfers never
	touch the disk."""

	def __init__(self, filename, mode, buffering = 0):
		self._filename = filename
		self._mode = mode
		self._buffering = buffering
		self._openned = False
		self.__fp = None

	def write(self, string):
		"""Append *string* to the file, opening it lazily on first use."""
		if not self._openned:
			self.__fp = open(self._filename, self._mode, self._buffering)
			self._openned = True
		self.__fp.write(string)

	def close(self):
		"""Close the underlying file if it was ever opened; a later
		write() will transparently reopen it."""
		if self.__fp is None:
			return
		self.__fp.close()
		self.__fp = None
		self._openned = False



class Curl:
	buggy_curl = False

	def __init__(self, DisplayStatus, Queue):
		# Handle bug version of pycurl (versions pior 7.14 are buggy)
		major, minor, revision = pycurl.version_info()[1].split('.')
		if (int(major) + 0.01*int(minor) < 7.14) and not Curl.buggy_curl:
			print "WARNING: Due a bug in libcurl up to 7.13 we can't use resume support!"
			Curl.buggy_curl = True

		self._curl = pycurl.Curl()
		self._curl.setopt(pycurl.FOLLOWLOCATION, 1)
		self._curl.setopt(pycurl.MAXREDIRS, 5)
		self._curl.setopt(pycurl.NOSIGNAL, 0)
		self._curl.setopt(pycurl.CONNECTTIMEOUT, 30)
		self._curl.setopt(pycurl.PROGRESSFUNCTION, self.progress)
		self._curl.setopt(pycurl.NOPROGRESS, 0)
		self._curl.setopt(pycurl.FAILONERROR, 1)
		self._curl.setopt(pycurl.HTTP_VERSION, pycurl.CURL_HTTP_VERSION_1_0)
		self._curl.setopt(pycurl.VERBOSE, 0)

		self._curl.parent = self
		self._curl.callback = None
		self._curl.error_callback = None
		self._curl.filename = None

		self._fp = None
		self._startSize = 0
		self.DisplayStatus = DisplayStatus
		self.Queue = Queue

	def __processDir(self):
		# in case it's a directory, we process to find all needed
		# objects and queue all.
		if self._fp != None and isinstance(self._fp, StringIO.StringIO):
			self._fp.seek(0)
			matches = re.compile(
				'<a href="([a-zA-Z0-9_][a-zA-Z0-9\-\./_]+)">',
				 re.IGNORECASE | re.MULTILINE
			).findall(self._fp.read())

			for filen in matches:
				try:
					os.makedirs(os.path.dirname(self._curl.filename))
				except OSError, (errno, msg):
					if errno != EEXIST:
						print "ERROR:", msg
				# (uri, destine, callback, error_callback, no_cache)
				self.Queue.put((
					os.path.dirname(self._curl.url) + '/' + filen,
					os.path.dirname(self._curl.filename) + '/' + filen,
					self._curl.callback, self._curl.error_callback,
					self._curl.no_cache
				))

	def set_target(self, url, filename, callback, error_callback, no_cache):
		self._curl.setopt(pycurl.URL, url)
		self._curl.url = url
		self._curl.callback = callback
		self._curl.error_callback = error_callback
		self._curl.filename = filename
		self._curl.setopt(pycurl.RESUME_FROM, 0)
		self._curl.no_cache = no_cache

		if self._fp is not None:
			try:
				self._fp.close()
				del self._fp # don't leave the files open
			except IOError:
				self._fp = None

		extra_header = []
		if filename.endswith('/'):
			self._fp = StringIO.StringIO()
		else:
			self._curl.filename += ".partial"

			# We'll rename it back if is the same file
			if os.path.exists (filename):
				extra_header.append("if-modified-since:" + formatdate(
					os.path.getmtime(filename)
				))
				os.rename (filename, self._curl.filename)

			if not Curl.buggy_curl and os.path.exists (self._curl.filename):
				#dowload interrupted, needs resume
				self._startSize = os.stat(self._curl.filename)[ST_SIZE]
				self._curl.setopt(pycurl.RESUME_FROM, self._startSize)
				self._fp = DownloadFileHandler(self._curl.filename, "ab")
			else :
				self._fp = DownloadFileHandler(self._curl.filename, "wb")

		self._curl.setopt(pycurl.WRITEFUNCTION, self._fp.write)


		if not no_cache:
			extra_header.append("Pragma:")

		if extra_header:
			self._curl.setopt(pycurl.HTTPHEADER, extra_header)

	def close(self):
		self.__processDir()
		self.callback()
		self._fp = None
		self._curl.close()

	def progress(self, download_t, download_d, upload_t, upload_d):
		#If we doesn't know how much data we will receive, return.
		if download_t == 0 or download_d == 0:
		   return

		if self.DisplayStatus[self._curl.url] == None and self._curl.url[-1] != '/':
			self.DisplayStatus.start(self._curl.url, download_t + self._startSize)

		if self._curl.url[-1] != '/':
			self.DisplayStatus.update(self._curl.url, download_d + self._startSize)

	def callback(self):
		if not self._curl.filename:
			return # It hasn't set!
		#remove trailing .partial after download is completed
		if not isinstance(self._fp, StringIO.StringIO) and os.path.exists (self._curl.filename):
			self._fp.close()
			os.rename (self._curl.filename, split (self._curl.filename,".partial")[0])
		else:
			self.__processDir()

		#exec user passed callback
		if self._curl.callback != None:
			self._curl.callback
			self._curl.callback = None

	def error_callback(self):
		if self._curl.error_callback != None:
			self._curl.error_callback
			self._curl.error_callback = None


class DownloadFetcher(threading.Thread):
	def __init__(self, info, downloaders):
		# Add DisplayStatus object.
		# TODO: the DisplayStatus type should be set in the configuration file
		if info == "text":
			self.DisplayStatus = DisplayStatus.TextDisplayStatus()
		elif info == "log":
			self.DisplayStatus = DisplayStatus.LogDisplayStatus()
		elif info == "quiet":
			self.DisplayStatus = DisplayStatus.QuietDisplayStatus()
		else:
			self.DisplayStatus = info

		# Create the needed pycurl objects to manage the connections.
		self._multi = pycurl.CurlMulti()
		self._multi.handles = []
		self._free = Queue.Queue()
		self.queue = DownloadQueue()
		for i in range(downloaders):
			curl = Curl(self.DisplayStatus, self.queue)
			self._multi.handles.append(curl)

		self.running = False
		map(lambda x: self._free.put(x), self._multi.handles)

		threading.Thread.__init__(self)

	def start(self):
		self.running = True
		threading.Thread.start(self)

	def run(self):
		total_handles = 0;
		while self.running:
			Download.lock.acquire()
			while self.queue.qsize() > 0:
				try:
					fetcher = self._free.get_nowait()
					url, filename, callback, error_callback, no_cache = self.queue.get_nowait()
					fetcher.set_target(url, filename, callback, error_callback, no_cache)
					self._multi.add_handle(fetcher._curl)
					total_handles += 1
				except Queue.Empty:
					break
			Download.lock.release()

			#If all fetcher are free, we finished our job
			if self._free.qsize() == len (self._multi.handles):
				break

			# Run the internal curl state machine for the multi stack
			ret = self._multi.select(1.0)
			if ret == -1:
				print "Select returned -1"
				break

			while 1:
				ret, num_handles = self._multi.perform()

				if num_handles < total_handles:
					#One or more fetcher finished download, we call
					#respective curl callbacks, and free the fetcher itself"
					num_failed, ok_list, err_list = self._multi.info_read()
					for curl in ok_list:
						curl.parent.callback()
						self._multi.remove_handle(curl)
						self._free.put(curl.parent)
					for curl, errno, errmsg in err_list:
						self.DisplayStatus.errored(curl.url, errmsg)
						curl.parent.error_callback()
						self._multi.remove_handle(curl)
						self._free.put(curl.parent)
					#update the number of actual fetcher
					total_handles = num_handles
				if ret != pycurl.E_CALL_MULTI_PERFORM:
					break

		Download.lock.acquire()
		while self.queue.qsize()>0:
			self.queue.get_nowait()
		Download.lock.release()
		try:
			while len(self._multi.handles) > 0:
				curl = self._multi.handles.pop()
				try:
					self._multi.remove_handle(curl._curl)
				except:
					pass
				curl.close()
				del curl
				self._multi.close()
		except:
			pass
		self.running = False
		self.DisplayStatus.clean()


class Download:
	""" A Download queue """
	# Fetcher to use
	d_fetchers_list = {}

	# Our lock handler
	lock = threading.Lock()

	def __init__(self, info="text", max=3, name=None):
		self.__max = max
		self.__info = info
		self.__name = name
		self.d_fetcher = None
		if self.__name == None:
			self.__name = str(self).split(" ")[-1].rstrip(">")
		if not isinstance(self.__name,str):
			raise TypeError, "The name passed, was not a string"
		elif self.__name in Download.d_fetchers_list:
			raise KeyError, "A Downloaded fetcher with name '%s' alrady exists" % self.__name

	def set_name (name):
		"""Set a name for the queue"""
		print "This function should not be called"
		if not self.__name:
			self.__name = name

	def request (self, uri, destine, callback = None, error_callback = None, no_cache = False):
		"""Add an uri to the queue. The uri is copied on the local path destine.
		   Optionally, it is possible to pass two callbacks functions that are
		   executed once download is terminated (callback) or if it failed (error_callback)"""
		# Create the needed d_fetcher.
		self.lock.acquire()
		if self.d_fetcher == None or not self.d_fetcher.running:
			self.d_fetcher = DownloadFetcher(self.__info, self.__max)
			self.d_fetcher.setDaemon(True)
			self.d_fetcher.start()
			Download.d_fetchers_list[self.__name]= self.d_fetcher
		self.d_fetcher.queue.put((uri, destine, callback, error_callback, no_cache))
		self.lock.release()

	def get (self, uri, destine, callback = None, error_callback = None, no_cache = False):
		self.request (uri, destine, callback, error_callback, no_cache)

	def get_name (self):
		"""Return the name of this queue"""
		return self.__name

	def get_names (self):
		"""Return a list the current queues"""
		return Download.d_fetchers_list.keys()

	def wait(self, name):
		"""Wait for all files in the queue 'name' to be downloaded """
		try:
			self._wait(Download.d_fetchers_list[name])
			self.lock.acquire()
			Download.d_fetchers_list.pop(name)
			d_fetcher = None
			self.lock.release()
		except KeyError:
			print "No thread with given name"

	def _wait(self, d_fetcher):
		d_fetcher.join(0.5)
		while d_fetcher and d_fetcher.running:
			d_fetcher.join(0.5)

	def wait_all(self):
		"""Wait for all the files in all the queues are downloaded."""
		# We need to use timeout to handle signals
		while 1:
			try:
				i = Download.d_fetchers_list.iterkeys()
				n = i.next()
				f = Download.d_fetchers_list[n]
				f.running = False
				self.wait(n)
			except StopIteration:
				break

	def wait_mine(self):
		"""Wait for all files in this queue to be downloaded."""
		self.wait(self.__name)
