# debpartial_mirror - partial debian mirror package tool
# (c) 2004 Otavio Salvador <otavio@debian.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA	02111-1307	USA

import StringIO
import threading
import Queue
import re
import os
import os.path
from stat import *
import pycurl
from string import split, join
from rfc822 import formatdate
from errno import EEXIST

from debpartial_mirror import DisplayStatus


class DownloadQueue(Queue.Queue):
	""" A FIFO queue that silently drops items it already holds. """

	counter = 0

	def _put(self, item):
		# Refuse duplicates so the same download is never queued twice.
		if item in self.queue:
			return
		self.queue.append(item)


class DownloadFileHandler:
	"""File-like writer that defers opening the target file until the
	first chunk of data actually arrives, so empty transfers never
	touch the disk."""

	def __init__(self, filename, mode, buffering = 0):
		self._filename = filename
		self._mode = mode
		self._buffering = buffering
		self._openned = False
		self.__fp = None

	def write(self, string):
		"""Append *string* to the file, opening it lazily on first use."""
		if not self._openned:
			self.__fp = open(self._filename, self._mode, self._buffering)
			self._openned = True
		self.__fp.write(string)

	def close(self):
		"""Close the underlying file if it was ever opened; a later
		write() will transparently reopen it."""
		if self.__fp is None:
			return
		self.__fp.close()
		self.__fp = None
		self._openned = False



class Curl:
	buggy_curl = False

	def __init__(self, DisplayStatus, Queue):
		# Handle bug version of pycurl (versions pior 7.14 are buggy)
		major, minor, revision = pycurl.version_info()[1].split('.')
		if (int(major) + 0.01*int(minor) < 7.14) and not Curl.buggy_curl:
			print "WARNING: Due a bug in libcurl up to 7.13 we can't use resume support!"
			Curl.buggy_curl = True

		self._curl = pycurl.Curl()
		self._curl.setopt(pycurl.FOLLOWLOCATION, 1)
		self._curl.setopt(pycurl.MAXREDIRS, 5)
		self._curl.setopt(pycurl.NOSIGNAL, 0)
		self._curl.setopt(pycurl.CONNECTTIMEOUT, 30)
		self._curl.setopt(pycurl.PROGRESSFUNCTION, self.progress)
		self._curl.setopt(pycurl.NOPROGRESS, 0)
		self._curl.setopt(pycurl.FAILONERROR, 1)
		self._curl.setopt(pycurl.HTTP_VERSION, pycurl.CURL_HTTP_VERSION_1_0)
		self._curl.setopt(pycurl.VERBOSE, 0)

		self._curl.parent = self
		self._curl.callback = None
		self._curl.error_callback = None
		self._curl.filename = None

		self._fp = None
		self._startSize = 0
		self.DisplayStatus = DisplayStatus
		self.Queue = Queue

	def __processDir(self):
		# in case it's a directory, we process to find all needed
		# objects and queue all.
		if self._fp != None and isinstance(self._fp, StringIO.StringIO):
			self._fp.seek(0)
			matches = re.compile(
				'<a href="([a-zA-Z0-9_][a-zA-Z0-9\-\./_]+)">',
				 re.IGNORECASE | re.MULTILINE
			).findall(self._fp.read())

			for filen in matches:
				try:
					os.makedirs(os.path.dirname(self._curl.filename))
				except OSError, (errno, msg):
					if errno != EEXIST:
						print "ERROR:", msg
				# (uri, destine, callback, error_callback, no_cache)
				self.Queue.put((
					os.path.dirname(self._curl.url) + '/' + filen,
					os.path.dirname(self._curl.filename) + '/' + filen,
					self._curl.callback, self._curl.error_callback,
					self._curl.no_cache
				))

	def set_target(self, url, filename, callback, error_callback, no_cache):
		self._curl.setopt(pycurl.URL, url)
		self._curl.url = url
		self._curl.callback = callback
		self._curl.error_callback = error_callback
		self._curl.filename = filename
		self._curl.setopt(pycurl.RESUME_FROM, 0)
		self._curl.no_cache = no_cache

		if self._fp is not None:
			try:
				self._fp.close()
				del self._fp # don't leave the files open
			except IOError:
				self._fp = None

		extra_header = []
		if filename.endswith('/'):
			self._fp = StringIO.StringIO()
		else:
			self._curl.filename += ".partial"

			# We'll rename it back if is the same file
			if os.path.exists (filename):
				extra_header.append("if-modified-since:" + formatdate(
					os.path.getmtime(filename)
				))
				os.rename (filename, self._curl.filename)

			if not Curl.buggy_curl and os.path.exists (self._curl.filename):
				#dowload interrupted, needs resume
				self._startSize = os.stat(self._curl.filename)[ST_SIZE]
				self._curl.setopt(pycurl.RESUME_FROM, self._startSize)
				self._fp = DownloadFileHandler(self._curl.filename, "ab")
			else :
				self._fp = DownloadFileHandler(self._curl.filename, "wb")

		self._curl.setopt(pycurl.WRITEFUNCTION, self._fp.write)


		if not no_cache:
			extra_header.append("Pragma:")

		if extra_header:
			self._curl.setopt(pycurl.HTTPHEADER, extra_header)

	def close(self):
		self.__processDir()
		self.callback()
		self._fp = None
		self._curl.close()

	def progress(self, download_t, download_d, upload_t, upload_d):
		#If we doesn't know how much data we will receive, return.
		if download_t == 0 or download_d == 0:
		   return

		if self.DisplayStatus[self._curl.url] == None and self._curl.url[-1] != '/':
			self.DisplayStatus.start(self._curl.url, download_t + self._startSize)

		if self._curl.url[-1] != '/':
			self.DisplayStatus.update(self._curl.url, download_d + self._startSize)

	def callback(self):
		if not self._curl.filename:
			return # It hasn't set!
		#remove trailing .partial after download is completed
		if not isinstance(self._fp, StringIO.StringIO) and os.path.exists (self._curl.filename):
			self._fp.close()
			os.rename (self._curl.filename, split (self._curl.filename,".partial")[0])
		else:
			self.__processDir()

		#exec user passed callback
		if self._curl.callback != None:
			self._curl.callback
			self._curl.callback = None

	def error_callback(self):
		if self._curl.error_callback != None:
			self._curl.error_callback
			self._curl.error_callback = None


class DownloadFetcher(threading.Thread):
	def __init__(self, info, downloaders):
		# Add DisplayStatus object.
		# TODO: the DisplayStatus type should be set in the configuration file
		if info == "text":
			self.DisplayStatus = DisplayStatus.TextDisplayStatus()
		elif info == "log":
			self.DisplayStatus = DisplayStatus.LogDisplayStatus()
		elif info == "quiet":
			self.DisplayStatus = DisplayStatus.QuietDisplayStatus()
		else:
			self.DisplayStatus = info

		# Create the needed pycurl objects to manage the connections.
		self._multi = pycurl.CurlMulti()
		self._multi.handles = []
		self._free = Queue.Queue()
		self.queue = DownloadQueue()
		for i in range(downloaders):
			curl = Curl(self.DisplayStatus, self.queue)
			self._multi.handles.append(curl)

		self.running = False
		map(lambda x: self._free.put(x), self._multi.handles)

		threading.Thread.__init__(self)

	def start(self):
		self.running = True
		threading.Thread.start(self)

	def run(self):
		total_handles = 0;
		while self.running:
			Download.lock.acquire()
			while self.queue.qsize() > 0:
				try:
					fetcher = self._free.get_nowait()
					url, filename, callback, error_callback, no_cache = self.queue.get_nowait()
					fetcher.set_target(url, filename, callback, error_callback, no_cache)
					self._multi.add_handle(fetcher._curl)
					total_handles += 1
				except Queue.Empty:
					break
			Download.lock.release()

			#If all fetcher are free, we finished our job
			if self._free.qsize() == len (self._multi.handles):
				break

			# Run the internal curl state machine for the multi stack
			ret = self._multi.select(1.0)
			if ret == -1:
				print "Select returned -1"
				break

			while 1:
				ret, num_handles = self._multi.perform()

				if num_handles < total_handles:
					#One or more fetcher finished download, we call
					#respective curl callbacks, and free the fetcher itself"
					num_failed, ok_list, err_list = self._multi.info_read()
					for curl in ok_list:
						curl.parent.callback()
						self._multi.remove_handle(curl)
						self._free.put(curl.parent)
					for curl, errno, errmsg in err_list:
						self.DisplayStatus.errored(curl.url, errmsg)
						curl.parent.error_callback()
						self._multi.remove_handle(curl)
						self._free.put(curl.parent)
					#update the number of actual fetcher
					total_handles = num_handles
				if ret != pycurl.E_CALL_MULTI_PERFORM:
					break

		Download.lock.acquire()
		while self.queue.qsize()>0:
			self.queue.get_nowait()
		Download.lock.release()
		try:
			while len(self._multi.handles) > 0:
				curl = self._multi.handles.pop()
				try:
					self._multi.remove_handle(curl._curl)
				except:
					pass
				curl.close()
				del curl
				self._multi.close()
		except:
			pass
		self.running = False
		self.DisplayStatus.clean()


class Download:
	""" A Download queue """
	# Fetcher to use
	d_fetchers_list = {}

	# Our lock handler
	lock = threading.Lock()

	def __init__(self, info="text", max=3, name=None):
		self.__max = max
		self.__info = info
		self.__name = name
		self.d_fetcher = None
		if self.__name == None:
			self.__name = str(self).split(" ")[-1].rstrip(">")
		if not isinstance(self.__name,str):
			raise TypeError, "The name passed, was not a string"
		elif self.__name in Download.d_fetchers_list:
			raise KeyError, "A Downloaded fetcher with name '%s' alrady exists" % self.__name

	def set_name (name):
		"""Set a name for the queue"""
		print "This function should not be called"
		if not self.__name:
			self.__name = name

	def request (self, uri, destine, callback = None, error_callback = None, no_cache = False):
		"""Add an uri to the queue. The uri is copied on the local path destine.
		   Optionally, it is possible to pass two callbacks functions that are
		   executed once download is terminated (callback) or if it failed (error_callback)"""
		# Create the needed d_fetcher.
		self.lock.acquire()
		if self.d_fetcher == None or not self.d_fetcher.running:
			self.d_fetcher = DownloadFetcher(self.__info, self.__max)
			self.d_fetcher.setDaemon(True)
			self.d_fetcher.start()
			Download.d_fetchers_list[self.__name]= self.d_fetcher
		self.d_fetcher.queue.put((uri, destine, callback, error_callback, no_cache))
		self.lock.release()

	def get (self, uri, destine, callback = None, error_callback = None, no_cache = False):
		self.request (uri, destine, callback, error_callback, no_cache)

	def get_name (self):
		"""Return the name of this queue"""
		return self.__name

	def get_names (self):
		"""Return a list the current queues"""
		return Download.d_fetchers_list.keys()

	def wait(self, name):
		"""Wait for all files in the queue 'name' to be downloaded """
		try:
			self._wait(Download.d_fetchers_list[name])
			self.lock.acquire()
			Download.d_fetchers_list.pop(name)
			d_fetcher = None
			self.lock.release()
		except KeyError:
			print "No thread with given name"

	def _wait(self, d_fetcher):
		d_fetcher.join(0.5)
		while d_fetcher and d_fetcher.running:
			d_fetcher.join(0.5)

	def wait_all(self):
		"""Wait for all the files in all the queues are downloaded."""
		# We need to use timeout to handle signals
		while 1:
			try:
				i = Download.d_fetchers_list.iterkeys()
				n = i.next()
				f = Download.d_fetchers_list[n]
				f.running = False
				self.wait(n)
			except StopIteration:
				break

	def wait_mine(self):
		"""Wait for all files in this queue to be downloaded."""
		self.wait(self.__name)
