#!/usr/bin/python
#
# Copyright (C) 2007-2008 Julian Andres Klode <jak@jak-linux.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

'''Provide the FileManager class, which creates a partial mirror and provides
useful information about its contents.'''

from __future__ import with_statement
import gzip, bz2, os, re, fnmatch, sys, cPickle, time, gc

import urlgrabber, apt_pkg
from debian_bundle.debfile import DebFile, DebError

__all__ = ['FileManager']

# README.icons is opened (and truncated) when the module is imported; it starts
# with the explanatory header below. The handle stays open in the module
# global A for the rest of the module to use.
A = open('README.icons', 'w')
A.write('\n'.join((
	'Info:',
	' This file contains multiple sections, formatted RFC822-like.',
	' ',
	' Each sections has the following required keys:',
	' \tFile - The name of the icon file in /usr/share/app-install/icons/',
	' \tLocation - The path of the icon inside the package',
	' \tPackage - The name of the package the file has been extracted of',
	' ',
	' It may also contain the following optional keys:',
	' \tOrig-Location: The initial filename, without following symlinks, etc.',
	' \tOrig-Package: The initial package, without following symlinks.',
	# Two empty items: the header ends with a newline followed by a blank line.
	'',
	'',
)))

class FString(str):
	'''A string that also remembers which package it came from and which
	package asked for it.'''

	# Both default to None until add() has been called.
	pkg = None
	requester = None

	def add(self, fobj):
		'''Copy the pkg and requester attributes of fobj onto this string.

		Returns the string itself so the call can be chained.'''
		for attr in ('pkg', 'requester'):
			setattr(self, attr, getattr(fobj, attr))
		return self

def mkdir_for_file(fname):
	'''Create the directory (and any missing parents) that will hold fname.

	Returns True if the directory was created, and False if os.makedirs()
	raised OSError (for example because the directory already exists).'''
	parent = os.path.dirname(fname)
	try:
		os.makedirs(parent)
	except OSError:
		return False
	return True

# Regular expression matching interesting fields in the Packages files.
# PKG(text) returns a list with every 'Package:...' match (up to, but not
# including, the end of the line) and every 'Filename:...' match (including
# the newline that ends the line).
PKG = re.compile('(Package:.*|Filename:.*\n)').findall
# Pattern for the interesting lines of a Contents file, written for re.X
# (verbose) mode, so the whitespace and '#' comments inside it are ignored.
# Group 1 is the file path, group 2 the whitespace-free token after it.
FILETYPES = r'''(usr/share/(?: # Limit to files in /usr/share/
				applications/.*\.desktop| # Add desktop files
				gstreamer-.*/plugin-info/.*\.supported| # Add Gstreamer Plugin info
				(?:[^/]+/[^/]+|icons/(?:hicolor|gnome|crystalsvg)/.*|.+/icons/.+|icons/[^/]*|pixmaps/.*|.*/logos/|.*logo[^/]+)(?:png|svg|tiff|xpm) # Add icons
				)|usr/lib/GNUstep/.*/Resources/[^/]+)\s+(\S+) # Match the package(s)'''
# ALL2(text) returns an iterator over the match objects of FILETYPES in text.
ALL2 = re.compile(FILETYPES, re.X).finditer
	
# Every known architecture, in order of importance: the most important one
# comes first.
ARCHITECTURES = (
	'amd64',
	'i386',
	'alpha',
	'arm',
	'armel',
	'hppa',
	'ia64',
	'mips',
	'mipsel',
	'powerpc',
	's390',
	'sparc',
)

def get_path(*args):
	'''Return the path components in args joined into one normalized path.'''
	joined = os.path.join(*args)
	return os.path.normpath(joined)

class FileManager(object):
	'''Provide access to every file contained in at least one package on a mirror.

	For every architecture the Contents and Packages indices of the mirror are
	fetched into the local directory (unless they are already there) and
	reduced to a file -> packages mapping. That mapping is cached with cPickle
	in 'files.pkl' (in the current directory) to speed up later runs.'''

	# Directory prefixes used by search_dir() to rank icon paths. The position
	# in this tuple is the rank, so the order matters.
	search_dirs = (
		"usr/share/icons/hicolor/48x48",
		"usr/share/icons/hicolor/64x64",
		"usr/share/icons/hicolor/128x128",
		"usr/share/icons/hicolor/32x32",
		"usr/share/icons/hicolor/scalable",
		"usr/share/icons/gnome/48x48",
		"usr/share/icons/gnome/64x64",
		"usr/share/icons/gnome/128x128",
		"usr/share/icons/gnome/32x32",
		"usr/share/icons/gnome/scalable",
		"usr/share/icons/crystalsvg/48x48",
		"usr/share/icons/crystalsvg/64x64",
		"usr/share/icons/crystalsvg/128x128",
		"usr/share/icons/crystalsvg/32x32",
		"usr/share/icons/crystalsvg/scalable",
		"usr/share/pixmaps",
		"usr/share/icons/hicolor/22x22",
		"usr/share/icons/hicolor/16x16",
		"usr/share/icons/gnome/22x22",
		"usr/share/icons/gnome/16x16",
		"usr/share/icons",
	)

	# File holding the pickled file -> packages mappings.
	_cache_file = 'files.pkl'

	# Once more than this many package archives are open, all of them except
	# the one currently requested are closed again.
	_max_open = 20

	def __init__(self, local, mirror, suite='testing', components=('main',), archs=None):
		'''Build the file, package and icon indices for all architectures.

		local      -- directory receiving the downloaded indices and packages
		mirror     -- base location of the mirror
		suite      -- name of the suite below dists/
		components -- components whose Packages files are read
		archs      -- architectures to handle; defaults to ARCHITECTURES

		Progress is reported on standard output.'''
		if archs is None:
			archs = ARCHITECTURES
		cached = os.path.exists(self._cache_file)
		if cached:
			print('I: Loading Contents from cPickle cache...')
			# NOTE: unpickling can execute arbitrary code; the cache file must
			# come from a trusted source.
			with open(self._cache_file, 'rb') as fobj:
				self._files = cPickle.load(fobj)
		else:
			self._files = {}
		self._objs = {}
		self._packages = {}
		self._mirror = mirror
		self._icons = {}
		self._seen = {}
		self._archs = archs
		self._comps = components
		self._pkgset = set()
		base = os.path.join(mirror, 'dists', suite)
		self._cont = os.path.join(base, 'Contents-%s.gz')
		self._pkgf = os.path.join(base, '%s', 'binary-%s', 'Packages.bz2')
		self.local = local
		self.lpkgf = os.path.join(local, 'dists', suite, '%s', 'binary-%s', 'Packages')
		self.lcont = os.path.join(local, 'dists', suite, 'Contents-%s.gz')
		mkdir_for_file(self.lcont)

		# The cache is (re)written when it did not exist or when an
		# architecture missing from it had to be added.
		dirty = not cached
		for arch in self._archs:
			self._arch = arch
			sys.stdout.write('I: Initializing Architecture ' + arch + '\t-')
			sys.stdout.flush()
			if arch not in self._files:
				self._add_files(arch)
				dirty = True
			# Names of all packages referenced by the file index.
			for packages in self.files.values():
				self._pkgset.update(packages)

			self._add_packages(arch)
			self._prune_files(arch)

			icons = re.findall(r'usr/share/(?:icons|pixmaps)/.*\.(?:png|svg|xpm)',
			                   '\n'.join(self._files[arch]))
			self._icons[arch] = sorted(icons, key=self.search_dir)
			self._pkgset.clear()
			sys.stdout.write('\n')

		# Cache the values
		if dirty:
			with open(self._cache_file, 'wb') as fobj:
				cPickle.dump(self._files, fobj, -1)

	def _prune_files(self, arch):
		'''Remove packages that are not in the Packages index of arch from the
		file index, and drop files that are left without any package.'''
		known = self._packages[arch]
		files = self._files[arch]
		# Iterate over a snapshot of the keys, because entries are deleted.
		for fname in list(files):
			providers = files[fname]
			providers.difference_update([pkg for pkg in providers if pkg not in known])
			if not providers:
				del files[fname]

	def search_dir(self, fname):
		'''Return the rank of fname: the index of the first entry of
		search_dirs it starts with (lower is better).

		Paths matching no entry get len(search_dirs), so they sort after all
		known locations.'''
		for rank, prefix in enumerate(self.search_dirs):
			if fname.startswith(prefix):
				return rank
		return len(self.search_dirs)

	def _add_files(self, arch):
		'''Build a dictionary of File->Packages based on the Contents-ARCH.gz file'''
		self._files[arch] = {}
		sys.stdout.write(' Contents')
		sys.stdout.flush()
		local = self.lcont % arch
		if not os.path.exists(local):
			urlgrabber.urlgrab(self._cont % arch, local)

		fobj = gzip.GzipFile(local)
		try:
			cont = fobj.read()
		finally:
			fobj.close()

		index = self._files[arch]
		for match in ALL2(cont):
			# Entries look like 'section/package,section/package'; only the
			# package names are kept.
			index[match.group(1)] = set(pkg.split('/')[-1] for pkg in match.group(2).split(','))

	def _add_packages(self, arch):
		'''Add all packages from the Packages files of all components.

		Fills self._packages[arch] with a package -> Filename mapping.'''
		packages = self._packages[arch] = {}
		for comp in self._comps:
			sys.stdout.write(' %s' % (comp,))
			sys.stdout.flush()
			tgt = self.lpkgf % (comp, arch)
			if not os.path.exists(tgt):
				# Download and reduce the data before creating the local file,
				# so that a failed download does not leave an empty file behind
				# that would be mistaken for a valid copy on the next run.
				data = urlgrabber.urlread(self._pkgf % (comp, arch))
				data = '\n'.join(PKG(bz2.decompress(data)))
				mkdir_for_file(tgt)
				# Write all interesting fields into a decompressed file.
				with open(tgt, 'w') as tgtobj:
					tgtobj.write(data)

			with open(tgt) as fobj:
				parser = apt_pkg.ParseTagFile(fobj)
				while parser.Step() == 1:
					section = parser.Section
					packages[section['Package']] = section['Filename']

	def _set_arch(self, arch):
		'''Set the current architecture the class works with.

		Raises ValueError if the architecture is not one of self._archs.'''
		if arch not in self._archs:
			raise ValueError('The architecture %s is not supported' % (arch,))
		self._arch = arch

	def _get_package_data(self, pkg):
		'''Download the package (unless a local copy exists) and return the
		tar object of its data member.

		The returned object must not be closed by the caller; it is cached in
		self._objs and closed here once too many archives are open.'''
		if pkg not in self._objs:
			local = os.path.join(self.local, self._packages[self._arch][pkg])
			self._seen[pkg] = self._seen.get(pkg, 0) + 1
			if os.path.exists(local):
				print('O: %02d %-20.20s %-20.20s' % (self._seen[pkg], pkg, pkg))
			else:
				print('F:    %-20.20s %-20.20s %s' % (pkg, pkg, local))
				mkdir_for_file(local)
				remote = os.path.join(self._mirror, self._packages[self._arch][pkg])
				local = urlgrabber.urlgrab(remote, local)

			self._objs[pkg] = DebFile(local).data.tgz()

		current = self._objs[pkg]
		# Record when the archive was last requested.
		current.time = time.time()

		if len(self._objs) > self._max_open:
			# Close all files except the current one. The current one has to
			# stay, because it is returned below.
			for name in [name for name in self._objs if name != pkg]:
				self._objs.pop(name).close()
			# Collect the garbage
			gc.collect()
		return current

	def filter(self, pat_, files=None, *pats):
		'''Return a dict(pkg1=list(files), ..) with all packages
		with files matching the shell pattern pat_ or one of pats.

		files is the collection of names to search; it defaults to all files
		of the current architecture. See fnmatch.filter().'''
		index = self._files[self._arch]
		if files is None:
			files = index
		ret = {}
		for pat in (pat_,) + pats:
			for fname in fnmatch.filter(files, pat):
				for pkg in index[fname]:
					ret.setdefault(pkg, []).append(fname)
		return ret

	@staticmethod
	def _normalize_member(fname):
		'''try (not so hard) to obtain a member file name in a form relative
		to the .tar.gz root and with no heading '.' '''
		if fname.startswith('./'):
			return fname[2:]
		if fname.startswith('/'):
			return fname[1:]
		return fname

	def _extract(self, tarobj, fname):
		'''Return a member from a tarobj, do not follow links.

		The name is tried with and without a leading './'. Raises KeyError if
		the archive has no such member.'''
		fname = self._normalize_member(fname)
		try:
			return tarobj.getmember('./' + fname)
		except KeyError:
			return tarobj.getmember(fname)

	def _getmember(self, fname, requester=None):
		'''Return (tarobj, tarinfo) for fname, following all links.

		requester is the name of the package asking for the file. It is used
		to choose between several packages shipping the file, and as the
		package to look into if the file is not in the index at all.

		Raises KeyError if the file can not be found, if the package can not
		be opened or if the links form a cycle.'''
		fname = self._normalize_member(fname)
		packages = self._files[self._arch].get(fname)
		if not packages:
			if not requester:
				raise KeyError(fname)
			package = requester
		else:
			# Start with an arbitrary package shipping the file.
			for package in packages:
				break
			if 'kdebase-data' in packages and 'kde' in self.packages.get(requester, ''):
				package = 'kdebase-data'
			if requester and len(packages) > 1:
				# Prefer the requester itself or its -data/-common sibling.
				stem = requester
				if stem.endswith('-data'):
					stem = stem[:-5]
				elif stem.endswith('-common'):
					stem = stem[:-7]
				for candidate in (stem, stem + '-data', stem + '-common'):
					if candidate in packages:
						package = candidate
						break

		try:
			tarobj = self._get_package_data(package)
		except DebError:
			raise KeyError(package)

		tarinfo = self._extract(tarobj, fname) #KeyError
		visited = set()
		while tarinfo.issym() or tarinfo.islnk():
			if fname in visited:
				# The links form a cycle and never reach a real file.
				raise KeyError(fname)
			visited.add(fname)
			if tarinfo.issym():
				# Symlink targets are relative to the directory of the link.
				target = get_path(os.path.dirname(fname), tarinfo.linkname)
			else:
				# Hard link targets are relative to the root of the archive.
				target = get_path(tarinfo.linkname)
			fname = self._normalize_member(target)
			tarinfo = self._extract(tarobj, fname) #KeyError

		tarobj.pkg = package
		return tarobj, tarinfo

	def extract_file(self, fname, tgt, requester=None):
		'''Extract fname to the path tgt, like TarFile.makefile(), and return tgt.

		Nothing is done if tgt already exists. Otherwise a section describing
		the origin of the file is appended to the README file object A.
		Raises KeyError if the file can not be found.'''
		if os.path.exists(tgt):
			return tgt
		# Look the member up first, so that no directory is created for a file
		# that does not exist.
		tarobj, member = self._getmember(fname, requester) #KeyError
		mkdir_for_file(tgt)

		req_src, req_pkg = self._normalize_member(fname), requester
		rel_src, rel_pkg = self._normalize_member(member.name), tarobj.pkg
		name = os.path.basename(tgt)

		# Written to A directly instead of temporarily replacing sys.stdout,
		# which could leave output redirected after an error.
		record = ['File: %s' % (name,),
		          'Location: %s' % (rel_src,),
		          'Package: %s' % (rel_pkg,)]
		if rel_src != req_src:
			record.append('Orig-Location: %s' % (req_src,))
		if rel_pkg != req_pkg:
			record.append('Orig-Package: %s' % (req_pkg,))
		A.write('\n'.join(record) + '\n\n')

		print('X:    %-20.20s %-20.20s %-40.40s %s' % (req_pkg, rel_pkg, rel_src, name))
		tarobj.makefile(member, tgt)
		return tgt

	def get_file(self, fname, requester=None):
		'''Return a file object corresponding to a given file name.

		The object carries the attributes pkg (package the file was taken
		from) and requester. Raises KeyError if there is no such file or if
		the member has no contents to read.'''
		tarobj, tarinfo = self._getmember(fname, requester) #KeyError
		ret = tarobj.extractfile(tarinfo)
		if ret is None:
			# extractfile() gives no file object for members that are not
			# regular files, e.g. directories.
			raise KeyError(fname)
		ret.pkg = tarobj.pkg
		ret.requester = requester
		print('G:    %-20.20s %-20.20s %s' % (requester, tarobj.pkg, fname))
		return ret

	def get_content(self, fname, requester=None):
		'''Same as get_file().read(), but the result is an FString carrying
		pkg and requester, and None is returned if the file is not found.'''
		try:
			fobj = self.get_file(fname, requester)
		except KeyError:
			return None
		try:
			return FString(fobj.read()).add(fobj)
		finally:
			fobj.close()

	@property
	def archs(self):
		'''An iterator over all supported architectures, which sets the
		current architecture and afterwards yields it.'''
		for arch in self._archs:
			self._arch = arch
			yield self._arch

	@property
	def packages(self):
		'''A dict(pkg=str(pkgfile), ..) for the current architecture'''
		return self._packages[self._arch]

	@property
	def files(self):
		'''A dict(file=set(packages), ..) for the current architecture'''
		return self._files[self._arch]

	@property
	def icons(self):
		'''The list of icon files for the current architecture, sorted by
		search_dir() rank'''
		return self._icons[self._arch]

	def arch(self):
		'''The current architecture. (writeable)'''
		return self._arch

	arch = property(arch, _set_arch)
