#
# FILE            $Id: doc_collect.py,v 1.13 1998/05/19 19:40:48 dlarsson Exp $
#
# DESCRIPTION     Collect info about modules, and generate Manual page.
#
# AUTHOR          SEISY/LKSB Daniel Larsson
#
# Permission to use, copy, modify, and distribute this software and its
# documentation for any purpose and without fee is hereby granted,
# provided that the above copyright notice appear in all copies and that
# both that copyright notice and this permission notice appear in
# supporting documentation, and that the name of ABB Industrial Systems
# not be used in advertising or publicity pertaining to
# distribution of the software without specific, written prior permission.
#
# ABB INDUSTRIAL SYSTEMS DISCLAIMS ALL WARRANTIES WITH REGARD TO
# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
# FITNESS, IN NO EVENT SHALL ABB INDUSTRIAL SYSTEMS BE LIABLE
# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
# 
# Copyright (C) ABB Industrial Systems AB, 1996
# Unpublished work.  All Rights Reserved.
#
# HISTORY:
# $Log: /Gendoc/doc_collect.py $
# 
# 3     98-05-25 22:13 Daniel
# Removed obsolete 'codehack' module use.
# Revision 1.13  1998/05/19 19:40:48  dlarsson
# Merged in changes from home.
#
# 
# 2     98-04-01 14:11 Daniel
# Added Python 1.5 features to deduce the module from a class
# (thanks to mcfletch!).
# 
# 1     98-04-01 13:15 Daniel
# Revision 1.12  1998/02/05 18:13:47  dlarsson
# Added 'BuiltinFunctionCollector' and initial support for "class like" types,
# such as Jim Fulton's ExtensionClass.
#
# Revision 1.11  1998/02/04 17:57:49  dlarsson
# 'whichmodule' now returns a module object, not its name. This caused gendoc
# to skip all classes.
#
# Revision 1.10  1998/01/19 11:37:48  dlarsson
# Fixes for Python 1.5:
# - pickle.whichmodule interface changed
# - sys.builtin_module_names is now a tuple (not a list)
#
# Revision 1.9  1996/09/04 14:35:18  omfadmin
# Removed setext code.
#
# Revision 1.8  1996/08/26  20:34:24  omfadmin
# Can now generate doc for private things (name begins with single
# underscore) (Robin Friedrich).
#
# Revision 1.7  1996/07/12  15:51:29  omfadmin
# Oops, forgot to remove debug prints.
#
# Revision 1.6  1996/07/12  15:46:18  omfadmin
# Added alias to manual pages.
#
# Revision 1.5  1996/07/11  16:40:15  omfadmin
# Moved some docstring code to the new docutil module. This is
# now shared between parser and import mode
#
# Revision 1.4  1996/07/10  03:29:44  omfadmin
# Added links from inherited methods in synopsis to description.
#
# Revision 1.3  1996/07/10  03:00:34  omfadmin
# Lots of improvements. Important things:
#
# - Better doc generation of inherited methods.
# - Now skips aliases (Should probably do something more sensible).
# - __author__ is special.
#
# Revision 1.2  1996/06/13  17:56:48  omfadmin
# Removed the restrictive copyright.
#
# Revision 1.1  1995/03/20  21:46:25  dlarsson
# Initial revision
#
#

"""Collect information about a module by traversing its dictionary,
and traverse the objects found there (classes, functions)."""

__author__ = "Daniel Larsson, dlarsson@sw.seisy.abb.se"
__version__ = '$Revision: 3 $'

import ManualPage
import regex, string, docutil
from types import *

# When true, ClassCollect.collect() also collects class members whose
# names begin with a single underscore.
include_private_methods = 0

# Verbosity level. 0 is silent; the collectors print progressively more
# diagnostics when this is greater than 0, 1 and 2.
VERBOSE = 0



# *** This is of course overly simplified: ***
# Everything that contains an equal sign, or starts
# with a comment character ('#') is a code segment.
# Those that don't aren't.
#
# The pattern below has a third alternative as well: an alphanumeric
# character followed by '\(' (presumably a literal opening parenthesis
# under egrep syntax, i.e. something that looks like a call).
#
# The global regex syntax is switched to egrep style only while the
# pattern is compiled; the previous syntax is restored right after.
from regex_syntax import RE_SYNTAX_EGREP
old_syntax = regex.set_syntax(RE_SYNTAX_EGREP)
_code	  = regex.compile('[a-zA-Z0-9]\(|=|^['+string.whitespace+']*# ')
regex.set_syntax(old_syntax)


def _getdoc(obj):
    if hasattr(obj, '__doc__') and obj.__doc__:
	return docutil.stripleadingtabs(obj.__doc__)
    else:
	return ''

def _funline(fun):
    "Return the line nr where 'fun' is defined."
    import linecache, regex
    co = fun.func_code
    filename = co.co_filename
    try:
        lineno = co.co_firstlineno
    except AttributeError:
	import codehack
        lineno = codehack.getlineno(co)
    line = linecache.getline(filename, lineno)
    line = line[:string.find(line, ':')]
    return line[regex.match('['+string.whitespace+']*', line):]

def _isClass(cls):
    """Determines if 'cls' is a class.

    A class is either a standard Python class object, or an
    extension type mimicking the class protocol."""
    try:
	cls.__bases__
	return 1
    except:
	return 0

_classmap = {}

def whichmoduleobj(cls):
    """Figure out the module in which a class occurs.
    
    Search sys.modules for the module.
    Cache in classmap.
    Return a module object.
    If the class cannot be found, raise exception.
    """
    ### Altered by mcfletch 98.03.31 to use 1.5 stuff
    if hasattr( cls, '__module__' ):
	try:
	    import sys
	    return sys.modules[ cls.__module__ ] # is this always imported???
	except:
	    pass
    ### End mcfletch alterations

    if _classmap.has_key(cls):
	return _classmap[cls]
    #
    # Grab a function object and look in its code object for the co_filename
    # attribute to determine the module name
    funcs = filter(lambda member: type(member) == FunctionType, cls.__dict__.values())
    if funcs:
	filename = funcs[0].func_code.co_filename
	import os
	module = os.path.splitext(os.path.basename(filename))[0]
	d = {}
	exec 'import %s' % module in d
	_classmap[cls] = d[module]
	return d[module]
    #
    import sys
    clsname = cls.__name__
    for name, module in sys.modules.items():
	if module and module.__name__ != '__main__' and \
	   hasattr(module, clsname) and \
	   getattr(module, clsname) is cls:
	    break
    else:
	raise AttributeError, 'Cannot find module for class %s' % clsname
    _classmap[cls] = module
    return module


def all_methods(clazz, methoddict=None):
    """Return all methods of a class, grouped by defining class.

    The result is a dictionary ~class:[method names]~ covering 'clazz'
    and, recursively, all of its bases.  A method defined by a class
    is removed from the lists of every class entered before it, so
    overridden methods disappear from the base classes.

    'clazz' is not a Python class but a ClassCollect instance; only
    children that are FunctionCollect instances count as methods.
    'methoddict' is the dictionary being filled during the recursion
    and is also the return value.
    """
    if methoddict is None:
        methoddict = {}

    # Bases first, so their lists exist when overrides are removed.
    for base in clazz._bases_:
        all_methods(base, methoddict)

    own = []
    for child in clazz._children_.values():
        if child.__class__ == FunctionCollect:
            own.append(child.name())

    # Remove overridden methods in all classes seen so far
    for inherited in methoddict.values():
        for name in own:
            if name in inherited:
                inherited.remove(name)

    # Add myself to dictionary
    methoddict[clazz] = own
    return methoddict
    



# To determine the type of a variable, we take the repr of
# type(var) (written with backquotes) to convert the type to a
# string. Then we extract the type name with the '_type' regular
# expression; its group 1 is the text between the quotes.
_type		= regex.compile("<type '\\(.*\\)'>")
# Matches reprs of the form '<Name object at hexdigits>'.
# ItemCollect.head() shows '...' instead of such a value.
_ignore_value	= regex.compile("<[A-Za-z]+ object at [0-9a-f]+>")

class ItemCollect:
    """Base class for document collectors."""

    def __init__(self, obj, name=None):
        """Initialize collector with object and object name.

        When 'name' is false (omitted or empty) 'obj.__name__' is used.
        The doc string of 'obj' is split by docutil.split_doc into a
        one-liner and the remaining text."""
        self._obj_ = obj
        self._name_ = name or obj.__name__
        self._module_ = None
        doc = ''
        if hasattr(obj, '__doc__'):
            doc = obj.__doc__
        self._oneliner_, self._doc_ = docutil.split_doc(doc)

    def collect(self):
        """Collect doc info (nothing to do for a plain item)."""
        pass

    def class_child(self):
        """Inform object it is a class child.

        A class child should not keep its oneliner separate from the
        rest of the doc string, so the two are merged back together
        here. This is a bit of a kludge.
        """
        if not self._oneliner_:
            return
        if self._doc_:
            self._doc_ = self._oneliner_ + '\n\n' + self._doc_
        else:
            self._doc_ = self._oneliner_

    def has_doc(self):
        """Does the object have a document?"""
        return self._doc_ != ''

    def name(self):
        "Returns the name of the object."
        return self._name_

    def type(self):
        """Returns the type string for the object.

        This is the name found inside "<type '...'>" in the repr of
        the object's type, or '' when the repr has another form."""
        type_str = repr(type(self._obj_))
        if _type.search(type_str) == -1:
            return ''
        return _type.group(1)

    def short_head(self):
        "A short head is the type string + the name."
        return self.type() + ' ' + self.name()

    def head(self):
        """Returns 'short_head' + the object's value.

        Values looking like '<X object at ...>' are shown as '...';
        other values longer than 30 characters are truncated."""
        value = repr(self._obj_)

        # If the value matches the _ignore_value regular expression,
        # don't bother writing it.
        if _ignore_value.match(value) != -1:
            value = '...'
        elif len(value) > 30:
            value = value[:26] + ' ...'
        return self.short_head() + ' = ' + value

    def index_txt(self):
        """The index text is used to generate markers for indices."""
        return self.name() + ', ' + self.type()

    def module(self):
        """Return this object's module (not 100% accurate)."""
        return self._module_

    def set_module(self, module):
        """Set this object's module."""
        self._module_ = module

    def write_doc(self, manpage, sect = None):
        """Generate documentation for this object.

        The doc text is handed to docutil.docregex_parse together with
        'manpage' and 'sect'.

        **TODO**:

        It would be nice if I could recognize definition lists
        somehow, such as this one:

        an item         the definition for item

        another item    a long definition spanning more than one
                        line.

        Maybe also recognize something like:

        an item
          a definition
        """
        docutil.docregex_parse(manpage, sect, self._doc_)

    def write(self, manpage, lvl=0):
        """Generate a document for this object under the DESCRIPTION section.

        Always returns the empty string: no new manual page is made."""

        # Put a section inside 'DESCRIPTION' describing this item
        parent = manpage.section('DESCRIPTION')
        # MARKER HERE
        own = manpage.section(self.head(),
                              parent, self.index_txt(),
                              self.module().index_txt(),
                              search=0) # Don't search for existing section
        self.write_doc(manpage, own)
        return ''


class InstanceCollect(ItemCollect):
    "Collect information about a class instance."

    def has_doc(self):
        """Always false.

        Instances 'inherit' their class' documentation. We skip that
        here."""
        return 0

    def type(self):
        "The type string of an instance is its class' name."
        klass = self._obj_.__class__
        return klass.__name__


class ClassCollect(ItemCollect):
    "Collect information about a class"
    SKIP = ['__doc__', '__builtins__']
    
    def __init__(self, classobj, name):
	ItemCollect.__init__(self, classobj, name)

    def collect(self):
	dict = self._obj_.__dict__
	keys = dict.keys()
	keys.sort()
	self._children_ = {}

	if include_private_methods:
	    # this will always fail to inhibit since names can't start with *
	    inhibitor = '*'
	else:
	    # Inhibit names beginning with a single underscore
	    inhibitor = '_'
	for member in keys:
	    if (member[0] != inhibitor or member[1] == '_') and member not in self.SKIP:
		if VERBOSE > 0:
		    print 'Class %s: Adding %s' % (self.name(), member)
		doc = collector(dict[member], member, self.module())
		if doc:
		    self._children_[member] = doc
		    # Inform object it is a class child.
		    # This has to do with how the doc string
		    # is produced (see doc in
		    # ItemCollect.class_child).
		    doc.class_child()

	self._bases_ = []
	for base in self._obj_.__bases__:
	    module_collector = collector(whichmoduleobj(base))
	    self._bases_.append(collector(base, base.__name__, module_collector))

    def head(self):
	"Return class head"
	head = self.short_head()
	if self._bases_:
	    from string import joinfields
	    bases = joinfields(map(lambda i: i.name(), self._bases_), ', ')
	    head = head + '(' + bases +')'
	return head


    def write(self, manpage=None, lvl=1):
	"Generate a document for this class"

	# Create a new manual page for me.
	manpage = ManualPage.ManualPage(self.module().name()+'-'+self.name())
	manpage.set_author(self.module().author)

	# Figure out which are the inherited methods
	inherited = all_methods(self)

	# MARKER HERE
	title = 'Class '+self.name()
	if self._oneliner_:
	    title = title+' - '+self._oneliner_
	manpage.title(title, self.index_txt(), self.module().index_txt())
	synopsis = manpage.section('SYNOPSIS')
	manpage.code('import '+self.module().name())

	sect = manpage.section('DESCRIPTION')
	self.write_doc(manpage)
#	self.children_write(manpage, sect)
	self.children_write(inherited, manpage, sect)

	# Generate SYNOPSIS part
	code = self.head()+'\n'+self.children_synopsis(manpage, lvl)

	# Remove myself from inherited dict
	del inherited[self]

	# Generate synopsis for inherited methods
	code = code + self.inherited_synopsis(self._obj_, manpage, inherited, lvl)
	manpage.code(code, synopsis)

	# Do I have any aliases?
	aliases = self.module()._aliases_
	if aliases.has_key(self.name()):
	    code = ''
	    for alias in aliases[self.name()]:
		code = code + 'alias %s = %s\n' % (alias, self.name())
	    manpage.code(code, synopsis)

	sect = manpage.section('SEE ALSO')
	manpage.paragraph(manpage.reference(self.module().name()+'_overview',
					    self.module().name()),
			  sect)

	for base in self._bases_:
	    manpage.paragraph(manpage.reference(base.module().name()+'-'+base.name(),
						base.name()),
			      sect)

	return manpage


    def children_write(self, inherited, manpage, sect):
	"Generate docs for inherited children."
	all = []
	for base, members in inherited.items():
	    all = all + map(lambda n, base=base: base._children_[n], members)

	all.sort(lambda x, y: cmp(x.name(), y.name()))
	for child in all:
	    if child.has_doc():
		# MARKER HERE
		subsect = manpage.section(child.head(),
					  sect,
					  child.index_txt(),
					  self.index_txt(),
					  search=0)
		child.write_doc(manpage, subsect)

		# if this is a class, do recursive generation
		if _isClass(child._obj_):
		    child.children_write(all_methods(child), manpage, subsect)


    def children_synopsis(self, manpage, lvl):
	"Generate synopsis for children."
	code = ''
	keys = self._children_.keys()
	keys.sort()
	for key in keys:
	    child = self._children_[key]
	    if child.has_doc():
		child_txt = manpage.reference(child.head(), child.head())
	    else:
		child_txt = child.head()
	    code = code+'  '*lvl+child_txt+'\n'
	    # if this is a class, do recursive generation
	    if _isClass(child._obj_):
		code = code+child.children_synopsis(manpage, lvl+1)
	return code

    def inherited_synopsis(self, clazz, manpage, inherited, lvl):
	code = ''
	for base, members in inherited.items():
	    members.sort()
	    if members:
		code = code + '\n' + '  '*lvl + \
		       '# Methods inherited by %s from %s\n' % (clazz.__name__, base._obj_.__name__)
		for method in members:
		    child = base._children_[method]
		    if child.has_doc():
			child_txt = manpage.reference(child.head(), child.head())
		    else:
			child_txt = child.head()

		    code = code + '  '*lvl + child_txt + '\n'

	return code


class FunctionCollect(ItemCollect):
    "Collect information about a Python function."

    def __init__(self, funobj, name):
        ItemCollect.__init__(self, funobj, name)

    def head(self):
        "Return function head, as read from the source by _funline."
        return _funline(self._obj_)

    def write(self, manpage=None):
        """Generate a manual page of its own for this function.

        The 'manpage' argument is ignored; the new page is returned."""
        owner = self.module()
        modname = owner.name()

        # Generate new manual page
        manpage = ManualPage.ManualPage(modname + '-' + self.name())
        manpage.set_author(owner.author)

        # MARKER HERE
        title = 'Function ' + self.name()
        if self._oneliner_:
            title = title + ' - ' + self._oneliner_
        # Second argument indicates this is an index
        # Third argument indicates it is a nested index
        manpage.title(title, self.index_txt(), owner.index_txt())

        manpage.section('SYNOPSIS')
        manpage.code('import ' + modname)
        manpage.code(self.head())

        description = manpage.section('DESCRIPTION')
        self.write_doc(manpage, description)

        see_also = manpage.section('SEE ALSO')
        link = manpage.reference(modname + '_overview', modname)
        manpage.paragraph(link, see_also)
        return manpage

class BuiltinFunctionCollect(ItemCollect):
    "Collect information about a built-in function."

    def __init__(self, funobj, name):
        ItemCollect.__init__(self, funobj, name)

    def head(self):
        """Return function head.

        This is the oneliner of the doc string when there is one,
        otherwise 'built-in function ' followed by the name."""
        if not self._oneliner_:
            return "built-in function " + self.name()
        return self._oneliner_

    def write(self, manpage=None):
        """Generate a manual page of its own for this function.

        The 'manpage' argument is ignored; the new page is returned."""
        owner = self.module()
        modname = owner.name()

        # Generate new manual page
        manpage = ManualPage.ManualPage(modname + '-' + self.name())
        manpage.set_author(owner.author)

        # MARKER HERE
        title = 'Built-in function ' + self.name()
        if self._oneliner_:
            title = title + ' - ' + self._oneliner_
        # Second argument indicates this is an index
        # Third argument indicates it is a nested index
        manpage.title(title, self.index_txt(), owner.index_txt())

        manpage.section('SYNOPSIS')
        manpage.code('import ' + modname)
        manpage.code(self.head())

        description = manpage.section('DESCRIPTION')
        self.write_doc(manpage, description)

        see_also = manpage.section('SEE ALSO')
        link = manpage.reference(modname + '_overview', modname)
        manpage.paragraph(link, see_also)
        return manpage

class ModuleCollect(ItemCollect):
    SKIP = ['__doc__', '__name__', '__builtins__', 'test']

    def __init__(self, module, name):
	ItemCollect.__init__(self, module)
	self.author = None
	self._module_refs_ = []
	self._classes_ = []
	self._functions_ = []
	self.__builtinFuns = []
	self._others_ = []
	self._aliases_ = {}
	self.__isBuiltin = not module.__dict__.has_key('__file__')
	from os import path
	self.__inPython = not self.__isBuiltin and \
			  path.splitext(module.__file__)[1] not in ('.py', '.pyw')
	self._child_map_ = { ItemCollect:       self._others_,
			     InstanceCollect:   self._others_,
			     FunctionCollect:   self._functions_,
			     ClassCollect:      self._classes_,
			     BuiltinFunctionCollect: self.__builtinFuns }

    def collect(self):
	import sys
#	from pickle import whichmodule
	dict = self._obj_.__dict__

	# Keep track of collected objects so we can
	# names referring to the same object
	collected_objects = []

	keys = dict.keys()
	keys.sort()
	for member in keys:
	    value = dict[member]
	    if VERBOSE:
		print "Found", value, "(%s)" % type(value)
	    if type(value) == ModuleType:
		if member not in tuple(sys.builtin_module_names) + ('__',):
		    self._module_refs_.append(value)
	    elif member[0] == '_' and  member[1] != '_':
		if VERBOSE > 1:
		    print 'Module %s: Skipping private %s' % (self.name(), member)
	    elif member in self.SKIP:
		if VERBOSE > 1:
		    print 'Module %s: Skipping %s (in SKIP list)' % (self.name(), member)
	    elif _isClass(value) and \
		 whichmoduleobj(value).__name__ != self.name():
		if VERBOSE > 1:
		    print 'Module %s: Skipping %s (class not defined in module)' % (self.name(), member)
	    elif member == '__author__':
		self.author = value

	    elif member in collected_objects:
		continue
			
	    else:
		if VERBOSE > 0:
		    if _isClass(value):
			print 'Module %s: Adding %s (%s)' % (self.name(),
							     member,
							     whichmoduleobj(value))
		    else:
			print 'Module %s: Adding %s' % (self.name(),
								member)
		collected_objects.append(member)
		collect = None # Aliases have preallocated collectors
		try:
		    name = value.__name__
		    if name != member and type(value) != type(type('')):
			# It must be an alias!
			if VERBOSE > 0:
			    print 'Module %s: Adding alias %s for %s' % (self.name(),
									 member,
									 name)
			if not self._aliases_.has_key(name):
			    self._aliases_[name] = []
			self._aliases_[name].append(member)
		except AttributeError:
		    name = member
		doc = collector(value, name, self, collect)
		if doc and name == member:
		    self._child_map_[doc.__class__].append(doc)

    def write(self, manpage=None):
	"Generate a set of documents for this module."

	# Generate new manual page
	manpage = ManualPage.ManualPage(self.name()+'_overview')

	manpage.set_author(self.author)

	# MARKER HERE
	title = 'Module '+self.name()
	if self._oneliner_:
	    title = title+' - '+self._oneliner_
	manpage.title(title, self.index_txt())
	synopsis = manpage.section('SYNOPSIS')

	# Write module description
	sect = manpage.section('DESCRIPTION')
	see_also = manpage.section('SEE ALSO')
	self.write_doc(manpage, sect)

	# Let children generate doc. They can either generate a new
	# document, or add things to the module document.
	fun = lambda rest, next, \
	      f=self.synopsis, sect=synopsis, manpage=manpage: \
	      rest+f(next, manpage, sect)

	children = [('Classes', self._classes_),
		    ('Functions', self._functions_ + self.__builtinFuns),
		    ('Variables', self._others_)]

	pages = reduce(fun, children, [])

	module_refs = ''
	for module in self._module_refs_:
	    module_refs = module_refs + \
			  manpage.reference(module.__name__ + '_overview',
					    module.__name__) + '\n'
	if module_refs:
	    manpage.code(module_refs, see_also)

	return (manpage, pages)

    def synopsis(self, children, manpage, sect):
	title, child_list = children
	code = ''
	files = []
	for child in child_list:
	    ref=child.head()
	    if child.has_doc() or child.__class__ in [ClassCollect]:
		file = child.write(manpage)
		if file:
		    ref = manpage.reference(self.name()+'-'+child.name(),
					    child.head())
		    files.append(file)
	    elif VERBOSE > 1:
		print 'Module %s: No doc string for %s' % (self.name(), child.name())
	    code = code+ref+'\n'
	if code:
	    code = '# '+title+'\n'+code
	    manpage.code(code, sect)
	return files
		

# Collector class to use for each of the standard object types.
# Objects of any other type are sorted out by make_collector().
_doctype_map = {
    ClassType: ClassCollect,
    FunctionType: FunctionCollect,
    BuiltinFunctionType: BuiltinFunctionCollect,
    ModuleType: ModuleCollect,
    InstanceType: InstanceCollect,
}


# Dictionary of all collectors available
collectors = {}

def collector(obj, name=None, module=None, collect=None):
    """Return the collector for 'obj', creating and running it if needed.

    obj      the object to collect documentation for
    name     the name to give a newly created collector
    module   the module collector the object belongs to
    collect  a preallocated collector; when given it is assigned
             'module', run and returned, and 'obj' is not looked at

    Collectors are remembered in the 'collectors' dictionary, so each
    hashable object is collected only once; a collector found there is
    returned as it is.  Unhashable objects get a fresh collector on
    every call.
    """
    if collect:
        collect.set_module(module)
        collect.collect()
        return collect

    if VERBOSE > 2:
        print("Available collectors %s" % collectors.keys())

    # Only the cache lookup is guarded: a TypeError raised while
    # creating or running a collector must reach the caller instead
    # of triggering a second, uncached collection.
    try:
        return collectors[obj]
    except KeyError:
        cacheable = 1
    except TypeError:
        # Unhashable object: cannot be used as a dictionary key.
        cacheable = 0

    col = make_collector(obj, name)
    col.set_module(module)
    if cacheable:
        # Registered before collect() is run, presumably so that
        # objects referring back to 'obj' find this collector.
        collectors[obj] = col
    col.collect()
    return col


def make_collector(obj, name=None):
    """Create the collector matching the type of 'obj'.

    The collector class is taken from '_doctype_map' when the type of
    'obj' is listed there.  Other objects that look like a class (see
    _isClass), such as "class like" extension types, get a
    ClassCollect, and everything else a plain ItemCollect.

    The collector is only created: neither set_module() nor collect()
    is called on it.

    The former handler for 'AccessError' is gone: that name is not
    defined anywhere in this module, so the handler could only turn a
    real error into a NameError.  Errors raised by the collector
    constructors now reach the caller unchanged.
    """
    obj_type = type(obj)
    if _doctype_map.has_key(obj_type):
        return _doctype_map[obj_type](obj, name)
    if _isClass(obj):
        return ClassCollect(obj, name)
    return ItemCollect(obj, name)


def doc_collect(module_list):
    """Return a list of collectors given a list of modules.

    Each module is handed to collector() in turn."""
    result = []
    for module in module_list:
        result.append(collector(module))
    return result

def gendoc(collect_list):
    """Generate documentation given a list of collectors.

    Returns a list holding the result of write() of every collector,
    in the order given."""
    pages = []
    for item in collect_list:
        page = item.write()
        pages.append(page)
    return pages

def test():
    """Uh...well...soon I'll do this properly

    Collects and generates documentation for the OMFmisc and OMFdefs
    modules with verbosity raised to 2, prints the result, and then
    writes the page of a collector made for the pickle module."""
    # Without this declaration the assignment below would only bind a
    # local name and the module-level verbosity would stay unchanged.
    global VERBOSE
    import OMFmisc, OMFdefs
    VERBOSE = 2
    # Not called 'collectors', to keep the module-level registry of
    # that name visible.
    collected = doc_collect([OMFmisc, OMFdefs])
    print(gendoc(collected))
    import pickle
    doc = make_collector(pickle)
    doc.write()

if __name__ == '__main__':
    test()
