File: mobtypes

package info (click to toggle)
mobyle 1.5.5%2Bdfsg-6
links: PTS, VCS
area: main
in suites: buster
size: 8,288 kB
sloc: python: 22,709; makefile: 35; sh: 33; ansic: 10; xml: 6
file content (374 lines) | stat: -rwxr-xr-x 15,922 bytes
parent folder | download | duplicates (4)
#! /usr/bin/env python

#############################################################
#                                                           #
#   Author: Bertrand Neron                                  #
#   Organization:'Biological Software and Databases' Group, #
#                Institut Pasteur, Paris.                   #
#   Distributed under GPLv2 Licence. Please refer to the    #
#   COPYING.LIB document.                                   #
#                                                           #
#############################################################

import os , sys
import time
from lxml import etree

# The Mobyle installation directory comes from the environment; the script
# cannot run without it, so it aborts immediately when it is missing or empty.
MOBYLEHOME = os.environ.get( 'MOBYLEHOME' )
if not MOBYLEHOME:
    sys.exit('MOBYLEHOME must be defined in your environment')

# Make MOBYLEHOME and MOBYLEHOME/Src importable (appended once, after any
# existing entries) so that the Mobyle and Local imports below can be
# resolved -- presumably the packages live in one of these two directories.
for _mobylePath in ( MOBYLEHOME , os.path.join( MOBYLEHOME , 'Src' ) ):
    if _mobylePath not in sys.path:
        sys.path.append( _mobylePath )
del _mobylePath

from Mobyle.Classes.DataType import DataTypeFactory
import Mobyle.Parser 
import Mobyle.Classes.Core
import Local.CustomClasses
from Mobyle.MobyleError import ParserError



class XmlDataTypeRepository:
    """
    Inventory of the data types and biotypes used by a set of xml files.

    After construction the instance exposes:
      - pythonTypes : sorted names of the data types that are also names
                      found in Mobyle.Classes or Local.CustomClasses,
      - xmlTypes    : sorted ( xml type , python type ) tuples for all the
                      other data types known to the factory,
      - biotypes    : sorted biotypes met in the parsed parameters,
      - name        : label used in the header of a diff.

    str( repository ) renders the three lists as a text repository made of
    sections ( '#' comment lines , a header line , the entries , a '//' tail ).
    """
    TAIL    = '//'
    SEP     = '|'
    COMMENT = '#'
    XML_HEADER = "%-30.30s %s %-15.15s %s %s" % ( " xml class" , SEP , "python class", SEP , "description" )
    PYTHON_HEADER = " Mobyle python class used "
    BIOTYPE_HEADER = " biotypes used "

    def __init__(self, path):
        """
        @param path: what to parse; it is handed over unchanged to self.parse
                     ( here a list of xml file paths ).
        """
        self._dataTypeFactory = DataTypeFactory()
        self.descriptions = {}
        self.biotypes = []
        self.xmlTypes = []  # [ xmlType, python Type ]
        self.pythonTypes = []
        self.parse( path )

    def parse(self, xmlPaths):
        """
        Parse the type of every <parameter> found in the xml files and fill
        self.pythonTypes, self.xmlTypes and self.biotypes.

        All the parameters are checked before giving up: if at least one
        parameter has no type or a type that cannot be parsed, every problem
        is reported on stderr and the program exits with status 1.

        @param xmlPaths: the paths of the xml files to analyse.
        """
        self.name = 'xml list'
        errors = []
        biotypes = set()
        for xmlPath in xmlPaths:
            doc = etree.parse( "file://%s" % os.path.abspath( xmlPath ) )
            for parameter in doc.xpath( './/parameter' ):
                try:
                    typeNode = parameter.xpath( 'type' )[0]
                    mt = Mobyle.Parser.parseType( typeNode , self._dataTypeFactory )
                    biotypes.update( mt.getBioTypes() )
                except IndexError:
                    errors.append( "%s/%s haven't any type" % ( xmlPath , self._parameterName( parameter ) ) )
                except ParserError as err:
                    errors.append( "%s/%s : %s" % ( xmlPath , self._parameterName( parameter ) , err ) )
        if errors:
            for error in errors:
                sys.stderr.write( "ERROR  %s\n" % ( error , ) )
            sys.exit(1)

        # the names exposed by the two packages do not change while looping,
        # so they are collected only once
        pythonNames = set( dir( Mobyle.Classes ) ) | set( dir( Local.CustomClasses ) )
        definedDataTypes = self._dataTypeFactory.definedDataTypes
        for dtName in definedDataTypes.keys():
            if dtName in pythonNames:
                self.pythonTypes.append( dtName )
            else:
                ancestors = definedDataTypes[ dtName ].ancestors
                self.xmlTypes.append( ( ancestors[0] , ancestors[1] ) )

        self.biotypes = sorted( biotypes )
        self.pythonTypes.sort()
        self.xmlTypes.sort()

    def _parameterName(self, parameter):
        """
        @return: the text of the first <name> child of the parameter, or a
                 placeholder when there is none, so that building an error
                 message never fails itself.
        """
        names = parameter.xpath( 'name/text()' )
        if names:
            return names[0]
        return '<unnamed parameter>'

    def mistakeDetector(self):
        """
        Look for xml types, then for biotypes, which differ only by the case.

        @return: the list of warning messages ( empty if nothing is suspect ).
        """
        warnings = []
        xmlCaseError = {}
        for xmlType , pythonType in self.xmlTypes:
            upXmlType = xmlType.upper()
            if upXmlType in xmlCaseError:
                warnings.append( "found to similar xml datatype %s and %s" % ( xmlType , xmlCaseError[ upXmlType ] ))
            else:
                xmlCaseError[ upXmlType ] = xmlType
        biotypeCaseError = {}
        for biotype in self.biotypes:
            upBiotype = biotype.upper()
            if upBiotype in biotypeCaseError:
                warnings.append( "found to similar biotype %s and %s" % (biotype , biotypeCaseError[ upBiotype] ) )
            else:
                biotypeCaseError[ upBiotype ] = biotype

        return warnings

    def _formatXmlType(self, xmlType):
        """
        @param xmlType: a ( xml type , python type ) tuple.
        @return: the tuple laid out with the column widths of XML_HEADER.
        """
        return "%-30.30s %s %-15.15s %s" % ( xmlType[0] , self.SEP , xmlType[1] , self.SEP )

    def _formatSection(self, width, header, body):
        """
        @return: one section of the text repository: a blank line, the header
                 framed by two rulers of width '#', the body and the tail.
        """
        return "\n%(diese)s\n%(header)s\n%(diese)s\n%(body)s\n%(tail)s\n" % { "diese"  : '#' * width ,
                                                                              "header" : header ,
                                                                              "body"   : body ,
                                                                              "tail"   : self.TAIL
                                                                             }

    def __str__(self):
        """
        @return: the text repository: a dated comment line followed by the
                 python types, xml types and biotypes sections.
        """
        comment = '# mobyle datatype repositotry %s #' % time.strftime( '%x %X' , time.localtime() )

        pythonBody = '\n'.join( [ "%-25.25s %s " % ( pythonType , self.SEP ) for pythonType in self.pythonTypes ] )
        xmlBody = '\n'.join( [ self._formatXmlType( xmlType ) + " " for xmlType in self.xmlTypes ] )
        biotypesBody = '\n'.join( [ '%-15.15s %s' % ( biotype , self.SEP ) for biotype in self.biotypes ] )

        return ( comment
                 + self._formatSection( 26 , self.PYTHON_HEADER , pythonBody )
                 + self._formatSection( 64 , self.XML_HEADER , xmlBody )
                 + self._formatSection( 15 , self.BIOTYPE_HEADER , biotypesBody )
                 )

    def _diffLines(self, oldItems, newItems, render):
        """
        Compare two sequences of hashable items.

        @param oldItems: the items of the repository compared against.
        @param newItems: the items of this repository.
        @param render: function turning one item into its text.
        @return: one '- item' line per item found only in oldItems and one
                 '+ item' line per item found only in newItems, each ended by
                 a newline; an empty string if the sequences are equal.
        """
        import difflib
        sm = difflib.SequenceMatcher()
        sm.set_seqs( oldItems , newItems )
        lines = []
        for tag , i1 , i2 , j1 , j2 in sm.get_opcodes():
            # a 'replace' block is a deletion and an insertion at the same
            # place: both sides must be reported
            if tag in ( 'delete' , 'replace' ):
                lines.extend( '- ' + render( item ) for item in oldItems[ i1 : i2 ] )
            if tag in ( 'insert' , 'replace' ):
                lines.extend( '+ ' + render( item ) for item in newItems[ j1 : j2 ] )
        return ''.join( [ line + '\n' for line in lines ] )

    def _diffSection(self, title, rulerWidth, oldItems, newItems, render):
        """
        @return: the diff lines preceded by the title underlined with
                 rulerWidth '=', or an empty string if there is no difference.
        """
        body = self._diffLines( oldItems , newItems , render )
        if not body:
            return ''
        return "%s\n%s\n%s\n" % ( title , "=" * rulerWidth , body )

    def diff(self , repository ):
        """
        Compare this repository with another one.

        Entries found only in this repository are prefixed with '+', entries
        found only in the other one with '-'.

        @param repository: the repository to compare with; its pythonTypes,
                           xmlTypes, biotypes and name attributes are read.
        @return: the report as a string, or None if the two repositories
                 hold the same python types, xml types and biotypes.
        """
        pythonDiff = self._diffSection( "Mobyle python diff" , 20 , repository.pythonTypes , self.pythonTypes , str )
        xmlDiff = self._diffSection( "xml class diff" , 15 , repository.xmlTypes , self.xmlTypes , self._formatXmlType )
        bioDiff = self._diffSection( "biotypes diff" , 14 , repository.biotypes , self.biotypes , str )

        if pythonDiff or xmlDiff or bioDiff:
            return "--- %s\n+++ %s\n\n %s%s%s" % ( repository.name , self.name ,  pythonDiff , xmlDiff , bioDiff )
        return None
    
    
class DataTypeReference( XmlDataTypeRepository ):
    """
    Repository loaded from a reference file instead of xml files.

    The file is read section by section: lines starting with '#' are
    comments, a section starts with one of the PYTHON_HEADER / XML_HEADER /
    BIOTYPE_HEADER lines and ends with a line starting with '//'; inside a
    section the fields of an entry are separated by '|'.
    Any format error is reported on stderr and ends the program with
    status 2.
    """

    def parse(self , repositoryPath ):
        """
        Read the reference file and fill self.pythonTypes, self.xmlTypes,
        self.descriptions and self.biotypes from the sections it contains.

        @param repositoryPath: the path of the reference file; it is also
                               used as name of the repository.
        """
        self.name = repositoryPath
        pythonHeader = self.PYTHON_HEADER.strip()
        xmlHeader = self.XML_HEADER.strip()
        biotypeHeader = self.BIOTYPE_HEADER.strip()
        header_found = False

        # the section parsers consume the lines of their section from the
        # same file object, so this loop resumes just after the section tail
        with open( repositoryPath , 'r' ) as refFile:
            for line in refFile:
                line = line.strip()
                if line.startswith( self.COMMENT ):
                    continue
                elif line == pythonHeader:
                    header_found = True
                    self.pythonTypes = self._parsePythonType( refFile )
                elif line == xmlHeader:
                    header_found = True
                    self.xmlTypes , self.descriptions = self._parseXmlType( refFile )
                elif line == biotypeHeader:
                    header_found = True
                    self.biotypes = self._parseBiotype( refFile )
                elif line.startswith( self.TAIL ):
                    # a tail outside of any section ends the parsing
                    break
                elif not header_found:
                    sys.stderr.write( "ERROR bad file format for DataType Reference %s : No header found\n" % refFile.name )
                    sys.exit(2)

    def _iterSectionFields(self , refFile ):
        """
        Generator over the entries of the section refFile is positioned in.

        Comment and blank lines are skipped and the iteration stops after
        the tail line has been consumed. If the file ends before any tail is
        met, an error is reported and the program exits with status 2.

        @param refFile: the opened reference file.
        @return: for each entry, the list of its stripped fields.
        """
        for line in refFile:
            if line.startswith( self.COMMENT ):
                continue
            if line.startswith( self.TAIL ):
                return
            fields = [ item.strip() for item in line.split( self.SEP ) ]
            if fields == ['']: # skip blank line
                continue
            yield fields

        sys.stderr.write( "ERROR bad file format for DataType Reference %s : Unexpected end of file \n" % refFile.name )
        sys.exit(2)

    def _parseUniqueNames(self , refFile ):
        """
        Collect the first field of every entry of the current section.

        @return: the sorted list of the names; a name present twice is an
                 error which ends the program with status 2.
        """
        names = set()
        for fields in self._iterSectionFields( refFile ):
            name = fields[0]
            if name in names:
                sys.stderr.write( "the reference %s has 2 %s entries\n" % ( refFile.name , name ) )
                sys.exit(2)
            names.add( name )
        return sorted( names )

    def _parsePythonType(self , refFile ):
        """
        @param refFile: the reference file, positioned after the python header.
        @return: the sorted list of the python types of the section.
        """
        return self._parseUniqueNames( refFile )

    def _parseXmlType(self, refFile ):
        """
        Parse the xml section; every entry must hold exactly three fields:
        xml class, python class and description. Each entry is registered in
        the data type factory.

        @param refFile: the reference file, positioned after the xml header.
        @return: ( sorted list of ( xml type , python type ) tuples built
                 from the data types known to the factory ,
                 dictionary { xml class : description } ).
        """
        descriptions = {}
        for fields in self._iterSectionFields( refFile ):
            if len( fields ) != 3:
                sys.stderr.write( "ERROR bad file format for DataType Reference %s : 3 fields expected but %d found in '%s'\n" % (
                                  refFile.name , len( fields ) , self.SEP.join( fields ) ) )
                sys.exit(2)
            xmlKlass , pythonKlass , description = fields
            self._dataTypeFactory.newDataType(  pythonKlass , xmlName = xmlKlass )
            descriptions[ xmlKlass ] = description

        # data types which are names of Mobyle.Classes or Local.CustomClasses
        # are python types, not xml types
        pythonNames = set( dir( Mobyle.Classes ) ) | set( dir( Local.CustomClasses ) )
        definedDataTypes = self._dataTypeFactory.definedDataTypes
        xmlTypes = []
        for dtName in definedDataTypes.keys():
            if dtName not in pythonNames:
                ancestors = definedDataTypes[ dtName ].ancestors
                xmlTypes.append( ( ancestors[0] , ancestors[1] ) )

        xmlTypes.sort()
        return xmlTypes , descriptions

    def _parseBiotype(self, refFile ):
        """
        @param refFile: the reference file, positioned after the biotype header.
        @return: the sorted list of the biotypes of the section.
        """
        return self._parseUniqueNames( refFile )
        
        
if __name__ == '__main__':
    # Command line entry point:
    #   - without --ref : write the types repository built from the xml files
    #     on stdout and the case-similarity warnings on stderr,
    #   - with --ref    : write the differences between the reference file
    #     and the xml files on stdout ( nothing when they agree ).
    from getopt import gnu_getopt , GetoptError

    def usage():
        # write the help message on the standard output
        sys.stdout.write( """
        generate on the standart output a mobyle types repository ( python , xml and biotype ).
        if -ref is specified do a diff between the ref and the xml
        
        usage: mobtypes < --ref path to a datatype repository > < xmlpaths to analyze >
        if no paths are provide as arguments, use the xmlpaths installed (local and imported) for this portal.
        options:
            -h or --help  ... Print this message and exit.
            -r or --ref   ... the path to a datatype repository.
        """ + "\n" )

    # only the parsing of the command line can raise GetoptError
    try:
        opts, xmlPaths = gnu_getopt( sys.argv[1:], "hr:", [ "help" , "ref=" ] )
    except GetoptError:
        usage()
        sys.exit( 1 )

    refPath = None
    for option , value in opts:
        if option in ( "-h","--help" ):
            usage()
            sys.exit( 0 )
        # getopt reports a long option without the '=' used to declare it
        elif option in ( "-r" , "--ref" ):
            if not os.path.exists( value ):
                sys.stderr.write( "ERROR : %s %s : No such file\n" % ( option , value ) )
                sys.exit(1)
            refPath = value

    if not xmlPaths:
        # no xml given on the command line: use the programs of the registry
        from Mobyle.Registry import registry
        xmlPaths = [  p.path for p in registry.programs ]

    xmlRepository = XmlDataTypeRepository( xmlPaths )
    if refPath:
        ref = DataTypeReference( refPath )
        differences = ref.diff( xmlRepository )
        # diff gives nothing back when the two repositories are identical
        if differences:
            sys.stdout.write( "%s\n" % ( differences , ) )
    else:
        sys.stdout.write( "%s\n" % ( xmlRepository , ) )
        w = xmlRepository.mistakeDetector()
        if w:
            sys.stderr.write( " -- WARNINGS -- \n" )
            sys.stderr.write( '\n'.join( w ) + '\n' )