File: DataType.py

package info (click to toggle)
mobyle 1.5.5%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 8,288 kB
  • sloc: python: 22,709; makefile: 35; sh: 33; ansic: 10; xml: 6
file content (280 lines) | stat: -rw-r--r-- 10,799 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
########################################################################################
#                                                                                      #
#   Author: Bertrand Neron,                                                            #
#   Organization:'Biological Software and Databases' Group, Institut Pasteur, Paris.   #  
#   Distributed under GPLv2 Licence. Please refer to the COPYING.LIB document.         #
#                                                                                      #
########################################################################################

"""

"""

import os , os.path , re
import types
from logging import getLogger
c_log = getLogger(__name__)

from Mobyle.MobyleError import MobyleError , UserValueError


class DataType( object ):
    """
    Base class of the datatypes.

    By default the name of a datatype is the name of its class without its
    last 8 characters (the length of the "DataType" suffix). The attribute
    C{ancestors} holds the name of the datatype followed by the names of its
    parent classes; it drives the pipe compatibility test, the equality test
    and the hash.
    """

    def __init__( self , name = None ):
        """
        @param name: the name of the datatype. When it is empty or None the name is computed from the class name.
        @type name: string
        """
        if name:
            self.name = name
        else:
            self.name = self.__class__.__name__[0:-8]

        # names of the classes of the mro without their last 8 characters.
        # The last 2 entries of the mro are discarded ( DataType and object
        # for a single inheritance tree ).
        self.ancestors = [ k.__name__[0:-8] for k in self.__class__.mro() ][0:-2]

        # a custom name is not a class name, it is put in front of the list
        if self.name not in self.ancestors :
            self.ancestors.insert( 0 , self.name )


    def isPipableToDataType( self , targetDataType ):
        """
        @param targetDataType: the datatype to pipe this datatype to
        @type targetDataType: an object with a name attribute
        @return: True if the name of targetDataType is one of the ancestors of this datatype, False otherwise.
        @rtype: boolean
        """
        return targetDataType.name in self.ancestors

    def getName( self ):
        """
        @return: the name of this datatype ( the custom name if one was given )
        @rtype: string
        """
        return self.name

    def getRealName(self):
        """
        @return: the name of the python class without its last 8 characters
        @rtype: string
        """
        return self.__class__.__name__[0:-8]

    def isFile( self ):
        """
        @return: False, subclasses may override this method
        @rtype: boolean
        """
        return False

    def isMultiple(self):
        """
        @return: False, subclasses may override this method
        @rtype: boolean
        """
        return False

    def toDom( self ):
        """
        @return: a dom representation of this datatype: a "datatype" element with
          a "class" child ( the name ) and, when the name differs from the python
          class name, a "superclass" child ( the python class name ).
        @rtype: element
        """
        # imported here, as in the original code, so lxml is needed only when a dom is built
        from lxml import etree

        realName = self.__class__.__name__[0:-8]

        dataTypeNode = etree.Element( "datatype" )
        klassNode = etree.Element( "class" )
        klassNode.text = self.name
        dataTypeNode.append( klassNode )
        # no superclass node when the name is the python class name or when
        # the python class name is empty
        if self.name != realName and realName :
            superKlassNode = etree.Element( "superclass" )
            superKlassNode.text = realName
            dataTypeNode.append( superKlassNode )
        return dataTypeNode

    def __eq__(self , other ):
        """
        Two datatypes are equal when they have the same ancestors.
        An object without ancestors attribute is never equal to a datatype
        ( NotImplemented is returned instead of raising an AttributeError ).
        """
        try:
            otherAncestors = other.ancestors
        except AttributeError:
            return NotImplemented
        return self.ancestors == otherAncestors

    def __ne__(self , other ):
        """
        Needed explicitly in python 2 where != is not derived from __eq__ .
        """
        equal = self.__eq__( other )
        if equal is NotImplemented:
            return equal
        return not equal

    def __hash__(self):
        """
        Hash consistent with __eq__ : equal datatypes have the same hash.
        The ancestors must not be modified while the datatype is used as a
        dictionary key or as a member of a set.
        """
        return hash( tuple( self.ancestors ) )

    def __str__(self):
        return self.name
    

class MultipleDataType( object ):
    """
    This is a container to handle several files with the same datatype.
    In the Session these data will be seen independently as X data with datatype Y.
    It cannot handle a group of files which must be managed as one data.

    Most of the methods simply delegate to the wrapped datatype.
    """

    def __init__(self , dt , name = None ):
        """
        @param dt: the datatype of each data handled by this container
        @type dt: a datatype instance
        @param name: the name of the datatype as it is specified in class when a superclass is specified too.
          When it is empty or None the name is "Multiple" followed by the name of dt.
        @type name: string
        """
        self.dataType = dt
        if name:
            self.name = name
        else:
            self.name = "Multiple" + self.dataType.getName()

        # same computation as in DataType: class names of the mro without their
        # last 8 characters, the last 2 entries of the mro being discarded
        self.ancestors = [ k.__name__[0:-8] for k in self.__class__.mro() ][0:-2]
        if self.name not in self.ancestors :
            self.ancestors.insert( 0 , self.name )

    def isPipableToDataType(self , targetDataType):
        """
        @return: the answer of the wrapped datatype for targetDataType
        """
        return self.dataType.isPipableToDataType( targetDataType )

    def getName(self):
        """
        @return: the name of this datatype
        @rtype: string
        """
        return self.name

    def getRealName(self):
        """
        @return: the name of this class followed by the name of the class of the
          wrapped datatype, both without their last 8 characters
        @rtype: string
        """
        return self.__class__.__name__[:-8] + self.dataType.__class__.__name__[:-8]

    def __str__(self):
        return self.name

    def isFile(self):
        """
        @return: True
        @rtype: boolean
        """
        return True

    def isMultiple(self):
        """
        @return: True
        @rtype: boolean
        """
        return True

    def toDom(self):
        """
        @return: the dom representation of the wrapped datatype
        """
        return self.dataType.toDom()

    def toFile( self, data, dest, destFileName, src, srcFileName):
        """
        Delegates to the toFile method of the wrapped datatype, with the same arguments.
        """
        return self.dataType.toFile( data , dest , destFileName , src , srcFileName )

    def head( self , data ):
        """
        Not implemented yet, data is ignored.
        @return: a placeholder string ( "a definir" is the french for "to be defined" )
        @rtype: string
        """
        return "a definir"

    def cleanData( self , data ):
        """
        Delegates to the cleanData method of the wrapped datatype, which is expected to
        convert the data in the right encoding and to replace windows end of lines by unix ones.
        """
        return self.dataType.cleanData( data )

    def supportedFormat( self ):
        """
        @return: the list supported by the format detector, as given by the wrapped datatype
        @rtype: list of string
        """
        # open questions from the author: does this have to be a class method?
        # should the supportedFormat method of the wrapped type be called here?
        return self.dataType.supportedFormat()

    def supportedConversion( self ):
        """
        @return: the list of dataFormat conversions available, as given by the wrapped datatype
        @rtype: list of tuple [ (string input format, string output format) , ... ]
        """
        # open questions from the author: does this have to be a class method?
        # should the supportedConversion method of the wrapped type be called here?
        return self.dataType.supportedConversion()

    def detect( self , value ):
        """
        Delegates the detection of the format of the data to the wrapped datatype.

        @param value: the src object and the filename in the src of the data to detect
        @type value: tuple ( session/Job/MobyleJob instance , string filename )
        @return: the result of the detect method of the wrapped datatype. According to the
          original documentation, a tuple of the detection run information:
            - the detected format,
            - the detected items number,
            - program name used for the detection.
        """
        return self.dataType.detect( value )

    def convert( self, value ):
        """
        Delegates the conversion of the data in the right format to the wrapped datatype.
        The description below comes from the original documentation and must be checked
        against the convert method of the wrapped datatype.

        @param value: the data to convert ( src , srcFileName )
          - srcFileName is the name of the file to convert in the src
          - src must be a L{Job} instance, the conversions are performed only by jobs (not session).
        @return: the fileName ( basename ) of the sequence file and the effective MobyleType associated to this
          value
        @rtype: list of tuple [ ( string fileName , MobyleType instance ), ...]
        @raise UnSupportedFormatError: if the data cannot be converted in any suitable format
        @raise MobyleError: if something goes wrong during the conversion
        """
        return self.dataType.convert( value )

    def validate( self , param ):
        """
        Validates each value of a multiple parameter against the wrapped datatype.
        The value of param is split on the separator of param, then each piece is
        set on a clone of param bound to the wrapped datatype and validated.

        @param param: the parameter to validate
        @type param: an object providing getValue , getSeparator and clone
        @return: True if all the values are valid
        @rtype: boolean
        @raise UserValueError: if one of the values is not valid. The error is bound
          to param ( not to its clone ).
        """
        sep = param.getSeparator()
        values = param.getValue().split( sep )
        newParam = param.clone( self.dataType )
        for value in values:
            newParam.setValue( value )
            try:
                newParam.validate()
            except UserValueError as err:
                # report the error on the original parameter rather than on the clone
                raise UserValueError( parameter = param , msg = str( err ) )
        return True
            
      
class DataTypeFactory( object ):
    """
    Singleton factory which builds the datatype instances and caches them
    in definedDataTypes, using the name of the datatype followed by "DataType" as key.
    """

    _ref = None

    def __new__( cls ):
        # every instantiation returns the same shared object
        if cls._ref is None:
            cls._ref = super( DataTypeFactory , cls ).__new__( cls )
        return cls._ref

    def __init__(self):
        # NOTE(review): __init__ runs at each DataTypeFactory() call, so the cache of
        # the shared instance is emptied at each instantiation. It is kept as is
        # because callers may rely on it - confirm that it is intended.
        self.definedDataTypes = {}

    def _findDataTypeClass( self , python_name ):
        """
        @param python_name: the name of a datatype class without the "DataType" suffix
        @type python_name: string
        @return: the class named python_name + "DataType", searched first in
          Local.CustomClasses then in Mobyle.Classes. None if there is no such class.
        @rtype: class or None
        """
        # imported here as in the original code ( presumably to avoid a circular
        # import at module load time - TODO confirm )
        import Local.CustomClasses
        import Mobyle.Classes

        klassName = python_name + "DataType"
        for module in ( Local.CustomClasses , Mobyle.Classes ):
            klass = getattr( module , klassName , None )
            if klass is not None:
                return klass
        return None

    def newDataType( self, pythonName , xmlName = None):
        """
        @param pythonName: the name of the python class of the datatype without the "DataType" suffix.
          A name starting with "Multiple" which does not match any class gives a
          MultipleDataType wrapping the datatype named by the rest of the name.
        @type pythonName: string
        @param xmlName: the name of the datatype when it differs from pythonName
        @type xmlName: string
        @return: the datatype registered under the name xmlName or pythonName. It is created
          and registered at the first call.
        @rtype: a datatype instance
        @raise MobyleError: if the name is not a string, if there is no datatype class for pythonName,
          or if the name is already registered with another python type.
        """
        realName = xmlName or pythonName
        try:
            realName = realName + "DataType"
        except TypeError:
            raise MobyleError( "the argument \"name\" must be a string ( %s received )" %str( type( realName )) )

        if realName in self.definedDataTypes:
            dt = self.definedDataTypes[ realName ]
            if pythonName != dt.getRealName() :
                # the requested python type is reported ( the original code reported
                # the registered name again ) and the whole message is logged
                msg = "consistency error: a \"%s\" is already defined with python type \"%s\" instead of \"%s\"" %( dt.getName() ,
                                                                                                                  dt.getRealName() ,
                                                                                                                  pythonName
                                                                                                                 )
                c_log.error( msg )
                raise MobyleError( msg )
            return dt

        # the class is looked up with getattr instead of building a string for eval:
        # a quote in xmlName cannot break the instantiation nor inject code any more
        klass = self._findDataTypeClass( pythonName )
        if klass is not None:
            dt = klass( xmlName or '' )
        elif pythonName.startswith( "Multiple" ):
            insideKlass = self._findDataTypeClass( pythonName[8:] )
            if insideKlass is None:
                raise MobyleError( "invalid DataType : %s" %( pythonName ) )
            # an empty or None name lets MultipleDataType compute its default name
            dt = MultipleDataType( insideKlass() , name = xmlName )
        else:
            raise MobyleError( "invalid DataType : %s" %( pythonName ) )

        self.definedDataTypes[ realName ] = dt
        return dt

    def issubclass(self , dataType1 , name2 , xmlName2= None ):
        """
        @param dataType1: the dataType to test
        @type dataType1: instance of a Datatype
        @param name2: the value of element superclass or class if there is no superclass
        @type name2:  string
        @param xmlName2: the value of element class when the element superclass is specified
        @type xmlName2:  string
        @return: True if the class of dataType1 is a subclass of the class of the datatype
          represented by name2 , xmlName2. False otherwise
        @rtype: boolean
        @raise TypeError: if an AttributeError occurs during the test
        """
        dataType2 = self.newDataType( name2 , xmlName= xmlName2 )
        try:
            # issubclass is the builtin here, the method name is not in the local scope
            return issubclass( dataType1.__class__ , dataType2.__class__ )
        except AttributeError:
            raise TypeError( "there is no DataType named "+ str( name2 ) )