Author: Andreas Tille <tille@debian.org>
Last-Update: 2015-06-24
Description: Port the tools/ Python scripts from Python 2 to Python 3 (2to3 conversion)
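 The shebangs are switched to /usr/bin/python3 and the bulk of the patch
 is the mechanical 2to3 rewrite: the print statement becomes the print()
 function (with file=sys.stderr replacing "print >>sys.stderr"), xrange
 becomes range, "d.has_key(k)" becomes "k in d", "raise E, msg" becomes
 "raise E(msg)", and .keys()/.items() results are wrapped in list() since
 they return views in Python 3.  Two constructs the mechanical conversion
 leaves broken, dict(map(None, ...)) and list.sort(cmpfunc), are rewritten
 with zip() and a key function.  The one structural change is the iterator
 protocol: the ColumnFile classes now define __next__() instead of next(),
 and callers use the next() builtin.  A minimal, self-contained sketch of
 that pattern (a hypothetical LineIterator class, not a copy of
 ColumnFile):
 .
     class LineIterator(object):
         def __init__(self, lines):
             self._lines = iter(lines)
         def __iter__(self):
             return self
         def __next__(self):             # Python 2 spelled this "next"
             return next(self._lines)    # Python 2: self._lines.next()
 .
     it = LineIterator(["a", "b"])
     assert next(it) == "a"              # the next() builtin dispatches to __next__
     assert [x for x in it] == ["b"]     # for-loops use the same protocol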

--- a/tools/ecoPCRFilter.py
+++ b/tools/ecoPCRFilter.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 import struct
 import sys
@@ -78,7 +78,7 @@ class Filter(object):
         file = self.__universalOpen(file)
         (recordCount,) = struct.unpack('> I',file.read(4))
     
-        for i in xrange(recordCount):
+        for i in range(recordCount):
             (recordSize,)=struct.unpack('>I',file.read(4))
             record = file.read(recordSize)
             yield record
@@ -236,12 +236,12 @@ class ColumnFile(object):
         elif hasattr(stream,'next'):
             self._stream = stream
         else:
-            raise ValueError,'stream must be string or an iterator'
+            raise ValueError('stream must be string or an iterator')
         self._delimiter=sep
         self._strip=strip
         if types:
             self._types=[x for x in types]
-            for i in xrange(len(self._types)):
+            for i in range(len(self._types)):
                 if self._types[i] is bool:
                     self._types[i]=ColumnFile.str2bool
         else:
@@ -257,16 +257,16 @@ class ColumnFile(object):
     def __iter__(self):
         return self
     
-    def next(self):
-        ligne = self._stream.next()
+    def __next__(self):
+        ligne = next(self._stream)
         while ligne[0] == self._skip:
-            ligne = self._stream.next()
+            ligne = next(self._stream)
         data = ligne.split(self._delimiter)
         if self._strip or self._types:
             data = [x.strip() for x in data]
         if self._types:
             it = self.endLessIterator(self._types)
-            data = [x[1](x[0]) for x in ((y,it.next()) for y in data)]
+            data = [x[1](x[0]) for x in ((y,next(it)) for y in data)]
         return data
     
     def endLessIterator(self,endedlist):
@@ -286,18 +286,19 @@ class Table(list):
         
     def printTable(self):
         for h in self.headers:
-            print "\t%s\t|" % h,
-        print "\n"
+            print("\t%s\t|" % h, end=' ')
+        print("\n")
         for l in self.lines:
             for c in l:
-                print "\t%s\t|" % c
-            print "\n"
+                print("\t%s\t|" % c)
+            print("\n")
             
     def getColumn(self,n):
-        print "\t%s\n" % self.header[n]
+        print("\t%s\n" % self.header[n])
         for i in range(len(self.lines)):
-            print "\t%s\n" % i[n]
+            print("\t%s\n" % i[n])
         
         
         
 
+
--- a/tools/ecoPCRFormat.py
+++ b/tools/ecoPCRFormat.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/python3
 
 import re
 import gzip
@@ -80,12 +80,12 @@ class ColumnFile(object):
         elif hasattr(stream,'next'):
             self._stream = stream
         else:
-            raise ValueError,'stream must be string or an iterator'
+            raise ValueError('stream must be string or an iterator')
         self._delimiter=sep
         self._strip=strip
         if types:
             self._types=[x for x in types]
-            for i in xrange(len(self._types)):
+            for i in range(len(self._types)):
                 if self._types[i] is bool:
                     self._types[i]=ColumnFile.str2bool
         else:
@@ -100,14 +100,14 @@ class ColumnFile(object):
     def __iter__(self):
         return self
     
-    def next(self):
-        ligne = self._stream.next()
+    def __next__(self):
+        ligne = next(self._stream)
         data = ligne.split(self._delimiter)
         if self._strip or self._types:
             data = [x.strip() for x in data]
         if self._types:
             it = endLessIterator(self._types)
-            data = [x[1](x[0]) for x in ((y,it.next()) for y in data)]
+            data = [x[1](x[0]) for x in ((y,next(it)) for y in data)]
         return data
     
 def taxonCmp(t1,t2):
@@ -149,22 +149,22 @@ def readNodeTable(file):
                               str,str,bool,
                               int,bool,int,
                               bool,bool,bool,str))
-    print >>sys.stderr,"Reading taxonomy dump file..."
+    print("Reading taxonomy dump file...", file=sys.stderr)
     taxonomy=[[n[0],n[2],n[1]] for n in nodes]
-    print >>sys.stderr,"List all taxonomy rank..."    
+    print("List all taxonomy rank...", file=sys.stderr)    
     ranks =list(set(x[1] for x in taxonomy))
     ranks.sort()
-    ranks = dict(map(None,ranks,xrange(len(ranks))))
+    ranks = dict(zip(ranks,range(len(ranks))))
     
-    print >>sys.stderr,"Sorting taxons..."
+    print("Sorting taxons...", file=sys.stderr)
-    taxonomy.sort(taxonCmp)
+    taxonomy.sort(key=lambda t: t[0])
 
-    print >>sys.stderr,"Indexing taxonomy..."
+    print("Indexing taxonomy...", file=sys.stderr)
     index = {}
     for t in taxonomy:
         index[t[0]]=bsearchTaxon(taxonomy, t[0])
     
-    print >>sys.stderr,"Indexing parent and rank..."
+    print("Indexing parent and rank...", file=sys.stderr)
     for t in taxonomy:
         t[1]=ranks[t[1]]
         t[2]=index[t[2]]
@@ -200,7 +200,7 @@ def deletedNodeIterator(file):
 def readTaxonomyDump(taxdir):
     taxonomy,ranks,index = readNodeTable('%s/nodes.dmp' % taxdir)
     
-    print >>sys.stderr,"Adding scientific name..."
+    print("Adding scientific name...", file=sys.stderr)
 
     alternativeName=[]
     for taxid,name,classname in nameIterator('%s/names.dmp' % taxdir):
@@ -208,11 +208,11 @@ def readTaxonomyDump(taxdir):
         if classname == 'scientific name':
             taxonomy[index[taxid]].append(name)
         
-    print >>sys.stderr,"Adding taxid alias..."
+    print("Adding taxid alias...", file=sys.stderr)
     for taxid,current in mergedNodeIterator('%s/merged.dmp' % taxdir):
         index[taxid]=index[current]
     
-    print >>sys.stderr,"Adding deleted taxid..."
+    print("Adding deleted taxid...", file=sys.stderr)
     for taxid in deletedNodeIterator('%s/delnodes.dmp' % taxdir):
         index[taxid]=None
     
@@ -453,11 +453,11 @@ def ecoSeqWriter(file,input,taxindex,par
                 skipped.append(entry['id'])
             where = universalTell(input)
             progressBar(where, inputsize)
-            print >>sys.stderr," Readed sequences : %d     " % seqcount,
+            print(" Readed sequences : %d     " % seqcount, end=' ', file=sys.stderr)
         else:
             skipped.append(entry['id'])
         
-    print >>sys.stderr
+    print(file=sys.stderr)
     output.seek(0,0)
     output.write(struct.pack('> I',seqcount))
     
@@ -478,7 +478,7 @@ def ecoRankWriter(file,ranks):
     output = open(file,'wb')
     output.write(struct.pack('> I',len(ranks)))
 
-    rankNames = ranks.keys()
+    rankNames = list(ranks.keys())
     rankNames.sort()
     
     for rank in rankNames:
@@ -521,8 +521,8 @@ def ecoDBWriter(prefix,taxonomy,seqFileN
                      taxonomy[3], 
                      parser)
         if sk:
-            print >>sys.stderr,"Skipped entry :"
-            print >>sys.stderr,sk
+            print("Skipped entry :", file=sys.stderr)
+            print(sk, file=sys.stderr)
         
 def ecoParseOptions(arguments):
     opt = {
@@ -562,25 +562,25 @@ def ecoParseOptions(arguments):
             opt['parser']=sequenceIteratorFactory(emblEntryParser,
                                                   entryIterator)
         else:
-            raise ValueError,'Unknown option %s' % name
+            raise ValueError('Unknown option %s' % name)
 
     return opt,filenames
 
 
 def printHelp():
-    print "-----------------------------------"
-    print " ecoPCRFormat.py"
-    print "-----------------------------------"
-    print "ecoPCRFormat.py [option] <argument>"
-    print "-----------------------------------"
-    print "-e    --embl        :[E]mbl format"
-    print "-f    --fasta       :[F]asta format"
-    print "-g    --genbank     :[G]enbank format"
-    print "-h    --help        :[H]elp - print this help"
-    print "-n    --name        :[N]ame of the new database created"
-    print "-t    --taxonomy    :[T]axonomy - path to the taxonomy database"
-    print "                    :bcp-like dump from GenBank taxonomy database."
-    print "-----------------------------------"
+    print("-----------------------------------")
+    print(" ecoPCRFormat.py")
+    print("-----------------------------------")
+    print("ecoPCRFormat.py [option] <argument>")
+    print("-----------------------------------")
+    print("-e    --embl        :[E]mbl format")
+    print("-f    --fasta       :[F]asta format")
+    print("-g    --genbank     :[G]enbank format")
+    print("-h    --help        :[H]elp - print this help")
+    print("-n    --name        :[N]ame of the new database created")
+    print("-t    --taxonomy    :[T]axonomy - path to the taxonomy database")
+    print("                    :bcp-like dump from GenBank taxonomy database.")
+    print("-----------------------------------")
 
 if __name__ == '__main__':
     
@@ -590,3 +590,4 @@ if __name__ == '__main__':
     
     ecoDBWriter(opt['prefix'], taxonomy, filenames, opt['parser'])
     
+
--- a/tools/ecoSort.py
+++ b/tools/ecoSort.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 import struct
 import sys
@@ -98,7 +98,7 @@ class Filter(object):
         file = self.__universalOpen(file)
         (recordCount,) = struct.unpack('> I',file.read(4))
     
-        for i in xrange(recordCount):
+        for i in range(recordCount):
             (recordSize,)=struct.unpack('>I',file.read(4))
             record = file.read(recordSize)
             yield record
@@ -283,12 +283,12 @@ class ColumnFile(object):
         elif hasattr(stream,'next'):
             self._stream = stream
         else:
-            raise ValueError,'stream must be string or an iterator'
+            raise ValueError('stream must be string or an iterator')
         self._delimiter=sep
         self._strip=strip
         if types:
             self._types=[x for x in types]
-            for i in xrange(len(self._types)):
+            for i in range(len(self._types)):
                 if self._types[i] is bool:
                     self._types[i]=ColumnFile.str2bool
         else:
@@ -305,16 +305,16 @@ class ColumnFile(object):
     def __iter__(self):
         return self
     
-    def next(self):
-        ligne = self._stream.next()
+    def __next__(self):
+        ligne = next(self._stream)
         while ligne[0] == self._skip:
-            ligne = self._stream.next()
+            ligne = next(self._stream)
         data = ligne.split(self._delimiter)
         if self._strip or self._types:
             data = [x.strip() for x in data]
         if self._types:
             it = self.endLessIterator(self._types)
-            data = [x[1](x[0]) for x in ((y,it.next()) for y in data)]
+            data = [x[1](x[0]) for x in ((y,next(it)) for y in data)]
         return data
     
     def endLessIterator(self,endedlist):
@@ -396,7 +396,7 @@ def _parseOligoResult(filter,file,strand
     for s in filter.ecoPCRResultIterator(file):
         o = s[key]
         taxid = s['taxid']
-        if not seq.has_key(o):
+        if o not in seq:
             seq[o] = [1,taxid]
         else:
             seq[o][0] = seq[o][0] + 1
@@ -410,7 +410,7 @@ def _parseTaxonomyResult(table):
         taxid = l[2]
         scName = l[3]
         count = l[1]
-        if not tax.has_key(taxid):
+        if taxid not in tax:
             tax[taxid] = [1,scName,count]
         else:
             tax[taxid][0] = tax[taxid][0] + 1
@@ -464,12 +464,12 @@ def customSort(table,x,y):
     tmp = {}
     
     for l in table:
-        if tmp.has_key(l[x]):
+        if l[x] in tmp:
             tmp[l[x]] = tmp[l[x]] + l[y]
         else:
             tmp[l[x]] = l[y]
     
-    for k,v in tmp.items():
+    for k,v in list(tmp.items()):
         cTable.append([k,v])
     
     return cTable
@@ -484,12 +484,12 @@ def countColumnOccurrence(table,x):
     tmp = {}
     
     for l in table:
-        if tmp.has_key(l[x]):
+        if l[x] in tmp:
             tmp[l[x]] = tmp[l[x]] + 1
         else:
             tmp[l[x]] = 1
     
-    for k,v in tmp.items():
+    for k,v in list(tmp.items()):
         cTable.append([k,v])
     
     return cTable
@@ -502,15 +502,15 @@ def buildSpecificityTable(table):
     
     tmp = {}
     for l in table:
-        if not tmp.has_key(l[5]):
+        if l[5] not in tmp:
             tmp[l[5]] = {}
-        if not tmp[l[5]].has_key(l[3]):
+        if l[3] not in tmp[l[5]]:
             tmp[l[5]][l[3]] = l[1]
         else:
             tmp[l[5]][l[3]] = tmp[l[5]][l[3]] + l[1]
     
-    for mismatch in tmp.items():
-        for taxon,count in mismatch[1].items():
+    for mismatch in list(tmp.items()):
+        for taxon,count in list(mismatch[1].items()):
             speTable.append([mismatch[0],taxon,count])     
 
     return speTable
@@ -531,7 +531,7 @@ def buildOligoTable(table, file, filter,
     seq = _parseOligoResult(filter, file, strand)
     
     i = 0
-    for oligo, info in seq.items():
+    for oligo, info in list(seq.items()):
         table.append(0)
         count, lctTaxid = info[0], info[1]
         scName = filter.findTaxonByTaxid(info[1])[3]
@@ -554,7 +554,7 @@ def buildTaxonomicTable(table):
     tax = _parseTaxonomyResult(table) 
   
     i = 0
-    for taxid, info in tax.items():
+    for taxid, info in list(tax.items()):
         taxTable.append(0)
         numOfOligo, scName, numOfAmpl = info[0], info[1], info[2]
         taxTable[i]=[scName,numOfOligo,numOfAmpl,taxid]
@@ -578,9 +578,9 @@ def _parseSequenceResult(filter,file,id)
     for s in filter.ecoPCRResultIterator(file):
         seq = s['sq_des']
         id = s[key]
-        if not idIndex.has_key(id):
+        if id not in idIndex:
             idIndex[id] = []
-        if not sequences.has_key(seq):
+        if seq not in sequences:
             sequences[seq] = [id]
         else:
             sequences[seq].append(id)
@@ -598,7 +598,7 @@ def _sortSequences(file,filter):
     
     sequences, idIndex = _parseSequenceResult(filter,file,'species')
     
-    for s,id in sequences.items():
+    for s,id in list(sequences.items()):
         if len(id) == 1 or _sameValuesInList(id):
             idIndex[id[0]].append(1)
         else:
@@ -606,7 +606,7 @@ def _sortSequences(file,filter):
                 idIndex[e].append(0)
     
    
-    for id,values in idIndex.items():
+    for id,values in list(idIndex.items()):
         idIndex[id] = float(values.count(1)) / float(len(values)) * 100
 
             
@@ -622,15 +622,15 @@ def getIntraSpeciesDiversity(table,file,
     
     seq, idIndex = _sortSequences(file,filter)
     
-    for id,percent in idIndex.items():
+    for id,percent in list(idIndex.items()):
         if percent == 100:
             intraDiv[id] = [0,[]]
-            for seq,idList in sequences.items():
+            for seq,idList in list(sequences.items()):
                 if id in idList:
                     intraDiv[id][0] = intraDiv[id][0] + 1
                     intraDiv[id][1].append(seq)
                     
-    for id, values in intraDiv.items():
+    for id, values in list(intraDiv.items()):
         table.append(id,values[0],values[1])
                     
     
@@ -649,10 +649,10 @@ def printTable(table):
     """
 
     format = ("%20s | " * len(table.headers))[:-3]
-    print format % tuple([str(e) for e in table.headers ]) +"\n" + "-"*23*len(table.headers)
+    print(format % tuple([str(e) for e in table.headers ]) +"\n" + "-"*23*len(table.headers))
     for l in table:
-        print format % tuple([str(e) for e in l ])
-    print "# %d results" % len(table)
+        print(format % tuple([str(e) for e in l ]))
+    print("# %d results" % len(table))
        
         
 def saveAsCSV(table,path):
@@ -809,3 +809,4 @@ def start():
 
 
 
+
