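Description: Convert Python 2 syntax to Python 3 (print function, explicit relative imports, dict iteration methods, renamed stdlib modules, iterator protocol)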
Author: Nilesh Patra <nilesh@debian.org>
Last-Update: 2021-10-15
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -40,8 +40,8 @@
 master_doc = 'index'
 
 # General information about the project.
-project = u'PyVCF'
-copyright = u'2012, James Casbon, @jdoughertyii'
+project = 'PyVCF'
+copyright = '2012, James Casbon, @jdoughertyii'
 
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
@@ -179,8 +179,8 @@
 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title, author, documentclass [howto/manual]).
 latex_documents = [
-  ('index', 'PyVCF.tex', u'PyVCF Documentation',
-   u'James Casbon, @jdoughertyii', 'manual'),
+  ('index', 'PyVCF.tex', 'PyVCF Documentation',
+   'James Casbon, @jdoughertyii', 'manual'),
 ]
 
 # The name of an image file (relative to this directory) to place at the top of
@@ -212,6 +212,6 @@
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
 man_pages = [
-    ('index', 'pyvcf', u'PyVCF Documentation',
-     [u'James Casbon, @jdoughertyii'], 1)
+    ('index', 'pyvcf', 'PyVCF Documentation',
+     ['James Casbon, @jdoughertyii'], 1)
 ]
--- a/scripts/vcf_filter.py
+++ b/scripts/vcf_filter.py
@@ -108,7 +108,7 @@
     while len(args.rest):
         filter_name = args.rest.pop(0)
         if filter_name not in filters:
-            sys.exit("%s is not a known filter (%s)" % (filter_name, str(filters.keys())))
+            sys.exit("%s is not a known filter (%s)" % (filter_name, str(list(filters.keys()))))
 
         # create a parser only for arguments of current filter
         filt_parser = create_filt_parser(filter_name)
--- a/scripts/vcf_sample_filter.py
+++ b/scripts/vcf_sample_filter.py
@@ -34,6 +34,6 @@
     sf = SampleFilter(infile=args.file, outfile=args.o,
                       filters=args.f, invert=args.invert)
     if args.f is None:
-        print "Samples:"
+        print("Samples:")
         for idx, val in enumerate(sf.samples):
-            print "{0}: {1}".format(idx, val)
+            print("{0}: {1}".format(idx, val))
--- a/vcf/model.py
+++ b/vcf/model.py
@@ -362,7 +362,7 @@
         If there are i alleles with frequency p_i, H=1-sum_i(p_i^2)
         """
         allele_freqs = [1-sum(self.aaf)] + self.aaf
-        return 1 - sum(map(lambda x: x**2, allele_freqs))
+        return 1 - sum([x**2 for x in allele_freqs])
 
     def get_hom_refs(self):
         """ The list of hom ref genotypes"""
@@ -558,9 +558,8 @@
             return True
 
 
-class _AltRecord(object):
+class _AltRecord(object, metaclass=ABCMeta):
     '''An alternative allele record: either replacement string, SV placeholder, or breakend'''
-    __metaclass__ = ABCMeta
 
     def __init__(self, type, **kwargs):
         super(_AltRecord, self).__init__(**kwargs)
@@ -596,7 +595,7 @@
         return len(self.sequence)
 
     def __eq__(self, other):
-        if isinstance(other, basestring):
+        if isinstance(other, str):
             return self.sequence == other
         elif not isinstance(other, self.__class__):
             return False
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -22,8 +22,8 @@
 except ImportError:
     cparse = None
 
-from model import _Call, _Record, make_calldata_tuple
-from model import _Substitution, _Breakend, _SingleBreakend, _SV
+from .model import _Call, _Record, make_calldata_tuple
+from .model import _Substitution, _Breakend, _SingleBreakend, _SV
 
 
 # Metadata parsers/constants
@@ -468,7 +468,7 @@
 
         nfields = len(samp_fmt._fields)
 
-        for name, sample in itertools.izip(self.samples, samples):
+        for name, sample in zip(self.samples, samples):
 
             # parse the data for this sample
             sampdat = [None] * nfields
@@ -548,7 +548,7 @@
         else:
             return _Substitution(str)
 
-    def next(self):
+    def __next__(self):
         '''Return the next record in the file.'''
         line = next(self.reader)
         row = self._row_pattern.split(line.rstrip())
@@ -641,7 +641,7 @@
     """VCF Writer. On Windows Python 2, open stream with 'wb'."""
 
     # Reverse keys and values in header field count dictionary
-    counts = dict((v,k) for k,v in field_counts.iteritems())
+    counts = dict((v,k) for k,v in field_counts.items())
 
     def __init__(self, stream, template, lineterminator="\n"):
         self.writer = csv.writer(stream, delimiter="\t",
@@ -654,30 +654,30 @@
         # get a maximum key).
         self.info_order = collections.defaultdict(
             lambda: len(template.infos),
-            dict(zip(template.infos.iterkeys(), itertools.count())))
+            dict(list(zip(iter(template.infos.keys()), itertools.count()))))
 
         two = '##{key}=<ID={0},Description="{1}">\n'
         four = '##{key}=<ID={0},Number={num},Type={2},Description="{3}">\n'
         _num = self._fix_field_count
-        for (key, vals) in template.metadata.iteritems():
+        for (key, vals) in template.metadata.items():
             if key in SINGULAR_METADATA:
                 vals = [vals]
             for val in vals:
                 if isinstance(val, dict):
                     values = ','.join('{0}={1}'.format(key, value)
-                                      for key, value in val.items())
+                                      for key, value in list(val.items()))
                     stream.write('##{0}=<{1}>\n'.format(key, values))
                 else:
                     stream.write('##{0}={1}\n'.format(key, val))
-        for line in template.infos.itervalues():
+        for line in template.infos.values():
             stream.write(four.format(key="INFO", *line, num=_num(line.num)))
-        for line in template.formats.itervalues():
+        for line in template.formats.values():
             stream.write(four.format(key="FORMAT", *line, num=_num(line.num)))
-        for line in template.filters.itervalues():
+        for line in template.filters.values():
             stream.write(two.format(key="FILTER", *line))
-        for line in template.alts.itervalues():
+        for line in template.alts.values():
             stream.write(two.format(key="ALT", *line))
-        for line in template.contigs.itervalues():
+        for line in template.contigs.values():
             if line.length:
                 stream.write('##contig=<ID={0},length={1}>\n'.format(*line))
             else:
--- a/vcf/sample_filter.py
+++ b/vcf/sample_filter.py
@@ -7,7 +7,7 @@
 import warnings
 
 
-from parser import Reader, Writer
+from .parser import Reader, Writer
 
 
 class SampleFilter(object):
@@ -81,13 +81,13 @@
                 # is int, check if it's an idx
                 if item < len(self.samples):
                     return item
-        filters = set(filter(lambda x: x is not None, map(filt2idx, filt_s)))
+        filters = set([x for x in map(filt2idx, filt_s) if x is not None])
         if len(filters) < len(filt_s):
             # TODO print the filters that were ignored
             warnings.warn("Invalid filters, ignoring", RuntimeWarning)
 
         if self.invert:
-            filters = set(xrange(len(self.samples))).difference(filters)
+            filters = set(range(len(self.samples))).difference(filters)
 
         # `sample_filter` setter updates `samples`
         self.parser.sample_filter = filters
--- a/vcf/test/prof.py
+++ b/vcf/test/prof.py
@@ -19,7 +19,7 @@
 elif sys.argv[1] == 'time':
     n = 1
     t = timeit.timeit('parse_1kg()',  "from __main__ import parse_1kg", number=n)
-    print t/n
+    print(t/n)
 
 elif sys.argv[1] == 'stat':
     import statprof
@@ -30,4 +30,4 @@
         statprof.stop()
         statprof.display()
 else:
-    print 'prof.py profile/time'
+    print('prof.py profile/time')
--- a/vcf/test/test_vcf.py
+++ b/vcf/test/test_vcf.py
@@ -1,4 +1,4 @@
-from __future__ import print_function
+
 import unittest
 try:
     unittest.skip
@@ -6,9 +6,9 @@
     import unittest2 as unittest
 import doctest
 import os
-import commands
-import cPickle
-from StringIO import StringIO
+import subprocess
+import pickle
+from io import StringIO
 import subprocess
 import sys
 
@@ -137,7 +137,7 @@
         """Test VCF inputs with ##contig inputs containing only IDs. produced by bcftools 1.2+
         """
         reader = vcf.Reader(fh("contig_idonly.vcf"))
-        for cid, contig in reader.contigs.items():
+        for cid, contig in list(reader.contigs.items()):
             if cid == "1":
                 assert contig.length is None
             elif cid == "2":
@@ -390,24 +390,24 @@
         reader2 = vcf.Reader(out)
 
         for l, r in zip(records, reader2):
-            self.assertEquals(l.INFO, r.INFO)
+            self.assertEqual(l.INFO, r.INFO)
 
 
 class TestBadInfoFields(unittest.TestCase):
     def test_parse(self):
         reader = vcf.Reader(fh('bad-info-character.vcf'))
         record = next(reader)
-        self.assertEquals(record.INFO['DOT_1'], None)
-        self.assertEquals(record.INFO['DOT_3'], [None, None, None])
-        self.assertEquals(record.INFO['DOT_N'], [None])
-        self.assertEquals(record.INFO['EMPTY_1'], None)
+        self.assertEqual(record.INFO['DOT_1'], None)
+        self.assertEqual(record.INFO['DOT_3'], [None, None, None])
+        self.assertEqual(record.INFO['DOT_N'], [None])
+        self.assertEqual(record.INFO['EMPTY_1'], None)
         # Perhaps EMPTY_3 should yield [None, None, None] but this is really a
         # cornercase of unspecified behaviour.
-        self.assertEquals(record.INFO['EMPTY_3'], [None])
-        self.assertEquals(record.INFO['EMPTY_N'], [None])
-        self.assertEquals(record.INFO['NOTEMPTY_1'], 1)
-        self.assertEquals(record.INFO['NOTEMPTY_3'], [1, 2, 3])
-        self.assertEquals(record.INFO['NOTEMPTY_N'], [1])
+        self.assertEqual(record.INFO['EMPTY_3'], [None])
+        self.assertEqual(record.INFO['EMPTY_N'], [None])
+        self.assertEqual(record.INFO['NOTEMPTY_1'], 1)
+        self.assertEqual(record.INFO['NOTEMPTY_3'], [1, 2, 3])
+        self.assertEqual(record.INFO['NOTEMPTY_N'], [1])
         pass
 
 
@@ -440,7 +440,7 @@
         self.assertEqual(f['Options'], '"< 4 and > 3"')
 
         for l, r in zip(records, reader2):
-            self.assertEquals(l.INFO, r.INFO)
+            self.assertEqual(l.INFO, r.INFO)
 
 
 class TestGatkOutputWriter(unittest.TestCase):
@@ -463,13 +463,13 @@
         print (out_str)
         reader2 = vcf.Reader(out)
 
-        self.assertEquals(reader.samples, reader2.samples)
-        self.assertEquals(reader.formats, reader2.formats)
-        self.assertEquals(reader.infos, reader2.infos)
-        self.assertEquals(reader.contigs, reader2.contigs)
+        self.assertEqual(reader.samples, reader2.samples)
+        self.assertEqual(reader.formats, reader2.formats)
+        self.assertEqual(reader.infos, reader2.infos)
+        self.assertEqual(reader.contigs, reader2.contigs)
 
         for l, r in zip(records, reader2):
-            self.assertEquals(l.samples, r.samples)
+            self.assertEqual(l.samples, r.samples)
 
             # test for call data equality, since equality on the sample calls
             # may not always mean their data are all equal
@@ -493,12 +493,12 @@
         print (out.getvalue())
         reader2 = vcf.Reader(out)
 
-        self.assertEquals(reader.samples, reader2.samples)
-        self.assertEquals(reader.formats, reader2.formats)
-        self.assertEquals(reader.infos, reader2.infos)
+        self.assertEqual(reader.samples, reader2.samples)
+        self.assertEqual(reader.formats, reader2.formats)
+        self.assertEqual(reader.infos, reader2.infos)
 
         for l, r in zip(records, reader2):
-            self.assertEquals(l.samples, r.samples)
+            self.assertEqual(l.samples, r.samples)
 
             # test for call data equality, since equality on the sample calls
             # may not always mean their data are all equal
@@ -522,7 +522,7 @@
         out_str = out.getvalue()
         for line in out_str.split("\n"):
             if line.startswith("##PEDIGREE"):
-                self.assertEquals(line, '##PEDIGREE=<Derived="Tumor",Original="Germline">')
+                self.assertEqual(line, '##PEDIGREE=<Derived="Tumor",Original="Germline">')
             if line.startswith("##SAMPLE"):
                 assert line.startswith('##SAMPLE=<'), "Found dictionary in meta line: {0}".format(line)
 
@@ -955,7 +955,7 @@
     def test_pickle(self):
         reader = vcf.Reader(fh('example-4.0.vcf'))
         for var in reader:
-            self.assertEqual(cPickle.loads(cPickle.dumps(var)), var)
+            self.assertEqual(pickle.loads(pickle.dumps(var)), var)
 
 
     def assert_has_expected_coordinates(
@@ -1498,7 +1498,7 @@
     @unittest.skip("test currently broken")
     def testApplyFilter(self):
         # FIXME: broken with distribute
-        s, out = commands.getstatusoutput('python scripts/vcf_filter.py --site-quality 30 test/example-4.0.vcf sq')
+        s, out = subprocess.getstatusoutput('python scripts/vcf_filter.py --site-quality 30 test/example-4.0.vcf sq')
         #print(out)
         self.assertEqual(s, 0)
         buf = StringIO()
@@ -1528,7 +1528,7 @@
     @unittest.skip("test currently broken")
     def testApplyMultipleFilters(self):
         # FIXME: broken with distribute
-        s, out = commands.getstatusoutput('python scripts/vcf_filter.py --site-quality 30 '
+        s, out = subprocess.getstatusoutput('python scripts/vcf_filter.py --site-quality 30 '
         '--genotype-quality 50 test/example-4.0.vcf sq mgq')
         self.assertEqual(s, 0)
         #print(out)
@@ -1599,7 +1599,7 @@
                 assert recs[1] is not None
 
         # test files with many chromosomes, set 'vcf_record_sort_key' to define chromosome order
-        chr_order = map(str, range(1, 30)) + ['X', 'Y', 'M']
+        chr_order = list(map(str, list(range(1, 30)))) + ['X', 'Y', 'M']
         get_key = lambda r: (chr_order.index(r.CHROM.replace('chr','')), r.POS)
         reader1 = vcf.Reader(fh('issue-140-file1.vcf'))
         reader2 = vcf.Reader(fh('issue-140-file2.vcf'))
--- a/vcf/utils.py
+++ b/vcf/utils.py
@@ -37,19 +37,19 @@
         next_idx_to_k = dict(
             (i, get_key(r)) for i, r in enumerate(nexts) if r is not None)
         keys_with_prev_contig = [
-            k for k in next_idx_to_k.values() if k[0] == min_k[0]]
+            k for k in list(next_idx_to_k.values()) if k[0] == min_k[0]]
 
         if any(keys_with_prev_contig):
             min_k = min(keys_with_prev_contig)   # finish previous contig
         else:
             min_k = min(next_idx_to_k.values())   # move on to next contig
 
-        min_k_idxs = set([i for i, k in next_idx_to_k.items() if k == min_k])
+        min_k_idxs = set([i for i, k in list(next_idx_to_k.items()) if k == min_k])
         yield [nexts[i] if i in min_k_idxs else None for i in range(len(nexts))]
 
         for i in min_k_idxs:
             try:
-                nexts[i] = readers[i].next()
+                nexts[i] = next(readers[i])
             except StopIteration:
                 nexts[i] = None