# Copyright 2001-2004 by Brad Chapman.  All rights reserved.
# Revisions copyright 2007-2016 by Peter Cock. All rights reserved.
# Revisions copyright 2013 by Kai Blin. All rights reserved.
# Revisions copyright 2015-2016 by Peter Cock.
# Revisions copyright 2019 by Sergio Valqui.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

"""Tests for the GenBank module."""


import os
import sys
import unittest
import warnings
from datetime import datetime

from io import StringIO

from Bio import BiopythonWarning
from Bio import BiopythonParserWarning

from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq

# GenBank stuff to test:
from Bio import GenBank


class TestBasics(unittest.TestCase):
    def do_comparison(self, good_record, test_record):
        """Compare two records to see if they are the same.

        This compares the two GenBank records line by line.
        """
        good_handle = StringIO(good_record)
        test_handle = StringIO(test_record)
        while True:
            good_line = good_handle.readline()
            test_line = test_handle.readline()
            if not good_line and not test_line:
                break
            self.assertTrue(good_line, "Extra info in Test: %r" % test_line)
            self.assertTrue(test_line, "Extra info in Expected: %r" % good_line)
            test_normalized = " ".join(x for x in test_line.split() if x)
            good_normalized = " ".join(x for x in good_line.split() if x)
            self.assertEqual(test_normalized, good_normalized)

    def test_write_format(self):
        """Test writing to the difference formats."""
        # We only test writing on a subset of the examples:
        filenames = [
            "noref.gb",
            "cor6_6.gb",
            "iro.gb",
            "pri1.gb",
            "arab1.gb",
            "extra_keywords.gb",
            "one_of.gb",
            "origin_line.gb",
        ]
        # don't test writing on protein_refseq, since it is horribly nasty
        # don't test writing on the CONTIG refseq, because the wrapping of
        # locations won't work exactly
        # don't test writing on blank_seq because it lacks a sequence type
        # don't test dbsource_wrap because it is a junky RefSeq file
        record_parser = GenBank.RecordParser(debug_level=0)
        for filename in filenames:
            path = os.path.join("GenBank", filename)
            with open(path) as cur_handle, open(path) as compare_handle:
                iterator = GenBank.Iterator(cur_handle, record_parser)
                compare_iterator = GenBank.Iterator(compare_handle)
                while True:
                    cur_rec = next(iterator)
                    compare_record = next(compare_iterator)
                    if cur_rec is None or compare_record is None:
                        break
                    output_record = str(cur_rec) + "\n"
                    self.do_comparison(compare_record, output_record)

    def test_cleaning_features(self):
        """Test the ability to clean up feature values."""
        gb_parser = GenBank.FeatureParser(
            feature_cleaner=GenBank.utils.FeatureValueCleaner()
        )
        path = "GenBank/arab1.gb"
        with open(path) as handle:
            iterator = GenBank.Iterator(handle, gb_parser)
            first_record = next(iterator)
        # test for cleaning of translation
        translation_feature = first_record.features[1]
        test_trans = translation_feature.qualifiers["translation"][0]
        self.assertNotIn(" ", test_trans, "Did not clean spaces out of the translation")
        self.assertNotIn(
            "\012", test_trans, "Did not clean newlines out of the translation"
        )

    def test_ensembl_locus(self):
        """Test the ENSEMBL locus line."""
        line = "LOCUS       HG531_PATCH 1000000 bp DNA HTG 18-JUN-2011\n"
        s = GenBank.Scanner.GenBankScanner()
        c = GenBank._FeatureConsumer(True)
        s._feed_first_line(c, line)
        self.assertEqual(c.data.name, "HG531_PATCH")
        self.assertEqual(c._expected_size, 1000000)
        line = "LOCUS       HG531_PATCH 759984 bp DNA HTG 18-JUN-2011\n"
        s = GenBank.Scanner.GenBankScanner()
        c = GenBank._FeatureConsumer(True)
        s._feed_first_line(c, line)
        self.assertEqual(c.data.name, "HG531_PATCH")
        self.assertEqual(c._expected_size, 759984)
        line = "LOCUS       HG506_HG1000_1_PATCH 814959 bp DNA HTG 18-JUN-2011\n"
        s = GenBank.Scanner.GenBankScanner()
        c = GenBank._FeatureConsumer(True)
        s._feed_first_line(c, line)
        self.assertEqual(c.data.name, "HG506_HG1000_1_PATCH")
        self.assertEqual(c._expected_size, 814959)
        line = "LOCUS       HG506_HG1000_1_PATCH 1219964 bp DNA HTG 18-JUN-2011\n"
        s = GenBank.Scanner.GenBankScanner()
        c = GenBank._FeatureConsumer(True)
        s._feed_first_line(c, line)
        self.assertEqual(c.data.name, "HG506_HG1000_1_PATCH")
        self.assertEqual(c._expected_size, 1219964)


class TestRecordParser(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        """Create the RecordParser stored on the class as ``rec_parser``."""
        parser = GenBank.RecordParser(debug_level=0)
        cls.rec_parser = parser

    def perform_record_parser_test(
        self,
        record,
        length,
        locus,
        definition,
        accession,
        titles,
        features,
        tls=None,
        tsa=None,
    ):
        self.assertEqual(len(record.sequence), length)
        self.assertEqual(record.locus, locus)
        self.assertEqual(record.definition, definition)
        self.assertEqual(record.accession, accession)
        self.assertEqual(
            tuple(reference.title for reference in record.references), titles
        )
        self.assertEqual(len(record.features), len(features))
        for feature1, feature2 in zip(record.features, features):
            self.assertEqual(feature1.key, feature2[0])
            self.assertEqual(feature1.location, feature2[1])
            self.assertEqual(len(feature1.qualifiers), len(feature2[2]))
            for qualifier, (key, value) in zip(feature1.qualifiers, feature2[2]):
                self.assertEqual(qualifier.key, key)
                self.assertEqual(qualifier.value, value)
        if tls:
            self.assertEqual(tls, record.tls)
        if tsa:
            self.assertEqual(tsa, record.tsa)

    def test_record_parser_01(self):
        """Check the first record parsed from noref.gb."""
        with open("GenBank/noref.gb") as stream:
            record = next(GenBank.Iterator(stream, self.rec_parser))
        expected_definition = (
            "Homo sapiens dynein, cytoplasmic, light intermediate polypeptide 2 "
            "(DNCLI2), mRNA"
        )
        # Each entry is (feature key, location, ((qualifier key, value), ...)).
        expected_features = [
            (
                "source",
                "1..1622",
                (
                    ("/organism=", '"Homo sapiens"'),
                    ("/db_xref=", '"taxon:9606"'),
                    ("/map=", '"16"'),
                ),
            ),
            (
                "gene",
                "1..1622",
                (
                    ("/gene=", '"DNCLI2"'),
                    ("/note=", '"LIC2"'),
                    ("/db_xref=", '"LocusID:1783"'),
                ),
            ),
            (
                "CDS",
                "7..1485",
                (
                    ("/gene=", '"DNCLI2"'),
                    (
                        "/note=",
                        '"similar to R. norvegicus and G. gallus dynein light '
                        "intermediate chain 2, Swiss-Prot Accession Numbers Q62698 and "
                        'Q90828, respectively"',
                    ),
                    ("/codon_start=", "1"),
                    ("/db_xref=", '"LocusID:1783"'),
                    (
                        "/product=",
                        '"dynein, cytoplasmic, light intermediate polypeptide 2"',
                    ),
                    ("/protein_id=", '"NP_006132.1"'),
                    ("/db_xref=", '"GI:5453634"'),
                    (
                        "/translation=",
                        '"MAPVGVEKKLLLGPNGPAVAAAGDLTSEEEEGQSLWSSILSEVSTRARSKLPSGKNILVFG'
                        "EDGSGKTTLMTKLQGAEHGKKGRGLEYLYLSVHDEDRDDHTRCNVWILDGDLYHKGLLKFAV"
                        "SAESLPETLVIFVADMSRPWTVMESLQKWASVLREHIDKMKIPPEKMRELERKFVKDFQDYM"
                        "EPEEGCQGSPQRRGPLTSGSDEENVALPLGDNVLTHNLGIPVLVVCTKCDAVSVLEKEHDYR"
                        "DEHLDFIQSHLRRFCLQYGAALIYTSVKEEKNLDLLYKYIVHKTYGFHFTTPALVVEKDAVF"
                        "IPAGWDNEKKIAILHENFTTVKPEDAYEDFIVKPPVRKLVHDKELAAEDEQVFLMKQQSLLA"
                        "KQPATPTRASESPARGPSGSPRTQGRGGPASVPSSSPGTSVKKPDPNIKNNAASEGVLASFF"
                        'NSLLSKKTGSPGSPGAGGVQSTAKKSGQKTVLSNVQEELDRMTRKPDSMVTNSSTENEA"',
                    ),
                ),
            ),
        ]
        self.perform_record_parser_test(
            record,
            length=1622,
            locus="NM_006141",
            definition=expected_definition,
            accession=["NM_006141"],
            titles=(),
            features=expected_features,
        )

    def test_record_parser_02(self):
        """Check the first six records parsed from cor6_6.gb."""
        # Every next() call stays inside the with block; presumably the
        # iterator reads from the handle on demand, so the file must remain
        # open until the last record has been fetched.
        with open("GenBank/cor6_6.gb") as stream:
            parsed = GenBank.Iterator(stream, self.rec_parser)

            # Record 1: ATCOR66M
            expected_titles = (
                "Direct Submission",
                "cDNA sequence analysis and expression of two cold-regulated genes of "
                "Arabidopsis thaliana",
            )
            expected_features = [
                (
                    "source",
                    "1..513",
                    (
                        ("/organism=", '"Arabidopsis thaliana"'),
                        ("/strain=", '"Columbia"'),
                        ("/db_xref=", '"taxon:3702"'),
                    ),
                ),
                ("gene", "50..250", (("/gene=", '"cor6.6"'),)),
                (
                    "CDS",
                    "50..250",
                    (
                        ("/gene=", '"cor6.6"'),
                        ("/note=", '"cold regulated"'),
                        ("/codon_start=", "1"),
                        ("/protein_id=", '"CAA38894.1"'),
                        ("/db_xref=", '"GI:16230"'),
                        ("/db_xref=", '"SWISS-PROT:P31169"'),
                        (
                            "/translation=",
                            '"MSETNKNAFQAGQAAGKAEEKSNVLLDKAKDAAAAAGASAQQAGKSISDAAVGGVNF'
                            'VKDKTGLNK"',
                        ),
                    ),
                ),
            ]
            self.perform_record_parser_test(
                next(parsed),
                length=513,
                locus="ATCOR66M",
                definition="A.thaliana cor6.6 mRNA",
                accession=["X55053"],
                titles=expected_titles,
                features=expected_features,
            )

            # Record 2: ATKIN2
            expected_titles = (
                "Direct Submission",
                "Structure and expression of kin2, one of two cold- and ABA-induced "
                "genes of Arabidopsis thaliana",
            )
            expected_features = [
                (
                    "source",
                    "1..880",
                    (
                        ("/organism=", '"Arabidopsis thaliana"'),
                        ("/strain=", '"ssp. L. Heynh, Colombia"'),
                        ("/db_xref=", '"taxon:3702"'),
                    ),
                ),
                ("TATA_signal", "9..20", ()),
                ("exon", "44..160", (("/gene=", '"kin2"'), ("/number=", "1"))),
                ("prim_transcript", "44..>579", (("/gene=", '"kin2"'),)),
                ("mRNA", "join(44..160,320..390,504..>579)", (("/gene=", '"kin2"'),)),
                ("gene", "44..579", (("/gene=", '"kin2"'),)),
                (
                    "CDS",
                    "join(104..160,320..390,504..579)",
                    (
                        ("/gene=", '"kin2"'),
                        ("/codon_start=", "1"),
                        ("/protein_id=", '"CAA44171.1"'),
                        ("/db_xref=", '"GI:16354"'),
                        ("/db_xref=", '"SWISS-PROT:P31169"'),
                        (
                            "/translation=",
                            '"MSETNKNAFQAGQAAGKAERRRAMFCWTRPRMLLLQLELPRNRAGKSISDAAVGGVN'
                            'FVKDKTGLNK"',
                        ),
                    ),
                ),
                ("intron", "161..319", (("/gene=", '"kin2"'), ("/number=", "1"))),
                ("exon", "320..390", (("/gene=", '"kin2"'), ("/number=", "2"))),
                ("intron", "391..503", (("/gene=", '"kin2"'), ("/number=", "2"))),
                ("exon", "504..>579", (("/gene=", '"kin2"'), ("/number=", "3"))),
                ("polyA_signal", "620..625", ()),
                ("polyA_signal", "641..646", ()),
                ("polyA_site", "785", ()),
                ("polyA_site", "800", ()),
            ]
            self.perform_record_parser_test(
                next(parsed),
                length=880,
                locus="ATKIN2",
                definition="A.thaliana kin2 gene",
                accession=["X62281"],
                titles=expected_titles,
                features=expected_features,
            )

            # Record 3: BNAKINI
            expected_titles = ("Nucleotide sequence of a winter B. napus Kin 1 cDNA",)
            expected_features = [
                (
                    "source",
                    "1..441",
                    (
                        ("/organism=", '"Brassica napus"'),
                        ("/cultivar=", '"Jet neuf"'),
                        ("/db_xref=", '"taxon:3708"'),
                        ("/dev_stage=", '"cold induced"'),
                        ("/tissue_type=", '"leaf"'),
                    ),
                ),
                ("gene", "34..300", (("/gene=", '"kin1"'),)),
                (
                    "CDS",
                    "34..231",
                    (
                        ("/gene=", '"kin1"'),
                        ("/codon_start=", "1"),
                        ("/evidence=", "experimental"),
                        ("/protein_id=", '"AAA32993.1"'),
                        ("/db_xref=", '"GI:167146"'),
                        (
                            "/translation=",
                            '"MADNKQSFQAGQASGRAEEKGNVLMDKVKDAATAAGASAQTAGQKITEAAGGAVNLV'
                            'KEKTGMNK"',
                        ),
                    ),
                ),
                (
                    "polyA_signal",
                    "241..247",
                    (("/gene=", '"kin1"'), ("/note=", '"putative"')),
                ),
                (
                    "polyA_signal",
                    "294..300",
                    (("/gene=", '"kin1"'), ("/note=", '"putative"')),
                ),
                ("polyA_site", "441", (("/gene=", '"kin1"'),)),
            ]
            self.perform_record_parser_test(
                next(parsed),
                length=441,
                locus="BNAKINI",
                definition="Rapeseed Kin1 protein (kin1) mRNA, complete cds",
                accession=["M81224"],
                titles=expected_titles,
                features=expected_features,
            )

            # Record 4: ARU237582
            expected_titles = ("", "Direct Submission")
            expected_features = [
                (
                    "source",
                    "1..206",
                    (
                        ("/organism=", '"Armoracia rusticana"'),
                        ("/db_xref=", '"taxon:3704"'),
                        ("/country=", '"Russia:Bashkortostan"'),
                    ),
                ),
                ("mRNA", "join(<1..48,143..>206)", (("/gene=", '"csp14"'),)),
                ("exon", "1..48", (("/gene=", '"csp14"'), ("/number=", "2"))),
                ("gene", "1..206", (("/gene=", '"csp14"'),)),
                (
                    "CDS",
                    "join(<1..48,143..>206)",
                    (
                        ("/gene=", '"csp14"'),
                        ("/codon_start=", "2"),
                        ("/product=", '"cold shock protein"'),
                        ("/protein_id=", '"CAB39890.1"'),
                        ("/db_xref=", '"GI:4538893"'),
                        ("/translation=", '"DKAKDAAAAAGASAQQAGKNISDAAAGGVNFVKEKTG"'),
                    ),
                ),
                ("intron", "49..142", (("/gene=", '"csp14"'), ("/number=", "2"))),
                ("exon", "143..206", (("/gene=", '"csp14"'), ("/number=", "3"))),
            ]
            self.perform_record_parser_test(
                next(parsed),
                length=206,
                locus="ARU237582",
                definition="Armoracia rusticana csp14 gene (partial), exons 2-3",
                accession=["AJ237582"],
                titles=expected_titles,
                features=expected_features,
            )

            # Record 5: BRRBIF72
            expected_titles = ("Nucleotide sequences of kin gene in chinese cabbage",)
            expected_features = [
                (
                    "source",
                    "1..282",
                    (
                        ("/organism=", '"Brassica rapa"'),
                        ("/db_xref=", '"taxon:3711"'),
                        ("/dev_stage=", '"flower"'),
                    ),
                ),
                ("gene", "24..221", (("/gene=", '"kin"'),)),
                (
                    "CDS",
                    "24..221",
                    (
                        ("/gene=", '"kin"'),
                        ("/codon_start=", "1"),
                        ("/protein_id=", '"AAA91051.1"'),
                        ("/db_xref=", '"GI:1209262"'),
                        (
                            "/translation=",
                            '"MADNKQSFQAGQAAGRAEEKGNVLLMDKVKDAATAAGALQTAGQKITEAAGGAVNLV'
                            'KEKTGMNK"',
                        ),
                    ),
                ),
            ]
            self.perform_record_parser_test(
                next(parsed),
                length=282,
                locus="BRRBIF72",
                definition="Brassica rapa (clone bif72) kin mRNA, complete cds",
                accession=["L31939"],
                titles=expected_titles,
                features=expected_features,
            )

            # Record 6: AF297471
            expected_titles = (
                "BN28a, a low temperature-induced gene of Brassica napus",
                "Direct Submission",
            )
            expected_features = [
                (
                    "source",
                    "1..497",
                    (
                        ("/organism=", '"Brassica napus"'),
                        ("/cultivar=", '"Cascade"'),
                        ("/db_xref=", '"taxon:3708"'),
                    ),
                ),
                (
                    "mRNA",
                    "join(<1..54,241..309,423..>497)",
                    (("/gene=", '"BN28a"'), ("/product=", '"BN28a"')),
                ),
                ("gene", "<1..>497", (("/gene=", '"BN28a"'),)),
                (
                    "CDS",
                    "join(1..54,241..309,423..497)",
                    (
                        ("/gene=", '"BN28a"'),
                        (
                            "/note=",
                            '"low temperature-induced; similar to Brassica napus Kin1 '
                            'in Accession Number M81224"',
                        ),
                        ("/codon_start=", "1"),
                        ("/product=", '"BN28a"'),
                        ("/protein_id=", '"AAG13407.1"'),
                        ("/db_xref=", '"GI:10121869"'),
                        (
                            "/translation=",
                            '"MADNKQSFQAGQAAGRAEEKGNVLMDKVKDAATAAGASAQTAGQKITEAAGGAVNLV'
                            'KEKTGMNK"',
                        ),
                    ),
                ),
            ]
            self.perform_record_parser_test(
                next(parsed),
                length=497,
                locus="AF297471",
                definition="Brassica napus BN28a (BN28a) gene, complete cds",
                accession=["AF297471"],
                titles=expected_titles,
                features=expected_features,
            )

    def test_record_parser_03(self):
        """Check the first record parsed from iro.gb."""
        with open("GenBank/iro.gb") as stream:
            record = next(GenBank.Iterator(stream, self.rec_parser))
        expected_definition = (
            "Homo sapiens mRNA full length insert cDNA clone EUROIMAGE 125195"
        )
        expected_titles = (
            "The European IMAGE consortium for integrated Molecular analysis "
            "of human gene transcripts",
            "Direct Submission",
        )
        # Each entry is (feature key, location, ((qualifier key, value), ...)).
        expected_features = [
            (
                "source",
                "1..1326",
                (
                    ("/organism=", '"Homo sapiens"'),
                    ("/db_xref=", '"taxon:9606"'),
                    ("/chromosome=", '"21"'),
                    ("/clone=", '"IMAGE cDNA clone 125195"'),
                    ("/clone_lib=", '"Soares fetal liver spleen 1NFLS"'),
                    (
                        "/note=",
                        '"contains Alu repeat; '
                        "likely to be be derived from unprocessed nuclear RNA "
                        "or genomic DNA; "
                        "encodes putative exons identical to FTCD; "
                        "formimino transferase cyclodeaminase; "
                        "formimino transferase (EC 2.1.2.5) "
                        '/formimino tetrahydro folate cyclodeaminase (EC 4.3.1.4)"',
                    ),
                ),
            ),
            ("gene", "341..756", (("/gene=", '"FTCD"'),)),
            ("exon", "341..384", (("/gene=", '"FTCD"'), ("/number=", "1"))),
            ("intron", "385..617", (("/gene=", '"FTCD"'), ("/number=", "1"))),
            ("exon", "618..756", (("/gene=", '"FTCD"'), ("/number=", "2"))),
        ]
        self.perform_record_parser_test(
            record,
            length=1326,
            locus="IRO125195",
            definition=expected_definition,
            accession=["AL109817"],
            titles=expected_titles,
            features=expected_features,
        )

    def test_record_parser_04(self):
        """Check the first record parsed from pri1.gb."""
        with open("GenBank/pri1.gb") as stream:
            record = next(GenBank.Iterator(stream, self.rec_parser))
        expected_titles = (
            "Regulation of expression of the human fructose transporter (GLUT5) by "
            "cyclic AMP",
            "Direct Submission",
        )
        # Each entry is (feature key, location, ((qualifier key, value), ...)).
        expected_features = [
            (
                "source",
                "1..741",
                (
                    ("/organism=", '"Homo sapiens"'),
                    ("/db_xref=", '"taxon:9606"'),
                    ("/chromosome=", '"1"'),
                    ("/map=", '"1p31"'),
                    ("/clone=", '"lambda hGT5-157"'),
                    ("/tissue_type=", '"liver"'),
                    (
                        "/clone_lib=",
                        '"partial Hae III/Alu I fetal human liver library in lambda '
                        'Ch4A of Maniatis"',
                    ),
                    ("/dev_stage=", '"fetal"'),
                ),
            ),
            ("repeat_region", "1..73", (("/rpt_family=", '"Alu"'),)),
            ("promoter", "1..513", ()),
            ("5'UTR", "514..609", (("/gene=", '"GLUT5"'),)),
            (
                "exon",
                "514..642",
                (
                    ("/gene=", '"GLUT5"'),
                    ("/number=", "1"),
                    ("/product=", '"fructose transporter"'),
                ),
            ),
        ]
        self.perform_record_parser_test(
            record,
            length=741,
            locus="HUGLUT1",
            definition="Human fructose transporter (GLUT5) gene, promoter and exon 1",
            accession=["U05344"],
            titles=expected_titles,
            features=expected_features,
        )

    def test_record_parser_05(self):
        path = "GenBank/arab1.gb"
        with open(path) as handle:
            records = GenBank.Iterator(handle, self.rec_parser)
            record = next(records)
        length = 86436
        locus = "AC007323"
        definition = (
            "Genomic sequence for Arabidopsis thaliana BAC T25K16 from chromosome I, "
            "complete sequence"
        )
        accession = ["AC007323"]
        titles = (
            "Genomic sequence for Arabidopsis thaliana BAC T25K16 from chromosome I",
            "Direct Submission",
            "Direct Submission",
            "Direct Submission",
            "Direct Submission",
        )
        features = [
            (
                "source",
                "1..86436",
                (
                    ("/organism=", '"Arabidopsis thaliana"'),
                    ("/db_xref=", '"taxon:3702"'),
                    ("/chromosome=", '"1"'),
                    ("/clone=", '"T25K16"'),
                ),
            ),
            (
                "CDS",
                "join(3462..3615,3698..3978,4077..4307,4408..4797,4876..5028,5141..5332)",
                (
                    (
                        "/note=",
                        '"containing similarity to NAM-like proteins gi|3695378"',
                    ),
                    ("/codon_start=", "1"),
                    ("/evidence=", "not_experimental"),
                    ("/product=", '"T25K16.1"'),
                    ("/protein_id=", '"AAF26460.1"'),
                    ("/db_xref=", '"GI:6715633"'),
                    (
                        "/translation=",
                        '"MEDQVGFGFRPNDEELVGHYLRNKIEGNTSRDVEVAISEVNICSYDPWNLRFQSKYKSRDA'
                        "MWYFFSRRENNKGNRQSRTTVSGKWKLTGESVEVKDQWGFCSEGFRGKIGHKRVLVFLDGRY"
                        "PDKTKSDWVIHEFHYDLLPEHQKLCNVTLFRFSSYFRLSLLSPMFYTDELMCLPPEILQRTY"
                        "VICRLEYKGDDADILSAYAIDPTPAFVPNMTSSAGSVVNQSRQRNSGSYNTYSEYDSANHGQ"
                        "QFNENSNIMQQQPLQGSFNPLLEYDFANHGGQWLSDYIDLQQQVPYLAPYENESEMIWKHVI"
                        "EENFEFLVDERTSMQQHYSDHRPKKPVSGVLPDDSSDTETGSMIFEDTSSSTDSVGSSDEPG"
                        "HTRIDDIPSLNIIEPLHNYKAQEQPKQQSKEKVISSQKSECEWKMAEDSIKIPPSTNTVKQS"
                        'WIVLENAQWNYLKNMIIGVLLFISVISWIILVG"',
                    ),
                ),
            ),
            (
                "CDS",
                "complement(join("
                "6617..6953,7266..7351,7464..7603,7916..7998,8087..8166,8273..8368"
                "))",
                (
                    ("/note=", '"hypothetical protein"'),
                    ("/codon_start=", "1"),
                    ("/evidence=", "not_experimental"),
                    ("/product=", '"T25K16.2"'),
                    ("/protein_id=", '"AAF26477.1"'),
                    ("/db_xref=", '"GI:6715650"'),
                    (
                        "/translation=",
                        '"MAASEHRCVGCGFRVKSLFIQYSPGNIRLMKCGNCKEVADEYIECERMVCFNHFLSLFGPK'
                        "VYRHVLYNAINPATVNIQVKNYFNSTSRCVVGEIHRQTYLKSPELIIDRSLLLRKSDEESSF"
                        "SDSPVLLSIKVLIGVLSANAAFIISFAIATKGLLNEVSRESLLLQVWEFPMSVIFFVDILLL"
                        "TSNSMALKGQTFKMFSMQIVFCCCYFGISQCKFVFKPVMTESTMTRCIAVCLIAHLIRFLVG"
                        'QIFEPTIFLIQIGSLLQYMSYFFRIV"',
                    ),
                ),
            ),
            (
                "CDS",
                "complement(11566..12642)",
                (
                    ("/note=", '"putative RAP2.8 protein gi|3695373"'),
                    ("/codon_start=", "1"),
                    ("/evidence=", "not_experimental"),
                    ("/product=", '"T25K16.3"'),
                    ("/protein_id=", '"AAF26476.1"'),
                    ("/db_xref=", '"GI:6715649"'),
                    (
                        "/translation=",
                        '"MDLSLAPTTTTSSDQEQDRDQELTSNIGASSSSGPSGNNNNLPMMMIPPPEKEHMFDKVVT'
                        "PSDVGKLNRLVIPKQHAERYFPLDSSNNQNGTLLNFQDRNGKMWRFRYSYWNSSQSYVMTKG"
                        "WSRFVKEKKLDAGDIVSFQRGIGDESERSKLYIDWRHRPDMSLVQAHQFGNFGFNFNFPTTS"
                        "QYSNRFHPLPEYNSVPIHRGLNIGNHQRSYYNTQRQEFVGYGYGNLAGRCYYTGSPLDHRNI"
                        "VGSEPLVIDSVPVVPGRLTPVMLPPLPPPPSTAGKRLRLFGVNMECGNDYNQQEESWLVPRG"
                        'EIGASSSSSSALRLNLSTDHDDDNDDGDDGDDDQFAKKGKSSLSLNFNP"',
                    ),
                ),
            ),
            (
                "CDS",
                "join("
                "23221..24174,24244..24357,24412..24664,24743..25137,25226..25445,"
                "25527..25711,25783..25905,25994..26478,26564..26730,26814..26983,"
                "27074..27235,27320..27415,27505..28133,28314..28507,28592..28782,"
                "28862..30013,30112..30518,30604..30781"
                ")",
                (
                    (
                        "/note=",
                        '"similar to UFD1 protein emb|CAB10321.1; similar to ESTs '
                        'gb|H36434, gb|AI996152.1"',
                    ),
                    ("/codon_start=", "1"),
                    ("/evidence=", "not_experimental"),
                    ("/product=", '"T25K16.4"'),
                    ("/protein_id=", '"AAF26461.1"'),
                    ("/db_xref=", '"GI:6715634"'),
                    (
                        "/translation=",
                        '"MVMEDEPREATIKPSYWLDACEDISCDLIDDLVSEFDPSSVAVNESTDENGVINDFFGGID'
                        "HILDSIKNGGGLPNNGVSDTNSQINEVTVTPQVIAKETVKENGLQKNGGKRDEFSKEEGDKD"
                        "RKRARVCSYQSERSNLSGRGHVNNSREGDRFMNRKRTRNWDEAGNNKKKRECNNYRRDGRDR"
                        "EVRGYWERDKVGSNELVYRSGTWEADHERDVKKVSGGNRECDVKAEENKSKPEERKEKVVEE"
                        "QARRYQLDVLEQAKAKNTIAFLETGAGKTLIAILLIKSVHKDLMSQNRKMLSVFLVPKVPLV"
                        "YQVPPNKKHQAEVIRNQTCFQVGHYCGEMGQDFWDSRRWQREFESKQFLKLTSFFLFSSTQV"
                        "LVMTAQILLNILRHSIIRMETIDLLILDECHHAVKKHPYSLVMSEFYHTTPKDKRPAIFGMT"
                        "ASPVNLKGVSSQVDCAIKIRNLETKLDSTVCTIKDRKELEKHVPMPSEIVVEYDKAATMWSL"
                        "HETIKQMIAAVEEAAQASSRKSKWQFMGARDAGAKDELRQVYGVSERTESDGAANLIHKLRA"
                        "INYTLAELGQWCAYKVGQSFLSALQSDERVNFQVDVKFQESYLSEVVSLLQCELLEGAAAEK"
                        "VAAEVGKPENGNAHDEMEEGELPDDPVVSGGEHVDEVIGAAVADGKVTPKVQSLIKLLLKYQ"
                        "HTADFRAIVFVERVVAALVLPKVRIKVFAELPSLSFIRCASMIGHNNSQEMKSSQMQDTISK"
                        "FRDGHVTLLVATSVAEEGLDIRQCNVVMRFDLAKTVLAYIQSRGRARKPGSDYILMVERYIK"
                        "SFKNYILIFVTTGHQISTDMSTCVTCRGNVSHAAFLRNARNSEETLRKEAIERTDLSHLKDT"
                        "SRLISIDAVPGTVYKVEATGAMVSLNSAVGLVHFYCSQLPGDRYAILRPEFSMEKHEKPGGH"
                        "TEYSCRLQLPCNAPFEILEGPVCSSMRLAQQVDIIVSACKKLHEMGAFTDMLLPDKGSGQDA"
                        "EKADQDDEGEPVPGTARHREFYPEGVADVLKGEWVSSGKEVCESSKLFHLYMYNVRCVDFGS"
                        "SKDPFLSEVSEFAILFGNELDAEVLSMSMDLYVARAMITKASLAFKGSLDITENQLSSLKKF"
                        "HVRLMSIVLDVDVEPSTTPWDPAKAYLFVPVTDNTSMEPIKGINWELVEKITKTTAWDNPLQ"
                        "RARPDVYLGTNERTLGGDRREYGFGKLRHNIVFGQKSHPTYGIRGAVASFDVVRASGLLPVR"
                        "DAFEKEVEEDLSKGKLMMADGCMVAEDLIGKIVTAAHSGKRFYVDSICYDMSAETSFPRKEG"
                        "YLGPLEYNTYADYYKQKIYVVQDRLFFYFLHNLRLLRLYKSSSIMLFIRYGVDLNCKQQPLI"
                        "KGRGVSYCKNLLSPRFEQSGESETVLDKTYYVFLPPELCVVHPLSGSLIRGAQRLPSIMRRV"
                        "ESMLLAVQLKNLISYPIPTSKILEALTAASCQETFCYERAELLGDAYLKWVVSRFLFLKYPQ"
                        "KHEGQLTRMRQQMVSNMVLYQFALVKGLQSYIQADRFAPSRWSAPGVPPVFDEDTKDGGSSF"
                        "FDEEQKPVSEENSDVFEDGEMEDGELEGDLSSYRVLSSKTLADVVEALIGVYYVEGGKIAAN"
                        "HLMKWIGIHVEDDPDEVDGTLKNVNVPESVLKSIDFVGLERALKYEFKEKGLLVEAITHASR"
                        "PSSGVSCYQRLEFVGDAVLDHLITRHLFFTYTSLPPGRLTDLRAAAVNNENFARVAVKHKLH"
                        "LYLRHGSSALEKQVNKIKKQSILFSKSFKCLTVWLLFVFQIREFVKEVQTESSKPGFNSFGL"
                        "GDCKAPKVLGDIVESIAGAIFLDSGKDTTAAWKVFQPLLQPMVTPETLPMHPVRELQERCQQ"
                        "QAEGLEYKASRSGNTATVEVFIDGVQVGVAQNPQKKMAQKLAARNALAALKEKEIAESKEKH"
                        "INNGNAGEDQGENENGNKKNGHQPFTRQTLNDICLRKNWPMPSYRCVKEGGPAHAKRFTFGV"
                        'RVNTSDRGWTDECIGEPMPSVKKAKDSAAVLLLELLNKTFS"',
                    ),
                ),
            ),
            (
                "CDS",
                "complement(join("
                "31084..31126,31223..31304,31341..31515,31635..31700,31790..31897,"
                "31984..32049,32133..32161,32249..32372"
                "))",
                (
                    (
                        "/note=",
                        '"putative inorganic pyrophosphatase gi|3510259; similar to '
                        'ESTs gb|T42316, gb|AI994042.1, gb|AI994013.1, emb|Z29202"',
                    ),
                    ("/codon_start=", "1"),
                    ("/evidence=", "not_experimental"),
                    ("/product=", '"T25K16.5"'),
                    ("/protein_id=", '"AAF26475.1"'),
                    ("/db_xref=", '"GI:6715648"'),
                    (
                        "/translation=",
                        '"MSEETKDNQRLQRPAPRLNERILSSLSRRSVAAHPWHDLEIGPGAPQIFNVVVEITKGSKV'
                        "KYELDKKTGLIKVDRILYSSVVYPHNYGFVPRTLCEDNDPIDVLVIMQEPVLPGCFLRARAI"
                        "GLMPMIDQGEKDDKIIAVCVDDPEYKHYTDIKELPPHRLSEIRRFFEDCILFLQCSSLFISI"
                        'DLSTNKKNENKEVAVNDFLPSESAVEAIQYSMDLYAEYILHTLRR"',
                    ),
                ),
            ),
            (
                "CDS",
                "complement(join("
                "33694..34029,34103..35173,35269..35349,35432..35701,36326..36387,"
                "36512..36623,36725..36763"
                "))",
                (
                    (
                        "/note=",
                        '"putative late elongated hypocotyl emb|CAA07004; similar to '
                        'ESTS gb|AI993521.1, gb|AA650979"',
                    ),
                    ("/codon_start=", "1"),
                    ("/evidence=", "not_experimental"),
                    ("/product=", '"T25K16.6"'),
                    ("/protein_id=", '"AAF26474.1"'),
                    ("/db_xref=", '"GI:6715647"'),
                    (
                        "/translation=",
                        '"MDTNTSGEELLAKARKPYTITKQRERWTEDEHERFLEALRLYGRAWQRIEEHIGTKTAVQI'
                        "RSHAQKFFTKFGKAHSFWFTFQLEKEAEVKGIPVCQALDIEIPPPRPKRKPNTPYPRKPGNN"
                        "GTSSSQVSSAKDAKLVSSASSSQLNQAFLDLEKMPFSEKTSTGKENQDENCSGVSTVNKYPL"
                        "PTKVSGDIETSKTSTVDNAVQDVPKKNKDKDGNDGTTVHSMQNYPWHFHADIVNGNIAKCPQ"
                        "NHPSGMVSQDFMFHPMREETHGHANLQATTASATTTASHQAFPACHSQDDYRSFLQISSTFS"
                        "NLIMSTLLQNPAAHAAATFAASVWPYASVGNSGDSSTPMSSSPPSITAIAAATVAAATAWWA"
                        "SHGLLPVCAPAPITCVPFSTVAVPTPAMTEMDTVENTQPFEKQNTALQDQNLASKSPASSSD"
                        "DSDETGVTKLNADSKTNDDKIEEVVVTAAVHDSNTAQKKNLVDRSSCGSNTPSGSDAETDAL"
                        "DKMEKDKEDVKETDENQPDVIELNNRKIKMRDNNSNNNATTDSWKEVSEEGRIAFQALFARE"
                        "RLPQSFSPPQVAENVNRKQSDTSMPLAPNFKSQDSCAADQEGVVMIGVGTCKSLKTRQTGFK"
                        'PYKRCSMEVKESQVGNINNQSDEKVCKRLRLEGEAST"',
                    ),
                ),
            ),
            (
                "CDS",
                "complement(join("
                "38600..38756,38838..38989,39111..39516,39915..40031,40377..40579"
                "))",
                (
                    (
                        "/note=",
                        '"similar to Medicago truncatula MtN2 gi|3193308; similar to '
                        'EST gb|H77065"',
                    ),
                    ("/codon_start=", "1"),
                    ("/evidence=", "not_experimental"),
                    ("/product=", '"T25K16.7"'),
                    ("/protein_id=", '"AAF26473.1"'),
                    ("/db_xref=", '"GI:6715646"'),
                    (
                        "/translation=",
                        '"MAGDMQGVRVVEKYSPVIVMVMSNVAMGSVNALVKKALDVGVNHMVIGAYRMAISALILVP'
                        "FAYVLERASLMQFFFLLGLSYTSATVSCALVSMLPAITFALALIFRTENVKILKTKAGMLKV"
                        "IGTLICISGALFLTFYKGPQISNSHSHSHGGASHNNNDQDKANNWLLGCLYLTIGTVLLSLW"
                        "MLFQGTLSIKYPCKYSSTCLMSIFAAFQCALLSLYKSRDVNDWIIDDRFVITVIIYAGVVGQ"
                        "AMTTVATTWGIKKLGAVFASAFFPLTLISATLFDFLILHTPLYLGSVIGSLVTITGLYMFLW"
                        'GKNKETESSTALSSGMDNEAQYTTPNKDNDSKSPV"',
                    ),
                ),
            ),
            (
                "CDS",
                "complement(join("
                "45150..45261,45343..45656,45719..45847,46075..46313,47448..47684,"
                "47777..48554,48638..48868"
                "))",
                (
                    (
                        "/note=",
                        '"putative pyruvate dehydrogenase E1 alpha subunit gi|2454182; '
                        "similar to ESTs emb|Z48417, gb|AW039459.1, gb|T15146, "
                        'emb|Z48416, gb|AF066871, gb|T76832, gb|AI996061.1"',
                    ),
                    ("/codon_start=", "1"),
                    ("/evidence=", "not_experimental"),
                    ("/product=", '"T25K16.8"'),
                    ("/protein_id=", '"AAF26472.1"'),
                    ("/db_xref=", '"GI:6715645"'),
                    (
                        "/translation=",
                        '"MATAFAPTKLTATVPLHGSHENRLLLPIRLAPPSSFLGSTRSLSLRRLNHSNATRRSPVVS'
                        "VQEVVKEKQSTNNTSLLITKEEGLELYEDMILGRSFEDMCAQMYYRGKMFGFVHLYNGQEAV"
                        "STGFIKLLTKSDSVVSTYRDHVHALSKGVSARAVMSELFGKVTGCCRGQGGSMHMFSKEHNM"
                        "LGGFAFIGEGIPVATGAAFSSKYRREVLKQDCDDVTVAFFGDGTCNNGQFFECLNMAALYKL"
                        "PIIFVVENNLWAIGMSHLRATSDPEIWKKGPAFGMPGVHVDGMDVLKVREVAKEAVTRARRG"
                        "EGPTLVECETYRFRGHSLADPDELRDAAEKAKYAARDPIAALKKYLIENKLAKEAELKSIEK"
                        "KIDELVEEAVEFADASPQPGRSQLLENVFADPKGFGIGPDGRYRSQPLQIKVSSSELSVLDE"
                        "EKEEEVVKGEAEPNKDSVVSKAEPVKKPRPCELYVCNIPRSYDIAQLLDMFQPFGTVISVEV"
                        "VSRNPQTGESRGSGYVTMGSINSAKIAIASLDGTVRARETKKQEVGGREMRVRYSVDMNPGT"
                        "RRNPEVLNSTPKKILMYESQHKVYVGNLPWFTQPDGLRNHFSKFGTIVSTRVLHDRKTGRNR"
                        'VFAFLSFTSGEERDAALSFNGTVNNMKVAESSSEKVSRRVSRKPTVLLLLQRHLLDTNNV"',
                    ),
                ),
            ),
            (
                "CDS",
                "complement(join(49986..50039,50121..50333,50585..50656))",
                (
                    (
                        "/note=",
                        '"similar to acidic ribosomal protein p1 gi|2252857; '
                        'similar to ESTs gb|T42111, gb|AI099979, gb|AA728491"',
                    ),
                    ("/codon_start=", "1"),
                    ("/evidence=", "not_experimental"),
                    ("/product=", '"T25K16.9"'),
                    ("/protein_id=", '"AAF26471.1"'),
                    ("/db_xref=", '"GI:6715644"'),
                    (
                        "/translation=",
                        '"MSTVGELACSYAVMILEDEGIAITADKIATLVKAAGVSIESYWPMLFAKMAEKRNVTDLIM'
                        'NVGAGGGGGAPVAAAAPAAGGGAAAAPAAEEKKKDEPAEESDGDLGFGLFD"',
                    ),
                ),
            ),
            (
                "CDS",
                "join("
                "51941..52048,52136..52432,52640..52885,53186..53326,53405..54196"
                ")",
                (
                    ("/note=", '"hypothetical protein"'),
                    ("/codon_start=", "1"),
                    ("/evidence=", "not_experimental"),
                    ("/product=", '"T25K16.10"'),
                    ("/protein_id=", '"AAF26462.1"'),
                    ("/db_xref=", '"GI:6715635"'),
                    (
                        "/translation=",
                        '"MGKKNGSSSWLTAVKRAFRSPTKKDHSNDVEEDEEKKREKRRWFRKPATQESPVKSSGISP'
                        "PAPQEDSLNVNSKPSPETAPSYATTTPPSNAGKPPSAVVPIATSASKTLAPRRIYYARENYA"
                        "AVVIQTSFRGYLARRALRALKGLVKLQALVRGHNVRKQAKMTLRCMQALVRVQSRVLDQRKR"
                        "LSHDGSRKSAFSDSHAVFESRYLQDLSDRQSMSREGSSAAEDWDDRPHTIDAVKVMLQRRRD"
                        "TALRHDKTNLSQAFSQKMWRTVGNQSTEGHHEVELEEERPKWLDRWMATRPWDKRASSRASV"
                        "DQRVSVKTVEIDTSQPYSRTGAGSPSRGQRPSSPSRTSHHYQSRNNFSATPSPAKSRPILIR"
                        "SASPRCQRDPREDRDRAAYSYTSNTPSLRSNYSFTARSGCSISTTMVNNASLLPNYMASTES"
                        "AKARIRSHSAPRQRPSTPERDRAGLVKKRLSYPVPPPAEYEDNNSLRSPSFKSVAGSHFGGM"
                        'LEQQSNYSSCCTESNGVEISPASTSDFRNWLR"',
                    ),
                ),
            ),
            (
                "CDS",
                "complement(57094..58680)",
                (
                    (
                        "/note=",
                        '"putative fatty acid elongase 3-ketoacyl-coA synthase 1 '
                        "gi|4091810; similar to ESTs gb|T42377, gb|N96054, gb|T44368, "
                        'gb|AI999379.1, emb|Z26005"',
                    ),
                    ("/codon_start=", "1"),
                    ("/evidence=", "not_experimental"),
                    ("/product=", '"T25K16.11"'),
                    ("/protein_id=", '"AAF26470.1"'),
                    ("/db_xref=", '"GI:6715643"'),
                    (
                        "/translation=",
                        '"MERTNSIEMDRERLTAEMAFRDSSSAVIRIRRRLPDLLTSVKLKYVKLGLHNSCNVTTILF'
                        "FLIILPLTGTVLVQLTGLTFDTFSELWSNQAVQLDTATRLTCLVFLSFVLTLYVANRSKPVY"
                        "LVDFSCYKPEDERKISVDSFLTMTEENGSFTDDTVQFQQRISNRAGLGDETYLPRGITSTPP"
                        "KLNMSEARAEAEAVMFGALDSLFEKTGIKPAEVGILIVNCSLFNPTPSLSAMIVNHYKMRED"
                        "IKSYNLGGMGCSAGLISIDLANNLLKANPNSYAVVVSTENITLNWYFGNDRSMLLCNCIFRM"
                        "GGAAILLSNRRQDRKKSKYSLVNVVRTHKGSDDKNYNCVYQKEDERGTIGVSLARELMSVAG"
                        "DALKTNITTLGPMVLPLSEQLMFLISLVKRKMFKLKVKPYIPDFKLAFEHFCIHAGGRAVLD"
                        "EVQKNLDLKDWHMEPSRMTLHRFGNTSSSSLWYEMAYTEAKGRVKAGDRLWQIAFGSGFKCN"
                        'SAVWKALRPVSTEEMTGNAWAGSIDQYPVKVVQ"',
                    ),
                ),
            ),
            (
                "CDS",
                "complement(join(59508..59665,61670..61826,63133..63513))",
                (
                    ("/note=", '"hypothetical protein"'),
                    ("/codon_start=", "1"),
                    ("/evidence=", "not_experimental"),
                    ("/product=", '"T25K16.12"'),
                    ("/protein_id=", '"AAF26469.1"'),
                    ("/db_xref=", '"GI:6715642"'),
                    (
                        "/translation=",
                        '"MEKRSDSESVEILGDWDSPPPEERIVMVSVPTSPESDYARSNQPKEIESRVSDKETASASG'
                        "EVAARRVLPPWMDPSYEWGGGKWKVDGRKNKNKKEKEKEKEEIIPFKEIIEALLGNSGDKVQ"
                        "QDNKVFEVAPSLHVVELRKTGDDTLEFHKVYFRFNLYQPVQLPLILFVVIRFSMLKIIHYHQ"
                        'FTMAHIKEFVCMWDTHLYKEITNLNIWDTLSSTLVLAIWTVNASHE"',
                    ),
                ),
            ),
            (
                "CDS",
                "complement(join("
                "64100..64177,64272..64358,64453..64509,64603..64719,64812..64919,"
                "65033..65158,65265..65354,65435..65566,65809..65862,65964..66044,"
                "66152..66259,66380..66451,66537..66599,67026..67214"
                "))",
                (
                    (
                        "/note=",
                        '"similar to wpk4 protein kinase dbj|BAA34675; similar to ESTs '
                        'dbj|AB015122, gb|AI997157.1"',
                    ),
                    ("/codon_start=", "1"),
                    ("/evidence=", "not_experimental"),
                    ("/product=", '"T25K16.13"'),
                    ("/protein_id=", '"AAF26468.1"'),
                    ("/db_xref=", '"GI:6715641"'),
                    (
                        "/translation=",
                        '"MSGSRRKATPASRTRVGNYEMGRTLGEGSFAKVKYAKNTVTGDQAAIKILDREKVFRHKMV'
                        "EQLKREISTMKLIKHPNVVEIIEVMASKTKIYIVLELVNGGELFDKIAQQGRLKEDEARRYF"
                        "QQLINAVDYCHSRGVYHRDLKPENLILDANGVLKVSDFGLSAFSRQVREDGLLHTACGTPNY"
                        "VAPEVLSDKGYDGAAADVWSCGVILFVLMAGYLPFDEPNLMTLYKRVRICKAEFSCPPWFSQ"
                        "GAKRVIKRILEPNPITRISIAELLEDEWFKKGYKPPSFDQDDEDITIDDVDAAFSNSKECLV"
                        "TEKKEKPVSMNAFELISSSSEFSLENLFEKQAQLVKKETRFTSQRSASEIMSKMEETAKPLG"
                        "FNVRKDNYKIKMKGDKSGRKGQLSVATEVFEVAPSLHVVELRKTGGDTLEFHKVCDSFYKNF"
                        'SSGLKDVVWNTDAAAEEQKQ"',
                    ),
                ),
            ),
            (
                "CDS",
                "complement(join(69831..69987,70534..70670,70743..71357,71644..71700))",
                (
                    (
                        "/note=",
                        '"similar to ataxia-telangiectasia group D protein pir|A49618"',
                    ),
                    ("/codon_start=", "1"),
                    ("/evidence=", "not_experimental"),
                    ("/product=", '"T25K16.14"'),
                    ("/protein_id=", '"AAF26467.1"'),
                    ("/db_xref=", '"GI:6715640"'),
                    (
                        "/translation=",
                        '"MVSDLPLDEDDIALLKSPYCDDGGDEDVNSAPNIFTYDNVPLKKRHYLGTSDTFRSFEPLN'
                        "EHACIVCDIADDGVVPCSGNECPLAVHRKCVELDCEDPATFYCPYCWFKEQATRSTALRTRG"
                        "VAAAKTLVQYGCSELRSGDIVMTRENSQLENGSDNSLPMQLHENLHQLQELVKHLKARNSQL"
                        "DESTDQFIDMEKSCGEAYAVVNDQPKRVLWTVNEEKMLREGVEKFSDTINKNMPWKKILEMG"
                        "KGIFHTTRNSSDLKDKWRNMVRIIILIWLRSRLTSSSSSQRSEIKMERERNAGVMKKMSPTG"
                        'TIQRLEFVGWYL"',
                    ),
                ),
            ),
            (
                "CDS",
                "join("
                "72285..72371,72789..72865,72989..73097,73190..73442,73524..73585"
                ")",
                (
                    (
                        "/note=",
                        '"similar to SYT gi|2252866; similar to ESTs emb|F14390, '
                        'gb|H36066, emb|F14391"',
                    ),
                    ("/codon_start=", "1"),
                    ("/evidence=", "not_experimental"),
                    ("/product=", '"T25K16.15"'),
                    ("/protein_id=", '"AAF26463.1"'),
                    ("/db_xref=", '"GI:6715636"'),
                    (
                        "/translation=",
                        '"MQQQQSPQMFPMVPSIPPANNITTEQIQKYLDENKKLIMAIMENQNLGKLAECAQYQALLQ'
                        "KNLMYLAAIADAQPPPPTPGPSPSTAVAAQMATPHSGMQPPSYFMQHPQASPAGIFAPRGPL"
                        "QFGSPLQFQDPQQQQQIHQQAMQGHMGIRPMGMTNNGMQHAMQQPETGLGGNVGLRGGKQDG"
                        'ADGQGKDDGK"',
                    ),
                ),
            ),
            (
                "CDS",
                "complement(join(73807..73990,74036..74145))",
                (
                    (
                        "/note=",
                        '"similar to stress-induced protein OZI1 precursor pir|S59544; '
                        'similar to EST gb|AI995719.1"',
                    ),
                    ("/codon_start=", "1"),
                    ("/evidence=", "not_experimental"),
                    ("/product=", '"T25K16.16"'),
                    ("/protein_id=", '"AAF26466.1"'),
                    ("/db_xref=", '"GI:6715639"'),
                    (
                        "/translation=",
                        '"MASGGKAKYIIGALIGSFGISYIFDKVISDNKIFGGKDDLNGYLLVKISGTTPGTVSNKEW'
                        'WAATDEKFQAWPRTAGPPVVMNPISRQNFIVKTRPE"',
                    ),
                ),
            ),
            (
                "CDS",
                "join(75335..76249,76516..76653,76733..76982,77015..77148)",
                (
                    ("/note=", '"putative reverse transcriptase gb|AAD17395"'),
                    ("/codon_start=", "1"),
                    ("/evidence=", "not_experimental"),
                    ("/product=", '"T25K16.17"'),
                    ("/protein_id=", '"AAF26464.1"'),
                    ("/db_xref=", '"GI:6715637"'),
                    (
                        "/translation=",
                        '"MKEDRRLPHKRDAFQFLKTKAAYVIVIVLTYAFGYFSAYHYHQPLQQQLPPSTTAVETTKP'
                        "QVCSIDNFRVTTPCGNLVPPELIRQTVIDRIFNGTSPYIDFPPPHAKKFLRPKRIKGWGSYG"
                        "AVFENLIRRVKPKTIVEVGSFLGASAIHMANLTRRLGLEETQILCVDDFRGWPGFRDRFKDM"
                        "ALVNGDVLLMYQFMQNVVISDFSGSILPVPFSTGSALEKLCEWGVTADLVEIDAGHDFNSAW"
                        "ADINRAVRILRPGGVIFGHDYFTAADNRGVRRAVNLFAEINRLKVKTDGQHWVIDSVKVINK"
                        "GTRFAISKTVAKIKEDANQWFFAQVLENQDLVNEQAVHISVKVLRGFLRDEHGKVLIHARRS"
                        "FASVHSKLDATFLCWQWAMESMKSLRVDKIIFASEDNDLIGAVTRLPSWPSYKFQIHFLLGE"
                        'LIRSSNLGAHLIAKSVTMEDRRQSYVATGFPFWLKHLFEKERSIA"',
                    ),
                ),
            ),
            (
                "CDS",
                "complement(join(82723..82738,82751..83373,83586..84581))",
                (
                    ("/note=", '"putative cytochrome P450 gi|3831440"'),
                    ("/codon_start=", "1"),
                    ("/evidence=", "not_experimental"),
                    ("/product=", '"T25K16.18"'),
                    ("/protein_id=", '"AAF26465.1"'),
                    ("/db_xref=", '"GI:6715638"'),
                    (
                        "/translation=",
                        '"MFSLNMRTEIESLWVFALASKFNIYMQQHFASLLVAIAITWFTITIVFWSTPGGPAWGKYF'
                        "FTRRFISLDYNRKYKNLIPGPRGFPLVGSMSLRSSHVAHQRIASVAEMSNAKRLMAFSLGDT"
                        "KVVVTCHPAVAKEILNSSVFADRPVDETAYGLMFNRAMGFAPNGTYWRTLRRLGSNHLFNPK"
                        "QIKQSEDQRRVIATQMVNAFARNPKSACAVRDLLKTASLCNMMGLVFGREYELESNNNLESE"
                        "CLKGLVEEGYDLLGTLNWTDHLPWLAGLDFQQIRFRCSQLVPKVNLLLSRIIHEQRAATGNF"
                        "LDMLLSLQGSEKLSESDMVAVLWEMIFRGTDTVAVLVEWVLARIVMHPKVQLTVHDELDRVV"
                        "GRSRTVDESDLPSLTYLTAMIKEVLRLHPPGPLLSWARLSITDTSVDGYHVPAGTTAMVNMW"
                        "AIARDPHVWEDPLEFKPERFVAKEGEAEFSVFGSDLRLAPFGSGKRVCPGKNLGLTTVSFWV"
                        'ATLLHEFEWLPSVEANPPDLSEVLRLSCEMACPLIVNVSSRRKIIAWMF"',
                    ),
                ),
            ),
        ]
        self.perform_record_parser_test(
            record, length, locus, definition, accession, titles, features
        )

    def test_record_parser_06(self):
        """Check the first record read from GenBank/protein_refseq.gb."""
        with open("GenBank/protein_refseq.gb") as stream:
            # Only the first record produced by the iterator is examined.
            parsed = next(GenBank.Iterator(stream, self.rec_parser))

        expected_length = 182
        expected_locus = "NP_034640"
        expected_definition = "interferon beta, fibroblast [Mus musculus]"
        expected_accession = ["NP_034640"]
        expected_titles = (
            "structure and expression of a cloned cdna for mouse interferon-beta",
        )

        # Each expected feature is a 3-tuple: a name, a location string and a
        # tuple of (qualifier key, qualifier value) pairs.
        source = (
            "source",
            "1..182",
            (
                ("/organism=", '"Mus musculus"'),
                ("/db_xref=", '"taxon:10090"'),
                ("/chromosome=", '"4"'),
                ("/map=", '"4 42.6 cM"'),
            ),
        )
        protein = (
            "Protein",
            "1..182",
            (("/product=", '"interferon beta, fibroblast"'),),
        )
        # This feature carries no qualifiers at all.
        signal_peptide = ("sig_peptide", "1..21", ())
        whole_region = (
            "Region",
            "1..182",
            (
                ("/region_name=", '"Interferon alpha/beta domain"'),
                ("/db_xref=", '"CDD:pfam00143"'),
                ("/note=", '"interferon"'),
            ),
        )
        mature_peptide = ("mat_peptide", "22..182", (("/product=", '"ifn-beta"'),))
        inner_region = (
            "Region",
            "56..170",
            (
                ("/region_name=", '"Interferon alpha, beta and delta."'),
                ("/db_xref=", '"CDD:IFabd"'),
                ("/note=", '"IFabd"'),
            ),
        )
        # Note the two separate /db_xref= entries on this feature.
        coding = (
            "CDS",
            "1..182",
            (
                ("/gene=", '"Ifnb"'),
                ("/db_xref=", '"LocusID:15977"'),
                ("/db_xref=", '"MGD:MGI:107657"'),
                ("/coded_by=", '"NM_010510.1:21..569"'),
            ),
        )
        expected_features = [
            source,
            protein,
            signal_peptide,
            whole_region,
            mature_peptide,
            inner_region,
            coding,
        ]

        # The shared helper (defined elsewhere in this class) receives the
        # parsed record followed by the expected values, all positionally.
        self.perform_record_parser_test(
            parsed,
            expected_length,
            expected_locus,
            expected_definition,
            expected_accession,
            expected_titles,
            expected_features,
        )

    def test_record_parser_07(self):
        path = "GenBank/extra_keywords.gb"
        with open(path) as handle:
            records = GenBank.Iterator(handle, self.rec_parser)
            record = next(records)
        length = 154329
        locus = "DMBR25B3"
        definition = "Drosophila melanogaster BAC clone BACR25B3"
        accession = ["AL138972"]
        titles = (
            "Sequencing the distal X chromosome of Drosophila melanogaster",
            "Direct Submission",
        )
        features = [
            (
                "source",
                "1..154329",
                (
                    ("/organism=", '"Drosophila melanogaster"'),
                    ("/db_xref=", '"taxon:7227"'),
                    ("/clone=", '"BAC BACR25B3"'),
                ),
            ),
            (
                "gene",
                "complement(22148..27773)",
                (("/gene=", '"EG:BACR25B3.11"'), ("/note=", "")),
            ),
            (
                "CDS",
                "complement(join("
                "22148..22299,22375..22791,22860..23560,23630..24555,24616..24888,"
                "25024..25178,26677..27009,27623..27773"
                "))",
                (
                    ("/gene=", '"EG:BACR25B3.11"'),
                    (
                        "/note=",
                        "\"/prediction=(method:''genefinder'', version:''084'',"
                        " score:''105.71''); /prediction=(method:''genscan'',"
                        " version:''1.0''); /match=(desc:''BASEMENT MEMBRANE-SPECIFIC"
                        " HEPARAN SULFATE PROTEOGLYCAN CORE PROTEIN PRECURSOR (HSPG)"
                        " (PERLECAN) (PLC)'', species:''Homo sapiens (Human)'',"
                        " ranges:(query:24292..24549,"
                        " target:SWISS-PROT::P98160:3713..3628, score:''201.00''),"
                        " (query:24016..24291, target:SWISS-PROT::P98160:3815..3724,"
                        " score:''139.00''), (query:23857..24006,"
                        " target:SWISS-PROT::P98160:3866..3817, score:''99.00''),"
                        " (query:24052..24327, target:SWISS-PROT::P98160:4059..3968,"
                        " score:''143.00''), (query:24046..24312,"
                        " target:SWISS-PROT::P98160:4341..4253, score:''116.00''),"
                        " (query:23806..23901, target:SWISS-PROT::P98160:4177..4146,"
                        " score:''76.00''), (query:23203..23382,"
                        " target:SWISS-PROT::P98160:4062..4003, score:''116.00''),"
                        " (query:22523..22777, target:SWISS-PROT::P98160:4288..4204,"
                        " score:''112.00''), (query:22235..22300,"
                        " target:SWISS-PROT::P98160:4358..4337, score:''64.00'')),"
                        " method:''blastx'', version:''1.4.9'');"
                        " /match=(desc:''GM03359.5prime GM Drosophila melanogaster"
                        " ovary BlueScript Drosophila melanogaster cDNA clone GM03359"
                        " 5prime, mRNA sequence'', species:''Drosophila melanogaster"
                        " (fruit fly)'', ranges:(query:25024..25235,"
                        " target:EMBL::AA801707:438..227, score:''1024.00''),"
                        " (query:24851..24898, target:EMBL::AA801707:476..429,"
                        " score:''204.00'')), method:''blastn'', version:''1.4.9'');"
                        " /match=(desc:''LD08615.5prime LD Drosophila melanogaster"
                        " embryo BlueScript Drosophila melanogaster cDNA clone LD08615"
                        " 5prime, mRNA sequence'', species:''Drosophila melanogaster"
                        " (fruit fly)'', ranges:(query:24629..24727,"
                        " target:EMBL::AA264808:99..1, score:''495.00''),"
                        " (query:24417..24566, target:EMBL::AA264808:250..101,"
                        " score:''687.00''), (query:24048..24420,"
                        " target:EMBL::AA264808:618..246, score:''1847.00''),"
                        " (query:23986..24036, target:EMBL::AA264808:678..628,"
                        " score:''237.00'')), method:''blastn'', version:''1.4.9'');"
                        " /match=(desc:''HL02745.5prime HL Drosophila melanogaster head"
                        " BlueScript Drosophila melanogaster cDNA clone HL02745 5prime,"
                        " mRNA sequence'', species:''Drosophila melanogaster (fruit"
                        " fly)'', ranges:(query:23944..24045,"
                        " target:EMBL::AA697546:103..2, score:''510.00''),"
                        " (query:23630..23943, target:EMBL::AA697546:416..103,"
                        " score:''1570.00''), (query:23419..23561,"
                        " target:EMBL::AA697546:558..416, score:''715.00''),"
                        " (query:23306..23417, target:EMBL::AA697546:670..559,"
                        " score:''524.00''), (query:23280..23316,"
                        " target:EMBL::AA697546:695..659, score:''167.00'')),"
                        " method:''blastn'', version:''1.4.9'');"
                        " /match=(desc:''GM08137.5prime GM Drosophila melanogaster"
                        " ovary BlueScript Drosophila melanogaster cDNA clone GM08137"
                        " 5prime, mRNA sequence'', species:''Drosophila melanogaster"
                        " (fruit fly)'', ranges:(query:23235..23278,"
                        " target:EMBL::AA696682:44..1, score:''139.00''),"
                        " (query:22986..23251, target:EMBL::AA696682:294..29,"
                        " score:''1321.00'')), method:''blastn'', version:''1.4.9'')\"",
                    ),
                    ("/codon_start=", "1"),
                    ("/protein_id=", '"CAB72284.1"'),
                    ("/db_xref=", '"GI:6946669"'),
                    (
                        "/translation=",
                        '"MACNCNQSMIYQSNERRDYNCPGAPQYPYNRFKGGVSLKDTPCMVLYICADFKSSKLSSAK'
                        "PIISGPATTRAPAISYVCQPNDFKCVSHPHTCVRANMVCDGIYDCTDHSDEFNCIAGKGSGK"
                        "SESNSGSGSFKRWKKSPEQGRRSLAKAVKNRKLRKRSFAKSRDYSLKLDDQSSNLRAGESTD"
                        "VECYSSDDTYTDVVWERSDGAPLSNNVRQVGNRLVISNVSPSDAGNYVCKCKTDEGDLYTTS"
                        "YKLEVEDQPHELKSSKIVYAKVGANADLQCGADESRQPTYRWSRQYGQLQAGRSLMNEKLSL"
                        "DSVQANDAGTYICTAQYADGETADFPNILVVTGAIPQFRQEPRSYMSFPTLPNSSFKFNFEL"
                        "TFRPENGDGLLLFNGQTRGSGDYIALSLKDRYAEFRFDFGGKPMLVRAEEPLALNEWHTVRV"
                        "SRFKRDGYIQVDEQHPVAFPTLQQIPQLDLIEDLYIGGVPNWELLPADAVSQQVGFVGCISR"
                        "LTLQGRTVELIREAKYKEGITDCRPCAQGPCQNKGVCLESQTEQAYTCICQPGWTGRDCAIE"
                        "GTQCTPGVCGAGRCENTENDMECLCPLNRSGDRCQYNEILNEHSLNFKGNSFAAYGTPKVTK"
                        "VNITLSVRPASLEDSVILYTAESTLPSGDYLALVLRGGHAELLINTAARLDPVVVRSAEPLP"
                        "LNRWTRIEIRRRLGEGILRVGDGPERKAKAPGSDRILSLKTHLYVGGYDRSTVKVNRDVNIT"
                        "KGFDGCISRLYNFQKPVNLLADIKDAANIQSCGETNMIGGDEDSDNEPPVPPPTPDVHENEL"
                        "QPYAMAPCASDPCENGGSCSEQEDVAVCSCPFGFSGKHCQEHLQLGFNASFRGDGYVELNRS"
                        "HFQPALEQSYTSMGIVFTTNKPNGLLFWWGQEAGEEYTGQDFIAAAVVDGYVEYSMRLDGEE"
                        "AVIRNSDIRVDNGERHIVIAKRDENTAILEVDRMLHSGETRPTSKKSMKLPGNVFVGGAPDL"
                        'EVFTGFRYKHNLNGCIVVVEGETVGQINLSSAAVNGVNANVCPA"',
                    ),
                ),
            ),
            ("gene", "complement(29926..33978)", (("/gene=", '"EG:BACR25B3.10"'),)),
            (
                "CDS",
                "complement(join("
                "29926..30108,30270..30519,30617..31076,31197..31591,31659..31836,"
                "32324..32634,32686..33289,33533..33713,33817..33978"
                "))",
                (
                    ("/gene=", '"EG:BACR25B3.10"'),
                    (
                        "/note=",
                        "\"/prediction=(method:''genefinder'', version:''084'',"
                        " score:''98.50''); /prediction=(method:''genscan'',"
                        " version:''1.0''); /match=(desc:''BASEMENT MEMBRANE-SPECIFIC"
                        " HEPARAN SULFATE PROTEOGLYCAN CORE PROTEIN PRECURSOR (HSPG)"
                        " (PERLECAN) (PLC)'', species:''Homo sapiens (Human)'',"
                        " ranges:(query:33540..33716,"
                        " target:SWISS-PROT::P98160:2716..2658, score:''113.00''),"
                        " (query:32859..32963, target:SWISS-PROT::P98160:3341..3307,"
                        " score:''63.00''), (query:33150..33215,"
                        " target:SWISS-PROT::P98160:3530..3509, score:''73.00''),"
                        " (query:32973..33089, target:SWISS-PROT::P98160:3588..3550,"
                        " score:''71.00''), (query:32358..32567,"
                        " target:SWISS-PROT::P98160:3650..3581, score:''107.00''),"
                        " (query:31222..31323, target:SWISS-PROT::P98160:2620..2587,"
                        " score:''80.00''), (query:31489..31572,"
                        " target:SWISS-PROT::P98160:3387..3360, score:''72.00''),"
                        " (query:31495..31593, target:SWISS-PROT::P98160:3575..3543,"
                        " score:''60.00'')), method:''blastx'', version:''1.4.9'');"
                        " /match=(desc:''GM02481.5prime GM Drosophila melanogaster"
                        " ovary BlueScript Drosophila melanogaster cDNA clone GM02481"
                        " 5prime, mRNA sequence'', species:''Drosophila melanogaster"
                        " (fruit fly)'', ranges:(query:30008..30036,"
                        " target:EMBL::AA695253:29..1, score:''145.00''),"
                        " (query:29549..30004, target:EMBL::AA695253:487..32,"
                        " score:''2262.00'')), method:''blastn'', version:''1.4.9'')\"",
                    ),
                    ("/codon_start=", "1"),
                    ("/protein_id=", '"CAB72285.1"'),
                    ("/db_xref=", '"GI:6946670"'),
                    (
                        "/translation=",
                        '"MFLATLDTNDPTDIGTEDPVLTQIIVSIQKPEITIVPVGGSMTLSCSGRMRWSNSPVIVNW'
                        "YKENSRLPENVEVQGGNLYLYDLQVSDSGVYICQAVNNETASVFKDTVSITITKKDQLSPAE"
                        "IVNLPSHVTFEEYVNNEIICEVLGNPAPRVTWARVDGHADAQSTRTYDNRLIFDSPRKSDEG"
                        "RYRCQAENDQNRDEKYVIVYVQSNPPQPPPQQDRLYITPEEINGLAGESFQLNCQFTSVASL"
                        "RYDWSHNGRSLSSSPARNVEIRGNTLEVRDASESDSGVYTCVAYDVRTRRNFTESARVNIDR"
                        "REEQPFGVLMRMMILTDSLINHSNKPIIESLEQNILIIQGEDYSITCEASGSPYPSIKWAKV"
                        "HDFMPENVHISGNVLTIYGARFENRGVYSCVAENDHGSDLSSTSIDIEPRERPSVKIVSAPL"
                        "QTFSVGAPASLYCTVEGIPDPTVEWVRVDGQPLSPRHKIQSPGYMVIDDIQLEDSGDYECRA"
                        "KNIVGEATGVATITVQEPTLVQIIPDNRDLRLTEGDELSLTCVGSGVPNPEVEWVNEMALKR"
                        "DLYSPPSNTAILKIYRVTKADAGIYTCHGKNEAGSDEAHVRVEVQERRGDIGGVDDDSDRDP"
                        "INYNPPQQQNPGIHQPGSNQLLATDIGDNVTLTCDMFQPLNTRWERVDGAPLPRNAYTIKNR"
                        "LEIVRVEQQNLGQYRCNGIGRDGNVKTYFVKELVLMPLPRIRFYPNIPLTVEAGQNLDVHCQ"
                        "VENVRPEDVHWSTDNNRPLPSSVRIVGSVLRFVSITQAAAGEYRCSAFNQYGNRSQIARVAV"
                        "KKPADFHQVPQSQLQRHREGENIQLQCTVTDQYGVRAQDNVEFNWFRDDRRPLPNNARTDSQ"
                        'ILVLTNLRPEDAGRYICNSYDVDRGQQLPEVSIDLQVLSE"',
                    ),
                ),
            ),
            ("gene", "complement(36119..56153)", (("/gene=", '"EG:BACR25B3.1"'),)),
            (
                "CDS",
                "complement(join("
                "36119..37213,37281..39517,39656..40042,40345..40434,40519..40612,"
                "40681..40814,41546..41620,41855..42085,42188..42415,42751..42876,"
                "43604..43837,44241..44438,44812..44928,45148..45233,45661..45793,"
                "45976..46125,46518..46688,47222..47315,47683..47831,48411..48878,"
                "49437..49562,49763..49876,49971..50102,50319..50441,50827..50937,"
                "52849..52966,56031..56153))",
                (
                    ("/gene=", '"EG:BACR25B3.1"'),
                    (
                        "/note=",
                        "\"/prediction=(method:''genscan'', version:''1.0''); "
                        "/prediction=(method:''genefinder'', version:''084''); "
                        "/match=(desc:''LOW-DENSITY LIPOPROTEIN RECEPTOR-RELATED PROTEIN PRECURSOR (LRP)'', "
                        "species:''Caenorhabditis elegans'', "
                        "ranges:"
                        "(query:50831..50941, target:SWISS-PROT::Q04833:1221..1185, score:''95.00''), "
                        "(query:50840..51025, target:SWISS-PROT::Q04833:2865..2804, score:''102.00''), "
                        "(query:50828..50935, target:SWISS-PROT::Q04833:3788..3753, score:''119.00''), "
                        "(query:50323..50394, target:SWISS-PROT::Q04833:3706..3683, score:''77.00''), "
                        "(query:50326..50433, target:SWISS-PROT::Q04833:1263..1228, score:''120.00''), "
                        "(query:49948..50079, target:SWISS-PROT::Q04833:2917..2874, score:''88.00''), "
                        "(query:49432..49587, target:SWISS-PROT::Q04833:4085..4034, score:''102.00''), "
                        "(query:49429..49560, target:SWISS-PROT::Q04833:3915..3872, score:''97.00''), "
                        "(query:48622..48720, target:SWISS-PROT::Q04833:1302..1270, score:''99.00''), "
                        "(query:47698..47799, target:SWISS-PROT::Q04833:3996..3963, score:''88.00''), "
                        "(query:47686..47775, target:SWISS-PROT::Q04833:3835..3806, score:''59.00''), "
                        "(query:47692..47787, target:SWISS-PROT::Q04833:4041..4010, score:''83.00''), "
                        "(query:47229..47315, target:SWISS-PROT::Q04833:3742..3714, score:''88.00''), "
                        "(query:47220..47312, target:SWISS-PROT::Q04833:3829..3799, score:''67.00''), "
                        "(query:47232..47318, target:SWISS-PROT::Q04833:3866..3838, score:''78.00''), "
                        "(query:46552..46656, target:SWISS-PROT::Q04833:1344..1310, score:''95.00''), "
                        "(query:46543..46650, target:SWISS-PROT::Q04833:3951..3916, score:''98.00''), "
                        "(query:45983..46129, target:SWISS-PROT::Q04833:2870..2822, score:''82.00''), "
                        "(query:45971..46096, target:SWISS-PROT::Q04833:4089..4048, score:''82.00''), "
                        "(query:45678..45764, target:SWISS-PROT::Q04833:3666..3638, score:''80.00''), "
                        "(query:45128..45238, target:SWISS-PROT::Q04833:94..58, score:''100.00''), "
                        "(query:45158..45268, target:SWISS-PROT::Q04833:3990..3954, score:''80.00''), "
                        "(query:44263..44379, target:SWISS-PROT::Q04833:85..47, score:''77.00''), "
                        "(query:44251..44367, target:SWISS-PROT::Q04833:3995..3957, score:''100.00''), "
                        "(query:43605..43688, target:SWISS-PROT::Q04833:2994..2967, score:''84.00''), "
                        "(query:42764..42877, target:SWISS-PROT::Q04833:2951..2914, score:''77.00''), "
                        "(query:42180..42377, target:SWISS-PROT::Q04833:260..195, score:''148.00''), "
                        "(query:42234..42419, target:SWISS-PROT::Q04833:3199..3138, score:''106.00''), "
                        "(query:39807..40013, target:SWISS-PROT::Q04833:2901..2833, score:''167.00''), "
                        "(query:39645..39857, target:SWISS-PROT::Q04833:3138..3068, score:''151.00''), "
                        "(query:39846..40046, target:SWISS-PROT::Q04833:3241..3175, score:''132.00''), "
                        "(query:39654..39866, target:SWISS-PROT::Q04833:3913..3843, score:''201.00'')), "
                        "method:''blastx'', version:''1.4.9''); "
                        "/match=(desc:''LOW-DENSITY LIPOPROTEIN RECEPTOR-RELATED PROTEIN 2 PRECURSOR (MEGALIN) (GLYCOPROTEIN 330)'', "
                        "species:''Homo sapiens (Human)'', "
                        "ranges:"
                        "(query:50834..50935, target:SWISS-PROT::P98164:2733..2700, score:''99.00''), "
                        "(query:50840..50947, target:SWISS-PROT::P98164:3063..3028, score:''94.00''), "
                        "(query:50831..50926, target:SWISS-PROT::P98164:3918..3887, score:''102.00''), "
                        "(query:50326..50433, target:SWISS-PROT::P98164:1222..1187, score:''107.00''), "
                        "(query:50302..50394, target:SWISS-PROT::P98164:3762..3732, score:''91.00''), "
                        "(query:49773..49904, target:SWISS-PROT::P98164:2939..2896, score:''90.00''), "
                        "(query:49438..49578, target:SWISS-PROT::P98164:217..171, score:''116.00''), "
                        "(query:49429..49545, target:SWISS-PROT::P98164:3796..3758, score:''108.00''), "
                        "(query:48622..48720, target:SWISS-PROT::P98164:3544..3512, score:''94.00''), "
                        "(query:48595..48708, target:SWISS-PROT::P98164:3720..3683, score:''86.00''), "
                        "(query:47701..47814, target:SWISS-PROT::P98164:2817..2780, score:''90.00''), "
                        "(query:47692..47799, target:SWISS-PROT::P98164:3674..3639, score:''60.00''), "
                        "(query:47217..47366, target:SWISS-PROT::P98164:3716..3667, score:''96.00''), "
                        "(query:46543..46647, target:SWISS-PROT::P98164:1101..1067, score:''107.00''), "
                        "(query:46552..46656, target:SWISS-PROT::P98164:3873..3839, score:''84.00''), "
                        "(query:45989..46126, target:SWISS-PROT::P98164:3832..3787, score:''98.00''), "
                        "(query:45149..45274, target:SWISS-PROT::P98164:2775..2734, score:''99.00''), "
                        "(query:44780..44893, target:SWISS-PROT::P98164:268..231, score:''76.00''), "
                        "(query:44813..44905, target:SWISS-PROT::P98164:1223..1193, score:''73.00''), "
                        "(query:44251..44361, target:SWISS-PROT::P98164:3630..3594, score:''119.00''), "
                        "(query:43602..43700, target:SWISS-PROT::P98164:179..147, score:''97.00''), "
                        "(query:43674..43781, target:SWISS-PROT::P98164:191..156, score:''90.00''), "
                        "(query:43584..43685, target:SWISS-PROT::P98164:1107..1074, score:''89.00''), "
                        "(query:42758..42865, target:SWISS-PROT::P98164:1264..1229, score:''79.00''), "
                        "(query:42204..42413, target:SWISS-PROT::P98164:2810..2741, score:''136.00''), "
                        "(query:42189..42377, target:SWISS-PROT::P98164:3027..2965, score:''125.00''), "
                        "(query:42186..42293, target:SWISS-PROT::P98164:3110..3075, score:''109.00''), "
                        "(query:42198..42389, target:SWISS-PROT::P98164:3584..3521, score:''137.00''), "
                        "(query:42309..42422, target:SWISS-PROT::P98164:3793..3756, score:''95.00''), "
                        "(query:39654..39791, target:SWISS-PROT::P98164:63..18, score:''132.00''), "
                        "(query:39786..40049, target:SWISS-PROT::P98164:1183..1096, score:''230.00''), "
                        "(query:39657..39890, target:SWISS-PROT::P98164:3109..3032, score:''200.00''), "
                        "(query:39780..39983, target:SWISS-PROT::P98164:3756..3689, score:''194.00''), "
                        "(query:39618..39761, target:SWISS-PROT::P98164:3845..3798, score:''105.00''), "
                        "(query:39651..39779, target:SWISS-PROT::P98164:3964..3922, score:''128.00'')), "
                        "method:''blastx'', version:''1.4.9''); "
                        "/match=(desc:''GM06086.5prime GM Drosophila melanogaster ovary BlueScript Drosophila melanogaster cDNA clone GM06086 5prime, mRNA sequence'', "
                        "species:''Drosophila melanogaster (fruit fly)'', "
                        "ranges:"
                        "(query:50852..51290, target:EMBL::AA802674:672..234, score:''2195.00'')), "
                        "method:''blastn'', version:''1.4.9''); "
                        "/match=(desc:''SD04592.5prime SD Drosophila melanogaster Schneider L2 cell culture pOT2 Drosophila melanogaster cDNA clone SD04592 5prime, mRNA sequence'', "
                        "species:''Drosophila melanogaster (fruit fly)'', "
                        "ranges:(query:37280..37708, target:EMBL::AI532939:429..1, score:''2136.00''), "
                        "(query:37097..37217, target:EMBL::AI532939:545..425, score:''569.00'')), "
                        "method:''blastn'', version:''1.4.9''); "
                        "/match=(desc:''GH03622.5prime GH Drosophila melanogaster head pOT2 Drosophila melanogaster cDNA clone GH03622 5prime, mRNA sequence'', "
                        "species:''Drosophila melanogaster (fruit fly)'', "
                        "ranges:(query:36446..37075, target:EMBL::AI063674:1..630, score:''3150.00'')), "
                        "method:''blastn'', version:''1.4.9''); "
                        "EST embl|AA802674|AA802674 comes from the 5' UTR\"",
                    ),
                    ("/codon_start=", "1"),
                    ("/protein_id=", '"CAB72286.1"'),
                    ("/db_xref=", '"GI:6946671"'),
                    (
                        "/translation=",
                        '"MLLLQLLLQLLLLGKLLLGKTPPTVFGFRLLFAAFRFPLSLHFPHRMHDHFFVRGDTHSCG'
                        "WKNSTTFTIRISAIYRYLNQCQANEFRCNNGDCIDARKRCNNVSDCSEGEDENEECPAACSG"
                        "MEYQCRDGTRCISVSQQCDGHSDCSDGDDEEHCDGIVPKLRYTCPKGKFTCRDLSCISIVHR"
                        "CDGRADCPNDRSDEEGCPCLYDKWQCDDGTCIAKELLCNGNIDCPEDISDERYCEGGYDSEE"
                        "CRFDEFHCGTGECIPMRQVCDNIYDCNDYSDEVNCVEGEEEDRVGIPIGHQPWRPASKHDDW"
                        "LHEMDTSEYQVYQPSNVYEKANSQNPCASNQFRCTTSNVCIPLHLRCDGFYHCNDMSDEKSC"
                        "EQYQRHTTTRRPLTLATPTSRITTQGPGLLERRNTTTATEASRWPWATKTTTIATTTSNPIT"
                        "TVGVANSPPQTCLENIEFACHNRDCISIESVCDGIPDCGRNEDEDDALCKCSGDKYKCQRGG"
                        "GCIPKSQVCDGKPQCHDRSDESACHLHGRLNKTRLGVKCLESQYQCGDGSCISGYKRCNGIH"
                        "DCADASDEYNCIYDYEDTYDTDPNNNPLNECDILEFECDYSQCLPLEKKCDGYADCEDMSDE"
                        "LECQSYTDHCLESEFECDSYCLPRDQLCNGIPNCQDGSDERNCTFCREDAYLCNTGECVADN"
                        "QRCNGIADCADGSDERHCARIYCPPNKLACNGTCVSRRIKCDGIRDCLDGYDEMYCPETNNH"
                        "YPTQNVNVIRPKLGPNPIPKSCRPHEWQCANLECIDSSLQCNEIKDCSDGSDEELSVCFGTA"
                        "TTRLKPSDCSPEQFYCDESCYNRSVRCNGHVDCSDGSDEVGCSLPCPQHQCPSGRCYTESER"
                        "CDRHRHCEDGSDEANCTAILCKDNEFLCFDRQFCINATQQCDGYYDCRDFSDEQNCIGCYAN"
                        "QFRCNNGDCVSGSAPCNGYSECSDHSDELNCGGTQECLPNQFRCNSGQCVSSSVRCNGRTDC"
                        "QDSSDEQNCGHRHTEVSQGLETTGVFTTSTTSTTAMTPLRIICPPTSFKCENGPCISLGLKC"
                        "NGRVDCPYDGSDEADCGQISNDIDPADSNDRRPNQLNLKTYPDSQIIKESREVIFRCRDEGP"
                        "ARAKVKWSRPGGRPLPPGFTDRNGRLEIPNIRVEDAGTYVCEAVGYASYIPGQQVTVNLNVE"
                        "RSWGENKYEEIRSNRIRYGTVPHIDLEFFGLDNDVGSRPESACTEYQATCMNGECIDKSSIC"
                        "DGNPDCSDASDEQSCSLGLKCQPNQFMCSNSKCVDRTWRCDGENDCGDNSDETSCDPEPSGA"
                        "PCRYNEFQCRSGHCIPKSFQCDNVPDCTDGTDEVGCMAPLPIRPPPQSVSLLEYEVLELTCV"
                        "ATGTPTPTIVWRLNWGHVPDKCESKSYGGTGTLRCPDMRPQDSGAYSCEIINTRGTHFVNPD"
                        "TIVTVRPVRTDVCEAGFFNMLARKAEECVQCFCFGVAKACDSANLFTYAIHPPILSHRVVSV"
                        "ELSPLRQIVINEAAPGQDLLTLLHGVQFRATNVHFSGRETPYLALPADYMGNQLKSYGGNLR"
                        "YEVNYRGSGRPVNGPDVIITGNRFTLTYRVRTQPGQNNRVSIPFVPGGWQKPDGRKASREEI"
                        "MMILANVDNILIRLGYLDSTAREVDLINIALDSAGTADKGLGSASLVEKCQCPPGYVGDSCE"
                        "SCASGYVRQPGGPWLGHCVPFIPDSCPSGTYGDPRRGVPCKECPCPLTGSNNFASGCQQSPD"
                        "GDVVCRCNEGYTGRRCEQCAAGYQGNPLAAGGICRRIPDTSCNVDGTYSVHSNGTCQCKDSV"
                        "IGEQCDTCKSKSFHLNSFTYTGCIECFCSGVGLDCDSSTWYRDQVTSTFGRSRVDHGFVLVT"
                        "NYMQPTPDTVPVSMAAEPNALSFIGSADQSGNTLYWSLPAAFLGNKLSSYGGKLTYTLSYSP"
                        "LPNGIMSRNSAPDVVIKSGEDLRLIHYRKSQVVPSVANTYSVEIKESAWQRGDEVVANREHV"
                        "LMALSDITAIYIKATYTTSTKEASLRQVTLDVATPTNLGTPRAVEVEQCRCPEGYLGLSCEQ"
                        "CAPGYARDPEGGIYLGLCRPCECNGHSKYCNSDTGDCEECSDNTEGPSCERCAAGYVGDATR"
                        "GTIYDCQPDEGYPIPSPPAPGNQTLECTAYCQIEGIYDCRGNECLCKRNVIGDQCDQCRPGT"
                        "YGLSAQNQDGCKECYCSGLASQCRSAALYRQLIPVDFILNAPLITDESGAVQDTENLIPDIS"
                        "RNMYTYTHTSYLPKYWSLRGSVLGNQLFSYGGRLSYSLIVESYGNYERGHDIVLIGNGLKLI"
                        "WSRPDGNENQEEYNVRLHEDEQWTRQDRESARPASRSDFMTVLSDLQHILIRATPRVPTQST"
                        "SIGNVILESAVTTRTPGATHASDIELCQCPSGYVGTSCESCAPLHYRDASGSCSLCPCDVSN"
                        'TESCDLVSGGYVECRCKARWKGDRCREIGE"',
                    ),
                ),
            ),
            ("gene", "complement(70720..75241)", (("/gene=", '"EG:BACR25B3.2"'),)),
            (
                "CDS",
                "complement(join("
                "70720..70988,71424..71621,72605..72768,72839..73016,73086..73559,"
                "75217..75241"
                "))",
                (
                    ("/gene=", '"EG:BACR25B3.2"'),
                    (
                        "/note=",
                        "\"/prediction=(method:''genefinder'', version:''084'',"
                        " score:''41.82''); /prediction=(method:''genscan'',"
                        " version:''1.0'')\"",
                    ),
                    ("/codon_start=", "1"),
                    ("/protein_id=", '"CAB72287.1"'),
                    ("/db_xref=", '"GI:6946672"'),
                    (
                        "/translation=",
                        '"MANSKVVAHDESLQGINDSEWQLMGDDIDDGLLDDVDETLKPMETKSEEEDLPTGNWFSQS'
                        "VHRVRRSINRLFGSDDNQERGRRQQRERSQRNRDAINRQKELRRRQKEDHNRWKQMRMERQL"
                        "EKQRLVKRTNHVVFNRATDPRKRASDLYDENEASGYHEEDTTLYRTYFVVNEPYDNEYRDRE"
                        "SVQFQNLQKLLDDDLRNFFHSNYEGNDDEEQEIRSTLERVEPTNDNFKIRVQLRIELPTSVN"
                        "DFGSKLQQQLNVYNRIENLSAATDGVFSFTESSDIEEEAIDVTLPQEEVEGSGSDDSSCRGD"
                        "ATFTCPRSGKTICDEMRCDREIQCPDGEDEEYCNYPNVCTEDQFKCDDKCLELKKRCDGSID"
                        "CLDQTDEAGCINAPEPEPEPEPEPEPEPESEPEAEPEPEPEPEPESEPEQEPEPQVPEANGK"
                        'FY"',
                    ),
                ),
            ),
            ("gene", "121867..127124", (("/gene=", '"EG:BACR25B3.3"'),)),
            (
                "CDS",
                "join("
                "121867..122046,122174..122630,123672..123823,124063..124320,"
                "124392..124688,124755..125018,125094..125254,125317..125576,"
                "126793..127124)",
                (
                    ("/gene=", '"EG:BACR25B3.3"'),
                    (
                        "/note=",
                        "\"/prediction=(method:''genscan'', version:''1.0'',"
                        " score:''174.91''); /prediction=(method:''genefinder'',"
                        " version:''084''); /match=(desc:''PROBABLE G PROTEIN-COUPLED"
                        " RECEPTOR C13B9.4 IN CHROMOSOME III'',"
                        " species:''Caenorhabditis elegans'',"
                        " ranges:(query:123671..123775,"
                        " target:SWISS-PROT::Q09460:107..141, score:''80.00''),"
                        " (query:123743..123829, target:SWISS-PROT::Q09460:235..263,"
                        " score:''72.00''), (query:124072..124332,"
                        " target:SWISS-PROT::Q09460:265..351, score:''161.00''),"
                        " (query:124392..124691, target:SWISS-PROT::Q09460:349..448,"
                        " score:''206.00''), (query:124755..124958,"
                        " target:SWISS-PROT::Q09460:448..515, score:''123.00''),"
                        " (query:124764..125027, target:SWISS-PROT::Q09460:454..541,"
                        " score:''108.00'')), method:''blastx'', version:''1.4.9'');"
                        " /match=(desc:''CALCITONIN RECEPTOR PRECURSOR (CT-R)'',"
                        " species:''Sus scrofa (Pig)'', ranges:(query:124165..124236,"
                        " target:SWISS-PROT::P25117:191..214, score:''54.00''),"
                        " (query:124392..124580, target:SWISS-PROT::P25117:233..295,"
                        " score:''118.00''), (query:124725..124886,"
                        " target:SWISS-PROT::P25117:318..371, score:''127.00'')),"
                        " method:''blastx'', version:''1.4.9'')\"",
                    ),
                    ("/codon_start=", "1"),
                    ("/protein_id=", '"CAB72288.1"'),
                    ("/db_xref=", '"GI:6946673"'),
                    (
                        "/translation=",
                        '"MGAGNRKSETKTKTEAEIEIEMERDQFSIAANACMSMGPMLISKDKAPCSGGRVRHADSLH'
                        "IYYAVDGKMTLLSNILDCGGCISAQRFTRLLRQSGSSGPSPSAPTAGTFESKSMLEPTSSHS"
                        "LATGRVPLLHDFDASTTESPGTYVLDGVARVAQLALEPTVMDALPDSDTEQVLGNLNSSAPW"
                        "NLTLASAAATNFENCSALFVNYTLPQTEFAIRKCELDGRWGSRPNATEVNPPGWTDYGPCYK"
                        "PEIIRLMQQMGSKDFDAYIDIARRTRTLEIVGLCLSLFALIVSLLIFCTFRSLRNNRTKIHK"
                        "NLFVAMVLQVIIRLTLYLDQFRRGNKEAATNTSLSVIENTPYLCEASYVLLEYARTAMFMWM"
                        "FIEGLYLHNMVTVAVFQGSFPLKFFSRLGWCVPILMTTVWARCTVMYMDTSLGECLWNYNLT"
                        "PYYWILEGPRLAVILLNFCFLVNIIRVLVMKLRQSQASDIEQTRKAVRAAIVLLPLLGITNL"
                        "LHQLAPLKTATNFAVWSYGTHFLTSFQGFFIALIYCFLNGEVRAVLLKSLATQLSVRGHPEW"
                        "APKRASMYSGAYNTAPDTDAVQPAGDPSATGKRISPPNKRLNGRKPSSASIVMIHEPQQRQR"
                        "LMPRLQNKAREKGKDRVEKTDAEAEPDPTISHIHSKEAGSARSRTRGSKWIMGICFRGQMCD"
                        "AGLAKDAANIHDVANAADVDACSGSNNNYHNINNNNGSQNNNSIHCNHRDDDKVKGESQSDF"
                        'KEPSNTNAESLVHLALFTAHTSNTQNNTHRNTIFTPIRRRNCS"',
                    ),
                ),
            ),
            ("gene", "complement(128489..129414)", (("/gene=", '"EG:BACR25B3.4"'),)),
            (
                "CDS",
                "complement(join("
                "128489..128715,128777..129140,129196..129313,129374..129414"
                "))",
                (
                    ("/gene=", '"EG:BACR25B3.4"'),
                    (
                        "/note=",
                        "\"/prediction=(method:''genefinder'', version:''084'',"
                        " score:''61.35''); /prediction=(method:''genscan'',"
                        " version:''1.0''); /match=(desc:''VACUOLAR PROTON-ATPASE"
                        " SUBUNIT D'', species:''Oryctolagus cuniculus (Rabbit)'',"
                        " ranges:(query:129190..129324, target:SPTREMBL::O97755:55..11,"
                        " score:''130.00''), (query:128778..129176,"
                        " target:SPTREMBL::O97755:174..42, score:''472.00''),"
                        " (query:128546..128716, target:SPTREMBL::O97755:231..175,"
                        " score:''169.00'')), method:''blastx'', version:''1.4.9'');"
                        " /match=(desc:''VACUOLAR ATP SYNTHASE SUBUNIT D (EC 3.6.1.34)"
                        " (V-ATPASE D SUBUNIT) (V- ATPASE 28 KD ACCESSORY PROTEIN)'',"
                        " species:''Bos taurus (Bovine)'',"
                        " ranges:(query:129190..129324,"
                        " target:SWISS-PROT::P39942:55..11, score:''130.00''),"
                        " (query:128778..129176, target:SWISS-PROT::P39942:174..42,"
                        " score:''471.00''), (query:128546..128716,"
                        " target:SWISS-PROT::P39942:231..175, score:''173.00'')),"
                        " method:''blastx'', version:''1.4.9'');"
                        " /match=(desc:''GH28048.5prime GH Drosophila melanogaster head"
                        " pOT2 Drosophila melanogaster cDNA clone GH28048 5prime, mRNA"
                        " sequence'', species:''Drosophila melanogaster (fruit fly)'',"
                        " ranges:(query:129196..129317, target:EMBL::AI517334:233..112,"
                        " score:''412.00''), (query:128777..129145,"
                        " target:EMBL::AI517334:597..229, score:''1251.00'')),"
                        " method:''blastn'', version:''1.4.9'');"
                        " /match=(desc:''GH07112.5prime GH Drosophila melanogaster head"
                        " pOT2 Drosophila melanogaster cDNA clone GH07112 5prime, mRNA"
                        " sequence'', species:''Drosophila melanogaster (fruit fly)'',"
                        " ranges:(query:129196..129317, target:EMBL::AI108302:223..102,"
                        " score:''412.00''), (query:128777..129145,"
                        " target:EMBL::AI108302:587..219, score:''1251.00''),"
                        " (query:128636..128716, target:EMBL::AI108302:667..587,"
                        " score:''243.00'')), method:''blastn'', version:''1.4.9'')\"",
                    ),
                    ("/codon_start=", "1"),
                    ("/protein_id=", '"CAB72289.1"'),
                    ("/db_xref=", '"GI:6946674"'),
                    (
                        "/translation=",
                        '"MAAKDRLPIFPSRGAQTLMKSRLAGATKGHGLLKKKADALQMRFRLILGKIIETKTLMGQV'
                        "MKEAAFSLAEVKFTTGDINQIVLQNVTKAQIKIRTKKDNVAGVTLPIFEPYTDGVDTYELAG"
                        "LARGGQQLAKLKKNYQSAVRLLVQLASLQTSFVTLDDVIKVTNRRVNAIEHVIIPRINRTIE"
                        "YIISELDELEREEFYRLKKIQDKKREARKASDKLRAEQRLLGQMAEAQEVQNILDEDGDEDL"
                        'LF"',
                    ),
                ),
            ),
            ("gene", "132240..132926", (("/gene=", '"EG:BACR25B3.5"'),)),
            (
                "CDS",
                "132240..132926",
                (
                    ("/gene=", '"EG:BACR25B3.5"'),
                    (
                        "/note=",
                        "\"/prediction=(method:''genefinder'', version:''084'',"
                        " score:''48.06''); /prediction=(method:''genscan'',"
                        " version:''1.0'', score:''132.90'');"
                        " /match=(desc:''N-ACETYLTRANSFERASE'', species:''Drosophila"
                        " melanogaster (Fruit fly)'', ranges:(query:132249..132326,"
                        " target:SPTREMBL::Q94521:60..85, score:''64.00''),"
                        " (query:132600..132842, target:SPTREMBL::Q94521:171..251,"
                        " score:''105.00'')), method:''blastx'', version:''1.4.9'');"
                        " EST embl|AI063093|AI063093 comes from the 3' UTR\"",
                    ),
                    ("/codon_start=", "1"),
                    ("/protein_id=", '"CAB72290.1"'),
                    ("/db_xref=", '"GI:6946675"'),
                    (
                        "/translation=",
                        '"MEYKMIAPEHSEQVMEHLRRNFFADEPLNKAAGLCQNGSSCPALEAHCAEAIQHRMSVMAV'
                        "DAKEKDTLKIVGVVLNGILKPGDTAKALSKLDCNDDADFRKIFDLLHRHNLKHNLFEHFDVD"
                        "CMFDVRILSVDSCYRGQGIANELVKRSVAVAKKNGFRLLKADATGIFSQKIFRSHGFEVFSE"
                        'QPYSKYTDENGKVILPVEAPHIKLQQLYKAICADDQDEKKQSL"',
                    ),
                ),
            ),
            ("gene", "complement(133492..134407)", (("/gene=", '"EG:BACR25B3.6"'),)),
            (
                "CDS",
                "complement(join("
                "133492..133595,133663..133748,133867..134135,134198..134407"
                "))",
                (
                    ("/gene=", '"EG:BACR25B3.6"'),
                    (
                        "/note=",
                        "\"/prediction=(method:''genscan'', version:''1.0'',"
                        " score:''119.22''); /prediction=(method:''genefinder'',"
                        " version:''084''); /match=(desc:''LD41675.5prime LD Drosophila"
                        " melanogaster embryo pOT2 Drosophila melanogaster cDNA clone"
                        " LD41675 5prime, mRNA sequence'', species:''Drosophila"
                        " melanogaster (fruit fly)'', ranges:(query:134192..134531,"
                        " target:EMBL::AI515958:340..1, score:''1691.00''),"
                        " (query:133879..134139, target:EMBL::AI515958:591..331,"
                        " score:''1305.00'')), method:''blastn'', version:''1.4.9'')\"",
                    ),
                    ("/codon_start=", "1"),
                    ("/protein_id=", '"CAB72291.1"'),
                    ("/db_xref=", '"GI:6946676"'),
                    (
                        "/translation=",
                        '"MNGLPPSKHYNLTHYQQRYNWDCGLSCIIMILSAQQREQLLGNFDAVCGEEGFGSSTWTID'
                        "LCYLLMRYQVRHEYFTQTLGIDPNYAQHTYYSKIIDKDERRVTRKFKDARAHGLRVEQRTVD"
                        "MEVILRHLARHGPVILLTNASLLTCEVCKRNVLEKFGYAGHYVVLCGYDMAAQKLFYHNPEV"
                        'HDGHICRCLIESMDTARRAYGTDEDIIFIYEKKETRE"',
                    ),
                ),
            ),
            ("gene", "135479..136829", (("/gene=", '"EG:BACR25B3.7"'),)),
            (
                "CDS",
                "join(135479..135749,135961..136586,136641..136829)",
                (
                    ("/gene=", '"EG:BACR25B3.7"'),
                    (
                        "/note=",
                        "\"/prediction=(method:''genefinder'', version:''084'',"
                        " score:''66.07''); /prediction=(method:''genscan'',"
                        " version:''1.0'', score:''145.64'');"
                        " /match=(desc:''HYPOTHETICAL 40.4 KD TRP-ASP REPEATS"
                        " CONTAINING PROTEIN C14B1.4 IN CHROMOSOME III'',"
                        " species:''Caenorhabditis elegans'',"
                        " ranges:(query:135548..135748,"
                        " target:SWISS-PROT::Q17963:39..105, score:''120.00''),"
                        " (query:135957..136586, target:SWISS-PROT::Q17963:105..314,"
                        " score:''899.00''), (query:136641..136823,"
                        " target:SWISS-PROT::Q17963:315..375, score:''219.00'')),"
                        " method:''blastx'', version:''1.4.9'');"
                        " /match=(desc:''LD30385.5prime LD Drosophila melanogaster"
                        " embryo pOT2 Drosophila melanogaster cDNA clone LD30385"
                        " 5prime, mRNA sequence'', species:''Drosophila melanogaster"
                        " (fruit fly)'', ranges:(query:135288..135749,"
                        " target:EMBL::AA950546:102..563, score:''2301.00''),"
                        " (query:135956..136047, target:EMBL::AA950546:559..650,"
                        " score:''442.00'')), method:''blastn'', version:''1.4.9'');"
                        " /match=(desc:''LD10938.5prime LD Drosophila melanogaster"
                        " embryo BlueScript Drosophila melanogaster cDNA clone LD10938"
                        " 5prime, mRNA sequence'', species:''Drosophila melanogaster"
                        " (fruit fly)'', ranges:(query:136108..136288,"
                        " target:EMBL::AA392005:776..596, score:''212.00'')),"
                        " method:''blastn'', version:''1.4.9'')\"",
                    ),
                    ("/codon_start=", "1"),
                    ("/protein_id=", '"CAB72292.1"'),
                    ("/db_xref=", '"GI:6946677"'),
                    (
                        "/translation=",
                        '"MVPIGAVHGGHPGVVHPPQQPLPTAPSGPNSLQPNSVGQPGATTSSNSSASNKSSLSVKPN'
                        "YTLKFTLAGHTKAVSAVKFSPNGEWLASSSADKLIKIWGAYDGKFEKTISGHKLGISDVAWS"
                        "SDSRLLVSGSDDKTLKVWELSTGKSLKTLKGHSNYVFCCNFNPQSNLIVSGSFDESVRIWDV"
                        "RTGKCLKTLPAHSDPVSAVHFNRDGSLIVSSSYDGLCRIWDTASGQCLKTLIDDDNPPVSFV"
                        "KFSPNGKYILAATLDNTLKLWDYSKGKCLKTYTGHKNEKYCIFANFSVTGGKWIVSGSEDNM"
                        'VYIWNLQSKEVVQKLQGHTDTVLCTACHPTENIIASAALENDKTIKLWKSDT"',
                    ),
                ),
            ),
            ("gene", "145403..147087", (("/gene=", '"EG:BACR25B3.8"'),)),
            (
                "CDS",
                "join(145403..146203,146515..147087)",
                (
                    ("/gene=", '"EG:BACR25B3.8"'),
                    ("/codon_start=", "1"),
                    ("/protein_id=", '"CAB72293.1"'),
                    ("/db_xref=", '"GI:6946678"'),
                    (
                        "/translation=",
                        '"MNSTTKHLLHCTLLITVIVTFEVFSGGIKIDENSFTLVDPWTEYGQLATVLLYLLRFLTLL'
                        "TLPQVLFNFCGLVFYNAFPEKVVLKGSPLLAPFICIRVVTRGDFPDLVKTNVLRNMNTCLDT"
                        "GLENFLIEVVTDKAVNLSQHRRIREIVVPKEYKTRTGALFKSRALQYCLEDNVNVLNDSDWI"
                        "VHLDEETLLTENSVRGIINFVLDGKHPFGQGLITYANENVVNWLTTLADSFRVSDDMGKLRL"
                        "QFKLFHKPLFSWKGSYVVTQVSAERSVSFDNGIDGSVAEDCFFAMRAFSQGYTFNFIEGEMY"
                        "EKSPFTLLDFLQQRKRWLQGILLVVHSKMIPFKHKLLLGISVYSWVTMPLSTSNIIFAALYP"
                        "IPCPNLVDFVCAFIAAINIYMYVFGVIKSFSLYRFGLFRFLACVLGAVCTIPVNVVIENVAV"
                        'IWGLVGKKHKFYVVQKDVRVLETV"',
                    ),
                ),
            ),
            ("gene", "complement(148860..152785)", (("/gene=", '"EG:BACR25B3.9"'),)),
            (
                "CDS",
                "complement(join("
                "148860..148905,148966..149462,149546..151809,151881..152032,"
                "152106..152785"
                "))",
                (
                    ("/gene=", '"EG:BACR25B3.9"'),
                    (
                        "/note=",
                        "\"/prediction=(method:''genscan'', version:''1.0'');"
                        " /prediction=(method:''genefinder'', version:''084'');"
                        " /match=(desc:''HYPOTHETICAL 135.8 KD PROTEIN'',"
                        " species:''Drosophila melanogaster (Fruit fly)'',"
                        " ranges:(query:152096..152785, target:SPTREMBL::Q9XZ29:230..1,"
                        " score:''1147.00''), (query:151882..152043,"
                        " target:SPTREMBL::Q9XZ29:277..224, score:''250.00''),"
                        " (query:149546..151816, target:SPTREMBL::Q9XZ29:1032..276,"
                        " score:''3735.00''), (query:148953..149465,"
                        " target:SPTREMBL::Q9XZ29:1202..1032, score:''890.00''),"
                        " (query:148863..148907, target:SPTREMBL::Q9XZ29:1212..1198,"
                        " score:''76.00'')), method:''blastx'', version:''1.4.9'');"
                        " /match=(desc:''LD21815.5prime LD Drosophila melanogaster"
                        " embryo pOT2 Drosophila melanogaster cDNA clone LD21815 5prime"
                        " similar to L19117: Drosophila melanogaster (chromosome X"
                        " 3A6-8) kinesin-like protein of 3A (klp3A) mRNA sequence'',"
                        " species:''Drosophila melanogaster (fruit fly)'',"
                        " ranges:(query:152482..152787, target:EMBL::AA816942:460..155,"
                        " score:''1485.00''), (query:152401..152483,"
                        " target:EMBL::AA816942:540..458, score:''397.00'')),"
                        " method:''blastn'', version:''1.4.9'')\"",
                    ),
                    ("/codon_start=", "1"),
                    ("/protein_id=", '"CAB72294.1"'),
                    ("/db_xref=", '"GI:6946679"'),
                    (
                        "/translation=",
                        '"MSSEDPSCVAVALRVRPLVQSELDRGCRIAVERSADGAPQVTVNRNESYTYNYVFDIDDSQ'
                        "KDLFETCVQAKVKKLLNGYNVTILAYGQTGSGKTYTMGTAFNGVLDDHVGVIPRAVHDIFTA"
                        "IAEMQSEFRFAVTCSFVELYQEQFYDLFSSKTRDKATVDIREVKNRIIMPGLTELVVTSAQQ"
                        "VTDHLIRGSAGRAVAATAMNETSSRSHAIFTLTLVATKLDGKQSVTTSRFNLVDLAGSERCS"
                        "KTLASGDRFKEGVNINKGLLALGNVINALGSGQAAGYIPYRQSKLTRLLQDSLGGNSITLMI"
                        "ACVSPADYNVAETLSTLRYADRALQIKNKPVVNLDPHAAEVNMLKDVIQKLRVELLSGGKMS"
                        "SSLISAVGAAGLGAIPCEESLAGSMANAAEIQRLKEQVRTLQDRNRKLQQELHQSLLDLTEK"
                        "EMRAHIAEQAHDKLRSHVSELKNKLDQREQAQFGNENTNGDNEMRDFSLLVNRVHVELQRTQ"
                        "EELESQGHESRQRLSSRSHTEGGESGGDEVHEMLHSHSEEYTNKQMNFAGELRNINRQLDLK"
                        "QELHERIMRNFSRLDSDDEDVKLRLCNQKIDDLEAERRDLMDQLRNIKSKDISAKLAEERRK"
                        "RLQLLEQEISDLRRKLITQANLLKIRDKEREKIQNLSTEIRTMKESKVKLIRAMRGESEKFR"
                        "QWKMVREKELTQLKSKDRKMQSEIVRQQTLHSKQRQVLKRKCEEALAANKRLKDALERQASA"
                        "QAQRHKYKDNGGSAAGSSNANAKTDSWVDRELEIILSLIDAEHSLEQLMEDRAVINNHYHLL"
                        "QQEKTSDPAEAAEQARILASLEEELEMRNAQISDLQQKVCPTDLDSRIRSLAEGVQSLGESR"
                        "TVSKQLLKTLVQQRRLQASSLNEQRTTLDELRAQLLDAQQQEDAASKRLRLLQSQHEEQMLA"
                        "QQRAYEEKVSVLIRTANQRWAEARSPAEDQQRNQILEELLSSREALQQELDKLRAKNKSKSK"
                        "AVKSEPQDLDDSFQIVDGNETVVLSDVSDDPDWVPSTSKSKRIQSDSRNVISPPEKQDANVT"
                        "SLGNSSIQSLNSTSATEDGKRCKGCKCRTKCTTKRCGCLSGNNACSETCVCKSNCRNPLNLK"
                        "DHASQCGDGDGQKDETEDADKSDDDGDDEPQTSKENAVKFVTPEAPGKVVASPKQTLQEPKA"
                        'AATPLMNSNVVEDINGPKLAKMSGLAFDTPKRKFF"',
                    ),
                ),
            ),
            (
                "gene",
                "join(153490..154269,AL121804.2:41..610,AL121804.2:672..1487)",
                (("/gene=", '"EG:BACR7C10.3"'),),
            ),
            (
                "CDS",
                "join(153490..154269,AL121804.2:41..610,AL121804.2:672..1487)",
                (
                    ("/gene=", '"EG:BACR7C10.3"'),
                    ("/codon_start=", "1"),
                    ("/protein_id=", '"CAB72295.1"'),
                    ("/db_xref=", '"GI:6946680"'),
                    (
                        "/translation=",
                        '"MEEEAPRFNVLEEAFNGNGNGCANVEATQSAILKVLTRVNRFQMRVRKHIEDNYTEFLPNN'
                        "TSPDIFLEESGSLNREIHDMLENLGSEGLDALDEANVKMAGNGRQLREILLGLGVSEHVLRI"
                        "DELFQCVEEAKATKDYLVLLDLVGRLRAFIYGDDSVDGDAQVATPEVRRIFKALECYETIKV"
                        "KYHVQAYMLQQSLQERFDRLVQLQCKSFPTSRCVTLQVSRDQTQLQDIVQALFQEPYNPARL"
                        "AEFLLDNCIEPVIMRPVMADYSEEADGGTYVRLSLSYATKEPSSAHVRPNYKQVLENLRLLL"
                        "HTLAGINCSVSRDQHVFGIIGDHVKDKMLKLLVDECLIPAVPESTEEYQTSTLCEDVAQLEQ"
                        "LLVDSFIINPEQDRALGQFVEKYETYYRNRMYRRVLETAREIIQRDLQDMVLVAPNNHSAEV"
                        "ANDPFLFPRCMISKSAQDFVKLMDRILRQPTDKLGDQEADPIAGVISIMLHTYINEVPKVHR"
                        "KLLESIPQQAVLFHNNCMFFTHWVAQHANKGIESLAALAKTLQATGQQHFRVQVDYQSSILM"
                        "GIMQEFEFESTHTLGSGPLKLVRQCLRQLELLKNVWANVLPETVYNATFCELINTFVAELIR"
                        "RVFTLRHISAQMACELSDLIDVVLQRAPTLFREPNEVVQVLSWLKLQQLKAMLNASLMEITE"
                        'LWGDGVGPLTASYKSDEIKHLIRALFQDTDWRAKAITQIV"',
                    ),
                ),
            ),
        ]
        self.perform_record_parser_test(
            record, length, locus, definition, accession, titles, features
        )

    def test_record_parser_08(self):
        """Run perform_record_parser_test on the first record of one_of.gb.

        The features handed over include one-of(...) start positions and
        joins that point into other accessions (e.g. U18267.1:1..270).
        """
        with open("GenBank/one_of.gb") as handle:
            record = next(GenBank.Iterator(handle, self.rec_parser))

        source_feature = (
            "source",
            "1..2509",
            (
                ("/organism=", '"Homo sapiens"'),
                ("/db_xref=", '"taxon:9606"'),
                ("/chromosome=", '"12"'),
                ("/map=", '"12q22; 64% (% distance from centromere to telomere)"'),
                ("/clone=", '"P1.516 (DMPC-HFFno.1B-0943F)"'),
                (
                    "/clone_lib=",
                    '"DuPont Merck Hum Fibroblast P1 Library no.1 Series B'
                    ' (compressed) (Genome Systems Inc)"',
                ),
            ),
        )
        utr_feature = ("5'UTR", "one-of(1888,1901)..2200", (("/gene=", '"TMPO"'),))
        gene_feature = (
            "gene",
            "join("
            "1888..2509,U18267.1:1..270,U18268.1:1..309,U18270.1:1..6905,"
            "U18269.1:1..128,U18271.1:1..3234"
            ")",
            (("/gene=", '"TMPO"'),),
        )
        exon_feature = (
            "exon",
            "one-of(1888,1901)..2479",
            (("/gene=", '"TMPO"'), ("/number=", "1")),
        )
        # The two CDS features share their first three join segments; they
        # differ in the remaining segments, the product and the translation.
        beta_cds_feature = (
            "CDS",
            "join("
            "2201..2479,U18267.1:120..246,U18268.1:130..288,U18270.1:4691..4788,"
            "U18269.1:82..>128"
            ")",
            (
                ("/gene=", '"TMPO"'),
                ("/codon_start=", "1"),
                ("/product=", '"thymopoietin beta"'),
                ("/protein_id=", '"AAB60434.1"'),
                ("/db_xref=", '"GI:885684"'),
                (
                    "/translation=",
                    '"MPEFLEDPSVLTKDKLKSELVANNVTLPAGEQRKDVYVQLYLQHLTARNRPPLPAGTNSKG'
                    "PPDFSSDEEREPTPVLGSGAAAAGRSRAAVGRKATKKTDKPRQEDKDDLDVTELTNEDLLDQ"
                    "LVKYGVNPGPIVGTTRKLYEKKLLKLREQGTESRSSTPLPTISSSAENTRQNGSNDSDRYSD"
                    'NEEDSKIELKLEKREPLKGRAKTPVTLKQRRVEHNQSYSQAGITETEWTSGS"',
                ),
            ),
        )
        alpha_cds_feature = (
            "CDS",
            "join("
            "2201..2479,U18267.1:120..246,U18268.1:130..288,U18270.1:39..1558"
            ")",
            (
                ("/gene=", '"TMPO"'),
                ("/codon_start=", "1"),
                ("/product=", '"thymopoietin alpha"'),
                ("/protein_id=", '"AAB60433.1"'),
                ("/db_xref=", '"GI:885683"'),
                (
                    "/translation=",
                    '"MPEFLEDPSVLTKDKLKSELVANNVTLPAGEQRKDVYVQLYLQHLTARNRPPLPAGTNSKG'
                    "PPDFSSDEEREPTPVLGSGAAAAGRSRAAVGRKATKKTDKPRQEDKDDLDVTELTNEDLLDQ"
                    "LVKYGVNPGPIVGTTRKLYEKKLLKLREQGTESRSSTPLPTISSSAENTRQNGSNDSDRYSD"
                    "NEEGKKKEHKKVKSTRDIVPFSELGTTPSGGGFFQGISFPEISTRPPLGSTELQAAKKVHTS"
                    "KGDLPREPLVATNLPGRGQLQKLASERNLFISCKSSHDRCLEKSSSSSSQPEHSAMLVSTAA"
                    "SPSLIKETTTGYYKDIVENICGREKSGIQPLCPERSHISDQSPLSSKRKALEESESSQLISP"
                    "PLAQAIRDYVNSLLVQGGVGSLPGTSNSMPPLDVENIQKRIDQSKFQETEFLSPPRKVPRLS"
                    "EKSVEERDSGSFVAFQNIPGSELMSSFAKTVVSHSLTTLGLEVAKQSQHDKIDASELSFPFH"
                    "ESILKVIEEEWQQVDRQLPSLACKYPVSSREATQILSVPKVDDEILGFISEATPLGGIQAAS"
                    "TESCNQQLDLALCRAYEAAASALQIATHTAFVAKAMQADISEAAQILSSDPSRTHQALGILS"
                    "KTYDAASYICEAAFDEVKMAAHTMGNATVGRRYLWLKDCKINLASKNKLASTPFKGGTLFGG"
                    'EVCKVIKKRGNKH"',
                ),
            ),
        )
        features = [
            source_feature,
            utr_feature,
            gene_feature,
            exon_feature,
            beta_cds_feature,
            alpha_cds_feature,
        ]

        titles = (
            "Structure and mapping of the human thymopoietin (TMPO) gene and"
            " relationship of TMPO beta to rat lamin-associated polypeptide 2",
            "Direct Submission",
        )
        accession = ["U18266"]
        definition = "Human thymopoietin (TMPO) gene, exon 1"
        locus = "HSTMPO1"
        length = 2509
        self.perform_record_parser_test(
            record, length, locus, definition, accession, titles, features
        )

    def test_record_parser_09(self):
        """Run perform_record_parser_test on the first record of NT_019265.gb.

        A length of 0 is passed for this record even though its first source
        feature spans 1..1250660.
        """
        with open("GenBank/NT_019265.gb") as handle:
            record = next(GenBank.Iterator(handle, self.rec_parser))

        chromosome_source = (
            "source",
            "1..1250660",
            (
                ("/organism=", '"Homo sapiens"'),
                ("/db_xref=", '"taxon:9606"'),
                ("/chromosome=", '"1"'),
            ),
        )
        clone_source = (
            "source",
            "1..3290",
            (
                ("/note=", '"Accession AL391218 sequenced by The Sanger Centre"'),
                ("/organism=", '"Homo sapiens"'),
                ("/db_xref=", '"taxon:9606"'),
                ("/clone=", '"RP11-13G5"'),
            ),
        )
        fish_clone = (
            "misc_feature",
            "215902..365470",
            (
                ("/standard_name=", '"RP11-242F24"'),
                ("/note=", '"FISH-mapped clone"'),
            ),
        )
        snp = (
            "variation",
            "217508",
            (
                ("/allele=", '"T"'),
                ("/allele=", '"C"'),
                ("/db_xref=", '"dbSNP:811400"'),
            ),
        )
        transcript = (
            "mRNA",
            "join("
            "342430..342515,363171..363300,365741..365814,376398..376499,"
            "390169..390297,391257..391379,392606..392679,398230..398419,"
            "399082..399167,399534..399650,405844..405913,406704..406761,"
            "406868..407010,407962..408091,408508..409092"
            ")",
            (
                ("/gene=", '"FLJ10737"'),
                ("/product=", '"hypothetical protein FLJ10737"'),
                ("/transcript_id=", '"XM_057697.1"'),
                ("/db_xref=", '"LocusID:55735"'),
            ),
        )
        features = [chromosome_source, clone_source, fish_clone, snp, transcript]

        titles = ("Direct Submission",)
        accession = ["NT_019265"]
        definition = "Homo sapiens chromosome 1 working draft sequence segment"
        locus = "NT_019265"
        length = 0
        self.perform_record_parser_test(
            record, length, locus, definition, accession, titles, features
        )

    def test_record_parser_10(self):
        """Run perform_record_parser_test on the first record of origin_line.gb."""
        with open("GenBank/origin_line.gb") as handle:
            record = next(GenBank.Iterator(handle, self.rec_parser))

        source_qualifiers = (
            ("/organism=", '"Mesorhizobium loti"'),
            ("/strain=", '"MAFF303099"'),
            ("/db_xref=", '"taxon:381"'),
        )
        features = [
            ("source", "1..180", source_qualifiers),
            ("gene", "20..120", (("/gene=", '"fake"'),)),
        ]

        titles = (
            "Complete genome structure of the nitrogen-fixing symbiotic bacterium"
            " Mesorhizobium loti",
            "Direct Submission",
        )
        accession = ["NC_002678"]
        definition = "Mesorhizobium loti, complete genome (edited)"
        locus = "NC_002678"
        length = 180
        self.perform_record_parser_test(
            record, length, locus, definition, accession, titles, features
        )

    def test_record_parser_11(self):
        """Run perform_record_parser_test on the first record of blank_seq.gb."""
        with open("GenBank/blank_seq.gb") as handle:
            record = next(GenBank.Iterator(handle, self.rec_parser))

        source_feature = (
            "source",
            "1..360",
            (
                ("/organism=", '"Homo sapiens"'),
                ("/db_xref=", '"taxon:9606"'),
                ("/chromosome=", '"1"'),
                ("/map=", '"1p36.11"'),
            ),
        )
        protein_feature = (
            "Protein",
            "1..360",
            (("/product=", '"cannabinoid receptor 2 (macrophage)"'),),
        )
        region_feature = (
            "Region",
            "50..299",
            (
                ("/region_name=", '"7 transmembrane receptor (rhodopsin family)"'),
                ("/db_xref=", '"CDD:pfam00001"'),
                ("/note=", '"7tm_1"'),
            ),
        )
        cds_feature = (
            "CDS",
            "1..360",
            (
                # Qualifier without a value: the second element is empty.
                ("/pseudo", ""),
                ("/gene=", '"CNR2"'),
                ("/db_xref=", '"LocusID:1269"'),
                ("/db_xref=", '"MIM:605051"'),
                ("/coded_by=", '"NM_001841.1:127..1209"'),
            ),
        )
        features = [source_feature, protein_feature, region_feature, cds_feature]

        titles = (
            "Molecular characterization of a peripheral receptor for cannabinoids",
            "Expression of central and peripheral cannabinoid receptors in human "
            "immune tissues and leukocyte subpopulations",
            "Molecular cloning, expression and function of the murine CB2 peripheral "
            "cannabinoid receptor",
        )
        accession = ["NP_001832"]
        definition = "cannabinoid receptor 2 (macrophage) [Homo sapiens]"
        locus = "NP_001832"
        length = 360
        self.perform_record_parser_test(
            record, length, locus, definition, accession, titles, features
        )

    def test_record_parser_12(self):
        """Run perform_record_parser_test on the first record of dbsource_wrap.gb."""
        with open("GenBank/dbsource_wrap.gb") as handle:
            record = next(GenBank.Iterator(handle, self.rec_parser))

        # All four Bond features carry this same pair of qualifiers.
        disulfide_qualifiers = (
            ("/bond_type=", '"disulfide"'),
            ("/note=", '"BY SIMILARITY."'),
        )
        features = [
            (
                "source",
                "1..64",
                (
                    ("/organism=", '"Buthus occitanus tunetanus"'),
                    ("/db_xref=", '"taxon:6871"'),
                ),
            ),
            ("Protein", "1..64", (("/product=", '"Neurotoxin III"'),)),
            ("Bond", "bond(12,63)", disulfide_qualifiers),
            ("Bond", "bond(16,36)", disulfide_qualifiers),
            ("Bond", "bond(22,46)", disulfide_qualifiers),
            ("Bond", "bond(26,48)", disulfide_qualifiers),
            ("Site", "64", (("/site_type=", '"amidation"'),)),
        ]

        titles = (
            "Neurotoxins from the venoms of two scorpions: Buthus occitanus tunetanus"
            " and Buthus occitanus mardochei",
        )
        accession = ["P01485"]
        definition = "Neurotoxin III"
        locus = "SCX3_BUTOC"
        length = 64
        self.perform_record_parser_test(
            record, length, locus, definition, accession, titles, features
        )

    def test_record_parser_13(self):
        """Feed the first three records of gbvrl1_start.seq to the record test.

        Each record is pulled from the same iterator with next() and handed to
        perform_record_parser_test together with its own length, locus,
        definition, accession, titles and features.
        """
        path = "GenBank/gbvrl1_start.seq"
        with open(path) as handle:
            records = GenBank.Iterator(handle, self.rec_parser)
            # Everything below stays inside the with block because next(records)
            # is called again further down -- presumably the iterator reads from
            # the handle on demand, so the file has to remain open.

            # First record: AB000048.
            record = next(records)
            length = 2007
            locus = "AB000048"
            definition = (
                "Feline panleukopenia virus DNA for nonstructural protein 1,"
                " complete cds"
            )
            accession = ["AB000048"]
            titles = (
                "Evolutionary pattern of feline panleukopenia virus differs from that"
                " of canine parvovirus",
                "Direct Submission",
            )
            features = [
                (
                    "source",
                    "1..2007",
                    (
                        ("/organism=", '"Feline panleukopenia virus"'),
                        ("/mol_type=", '"genomic DNA"'),
                        ("/isolate=", '"483"'),
                        ("/db_xref=", '"taxon:10786"'),
                        ("/lab_host=", '"Felis domesticus"'),
                    ),
                ),
                (
                    "CDS",
                    "1..2007",
                    (
                        ("/codon_start=", "1"),
                        ("/product=", '"nonstructural protein 1"'),
                        ("/protein_id=", '"BAA19009.1"'),
                        ("/db_xref=", '"GI:1769754"'),
                        (
                            "/translation=",
                            '"MSGNQYTEEVMEGVNWLKKHAEDEAFSFVFKCDNVQLNGKDVRWNNYTKPIQNEELT'
                            "SLIRGAQTAMDQTEEEEMDWESEVDSLAKKQVQTFDALIKKCLFEVFVSKNIEPNECV"
                            "WFIQHEWGKDQGWHCHVLLHSKNLQQATGKWLRRQMNMYWSRWLVTLCSINLTPTEKI"
                            "KLREIAEDSEWVTILTYRHKQTKKDYVKMVHFGNMIAYYFLTKKKIVHMTKESGYFLS"
                            "TDSGWKFNFMKYQDRHTVSTLYTEQMKPETVETTVTTAQETKRGRIQTKKEVSIKCTL"
                            "RDLVSKRVTSPEDWMMLQPDSYIEMMAQPGGENLLKNTLEICTLTLARTKTAFELILE"
                            "KADNTKLTNFDLANSRTCQIFRMHGWNWIKVCHAIACVLNRQGGKRNTVLFHGPASTG"
                            "KSIIAQAIAQAVGNVGCYNAANVNFPFNDCTNKNLIWVEEAGNFGQQVNQFKAICSGQ"
                            "TIRIDQKGKGSKQIEPTPVIMTTNENITIVRIGCEERPEHTQPIRDRMLNIKLVCKLP"
                            "GDFGLVDKEEWPLICAWLVKHGYQSTMANYTHHWGKVPEWDENWAEPKIQEGINSPGC"
                            "KDLETQAASNPQSQDHVLTPLTPDVVDLALEPWSTPDTPIAETANQQSNQLGVTHKDV"
                            'QASPTWSEIEADLRAIFTSEQLEEDFRDDLD"',
                        ),
                    ),
                ),
            ]
            self.perform_record_parser_test(
                record, length, locus, definition, accession, titles, features
            )

            # Second record: AB000049.
            record = next(records)
            length = 2007
            locus = "AB000049"
            definition = (
                "Feline panleukopenia virus DNA for nonstructural protein 1,"
                " complete cds"
            )
            accession = ["AB000049"]
            # NOTE(review): this title reads "differs that of" whereas the other
            # two records use "differs from that of" -- presumably it mirrors
            # the data file; confirm against the file before "fixing" it.
            titles = (
                "Evolutionary pattern of feline panleukopenia virus differs that of"
                " canine parvovirus",
                "Direct Submission",
            )
            features = [
                (
                    "source",
                    "1..2007",
                    (
                        ("/organism=", '"Feline panleukopenia virus"'),
                        ("/mol_type=", '"genomic DNA"'),
                        ("/isolate=", '"94-1"'),
                        ("/db_xref=", '"taxon:10786"'),
                        ("/lab_host=", '"Felis domesticus"'),
                    ),
                ),
                (
                    "CDS",
                    "1..2007",
                    (
                        ("/codon_start=", "1"),
                        ("/product=", '"nonstructural protein 1"'),
                        ("/protein_id=", '"BAA19010.1"'),
                        ("/db_xref=", '"GI:1769756"'),
                        (
                            "/translation=",
                            '"MSGNQYTEEVMEGVNWLKKHAEDEAFSFVFKCDNVQLNGKDVRWNNYTKPIQNEELT'
                            "SLIRGAQTAMDQTEEEEMDWESEVDSLAKKQVQTFDALIKKCLFEVFVSKNIEPNECV"
                            "WFIQHEWGKDQGWHCHVLLHSKNLQQATGKWLRRQMNMYWSRWLVTLCSINLTPTEKI"
                            "KLREIAEDSEWVTILTYRHKQTKKDYVKMVHFGNMIAYYFLTKKKIVHMTKESGYFLS"
                            "TDSGWKFNFMKYQDRHTVSTLYTEQMKPETVETTVTTAQETKRGRIQTKKEVSIKCTL"
                            "RDLVSKRVTSPEDWMMLQPDSYIEMMAQPGGENLLKNTLEICTLTLARTKTAFELILE"
                            "KADNTKLTNFDLANSRTCQIFRMHGWNWIKVCHAIACVLNRQGGKRNTVLFHGPASTG"
                            "KSIIAQAIAQAVGNVGCYNAANVNFPFNDCTNKNLIWVEEAGNFGQQVNQFKAICSGQ"
                            "TIRIDQKGKGSKQIEPTPVIMTTNENITIVRIGCEERPEHTQPIRDRMLNIKLVCKLP"
                            "GDFGLVDKEEWPLICAWLVKHGYQSTMANYTHHWGKVPEWDENWAEPKIQEGINSPGC"
                            "KDLETQAASNPQSQDHVLTPLTPDVVDLALEPWSTPDTPIAETANQQSNQLGVTHKDV"
                            'QASPTWSEIEADLRAIFTSEQLEEDFRDDLD"',
                        ),
                    ),
                ),
            ]
            self.perform_record_parser_test(
                record, length, locus, definition, accession, titles, features
            )

            # Third record: AB000050.
            record = next(records)
            length = 1755
            locus = "AB000050"
            definition = (
                "Feline panleukopenia virus DNA for capsid protein 2, complete cds"
            )
            accession = ["AB000050"]
            titles = (
                "Evolutionary pattern of feline panleukopenia virus differs from that"
                " of canine parvovirus",
                "Direct Submission",
            )
            features = [
                (
                    "source",
                    "1..1755",
                    (
                        ("/organism=", '"Feline panleukopenia virus"'),
                        ("/mol_type=", '"genomic DNA"'),
                        ("/isolate=", '"94-1"'),
                        ("/db_xref=", '"taxon:10786"'),
                        ("/lab_host=", '"Felis domesticus"'),
                    ),
                ),
                (
                    "CDS",
                    "1..1755",
                    (
                        ("/codon_start=", "1"),
                        ("/product=", '"capsid protein 2"'),
                        ("/protein_id=", '"BAA19011.1"'),
                        ("/db_xref=", '"GI:1769758"'),
                        (
                            "/translation=",
                            '"MSDGAVQPDGGQPAVRNERATGSGNGSGGGGGGGSGGVGISTGTFNNQTEFKFLENG'
                            "WVEITANSSRLVHLNMPESENYKRVVVNNMDKTAVKGNMALDDTHVQIVTPWSLVDAN"
                            "AWGVWFNPGDWQLIVNTMSELHLVSFEQEIFNVVLKTVSESATQPPTKVYNNDLTASL"
                            "MVALDSNNTMPFTPAAMRSETLGFYPWKPTIPTPWRYYFQWDRTLIPSHTGTSGTPTN"
                            "VYHGTDPDDVQFYTIENSVPVHLLRTGDEFATGTFFFDCKPCRLTHTWQTNRALGLPP"
                            "FLNSLPQSEGATNFGDIGVQQDKRRGVTQMGNTDYITEATIMRPAEVGYSAPYYSFEA"
                            "STQGPFKTPIAAGRGGAQTDENQAADGDPRYAFGRQHGQKTTTTGETPERFTYIAHQD"
                            "TGRYPEGDWIQNINFNLPVTNDNVLLPTDPIGGKTGINYTNIFNTYGPLTALNNVPPV"
                            "YPNGQIWDKEFDTDLKPRLHVNAPFVCQNNCPGQLFVKVAPNLTNEYDPDASANMSRI"
                            "VTYSDFWWKGKLVFKAKLRASHTWNPIQQMSINVDNQFNYVPNNIGAMKIVYEKSQLA"
                            'PRKLY"',
                        ),
                    ),
                ),
            ]
            self.perform_record_parser_test(
                record, length, locus, definition, accession, titles, features
            )

    def test_record_parser_14(self):
        path = "GenBank/NC_005816.gb"
        with open(path) as handle:
            records = GenBank.Iterator(handle, self.rec_parser)
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", BiopythonParserWarning)
                # BiopythonParserWarning: Premature end of file in sequence data
                record = next(records)
        length = 9609
        locus = "NC_005816"
        definition = (
            "Yersinia pestis biovar Microtus str. 91001 plasmid pPCP1, complete"
            " sequence"
        )
        accession = ["NC_005816"]
        titles = (
            "Genetics of metabolic variations between Yersinia pestis biovars and the"
            " proposal of a new biovar, microtus",
            "Complete genome sequence of Yersinia pestis strain 91001, an isolate"
            " avirulent to humans",
            "Direct Submission",
            "Direct Submission",
        )
        features = [
            (
                "source",
                "1..9609",
                (
                    ("/organism=", '"Yersinia pestis biovar Microtus str. 91001"'),
                    ("/mol_type=", '"genomic DNA"'),
                    ("/strain=", '"91001"'),
                    ("/db_xref=", '"taxon:229193"'),
                    ("/plasmid=", '"pPCP1"'),
                    ("/biovar=", '"Microtus"'),
                ),
            ),
            ("repeat_region", "1..1954", ()),
            (
                "gene",
                "87..1109",
                (("/locus_tag=", '"YP_pPCP01"'), ("/db_xref=", '"GeneID:2767718"')),
            ),
            (
                "CDS",
                "87..1109",
                (
                    ("/locus_tag=", '"YP_pPCP01"'),
                    (
                        "/note=",
                        '"similar to corresponding CDS from previously sequenced pPCP'
                        " plasmid of Yersinia pestis KIM (AF053945) and CO92"
                        " (AL109969), also many transposase entries for insertion"
                        " sequence IS100 of Yersinia pestis. Contains IS21-like element"
                        ' transposase, HTH domain (Interpro|IPR007101)"',
                    ),
                    ("/codon_start=", "1"),
                    ("/transl_table=", "11"),
                    ("/product=", '"putative transposase"'),
                    ("/protein_id=", '"NP_995567.1"'),
                    ("/db_xref=", '"GI:45478712"'),
                    ("/db_xref=", '"GeneID:2767718"'),
                    (
                        "/translation=",
                        '"MVTFETVMEIKILHKQGMSSRAIARELGISRNTVKRYLQAKSEPPKYTPRPAVASLLDEYR'
                        "DYIRQRIADAHPYKIPATVIAREIRDQGYRGGMTILRAFIRSLSVPQEQEPAVRFETEPGRQ"
                        "MQVDWGTMRNGRSPLHVFVAVLGYSRMLYIEFTDNMRYDTLETCHRNAFRFFGGVPREVLYD"
                        "NMKTVVLQRDAYQTGQHRFHPSLWQFGKEMGFSPRLCRPFRAQTKGKVERMVQYTRNSFYIP"
                        "LMTRLRPMGITVDVETANRHGLRWLHDVANQRKHETIQARPCDRWLEEQQSMLALPPEKKEY"
                        'DVHLDENLVNFDKHPLHHPLSIYDSFCRGVA"',
                    ),
                ),
            ),
            (
                "misc_feature",
                "87..959",
                (
                    ("/locus_tag=", '"YP_pPCP01"'),
                    (
                        "/note=",
                        '"Transposase and inactivated derivatives [DNA replication,'
                        ' recombination, and repair]; Region: COG4584"',
                    ),
                    ("/db_xref=", '"CDD:34222"'),
                ),
            ),
            (
                "misc_feature",
                "<111..209",
                (
                    ("/locus_tag=", '"YP_pPCP01"'),
                    (
                        "/note=",
                        '"Helix-turn-helix domain of Hin and related proteins, a family'
                        " of DNA-binding domains unique to bacteria and represented by"
                        " the Hin protein of Salmonella. The basic HTH domain is a"
                        " simple fold comprised of three core helices that form a"
                        ' right-handed...; Region: HTH_Hin_like; cl01116"',
                    ),
                    ("/db_xref=", '"CDD:186341"'),
                ),
            ),
            (
                "misc_feature",
                "438..812",
                (
                    ("/locus_tag=", '"YP_pPCP01"'),
                    ("/note=", '"Integrase core domain; Region: rve; cl01316"'),
                    ("/db_xref=", '"CDD:194099"'),
                ),
            ),
            (
                "gene",
                "1106..1888",
                (("/locus_tag=", '"YP_pPCP02"'), ("/db_xref=", '"GeneID:2767716"')),
            ),
            (
                "CDS",
                "1106..1888",
                (
                    ("/locus_tag=", '"YP_pPCP02"'),
                    (
                        "/note=",
                        '"similar to corresponding CDS form previously sequenced pPCP'
                        " plasmid of Yersinia pestis KIM (AF053945) and CO92"
                        " (AL109969), also many ATP-binding protein entries for"
                        " insertion sequence IS100 of Yersinia pestis. Contains"
                        " Chaperonin clpA/B (Interpro|IPR001270). Contains"
                        " ATP/GTP-binding site motif A (P-loop) (Interpro|IPR001687,"
                        " Molecular Function: ATP binding (GO:0005524)). Contains"
                        " Bacterial chromosomal replication initiator protein, DnaA"
                        " (Interpro|IPR001957, Molecular Function: DNA binding"
                        " (GO:0003677), Molecular Function: DNA replication origin"
                        " binding (GO:0003688), Molecular Function: ATP binding"
                        " (GO:0005524), Biological Process: DNA replication initiation"
                        " (GO:0006270), Biological Process: regulation of DNA"
                        " replication (GO:0006275)). Contains AAA ATPase"
                        " (Interpro|IPR003593, Molecular Function: nucleotide binding"
                        ' (GO:0000166))"',
                    ),
                    ("/codon_start=", "1"),
                    ("/transl_table=", "11"),
                    ("/product=", '"transposase/IS protein"'),
                    ("/protein_id=", '"NP_995568.1"'),
                    ("/db_xref=", '"GI:45478713"'),
                    ("/db_xref=", '"GeneID:2767716"'),
                    (
                        "/translation=",
                        '"MMMELQHQRLMALAGQLQLESLISAAPALSQQAVDQEWSYMDFLEHLLHEEKLARHQRKQA'
                        "MYTRMAAFPAVKTFEEYDFTFATGAPQKQLQSLRSLSFIERNENIVLLGPSGVGKTHLAIAM"
                        "GYEAVRAGIKVRFTTAADLLLQLSTAQRQGRYKTTLQRGVMAPRLLIIDEIGYLPFSQEEAK"
                        "LFFQVIAKRYEKSAMILTSNLPFGQWDQTFAGDAALTSAMLDRILHHSHVVQIKGESYRLRQ"
                        'KRKAGVIAEANPE"',
                    ),
                ),
            ),
            (
                "misc_feature",
                "1109..1885",
                (
                    ("/locus_tag=", '"YP_pPCP02"'),
                    (
                        "/note=",
                        '"transposase/IS protein; Provisional; Region: PRK09183"',
                    ),
                    ("/db_xref=", '"CDD:181681"'),
                ),
            ),
            (
                "misc_feature",
                "1367..>1669",
                (
                    ("/locus_tag=", '"YP_pPCP02"'),
                    (
                        "/note=",
                        '"The AAA+ (ATPases Associated with a wide variety of cellular'
                        " Activities) superfamily represents an ancient group of"
                        " ATPases belonging to the ASCE (for additional strand,"
                        " catalytic E) division of the P-loop NTPase fold. The ASCE"
                        ' division also includes...; Region: AAA; cd00009"',
                    ),
                    ("/db_xref=", '"CDD:99707"'),
                ),
            ),
            (
                "misc_feature",
                "1433..1456",
                (
                    ("/locus_tag=", '"YP_pPCP02"'),
                    ("/note=", '"Walker A motif; other site"'),
                    ("/db_xref=", '"CDD:99707"'),
                ),
            ),
            (
                "misc_feature",
                "order(1436..1459,1619..1621)",
                (
                    ("/locus_tag=", '"YP_pPCP02"'),
                    ("/note=", '"ATP binding site [chemical binding]; other site"'),
                    ("/db_xref=", '"CDD:99707"'),
                ),
            ),
            (
                "misc_feature",
                "1607..1624",
                (
                    ("/locus_tag=", '"YP_pPCP02"'),
                    ("/note=", '"Walker B motif; other site"'),
                    ("/db_xref=", '"CDD:99707"'),
                ),
            ),
            (
                "gene",
                "2925..3119",
                (
                    ("/gene=", '"rop"'),
                    ("/locus_tag=", '"YP_pPCP03"'),
                    ("/gene_synonym=", '"rom"'),
                    ("/db_xref=", '"GeneID:2767717"'),
                ),
            ),
            (
                "CDS",
                "2925..3119",
                (
                    ("/gene=", '"rop"'),
                    ("/locus_tag=", '"YP_pPCP03"'),
                    ("/gene_synonym=", '"rom"'),
                    (
                        "/note=",
                        '"Best Blastp hit =gi|16082682|ref|NP_395229.1| (NC_003132)'
                        " putative replication regulatory protein [Yersinia pestis],"
                        " gi|5763813|emb|CAB531 66.1| (AL109969) putative replication"
                        " regulatory protein [Yersinia pestis]; similar to"
                        " gb|AAK91579.1| (AY048853), RNAI modulator protein Rom"
                        " [Salmonella choleraesuis], Contains Regulatory protein Rop"
                        ' (Interpro|IPR000769)"',
                    ),
                    ("/codon_start=", "1"),
                    ("/transl_table=", "11"),
                    ("/product=", '"putative replication regulatory protein"'),
                    ("/protein_id=", '"NP_995569.1"'),
                    ("/db_xref=", '"GI:45478714"'),
                    ("/db_xref=", '"GeneID:2767717"'),
                    (
                        "/translation=",
                        '"MNKQQQTALNMARFIRSQSLILLEKLDALDADEQAAMCERLHELAEELQNSIQARFEAESE'
                        'TGT"',
                    ),
                ),
            ),
            (
                "misc_feature",
                "2925..3107",
                (
                    ("/gene=", '"rop"'),
                    ("/locus_tag=", '"YP_pPCP03"'),
                    ("/gene_synonym=", '"rom"'),
                    ("/note=", '"Rop protein; Region: Rop; pfam01815"'),
                    ("/db_xref=", '"CDD:145136"'),
                ),
            ),
            (
                "gene",
                "3486..3857",
                (("/locus_tag=", '"YP_pPCP04"'), ("/db_xref=", '"GeneID:2767720"')),
            ),
            (
                "CDS",
                "3486..3857",
                (
                    ("/locus_tag=", '"YP_pPCP04"'),
                    (
                        "/note=",
                        '"Best Blastp hit = gi|321919|pir||JQ1541 hypothetical 16.9K'
                        ' protein - Salmonella typhi murium plasmid NTP16."',
                    ),
                    ("/codon_start=", "1"),
                    ("/transl_table=", "11"),
                    ("/product=", '"hypothetical protein"'),
                    ("/protein_id=", '"NP_995570.1"'),
                    ("/db_xref=", '"GI:45478715"'),
                    ("/db_xref=", '"GeneID:2767720"'),
                    (
                        "/translation=",
                        '"MSKKRRPQKRPRRRRFFHRLRPPDEHHKNRRSSQRWRNPTGLKDTRRFPPEAPSCALLFRP'
                        'CRLPDTSPPFSLREAWRFLIAHAVGISVRCRSFAPSWAVCTNPPFSPTTAPYPVTIVLSPTR"',
                    ),
                ),
            ),
            (
                "misc_feature",
                "3498..3626",
                (
                    ("/locus_tag=", '"YP_pPCP04"'),
                    (
                        "/note=",
                        '"ProfileScan match to entry PS50323 ARG_RICH, E-value 8.981"',
                    ),
                ),
            ),
            (
                "gene",
                "4343..4780",
                (
                    ("/gene=", '"pim"'),
                    ("/locus_tag=", '"YP_pPCP05"'),
                    ("/db_xref=", '"GeneID:2767712"'),
                ),
            ),
            (
                "CDS",
                "4343..4780",
                (
                    ("/gene=", '"pim"'),
                    ("/locus_tag=", '"YP_pPCP05"'),
                    (
                        "/note=",
                        '"similar to many previously sequenced pesticin immunity'
                        " protein entries of Yersinia pestis plasmid pPCP, e.g. gi|"
                        " 16082683|,ref|NP_395230.1| (NC_003132) ,"
                        " gi|1200166|emb|CAA90861.1| (Z54145 ) , gi|1488655|"
                        " emb|CAA63439.1| (X92856) , gi|2996219|gb|AAC62543.1|"
                        ' (AF053945) , and gi|5763814|emb|CAB531 67.1| (AL109969)"',
                    ),
                    ("/codon_start=", "1"),
                    ("/transl_table=", "11"),
                    ("/product=", '"pesticin immunity protein"'),
                    ("/protein_id=", '"NP_995571.1"'),
                    ("/db_xref=", '"GI:45478716"'),
                    ("/db_xref=", '"GeneID:2767712"'),
                    (
                        "/translation=",
                        '"MGGGMISKLFCLALIFLSSSGLAEKNTYTAKDILQNLELNTFGNSLSHGIYGKQTTFKQTE'
                        "FTNIKSNTKKHIALINKDNSWMISLKILGIKRDEYTVCFEDFSLIRPPTYVAIHPLLIKKVK"
                        'SGNFIVVKEIKKSIPGCTVYYH"',
                    ),
                ),
            ),
            (
                "gene",
                "complement(4815..5888)",
                (
                    ("/gene=", '"pst"'),
                    ("/locus_tag=", '"YP_pPCP06"'),
                    ("/db_xref=", '"GeneID:2767721"'),
                ),
            ),
            (
                "CDS",
                "complement(4815..5888)",
                (
                    ("/gene=", '"pst"'),
                    ("/locus_tag=", '"YP_pPCP06"'),
                    (
                        "/note=",
                        '"Best Blastp hit =|16082684|ref|NP_395231.1| (NC_003132)'
                        " pesticin [Yersinia pestis], gi|984824|gb|AAA75369.1| (U31974)"
                        " pesticin [Yersinia pestis], gi|1488654|emb|CAA63438.1|"
                        " (X92856) pesticin [Yersinia pestis],"
                        " gi|2996220|gb|AAC62544.1| (AF053945) pesticin [Yersinia"
                        " pestis], gi|5763815|emb|CAB53168.1| (AL1099 69) pesticin"
                        ' [Yersinia pestis]"',
                    ),
                    ("/codon_start=", "1"),
                    ("/transl_table=", "11"),
                    ("/product=", '"pesticin"'),
                    ("/protein_id=", '"NP_995572.1"'),
                    ("/db_xref=", '"GI:45478717"'),
                    ("/db_xref=", '"GeneID:2767721"'),
                    (
                        "/translation=",
                        '"MSDTMVVNGSGGVPAFLFSGSTLSSYRPNFEANSITIALPHYVDLPGRSNFKLMYIMGFPI'
                        "DTEMEKDSEYSNKIRQESKISKTEGTVSYEQKITVETGQEKDGVKVYRVMVLEGTIAESIEH"
                        "LDKKENEDILNNNRNRIVLADNTVINFDNISQLKEFLRRSVNIVDHDIFSSNGFEGFNPTSH"
                        "FPSNPSSDYFNSTGVTFGSGVDLGQRSKQDLLNDGVPQYIADRLDGYYMLRGKEAYDKVRTA"
                        "PLTLSDNEAHLLSNIYIDKFSHKIEGLFNDANIGLRFSDLPLRTRTALVSIGYQKGFKLSRT"
                        'APTVWNKVIAKDWNGLVNAFNNIVDGMSDRRKREGALVQKDIDSGLLK"',
                    ),
                ),
            ),
            (
                "variation",
                "5910..5911",
                (("/note=", '"compared to AF053945"'), ("/replace=", '""')),
            ),
            (
                "variation",
                "5933^5934",
                (("/note=", '"compared to AL109969"'), ("/replace=", '"a"')),
            ),
            (
                "variation",
                "5933^5934",
                (("/note=", '"compared to AF053945"'), ("/replace=", '"aa"')),
            ),
            (
                "variation",
                "5948",
                (("/note=", '"compared to AL109969"'), ("/replace=", '"c"')),
            ),
            (
                "gene",
                "6005..6421",
                (("/locus_tag=", '"YP_pPCP07"'), ("/db_xref=", '"GeneID:2767719"')),
            ),
            (
                "CDS",
                "6005..6421",
                (
                    ("/locus_tag=", '"YP_pPCP07"'),
                    (
                        "/note=",
                        '"Best Blastp hit = gi|16082685|ref|NP_395232.1| (NC_003132)'
                        " hypothetical protein [Yersinia pestis],"
                        " gi|5763816|emb|CAB53169.1| (AL109969) hypothetical protein"
                        ' [Yersinia pestis]"',
                    ),
                    ("/codon_start=", "1"),
                    ("/transl_table=", "11"),
                    ("/product=", '"hypothetical protein"'),
                    ("/protein_id=", '"NP_995573.1"'),
                    ("/db_xref=", '"GI:45478718"'),
                    ("/db_xref=", '"GeneID:2767719"'),
                    (
                        "/translation=",
                        '"MKFHFCDLNHSYKNQEGKIRSRKTAPGNIRKKQKGDNVSKTKSGRHRLSKTDKRLLAALVV'
                        "AGYEERTARDLIQKHVYTLTQADLRHLVSEISNGVGQSQAYDAIYQARRIRLARKYLSGKKP"
                        'EGVEPREGQEREDLP"',
                    ),
                ),
            ),
            (
                "variation",
                "6525",
                (
                    ("/note=", '"compared to AF053945 and AL109969"'),
                    ("/replace=", '"c"'),
                ),
            ),
            (
                "gene",
                "6664..7602",
                (
                    ("/gene=", '"pla"'),
                    ("/locus_tag=", '"YP_pPCP08"'),
                    ("/db_xref=", '"GeneID:2767715"'),
                ),
            ),
            (
                "CDS",
                "6664..7602",
                (
                    ("/gene=", '"pla"'),
                    ("/locus_tag=", '"YP_pPCP08"'),
                    ("/EC_number=", '"3.4.23.48"'),
                    (
                        "/note=",
                        '"outer membrane protease; involved in virulence in many'
                        " organisms; OmpT; IcsP; SopA; Pla; PgtE; omptin; in"
                        " Escherichia coli OmpT can degrade antimicrobial peptides; in"
                        " Yersinia Pla activates plasminogen during infection; in"
                        ' Shigella flexneria SopA cleaves the autotransporter IcsA"',
                    ),
                    ("/codon_start=", "1"),
                    ("/transl_table=", "11"),
                    ("/product=", '"outer membrane protease"'),
                    ("/protein_id=", '"NP_995574.1"'),
                    ("/db_xref=", '"GI:45478719"'),
                    ("/db_xref=", '"GeneID:2767715"'),
                    (
                        "/translation=",
                        '"MKKSSIVATIITILSGSANAASSQLIPNISPDSFTVAASTGMLSGKSHEMLYDAETGRKIS'
                        "QLDWKIKNVAILKGDISWDPYSFLTLNARGWTSLASGSGNMDDYDWMNENQSEWTDHSSHPA"
                        "TNVNHANEYDLNVKGWLLQDENYKAGITAGYQETRFSWTATGGSYSYNNGAYTGNFPKGVRV"
                        "IGYNQRFSMPYIGLAGQYRINDFELNALFKFSDWVRAHDNDEHYMRDLTFREKTSGSRYYGT"
                        "VINAGYYVTPNAKVFAEFTYSKYDEGKGGTQIIDKNSGDSVSIGGDAAGISNKNYTVTAGLQ"
                        'YRF"',
                    ),
                ),
            ),
            (
                "misc_feature",
                "6664..7599",
                (
                    ("/gene=", '"pla"'),
                    ("/locus_tag=", '"YP_pPCP08"'),
                    ("/note=", '"Omptin family; Region: Omptin; cl01886"'),
                    ("/db_xref=", '"CDD:186487"'),
                ),
            ),
            (
                "gene",
                "complement(7789..8088)",
                (("/locus_tag=", '"YP_pPCP09"'), ("/db_xref=", '"GeneID:2767713"')),
            ),
            (
                "CDS",
                "complement(7789..8088)",
                (
                    ("/locus_tag=", '"YP_pPCP09"'),
                    (
                        "/note=",
                        '"Best Blastp hit = gi|16082687|ref|NP_395234.1| (NC_003132)'
                        " putative transcriptional regulator [Yersinia pestis],"
                        " gi|5763818|emb|CAB53171.1| (AL109969) putative"
                        ' transcriptional regulator [Yersinia pestis]."',
                    ),
                    ("/codon_start=", "1"),
                    ("/transl_table=", "11"),
                    ("/product=", '"putative transcriptional regulator"'),
                    ("/protein_id=", '"NP_995575.1"'),
                    ("/db_xref=", '"GI:45478720"'),
                    ("/db_xref=", '"GeneID:2767713"'),
                    (
                        "/translation=",
                        '"MRTLDEVIASRSPESQTRIKEMADEMILEVGLQMMREELQLSQKQVAEAMGISQPAVTKLE'
                        'QRGNDLKLATLKRYVEAMGGKLSLDVELPTGRRVAFHV"',
                    ),
                ),
            ),
            (
                "misc_feature",
                "complement(7837..7995)",
                (
                    ("/locus_tag=", '"YP_pPCP09"'),
                    (
                        "/note=",
                        '"Helix-turn-helix XRE-family like proteins. Prokaryotic DNA'
                        " binding proteins belonging to the xenobiotic response element"
                        " family of transcriptional regulators; Region: HTH_XRE;"
                        ' cl09100"',
                    ),
                    ("/db_xref=", '"CDD:195788"'),
                ),
            ),
            (
                "gene",
                "complement(8088..8360)",
                (("/locus_tag=", '"YP_pPCP10"'), ("/db_xref=", '"GeneID:2767714"')),
            ),
            (
                "CDS",
                "complement(8088..8360)",
                (
                    ("/locus_tag=", '"YP_pPCP10"'),
                    (
                        "/note=",
                        '"Best Blastp hit = gi|16082688|ref|NP_395235.1| (NC_003132)'
                        " hypothetical protein [ Yersinia pestis],"
                        " gi|5763819|emb|CAB53172.1| (AL109969) hypothetical protein"
                        ' [Yersinia pestis]"',
                    ),
                    ("/codon_start=", "1"),
                    ("/transl_table=", "11"),
                    ("/product=", '"hypothetical protein"'),
                    ("/protein_id=", '"NP_995576.1"'),
                    ("/db_xref=", '"GI:45478721"'),
                    ("/db_xref=", '"GeneID:2767714"'),
                    (
                        "/translation=",
                        '"MADLKKLQVYGPELPRPYADTVKGSRYKNMKELRVQFSGRPIRAFYAFDPIRRAIVLCAGD'
                        'KSNDKRFYEKLVRIAEDEFTAHLNTLESK"',
                    ),
                ),
            ),
            (
                "misc_feature",
                "complement(8091..>8357)",
                (
                    ("/locus_tag=", '"YP_pPCP10"'),
                    (
                        "/note=",
                        '"Phage derived protein Gp49-like (DUF891); Region: Gp49;'
                        ' cl01470"',
                    ),
                    ("/db_xref=", '"CDD:194142"'),
                ),
            ),
            (
                "variation",
                "8529^8530",
                (("/note=", '"compared to AL109969"'), ("/replace=", '"tt"')),
            ),
        ]
        self.perform_record_parser_test(
            record, length, locus, definition, accession, titles, features
        )

    def test_record_parser_15(self):
        """Parse GenBank/no_end_marker.gb using self.rec_parser.

        The first record is read with BiopythonParserWarning silenced, and
        the expected values are handed to perform_record_parser_test.
        """
        with open("GenBank/no_end_marker.gb") as stream:
            iterator = GenBank.Iterator(stream, self.rec_parser)
            with warnings.catch_warnings():
                # Warning being silenced:
                # BiopythonParserWarning: Premature end of file in sequence data
                warnings.simplefilter("ignore", BiopythonParserWarning)
                record = next(iterator)
        source_feature = (
            "source",
            "1..6497",
            (
                ("/organism=", '"Streptomyces avermitilis"'),
                ("/mol_type=", '"genomic DNA"'),
                ("/db_xref=", '"taxon:33903"'),
            ),
        )
        self.perform_record_parser_test(
            record,
            6497,  # length
            "AB070938",  # locus
            "Streptomyces avermitilis melanin biosynthetic gene cluster",
            ["AB070938"],  # accession
            (),  # titles
            [source_feature],  # features
        )

    def test_record_parser_16(self):
        """Parse GenBank/wrong_sequence_indent.gb using self.rec_parser.

        The first record is read with BiopythonParserWarning silenced, and
        the expected values are handed to perform_record_parser_test.
        """
        with open("GenBank/wrong_sequence_indent.gb") as stream:
            iterator = GenBank.Iterator(stream, self.rec_parser)
            with warnings.catch_warnings():
                # Warning being silenced:
                # BiopythonParserWarning: Invalid indentation for sequence line
                warnings.simplefilter("ignore", BiopythonParserWarning)
                record = next(iterator)
        source_feature = (
            "source",
            "1..6497",
            (
                ("/organism=", '"Streptomyces avermitilis"'),
                ("/mol_type=", '"genomic DNA"'),
                ("/db_xref=", '"taxon:33903"'),
            ),
        )
        self.perform_record_parser_test(
            record,
            6497,  # length
            "AB070938",  # locus
            "Streptomyces avermitilis melanin biosynthetic gene cluster",
            ["AB070938"],  # accession
            (),  # titles
            [source_feature],  # features
        )

    def test_record_parser_17(self):
        """Parse GenBank/invalid_locus_line_spacing.gb using self.rec_parser.

        The first record is read with BiopythonParserWarning silenced, and
        the expected values are handed to perform_record_parser_test.
        """
        with open("GenBank/invalid_locus_line_spacing.gb") as stream:
            iterator = GenBank.Iterator(stream, self.rec_parser)
            with warnings.catch_warnings():
                # Warning being silenced:
                # BiopythonParserWarning: Attempting to parse malformed locus line
                warnings.simplefilter("ignore", BiopythonParserWarning)
                record = next(iterator)
        source_feature = (
            "source",
            "1..6497",
            (
                ("/organism=", '"Streptomyces avermitilis"'),
                ("/mol_type=", '"genomic DNA"'),
                ("/db_xref=", '"taxon:33903"'),
            ),
        )
        self.perform_record_parser_test(
            record,
            6497,  # length
            "AB070938",  # locus
            "Streptomyces avermitilis melanin biosynthetic gene cluster",
            ["AB070938"],  # accession
            (),  # titles
            [source_feature],  # features
        )

    def test_record_parser_18(self):
        """Parse GenBank/empty_feature_qualifier.gb using self.rec_parser.

        The expected source feature ends with a "/note" qualifier that has
        an empty value; all expected values are handed to
        perform_record_parser_test.
        """
        with open("GenBank/empty_feature_qualifier.gb") as stream:
            record = next(GenBank.Iterator(stream, self.rec_parser))
        source_feature = (
            "source",
            "1..6497",
            (
                ("/organism=", '"Streptomyces avermitilis"'),
                ("/mol_type=", '"genomic DNA"'),
                ("/db_xref=", '"taxon:33903"'),
                ("/note=", '"This is a correct note, the following one isn\'t"'),
                ("/note", ""),
            ),
        )
        self.perform_record_parser_test(
            record,
            6497,  # length
            "AB070938",  # locus
            "Streptomyces avermitilis melanin biosynthetic gene cluster",
            ["AB070938"],  # accession
            (),  # titles
            [source_feature],  # features
        )

    def test_record_parser_19(self):
        """Parse GenBank/invalid_misc_feature.gb using self.rec_parser.

        The first record is read with BiopythonParserWarning silenced, and
        the expected values are handed to perform_record_parser_test.
        """
        with open("GenBank/invalid_misc_feature.gb") as stream:
            iterator = GenBank.Iterator(stream, self.rec_parser)
            with warnings.catch_warnings():
                # Warning being silenced:
                # BiopythonParserWarning: line too short to contain a feature
                warnings.simplefilter("ignore", BiopythonParserWarning)
                record = next(iterator)
        source_feature = (
            "source",
            "1..6497",
            (
                ("/organism=", '"Streptomyces avermitilis"'),
                ("/mol_type=", '"genomic DNA"'),
                ("/db_xref=", '"taxon:33903"'),
            ),
        )
        self.perform_record_parser_test(
            record,
            6497,  # length
            "AB070938",  # locus
            "Streptomyces avermitilis melanin biosynthetic gene cluster",
            ["AB070938"],  # accession
            (),  # titles
            [source_feature],  # features
        )

    def test_record_parser_20(self):
        """Parse GenBank/1MRR_A.gp using self.rec_parser.

        The first record is read and the expected length, locus, definition,
        accession, reference titles and features are handed to
        perform_record_parser_test.
        """

        def sec_str(location, sec_str_type, note):
            # Expected "SecStr" feature with its two qualifiers.
            return (
                "SecStr",
                location,
                (("/sec_str_type=", sec_str_type), ("/note=", note)),
            )

        def site(location, note):
            # Expected "Site" feature; every one in this record has the same
            # site type and CDD cross reference.
            return (
                "Site",
                location,
                (
                    ("/site_type=", '"other"'),
                    ("/note=", note),
                    ("/db_xref=", '"CDD:153108"'),
                ),
            )

        def het(location, heterogen):
            # Expected "Het" feature with its single qualifier.
            return ("Het", location, (("/heterogen=", heterogen),))

        with open("GenBank/1MRR_A.gp") as stream:
            record = next(GenBank.Iterator(stream, self.rec_parser))

        expected_definition = (
            "Chain A, Substitution Of Manganese For Iron In Ribonucleotide Reductase"
            " From Escherichia Coli. Spectroscopic And Crystallographic"
            " Characterization"
        )
        expected_titles = (
            "Three-dimensional structure of the free radical protein of ribonucleotide"
            " reductase",
            "Substitution of manganese for iron in ribonucleotide reductase from"
            " Escherichia coli. Spectroscopic and crystallographic characterization",
            "Direct Submission",
        )
        # Order matters: it must match the order of the features in the file.
        expected_features = [
            (
                "source",
                "1..375",
                (("/organism=", '"Escherichia coli"'), ("/db_xref=", '"taxon:562"')),
            ),
            (
                "Region",
                "28..340",
                (
                    ("/region_name=", '"RNRR2"'),
                    (
                        "/note=",
                        '"Ribonucleotide Reductase, R2/beta subunit, ferritin-like'
                        ' diiron-binding domain; cd01049"',
                    ),
                    ("/db_xref=", '"CDD:153108"'),
                ),
            ),
            sec_str("35..46", '"helix"', '"helix 1"'),
            site(
                "order(37,44,109..110,113,116..117,120,123,137..138,141)",
                '"dimer interface [polypeptide binding]"',
            ),
            site(
                "order(48,84,115,118,122,236..237,241)",
                '"putative radical transfer pathway"',
            ),
            sec_str("57..65", '"helix"', '"helix 2"'),
            sec_str("67..87", '"helix"', '"helix 3"'),
            site("order(84,115,118,204,238,241)", '"diiron center [ion binding]"'),
            het("join(bond(84),bond(115),bond(118),bond(238))", '"( MN,1000 )"'),
            sec_str("102..129", '"helix"', '"helix 4"'),
            het("join(bond(115),bond(204),bond(238),bond(241))", '"( MN,1001 )"'),
            site("122", '"tyrosyl radical"'),
            sec_str("133..140", '"helix"', '"helix 5"'),
            sec_str("143..151", '"helix"', '"helix 6"'),
            sec_str("153..169", '"helix"', '"helix 7"'),
            sec_str("172..177", '"sheet"', '"strand 1"'),
            sec_str("180..185", '"sheet"', '"strand 2"'),
            sec_str("186..216", '"helix"', '"helix 8"'),
            het("join(bond(194),bond(272))", '"( HG,1003 )"'),
            het("bond(196)", '"( HG,1005 )"'),
            het("join(bond(196),bond(196))", '"( HG,1002 )"'),
            het("join(bond(210),bond(214),bond(214))", '"( HG,1004 )"'),
            sec_str("225..253", '"helix"', '"helix 9"'),
            sec_str("260..269", '"helix"', '"helix 10"'),
            ("Bond", "bond(268,272)", (("/bond_type=", '"disulfide"'),)),
            sec_str("270..285", '"helix"', '"helix 11"'),
            het("join(bond(284),bond(305),bond(309),bond(305))", '"( HG,1006 )"'),
            sec_str("301..319", '"helix"', '"helix 12"'),
        ]
        self.perform_record_parser_test(
            record,
            375,  # length
            "1MRR_A",  # locus
            expected_definition,
            ["1MRR_A"],  # accession
            expected_titles,
            expected_features,
        )

    def test_record_parser_tsa(self):
        """Parse GenBank/tsa_acropora.gb using self.rec_parser.

        The first record is read and the expected values, including the
        expected tsa pair, are handed to perform_record_parser_test.
        """
        with open("GenBank/tsa_acropora.gb") as stream:
            record = next(GenBank.Iterator(stream, self.rec_parser))
        expected_titles = (
            "Acropora millepora genome sequencing and assembly",
            "Direct Submission",
        )
        source_feature = (
            "source",
            "1..126539",
            (
                ("/organism=", '"Acropora millepora"'),
                ("/mol_type=", '"transcribed RNA"'),
                ("/db_xref=", '"taxon:45264"'),
                ("/tissue_type=", '"late planula"'),
                ("/country=", '"Australia: Queensland"'),
                ("/collection_date=", '"2011"'),
            ),
        )
        self.perform_record_parser_test(
            record,
            0,  # length
            "GHGH01000000",  # locus
            "TSA: Acropora millepora, transcriptome shotgun assembly",
            ["GHGH00000000"],  # accession
            expected_titles,
            [source_feature],
            tsa=["GHGH01000001", "GHGH01126539"],
        )

    def test_record_parser_tls(self):
        """Parse GenBank/tls_KDHP01000000.gb using self.rec_parser.

        The first record is read and the expected values, including the
        expected tls pair, are handed to perform_record_parser_test.
        """
        with open("GenBank/tls_KDHP01000000.gb") as stream:
            record = next(GenBank.Iterator(stream, self.rec_parser))
        expected_titles = (
            "Spatio-temporal dynamics of soil bacterial communities in function of"
            " Amazon forest phenology",
            "Direct Submission",
        )
        source_feature = (
            "source",
            "1..3714",
            (
                ("/organism=", '"soil metagenome"'),
                ("/mol_type=", '"genomic DNA"'),
                (
                    "/isolation_source=",
                    '"soil samples in a lowland tropical evergreen rain forest in'
                    ' Amazonia"',
                ),
                ("/db_xref=", '"taxon:410658"'),
                ("/environmental_sample", ""),
                ("/country=", '"Brazil: Manaus"'),
                ("/lat_lon=", '"2.92 S 59.95 W"'),
                ("/collection_date=", '"2013"'),
                ("/note=", '"metagenomic"'),
            ),
        )
        self.perform_record_parser_test(
            record,
            0,  # length
            "KBUV01000000",  # locus
            "TLS: soil metagenome 16S ribosomal RNA, targeted locus study",
            ["KBUV00000000"],  # accession
            expected_titles,
            [source_feature],
            tls=["KBUV01000001", "KBUV01003714"],
        )


class TestFeatureParser(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        """Create the GenBank.FeatureParser stored on the class as feat_parser."""
        parser = GenBank.FeatureParser(debug_level=0)
        cls.feat_parser = parser

    def shorten(self, seq):
        """Return seq abbreviated for comparison against short expected strings.

        Sequences of length 60 or less are returned unchanged. Longer ones are
        reduced to their first 54 items, a literal "...", and their last 3 items.
        """
        if len(seq) > 60:
            return seq[:54] + "..." + seq[-3:]
        return seq

    def perform_feature_parser_test(
        self,
        record,
        seq,
        id,
        name,
        description,
        annotations,
        references,
        features,
        dbxrefs,
    ):
        self.assertEqual(self.shorten(record.seq), seq)
        self.assertEqual(record.id, id)
        self.assertEqual(record.name, name)
        self.assertEqual(record.description, description)
        references_found = []
        for key in record.annotations:
            if key == "references":
                for reference in record.annotations[key]:
                    references_found.append(str(reference))
            else:
                self.assertIn(key, annotations)
        for key in annotations:
            self.assertEqual(record.annotations[key], annotations[key])
        self.assertEqual(references_found, references)
        for feature1, (feature2, strand) in zip(record.features, features):
            self.assertEqual(str(feature1), feature2)
            self.assertEqual(feature1.strand, strand)
        self.assertEqual(record.dbxrefs, dbxrefs)

    def test_feature_parser_01(self):
        path = "GenBank/noref.gb"
        with open(path) as handle:
            records = GenBank.Iterator(handle, self.feat_parser)
            record = next(records)
        seq = "GGCAAGATGGCGCCGGTGGGGGTGGAGAAGAAGCTGCTGCTAGGTCCCAACGGG...AAA"
        id = "NM_006141.1"
        name = "NM_006141"
        description = (
            "Homo sapiens dynein, cytoplasmic, light intermediate polypeptide 2"
            " (DNCLI2), mRNA"
        )
        annotations = {
            "accessions": ["NM_006141"],
            "comment": """\
PROVISIONAL REFSEQ: This record has not yet been subject to final
NCBI review. The reference sequence was derived from AF035812.1.""",
            "data_file_division": "PRI",
            "date": "01-NOV-2000",
            "gi": "5453633",
            "keywords": [""],
            "molecule_type": "mRNA",
            "organism": "Homo sapiens",
            "sequence_version": 1,
            "source": "human",
            "taxonomy": [
                "Eukaryota",
                "Metazoa",
                "Chordata",
                "Craniata",
                "Vertebrata",
                "Euteleostomi",
                "Mammalia",
                "Eutheria",
                "Primates",
                "Catarrhini",
                "Hominidae",
                "Homo",
            ],
        }
        references = []
        features = (
            (
                """\
type: source
location: [0:1622](+)
qualifiers:
    Key: db_xref, Value: ['taxon:9606']
    Key: map, Value: ['16']
    Key: organism, Value: ['Homo sapiens']
""",
                1,
            ),
            (
                """\
type: gene
location: [0:1622](+)
qualifiers:
    Key: db_xref, Value: ['LocusID:1783']
    Key: gene, Value: ['DNCLI2']
    Key: note, Value: ['LIC2']
""",
                1,
            ),
            (
                """\
type: CDS
location: [6:1485](+)
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['LocusID:1783', 'GI:5453634']
    Key: gene, Value: ['DNCLI2']
    Key: note, Value: ['similar to R. norvegicus and G. gallus dynein light intermediate chain 2, Swiss-Prot Accession Numbers Q62698 and Q90828, respectively']
    Key: product, Value: ['dynein, cytoplasmic, light intermediate polypeptide 2']
    Key: protein_id, Value: ['NP_006132.1']
    Key: translation, Value: ['MAPVGVEKKLLLGPNGPAVAAAGDLTSEEEEGQSLWSSILSEVSTRARSKLPSGKNILVFGEDGSGKTTLMTKLQGAEHGKKGRGLEYLYLSVHDEDRDDHTRCNVWILDGDLYHKGLLKFAVSAESLPETLVIFVADMSRPWTVMESLQKWASVLREHIDKMKIPPEKMRELERKFVKDFQDYMEPEEGCQGSPQRRGPLTSGSDEENVALPLGDNVLTHNLGIPVLVVCTKCDAVSVLEKEHDYRDEHLDFIQSHLRRFCLQYGAALIYTSVKEEKNLDLLYKYIVHKTYGFHFTTPALVVEKDAVFIPAGWDNEKKIAILHENFTTVKPEDAYEDFIVKPPVRKLVHDKELAAEDEQVFLMKQQSLLAKQPATPTRASESPARGPSGSPRTQGRGGPASVPSSSPGTSVKKPDPNIKNNAASEGVLASFFNSLLSKKTGSPGSPGAGGVQSTAKKSGQKTVLSNVQEELDRMTRKPDSMVTNSSTENEA']
""",
                1,
            ),
        )
        dbxrefs = []
        self.perform_feature_parser_test(
            record,
            seq,
            id,
            name,
            description,
            annotations,
            references,
            features,
            dbxrefs,
        )

    def test_feature_parser_02(self):
        """Check the feature parser on six successive records of GenBank/cor6_6.gb.

        Each record is fetched with next(records) and compared against its
        expected sequence, id, name, description, annotations, references,
        features and dbxrefs via perform_feature_parser_test.
        """
        path = "GenBank/cor6_6.gb"
        # The whole test stays inside the with block: later records are fetched
        # from the same iterator while the handle is still open.
        with open(path) as handle:
            records = GenBank.Iterator(handle, self.feat_parser)
            # Record 1: X55053.1 (ATCOR66M)
            record = next(records)
            seq = "AACAAAACACACATCAAAAACGATTTTACAAGAAAAAAATATCTGAAAAATGTC...AAA"
            id = "X55053.1"
            name = "ATCOR66M"
            description = "A.thaliana cor6.6 mRNA"
            annotations = {
                "accessions": ["X55053"],
                "comment": """\
Cor6.6 homologous to KIN1. KIN1 is a cold-regulated Arabidopsis
gene with suggested similarity to type I fish antifreeze proteins.""",
                "data_file_division": "PLN",
                "date": "02-MAR-1992",
                "gi": "16229",
                "keywords": [
                    "antifreeze protein homology",
                    "cold-regulated gene",
                    "cor6.6 gene",
                    "KIN1 homology",
                ],
                "molecule_type": "mRNA",
                "organism": "Arabidopsis thaliana",
                "sequence_version": 1,
                "source": "thale cress",
                "taxonomy": [
                    "Eukaryota",
                    "Viridiplantae",
                    "Streptophyta",
                    "Embryophyta",
                    "Tracheophyta",
                    "euphyllophytes",
                    "Spermatophyta",
                    "Magnoliophyta",
                    "eudicotyledons",
                    "Rosidae",
                    "Capparales",
                    "Brassicaceae",
                    "Arabidopsis",
                ],
            }
            references = [
                "location: [0:513]\nauthors: Thomashow,M.F.\ntitle: Direct"
                " Submission\njournal: Submitted (01-FEB-1991) M.F. Thomashow, Dept."
                " Crop and Soil Sciences, Dept. Microbiology, Michigan State"
                " University, East Lansing, Michigan 48824, USA\nmedline id: \npubmed"
                " id: \ncomment: \n",
                "location: [0:513]\nauthors: Gilmour,S.J., Artus,N.N. and"
                " Thomashow,M.F.\ntitle: cDNA sequence analysis and expression of two"
                " cold-regulated genes of Arabidopsis thaliana\njournal: Plant Mol."
                " Biol. 18 (1), 13-21 (1992)\nmedline id: 92119220\npubmed id:"
                " \ncomment: \n",
            ]
            # Each entry pairs the expected str() of a feature with its strand.
            features = (
                (
                    """\
type: source
location: [0:513](+)
qualifiers:
    Key: db_xref, Value: ['taxon:3702']
    Key: organism, Value: ['Arabidopsis thaliana']
    Key: strain, Value: ['Columbia']
""",
                    1,
                ),
                (
                    """\
type: gene
location: [49:250](+)
qualifiers:
    Key: gene, Value: ['cor6.6']
""",
                    1,
                ),
                (
                    """\
type: CDS
location: [49:250](+)
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:16230', 'SWISS-PROT:P31169']
    Key: gene, Value: ['cor6.6']
    Key: note, Value: ['cold regulated']
    Key: protein_id, Value: ['CAA38894.1']
    Key: translation, Value: ['MSETNKNAFQAGQAAGKAEEKSNVLLDKAKDAAAAAGASAQQAGKSISDAAVGGVNFVKDKTGLNK']
""",
                    1,
                ),
            )
            dbxrefs = []
            self.perform_feature_parser_test(
                record,
                seq,
                id,
                name,
                description,
                annotations,
                references,
                features,
                dbxrefs,
            )
            # Record 2: X62281.1 (ATKIN2)
            record = next(records)
            seq = "ATTTGGCCTATAAATATAAACCCTTAAGCCCACATATCTTCTCAATCCATCACA...ATA"
            id = "X62281.1"
            name = "ATKIN2"
            description = "A.thaliana kin2 gene"
            annotations = {
                "accessions": ["X62281"],
                "data_file_division": "PLN",
                "date": "23-JUL-1992",
                "gi": "16353",
                "keywords": ["kin2 gene"],
                "molecule_type": "DNA",
                "organism": "Arabidopsis thaliana",
                "sequence_version": 1,
                "source": "thale cress",
                "taxonomy": [
                    "Eukaryota",
                    "Viridiplantae",
                    "Streptophyta",
                    "Embryophyta",
                    "Tracheophyta",
                    "euphyllophytes",
                    "Spermatophyta",
                    "Magnoliophyta",
                    "eudicotyledons",
                    "Rosidae",
                    "Capparales",
                    "Brassicaceae",
                    "Arabidopsis",
                ],
            }
            references = [
                "location: [0:880]\nauthors: Borg-Franck,M.E.\ntitle: Direct"
                " Submission\njournal: Submitted (27-SEP-1991) M.E. Borg-Franck, Inst"
                " of Biotechnology, University of Helsinki, Karvaamokuja 3, SF-00380"
                " Helsinki, FINLAND\nmedline id: \npubmed id: \ncomment: \n",
                "location: [0:880]\nauthors: Kurkela,S. and Borg-Franck,M.\ntitle:"
                " Structure and expression of kin2, one of two cold- and ABA-induced"
                " genes of Arabidopsis thaliana\njournal: Plant Mol. Biol. 19 (4),"
                " 689-692 (1992)\nmedline id: 92329728\npubmed id: \ncomment: \n",
            ]
            features = (
                (
                    """\
type: source
location: [0:880](+)
qualifiers:
    Key: db_xref, Value: ['taxon:3702']
    Key: organism, Value: ['Arabidopsis thaliana']
    Key: strain, Value: ['ssp. L. Heynh, Colombia']
""",
                    1,
                ),
                (
                    """\
type: TATA_signal
location: [8:20](+)
qualifiers:
""",
                    1,
                ),
                (
                    """\
type: exon
location: [43:160](+)
qualifiers:
    Key: gene, Value: ['kin2']
    Key: number, Value: ['1']
""",
                    1,
                ),
                (
                    """\
type: prim_transcript
location: [43:>579](+)
qualifiers:
    Key: gene, Value: ['kin2']
""",
                    1,
                ),
                (
                    """\
type: mRNA
location: join{[43:160](+), [319:390](+), [503:>579](+)}
qualifiers:
    Key: gene, Value: ['kin2']
""",
                    1,
                ),
                (
                    """\
type: gene
location: [43:579](+)
qualifiers:
    Key: gene, Value: ['kin2']
""",
                    1,
                ),
                (
                    """\
type: CDS
location: join{[103:160](+), [319:390](+), [503:579](+)}
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:16354', 'SWISS-PROT:P31169']
    Key: gene, Value: ['kin2']
    Key: protein_id, Value: ['CAA44171.1']
    Key: translation, Value: ['MSETNKNAFQAGQAAGKAERRRAMFCWTRPRMLLLQLELPRNRAGKSISDAAVGGVNFVKDKTGLNK']
""",
                    1,
                ),
                (
                    """\
type: intron
location: [160:319](+)
qualifiers:
    Key: gene, Value: ['kin2']
    Key: number, Value: ['1']
""",
                    1,
                ),
                (
                    """\
type: exon
location: [319:390](+)
qualifiers:
    Key: gene, Value: ['kin2']
    Key: number, Value: ['2']
""",
                    1,
                ),
                (
                    """\
type: intron
location: [390:503](+)
qualifiers:
    Key: gene, Value: ['kin2']
    Key: number, Value: ['2']
""",
                    1,
                ),
                (
                    """\
type: exon
location: [503:>579](+)
qualifiers:
    Key: gene, Value: ['kin2']
    Key: number, Value: ['3']
""",
                    1,
                ),
                (
                    """\
type: polyA_signal
location: [619:625](+)
qualifiers:
""",
                    1,
                ),
                (
                    """\
type: polyA_signal
location: [640:646](+)
qualifiers:
""",
                    1,
                ),
                (
                    """\
type: polyA_site
location: [784:785](+)
qualifiers:
""",
                    1,
                ),
                (
                    """\
type: polyA_site
location: [799:800](+)
qualifiers:
""",
                    1,
                ),
            )
            dbxrefs = []
            self.perform_feature_parser_test(
                record,
                seq,
                id,
                name,
                description,
                annotations,
                references,
                features,
                dbxrefs,
            )
            # Record 3: M81224.1 (BNAKINI)
            record = next(records)
            seq = "AAAAAAACACAACAAAACTCAATAAATAAACAAATGGCAGACAACAAGCAGAGC...TTC"
            id = "M81224.1"
            name = "BNAKINI"
            description = "Rapeseed Kin1 protein (kin1) mRNA, complete cds"
            annotations = {
                "accessions": ["M81224"],
                "data_file_division": "PLN",
                "date": "27-APR-1993",
                "gi": "167145",
                "keywords": [""],
                "molecule_type": "mRNA",
                "organism": "Brassica napus",
                "sequence_version": 1,
                "source": (
                    "Brassica napus (cultivar Jet neuf) cold induced leaf cDNA to mRNA"
                ),
                "taxonomy": [
                    "Eukaryota",
                    "Viridiplantae",
                    "Embryophyta",
                    "Tracheophyta",
                    "Spermatophyta",
                    "Magnoliophyta",
                    "eudicotyledons",
                    "core eudicots",
                    "Rosidae",
                    "eurosids II",
                    "Brassicales",
                    "Brassicaceae",
                    "Brassica",
                ],
            }
            references = [
                "location: [0:441]\nauthors: Orr,W., Iu,B., White,T., Robert,L.S. and"
                " Singh,J.\ntitle: Nucleotide sequence of a winter B. napus Kin 1"
                " cDNA\njournal: Plant Physiol. 98, 1532-1534 (1992)\nmedline id:"
                " \npubmed id: \ncomment: \n"
            ]
            features = (
                (
                    """\
type: source
location: [0:441](+)
qualifiers:
    Key: cultivar, Value: ['Jet neuf']
    Key: db_xref, Value: ['taxon:3708']
    Key: dev_stage, Value: ['cold induced']
    Key: organism, Value: ['Brassica napus']
    Key: tissue_type, Value: ['leaf']
""",
                    1,
                ),
                (
                    """\
type: gene
location: [33:300](+)
qualifiers:
    Key: gene, Value: ['kin1']
""",
                    1,
                ),
                (
                    """\
type: CDS
location: [33:231](+)
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:167146']
    Key: evidence, Value: ['experimental']
    Key: gene, Value: ['kin1']
    Key: protein_id, Value: ['AAA32993.1']
    Key: translation, Value: ['MADNKQSFQAGQASGRAEEKGNVLMDKVKDAATAAGASAQTAGQKITEAAGGAVNLVKEKTGMNK']
""",
                    1,
                ),
                (
                    """\
type: polyA_signal
location: [240:247](+)
qualifiers:
    Key: gene, Value: ['kin1']
    Key: note, Value: ['putative']
""",
                    1,
                ),
                (
                    """\
type: polyA_signal
location: [293:300](+)
qualifiers:
    Key: gene, Value: ['kin1']
    Key: note, Value: ['putative']
""",
                    1,
                ),
                (
                    """\
type: polyA_site
location: [440:441](+)
qualifiers:
    Key: gene, Value: ['kin1']
""",
                    1,
                ),
            )
            dbxrefs = []
            self.perform_feature_parser_test(
                record,
                seq,
                id,
                name,
                description,
                annotations,
                references,
                features,
                dbxrefs,
            )
            # Record 4: AJ237582.1 (ARU237582)
            record = next(records)
            seq = "GGACAAGGCCAAGGATGCTGCTGCTGCAGCTGGAGCTTCCGCGCAACAAGTAAA...GGC"
            id = "AJ237582.1"
            name = "ARU237582"
            description = "Armoracia rusticana csp14 gene (partial), exons 2-3"
            annotations = {
                "accessions": ["AJ237582"],
                "data_file_division": "PLN",
                "date": "24-MAR-1999",
                "gi": "4538892",
                "keywords": ["cold shock protein", "csp14 gene"],
                "molecule_type": "DNA",
                "organism": "Armoracia rusticana",
                "sequence_version": 1,
                "source": "horseradish",
                "taxonomy": [
                    "Eukaryota",
                    "Viridiplantae",
                    "Streptophyta",
                    "Embryophyta",
                    "Tracheophyta",
                    "euphyllophytes",
                    "Spermatophyta",
                    "Magnoliophyta",
                    "eudicotyledons",
                    "Rosidae",
                    "Capparales",
                    "Brassicaceae",
                    "Armoracia",
                ],
            }
            references = [
                "location: [0:206]\nauthors: Baymiev,A.K., Gimalov,F.R. and"
                " Vakhitov,V.A.\ntitle: \njournal: Unpublished\nmedline id: \npubmed"
                " id: \ncomment: \n",
                "location: [0:206]\nauthors: Baymiev,A.K.\ntitle: Direct"
                " Submission\njournal: Submitted (20-MAR-1999) Baymiev A.K.,"
                " Departament of Biochemistry and Cytochemistry, Ufa Scientific Centre,"
                " pr. Oktyabrya 69, Ufa, Bashkortostan, Russia, 450054, RUSSIA\nmedline"
                " id: \npubmed id: \ncomment: \n",
            ]
            features = (
                (
                    """\
type: source
location: [0:206](+)
qualifiers:
    Key: country, Value: ['Russia:Bashkortostan']
    Key: db_xref, Value: ['taxon:3704']
    Key: organism, Value: ['Armoracia rusticana']
""",
                    1,
                ),
                (
                    """\
type: mRNA
location: join{[<0:48](+), [142:>206](+)}
qualifiers:
    Key: gene, Value: ['csp14']
""",
                    1,
                ),
                (
                    """\
type: exon
location: [0:48](+)
qualifiers:
    Key: gene, Value: ['csp14']
    Key: number, Value: ['2']
""",
                    1,
                ),
                (
                    """\
type: gene
location: [0:206](+)
qualifiers:
    Key: gene, Value: ['csp14']
""",
                    1,
                ),
                (
                    """\
type: CDS
location: join{[<0:48](+), [142:>206](+)}
qualifiers:
    Key: codon_start, Value: ['2']
    Key: db_xref, Value: ['GI:4538893']
    Key: gene, Value: ['csp14']
    Key: product, Value: ['cold shock protein']
    Key: protein_id, Value: ['CAB39890.1']
    Key: translation, Value: ['DKAKDAAAAAGASAQQAGKNISDAAAGGVNFVKEKTG']
""",
                    1,
                ),
                (
                    """\
type: intron
location: [48:142](+)
qualifiers:
    Key: gene, Value: ['csp14']
    Key: number, Value: ['2']
""",
                    1,
                ),
                (
                    """\
type: exon
location: [142:206](+)
qualifiers:
    Key: gene, Value: ['csp14']
    Key: number, Value: ['3']
""",
                    1,
                ),
            )
            dbxrefs = []
            self.perform_feature_parser_test(
                record,
                seq,
                id,
                name,
                description,
                annotations,
                references,
                features,
                dbxrefs,
            )
            # Record 5: L31939.1 (BRRBIF72)
            record = next(records)
            seq = "AACAAAACTCAATAAATAAACAAATGGCAGACAACAAGCAGAGCTTCCAAGCCG...TTT"
            id = "L31939.1"
            name = "BRRBIF72"
            description = "Brassica rapa (clone bif72) kin mRNA, complete cds"
            annotations = {
                "accessions": ["L31939"],
                "data_file_division": "PLN",
                "date": "01-MAR-1996",
                "gi": "1209261",
                "keywords": [""],
                "molecule_type": "mRNA",
                "organism": "Brassica rapa",
                "sequence_version": 1,
                "source": "Brassica rapa flower cDNA to mRNA",
                "taxonomy": [
                    "Eukaryota",
                    "Viridiplantae",
                    "Embryophyta",
                    "Tracheophyta",
                    "Spermatophyta",
                    "Magnoliophyta",
                    "eudicotyledons",
                    "core eudicots",
                    "Rosidae",
                    "eurosids II",
                    "Brassicales",
                    "Brassicaceae",
                    "Brassica",
                ],
            }
            references = [
                "location: [0:282]\nauthors: Kim,J.-B., Kim,H.-U., Park,B.-S.,"
                " Yun,C.-H., Cho,W.-S., Ryu,J.-C. and Chung,T.-Y.\ntitle: Nucleotide"
                " sequences of kin gene in chinese cabbage\njournal: Unpublished"
                " (1994)\nmedline id: \npubmed id: \ncomment: \n"
            ]
            features = (
                (
                    """\
type: source
location: [0:282](+)
qualifiers:
    Key: db_xref, Value: ['taxon:3711']
    Key: dev_stage, Value: ['flower']
    Key: organism, Value: ['Brassica rapa']
""",
                    1,
                ),
                (
                    """\
type: gene
location: [23:221](+)
qualifiers:
    Key: gene, Value: ['kin']
""",
                    1,
                ),
                (
                    """\
type: CDS
location: [23:221](+)
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:1209262']
    Key: gene, Value: ['kin']
    Key: protein_id, Value: ['AAA91051.1']
    Key: translation, Value: ['MADNKQSFQAGQAAGRAEEKGNVLLMDKVKDAATAAGALQTAGQKITEAAGGAVNLVKEKTGMNK']
""",
                    1,
                ),
            )
            dbxrefs = []
            self.perform_feature_parser_test(
                record,
                seq,
                id,
                name,
                description,
                annotations,
                references,
                features,
                dbxrefs,
            )
            # Record 6: AF297471.1 (AF297471)
            record = next(records)
            seq = "ATGGCAGACAACAAGCAGAGCTTCCAAGCCGGTCAAGCCGCTGGTCGTGCTGAG...TAG"
            id = "AF297471.1"
            name = "AF297471"
            description = "Brassica napus BN28a (BN28a) gene, complete cds"
            annotations = {
                "accessions": ["AF297471"],
                "data_file_division": "PLN",
                "date": "14-SEP-2000",
                "gi": "10121868",
                "keywords": [""],
                "molecule_type": "DNA",
                "organism": "Brassica napus",
                "sequence_version": 1,
                "source": "rape",
                "taxonomy": [
                    "Eukaryota",
                    "Viridiplantae",
                    "Embryophyta",
                    "Tracheophyta",
                    "Spermatophyta",
                    "Magnoliophyta",
                    "eudicotyledons",
                    "core eudicots",
                    "Rosidae",
                    "eurosids II",
                    "Brassicales",
                    "Brassicaceae",
                    "Brassica",
                ],
            }
            references = [
                "location: [0:497]\nauthors: Byass,L.J. and Flanagan,A.M.\ntitle:"
                " BN28a, a low temperature-induced gene of Brassica napus\njournal:"
                " Unpublished\nmedline id: \npubmed id: \ncomment: \n",
                "location: [0:497]\nauthors: Byass,L.J. and Flanagan,A.M.\ntitle:"
                " Direct Submission\njournal: Submitted (18-AUG-2000) AFNS, University"
                " of Alberta, 4-10 Agriculture/Forestry Centre, Edmonton, Alberta T6G"
                " 2P5, Canada\nmedline id: \npubmed id: \ncomment: \n",
            ]
            features = (
                (
                    """\
type: source
location: [0:497](+)
qualifiers:
    Key: cultivar, Value: ['Cascade']
    Key: db_xref, Value: ['taxon:3708']
    Key: organism, Value: ['Brassica napus']
""",
                    1,
                ),
                (
                    """\
type: mRNA
location: join{[<0:54](+), [240:309](+), [422:>497](+)}
qualifiers:
    Key: gene, Value: ['BN28a']
    Key: product, Value: ['BN28a']
""",
                    1,
                ),
                (
                    """\
type: gene
location: [<0:>497](+)
qualifiers:
    Key: gene, Value: ['BN28a']
""",
                    1,
                ),
                (
                    """\
type: CDS
location: join{[0:54](+), [240:309](+), [422:497](+)}
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:10121869']
    Key: gene, Value: ['BN28a']
    Key: note, Value: ['low temperature-induced; similar to Brassica napus Kin1 in Accession Number M81224']
    Key: product, Value: ['BN28a']
    Key: protein_id, Value: ['AAG13407.1']
    Key: translation, Value: ['MADNKQSFQAGQAAGRAEEKGNVLMDKVKDAATAAGASAQTAGQKITEAAGGAVNLVKEKTGMNK']
""",
                    1,
                ),
            )
            dbxrefs = []
            self.perform_feature_parser_test(
                record,
                seq,
                id,
                name,
                description,
                annotations,
                references,
                features,
                dbxrefs,
            )

    def test_feature_parser_03(self):
        path = "GenBank/iro.gb"
        with open(path) as handle:
            records = GenBank.Iterator(handle, self.feat_parser)
            record = next(records)
        seq = "CACAGGCCCAGAGCCACTCCTGCCTACAGGTTCTGAGGGCTCAGGGGACCTCCT...AAA"
        id = "AL109817.1"
        name = "IRO125195"
        description = "Homo sapiens mRNA full length insert cDNA clone EUROIMAGE 125195"
        annotations = {
            "accessions": ["AL109817"],
            "comment": """\
EURO-IMAGE Consortium Contact: Auffray C
CNRS UPR 420 - Genetique Moleculaire et Biologie du Developement
IFR 1221 - Rue Guy Moquet 19, Batiment G - BP 8
94801 Villejuif  Cedex, FRANCE
Tel: ++33-1-49 58 34 98
Fax: ++33-1-49 58 35 09
e-mail: auffray@infobiogen.fr
This clone is available royalty-free through IMAGE Consortium
Distributors.
IMPORTANT: This sequence represents the full insert of this IMAGE
cDNA clone. No attempt has been made to verify whether this
corresponds to the full-length of the original mRNA from which it
was derived.""",
            "data_file_division": "PRI",
            "date": "11-AUG-1999",
            "gi": "5731880",
            "keywords": ["FLI_CDNA"],
            "molecule_type": "mRNA",
            "organism": "Homo sapiens",
            "sequence_version": 1,
            "source": "human",
            "taxonomy": [
                "Eukaryota",
                "Metazoa",
                "Chordata",
                "Craniata",
                "Vertebrata",
                "Mammalia",
                "Eutheria",
                "Primates",
                "Catarrhini",
                "Hominidae",
                "Homo",
            ],
        }
        references = [
            "location: [0:1326]\nauthors: Auffray,C., Ansorge,W., Ballabio,A.,"
            " Estivill,X., Gibson,K., Lehrach,H., Poustka,A. and Lundeberg,J.\ntitle:"
            " The European IMAGE consortium for integrated Molecular analysis of human"
            " gene transcripts\njournal: Unpublished\nmedline id: \npubmed id:"
            " \ncomment: \n",
            "location: [0:1326]\nauthors: Carim,L., Estivill,X., Sumoy,L. and"
            " Escarceller,M.\ntitle: Direct Submission\njournal: Submitted"
            " (11-AUG-1999) Dept. Genetica Molecular, Institut de Recerca Oncologica"
            " (IRO), Hospital Duran i Reynals, Autovia de Castelldefels Km 2,7"
            " L'Hospitalet de Llobregat, 08907 Barcelona, Catalunya, SPAIN. Tel:"
            " ++34-93-260-7775 Fax: ++34-93-260-7776 WWW site: http://www.iro.es e-mail"
            " enquiries: lsumoy@iro.es, mescarceller@iro.es\nmedline id: \npubmed id:"
            " \ncomment: \n",
        ]
        features = (
            (
                """\
type: source
location: [0:1326](+)
qualifiers:
    Key: chromosome, Value: ['21']
    Key: clone, Value: ['IMAGE cDNA clone 125195']
    Key: clone_lib, Value: ['Soares fetal liver spleen 1NFLS']
    Key: db_xref, Value: ['taxon:9606']
    Key: note, Value: ['contains Alu repeat; likely to be be derived from unprocessed nuclear RNA or genomic DNA; encodes putative exons identical to FTCD; formimino transferase cyclodeaminase; formimino transferase (EC 2.1.2.5) /formimino tetrahydro folate cyclodeaminase (EC 4.3.1.4)']
    Key: organism, Value: ['Homo sapiens']
""",
                1,
            ),
            (
                """\
type: gene
location: [340:756](+)
qualifiers:
    Key: gene, Value: ['FTCD']
""",
                1,
            ),
            (
                """\
type: exon
location: [340:384](+)
qualifiers:
    Key: gene, Value: ['FTCD']
    Key: number, Value: ['1']
""",
                1,
            ),
            (
                """\
type: intron
location: [384:617](+)
qualifiers:
    Key: gene, Value: ['FTCD']
    Key: number, Value: ['1']
""",
                1,
            ),
            (
                """\
type: exon
location: [617:756](+)
qualifiers:
    Key: gene, Value: ['FTCD']
    Key: number, Value: ['2']
""",
                1,
            ),
        )
        dbxrefs = []
        self.perform_feature_parser_test(
            record,
            seq,
            id,
            name,
            description,
            annotations,
            references,
            features,
            dbxrefs,
        )

    def test_feature_parser_04(self):
        path = "GenBank/pri1.gb"
        with open(path) as handle:
            records = GenBank.Iterator(handle, self.feat_parser)
            record = next(records)
        seq = "GATCATGCATGCACTCCAGCCTGGGACAAGAGCGAAACTCCGTCTCAAAAAAAA...GCA"
        id = "U05344.1"
        name = "HUGLUT1"
        description = "Human fructose transporter (GLUT5) gene, promoter and exon 1"
        annotations = {
            "accessions": ["U05344"],
            "data_file_division": "PRI",
            "date": "16-NOV-1994",
            "gi": "452475",
            "keywords": [""],
            "molecule_type": "DNA",
            "organism": "Homo sapiens",
            "sequence_version": 1,
            "source": "human",
            "taxonomy": [
                "Eukaryota",
                "Metazoa",
                "Chordata",
                "Craniata",
                "Vertebrata",
                "Euteleostomi",
                "Mammalia",
                "Eutheria",
                "Primates",
                "Catarrhini",
                "Hominidae",
                "Homo",
            ],
        }
        references = [
            "location: [0:741]\nauthors: Mahraoui,L., Takeda,J., Mesonero,J.,"
            " Chantret,I., Dussaulx,E., Bell,G.I. and Brot-Laroche,E.\ntitle:"
            " Regulation of expression of the human fructose transporter (GLUT5) by"
            " cyclic AMP\njournal: Biochem. J. 301 (Pt 1), 169-175 (1994)\nmedline id:"
            " 94311827\npubmed id: \ncomment: \n",
            "location: [0:741]\nauthors: Takeda,J.\ntitle: Direct Submission\njournal:"
            " Submitted (24-JAN-1994) Jun Takeda, Howard Hughes Medical Institute, The"
            " University of Chicago, 5841 S. Maryland Ave., Chicago, IL 60637,"
            " USA\nmedline id: \npubmed id: \ncomment: \n",
        ]
        features = (
            (
                """\
type: source
location: [0:741](+)
qualifiers:
    Key: chromosome, Value: ['1']
    Key: clone, Value: ['lambda hGT5-157']
    Key: clone_lib, Value: ['partial Hae III/Alu I fetal human liver library in lambda Ch4A of Maniatis']
    Key: db_xref, Value: ['taxon:9606']
    Key: dev_stage, Value: ['fetal']
    Key: map, Value: ['1p31']
    Key: organism, Value: ['Homo sapiens']
    Key: tissue_type, Value: ['liver']
""",
                1,
            ),
            (
                """\
type: repeat_region
location: [0:73](+)
qualifiers:
    Key: rpt_family, Value: ['Alu']
""",
                1,
            ),
            (
                """\
type: promoter
location: [0:513](+)
qualifiers:
""",
                1,
            ),
            (
                """\
type: 5'UTR
location: [513:609](+)
qualifiers:
    Key: gene, Value: ['GLUT5']
""",
                1,
            ),
            (
                """\
type: exon
location: [513:642](+)
qualifiers:
    Key: gene, Value: ['GLUT5']
    Key: number, Value: ['1']
    Key: product, Value: ['fructose transporter']
""",
                1,
            ),
        )
        dbxrefs = []
        self.perform_feature_parser_test(
            record,
            seq,
            id,
            name,
            description,
            annotations,
            references,
            features,
            dbxrefs,
        )

    def test_feature_parser_05(self):
        path = "GenBank/arab1.gb"
        with open(path) as handle:
            records = GenBank.Iterator(handle, self.feat_parser)
            record = next(records)
        seq = "AAGCTTTGCTACGATCTACATTTGGGAATGTGAGTCTCTTATTGTAACCTTAGG...CTT"
        id = "AC007323.5"
        name = "AC007323"
        description = (
            "Genomic sequence for Arabidopsis thaliana BAC T25K16 from chromosome I,"
            " complete sequence"
        )
        annotations = {
            "accessions": ["AC007323"],
            "comment": """\
On Dec 16, 1999 this sequence version replaced gi:5729683.""",
            "data_file_division": "PLN",
            "date": "19-JAN-2000",
            "gi": "6587720",
            "keywords": ["HTG"],
            "molecule_type": "DNA",
            "organism": "Arabidopsis thaliana",
            "sequence_version": 5,
            "source": "thale cress",
            "taxonomy": [
                "Eukaryota",
                "Viridiplantae",
                "Embryophyta",
                "Tracheophyta",
                "Spermatophyta",
                "Magnoliophyta",
                "eudicotyledons",
                "core eudicots",
                "Rosidae",
                "eurosids II",
                "Brassicales",
                "Brassicaceae",
                "Arabidopsis",
            ],
        }
        references = [
            "location: [0:86436]\nauthors: Dunn,P., Shinn,P., Brooks,S., Buehler,E.,"
            " Chao,Q., Johnson-Hopson,C., Khan,S., Kim,C., Altafi,H., Bei,Q., Chin,C.,"
            " Chiou,J., Choi,E., Conn,L., Conway,A., Gonzales,A., Hansen,N., Howing,B.,"
            " Koo,T., Lam,B., Lee,J., Lenz,C., Li,J., Liu,A., Liu,K., Liu,S.,"
            " Mukharsky,N., Nguyen,M., Palm,C., Pham,P., Sakano,H., Schwartz,J.,"
            " Southwick,A., Thaveri,A., Toriumi,M., Vaysberg,M., Yu,G.,"
            " Federspiel,N.A., Theologis,A. and Ecker,J.R.\ntitle: Genomic sequence for"
            " Arabidopsis thaliana BAC T25K16 from chromosome I\njournal:"
            " Unpublished\nmedline id: \npubmed id: \ncomment: \n",
            "location: [0:86436]\nauthors: Ecker,J.R.\ntitle: Direct"
            " Submission\njournal: Submitted (17-APR-1999) Arabidopsis thaliana Genome"
            " Center, Department of Biology, University of Pennsylvania, 38th Street"
            " and Hamilton Walk, Philadelphia, Pennsylvania 19104-6018, USA\nmedline"
            " id: \npubmed id: \ncomment: \n",
            "location: [0:86436]\nauthors: Ecker,J.R.\ntitle: Direct"
            " Submission\njournal: Submitted (11-AUG-1999) Arabidopsis thaliana Genome"
            " Center, Department of Biology, University of Pennsylvania, 38th Street"
            " and Hamilton Walk, Philadelphia, Pennsylvania 19104-6018, USA\nmedline"
            " id: \npubmed id: \ncomment: \n",
            "location: [0:86436]\nauthors: Ecker,J.R.\ntitle: Direct"
            " Submission\njournal: Submitted (16-DEC-1999) Arabidopsis thaliana Genome"
            " Center, Department of Biology, University of Pennsylvania, 38th Street"
            " and Hamilton Walk, Philadelphia, Pennsylvania 19104-6018, USA\nmedline"
            " id: \npubmed id: \ncomment: \n",
            "location: [0:86436]\nauthors: Chao,Q., Brooks,S., Buehler,E.,"
            " Johnson-Hopson,C., Khan,S., Kim,C., Shinn,P., Altafi,H., Bei,B., Chin,C.,"
            " Chiou,J., Choi,E., Conn,L., Conway,A., Gonzalez,A., Hansen,N., Howing,B.,"
            " Koo,T., Lam,B., Lee,J., Lenz,C., Li,J., Liu,A., Liu,J., Liu,S.,"
            " Mukharsky,N., Nguyen,M., Palm,C., Pham,P., Sakano,H., Schwartz,J.,"
            " Southwick,A., Thaveri,A., Toriumi,M., Vaysberg,M., Yu,G., Davis,R.,"
            " Federspiel,N., Theologis,A. and Ecker,J.\ntitle: Direct"
            " Submission\njournal: Submitted (19-JAN-2000) Arabidopsis thaliana Genome"
            " Center, Department of Biology, University of Pennsylvania, 38th and"
            " Hamilton Walk, Philadelphia, PA 19104-6018, USA\nmedline id: \npubmed id:"
            " \ncomment: \n",
        ]
        features = (
            (
                """\
type: source
location: [0:86436](+)
qualifiers:
    Key: chromosome, Value: ['1']
    Key: clone, Value: ['T25K16']
    Key: db_xref, Value: ['taxon:3702']
    Key: organism, Value: ['Arabidopsis thaliana']
""",
                1,
            ),
            (
                """\
type: CDS
location: join{[3461:3615](+), [3697:3978](+), [4076:4307](+), [4407:4797](+), [4875:5028](+), [5140:5332](+)}
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:6715633']
    Key: evidence, Value: ['not_experimental']
    Key: note, Value: ['containing similarity to NAM-like proteins gi|3695378']
    Key: product, Value: ['T25K16.1']
    Key: protein_id, Value: ['AAF26460.1']
    Key: translation, Value: ['MEDQVGFGFRPNDEELVGHYLRNKIEGNTSRDVEVAISEVNICSYDPWNLRFQSKYKSRDAMWYFFSRRENNKGNRQSRTTVSGKWKLTGESVEVKDQWGFCSEGFRGKIGHKRVLVFLDGRYPDKTKSDWVIHEFHYDLLPEHQKLCNVTLFRFSSYFRLSLLSPMFYTDELMCLPPEILQRTYVICRLEYKGDDADILSAYAIDPTPAFVPNMTSSAGSVVNQSRQRNSGSYNTYSEYDSANHGQQFNENSNIMQQQPLQGSFNPLLEYDFANHGGQWLSDYIDLQQQVPYLAPYENESEMIWKHVIEENFEFLVDERTSMQQHYSDHRPKKPVSGVLPDDSSDTETGSMIFEDTSSSTDSVGSSDEPGHTRIDDIPSLNIIEPLHNYKAQEQPKQQSKEKVISSQKSECEWKMAEDSIKIPPSTNTVKQSWIVLENAQWNYLKNMIIGVLLFISVISWIILVG']
""",
                1,
            ),
            (
                """\
type: CDS
location: join{[8272:8368](-), [8086:8166](-), [7915:7998](-), [7463:7603](-), [7265:7351](-), [6616:6953](-)}
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:6715650']
    Key: evidence, Value: ['not_experimental']
    Key: note, Value: ['hypothetical protein']
    Key: product, Value: ['T25K16.2']
    Key: protein_id, Value: ['AAF26477.1']
    Key: translation, Value: ['MAASEHRCVGCGFRVKSLFIQYSPGNIRLMKCGNCKEVADEYIECERMVCFNHFLSLFGPKVYRHVLYNAINPATVNIQVKNYFNSTSRCVVGEIHRQTYLKSPELIIDRSLLLRKSDEESSFSDSPVLLSIKVLIGVLSANAAFIISFAIATKGLLNEVSRESLLLQVWEFPMSVIFFVDILLLTSNSMALKGQTFKMFSMQIVFCCCYFGISQCKFVFKPVMTESTMTRCIAVCLIAHLIRFLVGQIFEPTIFLIQIGSLLQYMSYFFRIV']
""",
                -1,
            ),
            (
                """\
type: CDS
location: [11565:12642](-)
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:6715649']
    Key: evidence, Value: ['not_experimental']
    Key: note, Value: ['putative RAP2.8 protein gi|3695373']
    Key: product, Value: ['T25K16.3']
    Key: protein_id, Value: ['AAF26476.1']
    Key: translation, Value: ['MDLSLAPTTTTSSDQEQDRDQELTSNIGASSSSGPSGNNNNLPMMMIPPPEKEHMFDKVVTPSDVGKLNRLVIPKQHAERYFPLDSSNNQNGTLLNFQDRNGKMWRFRYSYWNSSQSYVMTKGWSRFVKEKKLDAGDIVSFQRGIGDESERSKLYIDWRHRPDMSLVQAHQFGNFGFNFNFPTTSQYSNRFHPLPEYNSVPIHRGLNIGNHQRSYYNTQRQEFVGYGYGNLAGRCYYTGSPLDHRNIVGSEPLVIDSVPVVPGRLTPVMLPPLPPPPSTAGKRLRLFGVNMECGNDYNQQEESWLVPRGEIGASSSSSSALRLNLSTDHDDDNDDGDDGDDDQFAKKGKSSLSLNFNP']
""",
                -1,
            ),
            (
                """\
type: CDS
location: join{[23220:24174](+), [24243:24357](+), [24411:24664](+), [24742:25137](+), [25225:25445](+), [25526:25711](+), [25782:25905](+), [25993:26478](+), [26563:26730](+), [26813:26983](+), [27073:27235](+), [27319:27415](+), [27504:28133](+), [28313:28507](+), [28591:28782](+), [28861:30013](+), [30111:30518](+), [30603:30781](+)}
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:6715634']
    Key: evidence, Value: ['not_experimental']
    Key: note, Value: ['similar to UFD1 protein emb|CAB10321.1; similar to ESTs gb|H36434, gb|AI996152.1']
    Key: product, Value: ['T25K16.4']
    Key: protein_id, Value: ['AAF26461.1']
    Key: translation, Value: ['MVMEDEPREATIKPSYWLDACEDISCDLIDDLVSEFDPSSVAVNESTDENGVINDFFGGIDHILDSIKNGGGLPNNGVSDTNSQINEVTVTPQVIAKETVKENGLQKNGGKRDEFSKEEGDKDRKRARVCSYQSERSNLSGRGHVNNSREGDRFMNRKRTRNWDEAGNNKKKRECNNYRRDGRDREVRGYWERDKVGSNELVYRSGTWEADHERDVKKVSGGNRECDVKAEENKSKPEERKEKVVEEQARRYQLDVLEQAKAKNTIAFLETGAGKTLIAILLIKSVHKDLMSQNRKMLSVFLVPKVPLVYQVPPNKKHQAEVIRNQTCFQVGHYCGEMGQDFWDSRRWQREFESKQFLKLTSFFLFSSTQVLVMTAQILLNILRHSIIRMETIDLLILDECHHAVKKHPYSLVMSEFYHTTPKDKRPAIFGMTASPVNLKGVSSQVDCAIKIRNLETKLDSTVCTIKDRKELEKHVPMPSEIVVEYDKAATMWSLHETIKQMIAAVEEAAQASSRKSKWQFMGARDAGAKDELRQVYGVSERTESDGAANLIHKLRAINYTLAELGQWCAYKVGQSFLSALQSDERVNFQVDVKFQESYLSEVVSLLQCELLEGAAAEKVAAEVGKPENGNAHDEMEEGELPDDPVVSGGEHVDEVIGAAVADGKVTPKVQSLIKLLLKYQHTADFRAIVFVERVVAALVLPKVRIKVFAELPSLSFIRCASMIGHNNSQEMKSSQMQDTISKFRDGHVTLLVATSVAEEGLDIRQCNVVMRFDLAKTVLAYIQSRGRARKPGSDYILMVERYIKSFKNYILIFVTTGHQISTDMSTCVTCRGNVSHAAFLRNARNSEETLRKEAIERTDLSHLKDTSRLISIDAVPGTVYKVEATGAMVSLNSAVGLVHFYCSQLPGDRYAILRPEFSMEKHEKPGGHTEYSCRLQLPCNAPFEILEGPVCSSMRLAQQVDIIVSACKKLHEMGAFTDMLLPDKGSGQDAEKADQDDEGEPVPGTARHREFYPEGVADVLKGEWVSSGKEVCESSKLFHLYMYNVRCVDFGSSKDPFLSEVSEFAILFGNELDAEVLSMSMDLYVARAMITKASLAFKGSLDITENQLSSLKKFHVRLMSIVLDVDVEPSTTPWDPAKAYLFVPVTDNTSMEPIKGINWELVEKITKTTAWDNPLQRARPDVYLGTNERTLGGDRREYGFGKLRHNIVFGQKSHPTYGIRGAVASFDVVRASGLLPVRDAFEKEVEEDLSKGKLMMADGCMVAEDLIGKIVTAAHSGKRFYVDSICYDMSAETSFPRKEGYLGPLEYNTYADYYKQKIYVVQDRLFFYFLHNLRLLRLYKSSSIMLFIRYGVDLNCKQQPLIKGRGVSYCKNLLSPRFEQSGESETVLDKTYYVFLPPELCVVHPLSGSLIRGAQRLPSIMRRVESMLLAVQLKNLISYPIPTSKILEALTAASCQETFCYERAELLGDAYLKWVVSRFLFLKYPQKHEGQLTRMRQQMVSNMVLYQFALVKGLQSYIQADRFAPSRWSAPGVPPVFDEDTKDGGSSFFDEEQKPVSEENSDVFEDGEMEDGELEGDLSSYRVLSSKTLADVVEALIGVYYVEGGKIAANHLMKWIGIHVEDDPDEVDGTLKNVNVPESVLKSIDFVGLERALKYEFKEKGLLVEAITHASRPSSGVSCYQRLEFVGDAVLDHLITRHLFFTYTSLPPGRLTDLRAAAVNNENFARVAVKHKLHLYLRHGSSALEKQVNKIKKQSILFSKSFKCLTVWLLFVFQIREFVKEVQTESSKPGFNSFGLGDCKAPKVLGDIVESIAGAIFLDSGKDTTAAWKVFQPLLQPMVTPETLPMHPVRELQERCQQQAEGLEYKASRSGNTATVEVFIDGVQVGVAQNPQKKMAQKLAARNALAALKEKEIAESKEKHINNGNAGEDQGENENGNKKNGHQPFTRQTLNDICLRKNWPMPSYRCVK
EGGPAHAKRFTFGVRVNTSDRGWTDECIGEPMPSVKKAKDSAAVLLLELLNKTFS']
""",
                1,
            ),
            (
                """\
type: CDS
location: join{[32248:32372](-), [32132:32161](-), [31983:32049](-), [31789:31897](-), [31634:31700](-), [31340:31515](-), [31222:31304](-), [31083:31126](-)}
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:6715648']
    Key: evidence, Value: ['not_experimental']
    Key: note, Value: ['putative inorganic pyrophosphatase gi|3510259; similar to ESTs gb|T42316, gb|AI994042.1, gb|AI994013.1, emb|Z29202']
    Key: product, Value: ['T25K16.5']
    Key: protein_id, Value: ['AAF26475.1']
    Key: translation, Value: ['MSEETKDNQRLQRPAPRLNERILSSLSRRSVAAHPWHDLEIGPGAPQIFNVVVEITKGSKVKYELDKKTGLIKVDRILYSSVVYPHNYGFVPRTLCEDNDPIDVLVIMQEPVLPGCFLRARAIGLMPMIDQGEKDDKIIAVCVDDPEYKHYTDIKELPPHRLSEIRRFFEDCILFLQCSSLFISIDLSTNKKNENKEVAVNDFLPSESAVEAIQYSMDLYAEYILHTLRR']
""",
                -1,
            ),
            (
                """\
type: CDS
location: join{[36724:36763](-), [36511:36623](-), [36325:36387](-), [35431:35701](-), [35268:35349](-), [34102:35173](-), [33693:34029](-)}
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:6715647']
    Key: evidence, Value: ['not_experimental']
    Key: note, Value: ['putative late elongated hypocotyl emb|CAA07004; similar to ESTS gb|AI993521.1, gb|AA650979']
    Key: product, Value: ['T25K16.6']
    Key: protein_id, Value: ['AAF26474.1']
    Key: translation, Value: ['MDTNTSGEELLAKARKPYTITKQRERWTEDEHERFLEALRLYGRAWQRIEEHIGTKTAVQIRSHAQKFFTKFGKAHSFWFTFQLEKEAEVKGIPVCQALDIEIPPPRPKRKPNTPYPRKPGNNGTSSSQVSSAKDAKLVSSASSSQLNQAFLDLEKMPFSEKTSTGKENQDENCSGVSTVNKYPLPTKVSGDIETSKTSTVDNAVQDVPKKNKDKDGNDGTTVHSMQNYPWHFHADIVNGNIAKCPQNHPSGMVSQDFMFHPMREETHGHANLQATTASATTTASHQAFPACHSQDDYRSFLQISSTFSNLIMSTLLQNPAAHAAATFAASVWPYASVGNSGDSSTPMSSSPPSITAIAAATVAAATAWWASHGLLPVCAPAPITCVPFSTVAVPTPAMTEMDTVENTQPFEKQNTALQDQNLASKSPASSSDDSDETGVTKLNADSKTNDDKIEEVVVTAAVHDSNTAQKKNLVDRSSCGSNTPSGSDAETDALDKMEKDKEDVKETDENQPDVIELNNRKIKMRDNNSNNNATTDSWKEVSEEGRIAFQALFARERLPQSFSPPQVAENVNRKQSDTSMPLAPNFKSQDSCAADQEGVVMIGVGTCKSLKTRQTGFKPYKRCSMEVKESQVGNINNQSDEKVCKRLRLEGEAST']
""",
                -1,
            ),
            (
                """\
type: CDS
location: join{[40376:40579](-), [39914:40031](-), [39110:39516](-), [38837:38989](-), [38599:38756](-)}
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:6715646']
    Key: evidence, Value: ['not_experimental']
    Key: note, Value: ['similar to Medicago truncatula MtN2 gi|3193308; similar to EST gb|H77065']
    Key: product, Value: ['T25K16.7']
    Key: protein_id, Value: ['AAF26473.1']
    Key: translation, Value: ['MAGDMQGVRVVEKYSPVIVMVMSNVAMGSVNALVKKALDVGVNHMVIGAYRMAISALILVPFAYVLERASLMQFFFLLGLSYTSATVSCALVSMLPAITFALALIFRTENVKILKTKAGMLKVIGTLICISGALFLTFYKGPQISNSHSHSHGGASHNNNDQDKANNWLLGCLYLTIGTVLLSLWMLFQGTLSIKYPCKYSSTCLMSIFAAFQCALLSLYKSRDVNDWIIDDRFVITVIIYAGVVGQAMTTVATTWGIKKLGAVFASAFFPLTLISATLFDFLILHTPLYLGSVIGSLVTITGLYMFLWGKNKETESSTALSSGMDNEAQYTTPNKDNDSKSPV']
""",
                -1,
            ),
            (
                """\
type: CDS
location: join{[48637:48868](-), [47776:48554](-), [47447:47684](-), [46074:46313](-), [45718:45847](-), [45342:45656](-), [45149:45261](-)}
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:6715645']
    Key: evidence, Value: ['not_experimental']
    Key: note, Value: ['putative pyruvate dehydrogenase E1 alpha subunit gi|2454182; similar to ESTs emb|Z48417, gb|AW039459.1, gb|T15146, emb|Z48416, gb|AF066871, gb|T76832, gb|AI996061.1']
    Key: product, Value: ['T25K16.8']
    Key: protein_id, Value: ['AAF26472.1']
    Key: translation, Value: ['MATAFAPTKLTATVPLHGSHENRLLLPIRLAPPSSFLGSTRSLSLRRLNHSNATRRSPVVSVQEVVKEKQSTNNTSLLITKEEGLELYEDMILGRSFEDMCAQMYYRGKMFGFVHLYNGQEAVSTGFIKLLTKSDSVVSTYRDHVHALSKGVSARAVMSELFGKVTGCCRGQGGSMHMFSKEHNMLGGFAFIGEGIPVATGAAFSSKYRREVLKQDCDDVTVAFFGDGTCNNGQFFECLNMAALYKLPIIFVVENNLWAIGMSHLRATSDPEIWKKGPAFGMPGVHVDGMDVLKVREVAKEAVTRARRGEGPTLVECETYRFRGHSLADPDELRDAAEKAKYAARDPIAALKKYLIENKLAKEAELKSIEKKIDELVEEAVEFADASPQPGRSQLLENVFADPKGFGIGPDGRYRSQPLQIKVSSSELSVLDEEKEEEVVKGEAEPNKDSVVSKAEPVKKPRPCELYVCNIPRSYDIAQLLDMFQPFGTVISVEVVSRNPQTGESRGSGYVTMGSINSAKIAIASLDGTVRARETKKQEVGGREMRVRYSVDMNPGTRRNPEVLNSTPKKILMYESQHKVYVGNLPWFTQPDGLRNHFSKFGTIVSTRVLHDRKTGRNRVFAFLSFTSGEERDAALSFNGTVNNMKVAESSSEKVSRRVSRKPTVLLLLQRHLLDTNNV']
""",
                -1,
            ),
            (
                """\
type: CDS
location: join{[50584:50656](-), [50120:50333](-), [49985:50039](-)}
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:6715644']
    Key: evidence, Value: ['not_experimental']
    Key: note, Value: ['similar to acidic ribosomal protein p1 gi|2252857; similar to ESTs gb|T42111, gb|AI099979, gb|AA728491']
    Key: product, Value: ['T25K16.9']
    Key: protein_id, Value: ['AAF26471.1']
    Key: translation, Value: ['MSTVGELACSYAVMILEDEGIAITADKIATLVKAAGVSIESYWPMLFAKMAEKRNVTDLIMNVGAGGGGGAPVAAAAPAAGGGAAAAPAAEEKKKDEPAEESDGDLGFGLFD']
""",
                -1,
            ),
            (
                """\
type: CDS
location: join{[51940:52048](+), [52135:52432](+), [52639:52885](+), [53185:53326](+), [53404:54196](+)}
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:6715635']
    Key: evidence, Value: ['not_experimental']
    Key: note, Value: ['hypothetical protein']
    Key: product, Value: ['T25K16.10']
    Key: protein_id, Value: ['AAF26462.1']
    Key: translation, Value: ['MGKKNGSSSWLTAVKRAFRSPTKKDHSNDVEEDEEKKREKRRWFRKPATQESPVKSSGISPPAPQEDSLNVNSKPSPETAPSYATTTPPSNAGKPPSAVVPIATSASKTLAPRRIYYARENYAAVVIQTSFRGYLARRALRALKGLVKLQALVRGHNVRKQAKMTLRCMQALVRVQSRVLDQRKRLSHDGSRKSAFSDSHAVFESRYLQDLSDRQSMSREGSSAAEDWDDRPHTIDAVKVMLQRRRDTALRHDKTNLSQAFSQKMWRTVGNQSTEGHHEVELEEERPKWLDRWMATRPWDKRASSRASVDQRVSVKTVEIDTSQPYSRTGAGSPSRGQRPSSPSRTSHHYQSRNNFSATPSPAKSRPILIRSASPRCQRDPREDRDRAAYSYTSNTPSLRSNYSFTARSGCSISTTMVNNASLLPNYMASTESAKARIRSHSAPRQRPSTPERDRAGLVKKRLSYPVPPPAEYEDNNSLRSPSFKSVAGSHFGGMLEQQSNYSSCCTESNGVEISPASTSDFRNWLR']
""",
                1,
            ),
            (
                """\
type: CDS
location: [57093:58680](-)
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:6715643']
    Key: evidence, Value: ['not_experimental']
    Key: note, Value: ['putative fatty acid elongase 3-ketoacyl-coA synthase 1 gi|4091810; similar to ESTs gb|T42377, gb|N96054, gb|T44368, gb|AI999379.1, emb|Z26005']
    Key: product, Value: ['T25K16.11']
    Key: protein_id, Value: ['AAF26470.1']
    Key: translation, Value: ['MERTNSIEMDRERLTAEMAFRDSSSAVIRIRRRLPDLLTSVKLKYVKLGLHNSCNVTTILFFLIILPLTGTVLVQLTGLTFDTFSELWSNQAVQLDTATRLTCLVFLSFVLTLYVANRSKPVYLVDFSCYKPEDERKISVDSFLTMTEENGSFTDDTVQFQQRISNRAGLGDETYLPRGITSTPPKLNMSEARAEAEAVMFGALDSLFEKTGIKPAEVGILIVNCSLFNPTPSLSAMIVNHYKMREDIKSYNLGGMGCSAGLISIDLANNLLKANPNSYAVVVSTENITLNWYFGNDRSMLLCNCIFRMGGAAILLSNRRQDRKKSKYSLVNVVRTHKGSDDKNYNCVYQKEDERGTIGVSLARELMSVAGDALKTNITTLGPMVLPLSEQLMFLISLVKRKMFKLKVKPYIPDFKLAFEHFCIHAGGRAVLDEVQKNLDLKDWHMEPSRMTLHRFGNTSSSSLWYEMAYTEAKGRVKAGDRLWQIAFGSGFKCNSAVWKALRPVSTEEMTGNAWAGSIDQYPVKVVQ']
""",
                -1,
            ),
            (
                """\
type: CDS
location: join{[63132:63513](-), [61669:61826](-), [59507:59665](-)}
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:6715642']
    Key: evidence, Value: ['not_experimental']
    Key: note, Value: ['hypothetical protein']
    Key: product, Value: ['T25K16.12']
    Key: protein_id, Value: ['AAF26469.1']
    Key: translation, Value: ['MEKRSDSESVEILGDWDSPPPEERIVMVSVPTSPESDYARSNQPKEIESRVSDKETASASGEVAARRVLPPWMDPSYEWGGGKWKVDGRKNKNKKEKEKEKEEIIPFKEIIEALLGNSGDKVQQDNKVFEVAPSLHVVELRKTGDDTLEFHKVYFRFNLYQPVQLPLILFVVIRFSMLKIIHYHQFTMAHIKEFVCMWDTHLYKEITNLNIWDTLSSTLVLAIWTVNASHE']
""",
                -1,
            ),
            (
                """\
type: CDS
location: join{[67025:67214](-), [66536:66599](-), [66379:66451](-), [66151:66259](-), [65963:66044](-), [65808:65862](-), [65434:65566](-), [65264:65354](-), [65032:65158](-), [64811:64919](-), [64602:64719](-), [64452:64509](-), [64271:64358](-), [64099:64177](-)}
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:6715641']
    Key: evidence, Value: ['not_experimental']
    Key: note, Value: ['similar to wpk4 protein kinase dbj|BAA34675; similar to ESTs dbj|AB015122, gb|AI997157.1']
    Key: product, Value: ['T25K16.13']
    Key: protein_id, Value: ['AAF26468.1']
    Key: translation, Value: ['MSGSRRKATPASRTRVGNYEMGRTLGEGSFAKVKYAKNTVTGDQAAIKILDREKVFRHKMVEQLKREISTMKLIKHPNVVEIIEVMASKTKIYIVLELVNGGELFDKIAQQGRLKEDEARRYFQQLINAVDYCHSRGVYHRDLKPENLILDANGVLKVSDFGLSAFSRQVREDGLLHTACGTPNYVAPEVLSDKGYDGAAADVWSCGVILFVLMAGYLPFDEPNLMTLYKRVRICKAEFSCPPWFSQGAKRVIKRILEPNPITRISIAELLEDEWFKKGYKPPSFDQDDEDITIDDVDAAFSNSKECLVTEKKEKPVSMNAFELISSSSEFSLENLFEKQAQLVKKETRFTSQRSASEIMSKMEETAKPLGFNVRKDNYKIKMKGDKSGRKGQLSVATEVFEVAPSLHVVELRKTGGDTLEFHKVCDSFYKNFSSGLKDVVWNTDAAAEEQKQ']
""",
                -1,
            ),
            (
                """\
type: CDS
location: join{[71643:71700](-), [70742:71357](-), [70533:70670](-), [69830:69987](-)}
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:6715640']
    Key: evidence, Value: ['not_experimental']
    Key: note, Value: ['similar to ataxia-telangiectasia group D protein pir|A49618']
    Key: product, Value: ['T25K16.14']
    Key: protein_id, Value: ['AAF26467.1']
    Key: translation, Value: ['MVSDLPLDEDDIALLKSPYCDDGGDEDVNSAPNIFTYDNVPLKKRHYLGTSDTFRSFEPLNEHACIVCDIADDGVVPCSGNECPLAVHRKCVELDCEDPATFYCPYCWFKEQATRSTALRTRGVAAAKTLVQYGCSELRSGDIVMTRENSQLENGSDNSLPMQLHENLHQLQELVKHLKARNSQLDESTDQFIDMEKSCGEAYAVVNDQPKRVLWTVNEEKMLREGVEKFSDTINKNMPWKKILEMGKGIFHTTRNSSDLKDKWRNMVRIIILIWLRSRLTSSSSSQRSEIKMERERNAGVMKKMSPTGTIQRLEFVGWYL']
""",
                -1,
            ),
            (
                """\
type: CDS
location: join{[72284:72371](+), [72788:72865](+), [72988:73097](+), [73189:73442](+), [73523:73585](+)}
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:6715636']
    Key: evidence, Value: ['not_experimental']
    Key: note, Value: ['similar to SYT gi|2252866; similar to ESTs emb|F14390, gb|H36066, emb|F14391']
    Key: product, Value: ['T25K16.15']
    Key: protein_id, Value: ['AAF26463.1']
    Key: translation, Value: ['MQQQQSPQMFPMVPSIPPANNITTEQIQKYLDENKKLIMAIMENQNLGKLAECAQYQALLQKNLMYLAAIADAQPPPPTPGPSPSTAVAAQMATPHSGMQPPSYFMQHPQASPAGIFAPRGPLQFGSPLQFQDPQQQQQIHQQAMQGHMGIRPMGMTNNGMQHAMQQPETGLGGNVGLRGGKQDGADGQGKDDGK']
""",
                1,
            ),
            (
                """\
type: CDS
location: join{[74035:74145](-), [73806:73990](-)}
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:6715639']
    Key: evidence, Value: ['not_experimental']
    Key: note, Value: ['similar to stress-induced protein OZI1 precursor pir|S59544; similar to EST gb|AI995719.1']
    Key: product, Value: ['T25K16.16']
    Key: protein_id, Value: ['AAF26466.1']
    Key: translation, Value: ['MASGGKAKYIIGALIGSFGISYIFDKVISDNKIFGGKDDLNGYLLVKISGTTPGTVSNKEWWAATDEKFQAWPRTAGPPVVMNPISRQNFIVKTRPE']
""",
                -1,
            ),
            (
                """\
type: CDS
location: join{[75334:76249](+), [76515:76653](+), [76732:76982](+), [77014:77148](+)}
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:6715637']
    Key: evidence, Value: ['not_experimental']
    Key: note, Value: ['putative reverse transcriptase gb|AAD17395']
    Key: product, Value: ['T25K16.17']
    Key: protein_id, Value: ['AAF26464.1']
    Key: translation, Value: ['MKEDRRLPHKRDAFQFLKTKAAYVIVIVLTYAFGYFSAYHYHQPLQQQLPPSTTAVETTKPQVCSIDNFRVTTPCGNLVPPELIRQTVIDRIFNGTSPYIDFPPPHAKKFLRPKRIKGWGSYGAVFENLIRRVKPKTIVEVGSFLGASAIHMANLTRRLGLEETQILCVDDFRGWPGFRDRFKDMALVNGDVLLMYQFMQNVVISDFSGSILPVPFSTGSALEKLCEWGVTADLVEIDAGHDFNSAWADINRAVRILRPGGVIFGHDYFTAADNRGVRRAVNLFAEINRLKVKTDGQHWVIDSVKVINKGTRFAISKTVAKIKEDANQWFFAQVLENQDLVNEQAVHISVKVLRGFLRDEHGKVLIHARRSFASVHSKLDATFLCWQWAMESMKSLRVDKIIFASEDNDLIGAVTRLPSWPSYKFQIHFLLGELIRSSNLGAHLIAKSVTMEDRRQSYVATGFPFWLKHLFEKERSIA']
""",
                1,
            ),
            (
                """\
type: CDS
location: join{[83585:84581](-), [82750:83373](-), [82722:82738](-)}
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:6715638']
    Key: evidence, Value: ['not_experimental']
    Key: note, Value: ['putative cytochrome P450 gi|3831440']
    Key: product, Value: ['T25K16.18']
    Key: protein_id, Value: ['AAF26465.1']
    Key: translation, Value: ['MFSLNMRTEIESLWVFALASKFNIYMQQHFASLLVAIAITWFTITIVFWSTPGGPAWGKYFFTRRFISLDYNRKYKNLIPGPRGFPLVGSMSLRSSHVAHQRIASVAEMSNAKRLMAFSLGDTKVVVTCHPAVAKEILNSSVFADRPVDETAYGLMFNRAMGFAPNGTYWRTLRRLGSNHLFNPKQIKQSEDQRRVIATQMVNAFARNPKSACAVRDLLKTASLCNMMGLVFGREYELESNNNLESECLKGLVEEGYDLLGTLNWTDHLPWLAGLDFQQIRFRCSQLVPKVNLLLSRIIHEQRAATGNFLDMLLSLQGSEKLSESDMVAVLWEMIFRGTDTVAVLVEWVLARIVMHPKVQLTVHDELDRVVGRSRTVDESDLPSLTYLTAMIKEVLRLHPPGPLLSWARLSITDTSVDGYHVPAGTTAMVNMWAIARDPHVWEDPLEFKPERFVAKEGEAEFSVFGSDLRLAPFGSGKRVCPGKNLGLTTVSFWVATLLHEFEWLPSVEANPPDLSEVLRLSCEMACPLIVNVSSRRKIIAWMF']
""",
                -1,
            ),
        )
        dbxrefs = []
        self.perform_feature_parser_test(
            record,
            seq,
            id,
            name,
            description,
            annotations,
            references,
            features,
            dbxrefs,
        )

    def test_feature_parser_06(self):
        path = "GenBank/protein_refseq.gb"
        with open(path) as handle:
            records = GenBank.Iterator(handle, self.feat_parser)
            record = next(records)
        seq = "MNNRWILHAAFLLCFSTTALSINYKQLQLQERTNIRKCQELLEQLNGKINLTYR...FQN"
        id = "NP_034640.1"
        name = "NP_034640"
        description = "interferon beta, fibroblast [Mus musculus]"
        annotations = {
            "accessions": ["NP_034640"],
            "comment": """\
PROVISIONAL REFSEQ: This record has not yet been subject to final
NCBI review. The reference sequence was derived from K00020.1.""",
            "data_file_division": "ROD",
            "date": "01-NOV-2000",
            "db_source": "REFSEQ: accession NM_010510.1",
            "gi": "6754304",
            "keywords": [""],
            "molecule_type": "protein",
            "organism": "Mus musculus",
            "pid": "g6754304",
            "sequence_version": 1,
            "source": "house mouse",
            "taxonomy": [
                "Eukaryota",
                "Metazoa",
                "Chordata",
                "Craniata",
                "Vertebrata",
                "Euteleostomi",
                "Mammalia",
                "Eutheria",
                "Rodentia",
                "Sciurognathi",
                "Muridae",
                "Murinae",
                "Mus",
            ],
        }
        references = [
            "location: [0:182]\nauthors: Higashi,Y., Sokawa,Y., Watanabe,Y., Kawade,Y.,"
            " Ohno,S., Takaoka,C. and Taniguchi,T.\ntitle: structure and expression of"
            " a cloned cdna for mouse interferon-beta\njournal: J. Biol. Chem. 258,"
            " 9522-9529 (1983)\nmedline id: 83265757\npubmed id: \ncomment: \n"
        ]
        features = (
            (
                """\
type: source
location: [0:182]
qualifiers:
    Key: chromosome, Value: ['4']
    Key: db_xref, Value: ['taxon:10090']
    Key: map, Value: ['4 42.6 cM']
    Key: organism, Value: ['Mus musculus']
""",
                None,
            ),
            (
                """\
type: Protein
location: [0:182]
qualifiers:
    Key: product, Value: ['interferon beta, fibroblast']
""",
                None,
            ),
            (
                """\
type: sig_peptide
location: [0:21]
qualifiers:
""",
                None,
            ),
            (
                """\
type: Region
location: [0:182]
qualifiers:
    Key: db_xref, Value: ['CDD:pfam00143']
    Key: note, Value: ['interferon']
    Key: region_name, Value: ['Interferon alpha/beta domain']
""",
                None,
            ),
            (
                """\
type: mat_peptide
location: [21:182]
qualifiers:
    Key: product, Value: ['ifn-beta']
""",
                None,
            ),
            (
                """\
type: Region
location: [55:170]
qualifiers:
    Key: db_xref, Value: ['CDD:IFabd']
    Key: note, Value: ['IFabd']
    Key: region_name, Value: ['Interferon alpha, beta and delta.']
""",
                None,
            ),
            (
                """\
type: CDS
location: [0:182]
qualifiers:
    Key: coded_by, Value: ['NM_010510.1:21..569']
    Key: db_xref, Value: ['LocusID:15977', 'MGD:MGI:107657']
    Key: gene, Value: ['Ifnb']
""",
                None,
            ),
        )
        dbxrefs = []
        self.perform_feature_parser_test(
            record,
            seq,
            id,
            name,
            description,
            annotations,
            references,
            features,
            dbxrefs,
        )

    def test_feature_parser_07(self):
        path = "GenBank/extra_keywords.gb"
        with open(path) as handle:
            records = GenBank.Iterator(handle, self.feat_parser)
            record = next(records)
        seq = "TCCAGGGGATTCACGCGCAATATGTTTCCCTCGCTCGTCTGCAGGGTGTGGGAA...TTG"
        id = "AL138972.1"
        name = "DMBR25B3"
        description = "Drosophila melanogaster BAC clone BACR25B3"
        annotations = {
            "accessions": ["AL138972"],
            "comment": """\
Sequence submitted by Takis Benos, EMBL Outstation - The EBI,
Hinxton, Cambridge, CB10 1SD, U.K.
E-mail: benos@ebi.ac.uk on behalf of the European Drosophila Genome
Sequencing Consortium. For further information see the European
Drosophila Genome Sequencing Consortium's web site:
http://edgp.ebi.ac.uk/.
The syntax for the representation of annotation used in this record
is documented at:
ftp://ftp.ebi.ac.uk/pub/databases/edgp/sequence_annotation.README
Coding sequences are predicted from computer analysis, using both
gene and CDS prediction programs and matches to other sequences.
These predictions and matches have been evaluated by the annotators
and may have been refined by hand (in which case a Genefinder
prediction will have no score. The annotators have also used their
judgement on what matches to represent in this record. A far more
complete annotation record is available from FlyBase
(http://flybase.bio.indiana.edu/) through the FlyBase Annotation
Object linked by the db_xref qualifier in the Feature Table.
IMPORTANT:  This sequence is NOT necessarily the entire insert of
clone BACR25B3.  It may be shorter, since we are minimising the
overlap between clones to 100 bases, by trimming them. Sequence in
absolute orientation with respect to chromosome Clone=BACR25B3;
Contig ID=1; Length=154329; Status=Finished.""",
            "data_file_division": "INV",
            "date": "07-FEB-2000",
            "gi": "6946668",
            "keywords": [""],
            "molecule_type": "DNA",
            "organism": "Drosophila melanogaster",
            "sequence_version": 1,
            "source": "fruit fly",
            "taxonomy": [
                "Eukaryota",
                "Metazoa",
                "Arthropoda",
                "Tracheata",
                "Hexapoda",
                "Insecta",
                "Pterygota",
                "Neoptera",
                "Endopterygota",
                "Diptera",
                "Brachycera",
                "Muscomorpha",
                "Ephydroidea",
                "Drosophilidae",
                "Drosophila",
            ],
        }
        references = [
            "location: [0:154329]\nauthors: Murphy,L., Harris,D. and Barrell,B.\ntitle:"
            " Sequencing the distal X chromosome of Drosophila melanogaster\njournal:"
            " Unpublished\nmedline id: \npubmed id: \ncomment: Sanger Centre, Wellcome"
            " Trust Genome Campus, Hinxton Hall, Hinxton, Cambridge CB10 1SA, U.K.\n",
            "location: [0:154329]\nauthors: Benos,P.\ntitle: Direct"
            " Submission\njournal: Submitted (06-FEB-2000) European Drosophila Genome"
            " Sequencing Consortium\nmedline id: \npubmed id: \ncomment: \n",
        ]
        features = (
            (
                """\
type: source
location: [0:154329](+)
qualifiers:
    Key: clone, Value: ['BAC BACR25B3']
    Key: db_xref, Value: ['taxon:7227']
    Key: organism, Value: ['Drosophila melanogaster']
""",
                1,
            ),
            (
                """\
type: gene
location: [22147:27773](-)
qualifiers:
    Key: gene, Value: ['EG:BACR25B3.11']
    Key: note, Value: ['']
""",
                -1,
            ),
            (
                """\
type: CDS
location: join{[27622:27773](-), [26676:27009](-), [25023:25178](-), [24615:24888](-), [23629:24555](-), [22859:23560](-), [22374:22791](-), [22147:22299](-)}
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:6946669']
    Key: gene, Value: ['EG:BACR25B3.11']
    Key: note, Value: ["/prediction=(method:''genefinder'', version:''084'', score:''105.71''); /prediction=(method:''genscan'', version:''1.0''); /match=(desc:''BASEMENT MEMBRANE-SPECIFIC HEPARAN SULFATE PROTEOGLYCAN CORE PROTEIN PRECURSOR (HSPG) (PERLECAN) (PLC)'', species:''Homo sapiens (Human)'', ranges:(query:24292..24549, target:SWISS-PROT::P98160:3713..3628, score:''201.00''), (query:24016..24291, target:SWISS-PROT::P98160:3815..3724, score:''139.00''), (query:23857..24006, target:SWISS-PROT::P98160:3866..3817, score:''99.00''), (query:24052..24327, target:SWISS-PROT::P98160:4059..3968, score:''143.00''), (query:24046..24312, target:SWISS-PROT::P98160:4341..4253, score:''116.00''), (query:23806..23901, target:SWISS-PROT::P98160:4177..4146, score:''76.00''), (query:23203..23382, target:SWISS-PROT::P98160:4062..4003, score:''116.00''), (query:22523..22777, target:SWISS-PROT::P98160:4288..4204, score:''112.00''), (query:22235..22300, target:SWISS-PROT::P98160:4358..4337, score:''64.00'')), method:''blastx'', version:''1.4.9''); /match=(desc:''GM03359.5prime GM Drosophila melanogaster ovary BlueScript Drosophila melanogaster cDNA clone GM03359 5prime, mRNA sequence'', species:''Drosophila melanogaster (fruit fly)'', ranges:(query:25024..25235, target:EMBL::AA801707:438..227, score:''1024.00''), (query:24851..24898, target:EMBL::AA801707:476..429, score:''204.00'')), method:''blastn'', version:''1.4.9''); /match=(desc:''LD08615.5prime LD Drosophila melanogaster embryo BlueScript Drosophila melanogaster cDNA clone LD08615 5prime, mRNA sequence'', species:''Drosophila melanogaster (fruit fly)'', ranges:(query:24629..24727, target:EMBL::AA264808:99..1, score:''495.00''), (query:24417..24566, target:EMBL::AA264808:250..101, score:''687.00''), (query:24048..24420, target:EMBL::AA264808:618..246, score:''1847.00''), (query:23986..24036, target:EMBL::AA264808:678..628, score:''237.00'')), method:''blastn'', version:''1.4.9''); /match=(desc:''HL02745.5prime HL Drosophila 
melanogaster head BlueScript Drosophila melanogaster cDNA clone HL02745 5prime, mRNA sequence'', species:''Drosophila melanogaster (fruit fly)'', ranges:(query:23944..24045, target:EMBL::AA697546:103..2, score:''510.00''), (query:23630..23943, target:EMBL::AA697546:416..103, score:''1570.00''), (query:23419..23561, target:EMBL::AA697546:558..416, score:''715.00''), (query:23306..23417, target:EMBL::AA697546:670..559, score:''524.00''), (query:23280..23316, target:EMBL::AA697546:695..659, score:''167.00'')), method:''blastn'', version:''1.4.9''); /match=(desc:''GM08137.5prime GM Drosophila melanogaster ovary BlueScript Drosophila melanogaster cDNA clone GM08137 5prime, mRNA sequence'', species:''Drosophila melanogaster (fruit fly)'', ranges:(query:23235..23278, target:EMBL::AA696682:44..1, score:''139.00''), (query:22986..23251, target:EMBL::AA696682:294..29, score:''1321.00'')), method:''blastn'', version:''1.4.9'')"]
    Key: protein_id, Value: ['CAB72284.1']
    Key: translation, Value: ['MACNCNQSMIYQSNERRDYNCPGAPQYPYNRFKGGVSLKDTPCMVLYICADFKSSKLSSAKPIISGPATTRAPAISYVCQPNDFKCVSHPHTCVRANMVCDGIYDCTDHSDEFNCIAGKGSGKSESNSGSGSFKRWKKSPEQGRRSLAKAVKNRKLRKRSFAKSRDYSLKLDDQSSNLRAGESTDVECYSSDDTYTDVVWERSDGAPLSNNVRQVGNRLVISNVSPSDAGNYVCKCKTDEGDLYTTSYKLEVEDQPHELKSSKIVYAKVGANADLQCGADESRQPTYRWSRQYGQLQAGRSLMNEKLSLDSVQANDAGTYICTAQYADGETADFPNILVVTGAIPQFRQEPRSYMSFPTLPNSSFKFNFELTFRPENGDGLLLFNGQTRGSGDYIALSLKDRYAEFRFDFGGKPMLVRAEEPLALNEWHTVRVSRFKRDGYIQVDEQHPVAFPTLQQIPQLDLIEDLYIGGVPNWELLPADAVSQQVGFVGCISRLTLQGRTVELIREAKYKEGITDCRPCAQGPCQNKGVCLESQTEQAYTCICQPGWTGRDCAIEGTQCTPGVCGAGRCENTENDMECLCPLNRSGDRCQYNEILNEHSLNFKGNSFAAYGTPKVTKVNITLSVRPASLEDSVILYTAESTLPSGDYLALVLRGGHAELLINTAARLDPVVVRSAEPLPLNRWTRIEIRRRLGEGILRVGDGPERKAKAPGSDRILSLKTHLYVGGYDRSTVKVNRDVNITKGFDGCISRLYNFQKPVNLLADIKDAANIQSCGETNMIGGDEDSDNEPPVPPPTPDVHENELQPYAMAPCASDPCENGGSCSEQEDVAVCSCPFGFSGKHCQEHLQLGFNASFRGDGYVELNRSHFQPALEQSYTSMGIVFTTNKPNGLLFWWGQEAGEEYTGQDFIAAAVVDGYVEYSMRLDGEEAVIRNSDIRVDNGERHIVIAKRDENTAILEVDRMLHSGETRPTSKKSMKLPGNVFVGGAPDLEVFTGFRYKHNLNGCIVVVEGETVGQINLSSAAVNGVNANVCPA']
""",
                -1,
            ),
            (
                """\
type: gene
location: [29925:33978](-)
qualifiers:
    Key: gene, Value: ['EG:BACR25B3.10']
""",
                -1,
            ),
            (
                """\
type: CDS
location: join{[33816:33978](-), [33532:33713](-), [32685:33289](-), [32323:32634](-), [31658:31836](-), [31196:31591](-), [30616:31076](-), [30269:30519](-), [29925:30108](-)}
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:6946670']
    Key: gene, Value: ['EG:BACR25B3.10']
    Key: note, Value: ["/prediction=(method:''genefinder'', version:''084'', score:''98.50''); /prediction=(method:''genscan'', version:''1.0''); /match=(desc:''BASEMENT MEMBRANE-SPECIFIC HEPARAN SULFATE PROTEOGLYCAN CORE PROTEIN PRECURSOR (HSPG) (PERLECAN) (PLC)'', species:''Homo sapiens (Human)'', ranges:(query:33540..33716, target:SWISS-PROT::P98160:2716..2658, score:''113.00''), (query:32859..32963, target:SWISS-PROT::P98160:3341..3307, score:''63.00''), (query:33150..33215, target:SWISS-PROT::P98160:3530..3509, score:''73.00''), (query:32973..33089, target:SWISS-PROT::P98160:3588..3550, score:''71.00''), (query:32358..32567, target:SWISS-PROT::P98160:3650..3581, score:''107.00''), (query:31222..31323, target:SWISS-PROT::P98160:2620..2587, score:''80.00''), (query:31489..31572, target:SWISS-PROT::P98160:3387..3360, score:''72.00''), (query:31495..31593, target:SWISS-PROT::P98160:3575..3543, score:''60.00'')), method:''blastx'', version:''1.4.9''); /match=(desc:''GM02481.5prime GM Drosophila melanogaster ovary BlueScript Drosophila melanogaster cDNA clone GM02481 5prime, mRNA sequence'', species:''Drosophila melanogaster (fruit fly)'', ranges:(query:30008..30036, target:EMBL::AA695253:29..1, score:''145.00''), (query:29549..30004, target:EMBL::AA695253:487..32, score:''2262.00'')), method:''blastn'', version:''1.4.9'')"]
    Key: protein_id, Value: ['CAB72285.1']
    Key: translation, Value: ['MFLATLDTNDPTDIGTEDPVLTQIIVSIQKPEITIVPVGGSMTLSCSGRMRWSNSPVIVNWYKENSRLPENVEVQGGNLYLYDLQVSDSGVYICQAVNNETASVFKDTVSITITKKDQLSPAEIVNLPSHVTFEEYVNNEIICEVLGNPAPRVTWARVDGHADAQSTRTYDNRLIFDSPRKSDEGRYRCQAENDQNRDEKYVIVYVQSNPPQPPPQQDRLYITPEEINGLAGESFQLNCQFTSVASLRYDWSHNGRSLSSSPARNVEIRGNTLEVRDASESDSGVYTCVAYDVRTRRNFTESARVNIDRREEQPFGVLMRMMILTDSLINHSNKPIIESLEQNILIIQGEDYSITCEASGSPYPSIKWAKVHDFMPENVHISGNVLTIYGARFENRGVYSCVAENDHGSDLSSTSIDIEPRERPSVKIVSAPLQTFSVGAPASLYCTVEGIPDPTVEWVRVDGQPLSPRHKIQSPGYMVIDDIQLEDSGDYECRAKNIVGEATGVATITVQEPTLVQIIPDNRDLRLTEGDELSLTCVGSGVPNPEVEWVNEMALKRDLYSPPSNTAILKIYRVTKADAGIYTCHGKNEAGSDEAHVRVEVQERRGDIGGVDDDSDRDPINYNPPQQQNPGIHQPGSNQLLATDIGDNVTLTCDMFQPLNTRWERVDGAPLPRNAYTIKNRLEIVRVEQQNLGQYRCNGIGRDGNVKTYFVKELVLMPLPRIRFYPNIPLTVEAGQNLDVHCQVENVRPEDVHWSTDNNRPLPSSVRIVGSVLRFVSITQAAAGEYRCSAFNQYGNRSQIARVAVKKPADFHQVPQSQLQRHREGENIQLQCTVTDQYGVRAQDNVEFNWFRDDRRPLPNNARTDSQILVLTNLRPEDAGRYICNSYDVDRGQQLPEVSIDLQVLSE']
""",
                -1,
            ),
            (
                """\
type: gene
location: [36118:56153](-)
qualifiers:
    Key: gene, Value: ['EG:BACR25B3.1']
""",
                -1,
            ),
            (
                """\
type: CDS
location: join{[56030:56153](-), [52848:52966](-), [50826:50937](-), [50318:50441](-), [49970:50102](-), [49762:49876](-), [49436:49562](-), [48410:48878](-), [47682:47831](-), [47221:47315](-), [46517:46688](-), [45975:46125](-), [45660:45793](-), [45147:45233](-), [44811:44928](-), [44240:44438](-), [43603:43837](-), [42750:42876](-), [42187:42415](-), [41854:42085](-), [41545:41620](-), [40680:40814](-), [40518:40612](-), [40344:40434](-), [39655:40042](-), [37280:39517](-), [36118:37213](-)}
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:6946671']
    Key: gene, Value: ['EG:BACR25B3.1']
    Key: note, Value: ["/prediction=(method:''genscan'', version:''1.0''); /prediction=(method:''genefinder'', version:''084''); /match=(desc:''LOW-DENSITY LIPOPROTEIN RECEPTOR-RELATED PROTEIN PRECURSOR (LRP)'', species:''Caenorhabditis elegans'', ranges:(query:50831..50941, target:SWISS-PROT::Q04833:1221..1185, score:''95.00''), (query:50840..51025, target:SWISS-PROT::Q04833:2865..2804, score:''102.00''), (query:50828..50935, target:SWISS-PROT::Q04833:3788..3753, score:''119.00''), (query:50323..50394, target:SWISS-PROT::Q04833:3706..3683, score:''77.00''), (query:50326..50433, target:SWISS-PROT::Q04833:1263..1228, score:''120.00''), (query:49948..50079, target:SWISS-PROT::Q04833:2917..2874, score:''88.00''), (query:49432..49587, target:SWISS-PROT::Q04833:4085..4034, score:''102.00''), (query:49429..49560, target:SWISS-PROT::Q04833:3915..3872, score:''97.00''), (query:48622..48720, target:SWISS-PROT::Q04833:1302..1270, score:''99.00''), (query:47698..47799, target:SWISS-PROT::Q04833:3996..3963, score:''88.00''), (query:47686..47775, target:SWISS-PROT::Q04833:3835..3806, score:''59.00''), (query:47692..47787, target:SWISS-PROT::Q04833:4041..4010, score:''83.00''), (query:47229..47315, target:SWISS-PROT::Q04833:3742..3714, score:''88.00''), (query:47220..47312, target:SWISS-PROT::Q04833:3829..3799, score:''67.00''), (query:47232..47318, target:SWISS-PROT::Q04833:3866..3838, score:''78.00''), (query:46552..46656, target:SWISS-PROT::Q04833:1344..1310, score:''95.00''), (query:46543..46650, target:SWISS-PROT::Q04833:3951..3916, score:''98.00''), (query:45983..46129, target:SWISS-PROT::Q04833:2870..2822, score:''82.00''), (query:45971..46096, target:SWISS-PROT::Q04833:4089..4048, score:''82.00''), (query:45678..45764, target:SWISS-PROT::Q04833:3666..3638, score:''80.00''), (query:45128..45238, target:SWISS-PROT::Q04833:94..58, score:''100.00''), (query:45158..45268, target:SWISS-PROT::Q04833:3990..3954, score:''80.00''), (query:44263..44379, 
target:SWISS-PROT::Q04833:85..47, score:''77.00''), (query:44251..44367, target:SWISS-PROT::Q04833:3995..3957, score:''100.00''), (query:43605..43688, target:SWISS-PROT::Q04833:2994..2967, score:''84.00''), (query:42764..42877, target:SWISS-PROT::Q04833:2951..2914, score:''77.00''), (query:42180..42377, target:SWISS-PROT::Q04833:260..195, score:''148.00''), (query:42234..42419, target:SWISS-PROT::Q04833:3199..3138, score:''106.00''), (query:39807..40013, target:SWISS-PROT::Q04833:2901..2833, score:''167.00''), (query:39645..39857, target:SWISS-PROT::Q04833:3138..3068, score:''151.00''), (query:39846..40046, target:SWISS-PROT::Q04833:3241..3175, score:''132.00''), (query:39654..39866, target:SWISS-PROT::Q04833:3913..3843, score:''201.00'')), method:''blastx'', version:''1.4.9''); /match=(desc:''LOW-DENSITY LIPOPROTEIN RECEPTOR-RELATED PROTEIN 2 PRECURSOR (MEGALIN) (GLYCOPROTEIN 330)'', species:''Homo sapiens (Human)'', ranges:(query:50834..50935, target:SWISS-PROT::P98164:2733..2700, score:''99.00''), (query:50840..50947, target:SWISS-PROT::P98164:3063..3028, score:''94.00''), (query:50831..50926, target:SWISS-PROT::P98164:3918..3887, score:''102.00''), (query:50326..50433, target:SWISS-PROT::P98164:1222..1187, score:''107.00''), (query:50302..50394, target:SWISS-PROT::P98164:3762..3732, score:''91.00''), (query:49773..49904, target:SWISS-PROT::P98164:2939..2896, score:''90.00''), (query:49438..49578, target:SWISS-PROT::P98164:217..171, score:''116.00''), (query:49429..49545, target:SWISS-PROT::P98164:3796..3758, score:''108.00''), (query:48622..48720, target:SWISS-PROT::P98164:3544..3512, score:''94.00''), (query:48595..48708, target:SWISS-PROT::P98164:3720..3683, score:''86.00''), (query:47701..47814, target:SWISS-PROT::P98164:2817..2780, score:''90.00''), (query:47692..47799, target:SWISS-PROT::P98164:3674..3639, score:''60.00''), (query:47217..47366, target:SWISS-PROT::P98164:3716..3667, score:''96.00''), (query:46543..46647, 
target:SWISS-PROT::P98164:1101..1067, score:''107.00''), (query:46552..46656, target:SWISS-PROT::P98164:3873..3839, score:''84.00''), (query:45989..46126, target:SWISS-PROT::P98164:3832..3787, score:''98.00''), (query:45149..45274, target:SWISS-PROT::P98164:2775..2734, score:''99.00''), (query:44780..44893, target:SWISS-PROT::P98164:268..231, score:''76.00''), (query:44813..44905, target:SWISS-PROT::P98164:1223..1193, score:''73.00''), (query:44251..44361, target:SWISS-PROT::P98164:3630..3594, score:''119.00''), (query:43602..43700, target:SWISS-PROT::P98164:179..147, score:''97.00''), (query:43674..43781, target:SWISS-PROT::P98164:191..156, score:''90.00''), (query:43584..43685, target:SWISS-PROT::P98164:1107..1074, score:''89.00''), (query:42758..42865, target:SWISS-PROT::P98164:1264..1229, score:''79.00''), (query:42204..42413, target:SWISS-PROT::P98164:2810..2741, score:''136.00''), (query:42189..42377, target:SWISS-PROT::P98164:3027..2965, score:''125.00''), (query:42186..42293, target:SWISS-PROT::P98164:3110..3075, score:''109.00''), (query:42198..42389, target:SWISS-PROT::P98164:3584..3521, score:''137.00''), (query:42309..42422, target:SWISS-PROT::P98164:3793..3756, score:''95.00''), (query:39654..39791, target:SWISS-PROT::P98164:63..18, score:''132.00''), (query:39786..40049, target:SWISS-PROT::P98164:1183..1096, score:''230.00''), (query:39657..39890, target:SWISS-PROT::P98164:3109..3032, score:''200.00''), (query:39780..39983, target:SWISS-PROT::P98164:3756..3689, score:''194.00''), (query:39618..39761, target:SWISS-PROT::P98164:3845..3798, score:''105.00''), (query:39651..39779, target:SWISS-PROT::P98164:3964..3922, score:''128.00'')), method:''blastx'', version:''1.4.9''); /match=(desc:''GM06086.5prime GM Drosophila melanogaster ovary BlueScript Drosophila melanogaster cDNA clone GM06086 5prime, mRNA sequence'', species:''Drosophila melanogaster (fruit fly)'', ranges:(query:50852..51290, target:EMBL::AA802674:672..234, score:''2195.00'')), 
method:''blastn'', version:''1.4.9''); /match=(desc:''SD04592.5prime SD Drosophila melanogaster Schneider L2 cell culture pOT2 Drosophila melanogaster cDNA clone SD04592 5prime, mRNA sequence'', species:''Drosophila melanogaster (fruit fly)'', ranges:(query:37280..37708, target:EMBL::AI532939:429..1, score:''2136.00''), (query:37097..37217, target:EMBL::AI532939:545..425, score:''569.00'')), method:''blastn'', version:''1.4.9''); /match=(desc:''GH03622.5prime GH Drosophila melanogaster head pOT2 Drosophila melanogaster cDNA clone GH03622 5prime, mRNA sequence'', species:''Drosophila melanogaster (fruit fly)'', ranges:(query:36446..37075, target:EMBL::AI063674:1..630, score:''3150.00'')), method:''blastn'', version:''1.4.9''); EST embl|AA802674|AA802674 comes from the 5' UTR"]
    Key: protein_id, Value: ['CAB72286.1']
    Key: translation, Value: ['MLLLQLLLQLLLLGKLLLGKTPPTVFGFRLLFAAFRFPLSLHFPHRMHDHFFVRGDTHSCGWKNSTTFTIRISAIYRYLNQCQANEFRCNNGDCIDARKRCNNVSDCSEGEDENEECPAACSGMEYQCRDGTRCISVSQQCDGHSDCSDGDDEEHCDGIVPKLRYTCPKGKFTCRDLSCISIVHRCDGRADCPNDRSDEEGCPCLYDKWQCDDGTCIAKELLCNGNIDCPEDISDERYCEGGYDSEECRFDEFHCGTGECIPMRQVCDNIYDCNDYSDEVNCVEGEEEDRVGIPIGHQPWRPASKHDDWLHEMDTSEYQVYQPSNVYEKANSQNPCASNQFRCTTSNVCIPLHLRCDGFYHCNDMSDEKSCEQYQRHTTTRRPLTLATPTSRITTQGPGLLERRNTTTATEASRWPWATKTTTIATTTSNPITTVGVANSPPQTCLENIEFACHNRDCISIESVCDGIPDCGRNEDEDDALCKCSGDKYKCQRGGGCIPKSQVCDGKPQCHDRSDESACHLHGRLNKTRLGVKCLESQYQCGDGSCISGYKRCNGIHDCADASDEYNCIYDYEDTYDTDPNNNPLNECDILEFECDYSQCLPLEKKCDGYADCEDMSDELECQSYTDHCLESEFECDSYCLPRDQLCNGIPNCQDGSDERNCTFCREDAYLCNTGECVADNQRCNGIADCADGSDERHCARIYCPPNKLACNGTCVSRRIKCDGIRDCLDGYDEMYCPETNNHYPTQNVNVIRPKLGPNPIPKSCRPHEWQCANLECIDSSLQCNEIKDCSDGSDEELSVCFGTATTRLKPSDCSPEQFYCDESCYNRSVRCNGHVDCSDGSDEVGCSLPCPQHQCPSGRCYTESERCDRHRHCEDGSDEANCTAILCKDNEFLCFDRQFCINATQQCDGYYDCRDFSDEQNCIGCYANQFRCNNGDCVSGSAPCNGYSECSDHSDELNCGGTQECLPNQFRCNSGQCVSSSVRCNGRTDCQDSSDEQNCGHRHTEVSQGLETTGVFTTSTTSTTAMTPLRIICPPTSFKCENGPCISLGLKCNGRVDCPYDGSDEADCGQISNDIDPADSNDRRPNQLNLKTYPDSQIIKESREVIFRCRDEGPARAKVKWSRPGGRPLPPGFTDRNGRLEIPNIRVEDAGTYVCEAVGYASYIPGQQVTVNLNVERSWGENKYEEIRSNRIRYGTVPHIDLEFFGLDNDVGSRPESACTEYQATCMNGECIDKSSICDGNPDCSDASDEQSCSLGLKCQPNQFMCSNSKCVDRTWRCDGENDCGDNSDETSCDPEPSGAPCRYNEFQCRSGHCIPKSFQCDNVPDCTDGTDEVGCMAPLPIRPPPQSVSLLEYEVLELTCVATGTPTPTIVWRLNWGHVPDKCESKSYGGTGTLRCPDMRPQDSGAYSCEIINTRGTHFVNPDTIVTVRPVRTDVCEAGFFNMLARKAEECVQCFCFGVAKACDSANLFTYAIHPPILSHRVVSVELSPLRQIVINEAAPGQDLLTLLHGVQFRATNVHFSGRETPYLALPADYMGNQLKSYGGNLRYEVNYRGSGRPVNGPDVIITGNRFTLTYRVRTQPGQNNRVSIPFVPGGWQKPDGRKASREEIMMILANVDNILIRLGYLDSTAREVDLINIALDSAGTADKGLGSASLVEKCQCPPGYVGDSCESCASGYVRQPGGPWLGHCVPFIPDSCPSGTYGDPRRGVPCKECPCPLTGSNNFASGCQQSPDGDVVCRCNEGYTGRRCEQCAAGYQGNPLAAGGICRRIPDTSCNVDGTYSVHSNGTCQCKDSVIGEQCDTCKSKSFHLNSFTYTGCIECFCSGVGLDCDSSTWYRDQVTSTFGRSRVDHGFVLVTNYMQPTPDTVPVSMAAEPNALSFIGSADQSGNTLYWSLPAAFLGNKLSSYGGKLTYTLSYSPLPNGIMSRNSAPDVVIKSGEDLRLIHYRKSQVVPSVANTYSVEIKESA
WQRGDEVVANREHVLMALSDITAIYIKATYTTSTKEASLRQVTLDVATPTNLGTPRAVEVEQCRCPEGYLGLSCEQCAPGYARDPEGGIYLGLCRPCECNGHSKYCNSDTGDCEECSDNTEGPSCERCAAGYVGDATRGTIYDCQPDEGYPIPSPPAPGNQTLECTAYCQIEGIYDCRGNECLCKRNVIGDQCDQCRPGTYGLSAQNQDGCKECYCSGLASQCRSAALYRQLIPVDFILNAPLITDESGAVQDTENLIPDISRNMYTYTHTSYLPKYWSLRGSVLGNQLFSYGGRLSYSLIVESYGNYERGHDIVLIGNGLKLIWSRPDGNENQEEYNVRLHEDEQWTRQDRESARPASRSDFMTVLSDLQHILIRATPRVPTQSTSIGNVILESAVTTRTPGATHASDIELCQCPSGYVGTSCESCAPLHYRDASGSCSLCPCDVSNTESCDLVSGGYVECRCKARWKGDRCREIGE']
""",
                -1,
            ),
            (
                """\
type: gene
location: [70719:75241](-)
qualifiers:
    Key: gene, Value: ['EG:BACR25B3.2']
""",
                -1,
            ),
            (
                """\
type: CDS
location: join{[75216:75241](-), [73085:73559](-), [72838:73016](-), [72604:72768](-), [71423:71621](-), [70719:70988](-)}
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:6946672']
    Key: gene, Value: ['EG:BACR25B3.2']
    Key: note, Value: ["/prediction=(method:''genefinder'', version:''084'', score:''41.82''); /prediction=(method:''genscan'', version:''1.0'')"]
    Key: protein_id, Value: ['CAB72287.1']
    Key: translation, Value: ['MANSKVVAHDESLQGINDSEWQLMGDDIDDGLLDDVDETLKPMETKSEEEDLPTGNWFSQSVHRVRRSINRLFGSDDNQERGRRQQRERSQRNRDAINRQKELRRRQKEDHNRWKQMRMERQLEKQRLVKRTNHVVFNRATDPRKRASDLYDENEASGYHEEDTTLYRTYFVVNEPYDNEYRDRESVQFQNLQKLLDDDLRNFFHSNYEGNDDEEQEIRSTLERVEPTNDNFKIRVQLRIELPTSVNDFGSKLQQQLNVYNRIENLSAATDGVFSFTESSDIEEEAIDVTLPQEEVEGSGSDDSSCRGDATFTCPRSGKTICDEMRCDREIQCPDGEDEEYCNYPNVCTEDQFKCDDKCLELKKRCDGSIDCLDQTDEAGCINAPEPEPEPEPEPEPEPESEPEAEPEPEPEPEPESEPEQEPEPQVPEANGKFY']
""",
                -1,
            ),
            (
                """\
type: gene
location: [121866:127124](+)
qualifiers:
    Key: gene, Value: ['EG:BACR25B3.3']
""",
                1,
            ),
            (
                """\
type: CDS
location: join{[121866:122046](+), [122173:122630](+), [123671:123823](+), [124062:124320](+), [124391:124688](+), [124754:125018](+), [125093:125254](+), [125316:125576](+), [126792:127124](+)}
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:6946673']
    Key: gene, Value: ['EG:BACR25B3.3']
    Key: note, Value: ["/prediction=(method:''genscan'', version:''1.0'', score:''174.91''); /prediction=(method:''genefinder'', version:''084''); /match=(desc:''PROBABLE G PROTEIN-COUPLED RECEPTOR C13B9.4 IN CHROMOSOME III'', species:''Caenorhabditis elegans'', ranges:(query:123671..123775, target:SWISS-PROT::Q09460:107..141, score:''80.00''), (query:123743..123829, target:SWISS-PROT::Q09460:235..263, score:''72.00''), (query:124072..124332, target:SWISS-PROT::Q09460:265..351, score:''161.00''), (query:124392..124691, target:SWISS-PROT::Q09460:349..448, score:''206.00''), (query:124755..124958, target:SWISS-PROT::Q09460:448..515, score:''123.00''), (query:124764..125027, target:SWISS-PROT::Q09460:454..541, score:''108.00'')), method:''blastx'', version:''1.4.9''); /match=(desc:''CALCITONIN RECEPTOR PRECURSOR (CT-R)'', species:''Sus scrofa (Pig)'', ranges:(query:124165..124236, target:SWISS-PROT::P25117:191..214, score:''54.00''), (query:124392..124580, target:SWISS-PROT::P25117:233..295, score:''118.00''), (query:124725..124886, target:SWISS-PROT::P25117:318..371, score:''127.00'')), method:''blastx'', version:''1.4.9'')"]
    Key: protein_id, Value: ['CAB72288.1']
    Key: translation, Value: ['MGAGNRKSETKTKTEAEIEIEMERDQFSIAANACMSMGPMLISKDKAPCSGGRVRHADSLHIYYAVDGKMTLLSNILDCGGCISAQRFTRLLRQSGSSGPSPSAPTAGTFESKSMLEPTSSHSLATGRVPLLHDFDASTTESPGTYVLDGVARVAQLALEPTVMDALPDSDTEQVLGNLNSSAPWNLTLASAAATNFENCSALFVNYTLPQTEFAIRKCELDGRWGSRPNATEVNPPGWTDYGPCYKPEIIRLMQQMGSKDFDAYIDIARRTRTLEIVGLCLSLFALIVSLLIFCTFRSLRNNRTKIHKNLFVAMVLQVIIRLTLYLDQFRRGNKEAATNTSLSVIENTPYLCEASYVLLEYARTAMFMWMFIEGLYLHNMVTVAVFQGSFPLKFFSRLGWCVPILMTTVWARCTVMYMDTSLGECLWNYNLTPYYWILEGPRLAVILLNFCFLVNIIRVLVMKLRQSQASDIEQTRKAVRAAIVLLPLLGITNLLHQLAPLKTATNFAVWSYGTHFLTSFQGFFIALIYCFLNGEVRAVLLKSLATQLSVRGHPEWAPKRASMYSGAYNTAPDTDAVQPAGDPSATGKRISPPNKRLNGRKPSSASIVMIHEPQQRQRLMPRLQNKAREKGKDRVEKTDAEAEPDPTISHIHSKEAGSARSRTRGSKWIMGICFRGQMCDAGLAKDAANIHDVANAADVDACSGSNNNYHNINNNNGSQNNNSIHCNHRDDDKVKGESQSDFKEPSNTNAESLVHLALFTAHTSNTQNNTHRNTIFTPIRRRNCS']
""",
                1,
            ),
            (
                """\
type: gene
location: [128488:129414](-)
qualifiers:
    Key: gene, Value: ['EG:BACR25B3.4']
""",
                -1,
            ),
            (
                """\
type: CDS
location: join{[129373:129414](-), [129195:129313](-), [128776:129140](-), [128488:128715](-)}
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:6946674']
    Key: gene, Value: ['EG:BACR25B3.4']
    Key: note, Value: ["/prediction=(method:''genefinder'', version:''084'', score:''61.35''); /prediction=(method:''genscan'', version:''1.0''); /match=(desc:''VACUOLAR PROTON-ATPASE SUBUNIT D'', species:''Oryctolagus cuniculus (Rabbit)'', ranges:(query:129190..129324, target:SPTREMBL::O97755:55..11, score:''130.00''), (query:128778..129176, target:SPTREMBL::O97755:174..42, score:''472.00''), (query:128546..128716, target:SPTREMBL::O97755:231..175, score:''169.00'')), method:''blastx'', version:''1.4.9''); /match=(desc:''VACUOLAR ATP SYNTHASE SUBUNIT D (EC 3.6.1.34) (V-ATPASE D SUBUNIT) (V- ATPASE 28 KD ACCESSORY PROTEIN)'', species:''Bos taurus (Bovine)'', ranges:(query:129190..129324, target:SWISS-PROT::P39942:55..11, score:''130.00''), (query:128778..129176, target:SWISS-PROT::P39942:174..42, score:''471.00''), (query:128546..128716, target:SWISS-PROT::P39942:231..175, score:''173.00'')), method:''blastx'', version:''1.4.9''); /match=(desc:''GH28048.5prime GH Drosophila melanogaster head pOT2 Drosophila melanogaster cDNA clone GH28048 5prime, mRNA sequence'', species:''Drosophila melanogaster (fruit fly)'', ranges:(query:129196..129317, target:EMBL::AI517334:233..112, score:''412.00''), (query:128777..129145, target:EMBL::AI517334:597..229, score:''1251.00'')), method:''blastn'', version:''1.4.9''); /match=(desc:''GH07112.5prime GH Drosophila melanogaster head pOT2 Drosophila melanogaster cDNA clone GH07112 5prime, mRNA sequence'', species:''Drosophila melanogaster (fruit fly)'', ranges:(query:129196..129317, target:EMBL::AI108302:223..102, score:''412.00''), (query:128777..129145, target:EMBL::AI108302:587..219, score:''1251.00''), (query:128636..128716, target:EMBL::AI108302:667..587, score:''243.00'')), method:''blastn'', version:''1.4.9'')"]
    Key: protein_id, Value: ['CAB72289.1']
    Key: translation, Value: ['MAAKDRLPIFPSRGAQTLMKSRLAGATKGHGLLKKKADALQMRFRLILGKIIETKTLMGQVMKEAAFSLAEVKFTTGDINQIVLQNVTKAQIKIRTKKDNVAGVTLPIFEPYTDGVDTYELAGLARGGQQLAKLKKNYQSAVRLLVQLASLQTSFVTLDDVIKVTNRRVNAIEHVIIPRINRTIEYIISELDELEREEFYRLKKIQDKKREARKASDKLRAEQRLLGQMAEAQEVQNILDEDGDEDLLF']
""",
                -1,
            ),
            (
                """\
type: gene
location: [132239:132926](+)
qualifiers:
    Key: gene, Value: ['EG:BACR25B3.5']
""",
                1,
            ),
            (
                """\
type: CDS
location: [132239:132926](+)
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:6946675']
    Key: gene, Value: ['EG:BACR25B3.5']
    Key: note, Value: ["/prediction=(method:''genefinder'', version:''084'', score:''48.06''); /prediction=(method:''genscan'', version:''1.0'', score:''132.90''); /match=(desc:''N-ACETYLTRANSFERASE'', species:''Drosophila melanogaster (Fruit fly)'', ranges:(query:132249..132326, target:SPTREMBL::Q94521:60..85, score:''64.00''), (query:132600..132842, target:SPTREMBL::Q94521:171..251, score:''105.00'')), method:''blastx'', version:''1.4.9''); EST embl|AI063093|AI063093 comes from the 3' UTR"]
    Key: protein_id, Value: ['CAB72290.1']
    Key: translation, Value: ['MEYKMIAPEHSEQVMEHLRRNFFADEPLNKAAGLCQNGSSCPALEAHCAEAIQHRMSVMAVDAKEKDTLKIVGVVLNGILKPGDTAKALSKLDCNDDADFRKIFDLLHRHNLKHNLFEHFDVDCMFDVRILSVDSCYRGQGIANELVKRSVAVAKKNGFRLLKADATGIFSQKIFRSHGFEVFSEQPYSKYTDENGKVILPVEAPHIKLQQLYKAICADDQDEKKQSL']
""",
                1,
            ),
            (
                """\
type: gene
location: [133491:134407](-)
qualifiers:
    Key: gene, Value: ['EG:BACR25B3.6']
""",
                -1,
            ),
            (
                """\
type: CDS
location: join{[134197:134407](-), [133866:134135](-), [133662:133748](-), [133491:133595](-)}
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:6946676']
    Key: gene, Value: ['EG:BACR25B3.6']
    Key: note, Value: ["/prediction=(method:''genscan'', version:''1.0'', score:''119.22''); /prediction=(method:''genefinder'', version:''084''); /match=(desc:''LD41675.5prime LD Drosophila melanogaster embryo pOT2 Drosophila melanogaster cDNA clone LD41675 5prime, mRNA sequence'', species:''Drosophila melanogaster (fruit fly)'', ranges:(query:134192..134531, target:EMBL::AI515958:340..1, score:''1691.00''), (query:133879..134139, target:EMBL::AI515958:591..331, score:''1305.00'')), method:''blastn'', version:''1.4.9'')"]
    Key: protein_id, Value: ['CAB72291.1']
    Key: translation, Value: ['MNGLPPSKHYNLTHYQQRYNWDCGLSCIIMILSAQQREQLLGNFDAVCGEEGFGSSTWTIDLCYLLMRYQVRHEYFTQTLGIDPNYAQHTYYSKIIDKDERRVTRKFKDARAHGLRVEQRTVDMEVILRHLARHGPVILLTNASLLTCEVCKRNVLEKFGYAGHYVVLCGYDMAAQKLFYHNPEVHDGHICRCLIESMDTARRAYGTDEDIIFIYEKKETRE']
""",
                -1,
            ),
            (
                """\
type: gene
location: [135478:136829](+)
qualifiers:
    Key: gene, Value: ['EG:BACR25B3.7']
""",
                1,
            ),
            (
                """\
type: CDS
location: join{[135478:135749](+), [135960:136586](+), [136640:136829](+)}
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:6946677']
    Key: gene, Value: ['EG:BACR25B3.7']
    Key: note, Value: ["/prediction=(method:''genefinder'', version:''084'', score:''66.07''); /prediction=(method:''genscan'', version:''1.0'', score:''145.64''); /match=(desc:''HYPOTHETICAL 40.4 KD TRP-ASP REPEATS CONTAINING PROTEIN C14B1.4 IN CHROMOSOME III'', species:''Caenorhabditis elegans'', ranges:(query:135548..135748, target:SWISS-PROT::Q17963:39..105, score:''120.00''), (query:135957..136586, target:SWISS-PROT::Q17963:105..314, score:''899.00''), (query:136641..136823, target:SWISS-PROT::Q17963:315..375, score:''219.00'')), method:''blastx'', version:''1.4.9''); /match=(desc:''LD30385.5prime LD Drosophila melanogaster embryo pOT2 Drosophila melanogaster cDNA clone LD30385 5prime, mRNA sequence'', species:''Drosophila melanogaster (fruit fly)'', ranges:(query:135288..135749, target:EMBL::AA950546:102..563, score:''2301.00''), (query:135956..136047, target:EMBL::AA950546:559..650, score:''442.00'')), method:''blastn'', version:''1.4.9''); /match=(desc:''LD10938.5prime LD Drosophila melanogaster embryo BlueScript Drosophila melanogaster cDNA clone LD10938 5prime, mRNA sequence'', species:''Drosophila melanogaster (fruit fly)'', ranges:(query:136108..136288, target:EMBL::AA392005:776..596, score:''212.00'')), method:''blastn'', version:''1.4.9'')"]
    Key: protein_id, Value: ['CAB72292.1']
    Key: translation, Value: ['MVPIGAVHGGHPGVVHPPQQPLPTAPSGPNSLQPNSVGQPGATTSSNSSASNKSSLSVKPNYTLKFTLAGHTKAVSAVKFSPNGEWLASSSADKLIKIWGAYDGKFEKTISGHKLGISDVAWSSDSRLLVSGSDDKTLKVWELSTGKSLKTLKGHSNYVFCCNFNPQSNLIVSGSFDESVRIWDVRTGKCLKTLPAHSDPVSAVHFNRDGSLIVSSSYDGLCRIWDTASGQCLKTLIDDDNPPVSFVKFSPNGKYILAATLDNTLKLWDYSKGKCLKTYTGHKNEKYCIFANFSVTGGKWIVSGSEDNMVYIWNLQSKEVVQKLQGHTDTVLCTACHPTENIIASAALENDKTIKLWKSDT']
""",
                1,
            ),
            (
                """\
type: gene
location: [145402:147087](+)
qualifiers:
    Key: gene, Value: ['EG:BACR25B3.8']
""",
                1,
            ),
            (
                """\
type: CDS
location: join{[145402:146203](+), [146514:147087](+)}
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:6946678']
    Key: gene, Value: ['EG:BACR25B3.8']
    Key: protein_id, Value: ['CAB72293.1']
    Key: translation, Value: ['MNSTTKHLLHCTLLITVIVTFEVFSGGIKIDENSFTLVDPWTEYGQLATVLLYLLRFLTLLTLPQVLFNFCGLVFYNAFPEKVVLKGSPLLAPFICIRVVTRGDFPDLVKTNVLRNMNTCLDTGLENFLIEVVTDKAVNLSQHRRIREIVVPKEYKTRTGALFKSRALQYCLEDNVNVLNDSDWIVHLDEETLLTENSVRGIINFVLDGKHPFGQGLITYANENVVNWLTTLADSFRVSDDMGKLRLQFKLFHKPLFSWKGSYVVTQVSAERSVSFDNGIDGSVAEDCFFAMRAFSQGYTFNFIEGEMYEKSPFTLLDFLQQRKRWLQGILLVVHSKMIPFKHKLLLGISVYSWVTMPLSTSNIIFAALYPIPCPNLVDFVCAFIAAINIYMYVFGVIKSFSLYRFGLFRFLACVLGAVCTIPVNVVIENVAVIWGLVGKKHKFYVVQKDVRVLETV']
""",
                1,
            ),
            (
                """\
type: gene
location: [148859:152785](-)
qualifiers:
    Key: gene, Value: ['EG:BACR25B3.9']
""",
                -1,
            ),
            (
                """\
type: CDS
location: join{[152105:152785](-), [151880:152032](-), [149545:151809](-), [148965:149462](-), [148859:148905](-)}
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:6946679']
    Key: gene, Value: ['EG:BACR25B3.9']
    Key: note, Value: ["/prediction=(method:''genscan'', version:''1.0''); /prediction=(method:''genefinder'', version:''084''); /match=(desc:''HYPOTHETICAL 135.8 KD PROTEIN'', species:''Drosophila melanogaster (Fruit fly)'', ranges:(query:152096..152785, target:SPTREMBL::Q9XZ29:230..1, score:''1147.00''), (query:151882..152043, target:SPTREMBL::Q9XZ29:277..224, score:''250.00''), (query:149546..151816, target:SPTREMBL::Q9XZ29:1032..276, score:''3735.00''), (query:148953..149465, target:SPTREMBL::Q9XZ29:1202..1032, score:''890.00''), (query:148863..148907, target:SPTREMBL::Q9XZ29:1212..1198, score:''76.00'')), method:''blastx'', version:''1.4.9''); /match=(desc:''LD21815.5prime LD Drosophila melanogaster embryo pOT2 Drosophila melanogaster cDNA clone LD21815 5prime similar to L19117: Drosophila melanogaster (chromosome X 3A6-8) kinesin-like protein of 3A (klp3A) mRNA sequence'', species:''Drosophila melanogaster (fruit fly)'', ranges:(query:152482..152787, target:EMBL::AA816942:460..155, score:''1485.00''), (query:152401..152483, target:EMBL::AA816942:540..458, score:''397.00'')), method:''blastn'', version:''1.4.9'')"]
    Key: protein_id, Value: ['CAB72294.1']
    Key: translation, Value: ['MSSEDPSCVAVALRVRPLVQSELDRGCRIAVERSADGAPQVTVNRNESYTYNYVFDIDDSQKDLFETCVQAKVKKLLNGYNVTILAYGQTGSGKTYTMGTAFNGVLDDHVGVIPRAVHDIFTAIAEMQSEFRFAVTCSFVELYQEQFYDLFSSKTRDKATVDIREVKNRIIMPGLTELVVTSAQQVTDHLIRGSAGRAVAATAMNETSSRSHAIFTLTLVATKLDGKQSVTTSRFNLVDLAGSERCSKTLASGDRFKEGVNINKGLLALGNVINALGSGQAAGYIPYRQSKLTRLLQDSLGGNSITLMIACVSPADYNVAETLSTLRYADRALQIKNKPVVNLDPHAAEVNMLKDVIQKLRVELLSGGKMSSSLISAVGAAGLGAIPCEESLAGSMANAAEIQRLKEQVRTLQDRNRKLQQELHQSLLDLTEKEMRAHIAEQAHDKLRSHVSELKNKLDQREQAQFGNENTNGDNEMRDFSLLVNRVHVELQRTQEELESQGHESRQRLSSRSHTEGGESGGDEVHEMLHSHSEEYTNKQMNFAGELRNINRQLDLKQELHERIMRNFSRLDSDDEDVKLRLCNQKIDDLEAERRDLMDQLRNIKSKDISAKLAEERRKRLQLLEQEISDLRRKLITQANLLKIRDKEREKIQNLSTEIRTMKESKVKLIRAMRGESEKFRQWKMVREKELTQLKSKDRKMQSEIVRQQTLHSKQRQVLKRKCEEALAANKRLKDALERQASAQAQRHKYKDNGGSAAGSSNANAKTDSWVDRELEIILSLIDAEHSLEQLMEDRAVINNHYHLLQQEKTSDPAEAAEQARILASLEEELEMRNAQISDLQQKVCPTDLDSRIRSLAEGVQSLGESRTVSKQLLKTLVQQRRLQASSLNEQRTTLDELRAQLLDAQQQEDAASKRLRLLQSQHEEQMLAQQRAYEEKVSVLIRTANQRWAEARSPAEDQQRNQILEELLSSREALQQELDKLRAKNKSKSKAVKSEPQDLDDSFQIVDGNETVVLSDVSDDPDWVPSTSKSKRIQSDSRNVISPPEKQDANVTSLGNSSIQSLNSTSATEDGKRCKGCKCRTKCTTKRCGCLSGNNACSETCVCKSNCRNPLNLKDHASQCGDGDGQKDETEDADKSDDDGDDEPQTSKENAVKFVTPEAPGKVVASPKQTLQEPKAAATPLMNSNVVEDINGPKLAKMSGLAFDTPKRKFF']
""",
                -1,
            ),
            (
                """\
type: gene
location: join{[153489:154269](+), AL121804.2[40:610](+), AL121804.2[671:1487](+)}
qualifiers:
    Key: gene, Value: ['EG:BACR7C10.3']
""",
                1,
            ),
            (
                """\
type: CDS
location: join{[153489:154269](+), AL121804.2[40:610](+), AL121804.2[671:1487](+)}
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:6946680']
    Key: gene, Value: ['EG:BACR7C10.3']
    Key: protein_id, Value: ['CAB72295.1']
    Key: translation, Value: ['MEEEAPRFNVLEEAFNGNGNGCANVEATQSAILKVLTRVNRFQMRVRKHIEDNYTEFLPNNTSPDIFLEESGSLNREIHDMLENLGSEGLDALDEANVKMAGNGRQLREILLGLGVSEHVLRIDELFQCVEEAKATKDYLVLLDLVGRLRAFIYGDDSVDGDAQVATPEVRRIFKALECYETIKVKYHVQAYMLQQSLQERFDRLVQLQCKSFPTSRCVTLQVSRDQTQLQDIVQALFQEPYNPARLAEFLLDNCIEPVIMRPVMADYSEEADGGTYVRLSLSYATKEPSSAHVRPNYKQVLENLRLLLHTLAGINCSVSRDQHVFGIIGDHVKDKMLKLLVDECLIPAVPESTEEYQTSTLCEDVAQLEQLLVDSFIINPEQDRALGQFVEKYETYYRNRMYRRVLETAREIIQRDLQDMVLVAPNNHSAEVANDPFLFPRCMISKSAQDFVKLMDRILRQPTDKLGDQEADPIAGVISIMLHTYINEVPKVHRKLLESIPQQAVLFHNNCMFFTHWVAQHANKGIESLAALAKTLQATGQQHFRVQVDYQSSILMGIMQEFEFESTHTLGSGPLKLVRQCLRQLELLKNVWANVLPETVYNATFCELINTFVAELIRRVFTLRHISAQMACELSDLIDVVLQRAPTLFREPNEVVQVLSWLKLQQLKAMLNASLMEITELWGDGVGPLTASYKSDEIKHLIRALFQDTDWRAKAITQIV']
""",
                1,
            ),
        )
        dbxrefs = []
        self.perform_feature_parser_test(
            record,
            seq,
            id,
            name,
            description,
            annotations,
            references,
            features,
            dbxrefs,
        )

    def test_feature_parser_08(self):
        path = "GenBank/one_of.gb"
        with open(path) as handle:
            records = GenBank.Iterator(handle, self.feat_parser)
            record = next(records)
        seq = "GAATTCAGATAGAATGTAGACAAGAGGGATGGTGAGGAAAACCTACGGCAAGCA...GGC"
        id = "U18266.1"
        name = "HSTMPO1"
        description = "Human thymopoietin (TMPO) gene, exon 1"
        annotations = {
            "accessions": ["U18266"],
            "data_file_division": "PRI",
            "date": "01-JUL-1995",
            "gi": "885676",
            "keywords": [""],
            "molecule_type": "DNA",
            "organism": "Homo sapiens",
            "segment": "1 of 6",
            "sequence_version": 1,
            "source": "human",
            "taxonomy": [
                "Eukaryota",
                "Metazoa",
                "Chordata",
                "Craniata",
                "Vertebrata",
                "Euteleostomi",
                "Mammalia",
                "Eutheria",
                "Primates",
                "Catarrhini",
                "Hominidae",
                "Homo",
            ],
        }
        references = [
            "location: [0:2509]\nauthors: Harris,C.A., Andryuk,P.J., Cline,S.W.,"
            " Siekierka,J.J. and Goldstein,G.\ntitle: Structure and mapping of the"
            " human thymopoietin (TMPO) gene and relationship of TMPO beta to rat"
            " lamin-associated polypeptide 2\njournal: Unpublished\nmedline id:"
            " \npubmed id: \ncomment: \n",
            "location: [0:2509]\nauthors: Harris,C.A.\ntitle: Direct"
            " Submission\njournal: Submitted (07-DEC-1994) Crafford A. Harris,"
            " Immunobiology Research Institute, Route 22 East, Annandale, NJ"
            " 08801-0999, USA\nmedline id: \npubmed id: \ncomment: \n",
        ]
        features = (
            (
                """\
type: source
location: [0:2509](+)
qualifiers:
    Key: chromosome, Value: ['12']
    Key: clone, Value: ['P1.516 (DMPC-HFFno.1B-0943F)']
    Key: clone_lib, Value: ['DuPont Merck Hum Fibroblast P1 Library no.1 Series B (compressed) (Genome Systems Inc)']
    Key: db_xref, Value: ['taxon:9606']
    Key: map, Value: ['12q22; 64% (% distance from centromere to telomere)']
    Key: organism, Value: ['Homo sapiens']
""",
                1,
            ),
            (
                """\
type: 5'UTR
location: [one-of(1887,1900):2200](+)
qualifiers:
    Key: gene, Value: ['TMPO']
""",
                1,
            ),
            (
                """\
type: gene
location: join{[1887:2509](+), U18267.1[0:270](+), U18268.1[0:309](+), U18270.1[0:6905](+), U18269.1[0:128](+), U18271.1[0:3234](+)}
qualifiers:
    Key: gene, Value: ['TMPO']
""",
                1,
            ),
            (
                """\
type: exon
location: [one-of(1887,1900):2479](+)
qualifiers:
    Key: gene, Value: ['TMPO']
    Key: number, Value: ['1']
""",
                1,
            ),
            (
                """\
type: CDS
location: join{[2200:2479](+), U18267.1[119:246](+), U18268.1[129:288](+), U18270.1[4690:4788](+), U18269.1[81:>128](+)}
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:885684']
    Key: gene, Value: ['TMPO']
    Key: product, Value: ['thymopoietin beta']
    Key: protein_id, Value: ['AAB60434.1']
    Key: translation, Value: ['MPEFLEDPSVLTKDKLKSELVANNVTLPAGEQRKDVYVQLYLQHLTARNRPPLPAGTNSKGPPDFSSDEEREPTPVLGSGAAAAGRSRAAVGRKATKKTDKPRQEDKDDLDVTELTNEDLLDQLVKYGVNPGPIVGTTRKLYEKKLLKLREQGTESRSSTPLPTISSSAENTRQNGSNDSDRYSDNEEDSKIELKLEKREPLKGRAKTPVTLKQRRVEHNQSYSQAGITETEWTSGS']
""",
                1,
            ),
            (
                """\
type: CDS
location: join{[2200:2479](+), U18267.1[119:246](+), U18268.1[129:288](+), U18270.1[38:1558](+)}
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:885683']
    Key: gene, Value: ['TMPO']
    Key: product, Value: ['thymopoietin alpha']
    Key: protein_id, Value: ['AAB60433.1']
    Key: translation, Value: ['MPEFLEDPSVLTKDKLKSELVANNVTLPAGEQRKDVYVQLYLQHLTARNRPPLPAGTNSKGPPDFSSDEEREPTPVLGSGAAAAGRSRAAVGRKATKKTDKPRQEDKDDLDVTELTNEDLLDQLVKYGVNPGPIVGTTRKLYEKKLLKLREQGTESRSSTPLPTISSSAENTRQNGSNDSDRYSDNEEGKKKEHKKVKSTRDIVPFSELGTTPSGGGFFQGISFPEISTRPPLGSTELQAAKKVHTSKGDLPREPLVATNLPGRGQLQKLASERNLFISCKSSHDRCLEKSSSSSSQPEHSAMLVSTAASPSLIKETTTGYYKDIVENICGREKSGIQPLCPERSHISDQSPLSSKRKALEESESSQLISPPLAQAIRDYVNSLLVQGGVGSLPGTSNSMPPLDVENIQKRIDQSKFQETEFLSPPRKVPRLSEKSVEERDSGSFVAFQNIPGSELMSSFAKTVVSHSLTTLGLEVAKQSQHDKIDASELSFPFHESILKVIEEEWQQVDRQLPSLACKYPVSSREATQILSVPKVDDEILGFISEATPLGGIQAASTESCNQQLDLALCRAYEAAASALQIATHTAFVAKAMQADISEAAQILSSDPSRTHQALGILSKTYDAASYICEAAFDEVKMAAHTMGNATVGRRYLWLKDCKINLASKNKLASTPFKGGTLFGGEVCKVIKKRGNKH']
""",
                1,
            ),
        )
        dbxrefs = []
        self.perform_feature_parser_test(
            record,
            seq,
            id,
            name,
            description,
            annotations,
            references,
            features,
            dbxrefs,
        )

    def test_feature_parser_09(self):
        path = "GenBank/NT_019265.gb"
        with open(path) as handle:
            records = GenBank.Iterator(handle, self.feat_parser)
            record = next(records)
        seq = "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN...NNN"
        id = "NT_019265.6"
        name = "NT_019265"
        description = "Homo sapiens chromosome 1 working draft sequence segment"
        annotations = {
            "accessions": ["NT_019265"],
            "comment": """\
GENOME ANNOTATION REFSEQ:  NCBI contigs are derived from assembled
genomic sequence data. They may include both draft and finished
sequence.
On Oct 16, 2001 this sequence version replaced gi:15294341.
COMPLETENESS: not full length.""",
            "contig": "join(AL391218.9:105173..108462,gap(100),complement(AL512330.12:1..182490),complement(AL590128.4:9034..81287),gap(100),AL591163.7:85799..94832,gap(100),AL591163.7:94933..113245,gap(100),AL591163.7:42173..44897,complement(AL590128.4:1..6208),AL591163.7:51307..52779,gap(100),AL591163.7:52880..85698,gap(100),AL591163.7:113346..126143,complement(AL159177.12:184729..186047),AL031447.4:1..112158,complement(AL159177.12:1..72671),complement(AL591866.12:23507..86371),AL031848.11:1..142965,AL031847.17:1..166418,AL035406.25:1..161651,complement(AL356261.20:94599..98345),complement(AC026968.3:54432..54579),gap(100),complement(AC062024.2:98529..107911),gap(100),AC062024.2:7713..11594,gap(100),complement(AL356261.20:1..94498),complement(AL356693.18:19988..70853),gap(100),AL356693.18:17351..19887,gap(100),complement(AL356693.18:3037..17250),gap(100),complement(AL356693.18:1..2936),gap(100),AC026968.3:675..2393,gap(100),AC026968.3:1..574,gap(100),AL356261.20:179029..182233)",
            "data_file_division": "CON",
            "date": "16-OCT-2001",
            "gi": "16156830",
            "keywords": [""],
            "molecule_type": "DNA",
            "organism": "Homo sapiens",
            "sequence_version": 6,
            "source": "human",
            "taxonomy": [
                "Eukaryota",
                "Metazoa",
                "Chordata",
                "Craniata",
                "Vertebrata",
                "Euteleostomi",
                "Mammalia",
                "Eutheria",
                "Primates",
                "Catarrhini",
                "Hominidae",
                "Homo",
            ],
        }
        references = [
            "location: [0:1250660]\nauthors: NCBI Annotation Project.\ntitle: Direct"
            " Submission\njournal: Submitted (11-OCT-2001) National Center for"
            " Biotechnology Information, NIH, Bethesda, MD 20894, USA\nmedline id:"
            " \npubmed id: \ncomment: \n"
        ]
        features = (
            (
                """\
type: source
location: [0:1250660](+)
qualifiers:
    Key: chromosome, Value: ['1']
    Key: db_xref, Value: ['taxon:9606']
    Key: organism, Value: ['Homo sapiens']
""",
                1,
            ),
            (
                """\
type: source
location: [0:3290](+)
qualifiers:
    Key: clone, Value: ['RP11-13G5']
    Key: db_xref, Value: ['taxon:9606']
    Key: note, Value: ['Accession AL391218 sequenced by The Sanger Centre']
    Key: organism, Value: ['Homo sapiens']
""",
                1,
            ),
            (
                """\
type: misc_feature
location: [215901:365470](+)
qualifiers:
    Key: note, Value: ['FISH-mapped clone']
    Key: standard_name, Value: ['RP11-242F24']
""",
                1,
            ),
            (
                """\
type: variation
location: [217507:217508](+)
qualifiers:
    Key: allele, Value: ['T', 'C']
    Key: db_xref, Value: ['dbSNP:811400']
""",
                1,
            ),
            (
                """\
type: mRNA
location: join{[342429:342515](+), [363170:363300](+), [365740:365814](+), [376397:376499](+), [390168:390297](+), [391256:391379](+), [392605:392679](+), [398229:398419](+), [399081:399167](+), [399533:399650](+), [405843:405913](+), [406703:406761](+), [406867:407010](+), [407961:408091](+), [408507:409092](+)}
qualifiers:
    Key: db_xref, Value: ['LocusID:55735']
    Key: gene, Value: ['FLJ10737']
    Key: product, Value: ['hypothetical protein FLJ10737']
    Key: transcript_id, Value: ['XM_057697.1']
""",
                1,
            ),
        )
        dbxrefs = []
        self.perform_feature_parser_test(
            record,
            seq,
            id,
            name,
            description,
            annotations,
            references,
            features,
            dbxrefs,
        )

    def test_feature_parser_10(self):
        path = "GenBank/origin_line.gb"
        with open(path) as handle:
            records = GenBank.Iterator(handle, self.feat_parser)
            record = next(records)
        seq = "TTAATTAACTGTCTTCGATTGCGTTTAATTGACGGTTTTCGATTAAAAGCGGTA...CGC"
        id = "NC_002678.1"
        name = "NC_002678"
        description = "Mesorhizobium loti, complete genome (edited)"
        annotations = {
            "accessions": ["NC_002678"],
            "data_file_division": "BCT",
            "date": "28-MAR-2001",
            "gi": "13470324",
            "keywords": [""],
            "molecule_type": "DNA",
            "organism": "Mesorhizobium loti",
            "sequence_version": 1,
            "source": "Mesorhizobium loti",
            "taxonomy": [
                "Bacteria",
                "Proteobacteria",
                "alpha subdivision",
                "Rhizobiaceae group",
                "Phyllobacteriaceae",
                "Mesorhizobium",
            ],
            "topology": "circular",
        }
        references = [
            "authors: Kaneko,T., Nakamura,Y., Sato,S., Asamizu,E., Kato,T.,"
            " Sasamoto,S., Watanabe,A., Idesawa,K., Ishikawa,A., Kawashima,K.,"
            " Kimura,T., Kishida,Y., Kiyokawa,C., Kohara,M., Matsumoto,M., Matsuno,A.,"
            " Mochizuki,Y., Nakayama,S., Nakazaki,N., Shimpo,S., Sugimoto,M.,"
            " Takeuchi,C., Yamada,M. and Tabata,S.\ntitle: Complete genome structure of"
            " the nitrogen-fixing symbiotic bacterium Mesorhizobium loti\njournal: DNA"
            " Res. 7, 331-338 (2000)\nmedline id: \npubmed id: \ncomment: \n",
            "location: [0:180]\nauthors: Kaneko,T.\ntitle: Direct Submission\njournal:"
            " Submitted (05-DEC-2000) Takakazu Kaneko, Kazusa DNA Research Institute,"
            " The First Laboratory for Plant Gene Research; Yana 1532-3, Kisarazu,"
            " Chiba 292-0812, Japan (E-mail:kaneko@kazusa.or.jp,"
            " URL:http://www.kazusa.or.jp/rhizobase/, Tel:81-438-52-3935,"
            " Fax:81-438-52-3934)\nmedline id: \npubmed id: \ncomment: \n",
        ]
        features = (
            (
                """\
type: source
location: [0:180](+)
qualifiers:
    Key: db_xref, Value: ['taxon:381']
    Key: organism, Value: ['Mesorhizobium loti']
    Key: strain, Value: ['MAFF303099']
""",
                1,
            ),
            (
                """\
type: gene
location: [19:120](+)
qualifiers:
    Key: gene, Value: ['fake']
""",
                1,
            ),
        )
        dbxrefs = []
        self.perform_feature_parser_test(
            record,
            seq,
            id,
            name,
            description,
            annotations,
            references,
            features,
            dbxrefs,
        )

    def test_feature_parser_11(self):
        path = "GenBank/blank_seq.gb"
        with open(path) as handle:
            records = GenBank.Iterator(handle, self.feat_parser)
            record = next(records)
        seq = "MEECWVTEIANGSKDGLDSNPMKDYMILSGPQKTAVAVLCTLLGLLSALENVAV...SDC"
        id = "NP_001832.1"
        name = "NP_001832"
        description = "cannabinoid receptor 2 (macrophage) [Homo sapiens]"
        annotations = {
            "accessions": ["NP_001832"],
            "comment": """\
REVIEWED REFSEQ: This record has been curated by NCBI staff. The
reference sequence was derived from X74328.1.
Summary: The cannabinoid delta-9-tetrahydrocannabinol is the
principal psychoactive ingredient of marijuana. The proteins
encoded by this gene and the cannabinoid receptor 1 (brain) (CNR1)
gene have the characteristics of a guanine nucleotide-binding
protein (G-protein)-coupled receptor for cannabinoids. They inhibit
adenylate cyclase activity in a dose-dependent, stereoselective,
and pertussis toxin-sensitive manner. These proteins have been
found to be involved in the cannabinoid-induced CNS effects
(including alterations in mood and cognition) experienced by users
of marijuana. The cannabinoid receptors are members of family 1 of
the G-protein-coupled receptors.""",
            "data_file_division": "PRI",
            "date": "18-DEC-2001",
            "db_source": "REFSEQ: accession NM_001841.1",
            "gi": "4502929",
            "keywords": [""],
            "molecule_type": "protein",
            "organism": "Homo sapiens",
            "pid": "g4502929",
            "sequence_version": 1,
            "source": "human",
            "taxonomy": [
                "Eukaryota",
                "Metazoa",
                "Chordata",
                "Craniata",
                "Vertebrata",
                "Euteleostomi",
                "Mammalia",
                "Eutheria",
                "Primates",
                "Catarrhini",
                "Hominidae",
                "Homo",
            ],
            "topology": "linear",
        }
        references = [
            "location: [0:360]\nauthors: Munro,S., Thomas,K.L. and Abu-Shaar,M.\ntitle:"
            " Molecular characterization of a peripheral receptor for"
            " cannabinoids\njournal: Nature 365 (6441), 61-65 (1993)\nmedline id:"
            " 93368659\npubmed id: 7689702\ncomment: \n",
            "location: [0:360]\nauthors: Galiegue,S., Mary,S., Marchand,J.,"
            " Dussossoy,D., Carriere,D., Carayon,P., Bouaboula,M., Shire,D., Le Fur,G."
            " and Casellas,P.\ntitle: Expression of central and peripheral cannabinoid"
            " receptors in human immune tissues and leukocyte subpopulations\njournal:"
            " Eur. J. Biochem. 232 (1), 54-61 (1995)\nmedline id: 96048028\npubmed id:"
            " 7556170\ncomment: \n",
            "location: [0:360]\nauthors: Shire,D., Calandra,B., Rinaldi-Carmona,M.,"
            " Oustric,D., Pessegue,B., Bonnin-Cabanne,O., Le Fur,G., Caput,D. and"
            " Ferrara,P.\ntitle: Molecular cloning, expression and function of the"
            " murine CB2 peripheral cannabinoid receptor\njournal: Biochim. Biophys."
            " Acta 1307 (2), 132-136 (1996)\nmedline id: 96283804\npubmed id:"
            " 8679694\ncomment: \n",
        ]
        features = (
            (
                """\
type: source
location: [0:360]
qualifiers:
    Key: chromosome, Value: ['1']
    Key: db_xref, Value: ['taxon:9606']
    Key: map, Value: ['1p36.11']
    Key: organism, Value: ['Homo sapiens']
""",
                None,
            ),
            (
                """\
type: Protein
location: [0:360]
qualifiers:
    Key: product, Value: ['cannabinoid receptor 2 (macrophage)']
""",
                None,
            ),
            (
                """\
type: Region
location: [49:299]
qualifiers:
    Key: db_xref, Value: ['CDD:pfam00001']
    Key: note, Value: ['7tm_1']
    Key: region_name, Value: ['7 transmembrane receptor (rhodopsin family)']
""",
                None,
            ),
            (
                """\
type: CDS
location: [0:360]
qualifiers:
    Key: coded_by, Value: ['NM_001841.1:127..1209']
    Key: db_xref, Value: ['LocusID:1269', 'MIM:605051']
    Key: gene, Value: ['CNR2']
    Key: pseudo, Value: ['']
""",
                None,
            ),
        )
        dbxrefs = []
        self.perform_feature_parser_test(
            record,
            seq,
            id,
            name,
            description,
            annotations,
            references,
            features,
            dbxrefs,
        )

    def test_feature_parser_12(self):
        path = "GenBank/dbsource_wrap.gb"
        with open(path) as handle:
            records = GenBank.Iterator(handle, self.feat_parser)
            record = next(records)
        seq = "VKDGYIVDDRNCTYFCGRNAYCNEECTKLKGESGYCQWASPYGNACYCYKVPDH...RCN"
        id = "P01485"
        name = "SCX3_BUTOC"
        description = "Neurotoxin III"
        annotations = {
            "accessions": ["P01485"],
            "comment": """\
[FUNCTION] BINDS TO SODIUM CHANNELS AND INHIBITS THE INACTIVATION
OF THE ACTIVATED CHANNELS, THEREBY BLOCKING NEURONAL TRANSMISSION.
[SUBCELLULAR LOCATION] SECRETED.
[SIMILARITY] BELONGS TO THE ALPHA/BETA-SCORPION TOXIN FAMILY.
ALPHA-TOXIN SUBFAMILY.""",
            "data_file_division": "INV",
            "date": "16-OCT-2001",
            "db_source": (
                "swissprot: locus SCX3_BUTOC, accession P01485; class: standard."
                " created: Jul 21, 1986. sequence updated: Jul 21, 1986. annotation"
                " updated: Oct 16, 2001. xrefs: gi: gi: 69530 xrefs (non-sequence"
                " databases): HSSP P01484, InterPro IPR003614, InterPro IPR002061,"
                " InterPro IPR001219, Pfam PF00537, PRINTS PR00284, ProDom PD000908,"
                " SMART SM00505"
            ),
            "gi": "134354",
            "keywords": ["Neurotoxin", "Sodium channel inhibitor", "Amidation"],
            "molecule_type": "protein",
            "organism": "Buthus occitanus tunetanus",
            "pid": "g134354",
            "source": "Tunisian scorpion",
            "taxonomy": [
                "Eukaryota",
                "Metazoa",
                "Arthropoda",
                "Chelicerata",
                "Arachnida",
                "Scorpiones",
                "Buthoidea",
                "Buthidae",
                "Buthus",
            ],
            "topology": "linear",
        }
        references = [
            "location: [0:64]\nauthors: Vargas,O., Gregoire,J., Martin,M.-F., Bechis,G."
            " and Rochat,H.\ntitle: Neurotoxins from the venoms of two scorpions:"
            " Buthus occitanus tunetanus and Buthus occitanus mardochei\njournal:"
            " Toxicon 20, 79-79 (1982)\nmedline id: \npubmed id: \ncomment: SEQUENCE.\n"
        ]
        features = (
            (
                """\
type: source
location: [0:64]
qualifiers:
    Key: db_xref, Value: ['taxon:6871']
    Key: organism, Value: ['Buthus occitanus tunetanus']
""",
                None,
            ),
            (
                """\
type: Protein
location: [0:64]
qualifiers:
    Key: product, Value: ['Neurotoxin III']
""",
                None,
            ),
            (
                """\
type: Bond
location: bond{[11:12], [62:63]}
qualifiers:
    Key: bond_type, Value: ['disulfide']
    Key: note, Value: ['BY SIMILARITY.']
""",
                None,
            ),
            (
                """\
type: Bond
location: bond{[15:16], [35:36]}
qualifiers:
    Key: bond_type, Value: ['disulfide']
    Key: note, Value: ['BY SIMILARITY.']
""",
                None,
            ),
            (
                """\
type: Bond
location: bond{[21:22], [45:46]}
qualifiers:
    Key: bond_type, Value: ['disulfide']
    Key: note, Value: ['BY SIMILARITY.']
""",
                None,
            ),
            (
                """\
type: Bond
location: bond{[25:26], [47:48]}
qualifiers:
    Key: bond_type, Value: ['disulfide']
    Key: note, Value: ['BY SIMILARITY.']
""",
                None,
            ),
            (
                """\
type: Site
location: [63:64]
qualifiers:
    Key: site_type, Value: ['amidation']
""",
                None,
            ),
        )
        dbxrefs = []
        self.perform_feature_parser_test(
            record,
            seq,
            id,
            name,
            description,
            annotations,
            references,
            features,
            dbxrefs,
        )

    def test_feature_parser_13(self):
        path = "GenBank/gbvrl1_start.seq"
        with open(path) as handle:
            records = GenBank.Iterator(handle, self.feat_parser)
            record = next(records)
            seq = "ATGTCTGGCAACCAGTATACTGAGGAAGTTATGGAGGGAGTAAATTGGTTAAAG...TAA"
            id = "AB000048.1"
            name = "AB000048"
            description = (
                "Feline panleukopenia virus DNA for nonstructural protein 1,"
                " complete cds"
            )
            annotations = {
                "accessions": ["AB000048"],
                "data_file_division": "VRL",
                "date": "05-FEB-1999",
                "gi": "1769753",
                "keywords": ["nonstructural protein 1"],
                "molecule_type": "DNA",
                "organism": "Feline panleukopenia virus",
                "sequence_version": 1,
                "source": "Feline panleukopenia virus",
                "taxonomy": [
                    "Viruses",
                    "ssDNA viruses",
                    "Parvoviridae",
                    "Parvovirinae",
                    "Parvovirus",
                ],
                "topology": "linear",
            }
            references = [
                "location: [0:2007]\nauthors: Horiuchi,M.\ntitle: Evolutionary pattern"
                " of feline panleukopenia virus differs from that of canine"
                " parvovirus\njournal: Unpublished\nmedline id: \npubmed id:"
                " \ncomment: \n",
                "location: [0:2007]\nauthors: Horiuchi,M.\ntitle: Direct"
                " Submission\njournal: Submitted (22-DEC-1996) Motohiro Horiuchi,"
                " Obihiro University of Agriculture and Veterinary Medicine, Veterinary"
                " Public Health; Inada cho, Obihiro, Hokkaido 080, Japan"
                " (E-mail:horiuchi@obihiro.ac.jp, Tel:0155-49-5392,"
                " Fax:0155-49-5402)\nmedline id: \npubmed id: \ncomment: \n",
            ]
            features = (
                (
                    """\
type: source
location: [0:2007](+)
qualifiers:
    Key: db_xref, Value: ['taxon:10786']
    Key: isolate, Value: ['483']
    Key: lab_host, Value: ['Felis domesticus']
    Key: mol_type, Value: ['genomic DNA']
    Key: organism, Value: ['Feline panleukopenia virus']
""",
                    1,
                ),
                (
                    """\
type: CDS
location: [0:2007](+)
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:1769754']
    Key: product, Value: ['nonstructural protein 1']
    Key: protein_id, Value: ['BAA19009.1']
    Key: translation, Value: ['MSGNQYTEEVMEGVNWLKKHAEDEAFSFVFKCDNVQLNGKDVRWNNYTKPIQNEELTSLIRGAQTAMDQTEEEEMDWESEVDSLAKKQVQTFDALIKKCLFEVFVSKNIEPNECVWFIQHEWGKDQGWHCHVLLHSKNLQQATGKWLRRQMNMYWSRWLVTLCSINLTPTEKIKLREIAEDSEWVTILTYRHKQTKKDYVKMVHFGNMIAYYFLTKKKIVHMTKESGYFLSTDSGWKFNFMKYQDRHTVSTLYTEQMKPETVETTVTTAQETKRGRIQTKKEVSIKCTLRDLVSKRVTSPEDWMMLQPDSYIEMMAQPGGENLLKNTLEICTLTLARTKTAFELILEKADNTKLTNFDLANSRTCQIFRMHGWNWIKVCHAIACVLNRQGGKRNTVLFHGPASTGKSIIAQAIAQAVGNVGCYNAANVNFPFNDCTNKNLIWVEEAGNFGQQVNQFKAICSGQTIRIDQKGKGSKQIEPTPVIMTTNENITIVRIGCEERPEHTQPIRDRMLNIKLVCKLPGDFGLVDKEEWPLICAWLVKHGYQSTMANYTHHWGKVPEWDENWAEPKIQEGINSPGCKDLETQAASNPQSQDHVLTPLTPDVVDLALEPWSTPDTPIAETANQQSNQLGVTHKDVQASPTWSEIEADLRAIFTSEQLEEDFRDDLD']
""",
                    1,
                ),
            )
            dbxrefs = []
            self.maxDiff = None
            self.perform_feature_parser_test(
                record,
                seq,
                id,
                name,
                description,
                annotations,
                references,
                features,
                dbxrefs,
            )
            record = next(records)
            seq = "ATGTCTGGCAACCAGTATACTGAGGAAGTTATGGAGGGAGTAAATTGGTTAAAG...TAA"
            id = "AB000049.1"
            name = "AB000049"
            description = (
                "Feline panleukopenia virus DNA for nonstructural protein 1,"
                " complete cds"
            )
            annotations = {
                "accessions": ["AB000049"],
                "data_file_division": "VRL",
                "date": "05-FEB-1999",
                "gi": "1769755",
                "keywords": ["nonstructural protein 1"],
                "molecule_type": "DNA",
                "organism": "Feline panleukopenia virus",
                "sequence_version": 1,
                "source": "Feline panleukopenia virus",
                "taxonomy": [
                    "Viruses",
                    "ssDNA viruses",
                    "Parvoviridae",
                    "Parvovirinae",
                    "Parvovirus",
                ],
                "topology": "linear",
            }
            references = [
                "location: [0:2007]\nauthors: Horiuchi,M.\ntitle: Evolutionary pattern"
                " of feline panleukopenia virus differs that of canine"
                " parvovirus\njournal: Unpublished\nmedline id: \npubmed id:"
                " \ncomment: \n",
                "location: [0:2007]\nauthors: Horiuchi,M.\ntitle: Direct"
                " Submission\njournal: Submitted (22-DEC-1996) Motohiro Horiuchi,"
                " Obihiro University of Agriculture and Veterinary Medicine, Veterinary"
                " Public Health; Inada cho, Obihiro, Hokkaido 080, Japan"
                " (E-mail:horiuchi@obihiro.ac.jp, Tel:0155-49-5392,"
                " Fax:0155-49-5402)\nmedline id: \npubmed id: \ncomment: \n",
            ]
            features = (
                (
                    """\
type: source
location: [0:2007](+)
qualifiers:
    Key: db_xref, Value: ['taxon:10786']
    Key: isolate, Value: ['94-1']
    Key: lab_host, Value: ['Felis domesticus']
    Key: mol_type, Value: ['genomic DNA']
    Key: organism, Value: ['Feline panleukopenia virus']
""",
                    1,
                ),
                (
                    """\
type: CDS
location: [0:2007](+)
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:1769756']
    Key: product, Value: ['nonstructural protein 1']
    Key: protein_id, Value: ['BAA19010.1']
    Key: translation, Value: ['MSGNQYTEEVMEGVNWLKKHAEDEAFSFVFKCDNVQLNGKDVRWNNYTKPIQNEELTSLIRGAQTAMDQTEEEEMDWESEVDSLAKKQVQTFDALIKKCLFEVFVSKNIEPNECVWFIQHEWGKDQGWHCHVLLHSKNLQQATGKWLRRQMNMYWSRWLVTLCSINLTPTEKIKLREIAEDSEWVTILTYRHKQTKKDYVKMVHFGNMIAYYFLTKKKIVHMTKESGYFLSTDSGWKFNFMKYQDRHTVSTLYTEQMKPETVETTVTTAQETKRGRIQTKKEVSIKCTLRDLVSKRVTSPEDWMMLQPDSYIEMMAQPGGENLLKNTLEICTLTLARTKTAFELILEKADNTKLTNFDLANSRTCQIFRMHGWNWIKVCHAIACVLNRQGGKRNTVLFHGPASTGKSIIAQAIAQAVGNVGCYNAANVNFPFNDCTNKNLIWVEEAGNFGQQVNQFKAICSGQTIRIDQKGKGSKQIEPTPVIMTTNENITIVRIGCEERPEHTQPIRDRMLNIKLVCKLPGDFGLVDKEEWPLICAWLVKHGYQSTMANYTHHWGKVPEWDENWAEPKIQEGINSPGCKDLETQAASNPQSQDHVLTPLTPDVVDLALEPWSTPDTPIAETANQQSNQLGVTHKDVQASPTWSEIEADLRAIFTSEQLEEDFRDDLD']
""",
                    1,
                ),
            )
            dbxrefs = []
            self.perform_feature_parser_test(
                record,
                seq,
                id,
                name,
                description,
                annotations,
                references,
                features,
                dbxrefs,
            )
            record = next(records)
            seq = "ATGAGTGATGGAGCAGTTCAACCAGACGGTGGTCAACCTGCTGTCAGAAATGAA...TAA"
            id = "AB000050.1"
            name = "AB000050"
            description = (
                "Feline panleukopenia virus DNA for capsid protein 2, complete cds"
            )
            annotations = {
                "accessions": ["AB000050"],
                "data_file_division": "VRL",
                "date": "05-FEB-1999",
                "gi": "1769757",
                "keywords": ["capsid protein 2"],
                "molecule_type": "DNA",
                "organism": "Feline panleukopenia virus",
                "sequence_version": 1,
                "source": "Feline panleukopenia virus",
                "taxonomy": [
                    "Viruses",
                    "ssDNA viruses",
                    "Parvoviridae",
                    "Parvovirinae",
                    "Parvovirus",
                ],
                "topology": "linear",
            }
            references = [
                "location: [0:1755]\nauthors: Horiuchi,M.\ntitle: Evolutionary pattern"
                " of feline panleukopenia virus differs from that of canine"
                " parvovirus\njournal: Unpublished\nmedline id: \npubmed id:"
                " \ncomment: \n",
                "location: [0:1755]\nauthors: Horiuchi,M.\ntitle: Direct"
                " Submission\njournal: Submitted (22-DEC-1996) Motohiro Horiuchi,"
                " Obihiro University of Agriculture and Veterinary Medicine, Veterinary"
                " Public Health; Inada cho, Obihiro, Hokkaido 080, Japan"
                " (E-mail:horiuchi@obihiro.ac.jp, Tel:0155-49-5392,"
                " Fax:0155-49-5402)\nmedline id: \npubmed id: \ncomment: \n",
            ]
            features = (
                (
                    """\
type: source
location: [0:1755](+)
qualifiers:
    Key: db_xref, Value: ['taxon:10786']
    Key: isolate, Value: ['94-1']
    Key: lab_host, Value: ['Felis domesticus']
    Key: mol_type, Value: ['genomic DNA']
    Key: organism, Value: ['Feline panleukopenia virus']
""",
                    1,
                ),
                (
                    """\
type: CDS
location: [0:1755](+)
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:1769758']
    Key: product, Value: ['capsid protein 2']
    Key: protein_id, Value: ['BAA19011.1']
    Key: translation, Value: ['MSDGAVQPDGGQPAVRNERATGSGNGSGGGGGGGSGGVGISTGTFNNQTEFKFLENGWVEITANSSRLVHLNMPESENYKRVVVNNMDKTAVKGNMALDDTHVQIVTPWSLVDANAWGVWFNPGDWQLIVNTMSELHLVSFEQEIFNVVLKTVSESATQPPTKVYNNDLTASLMVALDSNNTMPFTPAAMRSETLGFYPWKPTIPTPWRYYFQWDRTLIPSHTGTSGTPTNVYHGTDPDDVQFYTIENSVPVHLLRTGDEFATGTFFFDCKPCRLTHTWQTNRALGLPPFLNSLPQSEGATNFGDIGVQQDKRRGVTQMGNTDYITEATIMRPAEVGYSAPYYSFEASTQGPFKTPIAAGRGGAQTDENQAADGDPRYAFGRQHGQKTTTTGETPERFTYIAHQDTGRYPEGDWIQNINFNLPVTNDNVLLPTDPIGGKTGINYTNIFNTYGPLTALNNVPPVYPNGQIWDKEFDTDLKPRLHVNAPFVCQNNCPGQLFVKVAPNLTNEYDPDASANMSRIVTYSDFWWKGKLVFKAKLRASHTWNPIQQMSINVDNQFNYVPNNIGAMKIVYEKSQLAPRKLY']
""",
                    1,
                ),
            )
            dbxrefs = []
            self.perform_feature_parser_test(
                record,
                seq,
                id,
                name,
                description,
                annotations,
                references,
                features,
                dbxrefs,
            )

    def test_feature_parser_14(self):
        """Check the feature parser's record for GenBank/NC_005816.gb.

        The first record yielded by GenBank.Iterator (using self.feat_parser)
        is handed, together with the expected values built below, to
        perform_feature_parser_test.
        """
        path = "GenBank/NC_005816.gb"
        with open(path) as handle:
            records = GenBank.Iterator(handle, self.feat_parser)
            # Parser warnings are suppressed only while the record is read.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", BiopythonParserWarning)
                # BiopythonParserWarning: Premature end of file in sequence data
                record = next(records)
        # Expected values passed positionally to perform_feature_parser_test.
        seq = "TGTAACGAACGGTGCAATAGTGATCCACACCCAACGCCTGAAATCAGATCCAGG...CTG"
        id = "NC_005816.1"
        name = "NC_005816"
        description = (
            "Yersinia pestis biovar Microtus str. 91001 plasmid pPCP1, complete"
            " sequence"
        )
        annotations = {
            "accessions": ["NC_005816"],
            "comment": """\
PROVISIONAL REFSEQ: This record has not yet been subject to final
NCBI review. The reference sequence was derived from AE017046.
COMPLETENESS: full length.""",
            "data_file_division": "BCT",
            "date": "21-JUL-2008",
            "gi": "45478711",
            "keywords": [""],
            "molecule_type": "DNA",
            "organism": "Yersinia pestis biovar Microtus str. 91001",
            "sequence_version": 1,
            "source": "Yersinia pestis biovar Microtus str. 91001",
            "taxonomy": [
                "Bacteria",
                "Proteobacteria",
                "Gammaproteobacteria",
                "Enterobacteriales",
                "Enterobacteriaceae",
                "Yersinia",
            ],
            "topology": "circular",
        }
        # One expected text block per reference (four in total).
        references = [
            "location: [0:9609]\nauthors: Zhou,D., Tong,Z., Song,Y., Han,Y., Pei,D.,"
            " Pang,X., Zhai,J., Li,M., Cui,B., Qi,Z., Jin,L., Dai,R., Du,Z., Wang,J.,"
            " Guo,Z., Wang,J., Huang,P. and Yang,R.\ntitle: Genetics of metabolic"
            " variations between Yersinia pestis biovars and the proposal of a new"
            " biovar, microtus\njournal: J. Bacteriol. 186 (15), 5147-5152"
            " (2004)\nmedline id: \npubmed id: 15262951\ncomment: \n",
            "location: [0:9609]\nauthors: Song,Y., Tong,Z., Wang,J., Wang,L., Guo,Z.,"
            " Han,Y., Zhang,J., Pei,D., Zhou,D., Qin,H., Pang,X., Han,Y., Zhai,J.,"
            " Li,M., Cui,B., Qi,Z., Jin,L., Dai,R., Chen,F., Li,S., Ye,C., Du,Z.,"
            " Lin,W., Wang,J., Yu,J., Yang,H., Wang,J., Huang,P. and Yang,R.\ntitle:"
            " Complete genome sequence of Yersinia pestis strain 91001, an isolate"
            " avirulent to humans\njournal: DNA Res. 11 (3), 179-197 (2004)\nmedline"
            " id: \npubmed id: 15368893\ncomment: \n",
            "location: [0:9609]\nauthors: \nconsrtm: NCBI Genome Project\ntitle: Direct"
            " Submission\njournal: Submitted (16-MAR-2004) National Center for"
            " Biotechnology Information, NIH, Bethesda, MD 20894, USA\nmedline id:"
            " \npubmed id: \ncomment: \n",
            "location: [0:9609]\nauthors: Song,Y., Tong,Z., Wang,L., Han,Y., Zhang,J.,"
            " Pei,D., Wang,J., Zhou,D., Han,Y., Pang,X., Zhai,J., Chen,F., Qin,H.,"
            " Wang,J., Li,S., Guo,Z., Ye,C., Du,Z., Lin,W., Wang,J., Yu,J., Yang,H.,"
            " Wang,J., Huang,P. and Yang,R.\ntitle: Direct Submission\njournal:"
            " Submitted (24-APR-2003) The Institute of Microbiology and Epidemiology,"
            " Academy of Military Medical Sciences, No. 20, Dongdajie Street, Fengtai"
            " District, Beijing 100071, People's Republic of China\nmedline id:"
            " \npubmed id: \ncomment: \n",
        ]
        # Each entry pairs the expected text of one feature with an integer.
        # The integer is 1 wherever the location text shows (+) and -1 wherever
        # it shows (-); presumably the expected strand -- confirm against
        # perform_feature_parser_test.
        features = (
            (
                """\
type: source
location: [0:9609](+)
qualifiers:
    Key: biovar, Value: ['Microtus']
    Key: db_xref, Value: ['taxon:229193']
    Key: mol_type, Value: ['genomic DNA']
    Key: organism, Value: ['Yersinia pestis biovar Microtus str. 91001']
    Key: plasmid, Value: ['pPCP1']
    Key: strain, Value: ['91001']
""",
                1,
            ),
            (
                """\
type: repeat_region
location: [0:1954](+)
qualifiers:
""",
                1,
            ),
            (
                """\
type: gene
location: [86:1109](+)
qualifiers:
    Key: db_xref, Value: ['GeneID:2767718']
    Key: locus_tag, Value: ['YP_pPCP01']
""",
                1,
            ),
            (
                """\
type: CDS
location: [86:1109](+)
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:45478712', 'GeneID:2767718']
    Key: locus_tag, Value: ['YP_pPCP01']
    Key: note, Value: ['similar to corresponding CDS from previously sequenced pPCP plasmid of Yersinia pestis KIM (AF053945) and CO92 (AL109969), also many transposase entries for insertion sequence IS100 of Yersinia pestis. Contains IS21-like element transposase, HTH domain (Interpro|IPR007101)']
    Key: product, Value: ['putative transposase']
    Key: protein_id, Value: ['NP_995567.1']
    Key: transl_table, Value: ['11']
    Key: translation, Value: ['MVTFETVMEIKILHKQGMSSRAIARELGISRNTVKRYLQAKSEPPKYTPRPAVASLLDEYRDYIRQRIADAHPYKIPATVIAREIRDQGYRGGMTILRAFIRSLSVPQEQEPAVRFETEPGRQMQVDWGTMRNGRSPLHVFVAVLGYSRMLYIEFTDNMRYDTLETCHRNAFRFFGGVPREVLYDNMKTVVLQRDAYQTGQHRFHPSLWQFGKEMGFSPRLCRPFRAQTKGKVERMVQYTRNSFYIPLMTRLRPMGITVDVETANRHGLRWLHDVANQRKHETIQARPCDRWLEEQQSMLALPPEKKEYDVHLDENLVNFDKHPLHHPLSIYDSFCRGVA']
""",
                1,
            ),
            (
                """\
type: misc_feature
location: [86:959](+)
qualifiers:
    Key: db_xref, Value: ['CDD:34222']
    Key: locus_tag, Value: ['YP_pPCP01']
    Key: note, Value: ['Transposase and inactivated derivatives [DNA replication, recombination, and repair]; Region: COG4584']
""",
                1,
            ),
            (
                """\
type: misc_feature
location: [<110:209](+)
qualifiers:
    Key: db_xref, Value: ['CDD:186341']
    Key: locus_tag, Value: ['YP_pPCP01']
    Key: note, Value: ['Helix-turn-helix domain of Hin and related proteins, a family of DNA-binding domains unique to bacteria and represented by the Hin protein of Salmonella. The basic HTH domain is a simple fold comprised of three core helices that form a right-handed...; Region: HTH_Hin_like; cl01116']
""",
                1,
            ),
            (
                """\
type: misc_feature
location: [437:812](+)
qualifiers:
    Key: db_xref, Value: ['CDD:194099']
    Key: locus_tag, Value: ['YP_pPCP01']
    Key: note, Value: ['Integrase core domain; Region: rve; cl01316']
""",
                1,
            ),
            (
                """\
type: gene
location: [1105:1888](+)
qualifiers:
    Key: db_xref, Value: ['GeneID:2767716']
    Key: locus_tag, Value: ['YP_pPCP02']
""",
                1,
            ),
            (
                """\
type: CDS
location: [1105:1888](+)
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:45478713', 'GeneID:2767716']
    Key: locus_tag, Value: ['YP_pPCP02']
    Key: note, Value: ['similar to corresponding CDS form previously sequenced pPCP plasmid of Yersinia pestis KIM (AF053945) and CO92 (AL109969), also many ATP-binding protein entries for insertion sequence IS100 of Yersinia pestis. Contains Chaperonin clpA/B (Interpro|IPR001270). Contains ATP/GTP-binding site motif A (P-loop) (Interpro|IPR001687, Molecular Function: ATP binding (GO:0005524)). Contains Bacterial chromosomal replication initiator protein, DnaA (Interpro|IPR001957, Molecular Function: DNA binding (GO:0003677), Molecular Function: DNA replication origin binding (GO:0003688), Molecular Function: ATP binding (GO:0005524), Biological Process: DNA replication initiation (GO:0006270), Biological Process: regulation of DNA replication (GO:0006275)). Contains AAA ATPase (Interpro|IPR003593, Molecular Function: nucleotide binding (GO:0000166))']
    Key: product, Value: ['transposase/IS protein']
    Key: protein_id, Value: ['NP_995568.1']
    Key: transl_table, Value: ['11']
    Key: translation, Value: ['MMMELQHQRLMALAGQLQLESLISAAPALSQQAVDQEWSYMDFLEHLLHEEKLARHQRKQAMYTRMAAFPAVKTFEEYDFTFATGAPQKQLQSLRSLSFIERNENIVLLGPSGVGKTHLAIAMGYEAVRAGIKVRFTTAADLLLQLSTAQRQGRYKTTLQRGVMAPRLLIIDEIGYLPFSQEEAKLFFQVIAKRYEKSAMILTSNLPFGQWDQTFAGDAALTSAMLDRILHHSHVVQIKGESYRLRQKRKAGVIAEANPE']
""",
                1,
            ),
            (
                """\
type: misc_feature
location: [1108:1885](+)
qualifiers:
    Key: db_xref, Value: ['CDD:181681']
    Key: locus_tag, Value: ['YP_pPCP02']
    Key: note, Value: ['transposase/IS protein; Provisional; Region: PRK09183']
""",
                1,
            ),
            (
                """\
type: misc_feature
location: [1366:>1669](+)
qualifiers:
    Key: db_xref, Value: ['CDD:99707']
    Key: locus_tag, Value: ['YP_pPCP02']
    Key: note, Value: ['The AAA+ (ATPases Associated with a wide variety of cellular Activities) superfamily represents an ancient group of ATPases belonging to the ASCE (for additional strand, catalytic E) division of the P-loop NTPase fold. The ASCE division also includes...; Region: AAA; cd00009']
""",
                1,
            ),
            (
                """\
type: misc_feature
location: [1432:1456](+)
qualifiers:
    Key: db_xref, Value: ['CDD:99707']
    Key: locus_tag, Value: ['YP_pPCP02']
    Key: note, Value: ['Walker A motif; other site']
""",
                1,
            ),
            (
                """\
type: misc_feature
location: order{[1435:1459](+), [1618:1621](+)}
qualifiers:
    Key: db_xref, Value: ['CDD:99707']
    Key: locus_tag, Value: ['YP_pPCP02']
    Key: note, Value: ['ATP binding site [chemical binding]; other site']
""",
                1,
            ),
            (
                """\
type: misc_feature
location: [1606:1624](+)
qualifiers:
    Key: db_xref, Value: ['CDD:99707']
    Key: locus_tag, Value: ['YP_pPCP02']
    Key: note, Value: ['Walker B motif; other site']
""",
                1,
            ),
            (
                """\
type: gene
location: [2924:3119](+)
qualifiers:
    Key: db_xref, Value: ['GeneID:2767717']
    Key: gene, Value: ['rop']
    Key: gene_synonym, Value: ['rom']
    Key: locus_tag, Value: ['YP_pPCP03']
""",
                1,
            ),
            (
                """\
type: CDS
location: [2924:3119](+)
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:45478714', 'GeneID:2767717']
    Key: gene, Value: ['rop']
    Key: gene_synonym, Value: ['rom']
    Key: locus_tag, Value: ['YP_pPCP03']
    Key: note, Value: ['Best Blastp hit =gi|16082682|ref|NP_395229.1| (NC_003132) putative replication regulatory protein [Yersinia pestis], gi|5763813|emb|CAB531 66.1| (AL109969) putative replication regulatory protein [Yersinia pestis]; similar to gb|AAK91579.1| (AY048853), RNAI modulator protein Rom [Salmonella choleraesuis], Contains Regulatory protein Rop (Interpro|IPR000769)']
    Key: product, Value: ['putative replication regulatory protein']
    Key: protein_id, Value: ['NP_995569.1']
    Key: transl_table, Value: ['11']
    Key: translation, Value: ['MNKQQQTALNMARFIRSQSLILLEKLDALDADEQAAMCERLHELAEELQNSIQARFEAESETGT']
""",
                1,
            ),
            (
                """\
type: misc_feature
location: [2924:3107](+)
qualifiers:
    Key: db_xref, Value: ['CDD:145136']
    Key: gene, Value: ['rop']
    Key: gene_synonym, Value: ['rom']
    Key: locus_tag, Value: ['YP_pPCP03']
    Key: note, Value: ['Rop protein; Region: Rop; pfam01815']
""",
                1,
            ),
            (
                """\
type: gene
location: [3485:3857](+)
qualifiers:
    Key: db_xref, Value: ['GeneID:2767720']
    Key: locus_tag, Value: ['YP_pPCP04']
""",
                1,
            ),
            (
                """\
type: CDS
location: [3485:3857](+)
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:45478715', 'GeneID:2767720']
    Key: locus_tag, Value: ['YP_pPCP04']
    Key: note, Value: ['Best Blastp hit = gi|321919|pir||JQ1541 hypothetical 16.9K protein - Salmonella typhi murium plasmid NTP16.']
    Key: product, Value: ['hypothetical protein']
    Key: protein_id, Value: ['NP_995570.1']
    Key: transl_table, Value: ['11']
    Key: translation, Value: ['MSKKRRPQKRPRRRRFFHRLRPPDEHHKNRRSSQRWRNPTGLKDTRRFPPEAPSCALLFRPCRLPDTSPPFSLREAWRFLIAHAVGISVRCRSFAPSWAVCTNPPFSPTTAPYPVTIVLSPTR']
""",
                1,
            ),
            (
                """\
type: misc_feature
location: [3497:3626](+)
qualifiers:
    Key: locus_tag, Value: ['YP_pPCP04']
    Key: note, Value: ['ProfileScan match to entry PS50323 ARG_RICH, E-value 8.981']
""",
                1,
            ),
            (
                """\
type: gene
location: [4342:4780](+)
qualifiers:
    Key: db_xref, Value: ['GeneID:2767712']
    Key: gene, Value: ['pim']
    Key: locus_tag, Value: ['YP_pPCP05']
""",
                1,
            ),
            (
                """\
type: CDS
location: [4342:4780](+)
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:45478716', 'GeneID:2767712']
    Key: gene, Value: ['pim']
    Key: locus_tag, Value: ['YP_pPCP05']
    Key: note, Value: ['similar to many previously sequenced pesticin immunity protein entries of Yersinia pestis plasmid pPCP, e.g. gi| 16082683|,ref|NP_395230.1| (NC_003132) , gi|1200166|emb|CAA90861.1| (Z54145 ) , gi|1488655| emb|CAA63439.1| (X92856) , gi|2996219|gb|AAC62543.1| (AF053945) , and gi|5763814|emb|CAB531 67.1| (AL109969)']
    Key: product, Value: ['pesticin immunity protein']
    Key: protein_id, Value: ['NP_995571.1']
    Key: transl_table, Value: ['11']
    Key: translation, Value: ['MGGGMISKLFCLALIFLSSSGLAEKNTYTAKDILQNLELNTFGNSLSHGIYGKQTTFKQTEFTNIKSNTKKHIALINKDNSWMISLKILGIKRDEYTVCFEDFSLIRPPTYVAIHPLLIKKVKSGNFIVVKEIKKSIPGCTVYYH']
""",
                1,
            ),
            (
                """\
type: gene
location: [4814:5888](-)
qualifiers:
    Key: db_xref, Value: ['GeneID:2767721']
    Key: gene, Value: ['pst']
    Key: locus_tag, Value: ['YP_pPCP06']
""",
                -1,
            ),
            (
                """\
type: CDS
location: [4814:5888](-)
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:45478717', 'GeneID:2767721']
    Key: gene, Value: ['pst']
    Key: locus_tag, Value: ['YP_pPCP06']
    Key: note, Value: ['Best Blastp hit =|16082684|ref|NP_395231.1| (NC_003132) pesticin [Yersinia pestis], gi|984824|gb|AAA75369.1| (U31974) pesticin [Yersinia pestis], gi|1488654|emb|CAA63438.1| (X92856) pesticin [Yersinia pestis], gi|2996220|gb|AAC62544.1| (AF053945) pesticin [Yersinia pestis], gi|5763815|emb|CAB53168.1| (AL1099 69) pesticin [Yersinia pestis]']
    Key: product, Value: ['pesticin']
    Key: protein_id, Value: ['NP_995572.1']
    Key: transl_table, Value: ['11']
    Key: translation, Value: ['MSDTMVVNGSGGVPAFLFSGSTLSSYRPNFEANSITIALPHYVDLPGRSNFKLMYIMGFPIDTEMEKDSEYSNKIRQESKISKTEGTVSYEQKITVETGQEKDGVKVYRVMVLEGTIAESIEHLDKKENEDILNNNRNRIVLADNTVINFDNISQLKEFLRRSVNIVDHDIFSSNGFEGFNPTSHFPSNPSSDYFNSTGVTFGSGVDLGQRSKQDLLNDGVPQYIADRLDGYYMLRGKEAYDKVRTAPLTLSDNEAHLLSNIYIDKFSHKIEGLFNDANIGLRFSDLPLRTRTALVSIGYQKGFKLSRTAPTVWNKVIAKDWNGLVNAFNNIVDGMSDRRKREGALVQKDIDSGLLK']
""",
                -1,
            ),
            (
                """\
type: variation
location: [5909:5911](+)
qualifiers:
    Key: note, Value: ['compared to AF053945']
    Key: replace, Value: ['']
""",
                1,
            ),
            (
                """\
type: variation
location: [5933:5933](+)
qualifiers:
    Key: note, Value: ['compared to AL109969']
    Key: replace, Value: ['a']
""",
                1,
            ),
            (
                """\
type: variation
location: [5933:5933](+)
qualifiers:
    Key: note, Value: ['compared to AF053945']
    Key: replace, Value: ['aa']
""",
                1,
            ),
            (
                """\
type: variation
location: [5947:5948](+)
qualifiers:
    Key: note, Value: ['compared to AL109969']
    Key: replace, Value: ['c']
""",
                1,
            ),
            (
                """\
type: gene
location: [6004:6421](+)
qualifiers:
    Key: db_xref, Value: ['GeneID:2767719']
    Key: locus_tag, Value: ['YP_pPCP07']
""",
                1,
            ),
            (
                """\
type: CDS
location: [6004:6421](+)
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:45478718', 'GeneID:2767719']
    Key: locus_tag, Value: ['YP_pPCP07']
    Key: note, Value: ['Best Blastp hit = gi|16082685|ref|NP_395232.1| (NC_003132) hypothetical protein [Yersinia pestis], gi|5763816|emb|CAB53169.1| (AL109969) hypothetical protein [Yersinia pestis]']
    Key: product, Value: ['hypothetical protein']
    Key: protein_id, Value: ['NP_995573.1']
    Key: transl_table, Value: ['11']
    Key: translation, Value: ['MKFHFCDLNHSYKNQEGKIRSRKTAPGNIRKKQKGDNVSKTKSGRHRLSKTDKRLLAALVVAGYEERTARDLIQKHVYTLTQADLRHLVSEISNGVGQSQAYDAIYQARRIRLARKYLSGKKPEGVEPREGQEREDLP']
""",
                1,
            ),
            (
                """\
type: variation
location: [6524:6525](+)
qualifiers:
    Key: note, Value: ['compared to AF053945 and AL109969']
    Key: replace, Value: ['c']
""",
                1,
            ),
            (
                """\
type: gene
location: [6663:7602](+)
qualifiers:
    Key: db_xref, Value: ['GeneID:2767715']
    Key: gene, Value: ['pla']
    Key: locus_tag, Value: ['YP_pPCP08']
""",
                1,
            ),
            (
                """\
type: CDS
location: [6663:7602](+)
qualifiers:
    Key: EC_number, Value: ['3.4.23.48']
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:45478719', 'GeneID:2767715']
    Key: gene, Value: ['pla']
    Key: locus_tag, Value: ['YP_pPCP08']
    Key: note, Value: ['outer membrane protease; involved in virulence in many organisms; OmpT; IcsP; SopA; Pla; PgtE; omptin; in Escherichia coli OmpT can degrade antimicrobial peptides; in Yersinia Pla activates plasminogen during infection; in Shigella flexneria SopA cleaves the autotransporter IcsA']
    Key: product, Value: ['outer membrane protease']
    Key: protein_id, Value: ['NP_995574.1']
    Key: transl_table, Value: ['11']
    Key: translation, Value: ['MKKSSIVATIITILSGSANAASSQLIPNISPDSFTVAASTGMLSGKSHEMLYDAETGRKISQLDWKIKNVAILKGDISWDPYSFLTLNARGWTSLASGSGNMDDYDWMNENQSEWTDHSSHPATNVNHANEYDLNVKGWLLQDENYKAGITAGYQETRFSWTATGGSYSYNNGAYTGNFPKGVRVIGYNQRFSMPYIGLAGQYRINDFELNALFKFSDWVRAHDNDEHYMRDLTFREKTSGSRYYGTVINAGYYVTPNAKVFAEFTYSKYDEGKGGTQIIDKNSGDSVSIGGDAAGISNKNYTVTAGLQYRF']
""",
                1,
            ),
            (
                """\
type: misc_feature
location: [6663:7599](+)
qualifiers:
    Key: db_xref, Value: ['CDD:186487']
    Key: gene, Value: ['pla']
    Key: locus_tag, Value: ['YP_pPCP08']
    Key: note, Value: ['Omptin family; Region: Omptin; cl01886']
""",
                1,
            ),
            (
                """\
type: gene
location: [7788:8088](-)
qualifiers:
    Key: db_xref, Value: ['GeneID:2767713']
    Key: locus_tag, Value: ['YP_pPCP09']
""",
                -1,
            ),
            (
                """\
type: CDS
location: [7788:8088](-)
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:45478720', 'GeneID:2767713']
    Key: locus_tag, Value: ['YP_pPCP09']
    Key: note, Value: ['Best Blastp hit = gi|16082687|ref|NP_395234.1| (NC_003132) putative transcriptional regulator [Yersinia pestis], gi|5763818|emb|CAB53171.1| (AL109969) putative transcriptional regulator [Yersinia pestis].']
    Key: product, Value: ['putative transcriptional regulator']
    Key: protein_id, Value: ['NP_995575.1']
    Key: transl_table, Value: ['11']
    Key: translation, Value: ['MRTLDEVIASRSPESQTRIKEMADEMILEVGLQMMREELQLSQKQVAEAMGISQPAVTKLEQRGNDLKLATLKRYVEAMGGKLSLDVELPTGRRVAFHV']
""",
                -1,
            ),
            (
                """\
type: misc_feature
location: [7836:7995](-)
qualifiers:
    Key: db_xref, Value: ['CDD:195788']
    Key: locus_tag, Value: ['YP_pPCP09']
    Key: note, Value: ['Helix-turn-helix XRE-family like proteins. Prokaryotic DNA binding proteins belonging to the xenobiotic response element family of transcriptional regulators; Region: HTH_XRE; cl09100']
""",
                -1,
            ),
            (
                """\
type: gene
location: [8087:8360](-)
qualifiers:
    Key: db_xref, Value: ['GeneID:2767714']
    Key: locus_tag, Value: ['YP_pPCP10']
""",
                -1,
            ),
            (
                """\
type: CDS
location: [8087:8360](-)
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GI:45478721', 'GeneID:2767714']
    Key: locus_tag, Value: ['YP_pPCP10']
    Key: note, Value: ['Best Blastp hit = gi|16082688|ref|NP_395235.1| (NC_003132) hypothetical protein [ Yersinia pestis], gi|5763819|emb|CAB53172.1| (AL109969) hypothetical protein [Yersinia pestis]']
    Key: product, Value: ['hypothetical protein']
    Key: protein_id, Value: ['NP_995576.1']
    Key: transl_table, Value: ['11']
    Key: translation, Value: ['MADLKKLQVYGPELPRPYADTVKGSRYKNMKELRVQFSGRPIRAFYAFDPIRRAIVLCAGDKSNDKRFYEKLVRIAEDEFTAHLNTLESK']
""",
                -1,
            ),
            (
                """\
type: misc_feature
location: [8090:>8357](-)
qualifiers:
    Key: db_xref, Value: ['CDD:194142']
    Key: locus_tag, Value: ['YP_pPCP10']
    Key: note, Value: ['Phage derived protein Gp49-like (DUF891); Region: Gp49; cl01470']
""",
                -1,
            ),
            (
                """\
type: variation
location: [8529:8529](+)
qualifiers:
    Key: note, Value: ['compared to AL109969']
    Key: replace, Value: ['tt']
""",
                1,
            ),
        )
        dbxrefs = ["Project:58037"]
        # Hand the parsed record and every expected value to the shared helper.
        self.perform_feature_parser_test(
            record,
            seq,
            id,
            name,
            description,
            annotations,
            references,
            features,
            dbxrefs,
        )

    def test_feature_parser_15(self):
        """Check the feature parser's record for GenBank/no_end_marker.gb.

        The first record yielded by GenBank.Iterator (using self.feat_parser)
        is handed, together with the expected values built below, to
        perform_feature_parser_test.
        """
        with open("GenBank/no_end_marker.gb") as stream:
            iterator = GenBank.Iterator(stream, self.feat_parser)
            with warnings.catch_warnings():
                # Parser warning noted for this file:
                # "Premature end of file in sequence data".
                warnings.simplefilter("ignore", BiopythonParserWarning)
                parsed = next(iterator)

        expected_seq = "CTAGCAGCCCGCATCGCCCTCGACGTTGGCGATCATCGTGCGCAGCACCTTGAG...TGA"
        expected_id = "AB070938.1"
        expected_name = "AB070938"
        expected_description = (
            "Streptomyces avermitilis melanin biosynthetic gene cluster"
        )
        lineage = [
            "Bacteria",
            "Actinobacteria",
            "Actinobacteridae",
            "Actinomycetales",
            "Streptomycineae",
            "Streptomycetaceae",
            "Streptomyces",
        ]
        expected_annotations = {
            "accessions": ["AB070938"],
            "data_file_division": "BCT",
            "date": "11-OCT-2001",
            "gi": "15823953",
            "keywords": [""],
            "molecule_type": "DNA",
            "organism": "Streptomyces avermitilis",
            "sequence_version": 1,
            "source": "Streptomyces avermitilis",
            "taxonomy": lineage,
            "topology": "linear",
        }
        expected_references = []
        # Expected text of the single source feature, paired with the integer 1.
        source_text = (
            "type: source\n"
            "location: [0:6497](+)\n"
            "qualifiers:\n"
            "    Key: db_xref, Value: ['taxon:33903']\n"
            "    Key: mol_type, Value: ['genomic DNA']\n"
            "    Key: organism, Value: ['Streptomyces avermitilis']\n"
        )
        expected_features = ((source_text, 1),)
        expected_dbxrefs = []

        self.perform_feature_parser_test(
            parsed,
            expected_seq,
            expected_id,
            expected_name,
            expected_description,
            expected_annotations,
            expected_references,
            expected_features,
            expected_dbxrefs,
        )

    def test_feature_parser_16(self):
        """Check the feature parser's record for GenBank/wrong_sequence_indent.gb.

        The first record yielded by GenBank.Iterator (using self.feat_parser)
        is handed, together with the expected values built below, to
        perform_feature_parser_test.
        """
        with open("GenBank/wrong_sequence_indent.gb") as stream:
            iterator = GenBank.Iterator(stream, self.feat_parser)
            with warnings.catch_warnings():
                # Parser warning noted for this file:
                # "Invalid indentation for sequence line".
                warnings.simplefilter("ignore", BiopythonParserWarning)
                parsed = next(iterator)

        expected_seq = "CTAGCAGCCCGCATCGCCCTCGACGTTGGCGATCATCGTGCGCAGCACCTTGAG...TGA"
        expected_id = "AB070938.1"
        expected_name = "AB070938"
        expected_description = (
            "Streptomyces avermitilis melanin biosynthetic gene cluster"
        )
        lineage = [
            "Bacteria",
            "Actinobacteria",
            "Actinobacteridae",
            "Actinomycetales",
            "Streptomycineae",
            "Streptomycetaceae",
            "Streptomyces",
        ]
        expected_annotations = {
            "accessions": ["AB070938"],
            "data_file_division": "BCT",
            "date": "11-OCT-2001",
            "gi": "15823953",
            "keywords": [""],
            "molecule_type": "DNA",
            "organism": "Streptomyces avermitilis",
            "sequence_version": 1,
            "source": "Streptomyces avermitilis",
            "taxonomy": lineage,
            "topology": "linear",
        }
        expected_references = []
        # Expected text of the single source feature, paired with the integer 1.
        source_text = (
            "type: source\n"
            "location: [0:6497](+)\n"
            "qualifiers:\n"
            "    Key: db_xref, Value: ['taxon:33903']\n"
            "    Key: mol_type, Value: ['genomic DNA']\n"
            "    Key: organism, Value: ['Streptomyces avermitilis']\n"
        )
        expected_features = ((source_text, 1),)
        expected_dbxrefs = []

        self.perform_feature_parser_test(
            parsed,
            expected_seq,
            expected_id,
            expected_name,
            expected_description,
            expected_annotations,
            expected_references,
            expected_features,
            expected_dbxrefs,
        )

    def test_feature_parser_17(self):
        """Check the feature parser's record for GenBank/invalid_locus_line_spacing.gb.

        The first record yielded by GenBank.Iterator (using self.feat_parser)
        is handed, together with the expected values built below, to
        perform_feature_parser_test.
        """
        with open("GenBank/invalid_locus_line_spacing.gb") as stream:
            iterator = GenBank.Iterator(stream, self.feat_parser)
            with warnings.catch_warnings():
                # Parser warning noted for this file:
                # "Attempting to parse malformed locus line".
                warnings.simplefilter("ignore", BiopythonParserWarning)
                parsed = next(iterator)

        expected_seq = "CTAGCAGCCCGCATCGCCCTCGACGTTGGCGATCATCGTGCGCAGCACCTTGAG...TGA"
        expected_id = "AB070938.1"
        expected_name = "AB070938"
        expected_description = (
            "Streptomyces avermitilis melanin biosynthetic gene cluster"
        )
        lineage = [
            "Bacteria",
            "Actinobacteria",
            "Actinobacteridae",
            "Actinomycetales",
            "Streptomycineae",
            "Streptomycetaceae",
            "Streptomyces",
        ]
        expected_annotations = {
            "accessions": ["AB070938"],
            "data_file_division": "BCT",
            "date": "11-OCT-2001",
            "gi": "15823953",
            "keywords": [""],
            "molecule_type": "DNA",
            "organism": "Streptomyces avermitilis",
            "sequence_version": 1,
            "source": "Streptomyces avermitilis",
            "taxonomy": lineage,
            "topology": "linear",
        }
        expected_references = []
        # Expected text of the single source feature, paired with the integer 1.
        source_text = (
            "type: source\n"
            "location: [0:6497](+)\n"
            "qualifiers:\n"
            "    Key: db_xref, Value: ['taxon:33903']\n"
            "    Key: mol_type, Value: ['genomic DNA']\n"
            "    Key: organism, Value: ['Streptomyces avermitilis']\n"
        )
        expected_features = ((source_text, 1),)
        expected_dbxrefs = []

        self.perform_feature_parser_test(
            parsed,
            expected_seq,
            expected_id,
            expected_name,
            expected_description,
            expected_annotations,
            expected_references,
            expected_features,
            expected_dbxrefs,
        )

    def test_feature_parser_18(self):
        """Check the feature parser's record for GenBank/empty_feature_qualifier.gb.

        The first record yielded by GenBank.Iterator (using self.feat_parser)
        is handed, together with the expected values built below, to
        perform_feature_parser_test. No warning filter is installed here.
        """
        with open("GenBank/empty_feature_qualifier.gb") as stream:
            iterator = GenBank.Iterator(stream, self.feat_parser)
            parsed = next(iterator)

        expected_seq = "CTAGCAGCCCGCATCGCCCTCGACGTTGGCGATCATCGTGCGCAGCACCTTGAG...TGA"
        expected_id = "AB070938.1"
        expected_name = "AB070938"
        expected_description = (
            "Streptomyces avermitilis melanin biosynthetic gene cluster"
        )
        lineage = [
            "Bacteria",
            "Actinobacteria",
            "Actinobacteridae",
            "Actinomycetales",
            "Streptomycineae",
            "Streptomycetaceae",
            "Streptomyces",
        ]
        expected_annotations = {
            "accessions": ["AB070938"],
            "data_file_division": "BCT",
            "date": "11-OCT-2001",
            "gi": "15823953",
            "keywords": [""],
            "molecule_type": "DNA",
            "organism": "Streptomyces avermitilis",
            "sequence_version": 1,
            "source": "Streptomyces avermitilis",
            "taxonomy": lineage,
            "topology": "linear",
        }
        expected_references = []
        # Expected text of the single source feature, paired with the integer 1.
        # The note value holds an apostrophe, so its list item is shown in
        # double quotes, which are escaped below.
        source_text = (
            "type: source\n"
            "location: [0:6497](+)\n"
            "qualifiers:\n"
            "    Key: db_xref, Value: ['taxon:33903']\n"
            "    Key: mol_type, Value: ['genomic DNA']\n"
            "    Key: note, Value: [\"This is a correct note, the following one isn't\"]\n"
            "    Key: organism, Value: ['Streptomyces avermitilis']\n"
        )
        expected_features = ((source_text, 1),)
        expected_dbxrefs = []

        self.perform_feature_parser_test(
            parsed,
            expected_seq,
            expected_id,
            expected_name,
            expected_description,
            expected_annotations,
            expected_references,
            expected_features,
            expected_dbxrefs,
        )

    def test_feature_parser_19(self):
        """Check the feature parser's record for GenBank/invalid_misc_feature.gb.

        The first record yielded by GenBank.Iterator (using self.feat_parser)
        is handed, together with the expected values built below, to
        perform_feature_parser_test.
        """
        with open("GenBank/invalid_misc_feature.gb") as stream:
            iterator = GenBank.Iterator(stream, self.feat_parser)
            with warnings.catch_warnings():
                # Parser warning noted for this file:
                # "line too short to contain a feature".
                warnings.simplefilter("ignore", BiopythonParserWarning)
                parsed = next(iterator)

        expected_seq = "CTAGCAGCCCGCATCGCCCTCGACGTTGGCGATCATCGTGCGCAGCACCTTGAG...TGA"
        expected_id = "AB070938.1"
        expected_name = "AB070938"
        expected_description = (
            "Streptomyces avermitilis melanin biosynthetic gene cluster"
        )
        lineage = [
            "Bacteria",
            "Actinobacteria",
            "Actinobacteridae",
            "Actinomycetales",
            "Streptomycineae",
            "Streptomycetaceae",
            "Streptomyces",
        ]
        expected_annotations = {
            "accessions": ["AB070938"],
            "data_file_division": "BCT",
            "date": "11-OCT-2001",
            "gi": "15823953",
            "keywords": [""],
            "molecule_type": "DNA",
            "organism": "Streptomyces avermitilis",
            "sequence_version": 1,
            "source": "Streptomyces avermitilis",
            "taxonomy": lineage,
            "topology": "linear",
        }
        expected_references = []
        # Expected text of the single source feature, paired with the integer 1.
        source_text = (
            "type: source\n"
            "location: [0:6497](+)\n"
            "qualifiers:\n"
            "    Key: db_xref, Value: ['taxon:33903']\n"
            "    Key: mol_type, Value: ['genomic DNA']\n"
            "    Key: organism, Value: ['Streptomyces avermitilis']\n"
        )
        expected_features = ((source_text, 1),)
        expected_dbxrefs = []

        self.perform_feature_parser_test(
            parsed,
            expected_seq,
            expected_id,
            expected_name,
            expected_description,
            expected_annotations,
            expected_references,
            expected_features,
            expected_dbxrefs,
        )

    def test_feature_parser_20(self):
        """Run the feature parser over the protein record GenBank/1MRR_A.gp.

        The first record is read with parser warnings ignored and is then
        passed to perform_feature_parser_test together with the expected
        sequence, identifiers, annotations, references and feature listing
        defined below.
        """
        path = "GenBank/1MRR_A.gp"
        with open(path) as handle:
            records = GenBank.Iterator(handle, self.feat_parser)
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", BiopythonParserWarning)
                # BiopythonParserWarning: Dropping bond qualifier in feature location
                record = next(records)
        # Expected values follow; the "..." in seq is part of the literal.
        seq = "AYTTFSATKNDQLKEPMFFGQPVQVARYDQQKYDIFEKLIEKQLSFFWRPEEVD...FQL"
        id = "1MRR_A"
        name = "1MRR_A"
        description = (
            "Chain A, Substitution Of Manganese For Iron In Ribonucleotide Reductase"
            " From Escherichia Coli. Spectroscopic And Crystallographic"
            " Characterization"
        )
        annotations = {
            "accessions": ["1MRR_A"],
            "comment": """\
1 Ribonucleotide Reductase R1 Protein.""",
            "data_file_division": "BCT",
            "date": "10-OCT-2012",
            "db_source": (
                "pdb: molecule 1MRR, chain 65, release Aug 28, 2012; deposition: Jul"
                " 28, 1992; class: Reductase(Acting On Ch2); source: Mmdb_id: 50351,"
                " Pdb_id 1: 1MRR; Exp. method: X-Ray Diffraction."
            ),
            "gi": "494379",
            "keywords": [""],
            "molecule_type": "protein",
            "organism": "Escherichia coli",
            "source": "Escherichia coli",
            "taxonomy": [
                "Bacteria",
                "Proteobacteria",
                "Gammaproteobacteria",
                "Enterobacteriales",
                "Enterobacteriaceae",
                "Escherichia",
            ],
            "topology": "linear",
        }
        # One multi-line string per expected reference (three in total).
        references = [
            "location: [0:375]\nauthors: Nordlund,P., Sjoberg,B.M. and"
            " Eklund,H.\ntitle: Three-dimensional structure of the free radical protein"
            " of ribonucleotide reductase\njournal: Nature 345 (6276), 593-598"
            " (1990)\nmedline id: \npubmed id: 2190093\ncomment: \n",
            "location: [0:375]\nauthors: Atta,M., Nordlund,P., Aberg,A., Eklund,H. and"
            " Fontecave,M.\ntitle: Substitution of manganese for iron in ribonucleotide"
            " reductase from Escherichia coli. Spectroscopic and crystallographic"
            " characterization\njournal: J. Biol. Chem. 267 (29), 20682-20688"
            " (1992)\nmedline id: \npubmed id: 1328209\ncomment: \n",
            "location: [0:375]\nauthors: Eklund,H. and Nordlund,P.\ntitle: Direct"
            " Submission\njournal: Submitted (28-JUL-1992)\nmedline id: \npubmed id:"
            " \ncomment: \n",
        ]
        # Each entry pairs the expected feature text with a second value, which
        # is None for every feature of this record.
        # NOTE(review): the second value is presumably the expected strand --
        # confirm against perform_feature_parser_test.
        features = (
            (
                """\
type: source
location: [0:375]
qualifiers:
    Key: db_xref, Value: ['taxon:562']
    Key: organism, Value: ['Escherichia coli']
""",
                None,
            ),
            (
                """\
type: Region
location: [27:340]
qualifiers:
    Key: db_xref, Value: ['CDD:153108']
    Key: note, Value: ['Ribonucleotide Reductase, R2/beta subunit, ferritin-like diiron-binding domain; cd01049']
    Key: region_name, Value: ['RNRR2']
""",
                None,
            ),
            (
                """\
type: SecStr
location: [34:46]
qualifiers:
    Key: note, Value: ['helix 1']
    Key: sec_str_type, Value: ['helix']
""",
                None,
            ),
            (
                """\
type: Site
location: order{[36:37], [43:44], [108:110], [112:113], [115:117], [119:120], [122:123], [136:138], [140:141]}
qualifiers:
    Key: db_xref, Value: ['CDD:153108']
    Key: note, Value: ['dimer interface [polypeptide binding]']
    Key: site_type, Value: ['other']
""",
                None,
            ),
            (
                """\
type: Site
location: order{[47:48], [83:84], [114:115], [117:118], [121:122], [235:237], [240:241]}
qualifiers:
    Key: db_xref, Value: ['CDD:153108']
    Key: note, Value: ['putative radical transfer pathway']
    Key: site_type, Value: ['other']
""",
                None,
            ),
            (
                """\
type: SecStr
location: [56:65]
qualifiers:
    Key: note, Value: ['helix 2']
    Key: sec_str_type, Value: ['helix']
""",
                None,
            ),
            (
                """\
type: SecStr
location: [66:87]
qualifiers:
    Key: note, Value: ['helix 3']
    Key: sec_str_type, Value: ['helix']
""",
                None,
            ),
            (
                """\
type: Site
location: order{[83:84], [114:115], [117:118], [203:204], [237:238], [240:241]}
qualifiers:
    Key: db_xref, Value: ['CDD:153108']
    Key: note, Value: ['diiron center [ion binding]']
    Key: site_type, Value: ['other']
""",
                None,
            ),
            (
                """\
type: Het
location: join{[83:84], [114:115], [117:118], [237:238]}
qualifiers:
    Key: heterogen, Value: ['( MN,1000 )']
""",
                None,
            ),
            (
                """\
type: SecStr
location: [101:129]
qualifiers:
    Key: note, Value: ['helix 4']
    Key: sec_str_type, Value: ['helix']
""",
                None,
            ),
            (
                """\
type: Het
location: join{[114:115], [203:204], [237:238], [240:241]}
qualifiers:
    Key: heterogen, Value: ['( MN,1001 )']
""",
                None,
            ),
            (
                """\
type: Site
location: [121:122]
qualifiers:
    Key: db_xref, Value: ['CDD:153108']
    Key: note, Value: ['tyrosyl radical']
    Key: site_type, Value: ['other']
""",
                None,
            ),
            (
                """\
type: SecStr
location: [132:140]
qualifiers:
    Key: note, Value: ['helix 5']
    Key: sec_str_type, Value: ['helix']
""",
                None,
            ),
            (
                """\
type: SecStr
location: [142:151]
qualifiers:
    Key: note, Value: ['helix 6']
    Key: sec_str_type, Value: ['helix']
""",
                None,
            ),
            (
                """\
type: SecStr
location: [152:169]
qualifiers:
    Key: note, Value: ['helix 7']
    Key: sec_str_type, Value: ['helix']
""",
                None,
            ),
            (
                """\
type: SecStr
location: [171:177]
qualifiers:
    Key: note, Value: ['strand 1']
    Key: sec_str_type, Value: ['sheet']
""",
                None,
            ),
            (
                """\
type: SecStr
location: [179:185]
qualifiers:
    Key: note, Value: ['strand 2']
    Key: sec_str_type, Value: ['sheet']
""",
                None,
            ),
            (
                """\
type: SecStr
location: [185:216]
qualifiers:
    Key: note, Value: ['helix 8']
    Key: sec_str_type, Value: ['helix']
""",
                None,
            ),
            (
                """\
type: Het
location: join{[193:194], [271:272]}
qualifiers:
    Key: heterogen, Value: ['( HG,1003 )']
""",
                None,
            ),
            (
                """\
type: Het
location: [195:196]
qualifiers:
    Key: heterogen, Value: ['( HG,1005 )']
""",
                None,
            ),
            (
                """\
type: Het
location: join{[195:196], [195:196]}
qualifiers:
    Key: heterogen, Value: ['( HG,1002 )']
""",
                None,
            ),
            (
                """\
type: Het
location: join{[209:210], [213:214], [213:214]}
qualifiers:
    Key: heterogen, Value: ['( HG,1004 )']
""",
                None,
            ),
            (
                """\
type: SecStr
location: [224:253]
qualifiers:
    Key: note, Value: ['helix 9']
    Key: sec_str_type, Value: ['helix']
""",
                None,
            ),
            (
                """\
type: SecStr
location: [259:269]
qualifiers:
    Key: note, Value: ['helix 10']
    Key: sec_str_type, Value: ['helix']
""",
                None,
            ),
            (
                """\
type: Bond
location: bond{[267:268], [271:272]}
qualifiers:
    Key: bond_type, Value: ['disulfide']
""",
                None,
            ),
            (
                """\
type: SecStr
location: [269:285]
qualifiers:
    Key: note, Value: ['helix 11']
    Key: sec_str_type, Value: ['helix']
""",
                None,
            ),
            (
                """\
type: Het
location: join{[283:284], [304:305], [308:309], [304:305]}
qualifiers:
    Key: heterogen, Value: ['( HG,1006 )']
""",
                None,
            ),
            (
                """\
type: SecStr
location: [300:319]
qualifiers:
    Key: note, Value: ['helix 12']
    Key: sec_str_type, Value: ['helix']
""",
                None,
            ),
        )
        dbxrefs = []
        self.perform_feature_parser_test(
            record,
            seq,
            id,
            name,
            description,
            annotations,
            references,
            features,
            dbxrefs,
        )


class GenBankTests(unittest.TestCase):
    """GenBank tests."""

    def test_invalid_product_line_raises_value_error(self):
        """Check SeqIO.read raises ValueError for GenBank/invalid_product.gb."""
        with self.assertRaises(ValueError):
            SeqIO.read("GenBank/invalid_product.gb", "genbank")

    def test_genbank_read(self):
        """Read one record with GenBank.read(...) and check its accession."""
        with open("GenBank/NC_000932.gb") as stream:
            parsed = GenBank.read(stream)
        expected_accession = ["NC_000932"]
        self.assertEqual(expected_accession, parsed.accession)

    def test_genbank_read_multirecord(self):
        """Check GenBank.read(...) raises ValueError on multiple record input."""
        with open("GenBank/cor6_6.gb") as stream:
            with self.assertRaises(ValueError):
                GenBank.read(stream)

    def test_genbank_read_invalid(self):
        """Check GenBank.read(...) raises ValueError on a non-GenBank (FASTA) file."""
        with open("GenBank/NC_000932.faa") as stream:
            with self.assertRaises(ValueError):
                GenBank.read(stream)

    def test_genbank_read_no_origin_no_end(self):
        """Check GenBank.read(...) raises ValueError on a malformed file."""
        with open("GenBank/no_origin_no_end.gb") as stream:
            with self.assertRaises(ValueError):
                GenBank.read(stream)

    # Evil hack with 000 to manipulate sort order to ensure this is tested
    # first (otherwise something silences the warning)
    def test_000_genbank_bad_loc_wrap_warning(self):
        """Check the warning text for non-standard feature line wrapping."""
        expected = "Non-standard feature line wrapping (didn't break on comma)?"
        with warnings.catch_warnings():
            # Escalate parser warnings to exceptions so assertRaises sees them.
            warnings.simplefilter("error", BiopythonParserWarning)
            with open("GenBank/bad_loc_wrap.gb") as stream:
                with self.assertRaises(BiopythonParserWarning) as caught:
                    GenBank.read(stream)
                message = str(caught.exception)
                self.assertEqual(expected, message)

    # Similar hack as we also want to catch that warning here
    def test_001_negative_location_warning(self):
        """Check the warning text for an un-parsable (negative) feature location."""
        expected = "Couldn't parse feature location: '-2..492'"
        with warnings.catch_warnings():
            # Escalate parser warnings to exceptions so assertRaises sees them.
            warnings.simplefilter("error", BiopythonParserWarning)
            with self.assertRaises(BiopythonParserWarning) as caught:
                SeqIO.read("GenBank/negative_location.gb", "genbank")
            message = str(caught.exception)
            self.assertEqual(expected, message)

    def test_001_genbank_bad_origin_wrapping_location(self):
        """Check the warning raised for bad origin wrapping in a linear record."""
        fragment = "It appears that '6801..100' is a feature that spans the origin"
        with warnings.catch_warnings():
            # Escalate parser warnings to exceptions so assertRaises sees them.
            warnings.simplefilter("error", BiopythonParserWarning)
            with self.assertRaises(BiopythonParserWarning) as caught:
                SeqIO.read("GenBank/bad_origin_wrap_linear.gb", "genbank")
            message = str(caught.exception)
            self.assertIn(fragment, message)

    def test_001_implicit_orign_wrap_fix(self):
        """Check the warning issued when implied origin wrapping is fixed up."""
        expected = (
            "Attempting to fix invalid location '6801..100' "
            "as it looks like incorrect origin wrapping. "
            "Please fix input file, this could have "
            "unintended behavior."
        )
        with warnings.catch_warnings():
            # Escalate parser warnings to exceptions so assertRaises sees them.
            warnings.simplefilter("error", BiopythonParserWarning)
            with self.assertRaises(BiopythonParserWarning) as caught:
                SeqIO.read("GenBank/bad_origin_wrap.gb", "genbank")
            message = str(caught.exception)
            self.assertEqual(message, expected)

    def test_compound_complex_origin_wrap(self):
        """Check the locations produced when fixing compound origin wrapping."""
        from Bio.SeqFeature import CompoundLocation

        # (feature index, expected string form of its location)
        expectations = (
            (3, "join{[<5399:5600](+), [5699:6100](+), [6800:7000](+), [0:100](+)}"),
            (4, "join{[5399:5600](+), [5699:6100](+), [<6800:7000](+), [0:100](+)}"),
            (5, "join{[5399:5600](+), [5699:6100](+), [0:100](-), [<6800:7000](-)}"),
        )
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", BiopythonParserWarning)
            parsed = SeqIO.read("GenBank/bad_origin_wrap.gb", "genbank")

            for index, expected in expectations:
                self.assertIsInstance(parsed.features[index].location, CompoundLocation)
                self.assertEqual(str(parsed.features[index].location), expected)

    def test_implicit_orign_wrap_extract_and_translate(self):
        """Check extraction and translation of features wrapped around the origin."""
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", BiopythonParserWarning)
            with open("GenBank/bad_origin_wrap_CDS.gb") as stream:
                seq_record = SeqIO.read(stream, "genbank")
        features = seq_record.features

        # (feature index, expected lower-case nucleotide sequence)
        expected_nucleotides = (
            (
                1,
                "atgccctataaaacccagggctgccttggaaaaggcgcaaccccaaccccctcgagccgcggcatataa",
            ),
            (2, "atgccgcggctcgagggggttggggttgcgccttttccaaggcagccctgggttttatag"),
        )
        # (feature index, expected translation)
        expected_proteins = (
            (1, "MPYKTQGCLGKGATPTPSSRGI*"),
            (2, "MPRLEGVGVAPFPRQPWVL*"),
        )
        for index, nucleotides in expected_nucleotides:
            extracted = features[index].extract(seq_record)
            self.assertEqual(str(extracted.seq.lower()), nucleotides)
        for index, protein in expected_proteins:
            extracted = features[index].extract(seq_record)
            self.assertEqual(str(extracted.seq.translate()), protein)

    def test_fuzzy_origin_wrap(self):
        """Check origin-wrapping features that have a fuzzy location.

        The warning text is verified first; the file is then read again with
        parser warnings ignored and the resulting location is checked.
        """
        path = "GenBank/bad_origin_wrap_fuzzy.gb"
        expected_warning = (
            "Attempting to fix invalid location '<2644..159' "
            "as it looks like incorrect origin wrapping. "
            "Please fix input file, this could have "
            "unintended behavior."
        )
        with warnings.catch_warnings():
            warnings.simplefilter("error", BiopythonParserWarning)
            with self.assertRaises(BiopythonParserWarning) as caught:
                SeqIO.read(path, "genbank")
            self.assertEqual(str(caught.exception), expected_warning)

            # Second pass over the same file, this time ignoring the warning.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", BiopythonParserWarning)
                with open(path) as stream:
                    parsed = SeqIO.read(stream, "genbank")
                    wrapped = str(parsed.features[3].location)
                    self.assertEqual(wrapped, "join{[<2643:2686](+), [0:159](+)}")

    def test_genbank_bad_loc_wrap_parsing(self):
        """Check the location read from a record with bad location wrapping."""
        expected_location = (
            "join(3462..3615,3698..3978,4077..4307,4408..4797,4876..5028,5141..5332)"
        )
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", BiopythonParserWarning)
            with open("GenBank/bad_loc_wrap.gb") as stream:
                parsed = GenBank.read(stream)
        feature_count = len(parsed.features)
        self.assertEqual(1, feature_count)
        self.assertEqual(parsed.features[0].location, expected_location)

    def test_negative_location(self):
        """Check a negative feature location leaves the last feature's location as None."""
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", BiopythonParserWarning)
            parsed = SeqIO.read("GenBank/negative_location.gb", "genbank")
            last_feature = parsed.features[-1]
            self.assertIsNone(last_feature.location)

    def test_dot_lineage(self):
        """Check organism and taxonomy for a record with a missing lineage."""
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", BiopythonParserWarning)
            parsed = SeqIO.read("GenBank/bad_loc_wrap.gb", "genbank")
        annotations = parsed.annotations
        self.assertEqual(annotations["organism"], ".")
        self.assertEqual(annotations["taxonomy"], [])

    def test_tsa(self):
        """Check the "tsa" annotation parsed from GenBank/tsa_acropora.gb."""
        parsed = SeqIO.read("GenBank/tsa_acropora.gb", "genbank")
        annotations = parsed.annotations
        self.assertIn("tsa", annotations)
        self.assertEqual(annotations["tsa"], ["GHGH01000001", "GHGH01126539"])

    def test_dblink(self):
        """Check an old style DBLINK project entry in dbxrefs, GenBank and EMBL output."""
        parsed = SeqIO.read("GenBank/NC_005816.gb", "gb")
        self.assertEqual(parsed.dbxrefs, ["Project:58037"])
        genbank_text = parsed.format("gb")
        self.assertIn("\nDBLINK      Project: 58037\n", genbank_text)
        embl_text = parsed.format("embl")
        self.assertIn("XX\nPR   Project:58037;\nXX\n", embl_text)

    def test_dblink_two(self):
        """Parse GenBank record with old and new DBLINK project entries."""
        path = "GenBank/NP_416719.gbwithparts"
        record = SeqIO.read(path, "gb")
        self.assertEqual(record.dbxrefs, ["Project:57779", "BioProject:PRJNA57779"])
        gb = record.format("gb")
        self.assertIn(
            """
DBLINK      Project: 57779
            BioProject: PRJNA57779
KEYWORDS    """,
            gb,
        )
        embl = record.format("embl")
        self.assertIn("XX\nPR   Project:PRJNA57779;\nXX\n", embl)

    def test_dbline_gb_embl(self):
        """Parse GenBank/EMBL paired records with PR project entry: GenBank."""
        record = SeqIO.read("GenBank/DS830848.gb", "gb")
        self.assertIn("BioProject:PRJNA16232", record.dbxrefs)
        gb = record.format("gb")
        self.assertIn(
            """
DBLINK      BioProject: PRJNA16232
            BioSample: SAMN03004382
KEYWORDS    """,
            gb,
        )
        # Also check EMBL output
        embl = record.format("embl")
        self.assertIn("XX\nPR   Project:PRJNA16232;\nXX\n", embl)

    def test_dbline_embl_gb(self):
        """Parse GenBank/EMBL paired records with PR project entry: EMBL."""
        record = SeqIO.read("EMBL/DS830848.embl", "embl")
        # TODO: Should we map this to BioProject:PRJNA16232
        self.assertIn("Project:PRJNA16232", record.dbxrefs)
        gb = record.format("gb")
        self.assertIn(
            """
DBLINK      Project: PRJNA16232
            MD5: 387e72e4f7ae804780d06f875ab3bc41
            ENA: ABJB010000000
            ENA: ABJB000000000
            BioSample: SAMN03004382
KEYWORDS    """,
            gb,
        )
        embl = record.format("embl")
        self.assertIn("XX\nPR   Project:PRJNA16232;\nXX\n", embl)

    def test_structured_comment_parsing(self):
        """Check parsed structured comments and compare re-formatted output.

        For three records the structured comment content is inspected and the
        record formatted as "gb" is compared with a stored *_output.gbk file.
        A fourth record is checked to have a plain comment only.
        """
        # GISAID_EpiFlu(TM)Data, HM138502.gbk has both
        # 'comment' and 'structured_comment'
        record = SeqIO.read("GenBank/HM138502.gbk", "genbank")
        self.assertEqual(
            record.annotations["comment"],
            "Swine influenza A (H1N1) virus isolated during human swine flu\n"
            "outbreak of 2009.",
        )
        gisaid = record.annotations["structured_comment"]["GISAID_EpiFlu(TM)Data"]
        self.assertEqual(gisaid["Lineage"], "swl")
        self.assertEqual(
            len(record.annotations["structured_comment"]["GISAID_EpiFlu(TM)Data"]), 3
        )
        with open("GenBank/HM138502_output.gbk") as expected_output:
            self.assertEqual(record.format("gb"), expected_output.read())

        # FluData structured comment
        record = SeqIO.read("GenBank/EU851978.gbk", "genbank")
        flu_data = record.annotations["structured_comment"]["FluData"]
        self.assertEqual(flu_data["LabID"], "2008704957")
        self.assertEqual(len(record.annotations["structured_comment"]["FluData"]), 5)
        with open("GenBank/EU851978_output.gbk") as expected_output:
            self.assertEqual(record.format("gb"), expected_output.read())

        # Assembly-Data structured comment
        record = SeqIO.read("GenBank/KF527485.gbk", "genbank")
        assembly = record.annotations["structured_comment"]["Assembly-Data"]
        self.assertEqual(assembly["Assembly Method"], "Lasergene v. 10")
        self.assertEqual(
            len(record.annotations["structured_comment"]["Assembly-Data"]), 2
        )
        with open("GenBank/KF527485_output.gbk") as expected_output:
            self.assertEqual(record.format("gb"), expected_output.read())

        # No structured comment in NC_000932.gb, just a regular comment
        record = SeqIO.read("GenBank/NC_000932.gb", "genbank")
        self.assertNotIn("structured_comment", record.annotations)
        plain_comment = (
            "REVIEWED REFSEQ: This record has been curated by NCBI staff. The\n"
            "reference sequence was derived from AP000423.\n"
            "COMPLETENESS: full length."
        )
        self.assertEqual(record.annotations["comment"], plain_comment)

    def test_multiline_structured_comment_parsing(self):
        """Check a structured comment value spread over several lines."""
        # GU949562.1, MIENS-Data, environment has value on multiple lines
        expected_environment = (
            "Temperate shelf and sea biome [ENVO:00000895], "
            "coastal water body [ENVO:02000049], "
            "coastal water [ENVO:00002150]"
        )
        parsed = SeqIO.read("GenBank/GU949562.1.gb", "genbank")
        miens = parsed.annotations["structured_comment"]["MIENS-Data"]
        self.assertEqual(miens["environment"], expected_environment)

    def test_malformed_structured_comment_parsing(self):
        """Check a malformed structured comment is skipped with a warning.

        The AYW00820.1 record uses ": " as key-value delimiter where " :: " is
        expected, so no structured comment should be stored and the first
        recorded warning should say so.
        """
        expected_fragment = "Structured comment not parsed for AYW00820."
        with warnings.catch_warnings(record=True) as recorded:
            warnings.simplefilter("always")
            parsed = SeqIO.read("GenBank/invalid_structured_comment.gb", "genbank")
            self.assertNotIn("structured_comment", parsed.annotations)
            first_message = str(recorded[0].message)
            self.assertIn(expected_fragment, first_message)

    def test_locus_line_topogoly(self):
        """Check the topology survives a round trip through the LOCUS line."""
        path = "GenBank/DS830848.gb"
        parsed = SeqIO.read(path, "genbank")
        self.assertEqual(parsed.annotations["topology"], "linear")
        buffer = StringIO()
        SeqIO.write([parsed], buffer, "genbank")
        # Everything before the first newline is the written LOCUS line.
        written_locus = buffer.getvalue().partition("\n")[0]
        self.assertIn("linear", written_locus)
        with open(path) as stream:
            source_locus = stream.readline().strip()
        self.assertEqual(written_locus, source_locus)

    def test_qualifier_order(self):
        """Check the first feature's qualifiers keep their order."""
        expected_keys = ["organism", "mol_type", "strain", "db_xref", "dev_stage"]
        parsed = SeqIO.read("GenBank/DS830848.gb", "gb")
        first_feature = parsed.features[0]
        self.assertEqual(list(first_feature.qualifiers), expected_keys)

    def test_qualifier_escaping_read(self):
        """Check qualifier escaping when parsing, including the warnings raised."""
        unescaped_value = 'One missing ""quotation mark" here'
        expected_warning = (
            'The NCBI states double-quote characters like " should be escaped'
            ' as "" (two double - quotes), but here it was not: '
            "%r"
        ) % unescaped_value
        # Parsing improperly escaped qualifiers must raise warnings
        with warnings.catch_warnings(record=True) as recorded:
            warnings.simplefilter("always")
            parsed = SeqIO.read("GenBank/qualifier_escaping_read.gb", "gb")
            self.assertEqual(len(recorded), 4)
            first = recorded[0]
            self.assertEqual(first.category, BiopythonParserWarning)
            self.assertEqual(str(first.message), expected_warning)
        # The notes themselves must come out as expected, feature by feature
        parsed_features = [parsed.features[index] for index in range(5)]
        expected_notes = (
            '"This" is "already" "escaped"',
            'One missing "quotation mark" here',
            'End not properly "escaped"',
            '"Start" not properly escaped',
            'Middle not "properly" escaped',
        )
        for feature, note in zip(parsed_features, expected_notes):
            self.assertEqual(feature.qualifiers["note"][0], note)

    def test_qualifier_escaping_write(self):
        """Check qualifier escaping is preserved when writing.

        Notes containing double quotes are set on the first two features, the
        record is written in GenBank format and parsed again, and the notes
        are compared with what was set.

        The record is written to an in-memory handle, so the input file under
        GenBank/ is only ever read and the test also works on a read-only
        checkout.
        """
        record = SeqIO.read("GenBank/qualifier_escaping_write.gb", "gb")
        expected_notes = (
            '"Should" now "be" escaped in "file"',
            '"Should also be escaped in file"',
        )
        # Write some qualifiers that need escaping
        for index, note in enumerate(expected_notes):
            record.features[index].qualifiers["note"][0] = note
        handle = StringIO()
        SeqIO.write(record, handle, "gb")
        # Read the newly written qualifiers back and test
        handle.seek(0)
        record = SeqIO.read(handle, "gb")
        for index, note in enumerate(expected_notes):
            self.assertEqual(record.features[index].qualifiers["note"][0], note)

    def test_long_names(self):
        """Round-trip names and lengths which push the column based LOCUS line."""
        template = SeqIO.read("GenBank/iro.gb", "gb")
        self.assertEqual(len(template), 1326)
        # Acceptability of LOCUS line with length > 80
        # invalidates some of these tests
        cases = (
            ("short", 1, True),
            ("max_length_of_16", 1000, True),
            ("overly_long_at_17", 1000, True),
            ("excessively_long_at_22", 99999, True),
            ("excessively_long_at_22", 100000, True),
            ("pushing_the_limits_at_24", 999, True),
            ("pushing_the_limits_at_24", 1000, True),
            ("old_max_name_length_was_26", 10, True),  # 2 digits
            ("old_max_name_length_was_26", 9, True),  # 1 digit
        )
        for name, seq_len, ok in cases:
            # Make the length match the desired target
            record = template[:]
            # TODO - Implement Seq * int
            record.seq = Seq("N" * seq_len)
            record.annotations["molecule_type"] = template.annotations["molecule_type"]
            # Set the identifier to the desired name
            record.id = name
            record.name = name
            # Attempt to output the record...
            if not ok:
                # e.g. ValueError:
                # Locus identifier 'excessively_long_at_22' is too long
                with self.assertRaises(ValueError):
                    record.format("gb")
                continue
            with warnings.catch_warnings():
                # e.g. BiopythonWarning: Stealing space from length
                # field to allow long name in LOCUS line
                warnings.simplefilter("ignore", BiopythonWarning)
                buffer = StringIO()
                written = SeqIO.write(record, buffer, "gb")
                self.assertEqual(1, written)
            buffer.seek(0)
            locus_line = buffer.readline()
            self.assertIn(" %s " % name, locus_line)
            self.assertIn(" %i bp " % seq_len, locus_line)
            # Splitting based on whitespace rather than position due to
            # updated GenBank specification
            fields = locus_line.split()
            self.assertEqual(fields[1:3], [name, str(seq_len)], locus_line)
            # Rewind and parse what was just written
            buffer.seek(0)
            with warnings.catch_warnings():
                # e.g. BiopythonParserWarning: GenBank LOCUS line
                # identifier over 16 characters
                warnings.simplefilter("ignore", BiopythonWarning)
                reparsed = SeqIO.read(buffer, "gb")
            self.assertEqual(name, reparsed.name)
            self.assertEqual(seq_len, len(reparsed))

    def test_genbank_date_default(self):
        """Check a record without a date is read back with the default date."""
        # No "date" annotation is supplied here
        undated = SeqRecord(
            Seq("ATGC"),
            id="123456789",
            name="UnitTest",
            description="Test case for date parsing",
            annotations={"molecule_type": "DNA"},
        )
        buffer = StringIO()
        SeqIO.write(undated, buffer, "genbank")
        buffer.seek(0)
        reparsed = SeqIO.read(buffer, "gb")
        self.assertEqual(reparsed.annotations["date"], "01-JAN-1980")

    def test_genbank_date_correct(self):
        """Check a user provided date string is read back unchanged."""
        user_date = "24-DEC-2015"
        dated = SeqRecord(
            Seq("ATGC"),
            id="123456789",
            name="UnitTest",
            description="Test case for date parsing",
            annotations={"molecule_type": "DNA"},
        )
        dated.annotations["date"] = user_date
        buffer = StringIO()
        SeqIO.write(dated, buffer, "genbank")
        buffer.seek(0)
        reparsed = SeqIO.read(buffer, "gb")
        self.assertEqual(reparsed.annotations["date"], "24-DEC-2015")

    def test_genbank_date_list(self):
        """Check how a list given as the date annotation is read back.

        A one-element list is expected to give that date, a two-element list
        the default date.
        """
        sequence_object = Seq("ATGC")
        # (value stored under "date", date expected after the round trip)
        cases = (
            (["24-DEC-2015"], "24-DEC-2015"),
            (["24-DEC-2015", "25-JAN-2016"], "01-JAN-1980"),
        )
        for date_value, expected_date in cases:
            record = SeqRecord(
                sequence_object,
                id="123456789",
                name="UnitTest",
                description="Test case for date parsing",
                annotations={"molecule_type": "DNA"},
            )
            record.annotations["date"] = date_value
            buffer = StringIO()
            SeqIO.write(record, buffer, "genbank")
            buffer.seek(0)
            reparsed = SeqIO.read(buffer, "gb")
            self.assertEqual(reparsed.annotations["date"], expected_date)

    def test_genbank_date_datetime(self):
        """Check that a datetime object given as the date is written out."""
        original = SeqRecord(
            Seq("ATGC"),
            id="123456789",
            name="UnitTest",
            description="Test case for date parsing",
            annotations={"molecule_type": "DNA"},
        )
        # A datetime instance rather than a pre-formatted string.
        original.annotations["date"] = datetime(2000, 2, 2)

        buffer = StringIO()
        SeqIO.write(original, buffer, "genbank")
        buffer.seek(0)
        parsed = SeqIO.read(buffer, "gb")

        self.assertEqual(parsed.annotations["date"], "02-FEB-2000")

    def test_genbank_date_invalid(self):
        """Check if invalid dates are treated as default.

        Every invalid date is checked in its own sub-test, so that a failure
        reports which input was rejected and the remaining inputs still run.
        """
        invalid_dates = ("invalid date", "29-2-1981", "35-1-2018", "1-1-80", "1-9-99")

        sequence_object = Seq("ATGC")
        for invalid_date in invalid_dates:
            with self.subTest(date=invalid_date):
                record = SeqRecord(
                    sequence_object,
                    id="123456789",
                    name="UnitTest",
                    description="Test case for date parsing",
                    annotations={"molecule_type": "DNA"},
                )

                record.annotations["date"] = invalid_date
                handle = StringIO()
                SeqIO.write(record, handle, "genbank")
                handle.seek(0)
                gb = SeqIO.read(handle, "gb")
                # The invalid value must have been replaced by the default.
                self.assertEqual(gb.annotations["date"], "01-JAN-1980")

    def test_longer_locus_line(self):
        """Check a record with a longer LOCUS line survives a read/write cycle."""
        # Build the input from an existing file, swapping in a LOCUS line
        # with a long name and a large declared sequence length.
        with open("GenBank/DS830848.gb") as source:
            lines = source.readlines()
        lines[0] = (
            "LOCUS       AZZZAA021234567891234 2147483647 bp    DNA     linear   PRI"
            " 15-OCT-2018\n"
        )
        in_tmp = StringIO("".join(lines))

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            parsed = SeqIO.read(in_tmp, "genbank")

            # Write the parsed record to an in-memory file ...
            out_tmp = StringIO()
            SeqIO.write(parsed, out_tmp, "genbank")

            # ... and make sure that output can be parsed again.
            out_tmp.seek(0)
            reparsed = SeqIO.read(out_tmp, "genbank")
            self.assertEqual(reparsed.id, "DS830848.1")
            self.assertEqual(reparsed.name, "AZZZAA021234567891234")
            self.assertEqual(len(reparsed.seq), 2147483647)

    if sys.maxsize > 2147483647:

        def test_extremely_long_sequence(self):
            """Tests if extremely long sequences can be read.

            This is only run if sys.maxsize > 2147483647.

            A record declaring a length of 10000000000 must survive a
            read/write/read cycle, while a declared length of
            sys.maxsize + 1 must raise a ValueError when read.
            """
            # Create example file from existing file
            path = "GenBank/DS830848.gb"
            with open(path) as inhandle:
                data = inhandle.readlines()
            data[0] = (
                "LOCUS       AZZZAA02123456789 10000000000 bp    DNA     linear   PRI"
                " 15-OCT-2018\n"
            )

            # Create memory file from modified genbank file
            in_tmp = StringIO("".join(data))

            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                record = SeqIO.read(in_tmp, "genbank")

                # Create temporary output memory file
                out_tmp = StringIO()
                SeqIO.write(record, out_tmp, "genbank")

                # Check that the written file can be read back in
                out_tmp.seek(0)
                record_in = SeqIO.read(out_tmp, "genbank")
                self.assertEqual(record_in.id, "DS830848.1")
                self.assertEqual(record_in.name, "AZZZAA02123456789")
                self.assertEqual(len(record_in.seq), 10000000000)

            # Reuse the lines already loaded, changing only the LOCUS line so
            # that the declared length is one more than sys.maxsize.
            data[0] = (
                "LOCUS       AZZZAA02123456789 "
                + str(sys.maxsize + 1)
                + " bp    DNA     linear   PRI 15-OCT-2018\n"
            )
            # Deliberately outside the catch_warnings block above, so the
            # warning filters are the ambient ones while this file is read.
            with self.assertRaises(ValueError):
                SeqIO.read(StringIO("".join(data)), "genbank")


class LineOneTests(unittest.TestCase):
    """Check GenBank/EMBL topology / molecule_type parsing."""

    def _check_first_line(self, scanner, line, topo, mol_type, div):
        """Feed a single header line to a scanner and check the annotations.

        Arguments:
         - scanner - scanner instance whose ``_feed_first_line`` is called.
         - line - the LOCUS (GenBank) or ID (EMBL/IMGT) line to parse.
         - topo - expected "topology" annotation, None if not expected.
         - mol_type - expected "molecule_type" annotation, None if not expected.
         - div - expected "data_file_division" annotation, None if not expected.

        A fresh feature consumer is created for every call, so annotations
        from one line cannot leak into the checks for another.
        """
        consumer = GenBank._FeatureConsumer(1, GenBank.FeatureValueCleaner)
        scanner._feed_first_line(consumer, line)
        annotations = consumer.data.annotations
        t = annotations.get("topology", None)
        self.assertEqual(t, topo, "Wrong topology %r not %r from %r" % (t, topo, line))
        mt = annotations.get("molecule_type", None)
        self.assertEqual(
            mt,
            mol_type,
            "Wrong molecule_type %r not %r from %r" % (mt, mol_type, line),
        )
        d = annotations.get("data_file_division", None)
        self.assertEqual(d, div, "Wrong division %r not %r from %r" % (d, div, line))

    def test_topology_genbank(self):
        """Check GenBank LOCUS line parsing."""
        # This is a bit low level,
        # but can test parsing the LOCUS line only.
        # Each entry is (line, topology, molecule type, division, warnings),
        # where warnings is None if no warning is expected, or else the list
        # of expected warning classes in the order they are raised.
        tests = [
            ("LOCUS       U00096", None, None, None, None),
            # This example is actually fungal,
            # accession U49845 from Saccharomyces cerevisiae:
            (
                "LOCUS       SCU49845     5028 bp    DNA             PLN      "
                " 21-JUN-1999",
                None,
                "DNA",
                "PLN",
                None,
            ),
            (
                "LOCUS       AB070938                6497 bp    DNA     linear   BCT"
                " 11-OCT-2001",
                "linear",
                "DNA",
                "BCT",
                None,
            ),
            (
                "LOCUS       NC_005816               9609 bp    DNA     circular BCT"
                " 21-JUL-2008",
                "circular",
                "DNA",
                "BCT",
                None,
            ),
            (
                "LOCUS       SCX3_BUTOC                64 aa            linear   INV"
                " 16-OCT-2001",
                "linear",
                None,
                "INV",
                None,
            ),
            (
                "LOCUS       pEH010                  5743 bp    DNA     circular",
                "circular",
                "DNA",
                None,
                [BiopythonParserWarning],
            ),
            # This is a test of the format > 80 chars long
            (
                "LOCUS       AZZZAA02123456789 1000000000 bp    DNA     linear   PRI"
                " 15-OCT-2018",
                "linear",
                "DNA",
                "PRI",
                None,
            ),
        ]
        for (line, topo, mol_type, div, warning_list) in tests:
            with warnings.catch_warnings(record=True) as caught:
                # Record every warning, so the count below is reliable.
                warnings.simplefilter("always")
                scanner = GenBank.Scanner.GenBankScanner()
                self._check_first_line(scanner, line, topo, mol_type, div)
                if warning_list is None:
                    self.assertEqual(len(caught), 0)
                else:
                    self.assertEqual(len(caught), len(warning_list))
                    for i, warning_class in enumerate(warning_list):
                        self.assertEqual(caught[i].category, warning_class)

    def test_topology_embl(self):
        """Check EMBL ID line parsing."""
        # This is a bit low level, but can test parsing the ID line only.
        # Each entry is (line, topology, molecule type, division).
        tests = [
            # Modern examples with sequence version
            (
                "ID   X56734; SV 1; linear; mRNA; STD; PLN; 1859 BP.",
                "linear",
                "mRNA",
                "PLN",
            ),
            (
                "ID   CD789012; SV 4; linear; genomic DNA; HTG; MAM; 500 BP.",
                "linear",
                "genomic DNA",
                "MAM",
            ),
            # Example to match GenBank example used above:
            (
                "ID   U49845; SV 1; linear; genomic DNA; STD; FUN; 5028 BP.",
                "linear",
                "genomic DNA",
                "FUN",
            ),
            # Old examples:
            (
                "ID   BSUB9999   standard; circular DNA; PRO; 4214630 BP.",
                "circular",
                "DNA",
                "PRO",
            ),
            ("ID   SC10H5 standard; DNA; PRO; 4870 BP.", None, "DNA", "PRO"),
            # Patent example from 2016-06-10
            # ftp://ftp.ebi.ac.uk/pub/databases/embl/patent/
            (
                "ID   A01679; SV 1; linear; unassigned DNA; PAT; MUS; 12 BP.",
                "linear",
                "unassigned DNA",
                "MUS",
            ),
            # Old patent examples
            ("ID   NRP_AX000635; PRT; NR1; 15 SQ", None, None, "NR1"),
            ("ID   NRP0000016E; PRT; NR2; 5 SQ", None, None, "NR2"),
            # KIPO patent examples
            ("ID   DI500001       STANDARD;      PRT;   111 AA.", None, None, None),
            ("ID   DI644510   standard; PRT;  1852 AA.", None, None, None),
        ]
        for (line, topo, mol_type, div) in tests:
            scanner = GenBank.Scanner.EmblScanner()
            self._check_first_line(scanner, line, topo, mol_type, div)

    def test_first_line_imgt(self):
        """Check IMGT ID line parsing."""
        # This is a bit low level, but can test parsing the ID line only.
        # Each entry is (line, topology, molecule type, division).
        tests = [
            ("ID   HLA00001   standard; DNA; HUM; 3503 BP.", None, "DNA", "HUM"),
            ("ID   HLA00001; SV 1; standard; DNA; HUM; 3503 BP.", None, "DNA", "HUM"),
        ]
        for (line, topo, mol_type, div) in tests:
            scanner = GenBank.Scanner._ImgtScanner()
            self._check_first_line(scanner, line, topo, mol_type, div)


class OutputTests(unittest.TestCase):
    """GenBank output tests."""

    def test_mad_dots(self):
        """Write and read back several accession.version style identifiers."""
        identifiers = ("example", "example.1a", "example.1.2", "example.1-2")
        for identifier in identifiers:
            original = SeqRecord(
                Seq("ACGT"),
                id=identifier,
                name=identifier,
                description="mad dots",
                annotations={"molecule_type": "DNA"},
            )
            genbank_text = original.format("gb")
            parsed = SeqIO.read(StringIO(genbank_text), "gb")
            self.assertEqual(original.id, parsed.id)
            self.assertEqual(original.name, parsed.name)
            self.assertEqual(original.description, parsed.description)
            self.assertEqual(original.seq, parsed.seq)

    def test_seqrecord_default_description(self):
        """Write and read back a record left with the default description."""
        original = SeqRecord(
            Seq("ACGT"),
            id="example",
            name="short",
            annotations={"molecule_type": "DNA"},
        )
        self.assertEqual(original.description, "<unknown description>")

        # The placeholder description is expected to be written as a lone
        # dot, which in turn is expected to parse as an empty description.
        genbank_text = original.format("gb")
        self.assertIn("DEFINITION  .\n", genbank_text)
        parsed = SeqIO.read(StringIO(genbank_text), "gb")
        self.assertEqual(original.id, parsed.id)
        self.assertEqual(original.name, parsed.name)
        self.assertEqual("", parsed.description)
        self.assertEqual(original.seq, parsed.seq)

    # The 000 in the name is a hack to manipulate the sort order so that
    # this test is run first (otherwise something silences the warning)
    def test_000_write_invalid_but_parsed_locus_line(self):
        """Check a slightly invalid LOCUS line we could parse is written back."""
        # Start from a valid file, but replace its LOCUS line with one where
        # the molecule type is given in lower case.
        with open("GenBank/NC_005816.gb") as source:
            file_lines = source.readlines()
        invalid_line = (
            "LOCUS       NC_005816               9609 bp    dna     circular BCT"
            " 21-JUL-2008\n"
        )
        file_lines[0] = invalid_line
        fake_handle = StringIO("".join(file_lines))

        # Parsing must give exactly one warning, about the molecule type
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            rec = SeqIO.read(fake_handle, "genbank")
            self.assertEqual(len(caught), 1)
            only_warning = caught[0]
            self.assertEqual(only_warning.category, BiopythonParserWarning)
            self.assertEqual(
                str(only_warning.message),
                "Non-upper case molecule type in LOCUS line: dna",
            )

        out_handle = StringIO()
        written_count = SeqIO.write([rec], out_handle, "genbank")
        self.assertEqual(written_count, 1)

        # The LOCUS line in the output must match the one that was parsed.
        out_handle.seek(0)
        written_lines = out_handle.readlines()
        self.assertEqual(written_lines[0], invalid_line)

    def test_write_tsa_data_division(self):
        """Check the TSA data_file_division is kept when writing TSA files."""
        with open("GenBank/tsa_acropora.gb") as infile:
            rec = SeqIO.read(infile, "genbank")
            # Rewind to pick up the original LOCUS line as plain text.
            infile.seek(0)
            original_locus = infile.readline()

        outfile = StringIO()
        SeqIO.write([rec], outfile, "genbank")
        outfile.seek(0)
        written_locus = outfile.readline()

        # Ideally the two lines would be compared directly, but the "units"
        # field is also broken at the moment, so only the second to last
        # whitespace separated field (the division) is compared.
        self.assertEqual(original_locus.split()[-2], written_locus.split()[-2])


class GenBankScannerTests(unittest.TestCase):
    """GenBank Scanner tests, test parsing gbk and embl files."""

    # One GenBank scanner, shared by the helper methods below.
    gb_s = GenBank.Scanner.GenBankScanner()

    def gb_to_l_cds_f(self, filename, tags2id=None):
        """Return the list of records made from the CDS features of a gb file."""
        # tags2id is only passed on when it is truthy, otherwise the
        # scanner is called without it.
        optional = {"tags2id": tags2id} if tags2id else {}
        with open(filename) as handle:
            return list(self.gb_s.parse_cds_features(handle, **optional))

    def gb_to_l_r(self, filename, do_features=False):
        """Return the list of records parsed from a gb file."""
        with open(filename) as handle:
            return list(self.gb_s.parse_records(handle, do_features=do_features))

    def test_genbank_cds_interaction(self):
        """Test CDS interaction, parse CDS features on gb(k) files."""
        tags = ("gene", "locus_tag", "product")

        # NC_000932.gb without tags: 85 records expected
        chloroplast_cds = self.gb_to_l_cds_f("GenBank/NC_000932.gb")
        self.assertEqual(len(chloroplast_cds), 85)
        self.assertEqual(chloroplast_cds[0].id, "NP_051037.1")
        self.assertEqual(chloroplast_cds[84].id, "NP_051123.1")

        # NC_005816.gb using tags for the id: 10 records expected
        plasmid_cds = self.gb_to_l_cds_f("GenBank/NC_005816.gb", tags2id=tags)
        self.assertEqual(len(plasmid_cds), 10)
        self.assertEqual(plasmid_cds[0].id, "<unknown id>")
        self.assertEqual(plasmid_cds[0].name, "YP_pPCP01")

        # Both files parsed using tags, then combined: 95 records expected
        combined = self.gb_to_l_cds_f("GenBank/NC_000932.gb", tags2id=tags)
        combined = combined + self.gb_to_l_cds_f("GenBank/NC_005816.gb", tags2id=tags)
        self.assertEqual(len(combined), 95)
        first, last = combined[0], combined[94]
        self.assertEqual(first.id, "rps12")
        self.assertEqual(first.description, "ribosomal protein S12")
        self.assertEqual(last.id, "<unknown id>")
        self.assertEqual(last.description, "hypothetical protein")

    def test_genbank_interaction(self):
        """Test GenBank records interaction on gbk files."""
        plasmid_description = (
            "Yersinia pestis biovar Microtus str. 91001 plasmid "
            "pPCP1, complete sequence"
        )
        chloroplast_description = "Arabidopsis thaliana chloroplast, complete genome"
        # (filename, do_features, id, name, description, number of features)
        cases = (
            ("GenBank/NC_005816.gb", False, "NC_005816.1", "NC_005816",
             plasmid_description, 0),
            ("GenBank/NC_005816.gb", True, "NC_005816.1", "NC_005816",
             plasmid_description, 41),
            ("GenBank/NC_000932.gb", False, "NC_000932.1", "NC_000932",
             chloroplast_description, 0),
            ("GenBank/NC_000932.gb", True, "NC_000932.1", "NC_000932",
             chloroplast_description, 259),
        )
        for filename, do_features, rec_id, rec_name, description, n_features in cases:
            records = self.gb_to_l_r(filename, do_features=do_features)
            # Each of these files should hold exactly one record
            self.assertEqual(len(records), 1)
            record = records[0]
            self.assertEqual(record.id, rec_id)
            self.assertEqual(record.name, rec_name)
            self.assertEqual(record.description, description)
            self.assertEqual(len(record.features), n_features)

    def test_embl_cds_interaction(self):
        """Test EMBL CDS interaction, parse CDS features on embl files."""
        scanner = GenBank.Scanner.EmblScanner()

        with open("EMBL/AE017046.embl") as handle:
            cds_records = list(scanner.parse_cds_features(handle))

        # 10 records expected
        self.assertEqual(len(cds_records), 10)
        first = cds_records[0]
        self.assertEqual(first.id, "AAS58758.1")
        self.assertEqual(first.description, "putative transposase")

    def test_embl_record_interaction(self):
        """Test EMBL Record interaction on embl files."""
        scanner = GenBank.Scanner.EmblScanner()

        with open("EMBL/AE017046.embl") as handle:
            records = list(scanner.parse_records(handle, do_features=True))

        # A single record expected
        self.assertEqual(len(records), 1)
        record = records[0]
        self.assertEqual(record.id, "AE017046.1")
        self.assertEqual(
            record.description,
            "Yersinia pestis biovar Microtus str. 91001 plasmid pPCP1, complete "
            "sequence.",
        )
        self.assertEqual(len(record.features), 29)


if __name__ == "__main__":
    # When run as a script, run the tests in this file with verbosity 2.
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))
