File: test_sequence_generators.py

package info (click to toggle)
python-cogent 1.9-9
links: PTS, VCS
area: main
in suites: stretch
size: 19,480 kB
ctags: 27,660
sloc: python: 137,483; makefile: 148; sh: 10
file content (1125 lines) | stat: -rw-r--r-- 49,087 bytes
parent folder | download | duplicates (2)
#!/usr/bin/env python
"""test_sequence_generator.py: tests of the sequence_generator module.
"""
from cogent.seqsim.sequence_generators import permutations, combinations, \
    SequenceGenerator, Partition, Composition, \
    MageFrequencies, SequenceHandle, IUPAC_DNA, IUPAC_RNA, BaseFrequency, \
    PairFrequency, BasePairFrequency, RegionModel, ConstantRegion, \
    UnpairedRegion, ShuffledRegion, PairedRegion, MatchingRegion, \
    SequenceModel, Rule, Motif, Module, SequenceEmbedder
from StringIO import StringIO
from operator import mul
from sys import path
from cogent.maths.stats.util import Freqs
from cogent.util.misc import app_path
from cogent.struct.rna2d import ViennaStructure
from cogent.util.unit_test import TestCase, main

__author__ = "Rob Knight"
__copyright__ = "Copyright 2007-2016, The Cogent Project"
__credits__ = ["Rob Knight", "Daniel McDonald"]
__license__ = "GPL"
__version__ = "1.9"
__maintainer__ = "Rob Knight"
__email__ = "rob@spot.colorado.edu"
__status__ = "Development"

#need to skip some tests if RNAfold absent
if app_path('RNAfold'):
    RNAFOLD_PRESENT = True
else:
    RNAFOLD_PRESENT = False

class FunctionTests(TestCase):
    """Tests of standalone functions"""
    def setUp(self):
        self.standards = (0, 1, 5, 30, 173, 1000, 4382)
        
    def test_permuations_negative_k(self):
        """permutations should raise IndexError if k negative"""
        self.assertRaises(IndexError, permutations, 3, -1)
    
    def test_permutations_k_more_than_n(self):
        """permutations should raise IndexError if k > n"""
        self.assertRaises(IndexError, permutations, 3, 4)
    
    def test_permutations_negative_n(self):
        """permutations should raise IndexError if n negative"""
        self.assertRaises(IndexError, permutations, -3, -2)

    def test_permutations_k_equals_1(self):
        """permutations should return n if k=1"""
        for n in self.standards[1:]:
            self.assertEqual(permutations(n,1), n)

    def test_permutations_k_equals_2(self):
        """permutations should return n*(n-1) if k=2"""
        for n in self.standards[2:]:
            self.assertEqual(permutations(n,2), n*(n-1))

    def test_permutations_k_equals_n(self):
        """permutations should return n! if k=n"""
        for n in self.standards[1:]:
            self.assertEqual(permutations(n,n), reduce(mul, range(1,n+1)))
            
    def test_combinations_k_equals_n(self):
        """combinations should return 1 if k = n"""
        for n in self.standards:
            self.assertEqual(combinations(n,n), 1)
    
    def test_combinations_k_equals_n_minus_1(self):
        """combinations should return n if k=(n-1)"""
        for n in self.standards[1:]:
            self.assertEqual(combinations(n, n-1), n)

    def test_combinations_zero_k(self):
        """combinations should return 1 if k is zero"""
        for n in self.standards:
            self.assertEqual(combinations(n, 0), 1)

    def test_combinations_symmetry(self):
        """combinations(n,k) should equal combinations(n,n-k)"""
        for n in self.standards[3:]:
            for k in (0, 1, 5, 18):
                self.assertEquals(combinations(n, k), combinations(n, n-k))

    def test_combinations_arbitrary_values(self):
        """combinations(n,k) should equal results from spreadsheet"""
        results = {
            30:{0:1, 1:30, 5:142506, 18:86493225, 29:30, 30:1},
            173:{0:1, 1:173, 5:1218218079, 18:1.204353e24, 29:7.524850e32, \
                 30:3.611928e33},
            1000:{0:1, 1:1000, 5:8.2502913e12, 18:1.339124e38,29:7.506513e55, \
                  30:2.429608e57},
            4382:{0:1, 1:4382, 5:1.343350e16, 18:5.352761e49, 29:4.184411e74, \
                  30:6.0715804e76},
                  }
        for n in self.standards[3:]:
            for k in (0, 1, 5, 18, 29, 30):
                self.assertFloatEqualRel(combinations(n,k), results[n][k], 1e-5)
                
class SequenceGeneratorTests(TestCase):
    """Tests of SequenceGenerator, which fills in degenerate bases"""
    def setUp(self):
        """Defines a few standard generators"""
        self.rna_codons = SequenceGenerator('NNN')
        self.dna_iupac_small = SequenceGenerator('RH', IUPAC_DNA)
        self.empty = SequenceGenerator('')
        self.huge = SequenceGenerator('N'*50)
        self.binary = SequenceGenerator('01??01', {'0':'0','1':'1','?':'10'})

    def test_len(self):
        """len(SequenceGenerator) should return number of possible matches"""
        lengths = ((self.rna_codons, 64), (self.dna_iupac_small, 6),
                   (self.empty, 0), (self.binary, 4))
        for item, expected in lengths:
            self.assertEqual(len(item), expected)
        try:
            len(self.huge)
        except OverflowError:
            pass
        else:
            raise AssertionError, "Failed to raise expected OverflowError"

    def test_numPossibilities(self):
        """SequenceGenerator.numPossibilities() should be robust to overflow"""
        lengths = ((self.rna_codons, 64), (self.dna_iupac_small, 6),
                   (self.empty, 0), (self.binary, 4), (self.huge, 4**50))
        for item, expected in lengths:
            self.assertEqual(item.numPossibilities(), expected)

    def test_sequences(self):
        """SequenceGenerator should produce the correct list of sequences"""
        self.assertEqual(list(self.empty), [])
        self.assertEqual(list(self.dna_iupac_small), \
            ['AT','AC','AA','GT','GC','GA'])
        codons = []
        for first in 'UCAG':
            for second in 'UCAG':
                for third in 'UCAG':
                    codons.append(''.join([first, second, third]))
        self.assertEqual(list(self.rna_codons), codons)
        #test that it still works if we call the generator a second time
        self.assertEqual(list(self.rna_codons), codons)
        
    def test_iter(self):
        """SequenceGenerator should act like a list with for..in syntax"""
        as_list = list(self.rna_codons)
        for obs, exp in zip(self.rna_codons, as_list):
            self.assertEqual(obs, exp)

    def test_getitem(self):
        """SequenceGenerator should allow __getitem__ like a list"""
        as_list = list(self.rna_codons)
        for i in range(64):
            self.assertEqual(self.rna_codons[i], as_list[i])

        for i in range(1,65):
            self.assertEqual(self.rna_codons[-i], as_list[-i])

        self.assertEqual(self.huge[-1], 'G'*50)

    def test_getitem_slices(self):
        """SequenceGenerator slicing should work the same as a list"""
        e = list(self.rna_codons)
        o = self.rna_codons
        values = ( 
                    (o[:], e[:]),
                    (o[0:], e[0:]),
                    (o[1:], e[1:]),
                    (o[5:], e[5:]),
                    (o[0:5], e[0:5]),
                    (o[1:5], e[1:5]),
                    (o[5:5], e[5:5]),
                    (o[0:1], e[0:1]),
                    (o[len(o)-1:len(o)], e[len(e)-1:len(e)]),
                    (o[len(o):len(o)], e[len(e):len(e)]),
                )
        testnum = 0
        for obs, exp in values:
            testnum += 1
            self.assertEqual(list(obs), exp)

        big = list(self.huge[1:5])
        self.assertEqual(['U'*49+'C', 'U'*49+'A', 'U'*49+'G', 'U'*48+'CU'], big)

class PartitionTests(TestCase):
    """Tests of the Paritition object."""
    def test_single_partition(self):
        """If number of objects = bins * min, only one way to partition"""
        for num_bins in range(1, 10):
            for occupancy in range(10):
                self.assertEqual(len(Partition(num_bins*occupancy, 
                num_bins, occupancy)), 1)

    def test_partitions(self):
        """Test several properties of partitions, especially start/end"""
        for num_bins in range(1, 5):
            for occupancy in range(5):
                for num_items in \
                range(num_bins*occupancy, num_bins*occupancy + 10):
                    p = Partition(num_items, num_bins, occupancy)
                    l = [i for i in p]
                    l2 = [i for i in p]
                    #check that calling it twice doesn't break it
                    self.assertEqual(l, l2)
                    #check the lengths
                    self.assertEqual(len(p), len(l))
                    #check the ranges are the same...
                    self.assertEqual(l[0][1:], l[-1][0:-1])
                    #and that they contain the right values.
                    self.assertEqual(l[0][1:], [occupancy]*(num_bins - 1))
                    #check the first and last elements
                    self.assertEqual(l[0][0], l[-1][-1])
                    self.assertEqual(l[0][0], \
                    num_items - occupancy * (num_bins - 1))
                    
    def test_values(self):
        """Partition should match precalculated values"""
        self.assertEqual(len(Partition(20, 4, 1)), 969)

    def test_str(self):
        """str(partition) should work as expected"""
        p = Partition(20,4,1)
        self.assertEqual(str(p), "Items: 20 Pieces: 4 Min Per Piece: 1")

        p.NumItems = 13
        p.NumPieces = 2
        p.MinOccupancy = 0
        self.assertEqual(len(p), len(Partition(13, 2, 0)))
        self.assertEqual(str(p), "Items: 13 Pieces: 2 Min Per Piece: 0")

class CompositionTests(TestCase):
    """Tests of the Composition class."""
    def setUp(self):
        """Define a few standard compositions."""
        self.bases_10pct = Composition(10, 0, "ACGU")
        self.bases_5pct = Composition(5, 1, "ACGU")
        self.bases_extra = Composition(10, 0, "CYGEJ")
        self.small = Composition(20, 0, "xy")
        self.unique = Composition(20, 1, "z")

    def test_lengths(self):
        """Composition should return correct number of elements"""
        self.assertEqual(len(self.bases_10pct), len(Partition(10,4,0)))
        self.assertEqual(len(self.bases_5pct), len(Partition(20,4,1)))
        self.assertEqual(len(self.bases_extra), len(Partition(10,5,0)))
        self.assertEqual(len(self.small), len(Partition(5, 2, 0)))
        self.assertEqual(len(self.unique), len(Partition(5, 1, 1)))

    def test_known_vals(self):
        """Composition should return precalculated elements for known cases"""
        self.assertEqual(len(Composition(5,1,"ACGU")), 969) 
        self.assertEqual(len(Composition(5,0,"ACGU")), 1771)
        as_list = list(Composition(5,1,"ACGU"))
        self.assertEqual(as_list[0], Freqs('A'*17+'CGU'))
        self.assertEqual(as_list[-1], Freqs('U'*17+'ACG'))

    def test_updating(self):
        """Composition updates should reset frequencies correctly."""
        exp_list = list(Composition(5, 1, "GCAUN"))
        self.bases_10pct.Spacing = 5
        self.bases_10pct.Alphabet = "GCAUN"
        self.bases_10pct.MinOccupancy = 1
        self.assertEqual(list(self.bases_10pct), exp_list)

class MageFrequenciesTests(TestCase):
    """Tests of the MageFrequencies class -- presentation for Composition."""
    def setUp(self):
        """Define a few standard compositions."""
        self.bases_10pct = Composition(10, 0, "ACGU")
    def test_str(self):
        """MageFrequencies string conversions work correctly"""
        obs_list = list(self.bases_10pct)
        self.assertEqual(str(MageFrequencies(obs_list[0])), '1.0 0.0 0.0')
        self.assertEqual(str(MageFrequencies(obs_list[-1], "last")), \
            '{last} 0.0 0.0 0.0')
        self.assertEqual(str(MageFrequencies({'C':2, 'A':3, 'T':5, 'x':17}, \
        'bases')), '{bases} 0.3 0.2 0.0')

class SequenceHandleTests(TestCase):
    """Tests of the SequenceHandle class."""
    def setUp(self):
        """Define some standard SequenceHandles."""
        self.rna = SequenceHandle('uuca', 'ucag')
        self.any = SequenceHandle(['u', 1, None])
        self.empty = SequenceHandle()
        
    def test_init_good(self):
        """SequenceHandle should init OK without alphabet"""
        self.assertEqual(SequenceHandle('abc123'), list('abc123'))
        self.assertEqual(SequenceHandle(), list())
        self.assertEqual(SequenceHandle('abcaaa', 'abcd'), list('abcaaa'))
        self.assertEqual(SequenceHandle([1,2,3]), [1,2,3])

    def test_init_bad(self):
        """SequenceHandle should raise ValueError if item not in alphabet"""
        self.assertRaises(ValueError, SequenceHandle, 'abc1', 'abc')
        self.assertRaises(ValueError, SequenceHandle, '1', [1])

    def test_setitem_good(self):
        """SequenceHandle setitem should allow items in alphabet"""
        self.rna[0] = 'c'
        self.assertEqual(self.rna, list('cuca'))
        self.rna[-1] = 'u'
        self.assertEqual(self.rna, list('cucu'))
        self.any[1] = [1, 2, 3]
        self.assertEqual(self.any, ['u', [1, 2, 3], None])

    def test_setitem_bad(self):
        """SequenceHandle setitem should reject items not in alphabet"""
        self.assertRaises(ValueError, self.rna.__setitem__, 0, 'x')

    def test_setslice_good(self):
        """SequenceHandle setslice should allow same-length slice"""
        self.rna[:] = list('aaaa')
        self.assertEqual(self.rna, list('aaaa'))
        self.rna[0:1] = ['u']
        self.assertEqual(self.rna, list('uaaa'))
        self.rna[-2:] = ['g','g']
        self.assertEqual(self.rna, list('uagg'))

    def test_setslice_bad(self):
        """SequenceHandle setslice should reject bad items or length change"""
        self.assertRaises(ValueError, self.rna.__setslice__, 0, len(self.rna), \
            ['a']*5)
        self.assertRaises(ValueError, self.any.__setslice__, 0, len(self.any), \
            ['a']*5)
        self.assertRaises(ValueError, self.rna.__setslice__, 0, 1, ['x'])

    def test_string(self):
        """SequenceHandle str should join items without spaces"""
        #use ''.join if items are strings
        self.assertEqual(str(self.rna), 'uuca')
        self.assertEqual(str(self.empty), '')
        #if some of the items raise errors, use built-in method instead
        self.assertEqual(str(self.any), str(['u', 1, None]))

    def test_naughty_methods(self):
        """SequenceHandle list mutators should raise NotImplementedError"""
        r = self.rna
        naughty = [r.__delitem__, r.__delslice__, r.__iadd__, r.__imul__, \
                   r.append, r.extend, r.insert, r.pop, r.remove]
        for n in naughty:
            self.assertRaises(NotImplementedError, n)

class BaseFrequencyTests(TestCase):
    """Tests of BaseFrequency class: wrapper for FrequencyDistibution."""
    def test_init(self):
        """BaseFrequency should init as expected"""
        self.assertEqual(BaseFrequency('UUUCCCCAG'), \
                         Freqs('UUUCCCCAG', 'UCAG'))
        self.assertEqual(BaseFrequency('TTTCAGG', RNA=False), \
                         Freqs('TTTCAGG'))
    def test_init_bad(self):
        """BaseFrequency init should disallow bad characters"""
        self.assertRaises(Exception, BaseFrequency, 'TTTCAGG')
        self.assertRaises(Exception, BaseFrequency, 'UACGUA', False)

class PairFrequencyTests(TestCase):
    """Tests of PairFrequency class: wrapper for Freqs."""
    def test_init_one_parameter(self):
        """PairFrequency should interpret single parameter as pair probs"""
        obs = PairFrequency('UCCC')
        exp = Freqs({('U','U'):0.0625, ('U','C'):0.1875, 
                      ('C','U'):0.1875, ('C','C'):0.5625})
        
        for k, v in exp.items():
            self.assertEqual(v, obs[k])
        for k, v in obs.items():
            if k not in exp:
                self.assertEqual(v, 0)

        self.assertEqual(PairFrequency('UCCC', [('U','U'),('C','C')]), \
            Freqs({('U','U'):0.1, ('C','C'):0.9}))
        #check that the alphabets are right: should not raise error on
        #incrementing characters already there, but should raise KeyError
        #on anything that's missing.
        p = PairFrequency('UCCC')
        p[('U','U')] += 1
        try:
            p[('X','U')] += 1
        except KeyError:
            pass
        else:
            raise AssertionError, "Expected KeyError."
        p = PairFrequency('UCCC', (('C','C'),))
        p[('C','C')] += 1
        try:
            p[('U','U')] += 1
        except KeyError:
            pass
        else:
            raise AssertionError, "Expected KeyError."

class BasePairFrequencyTests(TestCase):
    """Tests of the BaseFrequency class, constructed for easy initialization."""
    def test_init(self):
        """BaseFrequency init should provide correct PairFrequency"""
        WatsonCrick = [('A','U'), ('U','A'),('G','C'),('C','G')]
        Wobble = WatsonCrick + [('G','U'), ('U','G')]
        #by default, basepair should have the wobble alphabet
        bpf = BasePairFrequency('UUACG')
        pf = PairFrequency('UUACG', Wobble)
        self.assertEqual(bpf, pf)
        self.assertEqual(bpf.Constraint, pf.Constraint)
        #can turn GU off, leading to watson-crickery
        bpf = BasePairFrequency('UUACG', False)
        #make sure this gives different results...
        self.assertNotEqual(bpf, pf)
        self.assertNotEqual(bpf.Constraint, pf.Constraint)
        #...but that the results are the same when the correct alphabet is used
        pf = PairFrequency('UUACG', WatsonCrick)
        self.assertEqual(bpf, pf)
        self.assertEqual(bpf.Constraint, pf.Constraint)

class RegionModelTests(TestCase):
    """Tests of the RegionModel class. Base class just returns the template."""
    def test_init(self):
        """RegionModel base class should always return current template."""
        #test blank region model
        r = RegionModel()
        self.assertEqual(str(r.Current), '')
        self.assertEqual(len(r), 0)
        #now assign it to a template
        r.Template = ('ACGUUCGA')
        self.assertEqual(str(r.Current), 'ACGUUCGA')
        self.assertEqual(len(r), len('ACGUUCGA'))
        #check that refresh doesn't break anything
        r.refresh()
        self.assertEqual(str(r.Current), 'ACGUUCGA')
        self.assertEqual(len(r), len('ACGUUCGA'))
        #check composition
        self.assertEqual(r.Composition, None)
        d = {'A':3, 'U':10}
        r.Composition = Freqs(d)
        self.assertEqual(r.Composition, d)
        #check that composition doesn't break the update
        r.refresh()
        self.assertEqual(str(r.Current), 'ACGUUCGA')
        self.assertEqual(len(r), len('ACGUUCGA'))
 
class ConstantRegionTests(TestCase):
    """Tests of the ConstantRegion class. Just returns the template."""
    def test_init(self):
        """ConstantRegion should always return current template."""
        #test blank region model
        r = ConstantRegion()
        self.assertEqual(str(r.Current), '')
        self.assertEqual(len(r), 0)
        #now assign it to a template
        r.Template = ('ACGUUCGA')
        self.assertEqual(str(r.Current), 'ACGUUCGA')
        self.assertEqual(len(r), len('ACGUUCGA'))
        #check that refresh doesn't break anything
        r.refresh()
        self.assertEqual(str(r.Current), 'ACGUUCGA')
        self.assertEqual(len(r), len('ACGUUCGA'))
        #check composition
        self.assertEqual(r.Composition, None)
        d = {'A':3, 'U':10}
        r.Composition = Freqs(d)
        self.assertEqual(r.Composition, d)
        #check that composition doesn't break the update
        r.refresh()
        self.assertEqual(str(r.Current), 'ACGUUCGA')
        self.assertEqual(len(r), len('ACGUUCGA'))

class ShuffledRegionTests(TestCase):
    """Shuffled region should randomize string"""
    def test_init(self):
        """Shuffled region should init ok with string, ignoring base freqs"""
        #general strategy: seqs should be different, but sorted seqs should
        #be the same
        empty = ''
        seq = 'UUUCCCCAAAGGG'
        #check that we don't get errors on empty template
        r = ShuffledRegion(empty)
        r.refresh()
        self.assertEqual(str(r.Current), '')
        #check that changing the template changes the sequence
        r.Template = seq
        self.assertNotEqual(str(r.Current), '')
        #check that it shuffled the sequence the first time
        self.assertNotEqual(str(r.Current), seq)
        curr = str(r.Current)
        as_list = list(curr)
        #check that we have the right number of each type of base
        as_list.sort()
        exp_as_list = list(seq)
        exp_as_list.sort()
        self.assertEqual(as_list, exp_as_list)
        #check that we get something different if we refresh again
        r.refresh()
        self.assertNotEqual(str(r.Current), curr)
        as_list = list(str(r.Current))
        as_list.sort()
        self.assertEqual(as_list, exp_as_list)

class PairedRegionTests(TestCase):
    """Tests of paired region generation."""
    def test_init(self):
        """Paired region init and mutation should give expected results"""
        WatsonCrick = {'A':'U', 'U':'A', 'C':'G', 'G':'C'}
        Wobble = {'A':'U', 'U':'AG', 'C':'G', 'G':'UC'}
        #check that empty init doesn't give errors
        r = PairedRegion()
        r.refresh()
        #check that mutation works correctly
        r.Template = "N"
        self.assertEqual(len(r), 1)
        r.monomers('UCCGGA')
        upstream = r.Current[0]
        downstream = r.Current[1]
        states = {}
        num_to_do = 10000
        for i in range(num_to_do):
            r.refresh()
            curr = (upstream[0], downstream[0])
            assert upstream[0] in Wobble[downstream[0]]
            states[curr] = states.get(curr, 0) + 1
        for i in states.keys():
            assert i[1] in Wobble[i[0]]
        for i in Wobble:
            for j in Wobble[i]:
                assert (i, j) in states.keys()
        expected_dict = {('A','U'):num_to_do/14, ('U','A'):num_to_do/14,
                        ('C','G'):num_to_do/14*4, ('G','C'):num_to_do/14*4,
                        ('U','G'):num_to_do/14*2, ('G','U'):num_to_do/14*2,}
        # the following for loop was replaced with the assertSimilarFreqs
        # call below it
        #for key, val in expected.items():
            #self.assertFloatEqualAbs(val, states[key], 130) #conservative?
        expected = [val for key, val in expected_dict.items()]
        observed = [states[key] for key, val in expected_dict.items()]
        self.assertSimilarFreqs(observed, expected)

        assert ('G','U') in states
        assert ('U','G') in states
            
        r.monomers('UCGA', GU=False)
        upstream = r.Current[0]
        downstream = r.Current[1]
        states = {}
        num_to_do = 10000
        for i in range(num_to_do):
            r.refresh()
            curr = (upstream[0], downstream[0])
            assert upstream[0] in WatsonCrick[downstream[0]]
            states[curr] = states.get(curr, 0) + 1
        for i in states.keys():
            assert i[1] in WatsonCrick[i[0]]
        for i in WatsonCrick:
            for j in WatsonCrick[i]:
                assert (i, j) in states.keys()
        expected_dict = {('A','U'):num_to_do/4, ('U','A'):num_to_do/4,
                        ('C','G'):num_to_do/4, ('G','C'):num_to_do/4,}
        expected = [val for key, val in expected_dict.items()]
        observed = [states[key] for key, val in expected_dict.items()]
        self.assertSimilarFreqs(observed, expected)
        #for key, val in expected.items():
        #    self.assertFloatEqualAbs(val, states[key], 130) #3 std devs
        assert ('G','U') not in states
        assert ('U','G') not in states

class SequenceModelTests(TestCase):
    """Tests of the SequenceModel class."""
    def test_init(self):
        """SequenceModel should init OK with Isoleucine motif."""
        helices = [PairedRegion('NNN'), PairedRegion('NNNNN')]
        constants = [ConstantRegion('CUAC'), ConstantRegion('UAUUGGGG')]
        order = "H0 C0 H1 - H1 C1 H0"
        isoleucine = SequenceModel(order=order, constants=constants, \
            helices=helices)
        isoleucine.Composition = BaseFrequency('UCAG')
        #print
        #print
        for i in range(10):
            isoleucine.refresh()
            #print list(isoleucine)

        #print
        isoleucine.Composition = BaseFrequency('UCAG')
        isoleucine.GU = False
        #print
        for i in range(10):
            isoleucine.refresh()
            #print list(isoleucine)
        #print

class RuleTests(TestCase):
    """Tests of the Rule class"""
    
    def test_init_bad_params(self):
        """Rule should fail validation except with exactly 5 parameters"""
        self.assertRaises(TypeError, Rule, 1, 1, 1, 1)
        self.assertRaises(TypeError, Rule, 1, 1, 1, 1, 1, 1)
        
    def test_init_bad_length(self):
        """Rule should fail validation if helix extends past downstream start"""
        self.assertRaises(ValueError, Rule, 0, 0, 1, 0, 2)
        self.assertRaises(ValueError, Rule, 0, 0, 10, 10, 12)

    def test_init_bad_negative_params(self):
        """Rule should fail validation if any parameters are negative"""
        self.assertRaises(ValueError, Rule, -1, 0, 1, 0, 1)
        self.assertRaises(ValueError, Rule, 0, -1, 1, 1, 1)
        self.assertRaises(ValueError, Rule, 0, 0, -1, 0, 5)
        self.assertRaises(ValueError, Rule, 0, 0, 0, -1, 1)
        self.assertRaises(ValueError, Rule, 0, 0, 1, 1, -1)

    def test_init_bad_zero_length(self):
        """Rule should fail validation if length is zero"""
        self.assertRaises(ValueError, Rule, 0, 0, 1, 1, 0)

    def test_init_overlap(self):
        """Rule should fail validation if bases must pair with themselves"""
        self.assertRaises(ValueError, Rule, 0, 0, 0, 0, 1)
        self.assertRaises(ValueError, Rule, 0, 10, 0, 15, 4)

    def test_init_wrong_order(self):
        """First sequence must have lower index"""
        self.assertRaises(ValueError, Rule, 1, 0, 0, 5, 3)

    def test_init_ok_length(self):
        """Rule should init OK if helix extends to exactly downstream start"""
        x = Rule(0, 0, 1, 0, 1)
        self.assertEqual(str(x), \
        "Up Seq: 0 Up Pos: 0 Down Seq: 1 Down Pos: 0 Length: 1")
        #check adjacent bases
        x = Rule(0, 0, 0, 1, 1)
        self.assertEqual(str(x), \
        "Up Seq: 0 Up Pos: 0 Down Seq: 0 Down Pos: 1 Length: 1")
        x = Rule(1, 10, 2, 8, 7)
        #check rule that would cause overlap if motifs weren't different 
        self.assertEqual(str(x), \
        "Up Seq: 1 Up Pos: 10 Down Seq: 2 Down Pos: 8 Length: 7")

    def test_str(self):
        """Rule str method should give expected results"""
        x = Rule(1, 10, 2, 8, 7)
        self.assertEqual(str(x), \
        "Up Seq: 1 Up Pos: 10 Down Seq: 2 Down Pos: 8 Length: 7")

class RuleTests_compatibility(TestCase):
    """Tests to see whether the Rule compatibility code works"""

    def setUp(self):
        """Sets up some standard rules"""
        self.x = Rule(1, 5, 2, 10, 3)
        self.x_ok = Rule(1, 8, 2, 14, 4)
        self.x_ok_diff_sequences = Rule(3, 5, 5, 10, 3)
        self.x_bad_first = Rule(1, 0, 3, 10, 10)
        self.x_bad_first_2 = Rule(0, 0, 1, 8, 2)
        self.x_bad_second = Rule(1, 15, 2, 15, 8)
        self.x_bad_second_2 = Rule(1, 14, 2, 8, 4)

    def test_is_compatible_ok(self):
        """Rule.isCompatible should return True if rules don't overlap"""
        self.assertEqual(self.x.isCompatible(self.x_ok), True)  #no return value
        self.assertEqual(self.x.isCompatible(self.x_ok_diff_sequences), True)
        #check that it's transitive
        self.assertEqual(self.x_ok.isCompatible(self.x), True)
        self.assertEqual(self.x_ok_diff_sequences.isCompatible(self.x), True)
        
    def test_is_compatible_bad(self):
        """Rule.isComaptible should return False if rules overlap"""
        tests = [   (self.x, self.x_bad_first),
                    (self.x, self.x_bad_first_2),
                    (self.x, self.x_bad_second),
                    (self.x, self.x_bad_second_2),
                ]
        for first, second in tests:
            self.assertEqual(first.isCompatible(second), False)
            #check that it's transitive
            self.assertEqual(second.isCompatible(first), False)

    def test_fits_in_sequence(self):
        """Rule.fitsInSequence should return True if sequence long enough"""
        sequences = map('x'.__mul__, range(21))     #0 to 20 copies of 'x'
        rules = [self.x, self.x_ok, self.x_ok_diff_sequences, self.x_bad_first,
                 self.x_bad_first_2, self.x_bad_second, self.x_bad_second_2]
        #test a bunch of values for all the rules we have handy
        for s in sequences:
            for r in rules:
                if r.UpstreamPosition + r.Length > len(s):
                    self.assertEqual(r.fitsInSequence(s), False)
                else:
                    self.assertEqual(r.fitsInSequence(s), True)
        #test a couple of specific boundary cases
        #length-1 helix
        r = Rule(0, 0, 1, 0, 1)
        self.assertEqual(r.fitsInSequence(''), False)
        self.assertEqual(r.fitsInSequence('x'), True)
        self.assertEqual(r.fitsInSequence('xx'), True)
        #length-2 helix starting one base from the start
        r = Rule(1, 1, 2, 2, 2)
        self.assertEqual(r.fitsInSequence(''), False)
        self.assertEqual(r.fitsInSequence('x'), False)
        self.assertEqual(r.fitsInSequence('xx'), False)
        self.assertEqual(r.fitsInSequence('xxx'), True)
        self.assertEqual(r.fitsInSequence('xxxx'), True)

class ModuleTests(TestCase):
    """Tests of the Module class, which holds sequences and structures."""
    def test_init_bad(self):
        """Module init should fail if seq/struct missing, or mismatched lengths"""
        #test incorrect param number
        self.assertRaises(TypeError, Module, 'abc')
        self.assertRaises(TypeError, Module, 'abc', 'def', 'ghi')
        #test incorrect lengths
        self.assertRaises(ValueError, Module, 'abc', 'abcd')
        self.assertRaises(ValueError, Module, 'abcd', 'acb')

    def test_init_good(self):
        """Module init should work if seq and struct same length"""
        m = Module('U', '.')
        self.assertEqual(m.Sequence, 'U')
        self.assertEqual(m.Structure, '.')
        m.Sequence = ''
        m.Structure = ''
        self.assertEqual(m.Sequence, '')
        self.assertEqual(m.Structure, '')
        m.Sequence = 'CCUAGG'
        m.Structure = '((..))'
        self.assertEqual(m.Sequence, 'CCUAGG')
        self.assertEqual(m.Structure, '((..))')
        m.Structure = ''
        self.assertRaises(ValueError, m.__len__)

    def test_len(self):
        """Module len should work if seq and struct same length"""
        m = Module('CUAG', '....')
        self.assertEqual(len(m), 4)
        m = Module('', '')
        self.assertEqual(len(m), 0)
        m.Sequence = 'AUCGAUCGA'
        self.assertRaises(ValueError, m.__len__)
   
    def test_str(self):
       """Module str should contain sequence and structure"""
       m = Module('CUAG', '....')
       self.assertEqual(str(m), 'Sequence:  CUAG\nStructure: ....')
       m = Module('', '')
       self.assertEqual(str(m), 'Sequence:  \nStructure: ')
   
    def test_matches(self):
        """Module matches should return correct result for seq/struct match"""
        empty = Module('', '')
        short_p = Module('AC', '((')
        short_u = Module('UU', '..')
        short_up = Module('UU', '((')
        long_all = Module('GGGACGGUUGGUUGGUU', ')))((..((....((((') #struct+seq
        long_seq = Module('GGGACGGUUGGUU', ')))))))))))))') #seq but not struct
        long_struct = Module('GGGGGGGGGGGGG', ')))((..((....') #struct, not seq
        long_none = Module('GGGGGGGGGGGGG', ')))))))))))))') #not struct or seq

        #test overall matching
        for matcher in [empty, short_p, short_u, short_up]:
            self.assertEqual(matcher.matches(long_all), True)
            for longer in [long_seq, long_struct, long_none]:
                if matcher is empty:
                    self.assertEqual(matcher.matches(longer), True)
                else:
                    self.assertEqual(matcher.matches(longer), False)
        #test specific positions
        positions = {3:short_p, 11:short_u, 7:short_up, 15:short_up}
        for module in [short_p, short_u, short_up]:
            for i in range(len(long_all)):
                result = module.matches(long_all, i)
                if positions.get(i, None) is module:
                    self.assertEqual(result, True)
                else:
                    self.assertEqual(result, False)

class MotifTests(TestCase):
    """Tests of the Motif object, which has a set of Modules and Rules."""
    def setUp(self):
        """Defines a few standard motifs"""
        self.ile_mod_0 = Module('NNNCUACNNNNN', '(((((..(((((')
        self.ile_mod_1 = Module('NNNNNUAUUGGGGNNN', ')))))......)))))')
        self.ile_rule_0 = Rule(0, 0, 1, 15, 3)
        self.ile_rule_1 = Rule(0, 7, 1, 4, 5)
        self.ile = Motif([self.ile_mod_0, self.ile_mod_1], \
            [self.ile_rule_0, self.ile_rule_1])
        self.hh_mod_0 = Module('NNNNUNNNNN', '(((((.((((')
        self.hh_mod_1 = Module('NNNNCUGANGAGNNN', ')))).......((((')
        self.hh_mod_2 = Module('NNNCGAAANNNN', '))))...)))))')
        self.hh_rule_0 = Rule(0, 0, 2, 11, 5)
        self.hh_rule_1 = Rule(0, 6, 1, 3, 4)
        self.hh_rule_2 = Rule(1, 11, 2, 3, 4)
        self.hh = Motif([self.hh_mod_0, self.hh_mod_1, self.hh_mod_2], \
                        [self.hh_rule_0, self.hh_rule_1, self.hh_rule_2])
        self.simple_0 = Module('CCCCC', '(((..')
        self.simple_1 = Module('GGGGG', '..)))')
        self.simple_r = Rule(0, 0, 1, 4, 3)
        self.simple = Motif([self.simple_0, self.simple_1], [self.simple_r])

    def test_init_bad_rule_lengths(self):
        """Motif init should fail if rules don't match module lengths"""
        bad_rule = Rule(0, 0, 1, 8, 6)
        self.assertRaises(ValueError, Motif, [self.simple_0, self.simple_1], \
            [bad_rule])

    def test_init_conflicting_rules(self):
        """Motif init should fail if rules overlap"""
        interferer = Rule(0, 2, 2, 20, 4)
        self.assertRaises(ValueError, Motif, [self.ile_mod_0, self.ile_mod_1, \
            self.ile_mod_0], [self.ile_rule_0, interferer]) 
    
    def test_matches_simple(self):
        """Test of simple match should work correctly"""
        index =                    '01234567890123456789012345678901'
        seq =                      'AAACCCCCUUUGGGGGAAACCCCCUUUGGGGG'
        struct =   ViennaStructure('((..((..))....))...(((.......)))')   
        struct_2 = ViennaStructure('((((((..((())))))))).....(((.)))')
                                    #substring right, not pair

        self.assertEqual(self.simple.matches(seq, struct, [19, 27]), True)
        self.assertEqual(self.simple.matches(seq, struct_2, [19,27]), False)

        for first_pos in range(len(seq) - len(self.simple_0) + 1):
            for second_pos in range(len(seq) - len(self.simple_1) + 1):
                #should match struct only at one location
                match=self.simple.matches(seq, struct, [first_pos, second_pos])
                if (first_pos == 19) and (second_pos == 27):
                    self.assertEqual(match, True)
                else:
                    self.assertEqual(match, False)
                #should never match in struct_2
                self.assertEqual(self.simple.matches(seq, struct_2, \
                    [first_pos, second_pos]), False)

        #check that it doesn't fail if there are _two_ matches
        index =  '01234567890123456789'
        seq =    'CCCCCGGGGGCCCCCGGGGG'
        struct = '(((....)))(((....)))'
        struct = ViennaStructure(struct)
        self.assertEqual(self.simple.matches(seq, struct, [0, 5]), True)
        self.assertEqual(self.simple.matches(seq, struct, [10,15]), True)
        #not allowed to cross-pair
        self.assertEqual(self.simple.matches(seq, struct, [0, 15]), False)

    def test_matches_ile(self):
        """Test of isoleucine match should work correctly"""
        index =    '012345678901234567890123456789012345'
        seq_good = 'AAACCCCUACUUUUUCCCAAAAAUAUUGGGGGGGAA'
        seq_bad =  'AAACCCCUACUUUUUCCCAAAAAUAUUGGGCGGGAA'
        st_good =  '...(((((..(((((...)))))......)))))..'
        st_bad =   '((((((((..(((((...)))))...))))))))..'

        st_good = ViennaStructure(st_good)
        st_bad = ViennaStructure(st_bad)

        for first_pos in range(len(seq_good) - len(self.ile_mod_0) + 1):
            for second_pos in range(len(seq_good) - len(self.ile_mod_1) + 1):
                #seq_good and struct_good should match at one location
                match=self.ile.matches(seq_good,st_good,[first_pos,second_pos])
                if (first_pos == 3) and (second_pos == 18):
                    self.assertEqual(match, True)
                else:
                    self.assertEqual(match, False)
                self.assertEqual(self.ile.matches(seq_good, st_bad, \
                    [first_pos, second_pos]), False)
                self.assertEqual(self.ile.matches(seq_bad, st_good, \
                    [first_pos, second_pos]), False)
                self.assertEqual(self.ile.matches(seq_bad, st_bad, \
                    [first_pos, second_pos]), False)

    def test_matches_hh(self):
        """Test of hammerhead match should work correctly"""
        index =    '0123456789012345678901234567890123456'
        seq_good = 'CCCCUAGGGGCCCCCUGAAGAGAAAUUUCGAAAGGGG'
        seq_bad ='CCCCCAGGGGCCCCCUGAAGAGAAAUUUCGAAGGGGG'
        structure ='(((((.(((()))).......(((())))...)))))'
        struct = ViennaStructure(structure)
        self.assertEqual(self.hh.matches(seq_good, struct, [0, 10, 25]), True)
        self.assertEqual(self.hh.matches(seq_bad, struct, [0, 10, 25]), False)

    def test_structureMatches_hh(self):
        """Test of hammerhead structureMatch should work correctly"""
        index =    '0123456789012345678901234567890123456'
        seq_good = 'CCCCUAGGGGCCCCCUGAAGAGAAAUUUCGAAAGGGG'
        seq_bad ='CCCCCAGGGGCCCCCUGAAGAGAAAUUUCGAAGGGGG'
        structure ='(((((.(((()))).......(((())))...)))))'
        struct = ViennaStructure(structure)
        self.assertEqual(self.hh.structureMatches(struct, [0, 10, 25]), True)
        self.assertEqual(self.hh.structureMatches(struct, [0, 10, 25]), True)

class SequenceEmbedderTests(TestCase):
    """Tests of the SequenceEmbedder class."""
    def setUp(self):
        """Define a few standard models and motifs"""
        ile_mod_0 = Module('NNNCUACNNNNN', '(((((..(((((')
        ile_mod_1 = Module('NNNNNUAUUGGGGNNN', ')))))......)))))')
        ile_rule_0 = Rule(0, 0, 1, 15, 5)
        ile_rule_1 = Rule(0, 7, 1, 4, 5)
        ile_motif = Motif([ile_mod_0, ile_mod_1], \
            [ile_rule_0, ile_rule_1])

        helices = [PairedRegion('NNN'), PairedRegion('NNNNN')]
        constants = [ConstantRegion('CUAC'), ConstantRegion('UAUUGGGG')]
        order = "H0 C0 H1 - H1 C1 H0"
        ile_model = SequenceModel(order=order, constants=constants, \
            helices=helices, composition=BaseFrequency('UCAG'))

        self.ile_embedder = SequenceEmbedder(length=50, num_to_do=10, \
            motif=ile_motif, model=ile_model, composition=BaseFrequency('UCAG'))

        short_ile_mod_0 = Module('NCUACNN', '(((..((')
        short_ile_mod_1 = Module('NNUAUUGGGGN', '))......)))')
        short_ile_rule_0 = Rule(0, 0, 1, 10, 3)
        short_ile_rule_1 = Rule(0, 5, 1, 1, 2)
        short_ile_motif = Motif([short_ile_mod_0, short_ile_mod_1], \
            [short_ile_rule_0, short_ile_rule_1])

        short_helices = [PairedRegion('N'), PairedRegion('NN')]
        short_constants = [ConstantRegion('CUAC'), ConstantRegion('UAUUGGGG')]
        short_order = "H0 C0 H1 - H1 C1 H0"
        short_ile_model = SequenceModel(order=short_order, \
            constants=short_constants, \
            helices=short_helices, composition=BaseFrequency('UCAG'))

        self.short_ile_embedder = SequenceEmbedder(length=50, num_to_do=10, \
            motif=short_ile_motif, model=short_ile_model, \
            composition=BaseFrequency('UCAG'))


    def test_composition_change(self):
        """Changes in composition should propagate."""
        rr = str(self.ile_embedder.RandomRegion.Current)
        #for base in 'UCAG':
        #    assert base in rr
        #the above two lines should generally be true but fail stochastically
        self.ile_embedder.Composition = BaseFrequency('CG')
        self.assertEqual(self.ile_embedder.Model.Composition, \
            BaseFrequency('CG'))
        self.assertEqual(self.ile_embedder.RandomRegion.Composition, \
            BaseFrequency('CG'))
        self.ile_embedder.RandomRegion.refresh()
        self.assertEqual(len(self.ile_embedder.RandomRegion), 22)
        rr = str(self.ile_embedder.RandomRegion.Current)
        assert ('C' in rr or 'G' in rr)
        assert 'A' not in rr
        assert 'U' not in rr

    def test_choose_locations_too_short(self):
        """SequenceEmbedder _choose_locations should fail if too little space"""
        self.ile_embedder.Length = 28   #no positions left over
        self.assertRaises(ValueError, self.ile_embedder._choose_locations)
        self.ile_embedder.Length = 29   #one position left over
        self.assertRaises(ValueError, self.ile_embedder._choose_locations)

    def test_choose_locations_exact(self):
        """SequenceEmbedder _choose_locations should pick all locations"""
        self.ile_embedder.Length = 30   #two positions left: must both be filled
        for i in range(10):
            first, second = self.ile_embedder._choose_locations()
            self.assertEqual(first, 0)
            self.assertEqual(second, 1)
            
    def test_choose_locations_even(self):
        """SequenceEmbedder _choose_locations should pick locations evenly"""
        self.ile_embedder.Length = 31   #three positions left
        counts = {}
        for i in range(1000):
            key = tuple(self.ile_embedder._choose_locations())
            assert key[0] != key[1]
            curr = counts.get(key, 0)
            counts[key] = curr + 1
        expected = [333, 333, 333]
        observed = [counts[(0,1)], counts[(0,2)], counts[(1,2)]]
        self.assertSimilarFreqs(observed, expected)
        #make sure nothing else snuck in there
        self.assertEqual(counts[(0,1)]+counts[(0,2)]+counts[(1,2)], 1000)

    def test_choose_locations_with_replacement(self):
        """SequenceEmbedder _choose_locations can sample with replacement"""
        self.ile_embedder.Length = 28   #exact fit
        self.ile_embedder.WithReplacement = True
        for i in range(10):
            first, second = self.ile_embedder._choose_locations()
            self.assertEqual(first, 0)
            self.assertEqual(second, 0)

        self.ile_embedder.Length = 29 #one left over: can be 0,0 0,1 1,1
        counts = {}
        for i in range(1000):
            key = tuple(self.ile_embedder._choose_locations())
            curr = counts.get(key, 0)
            counts[key] = curr + 1
        expected = [250, 500, 250]
        observed = [counts[(0,0)], counts[(0,1)], counts[(1,1)]]
        self.assertSimilarFreqs(observed, expected)
        #make sure nothing else snuck in there
        self.assertEqual(counts[(0,0)]+counts[(0,1)]+counts[(1,1)], 1000)
  
    def test_insert_modules(self):
        """SequenceEmbedder _insert_modules should make correct sequence"""
        ile = self.ile_embedder
        ile.Length = 50
        ile.RandomRegion.Current[:] = ['A'] * 22
        modules = list(ile.Model)
        ile.Positions = [0, 0]  #try inserting at first position
        self.assertEqual(str(ile), modules[0] + modules[1] + 'A'*22)

        ile.Positions = [3, 20]
        self.assertEqual(str(ile), 'A'*3+modules[0]+'A'*17+modules[1]+'A'*2)

    def test_refresh(self):
        """SequenceEmbedder refresh should change module sequences"""
        modules_before = list(self.ile_embedder.Model)
        random_before = str(self.ile_embedder.RandomRegion.Current)
        self.ile_embedder.refresh()
        random_after = str(self.ile_embedder.RandomRegion.Current)
        self.assertNotEqual(random_before, random_after)
        modules_after = list(self.ile_embedder.Model)
        for before, after in zip(modules_before, modules_after):
            self.assertNotEqual(before, after)
        #check that it works twice
        self.ile_embedder.refresh()
        random_third = str(self.ile_embedder.RandomRegion.Current)
        modules_third = list(self.ile_embedder.Model)
        self.assertNotEqual(random_third, random_before)
        self.assertNotEqual(random_third, random_after)
        for first, second, third in \
            zip(modules_before, modules_after, modules_third):
            self.assertNotEqual(first, third)
            self.assertNotEqual(second, third)

    def test_countMatches(self):
        """Shouldn't find any Ile matches if all the pairs are GU"""
        if not RNAFOLD_PRESENT:
            return
        self.ile_embedder.NumToDo = 100
        self.ile_embedder.Composition = BaseFrequency('GGGGGGGGGU')
        self.ile_embedder.Length = 40
        good_count = self.ile_embedder.countMatches()   
        self.assertEqual(good_count, 0)

    def test_countMatches_pass(self):
        """Should find some matches against a random background"""
        if not RNAFOLD_PRESENT:
            return
        self.ile_embedder.NumToDo = 100
        self.ile_embedder.Composition = BaseFrequency('UCAG')
        self.ile_embedder.Length = 40
        good_count = self.ile_embedder.countMatches()
        self.assertNotEqual(good_count, 0)

    def test_refresh_specific_position(self):
        """Should always find the module in the same position if specified"""
        first_module = Module('AAAAA', '(((((')
        second_module = Module('UUUUU', ')))))')
        rule_1 = Rule(0, 0, 1, 4, 5)
        helix = Motif([first_module, second_module], [rule_1])
        model = SequenceModel(constants=[ConstantRegion('AAAAA'), \
            ConstantRegion('UUUUU')], order='C0 - C1', \
            composition=BaseFrequency('A'))
        embedder = SequenceEmbedder(length=30, num_to_do=100, \
            motif=helix, model=model, composition=BaseFrequency('CG'), \
            positions=[3, 6])

        last = ''
        for i in range(100):
            embedder.refresh()
            curr = str(embedder)
            self.assertEqual(curr[3:8], 'AAAAA')
            self.assertEqual(curr[11:16], 'UUUUU')
            self.assertEqual(curr.count('A'), 5)
            self.assertEqual(curr.count('U'), 5)
            self.assertNotEqual(last, curr)
            last = curr
            
    def test_refresh_primers(self):
        """Module should appear in correct location with primers"""
        first_module = Module('AAAAA', '(((((')
        second_module = Module('UUUUU', ')))))')
        rule_1 = Rule(0, 0, 1, 4, 5)
        helix = Motif([first_module, second_module], [rule_1])
        model = SequenceModel(constants=[ConstantRegion('AAAAA'), \
            ConstantRegion('UUUUU')], order='C0 - C1', \
            composition=BaseFrequency('A'))
        embedder = SequenceEmbedder(length=30, num_to_do=100, \
            motif=helix, model=model, composition=BaseFrequency('CG'), \
            positions=[3, 6], primer_5 = 'UUU', primer_3 = 'AAA')

        last = ''
        for i in range(100):
            embedder.refresh()
            curr = str(embedder)
            self.assertEqual(curr[0:3], 'UUU')
            self.assertEqual(curr[6:11], 'AAAAA')
            self.assertEqual(curr[14:19], 'UUUUU')
            self.assertEqual(curr.count('A'), 8)
            self.assertEqual(curr.count('U'), 8)
            self.assertEqual(curr[-3:], 'AAA')
            self.assertNotEqual(last, curr)
            last = curr
 
    def xxx_test_count_long(self):
        self.ile_embedder.NumToDo = 100000
        self.ile_embedder.Composition = BaseFrequency('UCAG')
        print
        print "Extended helices"
        for length in range(30, 150):
            self.ile_embedder.Length = length
            good_count = self.ile_embedder.countMatches()   
            print "Length: %s Matches: %s/100000" % (length, good_count)
        print

    def xxx_test_count_short(self):
        self.short_ile_embedder.NumToDo = 10000
        self.short_ile_embedder.Composition = BaseFrequency('UCAG')
        print
        print "Minimal motif"
        for length in range(20, 150):
            self.short_ile_embedder.Length = length
            good_count = self.short_ile_embedder.countMatches()   
            print "Length: %s Matches: %s/10000" % (length, good_count)
        print



if __name__ == '__main__':
    main()