File: Organism.py

package info (click to toggle)
python-biopython 1.42-2
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 17,584 kB
  • ctags: 12,272
  • sloc: python: 80,461; xml: 13,834; ansic: 7,902; cpp: 1,855; sql: 1,144; makefile: 203
file content (144 lines) | stat: -rw-r--r-- 4,884 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
"""Deal with an Organism in a Genetic Algorithm population.
"""
# standard modules
import random
import array

# Sequence objects from Biopython
from Bio.Seq import MutableSeq

def function_population(new_genome, num_organisms, fitness_calculator):
    """Build a population whose genomes come from a factory callable.

    Arguments:

    o new_genome -- A function or other callable, taking no arguments,
    that returns the genome for one new organism. Each returned genome
    should be a MutableSeq object with a specified alphabet.

    o num_organisms -- How many individuals to create.

    o fitness_calculator -- A function that computes the fitness of an
    organism when handed that organism's genome.

    Returns a list holding one Organism per requested individual.
    """
    # new_genome is invoked once per organism so that every individual
    # receives its own freshly created genome
    return [Organism(new_genome(), fitness_calculator)
            for _ in range(num_organisms)]

def random_population(genome_alphabet, genome_size, num_organisms,
                      fitness_calculator):
    """Generate a population of individuals with randomly set genomes.

    Arguments:

    o genome_alphabet -- An Alphabet object describing all of the
    possible letters that could potentially be in the genome of an
    organism. Its letters attribute must be a non-empty sequence whose
    first element is a string, an integer or a float.

    o genome_size -- The size of each organism's genome.

    o num_organisms -- The number of organisms we want in the population.

    o fitness_calculator -- A function that will calculate the fitness
    of the organism when given the organism's genome.

    Returns a list containing num_organisms Organism objects.

    Raises ValueError if the letters of the alphabet are not of one of
    the supported types.
    """
    letters = genome_alphabet.letters

    # the genome is backed by an array, so pick the array type code that
    # matches the type of the letters in the alphabet (only the first
    # letter is inspected)
    # NOTE: the "c" type code is only provided by Python 2's array module
    letter_type = type(letters[0])
    if letter_type == type("A"):
        alphabet_type = "c"
    elif letter_type == type(1):
        alphabet_type = "i"
    elif letter_type == type(1.0):
        alphabet_type = "d"
    else:
        # letters is wrapped in a one element tuple so that a tuple of
        # letters is formatted as a single value instead of being
        # unpacked as separate format arguments
        raise ValueError("Alphabet type is unsupported: %s" % (letters,))

    # a random number generator to get letters for the genome
    letter_rand = random.Random()

    all_orgs = []
    for org_num in range(num_organisms):
        new_genome = MutableSeq(array.array(alphabet_type), genome_alphabet)

        # generate the genome randomly
        for gene_num in range(genome_size):
            new_genome.append(letter_rand.choice(letters))

        # add the new organism with this genome
        all_orgs.append(Organism(new_genome, fitness_calculator))

    return all_orgs

class Organism:
    """A single member of a Genetic Algorithm population.

    Attributes:

    o genome -- The organism's genome, a MutableSeq object holding the
    genome sequence together with the alphabet of elements allowed in it.

    o fitness -- The fitness value most recently stored for the organism.
    It is not refreshed automatically when the genome is modified, so
    call recalculate_fitness() after changing the genome to bring the
    two back in sync.
    """
    def __init__(self, genome, fitness_calculator, start_fitness = None):
        """Set up an organism from its genome.

        Arguments:

        o genome -- A MutableSeq object holding the genome sequence.

        o fitness_calculator -- A function that computes the fitness of
        the organism when handed the organism's genome.

        o start_fitness -- A fitness value already known to correspond to
        the given genome. When left as None, the fitness is computed
        with fitness_calculator instead.
        """
        assert isinstance(genome, MutableSeq), "Genome must be a MutableSeq"

        self.genome = genome
        self._fitness_calc = fitness_calculator

        # only run the fitness function when no fitness was supplied
        if start_fitness is not None:
            self.fitness = start_fitness
        else:
            self.fitness = self._fitness_calc(self.genome)

    def __str__(self):
        """Describe the genome and fitness as a string, for debugging.
        """
        details = (self.genome.data, self.fitness)
        return "Genome: %s; Fitness %s" % details

    def __cmp__(self, other):
        """Order organisms relative to one another.

        The comparison looks only at the genomes of the two organisms.
        """
        mine, theirs = self.genome, other.genome
        return cmp(mine, theirs)

    def copy(self):
        """Create a duplicate of this organism.

        The duplicate gets its own slice copy of the genome and reuses
        the stored fitness, so it can be altered without touching the
        original organism.
        """
        return Organism(self.genome[:], self._fitness_calc, self.fitness)

    def recalculate_fitness(self):
        """Recompute the fitness from the genome as it currently stands.

        Call this after changing the genome so that the stored fitness
        matches the genome again.
        """
        calculate = self._fitness_calc
        self.fitness = calculate(self.genome)