File: GeneralPoint.py

package info (click to toggle)
python-biopython 1.68%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 46,860 kB
  • ctags: 13,237
  • sloc: python: 160,306; xml: 93,216; ansic: 9,118; sql: 1,208; makefile: 155; sh: 63
file content (193 lines) | stat: -rw-r--r-- 6,387 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.
#

"""
Generalized N-Point Crossover.

For even values of N, perform N point crossover
  (select N/2 points each in the two genomes, and alternate)
For odd values of N, perform symmetric N+1 point crossover
  (select N/2 points for both genomes)

N-Point introduction (my notation):

    | genome 1:    A-----B-----C-----D-----E-----F-----G
    | genome 2:    a=====b=====c=====d=====e=====f=====g
    |
    | 2-point, symmetric (points=1):
    |              A-----B-----C- 1 -D-----E-----F-----G
    |              a=====b=====c= 2 =d=====e=====f=====g
    | returns: (ABCdefg, abcDEFG)
    |
    | 2-point, asymmetric (points=2):
    |              A-----B- 1 -C-----D-----E-----F-----G
    |              a=====b=====c=====d=====e= 2 =f=====g
    | returns: (ABfg, abcdeCDEFG)

and for the drastic (n can be arbitrary to the length of the genome!):

    | 12-point, symmetric (points=11):
    |              A- 1 -B- 2 -C- 3 -D- 4 -E- 5 -F- 6 -G
    |              a= 7 =b= 8 =c= 9 =d= 10 e= 11 f= 12 g
    | returns: (AbCdEfG, aBcDeFg)
    | (note that points=12 will yield the same result, but 11
    |  may be somewhat faster)
"""
# standard modules
import random

from Bio._py3k import range


class GeneralPointCrossover(object):
    """Perform n-point crossover between genomes at some defined rates.

       Ideas on how to use this class:

           - Call it directly ( construct, do_crossover )
           - Use one of the provided subclasses
           - Inherit from it:
               * replace _generate_locs with a more domain
                 specific technique
               * replace _crossover with a more efficient
                 technique for your point-count
    """
    def __init__(self, points, crossover_prob=.1):
        """Initialize to do crossovers at the specified probability.
        """
        self._crossover_prob = crossover_prob

        self._sym = points % 2  # odd n, gets a symmetry flag
        self._npoints = (points + self._sym) // 2  # (N or N+1)//2

    def do_crossover(self, org_1, org_2):
        """Potentially do a crossover between the two organisms.
        """
        new_org = (org_1.copy(), org_2.copy())

        # determine if we have a crossover
        crossover_chance = random.random()
        if crossover_chance <= self._crossover_prob:

            # pre-compute bounds (len(genome))
            bound = (len(new_org[0].genome), len(new_org[1].genome))

            mbound = min(bound)
            # can't have more than 0,x_0...x_n,bound locations
            if (self._npoints == 0 or self._npoints > mbound - 2):
                self._npoints = mbound - 2

            y_locs = []
            # generate list for the shortest of the genomes
            x_locs = self._generate_locs(mbound)

            if (self._sym != 0):
                y_locs = x_locs
            else:
                # figure out which list we've generated
                # for, and generate the other
                if (mbound == bound[0]):
                    y_locs = self._generate_locs(bound[1])
                else:
                    y_locs = x_locs
                    xlocs = self._generate_locs(bound[0])

            # copy new genome strings over
            tmp = self._crossover(0, new_org, (x_locs, y_locs))
            new_org[1].genome = self._crossover(1, new_org, (x_locs, y_locs))
            new_org[0].genome = tmp

        return new_org

    def _generate_locs(self, bound):
        """Generalized Location Generator:

           arguments:

              - bound (int)   - upper bound

           returns: [0]+x_0...x_n+[bound]
             where n=self._npoints-1
               and 0 < x_0 < x_1 ... < bound
        """
        results = []
        for increment in range(self._npoints):
            x = random.randint(1, bound - 1)
            while (x in results):  # uniqueness
                x = random.randint(1, bound - 1)
            results.append(x)
        results.sort()             # sorted
        return [0] + results + [bound]  # [0, +n points+, bound]

    def _crossover(self, x, no, locs):
        """Generalized Crossover Function:

           arguments:
               - x (int)        - genome number [0|1]
               - no (organism,organism)

                - new organisms

                - locs (int list, int list)

                - lists of locations,
                  [0, +n points+, bound]
                  for each genome (sync'd with x)

            return type: sequence (to replace no[x])
        """
        s = no[x].genome[:locs[x][1]]
        for n in range(1, self._npoints):
            # flipflop between genome_0 and genome_1
            mode = (x + n) % 2
            # _generate_locs gives us [0, +n points+, bound]
            #  so we can iterate: { 0:loc(1) ... loc(n):bound }
            t = no[mode].genome[locs[mode][n]:locs[mode][n + 1]]
            if (s):
                s = s + t
            else:
                s = t
        return s


class TwoCrossover(GeneralPointCrossover):
    """Helper class for Two Point crossovers.

    Offers more efficient replacements to the GeneralPoint framework
    for single pivot crossovers
    """
    def _generate_locs(self, bound):
        """Replacement generation.

        See GeneralPoint._generate_locs documentation for details
        """
        return [0, random.randint(1, bound - 1), bound]

    def _crossover(self, x, no, locs):
        """Replacement crossover

           see GeneralPoint._crossover documentation for details
        """
        y = (x + 1) % 2
        return no[x].genome[:locs[x][1]] + no[y].genome[locs[y][1]:]


class InterleaveCrossover(GeneralPointCrossover):
    """Demonstration class for Interleaving crossover.

    Interleaving:  AbCdEfG, aBcDeFg
    """
    def __init__(self, crossover_prob=0.1):
        GeneralPointCrossover.__init__(self, 0, crossover_prob)

    def _generate_locs(self, bound):
        return list(range(-1, bound + 1))

    def _crossover(self, x, no, locs):
        s = no[x].genome[0:1]
        for n in range(1, self._npoints + 2):
            mode = (x + n) % 2
            s += no[mode].genome[n:n + 1]
        return s + no[mode].genome[self._npoints + 3:]