1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214
|
# Copyright 2007 by Tiago Antao <tiagoantao@gmail.com>. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Utility functions for working with FDist (DEPRECATED)."""
from Bio.PopGen.GenePop import FileParser
import Bio.PopGen.FDist
# Quite a few utility functions could be done (like remove pop,
# add locus, etc...). The recommended strategy is convert back
# and forth from/to GenePop and use GenePop Utils
def convert_genepop_to_fdist(gp_rec, report_pops=None):
"""Converts a GenePop record to a FDist one.
Parameters:
gp_rec - Genepop Record (either standard or big)
Returns:
FDist record.
"""
if hasattr(gp_rec, "populations"):
return _convert_genepop_to_fdist(gp_rec)
else:
return _convert_genepop_to_fdist_big(gp_rec, report_pops)
def _convert_genepop_to_fdist(gp_rec):
"""Converts a standard GenePop record to a FDist one.
Parameters:
gp_rec - Genepop Record (Standard)
Returns:
FDist record.
"""
fd_rec = Bio.PopGen.FDist.Record()
fd_rec.data_org = 0
fd_rec.num_loci = len(gp_rec.loci_list)
fd_rec.num_pops = len(gp_rec.populations)
for lc_i in range(len(gp_rec.loci_list)):
alleles = []
pop_data = []
for pop_i in range(len(gp_rec.populations)):
for indiv in gp_rec.populations[pop_i]:
for al in indiv[1][lc_i]:
if al is not None and al not in alleles:
alleles.append(al)
alleles.sort() # Dominance requires this
# here we go again (necessary...)
for pop_i in range(len(gp_rec.populations)):
allele_counts = {}
for indiv in gp_rec.populations[pop_i]:
for al in indiv[1][lc_i]:
if al is not None:
count = allele_counts.get(al, 0)
allele_counts[al] = count + 1
allele_array = [] # We need the same order as in alleles
for allele in alleles:
allele_array.append(allele_counts.get(allele, 0))
pop_data.append(allele_array)
fd_rec.loci_data.append((len(alleles), pop_data))
return fd_rec
def _convert_genepop_to_fdist_big(gp_rec, report_pops=None):
"""Converts a big GenePop record to a FDist one.
Parameters:
gp_rec - Genepop Record (Big)
Returns:
FDist record.
"""
fd_rec = Bio.PopGen.FDist.Record()
fd_rec.data_org = 1
fd_rec.num_loci = len(gp_rec.loci_list)
num_loci = len(gp_rec.loci_list)
loci = []
for i in range(num_loci):
loci.append(set())
pops = []
work_rec = FileParser.read(gp_rec.fname)
lParser = work_rec.get_individual()
def init_pop():
my_pop = []
for i in range(num_loci):
my_pop.append({})
return my_pop
curr_pop = init_pop()
num_pops = 1
if report_pops:
report_pops(num_pops)
while lParser:
if lParser is not True:
for loci_pos in range(num_loci):
for al in lParser[1][loci_pos]:
if al is not None:
loci[loci_pos].add(al)
curr_pop[loci_pos][al] = curr_pop[loci_pos].get(al, 0) + 1
else:
pops.append(curr_pop)
num_pops += 1
if report_pops:
report_pops(num_pops)
curr_pop = init_pop()
lParser = work_rec.get_individual()
work_rec._handle.close() # TODO - Needs a proper fix
pops.append(curr_pop)
fd_rec.num_pops = num_pops
for loci_pos in range(num_loci):
alleles = sorted(loci[loci_pos])
loci_rec = [len(alleles), []]
for pop in pops:
pop_rec = []
for allele in alleles:
pop_rec.append(pop[loci_pos].get(allele, 0))
loci_rec[1].append(pop_rec)
fd_rec.loci_data.append(tuple(loci_rec))
return fd_rec
def _convert_genepop_to_fdist_big_old(gp_rec, report_loci=None):
"""Converts a big GenePop record to a FDist one.
Parameters:
gp_rec - Genepop Record (Big)
Returns:
FDist record.
"""
fd_rec = Bio.PopGen.FDist.Record()
def countPops(rec):
f2 = FileParser.read(rec.fname)
popCnt = 1
while f2.skip_population():
popCnt += 1
return popCnt
fd_rec.data_org = 0
fd_rec.num_loci = len(gp_rec.loci_list)
work_rec0 = FileParser.read(gp_rec.fname)
fd_rec.num_pops = countPops(work_rec0)
num_loci = len(gp_rec.loci_list)
for lc_i in range(num_loci):
if report_loci:
report_loci(lc_i, num_loci)
work_rec = FileParser.read(gp_rec.fname)
work_rec2 = FileParser.read(gp_rec.fname)
alleles = []
pop_data = []
lParser = work_rec.get_individual()
while lParser:
if lParser is not True:
for al in lParser[1][lc_i]:
if al is not None and al not in alleles:
alleles.append(al)
lParser = work_rec.get_individual()
# here we go again (necessary...)
alleles.sort()
def process_pop(pop_data, alleles, allele_counts):
allele_array = [] # We need the same order as in alleles
for allele in alleles:
allele_array.append(allele_counts.get(allele, 0))
pop_data.append(allele_array)
lParser = work_rec2.get_individual()
allele_counts = {}
for allele in alleles:
allele_counts[allele] = 0
allele_counts[None] = 0
while lParser:
if lParser is True:
process_pop(pop_data, alleles, allele_counts)
allele_counts = {}
for allele in alleles:
allele_counts[allele] = 0
allele_counts[None] = 0
else:
for al in lParser[1][lc_i]:
allele_counts[al] += 1
lParser = work_rec2.get_individual()
process_pop(pop_data, alleles, allele_counts)
fd_rec.loci_data.append((len(alleles), pop_data))
return fd_rec
def approximate_fst(desired_fst, simulated_fst, parameter_fst,
max_run_fst=1, min_run_fst=0, limit=0.005):
"""Calculates the next Fst attempt in order to approximate a
desired Fst.
"""
if abs(simulated_fst - desired_fst) < limit:
return parameter_fst, max_run_fst, min_run_fst
if simulated_fst > desired_fst:
max_run_fst = parameter_fst
next_parameter_fst = (min_run_fst + parameter_fst) / 2
else:
min_run_fst = parameter_fst
next_parameter_fst = (max_run_fst + parameter_fst) / 2
return next_parameter_fst, max_run_fst, min_run_fst
|