1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
|
#!/usr/bin/env python3
from dirichlet import multinomial, multinomialln, multinomial_coefficient, multinomial_coefficientln
import math
import operator
def fold(func, iterable, initial=None, reverse=False):
x=initial
if reverse:
iterable=reversed(iterable)
for e in iterable:
x=func(x,e) if x is not None else e
return x
def hwe_expectation(genotype, allele_counts):
"""@genotype is counts of A,B,C etc. alleles, e.g. (2,0) is AA and (1,1) is AB
@allele_counts is the counts of the alleles in the population"""
population_total_alleles = sum(allele_counts)
ploidy = sum(genotype)
genotype_coeff = multinomial_coefficient(ploidy, genotype)
allele_frequencies = [count / float(population_total_alleles) for count in allele_counts]
genotype_expected_frequency = genotype_coeff * fold(operator.mul, [math.pow(freq, p) for freq, p in zip(allele_frequencies, genotype)])
return genotype_expected_frequency
def add_tuple(a, b):
l = []
for i,j in zip(a,b):
l.append(i + j)
return tuple(l)
# TODO handle NULL case, genotype has frequency 0
def hwe_sampling_probln(genotype, genotypes, ploidy):
"""@genotype: counts of A,B,C etc. alleles, e.g. (2,0) is AA and (1,1) is AB
@genotypes: counts of the genotypes in the population,
e.g. (((2,0),1), ((1,1),2), ((0,2),1)) would be 1xAA, 2xAB, 1xBB
@return: the probability that there are exactly as many "genotype" in the
population as suggested by the genotype counts, given HWE"""
population_total_alleles = sum([sum(g[0]) * g[1] for g in genotypes])
#print "population_total_alleles", population_total_alleles
allele_counts = fold(add_tuple, [[a * g[1] for a in g[0]] for g in genotypes])
#print "allele_counts", allele_counts
genotype_counts = [g[1] for g in genotypes]
#print "genotype_counts", genotype_counts
population_total_genotypes = sum([gtc[1] for gtc in genotypes])
#print "total genotypes:", population_total_genotypes
#print "ploidy", ploidy
# number of arrangements of the alleles in the sample
arrangements_of_alleles_in_sample = multinomial_coefficientln(population_total_alleles, allele_counts)
#print "arrangements_of_alleles_in_sample", math.exp(arrangements_of_alleles_in_sample)
# number of arrangements which contain exactly count genotypes
#arrangements_with_exactly_count_genotype = multinomial_coefficientln(population_total_genotypes, genotype_counts)
#print math.exp(multinomial_coefficientln(ploidy, genotype))
arrangements_with_exactly_count_genotype = \
multinomial_coefficientln(ploidy, genotype) \
+ multinomial_coefficientln(population_total_genotypes, genotype_counts)
#print "arrangements_with_exactly_count_genotype", math.exp(arrangements_with_exactly_count_genotype)
return arrangements_with_exactly_count_genotype - arrangements_of_alleles_in_sample;
def inbreeding_coefficient(genotype, genotypes):
population_total_alleles = sum([sum(g[0]) * g[1] for g in genotypes])
allele_counts = fold(add_tuple, [[a * g[1] for a in g[0]] for g in genotypes])
genotype_counts = [g[1] for g in genotypes]
population_total_genotypes = sum([gtc[1] for gtc in genotypes])
expected = hwe_expectation(genotype, allele_counts) * population_total_genotypes
observed = 0
for g in genotypes:
if g[0] == genotype:
observed = g[1]
break
if observed == 0:
print("error, no observations of genotype, cannot calculate inbreeding coefficient")
return 0
return 1 - (observed / expected)
|