#!/usr/bin/env python
"""Test out HMMs using the Occasionally Dishonest Casino.
This uses the ocassionally dishonest casino example from Biological
Sequence Analysis by Durbin et al.
In this example, we are dealing with a casino that has two types of
dice, a fair dice that has 1/6 probability of rolling any number and
a loaded dice that has 1/2 probability to roll a 6, and 1/10 probability
to roll any other number. The probability of switching from the fair to
loaded dice is .05 and the probability of switching from loaded to fair is
.1.
"""
from __future__ import print_function
import os
if os.name == 'java':
    from Bio import MissingExternalDependencyError
    # This is a slight misuse of MissingExternalDependencyError,
    # but it will do in the short term to skip this unit test on Jython
    raise MissingExternalDependencyError("This test can cause a fatal error "
                                         "on Jython with some versions of Java")

# standard modules
import random
# biopython
from Bio import Alphabet
from Bio.Seq import MutableSeq
from Bio.Seq import Seq
# HMM stuff we are testing
from Bio.HMM import MarkovModel
from Bio.HMM import Trainer
from Bio.HMM import Utilities
# whether we should print everything out. Set this to zero for
# regression testing
VERBOSE = 0


# -- set up our alphabets
class DiceRollAlphabet(Alphabet.Alphabet):
    letters = ['1', '2', '3', '4', '5', '6']


class DiceTypeAlphabet(Alphabet.Alphabet):
    letters = ['F', 'L']
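

# Worked sanity check (added for illustration; the name below is not used by
# the rest of the test): with the switch probabilities described in the module
# docstring (.05 fair -> loaded and .1 loaded -> fair), the stationary
# distribution of the two-state chain is
#     pi_fair = .1 / (.05 + .1) = 2/3,   pi_loaded = .05 / (.05 + .1) = 1/3,
# so roughly two thirds of the generated rolls should come from the fair die.
_expected_fair_fraction = .1 / (.05 + .1)
if VERBOSE:
    print("Expected long-run fraction of fair rolls: %.3f"
          % _expected_fair_fraction)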


# -- useful functions
def _loaded_dice_roll(chance_num, cur_state):
    """Generate a dice roll based on the current state and a random number."""
    if cur_state == 'F':
        # fair die: each face has probability 1/6
        if chance_num <= 1.0 / 6:
            return '1'
        elif chance_num <= 2.0 / 6:
            return '2'
        elif chance_num <= 3.0 / 6:
            return '3'
        elif chance_num <= 4.0 / 6:
            return '4'
        elif chance_num <= 5.0 / 6:
            return '5'
        else:
            return '6'
    elif cur_state == 'L':
        # loaded die: faces 1-5 have probability 1/10 each, 6 has probability 1/2
        if chance_num <= 1.0 / 10:
            return '1'
        elif chance_num <= 2.0 / 10:
            return '2'
        elif chance_num <= 3.0 / 10:
            return '3'
        elif chance_num <= 4.0 / 10:
            return '4'
        elif chance_num <= 5.0 / 10:
            return '5'
        else:
            return '6'
    else:
        raise ValueError("Unexpected cur_state %s" % cur_state)


def generate_rolls(num_rolls):
    """Generate a bunch of rolls corresponding to the casino probabilities.

    Returns:

    o The generated roll sequence

    o The state sequence that generated the rolls.
    """
    # start off in the fair state
    cur_state = 'F'
    roll_seq = MutableSeq('', DiceRollAlphabet())
    state_seq = MutableSeq('', DiceTypeAlphabet())

    # generate the sequence
    for roll in range(num_rolls):
        state_seq.append(cur_state)
        # generate a random number
        chance_num = random.random()

        # add on a new roll to the sequence
        new_roll = _loaded_dice_roll(chance_num, cur_state)
        roll_seq.append(new_roll)

        # now give us a chance to switch to a new state
        chance_num = random.random()
        if cur_state == 'F':
            if chance_num <= .05:
                cur_state = 'L'
        elif cur_state == 'L':
            if chance_num <= .1:
                cur_state = 'F'

    return roll_seq.toseq(), state_seq.toseq()
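

# Illustrative usage of generate_rolls (added for clarity; the 'sample' names
# below are local to this example): draw a short sequence and report how often
# the loaded die was in play. Kept behind VERBOSE so regression output is
# unchanged.
if VERBOSE:
    sample_rolls, sample_states = generate_rolls(100)
    loaded_fraction = float(str(sample_states).count('L')) / len(sample_states)
    print("Fraction of rolls made with the loaded die: %.2f" % loaded_fraction)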

# -- build a MarkovModel
mm_builder = MarkovModel.MarkovModelBuilder(DiceTypeAlphabet(),
                                            DiceRollAlphabet())
mm_builder.allow_all_transitions()
mm_builder.set_random_probabilities()
"""
mm_builder.set_transition_score('F', 'L', .05)
mm_builder.set_transition_score('F', 'F', .95)
mm_builder.set_transition_score('L', 'F', .10)
mm_builder.set_transition_score('L', 'L', .9)
mm_builder.set_emission_score('F', '1', .17)
mm_builder.set_emission_score('F', '2', .17)
mm_builder.set_emission_score('F', '3', .17)
mm_builder.set_emission_score('F', '4', .17)
mm_builder.set_emission_score('F', '5', .17)
mm_builder.set_emission_score('F', '6', .17)
mm_builder.set_emission_score('L', '1', .1)
mm_builder.set_emission_score('L', '2', .1)
mm_builder.set_emission_score('L', '3', .1)
mm_builder.set_emission_score('L', '4', .1)
mm_builder.set_emission_score('L', '5', .1)
mm_builder.set_emission_score('L', '6', .5)
"""

# just get two different Markov models -- we'll train one using
# Baum-Welch, and one using the standard trainer
baum_welch_mm = mm_builder.get_markov_model()
standard_mm = mm_builder.get_markov_model()
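
# Illustrative peek at the randomly initialized model parameters (added here
# for clarity; kept behind VERBOSE so the regression output is unchanged).
if VERBOSE:
    print(baum_welch_mm.transition_prob)
    print(baum_welch_mm.emission_prob)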

# get a sequence of rolls to train the Markov model with
rolls, states = generate_rolls(3000)
# predicted_states, prob = my_mm.viterbi(rolls, DiceTypeAlphabet())
# print("prob: %f" % prob)
# Utilities.pretty_print_prediction(rolls, states, predicted_states)


# -- now train the model
def stop_training(log_likelihood_change, num_iterations):
    """Tell the training model when to stop."""
    if VERBOSE:
        print("ll change: %f" % log_likelihood_change)
    if log_likelihood_change < 0.01:
        return 1
    elif num_iterations >= 10:
        return 1
    else:
        return 0


# -- Standard Training with known states
print("Training with the Standard Trainer...")
known_training_seq = Trainer.TrainingSequence(rolls, states)

trainer = Trainer.KnownStateTrainer(standard_mm)
trained_mm = trainer.train([known_training_seq])

if VERBOSE:
    print(trained_mm.transition_prob)
    print(trained_mm.emission_prob)

test_rolls, test_states = generate_rolls(300)

predicted_states, prob = trained_mm.viterbi(test_rolls, DiceTypeAlphabet())
if VERBOSE:
    print("Prediction probability: %f" % prob)
    Utilities.pretty_print_prediction(test_rolls, test_states, predicted_states)
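

# Illustrative evaluation helper (added for clarity; _state_accuracy is not
# part of the original test): the fraction of positions where the Viterbi
# path matches the true state sequence.
def _state_accuracy(true_states, predicted):
    matches = sum(1 for t, p in zip(str(true_states), str(predicted)) if t == p)
    return float(matches) / len(true_states)


if VERBOSE:
    print("Standard trainer state accuracy: %.2f"
          % _state_accuracy(test_states, predicted_states))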


# -- Baum-Welch training without known state sequences
print("Training with Baum-Welch...")
training_seq = Trainer.TrainingSequence(rolls, Seq("", DiceTypeAlphabet()))

trainer = Trainer.BaumWelchTrainer(baum_welch_mm)
trained_mm = trainer.train([training_seq], stop_training)

if VERBOSE:
    print(trained_mm.transition_prob)
    print(trained_mm.emission_prob)

test_rolls, test_states = generate_rolls(300)

predicted_states, prob = trained_mm.viterbi(test_rolls, DiceTypeAlphabet())
if VERBOSE:
    print("Prediction probability: %f" % prob)
    Utilities.pretty_print_prediction(test_rolls, test_states, predicted_states)
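
# The same illustrative accuracy check for the Baum-Welch trained model.
if VERBOSE:
    print("Baum-Welch trainer state accuracy: %.2f"
          % _state_accuracy(test_states, predicted_states))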