File: PhaseLS.java

package info (click to toggle)
beagle 250227-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 9,364 kB
  • sloc: java: 17,684; sh: 55; makefile: 11
file content (195 lines) | stat: -rw-r--r-- 7,221 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
/*
 * Copyright (C) 2014-2021 Brian L. Browning
 *
 * This file is part of Beagle
 *
 * Beagle is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Beagle is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package phase;

import blbutil.Const;
import blbutil.MultiThreadUtils;
import blbutil.Utilities;
import java.util.Arrays;
import java.util.Random;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.IntStream;
import main.Par;

/**
 * <p>Class {@code PhaseLS} contains static methods for estimated genotypes
 * phase using a haploid Li and Stephens hidden Markov model.</p>
 *
 * @author Brian L. Browning {@code <browning@uw.edu>}
 */
public class PhaseLS {

    private PhaseLS() {
        // private constructor to prevent instantiation
    }

    /**
     * Updates genotype phase estimates in the target samples.
     * @param pd estimated phased genotypes at stage 1 markers
     * @throws NullPointerException if {@code pd == null}
     */
    public static void runStage1(PhaseData pd) {
        Par par = pd.fpd().par();
        int it = pd.it();
        int nThreads = par.nthreads();
        int nSamples = pd.fpd().targGT().nSamples();
        int nBurninIts = par.burnin();
        PbwtPhaseIbs phaseIbs = pbwtPhaseIbs(pd);
        if (par.em()) {
            Random rand = new Random(pd.seed());
            if (it==0) {
                initializeParameters(phaseIbs, rand);
            }
            else if (it<nBurninIts) {
                updateParameters(phaseIbs, rand);
            }
        }
        AtomicInteger samples = new AtomicInteger(0);
        ExecutorService es = Executors.newFixedThreadPool(nThreads);
        for (int j=0; j<nThreads; ++j) {
            es.submit(() -> {
                try {
                    PhaseBaum2 baum = new PhaseBaum2(phaseIbs);
                    for (int s=samples.getAndIncrement(); s<nSamples;
                            s=samples.getAndIncrement()) {
                        baum.phase(s);
                    }
                }
                catch (Throwable t) {
                    Utilities.exit(t);
                }
            } );
        }
        MultiThreadUtils.shutdownExecService(es);
    }

    private static PbwtPhaseIbs pbwtPhaseIbs(PhaseData pd) {
        boolean useBwd = (pd.it() & 1)==0;
        PbwtPhaseIbs phaseIbs = new PbwtPhaseIbs(pd, pd.codedSteps(), useBwd);
        return phaseIbs;
    }

    private static void initializeParameters(PbwtPhaseIbs phaseIbs, Random rand) {
        PhaseData pd = phaseIbs.phaseData();
        float prevRecInt = pd.recombIntensity();
        int maxInitialIts = 15;
        for (int j=0; j<maxInitialIts; ++j) {
            updateParameters(phaseIbs, rand);
            float recInt = pd.recombIntensity();
            if (Math.abs(recInt - prevRecInt) <= 0.1*prevRecInt) {
                break;
            }
            prevRecInt = recInt;
        }
    }

    private static void updateParameters(PbwtPhaseIbs phaseIbs, Random rand) {
        ParamEstimates paramEst = getParamEst(phaseIbs, rand);
        PhaseData pd = phaseIbs.phaseData();
        float prevPMismatch = pd.pMismatch();
        float pMismatch = paramEst.pMismatch();
        float recombIntensity = paramEst.recombIntensity();
        if (Float.isFinite(pMismatch) && pMismatch>prevPMismatch) {
            pd.updatePMismatch(pMismatch);
        }
        if (Float.isFinite(recombIntensity) && recombIntensity>0f) {
            pd.updateRecombIntensity(recombIntensity);
        }
    }

    private static ParamEstimates getParamEst(PbwtPhaseIbs phaseIbs, Random rand) {
        ParamEstimates paramEst = new ParamEstimates();
        PhaseData pd = phaseIbs.phaseData();
        FixedPhaseData fpd = pd.fpd();
        int[] sampleIndices = samplesToAnalyze(pd, rand);
        int nThreads = Math.min(fpd.par().nthreads(), sampleIndices.length);
        double maxSum = 20000.0/nThreads;
        int minIndices = 50;
        AtomicInteger counter = new AtomicInteger(0);
        ExecutorService es = Executors.newFixedThreadPool(nThreads);
        for (int j=0; j<nThreads; ++j) {
            es.submit(() -> {
                try {
                    HmmParamData hpd = new HmmParamData(phaseIbs);
                    int index = counter.getAndIncrement();
                    while ((hpd.sumSwitchProbs()<maxSum || index<minIndices)
                            && index<sampleIndices.length) {
                        hpd.update(sampleIndices[index]);
                        index = counter.getAndIncrement();
                        hpd.addEstimationData(paramEst);
                    }
                }
                catch (Throwable t) {
                    Utilities.exit(t);
                }
            } );
        }
        MultiThreadUtils.shutdownExecService(es);
        return paramEst;
    }

    private static int[] samplesToAnalyze(PhaseData pd, Random rand) {
        int maxSamplesToAnalyze = 500;
        int nTargSamples = pd.fpd().targGT().nSamples();
        int[] ia = IntStream.range(0, nTargSamples)
                .parallel()
                .toArray();
        if (nTargSamples <= maxSamplesToAnalyze) {
            return ia;
        }
        else {
            Utilities.shuffle(ia, maxSamplesToAnalyze, rand);
            return Arrays.copyOf(ia, maxSamplesToAnalyze);
        }
    }

    /**
     * Returns phased genotypes at all markers.
     * @param pd estimated phased genotypes at stage 1 markers
     * @return estimated phased genotypes at all markers
     * @throws NullPointerException if {@code pd == null}
     */
    public static Stage2Haps runStage2(PhaseData pd) {
        FixedPhaseData fpd = pd.fpd();
        int nThreads = fpd.par().nthreads();
        int nSamples = fpd.targGT().nSamples();
        LowFreqPhaseIbs phaseIbs = new LowFreqPhaseIbs(pd);
        Stage2Haps stage2Haps = new Stage2Haps(pd);
        AtomicInteger samples = new AtomicInteger(0);
        ExecutorService es = Executors.newFixedThreadPool(nThreads);
        for (int j=0; j<nThreads; ++j) {
            es.submit(() -> {
                try {
                    Stage2Baum baum = new Stage2Baum(phaseIbs, stage2Haps);
                    for (int s=samples.getAndIncrement(); s<nSamples;
                            s=samples.getAndIncrement()) {
                        baum.phase(s);
                    }
                }
                catch (Throwable t) {
                    Utilities.exit(t);
                }
            } );
        }
        MultiThreadUtils.shutdownExecService(es);
        return stage2Haps;
    }
}