File: CodedSteps.java

package info (click to toggle)
beagle 250227-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 9,364 kB
  • sloc: java: 17,684; sh: 55; makefile: 11
file content (156 lines) | stat: -rw-r--r-- 5,458 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
/*
 * Copyright (C) 2014-2021 Brian L. Browning
 *
 * This file is part of Beagle
 *
 * Beagle is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Beagle is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package phase;

import vcf.Steps;
import ints.IndexArray;
import ints.IntIntMap;
import java.util.Optional;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import vcf.Samples;
import vcf.XRefGT;

/**
 * <p>Class {@code CodedSteps} divides phased genotype data
 * into non-overlapping intervals (the steps), indexes the unique
 * allele sequences in each interval, and stores a map of haplotype
 * index to allele sequence index for each interval.</p>
 *
 * <p>Instances of class {@code CodedSteps} are immutable.</p>
 *
 * @author Brian L. Browning {@code <browning@uw.edu>}
 */
public class CodedSteps {

    private final Samples targSamples;
    private final Optional<XRefGT> refHaps;
    private final XRefGT allHaps;
    private final Steps steps;
    private final IndexArray[] codedSteps;

    /**
     * Constructs a new {@code CodedSteps} instance from the specified data.
     * @param estPhase the input data genotype phasing and the current phase
     * estimate for each target sample
     * @throws NullPointerException if {@code estPhase == null}
     */
    public CodedSteps(EstPhase estPhase) {
        FixedPhaseData fpd = estPhase.fpd();
        XRefGT targHaps = estPhase.phasedHaps();
        this.targSamples = targHaps.samples();
        this.refHaps = fpd.stage1XRefGT();
        this.allHaps = refHaps.isPresent()
                ? XRefGT.combine(targHaps, refHaps.get())
                : targHaps;
        this.steps = fpd.stage1Steps();
        this.codedSteps = codedSteps(allHaps, steps, fpd.par().nthreads());
    }

    private static IndexArray[] codedSteps(XRefGT gt, Steps steps, int nThreads) {
        int maxStepsPerBatch0 = 512;
        int nStepsPerBatch0 = (steps.size() + nThreads - 1)/nThreads;
        while (nStepsPerBatch0>maxStepsPerBatch0) {
            nStepsPerBatch0 = (nStepsPerBatch0+1) >> 1;
        }
        int stepsPerBatch = nStepsPerBatch0;
        int nBatches = (steps.size() + (stepsPerBatch-1)) / stepsPerBatch;
        return IntStream.range(0, nBatches)
                .parallel()
                .boxed()
                .flatMap(batch -> codedSteps(gt, steps, batch, stepsPerBatch))
                .toArray(IndexArray[]::new);
    }

    private static Stream<IndexArray> codedSteps(XRefGT gt, Steps steps,
            int batch, int batchSize) {
        int sentinal = -1;
        int startStep = batch*batchSize;
        int endStep = Math.min(startStep + batchSize, steps.size());
        int nSteps = endStep - startStep;
        int[][] hapToSeq = new int[nSteps][gt.nHaps()];
        int[] seqCnt = new int[nSteps];
        IntIntMap[] seqMap = IntStream.range(0, nSteps)
                .mapToObj(j -> new IntIntMap(8))
                .toArray(IntIntMap[]::new);
        for (int h=0, n=gt.nHaps(); h<n; ++h) {
            int mStart = steps.start(startStep);
            for (int j=0; j<nSteps; ++j) {
                int mEnd = steps.end(startStep + j);
                int key = gt.hash(h, mStart, mEnd);
                int seqIndex = seqMap[j].get(key, sentinal);
                if (seqIndex==sentinal) {
                    seqIndex = seqCnt[j]++;
                    seqMap[j].put(key, seqIndex);
                }
                hapToSeq[j][h] = seqIndex;
                mStart = mEnd;
            }
        }
        return IntStream.range(0, nSteps)
                .mapToObj(j -> new IndexArray(hapToSeq[j], seqCnt[j]));
    }

    /**
     * Returns a map from haplotype index to allele sequence index
     * for the specified step
     * @param step a step index
     * @return a map from haplotype index to allele sequence index
     * for the specified step
     * @throws IllegalArgumentException if
     * {@code step < 0 || step >= this.steps().size()}
     */
    public IndexArray get(int step) {
        return codedSteps[step];
    }

    /**
     * Returns the target samples.
     * @return the target samples
     */
    public Samples targSamples() {
        return targSamples;
    }

    /**
     * Returns the reference haplotypes
     * @return the reference haplotypes
     */
    public Optional<XRefGT> refHaps() {
        return refHaps;
    }

    /**
     * Return the phased target and reference genotype data that was used
     * to construct this {@code CodedSteps} instance.  The target haplotypes
     * precede the reference haplotypes.
     * @return the phased target and reference genotype data
     */
    public XRefGT allHaps() {
        return allHaps;
    }

    /**
     * Returns the partition of the markers into non-overlapping intervals.
     * @return the partition of the markers into non-overlapping intervals
     */
    public Steps steps() {
        return steps;
    }
}