/*
* Copyright (C) 2014-2021 Brian L. Browning
*
* This file is part of Beagle
*
* Beagle is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Beagle is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package vcf;
import blbutil.BitArray;
import java.util.stream.IntStream;
import phase.EstPhase;
import phase.SamplePhase;
/**
* <p>Class {@code BitArrayRefGTRec} represents phased, non-missing genotypes
* for a list of samples at a single marker. Instances of class
* {@code BitArrayRefGTRec} store haplotype alleles in bit sets.</p>
*
* <p>Instances of class {@code BitArrayRefGTRec} are immutable.</p>
*
* @author Brian L. Browning {@code <browning@uw.edu>}
*/
public final class BitArrayRefGTRec implements GTRec {

    private final int bitsPerAllele;
    private final Marker marker;
    private final Samples samples;
    private final BitArray alleles;

    /**
     * Creates one {@code BitArrayRefGTRec} per marker from the current
     * estimated phased genotypes. The per-sample (column-major) data
     * are converted into per-marker (row-major) data.
     * @param estPhase the current estimated phased genotypes for each target
     * sample
     * @return the current estimated phased, non-missing genotypes
     * @throws NullPointerException if {@code estPhase == null}
     */
    public static BitArrayRefGTRec[] toBitArrayRefGTRecs(EstPhase estPhase) {
        GT targGT = estPhase.fpd().stage1TargGT();
        Markers targMarkers = targGT.markers();
        Samples targSamples = targGT.samples();
        return fromBitLists(targMarkers, targSamples,
                SamplePhase.toBitLists(estPhase));
    }

    /**
     * Creates one {@code BitArrayRefGTRec} per marker from the specified
     * genotype data. The column-major data are converted into row-major
     * data.
     * @param gt the genotype data
     * @param nThreads the maximum number of computational threads for object
     * construction
     * @return the phased, non-missing genotypes as a {@code BitArrayRefGTRec[]}
     * array
     * @throws IllegalArgumentException if {@code nThreads < 1}
     * @throws NullPointerException if {@code gt == null}
     */
    public static BitArrayRefGTRec[] toBitArrayRefGTRecs(XRefGT gt, int nThreads) {
        Markers refMarkers = gt.markers();
        Samples refSamples = gt.samples();
        return fromBitLists(refMarkers, refSamples, gt.toBitLists(nThreads));
    }

    /*
     * Wraps each per-marker bit list in a record. The k-th element of
     * bitLists is paired with the k-th marker of markers, and the records
     * are built with a parallel stream.
     */
    private static BitArrayRefGTRec[] fromBitLists(Markers markers,
            Samples samples, BitArray[] bitLists) {
        return IntStream.range(0, bitLists.length)
                .parallel()
                .mapToObj(index -> new BitArrayRefGTRec(
                        markers.marker(index), samples, bitLists[index]))
                .toArray(BitArrayRefGTRec[]::new);
    }

    /*
     * Stores the arguments without copying them. The number of bits per
     * allele is read from the marker once and cached.
     */
    private BitArrayRefGTRec(Marker marker, Samples samples, BitArray alleles) {
        this.bitsPerAllele = marker.bitsPerAllele();
        this.marker = marker;
        this.samples = samples;
        this.alleles = alleles;
    }

    @Override
    public Samples samples() {
        return this.samples;
    }

    @Override
    public int size() {
        // two haplotypes per sample
        return 2 * this.samples.size();
    }

    @Override
    public Marker marker() {
        return this.marker;
    }

    @Override
    public boolean isPhased() {
        return true;
    }

    @Override
    public boolean isPhased(int sample) {
        return true;
    }

    @Override
    public int allele1(int sample) {
        // first haplotype of the sample has even index
        return allele(2 * sample);
    }

    @Override
    public int allele2(int sample) {
        // second haplotype of the sample has odd index
        return allele(2 * sample + 1);
    }

    @Override
    public int get(int hap) {
        return allele(hap);
    }

    /*
     * Decodes the allele of the specified haplotype. The allele occupies
     * bitsPerAllele consecutive bits beginning at bit bitsPerAllele*hap,
     * with the least significant bit stored first.
     */
    private int allele(int hap) {
        int bitIndex = hap * bitsPerAllele;
        int stop = bitIndex + bitsPerAllele;
        int value = 0;
        for (int weight = 1; bitIndex < stop; ++bitIndex, weight <<= 1) {
            if (alleles.get(bitIndex)) {
                value |= weight;
            }
        }
        return value;
    }

    @Override
    public int[] alleles() {
        return IntStream.range(0, size())
                .map(this::get)
                .toArray();
    }

    /**
     * Returns a VCF record with a GT format field that represents the
     * data in {@code this}. The returned VCF record will have missing
     * QUAL and INFO fields, will have "PASS" in the filter field, and
     * will have a GT format field.
     * @return a VCF record with a GT format field that represents the
     * data in {@code this}
     */
    @Override
    public String toString() {
        return GTRec.toVcfRec(this);
    }
}
|