1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116
|
/*
* Copyright (C) 2014-2021 Brian L. Browning
*
* This file is part of Beagle
*
* Beagle is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Beagle is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package vcf;
import blbutil.Const;
import java.util.Arrays;
/**
* <p>Interface {@code GTRec} represents genotype data for one
* marker.
* </p>
* <p>All instances of {@code GTRec} are required to be immutable.
* </p>
*
* @author Brian L. Browning {@code <browning@uw.edu>}
*/
public interface GTRec extends DuplicatesGTRec {

    /**
     * Returns the list of samples.
     * @return the list of samples
     */
    Samples samples();

    /**
     * Returns the allele frequencies for the specified record. The
     * {@code k}-th element of the returned array is the count of the
     * {@code k}-th allele (as reported by {@code alleleCounts(rec)})
     * divided by the sum of all allele counts. If the sum of the allele
     * counts is not positive, every element of the returned array is
     * {@code 0.0}.
     * @param rec the genotype data for a marker
     * @return the allele frequencies
     */
    static double[] alleleFreq(GTRec rec) {
        int[] counts = alleleCounts(rec);
        int total = 0;
        for (int count : counts) {
            total += count;
        }
        double[] freq = new double[counts.length];
        if (total <= 0) {
            // nothing was counted: leave every frequency at 0.0
            return freq;
        }
        for (int k = 0; k < freq.length; ++k) {
            freq[k] = (double) counts[k] / total;
        }
        return freq;
    }

    /**
     * Returns the allele counts for the specified record. The
     * {@code k}-th element of the returned array is the number of indices
     * {@code h} satisfying {@code 0 <= h && h < rec.size()} for which
     * {@code rec.get(h) == k}. Negative values returned by
     * {@code rec.get(h)} are not counted. The returned array has length
     * {@code rec.marker().nAlleles()}.
     * @param rec the genotype data for a marker
     * @return the allele counts
     */
    static int[] alleleCounts(GTRec rec) {
        int[] counts = new int[rec.marker().nAlleles()];
        int size = rec.size();
        for (int h = 0; h < size; ++h) {
            int a = rec.get(h);
            if (a < 0) {
                // negative allele value: skip it
                continue;
            }
            ++counts[a];
        }
        return counts;
    }

    /**
     * Returns a VCF record corresponding to the specified {@code GTRec}
     * object. The record begins with the string representation of
     * {@code gtRec.marker()}, followed by a missing QUAL field, "PASS" in
     * the FILTER field, a missing INFO field, a "GT" FORMAT field, and
     * one genotype field per sample. An allele equal to {@code -1} is
     * written as the missing-data character.
     * @param gtRec the genotype data
     * @return a VCF record corresponding to the specified {@code GTRec}
     * object
     * @throws NullPointerException if {@code gtRec == null}
     */
    static String toVcfRec(GTRec gtRec) {
        StringBuilder sb = new StringBuilder(100);
        sb.append(gtRec.marker())
                .append(Const.tab)
                .append(Const.MISSING_DATA_CHAR)    // QUAL
                .append(Const.tab)
                .append("PASS")                     // FILTER
                .append(Const.tab)
                .append(Const.MISSING_DATA_CHAR)    // INFO
                .append(Const.tab)
                .append("GT");                      // FORMAT
        int nSamples = gtRec.samples().size();
        for (int s = 0; s < nSamples; ++s) {
            int first = gtRec.allele1(s);
            int second = gtRec.allele2(s);
            sb.append(Const.tab);
            // keep if/else (not a ternary) so that the allele is appended
            // as an int and the missing-data symbol keeps its own type
            if (first != -1) {
                sb.append(first);
            }
            else {
                sb.append(Const.MISSING_DATA_CHAR);
            }
            sb.append(gtRec.isPhased(s) ? Const.phasedSep : Const.unphasedSep);
            if (second != -1) {
                sb.append(second);
            }
            else {
                sb.append(Const.MISSING_DATA_CHAR);
            }
        }
        return sb.toString();
    }
}
|