File: GTRec.java

package info (click to toggle)
beagle 241217-3
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 9,712 kB
  • sloc: java: 17,684; sh: 55; makefile: 11
file content (164 lines) | stat: -rw-r--r-- 5,246 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
/*
 * Copyright (C) 2014-2021 Brian L. Browning
 *
 * This file is part of Beagle
 *
 * Beagle is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Beagle is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package vcf;

import blbutil.Const;
import ints.IntArray;
import java.util.Arrays;

/**
 * <p>Interface {@code GTRec} represents genotype data for one marker.
 * </p>
 * <p>All instances of {@code GTRec} are required to be immutable.
 * </p>
 *
 * @author Brian L. Browning {@code <browning@uw.edu>}
 */
public interface GTRec extends IntArray {

    /**
     * Returns the list of samples.
     * @return the list of samples
     */
    Samples samples();

    /**
     * Returns the marker.
     * @return the marker
     */
    Marker marker();

    /**
     * Returns the allele carried by the specified haplotype, or
     * -1 if the allele is missing.  The two alleles for a sample
     * are arbitrarily ordered if
     * {@code this.isPhased(hap/2) == false}.
     * @param hap a haplotype index
     * @return the allele carried by the specified haplotype, or -1 if
     * the allele is missing
     *
     * @throws IndexOutOfBoundsException if
     * {@code hap < 0 || hap >= this.size()}
     */
    @Override
    int get(int hap);

    /**
     * Returns the number of haplotypes.
     * @return the number of haplotypes
     */
    @Override
    int size();


    /**
     * Returns {@code true} if the genotype for the specified sample
     * has non-missing alleles and is either haploid or diploid with
     * a phased allele separator, and returns {@code false} otherwise.
     * @param sample a sample index
     * @return {@code true} if the genotype for the specified sample
     * is a phased, non-missing genotype
     *
     * @throws IndexOutOfBoundsException if
     * {@code sample < 0 || sample >= this.size()/2}
     */
    boolean isPhased(int sample);

    /**
     * Returns {@code true} if the genotype for every sample is a phased,
     * non-missing genotype, and returns {@code false} otherwise.
     * @return {@code true} if the genotype for every sample is a phased,
     * non-missing genotype
     */
    boolean isPhased();

    /**
     * Returns the allele frequencies.  The {@code k}-th element of the
     * returned array is the count of the {@code k}-th allele divided by
     * the total count of non-missing alleles.  Missing alleles are
     * excluded from both numerator and denominator.  If there are no
     * non-missing alleles, every element of the returned array is 0.
     * @param rec the genotype data for a marker
     * @return the allele frequencies
     * @throws NullPointerException if {@code rec == null}
     */
    static double[] alleleFreq(GTRec rec) {
        int[] cnts = alleleCounts(rec);
        int sum = Arrays.stream(cnts).sum();
        double[] freq = new double[cnts.length];
        if (sum <= 0) {
            // no observed alleles: return all zeros rather than dividing by 0
            return freq;
        }
        for (int al=0; al<cnts.length; ++al) {
            freq[al] = (double) cnts[al]/sum;
        }
        return freq;
    }

    /**
     * Returns the allele counts.  The {@code k}-th element of the
     * returned array is the number of haplotypes that carry the
     * {@code k}-th allele.  The returned array has length
     * {@code rec.marker().nAlleles()}.  Haplotypes with a missing
     * (negative) allele are not counted.
     * @param rec the genotype data for a marker
     * @return the allele counts
     * @throws NullPointerException if {@code rec == null}
     */
    static int[] alleleCounts(GTRec rec) {
        int nAlleles = rec.marker().nAlleles();
        int[] cnts = new int[nAlleles];
        for (int h=0, n = rec.size(); h<n; ++h) {
            int allele = rec.get(h);
            if (allele>=0) {
                ++cnts[allele];
            }
        }
        return cnts;
    }

    /**
     * Returns a VCF record corresponding to the specified {@code GTRec} object.
     * The first seven fields are written by
     * {@code MarkerUtils.appendFirst7Fields()}, the INFO field is
     * {@code gtRec.marker().info()}, and the FORMAT field is "GT".
     * One genotype is written per sample using the alleles of haplotypes
     * {@code 2*s} and {@code 2*s + 1} for sample {@code s}.  A missing
     * allele is written as {@code Const.MISSING_DATA_CHAR}, and the allele
     * separator is {@code Const.phasedSep} if {@code gtRec.isPhased(s)}
     * and {@code Const.unphasedSep} otherwise.
     *
     * <p>NOTE(review): this method was previously documented as producing
     * a missing QUAL field and "PASS" in the FILTER field; those values are
     * determined by {@code MarkerUtils.appendFirst7Fields()} and should be
     * confirmed there.
     * </p>
     * @param gtRec the genotype data
     * @return a VCF record corresponding to the specified {@code GTRec} object
     * @throws NullPointerException if {@code gtRec == null}
     */
    static String toVcfRec(GTRec gtRec) {
        Marker marker = gtRec.marker();
        StringBuilder sb = new StringBuilder(100);
        MarkerUtils.appendFirst7Fields(marker, sb);
        sb.append(Const.tab);
        sb.append(marker.info());  // INFO
        sb.append(Const.tab);
        sb.append("GT");           // FORMAT
        for (int s=0, n=gtRec.samples().size(); s<n; ++s) {
            int hap1 = s<<1;            // first haplotype of sample s
            int hap2 = hap1 | 0b1;      // second haplotype of sample s
            int a1 = gtRec.get(hap1);
            int a2 = gtRec.get(hap2);
            sb.append(Const.tab);
            // any negative allele is treated as missing, as in alleleCounts()
            if (a1<0) {
                sb.append(Const.MISSING_DATA_CHAR);
            }
            else {
                sb.append(a1);
            }
            sb.append(gtRec.isPhased(s) ? Const.phasedSep : Const.unphasedSep);
            if (a2<0) {
                sb.append(Const.MISSING_DATA_CHAR);
            }
            else {
                sb.append(a2);
            }
        }
        return sb.toString();
    }
}