File: RefGTRec.java

package info (click to toggle)
beagle 220722-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 9,644 kB
  • sloc: java: 17,045; sh: 55; makefile: 11
file content (245 lines) | stat: -rw-r--r-- 8,872 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
/*
 * Copyright (C) 2014-2021 Brian L. Browning
 *
 * This file is part of Beagle
 *
 * Beagle is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Beagle is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package vcf;

import ints.IntArray;

/**
 * <p>Interface {@code RefGTRec} represents phased genotype data
 * for one marker.  For implementations of this interface, unless otherwise
 * specified in the implementation documentation, if the {@code isAlleleCoded()}
 * method returns {@code false}, the {@code majorAllele()},
 * {@code alleleCount()}, and {@code hapIndex()} methods will be computationally
 * expensive with compute time proportional to the number of haplotypes.
 * Alternatively if the {@code isAlleleCoded()} method returns
 * {@code true}, the {@code maps()} and {@code map()} methods will be
 * computationally expensive with compute time proportional to the number
 * of haplotypes.
 * </p>
 * <p>All instances of {@code RefGTRec} are required to be immutable.
 * </p>
 *
 * @author Brian L. Browning {@code <browning@uw.edu>}
 */
public interface RefGTRec extends GTRec {

    /**
     * Returns an allele-coded view of the specified record.  If the
     * specified record is already allele-coded, the record itself is
     * returned; otherwise a new allele-coded instance is constructed from it.
     *
     * @param rec the phased, non-missing genotype data
     * @return an allele-coded {@code RefGTRec} instance for the
     * specified data
     * @throws NullPointerException if {@code rec == null}
     */
    static RefGTRec alleleCodedInstance(RefGTRec rec) {
        // Nothing to convert: hand back the caller's record unchanged.
        if (rec.isAlleleCoded()) {
            return rec;
        }
        final int nAlleles = rec.marker().nAlleles();
        if (nAlleles != 2) {
            return new LowMafRefGTRec(rec);
        }
        // Markers with exactly two alleles use the diallelic implementation.
        return new LowMafRefDiallelicGTRec(rec);
    }

    /**
     * Creates a new allele-coded {@code RefGTRec} instance from the
     * genotypes extracted by the specified VCF record parser.
     *
     * @param gtp a VCF record parser that extracts sample genotypes
     * @return an allele-coded {@code RefGTRec} instance
     *
     * @throws IllegalArgumentException if the VCF record contains an
     * unphased genotype or missing allele
     * @throws IllegalArgumentException if a format error is detected in the
     * VCF record
     * @throws NullPointerException if {@code gtp == null}
     */
    static RefGTRec alleleCodedInstance(VcfRecGTParser gtp) {
        final boolean diallelic = (gtp.nAlleles() == 2);
        if (diallelic) {
            return new LowMafRefDiallelicGTRec(gtp);
        }
        return new LowMafRefGTRec(gtp);
    }

    /**
     * Creates a new allele-coded {@code RefGTRec} instance from the
     * specified marker, samples, and per-allele haplotype lists.
     *
     * @param marker the marker
     * @param samples the samples
     * @param hapIndices an array whose {@code j}-th element is {@code null}
     * if {@code j} is the major allele and otherwise is a list of haplotypes
     * sorted in increasing order that carry the {@code j}-th allele.
     * If there is more than one allele with a maximal allele count, the
     * major allele is the smallest allele with maximal allele count.
     * If a haplotype is contained in a list for more than one non-major
     * allele, the haplotype will be assumed to carry the smallest allele.
     * @return an allele-coded {@code RefGTRec} instance
     *
     * @throws IllegalArgumentException if {@code (hapIndices[j] == null)}
     * and {@code j} is not the major allele, or if
     * {@code (hapIndices[j] != null)} and {@code j} is the major allele
     * @throws IllegalArgumentException if any non-null element of
     * {@code hapIndices} is not a sorted list of distinct haplotype indices
     * between 0 (inclusive) and {@code 2*samples.size()} (exclusive)
     * @throws IllegalArgumentException if
     * {@code marker.nAlleles() != hapIndices.length}
     * @throws NullPointerException if
     * {@code marker == null || samples == null || hapIndices == null}
     */
    static RefGTRec hapCodedInstance(Marker marker, Samples samples,
            int[][] hapIndices) {
        // NOTE(review): despite the "hapCoded" name, this builds the same
        // classes as the alleleCodedInstance() factories and is documented
        // as returning an allele-coded instance -- confirm the intended name.
        final boolean diallelic = (marker.nAlleles() == 2);
        if (!diallelic) {
            return new LowMafRefGTRec(marker, samples, hapIndices);
        }
        return new LowMafRefDiallelicGTRec(marker, samples, hapIndices);
    }

    /**
     * Returns the per-allele haplotype lists for this record.  The
     * {@code j}-th element of the returned array is {@code null}
     * if {@code j} is the major allele with lowest index, and otherwise is
     * an array of indices of haplotypes that carry the {@code j}-th allele
     * sorted in increasing order.
     *
     * @return an array whose {@code j}-th element is {@code null}
     * if {@code j} is the major allele with lowest index, and otherwise is
     * an array of indices of haplotypes that carry the {@code j}-th allele
     * sorted in increasing order
     */
    int[][] hapIndices();

    /**
     * Returns {@code true}.
     *
     * @param sample the sample index
     * @return {@code true}
     *
     * @throws IndexOutOfBoundsException if
     * {@code sample < 0 || sample >= this.nSamples()}
     */
    @Override
    boolean isPhased(int sample);

    /**
     * Returns {@code true}.
     *
     * @return {@code true}
     */
    @Override
    boolean isPhased();

    /**
     * Reports whether this instance is allele-coded, that is, whether it
     * stores the indices of haplotypes that carry non-major alleles.
     *
     * @return {@code true} if this instance stores the indices of haplotypes
     * that carry non-major alleles, and {@code false} otherwise
     */
    boolean isAlleleCoded();

    /**
     * Returns the index of the major allele.
     *
     * @return the index of the major allele
     */
    int majorAllele();

    /**
     * Returns the allele counts for this record.  The returned array has
     * length {@code this.nAlleles()} and its {@code j}-th element is the
     * allele count of the {@code j}-th allele.
     *
     * @return an array of allele counts
     */
    int[] alleleCounts();

    /**
     * Returns the number of haplotypes that carry the specified
     * non-major allele.
     *
     * @param allele an allele index
     * @return the number of haplotypes that carry the specified non-major
     * allele
     * @throws IllegalArgumentException if
     * {@code allele == this.majorAllele()}
     * @throws IndexOutOfBoundsException if
     * {@code allele < 0 || allele >= this.nAlleles()}
     */
    int alleleCount(int allele);

    /**
     * Returns the index of the haplotype that carries the specified copy
     * of the specified non-major allele.
     *
     * @param allele an allele index
     * @param copy a copy index
     * @return the index of the haplotype that carries the specified copy of
     * the specified allele
     * @throws IllegalArgumentException if
     * {@code allele == this.majorAllele()}
     * @throws IndexOutOfBoundsException if
     * {@code allele < 0 || allele >= this.nAlleles()}
     * @throws IndexOutOfBoundsException if
     * {@code copy < 0 || copy >= this.alleleCount(allele)}
     */
    int hapIndex(int allele, int copy);

    /**
     * Returns {@code true} if the specified haplotype carries the specified
     * allele, and returns {@code false} otherwise.
     *
     * @param allele an allele index
     * @param hap a haplotype index
     * @return {@code true} if the specified haplotype carries the specified
     * allele
     * @throws IndexOutOfBoundsException if
     * {@code hap < 0 || hap >= this.size()}
     * @throws IndexOutOfBoundsException if
     * {@code allele < 0 || allele >= this.nAlleles()}
     */
    boolean isCarrier(int allele, int hap);

    /**
     * Returns {@code this.maps().length}.
     *
     * @return {@code this.maps().length}
     */
    int nMaps();

    /**
     * Returns an array of maps which, when composed, map haplotype indices
     * to alleles.  The allele on haplotype {@code h} is determined
     * by the following calculation:
     * <pre>
     *     IntArray[] maps = this.maps();
     *     int value = maps[0].get(h);
     *     for (int j=1; j&lt;maps.length; ++j) {
     *         value = maps[j].get(value);
     *     }
     *     int allele = value;
     * </pre>
     *
     * @return an array of maps which, when composed, map haplotype indices
     * to alleles
     */
    IntArray[] maps();

    /**
     * Returns {@code this.maps()[index]}.
     *
     * @param index the index in {@code this.maps()}
     * @return {@code this.maps()[index]}
     * @throws IndexOutOfBoundsException if
     * {@code index < 0 || index >= this.nMaps()}
     */
    IntArray map(int index);
}