File: FilterUtil.java

package info (click to toggle)
beagle 220722-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 9,644 kB
  • sloc: java: 17,045; sh: 55; makefile: 11
file content (140 lines) | stat: -rw-r--r-- 5,684 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
/*
 * Copyright (C) 2014-2021 Brian L. Browning
 *
 * This file is part of Beagle
 *
 * Beagle is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Beagle is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package vcf;

import blbutil.Filter;
import blbutil.Utilities;
import java.io.File;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;

/**
 * Class {@code FilterUtil} contains static methods for constructing
 * marker filters.
 *
 * @author Brian L. Browning {@code <browning@uw.edu>}
 */
public final class FilterUtil {

    private FilterUtil() {
        // private constructor to prevent instantiation
    }

    /**
     * Returns a filter that excludes markers that have an identifier
     * or genome coordinates that matches a line of the specified file,
     * or returns {@code null} if the {@code excludeMarkersFile} parameter is
     * {@code null}. Genome coordinates must be in "CHROM:POS" format.
     * @param excludeMarkersFile a file that contains an identifier
     * or genome coordinate of one excluded marker on each line
     * @return a filter that excludes markers that have an identifier
     * or genome coordinates that matches a line of the specified file,
     * or {@code null} if the {@code excludeMarkersFile} parameter is
     * {@code null}
     *
     * @throws IllegalArgumentException if the specified file does not exist
     * @throws IllegalArgumentException if the specified file is a directory
     * @throws IllegalArgumentException if any line of the specified
     * file contains two non-white-space characters separated by one or
     * more white-space characters
     */
    public static Filter<Marker> markerFilter(File excludeMarkersFile) {
        Set<String> excludeIds;
        if (excludeMarkersFile==null) {
            return Filter.acceptAllFilter();
        }
        else {
            excludeIds = Utilities.idSet(excludeMarkersFile);
            return idFilter(excludeIds);
        }
    }

    /**
     * Returns a filter that excludes samples that have an identifier
     * that matches a line of the specified file, or returns {@code null} if
     * the {@code excludeSamplesFile} parameter is {@code null}
     * @param excludeSamplesFile a file which contains an identifier
     * of one excluded sample on each line
     * @return a filter that excludes samples that have an identifier
     * that matches a line of the specified file, or {@code null} if
     * the {@code excludeSamplesFile} parameter is {@code null}
     *
     * @throws IllegalArgumentException if the specified file does not exist
     * @throws IllegalArgumentException if the specified file is a directory
     * @throws IllegalArgumentException if any line of the specified
     * file contains two non-white-space characters separated by one or
     * more white-space characters
     */
    public static Filter<String> sampleFilter(File excludeSamplesFile) {
        if (excludeSamplesFile==null) {
            return Filter.acceptAllFilter();
        }
        else {
           Set<String> exclude = Utilities.idSet(excludeSamplesFile);
           return Filter.excludeFilter(exclude);
        }
    }

    /**
     * Returns {@code true} if the specified marker has an identifier
     * is in the specified set, or if ("marker.chrom()" + ":" + "marker.pos()")
     * is in the specified set, and returns {@code false} otherwise.
     * @param marker a marker
     * @param set a set of marker identifiers and chromosome positions in
     * "CHROM:POS" format
     * @return {@code true} if the specified marker has an identifier
     * is in the specified set or if ("marker.chrom()" + ":" + "marker.pos()")
     * is in the specified set
     * @throws NullPointerException if {@code marker == null || set == null}
     */
    public static boolean markerIsInSet(Marker marker, Set<String> set) {
        for (int j=0, n=marker.nIds(); j<n; ++j) {
            if (set.contains(marker.id(j))) {
                return true;
            }
        }
        String posId = marker.chrom() + ':' + marker.pos();
        return set.contains(posId);
    }

    /**
     * Returns a filter that accepts all markers which do not have an
     * identifier or chromomsome position present in the specified
     * collection.
     * A marker is excluded if {@code exclude.contains(marker.id(j)) == true}
     * for any {@code 0 <= j < marker.nIds()} or if
     * {@code exclude.contains(marker.chrom() + ":" + marker.pos()) == true}.
     * @param exclude a collection of marker identifiers and chromosome
     * positions in "CHROM:POS" format
     * @return a filter that accepts all markers which do not have an
     * identifier or chromomsome position present in the specified
     * collection
     * @throws NullPointerException if {@code exclude == null}
     */
    public static Filter<Marker> idFilter(Collection<String> exclude) {
        final Set<String> excludeSet = new HashSet<>(exclude);
        if (excludeSet.isEmpty()) {
            return Marker -> true;
        }
        else {
            return (Marker marker) -> !markerIsInSet(marker, excludeSet);
        }
    }
}