1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
|
package jgi;
import shared.Parse;
import shared.Timer;
import shared.Tools;
import stream.Read;
import stream.SamLine;
import template.BBTool_ST;
/**
* Filters to select only reads with substitution errors
* for bases with quality scores in a certain interval.
* Used for manually examining specific reads that may have
* incorrectly calibrated quality scores, for example.
* @author Brian Bushnell
* @date May 5, 2015
*
*/
public class FilterReadsWithSubs extends BBTool_ST {
/*--------------------------------------------------------------*/
/*---------------- Initialization ----------------*/
/*--------------------------------------------------------------*/
/**
* Code entrance from the command line.
* Must be overridden; the commented body is an example.
* @param args Command line arguments
*/
public static void main(String[] args){
Timer t=new Timer();
FilterReadsWithSubs bbt=new FilterReadsWithSubs(args);
bbt.process(t);
}
public FilterReadsWithSubs(String[] args){super(args);}
@Override
public boolean parseArgument(String arg, String a, String b) {
// System.err.println("Calling parseArgument("+arg+","+a+","+b+")");
if(a.equals("minq")){
minq=Parse.parseIntKMG(b);
return true;
}else if(a.equals("maxq")){
maxq=Parse.parseIntKMG(b);
return true;
}else if(a.equals("keepperfect")){
keepPerfect=Parse.parseBoolean(b);
return true;
}else if(a.equals("countindels")){
countIndels=Parse.parseBoolean(b);
return true;
}else if(a.equals("minsubs")){
minsubs=Integer.parseInt(b);
return true;
}
//There was no match to the argument
return false;
}
@Override
protected void setDefaults(){
SamLine.SET_FROM_OK=true;
minq=0;
maxq=99;
minsubs=1;
countIndels=true;
keepPerfect=false;
}
@Override
protected final boolean useSharedHeader(){return true;}
@Override
protected void startupSubclass() {}
@Override
protected void shutdownSubclass() {}
@Override
protected void showStatsSubclass(Timer t, long readsIn, long basesIn) {
// TODO Auto-generated method stub
}
@Override
protected boolean processReadPair(Read r1, Read r2) {
assert(r2==null);
final byte[] quals=r1.quality, bases=r1.bases;
final byte[] match=(r1.match==null ? null : !r1.shortmatch() ? r1.match : Read.toLongMatchString(r1.match));
if(match==null || quals==null || bases==null){return false;}
int subs=0;
int indels=0;
for(int qpos=0, mpos=0, last=quals.length-1; mpos<match.length; mpos++){
final byte m=match[mpos];
final byte mprev=match[Tools.max(mpos-1, 0)];
final byte mnext=match[Tools.min(mpos+1, match.length-1)];
final byte q1=quals[qpos];
final byte b2=bases[qpos];
int sub=0, indel=0;
if(m=='S'){
sub=1;
}else if(m=='I'){
indel=1;
}else if(m=='m'){
if(mprev=='D' || mnext=='D'){
indel=1;
}
}else if(m=='D'){
//do nothing
}else if(m=='C'){
//do nothing
}else{
throw new RuntimeException("Bad symbol m='"+((char)m)+"'\n"+new String(match)+"\n"+new String(bases)+"\n");
}
subs+=sub;
indels+=indel;
if(q1>=minq && q1<=maxq){
if(sub>minsubs || (indel>0 && countIndels)){return true;}
}
if(m!='D'){qpos++;}
}
return keepPerfect && subs==0 && indels==0;
}
public int minq, maxq, minsubs;
public boolean countIndels;
public boolean keepPerfect;
}
|