1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
|
package driver;
import fileIO.ReadWrite;
import fileIO.TextFile;
import fileIO.TextStreamWriter;
import shared.Parse;
import shared.Shared;
import shared.Tools;
import stream.SamLine;
/**
 * Filters a SAM file, keeping header lines plus those mapped reads whose cigar
 * string scores at least a minimum value for one chosen symbol class
 * (deletions with a minimum of 1, by default).
 *
 * Usage: SelectReads &lt;input&gt; &lt;output&gt; [symbol] [minlen] [reads]
 *
 * @author Brian Bushnell
 * @date Jun 21, 2013
 *
 */
public final class SelectReads {

	/**
	 * Symbol classes, in the order used to index the array returned by
	 * SamLine.cigarToMdsiMax.
	 * NOTE(review): the order is taken from the original lookup in this class; confirm against SamLine.
	 */
	private static final char[] SYMBOL_CLASSES={'M','S','D','I','C'};

	/**
	 * Program entry point.
	 * @param args args[0]: input file; args[1]: output file (must differ from input);
	 * args[2]: optional symbol, default 'D'; args[3]: optional minimum value, default 1;
	 * args[4]: optional maximum number of non-header lines to examine (parsed with
	 * Parse.parseKMG), default unlimited.
	 */
	public static void main(String[] args){
		assert(args.length>=2) : "Need 2 file names: <input> <output>";
		assert(!args[0].equalsIgnoreCase(args[1])) : "File names must be different.";

		ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
		ReadWrite.setZipThreads(Shared.threads());

		int minlen=1;
		long reads=Long.MAX_VALUE;
		char symbol='D';
		if(args.length>2){
			//An empty argument would otherwise fail in charAt with an unhelpful exception.
			assert(args[2].length()>0) : "Symbol (3rd argument) must not be empty.";
			symbol=args[2].charAt(0);
		}
		if(args.length>3){minlen=Integer.parseInt(args[3]);}
		if(args.length>4){reads=Parse.parseKMG(args[4]);}

		symbol=normalizeSymbol(Tools.toUpperCase(symbol));
		final int index=Tools.indexOf(SYMBOL_CLASSES, symbol);
		assert(index>=0) : "Symbol (3rd argument) must be M, S, D, I, C (for match string symbols) or M, =, X, D, N, I, S, H, P (for cigar symbols).";

		final TextFile tf=new TextFile(args[0], true);
		final TextStreamWriter tsw=new TextStreamWriter(args[1], false, false, true);
		tsw.start();

		try{
			long examined=0;
			for(String line=tf.nextLine(); line!=null; line=tf.nextLine()){
				//Blank lines carry no record; skipping them avoids charAt(0) failing.
				if(line.isEmpty()){continue;}

				if(line.charAt(0)=='@'){
					//Header lines are always copied through and do not count toward the limit.
					tsw.println(line);
				}else{
					if(examined>=reads){break;}
					examined++;
					final SamLine sl=new SamLine(line);
					if(testLine(sl, minlen, index)){
						tsw.println(line);
					}
				}
			}
		}finally{
			//Release the reader and let the writer finish even if a line fails to parse.
			tf.close();
			tsw.poisonAndWait();
		}
	}

	/**
	 * Collapses an upper-case cigar or match-string symbol into one of the
	 * classes in SYMBOL_CLASSES. Each input is mapped exactly once, so 'X'
	 * resolves to 'S' and is not subsequently re-mapped to 'C'.
	 * A literal 'S' is treated as a cigar soft-clip and maps to 'C'.
	 * @param symbol Upper-case symbol supplied by the user
	 * @return The class symbol, or the input unchanged if it is not recognized
	 */
	private static char normalizeSymbol(char symbol){
		switch(symbol){
			case '=': return 'M';
			case 'X': return 'S';
			case 'N': return 'D';
			case 'S':
			case 'H':
			case 'P': return 'C';
			default: return symbol;
		}
	}

	/**
	 * Decides whether a SAM line should be retained.
	 * @param sl Parsed SAM line; must not be null
	 * @param minlen Minimum value required at the selected index
	 * @param index Position of the selected symbol class in the array returned by cigarToMdsiMax
	 * @return true if the read is mapped, has a cigar string, and the value at
	 * index (presumably the longest run of that symbol - verify in SamLine) is at least minlen
	 */
	private static boolean testLine(SamLine sl, int minlen, int index){
		assert(sl!=null);
		if(!sl.mapped() || sl.cigar==null){return false;}
		final int[] msdic=sl.cigarToMdsiMax(sl.cigar);
		return (msdic!=null && msdic[index]>=minlen);
	}
}
|