File: SelectReads.java

package info (click to toggle)
bbmap 39.20%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 26,024 kB
  • sloc: java: 312,743; sh: 18,099; python: 5,247; ansic: 2,074; perl: 96; makefile: 39; xml: 38
file content (74 lines) | stat: -rwxr-xr-x 2,031 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
package driver;

import fileIO.ReadWrite;
import fileIO.TextFile;
import fileIO.TextStreamWriter;
import shared.Parse;
import shared.Shared;
import shared.Tools;
import stream.SamLine;

/**
 * 
 * Filters a SAM file, keeping every header line plus those mapped reads for
 * which the value reported by SamLine.cigarToMdsiMax for a chosen symbol is at
 * least a minimum length.  With the default arguments (symbol D, minlen 1)
 * this selects only reads with deletions.
 * 
 * Usage: SelectReads &lt;input&gt; &lt;output&gt; [symbol] [minlen] [reads]
 * 
 * @author Brian Bushnell
 * @date Jun 21, 2013
 *
 */
public final class SelectReads {
	
	/**
	 * Canonical symbols.  The position of a symbol in this array is the index
	 * used to read the array returned by SamLine.cigarToMdsiMax.
	 */
	private static final char[] SYMBOLS={'M','S','D','I','C'};
	
	/**
	 * Program entry point.
	 * @param args args[0]: input file; args[1]: output file (must differ from input);
	 * args[2] (optional, default D): symbol to select on;
	 * args[3] (optional, default 1): minimum length for that symbol;
	 * args[4] (optional, default unlimited): maximum number of non-header lines
	 * to examine, parsed by Parse.parseKMG.
	 */
	public static void main(String[] args){
		
		assert(args.length>=2) : "Need 2 file names: <input> <output>";
		assert(!args[0].equalsIgnoreCase(args[1])) : "File names must be different.";
		
		ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
		ReadWrite.setZipThreads(Shared.threads());
		
		
		int minlen=1;
		long reads=Long.MAX_VALUE;
		char symbol='D';
		if(args.length>2){symbol=args[2].charAt(0);}
		if(args.length>3){minlen=Integer.parseInt(args[3]);}
		if(args.length>4){reads=Parse.parseKMG(args[4]);}
		
		symbol=normalizeSymbol(symbol);
		
		final int index=Tools.indexOf(SYMBOLS, symbol);
		assert(index>=0) : "Symbol (3rd argument) must be M, S, D, I, C (for match string symbols) or M, =, X, D, N, I, S, H, P (for cigar symbols).";
		
		final TextFile tf=new TextFile(args[0], true);
		final TextStreamWriter tsw=new TextStreamWriter(args[1], false, false, true);
		tsw.start();
		
		try{
			for(String line=tf.nextLine(); line!=null; line=tf.nextLine()){
				//Skip blank lines so that charAt(0) cannot throw.
				if(line.isEmpty()){continue;}
				
				if(line.charAt(0)=='@'){
					//Header lines are always passed through.
					tsw.println(line);
				}else{
					//Stop entirely once the read limit has been used up.
					if((reads=reads-1)<0){break;}
					SamLine sl=new SamLine(line);
					if(testLine(sl, minlen, index)){
						tsw.println(line);
					}
				}
			}
		}finally{
			//Release the input and tell the writer to finish even if a line failed to parse.
			try{
				tf.close();
			}finally{
				tsw.poisonAndWait();
			}
		}
		
	}
	
	
	/**
	 * Converts a user-supplied symbol to one of the canonical symbols M, S, D, I, C.
	 * The mapping is applied in a single pass, so X becomes S (substitution) and is
	 * NOT subsequently turned into C; only a literal S, H, or P becomes C.
	 * Consequently an input of S is always interpreted as the cigar soft-clip
	 * symbol; use X to select on substitutions.
	 * @param symbol Raw symbol, any case.
	 * @return The canonical symbol, or the uppercased input if it has no mapping.
	 */
	private static char normalizeSymbol(char symbol){
		symbol=Tools.toUpperCase(symbol);
		switch(symbol){
			case '=': return 'M';
			case 'X': return 'S';
			case 'N': return 'D';
			case 'S':
			case 'H':
			case 'P': return 'C';
			default: return symbol;
		}
	}
	
	
	/**
	 * Decides whether a read should be retained.
	 * @param sl Parsed SAM line; must not be null.
	 * @param minlen Minimum value required at the selected index.
	 * @param index Position of the selected symbol in M, S, D, I, C order.
	 * @return true if the read is mapped, has a cigar string, and the array from
	 * cigarToMdsiMax (presumably the longest run per symbol - confirm in SamLine)
	 * is non-null with a value of at least minlen at the given index.
	 */
	private static boolean testLine(SamLine sl, int minlen, int index){
		assert(sl!=null);
		if(!sl.mapped() || sl.cigar==null){return false;}
		int[] msdic=sl.cigarToMdsiMax(sl.cigar);
		return (msdic!=null && msdic[index]>=minlen);
	}
	
}