File: IlluminaHeaderParser2.java

package info (click to toggle)
bbmap 39.20%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 26,024 kB
  • sloc: java: 312,743; sh: 18,099; python: 5,247; ansic: 2,074; perl: 96; makefile: 39; xml: 38
file content (165 lines) | stat: -rwxr-xr-x 4,400 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
package hiseq;

import shared.LineParser;
import shared.LineParserS3;
import structures.ByteBuilder;

/**
 * Faster version of IlluminaHeaderParser using LineParser.
 * A header is handed to a ':'-delimited LineParserS3 by {@link #parse(String)};
 * the accessors below then read individual terms by position.
 * @author Brian Bushnell
 * @date April 3, 2024
 *
 */
public class IlluminaHeaderParser2 extends ReadHeaderParser {
	
	/*--------------------------------------------------------------*/
	/*----------------        Expected Format       ----------------*/
	/*--------------------------------------------------------------*/
	
	//@VP2-06:112:H7LNDMCVY:2:2437:14181:20134 (Novaseq6k)
	//2402:6:1101:6337:2237/1
	//MISEQ08:172:000000000-ABYD0:1:1101:18147:1925 1:N:0:TGGATATGCGCCAATT
	//HISEQ07:419:HBFNEADXX:1:1101:1238:2072
	//A00178:38:H5NYYDSXX:2:1101:3007:1000 1:N:0:CAACCTA+CTAGGTT
	//@LH00223:28:22GLGMLT3:1:1101:5928:1016 1:N:0:CTGCTTGGTT+CTAACGACAG (NovaseqX)
	
	//	@HWI-Mxxxx or @Mxxxx - MiSeq
	//	@HWUSI - GAIIx
	//	@HWI-Dxxxx - HiSeq 2000/2500
	//	@Kxxxx - HiSeq 3000(?)/4000
	//	@Nxxxx - NextSeq 500/550
	//	@Axxxxx - NovaSeq
	//	@Vxxxxx = NextSeq 2000
	//	@AAxxxxx - NextSeq 2000 P1/P2/P3
	//	@Hxxxxxx - NovaSeq S1/S2/S4
	//
	//	AAXX = Genome Analyzer 
	//	BCXX = HiSeq v1.5 
	//	ACXX = HiSeq High-Output v3 
	//	ANXX = HiSeq High-Output v4 
	//	ADXX = HiSeq RR v1 
	//	AMXX, BCXX =HiSeq RR v2 
	//	ALXX = HiSeqX 
	//	BGXX, AGXX = High-Output NextSeq 
	//	AFXX = Mid-Output NextSeq 
	//	5 letter/number = MiSeq
	
	/*--------------------------------------------------------------*/
	/*----------------             Main             ----------------*/
	/*--------------------------------------------------------------*/
	
	/**
	 * Runs the inherited test routine on a single header.
	 * @param args Optional; args[0] is passed to test(), or null when absent.
	 */
	public static void main(String[] args) {
		final IlluminaHeaderParser2 parser=new IlluminaHeaderParser2();
		final String header;
		if(args.length>0){
			header=args[0];
		}else{
			header=null;
		}
		parser.test(header);
	}
	
	/*--------------------------------------------------------------*/
	/*----------------        Public Methods        ----------------*/
	/*--------------------------------------------------------------*/
	
	/**
	 * Loads a header into this parser, replacing any previous state.
	 * Stores the header in id, feeds it to the line parser, and caches
	 * the value of lp.indexOfWhitespace().
	 * @param id_ Read header to parse.
	 * @return This parser, to allow call chaining.
	 */
	public IlluminaHeaderParser2 parse(String id_) {
		id=id_;
		lp.set(id_);
		final int wsIndex=lp.indexOfWhitespace();
		whitespaceIndex=wsIndex;
		return this;
	}
	
	/**
	 * @return true when the header passes looksValid() and does not
	 * already pass looksShrunk().
	 */
	public boolean canShrink() {
		if(!looksValid()){
			return false;
		}
		return !looksShrunk();
	}
	
	/**
	 * @return true when the parser reports at least 8 terms and the
	 * cached whitespace index is 6 or 7.
	 */
	public boolean looksValid() {
		if(lp.terms()<8){
			return false;
		}
		return whitespaceIndex>=6 && whitespaceIndex<=7;
	}
	
	/**
	 * @return true when there are more than 3 terms and element 2 of
	 * lp.bounds() equals 2.
	 */
	public boolean looksShrunk() {
		if(lp.terms()<=3){
			return false;
		}
		//NOTE(review): presumably bounds() holds delimiter positions, so this detects
		//a very short leading portion of the header - confirm against LineParserS3.
		return lp.bounds().get(2)==2;
	}
	
	/** @return Term 0 as a String, or null when the parser has no terms. */
	@Override
	public String machine() {
		if(lp.terms()<=0){
			return null;
		}
		return lp.parseString(TERM_MACHINE);
	}

	/** @return Always null; this parser does not extract a sample name. */
	@Override
	public String sample() {
		return null;
	}

	/** @return Term 1 parsed as an int. */
	@Override
	public int run() {
		return lp.parseInt(TERM_RUN);
	}

	/** @return Term 2 as a String. */
	@Override
	public String flowcell() {
		return lp.parseString(TERM_FLOWCELL);
	}

	/** @return Term 3 parsed as an int. */
	@Override
	public int lane() {
		return lp.parseInt(TERM_LANE);
	}

	/** @return Term 4 parsed as an int. */
	@Override
	public int tile() {
		return lp.parseInt(TERM_TILE);
	}

	/** @return Term 5 parsed as an int. */
	@Override
	public int xPos() {
		return lp.parseInt(TERM_X);
	}

	/** @return Term 6 parsed as an int. */
	@Override
	public int yPos() {
		return lp.parseInt(TERM_Y);
	}

	/**
	 * @return Result of lp.parseChar for the term 1 past the whitespace index;
	 * the second argument 0 is presumably the character offset within that term.
	 */
	@Override
	public char pairCode() {
		final int term=whitespaceIndex+1;
		return lp.parseChar(term, 0);
	}

	/**
	 * @return Result of lp.parseChar for the term 2 past the whitespace index;
	 * the second argument 0 is presumably the character offset within that term.
	 */
	@Override
	public char chastityCode() {
		final int term=whitespaceIndex+2;
		return lp.parseChar(term, 0);
	}

	/** @return The term 3 past the whitespace index, parsed as an int. */
	@Override
	public int controlBits() {
		final int term=whitespaceIndex+3;
		return lp.parseInt(term);
	}

	/**
	 * @return The term 4 past the whitespace index as a String,
	 * or null when the header does not have that many terms.
	 */
	@Override
	public String barcode() {
		if(lp.terms()<=whitespaceIndex+4){
			return null;
		}
		return lp.parseString(whitespaceIndex+4);
	}

	/** @return Term 7 as a String, or null when the whitespace index is below 7. */
	@Override
	public String index3() {
		if(whitespaceIndex<TERM_INDEX3){
			return null;
		}
		return lp.parseString(TERM_INDEX3);
	}

	/** @return The value cached from lp.indexOfWhitespace() by the last parse(). */
	@Override
	public int whitespaceIndex() {
		return whitespaceIndex;
	}

	/**
	 * @return The term 5 past the whitespace index as a String,
	 * or null when the header does not have that many terms.
	 */
	@Override
	public String extra() {
		if(lp.terms()<=whitespaceIndex+5){
			return null;
		}
		return lp.parseString(whitespaceIndex+5);
	}
	
	/**
	 * Delegates to lp.appendTerm.
	 * @param bb Destination buffer.
	 * @param term Index of the term to append.
	 * @return Whatever lp.appendTerm returns.
	 */
	public ByteBuilder appendTerm(ByteBuilder bb, int term) {
		final ByteBuilder result=lp.appendTerm(bb, term);
		return result;
	}
	
	/**
	 * Appends lane:tile:x:y to the buffer, with colons between the four values.
	 * @param bb Destination buffer.
	 * @return The result of the final append call in the chain.
	 */
	public ByteBuilder appendCoordinates(ByteBuilder bb) {
		return bb
				.append(lane())
				.colon()
				.append(tile())
				.colon()
				.append(xPos())
				.colon()
				.append(yPos());
	}
	
	/**
	 * Packs lane, tile, x, and y into one long.
	 * Each step shifts the accumulator left (17 bits before tile, 20 before x,
	 * 20 before y) and XORs in the next value.  The fields stay disjoint only
	 * when tile is in [0, 2^17) and x and y are in [0, 2^20).
	 * @return The packed coordinates.
	 */
	public long encodeCoordinates() {
		long packed=lane();
		packed<<=17;
		packed^=tile();
		packed<<=20;
		packed^=xPos();
		packed<<=20;
		packed^=yPos();
		return packed;
	}
	
	/*--------------------------------------------------------------*/
	/*----------------        Private Fields        ----------------*/
	/*--------------------------------------------------------------*/
	
	/** Splits the current header on ':'. */
	private final LineParserS3 lp=new LineParserS3(':');
	
	/** @return The underlying line parser. */
	public LineParser lp() {
		return lp;
	}
	
	/** Value of lp.indexOfWhitespace() for the current header; -1 before any parse(). */
	int whitespaceIndex=-1;
	
	/*--------------------------------------------------------------*/
	/*----------------          Constants           ----------------*/
	/*--------------------------------------------------------------*/
	
	/** Positions of the fixed leading terms within a header. */
	private static final int TERM_MACHINE=0;
	private static final int TERM_RUN=1;
	private static final int TERM_FLOWCELL=2;
	private static final int TERM_LANE=3;
	private static final int TERM_TILE=4;
	private static final int TERM_X=5;
	private static final int TERM_Y=6;
	private static final int TERM_INDEX3=7;
	
}