File: BarcodeMappingStats.java

package info (click to toggle)
bbmap 39.20+dfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 26,024 kB
  • sloc: java: 312,743; sh: 18,099; python: 5,247; ansic: 2,074; perl: 96; makefile: 39; xml: 38
file content (131 lines) | stat: -rwxr-xr-x 4,283 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
package barcode;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map.Entry;

import fileIO.ByteStreamWriter;
import shared.Tools;
import stream.Read;
import structures.ByteBuilder;

/**
 * Accumulates counts of reads per barcode, and for each read barcode the counts
 * of the reference keys those reads were attributed to.
 * Instances can be combined with {@link #merge(BarcodeMappingStats)} and
 * written out as a tab-delimited table with {@link #writeStats(String, boolean)}.
 * The maps are plain HashMaps and no synchronization is performed here.
 */
public class BarcodeMappingStats {
	
	/*--------------------------------------------------------------*/
	/*----------------          Constructor         ----------------*/
	/*--------------------------------------------------------------*/
	
	/** Creates an empty stats object. */
	public BarcodeMappingStats() {}
	
	/*--------------------------------------------------------------*/
	/*----------------            Methods           ----------------*/
	/*--------------------------------------------------------------*/
	
	/**
	 * Adds all counts from another stats object into this one.
	 * The other object is only read, not modified.
	 * @param bs Stats to add; must not be null.
	 */
	public void merge(BarcodeMappingStats bs) {
		for(Entry<String, Barcode> e : bs.codeMap.entrySet()) {
			Barcode b=e.getValue();
			incrementCodeMap(b.name, b.count());
		}
		for(Entry<String, HashMap<String, Barcode>> e : bs.sourceMap.entrySet()) {
			final String readKey=e.getKey();
			final HashMap<String, Barcode> map=e.getValue();
			for(Entry<String, Barcode> ee : map.entrySet()) {
				final Barcode b=ee.getValue();
				final String refKey=ee.getKey();
				incrementSourceMap(readKey, refKey, b.count());
			}
		}
	}
	
	/**
	 * Records one read: increments both the raw barcode count and the
	 * (read barcode, reference key) count by the read's pairCount().
	 * @param r The read whose barcode is counted.
	 * @param refKey Key of the reference the read was assigned to;
	 * null is recorded under "UNKNOWN".
	 */
	public void increment(Read r, String refKey){
		String barcode=r.barcode(true);
		incrementCodeMap(barcode, r.pairCount());
		incrementSourceMap(barcode, refKey==null ? "UNKNOWN" : refKey, r.pairCount());
	}
	
	/**
	 * Adds amt to the raw count for a barcode, creating the entry if absent.
	 * @param key Barcode name.
	 * @param amt Amount to add.
	 */
	public void incrementCodeMap(String key, long amt) {
		Barcode b=codeMap.get(key);
		if(b==null){
			b=new Barcode(key);
			codeMap.put(key, b);
		}
		b.increment(amt);
	}
	
	/**
	 * Adds amt to the count of reads with barcode readKey attributed to refKey,
	 * creating the inner table and/or the entry if absent.
	 * @param readKey Barcode of the read (outer key).
	 * @param refKey Key of the reference (inner key).
	 * @param amt Amount to add.
	 */
	public void incrementSourceMap(String readKey, String refKey, long amt) {
		HashMap<String, Barcode> map=sourceMap.get(readKey);
		if(map==null){
			map=new HashMap<String, Barcode>();
			sourceMap.put(readKey, map);
		}
		Barcode b=map.get(refKey);
		if(b==null){
			b=new Barcode(refKey);
			map.put(refKey, b);
		}
		b.increment(amt);
	}

	/**
	 * Writes a tab-delimited table with one row per (barcode, source) pair:
	 * barcode name, source name, count, and the count as a fraction of that
	 * barcode's total over all of its sources.
	 * The first header line reports the sum of all raw barcode counts.
	 * Barcodes with no source entries produce no rows; with no barcodes at all,
	 * only the two header lines are written.
	 * The result of closing the writer is OR'd into {@link #errorState}.
	 * @param outbarcodes Output file name.
	 * @param overwrite Passed through to the ByteStreamWriter constructor.
	 */
	public void writeStats(String outbarcodes, boolean overwrite) {
		final ByteBuilder bb=new ByteBuilder();
		final ArrayList<Barcode> codeList=toSortedList(codeMap);
		final long sum=sum(codeList);

		//NOTE(review): the trailing (false, true) arguments are presumably
		//append=false and allowSubprocess=true - confirm against ByteStreamWriter.
		final ByteStreamWriter bsw=new ByteStreamWriter(outbarcodes, overwrite, false, true);
		bsw.start();

		bsw.println("#Reads\t"+sum);
		bsw.println("#Barcode\tSource\tCount\tFraction");
		for(Barcode bc : codeList) {
			//May be absent if incrementCodeMap was called without incrementSourceMap;
			//toSortedList then yields an empty list and the barcode is skipped.
			final HashMap<String, Barcode> map=sourceMap.get(bc.name);
			final ArrayList<Barcode> sourceList=toSortedList(map);
			final long sum2=sum(sourceList);
			//max(1, ...) prevents division by zero when all counts are zero
			final double invSum2=1.0/(Tools.max(1, sum2));
			for(Barcode source : sourceList) {
				bb.append(bc.name).tab().append(source.name).tab().append(source.count()).tab().append(source.count()*invSum2, 6).nl();
				bsw.print(bb);
				bb.clear();//Reuse the same buffer for the next row
			}
		}
		errorState|=bsw.poisonAndWait();
	}
	
	/**
	 * Sums the counts of all barcodes in a list.
	 * @param list Barcodes to sum; may be empty.
	 * @return Total count, or 0 for an empty list.
	 */
	private static long sum(ArrayList<Barcode> list) {
		long sum=0;
		for(Barcode bc : list) {
			sum+=bc.count();
		}
		return sum;
	}
	
	/**
	 * Copies the values of a map into a new list sorted with Collections.sort,
	 * i.e. by Barcode's own compareTo ordering.
	 * @param map Source map; may be null or empty.
	 * @return A new sorted list; empty (never null) when map is null or empty,
	 * so callers can iterate the result without a null check.
	 */
	private static ArrayList<Barcode> toSortedList(HashMap<String, Barcode> map){
		if(map==null || map.isEmpty()){return new ArrayList<Barcode>(0);}
		ArrayList<Barcode> list=new ArrayList<Barcode>(map.size());
		for(Entry<String, Barcode> e : map.entrySet()) {
			list.add(e.getValue());
		}
		Collections.sort(list);
		return list;
	}
	
	/*--------------------------------------------------------------*/
	/*----------------            Fields            ----------------*/
	/*--------------------------------------------------------------*/
	
	/** Raw counts of barcodes */
	public HashMap<String, Barcode> codeMap=new HashMap<String, Barcode>();
	
	/** 
	 * A table of tables.  Key1 is the barcode of a read; key2 is where the read mapped to.
	 * The barcode for key2 tracks the number of times reads with key1 barcode mapped to key2's reference. 
	 * E.G. if key1 is ABC-DEF then the top barcode in its table would be expected to
	 * be ABC-DEF, and other entries would indicate contamination of that library.
	 */
	public HashMap<String, HashMap<String, Barcode>> sourceMap=new HashMap<String, HashMap<String, Barcode>>();
	
	/** Set to true if closing the output writer in writeStats reported an error. */
	public boolean errorState=false;
	
}