File: CovStatsLine.java

package info (click to toggle)
bbmap 39.20+dfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 26,024 kB
  • sloc: java: 312,743; sh: 18,099; python: 5,247; ansic: 2,074; perl: 96; makefile: 39; xml: 38
file content (147 lines) | stat: -rwxr-xr-x 5,564 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
package jgi;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Locale;

import shared.Tools;

/**
 * One row of a tab-delimited coverage statistics table, parsed into typed fields.
 * Column positions are looked up by name from a header line, so
 * {@link #initializeHeader(String)} must be called before any row is constructed.
 * The resolved column positions are static and therefore shared by all instances.
 * 
 * @author Brian Bushnell
 * @date Oct 10, 2014
 *
 */
public class CovStatsLine {
	
	/**
	 * Parses one data line by splitting it on tab characters.
	 * @param s A single tab-delimited data line (not a header line)
	 */
	public CovStatsLine(String s){
		this(s.split("\t"));
	}

	/**
	 * Parses one data line that has already been split into columns.
	 * Columns of the basic format:
	 * ID	Avg_fold	Length	Ref_GC	Covered_percent	Covered_bases	Plus_reads	Minus_reads	(optional.... Read_GC)
	 * Each field is read from the column index recorded by {@link #initializeHeader(String)};
	 * a field whose column was absent from the header keeps its default value.
	 * @param split Columns of one data line
	 */
	public CovStatsLine(String[] split) {
		assert(split.length>=8) : Arrays.toString(split);
		assert(!split[0].startsWith("#")) : Arrays.toString(split);
		assert(id_FNUM==0) : "Not initialized with header: "+id_FNUM;

		if(id_FNUM>=0){id=split[id_FNUM];}
		if(length_FNUM>=0){length=Integer.parseInt(split[length_FNUM]);}
		if(coveredBases_FNUM>=0){coveredBases=Integer.parseInt(split[coveredBases_FNUM]);}
		if(plusReads_FNUM>=0){plusReads=Long.parseLong(split[plusReads_FNUM]);}
		if(minusReads_FNUM>=0){minusReads=Long.parseLong(split[minusReads_FNUM]);}
		if(avgFold_FNUM>=0){avgFold=Double.parseDouble(split[avgFold_FNUM]);}
		if(refGC_FNUM>=0){refGC=Double.parseDouble(split[refGC_FNUM]);}
		if(median_FNUM>=0){median=Long.parseLong(split[median_FNUM]);}
		if(underMin_FNUM>=0){underMin=Integer.parseInt(split[underMin_FNUM]);}
		if(readGC_FNUM>=0){readGC=Double.parseDouble(split[readGC_FNUM]);}
		if(stdDev_FNUM>=0){stdDev=Double.parseDouble(split[stdDev_FNUM]);}
	}
	
	/**
	 * @return 100*coveredBases/length as a double; the divisor is
	 * Tools.max(1, length), which guards against a zero length.
	 */
	public final double coveredPercent(){
		return (100.0*coveredBases)/Tools.max(1, length);
	}
	
	/** @return plusReads+minusReads */
	public final long reads(){return plusReads+minusReads;}
	
	/**
	 * Merges another line into this one, in place.
	 * avgFold and refGC become length-weighted means, and readGC becomes a
	 * read-count-weighted mean.  length, coveredBases, plusReads, minusReads,
	 * median, and underMin are summed.  id and stdDev are left unchanged.
	 * @param csl Line to merge into this one; it is not modified
	 */
	public void add(CovStatsLine csl) {
		//The weighted means must be computed before length and the read counts are updated below.
		double invlen2=1d/Tools.max(1, length+csl.length);
		avgFold=((avgFold*length)+(csl.avgFold*csl.length))*invlen2;
		refGC=((refGC*length)+(csl.refGC*csl.length))*invlen2;
		readGC=((readGC*reads())+(csl.readGC*csl.reads()))*1.0/(Tools.max(1, reads()+csl.reads()));
		
		length+=csl.length;
		coveredBases+=csl.coveredBases;
		plusReads+=csl.plusReads;
		minusReads+=csl.minusReads;
		//NOTE(review): a sum of medians is not the median of the combined data - confirm this is intended.
		median=median+csl.median;
		underMin=underMin+csl.underMin;
	}
	
	/**
	 * Formats this line as 12 tab-delimited columns via Tools.format:
	 * id, avgFold, length, refGC, coveredPercent(), coveredBases, plusReads,
	 * minusReads, median, underMin, readGC, stdDev.
	 * Floating-point columns use the "%.4f" format.
	 */
	@Override
	public String toString(){
		return Tools.format("%s\t%.4f\t%d\t%.4f\t%.4f\t%d\t%d\t%d\t%d\t%d\t%.4f\t%.4f", id, avgFold, length,
				refGC, coveredPercent(), coveredBases, plusReads, minusReads, median, underMin, readGC, stdDev);
	}
	
	/**
	 * Records the column index of every known field from a header line.
	 * Leading '#' characters are stripped, the header is split on tabs, and names
	 * are matched case-insensitively.  Any column whose name starts with "under_"
	 * is treated as the under_min column, whatever its suffix.
	 * Fields not present in the header get index -1.
	 * @param header Tab-delimited header line; the ID column must come first
	 */
	public static void initializeHeader(String header){
		while(header.startsWith("#")){header=header.substring(1);}
		String[] split=header.split("\t");
		HashMap<String, Integer> map=new HashMap<String, Integer>(23);
		for(int i=0; i<split.length; i++){
			//Locale.ROOT keeps the mapping stable; e.g. a Turkish default locale lower-cases "ID" to a dotless-i form.
			String s=split[i].toLowerCase(Locale.ROOT);
			if(s.startsWith("under_")){s="under_min";}
			//Store the normalized name; storing the raw name would discard the under_ normalization above.
			map.put(s, i);
		}
		id_FNUM=fieldIndex(map, "id");
		avgFold_FNUM=fieldIndex(map, "avg_fold");
		length_FNUM=fieldIndex(map, "length");
		refGC_FNUM=fieldIndex(map, "ref_gc");
		coveredBases_FNUM=fieldIndex(map, "covered_bases");
		plusReads_FNUM=fieldIndex(map, "plus_reads");
		minusReads_FNUM=fieldIndex(map, "minus_reads");
		median_FNUM=fieldIndex(map, "median_fold");
		underMin_FNUM=fieldIndex(map, "under_min");
		readGC_FNUM=fieldIndex(map, "read_gc");
		stdDev_FNUM=fieldIndex(map, "std_dev");
		
		assert(id_FNUM==0) : "Bad header: "+id_FNUM+"\n"+header+"\n"+map;
	}
	
	/** Returns the column index stored for key, or -1 if the header had no such column. */
	private static int fieldIndex(HashMap<String, Integer> map, String key){
		Integer idx=map.get(key);
		return idx==null ? -1 : idx.intValue();
	}
	
	//Earlier fixed header layouts, kept for reference.
//	public static final String header1="#ID\tAvg_fold\tLength\tRef_GC\tCovered_percent\tCovered_bases\tPlus_reads\tMinus_reads\tMedian_fold\tUnder_min\tRead_GC";
//	public static final String header2="#ID\tAvg_fold\tLength\tRef_GC\tCovered_percent\tCovered_bases\tPlus_reads\tMinus_reads\tRead_GC";
	
	/** Value of the ID column */
	public String id;
	/** Value of the Length column */
	public int length;
	/** Value of the Covered_bases column */
	public int coveredBases;
	/** Value of the Plus_reads column */
	public long plusReads;
	/** Value of the Minus_reads column */
	public long minusReads;
	/** Value of the Avg_fold column */
	public double avgFold;
	/** Value of the Ref_GC column */
	public double refGC;
	/** Value of the Median_fold column */
	public long median;
	/** Value of the column whose name starts with Under_ */
	public int underMin;
	/** Value of the Read_GC column */
	public double readGC;
	/** Value of the Std_Dev column */
	public double stdDev;
	
	/* Column indices set by initializeHeader; -1 means the column is absent or the header has not been read. */
	private static int id_FNUM=-1;
	private static int length_FNUM=-1;
	private static int coveredBases_FNUM=-1;
	private static int plusReads_FNUM=-1;
	private static int minusReads_FNUM=-1;
	private static int avgFold_FNUM=-1;
	private static int refGC_FNUM=-1;
	private static int median_FNUM=-1;
	private static int underMin_FNUM=-1;
	private static int readGC_FNUM=-1;
	private static int stdDev_FNUM=-1;
}