File: ReadComparatorClump.java

package info (click to toggle)
bbmap 39.20%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 26,024 kB
  • sloc: java: 312,743; sh: 18,099; python: 5,247; ansic: 2,074; perl: 96; makefile: 39; xml: 38
file content (128 lines) | stat: -rwxr-xr-x 3,008 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
package sort;

import dna.AminoAcid;
import shared.Tools;
import stream.Read;

/**
 * Sorts similarly to Clumpify.
 * <p>
 * Reads are ordered by a pivot that {@link #set(Read)} stores on each read:
 * first by {@code numericID}, then by {@code strand()}, then by {@code start}.
 * Ties are broken by the same fields of the mates and finally by {@code id}.
 * The comparator only reads those fields; {@link #set(Read)} must have been
 * called on the reads beforehand for the order to be meaningful.
 * <p>
 * The class is a singleton (see {@link #comparator}); the sort direction set
 * through {@link #setAscending(boolean)} is therefore shared by all users.
 * @author Brian Bushnell
 * @date Oct 5, 2022
 *
 */
public final class ReadComparatorClump extends ReadComparator {
	
	/** Private; the only instance is {@link #comparator}. */
	private ReadComparatorClump(){}
	
	/**
	 * Compares two reads by their pivot fields, then by their mates' pivot fields,
	 * then by id. The result is multiplied by the current direction
	 * (1 for ascending, -1 for descending).
	 * NOTE(review): a.mate, b.mate and a.id are dereferenced without a null check,
	 * so a and b, and a.id when all pivot fields tie, are expected to be non-null.
	 * @param a First read
	 * @param b Second read
	 * @return Negative, zero, or positive according to the configured direction
	 */
	@Override
	public int compare(Read a, Read b) {
		int x=compareInner(a, b);
		if(x==0){x=compareInner(a.mate, b.mate);}
		if(x==0){x=a.id.compareTo(b.id);}
		return ascending*x;
	}

	/**
	 * Direction-independent comparison of the pivot fields of two reads.
	 * Identical references (including two nulls) are equal; a null read orders
	 * after a non-null read.
	 * @param a First read, may be null
	 * @param b Second read, may be null
	 * @return -1, 0, or 1
	 */
	private static int compareInner(Read a, Read b) {
		if(a==b){return 0;}
		if(a==null){return 1;}
		if(b==null){return -1;}
		if(a.numericID!=b.numericID){return a.numericID>b.numericID ? 1 : -1;}
		//Integer.compare instead of subtraction: same sign, but cannot overflow.
		if(a.strand()!=b.strand()){return Integer.compare(a.strand(), b.strand());}
		if(a.start!=b.start){return Integer.compare(a.start, b.start);}
		return 0;
	}
	
	/**
	 * Finds the global maximum, forward and reverse.
	 * <p>
	 * Every window of k consecutive valid bases is encoded at 2 bits per base, both
	 * as-is (kmer) and from the complement codes in reverse order (rkmer). The
	 * larger of the two encodings is passed to {@code Tools.hash64shift}, and the
	 * window with the greatest returned value wins; on ties the earliest window is
	 * kept. The winner is stored on the read:
	 * {@code numericID} = the winning encoding, strand = 0 if it was the forward
	 * encoding else 1, {@code start} = index of the last base of the window.
	 * <p>
	 * Reads with no base array, fewer than k bases, or no window of k valid bases
	 * are delegated to {@link #setShort(Read)}.
	 * @param r Read to annotate; must not be null
	 * @return The value stored in {@code r.numericID}
	 */
	public static final long set(Read r){
		final byte[] bases=r.bases;
		//Reads with no bases or fewer than k bases cannot hold a full k-mer.
		if(bases==null || r.length()<k){return setShort(r);}
		
		long kmer=0;
		long rkmer=0;
		int len=0;//Number of consecutive valid bases ending at position i
		
		//Long.MIN_VALUE doubles as the "nothing found" sentinel; because the
		//comparison below is strict, a code equal to it is never selected.
		long topCode=Long.MIN_VALUE;
		long topKmer=Long.MIN_VALUE;
		int topStrand=0;
		int topStop=0;
		
		for(int i=0; i<bases.length; i++){
			byte b=bases[i];
			long x=AminoAcid.baseToNumber[b];
			long x2=AminoAcid.baseToComplementNumber[b];
			//Roll the forward k-mer left and the reverse k-mer right by one base.
			kmer=((kmer<<2)|x)&mask;
			rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
			
			//A negative code (presumably an undefined base such as N) restarts the window.
			if(x<0){
				len=0;
			}else{len++;}
			
			if(len>=k){
				final long kmax=Tools.max(kmer, rkmer);
				final long code=Tools.hash64shift(kmax);
				
				if(code>topCode){
					topKmer=kmax;
					topCode=code;
					topStrand=(kmax==kmer ? 0 : 1);
					topStop=i;
				}
			}
		}
		//No window of k valid bases was found.
		if(topCode==Long.MIN_VALUE){
			return setShort(r);
		}
		r.numericID=topKmer;
		r.setStrand(topStrand);
		r.start=topStop;
		return topKmer;
	}
	
	/**
	 * Generates a key when the read is shorter than k.
	 * <p>
	 * Encodes the first min(length, k) bases using the {@code baseToNumber0} and
	 * {@code baseToComplementNumber0} tables (presumably variants that map
	 * undefined bases to 0 instead of a negative code - TODO confirm), and stores
	 * the larger of the forward and reverse encodings without hashing:
	 * {@code numericID} = that encoding, strand = 0 if it was the forward encoding
	 * else 1, {@code start} = (number of bases used)-1.
	 * A read with a null or empty base array gets key 0, strand 0 and start -1.
	 * @param r Read to annotate; must not be null
	 * @return The value stored in {@code r.numericID}
	 */
	public static final long setShort(Read r){
		final byte[] bases=r.bases;
		//A read without a base array is treated like an empty read rather than throwing.
		final int max=(bases==null ? 0 : Tools.min(bases.length, k));
		long kmer=0;
		long rkmer=0;
		
		for(int i=0; i<max; i++){
			byte b=bases[i];
			long x=AminoAcid.baseToNumber0[b];
			long x2=AminoAcid.baseToComplementNumber0[b];
			kmer=((kmer<<2)|x)&mask;
			rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
		}

		final long kmax=Tools.max(kmer, rkmer);
		r.numericID=kmax;
		r.setStrand((kmax==kmer) ? 0 : 1);
		r.start=max-1;
		return kmax;
	}
	
	/**
	 * Sets the sort direction of this (shared) comparator.
	 * @param asc true for ascending order, false for descending
	 */
	@Override
	public void setAscending(boolean asc){
		ascending=(asc ? 1 : -1);
	}
	
	//Retained for reference; k is currently a compile-time constant.
//	public void setK(int k_){
//		k=k_;
//		assert(k>0 && k<=32) : k;
//		
//		shift=2*k;
//		shift2=shift-2;
//		mask=(shift>63 ? -1L : ~((-1L)<<shift));
//	}
	
	/** The single shared instance of this comparator. */
	public static final ReadComparatorClump comparator=new ReadComparatorClump();
	
	/** Multiplier applied to comparison results: 1 for ascending, -1 for descending (the default). */
	private int ascending=-1;
	
	/** K-mer length, in bases. */
	private static final int k=31;
	/** Number of bits occupied by a k-mer at 2 bits per base. */
	private static final int shift=2*k;
	/** Bit offset of the highest base slot; new bases of the reverse k-mer are inserted here. */
	private static final int shift2=shift-2;
	/** Keeps only the low {@code shift} bits of a rolling k-mer. */
	private static final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
	
}