1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118
|
package cardinality;
import shared.Parser;
import shared.Tools;
/**
* @author Brian Bushnell
* @date Feb 20, 2020
*
*/
public final class BBLog_simple extends CardinalityTracker {
/*--------------------------------------------------------------*/
/*---------------- Initialization ----------------*/
/*--------------------------------------------------------------*/
/** Create a LogLog with default parameters */
BBLog_simple(){
this(2048, 31, -1, 0f);
}
/** Create a LogLog with parsed parameters */
BBLog_simple(Parser p){
super(p);
maxArray=new long[buckets];
counts=(trackCounts ? new int[buckets] : null);
}
/**
* Create a LogLog with specified parameters
* @param buckets_ Number of buckets (counters)
* @param k_ Kmer length
* @param seed Random number generator seed; -1 for a random seed
* @param minProb_ Ignore kmers with under this probability of being correct
*/
BBLog_simple(int buckets_, int k_, long seed, float minProb_){
super(buckets_, k_, seed, minProb_);
maxArray=new long[buckets];
counts=(trackCounts ? new int[buckets] : null);
}
/*--------------------------------------------------------------*/
/*---------------- Methods ----------------*/
/*--------------------------------------------------------------*/
@Override
public final long cardinality(){
double difSum=0;
int count=0;
for(int i=0; i<maxArray.length; i++){
long val=maxArray[i];
if(val>0){
long dif=Long.MAX_VALUE-val;
difSum+=dif;
count++;
}
}
final double mean=difSum/Tools.max(count, 1);
final double estimatePerSet=2*(Long.MAX_VALUE/mean);
final double total=estimatePerSet*count*((count+buckets)/(float)(buckets+buckets));
long cardinality=(long)(total);
lastCardinality=cardinality;
return cardinality;
}
@Override
public int[] getCounts(){
return counts;
}
@Override
public final void add(CardinalityTracker log){
assert(log.getClass()==this.getClass());
add((BBLog_simple)log);
}
public void add(BBLog_simple log){
if(maxArray!=log.maxArray){
for(int i=0; i<buckets; i++){
maxArray[i]=Tools.max(maxArray[i], log.maxArray[i]);
}
}
}
@Override
public void hashAndStore(final long number){
// if(number%SKIPMOD!=0){return;}
// final long key=hash(number, tables[((int)number)&numTablesMask]);
final long key=Tools.hash64shift(number);
// if(key<minKey){return;}
final int bucket=(int)(key&bucketMask);
{
maxArray[bucket]=Tools.max(key, maxArray[bucket]);
}
}
@Override
public final float[] compensationFactorLogBucketsArray(){
return null;
}
/*--------------------------------------------------------------*/
/*---------------- Fields ----------------*/
/*--------------------------------------------------------------*/
/** Maintains state. These are the actual buckets. */
private final long[] maxArray;
/** Counts associated with values in the buckets. */
private final int[] counts;
// private static long minKey=(long)(0.75f*Long.MAX_VALUE); //non-atomic 15% faster without this
}
|