1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
|
package findimagedupes::C;
use Inline
C => 'DATA',
NAME => 'findimagedupes::C',
VERSION => '0.01',
;
our $VERSION = '0.01';
1;
__DATA__
__C__
/* efficient bit-comparison */
#include <stdint.h>
#include <string.h>
#define LOOKUP_SIZE 65536
#define FP_CHUNKS 16
typedef uint16_t FP[FP_CHUNKS];
unsigned int simplecountbits (unsigned int i) {
unsigned int val = i, res = 0;
while (val) {
res += (val&1);
val >>= 1;
}
return(res);
}
void diffbits (SV* oldfiles, SV* newfiles, unsigned int threshold, unsigned limit) {
FP *the_data, *a, *b;
unsigned int lookup[LOOKUP_SIZE];
unsigned int i, j, k, m, bits, old, new;
HV *oldhash;
HE *oldhash_entry;
HV *newhash;
HE *newhash_entry;
unsigned int numkeys = 0;
SV *sv_val;
Inline_Stack_Vars;
if ((threshold<0) || (threshold>256)) {
croak("ridiculous threshold specified");
}
/* pack fingerprints into C array */
/* partly lifted from Inline::C-Cookbook */
if (! SvROK(newfiles)) {
croak("newfiles is not a reference");
}
newhash = (HV *)SvRV(newfiles);
new = hv_iterinit(newhash);
if (! SvROK(oldfiles)) {
croak("oldfiles is not a reference");
}
oldhash = (HV *)SvRV(oldfiles);
old = hv_iterinit(oldhash);
numkeys = new+old;
if (numkeys<2) {
/* minor optimization: return without doing anything */
/* malloc(0) could be bad... */
Inline_Stack_Void;
}
the_data = (FP *)malloc(numkeys*sizeof(FP));
if (!the_data) {
croak("malloc failed");
}
for (i = 0; i<new; i++) {
newhash_entry = hv_iternext(newhash);
sv_val = hv_iterval(newhash, newhash_entry);
memcpy(the_data+i, SvPV(sv_val, PL_na), sizeof(FP));
}
for (i = new; i<numkeys; i++) {
oldhash_entry = hv_iternext(oldhash);
sv_val = hv_iterval(oldhash, oldhash_entry);
memcpy(the_data+i, SvPV(sv_val, PL_na), sizeof(FP));
}
/* initialise lookup table */
/* XXX: fast enough? could optimise more or compile-in a static table */
for (i=0; i<LOOKUP_SIZE; i++) {
lookup[i] = simplecountbits(i);
}
/* look for matches */
Inline_Stack_Reset;
for (a=the_data, i=0, m=(limit>0 ? new : numkeys-1); i<m; a++, i++) {
for (b=a+1, j=i+1; j<numkeys; b++, j++) {
for (bits=0, k=0; k<FP_CHUNKS; k++) {
bits += lookup[(*a)[k]^(*b)[k]];
if (bits > threshold) goto abortmatch;
}
/* if (bits <= threshold) */ {
Inline_Stack_Push(sv_2mortal(newSViv(i)));
Inline_Stack_Push(sv_2mortal(newSViv(j)));
Inline_Stack_Push(sv_2mortal(newSViv(bits)));
}
abortmatch:;
}
}
Inline_Stack_Done;
/* clean up */
free(the_data);
}
|