File: C.pm

package info (click to toggle)
findimagedupes 2.18-7
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, buster
  • size: 200 kB
  • sloc: perl: 1,041; makefile: 11
file content (117 lines) | stat: -rw-r--r-- 2,544 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
package findimagedupes::C;

use Inline
        C => 'DATA',
        NAME => 'findimagedupes::C',
        VERSION => '0.01',
;

our $VERSION = '0.01';
1;

__DATA__

__C__

/* efficient bit-comparison */

#include <stdint.h>
#include <string.h>

#define LOOKUP_SIZE 65536
#define FP_CHUNKS 16

typedef uint16_t FP[FP_CHUNKS];

unsigned int simplecountbits (unsigned int i) {
	unsigned int val = i, res = 0;

	while (val) {
		res += (val&1);
		val >>= 1;
	}
	return(res);
}

void diffbits (SV* oldfiles, SV* newfiles, unsigned int threshold, unsigned limit) {
	FP *the_data, *a, *b;
	unsigned int lookup[LOOKUP_SIZE];
	unsigned int i, j, k, m, bits, old, new;
	HV *oldhash;
	HE *oldhash_entry;
	HV *newhash;
	HE *newhash_entry;
	unsigned int numkeys = 0;
	SV *sv_val;
	Inline_Stack_Vars;

	if ((threshold<0) || (threshold>256)) {
		croak("ridiculous threshold specified");
	}

	/* pack fingerprints into C array */
	/* partly lifted from Inline::C-Cookbook */

	if (! SvROK(newfiles)) {
		croak("newfiles is not a reference");
	}
	newhash = (HV *)SvRV(newfiles);
	new = hv_iterinit(newhash);

	if (! SvROK(oldfiles)) {
		croak("oldfiles is not a reference");
	}
	oldhash = (HV *)SvRV(oldfiles);
	old = hv_iterinit(oldhash);

	numkeys = new+old;
	if (numkeys<2) {
		/* minor optimization: return without doing anything */
		/* malloc(0) could be bad... */
		Inline_Stack_Void;
	}
	the_data = (FP *)malloc(numkeys*sizeof(FP));
	if (!the_data) {
		croak("malloc failed");
	}

	for (i = 0; i<new; i++) {
		newhash_entry = hv_iternext(newhash);
		sv_val = hv_iterval(newhash, newhash_entry);
		memcpy(the_data+i, SvPV(sv_val, PL_na), sizeof(FP));
	}
	for (i = new; i<numkeys; i++) {
		oldhash_entry = hv_iternext(oldhash);
		sv_val = hv_iterval(oldhash, oldhash_entry);
		memcpy(the_data+i, SvPV(sv_val, PL_na), sizeof(FP));
	}

	/* initialise lookup table */
	/* XXX: fast enough? could optimise more or compile-in a static table */
	for (i=0; i<LOOKUP_SIZE; i++) {
		lookup[i] = simplecountbits(i);
	}

	/* look for matches */
	Inline_Stack_Reset;
	for (a=the_data, i=0, m=(limit>0 ? new : numkeys-1); i<m; a++, i++) {
		for (b=a+1, j=i+1; j<numkeys; b++, j++) {
			for (bits=0, k=0; k<FP_CHUNKS; k++) {
				bits += lookup[(*a)[k]^(*b)[k]];
				if (bits > threshold) goto abortmatch;
			}
			/* if (bits <= threshold) */ {
				Inline_Stack_Push(sv_2mortal(newSViv(i)));
				Inline_Stack_Push(sv_2mortal(newSViv(j)));
				Inline_Stack_Push(sv_2mortal(newSViv(bits)));
			}
abortmatch:;
		}
	}
	Inline_Stack_Done;

	/* clean up */
	free(the_data);
}