/* -*- C -*- */
#define PERL_NO_GET_CONTEXT
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include "ppport.h"
/*
 * Count every n-gram of `window` characters in `sv`, incrementing the
 * matching entry of the hash *counts_hv (keys are the n-grams, values the
 * running counts).
 *
 *   sv         string to scan; its UTF-8 flag decides whether a "character"
 *              is a byte or a UTF-8 encoded code point.
 *   window     n-gram length in characters.  A window of 0 yields no
 *              n-grams (the hash is still created if needed).
 *   counts_hv  in/out: address of the hash to update.  If *counts_hv is
 *              NULL a new mortal hash is created and stored there.  If
 *              counts_hv itself is NULL there is nowhere to put the result
 *              and the function does nothing.
 */
void _process_buffer(pTHX_ SV* sv, unsigned int window, HV** counts_hv) {
    HV* counts;
    STRLEN len;
    STRLEN windows;
    SV** slot;
    char* buffer = SvPV(sv, len);

    if (!counts_hv)
        return;
    if (!*counts_hv)
        *counts_hv = (HV*)sv_2mortal((SV*)newHV());
    counts = *counts_hv;

    /* A zero-length n-gram is meaningless; in the UTF-8 branch it would
     * also make the character-skipping loop wrap around and run far past
     * the end of the buffer. */
    if (window == 0)
        return;

    if (DO_UTF8(sv)) {
        char* next;
        char* cur;
        unsigned int c;

        /* From here on `len` is measured in characters, not bytes. */
        len = sv_len_utf8(sv);
        windows = (len < window) ? 0 : len - window + 1;
        while (windows--) {
            /* `next` is where the following n-gram starts: one character
             * after the start of the current one. */
            next = buffer + UTF8SKIP(buffer);
            /* Advance `cur` over the remaining window - 1 characters so it
             * points just past the end of the current n-gram. */
            cur = next;
            for (c = 1; c < window; c++)
                cur += UTF8SKIP(cur);
            /* A negative key length tells hv_fetch the key is UTF-8. */
            slot = hv_fetch(counts, buffer, -(I32)(cur - buffer), TRUE);
            if (slot)
                sv_inc(*slot);
            buffer = next;
        }
    }
    else {
        /* Byte string: every n-gram is exactly `window` bytes long. */
        windows = (len < window) ? 0 : len - window + 1;
        while (windows--) {
            slot = hv_fetch(counts, buffer, (I32)window, TRUE);
            if (slot)
                sv_inc(*slot);
            buffer++;
        }
    }
}
MODULE = Text::Ngram PACKAGE = Text::Ngram
PROTOTYPES: DISABLE
HV*
_process_buffer(buffer, window)
    SV* buffer
    unsigned int window
  PREINIT:
    /* Left NULL so the C helper creates a fresh (mortal) hash of counts. */
    HV* counts = NULL;
  CODE:
    /* Tally the n-grams of `window` characters found in `buffer`. */
    _process_buffer(aTHX_ buffer, window, &counts);
    RETVAL = counts;
  OUTPUT:
    RETVAL
void
_process_buffer_incrementally(buffer, window, hash)
    SV* buffer
    unsigned int window
    HV* hash
  CODE:
    {
        /* Add the n-gram counts for `buffer` to the caller-supplied hash. */
        _process_buffer(aTHX_ buffer, window, &hash);
    }
|