File: Ngram.xs

package info (click to toggle)
libtext-ngram-perl 0.14-1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 184 kB
  • sloc: ansic: 3,105; perl: 72; makefile: 2
file content (62 lines) | stat: -rw-r--r-- 1,498 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
/* -*- C -*- */
#define PERL_NO_GET_CONTEXT

#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"

#include "ppport.h"

/*
 * Count every n-gram of `window` units found in `sv` and add the counts to a
 * Perl hash (key = n-gram text, value = number of occurrences).
 *
 *   sv        - string to scan, read through SvPV. When DO_UTF8(sv) is true
 *               the window is measured in UTF-8 characters and the keys are
 *               stored as UTF-8 keys; otherwise it is measured in bytes.
 *   window    - n-gram length. A window of 0, or one longer than the string,
 *               produces no n-grams.
 *   counts_hv - in/out. When *counts_hv is NULL a new mortal hash is created
 *               and stored there; otherwise the existing hash is updated in
 *               place. A NULL counts_hv makes the call a no-op.
 */
void _process_buffer(pTHX_ SV* sv, unsigned int window, HV** counts_hv) {
    HV*    counts;
    STRLEN len;
    STRLEN windows;
    SV**   slot;
    char*  buffer = SvPV(sv, len);

    /* Nowhere to store or report results: bail out instead of dereferencing
     * a NULL pointer. */
    if (!counts_hv)
        return;

    if (!*counts_hv)
        *counts_hv = (HV*)sv_2mortal((SV*)newHV());
    counts = *counts_hv;

    /* A zero-length window has no n-grams. Without this guard `window - 1`
     * wraps around below and the UTF-8 scan runs far past the buffer. The
     * hash is still created above so callers always get a valid result. */
    if (window == 0)
        return;

    if (DO_UTF8(sv)) {
        /* Work in characters, not bytes. */
        STRLEN chars = sv_len_utf8(sv);

        windows = (chars < window) ? 0 : chars - window + 1;
        while (windows--) {
            /* `next` is where the following n-gram starts (one character
             * further on); `cur` ends up one past the current n-gram. */
            char*        next = buffer + UTF8SKIP(buffer);
            char*        cur  = next;
            unsigned int rest;

            for (rest = window - 1; rest--; )
                cur += UTF8SKIP(cur);

            /* A negative key length tells hv_fetch the key is UTF-8. */
            slot = hv_fetch(counts, buffer, -(I32)(cur - buffer), TRUE);
            if (slot)
                sv_inc(*slot);

            buffer = next;
        }
    }
    else {
        windows = (len < window) ? 0 : len - window + 1;
        for (; windows--; buffer++) {
            slot = hv_fetch(counts, buffer, (I32)window, TRUE);
            if (slot)
                sv_inc(*slot);
        }
    }
}

MODULE = Text::Ngram            PACKAGE = Text::Ngram

PROTOTYPES: DISABLE

HV*
_process_buffer(buffer, window)
    SV*          buffer
    unsigned int window
    CODE:
    {
        HV* newhv = NULL;
        _process_buffer(aTHX_ buffer, window, &newhv);
        RETVAL=newhv;
    }
    OUTPUT:
        RETVAL

void
_process_buffer_incrementally(buffer, window, hash)
    SV*          buffer
    unsigned int window
    HV* hash
    CODE:
        /* Accumulate the n-gram counts of `buffer` into the caller's `hash`.
         * The address of `hash` is handed to the C helper, which allocates a
         * new hash only when the pointed-to value is NULL; otherwise the
         * counts are written into the hash given here. Nothing is returned. */
        _process_buffer(aTHX_ buffer, window, &hash);