File: spell-check-logic.cgi

package info (click to toggle)
moodle 1.6.3-2%2Betch3
  • links: PTS
  • area: main
  • in suites: etch
  • size: 37,172 kB
  • ctags: 51,688
  • sloc: php: 231,916; sql: 5,631; xml: 2,688; sh: 1,185; perl: 638; makefile: 48; pascal: 36
file content (155 lines) | stat: -rw-r--r-- 5,000 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#! /usr/bin/perl -w

# Spell Checker Plugin for HTMLArea-3.0
# Implementation by Mihai Bazon.  Sponsored by www.americanbible.org
#
# htmlArea v3.0 - Copyright (c) 2002 interactivetools.com, inc.
# This notice MUST stay intact for use (see license.txt).
#
# A free WYSIWYG editor replacement for <textarea> fields.
# For full source code and docs, visit http://www.interactivetools.com/
#
# Version 3.0 developed by Mihai Bazon for InteractiveTools.
#	     http://students.infoiasi.ro/~mishoo
#
# $Id: spell-check-logic.cgi,v 1.1 2006/03/04 15:24:14 julmis Exp $

use strict;
use utf8;
use Encode;
use Text::Aspell;
use HTML::Parser;
use HTML::Entities;
use CGI;

my $debug = 0;

open (DEBUG, '>:encoding(UTF-8)', '> /tmp/spell-check-debug.log') if $debug;

# use Data::Dumper; # for debug only

my $speller = new Text::Aspell;
my $cgi = new CGI;

# FIXME: report a nice error...
die "Can't create speller!" unless $speller;

# add configurable option for this
my $dict = $cgi->param('dictionary') || 'en_US';
$speller->set_option('lang', $dict);

# ultra, fast, normal, bad-spellers
# bad-spellers seems to cause segmentation fault
$speller->set_option('sug-mode', 'ultra');

my @replacements = ();

sub text_handler {
    my ($offset, $length, $text, $is_cdata) = @_;
    if ($is_cdata or $text =~ /^\s*$/) {
        return 0;
    }
    # print STDERR "*** OFFSET: $offset, LENGTH: $length, $text\n";
    $text = decode_entities($text);
    $text =~ s/&#([0-9]+);/chr($1)/eg;
    $text =~ s/&#x([0-9a-fA-F]+);/chr(hex $1)/eg;
    my $repl = spellcheck($text);
    if ($repl) {
        push(@replacements, [ $offset, $length, $repl ]);
    }
}

my $p = HTML::Parser->new
  (api_version => 3,
   handlers => { start => [ sub {
                                my ($self, $tagname, $attrs) = @_;
                                # print STDERR "\033[1;31m parsing tag: $tagname\033[0m\n";
                                # following we skip words that have already been marked as "fixed".
                                if ($tagname eq "span" and $attrs->{class} =~ /HA-spellcheck-fixed/) {
                                    $self->handler(text => undef);
                                }
                            }, "self, tagname, attr"
                          ],
                 end => [ sub {
                              my ($self, $tagname) = @_;
                              # print STDERR "\033[1;32m END tag: $tagname\033[0m\n";
                              $self->handler(text => \&text_handler, 'offset, length, dtext, is_cdata');
                          }, "self, tagname"
                        ]
               }
  );
$p->handler(text => \&text_handler, 'offset, length, dtext, is_cdata');
$p->case_sensitive(1);
my $file_content = $cgi->param('content');

if ($debug) {
    open (FOO, '>:encoding(UTF-8)', '/tmp/spell-check-before');
    print FOO $file_content, "\n";
    close(FOO);
}

$p->parse($file_content);
$p->eof();

foreach (reverse @replacements) {
    substr($file_content, $_->[0], $_->[1], $_->[2]);
}

# we output UTF-8
binmode(STDOUT, ':encoding(UTF-8)'); # apparently, this sucks.
print "Content-type: text/html; charset: utf-8\n\n";
print qq^
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<link rel="stylesheet" type="text/css" media="all" href="spell-check-style.css" />
</head>
<body onload="window.parent.finishedSpellChecking();">^;

print $file_content;
if ($cgi->param('init') eq '1') {
    my @dicts = $speller->dictionary_info();
    my $dictionaries = '';
    foreach my $i (@dicts) {
        $dictionaries .= ',' . $i->{name} unless $i->{jargon};
    }
    $dictionaries =~ s/^,//;
    print qq^
<div id="HA-spellcheck-dictionaries"
>$dictionaries</div>
^;
}

if ($debug) {
    open (FOO, '>:encoding(UTF-8)', '/tmp/spell-check-after');
    print FOO $file_content, "\n";
    close(FOO);
}

print '</body></html>';

# Perl is beautiful.
sub spellcheck {
    my $text = shift;
    sub check {                 # called for each word in the text
        # input is in UTF-8
        my $U_word = shift;
        my $word = encode($speller->get_option('encoding'), $U_word);
        print DEBUG "*$U_word* ----> |$word|\n" if $debug;
        if ($speller->check($word)) {
            return $U_word;      # we return the word in UTF-8
        } else {
            # we should have suggestions; give them back to browser in UTF-8
            my $suggestions = decode($speller->get_option('encoding'), join(',', $speller->suggest($word)));
            my $ret = '<span class="HA-spellcheck-error">'.$U_word.'</span><span class="HA-spellcheck-suggestions">'.$suggestions.'</span>';
            return $ret;
        }
    }
    $text =~ s/([[:word:]']+)/check($1)/egs;
    # $text =~ s/(\w+)/check($1)/egs;

    # the following is definitely what we want to use; too bad it sucks most.
    # $text =~ s/(\p{IsWord}+)/check($1)/egs;
    return $text;
}