File: Scoring.pm

package info (click to toggle)
fuzzyocr 3.6.0-15
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 804 kB
  • sloc: perl: 3,127; sh: 45; makefile: 2
file content (121 lines) | stat: -rw-r--r-- 4,151 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at:
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>

use strict;
package FuzzyOcr::Scoring;

use base 'Exporter';
our @EXPORT_OK = qw(wrong_ctype corrupt_img known_img_hash wrong_extension);

use lib qw(..);
use FuzzyOcr::Config qw(get_pms get_config);
use FuzzyOcr::Logging qw(infolog);

# Provide custom scoring functions

sub wrong_ctype {
    my $conf = get_config();
    my $pms = get_pms();
    my ( $format, $ctype ) = @_;
    if ($conf->{'focr_wrongctype_score'}) {
        my $debuginfo = "";
        if ( $conf->{"focr_verbose"} > 0 ) {
            $debuginfo = 
              ("Image has format \"$format\" but content-type is \"$ctype\"");
        }
        infolog($debuginfo);
        my $ws = sprintf( "%0.3f", $conf->{'focr_wrongctype_score'} );
        for my $set ( 0 .. 3 ) {
            $pms->{conf}->{scoreset}->[$set]->{"FUZZY_OCR_WRONG_CTYPE"} = $ws;
        }
	my @dinfo = split('\n', $debuginfo);
        foreach (@dinfo) {
            $pms->test_log($_);
        }
        $pms->_handle_hit( "FUZZY_OCR_WRONG_CTYPE",
            $conf->{'focr_wrongctype_score'}, "BODY: ", "BODY",
	    $pms->{conf}->get_description_for_rule("FUZZY_OCR_WRONG_CTYPE"));
    }
}

sub wrong_extension {
    my $conf = get_config();
    my $pms = get_pms();
    my ( $format, $suffix ) = @_;
    if ($conf->{'focr_wrongext_score'}) {
        my $debuginfo = "";
        if ( $conf->{"focr_verbose"} > 0 ) {
            $debuginfo = 
              ("Image has format \"$format\" but file extension is \"$suffix\"");
        }
        infolog($debuginfo);
        my $ws = sprintf( "%0.3f", $conf->{'focr_wrongext_score'} );
        for my $set ( 0 .. 3 ) {
            $pms->{conf}->{scoreset}->[$set]->{"FUZZY_OCR_WRONG_EXTENSION"} = $ws;
        }
	my @dinfo = split('\n', $debuginfo);
        foreach (@dinfo) {
            $pms->test_log($_);
        }
        $pms->_handle_hit( "FUZZY_OCR_WRONG_EXTENSION",
            $conf->{'focr_wrongext_score'}, "BODY: ", "BODY",
            $pms->{conf}->get_description_for_rule("FUZZY_OCR_WRONG_EXTENSION"));
    }
}

sub corrupt_img {
    my $conf = get_config();
    my $pms = get_pms();
    my ($score, $err) = @_;
    if ($score>0) {
        my $debuginfo = "";
        if ( $conf->{"focr_verbose"} > 0 ) {
            chomp($err);
            $debuginfo = ("Corrupt image: $err");
        }
        infolog($debuginfo);
        my $ws = sprintf( "%0.3f", $score );
        for my $set ( 0 .. 3 ) {
            $pms->{conf}->{scoreset}->[$set]->{"FUZZY_OCR_CORRUPT_IMG"} = $ws;
        }
	my @dinfo = split('\n', $debuginfo);
        foreach (@dinfo) {
                $pms->test_log($_);
        }
        $pms->_handle_hit( "FUZZY_OCR_CORRUPT_IMG", $score, "BODY: ", "BODY",
            $pms->{conf}->get_description_for_rule("FUZZY_OCR_CORRUPT_IMG"));
    }
}

sub known_img_hash {
    my $conf = get_config();
    my $pms = get_pms();
    my $score = $_[0] || $conf->{'focr_base_score'};
    my $debuginfo = $_[1] ? "\n$_[1]" : '';
    my $ws = sprintf( "%0.3f", $score );
    for my $set ( 0 .. 3 ) {
        $pms->{conf}->{scoreset}->[$set]->{"FUZZY_OCR_KNOWN_HASH"} = $ws;
    }
    my @dinfo = split('\n', $debuginfo);
    foreach (@dinfo) {
        $pms->test_log($_);
    }
    $pms->_handle_hit( "FUZZY_OCR_KNOWN_HASH", $score, "BODY: ", "BODY",
        $pms->{conf}->get_description_for_rule("FUZZY_OCR_KNOWN_HASH"));
}

1;