File: Digest.pm

package info (click to toggle)
spamassassin 4.0.2-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 22,988 kB
  • sloc: perl: 88,863; ansic: 5,193; sh: 3,737; javascript: 339; sql: 295; makefile: 209; python: 49
file content (99 lines) | stat: -rw-r--r-- 2,874 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
package Mail::SpamAssassin::Pyzor::Digest;

# Copyright 2018 cPanel, LLC.
# All rights reserved.
# http://cpanel.net
#
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>
#

use strict;
use warnings;

=encoding utf-8

=head1 NAME

Mail::SpamAssassin::Pyzor::Digest - Pyzor Digest module

=head1 SYNOPSIS

    my $digest = Mail::SpamAssassin::Pyzor::Digest::get( $mime_text );

=head1 DESCRIPTION

A reimplementation of L<https://github.com/SpamExperts/pyzor/blob/master/pyzor/digest.py>.

=cut

#----------------------------------------------------------------------

use Mail::SpamAssassin::Pyzor::Digest::Pieces ();
use Digest::SHA qw(sha1_hex);

our $VERSION = '0.03';

#----------------------------------------------------------------------

=head1 FUNCTIONS

=head2 $hex = get( $MSG )

This takes an email message in raw MIME text format (i.e., as saved in the
standard mbox format) and returns the message's Pyzor digest in lower-case
hexadecimal.

The output from this function should normally be identical to that of
the C<pyzor> script's C<digest> command. It is suitable for use in
L<Mail::SpamAssassin::Pyzor::Client>'s request methods.

=cut

sub get {
    my ($pms) = @_;
    return Digest::SHA::sha1_hex( ${ _get_predigest( $pms ) } );
}

# NB: This is called from the test.
sub _get_predigest {    ## no critic qw(RequireArgUnpacking)
    my ($pms) = @_;

    my $parsed = $pms->get_message();

    my @lines;

    my $payloads_ar = Mail::SpamAssassin::Pyzor::Digest::Pieces::digest_payloads($parsed);
    for my $payload (@$payloads_ar) {
        my @p_lines = Mail::SpamAssassin::Pyzor::Digest::Pieces::splitlines($payload);
        for my $line (@p_lines) {
            Mail::SpamAssassin::Pyzor::Digest::Pieces::normalize($line);

            next if !Mail::SpamAssassin::Pyzor::Digest::Pieces::should_handle_line($line);

            # Make sure we have an octet string.
            utf8::encode($line) if utf8::is_utf8($line);

            push @lines, $line;
        }
    }

    my $digest_sr = Mail::SpamAssassin::Pyzor::Digest::Pieces::assemble_lines( \@lines );
    return $digest_sr;
}

1;