File: Preserve.pm

package info (click to toggle)
libmkdoc-xml-perl 0.75-6
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 404 kB
  • sloc: perl: 2,629; xml: 17; makefile: 2
file content (120 lines) | stat: -rw-r--r-- 2,837 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# -------------------------------------------------------------------------------------
# MKDoc::XML::Tagger::Preserve
# -------------------------------------------------------------------------------------
# Author : Jean-Michel Hiver.
# Copyright : (c) MKDoc Holdings Ltd, 2003
#
# This module uses MKDoc::XML::Tagger, except it preserves specific tags to prevent
# them from being tagged twice. At the moment the module uses regexes to do that so it
# might not be very generic but it should at least work for XHTML <a> tags.
# -------------------------------------------------------------------------------------
package MKDoc::XML::Tagger::Preserve;
use MKDoc::XML::Tagger;
use strict;
use warnings;
use utf8;

our @Preserve = ();


##
# $class->process_data ($xml, @expressions);
# ------------------------------------------
# Tags $xml with @expressions, where expression is a list of hashes.
#
# For example:
#
# MKDoc::XML::Tagger::Preserve->process (
#     [ 'i_will_be_preserved', 'a' ],
#     'I like oranges and bananas',
#     { _expr => 'oranges', _tag => 'a', href => 'http://www.google.com?q=oranges' },
#     { _expr => 'bananas', _tag => 'a', href => 'http://www.google.com?q=bananas' },
#
# Will return
#
# 'I like <a href="http://www.google.com?q=oranges">oranges</a> and \
# <a href="http://www.google.com?q=bananas">bananas</a>.
##
sub process_data
{
    my $class = shift;
    local @Preserve = @{shift()};
    my $text  = shift;
    my @list  = ();


    ($text, @list) = _preserve_encode ($text);
    $text          = MKDoc::XML::Tagger->process_data ($text, @_);
    $text          = _preserve_decode ($text, @list);

    return $text;
}


sub process_file
{
    my $class = shift;
    my $file  = shift;
    open FP, "<$file" || do {
        warn "Cannot read-open $file";
        return [];
    };
    
    my $data = '';
    while (<FP>) { $data .= $_ }
    close FP;
    
    return $class->process_data ($data);
}


sub _preserve_encode
{
    my $text = shift;
    my @list = ();
    for my $tag (@Preserve)
    {
        my @tags = $text =~ /(<$tag\s.*?<\/$tag>)/gs;
        for my $tag (@tags) { while ($text =~ s/\Q$tag\E/_compute_unique_string ($text, $tag, \@list)/e) {} }
    }
    
    return $text, @list;
}


sub _preserve_decode
{
    my $text = shift; 
    my @tsil = reverse (@_);
    
    while (@tsil)
    {
        my $val = shift (@tsil);
        my $id  = shift (@tsil);
        $text =~ s/$id/$val/; 
    }
    
    return $text;
}


sub _compute_unique_string
{
    my $text = shift;
    my $str  = shift;
    my $list = shift;
    my $id   = join '', map { chr (ord ('a') + int (rand (26))) } 1..10;
    while ($text =~ /\Q$id\E/)
    {
        $id = join '', map { chr (ord ('a') + int (rand (26))) } 1..10;
    }
    
    push @{$list}, $id => $str;
    return $id;
}


1;


__END__