File: anonymize_mailbox

package info (click to toggle)
grepmail 5.3033-5
  • links: PTS, VCS
  • area: main
  • in suites: squeeze, wheezy
  • size: 2,424 kB
  • ctags: 535
  • sloc: perl: 9,438; makefile: 2
file content (164 lines) | stat: -rwxr-xr-x 3,621 bytes parent folder | download | duplicates (14)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
#!/usr/bin/perl -w

$VERSION = '1.00';

use strict;
use FileHandle;

#-------------------------------------------------------------------------------

my $LINE = 0;
my $FILE_HANDLE = undef;
my $START = 0;
my $END = 0;
my $READ_BUFFER = '';

sub reset_file
{
  my $file_handle = shift;

  $FILE_HANDLE = $file_handle;
  $LINE = 1;
  $START = 0;
  $END = 0;
  $READ_BUFFER = '';
}

#-------------------------------------------------------------------------------

# Need this for a lookahead.
my $READ_CHUNK_SIZE = 0;

sub read_email
{
  # Undefined read buffer means we hit eof on the last read.
  return 0 unless defined $READ_BUFFER;

  my $line = $LINE;

  $START = $END;

  # Look for the start of the next email
  LOOK_FOR_NEXT_HEADER:
  while($READ_BUFFER =~ m/^(From\s.*\d:\d+:\d.* \d{4})/mg)
  {
    $END = pos($READ_BUFFER) - length($1);

    # Don't stop on email header for the first email in the buffer
    next if $END == 0;

    # Keep looking if the header we found is part of a "Begin Included
    # Message".
    my $end_of_string = substr($READ_BUFFER, $END-200, 200);
    if ($end_of_string =~
        /\n-----( Begin Included Message |Original Message)-----\n[^\n]*\n*$/i)
    {
      next;
    }

    # Found the next email!
    my $email = substr($READ_BUFFER, $START, $END-$START);
    $LINE += ($email =~ tr/\n//);
    
    return (1, $email, $line);
  }

  # Didn't find next email in current buffer. Most likely we need to read some
  # more of the mailbox. Shift the current email to the front of the buffer
  # unless we've already done so.
  $READ_BUFFER = substr($READ_BUFFER,$START) unless $START == 0;
  $START = 0;

  # Start looking at the end of the buffer, but back up some in case the edge
  # of the newly read buffer contains the start of a new header. I believe the
  # RFC says header lines can be at most 90 characters long.
  my $search_position = length($READ_BUFFER) - 90;
  $search_position = 0 if $search_position < 0;

  # Can't use sysread because it doesn't work with ungetc
  if ($READ_CHUNK_SIZE == 0)
  {
    local $/ = undef;

    if (eof $FILE_HANDLE)
    {
      my $email = $READ_BUFFER;
      undef $READ_BUFFER;
      return (1, $email, $line);
    }
    else
    {
      $READ_BUFFER = <$FILE_HANDLE>;
      pos($READ_BUFFER) = $search_position;
      goto LOOK_FOR_NEXT_HEADER;
    }
  }
  else
  {
    if (read($FILE_HANDLE, $READ_BUFFER, $READ_CHUNK_SIZE, length($READ_BUFFER)))
    {
      pos($READ_BUFFER) = $search_position;
      goto LOOK_FOR_NEXT_HEADER;
    }
    else
    {
      my $email = $READ_BUFFER;
      undef $READ_BUFFER;
      return (1, $email, $line);
    }
  }
}

sub Read_Chunk_Of_Body
{
  my $email = shift;

  local $/ = "\nFrom ";
  my $chunk = <$FILE_HANDLE>;
  local $/ = "From ";
  chomp $chunk;

  $LINE += ($chunk =~ tr/\n//);

  $$email .= $chunk;
}

die unless @ARGV;

$FILE_HANDLE = new FileHandle($ARGV[0]);

while(1)
{
  my ($status,$email,$line) = read_email();
  exit unless $status;

  my ($header,$body) = $email =~ /(.*?\n\n)(.*)/s;


  $body =~ s/\w/X/g;


  {
    my ($header_to) = $header =~ /^To: (.*)$/m;
    my ($header_subject) = $header =~ /^Subject: (.*)$/m;

    if (defined $header_to)
    {
      my $modified_header_to = $header_to;
      $modified_header_to =~ s/\w/X/g;

      $header =~ s/To: \Q$header_to\E/To: $modified_header_to/g;
    }

    if (defined $header_subject)
    {
      my $modified_header_subject = $header_subject;
      $modified_header_subject =~ s/\w/X/g;

      $header =~ s/Subject: \Q$header_subject\E/Subject: $modified_header_subject/g;
    }
  }


  print $header,$body;
}