File: Received.pm

package info (click to toggle)
spamassassin 3.1.7-2etch1
  • links: PTS
  • area: main
  • in suites: etch
  • size: 5,404 kB
  • ctags: 2,123
  • sloc: perl: 39,706; ansic: 3,133; sh: 2,009; sql: 170; makefile: 168
file content (1297 lines) | stat: -rw-r--r-- 57,850 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at:
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>

# ---------------------------------------------------------------------------

# So, what's the difference between a trusted and untrusted Received header?
# Basically, relays we *know* are trustworthy are 'trusted'; all others after
# the last one of those are 'untrusted'.
#
# We determine trust by detecting if they are inside the network ranges
# specified in 'trusted_networks'.  There is also an inference algorithm
# which determines other trusted relays without user configuration.
#
# There's another type of Received header: the semi-trusted one.  This is the
# header added by *our* MX, at the boundary of trust; we can trust the IP
# address (and possibly rDNS) in this header, but that's about it; HELO name is
# untrustworthy.  We just use this internally for now.
#
# Finally, there's also 'internal_networks'.  These are the networks that you
# control; your MXes should be included.  This way, if you specify a wide range
# of trusted hosts, a mail that is relayed from a dynamic IP address via a
# 'trusted' host will not hit RCVD_IN_DYNABLOCK.

# ---------------------------------------------------------------------------

package Mail::SpamAssassin::Message::Metadata::Received;
1;

package Mail::SpamAssassin::Message::Metadata;
use strict;
use warnings;
use bytes;

use Mail::SpamAssassin::Dns;
use Mail::SpamAssassin::PerMsgStatus;
use Mail::SpamAssassin::Util::RegistrarBoundaries;
use Mail::SpamAssassin::Constants qw(:ip);

# ---------------------------------------------------------------------------

# Classify every relay named in the message's Received headers as
# trusted/untrusted and internal/external.
#
# Arguments:
#   $self - the metadata object; all results are stored on it
#   $main - supplies {parser_dns_pms} (used for DNS lookups) and {conf},
#           from which 'trusted_networks' and 'internal_networks' are read
#   $msg  - the message; its 'Received' headers are fetched via get_header()
#
# Results, all stored in $self:
#   relays_trusted / relays_untrusted / relays_internal / relays_external
#       array refs of the relay hashes returned by parse_received_line()
#   relays_*_str
#       the {as_string} forms of those relays, joined with single spaces
#   num_relays_*
#       the number of entries in each of the four lists
#   allow_fetchmail_markers
#       1 or 0 according to whether the most recently processed relay was
#       classified as trusted
#
# The four strings are also attached to $self->{msg} as X-Spam-Relays-*
# metadata when that object supports it.  There is no explicit return value.
sub parse_received_headers {
  my ($self, $main, $msg) = @_;

  # argh.  this is only used to perform DNS lookups.
  # TODO! we need to get Dns.pm code into a class that is NOT
  # part of Mail::SpamAssassin::PerMsgStatus to avoid this crap!
  $self->{dns_pms} = $main->{parser_dns_pms};
  $self->{is_dns_available} = $self->{dns_pms}->is_dns_available();

  # reset all per-message results before walking the headers
  $self->{relays_trusted} = [ ];
  $self->{num_relays_trusted} = 0;
  $self->{relays_trusted_str} = '';

  $self->{relays_untrusted} = [ ];
  $self->{num_relays_untrusted} = 0;
  $self->{relays_untrusted_str} = '';

  $self->{relays_internal} = [ ];
  $self->{num_relays_internal} = 0;
  $self->{relays_internal_str} = '';

  $self->{relays_external} = [ ];
  $self->{num_relays_external} = 0;
  $self->{relays_external_str} = '';

  # only initialised here; this sub never increments it (presumably that
  # happens elsewhere -- not visible from this sub)
  $self->{num_relays_unparseable} = 0;

  # now figure out what relays are trusted...
  my $trusted = $main->{conf}->{trusted_networks};
  my $internal = $main->{conf}->{internal_networks};
  # IPs of the "by" host of the first relay processed; filled in lazily
  # inside the loop, and only when DNS is available
  my $first_by;
  # state flags for the walk: both start out set, and the loop below clears
  # them as relays outside the respective network sets are encountered
  my $in_trusted = 1;
  my $in_internal = 1;

  # if only one of the two network sets was configured, use it for both;
  # if neither was, both stay empty and the inference algorithm below is used
  if ($trusted->get_num_nets() > 0 && $internal->get_num_nets() > 0) {
    # good; we can use both reliably.
  }
  elsif ($trusted->get_num_nets() <= 0 && $internal->get_num_nets() > 0) {
    $trusted = $internal;	# use 'internal' for 'trusted'
  }
  elsif ($trusted->get_num_nets() > 0 && $internal->get_num_nets() <= 0) {
    # use 'trusted' for 'internal'; compatibility with SpamAssassin 2.60
    $internal = $trusted;
  }

  # evaluated after the substitution above, so these reflect the effective
  # (possibly borrowed) network sets
  my $did_user_specify_trust = ($trusted->get_num_nets() > 0);
  my $did_user_specify_internal = ($internal->get_num_nets() > 0);

  # copy the imported constants into lexicals so they can be interpolated
  # into the regexps below
  my $IP_PRIVATE = IP_PRIVATE;
  my $LOCALHOST = LOCALHOST;

  foreach my $line ( $msg->get_header('Received') ) {

    # qmail-scanner support hack: we may have had one of these set from the
    # previous (read: more recent) Received header.   if so, add it on to this
    # header's set, since that's the handover it was describing.

    my $qms_env_from;
    if ($self->{qmail_scanner_env_from}) {
      $qms_env_from = $self->{qmail_scanner_env_from};
      delete $self->{qmail_scanner_env_from};
    }

    # unfold continuation lines into a single line before parsing
    $line =~ s/\n[ \t]+/ /gs;
    my $relay = $self->parse_received_line ($line);

    # a false result means there is no relay to record for this header
    next unless $relay;

    # hack for qmail-scanner, as described above; add in the saved
    # metadata
    if ($qms_env_from) {
      $relay->{envfrom} = $qms_env_from;
      $self->make_relay_as_string($relay);
    }

    # trusted_networks matches?
    # (a relay carrying an auth token never causes us to leave the trusted set)
    if ($in_trusted && $did_user_specify_trust && !$relay->{auth} && !$trusted->contains_ip ($relay->{ip}))
    {
      $in_trusted = 0;		# we're in deep water now
    }

    # internal_networks matches?
    if ($did_user_specify_internal) {
      if (!$relay->{auth} && !$internal->contains_ip ($relay->{ip})) {
	$in_internal = 0;
      }
    } else {
      # if the user didn't specify it, assume we immediately transition
      # to the external network (the internet) once we leave this host.
      $in_internal = 0;
    }

    # note: you can't be in internal networks, but not be in a trusted 
    # net. (bug 4760)
    if ($in_internal && !$in_trusted) {
      $in_trusted = 1;
    }

# OK, infer the trusted/untrusted handover, if we don't have real info.
# Here's the algorithm used (taken from Dan's mail):
# 
# Talking with Scott Banister (this was his idea) and Andrew Flury at
# IronPort, we came up with an alternate and easier algorithm that doesn't
# involve trees and we think should be good enough most of the time
# whenever trusted IP headers is not set.  It also has the nice property
# of being very easy to implement, but it should, of course, be tested
# out.
# 
# "first" = top Received line in the message
# 
# "public" = not a local or private IP address
# 
# "mypublicnet" = first public "by" address
# 
# 1. Ignore all Received line where the "from" IP is in mypublicnet/16
#    regardless of where they appear.  (The goal is to remove any relay
#    steps that involve your network, relying on /16 is good enough since
#    anything on your /16 is you or at worst involves your ISP.)
# 
# 2. Ignore all Received lines that contain local (127) or private (10.1,
#    etc.) IP addresses anywhere, whether "from" or "by".  (The goal
#    [note: the quoted text is cut off at this point in the original comment]
# 
# 3. The first Received line that you don't ignore is the one that
#    contains the "by" of your trusted relay and the "from" of the first
#    untrusted relay (which is used for bondedsender testing and so on).

    # the inference only runs while we are still inside the trusted set and
    # no network configuration is in effect
    if ($in_trusted && !$did_user_specify_trust) {
      my $inferred_as_trusted = 0;

      # do we know what the IP addresses of the "by" host in the first
      # header is?  If not, set them from this header, since it's the
      # first one.  NOTE: this is a ref to an array, NOT a string.
      if (!defined $first_by && $self->{is_dns_available}) {
	$first_by = [ $self->lookup_all_ips ($relay->{by}) ];
      }

      # if the 'from' IP addr is in a reserved net range, it's not on
      # the public internet.
      if ($relay->{ip_private}) {
	dbg("received-header: 'from' ".$relay->{ip}." has private IP");
	$inferred_as_trusted = 1;
      }

      # if we find authentication tokens in the received header we can extend
      # the trust boundary to that host
      if ($relay->{auth}) {
	dbg("received-header: authentication method ".$relay->{auth});
	$inferred_as_trusted = 1;
      }

      # can we use DNS?  If not, we cannot use this algorithm, as we
      # cannot lookup hostnames. :(
      # Consider the first relay trusted, and all others untrusted.
      # (this branch only logs; a relay already inferred as trusted by the
      # private-IP or auth checks above keeps that status)
      if (!$self->{is_dns_available}) {
	dbg("received-header: cannot use DNS, do not trust any hosts from here on");
      }

      # if the 'from' IP addr shares the same class B mask (/16) as
      # the first relay found in the message, it's still on the
      # user's network.
      elsif (Mail::SpamAssassin::Util::ips_match_in_16_mask
					([ $relay->{ip} ], $first_by))
      {
	dbg("received-header: 'from' ".$relay->{ip}." is near to first 'by'");
	$inferred_as_trusted = 1;
      }

      # if *all* of the IP addrs for the 'by' host are in a reserved net range,
      # it's not on the public internet.  Note that we should still stop if
      # only *some* of the IPs are reserved; this can happen for multi-homed
      # gateway hosts.  For example
      #
      #   PRIVATE NET    A          B    INTERNET
      #     scanner <---> gateway_MX <---> internet
      #
      # Interface A would be on a reserved net, but B would have a "public" IP
      # address.  Same can happen if the scanner runs on the gateway-MX, since
      # lookup_all_ips() will return [ public_IP_addr, 127.0.0.1 ] as the list
      # of addresses, and 127.0.0.1 is a "reserved" address. (bug 2113)

      else {
	my @ips = $self->lookup_all_ips ($relay->{by});
	my $found_non_rsvd = 0;
	my $found_rsvd = 0;
	foreach my $ip (@ips) {
	  # localhost addresses count as neither public nor private here
	  next if ($ip =~ /^${LOCALHOST}$/o);

	  if ($ip !~ /${IP_PRIVATE}/o) {
	    dbg("received-header: 'by' ".$relay->{by}." has public IP $ip");
	    $found_non_rsvd = 1;
	  } else {
	    dbg("received-header: 'by' ".$relay->{by}." has private IP $ip");
	    $found_rsvd = 1;
	  }
	}

	# trusted only if at least one private IP was seen and no public one
	if ($found_rsvd && !$found_non_rsvd) {
	  dbg("received-header: 'by' ".$relay->{by}." has no public IPs");
	  $inferred_as_trusted = 1;
	}
      }

      # none of the checks vouched for this relay: the trusted run ends here
      if (!$inferred_as_trusted) { $in_trusted = 0; }
    }

    dbg("received-header: relay ".$relay->{ip}.
	" trusted? ".($in_trusted ? "yes" : "no").
	" internal? ".($in_internal ? "yes" : "no"));

    # record the internal/external verdict on the relay itself
    if ($in_internal) {
      $relay->{internal} = 1;
    } else {
      $relay->{internal} = 0;
    }

    # be sure to mark up the as_string version for users too
    $relay->{as_string} =~ s/ intl=\d / intl=$relay->{internal} /;

    # file the relay under trusted or untrusted; allow_fetchmail_markers
    # tracks the verdict of the most recently processed relay (its consumer
    # is not visible in this sub)
    if ($in_trusted) {
      push (@{$self->{relays_trusted}}, $relay);
      $self->{allow_fetchmail_markers} = 1;
    } else {
      push (@{$self->{relays_untrusted}}, $relay);
      $self->{allow_fetchmail_markers} = 0;
    }

    # ...and, independently, under internal or external
    if ($in_internal) {
      push (@{$self->{relays_internal}}, $relay);
    } else {
      push (@{$self->{relays_external}}, $relay);
    }
  }

  # build the space-separated string form of each relay list
  $self->{relays_trusted_str} = join(' ', map { $_->{as_string} }
                    @{$self->{relays_trusted}});
  $self->{relays_untrusted_str} = join(' ', map { $_->{as_string} }
                    @{$self->{relays_untrusted}});
  $self->{relays_internal_str} = join(' ', map { $_->{as_string} }
                    @{$self->{relays_internal}});
  $self->{relays_external_str} = join(' ', map { $_->{as_string} }
                    @{$self->{relays_external}});

  # drop the temp PerMsgStatus object
  delete $self->{dns_pms};

  # OK, we've now split the relay list into trusted and untrusted.

  # add the stringified representation to the message object, so Bayes
  # and rules can use it.  Note that rule_tests.t does not impl put_metadata,
  # so protect against that here.  These will not appear in the final
  # message; they're just used internally.

  if ($self->{msg}->can ("delete_header")) {
    # remove any existing headers of these names before storing our own values
    $self->{msg}->delete_header ("X-Spam-Relays-Trusted");
    $self->{msg}->delete_header ("X-Spam-Relays-Untrusted");
    $self->{msg}->delete_header ("X-Spam-Relays-Internal");
    $self->{msg}->delete_header ("X-Spam-Relays-External");
 
    if ($self->{msg}->can ("put_metadata")) {
      $self->{msg}->put_metadata ("X-Spam-Relays-Trusted",
			$self->{relays_trusted_str});
      $self->{msg}->put_metadata ("X-Spam-Relays-Untrusted",
			$self->{relays_untrusted_str});
      $self->{msg}->put_metadata ("X-Spam-Relays-Internal",
			$self->{relays_internal_str});
      $self->{msg}->put_metadata ("X-Spam-Relays-External",
			$self->{relays_external_str});
    }
  }

  # be helpful; save some cumbersome typing
  $self->{num_relays_trusted} = scalar (@{$self->{relays_trusted}});
  $self->{num_relays_untrusted} = scalar (@{$self->{relays_untrusted}});
  $self->{num_relays_internal} = scalar (@{$self->{relays_internal}});
  $self->{num_relays_external} = scalar (@{$self->{relays_external}});

  dbg("metadata: X-Spam-Relays-Trusted: ".$self->{relays_trusted_str});
  dbg("metadata: X-Spam-Relays-Untrusted: ".$self->{relays_untrusted_str});
  dbg("metadata: X-Spam-Relays-Internal: ".$self->{relays_internal_str});
  dbg("metadata: X-Spam-Relays-External: ".$self->{relays_external_str});
}

# Resolve $hostname to its list of addresses, with duplicates removed.
#
# Arguments:
#   $self     - object carrying {is_dns_available} and the {dns_pms} resolver
#   $hostname - the name to look up
#
# Returns the distinct addresses reported by the resolver's lookup_a(), in
# the order they were first seen, or an empty list when DNS is unavailable.
sub lookup_all_ips {
  my ($self, $hostname) = @_;

  # no DNS, no lookups: report that we found nothing
  return () unless $self->{is_dns_available};

  # bug 2324: plain hostname resolution used to be fooled by /etc/hosts
  # entries for the name being looked up.  The old workaround (forcing a
  # DNS query by appending a dot) is no longer used; the lookup goes
  # through the {dns_pms} object instead.

  # keep only the first occurrence of each address, preserving order
  my %already_seen;
  my @unique_ips = grep { !$already_seen{$_}++ }
                        $self->{dns_pms}->lookup_a ($hostname);

  return @unique_ips;
}

# ---------------------------------------------------------------------------

sub parse_received_line {
  my ($self) = shift;
  local ($_) = shift;

  s/\s+/ /gs;
  my $ip = '';
  my $helo = '';
  my $rdns = '';
  my $by = '';
  my $id = '';
  my $ident = '';
  my $envfrom = '';
  my $mta_looked_up_dns = 0;
  my $IP_ADDRESS = IP_ADDRESS;
  my $IP_PRIVATE = IP_PRIVATE;
  my $LOCALHOST = LOCALHOST;
  my $auth = '';

  # Received: (qmail 27981 invoked by uid 225); 14 Mar 2003 07:24:34 -0000
  # Received: (qmail 84907 invoked from network); 13 Feb 2003 20:59:28 -0000
  # Received: (ofmipd 208.31.42.38); 17 Mar 2003 04:09:01 -0000
  # we don't care about this kind of gateway noise
  # Bug 4943: give /^(from/ a chance to be parsed
  if (/^\((?!from)/) { return; }

  # OK -- given knowledge of most Received header formats,
  # break them down.  We have to do something like this, because
  # some MTAs will swap position of rdns and helo -- so we can't
  # simply use simplistic regexps.

  # try to catch unique message identifier
  if (/\sid\s+<?([^\s<>;]{3,})/) {
    $id = $1;
  }

  # try to catch authenticated message identifier
  #
  # with ESMTPA, ESMTPSA, LMTPA, LMTPSA should cover RFC 3848 compliant MTAs
  # with ASMTP (Authenticated SMTP) is used by Earthlink, Exim 4.34, and others
  # with HTTP should only be authenticated webmail sessions
  if (/ by .*? with (ESMTPA|ESMTPSA|LMTPA|LMTPSA|ASMTP|HTTP)\;? /i) {
    $auth = $1;
  }
  # Courier v0.47 and possibly others
  elsif (/^from .*?(?:\]\)|\)\])\s+\(AUTH: (LOGIN|PLAIN|DIGEST-MD5|CRAM-MD5) \S+(?:, .*?)?\)\s+by\s+/) {
    $auth = $1;
  }
  # Sendmail, MDaemon, some webmail servers, and others
  elsif (/^from .*?(?:\]\)|\)\]) .*?\(.*?authenticated.*?\).*? by/) {
    $auth = 'Sendmail';
  }
  # Critical Path Messaging Server
  elsif (/\) by .+ \(\d{1,2}\.\d\.\d{3}(?:\.\d{1,3})?\) \(authenticated as .+\) id /) {
    $auth = 'CriticalPath';
  }
  # Postfix 2.3 and later with "smtpd_sasl_authenticated_header yes"
  elsif (/\)\s+\(Authenticated sender:\s+\S+\)\s+by\s+\S+\s+\(Postfix\)\s+with\s+/) {
    $auth = 'Postfix';
  }

  if (/^from /) {
    # try to catch enveloper senders
    if (/(?:return-path:? |envelope-(?:sender|from)[ =])(\S+)\b/i) {
      $envfrom = $1;
    }

    # bug 3236: ignore Squirrelmail injection steps.
    # from 142.169.110.122 (SquirrelMail authenticated user synapse) by
    # mail.nomis80.org with HTTP; Sat, 3 Apr 2004 10:33:43 -0500 (EST)
    if (/ \(SquirrelMail authenticated user /) {
      dbg("received-header: ignored SquirrelMail injection: $_");
      return;
    }

    # catch MS-ish headers here
    if (/ SMTPSVC/) {
      # MS servers using this fmt do not lookup the rDNS.
      # Received: from inet-vrs-05.redmond.corp.microsoft.com ([157.54.6.157])
      # by INET-IMC-05.redmond.corp.microsoft.com with Microsoft
      # SMTPSVC(5.0.2195.6624); Thu, 6 Mar 2003 12:02:35 -0800
      # Received: from 0 ([61.31.135.91]) by bass.bass.com.eg with Microsoft
      # SMTPSVC(5.0.2195.6713); Tue, 21 Sep 2004 08:59:06 +0300
      # Received: from 0 ([61.31.138.57] RDNS failed) by nccdi.com with 
      # Microsoft SMTPSVC(6.0.3790.0); Thu, 23 Sep 2004 08:51:06 -0700
      # Received: from tthompson ([217.35.105.172] unverified) by
      # mail.neosinteractive.com with Microsoft SMTPSVC(5.0.2195.5329);
      # Tue, 11 Mar 2003 13:23:01 +0000
      if (/^from (\S+) \(\[(${IP_ADDRESS})\][^\)]{0,40}\) by (\S+) with Microsoft SMTPSVC/) {
        $helo = $1; $ip = $2; $by = $3; goto enough;
      }

      # Received: from mail pickup service by mail1.insuranceiq.com with
      # Microsoft SMTPSVC; Thu, 13 Feb 2003 19:05:39 -0500
      if (/^from mail pickup service by (\S+) with Microsoft SMTPSVC;/) {
        return;
      }
    }

    if (/\[XMail /) { # bug 3791, bug 4053
      # Received: from list.brainbuzz.com (63.146.189.86:23198) by mx1.yourtech.net with [XMail 1.20 ESMTP Server] id <S72E> for <jason@ellingson.org.spamassassin.org> from <bounce-cscommunity-11965901@list.cramsession.com.spamassassin.org>; Sat, 18 Sep 2004 23:17:54 -0500
      # Received: from list.brainbuzz.com (63.146.189.86:23198) by mx1.yourtech.net (209.32.147.34:25) with [XMail 1.20 ESMTP Server] id <S72E> for <jason@ellingson.org.spamassassin.org> from <bounce-cscommunity-11965901@list.cramsession.com.spamassassin.org>; Sat, 18 Sep 2004 23:17:54 -0500
      if (/^from (\S+) \((\[?${IP_ADDRESS}\]?)(?::\d+|)\) by (\S+)(?: \(\S+\)|) with \[XMail/)
      {
	$helo = $1; $ip = $2; $by = $3;
        / id <(\S+)> / and $id = $1;
        / from <(\S+)>; / and $envfrom = $1;
        goto enough;
      }
    }

    if (/ecelerity /) {
      if (/^from \(\[(${IP_ADDRESS}):\d+\] helo=(\S+)\) by (\S+) /) {
        $ip = $1; $helo = $2; $by = $3;
        / id (\S+) / and $id = $1;
        goto enough;
      }
    }

    if (/Exim/) {
      # one of the HUGE number of Exim formats :(
      # This must be scriptable.  (update: it is. cf bug 3950, 3582)
      # mss 2004-09-27: See <http://www.exim.org/exim-html-4.40/doc/html/spec_14.html#IX1315>

      # Received: from [61.174.163.26] (helo=host) by sc8-sf-list1.sourceforge.net with smtp (Exim 3.31-VA-mm2 #1 (Debian)) id 18t2z0-0001NX-00 for <razor-users@lists.sourceforge.net>; Wed, 12 Mar 2003 01:57:10 -0800
      # Received: from [218.19.142.229] (helo=hotmail.com ident=yiuhyotp) by yzordderrex with smtp (Exim 3.35 #1 (Debian)) id 194BE5-0005Zh-00; Sat, 12 Apr 2003 03:58:53 +0100
      if (/^from \[(${IP_ADDRESS})\] \((.*?)\) by (\S+) /) {
	$ip = $1; my $sub = $2; $by = $3;
	$sub =~ s/helo=(\S+)// and $helo = $1;
	$sub =~ s/ident=(\S*)// and $ident = $1;
	goto enough;
      }

      # Received: from sc8-sf-list1-b.sourceforge.net ([10.3.1.13] helo=sc8-sf-list1.sourceforge.net) by sc8-sf-list2.sourceforge.net with esmtp (Exim 3.31-VA-mm2 #1 (Debian)) id 18t301-0007Bh-00; Wed, 12 Mar 2003 01:58:13 -0800
      # Received: from dsl092-072-213.bos1.dsl.speakeasy.net ([66.92.72.213] helo=blazing.arsecandle.org) by sc8-sf-list1.sourceforge.net with esmtp (Cipher TLSv1:DES-CBC3-SHA:168) (Exim 3.31-VA-mm2 #1 (Debian)) id 18lyuU-0007TI-00 for <SpamAssassin-talk@lists.sourceforge.net>; Thu, 20 Feb 2003 14:11:18 -0800
      # Received: from eclectic.kluge.net ([66.92.69.221] ident=[W9VcNxE2vKxgWHD05PJbLzIHSxcmZQ/O]) by sc8-sf-list1.sourceforge.net with esmtp (Cipher TLSv1:DES-CBC3-SHA:168) (Exim 3.31-VA-mm2 #1 (Debian)) id 18m0hT-00031I-00 for <spamassassin-talk@lists.sourceforge.net>; Thu, 20 Feb 2003 16:06:00 -0800
      # Received: from mail.ssccbelen.edu.pe ([216.244.149.154]) by yzordderrex
      # with esmtp (Exim 3.35 #1 (Debian)) id 18tqiz-000702-00 for
      # <jm@example.com>; Fri, 14 Mar 2003 15:03:57 +0000
      if (/^from (\S+) \(\[(${IP_ADDRESS})\](.*?)\) by (\S+) /) {
        $rdns=$1; $ip = $2; my $sub = $3; $by = $4;
        $sub =~ s/helo=(\S+)// and $helo = $1;
        $sub =~ s/ident=(\S*)// and $ident = $1;
        goto enough;
      }

      # Received: from boggle.ihug.co.nz [203.109.252.209] by grunt6.ihug.co.nz
      # with esmtp (Exim 3.35 #1 (Debian)) id 18SWRe-0006X6-00; Sun, 29 Dec 
      # 2002 18:57:06 +1300
      if (/^from (\S+) \[(${IP_ADDRESS})\](:\d+)? by (\S+) /) {
	$rdns= $1; $ip = $2; $helo = $1; $by = $4; goto enough;
      }

      # attempt to deal with other odd Exim formats; just match little bits
      # of the header.
      # Received: from helene8.i.pinwand.net (helene.cats.ms) [10.0.8.6.13219]
      # (mail) by lisbeth.i.pinwand.net with esmtp (Exim 3.35 #1 (Debian)) id
      # 1CO5y7-0001vC-00; Sun, 31 Oct 2004 04:01:23 +0100
      if (/^from (\S+) /) {
        $rdns= $1;      # assume this is the rDNS, not HELO.  is this appropriate?
      }
      if (/ \((\S+)\) /) {
        $helo = $1;
      }
      if (/ \[(${IP_ADDRESS})(?:\.\d+)?\] /) {
        $ip = $1;
      }
      if (/by (\S+) /) {
        $by = $1;
        # now, if we have a "by" and an IP, that's enough for most uses;
        # we have to make do with that.
        if ($ip) { goto enough; }
      }

      # else it's probably forged. fall through
    }

    # Received: from 217.137.58.28 ([217.137.58.28])
    # by webmail.ukonline.net (IMP) with HTTP
    # for <anarchyintheuk@localhost>; Sun, 11 Apr 2004 00:31:07 +0100
    if (/\bwith HTTP\b/ &&        # more efficient split up this way
        /^from (${IP_ADDRESS}) \(\[${IP_ADDRESS}\]\) by (\S+)/)
    {
      # some smarty-pants decided to fake a numeric HELO for HTTP
      # no rDNS for this format?
      $ip = $1; $by = $2; goto enough;
    }

    # Received: from ns.elcanto.co.kr (66.161.246.58 [66.161.246.58]) by
    # mail.ssccbelen.edu.pe with SMTP (Microsoft Exchange Internet Mail Service
    # Version 5.5.1960.3) id G69TW478; Thu, 13 Mar 2003 14:01:10 -0500
    if (/^from (\S+) \((\S+) \[(${IP_ADDRESS})\]\) by (\S+) with \S+ \(/) {
      $mta_looked_up_dns = 1;
      $rdns = $2; $ip = $3; $helo = $1; $by = $4; goto enough;
    }

    # from mail2.detr.gsi.gov.uk ([51.64.35.18] helo=ahvfw.dtlr.gsi.gov.uk) by mail4.gsi.gov.uk with smtp id 190K1R-0000me-00 for spamassassin-talk-admin@lists.sourceforge.net; Tue, 01 Apr 2003 12:33:46 +0100
    if (/^from (\S+) \(\[(${IP_ADDRESS})\] helo=(\S+)\) by (\S+) with /) {
      $rdns = $1; $ip = $2; $helo = $3; $by = $4;
      goto enough;
    }

    # from 12-211-5-69.client.attbi.com (<unknown.domain>[12.211.5.69]) by rwcrmhc53.attbi.com (rwcrmhc53) with SMTP id <2002112823351305300akl1ue>; Thu, 28 Nov 2002 23:35:13 +0000
    if (/^from (\S+) \(<unknown\S*>\[(${IP_ADDRESS})\]\) by (\S+) /) {
      $helo = $1; $ip = $2; $by = $3;
      goto enough;
    }

    # from attbi.com (h000502e08144.ne.client2.attbi.com[24.128.27.103]) by rwcrmhc53.attbi.com (rwcrmhc53) with SMTP id <20030222193438053008f7tee>; Sat, 22 Feb 2003 19:34:39 +0000
    if (/^from (\S+) \((\S+\.\S+)\[(${IP_ADDRESS})\]\) by (\S+) /) {
      $mta_looked_up_dns = 1;
      $helo = $1; $rdns = $2; $ip = $3; $by = $4;
      goto enough;
    }

    if (/ \(Postfix\) with/) {
      # Received: from localhost (unknown [127.0.0.1])
      # by cabbage.jmason.org (Postfix) with ESMTP id A96E18BD97
      # for <jm@localhost>; Thu, 13 Mar 2003 15:23:15 -0500 (EST)
      if ( /^from (\S+) \((\S+) \[(${IP_ADDRESS})\]\) by (\S+) / ) {
	$mta_looked_up_dns = 1;
	$helo = $1; $rdns = $2; $ip = $3; $by = $4;
	if ($rdns eq 'unknown') { $rdns = ''; }
	goto enough;
      }

      # Received: from 207.8.214.3 (unknown[211.94.164.65])
      # by puzzle.pobox.com (Postfix) with SMTP id 9029AFB732;
      # Sat,  8 Nov 2003 17:57:46 -0500 (EST)
      # (Pobox.com version: reported in bug 2745)
      if ( /^from (\S+) \((\S+)\[(${IP_ADDRESS})\]\) by (\S+) / ) {
	$mta_looked_up_dns = 1;
	$helo = $1; $rdns = $2; $ip = $3; $by = $4;
	if ($rdns eq 'unknown') { $rdns = ''; }
	goto enough;
      }
    }

    # MiB: 2003/11/29 Some qmail-ldap headers may be misinterpreted as sendmail-headers
    #      resulting in a messed-up interpretation. We have to skip sendmail tests
    #      if we find evidence that this is a qmail-ldap header.
    #
    unless (/^from .* by \S+ \(qmail-\S+\) with /) {
      #
      # sendmail:
      # Received: from mail1.insuranceiq.com (host66.insuranceiq.com [65.217.159.66] (may be forged)) by dogma.slashnull.org (8.11.6/8.11.6) with ESMTP id h2F0c2x31856 for <jm@jmason.org>; Sat, 15 Mar 2003 00:38:03 GMT
      # Received: from BAY0-HMR08.adinternal.hotmail.com (bay0-hmr08.bay0.hotmail.com [65.54.241.207]) by dogma.slashnull.org (8.11.6/8.11.6) with ESMTP id h2DBpvs24047 for <webmaster@efi.ie>; Thu, 13 Mar 2003 11:51:57 GMT
      # Received: from ran-out.mx.develooper.com (IDENT:qmailr@one.develooper.com [64.81.84.115]) by dogma.slashnull.org (8.11.6/8.11.6) with SMTP id h381Vvf19860 for <jm-cpan@jmason.org>; Tue, 8 Apr 2003 02:31:57 +0100
      # from rev.net (natpool62.rev.net [63.148.93.62] (may be forged)) (authenticated) by mail.rev.net (8.11.4/8.11.4) with ESMTP id h0KKa7d32306 for <spamassassin-talk@lists.sourceforge.net>
      #
      if (/^from (\S+) \((\S+) \[(${IP_ADDRESS})\].*\) by (\S+) \(/) {
        $mta_looked_up_dns = 1;
        $helo = $1; $rdns = $2; $ip = $3; $by = $4;
        $rdns =~ s/^IDENT:([^\@]*)\@// and $ident = $1; # remove IDENT lookups
        $rdns =~ s/^([^\@]*)\@// and $ident = $1;	# remove IDENT lookups
        goto enough;
      }
    }

    # Received: from 4wtgRl (kgbxn@[211.244.147.115]) by dogma.slashnull.org (8.11.6/8.11.6) with SMTP id h8BBsUJ18848; Thu, 11 Sep 2003 12:54:31 +0100
    if (/^from (\S+) \((\S*)\@\[(${IP_ADDRESS})\].*\) by (\S+) \(/) {
      $mta_looked_up_dns = 1;	# this one does.  there just wasn't one
      $helo = $1; $ip = $3; $by = $4;
      $ident = $2;
      goto enough;
    }

    # Received: from 213.123.174.21 by lw11fd.law11.hotmail.msn.com with HTTP;
    # Wed, 24 Jul 2002 16:36:44 GMT
    if (/by (\S+\.hotmail\.msn\.com) /) {
      $by = $1;
      /^from (\S+) / and $ip = $1;
      goto enough;
    }

    # Received: from x71-x56-x24-5.webspeed.dk (HELO niels) (69.96.3.15) by la.mx.develooper.com (qpsmtpd/0.27-dev) with SMTP; Fri, 02 Jan 2004 19:26:52 -0800
    # Received: from sc8-sf-sshgate.sourceforge.net (HELO sc8-sf-netmisc.sourceforge.net) (66.35.250.220) by la.mx.develooper.com (qpsmtpd/0.27-dev) with ESMTP; Fri, 02 Jan 2004 14:44:41 -0800
    # Received: from mx10.topofferz.net (HELO ) (69.6.60.10) by blazing.arsecandle.org with SMTP; 3 Mar 2004 20:34:38 -0000
    if (/^from (\S+) \((?:HELO|EHLO) (\S*)\) \((${IP_ADDRESS})\) by (\S+) \(qpsmtpd\/(\S+)\) with (ESMTP|SMTP)/) {
      $rdns = $1; $helo = $2; $ip = $3; $by = $4; goto enough;
    }

    # MiB (Michel Bouissou, 2003/11/16)
    # Moved some tests up because they might match on qmail tests, where this
    # is not qmail
    #
    # Received: from imo-m01.mx.aol.com ([64.12.136.4]) by eagle.glenraven.com
    # via smtpd (for [198.85.87.98]) with SMTP; Wed, 08 Oct 2003 16:25:37 -0400
    if (/^from (\S+) \(\[(${IP_ADDRESS})\]\) by (\S+) via smtpd \(for \S+\) with SMTP\(/) {
      $helo = $1; $ip = $2; $by = $3; goto enough;
    }

    # Try to match most of various qmail possibilities
    #
    # General format:
    # Received: from postfix3-2.free.fr (HELO machine.domain.com) (foobar@213.228.0.169) by totor.bouissou.net with SMTP; 14 Nov 2003 08:05:50 -0000
    #
    # "from (remote.rDNS|unknown)" is always there
    # "(HELO machine.domain.com)" is there only if HELO differs from remote rDNS.
    # HELO may be "" -- ie no string. "HELO" may also be "EHLO".  HELO string
    # may be an IP in fmt [1.2.3.4] -- do not strip [ and ], they are important.
    # "foobar@" is remote IDENT info, specified only if ident given by remote
    # Remote IP always appears between (parentheses), with or without IDENT@
    # "by local.system.domain.com" always appears
    #
    # Protocol can be different from "SMTP", i.e. "RC4-SHA encrypted SMTP" or "QMQP"
    # qmail's reported protocol shouldn't be "ESMTP", so by allowing only "with (.* )(SMTP|QMQP)"
    # we should avoid matching on some sendmailish Received: lines that report the remote IP
    # between ([218.0.185.24]) like qmail-ldap does, but use "with ESMTP".
    #
    # Normally, qmail-smtpd remote IP isn't between square brackets [], but some versions of
    # qmail-ldap seem to add square brackets around remote IP. These versions of qmail-ldap
    # use a longer format that also states the (envelope-sender <sender@domain>) and the
    # qmail-ldap version. Example:
    # Received: from unknown (HELO terpsichore.farfalle.com) (jdavid@[216.254.40.70]) (envelope-sender <jdavid@farfalle.com>) by mail13.speakeasy.net (qmail-ldap-1.03) with SMTP for <jm@jmason.org>; 12 Feb 2003 18:23:19 -0000
    #
    # Some others of the numerous qmail patches out there can also add variants of their own
    #
    # Received: from 211.245.85.228  (EHLO ) (211.245.85.228) by mta232.mail.scd.yahoo.com with SMTP; Sun, 25 Jan 2004 00:24:37 -0800
    #
    # bug 4813: make sure that the line doesn't have " id " after the
    # protocol since that's a sendmail line and not qmail ...
    if (/^from \S+( \((?:HELO|EHLO) \S*\))? \((\S+\@)?\[?${IP_ADDRESS}\]?\)( \(envelope-sender <\S+>\))? by \S+( \(.+\))* with (.* )?(SMTP|QMQP)(?! id )/ ) {
       if (/^from (\S+) \((?:HELO|EHLO) ([^ \(\)]*)\) \((\S*)\@\[?(${IP_ADDRESS})\]?\)( \(envelope-sender <\S+>\))? by (\S+)/) {
         $rdns = $1; $helo = $2; $ident = $3; $ip = $4; $by = $6;
       }
       elsif (/^from (\S+) \((?:HELO|EHLO) ([^ \(\)]*)\) \(\[?(${IP_ADDRESS})\]?\)( \(envelope-sender <\S+>\))? by (\S+)/) {
         $rdns = $1; $helo = $2; $ip = $3; $by = $5;
       }
       elsif (/^from (\S+) \((\S*)\@\[?(${IP_ADDRESS})\]?\)( \(envelope-sender <\S+>\))? by (\S+)/) {
	 # note: absence of HELO means that it matched rDNS in qmail-land
         $helo = $rdns = $1; $ident = $2; $ip = $3; $by = $5;
       }
       elsif (/^from (\S+) \(\[?(${IP_ADDRESS})\]?\)( \(envelope-sender <\S+>\))? by (\S+)/) {
         $helo = $rdns = $1; $ip = $2; $by = $4;
       }
       # qmail doesn't perform rDNS requests by itself, but is usually called
       # by tcpserver or a similar daemon that passes rDNS information to qmail-smtpd.
       # If qmail puts something else than "unknown" in the rDNS field, it means that
       # it received this information from the daemon that called it. If qmail-smtpd
       # writes "Received: from unknown", it means that either the remote has no
       # rDNS, or qmail was called by a daemon that didn't give the rDNS information.
       if ($rdns ne "unknown") {
          $mta_looked_up_dns = 1;
       } else {
          $rdns = '';
       }
       goto enough;

    }
    # /MiB
    
    # Received: from [193.220.176.134] by web40310.mail.yahoo.com via HTTP;
    # Wed, 12 Feb 2003 14:22:21 PST
    if (/^from \[(${IP_ADDRESS})\] by (\S+) via HTTP\;/) {
      $ip = $1; $by = $2; goto enough;
    }

    # Received: from 192.168.5.158 ( [192.168.5.158]) as user jason@localhost by mail.reusch.net with HTTP; Mon, 8 Jul 2002 23:24:56 -0400
    if (/^from (\S+) \( \[(${IP_ADDRESS})\]\).*? by (\S+) /) {
      # TODO: is $1 helo?
      $ip = $2; $by = $3; goto enough;
    }

    # Received: from (64.52.135.194 [64.52.135.194]) by mail.unearthed.com with ESMTP id BQB0hUH2 Thu, 20 Feb 2003 16:13:20 -0700 (PST)
    if (/^from \((\S+) \[(${IP_ADDRESS})\]\) by (\S+) /) {
      $helo = $1; $ip = $2; $by = $3; goto enough;
    }

    # Received: from [65.167.180.251] by relent.cedata.com (MessageWall 1.1.0) with SMTP; 20 Feb 2003 23:57:15 -0000
    if (/^from \[(${IP_ADDRESS})\] by (\S+) /) {
      $ip = $1; $by = $2; goto enough;
    }

    # Received: from acecomms [202.83.84.95] by mailscan.acenet.net.au [202.83.84.27] with SMTP (MDaemon.PRO.v5.0.6.R) for <spamassassin-talk@lists.sourceforge.net>; Fri, 21 Feb 2003 09:32:27 +1000
    if (/^from (\S+) \[(${IP_ADDRESS})\] by (\S+) \[(\S+)\] with /) {
      $mta_looked_up_dns = 1;
      $helo = $1; $ip = $2;
      $by = $4; # use the IP addr for "by", more useful?
      goto enough;
    }

    # Received: from mail.sxptt.zj.cn ([218.0.185.24]) by dogma.slashnull.org
    # (8.11.6/8.11.6) with ESMTP id h2FH0Zx11330 for <webmaster@efi.ie>;
    # Sat, 15 Mar 2003 17:00:41 GMT
    if (/^from (\S+) \(\[(${IP_ADDRESS})\]\) by (\S+) \(/) { # sendmail
      $mta_looked_up_dns = 1;
      $helo = $1; $ip = $2; $by = $3; goto enough;
    }

    # Received: from umr-mail7.umr.edu (umr-mail7.umr.edu [131.151.1.64]) via ESMTP by mrelay1.cc.umr.edu (8.12.1/) id h06GHYLZ022481; Mon, 6 Jan 2003 10:17:34 -0600
    # Received: from Agni (localhost [::ffff:127.0.0.1]) (TLS: TLSv1/SSLv3, 168bits,DES-CBC3-SHA) by agni.forevermore.net with esmtp; Mon, 28 Oct 2002 14:48:52 -0800
    # Received: from gandalf ([4.37.75.131]) (authenticated bits=0) by herald.cc.purdue.edu (8.12.5/8.12.5/herald) with ESMTP id g9JLefrm028228 for <spamassassin-talk@lists.sourceforge.net>; Sat, 19 Oct 2002 16:40:41 -0500 (EST)
    # Received: from bushinternet.com (softdnserr [::ffff:61.99.99.67]) by mail.cs.helsinki.fi with esmtp; Fri, 22 Aug 2003 12:25:41 +0300
    if (/^from (\S+) \((\S+) \[(${IP_ADDRESS})\]\).*? by (\S+) /) { # sendmail
      if ($2 eq 'softdnserr') {
        $mta_looked_up_dns = 0; # bug 2326: couriertcpd
      } else {
        $mta_looked_up_dns = 1; $rdns = $2;
      }
      $helo = $1; $ip = $3; $by = $4; goto enough;
    }

    if (/^from (\S+) \(\[(${IP_ADDRESS})\]\).*? by (\S+) /) {
      $mta_looked_up_dns = 1;
      $helo = $1; $ip = $2; $by = $3; goto enough;
    }

    # Received: from roissy (p573.as1.exs.dublin.eircom.net [159.134.226.61])
    # (authenticated bits=0) by slate.dublin.wbtsystems.com (8.12.6/8.12.6)
    # with ESMTP id g9MFWcvb068860 for <jm@jmason.org>;
    # Tue, 22 Oct 2002 16:32:39 +0100 (IST)
    if (/^from (\S+) \((\S+) \[(${IP_ADDRESS})\]\)(?: \(authenticated bits=\d+\))? by (\S+) \(/) { # sendmail
      $mta_looked_up_dns = 1;
      $helo = $1; $rdns = $2; $ip = $3; $by = $4; goto enough;
    }

    # Received: from cabbage.jmason.org [127.0.0.1]
    # by localhost with IMAP (fetchmail-5.9.0)
    # for jm@localhost (single-drop); Thu, 13 Mar 2003 20:39:56 -0800 (PST)
    if (/^from (\S+) (?:\[(${IP_ADDRESS})\] )?by (\S+) with \S+ \(fetchmail/) {
      $self->found_pop_fetcher_sig();
      return;		# skip fetchmail handovers
    }

    # Let's try to support a few qmailish formats in one;
    # http://issues.apache.org/SpamAssassin/show_bug.cgi?id=2744#c14 :
    # Received: from unknown (HELO feux01a-isp) (213.199.4.210) by totor.bouissou.net with SMTP; 1 Nov 2003 07:05:19 -0000 
    # Received: from adsl-207-213-27-129.dsl.lsan03.pacbell.net (HELO merlin.net.au) (Owner50@207.213.27.129) by totor.bouissou.net with SMTP; 10 Nov 2003 06:30:34 -0000 
    if (/^from (\S+) \((?:HELO|EHLO) ([^\)]*)\) \((\S*@)?\[?(${IP_ADDRESS})\]?\).* by (\S+) /)
    {
      $mta_looked_up_dns = 1;
      $rdns = $1; $helo = $2; $ident = (defined $3) ? $3 : '';
      $ip = $4; $by = $5;
      if ($ident) { $ident =~ s/\@$//; }
      goto enough;
    }

    # Received: from x1-6-00-04-bd-d2-e0-a3.k317.webspeed.dk (benelli@80.167.158.170) by totor.bouissou.net with SMTP; 5 Nov 2003 23:18:42 -0000
    if (/^from (\S+) \((\S*@)?\[?(${IP_ADDRESS})\]?\).* by (\S+) /)
    {
      $mta_looked_up_dns = 1;
      # bug 2744 notes that if HELO == rDNS, qmail drops it.
      $rdns = $1; $helo = $rdns; $ident = (defined $2) ? $2 : '';
      $ip = $3; $by = $4;
      if ($ident) { $ident =~ s/\@$//; }
      goto enough;
    }

    # Received: from [129.24.215.125] by ws1-7.us4.outblaze.com with http for
    # _bushisevil_@mail.com; Thu, 13 Feb 2003 15:59:28 -0500
    if (/^from \[(${IP_ADDRESS})\] by (\S+) with http for /) {
      $ip = $1; $by = $2; goto enough;
    }

    # Received: from po11.mit.edu [18.7.21.73]
    # by stark.dyndns.tv with POP3 (fetchmail-5.9.7)
    # for stark@localhost (single-drop); Tue, 18 Feb 2003 10:43:09 -0500 (EST)
    # by po11.mit.edu (Cyrus v2.1.5) with LMTP; Tue, 18 Feb 2003 09:49:46 -0500
    if (/^from (\S+) \[(${IP_ADDRESS})\] by (\S+) with POP3 /) {
      $rdns = $1; $ip = $2; $by = $3; goto enough;
    }

    # Received: from snake.corp.yahoo.com(216.145.52.229) by x.x.org via smap (V1.3)
    # id xma093673; Wed, 26 Mar 03 20:43:24 -0600
    if (/^from (\S+)\((${IP_ADDRESS})\) by (\S+) via smap /) {
      $mta_looked_up_dns = 1;
      $rdns = $1; $ip = $2; $by = $3; goto enough;
    }

    # Received: from smtp.greyware.com(208.14.208.51, HELO smtp.sff.net) by x.x.org via smap (V1.3)
    # id xma002908; Fri, 27 Feb 04 14:16:56 -0800
    if (/^from (\S+)\((${IP_ADDRESS}), (?:HELO|EHLO) (\S*)\) by (\S+) via smap /) {
      $mta_looked_up_dns = 1;
      $rdns = $1; $ip = $2; $helo = $3; $by = $4; goto enough;
    }

    # Received: from [192.168.0.71] by web01-nyc.clicvu.com (Post.Office MTA
    # v3.5.3 release 223 ID# 0-64039U1000L100S0V35) with SMTP id com for
    # <x@x.org>; Tue, 25 Mar 2003 11:42:04 -0500
    if (/^from \[(${IP_ADDRESS})\] by (\S+) \(Post/) {
      $ip = $1; $by = $2; goto enough;
    }

    # Received: from [127.0.0.1] by euphoria (ArGoSoft Mail Server 
    # Freeware, Version 1.8 (1.8.2.5)); Sat, 8 Feb 2003 09:45:32 +0200
    if (/^from \[(${IP_ADDRESS})\] by (\S+) \(ArGoSoft/) {
      $ip = $1; $by = $2; goto enough;
    }

    # Received: from 157.54.8.23 by inet-vrs-05.redmond.corp.microsoft.com
    # (InterScan E-Mail VirusWall NT); Thu, 06 Mar 2003 12:02:35 -0800
    if (/^from (${IP_ADDRESS}) by (\S+) \(InterScan/) {
      $ip = $1; $by = $2; goto enough;
    }

    # Received: from faerber.muc.de by slarti.muc.de with BSMTP (rsmtp-qm-ot 0.4)
    # for asrg@ietf.org; 7 Mar 2003 21:10:38 -0000
    if (/^from (\S+) by (\S+) with BSMTP/) {
      return;	# BSMTP != a TCP/IP handover, ignore it
    }

    # Received: from spike (spike.ig.co.uk [193.32.60.32]) by mail.ig.co.uk with
    # SMTP id h27CrCD03362 for <asrg@ietf.org>; Fri, 7 Mar 2003 12:53:12 GMT
    if (/^from (\S+) \((\S+) \[(${IP_ADDRESS})\]\) by (\S+) with /) {
      $mta_looked_up_dns = 1;
      $helo = $1; $rdns = $2; $ip = $3; $by = $4; goto enough;
    }

    # Received: from customer254-217.iplannetworks.net (HELO AGAMENON) 
    # (baldusi@200.69.254.217 with plain) by smtp.mail.vip.sc5.yahoo.com with
    # SMTP; 11 Mar 2003 21:03:28 -0000
    if (/^from (\S+) \((?:HELO|EHLO) (\S*)\) \((\S+).*?\) by (\S+) with /) {
      $mta_looked_up_dns = 1;
      $rdns = $1; $helo = $2; $ip = $3; $by = $4;
      $ip =~ s/([^\@]*)\@//g and $ident = $1;	# remove IDENT lookups
      goto enough;
    }

    # Received: from mmail by argon.connect.org.uk with local (connectmail/exim)
    # id 18tOsg-0008FX-00; Thu, 13 Mar 2003 09:20:06 +0000
    # Received: from andrew by trinity.supernews.net with local (Exim 4.12)
    # id 18xeL6-000Dn1-00; Tue, 25 Mar 2003 02:39:00 +0000
    if (/^from (\S+) by (\S+) with local/) { return; }

    # Received: from [192.168.1.104] (account nazgul HELO [192.168.1.104])
    # by somewhere.com (CommuniGate Pro SMTP 3.5.7) with ESMTP-TLS id 2088434;
    # Fri, 07 Mar 2003 13:05:06 -0500
    if (/^from \[(${IP_ADDRESS})\] \(account \S+ (?:HELO|EHLO) (\S*)\) by (\S+) \(/) {
      $ip = $1; $helo = $2; $by = $3; goto enough;
    }

    # Received: from ([10.0.0.6]) by mail0.ciphertrust.com with ESMTP ; Thu,
    # 13 Mar 2003 06:26:21 -0500 (EST)
    if (/^from \(\[(${IP_ADDRESS})\]\) by (\S+) with /) {
      $ip = $1; $by = $2;
    }

    # Received: from ironport.com (10.1.1.5) by a50.ironport.com with ESMTP; 01 Apr 2003 12:00:51 -0800
    # Received: from dyn-81-166-39-132.ppp.tiscali.fr (81.166.39.132) by cpmail.dk.tiscali.com (6.7.018)
    if (/^from ([^\d]\S+) \((${IP_ADDRESS})\) by (\S+) /) {
      $helo = $1; $ip = $2; $by = $3; goto enough;
    }

    # Received: from scv3.apple.com (scv3.apple.com) by mailgate2.apple.com (Content Technologies SMTPRS 4.2.1) with ESMTP id <T61095998e1118164e13f8@mailgate2.apple.com>; Mon, 17 Mar 2003 17:04:54 -0800
    # bug 4704: Only let this match Content Technologies so it stops breaking things that come after it by matching first
    if (/^from (\S+) \((\S+)\) by (\S+) \(Content Technologies /) {
      return;		# useless without the $ip anyway!
      #$helo = $1; $rdns = $2; $by = $3; goto enough;
    }

    # Received: from 01al10015010057.ad.bls.com ([90.152.5.141] [90.152.5.141])
    # by aismtp3g.bls.com with ESMTP; Mon, 10 Mar 2003 11:10:41 -0500
    if (/^from (\S+) \(\[(\S+)\] \[(\S+)\]\) by (\S+) with /) {
      # not sure what $3 is ;)
      $helo = $1; $ip = $2; $by = $4;
      goto enough;
    }

    # Received: from 206.47.0.153 by dm3cn8.bell.ca with ESMTP (Tumbleweed MMS
    # SMTP Relay (MMS v5.0)); Mon, 24 Mar 2003 19:49:48 -0500
    if (/^from (${IP_ADDRESS}) by (\S+) with /) {
      $ip = $1; $by = $2;
      goto enough;
    }

    # Received: from pobox.com (h005018086b3b.ne.client2.attbi.com[66.31.45.164])
    # by rwcrmhc53.attbi.com (rwcrmhc53) with SMTP id <2003031302165605300suph7e>;
    # Thu, 13 Mar 2003 02:16:56 +0000
    if (/^from (\S+) \((\S+)\[(${IP_ADDRESS})\]\) by (\S+) /) {
      $mta_looked_up_dns = 1;
      $helo = $1; $rdns = $2; $ip = $3; $by = $4; goto enough;
    }

    # Received: from [10.128.128.81]:50999 (HELO dfintra.f-secure.com) by fsav4im2 ([10.128.128.74]:25) (F-Secure Anti-Virus for Internet Mail 6.0.34 Release) with SMTP; Tue, 5 Mar 2002 14:11:53 -0000
    if (/^from \[(${IP_ADDRESS})\]\S+ \((?:HELO|EHLO) (\S*)\) by (\S+) /) {
      $ip = $1; $helo = $2; $by = $3; goto enough;
    }

    # Received: from 62.180.7.250 (HELO daisy) by smtp.altavista.de (209.228.22.152) with SMTP; 19 Sep 2002 17:03:17 +0000
    if (/^from (${IP_ADDRESS}) \((?:HELO|EHLO) (\S*)\) by (\S+) /) {
      $ip = $1; $helo = $2; $by = $3; goto enough;
    }

    # Received: from oemcomputer [63.232.189.195] by highstream.net (SMTPD32-7.07) id A4CE7F2A0028; Sat, 01 Feb 2003 21:39:10 -0500
    if (/^from (\S+) \[(${IP_ADDRESS})\] by (\S+) /) {
      $helo = $1; $ip = $2; $by = $3; goto enough;
    }

    # from nodnsquery(192.100.64.12) by herbivore.monmouth.edu via csmap (V4.1) id srcAAAyHaywy
    if (/^from (\S+)\((${IP_ADDRESS})\) by (\S+) /) {
      $rdns = $1; $ip = $2; $by = $3; goto enough;
    }

    # Received: from [192.168.0.13] by <server> (MailGate 3.5.172) with SMTP;
    # Tue, 1 Apr 2003 15:04:55 +0100
    if (/^from \[(${IP_ADDRESS})\] by (\S+) \(MailGate /) {
      $ip = $1; $by = $2; goto enough;
    }

    # Received: from jmason.org (unverified [195.218.107.131]) by ni-mail1.dna.utvinternet.net <B0014212518@ni-mail1.dna.utvinternet.net>; Tue, 11 Feb 2003 12:18:12 +0000
    if (/^from (\S+) \(unverified \[(${IP_ADDRESS})\]\) by (\S+) /) {
      $helo = $1; $ip = $2; $by = $3; goto enough;
    }

    # # from 165.228.131.11 (proxying for 139.130.20.189) (SquirrelMail authenticated user jmmail) by jmason.org with HTTP
    # if (/^from (\S+) \(proxying for (${IP_ADDRESS})\) \([A-Za-z][^\)]+\) by (\S+) with /) {
    # $ip = $2; $by = $3; goto enough;
    # }
    if (/^from (${IP_ADDRESS}) \([A-Za-z][^\)]+\) by (\S+) with /) {
      $ip = $1; $by = $2; goto enough;
    }

    # Received: from [212.87.144.30] (account seiz [212.87.144.30] verified) by x.imd.net (CommuniGate Pro SMTP 4.0.3) with ESMTP-TLS id 5026665 for spamassassin-talk@lists.sourceforge.net; Wed, 15 Jan 2003 16:27:05 +0100
    # bug 4704 This pattern was checked as just an Exim format, but it does exist elsewhere
    # Received: from [206.51.230.145] (helo=t-online.de)
    #   by mxeu2.kundenserver.de with ESMTP (Nemesis),
    #  id 0MKpdM-1CkRpr14PF-000608; Fri, 31 Dec 2004 19:49:15 +0100
    # Received: from [218.19.142.229] (helo=hotmail.com ident=yiuhyotp)
    #   by yzordderrex with smtp (Exim 3.35 #1 (Debian)) id 194BE5-0005Zh-00; Sat, 12 Apr 2003 03:58:53 +0100
    if (/^from \[(${IP_ADDRESS})\] \(([^\)]+)\) by (\S+) /) {
      $ip = $1; my $sub = $2; $by = $3;
      $sub =~ s/helo=(\S+)// and $helo = $1;
      $sub =~ s/ident=(\S*)// and $ident = $1;
      goto enough;
    }

    # Received: from mtsbp606.email-info.net (?dXqpg3b0hiH9faI2OxLT94P/YKDD3rQ1?@64.253.199.166) by kde.informatik.uni-kl.de with SMTP; 30 Apr 2003 15:06:29
    if (/^from (\S+) \((?:\S+\@)?(${IP_ADDRESS})\) by (\S+) with /) {
      $rdns = $1; $ip = $2; $by = $3; goto enough;
    }

    # Obtuse smtpd: http://www.obtuse.com/
    # Received: from TCE-E-7-182-54.bta.net.cn(202.106.182.54) via SMTP
    #  by st.tahina.priv.at, id smtpdEDUB8h; Sun Nov 13 14:50:12 2005
    # Received: from pl027.nas934.d-osaka.nttpc.ne.jp(61.197.82.27), claiming to be "foo.woas.net" via SMTP
    #  by st.tahina.priv.at, id smtpd1PBsZT; Sun Nov 13 15:38:52 2005
    if (/^from (\S+)\((${IP_ADDRESS})\)(?:, claiming to be "(\S+)")? via \S+ by (\S+),/) {
      $rdns = $1; $ip = $2; $helo = (defined $3) ? $3 : ''; $by = $4;
      if ($1 ne 'UNKNOWN') {
	$mta_looked_up_dns = 1;
	$rdns = $1;
      }
      goto enough;
    }
  }

  # simta: http://rsug.itd.umich.edu/software/simta/
  # Note the ugly uppercase FROM/BY/ID
  # Received: FROM hackers.mr.itd.umich.edu (smtp.mail.umich.edu [141.211.14.81])
  #  BY madman.mr.itd.umich.edu ID 434B508E.174A6.13932 ; 11 Oct 2005 01:41:34 -0400
  # Received: FROM [192.168.1.24] (s233-64-90-216.try.wideopenwest.com [64.233.216.90])
  #  BY hackers.mr.itd.umich.edu ID 434B5051.8CDE5.15436 ; 11 Oct 2005 01:40:33 -0400
  if (/^FROM (\S+) \((\S+) \[(${IP_ADDRESS})\]\) BY (\S+) (?:ID (\S+) )?/ ) {
      $mta_looked_up_dns = 1;
      $helo = $1; $rdns = $2; $ip = $3; $by = $4;
      $id = $5 if (defined $5);
      goto enough;
  }

  # Norton AntiVirus Gateway
  # Received: (from localhost [24.180.47.240])
  #  by host.name (NAVGW 2.5.2.12) with SMTP id M2006060503484615455
  #  for <user@domain.co.uk>; Mon, 05 Jun 2006 03:48:47 +0100
  if (/^\(from (\S*) \[(${IP_ADDRESS})\]\) by (\S+) \(NAVGW .*?\) with /) {
    $helo = $1; $ip = $2; $by = $3;
    goto enough;
  }

  # ------------------------------------------------------------------------
  # IGNORED LINES: generally local-to-local or non-TCP/IP handovers

  # Received: by faerber.muc.de (OpenXP/32 v3.9.4 (Win32) alpha @
  # 2003-03-07-1751d); 07 Mar 2003 22:10:29 +0000
  # Received: by x.x.org (bulk_mailer v1.13); Wed, 26 Mar 2003 20:44:41 -0600
  # Received: by SPIDERMAN with Internet Mail Service (5.5.2653.19) id <19AF8VY2>; Tue, 25 Mar 2003 11:58:27 -0500
  # Received: by oak.ein.cz (Postfix, from userid 1002) id DABBD1BED3;
  # Thu, 13 Feb 2003 14:02:21 +0100 (CET)
  # ignore any lines starting with "by", we want the "from"s!
  if (/^by /) { return; }

  # Received: from raptor.research.att.com (bala@localhost) by
  # raptor.research.att.com (SGI-8.9.3/8.8.7) with ESMTP id KAA14788 
  # for <asrg@example.com>; Fri, 7 Mar 2003 10:37:56 -0500 (EST)
  # make this localhost-specific, so we know it's safe to ignore
  if (/^from \S+ \(\S+\@${LOCALHOST}\) by \S+ \(/) { return; }

  # from paul (helo=felix) by felix.peema.org with local-esmtp (Exim 4.43)
  # id 1Ccq0j-0002k2-Lk; Fri, 10 Dec 2004 19:01:01 +0000
  # Exim doco says this is local submission, cf switch -oMr
  if (/^from \S+ \S+ by \S+ with local-e?smtp /) { return; }

  # from 127.0.0.1 (AVG SMTP 7.0.299 [265.6.8]); Wed, 05 Jan 2005 15:06:48
  # -0800
  if (/^from 127\.0\.0\.1 \(AVG SMTP \S+ \[\S+\]\); /) { return; }

  # from qmail-scanner-general-admin@lists.sourceforge.net by alpha by uid 7791 with qmail-scanner-1.14 (spamassassin: 2.41. Clear:SA:0(-4.1/5.0):. Processed in 0.209512 secs)
  if (/^from \S+\@\S+ by \S+ by uid \S+ /) { return; }

  # Received: from DSmith1204@aol.com by imo-m09.mx.aol.com (mail_out_v34.13.) id 7.53.208064a0 (4394); Sat, 11 Jan 2003 23:24:31 -0500 (EST)
  if (/^from \S+\@\S+ by \S+ /) { return; }

  # Received: from Unknown/Local ([?.?.?.?]) by mailcity.com; Fri, 17 Jan 2003 15:23:29 -0000
  if (/^from Unknown\/Local \(/) { return; }

  # Received: from localhost (mailnull@localhost) by x.org (8.12.6/8.9.3) 
  # with SMTP id h2R2iivG093740; Wed, 26 Mar 2003 20:44:44 -0600 
  # (CST) (envelope-from x@x.org)
  # Received: from localhost (localhost [127.0.0.1]) (uid 500) by mail with local; Tue, 07 Jan 2003 11:40:47 -0600
  if (/^from ${LOCALHOST} \((?:\S+\@)?${LOCALHOST}[\)\[]/) { return; }

  # Received: from olgisoft.com (127.0.0.1) by 127.0.0.1 (EzMTS MTSSmtp
  # 1.55d5) ; Thu, 20 Mar 03 10:06:43 +0100 for <asrg@ietf.org>
  if (/^from \S+ \((?:\S+\@)?${LOCALHOST}\) /) { return; }

  # Received: from casper.ghostscript.com (raph@casper [127.0.0.1]) h148aux8016336verify=FAIL); Tue, 4 Feb 2003 00:36:56 -0800
  if (/^from (\S+) \(\S+\@\S+ \[${LOCALHOST}\]\) /) { return; }

  # Received: from (AUTH: e40a9cea) by vqx.net with esmtp (courier-0.40) for <asrg@ietf.org>; Mon, 03 Mar 2003 14:49:28 +0000
  if (/^from \(AUTH: (\S+)\) by (\S+) with /) { return; }

  # from localhost (localhost [[UNIX: localhost]]) by home.barryodonovan.com
  # (8.12.11/8.12.11/Submit) id iBADHRP6011034; Fri, 10 Dec 2004 13:17:27 GMT
  if (/^from localhost \(localhost \[\[UNIX: localhost\]\]\) by /) { return; }

  # header produced by command line /usr/bin/sendmail -t -f username@example.com
  # Received: (from username@localhost) by home.example.com
  # (8.12.11/8.12.11/Submit) id iBADHRP6011034; Fri, 10 Dec 2004 13:17:27 GMT
  if (/^\(from \S+\@localhost\) by \S+ /) { return; }

  # Received: Message by Barricade wilhelm.eyp.ee with ESMTP id h1I7hGU06122 for <spamassassin-talk@lists.sourceforge.net>; Tue, 18 Feb 2003 09:43:16 +0200
  if (/^Message by /) {
    return;	# whatever
  }

  # Received: FROM ca-ex-bridge1.nai.com BY scwsout1.nai.com ;
  # Fri Feb 07 10:18:12 2003 -0800
  if (/^FROM \S+ BY \S+ \; /) { return; }

  # Internal Amazon traffic
  # Received: from dc-mail-3102.iad3.amazon.com by mail-store-2001.amazon.com with ESMTP (peer crosscheck: dc-mail-3102.iad3.amazon.com)
  if (/^from \S+\.amazon\.com by \S+\.amazon\.com with ESMTP \(peer crosscheck: /) { return; }

  # Received: from GWGC6-MTA by gc6.jefferson.co.us with Novell_GroupWise; Tue, 30 Nov 2004 10:09:15 -0700
  if (/^from [^\.]+ by \S+ with Novell_GroupWise; /) { return; }

  # Received: from no.name.available by [165.224.43.143] via smtpd (for [165.224.216.89]) with ESMTP; Fri, 28 Jan 2005 13:06:39 -0500
  # Received: from no.name.available by [165.224.216.88] via smtpd (for lists.sourceforge.net [66.35.250.206]) with ESMTP; Fri, 28 Jan 2005 15:42:30 -0500
  # These are from an internal host protected by a Raptor firewall, to hosts
  # outside the firewall.  We can only ignore the handover since we don't have
  # enough info in those headers; however, from googling, it appears that
  # all samples are cases where the handover is safely ignored.
  if (/^from no\.name\.available by \S+ via smtpd \(for /) { return; }

  # from 156.56.111.196 by blazing.arsecandle.org (envelope-from <gentoo-announce-return-530-rod=arsecandle.org@lists.gentoo.org>, uid 502) with qmail-scanner-1.24 (clamdscan: 0.80/594. f-prot: 4.4.2/3.14.11. Clear:RC:0(156.56.111.196):. Processed in 0.288806 secs); 06 Feb 2005 21:11:38 -0000
  # these are safe to ignore.  the previous handover line has the full
  # details of the handover described here, it's just qmail-scanner
  # logging a little more.
  if (/^from \S+ by \S+ \(.{0,100}\) with qmail-scanner/) {
    $envfrom =~ s/^\s*<*//gs; $envfrom =~ s/>*\s*$//gs;
    $envfrom =~ s/[\s\0\#\[\]\(\)\<\>\|]/!/gs;
    $self->{qmail_scanner_env_from} = $envfrom; # hack!
    return;
  }

  # ------------------------------------------------------------------------
  # HANDOVERS WE KNOW WE CAN'T DEAL WITH: TCP transmission, but to MTAs that
  # just don't log enough info for us to use (ie. no IP address present).
  # Note: "goto unparseable" is strongly recommended here, unless you're sure
  # the regexp won't match something in the field; otherwise ALL_TRUSTED may
  # fire even in the presence of an unparseable Received header.

  # Received: from CATHY.IJS.SI by CATHY.IJS.SI (PMDF V4.3-10 #8779) id <01KTSSR50NSW001MXN@CATHY.IJS.SI>; Fri, 21 Mar 2003 20:50:56 +0100
  # Received: from MATT_LINUX by hippo.star.co.uk via smtpd (for mail.webnote.net [193.120.211.219]) with SMTP; 3 Jul 2002 15:43:50 UT
  # Received: from cp-its-ieg01.mail.saic.com by cpmx.mail.saic.com for me@jmason.org; Tue, 23 Jul 2002 14:09:10 -0700
  if (/^from \S+ by \S+ (?:with|via|for|\()/) { goto unparseable; }
  
  # Received: from virtual-access.org by bolero.conactive.com ; Thu, 20 Feb 2003 23:32:58 +0100
  if (/^from (\S+) by (\S+) *\;/) {
    goto unparseable;	# can't trust this
  }

  # ------------------------------------------------------------------------
  # FALL-THROUGH: OK, let's try some general patterns
  if (/\bhelo=([-A-Za-z0-9\.]+)[^-A-Za-z0-9\.]/) { $helo = $1; }
  elsif (/^from (\S+)[^-A-Za-z0-9\.]/) { $helo = $1; }
  if (/\[(${IP_ADDRESS})\]/) { $ip = $1; }
  if (/ by (\S+)[^-A-Za-z0-9\;\.]/) { $by = $1; }
  if ($ip && $by) { goto enough; }

  # ------------------------------------------------------------------------
  # OK, if we still haven't figured out at least the basics (IP and by), or
  # returned due to it being a known-crap format, let's warn so the user can
  # file a bug report or something.

  dbg("received-header: unknown format: $_");
  # and skip the line entirely!  We can't parse it...

unparseable:

  dbg("received-header: unparseable: $_");
  $self->{num_relays_unparseable}++;
  return;

  # ------------------------------------------------------------------------
  # OK, line parsed (at least partially); now deal with the contents

enough:

  # flag handovers we couldn't get an IP address from at all
  if ($ip eq '') {
    dbg("received-header: could not parse IP address from: $_");
  }

  $ip = Mail::SpamAssassin::Util::extract_ipv4_addr_from_string ($ip);
  if (!$ip) {
    dbg("received-header: could not parse IPv4 address, assuming IPv6");
    return;   # ignore IPv6 handovers
  }

  # DISABLED: if we cut out localhost-to-localhost SMTP handovers,
  # we will give FPs on SPF checks -- since the SMTP "MAIL FROM" addr
  # will be recorded, but we won't have the relays handover recorded
  # for that SMTP transaction, so we wind up checking the wrong IP
  # for the addr.
  if (0) {
    if ($ip eq '127.0.0.1') {
      dbg("received-header: ignoring localhost handover");
      return;   # ignore localhost handovers
    }
  }

  if ($rdns =~ /^unknown$/i) {
    $rdns = '';		# some MTAs seem to do this
  }

  $envfrom =~ s/^\s*<*//gs; $envfrom =~ s/>*\s*$//gs;
  $by =~ s/\;$//;

  # ensure invalid chars are stripped.  Replace with '!' to flag their
  # presence, though.  NOTE: this means "[1.2.3.4]" IP addr HELO
  # strings, which are legit by RFC-2821, look like "!1.2.3.4!".
  # still useful though.
  $ip =~ s/[\s\0\#\[\]\(\)\<\>\|]/!/gs;
  $rdns =~ s/[\s\0\#\[\]\(\)\<\>\|]/!/gs;
  $helo =~ s/[\s\0\#\[\]\(\)\<\>\|]/!/gs;
  $by =~ s/[\s\0\#\[\]\(\)\<\>\|]/!/gs;
  $ident =~ s/[\s\0\#\[\]\(\)\<\>\|]/!/gs;
  $envfrom =~ s/[\s\0\#\[\]\(\)\<\>\|]/!/gs;

  my $relay = {
    ip => $ip,
    by => $by,
    helo => $helo,
    id => $id,
    ident => $ident,
    envfrom => $envfrom,
    lc_by => (lc $by),
    lc_helo => (lc $helo),
    auth => $auth
  };

  # perform rDNS check if MTA has not done it for us.
  #
  # TODO: do this for untrusted headers anyway; if it mismatches it
  # could be a spamsign.  Probably better done later after we've
  # moved the "trusted" ones out of the way.  In fact, this op
  # here may be movable too; no need to lookup trusted IPs all the time.
  #
  if ($rdns eq '') {
    if (!$self->{is_dns_available}) {
      if ($mta_looked_up_dns) {
	# we know the MTA always does lookups, so this means the host
	# really has no rDNS (rather than that the MTA didn't bother
	# looking it up for us).
	$relay->{no_reverse_dns} = 1;
	$rdns = '';
      } else {
	$relay->{rdns_not_in_headers} = 1;
      }

    } else {
      $rdns = $self->{dns_pms}->lookup_ptr ($ip);

      if (!$rdns) {
	$relay->{no_reverse_dns} = 1;
	$rdns = '';
      }
    }
  }
  $relay->{rdns} = $rdns;
  $relay->{lc_rdns} = lc $rdns;

  $self->make_relay_as_string($relay);

  my $is_private = ($ip =~ /${IP_PRIVATE}/o);
  $relay->{ip_private} = $is_private;

  # add it to an internal array so Eval tests can use it
  return $relay;
}

# Build the one-line text form of a parsed relay, log it via dbg(), and
# store it in $relay->{as_string}.
#
# $relay is a hash ref; its ip, rdns, helo, by, ident, envfrom, id and auth
# entries are interpolated into the string.
sub make_relay_as_string {
  my ($self, $relay) = @_;

  # Fields are separated by spaces so that things like Bayes can tokenize
  # them easily.  NOTE: new entries may be added to this string later, but
  # the *order* of the entries must be preserved, so that regexps which
  # assume that e.g. "ip" comes before "helo" keep working.
  my @fields = (
    "ip=$relay->{ip}",
    "rdns=$relay->{rdns}",
    "helo=$relay->{helo}",
    "by=$relay->{by}",
    "ident=$relay->{ident}",
    "envfrom=$relay->{envfrom}",
    "intl=0",                     # written as a literal 0 here
    "id=$relay->{id}",
    "auth=$relay->{auth}",
  );
  my $text = '[ ' . join(' ', @fields) . ' ]';

  dbg("received-header: parsed as $text");
  $relay->{as_string} = $text;
}

# Called when a fetchmail marker (or similar POP-fetcher signature) is seen
# in a Received header.  If $self->{allow_fetchmail_markers} is true, the
# parse is restarted by emptying the relays_trusted, relays_internal and
# relays_external lists; otherwise the marker is only logged and ignored.
# (spamcop does this, and it's a great idea ;)
sub found_pop_fetcher_sig {
  my ($self) = @_;

  # marker not permitted here: just note it and leave the relay lists alone
  if (!$self->{allow_fetchmail_markers}) {
    return dbg("received-header: found fetchmail marker outside trusted area, ignored");
  }

  dbg("received-header: found fetchmail marker, restarting parse");

  # throw away everything collected so far
  $self->{relays_trusted} = [ ];
  $self->{relays_internal} = [ ];
  $self->{relays_external} = [ ];
}

# ---------------------------------------------------------------------------

1;