1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177
|
# This library is free software; you can redistribute it and/or modify
# it under the same terms as Perl itself.
#
# This license differs from the rest of public-inbox
#
# Our own jwz-style threading class based on Mail::Thread from CPAN.
# Mail::Thread is unmaintained and unavailable on some distros.
# We also do not want pruning or subject grouping, since we want
# to encourage strict threading and hopefully encourage people
# to use proper In-Reply-To/References.
#
# This includes fixes from several open bugs for Mail::Thread
#
# Avoid circular references
# - https://rt.cpan.org/Public/Bug/Display.html?id=22817
#
# And avoid recursion in recurse_down:
# - https://rt.cpan.org/Ticket/Display.html?id=116727
# - http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=833479
package PublicInbox::SearchThread;
use strict;
use warnings;
use PublicInbox::MID qw($MID_EXTRACT);
sub thread {
my ($msgs, $ordersub, $ctx) = @_;
my (%id_table, @imposters);
keys(%id_table) = scalar @$msgs; # pre-size
# A. put all current non-imposter $msgs (non-ghosts) into %id_table
# (imposters are messages with reused Message-IDs)
# Sadly, we sort here anyways since the fill-in-the-blanks References:
# can be shakier if somebody used In-Reply-To with multiple, disparate
# messages. So, take the client Date: into account since we can't
# always determine ordering when somebody uses multiple In-Reply-To.
my @kids = sort { $a->{ds} <=> $b->{ds} } grep {
# this delete saves around 4K across 1K messages
# TODO: move this to a more appropriate place, breaks tests
# if we do it during psgi_cull
delete $_->{num};
bless $_, 'PublicInbox::SearchThread::Msg';
if (exists $id_table{$_->{mid}}) {
$_->{children} = [];
push @imposters, $_; # we'll deal with them later
undef;
} else {
$_->{children} = {}; # will become arrayref later
$id_table{$_->{mid}} = $_;
defined($_->{references});
}
} @$msgs;
for my $smsg (@kids) {
# This loop exists to help fill in gaps left from missing
# messages. It is not needed in a perfect world where
# everything is perfectly referenced, only the last ref
# matters.
my $prev;
for my $ref ($smsg->{references} =~ m/$MID_EXTRACT/go) {
# Find a Container object for the given Message-ID
my $cont = $id_table{$ref} //=
PublicInbox::SearchThread::Msg::ghost($ref);
# Link the References field's Containers together in
# the order implied by the References header
#
# * If they are already linked don't change the
# existing links
# * Do not add a link if adding that link would
# introduce a loop...
if ($prev &&
!$cont->{parent} && # already linked
!$cont->has_descendent($prev) # would loop
) {
$prev->add_child($cont);
}
$prev = $cont;
}
# C. Set the parent of this message to be the last element in
# References.
if (defined $prev && !$smsg->has_descendent($prev)) {
$prev->add_child($smsg);
}
}
my $ibx = $ctx->{ibx};
my @rootset = grep { # n.b.: delete prevents cyclic refs
!delete($_->{parent}) && $_->visible($ibx)
} values %id_table;
$ordersub->(\@rootset);
$_->order_children($ordersub, $ctx) for @rootset;
# parent imposter messages with reused Message-IDs
unshift(@{$id_table{$_->{mid}}->{children}}, $_) for @imposters;
\@rootset;
}
package PublicInbox::SearchThread::Msg;
use base qw(PublicInbox::Smsg);
use strict;
use warnings;
use Carp qw(croak);
# declare a ghost smsg (determined by absence of {blob})
sub ghost {
bless {
mid => $_[0],
children => {}, # becomes an array when sorted by ->order(...)
}, __PACKAGE__;
}
sub topmost {
my ($self) = @_;
my @q = ($self);
while (my $cont = shift @q) {
return $cont if $cont->{blob};
push @q, values %{$cont->{children}};
}
undef;
}
sub add_child {
my ($self, $child) = @_;
croak "Cowardly refusing to become my own parent: $self"
if $self == $child;
my $cid = $child->{mid};
# reparenting:
if (defined(my $parent = $child->{parent})) {
delete $parent->{children}->{$cid};
}
$self->{children}->{$cid} = $child;
$child->{parent} = $self;
}
sub has_descendent {
my ($self, $child) = @_;
my %seen; # loop prevention
while ($child) {
return 1 if $self == $child || $seen{$child}++;
$child = $child->{parent};
}
0;
}
# Do not show/keep ghosts iff they have no children. Sometimes
# a ghost Message-ID is the result of a long header line
# being folded/mangled by a MUA, and not a missing message.
sub visible ($$) {
my ($self, $ibx) = @_;
return 1 if $self->{blob};
if (my $by_mid = $ibx->smsg_by_mid($self->{mid})) {
%$self = (%$self, %$by_mid);
1;
} else {
(scalar values %{$self->{children}});
}
}
sub order_children {
my ($cur, $ordersub, $ctx) = @_;
my %seen = ($cur => 1); # self-referential loop prevention
my @q = ($cur);
my $ibx = $ctx->{ibx};
while (defined($cur = shift @q)) {
# the {children} hashref here...
my @c = grep { !$seen{$_}++ && visible($_, $ibx) }
values %{delete $cur->{children}};
$ordersub->(\@c) if scalar(@c) > 1;
$cur->{children} = \@c; # ...becomes an arrayref
push @q, @c;
}
}
1;
|