File: thread.rb

package info (click to toggle)
sup-mail 1.3-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,412 kB
  • sloc: ruby: 13,047; sh: 167; makefile: 12
file content (451 lines) | stat: -rw-r--r-- 13,270 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
# encoding: UTF-8
#
## Herein lies all the code responsible for threading messages. It's
## basically an online version of the JWZ threading algorithm:
## http://www.jwz.org/doc/threading.html
##
## I didn't implement it for efficiency, but thanks to our search
## engine backend, it's typically not applied to very many messages at
## once.
##
## At the top level, we have a ThreadSet, which represents a set of
## threads, e.g. a message folder or an inbox. Each ThreadSet contains
## zero or more Threads. A Thread represents all the message related
## to a particular subject. Each Thread has one or more Containers.  A
## Container is a recursive structure that holds the message tree as
## determined by the references: and in-reply-to: headers. Each
## Container holds zero or one messages. In the case of zero messages,
## it means we've seen a reference to the message but haven't (yet)
## seen the message itself.
##
## A Thread can have multiple top-level Containers if we decide to
## group them together independent of tree structure, typically if
## (e.g. due to someone using a primitive MUA) the messages have the
## same subject but we don't have evidence from in-reply-to: or
## references: headers. In this case Thread#each can optionally yield
## a faked root object tying them all together into one tree
## structure.

require 'set'

module Redwood

class Thread
  include Enumerable

  attr_reader :containers
  def initialize
    ## ah, the joys of a multithreaded application with a class called
    ## "Thread". i keep instantiating the wrong one...
    raise "wrong Thread class, buddy!" if block_given?
    @containers = []
  end

  def << c
    @containers << c
  end

  def empty?; @containers.empty?; end
  def empty!; @containers.clear; end
  def drop c; @containers.delete(c) or raise "bad drop"; end

  ## unused
  def dump f=$stdout
    f.puts "=== start thread with #{@containers.length} trees ==="
    @containers.each { |c| c.dump_recursive f; f.puts }
    f.puts "=== end thread ==="
  end

  ## yields each message, its depth, and its parent. the message yield
  ## parameter can be a Message object, or :fake_root, or nil (no
  ## message found but the presence of one deduced from other
  ## messages).
  def each fake_root=false
    adj = 0
    root = @containers.find_all { |c| c.message && !Message.subj_is_reply?(c.message.subj) }.argmin { |c| c.date }

    if root
      adj = 1
      root.first_useful_descendant.each_with_stuff do |c, d, par|
        yield c.message, d, (par ? par.message : nil)
      end
    elsif @containers.length > 1 && fake_root
      adj = 1
      yield :fake_root, 0, nil
    end

    @containers.each do |cont|
      next if cont == root
      fud = cont.first_useful_descendant
      fud.each_with_stuff do |c, d, par|
        ## special case here: if we're an empty root that's already
        ## been joined by a fake root, don't emit
        yield c.message, d + adj, (par ? par.message : nil) unless
          fake_root && c.message.nil? && root.nil? && c == fud
      end
    end
  end

  def first; each { |m, *| return m if m }; nil; end
  def has_message?; any? { |m, *| m.is_a? Message }; end
  def dirty?; any? { |m, *| m && m.dirty? }; end
  def date; map { |m, *| m.date if m }.compact.max; end
  def snippet
    with_snippets = select { |m, *| m && m.snippet && !m.snippet.empty? }
    first_unread, * = with_snippets.select { |m, *| m.has_label?(:unread) }.sort_by { |m, *| m.date }.first
    return first_unread.snippet if first_unread
    last_read, * = with_snippets.sort_by { |m, *| m.date }.last
    return last_read.snippet if last_read
    ""
  end
  def authors; map { |m, *| m.from if m }.compact.uniq; end

  def apply_label t; each { |m, *| m && m.add_label(t) }; end
  def remove_label t; each { |m, *| m && m.remove_label(t) }; end

  def toggle_label label
    if has_label? label
      remove_label label
      false
    else
      apply_label label
      true
    end
  end

  def set_labels l; each { |m, *| m && m.labels = l }; end
  def has_label? t; any? { |m, *| m && m.has_label?(t) }; end
  def each_dirty_message; each { |m, *| m && m.dirty? && yield(m) }; end

  def direct_participants
    map { |m, *| [m.from] + m.to if m }.flatten.compact.uniq
  end

  def participants
    map { |m, *| [m.from] + m.to + m.cc + m.bcc if m }.flatten.compact.uniq
  end

  def size; map { |m, *| m ? 1 : 0 }.sum; end
  def subj; argfind { |m, *| m && m.subj }; end
  def labels; inject(Set.new) { |s, (m, *)| m ? s | m.labels : s } end
  def labels= l
    raise ArgumentError, "not a set" unless l.is_a?(Set)
    each { |m, *| m && m.labels = l.dup }
  end

  def latest_message
    inject(nil) do |a, b|
      b = b.first
      if a.nil?
        b
      elsif b.nil?
        a
      else
        b.date > a.date ? b : a
      end
    end
  end

  def to_s
    "<thread containing: #{@containers.join ', '}>"
  end

  def sort_key
    m = latest_message
    m ? [-m.date.to_i, m.id] : [-Time.now.to_i, ""]
  end
end

## recursive structure used internally to represent message trees as
## described by reply-to: and references: headers.
##
## the 'id' field is the same as the message id. but the message might
## be empty, in the case that we represent a message that was referenced
## by another message (as an ancestor) but never received.
class Container
  attr_accessor :message, :parent, :children, :id, :thread

  def initialize id
    raise "non-String #{id.inspect}" unless id.is_a? String
    @id = id
    @message, @parent, @thread = nil, nil, nil
    @children = []
  end

  def each_with_stuff parent=nil
    yield self, 0, parent
    @children.sort_by(&:sort_key).each do |c|
      c.each_with_stuff(self) { |cc, d, par| yield cc, d + 1, par }
    end
  end

  def descendant_of? o
    if o == self
      true
    else
      @parent && @parent.descendant_of?(o)
    end
  end

  def == o; Container === o && id == o.id; end

  def empty?; @message.nil?; end
  def root?; @parent.nil?; end
  def root; root? ? self : @parent.root; end

  ## skip over any containers which are empty and have only one child. we use
  ## this make the threaded display a little nicer, and only stick in the
  ## "missing message" line when it's graphically necessary, i.e. when the
  ## missing message has more than one descendent.
  def first_useful_descendant
    if empty? && @children.size == 1
      @children.first.first_useful_descendant
    else
      self
    end
  end

  def find_attr attr
    if empty?
      @children.argfind { |c| c.find_attr attr }
    else
      @message.send attr
    end
  end
  def subj; find_attr :subj; end
  def date; find_attr :date; end

  def is_reply?; subj && Message.subj_is_reply?(subj); end

  def to_s
    [ "<#{id}",
      (@parent.nil? ? nil : "parent=#{@parent.id}"),
      (@children.empty? ? nil : "children=#{@children.map { |c| c.id }.inspect}"),
    ].compact.join(" ") + ">"
  end

  def dump_recursive f=$stdout, indent=0, root=true, parent=nil
    raise "inconsistency" unless parent.nil? || parent.children.include?(self)
    unless root
      f.print " " * indent
      f.print "+->"
    end
    line = "[#{thread.nil? ? ' ' : '*'}] " + #"[#{useful? ? 'U' : ' '}] " +
      if @message
        message.subj ##{@message.refs.inspect} / #{@message.replytos.inspect}"
      else
        "<no message>"
      end

    f.puts "#{id} #{line}"#[0 .. (105 - indent)]
    indent += 3
    @children.each { |c| c.dump_recursive f, indent, false, self }
  end

  def sort_key
    empty? ? [Time.now.to_i, ""] : [@message.date.to_i, @message.id]
  end
end

## A set of threads, so a forest. Is integrated with the index and
## builds thread structures by reading messages from it.
##
## If 'thread_by_subj' is true, puts messages with the same subject in
## one thread, even if they don't reference each other. This is
## helpful for crappy MUAs that don't set In-reply-to: or References:
## headers, but means that messages may be threaded unnecessarily.
##
## The following invariants are maintained: every Thread has at least one
## Container tree, and every Container tree has at least one Message.
class ThreadSet
  attr_reader :num_messages
  bool_reader :thread_by_subj

  def initialize index, thread_by_subj=true
    @index = index
    @num_messages = 0
    ## map from message ids to container objects
    @messages = SavingHash.new { |id| Container.new id }
    ## map from subject strings or (or root message ids) to thread objects
    @threads = SavingHash.new { Thread.new }
    @thread_by_subj = thread_by_subj
  end

  def thread_for_id mid; @messages.member?(mid) && @messages[mid].root.thread end
  def contains_id? id; @messages.member?(id) && !@messages[id].empty? end
  def thread_for m; thread_for_id m.id end
  def contains? m; contains_id? m.id end

  def threads; @threads.values end
  def size; @threads.size end

  def dump f=$stdout
    @threads.each do |s, t|
      f.puts "**********************"
      f.puts "** for subject #{s} **"
      f.puts "**********************"
      t.dump f
    end
  end

  ## link two containers
  def link p, c, overwrite=false
    if p == c || p.descendant_of?(c) || c.descendant_of?(p) # would create a loop
      #puts "*** linking parent #{p.id} and child #{c.id} would create a loop"
      return
    end

    #puts "in link for #{p.id} to #{c.id}, perform? #{c.parent.nil?} || #{overwrite}"

    return unless c.parent.nil? || overwrite
    remove_container c
    p.children << c
    c.parent = p

    ## if the child was previously a top-level container, it now ain't,
    ## so ditch our thread and kill it if necessary
    prune_thread_of c
  end
  private :link

  def remove_container c
    c.parent.children.delete c if c.parent # remove from tree
  end
  private :remove_container

  def prune_thread_of c
    return unless c.thread
    c.thread.drop c
    @threads.delete_if { |k, v| v == c.thread } if c.thread.empty?
    c.thread = nil
  end
  private :prune_thread_of

  def remove_id mid
    return unless @messages.member?(mid)
    c = @messages[mid]
    remove_container c
    prune_thread_of c
  end

  def remove_thread_containing_id mid
    return unless @messages.member?(mid)
    c = @messages[mid]
    t = c.root.thread
    @threads.delete_if { |key, thread| t == thread }
  end

  ## load in (at most) num number of threads from the index
  def load_n_threads num, opts={}
    @index.each_id_by_date opts do |mid, builder|
      break if size >= num unless num == -1
      next if contains_id? mid

      m = builder.call
      load_thread_for_message m, :skip_killed => opts[:skip_killed], :load_deleted => opts[:load_deleted], :load_spam => opts[:load_spam]
      yield size if block_given?
    end
  end

  ## loads in all messages needed to thread m
  ## may do nothing if m's thread is killed
  def load_thread_for_message m, opts={}
    good = @index.each_message_in_thread_for m, opts do |mid, builder|
      next if contains_id? mid
      add_message builder.call
    end
    add_message m if good
  end

  ## merges in a pre-loaded thread
  def add_thread t
    raise "duplicate" if @threads.values.member? t
    t.each { |m, *| add_message m }
  end

  ## merges two threads together. both must be members of this threadset.
  ## does its best, heuristically, to determine which is the parent.
  def join_threads threads
    return if threads.size < 2

    containers = threads.map do |t|
      c = @messages.member?(t.first.id) ? @messages[t.first.id] : nil
      raise "not in threadset: #{t.first.id}" unless c && c.message
      c
    end

    ## use subject headers heuristically
    parent = containers.find { |c| !c.is_reply? }

    ## no thread was rooted by a non-reply, so make a fake parent
    parent ||= @messages["joining-ref-" + containers.map { |c| c.id }.join("-")]

    containers.each do |c|
      next if c == parent
      c.message.add_ref parent.id
      link parent, c
    end

    true
  end

  def is_relevant? m
    m.refs.any? { |ref_id| @messages.member? ref_id }
  end

  def delete_message message
    el = @messages[message.id]
    return unless el.message
    el.message = nil
  end

  ## the heart of the threading code
  def add_message message
    el = @messages[message.id]
    return if el.message # we've seen it before

    #puts "adding: #{message.id}, refs #{message.refs.inspect}"

    el.message = message

    ## link via references:
    (message.refs + [el.id]).inject(nil) do |prev, ref_id|
      ref = @messages[ref_id]
      link prev, ref if prev
      ref
    end

    ## link via in-reply-to:
    message.replytos.each do |ref_id|
      ref = @messages[ref_id]
      link ref, el, true
      break # only do the first one
    end

    root = el.root
    key =
      if thread_by_subj?
        Message.normalize_subj root.subj
      else
        root.id
      end

    ## check to see if the subject is still the same (in the case
    ## that we first added a child message with a different
    ## subject)
    if root.thread
      if @threads.member?(key) && @threads[key] != root.thread
        @threads.delete key
      end
    else
      thread = @threads[key]
      thread << root
      root.thread = thread
    end

    ## last bit
    @num_messages += 1
  end
end

end