msgthr.git  about / heads / tags
non-recursive, container-agnostic message threading
blob 8619c62858de4e3c7ef55dbe580dcf28708728c9 6529 bytes (raw)
$ git show HEAD:lib/msgthr.rb	# shows this blob on the CLI

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
 
# Copyright (C) 2016 all contributors <msgthr-public@80x24.org>
# License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>

# Non-recursive, container-agnostic message threading.
#
# Usage is typically:
#
# * use Msgthr.new to create a new object
# * use Msgthr#add! for every message you have
# * use Msgthr#thread! to perform threading and (optionally) sort
# * use Msgthr#walk_thread to iterate through the threaded tree
#
# See https://80x24.org/msgthr/README for more info
# You may email us publically at mailto:msgthr-public@80x24.org
# Archives are at https://80x24.org/msgthr-public/
class Msgthr

  # an Array of root (parent-less) messages, only populated after
  # calling Msgthr#thread!
  attr_reader :rootset

  # raised when methods are called in an unsupported order
  StateError = Class.new(RuntimeError)

  # Initialize a Msgthr object
  def initialize
    @id_table = {}
    @rootset = []
    @state = :init # :init => :threaded => :ordered
  end

  # Clear internal data structures to save memory and prepare for reuse
  def clear
    @rootset.clear
    @id_table.clear
    @state = :init
  end

  # Performs threading on the messages and returns the rootset
  # (set of message containers without parents).
  #
  # Call this only after all #add operations are complete.
  #
  # If given an optional block, it will perform an in-place sort
  # using the block parameter.
  #
  # To thread and sort by unique +mid+ identifiers for each container:
  #
  #   msgthr.thread! { |ary| ary.sort_by!(&:mid) }
  #
  # If your opaque message pointer contains a +time+ accessor which gives
  # a Time object:
  #
  #   msgthr.thread! do |ary|
  #     ary.sort_by! do |cont| # Msgthr::Container
  #       cur = cont.topmost
  #       cur ? cur.msg.time : Time.at(0)
  #     end
  #   end
  #
  # Note, using Msgthr::Container#topmost is NOT necessary when accessing
  # Msgthr::Container#mid, as any known missing messages (ghosts)
  # will still have a +mid+.  However, Msgthr::Container#topmost is
  # necessary if accessing Msgthr::Container#msg.
  def thread!
    raise StateError, "already #@state" if @state != :init
    ret = @rootset
    @id_table.each_value { |cont| ret << cont if cont.parent.nil? }.clear
    @state = :threaded
    order! { |ary| yield(ary) } if block_given?
    ret
  end

  # Calling this method is unnecessary since msgthr 1.1.0.
  # In previous releases, the #thread! did not support a block
  # parameter for ordering.  This method remains for compatibility.
  def order!
    case @state
    when :init then raise StateError, "#thread! not called"
    when :ordered then raise StateError, "already #@state"
    # else @state == :threaded
    end

    yield @rootset
    @rootset.each do |cont|
      # this calls Msgthr::Container#order!, which is non-recursive
      cont.order! { |children| yield(children) }
    end
    @state = :ordered
    @rootset
  end

  # non-recursively walk a set of messages after #thread!
  # (and optionally, #order!).
  #
  # If you do not care about ordering, you may call this
  # immediately after all #add operations are complete starting
  # with msgthr 1.1.0
  #
  # This takes a block and yields 3 elements to it: +|level, container, index|+
  # for each message container.
  #
  # * +level+ is the current depth within the walk (non-negative Integer)
  # * +container+ is the Msgthr::Container object
  # * +index+ is the offset of the container within its level (starting at 0)
  #
  # To display the subject of each message with indentation,
  # assuming your +msg+ pointer has a +subject+ field:
  #
  #   msgthr.walk_thread do |level, container, index|
  #     msg = container.msg
  #     subject = msg ? msg.subject : "[missing: <#{container.mid}>]"
  #     indent = '  ' * level
  #     printf("#{indent} % 3d. %s\n", index, subject)
  #   end
  def walk_thread
    thread! if @state == :init
    order! { |_| } if @state == :threaded
    i = -1
    q = @rootset.map { |cont| [ 0, cont, i += 1 ] }
    while tmp = q.shift
      level, cont, idx = tmp
      yield(level, cont, idx)
      level += 1
      i = -1
      q = cont.children.map { |cld| [ level, cld, i += 1 ] }.concat(q)
    end
  end

  # Adds a message to prepare a Msgthr object for threading.
  #
  # * +mid+ is a unique identifier for the message in a given thread.
  #   It is typically a String or Integer, but may be anything usable
  #   as a Hash key in Ruby.
  #
  # * +refs+ should be an Array of unique identifiers belonging
  #   to ancestors of the current message.
  #   For mail and news messages, this is usually the parsed result
  #   of the "References:" header.  Order should be oldest to newest
  #   in terms of ancestry, with the last element being the
  #   immediate parent of the given message.
  #
  #   This is +nil+ for messages with no parent (root messages).
  #
  # * +msg+ is an opaque object which typically contains a
  #   Mail or Tmail object for handling mail.
  #
  # If +mid+ is a String, it is recommended to freeze the string before
  # calling this method to avoid wasting memory on hash keys.  Likewise
  # is true for any String objects in +refs+.
  #
  # Adding a message could link 2 messages in Msgthr,
  # by making one message a child of the other.
  # It's possible to have a callback called when a child is added
  # by passing a block to this method.
  # This block can access the child and parent messages. E.g.
  #
  #   msgthr.add(0, nil, '0')
  #   msgthr.add(1, [0], '1') do |parent, child|
  #     puts "#{parent.mid} -> #{child.mid}"
  #   end
  def add(mid, refs, msg) # :yields: parent, child
    @state == :init or raise StateError, "cannot add when already #@state"

    cur = @id_table[mid] ||= Msgthr::Container.new(mid)
    cur.msg = msg
    refs or return

    # n.b. centralized messaging systems (e.g. forums) do not need
    # multiple References:, only decentralized systems need it to
    # tolerate missing messages
    prev = nil
    refs.each do |ref|
      cont = @id_table[ref] ||= Msgthr::Container.new(ref)

      # link refs together in order implied,
      # but do not change existing links or loop
      if prev && !cont.parent && !cont.has_descendent(prev)
        prev.add_child(cont)
        yield(prev, cont) if block_given?
      end
      prev = cont
    end

    # set parent of this message to be the last element in refs
    if prev && !cur.has_descendent(prev)
      prev.add_child(cur)
      yield(prev, cur) if block_given?
    end
  end
end

require_relative 'msgthr/container'

git clone https://80x24.org/msgthr.git