about summary refs log tree commit
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2017-06-08 23:37:05 +0000
committer Eric Wong <e@80x24.org> 2017-06-08 23:38:24 +0000
commit 1b633959b1153b40ff8fcddd8430e0c6b0e9574b (patch)
tree ed372d868c1df615504ecacf53bf908e190fe899
parent b52e67f318794d2c4dde267ad4a540a62ae55b2e (diff)
Oh, so that's why I hadn't released this project earlier...
-rw-r--r-- .document 1
-rw-r--r-- .gitignore 1
-rw-r--r-- GNUmakefile 31
-rw-r--r-- lib/msgthr.rb 82
-rw-r--r-- lib/msgthr/container.rb 32
-rw-r--r-- test/test_msgthr.rb 20
6 files changed, 149 insertions, 18 deletions
diff --git a/.document b/.document
new file mode 100644
index 0000000..a65b417
--- /dev/null
+++ b/.document
@@ -0,0 +1 @@
+lib
diff --git a/.gitignore b/.gitignore
index 67c0ab0..ebea58c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,4 @@ pkg/
 /NEWS
 /NEWS.atom.xml
 /doc
+/www
diff --git a/GNUmakefile b/GNUmakefile
index 6a43f04..220d4e1 100644
--- a/GNUmakefile
+++ b/GNUmakefile
@@ -5,6 +5,11 @@ pkg = msgthr
 RUBY = ruby
 lib := lib
 VERSION := 1.0.0
+RSYNC_DEST := 80x24.org:/srv/80x24/msgthr/
+
+RSYNC = rsync
+OLDDOC = olddoc
+RDOC = rdoc
 
 all:: test
 test_units := $(wildcard test/test_*.rb)
@@ -41,5 +46,31 @@ pkg_extra :=
 
 package: $(pkggem)
 
+NEWS: .olddoc.yml
+        $(OLDDOC) prepare
+LATEST: NEWS
+
+doc:: .document .olddoc.yml
+        -find lib -type f -name '*.rbc' -exec rm -f '{}' ';'
+        $(RM) -r doc
+        $(RDOC) -f oldweb
+
+# this requires GNU coreutils variants
+ifneq ($(RSYNC_DEST),)
+publish_doc:
+        -git set-file-times
+        $(MAKE) doc
+        mkdir -p www
+        $(RM) -r www/rdoc
+        mv doc www/rdoc
+        install -m644 README www/README
+        install -m644 NEWS www/NEWS
+        install -m644 NEWS.atom.xml www/NEWS.atom.xml
+        for i in $$(find www -type f ! -regex '^.*\.gz$$'); do \
+          gzip --rsyncable -9 < $$i > $$i.gz; touch -r $$i $$i.gz; done
+        $(RSYNC) -av www/ $(RSYNC_DEST)
+        git ls-files | xargs touch
+endif
+
 .PHONY: all test $(test_units)
 .PHONY: check-warnings fix-perms
diff --git a/lib/msgthr.rb b/lib/msgthr.rb
index f8b5896..ea63731 100644
--- a/lib/msgthr.rb
+++ b/lib/msgthr.rb
@@ -2,6 +2,18 @@
 # License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
 
 # Non-recursive, container-agnostic message threading.
+#
+# Usage is typically:
+#
+# * use Msgthr.new to create a new object
+# * use Msgthr#add for every message you have
+# * use Msgthr#thread! to perform threading operations
+# * optionally, use Msgthr#order! to sort messages
+# * use Msgthr#walk_thread to iterate through the threaded tree
+#
+# See https://80x24.org/msgthr/README for more info
+# You may email us publicly at mailto:msgthr-public@80x24.org
+# Archives are at https://80x24.org/msgthr-public/
 class Msgthr
 
   # an Array of root (parent-less) messages, only populated after
@@ -20,20 +32,68 @@ class Msgthr
     @id_table.clear
   end
 
-  # Threads the message
-  # This does not sort
+  # Performs threading on the messages and returns the rootset
+  # (set of message containers without parents).
+  #
+  # Call this after all #add operations are complete.
+  # This does not sort, use #order! if sorting is necessary.
   def thread!
     ret = @rootset
     @id_table.each_value { |cont| ret << cont if cont.parent.nil? }.clear
     ret
   end
 
+  # Performs an in-place sort on messages after thread!
+  # This is optional and intended to be called only after #thread!
+  #
+  # This takes a block which yields an array of Msgthr::Container
+  # objects for sorting.
+  #
+  # To sort by unique +mid+ identifiers for each container:
+  #
+  #   msgthr.order! { |ary| ary.sort_by!(&:mid) }
+  #
+  # If your opaque message pointer contains a +time+ accessor which gives
+  # a Time object:
+  #
+  #   msgthr.order! do |ary|
+  #     ary.sort_by! do |cont| # Msgthr::Container
+  #       cur = cont.topmost
+  #       cur ? cur.msg.time : Time.at(0)
+  #     end
+  #   end
+  #
+  # Note, using Msgthr::Container#topmost is NOT necessary when accessing
+  # Msgthr::Container#mid, as any known missing messages (ghosts)
+  # will still have a +mid+.  However, Msgthr::Container#topmost is
+  # necessary if accessing Msgthr::Container#msg.
   def order!
     yield @rootset
-    @rootset.each { |cont| cont.order! { |children| yield(children) } }
+    @rootset.each do |cont|
+      # this calls Msgthr::Container#order!, which is non-recursive
+      cont.order! { |children| yield(children) }
+    end
   end
 
-  # non-recursively walk a thread
+  # non-recursively walk a set of messages after #thread!
+  # (and optionally, #order!)
+  #
+  # This takes a block and yields 3 elements to it: +|level, container, index|+
+  # for each message container.
+  #
+  # * +level+ is the current depth within the walk (non-negative Integer)
+  # * +container+ is the Msgthr::Container object
+  # * +index+ is the offset of the container within its level (starting at 0)
+  #
+  # To display the subject of each message with indentation,
+  # assuming your +msg+ pointer has a +subject+ field:
+  #
+  #   msgthr.walk_thread do |level, container, index|
+  #     msg = container.msg
+  #     subject = msg ? msg.subject : "[missing: <#{container.mid}>]"
+  #     indent = '  ' * level
+  #     printf("#{indent} % 3d. %s\n", index, subject)
+  #   end
   def walk_thread
     i = -1
     q = @rootset.map { |cont| [ 0, cont, i += 1 ] }
@@ -49,20 +109,24 @@ class Msgthr
   # Adds a message to prepare a Msgthr object for threading.
   #
   # * +mid+ is a unique identifier for the message in a given thread.
+  #   It is typically a String or Integer, but may be anything usable
+  #   as a Hash key in Ruby.
   #
-  # * +refs+ should be an Array of unique identifiers.  For mail and
-  #   news messages, this is usually the parsed result of the
-  #   "References:" header.  Order should be oldest to newest
+  # * +refs+ should be an Array of unique identifiers belonging
+  #   to ancestors of the current message.
+  #   For mail and news messages, this is usually the parsed result
+  #   of the "References:" header.  Order should be oldest to newest
   #   in terms of ancestry, with the last element being the
   #   immediate parent of the given message.
   #
-  #   This is +nil+ for messages with no parent.
+  #   This is +nil+ for messages with no parent (root messages).
   #
   # * +msg+ is an opaque object which typically contains a
   #   Mail or Tmail object for handling mail.
   #
   # If +mid+ is a String, it is recommended to freeze the string before
-  # calling this method to avoid wasting memory on hash keys.
+  # calling this method to avoid wasting memory on hash keys.  The same
+  # is true for any String objects in +refs+.
   def add(mid, refs, msg)
     cur = @id_table[mid] ||= Msgthr::Container.new(mid)
     cur.msg = msg
diff --git a/lib/msgthr/container.rb b/lib/msgthr/container.rb
index 30f6775..e51507c 100644
--- a/lib/msgthr/container.rb
+++ b/lib/msgthr/container.rb
@@ -1,8 +1,13 @@
 # Copyright (C) 2016 all contributors <msgthr-public@80x24.org>
 # License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
 
-# An internal container class, this is exposed for sorting APIs
-# but should not be initialized in your own code.
+# An internal container class, this is exposed for Msgthr#order!
+# and Msgthr#walk_thread APIs.  They should not be initialized
+# in your own code.
+#
+# One container object will exist for every message you call Msgthr#add on,
+# so there can potentially be many of these objects for large sets of
+# messages.
 class Msgthr::Container
 
   # Unique message identifier, typically the Message-Id header for mail
@@ -11,21 +16,33 @@ class Msgthr::Container
   attr_reader :mid
 
   # Opaque data pointer, may be used by the user for any purpose.
-  # This may be +nil+ to denote missing (aka "ghost") messages.
+  # This is +nil+ to denote missing (aka "ghost") messages.
   attr_accessor :msg
 
-  attr_accessor :children # :nodoc:
+  # You probably do not need to use this.
+  # It is only safe to access this after Msgthr#order!
+  # This contains an Array of Msgthr::Container objects which have the
+  # +parent+ field pointing to us
+  attr_accessor :children
+
+  # You probably do not need to use this; and you should only use
+  # this after Msgthr#order!  This points to the +parent+ of the
+  # message if one exists, and +nil+ if a message has no parent.
+  # This will only be accurate once all messages are added to
+  # a Msgthr set via Msgthr#add
   attr_accessor :parent # :nodoc:
 
   def initialize(mid) # :nodoc:
     @mid = mid
     @children = {} # becomes an Array after order!
     @parent = nil
-    @msg = nil # opaque
+    @msg = nil # opaque pointer supplied by user
   end
 
-  # returns the topmost message container with an opaque message pointer
-  # in it.  This may be +nil+ if none are available.
+  # Returns the topmost message container with an opaque message pointer
+  # in it.  This may be +nil+ if no message is available.
+  # This is preferable to using the container yielded by Msgthr#order!
+  # directly when handling incomplete message sets.
   def topmost
     q = [ self ]
     while cont = q.shift
@@ -55,6 +72,7 @@ class Msgthr::Container
     false
   end
 
+  # only called by Msgthr#order!
   def order! # :nodoc:
     seen = { @mid => true }
     q = [ self ]
diff --git a/test/test_msgthr.rb b/test/test_msgthr.rb
index 19441c8..19cec75 100644
--- a/test/test_msgthr.rb
+++ b/test/test_msgthr.rb
@@ -10,11 +10,27 @@ class TestMsgthr < Test::Unit::TestCase
     thr.add('b', %w(c), 'B')
     thr.add('c', nil, 'c')
     thr.add('D', nil, 'D')
-    thr.add('d', nil, 'd')
+    thr.add('d', %w(missing), 'd')
     thr.thread!
     rootset = thr.order! { |c| c.sort_by!(&:mid) }
-    assert_equal %w(D c d), rootset.map(&:mid)
+    assert_equal %w(D c missing), rootset.map(&:mid)
     assert_equal 'D', rootset[0].msg
     assert_equal %w(b), rootset[1].children.map(&:mid)
+    out = ''.b
+    thr.walk_thread do |level, container, index|
+      msg = container.msg
+      summary = msg ? msg : "[missing: <#{container.mid}>]"
+      indent = '  ' * level
+      out << sprintf("#{indent} % 3d. %s\n", index, summary)
+    end
+    exp = <<EOF.b
+   0. D
+   1. c
+     0. B
+       0. abc
+   2. [missing: <missing>]
+     0. d
+EOF
+    assert_equal exp, out
   end
 end