about summary refs log tree commit homepage
path: root/lib/olddoc/man2html.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/olddoc/man2html.rb')
-rw-r--r-- lib/olddoc/man2html.rb 149
1 files changed, 149 insertions, 0 deletions
diff --git a/lib/olddoc/man2html.rb b/lib/olddoc/man2html.rb
new file mode 100644
index 0000000..82254d2
--- /dev/null
+++ b/lib/olddoc/man2html.rb
@@ -0,0 +1,149 @@
+# Copyright (C) 2019 all contributors <olddoc-public@80x24.org>
+# License: GPL-3.0+ <https://www.gnu.org/licenses/gpl-3.0.txt>
+# frozen_string_literal: true
+require 'digest'
+require 'optparse'
+
+# linkifier for manpages rendered to a terminal.  man2html(1) and
+# groff generate too much style
+
+class Olddoc::Man2HTML # :nodoc:
+  SALT = rand
+  LINK_RE = %r{([\('!])?\b((?:ftps?|https?|nntps?|gopher)://
+     [\@:\w\.-]+(?:/
+     (?:[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]*)
+     (?:\?[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]+)?
+     (?:\#[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%\?]+)?
+     )?
+    )}xi
+
+  PAIRS = {
+  "(" => %r/(\)[\.,;\+]?)\z/, # Markdown (,), Ruby (+) (, for arrays)
+  "'" => %r/('[\.,;\+]?)\z/, # Perl / Ruby
+  "!" => %r/(![\.,;\+]?)\z/, # Perl / Ruby
+  }
+
+  def initialize(opts) # :nodoc:
+  end
+
+  def run(argv) # :nodoc:
+    out = $stdout
+    OptionParser.new("", 24, '  ') do |opts|
+      opts.on('-o', '--output PATH', 'output to given file') { |path|
+        out = File.open(path, 'w')
+      }
+      opts.parse!(argv)
+    end
+    argv[0] or abort 'manpage required'
+    cols = '72'
+    env = ENV.to_hash
+    env.merge!({ 'COLUMNS' => cols, 'MANWIDTH' => cols, 'TERM' => 'dumb' })
+
+    # note: I don't care for the styles groff and man2html throw
+    # on us, I just want indented and wrapped text with <a hrefs>
+    # for URLs.
+
+    # try man-db options, first:
+    str = IO.popen(env, ['man', '--nh', '--nj', *argv], &:read)
+
+    if str.empty? || !$?.success?
+      str = IO.popen(env, ['man', *argv], &:read)
+    end
+    if $?.success?
+      sections = '[A-Z][A-Z ]+'
+      str = str.split(/^(#{sections})$/mo)
+
+      str = str.map! do |s|
+        case s
+        when /\A(#{sections})$/o
+          # this is to be compatible with HTML fragments pandoc used
+          sec = $1
+          anchor = sec.downcase.tr(' ', '-')
+          "<h1\nid=#{anchor.encode(xml: :attr)}>#{sec}</h1>"
+        else
+          state = linkify_1(s)
+          s.encode!(xml: :text)
+          linkify_2(state, s)
+          s.rstrip!
+          s.empty? ? '' : "<pre>#{s}</pre>"
+        end
+      end.join
+
+      out.print(str)
+
+      # use mtime of the original source
+      if out.respond_to?(:path)
+        path = out.path
+        out.close
+        stat = src_input_stat(argv)
+        File.utime(stat.atime, stat.mtime, path) if stat
+      end
+    end
+  end
+
+  def src_input_stat(argv)
+    argv.reverse_each do |f|
+      next unless File.file?(f)
+      return File.stat(f)
+    end
+
+    argv.reverse_each do |f|
+      path = IO.popen(%W(man -w #{f}), &:read)
+      path.chomp!
+      next unless File.file?(path)
+      return File.stat(path)
+    end
+    nil
+  end
+
+  def linkify_1(str) # :nodoc:
+    state = {}
+    str.gsub!(LINK_RE) do
+      head = $1 || ''
+      url = $2.dup
+      tail = ''.dup
+
+      # it's fairly common to end URLs in messages with
+      # '.', ',' or ';' to denote the end of a statement;
+      # assume the intent was to end the statement/sentence
+      # in English
+      if re = PAIRS[head]
+        url.sub!(re, '')
+        tail = $1
+      elsif url.sub!(/(\))?([\.,;])\z/, '')
+        tail = $2
+        # require ')' to be paired with '('
+        if $1  # ')'
+          if url.index('(').nil?
+            tail = ")#{tail}"
+          else
+            url += ')'
+          end
+        end
+      elsif url !~ /\(/ && url.sub!(/\)\z/, '')
+        tail = ')'
+      end
+
+      # salt this, as this could be exploited to show
+      # links in the HTML which don't show up in the raw mail.
+      key = Digest::MD5.hexdigest("#{url}#{SALT}").freeze
+      state[key] = url
+      "#{head}OLD-LINK-#{key}#{tail}"
+    end
+    state
+  end
+
+  def linkify_2(state, str) # :nodoc:
+    # Added "OLD-LINK-" prefix to avoid false-positives on git commits
+    str.gsub!(/\bOLD-LINK-([a-f0-9]{32})\b/) do
+      key = $1
+      url = state[key]
+      if url
+        %Q{<a\nhref=#{url.encode(xml: :attr)}>#{url.encode(xml: :text)}</a>}
+      else
+        # false positive or somebody tried to mess with us
+        key
+      end
+    end
+  end
+end