diff options
Diffstat (limited to 'lib/olddoc/man2html.rb')
-rw-r--r-- | lib/olddoc/man2html.rb | 149 |
1 files changed, 149 insertions, 0 deletions
# Copyright (C) 2019 all contributors <olddoc-public@80x24.org>
# License: GPL-3.0+ <https://www.gnu.org/licenses/gpl-3.0.txt>
# frozen_string_literal: true
require 'digest'
require 'optparse'

# linkifier for manpages rendered to a terminal.  man2html(1) and
# groff generate too much style
#
# The manpage is rendered by man(1) as plain text, split on its
# upper-case section headings, and emitted as an HTML fragment:
# each heading becomes an <h1> with an id, each body becomes a <pre>
# with URLs turned into <a href> links.
class Olddoc::Man2HTML # :nodoc:
  # Per-process random salt mixed into the placeholder keys generated by
  # #linkify_1, so input text cannot predict (and forge) a placeholder.
  SALT = rand

  # Matches a URL, optionally preceded by one of ( ' !
  # $1 is that optional leading character, $2 is the URL itself.
  LINK_RE = %r{([\('!])?\b((?:ftps?|https?|nntps?|gopher)://
     [\@:\w\.-]+(?:/
     (?:[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]*)
     (?:\?[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]+)?
     (?:\#[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%\?]+)?
     )?
     )}xi

  # Leading character => regexp matching its closing counterpart (plus
  # optional trailing punctuation) at the very end of a matched URL.
  PAIRS = {
    "(" => %r/(\)[\.,;\+]?)\z/, # Markdown (,), Ruby (+) (, for arrays)
    "'" => %r/('[\.,;\+]?)\z/, # Perl / Ruby
    "!" => %r/(![\.,;\+]?)\z/, # Perl / Ruby
  }.freeze

  # +opts+ is accepted but not used by this class.
  def initialize(opts) # :nodoc:
  end

  # Renders a manpage as an HTML fragment.
  #
  # +argv+ is parsed in place by OptionParser.  The only switch is
  # <tt>-o/--output PATH</tt> (default: $stdout); everything left over is
  # handed to man(1) and at least one such argument is required, otherwise
  # the process aborts with 'manpage required'.
  #
  # When writing to a file, the file's atime/mtime are afterwards set to
  # those of the manpage source found by #src_input_stat (if any).
  # Nothing is printed when man(1) fails.
  def run(argv) # :nodoc:
    out = $stdout
    out_path = nil # set only when -o was given
    OptionParser.new("", 24, ' ') do |opts|
      opts.on('-o', '--output PATH', 'output to given file') do |path|
        out.close if out_path # repeated -o: don't leak the earlier file
        out = File.open(path, 'w')
        out_path = path
      end
      opts.parse!(argv)
    end
    abort 'manpage required' unless argv[0]

    # IO.popen merges this hash into the inherited environment, so only
    # the overrides need to be listed.
    cols = '72'
    env = { 'COLUMNS' => cols, 'MANWIDTH' => cols, 'TERM' => 'dumb' }

    # note: I don't care for the styles groff and man2html throw
    # on us, I just want indented and wrapped text with <a hrefs>
    # for URLs.

    # try man-db options, first:
    str = IO.popen(env, ['man', '--nh', '--nj', *argv], &:read)

    # fall back to a plain invocation for man(1) implementations which
    # reject the man-db-only switches
    if str.empty? || !$?.success?
      str = IO.popen(env, ['man', *argv], &:read)
    end
    return unless $?.success?

    # the capture group makes String#split keep the headings themselves
    sections = '[A-Z][A-Z ]+'
    chunks = str.split(/^(#{sections})$/mo)
    chunks.map! do |s|
      case s
      when /\A(#{sections})$/o
        # this is to be compatible with HTML fragments pandoc used
        sec = $1
        anchor = sec.downcase.tr(' ', '-')
        "<h1\nid=#{anchor.encode(xml: :attr)}>#{sec}</h1>"
      else
        # URLs are swapped for placeholders before escaping, and the
        # placeholders are swapped for anchors after it, so the escaping
        # never touches the generated markup.
        state = linkify_1(s)
        s.encode!(xml: :text)
        linkify_2(state, s)
        s.rstrip!
        s.empty? ? '' : "<pre>#{s}</pre>"
      end
    end
    out.print(chunks.join)

    # use mtime of the original source
    #
    # Only when -o was given: since Ruby 3.2 every IO responds to #path
    # ($stdout.path is "<STDOUT>"), so probing for #path would close
    # $stdout and then call File.utime on a nonexistent file.
    return unless out_path

    out.close
    stat = src_input_stat(argv)
    File.utime(stat.atime, stat.mtime, out_path) if stat
  ensure
    # don't leak the -o file when man(1) failed or an exception escaped
    out.close if out_path && !out.closed?
  end

  # Finds the file the manpage was rendered from and returns its
  # File::Stat, or nil if none can be found.
  #
  # Arguments are tried last-to-first: first as literal file names, then
  # as manpage names resolved via <tt>man -w</tt>.
  def src_input_stat(argv)
    argv.reverse_each do |f|
      return File.stat(f) if File.file?(f)
    end

    argv.reverse_each do |f|
      path = IO.popen(%W(man -w #{f}), &:read)
      path.chomp!
      return File.stat(path) if File.file?(path)
    end
    nil
  end

  # First linkification pass; must run on the raw (unescaped) text.
  #
  # Replaces every URL in +str+ (in place) with an
  # <tt>OLD-LINK-<md5></tt> placeholder and returns a Hash mapping each
  # md5 key to its URL, for use by #linkify_2.
  def linkify_1(str) # :nodoc:
    state = {}
    str.gsub!(LINK_RE) do
      head = $1 || ''
      url = $2.dup
      tail = ''

      # it's fairly common to end URLs in messages with
      # '.', ',' or ';' to denote the end of a statement;
      # assume the intent was to end the statement/sentence
      # in English
      #
      # The PAIRS branch is only taken when the closing character is
      # really there; an unpaired leading character falls through to the
      # generic punctuation handling instead of leaving a trailing
      # '.', ',' or ';' inside the link.
      if (re = PAIRS[head]) && url.sub!(re, '')
        tail = $1
      elsif url.sub!(/(\))?([\.,;])\z/, '')
        paren, tail = $1, $2
        # require ')' to be paired with '('
        if paren
          if url.include?('(')
            url << ')' # balanced: the ')' belongs to the URL
          else
            tail = ")#{tail}"
          end
        end
      elsif url !~ /\(/ && url.sub!(/\)\z/, '')
        tail = ')'
      end

      # salt this, as this could be exploited to show
      # links in the HTML which don't show up in the raw mail.
      key = Digest::MD5.hexdigest("#{url}#{SALT}").freeze
      state[key] = url
      "#{head}OLD-LINK-#{key}#{tail}"
    end
    state
  end

  # Second linkification pass; run after +str+ has been XML-escaped.
  #
  # Replaces (in place) every placeholder left by #linkify_1 with an
  # <a href> element built from the URL recorded in +state+.  A
  # placeholder whose key is not in +state+ is replaced by the bare key.
  def linkify_2(state, str) # :nodoc:
    # Added "OLD-LINK-" prefix to avoid false-positives on git commits
    str.gsub!(/\bOLD-LINK-([a-f0-9]{32})\b/) do
      key = $1
      url = state[key]
      if url
        %Q{<a\nhref=#{url.encode(xml: :attr)}>#{url.encode(xml: :text)}</a>}
      else
        # false positive or somebody tried to mess with us
        key
      end
    end
  end
end