olddoc.git  about / heads / tags
olddoc - old-fashioned RDoc generator(s)
blob 6a5ffab19ed3c524077a5d43e7d49cfbb708b5b7 4045 bytes (raw)
$ git show HEAD:lib/olddoc/man2html.rb	# shows this blob on the CLI

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
 
# Copyright (C) 2019 all contributors <olddoc-public@80x24.org>
# License: GPL-3.0+ <https://www.gnu.org/licenses/gpl-3.0.txt>
# frozen_string_literal: true
require 'digest'
require 'optparse'

# linkifier for manpages rendered to a terminal.  man2html(1) and
# groff generate too much style

class Olddoc::Man2HTML # :nodoc:
  # Random per-process value mixed into every placeholder digest so the
  # placeholder for a given URL cannot be predicted from the input text.
  SALT = rand

  # Matches a URL, optionally preceded by an opening delimiter ($1) which
  # is looked up in PAIRS; the URL itself is captured in $2.
  LINK_RE = %r{([\('!])?\b((?:ftps?|https?|nntps?|gopher|imaps?)://
     [\@:\w\.-]+(?:/
     (?:[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]*)
     (?:\?[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]+)?
     (?:\#[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%\?]+)?
     )?
    )}xi

  # Opening delimiter => pattern matching its closing counterpart (plus
  # optional trailing punctuation) at the very end of a matched URL.
  PAIRS = {
  "(" => %r/(\)[\.,;\+]?)\z/, # Markdown (,), Ruby (+) (, for arrays)
  "'" => %r/('[\.,;\+]?)\z/, # Perl / Ruby
  "!" => %r/(![\.,;\+]?)\z/, # Perl / Ruby
  }.freeze

  # A section heading is a whole line of capital letters and spaces.
  # SECTION_SPLIT_RE cuts the rendered page at such lines (keeping the
  # heading via the capture group); SECTION_ONLY_RE recognizes a piece
  # which consists of nothing but a heading.
  SECTION_SPLIT_RE = /^([A-Z][A-Z ]+)$/
  SECTION_ONLY_RE = /\A[A-Z][A-Z ]+\z/

  # +opts+ is accepted but not used.
  def initialize(opts) # :nodoc:
  end

  # Renders the manpage named by +argv+ (arguments are handed to man(1)
  # after option parsing) as an HTML fragment.  The fragment is printed
  # to $stdout, or to the file given via -o/--output; in the latter case
  # the file's atime/mtime are copied from the manpage source when it
  # can be located.  Nothing is printed if man(1) fails.
  def run(argv) # :nodoc:
    out = $stdout
    # Remember explicitly whether we opened a file.  Duck-typing on
    # out.respond_to?(:path) is wrong since Ruby 3.2, where every IO
    # (including $stdout) responds to #path; that made us close $stdout
    # and call File.utime on "<STDOUT>".
    out_path = nil
    OptionParser.new("", 24, '  ') do |opts|
      opts.on('-o', '--output PATH', 'output to given file') { |path|
        out.close if out_path # repeated -o: do not leak the earlier file
        out = File.open(path, 'w')
        out_path = path
      }
      opts.parse!(argv)
    end
    argv[0] or abort 'manpage required'

    str = read_manpage(argv)
    return unless str

    out.print(render_html(str))
    return unless out_path

    # use mtime of the original source
    out.close
    stat = src_input_stat(argv)
    File.utime(stat.atime, stat.mtime, out_path) if stat
  ensure
    # only ever close a file we opened ourselves, never $stdout
    out.close if out_path && !out.closed?
  end

  # Returns a File::Stat for the manpage source: the last element of
  # +argv+ that is a regular file, otherwise the last element for which
  # `man -w` prints the path of a regular file.  Returns nil when
  # neither lookup finds anything.
  def src_input_stat(argv)
    argv.reverse_each do |f|
      next unless File.file?(f)
      return File.stat(f)
    end

    argv.reverse_each do |f|
      path = IO.popen(%W(man -w #{f}), &:read)
      path.chomp!
      next unless File.file?(path)
      return File.stat(path)
    end
    nil
  end

  # First linkification pass, run on the raw (not yet HTML-escaped)
  # text.  Replaces every URL in +str+ (in place) with an
  # "OLD-LINK-<md5>" placeholder and returns a Hash mapping each digest
  # to its URL for linkify_2.
  def linkify_1(str) # :nodoc:
    state = {}
    str.gsub!(LINK_RE) do
      head = $1 || ''
      url = $2.dup
      tail = ''

      # it's fairly common to end URLs in messages with
      # '.', ',' or ';' to denote the end of a statement;
      # assume the intent was to end the statement/sentence
      # in English
      if re = PAIRS[head]
        # strip the closing delimiter only if it is actually there
        tail = $1 if url.sub!(re, '')
      elsif url.sub!(/(\))?([\.,;])\z/, '')
        close_paren = $1
        tail = $2
        # require ')' to be paired with '('
        if close_paren
          if url.index('(').nil?
            tail = ")#{tail}"
          else
            url += ')'
          end
        end
      elsif url !~ /\(/ && url.sub!(/\)\z/, '')
        tail = ')'
      end

      # salt this, as this could be exploited to show
      # links in the HTML which don't show up in the raw mail.
      key = Digest::MD5.hexdigest("#{url}#{SALT}").freeze
      state[key] = url
      "#{head}OLD-LINK-#{key}#{tail}"
    end
    state
  end

  # Second linkification pass, run after +str+ was HTML-escaped.
  # Replaces (in place) every placeholder recorded in +state+ with an
  # <a href> element.  Placeholder look-alikes which are not in +state+
  # are left exactly as they appeared in the text.
  def linkify_2(state, str) # :nodoc:
    # Added "OLD-LINK-" prefix to avoid false-positives on git commits
    str.gsub!(/\bOLD-LINK-([a-f0-9]{32})\b/) do
      whole = $&
      url = state[$1]
      if url
        %Q{<a\nhref=#{url.encode(xml: :attr)}>#{url.encode(xml: :text)}</a>}
      else
        # false positive or somebody tried to mess with us;
        # keep the text as-is instead of dropping the prefix
        whole
      end
    end
  end

  private

  # Runs man(1) on +argv+ with a fixed 72-column dumb terminal and
  # returns its output, or nil if man(1) did not exit successfully.
  def read_manpage(argv)
    cols = '72'
    env = ENV.to_hash
    env.merge!({ 'COLUMNS' => cols, 'MANWIDTH' => cols, 'TERM' => 'dumb' })

    # note: I don't care for the styles groff and man2html throw
    # on us, I just want indented and wrapped text with <a hrefs>
    # for URLs.

    # try man-db options, first:
    str = IO.popen(env, ['man', '--nh', '--nj', *argv], &:read)

    if str.empty? || !$?.success?
      str = IO.popen(env, ['man', *argv], &:read)
    end
    $?.success? ? str : nil
  end

  # Converts terminal-rendered manpage text into an HTML fragment:
  # section headings become <h1 id=...>, everything in between becomes
  # a <pre> block with URLs turned into links.
  def render_html(str)
    str.split(SECTION_SPLIT_RE).map! do |s|
      if s =~ SECTION_ONLY_RE
        # this is to be compatible with HTML fragments pandoc used
        anchor = s.downcase.tr(' ', '-')
        "<h1\nid=#{anchor.encode(xml: :attr)}>#{s}</h1>"
      else
        # placeholders go in before escaping so the URL text is escaped
        # exactly once, when linkify_2 expands them
        state = linkify_1(s)
        s.encode!(xml: :text)
        linkify_2(state, s)
        s.rstrip!
        s.empty? ? '' : "<pre>#{s}</pre>"
      end
    end.join
  end
end

git clone https://80x24.org/olddoc.git