1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# Copyright (C) 2019 all contributors <olddoc-public@80x24.org>
# License: GPL-3.0+ <https://www.gnu.org/licenses/gpl-3.0.txt>
# frozen_string_literal: true
require 'digest'
require 'optparse'
# linkifier for manpages rendered to a terminal. man2html(1) and
# groff generate too much style
class Olddoc::Man2HTML # :nodoc:
# Random value chosen once when the class is loaded; linkify_1 appends it
# to every URL before hashing so placeholder keys are not predictable from
# the URL alone.
SALT = rand
# Matches a URL using one of the listed schemes.
# Capture 1: an optional "(", "'" or "!" immediately before the URL.
# Capture 2: the URL itself (scheme, host part, then an optional path
# with optional query and fragment).
LINK_RE = %r{([\('!])?\b((?:ftps?|https?|nntps?|gopher|imaps?)://
[\@:\w\.-]+(?:/
(?:[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]*)
(?:\?[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]+)?
(?:\#[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%\?]+)?
)?
)}xi
# Maps an opening character (capture 1 of LINK_RE) to a pattern that
# matches its closing counterpart, optionally followed by one punctuation
# character, at the very end of the matched URL.  linkify_1 strips that
# match from the URL and re-emits it after the link.
PAIRS = {
"(" => %r/(\)[\.,;\+]?)\z/, # Markdown (,), Ruby (+) (, for arrays)
"'" => %r/('[\.,;\+]?)\z/, # Perl / Ruby
"!" => %r/(![\.,;\+]?)\z/, # Perl / Ruby
}
# Takes +opts+ but does not use it; no instance state is set up.
def initialize(opts) # :nodoc:
end
# Renders a manpage as an HTML fragment.
#
# +argv+ is a command-line style array.  "-o PATH" / "--output PATH" is
# parsed out of it (so +argv+ is modified) and selects an output file
# instead of $stdout; everything left over is handed to man(1) as-is.
# Aborts with "manpage required" when nothing is left over.
#
# The text printed by man(1) is split on section headings (lines made up
# only of capital letters and spaces).  Headings become <h1> elements
# with an id derived from the heading; all other text is run through
# linkify_1, XML-escaped, run through linkify_2 and wrapped in <pre>.
#
# When an output file was opened it is closed and given the atime/mtime
# reported by src_input_stat.  Nothing is printed if man(1) fails.
def run(argv) # :nodoc:
  out = $stdout
  OptionParser.new("", 24, ' ') do |opts|
    opts.on('-o', '--output PATH', 'output to given file') do |path|
      out = File.open(path, 'w')
    end
    opts.parse!(argv)
  end
  abort 'manpage required' unless argv[0]

  cols = '72'
  env = ENV.to_hash.merge('COLUMNS' => cols, 'MANWIDTH' => cols,
                          'TERM' => 'dumb')

  # note: I don't care for the styles groff and man2html throw
  # on us, I just want indented and wrapped text with <a hrefs>
  # for URLs.

  # try man-db options, first:
  text = IO.popen(env, ['man', '--nh', '--nj', *argv], &:read)
  if text.empty? || !$?.success?
    text = IO.popen(env, ['man', *argv], &:read)
  end
  return unless $?.success?

  sections = '[A-Z][A-Z ]+'
  splitter = /^(#{sections})$/m
  heading = /\A(#{sections})$/
  html = text.split(splitter).map do |part|
    if part =~ heading
      # this is to be compatible with HTML fragments pandoc used
      sec = Regexp.last_match(1)
      anchor = sec.downcase.tr(' ', '-')
      "<h1\nid=#{anchor.encode(xml: :attr)}>#{sec}</h1>"
    else
      # URLs are swapped for placeholders before escaping so the
      # escaping step cannot mangle them; linkify_2 swaps them back
      state = linkify_1(part)
      part.encode!(xml: :text)
      linkify_2(state, part)
      part.rstrip!
      part.empty? ? '' : "<pre>#{part}</pre>"
    end
  end.join
  out.print(html)

  # Only touch a file we opened ourselves for -o.  Checking
  # respond_to?(:path) is not enough: since Ruby 3.2 every IO has #path,
  # so $stdout would be closed and File.utime would be called on
  # "<STDOUT>".
  unless out.equal?($stdout)
    path = out.path
    out.close
    # use mtime of the original source
    stat = src_input_stat(argv)
    File.utime(stat.atime, stat.mtime, path) if stat
  end
end
# Finds the file whose timestamps should be copied onto the output.
#
# Scans +argv+ from last to first and returns the File::Stat of the first
# entry that is itself a regular file.  If none is, asks "man -w" for each
# entry (again last to first) and returns the stat of the first reported
# path that is a regular file.  Returns nil when nothing is found.
def src_input_stat(argv)
  candidates = argv.reverse

  direct = candidates.find { |arg| File.file?(arg) }
  return File.stat(direct) if direct

  candidates.each do |arg|
    located = IO.popen(%W[man -w #{arg}], &:read).chomp
    return File.stat(located) if File.file?(located)
  end
  nil
end
# First linkification pass, meant to run before the text is escaped.
#
# Replaces every LINK_RE match in +str+ (in place) with a placeholder of
# the form "OLD-LINK-<md5 hex>" and returns a Hash mapping each hex digest
# to the URL it stands for.  Punctuation and closing brackets that appear
# to belong to the surrounding sentence rather than the URL are moved out
# of the URL and kept after the placeholder.
def linkify_1(str) # :nodoc:
  links = {}
  str.gsub!(LINK_RE) do
    found = Regexp.last_match
    opener = found[1].to_s
    link = found[2].dup
    closer = PAIRS[opener]

    # it's fairly common to end URLs in messages with
    # '.', ',' or ';' to denote the end of a statement;
    # assume the intent was to end the statement/sentence
    # in English
    trailer =
      if closer
        # an opener was seen: only its matching closer is split off
        link.sub!(closer, '') && Regexp.last_match(1)
      elsif link.sub!(/(\))?([\.,;])\z/, '')
        paren = Regexp.last_match(1)
        punct = Regexp.last_match(2)
        if paren.nil?
          punct
        elsif link.include?('(')
          # require ')' to be paired with '(': this one belongs to the URL
          link << ')'
          punct
        else
          ")#{punct}"
        end
      elsif !link.include?('(') && link.sub!(/\)\z/, '')
        ')'
      end

    # salt this, as this could be exploited to show
    # links in the HTML which don't show up in the raw mail.
    digest = Digest::MD5.hexdigest("#{link}#{SALT}").freeze
    links[digest] = link
    "#{opener}OLD-LINK-#{digest}#{trailer}"
  end
  links
end
# Second linkification pass, meant to run after the text was escaped.
#
# Replaces each "OLD-LINK-<32 hex digits>" placeholder in +str+ (in place)
# with an <a> element for the URL stored under that digest in +state+
# (the Hash returned by linkify_1).  The URL is escaped both as the href
# attribute and as the link text.  Placeholder-shaped text whose digest is
# not in +state+ is left exactly as it was.
#
# Returns +str+, or nil when it contains no placeholder-shaped text
# (the result of String#gsub!).
def linkify_2(state, str) # :nodoc:
  # Added "OLD-LINK-" prefix to avoid false-positives on git commits
  str.gsub!(/\bOLD-LINK-([a-f0-9]{32})\b/) do
    matched = Regexp.last_match(0)
    url = state[Regexp.last_match(1)]
    if url
      %Q{<a\nhref=#{url.encode(xml: :attr)}>#{url.encode(xml: :text)}</a>}
    else
      # false positive or somebody tried to mess with us: keep the
      # text as written instead of dropping its "OLD-LINK-" prefix
      matched
    end
  end
end
end
|