diff options
author | Eric Wong <e@80x24.org> | 2018-01-29 00:43:45 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2018-01-29 01:03:10 +0000 |
commit | 702e3ad47738363ab847f43155e047c7c6612f80 (patch) | |
tree | ebb374f3a591e06461f55da92dc89e0375497460 /lib/dtas/encoding.rb | |
parent | 982105d44feeed91178a1b7331172fae6f797d92 (diff) | |
download | dtas-702e3ad47738363ab847f43155e047c7c6612f80.tar.gz |
This can be helpful for end users and is close to what other players use. We can fall back to Encoding.default_external by default (typically UTF-8), and then try again using `charlock_holmes' if it is installed. Note: path names remain binary, because that's how proper filesystems operate.
Diffstat (limited to 'lib/dtas/encoding.rb')
-rw-r--r-- | lib/dtas/encoding.rb | 58 |
1 files changed, 58 insertions, 0 deletions
# Copyright (C) 2018 all contributors <dtas-all@nongnu.org>
# License: GPL-3.0+ <https://www.gnu.org/licenses/gpl-3.0.txt>
# frozen_string_literal: true

# File: lib/dtas/encoding.rb (added as a new file by this commit; shown
# here as plain Ruby rather than as diff "+" lines).
#
# Helpers for re-tagging a String with a requested encoding, falling back
# to the optional `charlock_holmes` gem for detection when the requested
# encoding does not fit the bytes.
#
# This module is meant to be mixed in with +extend+ (the +extended+ hook
# below initializes the detector cache on the extending object).
module DTAS::Encoding # :nodoc:
  # Resets the cached detector on the extending object.
  # @charlock_holmes is tri-state:
  #   nil   - loading the gem has not been attempted yet
  #   false - the gem could not be loaded; do not retry
  #   other - a CharlockHolmes::EncodingDetector instance
  def self.extended(mod)
    mod.instance_eval { @charlock_holmes = nil }
  end

  # Tags +str+ (in place) with encoding +enc+.
  #
  # Returns +str+ immediately when it already carries +enc+ (no validity
  # check is made in that case).  Otherwise the encoding is forced to
  # +enc+; if the bytes are not valid in +enc+, either detection via
  # try_enc_harder is attempted (+harder+ true, the default) or the
  # original encoding is restored with a warning (+harder+ false).
  #
  # Always returns +str+, the same object that was passed in.
  def try_enc(str, enc, harder = true) # :nodoc:
    old = str.encoding
    return str if old == enc
    str.force_encoding(enc)
    unless str.valid_encoding?
      if harder
        try_enc_harder(str, enc, old)
      else
        enc_fallback(str, enc, old)
      end
    end
    str
  end

private

  # Lazily loads the `charlock_holmes` gem and memoizes the outcome in
  # @charlock_holmes.  Returns the detector, or false when the gem is
  # unavailable (a warning is emitted only on the first failed attempt).
  def enc_detector # :nodoc:
    if @charlock_holmes.nil?
      begin
        require 'charlock_holmes'
        @charlock_holmes = CharlockHolmes::EncodingDetector.new
      rescue LoadError
        warn "`charlock_holmes` gem not available for encoding detection"
        @charlock_holmes = false
      end
    end
    @charlock_holmes
  end

  # Called when +str+ is not valid in +enc+.  Asks charlock_holmes (if
  # available) to detect the encoding and forces it onto +str+; restores
  # +old+ via enc_fallback when no detector is available, nothing is
  # detected, or the detected encoding is still invalid for the bytes.
  #
  # The detector is loaded first and then used in the same call, so the
  # very first invocation also detects (or falls back) instead of
  # returning +str+ still tagged with the invalid +enc+.
  #
  # Returns +str+.
  def try_enc_harder(str, enc, old) # :nodoc:
    detector = enc_detector
    if detector
      res = detector.detect(str)
      # NOTE(review): guarding against a nil result in case the detector
      # finds no match at all -- confirm against charlock_holmes docs.
      det = res && res[:ruby_encoding]
      if det
        str.force_encoding(det)
        warn "charlock_holmes detected #{str.inspect} as #{det}..."
        str.valid_encoding? or enc_fallback(str, det, old)
      else
        enc_fallback(str, enc, old)
      end
    else
      enc_fallback(str, enc, old)
    end
    str
  end

  # Restores the original encoding +old+ on +str+ and warns that +enc+
  # (the encoding that was tried) did not fit.
  def enc_fallback(str, enc, old) # :nodoc:
    str.force_encoding(old)
    warn "could not detect encoding for #{str.inspect} (not #{enc})"
  end
end