summary refs log tree commit
path: root/lib/dtas/encoding.rb
blob: 613e3765f76c844511a1797e988f3f9a0187737b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# Copyright (C) 2018-2019 all contributors <dtas-all@nongnu.org>
# License: GPL-3.0+ <https://www.gnu.org/licenses/gpl-3.0.txt>
# frozen_string_literal: true

# This module is mixed into DTAS via +extend+ (it relies on the +extended+ hook)
module DTAS::Encoding # :nodoc:
  # Hook run when an object extends this module: starts the receiver with
  # no detector cached (+nil+ means "not tried yet", +false+ means
  # "tried and unavailable", anything else is the detector instance).
  def self.extended(mod)
    mod.instance_eval { @charlock_holmes = nil }
  end

private

  # Second-chance handling for +str+ after forcing it to +enc+ produced an
  # invalid string.  Lazily loads the optional `charlock_holmes' gem on
  # first use and asks it to detect the encoding; when the gem is missing,
  # nothing is detected, or the detected encoding is still invalid for
  # +str+, the string is restored to +old+ via enc_fallback.
  #
  # +str+ is modified in place and returned.
  def try_enc_harder(str, enc, old) # :nodoc:
    if @charlock_holmes.nil?
      begin
        require 'charlock_holmes'
        @charlock_holmes = CharlockHolmes::EncodingDetector.new
      rescue LoadError
        warn "`charlock_holmes` gem not available for encoding detection"
        @charlock_holmes = false
      end
    end

    # Do not stop after initialization: the very first caller must also get
    # either a detected encoding or the fallback, otherwise +str+ would be
    # returned still forced to the (invalid) +enc+.
    res = @charlock_holmes ? @charlock_holmes.detect(str) : nil

    # guard against a nil detection result (assumed possible when the
    # detector finds no match -- verify against charlock_holmes docs)
    det = res ? res[:ruby_encoding] : nil

    if det
      str.force_encoding(det)
      warn "charlock_holmes detected #{str.inspect} as #{det}..."
      enc_fallback(str, det, old) unless str.valid_encoding?
    else
      enc_fallback(str, enc, old)
    end
    str
  end

  # Restores +str+ to its previous encoding +old+ (in place) and warns that
  # +enc+ was not usable for it.
  def enc_fallback(str, enc, old) # :nodoc:
    str.force_encoding(old)
    warn "could not detect encoding for #{str.inspect} (not #{enc})"
  end

public

  # Tries to tag +str+ (in place) with encoding +enc+.
  #
  # Returns +str+ untouched if it already has that encoding.  Otherwise the
  # encoding is forced; if the bytes are not valid in +enc+, either
  # try_enc_harder is used (+harder+ true, the default) or the original
  # encoding is restored with a warning (+harder+ false).
  #
  # Always returns the same +str+ object.
  def try_enc(str, enc, harder = true) # :nodoc:
    old = str.encoding
    return str if old == enc
    str.force_encoding(enc)
    unless str.valid_encoding?
      if harder
        try_enc_harder(str, enc, old)
      else
        enc_fallback(str, enc, old)
      end
    end
    str
  end
end