From 702e3ad47738363ab847f43155e047c7c6612f80 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 29 Jan 2018 00:43:45 +0000 Subject: player: support guessing encodings for comments This can be helpful for end users and is close to what other players use. We can fall back to Encoding.default_external by default (typically UTF-8) and then again using `charlock_holmes' if installed. Note: path names remain binary, because that's how proper filesystems operate. --- lib/dtas/source/sox.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib/dtas/source/sox.rb') diff --git a/lib/dtas/source/sox.rb b/lib/dtas/source/sox.rb index f702b41..5e967c1 100644 --- a/lib/dtas/source/sox.rb +++ b/lib/dtas/source/sox.rb @@ -50,17 +50,19 @@ class DTAS::Source::Sox # :nodoc: out =~ /^Sample Rate\s*:\s*(\d+)/n and dst['rate'] = $1.to_i out =~ /^Precision\s*:\s*(\d+)-bit/n and dst['bits'] = $1.to_i + enc = Encoding.default_external # typically Encoding::UTF_8 if out =~ /\nComments\s*:[ \t]*\n?(.*)\z/mn comments = dst['comments'] = {} key = nil $1.split(/\n/n).each do |line| if line.sub!(/^([^=]+)=/ni, '') - key = DTAS.dedupe_str($1.upcase) + key = DTAS.dedupe_str(DTAS.try_enc($1.upcase, enc)) end (comments[key] ||= ''.b) << "#{line}\n" unless line.empty? end comments.each do |k,v| v.chomp! + DTAS.try_enc(v, enc) comments[k] = DTAS.dedupe_str(v) end end -- cgit v1.2.3-24-ge0c7