From 9581a01c87a7d8bf228bdec7d4f06bf8a3fed3ec Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 22 Nov 2015 01:05:05 +0000 Subject: introduce dtas-mlib for music library functions Eventually this will support searching and be the basis of an mpd-compatible proxy in front of dtas-player --- bin/dtas-mlib | 39 +++++ lib/dtas/mlib.rb | 290 +++++++++++++++++++++++++++++++ lib/dtas/mlib/migrations/0001_initial.rb | 42 +++++ test/test_mlib.rb | 31 ++++ 4 files changed, 402 insertions(+) create mode 100755 bin/dtas-mlib create mode 100644 lib/dtas/mlib.rb create mode 100644 lib/dtas/mlib/migrations/0001_initial.rb create mode 100644 test/test_mlib.rb diff --git a/bin/dtas-mlib b/bin/dtas-mlib new file mode 100755 index 0000000..0e17d91 --- /dev/null +++ b/bin/dtas-mlib @@ -0,0 +1,39 @@ +#!/usr/bin/env ruby +# Copyright (C) 2015 all contributors +# License: GPLv3 or later (https://www.gnu.org/licenses/gpl-3.0.txt) +usage = "#$0 [-d DATABASE-URI] ACTION [DIRECTORY]" +Thread.abort_on_exception = $stderr.sync = $stdout.sync = true +require 'dtas/mlib' +require 'optparse' +path = '~/.dtas/mlib.sqlite' +db = File.expand_path(path) +OptionParser.new('', 24, ' ') do |op| + op.banner = usage + op.on('-d', '--database ', "database (default: #{path}") do |d| + db = d + end + op.on('-h', '--help') do + puts(op.to_s) + exit + end + op.parse!(ARGV) +end + +unless db.include?('://') + dir = File.dirname(db) + unless File.directory?(dir) + require 'fileutils' + FileUtils.mkpath(dir) + end +end + +case action = ARGV.shift +when 'update' + directory = ARGV.shift or abort "DIRECTORY required\n#{usage}" +else + abort usage +end + +mlib = DTAS::Mlib.new(db) +mlib.migrate +mlib.__send__(action, directory) diff --git a/lib/dtas/mlib.rb b/lib/dtas/mlib.rb new file mode 100644 index 0000000..59a589c --- /dev/null +++ b/lib/dtas/mlib.rb @@ -0,0 +1,290 @@ +# -*- encoding: utf-8 -*- +# Copyright (C) 2015 all contributors +# License: GPLv3 or later (https://www.gnu.org/licenses/gpl-3.0.txt) + +require_relative '../dtas' +require_relative 'process' +require 'socket' + +# For the DTAS Music Library, based on what MPD uses. +class DTAS::Mlib + attr_accessor :follow_outside_symlinks + attr_accessor :follow_inside_symlinks + attr_accessor :tags + DM_DIR = -1 + DM_IGN = -2 + include DTAS::Process + + Job = Struct.new(:wd, :ctime, :parent_id, :path) + + # same capitalization as in mpd + TAGS = Hash[*( + %w(Artist ArtistSort + Album AlbumSort + AlbumArtist AlbumArtistSort + Title Track Name + Genre Date Composer Performer Comment Disc + MUSICBRAINZ_ARTISTID MUSICBRAINZ_ALBUMID + MUSICBRAINZ_ALBUMARTISTID + MUSICBRAINZ_TRACKID + MUSICBRAINZ_RELEASETRACKID).map! { |x| [ x.downcase, x ] }.flatten!)] + + def initialize(db) + if String === db + db = "sqlite://#{db}" unless db.include?('://') + require 'sequel/no_core_ext' + db = Sequel.connect(db, single_threaded: true) + end + if db.class.to_s.downcase.include?('sqlite') + db.transaction_mode = :immediate + db.synchronous = :off + end + @db = db + @pwd = nil + @follow_outside_symlinks = true + @follow_inside_symlinks = true + @root_node = nil + @tags = TAGS.dup + @tag_map = nil + @suffixes = nil + @work = nil + end + + def init_suffixes + `sox --help 2>/dev/null` =~ /\nAUDIO FILE FORMATS:\s*([^\n]+)/s + re = $1.split(/\s+/).map { |x| Regexp.quote(x) }.join('|') + @suffixes = Regexp.new("\\.(?:#{re})\\z", Regexp::IGNORECASE) + end + + def worker(todo) + @work.close + @db.tables # reconnect before chdir + @pwd = Dir.pwd.b + begin + buf = todo.recv(16384) # 4x bigger than PATH_MAX ought to be enough + exit if buf.empty? + job = Marshal.load(buf) + buf.clear + worker_work(job) + rescue => e + warn "#{e.message} (#{e.class}) #{e.backtrace.join("\n")}\n" + end while true + end + + def ignore(job) + @db.transaction do + node_ensure(job.parent_id, job.path, DM_IGN, job.ctime) + end + end + + def worker_work(job) + tlen = nil + wd = job.wd + if wd != @pwd + Dir.chdir(wd) + @pwd = wd + end + tmp = {} + path = job.path + tlen = qx(%W(soxi -D #{path}), no_raise: true) + return ignore(job) unless String === tlen + tlen = tlen.to_f + return ignore(job) if tlen < 0 + tlen = tlen.round + buf = qx(%W(soxi -a #{path}), no_raise: true) + return ignore(job) unless String === buf + + # no, we don't support comments with newlines in them + buf = buf.split("\n".freeze) + while line = buf.shift + tag, value = line.split('='.freeze, 2) + tag && value or next + tag.downcase! + tag_id = @tag_map[tag] or next + value.strip! + + # FIXME: this fallback needs testing + [ Encoding::UTF_8, Encoding::ISO_8859_1 ].each do |enc| + value.force_encoding(enc) + if value.valid_encoding? + value.encode!(Encoding::UTF_8) if enc != Encoding::UTF_8 + tmp[tag_id] = value + break + end + end + end + @db.transaction do + node_id = node_ensure(job.parent_id, path, tlen, job.ctime)[:id] + vals = @db[:vals] + comments = @db[:comments] + q = { node_id: node_id } + comments.where(q).delete + tmp.each do |tid, val| + v = vals[val: val] + q[:val_id] = v ? v[:id] : vals.insert(val: val) + q[:tag_id] = tid + comments.insert(q) + end + end + end + + def update(path, jobs: 8) + # n.b. "jobs" is for CPU concurrency. Audio media is typically stored + # on high-latency media or slow network file systems; so we use a high + # number of jobs by default to compensate for the seek-heavy workload + # this generates + init_suffixes + st = File.stat(path) # we always follow the first dir even if it's a symlink + st.directory? or + raise ArgumentError, "path: #{path.inspect} is not a directory" + @work and raise 'update already running' + todo, @work = UNIXSocket.pair(:SOCK_SEQPACKET) + @db.disconnect + jobs.times { |i| fork { worker(todo) } } + todo.close + scan_dir(path, st) + @work.close + Process.waitall + ensure + @work = nil + end + + def migrate + require 'sequel' + Sequel.extension(:migration, :core_extensions) # ugh... + @db.transaction do + Sequel::Migrator.apply(@db, "#{File.dirname(__FILE__)}/mlib/migrations") + root_node # ensure this exists + load_tags + end + end + + def load_tags + tag_map = {} + tags = @db[:tags] + @tags.each do |lc, mc| + unless q = tags[tag: mc] + q = { tag: mc } + q[:id] = tags.insert(q) + end + tag_map[lc] = q[:id] + end + + # Xiph tags use "tracknumber" and "discnumber" + %w(track disc).each do |x| + tag_id = tag_map[x] and tag_map["#{x}number"] = tag_id + end + @tag_map = tag_map + end + + def scan_any(path, parent_id) + st = File.lstat(path) rescue return + if st.directory? + scan_dir(path, st, parent_id) + elsif st.file? + scan_file(path, st, parent_id) + # elsif st.symlink? TODO + # scan_link(path, st, parent_id) + end + end + + def scan_file(path, st, parent_id) + return if @suffixes !~ path || st.size == 0 + + # no-op if no change + if node = @db[:nodes][name: path, parent_id: parent_id] + return if st.ctime.to_i == node[:ctime] || node[:tlen] == DM_IGN + end + + job = Job.new(@pwd, st.ctime.to_i, parent_id, path) + send_harder(@work, Marshal.dump(job)) + end + + def root_node + q = @root_node and return q + # root node always has parent_id: 1 + q = { + parent_id: 1, # self + name: '', + } + node = @db[:nodes][q] and return (@root_node = node) + begin + q[:tlen] = DM_DIR + q[:id] = @db[:nodes].insert(q) + q + rescue Sequel::DatabaseError + # we may conflict on insert if we didn't use a transaction + raise if @db.in_transaction? + @root_node = @db[:paths][q] or raise + end + end + + def dir_vivify(parts, ctime) + @db.transaction do + dir = root_node + last = parts.pop + parts.each do |name| + dir = node_ensure(dir[:id], name, DM_DIR) + end + node_ensure(dir[:id], last, DM_DIR, ctime) + end + end + + def node_update_maybe(node, tlen, ctime) + q = {} + q[:ctime] = ctime if ctime && ctime != node[:ctime] + q[:tlen] = tlen if tlen != node[:tlen] + return if q.empty? + node_id = node.delete(:id) + @db[:nodes].where(id: node_id).update(node.merge(q)) + node[:id] = node_id + end + + def node_ensure(parent_id, name, tlen, ctime = nil) + q = { name: name, parent_id: parent_id } + if node = @db[:nodes][q] + node_update_maybe(node, tlen, ctime) + else + # brand new node + node = q.dup + node[:tlen] = tlen + node[:ctime] = ctime + node[:id] = @db[:nodes].insert(node) + end + node + end + + def scan_dir(path, st, parent_id = nil) + prev_wd = @pwd + Dir.chdir(path) + cur = @pwd = Dir.pwd.b + + # TODO: use parent_id if given + dir = dir_vivify(cur.split(%r{/+}n), st.ctime.to_i) + Dir.foreach('.', encoding: Encoding::BINARY) do |x| + case x + when '.', '..', %r{\n}n + # files with newlines in them are rare and last I checked (in 2008), + # mpd could not support them, either. So lets not bother for now. + next + else + scan_any(x, dir[:id]) + end + end + ensure + Dir.chdir(prev_wd) if cur && prev_wd + @pwd = prev_wd + end + + def send_harder(sock, msg) + sock.sendmsg(msg) + rescue Errno::EMSGSIZE + sock.setsockopt(:SOL_SOCKET, :SO_SNDBUF, msg.bytesize + 1024) + # if it still fails, oh well... + begin + sock.sendmsg(msg) + rescue => e + warn "#{msg.bytesize} too big, dropped #{e.class}" + end + end +end diff --git a/lib/dtas/mlib/migrations/0001_initial.rb b/lib/dtas/mlib/migrations/0001_initial.rb new file mode 100644 index 0000000..f147cbe --- /dev/null +++ b/lib/dtas/mlib/migrations/0001_initial.rb @@ -0,0 +1,42 @@ +# Copyright (C) 2015 all contributors +# License: GPLv3 or later (https://www.gnu.org/licenses/gpl-3.0.txt) + +Sequel.migration do + up do + create_table(:nodes) do + primary_key :id + String :name, null: false # encoding: binary, POSIX + Integer :ctime + foreign_key :parent_id, :nodes, null: false # parent dir + # >= 0: tlen of track, -2: ignore, -1: directory + Integer :tlen, null: false + unique [ :parent_id, :name ] + end + + create_table(:tags) do + primary_key :id + String :tag, null: false, unique: true # encoding: US-ASCII + end + + create_table(:vals) do + primary_key :id + String :val, null: false, unique: true # encoding: UTF-8 + end + + create_table(:comments) do + foreign_key :node_id, :nodes, null: false + foreign_key :tag_id, :tags, null: false + foreign_key :val_id, :vals, null: false + primary_key [ :node_id, :tag_id, :val_id ] + index :node_id + index [ :tag_id, :val_id ] + end + end + + down do + drop_table(:nodes) + drop_table(:tags) + drop_table(:vals) + drop_table(:comments) + end +end diff --git a/test/test_mlib.rb b/test/test_mlib.rb new file mode 100644 index 0000000..072d1d0 --- /dev/null +++ b/test/test_mlib.rb @@ -0,0 +1,31 @@ +# Copyright (C) 2013-2015 all contributors +# License: GPLv3 or later (https://www.gnu.org/licenses/gpl-3.0.txt) +require_relative 'helper' +begin + require 'dtas/mlib' + require 'sequel/no_core_ext' + require 'sqlite3' +rescue LoadError => err + warn "skipping mlib test: #{err.message}" + exit 0 +end + +class TestMlib < Testcase + def setup + @db = Sequel.sqlite(':memory:') + end + + def test_migrate + ml = DTAS::Mlib.new(@db) + begin + $-w = false + ml.migrate + tables = @db.tables + ensure + $-w = true + end + [ :nodes, :tags, :vals, :comments ].each do |t| + assert tables.include?(t), "missing #{t}" + end + end +end -- cgit v1.2.3-24-ge0c7