From: Eric Wong <e@80x24.org>
To: <dtas-all@nongnu.org>
Subject: [PATCH] introduce dtas-mlib for music library functions
Date: Fri, 27 Nov 2015 11:50:54 +0000 [thread overview]
Message-ID: <20151127115054.16912-1-e@80x24.org> (raw)
Eventually this will support searching and be the basis
of an mpd-compatible proxy in front of dtas-player
---
bin/dtas-mlib | 39 +++++
lib/dtas/mlib.rb | 290 +++++++++++++++++++++++++++++++
lib/dtas/mlib/migrations/0001_initial.rb | 42 +++++
test/test_mlib.rb | 31 ++++
4 files changed, 402 insertions(+)
create mode 100755 bin/dtas-mlib
create mode 100644 lib/dtas/mlib.rb
create mode 100644 lib/dtas/mlib/migrations/0001_initial.rb
create mode 100644 test/test_mlib.rb
diff --git a/bin/dtas-mlib b/bin/dtas-mlib
new file mode 100755
index 0000000..0e17d91
--- /dev/null
+++ b/bin/dtas-mlib
@@ -0,0 +1,39 @@
+#!/usr/bin/env ruby
+# Copyright (C) 2015 all contributors <dtas-all@nongnu.org>
+# License: GPLv3 or later (https://www.gnu.org/licenses/gpl-3.0.txt)
+usage = "#$0 [-d DATABASE-URI] ACTION [DIRECTORY]"
+Thread.abort_on_exception = $stderr.sync = $stdout.sync = true
+require 'dtas/mlib'
+require 'optparse'
+path = '~/.dtas/mlib.sqlite'
+db = File.expand_path(path)
+OptionParser.new('', 24, ' ') do |op|
+ op.banner = usage
+ op.on('-d', '--database <URI|PATH>', "database (default: #{path}") do |d|
+ db = d
+ end
+ op.on('-h', '--help') do
+ puts(op.to_s)
+ exit
+ end
+ op.parse!(ARGV)
+end
+
+unless db.include?('://')
+ dir = File.dirname(db)
+ unless File.directory?(dir)
+ require 'fileutils'
+ FileUtils.mkpath(dir)
+ end
+end
+
+case action = ARGV.shift
+when 'update'
+ directory = ARGV.shift or abort "DIRECTORY required\n#{usage}"
+else
+ abort usage
+end
+
+mlib = DTAS::Mlib.new(db)
+mlib.migrate
+mlib.__send__(action, directory)
diff --git a/lib/dtas/mlib.rb b/lib/dtas/mlib.rb
new file mode 100644
index 0000000..59a589c
--- /dev/null
+++ b/lib/dtas/mlib.rb
@@ -0,0 +1,290 @@
+# -*- encoding: utf-8 -*-
+# Copyright (C) 2015 all contributors <dtas-all@nongnu.org>
+# License: GPLv3 or later (https://www.gnu.org/licenses/gpl-3.0.txt)
+
+require_relative '../dtas'
+require_relative 'process'
+require 'socket'
+
+# For the DTAS Music Library, based on what MPD uses.
+class DTAS::Mlib
+ # symlink-following policy; both are set to true in initialize and
+ # nothing visible in this file consults them yet
+ attr_accessor :follow_outside_symlinks
+ attr_accessor :follow_inside_symlinks
+ # lowercase tag name => canonical tag name, read by load_tags
+ attr_accessor :tags
+ # sentinels stored in nodes.tlen in place of a track length:
+ # DM_DIR marks a directory, DM_IGN marks a file to ignore
+ DM_DIR = -1
+ DM_IGN = -2
+ include DTAS::Process
+
+ # unit of work Marshal-ed from the scanning process to a worker process
+ Job = Struct.new(:wd, :ctime, :parent_id, :path)
+
+ # same capitalization as in mpd
+ TAGS = Hash[*(
+ %w(Artist ArtistSort
+ Album AlbumSort
+ AlbumArtist AlbumArtistSort
+ Title Track Name
+ Genre Date Composer Performer Comment Disc
+ MUSICBRAINZ_ARTISTID MUSICBRAINZ_ALBUMID
+ MUSICBRAINZ_ALBUMARTISTID
+ MUSICBRAINZ_TRACKID
+ MUSICBRAINZ_RELEASETRACKID).map! { |x| [ x.downcase, x ] }.flatten!)]
+
+ def initialize(db)
+ if String === db
+ db = "sqlite://#{db}" unless db.include?('://')
+ require 'sequel/no_core_ext'
+ db = Sequel.connect(db, single_threaded: true)
+ end
+ if db.class.to_s.downcase.include?('sqlite')
+ db.transaction_mode = :immediate
+ db.synchronous = :off
+ end
+ @db = db
+ @pwd = nil
+ @follow_outside_symlinks = true
+ @follow_inside_symlinks = true
+ @root_node = nil
+ @tags = TAGS.dup
+ @tag_map = nil
+ @suffixes = nil
+ @work = nil
+ end
+
+ def init_suffixes
+ `sox --help 2>/dev/null` =~ /\nAUDIO FILE FORMATS:\s*([^\n]+)/s
+ re = $1.split(/\s+/).map { |x| Regexp.quote(x) }.join('|')
+ @suffixes = Regexp.new("\\.(?:#{re})\\z", Regexp::IGNORECASE)
+ end
+
+ def worker(todo)
+ @work.close
+ @db.tables # reconnect before chdir
+ @pwd = Dir.pwd.b
+ begin
+ buf = todo.recv(16384) # 4x bigger than PATH_MAX ought to be enough
+ exit if buf.empty?
+ job = Marshal.load(buf)
+ buf.clear
+ worker_work(job)
+ rescue => e
+ warn "#{e.message} (#{e.class}) #{e.backtrace.join("\n")}\n"
+ end while true
+ end
+
+ def ignore(job)
+ @db.transaction do
+ node_ensure(job.parent_id, job.path, DM_IGN, job.ctime)
+ end
+ end
+
+ # Processes one Job inside a worker: asks soxi(1) for the track length
+ # and the comments of job.path, then stores the node and its recognized
+ # tags.  Files soxi cannot handle are recorded via ignore(job).
+ def worker_work(job)
+ tlen = nil
+ wd = job.wd
+ # job.path is relative to job.wd, so only chdir when the directory changed
+ if wd != @pwd
+ Dir.chdir(wd)
+ @pwd = wd
+ end
+ tmp = {}
+ path = job.path
+ # NOTE(review): qx is not defined in this file (presumably it comes from
+ # DTAS::Process); a non-String result is treated as failure here
+ tlen = qx(%W(soxi -D #{path}), no_raise: true)
+ return ignore(job) unless String === tlen
+ tlen = tlen.to_f
+ return ignore(job) if tlen < 0
+ tlen = tlen.round
+ buf = qx(%W(soxi -a #{path}), no_raise: true)
+ return ignore(job) unless String === buf
+
+ # no, we don't support comments with newlines in them
+ buf = buf.split("\n".freeze)
+ while line = buf.shift
+ tag, value = line.split('='.freeze, 2)
+ tag && value or next
+ tag.downcase!
+ # tags missing from @tag_map are dropped
+ tag_id = @tag_map[tag] or next
+ value.strip!
+
+ # FIXME: this fallback needs testing
+ # try UTF-8 first, then fall back to ISO-8859-1 transcoded to UTF-8
+ [ Encoding::UTF_8, Encoding::ISO_8859_1 ].each do |enc|
+ value.force_encoding(enc)
+ if value.valid_encoding?
+ value.encode!(Encoding::UTF_8) if enc != Encoding::UTF_8
+ # keyed by tag id: a later line with the same tag replaces the earlier one
+ tmp[tag_id] = value
+ break
+ end
+ end
+ end
+ @db.transaction do
+ node_id = node_ensure(job.parent_id, path, tlen, job.ctime)[:id]
+ vals = @db[:vals]
+ comments = @db[:comments]
+ q = { node_id: node_id }
+ # replace all comments of this node instead of diffing them
+ comments.where(q).delete
+ tmp.each do |tid, val|
+ # reuse the vals row if this value is already known
+ v = vals[val: val]
+ q[:val_id] = v ? v[:id] : vals.insert(val: val)
+ q[:tag_id] = tid
+ comments.insert(q)
+ end
+ end
+ end
+
+ def update(path, jobs: 8)
+ # n.b. "jobs" is for CPU concurrency. Audio media is typically stored
+ # on high-latency media or slow network file systems; so we use a high
+ # number of jobs by default to compensate for the seek-heavy workload
+ # this generates
+ init_suffixes
+ st = File.stat(path) # we always follow the first dir even if it's a symlink
+ st.directory? or
+ raise ArgumentError, "path: #{path.inspect} is not a directory"
+ @work and raise 'update already running'
+ todo, @work = UNIXSocket.pair(:SOCK_SEQPACKET)
+ @db.disconnect
+ jobs.times { |i| fork { worker(todo) } }
+ todo.close
+ scan_dir(path, st)
+ @work.close
+ Process.waitall
+ ensure
+ @work = nil
+ end
+
+ def migrate
+ require 'sequel'
+ Sequel.extension(:migration, :core_extensions) # ugh...
+ @db.transaction do
+ Sequel::Migrator.apply(@db, "#{File.dirname(__FILE__)}/mlib/migrations")
+ root_node # ensure this exists
+ load_tags
+ end
+ end
+
+ def load_tags
+ tag_map = {}
+ tags = @db[:tags]
+ @tags.each do |lc, mc|
+ unless q = tags[tag: mc]
+ q = { tag: mc }
+ q[:id] = tags.insert(q)
+ end
+ tag_map[lc] = q[:id]
+ end
+
+ # Xiph tags use "tracknumber" and "discnumber"
+ %w(track disc).each do |x|
+ tag_id = tag_map[x] and tag_map["#{x}number"] = tag_id
+ end
+ @tag_map = tag_map
+ end
+
+ def scan_any(path, parent_id)
+ st = File.lstat(path) rescue return
+ if st.directory?
+ scan_dir(path, st, parent_id)
+ elsif st.file?
+ scan_file(path, st, parent_id)
+ # elsif st.symlink? TODO
+ # scan_link(path, st, parent_id)
+ end
+ end
+
+ def scan_file(path, st, parent_id)
+ return if @suffixes !~ path || st.size == 0
+
+ # no-op if no change
+ if node = @db[:nodes][name: path, parent_id: parent_id]
+ return if st.ctime.to_i == node[:ctime] || node[:tlen] == DM_IGN
+ end
+
+ job = Job.new(@pwd, st.ctime.to_i, parent_id, path)
+ send_harder(@work, Marshal.dump(job))
+ end
+
+ def root_node
+ q = @root_node and return q
+ # root node always has parent_id: 1
+ q = {
+ parent_id: 1, # self
+ name: '',
+ }
+ node = @db[:nodes][q] and return (@root_node = node)
+ begin
+ q[:tlen] = DM_DIR
+ q[:id] = @db[:nodes].insert(q)
+ q
+ rescue Sequel::DatabaseError
+ # we may conflict on insert if we didn't use a transaction
+ raise if @db.in_transaction?
+ @root_node = @db[:paths][q] or raise
+ end
+ end
+
+ def dir_vivify(parts, ctime)
+ @db.transaction do
+ dir = root_node
+ last = parts.pop
+ parts.each do |name|
+ dir = node_ensure(dir[:id], name, DM_DIR)
+ end
+ node_ensure(dir[:id], last, DM_DIR, ctime)
+ end
+ end
+
+ def node_update_maybe(node, tlen, ctime)
+ q = {}
+ q[:ctime] = ctime if ctime && ctime != node[:ctime]
+ q[:tlen] = tlen if tlen != node[:tlen]
+ return if q.empty?
+ node_id = node.delete(:id)
+ @db[:nodes].where(id: node_id).update(node.merge(q))
+ node[:id] = node_id
+ end
+
+ def node_ensure(parent_id, name, tlen, ctime = nil)
+ q = { name: name, parent_id: parent_id }
+ if node = @db[:nodes][q]
+ node_update_maybe(node, tlen, ctime)
+ else
+ # brand new node
+ node = q.dup
+ node[:tlen] = tlen
+ node[:ctime] = ctime
+ node[:id] = @db[:nodes].insert(node)
+ end
+ node
+ end
+
+ def scan_dir(path, st, parent_id = nil)
+ prev_wd = @pwd
+ Dir.chdir(path)
+ cur = @pwd = Dir.pwd.b
+
+ # TODO: use parent_id if given
+ dir = dir_vivify(cur.split(%r{/+}n), st.ctime.to_i)
+ Dir.foreach('.', encoding: Encoding::BINARY) do |x|
+ case x
+ when '.', '..', %r{\n}n
+ # files with newlines in them are rare and last I checked (in 2008),
+ # mpd could not support them, either. So lets not bother for now.
+ next
+ else
+ scan_any(x, dir[:id])
+ end
+ end
+ ensure
+ Dir.chdir(prev_wd) if cur && prev_wd
+ @pwd = prev_wd
+ end
+
+ def send_harder(sock, msg)
+ sock.sendmsg(msg)
+ rescue Errno::EMSGSIZE
+ sock.setsockopt(:SOL_SOCKET, :SO_SNDBUF, msg.bytesize + 1024)
+ # if it still fails, oh well...
+ begin
+ sock.sendmsg(msg)
+ rescue => e
+ warn "#{msg.bytesize} too big, dropped #{e.class}"
+ end
+ end
+end
diff --git a/lib/dtas/mlib/migrations/0001_initial.rb b/lib/dtas/mlib/migrations/0001_initial.rb
new file mode 100644
index 0000000..f147cbe
--- /dev/null
+++ b/lib/dtas/mlib/migrations/0001_initial.rb
@@ -0,0 +1,42 @@
+# Copyright (C) 2015 all contributors <dtas-all@nongnu.org>
+# License: GPLv3 or later (https://www.gnu.org/licenses/gpl-3.0.txt)
+
+Sequel.migration do
+ up do
+ create_table(:nodes) do
+ primary_key :id
+ String :name, null: false # encoding: binary, POSIX
+ Integer :ctime
+ foreign_key :parent_id, :nodes, null: false # parent dir
+ # >= 0: tlen of track, -2: ignore, -1: directory
+ Integer :tlen, null: false
+ unique [ :parent_id, :name ]
+ end
+
+ create_table(:tags) do
+ primary_key :id
+ String :tag, null: false, unique: true # encoding: US-ASCII
+ end
+
+ create_table(:vals) do
+ primary_key :id
+ String :val, null: false, unique: true # encoding: UTF-8
+ end
+
+ create_table(:comments) do
+ foreign_key :node_id, :nodes, null: false
+ foreign_key :tag_id, :tags, null: false
+ foreign_key :val_id, :vals, null: false
+ primary_key [ :node_id, :tag_id, :val_id ]
+ index :node_id
+ index [ :tag_id, :val_id ]
+ end
+ end
+
+ down do
+ drop_table(:nodes)
+ drop_table(:tags)
+ drop_table(:vals)
+ drop_table(:comments)
+ end
+end
diff --git a/test/test_mlib.rb b/test/test_mlib.rb
new file mode 100644
index 0000000..072d1d0
--- /dev/null
+++ b/test/test_mlib.rb
@@ -0,0 +1,31 @@
+# Copyright (C) 2013-2015 all contributors <dtas-all@nongnu.org>
+# License: GPLv3 or later (https://www.gnu.org/licenses/gpl-3.0.txt)
+require_relative 'helper'
+begin
+ require 'dtas/mlib'
+ require 'sequel/no_core_ext'
+ require 'sqlite3'
+rescue LoadError => err
+ warn "skipping mlib test: #{err.message}"
+ exit 0
+end
+
+class TestMlib < Testcase
+ def setup
+ @db = Sequel.sqlite(':memory:')
+ end
+
+ def test_migrate
+ ml = DTAS::Mlib.new(@db)
+ begin
+ $-w = false
+ ml.migrate
+ tables = @db.tables
+ ensure
+ $-w = true
+ end
+ [ :nodes, :tags, :vals, :comments ].each do |t|
+ assert tables.include?(t), "missing #{t}"
+ end
+ end
+end
--
EW
next reply other threads:[~2015-11-27 11:51 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-11-27 11:50 Eric Wong [this message]
2015-11-28 11:16 ` [PATCH 2/1] dtas-mlib: add dump support for debugging Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://80x24.org/dtas/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20151127115054.16912-1-e@80x24.org \
--to=e@80x24.org \
--cc=dtas-all@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/dtas.git/
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).