From 7266a4bcceb1feb1dc7db9fa2e263b1fcc91ac4e Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Sun, 5 Apr 2015 09:26:34 +0000
Subject: dtas-archive: paranoid archival script

This archives audio files (typically .wav from portable devices)
as FLAC and performs a best-effort verification that the file was
transferred successfully without bit errors by dropping kernel caches
and rechecking the result.
---
 Documentation/GNUmakefile      |   1 +
 Documentation/dtas-archive.txt |  61 ++++++++++++++
 bin/dtas-archive               | 187 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 249 insertions(+)
 create mode 100644 Documentation/dtas-archive.txt
 create mode 100755 bin/dtas-archive

diff --git a/Documentation/GNUmakefile b/Documentation/GNUmakefile
index 4f44cdc..537c659 100644
--- a/Documentation/GNUmakefile
+++ b/Documentation/GNUmakefile
@@ -18,6 +18,7 @@ m1 += dtas-sinkedit
 m1 += dtas-sourceedit
 m1 += dtas-tl
 m1 += dtas-splitfx
+m1 += dtas-archive
 
 m7 =
 m7 += dtas-player_protocol
diff --git a/Documentation/dtas-archive.txt b/Documentation/dtas-archive.txt
new file mode 100644
index 0000000..9f2fe04
--- /dev/null
+++ b/Documentation/dtas-archive.txt
@@ -0,0 +1,61 @@
+% dtas-archive(1) dtas user manual
+%
+
+# NAME
+
+dtas-archive - paranoid audio file copy
+
+# SYNOPSIS
+
+dtas-archive [OPTIONS] SOURCE DESTINATION
+
+# DESCRIPTION
+
+dtas-archive is intended for archiving audio data to/from laptops and
+computers without ECC memory, reading data multiple times in an attempt
+to detect memory corruption.  dtas-archive may only be effective on
+machines running the Linux kernel where posix_fadvise(2) can be used to
+drop caches for a particular file after fsync(2).
+
+dtas-archive spawns sox(1) to archive audio data (likely uncompressed
+WAVE) to FLAC and verifies the result using sndfile-cmp(1), a tool
+developed in a different source tree than sox and less likely to share
+the same bugs (if any) as sox.
+
+# OPTIONS
+
+-j, \--jobs [JOBS]
+:   Number of jobs to run in parallel.
+    Incrementing this may hurt performance on slow storage devices.  Default: 1
+
+-n, \--dry-run
+:   Print, but do not run the commands to be executed
+
+-s, \--quiet, \--silent
+:   Silent operation, commands are not printed as executed
+
+-S, \--stats
+:   Run and save the text output of the sox "stats" effect as
+    $DESTINATION_FILE_WITHOUT_SUFFIX.stats next to the output file
+
+-k, \--keep-going
+:   Continue after error
+
+-r, \--repeat [COUNT]
+:   Number of times to repeat the sndfile-cmp(1) check.  Default: 1
+
+# COPYRIGHT
+
+Copyright 2015 all contributors .\
+License: GPLv3 or later
+
+# CONTACT
+
+All feedback welcome via plain-text mail to: \
+Mailing list archives available at and
+\
+No subscription is necessary to post to the mailing list.
+
+# SEE ALSO
+
+sndfile-cmp(1), sox(1)
diff --git a/bin/dtas-archive b/bin/dtas-archive
new file mode 100755
index 0000000..69fc40e
--- /dev/null
+++ b/bin/dtas-archive
@@ -0,0 +1,213 @@
+#!/usr/bin/env ruby
+# Copyright (C) 2015 all contributors
+# License: GPLv3 or later (https://www.gnu.org/licenses/gpl-3.0.txt)
+#
+# Converts each SOURCE (file or directory tree) into DESTINATION with sox,
+# then compares input and output with sndfile-cmp after asking the kernel to
+# drop its caches for both files; failures and mismatches set the exit code.
+usage = "#$0 SOURCE DESTINATION"
+
+# We could use the equivalent sox command here, but folks working on
+# dtas are more likely to write patches for sox (and thus introduce bugs
+# into it), so we'll use sndfile-cmp as it lives in a different source tree.
+# soxi is checked too: without it every input would be skipped as "not an
+# audio file" and the run would still exit successfully.
+%w(sndfile-cmp sox soxi).each do |cmd|
+  `which #{cmd} 2>/dev/null`.chomp.empty? and abort "#{cmd} not found in PATH"
+end
+
+RUBY_PLATFORM =~ /linux/ or
+  warn "#$0 is unproven without Linux kernel fadvise semantics"
+have_advise = IO.instance_methods.include?(:advise)
+have_advise or warn "#$0 does not work reliably without IO#advise support"
+
+require 'shellwords'
+require 'fileutils'
+require 'find'
+require 'optparse'
+Thread.abort_on_exception = true
+dry_run = false
+silent = false
+type = 'flac'
+jobs = 1
+repeat = 1
+stats = false
+keep_going = false
+
+# The [ARG] forms are optional, so their blocks may receive nil; keep the
+# default in that case instead of crashing later on a nil value.
+OptionParser.new('', 24, ' ') do |op|
+  op.banner = usage
+  op.on('-t', '--type [TYPE]', 'FILE-TYPE (default: flac)') do |t|
+    type = t if t
+  end
+  op.on('-j', '--jobs [JOBS]', Integer) { |j| jobs = j if j }
+  op.on('-S', '--stats', 'save stats on the file') { stats = true }
+  op.on('-k', '--keep-going', 'continue after error') { keep_going = true }
+  op.on('-n', '--dry-run', 'only print commands, do not run them') do
+    dry_run = true
+  end
+  op.on('-r', '--repeat [COUNT]', 'number of times to check', Integer) do |r|
+    repeat = r if r
+  end
+  op.on('-s', '--quiet', '--silent') { silent = true }
+  op.on('-h', '--help') do
+    puts(op.to_s)
+    exit
+  end
+  op.parse!(ARGV)
+end
+
+# At least one SOURCE plus the DESTINATION are required.
+ARGV.size >= 2 or abort usage
+dst = ARGV.pop
+src = ARGV.dup
+
+# Nothing is created on disk during a dry run.
+FileUtils.mkpath(dst) unless dry_run || File.exist?(dst)
+src_files = Hash.new { |h,dest_dir| h[dest_dir] = [] }
+
+# Group input files by destination subdirectory: files directly inside a
+# SOURCE directory (or named on the command line) go to DESTINATION itself,
+# anything deeper goes to a subdirectory named after its parent directory.
+src.each do |s|
+  src_st = File.stat(s)
+  if src_st.directory?
+    Find.find(s) do |path|
+      File.file?(path) or next
+      dir = File.dirname(path)
+      dir_st = File.stat(dir)
+      if dir_st.ino == src_st.ino && dir_st.dev == src_st.dev
+        src_files['.'] << path
+      else
+        dir = File.basename(File.dirname(path))
+        src_files[dir] << path
+      end
+    end
+  else
+    src_files['.'] << s
+  end
+end
+
+pairs = []
+type = ".#{type}" unless type.start_with?('.')
+
+# Build the [ input, output ] work list, creating output directories first.
+src_files.each do |dir, files|
+  dir = dir == '.' ? dst : File.join(dst, dir)
+  if dry_run || !silent
+    puts "mkdir -p #{Shellwords.escape(dir)}"
+  end
+  FileUtils.mkpath(dir) unless dry_run
+
+  files.each do |path|
+    base = File.basename(path).sub(/\.[^\.]+\z/, type)
+    out = File.join(dir, base)
+    pairs << [ path, out ]
+  end
+end
+
+mtx = Mutex.new # protects fails, mismatches and pairs
+fails = []
+mismatches = []
+
+# Record a failed sox run; unless --keep-going, drop all queued work.
+on_fail = lambda do |job, status|
+  mtx.synchronize do
+    pairs.clear unless keep_going
+    fails << [ job, status ]
+  end
+end
+
+# Record a sndfile-cmp mismatch between input and output.
+on_mismatch = lambda do |job, status|
+  mtx.synchronize do
+    mismatches << [ job, status ]
+  end
+end
+
+exiting = false
+%w(INT TERM).each do |s|
+  trap(s) do
+    warn "Caught SIG#{s}, stopping gracefully..."
+    exiting = true
+    trap(s, 'DEFAULT') # non-graceful if signaled again
+  end
+end
+
+thrs = jobs.times.map do
+  Thread.new do
+    while job = mtx.synchronize { pairs.shift }
+      break if exiting
+
+      input, output = *job
+
+      unless system('soxi', '-s', input, out: IO::NULL, err: IO::NULL)
+        warn "skipping #{input.inspect}, not an audio file"
+        next
+      end
+
+      # output path with its suffix replaced by ".stats"
+      stats_out = "#{output.sub(/\.[^\.]+\z/, '')}.stats" if stats
+
+      if dry_run || !silent
+        names = job.map { |x| Shellwords.escape(x) }
+        cmd = [ 'sox', *names ]
+        if stats
+          cmd << 'stats'
+          cmd << "2>#{Shellwords.escape(stats_out)}"
+        end
+
+        puts cmd.join(' ')
+        cmpcmd = "sndfile-cmp #{names[0]} #{names[1]}"
+        if dry_run
+          puts cmpcmd
+          next
+        end
+      end
+
+      cmd = [ 'sox', input, output ]
+      if stats
+        cmd << 'stats'
+        cmd = [ *cmd, { err: stats_out } ]
+      end
+
+      # a failed conversion leaves nothing worth comparing or timestamping
+      unless system(*cmd)
+        on_fail.call(job, $?)
+        next
+      end
+
+      # clear kernel caches, this relies on Linux behavior
+      repeat.times do
+        if have_advise
+          th = Thread.new { File.open(input) { |fp| fp.advise(:dontneed) } }
+          File.open(output, 'ab') do |fp|
+            fp.fsync
+            fp.advise(:dontneed)
+          end
+          th.join
+        end
+
+        puts cmpcmd unless silent
+        system('sndfile-cmp', input, output) or on_mismatch.call(job, $?)
+      end
+      # carry the input's timestamps over to the archived copy
+      st = File.stat(input)
+      File.utime(st.atime, st.mtime, output)
+    end
+  end
+end
+
+thrs.each(&:join)
+ok = true
+fails.each do |job, status|
+  $stderr.puts "#{job.inspect} failed: #{status.inspect}"
+  ok = false
+end
+mismatches.each do |job, status|
+  $stderr.puts "#{job.inspect} mismatched: #{status.inspect}"
+  ok = false
+end
+
+exit ok
-- 
cgit v1.2.3-24-ge0c7