Merge new metadata support

This merges the work done on reading metadata tags (Vorbis comments).
Claxon now reads those when opening a stream, and keeps a rather
low-level representation of them in memory. This representation can be
used to iterate over all the metadata tags. In addition, a higher-level
API is exposed to do a case-insensitive lookup for a particular tag.
This commit is contained in:
Ruud van Asseldonk 2017-10-19 21:40:18 +02:00
commit cf09824eee
17 changed files with 852 additions and 77 deletions

4
.gitignore vendored
View File

@ -1,6 +1,5 @@
# Cargo files
/target
/Cargo.lock
# Test files
/testsamples/*.flac
@ -11,7 +10,10 @@
/testsamples/extra/*.wav
# Except for small test samples that are included verbatim
!/testsamples/large_vendor_string.flac
!/testsamples/large_vorbis_comment_block.flac
!/testsamples/pop.flac
!/testsamples/repeated_vorbis_comment.flac
!/testsamples/short.flac
!/testsamples/wasted_bits.flac

123
Cargo.lock generated Normal file
View File

@ -0,0 +1,123 @@
[root]
name = "claxon"
version = "0.3.1"
dependencies = [
"hound 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"mp4parse 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"ogg 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
"time 0.1.38 (registry+https://github.com/rust-lang/crates.io-index)",
"walkdir 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "bitreader"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "byteorder"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "hound"
version = "3.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "kernel32-sys"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "libc"
version = "0.2.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "mp4parse"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"bitreader 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"byteorder 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"num-traits 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "num-traits"
version = "0.1.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "ogg"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"byteorder 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "redox_syscall"
version = "0.1.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "same-file"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "time"
version = "0.1.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.30 (registry+https://github.com/rust-lang/crates.io-index)",
"redox_syscall 0.1.31 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "walkdir"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
"same-file 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "winapi"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "winapi-build"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[metadata]
"checksum bitreader 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a13e712f6e04f0e507a913abf5e008ba2a342ec26d9fac34ba0766bd77ced5b8"
"checksum byteorder 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ff81738b726f5d099632ceaffe7fb65b90212e8dce59d518729e7e8634032d3d"
"checksum hound 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d751b836ac593fc578456297bea26218d15ef07c3086c70fca97ba5402e562ec"
"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
"checksum libc 0.2.30 (registry+https://github.com/rust-lang/crates.io-index)" = "2370ca07ec338939e356443dac2296f581453c35fe1e3a3ed06023c49435f915"
"checksum mp4parse 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "99942c1e9fa8b6fc9b63c6eb65bc6d442df3bcd8b8889de5b436f5527fd81adf"
"checksum num-traits 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)" = "99843c856d68d8b4313b03a17e33c4bb42ae8f6610ea81b28abe076ac721b9b0"
"checksum ogg 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3f8de5433300a8a0ba60a3207766a3ce9efdede6aaab23311b5a8cf1664fe2e9"
"checksum redox_syscall 0.1.31 (registry+https://github.com/rust-lang/crates.io-index)" = "8dde11f18c108289bef24469638a04dce49da56084f2d50618b226e47eb04509"
"checksum same-file 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "d931a44fdaa43b8637009e7632a02adc4f2b2e0733c08caa4cf00e8da4a117a7"
"checksum time 0.1.38 (registry+https://github.com/rust-lang/crates.io-index)" = "d5d788d3aa77bc0ef3e9621256885555368b47bd495c13dd2e7413c89f845520"
"checksum walkdir 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)" = "bb08f9e670fab86099470b97cd2b252d6527f0b3cc1401acdb595ffc9dd288ff"
"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"

View File

@ -17,9 +17,6 @@ travis-ci = { repository = "ruuda/claxon", branch = "v0.3.1" }
[dev-dependencies]
hound = "3.0"
mp4parse = "0.8"
ogg = "0.5.1"
time = "0.1"
walkdir = "1.0"
# Use a fork of the ogg crate for now, because v0.5.0 is not compatible with
# Rust versions below 1.17, and Claxon wants to be compatible with 1.13.
ogg = { git = "https://github.com/ruuda/ogg", rev = "72105a4125f98f638a9735ad" }

View File

@ -1,6 +1,22 @@
Changelog
=========
Unreleased
----------
**Breaking changes**:
- The unused `Error::TooWide` variant has been removed.
- Files with metadata blocks larger than 10 MiB are now rejected to protect
against memory allocation denial of service attacks.
Release highlights:
- Functionality for reading FLAC embedded in an ogg or mp4 container has been
added, together with new examples.
- Support for reading metadata (Vorbis comments, also known as FLAC tags) has
been added.
0.3.1
-----

9
docs/background.md Normal file
View File

@ -0,0 +1,9 @@
# Background
Many media players crash on corrupted input (not FLAC in particular). This is
bad, the decoder should signal an error on invalid input, it should not crash.
I suspect that this is partly due to the fact that most decoders are written in
C. I thought I'd try and write a decoder in a safe language: Rust. Video codecs
can be quite complex, and nowadays CPU decoding is not all that common any more.
Therefore, I decided to first try and write a decoder for an audio codec that I
love and use on a daily basis: FLAC.

View File

@ -0,0 +1,82 @@
// Claxon -- A FLAC decoding library in Rust
// Copyright 2017 Ruud van Asseldonk
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// A copy of the License has been included in the root of the repository.
// This file contains a benchmark for reading metadata tags (Vorbis comments).
// It opens every flac file in `testsamples/extra`, looks up a fixed set of
// tags, and prints timing statistics for use with tools/compare_benches.r.
extern crate claxon;
extern crate walkdir;
extern crate time;
use time::PreciseTime;
use std::io::{Seek, SeekFrom};
/// Benchmarks metadata reading on the flac files in `testsamples/extra`.
///
/// Every file is opened several times; on each pass a fixed set of tags is
/// looked up. Afterwards the 10th, 50th and 90th percentile and the mean of the
/// per-open time (in microseconds), and the 10th percentile of the throughput
/// (in MiB/s), are printed on a single line.
///
/// Panics if no flac file is found, if a file cannot be opened, or if one of
/// the expected tags is missing from a file.
fn main() {
    use std::ffi::OsStr;

    // Every file is read this many times to amortize the walkdir cost.
    const ITERATIONS: u32 = 10;

    // The tags that every sample file is expected to carry.
    const EXPECTED_TAGS: [&'static str; 16] = [
        "date",
        "originaldate",
        "tracknumber",
        "tracktotal",
        "discnumber",
        "disctotal",
        "title",
        "album",
        "artist",
        "albumartist",
        "artistsort",
        "albumartistsort",
        "musicbrainz_trackid",
        "musicbrainz_albumid",
        "musicbrainz_artistid",
        "musicbrainz_albumartistid",
    ];

    let mut file_times_us = Vec::new();
    let mut bytes_per_sec = Vec::new();

    let wd = walkdir::WalkDir::new("testsamples/extra")
        .follow_links(true)
        .max_open(1024) // Prefer more file descriptors over allocating memory.
        .into_iter()
        .filter_map(|e| e.ok())
        .take(1024);

    for entry in wd {
        let path = entry.path();
        if !path.is_file() || path.extension() != Some(OsStr::new("flac")) {
            continue;
        }

        let epoch = PreciseTime::now();
        let mut bytes = 0;

        for _ in 0..ITERATIONS {
            let reader = claxon::FlacReader::open(path).unwrap();

            // Note that these are not optimized away even though the results
            // are not used, because the expectation may fail.
            for tag in EXPECTED_TAGS.iter() {
                reader.get_tag(*tag).next().expect(*tag);
            }

            // The position of the underlying reader after opening is added up
            // as the number of bytes processed for this pass.
            bytes += reader.into_inner().seek(SeekFrom::Current(0)).unwrap();
        }

        let duration_ns = epoch.to(PreciseTime::now()).num_nanoseconds().unwrap();
        file_times_us.push(duration_ns as f64 / 1000.0 / ITERATIONS as f64);

        // Both `bytes` and `duration_ns` are totals over all iterations, so
        // their ratio already is the throughput; it must not be divided by the
        // iteration count again.
        bytes_per_sec.push(bytes as f64 / (duration_ns as f64 / 1.0e9));
    }

    // Without samples the percentile lookups below would fail with an
    // unhelpful index-out-of-bounds panic.
    assert!(!file_times_us.is_empty(), "no .flac files found in testsamples/extra");

    file_times_us.sort_by(|x, y| x.partial_cmp(y).unwrap());
    bytes_per_sec.sort_by(|x, y| x.partial_cmp(y).unwrap());

    let p10 = file_times_us[10 * file_times_us.len() / 100];
    let p50 = file_times_us[50 * file_times_us.len() / 100];
    let p90 = file_times_us[90 * file_times_us.len() / 100];
    let mean = file_times_us.iter().sum::<f64>() / (file_times_us.len() as f64);
    let p10_mibs = bytes_per_sec[10 * bytes_per_sec.len() / 100] / (1024.0 * 1024.0);

    // Output numbers compatible with tools/compare_benches.r.
    println!("{:>6.2} {:>6.2} {:>6.2} {:>6.2} {:>6.2}", p10, p50, p90, mean, p10_mibs);
}

28
examples/tags.rs Normal file
View File

@ -0,0 +1,28 @@
// Claxon -- A FLAC decoding library in Rust
// Copyright 2017 Ruud van Asseldonk
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// A copy of the License has been included in the root of the repository.
// This file contains a minimal example of using Claxon to read metadata tags
// (also called Vorbis comments) from a flac file. It behaves similarly to
// `metaflac --block-type=VORBIS_COMMENT --list <file>`.
extern crate claxon;
use std::env;
/// Prints all Vorbis comments of every file named on the command line.
fn main() {
    // Everything after the program name is treated as the path of a flac file.
    let paths = env::args().skip(1);

    for fname in paths {
        let reader = claxon::FlacReader::open(&fname).expect("failed to open FLAC stream");

        // `tags()` walks over all tags in the file. To find one specific tag,
        // `get_tag()` may be more convenient.
        let mut tags = reader.tags();
        while let Some((name, value)) = tags.next() {
            // The output resembles that of
            // `metaflac --block-type=VORBIS_COMMENT --list`.
            println!("{}: {}={}", fname, name, value);
        }
    }
}

View File

@ -7,13 +7,9 @@ A FLAC decoding library in Rust.
[![Crates.io version][crate-img]][crate]
[![Documentation][docs-img]][docs]
Many media players crash on corrupted input (not FLAC in particular). This is
bad, the decoder should signal an error on invalid input, it should not crash.
I suspect that this is partly due to the fact that most decoders are written in
C. I thought I'd try and write a decoder in a safe language: Rust. Video codecs
can be quite complex, and nowadays CPU decoding is not all that common any more.
Therefore, I decided to first try and write a decoder for an audio codec that I
love and use on a daily basis: FLAC.
Claxon is a FLAC decoder written in pure Rust. It has been fuzzed and verified
against the reference decoder for correctness. Its performance is similar to the
reference decoder.
Example
-------

View File

@ -11,6 +11,7 @@ use std::error;
use std::fmt;
use std::io;
use std::result;
use std::string;
/// An error that prevents successful decoding of the FLAC stream.
#[derive(Debug)]
@ -21,10 +22,6 @@ pub enum Error {
/// An ill-formed FLAC stream was encountered.
FormatError(&'static str),
/// The audio stream has more bits per sample than the provided sample
/// buffer to decode into.
TooWide,
/// A currently unsupported feature of the FLAC format was encountered.
///
/// Claxon reads the FLAC format as it was with FLAC 1.3.1. Values in the
@ -36,14 +33,12 @@ pub enum Error {
impl PartialEq for Error {
fn eq(&self, other: &Error) -> bool {
use error::Error::{IoError, FormatError, TooWide, Unsupported};
use error::Error::{IoError, FormatError, Unsupported};
match (self, other) {
(&FormatError(r1), &FormatError(r2)) => r1 == r2,
(&TooWide, &TooWide) => true,
(&Unsupported(f1), &Unsupported(f2)) => f1 == f2,
(&IoError(_), _) => false,
(&FormatError(_), _) => false,
(&TooWide, _) => false,
(&Unsupported(_), _) => false,
}
}
@ -57,10 +52,6 @@ impl fmt::Display for Error {
try!(formatter.write_str("Ill-formed FLAC stream: "));
formatter.write_str(reason)
}
Error::TooWide => {
formatter.write_str("The audio stream has more bits per sample than the provided \
sample buffer to decode into.")
}
Error::Unsupported(feature) => {
try!(formatter.write_str("A currently unsupported feature of the FLAC format \
was encountered: "));
@ -75,7 +66,6 @@ impl error::Error for Error {
match *self {
Error::IoError(ref err) => err.description(),
Error::FormatError(reason) => reason,
Error::TooWide => "the sample has more bits than the destination type",
Error::Unsupported(_) => "unsupported feature",
}
}
@ -84,7 +74,6 @@ impl error::Error for Error {
match *self {
Error::IoError(ref err) => Some(err),
Error::FormatError(_) => None,
Error::TooWide => None,
Error::Unsupported(_) => None,
}
}
@ -96,6 +85,14 @@ impl From<io::Error> for Error {
}
}
impl From<string::FromUtf8Error> for Error {
fn from(_: string::FromUtf8Error) -> Error {
// Vendor strings and Vorbis cmments are the only place where UTF-8 is
// parsed into a String.
Error::FormatError("Vorbis comment or vendor string is not valid UTF-8")
}
}
/// A type for results generated by Claxon where the error type is hard-wired.
pub type Result<T> = result::Result<T, Error>;

View File

@ -106,6 +106,15 @@ pub trait ReadBytes {
let b3 = try!(self.read_u8()) as u32;
Ok(b0 << 24 | b1 << 16 | b2 << 8 | b3)
}
/// Reads four bytes and interprets them as a little-endian 32-bit unsigned integer.
fn read_le_u32(&mut self) -> io::Result<u32> {
    // The first byte read is the least significant one; every following byte
    // is placed eight bits higher. Reading stops at the first failed read.
    let mut value = 0u32;
    for i in 0..4 {
        let byte = try!(self.read_u8()) as u32;
        value |= byte << (8 * i);
    }
    Ok(value)
}
}
impl<R: io::Read> ReadBytes for BufferedReader<R>
@ -356,6 +365,22 @@ fn verify_read_be_u32_cursor() {
assert!(cursor.read_be_u32().is_err());
}
#[test]
fn verify_read_le_u32_buffered_reader() {
    // Two complete little-endian words, followed by a single stray byte.
    let bytes = vec![2u8, 0, 0, 0, 0xe9, 0xff, 0x01, 0x80, 0];
    let mut reader = BufferedReader::new(io::Cursor::new(bytes));

    let first = reader.read_le_u32().ok();
    assert_eq!(first, Some(2));

    let second = reader.read_le_u32().ok();
    assert_eq!(second, Some(2_147_614_697));

    // One byte is not enough for a third word.
    let third = reader.read_le_u32();
    assert!(third.is_err());
}
#[test]
fn verify_read_le_u32_cursor() {
    // Two complete little-endian words, followed by a single stray byte.
    let bytes = vec![2u8, 0, 0, 0, 0xe9, 0xff, 0x01, 0x80, 0];
    let mut reader = io::Cursor::new(bytes);

    let first = reader.read_le_u32().ok();
    assert_eq!(first, Some(2));

    let second = reader.read_le_u32().ok();
    assert_eq!(second, Some(2_147_614_697));

    // One byte is not enough for a third word.
    let third = reader.read_le_u32();
    assert!(third.is_err());
}
/// Left shift that does not panic when shifting by the integer width.
#[inline(always)]
fn shift_left(x: u8, shift: u32) -> u8 {

View File

@ -53,6 +53,16 @@
//! # }
//! ```
//!
//! Retrieving the artist metadata:
//!
//! ```
//! # use claxon;
//! let reader = claxon::FlacReader::open("testsamples/pop.flac").unwrap();
//! for artist in reader.get_tag("ARTIST") {
//! println!("{}", artist);
//! }
//! ```
//!
//! For more examples, see the [examples](https://github.com/ruuda/claxon/tree/master/examples)
//! directory in the crate.
@ -65,7 +75,7 @@ use std::path;
use error::fmt_err;
use frame::FrameReader;
use input::{BufferedReader, ReadBytes};
use metadata::{MetadataBlock, MetadataBlockReader, StreamInfo};
use metadata::{MetadataBlock, MetadataBlockReader, StreamInfo, VorbisComment};
mod crc;
mod error;
@ -82,6 +92,7 @@ pub use frame::Block;
/// TODO: Add an example.
pub struct FlacReader<R: io::Read> {
streaminfo: StreamInfo,
vorbis_comment: Option<VorbisComment>,
#[allow(dead_code)] // TODO: Expose metadata nicely.
metadata_blocks: Vec<MetadataBlock>,
input: BufferedReader<R>,
@ -125,8 +136,13 @@ fn read_stream_header<R: ReadBytes>(input: &mut R) -> Result<()> {
impl<R: io::Read> FlacReader<R> {
/// Attempts to create a reader that reads the FLAC format.
///
/// The header and metadata blocks are read immediately. Audio frames will
/// be read on demand.
/// The header and metadata blocks are read immediately. Audio frames
/// will be read on demand.
///
/// Claxon rejects files that claim to contain excessively large metadata
/// blocks, to protect against denial of service attacks where a
/// small damaged or malicious file could cause gigabytes of memory
/// to be allocated. `Error::Unsupported` is returned in that case.
pub fn new(reader: R) -> Result<FlacReader<R>> {
let mut buf_reader = BufferedReader::new(reader);
@ -135,7 +151,7 @@ impl<R: io::Read> FlacReader<R> {
// Start a new scope, because the input reader must be available again
// for the frame reader next.
let (streaminfo, metadata_blocks) = {
let (streaminfo, vorbis_comment, metadata_blocks) = {
// Next are one or more metadata blocks. The flac specification
// dictates that the streaminfo block is the first block. The metadata
// block reader will yield at least one element, so the unwrap is safe.
@ -146,21 +162,36 @@ impl<R: io::Read> FlacReader<R> {
_ => return fmt_err("streaminfo block missing"),
};
let mut vorbis_comment = None;
// There might be more metadata blocks, read and store them.
let mut metadata_blocks = Vec::new();
for block_result in metadata_iter {
match block_result {
Err(error) => return Err(error),
Ok(block) => metadata_blocks.push(block),
match try!(block_result) {
MetadataBlock::VorbisComment(vc) => {
// The Vorbis comment block need not be present, but
// when it is, it must be unique.
if vorbis_comment.is_some() {
return fmt_err("encountered second Vorbis comment block")
} else {
vorbis_comment = Some(vc);
}
}
MetadataBlock::StreamInfo(..) => {
return fmt_err("encountered second streaminfo block")
}
// Other blocks are currently not handled.
block => metadata_blocks.push(block),
}
}
(streaminfo, metadata_blocks)
(streaminfo, vorbis_comment, metadata_blocks)
};
// The flac reader will contain the reader that will read frames.
let flac_reader = FlacReader {
streaminfo: streaminfo,
vorbis_comment: vorbis_comment,
metadata_blocks: metadata_blocks,
input: buf_reader,
};
@ -175,6 +206,51 @@ impl<R: io::Read> FlacReader<R> {
self.streaminfo
}
/// Returns the vendor string of the Vorbis comment block, if present.
///
/// This string usually contains the name and version of the program that
/// encoded the FLAC stream, such as `reference libFLAC 1.3.2 20170101`
/// or `Lavf57.25.100`.
pub fn vendor(&self) -> Option<&str> {
self.vorbis_comment.as_ref().map(|vc| &vc.vendor[..])
}
/// Returns name-value pairs of Vorbis comments, such as `("ARTIST", "Queen")`.
///
/// The name is supposed to be interpreted case-insensitively, and is
/// guaranteed to consist of ASCII characters. Claxon does not normalize
/// the casing of the name. Use `get_tag()` to do a case-insensitive lookup.
///
/// Names need not be unique. For instance, multiple `ARTIST` comments might
/// be present on a collaboration track.
///
/// See https://www.xiph.org/vorbis/doc/v-comment.html for more details.
pub fn tags<'a>(&'a self) -> metadata::Tags<'a> {
match self.vorbis_comment.as_ref() {
Some(vc) => metadata::Tags::new(&vc.comments[..]),
None => metadata::Tags::new(&[]),
}
}
/// Look up a Vorbis comment such as `ARTIST` in a case-insensitive way.
///
/// Returns an iterator, because tags may occur more than once. There could
/// be multiple `ARTIST` tags on a collaboration track, for instance.
///
/// Note that tag names are ASCII and never contain `'='`; trying to look up
/// a non-ASCII tag will return no results. Furthermore, the Vorbis comment
/// spec dictates that tag names should be handled case-insensitively, so
/// this method performs a case-insensitive lookup.
///
/// See also `tags()` for access to the raw tags.
/// See https://www.xiph.org/vorbis/doc/v-comment.html for more details.
pub fn get_tag<'a>(&'a self, tag_name: &'a str) -> metadata::GetTag<'a> {
match self.vorbis_comment.as_ref() {
Some(vc) => metadata::GetTag::new(&vc.comments[..], tag_name),
None => metadata::GetTag::new(&[], tag_name),
}
}
/// Returns an iterator that decodes a single frame on every iteration.
/// TODO: It is not an iterator.
///
@ -187,6 +263,14 @@ impl<R: io::Read> FlacReader<R> {
/// Returns an iterator over all samples.
///
/// The channel data is interleaved. The iterator is streaming. That is,
/// if you call this method once, read a few samples, and call this method
/// again, the second iterator will not start again from the beginning of
/// the file. It will continue somewhere after where the first iterator
/// stopped, and it might skip some samples. (This is because FLAC divides
/// a stream into blocks, which have to be decoded entirely. If you drop the
/// iterator, you lose the unread samples in that block.)
///
/// This is a user-friendly interface that trades performance for ease of
/// use. If performance is an issue, consider using `blocks()` instead.
///
@ -197,14 +281,6 @@ impl<R: io::Read> FlacReader<R> {
/// block can never fail, but a match on every sample is required
/// nonetheless. For more control over when decoding happens, and less error
/// handling overhead, use `blocks()`.
///
/// The channel data is interleaved. The iterator is streaming. That is,
/// if you call this method once, read a few samples, and call this method
/// again, the second iterator will not start again from the beginning of
/// the file. It will continue somewhere after where the first iterator
/// stopped, and it might skip some samples. (This is because FLAC divides
/// a stream into blocks, which have to be decoded entirely. If you drop the
/// iterator, you lose the unread samples in that block.)
pub fn samples<'r>(&'r mut self) -> FlacSamples<&'r mut BufferedReader<R>> {
FlacSamples {
frame_reader: frame::FrameReader::new(&mut self.input),

View File

@ -7,8 +7,10 @@
//! The `metadata` module deals with metadata at the beginning of a FLAC stream.
use error::{Result, fmt_err};
use error::{Error, Result, fmt_err};
use input::ReadBytes;
use std::str;
use std::slice;
#[derive(Clone, Copy)]
struct MetadataBlockHeader {
@ -21,18 +23,18 @@ struct MetadataBlockHeader {
#[derive(Clone, Copy, Debug)]
pub struct StreamInfo {
// TODO: "size" would better be called "duration" for clarity.
/// The minimum block size (in inter-channel samples) used in the stream.
///
/// To get the minimum block duration in seconds, divide this by the sample
/// rate.
// TODO: Rename to `min_block_duration` for clarity?
/// This number is independent of the number of channels. To get the minimum
/// block duration in seconds, divide this by the sample rate.
pub min_block_size: u16,
/// The maximum block size (in inter-channel samples) used in the stream.
///
/// To get the maximum block duration in seconds, divide this by the sample
/// rate.
// TODO: Rename to `max_block_duration` for clarity?
/// This number is independent of the number of channels. To get the
/// maximum block duration in seconds, divide by the sample rate. To avoid
/// allocations during decoding, a buffer of this size times the number of
/// channels can be allocated up front and passed into
/// `FrameReader::read_next_or_eof()`.
pub max_block_size: u16,
/// The minimum frame size (in bytes) used in the stream.
pub min_frame_size: Option<u32>,
@ -70,6 +72,34 @@ pub struct SeekTable {
seekpoints: Vec<SeekPoint>,
}
/// Vorbis comments, also known as FLAC tags (e.g. artist, title, etc.).
///
/// Holds the vendor string and the raw comments of a Vorbis comment block.
pub struct VorbisComment {
/// The “vendor string”, chosen by the encoder vendor.
///
/// This string usually contains the name and version of the program that
/// encoded the FLAC stream, such as `reference libFLAC 1.3.2 20170101`
/// or `Lavf57.25.100`.
pub vendor: String,
/// Name-value pairs of Vorbis comments, such as `ARTIST=Queen`.
///
/// This struct stores a raw low-level representation of tags. Use
/// `FlacReader::tags()` for a friendlier iterator. The tuple consists of
/// the string in `"NAME=value"` format, and the byte index of the first
/// `'='` in that string. The name is the part of the string before that
/// index, the value is the part after the `'='`.
///
/// The name is supposed to be interpreted case-insensitively, and is
/// guaranteed to consist of ASCII characters. Claxon does not normalize
/// the casing of the name. Use `metadata::GetTag` to do a case-insensitive
/// lookup.
///
/// Names need not be unique. For instance, multiple `ARTIST` comments might
/// be present on a collaboration track.
///
/// See https://www.xiph.org/vorbis/doc/v-comment.html for more details.
pub comments: Vec<(String, usize)>,
}
/// A metadata block of the flac stream.
pub enum MetadataBlock {
/// A stream info block.
@ -89,7 +119,7 @@ pub enum MetadataBlock {
/// A seek table block.
SeekTable(SeekTable),
/// A Vorbis comment block, also known as FLAC tags.
VorbisComment, // TODO
VorbisComment(VorbisComment),
/// A CUE sheet block.
CueSheet, // TODO
/// A picture block.
@ -98,6 +128,82 @@ pub enum MetadataBlock {
Reserved,
}
/// Iterates over Vorbis comments (FLAC tags) in a FLAC stream.
///
/// Every item is a `(name, value)` pair of string slices that borrow from the
/// comments passed to `Tags::new()`. The number of remaining items is known up
/// front, so `len()` is available through `ExactSizeIterator`.
///
/// See `FlacReader::tags()` for more details.
pub struct Tags<'a> {
    /// The underlying iterator.
    iter: slice::Iter<'a, (String, usize)>,
}

impl<'a> Tags<'a> {
    /// Returns a new `Tags` iterator.
    ///
    /// Every element of `comments` is a string in `"NAME=value"` format,
    /// together with the byte index of the `'='` separator in that string.
    #[inline]
    pub fn new(comments: &'a [(String, usize)]) -> Tags<'a> {
        Tags {
            iter: comments.iter(),
        }
    }
}

impl<'a> Iterator for Tags<'a> {
    type Item = (&'a str, &'a str);

    /// Returns the next `(name, value)` pair, split around the separator.
    ///
    /// Panics if the stored separator index is not a valid position inside
    /// the comment string.
    #[inline]
    fn next(&mut self) -> Option<(&'a str, &'a str)> {
        self.iter.next().map(|&(ref comment, sep_idx)| {
            // The separator itself is part of neither the name nor the value.
            (&comment[..sep_idx], &comment[sep_idx + 1..])
        })
    }

    /// Forwards the exact bounds of the underlying slice iterator, so that
    /// consumers such as `collect()` can allocate the right amount up front.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iter.size_hint()
    }
}

// Every comment yields exactly one item, so the size hint is exact.
impl<'a> ExactSizeIterator for Tags<'a> {}
/// Iterates over Vorbis comments looking for a specific one; returns its values as `&str`.
///
/// The tag name is compared without regard to ASCII case. A tag that occurs
/// more than once is yielded once per occurrence, in order.
///
/// See `FlacReader::get_tag()` for more details.
pub struct GetTag<'a> {
    /// The Vorbis comments to search through.
    vorbis_comments: &'a [(String, usize)],
    /// The tag to look for.
    needle: &'a str,
    /// The index of the (name, value) pair that should be inspected next.
    index: usize,
}

impl<'a> GetTag<'a> {
    /// Returns a new `GetTag` iterator that starts at the first comment.
    #[inline]
    pub fn new(vorbis_comments: &'a [(String, usize)], needle: &'a str) -> GetTag<'a> {
        GetTag { vorbis_comments: vorbis_comments, needle: needle, index: 0 }
    }
}

impl<'a> Iterator for GetTag<'a> {
    type Item = &'a str;

    /// Returns the value of the next comment whose name matches the needle.
    #[inline]
    fn next(&mut self) -> Option<&'a str> {
        use std::ascii::AsciiExt;

        let all: &'a [(String, usize)] = self.vorbis_comments;
        let needle = self.needle;
        let start = self.index;

        // Search the comments that have not been inspected yet.
        let hit = all[start..].iter().position(|&(ref comment, sep_idx)| {
            comment[..sep_idx].eq_ignore_ascii_case(needle)
        });

        match hit {
            Some(offset) => {
                // Resume right after the match on the next call.
                self.index = start + offset + 1;
                let (ref comment, sep_idx) = all[start + offset];
                Some(&comment[sep_idx + 1..])
            }
            None => {
                // Everything has been inspected; stay at the end.
                self.index = all.len();
                None
            }
        }
    }
}
#[inline]
fn read_metadata_block_header<R: ReadBytes>(input: &mut R) -> Result<MetadataBlockHeader> {
let byte = try!(input.read_u8());
@ -127,6 +233,7 @@ fn read_metadata_block_header<R: ReadBytes>(input: &mut R) -> Result<MetadataBlo
/// used to decode a single metadata block. For instance, the Ogg format embeds
/// metadata blocks including their header verbatim in packets. This function
/// can be used to decode that raw data.
#[inline]
pub fn read_metadata_block_with_header<R: ReadBytes>(input: &mut R)
-> Result<MetadataBlock> {
let header = try!(read_metadata_block_header(input));
@ -143,6 +250,7 @@ pub fn read_metadata_block_with_header<R: ReadBytes>(input: &mut R)
/// used to decode a single metadata block. For instance, the MP4 format sports
/// a “FLAC Specific Box” which contains the block type and the raw data. This
/// function can be used to decode that raw data.
#[inline]
pub fn read_metadata_block<R: ReadBytes>(input: &mut R,
block_type: u8,
length: u32)
@ -174,9 +282,8 @@ pub fn read_metadata_block<R: ReadBytes>(input: &mut R,
Ok(MetadataBlock::Padding { length: length })
}
4 => {
// TODO: implement Vorbis comment reading. For now, pretend it is padding.
try!(input.skip(length));
Ok(MetadataBlock::Padding { length: length })
let vorbis_comment = try!(read_vorbis_comment_block(input, length));
Ok(MetadataBlock::VorbisComment(vorbis_comment))
}
5 => {
// TODO: implement CUE sheet reading. For now, pretend it is padding.
@ -285,6 +392,106 @@ fn read_streaminfo_block<R: ReadBytes>(input: &mut R) -> Result<StreamInfo> {
Ok(stream_info)
}
/// Reads a Vorbis comment block of `length` bytes from `input`.
///
/// The block consists of a length-prefixed vendor string, a comment count,
/// and that many length-prefixed comments of the form `"NAME=value"`. All
/// lengths are little-endian 32-bit integers. For every comment the index of
/// the first `'='` is stored next to the string.
///
/// Returns `Error::Unsupported` for blocks larger than 10 MiB, and a format
/// error when the block is too short, when a length does not fit in the block,
/// when a comment has no `'='` or an invalid name, when a string is not valid
/// UTF-8, or when the comment count does not match the comments found.
fn read_vorbis_comment_block<R: ReadBytes>(input: &mut R, length: u32) -> Result<VorbisComment> {
    if length < 8 {
        // We expect at a minimum a 32-bit vendor string length, and a 32-bit
        // comment count.
        return fmt_err("Vorbis comment block is too short")
    }

    // Fail if the length of the Vorbis comment block is larger than 10 MiB.
    // This block is full of length-prefixed strings for which we allocate
    // memory up front. If there were no limit on these, a maliciously crafted
    // file could cause OOM by claiming to contain large strings. But at least
    // the strings cannot be longer than the size of the Vorbis comment block,
    // and by limiting the size of that block, we can mitigate such DoS attacks.
    //
    // The typical size of the Vorbis comment block is 1 KiB; on a corpus of
    // real-world flac files, the 0.05 and 0.95 quantiles were 792 and 1257
    // bytes respectively, with even the 0.99 quantile below 2 KiB. The only
    // reason for having a large Vorbis comment block is when cover art is
    // incorrectly embedded there, but the Vorbis comment block is not the right
    // place for that anyway.
    if length > 10 * 1024 * 1024 {
        let msg = "Vorbis comment blocks larger than 10 MiB are not supported";
        return Err(Error::Unsupported(msg))
    }

    // The Vorbis comment block starts with a length-prefixed "vendor string".
    // It cannot be larger than the block length - 8, because there are the
    // 32-bit vendor string length, and comment count.
    let vendor_len = try!(input.read_le_u32());
    if vendor_len > length - 8 { return fmt_err("vendor string too long") }

    // The buffer is zero-initialized rather than having its length set over
    // uninitialized memory; the size is bounded by the block length checked
    // above, so this is cheap.
    let mut vendor_bytes = vec![0u8; vendor_len as usize];
    try!(input.read_into(&mut vendor_bytes));
    let vendor = try!(String::from_utf8(vendor_bytes));

    // Next up is the number of comments. Because every comment is at least 4
    // bytes to indicate its length, there cannot be more comments than the
    // length of the block divided by 4. This is only an upper bound to ensure
    // that we don't allocate a big vector, to protect against DoS attacks.
    let comments_len = try!(input.read_le_u32());
    if comments_len >= length / 4 {
        return fmt_err("too many entries for Vorbis comment block")
    }
    let mut comments = Vec::with_capacity(comments_len as usize);

    let mut bytes_left = length - 8 - vendor_len;

    // For every comment, there is a length-prefixed string of the form
    // "NAME=value".
    while bytes_left >= 4 {
        let comment_len = try!(input.read_le_u32());
        bytes_left -= 4;

        if comment_len > bytes_left {
            return fmt_err("Vorbis comment too long for Vorbis comment block")
        }

        // As for the vendor string, use a zero-initialized buffer.
        let mut comment_bytes = vec![0u8; comment_len as usize];
        try!(input.read_into(&mut comment_bytes));
        bytes_left -= comment_len;

        if let Some(sep_index) = comment_bytes.iter().position(|&x| x == b'=') {
            {
                let name_bytes = &comment_bytes[..sep_index];

                // According to the Vorbis spec, the field name may consist of ascii
                // bytes 0x20 through 0x7d, 0x3d (`=`) excluded. Verifying this has
                // the advantage that if the check passes, the result is valid
                // UTF-8, so the conversion to string will not fail.
                if name_bytes.iter().any(|&x| x < 0x20 || x > 0x7d) {
                    return fmt_err("Vorbis comment field name contains invalid byte")
                }
            }

            let comment = try!(String::from_utf8(comment_bytes));
            comments.push((comment, sep_index));
        } else {
            return fmt_err("Vorbis comment does not contain '='")
        }
    }

    if comments.len() != comments_len as usize {
        return fmt_err("Vorbis comment block contains wrong number of entries")
    }

    // Fewer than four bytes may remain, which is too little for another
    // length prefix. Skip them so that exactly `length` bytes are consumed,
    // and the next metadata block starts at the right position.
    if bytes_left > 0 {
        try!(input.skip(bytes_left));
    }

    let vorbis_comment = VorbisComment {
        vendor: vendor,
        comments: comments,
    };

    Ok(vorbis_comment)
}
fn read_padding_block<R: ReadBytes>(input: &mut R, length: u32) -> Result<()> {
// The specification dictates that all bits of the padding block must be 0.
// However, the reference implementation does not issue an error when this
@ -296,7 +503,15 @@ fn read_padding_block<R: ReadBytes>(input: &mut R, length: u32) -> Result<()> {
fn read_application_block<R: ReadBytes>(input: &mut R, length: u32) -> Result<(u32, Vec<u8>)> {
if length < 4 {
return fmt_err("application block length must be at least 4 bytes.")
return fmt_err("application block length must be at least 4 bytes")
}
// Reject large application blocks to avoid memory-based denial-
// of-service attacks. See also the more elaborate motivation in
// `read_vorbis_comment_block()`.
if length > 10 * 1024 * 1024 {
let msg = "application blocks larger than 10 MiB are not supported";
return Err(Error::Unsupported(msg))
}
let id = try!(input.read_be_u32());
@ -336,6 +551,7 @@ impl<R: ReadBytes> MetadataBlockReader<R> {
}
}
#[inline]
fn read_next(&mut self) -> MetadataBlockResult {
let header = try!(read_metadata_block_header(&mut self.input));
let block = try!(read_metadata_block(&mut self.input, header.block_type, header.length));
@ -347,6 +563,7 @@ impl<R: ReadBytes> MetadataBlockReader<R> {
impl<R: ReadBytes> Iterator for MetadataBlockReader<R> {
type Item = MetadataBlockResult;
#[inline]
fn next(&mut self) -> Option<MetadataBlockResult> {
if self.done {
None
@ -363,6 +580,7 @@ impl<R: ReadBytes> Iterator for MetadataBlockReader<R> {
}
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
// When done, there will be no more blocks,
// when not done, there will be at least one more.

View File

@ -13,7 +13,7 @@ use std::fs;
use std::io;
use std::path::Path;
fn run_metaflac<P: AsRef<Path>>(fname: P) -> String {
fn run_metaflac_streaminfo<P: AsRef<Path>>(fname: P) -> String {
use std::process::Command;
// Run metaflac on the specified file and print all streaminfo data.
@ -33,6 +33,19 @@ fn run_metaflac<P: AsRef<Path>>(fname: P) -> String {
String::from_utf8(output.stdout).expect("metaflac wrote invalid UTF-8")
}
/// Runs `metaflac --block-type=VORBIS_COMMENT --list` on `fname` and returns
/// everything the tool wrote to standard output.
///
/// Panics if the filename is not valid UTF-8, if `metaflac` cannot be run, or
/// if its output is not valid UTF-8.
fn run_metaflac_vorbis_comment<P: AsRef<Path>>(fname: P) -> String {
    use std::process::Command;

    let path = fname.as_ref().to_str().expect("unsupported filename");

    // Ask metaflac to list only the Vorbis comment block of the file.
    let mut metaflac = Command::new("metaflac");
    metaflac.arg("--block-type=VORBIS_COMMENT");
    metaflac.arg("--list");
    metaflac.arg(path);

    let stdout = metaflac.output().expect("failed to run metaflac").stdout;
    String::from_utf8(stdout).expect("metaflac wrote invalid UTF-8")
}
fn decode_file<P: AsRef<Path>>(fname: P) -> Vec<u8> {
use std::process::Command;
@ -55,11 +68,8 @@ fn print_hex(seq: &[u8]) -> String {
}
fn read_streaminfo<P: AsRef<Path>>(fname: P) -> String {
// Use a buffered reader, this speeds up the test by 120%.
let file = fs::File::open(fname).unwrap();
let reader = io::BufReader::new(file);
let stream = claxon::FlacReader::new(reader).unwrap();
let streaminfo = stream.streaminfo();
let reader = claxon::FlacReader::open(fname).unwrap();
let streaminfo = reader.streaminfo();
// Format the streaminfo in the same way that metaflac prints it.
format!("{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n",
@ -74,8 +84,8 @@ fn read_streaminfo<P: AsRef<Path>>(fname: P) -> String {
print_hex(&streaminfo.md5sum)) // TODO implement LowerHex for &[u8] and submit a PR.
}
fn compare_metaflac<P: AsRef<Path>>(fname: P) {
let metaflac = run_metaflac(&fname);
fn compare_metaflac_streaminfo<P: AsRef<Path>>(fname: P) {
let metaflac = run_metaflac_streaminfo(&fname);
let streaminfo = read_streaminfo(&fname);
let mut mf_lines = metaflac.lines();
let mut si_lines = streaminfo.lines();
@ -88,6 +98,68 @@ fn compare_metaflac<P: AsRef<Path>>(fname: P) {
}
}
/// Checks that the vendor string and the tags that Claxon reads from `fname`
/// agree with what `metaflac` lists for the Vorbis comment block of that file.
///
/// Panics (and thereby fails the calling test) on the first mismatch.
fn compare_metaflac_vorbis_comment<P: AsRef<Path>>(fname: P) {
    let vendor_prefix = " vendor string: ";
    let comment_prefix = " comment[";

    let listing = run_metaflac_vorbis_comment(&fname);
    let reader = claxon::FlacReader::open(fname).unwrap();
    let mut listing_lines = listing.lines();

    // Consume the listing up to and including the vendor string line. If
    // there is no such line, the listing is exhausted and nothing is compared.
    if let Some(vendor_line) = listing_lines.by_ref().find(|l| l.starts_with(vendor_prefix)) {
        let expected_vendor = &vendor_line[vendor_prefix.len()..];
        let actual_vendor = reader.vendor();

        // A vendor string that starts with a null byte is not printed by
        // metaflac (presumably because it uses C-style strings), so in that
        // case we only verify that metaflac printed nothing.
        let starts_with_nul = match actual_vendor {
            Some(vendor) => vendor.starts_with('\0'),
            None => false,
        };
        if starts_with_nul {
            assert_eq!("", expected_vendor);
        } else {
            assert_eq!(actual_vendor, Some(expected_vendor));
        }
    }

    let mut tags = reader.tags();

    // Every remaining "comment[i]: NAME=value" line must match the next tag.
    while let Some(line) = listing_lines.next() {
        if !line.starts_with(comment_prefix) {
            continue
        }

        // Strip the "comment[" prefix, then skip past the index up to and
        // including the first space; what remains is the "NAME=value" pair.
        let after_prefix = &line[comment_prefix.len()..];
        let pair_start = after_prefix.find(' ').unwrap() + 1;
        let pair = &after_prefix[pair_start..];
        let eq_index = pair.find('=').unwrap();
        let expected_name = &pair[..eq_index];
        let expected_value = &pair[eq_index + 1..];

        let (name, full_value) = tags.next().unwrap();
        let mut remaining_value_lines = full_value.lines();
        let first_value_line = remaining_value_lines.next().unwrap_or("");

        assert_eq!(name, expected_name);
        assert_eq!(first_value_line, expected_value);

        // A value with embedded newlines spans several lines of the metaflac
        // output, so the continuation lines are read from there too. This
        // trusts the newline count that Claxon read, and because `.lines()`
        // also splits on "\r\n", carriage returns are not verified precisely;
        // metaflac does not escape them, so we could not do better anyway.
        for value_line in remaining_value_lines {
            assert_eq!(value_line, listing_lines.next().unwrap());
        }
    }
}
fn compare_decoded_stream<P: AsRef<Path>>(fname: P) {
let wav = decode_file(&fname);
let cursor = io::Cursor::new(wav);
@ -96,17 +168,15 @@ fn compare_decoded_stream<P: AsRef<Path>>(fname: P) {
// we read with Hound) to how Claxon decodes it, sample by sample.
let mut ref_wav_reader = hound::WavReader::new(cursor).unwrap();
let try_file = fs::File::open(fname).unwrap();
let try_buf_reader = io::BufReader::new(try_file);
let mut try_flac_reader = claxon::FlacReader::new(try_buf_reader).unwrap();
let mut flac_reader = claxon::FlacReader::open(fname).unwrap();
// The streaminfo test will ensure that things like bit depth and
// sample rate match, only the actual samples are compared here.
let mut ref_samples = ref_wav_reader.samples::<i32>();
let samples = try_flac_reader.streaminfo().samples.unwrap();
let n_channels = try_flac_reader.streaminfo().channels;
let mut blocks = try_flac_reader.blocks();
let samples = flac_reader.streaminfo().samples.unwrap();
let n_channels = flac_reader.streaminfo().channels;
let mut blocks = flac_reader.blocks();
let mut sample = 0u64;
let mut b = 0u64;
let mut buffer = Vec::new();
@ -146,42 +216,110 @@ fn compare_decoded_stream<P: AsRef<Path>>(fname: P) {
#[test]
fn verify_streaminfo_p0() {
compare_metaflac("testsamples/p0.flac");
compare_metaflac_streaminfo("testsamples/p0.flac");
}
#[test]
fn verify_streaminfo_p1() {
compare_metaflac("testsamples/p1.flac");
compare_metaflac_streaminfo("testsamples/p1.flac");
}
#[test]
fn verify_streaminfo_p2() {
compare_metaflac("testsamples/p2.flac");
compare_metaflac_streaminfo("testsamples/p2.flac");
}
#[test]
fn verify_streaminfo_p3() {
compare_metaflac("testsamples/p3.flac");
compare_metaflac_streaminfo("testsamples/p3.flac");
}
#[test]
fn verify_streaminfo_p4() {
compare_metaflac("testsamples/p4.flac");
compare_metaflac_streaminfo("testsamples/p4.flac");
}
#[test]
fn verify_streaminfo_pop() {
compare_metaflac("testsamples/pop.flac");
compare_metaflac_streaminfo("testsamples/pop.flac");
}
#[test]
fn verify_streaminfo_short() {
compare_metaflac("testsamples/short.flac");
compare_metaflac_streaminfo("testsamples/short.flac");
}
#[test]
fn verify_streaminfo_wasted_bits() {
compare_metaflac("testsamples/wasted_bits.flac");
compare_metaflac_streaminfo("testsamples/wasted_bits.flac");
}
/// The Vorbis comments Claxon reads from `p0.flac` must match metaflac's.
#[test]
fn verify_vorbis_comment_p0() {
    let sample = "testsamples/p0.flac";
    compare_metaflac_vorbis_comment(sample);
}
/// The Vorbis comments Claxon reads from `p1.flac` must match metaflac's.
#[test]
fn verify_vorbis_comment_p1() {
    let sample = "testsamples/p1.flac";
    compare_metaflac_vorbis_comment(sample);
}
/// The Vorbis comments Claxon reads from `p2.flac` must match metaflac's.
#[test]
fn verify_vorbis_comment_p2() {
    let sample = "testsamples/p2.flac";
    compare_metaflac_vorbis_comment(sample);
}
/// The Vorbis comments Claxon reads from `p3.flac` must match metaflac's.
#[test]
fn verify_vorbis_comment_p3() {
    let sample = "testsamples/p3.flac";
    compare_metaflac_vorbis_comment(sample);
}
/// The Vorbis comments Claxon reads from `p4.flac` must match metaflac's.
#[test]
fn verify_vorbis_comment_p4() {
    let sample = "testsamples/p4.flac";
    compare_metaflac_vorbis_comment(sample);
}
/// Looking up a tag by name must ignore the case of the name, and must yield
/// nothing for a name that does not occur in the file.
#[test]
fn test_flac_reader_get_tag_is_case_insensitive() {
    // According to metaflac, p4.flac contains the following metadata:
    //
    //   METADATA block #2
    //     type: 4 (VORBIS_COMMENT)
    //     is last: false
    //     length: 241
    //     vendor string: reference libFLAC 1.1.0 20030126
    //     comments: 5
    //       comment[0]: REPLAYGAIN_TRACK_PEAK=0.69879150
    //       comment[1]: REPLAYGAIN_TRACK_GAIN=-4.00 dB
    //       comment[2]: REPLAYGAIN_ALBUM_PEAK=0.69879150
    //       comment[3]: REPLAYGAIN_ALBUM_GAIN=-3.68 dB
    //       comment[4]: Comment=Encoded by FLAC v1.1.1a with FLAC Frontend v1.7.1
    let reader = claxon::FlacReader::open("testsamples/p4.flac").unwrap();

    // First the name exactly as stored, then the same name in lowercase: both
    // spellings must yield the single stored value and nothing else.
    for &name in &["REPLAYGAIN_TRACK_GAIN", "replaygain_track_gain"] {
        let mut values = reader.get_tag(name);
        assert_eq!(values.next(), Some("-4.00 dB"));
        assert_eq!(values.next(), None);
    }

    // A tag that is absent from the file yields no values at all.
    let mut absent = reader.get_tag("foobar");
    assert_eq!(absent.next(), None);
}
/// When a tag name occurs more than once, the lookup must yield every value,
/// in the order in which the tags are stored in the file.
#[test]
fn test_flac_reader_get_tag_returns_all_matches() {
    let path = "testsamples/repeated_vorbis_comment.flac";
    let reader = claxon::FlacReader::open(path).unwrap();

    // The file holds exactly two FOO tags: `FOO=bar` followed by `FOO=baz`.
    let mut values = reader.get_tag("FOO");
    for &expected in &["bar", "baz"] {
        assert_eq!(values.next(), Some(expected));
    }
    assert_eq!(values.next(), None);
}
#[test]
@ -226,6 +364,31 @@ fn verify_decoded_stream_wasted_bits() {
compare_decoded_stream("testsamples/wasted_bits.flac");
}
/// A vendor string whose claimed length exceeds the enclosing block must be
/// rejected with a format error rather than be read.
#[test]
fn verify_limits_on_vendor_string() {
    // The vendor string length stored in this file would not fit in the
    // Vorbis comment block that contains it.
    let input = fs::File::open("testsamples/large_vendor_string.flac").unwrap();

    let error = match claxon::FlacReader::new(input) {
        Err(error) => error,
        Ok(..) => panic!("This file should fail to load"),
    };

    assert_eq!(error, claxon::Error::FormatError("vendor string too long"))
}
/// An excessively large Vorbis comment block must be refused as unsupported,
/// so that a crafted file cannot make the decoder run out of memory.
#[test]
fn verify_limits_on_vorbis_comment_block() {
    // The Vorbis comment block in this file claims to be very large.
    let input = fs::File::open("testsamples/large_vorbis_comment_block.flac").unwrap();

    let error = match claxon::FlacReader::new(input) {
        Err(error) => error,
        Ok(..) => panic!("This file should fail to load"),
    };

    // Only the error kind matters here, not the message it carries.
    match error {
        claxon::Error::Unsupported(..) => { }
        _ => panic!("Expected 'Unsupported' error."),
    }
}
#[test]
fn verify_extra_samples() {
use std::ffi::OsStr;
@ -246,7 +409,8 @@ fn verify_extra_samples() {
if path.is_file() && path.extension() == Some(OsStr::new("flac")) {
print!(" comparing {} ...", path.to_str()
.expect("unsupported filename"));
compare_metaflac(&path);
compare_metaflac_streaminfo(&path);
compare_metaflac_vorbis_comment(&path);
compare_decoded_stream(&path);
println!(" ok");
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

42
tools/benchmark_metadata.sh Executable file
View File

@ -0,0 +1,42 @@
#!/bin/sh

# This script runs the bench_metadata program, which reads at most 1024 flac
# files in the testsamples/extra directory, and collects the results. It expects
# a basename for the output files. It is useful to use a directory plus a short
# identifier, e.g. "measurements/baseline". Then after making a change, run this
# script with "measurements/after" as basename. Results can be compared with the
# compare_benches.r script.
#
# Usage: tools/benchmark_metadata.sh <basename>
#
# Output: <basename>_<commit>_<i>.dat for i = 1..10, plus the concatenation of
# those files in <basename>_<commit>_all.dat.

# Exit if any command fails.
set -e

if [ -z "$1" ]; then
  echo "You must provide a basename for the file to write the results to."
  exit 1
fi

# Put the Git commit in the basename so I can cross-reference later.
bname="$1_$(git rev-parse @ | cut -c 1-7)"

# Disable automatic CPU frequency scaling to get lower variance measurements.
if ! grep -q "performance" /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor; then
  echo "Locking CPU clock speed to its maximum. This requires root access."
  echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor > /dev/null
fi

# Optimize for the current CPU specifically, and include debugging symbols.
export RUSTFLAGS="-C target-cpu=native -g"

# Compile the benchmarking program. This must be the same example as the binary
# that is executed in the loop below, otherwise a stale (or missing) binary
# would be measured.
cargo build --release --example bench_metadata

# Use an explicit counter rather than "{1..10}": brace expansion is not part of
# POSIX sh, and shells such as dash would run the loop only once with the
# literal string "{1..10}".
i=1
while [ "$i" -le 10 ]; do
  echo "[$i/10] Benchmarking ..."

  # Run the benchmarks with "taskset" to lock them to the same CPU core for the
  # entire program, to lower variance in the measurements.
  taskset -c 1 target/release/examples/bench_metadata > "${bname}_${i}.dat"

  i=$((i + 1))
done

# Merge the output files. The previous merged file is removed first, so that the
# glob below does not pick it up. The basename is quoted so that a basename with
# spaces or glob characters is not split or expanded.
rm -f "${bname}_all.dat"
cat "${bname}"_*.dat > "${bname}_all.dat"