// unnieversal/crates/pvoc-rs/src/lib.rs

extern crate apodize;
extern crate rustfft;
#[cfg(test)]
#[macro_use]
extern crate approx;
#[cfg(test)]
extern crate rand;
use rustfft::num_complex::Complex;
use rustfft::num_traits::{Float, FromPrimitive, ToPrimitive};
use std::collections::VecDeque;
use std::f64::consts::PI;
use std::sync::Arc;
#[allow(non_camel_case_types)]
type c64 = Complex<f64>;
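/// A spectral bin produced by the analysis stage and consumed by the synthesis
/// stage. Implemented by [`PhaseBin`] and [`FreqBin`].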
pub trait Bin: Clone + Copy {
fn empty() -> Self;
fn new(freq: f64, phase: f64, amp: f64) -> Self;
fn amp(&self) -> f64;
fn phase(&self, pvoc: &PhaseVocoder<Self>) -> f64;
}
/// Represents a component of the spectrum, composed of a phase and amplitude.
#[derive(Copy, Clone)]
pub struct PhaseBin {
pub phase: f64,
pub amp: f64,
}
impl Bin for PhaseBin {
fn empty() -> Self {
Self {
phase: 0.0,
amp: 0.0,
}
}
fn new(_: f64, phase: f64, amp: f64) -> Self {
Self { phase, amp }
}
fn amp(&self) -> f64 {
self.amp
}
fn phase(&self, _: &PhaseVocoder<Self>) -> f64 {
self.phase
}
}
/// Represents a component of the spectrum, composed of a frequency and amplitude.
#[derive(Copy, Clone)]
pub struct FreqBin {
pub freq: f64,
pub amp: f64,
}
impl Bin for FreqBin {
fn empty() -> Self {
Self {
freq: 0.0,
amp: 0.0,
}
}
fn new(freq: f64, _: f64, amp: f64) -> Self {
Self { freq, amp }
}
fn amp(&self) -> f64 {
self.amp
}
fn phase(&self, pvoc: &PhaseVocoder<Self>) -> f64 {
pvoc.frequency_to_phase(self.freq)
}
}
/// A phase vocoder.
///
/// Roughly translated from http://blogs.zynaptiq.com/bernsee/pitch-shifting-using-the-ft/
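///
/// # Example
///
/// A minimal pitch-shifting sketch in the spirit of the article above. The
/// crate name `pvoc` and the `shift` factor are assumptions made for
/// illustration; they are not defined in this file.
///
/// ```
/// use pvoc::{PhaseVocoder, FreqBin};
///
/// let mut pvoc: PhaseVocoder<FreqBin> = PhaseVocoder::new(1, 44100.0, 256, 4);
/// let shift = 1.5; // scale every bin frequency by 3/2
/// let input = vec![0.0f32; 1024];
/// let mut output = vec![0.0f32; 1024];
/// pvoc.process(&[&input], &mut [&mut output], |channels, bins, analysis, synthesis| {
///     for ch in 0..channels {
///         for b in 0..bins {
///             // Move each analysis bin to a scaled position and scale its
///             // frequency. This relies on the synthesis bins starting out
///             // empty; see the remark on `process`.
///             let shifted = ((b as f64) * shift) as usize;
///             if shifted < bins {
///                 synthesis[ch][shifted].freq = analysis[ch][b].freq * shift;
///                 synthesis[ch][shifted].amp += analysis[ch][b].amp;
///             }
///         }
///     }
/// });
/// ```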
pub struct PhaseVocoder<B: Bin = FreqBin> {
channels: usize,
sample_rate: f64,
frame_size: usize,
time_res: usize,
samples_waiting: usize,
in_buf: Vec<VecDeque<f64>>,
out_buf: Vec<VecDeque<f64>>,
last_phase: Vec<Vec<f64>>,
sum_phase: Vec<Vec<f64>>,
output_accum: Vec<VecDeque<f64>>,
forward_fft: Arc<dyn rustfft::Fft<f64>>,
backward_fft: Arc<dyn rustfft::Fft<f64>>,
window: Vec<f64>,
fft_in: Vec<c64>,
fft_out: Vec<c64>,
fft_scratch: Vec<c64>,
analysis_out: Vec<Vec<B>>,
synthesis_in: Vec<Vec<B>>,
}
impl<B: Bin> PhaseVocoder<B> {
/// Constructs a new phase vocoder.
///
/// `channels` is the number of channels of audio.
///
/// `sample_rate` is the sample rate.
///
/// `frame_size` is the Fourier transform size. It must be `> 1`.
/// For optimal computation speed, this should be a power of 2.
/// It will be rounded down to a multiple of `time_res` (and raised to
/// `time_res` if that rounding would yield zero).
///
/// `time_res` is the overlap factor: the number of analysis/synthesis steps
/// per frame. The hop size is `frame_size / time_res` samples.
///
/// # Panics
/// Panics if `frame_size` is `<= 1` after rounding.
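///
/// # Example
///
/// A minimal construction sketch; the crate name `pvoc` and the concrete
/// parameter values are assumptions made for illustration.
///
/// ```
/// use pvoc::PhaseVocoder;
///
/// // Stereo, 44.1 kHz, 512-sample frames, 4x overlap (hop size 128).
/// let pvoc: PhaseVocoder = PhaseVocoder::new(2, 44100.0, 512, 4);
/// assert_eq!(pvoc.num_channels(), 2);
/// assert_eq!(pvoc.num_bins(), 512);
/// ```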
pub fn new(channels: usize, sample_rate: f64, frame_size: usize, time_res: usize) -> Self {
let mut frame_size = frame_size / time_res * time_res;
if frame_size == 0 {
frame_size = time_res;
}
// If `frame_size == 1`, computing the window would panic.
assert!(frame_size > 1);
let mut fft_planner = rustfft::FftPlanner::new();
let mut pv = PhaseVocoder {
channels,
sample_rate,
frame_size,
time_res,
samples_waiting: 0,
in_buf: vec![VecDeque::new(); channels],
out_buf: vec![VecDeque::new(); channels],
last_phase: vec![vec![0.0; frame_size]; channels],
sum_phase: vec![vec![0.0; frame_size]; channels],
output_accum: vec![VecDeque::new(); channels],
forward_fft: fft_planner.plan_fft(frame_size, rustfft::FftDirection::Forward),
backward_fft: fft_planner.plan_fft(frame_size, rustfft::FftDirection::Inverse),
window: apodize::hanning_iter(frame_size)
.map(|x| x.sqrt())
.collect(),
fft_in: vec![c64::new(0.0, 0.0); frame_size],
fft_out: vec![c64::new(0.0, 0.0); frame_size],
fft_scratch: vec![],
analysis_out: vec![vec![B::empty(); frame_size]; channels],
synthesis_in: vec![vec![B::empty(); frame_size]; channels],
};
pv.fft_scratch = vec![
c64::new(0.0, 0.0);
pv.forward_fft
.get_outofplace_scratch_len()
.max(pv.backward_fft.get_outofplace_scratch_len())
];
pv
}
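/// Returns the number of audio channels.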
pub fn num_channels(&self) -> usize {
self.channels
}
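/// Returns the number of frequency bins per frame (equal to the frame size).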
pub fn num_bins(&self) -> usize {
self.frame_size
}
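/// Returns the overlap factor (`time_res`) that was passed to `new`.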
pub fn time_res(&self) -> usize {
self.time_res
}
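/// Returns the sample rate in Hz.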
pub fn sample_rate(&self) -> f64 {
self.sample_rate
}
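/// Sets the sample rate in Hz.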
pub fn set_sample_rate(&mut self, sample_rate: f64) {
self.sample_rate = sample_rate;
}
/// Reads samples from `input`, processes the samples, then resynthesizes as many samples as
/// possible into `output`. Returns the number of frames written to `output`.
///
/// `processor` is a function to manipulate the spectrum before it is resynthesized. Its
/// arguments are respectively `num_channels`, `num_bins`, `analysis_output` and
/// `synthesis_input`.
///
/// Samples are expected to be normalized to the range [-1, 1].
///
/// This method can be called multiple times on the same `PhaseVocoder`.
/// If this happens, in the analysis step, it will be assumed that the `input` is a continuation
/// of the `input` that was passed during the previous call.
///
/// It is possible that not enough data is available yet to fill `output` completely.
/// In that case, only the first frames of `output` will be written to.
/// Conversely, if there is more data available than `output` can hold, the remaining
/// output is kept in the `PhaseVocoder` and can be retrieved with another call to
/// `process` when more input data is available.
///
/// # Remark
/// The `synthesis_input` passed to the `processor_function` is currently initialised to empty
/// bins. This behaviour may change in a future release, so make sure that your implementation
/// does not rely on it.
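///
/// # Example
///
/// A pass-through sketch that copies the analysis bins straight into the
/// synthesis bins; the crate name `pvoc` and the buffer sizes are assumptions
/// made for illustration.
///
/// ```
/// use pvoc::PhaseVocoder;
///
/// let mut pvoc: PhaseVocoder = PhaseVocoder::new(1, 44100.0, 256, 4);
/// let input = vec![0.0f32; 1024];
/// let mut output = vec![0.0f32; 1024];
/// let frames = pvoc.process(&[&input], &mut [&mut output], |channels, bins, analysis, synthesis| {
///     for ch in 0..channels {
///         for b in 0..bins {
///             synthesis[ch][b] = analysis[ch][b];
///         }
///     }
/// });
/// // Because of the analysis latency, fewer frames than were fed in may be
/// // available after the first call.
/// assert!(frames <= input.len());
/// ```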
pub fn process<S, F>(
&mut self,
input: &[&[S]],
output: &mut [&mut [S]],
mut processor: F,
) -> usize
where
S: Float + ToPrimitive + FromPrimitive,
F: FnMut(usize, usize, &[Vec<B>], &mut [Vec<B>]),
{
assert_eq!(
input.len(),
self.channels,
"input length does not equal channel count"
);
assert_eq!(
output.len(),
self.channels,
"output length does not equal channel count"
);
// push samples to input queue
for chan in 0..input.len() {
for sample in input[chan].iter() {
self.in_buf[chan].push_back(sample.to_f64().unwrap());
self.samples_waiting += 1;
}
}
while self.samples_waiting >= 2 * self.frame_size * self.channels {
let frame_sizef = self.frame_size as f64;
let time_resf = self.time_res as f64;
let step_size = frame_sizef / time_resf;
for _ in 0..self.time_res {
// Initialise the synthesis bins to empty bins.
// This may be removed in a future release.
for synthesis_channel in self.synthesis_in.iter_mut() {
for bin in synthesis_channel.iter_mut() {
*bin = B::empty();
}
}
// ANALYSIS
for chan in 0..self.channels {
// read in
for i in 0..self.frame_size {
self.fft_in[i] = c64::new(self.in_buf[chan][i] * self.window[i], 0.0);
}
self.forward_fft.process_outofplace_with_scratch(
&mut self.fft_in,
&mut self.fft_out,
&mut self.fft_scratch,
);
for i in 0..self.frame_size {
let x = self.fft_out[i];
let (amp, phase) = x.to_polar();
let bin_phase = phase - self.last_phase[chan][i];
let freq = self.phase_to_frequency(i, bin_phase);
self.last_phase[chan][i] = phase;
// yeah passing both and letting the constructor decide is ugly
// but it's fast to do so
self.analysis_out[chan][i] = B::new(freq, bin_phase, amp * 2.0);
}
}
// PROCESSING
processor(
self.channels,
self.frame_size,
&self.analysis_out,
&mut self.synthesis_in,
);
// SYNTHESIS
for chan in 0..self.channels {
for i in 0..self.frame_size {
let amp = self.synthesis_in[chan][i].amp();
// passing self as a param is slightly ugly but hey
// it works
let phase = self.synthesis_in[chan][i].phase(self);
self.sum_phase[chan][i] += phase;
let phase = self.sum_phase[chan][i];
self.fft_in[i] = c64::from_polar(amp, phase);
}
self.backward_fft.process_outofplace_with_scratch(
&mut self.fft_in,
&mut self.fft_out,
&mut self.fft_scratch,
);
// accumulate
for i in 0..self.frame_size {
if i == self.output_accum[chan].len() {
self.output_accum[chan].push_back(0.0);
}
self.output_accum[chan][i] +=
self.window[i] * self.fft_out[i].re / (frame_sizef * time_resf);
}
// write out
for _ in 0..step_size as usize {
self.out_buf[chan].push_back(self.output_accum[chan].pop_front().unwrap());
self.in_buf[chan].pop_front();
}
}
}
self.samples_waiting -= self.frame_size * self.channels;
}
// pop samples from output queue
let mut n_written = 0;
for chan in 0..self.channels {
for samp in 0..output[chan].len() {
output[chan][samp] = match self.out_buf[chan].pop_front() {
Some(x) => FromPrimitive::from_f64(x).unwrap(),
None => break,
};
n_written += 1;
}
}
n_written / self.channels
}
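/// Converts the phase difference observed for `bin` between two consecutive
/// analysis frames into a frequency estimate in Hz (the standard phase-vocoder
/// phase-unwrapping step).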
pub fn phase_to_frequency(&self, bin: usize, phase: f64) -> f64 {
let frame_sizef = self.frame_size as f64;
let freq_per_bin = self.sample_rate / frame_sizef;
let time_resf = self.time_res as f64;
let step_size = frame_sizef / time_resf;
let expect = 2.0 * PI * step_size / frame_sizef;
let mut tmp = phase;
tmp -= (bin as f64) * expect;
let mut qpd = (tmp / PI) as i32;
if qpd >= 0 {
qpd += qpd & 1;
} else {
qpd -= qpd & 1;
}
tmp -= PI * (qpd as f64);
tmp = time_resf * tmp / (2.0 * PI);
tmp = (bin as f64) * freq_per_bin + tmp * freq_per_bin;
tmp
}
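/// Converts a frequency in Hz into the expected phase advance per synthesis
/// step: `2 * PI * freq / sample_rate * step_size`, where
/// `step_size = frame_size / time_res`.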
pub fn frequency_to_phase(&self, freq: f64) -> f64 {
let step_size = self.frame_size as f64 / self.time_res as f64;
2.0 * PI * freq / self.sample_rate * step_size
}
}
#[cfg(test)]
fn identity(channels: usize, bins: usize, input: &[Vec<FreqBin>], output: &mut [Vec<FreqBin>]) {
for i in 0..channels {
for j in 0..bins {
output[i][j] = input[i][j];
}
}
}
#[cfg(test)]
fn test_data_is_reconstructed(mut pvoc: PhaseVocoder, input_samples: &[f32]) {
let mut output_samples = vec![0.0; input_samples.len()];
let frame_size = pvoc.num_bins();
// Pre-padding, not collecting any output.
pvoc.process(&[&vec![0.0; frame_size]], &mut [&mut Vec::new()], identity);
// The data itself, collecting some output that we will discard.
let mut scratch = vec![0.0; frame_size];
pvoc.process(&[&input_samples], &mut [&mut scratch], identity);
// Post-padding and collecting all output
pvoc.process(
&[&vec![0.0; frame_size]],
&mut [&mut output_samples],
identity,
);
assert_ulps_eq!(input_samples, output_samples.as_slice(), epsilon = 1e-2);
}
#[test]
fn identity_transform_reconstructs_original_data_hat_function() {
let window_len = 256;
let pvoc = PhaseVocoder::new(1, 44100.0, window_len, window_len / 4);
let input_len = 1024;
let mut input_samples = vec![0.0; input_len];
for i in 0..input_len {
if i < input_len / 2 {
input_samples[i] = (i as f32) / ((input_len / 2) as f32)
} else {
input_samples[i] = 2.0 - (i as f32) / ((input_len / 2) as f32);
}
}
test_data_is_reconstructed(pvoc, input_samples.as_slice());
}
#[test]
fn identity_transform_reconstructs_original_data_random_data() {
use rand::rngs::SmallRng;
use rand::{Rng, SeedableRng};
let mut rng = SmallRng::seed_from_u64(1);
let mut input_samples = [0.0; 16384];
rng.fill(&mut input_samples[..]);
let pvoc = PhaseVocoder::new(1, 44100.0, 256, 256 / 4);
test_data_is_reconstructed(pvoc, &input_samples);
}
#[test]
fn process_works_with_time_res_equal_to_window() {
let mut pvoc = PhaseVocoder::new(1, 44100.0, 256, 256);
let input_len = 1024;
let input_samples = vec![0.0; input_len];
let mut output_samples = vec![0.0; input_len];
pvoc.process(&[&input_samples], &mut [&mut output_samples], identity);
}
#[test]
fn process_works_when_reading_sample_by_sample() {
let mut pvoc = PhaseVocoder::new(1, 44100.0, 8, 2);
let input_len = 32;
let input_samples = vec![0.0; input_len];
let mut output_samples = vec![0.0; input_len];
for i in 0..input_samples.len() {
pvoc.process(
&[&input_samples[i..i + 1]],
&mut [&mut output_samples],
identity,
);
}
}