2021-07-24 16:40:59 +00:00
|
|
|
extern crate apodize;
|
|
|
|
extern crate rustfft;
|
|
|
|
#[cfg(test)]
|
|
|
|
#[macro_use]
|
|
|
|
extern crate approx;
|
|
|
|
#[cfg(test)]
|
|
|
|
extern crate rand;
|
|
|
|
|
|
|
|
use rustfft::num_complex::Complex;
|
|
|
|
use rustfft::num_traits::{Float, FromPrimitive, ToPrimitive};
|
|
|
|
use std::collections::VecDeque;
|
|
|
|
use std::f64::consts::PI;
|
|
|
|
use std::sync::Arc;
|
|
|
|
|
|
|
|
#[allow(non_camel_case_types)]
|
|
|
|
type c64 = Complex<f64>;
|
|
|
|
|
2021-09-16 15:42:18 +00:00
|
|
|
pub trait Bin: Clone + Copy {
|
|
|
|
fn empty() -> Self;
|
|
|
|
fn new(freq: f64, phase: f64, amp: f64) -> Self;
|
|
|
|
fn amp(&self) -> f64;
|
|
|
|
fn phase(&self, pvoc: &PhaseVocoder<Self>) -> f64;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Represents a component of the spectrum, composed of a phase and amplitude.
|
2021-07-24 16:40:59 +00:00
|
|
|
#[derive(Copy, Clone)]
|
2021-09-16 15:42:18 +00:00
|
|
|
pub struct PhaseBin {
|
|
|
|
pub phase: f64,
|
2021-07-24 16:40:59 +00:00
|
|
|
pub amp: f64,
|
|
|
|
}
|
2021-09-16 15:42:18 +00:00
|
|
|
impl Bin for PhaseBin {
|
|
|
|
fn empty() -> Self {
|
|
|
|
Self {
|
|
|
|
phase: 0.0,
|
|
|
|
amp: 0.0,
|
2021-07-24 16:40:59 +00:00
|
|
|
}
|
|
|
|
}
|
2021-09-16 15:42:18 +00:00
|
|
|
fn new(_: f64, phase: f64, amp: f64) -> Self {
|
|
|
|
Self { phase, amp }
|
|
|
|
}
|
|
|
|
|
|
|
|
fn amp(&self) -> f64 {
|
|
|
|
self.amp
|
|
|
|
}
|
|
|
|
fn phase(&self, _: &PhaseVocoder<Self>) -> f64 {
|
|
|
|
self.phase
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/// Represents a component of the spectrum, composed of a frequency and amplitude.
|
|
|
|
#[derive(Copy, Clone)]
|
|
|
|
pub struct FreqBin {
|
|
|
|
pub freq: f64,
|
|
|
|
pub amp: f64,
|
|
|
|
}
|
|
|
|
impl Bin for FreqBin {
|
|
|
|
fn empty() -> Self {
|
|
|
|
Self {
|
2021-07-24 16:40:59 +00:00
|
|
|
freq: 0.0,
|
|
|
|
amp: 0.0,
|
|
|
|
}
|
|
|
|
}
|
2021-09-16 15:42:18 +00:00
|
|
|
fn new(freq: f64, _: f64, amp: f64) -> Self {
|
|
|
|
Self { freq, amp }
|
|
|
|
}
|
|
|
|
|
|
|
|
fn amp(&self) -> f64 {
|
|
|
|
self.amp
|
|
|
|
}
|
|
|
|
|
|
|
|
fn phase(&self, pvoc: &PhaseVocoder<Self>) -> f64 {
|
|
|
|
pvoc.frequency_to_phase(self.freq)
|
|
|
|
}
|
2021-07-24 16:40:59 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// A phase vocoder.
|
|
|
|
///
|
|
|
|
/// Roughly translated from http://blogs.zynaptiq.com/bernsee/pitch-shifting-using-the-ft/
|
2021-09-16 15:42:18 +00:00
|
|
|
pub struct PhaseVocoder<B: Bin = FreqBin> {
|
2021-07-24 16:40:59 +00:00
|
|
|
channels: usize,
|
|
|
|
sample_rate: f64,
|
|
|
|
frame_size: usize,
|
|
|
|
time_res: usize,
|
|
|
|
|
|
|
|
samples_waiting: usize,
|
|
|
|
in_buf: Vec<VecDeque<f64>>,
|
|
|
|
out_buf: Vec<VecDeque<f64>>,
|
|
|
|
last_phase: Vec<Vec<f64>>,
|
|
|
|
sum_phase: Vec<Vec<f64>>,
|
|
|
|
output_accum: Vec<VecDeque<f64>>,
|
|
|
|
|
|
|
|
forward_fft: Arc<dyn rustfft::Fft<f64>>,
|
|
|
|
backward_fft: Arc<dyn rustfft::Fft<f64>>,
|
|
|
|
|
|
|
|
window: Vec<f64>,
|
|
|
|
|
|
|
|
fft_in: Vec<c64>,
|
|
|
|
fft_out: Vec<c64>,
|
|
|
|
fft_scratch: Vec<c64>,
|
2021-09-16 15:42:18 +00:00
|
|
|
analysis_out: Vec<Vec<B>>,
|
|
|
|
synthesis_in: Vec<Vec<B>>,
|
2021-07-24 16:40:59 +00:00
|
|
|
}
|
|
|
|
|
2021-09-16 15:42:18 +00:00
|
|
|
impl<B: Bin> PhaseVocoder<B> {
|
2021-07-24 16:40:59 +00:00
|
|
|
/// Constructs a new phase vocoder.
|
|
|
|
///
|
|
|
|
/// `channels` is the number of channels of audio.
|
|
|
|
///
|
|
|
|
/// `sample_rate` is the sample rate.
|
|
|
|
///
|
|
|
|
/// `frame_size` is the fourier transform size. It must be `> 1`.
|
|
|
|
/// For optimal computation speed, this should be a power of 2.
|
|
|
|
/// Will be rounded to a multiple of `time_res`.
|
|
|
|
///
|
|
|
|
/// `time_res` is the number of frames to overlap.
|
|
|
|
///
|
|
|
|
/// # Panics
|
|
|
|
/// Panics if `frame_size` is `<= 1` after rounding.
|
2021-09-16 15:42:18 +00:00
|
|
|
pub fn new(channels: usize, sample_rate: f64, frame_size: usize, time_res: usize) -> Self {
|
2021-07-24 16:40:59 +00:00
|
|
|
let mut frame_size = frame_size / time_res * time_res;
|
|
|
|
if frame_size == 0 {
|
|
|
|
frame_size = time_res;
|
|
|
|
}
|
|
|
|
|
|
|
|
// If `frame_size == 1`, computing the window would panic.
|
|
|
|
assert!(frame_size > 1);
|
|
|
|
|
|
|
|
let mut fft_planner = rustfft::FftPlanner::new();
|
|
|
|
|
|
|
|
let mut pv = PhaseVocoder {
|
|
|
|
channels,
|
|
|
|
sample_rate,
|
|
|
|
frame_size,
|
|
|
|
time_res,
|
|
|
|
|
|
|
|
samples_waiting: 0,
|
|
|
|
in_buf: vec![VecDeque::new(); channels],
|
|
|
|
out_buf: vec![VecDeque::new(); channels],
|
|
|
|
last_phase: vec![vec![0.0; frame_size]; channels],
|
|
|
|
sum_phase: vec![vec![0.0; frame_size]; channels],
|
|
|
|
output_accum: vec![VecDeque::new(); channels],
|
|
|
|
|
|
|
|
forward_fft: fft_planner.plan_fft(frame_size, rustfft::FftDirection::Forward),
|
|
|
|
backward_fft: fft_planner.plan_fft(frame_size, rustfft::FftDirection::Inverse),
|
|
|
|
|
|
|
|
window: apodize::hanning_iter(frame_size)
|
|
|
|
.map(|x| x.sqrt())
|
|
|
|
.collect(),
|
|
|
|
|
|
|
|
fft_in: vec![c64::new(0.0, 0.0); frame_size],
|
|
|
|
fft_out: vec![c64::new(0.0, 0.0); frame_size],
|
|
|
|
fft_scratch: vec![],
|
2021-09-16 15:42:18 +00:00
|
|
|
analysis_out: vec![vec![B::empty(); frame_size]; channels],
|
|
|
|
synthesis_in: vec![vec![B::empty(); frame_size]; channels],
|
2021-07-24 16:40:59 +00:00
|
|
|
};
|
|
|
|
pv.fft_scratch = vec![
|
|
|
|
c64::new(0.0, 0.0);
|
|
|
|
pv.forward_fft
|
|
|
|
.get_outofplace_scratch_len()
|
|
|
|
.max(pv.backward_fft.get_outofplace_scratch_len())
|
|
|
|
];
|
|
|
|
pv
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn num_channels(&self) -> usize {
|
|
|
|
self.channels
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn num_bins(&self) -> usize {
|
|
|
|
self.frame_size
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn time_res(&self) -> usize {
|
|
|
|
self.time_res
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn sample_rate(&self) -> f64 {
|
|
|
|
self.sample_rate
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn set_sample_rate(&mut self, sample_rate: f64) {
|
|
|
|
self.sample_rate = sample_rate;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Reads samples from `input`, processes the samples, then resynthesizes as many samples as
|
|
|
|
/// possible into `output`. Returns the number of frames written to `output`.
|
|
|
|
///
|
|
|
|
/// `processor` is a function to manipulate the spectrum before it is resynthesized. Its
|
|
|
|
/// arguments are respectively `num_channels`, `num_bins`, `analysis_output` and
|
|
|
|
/// `synthesis_input`.
|
|
|
|
///
|
|
|
|
/// Samples are expected to be normalized to the range [-1, 1].
|
|
|
|
///
|
|
|
|
/// This method can be called multiple times on the same `PhaseVocoder`.
|
|
|
|
/// If this happens, in the analysis step, it will be assumed that the `input` is a continuation
|
|
|
|
/// of the `input` that was passed during the previous call.
|
|
|
|
///
|
|
|
|
/// It is possible that not enough data is available yet to fill `output` completely.
|
|
|
|
/// In that case, only the first frames of `output` will be written to.
|
|
|
|
/// Conversely, if there is more data available than `output` can hold, the remaining
|
|
|
|
/// output is kept in the `PhaseVocoder` and can be retrieved with another call to
|
|
|
|
/// `process` when more input data is available.
|
|
|
|
///
|
|
|
|
/// # Remark
|
|
|
|
/// The `synthesis_input` passed to the `processor_function` is currently initialised to empty
|
|
|
|
/// bins. This behaviour may change in a future release, so make sure that your implementation
|
|
|
|
/// does not rely on it.
|
|
|
|
pub fn process<S, F>(
|
|
|
|
&mut self,
|
|
|
|
input: &[&[S]],
|
|
|
|
output: &mut [&mut [S]],
|
|
|
|
mut processor: F,
|
|
|
|
) -> usize
|
|
|
|
where
|
|
|
|
S: Float + ToPrimitive + FromPrimitive,
|
2021-09-16 15:42:18 +00:00
|
|
|
F: FnMut(usize, usize, &[Vec<B>], &mut [Vec<B>]),
|
2021-07-24 16:40:59 +00:00
|
|
|
{
|
2021-09-16 15:42:18 +00:00
|
|
|
assert_eq!(
|
|
|
|
input.len(),
|
|
|
|
self.channels,
|
|
|
|
"input length does not equal channel count"
|
|
|
|
);
|
|
|
|
assert_eq!(
|
|
|
|
output.len(),
|
|
|
|
self.channels,
|
|
|
|
"output length does not equal channel count"
|
|
|
|
);
|
2021-07-24 16:40:59 +00:00
|
|
|
|
|
|
|
// push samples to input queue
|
|
|
|
for chan in 0..input.len() {
|
|
|
|
for sample in input[chan].iter() {
|
|
|
|
self.in_buf[chan].push_back(sample.to_f64().unwrap());
|
|
|
|
self.samples_waiting += 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
while self.samples_waiting >= 2 * self.frame_size * self.channels {
|
|
|
|
let frame_sizef = self.frame_size as f64;
|
|
|
|
let time_resf = self.time_res as f64;
|
|
|
|
let step_size = frame_sizef / time_resf;
|
|
|
|
|
|
|
|
for _ in 0..self.time_res {
|
|
|
|
// Initialise the synthesis bins to empty bins.
|
|
|
|
// This may be removed in a future release.
|
|
|
|
for synthesis_channel in self.synthesis_in.iter_mut() {
|
|
|
|
for bin in synthesis_channel.iter_mut() {
|
2021-09-16 15:42:18 +00:00
|
|
|
*bin = B::empty();
|
2021-07-24 16:40:59 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// ANALYSIS
|
|
|
|
for chan in 0..self.channels {
|
|
|
|
// read in
|
|
|
|
for i in 0..self.frame_size {
|
|
|
|
self.fft_in[i] = c64::new(self.in_buf[chan][i] * self.window[i], 0.0);
|
|
|
|
}
|
|
|
|
|
|
|
|
self.forward_fft.process_outofplace_with_scratch(
|
|
|
|
&mut self.fft_in,
|
|
|
|
&mut self.fft_out,
|
|
|
|
&mut self.fft_scratch,
|
|
|
|
);
|
|
|
|
|
|
|
|
for i in 0..self.frame_size {
|
|
|
|
let x = self.fft_out[i];
|
|
|
|
let (amp, phase) = x.to_polar();
|
2021-09-16 15:42:18 +00:00
|
|
|
let bin_phase = phase - self.last_phase[chan][i];
|
|
|
|
let freq = self.phase_to_frequency(i, bin_phase);
|
2021-07-24 16:40:59 +00:00
|
|
|
self.last_phase[chan][i] = phase;
|
|
|
|
|
2021-09-16 15:42:18 +00:00
|
|
|
// yeah passing both and letting the constructor decide is ugly
|
|
|
|
// but it's fast to do so
|
|
|
|
self.analysis_out[chan][i] = B::new(freq, bin_phase, amp * 2.0);
|
2021-07-24 16:40:59 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// PROCESSING
|
|
|
|
processor(
|
|
|
|
self.channels,
|
|
|
|
self.frame_size,
|
|
|
|
&self.analysis_out,
|
|
|
|
&mut self.synthesis_in,
|
|
|
|
);
|
|
|
|
|
|
|
|
// SYNTHESIS
|
|
|
|
for chan in 0..self.channels {
|
|
|
|
for i in 0..self.frame_size {
|
2021-09-16 15:42:18 +00:00
|
|
|
let amp = self.synthesis_in[chan][i].amp();
|
|
|
|
// passing self as a param is slightly ugly but hey
|
|
|
|
// it works
|
|
|
|
let phase = self.synthesis_in[chan][i].phase(self);
|
|
|
|
|
2021-07-24 16:40:59 +00:00
|
|
|
self.sum_phase[chan][i] += phase;
|
|
|
|
let phase = self.sum_phase[chan][i];
|
|
|
|
|
|
|
|
self.fft_in[i] = c64::from_polar(amp, phase);
|
|
|
|
}
|
|
|
|
|
|
|
|
self.backward_fft.process_outofplace_with_scratch(
|
|
|
|
&mut self.fft_in,
|
|
|
|
&mut self.fft_out,
|
|
|
|
&mut self.fft_scratch,
|
|
|
|
);
|
|
|
|
|
|
|
|
// accumulate
|
|
|
|
for i in 0..self.frame_size {
|
|
|
|
if i == self.output_accum[chan].len() {
|
|
|
|
self.output_accum[chan].push_back(0.0);
|
|
|
|
}
|
|
|
|
self.output_accum[chan][i] +=
|
|
|
|
self.window[i] * self.fft_out[i].re / (frame_sizef * time_resf);
|
|
|
|
}
|
|
|
|
|
|
|
|
// write out
|
|
|
|
for _ in 0..step_size as usize {
|
|
|
|
self.out_buf[chan].push_back(self.output_accum[chan].pop_front().unwrap());
|
|
|
|
self.in_buf[chan].pop_front();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
self.samples_waiting -= self.frame_size * self.channels;
|
|
|
|
}
|
|
|
|
|
|
|
|
// pop samples from output queue
|
|
|
|
let mut n_written = 0;
|
|
|
|
for chan in 0..self.channels {
|
|
|
|
for samp in 0..output[chan].len() {
|
|
|
|
output[chan][samp] = match self.out_buf[chan].pop_front() {
|
|
|
|
Some(x) => FromPrimitive::from_f64(x).unwrap(),
|
|
|
|
None => break,
|
|
|
|
};
|
|
|
|
n_written += 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
n_written / self.channels
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn phase_to_frequency(&self, bin: usize, phase: f64) -> f64 {
|
|
|
|
let frame_sizef = self.frame_size as f64;
|
|
|
|
let freq_per_bin = self.sample_rate / frame_sizef;
|
|
|
|
let time_resf = self.time_res as f64;
|
|
|
|
let step_size = frame_sizef / time_resf;
|
|
|
|
let expect = 2.0 * PI * step_size / frame_sizef;
|
|
|
|
let mut tmp = phase;
|
|
|
|
tmp -= (bin as f64) * expect;
|
|
|
|
let mut qpd = (tmp / PI) as i32;
|
|
|
|
if qpd >= 0 {
|
|
|
|
qpd += qpd & 1;
|
|
|
|
} else {
|
|
|
|
qpd -= qpd & 1;
|
|
|
|
}
|
|
|
|
tmp -= PI * (qpd as f64);
|
|
|
|
tmp = time_resf * tmp / (2.0 * PI);
|
|
|
|
tmp = (bin as f64) * freq_per_bin + tmp * freq_per_bin;
|
|
|
|
tmp
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn frequency_to_phase(&self, freq: f64) -> f64 {
|
|
|
|
let step_size = self.frame_size as f64 / self.time_res as f64;
|
|
|
|
2.0 * PI * freq / self.sample_rate * step_size
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(test)]
|
2021-09-16 15:42:18 +00:00
|
|
|
fn identity(channels: usize, bins: usize, input: &[Vec<FreqBin>], output: &mut [Vec<FreqBin>]) {
|
2021-07-24 16:40:59 +00:00
|
|
|
for i in 0..channels {
|
|
|
|
for j in 0..bins {
|
|
|
|
output[i][j] = input[i][j];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(test)]
|
|
|
|
fn test_data_is_reconstructed(mut pvoc: PhaseVocoder, input_samples: &[f32]) {
|
|
|
|
let mut output_samples = vec![0.0; input_samples.len()];
|
|
|
|
let frame_size = pvoc.num_bins();
|
|
|
|
// Pre-padding, not collecting any output.
|
|
|
|
pvoc.process(&[&vec![0.0; frame_size]], &mut [&mut Vec::new()], identity);
|
|
|
|
// The data-itself, collecting some output that we will discard
|
|
|
|
let mut scratch = vec![0.0; frame_size];
|
|
|
|
pvoc.process(&[&input_samples], &mut [&mut scratch], identity);
|
|
|
|
// Post-padding and collecting all output
|
|
|
|
pvoc.process(
|
|
|
|
&[&vec![0.0; frame_size]],
|
|
|
|
&mut [&mut output_samples],
|
|
|
|
identity,
|
|
|
|
);
|
|
|
|
|
|
|
|
assert_ulps_eq!(input_samples, output_samples.as_slice(), epsilon = 1e-2);
|
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn identity_transform_reconstructs_original_data_hat_function() {
|
|
|
|
let window_len = 256;
|
|
|
|
let pvoc = PhaseVocoder::new(1, 44100.0, window_len, window_len / 4);
|
|
|
|
let input_len = 1024;
|
|
|
|
let mut input_samples = vec![0.0; input_len];
|
|
|
|
for i in 0..input_len {
|
|
|
|
if i < input_len / 2 {
|
|
|
|
input_samples[i] = (i as f32) / ((input_len / 2) as f32)
|
|
|
|
} else {
|
|
|
|
input_samples[i] = 2.0 - (i as f32) / ((input_len / 2) as f32);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
test_data_is_reconstructed(pvoc, input_samples.as_slice());
|
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn identity_transform_reconstructs_original_data_random_data() {
|
|
|
|
use rand::rngs::SmallRng;
|
|
|
|
use rand::{Rng, SeedableRng};
|
|
|
|
let mut rng = SmallRng::seed_from_u64(1);
|
|
|
|
let mut input_samples = [0.0; 16384];
|
|
|
|
rng.fill(&mut input_samples[..]);
|
|
|
|
let pvoc = PhaseVocoder::new(1, 44100.0, 256, 256 / 4);
|
|
|
|
test_data_is_reconstructed(pvoc, &input_samples);
|
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn process_works_with_sample_res_equal_to_window() {
|
|
|
|
let mut pvoc = PhaseVocoder::new(1, 44100.0, 256, 256);
|
|
|
|
let input_len = 1024;
|
|
|
|
let input_samples = vec![0.0; input_len];
|
|
|
|
let mut output_samples = vec![0.0; input_len];
|
|
|
|
pvoc.process(&[&input_samples], &mut [&mut output_samples], identity);
|
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn process_works_when_reading_sample_by_sample() {
|
|
|
|
let mut pvoc = PhaseVocoder::new(1, 44100.0, 8, 2);
|
|
|
|
let input_len = 32;
|
|
|
|
let input_samples = vec![0.0; input_len];
|
|
|
|
let mut output_samples = vec![0.0; input_len];
|
|
|
|
for i in 0..input_samples.len() {
|
|
|
|
pvoc.process(
|
|
|
|
&[&input_samples[dbg!(i)..i + 1]],
|
|
|
|
&mut [&mut output_samples],
|
|
|
|
identity,
|
|
|
|
);
|
|
|
|
}
|
|
|
|
}
|