unnieversal/crates/pvoc-rs/src/lib.rs

extern crate apodize;
extern crate rustfft;
#[cfg(test)]
#[macro_use]
extern crate approx;
#[cfg(test)]
extern crate rand;

use rustfft::num_complex::Complex;
use rustfft::num_traits::{Float, FromPrimitive, ToPrimitive};
use std::collections::VecDeque;
use std::f64::consts::PI;
use std::sync::Arc;

#[allow(non_camel_case_types)]
type c64 = Complex<f64>;

/// Represents a component of the spectrum, composed of a frequency and amplitude.
#[derive(Copy, Clone)]
pub struct Bin {
    pub freq: f64,
    pub amp: f64,
}

impl Bin {
    pub fn new(freq: f64, amp: f64) -> Bin {
        Bin {
            freq: freq,
            amp: amp,
        }
    }
    pub fn empty() -> Bin {
        Bin {
            freq: 0.0,
            amp: 0.0,
        }
    }
}

/// A phase vocoder.
///
/// Roughly translated from http://blogs.zynaptiq.com/bernsee/pitch-shifting-using-the-ft/
pub struct PhaseVocoder {
    channels: usize,
    sample_rate: f64,
    frame_size: usize,
    time_res: usize,

    samples_waiting: usize,
    in_buf: Vec<VecDeque<f64>>,
    out_buf: Vec<VecDeque<f64>>,
    last_phase: Vec<Vec<f64>>,
    sum_phase: Vec<Vec<f64>>,
    output_accum: Vec<VecDeque<f64>>,

    forward_fft: Arc<dyn rustfft::Fft<f64>>,
    backward_fft: Arc<dyn rustfft::Fft<f64>>,

    window: Vec<f64>,

    fft_in: Vec<c64>,
    fft_out: Vec<c64>,
    fft_scratch: Vec<c64>,
    analysis_out: Vec<Vec<Bin>>,
    synthesis_in: Vec<Vec<Bin>>,
}

impl PhaseVocoder {
    /// Constructs a new phase vocoder.
    ///
    /// `channels` is the number of channels of audio.
    ///
    /// `sample_rate` is the sample rate.
    ///
    /// `frame_size` is the fourier transform size. It must be `> 1`.
    /// For optimal computation speed, this should be a power of 2.
    /// Will be rounded to a multiple of `time_res`.
    ///
    /// `time_res` is the number of frames to overlap.
    ///
    /// # Panics
    /// Panics if `frame_size` is `<= 1` after rounding.
    pub fn new(
        channels: usize,
        sample_rate: f64,
        frame_size: usize,
        time_res: usize,
    ) -> PhaseVocoder {
        let mut frame_size = frame_size / time_res * time_res;
        if frame_size == 0 {
            frame_size = time_res;
        }

        // If `frame_size == 1`, computing the window would panic.
        assert!(frame_size > 1);

        let mut fft_planner = rustfft::FftPlanner::new();

        let mut pv = PhaseVocoder {
            channels,
            sample_rate,
            frame_size,
            time_res,

            samples_waiting: 0,
            in_buf: vec![VecDeque::new(); channels],
            out_buf: vec![VecDeque::new(); channels],
            last_phase: vec![vec![0.0; frame_size]; channels],
            sum_phase: vec![vec![0.0; frame_size]; channels],
            output_accum: vec![VecDeque::new(); channels],

            forward_fft: fft_planner.plan_fft(frame_size, rustfft::FftDirection::Forward),
            backward_fft: fft_planner.plan_fft(frame_size, rustfft::FftDirection::Inverse),

            window: apodize::hanning_iter(frame_size)
                .map(|x| x.sqrt())
                .collect(),

            fft_in: vec![c64::new(0.0, 0.0); frame_size],
            fft_out: vec![c64::new(0.0, 0.0); frame_size],
            fft_scratch: vec![],
            analysis_out: vec![vec![Bin::empty(); frame_size]; channels],
            synthesis_in: vec![vec![Bin::empty(); frame_size]; channels],
        };
        pv.fft_scratch = vec![
            c64::new(0.0, 0.0);
            pv.forward_fft
                .get_outofplace_scratch_len()
                .max(pv.backward_fft.get_outofplace_scratch_len())
        ];
        pv
    }

    pub fn num_channels(&self) -> usize {
        self.channels
    }

    pub fn num_bins(&self) -> usize {
        self.frame_size
    }

    pub fn time_res(&self) -> usize {
        self.time_res
    }

    pub fn sample_rate(&self) -> f64 {
        self.sample_rate
    }

    pub fn set_sample_rate(&mut self, sample_rate: f64) {
        self.sample_rate = sample_rate;
    }

    /// Reads samples from `input`, processes the samples, then resynthesizes as many samples as
    /// possible into `output`. Returns the number of frames written to `output`.
    ///
    /// `processor` is a function to manipulate the spectrum before it is resynthesized. Its
    /// arguments are respectively `num_channels`, `num_bins`, `analysis_output` and
    /// `synthesis_input`.
    ///
    /// Samples are expected to be normalized to the range [-1, 1].
    ///
    /// This method can be called multiple times on the same `PhaseVocoder`.
    /// If this happens, in the analysis step, it will be assumed that the `input` is a continuation
    /// of the `input` that was passed during the previous call.
    ///
    /// It is possible that not enough data is available yet to fill `output` completely.
    /// In that case, only the first frames of `output` will be written to.
    /// Conversely, if there is more data available than `output` can hold, the remaining
    /// output is kept in the `PhaseVocoder` and can be retrieved with another call to
    /// `process` when more input data is available.
    ///
    /// # Remark
    /// The `synthesis_input` passed to the `processor_function` is currently initialised to empty
    /// bins. This behaviour may change in a future release, so make sure that your implementation
    /// does not rely on it.
    pub fn process<S, F>(
        &mut self,
        input: &[&[S]],
        output: &mut [&mut [S]],
        mut processor: F,
    ) -> usize
    where
        S: Float + ToPrimitive + FromPrimitive,
        F: FnMut(usize, usize, &[Vec<Bin>], &mut [Vec<Bin>]),
    {
        assert_eq!(input.len(), self.channels);
        assert_eq!(output.len(), self.channels);

        // push samples to input queue
        for chan in 0..input.len() {
            for sample in input[chan].iter() {
                self.in_buf[chan].push_back(sample.to_f64().unwrap());
                self.samples_waiting += 1;
            }
        }

        while self.samples_waiting >= 2 * self.frame_size * self.channels {
            let frame_sizef = self.frame_size as f64;
            let time_resf = self.time_res as f64;
            let step_size = frame_sizef / time_resf;

            for _ in 0..self.time_res {
                // Initialise the synthesis bins to empty bins.
                // This may be removed in a future release.
                for synthesis_channel in self.synthesis_in.iter_mut() {
                    for bin in synthesis_channel.iter_mut() {
                        *bin = Bin::empty();
                    }
                }

                // ANALYSIS
                for chan in 0..self.channels {
                    // read in
                    for i in 0..self.frame_size {
                        self.fft_in[i] = c64::new(self.in_buf[chan][i] * self.window[i], 0.0);
                    }

                    self.forward_fft.process_outofplace_with_scratch(
                        &mut self.fft_in,
                        &mut self.fft_out,
                        &mut self.fft_scratch,
                    );

                    for i in 0..self.frame_size {
                        let x = self.fft_out[i];
                        let (amp, phase) = x.to_polar();
                        let freq = self.phase_to_frequency(i, phase - self.last_phase[chan][i]);
                        self.last_phase[chan][i] = phase;

                        self.analysis_out[chan][i] = Bin::new(freq, amp * 2.0);
                    }
                }

                // PROCESSING
                processor(
                    self.channels,
                    self.frame_size,
                    &self.analysis_out,
                    &mut self.synthesis_in,
                );

                // SYNTHESIS
                for chan in 0..self.channels {
                    for i in 0..self.frame_size {
                        let amp = self.synthesis_in[chan][i].amp;
                        let freq = self.synthesis_in[chan][i].freq;
                        let phase = self.frequency_to_phase(freq);
                        self.sum_phase[chan][i] += phase;
                        let phase = self.sum_phase[chan][i];

                        self.fft_in[i] = c64::from_polar(amp, phase);
                    }

                    self.backward_fft.process_outofplace_with_scratch(
                        &mut self.fft_in,
                        &mut self.fft_out,
                        &mut self.fft_scratch,
                    );

                    // accumulate
                    for i in 0..self.frame_size {
                        if i == self.output_accum[chan].len() {
                            self.output_accum[chan].push_back(0.0);
                        }
                        self.output_accum[chan][i] +=
                            self.window[i] * self.fft_out[i].re / (frame_sizef * time_resf);
                    }

                    // write out
                    for _ in 0..step_size as usize {
                        self.out_buf[chan].push_back(self.output_accum[chan].pop_front().unwrap());
                        self.in_buf[chan].pop_front();
                    }
                }
            }
            self.samples_waiting -= self.frame_size * self.channels;
        }

        // pop samples from output queue
        let mut n_written = 0;
        for chan in 0..self.channels {
            for samp in 0..output[chan].len() {
                output[chan][samp] = match self.out_buf[chan].pop_front() {
                    Some(x) => FromPrimitive::from_f64(x).unwrap(),
                    None => break,
                };
                n_written += 1;
            }
        }
        n_written / self.channels
    }

    pub fn phase_to_frequency(&self, bin: usize, phase: f64) -> f64 {
        let frame_sizef = self.frame_size as f64;
        let freq_per_bin = self.sample_rate / frame_sizef;
        let time_resf = self.time_res as f64;
        let step_size = frame_sizef / time_resf;
        let expect = 2.0 * PI * step_size / frame_sizef;
        let mut tmp = phase;
        tmp -= (bin as f64) * expect;
        let mut qpd = (tmp / PI) as i32;
        if qpd >= 0 {
            qpd += qpd & 1;
        } else {
            qpd -= qpd & 1;
        }
        tmp -= PI * (qpd as f64);
        tmp = time_resf * tmp / (2.0 * PI);
        tmp = (bin as f64) * freq_per_bin + tmp * freq_per_bin;
        tmp
    }

    pub fn frequency_to_phase(&self, freq: f64) -> f64 {
        let step_size = self.frame_size as f64 / self.time_res as f64;
        2.0 * PI * freq / self.sample_rate * step_size
    }
}

#[cfg(test)]
fn identity(channels: usize, bins: usize, input: &[Vec<Bin>], output: &mut [Vec<Bin>]) {
    for i in 0..channels {
        for j in 0..bins {
            output[i][j] = input[i][j];
        }
    }
}

#[cfg(test)]
fn test_data_is_reconstructed(mut pvoc: PhaseVocoder, input_samples: &[f32]) {
    let mut output_samples = vec![0.0; input_samples.len()];
    let frame_size = pvoc.num_bins();
    // Pre-padding, not collecting any output.
    pvoc.process(&[&vec![0.0; frame_size]], &mut [&mut Vec::new()], identity);
    // The data-itself, collecting some output that we will discard
    let mut scratch = vec![0.0; frame_size];
    pvoc.process(&[&input_samples], &mut [&mut scratch], identity);
    // Post-padding and collecting all output
    pvoc.process(
        &[&vec![0.0; frame_size]],
        &mut [&mut output_samples],
        identity,
    );

    assert_ulps_eq!(input_samples, output_samples.as_slice(), epsilon = 1e-2);
}

#[test]
fn identity_transform_reconstructs_original_data_hat_function() {
    let window_len = 256;
    let pvoc = PhaseVocoder::new(1, 44100.0, window_len, window_len / 4);
    let input_len = 1024;
    let mut input_samples = vec![0.0; input_len];
    for i in 0..input_len {
        if i < input_len / 2 {
            input_samples[i] = (i as f32) / ((input_len / 2) as f32)
        } else {
            input_samples[i] = 2.0 - (i as f32) / ((input_len / 2) as f32);
        }
    }

    test_data_is_reconstructed(pvoc, input_samples.as_slice());
}

#[test]
fn identity_transform_reconstructs_original_data_random_data() {
    use rand::rngs::SmallRng;
    use rand::{Rng, SeedableRng};
    let mut rng = SmallRng::seed_from_u64(1);
    let mut input_samples = [0.0; 16384];
    rng.fill(&mut input_samples[..]);
    let pvoc = PhaseVocoder::new(1, 44100.0, 256, 256 / 4);
    test_data_is_reconstructed(pvoc, &input_samples);
}

#[test]
fn process_works_with_sample_res_equal_to_window() {
    let mut pvoc = PhaseVocoder::new(1, 44100.0, 256, 256);
    let input_len = 1024;
    let input_samples = vec![0.0; input_len];
    let mut output_samples = vec![0.0; input_len];
    pvoc.process(&[&input_samples], &mut [&mut output_samples], identity);
}

#[test]
fn process_works_when_reading_sample_by_sample() {
    let mut pvoc = PhaseVocoder::new(1, 44100.0, 8, 2);
    let input_len = 32;
    let input_samples = vec![0.0; input_len];
    let mut output_samples = vec![0.0; input_len];
    for i in 0..input_samples.len() {
        pvoc.process(
            &[&input_samples[dbg!(i)..i + 1]],
            &mut [&mut output_samples],
            identity,
        );
    }
}