Compare commits

...

3 Commits

6 changed files with 153 additions and 187 deletions

View File

@ -153,9 +153,21 @@ the only way to reset the pattern is to reset the plugin. if your daw has a way
### robotuna
WIP automatic pitch correction and pitch shifting. the intention is to use it to make hyperpop-style high-pitched vocals
automatic pitch correction and pitch shifting. the intention is to use it to make hyperpop-style high-pitched vocals
it kinda works, but not really. since the pitch shifting part is too slow using a phase vocoder, it doesn't run in real time, and there are some clicking noises. i need to rework that whole thing so it can run in real time. i'll probably change the whole thing completely instead of optimizing it cause yeah
params:
- `manual/snap`: decides whether to listen to midi (if under 0.5) or to snap to the closest semitone (if 0.5 or above)
- `frequency gain`: extra frequency multiplier applied on top of the correction
robotuna does pitch detection on the input, then shifts the pitch to match the expected one. the expected pitch is either the midi note (if set to manual) or the closest semitone (if set to snap)
note: snap mode ignores the midi input
after that, the pitch gets multiplied by `frequency gain`
for example (see the sketch after this list):
- setting manual mode, gain `2.0`, and sending no midi notes shifts the audio up one octave
- setting snap mode with gain `1.0` works as a traditional pitch corrector
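as a rough, self-contained sketch of how the shift factor works out from these params (not the plugin's actual code: `shift_factor` and `closest_semitone` are made-up names for the example, though the plugin does have a `midi_note_to_pitch` helper in its utils crate):

```rust
// equal-temperament helpers, written out so the sketch stands alone
fn midi_note_to_pitch(note: u8) -> f32 {
    // midi note 69 is A4 = 440 Hz
    440.0 * 2f32.powf((note as f32 - 69.0) / 12.0)
}

fn closest_semitone(hz: f32) -> f32 {
    // round the detected pitch to the nearest equal-tempered semitone
    let note = 69.0 + 12.0 * (hz / 440.0).log2();
    440.0 * 2f32.powf(note.round() / 12.0)
}

/// how much the detected pitch gets multiplied by, per channel
fn shift_factor(detected_hz: f32, midi_note: Option<u8>, manual: bool, freq_gain: f32) -> f32 {
    let expected = if manual {
        // manual: follow the midi note; with no note held, leave the pitch alone
        midi_note.map(midi_note_to_pitch).unwrap_or(detected_hz)
    } else {
        // snap: ignore midi and go to the nearest semitone
        closest_semitone(detected_hz)
    };
    // `frequency gain` multiplies whatever the corrector decided
    freq_gain * expected / detected_hz
}
```

so manual mode with no notes held and gain `2.0` gives a factor of `2.0` (one octave up), while snap mode with gain `1.0` gives `closest_semitone(p) / p`, i.e. plain correction.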
### hysteria

View File

@ -4,7 +4,7 @@
// morphing algorithm from https://ccrma.stanford.edu/~jhsu/421b/
use baseplug::{Plugin, ProcessContext};
use pvoc::{Bin, PhaseVocoder};
use pvoc::{PhaseBin, PhaseVocoder};
use serde::{Deserialize, Serialize};
use utils::logs::*;
@ -24,7 +24,7 @@ impl Default for OpiateModel {
}
struct Opiate {
pvoc: PhaseVocoder,
pvoc: PhaseVocoder<PhaseBin>,
out_0: Vec<f32>,
out_1: Vec<f32>,
out_2: Vec<f32>,
@ -68,10 +68,10 @@ impl Plugin for Opiate {
}
let out = &mut [
&mut self.out_0[..],
&mut self.out_1[..],
&mut self.out_2[..],
&mut self.out_3[..],
&mut self.out_0[0..ctx.nframes],
&mut self.out_1[0..ctx.nframes],
&mut self.out_2[0..ctx.nframes],
&mut self.out_3[0..ctx.nframes],
][..];
let morph = model.morph[0] as f64;
@ -79,27 +79,28 @@ impl Plugin for Opiate {
self.pvoc.process(
input,
out,
|_channels: usize, bins: usize, input: &[Vec<Bin>], output: &mut [Vec<Bin>]| {
|_channels: usize,
bins: usize,
input: &[Vec<PhaseBin>],
output: &mut [Vec<PhaseBin>]| {
for j in 0..bins {
// TODO Check if working with the frequencies is the same as working with the phase
// i think we might need to try it to make sure it's the same
// to do that, we'll need to change how pvoc works
// left
let mags = morph * (1.0 - input[0][j].amp) + input[0][j].amp;
let mags2 = imorph * (1.0 - input[2][j].amp) + input[2][j].amp;
let phases = input[0][j].freq - (input[0][j].freq * morph);
let phases2 = input[2][j].freq - (input[2][j].freq * imorph);
let phases = input[0][j].phase - (input[0][j].phase * morph);
let phases2 = input[2][j].phase - (input[2][j].phase * imorph);
output[0][j].amp = mags * mags2;
output[0][j].freq = phases + phases2;
output[0][j].phase = phases + phases2;
// right
let mags = morph * (1.0 - input[1][j].amp) + input[1][j].amp;
let mags2 = imorph * (1.0 - input[3][j].amp) + input[3][j].amp;
let phases = input[1][j].freq - (input[1][j].freq * morph);
let phases2 = input[3][j].freq - (input[3][j].freq * imorph);
let phases = input[1][j].phase - (input[1][j].phase * morph);
let phases2 = input[3][j].phase - (input[3][j].phase * imorph);
output[1][j].amp = mags * mags2;
output[1][j].freq = phases + phases2;
output[1][j].phase = phases + phases2;
}
},
);
@ -107,6 +108,9 @@ impl Plugin for Opiate {
for i in 0..ctx.nframes {
output[0][i] = self.out_0[i];
output[1][i] = self.out_1[i];
self.out_0[i] = 0.0;
self.out_1[i] = 0.0;
}
}
}
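a standalone paraphrase of the per-bin math in the morph closure above, now using `PhaseBin`'s public fields instead of the old frequency bins (`morph` and `imorph` are the two weights the plugin derives from the morph parameter; `morph_bin` is just a name for this sketch, not code from the repo):

```rust
use pvoc::PhaseBin;

// morph one bin from source a (e.g. input[0][j], left of a) with the matching
// bin from source b (e.g. input[2][j], left of b)
fn morph_bin(a: PhaseBin, b: PhaseBin, morph: f64, imorph: f64) -> PhaseBin {
    // each amplitude is pulled towards 1.0 by its weight, then the two are multiplied
    let amp = (morph * (1.0 - a.amp) + a.amp) * (imorph * (1.0 - b.amp) + b.amp);
    // each phase is scaled down by its weight, then the two are summed
    let phase = (a.phase - a.phase * morph) + (b.phase - b.phase * imorph);
    PhaseBin { phase, amp }
}
```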

View File

@ -15,32 +15,67 @@ use std::sync::Arc;
#[allow(non_camel_case_types)]
type c64 = Complex<f64>;
pub trait Bin: Clone + Copy {
fn empty() -> Self;
fn new(freq: f64, phase: f64, amp: f64) -> Self;
fn amp(&self) -> f64;
fn phase(&self, pvoc: &PhaseVocoder<Self>) -> f64;
}
/// Represents a component of the spectrum, composed of a phase and amplitude.
#[derive(Copy, Clone)]
pub struct PhaseBin {
pub phase: f64,
pub amp: f64,
}
impl Bin for PhaseBin {
fn empty() -> Self {
Self {
phase: 0.0,
amp: 0.0,
}
}
fn new(_: f64, phase: f64, amp: f64) -> Self {
Self { phase, amp }
}
fn amp(&self) -> f64 {
self.amp
}
fn phase(&self, _: &PhaseVocoder<Self>) -> f64 {
self.phase
}
}
/// Represents a component of the spectrum, composed of a frequency and amplitude.
#[derive(Copy, Clone)]
pub struct Bin {
pub struct FreqBin {
pub freq: f64,
pub amp: f64,
}
impl Bin {
pub fn new(freq: f64, amp: f64) -> Bin {
Bin {
freq: freq,
amp: amp,
}
}
pub fn empty() -> Bin {
Bin {
impl Bin for FreqBin {
fn empty() -> Self {
Self {
freq: 0.0,
amp: 0.0,
}
}
fn new(freq: f64, _: f64, amp: f64) -> Self {
Self { freq, amp }
}
fn amp(&self) -> f64 {
self.amp
}
fn phase(&self, pvoc: &PhaseVocoder<Self>) -> f64 {
pvoc.frequency_to_phase(self.freq)
}
}
/// A phase vocoder.
///
/// Roughly translated from http://blogs.zynaptiq.com/bernsee/pitch-shifting-using-the-ft/
pub struct PhaseVocoder {
pub struct PhaseVocoder<B: Bin = FreqBin> {
channels: usize,
sample_rate: f64,
frame_size: usize,
@ -61,11 +96,11 @@ pub struct PhaseVocoder {
fft_in: Vec<c64>,
fft_out: Vec<c64>,
fft_scratch: Vec<c64>,
analysis_out: Vec<Vec<Bin>>,
synthesis_in: Vec<Vec<Bin>>,
analysis_out: Vec<Vec<B>>,
synthesis_in: Vec<Vec<B>>,
}
impl PhaseVocoder {
impl<B: Bin> PhaseVocoder<B> {
/// Constructs a new phase vocoder.
///
/// `channels` is the number of channels of audio.
@ -80,12 +115,7 @@ impl PhaseVocoder {
///
/// # Panics
/// Panics if `frame_size` is `<= 1` after rounding.
pub fn new(
channels: usize,
sample_rate: f64,
frame_size: usize,
time_res: usize,
) -> PhaseVocoder {
pub fn new(channels: usize, sample_rate: f64, frame_size: usize, time_res: usize) -> Self {
let mut frame_size = frame_size / time_res * time_res;
if frame_size == 0 {
frame_size = time_res;
@ -119,8 +149,8 @@ impl PhaseVocoder {
fft_in: vec![c64::new(0.0, 0.0); frame_size],
fft_out: vec![c64::new(0.0, 0.0); frame_size],
fft_scratch: vec![],
analysis_out: vec![vec![Bin::empty(); frame_size]; channels],
synthesis_in: vec![vec![Bin::empty(); frame_size]; channels],
analysis_out: vec![vec![B::empty(); frame_size]; channels],
synthesis_in: vec![vec![B::empty(); frame_size]; channels],
};
pv.fft_scratch = vec![
c64::new(0.0, 0.0);
@ -182,10 +212,18 @@ impl PhaseVocoder {
) -> usize
where
S: Float + ToPrimitive + FromPrimitive,
F: FnMut(usize, usize, &[Vec<Bin>], &mut [Vec<Bin>]),
F: FnMut(usize, usize, &[Vec<B>], &mut [Vec<B>]),
{
assert_eq!(input.len(), self.channels);
assert_eq!(output.len(), self.channels);
assert_eq!(
input.len(),
self.channels,
"input length does not equal channel count"
);
assert_eq!(
output.len(),
self.channels,
"output length does not equal channel count"
);
// push samples to input queue
for chan in 0..input.len() {
@ -205,7 +243,7 @@ impl PhaseVocoder {
// This may be removed in a future release.
for synthesis_channel in self.synthesis_in.iter_mut() {
for bin in synthesis_channel.iter_mut() {
*bin = Bin::empty();
*bin = B::empty();
}
}
@ -225,10 +263,13 @@ impl PhaseVocoder {
for i in 0..self.frame_size {
let x = self.fft_out[i];
let (amp, phase) = x.to_polar();
let freq = self.phase_to_frequency(i, phase - self.last_phase[chan][i]);
let bin_phase = phase - self.last_phase[chan][i];
let freq = self.phase_to_frequency(i, bin_phase);
self.last_phase[chan][i] = phase;
self.analysis_out[chan][i] = Bin::new(freq, amp * 2.0);
// yeah passing both and letting the constructor decide is ugly
// but it's fast to do so
self.analysis_out[chan][i] = B::new(freq, bin_phase, amp * 2.0);
}
}
@ -243,9 +284,11 @@ impl PhaseVocoder {
// SYNTHESIS
for chan in 0..self.channels {
for i in 0..self.frame_size {
let amp = self.synthesis_in[chan][i].amp;
let freq = self.synthesis_in[chan][i].freq;
let phase = self.frequency_to_phase(freq);
let amp = self.synthesis_in[chan][i].amp();
// passing self as a param is slightly ugly but hey
// it works
let phase = self.synthesis_in[chan][i].phase(self);
self.sum_phase[chan][i] += phase;
let phase = self.sum_phase[chan][i];
@ -318,7 +361,7 @@ impl PhaseVocoder {
}
#[cfg(test)]
fn identity(channels: usize, bins: usize, input: &[Vec<Bin>], output: &mut [Vec<Bin>]) {
fn identity(channels: usize, bins: usize, input: &[Vec<FreqBin>], output: &mut [Vec<FreqBin>]) {
for i in 0..channels {
for j in 0..bins {
output[i][j] = input[i][j];
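as a rough usage sketch based on the signatures in the hunks above: existing callers keep compiling because the bin type defaults to `FreqBin`, and the new phase-based bins are opt-in (buffer sizes here are arbitrary, and the closure is just an identity pass-through):

```rust
use pvoc::{FreqBin, PhaseBin, PhaseVocoder};

fn main() {
    // untouched callers: `PhaseVocoder` still means `PhaseVocoder<FreqBin>`
    let mut pv: PhaseVocoder = PhaseVocoder::new(1, 44100.0, 256, 4);
    // opting into phase-based bins, as opiate now does
    let _phase_pv: PhaseVocoder<PhaseBin> = PhaseVocoder::new(2, 44100.0, 256, 4);

    let input = vec![0.0f32; 512];
    let mut output = vec![0.0f32; 512];

    // identity pass-through: the closure gets one Vec of bins per channel on each side
    pv.process(
        &[&input[..]],
        &mut [&mut output[..]],
        |channels: usize, bins: usize, inp: &[Vec<FreqBin>], out: &mut [Vec<FreqBin>]| {
            for c in 0..channels {
                for b in 0..bins {
                    out[c][b] = inp[c][b];
                }
            }
        },
    );
}
```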

View File

@ -11,5 +11,5 @@ baseplug = { git = "https://github.com/wrl/baseplug.git", rev = "9cec68f31cca9c0
ringbuf = "0.2.5"
serde = "1.0.126"
log = "0.4.14"
pvoc = { path = "../pvoc-rs" }
utils = { path = "../utils" }

View File

@ -2,14 +2,13 @@
#![feature(generic_associated_types)]
use baseplug::{MidiReceiver, Plugin, ProcessContext};
use pvoc::{FreqBin, PhaseVocoder};
use serde::{Deserialize, Serialize};
use utils::delay::*;
use utils::logs::*;
use utils::pitch::*;
const BUFFER_LEN: usize = 2 << 9;
const DELAY_LEN: usize = 4000;
const DET_LEN: usize = 128;
baseplug::model! {
#[derive(Debug, Serialize, Deserialize)]
@ -40,16 +39,9 @@ struct RoboTuna {
pitch_l: Option<f32>,
pitch_r: Option<f32>,
detector_thread: pitch_detection::PitchDetectorThread<BUFFER_LEN>,
detector_thread: pitch_detection::PitchDetectorThread<DET_LEN>,
/// Keeps delay lines for playing
delays: DelayLines<DELAY_LEN>,
/// Floating indexes so we can do interpolation
delay_idx_l: f32,
delay_idx_r: f32,
/// true indexes so we can know how much we're drifting away
true_idx: usize,
pvoc: PhaseVocoder,
}
impl Plugin for RoboTuna {
@ -63,10 +55,10 @@ impl Plugin for RoboTuna {
type Model = RoboTunaModel;
#[inline]
fn new(_sample_rate: f32, _model: &RoboTunaModel) -> Self {
fn new(sample_rate: f32, _model: &RoboTunaModel) -> Self {
setup_logging("robotuna.log");
let detector_thread = pitch_detection::PitchDetectorThread::<BUFFER_LEN>::new();
let detector_thread = pitch_detection::PitchDetectorThread::<DET_LEN>::new();
log::info!("finished init");
@ -74,15 +66,10 @@ impl Plugin for RoboTuna {
note: None,
pitch_l: None,
pitch_r: None,
detector_thread,
delays: DelayLines::<DELAY_LEN>::new(),
delay_idx_l: 0.0,
delay_idx_r: 0.0,
// We start this at a high number cause idk
// We'll catch up when we start playing
true_idx: 500,
pvoc: PhaseVocoder::new(2, sample_rate as f64, 128, 4),
}
}
@ -92,47 +79,57 @@ impl Plugin for RoboTuna {
let output = &mut ctx.outputs[0].buffers;
for i in 0..ctx.nframes {
// pass input to pitch detector
// pass input to pitch detectors
self.detector_thread
.write(input[0][i], input[1][i], ctx.sample_rate as u32);
// Try to get a processed buffer from the processor thread
// Try to get a pitch from short detector thread
if let Some((pitch_l, pitch_r)) = self.detector_thread.try_get_pitch() {
// Update current pitch
// We use `or`, so we keep the old value if the current one is None
self.pitch_l = pitch_l.or(self.pitch_l);
self.pitch_r = pitch_r.or(self.pitch_r);
}
// Play from delay line according to pitch
let (l, r) = self.shift(
input[0][i],
input[1][i],
ctx.sample_rate,
model.freq_gain[i],
model.manual[i] < 0.5,
);
output[0][i] = l;
output[1][i] = r;
}
let shift = self.shift(model.freq_gain[0], model.manual[0] < 0.5);
self.pvoc.process(
input,
output,
|channels: usize, bins: usize, input: &[Vec<FreqBin>], output: &mut [Vec<FreqBin>]| {
for i in 0..channels {
for j in 0..bins / 2 {
let index = ((j as f64) * shift[i]) as usize;
if index < bins / 2 {
output[i][index].freq = input[i][j].freq * shift[i];
output[i][index].amp += input[i][j].amp;
}
}
}
},
);
}
}
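the closure above is the usual pvoc pitch-shift bin reassignment (essentially the same loop as the helper in utils). spelled out for a single channel, roughly:

```rust
use pvoc::FreqBin;

// per-channel version of the reassignment done in the closure above
// (`shift_bins` is just a name for this sketch)
fn shift_bins(shift: f64, input: &[FreqBin], output: &mut [FreqBin]) {
    let bins = input.len();
    // only the lower half of the bins carries unique spectral content
    for j in 0..bins / 2 {
        // each analysis bin lands `shift` times higher (or lower) in the spectrum
        let index = ((j as f64) * shift) as usize;
        if index < bins / 2 {
            // scale the bin's detected frequency by the same factor,
            // and accumulate amplitudes in case several source bins collide
            output[index].freq = input[j].freq * shift;
            output[index].amp += input[j].amp;
        }
    }
}
```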
impl RoboTuna {
fn advancement_rate(&self, freq_gain: f32, manual: bool) -> (f32, f32) {
// TODO Deal with pitch detection failing
let current_pitch_l = self.pitch_l.unwrap_or(220.0);
let current_pitch_r = self.pitch_r.unwrap_or(220.0);
fn pitch(&self) -> (f32, f32) {
let l = self.pitch_l.unwrap_or(220.0);
let r = self.pitch_r.unwrap_or(220.0);
(l, r)
}
fn shift(&self, freq_gain: f32, manual: bool) -> [f64; 2] {
let (current_pitch_l, current_pitch_r) = self.pitch();
if manual {
// If we're on manual, get the expected frequency from the midi note
if let Some(expected) = self.note.map(midi_note_to_pitch) {
let l = expected / current_pitch_l;
let r = expected / current_pitch_r;
(freq_gain * l, freq_gain * r)
[(freq_gain * l) as f64, (freq_gain * r) as f64]
} else {
// If there's no note, we just do frequency gain
(freq_gain, freq_gain)
[freq_gain as f64, freq_gain as f64]
}
} else {
// If we're on snap, get the closest note
@ -141,99 +138,9 @@ impl RoboTuna {
let l = expected_l / current_pitch_l;
let r = expected_r / current_pitch_r;
(freq_gain * l, freq_gain * r)
[(freq_gain * l) as f64, (freq_gain * r) as f64]
}
}
fn shift(
&mut self,
l: f32,
r: f32,
sample_rate: f32,
freq_gain: f32,
manual: bool,
) -> (f32, f32) {
// so um this code will probably not make any sense if i don't write an explanation of the
// general thing it's trying to achieve
// if i've forgotten to write it up and you want to understand the code, ping me and uh yeah
// add input to delay line
self.delays.write_and_advance(l, r);
// get period of left & right
let period_l = sample_rate / self.pitch_l.unwrap_or(220.0);
let period_r = sample_rate / self.pitch_r.unwrap_or(220.0);
// advance indexes
let (adv_l, adv_r) = self.advancement_rate(freq_gain, manual);
self.delay_idx_l += adv_l;
self.delay_idx_r += adv_r;
self.true_idx += 1;
// get the current value
let mut l = self.delays.l.floating_index(self.delay_idx_l);
let mut r = self.delays.r.floating_index(self.delay_idx_r);
// get how close we are to the input idx, so we know if we have to interpolate/jump
let l_diff = self.true_idx as f32 - self.delay_idx_l;
let r_diff = self.true_idx as f32 - self.delay_idx_r;
// TODO change to a non-linear interpolation
const DIV: f32 = 2.0 / 3.0;
if l_diff - period_l < (period_l / DIV) {
let a = (l_diff - period_l) / (period_l / DIV);
l *= a;
l += (1.0 - a) * self.delays.l.floating_index(self.delay_idx_l - period_l);
// crossfade
// if we are close to having to jump, we start crossfading with the jump destination
// crossfade when we're one third of the period away from jumping
// when we get close to jumping back
if l_diff - period_l < cf_len_l {
// cross goes from 1 (when l_diff is at the max) to 0 (when l_diff == period_l)
let cross = (l_diff - period_l) / cf_len_l;
let (fade_in, fade_out) = ep_crossfade(1.0 - cross);
l = fade_out * l + fade_in * self.delays.l.floating_index(self.delay_idx_l - period_l);
}
// when we get close to jumping foward
if MAX_PERIOD * period_l - l_diff < cf_len_l {
// cross goes from 1 (when l_diff is at the min) to 0 (when l_diff == 3.0 * period_l)
let cross = (MAX_PERIOD * period_l - l_diff) / cf_len_l;
let (fade_in, fade_out) = ep_crossfade(1.0 - cross);
l = fade_out * l + fade_in * self.delays.l.floating_index(self.delay_idx_l + period_l);
}
if r_diff - period_r < (period_r / DIV) {
let a = (r_diff - period_r) / (period_r / DIV);
r *= a;
r += (1.0 - a) * self.delays.r.floating_index(self.delay_idx_r - period_r);
}
if 3.0 * period_r - r_diff < (period_r / DIV) {
let a = (3.0 * period_r - r_diff) / (period_r / DIV);
r *= a;
r += (1.0 - a) * self.delays.r.floating_index(self.delay_idx_r - period_r);
}
// Check if we need to advance/go back `period` samples
// we want to be between the second and third period
// so ideally we want {l,r}_diff == 2.0 * period_{l,r}
// We are about to get to the first period
if l_diff < period_l {
self.delay_idx_l -= period_l;
}
// We are about to get to the fourth period
if l_diff > 3.0 * period_l {
self.delay_idx_l += period_l;
}
if r_diff < period_r {
self.delay_idx_r -= period_r;
}
if r_diff > 3.0 * period_r {
self.delay_idx_r += period_r;
}
(l, r)
}
}
impl MidiReceiver for RoboTuna {
fn midi_input(&mut self, _model: &RoboTunaModelProcess, data: [u8; 3]) {

View File

@ -5,7 +5,7 @@ pub fn generate_vocoder(sample_rate: u32) -> PhaseVocoder {
}
// From https://github.com/nwoeanhinnogaehr/pvoc-plugins/blob/master/src/plugins/pitchshifter.rs
use pvoc::{Bin, PhaseVocoder};
use pvoc::{FreqBin, PhaseVocoder};
pub fn pitch_shift<const LEN: usize>(
pvoc: &mut PhaseVocoder,
input: &[f32],
@ -17,7 +17,7 @@ pub fn pitch_shift<const LEN: usize>(
pvoc.process(
&[&input],
&mut [&mut output],
|channels: usize, bins: usize, input: &[Vec<Bin>], output: &mut [Vec<Bin>]| {
|channels: usize, bins: usize, input: &[Vec<FreqBin>], output: &mut [Vec<FreqBin>]| {
for i in 0..channels {
for j in 0..bins / 2 {
let index = ((j as f64) * shift) as usize;