From 29b6ed92837d80181259f5392e07747f01661a06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bart=C5=82omiej=20T=2E=20Listwon?= Date: Fri, 11 Feb 2022 20:15:32 +0100 Subject: [PATCH] Audio quality improvements in PitchShift --- .../effects/audio_effect_pitch_shift.cpp | 88 +++++++++++-------- .../audio/effects/audio_effect_pitch_shift.h | 39 ++++---- 2 files changed, 72 insertions(+), 55 deletions(-) diff --git a/servers/audio/effects/audio_effect_pitch_shift.cpp b/servers/audio/effects/audio_effect_pitch_shift.cpp index 32223f47f6e..ba2d257c0a6 100644 --- a/servers/audio/effects/audio_effect_pitch_shift.cpp +++ b/servers/audio/effects/audio_effect_pitch_shift.cpp @@ -74,7 +74,7 @@ * *****************************************************************************/ -void SMBPitchShift::PitchShift(float pitchShift, long numSampsToProcess, long fftFrameSize, long osamp, float sampleRate, float *indata, float *outdata,int stride) { +void SMBPitchShift::PitchShift(float pitchShift, int64_t numSampsToProcess, int64_t fftFrameSize, int64_t osamp, float sampleRate, float *indata, float *outdata,int stride) { /* @@ -85,19 +85,32 @@ void SMBPitchShift::PitchShift(float pitchShift, long numSampsToProcess, long ff */ double magn, phase, tmp, window, real, imag; - double freqPerBin, expct; - long i,k, qpd, index, inFifoLatency, stepSize, fftFrameSize2; + double freqPerBin, expct, reciprocalFftFrameSize; + int64_t i,k, qpd, index, inFifoLatency, stepSize, fftFrameSize2; /* set up some handy variables */ fftFrameSize2 = fftFrameSize/2; + reciprocalFftFrameSize = 1./fftFrameSize; stepSize = fftFrameSize/osamp; - freqPerBin = sampleRate/(double)fftFrameSize; - expct = 2.*Math_PI*(double)stepSize/(double)fftFrameSize; + freqPerBin = reciprocalFftFrameSize * sampleRate; + expct = Math_TAU * reciprocalFftFrameSize * stepSize; inFifoLatency = fftFrameSize-stepSize; - if (gRover == 0) { gRover = inFifoLatency; -} + if (gRover == 0) { + gRover = inFifoLatency; + } - /* initialize our static arrays */ + // If pitchShift changes clear arrays to prevent some artifacts and quality loss. + if (lastPitchShift != pitchShift) { + lastPitchShift = pitchShift; + memset(gInFIFO, 0, MAX_FRAME_LENGTH * sizeof(float)); + memset(gOutFIFO, 0, MAX_FRAME_LENGTH * sizeof(float)); + memset(gFFTworksp, 0, 2 * MAX_FRAME_LENGTH * sizeof(double)); + memset(gLastPhase, 0, (MAX_FRAME_LENGTH / 2 + 1) * sizeof(double)); + memset(gSumPhase, 0, (MAX_FRAME_LENGTH / 2 + 1) * sizeof(double)); + memset(gOutputAccum, 0, 2 * MAX_FRAME_LENGTH * sizeof(double)); + memset(gAnaFreq, 0, MAX_FRAME_LENGTH * sizeof(double)); + memset(gAnaMagn, 0, MAX_FRAME_LENGTH * sizeof(double)); + } /* main processing loop */ for (i = 0; i < numSampsToProcess; i++){ @@ -112,7 +125,7 @@ void SMBPitchShift::PitchShift(float pitchShift, long numSampsToProcess, long ff /* do windowing and re,im interleave */ for (k = 0; k < fftFrameSize;k++) { - window = -.5*cos(2.*Math_PI*(double)k/(double)fftFrameSize)+.5; + window = -.5*cos(Math_TAU * reciprocalFftFrameSize * k)+.5; gFFTworksp[2*k] = gInFIFO[k] * window; gFFTworksp[2*k+1] = 0.; } @@ -124,6 +137,7 @@ void SMBPitchShift::PitchShift(float pitchShift, long numSampsToProcess, long ff /* this is the analysis step */ for (k = 0; k <= fftFrameSize2; k++) { + /* de-interlace FFT buffer */ real = gFFTworksp[2*k]; imag = gFFTworksp[2*k+1]; @@ -141,13 +155,15 @@ void SMBPitchShift::PitchShift(float pitchShift, long numSampsToProcess, long ff /* map delta phase into +/- Pi interval */ qpd = tmp/Math_PI; - if (qpd >= 0) { qpd += qpd&1; - } else { qpd -= qpd&1; -} + if (qpd >= 0) { + qpd += qpd&1; + } else { + qpd -= qpd&1; + } tmp -= Math_PI*(double)qpd; /* get deviation from bin frequency from the +/- Pi interval */ - tmp = osamp*tmp/(2.*Math_PI); + tmp = osamp*tmp/Math_TAU; /* compute the k-th partials' true frequency */ tmp = (double)k*freqPerBin + tmp*freqPerBin; @@ -160,8 +176,8 @@ void SMBPitchShift::PitchShift(float pitchShift, long numSampsToProcess, long ff /* ***************** PROCESSING ******************* */ /* this does the actual pitch shifting */ - memset(gSynMagn, 0, fftFrameSize*sizeof(float)); - memset(gSynFreq, 0, fftFrameSize*sizeof(float)); + memset(gSynMagn, 0, fftFrameSize*sizeof(double)); + memset(gSynFreq, 0, fftFrameSize*sizeof(double)); for (k = 0; k <= fftFrameSize2; k++) { index = k*pitchShift; if (index <= fftFrameSize2) { @@ -184,7 +200,7 @@ void SMBPitchShift::PitchShift(float pitchShift, long numSampsToProcess, long ff tmp /= freqPerBin; /* take osamp into account */ - tmp = 2.*Math_PI*tmp/osamp; + tmp = Math_TAU*tmp/osamp; /* add the overlap phase advance back in */ tmp += (double)k*expct; @@ -199,33 +215,35 @@ void SMBPitchShift::PitchShift(float pitchShift, long numSampsToProcess, long ff } /* zero negative frequencies */ - for (k = fftFrameSize+2; k < 2*fftFrameSize; k++) { gFFTworksp[k] = 0.; -} + for (k = fftFrameSize+2; k < 2*MAX_FRAME_LENGTH; k++) { + gFFTworksp[k] = 0.; + } /* do inverse transform */ smbFft(gFFTworksp, fftFrameSize, 1); /* do windowing and add to output accumulator */ for(k=0; k < fftFrameSize; k++) { - window = -.5*cos(2.*Math_PI*(double)k/(double)fftFrameSize)+.5; + window = -.5*cos(Math_TAU * reciprocalFftFrameSize * k)+.5; gOutputAccum[k] += 2.*window*gFFTworksp[2*k]/(fftFrameSize2*osamp); } - for (k = 0; k < stepSize; k++) { gOutFIFO[k] = gOutputAccum[k]; -} + for (k = 0; k < stepSize; k++) { + gOutFIFO[k] = gOutputAccum[k]; + } /* shift accumulator */ - memmove(gOutputAccum, gOutputAccum+stepSize, fftFrameSize*sizeof(float)); + memmove(gOutputAccum, gOutputAccum+stepSize, fftFrameSize*sizeof(double)); /* move input FIFO */ - for (k = 0; k < inFifoLatency; k++) { gInFIFO[k] = gInFIFO[k+stepSize]; -} + for (k = 0; k < inFifoLatency; k++) { + gInFIFO[k] = gInFIFO[k+stepSize]; + } } } } - -void SMBPitchShift::smbFft(float *fftBuffer, long fftFrameSize, long sign) +void SMBPitchShift::smbFft(double *fftBuffer, int64_t fftFrameSize, int64_t sign) /* FFT routine, (C)1996 S.M.Bernsee. Sign = -1 is FFT, 1 is iFFT (inverse) Fills fftBuffer[0...2*fftFrameSize-1] with the Fourier transform of the @@ -238,14 +256,16 @@ void SMBPitchShift::smbFft(float *fftBuffer, long fftFrameSize, long sign) of the frequencies of interest is in fftBuffer[0...fftFrameSize]. */ { - float wr, wi, arg, *p1, *p2, temp; - float tr, ti, ur, ui, *p1r, *p1i, *p2r, *p2i; - long i, bitm, j, le, le2, k; + double wr, wi, arg, *p1, *p2, temp; + double tr, ti, ur, ui, *p1r, *p1i, *p2r, *p2i; + int64_t i, bitm, j, le, le2, k, logN; + logN = (int64_t)(log(fftFrameSize) / log(2.) + .5); for (i = 2; i < 2*fftFrameSize-2; i += 2) { for (bitm = 2, j = 0; bitm < 2*fftFrameSize; bitm <<= 1) { - if (i & bitm) { j++; -} + if (i & bitm) { + j++; + } j <<= 1; } if (i < j) { @@ -255,7 +275,8 @@ void SMBPitchShift::smbFft(float *fftBuffer, long fftFrameSize, long sign) *p1 = *p2; *p2 = temp; } } - for (k = 0, le = 2; k < (long)(log((double)fftFrameSize)/log(2.)+.5); k++) { + + for (k = 0, le = 2; k < logN; k++) { le <<= 1; le2 = le>>1; ur = 1.0; @@ -369,7 +390,4 @@ AudioEffectPitchShift::AudioEffectPitchShift() { pitch_scale = 1.0; oversampling = 4; fft_size = FFT_SIZE_2048; - wet = 0.0; - dry = 0.0; - filter = false; } diff --git a/servers/audio/effects/audio_effect_pitch_shift.h b/servers/audio/effects/audio_effect_pitch_shift.h index 0478d05cebc..23da61bb325 100644 --- a/servers/audio/effects/audio_effect_pitch_shift.h +++ b/servers/audio/effects/audio_effect_pitch_shift.h @@ -40,31 +40,33 @@ class SMBPitchShift { float gInFIFO[MAX_FRAME_LENGTH]; float gOutFIFO[MAX_FRAME_LENGTH]; - float gFFTworksp[2 * MAX_FRAME_LENGTH]; - float gLastPhase[MAX_FRAME_LENGTH / 2 + 1]; - float gSumPhase[MAX_FRAME_LENGTH / 2 + 1]; - float gOutputAccum[2 * MAX_FRAME_LENGTH]; - float gAnaFreq[MAX_FRAME_LENGTH]; - float gAnaMagn[MAX_FRAME_LENGTH]; - float gSynFreq[MAX_FRAME_LENGTH]; - float gSynMagn[MAX_FRAME_LENGTH]; - long gRover; + double gFFTworksp[2 * MAX_FRAME_LENGTH]; + double gLastPhase[MAX_FRAME_LENGTH / 2 + 1]; + double gSumPhase[MAX_FRAME_LENGTH / 2 + 1]; + double gOutputAccum[2 * MAX_FRAME_LENGTH]; + double gAnaFreq[MAX_FRAME_LENGTH]; + double gAnaMagn[MAX_FRAME_LENGTH]; + double gSynFreq[MAX_FRAME_LENGTH]; + double gSynMagn[MAX_FRAME_LENGTH]; + int64_t gRover; + float lastPitchShift; - void smbFft(float *fftBuffer, long fftFrameSize, long sign); + void smbFft(double *fftBuffer, int64_t fftFrameSize, int64_t sign); public: - void PitchShift(float pitchShift, long numSampsToProcess, long fftFrameSize, long osamp, float sampleRate, float *indata, float *outdata, int stride); + void PitchShift(float pitchShift, int64_t numSampsToProcess, int64_t fftFrameSize, int64_t osamp, float sampleRate, float *indata, float *outdata, int stride); SMBPitchShift() { gRover = 0; memset(gInFIFO, 0, MAX_FRAME_LENGTH * sizeof(float)); memset(gOutFIFO, 0, MAX_FRAME_LENGTH * sizeof(float)); - memset(gFFTworksp, 0, 2 * MAX_FRAME_LENGTH * sizeof(float)); - memset(gLastPhase, 0, (MAX_FRAME_LENGTH / 2 + 1) * sizeof(float)); - memset(gSumPhase, 0, (MAX_FRAME_LENGTH / 2 + 1) * sizeof(float)); - memset(gOutputAccum, 0, 2 * MAX_FRAME_LENGTH * sizeof(float)); - memset(gAnaFreq, 0, MAX_FRAME_LENGTH * sizeof(float)); - memset(gAnaMagn, 0, MAX_FRAME_LENGTH * sizeof(float)); + memset(gFFTworksp, 0, 2 * MAX_FRAME_LENGTH * sizeof(double)); + memset(gLastPhase, 0, (MAX_FRAME_LENGTH / 2 + 1) * sizeof(double)); + memset(gSumPhase, 0, (MAX_FRAME_LENGTH / 2 + 1) * sizeof(double)); + memset(gOutputAccum, 0, 2 * MAX_FRAME_LENGTH * sizeof(double)); + memset(gAnaFreq, 0, MAX_FRAME_LENGTH * sizeof(double)); + memset(gAnaMagn, 0, MAX_FRAME_LENGTH * sizeof(double)); + lastPitchShift = 1.0; } }; @@ -101,9 +103,6 @@ public: float pitch_scale; int oversampling; FFTSize fft_size; - float wet; - float dry; - bool filter; protected: static void _bind_methods();