FastYin.java (10407B)
1 /* 2 * _______ _____ _____ _____ 3 * |__ __| | __ \ / ____| __ \ 4 * | | __ _ _ __ ___ ___ ___| | | | (___ | |__) | 5 * | |/ _` | '__/ __|/ _ \/ __| | | |\___ \| ___/ 6 * | | (_| | | \__ \ (_) \__ \ |__| |____) | | 7 * |_|\__,_|_| |___/\___/|___/_____/|_____/|_| 8 * 9 * ------------------------------------------------------------- 10 * 11 * TarsosDSP is developed by Joren Six at IPEM, University Ghent 12 * 13 * ------------------------------------------------------------- 14 * 15 * Info: http://0110.be/tag/TarsosDSP 16 * Github: https://github.com/JorenSix/TarsosDSP 17 * Releases: http://0110.be/releases/TarsosDSP/ 18 * 19 * TarsosDSP includes modified source code by various authors, 20 * for credits and info, see README. 21 * 22 */ 23 24 /* 25 * 26 * I took Joren's Code and changed it so that 27 * it uses the FFT to calculate the difference function. 28 * TarsosDSP is developed by Joren Six at 29 * The Royal Academy of Fine Arts & Royal Conservatory, 30 * University College Ghent, 31 * Hoogpoort 64, 9000 Ghent - Belgium 32 * 33 * http://tarsos.0110.be/tag/TarsosDSP 34 * 35 */ 36 37 package be.tarsos.dsp.pitch; 38 39 import be.tarsos.dsp.util.fft.FloatFFT; 40 41 /** 42 * An implementation of the YIN pitch tracking algorithm which uses an FFT to 43 * calculate the difference function. This makes calculating the difference 44 * function more performant. See <a href= 45 * "http://recherche.ircam.fr/equipes/pcm/cheveign/ps/2002_JASA_YIN_proof.pdf" 46 * >the YIN paper.</a> This implementation is done by <a href="mailto:matthias.mauch@elec.qmul.ac.uk">Matthias Mauch</a> and is 47 * based on {@link Yin} which is based on the implementation found in <a 48 * href="http://aubio.org">aubio</a> by Paul Brossier. 49 * 50 * @author Matthias Mauch 51 * @author Joren Six 52 * @author Paul Brossier 53 */ 54 public final class FastYin implements PitchDetector { 55 /** 56 * The default YIN threshold value. Should be around 0.10~0.15. See YIN 57 * paper for more information. 58 */ 59 private static final double DEFAULT_THRESHOLD = 0.20; 60 61 /** 62 * The default size of an audio buffer (in samples). 63 */ 64 public static final int DEFAULT_BUFFER_SIZE = 2048; 65 66 /** 67 * The default overlap of two consecutive audio buffers (in samples). 68 */ 69 public static final int DEFAULT_OVERLAP = 1536; 70 71 /** 72 * The actual YIN threshold. 73 */ 74 private final double threshold; 75 76 /** 77 * The audio sample rate. Most audio has a sample rate of 44.1kHz. 78 */ 79 private final float sampleRate; 80 81 /** 82 * The buffer that stores the calculated values. It is exactly half the size 83 * of the input buffer. 84 */ 85 private final float[] yinBuffer; 86 87 /** 88 * The result of the pitch detection iteration. 89 */ 90 private final PitchDetectionResult result; 91 92 //------------------------ FFT instance members 93 94 /** 95 * Holds the FFT data, twice the length of the audio buffer. 96 */ 97 private final float[] audioBufferFFT; 98 99 /** 100 * Half of the data, disguised as a convolution kernel. 101 */ 102 private final float[] kernel; 103 104 /** 105 * Buffer to allow convolution via complex multiplication. It calculates the auto correlation function (ACF). 106 */ 107 private final float[] yinStyleACF; 108 109 /** 110 * An FFT object to quickly calculate the difference function. 111 */ 112 private final FloatFFT fft; 113 114 /** 115 * Create a new pitch detector for a stream with the defined sample rate. 116 * Processes the audio in blocks of the defined size. 117 * 118 * @param audioSampleRate 119 * The sample rate of the audio stream. E.g. 44.1 kHz. 120 * @param bufferSize 121 * The size of a buffer. E.g. 1024. 122 */ 123 public FastYin(final float audioSampleRate, final int bufferSize) { 124 this(audioSampleRate, bufferSize, DEFAULT_THRESHOLD); 125 } 126 127 /** 128 * Create a new pitch detector for a stream with the defined sample rate. 129 * Processes the audio in blocks of the defined size. 130 * 131 * @param audioSampleRate 132 * The sample rate of the audio stream. E.g. 44.1 kHz. 133 * @param bufferSize 134 * The size of a buffer. E.g. 1024. 135 * @param yinThreshold 136 * The parameter that defines which peaks are kept as possible 137 * pitch candidates. See the YIN paper for more details. 138 */ 139 public FastYin(final float audioSampleRate, final int bufferSize, final double yinThreshold) { 140 this.sampleRate = audioSampleRate; 141 this.threshold = yinThreshold; 142 yinBuffer = new float[bufferSize / 2]; 143 //Initializations for FFT difference step 144 audioBufferFFT = new float[2*bufferSize]; 145 kernel = new float[2*bufferSize]; 146 yinStyleACF = new float[2*bufferSize]; 147 fft = new FloatFFT(bufferSize); 148 result = new PitchDetectionResult(); 149 } 150 151 /** 152 * The main flow of the YIN algorithm. Returns a pitch value in Hz or -1 if 153 * no pitch is detected. 154 * 155 * @return a pitch value in Hz or -1 if no pitch is detected. 156 */ 157 public PitchDetectionResult getPitch(final float[] audioBuffer) { 158 159 final int tauEstimate; 160 final float pitchInHertz; 161 162 // step 2 163 difference(audioBuffer); 164 165 // step 3 166 cumulativeMeanNormalizedDifference(); 167 168 // step 4 169 tauEstimate = absoluteThreshold(); 170 171 // step 5 172 if (tauEstimate != -1) { 173 final float betterTau = parabolicInterpolation(tauEstimate); 174 175 // step 6 176 // TODO Implement optimization for the AUBIO_YIN algorithm. 177 // 0.77% => 0.5% error rate, 178 // using the data of the YIN paper 179 // bestLocalEstimate() 180 181 // conversion to Hz 182 pitchInHertz = sampleRate / betterTau; 183 } else{ 184 // no pitch found 185 pitchInHertz = -1; 186 } 187 188 result.setPitch(pitchInHertz); 189 190 return result; 191 } 192 193 /** 194 * Implements the difference function as described in step 2 of the YIN 195 * paper with an FFT to reduce the number of operations. 196 */ 197 private void difference(final float[] audioBuffer) { 198 // POWER TERM CALCULATION 199 // ... for the power terms in equation (7) in the Yin paper 200 float[] powerTerms = new float[yinBuffer.length]; 201 for (int j = 0; j < yinBuffer.length; ++j) { 202 powerTerms[0] += audioBuffer[j] * audioBuffer[j]; 203 } 204 // now iteratively calculate all others (saves a few multiplications) 205 for (int tau = 1; tau < yinBuffer.length; ++tau) { 206 powerTerms[tau] = powerTerms[tau-1] - audioBuffer[tau-1] * audioBuffer[tau-1] + audioBuffer[tau+yinBuffer.length] * audioBuffer[tau+yinBuffer.length]; 207 } 208 209 // YIN-STYLE AUTOCORRELATION via FFT 210 // 1. data 211 for (int j = 0; j < audioBuffer.length; ++j) { 212 audioBufferFFT[2*j] = audioBuffer[j]; 213 audioBufferFFT[2*j+1] = 0; 214 } 215 fft.complexForward(audioBufferFFT); 216 217 // 2. half of the data, disguised as a convolution kernel 218 for (int j = 0; j < yinBuffer.length; ++j) { 219 kernel[2*j] = audioBuffer[(yinBuffer.length-1)-j]; 220 kernel[2*j+1] = 0; 221 kernel[2*j+audioBuffer.length] = 0; 222 kernel[2*j+audioBuffer.length+1] = 0; 223 } 224 fft.complexForward(kernel); 225 226 // 3. convolution via complex multiplication 227 for (int j = 0; j < audioBuffer.length; ++j) { 228 yinStyleACF[2*j] = audioBufferFFT[2*j]*kernel[2*j] - audioBufferFFT[2*j+1]*kernel[2*j+1]; // real 229 yinStyleACF[2*j+1] = audioBufferFFT[2*j+1]*kernel[2*j] + audioBufferFFT[2*j]*kernel[2*j+1]; // imaginary 230 } 231 fft.complexInverse(yinStyleACF, true); 232 233 // CALCULATION OF difference function 234 // ... according to (7) in the Yin paper. 235 for (int j = 0; j < yinBuffer.length; ++j) { 236 // taking only the real part 237 yinBuffer[j] = powerTerms[0] + powerTerms[j] - 2 * yinStyleACF[2 * (yinBuffer.length - 1 + j)]; 238 } 239 } 240 241 /** 242 * The cumulative mean normalized difference function as described in step 3 243 * of the YIN paper. <br> 244 * <code> 245 * yinBuffer[0] == yinBuffer[1] = 1 246 * </code> 247 */ 248 private void cumulativeMeanNormalizedDifference() { 249 int tau; 250 yinBuffer[0] = 1; 251 float runningSum = 0; 252 for (tau = 1; tau < yinBuffer.length; tau++) { 253 runningSum += yinBuffer[tau]; 254 yinBuffer[tau] *= tau / runningSum; 255 } 256 } 257 258 /** 259 * Implements step 4 of the AUBIO_YIN paper. 260 */ 261 private int absoluteThreshold() { 262 // Uses another loop construct 263 // than the AUBIO implementation 264 int tau; 265 // first two positions in yinBuffer are always 1 266 // So start at the third (index 2) 267 for (tau = 2; tau < yinBuffer.length; tau++) { 268 if (yinBuffer[tau] < threshold) { 269 while (tau + 1 < yinBuffer.length && yinBuffer[tau + 1] < yinBuffer[tau]) { 270 tau++; 271 } 272 // found tau, exit loop and return 273 // store the probability 274 // From the YIN paper: The threshold determines the list of 275 // candidates admitted to the set, and can be interpreted as the 276 // proportion of aperiodic power tolerated 277 // within a periodic signal. 278 // 279 // Since we want the periodicity and and not aperiodicity: 280 // periodicity = 1 - aperiodicity 281 result.setProbability(1 - yinBuffer[tau]); 282 break; 283 } 284 } 285 286 287 // if no pitch found, tau => -1 288 if (tau == yinBuffer.length || yinBuffer[tau] >= threshold || result.getProbability() > 1.0) { 289 tau = -1; 290 result.setProbability(0); 291 result.setPitched(false); 292 } else { 293 result.setPitched(true); 294 } 295 296 return tau; 297 } 298 299 /** 300 * Implements step 5 of the AUBIO_YIN paper. It refines the estimated tau 301 * value using parabolic interpolation. This is needed to detect higher 302 * frequencies more precisely. See http://fizyka.umk.pl/nrbook/c10-2.pdf and 303 * for more background 304 * http://fedc.wiwi.hu-berlin.de/xplore/tutorials/xegbohtmlnode62.html 305 * 306 * @param tauEstimate 307 * The estimated tau value. 308 * @return A better, more precise tau value. 309 */ 310 private float parabolicInterpolation(final int tauEstimate) { 311 final float betterTau; 312 final int x0; 313 final int x2; 314 315 if (tauEstimate < 1) { 316 x0 = tauEstimate; 317 } else { 318 x0 = tauEstimate - 1; 319 } 320 if (tauEstimate + 1 < yinBuffer.length) { 321 x2 = tauEstimate + 1; 322 } else { 323 x2 = tauEstimate; 324 } 325 if (x0 == tauEstimate) { 326 if (yinBuffer[tauEstimate] <= yinBuffer[x2]) { 327 betterTau = tauEstimate; 328 } else { 329 betterTau = x2; 330 } 331 } else if (x2 == tauEstimate) { 332 if (yinBuffer[tauEstimate] <= yinBuffer[x0]) { 333 betterTau = tauEstimate; 334 } else { 335 betterTau = x0; 336 } 337 } else { 338 float s0, s1, s2; 339 s0 = yinBuffer[x0]; 340 s1 = yinBuffer[tauEstimate]; 341 s2 = yinBuffer[x2]; 342 // fixed AUBIO implementation, thanks to Karl Helgason: 343 // (2.0f * s1 - s2 - s0) was incorrectly multiplied with -1 344 betterTau = tauEstimate + (s2 - s0) / (2 * (2 * s1 - s2 - s0)); 345 } 346 return betterTau; 347 } 348 }