Yin.java (7559B)
1 /* 2 * _______ _____ _____ _____ 3 * |__ __| | __ \ / ____| __ \ 4 * | | __ _ _ __ ___ ___ ___| | | | (___ | |__) | 5 * | |/ _` | '__/ __|/ _ \/ __| | | |\___ \| ___/ 6 * | | (_| | | \__ \ (_) \__ \ |__| |____) | | 7 * |_|\__,_|_| |___/\___/|___/_____/|_____/|_| 8 * 9 * ------------------------------------------------------------- 10 * 11 * TarsosDSP is developed by Joren Six at IPEM, University Ghent 12 * 13 * ------------------------------------------------------------- 14 * 15 * Info: http://0110.be/tag/TarsosDSP 16 * Github: https://github.com/JorenSix/TarsosDSP 17 * Releases: http://0110.be/releases/TarsosDSP/ 18 * 19 * TarsosDSP includes modified source code by various authors, 20 * for credits and info, see README. 21 * 22 */ 23 24 25 package be.tarsos.dsp.pitch; 26 27 /** 28 * An implementation of the AUBIO_YIN pitch tracking algorithm. See <a href= 29 * "http://recherche.ircam.fr/equipes/pcm/cheveign/ps/2002_JASA_YIN_proof.pdf" 30 * >the YIN paper.</a> Implementation based on <a 31 * href="http://aubio.org">aubio</a> 32 * 33 * @author Joren Six 34 * @author Paul Brossier 35 */ 36 public final class Yin implements PitchDetector { 37 /** 38 * The default YIN threshold value. Should be around 0.10~0.15. See YIN 39 * paper for more information. 40 */ 41 private static final double DEFAULT_THRESHOLD = 0.20; 42 43 /** 44 * The default size of an audio buffer (in samples). 45 */ 46 public static final int DEFAULT_BUFFER_SIZE = 2048; 47 48 /** 49 * The default overlap of two consecutive audio buffers (in samples). 50 */ 51 public static final int DEFAULT_OVERLAP = 1536; 52 53 /** 54 * The actual YIN threshold. 55 */ 56 private final double threshold; 57 58 /** 59 * The audio sample rate. Most audio has a sample rate of 44.1kHz. 60 */ 61 private final float sampleRate; 62 63 /** 64 * The buffer that stores the calculated values. It is exactly half the size 65 * of the input buffer. 66 */ 67 private final float[] yinBuffer; 68 69 /** 70 * The result of the pitch detection iteration. 71 */ 72 private final PitchDetectionResult result; 73 74 /** 75 * Create a new pitch detector for a stream with the defined sample rate. 76 * Processes the audio in blocks of the defined size. 77 * 78 * @param audioSampleRate 79 * The sample rate of the audio stream. E.g. 44.1 kHz. 80 * @param bufferSize 81 * The size of a buffer. E.g. 1024. 82 */ 83 public Yin(final float audioSampleRate, final int bufferSize) { 84 this(audioSampleRate, bufferSize, DEFAULT_THRESHOLD); 85 } 86 87 /** 88 * Create a new pitch detector for a stream with the defined sample rate. 89 * Processes the audio in blocks of the defined size. 90 * 91 * @param audioSampleRate 92 * The sample rate of the audio stream. E.g. 44.1 kHz. 93 * @param bufferSize 94 * The size of a buffer. E.g. 1024. 95 * @param yinThreshold 96 * The parameter that defines which peaks are kept as possible 97 * pitch candidates. See the YIN paper for more details. 98 */ 99 public Yin(final float audioSampleRate, final int bufferSize, final double yinThreshold) { 100 this.sampleRate = audioSampleRate; 101 this.threshold = yinThreshold; 102 yinBuffer = new float[bufferSize / 2]; 103 result = new PitchDetectionResult(); 104 } 105 106 /** 107 * The main flow of the YIN algorithm. Returns a pitch value in Hz or -1 if 108 * no pitch is detected. 109 * 110 * @return a pitch value in Hz or -1 if no pitch is detected. 111 */ 112 public PitchDetectionResult getPitch(final float[] audioBuffer) { 113 114 final int tauEstimate; 115 final float pitchInHertz; 116 117 // step 2 118 difference(audioBuffer); 119 120 // step 3 121 cumulativeMeanNormalizedDifference(); 122 123 // step 4 124 tauEstimate = absoluteThreshold(); 125 126 // step 5 127 if (tauEstimate != -1) { 128 final float betterTau = parabolicInterpolation(tauEstimate); 129 130 // step 6 131 // TODO Implement optimization for the AUBIO_YIN algorithm. 132 // 0.77% => 0.5% error rate, 133 // using the data of the YIN paper 134 // bestLocalEstimate() 135 136 // conversion to Hz 137 pitchInHertz = sampleRate / betterTau; 138 } else{ 139 // no pitch found 140 pitchInHertz = -1; 141 } 142 143 result.setPitch(pitchInHertz); 144 145 return result; 146 } 147 148 /** 149 * Implements the difference function as described in step 2 of the YIN 150 * paper. 151 */ 152 private void difference(final float[] audioBuffer) { 153 int index, tau; 154 float delta; 155 for (tau = 0; tau < yinBuffer.length; tau++) { 156 yinBuffer[tau] = 0; 157 } 158 for (tau = 1; tau < yinBuffer.length; tau++) { 159 for (index = 0; index < yinBuffer.length; index++) { 160 delta = audioBuffer[index] - audioBuffer[index + tau]; 161 yinBuffer[tau] += delta * delta; 162 } 163 } 164 } 165 166 /** 167 * The cumulative mean normalized difference function as described in step 3 168 * of the YIN paper. <br> 169 * <code> 170 * yinBuffer[0] == yinBuffer[1] = 1 171 * </code> 172 */ 173 private void cumulativeMeanNormalizedDifference() { 174 int tau; 175 yinBuffer[0] = 1; 176 float runningSum = 0; 177 for (tau = 1; tau < yinBuffer.length; tau++) { 178 runningSum += yinBuffer[tau]; 179 yinBuffer[tau] *= tau / runningSum; 180 } 181 } 182 183 /** 184 * Implements step 4 of the AUBIO_YIN paper. 185 */ 186 private int absoluteThreshold() { 187 // Uses another loop construct 188 // than the AUBIO implementation 189 int tau; 190 // first two positions in yinBuffer are always 1 191 // So start at the third (index 2) 192 for (tau = 2; tau < yinBuffer.length; tau++) { 193 if (yinBuffer[tau] < threshold) { 194 while (tau + 1 < yinBuffer.length && yinBuffer[tau + 1] < yinBuffer[tau]) { 195 tau++; 196 } 197 // found tau, exit loop and return 198 // store the probability 199 // From the YIN paper: The threshold determines the list of 200 // candidates admitted to the set, and can be interpreted as the 201 // proportion of aperiodic power tolerated 202 // within a periodic signal. 203 // 204 // Since we want the periodicity and and not aperiodicity: 205 // periodicity = 1 - aperiodicity 206 result.setProbability(1 - yinBuffer[tau]); 207 break; 208 } 209 } 210 211 212 // if no pitch found, tau => -1 213 if (tau == yinBuffer.length || yinBuffer[tau] >= threshold) { 214 tau = -1; 215 result.setProbability(0); 216 result.setPitched(false); 217 } else { 218 result.setPitched(true); 219 } 220 221 return tau; 222 } 223 224 /** 225 * Implements step 5 of the AUBIO_YIN paper. It refines the estimated tau 226 * value using parabolic interpolation. This is needed to detect higher 227 * frequencies more precisely. See http://fizyka.umk.pl/nrbook/c10-2.pdf and 228 * for more background 229 * http://fedc.wiwi.hu-berlin.de/xplore/tutorials/xegbohtmlnode62.html 230 * 231 * @param tauEstimate 232 * The estimated tau value. 233 * @return A better, more precise tau value. 234 */ 235 private float parabolicInterpolation(final int tauEstimate) { 236 final float betterTau; 237 final int x0; 238 final int x2; 239 240 if (tauEstimate < 1) { 241 x0 = tauEstimate; 242 } else { 243 x0 = tauEstimate - 1; 244 } 245 if (tauEstimate + 1 < yinBuffer.length) { 246 x2 = tauEstimate + 1; 247 } else { 248 x2 = tauEstimate; 249 } 250 if (x0 == tauEstimate) { 251 if (yinBuffer[tauEstimate] <= yinBuffer[x2]) { 252 betterTau = tauEstimate; 253 } else { 254 betterTau = x2; 255 } 256 } else if (x2 == tauEstimate) { 257 if (yinBuffer[tauEstimate] <= yinBuffer[x0]) { 258 betterTau = tauEstimate; 259 } else { 260 betterTau = x0; 261 } 262 } else { 263 float s0, s1, s2; 264 s0 = yinBuffer[x0]; 265 s1 = yinBuffer[tauEstimate]; 266 s2 = yinBuffer[x2]; 267 // fixed AUBIO implementation, thanks to Karl Helgason: 268 // (2.0f * s1 - s2 - s0) was incorrectly multiplied with -1 269 betterTau = tauEstimate + (s2 - s0) / (2 * (2 * s1 - s2 - s0)); 270 } 271 return betterTau; 272 } 273 }