plectrum

Plectrum: instrument tuner for Android
Log | Files | Refs | README | LICENSE

Yin.java (7559B)


      1 /*
      2 *      _______                       _____   _____ _____  
      3 *     |__   __|                     |  __ \ / ____|  __ \ 
      4 *        | | __ _ _ __ ___  ___  ___| |  | | (___ | |__) |
      5 *        | |/ _` | '__/ __|/ _ \/ __| |  | |\___ \|  ___/ 
      6 *        | | (_| | |  \__ \ (_) \__ \ |__| |____) | |     
      7 *        |_|\__,_|_|  |___/\___/|___/_____/|_____/|_|     
      8 *                                                         
      9 * -------------------------------------------------------------
     10 *
     11 * TarsosDSP is developed by Joren Six at IPEM, University Ghent
     12 *  
     13 * -------------------------------------------------------------
     14 *
     15 *  Info: http://0110.be/tag/TarsosDSP
     16 *  Github: https://github.com/JorenSix/TarsosDSP
     17 *  Releases: http://0110.be/releases/TarsosDSP/
     18 *  
     19 *  TarsosDSP includes modified source code by various authors,
     20 *  for credits and info, see README.
     21 * 
     22 */
     23 
     24 
     25 package be.tarsos.dsp.pitch;
     26 
     27 /**
     28  * An implementation of the AUBIO_YIN pitch tracking algorithm. See <a href=
     29  * "http://recherche.ircam.fr/equipes/pcm/cheveign/ps/2002_JASA_YIN_proof.pdf"
     30  * >the YIN paper.</a> Implementation based on <a
     31  * href="http://aubio.org">aubio</a>
     32  * 
     33  * @author Joren Six
     34  * @author Paul Brossier
     35  */
     36 public final class Yin implements PitchDetector {
     37 	/**
     38 	 * The default YIN threshold value. Should be around 0.10~0.15. See YIN
     39 	 * paper for more information.
     40 	 */
     41 	private static final double DEFAULT_THRESHOLD = 0.20;
     42 
     43 	/**
     44 	 * The default size of an audio buffer (in samples).
     45 	 */
     46 	public static final int DEFAULT_BUFFER_SIZE = 2048;
     47 
     48 	/**
     49 	 * The default overlap of two consecutive audio buffers (in samples).
     50 	 */
     51 	public static final int DEFAULT_OVERLAP = 1536;
     52 
     53 	/**
     54 	 * The actual YIN threshold.
     55 	 */
     56 	private final double threshold;
     57 
     58 	/**
     59 	 * The audio sample rate. Most audio has a sample rate of 44.1kHz.
     60 	 */
     61 	private final float sampleRate;
     62 
     63 	/**
     64 	 * The buffer that stores the calculated values. It is exactly half the size
     65 	 * of the input buffer.
     66 	 */
     67 	private final float[] yinBuffer;
     68 	
     69 	/**
     70 	 * The result of the pitch detection iteration.
     71 	 */
     72 	private final PitchDetectionResult result;
     73 
     74 	/**
     75 	 * Create a new pitch detector for a stream with the defined sample rate.
     76 	 * Processes the audio in blocks of the defined size.
     77 	 * 
     78 	 * @param audioSampleRate
     79 	 *            The sample rate of the audio stream. E.g. 44.1 kHz.
     80 	 * @param bufferSize
     81 	 *            The size of a buffer. E.g. 1024.
     82 	 */
     83 	public Yin(final float audioSampleRate, final int bufferSize) {
     84 		this(audioSampleRate, bufferSize, DEFAULT_THRESHOLD);
     85 	}
     86 
     87 	/**
     88 	 * Create a new pitch detector for a stream with the defined sample rate.
     89 	 * Processes the audio in blocks of the defined size.
     90 	 * 
     91 	 * @param audioSampleRate
     92 	 *            The sample rate of the audio stream. E.g. 44.1 kHz.
     93 	 * @param bufferSize
     94 	 *            The size of a buffer. E.g. 1024.
     95 	 * @param yinThreshold
     96 	 *            The parameter that defines which peaks are kept as possible
     97 	 *            pitch candidates. See the YIN paper for more details.
     98 	 */
     99 	public Yin(final float audioSampleRate, final int bufferSize, final double yinThreshold) {
    100 		this.sampleRate = audioSampleRate;
    101 		this.threshold = yinThreshold;
    102 		yinBuffer = new float[bufferSize / 2];
    103 		result = new PitchDetectionResult();
    104 	}
    105 
    106 	/**
    107 	 * The main flow of the YIN algorithm. Returns a pitch value in Hz or -1 if
    108 	 * no pitch is detected.
    109 	 * 
    110 	 * @return a pitch value in Hz or -1 if no pitch is detected.
    111 	 */
    112 	public PitchDetectionResult getPitch(final float[] audioBuffer) {
    113 
    114 		final int tauEstimate;
    115 		final float pitchInHertz;
    116 
    117 		// step 2
    118 		difference(audioBuffer);
    119 
    120 		// step 3
    121 		cumulativeMeanNormalizedDifference();
    122 
    123 		// step 4
    124 		tauEstimate = absoluteThreshold();
    125 
    126 		// step 5
    127 		if (tauEstimate != -1) {
    128 			final float betterTau = parabolicInterpolation(tauEstimate);
    129 
    130 			// step 6
    131 			// TODO Implement optimization for the AUBIO_YIN algorithm.
    132 			// 0.77% => 0.5% error rate,
    133 			// using the data of the YIN paper
    134 			// bestLocalEstimate()
    135 
    136 			// conversion to Hz
    137 			pitchInHertz = sampleRate / betterTau;
    138 		} else{
    139 			// no pitch found
    140 			pitchInHertz = -1;
    141 		}
    142 		
    143 		result.setPitch(pitchInHertz);
    144 
    145 		return result;
    146 	}
    147 
    148 	/**
    149 	 * Implements the difference function as described in step 2 of the YIN
    150 	 * paper.
    151 	 */
    152 	private void difference(final float[] audioBuffer) {
    153 		int index, tau;
    154 		float delta;
    155 		for (tau = 0; tau < yinBuffer.length; tau++) {
    156 			yinBuffer[tau] = 0;
    157 		}
    158 		for (tau = 1; tau < yinBuffer.length; tau++) {
    159 			for (index = 0; index < yinBuffer.length; index++) {
    160 				delta = audioBuffer[index] - audioBuffer[index + tau];
    161 				yinBuffer[tau] += delta * delta;
    162 			}
    163 		}
    164 	}
    165 
    166 	/**
    167 	 * The cumulative mean normalized difference function as described in step 3
    168 	 * of the YIN paper. <br>
    169 	 * <code>
    170 	 * yinBuffer[0] == yinBuffer[1] = 1
    171 	 * </code>
    172 	 */
    173 	private void cumulativeMeanNormalizedDifference() {
    174 		int tau;
    175 		yinBuffer[0] = 1;
    176 		float runningSum = 0;
    177 		for (tau = 1; tau < yinBuffer.length; tau++) {
    178 			runningSum += yinBuffer[tau];
    179 			yinBuffer[tau] *= tau / runningSum;
    180 		}
    181 	}
    182 
    183 	/**
    184 	 * Implements step 4 of the AUBIO_YIN paper.
    185 	 */
    186 	private int absoluteThreshold() {
    187 		// Uses another loop construct
    188 		// than the AUBIO implementation
    189 		int tau;
    190 		// first two positions in yinBuffer are always 1
    191 		// So start at the third (index 2)
    192 		for (tau = 2; tau < yinBuffer.length; tau++) {
    193 			if (yinBuffer[tau] < threshold) {
    194 				while (tau + 1 < yinBuffer.length && yinBuffer[tau + 1] < yinBuffer[tau]) {
    195 					tau++;
    196 				}
    197 				// found tau, exit loop and return
    198 				// store the probability
    199 				// From the YIN paper: The threshold determines the list of
    200 				// candidates admitted to the set, and can be interpreted as the
    201 				// proportion of aperiodic power tolerated
    202 				// within a periodic signal.
    203 				//
    204 				// Since we want the periodicity and and not aperiodicity:
    205 				// periodicity = 1 - aperiodicity
    206 				result.setProbability(1 - yinBuffer[tau]);
    207 				break;
    208 			}
    209 		}
    210 
    211 		
    212 		// if no pitch found, tau => -1
    213 		if (tau == yinBuffer.length || yinBuffer[tau] >= threshold) {
    214 			tau = -1;
    215 			result.setProbability(0);
    216 			result.setPitched(false);	
    217 		} else {
    218 			result.setPitched(true);
    219 		}
    220 
    221 		return tau;
    222 	}
    223 
    224 	/**
    225 	 * Implements step 5 of the AUBIO_YIN paper. It refines the estimated tau
    226 	 * value using parabolic interpolation. This is needed to detect higher
    227 	 * frequencies more precisely. See http://fizyka.umk.pl/nrbook/c10-2.pdf and
    228 	 * for more background
    229 	 * http://fedc.wiwi.hu-berlin.de/xplore/tutorials/xegbohtmlnode62.html
    230 	 * 
    231 	 * @param tauEstimate
    232 	 *            The estimated tau value.
    233 	 * @return A better, more precise tau value.
    234 	 */
    235 	private float parabolicInterpolation(final int tauEstimate) {
    236 		final float betterTau;
    237 		final int x0;
    238 		final int x2;
    239 
    240 		if (tauEstimate < 1) {
    241 			x0 = tauEstimate;
    242 		} else {
    243 			x0 = tauEstimate - 1;
    244 		}
    245 		if (tauEstimate + 1 < yinBuffer.length) {
    246 			x2 = tauEstimate + 1;
    247 		} else {
    248 			x2 = tauEstimate;
    249 		}
    250 		if (x0 == tauEstimate) {
    251 			if (yinBuffer[tauEstimate] <= yinBuffer[x2]) {
    252 				betterTau = tauEstimate;
    253 			} else {
    254 				betterTau = x2;
    255 			}
    256 		} else if (x2 == tauEstimate) {
    257 			if (yinBuffer[tauEstimate] <= yinBuffer[x0]) {
    258 				betterTau = tauEstimate;
    259 			} else {
    260 				betterTau = x0;
    261 			}
    262 		} else {
    263 			float s0, s1, s2;
    264 			s0 = yinBuffer[x0];
    265 			s1 = yinBuffer[tauEstimate];
    266 			s2 = yinBuffer[x2];
    267 			// fixed AUBIO implementation, thanks to Karl Helgason:
    268 			// (2.0f * s1 - s2 - s0) was incorrectly multiplied with -1
    269 			betterTau = tauEstimate + (s2 - s0) / (2 * (2 * s1 - s2 - s0));
    270 		}
    271 		return betterTau;
    272 	}
    273 }