plectrum

Plectrum: instrument tuner for Android
Log | Files | Refs | README | LICENSE

FastYin.java (10407B)


      1 /*
      2 *      _______                       _____   _____ _____  
      3 *     |__   __|                     |  __ \ / ____|  __ \ 
      4 *        | | __ _ _ __ ___  ___  ___| |  | | (___ | |__) |
      5 *        | |/ _` | '__/ __|/ _ \/ __| |  | |\___ \|  ___/ 
      6 *        | | (_| | |  \__ \ (_) \__ \ |__| |____) | |     
      7 *        |_|\__,_|_|  |___/\___/|___/_____/|_____/|_|     
      8 *                                                         
      9 * -------------------------------------------------------------
     10 *
     11 * TarsosDSP is developed by Joren Six at IPEM, University Ghent
     12 *  
     13 * -------------------------------------------------------------
     14 *
     15 *  Info: http://0110.be/tag/TarsosDSP
     16 *  Github: https://github.com/JorenSix/TarsosDSP
     17 *  Releases: http://0110.be/releases/TarsosDSP/
     18 *  
     19 *  TarsosDSP includes modified source code by various authors,
     20 *  for credits and info, see README.
     21 * 
     22 */
     23 
     24 /*
     25 *
     26 *  I took Joren's Code and changed it so that 
     27 *  it uses the FFT to calculate the difference function.
     28 *  TarsosDSP is developed by Joren Six at 
     29 *  The Royal Academy of Fine Arts & Royal Conservatory,
     30 *  University College Ghent,
     31 *  Hoogpoort 64, 9000 Ghent - Belgium
     32 *  
     33 *  http://tarsos.0110.be/tag/TarsosDSP
     34 *
     35 */
     36 
     37 package be.tarsos.dsp.pitch;
     38 
     39 import be.tarsos.dsp.util.fft.FloatFFT;
     40 
     41 /**
     42  * An implementation of the YIN pitch tracking algorithm which uses an FFT to
     43  * calculate the difference function. This makes calculating the difference
     44  * function more performant. See <a href=
     45  * "http://recherche.ircam.fr/equipes/pcm/cheveign/ps/2002_JASA_YIN_proof.pdf"
     46  * >the YIN paper.</a> This implementation is done by <a href="mailto:matthias.mauch@elec.qmul.ac.uk">Matthias Mauch</a> and is
     47  * based on {@link Yin} which is based on the implementation found in <a
     48  * href="http://aubio.org">aubio</a> by Paul Brossier.
     49  * 
     50  * @author Matthias Mauch
     51  * @author Joren Six
     52  * @author Paul Brossier
     53  */
     54 public final class FastYin implements PitchDetector {
     55 	/**
     56 	 * The default YIN threshold value. Should be around 0.10~0.15. See YIN
     57 	 * paper for more information.
     58 	 */
     59 	private static final double DEFAULT_THRESHOLD = 0.20;
     60 
     61 	/**
     62 	 * The default size of an audio buffer (in samples).
     63 	 */
     64 	public static final int DEFAULT_BUFFER_SIZE = 2048;
     65 
     66 	/**
     67 	 * The default overlap of two consecutive audio buffers (in samples).
     68 	 */
     69 	public static final int DEFAULT_OVERLAP = 1536;
     70 
     71 	/**
     72 	 * The actual YIN threshold.
     73 	 */
     74 	private final double threshold;
     75 
     76 	/**
     77 	 * The audio sample rate. Most audio has a sample rate of 44.1kHz.
     78 	 */
     79 	private final float sampleRate;
     80 
     81 	/**
     82 	 * The buffer that stores the calculated values. It is exactly half the size
     83 	 * of the input buffer.
     84 	 */
     85 	private final float[] yinBuffer;	
     86 	
     87 	/**
     88 	 * The result of the pitch detection iteration.
     89 	 */
     90 	private final PitchDetectionResult result;
     91 	
     92 	//------------------------ FFT instance members
     93 	
     94 	/**
     95 	 * Holds the FFT data, twice the length of the audio buffer.
     96 	 */
     97 	private final float[] audioBufferFFT;
     98 	
     99 	/**
    100 	 * Half of the data, disguised as a convolution kernel.
    101 	 */
    102 	private final  float[] kernel;
    103 	
    104 	/**
    105 	 * Buffer to allow convolution via complex multiplication. It calculates the auto correlation function (ACF).
    106 	 */
    107 	private final float[] yinStyleACF;
    108 	
    109 	/**
    110 	 * An FFT object to quickly calculate the difference function.
    111 	 */
    112 	private final FloatFFT fft;
    113 
    114 	/**
    115 	 * Create a new pitch detector for a stream with the defined sample rate.
    116 	 * Processes the audio in blocks of the defined size.
    117 	 * 
    118 	 * @param audioSampleRate
    119 	 *            The sample rate of the audio stream. E.g. 44.1 kHz.
    120 	 * @param bufferSize
    121 	 *            The size of a buffer. E.g. 1024.
    122 	 */
    123 	public FastYin(final float audioSampleRate, final int bufferSize) {
    124 		this(audioSampleRate, bufferSize, DEFAULT_THRESHOLD);
    125 	}
    126 
    127 	/**
    128 	 * Create a new pitch detector for a stream with the defined sample rate.
    129 	 * Processes the audio in blocks of the defined size.
    130 	 * 
    131 	 * @param audioSampleRate
    132 	 *            The sample rate of the audio stream. E.g. 44.1 kHz.
    133 	 * @param bufferSize
    134 	 *            The size of a buffer. E.g. 1024.
    135 	 * @param yinThreshold
    136 	 *            The parameter that defines which peaks are kept as possible
    137 	 *            pitch candidates. See the YIN paper for more details.
    138 	 */
    139 	public FastYin(final float audioSampleRate, final int bufferSize, final double yinThreshold) {
    140 		this.sampleRate = audioSampleRate;
    141 		this.threshold = yinThreshold;
    142 		yinBuffer = new float[bufferSize / 2];
    143 		//Initializations for FFT difference step
    144 		audioBufferFFT = new float[2*bufferSize];
    145 		kernel = new float[2*bufferSize];
    146 		yinStyleACF = new float[2*bufferSize];
    147 		fft = new FloatFFT(bufferSize);
    148 		result = new PitchDetectionResult();
    149 	}
    150 
    151 	/**
    152 	 * The main flow of the YIN algorithm. Returns a pitch value in Hz or -1 if
    153 	 * no pitch is detected.
    154 	 * 
    155 	 * @return a pitch value in Hz or -1 if no pitch is detected.
    156 	 */
    157 	public PitchDetectionResult getPitch(final float[] audioBuffer) {
    158 
    159 		final int tauEstimate;
    160 		final float pitchInHertz;
    161 
    162 		// step 2
    163 		difference(audioBuffer);
    164 
    165 		// step 3
    166 		cumulativeMeanNormalizedDifference();
    167 
    168 		// step 4
    169 		tauEstimate = absoluteThreshold();
    170 
    171 		// step 5
    172 		if (tauEstimate != -1) {
    173 			final float betterTau = parabolicInterpolation(tauEstimate);
    174 
    175 			// step 6
    176 			// TODO Implement optimization for the AUBIO_YIN algorithm.
    177 			// 0.77% => 0.5% error rate,
    178 			// using the data of the YIN paper
    179 			// bestLocalEstimate()
    180 
    181 			// conversion to Hz
    182 			pitchInHertz = sampleRate / betterTau;
    183 		} else{
    184 			// no pitch found
    185 			pitchInHertz = -1;
    186 		}
    187 		
    188 		result.setPitch(pitchInHertz);
    189 
    190 		return result;
    191 	}
    192 
    193 	/**
    194 	 * Implements the difference function as described in step 2 of the YIN
    195 	 * paper with an FFT to reduce the number of operations.
    196 	 */
    197 	private void difference(final float[] audioBuffer) {
    198 		// POWER TERM CALCULATION
    199 		// ... for the power terms in equation (7) in the Yin paper
    200 		float[] powerTerms = new float[yinBuffer.length];
    201 		for (int j = 0; j < yinBuffer.length; ++j) {
    202 			powerTerms[0] += audioBuffer[j] * audioBuffer[j];
    203 		}
    204 		// now iteratively calculate all others (saves a few multiplications)
    205 		for (int tau = 1; tau < yinBuffer.length; ++tau) {
    206 			powerTerms[tau] = powerTerms[tau-1] - audioBuffer[tau-1] * audioBuffer[tau-1] + audioBuffer[tau+yinBuffer.length] * audioBuffer[tau+yinBuffer.length];  
    207 		}
    208 
    209 		// YIN-STYLE AUTOCORRELATION via FFT
    210 		// 1. data
    211 		for (int j = 0; j < audioBuffer.length; ++j) {
    212 			audioBufferFFT[2*j] = audioBuffer[j];
    213 			audioBufferFFT[2*j+1] = 0;
    214 		}
    215 		fft.complexForward(audioBufferFFT);
    216 		
    217 		// 2. half of the data, disguised as a convolution kernel
    218 		for (int j = 0; j < yinBuffer.length; ++j) {
    219 			kernel[2*j] = audioBuffer[(yinBuffer.length-1)-j];
    220 			kernel[2*j+1] = 0;
    221 			kernel[2*j+audioBuffer.length] = 0;
    222 			kernel[2*j+audioBuffer.length+1] = 0;
    223 		}
    224 		fft.complexForward(kernel);
    225 
    226 		// 3. convolution via complex multiplication
    227 		for (int j = 0; j < audioBuffer.length; ++j) {
    228 			yinStyleACF[2*j]   = audioBufferFFT[2*j]*kernel[2*j] - audioBufferFFT[2*j+1]*kernel[2*j+1]; // real
    229 			yinStyleACF[2*j+1] = audioBufferFFT[2*j+1]*kernel[2*j] + audioBufferFFT[2*j]*kernel[2*j+1]; // imaginary
    230 		}
    231 		fft.complexInverse(yinStyleACF, true);
    232 		
    233 		// CALCULATION OF difference function
    234 		// ... according to (7) in the Yin paper.
    235 		for (int j = 0; j < yinBuffer.length; ++j) {
    236 			// taking only the real part
    237 			yinBuffer[j] = powerTerms[0] + powerTerms[j] - 2 * yinStyleACF[2 * (yinBuffer.length - 1 + j)];
    238 		}
    239 	}
    240 
    241 	/**
    242 	 * The cumulative mean normalized difference function as described in step 3
    243 	 * of the YIN paper. <br>
    244 	 * <code>
    245 	 * yinBuffer[0] == yinBuffer[1] = 1
    246 	 * </code>
    247 	 */
    248 	private void cumulativeMeanNormalizedDifference() {
    249 		int tau;
    250 		yinBuffer[0] = 1;
    251 		float runningSum = 0;
    252 		for (tau = 1; tau < yinBuffer.length; tau++) {
    253 			runningSum += yinBuffer[tau];
    254 			yinBuffer[tau] *= tau / runningSum;
    255 		}
    256 	}
    257 
    258 	/**
    259 	 * Implements step 4 of the AUBIO_YIN paper.
    260 	 */
    261 	private int absoluteThreshold() {
    262 		// Uses another loop construct
    263 		// than the AUBIO implementation
    264 		int tau;
    265 		// first two positions in yinBuffer are always 1
    266 		// So start at the third (index 2)
    267 		for (tau = 2; tau < yinBuffer.length; tau++) {
    268 			if (yinBuffer[tau] < threshold) {
    269 				while (tau + 1 < yinBuffer.length && yinBuffer[tau + 1] < yinBuffer[tau]) {
    270 					tau++;
    271 				}
    272 				// found tau, exit loop and return
    273 				// store the probability
    274 				// From the YIN paper: The threshold determines the list of
    275 				// candidates admitted to the set, and can be interpreted as the
    276 				// proportion of aperiodic power tolerated
    277 				// within a periodic signal.
    278 				//
    279 				// Since we want the periodicity and and not aperiodicity:
    280 				// periodicity = 1 - aperiodicity
    281 				result.setProbability(1 - yinBuffer[tau]);
    282 				break;
    283 			}
    284 		}
    285 
    286 		
    287 		// if no pitch found, tau => -1
    288 		if (tau == yinBuffer.length || yinBuffer[tau] >= threshold || result.getProbability() > 1.0) {
    289 			tau = -1;
    290 			result.setProbability(0);
    291 			result.setPitched(false);	
    292 		} else {
    293 			result.setPitched(true);
    294 		}
    295 
    296 		return tau;
    297 	}
    298 
    299 	/**
    300 	 * Implements step 5 of the AUBIO_YIN paper. It refines the estimated tau
    301 	 * value using parabolic interpolation. This is needed to detect higher
    302 	 * frequencies more precisely. See http://fizyka.umk.pl/nrbook/c10-2.pdf and
    303 	 * for more background
    304 	 * http://fedc.wiwi.hu-berlin.de/xplore/tutorials/xegbohtmlnode62.html
    305 	 * 
    306 	 * @param tauEstimate
    307 	 *            The estimated tau value.
    308 	 * @return A better, more precise tau value.
    309 	 */
    310 	private float parabolicInterpolation(final int tauEstimate) {
    311 		final float betterTau;
    312 		final int x0;
    313 		final int x2;
    314 
    315 		if (tauEstimate < 1) {
    316 			x0 = tauEstimate;
    317 		} else {
    318 			x0 = tauEstimate - 1;
    319 		}
    320 		if (tauEstimate + 1 < yinBuffer.length) {
    321 			x2 = tauEstimate + 1;
    322 		} else {
    323 			x2 = tauEstimate;
    324 		}
    325 		if (x0 == tauEstimate) {
    326 			if (yinBuffer[tauEstimate] <= yinBuffer[x2]) {
    327 				betterTau = tauEstimate;
    328 			} else {
    329 				betterTau = x2;
    330 			}
    331 		} else if (x2 == tauEstimate) {
    332 			if (yinBuffer[tauEstimate] <= yinBuffer[x0]) {
    333 				betterTau = tauEstimate;
    334 			} else {
    335 				betterTau = x0;
    336 			}
    337 		} else {
    338 			float s0, s1, s2;
    339 			s0 = yinBuffer[x0];
    340 			s1 = yinBuffer[tauEstimate];
    341 			s2 = yinBuffer[x2];
    342 			// fixed AUBIO implementation, thanks to Karl Helgason:
    343 			// (2.0f * s1 - s2 - s0) was incorrectly multiplied with -1
    344 			betterTau = tauEstimate + (s2 - s0) / (2 * (2 * s1 - s2 - s0));
    345 		}
    346 		return betterTau;
    347 	}
    348 }