plectrum

Plectrum: instrument tuner for Android
Log | Files | Refs | README | LICENSE

DynamicWavelet.java (9899B)


      1 /*
      2 *      _______                       _____   _____ _____  
      3 *     |__   __|                     |  __ \ / ____|  __ \ 
      4 *        | | __ _ _ __ ___  ___  ___| |  | | (___ | |__) |
      5 *        | |/ _` | '__/ __|/ _ \/ __| |  | |\___ \|  ___/ 
      6 *        | | (_| | |  \__ \ (_) \__ \ |__| |____) | |     
      7 *        |_|\__,_|_|  |___/\___/|___/_____/|_____/|_|     
      8 *                                                         
      9 * -------------------------------------------------------------
     10 *
     11 * TarsosDSP is developed by Joren Six at IPEM, University Ghent
     12 *  
     13 * -------------------------------------------------------------
     14 *
     15 *  Info: http://0110.be/tag/TarsosDSP
     16 *  Github: https://github.com/JorenSix/TarsosDSP
     17 *  Releases: http://0110.be/releases/TarsosDSP/
     18 *  
     19 *  TarsosDSP includes modified source code by various authors,
     20 *  for credits and info, see README.
     21 * 
     22 */
     23 
     24 package be.tarsos.dsp.pitch;
     25 
     26 import java.util.Arrays;
     27 
     28 /* dywapitchtrack.c
     29 
     30 Dynamic Wavelet Algorithm Pitch Tracking library
     31 Released under the MIT open source licence
     32  
     33 Copyright (c) 2010 Antoine Schmitt
     34 
     35 Permission is hereby granted, free of charge, to any person obtaining a copy
     36 of this software and associated documentation files (the "Software"), to deal
     37 in the Software without restriction, including without limitation the rights
     38 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     39 copies of the Software, and to permit persons to whom the Software is
     40 furnished to do so, subject to the following conditions:
     41 
     42 The above copyright notice and this permission notice shall be included in
     43 all copies or substantial portions of the Software.
     44 
     45 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     46 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     47 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     48 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     49 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     50 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     51 THE SOFTWARE.
     52 */
     53 
     54 /**
     55  * <p>
     56  * The pitch is the main frequency of the waveform (the 'note' being played or
     57  * sung). It is expressed as a float in Hz.
     58  * </p>
     59  * Unlike the human ear, pitch detection is difficult to achieve for computers.
     60  * Many algorithms have been designed and experimented, but there is no 'best'
     61  * algorithm. They all depend on the context and the tradeoffs acceptable in
     62  * terms of speed and latency. The context includes the quality and 'cleanness'
     63  * of the audio : obviously polyphonic sounds (multiple instruments playing
     64  * different notes at the same time) are extremely difficult to track,
     65  * percussive or noisy audio has no pitch, most real-life audio have some noisy
     66  * moments, some instruments have a lot of harmonics, etc... </p>
     67  * <p>
     68  * The dywapitchtrack is based on a custom-tailored algorithm which is of very
     69  * high quality: both very accurate (precision < 0.05 semitones), very low
     70  * latency (< 23 ms) and very low error rate. It has been thoroughly tested on
     71  * human voice.
     72  * </p>
     73  * <p>
     74  * It can best be described as a dynamic wavelet algorithm (dywa):
     75  * </p>
     76  * <p>
     77  * The heart of the algorithm is a very powerful wavelet algorithm, described in
     78  * a paper by Eric Larson and Ross Maddox <a href= http://online.physics.uiuc
     79  * .edu/courses/phys498pom/NSF_REU_Reports/2005_reu/Real
     80  * -Time_Time-Domain_Pitch_Tracking_Using_Wavelets.pdf">"Real-Time Time-Domain
     81  * Pitch Tracking Using Wavelets</a>
     82  * </p>
     83  * 
     84  * @author Antoine Schmitt
     85  * @author Joren Six
     86  * 
     87  */
     88 public class DynamicWavelet implements PitchDetector{
     89 	
     90 	// algorithm parameters
     91 	private final int maxFLWTlevels = 6;
     92 	private final double maxF = 3000.;
     93 	private final int differenceLevelsN = 3;
     94 	private final double maximaThresholdRatio = 0.75;
     95 	
     96 	/**
     97 	 * The result of the pitch detection iteration.
     98 	 */
     99 	private final PitchDetectionResult result;
    100 	
    101 	private final float sampleRate;
    102 	
    103 	int[] distances; 
    104 	int[] mins;
    105 	int[] maxs;
    106 	
    107 	public DynamicWavelet(float sampleRate,int bufferSize){
    108 		this.sampleRate = sampleRate;
    109 		
    110 		distances = new int[bufferSize];
    111 		mins = new int[bufferSize];
    112 		maxs = new int[bufferSize];
    113 		result = new PitchDetectionResult();
    114 	}
    115 	
    116 	@Override
    117 	public PitchDetectionResult getPitch(float[] audioBuffer) {
    118 		float pitchF = -1.0f;
    119 		
    120 		int curSamNb = audioBuffer.length;
    121 	
    122 		int nbMins;
    123 		int nbMaxs;
    124 		
    125 		//check if the buffer size changed
    126 		if(distances.length == audioBuffer.length){
    127 			//if not fill the arrays with zero
    128 			Arrays.fill(distances,0);
    129 			Arrays.fill(mins,0);
    130 			Arrays.fill(maxs,0);
    131 		} else {
    132 			//otherwise create new ones 
    133 			distances = new int[audioBuffer.length];
    134 			mins = new int[audioBuffer.length];
    135 			maxs = new int[audioBuffer.length];
    136 		}
    137 		
    138 		double ampltitudeThreshold;  
    139 		double theDC = 0.0;
    140 		
    141 		
    142 		//compute ampltitudeThreshold and theDC
    143 		//first compute the DC and maxAMplitude
    144 		double maxValue = 0.0;
    145 		double minValue = 0.0;
    146 		for (int i = 0; i < audioBuffer.length;i++) {
    147 			double sample = audioBuffer[i];
    148 			theDC = theDC + sample;
    149 			maxValue = Math.max(maxValue, sample);
    150 			minValue = Math.min(sample, minValue);
    151 		}
    152 		theDC = theDC/audioBuffer.length;
    153 		maxValue = maxValue - theDC;
    154 		minValue = minValue - theDC;
    155 		double amplitudeMax = (maxValue > -minValue ? maxValue : -minValue);
    156 		
    157 		ampltitudeThreshold = amplitudeMax*maximaThresholdRatio;
    158 		
    159 		// levels, start without downsampling..
    160 		int curLevel = 0;
    161 		double curModeDistance = -1.;
    162 		int delta;
    163 		
    164 		//TODO: refactor to make this more java, break it up in methods, remove the wile and branching statements...
    165 		
    166 		search:
    167 		while(true){
    168 			delta = (int) (sampleRate / (Math.pow(2, curLevel)*maxF));
    169 			if (curSamNb < 2)
    170 				break search;
    171 			
    172 			// compute the first maximums and minumums after zero-crossing
    173 			// store if greater than the min threshold
    174 			// and if at a greater distance than delta
    175 			double dv, previousDV = -1000;
    176 			
    177 			nbMins = nbMaxs = 0;   
    178 			int lastMinIndex = -1000000;
    179 			int lastmaxIndex = -1000000;
    180 			boolean findMax = false;
    181 			boolean findMin = false;
    182 			for (int i = 2; i < curSamNb; i++) {
    183 				double si = audioBuffer[i] - theDC;
    184 				double si1 = audioBuffer[i-1] - theDC;
    185 				
    186 				if(si1 <= 0 && si > 0) findMax = true;
    187 				if(si1 >= 0 && si < 0) findMin = true;
    188 				
    189 				// min or max ?
    190 				dv = si - si1;
    191 				
    192 				if (previousDV > -1000) {
    193 					if (findMin && previousDV < 0 && dv >= 0) { 
    194 					
    195 						// minimum
    196 						if (Math.abs(si) >= ampltitudeThreshold) {
    197 							if (i > lastMinIndex + delta) {
    198 								mins[nbMins++] = i;
    199 								lastMinIndex = i;
    200 								findMin = false;
    201 							} 
    202 						} 
    203 					}
    204 					
    205 					if (findMax  && previousDV > 0 && dv <= 0) {
    206 						// maximum
    207 						if (Math.abs(si) >= ampltitudeThreshold) {
    208 							if (i > lastmaxIndex + delta) {
    209 								maxs[nbMaxs++] = i;
    210 								lastmaxIndex = i;
    211 								findMax = false;
    212 							}
    213 						}
    214 					}
    215 				}
    216 				previousDV = dv;
    217 			}
    218 			
    219 			if (nbMins == 0 && nbMaxs == 0) {
    220 				// no best distance !
    221 				//asLog("dywapitch no mins nor maxs, exiting\n");
    222 				
    223 				// if DEBUGG then put "no mins nor maxs, exiting"
    224 				break search;
    225 			}
    226 			
    227 			int d;
    228 			Arrays.fill(distances, 0);
    229 			for (int i = 0 ; i < nbMins ; i++) {
    230 				for (int j = 1; j < differenceLevelsN; j++) {
    231 					if (i+j < nbMins) {
    232 						d = Math.abs(mins[i] - mins[i+j]);
    233 						//asLog("dywapitch i=%ld j=%ld d=%ld\n", i, j, d);
    234 						distances[d] = distances[d] + 1;
    235 					}
    236 				}
    237 			}
    238 			
    239 			int bestDistance = -1;
    240 			int bestValue = -1;
    241 			for (int i = 0; i< curSamNb; i++) {
    242 				int summed = 0;
    243 				for (int j = -delta ; j <= delta ; j++) {
    244 					if (i+j >=0 && i+j < curSamNb)
    245 						summed += distances[i+j];
    246 				}
    247 				//asLog("dywapitch i=%ld summed=%ld bestDistance=%ld\n", i, summed, bestDistance);
    248 				if (summed == bestValue) {
    249 					if (i == 2*bestDistance)
    250 						bestDistance = i;
    251 					
    252 				} else if (summed > bestValue) {
    253 					bestValue = summed;
    254 					bestDistance = i;
    255 				}
    256 			}
    257 			
    258 			// averaging
    259 			double distAvg = 0.0;
    260 			double nbDists = 0;
    261 			for (int j = -delta ; j <= delta ; j++) {
    262 				if (bestDistance+j >=0 && bestDistance+j < audioBuffer.length) {
    263 					int nbDist = distances[bestDistance+j];
    264 					if (nbDist > 0) {
    265 						nbDists += nbDist;
    266 						distAvg += (bestDistance+j)*nbDist;
    267 					}
    268 				}
    269 			}
    270 			
    271 			// this is our mode distance !
    272 			distAvg /= nbDists;
    273 			//asLog("dywapitch distAvg=%f\n", distAvg);
    274 
    275 			// continue the levels ?
    276 			if (curModeDistance > -1.) {
    277 				double similarity = Math.abs(distAvg*2 - curModeDistance);
    278 				if (similarity <= 2*delta) {
    279 					//if DEBUGG then put "similarity="&similarity&&"delta="&delta&&"ok"
    280 	 				//asLog("dywapitch similarity=%f OK !\n", similarity);
    281 					// two consecutive similar mode distances : ok !
    282 					pitchF = (float) (sampleRate/(Math.pow(2,curLevel-1)*curModeDistance));
    283 					break search;
    284 				}
    285 				//if DEBUGG then put "similarity="&similarity&&"delta="&delta&&"not"
    286 			}
    287 			
    288 			// not similar, continue next level
    289 			curModeDistance = distAvg;
    290 			
    291 						
    292 			curLevel = curLevel + 1;
    293 			if (curLevel >= maxFLWTlevels) {
    294 				// put "max levels reached, exiting"
    295 	 			//asLog("dywapitch max levels reached, exiting\n");
    296 				break search;
    297 			}
    298 			
    299 			// downsample
    300 			if (curSamNb < 2) {
    301 	 			//asLog("dywapitch not enough samples, exiting\n");
    302 				break search;
    303 			}
    304 			//do not modify original audio buffer, make a copy buffer, if
    305 			//downsampling is needed (only once).
    306 			float[] newAudioBuffer = audioBuffer;
    307 			if(curSamNb == distances.length){
    308 				newAudioBuffer = new float[curSamNb/2];
    309 			}
    310 			for (int i = 0; i < curSamNb/2; i++) {
    311 				newAudioBuffer[i] = (audioBuffer[2*i] + audioBuffer[2*i + 1])/2.0f;				
    312 			}
    313 			audioBuffer = newAudioBuffer;
    314 			curSamNb /= 2;
    315 		}		
    316 		
    317 		result.setPitch(pitchF);
    318 		result.setPitched(-1!=pitchF);
    319 		result.setProbability(-1);
    320 		
    321 		return result;
    322 	}
    323 }