DynamicWavelet.java (9899B)
1 /* 2 * _______ _____ _____ _____ 3 * |__ __| | __ \ / ____| __ \ 4 * | | __ _ _ __ ___ ___ ___| | | | (___ | |__) | 5 * | |/ _` | '__/ __|/ _ \/ __| | | |\___ \| ___/ 6 * | | (_| | | \__ \ (_) \__ \ |__| |____) | | 7 * |_|\__,_|_| |___/\___/|___/_____/|_____/|_| 8 * 9 * ------------------------------------------------------------- 10 * 11 * TarsosDSP is developed by Joren Six at IPEM, University Ghent 12 * 13 * ------------------------------------------------------------- 14 * 15 * Info: http://0110.be/tag/TarsosDSP 16 * Github: https://github.com/JorenSix/TarsosDSP 17 * Releases: http://0110.be/releases/TarsosDSP/ 18 * 19 * TarsosDSP includes modified source code by various authors, 20 * for credits and info, see README. 21 * 22 */ 23 24 package be.tarsos.dsp.pitch; 25 26 import java.util.Arrays; 27 28 /* dywapitchtrack.c 29 30 Dynamic Wavelet Algorithm Pitch Tracking library 31 Released under the MIT open source licence 32 33 Copyright (c) 2010 Antoine Schmitt 34 35 Permission is hereby granted, free of charge, to any person obtaining a copy 36 of this software and associated documentation files (the "Software"), to deal 37 in the Software without restriction, including without limitation the rights 38 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 39 copies of the Software, and to permit persons to whom the Software is 40 furnished to do so, subject to the following conditions: 41 42 The above copyright notice and this permission notice shall be included in 43 all copies or substantial portions of the Software. 44 45 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 46 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 47 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 48 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 49 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 50 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 51 THE SOFTWARE. 52 */ 53 54 /** 55 * <p> 56 * The pitch is the main frequency of the waveform (the 'note' being played or 57 * sung). It is expressed as a float in Hz. 58 * </p> 59 * Unlike the human ear, pitch detection is difficult to achieve for computers. 60 * Many algorithms have been designed and experimented, but there is no 'best' 61 * algorithm. They all depend on the context and the tradeoffs acceptable in 62 * terms of speed and latency. The context includes the quality and 'cleanness' 63 * of the audio : obviously polyphonic sounds (multiple instruments playing 64 * different notes at the same time) are extremely difficult to track, 65 * percussive or noisy audio has no pitch, most real-life audio have some noisy 66 * moments, some instruments have a lot of harmonics, etc... </p> 67 * <p> 68 * The dywapitchtrack is based on a custom-tailored algorithm which is of very 69 * high quality: both very accurate (precision < 0.05 semitones), very low 70 * latency (< 23 ms) and very low error rate. It has been thoroughly tested on 71 * human voice. 72 * </p> 73 * <p> 74 * It can best be described as a dynamic wavelet algorithm (dywa): 75 * </p> 76 * <p> 77 * The heart of the algorithm is a very powerful wavelet algorithm, described in 78 * a paper by Eric Larson and Ross Maddox <a href= http://online.physics.uiuc 79 * .edu/courses/phys498pom/NSF_REU_Reports/2005_reu/Real 80 * -Time_Time-Domain_Pitch_Tracking_Using_Wavelets.pdf">"Real-Time Time-Domain 81 * Pitch Tracking Using Wavelets</a> 82 * </p> 83 * 84 * @author Antoine Schmitt 85 * @author Joren Six 86 * 87 */ 88 public class DynamicWavelet implements PitchDetector{ 89 90 // algorithm parameters 91 private final int maxFLWTlevels = 6; 92 private final double maxF = 3000.; 93 private final int differenceLevelsN = 3; 94 private final double maximaThresholdRatio = 0.75; 95 96 /** 97 * The result of the pitch detection iteration. 98 */ 99 private final PitchDetectionResult result; 100 101 private final float sampleRate; 102 103 int[] distances; 104 int[] mins; 105 int[] maxs; 106 107 public DynamicWavelet(float sampleRate,int bufferSize){ 108 this.sampleRate = sampleRate; 109 110 distances = new int[bufferSize]; 111 mins = new int[bufferSize]; 112 maxs = new int[bufferSize]; 113 result = new PitchDetectionResult(); 114 } 115 116 @Override 117 public PitchDetectionResult getPitch(float[] audioBuffer) { 118 float pitchF = -1.0f; 119 120 int curSamNb = audioBuffer.length; 121 122 int nbMins; 123 int nbMaxs; 124 125 //check if the buffer size changed 126 if(distances.length == audioBuffer.length){ 127 //if not fill the arrays with zero 128 Arrays.fill(distances,0); 129 Arrays.fill(mins,0); 130 Arrays.fill(maxs,0); 131 } else { 132 //otherwise create new ones 133 distances = new int[audioBuffer.length]; 134 mins = new int[audioBuffer.length]; 135 maxs = new int[audioBuffer.length]; 136 } 137 138 double ampltitudeThreshold; 139 double theDC = 0.0; 140 141 142 //compute ampltitudeThreshold and theDC 143 //first compute the DC and maxAMplitude 144 double maxValue = 0.0; 145 double minValue = 0.0; 146 for (int i = 0; i < audioBuffer.length;i++) { 147 double sample = audioBuffer[i]; 148 theDC = theDC + sample; 149 maxValue = Math.max(maxValue, sample); 150 minValue = Math.min(sample, minValue); 151 } 152 theDC = theDC/audioBuffer.length; 153 maxValue = maxValue - theDC; 154 minValue = minValue - theDC; 155 double amplitudeMax = (maxValue > -minValue ? maxValue : -minValue); 156 157 ampltitudeThreshold = amplitudeMax*maximaThresholdRatio; 158 159 // levels, start without downsampling.. 160 int curLevel = 0; 161 double curModeDistance = -1.; 162 int delta; 163 164 //TODO: refactor to make this more java, break it up in methods, remove the wile and branching statements... 165 166 search: 167 while(true){ 168 delta = (int) (sampleRate / (Math.pow(2, curLevel)*maxF)); 169 if (curSamNb < 2) 170 break search; 171 172 // compute the first maximums and minumums after zero-crossing 173 // store if greater than the min threshold 174 // and if at a greater distance than delta 175 double dv, previousDV = -1000; 176 177 nbMins = nbMaxs = 0; 178 int lastMinIndex = -1000000; 179 int lastmaxIndex = -1000000; 180 boolean findMax = false; 181 boolean findMin = false; 182 for (int i = 2; i < curSamNb; i++) { 183 double si = audioBuffer[i] - theDC; 184 double si1 = audioBuffer[i-1] - theDC; 185 186 if(si1 <= 0 && si > 0) findMax = true; 187 if(si1 >= 0 && si < 0) findMin = true; 188 189 // min or max ? 190 dv = si - si1; 191 192 if (previousDV > -1000) { 193 if (findMin && previousDV < 0 && dv >= 0) { 194 195 // minimum 196 if (Math.abs(si) >= ampltitudeThreshold) { 197 if (i > lastMinIndex + delta) { 198 mins[nbMins++] = i; 199 lastMinIndex = i; 200 findMin = false; 201 } 202 } 203 } 204 205 if (findMax && previousDV > 0 && dv <= 0) { 206 // maximum 207 if (Math.abs(si) >= ampltitudeThreshold) { 208 if (i > lastmaxIndex + delta) { 209 maxs[nbMaxs++] = i; 210 lastmaxIndex = i; 211 findMax = false; 212 } 213 } 214 } 215 } 216 previousDV = dv; 217 } 218 219 if (nbMins == 0 && nbMaxs == 0) { 220 // no best distance ! 221 //asLog("dywapitch no mins nor maxs, exiting\n"); 222 223 // if DEBUGG then put "no mins nor maxs, exiting" 224 break search; 225 } 226 227 int d; 228 Arrays.fill(distances, 0); 229 for (int i = 0 ; i < nbMins ; i++) { 230 for (int j = 1; j < differenceLevelsN; j++) { 231 if (i+j < nbMins) { 232 d = Math.abs(mins[i] - mins[i+j]); 233 //asLog("dywapitch i=%ld j=%ld d=%ld\n", i, j, d); 234 distances[d] = distances[d] + 1; 235 } 236 } 237 } 238 239 int bestDistance = -1; 240 int bestValue = -1; 241 for (int i = 0; i< curSamNb; i++) { 242 int summed = 0; 243 for (int j = -delta ; j <= delta ; j++) { 244 if (i+j >=0 && i+j < curSamNb) 245 summed += distances[i+j]; 246 } 247 //asLog("dywapitch i=%ld summed=%ld bestDistance=%ld\n", i, summed, bestDistance); 248 if (summed == bestValue) { 249 if (i == 2*bestDistance) 250 bestDistance = i; 251 252 } else if (summed > bestValue) { 253 bestValue = summed; 254 bestDistance = i; 255 } 256 } 257 258 // averaging 259 double distAvg = 0.0; 260 double nbDists = 0; 261 for (int j = -delta ; j <= delta ; j++) { 262 if (bestDistance+j >=0 && bestDistance+j < audioBuffer.length) { 263 int nbDist = distances[bestDistance+j]; 264 if (nbDist > 0) { 265 nbDists += nbDist; 266 distAvg += (bestDistance+j)*nbDist; 267 } 268 } 269 } 270 271 // this is our mode distance ! 272 distAvg /= nbDists; 273 //asLog("dywapitch distAvg=%f\n", distAvg); 274 275 // continue the levels ? 276 if (curModeDistance > -1.) { 277 double similarity = Math.abs(distAvg*2 - curModeDistance); 278 if (similarity <= 2*delta) { 279 //if DEBUGG then put "similarity="&similarity&&"delta="&delta&&"ok" 280 //asLog("dywapitch similarity=%f OK !\n", similarity); 281 // two consecutive similar mode distances : ok ! 282 pitchF = (float) (sampleRate/(Math.pow(2,curLevel-1)*curModeDistance)); 283 break search; 284 } 285 //if DEBUGG then put "similarity="&similarity&&"delta="&delta&&"not" 286 } 287 288 // not similar, continue next level 289 curModeDistance = distAvg; 290 291 292 curLevel = curLevel + 1; 293 if (curLevel >= maxFLWTlevels) { 294 // put "max levels reached, exiting" 295 //asLog("dywapitch max levels reached, exiting\n"); 296 break search; 297 } 298 299 // downsample 300 if (curSamNb < 2) { 301 //asLog("dywapitch not enough samples, exiting\n"); 302 break search; 303 } 304 //do not modify original audio buffer, make a copy buffer, if 305 //downsampling is needed (only once). 306 float[] newAudioBuffer = audioBuffer; 307 if(curSamNb == distances.length){ 308 newAudioBuffer = new float[curSamNb/2]; 309 } 310 for (int i = 0; i < curSamNb/2; i++) { 311 newAudioBuffer[i] = (audioBuffer[2*i] + audioBuffer[2*i + 1])/2.0f; 312 } 313 audioBuffer = newAudioBuffer; 314 curSamNb /= 2; 315 } 316 317 result.setPitch(pitchF); 318 result.setPitched(-1!=pitchF); 319 result.setProbability(-1); 320 321 return result; 322 } 323 }