SpectralPeakProcessor.java (15240B)
1 /* 2 * _______ _____ _____ _____ 3 * |__ __| | __ \ / ____| __ \ 4 * | | __ _ _ __ ___ ___ ___| | | | (___ | |__) | 5 * | |/ _` | '__/ __|/ _ \/ __| | | |\___ \| ___/ 6 * | | (_| | | \__ \ (_) \__ \ |__| |____) | | 7 * |_|\__,_|_| |___/\___/|___/_____/|_____/|_| 8 * 9 * ------------------------------------------------------------- 10 * 11 * TarsosDSP is developed by Joren Six at IPEM, University Ghent 12 * 13 * ------------------------------------------------------------- 14 * 15 * Info: http://0110.be/tag/TarsosDSP 16 * Github: https://github.com/JorenSix/TarsosDSP 17 * Releases: http://0110.be/releases/TarsosDSP/ 18 * 19 * TarsosDSP includes modified source code by various authors, 20 * for credits and info, see README. 21 * 22 */ 23 24 package be.tarsos.dsp; 25 26 import java.util.ArrayList; 27 import java.util.Arrays; 28 import java.util.List; 29 30 import be.tarsos.dsp.util.PitchConverter; 31 import be.tarsos.dsp.util.fft.FFT; 32 import be.tarsos.dsp.util.fft.HammingWindow; 33 34 /** 35 * <p> 36 * This class implements a spectral peak follower as described in Sethares et 37 * al. 2009 - Spectral Tools for Dynamic Tonality and Audio Morphing - section 38 * "Analysis-Resynthessis". It calculates a noise floor and picks spectral peaks 39 * rising above a calculated noise floor with a certain factor. The noise floor 40 * is determined using a simple median filter. 41 * </p> 42 * <p> 43 * Parts of the code is modified from <a 44 * href="http://www.dynamictonality.com/spectools.htm">the code accompanying 45 * "Spectral Tools for Dynamic Tonality and Audio Morphing"</a>. 46 * </p> 47 * <p> 48 * To get the spectral peaks from an audio frame, call <code>getPeakList</code> 49 * <code><pre> 50 AudioDispatcher dispatcher = new AudioDispatcher(stream, fftsize, overlap); 51 dispatcher.addAudioProcessor(spectralPeakFollower); 52 dispatcher.addAudioProcessor(new AudioProcessor() { 53 54 public void processingFinished() { 55 } 56 57 public boolean process(AudioEvent audioEvent) { 58 float[] noiseFloor = SpectralPeakProcessor.calculateNoiseFloor(spectralPeakFollower.getMagnitudes(), medianFilterLength, noiseFloorFactor); 59 List<Integer> localMaxima = SpectralPeakProcessor.findLocalMaxima(spectralPeakFollower.getMagnitudes(), noiseFloor); 60 List<> list = SpectralPeakProcessor.findPeaks(spectralPeakFollower.getMagnitudes(), spectralPeakFollower.getFrequencyEstimates(), localMaxima, numberOfPeaks); 61 // do something with the list... 62 return true; 63 } 64 }); 65 dispatcher.run(); 66 </pre></code> 67 * 68 * @author Joren Six 69 * @author William A. Sethares 70 * @author Andrew J. Milne 71 * @author Stefan Tiedje 72 * @author Anthony Prechtl 73 * @author James Plamondon 74 * 75 */ 76 public class SpectralPeakProcessor implements AudioProcessor { 77 78 /** 79 * The sample rate of the signal. 80 */ 81 private final int sampleRate; 82 83 /** 84 * Cached calculations for the frequency calculation 85 */ 86 private final double dt; 87 private final double cbin; 88 private final double inv_2pi; 89 private final double inv_deltat; 90 private final double inv_2pideltat; 91 92 /** 93 * The fft object used to calculate phase and magnitudes. 94 */ 95 private final FFT fft; 96 97 /** 98 * The pahse info of the current frame. 99 */ 100 private final float[] currentPhaseOffsets; 101 102 /** 103 * The magnitudes in the current frame. 104 */ 105 private final float[] magnitudes; 106 107 /** 108 * Detailed frequency estimates for each bin, using phase info 109 */ 110 private final float[] frequencyEstimates; 111 112 /** 113 * The phase information of the previous frame, or null. 114 */ 115 private float[] previousPhaseOffsets; 116 117 118 119 public SpectralPeakProcessor(int bufferSize, int overlap, int sampleRate) { 120 fft = new FFT(bufferSize, new HammingWindow()); 121 122 magnitudes = new float[bufferSize / 2]; 123 currentPhaseOffsets = new float[bufferSize / 2]; 124 frequencyEstimates = new float[bufferSize / 2]; 125 126 dt = (bufferSize - overlap) / (double) sampleRate; 127 cbin = (double) (dt * sampleRate / (double) bufferSize); 128 129 inv_2pi = (double) (1.0 / (2.0 * Math.PI)); 130 inv_deltat = (double) (1.0 / dt); 131 inv_2pideltat = (double) (inv_deltat * inv_2pi); 132 133 this.sampleRate = sampleRate; 134 135 } 136 137 private void calculateFFT(float[] audio) { 138 // Clone to prevent overwriting audio data 139 float[] fftData = audio.clone(); 140 // Extract the power and phase data 141 fft.powerPhaseFFT(fftData, magnitudes, currentPhaseOffsets); 142 } 143 144 private void normalizeMagintudes(){ 145 float maxMagnitude = (float) -1e6; 146 for(int i = 0;i<magnitudes.length;i++){ 147 maxMagnitude = Math.max(maxMagnitude, magnitudes[i]); 148 } 149 150 //log10 of the normalized value 151 //adding 75 makes sure the value is above zero, a bit ugly though... 152 for(int i = 1;i<magnitudes.length;i++){ 153 magnitudes[i] = (float) (10 * Math.log10(magnitudes[i]/maxMagnitude)) + 75; 154 } 155 } 156 157 @Override 158 public boolean process(AudioEvent audioEvent) { 159 float[] audio = audioEvent.getFloatBuffer(); 160 161 // 1. Extract magnitudes, and phase using an FFT. 162 calculateFFT(audio); 163 164 // 2. Estimate a detailed frequency for each bin. 165 calculateFrequencyEstimates(); 166 167 // 3. Normalize the each magnitude. 168 normalizeMagintudes(); 169 170 // 4. Store the current phase so it can be used for the next frequency estimates block. 171 previousPhaseOffsets = currentPhaseOffsets.clone(); 172 173 return true; 174 } 175 176 @Override 177 public void processingFinished() { 178 } 179 180 181 /** 182 * For each bin, calculate a precise frequency estimate using phase offset. 183 */ 184 private void calculateFrequencyEstimates() { 185 for(int i = 0;i < frequencyEstimates.length;i++){ 186 frequencyEstimates[i] = getFrequencyForBin(i); 187 } 188 } 189 190 /** 191 * @return the magnitudes. 192 */ 193 public float[] getMagnitudes() { 194 return magnitudes.clone(); 195 } 196 197 /** 198 * @return the precise frequency for each bin. 199 */ 200 public float[] getFrequencyEstimates(){ 201 return frequencyEstimates.clone(); 202 } 203 204 /** 205 * Calculates a frequency for a bin using phase info, if available. 206 * @param binIndex The FFT bin index. 207 * @return a frequency, in Hz, calculated using available phase info. 208 */ 209 private float getFrequencyForBin(int binIndex){ 210 final float frequencyInHertz; 211 // use the phase delta information to get a more precise 212 // frequency estimate 213 // if the phase of the previous frame is available. 214 // See 215 // * Moore 1976 216 // "The use of phase vocoder in computer music applications" 217 // * Sethares et al. 2009 - Spectral Tools for Dynamic 218 // Tonality and Audio Morphing 219 // * Laroche and Dolson 1999 220 if (previousPhaseOffsets != null) { 221 float phaseDelta = currentPhaseOffsets[binIndex] - previousPhaseOffsets[binIndex]; 222 long k = Math.round(cbin * binIndex - inv_2pi * phaseDelta); 223 frequencyInHertz = (float) (inv_2pideltat * phaseDelta + inv_deltat * k); 224 } else { 225 frequencyInHertz = (float) fft.binToHz(binIndex, sampleRate); 226 } 227 return frequencyInHertz; 228 } 229 230 /** 231 * Calculate a noise floor for an array of magnitudes. 232 * @param magnitudes The magnitudes of the current frame. 233 * @param medianFilterLength The length of the median filter used to determine the noise floor. 234 * @param noiseFloorFactor The noise floor is multiplied with this factor to determine if the 235 * information is either noise or an interesting spectral peak. 236 * @return a float array representing the noise floor. 237 */ 238 public static float[] calculateNoiseFloor(float[] magnitudes, int medianFilterLength, float noiseFloorFactor) { 239 double[] noiseFloorBuffer; 240 float[] noisefloor = new float[magnitudes.length]; 241 242 float median = (float) median(magnitudes.clone()); 243 244 // Naive median filter implementation. 245 // For each element take a median of surrounding values (noiseFloorBuffer) 246 // Store the median as the noise floor. 247 for (int i = 0; i < magnitudes.length; i++) { 248 noiseFloorBuffer = new double[medianFilterLength]; 249 int index = 0; 250 for (int j = i - medianFilterLength/2; j <= i + medianFilterLength/2 && index < noiseFloorBuffer.length; j++) { 251 if(j >= 0 && j < magnitudes.length){ 252 noiseFloorBuffer[index] = magnitudes[j]; 253 } else{ 254 noiseFloorBuffer[index] = median; 255 } 256 index++; 257 } 258 // calculate the noise floor value. 259 noisefloor[i] = (float) (median(noiseFloorBuffer) * (noiseFloorFactor)) ; 260 } 261 262 float rampLength = 12.0f; 263 for(int i = 0 ; i <= rampLength ; i++){ 264 //ramp 265 float ramp = 1.0f; 266 ramp = (float) (-1 * (Math.log(i/rampLength))) + 1.0f; 267 noisefloor[i] = ramp * noisefloor[i]; 268 } 269 270 return noisefloor; 271 } 272 273 /** 274 * Finds the local magintude maxima and stores them in the given list. 275 * @param magnitudes The magnitudes. 276 * @param noisefloor The noise floor. 277 * @return a list of local maxima. 278 */ 279 public static List<Integer> findLocalMaxima(float[] magnitudes,float[] noisefloor){ 280 List<Integer> localMaximaIndexes = new ArrayList<Integer>(); 281 for (int i = 1; i < magnitudes.length - 1; i++) { 282 boolean largerThanPrevious = (magnitudes[i - 1] < magnitudes[i]); 283 boolean largerThanNext = (magnitudes[i] > magnitudes[i + 1]); 284 boolean largerThanNoiseFloor = (magnitudes[i] > noisefloor[i]); 285 if (largerThanPrevious && largerThanNext && largerThanNoiseFloor) { 286 localMaximaIndexes.add(i); 287 } 288 } 289 return localMaximaIndexes; 290 } 291 292 /** 293 * @param magnitudes the magnitudes. 294 * @return the index for the maximum magnitude. 295 */ 296 private static int findMaxMagnitudeIndex(float[] magnitudes){ 297 int maxMagnitudeIndex = 0; 298 float maxMagnitude = (float) -1e6; 299 for (int i = 1; i < magnitudes.length - 1; i++) { 300 if(magnitudes[i] > maxMagnitude){ 301 maxMagnitude = magnitudes[i]; 302 maxMagnitudeIndex = i; 303 } 304 } 305 return maxMagnitudeIndex; 306 } 307 308 /** 309 * 310 * @param magnitudes the magnitudes.. 311 * @param frequencyEstimates The frequency estimates for each bin. 312 * @param localMaximaIndexes The indexes of the local maxima. 313 * @param numberOfPeaks The requested number of peaks. 314 * @param minDistanceInCents The minimum distance in cents between the peaks 315 * @return A list with spectral peaks. 316 */ 317 public static List<SpectralPeak> findPeaks(float[] magnitudes, float[] frequencyEstimates, List<Integer> localMaximaIndexes, int numberOfPeaks, int minDistanceInCents){ 318 int maxMagnitudeIndex = findMaxMagnitudeIndex(magnitudes); 319 List<SpectralPeak> spectralPeakList = new ArrayList<SpectralPeak>(); 320 321 if(localMaximaIndexes.size()==0) 322 return spectralPeakList; 323 324 float referenceFrequency=0; 325 //the frequency of the bin with the highest magnitude 326 referenceFrequency = frequencyEstimates[maxMagnitudeIndex]; 327 328 //remove frequency estimates below zero 329 for(int i = 0 ; i < localMaximaIndexes.size() ; i++){ 330 if(frequencyEstimates[localMaximaIndexes.get(i)] < 0 ){ 331 localMaximaIndexes.remove(i); 332 frequencyEstimates[localMaximaIndexes.get(i)]=1;//Hz 333 i--; 334 } 335 } 336 337 //filter the local maxima indexes, remove peaks that are too close to each other 338 //assumes that localmaximaIndexes is sorted from lowest to higest index 339 for(int i = 1 ; i < localMaximaIndexes.size() ; i++){ 340 double centCurrent = PitchConverter.hertzToAbsoluteCent(frequencyEstimates[localMaximaIndexes.get(i)]); 341 double centPrev = PitchConverter.hertzToAbsoluteCent(frequencyEstimates[localMaximaIndexes.get(i-1)]); 342 double centDelta = centCurrent - centPrev; 343 if(centDelta < minDistanceInCents ){ 344 if(magnitudes[localMaximaIndexes.get(i)] > magnitudes[localMaximaIndexes.get(i-1)]){ 345 localMaximaIndexes.remove(i-1); 346 }else{ 347 localMaximaIndexes.remove(i); 348 } 349 i--; 350 } 351 } 352 353 // Retrieve the maximum values for the indexes 354 float[] maxMagnitudes = new float[localMaximaIndexes.size()]; 355 for(int i = 0 ; i < localMaximaIndexes.size() ; i++){ 356 maxMagnitudes[i] = magnitudes[localMaximaIndexes.get(i)]; 357 } 358 // Sort the magnitudes in ascending order 359 Arrays.sort(maxMagnitudes); 360 361 // Find the threshold, the first value or somewhere in the array. 362 float peakthresh = maxMagnitudes[0]; 363 if (maxMagnitudes.length > numberOfPeaks) { 364 peakthresh = maxMagnitudes[maxMagnitudes.length - numberOfPeaks]; 365 } 366 367 //store the peaks 368 for(Integer i : localMaximaIndexes){ 369 if(magnitudes[i]>= peakthresh){ 370 final float frequencyInHertz= frequencyEstimates[i]; 371 //ignore frequencies lower than 30Hz 372 float binMagnitude = magnitudes[i]; 373 SpectralPeak peak = new SpectralPeak(0,frequencyInHertz, binMagnitude, referenceFrequency,i); 374 spectralPeakList.add(peak); 375 } 376 } 377 return spectralPeakList; 378 } 379 380 public static final float median(double[] arr){ 381 return percentile(arr, 0.5); 382 } 383 384 /** 385 * Returns the p-th percentile of values in an array. You can use this 386 * function to establish a threshold of acceptance. For example, you can 387 * decide to examine candidates who score above the 90th percentile (0.9). 388 * The elements of the input array are modified (sorted) by this method. 389 * 390 * @param arr An array of sample data values that define relative standing. 391 * The contents of the input array are sorted by this method. 392 * @param p The percentile value in the range 0..1, inclusive. 393 * @return The p-th percentile of values in an array. If p is not a multiple 394 * of 1/(n - 1), this method interpolates to determine the value at 395 * the p-th percentile. 396 **/ 397 public static final float percentile( double[] arr, double p ) { 398 399 if (p < 0 || p > 1) 400 throw new IllegalArgumentException("Percentile out of range."); 401 402 // Sort the array in ascending order. 403 Arrays.sort(arr); 404 405 // Calculate the percentile. 406 double t = p*(arr.length - 1); 407 int i = (int)t; 408 409 return (float) ((i + 1 - t)*arr[i] + (t - i)*arr[i + 1]); 410 } 411 412 public static double median(float[] m) { 413 // Sort the array in ascending order. 414 Arrays.sort(m); 415 int middle = m.length/2; 416 if (m.length%2 == 1) { 417 return m[middle]; 418 } else { 419 return (m[middle-1] + m[middle]) / 2.0; 420 } 421 } 422 423 424 public static class SpectralPeak{ 425 private final float frequencyInHertz; 426 private final float magnitude; 427 private final float referenceFrequency; 428 private final int bin; 429 /** 430 * Timestamp in fractional seconds 431 */ 432 private final float timeStamp; 433 434 public SpectralPeak(float timeStamp,float frequencyInHertz, float magnitude,float referenceFrequency,int bin){ 435 this.frequencyInHertz = frequencyInHertz; 436 this.magnitude = magnitude; 437 this.referenceFrequency = referenceFrequency; 438 this.timeStamp = timeStamp; 439 this.bin = bin; 440 } 441 442 public float getRelativeFrequencyInCents(){ 443 if(referenceFrequency > 0 && frequencyInHertz > 0){ 444 float refInCents = (float) PitchConverter.hertzToAbsoluteCent(referenceFrequency); 445 float valueInCents = (float) PitchConverter.hertzToAbsoluteCent(frequencyInHertz); 446 return valueInCents - refInCents; 447 }else{ 448 return 0; 449 } 450 } 451 452 public float getTimeStamp(){ 453 return timeStamp; 454 } 455 456 public float getMagnitude(){ 457 return magnitude; 458 } 459 460 public float getFrequencyInHertz(){ 461 return frequencyInHertz; 462 } 463 464 public float getRefFrequencyInHertz(){ 465 return referenceFrequency; 466 } 467 468 public String toString(){ 469 return String.format("%.2f %.2f %.2f", frequencyInHertz,getRelativeFrequencyInCents(),magnitude); 470 } 471 472 public int getBin() { 473 return bin; 474 } 475 } 476 477 478 }