plectrum

Plectrum: instrument tuner for Android
Log | Files | Refs | README | LICENSE

SpectralPeakProcessor.java (15240B)


      1 /*
      2 *      _______                       _____   _____ _____  
      3 *     |__   __|                     |  __ \ / ____|  __ \ 
      4 *        | | __ _ _ __ ___  ___  ___| |  | | (___ | |__) |
      5 *        | |/ _` | '__/ __|/ _ \/ __| |  | |\___ \|  ___/ 
      6 *        | | (_| | |  \__ \ (_) \__ \ |__| |____) | |     
      7 *        |_|\__,_|_|  |___/\___/|___/_____/|_____/|_|     
      8 *                                                         
      9 * -------------------------------------------------------------
     10 *
     11 * TarsosDSP is developed by Joren Six at IPEM, University Ghent
     12 *  
     13 * -------------------------------------------------------------
     14 *
     15 *  Info: http://0110.be/tag/TarsosDSP
     16 *  Github: https://github.com/JorenSix/TarsosDSP
     17 *  Releases: http://0110.be/releases/TarsosDSP/
     18 *  
     19 *  TarsosDSP includes modified source code by various authors,
     20 *  for credits and info, see README.
     21 * 
     22 */
     23 
     24 package be.tarsos.dsp;
     25 
     26 import java.util.ArrayList;
     27 import java.util.Arrays;
     28 import java.util.List;
     29 
     30 import be.tarsos.dsp.util.PitchConverter;
     31 import be.tarsos.dsp.util.fft.FFT;
     32 import be.tarsos.dsp.util.fft.HammingWindow;
     33 
     34 /**
     35  * <p>
     36  * This class implements a spectral peak follower as described in Sethares et
     37  * al. 2009 - Spectral Tools for Dynamic Tonality and Audio Morphing - section
     38  * "Analysis-Resynthessis". It calculates a noise floor and picks spectral peaks
     39  * rising above a calculated noise floor with a certain factor. The noise floor
     40  * is determined using a simple median filter.
     41  * </p>
     42  * <p>
     43  * Parts of the code is modified from <a
     44  * href="http://www.dynamictonality.com/spectools.htm">the code accompanying
     45  * "Spectral Tools for Dynamic Tonality and Audio Morphing"</a>.
     46  * </p>
     47  * <p>
     48  * To get the spectral peaks from an audio frame, call <code>getPeakList</code>
     49  * <code><pre> 
     50 AudioDispatcher dispatcher = new AudioDispatcher(stream, fftsize, overlap);
     51 dispatcher.addAudioProcessor(spectralPeakFollower);
     52 dispatcher.addAudioProcessor(new AudioProcessor() {
     53 
     54   public void processingFinished() {
     55   }
     56   
     57   public boolean process(AudioEvent audioEvent) {
     58     float[] noiseFloor = SpectralPeakProcessor.calculateNoiseFloor(spectralPeakFollower.getMagnitudes(), medianFilterLength, noiseFloorFactor);
     59 	List<Integer> localMaxima = SpectralPeakProcessor.findLocalMaxima(spectralPeakFollower.getMagnitudes(), noiseFloor);
     60 	List<> list = SpectralPeakProcessor.findPeaks(spectralPeakFollower.getMagnitudes(), spectralPeakFollower.getFrequencyEstimates(), localMaxima, numberOfPeaks);
     61     // do something with the list...
     62     return true;
     63   }
     64 });
     65 dispatcher.run();
     66 </pre></code>
     67  * 
     68  * @author Joren Six
     69  * @author William A. Sethares
     70  * @author Andrew J. Milne
     71  * @author Stefan Tiedje
     72  * @author Anthony Prechtl
     73  * @author James Plamondon
     74  * 
     75  */
     76 public class SpectralPeakProcessor implements AudioProcessor {
     77 
     78 	/**
     79 	 * The sample rate of the signal.
     80 	 */
     81 	private final int sampleRate;
     82 	
     83 	/**
     84 	 * Cached calculations for the frequency calculation
     85 	 */
     86 	private final double dt;
     87 	private final double cbin;
     88 	private final double inv_2pi;
     89 	private final double inv_deltat;
     90 	private final double inv_2pideltat;
     91 
     92 	/**
     93 	 * The fft object used to calculate phase and magnitudes.
     94 	 */
     95 	private final FFT fft;
     96 
     97 	/**
     98 	 * The pahse info of the current frame.
     99 	 */
    100 	private final float[] currentPhaseOffsets;
    101 
    102 	/**
    103 	 * The magnitudes in the current frame.
    104 	 */
    105 	private final float[] magnitudes;
    106 	
    107 	/**
    108 	 * Detailed frequency estimates for each bin, using phase info
    109 	 */
    110 	private final float[] frequencyEstimates;
    111 	
    112 	/**
    113 	 * The phase information of the previous frame, or null.
    114 	 */
    115 	private float[] previousPhaseOffsets;
    116 
    117 	
    118 
    119 	public SpectralPeakProcessor(int bufferSize, int overlap, int sampleRate) {
    120 		fft = new FFT(bufferSize, new HammingWindow());
    121 
    122 		magnitudes = new float[bufferSize / 2];
    123 		currentPhaseOffsets = new float[bufferSize / 2];
    124 		frequencyEstimates = new float[bufferSize / 2];
    125 
    126 		dt = (bufferSize - overlap) / (double) sampleRate;
    127 		cbin = (double) (dt * sampleRate / (double) bufferSize);
    128 
    129 		inv_2pi = (double) (1.0 / (2.0 * Math.PI));
    130 		inv_deltat = (double) (1.0 / dt);
    131 		inv_2pideltat = (double) (inv_deltat * inv_2pi);
    132 
    133 		this.sampleRate = sampleRate;
    134 		
    135 	}
    136 
    137 	private void calculateFFT(float[] audio) {
    138 		// Clone to prevent overwriting audio data
    139 		float[] fftData = audio.clone();
    140 		// Extract the power and phase data
    141 		fft.powerPhaseFFT(fftData, magnitudes, currentPhaseOffsets);
    142 	}
    143 	
    144 	private void normalizeMagintudes(){
    145 		float maxMagnitude = (float) -1e6;
    146 		for(int i = 0;i<magnitudes.length;i++){
    147 			maxMagnitude = Math.max(maxMagnitude, magnitudes[i]);
    148 		}
    149 		
    150 		//log10 of the normalized value
    151 		//adding 75 makes sure the value is above zero, a bit ugly though...
    152 		for(int i = 1;i<magnitudes.length;i++){
    153 			magnitudes[i] = (float) (10 * Math.log10(magnitudes[i]/maxMagnitude)) + 75;
    154 		}
    155 	}
    156 
    157 	@Override
    158 	public boolean process(AudioEvent audioEvent) {
    159 		float[] audio = audioEvent.getFloatBuffer();
    160 	
    161 		// 1. Extract magnitudes, and phase using an FFT.
    162 		calculateFFT(audio);
    163 		
    164 		// 2. Estimate a detailed frequency for each bin.
    165 		calculateFrequencyEstimates();
    166 		
    167 		// 3. Normalize the each magnitude.
    168 		normalizeMagintudes();
    169 		
    170 		// 4. Store the current phase so it can be used for the next frequency estimates block. 
    171 		previousPhaseOffsets = currentPhaseOffsets.clone();
    172 	
    173 		return true;		
    174 	}
    175 	
    176 	@Override
    177 	public void processingFinished() {
    178 	}
    179 	
    180 	
    181 	/**
    182 	 * For each bin, calculate a precise frequency estimate using phase offset.
    183 	 */
    184 	private void calculateFrequencyEstimates() {
    185 		for(int i = 0;i < frequencyEstimates.length;i++){
    186 			frequencyEstimates[i] = getFrequencyForBin(i);
    187 		}
    188 	}
    189 	
    190 	/**
    191 	 * @return the magnitudes.
    192 	 */
    193 	public float[] getMagnitudes() {
    194 		return magnitudes.clone();
    195 	}
    196 	
    197 	/**
    198 	 * @return the precise frequency for each bin.
    199 	 */
    200 	public float[] getFrequencyEstimates(){
    201 		return frequencyEstimates.clone();
    202 	}
    203 	
    204 	/**
    205 	 * Calculates a frequency for a bin using phase info, if available.
    206 	 * @param binIndex The FFT bin index.
    207 	 * @return a frequency, in Hz, calculated using available phase info.
    208 	 */
    209 	private float getFrequencyForBin(int binIndex){
    210 		final float frequencyInHertz;
    211 		// use the phase delta information to get a more precise
    212 		// frequency estimate
    213 		// if the phase of the previous frame is available.
    214 		// See
    215 		// * Moore 1976
    216 		// "The use of phase vocoder in computer music applications"
    217 		// * Sethares et al. 2009 - Spectral Tools for Dynamic
    218 		// Tonality and Audio Morphing
    219 		// * Laroche and Dolson 1999
    220 		if (previousPhaseOffsets != null) {
    221 			float phaseDelta = currentPhaseOffsets[binIndex] - previousPhaseOffsets[binIndex];
    222 			long k = Math.round(cbin * binIndex - inv_2pi * phaseDelta);
    223 			frequencyInHertz = (float) (inv_2pideltat * phaseDelta  + inv_deltat * k);
    224 		} else {
    225 			frequencyInHertz = (float) fft.binToHz(binIndex, sampleRate);
    226 		}
    227 		return frequencyInHertz;
    228 	}
    229 	
    230 	/**
    231 	 * Calculate a noise floor for an array of magnitudes.
    232 	 * @param magnitudes The magnitudes of the current frame.
    233 	 * @param medianFilterLength The length of the median filter used to determine the noise floor.
    234 	 * @param noiseFloorFactor The noise floor is multiplied with this factor to determine if the
    235 	 * information is either noise or an interesting spectral peak.
    236 	 * @return a float array representing the noise floor.
    237 	 */
    238 	public static float[] calculateNoiseFloor(float[] magnitudes, int medianFilterLength, float noiseFloorFactor) {
    239 		double[] noiseFloorBuffer;
    240 		float[] noisefloor = new float[magnitudes.length];
    241 		
    242 		float median = (float) median(magnitudes.clone());
    243 		
    244 		// Naive median filter implementation.
    245 		// For each element take a median of surrounding values (noiseFloorBuffer) 
    246 		// Store the median as the noise floor.
    247 		for (int i = 0; i < magnitudes.length; i++) {
    248 			noiseFloorBuffer = new double[medianFilterLength];
    249 			int index = 0;
    250 			for (int j = i - medianFilterLength/2; j <= i + medianFilterLength/2 && index < noiseFloorBuffer.length; j++) {
    251 			  if(j >= 0 && j < magnitudes.length){
    252 				noiseFloorBuffer[index] = magnitudes[j];
    253 			  } else{
    254 				noiseFloorBuffer[index] = median;
    255 			  } 
    256 			  index++;
    257 			}		
    258 			// calculate the noise floor value.
    259 			noisefloor[i] = (float) (median(noiseFloorBuffer) * (noiseFloorFactor)) ;
    260 		}
    261 		
    262 		float rampLength = 12.0f;
    263 		for(int i = 0 ; i <= rampLength ; i++){
    264 			//ramp
    265 			float ramp = 1.0f;
    266 			ramp = (float) (-1 * (Math.log(i/rampLength))) + 1.0f;
    267 			noisefloor[i] = ramp * noisefloor[i];
    268 		}
    269 		
    270 		return noisefloor;
    271 	}
    272 	
    273 	/**
    274 	 * Finds the local magintude maxima and stores them in the given list.
    275 	 * @param magnitudes The magnitudes.
    276 	 * @param noisefloor The noise floor.
    277 	 * @return a list of local maxima.
    278 	 */
    279 	public static List<Integer> findLocalMaxima(float[] magnitudes,float[] noisefloor){
    280 		List<Integer> localMaximaIndexes = new ArrayList<Integer>();
    281 		for (int i = 1; i < magnitudes.length - 1; i++) {
    282 			boolean largerThanPrevious = (magnitudes[i - 1] < magnitudes[i]);
    283 			boolean largerThanNext = (magnitudes[i] > magnitudes[i + 1]);
    284 			boolean largerThanNoiseFloor = (magnitudes[i] >  noisefloor[i]);
    285 			if (largerThanPrevious && largerThanNext && largerThanNoiseFloor) {
    286 				localMaximaIndexes.add(i);
    287 			}
    288 		}
    289 		return localMaximaIndexes;
    290 	}
    291 	
    292 	/**
    293 	 * @param magnitudes the magnitudes.
    294 	 * @return the index for the maximum magnitude.
    295 	 */
    296 	private static int findMaxMagnitudeIndex(float[] magnitudes){
    297 		int maxMagnitudeIndex = 0;
    298 		float maxMagnitude = (float) -1e6;
    299 		for (int i = 1; i < magnitudes.length - 1; i++) {
    300 			if(magnitudes[i] > maxMagnitude){
    301 				maxMagnitude = magnitudes[i];
    302 				maxMagnitudeIndex = i;
    303 			}
    304 		}
    305 		return maxMagnitudeIndex;
    306 	}
    307 	
    308 	/**
    309 	 * 
    310 	 * @param magnitudes the magnitudes..
    311 	 * @param frequencyEstimates The frequency estimates for each bin.
    312 	 * @param localMaximaIndexes The indexes of the local maxima.
    313 	 * @param numberOfPeaks The requested number of peaks.
    314 	 * @param minDistanceInCents The minimum distance in cents between the peaks
    315 	 * @return A list with spectral peaks.
    316 	 */
    317 	public static List<SpectralPeak> findPeaks(float[] magnitudes, float[] frequencyEstimates, List<Integer> localMaximaIndexes, int numberOfPeaks, int minDistanceInCents){
    318 		int maxMagnitudeIndex = findMaxMagnitudeIndex(magnitudes);		
    319 		List<SpectralPeak> spectralPeakList = new ArrayList<SpectralPeak>();
    320 		
    321 		if(localMaximaIndexes.size()==0)
    322 			return spectralPeakList;
    323 		
    324 		float referenceFrequency=0;
    325 		//the frequency of the bin with the highest magnitude
    326 		referenceFrequency =  frequencyEstimates[maxMagnitudeIndex];
    327 		
    328 		//remove frequency estimates below zero
    329 		for(int i = 0 ; i < localMaximaIndexes.size() ; i++){
    330 			if(frequencyEstimates[localMaximaIndexes.get(i)] < 0 ){
    331 				localMaximaIndexes.remove(i);
    332 				frequencyEstimates[localMaximaIndexes.get(i)]=1;//Hz
    333 				i--;
    334 			}
    335 		}
    336 		
    337 		//filter the local maxima indexes, remove peaks that are too close to each other
    338 		//assumes that localmaximaIndexes is sorted from lowest to higest index
    339 		for(int i = 1 ; i < localMaximaIndexes.size() ; i++){
    340 			double centCurrent = PitchConverter.hertzToAbsoluteCent(frequencyEstimates[localMaximaIndexes.get(i)]);
    341 			double centPrev = PitchConverter.hertzToAbsoluteCent(frequencyEstimates[localMaximaIndexes.get(i-1)]);
    342 			double centDelta = centCurrent - centPrev;
    343 			if(centDelta  < minDistanceInCents ){
    344 				if(magnitudes[localMaximaIndexes.get(i)] > magnitudes[localMaximaIndexes.get(i-1)]){
    345 					localMaximaIndexes.remove(i-1);
    346 				}else{
    347 					localMaximaIndexes.remove(i);
    348 				}
    349 				i--;
    350 			}
    351 		}
    352 		
    353 		// Retrieve the maximum values for the indexes
    354 		float[] maxMagnitudes = new float[localMaximaIndexes.size()];
    355 		for(int i = 0 ; i < localMaximaIndexes.size() ; i++){
    356 			maxMagnitudes[i] = magnitudes[localMaximaIndexes.get(i)];
    357 		}
    358 		// Sort the magnitudes in ascending order
    359 		Arrays.sort(maxMagnitudes);
    360 		
    361 		// Find the threshold, the first value or somewhere in the array.
    362 		float peakthresh = maxMagnitudes[0];
    363 		if (maxMagnitudes.length > numberOfPeaks) {
    364 			peakthresh = maxMagnitudes[maxMagnitudes.length - numberOfPeaks];
    365 		}
    366 		
    367 		//store the peaks
    368 		for(Integer i : localMaximaIndexes){
    369 			if(magnitudes[i]>= peakthresh){
    370 				final float frequencyInHertz= frequencyEstimates[i];
    371 				//ignore frequencies lower than 30Hz
    372 				float binMagnitude = magnitudes[i];
    373 				SpectralPeak peak = new SpectralPeak(0,frequencyInHertz, binMagnitude, referenceFrequency,i);
    374 				spectralPeakList.add(peak);
    375 			}
    376 		}
    377 		return spectralPeakList;
    378 	}
    379 	
    380 	public static final float median(double[] arr){
    381 		return percentile(arr, 0.5);
    382 	}
    383 	
    384 	/**
    385 	*  Returns the p-th percentile of values in an array. You can use this
    386 	*  function to establish a threshold of acceptance. For example, you can
    387 	*  decide to examine candidates who score above the 90th percentile (0.9).
    388 	*  The elements of the input array are modified (sorted) by this method.
    389 	*
    390 	*  @param   arr  An array of sample data values that define relative standing.
    391 	*                The contents of the input array are sorted by this method.
    392 	*  @param   p    The percentile value in the range 0..1, inclusive.
    393 	*  @return  The p-th percentile of values in an array.  If p is not a multiple
    394 	*           of 1/(n - 1), this method interpolates to determine the value at
    395 	*           the p-th percentile.
    396 	**/
    397 	public static final float percentile( double[] arr, double p ) {
    398 		
    399 		if (p < 0 || p > 1)
    400 			throw new IllegalArgumentException("Percentile out of range.");
    401 		
    402 		//	Sort the array in ascending order.
    403 		Arrays.sort(arr);
    404 		
    405 		//	Calculate the percentile.
    406 		double t = p*(arr.length - 1);
    407 		int i = (int)t;
    408 		
    409 		return (float) ((i + 1 - t)*arr[i] + (t - i)*arr[i + 1]);
    410 	}
    411 	
    412 	public static double median(float[] m) {
    413 //		Sort the array in ascending order.
    414 		Arrays.sort(m);
    415 	    int middle = m.length/2;
    416 	    if (m.length%2 == 1) {
    417 	        return m[middle];
    418 	    } else {
    419 	        return (m[middle-1] + m[middle]) / 2.0;
    420 	    }
    421 	}
    422 	
    423 	
    424 	public static class SpectralPeak{
    425 		private final float frequencyInHertz;
    426 		private final float magnitude;
    427 		private final float referenceFrequency;
    428 		private final int bin;
    429 		/**
    430 		 * Timestamp in fractional seconds
    431 		 */
    432 		private final float timeStamp;
    433 		
    434 		public SpectralPeak(float timeStamp,float frequencyInHertz, float magnitude,float referenceFrequency,int bin){
    435 			this.frequencyInHertz = frequencyInHertz;
    436 			this.magnitude = magnitude;
    437 			this.referenceFrequency = referenceFrequency;
    438 			this.timeStamp = timeStamp;
    439 			this.bin = bin;
    440 		}
    441 		
    442 		public float getRelativeFrequencyInCents(){
    443 			if(referenceFrequency > 0 && frequencyInHertz > 0){
    444 				float refInCents = (float) PitchConverter.hertzToAbsoluteCent(referenceFrequency);
    445 				float valueInCents = (float) PitchConverter.hertzToAbsoluteCent(frequencyInHertz);
    446 				return valueInCents - refInCents;
    447 			}else{
    448 				return 0;
    449 			}
    450 		}
    451 		
    452 		public float getTimeStamp(){
    453 			return timeStamp;
    454 		}
    455 		
    456 		public float getMagnitude(){
    457 			return magnitude;
    458 		}
    459 		
    460 		public float getFrequencyInHertz(){
    461 			return frequencyInHertz;
    462 		}
    463 		
    464 		public float getRefFrequencyInHertz(){
    465 			return referenceFrequency;
    466 		}
    467 		
    468 		public String toString(){
    469 			return String.format("%.2f %.2f %.2f", frequencyInHertz,getRelativeFrequencyInCents(),magnitude);
    470 		}
    471 
    472 		public int getBin() {
    473 			return bin;
    474 		}
    475 	}
    476 
    477 
    478 }