plectrum

Plectrum: instrument tuner for Android
Log | Files | Refs | README | LICENSE

BeatRootSpectralFluxOnsetDetector.java (9265B)


      1 /*
      2 *      _______                       _____   _____ _____  
      3 *     |__   __|                     |  __ \ / ____|  __ \ 
      4 *        | | __ _ _ __ ___  ___  ___| |  | | (___ | |__) |
      5 *        | |/ _` | '__/ __|/ _ \/ __| |  | |\___ \|  ___/ 
      6 *        | | (_| | |  \__ \ (_) \__ \ |__| |____) | |     
      7 *        |_|\__,_|_|  |___/\___/|___/_____/|_____/|_|     
      8 *                                                         
      9 * -------------------------------------------------------------
     10 *
     11 * TarsosDSP is developed by Joren Six at IPEM, University Ghent
     12 *  
     13 * -------------------------------------------------------------
     14 *
     15 *  Info: http://0110.be/tag/TarsosDSP
     16 *  Github: https://github.com/JorenSix/TarsosDSP
     17 *  Releases: http://0110.be/releases/TarsosDSP/
     18 *  
     19 *  TarsosDSP includes modified source code by various authors,
     20 *  for credits and info, see README.
     21 * 
     22 */
     23 
     24 package be.tarsos.dsp.onsets;
     25 
     26 import java.util.Arrays;
     27 import java.util.Iterator;
     28 import java.util.LinkedList;
     29 
     30 import be.tarsos.dsp.AudioDispatcher;
     31 import be.tarsos.dsp.AudioEvent;
     32 import be.tarsos.dsp.AudioProcessor;
     33 import be.tarsos.dsp.beatroot.Peaks;
     34 import be.tarsos.dsp.util.fft.FFT;
     35 import be.tarsos.dsp.util.fft.ScaledHammingWindow;
     36 
     37 /**
     38  * <p>
     39  * A non real-time spectral flux onset detection method, as implemented in the
     40  * BeatRoot system of Centre for Digital Music, Queen Mary, University of
     41  * London.
     42  * </p>
     43  * 
     44  * <p>
     45  * This onset detection function does not, NOT work in real-time. It analyzes an
     46  * audio-stream and detects onsets during a post processing step.
     47  * </p>
     48  * 
     49  * @author Joren Six
     50  * @author Simon Dixon
     51  */
     52 public class BeatRootSpectralFluxOnsetDetector implements AudioProcessor, OnsetDetector {
     53 	/** RMS amplitude of the current frame. */
     54 	private double frameRMS;
     55 	
     56 	/** The number of overlapping frames of audio data which have been read. */
     57 	private int frameCount;
     58 
     59 	/** Long term average frame energy (in frequency domain representation). */
     60 	private double ltAverage;
     61 
     62 	/** The real part of the data for the in-place FFT computation.
     63 	 *  Since input data is real, this initially contains the input data. */
     64 	private float[] reBuffer;
     65 
     66 	/** The imaginary part of the data for the in-place FFT computation.
     67 	 *  Since input data is real, this initially contains zeros. */
     68 	private float[] imBuffer;
     69 
     70 	/** Spectral flux onset detection function, indexed by frame. */
     71 	private double[] spectralFlux;
     72 	
     73 	/** A mapping function for mapping FFT bins to final frequency bins.
     74 	 *  The mapping is linear (1-1) until the resolution reaches 2 points per
     75 	 *  semitone, then logarithmic with a semitone resolution.  e.g. for
     76 	 *  44.1kHz sampling rate and fftSize of 2048 (46ms), bin spacing is
     77 	 *  21.5Hz, which is mapped linearly for bins 0-34 (0 to 732Hz), and
     78 	 *  logarithmically for the remaining bins (midi notes 79 to 127, bins 35 to
     79 	 *  83), where all energy above note 127 is mapped into the final bin. */
     80 	private int[] freqMap;
     81 
     82 	/** The number of entries in <code>freqMap</code>. Note that the length of
     83 	 *  the array is greater, because its size is not known at creation time. */
     84 	private int freqMapSize;
     85 
     86 	/** The magnitude spectrum of the most recent frame.
     87 	 *  Used for calculating the spectral flux. */
     88 	private float[] prevFrame;
     89 	
     90 	/** The magnitude spectrum of the current frame. */
     91 	private double[] newFrame;
     92 
     93 	/** The magnitude spectra of all frames, used for plotting the spectrogram. */
     94 	private double[][] frames;
     95 	
     96 	/** The RMS energy of all frames. */
     97 	private double[] energy;
     98 	
     99 	/** Spacing of audio frames in samples (see <code>hopTime</code>) */
    100 	protected int hopSize;
    101 
    102 	/** The size of an FFT frame in samples (see <code>fftTime</code>) */
    103 	protected int fftSize;
    104 
    105 	/** Total number of audio frames if known, or -1 for live or compressed input. */
    106 	private int totalFrames;
    107 	
    108 	/** RMS frame energy below this value results in the frame being set to zero,
    109 	 *  so that normalization does not have undesired side-effects. */
    110 	public static double silenceThreshold = 0.0004;
    111 	
    112 	/** For dynamic range compression, this value is added to the log magnitude
    113 	 *  in each frequency bin and any remaining negative values are then set to zero.
    114 	 */
    115 	public static double rangeThreshold = 10;
    116 	
    117 	/** Determines method of normalization. Values can be:<ul>
    118 	 *  <li>0: no normalization</li>
    119 	 *  <li>1: normalization by current frame energy</li>
    120 	 *  <li>2: normalization by exponential average of frame energy</li>
    121 	 *  </ul>
    122 	 */
    123 	public static int normaliseMode = 2;
    124 	
    125 	/** Ratio between rate of sampling the signal energy (for the amplitude envelope) and the hop size */
    126 	public static int energyOversampleFactor = 2;
    127 	
    128 	private OnsetHandler handler;
    129 	
    130 	private double hopTime;
    131 	
    132 	private final FFT fft;
    133 	
    134 	public BeatRootSpectralFluxOnsetDetector(AudioDispatcher d,int fftSize, int hopSize){
    135 		
    136 		this.hopSize = hopSize; 
    137 		this.hopTime = hopSize/d.getFormat().getSampleRate();
    138 		this.fftSize = fftSize;
    139 
    140 		System.err.println("Please use the ComplexOnset detector: BeatRootSpectralFluxOnsetDetector does currenlty not support streaming");
    141 		//no overlap
    142 		//FIXME:		
    143 		int durationInFrames = -1000; 
    144 		totalFrames = (int)(durationInFrames / hopSize) + 4;
    145 		energy = new double[totalFrames*energyOversampleFactor];
    146 		spectralFlux = new double[totalFrames];
    147 		
    148 		reBuffer = new float[fftSize/2];
    149 		imBuffer = new float[fftSize/2];
    150 		prevFrame = new float[fftSize/2];
    151 		
    152 		makeFreqMap(fftSize, d.getFormat().getSampleRate());
    153 		
    154 		newFrame = new double[freqMapSize];
    155 		frames = new double[totalFrames][freqMapSize];
    156 		handler = new PrintOnsetHandler();
    157 		fft = new FFT(fftSize,new ScaledHammingWindow());
    158 	}
    159 
    160 	@Override
    161 	public boolean process(AudioEvent audioEvent) {
    162 		frameRMS = audioEvent.getRMS()/2.0;
    163 		
    164 		float[] audioBuffer = audioEvent.getFloatBuffer().clone();
    165 	
    166 		Arrays.fill(imBuffer, 0);
    167 		fft.powerPhaseFFTBeatRootOnset(audioBuffer, reBuffer, imBuffer);
    168 		Arrays.fill(newFrame, 0);
    169 		
    170 		double flux = 0;
    171 		for (int i = 0; i < fftSize/2; i++) {
    172 			if (reBuffer[i] > prevFrame[i])
    173 				flux += reBuffer[i] - prevFrame[i];
    174 			newFrame[freqMap[i]] += reBuffer[i];
    175 		}
    176 		spectralFlux[frameCount] = flux;
    177 		for (int i = 0; i < freqMapSize; i++)
    178 			frames[frameCount][i] = newFrame[i];
    179 	
    180 		int sz = (fftSize - hopSize) / energyOversampleFactor;
    181 		int index = hopSize; 
    182 		for (int j = 0; j < energyOversampleFactor; j++) {
    183 			double newEnergy = 0;
    184 			for (int i = 0; i < sz; i++) {
    185 				newEnergy += audioBuffer[index] * audioBuffer[index];
    186 				if (++index == fftSize)
    187 					index = 0;				
    188 			}
    189 			energy[frameCount * energyOversampleFactor + j] =
    190 					newEnergy / sz <= 1e-6? 0: Math.log(newEnergy / sz) + 13.816;
    191 		}
    192 		double decay = frameCount >= 200? 0.99:
    193 					(frameCount < 100? 0: (frameCount - 100) / 100.0);
    194 		if (ltAverage == 0)
    195 			ltAverage = frameRMS;
    196 		else
    197 			ltAverage = ltAverage * decay + frameRMS * (1.0 - decay);
    198 		if (frameRMS <= silenceThreshold)
    199 			for (int i = 0; i < freqMapSize; i++)
    200 				frames[frameCount][i] = 0;
    201 		else {
    202 			if (normaliseMode == 1)
    203 				for (int i = 0; i < freqMapSize; i++)
    204 					frames[frameCount][i] /= frameRMS;
    205 			else if (normaliseMode == 2)
    206 				for (int i = 0; i < freqMapSize; i++)
    207 					frames[frameCount][i] /= ltAverage;
    208 			for (int i = 0; i < freqMapSize; i++) {
    209 				frames[frameCount][i] = Math.log(frames[frameCount][i]) + rangeThreshold;
    210 				if (frames[frameCount][i] < 0)
    211 					frames[frameCount][i] = 0;
    212 			}
    213 		}
    214 
    215 		float[] tmp = prevFrame;
    216 		prevFrame = reBuffer;
    217 		reBuffer = tmp;
    218 		frameCount++;
    219 		return true;
    220 	}
    221 	
    222 	/** 
    223 	 *  Creates a map of FFT frequency bins to comparison bins.
    224 	 *  Where the spacing of FFT bins is less than 0.5 semitones, the mapping is
    225 	 *  one to one. Where the spacing is greater than 0.5 semitones, the FFT
    226 	 *  energy is mapped into semitone-wide bins. No scaling is performed; that
    227 	 *  is the energy is summed into the comparison bins. See also
    228 	 *  processFrame()
    229 	 */
    230 	protected void makeFreqMap(int fftSize, float sampleRate) {
    231 		freqMap = new int[fftSize/2+1];
    232 		double binWidth = sampleRate / fftSize;
    233 		int crossoverBin = (int)(2 / (Math.pow(2, 1/12.0) - 1));
    234 		int crossoverMidi = (int)Math.round(Math.log(crossoverBin*binWidth/440)/
    235 														Math.log(2) * 12 + 69);
    236 		// freq = 440 * Math.pow(2, (midi-69)/12.0) / binWidth;
    237 		int i = 0;
    238 		while (i <= crossoverBin)
    239 			freqMap[i++] = i;
    240 		while (i <= fftSize/2) {
    241 			double midi = Math.log(i*binWidth/440) / Math.log(2) * 12 + 69;
    242 			if (midi > 127)
    243 				midi = 127;
    244 			freqMap[i++] = crossoverBin + (int)Math.round(midi) - crossoverMidi;
    245 		}
    246 		freqMapSize = freqMap[i-1] + 1;
    247 	} // makeFreqMap()
    248 	
    249 	
    250 	private void findOnsets(double p1, double p2){
    251 		LinkedList<Integer> peaks = Peaks.findPeaks(spectralFlux, (int)Math.round(0.06 / hopTime), p1, p2, true);
    252 		Iterator<Integer> it = peaks.iterator();
    253 	
    254 		double minSalience = Peaks.min(spectralFlux);
    255 		for (int i = 0; i < peaks.size(); i++) {
    256 			int index = it.next();
    257 			double time  = index * hopTime;
    258 			double salience = spectralFlux[index] - minSalience;
    259 			handler.handleOnset(time,salience);
    260 		}
    261 	}
    262 	
    263 	public void setHandler(OnsetHandler handler) {
    264 		this.handler = handler;
    265 	}
    266 
    267 	@Override
    268 	public void processingFinished() {
    269 		double p1 = 0.35;
    270 		double p2 = 0.84;
    271 		Peaks.normalise(spectralFlux);
    272 		findOnsets(p1, p2);
    273 	}
    274 }