plectrum

Plectrum: instrument tuner for Android
Log | Files | Refs | README | LICENSE

MFCC.java (8717B)


      1 /*
      2 *      _______                       _____   _____ _____  
      3 *     |__   __|                     |  __ \ / ____|  __ \ 
      4 *        | | __ _ _ __ ___  ___  ___| |  | | (___ | |__) |
      5 *        | |/ _` | '__/ __|/ _ \/ __| |  | |\___ \|  ___/ 
      6 *        | | (_| | |  \__ \ (_) \__ \ |__| |____) | |     
      7 *        |_|\__,_|_|  |___/\___/|___/_____/|_____/|_|     
      8 *                                                         
      9 * -------------------------------------------------------------
     10 *
     11 * TarsosDSP is developed by Joren Six at IPEM, University Ghent
     12 *  
     13 * -------------------------------------------------------------
     14 *
     15 *  Info: http://0110.be/tag/TarsosDSP
     16 *  Github: https://github.com/JorenSix/TarsosDSP
     17 *  Releases: http://0110.be/releases/TarsosDSP/
     18 *  
     19 *  TarsosDSP includes modified source code by various authors,
     20 *  for credits and info, see README.
     21 * 
     22 */
     23 
     24 package be.tarsos.dsp.mfcc;
     25 
     26 import be.tarsos.dsp.AudioEvent;
     27 import be.tarsos.dsp.AudioProcessor;
     28 import be.tarsos.dsp.util.fft.FFT;
     29 import be.tarsos.dsp.util.fft.HammingWindow;
     30 
     31 
     32 public class MFCC implements AudioProcessor {
     33 	
     34     private int amountOfCepstrumCoef; //Number of MFCCs per frame
     35     protected int amountOfMelFilters; //Number of mel filters (SPHINX-III uses 40)
     36     protected float lowerFilterFreq; //lower limit of filter (or 64 Hz?)
     37     protected float upperFilterFreq; //upper limit of filter (or half of sampling freq.?)
     38     
     39     float[] audioFloatBuffer;
     40     //Er zijn evenveel mfccs als er frames zijn!?
     41     //Per frame zijn er dan CEPSTRA coƫficienten
     42     private float[] mfcc;
     43     
     44     int centerFrequencies[];
     45 
     46     private FFT fft;
     47     private int samplesPerFrame; 
     48     private float sampleRate;
     49     
     50     public MFCC(int samplesPerFrame, int sampleRate){
     51     	this(samplesPerFrame, sampleRate, 30, 30, 133.3334f, ((float)sampleRate)/2f);
     52     }
     53 
     54     public MFCC(int samplesPerFrame, float sampleRate, int amountOfCepstrumCoef, int amountOfMelFilters, float lowerFilterFreq, float upperFilterFreq) {
     55         this.samplesPerFrame = samplesPerFrame; 
     56         this.sampleRate = sampleRate;
     57         this.amountOfCepstrumCoef = amountOfCepstrumCoef;
     58         this.amountOfMelFilters = amountOfMelFilters;
     59         this.fft = new FFT(samplesPerFrame, new HammingWindow());
     60         
     61         this.lowerFilterFreq = Math.max(lowerFilterFreq, 25);
     62         this.upperFilterFreq = Math.min(upperFilterFreq, sampleRate / 2);
     63         calculateFilterBanks();       
     64     }
     65 
     66 	@Override
     67 	public boolean process(AudioEvent audioEvent) {
     68 		audioFloatBuffer = audioEvent.getFloatBuffer().clone();
     69 
     70         // Magnitude Spectrum
     71         float bin[] = magnitudeSpectrum(audioFloatBuffer);
     72         // get Mel Filterbank
     73         float fbank[] = melFilter(bin, centerFrequencies);
     74         // Non-linear transformation
     75         float f[] = nonLinearTransformation(fbank);
     76         // Cepstral coefficients
     77         mfcc = cepCoefficients(f);
     78         
     79 		return true;
     80 	}
     81 
     82 	@Override
     83 	public void processingFinished() {
     84 
     85 	}
     86 	
     87     /**
     88      * computes the magnitude spectrum of the input frame<br>
     89      * calls: none<br>
     90      * called by: featureExtraction
     91      * @param frame Input frame signal
     92      * @return Magnitude Spectrum array
     93      */
     94     public float[] magnitudeSpectrum(float frame[]){
     95         float magSpectrum[] = new float[frame.length];
     96         
     97         // calculate FFT for current frame
     98         
     99         fft.forwardTransform(frame);
    100         
    101         // calculate magnitude spectrum
    102         for (int k = 0; k < frame.length/2; k++){
    103         	magSpectrum[frame.length/2+k] = fft.modulus(frame, frame.length/2-1-k);
    104         	magSpectrum[frame.length/2-1-k] = magSpectrum[frame.length/2+k];        	
    105         }
    106 
    107         return magSpectrum;
    108     }
    109 	
    110     /**
    111      * calculates the FFT bin indices<br> calls: none<br> called by:
    112      * featureExtraction
    113      *
    114      */
    115  
    116     public final void calculateFilterBanks() {
    117         centerFrequencies = new int[amountOfMelFilters + 2];
    118 
    119         centerFrequencies[0] = Math.round(lowerFilterFreq / sampleRate * samplesPerFrame);
    120         centerFrequencies[centerFrequencies.length - 1] = (int) (samplesPerFrame / 2);
    121 
    122         double mel[] = new double[2];
    123         mel[0] = freqToMel(lowerFilterFreq);
    124         mel[1] = freqToMel(upperFilterFreq);
    125         
    126         float factor = (float)((mel[1] - mel[0]) / (amountOfMelFilters + 1));
    127         //Calculates te centerfrequencies.
    128         for (int i = 1; i <= amountOfMelFilters; i++) {
    129             float fc = (inverseMel(mel[0] + factor * i) / sampleRate) * samplesPerFrame;
    130             centerFrequencies[i] = Math.round(fc);
    131         }
    132 
    133     }
    134     
    135 	
    136     /**
    137      * the output of mel filtering is subjected to a logarithm function (natural logarithm)<br>
    138      * calls: none<br>
    139      * called by: featureExtraction
    140      * @param fbank Output of mel filtering
    141      * @return Natural log of the output of mel filtering
    142      */
    143     public float[] nonLinearTransformation(float fbank[]){
    144         float f[] = new float[fbank.length];
    145         final float FLOOR = -50;
    146         
    147         for (int i = 0; i < fbank.length; i++){
    148             f[i] = (float) Math.log(fbank[i]);
    149             
    150             // check if ln() returns a value less than the floor
    151             if (f[i] < FLOOR) f[i] = FLOOR;
    152         }
    153         
    154         return f;
    155     }
    156     
    157     /**
    158      * Calculate the output of the mel filter<br> calls: none called by:
    159      * featureExtraction
    160      * @param bin The bins.
    161      * @param centerFrequencies  The frequency centers.
    162      * @return Output of mel filter.
    163      */
    164     public float[] melFilter(float bin[], int centerFrequencies[]) {
    165         float temp[] = new float[amountOfMelFilters + 2];
    166 
    167         for (int k = 1; k <= amountOfMelFilters; k++) {
    168             float num1 = 0, num2 = 0;
    169 
    170             float den = (centerFrequencies[k] - centerFrequencies[k - 1] + 1);
    171 
    172             for (int i = centerFrequencies[k - 1]; i <= centerFrequencies[k]; i++) {
    173                 num1 += bin[i] * (i - centerFrequencies[k - 1] + 1);
    174             }
    175             num1 /= den;
    176 
    177             den = (centerFrequencies[k + 1] - centerFrequencies[k] + 1);
    178 
    179             for (int i = centerFrequencies[k] + 1; i <= centerFrequencies[k + 1]; i++) {
    180                 num2 += bin[i] * (1 - ((i - centerFrequencies[k]) / den));
    181             }
    182 
    183             temp[k] = num1 + num2;
    184         }
    185 
    186         float fbank[] = new float[amountOfMelFilters];
    187         
    188         for (int i = 0; i < amountOfMelFilters; i++) {
    189             fbank[i] = temp[i + 1];
    190         }
    191 
    192         return fbank;
    193     }
    194     
    195     
    196     /**
    197      * Cepstral coefficients are calculated from the output of the Non-linear Transformation method<br>
    198      * calls: none<br>
    199      * called by: featureExtraction
    200      * @param f Output of the Non-linear Transformation method
    201      * @return Cepstral Coefficients
    202      */
    203     public float[] cepCoefficients(float f[]){
    204         float cepc[] = new float[amountOfCepstrumCoef];
    205         
    206         for (int i = 0; i < cepc.length; i++){
    207             for (int j = 0; j < f.length; j++){
    208                 cepc[i] += f[j] * Math.cos(Math.PI * i / f.length * (j + 0.5));
    209             }
    210         }
    211         
    212         return cepc;
    213     }
    214     
    215 //    /**
    216 //     * calculates center frequency<br>
    217 //     * calls: none<br>
    218 //     * called by: featureExtraction
    219 //     * @param i Index of mel filters
    220 //     * @return Center Frequency
    221 //     */
    222 //    private static float centerFreq(int i,float samplingRate){
    223 //        double mel[] = new double[2];
    224 //        mel[0] = freqToMel(lowerFilterFreq);
    225 //        mel[1] = freqToMel(samplingRate / 2);
    226 //        
    227 //        // take inverse mel of:
    228 //        double temp = mel[0] + ((mel[1] - mel[0]) / (amountOfMelFilters + 1)) * i;
    229 //        return inverseMel(temp);
    230 //    }
    231     
    232     /**
    233      * convert frequency to mel-frequency<br>
    234      * calls: none<br>
    235      * called by: featureExtraction
    236      * @param freq Frequency
    237      * @return Mel-Frequency
    238      */
    239     protected static float freqToMel(float freq){
    240         return (float) (2595 * log10(1 + freq / 700));
    241     }
    242     
    243     /**
    244      * calculates the inverse of Mel Frequency<br>
    245      * calls: none<br>
    246      * called by: featureExtraction
    247      */
    248     private static float inverseMel(double x) {
    249         return (float) (700 * (Math.pow(10, x / 2595) - 1));
    250     }
    251     
    252     /**
    253      * calculates logarithm with base 10<br>
    254      * calls: none<br>
    255      * called by: featureExtraction
    256      * @param value Number to take the log of
    257      * @return base 10 logarithm of the input values
    258      */
    259     protected static float log10(float value){
    260         return (float) (Math.log(value) / Math.log(10));
    261     }
    262 
    263 	public float[] getMFCC() {
    264 		return mfcc.clone();
    265 	}
    266 
    267 	public int[] getCenterFrequencies() {
    268 		return centerFrequencies;
    269 	}
    270 }