MFCC.java (8717B)
1 /* 2 * _______ _____ _____ _____ 3 * |__ __| | __ \ / ____| __ \ 4 * | | __ _ _ __ ___ ___ ___| | | | (___ | |__) | 5 * | |/ _` | '__/ __|/ _ \/ __| | | |\___ \| ___/ 6 * | | (_| | | \__ \ (_) \__ \ |__| |____) | | 7 * |_|\__,_|_| |___/\___/|___/_____/|_____/|_| 8 * 9 * ------------------------------------------------------------- 10 * 11 * TarsosDSP is developed by Joren Six at IPEM, University Ghent 12 * 13 * ------------------------------------------------------------- 14 * 15 * Info: http://0110.be/tag/TarsosDSP 16 * Github: https://github.com/JorenSix/TarsosDSP 17 * Releases: http://0110.be/releases/TarsosDSP/ 18 * 19 * TarsosDSP includes modified source code by various authors, 20 * for credits and info, see README. 21 * 22 */ 23 24 package be.tarsos.dsp.mfcc; 25 26 import be.tarsos.dsp.AudioEvent; 27 import be.tarsos.dsp.AudioProcessor; 28 import be.tarsos.dsp.util.fft.FFT; 29 import be.tarsos.dsp.util.fft.HammingWindow; 30 31 32 public class MFCC implements AudioProcessor { 33 34 private int amountOfCepstrumCoef; //Number of MFCCs per frame 35 protected int amountOfMelFilters; //Number of mel filters (SPHINX-III uses 40) 36 protected float lowerFilterFreq; //lower limit of filter (or 64 Hz?) 37 protected float upperFilterFreq; //upper limit of filter (or half of sampling freq.?) 38 39 float[] audioFloatBuffer; 40 //Er zijn evenveel mfccs als er frames zijn!? 41 //Per frame zijn er dan CEPSTRA coƫficienten 42 private float[] mfcc; 43 44 int centerFrequencies[]; 45 46 private FFT fft; 47 private int samplesPerFrame; 48 private float sampleRate; 49 50 public MFCC(int samplesPerFrame, int sampleRate){ 51 this(samplesPerFrame, sampleRate, 30, 30, 133.3334f, ((float)sampleRate)/2f); 52 } 53 54 public MFCC(int samplesPerFrame, float sampleRate, int amountOfCepstrumCoef, int amountOfMelFilters, float lowerFilterFreq, float upperFilterFreq) { 55 this.samplesPerFrame = samplesPerFrame; 56 this.sampleRate = sampleRate; 57 this.amountOfCepstrumCoef = amountOfCepstrumCoef; 58 this.amountOfMelFilters = amountOfMelFilters; 59 this.fft = new FFT(samplesPerFrame, new HammingWindow()); 60 61 this.lowerFilterFreq = Math.max(lowerFilterFreq, 25); 62 this.upperFilterFreq = Math.min(upperFilterFreq, sampleRate / 2); 63 calculateFilterBanks(); 64 } 65 66 @Override 67 public boolean process(AudioEvent audioEvent) { 68 audioFloatBuffer = audioEvent.getFloatBuffer().clone(); 69 70 // Magnitude Spectrum 71 float bin[] = magnitudeSpectrum(audioFloatBuffer); 72 // get Mel Filterbank 73 float fbank[] = melFilter(bin, centerFrequencies); 74 // Non-linear transformation 75 float f[] = nonLinearTransformation(fbank); 76 // Cepstral coefficients 77 mfcc = cepCoefficients(f); 78 79 return true; 80 } 81 82 @Override 83 public void processingFinished() { 84 85 } 86 87 /** 88 * computes the magnitude spectrum of the input frame<br> 89 * calls: none<br> 90 * called by: featureExtraction 91 * @param frame Input frame signal 92 * @return Magnitude Spectrum array 93 */ 94 public float[] magnitudeSpectrum(float frame[]){ 95 float magSpectrum[] = new float[frame.length]; 96 97 // calculate FFT for current frame 98 99 fft.forwardTransform(frame); 100 101 // calculate magnitude spectrum 102 for (int k = 0; k < frame.length/2; k++){ 103 magSpectrum[frame.length/2+k] = fft.modulus(frame, frame.length/2-1-k); 104 magSpectrum[frame.length/2-1-k] = magSpectrum[frame.length/2+k]; 105 } 106 107 return magSpectrum; 108 } 109 110 /** 111 * calculates the FFT bin indices<br> calls: none<br> called by: 112 * featureExtraction 113 * 114 */ 115 116 public final void calculateFilterBanks() { 117 centerFrequencies = new int[amountOfMelFilters + 2]; 118 119 centerFrequencies[0] = Math.round(lowerFilterFreq / sampleRate * samplesPerFrame); 120 centerFrequencies[centerFrequencies.length - 1] = (int) (samplesPerFrame / 2); 121 122 double mel[] = new double[2]; 123 mel[0] = freqToMel(lowerFilterFreq); 124 mel[1] = freqToMel(upperFilterFreq); 125 126 float factor = (float)((mel[1] - mel[0]) / (amountOfMelFilters + 1)); 127 //Calculates te centerfrequencies. 128 for (int i = 1; i <= amountOfMelFilters; i++) { 129 float fc = (inverseMel(mel[0] + factor * i) / sampleRate) * samplesPerFrame; 130 centerFrequencies[i] = Math.round(fc); 131 } 132 133 } 134 135 136 /** 137 * the output of mel filtering is subjected to a logarithm function (natural logarithm)<br> 138 * calls: none<br> 139 * called by: featureExtraction 140 * @param fbank Output of mel filtering 141 * @return Natural log of the output of mel filtering 142 */ 143 public float[] nonLinearTransformation(float fbank[]){ 144 float f[] = new float[fbank.length]; 145 final float FLOOR = -50; 146 147 for (int i = 0; i < fbank.length; i++){ 148 f[i] = (float) Math.log(fbank[i]); 149 150 // check if ln() returns a value less than the floor 151 if (f[i] < FLOOR) f[i] = FLOOR; 152 } 153 154 return f; 155 } 156 157 /** 158 * Calculate the output of the mel filter<br> calls: none called by: 159 * featureExtraction 160 * @param bin The bins. 161 * @param centerFrequencies The frequency centers. 162 * @return Output of mel filter. 163 */ 164 public float[] melFilter(float bin[], int centerFrequencies[]) { 165 float temp[] = new float[amountOfMelFilters + 2]; 166 167 for (int k = 1; k <= amountOfMelFilters; k++) { 168 float num1 = 0, num2 = 0; 169 170 float den = (centerFrequencies[k] - centerFrequencies[k - 1] + 1); 171 172 for (int i = centerFrequencies[k - 1]; i <= centerFrequencies[k]; i++) { 173 num1 += bin[i] * (i - centerFrequencies[k - 1] + 1); 174 } 175 num1 /= den; 176 177 den = (centerFrequencies[k + 1] - centerFrequencies[k] + 1); 178 179 for (int i = centerFrequencies[k] + 1; i <= centerFrequencies[k + 1]; i++) { 180 num2 += bin[i] * (1 - ((i - centerFrequencies[k]) / den)); 181 } 182 183 temp[k] = num1 + num2; 184 } 185 186 float fbank[] = new float[amountOfMelFilters]; 187 188 for (int i = 0; i < amountOfMelFilters; i++) { 189 fbank[i] = temp[i + 1]; 190 } 191 192 return fbank; 193 } 194 195 196 /** 197 * Cepstral coefficients are calculated from the output of the Non-linear Transformation method<br> 198 * calls: none<br> 199 * called by: featureExtraction 200 * @param f Output of the Non-linear Transformation method 201 * @return Cepstral Coefficients 202 */ 203 public float[] cepCoefficients(float f[]){ 204 float cepc[] = new float[amountOfCepstrumCoef]; 205 206 for (int i = 0; i < cepc.length; i++){ 207 for (int j = 0; j < f.length; j++){ 208 cepc[i] += f[j] * Math.cos(Math.PI * i / f.length * (j + 0.5)); 209 } 210 } 211 212 return cepc; 213 } 214 215 // /** 216 // * calculates center frequency<br> 217 // * calls: none<br> 218 // * called by: featureExtraction 219 // * @param i Index of mel filters 220 // * @return Center Frequency 221 // */ 222 // private static float centerFreq(int i,float samplingRate){ 223 // double mel[] = new double[2]; 224 // mel[0] = freqToMel(lowerFilterFreq); 225 // mel[1] = freqToMel(samplingRate / 2); 226 // 227 // // take inverse mel of: 228 // double temp = mel[0] + ((mel[1] - mel[0]) / (amountOfMelFilters + 1)) * i; 229 // return inverseMel(temp); 230 // } 231 232 /** 233 * convert frequency to mel-frequency<br> 234 * calls: none<br> 235 * called by: featureExtraction 236 * @param freq Frequency 237 * @return Mel-Frequency 238 */ 239 protected static float freqToMel(float freq){ 240 return (float) (2595 * log10(1 + freq / 700)); 241 } 242 243 /** 244 * calculates the inverse of Mel Frequency<br> 245 * calls: none<br> 246 * called by: featureExtraction 247 */ 248 private static float inverseMel(double x) { 249 return (float) (700 * (Math.pow(10, x / 2595) - 1)); 250 } 251 252 /** 253 * calculates logarithm with base 10<br> 254 * calls: none<br> 255 * called by: featureExtraction 256 * @param value Number to take the log of 257 * @return base 10 logarithm of the input values 258 */ 259 protected static float log10(float value){ 260 return (float) (Math.log(value) / Math.log(10)); 261 } 262 263 public float[] getMFCC() { 264 return mfcc.clone(); 265 } 266 267 public int[] getCenterFrequencies() { 268 return centerFrequencies; 269 } 270 }