plectrum

Plectrum: instrument tuner for Android
Log | Files | Refs | README | LICENSE

WaveformSimilarityBasedOverlapAdd.java (13307B)


      1 /*
      2 *      _______                       _____   _____ _____  
      3 *     |__   __|                     |  __ \ / ____|  __ \ 
      4 *        | | __ _ _ __ ___  ___  ___| |  | | (___ | |__) |
      5 *        | |/ _` | '__/ __|/ _ \/ __| |  | |\___ \|  ___/ 
      6 *        | | (_| | |  \__ \ (_) \__ \ |__| |____) | |     
      7 *        |_|\__,_|_|  |___/\___/|___/_____/|_____/|_|     
      8 *                                                         
      9 * -------------------------------------------------------------
     10 *
     11 * TarsosDSP is developed by Joren Six at IPEM, University Ghent
     12 *  
     13 * -------------------------------------------------------------
     14 *
     15 *  Info: http://0110.be/tag/TarsosDSP
     16 *  Github: https://github.com/JorenSix/TarsosDSP
     17 *  Releases: http://0110.be/releases/TarsosDSP/
     18 *  
     19 *  TarsosDSP includes modified source code by various authors,
     20 *  for credits and info, see README.
     21 * 
     22 */
     23 
     24 
     25 package be.tarsos.dsp;
     26 
     27 
     28 /**
     29  *
     30  * <p>
     31  * An overlap-add technique based on waveform similarity (WSOLA) for high
     32  * quality time-scale modification of speech
     33  * </p>
     34  * <p>
     35  * A concept of waveform similarity for tackling the problem of time-scale
     36  * modification of speech is proposed. It is worked out in the context of
     37  * short-time Fourier transform representations. The resulting WSOLA
     38  * (waveform-similarity-based synchronized overlap-add) algorithm produces
     39  * high-quality speech output, is algorithmically and computationally efficient
     40  * and robust, and allows for online processing with arbitrary time-scaling
     41  * factors that may be specified in a time-varying fashion and can be chosen
     42  * over a wide continuous range of values.
     43  * </p>
     44  * <p>
     45  * Inspired by the work soundtouch by Olli Parviainen,
     46  * http://www.surina.net/soundtouch, especially the TDStrech.cpp file.
     47  * </p>
     48  * @author Joren Six
     49  * @author Olli Parviainen
     50  */
     51 public class WaveformSimilarityBasedOverlapAdd implements AudioProcessor {	
     52 	private int seekWindowLength;
     53 	private int seekLength;
     54 	private int overlapLength;
     55 	
     56 	private float[] pMidBuffer;	
     57 	private float[] pRefMidBuffer;
     58 	private float[] outputFloatBuffer;
     59 	
     60 	private int intskip;
     61 	private int sampleReq; 
     62 	
     63 	private double tempo;
     64 	
     65 	private AudioDispatcher dispatcher;
     66 
     67 	private Parameters newParameters;
     68 	
     69 	/**
     70 	 * Create a new instance based on algorithm parameters for a certain audio format.
     71 	 * @param params The parameters for the algorithm.
     72 	 */
     73 	public WaveformSimilarityBasedOverlapAdd(Parameters  params){
     74 		setParameters(params);
     75 		applyNewParameters();
     76 	}
     77 	
     78 	public void setParameters(Parameters params){
     79 		newParameters = params;
     80 	}
     81 	
     82 	public void setDispatcher(AudioDispatcher newDispatcher){
     83 		this.dispatcher = newDispatcher;
     84 	}
     85 	
     86 	private void applyNewParameters(){
     87 		Parameters params = newParameters;
     88 		int oldOverlapLength = overlapLength;
     89 		overlapLength = (int) ((params.getSampleRate() * params.getOverlapMs())/1000);
     90 		seekWindowLength = (int) ((params.getSampleRate() * params.getSequenceMs())/1000);
     91 		seekLength = (int) ((params.getSampleRate() *  params.getSeekWindowMs())/1000);
     92 		
     93 		tempo = params.getTempo();
     94 		
     95 		//pMidBuffer and pRefBuffer are initialized with 8 times the needed length to prevent a reset
     96 		//of the arrays when overlapLength changes.
     97 		
     98 		if(overlapLength > oldOverlapLength * 8 && pMidBuffer==null){
     99 			pMidBuffer = new float[overlapLength * 8]; //overlapLengthx2?
    100 			pRefMidBuffer = new float[overlapLength * 8];//overlapLengthx2?
    101 			System.out.println("New overlapLength" + overlapLength);
    102 		}
    103 		
    104 		double nominalSkip = tempo * (seekWindowLength - overlapLength);
    105 		intskip = (int) (nominalSkip + 0.5);
    106 		
    107 		sampleReq = Math.max(intskip + overlapLength, seekWindowLength) + seekLength;
    108 		
    109 		float[] prevOutputBuffer = outputFloatBuffer;
    110 		outputFloatBuffer = new float[getOutputBufferSize()];
    111 		if(prevOutputBuffer!=null){
    112 			System.out.println("Copy outputFloatBuffer contents");
    113 			for(int i = 0 ; i < prevOutputBuffer.length && i < outputFloatBuffer.length ; i++){
    114 			 outputFloatBuffer[i] = prevOutputBuffer[i];
    115 			}
    116 		}
    117 		
    118 		newParameters = null;
    119 	}
    120 	
    121 	public int getInputBufferSize(){
    122 		return sampleReq;
    123 	}
    124 	
    125 	private int getOutputBufferSize(){
    126 		return seekWindowLength - overlapLength;
    127 	}
    128 	
    129 	public int getOverlap(){
    130 		return sampleReq-intskip;
    131 	}
    132 	
    133 	
    134 	/**
    135 	 * Overlaps the sample in output with the samples in input.
    136 	 * @param output The output buffer.
    137 	 * @param input The input buffer.
    138 	 */
    139 	private void overlap(final float[] output, int outputOffset, float[] input,int inputOffset){
    140 		for(int i = 0 ; i < overlapLength ; i++){
    141 			int itemp = overlapLength - i;
    142 			output[i + outputOffset] = (input[i + inputOffset] * i + pMidBuffer[i] * itemp ) / overlapLength;  
    143 		}
    144 	}
    145 	
    146 	
    147 	/**
    148 	 * Seeks for the optimal overlap-mixing position.
    149 	 * 
    150 	 * The best position is determined as the position where the two overlapped
    151 	 * sample sequences are 'most alike', in terms of the highest
    152 	 * cross-correlation value over the overlapping period
    153 	 * 
    154 	 * @param inputBuffer The input buffer
    155 	 * @param postion The position where to start the seek operation, in the input buffer. 
    156 	 * @return The best position.
    157 	 */
    158 	private int seekBestOverlapPosition(float[] inputBuffer, int postion) {
    159 		int bestOffset;
    160 		double bestCorrelation, currentCorrelation;
    161 		int tempOffset;
    162 
    163 		int comparePosition;
    164 
    165 		// Slopes the amplitude of the 'midBuffer' samples
    166 		precalcCorrReferenceMono();
    167 
    168 		bestCorrelation = -10;
    169 		bestOffset = 0;
    170 
    171 		// Scans for the best correlation value by testing each possible
    172 		// position
    173 		// over the permitted range.
    174 		for (tempOffset = 0; tempOffset < seekLength; tempOffset++) {
    175 
    176 			comparePosition = postion + tempOffset;
    177 
    178 			// Calculates correlation value for the mixing position
    179 			// corresponding
    180 			// to 'tempOffset'
    181 			currentCorrelation = (double) calcCrossCorr(pRefMidBuffer, inputBuffer,comparePosition);
    182 			// heuristic rule to slightly favor values close to mid of the
    183 			// range
    184 			double tmp = (double) (2 * tempOffset - seekLength) / seekLength;
    185 			currentCorrelation = ((currentCorrelation + 0.1) * (1.0 - 0.25 * tmp * tmp));
    186 
    187 			// Checks for the highest correlation value
    188 			if (currentCorrelation > bestCorrelation) {
    189 				bestCorrelation = currentCorrelation;
    190 				bestOffset = tempOffset;
    191 			}
    192 		}
    193 
    194 		return bestOffset;
    195 
    196 	}
    197 	
    198 	/**
    199 	* Slopes the amplitude of the 'midBuffer' samples so that cross correlation
    200 	* is faster to calculate. Why is this faster?
    201 	*/
    202 	void precalcCorrReferenceMono()
    203 	{
    204 	    for (int i = 0; i < overlapLength; i++){
    205 	    	float temp = i * (overlapLength - i);
    206 	        pRefMidBuffer[i] = pMidBuffer[i] * temp;
    207 	    }
    208 	}	
    209 
    210 	
    211 	double calcCrossCorr(float[] mixingPos, float[] compare, int offset){
    212 		double corr = 0;
    213 	    double norm = 0;
    214 	    for (int i = 1; i < overlapLength; i ++){
    215 	        corr += mixingPos[i] * compare[i + offset];
    216 	        norm += mixingPos[i] * mixingPos[i];
    217 	    }
    218 	    // To avoid division by zero.
    219 	    if (norm < 1e-8){
    220 	    	norm = 1.0;    
    221 	    }
    222 	    return corr / Math.pow(norm,0.5);
    223 	}
    224 	
    225 	
    226 	@Override
    227 	public boolean process(AudioEvent audioEvent) {
    228 		float[] audioFloatBuffer = audioEvent.getFloatBuffer();
    229 		assert audioFloatBuffer.length == getInputBufferSize();
    230 		
    231 		//Search for the best overlapping position.
    232 		int offset =  seekBestOverlapPosition(audioFloatBuffer,0);
    233 		
    234 		// Mix the samples in the 'inputBuffer' at position of 'offset' with the 
    235         // samples in 'midBuffer' using sliding overlapping
    236         // ... first partially overlap with the end of the previous sequence
    237         // (that's in 'midBuffer')
    238 		overlap(outputFloatBuffer,0,audioFloatBuffer,offset);
    239 			
    240 		//copy sequence samples from input to output			
    241 		int sequenceLength = seekWindowLength - 2 * overlapLength;
    242 		System.arraycopy(audioFloatBuffer, offset + overlapLength, outputFloatBuffer, overlapLength, sequenceLength);
    243 		
    244 	     // Copies the end of the current sequence from 'inputBuffer' to 
    245         // 'midBuffer' for being mixed with the beginning of the next 
    246         // processing sequence and so on
    247 		System.arraycopy(audioFloatBuffer, offset + sequenceLength + overlapLength, pMidBuffer, 0, overlapLength);
    248 		
    249 		assert outputFloatBuffer.length == getOutputBufferSize();
    250 		
    251 		audioEvent.setFloatBuffer(outputFloatBuffer);
    252 		audioEvent.setOverlap(0);
    253 		
    254 		if(newParameters!=null){
    255 			applyNewParameters();
    256 			dispatcher.setStepSizeAndOverlap(getInputBufferSize(),getOverlap());
    257 		}
    258 		
    259 		return true;
    260 	}
    261 
    262 	@Override
    263 	public void processingFinished() {
    264 		// NOOP
    265 	}
    266 
    267 
    268 	
    269 	/**
    270 	 * An object to encapsulate some of the parameters for
    271 	 *         WSOLA, together with a couple of practical helper functions.
    272 	 * 
    273 	 * @author Joren Six
    274 	 */
    275 	public static class Parameters {
    276 		private final int sequenceMs;
    277 		private final int seekWindowMs;
    278 		private final int overlapMs;
    279 		
    280 		private final double tempo;
    281 		private final double sampleRate;
    282 
    283 		/**
    284 		 * @param tempo
    285 		 *            The tempo change 1.0 means unchanged, 2.0 is + 100% , 0.5
    286 		 *            is half of the speed.
    287 		 * @param sampleRate
    288 		 *            The sample rate of the audio 44.1kHz is common.
    289 		 * @param newSequenceMs
    290 		 *            Length of a single processing sequence, in milliseconds.
    291 		 *            This determines to how long sequences the original sound
    292 		 *            is chopped in the time-stretch algorithm.
    293 		 * 
    294 		 *            The larger this value is, the lesser sequences are used in
    295 		 *            processing. In principle a bigger value sounds better when
    296 		 *            slowing down tempo, but worse when increasing tempo and
    297 		 *            vice versa.
    298 		 * 
    299 		 *            Increasing this value reduces computational burden & vice
    300 		 *            versa.
    301 		 * @param newSeekWindowMs
    302 		 *            Seeking window length in milliseconds for algorithm that
    303 		 *            finds the best possible overlapping location. This
    304 		 *            determines from how wide window the algorithm may look for
    305 		 *            an optimal joining location when mixing the sound
    306 		 *            sequences back together.
    307 		 * 
    308 		 *            The bigger this window setting is, the higher the
    309 		 *            possibility to find a better mixing position will become,
    310 		 *            but at the same time large values may cause a "drifting"
    311 		 *            artifact because consequent sequences will be taken at
    312 		 *            more uneven intervals.
    313 		 * 
    314 		 *            If there's a disturbing artifact that sounds as if a
    315 		 *            constant frequency was drifting around, try reducing this
    316 		 *            setting.
    317 		 * 
    318 		 *            Increasing this value increases computational burden &
    319 		 *            vice versa.
    320 		 * @param newOverlapMs
    321 		 *            Overlap length in milliseconds. When the chopped sound
    322 		 *            sequences are mixed back together, to form a continuous
    323 		 *            sound stream, this parameter defines over how long period
    324 		 *            the two consecutive sequences are let to overlap each
    325 		 *            other.
    326 		 * 
    327 		 *            This shouldn't be that critical parameter. If you reduce
    328 		 *            the DEFAULT_SEQUENCE_MS setting by a large amount, you
    329 		 *            might wish to try a smaller value on this.
    330 		 * 
    331 		 *            Increasing this value increases computational burden &
    332 		 *            vice versa.
    333 		 */
    334 		public Parameters(double tempo, double sampleRate, int newSequenceMs, int newSeekWindowMs, int newOverlapMs) {
    335 			this.tempo = tempo;
    336 			this.sampleRate = sampleRate;
    337 			this.overlapMs = newOverlapMs;
    338 			this.seekWindowMs = newSeekWindowMs;
    339 			this.sequenceMs = newSequenceMs;
    340 		}
    341 		
    342 		public static Parameters speechDefaults(double tempo, double sampleRate){
    343 			int sequenceMs = 40;
    344 			int seekWindowMs = 15;
    345 			int overlapMs = 12;
    346 			return new Parameters(tempo,sampleRate,sequenceMs, seekWindowMs,overlapMs);
    347 		}
    348 		
    349 		public static Parameters musicDefaults(double tempo, double sampleRate){
    350 			int sequenceMs = 82;
    351 			int seekWindowMs =  28;
    352 			int overlapMs = 12;
    353 			return new Parameters(tempo,sampleRate,sequenceMs, seekWindowMs,overlapMs);
    354 		}
    355 		
    356 		public static Parameters slowdownDefaults(double tempo, double sampleRate){
    357 			int sequenceMs = 100;
    358 			int seekWindowMs =  35;
    359 			int overlapMs = 20;
    360 			return new Parameters(tempo,sampleRate,sequenceMs, seekWindowMs,overlapMs);
    361 		}
    362 		
    363 		public static Parameters automaticDefaults(double tempo, double sampleRate){
    364 			double tempoLow = 0.5; // -50% speed
    365 			double tempoHigh = 2.0; // +100% speed
    366 			
    367 			double sequenceMsLow = 125; //ms
    368 			double sequenceMsHigh = 50; //ms
    369 			double sequenceK = ((sequenceMsHigh - sequenceMsLow) / (tempoHigh - tempoLow));
    370 			double sequenceC = sequenceMsLow - sequenceK * tempoLow;
    371 			
    372 			double seekLow = 25;// ms
    373 			double seekHigh = 15;// ms
    374 			double seekK =((seekHigh - seekLow) / (tempoHigh-tempoLow));
    375 			double seekC = seekLow - seekK * seekLow;
    376 			
    377 			int sequenceMs = (int) (sequenceC + sequenceK * tempo + 0.5);
    378 			int seekWindowMs =  (int) (seekC + seekK * tempo + 0.5);
    379 			int overlapMs = 12;
    380 			return new Parameters(tempo,sampleRate,sequenceMs, seekWindowMs,overlapMs);
    381 		}
    382 
    383 		public double getOverlapMs() {
    384 			return overlapMs;
    385 		}
    386 
    387 		public double getSequenceMs() {
    388 			return sequenceMs;
    389 		}
    390 
    391 		public double getSeekWindowMs() {
    392 			return seekWindowMs;
    393 		}
    394 		
    395 		public double getSampleRate() {
    396 			return sampleRate;
    397 		}
    398 		
    399 		public double getTempo(){
    400 			return tempo;
    401 		}
    402 	}
    403 }