TarsosDSPAudioFormat.java (24711B)
1 /* 2 * _______ _____ _____ _____ 3 * |__ __| | __ \ / ____| __ \ 4 * | | __ _ _ __ ___ ___ ___| | | | (___ | |__) | 5 * | |/ _` | '__/ __|/ _ \/ __| | | |\___ \| ___/ 6 * | | (_| | | \__ \ (_) \__ \ |__| |____) | | 7 * |_|\__,_|_| |___/\___/|___/_____/|_____/|_| 8 * 9 * ------------------------------------------------------------- 10 * 11 * TarsosDSP is developed by Joren Six at IPEM, University Ghent 12 * 13 * ------------------------------------------------------------- 14 * 15 * Info: http://0110.be/tag/TarsosDSP 16 * Github: https://github.com/JorenSix/TarsosDSP 17 * Releases: http://0110.be/releases/TarsosDSP/ 18 * 19 * TarsosDSP includes modified source code by various authors, 20 * for credits and info, see README. 21 * 22 */ 23 24 package be.tarsos.dsp.io; 25 26 /* 27 * Copyright (c) 1999, 2007, Oracle and/or its affiliates. All rights reserved. 28 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 29 * 30 * This code is free software; you can redistribute it and/or modify it 31 * under the terms of the GNU General Public License version 2 only, as 32 * published by the Free Software Foundation. Oracle designates this 33 * particular file as subject to the "Classpath" exception as provided 34 * by Oracle in the LICENSE file that accompanied this code. 35 * 36 * This code is distributed in the hope that it will be useful, but WITHOUT 37 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 38 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 39 * version 2 for more details (a copy is included in the LICENSE file that 40 * accompanied this code). 41 * 42 * You should have received a copy of the GNU General Public License version 43 * 2 along with this work; if not, write to the Free Software Foundation, 44 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
45 * 46 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 47 * or visit www.oracle.com if you need additional information or have any 48 * questions. 49 */ 50 51 import java.util.Collections; 52 import java.util.HashMap; 53 import java.util.Map; 54 55 /** 56 * <code>AudioFormat</code> is the class that specifies a particular arrangement of data in a sound stream. 57 * By examining the information stored in the audio format, you can discover how to interpret the bits in the 58 * binary sound data. 59 * <p> 60 * Every data line has an audio format associated with its data stream. The audio format of a source (playback) data line indicates 61 * what kind of data the data line expects to receive for output. For a target (capture) data line, the audio format specifies the kind 62 * of the data that can be read from the line. 63 * Sound files also have audio formats, of course. 64 * <p> 65 * The <code>AudioFormat</code> class accommodates a number of common sound-file encoding techniques, including 66 * pulse-code modulation (PCM), mu-law encoding, and a-law encoding. These encoding techniques are predefined, 67 * but service providers can create new encoding types. 68 * The encoding that a specific format uses is named by its <code>encoding</code> field. 69 *<p> 70 * In addition to the encoding, the audio format includes other properties that further specify the exact 71 * arrangement of the data. 72 * These include the number of channels, sample rate, sample size, byte order, frame rate, and frame size. 73 * Sounds may have different numbers of audio channels: one for mono, two for stereo. 74 * The sample rate measures how many "snapshots" (samples) of the sound pressure are taken per second, per channel. 
75 * (If the sound is stereo rather than mono, two samples are actually measured at each instant of time: one for the left channel, 76 * and another for the right channel; however, the sample rate still measures the number per channel, so the rate is the same 77 * regardless of the number of channels. This is the standard use of the term.) 78 * The sample size indicates how many bits are used to store each snapshot; 8 and 16 are typical values. 79 * For 16-bit samples (or any other sample size larger than a byte), 80 * byte order is important; the bytes in each sample are arranged in 81 * either the "little-endian" or "big-endian" style. 82 * For encodings like PCM, a frame consists of the set of samples for all channels at a given 83 * point in time, and so the size of a frame (in bytes) is always equal to the size of a sample (in bytes) times 84 * the number of channels. However, with some other sorts of encodings a frame can contain 85 * a bundle of compressed data for a whole series of samples, as well as additional, non-sample 86 * data. For such encodings, the sample rate and sample size refer to the data after it is decoded into PCM, 87 * and so they are completely different from the frame rate and frame size. 88 * 89 * <p>An <code>AudioFormat</code> object can include a set of 90 * properties. A property is a pair of key and value: the key 91 * is of type <code>String</code>, the associated property 92 * value is an arbitrary object. Properties specify 93 * additional format specifications, like the bit rate for 94 * compressed formats. Properties are mainly used as a means 95 * to transport additional information of the audio format 96 * to and from the service providers. Therefore, properties 97 * are ignored in the {@link #matches(TarsosDSPAudioFormat)} method. 
/**
 * Describes how the bytes of a sound stream are laid out: the encoding,
 * sample rate, sample size, channel count, frame size, frame rate and byte
 * order.
 *
 * <p>A format may also carry a set of properties. A property is a key/value
 * pair whose key is a <code>String</code> and whose value is an arbitrary
 * object. Properties are stored and returned by this class but are not
 * consulted by {@link #matches(TarsosDSPAudioFormat)}.
 *
 * <p>The following table lists some common properties which
 * service providers should use, if applicable:
 *
 * <table border=0>
 *  <tr>
 *   <th>Property key</th>
 *   <th>Value type</th>
 *   <th>Description</th>
 *  </tr>
 *  <tr>
 *   <td>"bitrate"</td>
 *   <td>{@link java.lang.Integer Integer}</td>
 *   <td>average bit rate in bits per second</td>
 *  </tr>
 *  <tr>
 *   <td>"vbr"</td>
 *   <td>{@link java.lang.Boolean Boolean}</td>
 *   <td><code>true</code>, if the file is encoded in variable bit
 *       rate (VBR)</td>
 *  </tr>
 *  <tr>
 *   <td>"quality"</td>
 *   <td>{@link java.lang.Integer Integer}</td>
 *   <td>encoding/conversion quality, 1..100</td>
 *  </tr>
 * </table>
 *
 * <p>Vendors of service providers (plugins) are encouraged
 * to seek information about other already established
 * properties in third party plugins, and follow the same
 * conventions.
 *
 * @author Kara Kytle
 * @author Florian Bomers
 * @since 1.3
 */
public class TarsosDSPAudioFormat {

    /**
     * Marker value meaning "not specified / unknown" for the numeric
     * attributes of a format (sample rate, sample size, channels, frame size,
     * frame rate).
     */
    public static final int NOT_SPECIFIED = -1;

    // INSTANCE VARIABLES

    /**
     * The audio encoding technique used by this format.
     */
    protected Encoding encoding;

    /**
     * The number of samples played or recorded per second, for sounds that have this format.
     */
    protected float sampleRate;

    /**
     * The number of bits in each sample of a sound that has this format.
     */
    protected int sampleSizeInBits;

    /**
     * The number of audio channels in this format (1 for mono, 2 for stereo).
     */
    protected int channels;

    /**
     * The number of bytes in each frame of a sound that has this format.
     */
    protected int frameSize;

    /**
     * The number of frames played or recorded per second, for sounds that have this format.
     */
    protected float frameRate;

    /**
     * Indicates whether the audio data is stored in big-endian or little-endian order.
     */
    protected boolean bigEndian;

    /** The set of properties; <code>null</code> when the format has none. */
    private Map<String, Object> properties;

    /**
     * Constructs a format from explicit values for every attribute. The
     * values are stored as given; no validation is performed.
     *
     * @param encoding the audio encoding technique
     * @param sampleRate the number of samples per second
     * @param sampleSizeInBits the number of bits in each sample
     * @param channels the number of channels (1 for mono, 2 for stereo, and so on)
     * @param frameSize the number of bytes in each frame
     * @param frameRate the number of frames per second
     * @param bigEndian indicates whether the data for a single sample
     *                  is stored in big-endian byte order (<code>false</code>
     *                  means little-endian)
     */
    public TarsosDSPAudioFormat(Encoding encoding, float sampleRate, int sampleSizeInBits,
            int channels, int frameSize, float frameRate, boolean bigEndian) {
        this.encoding = encoding;
        this.sampleRate = sampleRate;
        this.sampleSizeInBits = sampleSizeInBits;
        this.channels = channels;
        this.frameSize = frameSize;
        this.frameRate = frameRate;
        this.bigEndian = bigEndian;
        this.properties = null;
    }

    /**
     * Constructs a format from explicit values for every attribute plus a set
     * of properties. The map is copied, so later changes to the caller's map
     * do not affect this format.
     *
     * @param encoding the audio encoding technique
     * @param sampleRate the number of samples per second
     * @param sampleSizeInBits the number of bits in each sample
     * @param channels the number of channels (1 for mono, 2 for
     *                 stereo, and so on)
     * @param frameSize the number of bytes in each frame
     * @param frameRate the number of frames per second
     * @param bigEndian indicates whether the data for a single sample
     *                  is stored in big-endian byte order
     *                  (<code>false</code> means little-endian)
     * @param properties a {@code Map<String, Object>} containing format
     *                   properties; <code>null</code> is treated as "no
     *                   properties"
     *
     * @since 1.5
     */
    public TarsosDSPAudioFormat(Encoding encoding, float sampleRate,
            int sampleSizeInBits, int channels,
            int frameSize, float frameRate,
            boolean bigEndian, Map<String, Object> properties) {
        this(encoding, sampleRate, sampleSizeInBits, channels,
                frameSize, frameRate, bigEndian);
        // A null map used to throw a NullPointerException here; it now simply
        // leaves the format without properties.
        if (properties != null) {
            this.properties = new HashMap<String, Object>(properties);
        }
    }

    /**
     * Constructs a format with a linear PCM encoding. The frame size is set
     * to the number of bytes required to contain one sample from each channel
     * (or {@link #NOT_SPECIFIED} if the channel count or sample size is
     * {@link #NOT_SPECIFIED}), and the frame rate is set to the sample rate.
     *
     * @param sampleRate the number of samples per second
     * @param sampleSizeInBits the number of bits in each sample
     * @param channels the number of channels (1 for mono, 2 for stereo, and so on)
     * @param signed indicates whether the data is signed or unsigned
     * @param bigEndian indicates whether the data for a single sample
     *                  is stored in big-endian byte order (<code>false</code>
     *                  means little-endian)
     */
    public TarsosDSPAudioFormat(float sampleRate, int sampleSizeInBits,
            int channels, boolean signed, boolean bigEndian) {
        this(signed ? Encoding.PCM_SIGNED : Encoding.PCM_UNSIGNED,
                sampleRate,
                sampleSizeInBits,
                channels,
                pcmFrameSize(sampleSizeInBits, channels),
                sampleRate,
                bigEndian);
    }

    /**
     * Computes the PCM frame size in bytes: the sample size rounded up to
     * whole bytes, times the number of channels.
     */
    private static int pcmFrameSize(int sampleSizeInBits, int channels) {
        if (channels == NOT_SPECIFIED || sampleSizeInBits == NOT_SPECIFIED) {
            return NOT_SPECIFIED;
        }
        return ((sampleSizeInBits + 7) / 8) * channels;
    }

    /**
     * Obtains the type of encoding for sounds in this format.
     *
     * @return the encoding type
     * @see Encoding#PCM_SIGNED
     * @see Encoding#PCM_UNSIGNED
     * @see Encoding#ULAW
     * @see Encoding#ALAW
     */
    public Encoding getEncoding() {
        return encoding;
    }

    /**
     * Obtains the sample rate.
     *
     * @return the number of samples per second, or {@link #NOT_SPECIFIED}
     * @see #getFrameRate()
     */
    public float getSampleRate() {
        return sampleRate;
    }

    /**
     * Obtains the size of a sample.
     *
     * @return the number of bits in each sample, or {@link #NOT_SPECIFIED}
     * @see #getFrameSize()
     */
    public int getSampleSizeInBits() {
        return sampleSizeInBits;
    }

    /**
     * Obtains the number of channels.
     *
     * @return the number of channels (1 for mono, 2 for stereo, etc.),
     *         or {@link #NOT_SPECIFIED}
     */
    public int getChannels() {
        return channels;
    }

    /**
     * Obtains the frame size in bytes.
     *
     * @return the number of bytes per frame, or {@link #NOT_SPECIFIED}
     * @see #getSampleSizeInBits()
     */
    public int getFrameSize() {
        return frameSize;
    }

    /**
     * Obtains the frame rate in frames per second.
     *
     * @return the number of frames per second, or {@link #NOT_SPECIFIED}
     * @see #getSampleRate()
     */
    public float getFrameRate() {
        return frameRate;
    }

    /**
     * Indicates whether the audio data is stored in big-endian or little-endian
     * byte order. If the sample size is not more than one byte, the return value is
     * irrelevant.
     *
     * @return <code>true</code> if the data is stored in big-endian byte order,
     *         <code>false</code> if little-endian
     */
    public boolean isBigEndian() {
        return bigEndian;
    }

    /**
     * Obtains an unmodifiable snapshot of the properties of this format.
     *
     * @return a {@code Map<String, Object>} containing all properties; an
     *         empty map if this format has none
     *
     * @see #getProperty(String)
     * @since 1.5
     */
    public Map<String, Object> properties() {
        if (properties == null) {
            return Collections.<String, Object>emptyMap();
        }
        // Copy first so the returned view is a snapshot, then wrap it so
        // callers cannot modify it.
        return Collections.unmodifiableMap(new HashMap<String, Object>(properties));
    }

    /**
     * Obtains the property value specified by the key.
     *
     * @param key the key of the desired property
     * @return the value of the property with the specified key,
     *         or <code>null</code> if the property does not exist
     *
     * @see #properties()
     * @since 1.5
     */
    public Object getProperty(String key) {
        return properties == null ? null : properties.get(key);
    }

    /**
     * Indicates whether this format matches the one specified. To match,
     * two formats must have the same encoding, the same number of channels,
     * and the same number of bits per sample and bytes per frame.
     * The two formats must also have the same sample rate,
     * unless the specified format has the sample rate value {@link #NOT_SPECIFIED},
     * which any sample rate will match. The frame rates must
     * similarly be equal, unless the specified format has the frame rate
     * value {@link #NOT_SPECIFIED}. The byte order (big-endian or little-endian)
     * must match if the sample size is greater than one byte.
     * Properties are not compared.
     *
     * @param format format to test for match; <code>null</code> never matches
     * @return <code>true</code> if this format matches the one specified,
     *         <code>false</code> otherwise.
     */
    /*
     * $$kk: 04.20.99: i changed the semantics of this.
     */
    public boolean matches(TarsosDSPAudioFormat format) {
        if (format == null) {
            return false;
        }

        final Encoding requested = format.getEncoding();
        final Encoding actual = getEncoding();
        final boolean sameEncoding =
                (requested == null) ? (actual == null) : requested.equals(actual);
        if (!sameEncoding) {
            return false;
        }

        if (!rateMatches(format.getSampleRate(), getSampleRate())
                || !rateMatches(format.getFrameRate(), getFrameRate())) {
            return false;
        }

        if (format.getSampleSizeInBits() != getSampleSizeInBits()
                || format.getChannels() != getChannels()
                || format.getFrameSize() != getFrameSize()) {
            return false;
        }

        // Byte order is irrelevant for samples that fit in a single byte.
        return format.getSampleSizeInBits() <= 8
                || format.isBigEndian() == isBigEndian();
    }

    /**
     * Tells whether a requested rate is satisfied by an actual rate: either
     * the request is {@link #NOT_SPECIFIED} or the two values are equal.
     */
    private static boolean rateMatches(float requested, float actual) {
        return requested == (float) NOT_SPECIFIED || requested == actual;
    }

    /**
     * Returns a string that describes the format, such as:
     * "PCM_SIGNED 22050.0 Hz, 16 bit, mono, 2 bytes/frame, big-endian".
     * The encoding prefix is omitted when the encoding is <code>null</code>,
     * the frame rate is only included when it differs from the sample rate,
     * and the byte order is only included for PCM encodings whose sample size
     * is larger than 8 bits or not specified.
     *
     * @return a string that describes the format parameters
     */
    @Override
    public String toString() {
        final Encoding enc = getEncoding();
        final StringBuilder text = new StringBuilder();

        if (enc != null) {
            text.append(enc.toString()).append(' ');
        }

        if (getSampleRate() == (float) NOT_SPECIFIED) {
            text.append("unknown sample rate, ");
        } else {
            text.append(getSampleRate()).append(" Hz, ");
        }

        if (getSampleSizeInBits() == NOT_SPECIFIED) {
            text.append("unknown bits per sample, ");
        } else {
            text.append(getSampleSizeInBits()).append(" bit, ");
        }

        final int channelCount = getChannels();
        if (channelCount == 1) {
            text.append("mono, ");
        } else if (channelCount == 2) {
            text.append("stereo, ");
        } else if (channelCount == NOT_SPECIFIED) {
            // The leading space is part of the established output format.
            text.append(" unknown number of channels, ");
        } else {
            text.append(channelCount).append(" channels, ");
        }

        if (getFrameSize() == NOT_SPECIFIED) {
            text.append("unknown frame size, ");
        } else {
            text.append(getFrameSize()).append(" bytes/frame, ");
        }

        // Only mention the frame rate when it is not (nearly) the sample rate.
        if (Math.abs(getSampleRate() - getFrameRate()) > 0.00001) {
            if (getFrameRate() == (float) NOT_SPECIFIED) {
                text.append("unknown frame rate, ");
            } else {
                text.append(getFrameRate()).append(" frames/second, ");
            }
        }

        // Compare from the constant's side so a null encoding cannot throw.
        final boolean pcm = Encoding.PCM_SIGNED.equals(enc)
                || Encoding.PCM_UNSIGNED.equals(enc);
        if (pcm && (getSampleSizeInBits() > 8 || getSampleSizeInBits() == NOT_SPECIFIED)) {
            text.append(isBigEndian() ? "big-endian" : "little-endian");
        }

        return text.toString();
    }

    /**
     * Names the type of data representation used for an audio stream.
     * <p>
     * Predefined encodings are available as the constants
     * {@link #PCM_SIGNED}, {@link #PCM_UNSIGNED}, {@link #ULAW} and
     * {@link #ALAW}; further encodings can be created through the public
     * constructor.
     * <p>
     * Two encodings are equal when their names are equal, so formats can be
     * matched by comparing their encodings with <code>equals</code>.
     *
     * @author Kara Kytle
     * @since 1.3
     */
    public static class Encoding {

        // ENCODING DEFINES

        /**
         * Specifies signed, linear PCM data.
         */
        public static final Encoding PCM_SIGNED = new Encoding("PCM_SIGNED");

        /**
         * Specifies unsigned, linear PCM data.
         */
        public static final Encoding PCM_UNSIGNED = new Encoding("PCM_UNSIGNED");

        /**
         * Specifies u-law encoded data.
         */
        public static final Encoding ULAW = new Encoding("ULAW");

        /**
         * Specifies a-law encoded data.
         */
        public static final Encoding ALAW = new Encoding("ALAW");

        // INSTANCE VARIABLES

        /**
         * Encoding name; may be <code>null</code> if the caller passed
         * <code>null</code> to the constructor.
         */
        private final String name;

        // CONSTRUCTOR

        /**
         * Constructs a new encoding.
         *
         * @param name the name of the new type of encoding
         */
        public Encoding(String name) {
            this.name = name;
        }

        // METHODS

        /**
         * Compares encodings by name. Only another <code>Encoding</code> can
         * be equal to this one; two encodings whose names are both
         * <code>null</code> are equal. The method is final so subclasses
         * cannot change this rule.
         *
         * @param obj the object to compare with
         * @return <code>true</code> if <code>obj</code> is an
         *         <code>Encoding</code> with the same name
         */
        @Override
        public final boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof Encoding)) {
                return false;
            }
            final String otherName = ((Encoding) obj).name;
            return (name == null) ? (otherName == null) : name.equals(otherName);
        }

        /**
         * Returns the hash code of the name, or 0 for a <code>null</code>
         * name. The method is final to stay consistent with
         * {@link #equals(Object)}.
         *
         * @return the hash code of this encoding
         */
        @Override
        public final int hashCode() {
            return (name == null) ? 0 : name.hashCode();
        }

        /**
         * Provides the <code>String</code> representation of the encoding,
         * which is exactly the name that was passed to the constructor. For
         * example, <code>PCM_SIGNED.toString()</code> returns "PCM_SIGNED".
         *
         * @return the encoding name
         */
        @Override
        public final String toString() {
            return name;
        }

    } // class Encoding
}