Add support for IMA ADPCM in WAV

PiperOrigin-RevId: 287854701
This commit is contained in:
olly 2020-01-02 18:19:41 +00:00 committed by Oliver Woodman
parent f0e0ee421f
commit 826083db92
9 changed files with 526 additions and 22 deletions

View File

@ -36,6 +36,7 @@
([#6602](https://github.com/google/ExoPlayer/issues/6602)).
* Support "twos" codec (big endian PCM) in MP4
([#5789](https://github.com/google/ExoPlayer/issues/5789)).
* WAV: Support IMA ADPCM encoded data.
### 2.11.1 (2019-12-20) ###

View File

@ -32,15 +32,17 @@ public final class WavUtil {
public static final int DATA_FOURCC = 0x64617461;
/** WAVE type value for integer PCM audio data. */
private static final int TYPE_PCM = 0x0001;
public static final int TYPE_PCM = 0x0001;
/** WAVE type value for float PCM audio data. */
private static final int TYPE_FLOAT = 0x0003;
public static final int TYPE_FLOAT = 0x0003;
/** WAVE type value for 8-bit ITU-T G.711 A-law audio data. */
private static final int TYPE_A_LAW = 0x0006;
public static final int TYPE_A_LAW = 0x0006;
/** WAVE type value for 8-bit ITU-T G.711 mu-law audio data. */
private static final int TYPE_MU_LAW = 0x0007;
public static final int TYPE_MU_LAW = 0x0007;
/** WAVE type value for IMA ADPCM audio data. */
public static final int TYPE_IMA_ADPCM = 0x0011;
/** WAVE type value for extended WAVE format. */
private static final int TYPE_WAVE_FORMAT_EXTENSIBLE = 0xFFFE;
public static final int TYPE_WAVE_FORMAT_EXTENSIBLE = 0xFFFE;
/**
* Returns the WAVE format type value for the given {@link C.PcmEncoding}.

View File

@ -28,6 +28,7 @@ import com.google.android.exoplayer2.extractor.PositionHolder;
import com.google.android.exoplayer2.extractor.TrackOutput;
import com.google.android.exoplayer2.util.Assertions;
import com.google.android.exoplayer2.util.MimeTypes;
import com.google.android.exoplayer2.util.ParsableByteArray;
import com.google.android.exoplayer2.util.Util;
import java.io.IOException;
@ -91,12 +92,16 @@ public final class WavExtractor implements Extractor {
throw new ParserException("Unsupported or unrecognized wav header.");
}
@C.PcmEncoding
int pcmEncoding = WavUtil.getPcmEncodingForType(header.formatType, header.bitsPerSample);
if (pcmEncoding == C.ENCODING_INVALID) {
throw new ParserException("Unsupported WAV format type: " + header.formatType);
if (header.formatType == WavUtil.TYPE_IMA_ADPCM) {
outputWriter = new ImaAdPcmOutputWriter(extractorOutput, trackOutput, header);
} else {
@C.PcmEncoding
int pcmEncoding = WavUtil.getPcmEncodingForType(header.formatType, header.bitsPerSample);
if (pcmEncoding == C.ENCODING_INVALID) {
throw new ParserException("Unsupported WAV format type: " + header.formatType);
}
outputWriter = new PcmOutputWriter(extractorOutput, trackOutput, header, pcmEncoding);
}
outputWriter = new PcmOutputWriter(extractorOutput, trackOutput, header, pcmEncoding);
}
if (dataStartPosition == C.POSITION_UNSET) {
@ -156,11 +161,22 @@ public final class WavExtractor implements Extractor {
private final TrackOutput trackOutput;
private final WavHeader header;
private final @C.PcmEncoding int pcmEncoding;
private final int targetSampleSize;
/** The target size of each output sample, in bytes. */
private final int targetSampleSizeBytes;
/** The time at which the writer was last {@link #reset}. */
private long startTimeUs;
/**
* The number of bytes that have been written to {@link #trackOutput} but have yet to be
* included as part of a sample (i.e. the corresponding call to {@link
* TrackOutput#sampleMetadata} has yet to be made).
*/
private int pendingOutputBytes;
/**
* The total number of frames in samples that have been written to the trackOutput since the
* last call to {@link #reset}.
*/
private long outputFrameCount;
private int pendingBytes;
public PcmOutputWriter(
ExtractorOutput extractorOutput,
@ -173,15 +189,15 @@ public final class WavExtractor implements Extractor {
this.pcmEncoding = pcmEncoding;
// For PCM blocks correspond to single frames. This is validated in init(int, long).
int bytesPerFrame = header.blockSize;
targetSampleSize =
targetSampleSizeBytes =
Math.max(bytesPerFrame, header.frameRateHz * bytesPerFrame / TARGET_SAMPLES_PER_SECOND);
}
@Override
public void reset(long timeUs) {
startTimeUs = timeUs;
pendingOutputBytes = 0;
outputFrameCount = 0;
pendingBytes = 0;
}
@Override
@ -204,7 +220,7 @@ public final class WavExtractor implements Extractor {
MimeTypes.AUDIO_RAW,
/* codecs= */ null,
/* bitrate= */ header.frameRateHz * bytesPerFrame * 8,
targetSampleSize,
/* maxInputSize= */ targetSampleSizeBytes,
header.numChannels,
header.frameRateHz,
pcmEncoding,
@ -220,34 +236,298 @@ public final class WavExtractor implements Extractor {
throws IOException, InterruptedException {
// Write sample data until we've reached the target sample size, or the end of the data.
boolean endOfSampleData = bytesLeft == 0;
while (!endOfSampleData && pendingBytes < targetSampleSize) {
int bytesToRead = (int) Math.min(targetSampleSize - pendingBytes, bytesLeft);
while (!endOfSampleData && pendingOutputBytes < targetSampleSizeBytes) {
int bytesToRead = (int) Math.min(targetSampleSizeBytes - pendingOutputBytes, bytesLeft);
int bytesAppended = trackOutput.sampleData(input, bytesToRead, true);
if (bytesAppended == RESULT_END_OF_INPUT) {
endOfSampleData = true;
} else {
pendingBytes += bytesAppended;
pendingOutputBytes += bytesAppended;
}
}
// Write the corresponding sample metadata. Samples must be a whole number of frames. It's
// possible pendingBytes is not a whole number of frames if the stream ended unexpectedly.
// possible that the number of pending output bytes is not a whole number of frames if the
// stream ended unexpectedly.
int bytesPerFrame = header.blockSize;
int pendingFrames = pendingBytes / bytesPerFrame;
int pendingFrames = pendingOutputBytes / bytesPerFrame;
if (pendingFrames > 0) {
long timeUs =
startTimeUs
+ Util.scaleLargeTimestamp(
outputFrameCount, C.MICROS_PER_SECOND, header.frameRateHz);
int size = pendingFrames * bytesPerFrame;
int offset = pendingBytes - size;
int offset = pendingOutputBytes - size;
trackOutput.sampleMetadata(
timeUs, C.BUFFER_FLAG_KEY_FRAME, size, offset, /* encryptionData= */ null);
outputFrameCount += pendingFrames;
pendingBytes = offset;
pendingOutputBytes = offset;
}
return endOfSampleData;
}
}
/**
 * An {@link OutputWriter} for IMA ADPCM encoded WAV data. Input blocks are decoded to 16-bit PCM
 * before being written to the {@link TrackOutput}.
 */
private static final class ImaAdPcmOutputWriter implements OutputWriter {

  // Adjustment applied to the step table index after each decoded 4-bit code, indexed by the
  // code value (0-15).
  private static final int[] INDEX_TABLE = {
    -1, -1, -1, -1, 2, 4, 6, 8, -1, -1, -1, -1, 2, 4, 6, 8
  };

  // Quantizer step sizes, indexed by the current step index (0-88).
  private static final int[] STEP_TABLE = {
    7, 8, 9, 10, 11, 12, 13, 14, 16, 17, 19, 21, 23, 25, 28, 31, 34, 37, 41, 45, 50, 55, 60, 66,
    73, 80, 88, 97, 107, 118, 130, 143, 157, 173, 190, 209, 230, 253, 279, 307, 337, 371, 408,
    449, 494, 544, 598, 658, 724, 796, 876, 963, 1060, 1166, 1282, 1411, 1552, 1707, 1878, 2066,
    2272, 2499, 2749, 3024, 3327, 3660, 4026, 4428, 4871, 5358, 5894, 6484, 7132, 7845, 8630,
    9493, 10442, 11487, 12635, 13899, 15289, 16818, 18500, 20350, 22385, 24623, 27086, 29794,
    32767
  };

  private final ExtractorOutput extractorOutput;
  private final TrackOutput trackOutput;
  private final WavHeader header;

  /** The target size of each output sample, in frames. */
  private final int targetSampleSizeFrames;

  // Properties of the input (yet to be decoded) data.

  // The number of frames encoded in each input block, read from the header's extra data in
  // init(int, long).
  private int framesPerBlock;
  // Buffer holding whole input blocks prior to decoding. Allocated in init(int, long).
  private byte[] inputData;
  // The number of valid (not yet decoded) bytes at the start of inputData.
  private int pendingInputBytes;

  // Target for decoded (yet to be output) data.

  // Holds the 16-bit PCM result of decode(). Allocated in init(int, long), sized to fit the
  // maximum number of blocks decoded in one call.
  private ParsableByteArray decodedData;

  // Properties of the output.

  /** The time at which the writer was last {@link #reset}. */
  private long startTimeUs;
  /**
   * The number of bytes that have been written to {@link #trackOutput} but have yet to be
   * included as part of a sample (i.e. the corresponding call to {@link
   * TrackOutput#sampleMetadata} has yet to be made).
   */
  private int pendingOutputBytes;
  /**
   * The total number of frames in samples that have been written to the trackOutput since the
   * last call to {@link #reset}.
   */
  private long outputFrameCount;

  /**
   * @param extractorOutput Destination for the {@link WavSeekMap} output during {@link #init}.
   * @param trackOutput Destination for the output format and the decoded sample data.
   * @param header The WAV header describing the stream.
   */
  public ImaAdPcmOutputWriter(
      ExtractorOutput extractorOutput, TrackOutput trackOutput, WavHeader header) {
    this.extractorOutput = extractorOutput;
    this.trackOutput = trackOutput;
    this.header = header;
    // At least one frame per sample, even for very low frame rates.
    targetSampleSizeFrames = Math.max(1, header.frameRateHz / TARGET_SAMPLES_PER_SECOND);
  }

  @Override
  public void reset(long timeUs) {
    // Reset the input side. Note that inputData/decodedData are retained; only the byte count is
    // cleared.
    pendingInputBytes = 0;
    // Reset the output side.
    startTimeUs = timeUs;
    pendingOutputBytes = 0;
    outputFrameCount = 0;
  }

  @Override
  public void init(int dataStartPosition, long dataEndPosition) throws ParserException {
    // Validate the header.
    ParsableByteArray scratch = new ParsableByteArray(header.extraData);
    // Skip the first 16-bit field; the second is the frames-per-block count.
    scratch.readLittleEndianUnsignedShort();
    framesPerBlock = scratch.readLittleEndianUnsignedShort();
    // This calculation is defined in "Microsoft Multimedia Standards Update - New Multimedia
    // Types and Data Techniques" (1994). See the "IMA ADPCM Wave Type" and
    // "DVI ADPCM Wave Type" sections, and the calculation of wSamplesPerBlock in the latter.
    int numChannels = header.numChannels;
    int expectedFramesPerBlock =
        (((header.blockSize - (4 * numChannels)) * 8) / (header.bitsPerSample * numChannels)) + 1;
    if (framesPerBlock != expectedFramesPerBlock) {
      throw new ParserException(
          "Expected frames per block: " + expectedFramesPerBlock + "; got: " + framesPerBlock);
    }

    // Calculate the number of blocks we'll need to decode to obtain an output sample of the
    // target sample size, and allocate suitably sized buffers for input and decoded data.
    int maxBlocksToDecode = Util.ceilDivide(targetSampleSizeFrames, framesPerBlock);
    inputData = new byte[maxBlocksToDecode * header.blockSize];
    decodedData =
        new ParsableByteArray(maxBlocksToDecode * numOutputFramesToBytes(framesPerBlock));

    // Output the seek map.
    extractorOutput.seekMap(
        new WavSeekMap(header, framesPerBlock, dataStartPosition, dataEndPosition));

    // Output the format. We calculate the bitrate of the data before decoding, since this is the
    // bitrate of the stream itself.
    int bitrate = header.frameRateHz * header.blockSize * 8 / framesPerBlock;
    Format format =
        Format.createAudioSampleFormat(
            /* id= */ null,
            MimeTypes.AUDIO_RAW,
            /* codecs= */ null,
            bitrate,
            /* maxInputSize= */ numOutputFramesToBytes(targetSampleSizeFrames),
            header.numChannels,
            header.frameRateHz,
            C.ENCODING_PCM_16BIT,
            /* initializationData= */ null,
            /* drmInitData= */ null,
            /* selectionFlags= */ 0,
            /* language= */ null);
    trackOutput.format(format);
  }

  @Override
  public boolean sampleData(ExtractorInput input, long bytesLeft)
      throws IOException, InterruptedException {
    // Calculate the number of additional frames that we need on the output side to complete a
    // sample of the target size.
    int targetFramesRemaining =
        targetSampleSizeFrames - numOutputBytesToFrames(pendingOutputBytes);
    // Calculate the whole number of blocks that we need to decode to obtain this many frames.
    int blocksToDecode = Util.ceilDivide(targetFramesRemaining, framesPerBlock);
    int targetReadBytes = blocksToDecode * header.blockSize;

    // Read input data until we've reached the target number of blocks, or the end of the data.
    boolean endOfSampleData = bytesLeft == 0;
    while (!endOfSampleData && pendingInputBytes < targetReadBytes) {
      int bytesToRead = (int) Math.min(targetReadBytes - pendingInputBytes, bytesLeft);
      int bytesAppended = input.read(inputData, pendingInputBytes, bytesToRead);
      if (bytesAppended == RESULT_END_OF_INPUT) {
        endOfSampleData = true;
      } else {
        pendingInputBytes += bytesAppended;
      }
    }

    int pendingBlockCount = pendingInputBytes / header.blockSize;
    if (pendingBlockCount > 0) {
      // We have at least one whole block to decode.
      decode(inputData, pendingBlockCount, decodedData);
      // Any remainder (a partial block) stays pending for the next call.
      pendingInputBytes -= pendingBlockCount * header.blockSize;

      // Write all of the decoded data to the track output.
      int decodedDataSize = decodedData.limit();
      trackOutput.sampleData(decodedData, decodedDataSize);
      pendingOutputBytes += decodedDataSize;

      // Output the next sample at the target size.
      int pendingOutputFrames = numOutputBytesToFrames(pendingOutputBytes);
      if (pendingOutputFrames >= targetSampleSizeFrames) {
        writeSampleMetadata(targetSampleSizeFrames);
      }
    }

    // If we've reached the end of the data, we might need to output a final partial sample.
    if (endOfSampleData) {
      int pendingOutputFrames = numOutputBytesToFrames(pendingOutputBytes);
      if (pendingOutputFrames > 0) {
        writeSampleMetadata(pendingOutputFrames);
      }
    }

    return endOfSampleData;
  }

  /**
   * Outputs metadata for a sample containing {@code sampleFrames} frames, consuming that many
   * frames of pending output bytes. The offset passed to {@link TrackOutput#sampleMetadata}
   * accounts for any pending bytes newer than the sample.
   *
   * @param sampleFrames The number of frames in the sample.
   */
  private void writeSampleMetadata(int sampleFrames) {
    long timeUs =
        startTimeUs
            + Util.scaleLargeTimestamp(outputFrameCount, C.MICROS_PER_SECOND, header.frameRateHz);
    int size = numOutputFramesToBytes(sampleFrames);
    int offset = pendingOutputBytes - size;
    trackOutput.sampleMetadata(
        timeUs, C.BUFFER_FLAG_KEY_FRAME, size, offset, /* encryptionData= */ null);
    outputFrameCount += sampleFrames;
    pendingOutputBytes -= size;
  }

  /**
   * Decodes IMA ADPCM data to 16 bit PCM.
   *
   * @param input The input data to decode.
   * @param blockCount The number of blocks to decode.
   * @param output The output into which the decoded data will be written. Its limit is set to the
   *     size of the decoded data.
   */
  private void decode(byte[] input, int blockCount, ParsableByteArray output) {
    // Each block is decoded independently, one channel at a time; the per-channel decodes
    // interleave their 16-bit output frames into output.data.
    for (int blockIndex = 0; blockIndex < blockCount; blockIndex++) {
      for (int channelIndex = 0; channelIndex < header.numChannels; channelIndex++) {
        decodeBlockForChannel(input, blockIndex, channelIndex, output.data);
      }
    }

    int decodedDataSize = numOutputFramesToBytes(framesPerBlock * blockCount);
    output.reset(decodedDataSize);
  }

  /**
   * Decodes one channel of a single input block, writing {@code framesPerBlock} interleaved
   * little-endian 16-bit PCM frames for that channel into {@code output}.
   *
   * @param input The input data containing the blocks.
   * @param blockIndex The index of the block to decode.
   * @param channelIndex The index of the channel to decode.
   * @param output The output array, which must be large enough for the decoded block.
   */
  private void decodeBlockForChannel(
      byte[] input, int blockIndex, int channelIndex, byte[] output) {
    int blockSize = header.blockSize;
    int numChannels = header.numChannels;

    // The input data consists of a four byte header [Ci] for each of the N channels, followed
    // by interleaved data segments [Ci-DATAj], each of which is four bytes long.
    //
    // [C1][C2]...[CN] [C1-Data0][C2-Data0]...[CN-Data0] [C1-Data1][C2-Data1]...[CN-Data1] etc
    //
    // Compute the start indices for the [Ci] and [Ci-Data0] for the current channel, as well as
    // the number of data bytes for the channel in the block.
    int blockStartIndex = blockIndex * blockSize;
    int headerStartIndex = blockStartIndex + channelIndex * 4;
    int dataStartIndex = headerStartIndex + numChannels * 4;
    int dataSizeBytes = blockSize / numChannels - 4;

    // Decode initialization. Casting to a short is necessary for the most significant bit to be
    // treated as -2^15 rather than 2^15.
    int predictedSample =
        (short) (((input[headerStartIndex + 1] & 0xFF) << 8) | (input[headerStartIndex] & 0xFF));
    // Clamp the initial step index into the valid range of STEP_TABLE.
    int stepIndex = Math.min(input[headerStartIndex + 2] & 0xFF, 88);
    int step = STEP_TABLE[stepIndex];

    // Output the initial 16 bit PCM sample from the header.
    int outputIndex = (blockIndex * framesPerBlock * numChannels + channelIndex) * 2;
    output[outputIndex] = (byte) (predictedSample & 0xFF);
    output[outputIndex + 1] = (byte) (predictedSample >> 8);

    // We examine each data byte twice during decode (low nibble first, then high nibble).
    for (int i = 0; i < dataSizeBytes * 2; i++) {
      // Map the nibble index to the byte's position within the interleaved channel segments.
      int dataSegmentIndex = i / 8;
      int dataSegmentOffset = (i / 2) % 4;
      int dataIndex = dataStartIndex + (dataSegmentIndex * numChannels * 4) + dataSegmentOffset;

      int originalSample = input[dataIndex] & 0xFF;
      if (i % 2 == 0) {
        originalSample &= 0x0F; // Bottom four bits.
      } else {
        originalSample >>= 4; // Top four bits.
      }

      // The low three bits are the magnitude; bit 3 is the sign.
      int delta = originalSample & 0x07;
      int difference = ((2 * delta + 1) * step) >> 3;
      if ((originalSample & 0x08) != 0) {
        difference = -difference;
      }

      // Accumulate and clamp the predicted sample to the 16-bit signed range.
      predictedSample += difference;
      predictedSample = Util.constrainValue(predictedSample, /* min= */ -32768, /* max= */ 32767);

      // Output the next 16 bit PCM sample to the correct position in the output.
      outputIndex += 2 * numChannels;
      output[outputIndex] = (byte) (predictedSample & 0xFF);
      output[outputIndex + 1] = (byte) (predictedSample >> 8);

      // Adapt the step size for the next code.
      stepIndex += INDEX_TABLE[originalSample];
      stepIndex = Util.constrainValue(stepIndex, /* min= */ 0, /* max= */ STEP_TABLE.length - 1);
      step = STEP_TABLE[stepIndex];
    }
  }

  // Converts a byte count of decoded 16-bit PCM output to a whole number of frames.
  private int numOutputBytesToFrames(int bytes) {
    return bytes / (2 * header.numChannels);
  }

  // Converts a frame count to the corresponding number of decoded 16-bit PCM output bytes.
  private int numOutputFramesToBytes(int frames) {
    return frames * 2 * header.numChannels;
  }
}
}

Binary file not shown.

View File

@ -0,0 +1,75 @@
seekMap:
isSeekable = true
duration = 1018185
getPosition(0) = [[timeUs=0, position=94]]
numberOfTracks = 1
track 0:
format:
bitrate = 177004
id = null
containerMimeType = null
sampleMimeType = audio/raw
maxInputSize = 8820
width = -1
height = -1
frameRate = -1.0
rotationDegrees = 0
pixelWidthHeightRatio = 1.0
channelCount = 1
sampleRate = 44100
pcmEncoding = 2
encoderDelay = 0
encoderPadding = 0
subsampleOffsetUs = 9223372036854775807
selectionFlags = 0
language = null
drmInitData = -
metadata = null
initializationData:
total output bytes = 89804
sample count = 11
sample 0:
time = 0
flags = 1
data = length 8820, hash E90A457C
sample 1:
time = 100000
flags = 1
data = length 8820, hash EA798370
sample 2:
time = 200000
flags = 1
data = length 8820, hash A57ED989
sample 3:
time = 300000
flags = 1
data = length 8820, hash 8B681816
sample 4:
time = 400000
flags = 1
data = length 8820, hash 48177BEB
sample 5:
time = 500000
flags = 1
data = length 8820, hash 70197776
sample 6:
time = 600000
flags = 1
data = length 8820, hash DB4A4704
sample 7:
time = 700000
flags = 1
data = length 8820, hash 84A525D0
sample 8:
time = 800000
flags = 1
data = length 8820, hash 197A4377
sample 9:
time = 900000
flags = 1
data = length 8820, hash 6982BC91
sample 10:
time = 1000000
flags = 1
data = length 1604, hash 3DED68ED
tracksEnded = true

View File

@ -0,0 +1,59 @@
seekMap:
isSeekable = true
duration = 1018185
getPosition(0) = [[timeUs=0, position=94]]
numberOfTracks = 1
track 0:
format:
bitrate = 177004
id = null
containerMimeType = null
sampleMimeType = audio/raw
maxInputSize = 8820
width = -1
height = -1
frameRate = -1.0
rotationDegrees = 0
pixelWidthHeightRatio = 1.0
channelCount = 1
sampleRate = 44100
pcmEncoding = 2
encoderDelay = 0
encoderPadding = 0
subsampleOffsetUs = 9223372036854775807
selectionFlags = 0
language = null
drmInitData = -
metadata = null
initializationData:
total output bytes = 61230
sample count = 7
sample 0:
time = 339395
flags = 1
data = length 8820, hash 25FCA092
sample 1:
time = 439395
flags = 1
data = length 8820, hash 9400B4BE
sample 2:
time = 539395
flags = 1
data = length 8820, hash 5BA7E45D
sample 3:
time = 639395
flags = 1
data = length 8820, hash 5AC42905
sample 4:
time = 739395
flags = 1
data = length 8820, hash D57059C
sample 5:
time = 839395
flags = 1
data = length 8820, hash DEF5C480
sample 6:
time = 939395
flags = 1
data = length 8310, hash 10B3FC93
tracksEnded = true

View File

@ -0,0 +1,47 @@
seekMap:
isSeekable = true
duration = 1018185
getPosition(0) = [[timeUs=0, position=94]]
numberOfTracks = 1
track 0:
format:
bitrate = 177004
id = null
containerMimeType = null
sampleMimeType = audio/raw
maxInputSize = 8820
width = -1
height = -1
frameRate = -1.0
rotationDegrees = 0
pixelWidthHeightRatio = 1.0
channelCount = 1
sampleRate = 44100
pcmEncoding = 2
encoderDelay = 0
encoderPadding = 0
subsampleOffsetUs = 9223372036854775807
selectionFlags = 0
language = null
drmInitData = -
metadata = null
initializationData:
total output bytes = 32656
sample count = 4
sample 0:
time = 678790
flags = 1
data = length 8820, hash DB7FF64C
sample 1:
time = 778790
flags = 1
data = length 8820, hash B895DFDC
sample 2:
time = 878790
flags = 1
data = length 8820, hash E3AB416D
sample 3:
time = 978790
flags = 1
data = length 6196, hash E27E175A
tracksEnded = true

View File

@ -0,0 +1,35 @@
seekMap:
isSeekable = true
duration = 1018185
getPosition(0) = [[timeUs=0, position=94]]
numberOfTracks = 1
track 0:
format:
bitrate = 177004
id = null
containerMimeType = null
sampleMimeType = audio/raw
maxInputSize = 8820
width = -1
height = -1
frameRate = -1.0
rotationDegrees = 0
pixelWidthHeightRatio = 1.0
channelCount = 1
sampleRate = 44100
pcmEncoding = 2
encoderDelay = 0
encoderPadding = 0
subsampleOffsetUs = 9223372036854775807
selectionFlags = 0
language = null
drmInitData = -
metadata = null
initializationData:
total output bytes = 4082
sample count = 1
sample 0:
time = 1018185
flags = 1
data = length 4082, hash 4CB1A490
tracksEnded = true

View File

@ -28,4 +28,9 @@ public final class WavExtractorTest {
// Asserts standard extractor behavior against the wav/sample.wav asset.
public void testSample() throws Exception {
  ExtractorAsserts.assertBehavior(WavExtractor::new, "wav/sample.wav");
}
// Asserts standard extractor behavior against an IMA ADPCM encoded WAV asset.
@Test
public void testSampleImaAdpcm() throws Exception {
  ExtractorAsserts.assertBehavior(WavExtractor::new, "wav/sample_ima_adpcm.wav");
}
}