Remove assumption that WAV files only contain PCM encoded data

- WavHeader is now immutable and contains only values parsed out
  of the WAVE FMT chunk. It no longer contains a C.PcmEncoding
  encoding, or mutable data bounds.
- WavHeaderReader now parses the WAVE header chunks without any
  additional logic (e.g. validating the block alignment value,
  which is format type dependent).
- The SeekMap part of WavHeader is split out into WavSeekMap.

PiperOrigin-RevId: 285232498
This commit is contained in:
olly 2019-12-12 19:26:10 +00:00 committed by Oliver Woodman
parent 1144926380
commit d62dc9dcfb
6 changed files with 202 additions and 198 deletions

View File

@ -176,7 +176,7 @@ public final class TeeAudioProcessor extends BaseAudioProcessor {
// Write the rest of the header as little endian data.
scratchByteBuffer.clear();
scratchByteBuffer.putInt(16);
scratchByteBuffer.putShort((short) WavUtil.getTypeForEncoding(encoding));
scratchByteBuffer.putShort((short) WavUtil.getTypeForPcmEncoding(encoding));
scratchByteBuffer.putShort((short) channelCount);
scratchByteBuffer.putInt(sampleRateHz);
int bytesPerSample = Util.getPcmFrameSize(encoding, channelCount);

View File

@ -42,9 +42,16 @@ public final class WavUtil {
/** WAVE type value for extended WAVE format. */
private static final int TYPE_WAVE_FORMAT_EXTENSIBLE = 0xFFFE;
/** Returns the WAVE type value for the given {@code encoding}. */
public static int getTypeForEncoding(@C.PcmEncoding int encoding) {
switch (encoding) {
/**
* Returns the WAVE format type value for the given {@link C.PcmEncoding}.
*
* @param pcmEncoding The {@link C.PcmEncoding} value.
* @return The corresponding WAVE format type.
* @throws IllegalArgumentException If {@code pcmEncoding} is not a {@link C.PcmEncoding}, or if
* it's {@link C#ENCODING_INVALID} or {@link Format#NO_VALUE}.
*/
public static int getTypeForPcmEncoding(@C.PcmEncoding int pcmEncoding) {
switch (pcmEncoding) {
case C.ENCODING_PCM_8BIT:
case C.ENCODING_PCM_16BIT:
case C.ENCODING_PCM_24BIT:
@ -63,8 +70,11 @@ public final class WavUtil {
}
}
/** Returns the PCM encoding for the given WAVE {@code type} value. */
public static @C.PcmEncoding int getEncodingForType(int type, int bitsPerSample) {
/**
* Returns the {@link C.PcmEncoding} for the given WAVE format type value, or {@link
* C#ENCODING_INVALID} if the type is not a known PCM type.
*/
public static @C.PcmEncoding int getPcmEncodingForType(int type, int bitsPerSample) {
switch (type) {
case TYPE_PCM:
case TYPE_WAVE_FORMAT_EXTENSIBLE:

View File

@ -15,9 +15,11 @@
*/
package com.google.android.exoplayer2.extractor.wav;
import android.util.Pair;
import com.google.android.exoplayer2.C;
import com.google.android.exoplayer2.Format;
import com.google.android.exoplayer2.ParserException;
import com.google.android.exoplayer2.audio.WavUtil;
import com.google.android.exoplayer2.extractor.Extractor;
import com.google.android.exoplayer2.extractor.ExtractorInput;
import com.google.android.exoplayer2.extractor.ExtractorOutput;
@ -41,10 +43,17 @@ public final class WavExtractor implements Extractor {
private ExtractorOutput extractorOutput;
private TrackOutput trackOutput;
private WavHeader wavHeader;
private int bytesPerFrame;
private WavHeader header;
private WavSeekMap seekMap;
private int dataStartPosition;
private long dataEndPosition;
private int pendingBytes;
public WavExtractor() {
dataStartPosition = C.POSITION_UNSET;
dataEndPosition = C.POSITION_UNSET;
}
@Override
public boolean sniff(ExtractorInput input) throws IOException, InterruptedException {
return WavHeaderReader.peek(input) != null;
@ -54,7 +63,7 @@ public final class WavExtractor implements Extractor {
public void init(ExtractorOutput output) {
extractorOutput = output;
trackOutput = output.track(0, C.TRACK_TYPE_AUDIO);
wavHeader = null;
header = null;
output.endTracks();
}
@ -71,29 +80,58 @@ public final class WavExtractor implements Extractor {
@Override
public int read(ExtractorInput input, PositionHolder seekPosition)
throws IOException, InterruptedException {
if (wavHeader == null) {
wavHeader = WavHeaderReader.peek(input);
if (wavHeader == null) {
if (header == null) {
header = WavHeaderReader.peek(input);
if (header == null) {
// Should only happen if the media wasn't sniffed.
throw new ParserException("Unsupported or unrecognized wav header.");
}
Format format = Format.createAudioSampleFormat(null, MimeTypes.AUDIO_RAW, null,
wavHeader.getBitrate(), MAX_INPUT_SIZE, wavHeader.getNumChannels(),
wavHeader.getSampleRateHz(), wavHeader.getEncoding(), null, null, 0, null);
@C.PcmEncoding
int pcmEncoding = WavUtil.getPcmEncodingForType(header.formatType, header.bitsPerSample);
if (pcmEncoding == C.ENCODING_INVALID) {
throw new ParserException("Unsupported WAV format type: " + header.formatType);
}
// PCM specific header validation.
int expectedBytesPerFrame = header.numChannels * header.bitsPerSample / 8;
if (header.blockAlign != expectedBytesPerFrame) {
throw new ParserException(
"Unexpected bytes per frame: "
+ header.blockAlign
+ "; expected: "
+ expectedBytesPerFrame);
}
Format format =
Format.createAudioSampleFormat(
/* id= */ null,
MimeTypes.AUDIO_RAW,
/* codecs= */ null,
/* bitrate= */ header.averageBytesPerSecond * 8,
MAX_INPUT_SIZE,
header.numChannels,
header.sampleRateHz,
pcmEncoding,
/* initializationData= */ null,
/* drmInitData= */ null,
/* selectionFlags= */ 0,
/* language= */ null);
trackOutput.format(format);
bytesPerFrame = wavHeader.getBytesPerFrame();
}
if (!wavHeader.hasDataBounds()) {
WavHeaderReader.skipToData(input, wavHeader);
extractorOutput.seekMap(wavHeader);
if (dataStartPosition == C.POSITION_UNSET) {
Pair<Long, Long> dataBounds = WavHeaderReader.skipToData(input);
dataStartPosition = dataBounds.first.intValue();
dataEndPosition = dataBounds.second;
seekMap =
new WavSeekMap(header, /* samplesPerBlock= */ 1, dataStartPosition, dataEndPosition);
extractorOutput.seekMap(seekMap);
} else if (input.getPosition() == 0) {
input.skipFully(wavHeader.getDataStartPosition());
input.skipFully(dataStartPosition);
}
long dataEndPosition = wavHeader.getDataEndPosition();
Assertions.checkState(dataEndPosition != C.POSITION_UNSET);
long bytesLeft = dataEndPosition - input.getPosition();
if (bytesLeft <= 0) {
return Extractor.RESULT_END_OF_INPUT;
@ -105,16 +143,17 @@ public final class WavExtractor implements Extractor {
pendingBytes += bytesAppended;
}
// Samples must consist of a whole number of frames.
// For PCM blockAlign is the frame size, and samples must consist of a whole number of frames.
int bytesPerFrame = header.blockAlign;
int pendingFrames = pendingBytes / bytesPerFrame;
if (pendingFrames > 0) {
long timeUs = wavHeader.getTimeUs(input.getPosition() - pendingBytes);
long timeUs = seekMap.getTimeUs(input.getPosition() - pendingBytes);
int size = pendingFrames * bytesPerFrame;
pendingBytes -= size;
trackOutput.sampleMetadata(timeUs, C.BUFFER_FLAG_KEY_FRAME, size, pendingBytes, null);
trackOutput.sampleMetadata(
timeUs, C.BUFFER_FLAG_KEY_FRAME, size, pendingBytes, /* encryptionData= */ null);
}
return bytesAppended == RESULT_END_OF_INPUT ? RESULT_END_OF_INPUT : RESULT_CONTINUE;
}
}

View File

@ -15,160 +15,41 @@
*/
package com.google.android.exoplayer2.extractor.wav;
import com.google.android.exoplayer2.C;
import com.google.android.exoplayer2.extractor.SeekMap;
import com.google.android.exoplayer2.extractor.SeekPoint;
import com.google.android.exoplayer2.util.Util;
/** Header for a WAV file. */
/* package */ final class WavHeader implements SeekMap {
/* package */ final class WavHeader {
/** Number of audio channels. */
private final int numChannels;
/** Sample rate in Hertz. */
private final int sampleRateHz;
/** Average bytes per second for the sample data. */
private final int averageBytesPerSecond;
/** Alignment for frames of audio data; should equal {@code numChannels * bitsPerSample / 8}. */
private final int blockAlignment;
/** Bits per sample for the audio data. */
private final int bitsPerSample;
/** Number of samples in each block. */
private final int samplesPerBlock;
/** The PCM encoding. */
@C.PcmEncoding private final int encoding;
/** Position of the start of the sample data, in bytes. */
private int dataStartPosition;
/** Position of the end of the sample data (exclusive), in bytes. */
private long dataEndPosition;
/**
* The format type. Standard format types are the "WAVE form Registration Number" constants
* defined in RFC 2361 Appendix A.
*/
public final int formatType;
/** The number of channels. */
public final int numChannels;
/** The sample rate in Hertz. */
public final int sampleRateHz;
/** The average bytes per second for the sample data. */
public final int averageBytesPerSecond;
/** The block size in bytes. */
public final int blockAlign;
/** Bits per sample for a single channel. */
public final int bitsPerSample;
/** Extra data appended to the format chunk of the header. */
public final byte[] extraData;
public WavHeader(
int formatType,
int numChannels,
int sampleRateHz,
int averageBytesPerSecond,
int blockAlignment,
int bytesPerFrame,
int bitsPerSample,
int samplesPerBlock,
@C.PcmEncoding int encoding) {
byte[] extraData) {
this.formatType = formatType;
this.numChannels = numChannels;
this.sampleRateHz = sampleRateHz;
this.averageBytesPerSecond = averageBytesPerSecond;
this.blockAlignment = blockAlignment;
this.blockAlign = bytesPerFrame;
this.bitsPerSample = bitsPerSample;
this.samplesPerBlock = samplesPerBlock;
this.encoding = encoding;
dataStartPosition = C.POSITION_UNSET;
dataEndPosition = C.POSITION_UNSET;
this.extraData = extraData;
}
// Data bounds.
/**
* Sets the data start position and size in bytes of sample data in this WAV.
*
* @param dataStartPosition The position of the start of the sample data, in bytes.
* @param dataEndPosition The position of the end of the sample data (exclusive), in bytes.
*/
public void setDataBounds(int dataStartPosition, long dataEndPosition) {
this.dataStartPosition = dataStartPosition;
this.dataEndPosition = dataEndPosition;
}
/**
* Returns the position of the start of the sample data, in bytes, or {@link C#POSITION_UNSET} if
* the data bounds have not been set.
*/
public int getDataStartPosition() {
return dataStartPosition;
}
/**
* Returns the position of the end of the sample data (exclusive), in bytes, or {@link
* C#POSITION_UNSET} if the data bounds have not been set.
*/
public long getDataEndPosition() {
return dataEndPosition;
}
/** Returns whether the data start position and size have been set. */
public boolean hasDataBounds() {
return dataStartPosition != C.POSITION_UNSET;
}
// SeekMap implementation.
@Override
public boolean isSeekable() {
return true;
}
@Override
public long getDurationUs() {
long numBlocks = (dataEndPosition - dataStartPosition) / blockAlignment;
return numBlocks * samplesPerBlock * C.MICROS_PER_SECOND / sampleRateHz;
}
@Override
public SeekPoints getSeekPoints(long timeUs) {
long dataSize = dataEndPosition - dataStartPosition;
long positionOffset = (timeUs * averageBytesPerSecond) / C.MICROS_PER_SECOND;
// Constrain to nearest preceding frame offset.
positionOffset = (positionOffset / blockAlignment) * blockAlignment;
positionOffset = Util.constrainValue(positionOffset, 0, dataSize - blockAlignment);
long seekPosition = dataStartPosition + positionOffset;
long seekTimeUs = getTimeUs(seekPosition);
SeekPoint seekPoint = new SeekPoint(seekTimeUs, seekPosition);
if (seekTimeUs >= timeUs || positionOffset == dataSize - blockAlignment) {
return new SeekPoints(seekPoint);
} else {
long secondSeekPosition = seekPosition + blockAlignment;
long secondSeekTimeUs = getTimeUs(secondSeekPosition);
SeekPoint secondSeekPoint = new SeekPoint(secondSeekTimeUs, secondSeekPosition);
return new SeekPoints(seekPoint, secondSeekPoint);
}
}
// Misc getters.
/**
* Returns the time in microseconds for the given position in bytes.
*
* @param position The position in bytes.
*/
public long getTimeUs(long position) {
long positionOffset = Math.max(0, position - dataStartPosition);
return (positionOffset * C.MICROS_PER_SECOND) / averageBytesPerSecond;
}
/** Returns the bytes per frame of this WAV. */
public int getBytesPerFrame() {
return blockAlignment;
}
/** Returns the bitrate of this WAV. */
public int getBitrate() {
return sampleRateHz * bitsPerSample * numChannels;
}
/** Returns the sample rate in Hertz of this WAV. */
public int getSampleRateHz() {
return sampleRateHz;
}
/** Returns the number of audio channels in this WAV. */
public int getNumChannels() {
return numChannels;
}
/** Returns the number of samples in each block. */
public int getSamplesPerBlock() {
return samplesPerBlock;
}
/** Returns the PCM encoding. **/
public @C.PcmEncoding int getEncoding() {
return encoding;
}
}

View File

@ -15,6 +15,7 @@
*/
package com.google.android.exoplayer2.extractor.wav;
import android.util.Pair;
import androidx.annotation.Nullable;
import com.google.android.exoplayer2.C;
import com.google.android.exoplayer2.ParserException;
@ -23,6 +24,7 @@ import com.google.android.exoplayer2.extractor.ExtractorInput;
import com.google.android.exoplayer2.util.Assertions;
import com.google.android.exoplayer2.util.Log;
import com.google.android.exoplayer2.util.ParsableByteArray;
import com.google.android.exoplayer2.util.Util;
import java.io.IOException;
/** Reads a {@code WavHeader} from an input stream; supports resuming from input failures. */
@ -71,57 +73,46 @@ import java.io.IOException;
Assertions.checkState(chunkHeader.size >= 16);
input.peekFully(scratch.data, 0, 16);
scratch.setPosition(0);
int type = scratch.readLittleEndianUnsignedShort();
int audioFormatType = scratch.readLittleEndianUnsignedShort();
int numChannels = scratch.readLittleEndianUnsignedShort();
int sampleRateHz = scratch.readLittleEndianUnsignedIntToInt();
int averageBytesPerSecond = scratch.readLittleEndianUnsignedIntToInt();
int blockAlignment = scratch.readLittleEndianUnsignedShort();
int bitsPerSample = scratch.readLittleEndianUnsignedShort();
int expectedBlockAlignment = numChannels * bitsPerSample / 8;
if (blockAlignment != expectedBlockAlignment) {
throw new ParserException("Expected block alignment: " + expectedBlockAlignment + "; got: "
+ blockAlignment);
int bytesLeft = (int) chunkHeader.size - 16;
byte[] extraData;
if (bytesLeft > 0) {
extraData = new byte[bytesLeft];
input.peekFully(extraData, 0, bytesLeft);
} else {
extraData = Util.EMPTY_BYTE_ARRAY;
}
@C.PcmEncoding int encoding = WavUtil.getEncodingForType(type, bitsPerSample);
if (encoding == C.ENCODING_INVALID) {
Log.e(TAG, "Unsupported WAV format: " + bitsPerSample + " bit/sample, type " + type);
return null;
}
// If present, skip extensionSize, validBitsPerSample, channelMask, subFormatGuid, ...
input.advancePeekPosition((int) chunkHeader.size - 16);
return new WavHeader(
audioFormatType,
numChannels,
sampleRateHz,
averageBytesPerSecond,
blockAlignment,
bitsPerSample,
/* samplesPerBlock= */ 1,
encoding);
extraData);
}
/**
* Skips to the data in the given WAV input stream. After calling, the input stream's position
* will point to the start of sample data in the WAV, and the data bounds of the provided {@link
* WavHeader} will have been set.
* Skips to the data in the given WAV input stream, and returns its bounds. After calling, the
* input stream's position will point to the start of sample data in the WAV. If an exception is
* thrown, the input position will be left pointing to a chunk header.
*
* <p>If an exception is thrown, the input position will be left pointing to a chunk header and
* the bounds of the provided {@link WavHeader} will not have been set.
*
* @param input Input stream to skip to the data chunk in. Its peek position must be pointing to a
* valid chunk header.
* @param wavHeader WAV header to populate with data bounds.
* @param input The input stream, whose read position must be pointing to a valid chunk header.
* @return The byte positions at which the data starts (inclusive) and ends (exclusive).
* @throws ParserException If an error occurs parsing chunks.
* @throws IOException If reading from the input fails.
* @throws InterruptedException If interrupted while reading from input.
*/
public static void skipToData(ExtractorInput input, WavHeader wavHeader)
public static Pair<Long, Long> skipToData(ExtractorInput input)
throws IOException, InterruptedException {
Assertions.checkNotNull(input);
Assertions.checkNotNull(wavHeader);
// Make sure the peek position is set to the read position before we peek the first header.
input.resetPeekPosition();
@ -147,14 +138,14 @@ import java.io.IOException;
// Skip past the "data" header.
input.skipFully(ChunkHeader.SIZE_IN_BYTES);
int dataStartPosition = (int) input.getPosition();
long dataStartPosition = input.getPosition();
long dataEndPosition = dataStartPosition + chunkHeader.size;
long inputLength = input.getLength();
if (inputLength != C.LENGTH_UNSET && dataEndPosition > inputLength) {
Log.w(TAG, "Data exceeds input length: " + dataEndPosition + ", " + inputLength);
dataEndPosition = inputLength;
}
wavHeader.setDataBounds(dataStartPosition, dataEndPosition);
return Pair.create(dataStartPosition, dataEndPosition);
}
private WavHeaderReader() {

View File

@ -0,0 +1,83 @@
/*
* Copyright (C) 2019 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.android.exoplayer2.extractor.wav;
import com.google.android.exoplayer2.C;
import com.google.android.exoplayer2.extractor.SeekMap;
import com.google.android.exoplayer2.extractor.SeekPoint;
import com.google.android.exoplayer2.util.Util;
/**
 * A {@link SeekMap} over the sample data of a WAV stream. Byte positions and times are converted
 * using the values of the stream's {@link WavHeader} together with the data bounds supplied at
 * construction.
 */
/* package */ final class WavSeekMap implements SeekMap {

  /** The WAV header for the stream. */
  private final WavHeader wavHeader;
  /** Number of samples in each block. */
  private final int samplesPerBlock;
  /** Position of the start of the sample data, in bytes. */
  private final long dataStartPosition;
  /** Position of the end of the sample data (exclusive), in bytes. */
  private final long dataEndPosition;

  /**
   * @param wavHeader The WAV header for the stream.
   * @param samplesPerBlock The number of samples in each block.
   * @param dataStartPosition The position of the start of the sample data, in bytes.
   * @param dataEndPosition The position of the end of the sample data (exclusive), in bytes.
   */
  public WavSeekMap(
      WavHeader wavHeader, int samplesPerBlock, long dataStartPosition, long dataEndPosition) {
    this.wavHeader = wavHeader;
    this.samplesPerBlock = samplesPerBlock;
    this.dataStartPosition = dataStartPosition;
    this.dataEndPosition = dataEndPosition;
  }

  @Override
  public boolean isSeekable() {
    return true;
  }

  /**
   * Returns the duration in microseconds, derived from the number of whole blocks between the data
   * bounds, the number of samples per block and the header's sample rate.
   */
  @Override
  public long getDurationUs() {
    long numBlocks = (dataEndPosition - dataStartPosition) / wavHeader.blockAlign;
    return numBlocks * samplesPerBlock * C.MICROS_PER_SECOND / wavHeader.sampleRateHz;
  }

  /**
   * Returns the seek point at or before {@code timeUs}, plus the seek point of the following block
   * if the first one is earlier than {@code timeUs} and a following block exists.
   *
   * @param timeUs The requested seek time, in microseconds.
   */
  @Override
  public SeekPoints getSeekPoints(long timeUs) {
    long blockAlign = wavHeader.blockAlign;
    long dataSize = dataEndPosition - dataStartPosition;
    // Offset of the last complete block. Floored at zero so that data shorter than one block (or
    // data whose size is not a multiple of the block size) never yields a seek position that is
    // misaligned or that lies beyond the end of the data.
    long lastBlockOffset = Math.max(0, (dataSize / blockAlign - 1) * blockAlign);
    long positionOffset = (timeUs * wavHeader.averageBytesPerSecond) / C.MICROS_PER_SECOND;
    // Constrain to nearest preceding frame offset.
    positionOffset = (positionOffset / blockAlign) * blockAlign;
    positionOffset = Util.constrainValue(positionOffset, 0, lastBlockOffset);
    long seekPosition = dataStartPosition + positionOffset;
    long seekTimeUs = getTimeUs(seekPosition);
    SeekPoint seekPoint = new SeekPoint(seekTimeUs, seekPosition);
    if (seekTimeUs >= timeUs || positionOffset == lastBlockOffset) {
      // Either the seek point is exact, or there is no later block to offer as an alternative.
      return new SeekPoints(seekPoint);
    } else {
      long secondSeekPosition = seekPosition + blockAlign;
      long secondSeekTimeUs = getTimeUs(secondSeekPosition);
      SeekPoint secondSeekPoint = new SeekPoint(secondSeekTimeUs, secondSeekPosition);
      return new SeekPoints(seekPoint, secondSeekPoint);
    }
  }

  /**
   * Returns the time in microseconds for the given position in bytes. Positions before the start
   * of the sample data map to time zero.
   *
   * @param position The position in bytes.
   */
  public long getTimeUs(long position) {
    long positionOffset = Math.max(0, position - dataStartPosition);
    return (positionOffset * C.MICROS_PER_SECOND) / wavHeader.averageBytesPerSecond;
  }
}