Clean up WavExtractor PCM output

- Make the extractor output samples that are uniformly distributed
  with respect to time, with a target of ~10 samples per second.
  The old approach could in theory put every frame into its own
  sample, which would be very inefficient downstream because we'd
  need to pass them individually to MediaCodec. It could also put
  data corresponding to a long duration of time into a single
  sample (e.g. if the sample rate of the content is low), which
  is bad downstream because we decide whether to set the decodeOnly
  flag on a per-sample basis. More generally, the new approach
  is more predictable :).

- Stop using the WavSeekMap to get sample timestamps, and instead
  calculate them directly from the number of frames output. It's
  more obviously correct, particularly for data formats like IMA
  ADPCM where we'll need to adjust the data prior to output.

PiperOrigin-RevId: 285750010
This commit is contained in:
olly 2019-12-16 13:05:09 +00:00 committed by Oliver Woodman
parent 27b06e9ad9
commit 7bd912f895
8 changed files with 168 additions and 92 deletions

View File

@ -28,6 +28,7 @@ import com.google.android.exoplayer2.extractor.PositionHolder;
import com.google.android.exoplayer2.extractor.TrackOutput; import com.google.android.exoplayer2.extractor.TrackOutput;
import com.google.android.exoplayer2.util.Assertions; import com.google.android.exoplayer2.util.Assertions;
import com.google.android.exoplayer2.util.MimeTypes; import com.google.android.exoplayer2.util.MimeTypes;
import com.google.android.exoplayer2.util.Util;
import java.io.IOException; import java.io.IOException;
/** /**
@ -35,8 +36,12 @@ import java.io.IOException;
*/ */
public final class WavExtractor implements Extractor { public final class WavExtractor implements Extractor {
/** Arbitrary maximum sample size of 32KB, which is ~170ms of 16-bit stereo PCM audio at 48KHz. */ /**
private static final int MAX_SAMPLE_SIZE = 32 * 1024; * When outputting PCM data to a {@link TrackOutput}, we can choose how many frames are grouped
* into each sample, and hence each sample's duration. This is the target number of samples to
* output for each second of media, meaning that each sample will have a duration of ~100ms.
*/
private static final int TARGET_SAMPLES_PER_SECOND = 10;
/** Factory for {@link WavExtractor} instances. */ /** Factory for {@link WavExtractor} instances. */
public static final ExtractorsFactory FACTORY = () -> new Extractor[] {new WavExtractor()}; public static final ExtractorsFactory FACTORY = () -> new Extractor[] {new WavExtractor()};
@ -67,7 +72,7 @@ public final class WavExtractor implements Extractor {
@Override @Override
public void seek(long position, long timeUs) { public void seek(long position, long timeUs) {
if (outputWriter != null) { if (outputWriter != null) {
outputWriter.reset(); outputWriter.reset(timeUs);
} }
} }
@ -105,18 +110,18 @@ public final class WavExtractor implements Extractor {
Assertions.checkState(dataEndPosition != C.POSITION_UNSET); Assertions.checkState(dataEndPosition != C.POSITION_UNSET);
long bytesLeft = dataEndPosition - input.getPosition(); long bytesLeft = dataEndPosition - input.getPosition();
if (bytesLeft <= 0) { return outputWriter.sampleData(input, bytesLeft) ? RESULT_END_OF_INPUT : RESULT_CONTINUE;
return Extractor.RESULT_END_OF_INPUT;
}
return outputWriter.sampleData(input, bytesLeft) ? RESULT_CONTINUE : RESULT_END_OF_INPUT;
} }
/** Writes to the extractor's output. */ /** Writes to the extractor's output. */
private interface OutputWriter { private interface OutputWriter {
/** Resets the writer. */ /**
void reset(); * Resets the writer.
*
* @param timeUs The new start position in microseconds.
*/
void reset(long timeUs);
/** /**
* Initializes the writer. * Initializes the writer.
@ -137,7 +142,7 @@ public final class WavExtractor implements Extractor {
* *
* @param input The input from which to read. * @param input The input from which to read.
* @param bytesLeft The number of sample data bytes left to be read from the input. * @param bytesLeft The number of sample data bytes left to be read from the input.
* @return True if data was consumed. False if the end of the stream has been reached. * @return Whether the end of the sample data has been reached.
* @throws IOException If an error occurs reading from the input. * @throws IOException If an error occurs reading from the input.
* @throws InterruptedException If the thread has been interrupted. * @throws InterruptedException If the thread has been interrupted.
*/ */
@ -151,8 +156,10 @@ public final class WavExtractor implements Extractor {
private final TrackOutput trackOutput; private final TrackOutput trackOutput;
private final WavHeader header; private final WavHeader header;
private final @C.PcmEncoding int pcmEncoding; private final @C.PcmEncoding int pcmEncoding;
private final int targetSampleSize;
private WavSeekMap seekMap; private long startTimeUs;
private long outputFrameCount;
private int pendingBytes; private int pendingBytes;
public PcmOutputWriter( public PcmOutputWriter(
@ -164,26 +171,31 @@ public final class WavExtractor implements Extractor {
this.trackOutput = trackOutput; this.trackOutput = trackOutput;
this.header = header; this.header = header;
this.pcmEncoding = pcmEncoding; this.pcmEncoding = pcmEncoding;
// For PCM, blocks correspond to single frames. This is validated in init(int, long).
int bytesPerFrame = header.blockSize;
targetSampleSize =
Math.max(bytesPerFrame, header.frameRateHz * bytesPerFrame / TARGET_SAMPLES_PER_SECOND);
} }
@Override @Override
public void reset() { public void reset(long timeUs) {
startTimeUs = timeUs;
outputFrameCount = 0;
pendingBytes = 0; pendingBytes = 0;
} }
@Override @Override
public void init(int dataStartPosition, long dataEndPosition) throws ParserException { public void init(int dataStartPosition, long dataEndPosition) throws ParserException {
// Validate the header. // Validate the header.
int expectedBytesPerFrame = header.numChannels * header.bitsPerSample / 8; int bytesPerFrame = header.numChannels * header.bitsPerSample / 8;
if (header.blockAlign != expectedBytesPerFrame) { if (header.blockSize != bytesPerFrame) {
throw new ParserException( throw new ParserException(
"Expected block alignment: " + expectedBytesPerFrame + "; got: " + header.blockAlign); "Expected block size: " + bytesPerFrame + "; got: " + header.blockSize);
} }
// Output the seek map. // Output the seek map.
seekMap = extractorOutput.seekMap(
new WavSeekMap(header, /* samplesPerBlock= */ 1, dataStartPosition, dataEndPosition); new WavSeekMap(header, /* framesPerBlock= */ 1, dataStartPosition, dataEndPosition));
extractorOutput.seekMap(seekMap);
// Output the format. // Output the format.
Format format = Format format =
@ -192,9 +204,9 @@ public final class WavExtractor implements Extractor {
MimeTypes.AUDIO_RAW, MimeTypes.AUDIO_RAW,
/* codecs= */ null, /* codecs= */ null,
/* bitrate= */ header.averageBytesPerSecond * 8, /* bitrate= */ header.averageBytesPerSecond * 8,
MAX_SAMPLE_SIZE, targetSampleSize,
header.numChannels, header.numChannels,
header.sampleRateHz, header.frameRateHz,
pcmEncoding, pcmEncoding,
/* initializationData= */ null, /* initializationData= */ null,
/* drmInitData= */ null, /* drmInitData= */ null,
@ -206,25 +218,36 @@ public final class WavExtractor implements Extractor {
@Override @Override
public boolean sampleData(ExtractorInput input, long bytesLeft) public boolean sampleData(ExtractorInput input, long bytesLeft)
throws IOException, InterruptedException { throws IOException, InterruptedException {
int maxBytesToRead = (int) Math.min(MAX_SAMPLE_SIZE - pendingBytes, bytesLeft); // Write sample data until we've reached the target sample size, or the end of the data.
int numBytesAppended = trackOutput.sampleData(input, maxBytesToRead, true); boolean endOfSampleData = bytesLeft == 0;
boolean wereBytesAppended = numBytesAppended != RESULT_END_OF_INPUT; while (!endOfSampleData && pendingBytes < targetSampleSize) {
if (wereBytesAppended) { int bytesToRead = (int) Math.min(targetSampleSize - pendingBytes, bytesLeft);
pendingBytes += numBytesAppended; int bytesAppended = trackOutput.sampleData(input, bytesToRead, true);
if (bytesAppended == RESULT_END_OF_INPUT) {
endOfSampleData = true;
} else {
pendingBytes += bytesAppended;
}
} }
// blockAlign is the frame size, and samples must consist of a whole number of frames. // Write the corresponding sample metadata. Samples must be a whole number of frames. It's
int bytesPerFrame = header.blockAlign; // possible pendingBytes is not a whole number of frames if the stream ended unexpectedly.
int bytesPerFrame = header.blockSize;
int pendingFrames = pendingBytes / bytesPerFrame; int pendingFrames = pendingBytes / bytesPerFrame;
if (pendingFrames > 0) { if (pendingFrames > 0) {
long timeUs = seekMap.getTimeUs(input.getPosition() - pendingBytes); long timeUs =
startTimeUs
+ Util.scaleLargeTimestamp(
outputFrameCount, C.MICROS_PER_SECOND, header.frameRateHz);
int size = pendingFrames * bytesPerFrame; int size = pendingFrames * bytesPerFrame;
pendingBytes -= size; int offset = pendingBytes - size;
trackOutput.sampleMetadata( trackOutput.sampleMetadata(
timeUs, C.BUFFER_FLAG_KEY_FRAME, size, pendingBytes, /* encryptionData= */ null); timeUs, C.BUFFER_FLAG_KEY_FRAME, size, offset, /* encryptionData= */ null);
outputFrameCount += pendingFrames;
pendingBytes = offset;
} }
return wereBytesAppended; return endOfSampleData;
} }
} }
} }

View File

@ -26,11 +26,11 @@ package com.google.android.exoplayer2.extractor.wav;
/** The number of channels. */ /** The number of channels. */
public final int numChannels; public final int numChannels;
/** The sample rate in Hertz. */ /** The sample rate in Hertz. */
public final int sampleRateHz; public final int frameRateHz;
/** The average bytes per second for the sample data. */ /** The average bytes per second for the sample data. */
public final int averageBytesPerSecond; public final int averageBytesPerSecond;
/** The block size in bytes. */ /** The block size in bytes. */
public final int blockAlign; public final int blockSize;
/** Bits per sample for a single channel. */ /** Bits per sample for a single channel. */
public final int bitsPerSample; public final int bitsPerSample;
/** Extra data appended to the format chunk of the header. */ /** Extra data appended to the format chunk of the header. */
@ -39,16 +39,16 @@ package com.google.android.exoplayer2.extractor.wav;
public WavHeader( public WavHeader(
int formatType, int formatType,
int numChannels, int numChannels,
int sampleRateHz, int frameRateHz,
int averageBytesPerSecond, int averageBytesPerSecond,
int bytesPerFrame, int blockSize,
int bitsPerSample, int bitsPerSample,
byte[] extraData) { byte[] extraData) {
this.formatType = formatType; this.formatType = formatType;
this.numChannels = numChannels; this.numChannels = numChannels;
this.sampleRateHz = sampleRateHz; this.frameRateHz = frameRateHz;
this.averageBytesPerSecond = averageBytesPerSecond; this.averageBytesPerSecond = averageBytesPerSecond;
this.blockAlign = bytesPerFrame; this.blockSize = blockSize;
this.bitsPerSample = bitsPerSample; this.bitsPerSample = bitsPerSample;
this.extraData = extraData; this.extraData = extraData;
} }

View File

@ -75,9 +75,9 @@ import java.io.IOException;
scratch.setPosition(0); scratch.setPosition(0);
int audioFormatType = scratch.readLittleEndianUnsignedShort(); int audioFormatType = scratch.readLittleEndianUnsignedShort();
int numChannels = scratch.readLittleEndianUnsignedShort(); int numChannels = scratch.readLittleEndianUnsignedShort();
int sampleRateHz = scratch.readLittleEndianUnsignedIntToInt(); int frameRateHz = scratch.readLittleEndianUnsignedIntToInt();
int averageBytesPerSecond = scratch.readLittleEndianUnsignedIntToInt(); int averageBytesPerSecond = scratch.readLittleEndianUnsignedIntToInt();
int blockAlignment = scratch.readLittleEndianUnsignedShort(); int blockSize = scratch.readLittleEndianUnsignedShort();
int bitsPerSample = scratch.readLittleEndianUnsignedShort(); int bitsPerSample = scratch.readLittleEndianUnsignedShort();
int bytesLeft = (int) chunkHeader.size - 16; int bytesLeft = (int) chunkHeader.size - 16;
@ -92,9 +92,9 @@ import java.io.IOException;
return new WavHeader( return new WavHeader(
audioFormatType, audioFormatType,
numChannels, numChannels,
sampleRateHz, frameRateHz,
averageBytesPerSecond, averageBytesPerSecond,
blockAlignment, blockSize,
bitsPerSample, bitsPerSample,
extraData); extraData);
} }

View File

@ -22,21 +22,19 @@ import com.google.android.exoplayer2.util.Util;
/* package */ final class WavSeekMap implements SeekMap { /* package */ final class WavSeekMap implements SeekMap {
/** The WAV header for the stream. */
private final WavHeader wavHeader; private final WavHeader wavHeader;
/** Number of samples in each block. */ private final int framesPerBlock;
private final int samplesPerBlock; private final long firstBlockPosition;
/** Position of the start of the sample data, in bytes. */ private final long blockCount;
private final long dataStartPosition; private final long durationUs;
/** Position of the end of the sample data (exclusive), in bytes. */
private final long dataEndPosition;
public WavSeekMap( public WavSeekMap(
WavHeader wavHeader, int samplesPerBlock, long dataStartPosition, long dataEndPosition) { WavHeader wavHeader, int framesPerBlock, long dataStartPosition, long dataEndPosition) {
this.wavHeader = wavHeader; this.wavHeader = wavHeader;
this.samplesPerBlock = samplesPerBlock; this.framesPerBlock = framesPerBlock;
this.dataStartPosition = dataStartPosition; this.firstBlockPosition = dataStartPosition;
this.dataEndPosition = dataEndPosition; this.blockCount = (dataEndPosition - dataStartPosition) / wavHeader.blockSize;
durationUs = blockIndexToTimeUs(blockCount);
} }
@Override @Override
@ -46,38 +44,33 @@ import com.google.android.exoplayer2.util.Util;
@Override @Override
public long getDurationUs() { public long getDurationUs() {
long numBlocks = (dataEndPosition - dataStartPosition) / wavHeader.blockAlign; return durationUs;
return numBlocks * samplesPerBlock * C.MICROS_PER_SECOND / wavHeader.sampleRateHz;
} }
@Override @Override
public SeekPoints getSeekPoints(long timeUs) { public SeekPoints getSeekPoints(long timeUs) {
long blockAlign = wavHeader.blockAlign; // Calculate the expected number of bytes of sample data corresponding to the requested time.
long dataSize = dataEndPosition - dataStartPosition;
long positionOffset = (timeUs * wavHeader.averageBytesPerSecond) / C.MICROS_PER_SECOND; long positionOffset = (timeUs * wavHeader.averageBytesPerSecond) / C.MICROS_PER_SECOND;
// Constrain to nearest preceding frame offset. // Calculate the containing block index, constraining to valid indices.
positionOffset = (positionOffset / blockAlign) * blockAlign; long blockSize = wavHeader.blockSize;
positionOffset = Util.constrainValue(positionOffset, 0, dataSize - blockAlign); long blockIndex = Util.constrainValue(positionOffset / blockSize, 0, blockCount - 1);
long seekPosition = dataStartPosition + positionOffset;
long seekTimeUs = getTimeUs(seekPosition); long seekPosition = firstBlockPosition + (blockIndex * blockSize);
long seekTimeUs = blockIndexToTimeUs(blockIndex);
SeekPoint seekPoint = new SeekPoint(seekTimeUs, seekPosition); SeekPoint seekPoint = new SeekPoint(seekTimeUs, seekPosition);
if (seekTimeUs >= timeUs || positionOffset == dataSize - blockAlign) { if (seekTimeUs >= timeUs || blockIndex == blockCount - 1) {
return new SeekPoints(seekPoint); return new SeekPoints(seekPoint);
} else { } else {
long secondSeekPosition = seekPosition + blockAlign; long secondBlockIndex = blockIndex + 1;
long secondSeekTimeUs = getTimeUs(secondSeekPosition); long secondSeekPosition = firstBlockPosition + (secondBlockIndex * blockSize);
long secondSeekTimeUs = blockIndexToTimeUs(secondBlockIndex);
SeekPoint secondSeekPoint = new SeekPoint(secondSeekTimeUs, secondSeekPosition); SeekPoint secondSeekPoint = new SeekPoint(secondSeekTimeUs, secondSeekPosition);
return new SeekPoints(seekPoint, secondSeekPoint); return new SeekPoints(seekPoint, secondSeekPoint);
} }
} }
/** private long blockIndexToTimeUs(long blockIndex) {
* Returns the time in microseconds for the given position in bytes. return Util.scaleLargeTimestamp(
* blockIndex * framesPerBlock, C.MICROS_PER_SECOND, wavHeader.frameRateHz);
* @param position The position in bytes.
*/
public long getTimeUs(long position) {
long positionOffset = Math.max(0, position - dataStartPosition);
return (positionOffset * C.MICROS_PER_SECOND) / wavHeader.averageBytesPerSecond;
} }
} }

View File

@ -9,7 +9,7 @@ track 0:
id = null id = null
containerMimeType = null containerMimeType = null
sampleMimeType = audio/raw sampleMimeType = audio/raw
maxInputSize = 32768 maxInputSize = 8820
width = -1 width = -1
height = -1 height = -1
frameRate = -1.0 frameRate = -1.0
@ -27,17 +27,45 @@ track 0:
metadata = null metadata = null
initializationData: initializationData:
total output bytes = 88200 total output bytes = 88200
sample count = 3 sample count = 10
sample 0: sample 0:
time = 0 time = 0
flags = 1 flags = 1
data = length 32768, hash 9A8CEEBA data = length 8820, hash FAE27E28
sample 1: sample 1:
time = 371519 time = 100000
flags = 1 flags = 1
data = length 32768, hash C1717317 data = length 8820, hash 21C3E9C3
sample 2: sample 2:
time = 743038 time = 200000
flags = 1 flags = 1
data = length 22664, hash 819F5F62 data = length 8820, hash B51AD902
sample 3:
time = 300000
flags = 1
data = length 8820, hash 2F4B2CB4
sample 4:
time = 400000
flags = 1
data = length 8820, hash F0030CC2
sample 5:
time = 500000
flags = 1
data = length 8820, hash FF83DA46
sample 6:
time = 600000
flags = 1
data = length 8820, hash 685C1AB5
sample 7:
time = 700000
flags = 1
data = length 8820, hash BE63D51C
sample 8:
time = 800000
flags = 1
data = length 8820, hash 1E44EB8E
sample 9:
time = 900000
flags = 1
data = length 8820, hash 57C41232
tracksEnded = true tracksEnded = true

View File

@ -9,7 +9,7 @@ track 0:
id = null id = null
containerMimeType = null containerMimeType = null
sampleMimeType = audio/raw sampleMimeType = audio/raw
maxInputSize = 32768 maxInputSize = 8820
width = -1 width = -1
height = -1 height = -1
frameRate = -1.0 frameRate = -1.0
@ -27,13 +27,33 @@ track 0:
metadata = null metadata = null
initializationData: initializationData:
total output bytes = 58802 total output bytes = 58802
sample count = 2 sample count = 7
sample 0: sample 0:
time = 333310 time = 333333
flags = 1 flags = 1
data = length 32768, hash 42D6E860 data = length 8820, hash 31868A21
sample 1: sample 1:
time = 704829 time = 433333
flags = 1 flags = 1
data = length 26034, hash 62692C38 data = length 8820, hash AE3D77A2
sample 2:
time = 533333
flags = 1
data = length 8820, hash 966140CE
sample 3:
time = 633333
flags = 1
data = length 8820, hash CB405D7B
sample 4:
time = 733333
flags = 1
data = length 8820, hash 733BA3E6
sample 5:
time = 833333
flags = 1
data = length 8820, hash 7595D752
sample 6:
time = 933333
flags = 1
data = length 5882, hash C617B719
tracksEnded = true tracksEnded = true

View File

@ -9,7 +9,7 @@ track 0:
id = null id = null
containerMimeType = null containerMimeType = null
sampleMimeType = audio/raw sampleMimeType = audio/raw
maxInputSize = 32768 maxInputSize = 8820
width = -1 width = -1
height = -1 height = -1
frameRate = -1.0 frameRate = -1.0
@ -27,9 +27,21 @@ track 0:
metadata = null metadata = null
initializationData: initializationData:
total output bytes = 29402 total output bytes = 29402
sample count = 1 sample count = 4
sample 0: sample 0:
time = 666643 time = 666666
flags = 1 flags = 1
data = length 29402, hash 4241604E data = length 8820, hash D6617E20
sample 1:
time = 766666
flags = 1
data = length 8820, hash 28C74B7A
sample 2:
time = 866666
flags = 1
data = length 8820, hash 680DEFC7
sample 3:
time = 966666
flags = 1
data = length 2942, hash 1D063CF0
tracksEnded = true tracksEnded = true

View File

@ -9,7 +9,7 @@ track 0:
id = null id = null
containerMimeType = null containerMimeType = null
sampleMimeType = audio/raw sampleMimeType = audio/raw
maxInputSize = 32768 maxInputSize = 8820
width = -1 width = -1
height = -1 height = -1
frameRate = -1.0 frameRate = -1.0
@ -29,7 +29,7 @@ track 0:
total output bytes = 2 total output bytes = 2
sample count = 1 sample count = 1
sample 0: sample 0:
time = 999977 time = 1000000
flags = 1 flags = 1
data = length 2, hash 116 data = length 2, hash 116
tracksEnded = true tracksEnded = true