From 7bd912f895d5c9a815b03d6a960e350eb0d39249 Mon Sep 17 00:00:00 2001 From: olly Date: Mon, 16 Dec 2019 13:05:09 +0000 Subject: [PATCH] Clean up WavExtractor PCM output - Make the extractor output samples that are uniformly distributed with respect to time, with a target of ~10 samples per second. The old approach could, in theory, put every frame into its own sample, which would be very inefficient downstream because we'd need to pass them individually to MediaCodec. It could also put data corresponding to a long duration of time into a single sample (e.g. if the sample rate of the content is low), which is bad downstream because we decide whether to set the decodeOnly flag on a per-sample basis. More generally, the new approach is more predictable :). - Stop using the WavSeekMap to get sample timestamps, and instead calculate them directly from the number of frames output. This is more obviously correct, particularly for data formats like IMA ADPCM where we'll need to adjust the data prior to output. 
PiperOrigin-RevId: 285750010 --- .../extractor/wav/WavExtractor.java | 87 ++++++++++++------- .../exoplayer2/extractor/wav/WavHeader.java | 12 +-- .../extractor/wav/WavHeaderReader.java | 8 +- .../exoplayer2/extractor/wav/WavSeekMap.java | 55 +++++------- .../src/test/assets/wav/sample.wav.0.dump | 42 +++++++-- .../src/test/assets/wav/sample.wav.1.dump | 32 +++++-- .../src/test/assets/wav/sample.wav.2.dump | 20 ++++- .../src/test/assets/wav/sample.wav.3.dump | 4 +- 8 files changed, 168 insertions(+), 92 deletions(-) diff --git a/library/core/src/main/java/com/google/android/exoplayer2/extractor/wav/WavExtractor.java b/library/core/src/main/java/com/google/android/exoplayer2/extractor/wav/WavExtractor.java index 25be6c32b9..37edb07a1a 100644 --- a/library/core/src/main/java/com/google/android/exoplayer2/extractor/wav/WavExtractor.java +++ b/library/core/src/main/java/com/google/android/exoplayer2/extractor/wav/WavExtractor.java @@ -28,6 +28,7 @@ import com.google.android.exoplayer2.extractor.PositionHolder; import com.google.android.exoplayer2.extractor.TrackOutput; import com.google.android.exoplayer2.util.Assertions; import com.google.android.exoplayer2.util.MimeTypes; +import com.google.android.exoplayer2.util.Util; import java.io.IOException; /** @@ -35,8 +36,12 @@ import java.io.IOException; */ public final class WavExtractor implements Extractor { - /** Arbitrary maximum sample size of 32KB, which is ~170ms of 16-bit stereo PCM audio at 48KHz. */ - private static final int MAX_SAMPLE_SIZE = 32 * 1024; + /** + * When outputting PCM data to a {@link TrackOutput}, we can choose how many frames are grouped + * into each sample, and hence each sample's duration. This is the target number of samples to + * output for each second of media, meaning that each sample will have a duration of ~100ms. + */ + private static final int TARGET_SAMPLES_PER_SECOND = 10; /** Factory for {@link WavExtractor} instances. 
*/ public static final ExtractorsFactory FACTORY = () -> new Extractor[] {new WavExtractor()}; @@ -67,7 +72,7 @@ public final class WavExtractor implements Extractor { @Override public void seek(long position, long timeUs) { if (outputWriter != null) { - outputWriter.reset(); + outputWriter.reset(timeUs); } } @@ -105,18 +110,18 @@ public final class WavExtractor implements Extractor { Assertions.checkState(dataEndPosition != C.POSITION_UNSET); long bytesLeft = dataEndPosition - input.getPosition(); - if (bytesLeft <= 0) { - return Extractor.RESULT_END_OF_INPUT; - } - - return outputWriter.sampleData(input, bytesLeft) ? RESULT_CONTINUE : RESULT_END_OF_INPUT; + return outputWriter.sampleData(input, bytesLeft) ? RESULT_END_OF_INPUT : RESULT_CONTINUE; } /** Writes to the extractor's output. */ private interface OutputWriter { - /** Resets the writer. */ - void reset(); + /** + * Resets the writer. + * + * @param timeUs The new start position in microseconds. + */ + void reset(long timeUs); /** * Initializes the writer. @@ -137,7 +142,7 @@ public final class WavExtractor implements Extractor { * * @param input The input from which to read. * @param bytesLeft The number of sample data bytes left to be read from the input. - * @return True if data was consumed. False if the end of the stream has been reached. + * @return Whether the end of the sample data has been reached. * @throws IOException If an error occurs reading from the input. * @throws InterruptedException If the thread has been interrupted. 
*/ @@ -151,8 +156,10 @@ public final class WavExtractor implements Extractor { private final TrackOutput trackOutput; private final WavHeader header; private final @C.PcmEncoding int pcmEncoding; + private final int targetSampleSize; - private WavSeekMap seekMap; + private long startTimeUs; + private long outputFrameCount; private int pendingBytes; public PcmOutputWriter( @@ -164,26 +171,31 @@ public final class WavExtractor implements Extractor { this.trackOutput = trackOutput; this.header = header; this.pcmEncoding = pcmEncoding; + // For PCM blocks correspond to single frames. This is validated in init(int, long). + int bytesPerFrame = header.blockSize; + targetSampleSize = + Math.max(bytesPerFrame, header.frameRateHz * bytesPerFrame / TARGET_SAMPLES_PER_SECOND); } @Override - public void reset() { + public void reset(long timeUs) { + startTimeUs = timeUs; + outputFrameCount = 0; pendingBytes = 0; } @Override public void init(int dataStartPosition, long dataEndPosition) throws ParserException { // Validate the header. - int expectedBytesPerFrame = header.numChannels * header.bitsPerSample / 8; - if (header.blockAlign != expectedBytesPerFrame) { + int bytesPerFrame = header.numChannels * header.bitsPerSample / 8; + if (header.blockSize != bytesPerFrame) { throw new ParserException( - "Expected block alignment: " + expectedBytesPerFrame + "; got: " + header.blockAlign); + "Expected block size: " + bytesPerFrame + "; got: " + header.blockSize); } // Output the seek map. - seekMap = - new WavSeekMap(header, /* samplesPerBlock= */ 1, dataStartPosition, dataEndPosition); - extractorOutput.seekMap(seekMap); + extractorOutput.seekMap( + new WavSeekMap(header, /* framesPerBlock= */ 1, dataStartPosition, dataEndPosition)); // Output the format. 
Format format = @@ -192,9 +204,9 @@ public final class WavExtractor implements Extractor { MimeTypes.AUDIO_RAW, /* codecs= */ null, /* bitrate= */ header.averageBytesPerSecond * 8, - MAX_SAMPLE_SIZE, + targetSampleSize, header.numChannels, - header.sampleRateHz, + header.frameRateHz, pcmEncoding, /* initializationData= */ null, /* drmInitData= */ null, @@ -206,25 +218,36 @@ public final class WavExtractor implements Extractor { @Override public boolean sampleData(ExtractorInput input, long bytesLeft) throws IOException, InterruptedException { - int maxBytesToRead = (int) Math.min(MAX_SAMPLE_SIZE - pendingBytes, bytesLeft); - int numBytesAppended = trackOutput.sampleData(input, maxBytesToRead, true); - boolean wereBytesAppended = numBytesAppended != RESULT_END_OF_INPUT; - if (wereBytesAppended) { - pendingBytes += numBytesAppended; + // Write sample data until we've reached the target sample size, or the end of the data. + boolean endOfSampleData = bytesLeft == 0; + while (!endOfSampleData && pendingBytes < targetSampleSize) { + int bytesToRead = (int) Math.min(targetSampleSize - pendingBytes, bytesLeft); + int bytesAppended = trackOutput.sampleData(input, bytesToRead, true); + if (bytesAppended == RESULT_END_OF_INPUT) { + endOfSampleData = true; + } else { + pendingBytes += bytesAppended; + } } - // blockAlign is the frame size, and samples must consist of a whole number of frames. - int bytesPerFrame = header.blockAlign; + // Write the corresponding sample metadata. Samples must be a whole number of frames. It's + // possible pendingBytes is not a whole number of frames if the stream ended unexpectedly. 
+ int bytesPerFrame = header.blockSize; int pendingFrames = pendingBytes / bytesPerFrame; if (pendingFrames > 0) { - long timeUs = seekMap.getTimeUs(input.getPosition() - pendingBytes); + long timeUs = + startTimeUs + + Util.scaleLargeTimestamp( + outputFrameCount, C.MICROS_PER_SECOND, header.frameRateHz); int size = pendingFrames * bytesPerFrame; - pendingBytes -= size; + int offset = pendingBytes - size; trackOutput.sampleMetadata( - timeUs, C.BUFFER_FLAG_KEY_FRAME, size, pendingBytes, /* encryptionData= */ null); + timeUs, C.BUFFER_FLAG_KEY_FRAME, size, offset, /* encryptionData= */ null); + outputFrameCount += pendingFrames; + pendingBytes = offset; } - return wereBytesAppended; + return endOfSampleData; } } } diff --git a/library/core/src/main/java/com/google/android/exoplayer2/extractor/wav/WavHeader.java b/library/core/src/main/java/com/google/android/exoplayer2/extractor/wav/WavHeader.java index 88db3c23f2..ca34e32cc0 100644 --- a/library/core/src/main/java/com/google/android/exoplayer2/extractor/wav/WavHeader.java +++ b/library/core/src/main/java/com/google/android/exoplayer2/extractor/wav/WavHeader.java @@ -26,11 +26,11 @@ package com.google.android.exoplayer2.extractor.wav; /** The number of channels. */ public final int numChannels; /** The sample rate in Hertz. */ - public final int sampleRateHz; + public final int frameRateHz; /** The average bytes per second for the sample data. */ public final int averageBytesPerSecond; /** The block size in bytes. */ - public final int blockAlign; + public final int blockSize; /** Bits per sample for a single channel. */ public final int bitsPerSample; /** Extra data appended to the format chunk of the header. 
*/ @@ -39,16 +39,16 @@ package com.google.android.exoplayer2.extractor.wav; public WavHeader( int formatType, int numChannels, - int sampleRateHz, + int frameRateHz, int averageBytesPerSecond, - int bytesPerFrame, + int blockSize, int bitsPerSample, byte[] extraData) { this.formatType = formatType; this.numChannels = numChannels; - this.sampleRateHz = sampleRateHz; + this.frameRateHz = frameRateHz; this.averageBytesPerSecond = averageBytesPerSecond; - this.blockAlign = bytesPerFrame; + this.blockSize = blockSize; this.bitsPerSample = bitsPerSample; this.extraData = extraData; } diff --git a/library/core/src/main/java/com/google/android/exoplayer2/extractor/wav/WavHeaderReader.java b/library/core/src/main/java/com/google/android/exoplayer2/extractor/wav/WavHeaderReader.java index 1d28b12a85..b2cdda7f9d 100644 --- a/library/core/src/main/java/com/google/android/exoplayer2/extractor/wav/WavHeaderReader.java +++ b/library/core/src/main/java/com/google/android/exoplayer2/extractor/wav/WavHeaderReader.java @@ -75,9 +75,9 @@ import java.io.IOException; scratch.setPosition(0); int audioFormatType = scratch.readLittleEndianUnsignedShort(); int numChannels = scratch.readLittleEndianUnsignedShort(); - int sampleRateHz = scratch.readLittleEndianUnsignedIntToInt(); + int frameRateHz = scratch.readLittleEndianUnsignedIntToInt(); int averageBytesPerSecond = scratch.readLittleEndianUnsignedIntToInt(); - int blockAlignment = scratch.readLittleEndianUnsignedShort(); + int blockSize = scratch.readLittleEndianUnsignedShort(); int bitsPerSample = scratch.readLittleEndianUnsignedShort(); int bytesLeft = (int) chunkHeader.size - 16; @@ -92,9 +92,9 @@ import java.io.IOException; return new WavHeader( audioFormatType, numChannels, - sampleRateHz, + frameRateHz, averageBytesPerSecond, - blockAlignment, + blockSize, bitsPerSample, extraData); } diff --git a/library/core/src/main/java/com/google/android/exoplayer2/extractor/wav/WavSeekMap.java 
b/library/core/src/main/java/com/google/android/exoplayer2/extractor/wav/WavSeekMap.java index 4c35b4e2e9..53e0f45306 100644 --- a/library/core/src/main/java/com/google/android/exoplayer2/extractor/wav/WavSeekMap.java +++ b/library/core/src/main/java/com/google/android/exoplayer2/extractor/wav/WavSeekMap.java @@ -22,21 +22,19 @@ import com.google.android.exoplayer2.util.Util; /* package */ final class WavSeekMap implements SeekMap { - /** The WAV header for the stream. */ private final WavHeader wavHeader; - /** Number of samples in each block. */ - private final int samplesPerBlock; - /** Position of the start of the sample data, in bytes. */ - private final long dataStartPosition; - /** Position of the end of the sample data (exclusive), in bytes. */ - private final long dataEndPosition; + private final int framesPerBlock; + private final long firstBlockPosition; + private final long blockCount; + private final long durationUs; public WavSeekMap( - WavHeader wavHeader, int samplesPerBlock, long dataStartPosition, long dataEndPosition) { + WavHeader wavHeader, int framesPerBlock, long dataStartPosition, long dataEndPosition) { this.wavHeader = wavHeader; - this.samplesPerBlock = samplesPerBlock; - this.dataStartPosition = dataStartPosition; - this.dataEndPosition = dataEndPosition; + this.framesPerBlock = framesPerBlock; + this.firstBlockPosition = dataStartPosition; + this.blockCount = (dataEndPosition - dataStartPosition) / wavHeader.blockSize; + durationUs = blockIndexToTimeUs(blockCount); } @Override @@ -46,38 +44,33 @@ import com.google.android.exoplayer2.util.Util; @Override public long getDurationUs() { - long numBlocks = (dataEndPosition - dataStartPosition) / wavHeader.blockAlign; - return numBlocks * samplesPerBlock * C.MICROS_PER_SECOND / wavHeader.sampleRateHz; + return durationUs; } @Override public SeekPoints getSeekPoints(long timeUs) { - long blockAlign = wavHeader.blockAlign; - long dataSize = dataEndPosition - dataStartPosition; + // Calculate 
the expected number of bytes of sample data corresponding to the requested time. long positionOffset = (timeUs * wavHeader.averageBytesPerSecond) / C.MICROS_PER_SECOND; - // Constrain to nearest preceding frame offset. - positionOffset = (positionOffset / blockAlign) * blockAlign; - positionOffset = Util.constrainValue(positionOffset, 0, dataSize - blockAlign); - long seekPosition = dataStartPosition + positionOffset; - long seekTimeUs = getTimeUs(seekPosition); + // Calculate the containing block index, constraining to valid indices. + long blockSize = wavHeader.blockSize; + long blockIndex = Util.constrainValue(positionOffset / blockSize, 0, blockCount - 1); + + long seekPosition = firstBlockPosition + (blockIndex * blockSize); + long seekTimeUs = blockIndexToTimeUs(blockIndex); SeekPoint seekPoint = new SeekPoint(seekTimeUs, seekPosition); - if (seekTimeUs >= timeUs || positionOffset == dataSize - blockAlign) { + if (seekTimeUs >= timeUs || blockIndex == blockCount - 1) { return new SeekPoints(seekPoint); } else { - long secondSeekPosition = seekPosition + blockAlign; - long secondSeekTimeUs = getTimeUs(secondSeekPosition); + long secondBlockIndex = blockIndex + 1; + long secondSeekPosition = firstBlockPosition + (secondBlockIndex * blockSize); + long secondSeekTimeUs = blockIndexToTimeUs(secondBlockIndex); SeekPoint secondSeekPoint = new SeekPoint(secondSeekTimeUs, secondSeekPosition); return new SeekPoints(seekPoint, secondSeekPoint); } } - /** - * Returns the time in microseconds for the given position in bytes. - * - * @param position The position in bytes. 
- */ - public long getTimeUs(long position) { - long positionOffset = Math.max(0, position - dataStartPosition); - return (positionOffset * C.MICROS_PER_SECOND) / wavHeader.averageBytesPerSecond; + private long blockIndexToTimeUs(long blockIndex) { + return Util.scaleLargeTimestamp( + blockIndex * framesPerBlock, C.MICROS_PER_SECOND, wavHeader.frameRateHz); } } diff --git a/library/core/src/test/assets/wav/sample.wav.0.dump b/library/core/src/test/assets/wav/sample.wav.0.dump index fc3ded6ff8..50daee00ce 100644 --- a/library/core/src/test/assets/wav/sample.wav.0.dump +++ b/library/core/src/test/assets/wav/sample.wav.0.dump @@ -9,7 +9,7 @@ track 0: id = null containerMimeType = null sampleMimeType = audio/raw - maxInputSize = 32768 + maxInputSize = 8820 width = -1 height = -1 frameRate = -1.0 @@ -27,17 +27,45 @@ track 0: metadata = null initializationData: total output bytes = 88200 - sample count = 3 + sample count = 10 sample 0: time = 0 flags = 1 - data = length 32768, hash 9A8CEEBA + data = length 8820, hash FAE27E28 sample 1: - time = 371519 + time = 100000 flags = 1 - data = length 32768, hash C1717317 + data = length 8820, hash 21C3E9C3 sample 2: - time = 743038 + time = 200000 flags = 1 - data = length 22664, hash 819F5F62 + data = length 8820, hash B51AD902 + sample 3: + time = 300000 + flags = 1 + data = length 8820, hash 2F4B2CB4 + sample 4: + time = 400000 + flags = 1 + data = length 8820, hash F0030CC2 + sample 5: + time = 500000 + flags = 1 + data = length 8820, hash FF83DA46 + sample 6: + time = 600000 + flags = 1 + data = length 8820, hash 685C1AB5 + sample 7: + time = 700000 + flags = 1 + data = length 8820, hash BE63D51C + sample 8: + time = 800000 + flags = 1 + data = length 8820, hash 1E44EB8E + sample 9: + time = 900000 + flags = 1 + data = length 8820, hash 57C41232 tracksEnded = true diff --git a/library/core/src/test/assets/wav/sample.wav.1.dump b/library/core/src/test/assets/wav/sample.wav.1.dump index f6c120bde5..80fa9c02af 100644 --- 
a/library/core/src/test/assets/wav/sample.wav.1.dump +++ b/library/core/src/test/assets/wav/sample.wav.1.dump @@ -9,7 +9,7 @@ track 0: id = null containerMimeType = null sampleMimeType = audio/raw - maxInputSize = 32768 + maxInputSize = 8820 width = -1 height = -1 frameRate = -1.0 @@ -27,13 +27,33 @@ track 0: metadata = null initializationData: total output bytes = 58802 - sample count = 2 + sample count = 7 sample 0: - time = 333310 + time = 333333 flags = 1 - data = length 32768, hash 42D6E860 + data = length 8820, hash 31868A21 sample 1: - time = 704829 + time = 433333 flags = 1 - data = length 26034, hash 62692C38 + data = length 8820, hash AE3D77A2 + sample 2: + time = 533333 + flags = 1 + data = length 8820, hash 966140CE + sample 3: + time = 633333 + flags = 1 + data = length 8820, hash CB405D7B + sample 4: + time = 733333 + flags = 1 + data = length 8820, hash 733BA3E6 + sample 5: + time = 833333 + flags = 1 + data = length 8820, hash 7595D752 + sample 6: + time = 933333 + flags = 1 + data = length 5882, hash C617B719 tracksEnded = true diff --git a/library/core/src/test/assets/wav/sample.wav.2.dump b/library/core/src/test/assets/wav/sample.wav.2.dump index bfe175a657..e848e81cbc 100644 --- a/library/core/src/test/assets/wav/sample.wav.2.dump +++ b/library/core/src/test/assets/wav/sample.wav.2.dump @@ -9,7 +9,7 @@ track 0: id = null containerMimeType = null sampleMimeType = audio/raw - maxInputSize = 32768 + maxInputSize = 8820 width = -1 height = -1 frameRate = -1.0 @@ -27,9 +27,21 @@ track 0: metadata = null initializationData: total output bytes = 29402 - sample count = 1 + sample count = 4 sample 0: - time = 666643 + time = 666666 flags = 1 - data = length 29402, hash 4241604E + data = length 8820, hash D6617E20 + sample 1: + time = 766666 + flags = 1 + data = length 8820, hash 28C74B7A + sample 2: + time = 866666 + flags = 1 + data = length 8820, hash 680DEFC7 + sample 3: + time = 966666 + flags = 1 + data = length 2942, hash 1D063CF0 tracksEnded = 
true diff --git a/library/core/src/test/assets/wav/sample.wav.3.dump b/library/core/src/test/assets/wav/sample.wav.3.dump index 160a5efdd7..3216b1e11a 100644 --- a/library/core/src/test/assets/wav/sample.wav.3.dump +++ b/library/core/src/test/assets/wav/sample.wav.3.dump @@ -9,7 +9,7 @@ track 0: id = null containerMimeType = null sampleMimeType = audio/raw - maxInputSize = 32768 + maxInputSize = 8820 width = -1 height = -1 frameRate = -1.0 @@ -29,7 +29,7 @@ track 0: total output bytes = 2 sample count = 1 sample 0: - time = 999977 + time = 1000000 flags = 1 data = length 2, hash 116 tracksEnded = true