From a80e7be0295a5b5b36431c658c17a1fd59569cc0 Mon Sep 17 00:00:00 2001 From: dancho Date: Wed, 5 Feb 2025 07:03:21 -0800 Subject: [PATCH] MCVR support skipping parts of AV1 input buffers AV1 input buffers contain multiple compressed pictures. Enable skipping only the last showable frame, while leaving any reference pictures to be decoded later, as part of the next decoder input buffer. Partial skipping of AV1 input buffer is only applied when: * fewer than 8 OBUs are delayed * there's likely to be enough capacity in the decoder input buffer for the next frame PiperOrigin-RevId: 723496060 --- .../mediacodec/MediaCodecRenderer.java | 31 ++++- .../video/Av1SampleDependencyParser.java | 11 +- .../video/MediaCodecVideoRenderer.java | 19 ++- .../video/Av1SampleDependencyParserTest.java | 21 +++ .../clippedMediaItem.dump | 128 ++++++++---------- 5 files changed, 132 insertions(+), 78 deletions(-) diff --git a/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/mediacodec/MediaCodecRenderer.java b/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/mediacodec/MediaCodecRenderer.java index 0dd61e9858..4de67f4944 100644 --- a/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/mediacodec/MediaCodecRenderer.java +++ b/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/mediacodec/MediaCodecRenderer.java @@ -1434,9 +1434,7 @@ public abstract class MediaCodecRenderer extends BaseRenderer { return true; } - if (shouldSkipDecoderInputBuffer(buffer)) { - buffer.clear(); - decoderCounters.skippedInputBufferCount += 1; + if (shouldDiscardDecoderInputBuffer(buffer)) { return true; } @@ -1753,6 +1751,33 @@ public abstract class MediaCodecRenderer extends BaseRenderer { return false; } + /** + * Returns whether the input buffer should be discarded before decoding. + * + *

Implement this method to skip decoding of buffers that are not needed during a seek, or + * to drop input buffers that cannot be rendered on time. See {@link + * C#BUFFER_FLAG_NOT_DEPENDED_ON}. + * + *

Subclasses that implement this method are responsible for updating {@link #decoderCounters}. + * For codecs with out-of-order buffers, consecutive dropped input buffers may have to be counted + * after frame reordering. For example, in {@link #processOutputBuffer}. + * + *

Implementations of this method must update the {@linkplain DecoderInputBuffer#data decoder + * input buffer contents}. Data that is only used for output at the current {@link + * DecoderInputBuffer#timeUs} should be removed. Data that is referenced by later input buffers + * should remain in the current buffer. + * + * @param buffer The input buffer. + */ + protected boolean shouldDiscardDecoderInputBuffer(DecoderInputBuffer buffer) { + if (shouldSkipDecoderInputBuffer(buffer)) { + buffer.clear(); + decoderCounters.skippedInputBufferCount += 1; + return true; + } + return false; + } + /** * Returns the presentation time of the last buffer in the stream. * diff --git a/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/video/Av1SampleDependencyParser.java b/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/video/Av1SampleDependencyParser.java index 4f0df3f4de..1ffa32939f 100644 --- a/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/video/Av1SampleDependencyParser.java +++ b/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/video/Av1SampleDependencyParser.java @@ -30,6 +30,15 @@ import java.util.List; /** An AV1 bitstream parser that identifies frames that are not depended on. */ /* package */ final class Av1SampleDependencyParser { + /** + * When {@link #sampleLimitAfterSkippingNonReferenceFrame(ByteBuffer)} partially skips a temporal + * unit, the decoder input buffer is left with extra reference frames that need to be decoded. + * + *

The AV1 spec defines {@code NUM_REF_FRAMES = 8} - delaying more than 8 reference frames will + * overwrite the same output slots. + */ + private static final int MAX_OBU_COUNT_FOR_PARTIAL_SKIP = 8; + @Nullable private SequenceHeader sequenceHeader; /** @@ -59,7 +68,7 @@ import java.util.List; } last--; } - if (skippedFramesCount > 1) { + if (skippedFramesCount > 1 || last + 1 >= MAX_OBU_COUNT_FOR_PARTIAL_SKIP) { return sample.limit(); } if (last >= 0) { diff --git a/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/video/MediaCodecVideoRenderer.java b/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/video/MediaCodecVideoRenderer.java index fa160dbd12..da5496195a 100644 --- a/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/video/MediaCodecVideoRenderer.java +++ b/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/video/MediaCodecVideoRenderer.java @@ -1415,7 +1415,7 @@ public class MediaCodecVideoRenderer extends MediaCodecRenderer } @Override - protected boolean shouldSkipDecoderInputBuffer(DecoderInputBuffer buffer) { + protected boolean shouldDiscardDecoderInputBuffer(DecoderInputBuffer buffer) { if (isBufferProbablyLastSample(buffer)) { // Make sure to decode and render the last frame. return false; @@ -1429,7 +1429,12 @@ public class MediaCodecVideoRenderer extends MediaCodecRenderer if (!isBufferBeforeStartTime(buffer)) { return false; } + if (buffer.hasSupplementalData()) { + return false; + } if (buffer.notDependedOn()) { + buffer.clear(); + decoderCounters.skippedInputBufferCount += 1; return true; } if (av1SampleDependencyParser != null @@ -1439,8 +1444,16 @@ public class MediaCodecVideoRenderer extends MediaCodecRenderer readOnlySample.flip(); int sampleLimitAfterSkippingNonReferenceFrames = av1SampleDependencyParser.sampleLimitAfterSkippingNonReferenceFrame(readOnlySample); - // TODO: b/391108133 - support skipping parts of AV1 input buffers. 
- return sampleLimitAfterSkippingNonReferenceFrames == readOnlySample.position(); + boolean hasSpaceForNextFrame = + sampleLimitAfterSkippingNonReferenceFrames + checkNotNull(codecMaxValues).inputSize + < readOnlySample.capacity(); + if (sampleLimitAfterSkippingNonReferenceFrames != readOnlySample.limit() + && hasSpaceForNextFrame) { + checkNotNull(buffer.data).position(sampleLimitAfterSkippingNonReferenceFrames); + decoderCounters.skippedInputBufferCount += 1; + return true; + } + return false; } return false; } diff --git a/libraries/exoplayer/src/test/java/androidx/media3/exoplayer/video/Av1SampleDependencyParserTest.java b/libraries/exoplayer/src/test/java/androidx/media3/exoplayer/video/Av1SampleDependencyParserTest.java index 825b7dbd49..c928f99b58 100644 --- a/libraries/exoplayer/src/test/java/androidx/media3/exoplayer/video/Av1SampleDependencyParserTest.java +++ b/libraries/exoplayer/src/test/java/androidx/media3/exoplayer/video/Av1SampleDependencyParserTest.java @@ -157,4 +157,25 @@ public class Av1SampleDependencyParserTest { assertThat(sampleLimitAfterSkippingNonReferenceFrames) .isEqualTo(notDependedOnFrame.length + notDependedOnFrame.length); } + + @Test + public void sampleLimitAfterSkippingNonReferenceFrame_withEightDelayedObus_returnsFullSample() { + ByteBuffer sample = ByteBuffer.allocate(256); + sample.put(sequenceHeader); + sample.put(temporalDelimiter); + sample.put(dependedOnFrame); + sample.put(temporalDelimiter); + sample.put(dependedOnFrame); + sample.put(temporalDelimiter); + sample.put(padding); + sample.put(dependedOnFrame); + sample.put(notDependedOnFrame); + sample.flip(); + Av1SampleDependencyParser av1SampleDependencyParser = new Av1SampleDependencyParser(); + + int sampleLimitAfterSkippingNonReferenceFrames = + av1SampleDependencyParser.sampleLimitAfterSkippingNonReferenceFrame(sample); + + assertThat(sampleLimitAfterSkippingNonReferenceFrames).isEqualTo(sample.limit()); + } } diff --git 
a/libraries/test_data/src/test/assets/playbackdumps/av1SampleDependencies/clippedMediaItem.dump b/libraries/test_data/src/test/assets/playbackdumps/av1SampleDependencies/clippedMediaItem.dump index 4b5ed47d8a..c4b081cbea 100644 --- a/libraries/test_data/src/test/assets/playbackdumps/av1SampleDependencies/clippedMediaItem.dump +++ b/libraries/test_data/src/test/assets/playbackdumps/av1SampleDependencies/clippedMediaItem.dump @@ -1,212 +1,198 @@ MediaCodecAdapter (exotest.video.av1): inputBuffers: - count = 30 + count = 28 input buffer #0: timeUs = 1000000000000 contents = length 84, hash 9C46A819 input buffer #1: - timeUs = 1000000033333 - contents = length 158, hash 43A1B544 - input buffer #2: timeUs = 1000000066666 - contents = length 3, hash D600 - input buffer #3: + contents = length 133, hash 1F0DB1A3 + input buffer #2: timeUs = 1000000133333 contents = length 3, hash D5F0 - input buffer #4: - timeUs = 1000000166666 - contents = length 55, hash 9FC5012E - input buffer #5: + input buffer #3: timeUs = 1000000200000 - contents = length 3, hash D600 - input buffer #6: + contents = length 31, hash 4D3984BD + input buffer #4: timeUs = 1000000233333 contents = length 27, hash 70CFAC05 - input buffer #7: + input buffer #5: timeUs = 1000000266666 contents = length 3, hash D5D0 - input buffer #8: + input buffer #6: timeUs = 1000000300000 contents = length 82, hash 944218D6 - input buffer #9: + input buffer #7: timeUs = 1000000333333 contents = length 3, hash D600 - input buffer #10: + input buffer #8: timeUs = 1000000366666 contents = length 27, hash BA4D4A06 - input buffer #11: + input buffer #9: timeUs = 1000000400000 contents = length 3, hash D5F0 - input buffer #12: + input buffer #10: timeUs = 1000000433333 contents = length 54, hash A98584CA - input buffer #13: + input buffer #11: timeUs = 1000000466666 contents = length 3, hash D600 - input buffer #14: + input buffer #12: timeUs = 1000000500000 contents = length 27, hash 45D733B8 - input buffer #15: + input buffer 
#13: timeUs = 1000000533333 contents = length 3, hash D5A0 - input buffer #16: + input buffer #14: timeUs = 1000000566666 contents = length 112, hash B80B26FD - input buffer #17: + input buffer #15: timeUs = 1000000600000 contents = length 3, hash D5F0 - input buffer #18: + input buffer #16: timeUs = 1000000633333 contents = length 27, hash 37DD29D9 - input buffer #19: + input buffer #17: timeUs = 1000000666666 contents = length 3, hash D5E0 - input buffer #20: + input buffer #18: timeUs = 1000000700000 contents = length 54, hash 1C15581C - input buffer #21: + input buffer #19: timeUs = 1000000733333 contents = length 3, hash D5F0 - input buffer #22: + input buffer #20: timeUs = 1000000766666 contents = length 27, hash 49EC3531 - input buffer #23: + input buffer #21: timeUs = 1000000800000 contents = length 3, hash D5B0 - input buffer #24: + input buffer #22: timeUs = 1000000833333 contents = length 84, hash 2025C9F5 - input buffer #25: + input buffer #23: timeUs = 1000000866666 contents = length 3, hash D5D0 - input buffer #26: + input buffer #24: timeUs = 1000000900000 contents = length 27, hash B927669C - input buffer #27: + input buffer #25: timeUs = 1000000933333 contents = length 3, hash D5C0 - input buffer #28: + input buffer #26: timeUs = 1000000966666 contents = length 27, hash 706C58AD - input buffer #29: + input buffer #27: timeUs = 0 flags = 4 contents = length 0, hash 1 outputBuffers: - count = 29 + count = 27 output buffer #0: timeUs = 1000000000000 size = 84 rendered = false output buffer #1: - timeUs = 1000000033333 - size = 158 + timeUs = 1000000066666 + size = 133 rendered = false output buffer #2: - timeUs = 1000000066666 - size = 3 - rendered = false - output buffer #3: timeUs = 1000000133333 size = 3 rendered = false - output buffer #4: - timeUs = 1000000166666 - size = 55 - rendered = false - output buffer #5: + output buffer #3: timeUs = 1000000200000 - size = 3 + size = 31 rendered = true - output buffer #6: + output buffer #4: timeUs = 
1000000233333 size = 27 rendered = true - output buffer #7: + output buffer #5: timeUs = 1000000266666 size = 3 rendered = true - output buffer #8: + output buffer #6: timeUs = 1000000300000 size = 82 rendered = true - output buffer #9: + output buffer #7: timeUs = 1000000333333 size = 3 rendered = true - output buffer #10: + output buffer #8: timeUs = 1000000366666 size = 27 rendered = true - output buffer #11: + output buffer #9: timeUs = 1000000400000 size = 3 rendered = true - output buffer #12: + output buffer #10: timeUs = 1000000433333 size = 54 rendered = true - output buffer #13: + output buffer #11: timeUs = 1000000466666 size = 3 rendered = true - output buffer #14: + output buffer #12: timeUs = 1000000500000 size = 27 rendered = true - output buffer #15: + output buffer #13: timeUs = 1000000533333 size = 3 rendered = true - output buffer #16: + output buffer #14: timeUs = 1000000566666 size = 112 rendered = true - output buffer #17: + output buffer #15: timeUs = 1000000600000 size = 3 rendered = true - output buffer #18: + output buffer #16: timeUs = 1000000633333 size = 27 rendered = true - output buffer #19: + output buffer #17: timeUs = 1000000666666 size = 3 rendered = true - output buffer #20: + output buffer #18: timeUs = 1000000700000 size = 54 rendered = true - output buffer #21: + output buffer #19: timeUs = 1000000733333 size = 3 rendered = true - output buffer #22: + output buffer #20: timeUs = 1000000766666 size = 27 rendered = true - output buffer #23: + output buffer #21: timeUs = 1000000800000 size = 3 rendered = true - output buffer #24: + output buffer #22: timeUs = 1000000833333 size = 84 rendered = true - output buffer #25: + output buffer #23: timeUs = 1000000866666 size = 3 rendered = true - output buffer #26: + output buffer #24: timeUs = 1000000900000 size = 27 rendered = true - output buffer #27: + output buffer #25: timeUs = 1000000933333 size = 3 rendered = true - output buffer #28: + output buffer #26: timeUs = 1000000966666 
size = 27 rendered = true