From 06f340005fa7852d0c86723042d1783b32f47ed9 Mon Sep 17 00:00:00 2001
From: sheenachhabra
Date: Tue, 4 Mar 2025 12:21:49 -0800
Subject: [PATCH] Add support for video gaps via `addGap()` API

Previously, a gap item always meant an audio gap. A gap item is now
filled with audio, video, or both, depending on which tracks are present
in the sequence. If the very first item is a gap, it is filled with audio
only and `forceAudioTrack` is set to true; support for a video gap at the
start of the sequence will be added in a follow-up CL.

PiperOrigin-RevId: 733422557
---
 .../transformer/TransformerVideoGapsTest.java | 209 ++++++++++++++++++
 .../transformer/EditedMediaItemSequence.java  |   2 +-
 .../transformer/SequenceAssetLoader.java      |  91 ++++++--
 .../transformer/TransformerInternal.java      |   4 -
 .../VideoFrameProcessingWrapper.java          |   2 -
 .../transformer/SequenceExportTest.java       |  18 --
 6 files changed, 278 insertions(+), 48 deletions(-)

diff --git a/libraries/transformer/src/androidTest/java/androidx/media3/transformer/TransformerVideoGapsTest.java b/libraries/transformer/src/androidTest/java/androidx/media3/transformer/TransformerVideoGapsTest.java
index 0339ed83f3..a2bf707fdb 100644
--- a/libraries/transformer/src/androidTest/java/androidx/media3/transformer/TransformerVideoGapsTest.java
+++ b/libraries/transformer/src/androidTest/java/androidx/media3/transformer/TransformerVideoGapsTest.java
@@ -46,6 +46,8 @@ public class TransformerVideoGapsTest {
       new EditedMediaItem.Builder(MediaItem.fromUri(MP4_ASSET.uri)).build();
   private static final EditedMediaItem AUDIO_ONLY_MEDIA_ITEM =
       AUDIO_VIDEO_MEDIA_ITEM.buildUpon().setRemoveVideo(true).build();
+  private static final EditedMediaItem VIDEO_ONLY_MEDIA_ITEM =
+      AUDIO_VIDEO_MEDIA_ITEM.buildUpon().setRemoveAudio(true).build();
 
   private final Context context = ApplicationProvider.getApplicationContext();
   @Rule public final TestName testName = new TestName();
@@ -131,4 +133,211 @@ public class TransformerVideoGapsTest {
     assertThat(videoTrackOutput.getSampleCount())
         .isEqualTo(2 * MP4_ASSET.videoFrameCount + expectedBlankFrames);
   }
+
+  // TODO: b/391111085 - Change test when gaps at the start of the sequence are supported.
+ @Test + public void export_withTwoVideoOnlyMediaItemsAndGapAtStart_throws() { + Transformer transformer = new Transformer.Builder(context).build(); + Composition composition = + new Composition.Builder( + new EditedMediaItemSequence.Builder() + .addGap(/* durationUs= */ 1_000_000) + .addItem(VIDEO_ONLY_MEDIA_ITEM) + .addItem(VIDEO_ONLY_MEDIA_ITEM) + .build()) + .build(); + TransformerAndroidTestRunner transformerAndroidTestRunner = + new TransformerAndroidTestRunner.Builder(context, transformer).build(); + + assertThrows( + ExportException.class, () -> transformerAndroidTestRunner.run(testId, composition)); + } + + @Test + public void export_withTwoVideoOnlyMediaItemsAndGapInMiddle_insertsBlankFramesForGap() + throws Exception { + assumeFormatsSupported( + context, + testId, + /* inputFormat= */ MP4_ASSET.videoFormat, + /* outputFormat= */ MP4_ASSET.videoFormat); + Transformer transformer = new Transformer.Builder(context).build(); + Composition composition = + new Composition.Builder( + new EditedMediaItemSequence.Builder() + .addItem(VIDEO_ONLY_MEDIA_ITEM) + .addGap(/* durationUs= */ 1_000_000) + .addItem(VIDEO_ONLY_MEDIA_ITEM) + .build()) + .build(); + + ExportTestResult result = + new TransformerAndroidTestRunner.Builder(context, transformer) + .build() + .run(testId, composition); + + FakeExtractorOutput fakeExtractorOutput = + TestUtil.extractAllSamplesFromFilePath( + new Mp4Extractor(new DefaultSubtitleParserFactory()), result.filePath); + FakeTrackOutput videoTrackOutput = getVideoTrackOutput(fakeExtractorOutput); + // The gap is for 1 sec with 30 fps. + int expectedBlankFrames = 30; + assertThat(videoTrackOutput.getSampleCount()) + .isEqualTo(2 * MP4_ASSET.videoFrameCount + expectedBlankFrames); + } + + @Test + public void export_withTwoVideoOnlyMediaItemsAndGapAtTheEnd_insertsBlankFramesForGap() + throws Exception { + assumeFormatsSupported( + context, + testId, + /* inputFormat= */ MP4_ASSET.videoFormat, + /* outputFormat= */ MP4_ASSET.videoFormat); + Transformer transformer = new Transformer.Builder(context).build(); + Composition composition = + new Composition.Builder( + new EditedMediaItemSequence.Builder() + .addItem(VIDEO_ONLY_MEDIA_ITEM) + .addItem(VIDEO_ONLY_MEDIA_ITEM) + .addGap(/* durationUs= */ 1_000_000) + .build()) + .build(); + + ExportTestResult result = + new TransformerAndroidTestRunner.Builder(context, transformer) + .build() + .run(testId, composition); + + FakeExtractorOutput fakeExtractorOutput = + TestUtil.extractAllSamplesFromFilePath( + new Mp4Extractor(new DefaultSubtitleParserFactory()), result.filePath); + FakeTrackOutput videoTrackOutput = getVideoTrackOutput(fakeExtractorOutput); + // The gap is for 1 sec with 30 fps. + int expectedBlankFrames = 30; + assertThat(videoTrackOutput.getSampleCount()) + .isEqualTo(2 * MP4_ASSET.videoFrameCount + expectedBlankFrames); + } + + // TODO: b/391111085 - Change test when gaps at the start of the sequence are supported. 
+ @Test + public void export_withTwoMediaItemsAndGapAtStart_throws() { + Transformer transformer = new Transformer.Builder(context).build(); + Composition composition = + new Composition.Builder( + new EditedMediaItemSequence.Builder() + .addGap(/* durationUs= */ 1_000_000) + .addItem(AUDIO_VIDEO_MEDIA_ITEM) + .addItem(AUDIO_VIDEO_MEDIA_ITEM) + .build()) + .build(); + TransformerAndroidTestRunner transformerAndroidTestRunner = + new TransformerAndroidTestRunner.Builder(context, transformer).build(); + + assertThrows( + ExportException.class, () -> transformerAndroidTestRunner.run(testId, composition)); + } + + @Test + public void export_withTwoMediaItemsAndGapInMiddle_insertsBlankFramesForGap() throws Exception { + assumeFormatsSupported( + context, + testId, + /* inputFormat= */ MP4_ASSET.videoFormat, + /* outputFormat= */ MP4_ASSET.videoFormat); + Transformer transformer = new Transformer.Builder(context).build(); + Composition composition = + new Composition.Builder( + new EditedMediaItemSequence.Builder() + .addItem(AUDIO_VIDEO_MEDIA_ITEM) + .addGap(/* durationUs= */ 1_000_000) + .addItem(AUDIO_VIDEO_MEDIA_ITEM) + .build()) + .build(); + + ExportTestResult result = + new TransformerAndroidTestRunner.Builder(context, transformer) + .build() + .run(testId, composition); + + FakeExtractorOutput fakeExtractorOutput = + TestUtil.extractAllSamplesFromFilePath( + new Mp4Extractor(new DefaultSubtitleParserFactory()), result.filePath); + FakeTrackOutput videoTrackOutput = getVideoTrackOutput(fakeExtractorOutput); + // The gap is for 1 sec with 30 fps. + int expectedBlankFrames = 30; + assertThat(videoTrackOutput.getSampleCount()) + .isEqualTo(2 * MP4_ASSET.videoFrameCount + expectedBlankFrames); + } + + @Test + public void export_withTwoMediaItemsAndGapAtTheEnd_insertsBlankFramesForGap() throws Exception { + assumeFormatsSupported( + context, + testId, + /* inputFormat= */ MP4_ASSET.videoFormat, + /* outputFormat= */ MP4_ASSET.videoFormat); + Transformer transformer = new Transformer.Builder(context).build(); + Composition composition = + new Composition.Builder( + new EditedMediaItemSequence.Builder() + .addItem(AUDIO_VIDEO_MEDIA_ITEM) + .addItem(AUDIO_VIDEO_MEDIA_ITEM) + .addGap(/* durationUs= */ 1_000_000) + .build()) + .build(); + + ExportTestResult result = + new TransformerAndroidTestRunner.Builder(context, transformer) + .build() + .run(testId, composition); + + FakeExtractorOutput fakeExtractorOutput = + TestUtil.extractAllSamplesFromFilePath( + new Mp4Extractor(new DefaultSubtitleParserFactory()), result.filePath); + FakeTrackOutput videoTrackOutput = getVideoTrackOutput(fakeExtractorOutput); + // The gap is for 1 sec with 30 fps. 
+ int expectedBlankFrames = 30; + assertThat(videoTrackOutput.getSampleCount()) + .isEqualTo(2 * MP4_ASSET.videoFrameCount + expectedBlankFrames); + } + + @Test + public void export_withMixOfAudioVideoAndGap_insertsBlankFramesAsExpected() throws Exception { + assumeFormatsSupported( + context, + testId, + /* inputFormat= */ MP4_ASSET.videoFormat, + /* outputFormat= */ MP4_ASSET.videoFormat); + Transformer transformer = new Transformer.Builder(context).build(); + Composition composition = + new Composition.Builder( + new EditedMediaItemSequence.Builder() + .addItem(AUDIO_VIDEO_MEDIA_ITEM) + .addItem(AUDIO_ONLY_MEDIA_ITEM) + .addItem(VIDEO_ONLY_MEDIA_ITEM) + .addGap(/* durationUs= */ 1_000_000) + .build()) + .build(); + + ExportTestResult result = + new TransformerAndroidTestRunner.Builder(context, transformer) + .build() + .run(testId, composition); + + FakeExtractorOutput fakeExtractorOutput = + TestUtil.extractAllSamplesFromFilePath( + new Mp4Extractor(new DefaultSubtitleParserFactory()), result.filePath); + FakeTrackOutput videoTrackOutput = getVideoTrackOutput(fakeExtractorOutput); + // The gap is for 1024ms with 30 fps. + int expectedBlankFramesForAudioOnlyItem = 31; + // The gap is for 1 sec with 30 fps. + int expectedBlankFramesForOneSecGap = 30; + assertThat(videoTrackOutput.getSampleCount()) + .isEqualTo( + MP4_ASSET.videoFrameCount + + expectedBlankFramesForAudioOnlyItem + + MP4_ASSET.videoFrameCount + + expectedBlankFramesForOneSecGap); + } } diff --git a/libraries/transformer/src/main/java/androidx/media3/transformer/EditedMediaItemSequence.java b/libraries/transformer/src/main/java/androidx/media3/transformer/EditedMediaItemSequence.java index b36fdd6721..5f7cdf825a 100644 --- a/libraries/transformer/src/main/java/androidx/media3/transformer/EditedMediaItemSequence.java +++ b/libraries/transformer/src/main/java/androidx/media3/transformer/EditedMediaItemSequence.java @@ -87,7 +87,7 @@ public final class EditedMediaItemSequence { * *

   * <p>A gap is a period of time with no media.
   *
-  * <p>Gaps are only supported in sequences of audio.
+  * <p>
Gaps at the start of the sequence are not supported if the sequence has video. * * @param durationUs The duration of the gap, in milliseconds. * @return This builder, for convenience. diff --git a/libraries/transformer/src/main/java/androidx/media3/transformer/SequenceAssetLoader.java b/libraries/transformer/src/main/java/androidx/media3/transformer/SequenceAssetLoader.java index 1be29b0a11..b1ee871010 100644 --- a/libraries/transformer/src/main/java/androidx/media3/transformer/SequenceAssetLoader.java +++ b/libraries/transformer/src/main/java/androidx/media3/transformer/SequenceAssetLoader.java @@ -125,6 +125,8 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull; private volatile long currentAssetDurationAfterEffectsAppliedUs; private volatile long maxSequenceDurationUs; private volatile boolean isMaxSequenceDurationUsFinal; + private volatile boolean sequenceHasAudio; + private volatile boolean sequenceHasVideo; public SequenceAssetLoader( EditedMediaItemSequence sequence, @@ -136,7 +138,7 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull; Looper looper) { editedMediaItems = sequence.editedMediaItems; isLooping = sequence.isLooping; - this.forceAudioTrack = forceAudioTrack; + this.forceAudioTrack = forceAudioTrack || sequence.editedMediaItems.get(0).isGap(); this.assetLoaderFactory = new GapInterceptingAssetLoaderFactory(assetLoaderFactory); this.compositionSettings = compositionSettings; sequenceAssetLoaderListener = listener; @@ -309,6 +311,11 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull; SampleConsumerWrapper sampleConsumer; if (isCurrentAssetFirstAsset) { + if (trackType == C.TRACK_TYPE_VIDEO) { + sequenceHasVideo = true; + } else { + sequenceHasAudio = true; + } @Nullable SampleConsumer wrappedSampleConsumer = sequenceAssetLoaderListener.onOutputFormat(format); if (wrappedSampleConsumer == null) { @@ -394,7 +401,9 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull; /* durationUs= */ (trackType == C.TRACK_TYPE_AUDIO && isLooping && decodeAudio) ? C.TIME_UNSET : currentAssetDurationUs, - /* decodedFormat= */ editedMediaItem.isGap() ? null : outputFormat, + /* decodedFormat= */ (editedMediaItem.isGap() && trackType == C.TRACK_TYPE_AUDIO) + ? 
null + : outputFormat, /* isLast= */ isLastMediaItemInSequence()); } @@ -596,9 +605,9 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull; } } - private void onGapSignalled() { - nonEndedTrackCount.decrementAndGet(); - if (!isLastMediaItemInSequence()) { + private void onAudioGapSignalled() { + int nonEndedTracks = nonEndedTrackCount.decrementAndGet(); + if (nonEndedTracks == 0 && !isLastMediaItemInSequence()) { switchAssetLoader(); } } @@ -680,15 +689,21 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull; private final class GapSignalingAssetLoader implements AssetLoader { private final long durationUs; - private final Format trackFormat; - private final Format decodedFormat; + private final boolean shouldProduceAudio; + private final boolean shouldProduceVideo; + private final Format audioTrackFormat; + private final Format audioTrackDecodedFormat; - private boolean outputtedFormat; + private boolean producedAudio; + private boolean producedVideo; private GapSignalingAssetLoader(long durationUs) { this.durationUs = durationUs; - this.trackFormat = new Format.Builder().setSampleMimeType(MimeTypes.AUDIO_RAW).build(); - this.decodedFormat = + shouldProduceAudio = sequenceHasAudio || forceAudioTrack; + shouldProduceVideo = sequenceHasVideo; + checkState(shouldProduceAudio || shouldProduceVideo); + this.audioTrackFormat = new Format.Builder().setSampleMimeType(MimeTypes.AUDIO_RAW).build(); + this.audioTrackDecodedFormat = new Format.Builder() .setSampleMimeType(MimeTypes.AUDIO_RAW) .setSampleRate(44100) @@ -700,14 +715,28 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull; @Override public void start() { onDurationUs(durationUs); - onTrackCount(1); - onTrackAdded(trackFormat, SUPPORTED_OUTPUT_TYPE_DECODED); + int trackCount = shouldProduceAudio && shouldProduceVideo ? 2 : 1; + onTrackCount(trackCount); + if (shouldProduceAudio) { + onTrackAdded(audioTrackFormat, SUPPORTED_OUTPUT_TYPE_DECODED); + } + if (shouldProduceVideo) { + onTrackAdded(BLANK_IMAGE_BITMAP_FORMAT, SUPPORTED_OUTPUT_TYPE_DECODED); + } outputFormatToSequenceAssetLoader(); } @Override public @Transformer.ProgressState int getProgress(ProgressHolder progressHolder) { - progressHolder.progress = outputtedFormat ? 99 : 0; + boolean audioPending = shouldProduceAudio && !producedAudio; + boolean videoPending = shouldProduceVideo && !producedVideo; + if (audioPending && videoPending) { + progressHolder.progress = 0; + } else if (!audioPending && !videoPending) { + progressHolder.progress = 99; + } else { + progressHolder.progress = 50; + } return PROGRESS_STATE_AVAILABLE; } @@ -721,19 +750,35 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull; /** Outputs the gap format, scheduling to try again if unsuccessful. 
*/ private void outputFormatToSequenceAssetLoader() { - try { - if (outputtedFormat) { - return; - } + boolean audioPending = shouldProduceAudio && !producedAudio; + boolean videoPending = shouldProduceVideo && !producedVideo; + checkState(audioPending || videoPending); - @Nullable SampleConsumerWrapper sampleConsumerWrapper = onOutputFormat(decodedFormat); - if (sampleConsumerWrapper != null) { - outputtedFormat = true; - sampleConsumerWrapper.onGapSignalled(); - } else { + try { + boolean shouldRetry = false; + if (audioPending) { + @Nullable + SampleConsumerWrapper sampleConsumerWrapper = onOutputFormat(audioTrackDecodedFormat); + if (sampleConsumerWrapper == null) { + shouldRetry = true; + } else { + sampleConsumerWrapper.onAudioGapSignalled(); + producedAudio = true; + } + } + if (videoPending) { + @Nullable + SampleConsumerWrapper sampleConsumerWrapper = onOutputFormat(BLANK_IMAGE_BITMAP_FORMAT); + if (sampleConsumerWrapper == null) { + shouldRetry = true; + } else { + insertBlankFrames(getBlankImageBitmap()); + producedVideo = true; + } + } + if (shouldRetry) { handler.postDelayed(this::outputFormatToSequenceAssetLoader, RETRY_DELAY_MS); } - } catch (ExportException e) { onError(e); } catch (RuntimeException e) { diff --git a/libraries/transformer/src/main/java/androidx/media3/transformer/TransformerInternal.java b/libraries/transformer/src/main/java/androidx/media3/transformer/TransformerInternal.java index b9176bd9bd..5f649fb1be 100644 --- a/libraries/transformer/src/main/java/androidx/media3/transformer/TransformerInternal.java +++ b/libraries/transformer/src/main/java/androidx/media3/transformer/TransformerInternal.java @@ -600,10 +600,6 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull; @C.TrackType int trackType = getProcessedTrackType(firstAssetLoaderInputFormat.sampleMimeType); - checkArgument( - trackType != TRACK_TYPE_VIDEO || !composition.sequences.get(sequenceIndex).hasGaps(), - "Gaps in video sequences are not supported."); - synchronized (assetLoaderLock) { assetLoaderInputTracker.registerTrack(sequenceIndex, firstAssetLoaderInputFormat); if (assetLoaderInputTracker.hasRegisteredAllTracks()) { diff --git a/libraries/transformer/src/main/java/androidx/media3/transformer/VideoFrameProcessingWrapper.java b/libraries/transformer/src/main/java/androidx/media3/transformer/VideoFrameProcessingWrapper.java index c033acbad3..0a2647b10a 100644 --- a/libraries/transformer/src/main/java/androidx/media3/transformer/VideoFrameProcessingWrapper.java +++ b/libraries/transformer/src/main/java/androidx/media3/transformer/VideoFrameProcessingWrapper.java @@ -19,7 +19,6 @@ package androidx.media3.transformer; import static androidx.media3.common.VideoFrameProcessor.INPUT_TYPE_BITMAP; import static androidx.media3.common.VideoFrameProcessor.INPUT_TYPE_SURFACE; import static androidx.media3.common.VideoFrameProcessor.INPUT_TYPE_TEXTURE_ID; -import static androidx.media3.common.util.Assertions.checkArgument; import static androidx.media3.common.util.Assertions.checkNotNull; import android.graphics.Bitmap; @@ -59,7 +58,6 @@ import java.util.concurrent.atomic.AtomicLong; long durationUs, @Nullable Format decodedFormat, boolean isLast) { - checkArgument(!editedMediaItem.isGap()); boolean isSurfaceAssetLoaderMediaItem = isMediaItemForSurfaceAssetLoader(editedMediaItem); durationUs = editedMediaItem.getDurationAfterEffectsApplied(durationUs); if (decodedFormat != null) { diff --git a/libraries/transformer/src/test/java/androidx/media3/transformer/SequenceExportTest.java 
b/libraries/transformer/src/test/java/androidx/media3/transformer/SequenceExportTest.java index e710745e49..c229ae5dcc 100644 --- a/libraries/transformer/src/test/java/androidx/media3/transformer/SequenceExportTest.java +++ b/libraries/transformer/src/test/java/androidx/media3/transformer/SequenceExportTest.java @@ -538,24 +538,6 @@ public final class SequenceExportTest { assertThat(getRootCause(exception)).hasMessageThat().isEqualTo("Gaps can not be transmuxed."); } - @Test - public void start_videoGap_throws() throws Exception { - Transformer transformer = new TestTransformerBuilder(context).build(); - EditedMediaItem audioVideoItem = - new EditedMediaItem.Builder(MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW_VIDEO)) - .build(); - EditedMediaItemSequence sequence = - new EditedMediaItemSequence.Builder().addItem(audioVideoItem).addGap(500_000).build(); - - transformer.start(new Composition.Builder(sequence).build(), outputDir.newFile().getPath()); - - ExportException exception = - assertThrows(ExportException.class, () -> TransformerTestRunner.runLooper(transformer)); - assertThat(getRootCause(exception)) - .hasMessageThat() - .isEqualTo("Gaps in video sequences are not supported."); - } - @Test public void start_gapVideo_throws() throws Exception { Transformer transformer = new TestTransformerBuilder(context).build();
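
A minimal usage sketch of the behavior this CL enables (illustrative only, not part of the patch; `context` is assumed to be an Android Context, and the asset URI and output path are placeholder values): a gap placed between or after items in a sequence containing video is now filled with blank frames instead of being rejected.

    // Hypothetical media item; the URI is a placeholder.
    EditedMediaItem videoItem =
        new EditedMediaItem.Builder(MediaItem.fromUri("file:///path/to/video.mp4")).build();
    EditedMediaItemSequence sequence =
        new EditedMediaItemSequence.Builder()
            .addItem(videoItem)
            // 1 second gap, rendered as blank frames (and silence if the sequence has audio).
            .addGap(/* durationUs= */ 1_000_000)
            .addItem(videoItem)
            .build();
    Composition composition = new Composition.Builder(sequence).build();
    Transformer transformer = new Transformer.Builder(context).build();
    transformer.start(composition, "/path/to/output.mp4");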