diff --git a/libraries/transformer/src/androidTest/java/androidx/media3/transformer/TransformerGapsTest.java b/libraries/transformer/src/androidTest/java/androidx/media3/transformer/TransformerGapsTest.java
index 235047a9c8..d5c236eca5 100644
--- a/libraries/transformer/src/androidTest/java/androidx/media3/transformer/TransformerGapsTest.java
+++ b/libraries/transformer/src/androidTest/java/androidx/media3/transformer/TransformerGapsTest.java
@@ -24,6 +24,8 @@ import static org.junit.Assert.assertThrows;
 import android.content.Context;
 import androidx.media3.common.C;
 import androidx.media3.common.MediaItem;
+import androidx.media3.common.MimeTypes;
+import androidx.media3.effect.Presentation;
 import androidx.media3.extractor.mp4.Mp4Extractor;
 import androidx.media3.extractor.text.DefaultSubtitleParserFactory;
 import androidx.media3.test.utils.FakeExtractorOutput;
@@ -31,6 +33,7 @@ import androidx.media3.test.utils.FakeTrackOutput;
 import androidx.media3.test.utils.TestUtil;
 import androidx.test.core.app.ApplicationProvider;
 import androidx.test.ext.junit.runners.AndroidJUnit4;
+import com.google.common.collect.ImmutableList;
 import org.junit.Before;
 import org.junit.Rule;
 import org.junit.Test;
@@ -60,7 +63,6 @@ public class TransformerGapsTest {
     testId = testName.getMethodName();
   }
 
-  // TODO: b/391111085 - Change test when gaps at the start of the sequence are supported.
   @Test
   public void export_withThreeMediaItemsAndFirstMediaItemHavingNoVideo_throws() {
     Transformer transformer = new Transformer.Builder(context).build();
@@ -77,6 +79,58 @@ public class TransformerGapsTest {
         ExportException.class, () -> transformerAndroidTestRunner.run(testId, composition));
   }
 
+  @Test
+  public void
+      export_withThreeMediaItemsAndFirstMediaItemHavingNoVideoAndForceVideoTrackSetToTrue_insertsBlankFramesForFirstMediaItem()
+          throws Exception {
+    int outputWidth = 320;
+    int outputHeight = 240;
+    assumeFormatsSupported(
+        context,
+        testId,
+        /* inputFormat= */ MP4_ASSET.videoFormat,
+        /* outputFormat= */ MP4_ASSET
+            .videoFormat
+            .buildUpon()
+            .setWidth(outputWidth)
+            .setHeight(outputHeight)
+            .build());
+    assumeFormatsSupported(
+        context,
+        testId,
+        /* inputFormat= */ MP4_ASSET.videoFormat,
+        /* outputFormat= */ MP4_ASSET.videoFormat);
+    Transformer transformer =
+        new Transformer.Builder(context).setVideoMimeType(MimeTypes.VIDEO_H264).build();
+    Composition composition =
+        new Composition.Builder(
+                new EditedMediaItemSequence.Builder(
+                        AUDIO_ONLY_MEDIA_ITEM, AUDIO_VIDEO_MEDIA_ITEM, AUDIO_VIDEO_MEDIA_ITEM)
+                    .setForceVideoTrack(true)
+                    .build())
+            .setEffects(
+                new Effects(
+                    ImmutableList.of(),
+                    ImmutableList.of(
+                        Presentation.createForWidthAndHeight(
+                            outputWidth, outputHeight, Presentation.LAYOUT_SCALE_TO_FIT))))
+            .build();
+
+    ExportTestResult result =
+        new TransformerAndroidTestRunner.Builder(context, transformer)
+            .build()
+            .run(testId, composition);
+
+    FakeExtractorOutput fakeExtractorOutput =
+        TestUtil.extractAllSamplesFromFilePath(
+            new Mp4Extractor(new DefaultSubtitleParserFactory()), result.filePath);
+    FakeTrackOutput videoTrackOutput = getTrackOutput(fakeExtractorOutput, C.TRACK_TYPE_VIDEO);
+    // The video gap is for 1024 ms with 30 fps.
+    int expectedBlankFrames = 31;
+    assertThat(videoTrackOutput.getSampleCount())
+        .isEqualTo(2 * MP4_ASSET.videoFrameCount + expectedBlankFrames);
+  }
+
   @Test
   public void
       export_withThreeMediaItemsAndSecondMediaItemHavingNoVideo_insertsBlankFramesForSecondMediaItem()
@@ -100,7 +154,7 @@ public class TransformerGapsTest {
         TestUtil.extractAllSamplesFromFilePath(
             new Mp4Extractor(new DefaultSubtitleParserFactory()), result.filePath);
     FakeTrackOutput videoTrackOutput = getTrackOutput(fakeExtractorOutput, C.TRACK_TYPE_VIDEO);
-    // The gap is for 1024ms with 30 fps.
+    // The gap is for 1024 ms with 30 fps.
     int expectedBlankFrames = 31;
     assertThat(videoTrackOutput.getSampleCount())
         .isEqualTo(2 * MP4_ASSET.videoFrameCount + expectedBlankFrames);
@@ -129,12 +183,62 @@ public class TransformerGapsTest {
         TestUtil.extractAllSamplesFromFilePath(
             new Mp4Extractor(new DefaultSubtitleParserFactory()), result.filePath);
     FakeTrackOutput videoTrackOutput = getTrackOutput(fakeExtractorOutput, C.TRACK_TYPE_VIDEO);
-    // The gap is for 1024ms with 30 fps.
+    // The gap is for 1024 ms with 30 fps.
     int expectedBlankFrames = 31;
     assertThat(videoTrackOutput.getSampleCount())
        .isEqualTo(2 * MP4_ASSET.videoFrameCount + expectedBlankFrames);
   }
 
+  @Test
+  public void
+      export_withTwoVideoOnlyMediaItemsAndGapAtStartAndForceVideoTrackSetToTrue_insertsBlankFramesForGap()
+          throws Exception {
+    int outputWidth = 320;
+    int outputHeight = 240;
+    assumeFormatsSupported(
+        context,
+        testId,
+        /* inputFormat= */ MP4_ASSET.videoFormat,
+        /* outputFormat= */ MP4_ASSET
+            .videoFormat
+            .buildUpon()
+            .setWidth(outputWidth)
+            .setHeight(outputHeight)
+            .build());
+    // The default output MIME type is H265, which might not be supported on all devices.
+    Transformer transformer =
+        new Transformer.Builder(context).setVideoMimeType(MimeTypes.VIDEO_H264).build();
+    Composition composition =
+        new Composition.Builder(
+                new EditedMediaItemSequence.Builder()
+                    .addGap(/* durationUs= */ 1_000_000)
+                    .addItem(VIDEO_ONLY_MEDIA_ITEM)
+                    .addItem(VIDEO_ONLY_MEDIA_ITEM)
+                    .setForceVideoTrack(true)
+                    .build())
+            .setEffects(
+                new Effects(
+                    ImmutableList.of(),
+                    ImmutableList.of(
+                        Presentation.createForWidthAndHeight(
+                            outputWidth, outputHeight, Presentation.LAYOUT_SCALE_TO_FIT))))
+            .build();
+
+    ExportTestResult result =
+        new TransformerAndroidTestRunner.Builder(context, transformer)
+            .build()
+            .run(testId, composition);
+
+    FakeExtractorOutput fakeExtractorOutput =
+        TestUtil.extractAllSamplesFromFilePath(
+            new Mp4Extractor(new DefaultSubtitleParserFactory()), result.filePath);
+    FakeTrackOutput videoTrackOutput = getTrackOutput(fakeExtractorOutput, C.TRACK_TYPE_VIDEO);
+    // The gap is for 1 sec with 30 fps.
+    int expectedBlankFrames = 30;
+    assertThat(videoTrackOutput.getSampleCount())
+        .isEqualTo(2 * MP4_ASSET.videoFrameCount + expectedBlankFrames);
+  }
+
   @Test
   public void export_withTwoVideoOnlyMediaItemsAndGapInMiddle_insertsBlankFramesForGap()
       throws Exception {
@@ -212,6 +316,83 @@ public class TransformerGapsTest {
     assertThrows(IllegalArgumentException.class, sequenceBuilder::build);
   }
 
+  @Test
+  public void export_withTwoMediaItemsAndGapAtStartAndOnlyForceAudioTrackSetToTrue_throws()
+      throws Exception {
+    Transformer transformer = new Transformer.Builder(context).build();
+    Composition composition =
+        new Composition.Builder(
+                new EditedMediaItemSequence.Builder()
+                    .addGap(/* durationUs= */ 1_000_000)
+                    .addItem(AUDIO_VIDEO_MEDIA_ITEM)
+                    .addItem(AUDIO_VIDEO_MEDIA_ITEM)
+                    .setForceAudioTrack(true)
+                    .build())
+            .build();
+    TransformerAndroidTestRunner transformerAndroidTestRunner =
+        new TransformerAndroidTestRunner.Builder(context, transformer).build();
+
+    assertThrows(
+        ExportException.class, () -> transformerAndroidTestRunner.run(testId, composition));
+  }
+
+  @Test
+  public void
+      export_withTwoMediaItemsAndGapAtStartAndBothForceAudioAndVideoTrackSetToTrue_insertsBlankFramesAndSilenceForGap()
+          throws Exception {
+    int outputWidth = 320;
+    int outputHeight = 240;
+    assumeFormatsSupported(
+        context,
+        testId,
+        /* inputFormat= */ MP4_ASSET.videoFormat,
+        /* outputFormat= */ MP4_ASSET
+            .videoFormat
+            .buildUpon()
+            .setWidth(outputWidth)
+            .setHeight(outputHeight)
+            .build());
+    // The default output MIME type is H265, which might not be supported on all devices.
+    Transformer transformer =
+        new Transformer.Builder(context).setVideoMimeType(MimeTypes.VIDEO_H264).build();
+    Composition composition =
+        new Composition.Builder(
+                new EditedMediaItemSequence.Builder()
+                    .addGap(/* durationUs= */ 1_000_000)
+                    .addItem(AUDIO_VIDEO_MEDIA_ITEM)
+                    .addItem(AUDIO_VIDEO_MEDIA_ITEM)
+                    .setForceAudioTrack(true)
+                    .setForceVideoTrack(true)
+                    .build())
+            .setEffects(
+                new Effects(
+                    ImmutableList.of(),
+                    ImmutableList.of(
+                        Presentation.createForWidthAndHeight(
+                            outputWidth, outputHeight, Presentation.LAYOUT_SCALE_TO_FIT))))
+            .build();
+
+    ExportTestResult result =
+        new TransformerAndroidTestRunner.Builder(context, transformer)
+            .build()
+            .run(testId, composition);
+
+    FakeExtractorOutput fakeExtractorOutput =
+        TestUtil.extractAllSamplesFromFilePath(
+            new Mp4Extractor(new DefaultSubtitleParserFactory()), result.filePath);
+    FakeTrackOutput videoTrackOutput = getTrackOutput(fakeExtractorOutput, C.TRACK_TYPE_VIDEO);
+    // The gap is for 1 sec with 30 fps.
+    int expectedBlankFrames = 30;
+    assertThat(videoTrackOutput.getSampleCount())
+        .isEqualTo(2 * MP4_ASSET.videoFrameCount + expectedBlankFrames);
+    FakeTrackOutput audioTrackOutput = getTrackOutput(fakeExtractorOutput, C.TRACK_TYPE_AUDIO);
+    long lastAudioSampleTimestampUs =
+        audioTrackOutput.getSampleTimeUs(audioTrackOutput.getSampleCount() - 1);
+    // 1000 ms gap + 1024 ms audio + 1024 ms audio.
+    // Audio sample timestamps are not fully deterministic, so only assert a lower bound.
+    assertThat(lastAudioSampleTimestampUs).isGreaterThan(3_000_000);
+  }
+
   @Test
   public void export_withTwoMediaItemsAndGapInMiddle_insertsBlankFramesForGap() throws Exception {
     assumeFormatsSupported(
@@ -303,7 +484,7 @@ public class TransformerGapsTest {
         TestUtil.extractAllSamplesFromFilePath(
             new Mp4Extractor(new DefaultSubtitleParserFactory()), result.filePath);
     FakeTrackOutput videoTrackOutput = getTrackOutput(fakeExtractorOutput, C.TRACK_TYPE_VIDEO);
-    // The gap is for 1024ms with 30 fps.
+    // The gap is for 1024 ms with 30 fps.
     int expectedBlankFramesForAudioOnlyItem = 31;
     // The gap is for 1 sec with 30 fps.
     int expectedBlankFramesForOneSecGap = 30;
diff --git a/libraries/transformer/src/main/java/androidx/media3/transformer/EditedMediaItemSequence.java b/libraries/transformer/src/main/java/androidx/media3/transformer/EditedMediaItemSequence.java
index 02b9bf405b..3e3be082f1 100644
--- a/libraries/transformer/src/main/java/androidx/media3/transformer/EditedMediaItemSequence.java
+++ b/libraries/transformer/src/main/java/androidx/media3/transformer/EditedMediaItemSequence.java
@@ -20,6 +20,7 @@ import static androidx.media3.common.util.Assertions.checkArgument;
 import androidx.media3.common.MediaItem;
 import androidx.media3.common.audio.AudioProcessor;
 import androidx.media3.common.util.UnstableApi;
+import androidx.media3.effect.Presentation;
 import com.google.common.collect.ImmutableList;
 import com.google.errorprone.annotations.CanIgnoreReturnValue;
 import java.util.List;
@@ -37,6 +38,7 @@ public final class EditedMediaItemSequence {
     private final ImmutableList.Builder<EditedMediaItem> items;
     private boolean isLooping;
     private boolean forceAudioTrack;
+    private boolean forceVideoTrack;
 
     /** Creates an instance. */
     public Builder(EditedMediaItem... editedMediaItems) {
@@ -55,6 +57,7 @@
               .addAll(editedMediaItemSequence.editedMediaItems);
       isLooping = editedMediaItemSequence.isLooping;
       forceAudioTrack = editedMediaItemSequence.forceAudioTrack;
+      forceVideoTrack = editedMediaItemSequence.forceVideoTrack;
     }
 
     /**
@@ -98,10 +101,9 @@ public final class EditedMediaItemSequence {
      *
      * <p>A gap is a period of time with no media.
      *
-     * <p>If the gap is at the start of the sequence then {@linkplain #setForceAudioTrack(boolean)
-     *     force audio track} flag must be set to force silent audio.
-     *
-     * <p>Gaps at the start of the sequence are not supported if the sequence has video.
+     * <p>If the gap is added at the start of the sequence, then the {@linkplain
+     *     #setForceAudioTrack(boolean) force audio track} and/or {@linkplain
+     *     #setForceVideoTrack(boolean) force video track} flags must be set appropriately.
      *
      * @param durationUs The duration of the gap, in microseconds.
      * @return This builder, for convenience.
@@ -165,6 +167,43 @@ public final class EditedMediaItemSequence {
       return this;
     }
 
+    /**
+     * Forces blank frames in the {@linkplain EditedMediaItemSequence sequence}.
+     *
+     * <p>This flag is necessary when:
+     *
+     * <ul>
+     *   <li>The first {@link EditedMediaItem} in the sequence does not contain video, but
+     *       subsequent items do.
+     *   <li>The first item in the sequence is a {@linkplain #addGap(long) gap} and subsequent
+     *       items have video.
+     * </ul>
+     *
+     * <p>If the flag is not set appropriately, then the export will {@linkplain
+     * Transformer.Listener#onError(Composition, ExportResult, ExportException) fail}.
+     *
+     * <p>If the first {@link EditedMediaItem} already contains video, this flag has no effect.
+     *
+     * <p>The MIME type of the output's video track can be set using {@link
+     * Transformer.Builder#setVideoMimeType(String)}.
+     *
+     * <p>The output resolution must be set using a {@link Presentation} effect on the {@link
+     * Composition}.
+     *
+     * <p>Forcing a video track and {@linkplain Composition.Builder#setTransmuxVideo(boolean)
+     * requesting video transmuxing} are not allowed together because generating blank frames
+     * requires transcoding.
+     *
+     * <p>The default value is {@code false}.
+     *
+     * @param forceVideoTrack Whether to force video track.
+     */
+    @CanIgnoreReturnValue
+    public Builder setForceVideoTrack(boolean forceVideoTrack) {
+      this.forceVideoTrack = forceVideoTrack;
+      return this;
+    }
+
     /**
      * Builds the {@link EditedMediaItemSequence}.
      *
@@ -199,6 +238,9 @@
   /** Forces silent audio in the {@linkplain EditedMediaItemSequence sequence}. */
   public final boolean forceAudioTrack;
 
+  /** Forces blank frames in the {@linkplain EditedMediaItemSequence sequence}. */
+  public final boolean forceVideoTrack;
+
   /**
    * @deprecated Use {@link Builder}.
    */
@@ -234,10 +276,12 @@
     checkArgument(
         !editedMediaItems.isEmpty(), "The sequence must contain at least one EditedMediaItem.");
     checkArgument(
-        !editedMediaItems.get(0).isGap() || builder.forceAudioTrack,
-        "If the first item in the sequence is a Gap, then forceAudioTrack flag must be set");
+        !editedMediaItems.get(0).isGap() || builder.forceAudioTrack || builder.forceVideoTrack,
+        "If the first item in the sequence is a Gap, then forceAudioTrack or forceVideoTrack flag"
+            + " must be set");
     this.isLooping = builder.isLooping;
     this.forceAudioTrack = builder.forceAudioTrack;
+    this.forceVideoTrack = builder.forceVideoTrack;
   }
 
   /** Return whether any items are a {@linkplain Builder#addGap(long) gap}. */
diff --git a/libraries/transformer/src/main/java/androidx/media3/transformer/SequenceAssetLoader.java b/libraries/transformer/src/main/java/androidx/media3/transformer/SequenceAssetLoader.java
index 762f3680bc..52703a4558 100644
--- a/libraries/transformer/src/main/java/androidx/media3/transformer/SequenceAssetLoader.java
+++ b/libraries/transformer/src/main/java/androidx/media3/transformer/SequenceAssetLoader.java
@@ -83,6 +83,7 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
   private final List<EditedMediaItem> editedMediaItems;
   private final boolean isLooping;
   private final boolean forceAudioTrack;
+  private final boolean forceVideoTrack;
   private final Factory assetLoaderFactory;
   private final CompositionSettings compositionSettings;
   private final Listener sequenceAssetLoaderListener;
@@ -139,6 +140,7 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
     editedMediaItems = sequence.editedMediaItems;
     isLooping = sequence.isLooping;
     this.forceAudioTrack = sequence.forceAudioTrack;
+    this.forceVideoTrack = sequence.forceVideoTrack;
     this.assetLoaderFactory = new GapInterceptingAssetLoaderFactory(assetLoaderFactory);
     this.compositionSettings = compositionSettings;
     sequenceAssetLoaderListener = listener;
@@ -271,10 +273,16 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
         return decode;
       }
 
-      boolean addForcedAudioTrack = forceAudioTrack && reportedTrackCount.get() == 1 && !isAudio;
+      boolean addForcedAudioTrack = false;
+      boolean addForcedVideoTrack = false;
+      if (reportedTrackCount.get() == 1) {
+        addForcedAudioTrack = forceAudioTrack && !isAudio;
+        addForcedVideoTrack = forceVideoTrack && isAudio;
+      }
 
       if (!isTrackCountReported) {
-        int trackCount = reportedTrackCount.get() + (addForcedAudioTrack ? 1 : 0);
+        int trackCount =
+            reportedTrackCount.get() + (addForcedAudioTrack || addForcedVideoTrack ? 1 : 0);
         sequenceAssetLoaderListener.onTrackCount(trackCount);
         isTrackCountReported = true;
       }
@@ -293,6 +301,11 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
             FORCE_AUDIO_TRACK_FORMAT, SUPPORTED_OUTPUT_TYPE_DECODED);
         decodeAudio = true;
       }
+      if (addForcedVideoTrack) {
+        sequenceAssetLoaderListener.onTrackAdded(
+            BLANK_IMAGE_BITMAP_FORMAT, SUPPORTED_OUTPUT_TYPE_DECODED);
+        decodeVideo = true;
+      }
       return decodeOutput;
     }
 
@@ -324,30 +337,41 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
         sampleConsumer = new SampleConsumerWrapper(wrappedSampleConsumer, trackType);
         sampleConsumersByTrackType.put(trackType, sampleConsumer);
 
-        if (forceAudioTrack && reportedTrackCount.get() == 1 && trackType == C.TRACK_TYPE_VIDEO) {
-          SampleConsumer wrappedAudioSampleConsumer =
-              checkStateNotNull(
-                  sequenceAssetLoaderListener.onOutputFormat(
-                      FORCE_AUDIO_TRACK_FORMAT
-                          .buildUpon()
-                          .setSampleMimeType(MimeTypes.AUDIO_RAW)
-                          .setPcmEncoding(C.ENCODING_PCM_16BIT)
-                          .build()));
-          sampleConsumersByTrackType.put(
-              C.TRACK_TYPE_AUDIO,
-              new SampleConsumerWrapper(wrappedAudioSampleConsumer, C.TRACK_TYPE_AUDIO));
-        }
+        if (reportedTrackCount.get() == 1) {
+          if (forceAudioTrack && trackType == C.TRACK_TYPE_VIDEO) {
+            SampleConsumer wrappedAudioSampleConsumer =
+                checkStateNotNull(
+                    sequenceAssetLoaderListener.onOutputFormat(
+                        FORCE_AUDIO_TRACK_FORMAT
+                            .buildUpon()
+                            .setSampleMimeType(MimeTypes.AUDIO_RAW)
+                            .setPcmEncoding(C.ENCODING_PCM_16BIT)
+                            .build()));
+            sampleConsumersByTrackType.put(
+                C.TRACK_TYPE_AUDIO,
+                new SampleConsumerWrapper(wrappedAudioSampleConsumer, C.TRACK_TYPE_AUDIO));
+          } else if (forceVideoTrack && trackType == C.TRACK_TYPE_AUDIO) {
+            SampleConsumer wrappedVideoSampleConsumer =
+                checkStateNotNull(
+                    sequenceAssetLoaderListener.onOutputFormat(BLANK_IMAGE_BITMAP_FORMAT));
+            sampleConsumersByTrackType.put(
+                C.TRACK_TYPE_VIDEO,
+                new SampleConsumerWrapper(wrappedVideoSampleConsumer, C.TRACK_TYPE_VIDEO));
+          }
+        }
       } else {
+        String missingTrackMessage =
+            trackType == C.TRACK_TYPE_AUDIO
+                ? "The preceding MediaItem does not contain any audio track. If the sequence starts"
+                    + " with an item without audio track (like images), followed by items with"
+                    + " audio tracks, then EditedMediaItemSequence.Builder.setForceAudioTrack()"
+                    + " needs to be set to true."
+                : "The preceding MediaItem does not contain any video track. If the sequence starts"
+                    + " with an item without video track (audio only), followed by items with video"
+                    + " tracks, then EditedMediaItemSequence.Builder.setForceVideoTrack() needs to"
+                    + " be set to true.";
         sampleConsumer =
-            checkStateNotNull(
-                sampleConsumersByTrackType.get(trackType),
-                Util.formatInvariant(
-                    "The preceding MediaItem does not contain any track of type %d. If the"
-                        + " Composition contains a sequence that starts with items without audio"
-                        + " tracks (like images), followed by items with audio tracks,"
-                        + " Composition.Builder.experimentalSetForceAudioTrack() needs to be set to"
-                        + " true.",
-                    trackType));
+            checkStateNotNull(sampleConsumersByTrackType.get(trackType), missingTrackMessage);
       }
       onMediaItemChanged(trackType, format);
       if (reportedTrackCount.get() == 1 && sampleConsumersByTrackType.size() == 2) {
@@ -700,7 +724,7 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
     private GapSignalingAssetLoader(long durationUs) {
       this.durationUs = durationUs;
       shouldProduceAudio = sequenceHasAudio || forceAudioTrack;
-      shouldProduceVideo = sequenceHasVideo;
+      shouldProduceVideo = sequenceHasVideo || forceVideoTrack;
       checkState(shouldProduceAudio || shouldProduceVideo);
       this.audioTrackFormat = new Format.Builder().setSampleMimeType(MimeTypes.AUDIO_RAW).build();
       this.audioTrackDecodedFormat =
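Illustrative usage of the new flag (a minimal sketch; videoUri, outputPath, and context are assumed to be app-supplied, and the new tests above remain the authoritative examples):

// A sequence that starts with a 1 second gap followed by a video item. Because the gap is
// first and the sequence has video, setForceVideoTrack(true) is required so blank frames are
// generated for the gap; a Presentation effect on the Composition supplies their resolution.
EditedMediaItem videoItem =
    new EditedMediaItem.Builder(MediaItem.fromUri(videoUri)).build();
Composition composition =
    new Composition.Builder(
            new EditedMediaItemSequence.Builder()
                .addGap(/* durationUs= */ 1_000_000)
                .addItem(videoItem)
                .setForceVideoTrack(true)
                .build())
        .setEffects(
            new Effects(
                /* audioProcessors= */ ImmutableList.of(),
                /* videoEffects= */ ImmutableList.of(
                    Presentation.createForWidthAndHeight(
                        1280, 720, Presentation.LAYOUT_SCALE_TO_FIT))))
        .build();
Transformer transformer =
    new Transformer.Builder(context).setVideoMimeType(MimeTypes.VIDEO_H264).build();
transformer.start(composition, outputPath);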