Add forceVideoTrack flag on EditedMediaItemSequence

This is similar to forceAudioTrack.

PiperOrigin-RevId: 743235372
This commit is contained in:
sheenachhabra 2025-04-02 12:23:29 -07:00 committed by Copybara-Service
parent 989e9f9e84
commit bd14b753ee
3 changed files with 283 additions and 34 deletions

View File

@ -24,6 +24,8 @@ import static org.junit.Assert.assertThrows;
import android.content.Context;
import androidx.media3.common.C;
import androidx.media3.common.MediaItem;
import androidx.media3.common.MimeTypes;
import androidx.media3.effect.Presentation;
import androidx.media3.extractor.mp4.Mp4Extractor;
import androidx.media3.extractor.text.DefaultSubtitleParserFactory;
import androidx.media3.test.utils.FakeExtractorOutput;
@ -31,6 +33,7 @@ import androidx.media3.test.utils.FakeTrackOutput;
import androidx.media3.test.utils.TestUtil;
import androidx.test.core.app.ApplicationProvider;
import androidx.test.ext.junit.runners.AndroidJUnit4;
import com.google.common.collect.ImmutableList;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
@ -60,7 +63,6 @@ public class TransformerGapsTest {
testId = testName.getMethodName();
}
// TODO: b/391111085 - Change test when gaps at the start of the sequence are supported.
@Test
public void export_withThreeMediaItemsAndFirstMediaItemHavingNoVideo_throws() {
Transformer transformer = new Transformer.Builder(context).build();
@ -77,6 +79,58 @@ public class TransformerGapsTest {
ExportException.class, () -> transformerAndroidTestRunner.run(testId, composition));
}
@Test
public void
    export_withThreeMediaItemsAndFirstMediaItemHavingNoVideoAndForceVideoTrackSetToTrue_insertsBlankFramesForFirstMediaItem()
        throws Exception {
  int outputWidth = 320;
  int outputHeight = 240;
  // Format support is assumed for the resized output format...
  assumeFormatsSupported(
      context,
      testId,
      /* inputFormat= */ MP4_ASSET.videoFormat,
      /* outputFormat= */ MP4_ASSET
          .videoFormat
          .buildUpon()
          .setWidth(outputWidth)
          .setHeight(outputHeight)
          .build());
  // ...and for the asset's unmodified video format.
  assumeFormatsSupported(
      context,
      testId,
      /* inputFormat= */ MP4_ASSET.videoFormat,
      /* outputFormat= */ MP4_ASSET.videoFormat);
  Transformer transformer =
      new Transformer.Builder(context).setVideoMimeType(MimeTypes.VIDEO_H264).build();
  // Audio-only item first, followed by two items with video; the video track is forced.
  EditedMediaItemSequence sequence =
      new EditedMediaItemSequence.Builder(
              AUDIO_ONLY_MEDIA_ITEM, AUDIO_VIDEO_MEDIA_ITEM, AUDIO_VIDEO_MEDIA_ITEM)
          .setForceVideoTrack(true)
          .build();
  // The output resolution is set through a Presentation effect on the composition.
  Effects compositionEffects =
      new Effects(
          ImmutableList.of(),
          ImmutableList.of(
              Presentation.createForWidthAndHeight(
                  outputWidth, outputHeight, Presentation.LAYOUT_SCALE_TO_FIT)));
  Composition composition =
      new Composition.Builder(sequence).setEffects(compositionEffects).build();
  TransformerAndroidTestRunner testRunner =
      new TransformerAndroidTestRunner.Builder(context, transformer).build();

  ExportTestResult result = testRunner.run(testId, composition);

  Mp4Extractor mp4Extractor = new Mp4Extractor(new DefaultSubtitleParserFactory());
  FakeExtractorOutput extractedOutput =
      TestUtil.extractAllSamplesFromFilePath(mp4Extractor, result.filePath);
  FakeTrackOutput videoTrack = getTrackOutput(extractedOutput, C.TRACK_TYPE_VIDEO);
  // The video gap is for 1024 ms with 30 fps.
  int blankFrameCount = 31;
  assertThat(videoTrack.getSampleCount())
      .isEqualTo(2 * MP4_ASSET.videoFrameCount + blankFrameCount);
}
@Test
public void
export_withThreeMediaItemsAndSecondMediaItemHavingNoVideo_insertsBlankFramesForSecondMediaItem()
@ -100,7 +154,7 @@ public class TransformerGapsTest {
TestUtil.extractAllSamplesFromFilePath(
new Mp4Extractor(new DefaultSubtitleParserFactory()), result.filePath);
FakeTrackOutput videoTrackOutput = getTrackOutput(fakeExtractorOutput, C.TRACK_TYPE_VIDEO);
// The gap is for 1024ms with 30 fps.
// The gap is for 1024 ms with 30 fps.
int expectedBlankFrames = 31;
assertThat(videoTrackOutput.getSampleCount())
.isEqualTo(2 * MP4_ASSET.videoFrameCount + expectedBlankFrames);
@ -129,12 +183,62 @@ public class TransformerGapsTest {
TestUtil.extractAllSamplesFromFilePath(
new Mp4Extractor(new DefaultSubtitleParserFactory()), result.filePath);
FakeTrackOutput videoTrackOutput = getTrackOutput(fakeExtractorOutput, C.TRACK_TYPE_VIDEO);
// The gap is for 1024ms with 30 fps.
// The gap is for 1024 ms with 30 fps.
int expectedBlankFrames = 31;
assertThat(videoTrackOutput.getSampleCount())
.isEqualTo(2 * MP4_ASSET.videoFrameCount + expectedBlankFrames);
}
@Test
public void
    export_withTwoVideoOnlyMediaItemsAndGapAtStartAndForceVideoTrackSetToTrue_insertsBlankFramesForGap()
        throws Exception {
  int outputWidth = 320;
  int outputHeight = 240;
  // Format support is assumed for the resized output format.
  assumeFormatsSupported(
      context,
      testId,
      /* inputFormat= */ MP4_ASSET.videoFormat,
      /* outputFormat= */ MP4_ASSET
          .videoFormat
          .buildUpon()
          .setWidth(outputWidth)
          .setHeight(outputHeight)
          .build());
  // The default output mime type is H265 which might not work on all the devices.
  Transformer transformer =
      new Transformer.Builder(context).setVideoMimeType(MimeTypes.VIDEO_H264).build();
  // One second gap first, followed by two video-only items; the video track is forced.
  EditedMediaItemSequence sequence =
      new EditedMediaItemSequence.Builder()
          .addGap(/* durationUs= */ 1_000_000)
          .addItem(VIDEO_ONLY_MEDIA_ITEM)
          .addItem(VIDEO_ONLY_MEDIA_ITEM)
          .setForceVideoTrack(true)
          .build();
  // The output resolution is set through a Presentation effect on the composition.
  Effects compositionEffects =
      new Effects(
          ImmutableList.of(),
          ImmutableList.of(
              Presentation.createForWidthAndHeight(
                  outputWidth, outputHeight, Presentation.LAYOUT_SCALE_TO_FIT)));
  Composition composition =
      new Composition.Builder(sequence).setEffects(compositionEffects).build();
  TransformerAndroidTestRunner testRunner =
      new TransformerAndroidTestRunner.Builder(context, transformer).build();

  ExportTestResult result = testRunner.run(testId, composition);

  Mp4Extractor mp4Extractor = new Mp4Extractor(new DefaultSubtitleParserFactory());
  FakeExtractorOutput extractedOutput =
      TestUtil.extractAllSamplesFromFilePath(mp4Extractor, result.filePath);
  FakeTrackOutput videoTrack = getTrackOutput(extractedOutput, C.TRACK_TYPE_VIDEO);
  // The gap is for 1 sec with 30 fps.
  int blankFrameCount = 30;
  assertThat(videoTrack.getSampleCount())
      .isEqualTo(2 * MP4_ASSET.videoFrameCount + blankFrameCount);
}
@Test
public void export_withTwoVideoOnlyMediaItemsAndGapInMiddle_insertsBlankFramesForGap()
throws Exception {
@ -212,6 +316,83 @@ public class TransformerGapsTest {
assertThrows(IllegalArgumentException.class, sequenceBuilder::build);
}
@Test
public void export_withTwoMediaItemsAndGapAtStartAndOnlyForceAudioTrackSetToTrue_throws()
    throws Exception {
  // One second gap first, followed by two audio+video items; only the audio track is forced.
  EditedMediaItemSequence sequence =
      new EditedMediaItemSequence.Builder()
          .addGap(/* durationUs= */ 1_000_000)
          .addItem(AUDIO_VIDEO_MEDIA_ITEM)
          .addItem(AUDIO_VIDEO_MEDIA_ITEM)
          .setForceAudioTrack(true)
          .build();
  Composition composition = new Composition.Builder(sequence).build();
  Transformer transformer = new Transformer.Builder(context).build();
  TransformerAndroidTestRunner testRunner =
      new TransformerAndroidTestRunner.Builder(context, transformer).build();

  // The export is expected to fail with an ExportException.
  assertThrows(ExportException.class, () -> testRunner.run(testId, composition));
}
@Test
public void
    export_withTwoMediaItemsAndGapAtStartAndBothForceAudioAndVideoTrackSetToTrue_insertsBlankFramesAndSilenceForGap()
        throws Exception {
  int outputWidth = 320;
  int outputHeight = 240;
  // Format support is assumed for the resized output format.
  assumeFormatsSupported(
      context,
      testId,
      /* inputFormat= */ MP4_ASSET.videoFormat,
      /* outputFormat= */ MP4_ASSET
          .videoFormat
          .buildUpon()
          .setWidth(outputWidth)
          .setHeight(outputHeight)
          .build());
  // The default output mime type is H265 which might not work on all the devices.
  Transformer transformer =
      new Transformer.Builder(context).setVideoMimeType(MimeTypes.VIDEO_H264).build();
  // One second gap first, followed by two audio+video items; both tracks are forced.
  EditedMediaItemSequence sequence =
      new EditedMediaItemSequence.Builder()
          .addGap(/* durationUs= */ 1_000_000)
          .addItem(AUDIO_VIDEO_MEDIA_ITEM)
          .addItem(AUDIO_VIDEO_MEDIA_ITEM)
          .setForceAudioTrack(true)
          .setForceVideoTrack(true)
          .build();
  // The output resolution is set through a Presentation effect on the composition.
  Effects compositionEffects =
      new Effects(
          ImmutableList.of(),
          ImmutableList.of(
              Presentation.createForWidthAndHeight(
                  outputWidth, outputHeight, Presentation.LAYOUT_SCALE_TO_FIT)));
  Composition composition =
      new Composition.Builder(sequence).setEffects(compositionEffects).build();
  TransformerAndroidTestRunner testRunner =
      new TransformerAndroidTestRunner.Builder(context, transformer).build();

  ExportTestResult result = testRunner.run(testId, composition);

  Mp4Extractor mp4Extractor = new Mp4Extractor(new DefaultSubtitleParserFactory());
  FakeExtractorOutput extractedOutput =
      TestUtil.extractAllSamplesFromFilePath(mp4Extractor, result.filePath);
  FakeTrackOutput videoTrack = getTrackOutput(extractedOutput, C.TRACK_TYPE_VIDEO);
  // The gap is for 1 sec with 30 fps.
  int blankFrameCount = 30;
  assertThat(videoTrack.getSampleCount())
      .isEqualTo(2 * MP4_ASSET.videoFrameCount + blankFrameCount);
  FakeTrackOutput audioTrack = getTrackOutput(extractedOutput, C.TRACK_TYPE_AUDIO);
  int lastAudioSampleIndex = audioTrack.getSampleCount() - 1;
  long lastAudioSampleTimestampUs = audioTrack.getSampleTimeUs(lastAudioSampleIndex);
  // 1000 ms gap + 1024 ms audio + 1024 ms audio.
  // Audio samples are not deterministic, so only a lower bound on the timestamp is asserted.
  assertThat(lastAudioSampleTimestampUs).isGreaterThan(3_000_000);
}
@Test
public void export_withTwoMediaItemsAndGapInMiddle_insertsBlankFramesForGap() throws Exception {
assumeFormatsSupported(
@ -303,7 +484,7 @@ public class TransformerGapsTest {
TestUtil.extractAllSamplesFromFilePath(
new Mp4Extractor(new DefaultSubtitleParserFactory()), result.filePath);
FakeTrackOutput videoTrackOutput = getTrackOutput(fakeExtractorOutput, C.TRACK_TYPE_VIDEO);
// The gap is for 1024ms with 30 fps.
// The gap is for 1024 ms with 30 fps.
int expectedBlankFramesForAudioOnlyItem = 31;
// The gap is for 1 sec with 30 fps.
int expectedBlankFramesForOneSecGap = 30;

View File

@ -20,6 +20,7 @@ import static androidx.media3.common.util.Assertions.checkArgument;
import androidx.media3.common.MediaItem;
import androidx.media3.common.audio.AudioProcessor;
import androidx.media3.common.util.UnstableApi;
import androidx.media3.effect.Presentation;
import com.google.common.collect.ImmutableList;
import com.google.errorprone.annotations.CanIgnoreReturnValue;
import java.util.List;
@ -37,6 +38,7 @@ public final class EditedMediaItemSequence {
private final ImmutableList.Builder<EditedMediaItem> items;
private boolean isLooping;
private boolean forceAudioTrack;
private boolean forceVideoTrack;
/** Creates an instance. */
public Builder(EditedMediaItem... editedMediaItems) {
@ -55,6 +57,7 @@ public final class EditedMediaItemSequence {
.addAll(editedMediaItemSequence.editedMediaItems);
isLooping = editedMediaItemSequence.isLooping;
forceAudioTrack = editedMediaItemSequence.forceAudioTrack;
forceVideoTrack = editedMediaItemSequence.forceVideoTrack;
}
/**
@ -98,10 +101,9 @@ public final class EditedMediaItemSequence {
*
* <p>A gap is a period of time with no media.
*
* <p>If the gap is at the start of the sequence then {@linkplain #setForceAudioTrack(boolean)
* force audio track} flag must be set to force silent audio.
*
* <p>Gaps at the start of the sequence are not supported if the sequence has video.
* <p>If the gap is added at the start of the sequence, then the {@linkplain
* #setForceAudioTrack(boolean) force audio track} and/or {@linkplain
* #setForceVideoTrack(boolean) force video track} flag must be set appropriately.
*
* @param durationUs The duration of the gap, in microseconds.
* @return This builder, for convenience.
@ -165,6 +167,43 @@ public final class EditedMediaItemSequence {
return this;
}
/**
* Forces blank frames in the {@linkplain EditedMediaItemSequence sequence}.
*
* <p>This flag is necessary when:
*
* <ul>
* <li>The first {@link EditedMediaItem} in the sequence does not contain video, but
* subsequent items do.
* <li>The first item in the sequence is a {@linkplain #addGap(long) gap} and the subsequent
* {@linkplain EditedMediaItem media items} contain video.
* </ul>
*
* <p>If the flag is not set appropriately, then the export will {@linkplain
* Transformer.Listener#onError(Composition, ExportResult, ExportException) fail}.
*
* <p>If the first {@link EditedMediaItem} already contains video, this flag has no effect.
*
* <p>The MIME type of the output's video track can be set using {@link
* Transformer.Builder#setVideoMimeType(String)}.
*
* <p>The output resolution must be set using a {@link Presentation} effect on the {@link
* Composition}.
*
* <p>Forcing a video track and {@linkplain Composition.Builder#setTransmuxVideo(boolean)
* requesting video transmuxing} are not allowed together because generating blank frames
* requires transcoding.
*
* <p>The default value is {@code false}.
*
* @param forceVideoTrack Whether to force video track.
* @return This builder, for convenience.
*/
@CanIgnoreReturnValue
public Builder setForceVideoTrack(boolean forceVideoTrack) {
this.forceVideoTrack = forceVideoTrack;
return this;
}
/**
* Builds the {@link EditedMediaItemSequence}.
*
@ -199,6 +238,9 @@ public final class EditedMediaItemSequence {
/** Forces silent audio in the {@linkplain EditedMediaItemSequence sequence}. */
public final boolean forceAudioTrack;
/** Forces blank frames in the {@linkplain EditedMediaItemSequence sequence}. */
public final boolean forceVideoTrack;
/**
* @deprecated Use {@link Builder}.
*/
@ -234,10 +276,12 @@ public final class EditedMediaItemSequence {
checkArgument(
!editedMediaItems.isEmpty(), "The sequence must contain at least one EditedMediaItem.");
checkArgument(
!editedMediaItems.get(0).isGap() || builder.forceAudioTrack,
"If the first item in the sequence is a Gap, then forceAudioTrack flag must be set");
!editedMediaItems.get(0).isGap() || builder.forceAudioTrack || builder.forceVideoTrack,
"If the first item in the sequence is a Gap, then forceAudioTrack or forceVideoTrack flag"
+ " must be set");
this.isLooping = builder.isLooping;
this.forceAudioTrack = builder.forceAudioTrack;
this.forceVideoTrack = builder.forceVideoTrack;
}
/** Return whether any items are a {@linkplain Builder#addGap(long) gap}. */

View File

@ -83,6 +83,7 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
private final List<EditedMediaItem> editedMediaItems;
private final boolean isLooping;
private final boolean forceAudioTrack;
private final boolean forceVideoTrack;
private final Factory assetLoaderFactory;
private final CompositionSettings compositionSettings;
private final Listener sequenceAssetLoaderListener;
@ -139,6 +140,7 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
editedMediaItems = sequence.editedMediaItems;
isLooping = sequence.isLooping;
this.forceAudioTrack = sequence.forceAudioTrack;
this.forceVideoTrack = sequence.forceVideoTrack;
this.assetLoaderFactory = new GapInterceptingAssetLoaderFactory(assetLoaderFactory);
this.compositionSettings = compositionSettings;
sequenceAssetLoaderListener = listener;
@ -271,10 +273,16 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
return decode;
}
boolean addForcedAudioTrack = forceAudioTrack && reportedTrackCount.get() == 1 && !isAudio;
boolean addForcedAudioTrack = false;
boolean addForcedVideoTrack = false;
if (reportedTrackCount.get() == 1) {
addForcedAudioTrack = forceAudioTrack && !isAudio;
addForcedVideoTrack = forceVideoTrack && isAudio;
}
if (!isTrackCountReported) {
int trackCount = reportedTrackCount.get() + (addForcedAudioTrack ? 1 : 0);
int trackCount =
reportedTrackCount.get() + (addForcedAudioTrack || addForcedVideoTrack ? 1 : 0);
sequenceAssetLoaderListener.onTrackCount(trackCount);
isTrackCountReported = true;
}
@ -293,6 +301,11 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
FORCE_AUDIO_TRACK_FORMAT, SUPPORTED_OUTPUT_TYPE_DECODED);
decodeAudio = true;
}
if (addForcedVideoTrack) {
sequenceAssetLoaderListener.onTrackAdded(
BLANK_IMAGE_BITMAP_FORMAT, SUPPORTED_OUTPUT_TYPE_DECODED);
decodeVideo = true;
}
return decodeOutput;
}
@ -324,30 +337,41 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
sampleConsumer = new SampleConsumerWrapper(wrappedSampleConsumer, trackType);
sampleConsumersByTrackType.put(trackType, sampleConsumer);
if (forceAudioTrack && reportedTrackCount.get() == 1 && trackType == C.TRACK_TYPE_VIDEO) {
SampleConsumer wrappedAudioSampleConsumer =
checkStateNotNull(
sequenceAssetLoaderListener.onOutputFormat(
FORCE_AUDIO_TRACK_FORMAT
.buildUpon()
.setSampleMimeType(MimeTypes.AUDIO_RAW)
.setPcmEncoding(C.ENCODING_PCM_16BIT)
.build()));
sampleConsumersByTrackType.put(
C.TRACK_TYPE_AUDIO,
new SampleConsumerWrapper(wrappedAudioSampleConsumer, C.TRACK_TYPE_AUDIO));
if (reportedTrackCount.get() == 1) {
if (forceAudioTrack && trackType == C.TRACK_TYPE_VIDEO) {
SampleConsumer wrappedAudioSampleConsumer =
checkStateNotNull(
sequenceAssetLoaderListener.onOutputFormat(
FORCE_AUDIO_TRACK_FORMAT
.buildUpon()
.setSampleMimeType(MimeTypes.AUDIO_RAW)
.setPcmEncoding(C.ENCODING_PCM_16BIT)
.build()));
sampleConsumersByTrackType.put(
C.TRACK_TYPE_AUDIO,
new SampleConsumerWrapper(wrappedAudioSampleConsumer, C.TRACK_TYPE_AUDIO));
} else if (forceVideoTrack && trackType == C.TRACK_TYPE_AUDIO) {
SampleConsumer wrappedVideoSampleConsumer =
checkStateNotNull(
sequenceAssetLoaderListener.onOutputFormat(BLANK_IMAGE_BITMAP_FORMAT));
sampleConsumersByTrackType.put(
C.TRACK_TYPE_VIDEO,
new SampleConsumerWrapper(wrappedVideoSampleConsumer, C.TRACK_TYPE_VIDEO));
}
}
} else {
String missingTrackMessage =
trackType == C.TRACK_TYPE_AUDIO
? "The preceding MediaItem does not contain any audio track. If the sequence starts"
+ " with an item without audio track (like images), followed by items with"
+ " audio tracks, then EditedMediaItemSequence.Builder.setForceAudioTrack()"
+ " needs to be set to true."
: "The preceding MediaItem does not contain any video track. If the sequence starts"
+ " with an item without video track (audio only), followed by items with video"
+ " tracks, then EditedMediaItemSequence.Builder.setForceVideoTrack() needs to"
+ " be set to true.";
sampleConsumer =
checkStateNotNull(
sampleConsumersByTrackType.get(trackType),
Util.formatInvariant(
"The preceding MediaItem does not contain any track of type %d. If the"
+ " Composition contains a sequence that starts with items without audio"
+ " tracks (like images), followed by items with audio tracks,"
+ " Composition.Builder.experimentalSetForceAudioTrack() needs to be set to"
+ " true.",
trackType));
checkStateNotNull(sampleConsumersByTrackType.get(trackType), missingTrackMessage);
}
onMediaItemChanged(trackType, format);
if (reportedTrackCount.get() == 1 && sampleConsumersByTrackType.size() == 2) {
@ -700,7 +724,7 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
private GapSignalingAssetLoader(long durationUs) {
this.durationUs = durationUs;
shouldProduceAudio = sequenceHasAudio || forceAudioTrack;
shouldProduceVideo = sequenceHasVideo;
shouldProduceVideo = sequenceHasVideo || forceVideoTrack;
checkState(shouldProduceAudio || shouldProduceVideo);
this.audioTrackFormat = new Format.Builder().setSampleMimeType(MimeTypes.AUDIO_RAW).build();
this.audioTrackDecodedFormat =