Add support for video gaps via addGap() API

Previously, a gap item always meant an audio gap.
Now a gap item is filled with audio, video, or both,
depending on which tracks are present in the
Sequence.
If the very first item is a gap, it is filled with
audio only. Support for a video gap at the start will be
added in a follow-up CL.
If the first item is a gap then `forceAudioTrack`
is set to true.

PiperOrigin-RevId: 733422557
This commit is contained in:
sheenachhabra 2025-03-04 12:21:49 -08:00 committed by Copybara-Service
parent 5a0f4c6b3f
commit 06f340005f
6 changed files with 278 additions and 48 deletions

View File

@ -46,6 +46,8 @@ public class TransformerVideoGapsTest {
new EditedMediaItem.Builder(MediaItem.fromUri(MP4_ASSET.uri)).build(); new EditedMediaItem.Builder(MediaItem.fromUri(MP4_ASSET.uri)).build();
private static final EditedMediaItem AUDIO_ONLY_MEDIA_ITEM = private static final EditedMediaItem AUDIO_ONLY_MEDIA_ITEM =
AUDIO_VIDEO_MEDIA_ITEM.buildUpon().setRemoveVideo(true).build(); AUDIO_VIDEO_MEDIA_ITEM.buildUpon().setRemoveVideo(true).build();
private static final EditedMediaItem VIDEO_ONLY_MEDIA_ITEM =
AUDIO_VIDEO_MEDIA_ITEM.buildUpon().setRemoveAudio(true).build();
private final Context context = ApplicationProvider.getApplicationContext(); private final Context context = ApplicationProvider.getApplicationContext();
@Rule public final TestName testName = new TestName(); @Rule public final TestName testName = new TestName();
@ -131,4 +133,211 @@ public class TransformerVideoGapsTest {
assertThat(videoTrackOutput.getSampleCount()) assertThat(videoTrackOutput.getSampleCount())
.isEqualTo(2 * MP4_ASSET.videoFrameCount + expectedBlankFrames); .isEqualTo(2 * MP4_ASSET.videoFrameCount + expectedBlankFrames);
} }
// TODO: b/391111085 - Change test when gaps at the start of the sequence are supported.
@Test
public void export_withTwoVideoOnlyMediaItemsAndGapAtStart_throws() {
  // A gap as the very first item of a video-only sequence is not yet supported, so the
  // export is expected to fail with an ExportException.
  EditedMediaItemSequence sequenceWithLeadingGap =
      new EditedMediaItemSequence.Builder()
          .addGap(/* durationUs= */ 1_000_000)
          .addItem(VIDEO_ONLY_MEDIA_ITEM)
          .addItem(VIDEO_ONLY_MEDIA_ITEM)
          .build();
  Composition composition = new Composition.Builder(sequenceWithLeadingGap).build();
  Transformer transformer = new Transformer.Builder(context).build();
  TransformerAndroidTestRunner testRunner =
      new TransformerAndroidTestRunner.Builder(context, transformer).build();

  assertThrows(ExportException.class, () -> testRunner.run(testId, composition));
}
@Test
public void export_withTwoVideoOnlyMediaItemsAndGapInMiddle_insertsBlankFramesForGap()
    throws Exception {
  assumeFormatsSupported(
      context,
      testId,
      /* inputFormat= */ MP4_ASSET.videoFormat,
      /* outputFormat= */ MP4_ASSET.videoFormat);
  // Two video-only items separated by a one second gap.
  EditedMediaItemSequence sequence =
      new EditedMediaItemSequence.Builder()
          .addItem(VIDEO_ONLY_MEDIA_ITEM)
          .addGap(/* durationUs= */ 1_000_000)
          .addItem(VIDEO_ONLY_MEDIA_ITEM)
          .build();
  Composition composition = new Composition.Builder(sequence).build();
  Transformer transformer = new Transformer.Builder(context).build();

  ExportTestResult result =
      new TransformerAndroidTestRunner.Builder(context, transformer)
          .build()
          .run(testId, composition);

  FakeExtractorOutput extractorOutput =
      TestUtil.extractAllSamplesFromFilePath(
          new Mp4Extractor(new DefaultSubtitleParserFactory()), result.filePath);
  FakeTrackOutput videoTrackOutput = getVideoTrackOutput(extractorOutput);
  // A one second gap rendered at 30 fps produces 30 blank frames.
  int expectedBlankFrames = 30;
  assertThat(videoTrackOutput.getSampleCount())
      .isEqualTo(2 * MP4_ASSET.videoFrameCount + expectedBlankFrames);
}
@Test
public void export_withTwoVideoOnlyMediaItemsAndGapAtTheEnd_insertsBlankFramesForGap()
    throws Exception {
  assumeFormatsSupported(
      context,
      testId,
      /* inputFormat= */ MP4_ASSET.videoFormat,
      /* outputFormat= */ MP4_ASSET.videoFormat);
  // Two video-only items followed by a trailing one second gap.
  EditedMediaItemSequence sequence =
      new EditedMediaItemSequence.Builder()
          .addItem(VIDEO_ONLY_MEDIA_ITEM)
          .addItem(VIDEO_ONLY_MEDIA_ITEM)
          .addGap(/* durationUs= */ 1_000_000)
          .build();
  Composition composition = new Composition.Builder(sequence).build();
  Transformer transformer = new Transformer.Builder(context).build();

  ExportTestResult result =
      new TransformerAndroidTestRunner.Builder(context, transformer)
          .build()
          .run(testId, composition);

  FakeExtractorOutput extractorOutput =
      TestUtil.extractAllSamplesFromFilePath(
          new Mp4Extractor(new DefaultSubtitleParserFactory()), result.filePath);
  FakeTrackOutput videoTrackOutput = getVideoTrackOutput(extractorOutput);
  // A one second trailing gap at 30 fps yields 30 blank frames.
  int expectedBlankFrames = 30;
  assertThat(videoTrackOutput.getSampleCount())
      .isEqualTo(2 * MP4_ASSET.videoFrameCount + expectedBlankFrames);
}
// TODO: b/391111085 - Change test when gaps at the start of the sequence are supported.
@Test
public void export_withTwoMediaItemsAndGapAtStart_throws() {
  // The sequence has video, so a gap as the very first item is not yet supported and the
  // export is expected to fail with an ExportException.
  EditedMediaItemSequence sequenceWithLeadingGap =
      new EditedMediaItemSequence.Builder()
          .addGap(/* durationUs= */ 1_000_000)
          .addItem(AUDIO_VIDEO_MEDIA_ITEM)
          .addItem(AUDIO_VIDEO_MEDIA_ITEM)
          .build();
  Composition composition = new Composition.Builder(sequenceWithLeadingGap).build();
  Transformer transformer = new Transformer.Builder(context).build();
  TransformerAndroidTestRunner testRunner =
      new TransformerAndroidTestRunner.Builder(context, transformer).build();

  assertThrows(ExportException.class, () -> testRunner.run(testId, composition));
}
@Test
public void export_withTwoMediaItemsAndGapInMiddle_insertsBlankFramesForGap() throws Exception {
  assumeFormatsSupported(
      context,
      testId,
      /* inputFormat= */ MP4_ASSET.videoFormat,
      /* outputFormat= */ MP4_ASSET.videoFormat);
  // Two audio-video items separated by a one second gap.
  EditedMediaItemSequence sequence =
      new EditedMediaItemSequence.Builder()
          .addItem(AUDIO_VIDEO_MEDIA_ITEM)
          .addGap(/* durationUs= */ 1_000_000)
          .addItem(AUDIO_VIDEO_MEDIA_ITEM)
          .build();
  Composition composition = new Composition.Builder(sequence).build();
  Transformer transformer = new Transformer.Builder(context).build();

  ExportTestResult result =
      new TransformerAndroidTestRunner.Builder(context, transformer)
          .build()
          .run(testId, composition);

  FakeExtractorOutput extractorOutput =
      TestUtil.extractAllSamplesFromFilePath(
          new Mp4Extractor(new DefaultSubtitleParserFactory()), result.filePath);
  FakeTrackOutput videoTrackOutput = getVideoTrackOutput(extractorOutput);
  // A one second gap rendered at 30 fps produces 30 blank frames.
  int expectedBlankFrames = 30;
  assertThat(videoTrackOutput.getSampleCount())
      .isEqualTo(2 * MP4_ASSET.videoFrameCount + expectedBlankFrames);
}
@Test
public void export_withTwoMediaItemsAndGapAtTheEnd_insertsBlankFramesForGap() throws Exception {
  assumeFormatsSupported(
      context,
      testId,
      /* inputFormat= */ MP4_ASSET.videoFormat,
      /* outputFormat= */ MP4_ASSET.videoFormat);
  // Two audio-video items followed by a trailing one second gap.
  EditedMediaItemSequence sequence =
      new EditedMediaItemSequence.Builder()
          .addItem(AUDIO_VIDEO_MEDIA_ITEM)
          .addItem(AUDIO_VIDEO_MEDIA_ITEM)
          .addGap(/* durationUs= */ 1_000_000)
          .build();
  Composition composition = new Composition.Builder(sequence).build();
  Transformer transformer = new Transformer.Builder(context).build();

  ExportTestResult result =
      new TransformerAndroidTestRunner.Builder(context, transformer)
          .build()
          .run(testId, composition);

  FakeExtractorOutput extractorOutput =
      TestUtil.extractAllSamplesFromFilePath(
          new Mp4Extractor(new DefaultSubtitleParserFactory()), result.filePath);
  FakeTrackOutput videoTrackOutput = getVideoTrackOutput(extractorOutput);
  // A one second trailing gap at 30 fps yields 30 blank frames.
  int expectedBlankFrames = 30;
  assertThat(videoTrackOutput.getSampleCount())
      .isEqualTo(2 * MP4_ASSET.videoFrameCount + expectedBlankFrames);
}
// Exercises every item kind in one sequence: audio+video, audio-only, video-only and a
// trailing gap. Blank frames are expected for both the audio-only item and the gap.
@Test
public void export_withMixOfAudioVideoAndGap_insertsBlankFramesAsExpected() throws Exception {
assumeFormatsSupported(
context,
testId,
/* inputFormat= */ MP4_ASSET.videoFormat,
/* outputFormat= */ MP4_ASSET.videoFormat);
Transformer transformer = new Transformer.Builder(context).build();
Composition composition =
new Composition.Builder(
new EditedMediaItemSequence.Builder()
.addItem(AUDIO_VIDEO_MEDIA_ITEM)
.addItem(AUDIO_ONLY_MEDIA_ITEM)
.addItem(VIDEO_ONLY_MEDIA_ITEM)
.addGap(/* durationUs= */ 1_000_000)
.build())
.build();
ExportTestResult result =
new TransformerAndroidTestRunner.Builder(context, transformer)
.build()
.run(testId, composition);
FakeExtractorOutput fakeExtractorOutput =
TestUtil.extractAllSamplesFromFilePath(
new Mp4Extractor(new DefaultSubtitleParserFactory()), result.filePath);
FakeTrackOutput videoTrackOutput = getVideoTrackOutput(fakeExtractorOutput);
// The audio-only item (not a gap) lasts 1024 ms; at 30 fps its video track is padded
// with 31 blank frames.
int expectedBlankFramesForAudioOnlyItem = 31;
// The trailing gap is 1 second; at 30 fps it produces 30 blank frames.
int expectedBlankFramesForOneSecGap = 30;
assertThat(videoTrackOutput.getSampleCount())
.isEqualTo(
MP4_ASSET.videoFrameCount
+ expectedBlankFramesForAudioOnlyItem
+ MP4_ASSET.videoFrameCount
+ expectedBlankFramesForOneSecGap);
}
} }

View File

@ -87,7 +87,7 @@ public final class EditedMediaItemSequence {
* *
* <p>A gap is a period of time with no media. * <p>A gap is a period of time with no media.
* *
* <p>Gaps are only supported in sequences of audio. * <p>Gaps at the start of the sequence are not supported if the sequence has video.
* *
* @param durationUs The duration of the gap, in microseconds. * @param durationUs The duration of the gap, in microseconds.
* @return This builder, for convenience. * @return This builder, for convenience.

View File

@ -125,6 +125,8 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
private volatile long currentAssetDurationAfterEffectsAppliedUs; private volatile long currentAssetDurationAfterEffectsAppliedUs;
private volatile long maxSequenceDurationUs; private volatile long maxSequenceDurationUs;
private volatile boolean isMaxSequenceDurationUsFinal; private volatile boolean isMaxSequenceDurationUsFinal;
private volatile boolean sequenceHasAudio;
private volatile boolean sequenceHasVideo;
public SequenceAssetLoader( public SequenceAssetLoader(
EditedMediaItemSequence sequence, EditedMediaItemSequence sequence,
@ -136,7 +138,7 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
Looper looper) { Looper looper) {
editedMediaItems = sequence.editedMediaItems; editedMediaItems = sequence.editedMediaItems;
isLooping = sequence.isLooping; isLooping = sequence.isLooping;
this.forceAudioTrack = forceAudioTrack; this.forceAudioTrack = forceAudioTrack || sequence.editedMediaItems.get(0).isGap();
this.assetLoaderFactory = new GapInterceptingAssetLoaderFactory(assetLoaderFactory); this.assetLoaderFactory = new GapInterceptingAssetLoaderFactory(assetLoaderFactory);
this.compositionSettings = compositionSettings; this.compositionSettings = compositionSettings;
sequenceAssetLoaderListener = listener; sequenceAssetLoaderListener = listener;
@ -309,6 +311,11 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
SampleConsumerWrapper sampleConsumer; SampleConsumerWrapper sampleConsumer;
if (isCurrentAssetFirstAsset) { if (isCurrentAssetFirstAsset) {
if (trackType == C.TRACK_TYPE_VIDEO) {
sequenceHasVideo = true;
} else {
sequenceHasAudio = true;
}
@Nullable @Nullable
SampleConsumer wrappedSampleConsumer = sequenceAssetLoaderListener.onOutputFormat(format); SampleConsumer wrappedSampleConsumer = sequenceAssetLoaderListener.onOutputFormat(format);
if (wrappedSampleConsumer == null) { if (wrappedSampleConsumer == null) {
@ -394,7 +401,9 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
/* durationUs= */ (trackType == C.TRACK_TYPE_AUDIO && isLooping && decodeAudio) /* durationUs= */ (trackType == C.TRACK_TYPE_AUDIO && isLooping && decodeAudio)
? C.TIME_UNSET ? C.TIME_UNSET
: currentAssetDurationUs, : currentAssetDurationUs,
/* decodedFormat= */ editedMediaItem.isGap() ? null : outputFormat, /* decodedFormat= */ (editedMediaItem.isGap() && trackType == C.TRACK_TYPE_AUDIO)
? null
: outputFormat,
/* isLast= */ isLastMediaItemInSequence()); /* isLast= */ isLastMediaItemInSequence());
} }
@ -596,9 +605,9 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
} }
} }
private void onGapSignalled() { private void onAudioGapSignalled() {
nonEndedTrackCount.decrementAndGet(); int nonEndedTracks = nonEndedTrackCount.decrementAndGet();
if (!isLastMediaItemInSequence()) { if (nonEndedTracks == 0 && !isLastMediaItemInSequence()) {
switchAssetLoader(); switchAssetLoader();
} }
} }
@ -680,15 +689,21 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
private final class GapSignalingAssetLoader implements AssetLoader { private final class GapSignalingAssetLoader implements AssetLoader {
private final long durationUs; private final long durationUs;
private final Format trackFormat; private final boolean shouldProduceAudio;
private final Format decodedFormat; private final boolean shouldProduceVideo;
private final Format audioTrackFormat;
private final Format audioTrackDecodedFormat;
private boolean outputtedFormat; private boolean producedAudio;
private boolean producedVideo;
private GapSignalingAssetLoader(long durationUs) { private GapSignalingAssetLoader(long durationUs) {
this.durationUs = durationUs; this.durationUs = durationUs;
this.trackFormat = new Format.Builder().setSampleMimeType(MimeTypes.AUDIO_RAW).build(); shouldProduceAudio = sequenceHasAudio || forceAudioTrack;
this.decodedFormat = shouldProduceVideo = sequenceHasVideo;
checkState(shouldProduceAudio || shouldProduceVideo);
this.audioTrackFormat = new Format.Builder().setSampleMimeType(MimeTypes.AUDIO_RAW).build();
this.audioTrackDecodedFormat =
new Format.Builder() new Format.Builder()
.setSampleMimeType(MimeTypes.AUDIO_RAW) .setSampleMimeType(MimeTypes.AUDIO_RAW)
.setSampleRate(44100) .setSampleRate(44100)
@ -700,14 +715,28 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
@Override @Override
public void start() { public void start() {
onDurationUs(durationUs); onDurationUs(durationUs);
onTrackCount(1); int trackCount = shouldProduceAudio && shouldProduceVideo ? 2 : 1;
onTrackAdded(trackFormat, SUPPORTED_OUTPUT_TYPE_DECODED); onTrackCount(trackCount);
if (shouldProduceAudio) {
onTrackAdded(audioTrackFormat, SUPPORTED_OUTPUT_TYPE_DECODED);
}
if (shouldProduceVideo) {
onTrackAdded(BLANK_IMAGE_BITMAP_FORMAT, SUPPORTED_OUTPUT_TYPE_DECODED);
}
outputFormatToSequenceAssetLoader(); outputFormatToSequenceAssetLoader();
} }
@Override @Override
public @Transformer.ProgressState int getProgress(ProgressHolder progressHolder) { public @Transformer.ProgressState int getProgress(ProgressHolder progressHolder) {
progressHolder.progress = outputtedFormat ? 99 : 0; boolean audioPending = shouldProduceAudio && !producedAudio;
boolean videoPending = shouldProduceVideo && !producedVideo;
if (audioPending && videoPending) {
progressHolder.progress = 0;
} else if (!audioPending && !videoPending) {
progressHolder.progress = 99;
} else {
progressHolder.progress = 50;
}
return PROGRESS_STATE_AVAILABLE; return PROGRESS_STATE_AVAILABLE;
} }
@ -721,19 +750,35 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
/** Outputs the gap format, scheduling to try again if unsuccessful. */ /** Outputs the gap format, scheduling to try again if unsuccessful. */
private void outputFormatToSequenceAssetLoader() { private void outputFormatToSequenceAssetLoader() {
try { boolean audioPending = shouldProduceAudio && !producedAudio;
if (outputtedFormat) { boolean videoPending = shouldProduceVideo && !producedVideo;
return; checkState(audioPending || videoPending);
}
@Nullable SampleConsumerWrapper sampleConsumerWrapper = onOutputFormat(decodedFormat); try {
if (sampleConsumerWrapper != null) { boolean shouldRetry = false;
outputtedFormat = true; if (audioPending) {
sampleConsumerWrapper.onGapSignalled(); @Nullable
SampleConsumerWrapper sampleConsumerWrapper = onOutputFormat(audioTrackDecodedFormat);
if (sampleConsumerWrapper == null) {
shouldRetry = true;
} else { } else {
sampleConsumerWrapper.onAudioGapSignalled();
producedAudio = true;
}
}
if (videoPending) {
@Nullable
SampleConsumerWrapper sampleConsumerWrapper = onOutputFormat(BLANK_IMAGE_BITMAP_FORMAT);
if (sampleConsumerWrapper == null) {
shouldRetry = true;
} else {
insertBlankFrames(getBlankImageBitmap());
producedVideo = true;
}
}
if (shouldRetry) {
handler.postDelayed(this::outputFormatToSequenceAssetLoader, RETRY_DELAY_MS); handler.postDelayed(this::outputFormatToSequenceAssetLoader, RETRY_DELAY_MS);
} }
} catch (ExportException e) { } catch (ExportException e) {
onError(e); onError(e);
} catch (RuntimeException e) { } catch (RuntimeException e) {

View File

@ -600,10 +600,6 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
@C.TrackType @C.TrackType
int trackType = getProcessedTrackType(firstAssetLoaderInputFormat.sampleMimeType); int trackType = getProcessedTrackType(firstAssetLoaderInputFormat.sampleMimeType);
checkArgument(
trackType != TRACK_TYPE_VIDEO || !composition.sequences.get(sequenceIndex).hasGaps(),
"Gaps in video sequences are not supported.");
synchronized (assetLoaderLock) { synchronized (assetLoaderLock) {
assetLoaderInputTracker.registerTrack(sequenceIndex, firstAssetLoaderInputFormat); assetLoaderInputTracker.registerTrack(sequenceIndex, firstAssetLoaderInputFormat);
if (assetLoaderInputTracker.hasRegisteredAllTracks()) { if (assetLoaderInputTracker.hasRegisteredAllTracks()) {

View File

@ -19,7 +19,6 @@ package androidx.media3.transformer;
import static androidx.media3.common.VideoFrameProcessor.INPUT_TYPE_BITMAP; import static androidx.media3.common.VideoFrameProcessor.INPUT_TYPE_BITMAP;
import static androidx.media3.common.VideoFrameProcessor.INPUT_TYPE_SURFACE; import static androidx.media3.common.VideoFrameProcessor.INPUT_TYPE_SURFACE;
import static androidx.media3.common.VideoFrameProcessor.INPUT_TYPE_TEXTURE_ID; import static androidx.media3.common.VideoFrameProcessor.INPUT_TYPE_TEXTURE_ID;
import static androidx.media3.common.util.Assertions.checkArgument;
import static androidx.media3.common.util.Assertions.checkNotNull; import static androidx.media3.common.util.Assertions.checkNotNull;
import android.graphics.Bitmap; import android.graphics.Bitmap;
@ -59,7 +58,6 @@ import java.util.concurrent.atomic.AtomicLong;
long durationUs, long durationUs,
@Nullable Format decodedFormat, @Nullable Format decodedFormat,
boolean isLast) { boolean isLast) {
checkArgument(!editedMediaItem.isGap());
boolean isSurfaceAssetLoaderMediaItem = isMediaItemForSurfaceAssetLoader(editedMediaItem); boolean isSurfaceAssetLoaderMediaItem = isMediaItemForSurfaceAssetLoader(editedMediaItem);
durationUs = editedMediaItem.getDurationAfterEffectsApplied(durationUs); durationUs = editedMediaItem.getDurationAfterEffectsApplied(durationUs);
if (decodedFormat != null) { if (decodedFormat != null) {

View File

@ -538,24 +538,6 @@ public final class SequenceExportTest {
assertThat(getRootCause(exception)).hasMessageThat().isEqualTo("Gaps can not be transmuxed."); assertThat(getRootCause(exception)).hasMessageThat().isEqualTo("Gaps can not be transmuxed.");
} }
@Test
public void start_videoGap_throws() throws Exception {
Transformer transformer = new TestTransformerBuilder(context).build();
EditedMediaItem audioVideoItem =
new EditedMediaItem.Builder(MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW_VIDEO))
.build();
EditedMediaItemSequence sequence =
new EditedMediaItemSequence.Builder().addItem(audioVideoItem).addGap(500_000).build();
transformer.start(new Composition.Builder(sequence).build(), outputDir.newFile().getPath());
ExportException exception =
assertThrows(ExportException.class, () -> TransformerTestRunner.runLooper(transformer));
assertThat(getRootCause(exception))
.hasMessageThat()
.isEqualTo("Gaps in video sequences are not supported.");
}
@Test @Test
public void start_gapVideo_throws() throws Exception { public void start_gapVideo_throws() throws Exception {
Transformer transformer = new TestTransformerBuilder(context).build(); Transformer transformer = new TestTransformerBuilder(context).build();