Implement blank frame insertion when video track goes missing

If a sequence has both audio and video tracks running but the
next media item has only audio, insert blank frames to keep the
video track going.

PiperOrigin-RevId: 730828269
This commit is contained in:
sheenachhabra 2025-02-25 04:30:24 -08:00 committed by Copybara-Service
parent a016adc6b1
commit bdbcdf1660
2 changed files with 171 additions and 18 deletions

View File

@ -0,0 +1,130 @@
/*
* Copyright 2025 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package androidx.media3.transformer;
import static androidx.media3.transformer.AndroidTestUtil.MP4_ASSET;
import static androidx.media3.transformer.AndroidTestUtil.getVideoTrackOutput;
import static com.google.common.truth.Truth.assertThat;
import static org.junit.Assert.assertThrows;
import android.content.Context;
import androidx.media3.common.MediaItem;
import androidx.media3.extractor.mp4.Mp4Extractor;
import androidx.media3.extractor.text.DefaultSubtitleParserFactory;
import androidx.media3.test.utils.FakeExtractorOutput;
import androidx.media3.test.utils.FakeTrackOutput;
import androidx.media3.test.utils.TestUtil;
import androidx.test.core.app.ApplicationProvider;
import androidx.test.ext.junit.runners.AndroidJUnit4;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TestName;
import org.junit.runner.RunWith;
/**
 * End-to-end instrumentation tests for {@link Transformer} when the {@link EditedMediaItemSequence}
 * has video gaps.
 */
@RunWith(AndroidJUnit4.class)
public class TransformerVideoGapsTest {
  /** A media item carrying both an audio and a video track. */
  private static final EditedMediaItem AUDIO_VIDEO_MEDIA_ITEM =
      new EditedMediaItem.Builder(MediaItem.fromUri(MP4_ASSET.uri)).build();
  /** The same media item with its video track removed, leaving audio only. */
  private static final EditedMediaItem AUDIO_ONLY_MEDIA_ITEM =
      AUDIO_VIDEO_MEDIA_ITEM.buildUpon().setRemoveVideo(true).build();
  // The gap is for 1024ms with 30 fps, which yields 31 blank frames.
  private static final int EXPECTED_BLANK_FRAME_COUNT = 31;

  private final Context context = ApplicationProvider.getApplicationContext();

  @Rule public final TestName testName = new TestName();

  private String testId;

  @Before
  public void setUpTestId() {
    testId = testName.getMethodName();
  }

  // TODO: b/391111085 - Change test when gaps at the start of the sequence are supported.
  @Test
  public void export_withThreeMediaItemsAndFirstMediaItemHavingNoVideo_throws() {
    Transformer transformer = new Transformer.Builder(context).build();
    Composition composition =
        buildComposition(AUDIO_ONLY_MEDIA_ITEM, AUDIO_VIDEO_MEDIA_ITEM, AUDIO_VIDEO_MEDIA_ITEM);
    TransformerAndroidTestRunner transformerAndroidTestRunner =
        new TransformerAndroidTestRunner.Builder(context, transformer).build();

    assertThrows(
        ExportException.class, () -> transformerAndroidTestRunner.run(testId, composition));
  }

  @Test
  public void
      export_withThreeMediaItemsAndSecondMediaItemHavingNoVideo_insertsBlankFrameForSecondMediaItem()
          throws Exception {
    exportAndAssertBlankFramesInserted(
        AUDIO_VIDEO_MEDIA_ITEM, AUDIO_ONLY_MEDIA_ITEM, AUDIO_VIDEO_MEDIA_ITEM);
  }

  @Test
  public void
      export_withThreeMediaItemsAndLastMediaItemHavingNoVideo_insertsBlankFrameForLastMediaItem()
          throws Exception {
    exportAndAssertBlankFramesInserted(
        AUDIO_VIDEO_MEDIA_ITEM, AUDIO_VIDEO_MEDIA_ITEM, AUDIO_ONLY_MEDIA_ITEM);
  }

  /** Builds a {@link Composition} containing a single sequence of the given items. */
  private static Composition buildComposition(EditedMediaItem... editedMediaItems) {
    return new Composition.Builder(new EditedMediaItemSequence.Builder(editedMediaItems).build())
        .build();
  }

  /**
   * Exports a single-sequence composition of the given items and asserts that the output video
   * track contains the frames of the two audio/video items plus the blank frames that fill the
   * single audio-only gap.
   *
   * @param editedMediaItems The sequence items; exactly one is expected to be audio-only.
   * @throws Exception If the export fails or the output file cannot be parsed.
   */
  private void exportAndAssertBlankFramesInserted(EditedMediaItem... editedMediaItems)
      throws Exception {
    Transformer transformer = new Transformer.Builder(context).build();
    Composition composition = buildComposition(editedMediaItems);

    ExportTestResult result =
        new TransformerAndroidTestRunner.Builder(context, transformer)
            .build()
            .run(testId, composition);

    FakeExtractorOutput fakeExtractorOutput =
        TestUtil.extractAllSamplesFromFilePath(
            new Mp4Extractor(new DefaultSubtitleParserFactory()), result.filePath);
    FakeTrackOutput videoTrackOutput = getVideoTrackOutput(fakeExtractorOutput);
    assertThat(videoTrackOutput.getSampleCount())
        .isEqualTo(2 * MP4_ASSET.videoFrameCount + EXPECTED_BLANK_FRAME_COUNT);
  }
}

View File

@ -27,15 +27,18 @@ import static androidx.media3.transformer.Transformer.PROGRESS_STATE_NOT_STARTED
import static androidx.media3.transformer.TransformerUtil.getProcessedTrackType;
import android.graphics.Bitmap;
import android.graphics.Color;
import android.os.Looper;
import android.view.Surface;
import androidx.annotation.Nullable;
import androidx.media3.common.C;
import androidx.media3.common.ColorInfo;
import androidx.media3.common.Format;
import androidx.media3.common.MediaItem;
import androidx.media3.common.MimeTypes;
import androidx.media3.common.OnInputFrameProcessedListener;
import androidx.media3.common.util.Clock;
import androidx.media3.common.util.ConstantRateTimestampIterator;
import androidx.media3.common.util.HandlerWrapper;
import androidx.media3.common.util.TimestampIterator;
import androidx.media3.common.util.Util;
@ -62,6 +65,18 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
.setChannelCount(2)
.build();
// Format describing the 1x1 raw bitmap used to generate blank frames when a media item in the
// sequence has no video track.
private static final Format BLANK_IMAGE_BITMAP_FORMAT =
new Format.Builder()
.setWidth(1)
.setHeight(1)
.setSampleMimeType(MimeTypes.IMAGE_RAW)
.setColorInfo(ColorInfo.SRGB_BT709_FULL)
.build();
// Rate at which the blank bitmap is repeated to fill a video gap.
private static final float BLANK_IMAGE_FRAME_RATE = 30.0f;
// Delay before retrying an operation that could not complete immediately (e.g. queueing a blank
// frame while the consumer is not yet ready).
private static final int RETRY_DELAY_MS = 10;
private final List<EditedMediaItem> editedMediaItems;
private final boolean isLooping;
private final boolean forceAudioTrack;
@ -314,14 +329,6 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
new SampleConsumerWrapper(wrappedAudioSampleConsumer, C.TRACK_TYPE_AUDIO));
}
} else {
// TODO: b/270533049 - Remove the check below when implementing blank video frames generation.
boolean videoTrackDisappeared =
reportedTrackCount.get() == 1
&& trackType == C.TRACK_TYPE_AUDIO
&& sampleConsumersByTrackType.size() == 2;
checkState(
!videoTrackDisappeared,
"Inputs with no video track are not supported when the output contains a video track");
sampleConsumer =
checkStateNotNull(
sampleConsumersByTrackType.get(trackType),
@ -335,17 +342,36 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
}
onMediaItemChanged(trackType, format);
if (reportedTrackCount.get() == 1 && sampleConsumersByTrackType.size() == 2) {
for (Map.Entry<Integer, SampleConsumerWrapper> entry :
sampleConsumersByTrackType.entrySet()) {
int outputTrackType = entry.getKey();
if (trackType != outputTrackType) {
onMediaItemChanged(outputTrackType, /* outputFormat= */ null);
}
// One track is missing from the current media item.
if (trackType == C.TRACK_TYPE_AUDIO) {
// Fill video gap with blank frames.
onMediaItemChanged(C.TRACK_TYPE_VIDEO, /* outputFormat= */ BLANK_IMAGE_BITMAP_FORMAT);
nonEndedTrackCount.incrementAndGet();
Bitmap bitmap =
Bitmap.createBitmap(
new int[] {Color.BLACK}, /* width= */ 1, /* height= */ 1, Bitmap.Config.ARGB_8888);
handler.post(() -> insertBlankFrames(bitmap));
} else {
// Generate audio silence in the AudioGraph by signalling null format.
onMediaItemChanged(C.TRACK_TYPE_AUDIO, /* outputFormat= */ null);
}
}
return sampleConsumer;
}
/**
 * Queues the given blank bitmap into the video sample consumer for the duration of the current
 * asset, then signals end of video input. If the consumer cannot accept the bitmap yet, retries
 * after {@code RETRY_DELAY_MS}.
 */
private void insertBlankFrames(Bitmap bitmap) {
SampleConsumerWrapper videoSampleConsumer =
checkNotNull(sampleConsumersByTrackType.get(C.TRACK_TYPE_VIDEO));
int queueResult =
videoSampleConsumer.queueInputBitmap(
bitmap,
new ConstantRateTimestampIterator(currentAssetDurationUs, BLANK_IMAGE_FRAME_RATE));
if (queueResult == SampleConsumer.INPUT_RESULT_SUCCESS) {
videoSampleConsumer.signalEndOfVideoInput();
} else {
// Consumer not ready; try again shortly.
handler.postDelayed(() -> insertBlankFrames(bitmap), RETRY_DELAY_MS);
}
}
private void onMediaItemChanged(int trackType, @Nullable Format outputFormat) {
@Nullable
OnMediaItemChangedListener onMediaItemChangedListener =
@ -646,8 +672,6 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
*/
private final class GapSignalingAssetLoader implements AssetLoader {
private static final int OUTPUT_FORMAT_RETRY_DELAY_MS = 10;
private final long durationUs;
private final Format trackFormat;
private final Format decodedFormat;
@ -700,8 +724,7 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
outputtedFormat = true;
sampleConsumerWrapper.onGapSignalled();
} else {
handler.postDelayed(
this::outputFormatToSequenceAssetLoader, OUTPUT_FORMAT_RETRY_DELAY_MS);
handler.postDelayed(this::outputFormatToSequenceAssetLoader, RETRY_DELAY_MS);
}
} catch (ExportException e) {