Fix trim optimization logic when edit lists exist

Also makes the muxer shift the first video timestamp to zero, if it is not already.

The trim position should respect the media timeline.

For example, in a video that is 10s long (without an edit list), if an edit list
adds 1_000ms to each video sample and we trim 100ms, the expected result is (see the sketch below):

- The video duration is 10.9s (`10s + 1s edit - 0.1s trim`)
- The first video frame time would be at 0.9s (`1s edit - 0.1s trim`)
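
A minimal sketch of the arithmetic above, assuming the edit list shifts every video sample
forward by the same fixed amount (the helper names below are illustrative, not part of this
change):

```java
/** Illustrative helper, not part of the commit: expected values for edit list + trim. */
final class EditListTrimExample {

  /** Expected output duration: the edit list extends the timeline, the trim removes the start. */
  static long expectedDurationUs(long mediaDurationUs, long editListShiftUs, long trimStartUs) {
    return mediaDurationUs + editListShiftUs - trimStartUs;
  }

  /** Expected presentation time of the first output video frame. */
  static long expectedFirstVideoTimestampUs(long editListShiftUs, long trimStartUs) {
    return editListShiftUs - trimStartUs;
  }

  public static void main(String[] args) {
    long mediaDurationUs = 10_000_000; // 10s of video samples.
    long editListShiftUs = 1_000_000; // The edit list adds 1s to every video sample.
    long trimStartUs = 100_000; // Trim the first 100ms.
    // Prints 10900000 (10.9s) and 900000 (0.9s), matching the bullets above.
    System.out.println(expectedDurationUs(mediaDurationUs, editListShiftUs, trimStartUs));
    System.out.println(expectedFirstVideoTimestampUs(editListShiftUs, trimStartUs));
  }
}
```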

PiperOrigin-RevId: 692187399
claincly 2024-11-01 08:33:43 -07:00 committed by Copybara-Service
parent 38e1efafc2
commit b0c6106882
6 changed files with 157 additions and 6 deletions

AndroidTestUtil.java

@@ -256,6 +256,31 @@ public final class AndroidTestUtil {
.setFrameRate(29.97f)
.build())
.build();
/** This file contains an edit list that adds one second to all video frames. */
public static final AssetInfo MP4_POSITIVE_SHIFT_EDIT_LIST =
new AssetInfo.Builder("asset:///media/mp4/edit_list_positive_shift.mp4")
.setVideoFormat(
new Format.Builder()
.setSampleMimeType(VIDEO_H264)
.setWidth(1920)
.setHeight(1080)
.setFrameRate(30.f)
.build())
.build();
/** This file contains an edit list that subtracts one second from all video frames. */
public static final AssetInfo MP4_NEGATIVE_SHIFT_EDIT_LIST =
new AssetInfo.Builder("asset:///media/mp4/edit_list_negative_shift.mp4")
.setVideoFormat(
new Format.Builder()
.setSampleMimeType(VIDEO_H264)
.setWidth(1920)
.setHeight(1080)
.setFrameRate(30.f)
.build())
.build();
public static final AssetInfo MP4_TRIM_OPTIMIZATION_270 =
new AssetInfo.Builder(
"asset:///media/mp4/internal_emulator_transformer_output_270_rotated.mp4")

TransformerEndToEndTest.java

@@ -30,6 +30,7 @@ import static androidx.media3.transformer.AndroidTestUtil.MP4_ASSET_WITH_INCREAS
import static androidx.media3.transformer.AndroidTestUtil.MP4_ASSET_WITH_INCREASING_TIMESTAMPS_320W_240H_15S;
import static androidx.media3.transformer.AndroidTestUtil.MP4_ASSET_WITH_SHORTER_AUDIO;
import static androidx.media3.transformer.AndroidTestUtil.MP4_PORTRAIT_ASSET;
import static androidx.media3.transformer.AndroidTestUtil.MP4_POSITIVE_SHIFT_EDIT_LIST;
import static androidx.media3.transformer.AndroidTestUtil.MP4_TRIM_OPTIMIZATION;
import static androidx.media3.transformer.AndroidTestUtil.MP4_TRIM_OPTIMIZATION_180;
import static androidx.media3.transformer.AndroidTestUtil.MP4_TRIM_OPTIMIZATION_270;
@@ -66,6 +67,7 @@ import android.os.HandlerThread;
import android.os.Looper;
import android.os.Message;
import android.util.Pair;
import androidx.annotation.Nullable;
import androidx.media3.common.C;
import androidx.media3.common.Effect;
import androidx.media3.common.Format;
@@ -112,6 +114,7 @@ import com.google.common.collect.ImmutableSet;
import java.io.File;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
import org.junit.Before;
@@ -1022,6 +1025,68 @@ public class TransformerEndToEndTest {
assertThat(new File(result.filePath).length()).isGreaterThan(0);
}
@Test
public void
clippedMediaWithPositiveEditList_trimOptimizationEnabled_setsFirstVideoTimestampToZero()
throws Exception {
MediaItem mediaItem =
new MediaItem.Builder()
.setUri(MP4_POSITIVE_SHIFT_EDIT_LIST.uri)
.setClippingConfiguration(
new MediaItem.ClippingConfiguration.Builder().setStartPositionUs(100_000).build())
.build();
EditedMediaItem editedMediaItem = new EditedMediaItem.Builder(mediaItem).build();
ExportTestResult result =
new TransformerAndroidTestRunner.Builder(
context,
new Transformer.Builder(context)
.experimentalSetTrimOptimizationEnabled(true)
.build())
.build()
.run(testId, editedMediaItem);
Mp4Extractor mp4Extractor = new Mp4Extractor(new DefaultSubtitleParserFactory());
FakeExtractorOutput fakeExtractorOutput =
TestUtil.extractAllSamplesFromFilePath(mp4Extractor, checkNotNull(result.filePath));
assertThat(result.exportResult.fileSizeBytes).isGreaterThan(0);
List<Long> videoTimestampsUs =
checkNotNull(getVideoTrackOutput(fakeExtractorOutput)).getSampleTimesUs();
assertThat(videoTimestampsUs).hasSize(270);
assertThat(videoTimestampsUs.get(0)).isEqualTo(0);
// The second sample is originally at 1_033_333, clipping at 100_000 results in 933_333.
assertThat(videoTimestampsUs.get(1)).isEqualTo(933_333);
}
@Test
public void
clippedMediaWithPositiveEditList_trimOptimizationDisabled_setsFirstVideoTimestampToZero()
throws Exception {
MediaItem mediaItem =
new MediaItem.Builder()
.setUri(MP4_POSITIVE_SHIFT_EDIT_LIST.uri)
.setClippingConfiguration(
new MediaItem.ClippingConfiguration.Builder().setStartPositionUs(100_000).build())
.build();
EditedMediaItem editedMediaItem = new EditedMediaItem.Builder(mediaItem).build();
ExportTestResult result =
new TransformerAndroidTestRunner.Builder(context, new Transformer.Builder(context).build())
.build()
.run(testId, editedMediaItem);
Mp4Extractor mp4Extractor = new Mp4Extractor(new DefaultSubtitleParserFactory());
FakeExtractorOutput fakeExtractorOutput =
TestUtil.extractAllSamplesFromFilePath(mp4Extractor, checkNotNull(result.filePath));
assertThat(result.exportResult.fileSizeBytes).isGreaterThan(0);
List<Long> videoTimestampsUs =
checkNotNull(getVideoTrackOutput(fakeExtractorOutput)).getSampleTimesUs();
assertThat(videoTimestampsUs).hasSize(270);
assertThat(videoTimestampsUs.get(0)).isEqualTo(0);
// The second sample is originally at 1_033_333, clipping at 100_000 results in 933_333.
assertThat(videoTimestampsUs.get(1)).isEqualTo(933_333);
}
@Test
public void clippedMedia_trimOptimizationEnabled_completesWithOptimizationApplied()
throws Exception {
@@ -2113,7 +2178,7 @@ public class TransformerEndToEndTest {
assertThat(videoTrack.getSampleTimeUs(/* index= */ 0)).isEqualTo(0);
int sampleIndexWithLargestSampleTime = 10;
// TODO: b/365992945 - Address the issue of sample timeUs increasing due to negative timestamps
// caused by the edit list. The correct values should be 11_500_000 and 9_500_000 respectively.
// caused by the edit list. The correct values should be 11_500_000 and 9_500_000 respectively.
assertThat(videoTrack.getSampleTimeUs(sampleIndexWithLargestSampleTime)).isEqualTo(12_000_000);
assertThat(videoTrack.getSampleTimeUs(/* index= */ expectedSampleCount - 1))
.isEqualTo(10_000_000);
@@ -2438,6 +2503,17 @@ public class TransformerEndToEndTest {
}
}
@Nullable
private static FakeTrackOutput getVideoTrackOutput(FakeExtractorOutput extractorOutput) {
for (int i = 0; i < extractorOutput.numberOfTracks; i++) {
FakeTrackOutput trackOutput = extractorOutput.trackOutputs.get(i);
if (MimeTypes.isVideo(checkNotNull(trackOutput.lastFormat).sampleMimeType)) {
return trackOutput;
}
}
return null;
}
private static final class VideoUnsupportedEncoderFactory implements Codec.EncoderFactory {
private final Codec.EncoderFactory encoderFactory;

Mp4Info.java

@@ -59,6 +59,9 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
*/
public final long lastSyncSampleTimestampUs;
/** The presentation timestamp of the first video frame, in microseconds. */
public final long firstVideoSampleTimestampUs;
/**
* The presentation timestamp (in microseconds) of the first sync sample at or after {@code
* timeUs}, or {@link C#TIME_END_OF_SOURCE} if there are none. Set to {@link C#TIME_UNSET} if
@@ -78,12 +81,14 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
private Mp4Info(
long durationUs,
long lastSyncSampleTimestampUs,
long firstVideoSampleTimestampUs,
long firstSyncSampleTimestampUsAfterTimeUs,
boolean isFirstVideoSampleAfterTimeUsSyncSample,
@Nullable Format videoFormat,
@Nullable Format audioFormat) {
this.durationUs = durationUs;
this.lastSyncSampleTimestampUs = lastSyncSampleTimestampUs;
this.firstVideoSampleTimestampUs = firstVideoSampleTimestampUs;
this.firstSyncSampleTimestampUsAfterTimeUs = firstSyncSampleTimestampUsAfterTimeUs;
this.isFirstVideoSampleAfterTimeUsSyncSample = isFirstVideoSampleAfterTimeUsSyncSample;
this.videoFormat = videoFormat;
@@ -147,6 +152,7 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
}
long durationUs = mp4Extractor.getDurationUs();
long firstVideoSampleTimestampUs = C.TIME_UNSET;
long lastSyncSampleTimestampUs = C.TIME_UNSET;
long firstSyncSampleTimestampUsAfterTimeUs = C.TIME_UNSET;
boolean isFirstSampleAfterTimeUsSyncSample = false;
@@ -174,6 +180,9 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
long[] trackTimestampsUs =
mp4Extractor.getSampleTimestampsUs(extractorOutput.videoTrackId);
if (trackTimestampsUs.length > 0) {
firstVideoSampleTimestampUs = trackTimestampsUs[0];
}
int indexOfTrackTimestampUsAfterTimeUs =
Util.binarySearchCeil(
@@ -199,6 +208,7 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
return new Mp4Info(
durationUs,
lastSyncSampleTimestampUs,
firstVideoSampleTimestampUs,
firstSyncSampleTimestampUsAfterTimeUs,
isFirstSampleAfterTimeUsSyncSample,
videoFormat,

MuxerWrapper.java

@@ -44,6 +44,7 @@ import androidx.media3.common.C;
import androidx.media3.common.Format;
import androidx.media3.common.Metadata;
import androidx.media3.common.MimeTypes;
import androidx.media3.common.util.Log;
import androidx.media3.common.util.Util;
import androidx.media3.container.NalUnitUtil;
import androidx.media3.effect.DebugTraceUtil;
@@ -71,6 +72,8 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
* <p>This wrapper can contain at most one video track and one audio track.
*/
/* package */ final class MuxerWrapper {
private static final String TAG = "MuxerWrapper";
/**
* Thrown when video formats fail to match between {@link #MUXER_MODE_MUX_PARTIAL} and {@link
* #MUXER_MODE_APPEND}.
@@ -566,6 +569,21 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
}
if (trackInfo.sampleCount == 0) {
if (trackType == C.TRACK_TYPE_VIDEO
&& contains(trackTypeToInfo, C.TRACK_TYPE_AUDIO)
&& !dropSamplesBeforeFirstVideoSample) {
checkState(firstVideoPresentationTimeUs != C.TIME_UNSET);
// Set the presentation timestamp of the first video sample to zero so that the first video
// frame is presented when playback starts, across platforms. Moreover, MediaMuxer shifts all
// video sample times to zero below API 30, which breaks A/V sync.
// Only apply this when there is an audio track added, i.e. when not recording the screen.
// TODO: b/376217254 - Consider removing after switching to InAppMuxer.
// TODO: b/376217254 - Remove audio dropping logic, use video frame shifting instead.
Log.w(
TAG,
"Applying workarounds for edit list: shifting only the first video timestamp to zero.");
presentationTimeUs = 0;
}
trackInfo.startTimeUs = presentationTimeUs;
}
trackInfo.sampleCount++;

Transformer.java

@@ -1357,19 +1357,40 @@ public final class Transformer {
}
long maxEncodedAudioBufferDurationUs = 0;
if (mp4Info.audioFormat != null && mp4Info.audioFormat.sampleRate != Format.NO_VALUE) {
// Ensure there is an audio sample to mux between the two clip times to prevent
// Transformer from hanging because it received an audio track but no audio samples.
maxEncodedAudioBufferDurationUs =
Util.sampleCountToDurationUs(
AAC_LC_AUDIO_SAMPLE_COUNT, mp4Info.audioFormat.sampleRate);
}
if (mp4Info.firstSyncSampleTimestampUsAfterTimeUs
== mp4Info.firstVideoSampleTimestampUs) {
// The video likely includes an edit list. For example, if an edit list adds 1_000ms to
// each video sample and the trim position is 100ms, the first sample would be at
// 1_000ms, and the first sync sample after 100ms would also be at 1_000ms; but in this
// case processing should start from 100ms rather than 1_000ms. The resulting video
// should be 100ms shorter than the original video, and the first video frame should
// have a timestamp of 900ms.
Transformer.this.composition =
buildUponCompositionForTrimOptimization(
composition,
trimStartTimeUs,
trimEndTimeUs,
mp4Info.durationUs,
/* startsAtKeyFrame= */ true,
/* clearVideoEffects= */ false);
exportResultBuilder.setOptimizationResult(
OPTIMIZATION_ABANDONED_KEYFRAME_PLACEMENT_OPTIMAL_FOR_TRIM);
processFullInput();
return;
}
// Ensure there is an audio sample to mux between the two clip times to prevent
// Transformer from hanging because it received an audio track but no audio samples.
if (mp4Info.firstSyncSampleTimestampUsAfterTimeUs - trimStartTimeUs
<= maxEncodedAudioBufferDurationUs
|| mp4Info.isFirstVideoSampleAfterTimeUsSyncSample) {
Transformer.this.composition =
buildUponCompositionForTrimOptimization(
composition,
mp4Info.firstSyncSampleTimestampUsAfterTimeUs,
/* startTimeUs= */ mp4Info.firstSyncSampleTimestampUsAfterTimeUs,
trimEndTimeUs,
mp4Info.durationUs,
/* startsAtKeyFrame= */ true,
@@ -1409,6 +1430,7 @@ public final class Transformer {
processFullInput();
return;
}
Transformer.this.mediaItemInfo = mp4Info;
maybeSetMuxerWrapperAdditionalRotationDegrees(
remuxingMuxerWrapper,
@@ -1418,8 +1440,8 @@ public final class Transformer {
buildUponCompositionForTrimOptimization(
composition,
trimStartTimeUs,
mp4Info.firstSyncSampleTimestampUsAfterTimeUs,
mp4Info.durationUs,
/* endTimeUs= */ mp4Info.firstSyncSampleTimestampUsAfterTimeUs,
/* mediaDurationUs= */ mp4Info.durationUs,
/* startsAtKeyFrame= */ false,
/* clearVideoEffects= */ true);
startInternal(