From 46eeabb877e4a2d6783c145a84770422e73a04d5 Mon Sep 17 00:00:00 2001 From: claincly Date: Mon, 12 Aug 2024 06:31:11 -0700 Subject: [PATCH] Support `setRemoveAudio` in CompositionPlayer PiperOrigin-RevId: 662063725 --- ...ithRemovingFirstAndLastAudio_succeeds.dump | 81 ++++++++ ...ideosWithRemovingMiddleAudio_succeeds.dump | 78 ++++++++ .../performance/CompositionPlaybackTest.java | 180 +++++++++++++++--- .../media3/transformer/CompositionPlayer.java | 16 +- 4 files changed, 324 insertions(+), 31 deletions(-) create mode 100644 libraries/test_data/src/test/assets/audiosinkdumps/wav/playback_sequenceOfThreeVideosWithRemovingFirstAndLastAudio_succeeds.dump create mode 100644 libraries/test_data/src/test/assets/audiosinkdumps/wav/playback_sequenceOfThreeVideosWithRemovingMiddleAudio_succeeds.dump diff --git a/libraries/test_data/src/test/assets/audiosinkdumps/wav/playback_sequenceOfThreeVideosWithRemovingFirstAndLastAudio_succeeds.dump b/libraries/test_data/src/test/assets/audiosinkdumps/wav/playback_sequenceOfThreeVideosWithRemovingFirstAndLastAudio_succeeds.dump new file mode 100644 index 0000000000..f6b458bd2d --- /dev/null +++ b/libraries/test_data/src/test/assets/audiosinkdumps/wav/playback_sequenceOfThreeVideosWithRemovingFirstAndLastAudio_succeeds.dump @@ -0,0 +1,81 @@ +AudioSink: + buffer count = 25 + config: + pcmEncoding = 2 + channelCount = 2 + sampleRate = 44100 + buffer #0: + time = 0 + data = 1742602241 + buffer #1: + time = 23219 + data = 1742602241 + buffer #2: + time = 46439 + data = 1742602241 + buffer #3: + time = 69659 + data = 1742602241 + buffer #4: + time = 92879 + data = 1742602241 + buffer #5: + time = 116099 + data = 1742602241 + buffer #6: + time = 139319 + data = 1742602241 + buffer #7: + time = 162539 + data = 1742602241 + buffer #8: + time = 185759 + data = 1742602241 + buffer #9: + time = 208979 + data = 1742602241 + buffer #10: + time = 232199 + data = 1742602241 + buffer #11: + time = 255419 + data = 1742602241 + buffer #12: + time = 278639 + data = 1742602241 + buffer #13: + time = 301859 + data = 1742602241 + buffer #14: + time = 325079 + data = 1865678849 + buffer #15: + time = 464399 + data = 820010753 + buffer #16: + time = 500000 + data = 1407885569 + buffer #17: + time = 719818 + data = 1339014657 + buffer #18: + time = 1000000 + data = 38127629 + buffer #19: + time = 1209750 + data = 765431113 + buffer #20: + time = 1500000 + data = -245398331 + buffer #21: + time = 2000000 + data = -883339733 + buffer #22: + time = 2161768 + data = -1116029439 + buffer #23: + time = 2500000 + data = -164077823 + buffer #24: + time = 3000000 + data = -1619525631 diff --git a/libraries/test_data/src/test/assets/audiosinkdumps/wav/playback_sequenceOfThreeVideosWithRemovingMiddleAudio_succeeds.dump b/libraries/test_data/src/test/assets/audiosinkdumps/wav/playback_sequenceOfThreeVideosWithRemovingMiddleAudio_succeeds.dump new file mode 100644 index 0000000000..9d572a78fa --- /dev/null +++ b/libraries/test_data/src/test/assets/audiosinkdumps/wav/playback_sequenceOfThreeVideosWithRemovingMiddleAudio_succeeds.dump @@ -0,0 +1,78 @@ +AudioSink: + buffer count = 24 + config: + pcmEncoding = 2 + channelCount = 1 + sampleRate = 44100 + buffer #0: + time = 0 + data = -676819263 + buffer #1: + time = 67233 + data = -1276182527 + buffer #2: + time = 90453 + data = 1946193324 + buffer #3: + time = 113673 + data = 104315105 + buffer #4: + time = 136893 + data = 1010810964 + buffer #5: + time = 160113 + data = -826419418 + buffer #6: + time = 183333 + data = 371097263 + buffer #7: + time = 206553 + data = 2052827414 + buffer #8: + time = 229773 + data = -5216832 + buffer #9: + time = 252993 + data = -1419834910 + buffer #10: + time = 276213 + data = -298784263 + buffer #11: + time = 299433 + data = -689936200 + buffer #12: + time = 322653 + data = -1523709536 + buffer #13: + time = 461972 + data = -708375787 + buffer #14: + time = 500000 + data = 1992124950 + buffer #15: + time = 647732 + data = -1661880409 + buffer #16: + time = 1000000 + data = -1899662657 + buffer #17: + time = 1460430 + data = -1641541695 + buffer #18: + time = 1500000 + data = -1935405183 + buffer #19: + time = 2000000 + data = 1074468007 + buffer #20: + time = 2345102 + data = 329859524 + buffer #21: + time = 2500000 + data = -2047290594 + buffer #22: + time = 3000000 + data = -1193826308 + buffer #23: + time = 3343560 + data = -494005503 diff --git a/libraries/transformer/src/androidTest/java/androidx/media3/transformer/mh/performance/CompositionPlaybackTest.java b/libraries/transformer/src/androidTest/java/androidx/media3/transformer/mh/performance/CompositionPlaybackTest.java index 226a33d559..ca6c16eb64 100644 --- a/libraries/transformer/src/androidTest/java/androidx/media3/transformer/mh/performance/CompositionPlaybackTest.java +++ b/libraries/transformer/src/androidTest/java/androidx/media3/transformer/mh/performance/CompositionPlaybackTest.java @@ -32,12 +32,16 @@ import android.graphics.Bitmap; import android.graphics.PixelFormat; import android.media.Image; import android.media.ImageReader; +import android.view.SurfaceView; import androidx.media3.common.Effect; import androidx.media3.common.MediaItem; import androidx.media3.common.util.ConditionVariable; import androidx.media3.common.util.Size; import androidx.media3.common.util.Util; import androidx.media3.effect.GlEffect; +import androidx.media3.exoplayer.audio.DefaultAudioSink; +import androidx.media3.test.utils.CapturingAudioSink; +import androidx.media3.test.utils.DumpFileAsserts; import androidx.media3.transformer.Composition; import androidx.media3.transformer.CompositionPlayer; import androidx.media3.transformer.EditedMediaItem; @@ -45,12 +49,11 @@ import androidx.media3.transformer.EditedMediaItemSequence; import androidx.media3.transformer.Effects; import androidx.media3.transformer.InputTimestampRecordingShaderProgram; import androidx.media3.transformer.PlayerTestListener; +import androidx.media3.transformer.SurfaceTestActivity; +import androidx.test.ext.junit.rules.ActivityScenarioRule; import androidx.test.ext.junit.runners.AndroidJUnit4; import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; -import java.util.ArrayList; -import java.util.List; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicReference; import org.checkerframework.checker.nullness.qual.MonotonicNonNull; @@ -80,20 +83,28 @@ public class CompositionPlaybackTest { @Rule public final TestName testName = new TestName(); + @Rule + public ActivityScenarioRule rule = + new ActivityScenarioRule<>(SurfaceTestActivity.class); + private final Context context = getInstrumentation().getContext().getApplicationContext(); private final PlayerTestListener playerTestListener = new PlayerTestListener(TEST_TIMEOUT_MS); private @MonotonicNonNull CompositionPlayer player; private @MonotonicNonNull ImageReader outputImageReader; + private String testId; + private SurfaceView surfaceView; @Before - public void setUpTestId() { + public void setUp() { + rule.getScenario().onActivity(activity -> surfaceView = activity.getSurfaceView()); testId = testName.getMethodName(); } @After public void tearDown() { + rule.getScenario().close(); getInstrumentation() .runOnMainSync( () -> { @@ -182,12 +193,13 @@ public class CompositionPlaybackTest { new Composition.Builder( new EditedMediaItemSequence(ImmutableList.of(editedMediaItem, editedMediaItem))) .build(); - List expectedTimestampsUs = new ArrayList<>(); - expectedTimestampsUs.addAll(VIDEO_TIMESTAMPS_US); - expectedTimestampsUs.addAll( - Lists.newArrayList( - Iterables.transform( - VIDEO_TIMESTAMPS_US, timestampUs -> (VIDEO_DURATION_US + timestampUs)))); + ImmutableList expectedTimestampsUs = + new ImmutableList.Builder() + .addAll(VIDEO_TIMESTAMPS_US) + .addAll( + Iterables.transform( + VIDEO_TIMESTAMPS_US, timestampUs -> (VIDEO_DURATION_US + timestampUs))) + .build(); getInstrumentation() .runOnMainSync( @@ -221,12 +233,13 @@ public class CompositionPlaybackTest { new Composition.Builder( new EditedMediaItemSequence(ImmutableList.of(editedMediaItem, editedMediaItem))) .build(); - List expectedTimestampsUs = new ArrayList<>(); - expectedTimestampsUs.addAll(IMAGE_TIMESTAMPS_US); - expectedTimestampsUs.addAll( - Lists.newArrayList( - Iterables.transform( - IMAGE_TIMESTAMPS_US, timestampUs -> (IMAGE_DURATION_US + timestampUs)))); + ImmutableList expectedTimestampsUs = + new ImmutableList.Builder() + .addAll(IMAGE_TIMESTAMPS_US) + .addAll( + Iterables.transform( + IMAGE_TIMESTAMPS_US, timestampUs -> (IMAGE_DURATION_US + timestampUs))) + .build(); getInstrumentation() .runOnMainSync( @@ -269,12 +282,13 @@ public class CompositionPlaybackTest { new EditedMediaItemSequence( ImmutableList.of(videoEditedMediaItem, imageEditedMediaItem))) .build(); - List expectedTimestampsUs = new ArrayList<>(); - expectedTimestampsUs.addAll(VIDEO_TIMESTAMPS_US); - expectedTimestampsUs.addAll( - Lists.newArrayList( - Iterables.transform( - IMAGE_TIMESTAMPS_US, timestampUs -> (VIDEO_DURATION_US + timestampUs)))); + ImmutableList expectedTimestampsUs = + new ImmutableList.Builder() + .addAll(VIDEO_TIMESTAMPS_US) + .addAll( + Iterables.transform( + IMAGE_TIMESTAMPS_US, timestampUs -> (VIDEO_DURATION_US + timestampUs))) + .build(); getInstrumentation() .runOnMainSync( @@ -317,12 +331,13 @@ public class CompositionPlaybackTest { new EditedMediaItemSequence( ImmutableList.of(imageEditedMediaItem, videoEditedMediaItem))) .build(); - List expectedTimestampsUs = new ArrayList<>(); - expectedTimestampsUs.addAll(IMAGE_TIMESTAMPS_US); - expectedTimestampsUs.addAll( - Lists.newArrayList( - Iterables.transform( - VIDEO_TIMESTAMPS_US, timestampUs -> (IMAGE_DURATION_US + timestampUs)))); + ImmutableList expectedTimestampsUs = + new ImmutableList.Builder() + .addAll(IMAGE_TIMESTAMPS_US) + .addAll( + Iterables.transform( + VIDEO_TIMESTAMPS_US, timestampUs -> (IMAGE_DURATION_US + timestampUs))) + .build(); getInstrumentation() .runOnMainSync( @@ -338,4 +353,113 @@ public class CompositionPlaybackTest { assertThat(inputTimestampRecordingShaderProgram.getInputTimestampsUs()) .isEqualTo(expectedTimestampsUs); } + + @Test + public void playback_sequenceOfThreeVideosWithRemovingFirstAndLastAudio_succeeds() + throws Exception { + InputTimestampRecordingShaderProgram inputTimestampRecordingShaderProgram = + new InputTimestampRecordingShaderProgram(); + EditedMediaItem videoEditedMediaItem = + new EditedMediaItem.Builder(VIDEO_MEDIA_ITEM) + .setDurationUs(VIDEO_DURATION_US) + .setEffects( + new Effects( + /* audioProcessors= */ ImmutableList.of(), + /* videoEffects= */ ImmutableList.of( + (GlEffect) (context, useHdr) -> inputTimestampRecordingShaderProgram))) + .build(); + EditedMediaItem videoEditedMediaItemRemoveAudio = + videoEditedMediaItem.buildUpon().setRemoveAudio(true).build(); + Composition composition = + new Composition.Builder( + new EditedMediaItemSequence( + videoEditedMediaItemRemoveAudio, + videoEditedMediaItem, + videoEditedMediaItemRemoveAudio)) + .build(); + ImmutableList expectedTimestampsUs = + new ImmutableList.Builder() + .addAll(VIDEO_TIMESTAMPS_US) + .addAll( + Iterables.transform( + VIDEO_TIMESTAMPS_US, timestampUs -> (VIDEO_DURATION_US + timestampUs))) + .addAll( + Iterables.transform( + VIDEO_TIMESTAMPS_US, timestampUs -> (2 * VIDEO_DURATION_US + timestampUs))) + .build(); + CapturingAudioSink capturingAudioSink = + new CapturingAudioSink(new DefaultAudioSink.Builder(context).build()); + + getInstrumentation() + .runOnMainSync( + () -> { + player = + new CompositionPlayer.Builder(context).setAudioSink(capturingAudioSink).build(); + player.addListener(playerTestListener); + player.setComposition(composition); + player.prepare(); + player.play(); + }); + playerTestListener.waitUntilPlayerEnded(); + + assertThat(inputTimestampRecordingShaderProgram.getInputTimestampsUs()) + .isEqualTo(expectedTimestampsUs); + DumpFileAsserts.assertOutput( + context, + capturingAudioSink, + "audiosinkdumps/wav/playback_sequenceOfThreeVideosWithRemovingFirstAndLastAudio_succeeds.dump"); + } + + @Test + public void playback_sequenceOfThreeVideosWithRemovingMiddleAudio_succeeds() throws Exception { + InputTimestampRecordingShaderProgram inputTimestampRecordingShaderProgram = + new InputTimestampRecordingShaderProgram(); + EditedMediaItem videoEditedMediaItem = + new EditedMediaItem.Builder(VIDEO_MEDIA_ITEM) + .setDurationUs(VIDEO_DURATION_US) + .setEffects( + new Effects( + /* audioProcessors= */ ImmutableList.of(), + /* videoEffects= */ ImmutableList.of( + (GlEffect) (context, useHdr) -> inputTimestampRecordingShaderProgram))) + .build(); + EditedMediaItem videoEditedMediaItemRemoveAudio = + videoEditedMediaItem.buildUpon().setRemoveAudio(true).build(); + Composition composition = + new Composition.Builder( + new EditedMediaItemSequence( + videoEditedMediaItem, videoEditedMediaItemRemoveAudio, videoEditedMediaItem)) + .build(); + ImmutableList expectedTimestampsUs = + new ImmutableList.Builder() + .addAll(VIDEO_TIMESTAMPS_US) + .addAll( + Iterables.transform( + VIDEO_TIMESTAMPS_US, timestampUs -> (VIDEO_DURATION_US + timestampUs))) + .addAll( + Iterables.transform( + VIDEO_TIMESTAMPS_US, timestampUs -> (2 * VIDEO_DURATION_US + timestampUs))) + .build(); + CapturingAudioSink capturingAudioSink = + new CapturingAudioSink(new DefaultAudioSink.Builder(context).build()); + + getInstrumentation() + .runOnMainSync( + () -> { + player = + new CompositionPlayer.Builder(context).setAudioSink(capturingAudioSink).build(); + player.addListener(playerTestListener); + player.setComposition(composition); + player.prepare(); + player.play(); + }); + playerTestListener.waitUntilPlayerEnded(); + + assertThat(inputTimestampRecordingShaderProgram.getInputTimestampsUs()) + .isEqualTo(expectedTimestampsUs); + DumpFileAsserts.assertOutput( + context, + capturingAudioSink, + "audiosinkdumps/wav/playback_sequenceOfThreeVideosWithRemovingMiddleAudio_succeeds.dump"); + } } diff --git a/libraries/transformer/src/main/java/androidx/media3/transformer/CompositionPlayer.java b/libraries/transformer/src/main/java/androidx/media3/transformer/CompositionPlayer.java index b01a5e2c2a..618bb807e2 100644 --- a/libraries/transformer/src/main/java/androidx/media3/transformer/CompositionPlayer.java +++ b/libraries/transformer/src/main/java/androidx/media3/transformer/CompositionPlayer.java @@ -64,6 +64,7 @@ import androidx.media3.exoplayer.source.ClippingMediaSource; import androidx.media3.exoplayer.source.ConcatenatingMediaSource2; import androidx.media3.exoplayer.source.DefaultMediaSourceFactory; import androidx.media3.exoplayer.source.ExternalLoader; +import androidx.media3.exoplayer.source.FilteringMediaSource; import androidx.media3.exoplayer.source.ForwardingTimeline; import androidx.media3.exoplayer.source.MediaPeriod; import androidx.media3.exoplayer.source.MediaSource; @@ -78,6 +79,7 @@ import androidx.media3.exoplayer.util.EventLogger; import androidx.media3.exoplayer.video.CompositingVideoSinkProvider; import androidx.media3.exoplayer.video.VideoFrameReleaseControl; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; import com.google.errorprone.annotations.CanIgnoreReturnValue; @@ -747,10 +749,18 @@ public final class CompositionPlayer extends SimpleBasePlayer new SilenceMediaSource(editedMediaItem.durationUs), editedMediaItem.mediaItem.clippingConfiguration.startPositionUs, editedMediaItem.mediaItem.clippingConfiguration.endPositionUs); + + // The MediaSource that loads the MediaItem + MediaSource mainMediaSource = + defaultMediaSourceFactory.createMediaSource(editedMediaItem.mediaItem); + if (editedMediaItem.removeAudio) { + mainMediaSource = + new FilteringMediaSource( + mainMediaSource, ImmutableSet.of(C.TRACK_TYPE_VIDEO, C.TRACK_TYPE_IMAGE)); + } + MediaSource mergingMediaSource = - new MergingMediaSource( - defaultMediaSourceFactory.createMediaSource(editedMediaItem.mediaItem), - silenceMediaSource); + new MergingMediaSource(mainMediaSource, silenceMediaSource); MediaSource itemMediaSource = wrapWithVideoEffectsBasedMediaSources( mergingMediaSource, editedMediaItem.effects.videoEffects, durationUs);