diff --git a/libraries/test_data/src/test/assets/transformerdumps/mp4/sample.mp4.concatenated_audio_high_pitch.dump b/libraries/test_data/src/test/assets/transformerdumps/mp4/sample.mp4.concatenated_audio_high_pitch.dump deleted file mode 100644 index 8d0d954a24..0000000000 --- a/libraries/test_data/src/test/assets/transformerdumps/mp4/sample.mp4.concatenated_audio_high_pitch.dump +++ /dev/null @@ -1,207 +0,0 @@ -format 0: - averageBitrate = 131072 - sampleMimeType = audio/mp4a-latm - channelCount = 1 - sampleRate = 44100 - pcmEncoding = 2 - metadata = entries=[TSSE: description=null: values=[Lavf56.1.0], xyz: latitude=40.68, longitude=-74.5, Mp4Timestamp: creation time=3547558895, modification time=3547558895, timescale=1000] -container metadata = entries=[TSSE: description=null: values=[Lavf56.1.0], xyz: latitude=40.68, longitude=-74.5, Mp4Timestamp: creation time=3547558895, modification time=3547558895, timescale=1000] -sample: - trackIndex = 0 - dataHashCode = 915609509 - size = 792 - isKeyFrame = true - presentationTimeUs = 0 -sample: - trackIndex = 0 - dataHashCode = -1580893866 - size = 678 - isKeyFrame = true - presentationTimeUs = 8979 -sample: - trackIndex = 0 - dataHashCode = -31547651 - size = 304 - isKeyFrame = true - presentationTimeUs = 16666 -sample: - trackIndex = 0 - dataHashCode = 1415140636 - size = 460 - isKeyFrame = true - presentationTimeUs = 20113 -sample: - trackIndex = 0 - dataHashCode = 1721060815 - size = 850 - isKeyFrame = true - presentationTimeUs = 25328 -sample: - trackIndex = 0 - dataHashCode = 1707913464 - size = 446 - isKeyFrame = true - presentationTimeUs = 34965 -sample: - trackIndex = 0 - dataHashCode = -776771764 - size = 852 - isKeyFrame = true - presentationTimeUs = 40022 -sample: - trackIndex = 0 - dataHashCode = -609146892 - size = 368 - isKeyFrame = true - presentationTimeUs = 49682 -sample: - trackIndex = 0 - dataHashCode = -2044977387 - size = 1166 - isKeyFrame = true - presentationTimeUs = 53854 -sample: - trackIndex 
= 0 - dataHashCode = -753877175 - size = 506 - isKeyFrame = true - presentationTimeUs = 67074 -sample: - trackIndex = 0 - dataHashCode = 1491046836 - size = 578 - isKeyFrame = true - presentationTimeUs = 72811 -sample: - trackIndex = 0 - dataHashCode = 621394572 - size = 668 - isKeyFrame = true - presentationTimeUs = 79365 -sample: - trackIndex = 0 - dataHashCode = -58393202 - size = 268 - isKeyFrame = true - presentationTimeUs = 86938 -sample: - trackIndex = 0 - dataHashCode = 1253593269 - size = 318 - isKeyFrame = true - presentationTimeUs = 89977 -sample: - trackIndex = 0 - dataHashCode = -1544714160 - size = 424 - isKeyFrame = true - presentationTimeUs = 93582 -sample: - trackIndex = 0 - dataHashCode = -2038565545 - size = 294 - isKeyFrame = true - presentationTimeUs = 98390 -sample: - trackIndex = 0 - dataHashCode = 803611858 - size = 394 - isKeyFrame = true - presentationTimeUs = 101723 -sample: - trackIndex = 0 - dataHashCode = 890682839 - size = 812 - isKeyFrame = true - presentationTimeUs = 106190 -sample: - trackIndex = 0 - dataHashCode = 1798765816 - size = 332 - isKeyFrame = true - presentationTimeUs = 115396 -sample: - trackIndex = 0 - dataHashCode = -155329417 - size = 250 - isKeyFrame = true - presentationTimeUs = 119160 -sample: - trackIndex = 0 - dataHashCode = 2061435630 - size = 304 - isKeyFrame = true - presentationTimeUs = 121995 -sample: - trackIndex = 0 - dataHashCode = -667770092 - size = 1318 - isKeyFrame = true - presentationTimeUs = 125442 -sample: - trackIndex = 0 - dataHashCode = 1947321516 - size = 224 - isKeyFrame = true - presentationTimeUs = 140385 -sample: - trackIndex = 0 - dataHashCode = 1744495738 - size = 446 - isKeyFrame = true - presentationTimeUs = 142925 -sample: - trackIndex = 0 - dataHashCode = 801488010 - size = 838 - isKeyFrame = true - presentationTimeUs = 147981 -sample: - trackIndex = 0 - dataHashCode = -867204691 - size = 520 - isKeyFrame = true - presentationTimeUs = 157482 -sample: - trackIndex = 0 - dataHashCode 
= 1994555264 - size = 230 - isKeyFrame = true - presentationTimeUs = 163378 -sample: - trackIndex = 0 - dataHashCode = -748724753 - size = 380 - isKeyFrame = true - presentationTimeUs = 165986 -sample: - trackIndex = 0 - dataHashCode = -1557661843 - size = 692 - isKeyFrame = true - presentationTimeUs = 170294 -sample: - trackIndex = 0 - dataHashCode = 461522726 - size = 270 - isKeyFrame = true - presentationTimeUs = 178140 -sample: - trackIndex = 0 - dataHashCode = 1058760091 - size = 238 - isKeyFrame = true - presentationTimeUs = 181201 -sample: - trackIndex = 0 - dataHashCode = 1541647596 - size = 722 - isKeyFrame = true - presentationTimeUs = 183900 -sample: - trackIndex = 0 - dataHashCode = -2107816707 - size = 2062 - isKeyFrame = true - presentationTimeUs = 192086 -released = true diff --git a/libraries/test_data/src/test/assets/transformerdumps/wav/sample.wav.concatenated.dump b/libraries/test_data/src/test/assets/transformerdumps/wav/sample.wav.concatenated.dump new file mode 100644 index 0000000000..5d424bab47 --- /dev/null +++ b/libraries/test_data/src/test/assets/transformerdumps/wav/sample.wav.concatenated.dump @@ -0,0 +1,127 @@ +format 0: + averageBitrate = 131072 + sampleMimeType = audio/mp4a-latm + channelCount = 1 + sampleRate = 44100 + pcmEncoding = 2 +sample: + trackIndex = 0 + dataHashCode = -85819864 + size = 8820 + isKeyFrame = true + presentationTimeUs = 0 +sample: + trackIndex = 0 + dataHashCode = 566487491 + size = 8820 + isKeyFrame = true + presentationTimeUs = 100000 +sample: + trackIndex = 0 + dataHashCode = -1256531710 + size = 8820 + isKeyFrame = true + presentationTimeUs = 200000 +sample: + trackIndex = 0 + dataHashCode = 793455796 + size = 8820 + isKeyFrame = true + presentationTimeUs = 300000 +sample: + trackIndex = 0 + dataHashCode = -268235582 + size = 8820 + isKeyFrame = true + presentationTimeUs = 400000 +sample: + trackIndex = 0 + dataHashCode = -8136122 + size = 8820 + isKeyFrame = true + presentationTimeUs = 500000 +sample: + 
trackIndex = 0 + dataHashCode = 1750866613 + size = 8820 + isKeyFrame = true + presentationTimeUs = 600000 +sample: + trackIndex = 0 + dataHashCode = -1100753636 + size = 8820 + isKeyFrame = true + presentationTimeUs = 700000 +sample: + trackIndex = 0 + dataHashCode = 507833230 + size = 8820 + isKeyFrame = true + presentationTimeUs = 800000 +sample: + trackIndex = 0 + dataHashCode = 1472467506 + size = 8820 + isKeyFrame = true + presentationTimeUs = 900000 +sample: + trackIndex = 0 + dataHashCode = -85819864 + size = 8820 + isKeyFrame = true + presentationTimeUs = 1000000 +sample: + trackIndex = 0 + dataHashCode = 566487491 + size = 8820 + isKeyFrame = true + presentationTimeUs = 1100000 +sample: + trackIndex = 0 + dataHashCode = -1256531710 + size = 8820 + isKeyFrame = true + presentationTimeUs = 1200000 +sample: + trackIndex = 0 + dataHashCode = 793455796 + size = 8820 + isKeyFrame = true + presentationTimeUs = 1300000 +sample: + trackIndex = 0 + dataHashCode = -268235582 + size = 8820 + isKeyFrame = true + presentationTimeUs = 1400000 +sample: + trackIndex = 0 + dataHashCode = -8136122 + size = 8820 + isKeyFrame = true + presentationTimeUs = 1500000 +sample: + trackIndex = 0 + dataHashCode = 1750866613 + size = 8820 + isKeyFrame = true + presentationTimeUs = 1600000 +sample: + trackIndex = 0 + dataHashCode = -1100753636 + size = 8820 + isKeyFrame = true + presentationTimeUs = 1700000 +sample: + trackIndex = 0 + dataHashCode = 507833230 + size = 8820 + isKeyFrame = true + presentationTimeUs = 1800000 +sample: + trackIndex = 0 + dataHashCode = 1472467506 + size = 8820 + isKeyFrame = true + presentationTimeUs = 1900000 +released = true diff --git a/libraries/test_data/src/test/assets/transformerdumps/wav/sample.wav.concatenated_high_pitch.dump b/libraries/test_data/src/test/assets/transformerdumps/wav/sample.wav.concatenated_high_pitch.dump new file mode 100644 index 0000000000..342660787d --- /dev/null +++ 
b/libraries/test_data/src/test/assets/transformerdumps/wav/sample.wav.concatenated_high_pitch.dump @@ -0,0 +1,133 @@ +format 0: + averageBitrate = 131072 + sampleMimeType = audio/mp4a-latm + channelCount = 1 + sampleRate = 44100 + pcmEncoding = 2 +sample: + trackIndex = 0 + dataHashCode = 2042155098 + size = 6482 + isKeyFrame = true + presentationTimeUs = 0 +sample: + trackIndex = 0 + dataHashCode = 1417355469 + size = 8556 + isKeyFrame = true + presentationTimeUs = 73492 +sample: + trackIndex = 0 + dataHashCode = -2107697498 + size = 8754 + isKeyFrame = true + presentationTimeUs = 170498 +sample: + trackIndex = 0 + dataHashCode = 736072795 + size = 8908 + isKeyFrame = true + presentationTimeUs = 269750 +sample: + trackIndex = 0 + dataHashCode = -1913553170 + size = 9208 + isKeyFrame = true + presentationTimeUs = 370748 +sample: + trackIndex = 0 + dataHashCode = 13583718 + size = 8968 + isKeyFrame = true + presentationTimeUs = 475147 +sample: + trackIndex = 0 + dataHashCode = -1444602526 + size = 8588 + isKeyFrame = true + presentationTimeUs = 576825 +sample: + trackIndex = 0 + dataHashCode = -1693065958 + size = 8778 + isKeyFrame = true + presentationTimeUs = 674195 +sample: + trackIndex = 0 + dataHashCode = 2071205641 + size = 8736 + isKeyFrame = true + presentationTimeUs = 773718 +sample: + trackIndex = 0 + dataHashCode = 1433538831 + size = 8636 + isKeyFrame = true + presentationTimeUs = 872766 +sample: + trackIndex = 0 + dataHashCode = -949798077 + size = 9424 + isKeyFrame = true + presentationTimeUs = 970680 +sample: + trackIndex = 0 + dataHashCode = -1275686831 + size = 8088 + isKeyFrame = true + presentationTimeUs = 1077528 +sample: + trackIndex = 0 + dataHashCode = -1360039206 + size = 8858 + isKeyFrame = true + presentationTimeUs = 1169229 +sample: + trackIndex = 0 + dataHashCode = 736072795 + size = 8908 + isKeyFrame = true + presentationTimeUs = 1269659 +sample: + trackIndex = 0 + dataHashCode = -1913553170 + size = 9208 + isKeyFrame = true + 
presentationTimeUs = 1370657 +sample: + trackIndex = 0 + dataHashCode = 13583718 + size = 8968 + isKeyFrame = true + presentationTimeUs = 1475056 +sample: + trackIndex = 0 + dataHashCode = -1444602526 + size = 8588 + isKeyFrame = true + presentationTimeUs = 1576734 +sample: + trackIndex = 0 + dataHashCode = -1693065958 + size = 8778 + isKeyFrame = true + presentationTimeUs = 1674104 +sample: + trackIndex = 0 + dataHashCode = 2071205641 + size = 8736 + isKeyFrame = true + presentationTimeUs = 1773628 +sample: + trackIndex = 0 + dataHashCode = 1433538831 + size = 8636 + isKeyFrame = true + presentationTimeUs = 1872675 +sample: + trackIndex = 0 + dataHashCode = 992130724 + size = 2580 + isKeyFrame = true + presentationTimeUs = 1970589 +released = true diff --git a/libraries/transformer/src/test/java/androidx/media3/transformer/SequenceExportTest.java b/libraries/transformer/src/test/java/androidx/media3/transformer/SequenceExportTest.java index 377dd551b6..c471765a38 100644 --- a/libraries/transformer/src/test/java/androidx/media3/transformer/SequenceExportTest.java +++ b/libraries/transformer/src/test/java/androidx/media3/transformer/SequenceExportTest.java @@ -18,9 +18,11 @@ package androidx.media3.transformer; import static androidx.media3.common.util.Assertions.checkNotNull; import static androidx.media3.transformer.TestUtil.ASSET_URI_PREFIX; +import static androidx.media3.transformer.TestUtil.FILE_AUDIO_RAW; import static androidx.media3.transformer.TestUtil.FILE_AUDIO_VIDEO; import static androidx.media3.transformer.TestUtil.FILE_AUDIO_VIDEO_INCREASING_TIMESTAMPS_15S; import static androidx.media3.transformer.TestUtil.createEncodersAndDecoders; +import static androidx.media3.transformer.TestUtil.createPitchChangingAudioProcessor; import static androidx.media3.transformer.TestUtil.createTransformerBuilder; import static androidx.media3.transformer.TestUtil.getDumpFileName; import static androidx.media3.transformer.TestUtil.removeEncodersAndDecoders; @@ -40,12 
+42,20 @@ import java.nio.file.Files; import java.nio.file.Paths; import org.junit.After; import org.junit.Before; +import org.junit.Ignore; import org.junit.Test; import org.junit.runner.RunWith; /** * End-to-end test for exporting a single {@link EditedMediaItemSequence} containing multiple {@link * EditedMediaItem} instances with {@link Transformer}. + * + *
Video tracks cannot be processed by Robolectric, as the muxer audio/video interleaving means + * it waits for more audio samples before writing video samples. Robolectric decoders (currently) + * just copy input buffers to the output. Audio timestamps are computed based on the amount of data + * passed through (see [internal: b/178685617]), so are much smaller than expected because they are + * based on encoded samples. As a result, input files with video and audio must either remove or + * transmux the video. */ @RunWith(AndroidJUnit4.class) public final class SequenceExportTest { @@ -97,8 +107,7 @@ public final class SequenceExportTest { Transformer transformer = createTransformerBuilder(testMuxerHolder, /* enableFallback= */ false).build(); MediaItem mediaItem = MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_VIDEO); - SonicAudioProcessor sonicAudioProcessor = new SonicAudioProcessor(); - sonicAudioProcessor.setPitch(2f); + SonicAudioProcessor sonicAudioProcessor = createPitchChangingAudioProcessor(/* pitch= */ 2f); Effect videoEffect = RgbFilter.createGrayscaleFilter(); Effects effects = new Effects(ImmutableList.of(sonicAudioProcessor), ImmutableList.of(videoEffect)); @@ -172,11 +181,11 @@ public final class SequenceExportTest { Transformer transformer = createTransformerBuilder(testMuxerHolder, /* enableFallback= */ false).build(); MediaItem mediaItem = MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_VIDEO); - EditedMediaItem noAudioEditedMediaItem = + EditedMediaItem videoOnlyMediaItem = new EditedMediaItem.Builder(mediaItem).setRemoveAudio(true).build(); - EditedMediaItem audioEditedMediaItem = new EditedMediaItem.Builder(mediaItem).build(); + EditedMediaItem audioVideoMediaItem = new EditedMediaItem.Builder(mediaItem).build(); EditedMediaItemSequence sequence = - new EditedMediaItemSequence(ImmutableList.of(noAudioEditedMediaItem, audioEditedMediaItem)); + new EditedMediaItemSequence(ImmutableList.of(videoOnlyMediaItem, audioVideoMediaItem)); Composition 
composition = new Composition.Builder(ImmutableList.of(sequence)) .experimentalSetForceAudioTrack(true) @@ -198,8 +207,7 @@ public final class SequenceExportTest { Transformer transformer = createTransformerBuilder(testMuxerHolder, /* enableFallback= */ false).build(); MediaItem mediaItem = MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_VIDEO); - SonicAudioProcessor sonicAudioProcessor = new SonicAudioProcessor(); - sonicAudioProcessor.setPitch(2f); + SonicAudioProcessor sonicAudioProcessor = createPitchChangingAudioProcessor(/* pitch= */ 2f); Effects effects = new Effects(ImmutableList.of(sonicAudioProcessor), /* videoEffects= */ ImmutableList.of()); EditedMediaItem noAudioEditedMediaItem = @@ -224,22 +232,11 @@ public final class SequenceExportTest { } @Test - public void start_concatenateSameAudioItemWithEffects_completesSuccessfully() throws Exception { + public void start_concatenateSameAudioItem_completesSuccessfully() throws Exception { Transformer transformer = createTransformerBuilder(testMuxerHolder, /* enableFallback= */ false).build(); - MediaItem mediaItem = MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_VIDEO); - SonicAudioProcessor sonicAudioProcessor = new SonicAudioProcessor(); - sonicAudioProcessor.setPitch(2f); - Effects effects = - new Effects(ImmutableList.of(sonicAudioProcessor), /* videoEffects= */ ImmutableList.of()); - - // The video track must be removed in order for the export to end. Indeed, the - // Robolectric decoder just copies the input buffers to the output and the audio timestamps are - // therefore computed based on the encoded samples (see [internal: b/178685617]). As a result, - // the audio timestamps are much smaller than they should be and the muxer waits for more audio - // samples before writing video samples. 
- EditedMediaItem editedMediaItem = - new EditedMediaItem.Builder(mediaItem).setEffects(effects).setRemoveVideo(true).build(); + MediaItem audioOnlyMediaItem = MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW); + EditedMediaItem editedMediaItem = new EditedMediaItem.Builder(audioOnlyMediaItem).build(); EditedMediaItemSequence editedMediaItemSequence = new EditedMediaItemSequence(ImmutableList.of(editedMediaItem, editedMediaItem)); Composition composition = @@ -251,6 +248,68 @@ public final class SequenceExportTest { DumpFileAsserts.assertOutput( context, checkNotNull(testMuxerHolder.testMuxer), - getDumpFileName(FILE_AUDIO_VIDEO + ".concatenated_audio_high_pitch")); + getDumpFileName(FILE_AUDIO_RAW + ".concatenated")); + } + + @Test + public void start_concatenateSameAudioItemWithEffects_completesSuccessfully() throws Exception { + Transformer transformer = + createTransformerBuilder(testMuxerHolder, /* enableFallback= */ false).build(); + MediaItem audioOnlyMediaItem = MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW); + SonicAudioProcessor sonicAudioProcessor = createPitchChangingAudioProcessor(/* pitch= */ 2f); + Effects effects = + new Effects(ImmutableList.of(sonicAudioProcessor), /* videoEffects= */ ImmutableList.of()); + EditedMediaItem editedMediaItem = + new EditedMediaItem.Builder(audioOnlyMediaItem).setEffects(effects).build(); + EditedMediaItemSequence editedMediaItemSequence = + new EditedMediaItemSequence(ImmutableList.of(editedMediaItem, editedMediaItem)); + Composition composition = + new Composition.Builder(ImmutableList.of(editedMediaItemSequence)).build(); + + transformer.start(composition, outputPath); + TransformerTestRunner.runLooper(transformer); + + DumpFileAsserts.assertOutput( + context, + checkNotNull(testMuxerHolder.testMuxer), + getDumpFileName(FILE_AUDIO_RAW + ".concatenated_high_pitch")); + } + + @Test + @Ignore("Handle MediaItem effects changes (See [internal: b/274093424]).") + public void 
start_concatenateSameAudioItemWithDifferentEffects_completesSuccessfully() + throws Exception { + Transformer transformer = + createTransformerBuilder(testMuxerHolder, /* enableFallback= */ false).build(); + MediaItem audioOnlyMediaItem = MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW); + Effects highPitchEffects = + new Effects( + ImmutableList.of(createPitchChangingAudioProcessor(/* pitch= */ 2f)), + /* videoEffects= */ ImmutableList.of()); + EditedMediaItem highPitchMediaItem = + new EditedMediaItem.Builder(audioOnlyMediaItem) + .setRemoveVideo(true) + .setEffects(highPitchEffects) + .build(); + Effects lowPitchEffects = + new Effects( + ImmutableList.of(createPitchChangingAudioProcessor(/* pitch= */ 0.5f)), + /* videoEffects= */ ImmutableList.of()); + EditedMediaItem lowPitchMediaItem = + new EditedMediaItem.Builder(audioOnlyMediaItem) + .setRemoveVideo(true) + .setEffects(lowPitchEffects) + .build(); + EditedMediaItemSequence sequence = + new EditedMediaItemSequence(ImmutableList.of(highPitchMediaItem, lowPitchMediaItem)); + Composition composition = new Composition.Builder(ImmutableList.of(sequence)).build(); + + transformer.start(composition, outputPath); + TransformerTestRunner.runLooper(transformer); + + DumpFileAsserts.assertOutput( + context, + checkNotNull(testMuxerHolder.testMuxer), + getDumpFileName(FILE_AUDIO_RAW + ".high_pitch_then_low_pitch")); } } diff --git a/libraries/transformer/src/test/java/androidx/media3/transformer/TestUtil.java b/libraries/transformer/src/test/java/androidx/media3/transformer/TestUtil.java index 66ccc87d93..3545230cce 100644 --- a/libraries/transformer/src/test/java/androidx/media3/transformer/TestUtil.java +++ b/libraries/transformer/src/test/java/androidx/media3/transformer/TestUtil.java @@ -24,6 +24,7 @@ import androidx.annotation.Nullable; import androidx.media3.common.C; import androidx.media3.common.Format; import androidx.media3.common.MimeTypes; +import androidx.media3.common.audio.SonicAudioProcessor; import 
androidx.media3.common.util.UnstableApi; import androidx.media3.common.util.Util; import androidx.media3.test.utils.FakeClock; @@ -239,6 +240,12 @@ public final class TestUtil { new DefaultEncoderFactory.Builder(context).setEnableFallback(enableFallback).build()); } + public static SonicAudioProcessor createPitchChangingAudioProcessor(float pitch) { + SonicAudioProcessor sonicAudioProcessor = new SonicAudioProcessor(); + sonicAudioProcessor.setPitch(pitch); + return sonicAudioProcessor; + } + public static String getDumpFileName(String originalFileName) { return DUMP_FILE_OUTPUT_DIRECTORY + '/' + originalFileName + '.' + DUMP_FILE_EXTENSION; }