From f60f79bb10ad4a375934c89f027865ba53b38509 Mon Sep 17 00:00:00 2001 From: samrobinson Date: Thu, 29 Jun 2023 13:29:41 +0000 Subject: [PATCH] Handle media item (Effects/Format) changes in AudioSamplePipeline. On a MediaItem change, the input Format (and Effects to apply) may be different. Therefore the AudioProcessingPipeline must be reconfigured to determine what processing is active, and what the AudioFormat of the data output is. In the event that it is different, additional AudioProcessor instances must be used to ensure the encoder will still be able to accept the audio buffers. PiperOrigin-RevId: 544338451 --- .../common/audio/AudioProcessingPipeline.java | 7 +- ...s.mp4.silence_then_audio_with_effects.dump | 104 ++--------- .../sample.wav.concatenated_high_pitch.dump | 38 ++-- .../sample.wav.high_pitch_then_low_pitch.dump | 139 +++++++++++++++ ...high_pitch_then_sample.wav-high_pitch.dump | 109 ++++++++++++ ...-high_pitch_then_sample.wav-low_pitch.dump | 109 ++++++++++++ .../wav/sample_rf64.wav_then_sample.wav.dump | 97 +++++++++++ .../transformer/AudioSamplePipeline.java | 164 ++++++++++++++---- .../transformer/SilentAudioGenerator.java | 4 +- .../transformer/TransformerInternal.java | 3 +- .../transformer/SequenceExportTest.java | 105 ++++++++++- .../androidx/media3/transformer/TestUtil.java | 9 +- 12 files changed, 733 insertions(+), 155 deletions(-) create mode 100644 libraries/test_data/src/test/assets/transformerdumps/wav/sample.wav.high_pitch_then_low_pitch.dump create mode 100644 libraries/test_data/src/test/assets/transformerdumps/wav/sample_rf64.wav-high_pitch_then_sample.wav-high_pitch.dump create mode 100644 libraries/test_data/src/test/assets/transformerdumps/wav/sample_rf64.wav-high_pitch_then_sample.wav-low_pitch.dump create mode 100644 libraries/test_data/src/test/assets/transformerdumps/wav/sample_rf64.wav_then_sample.wav.dump diff --git a/libraries/common/src/main/java/androidx/media3/common/audio/AudioProcessingPipeline.java b/libraries/common/src/main/java/androidx/media3/common/audio/AudioProcessingPipeline.java index 346b21be97..03ff279d6e 100644 --- a/libraries/common/src/main/java/androidx/media3/common/audio/AudioProcessingPipeline.java +++ b/libraries/common/src/main/java/androidx/media3/common/audio/AudioProcessingPipeline.java @@ -170,7 +170,12 @@ public final class AudioProcessingPipeline { } } - /** Returns the {@link AudioFormat} currently being output. */ + /** + * Returns the {@link AudioFormat} of data being output through {@link #getOutput()}. + * + * @return The {@link AudioFormat} currently being output, or {@link AudioFormat#NOT_SET} if no + * {@linkplain #configure(AudioFormat) configuration} has been {@linkplain #flush() applied}. 
+ */ public AudioFormat getOutputAudioFormat() { return outputAudioFormat; } diff --git a/libraries/test_data/src/test/assets/transformerdumps/mp4/testvid_1022ms.mp4.silence_then_audio_with_effects.dump b/libraries/test_data/src/test/assets/transformerdumps/mp4/testvid_1022ms.mp4.silence_then_audio_with_effects.dump index 31e1643c34..9f90acb445 100644 --- a/libraries/test_data/src/test/assets/transformerdumps/mp4/testvid_1022ms.mp4.silence_then_audio_with_effects.dump +++ b/libraries/test_data/src/test/assets/transformerdumps/mp4/testvid_1022ms.mp4.silence_then_audio_with_effects.dump @@ -637,110 +637,38 @@ sample: presentationTimeUs = 967709 sample: trackIndex = 1 - dataHashCode = -1759454975 - size = 440 + dataHashCode = 1916158593 + size = 5164 isKeyFrame = true presentationTimeUs = 992653 sample: trackIndex = 1 - dataHashCode = -1759454975 - size = 440 + dataHashCode = -1950877403 + size = 1120 isKeyFrame = true - presentationTimeUs = 995147 + presentationTimeUs = 1021927 sample: trackIndex = 1 - dataHashCode = -1759454975 - size = 440 - isKeyFrame = true - presentationTimeUs = 997641 -sample: - trackIndex = 1 - dataHashCode = -1759454975 - size = 440 - isKeyFrame = true - presentationTimeUs = 1000136 -sample: - trackIndex = 1 - dataHashCode = -1759454975 - size = 440 - isKeyFrame = true - presentationTimeUs = 1002630 -sample: - trackIndex = 1 - dataHashCode = -1759454975 - size = 440 - isKeyFrame = true - presentationTimeUs = 1005124 -sample: - trackIndex = 1 - dataHashCode = -1759454975 - size = 440 - isKeyFrame = true - presentationTimeUs = 1007619 -sample: - trackIndex = 1 - dataHashCode = -1759454975 - size = 440 - isKeyFrame = true - presentationTimeUs = 1010113 -sample: - trackIndex = 1 - dataHashCode = -1759454975 - size = 440 - isKeyFrame = true - presentationTimeUs = 1012607 -sample: - trackIndex = 1 - dataHashCode = -1759454975 - size = 440 - isKeyFrame = true - presentationTimeUs = 1015102 -sample: - trackIndex = 1 - dataHashCode = -2121773972 - size = 440 - isKeyFrame = true - presentationTimeUs = 1017596 -sample: - trackIndex = 1 - dataHashCode = 1517597454 - size = 440 - isKeyFrame = true - presentationTimeUs = 1020090 -sample: - trackIndex = 1 - dataHashCode = 1952035733 - size = 448 - isKeyFrame = true - presentationTimeUs = 1022585 -sample: - trackIndex = 1 - dataHashCode = -1861807761 - size = 604 - isKeyFrame = true - presentationTimeUs = 1025124 -sample: - trackIndex = 1 - dataHashCode = -1460926592 + dataHashCode = 1590174306 size = 880 isKeyFrame = true - presentationTimeUs = 1028548 + presentationTimeUs = 1028276 sample: trackIndex = 1 - dataHashCode = 1194085269 - size = 532 + dataHashCode = -432400310 + size = 1588 isKeyFrame = true - presentationTimeUs = 1033537 + presentationTimeUs = 1033265 sample: trackIndex = 1 - dataHashCode = -493118955 - size = 2296 + dataHashCode = -270222999 + size = 832 isKeyFrame = true - presentationTimeUs = 1036553 + presentationTimeUs = 1042267 sample: trackIndex = 1 - dataHashCode = 90420756 - size = 4568 + dataHashCode = -1941634508 + size = 5028 isKeyFrame = true - presentationTimeUs = 1049569 + presentationTimeUs = 1046984 released = true diff --git a/libraries/test_data/src/test/assets/transformerdumps/wav/sample.wav.concatenated_high_pitch.dump b/libraries/test_data/src/test/assets/transformerdumps/wav/sample.wav.concatenated_high_pitch.dump index 342660787d..661e5f200a 100644 --- a/libraries/test_data/src/test/assets/transformerdumps/wav/sample.wav.concatenated_high_pitch.dump +++ 
b/libraries/test_data/src/test/assets/transformerdumps/wav/sample.wav.concatenated_high_pitch.dump @@ -66,68 +66,74 @@ sample: presentationTimeUs = 872766 sample: trackIndex = 0 - dataHashCode = -949798077 - size = 9424 + dataHashCode = 992130724 + size = 2580 isKeyFrame = true presentationTimeUs = 970680 sample: trackIndex = 0 - dataHashCode = -1275686831 - size = 8088 + dataHashCode = 2042155098 + size = 6482 isKeyFrame = true - presentationTimeUs = 1077528 + presentationTimeUs = 999931 sample: trackIndex = 0 - dataHashCode = -1360039206 - size = 8858 + dataHashCode = 1417355469 + size = 8556 isKeyFrame = true - presentationTimeUs = 1169229 + presentationTimeUs = 1073424 +sample: + trackIndex = 0 + dataHashCode = -2107697498 + size = 8754 + isKeyFrame = true + presentationTimeUs = 1170430 sample: trackIndex = 0 dataHashCode = 736072795 size = 8908 isKeyFrame = true - presentationTimeUs = 1269659 + presentationTimeUs = 1269682 sample: trackIndex = 0 dataHashCode = -1913553170 size = 9208 isKeyFrame = true - presentationTimeUs = 1370657 + presentationTimeUs = 1370680 sample: trackIndex = 0 dataHashCode = 13583718 size = 8968 isKeyFrame = true - presentationTimeUs = 1475056 + presentationTimeUs = 1475079 sample: trackIndex = 0 dataHashCode = -1444602526 size = 8588 isKeyFrame = true - presentationTimeUs = 1576734 + presentationTimeUs = 1576757 sample: trackIndex = 0 dataHashCode = -1693065958 size = 8778 isKeyFrame = true - presentationTimeUs = 1674104 + presentationTimeUs = 1674126 sample: trackIndex = 0 dataHashCode = 2071205641 size = 8736 isKeyFrame = true - presentationTimeUs = 1773628 + presentationTimeUs = 1773650 sample: trackIndex = 0 dataHashCode = 1433538831 size = 8636 isKeyFrame = true - presentationTimeUs = 1872675 + presentationTimeUs = 1872698 sample: trackIndex = 0 dataHashCode = 992130724 size = 2580 isKeyFrame = true - presentationTimeUs = 1970589 + presentationTimeUs = 1970612 released = true diff --git a/libraries/test_data/src/test/assets/transformerdumps/wav/sample.wav.high_pitch_then_low_pitch.dump b/libraries/test_data/src/test/assets/transformerdumps/wav/sample.wav.high_pitch_then_low_pitch.dump new file mode 100644 index 0000000000..18f66c5646 --- /dev/null +++ b/libraries/test_data/src/test/assets/transformerdumps/wav/sample.wav.high_pitch_then_low_pitch.dump @@ -0,0 +1,139 @@ +format 0: + averageBitrate = 131072 + sampleMimeType = audio/mp4a-latm + channelCount = 1 + sampleRate = 44100 + pcmEncoding = 2 +sample: + trackIndex = 0 + dataHashCode = 2042155098 + size = 6482 + isKeyFrame = true + presentationTimeUs = 0 +sample: + trackIndex = 0 + dataHashCode = 1417355469 + size = 8556 + isKeyFrame = true + presentationTimeUs = 73492 +sample: + trackIndex = 0 + dataHashCode = -2107697498 + size = 8754 + isKeyFrame = true + presentationTimeUs = 170498 +sample: + trackIndex = 0 + dataHashCode = 736072795 + size = 8908 + isKeyFrame = true + presentationTimeUs = 269750 +sample: + trackIndex = 0 + dataHashCode = -1913553170 + size = 9208 + isKeyFrame = true + presentationTimeUs = 370748 +sample: + trackIndex = 0 + dataHashCode = 13583718 + size = 8968 + isKeyFrame = true + presentationTimeUs = 475147 +sample: + trackIndex = 0 + dataHashCode = -1444602526 + size = 8588 + isKeyFrame = true + presentationTimeUs = 576825 +sample: + trackIndex = 0 + dataHashCode = -1693065958 + size = 8778 + isKeyFrame = true + presentationTimeUs = 674195 +sample: + trackIndex = 0 + dataHashCode = 2071205641 + size = 8736 + isKeyFrame = true + presentationTimeUs = 773718 +sample: + trackIndex 
= 0 + dataHashCode = 1433538831 + size = 8636 + isKeyFrame = true + presentationTimeUs = 872766 +sample: + trackIndex = 0 + dataHashCode = 992130724 + size = 2580 + isKeyFrame = true + presentationTimeUs = 970680 +sample: + trackIndex = 0 + dataHashCode = 2002762528 + size = 6450 + isKeyFrame = true + presentationTimeUs = 999931 +sample: + trackIndex = 0 + dataHashCode = -657557348 + size = 9288 + isKeyFrame = true + presentationTimeUs = 1073061 +sample: + trackIndex = 0 + dataHashCode = 1101737790 + size = 8026 + isKeyFrame = true + presentationTimeUs = 1178367 +sample: + trackIndex = 0 + dataHashCode = 942749699 + size = 9580 + isKeyFrame = true + presentationTimeUs = 1269365 +sample: + trackIndex = 0 + dataHashCode = 1696779430 + size = 9176 + isKeyFrame = true + presentationTimeUs = 1377981 +sample: + trackIndex = 0 + dataHashCode = -1146784158 + size = 7898 + isKeyFrame = true + presentationTimeUs = 1482018 +sample: + trackIndex = 0 + dataHashCode = -108739018 + size = 9244 + isKeyFrame = true + presentationTimeUs = 1571564 +sample: + trackIndex = 0 + dataHashCode = 1527742727 + size = 8614 + isKeyFrame = true + presentationTimeUs = 1676371 +sample: + trackIndex = 0 + dataHashCode = -771072152 + size = 8408 + isKeyFrame = true + presentationTimeUs = 1774036 +sample: + trackIndex = 0 + dataHashCode = -307907100 + size = 10212 + isKeyFrame = true + presentationTimeUs = 1869365 +sample: + trackIndex = 0 + dataHashCode = -2024887086 + size = 1312 + isKeyFrame = true + presentationTimeUs = 1985147 +released = true diff --git a/libraries/test_data/src/test/assets/transformerdumps/wav/sample_rf64.wav-high_pitch_then_sample.wav-high_pitch.dump b/libraries/test_data/src/test/assets/transformerdumps/wav/sample_rf64.wav-high_pitch_then_sample.wav-high_pitch.dump new file mode 100644 index 0000000000..8245176e14 --- /dev/null +++ b/libraries/test_data/src/test/assets/transformerdumps/wav/sample_rf64.wav-high_pitch_then_sample.wav-high_pitch.dump @@ -0,0 +1,109 @@ +format 0: + averageBitrate = 131072 + sampleMimeType = audio/mp4a-latm + channelCount = 2 + sampleRate = 48000 + pcmEncoding = 2 +sample: + trackIndex = 0 + dataHashCode = -226169394 + size = 15528 + isKeyFrame = true + presentationTimeUs = 0 +sample: + trackIndex = 0 + dataHashCode = -1654106973 + size = 17436 + isKeyFrame = true + presentationTimeUs = 80875 +sample: + trackIndex = 0 + dataHashCode = 1078555669 + size = 19384 + isKeyFrame = true + presentationTimeUs = 171687 +sample: + trackIndex = 0 + dataHashCode = -1783495042 + size = 8904 + isKeyFrame = true + presentationTimeUs = 272645 +sample: + trackIndex = 0 + dataHashCode = 1369234850 + size = 5688 + isKeyFrame = true + presentationTimeUs = 319020 +sample: + trackIndex = 0 + dataHashCode = 340823895 + size = 14108 + isKeyFrame = true + presentationTimeUs = 348645 +sample: + trackIndex = 0 + dataHashCode = 591641755 + size = 18624 + isKeyFrame = true + presentationTimeUs = 422125 +sample: + trackIndex = 0 + dataHashCode = -909124577 + size = 19056 + isKeyFrame = true + presentationTimeUs = 519125 +sample: + trackIndex = 0 + dataHashCode = -1644849415 + size = 19392 + isKeyFrame = true + presentationTimeUs = 618375 +sample: + trackIndex = 0 + dataHashCode = 660809049 + size = 20044 + isKeyFrame = true + presentationTimeUs = 719375 +sample: + trackIndex = 0 + dataHashCode = -601805369 + size = 19524 + isKeyFrame = true + presentationTimeUs = 823770 +sample: + trackIndex = 0 + dataHashCode = -1348998373 + size = 18696 + isKeyFrame = true + presentationTimeUs = 925458 +sample: + 
trackIndex = 0 + dataHashCode = 1191266277 + size = 19108 + isKeyFrame = true + presentationTimeUs = 1022833 +sample: + trackIndex = 0 + dataHashCode = -1278785599 + size = 19016 + isKeyFrame = true + presentationTimeUs = 1122354 +sample: + trackIndex = 0 + dataHashCode = -809901361 + size = 18800 + isKeyFrame = true + presentationTimeUs = 1221395 +sample: + trackIndex = 0 + dataHashCode = 163546833 + size = 5616 + isKeyFrame = true + presentationTimeUs = 1319312 +sample: + trackIndex = 0 + dataHashCode = 743627 + size = 4 + isKeyFrame = true + presentationTimeUs = 1348562 +released = true diff --git a/libraries/test_data/src/test/assets/transformerdumps/wav/sample_rf64.wav-high_pitch_then_sample.wav-low_pitch.dump b/libraries/test_data/src/test/assets/transformerdumps/wav/sample_rf64.wav-high_pitch_then_sample.wav-low_pitch.dump new file mode 100644 index 0000000000..b30b249464 --- /dev/null +++ b/libraries/test_data/src/test/assets/transformerdumps/wav/sample_rf64.wav-high_pitch_then_sample.wav-low_pitch.dump @@ -0,0 +1,109 @@ +format 0: + averageBitrate = 131072 + sampleMimeType = audio/mp4a-latm + channelCount = 2 + sampleRate = 48000 + pcmEncoding = 2 +sample: + trackIndex = 0 + dataHashCode = -226169394 + size = 15528 + isKeyFrame = true + presentationTimeUs = 0 +sample: + trackIndex = 0 + dataHashCode = -1654106973 + size = 17436 + isKeyFrame = true + presentationTimeUs = 80875 +sample: + trackIndex = 0 + dataHashCode = 1078555669 + size = 19384 + isKeyFrame = true + presentationTimeUs = 171687 +sample: + trackIndex = 0 + dataHashCode = -1783495042 + size = 8904 + isKeyFrame = true + presentationTimeUs = 272645 +sample: + trackIndex = 0 + dataHashCode = 1369234850 + size = 5688 + isKeyFrame = true + presentationTimeUs = 319020 +sample: + trackIndex = 0 + dataHashCode = 711439073 + size = 14040 + isKeyFrame = true + presentationTimeUs = 348645 +sample: + trackIndex = 0 + dataHashCode = -555451391 + size = 20216 + isKeyFrame = true + presentationTimeUs = 421770 +sample: + trackIndex = 0 + dataHashCode = 1700905759 + size = 17472 + isKeyFrame = true + presentationTimeUs = 527062 +sample: + trackIndex = 0 + dataHashCode = -416119019 + size = 20856 + isKeyFrame = true + presentationTimeUs = 618062 +sample: + trackIndex = 0 + dataHashCode = -2060733789 + size = 19976 + isKeyFrame = true + presentationTimeUs = 726687 +sample: + trackIndex = 0 + dataHashCode = 212454167 + size = 17192 + isKeyFrame = true + presentationTimeUs = 830729 +sample: + trackIndex = 0 + dataHashCode = 956501205 + size = 20124 + isKeyFrame = true + presentationTimeUs = 920270 +sample: + trackIndex = 0 + dataHashCode = 299859135 + size = 18748 + isKeyFrame = true + presentationTimeUs = 1025083 +sample: + trackIndex = 0 + dataHashCode = 636945085 + size = 18304 + isKeyFrame = true + presentationTimeUs = 1122729 +sample: + trackIndex = 0 + dataHashCode = -754285165 + size = 22232 + isKeyFrame = true + presentationTimeUs = 1218062 +sample: + trackIndex = 0 + dataHashCode = -1967816889 + size = 2856 + isKeyFrame = true + presentationTimeUs = 1333854 +sample: + trackIndex = 0 + dataHashCode = 1012987 + size = 4 + isKeyFrame = true + presentationTimeUs = 1348729 +released = true diff --git a/libraries/test_data/src/test/assets/transformerdumps/wav/sample_rf64.wav_then_sample.wav.dump b/libraries/test_data/src/test/assets/transformerdumps/wav/sample_rf64.wav_then_sample.wav.dump new file mode 100644 index 0000000000..dc95acb66d --- /dev/null +++ 
b/libraries/test_data/src/test/assets/transformerdumps/wav/sample_rf64.wav_then_sample.wav.dump @@ -0,0 +1,97 @@ +format 0: + averageBitrate = 131072 + sampleMimeType = audio/mp4a-latm + channelCount = 2 + sampleRate = 48000 + pcmEncoding = 2 +sample: + trackIndex = 0 + dataHashCode = -278103001 + size = 19200 + isKeyFrame = true + presentationTimeUs = 0 +sample: + trackIndex = 0 + dataHashCode = 1522105084 + size = 19200 + isKeyFrame = true + presentationTimeUs = 100000 +sample: + trackIndex = 0 + dataHashCode = 932319027 + size = 19200 + isKeyFrame = true + presentationTimeUs = 200000 +sample: + trackIndex = 0 + dataHashCode = 325000240 + size = 9336 + isKeyFrame = true + presentationTimeUs = 300000 +sample: + trackIndex = 0 + dataHashCode = -717358295 + size = 19196 + isKeyFrame = true + presentationTimeUs = 348625 +sample: + trackIndex = 0 + dataHashCode = -877978019 + size = 19200 + isKeyFrame = true + presentationTimeUs = 448604 +sample: + trackIndex = 0 + dataHashCode = 903565693 + size = 19200 + isKeyFrame = true + presentationTimeUs = 548604 +sample: + trackIndex = 0 + dataHashCode = -1424006305 + size = 19200 + isKeyFrame = true + presentationTimeUs = 648604 +sample: + trackIndex = 0 + dataHashCode = -1450884853 + size = 19200 + isKeyFrame = true + presentationTimeUs = 748604 +sample: + trackIndex = 0 + dataHashCode = 1566382623 + size = 19200 + isKeyFrame = true + presentationTimeUs = 848604 +sample: + trackIndex = 0 + dataHashCode = -510424155 + size = 19200 + isKeyFrame = true + presentationTimeUs = 948604 +sample: + trackIndex = 0 + dataHashCode = 13706341 + size = 19200 + isKeyFrame = true + presentationTimeUs = 1048604 +sample: + trackIndex = 0 + dataHashCode = -1421600077 + size = 19200 + isKeyFrame = true + presentationTimeUs = 1148604 +sample: + trackIndex = 0 + dataHashCode = 722747579 + size = 19200 + isKeyFrame = true + presentationTimeUs = 1248604 +sample: + trackIndex = 0 + dataHashCode = 326119 + size = 4 + isKeyFrame = true + presentationTimeUs = 1348604 +released = true diff --git a/libraries/transformer/src/main/java/androidx/media3/transformer/AudioSamplePipeline.java b/libraries/transformer/src/main/java/androidx/media3/transformer/AudioSamplePipeline.java index 05d2ff14ff..3197663bf6 100644 --- a/libraries/transformer/src/main/java/androidx/media3/transformer/AudioSamplePipeline.java +++ b/libraries/transformer/src/main/java/androidx/media3/transformer/AudioSamplePipeline.java @@ -19,10 +19,12 @@ package androidx.media3.transformer; import static androidx.media3.common.util.Assertions.checkArgument; import static androidx.media3.common.util.Assertions.checkNotNull; import static androidx.media3.common.util.Assertions.checkState; +import static androidx.media3.common.util.Assertions.checkStateNotNull; import static androidx.media3.decoder.DecoderInputBuffer.BUFFER_REPLACEMENT_MODE_DIRECT; import static androidx.media3.decoder.DecoderInputBuffer.BUFFER_REPLACEMENT_MODE_DISABLED; import static java.lang.Math.min; +import android.util.Pair; import androidx.annotation.Nullable; import androidx.media3.common.C; import androidx.media3.common.Format; @@ -30,7 +32,11 @@ import androidx.media3.common.MimeTypes; import androidx.media3.common.audio.AudioProcessingPipeline; import androidx.media3.common.audio.AudioProcessor; import androidx.media3.common.audio.AudioProcessor.AudioFormat; +import androidx.media3.common.audio.ChannelMixingAudioProcessor; +import androidx.media3.common.audio.ChannelMixingMatrix; +import androidx.media3.common.audio.SonicAudioProcessor; 
import androidx.media3.common.audio.SpeedChangingAudioProcessor; +import androidx.media3.common.util.NullableType; import androidx.media3.common.util.Util; import androidx.media3.decoder.DecoderInputBuffer; import com.google.common.collect.ImmutableList; @@ -38,6 +44,7 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.Queue; import java.util.concurrent.ConcurrentLinkedDeque; +import java.util.concurrent.atomic.AtomicReference; import org.checkerframework.dataflow.qual.Pure; /** Pipeline to process, re-encode and mux raw audio samples. */ @@ -49,12 +56,14 @@ import org.checkerframework.dataflow.qual.Pure; private final SilentAudioGenerator silentAudioGenerator; private final Queue availableInputBuffers; private final Queue pendingInputBuffers; - private final AudioProcessingPipeline audioProcessingPipeline; private final Codec encoder; private final AudioFormat encoderInputAudioFormat; private final DecoderInputBuffer encoderInputBuffer; private final DecoderInputBuffer encoderOutputBuffer; - + private final AtomicReference<@NullableType Pair> + pendingMediaItem; + private boolean receivedFirstMediaItemCallback; + private AudioProcessingPipeline audioProcessingPipeline; private long encoderTotalInputBytes; private volatile boolean queueEndOfStreamAfterSilence; @@ -64,13 +73,13 @@ import org.checkerframework.dataflow.qual.Pure; Format firstAssetLoaderInputFormat, Format firstPipelineInputFormat, TransformationRequest transformationRequest, - boolean flattenForSlowMotion, - ImmutableList audioProcessors, + EditedMediaItem firstEditedMediaItem, Codec.EncoderFactory encoderFactory, MuxerWrapper muxerWrapper, FallbackListener fallbackListener) throws ExportException { super(firstAssetLoaderInputFormat, muxerWrapper); + checkArgument(firstPipelineInputFormat.pcmEncoding != Format.NO_VALUE); availableInputBuffers = new ConcurrentLinkedDeque<>(); ByteBuffer emptyBuffer = ByteBuffer.allocateDirect(0).order(ByteOrder.nativeOrder()); @@ -80,36 +89,21 @@ import org.checkerframework.dataflow.qual.Pure; availableInputBuffers.add(inputBuffer); } pendingInputBuffers = new ConcurrentLinkedDeque<>(); - encoderInputBuffer = new DecoderInputBuffer(BUFFER_REPLACEMENT_MODE_DISABLED); encoderOutputBuffer = new DecoderInputBuffer(BUFFER_REPLACEMENT_MODE_DISABLED); - - checkArgument(firstPipelineInputFormat.pcmEncoding != Format.NO_VALUE); + pendingMediaItem = new AtomicReference<>(); AudioFormat inputAudioFormat = new AudioFormat(firstPipelineInputFormat); - silentAudioGenerator = new SilentAudioGenerator(inputAudioFormat); + audioProcessingPipeline = + configureProcessing( + /* editedMediaItem= */ firstEditedMediaItem, + /* trackFormat= */ firstPipelineInputFormat, + /* inputAudioFormat= */ inputAudioFormat, + /* requiredOutputAudioFormat= */ AudioFormat.NOT_SET); + AudioFormat outputAudioFormat = audioProcessingPipeline.getOutputAudioFormat(); + checkState(!outputAudioFormat.equals(AudioFormat.NOT_SET)); - if (flattenForSlowMotion && firstAssetLoaderInputFormat.metadata != null) { - audioProcessors = - new ImmutableList.Builder() - .add( - new SpeedChangingAudioProcessor( - new SegmentSpeedProvider(firstAssetLoaderInputFormat.metadata))) - .addAll(audioProcessors) - .build(); - } - - audioProcessingPipeline = new AudioProcessingPipeline(audioProcessors); - - try { - encoderInputAudioFormat = audioProcessingPipeline.configure(inputAudioFormat); - } catch (AudioProcessor.UnhandledAudioFormatException unhandledAudioFormatException) { - throw ExportException.createForAudioProcessing( - 
unhandledAudioFormatException, inputAudioFormat); - } - - audioProcessingPipeline.flush(); - + encoderInputAudioFormat = outputAudioFormat; Format requestedEncoderFormat = new Format.Builder() .setSampleMimeType( @@ -153,16 +147,22 @@ import org.checkerframework.dataflow.qual.Pure; if (isLast) { queueEndOfStreamAfterSilence = true; } + } else { + checkState(MimeTypes.isAudio(trackFormat.sampleMimeType)); + checkState(trackFormat.pcmEncoding != Format.NO_VALUE); + } + + if (!receivedFirstMediaItemCallback) { + receivedFirstMediaItemCallback = true; return; } - checkState(MimeTypes.isAudio(trackFormat.sampleMimeType)); - checkState(trackFormat.pcmEncoding != Format.NO_VALUE); + pendingMediaItem.set(Pair.create(editedMediaItem, trackFormat)); } @Override @Nullable public DecoderInputBuffer getInputBuffer() { - if (shouldGenerateSilence()) { + if (shouldGenerateSilence() || pendingMediaItem.get() != null) { return null; } return availableInputBuffers.peek(); @@ -170,6 +170,7 @@ import org.checkerframework.dataflow.qual.Pure; @Override public boolean queueInputBuffer() { + checkState(pendingMediaItem.get() == null); DecoderInputBuffer inputBuffer = availableInputBuffers.remove(); pendingInputBuffers.add(inputBuffer); return true; @@ -218,10 +219,34 @@ import org.checkerframework.dataflow.qual.Pure; return encoder.isEnded(); } + /** + * Reconfigures audio processing based on the pending {@linkplain #onMediaItemChanged media item + * change}. + * + *
<p>
Before reconfiguration, all pending buffers must be fully processed and drained to the + * encoder, however end of stream buffers should be handled so the encoder is not {@link + * #queueEndOfStreamToEncoder() queued end of stream}. + */ + private void reconfigureProcessingForPendingMediaItem() throws ExportException { + Pair pendingChange = + checkStateNotNull(pendingMediaItem.get()); + AudioFormat pendingAudioFormat = + pendingChange.second != null + ? new AudioFormat(pendingChange.second) + : silentAudioGenerator.audioFormat; + audioProcessingPipeline = + configureProcessing( + /* editedMediaItem= */ pendingChange.first, + /* trackFormat= */ pendingChange.second, + /* inputAudioFormat= */ pendingAudioFormat, + /* requiredOutputAudioFormat= */ encoderInputAudioFormat); + pendingMediaItem.set(null); + } + /** * Attempts to pass input data to the encoder. * - * @return Whether it may be possible to feed more data immediately by calling this method again. + * @return Whether the {@link AudioSamplePipeline} may be able to continue processing data. */ private boolean feedEncoderFromInput() throws ExportException { if (!encoder.maybeDequeueInputBuffer(encoderInputBuffer)) { @@ -234,6 +259,10 @@ import org.checkerframework.dataflow.qual.Pure; } if (pendingInputBuffers.isEmpty()) { + if (pendingMediaItem.get() != null) { + reconfigureProcessingForPendingMediaItem(); + return true; + } // Only read volatile variable queueEndOfStreamAfterSilence if there is a chance that end of // stream should be queued. if (!silentAudioGenerator.hasRemaining() && queueEndOfStreamAfterSilence) { @@ -244,7 +273,9 @@ import org.checkerframework.dataflow.qual.Pure; DecoderInputBuffer pendingInputBuffer = pendingInputBuffers.element(); if (pendingInputBuffer.isEndOfStream()) { - queueEndOfStreamToEncoder(); + if (pendingMediaItem.get() == null) { + queueEndOfStreamToEncoder(); + } removePendingInputBuffer(); return false; } @@ -260,7 +291,7 @@ import org.checkerframework.dataflow.qual.Pure; /** * Attempts to feed audio processor output data to the encoder. * - * @return Whether it may be possible to feed more data immediately by calling this method again. + * @return Whether the {@link AudioSamplePipeline} may be able to continue processing data. */ private boolean feedEncoderFromProcessingPipeline() throws ExportException { if (!encoder.maybeDequeueInputBuffer(encoderInputBuffer)) { @@ -268,9 +299,12 @@ import org.checkerframework.dataflow.qual.Pure; } ByteBuffer processingPipelineOutputBuffer = audioProcessingPipeline.getOutput(); - if (!processingPipelineOutputBuffer.hasRemaining()) { if (audioProcessingPipeline.isEnded()) { + if (pendingMediaItem.get() != null) { + reconfigureProcessingForPendingMediaItem(); + return true; + } queueEndOfStreamToEncoder(); } return false; @@ -283,7 +317,7 @@ import org.checkerframework.dataflow.qual.Pure; /** * Attempts to feed input data to the {@link AudioProcessingPipeline}. * - * @return Whether it may be possible to feed more data immediately by calling this method again. + * @return Whether the {@link AudioSamplePipeline} may be able to continue processing data. */ private boolean feedProcessingPipelineFromInput() { if (shouldGenerateSilence()) { @@ -295,7 +329,8 @@ import org.checkerframework.dataflow.qual.Pure; if (pendingInputBuffers.isEmpty()) { // Only read volatile variable queueEndOfStreamAfterSilence if there is a chance that end of // stream should be queued. 
- if (!silentAudioGenerator.hasRemaining() && queueEndOfStreamAfterSilence) { + if (pendingMediaItem.get() != null + || (!silentAudioGenerator.hasRemaining() && queueEndOfStreamAfterSilence)) { audioProcessingPipeline.queueEndOfStream(); } return false; @@ -370,4 +405,57 @@ import org.checkerframework.dataflow.qual.Pure; private boolean shouldGenerateSilence() { return silentAudioGenerator.hasRemaining() && pendingInputBuffers.isEmpty(); } + + private static AudioProcessingPipeline configureProcessing( + EditedMediaItem editedMediaItem, + @Nullable Format trackFormat, + AudioFormat inputAudioFormat, + AudioFormat requiredOutputAudioFormat) + throws ExportException { + ImmutableList.Builder audioProcessors = new ImmutableList.Builder<>(); + if (editedMediaItem.flattenForSlowMotion + && trackFormat != null + && trackFormat.metadata != null) { + audioProcessors.add( + new SpeedChangingAudioProcessor(new SegmentSpeedProvider(trackFormat.metadata))); + } + audioProcessors.addAll(editedMediaItem.effects.audioProcessors); + // Ensure the output from APP matches what the encoder is configured to receive. + if (!requiredOutputAudioFormat.equals(AudioFormat.NOT_SET)) { + SonicAudioProcessor sampleRateChanger = new SonicAudioProcessor(); + sampleRateChanger.setOutputSampleRateHz(requiredOutputAudioFormat.sampleRate); + audioProcessors.add(sampleRateChanger); + + // TODO(b/262706549): Handle channel mixing with AudioMixer. + if (requiredOutputAudioFormat.channelCount <= 2) { + // ChannelMixingMatrix.create only has defaults for mono/stereo input/output. + ChannelMixingAudioProcessor channelCountChanger = new ChannelMixingAudioProcessor(); + channelCountChanger.putChannelMixingMatrix( + ChannelMixingMatrix.create( + /* inputChannelCount= */ 1, requiredOutputAudioFormat.channelCount)); + channelCountChanger.putChannelMixingMatrix( + ChannelMixingMatrix.create( + /* inputChannelCount= */ 2, requiredOutputAudioFormat.channelCount)); + audioProcessors.add(channelCountChanger); + } + } + + AudioProcessingPipeline audioProcessingPipeline = + new AudioProcessingPipeline(audioProcessors.build()); + try { + AudioFormat outputAudioFormat = audioProcessingPipeline.configure(inputAudioFormat); + if (!requiredOutputAudioFormat.equals(AudioFormat.NOT_SET) + && !outputAudioFormat.equals(requiredOutputAudioFormat)) { + throw new AudioProcessor.UnhandledAudioFormatException( + "Audio format can not be modified to match existing downstream format", + inputAudioFormat); + } + } catch (AudioProcessor.UnhandledAudioFormatException unhandledAudioFormatException) { + throw ExportException.createForAudioProcessing( + unhandledAudioFormatException, inputAudioFormat); + } + + audioProcessingPipeline.flush(); + return audioProcessingPipeline; + } } diff --git a/libraries/transformer/src/main/java/androidx/media3/transformer/SilentAudioGenerator.java b/libraries/transformer/src/main/java/androidx/media3/transformer/SilentAudioGenerator.java index 39e038af60..43b74ac81a 100644 --- a/libraries/transformer/src/main/java/androidx/media3/transformer/SilentAudioGenerator.java +++ b/libraries/transformer/src/main/java/androidx/media3/transformer/SilentAudioGenerator.java @@ -26,7 +26,9 @@ import java.util.concurrent.atomic.AtomicLong; /* package */ final class SilentAudioGenerator { private static final int DEFAULT_BUFFER_SIZE_FRAMES = 1024; - private final AudioFormat audioFormat; + /** The {@link AudioFormat} of the silent audio generated. 
*/ + public final AudioFormat audioFormat; + private final ByteBuffer internalBuffer; private final AtomicLong remainingBytesToOutput; diff --git a/libraries/transformer/src/main/java/androidx/media3/transformer/TransformerInternal.java b/libraries/transformer/src/main/java/androidx/media3/transformer/TransformerInternal.java index 8dedc7f68c..4e2344981a 100644 --- a/libraries/transformer/src/main/java/androidx/media3/transformer/TransformerInternal.java +++ b/libraries/transformer/src/main/java/androidx/media3/transformer/TransformerInternal.java @@ -555,8 +555,7 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull; addedTrackInfo.firstAssetLoaderInputFormat, /* firstPipelineInputFormat= */ firstAssetLoaderOutputFormat, transformationRequest, - firstEditedMediaItem.flattenForSlowMotion, - firstEditedMediaItem.effects.audioProcessors, + firstEditedMediaItem, encoderFactory, muxerWrapper, fallbackListener); diff --git a/libraries/transformer/src/test/java/androidx/media3/transformer/SequenceExportTest.java b/libraries/transformer/src/test/java/androidx/media3/transformer/SequenceExportTest.java index 3853a04ab8..b822c54cee 100644 --- a/libraries/transformer/src/test/java/androidx/media3/transformer/SequenceExportTest.java +++ b/libraries/transformer/src/test/java/androidx/media3/transformer/SequenceExportTest.java @@ -19,6 +19,7 @@ package androidx.media3.transformer; import static androidx.media3.common.util.Assertions.checkNotNull; import static androidx.media3.transformer.TestUtil.ASSET_URI_PREFIX; import static androidx.media3.transformer.TestUtil.FILE_AUDIO_RAW; +import static androidx.media3.transformer.TestUtil.FILE_AUDIO_RAW_STEREO_48000KHZ; import static androidx.media3.transformer.TestUtil.FILE_AUDIO_VIDEO; import static androidx.media3.transformer.TestUtil.FILE_AUDIO_VIDEO_INCREASING_TIMESTAMPS_15S; import static androidx.media3.transformer.TestUtil.FILE_AUDIO_VIDEO_STEREO; @@ -43,7 +44,6 @@ import java.nio.file.Files; import java.nio.file.Paths; import org.junit.After; import org.junit.Before; -import org.junit.Ignore; import org.junit.Test; import org.junit.runner.RunWith; @@ -233,7 +233,7 @@ public final class SequenceExportTest { } @Test - public void start_concatenateSameAudioItem_completesSuccessfully() throws Exception { + public void concatenateTwoAudioItems_withSameFormat_completesSuccessfully() throws Exception { Transformer transformer = createTransformerBuilder(testMuxerHolder, /* enableFallback= */ false).build(); MediaItem audioOnlyMediaItem = MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW); @@ -253,7 +253,8 @@ public final class SequenceExportTest { } @Test - public void start_concatenateSameAudioItemWithEffects_completesSuccessfully() throws Exception { + public void concatenateTwoAudioItems_withSameFormatAndSameEffects_completesSuccessfully() + throws Exception { Transformer transformer = createTransformerBuilder(testMuxerHolder, /* enableFallback= */ false).build(); MediaItem audioOnlyMediaItem = MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW); @@ -277,8 +278,7 @@ public final class SequenceExportTest { } @Test - @Ignore("Handle MediaItem effects changes (See [internal: b/274093424]).") - public void start_concatenateSameAudioItemWithDifferentEffects_completesSuccessfully() + public void concatenateTwoAudioItems_withSameFormatAndDiffEffects_completesSuccessfully() throws Exception { Transformer transformer = createTransformerBuilder(testMuxerHolder, /* enableFallback= */ false).build(); @@ -313,4 +313,99 @@ public final class 
SequenceExportTest { checkNotNull(testMuxerHolder.testMuxer), getDumpFileName(FILE_AUDIO_RAW + ".high_pitch_then_low_pitch")); } + + @Test + public void concatenateTwoAudioItems_withDiffFormat_completesSuccessfully() throws Exception { + Transformer transformer = + createTransformerBuilder(testMuxerHolder, /* enableFallback= */ false).build(); + MediaItem stereo48000Audio = + MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW_STEREO_48000KHZ); + MediaItem mono44100Audio = MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW); + EditedMediaItemSequence editedMediaItemSequence = + new EditedMediaItemSequence( + ImmutableList.of( + new EditedMediaItem.Builder(stereo48000Audio).build(), + new EditedMediaItem.Builder(mono44100Audio).build())); + Composition composition = + new Composition.Builder(ImmutableList.of(editedMediaItemSequence)).build(); + + transformer.start(composition, outputPath); + TransformerTestRunner.runLooper(transformer); + + DumpFileAsserts.assertOutput( + context, + checkNotNull(testMuxerHolder.testMuxer), + getDumpFileName(FILE_AUDIO_RAW_STEREO_48000KHZ + "_then_sample.wav")); + } + + @Test + public void concatenateTwoAudioItems_withDiffFormatAndSameEffects_completesSuccessfully() + throws Exception { + Transformer transformer = + createTransformerBuilder(testMuxerHolder, /* enableFallback= */ false).build(); + + Effects highPitch = + new Effects( + ImmutableList.of(createPitchChangingAudioProcessor(/* pitch= */ 2f)), + /* videoEffects= */ ImmutableList.of()); + + EditedMediaItem stereo48000Audio = + new EditedMediaItem.Builder( + MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW_STEREO_48000KHZ)) + .setEffects(highPitch) + .build(); + EditedMediaItem mono44100Audio = + new EditedMediaItem.Builder(MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW)) + .setEffects(highPitch) + .build(); + + EditedMediaItemSequence editedMediaItemSequence = + new EditedMediaItemSequence(ImmutableList.of(stereo48000Audio, mono44100Audio)); + Composition composition = + new Composition.Builder(ImmutableList.of(editedMediaItemSequence)).build(); + + transformer.start(composition, outputPath); + TransformerTestRunner.runLooper(transformer); + + DumpFileAsserts.assertOutput( + context, + checkNotNull(testMuxerHolder.testMuxer), + getDumpFileName(FILE_AUDIO_RAW_STEREO_48000KHZ + "-high_pitch_then_sample.wav-high_pitch")); + } + + @Test + public void concatenateTwoAudioItems_withDiffFormatAndDiffEffects_completesSuccessfully() + throws Exception { + Transformer transformer = + createTransformerBuilder(testMuxerHolder, /* enableFallback= */ false).build(); + + EditedMediaItem stereo48000AudioHighPitch = + new EditedMediaItem.Builder( + MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW_STEREO_48000KHZ)) + .setEffects( + new Effects( + ImmutableList.of(createPitchChangingAudioProcessor(/* pitch= */ 2f)), + /* videoEffects= */ ImmutableList.of())) + .build(); + EditedMediaItem mono44100AudioLowPitch = + new EditedMediaItem.Builder(MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW)) + .setEffects( + new Effects( + ImmutableList.of(createPitchChangingAudioProcessor(/* pitch= */ 0.5f)), + /* videoEffects= */ ImmutableList.of())) + .build(); + EditedMediaItemSequence editedMediaItemSequence = + new EditedMediaItemSequence( + ImmutableList.of(stereo48000AudioHighPitch, mono44100AudioLowPitch)); + Composition composition = + new Composition.Builder(ImmutableList.of(editedMediaItemSequence)).build(); + + transformer.start(composition, outputPath); + TransformerTestRunner.runLooper(transformer); + + 
DumpFileAsserts.assertOutput( + context, + checkNotNull(testMuxerHolder.testMuxer), + getDumpFileName(FILE_AUDIO_RAW_STEREO_48000KHZ + "-high_pitch_then_sample.wav-low_pitch")); + } } diff --git a/libraries/transformer/src/test/java/androidx/media3/transformer/TestUtil.java b/libraries/transformer/src/test/java/androidx/media3/transformer/TestUtil.java index dec49f1d4d..c614f20733 100644 --- a/libraries/transformer/src/test/java/androidx/media3/transformer/TestUtil.java +++ b/libraries/transformer/src/test/java/androidx/media3/transformer/TestUtil.java @@ -153,6 +153,7 @@ public final class TestUtil { public static final String FILE_AUDIO_VIDEO_INCREASING_TIMESTAMPS_15S = "mp4/sample_with_increasing_timestamps_320w_240h.mp4"; public static final String FILE_AUDIO_RAW = "wav/sample.wav"; + public static final String FILE_AUDIO_RAW_STEREO_48000KHZ = "wav/sample_rf64.wav"; public static final String FILE_WITH_SUBTITLES = "mkv/sample_with_srt.mkv"; public static final String FILE_WITH_SEF_SLOW_MOTION = "mp4/sample_sef_slow_motion.mp4"; public static final String FILE_AUDIO_UNSUPPORTED_BY_DECODER = "amr/sample_wb.amr"; @@ -168,8 +169,8 @@ public final class TestUtil { public static void createEncodersAndDecoders() { ShadowMediaCodec.CodecConfig codecConfig = new ShadowMediaCodec.CodecConfig( - /* inputBufferSize= */ 10_000, - /* outputBufferSize= */ 10_000, + /* inputBufferSize= */ 100_000, + /* outputBufferSize= */ 100_000, /* codec= */ (in, out) -> out.put(in)); addCodec( MimeTypes.AUDIO_AAC, @@ -194,8 +195,8 @@ public final class TestUtil { ShadowMediaCodec.CodecConfig throwingCodecConfig = new ShadowMediaCodec.CodecConfig( - /* inputBufferSize= */ 10_000, - /* outputBufferSize= */ 10_000, + /* inputBufferSize= */ 100_000, + /* outputBufferSize= */ 100_000, new ShadowMediaCodec.CodecConfig.Codec() { @Override
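
// ---------------------------------------------------------------------------------------------
// Illustrative sketch (not part of the patch above): a minimal, hedged example of the
// reconfiguration approach this change adds in AudioSamplePipeline.configureProcessing. On a
// MediaItem change, the new item's effects are rebuilt into an AudioProcessingPipeline, and a
// SonicAudioProcessor plus ChannelMixingAudioProcessor are appended so the pipeline's output
// still matches the AudioFormat the encoder was originally configured with. The class and
// method names below (PipelineReconfigurationSketch, buildMatchingPipeline) are hypothetical;
// only the media3/Guava calls that already appear in the patch are assumed.
// ---------------------------------------------------------------------------------------------
import androidx.media3.common.C;
import androidx.media3.common.audio.AudioProcessingPipeline;
import androidx.media3.common.audio.AudioProcessor;
import androidx.media3.common.audio.AudioProcessor.AudioFormat;
import androidx.media3.common.audio.ChannelMixingAudioProcessor;
import androidx.media3.common.audio.ChannelMixingMatrix;
import androidx.media3.common.audio.SonicAudioProcessor;
import com.google.common.collect.ImmutableList;

final class PipelineReconfigurationSketch {

  /**
   * Builds a pipeline whose output matches {@code requiredOutputAudioFormat}, mirroring the
   * resampling/channel-mixing fallback added in this change.
   */
  static AudioProcessingPipeline buildMatchingPipeline(
      ImmutableList<AudioProcessor> itemEffects,
      AudioFormat inputAudioFormat,
      AudioFormat requiredOutputAudioFormat)
      throws AudioProcessor.UnhandledAudioFormatException {
    ImmutableList.Builder<AudioProcessor> processors = new ImmutableList.Builder<>();
    // The new media item's own audio effects come first.
    processors.addAll(itemEffects);

    if (!requiredOutputAudioFormat.equals(AudioFormat.NOT_SET)) {
      // Resample to the sample rate the encoder already expects.
      SonicAudioProcessor resampler = new SonicAudioProcessor();
      resampler.setOutputSampleRateHz(requiredOutputAudioFormat.sampleRate);
      processors.add(resampler);

      // ChannelMixingMatrix.create only provides defaults for mono/stereo, so channel mixing is
      // only attempted when the encoder expects at most two channels.
      if (requiredOutputAudioFormat.channelCount <= 2) {
        ChannelMixingAudioProcessor channelMixer = new ChannelMixingAudioProcessor();
        channelMixer.putChannelMixingMatrix(
            ChannelMixingMatrix.create(
                /* inputChannelCount= */ 1, requiredOutputAudioFormat.channelCount));
        channelMixer.putChannelMixingMatrix(
            ChannelMixingMatrix.create(
                /* inputChannelCount= */ 2, requiredOutputAudioFormat.channelCount));
        processors.add(channelMixer);
      }
    }

    AudioProcessingPipeline pipeline = new AudioProcessingPipeline(processors.build());
    AudioFormat outputAudioFormat = pipeline.configure(inputAudioFormat);
    // If the pipeline still cannot produce the encoder's format, fail rather than feed the
    // encoder buffers it cannot accept.
    if (!requiredOutputAudioFormat.equals(AudioFormat.NOT_SET)
        && !outputAudioFormat.equals(requiredOutputAudioFormat)) {
      throw new AudioProcessor.UnhandledAudioFormatException(
          "Audio format can not be modified to match existing downstream format", inputAudioFormat);
    }
    pipeline.flush();
    return pipeline;
  }

  // Example usage: a second item arriving as mono 44.1 kHz while the encoder was configured for
  // stereo 48 kHz PCM, as in the sample_rf64.wav_then_sample.wav test above.
  public static void main(String[] args) throws Exception {
    AudioFormat secondItemFormat =
        new AudioFormat(/* sampleRate= */ 44_100, /* channelCount= */ 1, C.ENCODING_PCM_16BIT);
    AudioFormat encoderInputFormat =
        new AudioFormat(/* sampleRate= */ 48_000, /* channelCount= */ 2, C.ENCODING_PCM_16BIT);
    AudioProcessingPipeline pipeline =
        buildMatchingPipeline(ImmutableList.of(), secondItemFormat, encoderInputFormat);
    System.out.println(pipeline.getOutputAudioFormat().sampleRate); // Expected: 48000.
  }
}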