diff --git a/libraries/common/src/main/java/androidx/media3/common/audio/AudioProcessingPipeline.java b/libraries/common/src/main/java/androidx/media3/common/audio/AudioProcessingPipeline.java index 346b21be97..03ff279d6e 100644 --- a/libraries/common/src/main/java/androidx/media3/common/audio/AudioProcessingPipeline.java +++ b/libraries/common/src/main/java/androidx/media3/common/audio/AudioProcessingPipeline.java @@ -170,7 +170,12 @@ public final class AudioProcessingPipeline { } } - /** Returns the {@link AudioFormat} currently being output. */ + /** + * Returns the {@link AudioFormat} of data being output through {@link #getOutput()}. + * + * @return The {@link AudioFormat} currently being output, or {@link AudioFormat#NOT_SET} if no + * {@linkplain #configure(AudioFormat) configuration} has been {@linkplain #flush() applied}. + */ public AudioFormat getOutputAudioFormat() { return outputAudioFormat; } diff --git a/libraries/test_data/src/test/assets/transformerdumps/mp4/testvid_1022ms.mp4.silence_then_audio_with_effects.dump b/libraries/test_data/src/test/assets/transformerdumps/mp4/testvid_1022ms.mp4.silence_then_audio_with_effects.dump index 31e1643c34..9f90acb445 100644 --- a/libraries/test_data/src/test/assets/transformerdumps/mp4/testvid_1022ms.mp4.silence_then_audio_with_effects.dump +++ b/libraries/test_data/src/test/assets/transformerdumps/mp4/testvid_1022ms.mp4.silence_then_audio_with_effects.dump @@ -637,110 +637,38 @@ sample: presentationTimeUs = 967709 sample: trackIndex = 1 - dataHashCode = -1759454975 - size = 440 + dataHashCode = 1916158593 + size = 5164 isKeyFrame = true presentationTimeUs = 992653 sample: trackIndex = 1 - dataHashCode = -1759454975 - size = 440 + dataHashCode = -1950877403 + size = 1120 isKeyFrame = true - presentationTimeUs = 995147 + presentationTimeUs = 1021927 sample: trackIndex = 1 - dataHashCode = -1759454975 - size = 440 - isKeyFrame = true - presentationTimeUs = 997641 -sample: - trackIndex = 1 - dataHashCode = -1759454975 - size = 440 - isKeyFrame = true - presentationTimeUs = 1000136 -sample: - trackIndex = 1 - dataHashCode = -1759454975 - size = 440 - isKeyFrame = true - presentationTimeUs = 1002630 -sample: - trackIndex = 1 - dataHashCode = -1759454975 - size = 440 - isKeyFrame = true - presentationTimeUs = 1005124 -sample: - trackIndex = 1 - dataHashCode = -1759454975 - size = 440 - isKeyFrame = true - presentationTimeUs = 1007619 -sample: - trackIndex = 1 - dataHashCode = -1759454975 - size = 440 - isKeyFrame = true - presentationTimeUs = 1010113 -sample: - trackIndex = 1 - dataHashCode = -1759454975 - size = 440 - isKeyFrame = true - presentationTimeUs = 1012607 -sample: - trackIndex = 1 - dataHashCode = -1759454975 - size = 440 - isKeyFrame = true - presentationTimeUs = 1015102 -sample: - trackIndex = 1 - dataHashCode = -2121773972 - size = 440 - isKeyFrame = true - presentationTimeUs = 1017596 -sample: - trackIndex = 1 - dataHashCode = 1517597454 - size = 440 - isKeyFrame = true - presentationTimeUs = 1020090 -sample: - trackIndex = 1 - dataHashCode = 1952035733 - size = 448 - isKeyFrame = true - presentationTimeUs = 1022585 -sample: - trackIndex = 1 - dataHashCode = -1861807761 - size = 604 - isKeyFrame = true - presentationTimeUs = 1025124 -sample: - trackIndex = 1 - dataHashCode = -1460926592 + dataHashCode = 1590174306 size = 880 isKeyFrame = true - presentationTimeUs = 1028548 + presentationTimeUs = 1028276 sample: trackIndex = 1 - dataHashCode = 1194085269 - size = 532 + dataHashCode = -432400310 + size = 1588 isKeyFrame = true - presentationTimeUs = 1033537 + presentationTimeUs = 1033265 sample: trackIndex = 1 - dataHashCode = -493118955 - size = 2296 + dataHashCode = -270222999 + size = 832 isKeyFrame = true - presentationTimeUs = 1036553 + presentationTimeUs = 1042267 sample: trackIndex = 1 - dataHashCode = 90420756 - size = 4568 + dataHashCode = -1941634508 + size = 5028 isKeyFrame = true - presentationTimeUs = 1049569 + presentationTimeUs = 1046984 released = true diff --git a/libraries/test_data/src/test/assets/transformerdumps/wav/sample.wav.concatenated_high_pitch.dump b/libraries/test_data/src/test/assets/transformerdumps/wav/sample.wav.concatenated_high_pitch.dump index 342660787d..661e5f200a 100644 --- a/libraries/test_data/src/test/assets/transformerdumps/wav/sample.wav.concatenated_high_pitch.dump +++ b/libraries/test_data/src/test/assets/transformerdumps/wav/sample.wav.concatenated_high_pitch.dump @@ -66,68 +66,74 @@ sample: presentationTimeUs = 872766 sample: trackIndex = 0 - dataHashCode = -949798077 - size = 9424 + dataHashCode = 992130724 + size = 2580 isKeyFrame = true presentationTimeUs = 970680 sample: trackIndex = 0 - dataHashCode = -1275686831 - size = 8088 + dataHashCode = 2042155098 + size = 6482 isKeyFrame = true - presentationTimeUs = 1077528 + presentationTimeUs = 999931 sample: trackIndex = 0 - dataHashCode = -1360039206 - size = 8858 + dataHashCode = 1417355469 + size = 8556 isKeyFrame = true - presentationTimeUs = 1169229 + presentationTimeUs = 1073424 +sample: + trackIndex = 0 + dataHashCode = -2107697498 + size = 8754 + isKeyFrame = true + presentationTimeUs = 1170430 sample: trackIndex = 0 dataHashCode = 736072795 size = 8908 isKeyFrame = true - presentationTimeUs = 1269659 + presentationTimeUs = 1269682 sample: trackIndex = 0 dataHashCode = -1913553170 size = 9208 isKeyFrame = true - presentationTimeUs = 1370657 + presentationTimeUs = 1370680 sample: trackIndex = 0 dataHashCode = 13583718 size = 8968 isKeyFrame = true - presentationTimeUs = 1475056 + presentationTimeUs = 1475079 sample: trackIndex = 0 dataHashCode = -1444602526 size = 8588 isKeyFrame = true - presentationTimeUs = 1576734 + presentationTimeUs = 1576757 sample: trackIndex = 0 dataHashCode = -1693065958 size = 8778 isKeyFrame = true - presentationTimeUs = 1674104 + presentationTimeUs = 1674126 sample: trackIndex = 0 dataHashCode = 2071205641 size = 8736 isKeyFrame = true - presentationTimeUs = 1773628 + presentationTimeUs = 1773650 sample: trackIndex = 0 dataHashCode = 1433538831 size = 8636 isKeyFrame = true - presentationTimeUs = 1872675 + presentationTimeUs = 1872698 sample: trackIndex = 0 dataHashCode = 992130724 size = 2580 isKeyFrame = true - presentationTimeUs = 1970589 + presentationTimeUs = 1970612 released = true diff --git a/libraries/test_data/src/test/assets/transformerdumps/wav/sample.wav.high_pitch_then_low_pitch.dump b/libraries/test_data/src/test/assets/transformerdumps/wav/sample.wav.high_pitch_then_low_pitch.dump new file mode 100644 index 0000000000..18f66c5646 --- /dev/null +++ b/libraries/test_data/src/test/assets/transformerdumps/wav/sample.wav.high_pitch_then_low_pitch.dump @@ -0,0 +1,139 @@ +format 0: + averageBitrate = 131072 + sampleMimeType = audio/mp4a-latm + channelCount = 1 + sampleRate = 44100 + pcmEncoding = 2 +sample: + trackIndex = 0 + dataHashCode = 2042155098 + size = 6482 + isKeyFrame = true + presentationTimeUs = 0 +sample: + trackIndex = 0 + dataHashCode = 1417355469 + size = 8556 + isKeyFrame = true + presentationTimeUs = 73492 +sample: + trackIndex = 0 + dataHashCode = -2107697498 + size = 8754 + isKeyFrame = true + presentationTimeUs = 170498 +sample: + trackIndex = 0 + dataHashCode = 736072795 + size = 8908 + isKeyFrame = true + presentationTimeUs = 269750 +sample: + trackIndex = 0 + dataHashCode = -1913553170 + size = 9208 + isKeyFrame = true + presentationTimeUs = 370748 +sample: + trackIndex = 0 + dataHashCode = 13583718 + size = 8968 + isKeyFrame = true + presentationTimeUs = 475147 +sample: + trackIndex = 0 + dataHashCode = -1444602526 + size = 8588 + isKeyFrame = true + presentationTimeUs = 576825 +sample: + trackIndex = 0 + dataHashCode = -1693065958 + size = 8778 + isKeyFrame = true + presentationTimeUs = 674195 +sample: + trackIndex = 0 + dataHashCode = 2071205641 + size = 8736 + isKeyFrame = true + presentationTimeUs = 773718 +sample: + trackIndex = 0 + dataHashCode = 1433538831 + size = 8636 + isKeyFrame = true + presentationTimeUs = 872766 +sample: + trackIndex = 0 + dataHashCode = 992130724 + size = 2580 + isKeyFrame = true + presentationTimeUs = 970680 +sample: + trackIndex = 0 + dataHashCode = 2002762528 + size = 6450 + isKeyFrame = true + presentationTimeUs = 999931 +sample: + trackIndex = 0 + dataHashCode = -657557348 + size = 9288 + isKeyFrame = true + presentationTimeUs = 1073061 +sample: + trackIndex = 0 + dataHashCode = 1101737790 + size = 8026 + isKeyFrame = true + presentationTimeUs = 1178367 +sample: + trackIndex = 0 + dataHashCode = 942749699 + size = 9580 + isKeyFrame = true + presentationTimeUs = 1269365 +sample: + trackIndex = 0 + dataHashCode = 1696779430 + size = 9176 + isKeyFrame = true + presentationTimeUs = 1377981 +sample: + trackIndex = 0 + dataHashCode = -1146784158 + size = 7898 + isKeyFrame = true + presentationTimeUs = 1482018 +sample: + trackIndex = 0 + dataHashCode = -108739018 + size = 9244 + isKeyFrame = true + presentationTimeUs = 1571564 +sample: + trackIndex = 0 + dataHashCode = 1527742727 + size = 8614 + isKeyFrame = true + presentationTimeUs = 1676371 +sample: + trackIndex = 0 + dataHashCode = -771072152 + size = 8408 + isKeyFrame = true + presentationTimeUs = 1774036 +sample: + trackIndex = 0 + dataHashCode = -307907100 + size = 10212 + isKeyFrame = true + presentationTimeUs = 1869365 +sample: + trackIndex = 0 + dataHashCode = -2024887086 + size = 1312 + isKeyFrame = true + presentationTimeUs = 1985147 +released = true diff --git a/libraries/test_data/src/test/assets/transformerdumps/wav/sample_rf64.wav-high_pitch_then_sample.wav-high_pitch.dump b/libraries/test_data/src/test/assets/transformerdumps/wav/sample_rf64.wav-high_pitch_then_sample.wav-high_pitch.dump new file mode 100644 index 0000000000..8245176e14 --- /dev/null +++ b/libraries/test_data/src/test/assets/transformerdumps/wav/sample_rf64.wav-high_pitch_then_sample.wav-high_pitch.dump @@ -0,0 +1,109 @@ +format 0: + averageBitrate = 131072 + sampleMimeType = audio/mp4a-latm + channelCount = 2 + sampleRate = 48000 + pcmEncoding = 2 +sample: + trackIndex = 0 + dataHashCode = -226169394 + size = 15528 + isKeyFrame = true + presentationTimeUs = 0 +sample: + trackIndex = 0 + dataHashCode = -1654106973 + size = 17436 + isKeyFrame = true + presentationTimeUs = 80875 +sample: + trackIndex = 0 + dataHashCode = 1078555669 + size = 19384 + isKeyFrame = true + presentationTimeUs = 171687 +sample: + trackIndex = 0 + dataHashCode = -1783495042 + size = 8904 + isKeyFrame = true + presentationTimeUs = 272645 +sample: + trackIndex = 0 + dataHashCode = 1369234850 + size = 5688 + isKeyFrame = true + presentationTimeUs = 319020 +sample: + trackIndex = 0 + dataHashCode = 340823895 + size = 14108 + isKeyFrame = true + presentationTimeUs = 348645 +sample: + trackIndex = 0 + dataHashCode = 591641755 + size = 18624 + isKeyFrame = true + presentationTimeUs = 422125 +sample: + trackIndex = 0 + dataHashCode = -909124577 + size = 19056 + isKeyFrame = true + presentationTimeUs = 519125 +sample: + trackIndex = 0 + dataHashCode = -1644849415 + size = 19392 + isKeyFrame = true + presentationTimeUs = 618375 +sample: + trackIndex = 0 + dataHashCode = 660809049 + size = 20044 + isKeyFrame = true + presentationTimeUs = 719375 +sample: + trackIndex = 0 + dataHashCode = -601805369 + size = 19524 + isKeyFrame = true + presentationTimeUs = 823770 +sample: + trackIndex = 0 + dataHashCode = -1348998373 + size = 18696 + isKeyFrame = true + presentationTimeUs = 925458 +sample: + trackIndex = 0 + dataHashCode = 1191266277 + size = 19108 + isKeyFrame = true + presentationTimeUs = 1022833 +sample: + trackIndex = 0 + dataHashCode = -1278785599 + size = 19016 + isKeyFrame = true + presentationTimeUs = 1122354 +sample: + trackIndex = 0 + dataHashCode = -809901361 + size = 18800 + isKeyFrame = true + presentationTimeUs = 1221395 +sample: + trackIndex = 0 + dataHashCode = 163546833 + size = 5616 + isKeyFrame = true + presentationTimeUs = 1319312 +sample: + trackIndex = 0 + dataHashCode = 743627 + size = 4 + isKeyFrame = true + presentationTimeUs = 1348562 +released = true diff --git a/libraries/test_data/src/test/assets/transformerdumps/wav/sample_rf64.wav-high_pitch_then_sample.wav-low_pitch.dump b/libraries/test_data/src/test/assets/transformerdumps/wav/sample_rf64.wav-high_pitch_then_sample.wav-low_pitch.dump new file mode 100644 index 0000000000..b30b249464 --- /dev/null +++ b/libraries/test_data/src/test/assets/transformerdumps/wav/sample_rf64.wav-high_pitch_then_sample.wav-low_pitch.dump @@ -0,0 +1,109 @@ +format 0: + averageBitrate = 131072 + sampleMimeType = audio/mp4a-latm + channelCount = 2 + sampleRate = 48000 + pcmEncoding = 2 +sample: + trackIndex = 0 + dataHashCode = -226169394 + size = 15528 + isKeyFrame = true + presentationTimeUs = 0 +sample: + trackIndex = 0 + dataHashCode = -1654106973 + size = 17436 + isKeyFrame = true + presentationTimeUs = 80875 +sample: + trackIndex = 0 + dataHashCode = 1078555669 + size = 19384 + isKeyFrame = true + presentationTimeUs = 171687 +sample: + trackIndex = 0 + dataHashCode = -1783495042 + size = 8904 + isKeyFrame = true + presentationTimeUs = 272645 +sample: + trackIndex = 0 + dataHashCode = 1369234850 + size = 5688 + isKeyFrame = true + presentationTimeUs = 319020 +sample: + trackIndex = 0 + dataHashCode = 711439073 + size = 14040 + isKeyFrame = true + presentationTimeUs = 348645 +sample: + trackIndex = 0 + dataHashCode = -555451391 + size = 20216 + isKeyFrame = true + presentationTimeUs = 421770 +sample: + trackIndex = 0 + dataHashCode = 1700905759 + size = 17472 + isKeyFrame = true + presentationTimeUs = 527062 +sample: + trackIndex = 0 + dataHashCode = -416119019 + size = 20856 + isKeyFrame = true + presentationTimeUs = 618062 +sample: + trackIndex = 0 + dataHashCode = -2060733789 + size = 19976 + isKeyFrame = true + presentationTimeUs = 726687 +sample: + trackIndex = 0 + dataHashCode = 212454167 + size = 17192 + isKeyFrame = true + presentationTimeUs = 830729 +sample: + trackIndex = 0 + dataHashCode = 956501205 + size = 20124 + isKeyFrame = true + presentationTimeUs = 920270 +sample: + trackIndex = 0 + dataHashCode = 299859135 + size = 18748 + isKeyFrame = true + presentationTimeUs = 1025083 +sample: + trackIndex = 0 + dataHashCode = 636945085 + size = 18304 + isKeyFrame = true + presentationTimeUs = 1122729 +sample: + trackIndex = 0 + dataHashCode = -754285165 + size = 22232 + isKeyFrame = true + presentationTimeUs = 1218062 +sample: + trackIndex = 0 + dataHashCode = -1967816889 + size = 2856 + isKeyFrame = true + presentationTimeUs = 1333854 +sample: + trackIndex = 0 + dataHashCode = 1012987 + size = 4 + isKeyFrame = true + presentationTimeUs = 1348729 +released = true diff --git a/libraries/test_data/src/test/assets/transformerdumps/wav/sample_rf64.wav_then_sample.wav.dump b/libraries/test_data/src/test/assets/transformerdumps/wav/sample_rf64.wav_then_sample.wav.dump new file mode 100644 index 0000000000..dc95acb66d --- /dev/null +++ b/libraries/test_data/src/test/assets/transformerdumps/wav/sample_rf64.wav_then_sample.wav.dump @@ -0,0 +1,97 @@ +format 0: + averageBitrate = 131072 + sampleMimeType = audio/mp4a-latm + channelCount = 2 + sampleRate = 48000 + pcmEncoding = 2 +sample: + trackIndex = 0 + dataHashCode = -278103001 + size = 19200 + isKeyFrame = true + presentationTimeUs = 0 +sample: + trackIndex = 0 + dataHashCode = 1522105084 + size = 19200 + isKeyFrame = true + presentationTimeUs = 100000 +sample: + trackIndex = 0 + dataHashCode = 932319027 + size = 19200 + isKeyFrame = true + presentationTimeUs = 200000 +sample: + trackIndex = 0 + dataHashCode = 325000240 + size = 9336 + isKeyFrame = true + presentationTimeUs = 300000 +sample: + trackIndex = 0 + dataHashCode = -717358295 + size = 19196 + isKeyFrame = true + presentationTimeUs = 348625 +sample: + trackIndex = 0 + dataHashCode = -877978019 + size = 19200 + isKeyFrame = true + presentationTimeUs = 448604 +sample: + trackIndex = 0 + dataHashCode = 903565693 + size = 19200 + isKeyFrame = true + presentationTimeUs = 548604 +sample: + trackIndex = 0 + dataHashCode = -1424006305 + size = 19200 + isKeyFrame = true + presentationTimeUs = 648604 +sample: + trackIndex = 0 + dataHashCode = -1450884853 + size = 19200 + isKeyFrame = true + presentationTimeUs = 748604 +sample: + trackIndex = 0 + dataHashCode = 1566382623 + size = 19200 + isKeyFrame = true + presentationTimeUs = 848604 +sample: + trackIndex = 0 + dataHashCode = -510424155 + size = 19200 + isKeyFrame = true + presentationTimeUs = 948604 +sample: + trackIndex = 0 + dataHashCode = 13706341 + size = 19200 + isKeyFrame = true + presentationTimeUs = 1048604 +sample: + trackIndex = 0 + dataHashCode = -1421600077 + size = 19200 + isKeyFrame = true + presentationTimeUs = 1148604 +sample: + trackIndex = 0 + dataHashCode = 722747579 + size = 19200 + isKeyFrame = true + presentationTimeUs = 1248604 +sample: + trackIndex = 0 + dataHashCode = 326119 + size = 4 + isKeyFrame = true + presentationTimeUs = 1348604 +released = true diff --git a/libraries/transformer/src/main/java/androidx/media3/transformer/AudioSamplePipeline.java b/libraries/transformer/src/main/java/androidx/media3/transformer/AudioSamplePipeline.java index 05d2ff14ff..3197663bf6 100644 --- a/libraries/transformer/src/main/java/androidx/media3/transformer/AudioSamplePipeline.java +++ b/libraries/transformer/src/main/java/androidx/media3/transformer/AudioSamplePipeline.java @@ -19,10 +19,12 @@ package androidx.media3.transformer; import static androidx.media3.common.util.Assertions.checkArgument; import static androidx.media3.common.util.Assertions.checkNotNull; import static androidx.media3.common.util.Assertions.checkState; +import static androidx.media3.common.util.Assertions.checkStateNotNull; import static androidx.media3.decoder.DecoderInputBuffer.BUFFER_REPLACEMENT_MODE_DIRECT; import static androidx.media3.decoder.DecoderInputBuffer.BUFFER_REPLACEMENT_MODE_DISABLED; import static java.lang.Math.min; +import android.util.Pair; import androidx.annotation.Nullable; import androidx.media3.common.C; import androidx.media3.common.Format; @@ -30,7 +32,11 @@ import androidx.media3.common.MimeTypes; import androidx.media3.common.audio.AudioProcessingPipeline; import androidx.media3.common.audio.AudioProcessor; import androidx.media3.common.audio.AudioProcessor.AudioFormat; +import androidx.media3.common.audio.ChannelMixingAudioProcessor; +import androidx.media3.common.audio.ChannelMixingMatrix; +import androidx.media3.common.audio.SonicAudioProcessor; import androidx.media3.common.audio.SpeedChangingAudioProcessor; +import androidx.media3.common.util.NullableType; import androidx.media3.common.util.Util; import androidx.media3.decoder.DecoderInputBuffer; import com.google.common.collect.ImmutableList; @@ -38,6 +44,7 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.Queue; import java.util.concurrent.ConcurrentLinkedDeque; +import java.util.concurrent.atomic.AtomicReference; import org.checkerframework.dataflow.qual.Pure; /** Pipeline to process, re-encode and mux raw audio samples. */ @@ -49,12 +56,14 @@ import org.checkerframework.dataflow.qual.Pure; private final SilentAudioGenerator silentAudioGenerator; private final Queue availableInputBuffers; private final Queue pendingInputBuffers; - private final AudioProcessingPipeline audioProcessingPipeline; private final Codec encoder; private final AudioFormat encoderInputAudioFormat; private final DecoderInputBuffer encoderInputBuffer; private final DecoderInputBuffer encoderOutputBuffer; - + private final AtomicReference<@NullableType Pair> + pendingMediaItem; + private boolean receivedFirstMediaItemCallback; + private AudioProcessingPipeline audioProcessingPipeline; private long encoderTotalInputBytes; private volatile boolean queueEndOfStreamAfterSilence; @@ -64,13 +73,13 @@ import org.checkerframework.dataflow.qual.Pure; Format firstAssetLoaderInputFormat, Format firstPipelineInputFormat, TransformationRequest transformationRequest, - boolean flattenForSlowMotion, - ImmutableList audioProcessors, + EditedMediaItem firstEditedMediaItem, Codec.EncoderFactory encoderFactory, MuxerWrapper muxerWrapper, FallbackListener fallbackListener) throws ExportException { super(firstAssetLoaderInputFormat, muxerWrapper); + checkArgument(firstPipelineInputFormat.pcmEncoding != Format.NO_VALUE); availableInputBuffers = new ConcurrentLinkedDeque<>(); ByteBuffer emptyBuffer = ByteBuffer.allocateDirect(0).order(ByteOrder.nativeOrder()); @@ -80,36 +89,21 @@ import org.checkerframework.dataflow.qual.Pure; availableInputBuffers.add(inputBuffer); } pendingInputBuffers = new ConcurrentLinkedDeque<>(); - encoderInputBuffer = new DecoderInputBuffer(BUFFER_REPLACEMENT_MODE_DISABLED); encoderOutputBuffer = new DecoderInputBuffer(BUFFER_REPLACEMENT_MODE_DISABLED); - - checkArgument(firstPipelineInputFormat.pcmEncoding != Format.NO_VALUE); + pendingMediaItem = new AtomicReference<>(); AudioFormat inputAudioFormat = new AudioFormat(firstPipelineInputFormat); - silentAudioGenerator = new SilentAudioGenerator(inputAudioFormat); + audioProcessingPipeline = + configureProcessing( + /* editedMediaItem= */ firstEditedMediaItem, + /* trackFormat= */ firstPipelineInputFormat, + /* inputAudioFormat= */ inputAudioFormat, + /* requiredOutputAudioFormat= */ AudioFormat.NOT_SET); + AudioFormat outputAudioFormat = audioProcessingPipeline.getOutputAudioFormat(); + checkState(!outputAudioFormat.equals(AudioFormat.NOT_SET)); - if (flattenForSlowMotion && firstAssetLoaderInputFormat.metadata != null) { - audioProcessors = - new ImmutableList.Builder() - .add( - new SpeedChangingAudioProcessor( - new SegmentSpeedProvider(firstAssetLoaderInputFormat.metadata))) - .addAll(audioProcessors) - .build(); - } - - audioProcessingPipeline = new AudioProcessingPipeline(audioProcessors); - - try { - encoderInputAudioFormat = audioProcessingPipeline.configure(inputAudioFormat); - } catch (AudioProcessor.UnhandledAudioFormatException unhandledAudioFormatException) { - throw ExportException.createForAudioProcessing( - unhandledAudioFormatException, inputAudioFormat); - } - - audioProcessingPipeline.flush(); - + encoderInputAudioFormat = outputAudioFormat; Format requestedEncoderFormat = new Format.Builder() .setSampleMimeType( @@ -153,16 +147,22 @@ import org.checkerframework.dataflow.qual.Pure; if (isLast) { queueEndOfStreamAfterSilence = true; } + } else { + checkState(MimeTypes.isAudio(trackFormat.sampleMimeType)); + checkState(trackFormat.pcmEncoding != Format.NO_VALUE); + } + + if (!receivedFirstMediaItemCallback) { + receivedFirstMediaItemCallback = true; return; } - checkState(MimeTypes.isAudio(trackFormat.sampleMimeType)); - checkState(trackFormat.pcmEncoding != Format.NO_VALUE); + pendingMediaItem.set(Pair.create(editedMediaItem, trackFormat)); } @Override @Nullable public DecoderInputBuffer getInputBuffer() { - if (shouldGenerateSilence()) { + if (shouldGenerateSilence() || pendingMediaItem.get() != null) { return null; } return availableInputBuffers.peek(); @@ -170,6 +170,7 @@ import org.checkerframework.dataflow.qual.Pure; @Override public boolean queueInputBuffer() { + checkState(pendingMediaItem.get() == null); DecoderInputBuffer inputBuffer = availableInputBuffers.remove(); pendingInputBuffers.add(inputBuffer); return true; @@ -218,10 +219,34 @@ import org.checkerframework.dataflow.qual.Pure; return encoder.isEnded(); } + /** + * Reconfigures audio processing based on the pending {@linkplain #onMediaItemChanged media item + * change}. + * + *

Before reconfiguration, all pending buffers must be fully processed and drained to the + * encoder, however end of stream buffers should be handled so the encoder is not {@link + * #queueEndOfStreamToEncoder() queued end of stream}. + */ + private void reconfigureProcessingForPendingMediaItem() throws ExportException { + Pair pendingChange = + checkStateNotNull(pendingMediaItem.get()); + AudioFormat pendingAudioFormat = + pendingChange.second != null + ? new AudioFormat(pendingChange.second) + : silentAudioGenerator.audioFormat; + audioProcessingPipeline = + configureProcessing( + /* editedMediaItem= */ pendingChange.first, + /* trackFormat= */ pendingChange.second, + /* inputAudioFormat= */ pendingAudioFormat, + /* requiredOutputAudioFormat= */ encoderInputAudioFormat); + pendingMediaItem.set(null); + } + /** * Attempts to pass input data to the encoder. * - * @return Whether it may be possible to feed more data immediately by calling this method again. + * @return Whether the {@link AudioSamplePipeline} may be able to continue processing data. */ private boolean feedEncoderFromInput() throws ExportException { if (!encoder.maybeDequeueInputBuffer(encoderInputBuffer)) { @@ -234,6 +259,10 @@ import org.checkerframework.dataflow.qual.Pure; } if (pendingInputBuffers.isEmpty()) { + if (pendingMediaItem.get() != null) { + reconfigureProcessingForPendingMediaItem(); + return true; + } // Only read volatile variable queueEndOfStreamAfterSilence if there is a chance that end of // stream should be queued. if (!silentAudioGenerator.hasRemaining() && queueEndOfStreamAfterSilence) { @@ -244,7 +273,9 @@ import org.checkerframework.dataflow.qual.Pure; DecoderInputBuffer pendingInputBuffer = pendingInputBuffers.element(); if (pendingInputBuffer.isEndOfStream()) { - queueEndOfStreamToEncoder(); + if (pendingMediaItem.get() == null) { + queueEndOfStreamToEncoder(); + } removePendingInputBuffer(); return false; } @@ -260,7 +291,7 @@ import org.checkerframework.dataflow.qual.Pure; /** * Attempts to feed audio processor output data to the encoder. * - * @return Whether it may be possible to feed more data immediately by calling this method again. + * @return Whether the {@link AudioSamplePipeline} may be able to continue processing data. */ private boolean feedEncoderFromProcessingPipeline() throws ExportException { if (!encoder.maybeDequeueInputBuffer(encoderInputBuffer)) { @@ -268,9 +299,12 @@ import org.checkerframework.dataflow.qual.Pure; } ByteBuffer processingPipelineOutputBuffer = audioProcessingPipeline.getOutput(); - if (!processingPipelineOutputBuffer.hasRemaining()) { if (audioProcessingPipeline.isEnded()) { + if (pendingMediaItem.get() != null) { + reconfigureProcessingForPendingMediaItem(); + return true; + } queueEndOfStreamToEncoder(); } return false; @@ -283,7 +317,7 @@ import org.checkerframework.dataflow.qual.Pure; /** * Attempts to feed input data to the {@link AudioProcessingPipeline}. * - * @return Whether it may be possible to feed more data immediately by calling this method again. + * @return Whether the {@link AudioSamplePipeline} may be able to continue processing data. */ private boolean feedProcessingPipelineFromInput() { if (shouldGenerateSilence()) { @@ -295,7 +329,8 @@ import org.checkerframework.dataflow.qual.Pure; if (pendingInputBuffers.isEmpty()) { // Only read volatile variable queueEndOfStreamAfterSilence if there is a chance that end of // stream should be queued. - if (!silentAudioGenerator.hasRemaining() && queueEndOfStreamAfterSilence) { + if (pendingMediaItem.get() != null + || (!silentAudioGenerator.hasRemaining() && queueEndOfStreamAfterSilence)) { audioProcessingPipeline.queueEndOfStream(); } return false; @@ -370,4 +405,57 @@ import org.checkerframework.dataflow.qual.Pure; private boolean shouldGenerateSilence() { return silentAudioGenerator.hasRemaining() && pendingInputBuffers.isEmpty(); } + + private static AudioProcessingPipeline configureProcessing( + EditedMediaItem editedMediaItem, + @Nullable Format trackFormat, + AudioFormat inputAudioFormat, + AudioFormat requiredOutputAudioFormat) + throws ExportException { + ImmutableList.Builder audioProcessors = new ImmutableList.Builder<>(); + if (editedMediaItem.flattenForSlowMotion + && trackFormat != null + && trackFormat.metadata != null) { + audioProcessors.add( + new SpeedChangingAudioProcessor(new SegmentSpeedProvider(trackFormat.metadata))); + } + audioProcessors.addAll(editedMediaItem.effects.audioProcessors); + // Ensure the output from APP matches what the encoder is configured to receive. + if (!requiredOutputAudioFormat.equals(AudioFormat.NOT_SET)) { + SonicAudioProcessor sampleRateChanger = new SonicAudioProcessor(); + sampleRateChanger.setOutputSampleRateHz(requiredOutputAudioFormat.sampleRate); + audioProcessors.add(sampleRateChanger); + + // TODO(b/262706549): Handle channel mixing with AudioMixer. + if (requiredOutputAudioFormat.channelCount <= 2) { + // ChannelMixingMatrix.create only has defaults for mono/stereo input/output. + ChannelMixingAudioProcessor channelCountChanger = new ChannelMixingAudioProcessor(); + channelCountChanger.putChannelMixingMatrix( + ChannelMixingMatrix.create( + /* inputChannelCount= */ 1, requiredOutputAudioFormat.channelCount)); + channelCountChanger.putChannelMixingMatrix( + ChannelMixingMatrix.create( + /* inputChannelCount= */ 2, requiredOutputAudioFormat.channelCount)); + audioProcessors.add(channelCountChanger); + } + } + + AudioProcessingPipeline audioProcessingPipeline = + new AudioProcessingPipeline(audioProcessors.build()); + try { + AudioFormat outputAudioFormat = audioProcessingPipeline.configure(inputAudioFormat); + if (!requiredOutputAudioFormat.equals(AudioFormat.NOT_SET) + && !outputAudioFormat.equals(requiredOutputAudioFormat)) { + throw new AudioProcessor.UnhandledAudioFormatException( + "Audio format can not be modified to match existing downstream format", + inputAudioFormat); + } + } catch (AudioProcessor.UnhandledAudioFormatException unhandledAudioFormatException) { + throw ExportException.createForAudioProcessing( + unhandledAudioFormatException, inputAudioFormat); + } + + audioProcessingPipeline.flush(); + return audioProcessingPipeline; + } } diff --git a/libraries/transformer/src/main/java/androidx/media3/transformer/SilentAudioGenerator.java b/libraries/transformer/src/main/java/androidx/media3/transformer/SilentAudioGenerator.java index 39e038af60..43b74ac81a 100644 --- a/libraries/transformer/src/main/java/androidx/media3/transformer/SilentAudioGenerator.java +++ b/libraries/transformer/src/main/java/androidx/media3/transformer/SilentAudioGenerator.java @@ -26,7 +26,9 @@ import java.util.concurrent.atomic.AtomicLong; /* package */ final class SilentAudioGenerator { private static final int DEFAULT_BUFFER_SIZE_FRAMES = 1024; - private final AudioFormat audioFormat; + /** The {@link AudioFormat} of the silent audio generated. */ + public final AudioFormat audioFormat; + private final ByteBuffer internalBuffer; private final AtomicLong remainingBytesToOutput; diff --git a/libraries/transformer/src/main/java/androidx/media3/transformer/TransformerInternal.java b/libraries/transformer/src/main/java/androidx/media3/transformer/TransformerInternal.java index 8dedc7f68c..4e2344981a 100644 --- a/libraries/transformer/src/main/java/androidx/media3/transformer/TransformerInternal.java +++ b/libraries/transformer/src/main/java/androidx/media3/transformer/TransformerInternal.java @@ -555,8 +555,7 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull; addedTrackInfo.firstAssetLoaderInputFormat, /* firstPipelineInputFormat= */ firstAssetLoaderOutputFormat, transformationRequest, - firstEditedMediaItem.flattenForSlowMotion, - firstEditedMediaItem.effects.audioProcessors, + firstEditedMediaItem, encoderFactory, muxerWrapper, fallbackListener); diff --git a/libraries/transformer/src/test/java/androidx/media3/transformer/SequenceExportTest.java b/libraries/transformer/src/test/java/androidx/media3/transformer/SequenceExportTest.java index 3853a04ab8..b822c54cee 100644 --- a/libraries/transformer/src/test/java/androidx/media3/transformer/SequenceExportTest.java +++ b/libraries/transformer/src/test/java/androidx/media3/transformer/SequenceExportTest.java @@ -19,6 +19,7 @@ package androidx.media3.transformer; import static androidx.media3.common.util.Assertions.checkNotNull; import static androidx.media3.transformer.TestUtil.ASSET_URI_PREFIX; import static androidx.media3.transformer.TestUtil.FILE_AUDIO_RAW; +import static androidx.media3.transformer.TestUtil.FILE_AUDIO_RAW_STEREO_48000KHZ; import static androidx.media3.transformer.TestUtil.FILE_AUDIO_VIDEO; import static androidx.media3.transformer.TestUtil.FILE_AUDIO_VIDEO_INCREASING_TIMESTAMPS_15S; import static androidx.media3.transformer.TestUtil.FILE_AUDIO_VIDEO_STEREO; @@ -43,7 +44,6 @@ import java.nio.file.Files; import java.nio.file.Paths; import org.junit.After; import org.junit.Before; -import org.junit.Ignore; import org.junit.Test; import org.junit.runner.RunWith; @@ -233,7 +233,7 @@ public final class SequenceExportTest { } @Test - public void start_concatenateSameAudioItem_completesSuccessfully() throws Exception { + public void concatenateTwoAudioItems_withSameFormat_completesSuccessfully() throws Exception { Transformer transformer = createTransformerBuilder(testMuxerHolder, /* enableFallback= */ false).build(); MediaItem audioOnlyMediaItem = MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW); @@ -253,7 +253,8 @@ public final class SequenceExportTest { } @Test - public void start_concatenateSameAudioItemWithEffects_completesSuccessfully() throws Exception { + public void concatenateTwoAudioItems_withSameFormatAndSameEffects_completesSuccessfully() + throws Exception { Transformer transformer = createTransformerBuilder(testMuxerHolder, /* enableFallback= */ false).build(); MediaItem audioOnlyMediaItem = MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW); @@ -277,8 +278,7 @@ public final class SequenceExportTest { } @Test - @Ignore("Handle MediaItem effects changes (See [internal: b/274093424]).") - public void start_concatenateSameAudioItemWithDifferentEffects_completesSuccessfully() + public void concatenateTwoAudioItems_withSameFormatAndDiffEffects_completesSuccessfully() throws Exception { Transformer transformer = createTransformerBuilder(testMuxerHolder, /* enableFallback= */ false).build(); @@ -313,4 +313,99 @@ public final class SequenceExportTest { checkNotNull(testMuxerHolder.testMuxer), getDumpFileName(FILE_AUDIO_RAW + ".high_pitch_then_low_pitch")); } + + @Test + public void concatenateTwoAudioItems_withDiffFormat_completesSuccessfully() throws Exception { + Transformer transformer = + createTransformerBuilder(testMuxerHolder, /* enableFallback= */ false).build(); + MediaItem stereo48000Audio = + MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW_STEREO_48000KHZ); + MediaItem mono44100Audio = MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW); + EditedMediaItemSequence editedMediaItemSequence = + new EditedMediaItemSequence( + ImmutableList.of( + new EditedMediaItem.Builder(stereo48000Audio).build(), + new EditedMediaItem.Builder(mono44100Audio).build())); + Composition composition = + new Composition.Builder(ImmutableList.of(editedMediaItemSequence)).build(); + + transformer.start(composition, outputPath); + TransformerTestRunner.runLooper(transformer); + + DumpFileAsserts.assertOutput( + context, + checkNotNull(testMuxerHolder.testMuxer), + getDumpFileName(FILE_AUDIO_RAW_STEREO_48000KHZ + "_then_sample.wav")); + } + + @Test + public void concatenateTwoAudioItems_withDiffFormatAndSameEffects_completesSuccessfully() + throws Exception { + Transformer transformer = + createTransformerBuilder(testMuxerHolder, /* enableFallback= */ false).build(); + + Effects highPitch = + new Effects( + ImmutableList.of(createPitchChangingAudioProcessor(/* pitch= */ 2f)), + /* videoEffects= */ ImmutableList.of()); + + EditedMediaItem stereo48000Audio = + new EditedMediaItem.Builder( + MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW_STEREO_48000KHZ)) + .setEffects(highPitch) + .build(); + EditedMediaItem mono44100Audio = + new EditedMediaItem.Builder(MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW)) + .setEffects(highPitch) + .build(); + + EditedMediaItemSequence editedMediaItemSequence = + new EditedMediaItemSequence(ImmutableList.of(stereo48000Audio, mono44100Audio)); + Composition composition = + new Composition.Builder(ImmutableList.of(editedMediaItemSequence)).build(); + + transformer.start(composition, outputPath); + TransformerTestRunner.runLooper(transformer); + + DumpFileAsserts.assertOutput( + context, + checkNotNull(testMuxerHolder.testMuxer), + getDumpFileName(FILE_AUDIO_RAW_STEREO_48000KHZ + "-high_pitch_then_sample.wav-high_pitch")); + } + + @Test + public void concatenateTwoAudioItems_withDiffFormatAndDiffEffects_completesSuccessfully() + throws Exception { + Transformer transformer = + createTransformerBuilder(testMuxerHolder, /* enableFallback= */ false).build(); + + EditedMediaItem stereo48000AudioHighPitch = + new EditedMediaItem.Builder( + MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW_STEREO_48000KHZ)) + .setEffects( + new Effects( + ImmutableList.of(createPitchChangingAudioProcessor(/* pitch= */ 2f)), + /* videoEffects= */ ImmutableList.of())) + .build(); + EditedMediaItem mono44100AudioLowPitch = + new EditedMediaItem.Builder(MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW)) + .setEffects( + new Effects( + ImmutableList.of(createPitchChangingAudioProcessor(/* pitch= */ 0.5f)), + /* videoEffects= */ ImmutableList.of())) + .build(); + EditedMediaItemSequence editedMediaItemSequence = + new EditedMediaItemSequence( + ImmutableList.of(stereo48000AudioHighPitch, mono44100AudioLowPitch)); + Composition composition = + new Composition.Builder(ImmutableList.of(editedMediaItemSequence)).build(); + + transformer.start(composition, outputPath); + TransformerTestRunner.runLooper(transformer); + + DumpFileAsserts.assertOutput( + context, + checkNotNull(testMuxerHolder.testMuxer), + getDumpFileName(FILE_AUDIO_RAW_STEREO_48000KHZ + "-high_pitch_then_sample.wav-low_pitch")); + } } diff --git a/libraries/transformer/src/test/java/androidx/media3/transformer/TestUtil.java b/libraries/transformer/src/test/java/androidx/media3/transformer/TestUtil.java index dec49f1d4d..c614f20733 100644 --- a/libraries/transformer/src/test/java/androidx/media3/transformer/TestUtil.java +++ b/libraries/transformer/src/test/java/androidx/media3/transformer/TestUtil.java @@ -153,6 +153,7 @@ public final class TestUtil { public static final String FILE_AUDIO_VIDEO_INCREASING_TIMESTAMPS_15S = "mp4/sample_with_increasing_timestamps_320w_240h.mp4"; public static final String FILE_AUDIO_RAW = "wav/sample.wav"; + public static final String FILE_AUDIO_RAW_STEREO_48000KHZ = "wav/sample_rf64.wav"; public static final String FILE_WITH_SUBTITLES = "mkv/sample_with_srt.mkv"; public static final String FILE_WITH_SEF_SLOW_MOTION = "mp4/sample_sef_slow_motion.mp4"; public static final String FILE_AUDIO_UNSUPPORTED_BY_DECODER = "amr/sample_wb.amr"; @@ -168,8 +169,8 @@ public final class TestUtil { public static void createEncodersAndDecoders() { ShadowMediaCodec.CodecConfig codecConfig = new ShadowMediaCodec.CodecConfig( - /* inputBufferSize= */ 10_000, - /* outputBufferSize= */ 10_000, + /* inputBufferSize= */ 100_000, + /* outputBufferSize= */ 100_000, /* codec= */ (in, out) -> out.put(in)); addCodec( MimeTypes.AUDIO_AAC, @@ -194,8 +195,8 @@ public final class TestUtil { ShadowMediaCodec.CodecConfig throwingCodecConfig = new ShadowMediaCodec.CodecConfig( - /* inputBufferSize= */ 10_000, - /* outputBufferSize= */ 10_000, + /* inputBufferSize= */ 100_000, + /* outputBufferSize= */ 100_000, new ShadowMediaCodec.CodecConfig.Codec() { @Override