From ddfd79bb983dea68fc85237c57968f6a88ec30be Mon Sep 17 00:00:00 2001 From: kimvde Date: Thu, 3 Feb 2022 12:42:16 +0000 Subject: [PATCH] Use SpeedChangingAudioProcessor in Transformer PiperOrigin-RevId: 426113559 --- .../mp4/sample_sef_slow_motion.mp4.dump | 218 +++++++++--------- .../AudioTranscodingSamplePipeline.java | 107 +++------ .../SpeedChangingAudioProcessor.java | 2 + .../VideoTranscodingSamplePipeline.java | 7 +- 4 files changed, 155 insertions(+), 179 deletions(-) diff --git a/libraries/test_data/src/test/assets/transformerdumps/mp4/sample_sef_slow_motion.mp4.dump b/libraries/test_data/src/test/assets/transformerdumps/mp4/sample_sef_slow_motion.mp4.dump index a83228a55c..bace6d6cfa 100644 --- a/libraries/test_data/src/test/assets/transformerdumps/mp4/sample_sef_slow_motion.mp4.dump +++ b/libraries/test_data/src/test/assets/transformerdumps/mp4/sample_sef_slow_motion.mp4.dump @@ -132,148 +132,154 @@ sample: presentationTimeUs = 0 sample: trackIndex = 0 - dataHashCode = 1000136444 - size = 140 + dataHashCode = -1948569090 + size = 72 isKeyFrame = true presentationTimeUs = 417 sample: trackIndex = 0 - dataHashCode = 217961709 - size = 172 - isKeyFrame = true - presentationTimeUs = 3334 -sample: - trackIndex = 0 - dataHashCode = -879376936 - size = 176 - isKeyFrame = true - presentationTimeUs = 6917 -sample: - trackIndex = 0 - dataHashCode = 1259979587 - size = 192 - isKeyFrame = true - presentationTimeUs = 10584 -sample: - trackIndex = 0 - dataHashCode = 907407225 - size = 188 - isKeyFrame = true - presentationTimeUs = 14584 -sample: - trackIndex = 0 - dataHashCode = -904354707 - size = 176 - isKeyFrame = true - presentationTimeUs = 18500 -sample: - trackIndex = 0 - dataHashCode = 1001385853 - size = 172 - isKeyFrame = true - presentationTimeUs = 22167 -sample: - trackIndex = 0 - dataHashCode = 1545716086 - size = 196 - isKeyFrame = true - presentationTimeUs = 25750 -sample: - trackIndex = 0 - dataHashCode = 358710839 - size = 180 - isKeyFrame = true - presentationTimeUs = 29834 -sample: - trackIndex = 0 - dataHashCode = -671124798 - size = 140 - isKeyFrame = true - presentationTimeUs = 33584 -sample: - trackIndex = 0 - dataHashCode = -945404910 - size = 120 - isKeyFrame = true - presentationTimeUs = 36500 -sample: - trackIndex = 0 - dataHashCode = 1881048379 - size = 88 - isKeyFrame = true - presentationTimeUs = 39000 -sample: - trackIndex = 0 - dataHashCode = 1059579897 - size = 88 - isKeyFrame = true - presentationTimeUs = 40834 -sample: - trackIndex = 0 - dataHashCode = 1496098648 + dataHashCode = -1316750072 size = 84 isKeyFrame = true - presentationTimeUs = 42667 + presentationTimeUs = 1917 sample: trackIndex = 0 - dataHashCode = 250093960 - size = 751 + dataHashCode = 1016428949 + size = 88 isKeyFrame = true - presentationTimeUs = 44417 + presentationTimeUs = 3667 sample: trackIndex = 0 - dataHashCode = 1895536226 - size = 1045 + dataHashCode = -1127325245 + size = 96 isKeyFrame = true - presentationTimeUs = 60063 + presentationTimeUs = 5500 sample: trackIndex = 0 - dataHashCode = 1723596464 - size = 947 + dataHashCode = 1148147726 + size = 92 isKeyFrame = true - presentationTimeUs = 81834 + presentationTimeUs = 7500 sample: trackIndex = 0 - dataHashCode = -978803114 - size = 946 + dataHashCode = -2125685540 + size = 76 isKeyFrame = true - presentationTimeUs = 101563 + presentationTimeUs = 9417 sample: trackIndex = 0 - dataHashCode = 387377078 - size = 946 + dataHashCode = 473329679 + size = 24 isKeyFrame = true - presentationTimeUs = 121271 + presentationTimeUs = 11000 sample: trackIndex = 0 - dataHashCode = -132658698 - size = 901 + dataHashCode = 240990900 + size = 176 isKeyFrame = true - presentationTimeUs = 140980 + presentationTimeUs = 11500 sample: trackIndex = 0 - dataHashCode = 1495036471 - size = 899 + dataHashCode = 777637182 + size = 196 isKeyFrame = true - presentationTimeUs = 159750 + presentationTimeUs = 15167 sample: trackIndex = 0 - dataHashCode = 304440590 - size = 878 + dataHashCode = 1872106264 + size = 180 isKeyFrame = true - presentationTimeUs = 178480 + presentationTimeUs = 19250 sample: trackIndex = 0 - dataHashCode = -1955900344 - size = 112 + dataHashCode = -1520711499 + size = 140 isKeyFrame = true - presentationTimeUs = 196771 + presentationTimeUs = 23000 sample: trackIndex = 0 - dataHashCode = 88896626 - size = 116 + dataHashCode = 1580199067 + size = 232 isKeyFrame = true - presentationTimeUs = 199105 + presentationTimeUs = 25917 +sample: + trackIndex = 0 + dataHashCode = 475464086 + size = 184 + isKeyFrame = true + presentationTimeUs = 30750 +sample: + trackIndex = 0 + dataHashCode = -211754132 + size = 172 + isKeyFrame = true + presentationTimeUs = 34584 +sample: + trackIndex = 0 + dataHashCode = 1236547164 + size = 172 + isKeyFrame = true + presentationTimeUs = 38167 +sample: + trackIndex = 0 + dataHashCode = -2064216186 + size = 188 + isKeyFrame = true + presentationTimeUs = 41750 +sample: + trackIndex = 0 + dataHashCode = -682950885 + size = 260 + isKeyFrame = true + presentationTimeUs = 45667 +sample: + trackIndex = 0 + dataHashCode = 1301206627 + size = 236 + isKeyFrame = true + presentationTimeUs = 51084 +sample: + trackIndex = 0 + dataHashCode = 256580525 + size = 236 + isKeyFrame = true + presentationTimeUs = 56000 +sample: + trackIndex = 0 + dataHashCode = -1086601304 + size = 236 + isKeyFrame = true + presentationTimeUs = 60917 +sample: + trackIndex = 0 + dataHashCode = -2046131588 + size = 224 + isKeyFrame = true + presentationTimeUs = 65834 +sample: + trackIndex = 0 + dataHashCode = 1550955865 + size = 224 + isKeyFrame = true + presentationTimeUs = 70500 +sample: + trackIndex = 0 + dataHashCode = -274800552 + size = 220 + isKeyFrame = true + presentationTimeUs = 75167 +sample: + trackIndex = 0 + dataHashCode = 382420909 + size = 224 + isKeyFrame = true + presentationTimeUs = 79750 +sample: + trackIndex = 0 + dataHashCode = -1431575865 + size = 232 + isKeyFrame = true + presentationTimeUs = 84417 sample: trackIndex = 1 dataHashCode = -968901399 diff --git a/libraries/transformer/src/main/java/androidx/media3/transformer/AudioTranscodingSamplePipeline.java b/libraries/transformer/src/main/java/androidx/media3/transformer/AudioTranscodingSamplePipeline.java index 94d77c7c6a..0a101e52e6 100644 --- a/libraries/transformer/src/main/java/androidx/media3/transformer/AudioTranscodingSamplePipeline.java +++ b/libraries/transformer/src/main/java/androidx/media3/transformer/AudioTranscodingSamplePipeline.java @@ -20,7 +20,6 @@ import static androidx.media3.common.util.Assertions.checkNotNull; import static androidx.media3.common.util.Assertions.checkState; import static java.lang.Math.min; -import android.media.MediaCodec.BufferInfo; import androidx.annotation.Nullable; import androidx.media3.common.C; import androidx.media3.common.Format; @@ -28,9 +27,9 @@ import androidx.media3.common.util.Util; import androidx.media3.decoder.DecoderInputBuffer; import androidx.media3.exoplayer.audio.AudioProcessor; import androidx.media3.exoplayer.audio.AudioProcessor.AudioFormat; -import androidx.media3.exoplayer.audio.SonicAudioProcessor; import java.nio.ByteBuffer; import java.util.List; +import org.checkerframework.checker.nullness.qual.RequiresNonNull; import org.checkerframework.dataflow.qual.Pure; /** @@ -43,22 +42,18 @@ import org.checkerframework.dataflow.qual.Pure; private final Codec decoder; private final DecoderInputBuffer decoderInputBuffer; - private final SonicAudioProcessor sonicAudioProcessor; - private final SpeedProvider speedProvider; - private final boolean flattenForSlowMotion; + @Nullable private final SpeedChangingAudioProcessor speedChangingAudioProcessor; private final Codec encoder; private final AudioFormat encoderInputAudioFormat; private final DecoderInputBuffer encoderInputBuffer; private final DecoderInputBuffer encoderOutputBuffer; + private ByteBuffer processorOutputBuffer; + private long nextEncoderInputBufferTimeUs; private long encoderBufferDurationRemainder; - private ByteBuffer sonicOutputBuffer; - private boolean drainingSonicForSpeedChange; - private float currentSpeed; - public AudioTranscodingSamplePipeline( Format inputFormat, TransformationRequest transformationRequest, @@ -74,13 +69,8 @@ import org.checkerframework.dataflow.qual.Pure; encoderOutputBuffer = new DecoderInputBuffer(DecoderInputBuffer.BUFFER_REPLACEMENT_MODE_DISABLED); - this.decoder = decoderFactory.createForAudioDecoding(inputFormat); + decoder = decoderFactory.createForAudioDecoding(inputFormat); - this.flattenForSlowMotion = transformationRequest.flattenForSlowMotion; - sonicAudioProcessor = new SonicAudioProcessor(); - sonicOutputBuffer = AudioProcessor.EMPTY_BUFFER; - speedProvider = new SegmentSpeedProvider(inputFormat); - currentSpeed = speedProvider.getSpeed(0); AudioFormat encoderInputAudioFormat = new AudioFormat( inputFormat.sampleRate, @@ -88,18 +78,21 @@ import org.checkerframework.dataflow.qual.Pure; // The decoder uses ENCODING_PCM_16BIT by default. // https://developer.android.com/reference/android/media/MediaCodec#raw-audio-buffers C.ENCODING_PCM_16BIT); - if (flattenForSlowMotion) { + if (transformationRequest.flattenForSlowMotion) { + speedChangingAudioProcessor = + new SpeedChangingAudioProcessor(new SegmentSpeedProvider(inputFormat)); try { - encoderInputAudioFormat = sonicAudioProcessor.configure(encoderInputAudioFormat); + encoderInputAudioFormat = speedChangingAudioProcessor.configure(encoderInputAudioFormat); } catch (AudioProcessor.UnhandledAudioFormatException impossible) { throw new IllegalStateException(impossible); } - sonicAudioProcessor.setSpeed(currentSpeed); - sonicAudioProcessor.setPitch(currentSpeed); - sonicAudioProcessor.flush(); + speedChangingAudioProcessor.flush(); + } else { + speedChangingAudioProcessor = null; } - this.encoderInputAudioFormat = encoderInputAudioFormat; + processorOutputBuffer = AudioProcessor.EMPTY_BUFFER; + this.encoderInputAudioFormat = encoderInputAudioFormat; Format requestedOutputFormat = new Format.Builder() .setSampleMimeType( @@ -130,8 +123,8 @@ import org.checkerframework.dataflow.qual.Pure; @Override public boolean processData() throws TransformationException { - if (sonicAudioProcessor.isActive()) { - return feedEncoderFromSonic() || feedSonicFromDecoder(); + if (speedChangingAudioProcessor != null) { + return feedEncoderFromProcessor() || feedProcessorFromDecoder(); } else { return feedEncoderFromDecoder(); } @@ -167,7 +160,9 @@ import org.checkerframework.dataflow.qual.Pure; @Override public void release() { - sonicAudioProcessor.reset(); + if (speedChangingAudioProcessor != null) { + speedChangingAudioProcessor.reset(); + } decoder.release(); encoder.release(); } @@ -190,10 +185,7 @@ import org.checkerframework.dataflow.qual.Pure; if (decoderOutputBuffer == null) { return false; } - if (isSpeedChanging(checkNotNull(decoder.getOutputBufferInfo()))) { - flushSonicAndSetSpeed(currentSpeed); - return false; - } + feedEncoder(decoderOutputBuffer); if (!decoderOutputBuffer.hasRemaining()) { decoder.releaseOutputBuffer(); @@ -205,22 +197,23 @@ import org.checkerframework.dataflow.qual.Pure; * Attempts to pass audio processor output data to the encoder, and returns whether it may be * possible to pass more data immediately by calling this method again. */ - private boolean feedEncoderFromSonic() throws TransformationException { + @RequiresNonNull("speedChangingAudioProcessor") + private boolean feedEncoderFromProcessor() throws TransformationException { if (!encoder.maybeDequeueInputBuffer(encoderInputBuffer)) { return false; } - if (!sonicOutputBuffer.hasRemaining()) { - sonicOutputBuffer = sonicAudioProcessor.getOutput(); - if (!sonicOutputBuffer.hasRemaining()) { - if (decoder.isEnded() && sonicAudioProcessor.isEnded()) { + if (!processorOutputBuffer.hasRemaining()) { + processorOutputBuffer = speedChangingAudioProcessor.getOutput(); + if (!processorOutputBuffer.hasRemaining()) { + if (decoder.isEnded() && speedChangingAudioProcessor.isEnded()) { queueEndOfStreamToEncoder(); } return false; } } - feedEncoder(sonicOutputBuffer); + feedEncoder(processorOutputBuffer); return true; } @@ -228,37 +221,27 @@ import org.checkerframework.dataflow.qual.Pure; * Attempts to process decoder output data, and returns whether it may be possible to process more * data immediately by calling this method again. */ - private boolean feedSonicFromDecoder() throws TransformationException { - if (drainingSonicForSpeedChange) { - if (sonicAudioProcessor.isEnded() && !sonicOutputBuffer.hasRemaining()) { - flushSonicAndSetSpeed(currentSpeed); - drainingSonicForSpeedChange = false; - } - return false; - } - - // Sonic invalidates any previous output buffer when more input is queued, so we don't queue if - // there is output still to be processed. - if (sonicOutputBuffer.hasRemaining()) { + @RequiresNonNull("speedChangingAudioProcessor") + private boolean feedProcessorFromDecoder() throws TransformationException { + // Audio processors invalidate any previous output buffer when more input is queued, so we don't + // queue if there is output still to be processed. + if (processorOutputBuffer.hasRemaining() + || speedChangingAudioProcessor.getOutput().hasRemaining()) { return false; } if (decoder.isEnded()) { - sonicAudioProcessor.queueEndOfStream(); + speedChangingAudioProcessor.queueEndOfStream(); return false; } - checkState(!sonicAudioProcessor.isEnded()); + checkState(!speedChangingAudioProcessor.isEnded()); @Nullable ByteBuffer decoderOutputBuffer = decoder.getOutputBuffer(); if (decoderOutputBuffer == null) { return false; } - if (isSpeedChanging(checkNotNull(decoder.getOutputBufferInfo()))) { - sonicAudioProcessor.queueEndOfStream(); - drainingSonicForSpeedChange = true; - return false; - } - sonicAudioProcessor.queueInput(decoderOutputBuffer); + + speedChangingAudioProcessor.queueInput(decoderOutputBuffer); if (!decoderOutputBuffer.hasRemaining()) { decoder.releaseOutputBuffer(); } @@ -294,22 +277,6 @@ import org.checkerframework.dataflow.qual.Pure; encoder.queueInputBuffer(encoderInputBuffer); } - private boolean isSpeedChanging(BufferInfo bufferInfo) { - if (!flattenForSlowMotion) { - return false; - } - float newSpeed = speedProvider.getSpeed(bufferInfo.presentationTimeUs); - boolean speedChanging = newSpeed != currentSpeed; - currentSpeed = newSpeed; - return speedChanging; - } - - private void flushSonicAndSetSpeed(float speed) { - sonicAudioProcessor.setSpeed(speed); - sonicAudioProcessor.setPitch(speed); - sonicAudioProcessor.flush(); - } - private void computeNextEncoderInputBufferTimeUs( long bytesWritten, int bytesPerFrame, int sampleRate) { // The calculation below accounts for remainders and rounding. Without that it corresponds to diff --git a/libraries/transformer/src/main/java/androidx/media3/transformer/SpeedChangingAudioProcessor.java b/libraries/transformer/src/main/java/androidx/media3/transformer/SpeedChangingAudioProcessor.java index 5a3840f6cc..0ce639cca8 100644 --- a/libraries/transformer/src/main/java/androidx/media3/transformer/SpeedChangingAudioProcessor.java +++ b/libraries/transformer/src/main/java/androidx/media3/transformer/SpeedChangingAudioProcessor.java @@ -28,6 +28,8 @@ import java.nio.ByteBuffer; /** * An {@link AudioProcessor} that changes the speed of audio samples depending on their timestamp. */ +// TODO(b/198772621): Consider making the processor inactive and skipping it in the processor chain +// when speed is 1. /* package */ final class SpeedChangingAudioProcessor extends BaseAudioProcessor { /** The speed provider that provides the speed for each timestamp. */ diff --git a/libraries/transformer/src/main/java/androidx/media3/transformer/VideoTranscodingSamplePipeline.java b/libraries/transformer/src/main/java/androidx/media3/transformer/VideoTranscodingSamplePipeline.java index 20a0e83737..dee399daa2 100644 --- a/libraries/transformer/src/main/java/androidx/media3/transformer/VideoTranscodingSamplePipeline.java +++ b/libraries/transformer/src/main/java/androidx/media3/transformer/VideoTranscodingSamplePipeline.java @@ -30,7 +30,6 @@ import androidx.media3.common.Format; import androidx.media3.common.util.Util; import androidx.media3.decoder.DecoderInputBuffer; import java.util.List; -import org.checkerframework.checker.nullness.qual.MonotonicNonNull; import org.checkerframework.dataflow.qual.Pure; /** @@ -42,11 +41,11 @@ import org.checkerframework.dataflow.qual.Pure; private final DecoderInputBuffer decoderInputBuffer; private final Codec decoder; + @Nullable private final FrameEditor frameEditor; + private final Codec encoder; private final DecoderInputBuffer encoderOutputBuffer; - private @MonotonicNonNull FrameEditor frameEditor; - private boolean waitingForFrameEditorInput; public VideoTranscodingSamplePipeline( @@ -139,6 +138,8 @@ import org.checkerframework.dataflow.qual.Pure; /* outputSurface= */ checkNotNull(encoder.getInputSurface()), transformationRequest.enableHdrEditing, debugViewProvider); + } else { + frameEditor = null; } decoder =