Use SpeedChangingAudioProcessor in Transformer

PiperOrigin-RevId: 426113559
This commit is contained in:
kimvde 2022-02-03 12:42:16 +00:00 committed by Ian Baker
parent 92a6cc10a4
commit ddfd79bb98
4 changed files with 155 additions and 179 deletions

View File

@ -132,148 +132,154 @@ sample:
presentationTimeUs = 0 presentationTimeUs = 0
sample: sample:
trackIndex = 0 trackIndex = 0
dataHashCode = 1000136444 dataHashCode = -1948569090
size = 140 size = 72
isKeyFrame = true isKeyFrame = true
presentationTimeUs = 417 presentationTimeUs = 417
sample: sample:
trackIndex = 0 trackIndex = 0
dataHashCode = 217961709 dataHashCode = -1316750072
size = 172
isKeyFrame = true
presentationTimeUs = 3334
sample:
trackIndex = 0
dataHashCode = -879376936
size = 176
isKeyFrame = true
presentationTimeUs = 6917
sample:
trackIndex = 0
dataHashCode = 1259979587
size = 192
isKeyFrame = true
presentationTimeUs = 10584
sample:
trackIndex = 0
dataHashCode = 907407225
size = 188
isKeyFrame = true
presentationTimeUs = 14584
sample:
trackIndex = 0
dataHashCode = -904354707
size = 176
isKeyFrame = true
presentationTimeUs = 18500
sample:
trackIndex = 0
dataHashCode = 1001385853
size = 172
isKeyFrame = true
presentationTimeUs = 22167
sample:
trackIndex = 0
dataHashCode = 1545716086
size = 196
isKeyFrame = true
presentationTimeUs = 25750
sample:
trackIndex = 0
dataHashCode = 358710839
size = 180
isKeyFrame = true
presentationTimeUs = 29834
sample:
trackIndex = 0
dataHashCode = -671124798
size = 140
isKeyFrame = true
presentationTimeUs = 33584
sample:
trackIndex = 0
dataHashCode = -945404910
size = 120
isKeyFrame = true
presentationTimeUs = 36500
sample:
trackIndex = 0
dataHashCode = 1881048379
size = 88
isKeyFrame = true
presentationTimeUs = 39000
sample:
trackIndex = 0
dataHashCode = 1059579897
size = 88
isKeyFrame = true
presentationTimeUs = 40834
sample:
trackIndex = 0
dataHashCode = 1496098648
size = 84 size = 84
isKeyFrame = true isKeyFrame = true
presentationTimeUs = 42667 presentationTimeUs = 1917
sample: sample:
trackIndex = 0 trackIndex = 0
dataHashCode = 250093960 dataHashCode = 1016428949
size = 751 size = 88
isKeyFrame = true isKeyFrame = true
presentationTimeUs = 44417 presentationTimeUs = 3667
sample: sample:
trackIndex = 0 trackIndex = 0
dataHashCode = 1895536226 dataHashCode = -1127325245
size = 1045 size = 96
isKeyFrame = true isKeyFrame = true
presentationTimeUs = 60063 presentationTimeUs = 5500
sample: sample:
trackIndex = 0 trackIndex = 0
dataHashCode = 1723596464 dataHashCode = 1148147726
size = 947 size = 92
isKeyFrame = true isKeyFrame = true
presentationTimeUs = 81834 presentationTimeUs = 7500
sample: sample:
trackIndex = 0 trackIndex = 0
dataHashCode = -978803114 dataHashCode = -2125685540
size = 946 size = 76
isKeyFrame = true isKeyFrame = true
presentationTimeUs = 101563 presentationTimeUs = 9417
sample: sample:
trackIndex = 0 trackIndex = 0
dataHashCode = 387377078 dataHashCode = 473329679
size = 946 size = 24
isKeyFrame = true isKeyFrame = true
presentationTimeUs = 121271 presentationTimeUs = 11000
sample: sample:
trackIndex = 0 trackIndex = 0
dataHashCode = -132658698 dataHashCode = 240990900
size = 901 size = 176
isKeyFrame = true isKeyFrame = true
presentationTimeUs = 140980 presentationTimeUs = 11500
sample: sample:
trackIndex = 0 trackIndex = 0
dataHashCode = 1495036471 dataHashCode = 777637182
size = 899 size = 196
isKeyFrame = true isKeyFrame = true
presentationTimeUs = 159750 presentationTimeUs = 15167
sample: sample:
trackIndex = 0 trackIndex = 0
dataHashCode = 304440590 dataHashCode = 1872106264
size = 878 size = 180
isKeyFrame = true isKeyFrame = true
presentationTimeUs = 178480 presentationTimeUs = 19250
sample: sample:
trackIndex = 0 trackIndex = 0
dataHashCode = -1955900344 dataHashCode = -1520711499
size = 112 size = 140
isKeyFrame = true isKeyFrame = true
presentationTimeUs = 196771 presentationTimeUs = 23000
sample: sample:
trackIndex = 0 trackIndex = 0
dataHashCode = 88896626 dataHashCode = 1580199067
size = 116 size = 232
isKeyFrame = true isKeyFrame = true
presentationTimeUs = 199105 presentationTimeUs = 25917
sample:
trackIndex = 0
dataHashCode = 475464086
size = 184
isKeyFrame = true
presentationTimeUs = 30750
sample:
trackIndex = 0
dataHashCode = -211754132
size = 172
isKeyFrame = true
presentationTimeUs = 34584
sample:
trackIndex = 0
dataHashCode = 1236547164
size = 172
isKeyFrame = true
presentationTimeUs = 38167
sample:
trackIndex = 0
dataHashCode = -2064216186
size = 188
isKeyFrame = true
presentationTimeUs = 41750
sample:
trackIndex = 0
dataHashCode = -682950885
size = 260
isKeyFrame = true
presentationTimeUs = 45667
sample:
trackIndex = 0
dataHashCode = 1301206627
size = 236
isKeyFrame = true
presentationTimeUs = 51084
sample:
trackIndex = 0
dataHashCode = 256580525
size = 236
isKeyFrame = true
presentationTimeUs = 56000
sample:
trackIndex = 0
dataHashCode = -1086601304
size = 236
isKeyFrame = true
presentationTimeUs = 60917
sample:
trackIndex = 0
dataHashCode = -2046131588
size = 224
isKeyFrame = true
presentationTimeUs = 65834
sample:
trackIndex = 0
dataHashCode = 1550955865
size = 224
isKeyFrame = true
presentationTimeUs = 70500
sample:
trackIndex = 0
dataHashCode = -274800552
size = 220
isKeyFrame = true
presentationTimeUs = 75167
sample:
trackIndex = 0
dataHashCode = 382420909
size = 224
isKeyFrame = true
presentationTimeUs = 79750
sample:
trackIndex = 0
dataHashCode = -1431575865
size = 232
isKeyFrame = true
presentationTimeUs = 84417
sample: sample:
trackIndex = 1 trackIndex = 1
dataHashCode = -968901399 dataHashCode = -968901399

View File

@ -20,7 +20,6 @@ import static androidx.media3.common.util.Assertions.checkNotNull;
import static androidx.media3.common.util.Assertions.checkState; import static androidx.media3.common.util.Assertions.checkState;
import static java.lang.Math.min; import static java.lang.Math.min;
import android.media.MediaCodec.BufferInfo;
import androidx.annotation.Nullable; import androidx.annotation.Nullable;
import androidx.media3.common.C; import androidx.media3.common.C;
import androidx.media3.common.Format; import androidx.media3.common.Format;
@ -28,9 +27,9 @@ import androidx.media3.common.util.Util;
import androidx.media3.decoder.DecoderInputBuffer; import androidx.media3.decoder.DecoderInputBuffer;
import androidx.media3.exoplayer.audio.AudioProcessor; import androidx.media3.exoplayer.audio.AudioProcessor;
import androidx.media3.exoplayer.audio.AudioProcessor.AudioFormat; import androidx.media3.exoplayer.audio.AudioProcessor.AudioFormat;
import androidx.media3.exoplayer.audio.SonicAudioProcessor;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.util.List; import java.util.List;
import org.checkerframework.checker.nullness.qual.RequiresNonNull;
import org.checkerframework.dataflow.qual.Pure; import org.checkerframework.dataflow.qual.Pure;
/** /**
@ -43,22 +42,18 @@ import org.checkerframework.dataflow.qual.Pure;
private final Codec decoder; private final Codec decoder;
private final DecoderInputBuffer decoderInputBuffer; private final DecoderInputBuffer decoderInputBuffer;
private final SonicAudioProcessor sonicAudioProcessor; @Nullable private final SpeedChangingAudioProcessor speedChangingAudioProcessor;
private final SpeedProvider speedProvider;
private final boolean flattenForSlowMotion;
private final Codec encoder; private final Codec encoder;
private final AudioFormat encoderInputAudioFormat; private final AudioFormat encoderInputAudioFormat;
private final DecoderInputBuffer encoderInputBuffer; private final DecoderInputBuffer encoderInputBuffer;
private final DecoderInputBuffer encoderOutputBuffer; private final DecoderInputBuffer encoderOutputBuffer;
private ByteBuffer processorOutputBuffer;
private long nextEncoderInputBufferTimeUs; private long nextEncoderInputBufferTimeUs;
private long encoderBufferDurationRemainder; private long encoderBufferDurationRemainder;
private ByteBuffer sonicOutputBuffer;
private boolean drainingSonicForSpeedChange;
private float currentSpeed;
public AudioTranscodingSamplePipeline( public AudioTranscodingSamplePipeline(
Format inputFormat, Format inputFormat,
TransformationRequest transformationRequest, TransformationRequest transformationRequest,
@ -74,13 +69,8 @@ import org.checkerframework.dataflow.qual.Pure;
encoderOutputBuffer = encoderOutputBuffer =
new DecoderInputBuffer(DecoderInputBuffer.BUFFER_REPLACEMENT_MODE_DISABLED); new DecoderInputBuffer(DecoderInputBuffer.BUFFER_REPLACEMENT_MODE_DISABLED);
this.decoder = decoderFactory.createForAudioDecoding(inputFormat); decoder = decoderFactory.createForAudioDecoding(inputFormat);
this.flattenForSlowMotion = transformationRequest.flattenForSlowMotion;
sonicAudioProcessor = new SonicAudioProcessor();
sonicOutputBuffer = AudioProcessor.EMPTY_BUFFER;
speedProvider = new SegmentSpeedProvider(inputFormat);
currentSpeed = speedProvider.getSpeed(0);
AudioFormat encoderInputAudioFormat = AudioFormat encoderInputAudioFormat =
new AudioFormat( new AudioFormat(
inputFormat.sampleRate, inputFormat.sampleRate,
@ -88,18 +78,21 @@ import org.checkerframework.dataflow.qual.Pure;
// The decoder uses ENCODING_PCM_16BIT by default. // The decoder uses ENCODING_PCM_16BIT by default.
// https://developer.android.com/reference/android/media/MediaCodec#raw-audio-buffers // https://developer.android.com/reference/android/media/MediaCodec#raw-audio-buffers
C.ENCODING_PCM_16BIT); C.ENCODING_PCM_16BIT);
if (flattenForSlowMotion) { if (transformationRequest.flattenForSlowMotion) {
speedChangingAudioProcessor =
new SpeedChangingAudioProcessor(new SegmentSpeedProvider(inputFormat));
try { try {
encoderInputAudioFormat = sonicAudioProcessor.configure(encoderInputAudioFormat); encoderInputAudioFormat = speedChangingAudioProcessor.configure(encoderInputAudioFormat);
} catch (AudioProcessor.UnhandledAudioFormatException impossible) { } catch (AudioProcessor.UnhandledAudioFormatException impossible) {
throw new IllegalStateException(impossible); throw new IllegalStateException(impossible);
} }
sonicAudioProcessor.setSpeed(currentSpeed); speedChangingAudioProcessor.flush();
sonicAudioProcessor.setPitch(currentSpeed); } else {
sonicAudioProcessor.flush(); speedChangingAudioProcessor = null;
} }
this.encoderInputAudioFormat = encoderInputAudioFormat; processorOutputBuffer = AudioProcessor.EMPTY_BUFFER;
this.encoderInputAudioFormat = encoderInputAudioFormat;
Format requestedOutputFormat = Format requestedOutputFormat =
new Format.Builder() new Format.Builder()
.setSampleMimeType( .setSampleMimeType(
@ -130,8 +123,8 @@ import org.checkerframework.dataflow.qual.Pure;
@Override @Override
public boolean processData() throws TransformationException { public boolean processData() throws TransformationException {
if (sonicAudioProcessor.isActive()) { if (speedChangingAudioProcessor != null) {
return feedEncoderFromSonic() || feedSonicFromDecoder(); return feedEncoderFromProcessor() || feedProcessorFromDecoder();
} else { } else {
return feedEncoderFromDecoder(); return feedEncoderFromDecoder();
} }
@ -167,7 +160,9 @@ import org.checkerframework.dataflow.qual.Pure;
@Override @Override
public void release() { public void release() {
sonicAudioProcessor.reset(); if (speedChangingAudioProcessor != null) {
speedChangingAudioProcessor.reset();
}
decoder.release(); decoder.release();
encoder.release(); encoder.release();
} }
@ -190,10 +185,7 @@ import org.checkerframework.dataflow.qual.Pure;
if (decoderOutputBuffer == null) { if (decoderOutputBuffer == null) {
return false; return false;
} }
if (isSpeedChanging(checkNotNull(decoder.getOutputBufferInfo()))) {
flushSonicAndSetSpeed(currentSpeed);
return false;
}
feedEncoder(decoderOutputBuffer); feedEncoder(decoderOutputBuffer);
if (!decoderOutputBuffer.hasRemaining()) { if (!decoderOutputBuffer.hasRemaining()) {
decoder.releaseOutputBuffer(); decoder.releaseOutputBuffer();
@ -205,22 +197,23 @@ import org.checkerframework.dataflow.qual.Pure;
* Attempts to pass audio processor output data to the encoder, and returns whether it may be * Attempts to pass audio processor output data to the encoder, and returns whether it may be
* possible to pass more data immediately by calling this method again. * possible to pass more data immediately by calling this method again.
*/ */
private boolean feedEncoderFromSonic() throws TransformationException { @RequiresNonNull("speedChangingAudioProcessor")
private boolean feedEncoderFromProcessor() throws TransformationException {
if (!encoder.maybeDequeueInputBuffer(encoderInputBuffer)) { if (!encoder.maybeDequeueInputBuffer(encoderInputBuffer)) {
return false; return false;
} }
if (!sonicOutputBuffer.hasRemaining()) { if (!processorOutputBuffer.hasRemaining()) {
sonicOutputBuffer = sonicAudioProcessor.getOutput(); processorOutputBuffer = speedChangingAudioProcessor.getOutput();
if (!sonicOutputBuffer.hasRemaining()) { if (!processorOutputBuffer.hasRemaining()) {
if (decoder.isEnded() && sonicAudioProcessor.isEnded()) { if (decoder.isEnded() && speedChangingAudioProcessor.isEnded()) {
queueEndOfStreamToEncoder(); queueEndOfStreamToEncoder();
} }
return false; return false;
} }
} }
feedEncoder(sonicOutputBuffer); feedEncoder(processorOutputBuffer);
return true; return true;
} }
@ -228,37 +221,27 @@ import org.checkerframework.dataflow.qual.Pure;
* Attempts to process decoder output data, and returns whether it may be possible to process more * Attempts to process decoder output data, and returns whether it may be possible to process more
* data immediately by calling this method again. * data immediately by calling this method again.
*/ */
private boolean feedSonicFromDecoder() throws TransformationException { @RequiresNonNull("speedChangingAudioProcessor")
if (drainingSonicForSpeedChange) { private boolean feedProcessorFromDecoder() throws TransformationException {
if (sonicAudioProcessor.isEnded() && !sonicOutputBuffer.hasRemaining()) { // Audio processors invalidate any previous output buffer when more input is queued, so we don't
flushSonicAndSetSpeed(currentSpeed); // queue if there is output still to be processed.
drainingSonicForSpeedChange = false; if (processorOutputBuffer.hasRemaining()
} || speedChangingAudioProcessor.getOutput().hasRemaining()) {
return false;
}
// Sonic invalidates any previous output buffer when more input is queued, so we don't queue if
// there is output still to be processed.
if (sonicOutputBuffer.hasRemaining()) {
return false; return false;
} }
if (decoder.isEnded()) { if (decoder.isEnded()) {
sonicAudioProcessor.queueEndOfStream(); speedChangingAudioProcessor.queueEndOfStream();
return false; return false;
} }
checkState(!sonicAudioProcessor.isEnded()); checkState(!speedChangingAudioProcessor.isEnded());
@Nullable ByteBuffer decoderOutputBuffer = decoder.getOutputBuffer(); @Nullable ByteBuffer decoderOutputBuffer = decoder.getOutputBuffer();
if (decoderOutputBuffer == null) { if (decoderOutputBuffer == null) {
return false; return false;
} }
if (isSpeedChanging(checkNotNull(decoder.getOutputBufferInfo()))) {
sonicAudioProcessor.queueEndOfStream(); speedChangingAudioProcessor.queueInput(decoderOutputBuffer);
drainingSonicForSpeedChange = true;
return false;
}
sonicAudioProcessor.queueInput(decoderOutputBuffer);
if (!decoderOutputBuffer.hasRemaining()) { if (!decoderOutputBuffer.hasRemaining()) {
decoder.releaseOutputBuffer(); decoder.releaseOutputBuffer();
} }
@ -294,22 +277,6 @@ import org.checkerframework.dataflow.qual.Pure;
encoder.queueInputBuffer(encoderInputBuffer); encoder.queueInputBuffer(encoderInputBuffer);
} }
private boolean isSpeedChanging(BufferInfo bufferInfo) {
if (!flattenForSlowMotion) {
return false;
}
float newSpeed = speedProvider.getSpeed(bufferInfo.presentationTimeUs);
boolean speedChanging = newSpeed != currentSpeed;
currentSpeed = newSpeed;
return speedChanging;
}
private void flushSonicAndSetSpeed(float speed) {
sonicAudioProcessor.setSpeed(speed);
sonicAudioProcessor.setPitch(speed);
sonicAudioProcessor.flush();
}
private void computeNextEncoderInputBufferTimeUs( private void computeNextEncoderInputBufferTimeUs(
long bytesWritten, int bytesPerFrame, int sampleRate) { long bytesWritten, int bytesPerFrame, int sampleRate) {
// The calculation below accounts for remainders and rounding. Without that it corresponds to // The calculation below accounts for remainders and rounding. Without that it corresponds to

View File

@ -28,6 +28,8 @@ import java.nio.ByteBuffer;
/** /**
* An {@link AudioProcessor} that changes the speed of audio samples depending on their timestamp. * An {@link AudioProcessor} that changes the speed of audio samples depending on their timestamp.
*/ */
// TODO(b/198772621): Consider making the processor inactive and skipping it in the processor chain
// when speed is 1.
/* package */ final class SpeedChangingAudioProcessor extends BaseAudioProcessor { /* package */ final class SpeedChangingAudioProcessor extends BaseAudioProcessor {
/** The speed provider that provides the speed for each timestamp. */ /** The speed provider that provides the speed for each timestamp. */

View File

@ -30,7 +30,6 @@ import androidx.media3.common.Format;
import androidx.media3.common.util.Util; import androidx.media3.common.util.Util;
import androidx.media3.decoder.DecoderInputBuffer; import androidx.media3.decoder.DecoderInputBuffer;
import java.util.List; import java.util.List;
import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
import org.checkerframework.dataflow.qual.Pure; import org.checkerframework.dataflow.qual.Pure;
/** /**
@ -42,11 +41,11 @@ import org.checkerframework.dataflow.qual.Pure;
private final DecoderInputBuffer decoderInputBuffer; private final DecoderInputBuffer decoderInputBuffer;
private final Codec decoder; private final Codec decoder;
@Nullable private final FrameEditor frameEditor;
private final Codec encoder; private final Codec encoder;
private final DecoderInputBuffer encoderOutputBuffer; private final DecoderInputBuffer encoderOutputBuffer;
private @MonotonicNonNull FrameEditor frameEditor;
private boolean waitingForFrameEditorInput; private boolean waitingForFrameEditorInput;
public VideoTranscodingSamplePipeline( public VideoTranscodingSamplePipeline(
@ -139,6 +138,8 @@ import org.checkerframework.dataflow.qual.Pure;
/* outputSurface= */ checkNotNull(encoder.getInputSurface()), /* outputSurface= */ checkNotNull(encoder.getInputSurface()),
transformationRequest.enableHdrEditing, transformationRequest.enableHdrEditing,
debugViewProvider); debugViewProvider);
} else {
frameEditor = null;
} }
decoder = decoder =