Use SpeedChangingAudioProcessor in Transformer

PiperOrigin-RevId: 426113559
This commit is contained in:
kimvde 2022-02-03 12:42:16 +00:00 committed by Ian Baker
parent 92a6cc10a4
commit ddfd79bb98
4 changed files with 155 additions and 179 deletions

View File

@ -132,148 +132,154 @@ sample:
presentationTimeUs = 0
sample:
trackIndex = 0
dataHashCode = 1000136444
size = 140
dataHashCode = -1948569090
size = 72
isKeyFrame = true
presentationTimeUs = 417
sample:
trackIndex = 0
dataHashCode = 217961709
size = 172
isKeyFrame = true
presentationTimeUs = 3334
sample:
trackIndex = 0
dataHashCode = -879376936
size = 176
isKeyFrame = true
presentationTimeUs = 6917
sample:
trackIndex = 0
dataHashCode = 1259979587
size = 192
isKeyFrame = true
presentationTimeUs = 10584
sample:
trackIndex = 0
dataHashCode = 907407225
size = 188
isKeyFrame = true
presentationTimeUs = 14584
sample:
trackIndex = 0
dataHashCode = -904354707
size = 176
isKeyFrame = true
presentationTimeUs = 18500
sample:
trackIndex = 0
dataHashCode = 1001385853
size = 172
isKeyFrame = true
presentationTimeUs = 22167
sample:
trackIndex = 0
dataHashCode = 1545716086
size = 196
isKeyFrame = true
presentationTimeUs = 25750
sample:
trackIndex = 0
dataHashCode = 358710839
size = 180
isKeyFrame = true
presentationTimeUs = 29834
sample:
trackIndex = 0
dataHashCode = -671124798
size = 140
isKeyFrame = true
presentationTimeUs = 33584
sample:
trackIndex = 0
dataHashCode = -945404910
size = 120
isKeyFrame = true
presentationTimeUs = 36500
sample:
trackIndex = 0
dataHashCode = 1881048379
size = 88
isKeyFrame = true
presentationTimeUs = 39000
sample:
trackIndex = 0
dataHashCode = 1059579897
size = 88
isKeyFrame = true
presentationTimeUs = 40834
sample:
trackIndex = 0
dataHashCode = 1496098648
dataHashCode = -1316750072
size = 84
isKeyFrame = true
presentationTimeUs = 42667
presentationTimeUs = 1917
sample:
trackIndex = 0
dataHashCode = 250093960
size = 751
dataHashCode = 1016428949
size = 88
isKeyFrame = true
presentationTimeUs = 44417
presentationTimeUs = 3667
sample:
trackIndex = 0
dataHashCode = 1895536226
size = 1045
dataHashCode = -1127325245
size = 96
isKeyFrame = true
presentationTimeUs = 60063
presentationTimeUs = 5500
sample:
trackIndex = 0
dataHashCode = 1723596464
size = 947
dataHashCode = 1148147726
size = 92
isKeyFrame = true
presentationTimeUs = 81834
presentationTimeUs = 7500
sample:
trackIndex = 0
dataHashCode = -978803114
size = 946
dataHashCode = -2125685540
size = 76
isKeyFrame = true
presentationTimeUs = 101563
presentationTimeUs = 9417
sample:
trackIndex = 0
dataHashCode = 387377078
size = 946
dataHashCode = 473329679
size = 24
isKeyFrame = true
presentationTimeUs = 121271
presentationTimeUs = 11000
sample:
trackIndex = 0
dataHashCode = -132658698
size = 901
dataHashCode = 240990900
size = 176
isKeyFrame = true
presentationTimeUs = 140980
presentationTimeUs = 11500
sample:
trackIndex = 0
dataHashCode = 1495036471
size = 899
dataHashCode = 777637182
size = 196
isKeyFrame = true
presentationTimeUs = 159750
presentationTimeUs = 15167
sample:
trackIndex = 0
dataHashCode = 304440590
size = 878
dataHashCode = 1872106264
size = 180
isKeyFrame = true
presentationTimeUs = 178480
presentationTimeUs = 19250
sample:
trackIndex = 0
dataHashCode = -1955900344
size = 112
dataHashCode = -1520711499
size = 140
isKeyFrame = true
presentationTimeUs = 196771
presentationTimeUs = 23000
sample:
trackIndex = 0
dataHashCode = 88896626
size = 116
dataHashCode = 1580199067
size = 232
isKeyFrame = true
presentationTimeUs = 199105
presentationTimeUs = 25917
sample:
trackIndex = 0
dataHashCode = 475464086
size = 184
isKeyFrame = true
presentationTimeUs = 30750
sample:
trackIndex = 0
dataHashCode = -211754132
size = 172
isKeyFrame = true
presentationTimeUs = 34584
sample:
trackIndex = 0
dataHashCode = 1236547164
size = 172
isKeyFrame = true
presentationTimeUs = 38167
sample:
trackIndex = 0
dataHashCode = -2064216186
size = 188
isKeyFrame = true
presentationTimeUs = 41750
sample:
trackIndex = 0
dataHashCode = -682950885
size = 260
isKeyFrame = true
presentationTimeUs = 45667
sample:
trackIndex = 0
dataHashCode = 1301206627
size = 236
isKeyFrame = true
presentationTimeUs = 51084
sample:
trackIndex = 0
dataHashCode = 256580525
size = 236
isKeyFrame = true
presentationTimeUs = 56000
sample:
trackIndex = 0
dataHashCode = -1086601304
size = 236
isKeyFrame = true
presentationTimeUs = 60917
sample:
trackIndex = 0
dataHashCode = -2046131588
size = 224
isKeyFrame = true
presentationTimeUs = 65834
sample:
trackIndex = 0
dataHashCode = 1550955865
size = 224
isKeyFrame = true
presentationTimeUs = 70500
sample:
trackIndex = 0
dataHashCode = -274800552
size = 220
isKeyFrame = true
presentationTimeUs = 75167
sample:
trackIndex = 0
dataHashCode = 382420909
size = 224
isKeyFrame = true
presentationTimeUs = 79750
sample:
trackIndex = 0
dataHashCode = -1431575865
size = 232
isKeyFrame = true
presentationTimeUs = 84417
sample:
trackIndex = 1
dataHashCode = -968901399

View File

@ -20,7 +20,6 @@ import static androidx.media3.common.util.Assertions.checkNotNull;
import static androidx.media3.common.util.Assertions.checkState;
import static java.lang.Math.min;
import android.media.MediaCodec.BufferInfo;
import androidx.annotation.Nullable;
import androidx.media3.common.C;
import androidx.media3.common.Format;
@ -28,9 +27,9 @@ import androidx.media3.common.util.Util;
import androidx.media3.decoder.DecoderInputBuffer;
import androidx.media3.exoplayer.audio.AudioProcessor;
import androidx.media3.exoplayer.audio.AudioProcessor.AudioFormat;
import androidx.media3.exoplayer.audio.SonicAudioProcessor;
import java.nio.ByteBuffer;
import java.util.List;
import org.checkerframework.checker.nullness.qual.RequiresNonNull;
import org.checkerframework.dataflow.qual.Pure;
/**
@ -43,22 +42,18 @@ import org.checkerframework.dataflow.qual.Pure;
private final Codec decoder;
private final DecoderInputBuffer decoderInputBuffer;
private final SonicAudioProcessor sonicAudioProcessor;
private final SpeedProvider speedProvider;
private final boolean flattenForSlowMotion;
@Nullable private final SpeedChangingAudioProcessor speedChangingAudioProcessor;
private final Codec encoder;
private final AudioFormat encoderInputAudioFormat;
private final DecoderInputBuffer encoderInputBuffer;
private final DecoderInputBuffer encoderOutputBuffer;
private ByteBuffer processorOutputBuffer;
private long nextEncoderInputBufferTimeUs;
private long encoderBufferDurationRemainder;
private ByteBuffer sonicOutputBuffer;
private boolean drainingSonicForSpeedChange;
private float currentSpeed;
public AudioTranscodingSamplePipeline(
Format inputFormat,
TransformationRequest transformationRequest,
@ -74,13 +69,8 @@ import org.checkerframework.dataflow.qual.Pure;
encoderOutputBuffer =
new DecoderInputBuffer(DecoderInputBuffer.BUFFER_REPLACEMENT_MODE_DISABLED);
this.decoder = decoderFactory.createForAudioDecoding(inputFormat);
decoder = decoderFactory.createForAudioDecoding(inputFormat);
this.flattenForSlowMotion = transformationRequest.flattenForSlowMotion;
sonicAudioProcessor = new SonicAudioProcessor();
sonicOutputBuffer = AudioProcessor.EMPTY_BUFFER;
speedProvider = new SegmentSpeedProvider(inputFormat);
currentSpeed = speedProvider.getSpeed(0);
AudioFormat encoderInputAudioFormat =
new AudioFormat(
inputFormat.sampleRate,
@ -88,18 +78,21 @@ import org.checkerframework.dataflow.qual.Pure;
// The decoder uses ENCODING_PCM_16BIT by default.
// https://developer.android.com/reference/android/media/MediaCodec#raw-audio-buffers
C.ENCODING_PCM_16BIT);
if (flattenForSlowMotion) {
if (transformationRequest.flattenForSlowMotion) {
speedChangingAudioProcessor =
new SpeedChangingAudioProcessor(new SegmentSpeedProvider(inputFormat));
try {
encoderInputAudioFormat = sonicAudioProcessor.configure(encoderInputAudioFormat);
encoderInputAudioFormat = speedChangingAudioProcessor.configure(encoderInputAudioFormat);
} catch (AudioProcessor.UnhandledAudioFormatException impossible) {
throw new IllegalStateException(impossible);
}
sonicAudioProcessor.setSpeed(currentSpeed);
sonicAudioProcessor.setPitch(currentSpeed);
sonicAudioProcessor.flush();
speedChangingAudioProcessor.flush();
} else {
speedChangingAudioProcessor = null;
}
this.encoderInputAudioFormat = encoderInputAudioFormat;
processorOutputBuffer = AudioProcessor.EMPTY_BUFFER;
this.encoderInputAudioFormat = encoderInputAudioFormat;
Format requestedOutputFormat =
new Format.Builder()
.setSampleMimeType(
@ -130,8 +123,8 @@ import org.checkerframework.dataflow.qual.Pure;
@Override
public boolean processData() throws TransformationException {
if (sonicAudioProcessor.isActive()) {
return feedEncoderFromSonic() || feedSonicFromDecoder();
if (speedChangingAudioProcessor != null) {
return feedEncoderFromProcessor() || feedProcessorFromDecoder();
} else {
return feedEncoderFromDecoder();
}
@ -167,7 +160,9 @@ import org.checkerframework.dataflow.qual.Pure;
/** Resets the audio processors, then releases the decoder and the encoder. */
@Override
public void release() {
sonicAudioProcessor.reset();
// speedChangingAudioProcessor is assigned null when slow-motion flattening is not requested, so
// it is only reset when it exists.
if (speedChangingAudioProcessor != null) {
speedChangingAudioProcessor.reset();
}
decoder.release();
encoder.release();
}
@ -190,10 +185,7 @@ import org.checkerframework.dataflow.qual.Pure;
if (decoderOutputBuffer == null) {
return false;
}
if (isSpeedChanging(checkNotNull(decoder.getOutputBufferInfo()))) {
flushSonicAndSetSpeed(currentSpeed);
return false;
}
feedEncoder(decoderOutputBuffer);
if (!decoderOutputBuffer.hasRemaining()) {
decoder.releaseOutputBuffer();
@ -205,22 +197,23 @@ import org.checkerframework.dataflow.qual.Pure;
* Attempts to pass audio processor output data to the encoder, and returns whether it may be
* possible to pass more data immediately by calling this method again.
*/
private boolean feedEncoderFromSonic() throws TransformationException {
@RequiresNonNull("speedChangingAudioProcessor")
private boolean feedEncoderFromProcessor() throws TransformationException {
if (!encoder.maybeDequeueInputBuffer(encoderInputBuffer)) {
return false;
}
if (!sonicOutputBuffer.hasRemaining()) {
sonicOutputBuffer = sonicAudioProcessor.getOutput();
if (!sonicOutputBuffer.hasRemaining()) {
if (decoder.isEnded() && sonicAudioProcessor.isEnded()) {
if (!processorOutputBuffer.hasRemaining()) {
processorOutputBuffer = speedChangingAudioProcessor.getOutput();
if (!processorOutputBuffer.hasRemaining()) {
if (decoder.isEnded() && speedChangingAudioProcessor.isEnded()) {
queueEndOfStreamToEncoder();
}
return false;
}
}
feedEncoder(sonicOutputBuffer);
feedEncoder(processorOutputBuffer);
return true;
}
@ -228,37 +221,27 @@ import org.checkerframework.dataflow.qual.Pure;
* Attempts to process decoder output data, and returns whether it may be possible to process more
* data immediately by calling this method again.
*/
private boolean feedSonicFromDecoder() throws TransformationException {
if (drainingSonicForSpeedChange) {
if (sonicAudioProcessor.isEnded() && !sonicOutputBuffer.hasRemaining()) {
flushSonicAndSetSpeed(currentSpeed);
drainingSonicForSpeedChange = false;
}
return false;
}
// Sonic invalidates any previous output buffer when more input is queued, so we don't queue if
// there is output still to be processed.
if (sonicOutputBuffer.hasRemaining()) {
@RequiresNonNull("speedChangingAudioProcessor")
private boolean feedProcessorFromDecoder() throws TransformationException {
// Audio processors invalidate any previous output buffer when more input is queued, so we don't
// queue if there is output still to be processed.
if (processorOutputBuffer.hasRemaining()
|| speedChangingAudioProcessor.getOutput().hasRemaining()) {
return false;
}
if (decoder.isEnded()) {
sonicAudioProcessor.queueEndOfStream();
speedChangingAudioProcessor.queueEndOfStream();
return false;
}
checkState(!sonicAudioProcessor.isEnded());
checkState(!speedChangingAudioProcessor.isEnded());
@Nullable ByteBuffer decoderOutputBuffer = decoder.getOutputBuffer();
if (decoderOutputBuffer == null) {
return false;
}
if (isSpeedChanging(checkNotNull(decoder.getOutputBufferInfo()))) {
sonicAudioProcessor.queueEndOfStream();
drainingSonicForSpeedChange = true;
return false;
}
sonicAudioProcessor.queueInput(decoderOutputBuffer);
speedChangingAudioProcessor.queueInput(decoderOutputBuffer);
if (!decoderOutputBuffer.hasRemaining()) {
decoder.releaseOutputBuffer();
}
@ -294,22 +277,6 @@ import org.checkerframework.dataflow.qual.Pure;
encoder.queueInputBuffer(encoderInputBuffer);
}
/**
 * Returns whether the speed for the given buffer differs from {@code currentSpeed}.
 *
 * <p>The speed is looked up from {@code speedProvider} using the buffer's presentation time. As a
 * side effect, {@code currentSpeed} is overwritten with the looked-up speed, so a second call
 * with the same buffer returns {@code false}.
 *
 * @param bufferInfo The buffer info whose {@code presentationTimeUs} is used for the lookup.
 * @return {@code true} if the looked-up speed is not equal to the previous {@code currentSpeed};
 *     always {@code false} when {@code flattenForSlowMotion} is disabled.
 */
private boolean isSpeedChanging(BufferInfo bufferInfo) {
// Without slow-motion flattening the speed is never queried and currentSpeed is left untouched.
if (!flattenForSlowMotion) {
return false;
}
float newSpeed = speedProvider.getSpeed(bufferInfo.presentationTimeUs);
// Exact float comparison: any difference in the provided value counts as a speed change.
boolean speedChanging = newSpeed != currentSpeed;
currentSpeed = newSpeed;
return speedChanging;
}
/**
 * Applies {@code speed} to the Sonic audio processor as both its speed and its pitch, then
 * flushes the processor.
 *
 * @param speed The value passed to both {@code setSpeed} and {@code setPitch}.
 */
private void flushSonicAndSetSpeed(float speed) {
sonicAudioProcessor.setSpeed(speed);
sonicAudioProcessor.setPitch(speed);
// NOTE(review): presumably the flush is what makes the new speed/pitch take effect — confirm
// against the SonicAudioProcessor documentation.
sonicAudioProcessor.flush();
}
private void computeNextEncoderInputBufferTimeUs(
long bytesWritten, int bytesPerFrame, int sampleRate) {
// The calculation below accounts for remainders and rounding. Without that it corresponds to

View File

@ -28,6 +28,8 @@ import java.nio.ByteBuffer;
/**
* An {@link AudioProcessor} that changes the speed of audio samples depending on their timestamp.
*/
// TODO(b/198772621): Consider making the processor inactive and skipping it in the processor chain
// when speed is 1.
/* package */ final class SpeedChangingAudioProcessor extends BaseAudioProcessor {
/** The speed provider that provides the speed for each timestamp. */

View File

@ -30,7 +30,6 @@ import androidx.media3.common.Format;
import androidx.media3.common.util.Util;
import androidx.media3.decoder.DecoderInputBuffer;
import java.util.List;
import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
import org.checkerframework.dataflow.qual.Pure;
/**
@ -42,11 +41,11 @@ import org.checkerframework.dataflow.qual.Pure;
private final DecoderInputBuffer decoderInputBuffer;
private final Codec decoder;
@Nullable private final FrameEditor frameEditor;
private final Codec encoder;
private final DecoderInputBuffer encoderOutputBuffer;
private @MonotonicNonNull FrameEditor frameEditor;
private boolean waitingForFrameEditorInput;
public VideoTranscodingSamplePipeline(
@ -139,6 +138,8 @@ import org.checkerframework.dataflow.qual.Pure;
/* outputSurface= */ checkNotNull(encoder.getInputSurface()),
transformationRequest.enableHdrEditing,
debugViewProvider);
} else {
frameEditor = null;
}
decoder =