Use AudioProcessingPipeline in Transformer.

Provides an API for applications to set the AudioProcessor instances that
Transformer applies to decoded audio.
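
For illustration, here is a minimal sketch of wiring processors in through the new
setter. The helper name buildTransformer and the single-processor list are
illustrative, not part of this change; only Transformer.Builder#setAudioProcessors
is new API.

  import android.content.Context;
  import androidx.media3.common.audio.AudioProcessor;
  import androidx.media3.transformer.Transformer;
  import com.google.common.collect.ImmutableList;

  /** Builds a Transformer that applies the given processor to decoded audio. */
  static Transformer buildTransformer(Context context, AudioProcessor audioProcessor) {
    return new Transformer.Builder(context)
        .setAudioProcessors(ImmutableList.of(audioProcessor))
        .build();
  }

Processors run in list order, and each one is applied only if it is active for the
configured audio format.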

PiperOrigin-RevId: 488621242
samrobinson 2022-11-15 12:28:33 +00:00 committed by microkatz
parent 7a7d08343a
commit 7b2547271a
4 changed files with 110 additions and 81 deletions


@@ -18,21 +18,23 @@ package androidx.media3.transformer;
import static androidx.media3.common.util.Assertions.checkNotNull;
import static androidx.media3.common.util.Assertions.checkState;
import static androidx.media3.decoder.DecoderInputBuffer.BUFFER_REPLACEMENT_MODE_DISABLED;
import static java.lang.Math.min;
import androidx.annotation.Nullable;
import androidx.media3.common.C;
import androidx.media3.common.Format;
import androidx.media3.common.audio.AudioProcessingPipeline;
import androidx.media3.common.audio.AudioProcessor;
import androidx.media3.common.audio.AudioProcessor.AudioFormat;
import androidx.media3.common.util.Util;
import androidx.media3.decoder.DecoderInputBuffer;
import com.google.common.collect.ImmutableList;
import java.nio.ByteBuffer;
import org.checkerframework.checker.nullness.qual.RequiresNonNull;
import org.checkerframework.dataflow.qual.Pure;
/**
* Pipeline to decode audio samples, apply transformations on the raw samples, and re-encode them.
* Pipeline to decode audio samples, apply audio processing to the raw samples, and re-encode them.
*/
/* package */ final class AudioTranscodingSamplePipeline extends BaseSamplePipeline {
@@ -40,16 +42,12 @@ import org.checkerframework.dataflow.qual.Pure;
private final Codec decoder;
private final DecoderInputBuffer decoderInputBuffer;
@Nullable private final SpeedChangingAudioProcessor speedChangingAudioProcessor;
private final AudioProcessingPipeline audioProcessingPipeline;
private final Codec encoder;
private final AudioFormat encoderInputAudioFormat;
private final DecoderInputBuffer encoderInputBuffer;
private final DecoderInputBuffer encoderOutputBuffer;
private ByteBuffer processorOutputBuffer;
private long nextEncoderInputBufferTimeUs;
private long encoderBufferDurationRemainder;
@@ -58,6 +56,7 @@ import org.checkerframework.dataflow.qual.Pure;
long streamStartPositionUs,
long streamOffsetUs,
TransformationRequest transformationRequest,
ImmutableList<AudioProcessor> audioProcessors,
Codec.DecoderFactory decoderFactory,
Codec.EncoderFactory encoderFactory,
MuxerWrapper muxerWrapper,
@@ -70,37 +69,38 @@ import org.checkerframework.dataflow.qual.Pure;
transformationRequest.flattenForSlowMotion,
muxerWrapper);
decoderInputBuffer =
new DecoderInputBuffer(DecoderInputBuffer.BUFFER_REPLACEMENT_MODE_DISABLED);
encoderInputBuffer =
new DecoderInputBuffer(DecoderInputBuffer.BUFFER_REPLACEMENT_MODE_DISABLED);
encoderOutputBuffer =
new DecoderInputBuffer(DecoderInputBuffer.BUFFER_REPLACEMENT_MODE_DISABLED);
decoderInputBuffer = new DecoderInputBuffer(BUFFER_REPLACEMENT_MODE_DISABLED);
encoderInputBuffer = new DecoderInputBuffer(BUFFER_REPLACEMENT_MODE_DISABLED);
encoderOutputBuffer = new DecoderInputBuffer(BUFFER_REPLACEMENT_MODE_DISABLED);
decoder = decoderFactory.createForAudioDecoding(inputFormat);
AudioFormat encoderInputAudioFormat =
if (transformationRequest.flattenForSlowMotion) {
audioProcessors =
new ImmutableList.Builder<AudioProcessor>()
.add(new SpeedChangingAudioProcessor(new SegmentSpeedProvider(inputFormat)))
.addAll(audioProcessors)
.build();
}
audioProcessingPipeline = new AudioProcessingPipeline(audioProcessors);
AudioFormat pipelineInputAudioFormat =
new AudioFormat(
inputFormat.sampleRate,
inputFormat.channelCount,
// The decoder uses ENCODING_PCM_16BIT by default.
// https://developer.android.com/reference/android/media/MediaCodec#raw-audio-buffers
C.ENCODING_PCM_16BIT);
if (transformationRequest.flattenForSlowMotion) {
speedChangingAudioProcessor =
new SpeedChangingAudioProcessor(new SegmentSpeedProvider(inputFormat));
try {
encoderInputAudioFormat = speedChangingAudioProcessor.configure(encoderInputAudioFormat);
} catch (AudioProcessor.UnhandledAudioFormatException impossible) {
throw new IllegalStateException(impossible);
}
speedChangingAudioProcessor.flush();
} else {
speedChangingAudioProcessor = null;
}
processorOutputBuffer = AudioProcessor.EMPTY_BUFFER;
this.encoderInputAudioFormat = encoderInputAudioFormat;
try {
encoderInputAudioFormat = audioProcessingPipeline.configure(pipelineInputAudioFormat);
} catch (AudioProcessor.UnhandledAudioFormatException unhandledAudioFormatException) {
throw TransformationException.createForAudioProcessing(
unhandledAudioFormatException, pipelineInputAudioFormat);
}
audioProcessingPipeline.flush();
Format requestedOutputFormat =
new Format.Builder()
.setSampleMimeType(
@@ -125,9 +125,7 @@ import org.checkerframework.dataflow.qual.Pure;
@Override
public void release() {
if (speedChangingAudioProcessor != null) {
speedChangingAudioProcessor.reset();
}
audioProcessingPipeline.reset();
decoder.release();
encoder.release();
}
@@ -145,8 +143,8 @@ import org.checkerframework.dataflow.qual.Pure;
@Override
protected boolean processDataUpToMuxer() throws TransformationException {
if (speedChangingAudioProcessor != null) {
return feedEncoderFromProcessor() || feedProcessorFromDecoder();
if (audioProcessingPipeline.isOperational()) {
return feedEncoderFromProcessingPipeline() || feedProcessingPipelineFromDecoder();
} else {
return feedEncoderFromDecoder();
}
@@ -207,57 +205,51 @@ import org.checkerframework.dataflow.qual.Pure;
}
/**
* Attempts to pass audio processor output data to the encoder, and returns whether it may be
* possible to pass more data immediately by calling this method again.
* Attempts to feed audio processor output data to the encoder.
*
* @return Whether more data can be fed immediately, by calling this method again.
*/
@RequiresNonNull("speedChangingAudioProcessor")
private boolean feedEncoderFromProcessor() throws TransformationException {
private boolean feedEncoderFromProcessingPipeline() throws TransformationException {
if (!encoder.maybeDequeueInputBuffer(encoderInputBuffer)) {
return false;
}
if (!processorOutputBuffer.hasRemaining()) {
processorOutputBuffer = speedChangingAudioProcessor.getOutput();
if (!processorOutputBuffer.hasRemaining()) {
if (decoder.isEnded() && speedChangingAudioProcessor.isEnded()) {
ByteBuffer processingPipelineOutputBuffer = audioProcessingPipeline.getOutput();
if (!processingPipelineOutputBuffer.hasRemaining()) {
if (audioProcessingPipeline.isEnded()) {
queueEndOfStreamToEncoder();
}
return false;
}
}
feedEncoder(processorOutputBuffer);
feedEncoder(processingPipelineOutputBuffer);
return true;
}
/**
* Attempts to process decoder output data, and returns whether it may be possible to process more
* data immediately by calling this method again.
* Attempts to feed decoder output data to the {@link AudioProcessingPipeline}.
*
* @return Whether it may be possible to feed more data immediately by calling this method again.
*/
@RequiresNonNull("speedChangingAudioProcessor")
private boolean feedProcessorFromDecoder() throws TransformationException {
// Audio processors invalidate any previous output buffer when more input is queued, so we don't
// queue if there is output still to be processed.
if (processorOutputBuffer.hasRemaining()
|| speedChangingAudioProcessor.getOutput().hasRemaining()) {
return false;
}
private boolean feedProcessingPipelineFromDecoder() throws TransformationException {
if (decoder.isEnded()) {
speedChangingAudioProcessor.queueEndOfStream();
audioProcessingPipeline.queueEndOfStream();
return false;
}
checkState(!speedChangingAudioProcessor.isEnded());
checkState(!audioProcessingPipeline.isEnded());
@Nullable ByteBuffer decoderOutputBuffer = decoder.getOutputBuffer();
if (decoderOutputBuffer == null) {
return false;
}
speedChangingAudioProcessor.queueInput(decoderOutputBuffer);
if (!decoderOutputBuffer.hasRemaining()) {
decoder.releaseOutputBuffer(/* render= */ false);
audioProcessingPipeline.queueInput(decoderOutputBuffer);
if (decoderOutputBuffer.hasRemaining()) {
return false;
}
// Decoder output buffer was fully consumed by the processing pipeline.
decoder.releaseOutputBuffer(/* render= */ false);
return true;
}
@@ -290,6 +282,17 @@ import org.checkerframework.dataflow.qual.Pure;
encoder.queueInputBuffer(encoderInputBuffer);
}
@Pure
private static TransformationRequest createFallbackTransformationRequest(
TransformationRequest transformationRequest, Format requestedFormat, Format actualFormat) {
// TODO(b/210591626): Also update bitrate and other params once encoder configuration and
// fallback are implemented.
if (Util.areEqual(requestedFormat.sampleMimeType, actualFormat.sampleMimeType)) {
return transformationRequest;
}
return transformationRequest.buildUpon().setAudioMimeType(actualFormat.sampleMimeType).build();
}
private void computeNextEncoderInputBufferTimeUs(
long bytesWritten, int bytesPerFrame, int sampleRate) {
// The calculation below accounts for remainders and rounding. Without that it corresponds to
@@ -307,15 +310,4 @@ import org.checkerframework.dataflow.qual.Pure;
}
nextEncoderInputBufferTimeUs += bufferDurationUs;
}
@Pure
private static TransformationRequest createFallbackTransformationRequest(
TransformationRequest transformationRequest, Format requestedFormat, Format actualFormat) {
// TODO(b/210591626): Also update bitrate and other params once encoder configuration and
// fallback are implemented.
if (Util.areEqual(requestedFormat.sampleMimeType, actualFormat.sampleMimeType)) {
return transformationRequest;
}
return transformationRequest.buildUpon().setAudioMimeType(actualFormat.sampleMimeType).build();
}
}
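
For reference, the feed/drain contract that AudioTranscodingSamplePipeline relies on
can be sketched in isolation as follows. This is a hedged sketch: handleProcessedPcm
stands in for the encoder plumbing above, and the 44.1 kHz stereo input format is an
arbitrary example.

  import androidx.media3.common.C;
  import androidx.media3.common.audio.AudioProcessingPipeline;
  import androidx.media3.common.audio.AudioProcessor;
  import androidx.media3.common.audio.AudioProcessor.AudioFormat;
  import com.google.common.collect.ImmutableList;
  import java.nio.ByteBuffer;

  /** Runs decoded PCM through an AudioProcessingPipeline and drains its output. */
  static void processPcm(ImmutableList<AudioProcessor> audioProcessors, ByteBuffer decodedPcm)
      throws AudioProcessor.UnhandledAudioFormatException {
    AudioProcessingPipeline pipeline = new AudioProcessingPipeline(audioProcessors);
    // Decoders emit 16-bit PCM by default, matching the format used in the pipeline above.
    AudioFormat inputFormat =
        new AudioFormat(/* sampleRate= */ 44100, /* channelCount= */ 2, C.ENCODING_PCM_16BIT);
    AudioFormat outputFormat = pipeline.configure(inputFormat);
    pipeline.flush();
    if (!pipeline.isOperational()) {
      return; // No active processors; decoded buffers can go straight to the encoder.
    }
    // Feed input until it is fully consumed, draining output as it becomes available.
    while (decodedPcm.hasRemaining()) {
      pipeline.queueInput(decodedPcm);
      handleProcessedPcm(pipeline.getOutput(), outputFormat);
    }
    // Signal end of stream, then drain the remaining output.
    pipeline.queueEndOfStream();
    while (!pipeline.isEnded()) {
      handleProcessedPcm(pipeline.getOutput(), outputFormat);
    }
    pipeline.reset();
  }

  /** Placeholder for handing processed PCM to an encoder; fully consumes the buffer. */
  static void handleProcessedPcm(ByteBuffer processedPcm, AudioFormat outputFormat) {
    processedPcm.position(processedPcm.limit());
  }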


@@ -26,7 +26,6 @@ import androidx.media3.common.Format;
import androidx.media3.common.FrameProcessingException;
import androidx.media3.common.FrameProcessor;
import androidx.media3.common.PlaybackException;
import androidx.media3.common.audio.AudioProcessor;
import androidx.media3.common.audio.AudioProcessor.AudioFormat;
import androidx.media3.common.util.Clock;
import androidx.media3.common.util.UnstableApi;
@@ -73,6 +72,7 @@ public final class TransformationException extends Exception {
ERROR_CODE_OUTPUT_FORMAT_UNSUPPORTED,
ERROR_CODE_HDR_ENCODING_UNSUPPORTED,
ERROR_CODE_FRAME_PROCESSING_FAILED,
ERROR_CODE_AUDIO_PROCESSING_FAILED,
ERROR_CODE_MUXING_FAILED,
})
public @interface ErrorCode {}
@@ -163,9 +163,15 @@ public final class TransformationException extends Exception {
/** Caused by a frame processing failure. */
public static final int ERROR_CODE_FRAME_PROCESSING_FAILED = 5001;
// Muxing errors (6xxx).
// Audio processing errors (6xxx).
/** Caused by an audio processing failure. */
public static final int ERROR_CODE_AUDIO_PROCESSING_FAILED = 6001;
// Muxing errors (7xxx).
/** Caused by a failure while muxing media samples. */
public static final int ERROR_CODE_MUXING_FAILED = 6001;
public static final int ERROR_CODE_MUXING_FAILED = 7001;
private static final ImmutableBiMap<String, @ErrorCode Integer> NAME_TO_ERROR_CODE =
new ImmutableBiMap.Builder<String, @ErrorCode Integer>()
@@ -188,6 +194,7 @@ public final class TransformationException extends Exception {
.put("ERROR_CODE_OUTPUT_FORMAT_UNSUPPORTED", ERROR_CODE_OUTPUT_FORMAT_UNSUPPORTED)
.put("ERROR_CODE_HDR_ENCODING_UNSUPPORTED", ERROR_CODE_HDR_ENCODING_UNSUPPORTED)
.put("ERROR_CODE_FRAME_PROCESSING_FAILED", ERROR_CODE_FRAME_PROCESSING_FAILED)
.put("ERROR_CODE_AUDIO_PROCESSING_FAILED", ERROR_CODE_AUDIO_PROCESSING_FAILED)
.put("ERROR_CODE_MUXING_FAILED", ERROR_CODE_MUXING_FAILED)
.buildOrThrow();
@@ -264,18 +271,18 @@ public final class TransformationException extends Exception {
}
/**
* Creates an instance for an {@link AudioProcessor} related exception.
* Creates an instance for an audio processing related exception.
*
* @param cause The cause of the failure.
* @param componentName The name of the {@link AudioProcessor} used.
* @param audioFormat The {@link AudioFormat} used.
* @param errorCode See {@link #errorCode}.
* @return The created instance.
*/
public static TransformationException createForAudioProcessor(
Throwable cause, String componentName, AudioFormat audioFormat, int errorCode) {
public static TransformationException createForAudioProcessing(
Throwable cause, AudioFormat audioFormat) {
return new TransformationException(
componentName + " error, audio_format = " + audioFormat, cause, errorCode);
"Audio processing error, audio_format = " + audioFormat,
cause,
ERROR_CODE_AUDIO_PROCESSING_FAILED);
}
/**

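With the dedicated error code, applications can distinguish audio processing failures
from other failures. A minimal sketch (the helper name is illustrative); errorCode is
the existing public field on TransformationException:

  import androidx.media3.transformer.TransformationException;

  /** Returns whether the given failure came from the audio processing stage. */
  static boolean isAudioProcessingFailure(TransformationException exception) {
    return exception.errorCode == TransformationException.ERROR_CODE_AUDIO_PROCESSING_FAILED;
  }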

@@ -35,6 +35,7 @@ import androidx.media3.common.FrameProcessor;
import androidx.media3.common.MediaItem;
import androidx.media3.common.MediaLibraryInfo;
import androidx.media3.common.MimeTypes;
import androidx.media3.common.audio.AudioProcessor;
import androidx.media3.common.util.Clock;
import androidx.media3.common.util.ListenerSet;
import androidx.media3.common.util.UnstableApi;
@@ -85,6 +86,7 @@ public final class Transformer {
// Optional fields.
private TransformationRequest transformationRequest;
private ImmutableList<AudioProcessor> audioProcessors;
private ImmutableList<Effect> videoEffects;
private boolean removeAudio;
private boolean removeVideo;
@@ -106,6 +108,7 @@ public final class Transformer {
public Builder(Context context) {
this.context = context.getApplicationContext();
transformationRequest = new TransformationRequest.Builder().build();
audioProcessors = ImmutableList.of();
videoEffects = ImmutableList.of();
decoderFactory = new DefaultDecoderFactory(this.context);
encoderFactory = new DefaultEncoderFactory.Builder(this.context).build();
@@ -121,6 +124,7 @@ public final class Transformer {
private Builder(Transformer transformer) {
this.context = transformer.context;
this.transformationRequest = transformer.transformationRequest;
this.audioProcessors = transformer.audioProcessors;
this.videoEffects = transformer.videoEffects;
this.removeAudio = transformer.removeAudio;
this.removeVideo = transformer.removeVideo;
@@ -151,6 +155,19 @@ public final class Transformer {
return this;
}
/**
* Sets the {@link AudioProcessor} instances to apply to audio buffers.
*
* <p>The {@link AudioProcessor} instances are applied in the order of the list, and buffers
* will only be modified by that {@link AudioProcessor} if it {@link AudioProcessor#isActive()}
* based on the current configuration.
*/
@CanIgnoreReturnValue
public Builder setAudioProcessors(List<AudioProcessor> audioProcessors) {
this.audioProcessors = ImmutableList.copyOf(audioProcessors);
return this;
}
/**
* Sets the {@link Effect} instances to apply to each video frame.
*
@@ -426,6 +443,7 @@ public final class Transformer {
return new Transformer(
context,
transformationRequest,
audioProcessors,
videoEffects,
removeAudio,
removeVideo,
@@ -537,6 +555,7 @@ public final class Transformer {
private final Context context;
private final TransformationRequest transformationRequest;
private final ImmutableList<AudioProcessor> audioProcessors;
private final ImmutableList<Effect> videoEffects;
private final boolean removeAudio;
private final boolean removeVideo;
@@ -558,6 +577,7 @@ public final class Transformer {
private Transformer(
Context context,
TransformationRequest transformationRequest,
ImmutableList<AudioProcessor> audioProcessors,
ImmutableList<Effect> videoEffects,
boolean removeAudio,
boolean removeVideo,
@@ -573,6 +593,7 @@ public final class Transformer {
checkState(!removeAudio || !removeVideo, "Audio and video cannot both be removed.");
this.context = context;
this.transformationRequest = transformationRequest;
this.audioProcessors = audioProcessors;
this.videoEffects = videoEffects;
this.removeAudio = removeAudio;
this.removeVideo = removeVideo;
@@ -589,6 +610,7 @@ public final class Transformer {
new TransformerInternal(
context,
transformationRequest,
audioProcessors,
videoEffects,
removeAudio,
removeVideo,

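To make the new setter concrete, below is a hedged sketch of a trivial processor that
implements the AudioProcessor interface directly. The class name and the halve-the-gain
logic are illustrative only, and a production processor would handle byte order and
buffer reuse more carefully.

  import androidx.media3.common.C;
  import androidx.media3.common.audio.AudioProcessor;
  import java.nio.ByteBuffer;
  import java.nio.ByteOrder;

  /** Halves the volume of 16-bit PCM audio; illustrative only. */
  final class HalfGainAudioProcessor implements AudioProcessor {

    private ByteBuffer outputBuffer = EMPTY_BUFFER;
    private boolean inputEnded;

    @Override
    public AudioFormat configure(AudioFormat inputAudioFormat)
        throws UnhandledAudioFormatException {
      if (inputAudioFormat.encoding != C.ENCODING_PCM_16BIT) {
        throw new UnhandledAudioFormatException(inputAudioFormat);
      }
      return inputAudioFormat; // The output format matches the input format.
    }

    @Override
    public boolean isActive() {
      return true;
    }

    @Override
    public void queueInput(ByteBuffer inputBuffer) {
      // Copy the input, halving each 16-bit sample. Assumes native byte order, as used
      // by MediaCodec output buffers.
      ByteBuffer buffer =
          ByteBuffer.allocateDirect(inputBuffer.remaining()).order(ByteOrder.nativeOrder());
      while (inputBuffer.hasRemaining()) {
        buffer.putShort((short) (inputBuffer.getShort() / 2));
      }
      buffer.flip();
      outputBuffer = buffer;
    }

    @Override
    public void queueEndOfStream() {
      inputEnded = true;
    }

    @Override
    public ByteBuffer getOutput() {
      ByteBuffer output = outputBuffer;
      outputBuffer = EMPTY_BUFFER;
      return output;
    }

    @Override
    public boolean isEnded() {
      return inputEnded && outputBuffer == EMPTY_BUFFER;
    }

    @Override
    public void flush() {
      outputBuffer = EMPTY_BUFFER;
      inputEnded = false;
    }

    @Override
    public void reset() {
      flush();
    }
  }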

@@ -34,6 +34,7 @@ import androidx.media3.common.MediaItem;
import androidx.media3.common.Metadata;
import androidx.media3.common.MimeTypes;
import androidx.media3.common.PlaybackException;
import androidx.media3.common.audio.AudioProcessor;
import androidx.media3.common.util.Clock;
import androidx.media3.exoplayer.source.MediaSource;
import androidx.media3.extractor.metadata.mp4.SlowMotionData;
@@ -52,6 +53,7 @@ import java.util.List;
private final Context context;
private final TransformationRequest transformationRequest;
private final ImmutableList<AudioProcessor> audioProcessors;
private final ImmutableList<Effect> videoEffects;
private final Codec.DecoderFactory decoderFactory;
private final Codec.EncoderFactory encoderFactory;
@@ -66,6 +68,7 @@ import java.util.List;
public TransformerInternal(
Context context,
TransformationRequest transformationRequest,
ImmutableList<AudioProcessor> audioProcessors,
ImmutableList<Effect> videoEffects,
boolean removeAudio,
boolean removeVideo,
@@ -78,6 +81,7 @@ import java.util.List;
Clock clock) {
this.context = context;
this.transformationRequest = transformationRequest;
this.audioProcessors = audioProcessors;
this.videoEffects = videoEffects;
this.decoderFactory = decoderFactory;
this.encoderFactory = encoderFactory;
@@ -210,6 +214,7 @@ import java.util.List;
streamStartPositionUs,
streamOffsetUs,
transformationRequest,
audioProcessors,
decoderFactory,
encoderFactory,
muxerWrapper,
@@ -256,6 +261,9 @@ import java.util.List;
if (transformationRequest.flattenForSlowMotion && isSlowMotion(inputFormat)) {
return true;
}
if (!audioProcessors.isEmpty()) {
return true;
}
return false;
}