diff --git a/libraries/common/src/main/java/androidx/media3/common/audio/AudioMixingUtil.java b/libraries/common/src/main/java/androidx/media3/common/audio/AudioMixingUtil.java index 491c6af8a8..33b03feecc 100644 --- a/libraries/common/src/main/java/androidx/media3/common/audio/AudioMixingUtil.java +++ b/libraries/common/src/main/java/androidx/media3/common/audio/AudioMixingUtil.java @@ -70,6 +70,8 @@ public final class AudioMixingUtil { * @param matrix Scaled channel mapping from input to output. * @param framesToMix Number of audio frames to mix. Must be within the bounds of both buffers. * @param accumulate Whether to accumulate with the existing samples in the mixing buffer. + * @param clipFloatOutput Whether to clip the output signal to be in the [-1.0, 1.0] range if the + * output encoding is {@link C#ENCODING_PCM_FLOAT}. * @return The {@code mixingBuffer}, for convenience. */ public static ByteBuffer mix( @@ -79,7 +81,8 @@ public final class AudioMixingUtil { AudioFormat mixingAudioFormat, ChannelMixingMatrix matrix, int framesToMix, - boolean accumulate) { + boolean accumulate, + boolean clipFloatOutput) { boolean int16Input = inputAudioFormat.encoding == C.ENCODING_PCM_16BIT; boolean int16Output = mixingAudioFormat.encoding == C.ENCODING_PCM_16BIT; @@ -114,7 +117,10 @@ public final class AudioMixingUtil { (short) constrainValue(outputFrame[outputChannel], Short.MIN_VALUE, Short.MAX_VALUE)); } else { mixingBuffer.putFloat( - constrainValue(outputFrame[outputChannel], FLOAT_PCM_MIN_VALUE, FLOAT_PCM_MAX_VALUE)); + clipFloatOutput + ? 
constrainValue( + outputFrame[outputChannel], FLOAT_PCM_MIN_VALUE, FLOAT_PCM_MAX_VALUE) + : outputFrame[outputChannel]); } outputFrame[outputChannel] = 0; diff --git a/libraries/common/src/main/java/androidx/media3/common/audio/ChannelMixingAudioProcessor.java b/libraries/common/src/main/java/androidx/media3/common/audio/ChannelMixingAudioProcessor.java index d82eeae696..ff067f5d13 100644 --- a/libraries/common/src/main/java/androidx/media3/common/audio/ChannelMixingAudioProcessor.java +++ b/libraries/common/src/main/java/androidx/media3/common/audio/ChannelMixingAudioProcessor.java @@ -85,7 +85,8 @@ public final class ChannelMixingAudioProcessor extends BaseAudioProcessor { outputAudioFormat, channelMixingMatrix, framesToMix, - /* accumulate= */ false); + /* accumulate= */ false, + /* clipFloatOutput= */ true); outputBuffer.flip(); } } diff --git a/libraries/common/src/test/java/androidx/media3/common/audio/AudioMixingUtilTest.java b/libraries/common/src/test/java/androidx/media3/common/audio/AudioMixingUtilTest.java index 2535289b2f..c8504345a6 100644 --- a/libraries/common/src/test/java/androidx/media3/common/audio/AudioMixingUtilTest.java +++ b/libraries/common/src/test/java/androidx/media3/common/audio/AudioMixingUtilTest.java @@ -61,7 +61,8 @@ public final class AudioMixingUtilTest { STEREO_44100_PCM_FLOAT, STEREO_TO_STEREO.scaleBy(0.5f), /* framesToMix= */ 2, - /* accumulate= */ true); + /* accumulate= */ true, + /* clipFloatOutput= */ true); assertWithMessage("Source buffer").that(sourceBuffer.remaining()).isEqualTo(0); assertWithMessage("Mixing buffer").that(mixingBuffer.remaining()).isEqualTo(0); @@ -81,7 +82,8 @@ public final class AudioMixingUtilTest { STEREO_44100_PCM_FLOAT, MONO_TO_STEREO.scaleBy(0.5f), /* framesToMix= */ 2, - /* accumulate= */ true); + /* accumulate= */ true, + /* clipFloatOutput= */ true); assertWithMessage("Source buffer").that(sourceBuffer.remaining()).isEqualTo(0); assertWithMessage("Mixing 
buffer").that(mixingBuffer.remaining()).isEqualTo(0); @@ -108,7 +110,8 @@ public final class AudioMixingUtilTest { STEREO_44100_PCM_FLOAT, STEREO_TO_STEREO.scaleBy(0.5f), /* framesToMix= */ 2, - /* accumulate= */ true); + /* accumulate= */ true, + /* clipFloatOutput= */ true); assertWithMessage("Source buffer").that(sourceBuffer.remaining()).isEqualTo(0); assertWithMessage("Mixing buffer").that(mixingBuffer.remaining()).isEqualTo(0); @@ -132,7 +135,8 @@ public final class AudioMixingUtilTest { STEREO_44100_PCM_FLOAT, MONO_TO_STEREO.scaleBy(0.5f), /* framesToMix= */ 2, - /* accumulate= */ true); + /* accumulate= */ true, + /* clipFloatOutput= */ true); assertWithMessage("Source buffer").that(sourceBuffer.remaining()).isEqualTo(0); assertWithMessage("Mixing buffer").that(mixingBuffer.remaining()).isEqualTo(0); @@ -155,7 +159,8 @@ public final class AudioMixingUtilTest { MONO_44100_PCM_FLOAT, STEREO_TO_MONO.scaleBy(0.5f), /* framesToMix= */ 2, - /* accumulate= */ true); + /* accumulate= */ true, + /* clipFloatOutput= */ true); assertWithMessage("Source buffer").that(sourceBuffer.remaining()).isEqualTo(0); assertWithMessage("Mixing buffer").that(mixingBuffer.remaining()).isEqualTo(0); @@ -175,7 +180,8 @@ public final class AudioMixingUtilTest { MONO_44100_PCM_FLOAT, MONO_TO_MONO.scaleBy(0.5f), /* framesToMix= */ 2, - /* accumulate= */ true); + /* accumulate= */ true, + /* clipFloatOutput= */ true); assertWithMessage("Source buffer").that(sourceBuffer.remaining()).isEqualTo(0); assertWithMessage("Mixing buffer").that(mixingBuffer.remaining()).isEqualTo(0); @@ -202,7 +208,8 @@ public final class AudioMixingUtilTest { MONO_44100_PCM_FLOAT, STEREO_TO_MONO.scaleBy(0.5f), /* framesToMix= */ 2, - /* accumulate= */ true); + /* accumulate= */ true, + /* clipFloatOutput= */ true); assertWithMessage("Source buffer").that(sourceBuffer.remaining()).isEqualTo(0); assertWithMessage("Mixing buffer").that(mixingBuffer.remaining()).isEqualTo(0); @@ -226,7 +233,8 @@ public final class 
AudioMixingUtilTest { MONO_44100_PCM_FLOAT, MONO_TO_MONO.scaleBy(0.5f), /* framesToMix= */ 2, - /* accumulate= */ true); + /* accumulate= */ true, + /* clipFloatOutput= */ true); assertWithMessage("Source buffer").that(sourceBuffer.remaining()).isEqualTo(0); assertWithMessage("Mixing buffer").that(mixingBuffer.remaining()).isEqualTo(0); @@ -250,7 +258,8 @@ public final class AudioMixingUtilTest { STEREO_44100_PCM_16BIT, MONO_TO_STEREO, /* framesToMix= */ 3, - /* accumulate= */ true); + /* accumulate= */ true, + /* clipFloatOutput= */ true); assertWithMessage("Source buffer").that(sourceBuffer.remaining()).isEqualTo(0); assertWithMessage("Mixing buffer").that(mixingBuffer.remaining()).isEqualTo(0); @@ -272,7 +281,8 @@ public final class AudioMixingUtilTest { MONO_44100_PCM_16BIT, MONO_TO_MONO, /* framesToMix= */ 2, - /* accumulate= */ true); + /* accumulate= */ true, + /* clipFloatOutput= */ true); assertWithMessage("Source buffer").that(sourceBuffer.remaining()).isEqualTo(0); assertWithMessage("Mixing buffer") @@ -304,7 +314,8 @@ public final class AudioMixingUtilTest { MONO_44100_PCM_16BIT, MONO_TO_MONO, /* framesToMix= */ 4, - /* accumulate= */ true); + /* accumulate= */ true, + /* clipFloatOutput= */ true); assertWithMessage("Source buffer").that(sourceBuffer.remaining()).isEqualTo(0); assertWithMessage("Mixing buffer").that(mixingBuffer.remaining()).isEqualTo(0); @@ -328,7 +339,8 @@ public final class AudioMixingUtilTest { STEREO_44100_PCM_16BIT, STEREO_TO_STEREO, /* framesToMix= */ 3, - /* accumulate= */ true); + /* accumulate= */ true, + /* clipFloatOutput= */ true); assertWithMessage("Source buffer").that(sourceBuffer.remaining()).isEqualTo(0); assertWithMessage("Mixing buffer").that(mixingBuffer.remaining()).isEqualTo(0); @@ -350,7 +362,8 @@ public final class AudioMixingUtilTest { STEREO_44100_PCM_16BIT, STEREO_TO_STEREO, /* framesToMix= */ 2, - /* accumulate= */ false); + /* accumulate= */ false, + /* clipFloatOutput= */ true); assertWithMessage("Source 
buffer") .that(sourceBuffer.remaining()) @@ -362,4 +375,46 @@ public final class AudioMixingUtilTest { mixingBuffer.rewind(); assertThat(mixingBuffer).isEqualTo(expectedBuffer); } + + @Test + public void mixToMonoFloat_withMonoFloatInput_withClipping() { + ByteBuffer mixingBuffer = createByteBuffer(new float[] {0.9f, -0.9f}); + ByteBuffer sourceBuffer = createByteBuffer(new float[] {0.5f, -0.2f}); + + AudioMixingUtil.mix( + sourceBuffer, + MONO_44100_PCM_FLOAT, + mixingBuffer, + MONO_44100_PCM_FLOAT, + MONO_TO_MONO, + /* framesToMix= */ 2, + /* accumulate= */ true, + /* clipFloatOutput= */ true); + + assertWithMessage("Source buffer").that(sourceBuffer.remaining()).isEqualTo(0); + assertWithMessage("Mixing buffer").that(mixingBuffer.remaining()).isEqualTo(0); + mixingBuffer.rewind(); + assertThat(createFloatArray(mixingBuffer)).isEqualTo(new float[] {1f, -1f}); + } + + @Test + public void mixToMonoFloat_withMonoFloatInput_noClipping() { + ByteBuffer mixingBuffer = createByteBuffer(new float[] {0.9f, -0.9f}); + ByteBuffer sourceBuffer = createByteBuffer(new float[] {0.5f, -0.2f}); + + AudioMixingUtil.mix( + sourceBuffer, + MONO_44100_PCM_FLOAT, + mixingBuffer, + MONO_44100_PCM_FLOAT, + MONO_TO_MONO, + /* framesToMix= */ 2, + /* accumulate= */ true, + /* clipFloatOutput= */ false); + + assertWithMessage("Source buffer").that(sourceBuffer.remaining()).isEqualTo(0); + assertWithMessage("Mixing buffer").that(mixingBuffer.remaining()).isEqualTo(0); + mixingBuffer.rewind(); + assertThat(createFloatArray(mixingBuffer)).isEqualTo(new float[] {1.4f, -1.1f}); + } } diff --git a/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/audio/WaveformAudioBufferSink.java b/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/audio/WaveformAudioBufferSink.java index d42d2b01d8..c93fadf662 100644 --- a/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/audio/WaveformAudioBufferSink.java +++ 
b/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/audio/WaveformAudioBufferSink.java @@ -140,7 +140,8 @@ public class WaveformAudioBufferSink implements TeeAudioProcessor.AudioBufferSin mixingAudioFormat, channelMixingMatrix, /* framesToMix= */ 1, - /* accumulate= */ false); + /* accumulate= */ false, + /* clipFloatOutput= */ true); mixingBuffer.rewind(); for (int i = 0; i < outputChannels.size(); i++) { WaveformBar bar = outputChannels.get(i); diff --git a/libraries/transformer/src/main/java/androidx/media3/transformer/AudioMixer.java b/libraries/transformer/src/main/java/androidx/media3/transformer/AudioMixer.java index c0a979db10..035ebdb112 100644 --- a/libraries/transformer/src/main/java/androidx/media3/transformer/AudioMixer.java +++ b/libraries/transformer/src/main/java/androidx/media3/transformer/AudioMixer.java @@ -64,7 +64,9 @@ public interface AudioMixer { */ @Deprecated static AudioMixer create() { - return new DefaultAudioMixer.Factory(/* outputSilenceWithNoSources= */ true).create(); + return new DefaultAudioMixer.Factory( + /* outputSilenceWithNoSources= */ true, /* clipFloatOutput= */ true) + .create(); } /** diff --git a/libraries/transformer/src/main/java/androidx/media3/transformer/DefaultAudioMixer.java b/libraries/transformer/src/main/java/androidx/media3/transformer/DefaultAudioMixer.java index 8439729083..61ec69a03c 100644 --- a/libraries/transformer/src/main/java/androidx/media3/transformer/DefaultAudioMixer.java +++ b/libraries/transformer/src/main/java/androidx/media3/transformer/DefaultAudioMixer.java @@ -33,20 +33,30 @@ import androidx.media3.common.util.Util; import java.nio.ByteBuffer; import java.nio.ByteOrder; -/** An {@link AudioMixer} that incrementally mixes source audio into a fixed size mixing buffer. */ +/** + * An {@link AudioMixer} that incrementally mixes source audio into a fixed size mixing buffer. + * + *

<p>By default, the output signal is guaranteed to be in the range corresponding to its encoding. + * This range is [{@link Short#MIN_VALUE}, {@link Short#MAX_VALUE}] for {@link + * C#ENCODING_PCM_16BIT}, and [-1.0, 1.0] for {@link C#ENCODING_PCM_FLOAT}. Before adding a value to + * the output buffer, it is first converted to the output encoding (in the corresponding range). It + * is then added to the output buffer value, and the result is clipped by moving it to the closest + * value in this range. + */ @UnstableApi public final class DefaultAudioMixer implements AudioMixer { /** An {@link AudioMixer.Factory} implementation for {@link DefaultAudioMixer} instances. */ public static final class Factory implements AudioMixer.Factory { private final boolean outputSilenceWithNoSources; + private final boolean clipFloatOutput; /** - * Creates an instance that does not {@linkplain #getOutput() output} silence when there are no - * {@linkplain #addSource sources}. + * Creates an instance. This is equivalent to {@link #Factory(boolean, boolean) new + * Factory(false, true)}. */ public Factory() { - this(/* outputSilenceWithNoSources= */ false); + this(/* outputSilenceWithNoSources= */ false, /* clipFloatOutput= */ true); } /** @@ -54,20 +64,27 @@ public final class DefaultAudioMixer implements AudioMixer { * * @param outputSilenceWithNoSources Whether to {@linkplain #getOutput() output} silence when * there are no {@linkplain #addSource sources}. + * @param clipFloatOutput Whether to clip the output signal to be in the [-1.0, 1.0] range if + * the output encoding is {@link C#ENCODING_PCM_FLOAT}. This parameter is ignored for + * non-float output signals. For float output signals, non-float input signals are converted + * to float signals in the [-1.0, 1.0] range. All input signals (float or non-float) are + * then added and the result is clipped if and only if {@code clipFloatOutput} is true. 
*/ - public Factory(boolean outputSilenceWithNoSources) { + public Factory(boolean outputSilenceWithNoSources, boolean clipFloatOutput) { this.outputSilenceWithNoSources = outputSilenceWithNoSources; + this.clipFloatOutput = clipFloatOutput; } @Override public DefaultAudioMixer create() { - return new DefaultAudioMixer(outputSilenceWithNoSources); + return new DefaultAudioMixer(outputSilenceWithNoSources, clipFloatOutput); } } // TODO(b/290002438, b/276734854): Improve buffer management & determine best default size. private static final int DEFAULT_BUFFER_SIZE_MS = 500; + private final boolean clipFloatOutput; private final SparseArray sources; private int nextSourceId; private AudioFormat outputAudioFormat; @@ -90,7 +107,8 @@ public final class DefaultAudioMixer implements AudioMixer { */ private long maxPositionOfRemovedSources; - private DefaultAudioMixer(boolean outputSilenceWithNoSources) { + private DefaultAudioMixer(boolean outputSilenceWithNoSources, boolean clipFloatOutput) { + this.clipFloatOutput = clipFloatOutput; sources = new SparseArray<>(); outputAudioFormat = AudioFormat.NOT_SET; bufferSizeFrames = C.LENGTH_UNSET; @@ -338,7 +356,7 @@ public final class DefaultAudioMixer implements AudioMixer { } /** Per-source information. */ - private static class SourceInfo { + private final class SourceInfo { /** * Position (in frames) of the next source audio frame to be input by the source, relative to * the mixer start. 
@@ -399,7 +417,8 @@ public final class DefaultAudioMixer implements AudioMixer { mixingAudioFormat, channelMixingMatrix, framesToMix, - /* accumulate= */ true); + /* accumulate= */ true, + clipFloatOutput); position = newPosition; } } diff --git a/libraries/transformer/src/test/java/androidx/media3/transformer/DefaultAudioMixerTest.java b/libraries/transformer/src/test/java/androidx/media3/transformer/DefaultAudioMixerTest.java index 959b5cca82..d10a27ff29 100644 --- a/libraries/transformer/src/test/java/androidx/media3/transformer/DefaultAudioMixerTest.java +++ b/libraries/transformer/src/test/java/androidx/media3/transformer/DefaultAudioMixerTest.java @@ -67,7 +67,9 @@ public final class DefaultAudioMixerTest { @Before public void setup() { - mixer = new DefaultAudioMixer.Factory(outputSilenceWithNoSources).create(); + mixer = + new DefaultAudioMixer.Factory(outputSilenceWithNoSources, /* clipFloatOutput= */ true) + .create(); } @Test