Add support for constant power mixing in DefaultAudioMixer

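This change exposes a flag in `DefaultAudioMixer.Factory` to set whether
to upmix/downmix using constant power or constant gain mixing matrices.
It also deprecates `ChannelMixingMatrix#create` in favor of the
equivalent `ChannelMixingMatrix#createForConstantGain` and migrates
existing callers to the new method.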

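This is a non-functional change: the new flag defaults to `false` in the
existing `Factory` constructors, so current callers keep using constant
gain mixing matrices.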

PiperOrigin-RevId: 733339367
This commit is contained in:
ivanbuper 2025-03-04 08:29:17 -08:00 committed by Copybara-Service
parent 4ab7ddea93
commit 5a0f4c6b3f
12 changed files with 132 additions and 42 deletions

View File

@ -9,6 +9,7 @@
* Extractors:
* DataSource:
* Audio:
* Allow constant power upmixing/downmixing in DefaultAudioMixer.
* Video:
* Text:
* Metadata:

View File

@ -17,6 +17,7 @@ package androidx.media3.common.audio;
import static androidx.media3.common.util.Assertions.checkArgument;
import androidx.annotation.IntRange;
import androidx.media3.common.util.UnstableApi;
import com.google.errorprone.annotations.CanIgnoreReturnValue;
@ -52,21 +53,41 @@ public final class ChannelMixingMatrix {
private final boolean isIdentity;
/**
* Creates a basic channel mixing matrix that converts from {@code inputChannelCount} channels to
* {@code outputChannelCount} channels.
* Returns a default constant gain channel mixing matrix that mixes {@code inputChannelCount}
* channels into {@code outputChannelCount} channels.
*
* <p>If the input and output channel counts match then a simple identity matrix will be returned.
* Otherwise, default matrix coefficients will be used to best match channel locations and overall
* power level.
* <p>This method returns an identity matrix if {@code inputChannelCount} and {@code
* outputChannelCount} are equal.
*
* @param inputChannelCount Number of input channels.
* @param outputChannelCount Number of output channels.
* @return New channel mixing matrix.
* @throws UnsupportedOperationException If no default matrix coefficients are implemented for the
* given input and output channel counts.
* @throws UnsupportedOperationException If no default coefficients are available for the given
* input and output channel counts.
* @deprecated Use {@link #createForConstantGain} instead.
*/
// TODO(b/300467493): Modify create() to use constant power defaults and migrate all users.
public static ChannelMixingMatrix create(int inputChannelCount, int outputChannelCount) {
// TODO(b/399861060): Remove in Media3 1.8.
@Deprecated
public static ChannelMixingMatrix create(
@IntRange(from = 1, to = 2) int inputChannelCount,
@IntRange(from = 1, to = 2) int outputChannelCount) {
return createForConstantGain(inputChannelCount, outputChannelCount);
}
/**
* Returns a default constant gain channel mixing matrix that mixes {@code inputChannelCount}
* channels into {@code outputChannelCount} channels.
*
* <p>This method returns an identity matrix if {@code inputChannelCount} and {@code
* outputChannelCount} are equal.
*
* @param inputChannelCount Number of input channels.
* @param outputChannelCount Number of output channels.
* @throws UnsupportedOperationException If no default coefficients are available for the given
* input and output channel counts.
*/
public static ChannelMixingMatrix createForConstantGain(
@IntRange(from = 1, to = 2) int inputChannelCount,
@IntRange(from = 1, to = 2) int outputChannelCount) {
return new ChannelMixingMatrix(
inputChannelCount,
outputChannelCount,
@ -74,19 +95,31 @@ public final class ChannelMixingMatrix {
}
/**
* Returns default constant power matrix for mixing {@code inputChannelCount} channels into {@code
* outputChannelCount} channels.
* Returns a default constant power channel mixing matrix that mixes {@code inputChannelCount}
* channels into {@code outputChannelCount} channels.
*
* <p>If the input and output channel counts match then a simple identity matrix will be returned.
* <p>This method returns an identity matrix if {@code inputChannelCount} and {@code
* outputChannelCount} are equal.
*
* <p>Channel counts map to the following layouts:
*
* <ol>
* <li>[MONO]
* <li>[FRONT_LEFT, FRONT_RIGHT]
* <li>[FRONT_LEFT, FRONT_RIGHT, FRONT_CENTER]
* <li>[FRONT_LEFT, FRONT_RIGHT, BACK_LEFT, BACK_RIGHT]
* <li>[FRONT_LEFT, FRONT_RIGHT, FRONT_CENTER, BACK_LEFT, BACK_RIGHT]
* <li>[FRONT_LEFT, FRONT_RIGHT, FRONT_CENTER, LOW_FREQUENCY, BACK_LEFT, BACK_RIGHT]
* </ol>
*
* @param inputChannelCount Number of input channels.
* @param outputChannelCount Number of output channels.
* @return New channel mixing matrix.
* @throws UnsupportedOperationException If no default coefficients are available for the given
* input and output channel counts.
* @throws UnsupportedOperationException If no default matrix coefficients are implemented for the
* given input and output channel counts.
*/
public static ChannelMixingMatrix createForConstantPower(
int inputChannelCount, int outputChannelCount) {
@IntRange(from = 1, to = 6) int inputChannelCount,
@IntRange(from = 1, to = 2) int outputChannelCount) {
return new ChannelMixingMatrix(
inputChannelCount,
outputChannelCount,

View File

@ -41,13 +41,17 @@ public final class AudioMixingUtilTest {
new AudioFormat(/* sampleRate= */ 44100, /* channelCount= */ 1, C.ENCODING_PCM_16BIT);
private static final ChannelMixingMatrix STEREO_TO_STEREO =
ChannelMixingMatrix.create(/* inputChannelCount= */ 2, /* outputChannelCount= */ 2);
ChannelMixingMatrix.createForConstantGain(
/* inputChannelCount= */ 2, /* outputChannelCount= */ 2);
private static final ChannelMixingMatrix MONO_TO_STEREO =
ChannelMixingMatrix.create(/* inputChannelCount= */ 1, /* outputChannelCount= */ 2);
ChannelMixingMatrix.createForConstantGain(
/* inputChannelCount= */ 1, /* outputChannelCount= */ 2);
private static final ChannelMixingMatrix STEREO_TO_MONO =
ChannelMixingMatrix.create(/* inputChannelCount= */ 2, /* outputChannelCount= */ 1);
ChannelMixingMatrix.createForConstantGain(
/* inputChannelCount= */ 2, /* outputChannelCount= */ 1);
private static final ChannelMixingMatrix MONO_TO_MONO =
ChannelMixingMatrix.create(/* inputChannelCount= */ 1, /* outputChannelCount= */ 1);
ChannelMixingMatrix.createForConstantGain(
/* inputChannelCount= */ 1, /* outputChannelCount= */ 1);
@Test
public void mixToStereoFloat_withStereoFloatInput() {

View File

@ -41,9 +41,11 @@ public final class ChannelMixingAudioProcessorTest {
public void setUp() {
audioProcessor = new ChannelMixingAudioProcessor();
audioProcessor.putChannelMixingMatrix(
ChannelMixingMatrix.create(/* inputChannelCount= */ 2, /* outputChannelCount= */ 1));
ChannelMixingMatrix.createForConstantGain(
/* inputChannelCount= */ 2, /* outputChannelCount= */ 1));
audioProcessor.putChannelMixingMatrix(
ChannelMixingMatrix.create(/* inputChannelCount= */ 1, /* outputChannelCount= */ 2));
ChannelMixingMatrix.createForConstantGain(
/* inputChannelCount= */ 1, /* outputChannelCount= */ 2));
}
@Test
@ -102,7 +104,8 @@ public final class ChannelMixingAudioProcessorTest {
@Test
public void configureWithIdentityMatrix_isActiveReturnsFalse() throws Exception {
audioProcessor.putChannelMixingMatrix(
ChannelMixingMatrix.create(/* inputChannelCount= */ 2, /* outputChannelCount= */ 2));
ChannelMixingMatrix.createForConstantGain(
/* inputChannelCount= */ 2, /* outputChannelCount= */ 2));
audioProcessor.configure(AUDIO_FORMAT_48KHZ_STEREO_16BIT);
assertThat(audioProcessor.isActive()).isFalse();
@ -133,7 +136,8 @@ public final class ChannelMixingAudioProcessorTest {
@Test
public void scaledMixingMatrix_queueInput_outputIsScaled() throws Exception {
audioProcessor.putChannelMixingMatrix(
ChannelMixingMatrix.create(/* inputChannelCount= */ 2, /* outputChannelCount= */ 2)
ChannelMixingMatrix.createForConstantGain(
/* inputChannelCount= */ 2, /* outputChannelCount= */ 2)
.scaleBy(0.5f));
audioProcessor.configure(AUDIO_FORMAT_48KHZ_STEREO_16BIT);

View File

@ -104,7 +104,7 @@ public class WaveformAudioBufferSink implements TeeAudioProcessor.AudioBufferSin
* @param barsPerSecond The number of bars that should be generated per each second of audio.
* @param outputChannelCount The number of channels that the output waveform should contain. If
* this is different than the number of input channels, the audio will be mixed using the
* {@linkplain ChannelMixingMatrix#create default mixing matrix}.
* {@linkplain ChannelMixingMatrix#createForConstantGain default mixing matrix}.
* @param listener The listener to be notified when a new waveform bar has been generated.
*/
public WaveformAudioBufferSink(int barsPerSecond, int outputChannelCount, Listener listener) {
@ -123,7 +123,8 @@ public class WaveformAudioBufferSink implements TeeAudioProcessor.AudioBufferSin
samplesPerBar = sampleRateHz / barsPerSecond;
inputAudioFormat = new AudioFormat(sampleRateHz, channelCount, encoding);
mixingAudioFormat = new AudioFormat(sampleRateHz, outputChannels.size(), C.ENCODING_PCM_FLOAT);
channelMixingMatrix = ChannelMixingMatrix.create(channelCount, outputChannels.size());
channelMixingMatrix =
ChannelMixingMatrix.createForConstantGain(channelCount, outputChannels.size());
}
@Override

View File

@ -1739,7 +1739,8 @@ public class TransformerEndToEndTest {
ChannelMixingAudioProcessor channelMixingAudioProcessor = new ChannelMixingAudioProcessor();
channelMixingAudioProcessor.putChannelMixingMatrix(
ChannelMixingMatrix.create(/* inputChannelCount= */ 1, /* outputChannelCount= */ 2));
ChannelMixingMatrix.createForConstantGain(
/* inputChannelCount= */ 1, /* outputChannelCount= */ 2));
EditedMediaItem editedMediaItem =
new EditedMediaItem.Builder(MediaItem.fromUri(Uri.parse(MP4_ASSET.uri)))
.setRemoveVideo(true)

View File

@ -92,7 +92,8 @@ public class TransformerWithInAppMp4MuxerEndToEndAndroidTest {
new Transformer.Builder(context).setMuxerFactory(new InAppMp4Muxer.Factory()).build();
ChannelMixingAudioProcessor channelMixingAudioProcessor = new ChannelMixingAudioProcessor();
channelMixingAudioProcessor.putChannelMixingMatrix(
ChannelMixingMatrix.create(/* inputChannelCount= */ 1, /* outputChannelCount= */ 2));
ChannelMixingMatrix.createForConstantGain(
/* inputChannelCount= */ 1, /* outputChannelCount= */ 2));
MediaItem mediaItem = MediaItem.fromUri(Uri.parse(MP4_FILE_ASSET_DIRECTORY + H264_MP4));
EditedMediaItem editedMediaItem =
new EditedMediaItem.Builder(mediaItem)

View File

@ -169,7 +169,8 @@ public class TranscodeForegroundSpeedTest {
sonicAudioProcessor.setOutputSampleRateHz(44_100);
ChannelMixingAudioProcessor mixingAudioProcessor = new ChannelMixingAudioProcessor();
mixingAudioProcessor.putChannelMixingMatrix(
ChannelMixingMatrix.create(/* inputChannelCount= */ 2, /* outputChannelCount= */ 1));
ChannelMixingMatrix.createForConstantGain(
/* inputChannelCount= */ 2, /* outputChannelCount= */ 1));
EditedMediaItem editedMediaItem =
new EditedMediaItem.Builder(mediaItem)
.setEffects(

View File

@ -536,10 +536,10 @@ import java.util.concurrent.atomic.AtomicLong;
|| requiredOutputAudioFormat.channelCount == 2) {
ChannelMixingAudioProcessor channelCountChanger = new ChannelMixingAudioProcessor();
channelCountChanger.putChannelMixingMatrix(
ChannelMixingMatrix.create(
ChannelMixingMatrix.createForConstantGain(
/* inputChannelCount= */ 1, requiredOutputAudioFormat.channelCount));
channelCountChanger.putChannelMixingMatrix(
ChannelMixingMatrix.create(
ChannelMixingMatrix.createForConstantGain(
/* inputChannelCount= */ 2, requiredOutputAudioFormat.channelCount));
audioProcessors.add(channelCountChanger);
}

View File

@ -16,6 +16,8 @@
package androidx.media3.transformer;
import static androidx.media3.common.audio.AudioProcessor.EMPTY_BUFFER;
import static androidx.media3.common.audio.ChannelMixingMatrix.createForConstantGain;
import static androidx.media3.common.audio.ChannelMixingMatrix.createForConstantPower;
import static androidx.media3.common.util.Assertions.checkArgument;
import static androidx.media3.common.util.Assertions.checkState;
import static androidx.media3.common.util.Util.contains;
@ -51,13 +53,36 @@ public final class DefaultAudioMixer implements AudioMixer {
public static final class Factory implements AudioMixer.Factory {
private final boolean outputSilenceWithNoSources;
private final boolean clipFloatOutput;
private final boolean useConstantPowerMixingMatrices;
/**
* Creates an instance. This is equivalent to {@link #Factory(boolean, boolean) new
* Factory(false, true)}.
* Creates an instance. This is equivalent to {@link #Factory(boolean, boolean, boolean) new
* Factory(false, true, false)}.
*/
public Factory() {
this(/* outputSilenceWithNoSources= */ false, /* clipFloatOutput= */ true);
this(
/* outputSilenceWithNoSources= */ false,
/* clipFloatOutput= */ true,
/* useConstantPowerMixingMatrices= */ false);
}
/**
* Creates an instance. This is equivalent to {@link #Factory(boolean, boolean, boolean) new
* Factory(outputSilenceWithNoSources, clipFloatOutput, false)}.
*
* @param outputSilenceWithNoSources Whether to {@linkplain #getOutput() output} silence when
* there are no {@linkplain #addSource sources}.
* @param clipFloatOutput Whether to clip the output signal to be in the [-1.0, 1.0] range if
* the output encoding is {@link C#ENCODING_PCM_FLOAT}. This parameter is ignored for
* non-float output signals. For float output signals, non-float input signals are converted
* to float signals in the [-1.0, 1.0] range. All input signals (float or non-float) are
* then added and the result is clipped if and only if {@code clipFloatOutput} is true.
*/
public Factory(boolean outputSilenceWithNoSources, boolean clipFloatOutput) {
this(
/* outputSilenceWithNoSources= */ outputSilenceWithNoSources,
/* clipFloatOutput= */ clipFloatOutput,
/* useConstantPowerMixingMatrices= */ false);
}
/**
@ -70,15 +95,24 @@ public final class DefaultAudioMixer implements AudioMixer {
* non-float output signals. For float output signals, non-float input signals are converted
* to float signals in the [-1.0, 1.0] range. All input signals (float or non-float) are
* then added and the result is clipped if and only if {@code clipFloatOutput} is true.
* @param useConstantPowerMixingMatrices Whether to upmix/downmix using {@linkplain
* ChannelMixingMatrix#createForConstantPower(int, int) constant power mixing matrices}. If
* {@code false}, uses {@linkplain ChannelMixingMatrix#createForConstantGain constant gain
* mixing matrices}.
*/
public Factory(boolean outputSilenceWithNoSources, boolean clipFloatOutput) {
public Factory(
boolean outputSilenceWithNoSources,
boolean clipFloatOutput,
boolean useConstantPowerMixingMatrices) {
this.outputSilenceWithNoSources = outputSilenceWithNoSources;
this.clipFloatOutput = clipFloatOutput;
this.useConstantPowerMixingMatrices = useConstantPowerMixingMatrices;
}
@Override
public DefaultAudioMixer create() {
return new DefaultAudioMixer(outputSilenceWithNoSources, clipFloatOutput);
return new DefaultAudioMixer(
outputSilenceWithNoSources, clipFloatOutput, useConstantPowerMixingMatrices);
}
}
@ -87,6 +121,7 @@ public final class DefaultAudioMixer implements AudioMixer {
private final boolean outputSilenceWithNoSources;
private final boolean clipFloatOutput;
private final boolean useConstantPowerMixingMatrices;
private final SparseArray<SourceInfo> sources;
private int nextSourceId;
private AudioFormat outputAudioFormat;
@ -109,9 +144,13 @@ public final class DefaultAudioMixer implements AudioMixer {
*/
private long maxPositionOfRemovedSources;
private DefaultAudioMixer(boolean outputSilenceWithNoSources, boolean clipFloatOutput) {
private DefaultAudioMixer(
boolean outputSilenceWithNoSources,
boolean clipFloatOutput,
boolean useConstantPowerMixingMatrices) {
this.outputSilenceWithNoSources = outputSilenceWithNoSources;
this.clipFloatOutput = clipFloatOutput;
this.useConstantPowerMixingMatrices = useConstantPowerMixingMatrices;
sources = new SparseArray<>();
outputAudioFormat = AudioFormat.NOT_SET;
bufferSizeFrames = C.LENGTH_UNSET;
@ -188,7 +227,9 @@ public final class DefaultAudioMixer implements AudioMixer {
sourceId,
new SourceInfo(
sourceFormat,
ChannelMixingMatrix.create(sourceFormat.channelCount, outputAudioFormat.channelCount),
useConstantPowerMixingMatrices
? createForConstantPower(sourceFormat.channelCount, outputAudioFormat.channelCount)
: createForConstantGain(sourceFormat.channelCount, outputAudioFormat.channelCount),
startFrameOffset));
DebugTraceUtil.logEvent(

View File

@ -68,7 +68,10 @@ public final class DefaultAudioMixerTest {
@Before
public void setup() {
mixer =
new DefaultAudioMixer.Factory(outputSilenceWithNoSources, /* clipFloatOutput= */ true)
new DefaultAudioMixer.Factory(
outputSilenceWithNoSources,
/* clipFloatOutput= */ true,
/* useConstantPowerMixingMatrices= */ false)
.create();
}

View File

@ -88,7 +88,7 @@ public final class TestUtil {
ChannelMixingAudioProcessor audioProcessor = new ChannelMixingAudioProcessor();
for (int channel = 1; channel <= 6; channel++) {
audioProcessor.putChannelMixingMatrix(
ChannelMixingMatrix.create(
ChannelMixingMatrix.createForConstantGain(
/* inputChannelCount= */ channel, /* outputChannelCount= */ channel)
.scaleBy(scale));
}
@ -100,7 +100,7 @@ public final class TestUtil {
ChannelMixingAudioProcessor audioProcessor = new ChannelMixingAudioProcessor();
for (int inputChannelCount = 1; inputChannelCount <= 2; inputChannelCount++) {
audioProcessor.putChannelMixingMatrix(
ChannelMixingMatrix.create(inputChannelCount, outputChannelCount));
ChannelMixingMatrix.createForConstantGain(inputChannelCount, outputChannelCount));
}
return audioProcessor;
}