Add sample rate fallback to AudioSampleExporter

After this change, if the sample rate supported by the encoder differs from the requested sample rate and enableFallback is true, the AudioSampleExporter will convert audio to a sample rate supported by the encoder. This fixes a bug where the audio track is distorted when an unsupported sample rate is requested.

PiperOrigin-RevId: 712822358
This commit is contained in:
Googler 2025-01-07 01:50:14 -08:00 committed by Copybara-Service
parent 6c2d25184c
commit b2aa8d6d21
5 changed files with 138 additions and 8 deletions

View File

@ -1020,9 +1020,16 @@ public final class AndroidTestUtil {
// MP3 test asset loaded from the app's bundled assets (asset:///media/mp3/).
public static final AssetInfo MP3_ASSET =
new AssetInfo.Builder("asset:///media/mp3/test-cbr-info-header.mp3").build();
// This file contains 1 second of audio at 44.1 kHz.
public static final AssetInfo WAV_ASSET =
new AssetInfo.Builder("asset:///media/wav/sample.wav").build();
// WAV test asset; the file name indicates a 96 kHz sample rate.
// NOTE(review): presumably about 1 second long like WAV_ASSET - confirm against the file.
public static final AssetInfo WAV_96KHZ_ASSET =
new AssetInfo.Builder("asset:///media/wav/sample_96khz.wav").build();
// WAV test asset; the file name indicates a 192 kHz sample rate.
// NOTE(review): presumably about 1 second long like WAV_ASSET - confirm against the file.
public static final AssetInfo WAV_192KHZ_ASSET =
new AssetInfo.Builder("asset:///media/wav/sample_192khz.wav").build();
/** A {@link GlEffect} that adds delay in the video pipeline by putting the thread to sleep. */
public static final class DelayEffect implements GlEffect {
private final long delayMs;
@ -1341,6 +1348,42 @@ public final class AndroidTestUtil {
throw new AssumptionViolatedException("Profile not supported");
}
/**
 * Determines the sample rate that is expected to be used for encoding when {@code
 * unsupportedSampleRate} is requested, under the assumption that no encoder supports that rate.
 *
 * <p>Each encoder returned by {@link EncoderUtil#getSupportedEncoders} for {@code mimeType} is
 * queried for its closest supported sample rate. The candidate with the smallest absolute
 * distance to the requested rate is returned; when several candidates are equally close, the one
 * from the encoder listed first is kept.
 *
 * @param mimeType The {@linkplain MimeTypes MIME type}.
 * @param unsupportedSampleRate The sample rate that is expected to be unsupported.
 * @return The fallback sample rate.
 * @throws AssumptionViolatedException If there is no encoder for {@code mimeType}, or if the
 *     closest sample rate found equals {@code unsupportedSampleRate} (meaning the rate is
 *     supported after all).
 */
public static int getFallbackAssumingUnsupportedSampleRate(
    String mimeType, int unsupportedSampleRate) {
  ImmutableList<MediaCodecInfo> encoders = EncoderUtil.getSupportedEncoders(mimeType);
  if (encoders.isEmpty()) {
    throw new AssumptionViolatedException("No supported encoders for mime type: " + mimeType);
  }

  int bestSampleRate = -1;
  int bestDistance = Integer.MAX_VALUE;
  for (MediaCodecInfo encoder : encoders) {
    int candidateSampleRate =
        EncoderUtil.getClosestSupportedSampleRate(encoder, mimeType, unsupportedSampleRate);
    int distance = Math.abs(candidateSampleRate - unsupportedSampleRate);
    if (distance >= bestDistance) {
      // Not strictly closer than the best candidate so far; keep the earlier encoder's rate.
      continue;
    }
    bestDistance = distance;
    bestSampleRate = candidateSampleRate;
  }

  if (bestSampleRate != unsupportedSampleRate) {
    return bestSampleRate;
  }
  // An exact match means the rate is supported, which violates the caller's assumption.
  throw new AssumptionViolatedException(
      String.format("Expected sample rate %s to be unsupported", unsupportedSampleRate));
}
/** Returns a {@link Muxer.Factory} depending upon the API level. */
public static Muxer.Factory getMuxerFactoryBasedOnApi() {
// MediaMuxer supports B-frame from API > 24.

View File

@ -36,6 +36,8 @@ import static androidx.media3.transformer.AndroidTestUtil.MP4_TRIM_OPTIMIZATION;
import static androidx.media3.transformer.AndroidTestUtil.MP4_TRIM_OPTIMIZATION_180;
import static androidx.media3.transformer.AndroidTestUtil.MP4_TRIM_OPTIMIZATION_270;
import static androidx.media3.transformer.AndroidTestUtil.PNG_ASSET;
import static androidx.media3.transformer.AndroidTestUtil.WAV_192KHZ_ASSET;
import static androidx.media3.transformer.AndroidTestUtil.WAV_96KHZ_ASSET;
import static androidx.media3.transformer.AndroidTestUtil.WAV_ASSET;
import static androidx.media3.transformer.AndroidTestUtil.WEBP_LARGE;
import static androidx.media3.transformer.AndroidTestUtil.assumeCanEncodeWithProfile;
@ -43,6 +45,7 @@ import static androidx.media3.transformer.AndroidTestUtil.assumeFormatsSupported
import static androidx.media3.transformer.AndroidTestUtil.createFrameCountingEffect;
import static androidx.media3.transformer.AndroidTestUtil.createOpenGlObjects;
import static androidx.media3.transformer.AndroidTestUtil.generateTextureFromBitmap;
import static androidx.media3.transformer.AndroidTestUtil.getFallbackAssumingUnsupportedSampleRate;
import static androidx.media3.transformer.AndroidTestUtil.getMuxerFactoryBasedOnApi;
import static androidx.media3.transformer.AndroidTestUtil.recordTestSkipped;
import static androidx.media3.transformer.ExportResult.CONVERSION_PROCESS_NA;
@ -2414,6 +2417,68 @@ public class TransformerEndToEndTest {
assertThat(format.bitrate).isEqualTo(requestedBitrate);
}
/**
 * Exports {@code WAV_96KHZ_ASSET} with video removed and encoder fallback enabled, and checks
 * that the exported sample rate is the fallback rate computed for AAC at 96 kHz.
 *
 * <p>{@code getFallbackAssumingUnsupportedSampleRate} throws an {@link
 * AssumptionViolatedException} when 96 kHz turns out to be supported or no AAC encoder exists.
 */
@Test
public void export_withUnsupportedSampleRateAndFallbackEnabled_exportsWithFallbackSampleRate()
    throws Exception {
  int requestedSampleRate = 96_000;
  int expectedSampleRate =
      getFallbackAssumingUnsupportedSampleRate(MimeTypes.AUDIO_AAC, requestedSampleRate);
  DefaultEncoderFactory fallbackEncoderFactory =
      new DefaultEncoderFactory.Builder(context).setEnableFallback(true).build();
  Transformer transformer =
      new Transformer.Builder(context).setEncoderFactory(fallbackEncoderFactory).build();
  MediaItem mediaItem = MediaItem.fromUri(WAV_96KHZ_ASSET.uri);
  EditedMediaItem editedMediaItem =
      new EditedMediaItem.Builder(mediaItem).setRemoveVideo(true).build();
  TransformerAndroidTestRunner testRunner =
      new TransformerAndroidTestRunner.Builder(context, transformer).build();

  ExportTestResult result = testRunner.run(testId, editedMediaItem);

  assertThat(result.exportResult.sampleRate).isEqualTo(expectedSampleRate);
  // The exported duration is expected to be 1 second, within a 50 ms tolerance.
  assertThat(result.exportResult.durationMs).isWithin(50).of(1_000);
  File outputFile = new File(result.filePath);
  assertThat(outputFile.length()).isGreaterThan(0);
}
/**
 * Exports a sequence of three items with video removed ({@code WAV_192KHZ_ASSET}, {@code
 * WAV_ASSET}, {@code WAV_96KHZ_ASSET}, in that order) with encoder fallback enabled, and checks
 * that the exported sample rate is the fallback rate computed for AAC at 192 kHz.
 *
 * <p>{@code getFallbackAssumingUnsupportedSampleRate} throws an {@link
 * AssumptionViolatedException} when 192 kHz turns out to be supported or no AAC encoder exists.
 */
@Test
public void
    export_withTwoUnsupportedAndOneSupportedSampleRateAndFallbackEnabled_exportsWithFallbackSampleRate()
        throws Exception {
  int requestedSampleRate = 192_000;
  int expectedSampleRate =
      getFallbackAssumingUnsupportedSampleRate(MimeTypes.AUDIO_AAC, requestedSampleRate);
  DefaultEncoderFactory fallbackEncoderFactory =
      new DefaultEncoderFactory.Builder(context).setEnableFallback(true).build();
  Transformer transformer =
      new Transformer.Builder(context).setEncoderFactory(fallbackEncoderFactory).build();

  // The first item of the sequence is the 192 kHz asset.
  EditedMediaItem item192Khz =
      new EditedMediaItem.Builder(MediaItem.fromUri(WAV_192KHZ_ASSET.uri))
          .setRemoveVideo(true)
          .build();
  EditedMediaItem itemDefaultWav =
      new EditedMediaItem.Builder(MediaItem.fromUri(WAV_ASSET.uri)).setRemoveVideo(true).build();
  EditedMediaItem item96Khz =
      new EditedMediaItem.Builder(MediaItem.fromUri(WAV_96KHZ_ASSET.uri))
          .setRemoveVideo(true)
          .build();
  EditedMediaItemSequence audioSequence =
      new EditedMediaItemSequence.Builder(item192Khz, itemDefaultWav, item96Khz).build();
  Composition composition = new Composition.Builder(audioSequence).build();
  TransformerAndroidTestRunner testRunner =
      new TransformerAndroidTestRunner.Builder(context, transformer).build();

  ExportTestResult result = testRunner.run(testId, composition);

  assertThat(result.exportResult.sampleRate).isEqualTo(expectedSampleRate);
  // The exported duration is expected to be 3 seconds, within a 150 ms tolerance.
  assertThat(result.exportResult.durationMs).isWithin(150).of(3_000);
  File outputFile = new File(result.filePath);
  assertThat(outputFile.length()).isGreaterThan(0);
}
/**
 * Returns {@code true} when {@link Util#SDK_INT} is below 29, which (per the method name) marks
 * devices that should be skipped for AAC HE profile encoding.
 */
private static boolean shouldSkipDeviceForAacObjectHeProfileEncoding() {
  int firstApiLevelNotSkipped = 29;
  return Util.SDK_INT < firstApiLevelNotSkipped;
}

View File

@ -26,6 +26,7 @@ import androidx.media3.common.C;
import androidx.media3.common.Format;
import androidx.media3.common.audio.AudioProcessor;
import androidx.media3.common.audio.AudioProcessor.AudioFormat;
import androidx.media3.common.audio.SonicAudioProcessor;
import androidx.media3.common.util.Util;
import androidx.media3.decoder.DecoderInputBuffer;
import androidx.media3.effect.DebugTraceUtil;
@ -60,11 +61,19 @@ import org.checkerframework.dataflow.qual.Pure;
FallbackListener fallbackListener)
throws ExportException {
super(firstAssetLoaderTrackFormat, muxerWrapper);
audioGraph = new AudioGraph(mixerFactory, compositionAudioProcessors);
SonicAudioProcessor outputResampler = new SonicAudioProcessor();
audioGraph =
new AudioGraph(
mixerFactory,
new ImmutableList.Builder<AudioProcessor>()
.addAll(compositionAudioProcessors)
.add(outputResampler)
.build());
this.firstInputFormat = firstInputFormat;
firstInput = audioGraph.registerInput(firstEditedMediaItem, firstInputFormat);
encoderInputAudioFormat = audioGraph.getOutputAudioFormat();
checkState(!encoderInputAudioFormat.equals(AudioFormat.NOT_SET));
AudioGraphInput currentFirstInput =
audioGraph.registerInput(firstEditedMediaItem, firstInputFormat);
AudioFormat currentEncoderInputAudioFormat = audioGraph.getOutputAudioFormat();
checkState(!currentEncoderInputAudioFormat.equals(AudioFormat.NOT_SET));
Format requestedEncoderFormat =
new Format.Builder()
@ -72,9 +81,9 @@ import org.checkerframework.dataflow.qual.Pure;
transformationRequest.audioMimeType != null
? transformationRequest.audioMimeType
: checkNotNull(firstAssetLoaderTrackFormat.sampleMimeType))
.setSampleRate(encoderInputAudioFormat.sampleRate)
.setChannelCount(encoderInputAudioFormat.channelCount)
.setPcmEncoding(encoderInputAudioFormat.encoding)
.setSampleRate(currentEncoderInputAudioFormat.sampleRate)
.setChannelCount(currentEncoderInputAudioFormat.channelCount)
.setPcmEncoding(currentEncoderInputAudioFormat.encoding)
.setCodecs(firstInputFormat.codecs)
.build();
@ -88,7 +97,20 @@ import org.checkerframework.dataflow.qual.Pure;
requestedEncoderFormat,
muxerWrapper.getSupportedSampleMimeTypes(C.TRACK_TYPE_AUDIO)))
.build());
// TODO: b/324056144 - Fallback when sample rate is unsupported by encoder
AudioFormat actualEncoderAudioFormat = new AudioFormat(encoder.getConfigurationFormat());
// This occurs when the encoder does not support the requested format. In this case, the audio
// graph output needs to be resampled to a sample rate matching the encoder input to avoid
// distorted audio.
if (actualEncoderAudioFormat.sampleRate != currentEncoderInputAudioFormat.sampleRate) {
audioGraph.reset();
outputResampler.setOutputSampleRateHz(actualEncoderAudioFormat.sampleRate);
currentFirstInput = audioGraph.registerInput(firstEditedMediaItem, firstInputFormat);
currentEncoderInputAudioFormat = audioGraph.getOutputAudioFormat();
}
this.firstInput = currentFirstInput;
this.encoderInputAudioFormat = currentEncoderInputAudioFormat;
encoderInputBuffer = new DecoderInputBuffer(BUFFER_REPLACEMENT_MODE_DISABLED);
encoderOutputBuffer = new DecoderInputBuffer(BUFFER_REPLACEMENT_MODE_DISABLED);