From 682889f91d7fb805280348a6fefab333886389df Mon Sep 17 00:00:00 2001 From: ivanbuper Date: Thu, 2 Jan 2025 09:33:30 -0800 Subject: [PATCH] Implement Sonic method to get input frame count from output frame count The static method can estimate the number of input frames needed to get a given number of output frames with a given Sonic configuration. This CL is prework to remove the dependency of `SpeedChangingAudioProcessor#getMediaDurationUs()` on non-static output based heuristics and simplify `SpeedChangingAudioProcessor`. PiperOrigin-RevId: 711446999 --- .../androidx/media3/common/audio/Sonic.java | 63 +++++++++- .../media3/common/audio/SonicTest.java | 119 ++++++++++++++++++ 2 files changed, 180 insertions(+), 2 deletions(-) diff --git a/libraries/common/src/main/java/androidx/media3/common/audio/Sonic.java b/libraries/common/src/main/java/androidx/media3/common/audio/Sonic.java index 5855338c7e..a16913899c 100644 --- a/libraries/common/src/main/java/androidx/media3/common/audio/Sonic.java +++ b/libraries/common/src/main/java/androidx/media3/common/audio/Sonic.java @@ -36,6 +36,9 @@ import java.util.Arrays; private static final int AMDF_FREQUENCY = 4000; private static final int BYTES_PER_SAMPLE = 2; + private static final float MINIMUM_SPEEDUP_RATE = 1.00001f; + private static final float MINIMUM_SLOWDOWN_RATE = 0.99999f; + private final int inputSampleRateHz; private final int channelCount; private final float speed; @@ -91,7 +94,7 @@ import java.util.Arrays; BigDecimal length = BigDecimal.valueOf(inputFrameCount); BigDecimal framesAfterTimeStretching; - if (speedRate > 1.00001 || speedRate < 0.99999) { + if (speedRate > MINIMUM_SPEEDUP_RATE || speedRate < MINIMUM_SLOWDOWN_RATE) { framesAfterTimeStretching = length.divide(BigDecimal.valueOf(speedRate), RoundingMode.HALF_EVEN); } else { @@ -143,6 +146,62 @@ import java.util.Arrays; return accumulatedError.longValueExact(); } + /** + * Returns the number of input frames required for Sonic to produce the given 
number of output + * frames under the specified parameters. + * + * <p>
This method is the inverse of {@link #getExpectedFrameCountAfterProcessorApplied}. + * + * @param inputSampleRateHz Input sample rate in Hertz. + * @param outputSampleRateHz Output sample rate in Hertz. + * @param speed Speed rate. + * @param pitch Pitch rate. + * @param outputFrameCount Number of output frames to calculate the required input frame count of. + */ + /* package */ static long getExpectedInputFrameCountForOutputFrameCount( + int inputSampleRateHz, + int outputSampleRateHz, + float speed, + float pitch, + long outputFrameCount) { + float resamplingRate = (float) inputSampleRateHz / outputSampleRateHz; + resamplingRate *= pitch; + BigDecimal bigResamplingRate = new BigDecimal(String.valueOf(resamplingRate)); + long framesBeforeResampling = + getFrameCountBeforeResamplingForOutputCount( + BigDecimal.valueOf(inputSampleRateHz), + bigResamplingRate, + BigDecimal.valueOf(outputFrameCount)); + double speedRate = speed / pitch; + + if (speedRate > MINIMUM_SPEEDUP_RATE || speedRate < MINIMUM_SLOWDOWN_RATE) { + return BigDecimal.valueOf(framesBeforeResampling) + .multiply(BigDecimal.valueOf(speedRate)) + .setScale(0, RoundingMode.FLOOR) + .longValueExact(); + } else { + // If speed is almost 1, then just copy the buffers without modifying them. + return framesBeforeResampling; + } + } + + /** + * Returns the expected input frame count prior to resampling with Sonic. + * + * <p>
See {@link #getExpectedFrameCountAfterProcessorApplied} for more information. + * + * @param sampleRate Input sample rate of {@link Sonic} instance. + * @param resamplingRate Resampling rate given by {@code (inputSampleRate / outputSampleRate) * + * pitch}. + * @param outputLength Length of output in frames. + */ + private static long getFrameCountBeforeResamplingForOutputCount( + BigDecimal sampleRate, BigDecimal resamplingRate, BigDecimal outputLength) { + BigDecimal denominator = sampleRate.divide(resamplingRate, /* scale */ 0, RoundingMode.FLOOR); + BigDecimal numerator = sampleRate.multiply(outputLength); + return numerator.divide(denominator, /* scale */ 0, RoundingMode.FLOOR).longValueExact(); + } + /** * Creates a new Sonic audio stream processor. * @@ -599,7 +658,7 @@ import java.util.Arrays; int originalOutputFrameCount = outputFrameCount; double s = speed / pitch; float r = rate * pitch; - if (s > 1.00001 || s < 0.99999) { + if (s > MINIMUM_SPEEDUP_RATE || s < MINIMUM_SLOWDOWN_RATE) { changeSpeed(s); } else { copyToOutput(inputBuffer, 0, inputFrameCount); diff --git a/libraries/common/src/test/java/androidx/media3/common/audio/SonicTest.java b/libraries/common/src/test/java/androidx/media3/common/audio/SonicTest.java index a672e04815..3855111249 100644 --- a/libraries/common/src/test/java/androidx/media3/common/audio/SonicTest.java +++ b/libraries/common/src/test/java/androidx/media3/common/audio/SonicTest.java @@ -17,6 +17,7 @@ package androidx.media3.common.audio; import static androidx.media3.common.audio.Sonic.calculateAccumulatedTruncationErrorForResampling; import static androidx.media3.common.audio.Sonic.getExpectedFrameCountAfterProcessorApplied; +import static androidx.media3.common.audio.Sonic.getExpectedInputFrameCountForOutputFrameCount; import static com.google.common.truth.Truth.assertThat; import androidx.test.ext.junit.runners.AndroidJUnit4; @@ -260,4 +261,122 @@ public class SonicTest { // (All calculations are done on BigDecimal rounded 
to 20 decimal places, unless indicated). assertThat(error).isEqualTo(305); } + + @Test + public void getExpectedInputFrameCountForOutputFrameCount_fasterSpeed_returnsExpectedCount() { + long inputSamples = + getExpectedInputFrameCountForOutputFrameCount( + /* inputSampleRateHz= */ 48_000, + /* outputSampleRateHz= */ 48_000, + /* speed= */ 5, + /* pitch= */ 1, + /* outputFrameCount= */ 20); + assertThat(inputSamples).isEqualTo(100); + } + + @Test + public void + getExpectedInputFrameCountForOutputFrameCount_fasterSpeedAndPitch_returnsExpectedCount() { + long inputSamples = + getExpectedInputFrameCountForOutputFrameCount( + /* inputSampleRateHz= */ 48_000, + /* outputSampleRateHz= */ 48_000, + /* speed= */ 5, + /* pitch= */ 5, + /* outputFrameCount= */ 20); + assertThat(inputSamples).isEqualTo(100); + } + + @Test + public void getExpectedInputFrameCountForOutputFrameCount_higherPitch_returnsExpectedCount() { + long inputSamples = + getExpectedInputFrameCountForOutputFrameCount( + /* inputSampleRateHz= */ 48_000, + /* outputSampleRateHz= */ 48_000, + /* speed= */ 1, + /* pitch= */ 5, + /* outputFrameCount= */ 20); + assertThat(inputSamples).isEqualTo(20); + } + + @Test + public void getExpectedInputFrameCountForOutputFrameCount_slowerSpeed_returnsExpectedCount() { + long inputSamples = + getExpectedInputFrameCountForOutputFrameCount( + /* inputSampleRateHz= */ 48_000, + /* outputSampleRateHz= */ 48_000, + /* speed= */ 0.25f, + /* pitch= */ 1, + /* outputFrameCount= */ 100); + assertThat(inputSamples).isEqualTo(25); + } + + @Test + public void + getExpectedInputFrameCountForOutputFrameCount_slowerSpeedAndPitch_returnsExpectedCount() { + long inputSamples = + getExpectedInputFrameCountForOutputFrameCount( + /* inputSampleRateHz= */ 48_000, + /* outputSampleRateHz= */ 48_000, + /* speed= */ 0.25f, + /* pitch= */ 0.25f, + /* outputFrameCount= */ 100); + assertThat(inputSamples).isEqualTo(25); + } + + @Test + public void 
getExpectedInputFrameCountForOutputFrameCount_lowerPitch_returnsExpectedCount() { + long inputSamples = + getExpectedInputFrameCountForOutputFrameCount( + /* inputSampleRateHz= */ 48_000, + /* outputSampleRateHz= */ 48_000, + /* speed= */ 1, + /* pitch= */ 0.75f, + /* outputFrameCount= */ 100); + assertThat(inputSamples).isEqualTo(100); + } + + @Test + public void + getExpectedInputFrameCountForOutputFrameCount_differentSamplingRates_returnsExpectedCount() { + long inputSamples = + getExpectedInputFrameCountForOutputFrameCount( + /* inputSampleRateHz= */ 48_000, + /* outputSampleRateHz= */ 96_000, + /* speed= */ 1, + /* pitch= */ 1, + /* outputFrameCount= */ 100); + assertThat(inputSamples).isEqualTo(50); + } + + @Test + public void + getExpectedInputFrameCountForOutputFrameCount_differentPitchSpeedAndSamplingRates_returnsExpectedCount() { + long inputSamples = + getExpectedInputFrameCountForOutputFrameCount( + /* inputSampleRateHz= */ 48_000, + /* outputSampleRateHz= */ 96_000, + /* speed= */ 5, + /* pitch= */ 2, + /* outputFrameCount= */ 40); + assertThat(inputSamples).isEqualTo(100); + } + + @Test + public void + getExpectedInputFrameCountForOutputFrameCount_withPeriodicResamplingRate_adjustsForTruncationError() { + float resamplingRate = 0.33f; + long outputLength = 81_521_212; + long truncationError = 305; + + long inputSamples = + getExpectedInputFrameCountForOutputFrameCount( + /* inputSampleRateHz= */ 48_000, + /* outputSampleRateHz= */ 48_000, + /* speed= */ resamplingRate, + /* pitch= */ resamplingRate, + /* outputFrameCount= */ outputLength - truncationError); + + assertThat(inputSamples).isEqualTo(26_902_000); + } }