From 7e7764de5e04972d92fc7ab9df47243b55d7795b Mon Sep 17 00:00:00 2001 From: ivanbuper Date: Mon, 28 Oct 2024 10:40:47 -0700 Subject: [PATCH] Implement `getExpectedFrameCountAfterProcessorApplied()` in `Sonic` This method allows `Sonic` to statically and accurately report the expected number of output frames for any given parameter configuration. This change is required prework for `SpeedChangingAudioProcessor` to implement a similar static method and allow precise, non-blocking timestamp adjustments for the experimental speed changing effect. PiperOrigin-RevId: 690669627 --- .../androidx/media3/common/audio/Sonic.java | 39 +++++ .../audio/RandomParameterizedSonicTest.java | 29 ++-- ...erizedSpeedChangingAudioProcessorTest.java | 30 ++-- .../media3/common/audio/SonicTest.java | 136 ++++++++++++++++++ 4 files changed, 196 insertions(+), 38 deletions(-) diff --git a/libraries/common/src/main/java/androidx/media3/common/audio/Sonic.java b/libraries/common/src/main/java/androidx/media3/common/audio/Sonic.java index cde24121d7..5855338c7e 100644 --- a/libraries/common/src/main/java/androidx/media3/common/audio/Sonic.java +++ b/libraries/common/src/main/java/androidx/media3/common/audio/Sonic.java @@ -72,6 +72,45 @@ import java.util.Arrays; private int maxDiff; private double accumulatedSpeedAdjustmentError; + /** + * Returns the estimated output frame count for a given configuration and input frame count. + * + *

<p>Please note that the returned value might not be mathematically exact, as Sonic incurs + * truncation and precision errors that accumulate on the output. + */ + public static long getExpectedFrameCountAfterProcessorApplied( + int inputSampleRateHz, + int outputSampleRateHz, + float speed, + float pitch, + long inputFrameCount) { + float resamplingRate = (float) inputSampleRateHz / outputSampleRateHz; + resamplingRate *= pitch; + double speedRate = speed / pitch; + BigDecimal bigResamplingRate = new BigDecimal(String.valueOf(resamplingRate)); + + BigDecimal length = BigDecimal.valueOf(inputFrameCount); + BigDecimal framesAfterTimeStretching; + if (speedRate > 1.00001 || speedRate < 0.99999) { + framesAfterTimeStretching = + length.divide(BigDecimal.valueOf(speedRate), RoundingMode.HALF_EVEN); + } else { + // If speed is almost 1, then just copy the buffers without modifying them. + framesAfterTimeStretching = length; + } + + if (resamplingRate == 1.0f) { + return framesAfterTimeStretching.longValueExact(); + } + + BigDecimal framesAfterResampling = + framesAfterTimeStretching.divide(bigResamplingRate, RoundingMode.HALF_EVEN); + + return framesAfterResampling.longValueExact() + - calculateAccumulatedTruncationErrorForResampling( + framesAfterTimeStretching, BigDecimal.valueOf(inputSampleRateHz), bigResamplingRate); + } + /** * Returns expected accumulated truncation error for {@link Sonic}'s resampling algorithm, given * an input length, input sample rate, and resampling rate. 
diff --git a/libraries/common/src/test/java/androidx/media3/common/audio/RandomParameterizedSonicTest.java b/libraries/common/src/test/java/androidx/media3/common/audio/RandomParameterizedSonicTest.java index 95fb22e82e..c2500b26ea 100644 --- a/libraries/common/src/test/java/androidx/media3/common/audio/RandomParameterizedSonicTest.java +++ b/libraries/common/src/test/java/androidx/media3/common/audio/RandomParameterizedSonicTest.java @@ -15,7 +15,6 @@ */ package androidx.media3.common.audio; -import static androidx.media3.common.audio.Sonic.calculateAccumulatedTruncationErrorForResampling; import static androidx.media3.test.utils.TestUtil.generateFloatInRange; import static com.google.common.truth.Truth.assertThat; import static java.lang.Math.max; @@ -164,18 +163,10 @@ public final class RandomParameterizedSonicTest { } sonic.flush(); - BigDecimal bigLength = new BigDecimal(String.valueOf(streamLength)); - // The scale of expectedSize will be bigLength.scale() - speed.scale(). Thus, the result should - // always yield an integer. - BigDecimal expectedSize = bigLength.divide(speed, RoundingMode.HALF_EVEN); - - long accumulatedTruncationError = - calculateAccumulatedTruncationErrorForResampling( - bigLength, new BigDecimal(SAMPLE_RATE), speed); - - assertThat(readSampleCount) - .isWithin(1) - .of(expectedSize.longValueExact() - accumulatedTruncationError); + long expectedSamples = + Sonic.getExpectedFrameCountAfterProcessorApplied( + SAMPLE_RATE, SAMPLE_RATE, speed.floatValue(), speed.floatValue(), streamLength); + assertThat(readSampleCount).isWithin(1).of(expectedSamples); } @Test @@ -208,20 +199,18 @@ public final class RandomParameterizedSonicTest { } sonic.flush(); - BigDecimal bigLength = new BigDecimal(String.valueOf(streamLength)); - // The scale of expectedSampleCount will be bigLength.scale() - speed.scale(). Thus, the result - // should always yield an integer. 
- BigDecimal expectedSampleCount = bigLength.divide(speed, RoundingMode.HALF_EVEN); + long expectedSamples = + Sonic.getExpectedFrameCountAfterProcessorApplied( + SAMPLE_RATE, SAMPLE_RATE, speed.floatValue(), 1, streamLength); // Calculate allowed tolerance and round to nearest integer. BigDecimal allowedTolerance = TIME_STRETCHING_SAMPLE_DRIFT_TOLERANCE - .multiply(expectedSampleCount) + .multiply(BigDecimal.valueOf(expectedSamples)) .setScale(/* newScale= */ 0, RoundingMode.HALF_EVEN); // Always allow at least 1 sample of tolerance. long tolerance = max(allowedTolerance.longValue(), 1); - - assertThat(readSampleCount).isWithin(tolerance).of(expectedSampleCount.longValueExact()); + assertThat(readSampleCount).isWithin(tolerance).of(expectedSamples); } } diff --git a/libraries/common/src/test/java/androidx/media3/common/audio/RandomParameterizedSpeedChangingAudioProcessorTest.java b/libraries/common/src/test/java/androidx/media3/common/audio/RandomParameterizedSpeedChangingAudioProcessorTest.java index fba90497b4..51fc65055a 100644 --- a/libraries/common/src/test/java/androidx/media3/common/audio/RandomParameterizedSpeedChangingAudioProcessorTest.java +++ b/libraries/common/src/test/java/androidx/media3/common/audio/RandomParameterizedSpeedChangingAudioProcessorTest.java @@ -15,7 +15,6 @@ */ package androidx.media3.common.audio; -import static androidx.media3.common.audio.Sonic.calculateAccumulatedTruncationErrorForResampling; import static androidx.media3.test.utils.TestUtil.buildTestData; import static androidx.media3.test.utils.TestUtil.generateFloatInRange; import static androidx.media3.test.utils.TestUtil.generateLong; @@ -27,6 +26,7 @@ import androidx.media3.test.utils.TestSpeedProvider; import com.google.common.collect.ImmutableList; import com.google.common.collect.Range; import com.google.common.primitives.Floats; +import com.google.common.primitives.Ints; import java.math.BigDecimal; import java.math.RoundingMode; import java.nio.ByteBuffer; @@ -102,30 
+102,26 @@ public class RandomParameterizedSpeedChangingAudioProcessorTest { ByteBuffer.wrap( buildTestData(/* length= */ BUFFER_SIZE * AUDIO_FORMAT.bytesPerFrame, random)); ByteBuffer outBuffer; - BigDecimal expectedTotalOutputFrameCount = BigDecimal.ZERO; long outputFrameCount = 0; long totalInputFrameCount = 0; - long expectedResamplingError = 0; + long expectedOutputFrames = 0; for (int i = 0; i < frameCounts.size(); i++) { totalInputFrameCount += frameCounts.get(i); - BigDecimal frameCount = BigDecimal.valueOf(frameCounts.get(i)); - BigDecimal speed = speeds.get(i); - BigDecimal expectedOutputFrameCountForSection = - frameCount.divide(speed, RoundingMode.HALF_EVEN); - expectedTotalOutputFrameCount = - expectedTotalOutputFrameCount.add(expectedOutputFrameCountForSection); - // SpeedChangingAudioProcessor currently uses resampling on Sonic, instead of time-stretching. - // See b/359649531. - expectedResamplingError += - calculateAccumulatedTruncationErrorForResampling( - frameCount, BigDecimal.valueOf(AUDIO_FORMAT.sampleRate), speed); + float speed = speeds.get(i).floatValue(); + expectedOutputFrames += + Sonic.getExpectedFrameCountAfterProcessorApplied( + /* inputSampleRateHz= */ AUDIO_FORMAT.sampleRate, + /* outputSampleRateHz= */ AUDIO_FORMAT.sampleRate, + /* speed= */ speed, + /* pitch= */ speed, + /* inputFrameCount= */ frameCounts.get(i)); } SpeedProvider speedProvider = TestSpeedProvider.createWithFrameCounts( AUDIO_FORMAT, - /* frameCounts= */ frameCounts.stream().mapToInt(Math::toIntExact).toArray(), + /* frameCounts= */ Ints.toArray(frameCounts), /* speeds= */ Floats.toArray(speeds)); SpeedChangingAudioProcessor speedChangingAudioProcessor = @@ -152,8 +148,6 @@ public class RandomParameterizedSpeedChangingAudioProcessorTest { outputFrameCount += outBuffer.remaining() / AUDIO_FORMAT.bytesPerFrame; // We allow 1 frame of tolerance per speed change. 
- assertThat(outputFrameCount) - .isWithin(frameCounts.size()) - .of(expectedTotalOutputFrameCount.longValueExact() - expectedResamplingError); + assertThat(outputFrameCount).isWithin(frameCounts.size()).of(expectedOutputFrames); } } diff --git a/libraries/common/src/test/java/androidx/media3/common/audio/SonicTest.java b/libraries/common/src/test/java/androidx/media3/common/audio/SonicTest.java index 7c27337d8f..a672e04815 100644 --- a/libraries/common/src/test/java/androidx/media3/common/audio/SonicTest.java +++ b/libraries/common/src/test/java/androidx/media3/common/audio/SonicTest.java @@ -16,6 +16,7 @@ package androidx.media3.common.audio; import static androidx.media3.common.audio.Sonic.calculateAccumulatedTruncationErrorForResampling; +import static androidx.media3.common.audio.Sonic.getExpectedFrameCountAfterProcessorApplied; import static com.google.common.truth.Truth.assertThat; import androidx.test.ext.junit.runners.AndroidJUnit4; @@ -110,6 +111,141 @@ public class SonicTest { assertThat(outputBuffer.array()).isEqualTo(new short[] {0, 4, 8}); } + @Test + public void + getExpectedFrameCountAfterProcessorApplied_timeStretchingFaster_returnsExpectedSampleCount() { + long samples = + getExpectedFrameCountAfterProcessorApplied( + /* inputSampleRateHz= */ 44100, + /* outputSampleRateHz= */ 44100, + /* speed= */ 2, + /* pitch= */ 1, + /* inputFrameCount= */ 88200); + assertThat(samples).isEqualTo(44100); + } + + @Test + public void + getExpectedFrameCountAfterProcessorApplied_timeStretchingSlower_returnsExpectedSampleCount() { + long samples = + getExpectedFrameCountAfterProcessorApplied( + /* inputSampleRateHz= */ 44100, + /* outputSampleRateHz= */ 44100, + /* speed= */ 0.5f, + /* pitch= */ 1, + /* inputFrameCount= */ 88200); + assertThat(samples).isEqualTo(176400); + } + + @Test + public void + getExpectedFrameCountAfterProcessorApplied_resamplingHigherSampleRate_returnsExpectedSampleCount() { + long samples = + getExpectedFrameCountAfterProcessorApplied( + 
/* inputSampleRateHz= */ 44100, + /* outputSampleRateHz= */ 88200, + /* speed= */ 1f, + /* pitch= */ 1, + /* inputFrameCount= */ 88200); + assertThat(samples).isEqualTo(176400); + } + + @Test + public void + getExpectedFrameCountAfterProcessorApplied_resamplingLowerSampleRate_returnsExpectedSampleCount() { + long samples = + getExpectedFrameCountAfterProcessorApplied( + /* inputSampleRateHz= */ 44100, + /* outputSampleRateHz= */ 22050, + /* speed= */ 1f, + /* pitch= */ 1, + /* inputFrameCount= */ 88200); + assertThat(samples).isEqualTo(44100); + } + + @Test + public void + getExpectedFrameCountAfterProcessorApplied_resamplingLowerPitch_returnsExpectedSampleCount() { + long samples = + getExpectedFrameCountAfterProcessorApplied( + /* inputSampleRateHz= */ 44100, + /* outputSampleRateHz= */ 44100, + /* speed= */ 0.5f, + /* pitch= */ 0.5f, + /* inputFrameCount= */ 88200); + assertThat(samples).isEqualTo(176400); + } + + @Test + public void + getExpectedFrameCountAfterProcessorApplied_resamplingHigherPitch_returnsExpectedSampleCount() { + long samples = + getExpectedFrameCountAfterProcessorApplied( + /* inputSampleRateHz= */ 44100, + /* outputSampleRateHz= */ 44100, + /* speed= */ 2f, + /* pitch= */ 2f, + /* inputFrameCount= */ 88200); + assertThat(samples).isEqualTo(44100); + } + + @Test + public void + getExpectedFrameCountAfterProcessorApplied_resamplePitchAndSampleRateChange_returnsExpectedSampleCount() { + long samples = + getExpectedFrameCountAfterProcessorApplied( + /* inputSampleRateHz= */ 44100, + /* outputSampleRateHz= */ 88200, + /* speed= */ 1f, + /* pitch= */ 2f, + /* inputFrameCount= */ 88200); + // First time stretch at speed / pitch = 0.5. + // Then resample at (inputSampleRateHz / outputSampleRateHz) * pitch = 0.5 * 2. + // Final sample count is 88200 / 0.5 / (0.5 * 2) = 176400. 
+ assertThat(samples).isEqualTo(176400); + } + + @Test + public void + getExpectedFrameCountAfterProcessorApplied_pitchSpeedAndSampleRateChange_returnsExpectedSampleCount() { + long samples = + getExpectedFrameCountAfterProcessorApplied( + /* inputSampleRateHz= */ 48000, + /* outputSampleRateHz= */ 192000, + /* speed= */ 5f, + /* pitch= */ 0.5f, + /* inputFrameCount= */ 88200); + // First time stretch at speed / pitch = 10. + // Then resample at (inputSampleRateHz / outputSampleRateHz) * pitch = 0.25 * 0.5. + // Final sample count is 88200 / 10 / (0.25 * 0.5) = 70560. + assertThat(samples).isEqualTo(70560); + } + + @Test + public void + getExpectedFrameCountAfterProcessorApplied_withPeriodicResamplingRate_adjustsForTruncationError() { + long length = 26902000; + float resamplingRate = 0.33f; + long samples = + getExpectedFrameCountAfterProcessorApplied( + /* inputSampleRateHz= */ 48000, + /* outputSampleRateHz= */ 48000, + /* speed= */ resamplingRate, + /* pitch= */ resamplingRate, + /* inputFrameCount= */ length); + + long truncationError = + calculateAccumulatedTruncationErrorForResampling( + BigDecimal.valueOf(length), + BigDecimal.valueOf(48000), + new BigDecimal(String.valueOf(resamplingRate))); + // Sonic incurs accumulated truncation errors when the input sample rate is not exactly + // divisible by the resampling rate (pitch * inputSampleRateHz / outputSampleRateHz). This error + // is more prominent on larger stream lengths and inputSampleRateHz + resamplingRate + // combinations that result in higher truncated decimal values. + assertThat(samples).isEqualTo(81521212 - truncationError); + } + @Test public void calculateAccumulatedTruncationErrorForResampling_returnsExpectedSampleCount() { long error =