Add a waveform audio buffer sink.

This can be used together with TeeAudioProcessor to draw an audio waveform (see the usage sketch below).

PiperOrigin-RevId: 558213516
samturci 2023-08-18 20:12:07 +01:00 committed by Julia Bibik
parent 54797d4cc5
commit 6566387f70
2 changed files with 329 additions and 0 deletions
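
For context, a minimal usage sketch (not part of this commit): the sink is fed audio by wrapping it in a TeeAudioProcessor, which copies each buffer to the sink while passing the audio through unchanged. How that processor is attached to the playback or export audio chain is app-specific and omitted here, and onBar is a hypothetical callback.

import androidx.media3.exoplayer.audio.TeeAudioProcessor;
import androidx.media3.exoplayer.audio.WaveformAudioBufferSink;

// Generate 25 waveform bars per second of audio, mixed down to a single channel.
WaveformAudioBufferSink waveformSink =
    new WaveformAudioBufferSink(
        /* barsPerSecond= */ 25,
        /* outputChannelCount= */ 1,
        (channelIndex, bar) ->
            onBar(bar.getMinSampleValue(), bar.getMaxSampleValue(), bar.getRootMeanSquare()));
// Routes a copy of every queued buffer into the sink without altering playback.
TeeAudioProcessor teeAudioProcessor = new TeeAudioProcessor(waveformSink);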

WaveformAudioBufferSink.java

@@ -0,0 +1,155 @@
/*
* Copyright 2023 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package androidx.media3.exoplayer.audio;

import static androidx.media3.common.util.Assertions.checkStateNotNull;
import static com.google.common.base.Preconditions.checkArgument;
import static java.lang.Math.max;
import static java.lang.Math.min;
import android.util.SparseArray;
import androidx.annotation.FloatRange;
import androidx.media3.common.C;
import androidx.media3.common.audio.AudioMixingUtil;
import androidx.media3.common.audio.AudioProcessor.AudioFormat;
import androidx.media3.common.audio.ChannelMixingMatrix;
import androidx.media3.common.util.UnstableApi;
import androidx.media3.common.util.Util;
import java.nio.ByteBuffer;
import org.checkerframework.checker.nullness.qual.MonotonicNonNull;

/** A sink for audio buffers that produces {@link WaveformBar waveform bars}. */
@UnstableApi
public class WaveformAudioBufferSink implements TeeAudioProcessor.AudioBufferSink {
/**
* Aggregates a group of audio samples. The values exposed can be used to draw one vertical bar of
* an audio waveform.
*/
public static class WaveformBar {
private float minSampleValue = 1f;
private float maxSampleValue = -1f;
private double squareSum;
private int sampleCount;
/** Returns the number of samples {@linkplain #addSample added}. */
public int getSampleCount() {
return sampleCount;
}
/** Returns the minimum sample value in this group, normalized between -1 and +1. */
public double getMinSampleValue() {
return minSampleValue;
}
/** Returns the maximum sample value in this group, normalized between -1 and +1. */
public double getMaxSampleValue() {
return maxSampleValue;
}
/**
* Returns the RMS (Root Mean Square) of the samples in this group, normalized between -1 and
* +1.
*
* <p>This is an estimate of the audio loudness level.
*/
public double getRootMeanSquare() {
return Math.sqrt(squareSum / sampleCount);
}
/**
* Adds a new sample to the group.
*
* @param sample The sample value, between -1 and +1.
*/
public void addSample(@FloatRange(from = -1, to = 1) float sample) {
checkArgument(sample >= -1f && sample <= 1f);
minSampleValue = min(minSampleValue, sample);
maxSampleValue = max(maxSampleValue, sample);
squareSum += (double) sample * sample;
sampleCount++;
}
}
/** Listener for the audio waveform generation. */
public interface Listener {
/** Called when a new waveform bar has been generated for a specific output channel. */
void onNewWaveformBar(int channelIndex, WaveformBar waveformBar);
}
private final int barsPerSecond;
private final Listener listener;
private final SparseArray<WaveformBar> outputChannels;
private final ByteBuffer mixingBuffer;
private @MonotonicNonNull AudioFormat inputAudioFormat;
private @MonotonicNonNull AudioFormat mixingAudioFormat;
private @MonotonicNonNull ChannelMixingMatrix channelMixingMatrix;
private int samplesPerBar;
/**
* Creates a new instance.
*
* @param barsPerSecond The number of bars that should be generated per second of audio.
* @param outputChannelCount The number of channels that the output waveform should contain. If
* this is different than the number of input channels, the audio will be mixed using the
* {@linkplain ChannelMixingMatrix#create default mixing matrix}.
* @param listener The listener to be notified when a new waveform bar has been generated.
*/
public WaveformAudioBufferSink(int barsPerSecond, int outputChannelCount, Listener listener) {
this.barsPerSecond = barsPerSecond;
this.listener = listener;
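// mixingBuffer holds a single mixed frame: one 32-bit float sample per output channel.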
mixingBuffer =
ByteBuffer.allocate(Util.getPcmFrameSize(C.ENCODING_PCM_FLOAT, outputChannelCount));
outputChannels = new SparseArray<>(outputChannelCount);
for (int i = 0; i < outputChannelCount; i++) {
outputChannels.append(i, new WaveformBar());
}
}
@Override
public void flush(int sampleRateHz, int channelCount, @C.PcmEncoding int encoding) {
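// A new input stream configuration: derive the per-bar sample count and the float mixing
// format from the incoming PCM format.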
samplesPerBar = sampleRateHz / barsPerSecond;
inputAudioFormat = new AudioFormat(sampleRateHz, channelCount, encoding);
mixingAudioFormat = new AudioFormat(sampleRateHz, outputChannels.size(), C.ENCODING_PCM_FLOAT);
channelMixingMatrix = ChannelMixingMatrix.create(channelCount, outputChannels.size());
}
@Override
public void handleBuffer(ByteBuffer buffer) {
checkStateNotNull(inputAudioFormat);
checkStateNotNull(mixingAudioFormat);
checkStateNotNull(channelMixingMatrix);
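// Mix one input frame at a time into mixingBuffer (one float per output channel), then add
// each mixed sample to that channel's current bar, emitting the bar to the listener once it
// has accumulated samplesPerBar samples.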
while (buffer.hasRemaining()) {
mixingBuffer.rewind();
AudioMixingUtil.mix(
buffer,
inputAudioFormat,
mixingBuffer,
mixingAudioFormat,
channelMixingMatrix,
/* framesToMix= */ 1,
/* accumulate= */ false);
mixingBuffer.rewind();
for (int i = 0; i < outputChannels.size(); i++) {
WaveformBar bar = outputChannels.get(i);
bar.addSample(mixingBuffer.getFloat());
if (bar.getSampleCount() >= samplesPerBar) {
listener.onNewWaveformBar(i, bar);
outputChannels.put(i, new WaveformBar());
}
}
}
}
}
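
For illustration, a minimal sketch (not part of this commit) of a Listener that collects the generated bars per channel; drawing code could later map each bar's min/max (or RMS) to the height of one vertical line. The class and field names are hypothetical, and the callback typically arrives on the audio processing thread, so real UI code would hand the bars over to the main thread before drawing.

import androidx.media3.exoplayer.audio.WaveformAudioBufferSink;
import java.util.ArrayList;
import java.util.List;

/** Hypothetical listener that stores waveform bars per output channel for later drawing. */
final class CollectingWaveformListener implements WaveformAudioBufferSink.Listener {
  private final List<List<WaveformAudioBufferSink.WaveformBar>> barsPerChannel = new ArrayList<>();

  CollectingWaveformListener(int outputChannelCount) {
    for (int i = 0; i < outputChannelCount; i++) {
      barsPerChannel.add(new ArrayList<>());
    }
  }

  @Override
  public void onNewWaveformBar(int channelIndex, WaveformAudioBufferSink.WaveformBar bar) {
    // Store the completed bar; rendering happens elsewhere, on the main thread.
    barsPerChannel.get(channelIndex).add(bar);
  }
}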

WaveformAudioBufferSinkTest.java

@@ -0,0 +1,174 @@
/*
* Copyright 2023 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package androidx.media3.exoplayer.audio;

import static com.google.common.truth.Truth.assertThat;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
import androidx.media3.common.C;
import androidx.media3.common.util.Util;
import androidx.test.ext.junit.runners.AndroidJUnit4;
import com.google.common.collect.ImmutableList;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import org.junit.Test;
import org.junit.runner.RunWith;

/** Unit tests for {@link WaveformAudioBufferSink}. */
@RunWith(AndroidJUnit4.class)
public final class WaveformAudioBufferSinkTest {
private static final long TIMEOUT_MS = 1_000;
private static final double ALLOWED_FLOAT_CONVERSION_ERROR = 0.0001;
@Test
public void handleBuffer_monoToMono16Bit_callbackHasExpectedValue() throws Exception {
ByteBuffer byteBuffer = ByteBuffer.allocate(4);
byteBuffer.putShort(0, (short) (Short.MIN_VALUE / 3));
byteBuffer.putShort(2, (short) (Short.MAX_VALUE / 2));
ImmutableList<WaveformAudioBufferSink.WaveformBar> channels =
calculateChannelWaveformBars(
byteBuffer,
/* inputChannelCount= */ 1,
/* outputChannelCount= */ 1,
C.ENCODING_PCM_16BIT);
assertThat(channels.get(0).getSampleCount()).isEqualTo(2);
assertThat(channels.get(0).getMinSampleValue())
.isWithin(ALLOWED_FLOAT_CONVERSION_ERROR)
.of(-0.3333);
assertThat(channels.get(0).getMaxSampleValue())
.isWithin(ALLOWED_FLOAT_CONVERSION_ERROR)
.of(0.5);
assertThat(channels.get(0).getRootMeanSquare())
.isWithin(ALLOWED_FLOAT_CONVERSION_ERROR)
.of(0.4249);
}
@Test
public void handleBuffer_stereoToMono16Bit_callbackHasExpectedValue() throws Exception {
ByteBuffer byteBuffer = ByteBuffer.allocate(8);
byteBuffer.putShort(0, (short) (Short.MIN_VALUE / 3));
byteBuffer.putShort(2, (short) (Short.MIN_VALUE / 3));
byteBuffer.putShort(4, (short) (Short.MAX_VALUE / 2));
byteBuffer.putShort(6, (short) (Short.MAX_VALUE / 2));
ImmutableList<WaveformAudioBufferSink.WaveformBar> channels =
calculateChannelWaveformBars(
byteBuffer,
/* inputChannelCount= */ 2,
/* outputChannelCount= */ 1,
C.ENCODING_PCM_16BIT);
assertThat(channels.get(0).getSampleCount()).isEqualTo(2);
assertThat(channels.get(0).getMinSampleValue())
.isWithin(ALLOWED_FLOAT_CONVERSION_ERROR)
.of(-0.3333);
assertThat(channels.get(0).getMaxSampleValue())
.isWithin(ALLOWED_FLOAT_CONVERSION_ERROR)
.of(0.5);
assertThat(channels.get(0).getRootMeanSquare())
.isWithin(ALLOWED_FLOAT_CONVERSION_ERROR)
.of(0.4249);
}
@Test
public void handleBuffer_stereoToStereo16Bit_callbackHasExpectedValue() throws Exception {
ByteBuffer byteBuffer = ByteBuffer.allocate(8);
byteBuffer.putShort(0, (short) (Short.MIN_VALUE / 3));
byteBuffer.putShort(2, (short) (Short.MIN_VALUE / 2));
byteBuffer.putShort(4, (short) (Short.MAX_VALUE / 2));
byteBuffer.putShort(6, (short) (Short.MAX_VALUE / 3));
ImmutableList<WaveformAudioBufferSink.WaveformBar> channels =
calculateChannelWaveformBars(
byteBuffer,
/* inputChannelCount= */ 2,
/* outputChannelCount= */ 2,
C.ENCODING_PCM_16BIT);
assertThat(channels.get(0).getSampleCount()).isEqualTo(2);
assertThat(channels.get(0).getMinSampleValue())
.isWithin(ALLOWED_FLOAT_CONVERSION_ERROR)
.of(-0.3333);
assertThat(channels.get(0).getMaxSampleValue())
.isWithin(ALLOWED_FLOAT_CONVERSION_ERROR)
.of(0.5);
assertThat(channels.get(0).getRootMeanSquare())
.isWithin(ALLOWED_FLOAT_CONVERSION_ERROR)
.of(0.4249);
assertThat(channels.get(1).getSampleCount()).isEqualTo(2);
assertThat(channels.get(1).getMinSampleValue())
.isWithin(ALLOWED_FLOAT_CONVERSION_ERROR)
.of(-0.5);
assertThat(channels.get(1).getMaxSampleValue())
.isWithin(ALLOWED_FLOAT_CONVERSION_ERROR)
.of(0.3333);
assertThat(channels.get(1).getRootMeanSquare())
.isWithin(ALLOWED_FLOAT_CONVERSION_ERROR)
.of(0.4249);
}
@Test
public void handleBuffer_monoToMonoFloat_callbackHasExpectedValue() throws Exception {
ByteBuffer byteBuffer = ByteBuffer.allocate(8);
byteBuffer.putFloat(0, -0.3333f);
byteBuffer.putFloat(4, 0.5f);
ImmutableList<WaveformAudioBufferSink.WaveformBar> channels =
calculateChannelWaveformBars(
byteBuffer,
/* inputChannelCount= */ 1,
/* outputChannelCount= */ 1,
C.ENCODING_PCM_FLOAT);
assertThat(channels.get(0).getSampleCount()).isEqualTo(2);
assertThat(channels.get(0).getMinSampleValue())
.isWithin(ALLOWED_FLOAT_CONVERSION_ERROR)
.of(-0.3333);
assertThat(channels.get(0).getMaxSampleValue())
.isWithin(ALLOWED_FLOAT_CONVERSION_ERROR)
.of(0.5);
assertThat(channels.get(0).getRootMeanSquare())
.isWithin(ALLOWED_FLOAT_CONVERSION_ERROR)
.of(0.4249);
}
private ImmutableList<WaveformAudioBufferSink.WaveformBar> calculateChannelWaveformBars(
ByteBuffer byteBuffer,
int inputChannelCount,
int outputChannelCount,
@C.PcmEncoding int encoding)
throws InterruptedException {
List<WaveformAudioBufferSink.WaveformBar> channels = new ArrayList<>(outputChannelCount);
for (int i = 0; i < outputChannelCount; i++) {
channels.add(new WaveformAudioBufferSink.WaveformBar());
}
CountDownLatch countDownLatch = new CountDownLatch(outputChannelCount);
WaveformAudioBufferSink waveformAudioBufferSink =
new WaveformAudioBufferSink(
/* barsPerSecond= */ 1,
outputChannelCount,
(channelIndex, bar) -> {
countDownLatch.countDown();
channels.set(channelIndex, bar);
});
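// barsPerSecond is 1 and sampleRateHz is set to the number of frames in the buffer, so
// samplesPerBar equals the frame count and each channel emits exactly one bar.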
int sampleRateHz = byteBuffer.remaining() / Util.getPcmFrameSize(encoding, inputChannelCount);
waveformAudioBufferSink.flush(sampleRateHz, inputChannelCount, encoding);
waveformAudioBufferSink.handleBuffer(byteBuffer);
assertThat(countDownLatch.await(TIMEOUT_MS, MILLISECONDS)).isTrue();
return ImmutableList.copyOf(channels);
}
}
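
For reference, the expected values asserted above follow directly from WaveformBar: each bar holds two normalized samples with magnitudes of approximately 0.3333 (Short.MIN_VALUE / 3, normalized) and 0.5 (Short.MAX_VALUE / 2, normalized), so the RMS is sqrt((0.3333^2 + 0.5^2) / 2) ≈ 0.4249, matching the assertions within ALLOWED_FLOAT_CONVERSION_ERROR.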