mirror of
https://github.com/androidx/media.git
synced 2025-04-30 06:46:50 +08:00
Add a waveform audio buffer sink.
This can be used together with TeeAudioProcessor to draw an audio waveform.

PiperOrigin-RevId: 558213516
This commit is contained in:
parent
54797d4cc5
commit
6566387f70
@ -0,0 +1,155 @@
|
||||
/*
|
||||
* Copyright 2023 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package androidx.media3.exoplayer.audio;
|
||||
|
||||
import static androidx.media3.common.util.Assertions.checkStateNotNull;
|
||||
import static com.google.common.base.Preconditions.checkArgument;
|
||||
import static java.lang.Math.max;
|
||||
import static java.lang.Math.min;
|
||||
|
||||
import android.util.SparseArray;
|
||||
import androidx.annotation.FloatRange;
|
||||
import androidx.media3.common.C;
|
||||
import androidx.media3.common.audio.AudioMixingUtil;
|
||||
import androidx.media3.common.audio.AudioProcessor.AudioFormat;
|
||||
import androidx.media3.common.audio.ChannelMixingMatrix;
|
||||
import androidx.media3.common.util.UnstableApi;
|
||||
import androidx.media3.common.util.Util;
|
||||
import java.nio.ByteBuffer;
|
||||
import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
|
||||
|
||||
/** A sink for audio buffers that produces {@link WaveformBar waveform bars}. */
|
||||
@UnstableApi
|
||||
public class WaveformAudioBufferSink implements TeeAudioProcessor.AudioBufferSink {
|
||||
/**
|
||||
* Aggregates a group of audio samples. The values exposed can be used to draw one vertical bar of
|
||||
* an audio waveform.
|
||||
*/
|
||||
public static class WaveformBar {
|
||||
private float minSampleValue = 1f;
|
||||
private float maxSampleValue = -1f;
|
||||
private double squareSum;
|
||||
private int sampleCount;
|
||||
|
||||
/** Returns the number of samples {@linkplain #addSample added}. */
|
||||
public int getSampleCount() {
|
||||
return sampleCount;
|
||||
}
|
||||
|
||||
/** Returns the minimum sample value in this group, normalized between -1 and +1. */
|
||||
public double getMinSampleValue() {
|
||||
return minSampleValue;
|
||||
}
|
||||
|
||||
/** Returns the maximum sample value in this group, normalized between -1 and +1. */
|
||||
public double getMaxSampleValue() {
|
||||
return maxSampleValue;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the RMS (Root Mean Square) of the samples in this group, normalized between -1 and
|
||||
* +1.
|
||||
*
|
||||
* <p>This an estimate of the audio loudness level.
|
||||
*/
|
||||
public double getRootMeanSquare() {
|
||||
return Math.sqrt(squareSum / sampleCount);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a new sample to the group.
|
||||
*
|
||||
* @param sample The sample value, between -1 and +1.
|
||||
*/
|
||||
public void addSample(@FloatRange(from = -1, to = 1) float sample) {
|
||||
checkArgument(sample >= -1f && sample <= 1f);
|
||||
minSampleValue = min(minSampleValue, sample);
|
||||
maxSampleValue = max(maxSampleValue, sample);
|
||||
squareSum += (double) sample * sample;
|
||||
sampleCount++;
|
||||
}
|
||||
}
|
||||
|
||||
/** Listener for the audio waveform generation. */
|
||||
public interface Listener {
|
||||
/** Called when a new waveform bar has been generated for a specific output channel. */
|
||||
void onNewWaveformBar(int channelIndex, WaveformBar waveformBar);
|
||||
}
|
||||
|
||||
private final int barsPerSecond;
|
||||
private final Listener listener;
|
||||
private final SparseArray<WaveformBar> outputChannels;
|
||||
private final ByteBuffer mixingBuffer;
|
||||
private @MonotonicNonNull AudioFormat inputAudioFormat;
|
||||
private @MonotonicNonNull AudioFormat mixingAudioFormat;
|
||||
private @MonotonicNonNull ChannelMixingMatrix channelMixingMatrix;
|
||||
private int samplesPerBar;
|
||||
|
||||
/**
|
||||
* Creates a new instance.
|
||||
*
|
||||
* @param barsPerSecond The number of bars that should be generated per each second of audio.
|
||||
* @param outputChannelCount The number of channels that the output waveform should contain. If
|
||||
* this is different than the number of input channels, the audio will be mixed using the
|
||||
* {@linkplain ChannelMixingMatrix#create default mixing matrix}.
|
||||
* @param listener The listener to be notified when a new waveform bar has been generated.
|
||||
*/
|
||||
public WaveformAudioBufferSink(int barsPerSecond, int outputChannelCount, Listener listener) {
|
||||
this.barsPerSecond = barsPerSecond;
|
||||
this.listener = listener;
|
||||
mixingBuffer =
|
||||
ByteBuffer.allocate(Util.getPcmFrameSize(C.ENCODING_PCM_FLOAT, outputChannelCount));
|
||||
outputChannels = new SparseArray<>(outputChannelCount);
|
||||
for (int i = 0; i < outputChannelCount; i++) {
|
||||
outputChannels.append(i, new WaveformBar());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void flush(int sampleRateHz, int channelCount, @C.PcmEncoding int encoding) {
|
||||
samplesPerBar = sampleRateHz / barsPerSecond;
|
||||
inputAudioFormat = new AudioFormat(sampleRateHz, channelCount, encoding);
|
||||
mixingAudioFormat = new AudioFormat(sampleRateHz, outputChannels.size(), C.ENCODING_PCM_FLOAT);
|
||||
channelMixingMatrix = ChannelMixingMatrix.create(channelCount, outputChannels.size());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void handleBuffer(ByteBuffer buffer) {
|
||||
checkStateNotNull(inputAudioFormat);
|
||||
checkStateNotNull(mixingAudioFormat);
|
||||
checkStateNotNull(channelMixingMatrix);
|
||||
while (buffer.hasRemaining()) {
|
||||
mixingBuffer.rewind();
|
||||
AudioMixingUtil.mix(
|
||||
buffer,
|
||||
inputAudioFormat,
|
||||
mixingBuffer,
|
||||
mixingAudioFormat,
|
||||
channelMixingMatrix,
|
||||
/* framesToMix= */ 1,
|
||||
/* accumulate= */ false);
|
||||
mixingBuffer.rewind();
|
||||
for (int i = 0; i < outputChannels.size(); i++) {
|
||||
WaveformBar bar = outputChannels.get(i);
|
||||
bar.addSample(mixingBuffer.getFloat());
|
||||
if (bar.getSampleCount() >= samplesPerBar) {
|
||||
listener.onNewWaveformBar(i, bar);
|
||||
outputChannels.set(i, new WaveformBar());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,174 @@
|
||||
/*
|
||||
* Copyright 2023 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package androidx.media3.exoplayer.audio;
|
||||
|
||||
import static com.google.common.truth.Truth.assertThat;
|
||||
import static java.util.concurrent.TimeUnit.MILLISECONDS;
|
||||
|
||||
import androidx.media3.common.C;
|
||||
import androidx.media3.common.util.Util;
|
||||
import androidx.test.ext.junit.runners.AndroidJUnit4;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
|
||||
/** Unit tests for {@link WaveformAudioBufferSinkTest}. */
|
||||
@RunWith(AndroidJUnit4.class)
|
||||
public final class WaveformAudioBufferSinkTest {
|
||||
private static final long TIMEOUT_MS = 1_000;
|
||||
private static final double ALLOWED_FLOAT_CONVERSION_ERROR = 0.0001;
|
||||
|
||||
@Test
|
||||
public void handleBuffer_monoToMono16Bit_callbackHasExpectedValue() throws Exception {
|
||||
ByteBuffer byteBuffer = ByteBuffer.allocate(4);
|
||||
byteBuffer.putShort(0, (short) (Short.MIN_VALUE / 3));
|
||||
byteBuffer.putShort(2, (short) (Short.MAX_VALUE / 2));
|
||||
ImmutableList<WaveformAudioBufferSink.WaveformBar> channels =
|
||||
calculateChannelWaveformBars(
|
||||
byteBuffer,
|
||||
/* inputChannelCount= */ 1,
|
||||
/* outputChannelCount= */ 1,
|
||||
C.ENCODING_PCM_16BIT);
|
||||
|
||||
assertThat(channels.get(0).getSampleCount()).isEqualTo(2);
|
||||
assertThat(channels.get(0).getMinSampleValue())
|
||||
.isWithin(ALLOWED_FLOAT_CONVERSION_ERROR)
|
||||
.of(-0.3333);
|
||||
assertThat(channels.get(0).getMaxSampleValue())
|
||||
.isWithin(ALLOWED_FLOAT_CONVERSION_ERROR)
|
||||
.of(0.5);
|
||||
assertThat(channels.get(0).getRootMeanSquare())
|
||||
.isWithin(ALLOWED_FLOAT_CONVERSION_ERROR)
|
||||
.of(0.4249);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void handleBuffer_stereoToMono16Bit_callbackHasExpectedValue() throws Exception {
|
||||
ByteBuffer byteBuffer = ByteBuffer.allocate(8);
|
||||
byteBuffer.putShort(0, (short) (Short.MIN_VALUE / 3));
|
||||
byteBuffer.putShort(2, (short) (Short.MIN_VALUE / 3));
|
||||
byteBuffer.putShort(4, (short) (Short.MAX_VALUE / 2));
|
||||
byteBuffer.putShort(6, (short) (Short.MAX_VALUE / 2));
|
||||
ImmutableList<WaveformAudioBufferSink.WaveformBar> channels =
|
||||
calculateChannelWaveformBars(
|
||||
byteBuffer,
|
||||
/* inputChannelCount= */ 2,
|
||||
/* outputChannelCount= */ 1,
|
||||
C.ENCODING_PCM_16BIT);
|
||||
|
||||
assertThat(channels.get(0).getSampleCount()).isEqualTo(2);
|
||||
assertThat(channels.get(0).getMinSampleValue())
|
||||
.isWithin(ALLOWED_FLOAT_CONVERSION_ERROR)
|
||||
.of(-0.3333);
|
||||
assertThat(channels.get(0).getMaxSampleValue())
|
||||
.isWithin(ALLOWED_FLOAT_CONVERSION_ERROR)
|
||||
.of(0.5);
|
||||
assertThat(channels.get(0).getRootMeanSquare())
|
||||
.isWithin(ALLOWED_FLOAT_CONVERSION_ERROR)
|
||||
.of(0.4249);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void handleBuffer_stereoToStereo16Bit_callbackHasExpectedValue() throws Exception {
|
||||
ByteBuffer byteBuffer = ByteBuffer.allocate(8);
|
||||
byteBuffer.putShort(0, (short) (Short.MIN_VALUE / 3));
|
||||
byteBuffer.putShort(2, (short) (Short.MIN_VALUE / 2));
|
||||
byteBuffer.putShort(4, (short) (Short.MAX_VALUE / 2));
|
||||
byteBuffer.putShort(6, (short) (Short.MAX_VALUE / 3));
|
||||
ImmutableList<WaveformAudioBufferSink.WaveformBar> channels =
|
||||
calculateChannelWaveformBars(
|
||||
byteBuffer,
|
||||
/* inputChannelCount= */ 2,
|
||||
/* outputChannelCount= */ 2,
|
||||
C.ENCODING_PCM_16BIT);
|
||||
|
||||
assertThat(channels.get(0).getSampleCount()).isEqualTo(2);
|
||||
assertThat(channels.get(0).getMinSampleValue())
|
||||
.isWithin(ALLOWED_FLOAT_CONVERSION_ERROR)
|
||||
.of(-0.3333);
|
||||
assertThat(channels.get(0).getMaxSampleValue())
|
||||
.isWithin(ALLOWED_FLOAT_CONVERSION_ERROR)
|
||||
.of(0.5);
|
||||
assertThat(channels.get(0).getRootMeanSquare())
|
||||
.isWithin(ALLOWED_FLOAT_CONVERSION_ERROR)
|
||||
.of(0.4249);
|
||||
|
||||
assertThat(channels.get(1).getSampleCount()).isEqualTo(2);
|
||||
assertThat(channels.get(1).getMinSampleValue())
|
||||
.isWithin(ALLOWED_FLOAT_CONVERSION_ERROR)
|
||||
.of(-0.5);
|
||||
assertThat(channels.get(1).getMaxSampleValue())
|
||||
.isWithin(ALLOWED_FLOAT_CONVERSION_ERROR)
|
||||
.of(0.3333);
|
||||
assertThat(channels.get(1).getRootMeanSquare())
|
||||
.isWithin(ALLOWED_FLOAT_CONVERSION_ERROR)
|
||||
.of(0.4249);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void handleBuffer_monoToMonoFloat_callbackHasExpectedValue() throws Exception {
|
||||
ByteBuffer byteBuffer = ByteBuffer.allocate(8);
|
||||
byteBuffer.putFloat(0, -0.3333f);
|
||||
byteBuffer.putFloat(4, 0.5f);
|
||||
ImmutableList<WaveformAudioBufferSink.WaveformBar> channels =
|
||||
calculateChannelWaveformBars(
|
||||
byteBuffer,
|
||||
/* inputChannelCount= */ 1,
|
||||
/* outputChannelCount= */ 1,
|
||||
C.ENCODING_PCM_FLOAT);
|
||||
|
||||
assertThat(channels.get(0).getSampleCount()).isEqualTo(2);
|
||||
assertThat(channels.get(0).getMinSampleValue())
|
||||
.isWithin(ALLOWED_FLOAT_CONVERSION_ERROR)
|
||||
.of(-0.3333);
|
||||
assertThat(channels.get(0).getMaxSampleValue())
|
||||
.isWithin(ALLOWED_FLOAT_CONVERSION_ERROR)
|
||||
.of(0.5);
|
||||
assertThat(channels.get(0).getRootMeanSquare())
|
||||
.isWithin(ALLOWED_FLOAT_CONVERSION_ERROR)
|
||||
.of(0.4249);
|
||||
}
|
||||
|
||||
private ImmutableList<WaveformAudioBufferSink.WaveformBar> calculateChannelWaveformBars(
|
||||
ByteBuffer byteBuffer,
|
||||
int inputChannelCount,
|
||||
int outputChannelCount,
|
||||
@C.PcmEncoding int encoding)
|
||||
throws InterruptedException {
|
||||
List<WaveformAudioBufferSink.WaveformBar> channels = new ArrayList<>(outputChannelCount);
|
||||
for (int i = 0; i < outputChannelCount; i++) {
|
||||
channels.add(new WaveformAudioBufferSink.WaveformBar());
|
||||
}
|
||||
CountDownLatch countDownLatch = new CountDownLatch(outputChannelCount);
|
||||
WaveformAudioBufferSink waveformAudioBufferSink =
|
||||
new WaveformAudioBufferSink(
|
||||
/* barsPerSecond= */ 1,
|
||||
outputChannelCount,
|
||||
(channelIndex, bar) -> {
|
||||
countDownLatch.countDown();
|
||||
channels.set(channelIndex, bar);
|
||||
});
|
||||
int sampleRateHz = byteBuffer.remaining() / Util.getPcmFrameSize(encoding, inputChannelCount);
|
||||
waveformAudioBufferSink.flush(sampleRateHz, inputChannelCount, encoding);
|
||||
waveformAudioBufferSink.handleBuffer(byteBuffer);
|
||||
assertThat(countDownLatch.await(TIMEOUT_MS, MILLISECONDS)).isTrue();
|
||||
return ImmutableList.copyOf(channels);
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user