AudioGraph refactoring: configure mixer and add sources in one place

PiperOrigin-RevId: 628076773
This commit is contained in:
kimvde 2024-04-25 07:59:37 -07:00 committed by Copybara-Service
parent 13a3aa7e77
commit c87940eb27
7 changed files with 135 additions and 75 deletions

View File

@ -27,114 +27,124 @@ container metadata:
entry = xyz: latitude=40.68, longitude=-74.5 entry = xyz: latitude=40.68, longitude=-74.5
sample: sample:
trackType = audio trackType = audio
dataHashCode = -1438864202 dataHashCode = 1742602241
size = 4096 size = 4096
isKeyFrame = true isKeyFrame = true
sample: sample:
trackType = audio trackType = audio
dataHashCode = 934871277 dataHashCode = 1742602241
size = 4096 size = 4096
isKeyFrame = true isKeyFrame = true
sample: sample:
trackType = audio trackType = audio
dataHashCode = 1134999182 dataHashCode = 413799331
size = 4096 size = 4096
isKeyFrame = true isKeyFrame = true
sample: sample:
trackType = audio trackType = audio
dataHashCode = -174912532 dataHashCode = 1108115494
size = 4096 size = 4096
isKeyFrame = true isKeyFrame = true
sample: sample:
trackType = audio trackType = audio
dataHashCode = -1005624952 dataHashCode = -1885521440
size = 4096 size = 4096
isKeyFrame = true isKeyFrame = true
sample: sample:
trackType = audio trackType = audio
dataHashCode = -1678870804 dataHashCode = 1112600185
size = 4096 size = 4096
isKeyFrame = true isKeyFrame = true
sample: sample:
trackType = audio trackType = audio
dataHashCode = -1663719904 dataHashCode = -377617286
size = 4096 size = 4096
isKeyFrame = true isKeyFrame = true
sample: sample:
trackType = audio trackType = audio
dataHashCode = -1735249329 dataHashCode = 1174897446
size = 4096 size = 4096
isKeyFrame = true isKeyFrame = true
sample: sample:
trackType = audio trackType = audio
dataHashCode = 1127436171 dataHashCode = -652005356
size = 4096 size = 4096
isKeyFrame = true isKeyFrame = true
sample: sample:
trackType = audio trackType = audio
dataHashCode = -508990108 dataHashCode = -1841655008
size = 4096 size = 4096
isKeyFrame = true isKeyFrame = true
sample: sample:
trackType = audio trackType = audio
dataHashCode = 1930479446 dataHashCode = 1815238174
size = 4096 size = 4096
isKeyFrame = true isKeyFrame = true
sample: sample:
trackType = audio trackType = audio
dataHashCode = -167569148 dataHashCode = -1745464927
size = 4096 size = 4096
isKeyFrame = true isKeyFrame = true
sample: sample:
trackType = audio trackType = audio
dataHashCode = 202160218 dataHashCode = -969133005
size = 4096 size = 4096
isKeyFrame = true isKeyFrame = true
sample: sample:
trackType = audio trackType = audio
dataHashCode = 1519515939 dataHashCode = 943244167
size = 4096 size = 4096
isKeyFrame = true isKeyFrame = true
sample: sample:
trackType = audio trackType = audio
dataHashCode = -1072406739 dataHashCode = 981350607
size = 4096 size = 4096
isKeyFrame = true isKeyFrame = true
sample: sample:
trackType = audio trackType = audio
dataHashCode = 33449818 dataHashCode = -1886507040
size = 4096 size = 4096
isKeyFrame = true isKeyFrame = true
sample: sample:
trackType = audio trackType = audio
dataHashCode = -271975432 dataHashCode = -502968940
size = 4096 size = 4096
isKeyFrame = true isKeyFrame = true
sample: sample:
trackType = audio trackType = audio
dataHashCode = -1029917087 dataHashCode = 1607535982
size = 4096 size = 4096
isKeyFrame = true isKeyFrame = true
sample: sample:
trackType = audio trackType = audio
dataHashCode = 1547114815 dataHashCode = -1557286731
size = 4096 size = 4096
isKeyFrame = true isKeyFrame = true
sample: sample:
trackType = audio trackType = audio
dataHashCode = 1307364090 dataHashCode = 1932764288
size = 4096 size = 4096
isKeyFrame = true isKeyFrame = true
sample: sample:
trackType = audio trackType = audio
dataHashCode = 1545711502 dataHashCode = -480251183
size = 4096 size = 4096
isKeyFrame = true isKeyFrame = true
sample: sample:
trackType = audio trackType = audio
dataHashCode = -1034138750 dataHashCode = 1508860307
size = 4096 size = 4096
isKeyFrame = true isKeyFrame = true
sample:
trackType = audio
dataHashCode = -1713708904
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 2107276453
size = 2282
isKeyFrame = true
sample: sample:
trackType = video trackType = video
dataHashCode = -770308242 dataHashCode = -770308242

View File

@ -135,6 +135,16 @@ sample:
dataHashCode = 1742602241 dataHashCode = 1742602241
size = 4096 size = 4096
isKeyFrame = true isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1340921537
size = 2282
isKeyFrame = true
sample: sample:
trackType = video trackType = video
dataHashCode = -770308242 dataHashCode = -770308242

View File

@ -997,7 +997,8 @@ public class TransformerEndToEndTest {
context, testId, /* inputFormat= */ MP4_ASSET_FORMAT, /* outputFormat= */ MP4_ASSET_FORMAT); context, testId, /* inputFormat= */ MP4_ASSET_FORMAT, /* outputFormat= */ MP4_ASSET_FORMAT);
ImmutableList<AudioProcessor> audioProcessors = ImmutableList.of(createSonic(1.2f)); ImmutableList<AudioProcessor> audioProcessors = ImmutableList.of(createSonic(1.2f));
ImmutableList<Effect> videoEffects = ImmutableList.of(RgbFilter.createGrayscaleFilter()); ImmutableList<Effect> videoEffects = ImmutableList.of(RgbFilter.createGrayscaleFilter());
MediaItem mediaItem = MediaItem.fromUri(Uri.parse(MP4_ASSET_URI_STRING)); MediaItem mediaItem =
MediaItem.fromUri(Uri.parse(MP4_ASSET_WITH_INCREASING_TIMESTAMPS_URI_STRING));
EditedMediaItem editedMediaItem = EditedMediaItem editedMediaItem =
new EditedMediaItem.Builder(mediaItem) new EditedMediaItem.Builder(mediaItem)
.setEffects(new Effects(audioProcessors, videoEffects)) .setEffects(new Effects(audioProcessors, videoEffects))

View File

@ -25,7 +25,6 @@ import androidx.media3.common.audio.AudioProcessingPipeline;
import androidx.media3.common.audio.AudioProcessor; import androidx.media3.common.audio.AudioProcessor;
import androidx.media3.common.audio.AudioProcessor.AudioFormat; import androidx.media3.common.audio.AudioProcessor.AudioFormat;
import androidx.media3.common.audio.AudioProcessor.UnhandledAudioFormatException; import androidx.media3.common.audio.AudioProcessor.UnhandledAudioFormatException;
import androidx.media3.common.util.Log;
import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.util.ArrayList; import java.util.ArrayList;
@ -42,8 +41,9 @@ import java.util.Objects;
private final AudioProcessingPipeline audioProcessingPipeline; private final AudioProcessingPipeline audioProcessingPipeline;
private AudioFormat mixerAudioFormat; private AudioFormat mixerAudioFormat;
private boolean isMixerConfigured;
private boolean isMixerReady;
private long pendingStartTimeUs; private long pendingStartTimeUs;
private int mixerSourcesToAdd;
private ByteBuffer mixerOutput; private ByteBuffer mixerOutput;
private int finishedInputs; private int finishedInputs;
@ -87,24 +87,18 @@ import java.util.Objects;
throws ExportException { throws ExportException {
checkArgument(format.pcmEncoding != Format.NO_VALUE); checkArgument(format.pcmEncoding != Format.NO_VALUE);
AudioGraphInput audioGraphInput; AudioGraphInput audioGraphInput;
int sourceId;
try { try {
audioGraphInput = new AudioGraphInput(mixerAudioFormat, editedMediaItem, format); audioGraphInput = new AudioGraphInput(mixerAudioFormat, editedMediaItem, format);
if (Objects.equals(mixerAudioFormat, AudioFormat.NOT_SET)) { if (Objects.equals(mixerAudioFormat, AudioFormat.NOT_SET)) {
// Mixer not configured, configure before doing anything else.
this.mixerAudioFormat = audioGraphInput.getOutputAudioFormat(); this.mixerAudioFormat = audioGraphInput.getOutputAudioFormat();
mixer.configure(mixerAudioFormat, /* bufferSizeMs= */ C.LENGTH_UNSET, /* startTimeUs= */ 0);
audioProcessingPipeline.configure(mixerAudioFormat); audioProcessingPipeline.configure(mixerAudioFormat);
audioProcessingPipeline.flush(); audioProcessingPipeline.flush();
} }
sourceId = mixer.addSource(audioGraphInput.getOutputAudioFormat(), /* startTimeUs= */ 0);
} catch (UnhandledAudioFormatException e) { } catch (UnhandledAudioFormatException e) {
throw ExportException.createForAudioProcessing( throw ExportException.createForAudioProcessing(
e, "Error while registering input " + inputInfos.size()); e, "Error while registering input " + inputInfos.size());
} }
inputInfos.add(new InputInfo(audioGraphInput, sourceId)); inputInfos.add(new InputInfo(audioGraphInput));
return audioGraphInput; return audioGraphInput;
} }
@ -124,14 +118,9 @@ import java.util.Objects;
* unless the graph was {@linkplain #flush() flushed}. * unless the graph was {@linkplain #flush() flushed}.
*/ */
public ByteBuffer getOutput() throws ExportException { public ByteBuffer getOutput() throws ExportException {
if (mixerSourcesToAdd > 0) { if (!ensureMixerReady()) {
// TODO(b/303029174): Consider adding the mixer sources at the start of playback and after a
// seek at the same place in the code.
addMixerSources();
if (mixerSourcesToAdd > 0) {
return EMPTY_BUFFER; return EMPTY_BUFFER;
} }
}
if (!mixer.isEnded()) { if (!mixer.isEnded()) {
feedMixer(); feedMixer();
} }
@ -177,13 +166,8 @@ import java.util.Objects;
inputInfo.audioGraphInput.flush(); inputInfo.audioGraphInput.flush();
} }
mixer.reset(); mixer.reset();
try { isMixerConfigured = false;
mixer.configure(mixerAudioFormat, /* bufferSizeMs= */ C.LENGTH_UNSET, pendingStartTimeUs); isMixerReady = false;
} catch (UnhandledAudioFormatException e) {
// Should never happen because mixer has already been configured with the same format.
Log.e(TAG, "Unexpected mixer configuration error");
}
mixerSourcesToAdd = inputInfos.size();
mixerOutput = EMPTY_BUFFER; mixerOutput = EMPTY_BUFFER;
audioProcessingPipeline.flush(); audioProcessingPipeline.flush();
finishedInputs = 0; finishedInputs = 0;
@ -215,19 +199,19 @@ import java.util.Objects;
return isMixerEnded(); return isMixerEnded();
} }
private boolean isMixerEnded() { private boolean ensureMixerReady() throws ExportException {
return !mixerOutput.hasRemaining() && finishedInputs >= inputInfos.size() && mixer.isEnded(); if (isMixerReady) {
return true;
} }
if (!isMixerConfigured) {
private void feedProcessingPipelineFromMixer() { try {
if (isMixerEnded()) { mixer.configure(mixerAudioFormat, /* bufferSizeMs= */ C.LENGTH_UNSET, pendingStartTimeUs);
audioProcessingPipeline.queueEndOfStream(); } catch (UnhandledAudioFormatException e) {
return; throw ExportException.createForAudioProcessing(e, "Error while configuring mixer");
} }
audioProcessingPipeline.queueInput(mixerOutput); isMixerConfigured = true;
} }
isMixerReady = true;
private void addMixerSources() throws ExportException {
for (int i = 0; i < inputInfos.size(); i++) { for (int i = 0; i < inputInfos.size(); i++) {
InputInfo inputInfo = inputInfos.get(i); InputInfo inputInfo = inputInfos.get(i);
if (inputInfo.mixerSourceId != C.INDEX_UNSET) { if (inputInfo.mixerSourceId != C.INDEX_UNSET) {
@ -239,9 +223,9 @@ import java.util.Objects;
audioGraphInput.getOutput(); audioGraphInput.getOutput();
long sourceStartTimeUs = audioGraphInput.getStartTimeUs(); long sourceStartTimeUs = audioGraphInput.getStartTimeUs();
if (sourceStartTimeUs == C.TIME_UNSET) { if (sourceStartTimeUs == C.TIME_UNSET) {
isMixerReady = false;
continue; continue;
} else if (sourceStartTimeUs == C.TIME_END_OF_SOURCE) { } else if (sourceStartTimeUs == C.TIME_END_OF_SOURCE) {
mixerSourcesToAdd--;
continue; continue;
} }
inputInfo.mixerSourceId = inputInfo.mixerSourceId =
@ -250,8 +234,8 @@ import java.util.Objects;
throw ExportException.createForAudioProcessing( throw ExportException.createForAudioProcessing(
e, "Unhandled format while adding source " + inputInfo.mixerSourceId); e, "Unhandled format while adding source " + inputInfo.mixerSourceId);
} }
mixerSourcesToAdd--;
} }
return isMixerReady;
} }
private void feedMixer() throws ExportException { private void feedMixer() throws ExportException {
@ -282,13 +266,25 @@ import java.util.Objects;
} }
} }
/**
 * Hands the current mixer output to the audio processing pipeline, or signals end of stream to
 * the pipeline once {@link #isMixerEnded()} reports that the mixer has ended.
 */
private void feedProcessingPipelineFromMixer() {
if (isMixerEnded()) {
// Nothing more will come out of the mixer: propagate end of stream instead of queueing input.
audioProcessingPipeline.queueEndOfStream();
return;
}
audioProcessingPipeline.queueInput(mixerOutput);
}
/**
 * Returns whether the mixer stage is done: the pending {@code mixerOutput} buffer has no
 * remaining bytes, at least as many inputs have finished as are registered, and the mixer itself
 * reports that it has ended.
 */
private boolean isMixerEnded() {
return !mixerOutput.hasRemaining() && finishedInputs >= inputInfos.size() && mixer.isEnded();
}
private static final class InputInfo { private static final class InputInfo {
public final AudioGraphInput audioGraphInput; public final AudioGraphInput audioGraphInput;
public int mixerSourceId; public int mixerSourceId;
public InputInfo(AudioGraphInput audioGraphInput, int mixerSourceId) { public InputInfo(AudioGraphInput audioGraphInput) {
this.audioGraphInput = audioGraphInput; this.audioGraphInput = audioGraphInput;
this.mixerSourceId = mixerSourceId; mixerSourceId = C.INDEX_UNSET;
} }
} }
} }

View File

@ -43,6 +43,7 @@ import java.nio.ByteBuffer;
import java.nio.ByteOrder; import java.nio.ByteOrder;
import java.util.Queue; import java.util.Queue;
import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.atomic.AtomicReference;
/** /**
@ -64,13 +65,13 @@ import java.util.concurrent.atomic.AtomicReference;
private final Queue<DecoderInputBuffer> availableInputBuffers; private final Queue<DecoderInputBuffer> availableInputBuffers;
private final Queue<DecoderInputBuffer> pendingInputBuffers; private final Queue<DecoderInputBuffer> pendingInputBuffers;
private final AtomicReference<@NullableType MediaItemChange> pendingMediaItemChange; private final AtomicReference<@NullableType MediaItemChange> pendingMediaItemChange;
private final AtomicLong startTimeUs;
@Nullable private DecoderInputBuffer currentInputBufferBeingOutput; @Nullable private DecoderInputBuffer currentInputBufferBeingOutput;
private AudioProcessingPipeline audioProcessingPipeline; private AudioProcessingPipeline audioProcessingPipeline;
private boolean processedFirstMediaItemChange; private boolean processedFirstMediaItemChange;
private boolean receivedEndOfStreamFromInput; private boolean receivedEndOfStreamFromInput;
private boolean queueEndOfStreamAfterSilence; private boolean queueEndOfStreamAfterSilence;
private long startTimeUs;
private boolean inputBlocked; private boolean inputBlocked;
/** /**
@ -104,7 +105,7 @@ import java.util.concurrent.atomic.AtomicReference;
// APP configuration not active until flush called. getOutputAudioFormat based on active config. // APP configuration not active until flush called. getOutputAudioFormat based on active config.
audioProcessingPipeline.flush(); audioProcessingPipeline.flush();
outputAudioFormat = audioProcessingPipeline.getOutputAudioFormat(); outputAudioFormat = audioProcessingPipeline.getOutputAudioFormat();
startTimeUs = C.TIME_UNSET; startTimeUs = new AtomicLong(C.TIME_UNSET);
} }
/** Returns the {@link AudioFormat} of {@linkplain #getOutput() output buffers}. */ /** Returns the {@link AudioFormat} of {@linkplain #getOutput() output buffers}. */
@ -186,19 +187,14 @@ import java.util.concurrent.atomic.AtomicReference;
checkState(pendingMediaItemChange.get() == null); checkState(pendingMediaItemChange.get() == null);
DecoderInputBuffer inputBuffer = availableInputBuffers.remove(); DecoderInputBuffer inputBuffer = availableInputBuffers.remove();
pendingInputBuffers.add(inputBuffer); pendingInputBuffers.add(inputBuffer);
if (startTimeUs == C.TIME_UNSET) { startTimeUs.compareAndSet(
startTimeUs = inputBuffer.timeUs; /* expectedValue= */ C.TIME_UNSET, /* newValue= */ inputBuffer.timeUs);
}
return true; return true;
} }
/** /** Returns the stream start time in microseconds, or {@link C#TIME_UNSET} if unknown. */
* Returns the stream start time in microseconds, or {@link C#TIME_UNSET} if unknown.
*
* <p>Should only be called if the input thread and processing thread are the same.
*/
public long getStartTimeUs() { public long getStartTimeUs() {
return startTimeUs; return startTimeUs.get();
} }
/** /**
@ -247,7 +243,7 @@ import java.util.concurrent.atomic.AtomicReference;
audioProcessingPipeline.flush(); audioProcessingPipeline.flush();
receivedEndOfStreamFromInput = false; receivedEndOfStreamFromInput = false;
queueEndOfStreamAfterSilence = false; queueEndOfStreamAfterSilence = false;
startTimeUs = C.TIME_UNSET; startTimeUs.set(C.TIME_UNSET);
} }
/** /**
@ -407,6 +403,7 @@ import java.util.concurrent.atomic.AtomicReference;
pendingAudioFormat = new AudioFormat(pendingChange.format); pendingAudioFormat = new AudioFormat(pendingChange.format);
} else { // Generating silence } else { // Generating silence
pendingAudioFormat = silentAudioGenerator.audioFormat; pendingAudioFormat = silentAudioGenerator.audioFormat;
startTimeUs.compareAndSet(/* expectedValue= */ C.TIME_UNSET, /* newValue= */ 0);
silentAudioGenerator.addSilence(pendingChange.durationUs); silentAudioGenerator.addSilence(pendingChange.durationUs);
if (pendingChange.isLast) { if (pendingChange.isLast) {
queueEndOfStreamAfterSilence = true; queueEndOfStreamAfterSilence = true;

View File

@ -18,10 +18,14 @@ package androidx.media3.transformer;
import static androidx.media3.common.util.Assertions.checkState; import static androidx.media3.common.util.Assertions.checkState;
import static androidx.media3.common.util.Util.getPcmFormat; import static androidx.media3.common.util.Util.getPcmFormat;
import static com.google.common.truth.Truth.assertThat; import static com.google.common.truth.Truth.assertThat;
import static org.junit.Assert.assertThrows;
import androidx.media3.common.C; import androidx.media3.common.C;
import androidx.media3.common.Format;
import androidx.media3.common.MediaItem; import androidx.media3.common.MediaItem;
import androidx.media3.common.audio.AudioProcessor;
import androidx.media3.common.audio.AudioProcessor.AudioFormat; import androidx.media3.common.audio.AudioProcessor.AudioFormat;
import androidx.media3.common.audio.BaseAudioProcessor;
import androidx.media3.common.audio.SonicAudioProcessor; import androidx.media3.common.audio.SonicAudioProcessor;
import androidx.media3.decoder.DecoderInputBuffer; import androidx.media3.decoder.DecoderInputBuffer;
import androidx.media3.test.utils.TestUtil; import androidx.media3.test.utils.TestUtil;
@ -89,6 +93,36 @@ public class AudioGraphTest {
assertThat(bytesOutput).isEqualTo(3 * 100_000 * 2 * 6); assertThat(bytesOutput).isEqualTo(3 * 100_000 * 2 * 6);
} }
/**
 * Checks that {@code AudioGraph.getOutput()} throws an {@link ExportException} when a registered
 * input uses an audio processor whose configured output format has an unset channel count.
 */
@Test
public void audioGraphInputOutputtingInvalidFormat_audioGraphThrows() throws Exception {
AudioGraph audioGraph =
new AudioGraph(new DefaultAudioMixer.Factory(), /* effects= */ ImmutableList.of());
// Processor that ignores its input and reports an output format with channelCount set to
// Format.NO_VALUE.
AudioProcessor audioProcessor =
new BaseAudioProcessor() {
@Override
public void queueInput(ByteBuffer inputBuffer) {}
@Override
protected AudioFormat onConfigure(AudioFormat inputAudioFormat) {
return new AudioFormat(
/* sampleRate= */ 44_100,
/* channelCount= */ Format.NO_VALUE,
C.ENCODING_PCM_16BIT);
}
};
EditedMediaItem editedMediaItem =
new EditedMediaItem.Builder(MediaItem.EMPTY)
.setEffects(
new Effects(
/* audioProcessors= */ ImmutableList.of(audioProcessor),
/* videoEffects= */ ImmutableList.of()))
.build();
// Registration is called outside assertThrows, so it is expected to complete normally.
audioGraph.registerInput(editedMediaItem, getPcmFormat(SURROUND_50000));
// The failure is expected to surface only when output is requested from the graph.
assertThrows(ExportException.class, audioGraph::getOutput);
}
@Test @Test
public void getOutputAudioFormat_afterInitialization_isNotSet() { public void getOutputAudioFormat_afterInitialization_isNotSet() {
AudioGraph audioGraph = AudioGraph audioGraph =
@ -170,6 +204,18 @@ public class AudioGraphTest {
assertThat(audioGraph.getOutputAudioFormat().sampleRate).isEqualTo(48_000); assertThat(audioGraph.getOutputAudioFormat().sampleRate).isEqualTo(48_000);
} }
/**
 * Checks that registering an input whose format is mono 44.1 kHz 8-bit PCM throws an {@link
 * IllegalArgumentException}.
 */
@Test
public void registerInput_withUnsupportedFormat_throws() {
AudioGraph audioGraph =
new AudioGraph(new DefaultAudioMixer.Factory(), /* effects= */ ImmutableList.of());
// 8-bit PCM is the format this test treats as unsupported.
AudioFormat audioFormat =
new AudioFormat(/* sampleRate= */ 44_100, /* channelCount= */ 1, C.ENCODING_PCM_8BIT);
assertThrows(
IllegalArgumentException.class,
() -> audioGraph.registerInput(FAKE_ITEM, getPcmFormat(audioFormat)));
}
@Test @Test
public void createAudioGraphWithEffect_changesOutputFormat() throws Exception { public void createAudioGraphWithEffect_changesOutputFormat() throws Exception {
SonicAudioProcessor sonicAudioProcessor = new SonicAudioProcessor(); SonicAudioProcessor sonicAudioProcessor = new SonicAudioProcessor();

View File

@ -54,7 +54,7 @@ import org.robolectric.ParameterizedRobolectricTestRunner.Parameters;
* *
* <ul> * <ul>
* <li>Video can not be transcoded, due to OpenGL not being supported with Robolectric. * <li>Video can not be transcoded, due to OpenGL not being supported with Robolectric.
* <li>Non RAW audio can not be trancoded, because AudioGraph requires decoded data but * <li>Non RAW audio can not be transcoded, because AudioGraph requires decoded data but
* Robolectric decoders do not decode. * Robolectric decoders do not decode.
* <li>RAW audio will always be transcoded, because the muxer does not support RAW audio as input. * <li>RAW audio will always be transcoded, because the muxer does not support RAW audio as input.
* </ul> * </ul>