AudioGraph refactoring: configure mixer and add sources in one place

PiperOrigin-RevId: 628076773
This commit is contained in:
kimvde 2024-04-25 07:59:37 -07:00 committed by Copybara-Service
parent 13a3aa7e77
commit c87940eb27
7 changed files with 135 additions and 75 deletions

View File

@ -27,114 +27,124 @@ container metadata:
entry = xyz: latitude=40.68, longitude=-74.5
sample:
trackType = audio
dataHashCode = -1438864202
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 934871277
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1134999182
dataHashCode = 413799331
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = -174912532
dataHashCode = 1108115494
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = -1005624952
dataHashCode = -1885521440
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = -1678870804
dataHashCode = 1112600185
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = -1663719904
dataHashCode = -377617286
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = -1735249329
dataHashCode = 1174897446
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1127436171
dataHashCode = -652005356
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = -508990108
dataHashCode = -1841655008
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1930479446
dataHashCode = 1815238174
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = -167569148
dataHashCode = -1745464927
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 202160218
dataHashCode = -969133005
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1519515939
dataHashCode = 943244167
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = -1072406739
dataHashCode = 981350607
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 33449818
dataHashCode = -1886507040
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = -271975432
dataHashCode = -502968940
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = -1029917087
dataHashCode = 1607535982
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1547114815
dataHashCode = -1557286731
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1307364090
dataHashCode = 1932764288
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1545711502
dataHashCode = -480251183
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = -1034138750
dataHashCode = 1508860307
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = -1713708904
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 2107276453
size = 2282
isKeyFrame = true
sample:
trackType = video
dataHashCode = -770308242

View File

@ -135,6 +135,16 @@ sample:
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1340921537
size = 2282
isKeyFrame = true
sample:
trackType = video
dataHashCode = -770308242

View File

@ -997,7 +997,8 @@ public class TransformerEndToEndTest {
context, testId, /* inputFormat= */ MP4_ASSET_FORMAT, /* outputFormat= */ MP4_ASSET_FORMAT);
ImmutableList<AudioProcessor> audioProcessors = ImmutableList.of(createSonic(1.2f));
ImmutableList<Effect> videoEffects = ImmutableList.of(RgbFilter.createGrayscaleFilter());
MediaItem mediaItem = MediaItem.fromUri(Uri.parse(MP4_ASSET_URI_STRING));
MediaItem mediaItem =
MediaItem.fromUri(Uri.parse(MP4_ASSET_WITH_INCREASING_TIMESTAMPS_URI_STRING));
EditedMediaItem editedMediaItem =
new EditedMediaItem.Builder(mediaItem)
.setEffects(new Effects(audioProcessors, videoEffects))

View File

@ -25,7 +25,6 @@ import androidx.media3.common.audio.AudioProcessingPipeline;
import androidx.media3.common.audio.AudioProcessor;
import androidx.media3.common.audio.AudioProcessor.AudioFormat;
import androidx.media3.common.audio.AudioProcessor.UnhandledAudioFormatException;
import androidx.media3.common.util.Log;
import com.google.common.collect.ImmutableList;
import java.nio.ByteBuffer;
import java.util.ArrayList;
@ -42,8 +41,9 @@ import java.util.Objects;
private final AudioProcessingPipeline audioProcessingPipeline;
private AudioFormat mixerAudioFormat;
private boolean isMixerConfigured;
private boolean isMixerReady;
private long pendingStartTimeUs;
private int mixerSourcesToAdd;
private ByteBuffer mixerOutput;
private int finishedInputs;
@ -87,24 +87,18 @@ import java.util.Objects;
throws ExportException {
checkArgument(format.pcmEncoding != Format.NO_VALUE);
AudioGraphInput audioGraphInput;
int sourceId;
try {
audioGraphInput = new AudioGraphInput(mixerAudioFormat, editedMediaItem, format);
if (Objects.equals(mixerAudioFormat, AudioFormat.NOT_SET)) {
// Mixer not configured, configure before doing anything else.
this.mixerAudioFormat = audioGraphInput.getOutputAudioFormat();
mixer.configure(mixerAudioFormat, /* bufferSizeMs= */ C.LENGTH_UNSET, /* startTimeUs= */ 0);
audioProcessingPipeline.configure(mixerAudioFormat);
audioProcessingPipeline.flush();
}
sourceId = mixer.addSource(audioGraphInput.getOutputAudioFormat(), /* startTimeUs= */ 0);
} catch (UnhandledAudioFormatException e) {
throw ExportException.createForAudioProcessing(
e, "Error while registering input " + inputInfos.size());
}
inputInfos.add(new InputInfo(audioGraphInput, sourceId));
inputInfos.add(new InputInfo(audioGraphInput));
return audioGraphInput;
}
@ -124,14 +118,9 @@ import java.util.Objects;
* unless the graph was {@linkplain #flush() flushed}.
*/
public ByteBuffer getOutput() throws ExportException {
if (mixerSourcesToAdd > 0) {
// TODO(b/303029174): Consider adding the mixer sources at the start of playback and after a
// seek at the same place in the code.
addMixerSources();
if (mixerSourcesToAdd > 0) {
if (!ensureMixerReady()) {
return EMPTY_BUFFER;
}
}
if (!mixer.isEnded()) {
feedMixer();
}
@ -177,13 +166,8 @@ import java.util.Objects;
inputInfo.audioGraphInput.flush();
}
mixer.reset();
try {
mixer.configure(mixerAudioFormat, /* bufferSizeMs= */ C.LENGTH_UNSET, pendingStartTimeUs);
} catch (UnhandledAudioFormatException e) {
// Should never happen because mixer has already been configured with the same format.
Log.e(TAG, "Unexpected mixer configuration error");
}
mixerSourcesToAdd = inputInfos.size();
isMixerConfigured = false;
isMixerReady = false;
mixerOutput = EMPTY_BUFFER;
audioProcessingPipeline.flush();
finishedInputs = 0;
@ -215,19 +199,19 @@ import java.util.Objects;
return isMixerEnded();
}
private boolean isMixerEnded() {
return !mixerOutput.hasRemaining() && finishedInputs >= inputInfos.size() && mixer.isEnded();
private boolean ensureMixerReady() throws ExportException {
if (isMixerReady) {
return true;
}
private void feedProcessingPipelineFromMixer() {
if (isMixerEnded()) {
audioProcessingPipeline.queueEndOfStream();
return;
if (!isMixerConfigured) {
try {
mixer.configure(mixerAudioFormat, /* bufferSizeMs= */ C.LENGTH_UNSET, pendingStartTimeUs);
} catch (UnhandledAudioFormatException e) {
throw ExportException.createForAudioProcessing(e, "Error while configuring mixer");
}
audioProcessingPipeline.queueInput(mixerOutput);
isMixerConfigured = true;
}
private void addMixerSources() throws ExportException {
isMixerReady = true;
for (int i = 0; i < inputInfos.size(); i++) {
InputInfo inputInfo = inputInfos.get(i);
if (inputInfo.mixerSourceId != C.INDEX_UNSET) {
@ -239,9 +223,9 @@ import java.util.Objects;
audioGraphInput.getOutput();
long sourceStartTimeUs = audioGraphInput.getStartTimeUs();
if (sourceStartTimeUs == C.TIME_UNSET) {
isMixerReady = false;
continue;
} else if (sourceStartTimeUs == C.TIME_END_OF_SOURCE) {
mixerSourcesToAdd--;
continue;
}
inputInfo.mixerSourceId =
@ -250,8 +234,8 @@ import java.util.Objects;
throw ExportException.createForAudioProcessing(
e, "Unhandled format while adding source " + inputInfo.mixerSourceId);
}
mixerSourcesToAdd--;
}
return isMixerReady;
}
private void feedMixer() throws ExportException {
@ -282,13 +266,25 @@ import java.util.Objects;
}
}
/**
 * Hands the current mixer output to the audio processing pipeline, or signals end of stream to
 * the pipeline once {@link #isMixerEnded()} reports that the mixer has ended.
 */
private void feedProcessingPipelineFromMixer() {
  if (!isMixerEnded()) {
    // Mixer still has (or may still produce) data: forward the current output buffer.
    audioProcessingPipeline.queueInput(mixerOutput);
    return;
  }
  audioProcessingPipeline.queueEndOfStream();
}
/**
 * Returns whether the mixer is done: the current mixer output has no remaining bytes, the number
 * of finished inputs has reached the number of registered inputs, and the mixer reports ended.
 */
private boolean isMixerEnded() {
  // Checks are kept in this order so that later ones are skipped once an earlier one fails.
  if (mixerOutput.hasRemaining()) {
    return false;
  }
  if (finishedInputs < inputInfos.size()) {
    return false;
  }
  return mixer.isEnded();
}
private static final class InputInfo {
public final AudioGraphInput audioGraphInput;
public int mixerSourceId;
public InputInfo(AudioGraphInput audioGraphInput, int mixerSourceId) {
public InputInfo(AudioGraphInput audioGraphInput) {
this.audioGraphInput = audioGraphInput;
this.mixerSourceId = mixerSourceId;
mixerSourceId = C.INDEX_UNSET;
}
}
}

View File

@ -43,6 +43,7 @@ import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Queue;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
/**
@ -64,13 +65,13 @@ import java.util.concurrent.atomic.AtomicReference;
private final Queue<DecoderInputBuffer> availableInputBuffers;
private final Queue<DecoderInputBuffer> pendingInputBuffers;
private final AtomicReference<@NullableType MediaItemChange> pendingMediaItemChange;
private final AtomicLong startTimeUs;
@Nullable private DecoderInputBuffer currentInputBufferBeingOutput;
private AudioProcessingPipeline audioProcessingPipeline;
private boolean processedFirstMediaItemChange;
private boolean receivedEndOfStreamFromInput;
private boolean queueEndOfStreamAfterSilence;
private long startTimeUs;
private boolean inputBlocked;
/**
@ -104,7 +105,7 @@ import java.util.concurrent.atomic.AtomicReference;
// APP configuration not active until flush called. getOutputAudioFormat based on active config.
audioProcessingPipeline.flush();
outputAudioFormat = audioProcessingPipeline.getOutputAudioFormat();
startTimeUs = C.TIME_UNSET;
startTimeUs = new AtomicLong(C.TIME_UNSET);
}
/** Returns the {@link AudioFormat} of {@linkplain #getOutput() output buffers}. */
@ -186,19 +187,14 @@ import java.util.concurrent.atomic.AtomicReference;
checkState(pendingMediaItemChange.get() == null);
DecoderInputBuffer inputBuffer = availableInputBuffers.remove();
pendingInputBuffers.add(inputBuffer);
if (startTimeUs == C.TIME_UNSET) {
startTimeUs = inputBuffer.timeUs;
}
startTimeUs.compareAndSet(
/* expectedValue= */ C.TIME_UNSET, /* newValue= */ inputBuffer.timeUs);
return true;
}
/**
* Returns the stream start time in microseconds, or {@link C#TIME_UNSET} if unknown.
*
* <p>Should only be called if the input thread and processing thread are the same.
*/
/** Returns the stream start time in microseconds, or {@link C#TIME_UNSET} if unknown. */
public long getStartTimeUs() {
return startTimeUs;
return startTimeUs.get();
}
/**
@ -247,7 +243,7 @@ import java.util.concurrent.atomic.AtomicReference;
audioProcessingPipeline.flush();
receivedEndOfStreamFromInput = false;
queueEndOfStreamAfterSilence = false;
startTimeUs = C.TIME_UNSET;
startTimeUs.set(C.TIME_UNSET);
}
/**
@ -407,6 +403,7 @@ import java.util.concurrent.atomic.AtomicReference;
pendingAudioFormat = new AudioFormat(pendingChange.format);
} else { // Generating silence
pendingAudioFormat = silentAudioGenerator.audioFormat;
startTimeUs.compareAndSet(/* expectedValue= */ C.TIME_UNSET, /* newValue= */ 0);
silentAudioGenerator.addSilence(pendingChange.durationUs);
if (pendingChange.isLast) {
queueEndOfStreamAfterSilence = true;

View File

@ -18,10 +18,14 @@ package androidx.media3.transformer;
import static androidx.media3.common.util.Assertions.checkState;
import static androidx.media3.common.util.Util.getPcmFormat;
import static com.google.common.truth.Truth.assertThat;
import static org.junit.Assert.assertThrows;
import androidx.media3.common.C;
import androidx.media3.common.Format;
import androidx.media3.common.MediaItem;
import androidx.media3.common.audio.AudioProcessor;
import androidx.media3.common.audio.AudioProcessor.AudioFormat;
import androidx.media3.common.audio.BaseAudioProcessor;
import androidx.media3.common.audio.SonicAudioProcessor;
import androidx.media3.decoder.DecoderInputBuffer;
import androidx.media3.test.utils.TestUtil;
@ -89,6 +93,36 @@ public class AudioGraphTest {
assertThat(bytesOutput).isEqualTo(3 * 100_000 * 2 * 6);
}
@Test
public void audioGraphInputOutputtingInvalidFormat_audioGraphThrows() throws Exception {
  // Audio processor that reports an output format whose channel count is Format.NO_VALUE.
  AudioProcessor invalidFormatProcessor =
      new BaseAudioProcessor() {
        @Override
        public void queueInput(ByteBuffer inputBuffer) {}

        @Override
        protected AudioFormat onConfigure(AudioFormat inputAudioFormat) {
          return new AudioFormat(
              /* sampleRate= */ 44_100,
              /* channelCount= */ Format.NO_VALUE,
              C.ENCODING_PCM_16BIT);
        }
      };
  Effects effects =
      new Effects(
          /* audioProcessors= */ ImmutableList.of(invalidFormatProcessor),
          /* videoEffects= */ ImmutableList.of());
  EditedMediaItem item = new EditedMediaItem.Builder(MediaItem.EMPTY).setEffects(effects).build();
  AudioGraph graph =
      new AudioGraph(new DefaultAudioMixer.Factory(), /* effects= */ ImmutableList.of());

  // Registering the input is expected to succeed here; the failure is asserted on getOutput().
  graph.registerInput(item, getPcmFormat(SURROUND_50000));

  assertThrows(ExportException.class, () -> graph.getOutput());
}
@Test
public void getOutputAudioFormat_afterInitialization_isNotSet() {
AudioGraph audioGraph =
@ -170,6 +204,18 @@ public class AudioGraphTest {
assertThat(audioGraph.getOutputAudioFormat().sampleRate).isEqualTo(48_000);
}
@Test
public void registerInput_withUnsupportedFormat_throws() {
  // Mono 44.1 kHz input using 8-bit PCM encoding.
  AudioFormat pcm8BitFormat =
      new AudioFormat(/* sampleRate= */ 44_100, /* channelCount= */ 1, C.ENCODING_PCM_8BIT);
  AudioGraph graph =
      new AudioGraph(new DefaultAudioMixer.Factory(), /* effects= */ ImmutableList.of());

  // The format conversion stays inside the lambda so the asserted scope is unchanged.
  assertThrows(
      IllegalArgumentException.class,
      () -> graph.registerInput(FAKE_ITEM, getPcmFormat(pcm8BitFormat)));
}
@Test
public void createAudioGraphWithEffect_changesOutputFormat() throws Exception {
SonicAudioProcessor sonicAudioProcessor = new SonicAudioProcessor();

View File

@ -54,7 +54,7 @@ import org.robolectric.ParameterizedRobolectricTestRunner.Parameters;
*
* <ul>
* <li>Video can not be transcoded, due to OpenGL not being supported with Robolectric.
* <li>Non RAW audio can not be trancoded, because AudioGraph requires decoded data but
* <li>Non RAW audio can not be transcoded, because AudioGraph requires decoded data but
* Robolectric decoders do not decode.
* <li>RAW audio will always be transcoded, because the muxer does not support RAW audio as input.
* </ul>