Trigger silence generation when end of stream is encountered

This change avoids a muxer deadlock when:
1. A sequence of items is exported.
2. The first item's audio track is shorter than its video track.
3. The audio finishes, and the muxer refuses to write more than 500ms of
   video consecutively.

In that case, SequenceAssetLoader fails to progress to the second item:
when the audio of the first item finishes, the audio end-of-stream signal
is not propagated through AudioGraph, so the muxer blocks video, preventing
SequenceAssetLoader from moving to the next item in the sequence.

By triggering silence generation as soon as audio end of stream is
encountered, we ensure that SequenceAssetLoader can progress to the next
item.
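For reference, the muxer's interleaving constraint behaves roughly like the
sketch below (a minimal illustration; InterleavingGuard and all of its
member names are hypothetical, not the actual MuxerWrapper API):

// Hypothetical sketch of the interleaving constraint. A video sample may
// only be written while it stays within MAX_TRACK_SKEW_US of the last
// written audio sample. If audio ends without its end-of-stream signal
// reaching the muxer, the audio position stops advancing and video is
// blocked indefinitely: a deadlock.
final class InterleavingGuard {
  private static final long MAX_TRACK_SKEW_US = 500_000; // 500ms

  private long lastAudioTimeUs;

  boolean canWriteVideo(long videoTimeUs) {
    return videoTimeUs - lastAudioTimeUs <= MAX_TRACK_SKEW_US;
  }

  void onAudioSampleWritten(long audioTimeUs) {
    lastAudioTimeUs = audioTimeUs;
  }
}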

PiperOrigin-RevId: 636179966
dancho 2024-05-22 08:41:40 -07:00 committed by Copybara-Service
parent 5b3066f380
commit a74076f691
3 changed files with 117 additions and 7 deletions


@@ -282,6 +282,14 @@ import java.util.concurrent.atomic.AtomicReference;
     if (pendingMediaItemChange.get() != null) {
       return false;
     }
+    if (currentItemExpectedInputDurationUs != C.TIME_UNSET) {
+      // When exporting a sequence of items, we rely on currentItemExpectedInputDurationUs and
+      // receivedEndOfStreamFromInput to determine silence padding.
+      // Use isCurrentItemLast to correctly propagate end of stream once for the entire sequence.
+      return isCurrentItemLast && (receivedEndOfStreamFromInput || queueEndOfStreamAfterSilence);
+    }
+    // For a looping sequence, currentItemExpectedInputDurationUs is unset, and
+    // there isn't a last item -- end of stream is passed through directly.
     return receivedEndOfStreamFromInput || queueEndOfStreamAfterSilence;
   }
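In other words, isEnded() above reduces to three cases:
1. Expected duration set, item not last: returns false; a later item will
   propagate end of stream for the sequence.
2. Expected duration set, item is last: returns true once end of stream is
   received from input, or end of stream is queued after silence padding.
3. Expected duration unset (looping sequence): end of stream is passed
   through directly, with the same condition as the last-item case.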


@@ -277,7 +277,7 @@ import java.util.concurrent.atomic.AtomicInteger;
     if (wrappedSampleConsumer == null) {
       return null;
     }
-    sampleConsumer = new SampleConsumerWrapper(wrappedSampleConsumer);
+    sampleConsumer = new SampleConsumerWrapper(wrappedSampleConsumer, trackType);
     sampleConsumersByTrackType.put(trackType, sampleConsumer);
     if (forceAudioTrack && reportedTrackCount.get() == 1 && trackType == C.TRACK_TYPE_VIDEO) {
@@ -290,7 +290,7 @@ import java.util.concurrent.atomic.AtomicInteger;
                       .setPcmEncoding(C.ENCODING_PCM_16BIT)
                       .build()));
       sampleConsumersByTrackType.put(
-          C.TRACK_TYPE_AUDIO, new SampleConsumerWrapper(wrappedAudioSampleConsumer));
+          C.TRACK_TYPE_AUDIO, new SampleConsumerWrapper(wrappedAudioSampleConsumer, trackType));
     } else {
       // TODO(b/270533049): Remove the check below when implementing blank video frames generation.
@@ -391,13 +391,15 @@ import java.util.concurrent.atomic.AtomicInteger;
   private final class SampleConsumerWrapper implements SampleConsumer {
     private final SampleConsumer sampleConsumer;
+    private final @C.TrackType int trackType;
     private long totalDurationUs;
     private boolean audioLoopingEnded;
     private boolean videoLoopingEnded;
-    public SampleConsumerWrapper(SampleConsumer sampleConsumer) {
+    public SampleConsumerWrapper(SampleConsumer sampleConsumer, @C.TrackType int trackType) {
       this.sampleConsumer = sampleConsumer;
+      this.trackType = trackType;
     }
     @Nullable
@@ -426,8 +428,15 @@ import java.util.concurrent.atomic.AtomicInteger;
       if (inputBuffer.isEndOfStream()) {
         nonEndedTrackCount.decrementAndGet();
         if (currentMediaItemIndex < editedMediaItems.size() - 1 || isLooping) {
-          inputBuffer.clear();
-          inputBuffer.timeUs = 0;
+          if (trackType == C.TRACK_TYPE_AUDIO && !isLooping && decodeAudio) {
+            // Trigger silence generation (if needed) for a decoded audio track when end of stream
+            // is first encountered. This helps us avoid a muxer deadlock when audio track is
+            // shorter than video track. Not applicable for looping sequences.
+            checkState(sampleConsumer.queueInputBuffer());
+          } else {
+            inputBuffer.clear();
+            inputBuffer.timeUs = 0;
+          }
           if (nonEndedTrackCount.get() == 0) {
             switchAssetLoader();
           }
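Downstream, forwarding the end-of-stream buffer (instead of recycling it)
lets AudioGraphInput compute its silence padding immediately. Below is a
minimal self-contained sketch of that computation; SilencePaddingExample
and silencePaddingUs are hypothetical names used only for illustration (the
real logic lives in AudioGraphInput and SilentAudioGenerator):

import androidx.media3.common.C;

final class SilencePaddingExample {
  // Returns how much silence to generate when audio ends at
  // lastAudioTimestampUs but the item is expected to run for
  // expectedItemDurationUs. Hypothetical helper, for illustration only.
  static long silencePaddingUs(long expectedItemDurationUs, long lastAudioTimestampUs) {
    if (expectedItemDurationUs == C.TIME_UNSET) {
      // A looping sequence has no known item duration; end of stream is
      // passed through directly and nothing is padded.
      return 0;
    }
    return Math.max(0, expectedItemDurationUs - lastAudioTimestampUs);
  }
}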


@@ -170,7 +170,100 @@ public class AudioGraphInputTest {
   }
   @Test
-  public void isEnded_withEndOfStreamQueued_returnsTrue() throws Exception {
+  public void isEnded_withEndOfStreamQueuedAndItemIsNotLastAndDurationIsSet_returnsFalse()
+      throws Exception {
+    AudioGraphInput audioGraphInput =
+        new AudioGraphInput(
+            /* requestedOutputAudioFormat= */ AudioFormat.NOT_SET,
+            /* editedMediaItem= */ FAKE_ITEM,
+            /* inputFormat= */ getPcmFormat(MONO_44100));
+    audioGraphInput.onMediaItemChanged(
+        /* editedMediaItem= */ FAKE_ITEM,
+        /* durationUs= */ 0,
+        /* decodedFormat= */ getPcmFormat(MONO_44100),
+        /* isLast= */ false);
+    checkState(!audioGraphInput.getOutput().hasRemaining());
+    assertThat(audioGraphInput.isEnded()).isFalse();
+    // Queue EOS.
+    audioGraphInput.getInputBuffer().setFlags(C.BUFFER_FLAG_END_OF_STREAM);
+    checkState(audioGraphInput.queueInputBuffer());
+    assertThat(audioGraphInput.getOutput().hasRemaining()).isFalse();
+    assertThat(audioGraphInput.getOutput().hasRemaining()).isFalse();
+    assertThat(audioGraphInput.getOutput().hasRemaining()).isFalse();
+    assertThat(audioGraphInput.isEnded()).isFalse();
+  }
+  @Test
+  public void isEnded_withEndOfStreamQueuedAndItemIsLastAndDurationIsSet_returnsTrue()
+      throws Exception {
+    AudioGraphInput audioGraphInput =
+        new AudioGraphInput(
+            /* requestedOutputAudioFormat= */ AudioFormat.NOT_SET,
+            /* editedMediaItem= */ FAKE_ITEM,
+            /* inputFormat= */ getPcmFormat(MONO_44100));
+    audioGraphInput.onMediaItemChanged(
+        /* editedMediaItem= */ FAKE_ITEM,
+        /* durationUs= */ 0,
+        /* decodedFormat= */ getPcmFormat(MONO_44100),
+        /* isLast= */ true);
+    checkState(!audioGraphInput.getOutput().hasRemaining());
+    assertThat(audioGraphInput.isEnded()).isFalse();
+    // Queue EOS.
+    audioGraphInput.getInputBuffer().setFlags(C.BUFFER_FLAG_END_OF_STREAM);
+    checkState(audioGraphInput.queueInputBuffer());
+    assertThat(audioGraphInput.getOutput().hasRemaining()).isFalse();
+    assertThat(audioGraphInput.getOutput().hasRemaining()).isFalse();
+    assertThat(audioGraphInput.getOutput().hasRemaining()).isFalse();
+    assertThat(audioGraphInput.isEnded()).isTrue();
+  }
+  @Test
+  public void isEnded_withEndOfStreamQueuedAndItemIsNotLastAndDurationIsPositive_returnsFalse()
+      throws Exception {
+    AudioGraphInput audioGraphInput =
+        new AudioGraphInput(
+            /* requestedOutputAudioFormat= */ AudioFormat.NOT_SET,
+            /* editedMediaItem= */ FAKE_ITEM,
+            /* inputFormat= */ getPcmFormat(MONO_44100));
+    audioGraphInput.onMediaItemChanged(
+        /* editedMediaItem= */ FAKE_ITEM,
+        /* durationUs= */ 500_000,
+        /* decodedFormat= */ getPcmFormat(MONO_44100),
+        /* isLast= */ false);
+    checkState(!audioGraphInput.getOutput().hasRemaining());
+    assertThat(audioGraphInput.isEnded()).isFalse();
+    // Queue EOS.
+    audioGraphInput.getInputBuffer().setFlags(C.BUFFER_FLAG_END_OF_STREAM);
+    checkState(audioGraphInput.queueInputBuffer());
+    // First call to getOutput() triggers silence generation.
+    checkState(!audioGraphInput.getOutput().hasRemaining());
+    int totalBytesOutput = 0;
+    ByteBuffer output;
+    while ((output = audioGraphInput.getOutput()).hasRemaining()) {
+      totalBytesOutput += output.remaining();
+      output.position(output.limit());
+    }
+    assertThat(audioGraphInput.getOutput().hasRemaining()).isFalse();
+    long expectedSampleCount =
+        Util.durationUsToSampleCount(/* durationUs= */ 500_000, MONO_44100.sampleRate);
+    assertThat(totalBytesOutput).isEqualTo(expectedSampleCount * MONO_44100.bytesPerFrame);
+    assertThat(audioGraphInput.isEnded()).isFalse();
+  }
+  @Test
+  public void isEnded_withEndOfStreamQueued_whenDurationIsUnset_returnsTrue() throws Exception {
     AudioGraphInput audioGraphInput =
         new AudioGraphInput(
             /* requestedOutputAudioFormat= */ AudioFormat.NOT_SET,
@@ -206,7 +299,7 @@ public class AudioGraphInputTest {
         /* editedMediaItem= */ FAKE_ITEM,
         /* durationUs= */ C.TIME_UNSET,
         /* decodedFormat= */ getPcmFormat(MONO_44100),
-        /* isLast= */ false);
+        /* isLast= */ true);
     // Force the media item change to be processed.
     checkState(!audioGraphInput.getOutput().hasRemaining());
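As a sanity check on the byte-count assertion in
isEnded_withEndOfStreamQueuedAndItemIsNotLastAndDurationIsPositive_returnsFalse
above, assuming MONO_44100 is 16-bit mono PCM at 44100 Hz (so bytesPerFrame
is 2):

// Worked arithmetic behind the expected output size:
// durationUsToSampleCount(500_000, 44100) == 500_000 * 44100 / 1_000_000
//                                         == 22050 frames
// 22050 frames * 2 bytes/frame            == 44100 bytes of silence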