Retain stream time offsets through codecs
ExoPlayer applies a large time offset to buffers so that, even if the input has negative timestamps, the buffers seen by the decoders generally have positive timestamps. Modify how the offset is handled in `Transformer` so that decoders and encoders generally see positive timestamps too: leave the offset on samples when reading them in the base renderer (removing the code that stripped the offset there), and remove the offset when muxing instead. Also update the frame processor chain and slow motion flattening code to retain their existing behavior after this change, as both need original media presentation timestamps.

Tested via existing end-to-end tests, and manually verified that the overlay frame processor shows the expected original media timestamps.

Aside: we don't need the same logic as ExoPlayer to track stream offsets across the decoder yet, because we don't try to handle stream changes during playback in single-asset editing. (There is an edge case of multi-period DASH that may not work, but I doubt anyone will use that as input to `Transformer` before we change the code to handle multi-asset properly.) In future we should refactor the interaction with the decoder so that Transformer and ExoPlayer share the same code.

PiperOrigin-RevId: 451846055
parent cad1440e66
commit b25d00a795
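For orientation, here is a minimal sketch of the timestamp bookkeeping this change introduces. It is not Transformer code; the class name and the concrete values are made up for illustration, and only the arithmetic mirrors the change (codecs keep the offset, frame processors subtract streamOffsetUs, and the muxer path subtracts streamStartPositionUs):

    public final class OffsetFlowDemo {
      public static void main(String[] args) {
        // ExoPlayer adds a large offset so that codecs see positive timestamps
        // even when the original media has negative ones.
        long streamOffsetUs = 1_000_000_000_000L; // hypothetical value
        // The start position includes the offset; here the stream is clipped 0.5s in.
        long streamStartPositionUs = streamOffsetUs + 500_000L;

        long originalMediaTimeUs = 750_000L; // a sample 0.75s into the original media

        // After this change, samples keep the offset through decoder and encoder.
        long codecTimeUs = originalMediaTimeUs + streamOffsetUs;

        // Frame processors and slow motion flattening subtract the offset so they
        // still see original media presentation timestamps.
        long processorTimeUs = codecTimeUs - streamOffsetUs; // == 750_000

        // The muxer path subtracts the stream start position (which contains the
        // offset), so output timestamps are small again.
        long muxerTimeUs = codecTimeUs - streamStartPositionUs; // == 250_000

        System.out.printf(
            "codec=%d processor=%d muxer=%d%n", codecTimeUs, processorTimeUs, muxerTimeUs);
      }
    }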
@@ -309,6 +309,7 @@ public final class FrameProcessorChainPixelTest {
             pixelWidthHeightRatio,
             inputWidth,
             inputHeight,
+            /* streamOffsetUs= */ 0L,
             effects,
             /* enableExperimentalHdrEditing= */ false);
     Size outputSize = frameProcessorChain.getOutputSize();
@@ -130,6 +130,7 @@ public final class FrameProcessorChainTest {
             pixelWidthHeightRatio,
             inputSize.getWidth(),
             inputSize.getHeight(),
+            /* streamOffsetUs= */ 0L,
             effects.build(),
             /* enableExperimentalHdrEditing= */ false);
   }
@@ -56,6 +56,7 @@ import org.checkerframework.dataflow.qual.Pure;
 
   public AudioTranscodingSamplePipeline(
       Format inputFormat,
+      long streamOffsetUs,
       TransformationRequest transformationRequest,
       Codec.DecoderFactory decoderFactory,
       Codec.EncoderFactory encoderFactory,
@@ -108,6 +109,9 @@ import org.checkerframework.dataflow.qual.Pure;
     fallbackListener.onTransformationRequestFinalized(
         createFallbackTransformationRequest(
             transformationRequest, requestedOutputFormat, encoder.getConfigurationFormat()));
+
+    // Use the same stream offset as the input stream for encoder input buffers.
+    nextEncoderInputBufferTimeUs = streamOffsetUs;
   }
 
   @Override
@@ -101,6 +101,7 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
       float pixelWidthHeightRatio,
       int inputWidth,
       int inputHeight,
+      long streamOffsetUs,
       List<GlEffect> effects,
       boolean enableExperimentalHdrEditing)
       throws FrameProcessingException {
@@ -119,6 +120,7 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
                     pixelWidthHeightRatio,
                     inputWidth,
                     inputHeight,
+                    streamOffsetUs,
                     effects,
                     enableExperimentalHdrEditing,
                     singleThreadExecutorService))
@@ -145,6 +147,7 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
       float pixelWidthHeightRatio,
       int inputWidth,
       int inputHeight,
+      long streamOffsetUs,
       List<GlEffect> effects,
       boolean enableExperimentalHdrEditing,
       ExecutorService singleThreadExecutorService)
@@ -190,6 +193,7 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
             eglContext,
             singleThreadExecutorService,
             inputExternalTexId,
+            streamOffsetUs,
             framebuffers,
             textureProcessors,
             listener,
@@ -252,6 +256,11 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
   private final EGLContext eglContext;
   /** Some OpenGL commands may block, so all OpenGL commands are run on a background thread. */
   private final ExecutorService singleThreadExecutorService;
+  /**
+   * Offset compared to original media presentation time that has been added to incoming frame
+   * timestamps, in microseconds.
+   */
+  private final long streamOffsetUs;
   /** Futures corresponding to the executor service's pending tasks. */
   private final ConcurrentLinkedQueue<Future<?>> futures;
   /** Number of frames {@linkplain #registerInputFrame() registered} but not fully processed. */
@@ -308,6 +317,7 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
       EGLContext eglContext,
       ExecutorService singleThreadExecutorService,
       int inputExternalTexId,
+      long streamOffsetUs,
       int[] framebuffers,
       ImmutableList<SingleFrameGlTextureProcessor> textureProcessors,
       Listener listener,
@@ -317,6 +327,7 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
     this.eglDisplay = eglDisplay;
     this.eglContext = eglContext;
     this.singleThreadExecutorService = singleThreadExecutorService;
+    this.streamOffsetUs = streamOffsetUs;
     this.framebuffers = framebuffers;
     this.textureProcessors = textureProcessors;
     this.listener = listener;
@@ -476,8 +487,9 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
     }
 
     inputSurfaceTexture.updateTexImage();
-    long presentationTimeNs = inputSurfaceTexture.getTimestamp();
-    presentationTimeUs = presentationTimeNs / 1000;
+    long inputFrameTimeNs = inputSurfaceTexture.getTimestamp();
+    // Correct for the stream offset so processors see original media presentation timestamps.
+    presentationTimeUs = inputFrameTimeNs / 1000 - streamOffsetUs;
     inputSurfaceTexture.getTransformMatrix(textureTransformMatrix);
     ((ExternalTextureProcessor) textureProcessors.get(0))
         .setTextureTransformMatrix(textureTransformMatrix);
@@ -502,7 +514,7 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
     clearOutputFrame();
     getLast(textureProcessors).drawFrame(presentationTimeUs);
 
-    EGLExt.eglPresentationTimeANDROID(eglDisplay, outputEglSurface, presentationTimeNs);
+    EGLExt.eglPresentationTimeANDROID(eglDisplay, outputEglSurface, inputFrameTimeNs);
     EGL14.eglSwapBuffers(eglDisplay, outputEglSurface);
 
     if (debugSurfaceViewWrapper != null) {
@@ -25,17 +25,14 @@ import androidx.media3.decoder.DecoderInputBuffer;
 
   private final DecoderInputBuffer buffer;
   private final Format format;
-  private final long outputPresentationTimeOffsetUs;
 
   private boolean hasPendingBuffer;
 
   public PassthroughSamplePipeline(
       Format format,
-      long outputPresentationTimeOffsetUs,
       TransformationRequest transformationRequest,
       FallbackListener fallbackListener) {
     this.format = format;
-    this.outputPresentationTimeOffsetUs = outputPresentationTimeOffsetUs;
     buffer = new DecoderInputBuffer(DecoderInputBuffer.BUFFER_REPLACEMENT_MODE_DIRECT);
     hasPendingBuffer = false;
     fallbackListener.onTransformationRequestFinalized(transformationRequest);
@@ -49,7 +46,6 @@ import androidx.media3.decoder.DecoderInputBuffer;
 
   @Override
   public void queueInputBuffer() {
-    buffer.timeUs -= outputPresentationTimeOffsetUs;
     hasPendingBuffer = true;
   }
 
@@ -28,7 +28,6 @@ import androidx.media3.common.Format;
 import androidx.media3.common.Metadata;
 import androidx.media3.common.MimeTypes;
 import androidx.media3.common.util.Util;
-import androidx.media3.decoder.DecoderInputBuffer;
 import androidx.media3.extractor.metadata.mp4.SlowMotionData;
 import androidx.media3.extractor.metadata.mp4.SmtaMetadataEntry;
 import com.google.common.collect.ImmutableList;
@@ -106,9 +105,15 @@ import org.checkerframework.checker.nullness.qual.RequiresNonNull;
    * segments into account, in microseconds.
    */
   private long frameTimeDeltaUs;
+  /**
+   * The presentation time for the last {@linkplain #dropOrTransformSample(ByteBuffer, long)
+   * processed sample}.
+   */
+  private long lastSamplePresentationTimeUs;
 
   public SefSlowMotionFlattener(Format format) {
     scratch = new byte[NAL_START_CODE_LENGTH];
+    lastSamplePresentationTimeUs = C.TIME_UNSET;
     MetadataInfo metadataInfo = getMetadataInfo(format.metadata);
     slowMotionData = metadataInfo.slowMotionData;
     List<SlowMotionData.Segment> segments =
@@ -132,36 +137,47 @@ import org.checkerframework.checker.nullness.qual.RequiresNonNull;
    * Applies slow motion flattening by either indicating that the buffer's data should be dropped or
    * transforming it in place.
    *
+   * <p>After calling this method, call {@link #getSamplePresentationTimeUs()} to get the new
+   * presentation time for the sample (whether it was dropped or not).
+   *
    * @return Whether the buffer should be dropped.
    */
-  @RequiresNonNull("#1.data")
-  public boolean dropOrTransformSample(DecoderInputBuffer buffer) {
+  public boolean dropOrTransformSample(ByteBuffer buffer, long bufferTimeUs) {
     if (slowMotionData == null) {
       // The input is not an SEF slow motion video.
+      lastSamplePresentationTimeUs = bufferTimeUs;
       return false;
     }
 
-    ByteBuffer data = buffer.data;
-    int originalPosition = data.position();
-    data.position(originalPosition + NAL_START_CODE_LENGTH);
-    data.get(scratch, 0, 4); // Read nal_unit_header_svc_extension.
+    int originalPosition = buffer.position();
+    buffer.position(originalPosition + NAL_START_CODE_LENGTH);
+    buffer.get(scratch, 0, 4); // Read nal_unit_header_svc_extension.
     int nalUnitType = scratch[0] & 0x1F;
     boolean svcExtensionFlag = ((scratch[1] & 0xFF) >> 7) == 1;
     checkState(
         nalUnitType == NAL_UNIT_TYPE_PREFIX && svcExtensionFlag,
         "Missing SVC extension prefix NAL unit.");
     int layer = (scratch[3] & 0xFF) >> 5;
-    boolean shouldKeepFrame = processCurrentFrame(layer, buffer.timeUs);
+    boolean shouldKeepFrame = processCurrentFrame(layer, bufferTimeUs);
     // Update the timestamp regardless of whether the buffer is dropped as the timestamp may be
     // reused for the empty end-of-stream buffer.
-    buffer.timeUs = getCurrentFrameOutputTimeUs(/* inputTimeUs= */ buffer.timeUs);
+    lastSamplePresentationTimeUs = getCurrentFrameOutputTimeUs(bufferTimeUs);
     if (shouldKeepFrame) {
-      data.position(originalPosition);
+      buffer.position(originalPosition);
      return false;
     }
     return true;
   }
 
+  /**
+   * Returns the new presentation time for the last sample handled via {@link
+   * #dropOrTransformSample(ByteBuffer, long)}.
+   */
+  public long getSamplePresentationTimeUs() {
+    checkState(lastSamplePresentationTimeUs != C.TIME_UNSET);
+    return lastSamplePresentationTimeUs;
+  }
+
   /**
    * Processes the current frame and returns whether it should be kept.
    *
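As a usage sketch of the new two-step contract (drop/transform first, then query the adjusted time), here is a self-contained toy. StubFlattener is a hypothetical stand-in for SefSlowMotionFlattener that simply retimes every frame to half speed; the offset handling around the call mirrors the TransformerVideoRenderer change later in this diff:

    import java.nio.ByteBuffer;

    // Hypothetical stand-in for SefSlowMotionFlattener, just to show the call order.
    final class StubFlattener {
      private long lastSamplePresentationTimeUs = Long.MIN_VALUE;

      // Keeps every frame and pretends flattening doubles its timestamp.
      boolean dropOrTransformSample(ByteBuffer buffer, long bufferTimeUs) {
        lastSamplePresentationTimeUs = 2 * bufferTimeUs;
        return false; // false == keep the frame
      }

      long getSamplePresentationTimeUs() {
        return lastSamplePresentationTimeUs;
      }
    }

    final class FlattenerUsageDemo {
      public static void main(String[] args) {
        StubFlattener flattener = new StubFlattener();
        long streamOffsetUs = 1_000_000_000_000L; // hypothetical offset
        long bufferTimeUs = streamOffsetUs + 40_000L; // input sample still carries the offset

        // Remove the offset: the flattener works in original media time.
        long presentationTimeUs = bufferTimeUs - streamOffsetUs;
        boolean drop = flattener.dropOrTransformSample(ByteBuffer.allocate(0), presentationTimeUs);
        // Re-apply the offset to the (possibly retimed) sample before queueing to codecs.
        long newBufferTimeUs = streamOffsetUs + flattener.getSamplePresentationTimeUs();

        System.out.println("drop=" + drop + " newBufferTimeUs=" + newBufferTimeUs);
      }
    }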
@@ -70,12 +70,12 @@ import androidx.media3.extractor.metadata.mp4.SlowMotionData;
     Format inputFormat = checkNotNull(formatHolder.format);
     if (shouldPassthrough(inputFormat)) {
       samplePipeline =
-          new PassthroughSamplePipeline(
-              inputFormat, startPositionOffsetUs, transformationRequest, fallbackListener);
+          new PassthroughSamplePipeline(inputFormat, transformationRequest, fallbackListener);
     } else {
       samplePipeline =
           new AudioTranscodingSamplePipeline(
               inputFormat,
+              streamOffsetUs,
               transformationRequest,
               decoderFactory,
               encoderFactory,
@@ -45,7 +45,7 @@ import org.checkerframework.checker.nullness.qual.RequiresNonNull;
   protected boolean muxerWrapperTrackAdded;
   protected boolean muxerWrapperTrackEnded;
   protected long streamOffsetUs;
-  protected long startPositionOffsetUs;
+  protected long streamStartPositionUs;
   protected @MonotonicNonNull SamplePipeline samplePipeline;
 
   public TransformerBaseRenderer(
@@ -110,7 +110,7 @@ import org.checkerframework.checker.nullness.qual.RequiresNonNull;
   @Override
   protected final void onStreamChanged(Format[] formats, long startPositionUs, long offsetUs) {
     this.streamOffsetUs = offsetUs;
-    this.startPositionOffsetUs = startPositionUs - offsetUs;
+    this.streamStartPositionUs = startPositionUs;
   }
 
   @Override
@@ -178,11 +178,14 @@ import org.checkerframework.checker.nullness.qual.RequiresNonNull;
       return false;
     }
 
+    long samplePresentationTimeUs = samplePipelineOutputBuffer.timeUs - streamStartPositionUs;
+    // TODO(b/204892224): Consider subtracting the first sample timestamp from the sample pipeline
+    // buffer from all samples so that they are guaranteed to start from zero in the output file.
     if (!muxerWrapper.writeSample(
         getTrackType(),
         checkStateNotNull(samplePipelineOutputBuffer.data),
         samplePipelineOutputBuffer.isKeyFrame(),
-        samplePipelineOutputBuffer.timeUs)) {
+        samplePresentationTimeUs)) {
       return false;
     }
     samplePipeline.releaseOutputBuffer();
@@ -212,7 +215,6 @@ import org.checkerframework.checker.nullness.qual.RequiresNonNull;
       return false;
     }
     mediaClock.updateTimeForTrackType(getTrackType(), samplePipelineInputBuffer.timeUs);
-    samplePipelineInputBuffer.timeUs -= streamOffsetUs;
     checkStateNotNull(samplePipelineInputBuffer.data);
     maybeQueueSampleToPipeline(samplePipelineInputBuffer);
     return true;
@@ -89,14 +89,13 @@ import org.checkerframework.checker.nullness.qual.RequiresNonNull;
     Format inputFormat = checkNotNull(formatHolder.format);
     if (shouldPassthrough(inputFormat)) {
       samplePipeline =
-          new PassthroughSamplePipeline(
-              inputFormat, startPositionOffsetUs, transformationRequest, fallbackListener);
+          new PassthroughSamplePipeline(inputFormat, transformationRequest, fallbackListener);
     } else {
       samplePipeline =
           new VideoTranscodingSamplePipeline(
               context,
               inputFormat,
-              startPositionOffsetUs,
+              streamOffsetUs,
               transformationRequest,
               effects,
               decoderFactory,
@@ -113,7 +112,7 @@ import org.checkerframework.checker.nullness.qual.RequiresNonNull;
   }
 
   private boolean shouldPassthrough(Format inputFormat) {
-    if (startPositionOffsetUs != 0 && !clippingStartsAtKeyFrame) {
+    if ((streamStartPositionUs - streamOffsetUs) != 0 && !clippingStartsAtKeyFrame) {
       return false;
     }
     if (encoderFactory.videoNeedsEncoding()) {
@@ -166,9 +165,16 @@ import org.checkerframework.checker.nullness.qual.RequiresNonNull;
   @RequiresNonNull({"samplePipeline", "#1.data"})
   protected void maybeQueueSampleToPipeline(DecoderInputBuffer inputBuffer)
       throws TransformationException {
+    if (sefSlowMotionFlattener == null) {
+      samplePipeline.queueInputBuffer();
+      return;
+    }
+
     ByteBuffer data = inputBuffer.data;
+    long presentationTimeUs = inputBuffer.timeUs - streamOffsetUs;
     boolean shouldDropSample =
-        sefSlowMotionFlattener != null && sefSlowMotionFlattener.dropOrTransformSample(inputBuffer);
+        sefSlowMotionFlattener.dropOrTransformSample(data, presentationTimeUs);
+    inputBuffer.timeUs = streamOffsetUs + sefSlowMotionFlattener.getSamplePresentationTimeUs();
     if (shouldDropSample) {
       data.clear();
     } else {
@@ -36,7 +36,6 @@ import org.checkerframework.dataflow.qual.Pure;
  */
 /* package */ final class VideoTranscodingSamplePipeline implements SamplePipeline {
   private final int outputRotationDegrees;
-  private final long outputPresentationTimeOffsetUs;
   private final int maxPendingFrameCount;
 
   private final DecoderInputBuffer decoderInputBuffer;
@@ -53,7 +52,7 @@ import org.checkerframework.dataflow.qual.Pure;
   public VideoTranscodingSamplePipeline(
       Context context,
       Format inputFormat,
-      long outputPresentationTimeOffsetUs,
+      long streamOffsetUs,
       TransformationRequest transformationRequest,
       ImmutableList<GlEffect> effects,
       Codec.DecoderFactory decoderFactory,
@@ -63,7 +62,6 @@ import org.checkerframework.dataflow.qual.Pure;
       FrameProcessorChain.Listener frameProcessorChainListener,
       Transformer.DebugViewProvider debugViewProvider)
       throws TransformationException {
-    this.outputPresentationTimeOffsetUs = outputPresentationTimeOffsetUs;
     decoderInputBuffer =
         new DecoderInputBuffer(DecoderInputBuffer.BUFFER_REPLACEMENT_MODE_DISABLED);
     encoderOutputBuffer =
@@ -102,6 +100,7 @@ import org.checkerframework.dataflow.qual.Pure;
               inputFormat.pixelWidthHeightRatio,
               /* inputWidth= */ decodedWidth,
               /* inputHeight= */ decodedHeight,
+              streamOffsetUs,
               effectsListBuilder.build(),
               transformationRequest.enableHdrEditing);
     } catch (FrameProcessingException e) {
@@ -202,7 +201,7 @@ import org.checkerframework.dataflow.qual.Pure;
       return null;
     }
     MediaCodec.BufferInfo bufferInfo = checkNotNull(encoder.getOutputBufferInfo());
-    encoderOutputBuffer.timeUs = bufferInfo.presentationTimeUs - outputPresentationTimeOffsetUs;
+    encoderOutputBuffer.timeUs = bufferInfo.presentationTimeUs;
     encoderOutputBuffer.setFlags(bufferInfo.flags);
     return encoderOutputBuffer;
   }