Fix and simplify timestamp handling

Handling of the stream offset and start position was unnecessarily
complex and even incorrect. It would have become a problem for the
concatenation of video and image input.

The stream offset is the offset added to the timestamps before
decoding/encoding, to make sure these operations don’t fail on
negative timestamps (which do occur, although rarely).
The start position is equal to the stream offset, plus the clipping
start time if the media is clipped.
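
For illustration, a minimal sketch of how these values relate (the
variable names and values are only for this example):

  // Offset added so that no timestamp handed to the codecs is negative.
  long streamOffsetUs = 1_000_000;
  // Clipping start time, 0 if the media is not clipped.
  long clippingStartTimeUs = 5_000_000;
  // Start position = stream offset + clipping start time.
  long streamStartPositionUs = streamOffsetUs + clippingStartTimeUs;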

Before this change:
- Samples were offset by the start position before decoding, and this
offset was removed before muxing.
- The startPosition of the first MediaItem in a sequence was used for
all the MediaItems in this sequence (which is incorrect).
- The stream offset was removed before applying the GL effects and
added back before encoding so that it was not visible to the OpenGL
processing.

After this change:
- The start position is subtracted in the AssetLoader, so that
downstream components don’t have to deal with stream offsets and start
positions.
- Decoded samples with negative timestamps are not passed to the
SamplePipelines, because the MediaMuxer doesn’t handle negative
timestamps well: if a stream is 10 seconds long and starts at
timestamp -2 seconds, the output only contains the samples
corresponding to the first 8 (10 - 2) seconds, and the last 2 seconds
of the stream are lost. Removing the first 2 seconds instead seems
acceptable (see the sketch below).
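
A minimal sketch of the drop logic from the last point, mirroring the
renderer change further down in this diff (simplified, not the exact
code):

  // The start position is subtracted in the AssetLoader...
  inputBuffer.timeUs -= streamStartPositionUs;
  // ...and samples whose timestamp becomes negative are dropped in the
  // transcoding case instead of being passed to the SamplePipelines.
  if (decoder != null && inputBuffer.timeUs < 0) {
    inputBuffer.clear();
    return true; // Drop this input buffer.
  }
  return false;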

PiperOrigin-RevId: 520916464
kimvde 2023-03-31 15:20:28 +01:00 committed by Marc Baechinger
parent bf2d311bf1
commit 3a5c4277a7
16 changed files with 94 additions and 119 deletions

View File

@@ -98,18 +98,10 @@ public interface AssetLoader {
* AssetLoader} (prior to video slow motion flattening or to decoding).
* @param supportedOutputTypes The output {@linkplain SupportedOutputTypes types} supported by
* this {@link AssetLoader} for the track added. At least one output type must be supported.
* @param streamStartPositionUs The start position of the stream (offset by {@code
* streamOffsetUs}), in microseconds.
* @param streamOffsetUs The offset that will be added to the timestamps to make sure they are
* non-negative, in microseconds.
* @return Whether the {@link AssetLoader} needs to provide decoded data to the {@link
* SampleConsumer}.
*/
boolean onTrackAdded(
Format inputFormat,
@SupportedOutputTypes int supportedOutputTypes,
long streamStartPositionUs,
long streamOffsetUs);
boolean onTrackAdded(Format inputFormat, @SupportedOutputTypes int supportedOutputTypes);
/**
* Called when the {@link Format} of samples that will be output by the {@link AssetLoader} is

View File

@@ -62,7 +62,6 @@ import org.checkerframework.dataflow.qual.Pure;
public AudioSamplePipeline(
Format firstAssetLoaderInputFormat,
Format firstPipelineInputFormat,
long streamOffsetUs,
TransformationRequest transformationRequest,
boolean flattenForSlowMotion,
ImmutableList<AudioProcessor> audioProcessors,
@@ -70,7 +69,7 @@ import org.checkerframework.dataflow.qual.Pure;
MuxerWrapper muxerWrapper,
FallbackListener fallbackListener)
throws ExportException {
super(firstAssetLoaderInputFormat, /* streamStartPositionUs= */ streamOffsetUs, muxerWrapper);
super(firstAssetLoaderInputFormat, muxerWrapper);
silentAudioGenerator = new SilentAudioGenerator(firstPipelineInputFormat);
availableInputBuffers = new ConcurrentLinkedDeque<>();
@@ -141,9 +140,6 @@ import org.checkerframework.dataflow.qual.Pure;
transformationRequest,
requestedEncoderFormat,
/* actualFormat= */ encoder.getConfigurationFormat()));
// Use the same stream offset as the input stream for encoder input buffers.
nextEncoderInputBufferTimeUs = streamOffsetUs;
}
@Override

View File

@@ -216,11 +216,32 @@ public interface Codec {
* <p>This should be called after the buffer has been processed. The next output buffer will not
* be available until the current output buffer has been released.
*
* <p>Calling this method with {@code render} set to {@code true} is equivalent to calling {@link
* #releaseOutputBuffer(long)} with the presentation timestamp of the {@link
* #getOutputBufferInfo() output buffer info}.
*
* @param render Whether the buffer needs to be rendered to the output {@link Surface}.
* @throws ExportException If the underlying decoder or encoder encounters a problem.
*/
void releaseOutputBuffer(boolean render) throws ExportException;
/**
* Renders and releases the current output buffer.
*
* <p>This method must only be called on video decoders.
*
* <p>This method will first render the buffer to the output surface. The surface will then
* release the buffer back to the {@code Codec} once it is no longer used/displayed.
*
* <p>This should be called after the buffer has been processed. The next output buffer will not
* be available until the current output buffer has been released.
*
* @param renderPresentationTimeUs The presentation timestamp to associate with this buffer, in
* microseconds.
* @throws ExportException If the underlying decoder or encoder encounters a problem.
*/
void releaseOutputBuffer(long renderPresentationTimeUs) throws ExportException;
/**
* Returns whether the {@code Codec}'s output stream has ended, and no more data can be dequeued.
*/
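
For context, a hypothetical caller-side sketch of the two release
paths documented above (shouldDrop and adjustedPresentationTimeUs are
placeholders invented for this example, not real methods or fields):

  MediaCodec.BufferInfo info = checkNotNull(decoder.getOutputBufferInfo());
  if (shouldDrop(info.presentationTimeUs)) {
    // Release without rendering: the frame is skipped.
    decoder.releaseOutputBuffer(/* render= */ false);
  } else {
    // Render to the output surface with an explicit presentation timestamp.
    decoder.releaseOutputBuffer(adjustedPresentationTimeUs);
  }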

View File

@@ -253,12 +253,21 @@ public final class DefaultCodec implements Codec {
@Override
public void releaseOutputBuffer(boolean render) throws ExportException {
releaseOutputBuffer(render, checkStateNotNull(outputBufferInfo).presentationTimeUs);
}
@Override
public void releaseOutputBuffer(long renderPresentationTimeUs) throws ExportException {
releaseOutputBuffer(/* render= */ true, renderPresentationTimeUs);
}
private void releaseOutputBuffer(boolean render, long renderPresentationTimeUs)
throws ExportException {
outputBuffer = null;
try {
if (render) {
mediaCodec.releaseOutputBuffer(
outputBufferIndex,
/* renderTimestampNs= */ checkStateNotNull(outputBufferInfo).presentationTimeUs * 1000);
outputBufferIndex, /* renderTimestampNs= */ renderPresentationTimeUs * 1000);
} else {
mediaCodec.releaseOutputBuffer(outputBufferIndex, /* render= */ false);
}

View File

@@ -43,11 +43,10 @@ import java.util.concurrent.atomic.AtomicLong;
public EncodedSamplePipeline(
Format format,
long streamStartPositionUs,
TransformationRequest transformationRequest,
MuxerWrapper muxerWrapper,
FallbackListener fallbackListener) {
super(format, streamStartPositionUs, muxerWrapper);
super(format, muxerWrapper);
this.format = format;
nextMediaItemOffsetUs = new AtomicLong();
availableInputBuffers = new ConcurrentLinkedDeque<>();

View File

@@ -51,6 +51,22 @@ import org.checkerframework.checker.nullness.qual.RequiresNonNull;
decoder = decoderFactory.createForAudioDecoding(inputFormat);
}
@Override
protected boolean shouldDropInputBuffer(DecoderInputBuffer inputBuffer) {
if (inputBuffer.isEndOfStream()) {
return false;
}
inputBuffer.timeUs -= streamStartPositionUs;
// Drop samples with negative timestamp in the transcoding case, to prevent encoder failures.
if (decoder != null && inputBuffer.timeUs < 0) {
inputBuffer.clear();
return true;
}
return false;
}
/**
* Attempts to get decoded audio data and pass it to the sample consumer.
*

View File

@@ -40,6 +40,7 @@ import org.checkerframework.checker.nullness.qual.RequiresNonNull;
/* package */ abstract class ExoAssetLoaderBaseRenderer extends BaseRenderer {
protected long streamStartPositionUs;
protected long streamOffsetUs;
protected @MonotonicNonNull SampleConsumer sampleConsumer;
protected @MonotonicNonNull Codec decoder;
@@ -52,7 +53,6 @@ import org.checkerframework.checker.nullness.qual.RequiresNonNull;
private final DecoderInputBuffer decoderInputBuffer;
private boolean isRunning;
private long streamStartPositionUs;
private boolean shouldInitDecoder;
private boolean hasPendingConsumerInput;
@@ -213,10 +213,7 @@ import org.checkerframework.checker.nullness.qual.RequiresNonNull;
onInputFormatRead(inputFormat);
shouldInitDecoder =
assetLoaderListener.onTrackAdded(
inputFormat,
SUPPORTED_OUTPUT_TYPE_DECODED | SUPPORTED_OUTPUT_TYPE_ENCODED,
streamStartPositionUs,
streamOffsetUs);
inputFormat, SUPPORTED_OUTPUT_TYPE_DECODED | SUPPORTED_OUTPUT_TYPE_ENCODED);
}
if (shouldInitDecoder) {

View File

@@ -93,22 +93,27 @@ import org.checkerframework.checker.nullness.qual.RequiresNonNull;
@Override
protected boolean shouldDropInputBuffer(DecoderInputBuffer inputBuffer) {
ByteBuffer inputBytes = checkNotNull(inputBuffer.data);
if (sefVideoSlowMotionFlattener == null || inputBuffer.isEndOfStream()) {
if (inputBuffer.isEndOfStream()) {
return false;
}
long presentationTimeUs = inputBuffer.timeUs - streamOffsetUs;
boolean shouldDropInputBuffer =
sefVideoSlowMotionFlattener.dropOrTransformSample(inputBytes, presentationTimeUs);
if (shouldDropInputBuffer) {
inputBytes.clear();
} else {
ByteBuffer inputBytes = checkNotNull(inputBuffer.data);
if (sefVideoSlowMotionFlattener != null) {
long presentationTimeUs = inputBuffer.timeUs - streamOffsetUs;
boolean shouldDropInputBuffer =
sefVideoSlowMotionFlattener.dropOrTransformSample(inputBytes, presentationTimeUs);
if (shouldDropInputBuffer) {
inputBytes.clear();
return true;
}
inputBuffer.timeUs =
streamOffsetUs + sefVideoSlowMotionFlattener.getSamplePresentationTimeUs();
}
return shouldDropInputBuffer;
if (decoder == null) {
inputBuffer.timeUs -= streamStartPositionUs;
}
return false;
}
@Override
@@ -132,7 +137,9 @@ import org.checkerframework.checker.nullness.qual.RequiresNonNull;
return false;
}
if (isDecodeOnlyBuffer(decoderOutputBufferInfo.presentationTimeUs)) {
long presentationTimeUs = decoderOutputBufferInfo.presentationTimeUs - streamStartPositionUs;
// Drop samples with negative timestamp in the transcoding case, to prevent encoder failures.
if (presentationTimeUs < 0 || isDecodeOnlyBuffer(decoderOutputBufferInfo.presentationTimeUs)) {
decoder.releaseOutputBuffer(/* render= */ false);
return true;
}
@@ -142,11 +149,11 @@ import org.checkerframework.checker.nullness.qual.RequiresNonNull;
return false;
}
if (!sampleConsumer.registerVideoFrame(decoderOutputBufferInfo.presentationTimeUs)) {
if (!sampleConsumer.registerVideoFrame(presentationTimeUs)) {
return false;
}
decoder.releaseOutputBuffer(/* render= */ true);
decoder.releaseOutputBuffer(presentationTimeUs);
return true;
}

View File

@@ -116,11 +116,7 @@ public final class ImageAssetLoader implements AssetLoader {
.setWidth(bitmap.getWidth())
.setSampleMimeType(MIME_TYPE_IMAGE_ALL)
.build();
listener.onTrackAdded(
format,
SUPPORTED_OUTPUT_TYPE_DECODED,
/* streamStartPositionUs= */ 0,
/* streamOffsetUs= */ 0);
listener.onTrackAdded(format, SUPPORTED_OUTPUT_TYPE_DECODED);
scheduledExecutorService.submit(() -> queueBitmapInternal(bitmap, format));
} catch (RuntimeException e) {
listener.onError(ExportException.createForAssetLoader(e, ERROR_CODE_UNSPECIFIED));

View File

@@ -47,16 +47,13 @@ import java.util.List;
*/
/* package */ abstract class SamplePipeline implements SampleConsumer, OnMediaItemChangedListener {
private final long streamStartPositionUs;
private final MuxerWrapper muxerWrapper;
private final @C.TrackType int outputTrackType;
@Nullable private final Metadata metadata;
private boolean muxerWrapperTrackAdded;
public SamplePipeline(
Format firstInputFormat, long streamStartPositionUs, MuxerWrapper muxerWrapper) {
this.streamStartPositionUs = streamStartPositionUs;
public SamplePipeline(Format firstInputFormat, MuxerWrapper muxerWrapper) {
this.muxerWrapper = muxerWrapper;
this.metadata = firstInputFormat.metadata;
outputTrackType = getProcessedTrackType(firstInputFormat.sampleMimeType);
@@ -118,15 +115,12 @@ import java.util.List;
return false;
}
long samplePresentationTimeUs = muxerInputBuffer.timeUs - streamStartPositionUs;
// TODO(b/204892224): Consider subtracting the first sample timestamp from the sample pipeline
// buffer from all samples so that they are guaranteed to start from zero in the output file.
try {
if (!muxerWrapper.writeSample(
outputTrackType,
checkStateNotNull(muxerInputBuffer.data),
muxerInputBuffer.isKeyFrame(),
samplePresentationTimeUs)) {
muxerInputBuffer.timeUs)) {
return false;
}
} catch (Muxer.MuxerException e) {

View File

@@ -83,7 +83,6 @@ import java.util.concurrent.atomic.AtomicInteger;
private int currentMediaItemIndex;
private AssetLoader currentAssetLoader;
private boolean trackCountReported;
private long currentAssetStartTimeUs;
private boolean decodeAudio;
private boolean decodeVideo;
private long totalDurationUs;
@@ -207,13 +206,7 @@ import java.util.concurrent.atomic.AtomicInteger;
}
@Override
public boolean onTrackAdded(
Format inputFormat,
@SupportedOutputTypes int supportedOutputTypes,
long streamStartPositionUs,
long streamOffsetUs) {
currentAssetStartTimeUs = streamStartPositionUs;
public boolean onTrackAdded(Format inputFormat, @SupportedOutputTypes int supportedOutputTypes) {
boolean isAudio = getProcessedTrackType(inputFormat.sampleMimeType) == C.TRACK_TYPE_AUDIO;
if (!isCurrentAssetFirstAsset) {
return isAudio ? decodeAudio : decodeVideo;
@@ -228,8 +221,7 @@ import java.util.concurrent.atomic.AtomicInteger;
}
boolean decodeOutput =
sequenceAssetLoaderListener.onTrackAdded(
inputFormat, supportedOutputTypes, streamStartPositionUs, streamOffsetUs);
sequenceAssetLoaderListener.onTrackAdded(inputFormat, supportedOutputTypes);
if (isAudio) {
decodeAudio = decodeOutput;
@@ -239,10 +231,7 @@ import java.util.concurrent.atomic.AtomicInteger;
if (addForcedAudioTrack) {
sequenceAssetLoaderListener.onTrackAdded(
FORCE_AUDIO_TRACK_FORMAT,
SUPPORTED_OUTPUT_TYPE_DECODED,
streamStartPositionUs,
streamOffsetUs);
FORCE_AUDIO_TRACK_FORMAT, SUPPORTED_OUTPUT_TYPE_DECODED);
}
return decodeOutput;
@@ -374,7 +363,7 @@ import java.util.concurrent.atomic.AtomicInteger;
@Override
public boolean queueInputBuffer() {
DecoderInputBuffer inputBuffer = checkStateNotNull(sampleConsumer.getInputBuffer());
long globalTimestampUs = totalDurationUs + inputBuffer.timeUs - currentAssetStartTimeUs;
long globalTimestampUs = totalDurationUs + inputBuffer.timeUs;
if (isLooping && globalTimestampUs >= maxSequenceDurationUs) {
if (isMaxSequenceDurationUsFinal && !audioLoopingEnded) {
checkNotNull(inputBuffer.data).limit(0);
@@ -450,7 +439,7 @@ import java.util.concurrent.atomic.AtomicInteger;
@Override
public boolean registerVideoFrame(long presentationTimeUs) {
long globalTimestampUs = totalDurationUs + presentationTimeUs - currentAssetStartTimeUs;
long globalTimestampUs = totalDurationUs + presentationTimeUs;
if (isLooping && globalTimestampUs >= maxSequenceDurationUs) {
if (isMaxSequenceDurationUsFinal && !videoLoopingEnded) {
videoLoopingEnded = true;

View File

@@ -481,17 +481,11 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
@Override
public boolean onTrackAdded(
Format firstAssetLoaderInputFormat,
@AssetLoader.SupportedOutputTypes int supportedOutputTypes,
long streamStartPositionUs,
long streamOffsetUs) {
@AssetLoader.SupportedOutputTypes int supportedOutputTypes) {
@C.TrackType
int trackType = getProcessedTrackType(firstAssetLoaderInputFormat.sampleMimeType);
AddedTrackInfo trackInfo =
new AddedTrackInfo(
firstAssetLoaderInputFormat,
supportedOutputTypes,
streamStartPositionUs,
streamOffsetUs);
new AddedTrackInfo(firstAssetLoaderInputFormat, supportedOutputTypes);
addedTrackInfoByTrackType.put(trackType, trackInfo);
if (trackType == C.TRACK_TYPE_AUDIO) {
@@ -547,7 +541,6 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
return new AudioSamplePipeline(
addedTrackInfo.firstAssetLoaderInputFormat,
/* firstPipelineInputFormat= */ firstAssetLoaderOutputFormat,
addedTrackInfo.streamOffsetUs,
transformationRequest,
firstEditedMediaItem.flattenForSlowMotion,
firstEditedMediaItem.effects.audioProcessors,
@@ -566,8 +559,6 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
return new VideoSamplePipeline(
context,
addedTrackInfo.firstAssetLoaderInputFormat,
addedTrackInfo.streamStartPositionUs,
addedTrackInfo.streamOffsetUs,
transformationRequest,
firstEditedMediaItem.effects.videoEffects,
compositionPresentation,
@@ -582,11 +573,7 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
}
return new EncodedSamplePipeline(
firstAssetLoaderOutputFormat,
addedTrackInfo.streamStartPositionUs,
transformationRequest,
muxerWrapper,
fallbackListener);
firstAssetLoaderOutputFormat, transformationRequest, muxerWrapper, fallbackListener);
}
/**
@@ -631,31 +618,17 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
private final class AddedTrackInfo {
public final Format firstAssetLoaderInputFormat;
public final long streamStartPositionUs;
public final long streamOffsetUs;
public final boolean shouldTranscode;
public AddedTrackInfo(
Format firstAssetLoaderInputFormat,
@AssetLoader.SupportedOutputTypes int supportedOutputTypes,
long streamStartPositionUs,
long streamOffsetUs) {
@AssetLoader.SupportedOutputTypes int supportedOutputTypes) {
this.firstAssetLoaderInputFormat = firstAssetLoaderInputFormat;
this.streamStartPositionUs = streamStartPositionUs;
this.streamOffsetUs = streamOffsetUs;
shouldTranscode =
shouldTranscode(
firstAssetLoaderInputFormat,
supportedOutputTypes,
streamStartPositionUs,
streamOffsetUs);
shouldTranscode = shouldTranscode(firstAssetLoaderInputFormat, supportedOutputTypes);
}
private boolean shouldTranscode(
Format inputFormat,
@AssetLoader.SupportedOutputTypes int supportedOutputTypes,
long streamStartPositionUs,
long streamOffsetUs) {
Format inputFormat, @AssetLoader.SupportedOutputTypes int supportedOutputTypes) {
boolean assetLoaderCanOutputDecoded =
(supportedOutputTypes & SUPPORTED_OUTPUT_TYPE_DECODED) != 0;
boolean assetLoaderCanOutputEncoded =
@@ -670,8 +643,7 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
} else if (trackType == C.TRACK_TYPE_AUDIO) {
shouldTranscode = shouldTranscodeAudio(inputFormat);
} else if (trackType == C.TRACK_TYPE_VIDEO) {
shouldTranscode =
shouldTranscodeVideo(inputFormat, streamStartPositionUs, streamOffsetUs);
shouldTranscode = shouldTranscodeVideo(inputFormat);
}
checkState(!shouldTranscode || assetLoaderCanOutputDecoded);
@@ -704,13 +676,12 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
return false;
}
private boolean shouldTranscodeVideo(
Format inputFormat, long streamStartPositionUs, long streamOffsetUs) {
private boolean shouldTranscodeVideo(Format inputFormat) {
if (composition.sequences.size() > 1 || editedMediaItems.size() > 1) {
return !composition.transmuxVideo;
}
EditedMediaItem firstEditedMediaItem = editedMediaItems.get(0);
if ((streamStartPositionUs - streamOffsetUs) != 0
if (firstEditedMediaItem.mediaItem.clippingConfiguration.startPositionMs > 0
&& !firstEditedMediaItem.mediaItem.clippingConfiguration.startsAtKeyFrame) {
return true;
}

View File

@@ -61,7 +61,6 @@ import org.checkerframework.dataflow.qual.Pure;
/** MIME type to use for output video if the input type is not a video. */
private static final String DEFAULT_OUTPUT_MIME_TYPE = MimeTypes.VIDEO_H265;
private final long streamOffsetUs;
private final AtomicLong mediaItemOffsetUs;
private final VideoFrameProcessor videoFrameProcessor;
private final ColorInfo videoFrameProcessorInputColor;
@@ -77,8 +76,6 @@ import org.checkerframework.dataflow.qual.Pure;
public VideoSamplePipeline(
Context context,
Format firstInputFormat,
long streamStartPositionUs,
long streamOffsetUs,
TransformationRequest transformationRequest,
ImmutableList<Effect> effects,
@Nullable Presentation presentation,
@@ -91,8 +88,7 @@ import org.checkerframework.dataflow.qual.Pure;
DebugViewProvider debugViewProvider)
throws ExportException {
// TODO(b/262693177) Add tests for input format change.
super(firstInputFormat, streamStartPositionUs, muxerWrapper);
this.streamOffsetUs = streamOffsetUs;
super(firstInputFormat, muxerWrapper);
mediaItemOffsetUs = new AtomicLong();
finalFramePresentationTimeUs = C.TIME_UNSET;
@@ -199,7 +195,6 @@ import org.checkerframework.dataflow.qual.Pure;
new FrameInfo.Builder(decodedSize.getWidth(), decodedSize.getHeight())
.setPixelWidthHeightRatio(trackFormat.pixelWidthHeightRatio)
.setOffsetToAddUs(mediaItemOffsetUs.get())
.setStreamOffsetUs(streamOffsetUs)
.build());
}
mediaItemOffsetUs.addAndGet(durationUs);

View File

@@ -75,10 +75,7 @@ public class ExoPlayerAssetLoaderTest {
@Override
public boolean onTrackAdded(
Format inputFormat,
@AssetLoader.SupportedOutputTypes int supportedOutputTypes,
long streamStartPositionUs,
long streamOffsetUs) {
Format inputFormat, @AssetLoader.SupportedOutputTypes int supportedOutputTypes) {
if (!isDurationSet) {
exceptionRef.set(
new IllegalStateException("onTrackAdded() called before onDurationUs()"));

View File

@@ -67,10 +67,7 @@ public class ImageAssetLoaderTest {
@Override
public boolean onTrackAdded(
Format inputFormat,
@AssetLoader.SupportedOutputTypes int supportedOutputTypes,
long streamStartPositionUs,
long streamOffsetUs) {
Format inputFormat, @AssetLoader.SupportedOutputTypes int supportedOutputTypes) {
if (!isDurationSet) {
exceptionRef.set(
new IllegalStateException("onTrackAdded() called before onDurationUs()"));

View File

@@ -117,8 +117,7 @@ public final class TestUtil {
.setChannelCount(2)
.build();
try {
listener.onTrackAdded(
format, supportedOutputTypes, /* streamStartPositionUs= */ 0, /* streamOffsetUs= */ 0);
listener.onTrackAdded(format, supportedOutputTypes);
SampleConsumer sampleConsumer = listener.onOutputFormat(format);
if (sampleConsumerRef != null) {