Implement interleaving of editable video tracks

This CL adds a second way of writing editable video
tracks, in which their samples are interleaved with the
primary track samples in the "mdat" box.

PiperOrigin-RevId: 665313751
This commit is contained in:
sheenachhabra 2024-08-20 05:25:48 -07:00 committed by Copybara-Service
parent 643ec73e58
commit bb3948aa98
5 changed files with 345 additions and 12 deletions

View File

@ -121,14 +121,21 @@ public final class Mp4Muxer implements Muxer {
String getCacheFilePath();
}
public final CacheFileProvider cacheFileProvider;
public final boolean shouldInterleaveSamples;
@Nullable public final CacheFileProvider cacheFileProvider;
/**
* Creates an instance.
*
* @param cacheFileProvider A {@link CacheFileProvider}.
* @param shouldInterleaveSamples Whether to interleave editable video track samples with
* primary track samples.
* @param cacheFileProvider A {@link CacheFileProvider}. Required only when {@code
* shouldInterleaveSamples} is set to {@code false}, can be {@code null} otherwise.
*/
public EditableVideoParameters(CacheFileProvider cacheFileProvider) {
public EditableVideoParameters(
boolean shouldInterleaveSamples, @Nullable CacheFileProvider cacheFileProvider) {
checkArgument(shouldInterleaveSamples || cacheFileProvider != null);
this.shouldInterleaveSamples = shouldInterleaveSamples;
this.cacheFileProvider = cacheFileProvider;
}
}
@ -373,6 +380,10 @@ public final class Mp4Muxer implements Muxer {
*/
public TrackToken addTrack(int sortKey, Format format) throws MuxerException {
if (outputFileFormat == FILE_FORMAT_EDITABLE_VIDEO && isEditableVideoTrack(format)) {
if (checkNotNull(editableVideoParameters).shouldInterleaveSamples) {
// Editable video tracks are handled by the primary Mp4Writer.
return mp4Writer.addEditableVideoTrack(sortKey, format);
}
try {
ensureSetupForEditableVideoTracks();
} catch (FileNotFoundException e) {
@ -484,7 +495,8 @@ public final class Mp4Muxer implements Muxer {
@EnsuresNonNull({"editableVideoMp4Writer"})
private void ensureSetupForEditableVideoTracks() throws FileNotFoundException {
if (editableVideoMp4Writer == null) {
cacheFilePath = checkNotNull(editableVideoParameters).cacheFileProvider.getCacheFilePath();
cacheFilePath =
checkNotNull(checkNotNull(editableVideoParameters).cacheFileProvider).getCacheFilePath();
cacheFileOutputStream = new FileOutputStream(cacheFilePath);
editableVideoMetadataCollector = new MetadataCollector();
editableVideoMp4Writer =

View File

@ -20,12 +20,17 @@ import static androidx.media3.common.util.Assertions.checkState;
import static androidx.media3.muxer.AnnexBUtils.doesSampleContainAnnexBNalUnits;
import static androidx.media3.muxer.Boxes.BOX_HEADER_SIZE;
import static androidx.media3.muxer.Boxes.LARGE_SIZE_BOX_HEADER_SIZE;
import static androidx.media3.muxer.Boxes.getEdvdBoxHeader;
import static androidx.media3.muxer.MuxerUtil.getEditableTracksLengthMetadata;
import static androidx.media3.muxer.MuxerUtil.getEditableTracksOffsetMetadata;
import static androidx.media3.muxer.MuxerUtil.populateEditableVideoTracksMetadata;
import static java.lang.Math.max;
import static java.lang.Math.min;
import android.media.MediaCodec.BufferInfo;
import androidx.media3.common.Format;
import androidx.media3.common.util.Util;
import androidx.media3.container.MdtaMetadataEntry;
import com.google.common.collect.Range;
import java.io.IOException;
import java.nio.ByteBuffer;
@ -47,6 +52,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
private final @Mp4Muxer.LastFrameDurationBehavior int lastFrameDurationBehavior;
private final boolean sampleCopyEnabled;
private final List<Track> tracks;
private final List<Track> editableVideoTracks;
private final AtomicBoolean hasWrittenSamples;
// Stores location of the space reserved for the moov box at the beginning of the file (after ftyp
@ -89,6 +95,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
this.lastFrameDurationBehavior = lastFrameDurationBehavior;
this.sampleCopyEnabled = sampleCopyEnabled;
tracks = new ArrayList<>();
editableVideoTracks = new ArrayList<>();
hasWrittenSamples = new AtomicBoolean(false);
canWriteMoovAtStart = attemptStreamableOutputEnabled;
lastMoovWritten = Range.closed(0L, 0L);
@ -108,6 +115,22 @@ import java.util.concurrent.atomic.AtomicBoolean;
return track;
}
/**
 * Registers a new editable video track described by the given {@link Format}.
 *
 * <p>Refer to {@link MuxerUtil#isEditableVideoTrack(Format)} for what qualifies as an editable
 * video track.
 *
 * @param sortKey The key used to order the track within the editable video track list.
 * @param format The {@link Format} of the track.
 * @return The newly created {@link Track}, to be passed to {@link #writeSampleData}.
 */
public Track addEditableVideoTrack(int sortKey, Format format) {
  Track editableVideoTrack = new Track(format, sortKey, sampleCopyEnabled);
  editableVideoTracks.add(editableVideoTrack);
  // Keep the editable video tracks ordered by ascending sort key.
  Collections.sort(
      editableVideoTracks, (first, second) -> Integer.compare(first.sortKey, second.sortKey));
  return editableVideoTrack;
}
/**
* Writes encoded sample data.
*
@ -132,11 +155,60 @@ import java.util.concurrent.atomic.AtomicBoolean;
for (int i = 0; i < tracks.size(); i++) {
writePendingTrackSamples(tracks.get(i));
}
for (int i = 0; i < editableVideoTracks.size(); i++) {
writePendingTrackSamples(editableVideoTracks.get(i));
}
// Leave the file empty if no samples are written.
if (hasWrittenSamples.get()) {
finalizeMoovBox();
if (!hasWrittenSamples.get()) {
return;
}
finalizeMoovBox();
if (!editableVideoTracks.isEmpty()) {
writeEdvdBox();
}
}
/**
 * Appends the edvd box at the end of the output file and adds metadata entries for its offset
 * and length to {@code metadataCollector}.
 *
 * <p>The moov box is finalized twice here: first with a placeholder offset entry, then again
 * once the placeholder has been replaced by the actual offset (the file size at that point).
 *
 * @throws IOException If finalizing the moov box or accessing the output file channel fails.
 */
private void writeEdvdBox() throws IOException {
// The actual offset is the file size after the moov box has been finalized, so a placeholder
// entry is added first (presumably so that the moov box already accounts for the entry).
MdtaMetadataEntry placeholderEditableTrackOffset =
getEditableTracksOffsetMetadata(/* offset= */ 0L);
metadataCollector.addMetadata(placeholderEditableTrackOffset);
ByteBuffer edvdBox = getEdvdBox();
// The length entry records the number of bytes of the complete edvd box.
metadataCollector.addMetadata(getEditableTracksLengthMetadata(edvdBox.remaining()));
finalizeMoovBox();
// Once final moov is written, update the actual offset.
metadataCollector.removeMdtaMetadataEntry(placeholderEditableTrackOffset);
metadataCollector.addMetadata(getEditableTracksOffsetMetadata(outputFileChannel.size()));
long fileSizeBefore = outputFileChannel.size();
finalizeMoovBox();
// Swapping the placeholder for the actual offset must not change the file size, otherwise the
// offset recorded above would no longer point at the end of the file.
checkState(fileSizeBefore == outputFileChannel.size());
// Write the edvd box at the end of the file, which is the offset recorded above.
outputFileChannel.position(outputFileChannel.size());
outputFileChannel.write(edvdBox);
}
/**
 * Builds the complete edvd box: a header followed by an ftyp box and a moov box that describes
 * the editable video tracks.
 */
private ByteBuffer getEdvdBox() {
  ByteBuffer ftypBox = Boxes.ftyp();

  // The editable video tracks get their own metadata, derived from the primary timestamp data.
  MetadataCollector editableVideoMetadataCollector = new MetadataCollector();
  populateEditableVideoTracksMetadata(
      editableVideoMetadataCollector,
      metadataCollector.timestampData,
      /* samplesInterleaved= */ true,
      editableVideoTracks);

  ByteBuffer moovBox =
      Mp4MoovStructure.moov(
          editableVideoTracks,
          editableVideoMetadataCollector,
          findMinimumPresentationTimestampUsAcrossTracks(editableVideoTracks),
          /* isFragmentedMp4= */ false,
          lastFrameDurationBehavior);

  // The header is sized for the ftyp and moov boxes that follow it.
  int payloadSize = ftypBox.remaining() + moovBox.remaining();
  return BoxUtils.concatenateBuffers(getEdvdBoxHeader(payloadSize), ftypBox, moovBox);
}
/**
@ -435,9 +507,10 @@ import java.util.concurrent.atomic.AtomicBoolean;
}
private void doInterleave() throws IOException {
boolean newSamplesWritten = maybeWritePendingTrackSamples(tracks);
boolean primaryTrackSampleWritten = maybeWritePendingTrackSamples(tracks);
maybeWritePendingTrackSamples(editableVideoTracks);
if (newSamplesWritten && canWriteMoovAtStart) {
if (primaryTrackSampleWritten && canWriteMoovAtStart) {
maybeWriteMoovAtStart();
}
}

View File

@ -384,7 +384,9 @@ public class Mp4MuxerEndToEndTest {
Mp4Muxer muxer =
new Mp4Muxer.Builder(new FileOutputStream(outputFilePath))
.setOutputFileFormat(Mp4Muxer.FILE_FORMAT_EDITABLE_VIDEO)
.setEditableVideoParameters(new Mp4Muxer.EditableVideoParameters(() -> cacheFilePath))
.setEditableVideoParameters(
new Mp4Muxer.EditableVideoParameters(
/* shouldInterleaveSamples= */ false, () -> cacheFilePath))
.build();
try {
@ -416,7 +418,7 @@ public class Mp4MuxerEndToEndTest {
DumpableMp4Box outputFileDumpableBox =
new DumpableMp4Box(ByteBuffer.wrap(TestUtil.getByteArrayFromFilePath(outputFilePath)));
// 1 track is written in the outer moov box and 2 tracks are writtin in the edvd.moov box.
// 1 track is written in the outer moov box and 2 tracks are written in the edvd.moov box.
DumpFileAsserts.assertOutput(
context,
outputFileDumpableBox,
@ -474,7 +476,9 @@ public class Mp4MuxerEndToEndTest {
Mp4Muxer muxer =
new Mp4Muxer.Builder(new FileOutputStream(outputFilePath))
.setOutputFileFormat(Mp4Muxer.FILE_FORMAT_EDITABLE_VIDEO)
.setEditableVideoParameters(new Mp4Muxer.EditableVideoParameters(() -> cacheFilePath))
.setEditableVideoParameters(
new Mp4Muxer.EditableVideoParameters(
/* shouldInterleaveSamples= */ false, () -> cacheFilePath))
.build();
try {
@ -523,7 +527,9 @@ public class Mp4MuxerEndToEndTest {
Mp4Muxer muxer =
new Mp4Muxer.Builder(new FileOutputStream(outputFilePath))
.setOutputFileFormat(Mp4Muxer.FILE_FORMAT_EDITABLE_VIDEO)
.setEditableVideoParameters(new Mp4Muxer.EditableVideoParameters(() -> cacheFilePath))
.setEditableVideoParameters(
new Mp4Muxer.EditableVideoParameters(
/* shouldInterleaveSamples= */ false, () -> cacheFilePath))
.build();
try {
@ -564,6 +570,109 @@ public class Mp4MuxerEndToEndTest {
MuxerTestUtil.getExpectedDumpFilePath("mp4_with_editable_video_tracks.mp4"));
}
@Test
public void
    writeMp4File_withFileFormatEditableVideoAndEditableVideoTracksAndShouldInterleaveSamples_primaryVideoTracksMatchesExpected()
        throws Exception {
  String outputFilePath = temporaryFolder.newFile().getPath();
  // With interleaving enabled, no cache file provider is required.
  Mp4Muxer.EditableVideoParameters interleavingParameters =
      new Mp4Muxer.EditableVideoParameters(
          /* shouldInterleaveSamples= */ true, /* cacheFileProvider= */ null);
  Mp4Muxer muxer =
      new Mp4Muxer.Builder(new FileOutputStream(outputFilePath))
          .setOutputFileFormat(Mp4Muxer.FILE_FORMAT_EDITABLE_VIDEO)
          .setEditableVideoParameters(interleavingParameters)
          .build();

  try {
    muxer.addMetadataEntry(
        new Mp4TimestampData(
            /* creationTimestampSeconds= */ 1_000_000L,
            /* modificationTimestampSeconds= */ 5_000_000L));
    TrackToken primaryTrack = muxer.addTrack(FAKE_VIDEO_FORMAT);
    TrackToken originalAuxiliaryTrack =
        muxer.addTrack(
            FAKE_VIDEO_FORMAT
                .buildUpon()
                .setRoleFlags(C.ROLE_FLAG_AUXILIARY)
                .setAuxiliaryTrackType(C.AUXILIARY_TRACK_TYPE_ORIGINAL)
                .build());
    TrackToken depthLinearAuxiliaryTrack =
        muxer.addTrack(
            FAKE_VIDEO_FORMAT
                .buildUpon()
                .setRoleFlags(C.ROLE_FLAG_AUXILIARY)
                .setAuxiliaryTrackType(C.AUXILIARY_TRACK_TYPE_DEPTH_LINEAR)
                .build());
    // Write the samples track by track: primary first, then the two auxiliary tracks.
    for (TrackToken trackToken :
        new TrackToken[] {primaryTrack, originalAuxiliaryTrack, depthLinearAuxiliaryTrack}) {
      writeFakeSamples(muxer, trackToken, /* sampleCount= */ 5);
    }
  } finally {
    muxer.close();
  }

  // Without extra flags the Mp4Extractor extracts the primary tracks.
  Mp4Extractor primaryTracksExtractor = new Mp4Extractor(new DefaultSubtitleParserFactory());
  FakeExtractorOutput primaryTracksOutput =
      TestUtil.extractAllSamplesFromFilePath(primaryTracksExtractor, outputFilePath);
  DumpFileAsserts.assertOutput(
      context,
      primaryTracksOutput,
      MuxerTestUtil.getExpectedDumpFilePath(
          "mp4_with_primary_tracks_when_editable_track_samples_interleaved.mp4"));
}
@Test
public void
    writeMp4File_withFileFormatEditableVideoAndEditableVideoTracksAndShouldInterleaveSamples_editableVideoTracksMatchesExpected()
        throws Exception {
  String outputFilePath = temporaryFolder.newFile().getPath();
  // With interleaving enabled, no cache file provider is required.
  Mp4Muxer.EditableVideoParameters interleavingParameters =
      new Mp4Muxer.EditableVideoParameters(
          /* shouldInterleaveSamples= */ true, /* cacheFileProvider= */ null);
  Mp4Muxer muxer =
      new Mp4Muxer.Builder(new FileOutputStream(outputFilePath))
          .setOutputFileFormat(Mp4Muxer.FILE_FORMAT_EDITABLE_VIDEO)
          .setEditableVideoParameters(interleavingParameters)
          .build();

  try {
    muxer.addMetadataEntry(
        new Mp4TimestampData(
            /* creationTimestampSeconds= */ 1_000_000L,
            /* modificationTimestampSeconds= */ 5_000_000L));
    TrackToken primaryTrack = muxer.addTrack(FAKE_VIDEO_FORMAT);
    TrackToken originalAuxiliaryTrack =
        muxer.addTrack(
            FAKE_VIDEO_FORMAT
                .buildUpon()
                .setRoleFlags(C.ROLE_FLAG_AUXILIARY)
                .setAuxiliaryTrackType(C.AUXILIARY_TRACK_TYPE_ORIGINAL)
                .build());
    TrackToken depthLinearAuxiliaryTrack =
        muxer.addTrack(
            FAKE_VIDEO_FORMAT
                .buildUpon()
                .setRoleFlags(C.ROLE_FLAG_AUXILIARY)
                .setAuxiliaryTrackType(C.AUXILIARY_TRACK_TYPE_DEPTH_LINEAR)
                .build());
    // Write the samples track by track: primary first, then the two auxiliary tracks.
    for (TrackToken trackToken :
        new TrackToken[] {primaryTrack, originalAuxiliaryTrack, depthLinearAuxiliaryTrack}) {
      writeFakeSamples(muxer, trackToken, /* sampleCount= */ 5);
    }
  } finally {
    muxer.close();
  }

  // Extract with the flag that requests the editable video tracks.
  Mp4Extractor editableTracksExtractor =
      new Mp4Extractor(
          new DefaultSubtitleParserFactory(), Mp4Extractor.FLAG_READ_EDITABLE_VIDEO_TRACKS);
  FakeExtractorOutput editableTracksOutput =
      TestUtil.extractAllSamplesFromFilePath(editableTracksExtractor, outputFilePath);
  DumpFileAsserts.assertOutput(
      context,
      editableTracksOutput,
      MuxerTestUtil.getExpectedDumpFilePath(
          "mp4_with_editable_video_tracks_when_editable_track_samples_interleaved.mp4"));
}
private static void writeFakeSamples(Mp4Muxer muxer, TrackToken trackToken, int sampleCount)
throws Muxer.MuxerException {
for (int i = 0; i < sampleCount; i++) {

View File

@ -0,0 +1,91 @@
seekMap:
isSeekable = true
duration = 0
getPosition(0) = [[timeUs=0, position=400332]]
getPosition(1) = [[timeUs=0, position=400556]]
getPosition(0) = [[timeUs=0, position=400332]]
getPosition(0) = [[timeUs=0, position=400332]]
numberOfTracks = 2
track 0:
total output bytes = 280
sample count = 5
format 0:
id = 1
sampleMimeType = video/avc
codecs = avc1.F4000A
maxInputSize = 86
maxNumReorderSamples = 2
width = 12
height = 10
colorInfo:
colorRange = 1
lumaBitdepth = 8
chromaBitdepth = 8
roleFlags = [auxiliary]
auxiliaryTrackType = original
metadata = entries=[mdta: key=editable.tracks.map, value=track types = 0,1, mdta: key=editable.tracks.samples.location, value=1, Mp4Timestamp: creation time=1000000, modification time=5000000, timescale=10000]
initializationData:
data = length 28, hash 410B510
data = length 9, hash FBADD682
sample 0:
time = 0
flags = 1
data = length 56, hash C4551A2E
sample 1:
time = 0
flags = 1
data = length 56, hash C4551A2E
sample 2:
time = 0
flags = 1
data = length 56, hash C4551A2E
sample 3:
time = 0
flags = 1
data = length 56, hash C4551A2E
sample 4:
time = 0
flags = 536870913
data = length 56, hash C4551A2E
track 1:
total output bytes = 280
sample count = 5
format 0:
id = 2
sampleMimeType = video/avc
codecs = avc1.F4000A
maxInputSize = 86
maxNumReorderSamples = 2
width = 12
height = 10
colorInfo:
colorRange = 1
lumaBitdepth = 8
chromaBitdepth = 8
roleFlags = [auxiliary]
auxiliaryTrackType = depth-linear
metadata = entries=[mdta: key=editable.tracks.map, value=track types = 0,1, mdta: key=editable.tracks.samples.location, value=1, Mp4Timestamp: creation time=1000000, modification time=5000000, timescale=10000]
initializationData:
data = length 28, hash 410B510
data = length 9, hash FBADD682
sample 0:
time = 0
flags = 1
data = length 56, hash C4551A2E
sample 1:
time = 0
flags = 1
data = length 56, hash C4551A2E
sample 2:
time = 0
flags = 1
data = length 56, hash C4551A2E
sample 3:
time = 0
flags = 1
data = length 56, hash C4551A2E
sample 4:
time = 0
flags = 536870913
data = length 56, hash C4551A2E
tracksEnded = true

View File

@ -0,0 +1,48 @@
seekMap:
isSeekable = true
duration = 0
getPosition(0) = [[timeUs=0, position=400052]]
getPosition(1) = [[timeUs=0, position=400276]]
getPosition(0) = [[timeUs=0, position=400052]]
getPosition(0) = [[timeUs=0, position=400052]]
numberOfTracks = 1
track 0:
total output bytes = 280
sample count = 5
format 0:
id = 1
sampleMimeType = video/avc
codecs = avc1.F4000A
maxInputSize = 86
maxNumReorderSamples = 2
width = 12
height = 10
colorInfo:
colorRange = 1
lumaBitdepth = 8
chromaBitdepth = 8
metadata = entries=[mdta: key=editable.tracks.length, value=1493, mdta: key=editable.tracks.offset, value=400892, Mp4Timestamp: creation time=1000000, modification time=5000000, timescale=10000]
initializationData:
data = length 28, hash 410B510
data = length 9, hash FBADD682
sample 0:
time = 0
flags = 1
data = length 56, hash C4551A2E
sample 1:
time = 0
flags = 1
data = length 56, hash C4551A2E
sample 2:
time = 0
flags = 1
data = length 56, hash C4551A2E
sample 3:
time = 0
flags = 1
data = length 56, hash C4551A2E
sample 4:
time = 0
flags = 536870913
data = length 56, hash C4551A2E
tracksEnded = true