From 4b7c5100f143c0705ccad7e27faea6815c85ba0f Mon Sep 17 00:00:00 2001 From: sheenachhabra Date: Tue, 20 Aug 2024 07:44:40 -0700 Subject: [PATCH] Move moov box generation to Boxes.java This is a no-op change. Like all other boxes, moov box creation can also be in Boxes.java class. PiperOrigin-RevId: 665359529 --- .../java/androidx/media3/muxer/Boxes.java | 196 +++++++++++++++ .../media3/muxer/FragmentedMp4Writer.java | 2 +- .../media3/muxer/Mp4MoovStructure.java | 227 ------------------ .../java/androidx/media3/muxer/Mp4Writer.java | 4 +- .../java/androidx/media3/muxer/Track.java | 2 +- 5 files changed, 200 insertions(+), 231 deletions(-) delete mode 100644 libraries/muxer/src/main/java/androidx/media3/muxer/Mp4MoovStructure.java diff --git a/libraries/muxer/src/main/java/androidx/media3/muxer/Boxes.java b/libraries/muxer/src/main/java/androidx/media3/muxer/Boxes.java index 202cadf243..d3051cc5eb 100644 --- a/libraries/muxer/src/main/java/androidx/media3/muxer/Boxes.java +++ b/libraries/muxer/src/main/java/androidx/media3/muxer/Boxes.java @@ -21,6 +21,7 @@ import static androidx.media3.common.util.Assertions.checkState; import static androidx.media3.muxer.ColorUtils.MEDIAFORMAT_STANDARD_TO_PRIMARIES_AND_MATRIX; import static androidx.media3.muxer.ColorUtils.MEDIAFORMAT_TRANSFER_TO_MP4_TRANSFER; import static androidx.media3.muxer.MuxerUtil.UNSIGNED_INT_MAX_VALUE; +import static java.lang.Math.max; import static java.nio.charset.StandardCharsets.UTF_8; import android.media.MediaCodec; @@ -37,6 +38,7 @@ import androidx.media3.container.NalUnitUtil; import androidx.media3.muxer.FragmentedMp4Writer.SampleMetadata; import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; import com.google.common.primitives.Bytes; import com.google.common.primitives.Ints; import java.nio.ByteBuffer; @@ -45,6 +47,8 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import 
java.util.List; +import java.util.Locale; +import org.checkerframework.checker.nullness.qual.PolyNull; /** * Writes out various types of boxes as per MP4 (ISO/IEC 14496-12) standards. @@ -53,6 +57,19 @@ import java.util.List; * buffers}. */ /* package */ final class Boxes { + /** Provides track's metadata like media format, written samples. */ + public interface TrackMetadataProvider { + Format format(); + + int videoUnitTimebase(); + + ImmutableList writtenSamples(); + + ImmutableList writtenChunkOffsets(); + + ImmutableList writtenChunkSampleCounts(); + } + /* Total number of bytes in an integer. */ private static final int BYTES_PER_INTEGER = 4; @@ -107,6 +124,169 @@ import java.util.List; (byte) 0xAF, (byte) 0xAC); + /** Returns the moov box. */ + @SuppressWarnings("InlinedApi") + public static ByteBuffer moov( + List tracks, + MetadataCollector metadataCollector, + long minInputPtsUs, + boolean isFragmentedMp4, + @Mp4Muxer.LastFrameDurationBehavior int lastFrameDurationBehavior) { + // The timestamp will always fit into a 32-bit integer. This is already validated in the + // Mp4Muxer.setTimestampData() API. The value after type casting might be negative, but it is + // still valid because it is meant to be read as an unsigned integer. + int creationTimestampSeconds = (int) metadataCollector.timestampData.creationTimestampSeconds; + int modificationTimestampSeconds = + (int) metadataCollector.timestampData.modificationTimestampSeconds; + List trakBoxes = new ArrayList<>(); + List trexBoxes = new ArrayList<>(); + + int nextTrackId = 1; + long videoDurationUs = 0L; + for (int i = 0; i < tracks.size(); i++) { + TrackMetadataProvider track = tracks.get(i); + if (!isFragmentedMp4 && track.writtenSamples().isEmpty()) { + continue; + } + Format format = track.format(); + String languageCode = bcp47LanguageTagToIso3(format.language); + + // Generate the sample durations to calculate the total duration for tkhd box. 
+ List sampleDurationsVu = + Boxes.convertPresentationTimestampsToDurationsVu( + track.writtenSamples(), + minInputPtsUs, + track.videoUnitTimebase(), + lastFrameDurationBehavior); + + long trackDurationInTrackUnitsVu = 0; + for (int j = 0; j < sampleDurationsVu.size(); j++) { + trackDurationInTrackUnitsVu += sampleDurationsVu.get(j); + } + + long trackDurationUs = usFromVu(trackDurationInTrackUnitsVu, track.videoUnitTimebase()); + + @C.TrackType int trackType = MimeTypes.getTrackType(format.sampleMimeType); + ByteBuffer stts = Boxes.stts(sampleDurationsVu); + ByteBuffer ctts = + MimeTypes.isVideo(format.sampleMimeType) + ? Boxes.ctts(track.writtenSamples(), sampleDurationsVu, track.videoUnitTimebase()) + : ByteBuffer.allocate(0); + ByteBuffer stsz = Boxes.stsz(track.writtenSamples()); + ByteBuffer stsc = Boxes.stsc(track.writtenChunkSampleCounts()); + ByteBuffer chunkOffsetBox = + isFragmentedMp4 + ? Boxes.stco(track.writtenChunkOffsets()) + : Boxes.co64(track.writtenChunkOffsets()); + + String handlerType; + String handlerName; + ByteBuffer mhdBox; + ByteBuffer sampleEntryBox; + ByteBuffer stsdBox; + ByteBuffer stblBox; + + switch (trackType) { + case C.TRACK_TYPE_VIDEO: + handlerType = "vide"; + handlerName = "VideoHandle"; + mhdBox = Boxes.vmhd(); + sampleEntryBox = Boxes.videoSampleEntry(format); + stsdBox = Boxes.stsd(sampleEntryBox); + stblBox = + Boxes.stbl( + stsdBox, + stts, + ctts, + stsz, + stsc, + chunkOffsetBox, + Boxes.stss(track.writtenSamples())); + break; + case C.TRACK_TYPE_AUDIO: + handlerType = "soun"; + handlerName = "SoundHandle"; + mhdBox = Boxes.smhd(); + sampleEntryBox = Boxes.audioSampleEntry(format); + stsdBox = Boxes.stsd(sampleEntryBox); + stblBox = Boxes.stbl(stsdBox, stts, stsz, stsc, chunkOffsetBox); + break; + case C.TRACK_TYPE_METADATA: + // TODO: (b/280443593) - Check if we can identify a metadata track type from a custom + // mime type. 
+ case C.TRACK_TYPE_UNKNOWN: + handlerType = "meta"; + handlerName = "MetaHandle"; + mhdBox = Boxes.nmhd(); + sampleEntryBox = Boxes.textMetaDataSampleEntry(format); + stsdBox = Boxes.stsd(sampleEntryBox); + stblBox = Boxes.stbl(stsdBox, stts, stsz, stsc, chunkOffsetBox); + break; + default: + throw new IllegalArgumentException("Unsupported track type"); + } + + // The below statement is also a description of what a trak box looks like, with all the + // inner boxes and what they actually store. Each box is built by a static factory method, so + // everything that is written to a box is visible in the argument list. + ByteBuffer trakBox = + Boxes.trak( + Boxes.tkhd( + nextTrackId, + trackDurationUs, + creationTimestampSeconds, + modificationTimestampSeconds, + metadataCollector.orientationData.orientation, + format), + Boxes.mdia( + Boxes.mdhd( + trackDurationInTrackUnitsVu, + track.videoUnitTimebase(), + creationTimestampSeconds, + modificationTimestampSeconds, + languageCode), + Boxes.hdlr(handlerType, handlerName), + Boxes.minf(mhdBox, Boxes.dinf(Boxes.dref(Boxes.localUrl())), stblBox))); + + trakBoxes.add(trakBox); + videoDurationUs = max(videoDurationUs, trackDurationUs); + trexBoxes.add(Boxes.trex(nextTrackId)); + nextTrackId++; + } + + ByteBuffer mvhdBox = + Boxes.mvhd( + nextTrackId, creationTimestampSeconds, modificationTimestampSeconds, videoDurationUs); + ByteBuffer udtaBox = Boxes.udta(metadataCollector.locationData); + ByteBuffer metaBox = + metadataCollector.metadataEntries.isEmpty() + ?
ByteBuffer.allocate(0) + : Boxes.meta( + Boxes.hdlr(/* handlerType= */ "mdta", /* handlerName= */ ""), + Boxes.keys(Lists.newArrayList(metadataCollector.metadataEntries)), + Boxes.ilst(Lists.newArrayList(metadataCollector.metadataEntries))); + + List subBoxes = new ArrayList<>(); + subBoxes.add(mvhdBox); + subBoxes.add(udtaBox); + subBoxes.add(metaBox); + subBoxes.addAll(trakBoxes); + if (isFragmentedMp4) { + subBoxes.add(Boxes.mvex(trexBoxes)); + } + + ByteBuffer moovBox = BoxUtils.wrapBoxesIntoBox("moov", subBoxes); + + // Also add XMP if needed + if (metadataCollector.xmpData != null) { + return BoxUtils.concatenateBuffers( + moovBox, Boxes.uuid(Boxes.XMP_UUID, ByteBuffer.wrap(metadataCollector.xmpData.data))); + } else { + // No need for another copy if there is no XMP to be appended. + return moovBox; + } + } + /** * Returns the tkhd box. * @@ -1065,6 +1245,22 @@ import java.util.List; return edvdBoxHeader; } + /** Returns an ISO 639-2/T (ISO3) language code for the IETF BCP 47 language tag. */ + private static @PolyNull String bcp47LanguageTagToIso3(@PolyNull String languageTag) { + if (languageTag == null) { + return null; + } + + Locale locale = Locale.forLanguageTag(languageTag); + + return locale.getISO3Language().isEmpty() ? languageTag : locale.getISO3Language(); + } + + /** Converts video units to microseconds, using the provided timebase. */ + private static long usFromVu(long timestampVu, long videoUnitTimebase) { + return timestampVu * 1_000_000L / videoUnitTimebase; + } + // TODO: b/317117431 - Change this method to getLastSampleDuration(). /** Adjusts the duration of the very last sample if needed. 
*/ private static void adjustLastSampleDuration( diff --git a/libraries/muxer/src/main/java/androidx/media3/muxer/FragmentedMp4Writer.java b/libraries/muxer/src/main/java/androidx/media3/muxer/FragmentedMp4Writer.java index 1d3e1c181a..b89cfdebba 100644 --- a/libraries/muxer/src/main/java/androidx/media3/muxer/FragmentedMp4Writer.java +++ b/libraries/muxer/src/main/java/androidx/media3/muxer/FragmentedMp4Writer.java @@ -202,7 +202,7 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull; output.write(Boxes.ftyp()); // The minInputPtsUs is actually ignored as there are no pending samples to write. output.write( - Mp4MoovStructure.moov( + Boxes.moov( tracks, metadataCollector, /* minInputPtsUs= */ 0L, diff --git a/libraries/muxer/src/main/java/androidx/media3/muxer/Mp4MoovStructure.java b/libraries/muxer/src/main/java/androidx/media3/muxer/Mp4MoovStructure.java deleted file mode 100644 index 054b72ce09..0000000000 --- a/libraries/muxer/src/main/java/androidx/media3/muxer/Mp4MoovStructure.java +++ /dev/null @@ -1,227 +0,0 @@ -/* - * Copyright 2022 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package androidx.media3.muxer; - -import static java.lang.Math.max; - -import android.media.MediaCodec.BufferInfo; -import androidx.media3.common.C; -import androidx.media3.common.Format; -import androidx.media3.common.MimeTypes; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.List; -import java.util.Locale; -import org.checkerframework.checker.nullness.qual.PolyNull; - -/** Builds the moov box structure of an MP4 file. */ -/* package */ final class Mp4MoovStructure { - /** Provides track's metadata like media format, written samples. */ - public interface TrackMetadataProvider { - Format format(); - - int videoUnitTimebase(); - - ImmutableList writtenSamples(); - - ImmutableList writtenChunkOffsets(); - - ImmutableList writtenChunkSampleCounts(); - } - - private Mp4MoovStructure() {} - - /** Returns the moov box. */ - @SuppressWarnings("InlinedApi") - public static ByteBuffer moov( - List tracks, - MetadataCollector metadataCollector, - long minInputPtsUs, - boolean isFragmentedMp4, - @Mp4Muxer.LastFrameDurationBehavior int lastFrameDurationBehavior) { - // The timestamp will always fit into a 32-bit integer. This is already validated in the - // Mp4Muxer.setTimestampData() API. The value after type casting might be negative, but it is - // still valid because it is meant to be read as an unsigned integer. 
- int creationTimestampSeconds = (int) metadataCollector.timestampData.creationTimestampSeconds; - int modificationTimestampSeconds = - (int) metadataCollector.timestampData.modificationTimestampSeconds; - List trakBoxes = new ArrayList<>(); - List trexBoxes = new ArrayList<>(); - - int nextTrackId = 1; - long videoDurationUs = 0L; - for (int i = 0; i < tracks.size(); i++) { - TrackMetadataProvider track = tracks.get(i); - if (!isFragmentedMp4 && track.writtenSamples().isEmpty()) { - continue; - } - Format format = track.format(); - String languageCode = bcp47LanguageTagToIso3(format.language); - - // Generate the sample durations to calculate the total duration for tkhd box. - List sampleDurationsVu = - Boxes.convertPresentationTimestampsToDurationsVu( - track.writtenSamples(), - minInputPtsUs, - track.videoUnitTimebase(), - lastFrameDurationBehavior); - - long trackDurationInTrackUnitsVu = 0; - for (int j = 0; j < sampleDurationsVu.size(); j++) { - trackDurationInTrackUnitsVu += sampleDurationsVu.get(j); - } - - long trackDurationUs = usFromVu(trackDurationInTrackUnitsVu, track.videoUnitTimebase()); - - @C.TrackType int trackType = MimeTypes.getTrackType(format.sampleMimeType); - ByteBuffer stts = Boxes.stts(sampleDurationsVu); - ByteBuffer ctts = - MimeTypes.isVideo(format.sampleMimeType) - ? Boxes.ctts(track.writtenSamples(), sampleDurationsVu, track.videoUnitTimebase()) - : ByteBuffer.allocate(0); - ByteBuffer stsz = Boxes.stsz(track.writtenSamples()); - ByteBuffer stsc = Boxes.stsc(track.writtenChunkSampleCounts()); - ByteBuffer chunkOffsetBox = - isFragmentedMp4 - ? 
Boxes.stco(track.writtenChunkOffsets()) - : Boxes.co64(track.writtenChunkOffsets()); - - String handlerType; - String handlerName; - ByteBuffer mhdBox; - ByteBuffer sampleEntryBox; - ByteBuffer stsdBox; - ByteBuffer stblBox; - - switch (trackType) { - case C.TRACK_TYPE_VIDEO: - handlerType = "vide"; - handlerName = "VideoHandle"; - mhdBox = Boxes.vmhd(); - sampleEntryBox = Boxes.videoSampleEntry(format); - stsdBox = Boxes.stsd(sampleEntryBox); - stblBox = - Boxes.stbl( - stsdBox, - stts, - ctts, - stsz, - stsc, - chunkOffsetBox, - Boxes.stss(track.writtenSamples())); - break; - case C.TRACK_TYPE_AUDIO: - handlerType = "soun"; - handlerName = "SoundHandle"; - mhdBox = Boxes.smhd(); - sampleEntryBox = Boxes.audioSampleEntry(format); - stsdBox = Boxes.stsd(sampleEntryBox); - stblBox = Boxes.stbl(stsdBox, stts, stsz, stsc, chunkOffsetBox); - break; - case C.TRACK_TYPE_METADATA: - // TODO: (b/280443593) - Check if we can identify a metadata track type from a custom - // mime type. - case C.TRACK_TYPE_UNKNOWN: - handlerType = "meta"; - handlerName = "MetaHandle"; - mhdBox = Boxes.nmhd(); - sampleEntryBox = Boxes.textMetaDataSampleEntry(format); - stsdBox = Boxes.stsd(sampleEntryBox); - stblBox = Boxes.stbl(stsdBox, stts, stsz, stsc, chunkOffsetBox); - break; - default: - throw new IllegalArgumentException("Unsupported track type"); - } - - // The below statement is also a description of how a mdat box looks like, with all the - // inner boxes and what they actually store. Although they're technically instance methods, - // everything that is written to a box is visible in the argument list. 
- ByteBuffer trakBox = - Boxes.trak( - Boxes.tkhd( - nextTrackId, - trackDurationUs, - creationTimestampSeconds, - modificationTimestampSeconds, - metadataCollector.orientationData.orientation, - format), - Boxes.mdia( - Boxes.mdhd( - trackDurationInTrackUnitsVu, - track.videoUnitTimebase(), - creationTimestampSeconds, - modificationTimestampSeconds, - languageCode), - Boxes.hdlr(handlerType, handlerName), - Boxes.minf(mhdBox, Boxes.dinf(Boxes.dref(Boxes.localUrl())), stblBox))); - - trakBoxes.add(trakBox); - videoDurationUs = max(videoDurationUs, trackDurationUs); - trexBoxes.add(Boxes.trex(nextTrackId)); - nextTrackId++; - } - - ByteBuffer mvhdBox = - Boxes.mvhd( - nextTrackId, creationTimestampSeconds, modificationTimestampSeconds, videoDurationUs); - ByteBuffer udtaBox = Boxes.udta(metadataCollector.locationData); - ByteBuffer metaBox = - metadataCollector.metadataEntries.isEmpty() - ? ByteBuffer.allocate(0) - : Boxes.meta( - Boxes.hdlr(/* handlerType= */ "mdta", /* handlerName= */ ""), - Boxes.keys(Lists.newArrayList(metadataCollector.metadataEntries)), - Boxes.ilst(Lists.newArrayList(metadataCollector.metadataEntries))); - - List subBoxes = new ArrayList<>(); - subBoxes.add(mvhdBox); - subBoxes.add(udtaBox); - subBoxes.add(metaBox); - subBoxes.addAll(trakBoxes); - if (isFragmentedMp4) { - subBoxes.add(Boxes.mvex(trexBoxes)); - } - - ByteBuffer moovBox = BoxUtils.wrapBoxesIntoBox("moov", subBoxes); - - // Also add XMP if needed - if (metadataCollector.xmpData != null) { - return BoxUtils.concatenateBuffers( - moovBox, Boxes.uuid(Boxes.XMP_UUID, ByteBuffer.wrap(metadataCollector.xmpData.data))); - } else { - // No need for another copy if there is no XMP to be appended. - return moovBox; - } - } - - /** Returns an ISO 639-2/T (ISO3) language code for the IETF BCP 47 language tag. 
*/ - private static @PolyNull String bcp47LanguageTagToIso3(@PolyNull String languageTag) { - if (languageTag == null) { - return null; - } - - Locale locale = Locale.forLanguageTag(languageTag); - - return locale.getISO3Language().isEmpty() ? languageTag : locale.getISO3Language(); - } - - /** Converts video units to microseconds, using the provided timebase. */ - private static long usFromVu(long timestampVu, long videoUnitTimebase) { - return timestampVu * 1_000_000L / videoUnitTimebase; - } -} diff --git a/libraries/muxer/src/main/java/androidx/media3/muxer/Mp4Writer.java b/libraries/muxer/src/main/java/androidx/media3/muxer/Mp4Writer.java index 4245e9acb4..424780cacf 100644 --- a/libraries/muxer/src/main/java/androidx/media3/muxer/Mp4Writer.java +++ b/libraries/muxer/src/main/java/androidx/media3/muxer/Mp4Writer.java @@ -200,7 +200,7 @@ import java.util.concurrent.atomic.AtomicBoolean; /* samplesInterleaved= */ true, editableVideoTracks); ByteBuffer moovBox = - Mp4MoovStructure.moov( + Boxes.moov( editableVideoTracks, editableVideoMetadataCollector, findMinimumPresentationTimestampUsAcrossTracks(editableVideoTracks), @@ -318,7 +318,7 @@ import java.util.concurrent.atomic.AtomicBoolean; ByteBuffer moovHeader; if (minInputPtsUs != Long.MAX_VALUE) { moovHeader = - Mp4MoovStructure.moov( + Boxes.moov( tracks, metadataCollector, minInputPtsUs, diff --git a/libraries/muxer/src/main/java/androidx/media3/muxer/Track.java b/libraries/muxer/src/main/java/androidx/media3/muxer/Track.java index 38ead557f9..7f7fd7a796 100644 --- a/libraries/muxer/src/main/java/androidx/media3/muxer/Track.java +++ b/libraries/muxer/src/main/java/androidx/media3/muxer/Track.java @@ -28,7 +28,7 @@ import java.util.Deque; import java.util.List; /** Represents a single track (audio, video, metadata etc.). 
*/ -/* package */ final class Track implements TrackToken, Mp4MoovStructure.TrackMetadataProvider { +/* package */ final class Track implements TrackToken, Boxes.TrackMetadataProvider { public final Format format; public final int sortKey; public final List writtenSamples;