diff --git a/libraries/muxer/src/main/java/androidx/media3/muxer/Boxes.java b/libraries/muxer/src/main/java/androidx/media3/muxer/Boxes.java index f741a51c56..d87860b31c 100644 --- a/libraries/muxer/src/main/java/androidx/media3/muxer/Boxes.java +++ b/libraries/muxer/src/main/java/androidx/media3/muxer/Boxes.java @@ -20,8 +20,6 @@ import static androidx.media3.common.util.Assertions.checkNotNull; import static androidx.media3.common.util.Assertions.checkState; import static androidx.media3.muxer.ColorUtils.MEDIAFORMAT_STANDARD_TO_PRIMARIES_AND_MATRIX; import static androidx.media3.muxer.ColorUtils.MEDIAFORMAT_TRANSFER_TO_MP4_TRANSFER; -import static androidx.media3.muxer.Mp4Utils.BYTES_PER_INTEGER; -import static androidx.media3.muxer.Mp4Utils.MVHD_TIMEBASE; import static androidx.media3.muxer.Mp4Utils.UNSIGNED_INT_MAX_VALUE; import android.media.MediaCodec; @@ -53,6 +51,9 @@ import java.util.Locale; * buffers}. */ /* package */ final class Boxes { + /* Total number of bytes in an integer. */ + private static final int BYTES_PER_INTEGER = 4; + // Box size (4 bytes) + Box name (4 bytes) public static final int BOX_HEADER_SIZE = 2 * BYTES_PER_INTEGER; @@ -60,6 +61,20 @@ import java.util.Locale; public static final int TFHD_BOX_CONTENT_SIZE = 4 * BYTES_PER_INTEGER; + /** + * The maximum length of boxes which have fixed sizes. + * + *

Technically, we'd know how long they actually are; this upper bound is much simpler to + * produce though and we'll throw if we overflow anyway. + */ + private static final int MAX_FIXED_LEAF_BOX_SIZE = 200; + + /** + * The per-video timebase, used for durations in MVHD and TKHD even if the per-track timebase is + * different (e.g. typically the sample rate for audio). + */ + private static final long MVHD_TIMEBASE = 10_000L; + // unsigned int(2) sample_depends_on = 2 (bit index 25 and 24) private static final int TRUN_BOX_SYNC_SAMPLE_FLAGS = 0b00000010_00000000_00000000_00000000; // unsigned int(2) sample_depends_on = 1 (bit index 25 and 24) @@ -94,12 +109,12 @@ import java.util.Locale; */ public static ByteBuffer tkhd( int trackId, - int trackDurationVu, + long trackDurationUs, int creationTimestampSeconds, int modificationTimestampSeconds, int orientation, Format format) { - ByteBuffer contents = ByteBuffer.allocate(Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE); + ByteBuffer contents = ByteBuffer.allocate(MAX_FIXED_LEAF_BOX_SIZE); contents.putInt(0x00000007); // version and flags; allow presentation, etc. contents.putInt(creationTimestampSeconds); // creation_time; unsigned int(32) @@ -108,6 +123,9 @@ import java.util.Locale; contents.putInt(trackId); contents.putInt(0); // reserved + // Using the time base of the entire file, not that of the track; otherwise, + // Quicktime will stretch the audio accordingly, see b/158120042. 
+ int trackDurationVu = (int) vuFromUs(trackDurationUs, MVHD_TIMEBASE); contents.putInt(trackDurationVu); contents.putInt(0); // reserved @@ -139,14 +157,14 @@ import java.util.Locale; int creationTimestampSeconds, int modificationTimestampSeconds, long videoDurationUs) { - ByteBuffer contents = ByteBuffer.allocate(Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE); + ByteBuffer contents = ByteBuffer.allocate(MAX_FIXED_LEAF_BOX_SIZE); contents.putInt(0); // version and flags contents.putInt(creationTimestampSeconds); // creation_time; unsigned int(32) contents.putInt(modificationTimestampSeconds); // modification_time; unsigned int(32) contents.putInt((int) MVHD_TIMEBASE); // The per-track timescales might be different. contents.putInt( - (int) Mp4Utils.vuFromUs(videoDurationUs, MVHD_TIMEBASE)); // Duration of the entire video. + (int) vuFromUs(videoDurationUs, MVHD_TIMEBASE)); // Duration of the entire video. contents.putInt(0x00010000); // rate = 1.0 contents.putShort((short) 0x0100); // volume = full volume contents.putShort((short) 0); // reserved @@ -184,7 +202,7 @@ import java.util.Locale; int creationTimestampSeconds, int modificationTimestampSeconds, @Nullable String languageCode) { - ByteBuffer contents = ByteBuffer.allocate(Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE); + ByteBuffer contents = ByteBuffer.allocate(MAX_FIXED_LEAF_BOX_SIZE); contents.putInt(0x0); // version and flags contents.putInt(creationTimestampSeconds); // creation_time; unsigned int(32) @@ -207,7 +225,7 @@ import java.util.Locale; *

This is a header for video tracks. */ public static ByteBuffer vmhd() { - ByteBuffer contents = ByteBuffer.allocate(Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE); + ByteBuffer contents = ByteBuffer.allocate(MAX_FIXED_LEAF_BOX_SIZE); contents.putInt(0x0); // version and flags contents.putShort((short) 0); // graphicsmode @@ -226,7 +244,7 @@ import java.util.Locale; *

This is a header for audio tracks. */ public static ByteBuffer smhd() { - ByteBuffer contents = ByteBuffer.allocate(Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE); + ByteBuffer contents = ByteBuffer.allocate(MAX_FIXED_LEAF_BOX_SIZE); contents.putInt(0x0); // version and flags contents.putShort((short) 0); // balance @@ -242,7 +260,7 @@ import java.util.Locale; *

This is a header for metadata tracks. */ public static ByteBuffer nmhd() { - ByteBuffer contents = ByteBuffer.allocate(Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE); + ByteBuffer contents = ByteBuffer.allocate(MAX_FIXED_LEAF_BOX_SIZE); contents.putInt(0x0); // version and flags contents.flip(); @@ -256,7 +274,7 @@ import java.util.Locale; * metadata tracks. */ public static ByteBuffer textMetaDataSampleEntry(Format format) { - ByteBuffer contents = ByteBuffer.allocate(Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE); + ByteBuffer contents = ByteBuffer.allocate(MAX_FIXED_LEAF_BOX_SIZE); String mimeType = checkNotNull(format.sampleMimeType); byte[] mimeBytes = Util.getUtf8Bytes(mimeType); contents.put(mimeBytes); // content_encoding @@ -325,7 +343,7 @@ import java.util.Locale; * @return {@link ByteBuffer} containing the hdlr box. */ public static ByteBuffer hdlr(String handlerType, String handlerName) { - ByteBuffer contents = ByteBuffer.allocate(Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE); + ByteBuffer contents = ByteBuffer.allocate(MAX_FIXED_LEAF_BOX_SIZE); contents.putInt(0x0); // version and flags. contents.putInt(0); // pre_defined. contents.put(Util.getUtf8Bytes(handlerType)); // handler_type. @@ -397,7 +415,7 @@ import java.util.Locale; *

This box contains a list of metadata keys. */ public static ByteBuffer keys(List mdtaMetadataEntries) { - ByteBuffer contents = ByteBuffer.allocate(Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE); + ByteBuffer contents = ByteBuffer.allocate(MAX_FIXED_LEAF_BOX_SIZE); contents.putInt(0x0); // version and flags contents.putInt(mdtaMetadataEntries.size()); // Entry count @@ -416,7 +434,7 @@ import java.util.Locale; *

This box contains a list of metadata values. */ public static ByteBuffer ilst(List mdtaMetadataEntries) { - ByteBuffer contents = ByteBuffer.allocate(Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE); + ByteBuffer contents = ByteBuffer.allocate(MAX_FIXED_LEAF_BOX_SIZE); for (int i = 0; i < mdtaMetadataEntries.size(); i++) { int keyId = i + 1; @@ -488,8 +506,7 @@ import java.util.Locale; checkArgument(csd0.length > 0, "csd-0 is empty."); ByteBuffer csd0ByteBuffer = ByteBuffer.wrap(csd0); - ByteBuffer contents = - ByteBuffer.allocate(csd0ByteBuffer.limit() + Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE); + ByteBuffer contents = ByteBuffer.allocate(csd0ByteBuffer.limit() + MAX_FIXED_LEAF_BOX_SIZE); contents.putInt(0x00); // reserved contents.putShort((short) 0x0); // reserved @@ -536,8 +553,7 @@ import java.util.Locale; ByteBuffer codecSpecificBox = codecSpecificBox(format); String fourcc = codecSpecificFourcc(format); - ByteBuffer contents = - ByteBuffer.allocate(Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE + codecSpecificBox.limit()); + ByteBuffer contents = ByteBuffer.allocate(MAX_FIXED_LEAF_BOX_SIZE + codecSpecificBox.limit()); // reserved = 0 (6 bytes) contents.putInt(0); @@ -625,8 +641,8 @@ import java.util.Locale; // TODO: b/316158030 - First calculate the duration and then convert us to vu to avoid // rounding error. long currentSampleDurationVu = - Mp4Utils.vuFromUs(nextSampleTimeUs, videoUnitTimescale) - - Mp4Utils.vuFromUs(currentSampleTimeUs, videoUnitTimescale); + vuFromUs(nextSampleTimeUs, videoUnitTimescale) + - vuFromUs(currentSampleTimeUs, videoUnitTimescale); if (currentSampleDurationVu > Integer.MAX_VALUE) { throw new IllegalArgumentException( String.format( @@ -644,8 +660,7 @@ import java.util.Locale; /** Generates the stts (decoding time to sample) box. 
*/ public static ByteBuffer stts(List durationsVu) { - ByteBuffer contents = - ByteBuffer.allocate(durationsVu.size() * 8 + Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE); + ByteBuffer contents = ByteBuffer.allocate(durationsVu.size() * 8 + MAX_FIXED_LEAF_BOX_SIZE); contents.putInt(0x0); // version and flags. @@ -684,8 +699,7 @@ import java.util.Locale; /** Returns the stsz (sample size) box. */ public static ByteBuffer stsz(List writtenSamples) { - ByteBuffer contents = - ByteBuffer.allocate(writtenSamples.size() * 4 + Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE); + ByteBuffer contents = ByteBuffer.allocate(writtenSamples.size() * 4 + MAX_FIXED_LEAF_BOX_SIZE); contents.putInt(0x0); // version and flags. @@ -707,8 +721,7 @@ import java.util.Locale; /** Returns the stsc (sample to chunk) box. */ public static ByteBuffer stsc(List writtenChunkSampleCounts) { ByteBuffer contents = - ByteBuffer.allocate( - writtenChunkSampleCounts.size() * 12 + Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE); + ByteBuffer.allocate(writtenChunkSampleCounts.size() * 12 + MAX_FIXED_LEAF_BOX_SIZE); contents.putInt(0x0); // version and flags. contents.putInt(writtenChunkSampleCounts.size()); // entry_count. @@ -767,8 +780,7 @@ import java.util.Locale; /** Returns the stss (sync sample) box. */ public static ByteBuffer stss(List writtenSamples) { - ByteBuffer contents = - ByteBuffer.allocate(writtenSamples.size() * 4 + Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE); + ByteBuffer contents = ByteBuffer.allocate(writtenSamples.size() * 4 + MAX_FIXED_LEAF_BOX_SIZE); contents.putInt(0x0); // version and flags. @@ -797,8 +809,7 @@ import java.util.Locale; /** Returns the stsd (sample description) box. */ public static ByteBuffer stsd(ByteBuffer sampleEntryBox) { - ByteBuffer contents = - ByteBuffer.allocate(sampleEntryBox.limit() + Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE); + ByteBuffer contents = ByteBuffer.allocate(sampleEntryBox.limit() + MAX_FIXED_LEAF_BOX_SIZE); contents.putInt(0x0); // version and flags. 
contents.putInt(1); // entry_count, We have only one sample description in each track. @@ -969,7 +980,7 @@ import java.util.Locale; ByteBuffer contents = ByteBuffer.allocate( - csd0ByteBuffer.limit() + csd1ByteBuffer.limit() + Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE); + csd0ByteBuffer.limit() + csd1ByteBuffer.limit() + MAX_FIXED_LEAF_BOX_SIZE); contents.put((byte) 0x01); // configurationVersion @@ -1017,8 +1028,7 @@ import java.util.Locale; ByteBuffer csd0ByteBuffer = ByteBuffer.wrap(csd0); - ByteBuffer contents = - ByteBuffer.allocate(csd0ByteBuffer.limit() + Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE); + ByteBuffer contents = ByteBuffer.allocate(csd0ByteBuffer.limit() + MAX_FIXED_LEAF_BOX_SIZE); ImmutableList nalusWithEmulationPrevention = AnnexBUtils.findNalUnits(csd0ByteBuffer); @@ -1197,7 +1207,7 @@ import java.util.Locale; ByteBuffer csd0ByteBuffer, int peakBitrate, int averageBitrate) { int csd0Size = csd0ByteBuffer.limit(); - ByteBuffer contents = ByteBuffer.allocate(csd0Size + Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE); + ByteBuffer contents = ByteBuffer.allocate(csd0Size + MAX_FIXED_LEAF_BOX_SIZE); contents.putInt(0x0); // version and flags. contents.put((byte) 0x03); // ES_DescrTag @@ -1287,4 +1297,9 @@ import java.util.Locale; throw new IllegalArgumentException("invalid orientation " + orientation); } } + + /** Converts microseconds to video units (timebase = units per second), truncating the result. 
*/ + private static long vuFromUs(long timestampUs, long videoUnitTimebase) { + return timestampUs * videoUnitTimebase / 1_000_000L; // (division for us to s conversion) + } } diff --git a/libraries/muxer/src/main/java/androidx/media3/muxer/Mp4MoovStructure.java b/libraries/muxer/src/main/java/androidx/media3/muxer/Mp4MoovStructure.java index b0cfb1ba46..e6ca9ec7cb 100644 --- a/libraries/muxer/src/main/java/androidx/media3/muxer/Mp4MoovStructure.java +++ b/libraries/muxer/src/main/java/androidx/media3/muxer/Mp4MoovStructure.java @@ -15,7 +15,6 @@ */ package androidx.media3.muxer; -import static androidx.media3.muxer.Mp4Utils.MVHD_TIMEBASE; import static java.lang.Math.max; import android.media.MediaCodec.BufferInfo; @@ -92,8 +91,7 @@ import org.checkerframework.checker.nullness.qual.PolyNull; trackDurationInTrackUnitsVu += sampleDurationsVu.get(j); } - long trackDurationUs = - Mp4Utils.usFromVu(trackDurationInTrackUnitsVu, track.videoUnitTimebase()); + long trackDurationUs = usFromVu(trackDurationInTrackUnitsVu, track.videoUnitTimebase()); @C.TrackType int trackType = MimeTypes.getTrackType(format.sampleMimeType); ByteBuffer stts = Boxes.stts(sampleDurationsVu); @@ -152,9 +150,7 @@ import org.checkerframework.checker.nullness.qual.PolyNull; Boxes.trak( Boxes.tkhd( nextTrackId, - // Using the time base of the entire file, not that of the track; otherwise, - // Quicktime will stretch the audio accordingly, see b/158120042. - (int) Mp4Utils.vuFromUs(trackDurationUs, MVHD_TIMEBASE), + trackDurationUs, creationTimestampSeconds, modificationTimestampSeconds, metadataCollector.orientationData.orientation, @@ -217,4 +213,9 @@ import org.checkerframework.checker.nullness.qual.PolyNull; return locale.getISO3Language().isEmpty() ? languageTag : locale.getISO3Language(); } + + /** Converts video units to microseconds (timebase = units per second), truncating the result. 
*/ + private static long usFromVu(long timestampVu, long videoUnitTimebase) { + return timestampVu * 1_000_000L / videoUnitTimebase; + } } diff --git a/libraries/muxer/src/main/java/androidx/media3/muxer/Mp4Utils.java b/libraries/muxer/src/main/java/androidx/media3/muxer/Mp4Utils.java index 4c936a55e8..af19ad295c 100644 --- a/libraries/muxer/src/main/java/androidx/media3/muxer/Mp4Utils.java +++ b/libraries/muxer/src/main/java/androidx/media3/muxer/Mp4Utils.java @@ -16,6 +16,7 @@ package androidx.media3.muxer; import androidx.media3.common.Metadata; +import androidx.media3.common.util.UnstableApi; import androidx.media3.container.MdtaMetadataEntry; import androidx.media3.container.Mp4LocationData; import androidx.media3.container.Mp4OrientationData; @@ -23,39 +24,13 @@ import androidx.media3.container.Mp4TimestampData; import androidx.media3.container.XmpData; /** Utilities for MP4 files. */ -/* package */ final class Mp4Utils { - /* Total number of bytes in an integer. */ - public static final int BYTES_PER_INTEGER = 4; - - /** - * The maximum length of boxes which have fixed sizes. - * - *

Technically, we'd know how long they actually are; this upper bound is much simpler to - * produce though and we'll throw if we overflow anyway. - */ - public static final int MAX_FIXED_LEAF_BOX_SIZE = 200; - +@UnstableApi +public final class Mp4Utils { /** The maximum value of a 32-bit unsigned int. */ public static final long UNSIGNED_INT_MAX_VALUE = 4_294_967_295L; - /** - * The per-video timebase, used for durations in MVHD and TKHD even if the per-track timebase is - * different (e.g. typically the sample rate for audio). - */ - public static final long MVHD_TIMEBASE = 10_000L; - private Mp4Utils() {} - /** Converts microseconds to video units, using the provided timebase. */ - public static long vuFromUs(long timestampUs, long videoUnitTimebase) { - return timestampUs * videoUnitTimebase / 1_000_000L; // (division for us to s conversion) - } - - /** Converts video units to microseconds, using the provided timebase. */ - public static long usFromVu(long timestampVu, long videoUnitTimebase) { - return timestampVu * 1_000_000L / videoUnitTimebase; - } - /** Returns whether a given {@link Metadata.Entry metadata} is supported. 
*/ public static boolean isMetadataSupported(Metadata.Entry metadata) { return metadata instanceof Mp4OrientationData diff --git a/libraries/muxer/src/test/java/androidx/media3/muxer/BoxesTest.java b/libraries/muxer/src/test/java/androidx/media3/muxer/BoxesTest.java index f84e55d01f..6ce162ae64 100644 --- a/libraries/muxer/src/test/java/androidx/media3/muxer/BoxesTest.java +++ b/libraries/muxer/src/test/java/androidx/media3/muxer/BoxesTest.java @@ -68,7 +68,7 @@ public class BoxesTest { ByteBuffer tkhdBox = Boxes.tkhd( /* trackId= */ 1, - /* trackDurationVu= */ 5_000_000, + /* trackDurationUs= */ 500_000_000, /* creationTimestampSeconds= */ 1_000_000_000, /* modificationTimestampSeconds= */ 2_000_000_000, /* orientation= */ 90, @@ -84,7 +84,7 @@ public class BoxesTest { ByteBuffer tkhdBox = Boxes.tkhd( /* trackId= */ 1, - /* trackDurationVu= */ 5_000_000, + /* trackDurationUs= */ 500_000_000, /* creationTimestampSeconds= */ 1_000_000_000, /* modificationTimestampSeconds= */ 2_000_000_000, /* orientation= */ 90, diff --git a/libraries/transformer/src/main/java/androidx/media3/transformer/InAppMuxer.java b/libraries/transformer/src/main/java/androidx/media3/transformer/InAppMuxer.java index 2caa861b6d..05ba16e448 100644 --- a/libraries/transformer/src/main/java/androidx/media3/transformer/InAppMuxer.java +++ b/libraries/transformer/src/main/java/androidx/media3/transformer/InAppMuxer.java @@ -21,13 +21,10 @@ import androidx.media3.common.Format; import androidx.media3.common.Metadata; import androidx.media3.common.MimeTypes; import androidx.media3.common.util.UnstableApi; -import androidx.media3.container.MdtaMetadataEntry; -import androidx.media3.container.Mp4LocationData; import androidx.media3.container.Mp4OrientationData; -import androidx.media3.container.Mp4TimestampData; -import androidx.media3.container.XmpData; import androidx.media3.muxer.FragmentedMp4Muxer; import androidx.media3.muxer.Mp4Muxer; +import androidx.media3.muxer.Mp4Utils; import 
androidx.media3.muxer.Muxer.TrackToken; import com.google.common.collect.ImmutableList; import com.google.errorprone.annotations.CanIgnoreReturnValue; @@ -226,7 +223,7 @@ public final class InAppMuxer implements Muxer { public void addMetadata(Metadata metadata) { for (int i = 0; i < metadata.length(); i++) { Metadata.Entry entry = metadata.get(i); - if (isMetadataSupported(entry)) { + if (Mp4Utils.isMetadataSupported(entry)) { metadataEntries.add(entry); } } @@ -255,19 +252,4 @@ public final class InAppMuxer implements Muxer { muxer.addMetadata(entry); } } - - /** Returns whether a given {@link Metadata.Entry metadata} is supported. */ - private static boolean isMetadataSupported(Metadata.Entry metadata) { - return metadata instanceof Mp4OrientationData - || metadata instanceof Mp4LocationData - || metadata instanceof Mp4TimestampData - || (metadata instanceof MdtaMetadataEntry - && isMdtaMetadataEntrySupported((MdtaMetadataEntry) metadata)) - || metadata instanceof XmpData; - } - - private static boolean isMdtaMetadataEntrySupported(MdtaMetadataEntry mdtaMetadataEntry) { - return mdtaMetadataEntry.typeIndicator == MdtaMetadataEntry.TYPE_INDICATOR_STRING - || mdtaMetadataEntry.typeIndicator == MdtaMetadataEntry.TYPE_INDICATOR_FLOAT32; - } }