From 53c174f047ac759e6ff3c44776d7d46fc0aed755 Mon Sep 17 00:00:00 2001 From: sheenachhabra Date: Thu, 8 Jun 2023 14:51:43 +0000 Subject: [PATCH] Add support for passing custom metadata via transformer Changes included: 1. Enable MP4 extractor to read all types of metadata. 2. Allow passing String and Float metadata via Transformer. Reference to QuickTime spec: https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/Metadata/Metadata.html#//apple_ref/doc/uid/TP40000939-CH1-SW21 PiperOrigin-RevId: 538783982 --- .../androidx/media3/common/util/Util.java | 50 ++- .../media3/container/MdtaMetadataEntry.java | 27 +- .../exoplayer/MetadataRetrieverTest.java | 39 +- .../media3/extractor/mp4/MetadataUtil.java | 23 +- .../mp4/sample_android_slow_motion.mp4.0.dump | 2 +- .../mp4/sample_android_slow_motion.mp4.1.dump | 2 +- .../mp4/sample_android_slow_motion.mp4.2.dump | 2 +- .../mp4/sample_android_slow_motion.mp4.3.dump | 2 +- ...ndroid_slow_motion.mp4.unknown_length.dump | 2 +- .../mp4/sample_with_color_info.mp4.0.dump | 4 +- .../mp4/sample_with_color_info.mp4.1.dump | 4 +- .../mp4/sample_with_color_info.mp4.2.dump | 4 +- .../mp4/sample_with_color_info.mp4.3.dump | 4 +- ...le_with_color_info.mp4.unknown_length.dump | 4 +- .../mp4/sample.mp4.with_custom_metadata.dump | 339 ++++++++++++++++++ .../mp4/sample_sef_slow_motion.mp4.dump | 8 +- .../media3/transformer/InAppMuxer.java | 12 +- ...TransformerWithInAppMuxerEndToEndTest.java | 48 ++- 18 files changed, 518 insertions(+), 58 deletions(-) create mode 100644 libraries/test_data/src/test/assets/transformerdumps/mp4/sample.mp4.with_custom_metadata.dump diff --git a/libraries/common/src/main/java/androidx/media3/common/util/Util.java b/libraries/common/src/main/java/androidx/media3/common/util/Util.java index 09a6169e38..5b90a97905 100644 --- a/libraries/common/src/main/java/androidx/media3/common/util/Util.java +++ b/libraries/common/src/main/java/androidx/media3/common/util/Util.java @@ -27,6 +27,7 @@ import 
static androidx.media3.common.Player.COMMAND_SEEK_TO_NEXT; import static androidx.media3.common.Player.COMMAND_SEEK_TO_NEXT_MEDIA_ITEM; import static androidx.media3.common.Player.COMMAND_SEEK_TO_PREVIOUS; import static androidx.media3.common.Player.COMMAND_SEEK_TO_PREVIOUS_MEDIA_ITEM; +import static androidx.media3.common.util.Assertions.checkArgument; import static androidx.media3.common.util.Assertions.checkNotNull; import static java.lang.Math.abs; import static java.lang.Math.max; @@ -206,8 +207,16 @@ public final class Util { return outputStream.toByteArray(); } + /** Converts an integer into an equivalent byte array. */ + @UnstableApi + public static byte[] toByteArray(int value) { + return new byte[] { + (byte) (value >> 24), (byte) (value >> 16), (byte) (value >> 8), (byte) value + }; + } + /** - * Converts an array of 32-bit integers into an equivalent byte array. + * Converts an array of integers into an equivalent byte array. * *

Each integer is converted into 4 sequential bytes. */ @@ -216,14 +225,37 @@ public final class Util { byte[] array = new byte[values.length * 4]; int index = 0; for (int value : values) { - array[index++] = (byte) (value >> 24); - array[index++] = (byte) (value >> 16); - array[index++] = (byte) (value >> 8); - array[index++] = (byte) (value /* >> 0 */); + byte[] byteArray = toByteArray(value); + array[index++] = byteArray[0]; + array[index++] = byteArray[1]; + array[index++] = byteArray[2]; + array[index++] = byteArray[3]; } return array; } + /** Converts a float into an equivalent byte array. */ + @UnstableApi + public static byte[] toByteArray(float value) { + return toByteArray(Float.floatToIntBits(value)); + } + + /** Converts a byte array into a float. */ + @UnstableApi + public static float toFloat(byte[] bytes) { + checkArgument(bytes.length == 4); + int intBits = + bytes[0] << 24 | (bytes[1] & 0xFF) << 16 | (bytes[2] & 0xFF) << 8 | (bytes[3] & 0xFF); + return Float.intBitsToFloat(intBits); + } + + /** Converts a 4-byte array (most significant byte first) into the 32-bit integer it encodes. */ + @UnstableApi + public static int toInteger(byte[] bytes) { + checkArgument(bytes.length == 4); + return bytes[0] << 24 | (bytes[1] & 0xFF) << 16 | (bytes[2] & 0xFF) << 8 | (bytes[3] & 0xFF); + } + /** * Registers a {@link BroadcastReceiver} that's not intended to receive broadcasts from other * apps. 
This will be enforced by specifying {@link Context#RECEIVER_NOT_EXPORTED} if {@link @@ -463,7 +495,7 @@ public final class Util { @UnstableApi @SuppressWarnings({"nullness:argument", "nullness:return"}) public static T[] nullSafeArrayCopy(T[] input, int length) { - Assertions.checkArgument(length <= input.length); + checkArgument(length <= input.length); return Arrays.copyOf(input, length); } @@ -478,8 +510,8 @@ public final class Util { @UnstableApi @SuppressWarnings({"nullness:argument", "nullness:return"}) public static T[] nullSafeArrayCopyOfRange(T[] input, int from, int to) { - Assertions.checkArgument(0 <= from); - Assertions.checkArgument(to <= input.length); + checkArgument(0 <= from); + checkArgument(to <= input.length); return Arrays.copyOfRange(input, from, to); } @@ -1609,7 +1641,7 @@ public final class Util { @UnstableApi public static int getIntegerCodeForString(String string) { int length = string.length(); - Assertions.checkArgument(length <= 4); + checkArgument(length <= 4); int result = 0; for (int i = 0; i < length; i++) { result <<= 8; diff --git a/libraries/container/src/main/java/androidx/media3/container/MdtaMetadataEntry.java b/libraries/container/src/main/java/androidx/media3/container/MdtaMetadataEntry.java index 2420eefd98..293fb565fb 100644 --- a/libraries/container/src/main/java/androidx/media3/container/MdtaMetadataEntry.java +++ b/libraries/container/src/main/java/androidx/media3/container/MdtaMetadataEntry.java @@ -21,7 +21,6 @@ import androidx.annotation.Nullable; import androidx.media3.common.Metadata; import androidx.media3.common.util.UnstableApi; import androidx.media3.common.util.Util; -import java.nio.ByteBuffer; import java.util.Arrays; /** @@ -34,7 +33,12 @@ public final class MdtaMetadataEntry implements Metadata.Entry { /** Key for the capture frame rate (in frames per second). 
*/ public static final String KEY_ANDROID_CAPTURE_FPS = "com.android.capture.fps"; - public static final int TYPE_INDICATOR_FLOAT = 23; + /** The type indicator for UTF-8 string. */ + public static final int TYPE_INDICATOR_STRING = 1; + /** The type indicator for Float32. */ + public static final int TYPE_INDICATOR_FLOAT32 = 23; + /** The type indicator for 32-bit signed integer. */ + public static final int TYPE_INDICATOR_INT32 = 67; /** The metadata key name. */ public final String key; @@ -87,10 +91,21 @@ public final class MdtaMetadataEntry implements Metadata.Entry { @Override public String toString() { - String formattedValue = - typeIndicator == TYPE_INDICATOR_FLOAT - ? Float.toString(ByteBuffer.wrap(value).getFloat()) - : Util.toHexString(value); + String formattedValue; + switch (typeIndicator) { + case TYPE_INDICATOR_STRING: + formattedValue = Util.fromUtf8Bytes(value); + break; + case TYPE_INDICATOR_FLOAT32: + formattedValue = String.valueOf(Util.toFloat(value)); + break; + case TYPE_INDICATOR_INT32: + formattedValue = String.valueOf(Util.toInteger(value)); + break; + default: + formattedValue = Util.toHexString(value); + } + return "mdta: key=" + key + ", value=" + formattedValue; } diff --git a/libraries/exoplayer/src/test/java/androidx/media3/exoplayer/MetadataRetrieverTest.java b/libraries/exoplayer/src/test/java/androidx/media3/exoplayer/MetadataRetrieverTest.java index f5e4bde3b1..8c015b8db2 100644 --- a/libraries/exoplayer/src/test/java/androidx/media3/exoplayer/MetadataRetrieverTest.java +++ b/libraries/exoplayer/src/test/java/androidx/media3/exoplayer/MetadataRetrieverTest.java @@ -26,6 +26,7 @@ import android.net.Uri; import androidx.media3.common.C; import androidx.media3.common.MediaItem; import androidx.media3.common.MimeTypes; +import androidx.media3.common.util.Util; import androidx.media3.container.CreationTime; import androidx.media3.container.MdtaMetadataEntry; import androidx.media3.exoplayer.source.TrackGroupArray; @@ -153,6 +154,18 
@@ public class MetadataRetrieverTest { public void retrieveMetadata_sefSlowMotion_outputsExpectedMetadata() throws Exception { MediaItem mediaItem = MediaItem.fromUri(Uri.parse("asset://android_asset/media/mp4/sample_sef_slow_motion.mp4")); + MdtaMetadataEntry expectedAndroidVersionMetadata = + new MdtaMetadataEntry( + /* key= */ "com.android.version", + /* value= */ Util.getUtf8Bytes("10"), + /* localeIndicator= */ 0, + MdtaMetadataEntry.TYPE_INDICATOR_STRING); + MdtaMetadataEntry expectedTemporalLayersCountMetdata = + new MdtaMetadataEntry( + /* key= */ "com.android.video.temporal_layers_count", + /* value= */ Util.toByteArray(4), + /* localeIndicator= */ 0, + MdtaMetadataEntry.TYPE_INDICATOR_INT32); SmtaMetadataEntry expectedSmtaEntry = new SmtaMetadataEntry(/* captureFrameRate= */ 240, /* svcTemporalLayerCount= */ 4); List segments = new ArrayList<>(); @@ -178,17 +191,25 @@ public class MetadataRetrieverTest { assertThat(trackGroups.length).isEqualTo(2); // Video and audio // Audio - assertThat(trackGroups.get(0).getFormat(0).metadata.length()).isEqualTo(3); - assertThat(trackGroups.get(0).getFormat(0).metadata.get(0)).isEqualTo(expectedSmtaEntry); - assertThat(trackGroups.get(0).getFormat(0).metadata.get(1)).isEqualTo(expectedSlowMotionData); - assertThat(trackGroups.get(0).getFormat(0).metadata.get(2)).isEqualTo(expectedCreationTime); + assertThat(trackGroups.get(0).getFormat(0).metadata.length()).isEqualTo(5); + assertThat(trackGroups.get(0).getFormat(0).metadata.get(0)) + .isEqualTo(expectedAndroidVersionMetadata); + assertThat(trackGroups.get(0).getFormat(0).metadata.get(1)) + .isEqualTo(expectedTemporalLayersCountMetdata); + assertThat(trackGroups.get(0).getFormat(0).metadata.get(2)).isEqualTo(expectedSmtaEntry); + assertThat(trackGroups.get(0).getFormat(0).metadata.get(3)).isEqualTo(expectedSlowMotionData); + assertThat(trackGroups.get(0).getFormat(0).metadata.get(4)).isEqualTo(expectedCreationTime); // Video - 
assertThat(trackGroups.get(1).getFormat(0).metadata.length()).isEqualTo(4); - assertThat(trackGroups.get(1).getFormat(0).metadata.get(0)).isEqualTo(expectedMdtaEntry); - assertThat(trackGroups.get(1).getFormat(0).metadata.get(1)).isEqualTo(expectedSmtaEntry); - assertThat(trackGroups.get(1).getFormat(0).metadata.get(2)).isEqualTo(expectedSlowMotionData); - assertThat(trackGroups.get(1).getFormat(0).metadata.get(3)).isEqualTo(expectedCreationTime); + assertThat(trackGroups.get(1).getFormat(0).metadata.length()).isEqualTo(6); + assertThat(trackGroups.get(1).getFormat(0).metadata.get(0)) + .isEqualTo(expectedAndroidVersionMetadata); + assertThat(trackGroups.get(1).getFormat(0).metadata.get(1)) + .isEqualTo(expectedTemporalLayersCountMetdata); + assertThat(trackGroups.get(1).getFormat(0).metadata.get(2)).isEqualTo(expectedMdtaEntry); + assertThat(trackGroups.get(1).getFormat(0).metadata.get(3)).isEqualTo(expectedSmtaEntry); + assertThat(trackGroups.get(1).getFormat(0).metadata.get(4)).isEqualTo(expectedSlowMotionData); + assertThat(trackGroups.get(1).getFormat(0).metadata.get(5)).isEqualTo(expectedCreationTime); } @Test diff --git a/libraries/extractor/src/main/java/androidx/media3/extractor/mp4/MetadataUtil.java b/libraries/extractor/src/main/java/androidx/media3/extractor/mp4/MetadataUtil.java index be8c295220..a8610367da 100644 --- a/libraries/extractor/src/main/java/androidx/media3/extractor/mp4/MetadataUtil.java +++ b/libraries/extractor/src/main/java/androidx/media3/extractor/mp4/MetadataUtil.java @@ -299,17 +299,20 @@ import org.checkerframework.checker.nullness.compatqual.NullableType; if (udtaMetaMetadata != null) { formatMetadata = udtaMetaMetadata; } - } else if (trackType == C.TRACK_TYPE_VIDEO) { - // Populate only metadata keys that are known to be specific to video. 
- if (mdtaMetadata != null) { - for (int i = 0; i < mdtaMetadata.length(); i++) { - Metadata.Entry entry = mdtaMetadata.get(i); - if (entry instanceof MdtaMetadataEntry) { - MdtaMetadataEntry mdtaMetadataEntry = (MdtaMetadataEntry) entry; - if (MdtaMetadataEntry.KEY_ANDROID_CAPTURE_FPS.equals(mdtaMetadataEntry.key)) { - formatMetadata = new Metadata(mdtaMetadataEntry); - break; + } + + if (mdtaMetadata != null) { + for (int i = 0; i < mdtaMetadata.length(); i++) { + Metadata.Entry entry = mdtaMetadata.get(i); + if (entry instanceof MdtaMetadataEntry) { + MdtaMetadataEntry mdtaMetadataEntry = (MdtaMetadataEntry) entry; + // This key is present in the container level meta box. + if (mdtaMetadataEntry.key.equals(MdtaMetadataEntry.KEY_ANDROID_CAPTURE_FPS)) { + if (trackType == C.TRACK_TYPE_VIDEO) { + formatMetadata = formatMetadata.copyWithAppendedEntries(mdtaMetadataEntry); } + } else { + formatMetadata = formatMetadata.copyWithAppendedEntries(mdtaMetadataEntry); } } } diff --git a/libraries/test_data/src/test/assets/extractordumps/mp4/sample_android_slow_motion.mp4.0.dump b/libraries/test_data/src/test/assets/extractordumps/mp4/sample_android_slow_motion.mp4.0.dump index ff366ec919..e0e71760a5 100644 --- a/libraries/test_data/src/test/assets/extractordumps/mp4/sample_android_slow_motion.mp4.0.dump +++ b/libraries/test_data/src/test/assets/extractordumps/mp4/sample_android_slow_motion.mp4.0.dump @@ -22,7 +22,7 @@ track 0: colorRange = 1 colorTransfer = -1 hdrStaticInfo = length 0, hash 0 - metadata = entries=[mdta: key=com.android.capture.fps, value=240.0, Creation time: unset] + metadata = entries=[mdta: key=major_brand, value=mp42, mdta: key=minor_version, value=0, mdta: key=compatible_brands, value=isommp42, mdta: key=com.android.capture.fps, value=240.0, mdta: key=com.android.version, value=10, mdta: key=encoder, value=Lavf58.29.100, Creation time: unset] initializationData: data = length 22, hash 4CF81805 data = length 9, hash FBAFBA1C diff --git 
a/libraries/test_data/src/test/assets/extractordumps/mp4/sample_android_slow_motion.mp4.1.dump b/libraries/test_data/src/test/assets/extractordumps/mp4/sample_android_slow_motion.mp4.1.dump index ff366ec919..e0e71760a5 100644 --- a/libraries/test_data/src/test/assets/extractordumps/mp4/sample_android_slow_motion.mp4.1.dump +++ b/libraries/test_data/src/test/assets/extractordumps/mp4/sample_android_slow_motion.mp4.1.dump @@ -22,7 +22,7 @@ track 0: colorRange = 1 colorTransfer = -1 hdrStaticInfo = length 0, hash 0 - metadata = entries=[mdta: key=com.android.capture.fps, value=240.0, Creation time: unset] + metadata = entries=[mdta: key=major_brand, value=mp42, mdta: key=minor_version, value=0, mdta: key=compatible_brands, value=isommp42, mdta: key=com.android.capture.fps, value=240.0, mdta: key=com.android.version, value=10, mdta: key=encoder, value=Lavf58.29.100, Creation time: unset] initializationData: data = length 22, hash 4CF81805 data = length 9, hash FBAFBA1C diff --git a/libraries/test_data/src/test/assets/extractordumps/mp4/sample_android_slow_motion.mp4.2.dump b/libraries/test_data/src/test/assets/extractordumps/mp4/sample_android_slow_motion.mp4.2.dump index ff366ec919..e0e71760a5 100644 --- a/libraries/test_data/src/test/assets/extractordumps/mp4/sample_android_slow_motion.mp4.2.dump +++ b/libraries/test_data/src/test/assets/extractordumps/mp4/sample_android_slow_motion.mp4.2.dump @@ -22,7 +22,7 @@ track 0: colorRange = 1 colorTransfer = -1 hdrStaticInfo = length 0, hash 0 - metadata = entries=[mdta: key=com.android.capture.fps, value=240.0, Creation time: unset] + metadata = entries=[mdta: key=major_brand, value=mp42, mdta: key=minor_version, value=0, mdta: key=compatible_brands, value=isommp42, mdta: key=com.android.capture.fps, value=240.0, mdta: key=com.android.version, value=10, mdta: key=encoder, value=Lavf58.29.100, Creation time: unset] initializationData: data = length 22, hash 4CF81805 data = length 9, hash FBAFBA1C diff --git 
a/libraries/test_data/src/test/assets/extractordumps/mp4/sample_android_slow_motion.mp4.3.dump b/libraries/test_data/src/test/assets/extractordumps/mp4/sample_android_slow_motion.mp4.3.dump index ff366ec919..e0e71760a5 100644 --- a/libraries/test_data/src/test/assets/extractordumps/mp4/sample_android_slow_motion.mp4.3.dump +++ b/libraries/test_data/src/test/assets/extractordumps/mp4/sample_android_slow_motion.mp4.3.dump @@ -22,7 +22,7 @@ track 0: colorRange = 1 colorTransfer = -1 hdrStaticInfo = length 0, hash 0 - metadata = entries=[mdta: key=com.android.capture.fps, value=240.0, Creation time: unset] + metadata = entries=[mdta: key=major_brand, value=mp42, mdta: key=minor_version, value=0, mdta: key=compatible_brands, value=isommp42, mdta: key=com.android.capture.fps, value=240.0, mdta: key=com.android.version, value=10, mdta: key=encoder, value=Lavf58.29.100, Creation time: unset] initializationData: data = length 22, hash 4CF81805 data = length 9, hash FBAFBA1C diff --git a/libraries/test_data/src/test/assets/extractordumps/mp4/sample_android_slow_motion.mp4.unknown_length.dump b/libraries/test_data/src/test/assets/extractordumps/mp4/sample_android_slow_motion.mp4.unknown_length.dump index ff366ec919..e0e71760a5 100644 --- a/libraries/test_data/src/test/assets/extractordumps/mp4/sample_android_slow_motion.mp4.unknown_length.dump +++ b/libraries/test_data/src/test/assets/extractordumps/mp4/sample_android_slow_motion.mp4.unknown_length.dump @@ -22,7 +22,7 @@ track 0: colorRange = 1 colorTransfer = -1 hdrStaticInfo = length 0, hash 0 - metadata = entries=[mdta: key=com.android.capture.fps, value=240.0, Creation time: unset] + metadata = entries=[mdta: key=major_brand, value=mp42, mdta: key=minor_version, value=0, mdta: key=compatible_brands, value=isommp42, mdta: key=com.android.capture.fps, value=240.0, mdta: key=com.android.version, value=10, mdta: key=encoder, value=Lavf58.29.100, Creation time: unset] initializationData: data = length 22, hash 4CF81805 data = 
length 9, hash FBAFBA1C diff --git a/libraries/test_data/src/test/assets/extractordumps/mp4/sample_with_color_info.mp4.0.dump b/libraries/test_data/src/test/assets/extractordumps/mp4/sample_with_color_info.mp4.0.dump index 5efc2dd070..db591713a7 100644 --- a/libraries/test_data/src/test/assets/extractordumps/mp4/sample_with_color_info.mp4.0.dump +++ b/libraries/test_data/src/test/assets/extractordumps/mp4/sample_with_color_info.mp4.0.dump @@ -23,7 +23,7 @@ track 0: colorRange = 2 colorTransfer = 7 hdrStaticInfo = length 0, hash 0 - metadata = entries=[Creation time: 1621959711000] + metadata = entries=[mdta: key=com.apple.quicktime.location.accuracy.horizontal, value=3.754789, mdta: key=com.apple.quicktime.location.ISO6709, value=+37.7450-122.4301+066.374/, mdta: key=com.apple.quicktime.make, value=Apple, mdta: key=com.apple.quicktime.model, value=iPhone 12 Pro Max, mdta: key=com.apple.quicktime.software, value=14.5.1, mdta: key=com.apple.quicktime.creationdate, value=2021-05-25T09:21:51-0700, Creation time: 1621959711000] initializationData: data = length 526, hash 7B3FC433 sample 0: @@ -75,7 +75,7 @@ track 1: sampleRate = 44100 encoderPadding = 2204 language = und - metadata = entries=[Creation time: 1621959711000] + metadata = entries=[mdta: key=com.apple.quicktime.location.accuracy.horizontal, value=3.754789, mdta: key=com.apple.quicktime.location.ISO6709, value=+37.7450-122.4301+066.374/, mdta: key=com.apple.quicktime.make, value=Apple, mdta: key=com.apple.quicktime.model, value=iPhone 12 Pro Max, mdta: key=com.apple.quicktime.software, value=14.5.1, mdta: key=com.apple.quicktime.creationdate, value=2021-05-25T09:21:51-0700, Creation time: 1621959711000] initializationData: data = length 2, hash 5FF sample 0: diff --git a/libraries/test_data/src/test/assets/extractordumps/mp4/sample_with_color_info.mp4.1.dump b/libraries/test_data/src/test/assets/extractordumps/mp4/sample_with_color_info.mp4.1.dump index 301b4c4941..fa7930bac7 100644 --- 
a/libraries/test_data/src/test/assets/extractordumps/mp4/sample_with_color_info.mp4.1.dump +++ b/libraries/test_data/src/test/assets/extractordumps/mp4/sample_with_color_info.mp4.1.dump @@ -23,7 +23,7 @@ track 0: colorRange = 2 colorTransfer = 7 hdrStaticInfo = length 0, hash 0 - metadata = entries=[Creation time: 1621959711000] + metadata = entries=[mdta: key=com.apple.quicktime.location.accuracy.horizontal, value=3.754789, mdta: key=com.apple.quicktime.location.ISO6709, value=+37.7450-122.4301+066.374/, mdta: key=com.apple.quicktime.make, value=Apple, mdta: key=com.apple.quicktime.model, value=iPhone 12 Pro Max, mdta: key=com.apple.quicktime.software, value=14.5.1, mdta: key=com.apple.quicktime.creationdate, value=2021-05-25T09:21:51-0700, Creation time: 1621959711000] initializationData: data = length 526, hash 7B3FC433 sample 0: @@ -75,7 +75,7 @@ track 1: sampleRate = 44100 encoderPadding = 2204 language = und - metadata = entries=[Creation time: 1621959711000] + metadata = entries=[mdta: key=com.apple.quicktime.location.accuracy.horizontal, value=3.754789, mdta: key=com.apple.quicktime.location.ISO6709, value=+37.7450-122.4301+066.374/, mdta: key=com.apple.quicktime.make, value=Apple, mdta: key=com.apple.quicktime.model, value=iPhone 12 Pro Max, mdta: key=com.apple.quicktime.software, value=14.5.1, mdta: key=com.apple.quicktime.creationdate, value=2021-05-25T09:21:51-0700, Creation time: 1621959711000] initializationData: data = length 2, hash 5FF sample 0: diff --git a/libraries/test_data/src/test/assets/extractordumps/mp4/sample_with_color_info.mp4.2.dump b/libraries/test_data/src/test/assets/extractordumps/mp4/sample_with_color_info.mp4.2.dump index 71e45ab216..ac44fcc3f1 100644 --- a/libraries/test_data/src/test/assets/extractordumps/mp4/sample_with_color_info.mp4.2.dump +++ b/libraries/test_data/src/test/assets/extractordumps/mp4/sample_with_color_info.mp4.2.dump @@ -23,7 +23,7 @@ track 0: colorRange = 2 colorTransfer = 7 hdrStaticInfo = length 0, hash 0 
- metadata = entries=[Creation time: 1621959711000] + metadata = entries=[mdta: key=com.apple.quicktime.location.accuracy.horizontal, value=3.754789, mdta: key=com.apple.quicktime.location.ISO6709, value=+37.7450-122.4301+066.374/, mdta: key=com.apple.quicktime.make, value=Apple, mdta: key=com.apple.quicktime.model, value=iPhone 12 Pro Max, mdta: key=com.apple.quicktime.software, value=14.5.1, mdta: key=com.apple.quicktime.creationdate, value=2021-05-25T09:21:51-0700, Creation time: 1621959711000] initializationData: data = length 526, hash 7B3FC433 sample 0: @@ -75,7 +75,7 @@ track 1: sampleRate = 44100 encoderPadding = 2204 language = und - metadata = entries=[Creation time: 1621959711000] + metadata = entries=[mdta: key=com.apple.quicktime.location.accuracy.horizontal, value=3.754789, mdta: key=com.apple.quicktime.location.ISO6709, value=+37.7450-122.4301+066.374/, mdta: key=com.apple.quicktime.make, value=Apple, mdta: key=com.apple.quicktime.model, value=iPhone 12 Pro Max, mdta: key=com.apple.quicktime.software, value=14.5.1, mdta: key=com.apple.quicktime.creationdate, value=2021-05-25T09:21:51-0700, Creation time: 1621959711000] initializationData: data = length 2, hash 5FF sample 0: diff --git a/libraries/test_data/src/test/assets/extractordumps/mp4/sample_with_color_info.mp4.3.dump b/libraries/test_data/src/test/assets/extractordumps/mp4/sample_with_color_info.mp4.3.dump index b02307a43b..23d6d28e09 100644 --- a/libraries/test_data/src/test/assets/extractordumps/mp4/sample_with_color_info.mp4.3.dump +++ b/libraries/test_data/src/test/assets/extractordumps/mp4/sample_with_color_info.mp4.3.dump @@ -23,7 +23,7 @@ track 0: colorRange = 2 colorTransfer = 7 hdrStaticInfo = length 0, hash 0 - metadata = entries=[Creation time: 1621959711000] + metadata = entries=[mdta: key=com.apple.quicktime.location.accuracy.horizontal, value=3.754789, mdta: key=com.apple.quicktime.location.ISO6709, value=+37.7450-122.4301+066.374/, mdta: key=com.apple.quicktime.make, 
value=Apple, mdta: key=com.apple.quicktime.model, value=iPhone 12 Pro Max, mdta: key=com.apple.quicktime.software, value=14.5.1, mdta: key=com.apple.quicktime.creationdate, value=2021-05-25T09:21:51-0700, Creation time: 1621959711000] initializationData: data = length 526, hash 7B3FC433 sample 0: @@ -75,7 +75,7 @@ track 1: sampleRate = 44100 encoderPadding = 2204 language = und - metadata = entries=[Creation time: 1621959711000] + metadata = entries=[mdta: key=com.apple.quicktime.location.accuracy.horizontal, value=3.754789, mdta: key=com.apple.quicktime.location.ISO6709, value=+37.7450-122.4301+066.374/, mdta: key=com.apple.quicktime.make, value=Apple, mdta: key=com.apple.quicktime.model, value=iPhone 12 Pro Max, mdta: key=com.apple.quicktime.software, value=14.5.1, mdta: key=com.apple.quicktime.creationdate, value=2021-05-25T09:21:51-0700, Creation time: 1621959711000] initializationData: data = length 2, hash 5FF sample 0: diff --git a/libraries/test_data/src/test/assets/extractordumps/mp4/sample_with_color_info.mp4.unknown_length.dump b/libraries/test_data/src/test/assets/extractordumps/mp4/sample_with_color_info.mp4.unknown_length.dump index 5efc2dd070..db591713a7 100644 --- a/libraries/test_data/src/test/assets/extractordumps/mp4/sample_with_color_info.mp4.unknown_length.dump +++ b/libraries/test_data/src/test/assets/extractordumps/mp4/sample_with_color_info.mp4.unknown_length.dump @@ -23,7 +23,7 @@ track 0: colorRange = 2 colorTransfer = 7 hdrStaticInfo = length 0, hash 0 - metadata = entries=[Creation time: 1621959711000] + metadata = entries=[mdta: key=com.apple.quicktime.location.accuracy.horizontal, value=3.754789, mdta: key=com.apple.quicktime.location.ISO6709, value=+37.7450-122.4301+066.374/, mdta: key=com.apple.quicktime.make, value=Apple, mdta: key=com.apple.quicktime.model, value=iPhone 12 Pro Max, mdta: key=com.apple.quicktime.software, value=14.5.1, mdta: key=com.apple.quicktime.creationdate, value=2021-05-25T09:21:51-0700, Creation time: 
1621959711000] initializationData: data = length 526, hash 7B3FC433 sample 0: @@ -75,7 +75,7 @@ track 1: sampleRate = 44100 encoderPadding = 2204 language = und - metadata = entries=[Creation time: 1621959711000] + metadata = entries=[mdta: key=com.apple.quicktime.location.accuracy.horizontal, value=3.754789, mdta: key=com.apple.quicktime.location.ISO6709, value=+37.7450-122.4301+066.374/, mdta: key=com.apple.quicktime.make, value=Apple, mdta: key=com.apple.quicktime.model, value=iPhone 12 Pro Max, mdta: key=com.apple.quicktime.software, value=14.5.1, mdta: key=com.apple.quicktime.creationdate, value=2021-05-25T09:21:51-0700, Creation time: 1621959711000] initializationData: data = length 2, hash 5FF sample 0: diff --git a/libraries/test_data/src/test/assets/transformerdumps/mp4/sample.mp4.with_custom_metadata.dump b/libraries/test_data/src/test/assets/transformerdumps/mp4/sample.mp4.with_custom_metadata.dump new file mode 100644 index 0000000000..0134c4666c --- /dev/null +++ b/libraries/test_data/src/test/assets/transformerdumps/mp4/sample.mp4.with_custom_metadata.dump @@ -0,0 +1,339 @@ +seekMap: + isSeekable = true + duration = 1065600 + getPosition(0) = [[timeUs=0, position=44]] + getPosition(1) = [[timeUs=0, position=44]] + getPosition(532800) = [[timeUs=0, position=44]] + getPosition(1065600) = [[timeUs=0, position=44]] +numberOfTracks = 2 +track 0: + total output bytes = 89876 + sample count = 30 + format 0: + id = 1 + sampleMimeType = video/avc + codecs = avc1.64001F + maxInputSize = 36722 + width = 1080 + height = 720 + frameRate = 32.113037 + metadata = entries=[mdta: key=StringKey, value=StringValue, mdta: key=FloatKey, value=600.0, xyz: latitude=40.68, longitude=-74.5, Creation time: 1464714095000] + initializationData: + data = length 29, hash 4746B5D9 + data = length 10, hash 7A0D0F2B + sample 0: + time = 0 + flags = 1 + data = length 36692, hash D216076E + sample 1: + time = 66722 + flags = 0 + data = length 5312, hash D45D3CA0 + sample 2: + time = 
33355 + flags = 0 + data = length 599, hash 1BE7812D + sample 3: + time = 200200 + flags = 0 + data = length 7735, hash 4490F110 + sample 4: + time = 133455 + flags = 0 + data = length 987, hash 560B5036 + sample 5: + time = 100100 + flags = 0 + data = length 673, hash ED7CD8C7 + sample 6: + time = 166822 + flags = 0 + data = length 523, hash 3020DF50 + sample 7: + time = 333655 + flags = 0 + data = length 6061, hash 736C72B2 + sample 8: + time = 266922 + flags = 0 + data = length 992, hash FE132F23 + sample 9: + time = 233555 + flags = 0 + data = length 623, hash 5B2C1816 + sample 10: + time = 300300 + flags = 0 + data = length 421, hash 742E69C1 + sample 11: + time = 433755 + flags = 0 + data = length 4899, hash F72F86A1 + sample 12: + time = 400400 + flags = 0 + data = length 568, hash 519A8E50 + sample 13: + time = 367022 + flags = 0 + data = length 620, hash 3990AA39 + sample 14: + time = 567222 + flags = 0 + data = length 5450, hash F06EC4AA + sample 15: + time = 500500 + flags = 0 + data = length 1051, hash 92DFA63A + sample 16: + time = 467122 + flags = 0 + data = length 874, hash 69587FB4 + sample 17: + time = 533855 + flags = 0 + data = length 781, hash 36BE495B + sample 18: + time = 700700 + flags = 0 + data = length 4725, hash AC0C8CD3 + sample 19: + time = 633955 + flags = 0 + data = length 1022, hash 5D8BFF34 + sample 20: + time = 600600 + flags = 0 + data = length 790, hash 99413A99 + sample 21: + time = 667322 + flags = 0 + data = length 610, hash 5E129290 + sample 22: + time = 834155 + flags = 0 + data = length 2751, hash 769974CB + sample 23: + time = 767422 + flags = 0 + data = length 745, hash B78A477A + sample 24: + time = 734055 + flags = 0 + data = length 621, hash CF741E7A + sample 25: + time = 800800 + flags = 0 + data = length 505, hash 1DB4894E + sample 26: + time = 967622 + flags = 0 + data = length 1268, hash C15348DC + sample 27: + time = 900900 + flags = 0 + data = length 880, hash C2DE85D0 + sample 28: + time = 867522 + flags = 0 + 
data = length 530, hash C98BC6A8 + sample 29: + time = 934255 + flags = 536870912 + data = length 568, hash 4FE5C8EA +track 1: + total output bytes = 9529 + sample count = 45 + format 0: + peakBitrate = 200000 + id = 2 + sampleMimeType = audio/mp4a-latm + codecs = mp4a.40.2 + maxInputSize = 294 + channelCount = 1 + sampleRate = 44100 + language = und + metadata = entries=[mdta: key=StringKey, value=StringValue, mdta: key=FloatKey, value=600.0, xyz: latitude=40.68, longitude=-74.5, Creation time: 1464714095000] + initializationData: + data = length 2, hash 5F7 + sample 0: + time = 0 + flags = 1 + data = length 23, hash 47DE9131 + sample 1: + time = 67208 + flags = 1 + data = length 6, hash 31EC5206 + sample 2: + time = 90437 + flags = 1 + data = length 148, hash 894A176B + sample 3: + time = 113645 + flags = 1 + data = length 189, hash CEF235A1 + sample 4: + time = 136875 + flags = 1 + data = length 205, hash BBF5F7B0 + sample 5: + time = 160083 + flags = 1 + data = length 210, hash F278B193 + sample 6: + time = 183312 + flags = 1 + data = length 210, hash 82DA1589 + sample 7: + time = 206520 + flags = 1 + data = length 207, hash 5BE231DF + sample 8: + time = 229750 + flags = 1 + data = length 225, hash 18819EE1 + sample 9: + time = 252958 + flags = 1 + data = length 215, hash CA7FA67B + sample 10: + time = 276187 + flags = 1 + data = length 211, hash 581A1C18 + sample 11: + time = 299416 + flags = 1 + data = length 216, hash ADB88187 + sample 12: + time = 322625 + flags = 1 + data = length 229, hash 2E8BA4DC + sample 13: + time = 345854 + flags = 1 + data = length 232, hash 22F0C510 + sample 14: + time = 369062 + flags = 1 + data = length 235, hash 867AD0DC + sample 15: + time = 392291 + flags = 1 + data = length 231, hash 84E823A8 + sample 16: + time = 415500 + flags = 1 + data = length 226, hash 1BEF3A95 + sample 17: + time = 438729 + flags = 1 + data = length 216, hash EAA345AE + sample 18: + time = 461958 + flags = 1 + data = length 229, hash 6957411F + sample 
19: + time = 485166 + flags = 1 + data = length 219, hash 41275022 + sample 20: + time = 508395 + flags = 1 + data = length 241, hash 6495DF96 + sample 21: + time = 531604 + flags = 1 + data = length 228, hash 63D95906 + sample 22: + time = 554833 + flags = 1 + data = length 238, hash 34F676F9 + sample 23: + time = 578041 + flags = 1 + data = length 234, hash E5CBC045 + sample 24: + time = 601270 + flags = 1 + data = length 231, hash 5FC43661 + sample 25: + time = 624479 + flags = 1 + data = length 217, hash 682708ED + sample 26: + time = 647708 + flags = 1 + data = length 239, hash D43780FC + sample 27: + time = 670937 + flags = 1 + data = length 243, hash C5E17980 + sample 28: + time = 694145 + flags = 1 + data = length 231, hash AC5837BA + sample 29: + time = 717375 + flags = 1 + data = length 230, hash 169EE895 + sample 30: + time = 740583 + flags = 1 + data = length 238, hash C48FF3F1 + sample 31: + time = 763812 + flags = 1 + data = length 225, hash 531E4599 + sample 32: + time = 787020 + flags = 1 + data = length 232, hash CB3C6B8D + sample 33: + time = 810250 + flags = 1 + data = length 243, hash F8C94C7 + sample 34: + time = 833458 + flags = 1 + data = length 232, hash A646A7D0 + sample 35: + time = 856687 + flags = 1 + data = length 237, hash E8B787A5 + sample 36: + time = 879916 + flags = 1 + data = length 228, hash 3FA7A29F + sample 37: + time = 903125 + flags = 1 + data = length 235, hash B9B33B0A + sample 38: + time = 926354 + flags = 1 + data = length 264, hash 71A4869E + sample 39: + time = 949562 + flags = 1 + data = length 257, hash D049B54C + sample 40: + time = 972791 + flags = 1 + data = length 227, hash 66757231 + sample 41: + time = 996000 + flags = 1 + data = length 227, hash BD374F1B + sample 42: + time = 1019229 + flags = 1 + data = length 235, hash 999477F6 + sample 43: + time = 1042437 + flags = 1 + data = length 229, hash FFF98DF0 + sample 44: + time = 1065666 + flags = 536870913 + data = length 6, hash 31B22286 +tracksEnded = true diff 
--git a/libraries/test_data/src/test/assets/transformerdumps/mp4/sample_sef_slow_motion.mp4.dump b/libraries/test_data/src/test/assets/transformerdumps/mp4/sample_sef_slow_motion.mp4.dump index e5e01d6bc7..0a1e23e83a 100644 --- a/libraries/test_data/src/test/assets/transformerdumps/mp4/sample_sef_slow_motion.mp4.dump +++ b/libraries/test_data/src/test/assets/transformerdumps/mp4/sample_sef_slow_motion.mp4.dump @@ -6,19 +6,19 @@ format 0: width = 320 height = 240 frameRate = 29.523811 - metadata = entries=[mdta: key=com.android.capture.fps, value=240.0, smta: captureFrameRate=240.0, svcTemporalLayerCount=4, SlowMotion: segments=[Segment: startTimeMs=88, endTimeMs=879, speedDivisor=2, Segment: startTimeMs=1255, endTimeMs=1970, speedDivisor=8], Creation time: 1604060090000] + metadata = entries=[mdta: key=com.android.version, value=10, mdta: key=com.android.video.temporal_layers_count, value=4, mdta: key=com.android.capture.fps, value=240.0, smta: captureFrameRate=240.0, svcTemporalLayerCount=4, SlowMotion: segments=[Segment: startTimeMs=88, endTimeMs=879, speedDivisor=2, Segment: startTimeMs=1255, endTimeMs=1970, speedDivisor=8], Creation time: 1604060090000] initializationData: data = length 33, hash D3FB879D data = length 10, hash 7A0D0F2B -container metadata = entries=[mdta: key=com.android.capture.fps, value=240.0, smta: captureFrameRate=240.0, svcTemporalLayerCount=4, SlowMotion: segments=[Segment: startTimeMs=88, endTimeMs=879, speedDivisor=2, Segment: startTimeMs=1255, endTimeMs=1970, speedDivisor=8], Creation time: 1604060090000] +container metadata = entries=[mdta: key=com.android.version, value=10, mdta: key=com.android.video.temporal_layers_count, value=4, mdta: key=com.android.capture.fps, value=240.0, smta: captureFrameRate=240.0, svcTemporalLayerCount=4, SlowMotion: segments=[Segment: startTimeMs=88, endTimeMs=879, speedDivisor=2, Segment: startTimeMs=1255, endTimeMs=1970, speedDivisor=8], Creation time: 1604060090000] format 1: averageBitrate = 131072 
sampleMimeType = audio/mp4a-latm channelCount = 2 sampleRate = 12000 pcmEncoding = 2 - metadata = entries=[smta: captureFrameRate=240.0, svcTemporalLayerCount=4, SlowMotion: segments=[Segment: startTimeMs=88, endTimeMs=879, speedDivisor=2, Segment: startTimeMs=1255, endTimeMs=1970, speedDivisor=8], Creation time: 1604060090000] -container metadata = entries=[smta: captureFrameRate=240.0, svcTemporalLayerCount=4, SlowMotion: segments=[Segment: startTimeMs=88, endTimeMs=879, speedDivisor=2, Segment: startTimeMs=1255, endTimeMs=1970, speedDivisor=8], Creation time: 1604060090000] + metadata = entries=[mdta: key=com.android.version, value=10, mdta: key=com.android.video.temporal_layers_count, value=4, smta: captureFrameRate=240.0, svcTemporalLayerCount=4, SlowMotion: segments=[Segment: startTimeMs=88, endTimeMs=879, speedDivisor=2, Segment: startTimeMs=1255, endTimeMs=1970, speedDivisor=8], Creation time: 1604060090000] +container metadata = entries=[mdta: key=com.android.version, value=10, mdta: key=com.android.video.temporal_layers_count, value=4, smta: captureFrameRate=240.0, svcTemporalLayerCount=4, SlowMotion: segments=[Segment: startTimeMs=88, endTimeMs=879, speedDivisor=2, Segment: startTimeMs=1255, endTimeMs=1970, speedDivisor=8], Creation time: 1604060090000] sample: trackIndex = 1 dataHashCode = -212376212 diff --git a/libraries/transformer/src/main/java/androidx/media3/transformer/InAppMuxer.java b/libraries/transformer/src/main/java/androidx/media3/transformer/InAppMuxer.java index 25e6aaea1d..24aa12329e 100644 --- a/libraries/transformer/src/main/java/androidx/media3/transformer/InAppMuxer.java +++ b/libraries/transformer/src/main/java/androidx/media3/transformer/InAppMuxer.java @@ -24,6 +24,7 @@ import androidx.media3.common.Format; import androidx.media3.common.Metadata; import androidx.media3.common.MimeTypes; import androidx.media3.common.util.UnstableApi; +import androidx.media3.common.util.Util; import androidx.media3.container.CreationTime; import 
androidx.media3.container.MdtaMetadataEntry; import androidx.media3.container.Mp4LocationData; @@ -186,8 +187,11 @@ public final class InAppMuxer implements Muxer { || entry instanceof XmpData || entry instanceof CreationTime || (entry instanceof MdtaMetadataEntry - && ((MdtaMetadataEntry) entry) - .key.equals(MdtaMetadataEntry.KEY_ANDROID_CAPTURE_FPS))) { + && (((MdtaMetadataEntry) entry).key.equals(MdtaMetadataEntry.KEY_ANDROID_CAPTURE_FPS) + || ((MdtaMetadataEntry) entry).typeIndicator + == MdtaMetadataEntry.TYPE_INDICATOR_STRING + || ((MdtaMetadataEntry) entry).typeIndicator + == MdtaMetadataEntry.TYPE_INDICATOR_FLOAT32))) { metadataEntries.add(entry); } } @@ -232,6 +236,10 @@ public final class InAppMuxer implements Muxer { if (mdtaMetadataEntry.key.equals(MdtaMetadataEntry.KEY_ANDROID_CAPTURE_FPS)) { byte[] captureFps = mdtaMetadataEntry.value; mp4Muxer.setCaptureFps(ByteBuffer.wrap(captureFps).getFloat()); + } else if (mdtaMetadataEntry.typeIndicator == MdtaMetadataEntry.TYPE_INDICATOR_STRING) { + mp4Muxer.addMetadata(mdtaMetadataEntry.key, Util.fromUtf8Bytes(mdtaMetadataEntry.value)); + } else if (mdtaMetadataEntry.typeIndicator == MdtaMetadataEntry.TYPE_INDICATOR_FLOAT32) { + mp4Muxer.addMetadata(mdtaMetadataEntry.key, Util.toFloat(mdtaMetadataEntry.value)); } else { throw new IllegalStateException("Unsupported MdtaMetadataEntry " + mdtaMetadataEntry.key); } diff --git a/libraries/transformer/src/test/java/androidx/media3/transformer/TransformerWithInAppMuxerEndToEndTest.java b/libraries/transformer/src/test/java/androidx/media3/transformer/TransformerWithInAppMuxerEndToEndTest.java index 087f506bd7..e2a0939a02 100644 --- a/libraries/transformer/src/test/java/androidx/media3/transformer/TransformerWithInAppMuxerEndToEndTest.java +++ b/libraries/transformer/src/test/java/androidx/media3/transformer/TransformerWithInAppMuxerEndToEndTest.java @@ -116,13 +116,13 @@ public class TransformerWithInAppMuxerEndToEndTest { new InAppMuxer.Factory( 
DefaultMuxer.Factory.DEFAULT_MAX_DELAY_BETWEEN_SAMPLES_MS, metadataEntries -> { - byte[] captureFps = new byte[] {66, 112, 0, 0}; // 60.0f + float captureFps = 60.0f; metadataEntries.add( new MdtaMetadataEntry( MdtaMetadataEntry.KEY_ANDROID_CAPTURE_FPS, - /* value= */ captureFps, + /* value= */ Util.toByteArray(captureFps), /* localeIndicator= */ 0, - MdtaMetadataEntry.TYPE_INDICATOR_FLOAT)); + MdtaMetadataEntry.TYPE_INDICATOR_FLOAT32)); }); Transformer transformer = new Transformer.Builder(context) @@ -166,4 +166,46 @@ public class TransformerWithInAppMuxerEndToEndTest { DumpFileAsserts.assertOutput( context, fakeExtractorOutput, TestUtil.getDumpFileName(H264_MP4 + ".with_creation_time")); } + + @Test + public void transmux_withCustomeMetadata_outputMatchesExpected() throws Exception { + Muxer.Factory inAppMuxerFactory = + new InAppMuxer.Factory( + DefaultMuxer.Factory.DEFAULT_MAX_DELAY_BETWEEN_SAMPLES_MS, + metadataEntries -> { + String stringKey = "StringKey"; + String stringValue = "StringValue"; + metadataEntries.add( + new MdtaMetadataEntry( + stringKey, + Util.getUtf8Bytes(stringValue), + /* localeIndicator= */ 0, + MdtaMetadataEntry.TYPE_INDICATOR_STRING)); + String floatKey = "FloatKey"; + float floatValue = 600.0f; + metadataEntries.add( + new MdtaMetadataEntry( + floatKey, + Util.toByteArray(floatValue), + /* localeIndicator= */ 0, + MdtaMetadataEntry.TYPE_INDICATOR_FLOAT32)); + }); + Transformer transformer = + new Transformer.Builder(context) + .setClock(new FakeClock(/* isAutoAdvancing= */ true)) + .setMuxerFactory(inAppMuxerFactory) + .build(); + MediaItem mediaItem = MediaItem.fromUri(Uri.parse(MP4_FILE_ASSET_DIRECTORY + H264_MP4)); + + transformer.start(mediaItem, outputPath); + TransformerTestRunner.runLooper(transformer); + + FakeExtractorOutput fakeExtractorOutput = + androidx.media3.test.utils.TestUtil.extractAllSamplesFromFilePath( + new Mp4Extractor(), checkNotNull(outputPath)); + // [mdta: key=StringKey, value=StringValue, mdta: 
key=FloatKey, value=600.0] in track metadata + // dump + DumpFileAsserts.assertOutput( + context, fakeExtractorOutput, TestUtil.getDumpFileName(H264_MP4 + ".with_custom_metadata")); + } }