Add support for adding capture FPS via transformer

PiperOrigin-RevId: 534814892
This commit is contained in:
sheenachhabra 2023-05-24 14:38:59 +01:00 committed by tonihei
parent 71facd825e
commit a944ffecb9
11 changed files with 401 additions and 10 deletions

View File

@ -21,6 +21,7 @@ import androidx.annotation.Nullable;
import androidx.media3.common.Metadata;
import androidx.media3.common.util.UnstableApi;
import androidx.media3.common.util.Util;
import java.nio.ByteBuffer;
import java.util.Arrays;
/**
@ -33,6 +34,8 @@ public final class MdtaMetadataEntry implements Metadata.Entry {
/** Key for the capture frame rate (in frames per second). */
public static final String KEY_ANDROID_CAPTURE_FPS = "com.android.capture.fps";
/**
 * Type indicator for entries whose payload is a 32-bit float. {@link #toString()} decodes such
 * payloads with {@link ByteBuffer#getFloat()}, i.e. in big-endian byte order.
 */
public static final int TYPE_INDICATOR_FLOAT = 23;
/** The metadata key name. */
public final String key;
/** The payload. The interpretation of the value depends on {@link #typeIndicator}. */
@ -84,7 +87,11 @@ public final class MdtaMetadataEntry implements Metadata.Entry {
/**
 * Returns a human-readable description of this entry in the form {@code mdta: key=..., value=...}.
 *
 * <p>If {@link #typeIndicator} is {@link #TYPE_INDICATOR_FLOAT} and the payload holds at least
 * four bytes, the value is printed as the float decoded from its first four bytes. In every other
 * case the raw payload is printed as a hex string.
 */
@Override
public String toString() {
  // ByteBuffer#getFloat() throws BufferUnderflowException when fewer than four bytes remain, so a
  // truncated float payload falls back to hex instead of making toString() throw.
  boolean isDecodableFloat = typeIndicator == TYPE_INDICATOR_FLOAT && value.length >= 4;
  String formattedValue =
      isDecodableFloat
          ? Float.toString(ByteBuffer.wrap(value).getFloat())
          : Util.toHexString(value);
  return "mdta: key=" + key + ", value=" + formattedValue;
}
// Parcelable implementation.

View File

@ -22,7 +22,7 @@ track 0:
colorRange = 1
colorTransfer = -1
hdrStaticInfo = length 0, hash 0
metadata = entries=[mdta: key=com.android.capture.fps, value=43700000]
metadata = entries=[mdta: key=com.android.capture.fps, value=240.0]
initializationData:
data = length 22, hash 4CF81805
data = length 9, hash FBAFBA1C

View File

@ -22,7 +22,7 @@ track 0:
colorRange = 1
colorTransfer = -1
hdrStaticInfo = length 0, hash 0
metadata = entries=[mdta: key=com.android.capture.fps, value=43700000]
metadata = entries=[mdta: key=com.android.capture.fps, value=240.0]
initializationData:
data = length 22, hash 4CF81805
data = length 9, hash FBAFBA1C

View File

@ -22,7 +22,7 @@ track 0:
colorRange = 1
colorTransfer = -1
hdrStaticInfo = length 0, hash 0
metadata = entries=[mdta: key=com.android.capture.fps, value=43700000]
metadata = entries=[mdta: key=com.android.capture.fps, value=240.0]
initializationData:
data = length 22, hash 4CF81805
data = length 9, hash FBAFBA1C

View File

@ -22,7 +22,7 @@ track 0:
colorRange = 1
colorTransfer = -1
hdrStaticInfo = length 0, hash 0
metadata = entries=[mdta: key=com.android.capture.fps, value=43700000]
metadata = entries=[mdta: key=com.android.capture.fps, value=240.0]
initializationData:
data = length 22, hash 4CF81805
data = length 9, hash FBAFBA1C

View File

@ -22,7 +22,7 @@ track 0:
colorRange = 1
colorTransfer = -1
hdrStaticInfo = length 0, hash 0
metadata = entries=[mdta: key=com.android.capture.fps, value=43700000]
metadata = entries=[mdta: key=com.android.capture.fps, value=240.0]
initializationData:
data = length 22, hash 4CF81805
data = length 9, hash FBAFBA1C

View File

@ -21,7 +21,7 @@ track 0:
colorRange = 1
colorTransfer = -1
hdrStaticInfo = length 0, hash 0
metadata = entries=[mdta: key=com.android.capture.fps, value=42f00000]
metadata = entries=[mdta: key=com.android.capture.fps, value=120.0]
initializationData:
data = length 28, hash 410B510
data = length 9, hash FBADD682

View File

@ -0,0 +1,339 @@
seekMap:
isSeekable = true
duration = 1065600
getPosition(0) = [[timeUs=0, position=44]]
getPosition(1) = [[timeUs=0, position=44]]
getPosition(532800) = [[timeUs=0, position=44]]
getPosition(1065600) = [[timeUs=0, position=44]]
numberOfTracks = 2
track 0:
total output bytes = 89876
sample count = 30
format 0:
id = 1
sampleMimeType = video/avc
codecs = avc1.64001F
maxInputSize = 36722
width = 1080
height = 720
frameRate = 32.113037
metadata = entries=[mdta: key=com.android.capture.fps, value=60.0, xyz: latitude=40.68, longitude=-74.5]
initializationData:
data = length 29, hash 4746B5D9
data = length 10, hash 7A0D0F2B
sample 0:
time = 0
flags = 1
data = length 36692, hash D216076E
sample 1:
time = 66722
flags = 0
data = length 5312, hash D45D3CA0
sample 2:
time = 33355
flags = 0
data = length 599, hash 1BE7812D
sample 3:
time = 200200
flags = 0
data = length 7735, hash 4490F110
sample 4:
time = 133455
flags = 0
data = length 987, hash 560B5036
sample 5:
time = 100100
flags = 0
data = length 673, hash ED7CD8C7
sample 6:
time = 166822
flags = 0
data = length 523, hash 3020DF50
sample 7:
time = 333655
flags = 0
data = length 6061, hash 736C72B2
sample 8:
time = 266922
flags = 0
data = length 992, hash FE132F23
sample 9:
time = 233555
flags = 0
data = length 623, hash 5B2C1816
sample 10:
time = 300300
flags = 0
data = length 421, hash 742E69C1
sample 11:
time = 433755
flags = 0
data = length 4899, hash F72F86A1
sample 12:
time = 400400
flags = 0
data = length 568, hash 519A8E50
sample 13:
time = 367022
flags = 0
data = length 620, hash 3990AA39
sample 14:
time = 567222
flags = 0
data = length 5450, hash F06EC4AA
sample 15:
time = 500500
flags = 0
data = length 1051, hash 92DFA63A
sample 16:
time = 467122
flags = 0
data = length 874, hash 69587FB4
sample 17:
time = 533855
flags = 0
data = length 781, hash 36BE495B
sample 18:
time = 700700
flags = 0
data = length 4725, hash AC0C8CD3
sample 19:
time = 633955
flags = 0
data = length 1022, hash 5D8BFF34
sample 20:
time = 600600
flags = 0
data = length 790, hash 99413A99
sample 21:
time = 667322
flags = 0
data = length 610, hash 5E129290
sample 22:
time = 834155
flags = 0
data = length 2751, hash 769974CB
sample 23:
time = 767422
flags = 0
data = length 745, hash B78A477A
sample 24:
time = 734055
flags = 0
data = length 621, hash CF741E7A
sample 25:
time = 800800
flags = 0
data = length 505, hash 1DB4894E
sample 26:
time = 967622
flags = 0
data = length 1268, hash C15348DC
sample 27:
time = 900900
flags = 0
data = length 880, hash C2DE85D0
sample 28:
time = 867522
flags = 0
data = length 530, hash C98BC6A8
sample 29:
time = 934255
flags = 536870912
data = length 568, hash 4FE5C8EA
track 1:
total output bytes = 9529
sample count = 45
format 0:
peakBitrate = 200000
id = 2
sampleMimeType = audio/mp4a-latm
codecs = mp4a.40.2
maxInputSize = 294
channelCount = 1
sampleRate = 44100
language = und
metadata = entries=[xyz: latitude=40.68, longitude=-74.5]
initializationData:
data = length 2, hash 5F7
sample 0:
time = 0
flags = 1
data = length 23, hash 47DE9131
sample 1:
time = 67208
flags = 1
data = length 6, hash 31EC5206
sample 2:
time = 90437
flags = 1
data = length 148, hash 894A176B
sample 3:
time = 113645
flags = 1
data = length 189, hash CEF235A1
sample 4:
time = 136875
flags = 1
data = length 205, hash BBF5F7B0
sample 5:
time = 160083
flags = 1
data = length 210, hash F278B193
sample 6:
time = 183312
flags = 1
data = length 210, hash 82DA1589
sample 7:
time = 206520
flags = 1
data = length 207, hash 5BE231DF
sample 8:
time = 229750
flags = 1
data = length 225, hash 18819EE1
sample 9:
time = 252958
flags = 1
data = length 215, hash CA7FA67B
sample 10:
time = 276187
flags = 1
data = length 211, hash 581A1C18
sample 11:
time = 299416
flags = 1
data = length 216, hash ADB88187
sample 12:
time = 322625
flags = 1
data = length 229, hash 2E8BA4DC
sample 13:
time = 345854
flags = 1
data = length 232, hash 22F0C510
sample 14:
time = 369062
flags = 1
data = length 235, hash 867AD0DC
sample 15:
time = 392291
flags = 1
data = length 231, hash 84E823A8
sample 16:
time = 415500
flags = 1
data = length 226, hash 1BEF3A95
sample 17:
time = 438729
flags = 1
data = length 216, hash EAA345AE
sample 18:
time = 461958
flags = 1
data = length 229, hash 6957411F
sample 19:
time = 485166
flags = 1
data = length 219, hash 41275022
sample 20:
time = 508395
flags = 1
data = length 241, hash 6495DF96
sample 21:
time = 531604
flags = 1
data = length 228, hash 63D95906
sample 22:
time = 554833
flags = 1
data = length 238, hash 34F676F9
sample 23:
time = 578041
flags = 1
data = length 234, hash E5CBC045
sample 24:
time = 601270
flags = 1
data = length 231, hash 5FC43661
sample 25:
time = 624479
flags = 1
data = length 217, hash 682708ED
sample 26:
time = 647708
flags = 1
data = length 239, hash D43780FC
sample 27:
time = 670937
flags = 1
data = length 243, hash C5E17980
sample 28:
time = 694145
flags = 1
data = length 231, hash AC5837BA
sample 29:
time = 717375
flags = 1
data = length 230, hash 169EE895
sample 30:
time = 740583
flags = 1
data = length 238, hash C48FF3F1
sample 31:
time = 763812
flags = 1
data = length 225, hash 531E4599
sample 32:
time = 787020
flags = 1
data = length 232, hash CB3C6B8D
sample 33:
time = 810250
flags = 1
data = length 243, hash F8C94C7
sample 34:
time = 833458
flags = 1
data = length 232, hash A646A7D0
sample 35:
time = 856687
flags = 1
data = length 237, hash E8B787A5
sample 36:
time = 879916
flags = 1
data = length 228, hash 3FA7A29F
sample 37:
time = 903125
flags = 1
data = length 235, hash B9B33B0A
sample 38:
time = 926354
flags = 1
data = length 264, hash 71A4869E
sample 39:
time = 949562
flags = 1
data = length 257, hash D049B54C
sample 40:
time = 972791
flags = 1
data = length 227, hash 66757231
sample 41:
time = 996000
flags = 1
data = length 227, hash BD374F1B
sample 42:
time = 1019229
flags = 1
data = length 235, hash 999477F6
sample 43:
time = 1042437
flags = 1
data = length 229, hash FFF98DF0
sample 44:
time = 1065666
flags = 536870913
data = length 6, hash 31B22286
tracksEnded = true

View File

@ -6,11 +6,11 @@ format 0:
width = 320
height = 240
frameRate = 29.523811
metadata = entries=[mdta: key=com.android.capture.fps, value=43700000, smta: captureFrameRate=240.0, svcTemporalLayerCount=4, SlowMotion: segments=[Segment: startTimeMs=88, endTimeMs=879, speedDivisor=2, Segment: startTimeMs=1255, endTimeMs=1970, speedDivisor=8]]
metadata = entries=[mdta: key=com.android.capture.fps, value=240.0, smta: captureFrameRate=240.0, svcTemporalLayerCount=4, SlowMotion: segments=[Segment: startTimeMs=88, endTimeMs=879, speedDivisor=2, Segment: startTimeMs=1255, endTimeMs=1970, speedDivisor=8]]
initializationData:
data = length 33, hash D3FB879D
data = length 10, hash 7A0D0F2B
container metadata = entries=[mdta: key=com.android.capture.fps, value=43700000, smta: captureFrameRate=240.0, svcTemporalLayerCount=4, SlowMotion: segments=[Segment: startTimeMs=88, endTimeMs=879, speedDivisor=2, Segment: startTimeMs=1255, endTimeMs=1970, speedDivisor=8]]
container metadata = entries=[mdta: key=com.android.capture.fps, value=240.0, smta: captureFrameRate=240.0, svcTemporalLayerCount=4, SlowMotion: segments=[Segment: startTimeMs=88, endTimeMs=879, speedDivisor=2, Segment: startTimeMs=1255, endTimeMs=1970, speedDivisor=8]]
format 1:
averageBitrate = 131072
sampleMimeType = audio/mp4a-latm

View File

@ -24,6 +24,7 @@ import androidx.media3.common.Format;
import androidx.media3.common.Metadata;
import androidx.media3.common.MimeTypes;
import androidx.media3.common.util.UnstableApi;
import androidx.media3.container.MdtaMetadataEntry;
import androidx.media3.container.Mp4LocationData;
import androidx.media3.container.XmpData;
import androidx.media3.muxer.Mp4Muxer;
@ -57,6 +58,7 @@ public final class InAppMuxer implements Muxer {
* <ul>
* <li>{@link Mp4LocationData}
* <li>{@link XmpData}
* <li>{@link MdtaMetadataEntry}
* </ul>
*/
void updateMetadataEntries(Set<Metadata.Entry> metadataEntries);
@ -179,7 +181,9 @@ public final class InAppMuxer implements Muxer {
Metadata.Entry entry = metadata.get(i);
// Keep only supported metadata.
// LINT.IfChange(added_metadata)
if (entry instanceof Mp4LocationData || entry instanceof XmpData) {
if (entry instanceof Mp4LocationData
|| entry instanceof XmpData
|| entry instanceof MdtaMetadataEntry) {
metadataEntries.add(entry);
}
}
@ -216,6 +220,14 @@ public final class InAppMuxer implements Muxer {
((Mp4LocationData) entry).latitude, ((Mp4LocationData) entry).longitude);
} else if (entry instanceof XmpData) {
mp4Muxer.addXmp(ByteBuffer.wrap(((XmpData) entry).data));
} else if (entry instanceof MdtaMetadataEntry) {
MdtaMetadataEntry mdtaMetadataEntry = (MdtaMetadataEntry) entry;
if (mdtaMetadataEntry.key.equals(MdtaMetadataEntry.KEY_ANDROID_CAPTURE_FPS)) {
byte[] captureFps = mdtaMetadataEntry.value;
mp4Muxer.setCaptureFps(ByteBuffer.wrap(captureFps).getFloat());
} else {
throw new IllegalStateException("Unsupported MdtaMetadataEntry " + mdtaMetadataEntry.key);
}
} else {
throw new IllegalStateException("Unsupported Metadata.Entry " + entry.getClass().getName());
}

View File

@ -23,6 +23,7 @@ import android.net.Uri;
import androidx.media3.common.MediaItem;
import androidx.media3.common.Metadata;
import androidx.media3.common.util.Util;
import androidx.media3.container.MdtaMetadataEntry;
import androidx.media3.container.Mp4LocationData;
import androidx.media3.container.XmpData;
import androidx.media3.extractor.mp4.Mp4Extractor;
@ -107,4 +108,36 @@ public class TransformerWithInAppMuxerEndToEndTest {
// TODO(b/270956881): Use FakeExtractorOutput once it starts dumping uuid box.
assertThat(exportResult.exportException).isNull();
}
/**
 * Transmuxes the H.264 test asset through an {@link InAppMuxer} whose metadata provider adds a
 * capture FPS {@link MdtaMetadataEntry}, then compares the extracted output with the {@code
 * with_capture_fps} dump file.
 */
@Test
public void transmux_withCaptureFps_outputMatchedExpected() throws Exception {
  // Bytes 0x42 0x70 0x00 0x00, which ByteBuffer#getFloat() reads as 60.0f.
  byte[] captureFpsBytes = new byte[] {66, 112, 0, 0};
  Muxer.Factory muxerFactory =
      new InAppMuxer.Factory(
          DefaultMuxer.Factory.DEFAULT_MAX_DELAY_BETWEEN_SAMPLES_MS,
          metadataEntries ->
              metadataEntries.add(
                  new MdtaMetadataEntry(
                      MdtaMetadataEntry.KEY_ANDROID_CAPTURE_FPS,
                      /* value= */ captureFpsBytes,
                      /* localeIndicator= */ 0,
                      MdtaMetadataEntry.TYPE_INDICATOR_FLOAT)));
  Transformer transformer =
      new Transformer.Builder(context)
          .setClock(new FakeClock(/* isAutoAdvancing= */ true))
          .setMuxerFactory(muxerFactory)
          .build();
  Uri inputUri = Uri.parse(MP4_FILE_ASSET_DIRECTORY + H264_MP4);

  transformer.start(MediaItem.fromUri(inputUri), outputPath);
  TransformerTestRunner.runLooper(transformer);

  FakeExtractorOutput extractedOutput =
      androidx.media3.test.utils.TestUtil.extractAllSamplesFromFilePath(
          new Mp4Extractor(), checkNotNull(outputPath));
  // The video track metadata in the dump is expected to contain
  // [mdta: key=com.android.capture.fps, value=60.0].
  DumpFileAsserts.assertOutput(
      context, extractedOutput, TestUtil.getDumpFileName(H264_MP4 + ".with_capture_fps"));
}
}