From 7b9aa873440c982045f5250288b7e53623f2267c Mon Sep 17 00:00:00 2001 From: andrewlewis Date: Fri, 17 Nov 2023 08:06:57 -0800 Subject: [PATCH] Allow allocating more buffers when transmuxing When transmuxing, the `EncodedSampleExporter` maintains a queue of input buffers that get filled with encoded data by the asset loader. The number of buffers was limited to avoid using more and more memory if the producer (asset loader) gets far ahead of the consumer (exporter). Previously this limit was fixed at 10 buffers, but increasing the number of buffers can make some transmux operations much faster. Allow allocating between a min and max number of buffers, and also set a target allocation size beyond which new buffers can't be allocated. This allows audio formats which require many small buffers to be processed more quickly, while preventing allocating too much memory for hypothetical very high bitrate formats. 'Remove video' edits on local videos in particular get much faster, because audio buffers are very short and there are lots of them. With a sample 10 minute video, a 'remove video' edit took 2 seconds (36 seconds before this change). With a sample 1 minute video, a 'remove video' edit took 0.25 seconds after this change (2.5 seconds before). The speed improvement is smaller for other types of edits that retain the video track. Transmuxing a 10 minute video retaining the video track took 26 seconds (40 seconds before). 
PiperOrigin-RevId: 583390284 --- RELEASENOTES.md | 1 + .../highPitch_silenceHighPitch.dump | 60 +++++--- .../sowt-with-video.mov/original_silence.dump | 40 ++++-- .../mp4/sowt-with-video.mov/silence.dump | 60 +++++--- .../silenceHighPitch_highPitch.dump | 60 +++++--- .../silenceHighPitch_silence.dump | 120 ++++++++++------ .../silenceHighPitch_silenceHighPitch.dump | 120 ++++++++++------ .../sowt-with-video.mov/silence_original.dump | 60 +++++--- .../sowt-with-video.mov/silence_silence.dump | 120 ++++++++++------ .../silence_silenceHighPitch.dump | 120 ++++++++++------ .../transformer/EncodedSampleExporter.java | 57 ++++++-- .../EncodedSampleExporterTest.java | 134 ++++++++++++++++++ 12 files changed, 676 insertions(+), 276 deletions(-) create mode 100644 libraries/transformer/src/test/java/androidx/media3/transformer/EncodedSampleExporterTest.java diff --git a/RELEASENOTES.md b/RELEASENOTES.md index 2b4433eb10..f7f37238f3 100644 --- a/RELEASENOTES.md +++ b/RELEASENOTES.md @@ -24,6 +24,7 @@ by default with null `ImageOutput` and `ImageDecoder.Factory.DEFAULT`. * Transformer: * Add support for flattening H.265/HEVC SEF slow motion videos. + * Increase transmuxing speed, especially for 'remove video' edits. * Track Selection: * Add `DefaultTrackSelector.selectImageTrack` to enable image track selection. 
diff --git a/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/highPitch_silenceHighPitch.dump b/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/highPitch_silenceHighPitch.dump index 8bd7fca868..250c53c3f7 100644 --- a/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/highPitch_silenceHighPitch.dump +++ b/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/highPitch_silenceHighPitch.dump @@ -932,16 +932,16 @@ sample: presentationTimeUs = 3145020 sample: trackType = audio - dataHashCode = -738966527 - size = 8160 + dataHashCode = 202646529 + size = 4320 isKeyFrame = true presentationTimeUs = 3165020 sample: trackType = audio - dataHashCode = 202646529 - size = 4320 + dataHashCode = -738966527 + size = 8160 isKeyFrame = true - presentationTimeUs = 3207520 + presentationTimeUs = 3187520 sample: trackType = audio dataHashCode = -1017110527 @@ -986,10 +986,16 @@ sample: presentationTimeUs = 3357520 sample: trackType = audio - dataHashCode = -738966527 - size = 8160 + dataHashCode = 202646529 + size = 4320 isKeyFrame = true presentationTimeUs = 3377520 +sample: + trackType = audio + dataHashCode = -1017110527 + size = 3840 + isKeyFrame = true + presentationTimeUs = 3400020 sample: trackType = audio dataHashCode = 202646529 @@ -1028,10 +1034,16 @@ sample: presentationTimeUs = 3527520 sample: trackType = audio - dataHashCode = -738966527 - size = 8160 + dataHashCode = -1017110527 + size = 3840 isKeyFrame = true presentationTimeUs = 3550020 +sample: + trackType = audio + dataHashCode = 202646529 + size = 4320 + isKeyFrame = true + presentationTimeUs = 3570020 sample: trackType = audio dataHashCode = -1017110527 @@ -1058,16 +1070,10 @@ sample: presentationTimeUs = 3655020 sample: trackType = audio - dataHashCode = -1017110527 - size = 3840 + dataHashCode = -738966527 + size = 8160 isKeyFrame = true presentationTimeUs = 3677520 -sample: - trackType = audio - 
dataHashCode = 202646529 - size = 4320 - isKeyFrame = true - presentationTimeUs = 3697520 sample: trackType = audio dataHashCode = -1017110527 @@ -1088,10 +1094,16 @@ sample: presentationTimeUs = 3762520 sample: trackType = audio - dataHashCode = -738966527 - size = 8160 + dataHashCode = -1017110527 + size = 3840 isKeyFrame = true presentationTimeUs = 3785020 +sample: + trackType = audio + dataHashCode = 202646529 + size = 4320 + isKeyFrame = true + presentationTimeUs = 3805020 sample: trackType = audio dataHashCode = -1017110527 @@ -1130,10 +1142,16 @@ sample: presentationTimeUs = 3932520 sample: trackType = audio - dataHashCode = -422154111 - size = 8636 + dataHashCode = 304617473 + size = 2880 isKeyFrame = true presentationTimeUs = 3955020 +sample: + trackType = audio + dataHashCode = -933078911 + size = 5756 + isKeyFrame = true + presentationTimeUs = 3970020 sample: trackType = audio dataHashCode = -1807454463 diff --git a/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/original_silence.dump b/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/original_silence.dump index ce81e5d518..f50119419f 100644 --- a/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/original_silence.dump +++ b/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/original_silence.dump @@ -1016,10 +1016,16 @@ sample: presentationTimeUs = 3500000 sample: trackType = audio - dataHashCode = -809762815 - size = 8192 + dataHashCode = 1742602241 + size = 4096 isKeyFrame = true presentationTimeUs = 3536000 +sample: + trackType = audio + dataHashCode = 1742602241 + size = 4096 + isKeyFrame = true + presentationTimeUs = 3557333 sample: trackType = audio dataHashCode = 1742602241 @@ -1058,16 +1064,10 @@ sample: presentationTimeUs = 3685333 sample: trackType = audio - dataHashCode = 1742602241 - size = 4096 + dataHashCode = -809762815 + size = 8192 isKeyFrame = true presentationTimeUs = 3706666 
-sample: - trackType = audio - dataHashCode = 1742602241 - size = 4096 - isKeyFrame = true - presentationTimeUs = 3728000 sample: trackType = audio dataHashCode = 1742602241 @@ -1082,10 +1082,16 @@ sample: presentationTimeUs = 3770666 sample: trackType = audio - dataHashCode = -809762815 - size = 8192 + dataHashCode = 1742602241 + size = 4096 isKeyFrame = true presentationTimeUs = 3792000 +sample: + trackType = audio + dataHashCode = 1742602241 + size = 4096 + isKeyFrame = true + presentationTimeUs = 3813333 sample: trackType = audio dataHashCode = 1742602241 @@ -1118,10 +1124,16 @@ sample: presentationTimeUs = 3920000 sample: trackType = audio - dataHashCode = -809762815 - size = 8192 + dataHashCode = 1742602241 + size = 4096 isKeyFrame = true presentationTimeUs = 3941333 +sample: + trackType = audio + dataHashCode = 1742602241 + size = 4096 + isKeyFrame = true + presentationTimeUs = 3962666 sample: trackType = audio dataHashCode = -303661055 diff --git a/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/silence.dump b/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/silence.dump index 02d82b9662..96db2021ae 100644 --- a/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/silence.dump +++ b/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/silence.dump @@ -324,10 +324,16 @@ sample: presentationTimeUs = 1160997 sample: trackType = audio - dataHashCode = -809762815 - size = 8192 + dataHashCode = 1742602241 + size = 4096 isKeyFrame = true presentationTimeUs = 1184217 +sample: + trackType = audio + dataHashCode = 1742602241 + size = 4096 + isKeyFrame = true + presentationTimeUs = 1207437 sample: trackType = audio dataHashCode = 1742602241 @@ -348,16 +354,10 @@ sample: presentationTimeUs = 1277097 sample: trackType = audio - dataHashCode = 1742602241 - size = 4096 + dataHashCode = -809762815 + size = 8192 isKeyFrame = true presentationTimeUs = 1300317 -sample: - 
trackType = audio - dataHashCode = 1742602241 - size = 4096 - isKeyFrame = true - presentationTimeUs = 1323537 sample: trackType = audio dataHashCode = 1742602241 @@ -378,10 +378,16 @@ sample: presentationTimeUs = 1393197 sample: trackType = audio - dataHashCode = -809762815 - size = 8192 + dataHashCode = 1742602241 + size = 4096 isKeyFrame = true presentationTimeUs = 1416417 +sample: + trackType = audio + dataHashCode = 1742602241 + size = 4096 + isKeyFrame = true + presentationTimeUs = 1439637 sample: trackType = audio dataHashCode = 1742602241 @@ -414,10 +420,16 @@ sample: presentationTimeUs = 1555736 sample: trackType = audio - dataHashCode = -809762815 - size = 8192 + dataHashCode = 1742602241 + size = 4096 isKeyFrame = true presentationTimeUs = 1578956 +sample: + trackType = audio + dataHashCode = 1742602241 + size = 4096 + isKeyFrame = true + presentationTimeUs = 1602176 sample: trackType = audio dataHashCode = 1742602241 @@ -462,16 +474,16 @@ sample: presentationTimeUs = 1764716 sample: trackType = audio - dataHashCode = 1742602241 - size = 4096 + dataHashCode = -809762815 + size = 8192 isKeyFrame = true presentationTimeUs = 1787936 sample: trackType = audio - dataHashCode = -809762815 - size = 8192 + dataHashCode = 1742602241 + size = 4096 isKeyFrame = true - presentationTimeUs = 1811156 + presentationTimeUs = 1834376 sample: trackType = audio dataHashCode = 1742602241 @@ -504,10 +516,16 @@ sample: presentationTimeUs = 1950476 sample: trackType = audio - dataHashCode = -474907647 - size = 4640 + dataHashCode = 1742602241 + size = 4096 isKeyFrame = true presentationTimeUs = 1973696 +sample: + trackType = audio + dataHashCode = -204243967 + size = 544 + isKeyFrame = true + presentationTimeUs = 1996916 sample: trackType = video dataHashCode = 33691269 diff --git a/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/silenceHighPitch_highPitch.dump 
b/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/silenceHighPitch_highPitch.dump index 8708b2baa8..1894a83a45 100644 --- a/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/silenceHighPitch_highPitch.dump +++ b/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/silenceHighPitch_highPitch.dump @@ -324,10 +324,16 @@ sample: presentationTimeUs = 1154761 sample: trackType = audio - dataHashCode = 644553473 - size = 8360 + dataHashCode = -1264223743 + size = 4400 isKeyFrame = true presentationTimeUs = 1177210 +sample: + trackType = audio + dataHashCode = 571187457 + size = 3960 + isKeyFrame = true + presentationTimeUs = 1202154 sample: trackType = audio dataHashCode = 571187457 @@ -348,16 +354,10 @@ sample: presentationTimeUs = 1269501 sample: trackType = audio - dataHashCode = 571187457 - size = 3960 + dataHashCode = 2136490497 + size = 7920 isKeyFrame = true presentationTimeUs = 1294444 -sample: - trackType = audio - dataHashCode = 571187457 - size = 3960 - isKeyFrame = true - presentationTimeUs = 1316893 sample: trackType = audio dataHashCode = -1264223743 @@ -378,10 +378,16 @@ sample: presentationTimeUs = 1386712 sample: trackType = audio - dataHashCode = 644553473 - size = 8360 + dataHashCode = -1264223743 + size = 4400 isKeyFrame = true presentationTimeUs = 1409160 +sample: + trackType = audio + dataHashCode = 571187457 + size = 3960 + isKeyFrame = true + presentationTimeUs = 1434104 sample: trackType = audio dataHashCode = 571187457 @@ -414,10 +420,16 @@ sample: presentationTimeUs = 1548843 sample: trackType = audio - dataHashCode = 2136490497 - size = 7920 + dataHashCode = 571187457 + size = 3960 isKeyFrame = true presentationTimeUs = 1573786 +sample: + trackType = audio + dataHashCode = 571187457 + size = 3960 + isKeyFrame = true + presentationTimeUs = 1596235 sample: trackType = audio dataHashCode = -448902783 @@ -462,16 +474,16 @@ sample: presentationTimeUs = 1758344 sample: 
trackType = audio - dataHashCode = -1264223743 - size = 4400 + dataHashCode = 644553473 + size = 8360 isKeyFrame = true presentationTimeUs = 1780793 sample: trackType = audio - dataHashCode = 2136490497 - size = 7920 + dataHashCode = 571187457 + size = 3960 isKeyFrame = true - presentationTimeUs = 1805736 + presentationTimeUs = 1828185 sample: trackType = audio dataHashCode = -122903935 @@ -504,10 +516,16 @@ sample: presentationTimeUs = 1942902 sample: trackType = audio - dataHashCode = 359941761 - size = 5644 + dataHashCode = -1759454975 + size = 440 isKeyFrame = true presentationTimeUs = 1967845 +sample: + trackType = audio + dataHashCode = -1409159807 + size = 5204 + isKeyFrame = true + presentationTimeUs = 1970340 sample: trackType = audio dataHashCode = -752508258 diff --git a/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/silenceHighPitch_silence.dump b/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/silenceHighPitch_silence.dump index 29b54f0ffc..86c669197d 100644 --- a/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/silenceHighPitch_silence.dump +++ b/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/silenceHighPitch_silence.dump @@ -324,10 +324,16 @@ sample: presentationTimeUs = 1154761 sample: trackType = audio - dataHashCode = 644553473 - size = 8360 + dataHashCode = -1264223743 + size = 4400 isKeyFrame = true presentationTimeUs = 1177210 +sample: + trackType = audio + dataHashCode = 571187457 + size = 3960 + isKeyFrame = true + presentationTimeUs = 1202154 sample: trackType = audio dataHashCode = 571187457 @@ -348,16 +354,10 @@ sample: presentationTimeUs = 1269501 sample: trackType = audio - dataHashCode = 571187457 - size = 3960 + dataHashCode = 2136490497 + size = 7920 isKeyFrame = true presentationTimeUs = 1294444 -sample: - trackType = audio - dataHashCode = 571187457 - size = 3960 - isKeyFrame = true - presentationTimeUs = 1316893 
sample: trackType = audio dataHashCode = -1264223743 @@ -378,10 +378,16 @@ sample: presentationTimeUs = 1386712 sample: trackType = audio - dataHashCode = 644553473 - size = 8360 + dataHashCode = -1264223743 + size = 4400 isKeyFrame = true presentationTimeUs = 1409160 +sample: + trackType = audio + dataHashCode = 571187457 + size = 3960 + isKeyFrame = true + presentationTimeUs = 1434104 sample: trackType = audio dataHashCode = 571187457 @@ -414,10 +420,16 @@ sample: presentationTimeUs = 1548843 sample: trackType = audio - dataHashCode = 2136490497 - size = 7920 + dataHashCode = 571187457 + size = 3960 isKeyFrame = true presentationTimeUs = 1573786 +sample: + trackType = audio + dataHashCode = 571187457 + size = 3960 + isKeyFrame = true + presentationTimeUs = 1596235 sample: trackType = audio dataHashCode = -448902783 @@ -462,16 +474,16 @@ sample: presentationTimeUs = 1758344 sample: trackType = audio - dataHashCode = -1264223743 - size = 4400 + dataHashCode = 644553473 + size = 8360 isKeyFrame = true presentationTimeUs = 1780793 sample: trackType = audio - dataHashCode = 2136490497 - size = 7920 + dataHashCode = 571187457 + size = 3960 isKeyFrame = true - presentationTimeUs = 1805736 + presentationTimeUs = 1828185 sample: trackType = audio dataHashCode = -122903935 @@ -504,10 +516,16 @@ sample: presentationTimeUs = 1942902 sample: trackType = audio - dataHashCode = 359941761 - size = 5644 + dataHashCode = -1759454975 + size = 440 isKeyFrame = true presentationTimeUs = 1967845 +sample: + trackType = audio + dataHashCode = -1409159807 + size = 5204 + isKeyFrame = true + presentationTimeUs = 1970340 sample: trackType = audio dataHashCode = 693101697 @@ -798,10 +816,16 @@ sample: presentationTimeUs = 3114399 sample: trackType = audio - dataHashCode = -809762815 - size = 8192 + dataHashCode = 1742602241 + size = 4096 isKeyFrame = true presentationTimeUs = 3137619 +sample: + trackType = audio + dataHashCode = 1742602241 + size = 4096 + isKeyFrame = true + 
presentationTimeUs = 3160839 sample: trackType = audio dataHashCode = 1742602241 @@ -816,16 +840,10 @@ sample: presentationTimeUs = 3207278 sample: trackType = audio - dataHashCode = 1742602241 - size = 4096 + dataHashCode = -809762815 + size = 8192 isKeyFrame = true presentationTimeUs = 3230498 -sample: - trackType = audio - dataHashCode = 1742602241 - size = 4096 - isKeyFrame = true - presentationTimeUs = 3253718 sample: trackType = audio dataHashCode = 1742602241 @@ -840,10 +858,16 @@ sample: presentationTimeUs = 3300158 sample: trackType = audio - dataHashCode = -809762815 - size = 8192 + dataHashCode = 1742602241 + size = 4096 isKeyFrame = true presentationTimeUs = 3323378 +sample: + trackType = audio + dataHashCode = 1742602241 + size = 4096 + isKeyFrame = true + presentationTimeUs = 3346598 sample: trackType = audio dataHashCode = 1742602241 @@ -900,10 +924,16 @@ sample: presentationTimeUs = 3555578 sample: trackType = audio - dataHashCode = -809762815 - size = 8192 + dataHashCode = 1742602241 + size = 4096 isKeyFrame = true presentationTimeUs = 3578798 +sample: + trackType = audio + dataHashCode = 1742602241 + size = 4096 + isKeyFrame = true + presentationTimeUs = 3602018 sample: trackType = audio dataHashCode = 1742602241 @@ -930,16 +960,16 @@ sample: presentationTimeUs = 3694897 sample: trackType = audio - dataHashCode = 1742602241 - size = 4096 + dataHashCode = -809762815 + size = 8192 isKeyFrame = true presentationTimeUs = 3718117 sample: trackType = audio - dataHashCode = -809762815 - size = 8192 + dataHashCode = 1742602241 + size = 4096 isKeyFrame = true - presentationTimeUs = 3741337 + presentationTimeUs = 3764557 sample: trackType = audio dataHashCode = 1742602241 @@ -972,10 +1002,16 @@ sample: presentationTimeUs = 3880657 sample: trackType = audio - dataHashCode = -809762815 - size = 8192 + dataHashCode = 1742602241 + size = 4096 isKeyFrame = true presentationTimeUs = 3903877 +sample: + trackType = audio + dataHashCode = 1742602241 + size = 4096 + 
isKeyFrame = true + presentationTimeUs = 3927097 sample: trackType = audio dataHashCode = 1742602241 diff --git a/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/silenceHighPitch_silenceHighPitch.dump b/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/silenceHighPitch_silenceHighPitch.dump index 7806630a63..6c9bb4bcd5 100644 --- a/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/silenceHighPitch_silenceHighPitch.dump +++ b/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/silenceHighPitch_silenceHighPitch.dump @@ -324,10 +324,16 @@ sample: presentationTimeUs = 1154761 sample: trackType = audio - dataHashCode = 644553473 - size = 8360 + dataHashCode = -1264223743 + size = 4400 isKeyFrame = true presentationTimeUs = 1177210 +sample: + trackType = audio + dataHashCode = 571187457 + size = 3960 + isKeyFrame = true + presentationTimeUs = 1202154 sample: trackType = audio dataHashCode = 571187457 @@ -348,16 +354,10 @@ sample: presentationTimeUs = 1269501 sample: trackType = audio - dataHashCode = 571187457 - size = 3960 + dataHashCode = 2136490497 + size = 7920 isKeyFrame = true presentationTimeUs = 1294444 -sample: - trackType = audio - dataHashCode = 571187457 - size = 3960 - isKeyFrame = true - presentationTimeUs = 1316893 sample: trackType = audio dataHashCode = -1264223743 @@ -378,10 +378,16 @@ sample: presentationTimeUs = 1386712 sample: trackType = audio - dataHashCode = 644553473 - size = 8360 + dataHashCode = -1264223743 + size = 4400 isKeyFrame = true presentationTimeUs = 1409160 +sample: + trackType = audio + dataHashCode = 571187457 + size = 3960 + isKeyFrame = true + presentationTimeUs = 1434104 sample: trackType = audio dataHashCode = 571187457 @@ -414,10 +420,16 @@ sample: presentationTimeUs = 1548843 sample: trackType = audio - dataHashCode = 2136490497 - size = 7920 + dataHashCode = 571187457 + size = 3960 isKeyFrame = true presentationTimeUs = 
1573786 +sample: + trackType = audio + dataHashCode = 571187457 + size = 3960 + isKeyFrame = true + presentationTimeUs = 1596235 sample: trackType = audio dataHashCode = -448902783 @@ -462,16 +474,16 @@ sample: presentationTimeUs = 1758344 sample: trackType = audio - dataHashCode = -1264223743 - size = 4400 + dataHashCode = 644553473 + size = 8360 isKeyFrame = true presentationTimeUs = 1780793 sample: trackType = audio - dataHashCode = 2136490497 - size = 7920 + dataHashCode = 571187457 + size = 3960 isKeyFrame = true - presentationTimeUs = 1805736 + presentationTimeUs = 1828185 sample: trackType = audio dataHashCode = -122903935 @@ -504,10 +516,16 @@ sample: presentationTimeUs = 1942902 sample: trackType = audio - dataHashCode = 359941761 - size = 5644 + dataHashCode = -1759454975 + size = 440 isKeyFrame = true presentationTimeUs = 1967845 +sample: + trackType = audio + dataHashCode = -1409159807 + size = 5204 + isKeyFrame = true + presentationTimeUs = 1970340 sample: trackType = audio dataHashCode = 693101697 @@ -798,10 +816,16 @@ sample: presentationTimeUs = 3107233 sample: trackType = audio - dataHashCode = 2136490497 - size = 7920 + dataHashCode = 571187457 + size = 3960 isKeyFrame = true presentationTimeUs = 3132154 +sample: + trackType = audio + dataHashCode = 571187457 + size = 3960 + isKeyFrame = true + presentationTimeUs = 3154603 sample: trackType = audio dataHashCode = -1264223743 @@ -816,16 +840,10 @@ sample: presentationTimeUs = 3201995 sample: trackType = audio - dataHashCode = 571187457 - size = 3960 + dataHashCode = 2136490497 + size = 7920 isKeyFrame = true presentationTimeUs = 3224444 -sample: - trackType = audio - dataHashCode = 571187457 - size = 3960 - isKeyFrame = true - presentationTimeUs = 3246893 sample: trackType = audio dataHashCode = -1264223743 @@ -840,10 +858,16 @@ sample: presentationTimeUs = 3294285 sample: trackType = audio - dataHashCode = 644553473 - size = 8360 + dataHashCode = 571187457 + size = 3960 isKeyFrame = true 
presentationTimeUs = 3316734 +sample: + trackType = audio + dataHashCode = -1264223743 + size = 4400 + isKeyFrame = true + presentationTimeUs = 3339183 sample: trackType = audio dataHashCode = -448902783 @@ -900,10 +924,16 @@ sample: presentationTimeUs = 3548684 sample: trackType = audio - dataHashCode = 2136490497 - size = 7920 + dataHashCode = 571187457 + size = 3960 isKeyFrame = true presentationTimeUs = 3573628 +sample: + trackType = audio + dataHashCode = 571187457 + size = 3960 + isKeyFrame = true + presentationTimeUs = 3596077 sample: trackType = audio dataHashCode = -448902783 @@ -930,16 +960,16 @@ sample: presentationTimeUs = 3688344 sample: trackType = audio - dataHashCode = -1264223743 - size = 4400 + dataHashCode = 644553473 + size = 8360 isKeyFrame = true presentationTimeUs = 3710793 sample: trackType = audio - dataHashCode = 2136490497 - size = 7920 + dataHashCode = 571187457 + size = 3960 isKeyFrame = true - presentationTimeUs = 3735736 + presentationTimeUs = 3758185 sample: trackType = audio dataHashCode = -1264223743 @@ -978,10 +1008,16 @@ sample: presentationTimeUs = 3897845 sample: trackType = audio - dataHashCode = 644553473 - size = 8360 + dataHashCode = 571187457 + size = 3960 isKeyFrame = true presentationTimeUs = 3920294 +sample: + trackType = audio + dataHashCode = -1264223743 + size = 4400 + isKeyFrame = true + presentationTimeUs = 3942743 sample: trackType = audio dataHashCode = -1759454975 diff --git a/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/silence_original.dump b/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/silence_original.dump index a3527fa511..79ca8c1a19 100644 --- a/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/silence_original.dump +++ b/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/silence_original.dump @@ -324,10 +324,16 @@ sample: presentationTimeUs = 1160997 sample: trackType = audio - dataHashCode 
= -809762815 - size = 8192 + dataHashCode = 1742602241 + size = 4096 isKeyFrame = true presentationTimeUs = 1184217 +sample: + trackType = audio + dataHashCode = 1742602241 + size = 4096 + isKeyFrame = true + presentationTimeUs = 1207437 sample: trackType = audio dataHashCode = 1742602241 @@ -348,16 +354,10 @@ sample: presentationTimeUs = 1277097 sample: trackType = audio - dataHashCode = 1742602241 - size = 4096 + dataHashCode = -809762815 + size = 8192 isKeyFrame = true presentationTimeUs = 1300317 -sample: - trackType = audio - dataHashCode = 1742602241 - size = 4096 - isKeyFrame = true - presentationTimeUs = 1323537 sample: trackType = audio dataHashCode = 1742602241 @@ -378,10 +378,16 @@ sample: presentationTimeUs = 1393197 sample: trackType = audio - dataHashCode = -809762815 - size = 8192 + dataHashCode = 1742602241 + size = 4096 isKeyFrame = true presentationTimeUs = 1416417 +sample: + trackType = audio + dataHashCode = 1742602241 + size = 4096 + isKeyFrame = true + presentationTimeUs = 1439637 sample: trackType = audio dataHashCode = 1742602241 @@ -414,10 +420,16 @@ sample: presentationTimeUs = 1555736 sample: trackType = audio - dataHashCode = -809762815 - size = 8192 + dataHashCode = 1742602241 + size = 4096 isKeyFrame = true presentationTimeUs = 1578956 +sample: + trackType = audio + dataHashCode = 1742602241 + size = 4096 + isKeyFrame = true + presentationTimeUs = 1602176 sample: trackType = audio dataHashCode = 1742602241 @@ -462,16 +474,16 @@ sample: presentationTimeUs = 1764716 sample: trackType = audio - dataHashCode = 1742602241 - size = 4096 + dataHashCode = -809762815 + size = 8192 isKeyFrame = true presentationTimeUs = 1787936 sample: trackType = audio - dataHashCode = -809762815 - size = 8192 + dataHashCode = 1742602241 + size = 4096 isKeyFrame = true - presentationTimeUs = 1811156 + presentationTimeUs = 1834376 sample: trackType = audio dataHashCode = 1742602241 @@ -504,10 +516,16 @@ sample: presentationTimeUs = 1950476 sample: trackType = 
audio - dataHashCode = -474907647 - size = 4640 + dataHashCode = 1742602241 + size = 4096 isKeyFrame = true presentationTimeUs = 1973696 +sample: + trackType = audio + dataHashCode = -204243967 + size = 544 + isKeyFrame = true + presentationTimeUs = 1996916 sample: trackType = audio dataHashCode = 838148193 diff --git a/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/silence_silence.dump b/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/silence_silence.dump index c09558b8fa..f372669fad 100644 --- a/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/silence_silence.dump +++ b/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/silence_silence.dump @@ -324,10 +324,16 @@ sample: presentationTimeUs = 1160997 sample: trackType = audio - dataHashCode = -809762815 - size = 8192 + dataHashCode = 1742602241 + size = 4096 isKeyFrame = true presentationTimeUs = 1184217 +sample: + trackType = audio + dataHashCode = 1742602241 + size = 4096 + isKeyFrame = true + presentationTimeUs = 1207437 sample: trackType = audio dataHashCode = 1742602241 @@ -348,16 +354,10 @@ sample: presentationTimeUs = 1277097 sample: trackType = audio - dataHashCode = 1742602241 - size = 4096 + dataHashCode = -809762815 + size = 8192 isKeyFrame = true presentationTimeUs = 1300317 -sample: - trackType = audio - dataHashCode = 1742602241 - size = 4096 - isKeyFrame = true - presentationTimeUs = 1323537 sample: trackType = audio dataHashCode = 1742602241 @@ -378,10 +378,16 @@ sample: presentationTimeUs = 1393197 sample: trackType = audio - dataHashCode = -809762815 - size = 8192 + dataHashCode = 1742602241 + size = 4096 isKeyFrame = true presentationTimeUs = 1416417 +sample: + trackType = audio + dataHashCode = 1742602241 + size = 4096 + isKeyFrame = true + presentationTimeUs = 1439637 sample: trackType = audio dataHashCode = 1742602241 @@ -414,10 +420,16 @@ sample: presentationTimeUs = 1555736 
sample: trackType = audio - dataHashCode = -809762815 - size = 8192 + dataHashCode = 1742602241 + size = 4096 isKeyFrame = true presentationTimeUs = 1578956 +sample: + trackType = audio + dataHashCode = 1742602241 + size = 4096 + isKeyFrame = true + presentationTimeUs = 1602176 sample: trackType = audio dataHashCode = 1742602241 @@ -462,16 +474,16 @@ sample: presentationTimeUs = 1764716 sample: trackType = audio - dataHashCode = 1742602241 - size = 4096 + dataHashCode = -809762815 + size = 8192 isKeyFrame = true presentationTimeUs = 1787936 sample: trackType = audio - dataHashCode = -809762815 - size = 8192 + dataHashCode = 1742602241 + size = 4096 isKeyFrame = true - presentationTimeUs = 1811156 + presentationTimeUs = 1834376 sample: trackType = audio dataHashCode = 1742602241 @@ -504,10 +516,16 @@ sample: presentationTimeUs = 1950476 sample: trackType = audio - dataHashCode = -474907647 - size = 4640 + dataHashCode = 1742602241 + size = 4096 isKeyFrame = true presentationTimeUs = 1973696 +sample: + trackType = audio + dataHashCode = -204243967 + size = 544 + isKeyFrame = true + presentationTimeUs = 1996916 sample: trackType = audio dataHashCode = 1742602241 @@ -702,16 +720,16 @@ sample: presentationTimeUs = 2719818 sample: trackType = audio - dataHashCode = -809762815 - size = 8192 + dataHashCode = 1742602241 + size = 4096 isKeyFrame = true presentationTimeUs = 2743038 sample: trackType = audio - dataHashCode = 1742602241 - size = 4096 + dataHashCode = -809762815 + size = 8192 isKeyFrame = true - presentationTimeUs = 2789478 + presentationTimeUs = 2766258 sample: trackType = audio dataHashCode = 1742602241 @@ -798,10 +816,16 @@ sample: presentationTimeUs = 3114557 sample: trackType = audio - dataHashCode = -809762815 - size = 8192 + dataHashCode = 1742602241 + size = 4096 isKeyFrame = true presentationTimeUs = 3137777 +sample: + trackType = audio + dataHashCode = 1742602241 + size = 4096 + isKeyFrame = true + presentationTimeUs = 3160997 sample: trackType = audio 
dataHashCode = 1742602241 @@ -822,16 +846,10 @@ sample: presentationTimeUs = 3230657 sample: trackType = audio - dataHashCode = 1742602241 - size = 4096 + dataHashCode = -809762815 + size = 8192 isKeyFrame = true presentationTimeUs = 3253877 -sample: - trackType = audio - dataHashCode = 1742602241 - size = 4096 - isKeyFrame = true - presentationTimeUs = 3277097 sample: trackType = audio dataHashCode = 1742602241 @@ -840,10 +858,16 @@ sample: presentationTimeUs = 3300317 sample: trackType = audio - dataHashCode = -809762815 - size = 8192 + dataHashCode = 1742602241 + size = 4096 isKeyFrame = true presentationTimeUs = 3323537 +sample: + trackType = audio + dataHashCode = 1742602241 + size = 4096 + isKeyFrame = true + presentationTimeUs = 3346757 sample: trackType = audio dataHashCode = 1742602241 @@ -900,10 +924,16 @@ sample: presentationTimeUs = 3555736 sample: trackType = audio - dataHashCode = -809762815 - size = 8192 + dataHashCode = 1742602241 + size = 4096 isKeyFrame = true presentationTimeUs = 3578956 +sample: + trackType = audio + dataHashCode = 1742602241 + size = 4096 + isKeyFrame = true + presentationTimeUs = 3602176 sample: trackType = audio dataHashCode = 1742602241 @@ -972,10 +1002,16 @@ sample: presentationTimeUs = 3880816 sample: trackType = audio - dataHashCode = -809762815 - size = 8192 + dataHashCode = 1742602241 + size = 4096 isKeyFrame = true presentationTimeUs = 3904036 +sample: + trackType = audio + dataHashCode = 1742602241 + size = 4096 + isKeyFrame = true + presentationTimeUs = 3927256 sample: trackType = audio dataHashCode = 1742602241 diff --git a/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/silence_silenceHighPitch.dump b/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/silence_silenceHighPitch.dump index 5cd3131c6e..d542904911 100644 --- a/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/silence_silenceHighPitch.dump +++ 
b/libraries/test_data/src/test/assets/transformerdumps/mp4/sowt-with-video.mov/silence_silenceHighPitch.dump @@ -324,10 +324,16 @@ sample: presentationTimeUs = 1160997 sample: trackType = audio - dataHashCode = -809762815 - size = 8192 + dataHashCode = 1742602241 + size = 4096 isKeyFrame = true presentationTimeUs = 1184217 +sample: + trackType = audio + dataHashCode = 1742602241 + size = 4096 + isKeyFrame = true + presentationTimeUs = 1207437 sample: trackType = audio dataHashCode = 1742602241 @@ -348,16 +354,10 @@ sample: presentationTimeUs = 1277097 sample: trackType = audio - dataHashCode = 1742602241 - size = 4096 + dataHashCode = -809762815 + size = 8192 isKeyFrame = true presentationTimeUs = 1300317 -sample: - trackType = audio - dataHashCode = 1742602241 - size = 4096 - isKeyFrame = true - presentationTimeUs = 1323537 sample: trackType = audio dataHashCode = 1742602241 @@ -378,10 +378,16 @@ sample: presentationTimeUs = 1393197 sample: trackType = audio - dataHashCode = -809762815 - size = 8192 + dataHashCode = 1742602241 + size = 4096 isKeyFrame = true presentationTimeUs = 1416417 +sample: + trackType = audio + dataHashCode = 1742602241 + size = 4096 + isKeyFrame = true + presentationTimeUs = 1439637 sample: trackType = audio dataHashCode = 1742602241 @@ -414,10 +420,16 @@ sample: presentationTimeUs = 1555736 sample: trackType = audio - dataHashCode = -809762815 - size = 8192 + dataHashCode = 1742602241 + size = 4096 isKeyFrame = true presentationTimeUs = 1578956 +sample: + trackType = audio + dataHashCode = 1742602241 + size = 4096 + isKeyFrame = true + presentationTimeUs = 1602176 sample: trackType = audio dataHashCode = 1742602241 @@ -462,16 +474,16 @@ sample: presentationTimeUs = 1764716 sample: trackType = audio - dataHashCode = 1742602241 - size = 4096 + dataHashCode = -809762815 + size = 8192 isKeyFrame = true presentationTimeUs = 1787936 sample: trackType = audio - dataHashCode = -809762815 - size = 8192 + dataHashCode = 1742602241 + size = 4096 
isKeyFrame = true - presentationTimeUs = 1811156 + presentationTimeUs = 1834376 sample: trackType = audio dataHashCode = 1742602241 @@ -504,10 +516,16 @@ sample: presentationTimeUs = 1950476 sample: trackType = audio - dataHashCode = -474907647 - size = 4640 + dataHashCode = 1742602241 + size = 4096 isKeyFrame = true presentationTimeUs = 1973696 +sample: + trackType = audio + dataHashCode = -204243967 + size = 544 + isKeyFrame = true + presentationTimeUs = 1996916 sample: trackType = audio dataHashCode = 1940582145 @@ -702,16 +720,16 @@ sample: presentationTimeUs = 2713310 sample: trackType = audio - dataHashCode = 644553473 - size = 8360 + dataHashCode = -1264223743 + size = 4400 isKeyFrame = true presentationTimeUs = 2735759 sample: trackType = audio - dataHashCode = 571187457 - size = 3960 + dataHashCode = 2136490497 + size = 7920 isKeyFrame = true - presentationTimeUs = 2783151 + presentationTimeUs = 2760702 sample: trackType = audio dataHashCode = -1264223743 @@ -798,10 +816,16 @@ sample: presentationTimeUs = 3107392 sample: trackType = audio - dataHashCode = 2136490497 - size = 7920 + dataHashCode = 571187457 + size = 3960 isKeyFrame = true presentationTimeUs = 3132312 +sample: + trackType = audio + dataHashCode = 571187457 + size = 3960 + isKeyFrame = true + presentationTimeUs = 3154761 sample: trackType = audio dataHashCode = -1264223743 @@ -822,16 +846,10 @@ sample: presentationTimeUs = 3224603 sample: trackType = audio - dataHashCode = 571187457 - size = 3960 + dataHashCode = 644553473 + size = 8360 isKeyFrame = true presentationTimeUs = 3247052 -sample: - trackType = audio - dataHashCode = -1264223743 - size = 4400 - isKeyFrame = true - presentationTimeUs = 3269501 sample: trackType = audio dataHashCode = 571187457 @@ -840,10 +858,16 @@ sample: presentationTimeUs = 3294444 sample: trackType = audio - dataHashCode = 644553473 - size = 8360 + dataHashCode = 571187457 + size = 3960 isKeyFrame = true presentationTimeUs = 3316893 +sample: + trackType = audio 
+ dataHashCode = -1264223743 + size = 4400 + isKeyFrame = true + presentationTimeUs = 3339342 sample: trackType = audio dataHashCode = -448902783 @@ -900,10 +924,16 @@ sample: presentationTimeUs = 3548843 sample: trackType = audio - dataHashCode = 2136490497 - size = 7920 + dataHashCode = 571187457 + size = 3960 isKeyFrame = true presentationTimeUs = 3573786 +sample: + trackType = audio + dataHashCode = 571187457 + size = 3960 + isKeyFrame = true + presentationTimeUs = 3596235 sample: trackType = audio dataHashCode = -448902783 @@ -978,10 +1008,16 @@ sample: presentationTimeUs = 3898004 sample: trackType = audio - dataHashCode = 644553473 - size = 8360 + dataHashCode = 571187457 + size = 3960 isKeyFrame = true presentationTimeUs = 3920453 +sample: + trackType = audio + dataHashCode = -1264223743 + size = 4400 + isKeyFrame = true + presentationTimeUs = 3942902 sample: trackType = audio dataHashCode = -1759454975 diff --git a/libraries/transformer/src/main/java/androidx/media3/transformer/EncodedSampleExporter.java b/libraries/transformer/src/main/java/androidx/media3/transformer/EncodedSampleExporter.java index f0c0e8b1ac..03631ad685 100644 --- a/libraries/transformer/src/main/java/androidx/media3/transformer/EncodedSampleExporter.java +++ b/libraries/transformer/src/main/java/androidx/media3/transformer/EncodedSampleExporter.java @@ -16,9 +16,11 @@ package androidx.media3.transformer; +import static androidx.media3.common.util.Assertions.checkNotNull; import static androidx.media3.decoder.DecoderInputBuffer.BUFFER_REPLACEMENT_MODE_DIRECT; import androidx.annotation.Nullable; +import androidx.annotation.VisibleForTesting; import androidx.media3.common.Format; import androidx.media3.decoder.DecoderInputBuffer; import java.nio.ByteBuffer; @@ -30,7 +32,19 @@ import java.util.concurrent.atomic.AtomicLong; /** Muxes encoded samples without any transcoding or transformation. 
*/ /* package */ final class EncodedSampleExporter extends SampleExporter implements GraphInput { - private static final int MAX_INPUT_BUFFER_COUNT = 10; + // These constants limit the number of buffers used to pass input data. More buffers can avoid the + // producer/consumer having to wait, but can increase allocation size (determined by the producer + // side) so we constrain the number of buffers to be in this range, and prevent allocating more + // buffers (above the minimum number) once a target size has been reached. Once the target has + // been reached, no new buffers will be created but the producer can still increase the size of + // existing buffers. + @VisibleForTesting /* package */ static final int MIN_INPUT_BUFFER_COUNT = 10; + @VisibleForTesting /* package */ static final int MAX_INPUT_BUFFER_COUNT = 200; + @VisibleForTesting /* package */ static final long ALLOCATION_SIZE_TARGET_BYTES = 2 * 1024 * 1024; + + /** An empty, direct {@link ByteBuffer}. */ + private static final ByteBuffer EMPTY_BUFFER = + ByteBuffer.allocateDirect(0).order(ByteOrder.nativeOrder()); private final Format format; private final long initialTimestampOffsetUs; @@ -38,10 +52,17 @@ import java.util.concurrent.atomic.AtomicLong; private final Queue availableInputBuffers; private final Queue pendingInputBuffers; - private long mediaItemOffsetUs; + // Accessed on the producer and consumer threads. private volatile boolean inputEnded; + // Accessed only on the producer thread. 
+ + private long mediaItemOffsetUs; + private boolean hasReachedAllocationTarget; + private long totalBufferSizeBytes; + @Nullable private DecoderInputBuffer nextInputBuffer; + public EncodedSampleExporter( Format format, TransformationRequest transformationRequest, @@ -53,12 +74,6 @@ import java.util.concurrent.atomic.AtomicLong; this.initialTimestampOffsetUs = initialTimestampOffsetUs; nextMediaItemOffsetUs = new AtomicLong(); availableInputBuffers = new ConcurrentLinkedDeque<>(); - ByteBuffer emptyBuffer = ByteBuffer.allocateDirect(0).order(ByteOrder.nativeOrder()); - for (int i = 0; i < MAX_INPUT_BUFFER_COUNT; i++) { - DecoderInputBuffer inputBuffer = new DecoderInputBuffer(BUFFER_REPLACEMENT_MODE_DIRECT); - inputBuffer.data = emptyBuffer; - availableInputBuffers.add(inputBuffer); - } pendingInputBuffers = new ConcurrentLinkedDeque<>(); fallbackListener.onTransformationRequestFinalized(transformationRequest); } @@ -76,18 +91,40 @@ import java.util.concurrent.atomic.AtomicLong; @Override @Nullable public DecoderInputBuffer getInputBuffer() { - return availableInputBuffers.peek(); + if (nextInputBuffer == null) { + nextInputBuffer = availableInputBuffers.poll(); + if (!hasReachedAllocationTarget) { + if (nextInputBuffer == null) { + nextInputBuffer = new DecoderInputBuffer(BUFFER_REPLACEMENT_MODE_DIRECT); + nextInputBuffer.data = EMPTY_BUFFER; + } else { + // The size of this buffer has already been accounted for but the producer may reallocate + // it so remove it from the total and add it back when it's queued again. 
+ totalBufferSizeBytes -= checkNotNull(nextInputBuffer.data).capacity(); + } + } + } + return nextInputBuffer; } @Override public boolean queueInputBuffer() { - DecoderInputBuffer inputBuffer = availableInputBuffers.remove(); + DecoderInputBuffer inputBuffer = checkNotNull(nextInputBuffer); + nextInputBuffer = null; if (inputBuffer.isEndOfStream()) { inputEnded = true; } else { inputBuffer.timeUs += mediaItemOffsetUs + initialTimestampOffsetUs; pendingInputBuffers.add(inputBuffer); } + if (!hasReachedAllocationTarget) { + int bufferCount = availableInputBuffers.size() + pendingInputBuffers.size(); + totalBufferSizeBytes += checkNotNull(inputBuffer.data).capacity(); + hasReachedAllocationTarget = + bufferCount >= MIN_INPUT_BUFFER_COUNT + && (bufferCount >= MAX_INPUT_BUFFER_COUNT + || totalBufferSizeBytes >= ALLOCATION_SIZE_TARGET_BYTES); + } return true; } diff --git a/libraries/transformer/src/test/java/androidx/media3/transformer/EncodedSampleExporterTest.java b/libraries/transformer/src/test/java/androidx/media3/transformer/EncodedSampleExporterTest.java new file mode 100644 index 0000000000..9916ebacef --- /dev/null +++ b/libraries/transformer/src/test/java/androidx/media3/transformer/EncodedSampleExporterTest.java @@ -0,0 +1,134 @@ +/* + * Copyright 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package androidx.media3.transformer; + +import static androidx.media3.common.util.Assertions.checkNotNull; +import static androidx.media3.transformer.EncodedSampleExporter.ALLOCATION_SIZE_TARGET_BYTES; +import static androidx.media3.transformer.EncodedSampleExporter.MAX_INPUT_BUFFER_COUNT; +import static androidx.media3.transformer.EncodedSampleExporter.MIN_INPUT_BUFFER_COUNT; +import static com.google.common.truth.Truth.assertThat; +import static org.mockito.Mockito.mock; + +import android.os.Looper; +import androidx.annotation.Nullable; +import androidx.media3.common.Format; +import androidx.media3.common.MediaItem; +import androidx.media3.common.MimeTypes; +import androidx.media3.common.util.Clock; +import androidx.media3.common.util.HandlerWrapper; +import androidx.media3.common.util.ListenerSet; +import androidx.media3.decoder.DecoderInputBuffer; +import androidx.test.ext.junit.runners.AndroidJUnit4; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; + +/** Unit tests for {@link EncodedSampleExporter}. 
*/ +@RunWith(AndroidJUnit4.class) +public final class EncodedSampleExporterTest { + + private EncodedSampleExporter encodedSampleExporter; + + @Mock private ListenerSet.IterationFinishedEvent mockIterationFinishedEvent; + @Mock private HandlerWrapper mockHandlerWrapper; + + @Before + public void setUp() { + Looper looper = checkNotNull(Looper.myLooper()); + FallbackListener fallbackListener = + new FallbackListener( + new Composition.Builder( + new EditedMediaItemSequence( + new EditedMediaItem.Builder(MediaItem.EMPTY).build())) + .build(), + new ListenerSet<>(looper, Clock.DEFAULT, mockIterationFinishedEvent), + mockHandlerWrapper, + new TransformationRequest.Builder().build()); + fallbackListener.setTrackCount(1); + encodedSampleExporter = + new EncodedSampleExporter( + new Format.Builder().setSampleMimeType(MimeTypes.AUDIO_AAC).build(), + new TransformationRequest.Builder().build(), + new MuxerWrapper( + /* outputPath= */ "unused", + new InAppMuxer.Factory(), + mock(MuxerWrapper.Listener.class), + MuxerWrapper.MUXER_MODE_DEFAULT), + fallbackListener, + /* initialTimestampOffsetUs= */ 0); + } + + @Test + public void queueInput_withEmptyBuffers_allocatesMaxBufferCount() { + for (int i = 0; i < MAX_INPUT_BUFFER_COUNT; i++) { + @Nullable DecoderInputBuffer decoderInputBuffer = encodedSampleExporter.getInputBuffer(); + assertThat(decoderInputBuffer).isNotNull(); + decoderInputBuffer.ensureSpaceForWrite(/* length= */ 0); + encodedSampleExporter.queueInputBuffer(); + } + assertThat(encodedSampleExporter.getInputBuffer()).isNull(); + } + + @Test + public void queueInput_withSmallBuffers_allocatesMaxBufferCount() { + assertThat(fillInputAndGetTotalInputSize(/* inputBufferSizeBytes= */ 1)) + .isEqualTo(MAX_INPUT_BUFFER_COUNT); + } + + @Test + public void queueInput_withMediumBuffers_reachesBufferSizeTarget() { + assertThat(fillInputAndGetTotalInputSize(/* inputBufferSizeBytes= */ 16 * 1024)) + .isEqualTo(ALLOCATION_SIZE_TARGET_BYTES); + } + + @Test + public void 
queueInput_withLargeBuffers_allocatesMinBufferCount() { + assertThat(fillInputAndGetTotalInputSize(/* inputBufferSizeBytes= */ 1024 * 1024)) + .isEqualTo(MIN_INPUT_BUFFER_COUNT * 1024 * 1024); + } + + @Test + public void queueInputToLimitThenProcessOutput_queueInputSucceeds() { + // Queue input until no more input is accepted. + assertThat(fillInputAndGetTotalInputSize(/* inputBufferSizeBytes= */ 16 * 1024)) + .isEqualTo(ALLOCATION_SIZE_TARGET_BYTES); + assertThat(fillInputAndGetTotalInputSize(/* inputBufferSizeBytes= */ 1024 * 1024)).isEqualTo(0); + + // Simulate draining to the muxer. + while (encodedSampleExporter.getMuxerInputBuffer() != null) { + encodedSampleExporter.releaseMuxerInputBuffer(); + } + + // It's possible to queue input again. + assertThat(fillInputAndGetTotalInputSize(/* inputBufferSizeBytes= */ 16 * 1024)) + .isEqualTo(ALLOCATION_SIZE_TARGET_BYTES); + } + + private long fillInputAndGetTotalInputSize(int inputBufferSizeBytes) { + int totalAllocatedSize = 0; + for (int i = 0; i < MAX_INPUT_BUFFER_COUNT + 1; i++) { + @Nullable DecoderInputBuffer decoderInputBuffer = encodedSampleExporter.getInputBuffer(); + if (decoderInputBuffer == null) { + return totalAllocatedSize; + } + decoderInputBuffer.ensureSpaceForWrite(inputBufferSizeBytes); + encodedSampleExporter.queueInputBuffer(); + totalAllocatedSize += inputBufferSizeBytes; + } + throw new IllegalStateException("Unexpectedly allocated more than MAX_INPUT_BUFFER_COUNT"); + } +}