Add raw audio support to Mp4Muxer.

The fourcc used is `sowt` for little-endian 16-bit PCM and `twos` for big-endian 16-bit PCM.

PiperOrigin-RevId: 724391195
This commit is contained in:
Googler 2025-02-07 10:25:24 -08:00 committed by Copybara-Service
parent 1190980616
commit 0ba3bf66c6
6 changed files with 83 additions and 3 deletions

View File

@ -685,6 +685,7 @@ import org.checkerframework.checker.nullness.qual.PolyNull;
}
/** Returns a codec specific box. */
@SuppressWarnings("MergeCases")
public static ByteBuffer codecSpecificBox(Format format) {
String mimeType = checkNotNull(format.sampleMimeType);
switch (mimeType) {
@ -697,6 +698,8 @@ import org.checkerframework.checker.nullness.qual.PolyNull;
return damrBox(/* mode= */ (short) 0x83FF); // mode set: all enabled for AMR-WB
case MimeTypes.AUDIO_OPUS:
return dOpsBox(format);
case MimeTypes.AUDIO_RAW:
return ByteBuffer.allocate(0); // No codec specific box for raw audio.
case MimeTypes.VIDEO_H263:
return d263Box(format);
case MimeTypes.VIDEO_H264:
@ -1690,6 +1693,14 @@ import org.checkerframework.checker.nullness.qual.PolyNull;
return "s263";
case MimeTypes.AUDIO_OPUS:
return "Opus";
case MimeTypes.AUDIO_RAW:
if (format.pcmEncoding == C.ENCODING_PCM_16BIT) {
return "sowt";
} else if (format.pcmEncoding == C.ENCODING_PCM_16BIT_BIG_ENDIAN) {
return "twos";
} else {
throw new IllegalArgumentException("Unsupported PCM encoding: " + format.pcmEncoding);
}
case MimeTypes.VIDEO_H264:
return "avc1";
case MimeTypes.VIDEO_H265:

View File

@ -58,6 +58,7 @@ import java.nio.ByteBuffer;
* <li>AMR-WB (Wideband AMR)
* <li>Opus
* <li>Vorbis
* <li>Raw Audio
* </ul>
* <li>Metadata
* </ul>
@ -158,7 +159,8 @@ public final class FragmentedMp4Muxer implements AutoCloseable {
MimeTypes.AUDIO_AMR_NB,
MimeTypes.AUDIO_AMR_WB,
MimeTypes.AUDIO_OPUS,
MimeTypes.AUDIO_VORBIS);
MimeTypes.AUDIO_VORBIS,
MimeTypes.AUDIO_RAW);
private final FragmentedMp4Writer fragmentedMp4Writer;
private final MetadataCollector metadataCollector;

View File

@ -82,6 +82,7 @@ import org.checkerframework.checker.nullness.qual.EnsuresNonNull;
* <li>AMR-WB (Wideband AMR)
* <li>Opus
* <li>Vorbis
* <li>Raw Audio
* </ul>
* <li>Metadata
* </ul>
@ -361,7 +362,8 @@ public final class Mp4Muxer implements AutoCloseable {
MimeTypes.AUDIO_AMR_NB,
MimeTypes.AUDIO_AMR_WB,
MimeTypes.AUDIO_OPUS,
MimeTypes.AUDIO_VORBIS);
MimeTypes.AUDIO_VORBIS,
MimeTypes.AUDIO_RAW);
private static final String TAG = "Mp4Muxer";

View File

@ -73,6 +73,7 @@ public class Mp4MuxerEndToEndParameterizedTest {
private static final String AMR_WB_3GP = "bbb_mono_16kHz_23.05kbps_amrwb.3gp";
private static final String OPUS_OGG = "bbb_6ch_8kHz_opus.ogg";
private static final String VORBIS_OGG = "bbb_1ch_16kHz_q10_vorbis.ogg";
private static final String RAW_WAV = "bbb_2ch_44kHz.wav";
@Parameters(name = "{0}")
public static ImmutableList<String> mediaSamples() {
@ -92,7 +93,8 @@ public class Mp4MuxerEndToEndParameterizedTest {
AMR_NB_3GP,
AMR_WB_3GP,
OPUS_OGG,
VORBIS_OGG);
VORBIS_OGG,
RAW_WAV);
}
@Parameter public @MonotonicNonNull String inputFile;

View File

@ -0,0 +1,63 @@
seekMap:
isSeekable = true
duration = 1000000
getPosition(0) = [[timeUs=0, position=400052]]
getPosition(1) = [[timeUs=1, position=400052]]
getPosition(500000) = [[timeUs=500000, position=488252]]
getPosition(1000000) = [[timeUs=1000000, position=558812]]
numberOfTracks = 1
track 0:
total output bytes = 40
sample count = 10
track duration = 1000000
format 0:
id = 1
containerMimeType = audio/mp4
sampleMimeType = audio/raw
maxInputSize = 34
channelCount = 2
sampleRate = 44100
pcmEncoding = 2
language = ```
metadata = entries=[Mp4Timestamp: creation time=100000000, modification time=500000000, timescale=10000]
sample 0:
time = 0
flags = 1
data = length 4, hash F0FD1
sample 1:
time = 100000
flags = 1
data = length 4, hash FFDB3408
sample 2:
time = 200000
flags = 1
data = length 4, hash 47510F
sample 3:
time = 300000
flags = 1
data = length 4, hash 35B613
sample 4:
time = 400000
flags = 1
data = length 4, hash 333956
sample 5:
time = 500000
flags = 1
data = length 4, hash FFF14579
sample 6:
time = 600000
flags = 1
data = length 4, hash 3620FE
sample 7:
time = 700000
flags = 1
data = length 4, hash C488D
sample 8:
time = 800000
flags = 1
data = length 4, hash FFF6FBD8
sample 9:
time = 900000
flags = 536870913
data = length 4, hash FFDD4C2A
tracksEnded = true