Add support for big endian 24/32-bit PCM

Also parse the PCM encoding for lpcm in MP4, and update `MatroskaExtractor`
similarly.

Tested manually in the demo app using an MP4 with 24-bit big endian audio.

PiperOrigin-RevId: 546878505
This commit is contained in:
andrewlewis 2023-07-10 16:21:36 +01:00 committed by Rohit Singh
parent 7c9e6ad2ea
commit 6f4ef51727
11 changed files with 111 additions and 12 deletions

View File

@ -50,6 +50,8 @@
implementation details of an `Extractor` you must first call
`Extractor.getUnderlyingInstance`.
* Audio:
* Add support for 24/32-bit big endian PCM in MP4 and Matroska, and parse
PCM encoding for `lpcm` in MP4.
* Audio Offload:
* Add `AudioSink.getFormatOffloadSupport(Format)` that retrieves level of
offload support the sink can provide for the format through a

View File

@ -166,10 +166,12 @@ public final class C {
/**
* Represents an audio encoding, or an invalid or unset value. One of {@link Format#NO_VALUE},
* {@link #ENCODING_INVALID}, {@link #ENCODING_PCM_8BIT}, {@link #ENCODING_PCM_16BIT}, {@link
- * #ENCODING_PCM_16BIT_BIG_ENDIAN}, {@link #ENCODING_PCM_24BIT}, {@link #ENCODING_PCM_32BIT},
- * {@link #ENCODING_PCM_FLOAT}, {@link #ENCODING_MP3}, {@link #ENCODING_AC3}, {@link
- * #ENCODING_E_AC3}, {@link #ENCODING_E_AC3_JOC}, {@link #ENCODING_AC4}, {@link #ENCODING_DTS},
- * {@link #ENCODING_DTS_HD}, {@link #ENCODING_DOLBY_TRUEHD} or {@link #ENCODING_OPUS}.
+ * #ENCODING_PCM_16BIT_BIG_ENDIAN}, {@link #ENCODING_PCM_24BIT}, {@link
+ * #ENCODING_PCM_24BIT_BIG_ENDIAN}, {@link #ENCODING_PCM_32BIT}, {@link
+ * #ENCODING_PCM_32BIT_BIG_ENDIAN}, {@link #ENCODING_PCM_FLOAT}, {@link #ENCODING_MP3}, {@link
+ * #ENCODING_AC3}, {@link #ENCODING_E_AC3}, {@link #ENCODING_E_AC3_JOC}, {@link #ENCODING_AC4},
+ * {@link #ENCODING_DTS}, {@link #ENCODING_DTS_HD}, {@link #ENCODING_DOLBY_TRUEHD} or {@link
+ * #ENCODING_OPUS}.
*/
@UnstableApi
@Documented
@ -182,7 +184,9 @@ public final class C {
ENCODING_PCM_16BIT,
ENCODING_PCM_16BIT_BIG_ENDIAN,
ENCODING_PCM_24BIT,
ENCODING_PCM_24BIT_BIG_ENDIAN,
ENCODING_PCM_32BIT,
ENCODING_PCM_32BIT_BIG_ENDIAN,
ENCODING_PCM_FLOAT,
ENCODING_MP3,
ENCODING_AAC_LC,
@ -206,8 +210,9 @@ public final class C {
/**
* Represents a PCM audio encoding, or an invalid or unset value. One of {@link Format#NO_VALUE},
* {@link #ENCODING_INVALID}, {@link #ENCODING_PCM_8BIT}, {@link #ENCODING_PCM_16BIT}, {@link
- * #ENCODING_PCM_16BIT_BIG_ENDIAN}, {@link #ENCODING_PCM_24BIT}, {@link #ENCODING_PCM_32BIT},
- * {@link #ENCODING_PCM_FLOAT}.
+ * #ENCODING_PCM_16BIT_BIG_ENDIAN}, {@link #ENCODING_PCM_24BIT}, {@link
+ * #ENCODING_PCM_24BIT_BIG_ENDIAN}, {@link #ENCODING_PCM_32BIT}, {@link
+ * #ENCODING_PCM_32BIT_BIG_ENDIAN}, {@link #ENCODING_PCM_FLOAT}.
*/
@UnstableApi
@Documented
@ -220,7 +225,9 @@ public final class C {
ENCODING_PCM_16BIT,
ENCODING_PCM_16BIT_BIG_ENDIAN,
ENCODING_PCM_24BIT,
ENCODING_PCM_24BIT_BIG_ENDIAN,
ENCODING_PCM_32BIT,
ENCODING_PCM_32BIT_BIG_ENDIAN,
ENCODING_PCM_FLOAT
})
public @interface PcmEncoding {}
@ -240,9 +247,15 @@ public final class C {
/** PCM encoding with 24 bits per sample. */
@UnstableApi public static final int ENCODING_PCM_24BIT = 0x20000000;
/** Like {@link #ENCODING_PCM_24BIT} but with the bytes in big endian order. */
@UnstableApi public static final int ENCODING_PCM_24BIT_BIG_ENDIAN = 0x50000000;
/** PCM encoding with 32 bits per sample. */
@UnstableApi public static final int ENCODING_PCM_32BIT = 0x30000000;
/** Like {@link #ENCODING_PCM_32BIT} but with the bytes in big endian order. */
@UnstableApi public static final int ENCODING_PCM_32BIT_BIG_ENDIAN = 0x60000000;
/** See {@link AudioFormat#ENCODING_PCM_FLOAT}. */
@UnstableApi public static final int ENCODING_PCM_FLOAT = AudioFormat.ENCODING_PCM_FLOAT;

View File

@ -31,7 +31,9 @@ import java.nio.ByteBuffer;
* <li>{@link C#ENCODING_PCM_16BIT} ({@link #isActive()} will return {@code false})
* <li>{@link C#ENCODING_PCM_16BIT_BIG_ENDIAN}
* <li>{@link C#ENCODING_PCM_24BIT}
* <li>{@link C#ENCODING_PCM_24BIT_BIG_ENDIAN}
* <li>{@link C#ENCODING_PCM_32BIT}
* <li>{@link C#ENCODING_PCM_32BIT_BIG_ENDIAN}
* <li>{@link C#ENCODING_PCM_FLOAT}
* </ul>
*/
@ -47,7 +49,9 @@ public final class ToInt16PcmAudioProcessor extends BaseAudioProcessor {
&& encoding != C.ENCODING_PCM_16BIT
&& encoding != C.ENCODING_PCM_16BIT_BIG_ENDIAN
&& encoding != C.ENCODING_PCM_24BIT
&& encoding != C.ENCODING_PCM_24BIT_BIG_ENDIAN
&& encoding != C.ENCODING_PCM_32BIT
&& encoding != C.ENCODING_PCM_32BIT_BIG_ENDIAN
&& encoding != C.ENCODING_PCM_FLOAT) {
throw new UnhandledAudioFormatException(inputAudioFormat);
}
@ -72,9 +76,11 @@ public final class ToInt16PcmAudioProcessor extends BaseAudioProcessor {
resampledSize = size;
break;
case C.ENCODING_PCM_24BIT:
case C.ENCODING_PCM_24BIT_BIG_ENDIAN:
resampledSize = (size / 3) * 2;
break;
case C.ENCODING_PCM_32BIT:
case C.ENCODING_PCM_32BIT_BIG_ENDIAN:
case C.ENCODING_PCM_FLOAT:
resampledSize = size / 2;
break;
@ -109,6 +115,13 @@ public final class ToInt16PcmAudioProcessor extends BaseAudioProcessor {
buffer.put(inputBuffer.get(i + 2));
}
break;
case C.ENCODING_PCM_24BIT_BIG_ENDIAN:
// 24 BE -> 16 bit resampling. Drop the least significant byte.
for (int i = position; i < limit; i += 3) {
buffer.put(inputBuffer.get(i + 1));
buffer.put(inputBuffer.get(i));
}
break;
case C.ENCODING_PCM_32BIT:
// 32 -> 16 bit resampling. Drop the two least significant bytes.
for (int i = position; i < limit; i += 4) {
@ -116,6 +129,13 @@ public final class ToInt16PcmAudioProcessor extends BaseAudioProcessor {
buffer.put(inputBuffer.get(i + 3));
}
break;
case C.ENCODING_PCM_32BIT_BIG_ENDIAN:
// 32 BE -> 16 bit resampling. Drop the two least significant bytes.
for (int i = position; i < limit; i += 4) {
buffer.put(inputBuffer.get(i + 1));
buffer.put(inputBuffer.get(i));
}
break;
case C.ENCODING_PCM_FLOAT:
// 32 bit floating point -> 16 bit resampling. Floating point values are in the range
// [-1.0, 1.0], so need to be scaled by Short.MAX_VALUE.

View File

@ -46,7 +46,8 @@ public final class MediaFormatUtil {
*
* <p>Equivalent to {@link MediaFormat#KEY_PCM_ENCODING}, except it allows additional values
* defined by {@link C.PcmEncoding}, including {@link C#ENCODING_PCM_16BIT_BIG_ENDIAN}, {@link
- * C#ENCODING_PCM_24BIT}, and {@link C#ENCODING_PCM_32BIT}.
+ * C#ENCODING_PCM_24BIT}, {@link C#ENCODING_PCM_24BIT_BIG_ENDIAN}, {@link C#ENCODING_PCM_32BIT}
+ * and {@link C#ENCODING_PCM_32BIT_BIG_ENDIAN}.
*/
// The constant value must not be changed, because it's also set by the framework MediaParser API.
public static final String KEY_PCM_ENCODING_EXTENDED = "exo-pcm-encoding-int";
@ -445,6 +446,8 @@ public final class MediaFormatUtil {
break;
case Format.NO_VALUE:
case C.ENCODING_PCM_16BIT_BIG_ENDIAN:
case C.ENCODING_PCM_24BIT_BIG_ENDIAN:
case C.ENCODING_PCM_32BIT_BIG_ENDIAN:
default:
// No matching value. Do nothing.
return;

View File

@ -1863,7 +1863,9 @@ public final class Util {
|| encoding == C.ENCODING_PCM_16BIT
|| encoding == C.ENCODING_PCM_16BIT_BIG_ENDIAN
|| encoding == C.ENCODING_PCM_24BIT
|| encoding == C.ENCODING_PCM_24BIT_BIG_ENDIAN
|| encoding == C.ENCODING_PCM_32BIT
|| encoding == C.ENCODING_PCM_32BIT_BIG_ENDIAN
|| encoding == C.ENCODING_PCM_FLOAT;
}
@ -1876,7 +1878,9 @@ public final class Util {
@UnstableApi
public static boolean isEncodingHighResolutionPcm(@C.PcmEncoding int encoding) {
return encoding == C.ENCODING_PCM_24BIT
|| encoding == C.ENCODING_PCM_24BIT_BIG_ENDIAN
|| encoding == C.ENCODING_PCM_32BIT
|| encoding == C.ENCODING_PCM_32BIT_BIG_ENDIAN
|| encoding == C.ENCODING_PCM_FLOAT;
}
@ -1992,8 +1996,10 @@ public final class Util {
case C.ENCODING_PCM_16BIT_BIG_ENDIAN:
return channelCount * 2;
case C.ENCODING_PCM_24BIT:
case C.ENCODING_PCM_24BIT_BIG_ENDIAN:
return channelCount * 3;
case C.ENCODING_PCM_32BIT:
case C.ENCODING_PCM_32BIT_BIG_ENDIAN:
case C.ENCODING_PCM_FLOAT:
return channelCount * 4;
case C.ENCODING_INVALID:

View File

@ -1726,7 +1726,9 @@ public final class DefaultAudioSink implements AudioSink {
case C.ENCODING_PCM_16BIT:
case C.ENCODING_PCM_16BIT_BIG_ENDIAN:
case C.ENCODING_PCM_24BIT:
case C.ENCODING_PCM_24BIT_BIG_ENDIAN:
case C.ENCODING_PCM_32BIT:
case C.ENCODING_PCM_32BIT_BIG_ENDIAN:
case C.ENCODING_PCM_8BIT:
case C.ENCODING_PCM_FLOAT:
case C.ENCODING_AAC_ER_BSAC:

View File

@ -282,7 +282,9 @@ public class DefaultAudioTrackBufferSizeProvider
case C.ENCODING_PCM_16BIT:
case C.ENCODING_PCM_16BIT_BIG_ENDIAN:
case C.ENCODING_PCM_24BIT:
case C.ENCODING_PCM_24BIT_BIG_ENDIAN:
case C.ENCODING_PCM_32BIT:
case C.ENCODING_PCM_32BIT_BIG_ENDIAN:
case C.ENCODING_PCM_8BIT:
case C.ENCODING_PCM_FLOAT:
case C.ENCODING_AAC_ER_BSAC:

View File

@ -29,7 +29,9 @@ import java.nio.ByteBuffer;
*
* <ul>
* <li>{@link C#ENCODING_PCM_24BIT}
* <li>{@link C#ENCODING_PCM_24BIT_BIG_ENDIAN}
* <li>{@link C#ENCODING_PCM_32BIT}
* <li>{@link C#ENCODING_PCM_32BIT_BIG_ENDIAN}
* <li>{@link C#ENCODING_PCM_FLOAT} ({@link #isActive()} will return {@code false})
* </ul>
*/
@ -70,6 +72,16 @@ import java.nio.ByteBuffer;
writePcm32BitFloat(pcm32BitInteger, buffer);
}
break;
case C.ENCODING_PCM_24BIT_BIG_ENDIAN:
buffer = replaceOutputBuffer((size / 3) * 4);
for (int i = position; i < limit; i += 3) {
int pcm32BitInteger =
((inputBuffer.get(i + 2) & 0xFF) << 8)
| ((inputBuffer.get(i + 1) & 0xFF) << 16)
| ((inputBuffer.get(i) & 0xFF) << 24);
writePcm32BitFloat(pcm32BitInteger, buffer);
}
break;
case C.ENCODING_PCM_32BIT:
buffer = replaceOutputBuffer(size);
for (int i = position; i < limit; i += 4) {
@ -81,6 +93,17 @@ import java.nio.ByteBuffer;
writePcm32BitFloat(pcm32BitInteger, buffer);
}
break;
case C.ENCODING_PCM_32BIT_BIG_ENDIAN:
buffer = replaceOutputBuffer(size);
for (int i = position; i < limit; i += 4) {
int pcm32BitInteger =
(inputBuffer.get(i + 3) & 0xFF)
| ((inputBuffer.get(i + 2) & 0xFF) << 8)
| ((inputBuffer.get(i + 1) & 0xFF) << 16)
| ((inputBuffer.get(i) & 0xFF) << 24);
writePcm32BitFloat(pcm32BitInteger, buffer);
}
break;
case C.ENCODING_PCM_8BIT:
case C.ENCODING_PCM_16BIT:
case C.ENCODING_PCM_16BIT_BIG_ENDIAN:

View File

@ -77,7 +77,10 @@ public final class WavUtil {
return TYPE_PCM;
case C.ENCODING_PCM_FLOAT:
return TYPE_FLOAT;
- case C.ENCODING_PCM_16BIT_BIG_ENDIAN: // Not TYPE_PCM, because TYPE_PCM is little endian.
+ // TYPE_PCM is little endian so big endian formats don't match.
+ case C.ENCODING_PCM_16BIT_BIG_ENDIAN:
+ case C.ENCODING_PCM_24BIT_BIG_ENDIAN:
+ case C.ENCODING_PCM_32BIT_BIG_ENDIAN:
case C.ENCODING_INVALID:
case Format.NO_VALUE:
default:

View File

@ -2204,6 +2204,10 @@ public class MatroskaExtractor implements Extractor {
pcmEncoding = C.ENCODING_PCM_8BIT;
} else if (audioBitDepth == 16) {
pcmEncoding = C.ENCODING_PCM_16BIT_BIG_ENDIAN;
} else if (audioBitDepth == 24) {
pcmEncoding = C.ENCODING_PCM_24BIT_BIG_ENDIAN;
} else if (audioBitDepth == 32) {
pcmEncoding = C.ENCODING_PCM_32BIT_BIG_ENDIAN;
} else {
pcmEncoding = Format.NO_VALUE;
mimeType = MimeTypes.AUDIO_UNKNOWN;

View File

@ -1493,9 +1493,25 @@ import java.util.List;
sampleRate = (int) Math.round(parent.readDouble());
channelCount = parent.readUnsignedIntToInt();
- // Skip always7F000000, sampleSize, formatSpecificFlags, constBytesPerAudioPacket,
- // constLPCMFramesPerAudioPacket.
- parent.skipBytes(20);
+ parent.skipBytes(4); // always7F000000
+ int bitsPerSample = parent.readUnsignedIntToInt();
+ int formatSpecificFlags = parent.readUnsignedIntToInt();
+ boolean isFloat = (formatSpecificFlags & 1) != 0;
+ boolean isBigEndian = (formatSpecificFlags & (1 << 1)) != 0;
+ if (!isFloat) {
+ if (bitsPerSample == 8) {
+ pcmEncoding = C.ENCODING_PCM_8BIT;
+ } else if (bitsPerSample == 16) {
+ pcmEncoding = isBigEndian ? C.ENCODING_PCM_16BIT_BIG_ENDIAN : C.ENCODING_PCM_16BIT;
+ } else if (bitsPerSample == 24) {
+ pcmEncoding = isBigEndian ? C.ENCODING_PCM_24BIT_BIG_ENDIAN : C.ENCODING_PCM_24BIT;
+ } else if (bitsPerSample == 32) {
+ pcmEncoding = isBigEndian ? C.ENCODING_PCM_32BIT_BIG_ENDIAN : C.ENCODING_PCM_32BIT;
+ }
+ } else if (bitsPerSample == 32) {
+ pcmEncoding = C.ENCODING_PCM_FLOAT;
+ }
+ parent.skipBytes(8); // constBytesPerAudioPacket, constLPCMFramesPerAudioPacket
} else {
// Unsupported version.
return;
@ -1541,12 +1557,17 @@ import java.util.List;
mimeType = MimeTypes.AUDIO_AMR_NB;
} else if (atomType == Atom.TYPE_sawb) {
mimeType = MimeTypes.AUDIO_AMR_WB;
- } else if (atomType == Atom.TYPE_lpcm || atomType == Atom.TYPE_sowt) {
+ } else if (atomType == Atom.TYPE_sowt) {
mimeType = MimeTypes.AUDIO_RAW;
pcmEncoding = C.ENCODING_PCM_16BIT;
} else if (atomType == Atom.TYPE_twos) {
mimeType = MimeTypes.AUDIO_RAW;
pcmEncoding = C.ENCODING_PCM_16BIT_BIG_ENDIAN;
+ } else if (atomType == Atom.TYPE_lpcm) {
+ mimeType = MimeTypes.AUDIO_RAW;
+ if (pcmEncoding == Format.NO_VALUE) {
+ pcmEncoding = C.ENCODING_PCM_16BIT;
+ }
} else if (atomType == Atom.TYPE__mp2 || atomType == Atom.TYPE__mp3) {
mimeType = MimeTypes.AUDIO_MPEG;
} else if (atomType == Atom.TYPE_mha1) {