From 6f4ef517271c63505e527311ec4a3e76abeaafcc Mon Sep 17 00:00:00 2001 From: andrewlewis Date: Mon, 10 Jul 2023 16:21:36 +0100 Subject: [PATCH] Add support for big endian 24/32-bit PCM Also parse the PCM encoding for lpcm in MP4, and update `MatroskaExtractor` similarly. Tested manually in the demo app using an MP4 with 24-bit big endian audio. PiperOrigin-RevId: 546878505 --- RELEASENOTES.md | 2 ++ .../main/java/androidx/media3/common/C.java | 25 ++++++++++++---- .../audio/ToInt16PcmAudioProcessor.java | 20 +++++++++++++ .../media3/common/util/MediaFormatUtil.java | 5 +++- .../androidx/media3/common/util/Util.java | 6 ++++ .../exoplayer/audio/DefaultAudioSink.java | 2 ++ .../DefaultAudioTrackBufferSizeProvider.java | 2 ++ .../audio/ToFloatPcmAudioProcessor.java | 23 +++++++++++++++ .../androidx/media3/extractor/WavUtil.java | 5 +++- .../extractor/mkv/MatroskaExtractor.java | 4 +++ .../media3/extractor/mp4/AtomParsers.java | 29 ++++++++++++++++--- 11 files changed, 111 insertions(+), 12 deletions(-) diff --git a/RELEASENOTES.md b/RELEASENOTES.md index db545efa7e..bc04f4302d 100644 --- a/RELEASENOTES.md +++ b/RELEASENOTES.md @@ -50,6 +50,8 @@ implementation details of an `Extractor` you must first call `Extractor.getUnderlyingInstance`. * Audio: + * Add support for 24/32-bit big endian PCM in MP4 and Matroska, and parse + PCM encoding for `lpcm` in MP4. * Audio Offload: * Add `AudioSink.getFormatOffloadSupport(Format)` that retrieves level of offload support the sink can provide for the format through a diff --git a/libraries/common/src/main/java/androidx/media3/common/C.java b/libraries/common/src/main/java/androidx/media3/common/C.java index 87d8cd8064..661b485746 100644 --- a/libraries/common/src/main/java/androidx/media3/common/C.java +++ b/libraries/common/src/main/java/androidx/media3/common/C.java @@ -166,10 +166,12 @@ public final class C { /** * Represents an audio encoding, or an invalid or unset value. One of {@link Format#NO_VALUE}, * {@link #ENCODING_INVALID}, {@link #ENCODING_PCM_8BIT}, {@link #ENCODING_PCM_16BIT}, {@link - * #ENCODING_PCM_16BIT_BIG_ENDIAN}, {@link #ENCODING_PCM_24BIT}, {@link #ENCODING_PCM_32BIT}, - * {@link #ENCODING_PCM_FLOAT}, {@link #ENCODING_MP3}, {@link #ENCODING_AC3}, {@link - * #ENCODING_E_AC3}, {@link #ENCODING_E_AC3_JOC}, {@link #ENCODING_AC4}, {@link #ENCODING_DTS}, - * {@link #ENCODING_DTS_HD}, {@link #ENCODING_DOLBY_TRUEHD} or {@link #ENCODING_OPUS}. + * #ENCODING_PCM_16BIT_BIG_ENDIAN}, {@link #ENCODING_PCM_24BIT}, {@link + * #ENCODING_PCM_24BIT_BIG_ENDIAN}, {@link #ENCODING_PCM_32BIT}, {@link + * #ENCODING_PCM_32BIT_BIG_ENDIAN}, {@link #ENCODING_PCM_FLOAT}, {@link #ENCODING_MP3}, {@link + * #ENCODING_AC3}, {@link #ENCODING_E_AC3}, {@link #ENCODING_E_AC3_JOC}, {@link #ENCODING_AC4}, + * {@link #ENCODING_DTS}, {@link #ENCODING_DTS_HD}, {@link #ENCODING_DOLBY_TRUEHD} or {@link + * #ENCODING_OPUS}. */ @UnstableApi @Documented @@ -182,7 +184,9 @@ public final class C { ENCODING_PCM_16BIT, ENCODING_PCM_16BIT_BIG_ENDIAN, ENCODING_PCM_24BIT, + ENCODING_PCM_24BIT_BIG_ENDIAN, ENCODING_PCM_32BIT, + ENCODING_PCM_32BIT_BIG_ENDIAN, ENCODING_PCM_FLOAT, ENCODING_MP3, ENCODING_AAC_LC, @@ -206,8 +210,9 @@ public final class C { /** * Represents a PCM audio encoding, or an invalid or unset value. One of {@link Format#NO_VALUE}, * {@link #ENCODING_INVALID}, {@link #ENCODING_PCM_8BIT}, {@link #ENCODING_PCM_16BIT}, {@link - * #ENCODING_PCM_16BIT_BIG_ENDIAN}, {@link #ENCODING_PCM_24BIT}, {@link #ENCODING_PCM_32BIT}, - * {@link #ENCODING_PCM_FLOAT}. + * #ENCODING_PCM_16BIT_BIG_ENDIAN}, {@link #ENCODING_PCM_24BIT}, {@link + * #ENCODING_PCM_24BIT_BIG_ENDIAN}, {@link #ENCODING_PCM_32BIT}, {@link + * #ENCODING_PCM_32BIT_BIG_ENDIAN}, {@link #ENCODING_PCM_FLOAT}. */ @UnstableApi @Documented @@ -220,7 +225,9 @@ public final class C { ENCODING_PCM_16BIT, ENCODING_PCM_16BIT_BIG_ENDIAN, ENCODING_PCM_24BIT, + ENCODING_PCM_24BIT_BIG_ENDIAN, ENCODING_PCM_32BIT, + ENCODING_PCM_32BIT_BIG_ENDIAN, ENCODING_PCM_FLOAT }) public @interface PcmEncoding {} @@ -240,9 +247,15 @@ public final class C { /** PCM encoding with 24 bits per sample. */ @UnstableApi public static final int ENCODING_PCM_24BIT = 0x20000000; + /** Like {@link #ENCODING_PCM_24BIT} but with the bytes in big endian order. */ + @UnstableApi public static final int ENCODING_PCM_24BIT_BIG_ENDIAN = 0x50000000; + /** PCM encoding with 32 bits per sample. */ @UnstableApi public static final int ENCODING_PCM_32BIT = 0x30000000; + /** Like {@link #ENCODING_PCM_32BIT} but with the bytes in big endian order. */ + @UnstableApi public static final int ENCODING_PCM_32BIT_BIG_ENDIAN = 0x60000000; + /** See {@link AudioFormat#ENCODING_PCM_FLOAT}. */ @UnstableApi public static final int ENCODING_PCM_FLOAT = AudioFormat.ENCODING_PCM_FLOAT; diff --git a/libraries/common/src/main/java/androidx/media3/common/audio/ToInt16PcmAudioProcessor.java b/libraries/common/src/main/java/androidx/media3/common/audio/ToInt16PcmAudioProcessor.java index 94d594141c..ccd72b1e4c 100644 --- a/libraries/common/src/main/java/androidx/media3/common/audio/ToInt16PcmAudioProcessor.java +++ b/libraries/common/src/main/java/androidx/media3/common/audio/ToInt16PcmAudioProcessor.java @@ -31,7 +31,9 @@ import java.nio.ByteBuffer; *
  • {@link C#ENCODING_PCM_16BIT} ({@link #isActive()} will return {@code false}) *
  • {@link C#ENCODING_PCM_16BIT_BIG_ENDIAN} *
  • {@link C#ENCODING_PCM_24BIT} + *
  • {@link C#ENCODING_PCM_24BIT_BIG_ENDIAN} *
  • {@link C#ENCODING_PCM_32BIT} + *
  • {@link C#ENCODING_PCM_32BIT_BIG_ENDIAN} *
  • {@link C#ENCODING_PCM_FLOAT} * */ @@ -47,7 +49,9 @@ public final class ToInt16PcmAudioProcessor extends BaseAudioProcessor { && encoding != C.ENCODING_PCM_16BIT && encoding != C.ENCODING_PCM_16BIT_BIG_ENDIAN && encoding != C.ENCODING_PCM_24BIT + && encoding != C.ENCODING_PCM_24BIT_BIG_ENDIAN && encoding != C.ENCODING_PCM_32BIT + && encoding != C.ENCODING_PCM_32BIT_BIG_ENDIAN && encoding != C.ENCODING_PCM_FLOAT) { throw new UnhandledAudioFormatException(inputAudioFormat); } @@ -72,9 +76,11 @@ public final class ToInt16PcmAudioProcessor extends BaseAudioProcessor { resampledSize = size; break; case C.ENCODING_PCM_24BIT: + case C.ENCODING_PCM_24BIT_BIG_ENDIAN: resampledSize = (size / 3) * 2; break; case C.ENCODING_PCM_32BIT: + case C.ENCODING_PCM_32BIT_BIG_ENDIAN: case C.ENCODING_PCM_FLOAT: resampledSize = size / 2; break; @@ -109,6 +115,13 @@ public final class ToInt16PcmAudioProcessor extends BaseAudioProcessor { buffer.put(inputBuffer.get(i + 2)); } break; + case C.ENCODING_PCM_24BIT_BIG_ENDIAN: + // 24 BE -> 16 bit resampling. Drop the least significant byte. + for (int i = position; i < limit; i += 3) { + buffer.put(inputBuffer.get(i + 1)); + buffer.put(inputBuffer.get(i)); + } + break; case C.ENCODING_PCM_32BIT: // 32 -> 16 bit resampling. Drop the two least significant bytes. for (int i = position; i < limit; i += 4) { @@ -116,6 +129,13 @@ public final class ToInt16PcmAudioProcessor extends BaseAudioProcessor { buffer.put(inputBuffer.get(i + 3)); } break; + case C.ENCODING_PCM_32BIT_BIG_ENDIAN: + // 32 BE -> 16 bit resampling. Drop the two least significant bytes. + for (int i = position; i < limit; i += 4) { + buffer.put(inputBuffer.get(i + 1)); + buffer.put(inputBuffer.get(i)); + } + break; case C.ENCODING_PCM_FLOAT: // 32 bit floating point -> 16 bit resampling. Floating point values are in the range // [-1.0, 1.0], so need to be scaled by Short.MAX_VALUE. diff --git a/libraries/common/src/main/java/androidx/media3/common/util/MediaFormatUtil.java b/libraries/common/src/main/java/androidx/media3/common/util/MediaFormatUtil.java index e108bd4e25..1824b33f2d 100644 --- a/libraries/common/src/main/java/androidx/media3/common/util/MediaFormatUtil.java +++ b/libraries/common/src/main/java/androidx/media3/common/util/MediaFormatUtil.java @@ -46,7 +46,8 @@ public final class MediaFormatUtil { * *

    Equivalent to {@link MediaFormat#KEY_PCM_ENCODING}, except it allows additional values * defined by {@link C.PcmEncoding}, including {@link C#ENCODING_PCM_16BIT_BIG_ENDIAN}, {@link - * C#ENCODING_PCM_24BIT}, and {@link C#ENCODING_PCM_32BIT}. + * C#ENCODING_PCM_24BIT}, {@link C#ENCODING_PCM_24BIT_BIG_ENDIAN}, {@link C#ENCODING_PCM_32BIT} + * and {@link C#ENCODING_PCM_32BIT_BIG_ENDIAN}. */ // The constant value must not be changed, because it's also set by the framework MediaParser API. public static final String KEY_PCM_ENCODING_EXTENDED = "exo-pcm-encoding-int"; @@ -445,6 +446,8 @@ public final class MediaFormatUtil { break; case Format.NO_VALUE: case C.ENCODING_PCM_16BIT_BIG_ENDIAN: + case C.ENCODING_PCM_24BIT_BIG_ENDIAN: + case C.ENCODING_PCM_32BIT_BIG_ENDIAN: default: // No matching value. Do nothing. return; diff --git a/libraries/common/src/main/java/androidx/media3/common/util/Util.java b/libraries/common/src/main/java/androidx/media3/common/util/Util.java index e66d409478..6d53487af0 100644 --- a/libraries/common/src/main/java/androidx/media3/common/util/Util.java +++ b/libraries/common/src/main/java/androidx/media3/common/util/Util.java @@ -1863,7 +1863,9 @@ public final class Util { || encoding == C.ENCODING_PCM_16BIT || encoding == C.ENCODING_PCM_16BIT_BIG_ENDIAN || encoding == C.ENCODING_PCM_24BIT + || encoding == C.ENCODING_PCM_24BIT_BIG_ENDIAN || encoding == C.ENCODING_PCM_32BIT + || encoding == C.ENCODING_PCM_32BIT_BIG_ENDIAN || encoding == C.ENCODING_PCM_FLOAT; } @@ -1876,7 +1878,9 @@ public final class Util { @UnstableApi public static boolean isEncodingHighResolutionPcm(@C.PcmEncoding int encoding) { return encoding == C.ENCODING_PCM_24BIT + || encoding == C.ENCODING_PCM_24BIT_BIG_ENDIAN || encoding == C.ENCODING_PCM_32BIT + || encoding == C.ENCODING_PCM_32BIT_BIG_ENDIAN || encoding == C.ENCODING_PCM_FLOAT; } @@ -1992,8 +1996,10 @@ public final class Util { case C.ENCODING_PCM_16BIT_BIG_ENDIAN: return channelCount * 2; case C.ENCODING_PCM_24BIT: + case C.ENCODING_PCM_24BIT_BIG_ENDIAN: return channelCount * 3; case C.ENCODING_PCM_32BIT: + case C.ENCODING_PCM_32BIT_BIG_ENDIAN: case C.ENCODING_PCM_FLOAT: return channelCount * 4; case C.ENCODING_INVALID: diff --git a/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/audio/DefaultAudioSink.java b/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/audio/DefaultAudioSink.java index dddcacbc02..73fa7805a8 100644 --- a/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/audio/DefaultAudioSink.java +++ b/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/audio/DefaultAudioSink.java @@ -1726,7 +1726,9 @@ public final class DefaultAudioSink implements AudioSink { case C.ENCODING_PCM_16BIT: case C.ENCODING_PCM_16BIT_BIG_ENDIAN: case C.ENCODING_PCM_24BIT: + case C.ENCODING_PCM_24BIT_BIG_ENDIAN: case C.ENCODING_PCM_32BIT: + case C.ENCODING_PCM_32BIT_BIG_ENDIAN: case C.ENCODING_PCM_8BIT: case C.ENCODING_PCM_FLOAT: case C.ENCODING_AAC_ER_BSAC: diff --git a/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/audio/DefaultAudioTrackBufferSizeProvider.java b/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/audio/DefaultAudioTrackBufferSizeProvider.java index 9819ab0380..5e178f0523 100644 --- a/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/audio/DefaultAudioTrackBufferSizeProvider.java +++ b/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/audio/DefaultAudioTrackBufferSizeProvider.java @@ -282,7 +282,9 @@ public class DefaultAudioTrackBufferSizeProvider case C.ENCODING_PCM_16BIT: case C.ENCODING_PCM_16BIT_BIG_ENDIAN: case C.ENCODING_PCM_24BIT: + case C.ENCODING_PCM_24BIT_BIG_ENDIAN: case C.ENCODING_PCM_32BIT: + case C.ENCODING_PCM_32BIT_BIG_ENDIAN: case C.ENCODING_PCM_8BIT: case C.ENCODING_PCM_FLOAT: case C.ENCODING_AAC_ER_BSAC: diff --git a/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/audio/ToFloatPcmAudioProcessor.java b/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/audio/ToFloatPcmAudioProcessor.java index 7531daaaad..9faaae4df1 100644 --- a/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/audio/ToFloatPcmAudioProcessor.java +++ b/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/audio/ToFloatPcmAudioProcessor.java @@ -29,7 +29,9 @@ import java.nio.ByteBuffer; * *

    */ @@ -70,6 +72,16 @@ import java.nio.ByteBuffer; writePcm32BitFloat(pcm32BitInteger, buffer); } break; + case C.ENCODING_PCM_24BIT_BIG_ENDIAN: + buffer = replaceOutputBuffer((size / 3) * 4); + for (int i = position; i < limit; i += 3) { + int pcm32BitInteger = + ((inputBuffer.get(i + 2) & 0xFF) << 8) + | ((inputBuffer.get(i + 1) & 0xFF) << 16) + | ((inputBuffer.get(i) & 0xFF) << 24); + writePcm32BitFloat(pcm32BitInteger, buffer); + } + break; case C.ENCODING_PCM_32BIT: buffer = replaceOutputBuffer(size); for (int i = position; i < limit; i += 4) { @@ -81,6 +93,17 @@ import java.nio.ByteBuffer; writePcm32BitFloat(pcm32BitInteger, buffer); } break; + case C.ENCODING_PCM_32BIT_BIG_ENDIAN: + buffer = replaceOutputBuffer(size); + for (int i = position; i < limit; i += 4) { + int pcm32BitInteger = + (inputBuffer.get(i + 3) & 0xFF) + | ((inputBuffer.get(i + 2) & 0xFF) << 8) + | ((inputBuffer.get(i + 1) & 0xFF) << 16) + | ((inputBuffer.get(i) & 0xFF) << 24); + writePcm32BitFloat(pcm32BitInteger, buffer); + } + break; case C.ENCODING_PCM_8BIT: case C.ENCODING_PCM_16BIT: case C.ENCODING_PCM_16BIT_BIG_ENDIAN: diff --git a/libraries/extractor/src/main/java/androidx/media3/extractor/WavUtil.java b/libraries/extractor/src/main/java/androidx/media3/extractor/WavUtil.java index 2b7eeaeae9..a1cbe72185 100644 --- a/libraries/extractor/src/main/java/androidx/media3/extractor/WavUtil.java +++ b/libraries/extractor/src/main/java/androidx/media3/extractor/WavUtil.java @@ -77,7 +77,10 @@ public final class WavUtil { return TYPE_PCM; case C.ENCODING_PCM_FLOAT: return TYPE_FLOAT; - case C.ENCODING_PCM_16BIT_BIG_ENDIAN: // Not TYPE_PCM, because TYPE_PCM is little endian. + // TYPE_PCM is little endian so big endian formats don't match. + case C.ENCODING_PCM_16BIT_BIG_ENDIAN: + case C.ENCODING_PCM_24BIT_BIG_ENDIAN: + case C.ENCODING_PCM_32BIT_BIG_ENDIAN: case C.ENCODING_INVALID: case Format.NO_VALUE: default: diff --git a/libraries/extractor/src/main/java/androidx/media3/extractor/mkv/MatroskaExtractor.java b/libraries/extractor/src/main/java/androidx/media3/extractor/mkv/MatroskaExtractor.java index 181d97a298..03f897629b 100644 --- a/libraries/extractor/src/main/java/androidx/media3/extractor/mkv/MatroskaExtractor.java +++ b/libraries/extractor/src/main/java/androidx/media3/extractor/mkv/MatroskaExtractor.java @@ -2204,6 +2204,10 @@ public class MatroskaExtractor implements Extractor { pcmEncoding = C.ENCODING_PCM_8BIT; } else if (audioBitDepth == 16) { pcmEncoding = C.ENCODING_PCM_16BIT_BIG_ENDIAN; + } else if (audioBitDepth == 24) { + pcmEncoding = C.ENCODING_PCM_24BIT_BIG_ENDIAN; + } else if (audioBitDepth == 32) { + pcmEncoding = C.ENCODING_PCM_32BIT_BIG_ENDIAN; } else { pcmEncoding = Format.NO_VALUE; mimeType = MimeTypes.AUDIO_UNKNOWN; diff --git a/libraries/extractor/src/main/java/androidx/media3/extractor/mp4/AtomParsers.java b/libraries/extractor/src/main/java/androidx/media3/extractor/mp4/AtomParsers.java index b1696cbbad..21de332143 100644 --- a/libraries/extractor/src/main/java/androidx/media3/extractor/mp4/AtomParsers.java +++ b/libraries/extractor/src/main/java/androidx/media3/extractor/mp4/AtomParsers.java @@ -1493,9 +1493,25 @@ import java.util.List; sampleRate = (int) Math.round(parent.readDouble()); channelCount = parent.readUnsignedIntToInt(); - // Skip always7F000000, sampleSize, formatSpecificFlags, constBytesPerAudioPacket, - // constLPCMFramesPerAudioPacket. - parent.skipBytes(20); + parent.skipBytes(4); // always7F000000 + int bitsPerSample = parent.readUnsignedIntToInt(); + int formatSpecificFlags = parent.readUnsignedIntToInt(); + boolean isFloat = (formatSpecificFlags & 1) != 0; + boolean isBigEndian = (formatSpecificFlags & (1 << 1)) != 0; + if (!isFloat) { + if (bitsPerSample == 8) { + pcmEncoding = C.ENCODING_PCM_8BIT; + } else if (bitsPerSample == 16) { + pcmEncoding = isBigEndian ? C.ENCODING_PCM_16BIT_BIG_ENDIAN : C.ENCODING_PCM_16BIT; + } else if (bitsPerSample == 24) { + pcmEncoding = isBigEndian ? C.ENCODING_PCM_24BIT_BIG_ENDIAN : C.ENCODING_PCM_24BIT; + } else if (bitsPerSample == 32) { + pcmEncoding = isBigEndian ? C.ENCODING_PCM_32BIT_BIG_ENDIAN : C.ENCODING_PCM_32BIT; + } + } else if (bitsPerSample == 32) { + pcmEncoding = C.ENCODING_PCM_FLOAT; + } + parent.skipBytes(8); // constBytesPerAudioPacket, constLPCMFramesPerAudioPacket } else { // Unsupported version. return; @@ -1541,12 +1557,17 @@ import java.util.List; mimeType = MimeTypes.AUDIO_AMR_NB; } else if (atomType == Atom.TYPE_sawb) { mimeType = MimeTypes.AUDIO_AMR_WB; - } else if (atomType == Atom.TYPE_lpcm || atomType == Atom.TYPE_sowt) { + } else if (atomType == Atom.TYPE_sowt) { mimeType = MimeTypes.AUDIO_RAW; pcmEncoding = C.ENCODING_PCM_16BIT; } else if (atomType == Atom.TYPE_twos) { mimeType = MimeTypes.AUDIO_RAW; pcmEncoding = C.ENCODING_PCM_16BIT_BIG_ENDIAN; + } else if (atomType == Atom.TYPE_lpcm) { + mimeType = MimeTypes.AUDIO_RAW; + if (pcmEncoding == Format.NO_VALUE) { + pcmEncoding = C.ENCODING_PCM_16BIT; + } } else if (atomType == Atom.TYPE__mp2 || atomType == Atom.TYPE__mp3) { mimeType = MimeTypes.AUDIO_MPEG; } else if (atomType == Atom.TYPE_mha1) {