diff --git a/libraries/extractor/src/main/java/androidx/media3/extractor/GaplessInfoHolder.java b/libraries/extractor/src/main/java/androidx/media3/extractor/GaplessInfoHolder.java
index 96933c3ba7..d22459e316 100644
--- a/libraries/extractor/src/main/java/androidx/media3/extractor/GaplessInfoHolder.java
+++ b/libraries/extractor/src/main/java/androidx/media3/extractor/GaplessInfoHolder.java
@@ -52,23 +52,6 @@ public final class GaplessInfoHolder {
     encoderPadding = Format.NO_VALUE;
   }
 
-  /**
-   * Populates the holder with data from an MP3 Xing header, if valid and non-zero.
-   *
-   * @param value The 24-bit value to decode.
-   * @return Whether the holder was populated.
-   */
-  public boolean setFromXingHeaderValue(int value) {
-    int encoderDelay = value >> 12;
-    int encoderPadding = value & 0x0FFF;
-    if (encoderDelay > 0 || encoderPadding > 0) {
-      this.encoderDelay = encoderDelay;
-      this.encoderPadding = encoderPadding;
-      return true;
-    }
-    return false;
-  }
-
   /**
    * Populates the holder with data parsed from ID3 {@link Metadata}.
    *
diff --git a/libraries/extractor/src/main/java/androidx/media3/extractor/mp3/Mp3Extractor.java b/libraries/extractor/src/main/java/androidx/media3/extractor/mp3/Mp3Extractor.java
index 180cf026ab..395ff415b1 100644
--- a/libraries/extractor/src/main/java/androidx/media3/extractor/mp3/Mp3Extractor.java
+++ b/libraries/extractor/src/main/java/androidx/media3/extractor/mp3/Mp3Extractor.java
@@ -517,16 +517,14 @@ public final class Mp3Extractor implements Extractor {
     switch (seekHeader) {
       case SEEK_HEADER_XING:
       case SEEK_HEADER_INFO:
-        seeker =
-            XingSeeker.create(input.getLength(), input.getPosition(), synchronizedHeader, frame);
-        if (seeker != null && !gaplessInfoHolder.hasGaplessInfo()) {
-          // If there is a Xing header, read gapless playback metadata at a fixed offset.
-          input.resetPeekPosition();
-          input.advancePeekPosition(xingBase + 141);
-          input.peekFully(scratch.getData(), 0, 3);
-          scratch.setPosition(0);
-          gaplessInfoHolder.setFromXingHeaderValue(scratch.readUnsignedInt24());
+        XingFrame xingFrame = XingFrame.parse(synchronizedHeader, frame);
+        if (!gaplessInfoHolder.hasGaplessInfo()
+            && xingFrame.encoderDelay != C.LENGTH_UNSET
+            && xingFrame.encoderPadding != C.LENGTH_UNSET) {
+          gaplessInfoHolder.encoderDelay = xingFrame.encoderDelay;
+          gaplessInfoHolder.encoderPadding = xingFrame.encoderPadding;
         }
+        seeker = XingSeeker.create(input.getLength(), xingFrame, input.getPosition());
         input.skipFully(synchronizedHeader.frameSize);
         if (seeker != null && !seeker.isSeekable() && seekHeader == SEEK_HEADER_INFO) {
           // Fall back to constant bitrate seeking for Info headers missing a table of contents.
diff --git a/libraries/extractor/src/main/java/androidx/media3/extractor/mp3/XingFrame.java b/libraries/extractor/src/main/java/androidx/media3/extractor/mp3/XingFrame.java
new file mode 100644
index 0000000000..456526c374
--- /dev/null
+++ b/libraries/extractor/src/main/java/androidx/media3/extractor/mp3/XingFrame.java
@@ -0,0 +1,118 @@
+/*
+ * Copyright 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package androidx.media3.extractor.mp3;
+
+import androidx.annotation.Nullable;
+import androidx.media3.common.C;
+import androidx.media3.common.util.ParsableByteArray;
+import androidx.media3.extractor.MpegAudioUtil;
+
+/** Representation of a LAME Xing or Info frame. */
+/* package */ final class XingFrame {
+
+  /** The header of the Xing or Info frame. */
+  public final MpegAudioUtil.Header header;
+
+  /** The frame count, or {@link C#LENGTH_UNSET} if not present in the header. */
+  public final long frameCount;
+
+  /**
+   * Data size, including the XING frame, or {@link C#LENGTH_UNSET} if not present in the header.
+   */
+  public final long dataSize;
+
+  /**
+   * The number of samples to skip at the start of the stream, or {@link C#LENGTH_UNSET} if not
+   * present in the header.
+   */
+  public final int encoderDelay;
+
+  /**
+   * The number of samples to skip at the end of the stream, or {@link C#LENGTH_UNSET} if not
+   * present in the header.
+   */
+  public final int encoderPadding;
+
+  /**
+   * Entries are in the range [0, 255], but are stored as long integers for convenience. Null if the
+   * table of contents was missing from the header, in which case seeking is not supported.
+   */
+  @Nullable public final long[] tableOfContents;
+
+  private XingFrame(
+      MpegAudioUtil.Header header,
+      long frameCount,
+      long dataSize,
+      @Nullable long[] tableOfContents,
+      int encoderDelay,
+      int encoderPadding) {
+    this.header = header;
+    this.frameCount = frameCount;
+    this.dataSize = dataSize;
+    this.tableOfContents = tableOfContents;
+    this.encoderDelay = encoderDelay;
+    this.encoderPadding = encoderPadding;
+  }
+
+  /**
+   * Returns a {@link XingFrame} containing the info parsed from a LAME Xing (VBR) or Info (CBR)
+   * frame.
+   *
+   * <p>The {@link ParsableByteArray#getPosition()} in {@code frame} when this method exits is
+   * undefined.
+   *
+   * @param mpegAudioHeader The MPEG audio header associated with the frame.
+   * @param frame The data in this audio frame, with its position set to immediately after the
+   *     'Xing' or 'Info' tag.
+   */
+  public static XingFrame parse(MpegAudioUtil.Header mpegAudioHeader, ParsableByteArray frame) {
+    int flags = frame.readInt();
+    int frameCount = (flags & 0x01) != 0 ? frame.readUnsignedIntToInt() : C.LENGTH_UNSET;
+    long dataSize = (flags & 0x02) != 0 ? frame.readUnsignedInt() : C.LENGTH_UNSET;
+
+    long[] tableOfContents;
+    if ((flags & 0x04) == 0x04) {
+      tableOfContents = new long[100];
+      for (int i = 0; i < 100; i++) {
+        tableOfContents[i] = frame.readUnsignedByte();
+      }
+    } else {
+      tableOfContents = null;
+    }
+
+    if ((flags & 0x8) != 0) {
+      frame.skipBytes(4); // Quality indicator
+    }
+
+    int encoderDelay;
+    int encoderPadding;
+    // Skip: version string (9), revision & VBR method (1), lowpass filter (1), replay gain (8),
+    // encoding flags & ATH type (1), bitrate (1).
+    int bytesToSkipBeforeEncoderDelayAndPadding = 9 + 1 + 1 + 8 + 1 + 1;
+    if (frame.bytesLeft() >= bytesToSkipBeforeEncoderDelayAndPadding + 3) {
+      frame.skipBytes(bytesToSkipBeforeEncoderDelayAndPadding);
+      int encoderDelayAndPadding = frame.readUnsignedInt24();
+      encoderDelay = (encoderDelayAndPadding & 0xFFF000) >> 12;
+      encoderPadding = (encoderDelayAndPadding & 0xFFF);
+    } else {
+      encoderDelay = C.LENGTH_UNSET;
+      encoderPadding = C.LENGTH_UNSET;
+    }
+
+    return new XingFrame(
+        mpegAudioHeader, frameCount, dataSize, tableOfContents, encoderDelay, encoderPadding);
+  }
+}
diff --git a/libraries/extractor/src/main/java/androidx/media3/extractor/mp3/XingSeeker.java b/libraries/extractor/src/main/java/androidx/media3/extractor/mp3/XingSeeker.java
index db8d322676..515a11335e 100644
--- a/libraries/extractor/src/main/java/androidx/media3/extractor/mp3/XingSeeker.java
+++ b/libraries/extractor/src/main/java/androidx/media3/extractor/mp3/XingSeeker.java
@@ -19,9 +19,7 @@ import androidx.annotation.Nullable;
 import androidx.media3.common.C;
 import androidx.media3.common.util.Assertions;
 import androidx.media3.common.util.Log;
-import androidx.media3.common.util.ParsableByteArray;
 import androidx.media3.common.util.Util;
-import androidx.media3.extractor.MpegAudioUtil;
 import androidx.media3.extractor.SeekPoint;
 
 /** MP3 seeker that uses metadata from a Xing header. */
@@ -43,43 +41,33 @@ import androidx.media3.extractor.SeekPoint;
    *     information is not present.
    */
   @Nullable
-  public static XingSeeker create(
-      long inputLength,
-      long position,
-      MpegAudioUtil.Header mpegAudioHeader,
-      ParsableByteArray frame) {
-    int samplesPerFrame = mpegAudioHeader.samplesPerFrame;
-    int sampleRate = mpegAudioHeader.sampleRate;
-
-    int flags = frame.readInt();
-    int frameCount;
-    if ((flags & 0x01) != 0x01 || (frameCount = frame.readUnsignedIntToInt()) == 0) {
+  public static XingSeeker create(long inputLength, XingFrame xingFrame, long position) {
+    if (xingFrame.frameCount == C.LENGTH_UNSET || xingFrame.frameCount == 0) {
       // If the frame count is missing/invalid, the header can't be used to determine the duration.
       return null;
     }
+    // TODO: b/319235116 - Handle encoder delay and padding when calculating duration.
     // Audio requires both a start and end PCM sample, so subtract one from the sample count before
     // calculating the duration.
-    long durationUs = Util.sampleCountToDurationUs((frameCount * samplesPerFrame) - 1, sampleRate);
-    if ((flags & 0x06) != 0x06) {
+    long durationUs =
+        Util.sampleCountToDurationUs(
+            (xingFrame.frameCount * xingFrame.header.samplesPerFrame) - 1,
+            xingFrame.header.sampleRate);
+    if (xingFrame.dataSize == C.LENGTH_UNSET || xingFrame.tableOfContents == null) {
       // If the size in bytes or table of contents is missing, the stream is not seekable.
-      return new XingSeeker(position, mpegAudioHeader.frameSize, durationUs);
+      return new XingSeeker(position, xingFrame.header.frameSize, durationUs);
     }
 
-    long dataSize = frame.readUnsignedInt();
-    long[] tableOfContents = new long[100];
-    for (int i = 0; i < 100; i++) {
-      tableOfContents[i] = frame.readUnsignedByte();
-    }
-
-    // TODO: Handle encoder delay and padding in 3 bytes offset by xingBase + 213 bytes:
-    // delay = (frame.readUnsignedByte() << 4) + (frame.readUnsignedByte() >> 4);
-    // padding = ((frame.readUnsignedByte() & 0x0F) << 8) + frame.readUnsignedByte();
-
-    if (inputLength != C.LENGTH_UNSET && inputLength != position + dataSize) {
-      Log.w(TAG, "XING data size mismatch: " + inputLength + ", " + (position + dataSize));
+    if (inputLength != C.LENGTH_UNSET && inputLength != position + xingFrame.dataSize) {
+      Log.w(
+          TAG, "XING data size mismatch: " + inputLength + ", " + (position + xingFrame.dataSize));
     }
     return new XingSeeker(
-        position, mpegAudioHeader.frameSize, durationUs, dataSize, tableOfContents);
+        position,
+        xingFrame.header.frameSize,
+        durationUs,
+        xingFrame.dataSize,
+        xingFrame.tableOfContents);
   }
 
   private final long dataStartPosition;
diff --git a/libraries/extractor/src/test/java/androidx/media3/extractor/mp3/XingSeekerTest.java b/libraries/extractor/src/test/java/androidx/media3/extractor/mp3/XingSeekerTest.java
index bd88b6cfcb..9593084f70 100644
--- a/libraries/extractor/src/test/java/androidx/media3/extractor/mp3/XingSeekerTest.java
+++ b/libraries/extractor/src/test/java/androidx/media3/extractor/mp3/XingSeekerTest.java
@@ -64,15 +64,13 @@ public final class XingSeekerTest {
     seeker =
         XingSeeker.create(
             C.LENGTH_UNSET,
-            XING_FRAME_POSITION,
-            xingFrameHeader,
-            new ParsableByteArray(XING_FRAME_PAYLOAD));
+            XingFrame.parse(xingFrameHeader, new ParsableByteArray(XING_FRAME_PAYLOAD)),
+            XING_FRAME_POSITION);
     seekerWithInputLength =
         XingSeeker.create(
             STREAM_LENGTH,
-            XING_FRAME_POSITION,
-            xingFrameHeader,
-            new ParsableByteArray(XING_FRAME_PAYLOAD));
+            XingFrame.parse(xingFrameHeader, new ParsableByteArray(XING_FRAME_PAYLOAD)),
+            XING_FRAME_POSITION);
     xingFrameSize = xingFrameHeader.frameSize;
   }
 