MP3: Extract Xing/Info parsing code from XingSeeker

This means in a later change we can still use some of the info for CBR
files, even if we want to ignore the imprecise table of contents and
seek based on a constant bitrate assumption instead.

PiperOrigin-RevId: 597193997
This commit is contained in:
ibaker 2024-01-10 02:57:48 -08:00 committed by Copybara-Service
parent 460501fcd1
commit 4fde35c9cc
5 changed files with 152 additions and 62 deletions

View File

@ -52,23 +52,6 @@ public final class GaplessInfoHolder {
encoderPadding = Format.NO_VALUE; encoderPadding = Format.NO_VALUE;
} }
/**
 * Decodes the packed 24-bit gapless value of an MP3 Xing header and, when at least one of the
 * decoded fields is non-zero, stores both fields in this holder.
 *
 * @param value The 24-bit value to decode.
 * @return Whether the holder was populated.
 */
public boolean setFromXingHeaderValue(int value) {
  // The bits above the low 12 carry the delay; the low 12 bits carry the padding.
  int delay = value >> 12;
  int padding = value & 0x0FFF;
  if (delay <= 0 && padding <= 0) {
    // Nothing useful was decoded, so leave the holder untouched.
    return false;
  }
  this.encoderDelay = delay;
  this.encoderPadding = padding;
  return true;
}
/** /**
* Populates the holder with data parsed from ID3 {@link Metadata}. * Populates the holder with data parsed from ID3 {@link Metadata}.
* *

View File

@ -517,16 +517,14 @@ public final class Mp3Extractor implements Extractor {
switch (seekHeader) { switch (seekHeader) {
case SEEK_HEADER_XING: case SEEK_HEADER_XING:
case SEEK_HEADER_INFO: case SEEK_HEADER_INFO:
seeker = XingFrame xingFrame = XingFrame.parse(synchronizedHeader, frame);
XingSeeker.create(input.getLength(), input.getPosition(), synchronizedHeader, frame); if (!gaplessInfoHolder.hasGaplessInfo()
if (seeker != null && !gaplessInfoHolder.hasGaplessInfo()) { && xingFrame.encoderDelay != C.LENGTH_UNSET
// If there is a Xing header, read gapless playback metadata at a fixed offset. && xingFrame.encoderPadding != C.LENGTH_UNSET) {
input.resetPeekPosition(); gaplessInfoHolder.encoderDelay = xingFrame.encoderDelay;
input.advancePeekPosition(xingBase + 141); gaplessInfoHolder.encoderPadding = xingFrame.encoderPadding;
input.peekFully(scratch.getData(), 0, 3);
scratch.setPosition(0);
gaplessInfoHolder.setFromXingHeaderValue(scratch.readUnsignedInt24());
} }
seeker = XingSeeker.create(input.getLength(), xingFrame, input.getPosition());
input.skipFully(synchronizedHeader.frameSize); input.skipFully(synchronizedHeader.frameSize);
if (seeker != null && !seeker.isSeekable() && seekHeader == SEEK_HEADER_INFO) { if (seeker != null && !seeker.isSeekable() && seekHeader == SEEK_HEADER_INFO) {
// Fall back to constant bitrate seeking for Info headers missing a table of contents. // Fall back to constant bitrate seeking for Info headers missing a table of contents.

View File

@ -0,0 +1,123 @@
/*
* Copyright 2024 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package androidx.media3.extractor.mp3;
import androidx.annotation.Nullable;
import androidx.media3.common.C;
import androidx.media3.common.util.ParsableByteArray;
import androidx.media3.extractor.MpegAudioUtil;
/** Representation of a LAME Xing or Info frame. */
/* package */ final class XingFrame {

  /** Flag bit indicating that the frame count field is present. */
  private static final int FLAG_FRAME_COUNT = 0x01;

  /** Flag bit indicating that the data size field is present. */
  private static final int FLAG_DATA_SIZE = 0x02;

  /** Flag bit indicating that the 100-entry table of contents is present. */
  private static final int FLAG_TABLE_OF_CONTENTS = 0x04;

  /** Flag bit indicating that the 4-byte quality indicator is present. */
  private static final int FLAG_QUALITY_INDICATOR = 0x08;

  /** Number of entries in the table of contents, each stored as one unsigned byte. */
  private static final int TABLE_OF_CONTENTS_LENGTH = 100;

  /**
   * Bytes between the optional fields and the encoder delay/padding field: version string (9),
   * revision & VBR method (1), lowpass filter (1), replay gain (8), encoding flags & ATH type (1),
   * bitrate (1).
   */
  private static final int BYTES_BEFORE_ENCODER_DELAY_AND_PADDING = 9 + 1 + 1 + 8 + 1 + 1;

  /** Size in bytes of the packed encoder delay and padding field. */
  private static final int ENCODER_DELAY_AND_PADDING_SIZE = 3;

  /** The header of the Xing or Info frame. */
  public final MpegAudioUtil.Header header;

  /** The frame count, or {@link C#LENGTH_UNSET} if not present in the header. */
  public final long frameCount;

  /**
   * Data size, including the XING frame, or {@link C#LENGTH_UNSET} if not present in the header.
   */
  public final long dataSize;

  /**
   * The number of samples to skip at the start of the stream, or {@link C#LENGTH_UNSET} if not
   * present in the header.
   */
  public final int encoderDelay;

  /**
   * The number of samples to skip at the end of the stream, or {@link C#LENGTH_UNSET} if not
   * present in the header.
   */
  public final int encoderPadding;

  /**
   * Entries are in the range [0, 255], but are stored as long integers for convenience. Null if the
   * table of contents was missing from the header, in which case seeking is not supported.
   */
  @Nullable public final long[] tableOfContents;

  private XingFrame(
      MpegAudioUtil.Header header,
      long frameCount,
      long dataSize,
      @Nullable long[] tableOfContents,
      int encoderDelay,
      int encoderPadding) {
    this.header = header;
    this.frameCount = frameCount;
    this.dataSize = dataSize;
    this.tableOfContents = tableOfContents;
    this.encoderDelay = encoderDelay;
    this.encoderPadding = encoderPadding;
  }

  /**
   * Returns a {@link XingFrame} containing the info parsed from a LAME Xing (VBR) or Info (CBR)
   * frame.
   *
   * <p>The {@link ParsableByteArray#getPosition()} in {@code frame} when this method exits is
   * undefined.
   *
   * @param mpegAudioHeader The MPEG audio header associated with the frame.
   * @param frame The data in this audio frame, with its position set to immediately after the
   *     'Xing' or 'Info' tag.
   * @return The parsed frame. Fields that are absent from the data are set to {@link
   *     C#LENGTH_UNSET}, or to null in the case of the table of contents.
   */
  public static XingFrame parse(MpegAudioUtil.Header mpegAudioHeader, ParsableByteArray frame) {
    int flags = frame.readInt();

    // The optional fields appear in flag-bit order, so they must be read in this sequence.
    int frameCount =
        (flags & FLAG_FRAME_COUNT) != 0 ? frame.readUnsignedIntToInt() : C.LENGTH_UNSET;
    long dataSize = (flags & FLAG_DATA_SIZE) != 0 ? frame.readUnsignedInt() : C.LENGTH_UNSET;

    @Nullable long[] tableOfContents = null;
    if ((flags & FLAG_TABLE_OF_CONTENTS) != 0) {
      tableOfContents = new long[TABLE_OF_CONTENTS_LENGTH];
      for (int i = 0; i < TABLE_OF_CONTENTS_LENGTH; i++) {
        tableOfContents[i] = frame.readUnsignedByte();
      }
    }

    if ((flags & FLAG_QUALITY_INDICATOR) != 0) {
      frame.skipBytes(4); // Quality indicator
    }

    int encoderDelay = C.LENGTH_UNSET;
    int encoderPadding = C.LENGTH_UNSET;
    // Only read the delay/padding field if the frame is long enough to contain it.
    if (frame.bytesLeft()
        >= BYTES_BEFORE_ENCODER_DELAY_AND_PADDING + ENCODER_DELAY_AND_PADDING_SIZE) {
      frame.skipBytes(BYTES_BEFORE_ENCODER_DELAY_AND_PADDING);
      // 24 bits: the upper 12 hold the encoder delay, the lower 12 hold the encoder padding.
      int encoderDelayAndPadding = frame.readUnsignedInt24();
      encoderDelay = (encoderDelayAndPadding & 0xFFF000) >> 12;
      encoderPadding = (encoderDelayAndPadding & 0xFFF);
    }

    return new XingFrame(
        mpegAudioHeader, frameCount, dataSize, tableOfContents, encoderDelay, encoderPadding);
  }
}

View File

@ -19,9 +19,7 @@ import androidx.annotation.Nullable;
import androidx.media3.common.C; import androidx.media3.common.C;
import androidx.media3.common.util.Assertions; import androidx.media3.common.util.Assertions;
import androidx.media3.common.util.Log; import androidx.media3.common.util.Log;
import androidx.media3.common.util.ParsableByteArray;
import androidx.media3.common.util.Util; import androidx.media3.common.util.Util;
import androidx.media3.extractor.MpegAudioUtil;
import androidx.media3.extractor.SeekPoint; import androidx.media3.extractor.SeekPoint;
/** MP3 seeker that uses metadata from a Xing header. */ /** MP3 seeker that uses metadata from a Xing header. */
@ -43,43 +41,33 @@ import androidx.media3.extractor.SeekPoint;
* information is not present. * information is not present.
*/ */
@Nullable @Nullable
public static XingSeeker create( public static XingSeeker create(long inputLength, XingFrame xingFrame, long position) {
long inputLength, if (xingFrame.frameCount == C.LENGTH_UNSET && xingFrame.frameCount == 0) {
long position,
MpegAudioUtil.Header mpegAudioHeader,
ParsableByteArray frame) {
int samplesPerFrame = mpegAudioHeader.samplesPerFrame;
int sampleRate = mpegAudioHeader.sampleRate;
int flags = frame.readInt();
int frameCount;
if ((flags & 0x01) != 0x01 || (frameCount = frame.readUnsignedIntToInt()) == 0) {
// If the frame count is missing/invalid, the header can't be used to determine the duration. // If the frame count is missing/invalid, the header can't be used to determine the duration.
return null; return null;
} }
// TODO: b/319235116 - Handle encoder delay and padding when calculating duration.
// Audio requires both a start and end PCM sample, so subtract one from the sample count before // Audio requires both a start and end PCM sample, so subtract one from the sample count before
// calculating the duration. // calculating the duration.
long durationUs = Util.sampleCountToDurationUs((frameCount * samplesPerFrame) - 1, sampleRate); long durationUs =
if ((flags & 0x06) != 0x06) { Util.sampleCountToDurationUs(
(xingFrame.frameCount * xingFrame.header.samplesPerFrame) - 1,
xingFrame.header.sampleRate);
if (xingFrame.dataSize == C.LENGTH_UNSET || xingFrame.tableOfContents == null) {
// If the size in bytes or table of contents is missing, the stream is not seekable. // If the size in bytes or table of contents is missing, the stream is not seekable.
return new XingSeeker(position, mpegAudioHeader.frameSize, durationUs); return new XingSeeker(position, xingFrame.header.frameSize, durationUs);
} }
long dataSize = frame.readUnsignedInt(); if (inputLength != C.LENGTH_UNSET && inputLength != position + xingFrame.dataSize) {
long[] tableOfContents = new long[100]; Log.w(
for (int i = 0; i < 100; i++) { TAG, "XING data size mismatch: " + inputLength + ", " + (position + xingFrame.dataSize));
tableOfContents[i] = frame.readUnsignedByte();
}
// TODO: Handle encoder delay and padding in 3 bytes offset by xingBase + 213 bytes:
// delay = (frame.readUnsignedByte() << 4) + (frame.readUnsignedByte() >> 4);
// padding = ((frame.readUnsignedByte() & 0x0F) << 8) + frame.readUnsignedByte();
if (inputLength != C.LENGTH_UNSET && inputLength != position + dataSize) {
Log.w(TAG, "XING data size mismatch: " + inputLength + ", " + (position + dataSize));
} }
return new XingSeeker( return new XingSeeker(
position, mpegAudioHeader.frameSize, durationUs, dataSize, tableOfContents); position,
xingFrame.header.frameSize,
durationUs,
xingFrame.dataSize,
xingFrame.tableOfContents);
} }
private final long dataStartPosition; private final long dataStartPosition;

View File

@ -64,15 +64,13 @@ public final class XingSeekerTest {
seeker = seeker =
XingSeeker.create( XingSeeker.create(
C.LENGTH_UNSET, C.LENGTH_UNSET,
XING_FRAME_POSITION, XingFrame.parse(xingFrameHeader, new ParsableByteArray(XING_FRAME_PAYLOAD)),
xingFrameHeader, XING_FRAME_POSITION);
new ParsableByteArray(XING_FRAME_PAYLOAD));
seekerWithInputLength = seekerWithInputLength =
XingSeeker.create( XingSeeker.create(
STREAM_LENGTH, C.LENGTH_UNSET,
XING_FRAME_POSITION, XingFrame.parse(xingFrameHeader, new ParsableByteArray(XING_FRAME_PAYLOAD)),
xingFrameHeader, XING_FRAME_POSITION);
new ParsableByteArray(XING_FRAME_PAYLOAD));
xingFrameSize = xingFrameHeader.frameSize; xingFrameSize = xingFrameHeader.frameSize;
} }