mirror of
https://github.com/androidx/media.git
synced 2025-04-30 06:46:50 +08:00
MP3: Extract Xing/Info parsing code from XingSeeker
This means that in a later change we can still use some of the info for CBR files, even if we want to ignore the imprecise table of contents and seek based on a constant bitrate assumption instead.

PiperOrigin-RevId: 597193997
This commit is contained in:
parent
460501fcd1
commit
4fde35c9cc
@ -52,23 +52,6 @@ public final class GaplessInfoHolder {
|
||||
encoderPadding = Format.NO_VALUE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Populates the holder with data from an MP3 Xing header, if valid and non-zero.
|
||||
*
|
||||
* @param value The 24-bit value to decode.
|
||||
* @return Whether the holder was populated.
|
||||
*/
|
||||
public boolean setFromXingHeaderValue(int value) {
|
||||
int encoderDelay = value >> 12;
|
||||
int encoderPadding = value & 0x0FFF;
|
||||
if (encoderDelay > 0 || encoderPadding > 0) {
|
||||
this.encoderDelay = encoderDelay;
|
||||
this.encoderPadding = encoderPadding;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Populates the holder with data parsed from ID3 {@link Metadata}.
|
||||
*
|
||||
|
@ -517,16 +517,14 @@ public final class Mp3Extractor implements Extractor {
|
||||
switch (seekHeader) {
|
||||
case SEEK_HEADER_XING:
|
||||
case SEEK_HEADER_INFO:
|
||||
seeker =
|
||||
XingSeeker.create(input.getLength(), input.getPosition(), synchronizedHeader, frame);
|
||||
if (seeker != null && !gaplessInfoHolder.hasGaplessInfo()) {
|
||||
// If there is a Xing header, read gapless playback metadata at a fixed offset.
|
||||
input.resetPeekPosition();
|
||||
input.advancePeekPosition(xingBase + 141);
|
||||
input.peekFully(scratch.getData(), 0, 3);
|
||||
scratch.setPosition(0);
|
||||
gaplessInfoHolder.setFromXingHeaderValue(scratch.readUnsignedInt24());
|
||||
XingFrame xingFrame = XingFrame.parse(synchronizedHeader, frame);
|
||||
if (!gaplessInfoHolder.hasGaplessInfo()
|
||||
&& xingFrame.encoderDelay != C.LENGTH_UNSET
|
||||
&& xingFrame.encoderPadding != C.LENGTH_UNSET) {
|
||||
gaplessInfoHolder.encoderDelay = xingFrame.encoderDelay;
|
||||
gaplessInfoHolder.encoderPadding = xingFrame.encoderPadding;
|
||||
}
|
||||
seeker = XingSeeker.create(input.getLength(), xingFrame, input.getPosition());
|
||||
input.skipFully(synchronizedHeader.frameSize);
|
||||
if (seeker != null && !seeker.isSeekable() && seekHeader == SEEK_HEADER_INFO) {
|
||||
// Fall back to constant bitrate seeking for Info headers missing a table of contents.
|
||||
|
@ -0,0 +1,123 @@
|
||||
/*
|
||||
* Copyright 2024 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package androidx.media3.extractor.mp3;
|
||||
|
||||
import androidx.annotation.Nullable;
|
||||
import androidx.media3.common.C;
|
||||
import androidx.media3.common.util.ParsableByteArray;
|
||||
import androidx.media3.extractor.MpegAudioUtil;
|
||||
|
||||
/** Representation of a LAME Xing or Info frame. */
|
||||
/* package */ final class XingFrame {
|
||||
|
||||
private static final String TAG = "XingHeader";
|
||||
|
||||
/** The header of the Xing or Info frame. */
|
||||
public final MpegAudioUtil.Header header;
|
||||
|
||||
/** The frame count, or {@link C#LENGTH_UNSET} if not present in the header. */
|
||||
public final long frameCount;
|
||||
|
||||
/**
|
||||
* Data size, including the XING frame, or {@link C#LENGTH_UNSET} if not present in the header.
|
||||
*/
|
||||
public final long dataSize;
|
||||
|
||||
/**
|
||||
* The number of samples to skip at the start of the stream, or {@link C#LENGTH_UNSET} if not
|
||||
* present in the header.
|
||||
*/
|
||||
public final int encoderDelay;
|
||||
|
||||
/**
|
||||
* The number of samples to skip at the end of the stream, or {@link C#LENGTH_UNSET} if not
|
||||
* present in the header.
|
||||
*/
|
||||
public final int encoderPadding;
|
||||
|
||||
/**
|
||||
* Entries are in the range [0, 255], but are stored as long integers for convenience. Null if the
|
||||
* table of contents was missing from the header, in which case seeking is not be supported.
|
||||
*/
|
||||
@Nullable public final long[] tableOfContents;
|
||||
|
||||
private XingFrame(
|
||||
MpegAudioUtil.Header header,
|
||||
long frameCount,
|
||||
long dataSize,
|
||||
@Nullable long[] tableOfContents,
|
||||
int encoderDelay,
|
||||
int encoderPadding) {
|
||||
this.header = header;
|
||||
this.frameCount = frameCount;
|
||||
this.dataSize = dataSize;
|
||||
this.tableOfContents = tableOfContents;
|
||||
this.encoderDelay = encoderDelay;
|
||||
this.encoderPadding = encoderPadding;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a {@link XingFrame} containing the info parsed from a LAME Xing (VBR) or Info (CBR)
|
||||
* frame.
|
||||
*
|
||||
* <p>The {@link ParsableByteArray#getPosition()} in {@code frame} when this method exits is
|
||||
* undefined.
|
||||
*
|
||||
* @param mpegAudioHeader The MPEG audio header associated with the frame.
|
||||
* @param frame The data in this audio frame, with its position set to immediately after the
|
||||
* 'Xing' or 'Info' tag.
|
||||
*/
|
||||
public static XingFrame parse(MpegAudioUtil.Header mpegAudioHeader, ParsableByteArray frame) {
|
||||
int samplesPerFrame = mpegAudioHeader.samplesPerFrame;
|
||||
int sampleRate = mpegAudioHeader.sampleRate;
|
||||
|
||||
int flags = frame.readInt();
|
||||
int frameCount = (flags & 0x01) != 0 ? frame.readUnsignedIntToInt() : C.LENGTH_UNSET;
|
||||
long dataSize = (flags & 0x02) != 0 ? frame.readUnsignedInt() : C.LENGTH_UNSET;
|
||||
|
||||
long[] tableOfContents;
|
||||
if ((flags & 0x04) == 0x04) {
|
||||
tableOfContents = new long[100];
|
||||
for (int i = 0; i < 100; i++) {
|
||||
tableOfContents[i] = frame.readUnsignedByte();
|
||||
}
|
||||
} else {
|
||||
tableOfContents = null;
|
||||
}
|
||||
|
||||
if ((flags & 0x8) != 0) {
|
||||
frame.skipBytes(4); // Quality indicator
|
||||
}
|
||||
|
||||
int encoderDelay;
|
||||
int encoderPadding;
|
||||
// Skip: version string (9), revision & VBR method (1), lowpass filter (1), replay gain (8),
|
||||
// encoding flags & ATH type (1), bitrate (1).
|
||||
int bytesToSkipBeforeEncoderDelayAndPadding = 9 + 1 + 1 + 8 + 1 + 1;
|
||||
if (frame.bytesLeft() >= bytesToSkipBeforeEncoderDelayAndPadding + 3) {
|
||||
frame.skipBytes(bytesToSkipBeforeEncoderDelayAndPadding);
|
||||
int encoderDelayAndPadding = frame.readUnsignedInt24();
|
||||
encoderDelay = (encoderDelayAndPadding & 0xFFF000) >> 12;
|
||||
encoderPadding = (encoderDelayAndPadding & 0xFFF);
|
||||
} else {
|
||||
encoderDelay = C.LENGTH_UNSET;
|
||||
encoderPadding = C.LENGTH_UNSET;
|
||||
}
|
||||
|
||||
return new XingFrame(
|
||||
mpegAudioHeader, frameCount, dataSize, tableOfContents, encoderDelay, encoderPadding);
|
||||
}
|
||||
}
|
@ -19,9 +19,7 @@ import androidx.annotation.Nullable;
|
||||
import androidx.media3.common.C;
|
||||
import androidx.media3.common.util.Assertions;
|
||||
import androidx.media3.common.util.Log;
|
||||
import androidx.media3.common.util.ParsableByteArray;
|
||||
import androidx.media3.common.util.Util;
|
||||
import androidx.media3.extractor.MpegAudioUtil;
|
||||
import androidx.media3.extractor.SeekPoint;
|
||||
|
||||
/** MP3 seeker that uses metadata from a Xing header. */
|
||||
@ -43,43 +41,33 @@ import androidx.media3.extractor.SeekPoint;
|
||||
* information is not present.
|
||||
*/
|
||||
@Nullable
|
||||
public static XingSeeker create(
|
||||
long inputLength,
|
||||
long position,
|
||||
MpegAudioUtil.Header mpegAudioHeader,
|
||||
ParsableByteArray frame) {
|
||||
int samplesPerFrame = mpegAudioHeader.samplesPerFrame;
|
||||
int sampleRate = mpegAudioHeader.sampleRate;
|
||||
|
||||
int flags = frame.readInt();
|
||||
int frameCount;
|
||||
if ((flags & 0x01) != 0x01 || (frameCount = frame.readUnsignedIntToInt()) == 0) {
|
||||
public static XingSeeker create(long inputLength, XingFrame xingFrame, long position) {
|
||||
if (xingFrame.frameCount == C.LENGTH_UNSET && xingFrame.frameCount == 0) {
|
||||
// If the frame count is missing/invalid, the header can't be used to determine the duration.
|
||||
return null;
|
||||
}
|
||||
// TODO: b/319235116 - Handle encoder delay and padding when calculating duration.
|
||||
// Audio requires both a start and end PCM sample, so subtract one from the sample count before
|
||||
// calculating the duration.
|
||||
long durationUs = Util.sampleCountToDurationUs((frameCount * samplesPerFrame) - 1, sampleRate);
|
||||
if ((flags & 0x06) != 0x06) {
|
||||
long durationUs =
|
||||
Util.sampleCountToDurationUs(
|
||||
(xingFrame.frameCount * xingFrame.header.samplesPerFrame) - 1,
|
||||
xingFrame.header.sampleRate);
|
||||
if (xingFrame.dataSize == C.LENGTH_UNSET || xingFrame.tableOfContents == null) {
|
||||
// If the size in bytes or table of contents is missing, the stream is not seekable.
|
||||
return new XingSeeker(position, mpegAudioHeader.frameSize, durationUs);
|
||||
return new XingSeeker(position, xingFrame.header.frameSize, durationUs);
|
||||
}
|
||||
|
||||
long dataSize = frame.readUnsignedInt();
|
||||
long[] tableOfContents = new long[100];
|
||||
for (int i = 0; i < 100; i++) {
|
||||
tableOfContents[i] = frame.readUnsignedByte();
|
||||
}
|
||||
|
||||
// TODO: Handle encoder delay and padding in 3 bytes offset by xingBase + 213 bytes:
|
||||
// delay = (frame.readUnsignedByte() << 4) + (frame.readUnsignedByte() >> 4);
|
||||
// padding = ((frame.readUnsignedByte() & 0x0F) << 8) + frame.readUnsignedByte();
|
||||
|
||||
if (inputLength != C.LENGTH_UNSET && inputLength != position + dataSize) {
|
||||
Log.w(TAG, "XING data size mismatch: " + inputLength + ", " + (position + dataSize));
|
||||
if (inputLength != C.LENGTH_UNSET && inputLength != position + xingFrame.dataSize) {
|
||||
Log.w(
|
||||
TAG, "XING data size mismatch: " + inputLength + ", " + (position + xingFrame.dataSize));
|
||||
}
|
||||
return new XingSeeker(
|
||||
position, mpegAudioHeader.frameSize, durationUs, dataSize, tableOfContents);
|
||||
position,
|
||||
xingFrame.header.frameSize,
|
||||
durationUs,
|
||||
xingFrame.dataSize,
|
||||
xingFrame.tableOfContents);
|
||||
}
|
||||
|
||||
private final long dataStartPosition;
|
||||
|
@ -64,15 +64,13 @@ public final class XingSeekerTest {
|
||||
seeker =
|
||||
XingSeeker.create(
|
||||
C.LENGTH_UNSET,
|
||||
XING_FRAME_POSITION,
|
||||
xingFrameHeader,
|
||||
new ParsableByteArray(XING_FRAME_PAYLOAD));
|
||||
XingFrame.parse(xingFrameHeader, new ParsableByteArray(XING_FRAME_PAYLOAD)),
|
||||
XING_FRAME_POSITION);
|
||||
seekerWithInputLength =
|
||||
XingSeeker.create(
|
||||
STREAM_LENGTH,
|
||||
XING_FRAME_POSITION,
|
||||
xingFrameHeader,
|
||||
new ParsableByteArray(XING_FRAME_PAYLOAD));
|
||||
C.LENGTH_UNSET,
|
||||
XingFrame.parse(xingFrameHeader, new ParsableByteArray(XING_FRAME_PAYLOAD)),
|
||||
XING_FRAME_POSITION);
|
||||
xingFrameSize = xingFrameHeader.frameSize;
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user