MP3: Assume an Info header indicates CBR for seeking purposes

The seek table in a Xing/Info header is very imprecise (each of the 100
byte positions it describes has a max resolution of 255). Seeking using
a constant bitrate assumption is more accurate, especially for longer
files (whose length exacerbates the imprecision of the Info header).

VBR files should contain a Xing header, while an Info header is
otherwise identical but indicates the file is CBR.

Issue: androidx/media#878
PiperOrigin-RevId: 597827891
This commit is contained in:
ibaker 2024-01-12 06:45:27 -08:00 committed by Copybara-Service
parent 5056dfaa2b
commit 4061d476a1
7 changed files with 139 additions and 100 deletions

View File

@ -55,6 +55,10 @@
`ColorInfo.colorSpace`, `ColorInfo.colorTransfer`, and `ColorInfo.colorSpace`, `ColorInfo.colorTransfer`, and
`ColorInfo.colorRange` values `ColorInfo.colorRange` values
([#692](https://github.com/androidx/media/pull/692)). ([#692](https://github.com/androidx/media/pull/692)).
* MP3: Use constant bitrate (CBR) seeking for files with an `Info` header
(the CBR equivalent of the `Xing` header). Previously we used the seek
table from the `Info` header, but this results in less precise seeking
than if we ignore it and assume the file is CBR.
* Audio: * Audio:
* Video: * Video:
* Change the `MediaCodecVideoRenderer` constructor that takes a * Change the `MediaCodecVideoRenderer` constructor that takes a

View File

@ -524,11 +524,23 @@ public final class Mp3Extractor implements Extractor {
gaplessInfoHolder.encoderDelay = xingFrame.encoderDelay; gaplessInfoHolder.encoderDelay = xingFrame.encoderDelay;
gaplessInfoHolder.encoderPadding = xingFrame.encoderPadding; gaplessInfoHolder.encoderPadding = xingFrame.encoderPadding;
} }
seeker = XingSeeker.create(input.getLength(), xingFrame, input.getPosition()); long startPosition = input.getPosition();
input.skipFully(synchronizedHeader.frameSize); input.skipFully(synchronizedHeader.frameSize);
if (seeker != null && !seeker.isSeekable() && seekHeader == SEEK_HEADER_INFO) { // An Xing frame indicates the file is VBR (so we have to use the seek header for seeking)
// Fall back to constant bitrate seeking for Info headers missing a table of contents. // while an Info header indicates the file is CBR, in which case ConstantBitrateSeeker will
return getConstantBitrateSeeker(input, /* allowSeeksIfLengthUnknown= */ false); // give more accurate seeking than the low-resolution seek table in the Info header. We can
// still use the length from the Info frame if we don't know the stream length directly.
if (seekHeader == SEEK_HEADER_XING) {
seeker = XingSeeker.create(input.getLength(), xingFrame, startPosition);
} else { // seekHeader == SEEK_HEADER_INFO
long streamLength =
xingFrame.dataSize != C.LENGTH_UNSET
? startPosition + xingFrame.dataSize
: C.LENGTH_UNSET;
// TODO: b/319235116 - Consider using the duration derived from the Xing/Info frame when
// it considers encoding delay and padding.
seeker =
getConstantBitrateSeeker(input, streamLength, /* allowSeeksIfLengthUnknown= */ false);
} }
break; break;
case SEEK_HEADER_VBRI: case SEEK_HEADER_VBRI:
@ -548,11 +560,26 @@ public final class Mp3Extractor implements Extractor {
/** Peeks the next frame and returns a {@link ConstantBitrateSeeker} based on its bitrate. */ /** Peeks the next frame and returns a {@link ConstantBitrateSeeker} based on its bitrate. */
private Seeker getConstantBitrateSeeker(ExtractorInput input, boolean allowSeeksIfLengthUnknown) private Seeker getConstantBitrateSeeker(ExtractorInput input, boolean allowSeeksIfLengthUnknown)
throws IOException { throws IOException {
return getConstantBitrateSeeker(input, C.LENGTH_UNSET, allowSeeksIfLengthUnknown);
}
/**
* Peeks the next frame and returns a {@link ConstantBitrateSeeker} based on its bitrate. {@code
* streamLengthFallback} is used if {@link ExtractorInput#getLength() input.getLength()} is {@link
* C#LENGTH_UNSET}. {@code streamLengthFallback} may also be {@link C#LENGTH_UNSET} to indicate
* the length is unknown.
*/
private Seeker getConstantBitrateSeeker(
ExtractorInput input, long streamLengthFallback, boolean allowSeeksIfLengthUnknown)
throws IOException {
input.peekFully(scratch.getData(), 0, 4); input.peekFully(scratch.getData(), 0, 4);
scratch.setPosition(0); scratch.setPosition(0);
synchronizedHeader.setForHeaderData(scratch.readInt()); synchronizedHeader.setForHeaderData(scratch.readInt());
return new ConstantBitrateSeeker( return new ConstantBitrateSeeker(
input.getLength(), input.getPosition(), synchronizedHeader, allowSeeksIfLengthUnknown); input.getLength() != C.LENGTH_UNSET ? input.getLength() : streamLengthFallback,
input.getPosition(),
synchronizedHeader,
allowSeeksIfLengthUnknown);
} }
@EnsuresNonNull({"extractorOutput", "realTrackOutput"}) @EnsuresNonNull({"extractorOutput", "realTrackOutput"})

View File

@ -2,9 +2,9 @@ seekMap:
isSeekable = true isSeekable = true
duration = 1044875 duration = 1044875
getPosition(0) = [[timeUs=0, position=227]] getPosition(0) = [[timeUs=0, position=227]]
getPosition(1) = [[timeUs=1, position=227]] getPosition(1) = [[timeUs=0, position=227], [timeUs=26000, position=435]]
getPosition(522437) = [[timeUs=522437, position=4582]] getPosition(522437) = [[timeUs=520000, position=4387], [timeUs=546000, position=4595]]
getPosition(1044875) = [[timeUs=1044875, position=8585]] getPosition(1044875) = [[timeUs=1018875, position=8378]]
numberOfTracks = 1 numberOfTracks = 1
track 0: track 0:
total output bytes = 8359 total output bytes = 8359

View File

@ -2,13 +2,13 @@ seekMap:
isSeekable = true isSeekable = true
duration = 1044875 duration = 1044875
getPosition(0) = [[timeUs=0, position=227]] getPosition(0) = [[timeUs=0, position=227]]
getPosition(1) = [[timeUs=1, position=227]] getPosition(1) = [[timeUs=0, position=227], [timeUs=26000, position=435]]
getPosition(522437) = [[timeUs=522437, position=4582]] getPosition(522437) = [[timeUs=520000, position=4387], [timeUs=546000, position=4595]]
getPosition(1044875) = [[timeUs=1044875, position=8585]] getPosition(1044875) = [[timeUs=1018875, position=8378]]
numberOfTracks = 1 numberOfTracks = 1
track 0: track 0:
total output bytes = 5434 total output bytes = 5643
sample count = 26 sample count = 27
format 0: format 0:
sampleMimeType = audio/mpeg sampleMimeType = audio/mpeg
maxInputSize = 4096 maxInputSize = 4096
@ -18,107 +18,111 @@ track 0:
encoderPadding = 1404 encoderPadding = 1404
metadata = entries=[TSSE: description=null: values=[Lavf58.45.100]] metadata = entries=[TSSE: description=null: values=[Lavf58.45.100]]
sample 0: sample 0:
time = 344808 time = 339500
flags = 1
data = length 209, hash 6CCBBB3B
sample 1:
time = 365622
flags = 1 flags = 1
data = length 209, hash 34191E1 data = length 209, hash 34191E1
sample 1: sample 2:
time = 370930 time = 391744
flags = 1 flags = 1
data = length 209, hash 57323ED7 data = length 209, hash 57323ED7
sample 2: sample 3:
time = 397052 time = 417867
flags = 1 flags = 1
data = length 209, hash 75618CF3 data = length 209, hash 75618CF3
sample 3: sample 4:
time = 423175 time = 443989
flags = 1 flags = 1
data = length 209, hash 784C973B data = length 209, hash 784C973B
sample 4: sample 5:
time = 449297 time = 470112
flags = 1 flags = 1
data = length 209, hash 49106390 data = length 209, hash 49106390
sample 5: sample 6:
time = 475420 time = 496234
flags = 1 flags = 1
data = length 209, hash 70F6A563 data = length 209, hash 70F6A563
sample 6: sample 7:
time = 501542 time = 522357
flags = 1 flags = 1
data = length 209, hash 721882B0 data = length 209, hash 721882B0
sample 7: sample 8:
time = 527665 time = 548479
flags = 1 flags = 1
data = length 209, hash 81C62AEE data = length 209, hash 81C62AEE
sample 8: sample 9:
time = 553787 time = 574602
flags = 1 flags = 1
data = length 209, hash 16D22463 data = length 209, hash 16D22463
sample 9: sample 10:
time = 579910 time = 600724
flags = 1 flags = 1
data = length 209, hash 47033534 data = length 209, hash 47033534
sample 10: sample 11:
time = 606032 time = 626846
flags = 1 flags = 1
data = length 209, hash CECB37A6 data = length 209, hash CECB37A6
sample 11: sample 12:
time = 632154 time = 652969
flags = 1 flags = 1
data = length 209, hash 6C9C307B data = length 209, hash 6C9C307B
sample 12: sample 13:
time = 658277 time = 679091
flags = 1 flags = 1
data = length 209, hash 3EB1A364 data = length 209, hash 3EB1A364
sample 13: sample 14:
time = 684399 time = 705214
flags = 1 flags = 1
data = length 209, hash 30962500 data = length 209, hash 30962500
sample 14: sample 15:
time = 710522 time = 731336
flags = 1 flags = 1
data = length 209, hash 2C5CCBB7 data = length 209, hash 2C5CCBB7
sample 15: sample 16:
time = 736644 time = 757459
flags = 1 flags = 1
data = length 209, hash F9CB9E37 data = length 209, hash F9CB9E37
sample 16: sample 17:
time = 762767 time = 783581
flags = 1 flags = 1
data = length 209, hash F75BC8C0 data = length 209, hash F75BC8C0
sample 17: sample 18:
time = 788889 time = 809704
flags = 1 flags = 1
data = length 209, hash D00ED607 data = length 209, hash D00ED607
sample 18: sample 19:
time = 815012 time = 835826
flags = 1 flags = 1
data = length 209, hash B4338395 data = length 209, hash B4338395
sample 19: sample 20:
time = 841134 time = 861948
flags = 1 flags = 1
data = length 209, hash E3E838A0 data = length 209, hash E3E838A0
sample 20: sample 21:
time = 867256 time = 888071
flags = 1 flags = 1
data = length 209, hash 2B0CF78 data = length 209, hash 2B0CF78
sample 21: sample 22:
time = 893379 time = 914193
flags = 1 flags = 1
data = length 209, hash 31906FA9 data = length 209, hash 31906FA9
sample 22: sample 23:
time = 919501 time = 940316
flags = 1 flags = 1
data = length 209, hash C92FC08F data = length 209, hash C92FC08F
sample 23: sample 24:
time = 945624 time = 966438
flags = 1 flags = 1
data = length 209, hash 7C89994 data = length 209, hash 7C89994
sample 24: sample 25:
time = 971746 time = 992561
flags = 1 flags = 1
data = length 209, hash EC37743B data = length 209, hash EC37743B
sample 25: sample 26:
time = 997869 time = 1018683
flags = 1 flags = 1
data = length 209, hash C974F6FB data = length 209, hash C974F6FB
tracksEnded = true tracksEnded = true

View File

@ -2,13 +2,13 @@ seekMap:
isSeekable = true isSeekable = true
duration = 1044875 duration = 1044875
getPosition(0) = [[timeUs=0, position=227]] getPosition(0) = [[timeUs=0, position=227]]
getPosition(1) = [[timeUs=1, position=227]] getPosition(1) = [[timeUs=0, position=227], [timeUs=26000, position=435]]
getPosition(522437) = [[timeUs=522437, position=4582]] getPosition(522437) = [[timeUs=520000, position=4387], [timeUs=546000, position=4595]]
getPosition(1044875) = [[timeUs=1044875, position=8585]] getPosition(1044875) = [[timeUs=1018875, position=8378]]
numberOfTracks = 1 numberOfTracks = 1
track 0: track 0:
total output bytes = 2717 total output bytes = 2926
sample count = 13 sample count = 14
format 0: format 0:
sampleMimeType = audio/mpeg sampleMimeType = audio/mpeg
maxInputSize = 4096 maxInputSize = 4096
@ -18,55 +18,59 @@ track 0:
encoderPadding = 1404 encoderPadding = 1404
metadata = entries=[TSSE: description=null: values=[Lavf58.45.100]] metadata = entries=[TSSE: description=null: values=[Lavf58.45.100]]
sample 0: sample 0:
time = 679168 time = 679125
flags = 1
data = length 209, hash 3EB1A364
sample 1:
time = 705247
flags = 1 flags = 1
data = length 209, hash 30962500 data = length 209, hash 30962500
sample 1: sample 2:
time = 705290 time = 731369
flags = 1 flags = 1
data = length 209, hash 2C5CCBB7 data = length 209, hash 2C5CCBB7
sample 2: sample 3:
time = 731412 time = 757492
flags = 1 flags = 1
data = length 209, hash F9CB9E37 data = length 209, hash F9CB9E37
sample 3: sample 4:
time = 757535 time = 783614
flags = 1 flags = 1
data = length 209, hash F75BC8C0 data = length 209, hash F75BC8C0
sample 4: sample 5:
time = 783657 time = 809737
flags = 1 flags = 1
data = length 209, hash D00ED607 data = length 209, hash D00ED607
sample 5: sample 6:
time = 809780 time = 835859
flags = 1 flags = 1
data = length 209, hash B4338395 data = length 209, hash B4338395
sample 6: sample 7:
time = 835902 time = 861982
flags = 1 flags = 1
data = length 209, hash E3E838A0 data = length 209, hash E3E838A0
sample 7: sample 8:
time = 862025 time = 888104
flags = 1 flags = 1
data = length 209, hash 2B0CF78 data = length 209, hash 2B0CF78
sample 8: sample 9:
time = 888147 time = 914227
flags = 1 flags = 1
data = length 209, hash 31906FA9 data = length 209, hash 31906FA9
sample 9: sample 10:
time = 914270 time = 940349
flags = 1 flags = 1
data = length 209, hash C92FC08F data = length 209, hash C92FC08F
sample 10: sample 11:
time = 940392 time = 966471
flags = 1 flags = 1
data = length 209, hash 7C89994 data = length 209, hash 7C89994
sample 11: sample 12:
time = 966514 time = 992594
flags = 1 flags = 1
data = length 209, hash EC37743B data = length 209, hash EC37743B
sample 12: sample 13:
time = 992637 time = 1018716
flags = 1 flags = 1
data = length 209, hash C974F6FB data = length 209, hash C974F6FB
tracksEnded = true tracksEnded = true

View File

@ -2,9 +2,9 @@ seekMap:
isSeekable = true isSeekable = true
duration = 1044875 duration = 1044875
getPosition(0) = [[timeUs=0, position=227]] getPosition(0) = [[timeUs=0, position=227]]
getPosition(1) = [[timeUs=1, position=227]] getPosition(1) = [[timeUs=0, position=227], [timeUs=26000, position=435]]
getPosition(522437) = [[timeUs=522437, position=4582]] getPosition(522437) = [[timeUs=520000, position=4387], [timeUs=546000, position=4595]]
getPosition(1044875) = [[timeUs=1044875, position=8585]] getPosition(1044875) = [[timeUs=1018875, position=8378]]
numberOfTracks = 1 numberOfTracks = 1
track 0: track 0:
total output bytes = 0 total output bytes = 0

View File

@ -2,9 +2,9 @@ seekMap:
isSeekable = true isSeekable = true
duration = 1044875 duration = 1044875
getPosition(0) = [[timeUs=0, position=227]] getPosition(0) = [[timeUs=0, position=227]]
getPosition(1) = [[timeUs=1, position=227]] getPosition(1) = [[timeUs=0, position=227], [timeUs=26000, position=435]]
getPosition(522437) = [[timeUs=522437, position=4582]] getPosition(522437) = [[timeUs=520000, position=4387], [timeUs=546000, position=4595]]
getPosition(1044875) = [[timeUs=1044875, position=8585]] getPosition(1044875) = [[timeUs=1018875, position=8378]]
numberOfTracks = 1 numberOfTracks = 1
track 0: track 0:
total output bytes = 8359 total output bytes = 8359