MP3: Assume an Info header indicates CBR for seeking purposes

The seek table in a Xing/Info header is very imprecise (max resolution
of 255 to describe each of 100 byte positions in the file). Seeking
using a constant bitrate assumption is more accurate, especially for
longer files (which exacerbates the imprecision of the Info header).

VBR files should contain an Xing header, while an Info header is
identical but indicates the file is CBR.

Issue: androidx/media#878
PiperOrigin-RevId: 597827891
This commit is contained in:
ibaker 2024-01-12 06:45:27 -08:00 committed by Copybara-Service
parent 5056dfaa2b
commit 4061d476a1
7 changed files with 139 additions and 100 deletions

View File

@ -55,6 +55,10 @@
`ColorInfo.colorSpace`, `ColorInfo.colorTransfer`, and
`ColorInfo.colorRange` values
([#692](https://github.com/androidx/media/pull/692)).
* MP3: Use constant bitrate (CBR) seeking for files with an `Info` header
(the CBR equivalent of the `Xing` header). Previously we used the seek
table from the `Info` header, but this results in less precise seeking
than if we ignore it and assume the file is CBR.
* Audio:
* Video:
* Change the `MediaCodecVideoRenderer` constructor that takes a

View File

@ -524,11 +524,23 @@ public final class Mp3Extractor implements Extractor {
gaplessInfoHolder.encoderDelay = xingFrame.encoderDelay;
gaplessInfoHolder.encoderPadding = xingFrame.encoderPadding;
}
seeker = XingSeeker.create(input.getLength(), xingFrame, input.getPosition());
long startPosition = input.getPosition();
input.skipFully(synchronizedHeader.frameSize);
if (seeker != null && !seeker.isSeekable() && seekHeader == SEEK_HEADER_INFO) {
// Fall back to constant bitrate seeking for Info headers missing a table of contents.
return getConstantBitrateSeeker(input, /* allowSeeksIfLengthUnknown= */ false);
// An Xing frame indicates the file is VBR (so we have to use the seek header for seeking)
// while an Info header indicates the file is CBR, in which case ConstantBitrateSeeker will
// give more accurate seeking than the low-resolution seek table in the Info header. We can
// still use the length from the Info frame if we don't know the stream length directly.
if (seekHeader == SEEK_HEADER_XING) {
seeker = XingSeeker.create(input.getLength(), xingFrame, startPosition);
} else { // seekHeader == SEEK_HEADER_INFO
long streamLength =
xingFrame.dataSize != C.LENGTH_UNSET
? startPosition + xingFrame.dataSize
: C.LENGTH_UNSET;
// TODO: b/319235116 - Consider using the duration derived from the Xing/Info frame when
// it considers encoding delay and padding.
seeker =
getConstantBitrateSeeker(input, streamLength, /* allowSeeksIfLengthUnknown= */ false);
}
break;
case SEEK_HEADER_VBRI:
@ -548,11 +560,26 @@ public final class Mp3Extractor implements Extractor {
/** Peeks the next frame and returns a {@link ConstantBitrateSeeker} based on its bitrate. */
private Seeker getConstantBitrateSeeker(ExtractorInput input, boolean allowSeeksIfLengthUnknown)
throws IOException {
return getConstantBitrateSeeker(input, C.LENGTH_UNSET, allowSeeksIfLengthUnknown);
}
/**
* Peeks the next frame and returns a {@link ConstantBitrateSeeker} based on its bitrate. {@code
* streamLengthFallback} is used if {@link ExtractorInput#getLength() input.getLength()} is {@link
* C#LENGTH_UNSET}. {@code streamLengthFallback} may also be {@link C#LENGTH_UNSET} to indicate
* the length is unknown.
*/
private Seeker getConstantBitrateSeeker(
ExtractorInput input, long streamLengthFallback, boolean allowSeeksIfLengthUnknown)
throws IOException {
input.peekFully(scratch.getData(), 0, 4);
scratch.setPosition(0);
synchronizedHeader.setForHeaderData(scratch.readInt());
return new ConstantBitrateSeeker(
input.getLength(), input.getPosition(), synchronizedHeader, allowSeeksIfLengthUnknown);
input.getLength() != C.LENGTH_UNSET ? input.getLength() : streamLengthFallback,
input.getPosition(),
synchronizedHeader,
allowSeeksIfLengthUnknown);
}
@EnsuresNonNull({"extractorOutput", "realTrackOutput"})

View File

@ -2,9 +2,9 @@ seekMap:
isSeekable = true
duration = 1044875
getPosition(0) = [[timeUs=0, position=227]]
getPosition(1) = [[timeUs=1, position=227]]
getPosition(522437) = [[timeUs=522437, position=4582]]
getPosition(1044875) = [[timeUs=1044875, position=8585]]
getPosition(1) = [[timeUs=0, position=227], [timeUs=26000, position=435]]
getPosition(522437) = [[timeUs=520000, position=4387], [timeUs=546000, position=4595]]
getPosition(1044875) = [[timeUs=1018875, position=8378]]
numberOfTracks = 1
track 0:
total output bytes = 8359

View File

@ -2,13 +2,13 @@ seekMap:
isSeekable = true
duration = 1044875
getPosition(0) = [[timeUs=0, position=227]]
getPosition(1) = [[timeUs=1, position=227]]
getPosition(522437) = [[timeUs=522437, position=4582]]
getPosition(1044875) = [[timeUs=1044875, position=8585]]
getPosition(1) = [[timeUs=0, position=227], [timeUs=26000, position=435]]
getPosition(522437) = [[timeUs=520000, position=4387], [timeUs=546000, position=4595]]
getPosition(1044875) = [[timeUs=1018875, position=8378]]
numberOfTracks = 1
track 0:
total output bytes = 5434
sample count = 26
total output bytes = 5643
sample count = 27
format 0:
sampleMimeType = audio/mpeg
maxInputSize = 4096
@ -18,107 +18,111 @@ track 0:
encoderPadding = 1404
metadata = entries=[TSSE: description=null: values=[Lavf58.45.100]]
sample 0:
time = 344808
time = 339500
flags = 1
data = length 209, hash 6CCBBB3B
sample 1:
time = 365622
flags = 1
data = length 209, hash 34191E1
sample 1:
time = 370930
sample 2:
time = 391744
flags = 1
data = length 209, hash 57323ED7
sample 2:
time = 397052
sample 3:
time = 417867
flags = 1
data = length 209, hash 75618CF3
sample 3:
time = 423175
sample 4:
time = 443989
flags = 1
data = length 209, hash 784C973B
sample 4:
time = 449297
sample 5:
time = 470112
flags = 1
data = length 209, hash 49106390
sample 5:
time = 475420
sample 6:
time = 496234
flags = 1
data = length 209, hash 70F6A563
sample 6:
time = 501542
sample 7:
time = 522357
flags = 1
data = length 209, hash 721882B0
sample 7:
time = 527665
sample 8:
time = 548479
flags = 1
data = length 209, hash 81C62AEE
sample 8:
time = 553787
sample 9:
time = 574602
flags = 1
data = length 209, hash 16D22463
sample 9:
time = 579910
sample 10:
time = 600724
flags = 1
data = length 209, hash 47033534
sample 10:
time = 606032
sample 11:
time = 626846
flags = 1
data = length 209, hash CECB37A6
sample 11:
time = 632154
sample 12:
time = 652969
flags = 1
data = length 209, hash 6C9C307B
sample 12:
time = 658277
sample 13:
time = 679091
flags = 1
data = length 209, hash 3EB1A364
sample 13:
time = 684399
sample 14:
time = 705214
flags = 1
data = length 209, hash 30962500
sample 14:
time = 710522
sample 15:
time = 731336
flags = 1
data = length 209, hash 2C5CCBB7
sample 15:
time = 736644
sample 16:
time = 757459
flags = 1
data = length 209, hash F9CB9E37
sample 16:
time = 762767
sample 17:
time = 783581
flags = 1
data = length 209, hash F75BC8C0
sample 17:
time = 788889
sample 18:
time = 809704
flags = 1
data = length 209, hash D00ED607
sample 18:
time = 815012
sample 19:
time = 835826
flags = 1
data = length 209, hash B4338395
sample 19:
time = 841134
sample 20:
time = 861948
flags = 1
data = length 209, hash E3E838A0
sample 20:
time = 867256
sample 21:
time = 888071
flags = 1
data = length 209, hash 2B0CF78
sample 21:
time = 893379
sample 22:
time = 914193
flags = 1
data = length 209, hash 31906FA9
sample 22:
time = 919501
sample 23:
time = 940316
flags = 1
data = length 209, hash C92FC08F
sample 23:
time = 945624
sample 24:
time = 966438
flags = 1
data = length 209, hash 7C89994
sample 24:
time = 971746
sample 25:
time = 992561
flags = 1
data = length 209, hash EC37743B
sample 25:
time = 997869
sample 26:
time = 1018683
flags = 1
data = length 209, hash C974F6FB
tracksEnded = true

View File

@ -2,13 +2,13 @@ seekMap:
isSeekable = true
duration = 1044875
getPosition(0) = [[timeUs=0, position=227]]
getPosition(1) = [[timeUs=1, position=227]]
getPosition(522437) = [[timeUs=522437, position=4582]]
getPosition(1044875) = [[timeUs=1044875, position=8585]]
getPosition(1) = [[timeUs=0, position=227], [timeUs=26000, position=435]]
getPosition(522437) = [[timeUs=520000, position=4387], [timeUs=546000, position=4595]]
getPosition(1044875) = [[timeUs=1018875, position=8378]]
numberOfTracks = 1
track 0:
total output bytes = 2717
sample count = 13
total output bytes = 2926
sample count = 14
format 0:
sampleMimeType = audio/mpeg
maxInputSize = 4096
@ -18,55 +18,59 @@ track 0:
encoderPadding = 1404
metadata = entries=[TSSE: description=null: values=[Lavf58.45.100]]
sample 0:
time = 679168
time = 679125
flags = 1
data = length 209, hash 3EB1A364
sample 1:
time = 705247
flags = 1
data = length 209, hash 30962500
sample 1:
time = 705290
sample 2:
time = 731369
flags = 1
data = length 209, hash 2C5CCBB7
sample 2:
time = 731412
sample 3:
time = 757492
flags = 1
data = length 209, hash F9CB9E37
sample 3:
time = 757535
sample 4:
time = 783614
flags = 1
data = length 209, hash F75BC8C0
sample 4:
time = 783657
sample 5:
time = 809737
flags = 1
data = length 209, hash D00ED607
sample 5:
time = 809780
sample 6:
time = 835859
flags = 1
data = length 209, hash B4338395
sample 6:
time = 835902
sample 7:
time = 861982
flags = 1
data = length 209, hash E3E838A0
sample 7:
time = 862025
sample 8:
time = 888104
flags = 1
data = length 209, hash 2B0CF78
sample 8:
time = 888147
sample 9:
time = 914227
flags = 1
data = length 209, hash 31906FA9
sample 9:
time = 914270
sample 10:
time = 940349
flags = 1
data = length 209, hash C92FC08F
sample 10:
time = 940392
sample 11:
time = 966471
flags = 1
data = length 209, hash 7C89994
sample 11:
time = 966514
sample 12:
time = 992594
flags = 1
data = length 209, hash EC37743B
sample 12:
time = 992637
sample 13:
time = 1018716
flags = 1
data = length 209, hash C974F6FB
tracksEnded = true

View File

@ -2,9 +2,9 @@ seekMap:
isSeekable = true
duration = 1044875
getPosition(0) = [[timeUs=0, position=227]]
getPosition(1) = [[timeUs=1, position=227]]
getPosition(522437) = [[timeUs=522437, position=4582]]
getPosition(1044875) = [[timeUs=1044875, position=8585]]
getPosition(1) = [[timeUs=0, position=227], [timeUs=26000, position=435]]
getPosition(522437) = [[timeUs=520000, position=4387], [timeUs=546000, position=4595]]
getPosition(1044875) = [[timeUs=1018875, position=8378]]
numberOfTracks = 1
track 0:
total output bytes = 0

View File

@ -2,9 +2,9 @@ seekMap:
isSeekable = true
duration = 1044875
getPosition(0) = [[timeUs=0, position=227]]
getPosition(1) = [[timeUs=1, position=227]]
getPosition(522437) = [[timeUs=522437, position=4582]]
getPosition(1044875) = [[timeUs=1044875, position=8585]]
getPosition(1) = [[timeUs=0, position=227], [timeUs=26000, position=435]]
getPosition(522437) = [[timeUs=520000, position=4387], [timeUs=546000, position=4595]]
getPosition(1044875) = [[timeUs=1018875, position=8378]]
numberOfTracks = 1
track 0:
total output bytes = 8359