From 4061d476a14314867da2f74ba7049c85568b56eb Mon Sep 17 00:00:00 2001 From: ibaker Date: Fri, 12 Jan 2024 06:45:27 -0800 Subject: [PATCH] MP3: Assume an Info header indicates CBR for seeking purposes The seek table in a Xing/Info header is very imprecise (max resolution of 255 to describe each of 100 byte positions in the file). Seeking using a constant bitrate assumption is more accurate, especially for longer files (which exacerbates the imprecision of the Info header). VBR files should contain an Xing header, while an Info header is identical but indicates the file is CBR. Issue: androidx/media#878 PiperOrigin-RevId: 597827891 --- RELEASENOTES.md | 4 + .../media3/extractor/mp3/Mp3Extractor.java | 37 +++++- .../mp3/test-cbr-info-header.mp3.0.dump | 6 +- .../mp3/test-cbr-info-header.mp3.1.dump | 116 +++++++++--------- .../mp3/test-cbr-info-header.mp3.2.dump | 64 +++++----- .../mp3/test-cbr-info-header.mp3.3.dump | 6 +- ...st-cbr-info-header.mp3.unknown_length.dump | 6 +- 7 files changed, 139 insertions(+), 100 deletions(-) diff --git a/RELEASENOTES.md b/RELEASENOTES.md index 1389fb1c1f..5cdc5f5edf 100644 --- a/RELEASENOTES.md +++ b/RELEASENOTES.md @@ -55,6 +55,10 @@ `ColorInfo.colorSpace`, `ColorInfo.colorTransfer`, and `ColorInfo.colorRange` values ([#692](https://github.com/androidx/media/pull/692)). + * MP3: Use constant bitrate (CBR) seeking for files with an `Info` header + (the CBR equivalent of the `Xing` header). Previously we used the seek + table from the `Info` header, but this results in less precise seeking + than if we ignore it and assume the file is CBR. * Audio: * Video: * Change the `MediaCodecVideoRenderer` constructor that takes a diff --git a/libraries/extractor/src/main/java/androidx/media3/extractor/mp3/Mp3Extractor.java b/libraries/extractor/src/main/java/androidx/media3/extractor/mp3/Mp3Extractor.java index 395ff415b1..e9d4163ed0 100644 --- a/libraries/extractor/src/main/java/androidx/media3/extractor/mp3/Mp3Extractor.java +++ b/libraries/extractor/src/main/java/androidx/media3/extractor/mp3/Mp3Extractor.java @@ -524,11 +524,23 @@ public final class Mp3Extractor implements Extractor { gaplessInfoHolder.encoderDelay = xingFrame.encoderDelay; gaplessInfoHolder.encoderPadding = xingFrame.encoderPadding; } - seeker = XingSeeker.create(input.getLength(), xingFrame, input.getPosition()); + long startPosition = input.getPosition(); input.skipFully(synchronizedHeader.frameSize); - if (seeker != null && !seeker.isSeekable() && seekHeader == SEEK_HEADER_INFO) { - // Fall back to constant bitrate seeking for Info headers missing a table of contents. - return getConstantBitrateSeeker(input, /* allowSeeksIfLengthUnknown= */ false); + // An Xing frame indicates the file is VBR (so we have to use the seek header for seeking) + // while an Info header indicates the file is CBR, in which case ConstantBitrateSeeker will + // give more accurate seeking than the low-resolution seek table in the Info header. We can + // still use the length from the Info frame if we don't know the stream length directly. + if (seekHeader == SEEK_HEADER_XING) { + seeker = XingSeeker.create(input.getLength(), xingFrame, startPosition); + } else { // seekHeader == SEEK_HEADER_INFO + long streamLength = + xingFrame.dataSize != C.LENGTH_UNSET + ? startPosition + xingFrame.dataSize + : C.LENGTH_UNSET; + // TODO: b/319235116 - Consider using the duration derived from the Xing/Info frame when + // it considers encoding delay and padding. + seeker = + getConstantBitrateSeeker(input, streamLength, /* allowSeeksIfLengthUnknown= */ false); } break; case SEEK_HEADER_VBRI: @@ -548,11 +560,26 @@ public final class Mp3Extractor implements Extractor { /** Peeks the next frame and returns a {@link ConstantBitrateSeeker} based on its bitrate. */ private Seeker getConstantBitrateSeeker(ExtractorInput input, boolean allowSeeksIfLengthUnknown) throws IOException { + return getConstantBitrateSeeker(input, C.LENGTH_UNSET, allowSeeksIfLengthUnknown); + } + + /** + * Peeks the next frame and returns a {@link ConstantBitrateSeeker} based on its bitrate. {@code + * streamLengthFallback} is used if {@link ExtractorInput#getLength() input.getLength()} is {@link + * C#LENGTH_UNSET}. {@code streamLengthFallback} may also be {@link C#LENGTH_UNSET} to indicate + * the length is unknown. + */ + private Seeker getConstantBitrateSeeker( + ExtractorInput input, long streamLengthFallback, boolean allowSeeksIfLengthUnknown) + throws IOException { input.peekFully(scratch.getData(), 0, 4); scratch.setPosition(0); synchronizedHeader.setForHeaderData(scratch.readInt()); return new ConstantBitrateSeeker( - input.getLength(), input.getPosition(), synchronizedHeader, allowSeeksIfLengthUnknown); + input.getLength() != C.LENGTH_UNSET ? input.getLength() : streamLengthFallback, + input.getPosition(), + synchronizedHeader, + allowSeeksIfLengthUnknown); } @EnsuresNonNull({"extractorOutput", "realTrackOutput"}) diff --git a/libraries/test_data/src/test/assets/extractordumps/mp3/test-cbr-info-header.mp3.0.dump b/libraries/test_data/src/test/assets/extractordumps/mp3/test-cbr-info-header.mp3.0.dump index 04e949490d..86ac2aad9f 100644 --- a/libraries/test_data/src/test/assets/extractordumps/mp3/test-cbr-info-header.mp3.0.dump +++ b/libraries/test_data/src/test/assets/extractordumps/mp3/test-cbr-info-header.mp3.0.dump @@ -2,9 +2,9 @@ seekMap: isSeekable = true duration = 1044875 getPosition(0) = [[timeUs=0, position=227]] - getPosition(1) = [[timeUs=1, position=227]] - getPosition(522437) = [[timeUs=522437, position=4582]] - getPosition(1044875) = [[timeUs=1044875, position=8585]] + getPosition(1) = [[timeUs=0, position=227], [timeUs=26000, position=435]] + getPosition(522437) = [[timeUs=520000, position=4387], [timeUs=546000, position=4595]] + getPosition(1044875) = [[timeUs=1018875, position=8378]] numberOfTracks = 1 track 0: total output bytes = 8359 diff --git a/libraries/test_data/src/test/assets/extractordumps/mp3/test-cbr-info-header.mp3.1.dump b/libraries/test_data/src/test/assets/extractordumps/mp3/test-cbr-info-header.mp3.1.dump index c397158af2..5868123166 100644 --- a/libraries/test_data/src/test/assets/extractordumps/mp3/test-cbr-info-header.mp3.1.dump +++ b/libraries/test_data/src/test/assets/extractordumps/mp3/test-cbr-info-header.mp3.1.dump @@ -2,13 +2,13 @@ seekMap: isSeekable = true duration = 1044875 getPosition(0) = [[timeUs=0, position=227]] - getPosition(1) = [[timeUs=1, position=227]] - getPosition(522437) = [[timeUs=522437, position=4582]] - getPosition(1044875) = [[timeUs=1044875, position=8585]] + getPosition(1) = [[timeUs=0, position=227], [timeUs=26000, position=435]] + getPosition(522437) = [[timeUs=520000, position=4387], [timeUs=546000, position=4595]] + getPosition(1044875) = [[timeUs=1018875, position=8378]] numberOfTracks = 1 track 0: - total output bytes = 5434 - sample count = 26 + total output bytes = 5643 + sample count = 27 format 0: sampleMimeType = audio/mpeg maxInputSize = 4096 @@ -18,107 +18,111 @@ track 0: encoderPadding = 1404 metadata = entries=[TSSE: description=null: values=[Lavf58.45.100]] sample 0: - time = 344808 + time = 339500 + flags = 1 + data = length 209, hash 6CCBBB3B + sample 1: + time = 365622 flags = 1 data = length 209, hash 34191E1 - sample 1: - time = 370930 + sample 2: + time = 391744 flags = 1 data = length 209, hash 57323ED7 - sample 2: - time = 397052 + sample 3: + time = 417867 flags = 1 data = length 209, hash 75618CF3 - sample 3: - time = 423175 + sample 4: + time = 443989 flags = 1 data = length 209, hash 784C973B - sample 4: - time = 449297 + sample 5: + time = 470112 flags = 1 data = length 209, hash 49106390 - sample 5: - time = 475420 + sample 6: + time = 496234 flags = 1 data = length 209, hash 70F6A563 - sample 6: - time = 501542 + sample 7: + time = 522357 flags = 1 data = length 209, hash 721882B0 - sample 7: - time = 527665 + sample 8: + time = 548479 flags = 1 data = length 209, hash 81C62AEE - sample 8: - time = 553787 + sample 9: + time = 574602 flags = 1 data = length 209, hash 16D22463 - sample 9: - time = 579910 + sample 10: + time = 600724 flags = 1 data = length 209, hash 47033534 - sample 10: - time = 606032 + sample 11: + time = 626846 flags = 1 data = length 209, hash CECB37A6 - sample 11: - time = 632154 + sample 12: + time = 652969 flags = 1 data = length 209, hash 6C9C307B - sample 12: - time = 658277 + sample 13: + time = 679091 flags = 1 data = length 209, hash 3EB1A364 - sample 13: - time = 684399 + sample 14: + time = 705214 flags = 1 data = length 209, hash 30962500 - sample 14: - time = 710522 + sample 15: + time = 731336 flags = 1 data = length 209, hash 2C5CCBB7 - sample 15: - time = 736644 + sample 16: + time = 757459 flags = 1 data = length 209, hash F9CB9E37 - sample 16: - time = 762767 + sample 17: + time = 783581 flags = 1 data = length 209, hash F75BC8C0 - sample 17: - time = 788889 + sample 18: + time = 809704 flags = 1 data = length 209, hash D00ED607 - sample 18: - time = 815012 + sample 19: + time = 835826 flags = 1 data = length 209, hash B4338395 - sample 19: - time = 841134 + sample 20: + time = 861948 flags = 1 data = length 209, hash E3E838A0 - sample 20: - time = 867256 + sample 21: + time = 888071 flags = 1 data = length 209, hash 2B0CF78 - sample 21: - time = 893379 + sample 22: + time = 914193 flags = 1 data = length 209, hash 31906FA9 - sample 22: - time = 919501 + sample 23: + time = 940316 flags = 1 data = length 209, hash C92FC08F - sample 23: - time = 945624 + sample 24: + time = 966438 flags = 1 data = length 209, hash 7C89994 - sample 24: - time = 971746 + sample 25: + time = 992561 flags = 1 data = length 209, hash EC37743B - sample 25: - time = 997869 + sample 26: + time = 1018683 flags = 1 data = length 209, hash C974F6FB tracksEnded = true diff --git a/libraries/test_data/src/test/assets/extractordumps/mp3/test-cbr-info-header.mp3.2.dump b/libraries/test_data/src/test/assets/extractordumps/mp3/test-cbr-info-header.mp3.2.dump index 120494f411..25a2640d65 100644 --- a/libraries/test_data/src/test/assets/extractordumps/mp3/test-cbr-info-header.mp3.2.dump +++ b/libraries/test_data/src/test/assets/extractordumps/mp3/test-cbr-info-header.mp3.2.dump @@ -2,13 +2,13 @@ seekMap: isSeekable = true duration = 1044875 getPosition(0) = [[timeUs=0, position=227]] - getPosition(1) = [[timeUs=1, position=227]] - getPosition(522437) = [[timeUs=522437, position=4582]] - getPosition(1044875) = [[timeUs=1044875, position=8585]] + getPosition(1) = [[timeUs=0, position=227], [timeUs=26000, position=435]] + getPosition(522437) = [[timeUs=520000, position=4387], [timeUs=546000, position=4595]] + getPosition(1044875) = [[timeUs=1018875, position=8378]] numberOfTracks = 1 track 0: - total output bytes = 2717 - sample count = 13 + total output bytes = 2926 + sample count = 14 format 0: sampleMimeType = audio/mpeg maxInputSize = 4096 @@ -18,55 +18,59 @@ track 0: encoderPadding = 1404 metadata = entries=[TSSE: description=null: values=[Lavf58.45.100]] sample 0: - time = 679168 + time = 679125 + flags = 1 + data = length 209, hash 3EB1A364 + sample 1: + time = 705247 flags = 1 data = length 209, hash 30962500 - sample 1: - time = 705290 + sample 2: + time = 731369 flags = 1 data = length 209, hash 2C5CCBB7 - sample 2: - time = 731412 + sample 3: + time = 757492 flags = 1 data = length 209, hash F9CB9E37 - sample 3: - time = 757535 + sample 4: + time = 783614 flags = 1 data = length 209, hash F75BC8C0 - sample 4: - time = 783657 + sample 5: + time = 809737 flags = 1 data = length 209, hash D00ED607 - sample 5: - time = 809780 + sample 6: + time = 835859 flags = 1 data = length 209, hash B4338395 - sample 6: - time = 835902 + sample 7: + time = 861982 flags = 1 data = length 209, hash E3E838A0 - sample 7: - time = 862025 + sample 8: + time = 888104 flags = 1 data = length 209, hash 2B0CF78 - sample 8: - time = 888147 + sample 9: + time = 914227 flags = 1 data = length 209, hash 31906FA9 - sample 9: - time = 914270 + sample 10: + time = 940349 flags = 1 data = length 209, hash C92FC08F - sample 10: - time = 940392 + sample 11: + time = 966471 flags = 1 data = length 209, hash 7C89994 - sample 11: - time = 966514 + sample 12: + time = 992594 flags = 1 data = length 209, hash EC37743B - sample 12: - time = 992637 + sample 13: + time = 1018716 flags = 1 data = length 209, hash C974F6FB tracksEnded = true diff --git a/libraries/test_data/src/test/assets/extractordumps/mp3/test-cbr-info-header.mp3.3.dump b/libraries/test_data/src/test/assets/extractordumps/mp3/test-cbr-info-header.mp3.3.dump index 571c174f79..f7ea250468 100644 --- a/libraries/test_data/src/test/assets/extractordumps/mp3/test-cbr-info-header.mp3.3.dump +++ b/libraries/test_data/src/test/assets/extractordumps/mp3/test-cbr-info-header.mp3.3.dump @@ -2,9 +2,9 @@ seekMap: isSeekable = true duration = 1044875 getPosition(0) = [[timeUs=0, position=227]] - getPosition(1) = [[timeUs=1, position=227]] - getPosition(522437) = [[timeUs=522437, position=4582]] - getPosition(1044875) = [[timeUs=1044875, position=8585]] + getPosition(1) = [[timeUs=0, position=227], [timeUs=26000, position=435]] + getPosition(522437) = [[timeUs=520000, position=4387], [timeUs=546000, position=4595]] + getPosition(1044875) = [[timeUs=1018875, position=8378]] numberOfTracks = 1 track 0: total output bytes = 0 diff --git a/libraries/test_data/src/test/assets/extractordumps/mp3/test-cbr-info-header.mp3.unknown_length.dump b/libraries/test_data/src/test/assets/extractordumps/mp3/test-cbr-info-header.mp3.unknown_length.dump index 04e949490d..86ac2aad9f 100644 --- a/libraries/test_data/src/test/assets/extractordumps/mp3/test-cbr-info-header.mp3.unknown_length.dump +++ b/libraries/test_data/src/test/assets/extractordumps/mp3/test-cbr-info-header.mp3.unknown_length.dump @@ -2,9 +2,9 @@ seekMap: isSeekable = true duration = 1044875 getPosition(0) = [[timeUs=0, position=227]] - getPosition(1) = [[timeUs=1, position=227]] - getPosition(522437) = [[timeUs=522437, position=4582]] - getPosition(1044875) = [[timeUs=1044875, position=8585]] + getPosition(1) = [[timeUs=0, position=227], [timeUs=26000, position=435]] + getPosition(522437) = [[timeUs=520000, position=4387], [timeUs=546000, position=4595]] + getPosition(1044875) = [[timeUs=1018875, position=8378]] numberOfTracks = 1 track 0: total output bytes = 8359