diff --git a/RELEASENOTES.md b/RELEASENOTES.md index b7c4f7a322..697df550a4 100644 --- a/RELEASENOTES.md +++ b/RELEASENOTES.md @@ -26,6 +26,10 @@ * Text: * TTML: Add support for referencing `tts:origin` and `tts:extent` via `style` ([#2953](https://github.com/google/ExoPlayer/issues/2953)). + * Restrict WebVTT and SubRip timestamps to exactly 3 decimal places. + Previously we incorrectly parsed any number of decimal places but always + assumed the value was in milliseconds, leading to incorrect timestamps + ([#1997](https://github.com/androidx/media/issues/1997)). * Metadata: * Image: * DataSource: diff --git a/libraries/extractor/src/main/java/androidx/media3/extractor/text/subrip/SubripParser.java b/libraries/extractor/src/main/java/androidx/media3/extractor/text/subrip/SubripParser.java index da98660d3c..db3a8df9a9 100644 --- a/libraries/extractor/src/main/java/androidx/media3/extractor/text/subrip/SubripParser.java +++ b/libraries/extractor/src/main/java/androidx/media3/extractor/text/subrip/SubripParser.java @@ -60,7 +60,7 @@ public final class SubripParser implements SubtitleParser { private static final String TAG = "SubripParser"; // Some SRT files don't include hours or milliseconds in the timecode, so we use optional groups. - private static final String SUBRIP_TIMECODE = "(?:(\\d+):)?(\\d+):(\\d+)(?:,(\\d+))?"; + private static final String SUBRIP_TIMECODE = "(?:(\\d+):)?(\\d+):(\\d+)(?:,(\\d{3}))?"; private static final Pattern SUBRIP_TIMING_LINE = Pattern.compile("\\s*(" + SUBRIP_TIMECODE + ")\\s*-->\\s*(" + SUBRIP_TIMECODE + ")\\s*"); diff --git a/libraries/extractor/src/main/java/androidx/media3/extractor/text/webvtt/WebvttCueParser.java b/libraries/extractor/src/main/java/androidx/media3/extractor/text/webvtt/WebvttCueParser.java index 3b9d048b1f..173037bc39 100644 --- a/libraries/extractor/src/main/java/androidx/media3/extractor/text/webvtt/WebvttCueParser.java +++ b/libraries/extractor/src/main/java/androidx/media3/extractor/text/webvtt/WebvttCueParser.java @@ -347,7 +347,7 @@ public final class WebvttCueParser { WebvttParserUtil.parseTimestampUs(Assertions.checkNotNull(cueHeaderMatcher.group(1))); builder.endTimeUs = WebvttParserUtil.parseTimestampUs(Assertions.checkNotNull(cueHeaderMatcher.group(2))); - } catch (NumberFormatException e) { + } catch (IllegalArgumentException e) { Log.w(TAG, "Skipping cue with bad header: " + cueHeaderMatcher.group()); return null; } diff --git a/libraries/extractor/src/main/java/androidx/media3/extractor/text/webvtt/WebvttParserUtil.java b/libraries/extractor/src/main/java/androidx/media3/extractor/text/webvtt/WebvttParserUtil.java index 1ba28b7fd7..22cfdb60fb 100644 --- a/libraries/extractor/src/main/java/androidx/media3/extractor/text/webvtt/WebvttParserUtil.java +++ b/libraries/extractor/src/main/java/androidx/media3/extractor/text/webvtt/WebvttParserUtil.java @@ -62,9 +62,9 @@ public final class WebvttParserUtil { * * @param timestamp The timestamp string. * @return The parsed timestamp in microseconds. - * @throws NumberFormatException If the timestamp could not be parsed. + * @throws IllegalArgumentException If the timestamp could not be parsed. */ - public static long parseTimestampUs(String timestamp) throws NumberFormatException { + public static long parseTimestampUs(String timestamp) { long value = 0; String[] parts = Util.splitAtFirst(timestamp, "\\."); String[] subparts = Util.split(parts[0], ":"); @@ -73,7 +73,12 @@ public final class WebvttParserUtil { } value *= 1000; if (parts.length == 2) { - value += Long.parseLong(parts[1]); + String millis = parts[1].trim(); + if (millis.length() == 3) { + value += Long.parseLong(millis); + } else { + throw new IllegalArgumentException("Expected 3 decimal places, got: " + millis); + } } return value * 1000; } diff --git a/libraries/extractor/src/test/java/androidx/media3/extractor/text/subrip/SubripParserTest.java b/libraries/extractor/src/test/java/androidx/media3/extractor/text/subrip/SubripParserTest.java index ee04b90fc1..5988c2885f 100644 --- a/libraries/extractor/src/test/java/androidx/media3/extractor/text/subrip/SubripParserTest.java +++ b/libraries/extractor/src/test/java/androidx/media3/extractor/text/subrip/SubripParserTest.java @@ -26,6 +26,7 @@ import androidx.media3.test.utils.TestUtil; import androidx.test.core.app.ApplicationProvider; import androidx.test.ext.junit.runners.AndroidJUnit4; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; import java.io.IOException; import java.util.ArrayList; import java.util.List; @@ -51,6 +52,7 @@ public final class SubripParserTest { private static final String TYPICAL_WITH_TAGS = "media/subrip/typical_with_tags"; private static final String TYPICAL_NO_HOURS_AND_MILLIS = "media/subrip/typical_no_hours_and_millis"; + private static final String TYPICAL_BAD_TIMESTAMPS = "media/subrip/typical_bad_timestamps"; @Test public void cueReplacementBehaviorIsMerge() { @@ -288,6 +290,17 @@ public final class SubripParserTest { assertTypicalCue3(allCues.get(2)); } + @Test + public void parseTypicalBadTimestamps() throws IOException { + SubripParser parser = new SubripParser(); + byte[] bytes = + TestUtil.getByteArray(ApplicationProvider.getApplicationContext(), TYPICAL_BAD_TIMESTAMPS); + + ImmutableList allCues = parseAllCues(parser, bytes); + + assertTypicalCue1(Iterables.getOnlyElement(allCues)); + } + private static ImmutableList parseAllCues(SubtitleParser parser, byte[] data) { ImmutableList.Builder cues = ImmutableList.builder(); parser.parse(data, OutputOptions.allCues(), cues::add); diff --git a/libraries/test_data/src/test/assets/media/subrip/typical_bad_timestamps b/libraries/test_data/src/test/assets/media/subrip/typical_bad_timestamps new file mode 100644 index 0000000000..729614a84b --- /dev/null +++ b/libraries/test_data/src/test/assets/media/subrip/typical_bad_timestamps @@ -0,0 +1,11 @@ +1 +00:00:00,000 --> 00:00:01,234 +This is the first subtitle. + +2 +00:00:02,345 --> 00:00:03,4560 +This should be discarded (too many decimal places). + +3 +02:00:04,567 --> 02:00:08,90 +This should be discarded (too few decimal places). diff --git a/libraries/test_data/src/test/assets/media/webvtt/typical_with_bad_timestamps b/libraries/test_data/src/test/assets/media/webvtt/typical_with_bad_timestamps index bb70a9fa26..fe4b6ec9b3 100644 --- a/libraries/test_data/src/test/assets/media/webvtt/typical_with_bad_timestamps +++ b/libraries/test_data/src/test/assets/media/webvtt/typical_with_bad_timestamps @@ -12,3 +12,9 @@ This should be discarded (too many dots). 00:06.789 --> not-a-timestamp This should be discarded (not a timestamp). + +00:07.789 --> 00:08.7890 +This should be discarded (too many decimal places). + +00:09.789 --> 00:10.78 +This should be discarded (too few decimal places).