Restrict SubRip and WebVTT parsing to exactly 3 decimal places

We previously parsed an arbitrary number of decimal places, but assumed
the value was in milliseconds, which doesn't make sense if there is
greater or fewer than 3. This change restricts the parsing to match
exactly 3, meaning the millisecond assumption is always true.

The WebVTT spec requires there to be exactly 3 decimal places:
https://www.w3.org/TR/webvtt1/#webvtt-timestamp

The SubRip spec is less clearly defined, but the Wikipedia article
defines it as having exactly 3 decimal places
(https://en.wikipedia.org/wiki/SubRip#Format) and ExoPlayer has always
assumed 3 decimal places (anything else is already handled incorrectly),
so this change just ensures we don't show subtitles at the wrong time.

Issue: androidx/media#1997
PiperOrigin-RevId: 712885023
This commit is contained in:
ibaker 2025-01-07 06:11:03 -08:00 committed by Copybara-Service
parent 9ac91aaadd
commit 31e5142b72
7 changed files with 44 additions and 5 deletions

View File

@ -26,6 +26,10 @@
* Text: * Text:
* TTML: Add support for referencing `tts:origin` and `tts:extent` via * TTML: Add support for referencing `tts:origin` and `tts:extent` via
`style` ([#2953](https://github.com/google/ExoPlayer/issues/2953)). `style` ([#2953](https://github.com/google/ExoPlayer/issues/2953)).
* Restrict WebVTT and SubRip timestamps to exactly 3 decimal places.
Previously we incorrectly parsed any number of decimal places but always
assumed the value was in milliseconds, leading to incorrect timestamps
([#1997](https://github.com/androidx/media/issues/1997)).
* Metadata: * Metadata:
* Image: * Image:
* DataSource: * DataSource:

View File

@ -60,7 +60,7 @@ public final class SubripParser implements SubtitleParser {
private static final String TAG = "SubripParser"; private static final String TAG = "SubripParser";
// Some SRT files don't include hours or milliseconds in the timecode, so we use optional groups. // Some SRT files don't include hours or milliseconds in the timecode, so we use optional groups.
private static final String SUBRIP_TIMECODE = "(?:(\\d+):)?(\\d+):(\\d+)(?:,(\\d+))?"; private static final String SUBRIP_TIMECODE = "(?:(\\d+):)?(\\d+):(\\d+)(?:,(\\d{3}))?";
private static final Pattern SUBRIP_TIMING_LINE = private static final Pattern SUBRIP_TIMING_LINE =
Pattern.compile("\\s*(" + SUBRIP_TIMECODE + ")\\s*-->\\s*(" + SUBRIP_TIMECODE + ")\\s*"); Pattern.compile("\\s*(" + SUBRIP_TIMECODE + ")\\s*-->\\s*(" + SUBRIP_TIMECODE + ")\\s*");

View File

@ -347,7 +347,7 @@ public final class WebvttCueParser {
WebvttParserUtil.parseTimestampUs(Assertions.checkNotNull(cueHeaderMatcher.group(1))); WebvttParserUtil.parseTimestampUs(Assertions.checkNotNull(cueHeaderMatcher.group(1)));
builder.endTimeUs = builder.endTimeUs =
WebvttParserUtil.parseTimestampUs(Assertions.checkNotNull(cueHeaderMatcher.group(2))); WebvttParserUtil.parseTimestampUs(Assertions.checkNotNull(cueHeaderMatcher.group(2)));
} catch (NumberFormatException e) { } catch (IllegalArgumentException e) {
Log.w(TAG, "Skipping cue with bad header: " + cueHeaderMatcher.group()); Log.w(TAG, "Skipping cue with bad header: " + cueHeaderMatcher.group());
return null; return null;
} }

View File

@ -62,9 +62,9 @@ public final class WebvttParserUtil {
* *
* @param timestamp The timestamp string. * @param timestamp The timestamp string.
* @return The parsed timestamp in microseconds. * @return The parsed timestamp in microseconds.
* @throws NumberFormatException If the timestamp could not be parsed. * @throws IllegalArgumentException If the timestamp could not be parsed.
*/ */
public static long parseTimestampUs(String timestamp) throws NumberFormatException { public static long parseTimestampUs(String timestamp) {
long value = 0; long value = 0;
String[] parts = Util.splitAtFirst(timestamp, "\\."); String[] parts = Util.splitAtFirst(timestamp, "\\.");
String[] subparts = Util.split(parts[0], ":"); String[] subparts = Util.split(parts[0], ":");
@ -73,7 +73,12 @@ public final class WebvttParserUtil {
} }
value *= 1000; value *= 1000;
if (parts.length == 2) { if (parts.length == 2) {
value += Long.parseLong(parts[1]); String millis = parts[1].trim();
if (millis.length() == 3) {
value += Long.parseLong(millis);
} else {
throw new IllegalArgumentException("Expected 3 decimal places, got: " + millis);
}
} }
return value * 1000; return value * 1000;
} }

View File

@ -26,6 +26,7 @@ import androidx.media3.test.utils.TestUtil;
import androidx.test.core.app.ApplicationProvider; import androidx.test.core.app.ApplicationProvider;
import androidx.test.ext.junit.runners.AndroidJUnit4; import androidx.test.ext.junit.runners.AndroidJUnit4;
import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
@ -51,6 +52,7 @@ public final class SubripParserTest {
private static final String TYPICAL_WITH_TAGS = "media/subrip/typical_with_tags"; private static final String TYPICAL_WITH_TAGS = "media/subrip/typical_with_tags";
private static final String TYPICAL_NO_HOURS_AND_MILLIS = private static final String TYPICAL_NO_HOURS_AND_MILLIS =
"media/subrip/typical_no_hours_and_millis"; "media/subrip/typical_no_hours_and_millis";
private static final String TYPICAL_BAD_TIMESTAMPS = "media/subrip/typical_bad_timestamps";
@Test @Test
public void cueReplacementBehaviorIsMerge() { public void cueReplacementBehaviorIsMerge() {
@ -288,6 +290,17 @@ public final class SubripParserTest {
assertTypicalCue3(allCues.get(2)); assertTypicalCue3(allCues.get(2));
} }
@Test
public void parseTypicalBadTimestamps() throws IOException {
SubripParser parser = new SubripParser();
byte[] bytes =
TestUtil.getByteArray(ApplicationProvider.getApplicationContext(), TYPICAL_BAD_TIMESTAMPS);
ImmutableList<CuesWithTiming> allCues = parseAllCues(parser, bytes);
assertTypicalCue1(Iterables.getOnlyElement(allCues));
}
private static ImmutableList<CuesWithTiming> parseAllCues(SubtitleParser parser, byte[] data) { private static ImmutableList<CuesWithTiming> parseAllCues(SubtitleParser parser, byte[] data) {
ImmutableList.Builder<CuesWithTiming> cues = ImmutableList.builder(); ImmutableList.Builder<CuesWithTiming> cues = ImmutableList.builder();
parser.parse(data, OutputOptions.allCues(), cues::add); parser.parse(data, OutputOptions.allCues(), cues::add);

View File

@ -0,0 +1,11 @@
1
00:00:00,000 --> 00:00:01,234
This is the first subtitle.
2
00:00:02,345 --> 00:00:03,4560
This should be discarded (too many decimal places).
3
02:00:04,567 --> 02:00:08,90
This should be discarded (too few decimal places).

View File

@ -12,3 +12,9 @@ This should be discarded (too many dots).
00:06.789 --> not-a-timestamp 00:06.789 --> not-a-timestamp
This should be discarded (not a timestamp). This should be discarded (not a timestamp).
00:07.789 --> 00:08.7890
This should be discarded (too many decimal places).
00:09.789 --> 00:10.78
This should be discarded (too few decimal places).