mirror of
https://github.com/androidx/media.git
synced 2025-04-30 06:46:50 +08:00
Restrict SubRip and WebVTT parsing to exactly 3 decimal places
We previously parsed an arbitrary number of decimal places, but assumed the value was in milliseconds, which doesn't make sense if there is greater or fewer than 3. This change restricts the parsing to match exactly 3, meaning the millisecond assumption is always true. The WebVTT spec requires there to be exactly 3 decimal places: https://www.w3.org/TR/webvtt1/#webvtt-timestamp The SubRip spec is less clearly defined, but the Wikipedia article defines it as having exactly 3 decimal places (https://en.wikipedia.org/wiki/SubRip#Format) and ExoPlayer has always assumed 3 decimal places (anything else is already handled incorrectly), so this change just ensures we don't show subtitles at the wrong time. Issue: androidx/media#1997 PiperOrigin-RevId: 712885023
This commit is contained in:
parent
9ac91aaadd
commit
31e5142b72
@ -26,6 +26,10 @@
|
|||||||
* Text:
|
* Text:
|
||||||
* TTML: Add support for referencing `tts:origin` and `tts:extent` via
|
* TTML: Add support for referencing `tts:origin` and `tts:extent` via
|
||||||
`style` ([#2953](https://github.com/google/ExoPlayer/issues/2953)).
|
`style` ([#2953](https://github.com/google/ExoPlayer/issues/2953)).
|
||||||
|
* Restrict WebVTT and SubRip timestamps to exactly 3 decimal places.
|
||||||
|
Previously we incorrectly parsed any number of decimal places but always
|
||||||
|
assumed the value was in milliseconds, leading to incorrect timestamps
|
||||||
|
([#1997](https://github.com/androidx/media/issues/1997)).
|
||||||
* Metadata:
|
* Metadata:
|
||||||
* Image:
|
* Image:
|
||||||
* DataSource:
|
* DataSource:
|
||||||
|
@ -60,7 +60,7 @@ public final class SubripParser implements SubtitleParser {
|
|||||||
private static final String TAG = "SubripParser";
|
private static final String TAG = "SubripParser";
|
||||||
|
|
||||||
// Some SRT files don't include hours or milliseconds in the timecode, so we use optional groups.
|
// Some SRT files don't include hours or milliseconds in the timecode, so we use optional groups.
|
||||||
private static final String SUBRIP_TIMECODE = "(?:(\\d+):)?(\\d+):(\\d+)(?:,(\\d+))?";
|
private static final String SUBRIP_TIMECODE = "(?:(\\d+):)?(\\d+):(\\d+)(?:,(\\d{3}))?";
|
||||||
private static final Pattern SUBRIP_TIMING_LINE =
|
private static final Pattern SUBRIP_TIMING_LINE =
|
||||||
Pattern.compile("\\s*(" + SUBRIP_TIMECODE + ")\\s*-->\\s*(" + SUBRIP_TIMECODE + ")\\s*");
|
Pattern.compile("\\s*(" + SUBRIP_TIMECODE + ")\\s*-->\\s*(" + SUBRIP_TIMECODE + ")\\s*");
|
||||||
|
|
||||||
|
@ -347,7 +347,7 @@ public final class WebvttCueParser {
|
|||||||
WebvttParserUtil.parseTimestampUs(Assertions.checkNotNull(cueHeaderMatcher.group(1)));
|
WebvttParserUtil.parseTimestampUs(Assertions.checkNotNull(cueHeaderMatcher.group(1)));
|
||||||
builder.endTimeUs =
|
builder.endTimeUs =
|
||||||
WebvttParserUtil.parseTimestampUs(Assertions.checkNotNull(cueHeaderMatcher.group(2)));
|
WebvttParserUtil.parseTimestampUs(Assertions.checkNotNull(cueHeaderMatcher.group(2)));
|
||||||
} catch (NumberFormatException e) {
|
} catch (IllegalArgumentException e) {
|
||||||
Log.w(TAG, "Skipping cue with bad header: " + cueHeaderMatcher.group());
|
Log.w(TAG, "Skipping cue with bad header: " + cueHeaderMatcher.group());
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
@ -62,9 +62,9 @@ public final class WebvttParserUtil {
|
|||||||
*
|
*
|
||||||
* @param timestamp The timestamp string.
|
* @param timestamp The timestamp string.
|
||||||
* @return The parsed timestamp in microseconds.
|
* @return The parsed timestamp in microseconds.
|
||||||
* @throws NumberFormatException If the timestamp could not be parsed.
|
* @throws IllegalArgumentException If the timestamp could not be parsed.
|
||||||
*/
|
*/
|
||||||
public static long parseTimestampUs(String timestamp) throws NumberFormatException {
|
public static long parseTimestampUs(String timestamp) {
|
||||||
long value = 0;
|
long value = 0;
|
||||||
String[] parts = Util.splitAtFirst(timestamp, "\\.");
|
String[] parts = Util.splitAtFirst(timestamp, "\\.");
|
||||||
String[] subparts = Util.split(parts[0], ":");
|
String[] subparts = Util.split(parts[0], ":");
|
||||||
@ -73,7 +73,12 @@ public final class WebvttParserUtil {
|
|||||||
}
|
}
|
||||||
value *= 1000;
|
value *= 1000;
|
||||||
if (parts.length == 2) {
|
if (parts.length == 2) {
|
||||||
value += Long.parseLong(parts[1]);
|
String millis = parts[1].trim();
|
||||||
|
if (millis.length() == 3) {
|
||||||
|
value += Long.parseLong(millis);
|
||||||
|
} else {
|
||||||
|
throw new IllegalArgumentException("Expected 3 decimal places, got: " + millis);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return value * 1000;
|
return value * 1000;
|
||||||
}
|
}
|
||||||
|
@ -26,6 +26,7 @@ import androidx.media3.test.utils.TestUtil;
|
|||||||
import androidx.test.core.app.ApplicationProvider;
|
import androidx.test.core.app.ApplicationProvider;
|
||||||
import androidx.test.ext.junit.runners.AndroidJUnit4;
|
import androidx.test.ext.junit.runners.AndroidJUnit4;
|
||||||
import com.google.common.collect.ImmutableList;
|
import com.google.common.collect.ImmutableList;
|
||||||
|
import com.google.common.collect.Iterables;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
@ -51,6 +52,7 @@ public final class SubripParserTest {
|
|||||||
private static final String TYPICAL_WITH_TAGS = "media/subrip/typical_with_tags";
|
private static final String TYPICAL_WITH_TAGS = "media/subrip/typical_with_tags";
|
||||||
private static final String TYPICAL_NO_HOURS_AND_MILLIS =
|
private static final String TYPICAL_NO_HOURS_AND_MILLIS =
|
||||||
"media/subrip/typical_no_hours_and_millis";
|
"media/subrip/typical_no_hours_and_millis";
|
||||||
|
private static final String TYPICAL_BAD_TIMESTAMPS = "media/subrip/typical_bad_timestamps";
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void cueReplacementBehaviorIsMerge() {
|
public void cueReplacementBehaviorIsMerge() {
|
||||||
@ -288,6 +290,17 @@ public final class SubripParserTest {
|
|||||||
assertTypicalCue3(allCues.get(2));
|
assertTypicalCue3(allCues.get(2));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void parseTypicalBadTimestamps() throws IOException {
|
||||||
|
SubripParser parser = new SubripParser();
|
||||||
|
byte[] bytes =
|
||||||
|
TestUtil.getByteArray(ApplicationProvider.getApplicationContext(), TYPICAL_BAD_TIMESTAMPS);
|
||||||
|
|
||||||
|
ImmutableList<CuesWithTiming> allCues = parseAllCues(parser, bytes);
|
||||||
|
|
||||||
|
assertTypicalCue1(Iterables.getOnlyElement(allCues));
|
||||||
|
}
|
||||||
|
|
||||||
private static ImmutableList<CuesWithTiming> parseAllCues(SubtitleParser parser, byte[] data) {
|
private static ImmutableList<CuesWithTiming> parseAllCues(SubtitleParser parser, byte[] data) {
|
||||||
ImmutableList.Builder<CuesWithTiming> cues = ImmutableList.builder();
|
ImmutableList.Builder<CuesWithTiming> cues = ImmutableList.builder();
|
||||||
parser.parse(data, OutputOptions.allCues(), cues::add);
|
parser.parse(data, OutputOptions.allCues(), cues::add);
|
||||||
|
@ -0,0 +1,11 @@
|
|||||||
|
1
|
||||||
|
00:00:00,000 --> 00:00:01,234
|
||||||
|
This is the first subtitle.
|
||||||
|
|
||||||
|
2
|
||||||
|
00:00:02,345 --> 00:00:03,4560
|
||||||
|
This should be discarded (too many decimal places).
|
||||||
|
|
||||||
|
3
|
||||||
|
02:00:04,567 --> 02:00:08,90
|
||||||
|
This should be discarded (too few decimal places).
|
@ -12,3 +12,9 @@ This should be discarded (too many dots).
|
|||||||
|
|
||||||
00:06.789 --> not-a-timestamp
|
00:06.789 --> not-a-timestamp
|
||||||
This should be discarded (not a timestamp).
|
This should be discarded (not a timestamp).
|
||||||
|
|
||||||
|
00:07.789 --> 00:08.7890
|
||||||
|
This should be discarded (too many decimal places).
|
||||||
|
|
||||||
|
00:09.789 --> 00:10.78
|
||||||
|
This should be discarded (too few decimal places).
|
||||||
|
Loading…
x
Reference in New Issue
Block a user