Detect WebVTT file header according to the spec:

1. An optional U+FEFF BYTE ORDER MARK (BOM) character.
2. The string "WEBVTT".
3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER
   TABULATION (tab) character followed by any number of characters that
   are not U+000A LINE FEED (LF) or U+000D CARRIAGE RETURN (CR) characters.
4. Exactly one WebVTT line terminator to terminate the line with the file
   magic and separate it from the rest of the body.

Issue: #580
This commit is contained in:
Oliver Woodman 2015-06-26 14:26:31 +01:00
parent 712756c3e8
commit a1ab1fc2a2

View File

@ -44,6 +44,10 @@ public class WebvttParser implements SubtitleParser {
private static final long SAMPLING_RATE = 90;
/**
 * Regular expression for the first line of a WebVTT file: an optional byte order mark (U+FEFF),
 * the literal "WEBVTT", then optionally a space (U+0020) or a tab (U+0009) followed by any run
 * of characters matched by the regex dot (which, by default, excludes line terminators).
 *
 * <p>The BOM and tab are written as Java unicode escapes, so the compiler places the literal
 * characters in the pattern; the space is written with a doubled backslash, so the regex engine
 * itself interprets that escape. Both forms match the intended character.
 */
private static final String WEBVTT_FILE_HEADER_STRING = "^\uFEFF?WEBVTT((\\u0020|\u0009).*)?$";
/** Precompiled form of {@link #WEBVTT_FILE_HEADER_STRING}, compiled once for reuse. */
private static final Pattern WEBVTT_FILE_HEADER =
Pattern.compile(WEBVTT_FILE_HEADER_STRING);
/**
 * Regular expression consisting of a (possibly empty) run of non-whitespace characters, a single
 * ':' or '=', and another (possibly empty) run of non-whitespace characters.
 */
private static final String WEBVTT_METADATA_HEADER_STRING = "\\S*[:=]\\S*";
/** Precompiled form of {@link #WEBVTT_METADATA_HEADER_STRING}, compiled once for reuse. */
private static final Pattern WEBVTT_METADATA_HEADER =
Pattern.compile(WEBVTT_METADATA_HEADER_STRING);
@ -116,7 +120,7 @@ public class WebvttParser implements SubtitleParser {
}
}
if (!line.equals("WEBVTT") && !line.equals("\uFEFFWEBVTT")) {
if (!WEBVTT_FILE_HEADER.matcher(line).matches()) {
throw new ParserException("Expected WEBVTT. Got " + line);
}