Minor Webvtt parsing tweaks

This commit is contained in:
Oliver Woodman 2014-11-06 19:27:28 +00:00
parent d2e73dd566
commit eccf8d7924

View File

@ -53,8 +53,13 @@ public class WebvttParser implements SubtitleParser {
private static final long SAMPLING_RATE = 90; private static final long SAMPLING_RATE = 90;
private static final String WEBVTT_METADATA_HEADER_STRING = "\\S*[:=]\\S*";
private static final Pattern WEBVTT_METADATA_HEADER =
Pattern.compile(WEBVTT_METADATA_HEADER_STRING);
private static final String WEBVTT_TIMESTAMP_STRING = "(\\d+:)?[0-5]\\d:[0-5]\\d\\.\\d{3}"; private static final String WEBVTT_TIMESTAMP_STRING = "(\\d+:)?[0-5]\\d:[0-5]\\d\\.\\d{3}";
private static final Pattern WEBVTT_TIMESTAMP = Pattern.compile(WEBVTT_TIMESTAMP_STRING); private static final Pattern WEBVTT_TIMESTAMP = Pattern.compile(WEBVTT_TIMESTAMP_STRING);
private static final Pattern MEDIA_TIMESTAMP_OFFSET = Pattern.compile(OFFSET + "\\d+"); private static final Pattern MEDIA_TIMESTAMP_OFFSET = Pattern.compile(OFFSET + "\\d+");
private static final Pattern MEDIA_TIMESTAMP = Pattern.compile("MPEGTS:\\d+"); private static final Pattern MEDIA_TIMESTAMP = Pattern.compile("MPEGTS:\\d+");
@ -90,30 +95,33 @@ public class WebvttParser implements SubtitleParser {
throw new ParserException("Expected WEBVTT. Got " + line); throw new ParserException("Expected WEBVTT. Got " + line);
} }
// after "WEBVTT" there should be either an empty line or an "X-TIMESTAMP-MAP" line and then // parse the remainder of the header
// an empty line while (true) {
line = webvttData.readLine(); line = webvttData.readLine();
if (!line.isEmpty()) { if (line == null) {
if (!line.startsWith("X-TIMESTAMP-MAP")) { // we reached EOF before finishing the header
throw new ParserException("Expected an empty line or X-TIMESTAMP-MAP. Got " + line); throw new ParserException("Expected an empty line after webvtt header");
} else if (line.isEmpty()) {
// we've read the newline that separates the header from the body
break;
} }
// parse the media timestamp Matcher matcher = WEBVTT_METADATA_HEADER.matcher(line);
Matcher matcher = MEDIA_TIMESTAMP.matcher(line);
if (!matcher.find()) { if (!matcher.find()) {
throw new ParserException("X-TIMESTAMP-MAP doesn't contain media timestmap: " + line); throw new ParserException("Expected webvtt metadata header; got: " + line);
}
if (line.startsWith("X-TIMESTAMP-MAP")) {
// parse the media timestamp
Matcher timestampMatcher = MEDIA_TIMESTAMP.matcher(line);
if (!timestampMatcher.find()) {
throw new ParserException("X-TIMESTAMP-MAP doesn't contain media timestamp: " + line);
} else { } else {
mediaTimestampUs = (Long.parseLong(matcher.group().substring(7)) * 1000) / SAMPLING_RATE mediaTimestampUs = (Long.parseLong(timestampMatcher.group().substring(7)) * 1000)
- mediaTimestampOffsetUs; / SAMPLING_RATE - mediaTimestampOffsetUs;
} }
mediaTimestampUs = getAdjustedStartTime(mediaTimestampUs); mediaTimestampUs = getAdjustedStartTime(mediaTimestampUs);
// read in the next line (which should be an empty line)
line = webvttData.readLine();
} }
if (!line.isEmpty()) {
throw new ParserException("Expected an empty line after WEBVTT or X-TIMESTAMP-MAP. Got "
+ line);
} }
// process the cues and text // process the cues and text