From 72f093c4f6d66cea021756e1cbe931b52f55a79e Mon Sep 17 00:00:00 2001 From: Oliver Woodman Date: Wed, 25 Nov 2015 17:05:20 +0000 Subject: [PATCH] WebVTT parser improvements. * Split findNextCueHeader and validateWebVttHeader into static methods. This is a step toward WebVTT in HLS, where we'll need to re-use these to peek at the top of the WebVTT file (they'll be moved into a util class). * Made parser robust against bad cue headers + added a test. * Removed spurious looking assertion in WebvttSubtitle. --- .../assets/webvtt/with_bad_cue_header | 10 ++ .../text/webvtt/WebvttParserTest.java | 15 +++ .../exoplayer/text/webvtt/WebvttParser.java | 110 ++++++++++-------- .../exoplayer/text/webvtt/WebvttSubtitle.java | 1 - 4 files changed, 87 insertions(+), 49 deletions(-) create mode 100644 library/src/androidTest/assets/webvtt/with_bad_cue_header diff --git a/library/src/androidTest/assets/webvtt/with_bad_cue_header b/library/src/androidTest/assets/webvtt/with_bad_cue_header new file mode 100644 index 0000000000..0cb4e8099e --- /dev/null +++ b/library/src/androidTest/assets/webvtt/with_bad_cue_header @@ -0,0 +1,10 @@ +WEBVTT # This comment is allowed + +00:00.000 --> 00:01.234 +This is the first subtitle. + +00:02.badbadbadbadbadbad --> 00:03.456 +This is the second subtitle. + +00:04.000 --> 00:05.000 +This is the third subtitle. diff --git a/library/src/androidTest/java/com/google/android/exoplayer/text/webvtt/WebvttParserTest.java b/library/src/androidTest/java/com/google/android/exoplayer/text/webvtt/WebvttParserTest.java index b0d7271c9c..07967e7df7 100644 --- a/library/src/androidTest/java/com/google/android/exoplayer/text/webvtt/WebvttParserTest.java +++ b/library/src/androidTest/java/com/google/android/exoplayer/text/webvtt/WebvttParserTest.java @@ -33,6 +33,7 @@ public class WebvttParserTest extends InstrumentationTestCase { private static final String TYPICAL_WITH_IDS_FILE = "webvtt/typical_with_identifiers"; private static final String TYPICAL_WITH_COMMENTS_FILE = "webvtt/typical_with_comments"; private static final String WITH_POSITIONING_FILE = "webvtt/with_positioning"; + private static final String WITH_BAD_CUE_HEADER_FILE = "webvtt/with_bad_cue_header"; private static final String WITH_TAGS_FILE = "webvtt/with_tags"; private static final String EMPTY_FILE = "webvtt/empty"; @@ -132,6 +133,20 @@ public class WebvttParserTest extends InstrumentationTestCase { Cue.ANCHOR_TYPE_END, 0.1f); } + public void testParseWithBadCueHeader() throws IOException { + WebvttParser parser = new WebvttParser(); + InputStream inputStream = + getInstrumentation().getContext().getResources().getAssets().open(WITH_BAD_CUE_HEADER_FILE); + WebvttSubtitle subtitle = parser.parse(inputStream); + + // test event count + assertEquals(4, subtitle.getEventTimeCount()); + + // test cues + assertCue(subtitle, 0, 0, 1234000, "This is the first subtitle."); + assertCue(subtitle, 2, 4000000, 5000000, "This is the third subtitle."); + } + private static void assertCue(WebvttSubtitle subtitle, int eventTimeIndex, long startTimeUs, int endTimeUs, String text) { assertCue(subtitle, eventTimeIndex, startTimeUs, endTimeUs, text, null, Cue.DIMEN_UNSET, diff --git a/library/src/main/java/com/google/android/exoplayer/text/webvtt/WebvttParser.java b/library/src/main/java/com/google/android/exoplayer/text/webvtt/WebvttParser.java index 08361c5acb..346069c50e 100644 --- a/library/src/main/java/com/google/android/exoplayer/text/webvtt/WebvttParser.java +++ b/library/src/main/java/com/google/android/exoplayer/text/webvtt/WebvttParser.java @@ -23,6 +23,7 @@ import com.google.android.exoplayer.util.MimeTypes; import android.text.Html; import android.text.Layout.Alignment; +import android.text.TextUtils; import android.util.Log; import java.io.BufferedReader; @@ -43,9 +44,9 @@ public final class WebvttParser implements SubtitleParser { private static final String TAG = "WebvttParser"; private static final Pattern HEADER = Pattern.compile("^\uFEFF?WEBVTT((\u0020|\u0009).*)?$"); - private static final Pattern COMMENT_BLOCK = Pattern.compile("^NOTE((\u0020|\u0009).*)?$"); + private static final Pattern COMMENT = Pattern.compile("^NOTE((\u0020|\u0009).*)?$"); private static final Pattern CUE_HEADER = Pattern.compile("^(\\S+)\\s+-->\\s+(\\S+)(.*)?$"); - private static final Pattern CUE_SETTING = Pattern.compile("\\S+?:\\S+"); + private static final Pattern CUE_SETTING = Pattern.compile("(\\S+?):(\\S+)"); private final PositionHolder positionHolder; private final StringBuilder textBuilder; @@ -62,51 +63,26 @@ public final class WebvttParser implements SubtitleParser { @Override public final WebvttSubtitle parse(InputStream inputStream) throws IOException { - ArrayList subtitles = new ArrayList<>(); - BufferedReader webvttData = new BufferedReader(new InputStreamReader(inputStream, C.UTF8_NAME)); - String line; - // File should start with "WEBVTT". - line = webvttData.readLine(); - if (line == null || !HEADER.matcher(line).matches()) { - throw new ParserException("Expected WEBVTT. Got " + line); - } + // Validate the first line of the header, and skip the remainder. + validateWebvttHeaderLine(webvttData); + while (!TextUtils.isEmpty(webvttData.readLine())) {} - // Parse the remainder of the header. - while (true) { - line = webvttData.readLine(); - if (line == null) { - // We reached EOF before finishing the header. - throw new ParserException("Expected an empty line after webvtt header"); - } else if (line.isEmpty()) { - // We read the newline that separates the header from the body. - break; - } - } - - // Process the cues and text. - while ((line = webvttData.readLine()) != null) { - // Skip a comment block, if present. - Matcher matcher = COMMENT_BLOCK.matcher(line); - if (matcher.find()) { - // Skip until the end of the comment block. - while ((line = webvttData.readLine()) != null && !line.isEmpty()) { - // Ignore comment text. - } + ArrayList subtitles = new ArrayList<>(); + Matcher cueHeaderMatcher; + while ((cueHeaderMatcher = findNextCueHeader(webvttData)) != null) { + long cueStartTime; + long cueEndTime; + try { + // Parse the cue start and end times. + cueStartTime = parseTimestampUs(cueHeaderMatcher.group(1)); + cueEndTime = parseTimestampUs(cueHeaderMatcher.group(2)); + } catch (NumberFormatException e) { + Log.w(TAG, "Skipping cue with bad header: " + cueHeaderMatcher.group()); continue; } - // Skip anything other than a cue header. - matcher = CUE_HEADER.matcher(line); - if (!matcher.matches()) { - continue; - } - - // Parse the cue start and end times. - long cueStartTime = parseTimestampUs(matcher.group(1)); - long cueEndTime = parseTimestampUs(matcher.group(2)); - // Default cue settings. Alignment cueTextAlignment = null; float cueLine = Cue.DIMEN_UNSET; @@ -117,12 +93,10 @@ public final class WebvttParser implements SubtitleParser { float cueWidth = Cue.DIMEN_UNSET; // Parse the cue settings list. - matcher = CUE_SETTING.matcher(matcher.group(3)); - while (matcher.find()) { - String match = matcher.group(); - String[] parts = match.split(":", 2); - String name = parts[0]; - String value = parts[1]; + Matcher cueSettingMatcher = CUE_SETTING.matcher(cueHeaderMatcher.group(3)); + while (cueSettingMatcher.find()) { + String name = cueSettingMatcher.group(1); + String value = cueSettingMatcher.group(2); try { if ("line".equals(name)) { parseLineAttribute(value, positionHolder); @@ -141,7 +115,7 @@ public final class WebvttParser implements SubtitleParser { Log.w(TAG, "Unknown cue setting " + name + ":" + value); } } catch (NumberFormatException e) { - Log.w(TAG, e.getMessage() + ": " + match); + Log.w(TAG, "Skipping bad cue setting: " + cueSettingMatcher.group()); } } @@ -153,6 +127,7 @@ public final class WebvttParser implements SubtitleParser { // Parse the cue text. textBuilder.setLength(0); + String line; while ((line = webvttData.readLine()) != null && !line.isEmpty()) { if (textBuilder.length() > 0) { textBuilder.append("
"); @@ -169,6 +144,45 @@ public final class WebvttParser implements SubtitleParser { return new WebvttSubtitle(subtitles); } + /** + * Reads and validates the first line of a WebVTT file. + * + * @param input The input from which the line should be read. + * @throws ParserException If the line isn't the start of a valid WebVTT file. + * @throws IOException If an error occurs reading from the input. + */ + private static void validateWebvttHeaderLine(BufferedReader input) throws IOException { + String line = input.readLine(); + if (line == null || !HEADER.matcher(line).matches()) { + throw new ParserException("Expected WEBVTT. Got " + line); + } + } + + /** + * Reads lines up to and including the next WebVTT cue header. + * + * @param input The input from which lines should be read. + * @throws IOException If an error occurs reading from the input. + * @return A {@link Matcher} for the WebVTT cue header, or null if the end of the input was + * reached without a cue header being found. In the case that a cue header is found, groups 1, + * 2 and 3 of the returned matcher contain the start time, end time and settings list. + */ + private static Matcher findNextCueHeader(BufferedReader input) throws IOException { + String line; + while ((line = input.readLine()) != null) { + if (COMMENT.matcher(line).matches()) { + // Skip until the end of the comment block. + while ((line = input.readLine()) != null && !line.isEmpty()) {} + } else { + Matcher cueHeaderMatcher = CUE_HEADER.matcher(line); + if (cueHeaderMatcher.matches()) { + return cueHeaderMatcher; + } + } + } + return null; + } + private static long parseTimestampUs(String s) throws NumberFormatException { long value = 0; String[] parts = s.split("\\.", 2); diff --git a/library/src/main/java/com/google/android/exoplayer/text/webvtt/WebvttSubtitle.java b/library/src/main/java/com/google/android/exoplayer/text/webvtt/WebvttSubtitle.java index 1f78a82a7e..309ca1d535 100644 --- a/library/src/main/java/com/google/android/exoplayer/text/webvtt/WebvttSubtitle.java +++ b/library/src/main/java/com/google/android/exoplayer/text/webvtt/WebvttSubtitle.java @@ -56,7 +56,6 @@ public final class WebvttSubtitle implements Subtitle { @Override public int getNextEventTimeIndex(long timeUs) { - Assertions.checkArgument(timeUs >= 0); int index = Util.binarySearchCeil(sortedCueTimesUs, timeUs, false, false); return index < sortedCueTimesUs.length ? index : -1; }