From 925795cb4f110d1cb3dcaea29d550c60b5355dc5 Mon Sep 17 00:00:00 2001 From: joli Date: Fri, 11 Sep 2015 02:58:56 +0200 Subject: [PATCH] Improve WebVTT parser according to WebVTT spec - Line parameter - Added support for value and line alignment attributes. - Support negative numbers when line is an absolute number (not a percentage). - Position parameter - Added support for value and position alignment attributes - Added support for WebVTT comment blocks - Percentage values now accept decimal numbers (as webvtt spec states) - Added new WebVTT tests for testing all new implemented features --- .../assets/webvtt/typical_with_comments | 18 ++++ .../assets/webvtt/typical_with_metadata | 22 +++++ .../text/webvtt/WebvttParserTest.java | 87 ++++++++++++++++++ .../exoplayer/text/webvtt/WebvttParser.java | 90 ++++++++++++++----- 4 files changed, 197 insertions(+), 20 deletions(-) create mode 100644 library/src/androidTest/assets/webvtt/typical_with_comments create mode 100644 library/src/androidTest/assets/webvtt/typical_with_metadata diff --git a/library/src/androidTest/assets/webvtt/typical_with_comments b/library/src/androidTest/assets/webvtt/typical_with_comments new file mode 100644 index 0000000000..6d116e2db3 --- /dev/null +++ b/library/src/androidTest/assets/webvtt/typical_with_comments @@ -0,0 +1,18 @@ +WEBVTT + +NOTE +This is a comment block +with multiple lines + +1 +00:00.000 --> 00:01.234 +This is the first subtitle. + +NOTE Single line comment + +2 +00:02.345 --> 00:03.456 +This is the second subtitle. 
+ +NOTE +File ending with a comment diff --git a/library/src/androidTest/assets/webvtt/typical_with_metadata b/library/src/androidTest/assets/webvtt/typical_with_metadata new file mode 100644 index 0000000000..61fa0cc690 --- /dev/null +++ b/library/src/androidTest/assets/webvtt/typical_with_metadata @@ -0,0 +1,22 @@ +WEBVTT + +NOTE Position with percentage and position alignment + +00:00:00.000 --> 00:00:01.234 position:10%,start align:start size:35% +This is the first subtitle. + +NOTE Wrong position provided. It should be provided as +a percentage value + +00:02.345 --> 00:03.456 position:10 align:end size:35% +This is the second subtitle. + +NOTE Line as percentage and line alignment + +00:04.000 --> 00:05.000 line:45%,end align:middle size:35% +This is the third subtitle. + +NOTE Line as absolute negative number and without line alignment + +00:06.000 --> 00:07.000 line:-10 align:middle size:35% +This is the forth subtitle. diff --git a/library/src/androidTest/java/com/google/android/exoplayer/text/webvtt/WebvttParserTest.java b/library/src/androidTest/java/com/google/android/exoplayer/text/webvtt/WebvttParserTest.java index bcbc2e9206..9b643a5b36 100644 --- a/library/src/androidTest/java/com/google/android/exoplayer/text/webvtt/WebvttParserTest.java +++ b/library/src/androidTest/java/com/google/android/exoplayer/text/webvtt/WebvttParserTest.java @@ -16,6 +16,9 @@ package com.google.android.exoplayer.text.webvtt; import android.test.InstrumentationTestCase; +import android.text.Layout; + +import com.google.android.exoplayer.text.Cue; import java.io.IOException; import java.io.InputStream; @@ -28,6 +31,8 @@ public class WebvttParserTest extends InstrumentationTestCase { private static final String TYPICAL_WEBVTT_FILE = "webvtt/typical"; private static final String TYPICAL_WITH_IDS_WEBVTT_FILE = "webvtt/typical_with_identifiers"; private static final String TYPICAL_WITH_TAGS_WEBVTT_FILE = "webvtt/typical_with_tags"; + private static final String 
TYPICAL_WITH_COMMENTS_WEBVTT_FILE = "webvtt/typical_with_comments"; + private static final String TYPICAL_WITH_METADATA_WEBVTT_FILE = "webvtt/typical_with_metadata"; private static final String LIVE_TYPICAL_WEBVTT_FILE = "webvtt/live_typical"; private static final String EMPTY_WEBVTT_FILE = "webvtt/empty"; @@ -124,6 +129,88 @@ public class WebvttParserTest extends InstrumentationTestCase { assertEquals(7000000, subtitle.getEventTime(7)); } + public void testParseTypicalWithCommentsWebvttFile() throws IOException { + WebvttParser parser = new WebvttParser(); + InputStream inputStream = + getInstrumentation().getContext().getResources().getAssets() + .open(TYPICAL_WITH_COMMENTS_WEBVTT_FILE); + WebvttSubtitle subtitle = parser.parse(inputStream); + + // test event count + assertEquals(4, subtitle.getEventTimeCount()); + + // test first cue + assertEquals(0, subtitle.getEventTime(0)); + assertEquals("This is the first subtitle.", + subtitle.getCues(subtitle.getEventTime(0)).get(0).text.toString()); + assertEquals(1234000, subtitle.getEventTime(1)); + + // test second cue + assertEquals(2345000, subtitle.getEventTime(2)); + assertEquals("This is the second subtitle.", + subtitle.getCues(subtitle.getEventTime(2)).get(0).text.toString()); + assertEquals(3456000, subtitle.getEventTime(3)); + } + + public void testParseTypicalWithMetadataWebvttFile() throws IOException { + WebvttParser parser = new WebvttParser(); + InputStream inputStream = + getInstrumentation().getContext().getResources().getAssets() + .open(TYPICAL_WITH_METADATA_WEBVTT_FILE); + WebvttSubtitle subtitle = parser.parse(inputStream); + + // test event count + assertEquals(8, subtitle.getEventTimeCount()); + + // test first cue + assertEquals(0, subtitle.getEventTime(0)); + assertEquals("This is the first subtitle.", + subtitle.getCues(subtitle.getEventTime(0)).get(0).text.toString()); + assertEquals(10, + subtitle.getCues(subtitle.getEventTime(0)).get(0).position); + 
assertEquals(Layout.Alignment.ALIGN_NORMAL, + subtitle.getCues(subtitle.getEventTime(0)).get(0).alignment); + assertEquals(35, + subtitle.getCues(subtitle.getEventTime(0)).get(0).size); + assertEquals(1234000, subtitle.getEventTime(1)); + + // test second cue + assertEquals(2345000, subtitle.getEventTime(2)); + assertEquals("This is the second subtitle.", + subtitle.getCues(subtitle.getEventTime(2)).get(0).text.toString()); + assertEquals(Cue.UNSET_VALUE, + subtitle.getCues(subtitle.getEventTime(2)).get(0).position); + assertEquals(Layout.Alignment.ALIGN_OPPOSITE, + subtitle.getCues(subtitle.getEventTime(2)).get(0).alignment); + assertEquals(35, + subtitle.getCues(subtitle.getEventTime(2)).get(0).size); + assertEquals(3456000, subtitle.getEventTime(3)); + + // test third cue + assertEquals(4000000, subtitle.getEventTime(4)); + assertEquals("This is the third subtitle.", + subtitle.getCues(subtitle.getEventTime(4)).get(0).text.toString()); + assertEquals(45, + subtitle.getCues(subtitle.getEventTime(4)).get(0).line); + assertEquals(Layout.Alignment.ALIGN_CENTER, + subtitle.getCues(subtitle.getEventTime(4)).get(0).alignment); + assertEquals(35, + subtitle.getCues(subtitle.getEventTime(4)).get(0).size); + assertEquals(5000000, subtitle.getEventTime(5)); + + // test forth cue + assertEquals(6000000, subtitle.getEventTime(6)); + assertEquals("This is the forth subtitle.", + subtitle.getCues(subtitle.getEventTime(6)).get(0).text.toString()); + assertEquals(-10, + subtitle.getCues(subtitle.getEventTime(6)).get(0).line); + assertEquals(Layout.Alignment.ALIGN_CENTER, + subtitle.getCues(subtitle.getEventTime(6)).get(0).alignment); + assertEquals(35, + subtitle.getCues(subtitle.getEventTime(6)).get(0).size); + assertEquals(7000000, subtitle.getEventTime(7)); + } + public void testParseLiveTypicalWebvttFile() throws IOException { WebvttParser parser = new WebvttParser(); InputStream inputStream = diff --git 
a/library/src/main/java/com/google/android/exoplayer/text/webvtt/WebvttParser.java b/library/src/main/java/com/google/android/exoplayer/text/webvtt/WebvttParser.java index 6ff7237743..03d7ae98dc 100644 --- a/library/src/main/java/com/google/android/exoplayer/text/webvtt/WebvttParser.java +++ b/library/src/main/java/com/google/android/exoplayer/text/webvtt/WebvttParser.java @@ -24,6 +24,7 @@ import com.google.android.exoplayer.util.MimeTypes; import android.text.Html; import android.text.Layout.Alignment; import android.util.Log; +import android.util.Pair; import java.io.BufferedReader; import java.io.IOException; @@ -42,10 +43,14 @@ public final class WebvttParser implements SubtitleParser { private static final String TAG = "WebvttParser"; - private static final String WEBVTT_FILE_HEADER_STRING = "^\uFEFF?WEBVTT((\\u0020|\u0009).*)?$"; + private static final String WEBVTT_FILE_HEADER_STRING = "^\uFEFF?WEBVTT((\u0020|\u0009).*)?$"; private static final Pattern WEBVTT_FILE_HEADER = Pattern.compile(WEBVTT_FILE_HEADER_STRING); + private static final String WEBVTT_COMMENT_BLOCK_STRING = "^NOTE((\u0020|\u0009).*)?$"; + private static final Pattern WEBVTT_COMMENT_BLOCK = + Pattern.compile(WEBVTT_COMMENT_BLOCK_STRING); + private static final String WEBVTT_METADATA_HEADER_STRING = "\\S*[:=]\\S*"; private static final Pattern WEBVTT_METADATA_HEADER = Pattern.compile(WEBVTT_METADATA_HEADER_STRING); @@ -60,6 +65,8 @@ public final class WebvttParser implements SubtitleParser { private static final String WEBVTT_CUE_SETTING_STRING = "\\S*:\\S*"; private static final Pattern WEBVTT_CUE_SETTING = Pattern.compile(WEBVTT_CUE_SETTING_STRING); + private static final String WEBVTT_PERCENTAGE_NUMBER_STRING = "^([0-9]+|[0-9]+\\.[0-9]+)$"; + private static final String NON_NUMERIC_STRING = ".*[^0-9].*"; private final StringBuilder textBuilder; @@ -118,9 +125,18 @@ public final class WebvttParser implements SubtitleParser { // process the cues and text while ((line = 
webvttData.readLine()) != null) { + // parse webvtt comment block in case it is present + Matcher matcher = WEBVTT_COMMENT_BLOCK.matcher(line); + if(matcher.find()) { + // read lines until finding an empty one (webvtt line terminator: CRLF, or LF or CR) + while (((line = webvttData.readLine()) != null) && (!line.isEmpty())) { + // just ignoring comment text + } + continue; + } // parse the cue identifier (if present) { - Matcher matcher = WEBVTT_CUE_IDENTIFIER.matcher(line); + matcher = WEBVTT_CUE_IDENTIFIER.matcher(line); if (matcher.find()) { // ignore the identifier (we currently don't use it) and read the next line line = webvttData.readLine(); @@ -164,29 +180,30 @@ public final class WebvttParser implements SubtitleParser { try { if ("line".equals(name)) { + Pair<String, Alignment> lineMetadata = parseLinePositionAttributes(value); + value = lineMetadata.first; if (value.endsWith("%")) { lineNum = parseIntPercentage(value); - } else if (value.matches(NON_NUMERIC_STRING)) { - Log.w(TAG, "Invalid line value: " + value); } else { - lineNum = Integer.parseInt(value); + // Following WebVTT spec, line number can be a negative number + int sign = 1; + if (value.startsWith("-") && value.length() > 1) { + sign = -1; + value = value.substring(1); + } + + if (value.matches(NON_NUMERIC_STRING)) { + Log.w(TAG, "Invalid line value: " + value); + } else { + lineNum = sign * Integer.parseInt(value); + } } } else if ("align".equals(name)) { // TODO: handle for RTL languages - if ("start".equals(value)) { - alignment = Alignment.ALIGN_NORMAL; - } else if ("middle".equals(value)) { - alignment = Alignment.ALIGN_CENTER; - } else if ("end".equals(value)) { - alignment = Alignment.ALIGN_OPPOSITE; - } else if ("left".equals(value)) { - alignment = Alignment.ALIGN_NORMAL; - } else if ("right".equals(value)) { - alignment = Alignment.ALIGN_OPPOSITE; - } else { - Log.w(TAG, "Invalid align value: " + value); - } + alignment = parseAlignment(value); } else if ("position".equals(name)) { + Pair<String, Alignment> lineMetadata
= parseLinePositionAttributes(value); + value = lineMetadata.first; position = parseIntPercentage(value); } else if ("size".equals(name)) { size = parseIntPercentage(value); @@ -226,11 +243,11 @@ public final class WebvttParser implements SubtitleParser { } s = s.substring(0, s.length() - 1); - if (s.matches(NON_NUMERIC_STRING)) { + if (!s.matches(WEBVTT_PERCENTAGE_NUMBER_STRING)) { throw new NumberFormatException(s + " contains an invalid character"); } - int value = Integer.parseInt(s); + int value = Math.round(Float.parseFloat(s)); if (value < 0 || value > 100) { throw new NumberFormatException(value + " is out of range [0-100]"); } @@ -250,4 +267,37 @@ public final class WebvttParser implements SubtitleParser { return (value * 1000 + Long.parseLong(parts[1])) * 1000; } + private static Pair<String, Alignment> parseLinePositionAttributes(String s) { + String value; + Alignment alignment = null; + + int commaPos; + if ((commaPos = s.indexOf(",")) > 0 && commaPos < s.length() - 1) { + alignment = parseAlignment(s.substring(commaPos + 1)); + value = s.substring(0, commaPos); + } else { + value = s; + } + + return new Pair<String, Alignment>(value, alignment); + } + + private static Alignment parseAlignment(String s) { + Alignment alignment = null; + if ("start".equals(s)) { + alignment = Alignment.ALIGN_NORMAL; + } else if ("middle".equals(s)) { + alignment = Alignment.ALIGN_CENTER; + } else if ("end".equals(s)) { + alignment = Alignment.ALIGN_OPPOSITE; + } else if ("left".equals(s)) { + alignment = Alignment.ALIGN_NORMAL; + } else if ("right".equals(s)) { + alignment = Alignment.ALIGN_OPPOSITE; + } else { + Log.w(TAG, "Invalid align value: " + s); + } + return alignment; + } + }