Improve WebVTT parser according to WebVTT spec

- Line parameter
  - Added support for value and line alignment attributes.
  - Support negative numbers when line is an absolute number (not a
percentage).
- Position parameter
  - Added support for value and position alignment attributes.
- Added support for WebVTT comment blocks
- Percentage values now accept decimal numbers (as the WebVTT spec states)
- Added new WebVTT tests covering all newly implemented features
This commit is contained in:
joli 2015-09-11 02:58:56 +02:00
parent 5050913ba0
commit 925795cb4f
4 changed files with 197 additions and 20 deletions

View File

@ -0,0 +1,18 @@
WEBVTT
NOTE
This is a comment block
with multiple lines
1
00:00.000 --> 00:01.234
This is the first subtitle.
NOTE Single line comment
2
00:02.345 --> 00:03.456
This is the second subtitle.
NOTE
File ending with a comment

View File

@ -0,0 +1,22 @@
WEBVTT
NOTE Position with percentage and position alignment
00:00:00.000 --> 00:00:01.234 position:10%,start align:start size:35%
This is the first subtitle.
NOTE Wrong position provided. It should be provided as
a percentage value
00:02.345 --> 00:03.456 position:10 align:end size:35%
This is the second subtitle.
NOTE Line as percentage and line alignment
00:04.000 --> 00:05.000 line:45%,end align:middle size:35%
This is the third subtitle.
NOTE Line as absolute negative number and without line alignment
00:06.000 --> 00:07.000 line:-10 align:middle size:35%
This is the forth subtitle.

View File

@ -16,6 +16,9 @@
package com.google.android.exoplayer.text.webvtt;
import android.test.InstrumentationTestCase;
import android.text.Layout;
import com.google.android.exoplayer.text.Cue;
import java.io.IOException;
import java.io.InputStream;
@ -28,6 +31,8 @@ public class WebvttParserTest extends InstrumentationTestCase {
private static final String TYPICAL_WEBVTT_FILE = "webvtt/typical";
private static final String TYPICAL_WITH_IDS_WEBVTT_FILE = "webvtt/typical_with_identifiers";
private static final String TYPICAL_WITH_TAGS_WEBVTT_FILE = "webvtt/typical_with_tags";
private static final String TYPICAL_WITH_COMMENTS_WEBVTT_FILE = "webvtt/typical_with_comments";
private static final String TYPICAL_WITH_METADATA_WEBVTT_FILE = "webvtt/typical_with_metadata";
private static final String LIVE_TYPICAL_WEBVTT_FILE = "webvtt/live_typical";
private static final String EMPTY_WEBVTT_FILE = "webvtt/empty";
@ -124,6 +129,88 @@ public class WebvttParserTest extends InstrumentationTestCase {
assertEquals(7000000, subtitle.getEventTime(7));
}
/**
 * Parses the {@link #TYPICAL_WITH_COMMENTS_WEBVTT_FILE} asset and checks that exactly four
 * event times are reported, with the expected text at the first and third event times.
 */
public void testParseTypicalWithCommentsWebvttFile() throws IOException {
  WebvttParser webvttParser = new WebvttParser();
  InputStream assetStream = getInstrumentation().getContext().getResources().getAssets()
      .open(TYPICAL_WITH_COMMENTS_WEBVTT_FILE);
  WebvttSubtitle parsed = webvttParser.parse(assetStream);

  // Total number of event times reported for the file.
  assertEquals(4, parsed.getEventTimeCount());

  // First cue: event times 0 and 1.
  long firstCueStartUs = parsed.getEventTime(0);
  assertEquals(0, firstCueStartUs);
  assertEquals("This is the first subtitle.",
      parsed.getCues(firstCueStartUs).get(0).text.toString());
  assertEquals(1234000, parsed.getEventTime(1));

  // Second cue: event times 2 and 3.
  long secondCueStartUs = parsed.getEventTime(2);
  assertEquals(2345000, secondCueStartUs);
  assertEquals("This is the second subtitle.",
      parsed.getCues(secondCueStartUs).get(0).text.toString());
  assertEquals(3456000, parsed.getEventTime(3));
}
/**
 * Parses the {@link #TYPICAL_WITH_METADATA_WEBVTT_FILE} asset and checks the text, the
 * line/position values, the alignment and the size reported for each of its four cues.
 */
public void testParseTypicalWithMetadataWebvttFile() throws IOException {
  WebvttParser webvttParser = new WebvttParser();
  InputStream assetStream = getInstrumentation().getContext().getResources().getAssets()
      .open(TYPICAL_WITH_METADATA_WEBVTT_FILE);
  WebvttSubtitle parsed = webvttParser.parse(assetStream);

  // Total number of event times reported for the file.
  assertEquals(8, parsed.getEventTimeCount());

  // First cue: position 10, normal alignment, size 35.
  assertEquals(0, parsed.getEventTime(0));
  Cue firstCue = parsed.getCues(parsed.getEventTime(0)).get(0);
  assertEquals("This is the first subtitle.", firstCue.text.toString());
  assertEquals(10, firstCue.position);
  assertEquals(Layout.Alignment.ALIGN_NORMAL, firstCue.alignment);
  assertEquals(35, firstCue.size);
  assertEquals(1234000, parsed.getEventTime(1));

  // Second cue: position is expected to stay unset, opposite alignment, size 35.
  assertEquals(2345000, parsed.getEventTime(2));
  Cue secondCue = parsed.getCues(parsed.getEventTime(2)).get(0);
  assertEquals("This is the second subtitle.", secondCue.text.toString());
  assertEquals(Cue.UNSET_VALUE, secondCue.position);
  assertEquals(Layout.Alignment.ALIGN_OPPOSITE, secondCue.alignment);
  assertEquals(35, secondCue.size);
  assertEquals(3456000, parsed.getEventTime(3));

  // Third cue: line 45, center alignment, size 35.
  assertEquals(4000000, parsed.getEventTime(4));
  Cue thirdCue = parsed.getCues(parsed.getEventTime(4)).get(0);
  assertEquals("This is the third subtitle.", thirdCue.text.toString());
  assertEquals(45, thirdCue.line);
  assertEquals(Layout.Alignment.ALIGN_CENTER, thirdCue.alignment);
  assertEquals(35, thirdCue.size);
  assertEquals(5000000, parsed.getEventTime(5));

  // Fourth cue: negative line number -10, center alignment, size 35.
  assertEquals(6000000, parsed.getEventTime(6));
  Cue fourthCue = parsed.getCues(parsed.getEventTime(6)).get(0);
  assertEquals("This is the forth subtitle.", fourthCue.text.toString());
  assertEquals(-10, fourthCue.line);
  assertEquals(Layout.Alignment.ALIGN_CENTER, fourthCue.alignment);
  assertEquals(35, fourthCue.size);
  assertEquals(7000000, parsed.getEventTime(7));
}
public void testParseLiveTypicalWebvttFile() throws IOException {
WebvttParser parser = new WebvttParser();
InputStream inputStream =

View File

@ -24,6 +24,7 @@ import com.google.android.exoplayer.util.MimeTypes;
import android.text.Html;
import android.text.Layout.Alignment;
import android.util.Log;
import android.util.Pair;
import java.io.BufferedReader;
import java.io.IOException;
@ -42,10 +43,14 @@ public final class WebvttParser implements SubtitleParser {
private static final String TAG = "WebvttParser";
private static final String WEBVTT_FILE_HEADER_STRING = "^\uFEFF?WEBVTT((\\u0020|\u0009).*)?$";
private static final String WEBVTT_FILE_HEADER_STRING = "^\uFEFF?WEBVTT((\u0020|\u0009).*)?$";
private static final Pattern WEBVTT_FILE_HEADER =
Pattern.compile(WEBVTT_FILE_HEADER_STRING);
private static final String WEBVTT_COMMENT_BLOCK_STRING = "^NOTE((\u0020|\u0009).*)?$";
private static final Pattern WEBVTT_COMMENT_BLOCK =
Pattern.compile(WEBVTT_COMMENT_BLOCK_STRING);
private static final String WEBVTT_METADATA_HEADER_STRING = "\\S*[:=]\\S*";
private static final Pattern WEBVTT_METADATA_HEADER =
Pattern.compile(WEBVTT_METADATA_HEADER_STRING);
@ -60,6 +65,8 @@ public final class WebvttParser implements SubtitleParser {
private static final String WEBVTT_CUE_SETTING_STRING = "\\S*:\\S*";
private static final Pattern WEBVTT_CUE_SETTING = Pattern.compile(WEBVTT_CUE_SETTING_STRING);
private static final String WEBVTT_PERCENTAGE_NUMBER_STRING = "^([0-9]+|[0-9]+\\.[0-9]+)$";
private static final String NON_NUMERIC_STRING = ".*[^0-9].*";
private final StringBuilder textBuilder;
@ -118,9 +125,18 @@ public final class WebvttParser implements SubtitleParser {
// process the cues and text
while ((line = webvttData.readLine()) != null) {
// parse webvtt comment block in case it is present
Matcher matcher = WEBVTT_COMMENT_BLOCK.matcher(line);
if(matcher.find()) {
// read lines until finding an empty one (webvtt line terminator: CRLF, or LF or CR)
while (((line = webvttData.readLine()) != null) && (!line.isEmpty())) {
// just ignoring comment text
}
continue;
}
// parse the cue identifier (if present)
Matcher matcher = WEBVTT_CUE_IDENTIFIER.matcher(line);
matcher = WEBVTT_CUE_IDENTIFIER.matcher(line);
if (matcher.find()) {
// ignore the identifier (we currently don't use it) and read the next line
line = webvttData.readLine();
@ -164,29 +180,30 @@ public final class WebvttParser implements SubtitleParser {
try {
if ("line".equals(name)) {
Pair<String, Alignment> lineMetadata = parseLinePositionAttributes(value);
value = lineMetadata.first;
if (value.endsWith("%")) {
lineNum = parseIntPercentage(value);
} else if (value.matches(NON_NUMERIC_STRING)) {
Log.w(TAG, "Invalid line value: " + value);
} else {
lineNum = Integer.parseInt(value);
// Following WebVTT spec, line number can be a negative number
int sign = 1;
if (value.startsWith("-") && value.length() > 1) {
sign = -1;
value = value.substring(1);
}
if (value.matches(NON_NUMERIC_STRING)) {
Log.w(TAG, "Invalid line value: " + value);
} else {
lineNum = sign * Integer.parseInt(value);
}
}
} else if ("align".equals(name)) {
// TODO: handle for RTL languages
if ("start".equals(value)) {
alignment = Alignment.ALIGN_NORMAL;
} else if ("middle".equals(value)) {
alignment = Alignment.ALIGN_CENTER;
} else if ("end".equals(value)) {
alignment = Alignment.ALIGN_OPPOSITE;
} else if ("left".equals(value)) {
alignment = Alignment.ALIGN_NORMAL;
} else if ("right".equals(value)) {
alignment = Alignment.ALIGN_OPPOSITE;
} else {
Log.w(TAG, "Invalid align value: " + value);
}
alignment = parseAlignment(value);
} else if ("position".equals(name)) {
Pair<String, Alignment> lineMetadata = parseLinePositionAttributes(value);
value = lineMetadata.first;
position = parseIntPercentage(value);
} else if ("size".equals(name)) {
size = parseIntPercentage(value);
@ -226,11 +243,11 @@ public final class WebvttParser implements SubtitleParser {
}
s = s.substring(0, s.length() - 1);
if (s.matches(NON_NUMERIC_STRING)) {
if (!s.matches(WEBVTT_PERCENTAGE_NUMBER_STRING)) {
throw new NumberFormatException(s + " contains an invalid character");
}
int value = Integer.parseInt(s);
int value = Math.round(Float.parseFloat(s));
if (value < 0 || value > 100) {
throw new NumberFormatException(value + " is out of range [0-100]");
}
@ -250,4 +267,37 @@ public final class WebvttParser implements SubtitleParser {
return (value * 1000 + Long.parseLong(parts[1])) * 1000;
}
/**
 * Splits a cue setting value of the form {@code value,alignment} into its two parts.
 *
 * <p>The split only happens when the comma is neither the first nor the last character of
 * {@code s}. In every other case the whole input is returned as the value and the alignment
 * is {@code null}.
 *
 * @param s The raw setting value.
 * @return A pair whose first element is the value part and whose second element is the
 *     result of {@link #parseAlignment(String)} on the part after the comma, or {@code null}
 *     when no split took place.
 */
private static Pair<String, Alignment> parseLinePositionAttributes(String s) {
  int commaIndex = s.indexOf(",");
  boolean hasAlignmentPart = commaIndex > 0 && commaIndex < s.length() - 1;
  if (!hasAlignmentPart) {
    // No usable "value,alignment" split: hand the input back untouched.
    return new Pair<String, Alignment>(s, null);
  }
  Alignment parsedAlignment = parseAlignment(s.substring(commaIndex + 1));
  String valuePart = s.substring(0, commaIndex);
  return new Pair<String, Alignment>(valuePart, parsedAlignment);
}
/**
 * Maps an alignment keyword to a {@link Alignment}.
 *
 * <p>{@code "start"} and {@code "left"} map to {@link Alignment#ALIGN_NORMAL},
 * {@code "middle"} maps to {@link Alignment#ALIGN_CENTER}, and {@code "end"} and
 * {@code "right"} map to {@link Alignment#ALIGN_OPPOSITE}.
 *
 * @param s The alignment keyword.
 * @return The matching alignment, or {@code null} (after logging a warning) if {@code s} is
 *     not one of the recognized keywords.
 */
private static Alignment parseAlignment(String s) {
  if ("start".equals(s) || "left".equals(s)) {
    return Alignment.ALIGN_NORMAL;
  }
  if ("middle".equals(s)) {
    return Alignment.ALIGN_CENTER;
  }
  if ("end".equals(s) || "right".equals(s)) {
    return Alignment.ALIGN_OPPOSITE;
  }
  Log.w(TAG, "Invalid align value: " + s);
  return null;
}
}