WebVTT parser improvements.
* Split findNextCueHeader and validateWebVttHeader into static methods. This is a step toward WebVTT in HLS, where we'll need to re-use these to peek at the top of the WebVTT file (they'll be moved into a util class). * Made parser robust against bad cue headers + added a test. * Removed spurious looking assertion in WebvttSubtitle.
This commit is contained in:
parent
963e604ffd
commit
72f093c4f6
10
library/src/androidTest/assets/webvtt/with_bad_cue_header
Normal file
10
library/src/androidTest/assets/webvtt/with_bad_cue_header
Normal file
@ -0,0 +1,10 @@
|
||||
WEBVTT # This comment is allowed
|
||||
|
||||
00:00.000 --> 00:01.234
|
||||
This is the first subtitle.
|
||||
|
||||
00:02.badbadbadbadbadbad --> 00:03.456
|
||||
This is the second subtitle.
|
||||
|
||||
00:04.000 --> 00:05.000
|
||||
This is the third subtitle.
|
@ -33,6 +33,7 @@ public class WebvttParserTest extends InstrumentationTestCase {
|
||||
private static final String TYPICAL_WITH_IDS_FILE = "webvtt/typical_with_identifiers";
|
||||
private static final String TYPICAL_WITH_COMMENTS_FILE = "webvtt/typical_with_comments";
|
||||
private static final String WITH_POSITIONING_FILE = "webvtt/with_positioning";
|
||||
private static final String WITH_BAD_CUE_HEADER_FILE = "webvtt/with_bad_cue_header";
|
||||
private static final String WITH_TAGS_FILE = "webvtt/with_tags";
|
||||
private static final String EMPTY_FILE = "webvtt/empty";
|
||||
|
||||
@ -132,6 +133,20 @@ public class WebvttParserTest extends InstrumentationTestCase {
|
||||
Cue.ANCHOR_TYPE_END, 0.1f);
|
||||
}
|
||||
|
||||
public void testParseWithBadCueHeader() throws IOException {
|
||||
WebvttParser parser = new WebvttParser();
|
||||
InputStream inputStream =
|
||||
getInstrumentation().getContext().getResources().getAssets().open(WITH_BAD_CUE_HEADER_FILE);
|
||||
WebvttSubtitle subtitle = parser.parse(inputStream);
|
||||
|
||||
// test event count
|
||||
assertEquals(4, subtitle.getEventTimeCount());
|
||||
|
||||
// test cues
|
||||
assertCue(subtitle, 0, 0, 1234000, "This is the first subtitle.");
|
||||
assertCue(subtitle, 2, 4000000, 5000000, "This is the third subtitle.");
|
||||
}
|
||||
|
||||
private static void assertCue(WebvttSubtitle subtitle, int eventTimeIndex, long startTimeUs,
|
||||
int endTimeUs, String text) {
|
||||
assertCue(subtitle, eventTimeIndex, startTimeUs, endTimeUs, text, null, Cue.DIMEN_UNSET,
|
||||
|
@ -23,6 +23,7 @@ import com.google.android.exoplayer.util.MimeTypes;
|
||||
|
||||
import android.text.Html;
|
||||
import android.text.Layout.Alignment;
|
||||
import android.text.TextUtils;
|
||||
import android.util.Log;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
@ -43,9 +44,9 @@ public final class WebvttParser implements SubtitleParser {
|
||||
private static final String TAG = "WebvttParser";
|
||||
|
||||
private static final Pattern HEADER = Pattern.compile("^\uFEFF?WEBVTT((\u0020|\u0009).*)?$");
|
||||
private static final Pattern COMMENT_BLOCK = Pattern.compile("^NOTE((\u0020|\u0009).*)?$");
|
||||
private static final Pattern COMMENT = Pattern.compile("^NOTE((\u0020|\u0009).*)?$");
|
||||
private static final Pattern CUE_HEADER = Pattern.compile("^(\\S+)\\s+-->\\s+(\\S+)(.*)?$");
|
||||
private static final Pattern CUE_SETTING = Pattern.compile("\\S+?:\\S+");
|
||||
private static final Pattern CUE_SETTING = Pattern.compile("(\\S+?):(\\S+)");
|
||||
|
||||
private final PositionHolder positionHolder;
|
||||
private final StringBuilder textBuilder;
|
||||
@ -62,51 +63,26 @@ public final class WebvttParser implements SubtitleParser {
|
||||
|
||||
@Override
|
||||
public final WebvttSubtitle parse(InputStream inputStream) throws IOException {
|
||||
ArrayList<WebvttCue> subtitles = new ArrayList<>();
|
||||
|
||||
BufferedReader webvttData = new BufferedReader(new InputStreamReader(inputStream, C.UTF8_NAME));
|
||||
String line;
|
||||
|
||||
// File should start with "WEBVTT".
|
||||
line = webvttData.readLine();
|
||||
if (line == null || !HEADER.matcher(line).matches()) {
|
||||
throw new ParserException("Expected WEBVTT. Got " + line);
|
||||
}
|
||||
// Validate the first line of the header, and skip the remainder.
|
||||
validateWebvttHeaderLine(webvttData);
|
||||
while (!TextUtils.isEmpty(webvttData.readLine())) {}
|
||||
|
||||
// Parse the remainder of the header.
|
||||
while (true) {
|
||||
line = webvttData.readLine();
|
||||
if (line == null) {
|
||||
// We reached EOF before finishing the header.
|
||||
throw new ParserException("Expected an empty line after webvtt header");
|
||||
} else if (line.isEmpty()) {
|
||||
// We read the newline that separates the header from the body.
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Process the cues and text.
|
||||
while ((line = webvttData.readLine()) != null) {
|
||||
// Skip a comment block, if present.
|
||||
Matcher matcher = COMMENT_BLOCK.matcher(line);
|
||||
if (matcher.find()) {
|
||||
// Skip until the end of the comment block.
|
||||
while ((line = webvttData.readLine()) != null && !line.isEmpty()) {
|
||||
// Ignore comment text.
|
||||
}
|
||||
ArrayList<WebvttCue> subtitles = new ArrayList<>();
|
||||
Matcher cueHeaderMatcher;
|
||||
while ((cueHeaderMatcher = findNextCueHeader(webvttData)) != null) {
|
||||
long cueStartTime;
|
||||
long cueEndTime;
|
||||
try {
|
||||
// Parse the cue start and end times.
|
||||
cueStartTime = parseTimestampUs(cueHeaderMatcher.group(1));
|
||||
cueEndTime = parseTimestampUs(cueHeaderMatcher.group(2));
|
||||
} catch (NumberFormatException e) {
|
||||
Log.w(TAG, "Skipping cue with bad header: " + cueHeaderMatcher.group());
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip anything other than a cue header.
|
||||
matcher = CUE_HEADER.matcher(line);
|
||||
if (!matcher.matches()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Parse the cue start and end times.
|
||||
long cueStartTime = parseTimestampUs(matcher.group(1));
|
||||
long cueEndTime = parseTimestampUs(matcher.group(2));
|
||||
|
||||
// Default cue settings.
|
||||
Alignment cueTextAlignment = null;
|
||||
float cueLine = Cue.DIMEN_UNSET;
|
||||
@ -117,12 +93,10 @@ public final class WebvttParser implements SubtitleParser {
|
||||
float cueWidth = Cue.DIMEN_UNSET;
|
||||
|
||||
// Parse the cue settings list.
|
||||
matcher = CUE_SETTING.matcher(matcher.group(3));
|
||||
while (matcher.find()) {
|
||||
String match = matcher.group();
|
||||
String[] parts = match.split(":", 2);
|
||||
String name = parts[0];
|
||||
String value = parts[1];
|
||||
Matcher cueSettingMatcher = CUE_SETTING.matcher(cueHeaderMatcher.group(3));
|
||||
while (cueSettingMatcher.find()) {
|
||||
String name = cueSettingMatcher.group(1);
|
||||
String value = cueSettingMatcher.group(2);
|
||||
try {
|
||||
if ("line".equals(name)) {
|
||||
parseLineAttribute(value, positionHolder);
|
||||
@ -141,7 +115,7 @@ public final class WebvttParser implements SubtitleParser {
|
||||
Log.w(TAG, "Unknown cue setting " + name + ":" + value);
|
||||
}
|
||||
} catch (NumberFormatException e) {
|
||||
Log.w(TAG, e.getMessage() + ": " + match);
|
||||
Log.w(TAG, "Skipping bad cue setting: " + cueSettingMatcher.group());
|
||||
}
|
||||
}
|
||||
|
||||
@ -153,6 +127,7 @@ public final class WebvttParser implements SubtitleParser {
|
||||
|
||||
// Parse the cue text.
|
||||
textBuilder.setLength(0);
|
||||
String line;
|
||||
while ((line = webvttData.readLine()) != null && !line.isEmpty()) {
|
||||
if (textBuilder.length() > 0) {
|
||||
textBuilder.append("<br>");
|
||||
@ -169,6 +144,45 @@ public final class WebvttParser implements SubtitleParser {
|
||||
return new WebvttSubtitle(subtitles);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads and validates the first line of a WebVTT file.
|
||||
*
|
||||
* @param input The input from which the line should be read.
|
||||
* @throws ParserException If the line isn't the start of a valid WebVTT file.
|
||||
* @throws IOException If an error occurs reading from the input.
|
||||
*/
|
||||
private static void validateWebvttHeaderLine(BufferedReader input) throws IOException {
|
||||
String line = input.readLine();
|
||||
if (line == null || !HEADER.matcher(line).matches()) {
|
||||
throw new ParserException("Expected WEBVTT. Got " + line);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads lines up to and including the next WebVTT cue header.
|
||||
*
|
||||
* @param input The input from which lines should be read.
|
||||
* @throws IOException If an error occurs reading from the input.
|
||||
* @return A {@link Matcher} for the WebVTT cue header, or null if the end of the input was
|
||||
* reached without a cue header being found. In the case that a cue header is found, groups 1,
|
||||
* 2 and 3 of the returned matcher contain the start time, end time and settings list.
|
||||
*/
|
||||
private static Matcher findNextCueHeader(BufferedReader input) throws IOException {
|
||||
String line;
|
||||
while ((line = input.readLine()) != null) {
|
||||
if (COMMENT.matcher(line).matches()) {
|
||||
// Skip until the end of the comment block.
|
||||
while ((line = input.readLine()) != null && !line.isEmpty()) {}
|
||||
} else {
|
||||
Matcher cueHeaderMatcher = CUE_HEADER.matcher(line);
|
||||
if (cueHeaderMatcher.matches()) {
|
||||
return cueHeaderMatcher;
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private static long parseTimestampUs(String s) throws NumberFormatException {
|
||||
long value = 0;
|
||||
String[] parts = s.split("\\.", 2);
|
||||
|
@ -56,7 +56,6 @@ public final class WebvttSubtitle implements Subtitle {
|
||||
|
||||
@Override
|
||||
public int getNextEventTimeIndex(long timeUs) {
|
||||
Assertions.checkArgument(timeUs >= 0);
|
||||
int index = Util.binarySearchCeil(sortedCueTimesUs, timeUs, false, false);
|
||||
return index < sortedCueTimesUs.length ? index : -1;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user