mirror of
https://github.com/androidx/media.git
synced 2025-05-07 15:40:37 +08:00
WebVTT parser improvements.
* Split findNextCueHeader and validateWebvttHeaderLine out into static methods. This is a step toward WebVTT in HLS, where we'll need to reuse them to peek at the top of the WebVTT file (they'll be moved into a util class).
* Made the parser robust against bad cue headers and added a test.
* Removed a spurious-looking assertion in WebvttSubtitle.
This commit is contained in:
parent
963e604ffd
commit
72f093c4f6
10
library/src/androidTest/assets/webvtt/with_bad_cue_header
Normal file
10
library/src/androidTest/assets/webvtt/with_bad_cue_header
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
WEBVTT # This comment is allowed
|
||||||
|
|
||||||
|
00:00.000 --> 00:01.234
|
||||||
|
This is the first subtitle.
|
||||||
|
|
||||||
|
00:02.badbadbadbadbadbad --> 00:03.456
|
||||||
|
This is the second subtitle.
|
||||||
|
|
||||||
|
00:04.000 --> 00:05.000
|
||||||
|
This is the third subtitle.
|
@ -33,6 +33,7 @@ public class WebvttParserTest extends InstrumentationTestCase {
|
|||||||
private static final String TYPICAL_WITH_IDS_FILE = "webvtt/typical_with_identifiers";
|
private static final String TYPICAL_WITH_IDS_FILE = "webvtt/typical_with_identifiers";
|
||||||
private static final String TYPICAL_WITH_COMMENTS_FILE = "webvtt/typical_with_comments";
|
private static final String TYPICAL_WITH_COMMENTS_FILE = "webvtt/typical_with_comments";
|
||||||
private static final String WITH_POSITIONING_FILE = "webvtt/with_positioning";
|
private static final String WITH_POSITIONING_FILE = "webvtt/with_positioning";
|
||||||
|
private static final String WITH_BAD_CUE_HEADER_FILE = "webvtt/with_bad_cue_header";
|
||||||
private static final String WITH_TAGS_FILE = "webvtt/with_tags";
|
private static final String WITH_TAGS_FILE = "webvtt/with_tags";
|
||||||
private static final String EMPTY_FILE = "webvtt/empty";
|
private static final String EMPTY_FILE = "webvtt/empty";
|
||||||
|
|
||||||
@ -132,6 +133,20 @@ public class WebvttParserTest extends InstrumentationTestCase {
|
|||||||
Cue.ANCHOR_TYPE_END, 0.1f);
|
Cue.ANCHOR_TYPE_END, 0.1f);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testParseWithBadCueHeader() throws IOException {
|
||||||
|
WebvttParser parser = new WebvttParser();
|
||||||
|
InputStream inputStream =
|
||||||
|
getInstrumentation().getContext().getResources().getAssets().open(WITH_BAD_CUE_HEADER_FILE);
|
||||||
|
WebvttSubtitle subtitle = parser.parse(inputStream);
|
||||||
|
|
||||||
|
// test event count
|
||||||
|
assertEquals(4, subtitle.getEventTimeCount());
|
||||||
|
|
||||||
|
// test cues
|
||||||
|
assertCue(subtitle, 0, 0, 1234000, "This is the first subtitle.");
|
||||||
|
assertCue(subtitle, 2, 4000000, 5000000, "This is the third subtitle.");
|
||||||
|
}
|
||||||
|
|
||||||
private static void assertCue(WebvttSubtitle subtitle, int eventTimeIndex, long startTimeUs,
|
private static void assertCue(WebvttSubtitle subtitle, int eventTimeIndex, long startTimeUs,
|
||||||
int endTimeUs, String text) {
|
int endTimeUs, String text) {
|
||||||
assertCue(subtitle, eventTimeIndex, startTimeUs, endTimeUs, text, null, Cue.DIMEN_UNSET,
|
assertCue(subtitle, eventTimeIndex, startTimeUs, endTimeUs, text, null, Cue.DIMEN_UNSET,
|
||||||
|
@ -23,6 +23,7 @@ import com.google.android.exoplayer.util.MimeTypes;
|
|||||||
|
|
||||||
import android.text.Html;
|
import android.text.Html;
|
||||||
import android.text.Layout.Alignment;
|
import android.text.Layout.Alignment;
|
||||||
|
import android.text.TextUtils;
|
||||||
import android.util.Log;
|
import android.util.Log;
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
@ -43,9 +44,9 @@ public final class WebvttParser implements SubtitleParser {
|
|||||||
private static final String TAG = "WebvttParser";
|
private static final String TAG = "WebvttParser";
|
||||||
|
|
||||||
private static final Pattern HEADER = Pattern.compile("^\uFEFF?WEBVTT((\u0020|\u0009).*)?$");
|
private static final Pattern HEADER = Pattern.compile("^\uFEFF?WEBVTT((\u0020|\u0009).*)?$");
|
||||||
private static final Pattern COMMENT_BLOCK = Pattern.compile("^NOTE((\u0020|\u0009).*)?$");
|
private static final Pattern COMMENT = Pattern.compile("^NOTE((\u0020|\u0009).*)?$");
|
||||||
private static final Pattern CUE_HEADER = Pattern.compile("^(\\S+)\\s+-->\\s+(\\S+)(.*)?$");
|
private static final Pattern CUE_HEADER = Pattern.compile("^(\\S+)\\s+-->\\s+(\\S+)(.*)?$");
|
||||||
private static final Pattern CUE_SETTING = Pattern.compile("\\S+?:\\S+");
|
private static final Pattern CUE_SETTING = Pattern.compile("(\\S+?):(\\S+)");
|
||||||
|
|
||||||
private final PositionHolder positionHolder;
|
private final PositionHolder positionHolder;
|
||||||
private final StringBuilder textBuilder;
|
private final StringBuilder textBuilder;
|
||||||
@ -62,50 +63,25 @@ public final class WebvttParser implements SubtitleParser {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final WebvttSubtitle parse(InputStream inputStream) throws IOException {
|
public final WebvttSubtitle parse(InputStream inputStream) throws IOException {
|
||||||
ArrayList<WebvttCue> subtitles = new ArrayList<>();
|
|
||||||
|
|
||||||
BufferedReader webvttData = new BufferedReader(new InputStreamReader(inputStream, C.UTF8_NAME));
|
BufferedReader webvttData = new BufferedReader(new InputStreamReader(inputStream, C.UTF8_NAME));
|
||||||
String line;
|
|
||||||
|
|
||||||
// File should start with "WEBVTT".
|
// Validate the first line of the header, and skip the remainder.
|
||||||
line = webvttData.readLine();
|
validateWebvttHeaderLine(webvttData);
|
||||||
if (line == null || !HEADER.matcher(line).matches()) {
|
while (!TextUtils.isEmpty(webvttData.readLine())) {}
|
||||||
throw new ParserException("Expected WEBVTT. Got " + line);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Parse the remainder of the header.
|
|
||||||
while (true) {
|
|
||||||
line = webvttData.readLine();
|
|
||||||
if (line == null) {
|
|
||||||
// We reached EOF before finishing the header.
|
|
||||||
throw new ParserException("Expected an empty line after webvtt header");
|
|
||||||
} else if (line.isEmpty()) {
|
|
||||||
// We read the newline that separates the header from the body.
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process the cues and text.
|
|
||||||
while ((line = webvttData.readLine()) != null) {
|
|
||||||
// Skip a comment block, if present.
|
|
||||||
Matcher matcher = COMMENT_BLOCK.matcher(line);
|
|
||||||
if (matcher.find()) {
|
|
||||||
// Skip until the end of the comment block.
|
|
||||||
while ((line = webvttData.readLine()) != null && !line.isEmpty()) {
|
|
||||||
// Ignore comment text.
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Skip anything other than a cue header.
|
|
||||||
matcher = CUE_HEADER.matcher(line);
|
|
||||||
if (!matcher.matches()) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
ArrayList<WebvttCue> subtitles = new ArrayList<>();
|
||||||
|
Matcher cueHeaderMatcher;
|
||||||
|
while ((cueHeaderMatcher = findNextCueHeader(webvttData)) != null) {
|
||||||
|
long cueStartTime;
|
||||||
|
long cueEndTime;
|
||||||
|
try {
|
||||||
// Parse the cue start and end times.
|
// Parse the cue start and end times.
|
||||||
long cueStartTime = parseTimestampUs(matcher.group(1));
|
cueStartTime = parseTimestampUs(cueHeaderMatcher.group(1));
|
||||||
long cueEndTime = parseTimestampUs(matcher.group(2));
|
cueEndTime = parseTimestampUs(cueHeaderMatcher.group(2));
|
||||||
|
} catch (NumberFormatException e) {
|
||||||
|
Log.w(TAG, "Skipping cue with bad header: " + cueHeaderMatcher.group());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// Default cue settings.
|
// Default cue settings.
|
||||||
Alignment cueTextAlignment = null;
|
Alignment cueTextAlignment = null;
|
||||||
@ -117,12 +93,10 @@ public final class WebvttParser implements SubtitleParser {
|
|||||||
float cueWidth = Cue.DIMEN_UNSET;
|
float cueWidth = Cue.DIMEN_UNSET;
|
||||||
|
|
||||||
// Parse the cue settings list.
|
// Parse the cue settings list.
|
||||||
matcher = CUE_SETTING.matcher(matcher.group(3));
|
Matcher cueSettingMatcher = CUE_SETTING.matcher(cueHeaderMatcher.group(3));
|
||||||
while (matcher.find()) {
|
while (cueSettingMatcher.find()) {
|
||||||
String match = matcher.group();
|
String name = cueSettingMatcher.group(1);
|
||||||
String[] parts = match.split(":", 2);
|
String value = cueSettingMatcher.group(2);
|
||||||
String name = parts[0];
|
|
||||||
String value = parts[1];
|
|
||||||
try {
|
try {
|
||||||
if ("line".equals(name)) {
|
if ("line".equals(name)) {
|
||||||
parseLineAttribute(value, positionHolder);
|
parseLineAttribute(value, positionHolder);
|
||||||
@ -141,7 +115,7 @@ public final class WebvttParser implements SubtitleParser {
|
|||||||
Log.w(TAG, "Unknown cue setting " + name + ":" + value);
|
Log.w(TAG, "Unknown cue setting " + name + ":" + value);
|
||||||
}
|
}
|
||||||
} catch (NumberFormatException e) {
|
} catch (NumberFormatException e) {
|
||||||
Log.w(TAG, e.getMessage() + ": " + match);
|
Log.w(TAG, "Skipping bad cue setting: " + cueSettingMatcher.group());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -153,6 +127,7 @@ public final class WebvttParser implements SubtitleParser {
|
|||||||
|
|
||||||
// Parse the cue text.
|
// Parse the cue text.
|
||||||
textBuilder.setLength(0);
|
textBuilder.setLength(0);
|
||||||
|
String line;
|
||||||
while ((line = webvttData.readLine()) != null && !line.isEmpty()) {
|
while ((line = webvttData.readLine()) != null && !line.isEmpty()) {
|
||||||
if (textBuilder.length() > 0) {
|
if (textBuilder.length() > 0) {
|
||||||
textBuilder.append("<br>");
|
textBuilder.append("<br>");
|
||||||
@ -169,6 +144,45 @@ public final class WebvttParser implements SubtitleParser {
|
|||||||
return new WebvttSubtitle(subtitles);
|
return new WebvttSubtitle(subtitles);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads and validates the first line of a WebVTT file.
|
||||||
|
*
|
||||||
|
* @param input The input from which the line should be read.
|
||||||
|
* @throws ParserException If the line isn't the start of a valid WebVTT file.
|
||||||
|
* @throws IOException If an error occurs reading from the input.
|
||||||
|
*/
|
||||||
|
private static void validateWebvttHeaderLine(BufferedReader input) throws IOException {
|
||||||
|
String line = input.readLine();
|
||||||
|
if (line == null || !HEADER.matcher(line).matches()) {
|
||||||
|
throw new ParserException("Expected WEBVTT. Got " + line);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads lines up to and including the next WebVTT cue header.
|
||||||
|
*
|
||||||
|
* @param input The input from which lines should be read.
|
||||||
|
* @throws IOException If an error occurs reading from the input.
|
||||||
|
* @return A {@link Matcher} for the WebVTT cue header, or null if the end of the input was
|
||||||
|
* reached without a cue header being found. In the case that a cue header is found, groups 1,
|
||||||
|
* 2 and 3 of the returned matcher contain the start time, end time and settings list.
|
||||||
|
*/
|
||||||
|
private static Matcher findNextCueHeader(BufferedReader input) throws IOException {
|
||||||
|
String line;
|
||||||
|
while ((line = input.readLine()) != null) {
|
||||||
|
if (COMMENT.matcher(line).matches()) {
|
||||||
|
// Skip until the end of the comment block.
|
||||||
|
while ((line = input.readLine()) != null && !line.isEmpty()) {}
|
||||||
|
} else {
|
||||||
|
Matcher cueHeaderMatcher = CUE_HEADER.matcher(line);
|
||||||
|
if (cueHeaderMatcher.matches()) {
|
||||||
|
return cueHeaderMatcher;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
private static long parseTimestampUs(String s) throws NumberFormatException {
|
private static long parseTimestampUs(String s) throws NumberFormatException {
|
||||||
long value = 0;
|
long value = 0;
|
||||||
String[] parts = s.split("\\.", 2);
|
String[] parts = s.split("\\.", 2);
|
||||||
|
@ -56,7 +56,6 @@ public final class WebvttSubtitle implements Subtitle {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int getNextEventTimeIndex(long timeUs) {
|
public int getNextEventTimeIndex(long timeUs) {
|
||||||
Assertions.checkArgument(timeUs >= 0);
|
|
||||||
int index = Util.binarySearchCeil(sortedCueTimesUs, timeUs, false, false);
|
int index = Util.binarySearchCeil(sortedCueTimesUs, timeUs, false, false);
|
||||||
return index < sortedCueTimesUs.length ? index : -1;
|
return index < sortedCueTimesUs.length ? index : -1;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user