Require an end timecode in SSA and Subrip subtitles

SSA spec allows the lines in any order, so they must all have an end time:
http://moodub.free.fr/video/ass-specs.doc

The Matroska write-up of SubRip assumes the end time is present:
https://matroska.org/technical/specs/subtitles/srt.html

This will massively simplify merging issue:#6595

PiperOrigin-RevId: 279926730
This commit is contained in:
ibaker 2019-11-12 11:23:54 +00:00 committed by Oliver Woodman
parent b43db3bceb
commit ddb70d96ad
10 changed files with 79 additions and 160 deletions

View File

@ -111,6 +111,9 @@
([#5523](https://github.com/google/ExoPlayer/issues/5523)).
* Handle new signaling for E-AC3 JOC audio in DASH
([#6636](https://github.com/google/ExoPlayer/issues/6636)).
* Require an end time or duration for SubRip (SRT) and SubStation Alpha
(SSA/ASS) subtitles. This applies to both sidecar files & subtitles
[embedded in Matroska streams](https://matroska.org/technical/specs/subtitles/index.html).
### 2.10.7 (2019-11-12) ###

View File

@ -255,14 +255,6 @@ public class MatroskaExtractor implements Extractor {
* The byte offset of the end timecode in {@link #SUBRIP_PREFIX}.
*/
private static final int SUBRIP_PREFIX_END_TIMECODE_OFFSET = 19;
/**
* A special end timecode indicating that a subrip subtitle should be displayed until the next
* subtitle, or until the end of the media in the case of the last subtitle.
* <p>
* Equivalent to the UTF-8 string: " ".
*/
private static final byte[] SUBRIP_TIMECODE_EMPTY =
new byte[] {32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32};
/**
* The value by which to divide a time in microseconds to convert it to the unit of the last value
* in a subrip timecode (milliseconds).
@ -303,14 +295,6 @@ public class MatroskaExtractor implements Extractor {
* in an SSA timecode (1/100ths of a second).
*/
private static final long SSA_TIMECODE_LAST_VALUE_SCALING_FACTOR = 10000;
/**
* A special end timecode indicating that an SSA subtitle should be displayed until the next
* subtitle, or until the end of the media in the case of the last subtitle.
* <p>
* Equivalent to the UTF-8 string: " ".
*/
private static final byte[] SSA_TIMECODE_EMPTY =
new byte[] {32, 32, 32, 32, 32, 32, 32, 32, 32, 32};
/**
* The format of an SSA timecode.
*/
@ -1238,21 +1222,18 @@ public class MatroskaExtractor implements Extractor {
if (track.trueHdSampleRechunker != null) {
track.trueHdSampleRechunker.sampleMetadata(track, timeUs);
} else {
if (CODEC_ID_SUBRIP.equals(track.codecId)) {
commitSubtitleSample(
track,
SUBRIP_TIMECODE_FORMAT,
SUBRIP_PREFIX_END_TIMECODE_OFFSET,
SUBRIP_TIMECODE_LAST_VALUE_SCALING_FACTOR,
SUBRIP_TIMECODE_EMPTY);
} else if (CODEC_ID_ASS.equals(track.codecId)) {
commitSubtitleSample(
track,
SSA_TIMECODE_FORMAT,
SSA_PREFIX_END_TIMECODE_OFFSET,
SSA_TIMECODE_LAST_VALUE_SCALING_FACTOR,
SSA_TIMECODE_EMPTY);
if (CODEC_ID_SUBRIP.equals(track.codecId) || CODEC_ID_ASS.equals(track.codecId)) {
if (durationUs == C.TIME_UNSET) {
Log.w(TAG, "Skipping subtitle sample with no duration.");
} else {
setSubtitleEndTime(track.codecId, durationUs, subtitleSample.data);
// Note: If we ever want to support DRM protected subtitles then we'll need to output the
// appropriate encryption data here.
track.output.sampleData(subtitleSample, subtitleSample.limit());
sampleBytesWritten += subtitleSample.limit();
}
}
if ((blockFlags & C.BUFFER_FLAG_HAS_SUPPLEMENTAL_DATA) != 0) {
// Append supplemental data.
int size = blockAddData.limit();
@ -1480,51 +1461,58 @@ public class MatroskaExtractor implements Extractor {
// the correct end timecode, which we might not have yet.
}
private void commitSubtitleSample(Track track, String timecodeFormat, int endTimecodeOffset,
long lastTimecodeValueScalingFactor, byte[] emptyTimecode) {
setSubtitleSampleDuration(
subtitleSample.data,
blockDurationUs,
timecodeFormat,
endTimecodeOffset,
lastTimecodeValueScalingFactor,
emptyTimecode);
// Note: If we ever want to support DRM protected subtitles then we'll need to output the
// appropriate encryption data here.
track.output.sampleData(subtitleSample, subtitleSample.limit());
sampleBytesWritten += subtitleSample.limit();
}
/**
* Formats {@code durationUs} using {@code timecodeFormat}, and sets it as the end timecode in
* {@code subtitleSampleData}.
* Overwrites the end timecode in {@code subtitleData} with the correctly formatted time derived
* from {@code durationUs}.
*
* <p>See documentation on {@link #SSA_DIALOGUE_FORMAT} and {@link #SUBRIP_PREFIX} for why we use
* the duration as the end timecode.
*
* @param codecId The subtitle codec; must be {@link #CODEC_ID_SUBRIP} or {@link #CODEC_ID_ASS}.
* @param durationUs The duration of the sample, in microseconds.
* @param subtitleData The subtitle sample in which to overwrite the end timecode (output
* parameter).
*/
private static void setSubtitleSampleDuration(
byte[] subtitleSampleData,
long durationUs,
String timecodeFormat,
int endTimecodeOffset,
long lastTimecodeValueScalingFactor,
byte[] emptyTimecode) {
private static void setSubtitleEndTime(String codecId, long durationUs, byte[] subtitleData) {
byte[] endTimecode;
int endTimecodeOffset;
switch (codecId) {
case CODEC_ID_SUBRIP:
endTimecode =
formatSubtitleTimecode(
durationUs, SUBRIP_TIMECODE_FORMAT, SUBRIP_TIMECODE_LAST_VALUE_SCALING_FACTOR);
endTimecodeOffset = SUBRIP_PREFIX_END_TIMECODE_OFFSET;
break;
case CODEC_ID_ASS:
endTimecode =
formatSubtitleTimecode(
durationUs, SSA_TIMECODE_FORMAT, SSA_TIMECODE_LAST_VALUE_SCALING_FACTOR);
endTimecodeOffset = SSA_PREFIX_END_TIMECODE_OFFSET;
break;
default:
throw new IllegalArgumentException();
}
System.arraycopy(endTimecode, 0, subtitleData, endTimecodeOffset, endTimecode.length);
}
/**
* Formats {@code timeUs} using {@code timecodeFormat}, and sets it as the end timecode in {@code
* subtitleSampleData}.
*/
private static byte[] formatSubtitleTimecode(
long timeUs, String timecodeFormat, long lastTimecodeValueScalingFactor) {
Assertions.checkArgument(timeUs != C.TIME_UNSET);
byte[] timeCodeData;
if (durationUs == C.TIME_UNSET) {
timeCodeData = emptyTimecode;
} else {
int hours = (int) (durationUs / (3600 * C.MICROS_PER_SECOND));
durationUs -= (hours * 3600 * C.MICROS_PER_SECOND);
int minutes = (int) (durationUs / (60 * C.MICROS_PER_SECOND));
durationUs -= (minutes * 60 * C.MICROS_PER_SECOND);
int seconds = (int) (durationUs / C.MICROS_PER_SECOND);
durationUs -= (seconds * C.MICROS_PER_SECOND);
int lastValue = (int) (durationUs / lastTimecodeValueScalingFactor);
int hours = (int) (timeUs / (3600 * C.MICROS_PER_SECOND));
timeUs -= (hours * 3600 * C.MICROS_PER_SECOND);
int minutes = (int) (timeUs / (60 * C.MICROS_PER_SECOND));
timeUs -= (minutes * 60 * C.MICROS_PER_SECOND);
int seconds = (int) (timeUs / C.MICROS_PER_SECOND);
timeUs -= (seconds * C.MICROS_PER_SECOND);
int lastValue = (int) (timeUs / lastTimecodeValueScalingFactor);
timeCodeData = Util.getUtf8Bytes(String.format(Locale.US, timecodeFormat, hours, minutes,
seconds, lastValue));
}
Assertions.checkState(timeCodeData.length == emptyTimecode.length);
System.arraycopy(timeCodeData, 0, subtitleSampleData, endTimecodeOffset, timeCodeData.length);
return timeCodeData;
}
/**

View File

@ -180,21 +180,17 @@ public final class SsaDecoder extends SimpleSubtitleDecoder {
return;
}
long startTimeUs = SsaDecoder.parseTimecodeUs(lineValues[formatStartIndex]);
long startTimeUs = parseTimecodeUs(lineValues[formatStartIndex]);
if (startTimeUs == C.TIME_UNSET) {
Log.w(TAG, "Skipping invalid timing: " + dialogueLine);
return;
}
long endTimeUs = C.TIME_UNSET;
String endTimeString = lineValues[formatEndIndex];
if (!endTimeString.trim().isEmpty()) {
endTimeUs = SsaDecoder.parseTimecodeUs(endTimeString);
long endTimeUs = parseTimecodeUs(lineValues[formatEndIndex]);
if (endTimeUs == C.TIME_UNSET) {
Log.w(TAG, "Skipping invalid timing: " + dialogueLine);
return;
}
}
String text =
lineValues[formatTextIndex]
@ -203,11 +199,9 @@ public final class SsaDecoder extends SimpleSubtitleDecoder {
.replaceAll("\\\\n", "\n");
cues.add(new Cue(text));
cueTimesUs.add(startTimeUs);
if (endTimeUs != C.TIME_UNSET) {
cues.add(Cue.EMPTY);
cueTimesUs.add(endTimeUs);
}
}
/**
* Parses an SSA timecode string.

View File

@ -43,7 +43,7 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
private static final String SUBRIP_TIMECODE = "(?:(\\d+):)?(\\d+):(\\d+),(\\d+)";
private static final Pattern SUBRIP_TIMING_LINE =
Pattern.compile("\\s*(" + SUBRIP_TIMECODE + ")\\s*-->\\s*(" + SUBRIP_TIMECODE + ")?\\s*");
Pattern.compile("\\s*(" + SUBRIP_TIMECODE + ")\\s*-->\\s*(" + SUBRIP_TIMECODE + ")\\s*");
private static final Pattern SUBRIP_TAG_PATTERN = Pattern.compile("\\{\\\\.*?\\}");
private static final String SUBRIP_ALIGNMENT_TAG = "\\{\\\\an[1-9]\\}";
@ -90,7 +90,6 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
}
// Read and parse the timing line.
boolean haveEndTimecode = false;
currentLine = subripData.readLine();
if (currentLine == null) {
Log.w(TAG, "Unexpected end");
@ -99,11 +98,8 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
Matcher matcher = SUBRIP_TIMING_LINE.matcher(currentLine);
if (matcher.matches()) {
cueTimesUs.add(parseTimecode(matcher, 1));
if (!TextUtils.isEmpty(matcher.group(6))) {
haveEndTimecode = true;
cueTimesUs.add(parseTimecode(matcher, 6));
}
cueTimesUs.add(parseTimecode(matcher, /* groupOffset= */ 1));
cueTimesUs.add(parseTimecode(matcher, /* groupOffset= */ 6));
} else {
Log.w(TAG, "Skipping invalid timing: " + currentLine);
continue;
@ -133,11 +129,8 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
}
}
cues.add(buildCue(text, alignmentTag));
if (haveEndTimecode) {
cues.add(Cue.EMPTY);
}
}
Cue[] cuesArray = new Cue[cues.size()];
cues.toArray(cuesArray);

View File

@ -10,3 +10,5 @@ Format: Layer, Start, End, Style, Name, Text
Dialogue: 0,Invalid,0:00:01.23,Default,Olly,This is the first subtitle{ignored}.
Dialogue: 0,0:00:02.34,Invalid,Default,Olly,This is the second subtitle \nwith a newline \Nand another.
Dialogue: 0,0:00:04:56,0:00:08:90,Default,Olly,This is the third subtitle, with a comma.
Dialogue: 0, ,0:00:10:90,Default,Olly,This is the fourth subtitle.
Dialogue: 0,0:00:12:90, ,Default,Olly,This is the fifth subtitle.

View File

@ -1,12 +0,0 @@
[Script Info]
Title: SomeTitle
[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,Open Sans Semibold,36,&H00FFFFFF,&H000000FF,&H00020713,&H00000000,-1,0,0,0,100,100,0,0,1,1.7,0,2,0,0,28,1
[Events]
Format: Layer, Start, End, Style, Name, Text
Dialogue: 0,0:00:00.00, ,Default,Olly,This is the first subtitle.
Dialogue: 0,0:00:02.34, ,Default,Olly,This is the second subtitle \nwith a newline \Nand another.
Dialogue: 0,0:00:04.56, ,Default,Olly,This is the third subtitle, with a comma.

View File

@ -1,11 +0,0 @@
1
00:00:00,000 -->
SubRip doesn't technically allow missing end timecodes.
2
00:00:02,345 -->
We interpret it to mean that a subtitle extends to the start of the next one.
3
00:00:03,456 -->
Or to the end of the media.

View File

@ -9,3 +9,11 @@ Second subtitle with second line.
3
00:00:04,567 --> 00:00:08,901
This is the third subtitle.
4
--> 00:00:10,901
This is the fourth subtitle.
5
00:00:12,901 -->
This is the fifth subtitle.

View File

@ -36,7 +36,6 @@ public final class SsaDecoderTest {
private static final String TYPICAL_DIALOGUE_ONLY = "ssa/typical_dialogue";
private static final String TYPICAL_FORMAT_ONLY = "ssa/typical_format";
private static final String INVALID_TIMECODES = "ssa/invalid_timecodes";
private static final String NO_END_TIMECODES = "ssa/no_end_timecodes";
@Test
public void testDecodeEmpty() throws IOException {
@ -92,28 +91,6 @@ public final class SsaDecoderTest {
assertTypicalCue3(subtitle, 0);
}
@Test
public void testDecodeNoEndTimecodes() throws IOException {
SsaDecoder decoder = new SsaDecoder();
byte[] bytes =
TestUtil.getByteArray(ApplicationProvider.getApplicationContext(), NO_END_TIMECODES);
Subtitle subtitle = decoder.decode(bytes, bytes.length, false);
assertThat(subtitle.getEventTimeCount()).isEqualTo(3);
assertThat(subtitle.getEventTime(0)).isEqualTo(0);
assertThat(subtitle.getCues(subtitle.getEventTime(0)).get(0).text.toString())
.isEqualTo("This is the first subtitle.");
assertThat(subtitle.getEventTime(1)).isEqualTo(2340000);
assertThat(subtitle.getCues(subtitle.getEventTime(1)).get(0).text.toString())
.isEqualTo("This is the second subtitle \nwith a newline \nand another.");
assertThat(subtitle.getEventTime(2)).isEqualTo(4560000);
assertThat(subtitle.getCues(subtitle.getEventTime(2)).get(0).text.toString())
.isEqualTo("This is the third subtitle, with a comma.");
}
private static void assertTypicalCue1(Subtitle subtitle, int eventIndex) {
assertThat(subtitle.getEventTime(eventIndex)).isEqualTo(0);
assertThat(subtitle.getCues(subtitle.getEventTime(eventIndex)).get(0).text.toString())

View File

@ -39,7 +39,6 @@ public final class SubripDecoderTest {
private static final String TYPICAL_NEGATIVE_TIMESTAMPS = "subrip/typical_negative_timestamps";
private static final String TYPICAL_UNEXPECTED_END = "subrip/typical_unexpected_end";
private static final String TYPICAL_WITH_TAGS = "subrip/typical_with_tags";
private static final String NO_END_TIMECODES_FILE = "subrip/no_end_timecodes";
@Test
public void testDecodeEmpty() throws IOException {
@ -145,28 +144,6 @@ public final class SubripDecoderTest {
assertTypicalCue2(subtitle, 2);
}
@Test
public void testDecodeNoEndTimecodes() throws IOException {
SubripDecoder decoder = new SubripDecoder();
byte[] bytes =
TestUtil.getByteArray(ApplicationProvider.getApplicationContext(), NO_END_TIMECODES_FILE);
Subtitle subtitle = decoder.decode(bytes, bytes.length, false);
assertThat(subtitle.getEventTimeCount()).isEqualTo(3);
assertThat(subtitle.getEventTime(0)).isEqualTo(0);
assertThat(subtitle.getCues(subtitle.getEventTime(0)).get(0).text.toString())
.isEqualTo("SubRip doesn't technically allow missing end timecodes.");
assertThat(subtitle.getEventTime(1)).isEqualTo(2345000);
assertThat(subtitle.getCues(subtitle.getEventTime(1)).get(0).text.toString())
.isEqualTo("We interpret it to mean that a subtitle extends to the start of the next one.");
assertThat(subtitle.getEventTime(2)).isEqualTo(3456000);
assertThat(subtitle.getCues(subtitle.getEventTime(2)).get(0).text.toString())
.isEqualTo("Or to the end of the media.");
}
@Test
public void testDecodeCueWithTag() throws IOException {
SubripDecoder decoder = new SubripDecoder();