#4306 - Extract tags from SubRip subtitles, add support for alignment tags based on SSA v4+
This commit is contained in:
parent
05a31dfd24
commit
ae520a8c2c
@ -15,7 +15,9 @@
|
|||||||
*/
|
*/
|
||||||
package com.google.android.exoplayer2.text.subrip;
|
package com.google.android.exoplayer2.text.subrip;
|
||||||
|
|
||||||
|
import android.support.annotation.StringDef;
|
||||||
import android.text.Html;
|
import android.text.Html;
|
||||||
|
import android.text.Layout;
|
||||||
import android.text.Spanned;
|
import android.text.Spanned;
|
||||||
import android.text.TextUtils;
|
import android.text.TextUtils;
|
||||||
import android.util.Log;
|
import android.util.Log;
|
||||||
@ -23,7 +25,11 @@ import com.google.android.exoplayer2.text.Cue;
|
|||||||
import com.google.android.exoplayer2.text.SimpleSubtitleDecoder;
|
import com.google.android.exoplayer2.text.SimpleSubtitleDecoder;
|
||||||
import com.google.android.exoplayer2.util.LongArray;
|
import com.google.android.exoplayer2.util.LongArray;
|
||||||
import com.google.android.exoplayer2.util.ParsableByteArray;
|
import com.google.android.exoplayer2.util.ParsableByteArray;
|
||||||
|
|
||||||
|
import java.lang.annotation.Retention;
|
||||||
|
import java.lang.annotation.RetentionPolicy;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
@ -38,6 +44,33 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
|
|||||||
private static final Pattern SUBRIP_TIMING_LINE =
|
private static final Pattern SUBRIP_TIMING_LINE =
|
||||||
Pattern.compile("\\s*(" + SUBRIP_TIMECODE + ")\\s*-->\\s*(" + SUBRIP_TIMECODE + ")?\\s*");
|
Pattern.compile("\\s*(" + SUBRIP_TIMECODE + ")\\s*-->\\s*(" + SUBRIP_TIMECODE + ")?\\s*");
|
||||||
|
|
||||||
|
private static final Pattern SUBRIP_TAG_PATTERN = Pattern.compile("\\{\\\\.*?\\}");
|
||||||
|
private static final String SUBRIP_ALIGNMENT_TAG = "\\{\\\\an[1-9]\\}";
|
||||||
|
|
||||||
|
private static final float DEFAULT_START_FRACTION = 0.08f;
|
||||||
|
private static final float DEFAULT_END_FRACTION = 1 - DEFAULT_START_FRACTION;
|
||||||
|
private static final float DEFAULT_MID_FRACTION = 0.5f;
|
||||||
|
|
||||||
|
@Retention(RetentionPolicy.SOURCE)
|
||||||
|
@StringDef({
|
||||||
|
ALIGN_BOTTOM_LEFT, ALIGN_BOTTOM_MID, ALIGN_BOTTOM_RIGHT,
|
||||||
|
ALIGN_MID_LEFT, ALIGN_MID_MID, ALIGN_MID_RIGHT,
|
||||||
|
ALIGN_TOP_LEFT, ALIGN_TOP_MID, ALIGN_TOP_RIGHT
|
||||||
|
})
|
||||||
|
|
||||||
|
private @interface SubRipTag {}
|
||||||
|
|
||||||
|
// Possible valid alignment tags based on SSA v4+ specs
|
||||||
|
private static final String ALIGN_BOTTOM_LEFT = "{\\an1}";
|
||||||
|
private static final String ALIGN_BOTTOM_MID = "{\\an2}";
|
||||||
|
private static final String ALIGN_BOTTOM_RIGHT = "{\\an3}";
|
||||||
|
private static final String ALIGN_MID_LEFT = "{\\an4}";
|
||||||
|
private static final String ALIGN_MID_MID = "{\\an5}";
|
||||||
|
private static final String ALIGN_MID_RIGHT = "{\\an6}";
|
||||||
|
private static final String ALIGN_TOP_LEFT = "{\\an7}";
|
||||||
|
private static final String ALIGN_TOP_MID = "{\\an8}";
|
||||||
|
private static final String ALIGN_TOP_RIGHT = "{\\an9}";
|
||||||
|
|
||||||
private final StringBuilder textBuilder;
|
private final StringBuilder textBuilder;
|
||||||
|
|
||||||
public SubripDecoder() {
|
public SubripDecoder() {
|
||||||
@ -95,8 +128,36 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
|
|||||||
textBuilder.append(currentLine.trim());
|
textBuilder.append(currentLine.trim());
|
||||||
}
|
}
|
||||||
|
|
||||||
Spanned text = Html.fromHtml(textBuilder.toString());
|
// Extract tags
|
||||||
cues.add(new Cue(text));
|
SubtitleTagResult tagResult = extractTags(textBuilder);
|
||||||
|
Spanned text = Html.fromHtml(tagResult.cue);
|
||||||
|
|
||||||
|
Cue cue = null;
|
||||||
|
|
||||||
|
// Check if tags are present
|
||||||
|
if (tagResult.tags.length > 0) {
|
||||||
|
|
||||||
|
boolean alignTagFound = false;
|
||||||
|
|
||||||
|
// At the end of this loop the cue must be created with the applied tags
|
||||||
|
for (String tag : tagResult.tags) {
|
||||||
|
|
||||||
|
// Check if the tag is an alignment tag
|
||||||
|
if (tag.matches(SUBRIP_ALIGNMENT_TAG)) {
|
||||||
|
|
||||||
|
// Based on the specs, in case of the alignment tags only the first appearance counts
|
||||||
|
if (alignTagFound) continue;
|
||||||
|
alignTagFound = true;
|
||||||
|
|
||||||
|
AlignmentResult alignmentResult = getAlignmentValues(tag);
|
||||||
|
cue = new Cue(text, Layout.Alignment.ALIGN_NORMAL, alignmentResult.line, Cue.LINE_TYPE_FRACTION,
|
||||||
|
alignmentResult.lineAnchor, alignmentResult.position, alignmentResult.positionAnchor, Cue.DIMEN_UNSET);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cues.add(cue == null ? new Cue(text) : cue);
|
||||||
|
|
||||||
if (haveEndTimecode) {
|
if (haveEndTimecode) {
|
||||||
cues.add(null);
|
cues.add(null);
|
||||||
}
|
}
|
||||||
@ -108,6 +169,111 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
|
|||||||
return new SubripSubtitle(cuesArray, cueTimesUsArray);
|
return new SubripSubtitle(cuesArray, cueTimesUsArray);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extracts the tags from the given {@code cue}
|
||||||
|
* The pattern that is used to extract the tags is specified in SSA v4+ specs and
|
||||||
|
* has the following form: "{\...}".
|
||||||
|
* <p>
|
||||||
|
* "All override codes appear within braces {}"
|
||||||
|
* "All override codes are always preceded by a backslash \"
|
||||||
|
*
|
||||||
|
* @param cue Cue text
|
||||||
|
* @return {@link SubtitleTagResult} that holds new cue and also the extracted tags
|
||||||
|
*/
|
||||||
|
private SubtitleTagResult extractTags(StringBuilder cue) {
|
||||||
|
StringBuilder cueCopy = new StringBuilder(cue.toString());
|
||||||
|
List<String> tags = new ArrayList<>();
|
||||||
|
|
||||||
|
int replacedCharacters = 0;
|
||||||
|
|
||||||
|
Matcher matcher = SUBRIP_TAG_PATTERN.matcher(cue.toString());
|
||||||
|
while (matcher.find()) {
|
||||||
|
String tag = matcher.group();
|
||||||
|
tags.add(tag);
|
||||||
|
cueCopy.replace(matcher.start() - replacedCharacters, matcher.end() - replacedCharacters, "");
|
||||||
|
replacedCharacters += tag.length();
|
||||||
|
}
|
||||||
|
|
||||||
|
return new SubtitleTagResult(tags.toArray(new String[tags.size()]), cueCopy.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Match the alignment tag and calculate the line, position, position anchor accordingly
|
||||||
|
*
|
||||||
|
* Based on SSA v4+ specs the alignment tag can have the following form: {\an[1-9},
|
||||||
|
* where the number specifies the direction (based on the numpad layout).
|
||||||
|
* Note. older SSA scripts may contain tags like {\a1[1-9]} but these are based on
|
||||||
|
* other direction rules, but multiple sources says that these are deprecated, so no support here either
|
||||||
|
*
|
||||||
|
* @param tag Alignment tag
|
||||||
|
* @return {@link AlignmentResult} that holds the line, position, position anchor values
|
||||||
|
*/
|
||||||
|
private AlignmentResult getAlignmentValues(String tag) {
|
||||||
|
// Default values used for positioning the subtitle in case of align tags
|
||||||
|
float line = DEFAULT_END_FRACTION, position = DEFAULT_MID_FRACTION;
|
||||||
|
@Cue.AnchorType int positionAnchor = Cue.ANCHOR_TYPE_MIDDLE;
|
||||||
|
@Cue.AnchorType int lineAnchor = Cue.ANCHOR_TYPE_END;
|
||||||
|
|
||||||
|
switch (tag) {
|
||||||
|
case ALIGN_BOTTOM_LEFT:
|
||||||
|
line = DEFAULT_END_FRACTION;
|
||||||
|
position = DEFAULT_START_FRACTION;
|
||||||
|
positionAnchor = Cue.ANCHOR_TYPE_START;
|
||||||
|
lineAnchor = Cue.ANCHOR_TYPE_END;
|
||||||
|
break;
|
||||||
|
case ALIGN_BOTTOM_MID:
|
||||||
|
line = DEFAULT_END_FRACTION;
|
||||||
|
position = DEFAULT_MID_FRACTION;
|
||||||
|
positionAnchor = Cue.ANCHOR_TYPE_MIDDLE;
|
||||||
|
lineAnchor = Cue.ANCHOR_TYPE_END;
|
||||||
|
break;
|
||||||
|
case ALIGN_BOTTOM_RIGHT:
|
||||||
|
line = DEFAULT_END_FRACTION;
|
||||||
|
position = DEFAULT_END_FRACTION;
|
||||||
|
positionAnchor = Cue.ANCHOR_TYPE_END;
|
||||||
|
lineAnchor = Cue.ANCHOR_TYPE_END;
|
||||||
|
break;
|
||||||
|
case ALIGN_MID_LEFT:
|
||||||
|
line = DEFAULT_MID_FRACTION;
|
||||||
|
position = DEFAULT_START_FRACTION;
|
||||||
|
positionAnchor = Cue.ANCHOR_TYPE_START;
|
||||||
|
lineAnchor = Cue.ANCHOR_TYPE_MIDDLE;
|
||||||
|
break;
|
||||||
|
case ALIGN_MID_MID:
|
||||||
|
line = DEFAULT_MID_FRACTION;
|
||||||
|
position = DEFAULT_MID_FRACTION;
|
||||||
|
positionAnchor = Cue.ANCHOR_TYPE_MIDDLE;
|
||||||
|
lineAnchor = Cue.ANCHOR_TYPE_MIDDLE;
|
||||||
|
break;
|
||||||
|
case ALIGN_MID_RIGHT:
|
||||||
|
line = DEFAULT_MID_FRACTION;
|
||||||
|
position = DEFAULT_END_FRACTION;
|
||||||
|
positionAnchor = Cue.ANCHOR_TYPE_END;
|
||||||
|
lineAnchor = Cue.ANCHOR_TYPE_MIDDLE;
|
||||||
|
break;
|
||||||
|
case ALIGN_TOP_LEFT:
|
||||||
|
line = DEFAULT_START_FRACTION;
|
||||||
|
position = DEFAULT_START_FRACTION;
|
||||||
|
positionAnchor = Cue.ANCHOR_TYPE_START;
|
||||||
|
lineAnchor = Cue.ANCHOR_TYPE_START;
|
||||||
|
break;
|
||||||
|
case ALIGN_TOP_MID:
|
||||||
|
line = DEFAULT_START_FRACTION;
|
||||||
|
position = DEFAULT_MID_FRACTION;
|
||||||
|
positionAnchor = Cue.ANCHOR_TYPE_MIDDLE;
|
||||||
|
lineAnchor = Cue.ANCHOR_TYPE_START;
|
||||||
|
break;
|
||||||
|
case ALIGN_TOP_RIGHT:
|
||||||
|
line = DEFAULT_START_FRACTION;
|
||||||
|
position = DEFAULT_END_FRACTION;
|
||||||
|
positionAnchor = Cue.ANCHOR_TYPE_END;
|
||||||
|
lineAnchor = Cue.ANCHOR_TYPE_START;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return new AlignmentResult(positionAnchor, position, lineAnchor, line);
|
||||||
|
}
|
||||||
|
|
||||||
private static long parseTimecode(Matcher matcher, int groupOffset) {
|
private static long parseTimecode(Matcher matcher, int groupOffset) {
|
||||||
long timestampMs = Long.parseLong(matcher.group(groupOffset + 1)) * 60 * 60 * 1000;
|
long timestampMs = Long.parseLong(matcher.group(groupOffset + 1)) * 60 * 60 * 1000;
|
||||||
timestampMs += Long.parseLong(matcher.group(groupOffset + 2)) * 60 * 1000;
|
timestampMs += Long.parseLong(matcher.group(groupOffset + 2)) * 60 * 1000;
|
||||||
@ -116,4 +282,35 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
|
|||||||
return timestampMs * 1000;
|
return timestampMs * 1000;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Class that holds the tags, new clue after the tag extraction
|
||||||
|
*/
|
||||||
|
private static final class SubtitleTagResult {
|
||||||
|
public final String[] tags;
|
||||||
|
public final String cue;
|
||||||
|
|
||||||
|
public SubtitleTagResult(String[] tags, String cue) {
|
||||||
|
this.tags = tags;
|
||||||
|
this.cue = cue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Class that holds the parsed and mapped alignment values (such as line,
|
||||||
|
* position and anchor type of line)
|
||||||
|
*/
|
||||||
|
private static final class AlignmentResult {
|
||||||
|
|
||||||
|
public @Cue.AnchorType int positionAnchor;
|
||||||
|
public @Cue.AnchorType int lineAnchor;
|
||||||
|
public float position, line;
|
||||||
|
|
||||||
|
public AlignmentResult(@Cue.AnchorType int positionAnchor, float position, @Cue.AnchorType int lineAnchor, float line) {
|
||||||
|
this.positionAnchor = positionAnchor;
|
||||||
|
this.position = position;
|
||||||
|
this.line = line;
|
||||||
|
this.lineAnchor = lineAnchor;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
20
library/core/src/test/assets/subrip/typical_with_tags
Normal file
20
library/core/src/test/assets/subrip/typical_with_tags
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
1
|
||||||
|
00:00:00,000 --> 00:00:01,234
|
||||||
|
This is {\an1} the first subtitle.
|
||||||
|
|
||||||
|
2
|
||||||
|
00:00:02,345 --> 00:00:03,456
|
||||||
|
This is the second subtitle.
|
||||||
|
Second {\ an 2} subtitle with second line.
|
||||||
|
|
||||||
|
3
|
||||||
|
00:00:04,567 --> 00:00:08,901
|
||||||
|
This {\an2} is the third {\ tag} subtitle.
|
||||||
|
|
||||||
|
4
|
||||||
|
00:00:09,567 --> 00:00:12,901
|
||||||
|
This { \an2} is the fourth subtitle.
|
||||||
|
|
||||||
|
5
|
||||||
|
00:00:13,567 --> 00:00:14,901
|
||||||
|
This {\an2} is the fifth subtitle with multiple {\xyz} valid {\qwe} tags.
|
@ -36,6 +36,7 @@ public final class SubripDecoderTest {
|
|||||||
private static final String TYPICAL_MISSING_SEQUENCE = "subrip/typical_missing_sequence";
|
private static final String TYPICAL_MISSING_SEQUENCE = "subrip/typical_missing_sequence";
|
||||||
private static final String TYPICAL_NEGATIVE_TIMESTAMPS = "subrip/typical_negative_timestamps";
|
private static final String TYPICAL_NEGATIVE_TIMESTAMPS = "subrip/typical_negative_timestamps";
|
||||||
private static final String TYPICAL_UNEXPECTED_END = "subrip/typical_unexpected_end";
|
private static final String TYPICAL_UNEXPECTED_END = "subrip/typical_unexpected_end";
|
||||||
|
private static final String TYPICAL_WITH_TAGS = "subrip/typical_with_tags";
|
||||||
private static final String NO_END_TIMECODES_FILE = "subrip/no_end_timecodes";
|
private static final String NO_END_TIMECODES_FILE = "subrip/no_end_timecodes";
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -154,6 +155,27 @@ public final class SubripDecoderTest {
|
|||||||
.isEqualTo("Or to the end of the media.");
|
.isEqualTo("Or to the end of the media.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDecodeCueWithTag() throws IOException{
|
||||||
|
SubripDecoder decoder = new SubripDecoder();
|
||||||
|
byte[] bytes = TestUtil.getByteArray(RuntimeEnvironment.application, TYPICAL_WITH_TAGS);
|
||||||
|
SubripSubtitle subtitle = decoder.decode(bytes, bytes.length, false);
|
||||||
|
assertThat(subtitle.getCues(subtitle.getEventTime(0)).get(0).text.toString())
|
||||||
|
.isEqualTo("This is the first subtitle.");
|
||||||
|
assertThat(subtitle.getCues(subtitle.getEventTime(2)).get(0).text.toString())
|
||||||
|
.isEqualTo("This is the second subtitle.\nSecond subtitle with second line.");
|
||||||
|
assertThat(subtitle.getCues(subtitle.getEventTime(4)).get(0).text.toString())
|
||||||
|
.isEqualTo("This is the third subtitle.");
|
||||||
|
|
||||||
|
// Based on the SSA v4+ specs the curly bracket must be followed by a backslash, so this is
|
||||||
|
// not a valid tag (won't be parsed / replaced)
|
||||||
|
assertThat(subtitle.getCues(subtitle.getEventTime(6)).get(0).text.toString())
|
||||||
|
.isEqualTo("This { \\an2} is the fourth subtitle.");
|
||||||
|
|
||||||
|
assertThat(subtitle.getCues(subtitle.getEventTime(8)).get(0).text.toString())
|
||||||
|
.isEqualTo("This is the fifth subtitle with multiple valid tags.");
|
||||||
|
}
|
||||||
|
|
||||||
private static void assertTypicalCue1(SubripSubtitle subtitle, int eventIndex) {
|
private static void assertTypicalCue1(SubripSubtitle subtitle, int eventIndex) {
|
||||||
assertThat(subtitle.getEventTime(eventIndex)).isEqualTo(0);
|
assertThat(subtitle.getEventTime(eventIndex)).isEqualTo(0);
|
||||||
assertThat(subtitle.getCues(subtitle.getEventTime(eventIndex)).get(0).text.toString())
|
assertThat(subtitle.getCues(subtitle.getEventTime(eventIndex)).get(0).text.toString())
|
||||||
|
Loading…
x
Reference in New Issue
Block a user