From ae520a8c2cfac9702f3cc1c87d504faf5232d95f Mon Sep 17 00:00:00 2001 From: Arnold Szabo Date: Wed, 25 Jul 2018 01:29:07 +0300 Subject: [PATCH] #4306 - Extract tags from SubRip subtitles, add support for alignment tags based on SSA v4+ --- .../exoplayer2/text/subrip/SubripDecoder.java | 201 +++++++++++++++++- .../src/test/assets/subrip/typical_with_tags | 20 ++ .../text/subrip/SubripDecoderTest.java | 22 ++ 3 files changed, 241 insertions(+), 2 deletions(-) create mode 100644 library/core/src/test/assets/subrip/typical_with_tags diff --git a/library/core/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java b/library/core/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java index 6cce902e87..96c065973e 100644 --- a/library/core/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java +++ b/library/core/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java @@ -15,7 +15,9 @@ */ package com.google.android.exoplayer2.text.subrip; +import android.support.annotation.StringDef; import android.text.Html; +import android.text.Layout; import android.text.Spanned; import android.text.TextUtils; import android.util.Log; @@ -23,7 +25,11 @@ import com.google.android.exoplayer2.text.Cue; import com.google.android.exoplayer2.text.SimpleSubtitleDecoder; import com.google.android.exoplayer2.util.LongArray; import com.google.android.exoplayer2.util.ParsableByteArray; + +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; import java.util.ArrayList; +import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -38,6 +44,33 @@ public final class SubripDecoder extends SimpleSubtitleDecoder { private static final Pattern SUBRIP_TIMING_LINE = Pattern.compile("\\s*(" + SUBRIP_TIMECODE + ")\\s*-->\\s*(" + SUBRIP_TIMECODE + ")?\\s*"); + private static final Pattern SUBRIP_TAG_PATTERN = Pattern.compile("\\{\\\\.*?\\}"); + private static final String 
SUBRIP_ALIGNMENT_TAG = "\\{\\\\an[1-9]\\}"; + + private static final float DEFAULT_START_FRACTION = 0.08f; + private static final float DEFAULT_END_FRACTION = 1 - DEFAULT_START_FRACTION; + private static final float DEFAULT_MID_FRACTION = 0.5f; + + @Retention(RetentionPolicy.SOURCE) + @StringDef({ + ALIGN_BOTTOM_LEFT, ALIGN_BOTTOM_MID, ALIGN_BOTTOM_RIGHT, + ALIGN_MID_LEFT, ALIGN_MID_MID, ALIGN_MID_RIGHT, + ALIGN_TOP_LEFT, ALIGN_TOP_MID, ALIGN_TOP_RIGHT + }) + + private @interface SubRipTag {} + + // Possible valid alignment tags based on SSA v4+ specs + private static final String ALIGN_BOTTOM_LEFT = "{\\an1}"; + private static final String ALIGN_BOTTOM_MID = "{\\an2}"; + private static final String ALIGN_BOTTOM_RIGHT = "{\\an3}"; + private static final String ALIGN_MID_LEFT = "{\\an4}"; + private static final String ALIGN_MID_MID = "{\\an5}"; + private static final String ALIGN_MID_RIGHT = "{\\an6}"; + private static final String ALIGN_TOP_LEFT = "{\\an7}"; + private static final String ALIGN_TOP_MID = "{\\an8}"; + private static final String ALIGN_TOP_RIGHT = "{\\an9}"; + private final StringBuilder textBuilder; public SubripDecoder() { @@ -95,8 +128,36 @@ public final class SubripDecoder extends SimpleSubtitleDecoder { textBuilder.append(currentLine.trim()); } - Spanned text = Html.fromHtml(textBuilder.toString()); - cues.add(new Cue(text)); + // Extract tags + SubtitleTagResult tagResult = extractTags(textBuilder); + Spanned text = Html.fromHtml(tagResult.cue); + + Cue cue = null; + + // Check if tags are present + if (tagResult.tags.length > 0) { + + boolean alignTagFound = false; + + // At the end of this loop the cue must be created with the applied tags + for (String tag : tagResult.tags) { + + // Check if the tag is an alignment tag + if (tag.matches(SUBRIP_ALIGNMENT_TAG)) { + + // Based on the specs, in case of the alignment tags only the first appearance counts + if (alignTagFound) continue; + alignTagFound = true; + + AlignmentResult alignmentResult = 
getAlignmentValues(tag); + cue = new Cue(text, Layout.Alignment.ALIGN_NORMAL, alignmentResult.line, Cue.LINE_TYPE_FRACTION, + alignmentResult.lineAnchor, alignmentResult.position, alignmentResult.positionAnchor, Cue.DIMEN_UNSET); + } + } + } + + cues.add(cue == null ? new Cue(text) : cue); + if (haveEndTimecode) { cues.add(null); } @@ -108,6 +169,111 @@ public final class SubripDecoder extends SimpleSubtitleDecoder { return new SubripSubtitle(cuesArray, cueTimesUsArray); } + /** + * Extracts the tags from the given {@code cue} + * The pattern that is used to extract the tags is specified in SSA v4+ specs and + * has the following form: "{\...}". + *

+ * "All override codes appear within braces {}" + * "All override codes are always preceded by a backslash \" + * + * @param cue Cue text + * @return {@link SubtitleTagResult} that holds new cue and also the extracted tags + */ + private SubtitleTagResult extractTags(StringBuilder cue) { + StringBuilder cueCopy = new StringBuilder(cue.toString()); + List<String> tags = new ArrayList<>(); + + int replacedCharacters = 0; + + Matcher matcher = SUBRIP_TAG_PATTERN.matcher(cue.toString()); + while (matcher.find()) { + String tag = matcher.group(); + tags.add(tag); + cueCopy.replace(matcher.start() - replacedCharacters, matcher.end() - replacedCharacters, ""); + replacedCharacters += tag.length(); + } + + return new SubtitleTagResult(tags.toArray(new String[tags.size()]), cueCopy.toString()); + } + + /** + * Match the alignment tag and calculate the line, position, position anchor accordingly + * + * Based on SSA v4+ specs the alignment tag can have the following form: {\an[1-9]}, + * where the number specifies the direction (based on the numpad layout). + * Note. 
older SSA scripts may contain tags like {\a1[1-9]} but these are based on + * other direction rules, but multiple sources say that these are deprecated, so they are not supported here + * + * @param tag Alignment tag + * @return {@link AlignmentResult} that holds the line, position, position anchor values + */ + private AlignmentResult getAlignmentValues(String tag) { + // Default values used for positioning the subtitle in case of align tags + float line = DEFAULT_END_FRACTION, position = DEFAULT_MID_FRACTION; + @Cue.AnchorType int positionAnchor = Cue.ANCHOR_TYPE_MIDDLE; + @Cue.AnchorType int lineAnchor = Cue.ANCHOR_TYPE_END; + + switch (tag) { + case ALIGN_BOTTOM_LEFT: + line = DEFAULT_END_FRACTION; + position = DEFAULT_START_FRACTION; + positionAnchor = Cue.ANCHOR_TYPE_START; + lineAnchor = Cue.ANCHOR_TYPE_END; + break; + case ALIGN_BOTTOM_MID: + line = DEFAULT_END_FRACTION; + position = DEFAULT_MID_FRACTION; + positionAnchor = Cue.ANCHOR_TYPE_MIDDLE; + lineAnchor = Cue.ANCHOR_TYPE_END; + break; + case ALIGN_BOTTOM_RIGHT: + line = DEFAULT_END_FRACTION; + position = DEFAULT_END_FRACTION; + positionAnchor = Cue.ANCHOR_TYPE_END; + lineAnchor = Cue.ANCHOR_TYPE_END; + break; + case ALIGN_MID_LEFT: + line = DEFAULT_MID_FRACTION; + position = DEFAULT_START_FRACTION; + positionAnchor = Cue.ANCHOR_TYPE_START; + lineAnchor = Cue.ANCHOR_TYPE_MIDDLE; + break; + case ALIGN_MID_MID: + line = DEFAULT_MID_FRACTION; + position = DEFAULT_MID_FRACTION; + positionAnchor = Cue.ANCHOR_TYPE_MIDDLE; + lineAnchor = Cue.ANCHOR_TYPE_MIDDLE; + break; + case ALIGN_MID_RIGHT: + line = DEFAULT_MID_FRACTION; + position = DEFAULT_END_FRACTION; + positionAnchor = Cue.ANCHOR_TYPE_END; + lineAnchor = Cue.ANCHOR_TYPE_MIDDLE; + break; + case ALIGN_TOP_LEFT: + line = DEFAULT_START_FRACTION; + position = DEFAULT_START_FRACTION; + positionAnchor = Cue.ANCHOR_TYPE_START; + lineAnchor = Cue.ANCHOR_TYPE_START; + break; + case ALIGN_TOP_MID: + line = DEFAULT_START_FRACTION; + position = DEFAULT_MID_FRACTION; 
+ positionAnchor = Cue.ANCHOR_TYPE_MIDDLE; + lineAnchor = Cue.ANCHOR_TYPE_START; + break; + case ALIGN_TOP_RIGHT: + line = DEFAULT_START_FRACTION; + position = DEFAULT_END_FRACTION; + positionAnchor = Cue.ANCHOR_TYPE_END; + lineAnchor = Cue.ANCHOR_TYPE_START; + break; + } + + return new AlignmentResult(positionAnchor, position, lineAnchor, line); + } + private static long parseTimecode(Matcher matcher, int groupOffset) { long timestampMs = Long.parseLong(matcher.group(groupOffset + 1)) * 60 * 60 * 1000; timestampMs += Long.parseLong(matcher.group(groupOffset + 2)) * 60 * 1000; @@ -116,4 +282,35 @@ public final class SubripDecoder extends SimpleSubtitleDecoder { return timestampMs * 1000; } + /** + * Class that holds the extracted tags and the cue text that remains after the tag extraction + */ + private static final class SubtitleTagResult { + public final String[] tags; + public final String cue; + + public SubtitleTagResult(String[] tags, String cue) { + this.tags = tags; + this.cue = cue; + } + } + + /** + * Class that holds the parsed and mapped alignment values (such as line, + * position and anchor type of line) + */ + private static final class AlignmentResult { + + public final @Cue.AnchorType int positionAnchor; + public final @Cue.AnchorType int lineAnchor; + public final float position, line; + + public AlignmentResult(@Cue.AnchorType int positionAnchor, float position, @Cue.AnchorType int lineAnchor, float line) { + this.positionAnchor = positionAnchor; + this.position = position; + this.line = line; + this.lineAnchor = lineAnchor; + } + } + } diff --git a/library/core/src/test/assets/subrip/typical_with_tags b/library/core/src/test/assets/subrip/typical_with_tags new file mode 100644 index 0000000000..02e1ffbcd9 --- /dev/null +++ b/library/core/src/test/assets/subrip/typical_with_tags @@ -0,0 +1,20 @@ +1 +00:00:00,000 --> 00:00:01,234 +This is {\an1} the first subtitle. + +2 +00:00:02,345 --> 00:00:03,456 +This is the second subtitle. +Second {\ an 2} subtitle with second line. 
+ +3 +00:00:04,567 --> 00:00:08,901 +This {\an2} is the third {\ tag} subtitle. + +4 +00:00:09,567 --> 00:00:12,901 +This { \an2} is the fourth subtitle. + +5 +00:00:013,567 --> 00:00:14,901 +This {\an2} is the fifth subtitle with multiple {\xyz} valid {\qwe} tags. diff --git a/library/core/src/test/java/com/google/android/exoplayer2/text/subrip/SubripDecoderTest.java b/library/core/src/test/java/com/google/android/exoplayer2/text/subrip/SubripDecoderTest.java index e9abaca075..a9d69076c2 100644 --- a/library/core/src/test/java/com/google/android/exoplayer2/text/subrip/SubripDecoderTest.java +++ b/library/core/src/test/java/com/google/android/exoplayer2/text/subrip/SubripDecoderTest.java @@ -36,6 +36,7 @@ public final class SubripDecoderTest { private static final String TYPICAL_MISSING_SEQUENCE = "subrip/typical_missing_sequence"; private static final String TYPICAL_NEGATIVE_TIMESTAMPS = "subrip/typical_negative_timestamps"; private static final String TYPICAL_UNEXPECTED_END = "subrip/typical_unexpected_end"; + private static final String TYPICAL_WITH_TAGS = "subrip/typical_with_tags"; private static final String NO_END_TIMECODES_FILE = "subrip/no_end_timecodes"; @Test @@ -154,6 +155,27 @@ public final class SubripDecoderTest { .isEqualTo("Or to the end of the media."); } + @Test + public void testDecodeCueWithTag() throws IOException{ + SubripDecoder decoder = new SubripDecoder(); + byte[] bytes = TestUtil.getByteArray(RuntimeEnvironment.application, TYPICAL_WITH_TAGS); + SubripSubtitle subtitle = decoder.decode(bytes, bytes.length, false); + assertThat(subtitle.getCues(subtitle.getEventTime(0)).get(0).text.toString()) + .isEqualTo("This is the first subtitle."); + assertThat(subtitle.getCues(subtitle.getEventTime(2)).get(0).text.toString()) + .isEqualTo("This is the second subtitle.\nSecond subtitle with second line."); + assertThat(subtitle.getCues(subtitle.getEventTime(4)).get(0).text.toString()) + .isEqualTo("This is the third subtitle."); + + // Based on the 
SSA v4+ specs the curly bracket must be followed by a backslash, so this is + // not a valid tag (won't be parsed / replaced) + assertThat(subtitle.getCues(subtitle.getEventTime(6)).get(0).text.toString()) + .isEqualTo("This { \\an2} is the fourth subtitle."); + + assertThat(subtitle.getCues(subtitle.getEventTime(8)).get(0).text.toString()) + .isEqualTo("This is the fifth subtitle with multiple valid tags."); + } + private static void assertTypicalCue1(SubripSubtitle subtitle, int eventIndex) { assertThat(subtitle.getEventTime(eventIndex)).isEqualTo(0); assertThat(subtitle.getCues(subtitle.getEventTime(eventIndex)).get(0).text.toString())