Support multiple <rt> nodes inside <ruby> in WebVTT

This is supported (despite what my now-deleted comment says), see the
example here:
https://developer.mozilla.org/en-US/docs/Web/HTML/Element/rt

PiperOrigin-RevId: 304612099
This commit is contained in:
ibaker 2020-04-03 15:30:56 +01:00 committed by Oliver Woodman
parent fb0330d4db
commit bdffab1200
2 changed files with 56 additions and 21 deletions

View File

@ -48,6 +48,7 @@ import java.lang.annotation.Retention;
import java.util.ArrayDeque; import java.util.ArrayDeque;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@ -532,27 +533,7 @@ public final class WebvttCueParser {
Spanned.SPAN_EXCLUSIVE_EXCLUSIVE); Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
break; break;
case TAG_RUBY: case TAG_RUBY:
@Nullable Element rubyTextElement = null; applyRubySpans(nestedElements, text, start);
for (int i = 0; i < nestedElements.size(); i++) {
if (TAG_RUBY_TEXT.equals(nestedElements.get(i).startTag.name)) {
rubyTextElement = nestedElements.get(i);
// Behaviour of multiple <rt> tags inside <ruby> is undefined, so use the first one.
break;
}
}
if (rubyTextElement == null) {
break;
}
// Move the rubyText from spannedText into the RubySpan.
CharSequence rubyText =
text.subSequence(rubyTextElement.startTag.position, rubyTextElement.endPosition);
text.delete(rubyTextElement.startTag.position, rubyTextElement.endPosition);
end -= rubyText.length();
text.setSpan(
new RubySpan(rubyText.toString(), RubySpan.POSITION_OVER),
start,
end,
Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
break; break;
case TAG_UNDERLINE: case TAG_UNDERLINE:
text.setSpan(new UnderlineSpan(), start, end, Spanned.SPAN_EXCLUSIVE_EXCLUSIVE); text.setSpan(new UnderlineSpan(), start, end, Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
@ -575,6 +556,34 @@ public final class WebvttCueParser {
} }
} }
/**
 * Moves the text of every {@code <rt>} element in {@code nestedElements} out of {@code text} and
 * into a {@link RubySpan}.
 *
 * <p>The elements are visited in ascending start position. For each {@code <rt>} element, its
 * characters are deleted from {@code text} and a {@link RubySpan} carrying them (positioned
 * {@link RubySpan#POSITION_OVER}) is set over the range that runs from the end of the previous
 * ruby text (or {@code startTagPosition} for the first one) up to where the deleted ruby text
 * began. Elements with any other tag name are ignored.
 *
 * @param nestedElements The elements to search for {@code <rt>} elements. This list is copied and
 *     not reordered.
 * @param text The text to modify in place.
 * @param startTagPosition The position in {@code text} at which the first span starts.
 */
private static void applyRubySpans(
    List<Element> nestedElements, SpannableStringBuilder text, int startTagPosition) {
  // Sort a private copy so the caller's list keeps its original order.
  List<Element> elementsInOrder = new ArrayList<>(nestedElements);
  Collections.sort(elementsInOrder, Element.BY_START_POSITION_ASC);

  // Element positions were recorded before any deletion, so each one has to be shifted left by
  // the number of characters removed so far.
  int removedLength = 0;
  int baseTextStart = startTagPosition;
  for (Element element : elementsInOrder) {
    if (TAG_RUBY_TEXT.equals(element.startTag.name)) {
      int rubyStart = element.startTag.position - removedLength;
      int rubyEnd = element.endPosition - removedLength;
      // Lift the ruby text out of the cue text; it lives only on the span from here on.
      String rubyText = text.subSequence(rubyStart, rubyEnd).toString();
      text.delete(rubyStart, rubyEnd);
      text.setSpan(
          new RubySpan(rubyText, RubySpan.POSITION_OVER),
          baseTextStart,
          rubyStart,
          Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
      removedLength += rubyText.length();
      // After the deletion, whatever followed the ruby text now begins at rubyStart.
      baseTextStart = rubyStart;
    }
  }
}
/** /**
* Adds {@link ForegroundColorSpan}s and {@link BackgroundColorSpan}s to {@code text} for entries * Adds {@link ForegroundColorSpan}s and {@link BackgroundColorSpan}s to {@code text} for entries
* in {@code classes} that match WebVTT's <a * in {@code classes} that match WebVTT's <a
@ -921,6 +930,9 @@ public final class WebvttCueParser {
/** Information about a complete element (i.e. start tag and end position). */ /** Information about a complete element (i.e. start tag and end position). */
private static class Element { private static class Element {
/** Orders elements by ascending {@code startTag.position}. */
private static final Comparator<Element> BY_START_POSITION_ASC =
(e1, e2) -> Integer.compare(e1.startTag.position, e2.startTag.position);
private final StartTag startTag; private final StartTag startTag;
/** /**
* The position of the end of this element's text in the un-marked-up cue text (i.e. the * The position of the end of this element's text in the un-marked-up cue text (i.e. the

View File

@ -62,6 +62,29 @@ public final class WebvttCueParserTest {
.withTextAndPosition("with ruby", RubySpan.POSITION_OVER); .withTextAndPosition("with ruby", RubySpan.POSITION_OVER);
} }
@Test
public void parseSingleRubyTagWithMultipleRts() throws Exception {
  String cueText = "<ruby>A<rt>1</rt>B<rt>2</rt>C<rt>3</rt></ruby>";

  Spanned parsed = parseCueText(cueText);

  // Only the base text survives in the parsed output; each <rt> body is carried by a RubySpan.
  assertThat(parsed.toString()).isEqualTo("ABC");
  // One span per <rt>, each covering the single base character that precedes it.
  assertThat(parsed).hasRubySpanBetween(0, 1).withTextAndPosition("1", RubySpan.POSITION_OVER);
  assertThat(parsed).hasRubySpanBetween(1, 2).withTextAndPosition("2", RubySpan.POSITION_OVER);
  assertThat(parsed).hasRubySpanBetween(2, 3).withTextAndPosition("3", RubySpan.POSITION_OVER);
}
@Test
public void parseMultipleRubyTagsWithSingleRtEach() throws Exception {
  String cueText = "<ruby>A<rt>1</rt></ruby><ruby>B<rt>2</rt></ruby><ruby>C<rt>3</rt></ruby>";

  Spanned parsed = parseCueText(cueText);

  // Only the base text survives in the parsed output; each <rt> body is carried by a RubySpan.
  assertThat(parsed.toString()).isEqualTo("ABC");
  // Three adjacent <ruby> elements yield three spans, one per base character.
  assertThat(parsed).hasRubySpanBetween(0, 1).withTextAndPosition("1", RubySpan.POSITION_OVER);
  assertThat(parsed).hasRubySpanBetween(1, 2).withTextAndPosition("2", RubySpan.POSITION_OVER);
  assertThat(parsed).hasRubySpanBetween(2, 3).withTextAndPosition("3", RubySpan.POSITION_OVER);
}
@Test @Test
public void parseRubyTagWithNoTextTag() throws Exception { public void parseRubyTagWithNoTextTag() throws Exception {
Spanned text = parseCueText("Some <ruby>base text with no ruby text</ruby>"); Spanned text = parseCueText("Some <ruby>base text with no ruby text</ruby>");