Add VTT voice spans to cues

This commit is contained in:
Michał Sikora 2024-08-24 01:44:31 +02:00 committed by Ian Baker
parent e8664dbc8e
commit d6f08a6237
7 changed files with 351 additions and 4 deletions

View File

@ -58,7 +58,7 @@ import java.util.ArrayList;
@Documented
@Retention(RetentionPolicy.SOURCE)
@Target({TYPE_USE})
@IntDef({UNKNOWN, RUBY, TEXT_EMPHASIS, HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT})
@IntDef({UNKNOWN, RUBY, TEXT_EMPHASIS, HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT, VOICE})
private @interface CustomSpanType {}
private static final int UNKNOWN = -1;
@ -69,6 +69,8 @@ import java.util.ArrayList;
private static final int HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT = 3;
private static final int VOICE = 4;
private static final String FIELD_START_INDEX = Util.intToStringMaxRadix(0);
private static final String FIELD_END_INDEX = Util.intToStringMaxRadix(1);
private static final String FIELD_FLAGS = Util.intToStringMaxRadix(2);
@ -94,6 +96,10 @@ import java.util.ArrayList;
text, span, /* spanType= */ HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT, /* params= */ null);
bundledCustomSpans.add(bundle);
}
for (VoiceSpan span : text.getSpans(0, text.length(), VoiceSpan.class)) {
Bundle bundle = spanToBundle(text, span, /* spanType= */ VOICE, /* params= */ span.toBundle());
bundledCustomSpans.add(bundle);
}
return bundledCustomSpans;
}
@ -113,6 +119,9 @@ import java.util.ArrayList;
case HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT:
text.setSpan(new HorizontalTextInVerticalContextSpan(), start, end, flags);
break;
case VOICE:
text.setSpan(VoiceSpan.fromBundle(checkNotNull(span)), start, end, flags);
break;
default:
break;
}

View File

@ -0,0 +1,62 @@
/*
* Copyright (C) 2024 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package androidx.media3.common.text;
import static androidx.media3.common.util.Assertions.checkNotNull;
import android.os.Bundle;
import androidx.media3.common.util.UnstableApi;
import androidx.media3.common.util.Util;
import com.google.common.collect.ImmutableSet;
import java.util.Set;
/**
 * A span representing a speaker.
 *
 * <p>More information on <a href="https://www.w3.org/TR/webvtt1/#webvtt-cue-voice-span">voice
 * spans</a>.
 */
@UnstableApi
public final class VoiceSpan implements LanguageFeatureSpan {

  /** The speaker name. */
  public final String speakerName;

  /**
   * The classes associated with the text. It can specify things like "first", "loud", etc.
   *
   * <p>This is an immutable snapshot of the set passed to the constructor.
   */
  public final Set<String> classes;

  private static final String FIELD_NAME = Util.intToStringMaxRadix(0);
  private static final String FIELD_CLASSES = Util.intToStringMaxRadix(1);

  /**
   * Creates a voice span.
   *
   * @param speakerName The speaker name.
   * @param classes The classes associated with the text. The set is copied, so later changes to
   *     the argument are not reflected in {@link #classes}. Must not contain {@code null}
   *     elements.
   */
  public VoiceSpan(String speakerName, Set<String> classes) {
    this.speakerName = speakerName;
    // Defensive copy: the field is public and final, so it must not alias a caller-owned mutable
    // set. ImmutableSet.copyOf returns the argument itself if it is already an ImmutableSet.
    this.classes = ImmutableSet.copyOf(classes);
  }

  /**
   * Returns a {@link Bundle} holding the speaker name and the classes of this span, in a form
   * that {@link #fromBundle(Bundle)} can read back.
   */
  public Bundle toBundle() {
    Bundle bundle = new Bundle();
    bundle.putString(FIELD_NAME, speakerName);
    bundle.putStringArray(FIELD_CLASSES, classes.toArray(new String[0]));
    return bundle;
  }

  /**
   * Recreates a {@link VoiceSpan} from a {@link Bundle} produced by {@link #toBundle()}.
   *
   * @param bundle The bundle to read the speaker name and classes from. Both entries must be
   *     present.
   * @return The restored span.
   */
  public static VoiceSpan fromBundle(Bundle bundle) {
    return new VoiceSpan(
        /* speakerName = */ checkNotNull(bundle.getString(FIELD_NAME)),
        /* classes = */ ImmutableSet.copyOf(checkNotNull(bundle.getStringArray(FIELD_CLASSES))));
  }
}

View File

@ -42,6 +42,7 @@ import org.junit.runner.RunWith;
@RunWith(AndroidJUnit4.class)
public class CustomCueBundlerTest {
private static final VoiceSpan VOICE_SPAN = new VoiceSpan("name", Set.of("first", "loud"));
private static final RubySpan RUBY_SPAN =
new RubySpan("ruby text", TextAnnotation.POSITION_AFTER);
private static final TextEmphasisSpan TEXT_EMPHASIS_SPAN =
@ -55,7 +56,8 @@ public class CustomCueBundlerTest {
ImmutableMap.of(
RUBY_SPAN, new Pair<>(1, 2),
TEXT_EMPHASIS_SPAN, new Pair<>(2, 3),
HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT_SPAN, new Pair<>(5, 7));
HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT_SPAN, new Pair<>(5, 7),
VOICE_SPAN, new Pair<>(8, 10));
@Test
public void serializingSpannableWithAllCustomSpans() {
@ -92,6 +94,11 @@ public class CustomCueBundlerTest {
.hasHorizontalTextInVerticalContextSpanBetween(
ALL_SPANS_TO_START_END_INDEX.get(HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT_SPAN).first,
ALL_SPANS_TO_START_END_INDEX.get(HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT_SPAN).second);
SpannedSubject.assertThat(result)
.hasVoiceSpanBetween(
ALL_SPANS_TO_START_END_INDEX.get(VOICE_SPAN).first,
ALL_SPANS_TO_START_END_INDEX.get(VOICE_SPAN).second)
.withSpeakerNameAndClasses(VOICE_SPAN.speakerName, VOICE_SPAN.classes);
}
@Test

View File

@ -43,6 +43,7 @@ import androidx.media3.common.text.HorizontalTextInVerticalContextSpan;
import androidx.media3.common.text.RubySpan;
import androidx.media3.common.text.SpanUtil;
import androidx.media3.common.text.TextAnnotation;
import androidx.media3.common.text.VoiceSpan;
import androidx.media3.common.util.Assertions;
import androidx.media3.common.util.Log;
import androidx.media3.common.util.ParsableByteArray;
@ -555,8 +556,10 @@ public final class WebvttCueParser {
case TAG_CLASS:
applyDefaultColors(text, startTag.classes, start, end);
break;
case TAG_LANG:
case TAG_VOICE:
applyVoiceSpan(text, startTag.voice, startTag.classes, start, end);
break;
case TAG_LANG:
case "": // Case of the "whole cue" virtual tag.
break;
default:
@ -658,6 +661,11 @@ public final class WebvttCueParser {
}
}
/**
 * Attaches a {@link VoiceSpan} built from {@code voice} and {@code classes} to {@code text}
 * between {@code start} and {@code end}, using {@link Spanned#SPAN_EXCLUSIVE_EXCLUSIVE}.
 */
private static void applyVoiceSpan(
    SpannableStringBuilder text, String voice, Set<String> classes, int start, int end) {
  VoiceSpan voiceSpan = new VoiceSpan(voice, classes);
  text.setSpan(voiceSpan, start, end, Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
}
private static void applyStyleToText(
SpannableStringBuilder spannedText, WebvttCssStyle style, int start, int end) {
if (style == null) {

View File

@ -22,6 +22,7 @@ import android.graphics.Color;
import android.text.Spanned;
import androidx.test.ext.junit.runners.AndroidJUnit4;
import java.util.Collections;
import java.util.Set;
import org.junit.Test;
import org.junit.runner.RunWith;
@ -46,7 +47,7 @@ public final class WebvttCueParserTest {
public void parseStrictValidUnsupportedTagsStrippedOut() throws Exception {
Spanned text =
parseCueText(
"<v.first.loud Esme>This <unsupported>is</unsupported> text with "
"This <unsupported>is</unsupported> text with "
+ "<notsupp><invalid>html</invalid></notsupp> tags");
assertThat(text.toString()).isEqualTo("This is text with html tags");
@ -242,6 +243,59 @@ public final class WebvttCueParserTest {
assertThat(text.toString()).isEqualTo("&&&&&&&");
}
@Test
public void parseEmptyVoiceSpan() throws Exception {
  // A bare <v> tag carries neither a speaker name nor classes.
  String expectedText = "Text with a single voice span";

  Spanned text = parseCueText("<v>Text with a single voice span");

  assertThat(text.toString()).isEqualTo(expectedText);
  assertThat(text)
      .hasVoiceSpanBetween(0, expectedText.length())
      .withSpeakerNameAndClasses("", Collections.emptySet());
}
@Test
public void parseVoiceSpanWithName() throws Exception {
  // The annotation after the tag name ("Esme") is the speaker name; no classes are given.
  String expectedText = "Text with a single voice span";

  Spanned text = parseCueText("<v Esme>Text with a single voice span");

  assertThat(text.toString()).isEqualTo(expectedText);
  assertThat(text)
      .hasVoiceSpanBetween(0, expectedText.length())
      .withSpeakerNameAndClasses("Esme", Collections.emptySet());
}
@Test
public void parseVoiceSpanWithClasses() throws Exception {
  // Dot-separated suffixes on the tag are classes; the speaker name stays empty.
  String expectedText = "Text with a single voice span";
  Set<String> expectedClasses = Set.of("first", "loud");

  Spanned text = parseCueText("<v.first.loud>Text with a single voice span");

  assertThat(text.toString()).isEqualTo(expectedText);
  assertThat(text)
      .hasVoiceSpanBetween(0, expectedText.length())
      .withSpeakerNameAndClasses("", expectedClasses);
}
@Test
public void parseVoiceSpanWithNameAndClasses() throws Exception {
  // Both the classes and the speaker name must end up on the resulting span.
  String expectedText = "Text with a single voice span";
  Set<String> expectedClasses = Set.of("first", "loud");

  Spanned text = parseCueText("<v.first.loud Esme>Text with a single voice span");

  assertThat(text.toString()).isEqualTo(expectedText);
  assertThat(text)
      .hasVoiceSpanBetween(0, expectedText.length())
      .withSpeakerNameAndClasses("Esme", expectedClasses);
}
@Test
public void parseMultipleVoiceSpans() throws Exception {
  // Two consecutive voice tags: the first is explicitly closed, the second runs to the cue end.
  String fullText = "Text with multiple voice spans";
  int firstSpanEnd = "Text with ".length();
  int secondSpanEnd = fullText.length();

  Spanned text = parseCueText("<v.loud Esme>Text with </v><v.quiet Mary>multiple voice spans");

  assertThat(text.toString()).isEqualTo(fullText);
  assertThat(text)
      .hasVoiceSpanBetween(0, firstSpanEnd)
      .withSpeakerNameAndClasses("Esme", Set.of("loud"));
  assertThat(text)
      .hasVoiceSpanBetween(firstSpanEnd, secondSpanEnd)
      .withSpeakerNameAndClasses("Mary", Set.of("quiet"));
}
private static Spanned parseCueText(String string) {
return WebvttCueParser.parseCueText(
/* id= */ null, string, /* styles= */ Collections.emptyList());

View File

@ -42,6 +42,7 @@ import androidx.media3.common.text.HorizontalTextInVerticalContextSpan;
import androidx.media3.common.text.RubySpan;
import androidx.media3.common.text.TextAnnotation;
import androidx.media3.common.text.TextEmphasisSpan;
import androidx.media3.common.text.VoiceSpan;
import androidx.media3.common.util.NullableType;
import androidx.media3.common.util.UnstableApi;
import androidx.media3.common.util.Util;
@ -52,6 +53,8 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import org.checkerframework.checker.nullness.qual.RequiresNonNull;
/** A Truth {@link Subject} for assertions on {@link Spanned} instances containing text styling. */
@ -634,6 +637,42 @@ public final class SpannedSubject extends Subject {
hasNoSpansOfTypeBetween(HorizontalTextInVerticalContextSpan.class, start, end);
}
/**
 * Checks that the subject has exactly one {@link VoiceSpan} from {@code start} to {@code end}.
 *
 * @param start The start of the expected span.
 * @param end The end of the expected span.
 * @return A {@link VoiceText} object for additional assertions on the speaker name and classes.
 */
public VoiceText hasVoiceSpanBetween(int start, int end) {
  if (actual == null) {
    failWithoutActual(simpleFact("Spanned must not be null"));
    return ALREADY_FAILED_WITH_NAME_AND_CLASSES;
  }

  List<VoiceSpan> matches = findMatchingSpans(start, end, VoiceSpan.class);
  if (matches.size() != 1) {
    // Zero or several matching spans: report the failure and hand back a no-op follow-up object.
    String spannedSubstring = actual.toString().substring(start, end);
    failWithExpectedSpan(start, end, VoiceSpan.class, spannedSubstring);
    return ALREADY_FAILED_WITH_NAME_AND_CLASSES;
  }

  return check("VoiceSpan (start=%s,end=%s)", start, end)
      .about(voiceSpanSubjects(actual))
      .that(matches);
}
/**
 * Checks that no {@link VoiceSpan} is attached to any part of the text between {@code start} and
 * {@code end}.
 *
 * <p>This fails even if the start and end indexes don't exactly match.
 *
 * @param start The index at which to start searching for spans.
 * @param end The index at which to stop searching for spans.
 */
public void hasNoVoiceSpanBetween(int start, int end) {
  // Delegates to the generic "no spans of this type" check.
  hasNoSpansOfTypeBetween(VoiceSpan.class, start, end);
}
/**
* Checks that the subject has no spans of type {@code spanClazz} on any of the text between
* {@code start} and {@code end}.
@ -1272,4 +1311,91 @@ public final class SpannedSubject extends Subject {
}
}
}
/** Allows assertions about the speaker name and classes of a matched voice span. */
public interface VoiceText {
/**
* Checks that the matched spans have the expected {@code name} and {@code classes}.
*
* @param name The expected speaker name of the voice span.
* @param classes The expected classes of the voice span.
* @return A {@link AndSpanFlags} object for optional additional assertions on the flags.
*/
AndSpanFlags withSpeakerNameAndClasses(String name, Set<String> classes);
}
/**
 * A {@link VoiceText} returned after an assertion has already failed: it ignores its arguments
 * and hands back {@code ALREADY_FAILED_AND_FLAGS}.
 */
private static final VoiceText ALREADY_FAILED_WITH_NAME_AND_CLASSES =
    (unusedName, unusedClasses) -> ALREADY_FAILED_AND_FLAGS;
/**
 * Returns a factory that creates {@link VoiceSpanSubject} instances bound to {@code
 * actualSpanned}.
 */
private static Factory<VoiceSpanSubject, List<VoiceSpan>> voiceSpanSubjects(
    Spanned actualSpanned) {
  return (FailureMetadata failureMetadata, @Nullable List<VoiceSpan> voiceSpans) -> {
    return new VoiceSpanSubject(failureMetadata, voiceSpans, actualSpanned);
  };
}
/** A {@link Subject} over the {@link VoiceSpan}s matched on a {@link Spanned}. */
private static final class VoiceSpanSubject extends Subject implements VoiceText {

  @Nullable private final List<VoiceSpan> actualSpans;
  private final Spanned actualSpanned;

  private VoiceSpanSubject(
      FailureMetadata metadata,
      @Nullable List<VoiceSpan> actualSpans,
      Spanned actualSpanned) {
    super(metadata, actualSpans);
    this.actualSpans = actualSpans;
    this.actualSpanned = actualSpanned;
  }

  @Override
  public AndSpanFlags withSpeakerNameAndClasses(String name, Set<String> classes) {
    SpeakerNameAndClasses wanted = new SpeakerNameAndClasses(name, classes);
    List<SpeakerNameAndClasses> voiceSpeakerNameAndClasses = new ArrayList<>();
    List<Integer> flagsOfMatchingSpans = new ArrayList<>();

    for (VoiceSpan voiceSpan : checkNotNull(actualSpans)) {
      SpeakerNameAndClasses found =
          new SpeakerNameAndClasses(voiceSpan.speakerName, voiceSpan.classes);
      voiceSpeakerNameAndClasses.add(found);
      // Only spans that carry the expected values contribute flags for follow-up assertions.
      if (found.equals(wanted)) {
        flagsOfMatchingSpans.add(actualSpanned.getSpanFlags(voiceSpan));
      }
    }

    check("voiceSpeakerNameAndClasses").that(voiceSpeakerNameAndClasses).containsExactly(wanted);
    return check("flags").about(spanFlags()).that(flagsOfMatchingSpans);
  }

  /** Value holder pairing a speaker name with its classes, used for comparison and messages. */
  private static final class SpeakerNameAndClasses {

    private final String speakerName;
    private final Set<String> classes;

    private SpeakerNameAndClasses(String name, Set<String> classes) {
      this.speakerName = name;
      this.classes = classes;
    }

    @Override
    public boolean equals(@Nullable Object o) {
      if (this == o) {
        return true;
      }
      // The class is final, so an instanceof check is equivalent to comparing getClass().
      if (!(o instanceof SpeakerNameAndClasses)) {
        return false;
      }
      SpeakerNameAndClasses other = (SpeakerNameAndClasses) o;
      if (!speakerName.equals(other.speakerName)) {
        return false;
      }
      return classes.equals(other.classes);
    }

    @Override
    public int hashCode() {
      return Objects.hash(speakerName, classes);
    }

    @Override
    public String toString() {
      return String.format("{speakerName=%s,classes=%s}", speakerName, classes);
    }
  }
}
}

View File

@ -40,11 +40,13 @@ import androidx.media3.common.text.HorizontalTextInVerticalContextSpan;
import androidx.media3.common.text.RubySpan;
import androidx.media3.common.text.TextAnnotation;
import androidx.media3.common.text.TextEmphasisSpan;
import androidx.media3.common.text.VoiceSpan;
import androidx.media3.common.util.Util;
import androidx.media3.test.utils.truth.SpannedSubject.AndSpanFlags;
import androidx.media3.test.utils.truth.SpannedSubject.WithSpanFlags;
import androidx.test.ext.junit.runners.AndroidJUnit4;
import com.google.common.truth.ExpectFailure;
import java.util.Set;
import org.junit.Test;
import org.junit.runner.RunWith;
@ -902,6 +904,85 @@ public class SpannedSubjectTest {
SpannedSubject::hasNoHorizontalTextInVerticalContextSpanBetween);
}
@Test
public void voiceSpan_success() {
  // A span with matching indexes, speaker name, classes and flags passes the whole chain.
  VoiceSpan voiceSpan = new VoiceSpan("speaker", Set.of("quiet"));
  SpannableString spannable = createSpannable(voiceSpan, Spanned.SPAN_INCLUSIVE_EXCLUSIVE);

  assertThat(spannable)
      .hasVoiceSpanBetween(SPAN_START, SPAN_END)
      .withSpeakerNameAndClasses("speaker", Set.of("quiet"))
      .andFlags(Spanned.SPAN_INCLUSIVE_EXCLUSIVE);
}
@Test
public void voiceSpan_wrongEndIndex() {
  // Reuses the shared index-mismatch check with a voice span.
  VoiceSpan voiceSpan = new VoiceSpan("speaker", Set.of("quiet"));

  checkHasSpanFailsDueToIndexMismatch(voiceSpan, SpannedSubject::hasVoiceSpanBetween);
}
@Test
public void voiceSpan_wrongSpeakerName() {
  VoiceSpan voiceSpan = new VoiceSpan("speaker", Set.of("quiet"));
  SpannableString spannable = createSpannable(voiceSpan);

  // Same classes, different speaker name: the name-and-classes assertion must fail.
  AssertionError failure =
      expectFailure(
          whenTesting ->
              whenTesting
                  .that(spannable)
                  .hasVoiceSpanBetween(SPAN_START, SPAN_END)
                  .withSpeakerNameAndClasses("different speaker", Set.of("quiet")));

  assertThat(failure).factValue("value of").contains("voiceSpeakerNameAndClasses");
  assertThat(failure).factValue("expected").contains("speakerName=different speaker");
  assertThat(failure).factValue("but was").contains("speakerName=speaker");
}
@Test
public void voiceSpan_wrongClasses() {
  VoiceSpan voiceSpan = new VoiceSpan("speaker", Set.of("quiet"));
  SpannableString spannable = createSpannable(voiceSpan);

  // Same speaker name, different classes: the name-and-classes assertion must fail.
  AssertionError failure =
      expectFailure(
          whenTesting ->
              whenTesting
                  .that(spannable)
                  .hasVoiceSpanBetween(SPAN_START, SPAN_END)
                  .withSpeakerNameAndClasses("speaker", Set.of("loud")));

  assertThat(failure).factValue("value of").contains("voiceSpeakerNameAndClasses");
  assertThat(failure).factValue("expected").contains("classes=[loud]");
  assertThat(failure).factValue("but was").contains("classes=[quiet]");
}
@Test
public void voiceSpan_wrongFlags() {
  // Reuses the shared flag-mismatch check; name and classes match so only the flags can differ.
  VoiceSpan voiceSpan = new VoiceSpan("speaker", Set.of("quiet"));

  checkHasSpanFailsDueToFlagMismatch(
      voiceSpan,
      (spannedSubject, spanStart, spanEnd) ->
          spannedSubject
              .hasVoiceSpanBetween(spanStart, spanEnd)
              .withSpeakerNameAndClasses("speaker", Set.of("quiet")));
}
@Test
public void noVoiceSpan_success() {
  // The asserted range is the one covered by the unrelated span, not by the voice span.
  VoiceSpan voiceSpan = new VoiceSpan("speaker", Set.of("quiet"));
  SpannableString spannable = createSpannableWithUnrelatedSpanAnd(voiceSpan);

  assertThat(spannable).hasNoVoiceSpanBetween(UNRELATED_SPAN_START, UNRELATED_SPAN_END);
}
@Test
public void noVoiceSpan_failure() {
  // Reuses the shared "has no span" failure check with a voice span.
  VoiceSpan voiceSpan = new VoiceSpan("speaker", Set.of("quiet"));

  checkHasNoSpanFails(voiceSpan, SpannedSubject::hasNoVoiceSpanBetween);
}
/**
* A function that takes a {@link SpannedSubject} plus a start and end index and returns a
* {@code T}, so that methods such as {@code SpannedSubject::hasVoiceSpanBetween} can be passed as
* method references.
*/
private interface HasSpanFunction<T> {
T call(SpannedSubject s, int start, int end);
}