From d6f08a6237ae8fdb2af1ebf2ac5bca5f0eb619ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Sikora?= Date: Sat, 24 Aug 2024 01:44:31 +0200 Subject: [PATCH] Add VTT voice spans to cues --- .../media3/common/text/CustomSpanBundler.java | 11 +- .../media3/common/text/VoiceSpan.java | 62 +++++++++ .../common/text/CustomCueBundlerTest.java | 9 +- .../text/webvtt/WebvttCueParser.java | 10 +- .../text/webvtt/WebvttCueParserTest.java | 56 +++++++- .../test/utils/truth/SpannedSubject.java | 126 ++++++++++++++++++ .../test/utils/truth/SpannedSubjectTest.java | 81 +++++++++++ 7 files changed, 351 insertions(+), 4 deletions(-) create mode 100644 libraries/common/src/main/java/androidx/media3/common/text/VoiceSpan.java diff --git a/libraries/common/src/main/java/androidx/media3/common/text/CustomSpanBundler.java b/libraries/common/src/main/java/androidx/media3/common/text/CustomSpanBundler.java index edcda586d2..d5f7d15e32 100644 --- a/libraries/common/src/main/java/androidx/media3/common/text/CustomSpanBundler.java +++ b/libraries/common/src/main/java/androidx/media3/common/text/CustomSpanBundler.java @@ -58,7 +58,7 @@ import java.util.ArrayList; @Documented @Retention(RetentionPolicy.SOURCE) @Target({TYPE_USE}) - @IntDef({UNKNOWN, RUBY, TEXT_EMPHASIS, HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT}) + @IntDef({UNKNOWN, RUBY, TEXT_EMPHASIS, HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT, VOICE}) private @interface CustomSpanType {} private static final int UNKNOWN = -1; @@ -69,6 +69,8 @@ import java.util.ArrayList; private static final int HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT = 3; + private static final int VOICE = 4; + private static final String FIELD_START_INDEX = Util.intToStringMaxRadix(0); private static final String FIELD_END_INDEX = Util.intToStringMaxRadix(1); private static final String FIELD_FLAGS = Util.intToStringMaxRadix(2); @@ -94,6 +96,10 @@ import java.util.ArrayList; text, span, /* spanType= */ HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT, /* params= */ null); 
bundledCustomSpans.add(bundle); } + for (VoiceSpan span : text.getSpans(0, text.length(), VoiceSpan.class)) { + Bundle bundle = spanToBundle(text, span, /* spanType= */ VOICE, /* params= */ span.toBundle()); + bundledCustomSpans.add(bundle); + } return bundledCustomSpans; } @@ -113,6 +119,9 @@ import java.util.ArrayList; case HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT: text.setSpan(new HorizontalTextInVerticalContextSpan(), start, end, flags); break; + case VOICE: + text.setSpan(VoiceSpan.fromBundle(checkNotNull(span)), start, end, flags); + break; default: break; } diff --git a/libraries/common/src/main/java/androidx/media3/common/text/VoiceSpan.java b/libraries/common/src/main/java/androidx/media3/common/text/VoiceSpan.java new file mode 100644 index 0000000000..66f74b1004 --- /dev/null +++ b/libraries/common/src/main/java/androidx/media3/common/text/VoiceSpan.java @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2024 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package androidx.media3.common.text; + +import static androidx.media3.common.util.Assertions.checkNotNull; + +import android.os.Bundle; +import androidx.media3.common.util.UnstableApi; +import androidx.media3.common.util.Util; +import com.google.common.collect.ImmutableSet; +import java.util.Set; + +/** + * A span representing a speaker. + * + *

<p>More information on + * voice spans is available in the W3C WebVTT specification ("WebVTT cue voice span"). + */ +@UnstableApi +public final class VoiceSpan implements LanguageFeatureSpan { + + /** The speaker name. */ + public final String speakerName; + + /** The classes associated with the text. They can specify things like "first", "loud", etc. */ + public final Set classes; + + private static final String FIELD_NAME = Util.intToStringMaxRadix(0); + private static final String FIELD_CLASSES = Util.intToStringMaxRadix(1); + + public VoiceSpan(String speakerName, Set classes) { + this.speakerName = speakerName; + this.classes = classes; + } + + public Bundle toBundle() { + Bundle bundle = new Bundle(); + bundle.putString(FIELD_NAME, speakerName); + bundle.putStringArray(FIELD_CLASSES, classes.toArray(new String[0])); + return bundle; + } + + public static VoiceSpan fromBundle(Bundle bundle) { + return new VoiceSpan( + /* speakerName= */ checkNotNull(bundle.getString(FIELD_NAME)), + /* classes= */ ImmutableSet.copyOf(checkNotNull(bundle.getStringArray(FIELD_CLASSES)))); + } +} diff --git a/libraries/common/src/test/java/androidx/media3/common/text/CustomCueBundlerTest.java b/libraries/common/src/test/java/androidx/media3/common/text/CustomCueBundlerTest.java index 45a8d2ab1e..a2b01343f9 100644 --- a/libraries/common/src/test/java/androidx/media3/common/text/CustomCueBundlerTest.java +++ b/libraries/common/src/test/java/androidx/media3/common/text/CustomCueBundlerTest.java @@ -42,6 +42,7 @@ import org.junit.runner.RunWith; @RunWith(AndroidJUnit4.class) public class CustomCueBundlerTest { + private static final VoiceSpan VOICE_SPAN = new VoiceSpan("name", Set.of("first", "loud")); private static final RubySpan RUBY_SPAN = new RubySpan("ruby text", TextAnnotation.POSITION_AFTER); private static final TextEmphasisSpan TEXT_EMPHASIS_SPAN = @@ -55,7 +56,8 @@ public class CustomCueBundlerTest { ImmutableMap.of( RUBY_SPAN, new Pair<>(1, 2), TEXT_EMPHASIS_SPAN, new Pair<>(2, 3), - HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT_SPAN, new Pair<>(5, 7)); +
HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT_SPAN, new Pair<>(5, 7), + VOICE_SPAN, new Pair<>(8, 10)); @Test public void serializingSpannableWithAllCustomSpans() { @@ -92,6 +94,11 @@ public class CustomCueBundlerTest { .hasHorizontalTextInVerticalContextSpanBetween( ALL_SPANS_TO_START_END_INDEX.get(HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT_SPAN).first, ALL_SPANS_TO_START_END_INDEX.get(HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT_SPAN).second); + SpannedSubject.assertThat(result) + .hasVoiceSpanBetween( + ALL_SPANS_TO_START_END_INDEX.get(VOICE_SPAN).first, + ALL_SPANS_TO_START_END_INDEX.get(VOICE_SPAN).second) + .withSpeakerNameAndClasses(VOICE_SPAN.speakerName, VOICE_SPAN.classes); } @Test diff --git a/libraries/extractor/src/main/java/androidx/media3/extractor/text/webvtt/WebvttCueParser.java b/libraries/extractor/src/main/java/androidx/media3/extractor/text/webvtt/WebvttCueParser.java index cbf44a94b6..b2a5527db5 100644 --- a/libraries/extractor/src/main/java/androidx/media3/extractor/text/webvtt/WebvttCueParser.java +++ b/libraries/extractor/src/main/java/androidx/media3/extractor/text/webvtt/WebvttCueParser.java @@ -43,6 +43,7 @@ import androidx.media3.common.text.HorizontalTextInVerticalContextSpan; import androidx.media3.common.text.RubySpan; import androidx.media3.common.text.SpanUtil; import androidx.media3.common.text.TextAnnotation; +import androidx.media3.common.text.VoiceSpan; import androidx.media3.common.util.Assertions; import androidx.media3.common.util.Log; import androidx.media3.common.util.ParsableByteArray; @@ -555,8 +556,10 @@ public final class WebvttCueParser { case TAG_CLASS: applyDefaultColors(text, startTag.classes, start, end); break; - case TAG_LANG: case TAG_VOICE: + applyVoiceSpan(text, startTag.voice, startTag.classes, start, end); + break; + case TAG_LANG: case "": // Case of the "whole cue" virtual tag. 
break; default: @@ -658,6 +661,11 @@ public final class WebvttCueParser { } } + private static void applyVoiceSpan( + SpannableStringBuilder text, String voice, Set classes, int start, int end) { + text.setSpan(new VoiceSpan(voice, classes), start, end, Spanned.SPAN_EXCLUSIVE_EXCLUSIVE); + } + private static void applyStyleToText( SpannableStringBuilder spannedText, WebvttCssStyle style, int start, int end) { if (style == null) { diff --git a/libraries/extractor/src/test/java/androidx/media3/extractor/text/webvtt/WebvttCueParserTest.java b/libraries/extractor/src/test/java/androidx/media3/extractor/text/webvtt/WebvttCueParserTest.java index e49bc8604b..0fd5eda714 100644 --- a/libraries/extractor/src/test/java/androidx/media3/extractor/text/webvtt/WebvttCueParserTest.java +++ b/libraries/extractor/src/test/java/androidx/media3/extractor/text/webvtt/WebvttCueParserTest.java @@ -22,6 +22,7 @@ import android.graphics.Color; import android.text.Spanned; import androidx.test.ext.junit.runners.AndroidJUnit4; import java.util.Collections; +import java.util.Set; import org.junit.Test; import org.junit.runner.RunWith; @@ -46,7 +47,7 @@ public final class WebvttCueParserTest { public void parseStrictValidUnsupportedTagsStrippedOut() throws Exception { Spanned text = parseCueText( - "This is text with " + "This is text with " + "html tags"); assertThat(text.toString()).isEqualTo("This is text with html tags"); @@ -242,6 +243,59 @@ public final class WebvttCueParserTest { assertThat(text.toString()).isEqualTo("&&&&&&&"); } + @Test + public void parseEmptyVoiceSpan() throws Exception { + Spanned text = parseCueText("Text with a single voice span"); + + assertThat(text.toString()).isEqualTo("Text with a single voice span"); + assertThat(text) + .hasVoiceSpanBetween(0, "Text with a single voice span".length()) + .withSpeakerNameAndClasses("", Collections.emptySet()); + } + + @Test + public void parseVoiceSpanWithName() throws Exception { + Spanned text = parseCueText("Text with a 
single voice span"); + + assertThat(text.toString()).isEqualTo("Text with a single voice span"); + assertThat(text) + .hasVoiceSpanBetween(0, "Text with a single voice span".length()) + .withSpeakerNameAndClasses("Esme", Collections.emptySet()); + } + + @Test + public void parseVoiceSpanWithClasses() throws Exception { + Spanned text = parseCueText("Text with a single voice span"); + + assertThat(text.toString()).isEqualTo("Text with a single voice span"); + assertThat(text) + .hasVoiceSpanBetween(0, "Text with a single voice span".length()) + .withSpeakerNameAndClasses("", Set.of("first", "loud")); + } + + @Test + public void parseVoiceSpanWithNameAndClasses() throws Exception { + Spanned text = parseCueText("Text with a single voice span"); + + assertThat(text.toString()).isEqualTo("Text with a single voice span"); + assertThat(text) + .hasVoiceSpanBetween(0, "Text with a single voice span".length()) + .withSpeakerNameAndClasses("Esme", Set.of("first", "loud")); + } + + @Test + public void parseMultipleVoiceSpans() throws Exception { + Spanned text = parseCueText("Text with multiple voice spans"); + + assertThat(text.toString()).isEqualTo("Text with multiple voice spans"); + assertThat(text) + .hasVoiceSpanBetween(0, "Text with ".length()) + .withSpeakerNameAndClasses("Esme", Set.of("loud")); + assertThat(text) + .hasVoiceSpanBetween("Text with ".length(), "Text with multiple voice spans".length()) + .withSpeakerNameAndClasses("Mary", Set.of("quiet")); + } + private static Spanned parseCueText(String string) { return WebvttCueParser.parseCueText( /* id= */ null, string, /* styles= */ Collections.emptyList()); diff --git a/libraries/test_utils/src/main/java/androidx/media3/test/utils/truth/SpannedSubject.java b/libraries/test_utils/src/main/java/androidx/media3/test/utils/truth/SpannedSubject.java index 43d7b9533e..a05c6fde82 100644 --- a/libraries/test_utils/src/main/java/androidx/media3/test/utils/truth/SpannedSubject.java +++ 
b/libraries/test_utils/src/main/java/androidx/media3/test/utils/truth/SpannedSubject.java @@ -42,6 +42,7 @@ import androidx.media3.common.text.HorizontalTextInVerticalContextSpan; import androidx.media3.common.text.RubySpan; import androidx.media3.common.text.TextAnnotation; import androidx.media3.common.text.TextEmphasisSpan; +import androidx.media3.common.text.VoiceSpan; import androidx.media3.common.util.NullableType; import androidx.media3.common.util.UnstableApi; import androidx.media3.common.util.Util; @@ -52,6 +53,8 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.Objects; +import java.util.Set; import org.checkerframework.checker.nullness.qual.RequiresNonNull; /** A Truth {@link Subject} for assertions on {@link Spanned} instances containing text styling. */ @@ -634,6 +637,42 @@ public final class SpannedSubject extends Subject { hasNoSpansOfTypeBetween(HorizontalTextInVerticalContextSpan.class, start, end); } + /** + * Checks that the subject has a {@link VoiceSpan} from {@code start} to {@code end}. + * + * @param start The start of the expected span. + * @param end The end of the expected span. + * @return A {@link VoiceText} object for additional assertions on the speaker name and classes. + */ + public VoiceText hasVoiceSpanBetween(int start, int end) { + if (actual == null) { + failWithoutActual(simpleFact("Spanned must not be null")); + return ALREADY_FAILED_WITH_NAME_AND_CLASSES; + } + + List voiceSpans = findMatchingSpans(start, end, VoiceSpan.class); + if (voiceSpans.size() == 1) { + return check("VoiceSpan (start=%s,end=%s)", start, end) + .about(voiceSpanSubjects(actual)) + .that(voiceSpans); + } + failWithExpectedSpan(start, end, VoiceSpan.class, actual.toString().substring(start, end)); + return ALREADY_FAILED_WITH_NAME_AND_CLASSES; + } + + /** + * Checks that the subject has no {@link VoiceSpan}s on any of the text + * between {@code start} and {@code end}. + * + *

<p>This fails even if the start and end indexes don't exactly match. + * + * @param start The start index to start searching for spans. + * @param end The end index to stop searching for spans. + */ + public void hasNoVoiceSpanBetween(int start, int end) { + hasNoSpansOfTypeBetween(VoiceSpan.class, start, end); + } + /** * Checks that the subject has no spans of type {@code spanClazz} on any of the text between * {@code start} and {@code end}. @@ -1272,4 +1311,91 @@ public final class SpannedSubject extends Subject { } } } + + /** Allows assertions about a span's speaker name and classes. */ + public interface VoiceText { + /** + * Checks that at least one of the matched spans has the expected {@code name} and {@code + * classes}. + * + * @param name The expected name of the voice. + * @param classes The classes used to style the voice. + * @return An {@link AndSpanFlags} object for optional additional assertions on the flags. + */ + AndSpanFlags withSpeakerNameAndClasses(String name, Set classes); + } + + private static final VoiceText ALREADY_FAILED_WITH_NAME_AND_CLASSES = + (name, classes) -> ALREADY_FAILED_AND_FLAGS; + + private static Factory> voiceSpanSubjects( + Spanned actualSpanned) { + return (FailureMetadata metadata, @Nullable List spans) -> + new VoiceSpanSubject(metadata, spans, actualSpanned); + } + + private static final class VoiceSpanSubject extends Subject implements VoiceText { + + @Nullable private final List actualSpans; + private final Spanned actualSpanned; + + private VoiceSpanSubject( + FailureMetadata metadata, + @Nullable List actualSpans, + Spanned actualSpanned) { + super(metadata, actualSpans); + this.actualSpans = actualSpans; + this.actualSpanned = actualSpanned; + } + + @Override + public AndSpanFlags withSpeakerNameAndClasses(String name, Set classes) { + List matchingSpanFlags = new ArrayList<>(); + List voiceSpeakerNameAndClasses = new ArrayList<>(); + for (VoiceSpan span : checkNotNull(actualSpans)) { + voiceSpeakerNameAndClasses.add(new
SpeakerNameAndClasses(span.speakerName, span.classes)); + if (span.speakerName.equals(name) && span.classes.equals(classes)) { + matchingSpanFlags.add(actualSpanned.getSpanFlags(span)); + } + } + check("voiceSpeakerNameAndClasses") + .that(voiceSpeakerNameAndClasses) + .containsExactly(new SpeakerNameAndClasses(name, classes)); + return check("flags").about(spanFlags()).that(matchingSpanFlags); + } + + private static final class SpeakerNameAndClasses { + + private final String speakerName; + private final Set classes; + + private SpeakerNameAndClasses(String name, Set classes) { + this.speakerName = name; + this.classes = classes; + } + + @Override + public boolean equals(@Nullable Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + SpeakerNameAndClasses that = (SpeakerNameAndClasses) o; + return (speakerName.equals(that.speakerName)) && classes.equals(that.classes); + } + + @Override + public int hashCode() { + return Objects.hash(speakerName, classes); + } + + @Override + public String toString() { + return String.format("{speakerName=%s,classes=%s}", speakerName, classes); + } + } + } } diff --git a/libraries/test_utils/src/test/java/androidx/media3/test/utils/truth/SpannedSubjectTest.java b/libraries/test_utils/src/test/java/androidx/media3/test/utils/truth/SpannedSubjectTest.java index 57ef16713b..c15ac26596 100644 --- a/libraries/test_utils/src/test/java/androidx/media3/test/utils/truth/SpannedSubjectTest.java +++ b/libraries/test_utils/src/test/java/androidx/media3/test/utils/truth/SpannedSubjectTest.java @@ -40,11 +40,13 @@ import androidx.media3.common.text.HorizontalTextInVerticalContextSpan; import androidx.media3.common.text.RubySpan; import androidx.media3.common.text.TextAnnotation; import androidx.media3.common.text.TextEmphasisSpan; +import androidx.media3.common.text.VoiceSpan; import androidx.media3.common.util.Util; import 
androidx.media3.test.utils.truth.SpannedSubject.AndSpanFlags; import androidx.media3.test.utils.truth.SpannedSubject.WithSpanFlags; import androidx.test.ext.junit.runners.AndroidJUnit4; import com.google.common.truth.ExpectFailure; +import java.util.Set; import org.junit.Test; import org.junit.runner.RunWith; @@ -902,6 +904,85 @@ public class SpannedSubjectTest { SpannedSubject::hasNoHorizontalTextInVerticalContextSpanBetween); } + @Test + public void voiceSpan_success() { + SpannableString spannable = + createSpannable( + new VoiceSpan("speaker", Set.of("quiet")), + Spanned.SPAN_INCLUSIVE_EXCLUSIVE); + + assertThat(spannable) + .hasVoiceSpanBetween(SPAN_START, SPAN_END) + .withSpeakerNameAndClasses("speaker", Set.of("quiet")) + .andFlags(Spanned.SPAN_INCLUSIVE_EXCLUSIVE); + } + + @Test + public void voiceSpan_wrongEndIndex() { + checkHasSpanFailsDueToIndexMismatch( + new VoiceSpan("speaker", Set.of("quiet")), + SpannedSubject::hasVoiceSpanBetween); + } + + @Test + public void voiceSpan_wrongSpeakerName() { + SpannableString spannable = createSpannable(new VoiceSpan("speaker", Set.of("quiet"))); + + AssertionError expected = + expectFailure( + whenTesting -> + whenTesting + .that(spannable) + .hasVoiceSpanBetween(SPAN_START, SPAN_END) + .withSpeakerNameAndClasses("different speaker", Set.of("quiet"))); + + assertThat(expected).factValue("value of").contains("voiceSpeakerNameAndClasses"); + assertThat(expected).factValue("expected").contains("speakerName=different speaker"); + assertThat(expected).factValue("but was").contains("speakerName=speaker"); + } + + @Test + public void voiceSpan_wrongClasses() { + SpannableString spannable = createSpannable(new VoiceSpan("speaker", Set.of("quiet"))); + + AssertionError expected = + expectFailure( + whenTesting -> + whenTesting + .that(spannable) + .hasVoiceSpanBetween(SPAN_START, SPAN_END) + .withSpeakerNameAndClasses("speaker", Set.of("loud"))); + + assertThat(expected).factValue("value 
of").contains("voiceSpeakerNameAndClasses"); + assertThat(expected).factValue("expected").contains("classes=[loud]"); + assertThat(expected).factValue("but was").contains("classes=[quiet]"); + } + + @Test + public void voiceSpan_wrongFlags() { + checkHasSpanFailsDueToFlagMismatch( + new VoiceSpan("speaker", Set.of("quiet")), + (subject, start, end) -> + subject + .hasVoiceSpanBetween(start, end) + .withSpeakerNameAndClasses("speaker", Set.of("quiet"))); + } + + @Test + public void noVoiceSpan_success() { + SpannableString spannable = + createSpannableWithUnrelatedSpanAnd(new VoiceSpan("speaker", Set.of("quiet"))); + + assertThat(spannable).hasNoVoiceSpanBetween(UNRELATED_SPAN_START, UNRELATED_SPAN_END); + } + + @Test + public void noVoiceSpan_failure() { + checkHasNoSpanFails( + new VoiceSpan("speaker", Set.of("quiet")), + SpannedSubject::hasNoVoiceSpanBetween); + } + private interface HasSpanFunction { T call(SpannedSubject s, int start, int end); }