diff --git a/libraries/common/src/main/java/androidx/media3/common/text/CustomSpanBundler.java b/libraries/common/src/main/java/androidx/media3/common/text/CustomSpanBundler.java
index edcda586d2..d5f7d15e32 100644
--- a/libraries/common/src/main/java/androidx/media3/common/text/CustomSpanBundler.java
+++ b/libraries/common/src/main/java/androidx/media3/common/text/CustomSpanBundler.java
@@ -58,7 +58,7 @@ import java.util.ArrayList;
@Documented
@Retention(RetentionPolicy.SOURCE)
@Target({TYPE_USE})
- @IntDef({UNKNOWN, RUBY, TEXT_EMPHASIS, HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT})
+ @IntDef({UNKNOWN, RUBY, TEXT_EMPHASIS, HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT, VOICE})
private @interface CustomSpanType {}
private static final int UNKNOWN = -1;
@@ -69,6 +69,8 @@ import java.util.ArrayList;
private static final int HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT = 3;
+ private static final int VOICE = 4;
+
private static final String FIELD_START_INDEX = Util.intToStringMaxRadix(0);
private static final String FIELD_END_INDEX = Util.intToStringMaxRadix(1);
private static final String FIELD_FLAGS = Util.intToStringMaxRadix(2);
@@ -94,6 +96,10 @@ import java.util.ArrayList;
text, span, /* spanType= */ HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT, /* params= */ null);
bundledCustomSpans.add(bundle);
}
+ for (VoiceSpan span : text.getSpans(0, text.length(), VoiceSpan.class)) {
+ Bundle bundle = spanToBundle(text, span, /* spanType= */ VOICE, /* params= */ span.toBundle());
+ bundledCustomSpans.add(bundle);
+ }
return bundledCustomSpans;
}
@@ -113,6 +119,9 @@ import java.util.ArrayList;
case HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT:
text.setSpan(new HorizontalTextInVerticalContextSpan(), start, end, flags);
break;
+ case VOICE:
+ text.setSpan(VoiceSpan.fromBundle(checkNotNull(span)), start, end, flags);
+ break;
default:
break;
}
diff --git a/libraries/common/src/main/java/androidx/media3/common/text/VoiceSpan.java b/libraries/common/src/main/java/androidx/media3/common/text/VoiceSpan.java
new file mode 100644
index 0000000000..66f74b1004
--- /dev/null
+++ b/libraries/common/src/main/java/androidx/media3/common/text/VoiceSpan.java
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package androidx.media3.common.text;
+
+import static androidx.media3.common.util.Assertions.checkNotNull;
+
+import android.os.Bundle;
+import androidx.media3.common.util.UnstableApi;
+import androidx.media3.common.util.Util;
+import com.google.common.collect.ImmutableSet;
+import java.util.Set;
+
+/**
+ * A span representing a speaker.
+ *
+ * <p>More information on <a href="https://www.w3.org/TR/webvtt1/#webvtt-cue-voice-span">
+ * voice spans</a>.
+ */
+@UnstableApi
+public final class VoiceSpan implements LanguageFeatureSpan {
+
+  /** The speaker name. */
+  public final String speakerName;
+
+  /**
+   * The classes associated with the text. It can specify things like "first", "loud", etc.
+   *
+   * <p>This is an immutable copy of the set passed to the constructor.
+   */
+  public final Set<String> classes;
+
+  private static final String FIELD_NAME = Util.intToStringMaxRadix(0);
+  private static final String FIELD_CLASSES = Util.intToStringMaxRadix(1);
+
+  /**
+   * Creates a span.
+   *
+   * @param speakerName The speaker name.
+   * @param classes The classes associated with the text. The set is copied, so later changes to
+   *     it are not reflected in {@link #classes}. Must not contain null elements.
+   */
+  public VoiceSpan(String speakerName, Set<String> classes) {
+    this.speakerName = speakerName;
+    // Defensive copy: the field is public and final, so it must not alias a caller-owned set that
+    // could be mutated after the span has been attached to text.
+    this.classes = ImmutableSet.copyOf(classes);
+  }
+
+  /**
+   * Returns a {@link Bundle} holding {@link #speakerName} and {@link #classes}, in the layout
+   * read back by {@link #fromBundle(Bundle)}.
+   */
+  public Bundle toBundle() {
+    Bundle bundle = new Bundle();
+    bundle.putString(FIELD_NAME, speakerName);
+    bundle.putStringArray(FIELD_CLASSES, classes.toArray(new String[0]));
+    return bundle;
+  }
+
+  /**
+   * Restores a span from a {@link Bundle} produced by {@link #toBundle()}.
+   *
+   * <p>Both the speaker name and the classes array are passed through {@code checkNotNull}, so a
+   * bundle missing either field is rejected.
+   *
+   * @param bundle The bundle to read the speaker name and classes from.
+   * @return The restored span.
+   */
+  public static VoiceSpan fromBundle(Bundle bundle) {
+    return new VoiceSpan(
+        /* speakerName = */ checkNotNull(bundle.getString(FIELD_NAME)),
+        /* classes = */ ImmutableSet.copyOf(checkNotNull(bundle.getStringArray(FIELD_CLASSES))));
+  }
+}
diff --git a/libraries/common/src/test/java/androidx/media3/common/text/CustomCueBundlerTest.java b/libraries/common/src/test/java/androidx/media3/common/text/CustomCueBundlerTest.java
index 45a8d2ab1e..a2b01343f9 100644
--- a/libraries/common/src/test/java/androidx/media3/common/text/CustomCueBundlerTest.java
+++ b/libraries/common/src/test/java/androidx/media3/common/text/CustomCueBundlerTest.java
@@ -42,6 +42,7 @@ import org.junit.runner.RunWith;
@RunWith(AndroidJUnit4.class)
public class CustomCueBundlerTest {
+ private static final VoiceSpan VOICE_SPAN = new VoiceSpan("name", Set.of("first", "loud"));
private static final RubySpan RUBY_SPAN =
new RubySpan("ruby text", TextAnnotation.POSITION_AFTER);
private static final TextEmphasisSpan TEXT_EMPHASIS_SPAN =
@@ -55,7 +56,8 @@ public class CustomCueBundlerTest {
ImmutableMap.of(
RUBY_SPAN, new Pair<>(1, 2),
TEXT_EMPHASIS_SPAN, new Pair<>(2, 3),
- HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT_SPAN, new Pair<>(5, 7));
+ HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT_SPAN, new Pair<>(5, 7),
+ VOICE_SPAN, new Pair<>(8, 10));
@Test
public void serializingSpannableWithAllCustomSpans() {
@@ -92,6 +94,11 @@ public class CustomCueBundlerTest {
.hasHorizontalTextInVerticalContextSpanBetween(
ALL_SPANS_TO_START_END_INDEX.get(HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT_SPAN).first,
ALL_SPANS_TO_START_END_INDEX.get(HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT_SPAN).second);
+ SpannedSubject.assertThat(result)
+ .hasVoiceSpanBetween(
+ ALL_SPANS_TO_START_END_INDEX.get(VOICE_SPAN).first,
+ ALL_SPANS_TO_START_END_INDEX.get(VOICE_SPAN).second)
+ .withSpeakerNameAndClasses(VOICE_SPAN.speakerName, VOICE_SPAN.classes);
}
@Test
diff --git a/libraries/extractor/src/main/java/androidx/media3/extractor/text/webvtt/WebvttCueParser.java b/libraries/extractor/src/main/java/androidx/media3/extractor/text/webvtt/WebvttCueParser.java
index cbf44a94b6..b2a5527db5 100644
--- a/libraries/extractor/src/main/java/androidx/media3/extractor/text/webvtt/WebvttCueParser.java
+++ b/libraries/extractor/src/main/java/androidx/media3/extractor/text/webvtt/WebvttCueParser.java
@@ -43,6 +43,7 @@ import androidx.media3.common.text.HorizontalTextInVerticalContextSpan;
import androidx.media3.common.text.RubySpan;
import androidx.media3.common.text.SpanUtil;
import androidx.media3.common.text.TextAnnotation;
+import androidx.media3.common.text.VoiceSpan;
import androidx.media3.common.util.Assertions;
import androidx.media3.common.util.Log;
import androidx.media3.common.util.ParsableByteArray;
@@ -555,8 +556,10 @@ public final class WebvttCueParser {
case TAG_CLASS:
applyDefaultColors(text, startTag.classes, start, end);
break;
- case TAG_LANG:
case TAG_VOICE:
+ applyVoiceSpan(text, startTag.voice, startTag.classes, start, end);
+ break;
+ case TAG_LANG:
case "": // Case of the "whole cue" virtual tag.
break;
default:
@@ -658,6 +661,11 @@ public final class WebvttCueParser {
}
}
+  /**
+   * Attaches a {@link VoiceSpan} built from {@code voice} and {@code classes} to {@code text}
+   * between {@code start} and {@code end}, using {@link Spanned#SPAN_EXCLUSIVE_EXCLUSIVE}.
+   */
+  private static void applyVoiceSpan(
+      SpannableStringBuilder text, String voice, Set<String> classes, int start, int end) {
+    text.setSpan(new VoiceSpan(voice, classes), start, end, Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
+  }
+
private static void applyStyleToText(
SpannableStringBuilder spannedText, WebvttCssStyle style, int start, int end) {
if (style == null) {
diff --git a/libraries/extractor/src/test/java/androidx/media3/extractor/text/webvtt/WebvttCueParserTest.java b/libraries/extractor/src/test/java/androidx/media3/extractor/text/webvtt/WebvttCueParserTest.java
index e49bc8604b..0fd5eda714 100644
--- a/libraries/extractor/src/test/java/androidx/media3/extractor/text/webvtt/WebvttCueParserTest.java
+++ b/libraries/extractor/src/test/java/androidx/media3/extractor/text/webvtt/WebvttCueParserTest.java
@@ -22,6 +22,7 @@ import android.graphics.Color;
import android.text.Spanned;
import androidx.test.ext.junit.runners.AndroidJUnit4;
import java.util.Collections;
+import java.util.Set;
import org.junit.Test;
import org.junit.runner.RunWith;
@@ -46,7 +47,7 @@ public final class WebvttCueParserTest {
public void parseStrictValidUnsupportedTagsStrippedOut() throws Exception {
Spanned text =
parseCueText(
- "This is text with "
+ "This is text with "
+ "html tags");
assertThat(text.toString()).isEqualTo("This is text with html tags");
@@ -242,6 +243,59 @@ public final class WebvttCueParserTest {
assertThat(text.toString()).isEqualTo("&&&&&&&");
}
+  @Test
+  public void parseEmptyVoiceSpan() throws Exception {
+    // A bare <v> tag (no annotation, no classes) is expected to yield a span with an empty name.
+    Spanned text = parseCueText("<v>Text with a single voice span</v>");
+
+    assertThat(text.toString()).isEqualTo("Text with a single voice span");
+    assertThat(text)
+        .hasVoiceSpanBetween(0, "Text with a single voice span".length())
+        .withSpeakerNameAndClasses("", Collections.emptySet());
+  }
+
+  @Test
+  public void parseVoiceSpanWithName() throws Exception {
+    // The tag annotation ("Esme") is expected to become the span's speaker name.
+    Spanned text = parseCueText("<v Esme>Text with a single voice span</v>");
+
+    assertThat(text.toString()).isEqualTo("Text with a single voice span");
+    assertThat(text)
+        .hasVoiceSpanBetween(0, "Text with a single voice span".length())
+        .withSpeakerNameAndClasses("Esme", Collections.emptySet());
+  }
+
+  @Test
+  public void parseVoiceSpanWithClasses() throws Exception {
+    // Dot-separated classes on the tag name are expected to become the span's classes.
+    Spanned text = parseCueText("<v.first.loud>Text with a single voice span</v>");
+
+    assertThat(text.toString()).isEqualTo("Text with a single voice span");
+    assertThat(text)
+        .hasVoiceSpanBetween(0, "Text with a single voice span".length())
+        .withSpeakerNameAndClasses("", Set.of("first", "loud"));
+  }
+
+  @Test
+  public void parseVoiceSpanWithNameAndClasses() throws Exception {
+    // Classes and annotation combined: both are expected to be carried by the span.
+    Spanned text = parseCueText("<v.first.loud Esme>Text with a single voice span</v>");
+
+    assertThat(text.toString()).isEqualTo("Text with a single voice span");
+    assertThat(text)
+        .hasVoiceSpanBetween(0, "Text with a single voice span".length())
+        .withSpeakerNameAndClasses("Esme", Set.of("first", "loud"));
+  }
+
+  @Test
+  public void parseMultipleVoiceSpans() throws Exception {
+    // Two consecutive voice tags: each is expected to produce its own span over its own text.
+    Spanned text =
+        parseCueText("<v.loud Esme>Text with </v><v.quiet Mary>multiple voice spans</v>");
+
+    assertThat(text.toString()).isEqualTo("Text with multiple voice spans");
+    assertThat(text)
+        .hasVoiceSpanBetween(0, "Text with ".length())
+        .withSpeakerNameAndClasses("Esme", Set.of("loud"));
+    assertThat(text)
+        .hasVoiceSpanBetween("Text with ".length(), "Text with multiple voice spans".length())
+        .withSpeakerNameAndClasses("Mary", Set.of("quiet"));
+  }
+
private static Spanned parseCueText(String string) {
return WebvttCueParser.parseCueText(
/* id= */ null, string, /* styles= */ Collections.emptyList());
diff --git a/libraries/test_utils/src/main/java/androidx/media3/test/utils/truth/SpannedSubject.java b/libraries/test_utils/src/main/java/androidx/media3/test/utils/truth/SpannedSubject.java
index 43d7b9533e..a05c6fde82 100644
--- a/libraries/test_utils/src/main/java/androidx/media3/test/utils/truth/SpannedSubject.java
+++ b/libraries/test_utils/src/main/java/androidx/media3/test/utils/truth/SpannedSubject.java
@@ -42,6 +42,7 @@ import androidx.media3.common.text.HorizontalTextInVerticalContextSpan;
import androidx.media3.common.text.RubySpan;
import androidx.media3.common.text.TextAnnotation;
import androidx.media3.common.text.TextEmphasisSpan;
+import androidx.media3.common.text.VoiceSpan;
import androidx.media3.common.util.NullableType;
import androidx.media3.common.util.UnstableApi;
import androidx.media3.common.util.Util;
@@ -52,6 +53,8 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
+import java.util.Objects;
+import java.util.Set;
import org.checkerframework.checker.nullness.qual.RequiresNonNull;
/** A Truth {@link Subject} for assertions on {@link Spanned} instances containing text styling. */
@@ -634,6 +637,42 @@ public final class SpannedSubject extends Subject {
hasNoSpansOfTypeBetween(HorizontalTextInVerticalContextSpan.class, start, end);
}
+  /**
+   * Checks that the subject has a {@link VoiceSpan} from {@code start} to {@code end}.
+   *
+   * @param start The start of the expected span.
+   * @param end The end of the expected span.
+   * @return A {@link VoiceText} object for optional additional assertions on the speaker name and
+   *     classes.
+   */
+  public VoiceText hasVoiceSpanBetween(int start, int end) {
+    if (actual == null) {
+      failWithoutActual(simpleFact("Spanned must not be null"));
+      return ALREADY_FAILED_WITH_NAME_AND_CLASSES;
+    }
+
+    List<VoiceSpan> voiceSpans = findMatchingSpans(start, end, VoiceSpan.class);
+    // Exactly one matching span is required; zero or several matches are reported as a failure.
+    if (voiceSpans.size() == 1) {
+      return check("VoiceSpan (start=%s,end=%s)", start, end)
+          .about(voiceSpanSubjects(actual))
+          .that(voiceSpans);
+    }
+    failWithExpectedSpan(start, end, VoiceSpan.class, actual.toString().substring(start, end));
+    return ALREADY_FAILED_WITH_NAME_AND_CLASSES;
+  }
+
+ /**
+ * Checks that the subject has no {@link VoiceSpan}s on any of the text
+ * between {@code start} and {@code end}.
+ *
+ * <p>This fails even if the start and end indexes don't exactly match.
+ *
+ * @param start The start index to start searching for spans.
+ * @param end The end index to stop searching for spans.
+ */
+ public void hasNoVoiceSpanBetween(int start, int end) {
+ hasNoSpansOfTypeBetween(VoiceSpan.class, start, end);
+ }
+
/**
* Checks that the subject has no spans of type {@code spanClazz} on any of the text between
* {@code start} and {@code end}.
@@ -1272,4 +1311,91 @@ public final class SpannedSubject extends Subject {
}
}
}
+
+  /** Allows assertions about a voice span's speaker name and classes. */
+  public interface VoiceText {
+    /**
+     * Checks that at least one of the matched spans has the expected {@code name} and {@code
+     * classes}.
+     *
+     * @param name The expected name of the voice.
+     * @param classes The classes used to style the voice.
+     * @return An {@link AndSpanFlags} object for optional additional assertions on the flags.
+     */
+    AndSpanFlags withSpeakerNameAndClasses(String name, Set<String> classes);
+  }
+
+  /** Returned once an assertion has already failed, so that chained calls do not fail again. */
+  private static final VoiceText ALREADY_FAILED_WITH_NAME_AND_CLASSES =
+      (name, classes) -> ALREADY_FAILED_AND_FLAGS;
+
+  /** Returns a Truth factory creating {@link VoiceSpanSubject}s bound to {@code actualSpanned}. */
+  private static Factory<VoiceSpanSubject, List<VoiceSpan>> voiceSpanSubjects(
+      Spanned actualSpanned) {
+    return (FailureMetadata metadata, @Nullable List<VoiceSpan> spans) ->
+        new VoiceSpanSubject(metadata, spans, actualSpanned);
+  }
+
+  /** Subject over the {@link VoiceSpan}s matched at a position of a {@link Spanned}. */
+  private static final class VoiceSpanSubject extends Subject implements VoiceText {
+
+    @Nullable private final List<VoiceSpan> actualSpans;
+    private final Spanned actualSpanned;
+
+    private VoiceSpanSubject(
+        FailureMetadata metadata,
+        @Nullable List<VoiceSpan> actualSpans,
+        Spanned actualSpanned) {
+      super(metadata, actualSpans);
+      this.actualSpans = actualSpans;
+      this.actualSpanned = actualSpanned;
+    }
+
+    @Override
+    public AndSpanFlags withSpeakerNameAndClasses(String name, Set<String> classes) {
+      List<Integer> matchingSpanFlags = new ArrayList<>();
+      List<SpeakerNameAndClasses> voiceSpeakerNameAndClasses = new ArrayList<>();
+      for (VoiceSpan span : checkNotNull(actualSpans)) {
+        // Record every span's values so a failure message shows what was actually found.
+        voiceSpeakerNameAndClasses.add(new SpeakerNameAndClasses(span.speakerName, span.classes));
+        if (span.speakerName.equals(name) && span.classes.equals(classes)) {
+          // Only the flags of matching spans are handed to the follow-up flag assertions.
+          matchingSpanFlags.add(actualSpanned.getSpanFlags(span));
+        }
+      }
+      check("voiceSpeakerNameAndClasses")
+          .that(voiceSpeakerNameAndClasses)
+          .containsExactly(new SpeakerNameAndClasses(name, classes));
+      return check("flags").about(spanFlags()).that(matchingSpanFlags);
+    }
+
+    /** Value holder pairing a speaker name with its classes, for comparison and messages. */
+    private static final class SpeakerNameAndClasses {
+
+      private final String speakerName;
+      private final Set<String> classes;
+
+      private SpeakerNameAndClasses(String name, Set<String> classes) {
+        this.speakerName = name;
+        this.classes = classes;
+      }
+
+      @Override
+      public boolean equals(@Nullable Object o) {
+        if (this == o) {
+          return true;
+        }
+        if (o == null || getClass() != o.getClass()) {
+          return false;
+        }
+
+        SpeakerNameAndClasses that = (SpeakerNameAndClasses) o;
+        return (speakerName.equals(that.speakerName)) && classes.equals(that.classes);
+      }
+
+      @Override
+      public int hashCode() {
+        return Objects.hash(speakerName, classes);
+      }
+
+      @Override
+      public String toString() {
+        return String.format("{speakerName=%s,classes=%s}", speakerName, classes);
+      }
+    }
+  }
}
diff --git a/libraries/test_utils/src/test/java/androidx/media3/test/utils/truth/SpannedSubjectTest.java b/libraries/test_utils/src/test/java/androidx/media3/test/utils/truth/SpannedSubjectTest.java
index 57ef16713b..c15ac26596 100644
--- a/libraries/test_utils/src/test/java/androidx/media3/test/utils/truth/SpannedSubjectTest.java
+++ b/libraries/test_utils/src/test/java/androidx/media3/test/utils/truth/SpannedSubjectTest.java
@@ -40,11 +40,13 @@ import androidx.media3.common.text.HorizontalTextInVerticalContextSpan;
import androidx.media3.common.text.RubySpan;
import androidx.media3.common.text.TextAnnotation;
import androidx.media3.common.text.TextEmphasisSpan;
+import androidx.media3.common.text.VoiceSpan;
import androidx.media3.common.util.Util;
import androidx.media3.test.utils.truth.SpannedSubject.AndSpanFlags;
import androidx.media3.test.utils.truth.SpannedSubject.WithSpanFlags;
import androidx.test.ext.junit.runners.AndroidJUnit4;
import com.google.common.truth.ExpectFailure;
+import java.util.Set;
import org.junit.Test;
import org.junit.runner.RunWith;
@@ -902,6 +904,85 @@ public class SpannedSubjectTest {
SpannedSubject::hasNoHorizontalTextInVerticalContextSpanBetween);
}
+  @Test
+  public void voiceSpan_success() {
+    // Position, speaker name/classes and flags are all asserted against the values used here.
+    VoiceSpan span = new VoiceSpan("speaker", Set.of("quiet"));
+    SpannableString spannable = createSpannable(span, Spanned.SPAN_INCLUSIVE_EXCLUSIVE);
+
+    assertThat(spannable)
+        .hasVoiceSpanBetween(SPAN_START, SPAN_END)
+        .withSpeakerNameAndClasses("speaker", Set.of("quiet"))
+        .andFlags(Spanned.SPAN_INCLUSIVE_EXCLUSIVE);
+  }
+
+  @Test
+  public void voiceSpan_wrongEndIndex() {
+    // Delegates to the shared index-mismatch check used by the other span types.
+    VoiceSpan span = new VoiceSpan("speaker", Set.of("quiet"));
+    checkHasSpanFailsDueToIndexMismatch(span, SpannedSubject::hasVoiceSpanBetween);
+  }
+
+  @Test
+  public void voiceSpan_wrongSpeakerName() {
+    // Same classes, different speaker name: the failure must report both names.
+    VoiceSpan span = new VoiceSpan("speaker", Set.of("quiet"));
+    SpannableString spannable = createSpannable(span);
+
+    AssertionError failure =
+        expectFailure(
+            whenTesting ->
+                whenTesting
+                    .that(spannable)
+                    .hasVoiceSpanBetween(SPAN_START, SPAN_END)
+                    .withSpeakerNameAndClasses("different speaker", Set.of("quiet")));
+
+    assertThat(failure).factValue("value of").contains("voiceSpeakerNameAndClasses");
+    assertThat(failure).factValue("expected").contains("speakerName=different speaker");
+    assertThat(failure).factValue("but was").contains("speakerName=speaker");
+  }
+
+  @Test
+  public void voiceSpan_wrongClasses() {
+    // Same speaker name, different classes: the failure must report both class sets.
+    VoiceSpan span = new VoiceSpan("speaker", Set.of("quiet"));
+    SpannableString spannable = createSpannable(span);
+
+    AssertionError failure =
+        expectFailure(
+            whenTesting ->
+                whenTesting
+                    .that(spannable)
+                    .hasVoiceSpanBetween(SPAN_START, SPAN_END)
+                    .withSpeakerNameAndClasses("speaker", Set.of("loud")));
+
+    assertThat(failure).factValue("value of").contains("voiceSpeakerNameAndClasses");
+    assertThat(failure).factValue("expected").contains("classes=[loud]");
+    assertThat(failure).factValue("but was").contains("classes=[quiet]");
+  }
+
+  @Test
+  public void voiceSpan_wrongFlags() {
+    // Name and classes match, so only the flag comparison in the shared check can fail.
+    VoiceSpan span = new VoiceSpan("speaker", Set.of("quiet"));
+    checkHasSpanFailsDueToFlagMismatch(
+        span,
+        (spannedSubject, from, to) ->
+            spannedSubject
+                .hasVoiceSpanBetween(from, to)
+                .withSpeakerNameAndClasses("speaker", Set.of("quiet")));
+  }
+
+  @Test
+  public void noVoiceSpan_success() {
+    // The range covered by the unrelated span must be reported as free of voice spans.
+    VoiceSpan span = new VoiceSpan("speaker", Set.of("quiet"));
+    SpannableString spannable = createSpannableWithUnrelatedSpanAnd(span);
+
+    assertThat(spannable).hasNoVoiceSpanBetween(UNRELATED_SPAN_START, UNRELATED_SPAN_END);
+  }
+
+  @Test
+  public void noVoiceSpan_failure() {
+    // Delegates to the shared "has no span" failure check used by the other span types.
+    VoiceSpan span = new VoiceSpan("speaker", Set.of("quiet"));
+    checkHasNoSpanFails(span, SpannedSubject::hasNoVoiceSpanBetween);
+  }
+
private interface HasSpanFunction {
T call(SpannedSubject s, int start, int end);
}