Merge pull request #1652 from MiSikora:ms/vtt-speaker

PiperOrigin-RevId: 668976037
This commit is contained in:
Copybara-Service 2024-08-29 10:08:13 -07:00
commit 39ed9cf88d
8 changed files with 294 additions and 14 deletions

View File

@ -67,6 +67,9 @@
to remove a previously set `Surface` if the codec supports this
(`MediaCodecInfo.detachedSurfaceSupported`).
* Text:
* Add a custom `VoiceSpan` and populate it for
[WebVTT voice spans](https://www.w3.org/TR/webvtt1/#webvtt-cue-voice-span)
([#1632](https://github.com/androidx/media/issues/1632)).
* Metadata:
* Image:
* Add `ExternallyLoadedImageDecoder` for simplified integration with

View File

@ -45,20 +45,11 @@ import java.util.ArrayList;
*/
/* package */ final class CustomSpanBundler {
/**
* Media3 custom span implementations. One of the following:
*
* <ul>
* <li>{@link #UNKNOWN}
* <li>{@link #RUBY}
* <li>{@link #TEXT_EMPHASIS}
* <li>{@link #HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT}
* </ul>
*/
/** Media3 custom span implementations. */
@Documented
@Retention(RetentionPolicy.SOURCE)
@Target({TYPE_USE})
@IntDef({UNKNOWN, RUBY, TEXT_EMPHASIS, HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT})
@IntDef({UNKNOWN, RUBY, TEXT_EMPHASIS, HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT, VOICE})
private @interface CustomSpanType {}
private static final int UNKNOWN = -1;
@ -69,6 +60,8 @@ import java.util.ArrayList;
private static final int HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT = 3;
private static final int VOICE = 4;
private static final String FIELD_START_INDEX = Util.intToStringMaxRadix(0);
private static final String FIELD_END_INDEX = Util.intToStringMaxRadix(1);
private static final String FIELD_FLAGS = Util.intToStringMaxRadix(2);
@ -94,6 +87,11 @@ import java.util.ArrayList;
text, span, /* spanType= */ HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT, /* params= */ null);
bundledCustomSpans.add(bundle);
}
for (VoiceSpan span : text.getSpans(0, text.length(), VoiceSpan.class)) {
Bundle bundle =
spanToBundle(text, span, /* spanType= */ VOICE, /* params= */ span.toBundle());
bundledCustomSpans.add(bundle);
}
return bundledCustomSpans;
}
@ -113,6 +111,9 @@ import java.util.ArrayList;
case HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT:
text.setSpan(new HorizontalTextInVerticalContextSpan(), start, end, flags);
break;
case VOICE:
text.setSpan(VoiceSpan.fromBundle(checkNotNull(span)), start, end, flags);
break;
default:
break;
}

View File

@ -0,0 +1,52 @@
/*
* Copyright (C) 2024 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package androidx.media3.common.text;
import static androidx.media3.common.util.Assertions.checkNotNull;
import android.os.Bundle;
import androidx.media3.common.util.UnstableApi;
import androidx.media3.common.util.Util;
/**
 * Marks the spanned text as being spoken by a particular, named voice.
 *
 * <p>One source of such spans is a <a
 * href="https://www.w3.org/TR/webvtt1/#webvtt-cue-voice-span">WebVTT voice span</a>.
 */
@UnstableApi
public final class VoiceSpan {

  private static final String FIELD_NAME = Util.intToStringMaxRadix(0);

  /** The voice name. */
  public final String name;

  /**
   * Creates a span for the given voice.
   *
   * @param name The voice name.
   */
  public VoiceSpan(String name) {
    this.name = name;
  }

  /** Returns a new {@link Bundle} that stores this span's {@link #name}. */
  public Bundle toBundle() {
    Bundle result = new Bundle();
    result.putString(FIELD_NAME, name);
    return result;
  }

  /**
   * Recreates a {@link VoiceSpan} from a {@link Bundle} written by {@link #toBundle()}.
   *
   * <p>The stored name is mandatory: it is passed through {@code checkNotNull} before the span is
   * constructed.
   *
   * @param bundle The bundle to read the voice name from.
   * @return A span carrying the bundled voice name.
   */
  public static VoiceSpan fromBundle(Bundle bundle) {
    String bundledName = checkNotNull(bundle.getString(FIELD_NAME));
    return new VoiceSpan(bundledName);
  }
}

View File

@ -42,6 +42,7 @@ import org.junit.runner.RunWith;
@RunWith(AndroidJUnit4.class)
public class CustomCueBundlerTest {
private static final VoiceSpan VOICE_SPAN = new VoiceSpan("name");
private static final RubySpan RUBY_SPAN =
new RubySpan("ruby text", TextAnnotation.POSITION_AFTER);
private static final TextEmphasisSpan TEXT_EMPHASIS_SPAN =
@ -55,7 +56,8 @@ public class CustomCueBundlerTest {
ImmutableMap.of(
RUBY_SPAN, new Pair<>(1, 2),
TEXT_EMPHASIS_SPAN, new Pair<>(2, 3),
HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT_SPAN, new Pair<>(5, 7));
HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT_SPAN, new Pair<>(5, 7),
VOICE_SPAN, new Pair<>(8, 10));
@Test
public void serializingSpannableWithAllCustomSpans() {
@ -92,6 +94,11 @@ public class CustomCueBundlerTest {
.hasHorizontalTextInVerticalContextSpanBetween(
ALL_SPANS_TO_START_END_INDEX.get(HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT_SPAN).first,
ALL_SPANS_TO_START_END_INDEX.get(HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT_SPAN).second);
SpannedSubject.assertThat(result)
.hasVoiceSpanBetween(
ALL_SPANS_TO_START_END_INDEX.get(VOICE_SPAN).first,
ALL_SPANS_TO_START_END_INDEX.get(VOICE_SPAN).second)
.withName(VOICE_SPAN.name);
}
@Test

View File

@ -43,6 +43,7 @@ import androidx.media3.common.text.HorizontalTextInVerticalContextSpan;
import androidx.media3.common.text.RubySpan;
import androidx.media3.common.text.SpanUtil;
import androidx.media3.common.text.TextAnnotation;
import androidx.media3.common.text.VoiceSpan;
import androidx.media3.common.util.Assertions;
import androidx.media3.common.util.Log;
import androidx.media3.common.util.ParsableByteArray;
@ -555,8 +556,10 @@ public final class WebvttCueParser {
case TAG_CLASS:
applyDefaultColors(text, startTag.classes, start, end);
break;
case TAG_LANG:
case TAG_VOICE:
applyVoiceSpan(text, startTag.voice, start, end);
break;
case TAG_LANG:
case "": // Case of the "whole cue" virtual tag.
break;
default:
@ -658,6 +661,11 @@ public final class WebvttCueParser {
}
}
/**
 * Sets a {@link VoiceSpan} carrying {@code voice} on {@code text} between {@code start} and
 * {@code end}, using {@link Spanned#SPAN_EXCLUSIVE_EXCLUSIVE} flags.
 */
private static void applyVoiceSpan(
    SpannableStringBuilder text, String voice, int start, int end) {
  VoiceSpan voiceSpan = new VoiceSpan(voice);
  text.setSpan(voiceSpan, start, end, Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
}
private static void applyStyleToText(
SpannableStringBuilder spannedText, WebvttCssStyle style, int start, int end) {
if (style == null) {

View File

@ -46,7 +46,7 @@ public final class WebvttCueParserTest {
public void parseStrictValidUnsupportedTagsStrippedOut() throws Exception {
Spanned text =
parseCueText(
"<v.first.loud Esme>This <unsupported>is</unsupported> text with "
"This <unsupported>is</unsupported> text with "
+ "<notsupp><invalid>html</invalid></notsupp> tags");
assertThat(text.toString()).isEqualTo("This is text with html tags");
@ -242,6 +242,45 @@ public final class WebvttCueParserTest {
assertThat(text.toString()).isEqualTo("&&&&&&&");
}
@Test
public void parseEmptyVoiceSpan() throws Exception {
  String expectedText = "Text with a single voice span";

  Spanned text = parseCueText("<v>Text with a single voice span");

  assertThat(text.toString()).isEqualTo(expectedText);
  // A voice tag without an annotation is expected to produce a span with an empty name.
  assertThat(text).hasVoiceSpanBetween(0, expectedText.length()).withName("");
}
@Test
public void parseVoiceSpanWithName() throws Exception {
  String expectedText = "Text with a single voice span";

  Spanned text = parseCueText("<v Esme>Text with a single voice span");

  assertThat(text.toString()).isEqualTo(expectedText);
  // The tag annotation ("Esme") is expected to become the span's name.
  assertThat(text).hasVoiceSpanBetween(0, expectedText.length()).withName("Esme");
}
@Test
public void ignoreVoiceSpanClasses() throws Exception {
  String expectedText = "Text with a single voice span";

  Spanned text = parseCueText("<v.first.loud Esme>Text with a single voice span");

  assertThat(text.toString()).isEqualTo(expectedText);
  // The ".first.loud" classes on the tag must not leak into the voice name.
  assertThat(text).hasVoiceSpanBetween(0, expectedText.length()).withName("Esme");
}
@Test
public void parseMultipleVoiceSpans() throws Exception {
  String firstVoiceText = "Text with ";
  String fullText = "Text with multiple voice spans";

  Spanned text = parseCueText("<v.loud Esme>Text with </v><v.quiet Mary>multiple voice spans");

  assertThat(text.toString()).isEqualTo(fullText);
  // The first voice covers the leading part of the text, the second voice covers the rest.
  assertThat(text).hasVoiceSpanBetween(0, firstVoiceText.length()).withName("Esme");
  assertThat(text)
      .hasVoiceSpanBetween(firstVoiceText.length(), fullText.length())
      .withName("Mary");
}
private static Spanned parseCueText(String string) {
return WebvttCueParser.parseCueText(
/* id= */ null, string, /* styles= */ Collections.emptyList());

View File

@ -42,6 +42,7 @@ import androidx.media3.common.text.HorizontalTextInVerticalContextSpan;
import androidx.media3.common.text.RubySpan;
import androidx.media3.common.text.TextAnnotation;
import androidx.media3.common.text.TextEmphasisSpan;
import androidx.media3.common.text.VoiceSpan;
import androidx.media3.common.util.NullableType;
import androidx.media3.common.util.UnstableApi;
import androidx.media3.common.util.Util;
@ -634,6 +635,42 @@ public final class SpannedSubject extends Subject {
hasNoSpansOfTypeBetween(HorizontalTextInVerticalContextSpan.class, start, end);
}
/**
 * Checks that the subject has a {@link VoiceSpan} from {@code start} to {@code end}.
 *
 * <p>The check passes only when exactly one matching {@link VoiceSpan} is found; otherwise a
 * failure is reported and a placeholder {@link VoiceText} is returned.
 *
 * @param start The start of the expected span.
 * @param end The end of the expected span.
 * @return A {@link VoiceText} object for optional additional assertions on the voice name.
 */
public VoiceText hasVoiceSpanBetween(int start, int end) {
  if (actual == null) {
    failWithoutActual(simpleFact("Spanned must not be null"));
    return ALREADY_FAILED_WITH_NAME_AND_CLASSES;
  }

  List<VoiceSpan> matches = findMatchingSpans(start, end, VoiceSpan.class);
  if (matches.size() != 1) {
    // Zero matches or several matches: report the expected span and stop asserting.
    failWithExpectedSpan(start, end, VoiceSpan.class, actual.toString().substring(start, end));
    return ALREADY_FAILED_WITH_NAME_AND_CLASSES;
  }

  return check("VoiceSpan (start=%s,end=%s)", start, end)
      .about(voiceSpanSubjects(actual))
      .that(matches);
}
/**
* Checks that the subject has no {@link VoiceSpan}s on any of the text between {@code start} and
* {@code end}.
*
* <p>This fails even if the start and end indexes don't exactly match.
*
* <p>The check itself is delegated to {@code hasNoSpansOfTypeBetween} with {@code
* VoiceSpan.class}.
*
* @param start The start index to start searching for spans.
* @param end The end index to stop searching for spans.
*/
public void hasNoVoiceSpanBetween(int start, int end) {
hasNoSpansOfTypeBetween(VoiceSpan.class, start, end);
}
/**
* Checks that the subject has no spans of type {@code spanClazz} on any of the text between
* {@code start} and {@code end}.
@ -1272,4 +1309,83 @@ public final class SpannedSubject extends Subject {
}
}
}
/** Allows assertions about the voice name of a span. */
public interface VoiceText {
/**
* Checks that at least one of the matched spans has the expected {@code name}.
*
* @param name The expected name of the voice.
* @return An {@link AndSpanFlags} object for optional additional assertions on the flags.
*/
AndSpanFlags withName(String name);
}
/**
 * Placeholder {@link VoiceText} for use once a failure has already been reported: {@code
 * withName} performs no check and simply hands back {@code ALREADY_FAILED_AND_FLAGS}.
 */
private static final VoiceText ALREADY_FAILED_WITH_NAME_AND_CLASSES =
    ignoredName -> ALREADY_FAILED_AND_FLAGS;
/**
 * Returns a factory that creates {@link VoiceSpanSubject}s bound to {@code actualSpanned}, so
 * that span flags can later be looked up on the text the spans came from.
 */
private static Factory<VoiceSpanSubject, List<VoiceSpan>> voiceSpanSubjects(
    Spanned actualSpanned) {
  Factory<VoiceSpanSubject, List<VoiceSpan>> factory =
      (FailureMetadata failureMetadata, @Nullable List<VoiceSpan> voiceSpans) ->
          new VoiceSpanSubject(failureMetadata, voiceSpans, actualSpanned);
  return factory;
}
/** Subject over a list of {@link VoiceSpan}s that supports assertions on the voice name. */
private static final class VoiceSpanSubject extends Subject implements VoiceText {

  private final Spanned actualSpanned;
  @Nullable private final List<VoiceSpan> actualSpans;

  private VoiceSpanSubject(
      FailureMetadata metadata, @Nullable List<VoiceSpan> actualSpans, Spanned actualSpanned) {
    super(metadata, actualSpans);
    this.actualSpanned = actualSpanned;
    this.actualSpans = actualSpans;
  }

  @Override
  public AndSpanFlags withName(String name) {
    List<Name> observedNames = new ArrayList<>();
    List<Integer> flagsOfMatchingSpans = new ArrayList<>();
    for (VoiceSpan candidate : checkNotNull(actualSpans)) {
      // Every name is recorded so a mismatch shows up in the failure message.
      observedNames.add(new Name(candidate.name));
      if (!candidate.name.equals(name)) {
        continue;
      }
      // Only spans with the expected name contribute to the follow-up flag assertion.
      flagsOfMatchingSpans.add(actualSpanned.getSpanFlags(candidate));
    }

    check("voiceName").that(observedNames).containsExactly(new Name(name));
    return check("flags").about(spanFlags()).that(flagsOfMatchingSpans);
  }

  /** Value wrapper around a voice name with a labelled {@code toString()} form. */
  private static final class Name {

    private final String name;

    private Name(String name) {
      this.name = name;
    }

    @Override
    public boolean equals(@Nullable Object o) {
      if (o == this) {
        return true;
      }
      // Name is final, so an instanceof test accepts exactly the same objects as a class check.
      if (!(o instanceof Name)) {
        return false;
      }
      Name other = (Name) o;
      return name.equals(other.name);
    }

    @Override
    public int hashCode() {
      return name.hashCode();
    }

    @Override
    public String toString() {
      return String.format("{name=%s}", name);
    }
  }
}
}

View File

@ -40,6 +40,7 @@ import androidx.media3.common.text.HorizontalTextInVerticalContextSpan;
import androidx.media3.common.text.RubySpan;
import androidx.media3.common.text.TextAnnotation;
import androidx.media3.common.text.TextEmphasisSpan;
import androidx.media3.common.text.VoiceSpan;
import androidx.media3.common.util.Util;
import androidx.media3.test.utils.truth.SpannedSubject.AndSpanFlags;
import androidx.media3.test.utils.truth.SpannedSubject.WithSpanFlags;
@ -902,6 +903,59 @@ public class SpannedSubjectTest {
SpannedSubject::hasNoHorizontalTextInVerticalContextSpanBetween);
}
@Test
public void voiceSpan_success() {
  int flags = Spanned.SPAN_INCLUSIVE_EXCLUSIVE;
  SpannableString spannable = createSpannable(new VoiceSpan("speaker"), flags);

  // Position, name and flags all match what was set on the spannable.
  assertThat(spannable)
      .hasVoiceSpanBetween(SPAN_START, SPAN_END)
      .withName("speaker")
      .andFlags(flags);
}
@Test
public void voiceSpan_wrongEndIndex() {
  VoiceSpan span = new VoiceSpan("speaker");

  checkHasSpanFailsDueToIndexMismatch(span, SpannedSubject::hasVoiceSpanBetween);
}
@Test
public void voiceSpan_wrongName() {
  VoiceSpan span = new VoiceSpan("speaker");
  SpannableString spannable = createSpannable(span);

  AssertionError failure =
      expectFailure(
          whenTesting ->
              whenTesting
                  .that(spannable)
                  .hasVoiceSpanBetween(SPAN_START, SPAN_END)
                  .withName("different speaker"));

  // The failure must name the checked property and show both the expected and actual names.
  assertThat(failure).factValue("value of").contains("voiceName");
  assertThat(failure).factValue("expected").contains("name=different speaker");
  assertThat(failure).factValue("but was").contains("name=speaker");
}
@Test
public void voiceSpan_wrongFlags() {
  VoiceSpan span = new VoiceSpan("speaker");

  // The name assertion passes, so any failure must come from the flag check.
  checkHasSpanFailsDueToFlagMismatch(
      span, (subject, start, end) -> subject.hasVoiceSpanBetween(start, end).withName("speaker"));
}
@Test
public void noVoiceSpan_success() {
  VoiceSpan span = new VoiceSpan("speaker");
  SpannableString spannable = createSpannableWithUnrelatedSpanAnd(span);

  // The asserted range is the unrelated span's range, not the voice span's.
  assertThat(spannable).hasNoVoiceSpanBetween(UNRELATED_SPAN_START, UNRELATED_SPAN_END);
}
@Test
public void noVoiceSpan_failure() {
  VoiceSpan span = new VoiceSpan("speaker");

  checkHasNoSpanFails(span, SpannedSubject::hasNoVoiceSpanBetween);
}
/**
* Shape of a {@link SpannedSubject} assertion that takes a start and an end index, so that such
* assertions can be passed around as lambdas or method references (e.g. {@code
* SpannedSubject::hasVoiceSpanBetween}).
*
* @param <T> The type returned by the assertion call.
*/
private interface HasSpanFunction<T> {
T call(SpannedSubject s, int start, int end);
}