Merge pull request #1652 from MiSikora:ms/vtt-speaker
PiperOrigin-RevId: 668976037
This commit is contained in:
commit
39ed9cf88d
@ -67,6 +67,9 @@
|
||||
to remove a previously set `Surface` if the codec supports this
|
||||
(`MediaCodecInfo.detachedSurfaceSupported`).
|
||||
* Text:
|
||||
* Add a custom `VoiceSpan` and populate it for
|
||||
[WebVTT voice spans](https://www.w3.org/TR/webvtt1/#webvtt-cue-voice-span)
|
||||
([#1632](https://github.com/androidx/media/issues/1632)).
|
||||
* Metadata:
|
||||
* Image:
|
||||
* Add `ExternallyLoadedImageDecoder` for simplified integration with
|
||||
|
@ -45,20 +45,11 @@ import java.util.ArrayList;
|
||||
*/
|
||||
/* package */ final class CustomSpanBundler {
|
||||
|
||||
/**
|
||||
* Media3 custom span implementations. One of the following:
|
||||
*
|
||||
* <ul>
|
||||
* <li>{@link #UNKNOWN}
|
||||
* <li>{@link #RUBY}
|
||||
* <li>{@link #TEXT_EMPHASIS}
|
||||
* <li>{@link #HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT}
|
||||
* </ul>
|
||||
*/
|
||||
/** Media3 custom span implementations. */
|
||||
@Documented
|
||||
@Retention(RetentionPolicy.SOURCE)
|
||||
@Target({TYPE_USE})
|
||||
@IntDef({UNKNOWN, RUBY, TEXT_EMPHASIS, HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT})
|
||||
@IntDef({UNKNOWN, RUBY, TEXT_EMPHASIS, HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT, VOICE})
|
||||
private @interface CustomSpanType {}
|
||||
|
||||
private static final int UNKNOWN = -1;
|
||||
@ -69,6 +60,8 @@ import java.util.ArrayList;
|
||||
|
||||
private static final int HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT = 3;
|
||||
|
||||
private static final int VOICE = 4;
|
||||
|
||||
private static final String FIELD_START_INDEX = Util.intToStringMaxRadix(0);
|
||||
private static final String FIELD_END_INDEX = Util.intToStringMaxRadix(1);
|
||||
private static final String FIELD_FLAGS = Util.intToStringMaxRadix(2);
|
||||
@ -94,6 +87,11 @@ import java.util.ArrayList;
|
||||
text, span, /* spanType= */ HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT, /* params= */ null);
|
||||
bundledCustomSpans.add(bundle);
|
||||
}
|
||||
for (VoiceSpan span : text.getSpans(0, text.length(), VoiceSpan.class)) {
|
||||
Bundle bundle =
|
||||
spanToBundle(text, span, /* spanType= */ VOICE, /* params= */ span.toBundle());
|
||||
bundledCustomSpans.add(bundle);
|
||||
}
|
||||
return bundledCustomSpans;
|
||||
}
|
||||
|
||||
@ -113,6 +111,9 @@ import java.util.ArrayList;
|
||||
case HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT:
|
||||
text.setSpan(new HorizontalTextInVerticalContextSpan(), start, end, flags);
|
||||
break;
|
||||
case VOICE:
|
||||
text.setSpan(VoiceSpan.fromBundle(checkNotNull(span)), start, end, flags);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright (C) 2024 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*/
|
||||
package androidx.media3.common.text;
|
||||
|
||||
import static androidx.media3.common.util.Assertions.checkNotNull;
|
||||
|
||||
import android.os.Bundle;
|
||||
import androidx.media3.common.util.UnstableApi;
|
||||
import androidx.media3.common.util.Util;
|
||||
|
||||
/**
|
||||
* A span representing the speaker of the spanned text.
|
||||
*
|
||||
* <p>For example, a <a href="https://www.w3.org/TR/webvtt1/#webvtt-cue-voice-span">WebVTT voice
|
||||
* span</a>.
|
||||
*/
|
||||
@UnstableApi
|
||||
public final class VoiceSpan {
|
||||
|
||||
/** The voice name. */
|
||||
public final String name;
|
||||
|
||||
private static final String FIELD_NAME = Util.intToStringMaxRadix(0);
|
||||
|
||||
public VoiceSpan(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
public Bundle toBundle() {
|
||||
Bundle bundle = new Bundle();
|
||||
bundle.putString(FIELD_NAME, name);
|
||||
return bundle;
|
||||
}
|
||||
|
||||
public static VoiceSpan fromBundle(Bundle bundle) {
|
||||
return new VoiceSpan(checkNotNull(bundle.getString(FIELD_NAME)));
|
||||
}
|
||||
}
|
@ -42,6 +42,7 @@ import org.junit.runner.RunWith;
|
||||
@RunWith(AndroidJUnit4.class)
|
||||
public class CustomCueBundlerTest {
|
||||
|
||||
private static final VoiceSpan VOICE_SPAN = new VoiceSpan("name");
|
||||
private static final RubySpan RUBY_SPAN =
|
||||
new RubySpan("ruby text", TextAnnotation.POSITION_AFTER);
|
||||
private static final TextEmphasisSpan TEXT_EMPHASIS_SPAN =
|
||||
@ -55,7 +56,8 @@ public class CustomCueBundlerTest {
|
||||
ImmutableMap.of(
|
||||
RUBY_SPAN, new Pair<>(1, 2),
|
||||
TEXT_EMPHASIS_SPAN, new Pair<>(2, 3),
|
||||
HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT_SPAN, new Pair<>(5, 7));
|
||||
HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT_SPAN, new Pair<>(5, 7),
|
||||
VOICE_SPAN, new Pair<>(8, 10));
|
||||
|
||||
@Test
|
||||
public void serializingSpannableWithAllCustomSpans() {
|
||||
@ -92,6 +94,11 @@ public class CustomCueBundlerTest {
|
||||
.hasHorizontalTextInVerticalContextSpanBetween(
|
||||
ALL_SPANS_TO_START_END_INDEX.get(HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT_SPAN).first,
|
||||
ALL_SPANS_TO_START_END_INDEX.get(HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT_SPAN).second);
|
||||
SpannedSubject.assertThat(result)
|
||||
.hasVoiceSpanBetween(
|
||||
ALL_SPANS_TO_START_END_INDEX.get(VOICE_SPAN).first,
|
||||
ALL_SPANS_TO_START_END_INDEX.get(VOICE_SPAN).second)
|
||||
.withName(VOICE_SPAN.name);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -43,6 +43,7 @@ import androidx.media3.common.text.HorizontalTextInVerticalContextSpan;
|
||||
import androidx.media3.common.text.RubySpan;
|
||||
import androidx.media3.common.text.SpanUtil;
|
||||
import androidx.media3.common.text.TextAnnotation;
|
||||
import androidx.media3.common.text.VoiceSpan;
|
||||
import androidx.media3.common.util.Assertions;
|
||||
import androidx.media3.common.util.Log;
|
||||
import androidx.media3.common.util.ParsableByteArray;
|
||||
@ -555,8 +556,10 @@ public final class WebvttCueParser {
|
||||
case TAG_CLASS:
|
||||
applyDefaultColors(text, startTag.classes, start, end);
|
||||
break;
|
||||
case TAG_LANG:
|
||||
case TAG_VOICE:
|
||||
applyVoiceSpan(text, startTag.voice, start, end);
|
||||
break;
|
||||
case TAG_LANG:
|
||||
case "": // Case of the "whole cue" virtual tag.
|
||||
break;
|
||||
default:
|
||||
@ -658,6 +661,11 @@ public final class WebvttCueParser {
|
||||
}
|
||||
}
|
||||
|
||||
private static void applyVoiceSpan(
|
||||
SpannableStringBuilder text, String voice, int start, int end) {
|
||||
text.setSpan(new VoiceSpan(voice), start, end, Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
|
||||
}
|
||||
|
||||
private static void applyStyleToText(
|
||||
SpannableStringBuilder spannedText, WebvttCssStyle style, int start, int end) {
|
||||
if (style == null) {
|
||||
|
@ -46,7 +46,7 @@ public final class WebvttCueParserTest {
|
||||
public void parseStrictValidUnsupportedTagsStrippedOut() throws Exception {
|
||||
Spanned text =
|
||||
parseCueText(
|
||||
"<v.first.loud Esme>This <unsupported>is</unsupported> text with "
|
||||
"This <unsupported>is</unsupported> text with "
|
||||
+ "<notsupp><invalid>html</invalid></notsupp> tags");
|
||||
|
||||
assertThat(text.toString()).isEqualTo("This is text with html tags");
|
||||
@ -242,6 +242,45 @@ public final class WebvttCueParserTest {
|
||||
assertThat(text.toString()).isEqualTo("&&&&&&&");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void parseEmptyVoiceSpan() throws Exception {
|
||||
Spanned text = parseCueText("<v>Text with a single voice span");
|
||||
|
||||
assertThat(text.toString()).isEqualTo("Text with a single voice span");
|
||||
assertThat(text).hasVoiceSpanBetween(0, "Text with a single voice span".length()).withName("");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void parseVoiceSpanWithName() throws Exception {
|
||||
Spanned text = parseCueText("<v Esme>Text with a single voice span");
|
||||
|
||||
assertThat(text.toString()).isEqualTo("Text with a single voice span");
|
||||
assertThat(text)
|
||||
.hasVoiceSpanBetween(0, "Text with a single voice span".length())
|
||||
.withName("Esme");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void ignoreVoiceSpanClasses() throws Exception {
|
||||
Spanned text = parseCueText("<v.first.loud Esme>Text with a single voice span");
|
||||
|
||||
assertThat(text.toString()).isEqualTo("Text with a single voice span");
|
||||
assertThat(text)
|
||||
.hasVoiceSpanBetween(0, "Text with a single voice span".length())
|
||||
.withName("Esme");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void parseMultipleVoiceSpans() throws Exception {
|
||||
Spanned text = parseCueText("<v.loud Esme>Text with </v><v.quiet Mary>multiple voice spans");
|
||||
|
||||
assertThat(text.toString()).isEqualTo("Text with multiple voice spans");
|
||||
assertThat(text).hasVoiceSpanBetween(0, "Text with ".length()).withName("Esme");
|
||||
assertThat(text)
|
||||
.hasVoiceSpanBetween("Text with ".length(), "Text with multiple voice spans".length())
|
||||
.withName("Mary");
|
||||
}
|
||||
|
||||
private static Spanned parseCueText(String string) {
|
||||
return WebvttCueParser.parseCueText(
|
||||
/* id= */ null, string, /* styles= */ Collections.emptyList());
|
||||
|
@ -42,6 +42,7 @@ import androidx.media3.common.text.HorizontalTextInVerticalContextSpan;
|
||||
import androidx.media3.common.text.RubySpan;
|
||||
import androidx.media3.common.text.TextAnnotation;
|
||||
import androidx.media3.common.text.TextEmphasisSpan;
|
||||
import androidx.media3.common.text.VoiceSpan;
|
||||
import androidx.media3.common.util.NullableType;
|
||||
import androidx.media3.common.util.UnstableApi;
|
||||
import androidx.media3.common.util.Util;
|
||||
@ -634,6 +635,42 @@ public final class SpannedSubject extends Subject {
|
||||
hasNoSpansOfTypeBetween(HorizontalTextInVerticalContextSpan.class, start, end);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks that the subject has a {@link VoiceSpan} from {@code start} to {@code end}.
|
||||
*
|
||||
* @param start The start of the expected span.
|
||||
* @param end The end of the expected span.
|
||||
* @return A {@link VoiceText} object for optional additional assertions on the voice name.
|
||||
*/
|
||||
public VoiceText hasVoiceSpanBetween(int start, int end) {
|
||||
if (actual == null) {
|
||||
failWithoutActual(simpleFact("Spanned must not be null"));
|
||||
return ALREADY_FAILED_WITH_NAME_AND_CLASSES;
|
||||
}
|
||||
|
||||
List<VoiceSpan> voiceSpans = findMatchingSpans(start, end, VoiceSpan.class);
|
||||
if (voiceSpans.size() == 1) {
|
||||
return check("VoiceSpan (start=%s,end=%s)", start, end)
|
||||
.about(voiceSpanSubjects(actual))
|
||||
.that(voiceSpans);
|
||||
}
|
||||
failWithExpectedSpan(start, end, VoiceSpan.class, actual.toString().substring(start, end));
|
||||
return ALREADY_FAILED_WITH_NAME_AND_CLASSES;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks that the subject has no {@link VoiceSpan}s on any of the text between {@code start} and
|
||||
* {@code end}.
|
||||
*
|
||||
* <p>This fails even if the start and end indexes don't exactly match.
|
||||
*
|
||||
* @param start The start index to start searching for spans.
|
||||
* @param end The end index to stop searching for spans.
|
||||
*/
|
||||
public void hasNoVoiceSpanBetween(int start, int end) {
|
||||
hasNoSpansOfTypeBetween(VoiceSpan.class, start, end);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks that the subject has no spans of type {@code spanClazz} on any of the text between
|
||||
* {@code start} and {@code end}.
|
||||
@ -1272,4 +1309,83 @@ public final class SpannedSubject extends Subject {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Allows assertions about a span's voice and its position. */
|
||||
public interface VoiceText {
|
||||
/**
|
||||
* Checks that the matched span has the expected {@code name}.
|
||||
*
|
||||
* @param name The expected name of the voice.
|
||||
* @return An {@link AndSpanFlags} object for optional additional assertions on the flags.
|
||||
*/
|
||||
AndSpanFlags withName(String name);
|
||||
}
|
||||
|
||||
private static final VoiceText ALREADY_FAILED_WITH_NAME_AND_CLASSES =
|
||||
(name) -> ALREADY_FAILED_AND_FLAGS;
|
||||
|
||||
private static Factory<VoiceSpanSubject, List<VoiceSpan>> voiceSpanSubjects(
|
||||
Spanned actualSpanned) {
|
||||
return (FailureMetadata metadata, @Nullable List<VoiceSpan> spans) ->
|
||||
new VoiceSpanSubject(metadata, spans, actualSpanned);
|
||||
}
|
||||
|
||||
private static final class VoiceSpanSubject extends Subject implements VoiceText {
|
||||
|
||||
@Nullable private final List<VoiceSpan> actualSpans;
|
||||
private final Spanned actualSpanned;
|
||||
|
||||
private VoiceSpanSubject(
|
||||
FailureMetadata metadata, @Nullable List<VoiceSpan> actualSpans, Spanned actualSpanned) {
|
||||
super(metadata, actualSpans);
|
||||
this.actualSpans = actualSpans;
|
||||
this.actualSpanned = actualSpanned;
|
||||
}
|
||||
|
||||
@Override
|
||||
public AndSpanFlags withName(String name) {
|
||||
List<Integer> matchingSpanFlags = new ArrayList<>();
|
||||
List<Name> voiceName = new ArrayList<>();
|
||||
for (VoiceSpan span : checkNotNull(actualSpans)) {
|
||||
voiceName.add(new Name(span.name));
|
||||
if (span.name.equals(name)) {
|
||||
matchingSpanFlags.add(actualSpanned.getSpanFlags(span));
|
||||
}
|
||||
}
|
||||
check("voiceName").that(voiceName).containsExactly(new Name(name));
|
||||
return check("flags").about(spanFlags()).that(matchingSpanFlags);
|
||||
}
|
||||
|
||||
private static final class Name {
|
||||
|
||||
private final String name;
|
||||
|
||||
private Name(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(@Nullable Object o) {
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
Name that = (Name) o;
|
||||
return name.equals(that.name);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return name.hashCode();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("{name=%s}", name);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -40,6 +40,7 @@ import androidx.media3.common.text.HorizontalTextInVerticalContextSpan;
|
||||
import androidx.media3.common.text.RubySpan;
|
||||
import androidx.media3.common.text.TextAnnotation;
|
||||
import androidx.media3.common.text.TextEmphasisSpan;
|
||||
import androidx.media3.common.text.VoiceSpan;
|
||||
import androidx.media3.common.util.Util;
|
||||
import androidx.media3.test.utils.truth.SpannedSubject.AndSpanFlags;
|
||||
import androidx.media3.test.utils.truth.SpannedSubject.WithSpanFlags;
|
||||
@ -902,6 +903,59 @@ public class SpannedSubjectTest {
|
||||
SpannedSubject::hasNoHorizontalTextInVerticalContextSpanBetween);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void voiceSpan_success() {
|
||||
SpannableString spannable =
|
||||
createSpannable(new VoiceSpan("speaker"), Spanned.SPAN_INCLUSIVE_EXCLUSIVE);
|
||||
|
||||
assertThat(spannable)
|
||||
.hasVoiceSpanBetween(SPAN_START, SPAN_END)
|
||||
.withName("speaker")
|
||||
.andFlags(Spanned.SPAN_INCLUSIVE_EXCLUSIVE);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void voiceSpan_wrongEndIndex() {
|
||||
checkHasSpanFailsDueToIndexMismatch(
|
||||
new VoiceSpan("speaker"), SpannedSubject::hasVoiceSpanBetween);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void voiceSpan_wrongName() {
|
||||
SpannableString spannable = createSpannable(new VoiceSpan("speaker"));
|
||||
|
||||
AssertionError expected =
|
||||
expectFailure(
|
||||
whenTesting ->
|
||||
whenTesting
|
||||
.that(spannable)
|
||||
.hasVoiceSpanBetween(SPAN_START, SPAN_END)
|
||||
.withName("different speaker"));
|
||||
|
||||
assertThat(expected).factValue("value of").contains("voiceName");
|
||||
assertThat(expected).factValue("expected").contains("name=different speaker");
|
||||
assertThat(expected).factValue("but was").contains("name=speaker");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void voiceSpan_wrongFlags() {
|
||||
checkHasSpanFailsDueToFlagMismatch(
|
||||
new VoiceSpan("speaker"),
|
||||
(subject, start, end) -> subject.hasVoiceSpanBetween(start, end).withName("speaker"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void noVoiceSpan_success() {
|
||||
SpannableString spannable = createSpannableWithUnrelatedSpanAnd(new VoiceSpan("speaker"));
|
||||
|
||||
assertThat(spannable).hasNoVoiceSpanBetween(UNRELATED_SPAN_START, UNRELATED_SPAN_END);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void noVoiceSpan_failure() {
|
||||
checkHasNoSpanFails(new VoiceSpan("speaker"), SpannedSubject::hasNoVoiceSpanBetween);
|
||||
}
|
||||
|
||||
private interface HasSpanFunction<T> {
|
||||
T call(SpannedSubject s, int start, int end);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user