Add UTF-16 encoded subtitle support to SsaDecoder

Issue: androidx/media#319
PiperOrigin-RevId: 527891646
This commit is contained in:
michaelkatz 2023-04-28 16:31:11 +01:00 committed by Marc Baechinger
parent 336d4b386f
commit 06ac2f7990
6 changed files with 149 additions and 34 deletions

View File

@ -121,6 +121,9 @@
* IMA DAI extension: * IMA DAI extension:
* Fix a bug where a new ad group is inserted in live streams because the * Fix a bug where a new ad group is inserted in live streams because the
calculated content position in consecutive timelines varies slightly. calculated content position in consecutive timelines varies slightly.
* Text:
* SSA: Add support for UTF-16 files if they start with a byte order mark
([#319](https://github.com/androidx/media/issues/319)).
* Remove deprecated symbols: * Remove deprecated symbols:
* Remove `DefaultAudioSink` constructors, use `DefaultAudioSink.Builder` * Remove `DefaultAudioSink` constructors, use `DefaultAudioSink.Builder`
instead. instead.

View File

@ -233,11 +233,28 @@ public final class ParsableByteArray {
return (data[position] & 0xFF); return (data[position] & 0xFF);
} }
/** Peeks at the next char. */ /**
* Peeks at the next char.
*
* <p>Equivalent to passing {@link Charsets#UTF_16} or {@link Charsets#UTF_16BE} to {@link
* #peekChar(Charset)}.
*/
public char peekChar() { public char peekChar() {
return (char) ((data[position] & 0xFF) << 8 | (data[position + 1] & 0xFF)); return (char) ((data[position] & 0xFF) << 8 | (data[position + 1] & 0xFF));
} }
/**
 * Peeks at the next char (as decoded by {@code charset}) without advancing {@link #position}.
 *
 * <p>The result is the character part of the value returned by {@code peekCharacterAndSize}, so
 * it is {@code 0} whenever that method returns 0.
 *
 * @param charset The charset used to decode the char.
 * @return The decoded char.
 * @throws IllegalArgumentException if charset is not supported. Only US_ASCII, UTF-8, UTF-16,
 *     UTF-16BE, and UTF-16LE are supported.
 */
public char peekChar(Charset charset) {
  boolean isSupportedCharset = SUPPORTED_CHARSETS_FOR_READLINE.contains(charset);
  Assertions.checkArgument(isSupportedCharset, "Unsupported charset: " + charset);
  int packedCharacterAndSize = peekCharacterAndSize(charset);
  // The character occupies the bits above the low Short.SIZE bits of the packed value.
  return (char) (packedCharacterAndSize >> Short.SIZE);
}
/** Reads the next byte as an unsigned value. */ /** Reads the next byte as an unsigned value. */
public int readUnsignedByte() { public int readUnsignedByte() {
return (data[position++] & 0xFF); return (data[position++] & 0xFF);
@ -649,27 +666,42 @@ public final class ParsableByteArray {
* UTF-8 and two bytes for UTF-16). * UTF-8 and two bytes for UTF-16).
*/ */
private char readCharacterIfInList(Charset charset, char[] chars) { private char readCharacterIfInList(Charset charset, char[] chars) {
char character; int characterAndSize = peekCharacterAndSize(charset);
int characterSize;
if (characterAndSize != 0 && Chars.contains(chars, (char) (characterAndSize >> Short.SIZE))) {
position += characterAndSize & 0xFFFF;
return (char) (characterAndSize >> Short.SIZE);
} else {
return 0;
}
}
/**
* Peeks at the character at {@link #position} (as decoded by {@code charset}), returns it and the
* number of bytes the character takes up within the array packed into an int. First four bytes
* are the character and the second four is the size in bytes it takes. Returns 0 if {@link
* #bytesLeft()} doesn't allow reading a whole character in {@code charset} or if the {@code
* charset} is not one of US_ASCII, UTF-8, UTF-16, UTF-16BE, or UTF-16LE.
*
* <p>Only supports characters that occupy a single code unit (i.e. one byte for UTF-8 and two
* bytes for UTF-16).
*/
private int peekCharacterAndSize(Charset charset) {
byte character;
short characterSize;
if ((charset.equals(Charsets.UTF_8) || charset.equals(Charsets.US_ASCII)) && bytesLeft() >= 1) { if ((charset.equals(Charsets.UTF_8) || charset.equals(Charsets.US_ASCII)) && bytesLeft() >= 1) {
character = Chars.checkedCast(UnsignedBytes.toInt(data[position])); character = (byte) Chars.checkedCast(UnsignedBytes.toInt(data[position]));
characterSize = 1; characterSize = 1;
} else if ((charset.equals(Charsets.UTF_16) || charset.equals(Charsets.UTF_16BE)) } else if ((charset.equals(Charsets.UTF_16) || charset.equals(Charsets.UTF_16BE))
&& bytesLeft() >= 2) { && bytesLeft() >= 2) {
character = Chars.fromBytes(data[position], data[position + 1]); character = (byte) Chars.fromBytes(data[position], data[position + 1]);
characterSize = 2; characterSize = 2;
} else if (charset.equals(Charsets.UTF_16LE) && bytesLeft() >= 2) { } else if (charset.equals(Charsets.UTF_16LE) && bytesLeft() >= 2) {
character = Chars.fromBytes(data[position + 1], data[position]); character = (byte) Chars.fromBytes(data[position + 1], data[position]);
characterSize = 2; characterSize = 2;
} else { } else {
return 0; return 0;
} }
return (Chars.checkedCast(character) << Short.SIZE) + characterSize;
if (Chars.contains(chars, character)) {
position += characterSize;
return Chars.checkedCast(character);
} else {
return 0;
}
} }
} }

View File

@ -37,6 +37,8 @@ import androidx.media3.common.util.Util;
import androidx.media3.extractor.text.SimpleSubtitleDecoder; import androidx.media3.extractor.text.SimpleSubtitleDecoder;
import androidx.media3.extractor.text.Subtitle; import androidx.media3.extractor.text.Subtitle;
import com.google.common.base.Ascii; import com.google.common.base.Ascii;
import com.google.common.base.Charsets;
import java.nio.charset.Charset;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.LinkedHashMap; import java.util.LinkedHashMap;
import java.util.List; import java.util.List;
@ -98,11 +100,14 @@ public final class SsaDecoder extends SimpleSubtitleDecoder {
if (initializationData != null && !initializationData.isEmpty()) { if (initializationData != null && !initializationData.isEmpty()) {
haveInitializationData = true; haveInitializationData = true;
// Currently, construction with initialization data is only relevant to SSA subtitles muxed
// in an MKV file. According to https://www.matroska.org/technical/subtitles.html, these muxed
// subtitles are always encoded in UTF-8.
String formatLine = Util.fromUtf8Bytes(initializationData.get(0)); String formatLine = Util.fromUtf8Bytes(initializationData.get(0));
Assertions.checkArgument(formatLine.startsWith(FORMAT_LINE_PREFIX)); Assertions.checkArgument(formatLine.startsWith(FORMAT_LINE_PREFIX));
dialogueFormatFromInitializationData = dialogueFormatFromInitializationData =
Assertions.checkNotNull(SsaDialogueFormat.fromFormatLine(formatLine)); Assertions.checkNotNull(SsaDialogueFormat.fromFormatLine(formatLine));
parseHeader(new ParsableByteArray(initializationData.get(1))); parseHeader(new ParsableByteArray(initializationData.get(1)), Charsets.UTF_8);
} else { } else {
haveInitializationData = false; haveInitializationData = false;
dialogueFormatFromInitializationData = null; dialogueFormatFromInitializationData = null;
@ -115,25 +120,37 @@ public final class SsaDecoder extends SimpleSubtitleDecoder {
List<Long> cueTimesUs = new ArrayList<>(); List<Long> cueTimesUs = new ArrayList<>();
ParsableByteArray parsableData = new ParsableByteArray(data, length); ParsableByteArray parsableData = new ParsableByteArray(data, length);
Charset charset = detectUtfCharset(parsableData);
if (!haveInitializationData) { if (!haveInitializationData) {
parseHeader(parsableData); parseHeader(parsableData, charset);
} }
parseEventBody(parsableData, cues, cueTimesUs); parseEventBody(parsableData, cues, cueTimesUs, charset);
return new SsaSubtitle(cues, cueTimesUs); return new SsaSubtitle(cues, cueTimesUs);
} }
/**
 * Determines the UTF encoding of {@code data} from a byte order mark (BOM).
 *
 * @param data The {@link ParsableByteArray} to inspect via {@link
 *     ParsableByteArray#readUtfCharsetFromBom()}.
 * @return The charset reported for the BOM, or UTF-8 if no BOM is found.
 */
private Charset detectUtfCharset(ParsableByteArray data) {
  @Nullable Charset bomCharset = data.readUtfCharsetFromBom();
  if (bomCharset == null) {
    // No BOM was reported, so fall back to the default encoding.
    return Charsets.UTF_8;
  }
  return bomCharset;
}
/** /**
* Parses the header of the subtitle. * Parses the header of the subtitle.
* *
* @param data A {@link ParsableByteArray} from which the header should be read. * @param data A {@link ParsableByteArray} from which the header should be read.
* @param charset The {@code Charset} of the encoding of {@code data}.
*/ */
private void parseHeader(ParsableByteArray data) { private void parseHeader(ParsableByteArray data, Charset charset) {
@Nullable String currentLine; @Nullable String currentLine;
while ((currentLine = data.readLine()) != null) { while ((currentLine = data.readLine(charset)) != null) {
if ("[Script Info]".equalsIgnoreCase(currentLine)) { if ("[Script Info]".equalsIgnoreCase(currentLine)) {
parseScriptInfo(data); parseScriptInfo(data, charset);
} else if ("[V4+ Styles]".equalsIgnoreCase(currentLine)) { } else if ("[V4+ Styles]".equalsIgnoreCase(currentLine)) {
styles = parseStyles(data); styles = parseStyles(data, charset);
} else if ("[V4 Styles]".equalsIgnoreCase(currentLine)) { } else if ("[V4 Styles]".equalsIgnoreCase(currentLine)) {
Log.i(TAG, "[V4 Styles] are not supported"); Log.i(TAG, "[V4 Styles] are not supported");
} else if ("[Events]".equalsIgnoreCase(currentLine)) { } else if ("[Events]".equalsIgnoreCase(currentLine)) {
@ -151,11 +168,12 @@ public final class SsaDecoder extends SimpleSubtitleDecoder {
* *
* @param data A {@link ParsableByteArray} with {@link ParsableByteArray#getPosition() position} * @param data A {@link ParsableByteArray} with {@link ParsableByteArray#getPosition() position}
* set to the beginning of the first line after {@code [Script Info]}. * set to the beginning of the first line after {@code [Script Info]}.
* @param charset The {@code Charset} of the encoding of {@code data}.
*/ */
private void parseScriptInfo(ParsableByteArray data) { private void parseScriptInfo(ParsableByteArray data, Charset charset) {
@Nullable String currentLine; @Nullable String currentLine;
while ((currentLine = data.readLine()) != null while ((currentLine = data.readLine(charset)) != null
&& (data.bytesLeft() == 0 || data.peekUnsignedByte() != '[')) { && (data.bytesLeft() == 0 || data.peekChar(charset) != '[')) {
String[] infoNameAndValue = currentLine.split(":"); String[] infoNameAndValue = currentLine.split(":");
if (infoNameAndValue.length != 2) { if (infoNameAndValue.length != 2) {
continue; continue;
@ -187,13 +205,14 @@ public final class SsaDecoder extends SimpleSubtitleDecoder {
* *
* @param data A {@link ParsableByteArray} with {@link ParsableByteArray#getPosition()} pointing * @param data A {@link ParsableByteArray} with {@link ParsableByteArray#getPosition()} pointing
* at the beginning of the first line after {@code [V4+ Styles]}. * at the beginning of the first line after {@code [V4+ Styles]}.
* @param charset The {@code Charset} of the encoding of {@code data}.
*/ */
private static Map<String, SsaStyle> parseStyles(ParsableByteArray data) { private static Map<String, SsaStyle> parseStyles(ParsableByteArray data, Charset charset) {
Map<String, SsaStyle> styles = new LinkedHashMap<>(); Map<String, SsaStyle> styles = new LinkedHashMap<>();
@Nullable SsaStyle.Format formatInfo = null; @Nullable SsaStyle.Format formatInfo = null;
@Nullable String currentLine; @Nullable String currentLine;
while ((currentLine = data.readLine()) != null while ((currentLine = data.readLine(charset)) != null
&& (data.bytesLeft() == 0 || data.peekUnsignedByte() != '[')) { && (data.bytesLeft() == 0 || data.peekChar(charset) != '[')) {
if (currentLine.startsWith(FORMAT_LINE_PREFIX)) { if (currentLine.startsWith(FORMAT_LINE_PREFIX)) {
formatInfo = SsaStyle.Format.fromFormatLine(currentLine); formatInfo = SsaStyle.Format.fromFormatLine(currentLine);
} else if (currentLine.startsWith(STYLE_LINE_PREFIX)) { } else if (currentLine.startsWith(STYLE_LINE_PREFIX)) {
@ -216,12 +235,14 @@ public final class SsaDecoder extends SimpleSubtitleDecoder {
* @param data A {@link ParsableByteArray} from which the body should be read. * @param data A {@link ParsableByteArray} from which the body should be read.
* @param cues A list to which parsed cues will be added. * @param cues A list to which parsed cues will be added.
* @param cueTimesUs A sorted list to which parsed cue timestamps will be added. * @param cueTimesUs A sorted list to which parsed cue timestamps will be added.
* @param charset The {@code Charset} of the encoding of {@code data}.
*/ */
private void parseEventBody(ParsableByteArray data, List<List<Cue>> cues, List<Long> cueTimesUs) { private void parseEventBody(
ParsableByteArray data, List<List<Cue>> cues, List<Long> cueTimesUs, Charset charset) {
@Nullable @Nullable
SsaDialogueFormat format = haveInitializationData ? dialogueFormatFromInitializationData : null; SsaDialogueFormat format = haveInitializationData ? dialogueFormatFromInitializationData : null;
@Nullable String currentLine; @Nullable String currentLine;
while ((currentLine = data.readLine()) != null) { while ((currentLine = data.readLine(charset)) != null) {
if (currentLine.startsWith(FORMAT_LINE_PREFIX)) { if (currentLine.startsWith(FORMAT_LINE_PREFIX)) {
format = SsaDialogueFormat.fromFormatLine(currentLine); format = SsaDialogueFormat.fromFormatLine(currentLine);
} else if (currentLine.startsWith(DIALOGUE_LINE_PREFIX)) { } else if (currentLine.startsWith(DIALOGUE_LINE_PREFIX)) {

View File

@ -30,6 +30,7 @@ import androidx.test.ext.junit.runners.AndroidJUnit4;
import com.google.common.collect.Iterables; import com.google.common.collect.Iterables;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Objects;
import org.junit.Test; import org.junit.Test;
import org.junit.runner.RunWith; import org.junit.runner.RunWith;
@ -43,6 +44,8 @@ public final class SsaDecoderTest {
private static final String TYPICAL_HEADER_ONLY = "media/ssa/typical_header"; private static final String TYPICAL_HEADER_ONLY = "media/ssa/typical_header";
private static final String TYPICAL_DIALOGUE_ONLY = "media/ssa/typical_dialogue"; private static final String TYPICAL_DIALOGUE_ONLY = "media/ssa/typical_dialogue";
private static final String TYPICAL_FORMAT_ONLY = "media/ssa/typical_format"; private static final String TYPICAL_FORMAT_ONLY = "media/ssa/typical_format";
private static final String TYPICAL_UTF16LE = "media/ssa/typical_utf16le";
private static final String TYPICAL_UTF16BE = "media/ssa/typical_utf16be";
private static final String OVERLAPPING_TIMECODES = "media/ssa/overlapping_timecodes"; private static final String OVERLAPPING_TIMECODES = "media/ssa/overlapping_timecodes";
private static final String POSITIONS = "media/ssa/positioning"; private static final String POSITIONS = "media/ssa/positioning";
private static final String INVALID_TIMECODES = "media/ssa/invalid_timecodes"; private static final String INVALID_TIMECODES = "media/ssa/invalid_timecodes";
@ -130,6 +133,58 @@ public final class SsaDecoderTest {
assertTypicalCue3(subtitle, 4); assertTypicalCue3(subtitle, 4);
} }
/** Decodes the asset referenced by {@code TYPICAL_UTF16LE} and checks the resulting cues. */
@Test
public void decodeTypicalUtf16le() throws IOException {
  SsaDecoder ssaDecoder = new SsaDecoder();
  byte[] subtitleBytes =
      TestUtil.getByteArray(ApplicationProvider.getApplicationContext(), TYPICAL_UTF16LE);

  Subtitle decoded = ssaDecoder.decode(subtitleBytes, subtitleBytes.length, false);

  assertThat(decoded.getEventTimeCount()).isEqualTo(6);

  // Check position, line, anchors & alignment are set from Alignment Style (2 - bottom-center).
  Cue openingCue = decoded.getCues(decoded.getEventTime(0)).get(0);
  assertWithMessage("Cue.textAlignment")
      .that(openingCue.textAlignment)
      .isEqualTo(Layout.Alignment.ALIGN_CENTER);
  assertWithMessage("Cue.positionAnchor")
      .that(openingCue.positionAnchor)
      .isEqualTo(Cue.ANCHOR_TYPE_MIDDLE);
  assertThat(openingCue.position).isEqualTo(0.5f);
  assertThat(openingCue.lineAnchor).isEqualTo(Cue.ANCHOR_TYPE_END);
  assertThat(openingCue.lineType).isEqualTo(Cue.LINE_TYPE_FRACTION);
  assertThat(openingCue.line).isEqualTo(0.95f);

  // The three typical cues start at event indices 0, 2 and 4.
  assertTypicalCue1(decoded, 0);
  assertTypicalCue2(decoded, 2);
  assertTypicalCue3(decoded, 4);
}
/** Decodes the asset referenced by {@code TYPICAL_UTF16BE} and checks the resulting cues. */
@Test
public void decodeTypicalUtf16be() throws IOException {
  SsaDecoder ssaDecoder = new SsaDecoder();
  byte[] subtitleBytes =
      TestUtil.getByteArray(ApplicationProvider.getApplicationContext(), TYPICAL_UTF16BE);

  Subtitle decoded = ssaDecoder.decode(subtitleBytes, subtitleBytes.length, false);

  assertThat(decoded.getEventTimeCount()).isEqualTo(6);

  // Check position, line, anchors & alignment are set from Alignment Style (2 - bottom-center).
  Cue openingCue = decoded.getCues(decoded.getEventTime(0)).get(0);
  assertWithMessage("Cue.textAlignment")
      .that(openingCue.textAlignment)
      .isEqualTo(Layout.Alignment.ALIGN_CENTER);
  assertWithMessage("Cue.positionAnchor")
      .that(openingCue.positionAnchor)
      .isEqualTo(Cue.ANCHOR_TYPE_MIDDLE);
  assertThat(openingCue.position).isEqualTo(0.5f);
  assertThat(openingCue.lineAnchor).isEqualTo(Cue.ANCHOR_TYPE_END);
  assertThat(openingCue.lineType).isEqualTo(Cue.LINE_TYPE_FRACTION);
  assertThat(openingCue.line).isEqualTo(0.95f);

  // The three typical cues start at event indices 0, 2 and 4.
  assertTypicalCue1(decoded, 0);
  assertTypicalCue2(decoded, 2);
  assertTypicalCue3(decoded, 4);
}
@Test @Test
public void decodeOverlappingTimecodes() throws IOException { public void decodeOverlappingTimecodes() throws IOException {
SsaDecoder decoder = new SsaDecoder(); SsaDecoder decoder = new SsaDecoder();
@ -438,6 +493,10 @@ public final class SsaDecoderTest {
assertThat(subtitle.getEventTime(eventIndex)).isEqualTo(0); assertThat(subtitle.getEventTime(eventIndex)).isEqualTo(0);
assertThat(subtitle.getCues(subtitle.getEventTime(eventIndex)).get(0).text.toString()) assertThat(subtitle.getCues(subtitle.getEventTime(eventIndex)).get(0).text.toString())
.isEqualTo("This is the first subtitle."); .isEqualTo("This is the first subtitle.");
assertThat(
Objects.requireNonNull(
subtitle.getCues(subtitle.getEventTime(eventIndex)).get(0).textAlignment))
.isEqualTo(Layout.Alignment.ALIGN_CENTER);
assertThat(subtitle.getEventTime(eventIndex + 1)).isEqualTo(1230000); assertThat(subtitle.getEventTime(eventIndex + 1)).isEqualTo(1230000);
} }