mirror of
https://github.com/androidx/media.git
synced 2025-05-09 16:40:55 +08:00
Merge pull request #10750 from Stronger197:subrip_utf_16
PiperOrigin-RevId: 492164739 (cherry picked from commit a9191418051a19681ddf884163ac5553871ec658)
This commit is contained in:
parent
f8155f1cd4
commit
6e58ca6baa
@ -16,6 +16,9 @@
|
||||
* Audio:
|
||||
* Use the compressed audio format bitrate to calculate the min buffer size
|
||||
for `AudioTrack` in direct playbacks (passthrough).
|
||||
* Text:
|
||||
* SubRip: Add support for UTF-16 files if they start with a byte order
|
||||
mark.
|
||||
* Session:
|
||||
* Add helper method to convert platform session token to Media3
|
||||
`SessionToken` ([#171](https://github.com/androidx/media/issues/171)).
|
||||
|
@ -17,6 +17,9 @@ package androidx.media3.common.util;
|
||||
|
||||
import androidx.annotation.Nullable;
|
||||
import com.google.common.base.Charsets;
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import com.google.common.primitives.Chars;
|
||||
import com.google.common.primitives.UnsignedBytes;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.Arrays;
|
||||
@ -28,6 +31,12 @@ import java.util.Arrays;
|
||||
@UnstableApi
|
||||
public final class ParsableByteArray {
|
||||
|
||||
private static final char[] CR_AND_LF = {'\r', '\n'};
|
||||
private static final char[] LF = {'\n'};
|
||||
private static final ImmutableSet<Charset> SUPPORTED_CHARSETS_FOR_READLINE =
|
||||
ImmutableSet.of(
|
||||
Charsets.US_ASCII, Charsets.UTF_8, Charsets.UTF_16, Charsets.UTF_16BE, Charsets.UTF_16LE);
|
||||
|
||||
private byte[] data;
|
||||
private int position;
|
||||
// TODO(internal b/147657250): Enforce this limit on all read methods.
|
||||
@ -490,45 +499,47 @@ public final class ParsableByteArray {
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a line of text.
|
||||
* Reads a line of text in UTF-8.
|
||||
*
|
||||
* <p>A line is considered to be terminated by any one of a carriage return ('\r'), a line feed
|
||||
* ('\n'), or a carriage return followed immediately by a line feed ('\r\n'). The UTF-8 charset is
|
||||
* used. This method discards leading UTF-8 byte order marks, if present.
|
||||
*
|
||||
* @return The line not including any line-termination characters, or null if the end of the data
|
||||
* has already been reached.
|
||||
* <p>Equivalent to passing {@link Charsets#UTF_8} to {@link #readLine(Charset)}.
|
||||
*/
|
||||
@Nullable
|
||||
public String readLine() {
|
||||
return readLine(Charsets.UTF_8);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a line of text in {@code charset}.
|
||||
*
|
||||
* <p>A line is considered to be terminated by any one of a carriage return ('\r'), a line feed
|
||||
* ('\n'), or a carriage return followed immediately by a line feed ('\r\n'). This method discards
|
||||
* leading UTF byte order marks (BOM), if present.
|
||||
*
|
||||
* <p>The {@linkplain #getPosition() position} is advanced to start of the next line (i.e. any
|
||||
* line terminators are skipped).
|
||||
*
|
||||
* @param charset The charset used to interpret the bytes as a {@link String}.
|
||||
* @return The line not including any line-termination characters, or null if the end of the data
|
||||
* has already been reached.
|
||||
* @throws IllegalArgumentException if charset is not supported. Only US_ASCII, UTF-8, UTF-16,
|
||||
* UTF-16BE, and UTF-16LE are supported.
|
||||
*/
|
||||
@Nullable
|
||||
public String readLine(Charset charset) {
|
||||
Assertions.checkArgument(
|
||||
SUPPORTED_CHARSETS_FOR_READLINE.contains(charset), "Unsupported charset: " + charset);
|
||||
if (bytesLeft() == 0) {
|
||||
return null;
|
||||
}
|
||||
int lineLimit = position;
|
||||
while (lineLimit < limit && !Util.isLinebreak(data[lineLimit])) {
|
||||
lineLimit++;
|
||||
if (!charset.equals(Charsets.US_ASCII)) {
|
||||
readUtfCharsetFromBom(); // Skip BOM if present
|
||||
}
|
||||
if (lineLimit - position >= 3
|
||||
&& data[position] == (byte) 0xEF
|
||||
&& data[position + 1] == (byte) 0xBB
|
||||
&& data[position + 2] == (byte) 0xBF) {
|
||||
// There's a UTF-8 byte order mark at the start of the line. Discard it.
|
||||
position += 3;
|
||||
}
|
||||
String line = Util.fromUtf8Bytes(data, position, lineLimit - position);
|
||||
position = lineLimit;
|
||||
int lineLimit = findNextLineTerminator(charset);
|
||||
String line = readString(lineLimit - position, charset);
|
||||
if (position == limit) {
|
||||
return line;
|
||||
}
|
||||
if (data[position] == '\r') {
|
||||
position++;
|
||||
if (position == limit) {
|
||||
return line;
|
||||
}
|
||||
}
|
||||
if (data[position] == '\n') {
|
||||
position++;
|
||||
}
|
||||
skipLineTerminator(charset);
|
||||
return line;
|
||||
}
|
||||
|
||||
@ -566,4 +577,99 @@ public final class ParsableByteArray {
|
||||
position += length;
|
||||
return value;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a UTF byte order mark (BOM) and returns the UTF {@link Charset} it represents. Returns
|
||||
* {@code null} without advancing {@link #getPosition() position} if no BOM is found.
|
||||
*/
|
||||
@Nullable
|
||||
public Charset readUtfCharsetFromBom() {
|
||||
if (bytesLeft() >= 3
|
||||
&& data[position] == (byte) 0xEF
|
||||
&& data[position + 1] == (byte) 0xBB
|
||||
&& data[position + 2] == (byte) 0xBF) {
|
||||
position += 3;
|
||||
return Charsets.UTF_8;
|
||||
} else if (bytesLeft() >= 2) {
|
||||
if (data[position] == (byte) 0xFE && data[position + 1] == (byte) 0xFF) {
|
||||
position += 2;
|
||||
return Charsets.UTF_16BE;
|
||||
} else if (data[position] == (byte) 0xFF && data[position + 1] == (byte) 0xFE) {
|
||||
position += 2;
|
||||
return Charsets.UTF_16LE;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the index of the next occurrence of '\n' or '\r', or {@link #limit} if none is found.
|
||||
*/
|
||||
private int findNextLineTerminator(Charset charset) {
|
||||
int stride;
|
||||
if (charset.equals(Charsets.UTF_8) || charset.equals(Charsets.US_ASCII)) {
|
||||
stride = 1;
|
||||
} else if (charset.equals(Charsets.UTF_16)
|
||||
|| charset.equals(Charsets.UTF_16LE)
|
||||
|| charset.equals(Charsets.UTF_16BE)) {
|
||||
stride = 2;
|
||||
} else {
|
||||
throw new IllegalArgumentException("Unsupported charset: " + charset);
|
||||
}
|
||||
for (int i = position; i < limit - (stride - 1); i += stride) {
|
||||
if ((charset.equals(Charsets.UTF_8) || charset.equals(Charsets.US_ASCII))
|
||||
&& Util.isLinebreak(data[i])) {
|
||||
return i;
|
||||
} else if ((charset.equals(Charsets.UTF_16) || charset.equals(Charsets.UTF_16BE))
|
||||
&& data[i] == 0x00
|
||||
&& Util.isLinebreak(data[i + 1])) {
|
||||
return i;
|
||||
} else if (charset.equals(Charsets.UTF_16LE)
|
||||
&& data[i + 1] == 0x00
|
||||
&& Util.isLinebreak(data[i])) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return limit;
|
||||
}
|
||||
|
||||
private void skipLineTerminator(Charset charset) {
|
||||
if (readCharacterIfInList(charset, CR_AND_LF) == '\r') {
|
||||
readCharacterIfInList(charset, LF);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Peeks at the character at {@link #position} (as decoded by {@code charset}), returns it and
|
||||
* advances {@link #position} past it if it's in {@code chars}, otherwise returns {@code 0}
|
||||
* without advancing {@link #position}. Returns {@code 0} if {@link #bytesLeft()} doesn't allow
|
||||
* reading a whole character in {@code charset}.
|
||||
*
|
||||
* <p>Only supports characters in {@code chars} that occupy a single code unit (i.e. one byte for
|
||||
* UTF-8 and two bytes for UTF-16).
|
||||
*/
|
||||
private char readCharacterIfInList(Charset charset, char[] chars) {
|
||||
char character;
|
||||
int characterSize;
|
||||
if ((charset.equals(Charsets.UTF_8) || charset.equals(Charsets.US_ASCII)) && bytesLeft() >= 1) {
|
||||
character = Chars.checkedCast(UnsignedBytes.toInt(data[position]));
|
||||
characterSize = 1;
|
||||
} else if ((charset.equals(Charsets.UTF_16) || charset.equals(Charsets.UTF_16BE))
|
||||
&& bytesLeft() >= 2) {
|
||||
character = Chars.fromBytes(data[position], data[position + 1]);
|
||||
characterSize = 2;
|
||||
} else if (charset.equals(Charsets.UTF_16LE) && bytesLeft() >= 2) {
|
||||
character = Chars.fromBytes(data[position + 1], data[position]);
|
||||
characterSize = 2;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (Chars.contains(chars, character)) {
|
||||
position += characterSize;
|
||||
return Chars.checkedCast(character);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -15,11 +15,13 @@
|
||||
*/
|
||||
package androidx.media3.common.util;
|
||||
|
||||
import static androidx.media3.test.utils.TestUtil.createByteArray;
|
||||
import static com.google.common.truth.Truth.assertThat;
|
||||
import static java.nio.charset.Charset.forName;
|
||||
import static org.junit.Assert.fail;
|
||||
|
||||
import androidx.test.ext.junit.runners.AndroidJUnit4;
|
||||
import com.google.common.base.Charsets;
|
||||
import com.google.common.primitives.Bytes;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Arrays;
|
||||
@ -548,48 +550,324 @@ public final class ParsableByteArrayTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readSingleLineWithoutEndingTrail() {
|
||||
byte[] bytes = new byte[] {'f', 'o', 'o'};
|
||||
public void readSingleLineWithoutEndingTrail_ascii() {
|
||||
byte[] bytes = "foo".getBytes(Charsets.US_ASCII);
|
||||
ParsableByteArray parser = new ParsableByteArray(bytes);
|
||||
|
||||
assertThat(parser.readLine(Charsets.US_ASCII)).isEqualTo("foo");
|
||||
assertThat(parser.getPosition()).isEqualTo(3);
|
||||
assertThat(parser.readLine(Charsets.US_ASCII)).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readSingleLineWithEndingLf_ascii() {
|
||||
byte[] bytes = "foo\n".getBytes(Charsets.US_ASCII);
|
||||
ParsableByteArray parser = new ParsableByteArray(bytes);
|
||||
|
||||
assertThat(parser.readLine(Charsets.US_ASCII)).isEqualTo("foo");
|
||||
assertThat(parser.getPosition()).isEqualTo(4);
|
||||
assertThat(parser.readLine(Charsets.US_ASCII)).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readTwoLinesWithCrFollowedByLf_ascii() {
|
||||
byte[] bytes = "foo\r\nbar".getBytes(Charsets.US_ASCII);
|
||||
ParsableByteArray parser = new ParsableByteArray(bytes);
|
||||
|
||||
assertThat(parser.readLine(Charsets.US_ASCII)).isEqualTo("foo");
|
||||
assertThat(parser.getPosition()).isEqualTo(5);
|
||||
assertThat(parser.readLine(Charsets.US_ASCII)).isEqualTo("bar");
|
||||
assertThat(parser.getPosition()).isEqualTo(8);
|
||||
assertThat(parser.readLine(Charsets.US_ASCII)).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readThreeLinesWithEmptyLine_ascii() {
|
||||
byte[] bytes = "foo\r\n\rbar".getBytes(Charsets.US_ASCII);
|
||||
ParsableByteArray parser = new ParsableByteArray(bytes);
|
||||
|
||||
assertThat(parser.readLine(Charsets.US_ASCII)).isEqualTo("foo");
|
||||
assertThat(parser.getPosition()).isEqualTo(5);
|
||||
assertThat(parser.readLine(Charsets.US_ASCII)).isEqualTo("");
|
||||
assertThat(parser.getPosition()).isEqualTo(6);
|
||||
assertThat(parser.readLine(Charsets.US_ASCII)).isEqualTo("bar");
|
||||
assertThat(parser.getPosition()).isEqualTo(9);
|
||||
assertThat(parser.readLine(Charsets.US_ASCII)).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readFourLinesWithLfFollowedByCr_ascii() {
|
||||
byte[] bytes = "foo\n\r\rbar\r\n".getBytes(Charsets.US_ASCII);
|
||||
ParsableByteArray parser = new ParsableByteArray(bytes);
|
||||
|
||||
assertThat(parser.readLine(Charsets.US_ASCII)).isEqualTo("foo");
|
||||
assertThat(parser.getPosition()).isEqualTo(4);
|
||||
assertThat(parser.readLine(Charsets.US_ASCII)).isEqualTo("");
|
||||
assertThat(parser.getPosition()).isEqualTo(5);
|
||||
assertThat(parser.readLine(Charsets.US_ASCII)).isEqualTo("");
|
||||
assertThat(parser.getPosition()).isEqualTo(6);
|
||||
assertThat(parser.readLine(Charsets.US_ASCII)).isEqualTo("bar");
|
||||
assertThat(parser.getPosition()).isEqualTo(11);
|
||||
assertThat(parser.readLine(Charsets.US_ASCII)).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readSingleLineWithoutEndingTrail_utf8() {
|
||||
byte[] bytes = "foo".getBytes(Charsets.UTF_8);
|
||||
ParsableByteArray parser = new ParsableByteArray(bytes);
|
||||
|
||||
assertThat(parser.readLine()).isEqualTo("foo");
|
||||
assertThat(parser.getPosition()).isEqualTo(3);
|
||||
assertThat(parser.readLine()).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readSingleLineWithEndingLf() {
|
||||
byte[] bytes = new byte[] {'f', 'o', 'o', '\n'};
|
||||
public void readSingleLineWithEndingLf_utf8() {
|
||||
byte[] bytes = "foo\n".getBytes(Charsets.UTF_8);
|
||||
ParsableByteArray parser = new ParsableByteArray(bytes);
|
||||
|
||||
assertThat(parser.readLine()).isEqualTo("foo");
|
||||
assertThat(parser.getPosition()).isEqualTo(4);
|
||||
assertThat(parser.readLine()).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readTwoLinesWithCrFollowedByLf() {
|
||||
byte[] bytes = new byte[] {'f', 'o', 'o', '\r', '\n', 'b', 'a', 'r'};
|
||||
public void readTwoLinesWithCrFollowedByLf_utf8() {
|
||||
byte[] bytes = "foo\r\nbar".getBytes(Charsets.UTF_8);
|
||||
ParsableByteArray parser = new ParsableByteArray(bytes);
|
||||
|
||||
assertThat(parser.readLine()).isEqualTo("foo");
|
||||
assertThat(parser.getPosition()).isEqualTo(5);
|
||||
assertThat(parser.readLine()).isEqualTo("bar");
|
||||
assertThat(parser.getPosition()).isEqualTo(8);
|
||||
assertThat(parser.readLine()).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readThreeLinesWithEmptyLine() {
|
||||
byte[] bytes = new byte[] {'f', 'o', 'o', '\r', '\n', '\r', 'b', 'a', 'r'};
|
||||
public void readThreeLinesWithEmptyLineAndLeadingBom_utf8() {
|
||||
byte[] bytes =
|
||||
Bytes.concat(createByteArray(0xEF, 0xBB, 0xBF), "foo\r\n\rbar".getBytes(Charsets.UTF_8));
|
||||
ParsableByteArray parser = new ParsableByteArray(bytes);
|
||||
|
||||
assertThat(parser.readLine()).isEqualTo("foo");
|
||||
assertThat(parser.getPosition()).isEqualTo(8);
|
||||
assertThat(parser.readLine()).isEqualTo("");
|
||||
assertThat(parser.getPosition()).isEqualTo(9);
|
||||
assertThat(parser.readLine()).isEqualTo("bar");
|
||||
assertThat(parser.getPosition()).isEqualTo(12);
|
||||
assertThat(parser.readLine()).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readFourLinesWithLfFollowedByCr() {
|
||||
byte[] bytes = new byte[] {'f', 'o', 'o', '\n', '\r', '\r', 'b', 'a', 'r', '\r', '\n'};
|
||||
public void readFourLinesWithLfFollowedByCr_utf8() {
|
||||
byte[] bytes = "foo\n\r\rbar\r\n".getBytes(Charsets.UTF_8);
|
||||
ParsableByteArray parser = new ParsableByteArray(bytes);
|
||||
|
||||
assertThat(parser.readLine()).isEqualTo("foo");
|
||||
assertThat(parser.getPosition()).isEqualTo(4);
|
||||
assertThat(parser.readLine()).isEqualTo("");
|
||||
assertThat(parser.getPosition()).isEqualTo(5);
|
||||
assertThat(parser.readLine()).isEqualTo("");
|
||||
assertThat(parser.getPosition()).isEqualTo(6);
|
||||
assertThat(parser.readLine()).isEqualTo("bar");
|
||||
assertThat(parser.getPosition()).isEqualTo(11);
|
||||
assertThat(parser.readLine()).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readSingleLineWithoutEndingTrail_utf16() {
|
||||
// Use UTF_16BE because we don't want the leading BOM that's added by getBytes(UTF_16). We
|
||||
// explicitly test with a BOM elsewhere.
|
||||
byte[] bytes = "foo".getBytes(Charsets.UTF_16BE);
|
||||
ParsableByteArray parser = new ParsableByteArray(bytes);
|
||||
|
||||
assertThat(parser.readLine(Charsets.UTF_16)).isEqualTo("foo");
|
||||
assertThat(parser.getPosition()).isEqualTo(6);
|
||||
assertThat(parser.readLine(Charsets.UTF_16)).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readSingleLineWithEndingLf_utf16() {
|
||||
// Use UTF_16BE because we don't want the leading BOM that's added by getBytes(UTF_16). We
|
||||
// explicitly test with a BOM elsewhere.
|
||||
byte[] bytes = "foo\n".getBytes(Charsets.UTF_16BE);
|
||||
ParsableByteArray parser = new ParsableByteArray(bytes);
|
||||
|
||||
assertThat(parser.readLine(Charsets.UTF_16)).isEqualTo("foo");
|
||||
assertThat(parser.getPosition()).isEqualTo(8);
|
||||
assertThat(parser.readLine(Charsets.UTF_16)).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readTwoLinesWithCrFollowedByLf_utf16() {
|
||||
// Use UTF_16BE because we don't want the leading BOM that's added by getBytes(UTF_16). We
|
||||
// explicitly test with a BOM elsewhere.
|
||||
byte[] bytes = "foo\r\nbar".getBytes(Charsets.UTF_16BE);
|
||||
ParsableByteArray parser = new ParsableByteArray(bytes);
|
||||
|
||||
assertThat(parser.readLine(Charsets.UTF_16)).isEqualTo("foo");
|
||||
assertThat(parser.getPosition()).isEqualTo(10);
|
||||
assertThat(parser.readLine(Charsets.UTF_16)).isEqualTo("bar");
|
||||
assertThat(parser.getPosition()).isEqualTo(16);
|
||||
assertThat(parser.readLine(Charsets.UTF_16)).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readThreeLinesWithEmptyLineAndLeadingBom_utf16() {
|
||||
// getBytes(UTF_16) always adds the leading BOM.
|
||||
byte[] bytes = "foo\r\n\rbar".getBytes(Charsets.UTF_16);
|
||||
ParsableByteArray parser = new ParsableByteArray(bytes);
|
||||
|
||||
assertThat(parser.readLine(Charsets.UTF_16)).isEqualTo("foo");
|
||||
assertThat(parser.getPosition()).isEqualTo(12);
|
||||
assertThat(parser.readLine(Charsets.UTF_16)).isEqualTo("");
|
||||
assertThat(parser.getPosition()).isEqualTo(14);
|
||||
assertThat(parser.readLine(Charsets.UTF_16)).isEqualTo("bar");
|
||||
assertThat(parser.getPosition()).isEqualTo(20);
|
||||
assertThat(parser.readLine(Charsets.UTF_16)).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readFourLinesWithLfFollowedByCr_utf16() {
|
||||
// Use UTF_16BE because we don't want the leading BOM that's added by getBytes(UTF_16). We
|
||||
// explicitly test with a BOM elsewhere.
|
||||
byte[] bytes = "foo\n\r\rbar\r\n".getBytes(Charsets.UTF_16BE);
|
||||
ParsableByteArray parser = new ParsableByteArray(bytes);
|
||||
|
||||
assertThat(parser.readLine(Charsets.UTF_16)).isEqualTo("foo");
|
||||
assertThat(parser.getPosition()).isEqualTo(8);
|
||||
assertThat(parser.readLine(Charsets.UTF_16)).isEqualTo("");
|
||||
assertThat(parser.getPosition()).isEqualTo(10);
|
||||
assertThat(parser.readLine(Charsets.UTF_16)).isEqualTo("");
|
||||
assertThat(parser.getPosition()).isEqualTo(12);
|
||||
assertThat(parser.readLine(Charsets.UTF_16)).isEqualTo("bar");
|
||||
assertThat(parser.getPosition()).isEqualTo(22);
|
||||
assertThat(parser.readLine(Charsets.UTF_16)).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readSingleLineWithoutEndingTrail_utf16be() {
|
||||
byte[] bytes = "foo".getBytes(Charsets.UTF_16BE);
|
||||
ParsableByteArray parser = new ParsableByteArray(bytes);
|
||||
|
||||
assertThat(parser.readLine(Charsets.UTF_16BE)).isEqualTo("foo");
|
||||
assertThat(parser.getPosition()).isEqualTo(6);
|
||||
assertThat(parser.readLine(Charsets.UTF_16BE)).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readSingleLineWithEndingLf_utf16be() {
|
||||
byte[] bytes = "foo\n".getBytes(Charsets.UTF_16BE);
|
||||
ParsableByteArray parser = new ParsableByteArray(bytes);
|
||||
|
||||
assertThat(parser.readLine(Charsets.UTF_16BE)).isEqualTo("foo");
|
||||
assertThat(parser.getPosition()).isEqualTo(8);
|
||||
assertThat(parser.readLine(Charsets.UTF_16BE)).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readTwoLinesWithCrFollowedByLf_utf16be() {
|
||||
byte[] bytes = "foo\r\nbar".getBytes(Charsets.UTF_16BE);
|
||||
ParsableByteArray parser = new ParsableByteArray(bytes);
|
||||
|
||||
assertThat(parser.readLine(Charsets.UTF_16BE)).isEqualTo("foo");
|
||||
assertThat(parser.getPosition()).isEqualTo(10);
|
||||
assertThat(parser.readLine(Charsets.UTF_16BE)).isEqualTo("bar");
|
||||
assertThat(parser.getPosition()).isEqualTo(16);
|
||||
assertThat(parser.readLine(Charsets.UTF_16BE)).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readThreeLinesWithEmptyLineAndLeadingBom_utf16be() {
|
||||
byte[] bytes =
|
||||
Bytes.concat(createByteArray(0xFE, 0xFF), "foo\r\n\rbar".getBytes(Charsets.UTF_16BE));
|
||||
ParsableByteArray parser = new ParsableByteArray(bytes);
|
||||
|
||||
assertThat(parser.readLine(Charsets.UTF_16BE)).isEqualTo("foo");
|
||||
assertThat(parser.getPosition()).isEqualTo(12);
|
||||
assertThat(parser.readLine(Charsets.UTF_16BE)).isEqualTo("");
|
||||
assertThat(parser.getPosition()).isEqualTo(14);
|
||||
assertThat(parser.readLine(Charsets.UTF_16BE)).isEqualTo("bar");
|
||||
assertThat(parser.getPosition()).isEqualTo(20);
|
||||
assertThat(parser.readLine(Charsets.UTF_16BE)).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readFourLinesWithLfFollowedByCr_utf16be() {
|
||||
byte[] bytes = "foo\n\r\rbar\r\n".getBytes(Charsets.UTF_16BE);
|
||||
ParsableByteArray parser = new ParsableByteArray(bytes);
|
||||
|
||||
assertThat(parser.readLine(Charsets.UTF_16BE)).isEqualTo("foo");
|
||||
assertThat(parser.getPosition()).isEqualTo(8);
|
||||
assertThat(parser.readLine(Charsets.UTF_16BE)).isEqualTo("");
|
||||
assertThat(parser.getPosition()).isEqualTo(10);
|
||||
assertThat(parser.readLine(Charsets.UTF_16BE)).isEqualTo("");
|
||||
assertThat(parser.getPosition()).isEqualTo(12);
|
||||
assertThat(parser.readLine(Charsets.UTF_16BE)).isEqualTo("bar");
|
||||
assertThat(parser.getPosition()).isEqualTo(22);
|
||||
assertThat(parser.readLine(Charsets.UTF_16BE)).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readSingleLineWithoutEndingTrail_utf16le() {
|
||||
byte[] bytes = "foo".getBytes(Charsets.UTF_16LE);
|
||||
ParsableByteArray parser = new ParsableByteArray(bytes);
|
||||
|
||||
assertThat(parser.readLine(Charsets.UTF_16LE)).isEqualTo("foo");
|
||||
assertThat(parser.getPosition()).isEqualTo(6);
|
||||
assertThat(parser.readLine(Charsets.UTF_16LE)).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readSingleLineWithEndingLf_utf16le() {
|
||||
byte[] bytes = "foo\n".getBytes(Charsets.UTF_16LE);
|
||||
ParsableByteArray parser = new ParsableByteArray(bytes);
|
||||
|
||||
assertThat(parser.readLine(Charsets.UTF_16LE)).isEqualTo("foo");
|
||||
assertThat(parser.getPosition()).isEqualTo(8);
|
||||
assertThat(parser.readLine(Charsets.UTF_16LE)).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readTwoLinesWithCrFollowedByLf_utf16le() {
|
||||
byte[] bytes = "foo\r\nbar".getBytes(Charsets.UTF_16LE);
|
||||
ParsableByteArray parser = new ParsableByteArray(bytes);
|
||||
|
||||
assertThat(parser.readLine(Charsets.UTF_16LE)).isEqualTo("foo");
|
||||
assertThat(parser.getPosition()).isEqualTo(10);
|
||||
assertThat(parser.readLine(Charsets.UTF_16LE)).isEqualTo("bar");
|
||||
assertThat(parser.getPosition()).isEqualTo(16);
|
||||
assertThat(parser.readLine(Charsets.UTF_16LE)).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readThreeLinesWithEmptyLineAndLeadingBom_utf16le() {
|
||||
byte[] bytes =
|
||||
Bytes.concat(createByteArray(0xFF, 0xFE), "foo\r\n\rbar".getBytes(Charsets.UTF_16LE));
|
||||
ParsableByteArray parser = new ParsableByteArray(bytes);
|
||||
|
||||
assertThat(parser.readLine(Charsets.UTF_16LE)).isEqualTo("foo");
|
||||
assertThat(parser.getPosition()).isEqualTo(12);
|
||||
assertThat(parser.readLine(Charsets.UTF_16LE)).isEqualTo("");
|
||||
assertThat(parser.getPosition()).isEqualTo(14);
|
||||
assertThat(parser.readLine(Charsets.UTF_16LE)).isEqualTo("bar");
|
||||
assertThat(parser.getPosition()).isEqualTo(20);
|
||||
assertThat(parser.readLine(Charsets.UTF_16LE)).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readFourLinesWithLfFollowedByCr_utf16le() {
|
||||
byte[] bytes = "foo\n\r\rbar\r\n".getBytes(Charsets.UTF_16LE);
|
||||
ParsableByteArray parser = new ParsableByteArray(bytes);
|
||||
|
||||
assertThat(parser.readLine(Charsets.UTF_16LE)).isEqualTo("foo");
|
||||
assertThat(parser.getPosition()).isEqualTo(8);
|
||||
assertThat(parser.readLine(Charsets.UTF_16LE)).isEqualTo("");
|
||||
assertThat(parser.getPosition()).isEqualTo(10);
|
||||
assertThat(parser.readLine(Charsets.UTF_16LE)).isEqualTo("");
|
||||
assertThat(parser.getPosition()).isEqualTo(12);
|
||||
assertThat(parser.readLine(Charsets.UTF_16LE)).isEqualTo("bar");
|
||||
assertThat(parser.getPosition()).isEqualTo(22);
|
||||
assertThat(parser.readLine(Charsets.UTF_16LE)).isNull();
|
||||
}
|
||||
}
|
||||
|
@ -27,6 +27,8 @@ import androidx.media3.common.util.ParsableByteArray;
|
||||
import androidx.media3.common.util.UnstableApi;
|
||||
import androidx.media3.extractor.text.SimpleSubtitleDecoder;
|
||||
import androidx.media3.extractor.text.Subtitle;
|
||||
import com.google.common.base.Charsets;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.ArrayList;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
@ -76,9 +78,10 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
|
||||
ArrayList<Cue> cues = new ArrayList<>();
|
||||
LongArray cueTimesUs = new LongArray();
|
||||
ParsableByteArray subripData = new ParsableByteArray(data, length);
|
||||
Charset charset = detectUtfCharset(subripData);
|
||||
|
||||
@Nullable String currentLine;
|
||||
while ((currentLine = subripData.readLine()) != null) {
|
||||
while ((currentLine = subripData.readLine(charset)) != null) {
|
||||
if (currentLine.length() == 0) {
|
||||
// Skip blank lines.
|
||||
continue;
|
||||
@ -93,7 +96,7 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
|
||||
}
|
||||
|
||||
// Read and parse the timing line.
|
||||
currentLine = subripData.readLine();
|
||||
currentLine = subripData.readLine(charset);
|
||||
if (currentLine == null) {
|
||||
Log.w(TAG, "Unexpected end");
|
||||
break;
|
||||
@ -111,13 +114,13 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
|
||||
// Read and parse the text and tags.
|
||||
textBuilder.setLength(0);
|
||||
tags.clear();
|
||||
currentLine = subripData.readLine();
|
||||
currentLine = subripData.readLine(charset);
|
||||
while (!TextUtils.isEmpty(currentLine)) {
|
||||
if (textBuilder.length() > 0) {
|
||||
textBuilder.append("<br>");
|
||||
}
|
||||
textBuilder.append(processLine(currentLine, tags));
|
||||
currentLine = subripData.readLine();
|
||||
currentLine = subripData.readLine(charset);
|
||||
}
|
||||
|
||||
Spanned text = Html.fromHtml(textBuilder.toString());
|
||||
@ -140,6 +143,15 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
|
||||
return new SubripSubtitle(cuesArray, cueTimesUsArray);
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine UTF encoding of the byte array from a byte order mark (BOM), defaulting to UTF-8 if
|
||||
* no BOM is found.
|
||||
*/
|
||||
private Charset detectUtfCharset(ParsableByteArray data) {
|
||||
@Nullable Charset charset = data.readUtfCharsetFromBom();
|
||||
return charset != null ? charset : Charsets.UTF_8;
|
||||
}
|
||||
|
||||
/**
|
||||
* Trims and removes tags from the given line. The removed tags are added to {@code tags}.
|
||||
*
|
||||
|
@ -26,6 +26,7 @@ import android.text.style.ForegroundColorSpan;
|
||||
import android.text.style.StyleSpan;
|
||||
import android.text.style.TypefaceSpan;
|
||||
import android.text.style.UnderlineSpan;
|
||||
import androidx.annotation.Nullable;
|
||||
import androidx.media3.common.C;
|
||||
import androidx.media3.common.text.Cue;
|
||||
import androidx.media3.common.util.Log;
|
||||
@ -36,6 +37,7 @@ import androidx.media3.extractor.text.SimpleSubtitleDecoder;
|
||||
import androidx.media3.extractor.text.Subtitle;
|
||||
import androidx.media3.extractor.text.SubtitleDecoderException;
|
||||
import com.google.common.base.Charsets;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
@ -48,16 +50,12 @@ public final class Tx3gDecoder extends SimpleSubtitleDecoder {
|
||||
|
||||
private static final String TAG = "Tx3gDecoder";
|
||||
|
||||
private static final char BOM_UTF16_BE = '\uFEFF';
|
||||
private static final char BOM_UTF16_LE = '\uFFFE';
|
||||
|
||||
private static final int TYPE_STYL = 0x7374796c;
|
||||
private static final int TYPE_TBOX = 0x74626f78;
|
||||
private static final String TX3G_SERIF = "Serif";
|
||||
|
||||
private static final int SIZE_ATOM_HEADER = 8;
|
||||
private static final int SIZE_SHORT = 2;
|
||||
private static final int SIZE_BOM_UTF16 = 2;
|
||||
private static final int SIZE_STYLE_RECORD = 12;
|
||||
|
||||
private static final int FONT_FACE_BOLD = 0x0001;
|
||||
@ -173,13 +171,11 @@ public final class Tx3gDecoder extends SimpleSubtitleDecoder {
|
||||
if (textLength == 0) {
|
||||
return "";
|
||||
}
|
||||
if (parsableByteArray.bytesLeft() >= SIZE_BOM_UTF16) {
|
||||
char firstChar = parsableByteArray.peekChar();
|
||||
if (firstChar == BOM_UTF16_BE || firstChar == BOM_UTF16_LE) {
|
||||
return parsableByteArray.readString(textLength, Charsets.UTF_16);
|
||||
}
|
||||
}
|
||||
return parsableByteArray.readString(textLength, Charsets.UTF_8);
|
||||
int textStartPosition = parsableByteArray.getPosition();
|
||||
@Nullable Charset charset = parsableByteArray.readUtfCharsetFromBom();
|
||||
int bomSize = parsableByteArray.getPosition() - textStartPosition;
|
||||
return parsableByteArray.readString(
|
||||
textLength - bomSize, charset != null ? charset : Charsets.UTF_8);
|
||||
}
|
||||
|
||||
private void applyStyleRecord(ParsableByteArray parsableByteArray, SpannableStringBuilder cueText)
|
||||
|
@ -40,6 +40,8 @@ public final class SubripDecoderTest {
|
||||
private static final String TYPICAL_NEGATIVE_TIMESTAMPS =
|
||||
"media/subrip/typical_negative_timestamps";
|
||||
private static final String TYPICAL_UNEXPECTED_END = "media/subrip/typical_unexpected_end";
|
||||
private static final String TYPICAL_UTF16BE = "media/subrip/typical_utf16be";
|
||||
private static final String TYPICAL_UTF16LE = "media/subrip/typical_utf16le";
|
||||
private static final String TYPICAL_WITH_TAGS = "media/subrip/typical_with_tags";
|
||||
private static final String TYPICAL_NO_HOURS_AND_MILLIS =
|
||||
"media/subrip/typical_no_hours_and_millis";
|
||||
@ -148,6 +150,32 @@ public final class SubripDecoderTest {
|
||||
assertTypicalCue2(subtitle, 2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void decodeTypicalUtf16LittleEndian() throws IOException {
|
||||
SubripDecoder decoder = new SubripDecoder();
|
||||
byte[] bytes =
|
||||
TestUtil.getByteArray(ApplicationProvider.getApplicationContext(), TYPICAL_UTF16LE);
|
||||
Subtitle subtitle = decoder.decode(bytes, bytes.length, false);
|
||||
|
||||
assertThat(subtitle.getEventTimeCount()).isEqualTo(6);
|
||||
assertTypicalCue1(subtitle, 0);
|
||||
assertTypicalCue2(subtitle, 2);
|
||||
assertTypicalCue3(subtitle, 4);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void decodeTypicalUtf16BigEndian() throws IOException {
|
||||
SubripDecoder decoder = new SubripDecoder();
|
||||
byte[] bytes =
|
||||
TestUtil.getByteArray(ApplicationProvider.getApplicationContext(), TYPICAL_UTF16BE);
|
||||
Subtitle subtitle = decoder.decode(bytes, bytes.length, false);
|
||||
|
||||
assertThat(subtitle.getEventTimeCount()).isEqualTo(6);
|
||||
assertTypicalCue1(subtitle, 0);
|
||||
assertTypicalCue2(subtitle, 2);
|
||||
assertTypicalCue3(subtitle, 4);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void decodeCueWithTag() throws IOException {
|
||||
SubripDecoder decoder = new SubripDecoder();
|
||||
|
BIN
libraries/test_data/src/test/assets/media/subrip/typical_utf16be
Normal file
BIN
libraries/test_data/src/test/assets/media/subrip/typical_utf16be
Normal file
Binary file not shown.
BIN
libraries/test_data/src/test/assets/media/subrip/typical_utf16le
Normal file
BIN
libraries/test_data/src/test/assets/media/subrip/typical_utf16le
Normal file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user