diff --git a/libraries/common/src/main/java/androidx/media3/common/util/ParsableByteArray.java b/libraries/common/src/main/java/androidx/media3/common/util/ParsableByteArray.java index 44e93ae265..7fcc0fa4eb 100644 --- a/libraries/common/src/main/java/androidx/media3/common/util/ParsableByteArray.java +++ b/libraries/common/src/main/java/androidx/media3/common/util/ParsableByteArray.java @@ -15,12 +15,18 @@ */ package androidx.media3.common.util; +import static java.nio.ByteOrder.BIG_ENDIAN; +import static java.nio.ByteOrder.LITTLE_ENDIAN; + import androidx.annotation.Nullable; import com.google.common.collect.ImmutableSet; import com.google.common.primitives.Chars; import com.google.common.primitives.Ints; +import com.google.common.primitives.UnsignedBytes; +import com.google.common.primitives.UnsignedInts; import com.google.errorprone.annotations.CheckReturnValue; import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.util.Arrays; @@ -33,6 +39,9 @@ import java.util.Arrays; @CheckReturnValue public final class ParsableByteArray { + /** A value that is outside the valid range of unicode code points. */ + public static final int INVALID_CODE_POINT = 0x11_0000; + private static final char[] CR_AND_LF = {'\r', '\n'}; private static final char[] LF = {'\n'}; private static final ImmutableSet SUPPORTED_CHARSETS_FOR_READLINE = @@ -239,30 +248,74 @@ public final class ParsableByteArray { return (data[position] & 0xFF); } - /** - * Peeks at the next char. - * - *

Equivalent to passing {@link StandardCharsets#UTF_16} or {@link StandardCharsets#UTF_16BE} - * to {@link #peekChar(Charset)}. - */ + /** Peeks at the next two bytes and interprets them as a big-endian char. */ public char peekChar() { - return (char) ((data[position] & 0xFF) << 8 | (data[position + 1] & 0xFF)); + return peekChar(BIG_ENDIAN, /* offset= */ 0); } /** - * Peeks at the next char (as decoded by {@code charset}) - * - *

If {@code charset} is UTF-8, only single-byte characters are supported and this method - * returns zero if {@link #position} is pointing to any part of a multi-byte character. - * - * @throws IllegalArgumentException if charset is not supported. Only US_ASCII, UTF-8, UTF-16, - * UTF-16BE, and UTF-16LE are supported. + * @deprecated Either use {@link #peekChar()} to peek the next two bytes (big-endian) or {@link + * #peekCodePoint(Charset)} to peek in a {@link Charset}-aware way. */ - // TODO: b/398845842 - Make this work 'correctly' for multi-byte UTF-8, or deprecate it. + @Deprecated public char peekChar(Charset charset) { Assertions.checkArgument( SUPPORTED_CHARSETS_FOR_READLINE.contains(charset), "Unsupported charset: " + charset); - return (char) (peekCharacterAndSize(charset) >> Short.SIZE); + if (bytesLeft() < 1) { + return 0; + } + if (charset.equals(StandardCharsets.US_ASCII)) { + return (char) peekUnsignedByte(); + } else if (charset.equals(StandardCharsets.UTF_8)) { + return (data[position] & 0x80) == 0 ? (char) peekUnsignedByte() : 0; + } else { + // UTF-16 + if (bytesLeft() < 2) { + return 0; + } + ByteOrder byteOrder = charset.equals(StandardCharsets.UTF_16LE) ? LITTLE_ENDIAN : BIG_ENDIAN; + return peekChar(byteOrder, /* offset= */ 0); + } + } + + /** Peek the UTF-16 char at {@link #position}{@code + offset}. */ + private char peekChar(ByteOrder byteOrder, int offset) { + return byteOrder == BIG_ENDIAN + ? Chars.fromBytes(data[position + offset], data[position + offset + 1]) + : Chars.fromBytes(data[position + offset + 1], data[position + offset]); + } + + /** + * Peeks at the code point starting at {@link #getPosition()} as interpreted by {@code charset}. + * + *

The exact behaviour depends on {@code charset}: + * + *

+ * + * @throws IllegalArgumentException if charset is not supported. Only US_ASCII, UTF-8, UTF-16, + * UTF-16BE, and UTF-16LE are supported. + * @throws IndexOutOfBoundsException if {@link #bytesLeft()} doesn't allow reading the smallest + * code unit in {@code charset} (1 byte for ASCII and UTF-8, 2 bytes for UTF-16). + */ + public int peekCodePoint(Charset charset) { + int codePointAndSize = peekCodePointAndSize(charset); + return codePointAndSize != 0 ? Ints.checkedCast(codePointAndSize >>> 8) : INVALID_CODE_POINT; } /** Reads the next byte as an unsigned value. */ @@ -708,53 +761,145 @@ public final class ParsableByteArray { * without advancing {@link #position}. Returns {@code 0} if {@link #bytesLeft()} doesn't allow * reading a whole character in {@code charset}. * - *

Only supports characters in {@code chars} that occupy a single code unit (i.e. one byte for - * UTF-8 and two bytes for UTF-16). + *

Only supports characters in {@code chars} that are in the Basic Multilingual Plane (occupy a + * single char). */ private char readCharacterIfInList(Charset charset, char[] chars) { - int characterAndSize = peekCharacterAndSize(charset); + if (bytesLeft() < getSmallestCodeUnitSize(charset)) { + return 0; + } + int codePointAndSize = peekCodePointAndSize(charset); + if (codePointAndSize == 0) { + return 0; + } - if (characterAndSize != 0 && Chars.contains(chars, (char) (characterAndSize >> Short.SIZE))) { - position += characterAndSize & 0xFFFF; - return (char) (characterAndSize >> Short.SIZE); + int codePoint = UnsignedInts.checkedCast(codePointAndSize >>> 8); + if (Character.isSupplementaryCodePoint(codePoint)) { + return 0; + } + char c = Chars.checkedCast(codePoint); + if (Chars.contains(chars, c)) { + position += Ints.checkedCast(codePointAndSize & 0xFF); + return c; } else { return 0; } } /** - * Peeks at the character at {@link #position} (as decoded by {@code charset}), returns it and the - * number of bytes the character takes up within the array packed into an int. First two bytes are - * the character and the second two is the size in bytes it takes. Returns 0 if {@link - * #bytesLeft()} doesn't allow reading a whole character in {@code charset} or if the {@code - * charset} is not one of US_ASCII, UTF-8, UTF-16, UTF-16BE, or UTF-16LE. + * Peeks at the code point at {@link #position} (as decoded by {@code charset}), and the number of + * bytes it occupies within {@link #data}. * - *

Only supports characters that occupy a single code unit (i.e. one byte for UTF-8 and two - * bytes for UTF-16). + *

See {@link #peekCodePoint(Charset)} for detailed per-charset behaviour and edge cases. + * + * @return The code point in the upper 24 bits, and the size in bytes in the lower 8 bits. Or zero + * if no valid code point starts at {@link #position} and fits within {@link #bytesLeft()}. + * @throws IndexOutOfBoundsException if {@link #bytesLeft()} doesn't allow reading the smallest + * code unit in {@code charset} (1 byte for ASCII and UTF-8, 2 bytes for UTF-16). + * @throws IllegalArgumentException if charset is not supported. Only US_ASCII, UTF-8, UTF-16, + * UTF-16BE, and UTF-16LE are supported. */ - private int peekCharacterAndSize(Charset charset) { - byte charByte1; - byte charByte2; - byte characterSize; - if (bytesLeft() >= 1 - && ((charset.equals(StandardCharsets.UTF_8) && (data[position] & 0x80) == 0) - || charset.equals(StandardCharsets.US_ASCII))) { - // TODO: b/398845842 - Handle multi-byte UTF-8. - charByte1 = 0; - charByte2 = data[position]; - characterSize = 1; - } else if (bytesLeft() >= 2 - && (charset.equals(StandardCharsets.UTF_16) || charset.equals(StandardCharsets.UTF_16BE))) { - charByte1 = data[position]; - charByte2 = data[position + 1]; - characterSize = 2; - } else if (bytesLeft() >= 2 && charset.equals(StandardCharsets.UTF_16LE)) { - charByte1 = data[position + 1]; - charByte2 = data[position]; - characterSize = 2; + private int peekCodePointAndSize(Charset charset) { + Assertions.checkArgument( + SUPPORTED_CHARSETS_FOR_READLINE.contains(charset), "Unsupported charset: " + charset); + if (bytesLeft() < getSmallestCodeUnitSize(charset)) { + throw new IndexOutOfBoundsException("position=" + position + ", limit=" + limit); + } + int codePoint; + byte codePointSize; + if (charset.equals(StandardCharsets.US_ASCII)) { + if ((data[position] & 0x80) != 0) { + return 0; + } + codePoint = UnsignedBytes.toInt(data[position]); + codePointSize = 1; + } else if (charset.equals(StandardCharsets.UTF_8)) { + codePointSize = peekUtf8CodeUnitSize(); + switch
(codePointSize) { + case 1: + codePoint = UnsignedBytes.toInt(data[position]); + break; + case 2: + codePoint = decodeUtf8CodeUnit(0, 0, data[position], data[position + 1]); + break; + case 3: + int firstByteWithoutStartCode = data[position] & 0xF; + codePoint = + decodeUtf8CodeUnit( + 0, firstByteWithoutStartCode, data[position + 1], data[position + 2]); + break; + case 4: + codePoint = + decodeUtf8CodeUnit( + data[position], data[position + 1], data[position + 2], data[position + 3]); + break; + case 0: + default: + return 0; + } } else { + // UTF-16 + ByteOrder byteOrder = charset.equals(StandardCharsets.UTF_16LE) ? LITTLE_ENDIAN : BIG_ENDIAN; + char c = peekChar(byteOrder, /* offset= */ 0); + if (Character.isHighSurrogate(c) && bytesLeft() >= 4) { + char lowSurrogate = peekChar(byteOrder, /* offset= */ 2); + codePoint = Character.toCodePoint(c, lowSurrogate); + codePointSize = 4; + } else { + // This is either a BMP code point, an unpaired surrogate, or position is in the middle of + // a matching surrogate pair. + codePoint = c; + codePointSize = 2; + } + } + return (codePoint << 8) | codePointSize; + } + + private static int getSmallestCodeUnitSize(Charset charset) { + Assertions.checkArgument( + SUPPORTED_CHARSETS_FOR_READLINE.contains(charset), "Unsupported charset: " + charset); + return charset.equals(StandardCharsets.UTF_8) || charset.equals(StandardCharsets.US_ASCII) + ? 1 + : 2; + } + + /** + * Returns the size (in bytes) of the UTF-8 code unit starting at {@link #position}. Returns zero + * if no full UTF-8 code unit seems to start at {@link #position}. 
+ */ + private byte peekUtf8CodeUnitSize() { + if ((data[position] & 0x80) == 0) { + return 1; + } else if ((data[position] & 0xE0) == 0xC0 + && bytesLeft() >= 2 + && isUtf8ContinuationByte(data[position + 1])) { + return 2; + } else if ((data[position] & 0xF0) == 0xE0 + && bytesLeft() >= 3 + && isUtf8ContinuationByte(data[position + 1]) + && isUtf8ContinuationByte(data[position + 2])) { + return 3; + } else if ((data[position] & 0xF8) == 0xF0 + && bytesLeft() >= 4 + && isUtf8ContinuationByte(data[position + 1]) + && isUtf8ContinuationByte(data[position + 2]) + && isUtf8ContinuationByte(data[position + 3])) { + return 4; + } else { + // We found a pattern that doesn't seem to be valid UTF-8. return 0; } - return Ints.fromBytes(charByte1, charByte2, (byte) 0, characterSize); + } + + private static boolean isUtf8ContinuationByte(byte b) { + return (b & 0xC0) == 0x80; + } + + private static int decodeUtf8CodeUnit(int b1, int b2, int b3, int b4) { + return Ints.fromBytes( + (byte) 0, + UnsignedBytes.checkedCast(((b1 & 0x7) << 2) | (b2 & 0b0011_0000) >> 4), + UnsignedBytes.checkedCast(((byte) b2 & 0xF) << 4 | ((byte) b3 & 0b0011_1100) >> 2), + UnsignedBytes.checkedCast(((byte) b3 & 0x3) << 6 | ((byte) b4 & 0x3F))); } } diff --git a/libraries/common/src/test/java/androidx/media3/common/util/ParsableByteArrayTest.java b/libraries/common/src/test/java/androidx/media3/common/util/ParsableByteArrayTest.java index 44b275dff7..4b8674a422 100644 --- a/libraries/common/src/test/java/androidx/media3/common/util/ParsableByteArrayTest.java +++ b/libraries/common/src/test/java/androidx/media3/common/util/ParsableByteArrayTest.java @@ -998,6 +998,257 @@ public final class ParsableByteArrayTest { assertThat(parser.readLine(UTF_16LE)).isNull(); } + @Test + public void peekCodePoint_ascii() { + ParsableByteArray parser = new ParsableByteArray("foo".getBytes(US_ASCII)); + + assertThat(parser.peekCodePoint(US_ASCII)).isEqualTo((int) 'f'); + } + + @Test + public void 
peekCodePoint_ascii_invalid() { + // Choose é from ISO 8859-1 which is not valid 7-bit ASCII (since it has a high MSB). + ParsableByteArray parser = new ParsableByteArray(TestUtil.createByteArray(0xE9)); + + assertThat(parser.peekCodePoint(US_ASCII)).isEqualTo(ParsableByteArray.INVALID_CODE_POINT); + } + + @Test + public void peekCodePoint_ascii_atLimit_throwsException() { + // Set the limit before the end of the byte array. + ParsableByteArray parser = new ParsableByteArray("foo".getBytes(US_ASCII), /* limit= */ 2); + parser.setPosition(2); + + IndexOutOfBoundsException e = + assertThrows(IndexOutOfBoundsException.class, () -> parser.peekCodePoint(US_ASCII)); + assertThat(e).hasMessageThat().contains("position=2"); + assertThat(e).hasMessageThat().contains("limit=2"); + } + + @Test + public void peekCodePoint_utf8() { + ParsableByteArray parser = new ParsableByteArray("foo".getBytes(UTF_8)); + + assertThat(parser.peekCodePoint(UTF_8)).isEqualTo((int) 'f'); + } + + @Test + public void peekCodePoint_utf8_twoByteCharacter() { + ParsableByteArray parser = new ParsableByteArray("étude".getBytes(UTF_8)); + + assertThat(parser.peekCodePoint(UTF_8)).isEqualTo((int) 'é'); + } + + @Test + public void peekCodePoint_utf8_twoByteCharacter_misaligned() { + ParsableByteArray parser = new ParsableByteArray("étude".getBytes(UTF_8)); + parser.setPosition(1); + + assertThat(parser.peekCodePoint(UTF_8)).isEqualTo(ParsableByteArray.INVALID_CODE_POINT); + } + + @Test + public void peekCodePoint_utf8_threeByteCharacter() { + ParsableByteArray parser = new ParsableByteArray("ऊ".getBytes(UTF_8)); + + assertThat(parser.peekCodePoint(UTF_8)).isEqualTo((int) 'ऊ'); + } + + @Test + public void peekCodePoint_utf8_threeByteCharacter_misaligned() { + ParsableByteArray parser = new ParsableByteArray("ऊ".getBytes(UTF_8)); + parser.setPosition(1); + + assertThat(parser.peekCodePoint(UTF_8)).isEqualTo(ParsableByteArray.INVALID_CODE_POINT); + } + + @Test + public void 
peekCodePoint_utf8_fourByteCharacter() { + ParsableByteArray parser = new ParsableByteArray("\uD83D\uDE1B".getBytes(UTF_8)); + + assertThat(parser.peekCodePoint(UTF_8)).isEqualTo(Character.codePointAt("\uD83D\uDE1B", 0)); + } + + @Test + public void peekCodePoint_utf8_fourByteCharacter_misaligned() { + ParsableByteArray parser = new ParsableByteArray("\uD83D\uDE1B".getBytes(UTF_8)); + parser.setPosition(1); + + assertThat(parser.peekCodePoint(UTF_8)).isEqualTo(ParsableByteArray.INVALID_CODE_POINT); + } + + @Test + public void peekCodePoint_utf8_atLimit_throwsException() { + // Set the limit before the end of the byte array. + ParsableByteArray parser = new ParsableByteArray("foo".getBytes(UTF_8), /* limit= */ 2); + parser.setPosition(2); + + IndexOutOfBoundsException e = + assertThrows(IndexOutOfBoundsException.class, () -> parser.peekCodePoint(UTF_8)); + assertThat(e).hasMessageThat().contains("position=2"); + assertThat(e).hasMessageThat().contains("limit=2"); + } + + @Test + public void peekCodePoint_utf8_invalidByteSequence() { + // 2-byte start character not followed by anything. + ParsableByteArray parser = new ParsableByteArray(TestUtil.createByteArray(0xC1)); + assertThat(parser.peekCodePoint(UTF_8)).isEqualTo(ParsableByteArray.INVALID_CODE_POINT); + + // 2-byte character truncated by limit. + parser = new ParsableByteArray("é".getBytes(UTF_8), /* limit= */ 1); + assertThat(parser.peekCodePoint(UTF_8)).isEqualTo(ParsableByteArray.INVALID_CODE_POINT); + + // 2-byte start character not followed by a continuation byte. + parser = new ParsableByteArray(TestUtil.createByteArray(0xC1, 'a')); + assertThat(parser.peekCodePoint(UTF_8)).isEqualTo(ParsableByteArray.INVALID_CODE_POINT); + + // 3-byte start character followed by only one byte. + parser = new ParsableByteArray(TestUtil.createByteArray(0xE1, 0x81)); + assertThat(parser.peekCodePoint(UTF_8)).isEqualTo(ParsableByteArray.INVALID_CODE_POINT); + + // 3-byte character truncated by limit. 
+ parser = new ParsableByteArray("ऊ".getBytes(UTF_8), /* limit= */ 2); + assertThat(parser.peekCodePoint(UTF_8)).isEqualTo(ParsableByteArray.INVALID_CODE_POINT); + + // 3-byte start character followed by only one continuation byte. + parser = new ParsableByteArray(TestUtil.createByteArray(0xE1, 0x81, 'a')); + assertThat(parser.peekCodePoint(UTF_8)).isEqualTo(ParsableByteArray.INVALID_CODE_POINT); + + // 4-byte start character followed by only two bytes. + parser = new ParsableByteArray(TestUtil.createByteArray(0xF1, 0x81, 0x81)); + assertThat(parser.peekCodePoint(UTF_8)).isEqualTo(ParsableByteArray.INVALID_CODE_POINT); + + // 4-byte character truncated by limit. + parser = new ParsableByteArray("\uD83D\uDE1B".getBytes(UTF_8), /* limit= */ 3); + assertThat(parser.peekCodePoint(UTF_8)).isEqualTo(ParsableByteArray.INVALID_CODE_POINT); + + // 4-byte start character followed by only two continuation bytes. + parser = new ParsableByteArray(TestUtil.createByteArray(0xF1, 0x81, 0x81, 'a')); + assertThat(parser.peekCodePoint(UTF_8)).isEqualTo(ParsableByteArray.INVALID_CODE_POINT); + } + + @Test + public void peekCodePoint_utf16() { + // Use UTF_16BE to avoid encoding a BOM. + ParsableByteArray parser = new ParsableByteArray("foo".getBytes(UTF_16BE)); + + int expectedCodePoint = 'f'; + assertThat(parser.peekCodePoint(UTF_16)).isEqualTo(expectedCodePoint); + assertThat(parser.peekCodePoint(UTF_16BE)).isEqualTo(expectedCodePoint); + } + + @Test + public void peekCodePoint_utf16_basicMultilingualPlane() { + // Use UTF_16BE to avoid encoding a BOM. + ParsableByteArray parser = new ParsableByteArray("étude".getBytes(UTF_16BE)); + + int expectedCodePoint = 'é'; + assertThat(parser.peekCodePoint(UTF_16)).isEqualTo(expectedCodePoint); + assertThat(parser.peekCodePoint(UTF_16BE)).isEqualTo(expectedCodePoint); + } + + @Test + public void peekCodePoint_utf16_surrogatePair() { + // Use UTF_16BE to avoid encoding a BOM. 
+ ParsableByteArray parser = new ParsableByteArray("\uD83D\uDE1B".getBytes(UTF_16BE)); + + int expectedCodePoint = Character.codePointAt("\uD83D\uDE1B", 0); + assertThat(parser.peekCodePoint(UTF_16)).isEqualTo(expectedCodePoint); + assertThat(parser.peekCodePoint(UTF_16BE)).isEqualTo(expectedCodePoint); + } + + @Test + public void peekCodePoint_utf16_splitSurrogatePair_returnsLowSurrogate() { + // Use UTF_16BE to avoid encoding a BOM. + ParsableByteArray parser = new ParsableByteArray("\uD83D\uDE1B".getBytes(UTF_16BE)); + parser.skipBytes(2); + + int expectedCodePoint = 0xDE1B; + assertThat(parser.peekCodePoint(UTF_16)).isEqualTo(expectedCodePoint); + assertThat(parser.peekCodePoint(UTF_16BE)).isEqualTo(expectedCodePoint); + } + + @Test + public void peekCodePoint_utf16_misaligned_returnsGarbage() { + // Use UTF_16BE to avoid encoding a BOM. + ParsableByteArray parser = new ParsableByteArray("foo".getBytes(UTF_16BE)); + // Move the position so we are reading the second byte of 'f' and the first byte of 'o'. + parser.setPosition(1); + + int expectedCodePoint = '昀'; + assertThat(parser.peekCodePoint(UTF_16)).isEqualTo(expectedCodePoint); + assertThat(parser.peekCodePoint(UTF_16BE)).isEqualTo(expectedCodePoint); + } + + @Test + public void peekCodePoint_utf16_atLimit_throwsException() { + // Use UTF_16BE to avoid encoding a BOM. Set the limit before the end of the byte array. + ParsableByteArray parser = new ParsableByteArray("foo".getBytes(UTF_16BE), /* limit= */ 2); + // Only one readable byte, not enough for a UTF-16 code unit. 
+ parser.setPosition(1); + + IndexOutOfBoundsException e1 = + assertThrows(IndexOutOfBoundsException.class, () -> parser.peekCodePoint(UTF_16)); + assertThat(e1).hasMessageThat().contains("position=1"); + assertThat(e1).hasMessageThat().contains("limit=2"); + IndexOutOfBoundsException e2 = + assertThrows(IndexOutOfBoundsException.class, () -> parser.peekCodePoint(UTF_16BE)); + assertThat(e2).hasMessageThat().contains("position=1"); + assertThat(e2).hasMessageThat().contains("limit=2"); + } + + @Test + public void peekCodePoint_utf16le() { + ParsableByteArray parser = new ParsableByteArray("foo".getBytes(UTF_16LE)); + + assertThat(parser.peekCodePoint(UTF_16LE)).isEqualTo((int) 'f'); + } + + @Test + public void peekCodePoint_utf16le_basicMultilingualPlane() { + ParsableByteArray parser = new ParsableByteArray("étude".getBytes(UTF_16LE)); + + assertThat(parser.peekCodePoint(UTF_16LE)).isEqualTo((int) 'é'); + } + + @Test + public void peekCodePoint_utf16le_surrogatePair() { + ParsableByteArray parser = new ParsableByteArray("\uD83D\uDE1B".getBytes(UTF_16LE)); + + assertThat(parser.peekCodePoint(UTF_16LE)).isEqualTo(Character.codePointAt("\uD83D\uDE1B", 0)); + } + + @Test + public void peekCodePoint_utf16le_splitSurrogatePair_returnsLowSurrogate() { + ParsableByteArray parser = new ParsableByteArray("\uD83D\uDE1B".getBytes(UTF_16LE)); + parser.skipBytes(2); + + assertThat(parser.peekCodePoint(UTF_16LE)).isEqualTo(0xDE1B); + } + + @Test + public void peekCodePoint_utf16le_misaligned_returnsGarbage() { + ParsableByteArray parser = new ParsableByteArray("foo".getBytes(UTF_16LE)); + // Move the position so we are reading the second byte of 'f' and the first byte of 'o'. + parser.setPosition(1); + + assertThat(parser.peekCodePoint(UTF_16LE)).isEqualTo((int) '漀'); + } + + @Test + public void peekCodePoint_utf16le_atLimit_throwsException() { + // Set the limit before the end of the byte array. 
+ ParsableByteArray parser = new ParsableByteArray("foo".getBytes(UTF_16LE), /* limit= */ 2); + // Only one readable byte, not enough for a UTF-16 code unit. + parser.setPosition(1); + + IndexOutOfBoundsException e = + assertThrows(IndexOutOfBoundsException.class, () -> parser.peekCodePoint(UTF_16LE)); + assertThat(e).hasMessageThat().contains("position=1"); + assertThat(e).hasMessageThat().contains("limit=2"); + } + @Test public void peekChar() { // Use UTF_16BE to avoid encoding a BOM. @@ -1045,6 +1296,7 @@ public final class ParsableByteArrayTest { } @Test + @SuppressWarnings("deprecation") // Testing deprecated method public void peekChar_ascii() { byte[] bytes = "foo".getBytes(US_ASCII); ParsableByteArray parser = new ParsableByteArray(bytes); @@ -1053,6 +1305,7 @@ public final class ParsableByteArrayTest { } @Test + @SuppressWarnings("deprecation") // Testing deprecated method public void peekChar_ascii_invalid_returns8BitCharacterAnyway() { // Choose é from ISO 8859-1 which is not valid 7-bit ASCII (since it has a high MSB). ParsableByteArray parser = new ParsableByteArray(TestUtil.createByteArray(0xE9)); @@ -1061,6 +1314,7 @@ public final class ParsableByteArrayTest { } @Test + @SuppressWarnings("deprecation") // Testing deprecated method public void peekChar_ascii_atLimit_throwsException() { // Set the limit before the end of the byte array. 
ParsableByteArray parser = new ParsableByteArray("foo".getBytes(US_ASCII), /* limit= */ 2); @@ -1071,6 +1325,7 @@ public final class ParsableByteArrayTest { } @Test + @SuppressWarnings("deprecation") // Testing deprecated method public void peekChar_utf8_oneByteCharacter() { byte[] bytes = "foo".getBytes(UTF_8); ParsableByteArray parser = new ParsableByteArray(bytes); @@ -1079,6 +1334,7 @@ public final class ParsableByteArrayTest { } @Test + @SuppressWarnings("deprecation") // Testing deprecated method public void peekChar_utf8_twoByteCharacter_returnsZero() { byte[] bytes = "étude".getBytes(UTF_8); ParsableByteArray parser = new ParsableByteArray(bytes); @@ -1088,6 +1344,7 @@ public final class ParsableByteArrayTest { } @Test + @SuppressWarnings("deprecation") // Testing deprecated method public void peekChar_utf8_threeByteCharacter_returnsZero() { ParsableByteArray parser = new ParsableByteArray("ऊ".getBytes(UTF_8)); @@ -1096,6 +1353,7 @@ public final class ParsableByteArrayTest { } @Test + @SuppressWarnings("deprecation") // Testing deprecated method public void peekChar_utf8_fourByteCharacter_returnsZero() { byte[] bytes = "\uD83D\uDE1B".getBytes(UTF_8); ParsableByteArray parser = new ParsableByteArray(bytes); @@ -1105,6 +1363,7 @@ public final class ParsableByteArrayTest { } @Test + @SuppressWarnings("deprecation") // Testing deprecated method public void peekChar_utf8_splitFourByteChar_returnsZero() { byte[] bytes = "\uD83D\uDE1B".getBytes(UTF_8); ParsableByteArray parser = new ParsableByteArray(bytes); @@ -1115,6 +1374,7 @@ public final class ParsableByteArrayTest { } @Test + @SuppressWarnings("deprecation") // Testing deprecated method public void peekChar_utf8_atLimit_returnsZero() { // Set the limit before the end of the byte array. 
ParsableByteArray parser = new ParsableByteArray("foo".getBytes(UTF_8), /* limit= */ 2); @@ -1125,6 +1385,7 @@ public final class ParsableByteArrayTest { } @Test + @SuppressWarnings("deprecation") // Testing deprecated method public void peekChar_utf8_invalidByteSequence() { // 2-byte start character not followed by anything. ParsableByteArray parser = new ParsableByteArray(TestUtil.createByteArray(0xC1)); @@ -1164,6 +1425,7 @@ public final class ParsableByteArrayTest { } @Test + @SuppressWarnings("deprecation") // Testing deprecated method public void peekChar_utf16() { // Use UTF_16BE to avoid encoding a BOM. ParsableByteArray parser = new ParsableByteArray("foo".getBytes(UTF_16BE)); @@ -1174,6 +1436,7 @@ public final class ParsableByteArrayTest { } @Test + @SuppressWarnings("deprecation") // Testing deprecated method public void peekChar_utf16_basicMultilingualPlane() { // Use UTF_16BE to avoid encoding a BOM. ParsableByteArray parser = new ParsableByteArray("étude".getBytes(UTF_16BE)); @@ -1184,6 +1447,7 @@ public final class ParsableByteArrayTest { } @Test + @SuppressWarnings("deprecation") // Testing deprecated method public void peekChar_utf16_surrogatePair_returnsHighSurrogate() { // Use UTF_16BE to avoid encoding a BOM. ParsableByteArray parser = new ParsableByteArray("\uD83D\uDE1B".getBytes(UTF_16BE)); @@ -1195,6 +1459,7 @@ public final class ParsableByteArrayTest { } @Test + @SuppressWarnings("deprecation") // Testing deprecated method public void peekChar_utf16_splitSurrogatePair_returnsLowSurrogate() { // Use UTF_16BE to avoid encoding a BOM. ParsableByteArray parser = new ParsableByteArray("\uD83D\uDE1B".getBytes(UTF_16BE)); @@ -1207,6 +1472,7 @@ public final class ParsableByteArrayTest { } @Test + @SuppressWarnings("deprecation") // Testing deprecated method public void peekChar_utf16_misaligned_returnsGarbage() { // Use UTF_16BE to avoid encoding a BOM. 
ParsableByteArray parser = new ParsableByteArray("foo".getBytes(UTF_16BE)); @@ -1219,6 +1485,7 @@ public final class ParsableByteArrayTest { } @Test + @SuppressWarnings("deprecation") // Testing deprecated method public void peekChar_utf16_atLimit_returnsZero() { // Use UTF_16BE to avoid encoding a BOM. Set the limit before the end of the byte array. ParsableByteArray parser = new ParsableByteArray("foo".getBytes(UTF_16BE), /* limit= */ 2); @@ -1232,6 +1499,7 @@ public final class ParsableByteArrayTest { } @Test + @SuppressWarnings("deprecation") // Testing deprecated method public void peekChar_utf16le() { ParsableByteArray parser = new ParsableByteArray("foo".getBytes(UTF_16LE)); @@ -1239,6 +1507,7 @@ public final class ParsableByteArrayTest { } @Test + @SuppressWarnings("deprecation") // Testing deprecated method public void peekChar_utf16le_basicMultilingualPlane() { ParsableByteArray parser = new ParsableByteArray("étude".getBytes(UTF_16LE)); @@ -1246,6 +1515,7 @@ public final class ParsableByteArrayTest { } @Test + @SuppressWarnings("deprecation") // Testing deprecated method public void peekChar_utf16le_surrogatePair_returnsHighSurrogate() { ParsableByteArray parser = new ParsableByteArray("\uD83D\uDE1B".getBytes(UTF_16LE)); @@ -1254,6 +1524,7 @@ public final class ParsableByteArrayTest { } @Test + @SuppressWarnings("deprecation") // Testing deprecated method public void peekChar_utf16le_splitSurrogatePair_returnsLowSurrogate() { ParsableByteArray parser = new ParsableByteArray("\uD83D\uDE1B".getBytes(UTF_16LE)); parser.skipBytes(2); @@ -1263,6 +1534,7 @@ public final class ParsableByteArrayTest { } @Test + @SuppressWarnings("deprecation") // Testing deprecated method public void peekChar_utf16le_misaligned_returnsGarbage() { ParsableByteArray parser = new ParsableByteArray("foo".getBytes(UTF_16LE)); // Move the position so we are reading the second byte of 'f' and the first byte of 'o'. 
@@ -1272,6 +1544,7 @@ public final class ParsableByteArrayTest { } @Test + @SuppressWarnings("deprecation") // Testing deprecated method public void peekChar_utf16le_atLimit_returnsZero() { // Set the limit before the end of the byte array. ParsableByteArray parser = new ParsableByteArray("foo".getBytes(UTF_16LE), /* limit= */ 2); diff --git a/libraries/extractor/src/main/java/androidx/media3/extractor/text/ssa/SsaParser.java b/libraries/extractor/src/main/java/androidx/media3/extractor/text/ssa/SsaParser.java index f387bb9f6c..6576ca05e7 100644 --- a/libraries/extractor/src/main/java/androidx/media3/extractor/text/ssa/SsaParser.java +++ b/libraries/extractor/src/main/java/androidx/media3/extractor/text/ssa/SsaParser.java @@ -227,7 +227,7 @@ public final class SsaParser implements SubtitleParser { private void parseScriptInfo(ParsableByteArray data, Charset charset) { @Nullable String currentLine; while ((currentLine = data.readLine(charset)) != null - && (data.bytesLeft() == 0 || data.peekChar(charset) != '[')) { + && (data.bytesLeft() == 0 || data.peekCodePoint(charset) != '[')) { String[] infoNameAndValue = currentLine.split(":"); if (infoNameAndValue.length != 2) { continue; @@ -266,7 +266,7 @@ public final class SsaParser implements SubtitleParser { @Nullable SsaStyle.Format formatInfo = null; @Nullable String currentLine; while ((currentLine = data.readLine(charset)) != null - && (data.bytesLeft() == 0 || data.peekChar(charset) != '[')) { + && (data.bytesLeft() == 0 || data.peekCodePoint(charset) != '[')) { if (currentLine.startsWith(FORMAT_LINE_PREFIX)) { formatInfo = SsaStyle.Format.fromFormatLine(currentLine); } else if (currentLine.startsWith(STYLE_LINE_PREFIX)) {