Add ParsableByteArray.peekCodePoint and deprecate peekChar(Charset)

This is analogous to `String.codePointAt` and similar methods. By dealing only in Unicode code points, and returning an `int`, we can avoid having to reason about how to handle UTF-8 encoded code points that require two UTF-16 code units (each one a Java `char`) to represent them. The zero-arg `peekChar` method remains undeprecated, because it behaves as "you'd expect" when compared to `peekUnsignedByte` (always reads two big-endian bytes). PiperOrigin-RevId: 733752645
2025-04-30 06:46:50 +08:00 · 2025-03-05 08:54:02 -08:00 · 2025-03-05 08:54:02 -08:00 · d7574ffd66
commit d7574ffd66
parent d7163534ff
3 changed files with 470 additions and 52 deletions
--- a/libraries/common/src/main/java/androidx/media3/common/util/ParsableByteArray.java
+++ b/libraries/common/src/main/java/androidx/media3/common/util/ParsableByteArray.java
@ -15,12 +15,18 @@
 */
 package androidx.media3.common.util;
 import static java.nio.ByteOrder.BIG_ENDIAN;
 import static java.nio.ByteOrder.LITTLE_ENDIAN;
 import androidx.annotation.Nullable;
 import com.google.common.collect.ImmutableSet;
 import com.google.common.primitives.Chars;
 import com.google.common.primitives.Ints;
 import com.google.common.primitives.UnsignedBytes;
 import com.google.common.primitives.UnsignedInts;
 import com.google.errorprone.annotations.CheckReturnValue;
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
 import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
@ -33,6 +39,9 @@ import java.util.Arrays;
@CheckReturnValue
 public final class ParsableByteArray {
  /** A value that is outside the valid range of unicode code points. */
  public static final int INVALID_CODE_POINT = 0x11_0000;
  private static final char[] CR_AND_LF = {'\r', '\n'};
  private static final char[] LF = {'\n'};
  private static final ImmutableSet<Charset> SUPPORTED_CHARSETS_FOR_READLINE =
@ -239,30 +248,74 @@ public final class ParsableByteArray {
    return (data[position] & 0xFF);
  }
-  /**
+  /** Peeks at the next two bytes and interprets them as a big-endian char. */
   * Peeks at the next char.
   *
   * <p>Equivalent to passing {@link StandardCharsets#UTF_16} or {@link StandardCharsets#UTF_16BE}
   * to {@link #peekChar(Charset)}.
   */
  public char peekChar() {
-    return (char) ((data[position] & 0xFF) << 8 | (data[position + 1] & 0xFF));
+    return peekChar(BIG_ENDIAN, /* offset= */ 0);
  }
  /**
-   * Peeks at the next char (as decoded by {@code charset})
+   * @deprecated Either use {@link #peekChar()} to peek the next two bytes (big-endian) or {@link
-   *
+   *     #peekCodePoint(Charset)} to peek in a {@link Charset}-aware way.
   * <p>If {@code charset} is UTF-8, only single-byte characters are supported and this method
   * returns zero if {@link #position} is pointing to any part of a multi-byte character.
   *
   * @throws IllegalArgumentException if charset is not supported. Only US_ASCII, UTF-8, UTF-16,
   *     UTF-16BE, and UTF-16LE are supported.
   */
-  // TODO: b/398845842 - Make this work 'correctly' for multi-byte UTF-8, or deprecate it.
+  @Deprecated
  public char peekChar(Charset charset) {
    Assertions.checkArgument(
        SUPPORTED_CHARSETS_FOR_READLINE.contains(charset), "Unsupported charset: " + charset);
-    return (char) (peekCharacterAndSize(charset) >> Short.SIZE);
+    if (bytesLeft() < 1) {
      return 0;
    }
    if (charset.equals(StandardCharsets.US_ASCII)) {
      return (char) peekUnsignedByte();
    } else if (charset.equals(StandardCharsets.UTF_8)) {
      return (data[position] & 0x80) == 0 ? (char) peekUnsignedByte() : 0;
    } else {
      // UTF-16
      if (bytesLeft() < 2) {
        return 0;
      }
      ByteOrder byteOrder = charset.equals(StandardCharsets.UTF_16LE) ? LITTLE_ENDIAN : BIG_ENDIAN;
      return peekChar(byteOrder, /* offset= */ 0);
    }
  }
  /**
   * Reads the two bytes starting at {@link #position}{@code + offset} as a single UTF-16 code unit
   * in the given {@code byteOrder}, without moving {@link #position}.
   */
  private char peekChar(ByteOrder byteOrder, int offset) {
    int firstIndex = position + offset;
    // The most significant byte comes first for big-endian data and second otherwise.
    int highByteIndex = byteOrder == BIG_ENDIAN ? firstIndex : firstIndex + 1;
    int lowByteIndex = byteOrder == BIG_ENDIAN ? firstIndex + 1 : firstIndex;
    return Chars.fromBytes(data[highByteIndex], data[lowByteIndex]);
  }
  /**
   * Returns the code point that begins at {@link #getPosition()}, decoded according to {@code
   * charset}.
   *
   * <p>What is returned depends on {@code charset}:
   *
   * <ul>
   *   <li>US_ASCII: The byte at {@link #getPosition()} if it is 7-bit ASCII (below {@code 0x80}),
   *       otherwise {@link #INVALID_CODE_POINT}.
   *   <li>UTF-8: If a lead byte followed by all of its continuation bytes starts at {@link
   *       #getPosition()} and fits within {@link #bytesLeft()}, the decoded code point. Otherwise
   *       {@link #INVALID_CODE_POINT}.
   *   <li>UTF-16 (any endianness):
   *       <ul>
   *         <li>If a {@linkplain Character#isHighSurrogate(char) high surrogate} code unit starts
   *             at {@link #getPosition()} and the next two bytes are a {@linkplain
   *             Character#isLowSurrogate(char) low surrogate} code unit, the {@linkplain
   *             Character#toCodePoint(char, char) combined code point}.
   *         <li>Otherwise the single code unit starting at {@link #getPosition()}, unchanged.
   *         <li>UTF-16 cannot be re-synchronized at byte level, so the result is undefined if
   *             {@link #getPosition()} is not aligned with the start of a code unit.
   *       </ul>
   * </ul>
   *
   * @throws IllegalArgumentException if charset is not supported. Only US_ASCII, UTF-8, UTF-16,
   *     UTF-16BE, and UTF-16LE are supported.
   * @throws IndexOutOfBoundsException if {@link #bytesLeft()} doesn't allow reading the smallest
   *     code unit in {@code charset} (1 byte for ASCII and UTF-8, 2 bytes for UTF-16).
   */
  public int peekCodePoint(Charset charset) {
    // NOTE(review): the UTF-16 branch of peekCodePointAndSize appears to combine the next code unit
    // whenever a high surrogate is found and 4 bytes are left, without checking that it is a low
    // surrogate as documented above - confirm.
    int packedCodePointAndSize = peekCodePointAndSize(charset);
    if (packedCodePointAndSize == 0) {
      // Zero is the helper's "nothing decodable here" marker.
      return INVALID_CODE_POINT;
    }
    // The code point lives in the upper 24 bits; the low 8 bits hold the size in bytes.
    return Ints.checkedCast(packedCodePointAndSize >>> 8);
  }
  /** Reads the next byte as an unsigned value. */
@ -708,53 +761,145 @@ public final class ParsableByteArray {
   * without advancing {@link #position}. Returns {@code 0} if {@link #bytesLeft()} doesn't allow
   * reading a whole character in {@code charset}.
   *
-   * <p>Only supports characters in {@code chars} that occupy a single code unit (i.e. one byte for
+   * <p>Only supports characters in {@code chars} that are in the Basic Multilingual Plane (occupy a
-   * UTF-8 and two bytes for UTF-16).
+   * single char).
   */
  private char readCharacterIfInList(Charset charset, char[] chars) {
-    int characterAndSize = peekCharacterAndSize(charset);
+    if (bytesLeft() < getSmallestCodeUnitSize(charset)) {
      return 0;
    }
    int codePointAndSize = peekCodePointAndSize(charset);
    if (codePointAndSize == 0) {
      return 0;
    }
-    if (characterAndSize != 0 && Chars.contains(chars, (char) (characterAndSize >> Short.SIZE))) {
+    int codePoint = UnsignedInts.checkedCast(codePointAndSize >>> 8);
-      position += characterAndSize & 0xFFFF;
+    if (Character.isSupplementaryCodePoint(codePoint)) {
-      return (char) (characterAndSize >> Short.SIZE);
+      return 0;
    }
    char c = Chars.checkedCast(codePoint);
    if (Chars.contains(chars, c)) {
      position += Ints.checkedCast(codePointAndSize & 0xFF);
      return c;
    } else {
      return 0;
    }
  }
  /**
-   * Peeks at the character at {@link #position} (as decoded by {@code charset}), returns it and the
+   * Peeks at the code unit at {@link #position} (as decoded by {@code charset}), and the number of
-   * number of bytes the character takes up within the array packed into an int. First two bytes are
+   * bytes it occupies within {@link #data}.
   * the character and the second two is the size in bytes it takes. Returns 0 if {@link
   * #bytesLeft()} doesn't allow reading a whole character in {@code charset} or if the {@code
   * charset} is not one of US_ASCII, UTF-8, UTF-16, UTF-16BE, or UTF-16LE.
   *
-   * <p>Only supports characters that occupy a single code unit (i.e. one byte for UTF-8 and two
+   * <p>See {@link #peekCodePoint(Charset)} for detailed per-charset behaviour and edge cases.
-   * bytes for UTF-16).
+   *
   * @return The code point in the upper 24 bits, and the size in bytes in the lower 8 bits. Or zero
   *     if no valid code unit starts at {@link #position} and fits within {@link #bytesLeft()}.
   * @throws IndexOutOfBoundsException if {@link #bytesLeft()} doesn't allow reading the smallest
   *     code unit in {@code charset} (1 byte for ASCII and UTF-8, 2 bytes for UTF-16).
   * @throws IllegalArgumentException if charset is not supported. Only US_ASCII, UTF-8, UTF-16,
   *     UTF-16BE, and UTF-16LE are supported.
   */
-  private int peekCharacterAndSize(Charset charset) {
+  private int peekCodePointAndSize(Charset charset) {
-    byte charByte1;
+    Assertions.checkArgument(
-    byte charByte2;
+        SUPPORTED_CHARSETS_FOR_READLINE.contains(charset), "Unsupported charset: " + charset);
-    byte characterSize;
+    if (bytesLeft() < getSmallestCodeUnitSize(charset)) {
-    if (bytesLeft() >= 1
+      throw new IndexOutOfBoundsException("position=" + position + ", limit=" + limit);
-        && ((charset.equals(StandardCharsets.UTF_8) && (data[position] & 0x80) == 0)
+    }
-            || charset.equals(StandardCharsets.US_ASCII))) {
+    int codePoint;
-      // TODO: b/398845842 - Handle multi-byte UTF-8.
+    byte codePointSize;
-      charByte1 = 0;
+    if (charset.equals(StandardCharsets.US_ASCII)) {
-      charByte2 = data[position];
+      if ((data[position] & 0x80) != 0) {
      characterSize = 1;
    } else if (bytesLeft() >= 2
        && (charset.equals(StandardCharsets.UTF_16) || charset.equals(StandardCharsets.UTF_16BE))) {
      charByte1 = data[position];
      charByte2 = data[position + 1];
      characterSize = 2;
    } else if (bytesLeft() >= 2 && charset.equals(StandardCharsets.UTF_16LE)) {
      charByte1 = data[position + 1];
      charByte2 = data[position];
      characterSize = 2;
    } else {
        return 0;
      }
-    return Ints.fromBytes(charByte1, charByte2, (byte) 0, characterSize);
+      codePoint = UnsignedBytes.toInt(data[position]);
      codePointSize = 1;
    } else if (charset.equals(StandardCharsets.UTF_8)) {
      codePointSize = peekUtf8CodeUnitSize();
      switch (codePointSize) {
        case 1:
          codePoint = UnsignedBytes.toInt(data[position]);
          break;
        case 2:
          codePoint = decodeUtf8CodeUnit(0, 0, data[position], data[position + 1]);
          break;
        case 3:
          int firstByteWithoutStartCode = data[position] & 0xF;
          codePoint =
              decodeUtf8CodeUnit(
                  0, firstByteWithoutStartCode, data[position + 1], data[position + 2]);
          break;
        case 4:
          codePoint =
              decodeUtf8CodeUnit(
                  data[position], data[position + 1], data[position + 2], data[position + 3]);
          break;
        case 0:
        default:
          return 0;
      }
    } else {
      // UTF-16
      ByteOrder byteOrder = charset.equals(StandardCharsets.UTF_16LE) ? LITTLE_ENDIAN : BIG_ENDIAN;
      char c = peekChar(byteOrder, /* offset= */ 0);
      if (Character.isHighSurrogate(c) && bytesLeft() >= 4) {
        char lowSurrogate = peekChar(byteOrder, /* offset= */ 2);
        codePoint = Character.toCodePoint(c, lowSurrogate);
        codePointSize = 4;
      } else {
        // This is either a BMP code point, an unpaired surrogate, or position is in the middle of
        // a matching surrogate pair.
        codePoint = c;
        codePointSize = 2;
      }
    }
    return (codePoint << 8) | codePointSize;
  }
  /**
   * Returns the size in bytes of the smallest code unit of {@code charset}: 1 for US_ASCII and
   * UTF-8, 2 for every other charset in {@link #SUPPORTED_CHARSETS_FOR_READLINE}.
   *
   * <p>The argument check rejects any {@code charset} that is not in {@link
   * #SUPPORTED_CHARSETS_FOR_READLINE}.
   */
  private static int getSmallestCodeUnitSize(Charset charset) {
    Assertions.checkArgument(
        SUPPORTED_CHARSETS_FOR_READLINE.contains(charset), "Unsupported charset: " + charset);
    boolean hasSingleByteCodeUnits =
        charset.equals(StandardCharsets.US_ASCII) || charset.equals(StandardCharsets.UTF_8);
    if (hasSingleByteCodeUnits) {
      return 1;
    }
    return 2;
  }
  /**
   * Returns the length in bytes (1 to 4) of the UTF-8 byte sequence that starts at {@link
   * #position}, or zero if the lead byte has no recognised pattern, the sequence does not fit in
   * {@link #bytesLeft()}, or one of the trailing bytes is not a continuation byte.
   */
  private byte peekUtf8CodeUnitSize() {
    int leadByte = data[position];
    if ((leadByte & 0x80) == 0) {
      // 0xxxxxxx: a single-byte (ASCII) sequence, nothing further to validate.
      return 1;
    }

    byte sequenceLength;
    if ((leadByte & 0xE0) == 0xC0) {
      sequenceLength = 2; // 110xxxxx
    } else if ((leadByte & 0xF0) == 0xE0) {
      sequenceLength = 3; // 1110xxxx
    } else if ((leadByte & 0xF8) == 0xF0) {
      sequenceLength = 4; // 11110xxx
    } else {
      // Either a stray continuation byte or a pattern that doesn't seem to be valid UTF-8.
      return 0;
    }

    if (bytesLeft() < sequenceLength) {
      return 0;
    }
    for (int i = 1; i < sequenceLength; i++) {
      if (!isUtf8ContinuationByte(data[position + i])) {
        return 0;
      }
    }
    return sequenceLength;
  }
  /** Returns whether {@code b} has the bit pattern {@code 10xxxxxx} of a UTF-8 continuation byte. */
  private static boolean isUtf8ContinuationByte(byte b) {
    int topTwoBits = (b & 0xFF) >>> 6;
    return topTwoBits == 0b10;
  }
  /**
   * Assembles a code point from the four bytes of a UTF-8 sequence.
   *
   * <p>The payload is taken from the low 3 bits of {@code b1} and the low 6 bits of each of {@code
   * b2}, {@code b3} and {@code b4}. The shorter-sequence call sites visible in this file pass zero
   * for the leading arguments and a pre-masked lead byte where needed.
   */
  private static int decodeUtf8CodeUnit(int b1, int b2, int b3, int b4) {
    // Bits 16-20 of the code point: 3 bits from b1, then the top 2 payload bits of b2.
    int upperBits = ((b1 & 0x7) << 2) | ((b2 & 0b0011_0000) >> 4);
    // Bits 8-15: the low 4 payload bits of b2, then the top 4 payload bits of b3.
    int middleBits = ((b2 & 0xF) << 4) | ((b3 & 0b0011_1100) >> 2);
    // Bits 0-7: the low 2 payload bits of b3, then all 6 payload bits of b4.
    int lowerBits = ((b3 & 0x3) << 6) | (b4 & 0x3F);
    return Ints.fromBytes(
        (byte) 0,
        UnsignedBytes.checkedCast(upperBits),
        UnsignedBytes.checkedCast(middleBits),
        UnsignedBytes.checkedCast(lowerBits));
  }
 }
--- a/libraries/common/src/test/java/androidx/media3/common/util/ParsableByteArrayTest.java
+++ b/libraries/common/src/test/java/androidx/media3/common/util/ParsableByteArrayTest.java
@ -998,6 +998,257 @@ public final class ParsableByteArrayTest {
    assertThat(parser.readLine(UTF_16LE)).isNull();
  }
  @Test
  public void peekCodePoint_ascii() {
    byte[] asciiBytes = "foo".getBytes(US_ASCII);
    ParsableByteArray byteArray = new ParsableByteArray(asciiBytes);

    int codePoint = byteArray.peekCodePoint(US_ASCII);

    assertThat(codePoint).isEqualTo((int) 'f');
  }
  @Test
  public void peekCodePoint_ascii_invalid() {
    // 0xE9 is 'é' in ISO 8859-1; its top bit is set, so it is not valid 7-bit ASCII.
    ParsableByteArray byteArray = new ParsableByteArray(TestUtil.createByteArray(0xE9));

    int codePoint = byteArray.peekCodePoint(US_ASCII);

    assertThat(codePoint).isEqualTo(ParsableByteArray.INVALID_CODE_POINT);
  }
  @Test
  public void peekCodePoint_ascii_atLimit_throwsException() {
    // The limit (2) is before the end of the 3-byte array, and the position sits exactly on it.
    byte[] asciiBytes = "foo".getBytes(US_ASCII);
    ParsableByteArray byteArray = new ParsableByteArray(asciiBytes, /* limit= */ 2);
    byteArray.setPosition(2);

    IndexOutOfBoundsException thrown =
        assertThrows(IndexOutOfBoundsException.class, () -> byteArray.peekCodePoint(US_ASCII));

    assertThat(thrown).hasMessageThat().contains("position=2");
    assertThat(thrown).hasMessageThat().contains("limit=2");
  }
  @Test
  public void peekCodePoint_utf8() {
    byte[] utf8Bytes = "foo".getBytes(UTF_8);
    ParsableByteArray byteArray = new ParsableByteArray(utf8Bytes);

    int codePoint = byteArray.peekCodePoint(UTF_8);

    assertThat(codePoint).isEqualTo((int) 'f');
  }
  @Test
  public void peekCodePoint_utf8_twoByteCharacter() {
    byte[] utf8Bytes = "étude".getBytes(UTF_8);
    ParsableByteArray byteArray = new ParsableByteArray(utf8Bytes);

    int codePoint = byteArray.peekCodePoint(UTF_8);

    assertThat(codePoint).isEqualTo((int) 'é');
  }
  @Test
  public void peekCodePoint_utf8_twoByteCharacter_misaligned() {
    byte[] utf8Bytes = "étude".getBytes(UTF_8);
    ParsableByteArray byteArray = new ParsableByteArray(utf8Bytes);
    // Start reading from the second byte of the array.
    byteArray.setPosition(1);

    int codePoint = byteArray.peekCodePoint(UTF_8);

    assertThat(codePoint).isEqualTo(ParsableByteArray.INVALID_CODE_POINT);
  }
  @Test
  public void peekCodePoint_utf8_threeByteCharacter() {
    byte[] utf8Bytes = "ऊ".getBytes(UTF_8);
    ParsableByteArray byteArray = new ParsableByteArray(utf8Bytes);

    int codePoint = byteArray.peekCodePoint(UTF_8);

    assertThat(codePoint).isEqualTo((int) 'ऊ');
  }
  @Test
  public void peekCodePoint_utf8_threeByteCharacter_misaligned() {
    byte[] utf8Bytes = "ऊ".getBytes(UTF_8);
    ParsableByteArray byteArray = new ParsableByteArray(utf8Bytes);
    // Start reading from the second byte of the array.
    byteArray.setPosition(1);

    int codePoint = byteArray.peekCodePoint(UTF_8);

    assertThat(codePoint).isEqualTo(ParsableByteArray.INVALID_CODE_POINT);
  }
  @Test
  public void peekCodePoint_utf8_fourByteCharacter() {
    String supplementaryCharacter = "\uD83D\uDE1B";
    ParsableByteArray byteArray = new ParsableByteArray(supplementaryCharacter.getBytes(UTF_8));

    int codePoint = byteArray.peekCodePoint(UTF_8);

    assertThat(codePoint).isEqualTo(Character.codePointAt(supplementaryCharacter, 0));
  }
  @Test
  public void peekCodePoint_utf8_fourByteCharacter_misaligned() {
    byte[] utf8Bytes = "\uD83D\uDE1B".getBytes(UTF_8);
    ParsableByteArray byteArray = new ParsableByteArray(utf8Bytes);
    // Start reading from the second byte of the array.
    byteArray.setPosition(1);

    int codePoint = byteArray.peekCodePoint(UTF_8);

    assertThat(codePoint).isEqualTo(ParsableByteArray.INVALID_CODE_POINT);
  }
  @Test
  public void peekCodePoint_utf8_atLimit_throwsException() {
    // The limit (2) is before the end of the 3-byte array, and the position sits exactly on it.
    byte[] utf8Bytes = "foo".getBytes(UTF_8);
    ParsableByteArray byteArray = new ParsableByteArray(utf8Bytes, /* limit= */ 2);
    byteArray.setPosition(2);

    IndexOutOfBoundsException thrown =
        assertThrows(IndexOutOfBoundsException.class, () -> byteArray.peekCodePoint(UTF_8));

    assertThat(thrown).hasMessageThat().contains("position=2");
    assertThat(thrown).hasMessageThat().contains("limit=2");
  }
  @Test
  public void peekCodePoint_utf8_invalidByteSequence() {
    int invalidCodePoint = ParsableByteArray.INVALID_CODE_POINT;

    // A 2-byte lead byte with nothing after it.
    ParsableByteArray loneTwoByteLead = new ParsableByteArray(TestUtil.createByteArray(0xC1));
    assertThat(loneTwoByteLead.peekCodePoint(UTF_8)).isEqualTo(invalidCodePoint);

    // A 2-byte character whose second byte is beyond the limit.
    ParsableByteArray truncatedTwoByte =
        new ParsableByteArray("é".getBytes(UTF_8), /* limit= */ 1);
    assertThat(truncatedTwoByte.peekCodePoint(UTF_8)).isEqualTo(invalidCodePoint);

    // A 2-byte lead byte followed by a byte that is not a continuation byte.
    ParsableByteArray twoByteLeadThenAscii =
        new ParsableByteArray(TestUtil.createByteArray(0xC1, 'a'));
    assertThat(twoByteLeadThenAscii.peekCodePoint(UTF_8)).isEqualTo(invalidCodePoint);

    // A 3-byte lead byte followed by only one byte.
    ParsableByteArray shortThreeByte =
        new ParsableByteArray(TestUtil.createByteArray(0xE1, 0x81));
    assertThat(shortThreeByte.peekCodePoint(UTF_8)).isEqualTo(invalidCodePoint);

    // A 3-byte character whose last byte is beyond the limit.
    ParsableByteArray truncatedThreeByte =
        new ParsableByteArray("ऊ".getBytes(UTF_8), /* limit= */ 2);
    assertThat(truncatedThreeByte.peekCodePoint(UTF_8)).isEqualTo(invalidCodePoint);

    // A 3-byte lead byte followed by only one continuation byte.
    ParsableByteArray threeByteLeadThenAscii =
        new ParsableByteArray(TestUtil.createByteArray(0xE1, 0x81, 'a'));
    assertThat(threeByteLeadThenAscii.peekCodePoint(UTF_8)).isEqualTo(invalidCodePoint);

    // A 4-byte lead byte followed by only two bytes.
    ParsableByteArray shortFourByte =
        new ParsableByteArray(TestUtil.createByteArray(0xF1, 0x81, 0x81));
    assertThat(shortFourByte.peekCodePoint(UTF_8)).isEqualTo(invalidCodePoint);

    // A 4-byte character whose last byte is beyond the limit.
    ParsableByteArray truncatedFourByte =
        new ParsableByteArray("\uD83D\uDE1B".getBytes(UTF_8), /* limit= */ 3);
    assertThat(truncatedFourByte.peekCodePoint(UTF_8)).isEqualTo(invalidCodePoint);

    // A 4-byte lead byte followed by only two continuation bytes.
    ParsableByteArray fourByteLeadThenAscii =
        new ParsableByteArray(TestUtil.createByteArray(0xF1, 0x81, 0x81, 'a'));
    assertThat(fourByteLeadThenAscii.peekCodePoint(UTF_8)).isEqualTo(invalidCodePoint);
  }
  @Test
  public void peekCodePoint_utf16() {
    // Encode with UTF_16BE so that no BOM is written at the start of the bytes.
    byte[] utf16Bytes = "foo".getBytes(UTF_16BE);
    ParsableByteArray byteArray = new ParsableByteArray(utf16Bytes);

    assertThat(byteArray.peekCodePoint(UTF_16)).isEqualTo((int) 'f');
    assertThat(byteArray.peekCodePoint(UTF_16BE)).isEqualTo((int) 'f');
  }
  @Test
  public void peekCodePoint_utf16_basicMultilingualPlane() {
    // Encode with UTF_16BE so that no BOM is written at the start of the bytes.
    byte[] utf16Bytes = "étude".getBytes(UTF_16BE);
    ParsableByteArray byteArray = new ParsableByteArray(utf16Bytes);

    assertThat(byteArray.peekCodePoint(UTF_16)).isEqualTo((int) 'é');
    assertThat(byteArray.peekCodePoint(UTF_16BE)).isEqualTo((int) 'é');
  }
  @Test
  public void peekCodePoint_utf16_surrogatePair() {
    String supplementaryCharacter = "\uD83D\uDE1B";
    // Encode with UTF_16BE so that no BOM is written at the start of the bytes.
    ParsableByteArray byteArray = new ParsableByteArray(supplementaryCharacter.getBytes(UTF_16BE));
    int combinedCodePoint = Character.codePointAt(supplementaryCharacter, 0);

    assertThat(byteArray.peekCodePoint(UTF_16)).isEqualTo(combinedCodePoint);
    assertThat(byteArray.peekCodePoint(UTF_16BE)).isEqualTo(combinedCodePoint);
  }
  @Test
  public void peekCodePoint_utf16_splitSurrogatePair_returnsLowSurrogate() {
    // Encode with UTF_16BE so that no BOM is written at the start of the bytes.
    byte[] utf16Bytes = "\uD83D\uDE1B".getBytes(UTF_16BE);
    ParsableByteArray byteArray = new ParsableByteArray(utf16Bytes);
    // Step over the first code unit so the position is on the second one (0xDE1B).
    byteArray.skipBytes(2);

    assertThat(byteArray.peekCodePoint(UTF_16)).isEqualTo(0xDE1B);
    assertThat(byteArray.peekCodePoint(UTF_16BE)).isEqualTo(0xDE1B);
  }
  @Test
  public void peekCodePoint_utf16_misaligned_returnsGarbage() {
    // Encode with UTF_16BE so that no BOM is written at the start of the bytes.
    byte[] utf16Bytes = "foo".getBytes(UTF_16BE);
    ParsableByteArray byteArray = new ParsableByteArray(utf16Bytes);
    // Position 1 pairs the second byte of 'f' with the first byte of the following 'o'.
    byteArray.setPosition(1);

    assertThat(byteArray.peekCodePoint(UTF_16)).isEqualTo((int) '昀');
    assertThat(byteArray.peekCodePoint(UTF_16BE)).isEqualTo((int) '昀');
  }
  @Test
  public void peekCodePoint_utf16_atLimit_throwsException() {
    // Encode with UTF_16BE so that no BOM is written. The limit is before the end of the array.
    byte[] utf16Bytes = "foo".getBytes(UTF_16BE);
    ParsableByteArray byteArray = new ParsableByteArray(utf16Bytes, /* limit= */ 2);
    // Leaves a single readable byte, which is too few for a UTF-16 code unit.
    byteArray.setPosition(1);

    IndexOutOfBoundsException thrownForUtf16 =
        assertThrows(IndexOutOfBoundsException.class, () -> byteArray.peekCodePoint(UTF_16));
    assertThat(thrownForUtf16).hasMessageThat().contains("position=1");
    assertThat(thrownForUtf16).hasMessageThat().contains("limit=2");

    IndexOutOfBoundsException thrownForUtf16Be =
        assertThrows(IndexOutOfBoundsException.class, () -> byteArray.peekCodePoint(UTF_16BE));
    assertThat(thrownForUtf16Be).hasMessageThat().contains("position=1");
    assertThat(thrownForUtf16Be).hasMessageThat().contains("limit=2");
  }
  @Test
  public void peekCodePoint_utf16le() {
    byte[] utf16LeBytes = "foo".getBytes(UTF_16LE);
    ParsableByteArray byteArray = new ParsableByteArray(utf16LeBytes);

    int codePoint = byteArray.peekCodePoint(UTF_16LE);

    assertThat(codePoint).isEqualTo((int) 'f');
  }
  @Test
  public void peekCodePoint_utf16le_basicMultilingualPlane() {
    byte[] utf16LeBytes = "étude".getBytes(UTF_16LE);
    ParsableByteArray byteArray = new ParsableByteArray(utf16LeBytes);

    int codePoint = byteArray.peekCodePoint(UTF_16LE);

    assertThat(codePoint).isEqualTo((int) 'é');
  }
  @Test
  public void peekCodePoint_utf16le_surrogatePair() {
    String supplementaryCharacter = "\uD83D\uDE1B";
    ParsableByteArray byteArray = new ParsableByteArray(supplementaryCharacter.getBytes(UTF_16LE));

    int codePoint = byteArray.peekCodePoint(UTF_16LE);

    assertThat(codePoint).isEqualTo(Character.codePointAt(supplementaryCharacter, 0));
  }
  @Test
  public void peekCodePoint_utf16le_splitSurrogatePair_returnsLowSurrogate() {
    byte[] utf16LeBytes = "\uD83D\uDE1B".getBytes(UTF_16LE);
    ParsableByteArray byteArray = new ParsableByteArray(utf16LeBytes);
    // Step over the first code unit so the position is on the second one (0xDE1B).
    byteArray.skipBytes(2);

    int codePoint = byteArray.peekCodePoint(UTF_16LE);

    assertThat(codePoint).isEqualTo(0xDE1B);
  }
  @Test
  public void peekCodePoint_utf16le_misaligned_returnsGarbage() {
    byte[] utf16LeBytes = "foo".getBytes(UTF_16LE);
    ParsableByteArray byteArray = new ParsableByteArray(utf16LeBytes);
    // Position 1 pairs the second byte of 'f' with the first byte of the following 'o'.
    byteArray.setPosition(1);

    int codePoint = byteArray.peekCodePoint(UTF_16LE);

    assertThat(codePoint).isEqualTo((int) '漀');
  }
  @Test
  public void peekCodePoint_utf16le_atLimit_throwsException() {
    // The limit (2) is before the end of the backing array.
    byte[] utf16LeBytes = "foo".getBytes(UTF_16LE);
    ParsableByteArray byteArray = new ParsableByteArray(utf16LeBytes, /* limit= */ 2);
    // Leaves a single readable byte, which is too few for a UTF-16 code unit.
    byteArray.setPosition(1);

    IndexOutOfBoundsException thrown =
        assertThrows(IndexOutOfBoundsException.class, () -> byteArray.peekCodePoint(UTF_16LE));

    assertThat(thrown).hasMessageThat().contains("position=1");
    assertThat(thrown).hasMessageThat().contains("limit=2");
  }
  @Test
  public void peekChar() {
    // Use UTF_16BE to avoid encoding a BOM.
@ -1045,6 +1296,7 @@ public final class ParsableByteArrayTest {
  }
  @Test
  @SuppressWarnings("deprecation") // Testing deprecated method
  public void peekChar_ascii() {
    byte[] bytes = "foo".getBytes(US_ASCII);
    ParsableByteArray parser = new ParsableByteArray(bytes);
@ -1053,6 +1305,7 @@ public final class ParsableByteArrayTest {
  }
  @Test
  @SuppressWarnings("deprecation") // Testing deprecated method
  public void peekChar_ascii_invalid_returns8BitCharacterAnyway() {
    // Choose é from ISO 8859-1 which is not valid 7-bit ASCII (since it has a high MSB).
    ParsableByteArray parser = new ParsableByteArray(TestUtil.createByteArray(0xE9));
@ -1061,6 +1314,7 @@ public final class ParsableByteArrayTest {
  }
  @Test
  @SuppressWarnings("deprecation") // Testing deprecated method
  public void peekChar_ascii_atLimit_throwsException() {
    // Set the limit before the end of the byte array.
    ParsableByteArray parser = new ParsableByteArray("foo".getBytes(US_ASCII), /* limit= */ 2);
@ -1071,6 +1325,7 @@ public final class ParsableByteArrayTest {
  }
  @Test
  @SuppressWarnings("deprecation") // Testing deprecated method
  public void peekChar_utf8_oneByteCharacter() {
    byte[] bytes = "foo".getBytes(UTF_8);
    ParsableByteArray parser = new ParsableByteArray(bytes);
@ -1079,6 +1334,7 @@ public final class ParsableByteArrayTest {
  }
  @Test
  @SuppressWarnings("deprecation") // Testing deprecated method
  public void peekChar_utf8_twoByteCharacter_returnsZero() {
    byte[] bytes = "étude".getBytes(UTF_8);
    ParsableByteArray parser = new ParsableByteArray(bytes);
@ -1088,6 +1344,7 @@ public final class ParsableByteArrayTest {
  }
  @Test
  @SuppressWarnings("deprecation") // Testing deprecated method
  public void peekChar_utf8_threeByteCharacter_returnsZero() {
    ParsableByteArray parser = new ParsableByteArray("ऊ".getBytes(UTF_8));
@ -1096,6 +1353,7 @@ public final class ParsableByteArrayTest {
  }
  @Test
  @SuppressWarnings("deprecation") // Testing deprecated method
  public void peekChar_utf8_fourByteCharacter_returnsZero() {
    byte[] bytes = "\uD83D\uDE1B".getBytes(UTF_8);
    ParsableByteArray parser = new ParsableByteArray(bytes);
@ -1105,6 +1363,7 @@ public final class ParsableByteArrayTest {
  }
  @Test
  @SuppressWarnings("deprecation") // Testing deprecated method
  public void peekChar_utf8_splitFourByteChar_returnsZero() {
    byte[] bytes = "\uD83D\uDE1B".getBytes(UTF_8);
    ParsableByteArray parser = new ParsableByteArray(bytes);
@ -1115,6 +1374,7 @@ public final class ParsableByteArrayTest {
  }
  @Test
  @SuppressWarnings("deprecation") // Testing deprecated method
  public void peekChar_utf8_atLimit_returnsZero() {
    // Set the limit before the end of the byte array.
    ParsableByteArray parser = new ParsableByteArray("foo".getBytes(UTF_8), /* limit= */ 2);
@ -1125,6 +1385,7 @@ public final class ParsableByteArrayTest {
  }
  @Test
  @SuppressWarnings("deprecation") // Testing deprecated method
  public void peekChar_utf8_invalidByteSequence() {
    // 2-byte start character not followed by anything.
    ParsableByteArray parser = new ParsableByteArray(TestUtil.createByteArray(0xC1));
@ -1164,6 +1425,7 @@ public final class ParsableByteArrayTest {
  }
  @Test
  @SuppressWarnings("deprecation") // Testing deprecated method
  public void peekChar_utf16() {
    // Use UTF_16BE to avoid encoding a BOM.
    ParsableByteArray parser = new ParsableByteArray("foo".getBytes(UTF_16BE));
@ -1174,6 +1436,7 @@ public final class ParsableByteArrayTest {
  }
  @Test
  @SuppressWarnings("deprecation") // Testing deprecated method
  public void peekChar_utf16_basicMultilingualPlane() {
    // Use UTF_16BE to avoid encoding a BOM.
    ParsableByteArray parser = new ParsableByteArray("étude".getBytes(UTF_16BE));
@ -1184,6 +1447,7 @@ public final class ParsableByteArrayTest {
  }
  @Test
  @SuppressWarnings("deprecation") // Testing deprecated method
  public void peekChar_utf16_surrogatePair_returnsHighSurrogate() {
    // Use UTF_16BE to avoid encoding a BOM.
    ParsableByteArray parser = new ParsableByteArray("\uD83D\uDE1B".getBytes(UTF_16BE));
@ -1195,6 +1459,7 @@ public final class ParsableByteArrayTest {
  }
  @Test
  @SuppressWarnings("deprecation") // Testing deprecated method
  public void peekChar_utf16_splitSurrogatePair_returnsLowSurrogate() {
    // Use UTF_16BE to avoid encoding a BOM.
    ParsableByteArray parser = new ParsableByteArray("\uD83D\uDE1B".getBytes(UTF_16BE));
@ -1207,6 +1472,7 @@ public final class ParsableByteArrayTest {
  }
  @Test
  @SuppressWarnings("deprecation") // Testing deprecated method
  public void peekChar_utf16_misaligned_returnsGarbage() {
    // Use UTF_16BE to avoid encoding a BOM.
    ParsableByteArray parser = new ParsableByteArray("foo".getBytes(UTF_16BE));
@ -1219,6 +1485,7 @@ public final class ParsableByteArrayTest {
  }
  @Test
  @SuppressWarnings("deprecation") // Testing deprecated method
  public void peekChar_utf16_atLimit_returnsZero() {
    // Use UTF_16BE to avoid encoding a BOM. Set the limit before the end of the byte array.
    ParsableByteArray parser = new ParsableByteArray("foo".getBytes(UTF_16BE), /* limit= */ 2);
@ -1232,6 +1499,7 @@ public final class ParsableByteArrayTest {
  }
  @Test
  @SuppressWarnings("deprecation") // Testing deprecated method
  public void peekChar_utf16le() {
    ParsableByteArray parser = new ParsableByteArray("foo".getBytes(UTF_16LE));
@ -1239,6 +1507,7 @@ public final class ParsableByteArrayTest {
  }
  @Test
  @SuppressWarnings("deprecation") // Testing deprecated method
  public void peekChar_utf16le_basicMultilingualPlane() {
    ParsableByteArray parser = new ParsableByteArray("étude".getBytes(UTF_16LE));
@ -1246,6 +1515,7 @@ public final class ParsableByteArrayTest {
  }
  @Test
  @SuppressWarnings("deprecation") // Testing deprecated method
  public void peekChar_utf16le_surrogatePair_returnsHighSurrogate() {
    ParsableByteArray parser = new ParsableByteArray("\uD83D\uDE1B".getBytes(UTF_16LE));
@ -1254,6 +1524,7 @@ public final class ParsableByteArrayTest {
  }
  @Test
  @SuppressWarnings("deprecation") // Testing deprecated method
  public void peekChar_utf16le_splitSurrogatePair_returnsLowSurrogate() {
    ParsableByteArray parser = new ParsableByteArray("\uD83D\uDE1B".getBytes(UTF_16LE));
    parser.skipBytes(2);
@ -1263,6 +1534,7 @@ public final class ParsableByteArrayTest {
  }
  @Test
  @SuppressWarnings("deprecation") // Testing deprecated method
  public void peekChar_utf16le_misaligned_returnsGarbage() {
    ParsableByteArray parser = new ParsableByteArray("foo".getBytes(UTF_16LE));
    // Move the position so we are reading the second byte of 'f' and the first byte of 'o'.
@ -1272,6 +1544,7 @@ public final class ParsableByteArrayTest {
  }
  @Test
  @SuppressWarnings("deprecation") // Testing deprecated method
  public void peekChar_utf16le_atLimit_returnsZero() {
    // Set the limit before the end of the byte array.
    ParsableByteArray parser = new ParsableByteArray("foo".getBytes(UTF_16LE), /* limit= */ 2);
--- a/libraries/extractor/src/main/java/androidx/media3/extractor/text/ssa/SsaParser.java
+++ b/libraries/extractor/src/main/java/androidx/media3/extractor/text/ssa/SsaParser.java
@ -227,7 +227,7 @@ public final class SsaParser implements SubtitleParser {
  private void parseScriptInfo(ParsableByteArray data, Charset charset) {
    @Nullable String currentLine;
    while ((currentLine = data.readLine(charset)) != null
-        && (data.bytesLeft() == 0 || data.peekChar(charset) != '[')) {
+        && (data.bytesLeft() == 0 || data.peekCodePoint(charset) != '[')) {
      String[] infoNameAndValue = currentLine.split(":");
      if (infoNameAndValue.length != 2) {
        continue;
@ -266,7 +266,7 @@ public final class SsaParser implements SubtitleParser {
    @Nullable SsaStyle.Format formatInfo = null;
    @Nullable String currentLine;
    while ((currentLine = data.readLine(charset)) != null
-        && (data.bytesLeft() == 0 || data.peekChar(charset) != '[')) {
+        && (data.bytesLeft() == 0 || data.peekCodePoint(charset) != '[')) {
      if (currentLine.startsWith(FORMAT_LINE_PREFIX)) {
        formatInfo = SsaStyle.Format.fromFormatLine(currentLine);
      } else if (currentLine.startsWith(STYLE_LINE_PREFIX)) {