Add tests for ParsableByteArray.peekChar

This will help to ensure I don't change the behaviour when I add `peekCodePoint` in a follow-up change. PiperOrigin-RevId: 733139422
2025-04-30 06:46:50 +08:00 · 2025-03-03 18:56:52 -08:00 · 2025-03-03 18:56:52 -08:00 · a92cf5311f
commit a92cf5311f
parent cdb112a85a
1 changed files with 285 additions and 0 deletions
--- a/libraries/common/src/test/java/androidx/media3/common/util/ParsableByteArrayTest.java
+++ b/libraries/common/src/test/java/androidx/media3/common/util/ParsableByteArrayTest.java
@ -26,6 +26,7 @@ import static java.nio.charset.StandardCharsets.UTF_8;
 import static org.junit.Assert.assertThrows;
 import static org.junit.Assert.fail;
 import androidx.media3.test.utils.TestUtil;
 import androidx.test.ext.junit.runners.AndroidJUnit4;
 import com.google.common.primitives.Bytes;
 import java.nio.ByteBuffer;
@ -945,6 +946,290 @@ public final class ParsableByteArrayTest {
    assertThat(parser.readLine(UTF_16LE)).isNull();
  }
  @Test
  public void peekChar() {
    // Use UTF_16BE to avoid encoding a BOM.
    ParsableByteArray parser = new ParsableByteArray("foo".getBytes(UTF_16BE));
    assertThat(parser.peekChar()).isEqualTo('f');
  }
  @Test
  public void peekChar_returnsHighSurrogateFromPair() {
    // Use UTF_16BE to avoid encoding a BOM.
    ParsableByteArray parser = new ParsableByteArray("\uD83D\uDE1B".getBytes(UTF_16BE));
    // Compare ints instead of chars for unprintable characters, so the failure messages are better.
    assertThat((int) parser.peekChar()).isEqualTo(0xD83D);
  }
  @Test
  public void peekChar_returnsLowSurrogateFromPair() {
    // Use UTF_16BE to avoid encoding a BOM.
    ParsableByteArray parser = new ParsableByteArray("\uD83D\uDE1B".getBytes(UTF_16BE));
    parser.setPosition(2);
    // Compare ints instead of chars for unprintable characters, so the failure messages are better.
    assertThat((int) parser.peekChar()).isEqualTo(0xDE1B);
  }
  @Test
  public void peekChar_splitSurrogate() {
    // Encode only a low surrogate char
    ParsableByteArray parser = new ParsableByteArray(TestUtil.createByteArray(0xDE, 0x1B));
    // Compare ints instead of chars for unprintable characters, so the failure messages are better.
    assertThat((int) parser.peekChar()).isEqualTo(0xDE1B);
  }
  @Test
  public void peekChar_misaligned_returnsGarbage() {
    // Use UTF_16BE to avoid encoding a BOM.
    ParsableByteArray parser = new ParsableByteArray("foo".getBytes(UTF_16BE));
    // Move the position so we are reading the second byte of 'f' and the first byte of 'o'.
    parser.setPosition(1);
    assertThat(parser.peekChar()).isEqualTo('昀');
  }
  @Test
  public void peekChar_ascii() {
    byte[] bytes = "foo".getBytes(US_ASCII);
    ParsableByteArray parser = new ParsableByteArray(bytes);
    assertThat(parser.peekChar(US_ASCII)).isEqualTo('f');
  }
  @Test
  public void peekChar_ascii_invalid_returns8BitCharacterAnyway() {
    // Choose é from ISO 8859-1 which is not valid 7-bit ASCII (since it has a high MSB).
    ParsableByteArray parser = new ParsableByteArray(TestUtil.createByteArray(0xE9));
    assertThat(parser.peekChar(US_ASCII)).isEqualTo('é');
  }
  @Test
  public void peekChar_ascii_atLimit_throwsException() {
    // Set the limit before the end of the byte array.
    ParsableByteArray parser = new ParsableByteArray("foo".getBytes(US_ASCII), /* limit= */ 2);
    parser.setPosition(2);
    // Compare ints instead of chars for unprintable characters, so the failure messages are better.
    assertThat((int) parser.peekChar(US_ASCII)).isEqualTo(0);
  }
  @Test
  public void peekChar_utf8_oneByteCharacter() {
    byte[] bytes = "foo".getBytes(UTF_8);
    ParsableByteArray parser = new ParsableByteArray(bytes);
    assertThat(parser.peekChar(UTF_8)).isEqualTo('f');
  }
  @Test
  public void peekChar_utf8_twoByteCharacter_returnsZero() {
    byte[] bytes = "étude".getBytes(UTF_8);
    ParsableByteArray parser = new ParsableByteArray(bytes);
    // Compare ints instead of chars for unprintable characters, so the failure messages are better.
    assertThat((int) parser.peekChar(UTF_8)).isEqualTo(0);
  }
  @Test
  public void peekChar_utf8_threeByteCharacter_returnsZero() {
    ParsableByteArray parser = new ParsableByteArray("ऊ".getBytes(UTF_8));
    // Compare ints instead of chars for unprintable characters, so the failure messages are better.
    assertThat(parser.peekChar(UTF_8)).isEqualTo(0);
  }
  @Test
  public void peekChar_utf8_fourByteCharacter_returnsZero() {
    byte[] bytes = "\uD83D\uDE1B".getBytes(UTF_8);
    ParsableByteArray parser = new ParsableByteArray(bytes);
    // Compare ints instead of chars for unprintable characters, so the failure messages are better.
    assertThat((int) parser.peekChar(UTF_8)).isEqualTo(0);
  }
  @Test
  public void peekChar_utf8_splitFourByteChar_returnsZero() {
    byte[] bytes = "\uD83D\uDE1B".getBytes(UTF_8);
    ParsableByteArray parser = new ParsableByteArray(bytes);
    parser.skipBytes(2);
    // Compare ints instead of chars for unprintable characters, so the failure messages are better.
    assertThat((int) parser.peekChar(UTF_8)).isEqualTo(0);
  }
  @Test
  public void peekChar_utf8_atLimit_returnsZero() {
    // Set the limit before the end of the byte array.
    ParsableByteArray parser = new ParsableByteArray("foo".getBytes(UTF_8), /* limit= */ 2);
    parser.setPosition(2);
    // Compare ints instead of chars for unprintable characters, so the failure messages are better.
    assertThat((int) parser.peekChar(UTF_8)).isEqualTo(0);
  }
  @Test
  public void peekChar_utf8_invalidByteSequence() {
    // 2-byte start character not followed by anything.
    ParsableByteArray parser = new ParsableByteArray(TestUtil.createByteArray(0xC1));
    assertThat(parser.peekChar(UTF_8)).isEqualTo(0);
    // 2-byte character truncated by limit.
    parser = new ParsableByteArray("é".getBytes(UTF_8), /* limit= */ 1);
    assertThat(parser.peekChar(UTF_8)).isEqualTo(0);
    // 2-byte start character not followed by a continuation byte.
    parser = new ParsableByteArray(TestUtil.createByteArray(0xC1, 'a'));
    assertThat(parser.peekChar(UTF_8)).isEqualTo(0);
    // 3-byte start character followed by only one byte.
    parser = new ParsableByteArray(TestUtil.createByteArray(0xE1, 0x81));
    assertThat(parser.peekChar(UTF_8)).isEqualTo(0);
    // 3-byte character truncated by limit.
    parser = new ParsableByteArray("ऊ".getBytes(UTF_8), /* limit= */ 2);
    assertThat(parser.peekChar(UTF_8)).isEqualTo(0);
    // 3-byte start character followed by only one continuation byte.
    parser = new ParsableByteArray(TestUtil.createByteArray(0xE1, 0x81, 'a'));
    assertThat(parser.peekChar(UTF_8)).isEqualTo(0);
    // 4-byte start character followed by only two bytes.
    parser = new ParsableByteArray(TestUtil.createByteArray(0xF1, 0x81, 0x81));
    assertThat(parser.peekChar(UTF_8)).isEqualTo(0);
    // 4-byte character truncated by limit.
    parser = new ParsableByteArray("\uD83D\uDE1B".getBytes(UTF_8), /* limit= */ 3);
    assertThat(parser.peekChar(UTF_8)).isEqualTo(0);
    // 4-byte start character followed by only two continuation bytes.
    parser = new ParsableByteArray(TestUtil.createByteArray(0xF1, 0x81, 0x81, 'a'));
    assertThat(parser.peekChar(UTF_8)).isEqualTo(0);
  }
  @Test
  public void peekChar_utf16() {
    // Use UTF_16BE to avoid encoding a BOM.
    ParsableByteArray parser = new ParsableByteArray("foo".getBytes(UTF_16BE));
    char expectedChar = 'f';
    assertThat(parser.peekChar(UTF_16)).isEqualTo(expectedChar);
    assertThat(parser.peekChar(UTF_16BE)).isEqualTo(expectedChar);
  }
  @Test
  public void peekChar_utf16_basicMultilingualPlane() {
    // Use UTF_16BE to avoid encoding a BOM.
    ParsableByteArray parser = new ParsableByteArray("étude".getBytes(UTF_16BE));
    char expectedChar = 'é';
    assertThat(parser.peekChar(UTF_16)).isEqualTo(expectedChar);
    assertThat(parser.peekChar(UTF_16BE)).isEqualTo(expectedChar);
  }
  @Test
  public void peekChar_utf16_surrogatePair_returnsHighSurrogate() {
    // Use UTF_16BE to avoid encoding a BOM.
    ParsableByteArray parser = new ParsableByteArray("\uD83D\uDE1B".getBytes(UTF_16BE));
    // Compare ints instead of chars for unprintable characters, so the failure messages are better.
    int expectedChar = 0xD83D;
    assertThat((int) parser.peekChar(UTF_16)).isEqualTo(expectedChar);
    assertThat((int) parser.peekChar(UTF_16BE)).isEqualTo(expectedChar);
  }
  @Test
  public void peekChar_utf16_splitSurrogatePair_returnsLowSurrogate() {
    // Use UTF_16BE to avoid encoding a BOM.
    ParsableByteArray parser = new ParsableByteArray("\uD83D\uDE1B".getBytes(UTF_16BE));
    parser.skipBytes(2);
    // Compare ints instead of chars for unprintable characters, so the failure messages are better.
    int expectedChar = 0xDE1B;
    assertThat((int) parser.peekChar(UTF_16)).isEqualTo(expectedChar);
    assertThat((int) parser.peekChar(UTF_16BE)).isEqualTo(expectedChar);
  }
  @Test
  public void peekChar_utf16_misaligned_returnsGarbage() {
    // Use UTF_16BE to avoid encoding a BOM.
    ParsableByteArray parser = new ParsableByteArray("foo".getBytes(UTF_16BE));
    // Move the position so we are reading the second byte of 'f' and the first byte of 'o'.
    parser.setPosition(1);
    char expectedChar = '昀';
    assertThat(parser.peekChar(UTF_16)).isEqualTo(expectedChar);
    assertThat(parser.peekChar(UTF_16BE)).isEqualTo(expectedChar);
  }
  @Test
  public void peekChar_utf16_atLimit_returnsZero() {
    // Use UTF_16BE to avoid encoding a BOM. Set the limit before the end of the byte array.
    ParsableByteArray parser = new ParsableByteArray("foo".getBytes(UTF_16BE), /* limit= */ 2);
    // Only one readable byte, not enough for a UTF-16 code unit.
    parser.setPosition(1);
    // Compare ints instead of chars for unprintable characters, so the failure messages are better.
    int expectedChar = 0;
    assertThat((int) parser.peekChar(UTF_16)).isEqualTo(expectedChar);
    assertThat((int) parser.peekChar(UTF_16BE)).isEqualTo(expectedChar);
  }
  @Test
  public void peekChar_utf16le() {
    ParsableByteArray parser = new ParsableByteArray("foo".getBytes(UTF_16LE));
    assertThat(parser.peekChar(UTF_16LE)).isEqualTo('f');
  }
  @Test
  public void peekChar_utf16le_basicMultilingualPlane() {
    ParsableByteArray parser = new ParsableByteArray("étude".getBytes(UTF_16LE));
    assertThat(parser.peekChar(UTF_16LE)).isEqualTo('é');
  }
  @Test
  public void peekChar_utf16le_surrogatePair_returnsHighSurrogate() {
    ParsableByteArray parser = new ParsableByteArray("\uD83D\uDE1B".getBytes(UTF_16LE));
    // Compare ints instead of chars for unprintable characters, so the failure messages are better.
    assertThat((int) parser.peekChar(UTF_16LE)).isEqualTo(0xD83D);
  }
  @Test
  public void peekChar_utf16le_splitSurrogatePair_returnsLowSurrogate() {
    ParsableByteArray parser = new ParsableByteArray("\uD83D\uDE1B".getBytes(UTF_16LE));
    parser.skipBytes(2);
    // Compare ints instead of chars for unprintable characters, so the failure messages are better.
    assertThat((int) parser.peekChar(UTF_16LE)).isEqualTo(0xDE1B);
  }
  @Test
  public void peekChar_utf16le_misaligned_returnsGarbage() {
    ParsableByteArray parser = new ParsableByteArray("foo".getBytes(UTF_16LE));
    // Move the position so we are reading the second byte of 'f' and the first byte of 'o'.
    parser.setPosition(1);
    assertThat(parser.peekChar(UTF_16LE)).isEqualTo('漀');
  }
  @Test
  public void peekChar_utf16le_atLimit_returnsZero() {
    // Set the limit before the end of the byte array.
    ParsableByteArray parser = new ParsableByteArray("foo".getBytes(UTF_16LE), /* limit= */ 2);
    // Only one readable byte, not enough for a UTF-16 code unit.
    parser.setPosition(1);
    // Compare ints instead of chars for unprintable characters, so the failure messages are better.
    assertThat((int) parser.peekChar(UTF_16LE)).isEqualTo(0);
  }
  @Test
  public void readUnsignedLeb128ToLong() {
    byte[] bytes = new byte[] {(byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0x0F};