mirror of
https://github.com/androidx/media.git
synced 2025-04-30 06:46:50 +08:00
Fix casting in ParsableByteArray.peekCharacterAndSize
This was introduced in 841bdc6efe
.
There's no need to cast the `char` (2 bytes) down to a `byte` in order
to pack it into an `int` (4 bytes) alongside a short (2 bytes).
We also don't need to use `short` to track the character count (max 4
with UTF-8) - a single byte is enough. This change also uses
`Ints.fromBytes` to avoid having to reason about how casting &
bit-shifting interact.
This change introduces a test which reproduces the failure reported in
Issue: androidx/media#2167.
#cherrypick
PiperOrigin-RevId: 730809219
This commit is contained in:
parent
3f493eaf9e
commit
fe19d8c9be
@ -17,6 +17,8 @@
|
||||
* Audio:
|
||||
* Video:
|
||||
* Text:
|
||||
* Fix handling of multi-byte UTF-8 characters in WebVTT files using CR
|
||||
line endings ([#2167](https://github.com/androidx/media/issues/2167)).
|
||||
* Metadata:
|
||||
* Image:
|
||||
* DataSource:
|
||||
|
@ -19,7 +19,6 @@ import androidx.annotation.Nullable;
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import com.google.common.primitives.Chars;
|
||||
import com.google.common.primitives.Ints;
|
||||
import com.google.common.primitives.UnsignedBytes;
|
||||
import com.google.errorprone.annotations.CheckReturnValue;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
@ -721,8 +720,8 @@ public final class ParsableByteArray {
|
||||
|
||||
/**
|
||||
* Peeks at the character at {@link #position} (as decoded by {@code charset}), returns it and the
|
||||
* number of bytes the character takes up within the array packed into an int. First four bytes
|
||||
* are the character and the second four is the size in bytes it takes. Returns 0 if {@link
|
||||
* number of bytes the character takes up within the array packed into an int. First two bytes are
|
||||
* the character and the second two is the size in bytes it takes. Returns 0 if {@link
|
||||
* #bytesLeft()} doesn't allow reading a whole character in {@code charset} or if the {@code
|
||||
* charset} is not one of US_ASCII, UTF-8, UTF-16, UTF-16BE, or UTF-16LE.
|
||||
*
|
||||
@ -730,23 +729,27 @@ public final class ParsableByteArray {
|
||||
* bytes for UTF-16).
|
||||
*/
|
||||
private int peekCharacterAndSize(Charset charset) {
|
||||
byte character;
|
||||
short characterSize;
|
||||
byte charByte1;
|
||||
byte charByte2;
|
||||
byte characterSize;
|
||||
if ((charset.equals(StandardCharsets.UTF_8) || charset.equals(StandardCharsets.US_ASCII))
|
||||
&& bytesLeft() >= 1) {
|
||||
character = (byte) Chars.checkedCast(UnsignedBytes.toInt(data[position]));
|
||||
charByte1 = 0;
|
||||
charByte2 = data[position];
|
||||
characterSize = 1;
|
||||
} else if ((charset.equals(StandardCharsets.UTF_16)
|
||||
|| charset.equals(StandardCharsets.UTF_16BE))
|
||||
&& bytesLeft() >= 2) {
|
||||
character = (byte) Chars.fromBytes(data[position], data[position + 1]);
|
||||
charByte1 = data[position];
|
||||
charByte2 = data[position + 1];
|
||||
characterSize = 2;
|
||||
} else if (charset.equals(StandardCharsets.UTF_16LE) && bytesLeft() >= 2) {
|
||||
character = (byte) Chars.fromBytes(data[position + 1], data[position]);
|
||||
charByte1 = data[position + 1];
|
||||
charByte2 = data[position];
|
||||
characterSize = 2;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
return (Chars.checkedCast(character) << Short.SIZE) + characterSize;
|
||||
return Ints.fromBytes(charByte1, charByte2, (byte) 0, characterSize);
|
||||
}
|
||||
}
|
||||
|
@ -679,6 +679,31 @@ public final class ParsableByteArrayTest {
|
||||
assertThat(parser.readLine()).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readTwoLinesWithCr_utf8() {
|
||||
byte[] bytes = "foo\rbar".getBytes(StandardCharsets.UTF_8);
|
||||
ParsableByteArray parser = new ParsableByteArray(bytes);
|
||||
|
||||
assertThat(parser.readLine()).isEqualTo("foo");
|
||||
assertThat(parser.getPosition()).isEqualTo(4);
|
||||
assertThat(parser.readLine()).isEqualTo("bar");
|
||||
assertThat(parser.getPosition()).isEqualTo(7);
|
||||
assertThat(parser.readLine()).isNull();
|
||||
}
|
||||
|
||||
// https://github.com/androidx/media/issues/2167
|
||||
@Test
|
||||
public void readTwoLinesWithCrAndWideChar_utf8() {
|
||||
byte[] bytes = "foo\r\uD83D\uDE1B".getBytes(StandardCharsets.UTF_8);
|
||||
ParsableByteArray parser = new ParsableByteArray(bytes);
|
||||
|
||||
assertThat(parser.readLine()).isEqualTo("foo");
|
||||
assertThat(parser.getPosition()).isEqualTo(4);
|
||||
assertThat(parser.readLine()).isEqualTo("\uD83D\uDE1B");
|
||||
assertThat(parser.getPosition()).isEqualTo(8);
|
||||
assertThat(parser.readLine()).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readTwoLinesWithCrFollowedByLf_utf8() {
|
||||
byte[] bytes = "foo\r\nbar".getBytes(StandardCharsets.UTF_8);
|
||||
|
@ -56,6 +56,8 @@ public class WebvttParserTest {
|
||||
"media/webvtt/with_overlapping_timestamps";
|
||||
private static final String WITH_VERTICAL_FILE = "media/webvtt/with_vertical";
|
||||
private static final String WITH_RUBIES_FILE = "media/webvtt/with_rubies";
|
||||
private static final String WITH_WIDE_CHARS_AND_CR_ENDINGS_FILE =
|
||||
"media/webvtt/with_wide_unicode_chars_and_cr_endings";
|
||||
private static final String WITH_BAD_CUE_HEADER_FILE = "media/webvtt/with_bad_cue_header";
|
||||
private static final String WITH_TAGS_FILE = "media/webvtt/with_tags";
|
||||
private static final String WITH_CSS_STYLES = "media/webvtt/with_css_styles";
|
||||
@ -497,6 +499,17 @@ public class WebvttParserTest {
|
||||
assertThat((Spanned) fourthCue.text).hasNoSpans();
|
||||
}
|
||||
|
||||
// https://github.com/androidx/media/issues/2167
|
||||
@Test
|
||||
public void parseWithWideUnicodeCharsAndCrLineEndings() throws Exception {
|
||||
ImmutableList<CuesWithTiming> allCues =
|
||||
getCuesForTestAsset(WITH_WIDE_CHARS_AND_CR_ENDINGS_FILE);
|
||||
|
||||
// WebvttCueParser normalizes all line endings within cue text to \n.
|
||||
assertThat(Iterables.getOnlyElement(Iterables.getOnlyElement(allCues).cues).text.toString())
|
||||
.isEqualTo("\uD83D\uDE1B\n\uD83D\uDE1B");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void parseWithBadCueHeader() throws Exception {
|
||||
List<CuesWithTiming> allCues = getCuesForTestAsset(WITH_BAD_CUE_HEADER_FILE);
|
||||
|
@ -0,0 +1 @@
|
||||
WEBVTT
00:00.000 --> 00:01.234
😛
😛
|
Loading…
x
Reference in New Issue
Block a user