mirror of
https://github.com/androidx/media.git
synced 2025-04-30 06:46:50 +08:00
Fix casting in ParsableByteArray.peekCharacterAndSize
This was introduced in 841bdc6efe.
There's no need to cast the `char` (2 bytes) down to a `byte` in order
to pack it into an `int` (4 bytes) alongside a `short` (2 bytes).
We also don't need to use `short` to track the character count (max 4
with UTF-8) - a single byte is enough. This change also uses
`Ints.fromBytes` to avoid having to reason about how casting &
bit-shifting interact.
This change introduces a test which reproduces the failure reported in
Issue: androidx/media#2167.
#cherrypick
PiperOrigin-RevId: 730809219
This commit is contained in:
parent
3f493eaf9e
commit
fe19d8c9be
@ -17,6 +17,8 @@
|
|||||||
* Audio:
|
* Audio:
|
||||||
* Video:
|
* Video:
|
||||||
* Text:
|
* Text:
|
||||||
|
* Fix handling of multi-byte UTF-8 characters in WebVTT files using CR
|
||||||
|
line endings ([#2167](https://github.com/androidx/media/issues/2167)).
|
||||||
* Metadata:
|
* Metadata:
|
||||||
* Image:
|
* Image:
|
||||||
* DataSource:
|
* DataSource:
|
||||||
|
@ -19,7 +19,6 @@ import androidx.annotation.Nullable;
|
|||||||
import com.google.common.collect.ImmutableSet;
|
import com.google.common.collect.ImmutableSet;
|
||||||
import com.google.common.primitives.Chars;
|
import com.google.common.primitives.Chars;
|
||||||
import com.google.common.primitives.Ints;
|
import com.google.common.primitives.Ints;
|
||||||
import com.google.common.primitives.UnsignedBytes;
|
|
||||||
import com.google.errorprone.annotations.CheckReturnValue;
|
import com.google.errorprone.annotations.CheckReturnValue;
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.nio.charset.Charset;
|
import java.nio.charset.Charset;
|
||||||
@ -721,8 +720,8 @@ public final class ParsableByteArray {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Peeks at the character at {@link #position} (as decoded by {@code charset}), returns it and the
|
* Peeks at the character at {@link #position} (as decoded by {@code charset}), returns it and the
|
||||||
* number of bytes the character takes up within the array packed into an int. First four bytes
|
* number of bytes the character takes up within the array packed into an int. First two bytes are
|
||||||
* are the character and the second four is the size in bytes it takes. Returns 0 if {@link
|
* the character and the second two is the size in bytes it takes. Returns 0 if {@link
|
||||||
* #bytesLeft()} doesn't allow reading a whole character in {@code charset} or if the {@code
|
* #bytesLeft()} doesn't allow reading a whole character in {@code charset} or if the {@code
|
||||||
* charset} is not one of US_ASCII, UTF-8, UTF-16, UTF-16BE, or UTF-16LE.
|
* charset} is not one of US_ASCII, UTF-8, UTF-16, UTF-16BE, or UTF-16LE.
|
||||||
*
|
*
|
||||||
@ -730,23 +729,27 @@ public final class ParsableByteArray {
|
|||||||
* bytes for UTF-16).
|
* bytes for UTF-16).
|
||||||
*/
|
*/
|
||||||
private int peekCharacterAndSize(Charset charset) {
|
private int peekCharacterAndSize(Charset charset) {
|
||||||
byte character;
|
byte charByte1;
|
||||||
short characterSize;
|
byte charByte2;
|
||||||
|
byte characterSize;
|
||||||
if ((charset.equals(StandardCharsets.UTF_8) || charset.equals(StandardCharsets.US_ASCII))
|
if ((charset.equals(StandardCharsets.UTF_8) || charset.equals(StandardCharsets.US_ASCII))
|
||||||
&& bytesLeft() >= 1) {
|
&& bytesLeft() >= 1) {
|
||||||
character = (byte) Chars.checkedCast(UnsignedBytes.toInt(data[position]));
|
charByte1 = 0;
|
||||||
|
charByte2 = data[position];
|
||||||
characterSize = 1;
|
characterSize = 1;
|
||||||
} else if ((charset.equals(StandardCharsets.UTF_16)
|
} else if ((charset.equals(StandardCharsets.UTF_16)
|
||||||
|| charset.equals(StandardCharsets.UTF_16BE))
|
|| charset.equals(StandardCharsets.UTF_16BE))
|
||||||
&& bytesLeft() >= 2) {
|
&& bytesLeft() >= 2) {
|
||||||
character = (byte) Chars.fromBytes(data[position], data[position + 1]);
|
charByte1 = data[position];
|
||||||
|
charByte2 = data[position + 1];
|
||||||
characterSize = 2;
|
characterSize = 2;
|
||||||
} else if (charset.equals(StandardCharsets.UTF_16LE) && bytesLeft() >= 2) {
|
} else if (charset.equals(StandardCharsets.UTF_16LE) && bytesLeft() >= 2) {
|
||||||
character = (byte) Chars.fromBytes(data[position + 1], data[position]);
|
charByte1 = data[position + 1];
|
||||||
|
charByte2 = data[position];
|
||||||
characterSize = 2;
|
characterSize = 2;
|
||||||
} else {
|
} else {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
return (Chars.checkedCast(character) << Short.SIZE) + characterSize;
|
return Ints.fromBytes(charByte1, charByte2, (byte) 0, characterSize);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -679,6 +679,31 @@ public final class ParsableByteArrayTest {
|
|||||||
assertThat(parser.readLine()).isNull();
|
assertThat(parser.readLine()).isNull();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void readTwoLinesWithCr_utf8() {
|
||||||
|
byte[] bytes = "foo\rbar".getBytes(StandardCharsets.UTF_8);
|
||||||
|
ParsableByteArray parser = new ParsableByteArray(bytes);
|
||||||
|
|
||||||
|
assertThat(parser.readLine()).isEqualTo("foo");
|
||||||
|
assertThat(parser.getPosition()).isEqualTo(4);
|
||||||
|
assertThat(parser.readLine()).isEqualTo("bar");
|
||||||
|
assertThat(parser.getPosition()).isEqualTo(7);
|
||||||
|
assertThat(parser.readLine()).isNull();
|
||||||
|
}
|
||||||
|
|
||||||
|
// https://github.com/androidx/media/issues/2167
|
||||||
|
@Test
|
||||||
|
public void readTwoLinesWithCrAndWideChar_utf8() {
|
||||||
|
byte[] bytes = "foo\r\uD83D\uDE1B".getBytes(StandardCharsets.UTF_8);
|
||||||
|
ParsableByteArray parser = new ParsableByteArray(bytes);
|
||||||
|
|
||||||
|
assertThat(parser.readLine()).isEqualTo("foo");
|
||||||
|
assertThat(parser.getPosition()).isEqualTo(4);
|
||||||
|
assertThat(parser.readLine()).isEqualTo("\uD83D\uDE1B");
|
||||||
|
assertThat(parser.getPosition()).isEqualTo(8);
|
||||||
|
assertThat(parser.readLine()).isNull();
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void readTwoLinesWithCrFollowedByLf_utf8() {
|
public void readTwoLinesWithCrFollowedByLf_utf8() {
|
||||||
byte[] bytes = "foo\r\nbar".getBytes(StandardCharsets.UTF_8);
|
byte[] bytes = "foo\r\nbar".getBytes(StandardCharsets.UTF_8);
|
||||||
|
@ -56,6 +56,8 @@ public class WebvttParserTest {
|
|||||||
"media/webvtt/with_overlapping_timestamps";
|
"media/webvtt/with_overlapping_timestamps";
|
||||||
private static final String WITH_VERTICAL_FILE = "media/webvtt/with_vertical";
|
private static final String WITH_VERTICAL_FILE = "media/webvtt/with_vertical";
|
||||||
private static final String WITH_RUBIES_FILE = "media/webvtt/with_rubies";
|
private static final String WITH_RUBIES_FILE = "media/webvtt/with_rubies";
|
||||||
|
private static final String WITH_WIDE_CHARS_AND_CR_ENDINGS_FILE =
|
||||||
|
"media/webvtt/with_wide_unicode_chars_and_cr_endings";
|
||||||
private static final String WITH_BAD_CUE_HEADER_FILE = "media/webvtt/with_bad_cue_header";
|
private static final String WITH_BAD_CUE_HEADER_FILE = "media/webvtt/with_bad_cue_header";
|
||||||
private static final String WITH_TAGS_FILE = "media/webvtt/with_tags";
|
private static final String WITH_TAGS_FILE = "media/webvtt/with_tags";
|
||||||
private static final String WITH_CSS_STYLES = "media/webvtt/with_css_styles";
|
private static final String WITH_CSS_STYLES = "media/webvtt/with_css_styles";
|
||||||
@ -497,6 +499,17 @@ public class WebvttParserTest {
|
|||||||
assertThat((Spanned) fourthCue.text).hasNoSpans();
|
assertThat((Spanned) fourthCue.text).hasNoSpans();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// https://github.com/androidx/media/issues/2167
|
||||||
|
@Test
|
||||||
|
public void parseWithWideUnicodeCharsAndCrLineEndings() throws Exception {
|
||||||
|
ImmutableList<CuesWithTiming> allCues =
|
||||||
|
getCuesForTestAsset(WITH_WIDE_CHARS_AND_CR_ENDINGS_FILE);
|
||||||
|
|
||||||
|
// WebvttCueParser normalizes all line endings within cue text to \n.
|
||||||
|
assertThat(Iterables.getOnlyElement(Iterables.getOnlyElement(allCues).cues).text.toString())
|
||||||
|
.isEqualTo("\uD83D\uDE1B\n\uD83D\uDE1B");
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void parseWithBadCueHeader() throws Exception {
|
public void parseWithBadCueHeader() throws Exception {
|
||||||
List<CuesWithTiming> allCues = getCuesForTestAsset(WITH_BAD_CUE_HEADER_FILE);
|
List<CuesWithTiming> allCues = getCuesForTestAsset(WITH_BAD_CUE_HEADER_FILE);
|
||||||
|
@ -0,0 +1 @@
|
|||||||
|
WEBVTT
00:00.000 --> 00:01.234
😛
😛
|
Loading…
x
Reference in New Issue
Block a user