From 5609efd0e0e5087faa586a0bdd4520818e93ecb2 Mon Sep 17 00:00:00 2001 From: Lev Date: Sat, 5 Nov 2022 10:40:01 +0300 Subject: [PATCH 1/4] Added UTF-16 (LE) and UTF-16 (BE) support for subrip subtitles. --- .../exoplayer2/util/ParsableByteArray.java | 73 ++++++++++++++++++ .../google/android/exoplayer2/util/Util.java | 24 ++++++ .../exoplayer2/text/subrip/SubripDecoder.java | 54 ++++++++++++- .../text/subrip/SubripDecoderTest.java | 30 +++++++ .../test/assets/media/subrip/typical_utf16be | Bin 0 -> 434 bytes .../test/assets/media/subrip/typical_utf16le | Bin 0 -> 434 bytes 6 files changed, 177 insertions(+), 4 deletions(-) create mode 100644 testdata/src/test/assets/media/subrip/typical_utf16be create mode 100644 testdata/src/test/assets/media/subrip/typical_utf16le diff --git a/library/common/src/main/java/com/google/android/exoplayer2/util/ParsableByteArray.java b/library/common/src/main/java/com/google/android/exoplayer2/util/ParsableByteArray.java index 79913d2aa9..0eaeddd74a 100644 --- a/library/common/src/main/java/com/google/android/exoplayer2/util/ParsableByteArray.java +++ b/library/common/src/main/java/com/google/android/exoplayer2/util/ParsableByteArray.java @@ -531,6 +531,54 @@ public final class ParsableByteArray { return line; } + /** + * Reads a line of text. + * + *

A line is considered to be terminated by any one of a carriage return ('\r'), a line feed + * ('\n'), or a carriage return followed immediately by a line feed ('\r\n'). The UTF-16 charset + * is used. This method discards leading UTF-16 byte order marks (BOM), if present. + * + * @param isLittleEndian UTF-16 (LE) or UTF-16 (BE) encoding should be used + * @return The line not including any line-termination characters, or null if the end of the data + * has already been reached. + */ + @Nullable + public String readLineUtf16(boolean isLittleEndian) { + if (bytesLeft() == 0) { + return null; + } + + int lineLimit = calculateLineLimitForUtf16(isLittleEndian); + + if (lineLimit - position >= 2 && isUtf16BOM(data[position], data[position + 1])) { + // There's a UTF-16 byte order mark at the start of the line. Discard it. + position += 2; + } + + String line; + if (isLittleEndian) { + line = Util.fromUtf16LEBytes(data, position, lineLimit - position); + } else { + line = Util.fromUtf16BEBytes(data, position, lineLimit - position); + } + + position = lineLimit; + if (position == limit) { + return line; + } + + if (isEqualsInUtf16(data[position], data[position + 1], '\r', isLittleEndian)) { + position += 2; + if (position == limit) { + return line; + } + } + if (isEqualsInUtf16(data[position], data[position + 1], '\n', isLittleEndian)) { + position += 2; + } + return line; + } + /** * Reads a long value encoded by UTF-8 encoding * @@ -565,4 +613,29 @@ public final class ParsableByteArray { position += length; return value; } + + private boolean isEqualsInUtf16(byte first, byte second, char value, boolean isLittleEndian) { + return (isLittleEndian && (first | second << 8) == value) + || (!isLittleEndian && (first << 8 | second) == value); + } + + private boolean isUtf16BOM(byte first, byte second) { + return (first == (byte) 0xFF && second == (byte) 0xFE) + || (first == (byte) 0xFE && second == (byte) 0xFF); + } + + private int calculateLineLimitForUtf16(boolean isLittleEndian) { + int lineLimit = position; + while (lineLimit < limit - 1) { + if (isLittleEndian && Util.isLinebreak(data[lineLimit] | data[lineLimit + 1] << 8)) { + break; + } else if (!isLittleEndian && Util.isLinebreak(data[lineLimit] << 8 | data[lineLimit + 1])) { + break; + } + + lineLimit += 2; + } + + return lineLimit; + } } diff --git a/library/common/src/main/java/com/google/android/exoplayer2/util/Util.java b/library/common/src/main/java/com/google/android/exoplayer2/util/Util.java index b0479023d2..41dd6ce193 100644 --- a/library/common/src/main/java/com/google/android/exoplayer2/util/Util.java +++ b/library/common/src/main/java/com/google/android/exoplayer2/util/Util.java @@ -682,6 +682,30 @@ public final class Util { return new String(bytes, offset, length, Charsets.UTF_8); } + /** + * Returns a new {@link String} constructed by decoding UTF-16 (LE) encoded bytes in a subarray. + * + * @param bytes The UTF-16 encoded bytes to decode. + * @param offset The index of the first byte to decode. + * @param length The number of bytes to decode. + * @return The string. + */ + public static String fromUtf16LEBytes(byte[] bytes, int offset, int length) { + return new String(bytes, offset, length, Charsets.UTF_16LE); + } + + /** + * Returns a new {@link String} constructed by decoding UTF-16 (BE) encoded bytes in a subarray. + * + * @param bytes The UTF-16 encoded bytes to decode. + * @param offset The index of the first byte to decode. + * @param length The number of bytes to decode. + * @return The string. + */ + public static String fromUtf16BEBytes(byte[] bytes, int offset, int length) { + return new String(bytes, offset, length, Charsets.UTF_16BE); + } + /** * Returns a new byte array containing the code points of a {@link String} encoded using UTF-8. * diff --git a/library/extractor/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java b/library/extractor/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java index 2ae22bacd2..df5c2a7bb4 100644 --- a/library/extractor/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java +++ b/library/extractor/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java @@ -26,6 +26,8 @@ import com.google.android.exoplayer2.util.Assertions; import com.google.android.exoplayer2.util.Log; import com.google.android.exoplayer2.util.LongArray; import com.google.android.exoplayer2.util.ParsableByteArray; +import com.google.common.base.Charsets; +import java.nio.charset.Charset; import java.util.ArrayList; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -75,8 +77,25 @@ public final class SubripDecoder extends SimpleSubtitleDecoder { LongArray cueTimesUs = new LongArray(); ParsableByteArray subripData = new ParsableByteArray(bytes, length); + @Nullable Charset utf16Charset; + if (bytes.length >= 2) { + utf16Charset = getUtf16Charset(bytes[0], bytes[1]); + } else { + utf16Charset = null; + } + @Nullable String currentLine; - while ((currentLine = subripData.readLine()) != null) { + while (true) { + if (utf16Charset != null) { + currentLine = subripData.readLineUtf16(utf16Charset.equals(Charsets.UTF_16LE)); + } else { + currentLine = subripData.readLine(); + } + + if (currentLine == null) { + break; + } + if (currentLine.length() == 0) { // Skip blank lines. continue; @@ -91,7 +110,11 @@ public final class SubripDecoder extends SimpleSubtitleDecoder { } // Read and parse the timing line. - currentLine = subripData.readLine(); + if (utf16Charset != null) { + currentLine = subripData.readLineUtf16(utf16Charset.equals(Charsets.UTF_16LE)); + } else { + currentLine = subripData.readLine(); + } if (currentLine == null) { Log.w(TAG, "Unexpected end"); break; @@ -109,13 +132,21 @@ public final class SubripDecoder extends SimpleSubtitleDecoder { // Read and parse the text and tags. textBuilder.setLength(0); tags.clear(); - currentLine = subripData.readLine(); + if (utf16Charset != null) { + currentLine = subripData.readLineUtf16(utf16Charset.equals(Charsets.UTF_16LE)); + } else { + currentLine = subripData.readLine(); + } while (!TextUtils.isEmpty(currentLine)) { if (textBuilder.length() > 0) { textBuilder.append("
"); } textBuilder.append(processLine(currentLine, tags)); - currentLine = subripData.readLine(); + if (utf16Charset != null) { + currentLine = subripData.readLineUtf16(utf16Charset.equals(Charsets.UTF_16LE)); + } else { + currentLine = subripData.readLine(); + } } Spanned text = Html.fromHtml(textBuilder.toString()); @@ -138,6 +169,21 @@ public final class SubripDecoder extends SimpleSubtitleDecoder { return new SubripSubtitle(cuesArray, cueTimesUsArray); } + @Nullable + private Charset getUtf16Charset(byte first, byte second) { + if (first == (byte) 0xFE && second == (byte) 0xFF) { + // UTF-16 (BE) + return Charsets.UTF_16BE; + } + + if (first == (byte) 0xFF && second == (byte) 0xFE) { + // UTF-16 (LE) + return Charsets.UTF_16LE; + } + + return null; + } + /** * Trims and removes tags from the given line. The removed tags are added to {@code tags}. * diff --git a/library/extractor/src/test/java/com/google/android/exoplayer2/text/subrip/SubripDecoderTest.java b/library/extractor/src/test/java/com/google/android/exoplayer2/text/subrip/SubripDecoderTest.java index c868cc9a70..f7175b0b6b 100644 --- a/library/extractor/src/test/java/com/google/android/exoplayer2/text/subrip/SubripDecoderTest.java +++ b/library/extractor/src/test/java/com/google/android/exoplayer2/text/subrip/SubripDecoderTest.java @@ -40,6 +40,8 @@ public final class SubripDecoderTest { private static final String TYPICAL_NEGATIVE_TIMESTAMPS = "media/subrip/typical_negative_timestamps"; private static final String TYPICAL_UNEXPECTED_END = "media/subrip/typical_unexpected_end"; + private static final String TYPICAL_UTF16BE = "media/subrip/typical_utf16be"; + private static final String TYPICAL_UTF16LE = "media/subrip/typical_utf16le"; private static final String TYPICAL_WITH_TAGS = "media/subrip/typical_with_tags"; private static final String TYPICAL_NO_HOURS_AND_MILLIS = "media/subrip/typical_no_hours_and_millis"; @@ -80,6 +82,34 @@ public final class SubripDecoderTest { assertTypicalCue3(subtitle, 4); } + @Test + public void decodeTypicalUtf16LE() throws IOException { + SubripDecoder decoder = new SubripDecoder(); + byte[] bytes = + TestUtil.getByteArray( + ApplicationProvider.getApplicationContext(), TYPICAL_UTF16LE); + Subtitle subtitle = decoder.decode(bytes, bytes.length, false); + + assertThat(subtitle.getEventTimeCount()).isEqualTo(6); + assertTypicalCue1(subtitle, 0); + assertTypicalCue2(subtitle, 2); + assertTypicalCue3(subtitle, 4); + } + + @Test + public void decodeTypicalUtf16BE() throws IOException { + SubripDecoder decoder = new SubripDecoder(); + byte[] bytes = + TestUtil.getByteArray( + ApplicationProvider.getApplicationContext(), TYPICAL_UTF16BE); + Subtitle subtitle = decoder.decode(bytes, bytes.length, false); + + assertThat(subtitle.getEventTimeCount()).isEqualTo(6); + assertTypicalCue1(subtitle, 0); + assertTypicalCue2(subtitle, 2); + assertTypicalCue3(subtitle, 4); + } + @Test public void decodeTypicalExtraBlankLine() throws IOException { SubripDecoder decoder = new SubripDecoder(); diff --git a/testdata/src/test/assets/media/subrip/typical_utf16be b/testdata/src/test/assets/media/subrip/typical_utf16be new file mode 100644 index 0000000000000000000000000000000000000000..9531c268087bec207cf8b766bc60ef01c13b354a GIT binary patch literal 434 zcmaKoYYM_J5QOJ$fhL z8D_R1{Qq)-*}-tqO|8x&-1|vHs%KDIh3R-#L%;p$w?V&&oGVf1wXJ&kW5gQ71~ Date: Sat, 26 Nov 2022 02:37:26 +0300 Subject: [PATCH 2/4] Fixes after review --- .../exoplayer2/util/ParsableByteArray.java | 69 ++++++++++++------ .../google/android/exoplayer2/util/Util.java | 24 ------- .../exoplayer2/text/subrip/SubripDecoder.java | 72 +++++++------------ .../exoplayer2/text/tx3g/Tx3gDecoder.java | 5 +- 4 files changed, 76 insertions(+), 94 deletions(-) diff --git a/library/common/src/main/java/com/google/android/exoplayer2/util/ParsableByteArray.java b/library/common/src/main/java/com/google/android/exoplayer2/util/ParsableByteArray.java index 0eaeddd74a..4a101d82fa 100644 --- a/library/common/src/main/java/com/google/android/exoplayer2/util/ParsableByteArray.java +++ b/library/common/src/main/java/com/google/android/exoplayer2/util/ParsableByteArray.java @@ -27,6 +27,10 @@ import java.util.Arrays; */ public final class ParsableByteArray { + // UTF-16 BOM + public static final char BOM_UTF16_BE = '\uFEFF'; + public static final char BOM_UTF16_LE = '\uFFFE'; + private byte[] data; private int position; // TODO(internal b/147657250): Enforce this limit on all read methods. @@ -153,6 +157,11 @@ public final class ParsableByteArray { this.position = position; } + /** Resets the current byte offset. */ + public void resetPosition() { + this.position = 0; + } + /** * Returns the underlying array. * @@ -228,6 +237,11 @@ public final class ParsableByteArray { return (char) ((data[position] & 0xFF) << 8 | (data[position + 1] & 0xFF)); } + /** Peeks at the next char. */ + public char peekLittleEndianChar() { + return (char) ((data[position] & 0xFF) | (data[position + 1] & 0xFF) << 8 ); + } + /** Reads the next byte as an unsigned value. */ public int readUnsignedByte() { return (data[position++] & 0xFF); @@ -532,48 +546,67 @@ public final class ParsableByteArray { } /** - * Reads a line of text. + * Reads a line of text. Only UTF-8, UTF-16LE, UTF-16BE encoding supported. * *

A line is considered to be terminated by any one of a carriage return ('\r'), a line feed * ('\n'), or a carriage return followed immediately by a line feed ('\r\n'). The UTF-16 charset * is used. This method discards leading UTF-16 byte order marks (BOM), if present. * - * @param isLittleEndian UTF-16 (LE) or UTF-16 (BE) encoding should be used + * @param charset used encoding. * @return The line not including any line-termination characters, or null if the end of the data * has already been reached. + * @throws IllegalArgumentException if charset not supported. */ @Nullable - public String readLineUtf16(boolean isLittleEndian) { + public String readUtfLine(Charset charset) { + if(!charset.equals(Charsets.UTF_8) + && !charset.equals(Charsets.UTF_16BE) + && !charset.equals(Charsets.UTF_16LE)) { + throw new IllegalArgumentException("Only UTF-8, UTF-16LE, UTF-16BE encoding supported."); + } + if(charset.equals(Charsets.UTF_8)) { + return readLine(); + } + if (bytesLeft() == 0) { return null; } + boolean isLittleEndian = charset.equals(Charsets.UTF_16LE); int lineLimit = calculateLineLimitForUtf16(isLittleEndian); - if (lineLimit - position >= 2 && isUtf16BOM(data[position], data[position + 1])) { + if (lineLimit - position >= 2 && isUtf16BOM(peekChar())) { // There's a UTF-16 byte order mark at the start of the line. Discard it. position += 2; } - String line; - if (isLittleEndian) { - line = Util.fromUtf16LEBytes(data, position, lineLimit - position); - } else { - line = Util.fromUtf16BEBytes(data, position, lineLimit - position); - } + String line = readString(lineLimit - position, charset); - position = lineLimit; if (position == limit) { return line; } - if (isEqualsInUtf16(data[position], data[position + 1], '\r', isLittleEndian)) { + char currentChar; + if(isLittleEndian) { + currentChar = peekLittleEndianChar(); + } else { + currentChar = peekChar(); + } + + if (currentChar == '\r') { position += 2; if (position == limit) { return line; } } - if (isEqualsInUtf16(data[position], data[position + 1], '\n', isLittleEndian)) { + + if(isLittleEndian) { + currentChar = peekLittleEndianChar(); + } else { + currentChar = peekChar(); + } + + if (currentChar == '\n') { position += 2; } return line; @@ -614,14 +647,8 @@ public final class ParsableByteArray { return value; } - private boolean isEqualsInUtf16(byte first, byte second, char value, boolean isLittleEndian) { - return (isLittleEndian && (first | second << 8) == value) - || (!isLittleEndian && (first << 8 | second) == value); - } - - private boolean isUtf16BOM(byte first, byte second) { - return (first == (byte) 0xFF && second == (byte) 0xFE) - || (first == (byte) 0xFE && second == (byte) 0xFF); + private boolean isUtf16BOM(char character) { + return character == BOM_UTF16_BE || character == BOM_UTF16_LE; } private int calculateLineLimitForUtf16(boolean isLittleEndian) { diff --git a/library/common/src/main/java/com/google/android/exoplayer2/util/Util.java b/library/common/src/main/java/com/google/android/exoplayer2/util/Util.java index 41dd6ce193..b0479023d2 100644 --- a/library/common/src/main/java/com/google/android/exoplayer2/util/Util.java +++ b/library/common/src/main/java/com/google/android/exoplayer2/util/Util.java @@ -682,30 +682,6 @@ public final class Util { return new String(bytes, offset, length, Charsets.UTF_8); } - /** - * Returns a new {@link String} constructed by decoding UTF-16 (LE) encoded bytes in a subarray. - * - * @param bytes The UTF-16 encoded bytes to decode. - * @param offset The index of the first byte to decode. - * @param length The number of bytes to decode. - * @return The string. - */ - public static String fromUtf16LEBytes(byte[] bytes, int offset, int length) { - return new String(bytes, offset, length, Charsets.UTF_16LE); - } - - /** - * Returns a new {@link String} constructed by decoding UTF-16 (BE) encoded bytes in a subarray. - * - * @param bytes The UTF-16 encoded bytes to decode. - * @param offset The index of the first byte to decode. - * @param length The number of bytes to decode. - * @return The string. - */ - public static String fromUtf16BEBytes(byte[] bytes, int offset, int length) { - return new String(bytes, offset, length, Charsets.UTF_16BE); - } - /** * Returns a new byte array containing the code points of a {@link String} encoded using UTF-8. * diff --git a/library/extractor/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java b/library/extractor/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java index df5c2a7bb4..adf7dab6e4 100644 --- a/library/extractor/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java +++ b/library/extractor/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java @@ -72,30 +72,14 @@ public final class SubripDecoder extends SimpleSubtitleDecoder { } @Override - protected Subtitle decode(byte[] bytes, int length, boolean reset) { + protected Subtitle decode(byte[] data, int length, boolean reset) { ArrayList cues = new ArrayList<>(); LongArray cueTimesUs = new LongArray(); - ParsableByteArray subripData = new ParsableByteArray(bytes, length); - - @Nullable Charset utf16Charset; - if (bytes.length >= 2) { - utf16Charset = getUtf16Charset(bytes[0], bytes[1]); - } else { - utf16Charset = null; - } + ParsableByteArray subripData = new ParsableByteArray(data, length); + Charset charset = detectUtfCharset(subripData); @Nullable String currentLine; - while (true) { - if (utf16Charset != null) { - currentLine = subripData.readLineUtf16(utf16Charset.equals(Charsets.UTF_16LE)); - } else { - currentLine = subripData.readLine(); - } - - if (currentLine == null) { - break; - } - + while ((currentLine = subripData.readUtfLine(charset)) != null) { if (currentLine.length() == 0) { // Skip blank lines. continue; @@ -110,11 +94,7 @@ public final class SubripDecoder extends SimpleSubtitleDecoder { } // Read and parse the timing line. - if (utf16Charset != null) { - currentLine = subripData.readLineUtf16(utf16Charset.equals(Charsets.UTF_16LE)); - } else { - currentLine = subripData.readLine(); - } + currentLine = subripData.readUtfLine(charset); if (currentLine == null) { Log.w(TAG, "Unexpected end"); break; @@ -132,21 +112,13 @@ public final class SubripDecoder extends SimpleSubtitleDecoder { // Read and parse the text and tags. textBuilder.setLength(0); tags.clear(); - if (utf16Charset != null) { - currentLine = subripData.readLineUtf16(utf16Charset.equals(Charsets.UTF_16LE)); - } else { - currentLine = subripData.readLine(); - } + currentLine = subripData.readUtfLine(charset); while (!TextUtils.isEmpty(currentLine)) { if (textBuilder.length() > 0) { textBuilder.append("
"); } textBuilder.append(processLine(currentLine, tags)); - if (utf16Charset != null) { - currentLine = subripData.readLineUtf16(utf16Charset.equals(Charsets.UTF_16LE)); - } else { - currentLine = subripData.readLine(); - } + currentLine = subripData.readUtfLine(charset); } Spanned text = Html.fromHtml(textBuilder.toString()); @@ -169,19 +141,29 @@ public final class SubripDecoder extends SimpleSubtitleDecoder { return new SubripSubtitle(cuesArray, cueTimesUsArray); } - @Nullable - private Charset getUtf16Charset(byte first, byte second) { - if (first == (byte) 0xFE && second == (byte) 0xFF) { - // UTF-16 (BE) - return Charsets.UTF_16BE; + /** + * Determine UTF encoding of the byte array. It can be UTF-16LE/UTF-16BE + * if the byte array contains BOM, or UTF-8 otherwise as the default behavior. + * After it resets the offset in ParsableByteArray + * + * @param data byte array to determinate UTF encoding. + * @return Determined encoding + */ + private Charset detectUtfCharset(ParsableByteArray data) { + if(data.limit() < 2) { + return Charsets.UTF_8; } - if (first == (byte) 0xFF && second == (byte) 0xFE) { - // UTF-16 (LE) - return Charsets.UTF_16LE; - } + char twoBytes = data.peekChar(); - return null; + switch (twoBytes) { + case ParsableByteArray.BOM_UTF16_BE: + return Charsets.UTF_16BE; + case ParsableByteArray.BOM_UTF16_LE: + return Charsets.UTF_16LE; + default: + return Charsets.UTF_8; + } } /** diff --git a/library/extractor/src/main/java/com/google/android/exoplayer2/text/tx3g/Tx3gDecoder.java b/library/extractor/src/main/java/com/google/android/exoplayer2/text/tx3g/Tx3gDecoder.java index 4b8f539f42..7aa471bf63 100644 --- a/library/extractor/src/main/java/com/google/android/exoplayer2/text/tx3g/Tx3gDecoder.java +++ b/library/extractor/src/main/java/com/google/android/exoplayer2/text/tx3g/Tx3gDecoder.java @@ -46,9 +46,6 @@ public final class Tx3gDecoder extends SimpleSubtitleDecoder { private static final String TAG = "Tx3gDecoder"; - private static final char BOM_UTF16_BE = '\uFEFF'; - private static final char BOM_UTF16_LE = '\uFFFE'; - private static final int TYPE_STYL = 0x7374796c; private static final int TYPE_TBOX = 0x74626f78; private static final String TX3G_SERIF = "Serif"; @@ -173,7 +170,7 @@ public final class Tx3gDecoder extends SimpleSubtitleDecoder { } if (parsableByteArray.bytesLeft() >= SIZE_BOM_UTF16) { char firstChar = parsableByteArray.peekChar(); - if (firstChar == BOM_UTF16_BE || firstChar == BOM_UTF16_LE) { + if (firstChar == ParsableByteArray.BOM_UTF16_BE || firstChar == ParsableByteArray.BOM_UTF16_LE) { return parsableByteArray.readString(textLength, Charsets.UTF_16); } } From 5e3b817b8134f21fc2db93237e155e98882c8d2f Mon Sep 17 00:00:00 2001 From: Lev Date: Sat, 26 Nov 2022 02:55:00 +0300 Subject: [PATCH 3/4] Cleanup --- .../exoplayer2/util/ParsableByteArray.java | 25 +++---------------- .../exoplayer2/text/subrip/SubripDecoder.java | 3 +-- 2 files changed, 4 insertions(+), 24 deletions(-) diff --git a/library/common/src/main/java/com/google/android/exoplayer2/util/ParsableByteArray.java b/library/common/src/main/java/com/google/android/exoplayer2/util/ParsableByteArray.java index 4a101d82fa..f4c23f7472 100644 --- a/library/common/src/main/java/com/google/android/exoplayer2/util/ParsableByteArray.java +++ b/library/common/src/main/java/com/google/android/exoplayer2/util/ParsableByteArray.java @@ -157,11 +157,6 @@ public final class ParsableByteArray { this.position = position; } - /** Resets the current byte offset. */ - public void resetPosition() { - this.position = 0; - } - /** * Returns the underlying array. * @@ -572,8 +567,7 @@ public final class ParsableByteArray { return null; } - boolean isLittleEndian = charset.equals(Charsets.UTF_16LE); - int lineLimit = calculateLineLimitForUtf16(isLittleEndian); + int lineLimit = calculateLineLimitForUtf16(charset.equals(Charsets.UTF_16LE)); if (lineLimit - position >= 2 && isUtf16BOM(peekChar())) { // There's a UTF-16 byte order mark at the start of the line. Discard it. @@ -586,27 +580,14 @@ public final class ParsableByteArray { return line; } - char currentChar; - if(isLittleEndian) { - currentChar = peekLittleEndianChar(); - } else { - currentChar = peekChar(); - } - - if (currentChar == '\r') { + if (peekLittleEndianChar() == '\r' || peekChar() == '\r') { position += 2; if (position == limit) { return line; } } - if(isLittleEndian) { - currentChar = peekLittleEndianChar(); - } else { - currentChar = peekChar(); - } - - if (currentChar == '\n') { + if (peekLittleEndianChar() == '\n' || peekChar() == '\n') { position += 2; } return line; diff --git a/library/extractor/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java b/library/extractor/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java index adf7dab6e4..679fa76e4b 100644 --- a/library/extractor/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java +++ b/library/extractor/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java @@ -144,10 +144,9 @@ public final class SubripDecoder extends SimpleSubtitleDecoder { /** * Determine UTF encoding of the byte array. It can be UTF-16LE/UTF-16BE * if the byte array contains BOM, or UTF-8 otherwise as the default behavior. - * After it resets the offset in ParsableByteArray * * @param data byte array to determinate UTF encoding. - * @return Determined encoding + * @return determined encoding */ private Charset detectUtfCharset(ParsableByteArray data) { if(data.limit() < 2) { From 101b7122672e3f39ee0690f3043fe3e47acd9243 Mon Sep 17 00:00:00 2001 From: Lev Date: Sat, 26 Nov 2022 03:04:35 +0300 Subject: [PATCH 4/4] Cleanup --- .../google/android/exoplayer2/util/ParsableByteArray.java | 6 +++--- .../android/exoplayer2/text/subrip/SubripDecoder.java | 6 ++---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/library/common/src/main/java/com/google/android/exoplayer2/util/ParsableByteArray.java b/library/common/src/main/java/com/google/android/exoplayer2/util/ParsableByteArray.java index f4c23f7472..5eaf54f3b6 100644 --- a/library/common/src/main/java/com/google/android/exoplayer2/util/ParsableByteArray.java +++ b/library/common/src/main/java/com/google/android/exoplayer2/util/ParsableByteArray.java @@ -234,7 +234,7 @@ public final class ParsableByteArray { /** Peeks at the next char. */ public char peekLittleEndianChar() { - return (char) ((data[position] & 0xFF) | (data[position + 1] & 0xFF) << 8 ); + return (char) ((data[position] & 0xFF) | (data[position + 1] & 0xFF) << 8); } /** Reads the next byte as an unsigned value. */ @@ -554,12 +554,12 @@ public final class ParsableByteArray { */ @Nullable public String readUtfLine(Charset charset) { - if(!charset.equals(Charsets.UTF_8) + if (!charset.equals(Charsets.UTF_8) && !charset.equals(Charsets.UTF_16BE) && !charset.equals(Charsets.UTF_16LE)) { throw new IllegalArgumentException("Only UTF-8, UTF-16LE, UTF-16BE encoding supported."); } - if(charset.equals(Charsets.UTF_8)) { + if (charset.equals(Charsets.UTF_8)) { return readLine(); } diff --git a/library/extractor/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java b/library/extractor/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java index 679fa76e4b..c6300f2b2f 100644 --- a/library/extractor/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java +++ b/library/extractor/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java @@ -149,13 +149,11 @@ public final class SubripDecoder extends SimpleSubtitleDecoder { * @return determined encoding */ private Charset detectUtfCharset(ParsableByteArray data) { - if(data.limit() < 2) { + if (data.limit() < 2) { return Charsets.UTF_8; } - char twoBytes = data.peekChar(); - - switch (twoBytes) { + switch (data.peekChar()) { case ParsableByteArray.BOM_UTF16_BE: return Charsets.UTF_16BE; case ParsableByteArray.BOM_UTF16_LE: