Fix ParsableByteArray.readLine + discard BOM at start of line.
- I think \r and \n were handled the wrong way around in readLine: the old code skipped a '\n' before checking for a '\r', so a "\r\n" terminator was not consumed as a single line break.
- We only expect to encounter a BOM sequence at the start of a file, but it feels fine to automatically discard it at the start of any line for simplicity. A BOM sequence doesn't mean anything in UTF-8. See https://en.wikipedia.org/wiki/Byte_order_mark. Note that I think the advice on that page not to remove it applies only to the case where the file is being edited and saved.
Issue #1136
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=112143407
This commit is contained in:
parent
5ef1123391
commit
bc1d76a3eb
@ -0,0 +1,12 @@
|
|||||||
|
1
|
||||||
|
00:00:00,000 --> 00:00:01,234
|
||||||
|
This is the first subtitle.
|
||||||
|
|
||||||
|
2
|
||||||
|
00:00:02,345 --> 00:00:03,456
|
||||||
|
This is the second subtitle.
|
||||||
|
Second subtitle with second line.
|
||||||
|
|
||||||
|
3
|
||||||
|
00:00:04,567 --> 00:00:08,901
|
||||||
|
This is the third subtitle.
|
@ -28,6 +28,7 @@ public final class SubripParserTest extends InstrumentationTestCase {
|
|||||||
|
|
||||||
private static final String EMPTY_FILE = "subrip/empty";
|
private static final String EMPTY_FILE = "subrip/empty";
|
||||||
private static final String TYPICAL_FILE = "subrip/typical";
|
private static final String TYPICAL_FILE = "subrip/typical";
|
||||||
|
private static final String TYPICAL_WITH_BYTE_ORDER_MARK = "subrip/typical_with_byte_order_mark";
|
||||||
private static final String TYPICAL_EXTRA_BLANK_LINE = "subrip/typical_extra_blank_line";
|
private static final String TYPICAL_EXTRA_BLANK_LINE = "subrip/typical_extra_blank_line";
|
||||||
private static final String TYPICAL_MISSING_TIMECODE = "subrip/typical_missing_timecode";
|
private static final String TYPICAL_MISSING_TIMECODE = "subrip/typical_missing_timecode";
|
||||||
private static final String TYPICAL_MISSING_SEQUENCE = "subrip/typical_missing_sequence";
|
private static final String TYPICAL_MISSING_SEQUENCE = "subrip/typical_missing_sequence";
|
||||||
@ -52,6 +53,16 @@ public final class SubripParserTest extends InstrumentationTestCase {
|
|||||||
assertTypicalCue3(subtitle, 4);
|
assertTypicalCue3(subtitle, 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testParseTypicalWithByteOrderMark() throws IOException {
|
||||||
|
SubripParser parser = new SubripParser();
|
||||||
|
byte[] bytes = TestUtil.getByteArray(getInstrumentation(), TYPICAL_WITH_BYTE_ORDER_MARK);
|
||||||
|
SubripSubtitle subtitle = parser.parse(bytes, 0, bytes.length);
|
||||||
|
assertEquals(6, subtitle.getEventTimeCount());
|
||||||
|
assertTypicalCue1(subtitle, 0);
|
||||||
|
assertTypicalCue2(subtitle, 2);
|
||||||
|
assertTypicalCue3(subtitle, 4);
|
||||||
|
}
|
||||||
|
|
||||||
public void testParseTypicalExtraBlankLine() throws IOException {
|
public void testParseTypicalExtraBlankLine() throws IOException {
|
||||||
SubripParser parser = new SubripParser();
|
SubripParser parser = new SubripParser();
|
||||||
byte[] bytes = TestUtil.getByteArray(getInstrumentation(), TYPICAL_EXTRA_BLANK_LINE);
|
byte[] bytes = TestUtil.getByteArray(getInstrumentation(), TYPICAL_EXTRA_BLANK_LINE);
|
||||||
|
@ -346,9 +346,9 @@ public class ParsableByteArrayTest extends TestCase {
|
|||||||
assertNull(parser.readLine());
|
assertNull(parser.readLine());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testReadTwoLinesWithLfFollowedByCr() {
|
public void testReadTwoLinesWithCrFollowedByLf() {
|
||||||
byte[] bytes = new byte[] {
|
byte[] bytes = new byte[] {
|
||||||
'f', 'o', 'o', '\n', '\r', 'b', 'a', 'r'
|
'f', 'o', 'o', '\r', '\n', 'b', 'a', 'r'
|
||||||
};
|
};
|
||||||
ParsableByteArray parser = new ParsableByteArray(bytes);
|
ParsableByteArray parser = new ParsableByteArray(bytes);
|
||||||
assertEquals("foo", parser.readLine());
|
assertEquals("foo", parser.readLine());
|
||||||
@ -358,7 +358,7 @@ public class ParsableByteArrayTest extends TestCase {
|
|||||||
|
|
||||||
public void testReadThreeLinesWithEmptyLine() {
|
public void testReadThreeLinesWithEmptyLine() {
|
||||||
byte[] bytes = new byte[] {
|
byte[] bytes = new byte[] {
|
||||||
'f', 'o', 'o', '\n', '\r', '\n', 'b', 'a', 'r'
|
'f', 'o', 'o', '\r', '\n', '\r', 'b', 'a', 'r'
|
||||||
};
|
};
|
||||||
ParsableByteArray parser = new ParsableByteArray(bytes);
|
ParsableByteArray parser = new ParsableByteArray(bytes);
|
||||||
assertEquals("foo", parser.readLine());
|
assertEquals("foo", parser.readLine());
|
||||||
@ -367,9 +367,9 @@ public class ParsableByteArrayTest extends TestCase {
|
|||||||
assertNull(parser.readLine());
|
assertNull(parser.readLine());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testReadFourLinesWithCrFollowedByLf() {
|
public void testReadFourLinesWithLfFollowedByCr() {
|
||||||
byte[] bytes = new byte[] {
|
byte[] bytes = new byte[] {
|
||||||
'f', 'o', 'o', '\r', '\n', '\n', 'b', 'a', 'r', '\n', '\r'
|
'f', 'o', 'o', '\n', '\r', '\r', 'b', 'a', 'r', '\r', '\n'
|
||||||
};
|
};
|
||||||
ParsableByteArray parser = new ParsableByteArray(bytes);
|
ParsableByteArray parser = new ParsableByteArray(bytes);
|
||||||
assertEquals("foo", parser.readLine());
|
assertEquals("foo", parser.readLine());
|
||||||
|
@ -307,12 +307,14 @@ public final class ParsableByteArray {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reads a line of text. A line is considered to be terminated by any one of a line feed ('\n'), a
|
* Reads a line of text.
|
||||||
* carriage return ('\r'), or a carriage return followed immediately by a line feed. Platform
|
* <p>
|
||||||
* default's charset used.
|
* A line is considered to be terminated by any one of a carriage return ('\r'), a line feed
|
||||||
|
* ('\n'), or a carriage return followed immediately by a line feed ('\r\n'). The system's default
|
||||||
|
* charset (UTF-8) is used.
|
||||||
*
|
*
|
||||||
* @return A String containing the contents of the line, not including any line-termination
|
* @return A String containing the contents of the line, not including any line-termination
|
||||||
* characters, or null if the end of the stream has been reached.
|
* characters, or null if the end of the stream has been reached.
|
||||||
*/
|
*/
|
||||||
public String readLine() {
|
public String readLine() {
|
||||||
if (bytesLeft() == 0) {
|
if (bytesLeft() == 0) {
|
||||||
@ -322,18 +324,23 @@ public final class ParsableByteArray {
|
|||||||
while (lineLimit < limit && data[lineLimit] != '\n' && data[lineLimit] != '\r') {
|
while (lineLimit < limit && data[lineLimit] != '\n' && data[lineLimit] != '\r') {
|
||||||
lineLimit++;
|
lineLimit++;
|
||||||
}
|
}
|
||||||
|
if (lineLimit - position >= 3 && data[position] == (byte) 0xEF
|
||||||
|
&& data[position + 1] == (byte) 0xBB && data[position + 2] == (byte) 0xBF) {
|
||||||
|
// There's a byte order mark at the start of the line. Discard it.
|
||||||
|
position += 3;
|
||||||
|
}
|
||||||
String line = new String(data, position, lineLimit - position);
|
String line = new String(data, position, lineLimit - position);
|
||||||
position = lineLimit;
|
position = lineLimit;
|
||||||
if (position == limit) {
|
if (position == limit) {
|
||||||
return line;
|
return line;
|
||||||
}
|
}
|
||||||
if (data[position] == '\n') {
|
if (data[position] == '\r') {
|
||||||
position++;
|
position++;
|
||||||
if (position == limit) {
|
if (position == limit) {
|
||||||
return line;
|
return line;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (data[position] == '\r') {
|
if (data[position] == '\n') {
|
||||||
position++;
|
position++;
|
||||||
}
|
}
|
||||||
return line;
|
return line;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user