Fixes for correctly supporting UTF-16 and UTF-16BE charsets

For fields encoded with the UTF-16 or UTF-16BE charsets, the termination
character is two consecutive zero bytes, so that is what we have to look for.

Otherwise, because many characters encoded with UTF-16 or UTF-16BE have one
of their two bytes set to zero, we would truncate text fields at the first
such byte.
This commit is contained in:
J. Oliva 2015-02-26 21:54:29 +01:00
parent ebbd022a52
commit c135bb7a57
2 changed files with 36 additions and 12 deletions

View File

@ -29,6 +29,11 @@ import java.util.Map;
*/ */
public class Id3Parser implements MetadataParser<Map<String, Object>> { public class Id3Parser implements MetadataParser<Map<String, Object>> {
private static final int ID3_TEXT_ENCODING_ISO_8859_1 = 0;
private static final int ID3_TEXT_ENCODING_UTF_16 = 1;
private static final int ID3_TEXT_ENCODING_UTF_16BE = 2;
private static final int ID3_TEXT_ENCODING_UTF_8 = 3;
@Override @Override
public boolean canParse(String mimeType) { public boolean canParse(String mimeType) {
return mimeType.equals(MimeTypes.APPLICATION_ID3); return mimeType.equals(MimeTypes.APPLICATION_ID3);
@ -60,10 +65,10 @@ public class Id3Parser implements MetadataParser<Map<String, Object>> {
byte[] frame = new byte[frameSize - 1]; byte[] frame = new byte[frameSize - 1];
id3Data.readBytes(frame, 0, frameSize - 1); id3Data.readBytes(frame, 0, frameSize - 1);
int firstZeroIndex = indexOf(frame, 0, (byte) 0); int firstZeroIndex = indexOfEOS(frame, 0, encoding);
String description = new String(frame, 0, firstZeroIndex, charset); String description = new String(frame, 0, firstZeroIndex, charset);
int valueStartIndex = indexOfNot(frame, firstZeroIndex, (byte) 0); int valueStartIndex = firstZeroIndex + 1;
int valueEndIndex = indexOf(frame, valueStartIndex, (byte) 0); int valueEndIndex = indexOfEOS(frame, valueStartIndex, encoding);
String value = new String(frame, valueStartIndex, valueEndIndex - valueStartIndex, String value = new String(frame, valueStartIndex, valueEndIndex - valueStartIndex,
charset); charset);
metadata.put(TxxxMetadata.TYPE, new TxxxMetadata(description, value)); metadata.put(TxxxMetadata.TYPE, new TxxxMetadata(description, value));
@ -73,7 +78,7 @@ public class Id3Parser implements MetadataParser<Map<String, Object>> {
id3Data.readBytes(frame, 0, frameSize); id3Data.readBytes(frame, 0, frameSize);
int firstZeroIndex = indexOf(frame, 0, (byte) 0); int firstZeroIndex = indexOf(frame, 0, (byte) 0);
String owner = new String(frame, 0, firstZeroIndex); String owner = new String(frame, 0, firstZeroIndex, "ISO-8859-1");
byte[] privateData = new byte[frameSize - firstZeroIndex - 1]; byte[] privateData = new byte[frameSize - firstZeroIndex - 1];
System.arraycopy(frame, firstZeroIndex + 1, privateData, 0, frameSize - firstZeroIndex - 1); System.arraycopy(frame, firstZeroIndex + 1, privateData, 0, frameSize - firstZeroIndex - 1);
metadata.put(PrivMetadata.TYPE, new PrivMetadata(owner, privateData)); metadata.put(PrivMetadata.TYPE, new PrivMetadata(owner, privateData));
@ -85,13 +90,13 @@ public class Id3Parser implements MetadataParser<Map<String, Object>> {
id3Data.readBytes(frame, 0, frameSize - 1); id3Data.readBytes(frame, 0, frameSize - 1);
int firstZeroIndex = indexOf(frame, 0, (byte) 0); int firstZeroIndex = indexOf(frame, 0, (byte) 0);
String mimeType = new String(frame, 0, firstZeroIndex); String mimeType = new String(frame, 0, firstZeroIndex, "ISO-8859-1");
int filenameStartIndex = firstZeroIndex + 1; int filenameStartIndex = firstZeroIndex + 1;
int filenameEndIndex = indexOf(frame, filenameStartIndex, (byte) 0); int filenameEndIndex = indexOfEOS(frame, filenameStartIndex, encoding);
String filename = new String(frame, filenameStartIndex, String filename = new String(frame, filenameStartIndex,
filenameEndIndex - filenameStartIndex, charset); filenameEndIndex - filenameStartIndex, charset);
int descriptionStartIndex = filenameEndIndex + 1; int descriptionStartIndex = filenameEndIndex + 1;
int descriptionEndIndex = indexOf(frame, descriptionStartIndex, (byte) 0); int descriptionEndIndex = indexOfEOS(frame, descriptionStartIndex, encoding);
String description = new String(frame, descriptionStartIndex, String description = new String(frame, descriptionStartIndex,
descriptionEndIndex - descriptionStartIndex, charset); descriptionEndIndex - descriptionStartIndex, charset);
@ -131,6 +136,25 @@ public class Id3Parser implements MetadataParser<Map<String, Object>> {
return data.length; return data.length;
} }
/**
 * Locates the end-of-string terminator of a text field, honouring the width of the
 * terminator implied by the ID3 text encoding byte.
 *
 * <p>For the two-byte encodings (UTF-16 and UTF-16BE) the terminator is a pair of zero
 * bytes that starts on a 16-bit code unit boundary, counted from {@code fromIndex}.
 * Every other encoding byte, including unknown values, is treated as a single-byte
 * charset terminated by one zero byte; this matches {@code getCharsetName}, which
 * decodes unknown values as ISO-8859-1.
 *
 * @param data the bytes to search
 * @param fromIndex the index of the first byte of the string
 * @param encodingByte the ID3 text encoding byte of the field
 * @return the index of the last byte of the terminator (the single zero byte, or the
 *     second byte of the zero pair), so that {@code result + 1} is the first byte after
 *     the terminator; {@code data.length} if no terminator is found
 */
private static int indexOfEOS(byte[] data, int fromIndex, int encodingByte) {
  // Relies on indexOf yielding data.length when no zero byte remains (assumed from its
  // use in the loop condition below; verify against indexOf).
  int terminationPos = indexOf(data, fromIndex, (byte) 0);

  // Single byte terminator: anything that is not explicitly a UTF-16 flavour.
  boolean doubleByteTerminator = encodingByte == ID3_TEXT_ENCODING_UTF_16
      || encodingByte == ID3_TEXT_ENCODING_UTF_16BE;
  if (!doubleByteTerminator) {
    return terminationPos;
  }

  // Otherwise look for two consecutive zero bytes. The pair must begin on a code unit
  // boundary: without this check the zero low byte of a character such as U+0100
  // (01 00 in UTF-16BE) followed by the real terminator would be matched one byte early.
  while (terminationPos < data.length - 1) {
    boolean onCodeUnitBoundary = ((terminationPos - fromIndex) & 1) == 0;
    if (onCodeUnitBoundary && data[terminationPos + 1] == (byte) 0) {
      // NOTE(review): this is the index of the SECOND zero byte. Callers that also use
      // the result as an exclusive end index for decoding will include the first
      // terminator byte in the decoded range - confirm whether that is intended.
      return terminationPos + 1;
    }
    terminationPos = indexOf(data, terminationPos + 1, (byte) 0);
  }

  return data.length;
}
/** /**
* Parses an ID3 header. * Parses an ID3 header.
* *
@ -175,13 +199,13 @@ public class Id3Parser implements MetadataParser<Map<String, Object>> {
*/ */
private static String getCharsetName(int encodingByte) { private static String getCharsetName(int encodingByte) {
switch (encodingByte) { switch (encodingByte) {
case 0: case ID3_TEXT_ENCODING_ISO_8859_1:
return "ISO-8859-1"; return "ISO-8859-1";
case 1: case ID3_TEXT_ENCODING_UTF_16:
return "UTF-16"; return "UTF-16";
case 2: case ID3_TEXT_ENCODING_UTF_16BE:
return "UTF-16BE"; return "UTF-16BE";
case 3: case ID3_TEXT_ENCODING_UTF_8:
return "UTF-8"; return "UTF-8";
default: default:
return "ISO-8859-1"; return "ISO-8859-1";

View File

@ -26,7 +26,7 @@ public class PrivMetadata {
public final String owner; public final String owner;
public final byte[] privateData; public final byte[] privateData;
public PrivMetadata(String owner, byte [] privateData) { public PrivMetadata(String owner, byte[] privateData) {
this.owner = owner; this.owner = owner;
this.privateData = privateData; this.privateData = privateData;
} }