Fixes for correctly supporting UTF-16 and UTF-16BE charsets
For fields encoded using the UTF-16 or UTF-16BE charsets, we have to look for two consecutive zero bytes when searching for the termination character. Otherwise, because many characters encoded with UTF-16 or UTF-16BE have one of their two bytes set to zero, we would truncate text fields.
This commit is contained in:
parent
ebbd022a52
commit
c135bb7a57
@ -29,6 +29,11 @@ import java.util.Map;
|
||||
*/
|
||||
public class Id3Parser implements MetadataParser<Map<String, Object>> {
|
||||
|
||||
private static final int ID3_TEXT_ENCODING_ISO_8859_1 = 0;
|
||||
private static final int ID3_TEXT_ENCODING_UTF_16 = 1;
|
||||
private static final int ID3_TEXT_ENCODING_UTF_16BE = 2;
|
||||
private static final int ID3_TEXT_ENCODING_UTF_8 = 3;
|
||||
|
||||
@Override
|
||||
public boolean canParse(String mimeType) {
|
||||
return mimeType.equals(MimeTypes.APPLICATION_ID3);
|
||||
@ -60,10 +65,10 @@ public class Id3Parser implements MetadataParser<Map<String, Object>> {
|
||||
byte[] frame = new byte[frameSize - 1];
|
||||
id3Data.readBytes(frame, 0, frameSize - 1);
|
||||
|
||||
int firstZeroIndex = indexOf(frame, 0, (byte) 0);
|
||||
int firstZeroIndex = indexOfEOS(frame, 0, encoding);
|
||||
String description = new String(frame, 0, firstZeroIndex, charset);
|
||||
int valueStartIndex = indexOfNot(frame, firstZeroIndex, (byte) 0);
|
||||
int valueEndIndex = indexOf(frame, valueStartIndex, (byte) 0);
|
||||
int valueStartIndex = firstZeroIndex + 1;
|
||||
int valueEndIndex = indexOfEOS(frame, valueStartIndex, encoding);
|
||||
String value = new String(frame, valueStartIndex, valueEndIndex - valueStartIndex,
|
||||
charset);
|
||||
metadata.put(TxxxMetadata.TYPE, new TxxxMetadata(description, value));
|
||||
@ -73,7 +78,7 @@ public class Id3Parser implements MetadataParser<Map<String, Object>> {
|
||||
id3Data.readBytes(frame, 0, frameSize);
|
||||
|
||||
int firstZeroIndex = indexOf(frame, 0, (byte) 0);
|
||||
String owner = new String(frame, 0, firstZeroIndex);
|
||||
String owner = new String(frame, 0, firstZeroIndex, "ISO-8859-1");
|
||||
byte[] privateData = new byte[frameSize - firstZeroIndex - 1];
|
||||
System.arraycopy(frame, firstZeroIndex + 1, privateData, 0, frameSize - firstZeroIndex - 1);
|
||||
metadata.put(PrivMetadata.TYPE, new PrivMetadata(owner, privateData));
|
||||
@ -85,13 +90,13 @@ public class Id3Parser implements MetadataParser<Map<String, Object>> {
|
||||
id3Data.readBytes(frame, 0, frameSize - 1);
|
||||
|
||||
int firstZeroIndex = indexOf(frame, 0, (byte) 0);
|
||||
String mimeType = new String(frame, 0, firstZeroIndex);
|
||||
String mimeType = new String(frame, 0, firstZeroIndex, "ISO-8859-1");
|
||||
int filenameStartIndex = firstZeroIndex + 1;
|
||||
int filenameEndIndex = indexOf(frame, filenameStartIndex, (byte) 0);
|
||||
int filenameEndIndex = indexOfEOS(frame, filenameStartIndex, encoding);
|
||||
String filename = new String(frame, filenameStartIndex,
|
||||
filenameEndIndex - filenameStartIndex, charset);
|
||||
int descriptionStartIndex = filenameEndIndex + 1;
|
||||
int descriptionEndIndex = indexOf(frame, descriptionStartIndex, (byte) 0);
|
||||
int descriptionEndIndex = indexOfEOS(frame, descriptionStartIndex, encoding);
|
||||
String description = new String(frame, descriptionStartIndex,
|
||||
descriptionEndIndex - descriptionStartIndex, charset);
|
||||
|
||||
@ -131,6 +136,25 @@ public class Id3Parser implements MetadataParser<Map<String, Object>> {
|
||||
return data.length;
|
||||
}
|
||||
|
||||
private static int indexOfEOS(byte[] data, int fromIndex, int encodingByte) {
|
||||
int terminationPos = indexOf(data, fromIndex, (byte) 0);
|
||||
|
||||
// For single byte encoding charsets, we are done
|
||||
if(encodingByte == ID3_TEXT_ENCODING_ISO_8859_1 || encodingByte == ID3_TEXT_ENCODING_UTF_8) {
|
||||
return terminationPos;
|
||||
}
|
||||
|
||||
// Otherwise, look for a two zero bytes
|
||||
while(terminationPos < data.length - 1) {
|
||||
if(data[terminationPos + 1] == (byte) 0) {
|
||||
return terminationPos + 1;
|
||||
}
|
||||
terminationPos = indexOf(data, terminationPos + 1, (byte) 0);
|
||||
}
|
||||
|
||||
return data.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses an ID3 header.
|
||||
*
|
||||
@ -175,13 +199,13 @@ public class Id3Parser implements MetadataParser<Map<String, Object>> {
|
||||
*/
|
||||
private static String getCharsetName(int encodingByte) {
|
||||
switch (encodingByte) {
|
||||
case 0:
|
||||
case ID3_TEXT_ENCODING_ISO_8859_1:
|
||||
return "ISO-8859-1";
|
||||
case 1:
|
||||
case ID3_TEXT_ENCODING_UTF_16:
|
||||
return "UTF-16";
|
||||
case 2:
|
||||
case ID3_TEXT_ENCODING_UTF_16BE:
|
||||
return "UTF-16BE";
|
||||
case 3:
|
||||
case ID3_TEXT_ENCODING_UTF_8:
|
||||
return "UTF-8";
|
||||
default:
|
||||
return "ISO-8859-1";
|
||||
|
@ -26,7 +26,7 @@ public class PrivMetadata {
|
||||
public final String owner;
|
||||
public final byte[] privateData;
|
||||
|
||||
public PrivMetadata(String owner, byte [] privateData) {
|
||||
public PrivMetadata(String owner, byte[] privateData) {
|
||||
this.owner = owner;
|
||||
this.privateData = privateData;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user