// Bit-level constants used by getStringRepresentation to decode one-, two-,
// and three-byte character sequences from a byte array.

// Lead bytes below this value (192 = 0xC0) are taken as a single-byte character.
TWO_BYTE_CONSTANT1 = 192;
// Lead bytes below this value (224 = 0xE0), but not below TWO_BYTE_CONSTANT1,
// start a two-byte sequence; all remaining lead bytes start a three-byte sequence.
THREE_BYTE_CONSTANT1 = 224;
// Two-byte sequence: mask (31 = 0x1F) selecting the 5 payload bits of the lead byte.
TWO_BYTE_MASK1 = 31;
// Two-byte sequence: left shift applied to the lead byte's payload bits.
TWO_BYTE_SHIFT1 = 6;
// Two-byte sequence: mask (63 = 0x3F) selecting the 6 payload bits of the second byte.
TWO_BYTE_MASK2 = 63;
// Three-byte sequence: mask (15 = 0x0F) selecting the 4 payload bits of the lead byte.
THREE_BYTE_MASK1 = 15;
// Three-byte sequence: left shift applied to the lead byte's payload bits.
THREE_BYTE_SHIFT1 = 12;
// Three-byte sequence: left shift applied to the second byte's payload bits.
THREE_BYTE_SHIFT2 = 6;
// Three-byte sequence: mask (63 = 0x3F) selecting the 6 payload bits of the third byte.
THREE_BYTE_MASK3 = 63;
// Three-byte sequence: mask (63 = 0x3F) selecting the 6 payload bits of the second byte.
THREE_BYTE_MASK2 = 63;
/**
 * Decodes the given bytes into a string, reading each character from a
 * one-, two-, or three-byte sequence selected by the value of its first byte.
 * <p>
 * Only the first byte of a sequence is inspected to determine its length;
 * the flag bits of the second and third bytes are masked away without being
 * validated.
 *
 * @param bytes the encoded bytes; must not be {@code null}.
 * @return the decoded string.
 * @throws UnsupportedEncodingException if the array ends before all bytes
 *                                      announced by an initial byte have
 *                                      been read. The message contains the
 *                                      offending initial byte and the
 *                                      characters decoded so far.
 */
private String getStringRepresentation(byte[] bytes) throws UnsupportedEncodingException
{
    // We're computing the string ourselves, because the implementation
    // of "new String(bytes)" doesn't honor the special treatment of
    // the 0 character in JRE 1.6_u11.

    // Allocate the char array. Every character takes at least one byte,
    // so the number of bytes is an upper bound for the number of chars.
    char[] chars = new char[bytes.length];

    // Fill out the array.
    int charIndex = 0;
    int byteIndex = 0;
    while (byteIndex < bytes.length)
    {
        int b = bytes[byteIndex++] & 0xff;

        // Depending on the flag bits in the first byte, the character
        // is represented by a single byte, by two bytes, or by three
        // bytes. We're not checking the redundant flag bits in the
        // second byte and the third byte.
        int trailingByteCount =
            b < TWO_BYTE_CONSTANT1   ? 0 :
            b < THREE_BYTE_CONSTANT1 ? 1 :
                                       2;

        // Check for truncated input up front, before touching charIndex,
        // so the error message only contains characters that were
        // actually decoded.
        if (bytes.length - byteIndex < trailingByteCount)
        {
            throw new UnsupportedEncodingException("Missing UTF-8 bytes after initial byte [0x"+Integer.toHexString(b)+"] in string ["+new String(chars, 0, charIndex)+"]");
        }

        int c;
        if (trailingByteCount == 0)
        {
            c = b;
        }
        else if (trailingByteCount == 1)
        {
            c = ((b                  & TWO_BYTE_MASK1) << TWO_BYTE_SHIFT1) |
                 (bytes[byteIndex++] & TWO_BYTE_MASK2);
        }
        else
        {
            c = ((b                  & THREE_BYTE_MASK1) << THREE_BYTE_SHIFT1) |
                ((bytes[byteIndex++] & THREE_BYTE_MASK2) << THREE_BYTE_SHIFT2) |
                 (bytes[byteIndex++] & THREE_BYTE_MASK3);
        }

        chars[charIndex++] = (char)c;
    }

    return new String(chars, 0, charIndex);
}
}