修复 Utility.encodeUTF8 和 decodeUTF8 对 4 字节 UTF-8 字符不能正常编解码的 bug

This commit is contained in:
Redkale
2019-04-22 12:24:55 +08:00
parent a9900d9bfa
commit a72c26a935

View File

@@ -1474,9 +1474,11 @@ public final class Utility {
final int limit = start + len;
for (int i = start; i < limit; i++) {
b = bytes[i];
if ((b >> 5) == -2) {
if ((b >> 5) == -2) {// 2 bytes, 11 bits: 110xxxxx 10xxxxxx
size--;
} else if ((b >> 4) == -2) {
} else if ((b >> 4) == -2) {// 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
size -= 2;
} else if ((b >> 3) == -2) {// 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
size -= 2;
}
}
@@ -1484,12 +1486,17 @@ public final class Utility {
size = 0;
for (int i = start; i < limit;) {
b = bytes[i++];
if (b >= 0) {
if (b >= 0) {// 1 byte, 7 bits: 0xxxxxxx
text[size++] = (char) b;
} else if ((b >> 5) == -2) {
} else if ((b >> 5) == -2) {// 2 bytes, 11 bits: 110xxxxx 10xxxxxx
text[size++] = (char) (((b << 6) ^ bytes[i++]) ^ (((byte) 0xC0 << 6) ^ ((byte) 0x80)));
} else if ((b >> 4) == -2) {
} else if ((b >> 4) == -2) {// 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
text[size++] = (char) ((b << 12) ^ (bytes[i++] << 6) ^ (bytes[i++] ^ (((byte) 0xE0 << 12) ^ ((byte) 0x80 << 6) ^ ((byte) 0x80))));
} else if ((b >> 3) == -2) {// 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
int uc = ((b << 18) ^ (bytes[i++] << 12) ^ (bytes[i++] << 6) ^ (bytes[i++] ^ (((byte) 0xF0 << 18) ^ ((byte) 0x80 << 12) ^ ((byte) 0x80 << 6) ^ ((byte) 0x80))));
text[size++] = Character.highSurrogate(uc);
text[size++] = Character.lowSurrogate(uc);
//测试代码 byte[] bs = {(byte)34, (byte)76, (byte)105, (byte)108, (byte)121, (byte)240, (byte)159, (byte)146, (byte)171, (byte)34};
}
}
return text;
@@ -1516,6 +1523,8 @@ public final class Utility {
size++;
} else if (c < 0x800) {
size += 2;
} else if (Character.isSurrogate(c)) {
size += 2;
} else {
size += 3;
}
@@ -1529,6 +1538,13 @@ public final class Utility {
} else if (c < 0x800) {
bytes[size++] = (byte) (0xc0 | (c >> 6));
bytes[size++] = (byte) (0x80 | (c & 0x3f));
} else if (Character.isSurrogate(c)) { //连取两个
int uc = Character.toCodePoint(c, chars[i + 1]);
bytes[size++] = (byte) (0xf0 | ((uc >> 18)));
bytes[size++] = (byte) (0x80 | ((uc >> 12) & 0x3f));
bytes[size++] = (byte) (0x80 | ((uc >> 6) & 0x3f));
bytes[size++] = (byte) (0x80 | (uc & 0x3f));
i++;
} else {
bytes[size++] = (byte) (0xe0 | ((c >> 12)));
bytes[size++] = (byte) (0x80 | ((c >> 6) & 0x3f));