From a72c26a9359f0bc58c3349a18e56a30c424dd930 Mon Sep 17 00:00:00 2001 From: Redkale <8730487+redkale@users.noreply.github.com> Date: Mon, 22 Apr 2019 12:24:55 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8DUtility.encodeUTF8=E5=92=8Cde?= =?UTF-8?q?codeUTF8=20=E5=AF=B94=E5=AD=97=E8=8A=82=E5=AD=97=E7=AC=A6?= =?UTF-8?q?=E4=B8=8D=E8=83=BD=E6=AD=A3=E5=B8=B8=E7=BC=96=E7=A0=81=E7=9A=84?= =?UTF-8?q?bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/org/redkale/util/Utility.java | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/src/org/redkale/util/Utility.java b/src/org/redkale/util/Utility.java index 426938903..21ff12414 100644 --- a/src/org/redkale/util/Utility.java +++ b/src/org/redkale/util/Utility.java @@ -1474,9 +1474,11 @@ public final class Utility { final int limit = start + len; for (int i = start; i < limit; i++) { b = bytes[i]; - if ((b >> 5) == -2) { + if ((b >> 5) == -2) {// 2 bytes, 11 bits: 110xxxxx 10xxxxxx size--; - } else if ((b >> 4) == -2) { + } else if ((b >> 4) == -2) {// 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx + size -= 2; + } else if ((b >> 3) == -2) {// 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx size -= 2; } } @@ -1484,12 +1486,17 @@ public final class Utility { size = 0; for (int i = start; i < limit;) { b = bytes[i++]; - if (b >= 0) { + if (b >= 0) {// 1 byte, 7 bits: 0xxxxxxx text[size++] = (char) b; - } else if ((b >> 5) == -2) { + } else if ((b >> 5) == -2) {// 2 bytes, 11 bits: 110xxxxx 10xxxxxx text[size++] = (char) (((b << 6) ^ bytes[i++]) ^ (((byte) 0xC0 << 6) ^ ((byte) 0x80))); - } else if ((b >> 4) == -2) { + } else if ((b >> 4) == -2) {// 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx text[size++] = (char) ((b << 12) ^ (bytes[i++] << 6) ^ (bytes[i++] ^ (((byte) 0xE0 << 12) ^ ((byte) 0x80 << 6) ^ ((byte) 0x80)))); + } else if ((b >> 3) == -2) {// 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int uc = ((b << 18) ^ (bytes[i++] << 12) ^ (bytes[i++] << 6) ^ (bytes[i++] ^ (((byte) 0xF0 << 18) ^ ((byte) 0x80 << 12) ^ ((byte) 0x80 << 6) ^ ((byte) 0x80)))); + text[size++] = Character.highSurrogate(uc); + text[size++] = Character.lowSurrogate(uc); + //测试代码 byte[] bs = {(byte)34, (byte)76, (byte)105, (byte)108, (byte)121, (byte)240, (byte)159, (byte)146, (byte)171, (byte)34}; } } return text; @@ -1516,6 +1523,8 @@ public final class Utility { size++; } else if (c < 0x800) { size += 2; + } else if (Character.isSurrogate(c)) { + size += 2; } else { size += 3; } @@ -1529,6 +1538,13 @@ public final class Utility { } else if (c < 0x800) { bytes[size++] = (byte) (0xc0 | (c >> 6)); bytes[size++] = (byte) (0x80 | (c & 0x3f)); + } else if (Character.isSurrogate(c)) { //连取两个 + int uc = Character.toCodePoint(c, chars[i + 1]); + bytes[size++] = (byte) (0xf0 | ((uc >> 18))); + bytes[size++] = (byte) (0x80 | ((uc >> 12) & 0x3f)); + bytes[size++] = (byte) (0x80 | ((uc >> 6) & 0x3f)); + bytes[size++] = (byte) (0x80 | (uc & 0x3f)); + i++; } else { bytes[size++] = (byte) (0xe0 | ((c >> 12))); bytes[size++] = (byte) (0x80 | ((c >> 6) & 0x3f));