附录:ASCII、UTF8、Uncicode 编码下的中英文字符大小
private static void ShowCode() {
string[] strArray = { "b", "abcd", "乙", "甲乙丙丁" };
byte[] buffer;
string mode, back;
foreach (string str in strArray) {
for (int i = 0; i <= 2; i++) {
if (i == 0) {
buffer = Encoding.ASCII.GetBytes(str);
back = Encoding.ASCII.GetString(buffer, 0, buffer.Length);
mode = "ASCII";
} else if (i == 1) {
buffer = Encoding.UTF8.GetBytes(str);
back = Encoding.UTF8.GetString(buffer, 0, buffer.Length);
mode = "UTF8";
} else {
buffer = Encoding.Unicode.GetBytes(str);
back = Encoding.Unicode.GetString(buffer, 0, buffer.Length);
mode = "Unicode";
}
Console.WriteLine("Mode: {0}, String: {1}, Buffer.Length: {2}",
mode, str, buffer.Length);
Console.WriteLine("Buffer:");
for (int j = 0; j <= buffer.Length - 1; j++) {
Console.Write(buffer[j] + " ");
}
Console.WriteLine("\nRetrived: {0}\n", back);
}
}
}
输出为:
Mode: ASCII, String: b, Buffer.Length: 1
Buffer: 98
Retrived: b
Mode: UTF8, String: b, Buffer.Length: 1
Buffer: 98
Retrived: b
Mode: Unicode, String: b, Buffer.Length: 2
Buffer: 98 0
Retrived: b
Mode: ASCII, String: abcd, Buffer.Length: 4
Buffer: 97 98 99 100
Retrived: abcd
Mode: UTF8, String: abcd, Buffer.Length: 4
Buffer: 97 98 99 100
Retrived: abcd
Mode: Unicode, String: abcd, Buffer.Length: 8
Buffer: 97 0 98 0 99 0 100 0
Retrived: abcd
Mode: ASCII, String: 乙, Buffer.Length: 1
Buffer: 63
Retrived: ?
Mode: UTF8, String: 乙, Buffer.Length: 3
Buffer: 228 185 153
Retrived: 乙
Mode: Unicode, String: 乙, Buffer.Length: 2
Buffer: 89 78
Retrived: 乙
Mode: ASCII, String: 甲乙丙丁, Buffer.Length: 4
Buffer: 63 63 63 63
Retrived: ????
Mode: UTF8, String: 甲乙丙丁, Buffer.Length: 12
Buffer: 231 148 178 228 185 153 228 184 153 228 184 129
Retrived: 甲乙丙丁
Mode: Unicode, String: 甲乙丙丁, Buffer.Length: 8
Buffer: 50 117 89 78 25 78 1 78
Retrived: 甲乙丙丁
大体上可以得出这么几个结论:
- ASCII 不能保存中文(貌似谁都知道=_-`)。
- UTF8 是变长编码。在对 ASCII 字符编码时,UTF 更省空间,只占 1 个字节,与 ASCII 编码方式和长度相同;Unicode 在对ASCII 字符编码时,占用 2 个字节,且第 2 个字节补零。
- UTF8 在对中文编码时需要占用 3 个字节;Unicode 对中文编码则只需要 2 个字节。
版权声明:本文为YES开发框架网发布内容,转载请附上原文出处连接
Socket 张国生