Unicode

关于 Unicode 编码

Unicode 也叫统一码、万国码、单一码，是由 Unicode 联盟维护的文本编码标准，是计算机科学领域里的一项业界标准，包括字符集、编码方案等。Unicode 是为了解决传统字符编码方案的局限而产生的，它为每种语言中的每个字符设定了统一并且唯一的二进制编码，以满足跨语言、跨平台进行文本转换、处理的要求。

Unicode 维基百科 (opens new window)

# Python 3.0+

# 方法一：主要转义非 ASCII 字符（如中文），普通 ASCII 字符保持原样

data = "spiderapi.cn - 虫术"

# The unicode_escape codec produces ASCII-only bytes in which every non-ASCII
# character is written as a backslash escape; turn those bytes back into str.
result_encoded = str(data.encode("unicode_escape"), "utf-8")
# Reverse direction: back to bytes, then let unicode_escape interpret the escapes.
result_decoded = bytes(result_encoded, "utf-8").decode("unicode_escape")

print("Unicode 编码值:", result_encoded)  # spiderapi.cn - \u866b\u672f
print("Unicode 解码值:", result_decoded)

1
2
3
4
5
6
7
8
9

# 方法二：支持中文、英文、符号编码

import re


def unicode_encode(input_string):
    """Escape every character of ``input_string`` as ``\\uXXXX``.

    Each escape carries exactly four lowercase hex digits. Code points above
    U+FFFF do not fit in four digits, so they are written as a UTF-16
    surrogate pair (two consecutive escapes), the same form JavaScript and
    JSON use.

    :param input_string: text to escape.
    :return: a string consisting only of ``\\uXXXX`` escapes.
    """
    pieces = []
    for char in input_string:
        code_point = ord(char)
        if code_point > 0xFFFF:
            # Split the supplementary code point into high/low surrogates.
            offset = code_point - 0x10000
            high = 0xD800 + (offset >> 10)
            low = 0xDC00 + (offset & 0x3FF)
            pieces.append("\\u{:04x}\\u{:04x}".format(high, low))
        else:
            pieces.append("\\u{:04x}".format(code_point))
    return "".join(pieces)


def unicode_decode(encoded_string):
    """Replace every ``\\uXXXX`` escape in ``encoded_string`` with its character.

    A high surrogate escape immediately followed by a low surrogate escape is
    combined into the single supplementary character it represents. Text that
    is not part of an escape is kept unchanged. An unpaired surrogate escape
    is decoded to the lone surrogate code point, as before.

    :param encoded_string: text that may contain ``\\uXXXX`` escapes.
    :return: the decoded text.
    """
    def decode(match):
        high, low, single = match.groups()
        if single is not None:
            return chr(int(single, 16))
        # Recombine the surrogate pair into one code point.
        offset = ((int(high, 16) - 0xD800) << 10) + (int(low, 16) - 0xDC00)
        return chr(0x10000 + offset)

    # The surrogate-pair alternative is listed first so a pair is never
    # consumed as two independent escapes.
    pattern = (
        r"\\u([dD][89abAB][0-9a-fA-F]{2})\\u([dD][c-fC-F][0-9a-fA-F]{2})"
        r"|\\u([0-9a-fA-F]{4})"
    )
    return re.sub(pattern, decode, encoded_string)


if __name__ == "__main__":
    # Demo: escape a mixed ASCII/Chinese sample, then decode it again.
    data = "spiderapi.cn - 虫术"
    result_encoded = unicode_encode(data)
    result_decoded = unicode_decode(result_encoded)

    for label, value in (
        # \u0073\u0070\u0069\u0064\u0065\u0072\u0061\u0070\u0069\u002e\u0063\u006e\u0020\u002d\u0020\u866b\u672f
        ("Unicode 编码值:", result_encoded),
        ("Unicode 解码值:", result_decoded),
    ):
        print(label, value)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22

// Make sure to add code blocks to your code group

# JavaScript Node.js ECMAScript 5.1+

/**
 * Escapes every UTF-16 code unit of the input as a four-digit \uXXXX sequence.
 *
 * @param {string} inputString Text to escape.
 * @returns {string} The input written entirely as \uXXXX escapes.
 */
function unicodeEncode(inputString) {
    var units = inputString.split("");
    var pieces = [];
    for (var i = 0; i < units.length; i++) {
        var hex = units[i].charCodeAt(0).toString(16);
        // Left-pad with zeros so every escape has exactly four hex digits.
        pieces.push("\\u" + ("000" + hex).slice(-4));
    }
    return pieces.join("");
}

/**
 * Replaces every \uXXXX escape (four hex digits) with the UTF-16 code unit
 * it names; any other text is left as it is.
 *
 * @param {string} encodedString Text that may contain \uXXXX escapes.
 * @returns {string} The decoded text.
 */
function unicodeDecode(encodedString) {
    var escapePattern = /\\u([0-9a-fA-F]{4})/g;

    function toChar(wholeMatch, hexDigits) {
        return String.fromCharCode(parseInt(hexDigits, 16));
    }

    return encodedString.replace(escapePattern, toChar);
}

// Demo: escape a mixed ASCII/Chinese sample, then decode it again.
const data = "spiderapi.cn - 虫术";
const resultEncoded = unicodeEncode(data);
const resultDecoded = unicodeDecode(resultEncoded);

// Expected encoded value:
// \u0073\u0070\u0069\u0064\u0065\u0072\u0061\u0070\u0069\u002e\u0063\u006e\u0020\u002d\u0020\u866b\u672f
[
    ["Unicode 编码值:", resultEncoded],
    ["Unicode 解码值:", resultDecoded]
].forEach(function(entry) {
    console.log(entry[0], entry[1]);
});

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18

# Golang 1.0+

// 方法一：主要转义非 ASCII 字符（如中文），普通 ASCII 字符保持原样

package main

import (
    "fmt"
    "strconv"
)

// main escapes the non-ASCII characters of a sample string as \uXXXX
// sequences, decodes the result again, and prints both values.
func main() {
    data := "spiderapi.cn - 虫术"

    // QuoteToASCII returns a Go string literal, i.e. the escaped text wrapped
    // in double quotes. Drop the two delimiters so only the escaped text is
    // shown.
    quoted := strconv.QuoteToASCII(data)
    resultEncoded := quoted[1 : len(quoted)-1]

    // Unquote expects a quoted literal, so the delimiters are put back first.
    resultDecoded, err := strconv.Unquote(`"` + resultEncoded + `"`)
    if err != nil {
        fmt.Println("Unicode decode error:", err)
        return
    }

    fmt.Println("Unicode 编码值:", resultEncoded) // spiderapi.cn - \u866b\u672f
    fmt.Println("Unicode 解码值:", resultDecoded)
}

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22

// 方法二：支持中文、英文、符号编码

package main

import (
    "fmt"
    "strconv"
    "strings"
)

// unicodeEncode escapes every character of str as \uXXXX with exactly four
// lowercase hex digits. Runes above U+FFFF do not fit in four digits, so they
// are written as a UTF-16 surrogate pair (two consecutive escapes), the form
// used by JavaScript and JSON.
func unicodeEncode(str string) string {
    var builder strings.Builder
    for _, char := range str {
        if char > 0xFFFF {
            // Split the supplementary rune into high/low surrogates.
            offset := char - 0x10000
            fmt.Fprintf(&builder, "\\u%04x\\u%04x", 0xD800+(offset>>10), 0xDC00+(offset&0x3FF))
            continue
        }
        fmt.Fprintf(&builder, "\\u%04x", char)
    }
    return builder.String()
}

// unicodeDecode replaces every \uXXXX escape (exactly four hex digits) in str
// with the character it names. A high surrogate escape directly followed by a
// low surrogate escape is combined into one rune. Text that is not part of an
// escape is copied through unchanged. An unpaired surrogate cannot be
// represented in UTF-8 and becomes U+FFFD.
func unicodeDecode(str string) string {
    var decodedStr strings.Builder
    for i := 0; i < len(str); {
        unit, ok := parseUnicodeEscape(str[i:])
        if !ok {
            // Not an escape: copy the byte as it is. Multi-byte UTF-8
            // sequences survive because their bytes are copied in order.
            decodedStr.WriteByte(str[i])
            i++
            continue
        }
        i += 6
        if unit >= 0xD800 && unit <= 0xDBFF {
            if low, ok := parseUnicodeEscape(str[i:]); ok && low >= 0xDC00 && low <= 0xDFFF {
                decodedStr.WriteRune(0x10000 + ((unit - 0xD800) << 10) + (low - 0xDC00))
                i += 6
                continue
            }
        }
        decodedStr.WriteRune(unit)
    }
    return decodedStr.String()
}

// parseUnicodeEscape reports whether s starts with a \uXXXX escape and, if
// so, returns the 16-bit value it encodes.
func parseUnicodeEscape(s string) (rune, bool) {
    if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
        return 0, false
    }
    code, err := strconv.ParseUint(s[2:6], 16, 16)
    if err != nil {
        return 0, false
    }
    return rune(code), true
}

// main escapes every character of a sample string, decodes the result again,
// and prints both values.
func main() {
    const data = "spiderapi.cn - 虫术"

    encoded := unicodeEncode(data)
    decoded := unicodeDecode(encoded)

    // Expected encoded value:
    // \u0073\u0070\u0069\u0064\u0065\u0072\u0061\u0070\u0069\u002e\u0063\u006e\u0020\u002d\u0020\u866b\u672f
    fmt.Println("Unicode 编码值:", encoded)
    fmt.Println("Unicode 解码值:", decoded)
}

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44

// Make sure to add code blocks to your code group

# 主要特征

以 \u、&# 或 &#x 开头：\u 和 &#x 后面是十六进制数字（0-9、a-f），&# 后面是十进制数字；&# 和 &#x 形式通常以分号 ; 结尾。

PS：\u 开头和 &#x 开头是一样的，都是 16 进制 Unicode 字符的不同写法，&# 则是 Unicode 字符 10 进制的写法，此外，&# 和 &#x 开头的，也称为 HTML 字符实体转换，字符实体是用一个编号写入 HTML 代码中来代替一个字符，在 HTML 中，某些字符是预留的，如果希望正确地显示预留字符，就必须在 HTML 源代码中使用字符实体。

编码类型	示例一	示例二
明文	12345	admin
Unicode	`\u0031\u0032\u0033\u0034\u0035`	`\u0061\u0064\u006d\u0069\u006e`

# 在线工具

https://tool.chinaz.com/tools/unicode.aspx (opens new window)

帮助我们改善此页

上次更新: 2025/04/22, 14:38:07

← URL Base64→