1 Star 0 Fork 0

jiangplus / tidb-parser

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
charset.go 17.63 KB
一键复制 编辑 原始数据 按行查看 历史
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635
// Copyright 2015 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.
package mysql
import "unicode"
// CharsetNameToID returns the default collation ID for the given charset name.
// A name that is not present in CharsetIDs yields 0, the zero value of the
// map lookup.
func CharsetNameToID(charset string) uint8 {
	// Quick path for TiDB: answer these charsets directly so the CharsetIDs
	// map does not have to be accessed.
	// Run "SHOW CHARACTER SET;" to see all the supported character sets.
	switch charset {
	case "utf8mb4":
		return UTF8MB4DefaultCollationID
	case "binary":
		return BinaryDefaultCollationID
	case "utf8":
		return UTF8DefaultCollationID
	case "ascii":
		return ASCIIDefaultCollationID
	case "latin1":
		return Latin1DefaultCollationID
	}
	return CharsetIDs[charset]
}
// CharsetIDs maps a charset name to the ID of its default collation.
// Entries are listed in ascending ID order; the IDs are keys of Collations.
var CharsetIDs = map[string]uint8{
	"big5": 1,
	"dec8": 3,
	"cp850": 4,
	"hp8": 6,
	"koi8r": 7,
	"latin1": Latin1DefaultCollationID,
	"latin2": 9,
	"swe7": 10,
	"ascii": ASCIIDefaultCollationID,
	"ujis": 12,
	"sjis": 13,
	"hebrew": 16,
	"tis620": 18,
	"euckr": 19,
	"koi8u": 22,
	"gb2312": 24,
	"greek": 25,
	"cp1250": 26,
	"gbk": 28,
	"latin5": 30,
	"armscii8": 32,
	"utf8": UTF8DefaultCollationID,
	"ucs2": 35,
	"cp866": 36,
	"keybcs2": 37,
	"macce": 38,
	"macroman": 39,
	"cp852": 40,
	"latin7": 41,
	"utf8mb4": UTF8MB4DefaultCollationID,
	"cp1251": 51,
	"utf16": 54,
	"utf16le": 56,
	"cp1256": 57,
	"cp1257": 59,
	"utf32": 60,
	"binary": BinaryDefaultCollationID,
	"geostd8": 92,
	"cp932": 95,
	"eucjpms": 97,
}
// Charsets maps a charset name to the name of its default collation.
// It holds the same charset names as CharsetIDs, in the same order.
var Charsets = map[string]string{
	"big5": "big5_chinese_ci",
	"dec8": "dec8_swedish_ci",
	"cp850": "cp850_general_ci",
	"hp8": "hp8_english_ci",
	"koi8r": "koi8r_general_ci",
	"latin1": "latin1_bin",
	"latin2": "latin2_general_ci",
	"swe7": "swe7_swedish_ci",
	"ascii": "ascii_bin",
	"ujis": "ujis_japanese_ci",
	"sjis": "sjis_japanese_ci",
	"hebrew": "hebrew_general_ci",
	"tis620": "tis620_thai_ci",
	"euckr": "euckr_korean_ci",
	"koi8u": "koi8u_general_ci",
	"gb2312": "gb2312_chinese_ci",
	"greek": "greek_general_ci",
	"cp1250": "cp1250_general_ci",
	"gbk": "gbk_chinese_ci",
	"latin5": "latin5_turkish_ci",
	"armscii8": "armscii8_general_ci",
	"utf8": "utf8_bin",
	"ucs2": "ucs2_general_ci",
	"cp866": "cp866_general_ci",
	"keybcs2": "keybcs2_general_ci",
	"macce": "macce_general_ci",
	"macroman": "macroman_general_ci",
	"cp852": "cp852_general_ci",
	"latin7": "latin7_general_ci",
	"utf8mb4": "utf8mb4_bin",
	"cp1251": "cp1251_general_ci",
	"utf16": "utf16_general_ci",
	"utf16le": "utf16le_general_ci",
	"cp1256": "cp1256_general_ci",
	"cp1257": "cp1257_general_ci",
	"utf32": "utf32_general_ci",
	"binary": "binary",
	"geostd8": "geostd8_general_ci",
	"cp932": "cp932_japanese_ci",
	"eucjpms": "eucjpms_japanese_ci",
}
// Collations maps a MySQL collation ID to the collation's name.
// The table is sparse: IDs that do not appear below have no entry.
var Collations = map[uint8]string{
	// IDs 1-99: general, language-specific and binary collations of all charsets.
	1: "big5_chinese_ci",
	2: "latin2_czech_cs",
	3: "dec8_swedish_ci",
	4: "cp850_general_ci",
	5: "latin1_german1_ci",
	6: "hp8_english_ci",
	7: "koi8r_general_ci",
	8: "latin1_swedish_ci",
	9: "latin2_general_ci",
	10: "swe7_swedish_ci",
	11: "ascii_general_ci",
	12: "ujis_japanese_ci",
	13: "sjis_japanese_ci",
	14: "cp1251_bulgarian_ci",
	15: "latin1_danish_ci",
	16: "hebrew_general_ci",
	18: "tis620_thai_ci",
	19: "euckr_korean_ci",
	20: "latin7_estonian_cs",
	21: "latin2_hungarian_ci",
	22: "koi8u_general_ci",
	23: "cp1251_ukrainian_ci",
	24: "gb2312_chinese_ci",
	25: "greek_general_ci",
	26: "cp1250_general_ci",
	27: "latin2_croatian_ci",
	28: "gbk_chinese_ci",
	29: "cp1257_lithuanian_ci",
	30: "latin5_turkish_ci",
	31: "latin1_german2_ci",
	32: "armscii8_general_ci",
	33: "utf8_general_ci",
	34: "cp1250_czech_cs",
	35: "ucs2_general_ci",
	36: "cp866_general_ci",
	37: "keybcs2_general_ci",
	38: "macce_general_ci",
	39: "macroman_general_ci",
	40: "cp852_general_ci",
	41: "latin7_general_ci",
	42: "latin7_general_cs",
	43: "macce_bin",
	44: "cp1250_croatian_ci",
	45: "utf8mb4_general_ci",
	46: "utf8mb4_bin",
	47: "latin1_bin",
	48: "latin1_general_ci",
	49: "latin1_general_cs",
	50: "cp1251_bin",
	51: "cp1251_general_ci",
	52: "cp1251_general_cs",
	53: "macroman_bin",
	54: "utf16_general_ci",
	55: "utf16_bin",
	56: "utf16le_general_ci",
	57: "cp1256_general_ci",
	58: "cp1257_bin",
	59: "cp1257_general_ci",
	60: "utf32_general_ci",
	61: "utf32_bin",
	62: "utf16le_bin",
	63: "binary",
	64: "armscii8_bin",
	65: "ascii_bin",
	66: "cp1250_bin",
	67: "cp1256_bin",
	68: "cp866_bin",
	69: "dec8_bin",
	70: "greek_bin",
	71: "hebrew_bin",
	72: "hp8_bin",
	73: "keybcs2_bin",
	74: "koi8r_bin",
	75: "koi8u_bin",
	77: "latin2_bin",
	78: "latin5_bin",
	79: "latin7_bin",
	80: "cp850_bin",
	81: "cp852_bin",
	82: "swe7_bin",
	83: "utf8_bin",
	84: "big5_bin",
	85: "euckr_bin",
	86: "gb2312_bin",
	87: "gbk_bin",
	88: "sjis_bin",
	89: "tis620_bin",
	90: "ucs2_bin",
	91: "ujis_bin",
	92: "geostd8_general_ci",
	93: "geostd8_bin",
	94: "latin1_spanish_ci",
	95: "cp932_japanese_ci",
	96: "cp932_bin",
	97: "eucjpms_japanese_ci",
	98: "eucjpms_bin",
	99: "cp1250_polish_ci",

	// IDs 101-124: utf16 collations.
	101: "utf16_unicode_ci",
	102: "utf16_icelandic_ci",
	103: "utf16_latvian_ci",
	104: "utf16_romanian_ci",
	105: "utf16_slovenian_ci",
	106: "utf16_polish_ci",
	107: "utf16_estonian_ci",
	108: "utf16_spanish_ci",
	109: "utf16_swedish_ci",
	110: "utf16_turkish_ci",
	111: "utf16_czech_ci",
	112: "utf16_danish_ci",
	113: "utf16_lithuanian_ci",
	114: "utf16_slovak_ci",
	115: "utf16_spanish2_ci",
	116: "utf16_roman_ci",
	117: "utf16_persian_ci",
	118: "utf16_esperanto_ci",
	119: "utf16_hungarian_ci",
	120: "utf16_sinhala_ci",
	121: "utf16_german2_ci",
	122: "utf16_croatian_ci",
	123: "utf16_unicode_520_ci",
	124: "utf16_vietnamese_ci",

	// IDs 128-159: ucs2 collations.
	128: "ucs2_unicode_ci",
	129: "ucs2_icelandic_ci",
	130: "ucs2_latvian_ci",
	131: "ucs2_romanian_ci",
	132: "ucs2_slovenian_ci",
	133: "ucs2_polish_ci",
	134: "ucs2_estonian_ci",
	135: "ucs2_spanish_ci",
	136: "ucs2_swedish_ci",
	137: "ucs2_turkish_ci",
	138: "ucs2_czech_ci",
	139: "ucs2_danish_ci",
	140: "ucs2_lithuanian_ci",
	141: "ucs2_slovak_ci",
	142: "ucs2_spanish2_ci",
	143: "ucs2_roman_ci",
	144: "ucs2_persian_ci",
	145: "ucs2_esperanto_ci",
	146: "ucs2_hungarian_ci",
	147: "ucs2_sinhala_ci",
	148: "ucs2_german2_ci",
	149: "ucs2_croatian_ci",
	150: "ucs2_unicode_520_ci",
	151: "ucs2_vietnamese_ci",
	159: "ucs2_general_mysql500_ci",

	// IDs 160-183: utf32 collations.
	160: "utf32_unicode_ci",
	161: "utf32_icelandic_ci",
	162: "utf32_latvian_ci",
	163: "utf32_romanian_ci",
	164: "utf32_slovenian_ci",
	165: "utf32_polish_ci",
	166: "utf32_estonian_ci",
	167: "utf32_spanish_ci",
	168: "utf32_swedish_ci",
	169: "utf32_turkish_ci",
	170: "utf32_czech_ci",
	171: "utf32_danish_ci",
	172: "utf32_lithuanian_ci",
	173: "utf32_slovak_ci",
	174: "utf32_spanish2_ci",
	175: "utf32_roman_ci",
	176: "utf32_persian_ci",
	177: "utf32_esperanto_ci",
	178: "utf32_hungarian_ci",
	179: "utf32_sinhala_ci",
	180: "utf32_german2_ci",
	181: "utf32_croatian_ci",
	182: "utf32_unicode_520_ci",
	183: "utf32_vietnamese_ci",

	// IDs 192-223: utf8 collations.
	192: "utf8_unicode_ci",
	193: "utf8_icelandic_ci",
	194: "utf8_latvian_ci",
	195: "utf8_romanian_ci",
	196: "utf8_slovenian_ci",
	197: "utf8_polish_ci",
	198: "utf8_estonian_ci",
	199: "utf8_spanish_ci",
	200: "utf8_swedish_ci",
	201: "utf8_turkish_ci",
	202: "utf8_czech_ci",
	203: "utf8_danish_ci",
	204: "utf8_lithuanian_ci",
	205: "utf8_slovak_ci",
	206: "utf8_spanish2_ci",
	207: "utf8_roman_ci",
	208: "utf8_persian_ci",
	209: "utf8_esperanto_ci",
	210: "utf8_hungarian_ci",
	211: "utf8_sinhala_ci",
	212: "utf8_german2_ci",
	213: "utf8_croatian_ci",
	214: "utf8_unicode_520_ci",
	215: "utf8_vietnamese_ci",
	223: "utf8_general_mysql500_ci",

	// IDs 224-255: utf8mb4 collations.
	224: "utf8mb4_unicode_ci",
	225: "utf8mb4_icelandic_ci",
	226: "utf8mb4_latvian_ci",
	227: "utf8mb4_romanian_ci",
	228: "utf8mb4_slovenian_ci",
	229: "utf8mb4_polish_ci",
	230: "utf8mb4_estonian_ci",
	231: "utf8mb4_spanish_ci",
	232: "utf8mb4_swedish_ci",
	233: "utf8mb4_turkish_ci",
	234: "utf8mb4_czech_ci",
	235: "utf8mb4_danish_ci",
	236: "utf8mb4_lithuanian_ci",
	237: "utf8mb4_slovak_ci",
	238: "utf8mb4_spanish2_ci",
	239: "utf8mb4_roman_ci",
	240: "utf8mb4_persian_ci",
	241: "utf8mb4_esperanto_ci",
	242: "utf8mb4_hungarian_ci",
	243: "utf8mb4_sinhala_ci",
	244: "utf8mb4_german2_ci",
	245: "utf8mb4_croatian_ci",
	246: "utf8mb4_unicode_520_ci",
	247: "utf8mb4_vietnamese_ci",
	255: "utf8mb4_0900_ai_ci",
}
// CollationNames maps a MySQL collation name to the collation's ID.
// It is the inverse of the Collations table.
var CollationNames = map[string]uint8{
	// IDs 1-99: general, language-specific and binary collations of all charsets.
	"big5_chinese_ci": 1,
	"latin2_czech_cs": 2,
	"dec8_swedish_ci": 3,
	"cp850_general_ci": 4,
	"latin1_german1_ci": 5,
	"hp8_english_ci": 6,
	"koi8r_general_ci": 7,
	"latin1_swedish_ci": 8,
	"latin2_general_ci": 9,
	"swe7_swedish_ci": 10,
	"ascii_general_ci": 11,
	"ujis_japanese_ci": 12,
	"sjis_japanese_ci": 13,
	"cp1251_bulgarian_ci": 14,
	"latin1_danish_ci": 15,
	"hebrew_general_ci": 16,
	"tis620_thai_ci": 18,
	"euckr_korean_ci": 19,
	"latin7_estonian_cs": 20,
	"latin2_hungarian_ci": 21,
	"koi8u_general_ci": 22,
	"cp1251_ukrainian_ci": 23,
	"gb2312_chinese_ci": 24,
	"greek_general_ci": 25,
	"cp1250_general_ci": 26,
	"latin2_croatian_ci": 27,
	"gbk_chinese_ci": 28,
	"cp1257_lithuanian_ci": 29,
	"latin5_turkish_ci": 30,
	"latin1_german2_ci": 31,
	"armscii8_general_ci": 32,
	"utf8_general_ci": 33,
	"cp1250_czech_cs": 34,
	"ucs2_general_ci": 35,
	"cp866_general_ci": 36,
	"keybcs2_general_ci": 37,
	"macce_general_ci": 38,
	"macroman_general_ci": 39,
	"cp852_general_ci": 40,
	"latin7_general_ci": 41,
	"latin7_general_cs": 42,
	"macce_bin": 43,
	"cp1250_croatian_ci": 44,
	"utf8mb4_general_ci": 45,
	"utf8mb4_bin": 46,
	"latin1_bin": 47,
	"latin1_general_ci": 48,
	"latin1_general_cs": 49,
	"cp1251_bin": 50,
	"cp1251_general_ci": 51,
	"cp1251_general_cs": 52,
	"macroman_bin": 53,
	"utf16_general_ci": 54,
	"utf16_bin": 55,
	"utf16le_general_ci": 56,
	"cp1256_general_ci": 57,
	"cp1257_bin": 58,
	"cp1257_general_ci": 59,
	"utf32_general_ci": 60,
	"utf32_bin": 61,
	"utf16le_bin": 62,
	"binary": 63,
	"armscii8_bin": 64,
	"ascii_bin": 65,
	"cp1250_bin": 66,
	"cp1256_bin": 67,
	"cp866_bin": 68,
	"dec8_bin": 69,
	"greek_bin": 70,
	"hebrew_bin": 71,
	"hp8_bin": 72,
	"keybcs2_bin": 73,
	"koi8r_bin": 74,
	"koi8u_bin": 75,
	"latin2_bin": 77,
	"latin5_bin": 78,
	"latin7_bin": 79,
	"cp850_bin": 80,
	"cp852_bin": 81,
	"swe7_bin": 82,
	"utf8_bin": 83,
	"big5_bin": 84,
	"euckr_bin": 85,
	"gb2312_bin": 86,
	"gbk_bin": 87,
	"sjis_bin": 88,
	"tis620_bin": 89,
	"ucs2_bin": 90,
	"ujis_bin": 91,
	"geostd8_general_ci": 92,
	"geostd8_bin": 93,
	"latin1_spanish_ci": 94,
	"cp932_japanese_ci": 95,
	"cp932_bin": 96,
	"eucjpms_japanese_ci": 97,
	"eucjpms_bin": 98,
	"cp1250_polish_ci": 99,

	// IDs 101-124: utf16 collations.
	"utf16_unicode_ci": 101,
	"utf16_icelandic_ci": 102,
	"utf16_latvian_ci": 103,
	"utf16_romanian_ci": 104,
	"utf16_slovenian_ci": 105,
	"utf16_polish_ci": 106,
	"utf16_estonian_ci": 107,
	"utf16_spanish_ci": 108,
	"utf16_swedish_ci": 109,
	"utf16_turkish_ci": 110,
	"utf16_czech_ci": 111,
	"utf16_danish_ci": 112,
	"utf16_lithuanian_ci": 113,
	"utf16_slovak_ci": 114,
	"utf16_spanish2_ci": 115,
	"utf16_roman_ci": 116,
	"utf16_persian_ci": 117,
	"utf16_esperanto_ci": 118,
	"utf16_hungarian_ci": 119,
	"utf16_sinhala_ci": 120,
	"utf16_german2_ci": 121,
	"utf16_croatian_ci": 122,
	"utf16_unicode_520_ci": 123,
	"utf16_vietnamese_ci": 124,

	// IDs 128-159: ucs2 collations.
	"ucs2_unicode_ci": 128,
	"ucs2_icelandic_ci": 129,
	"ucs2_latvian_ci": 130,
	"ucs2_romanian_ci": 131,
	"ucs2_slovenian_ci": 132,
	"ucs2_polish_ci": 133,
	"ucs2_estonian_ci": 134,
	"ucs2_spanish_ci": 135,
	"ucs2_swedish_ci": 136,
	"ucs2_turkish_ci": 137,
	"ucs2_czech_ci": 138,
	"ucs2_danish_ci": 139,
	"ucs2_lithuanian_ci": 140,
	"ucs2_slovak_ci": 141,
	"ucs2_spanish2_ci": 142,
	"ucs2_roman_ci": 143,
	"ucs2_persian_ci": 144,
	"ucs2_esperanto_ci": 145,
	"ucs2_hungarian_ci": 146,
	"ucs2_sinhala_ci": 147,
	"ucs2_german2_ci": 148,
	"ucs2_croatian_ci": 149,
	"ucs2_unicode_520_ci": 150,
	"ucs2_vietnamese_ci": 151,
	"ucs2_general_mysql500_ci": 159,

	// IDs 160-183: utf32 collations.
	"utf32_unicode_ci": 160,
	"utf32_icelandic_ci": 161,
	"utf32_latvian_ci": 162,
	"utf32_romanian_ci": 163,
	"utf32_slovenian_ci": 164,
	"utf32_polish_ci": 165,
	"utf32_estonian_ci": 166,
	"utf32_spanish_ci": 167,
	"utf32_swedish_ci": 168,
	"utf32_turkish_ci": 169,
	"utf32_czech_ci": 170,
	"utf32_danish_ci": 171,
	"utf32_lithuanian_ci": 172,
	"utf32_slovak_ci": 173,
	"utf32_spanish2_ci": 174,
	"utf32_roman_ci": 175,
	"utf32_persian_ci": 176,
	"utf32_esperanto_ci": 177,
	"utf32_hungarian_ci": 178,
	"utf32_sinhala_ci": 179,
	"utf32_german2_ci": 180,
	"utf32_croatian_ci": 181,
	"utf32_unicode_520_ci": 182,
	"utf32_vietnamese_ci": 183,

	// IDs 192-223: utf8 collations.
	"utf8_unicode_ci": 192,
	"utf8_icelandic_ci": 193,
	"utf8_latvian_ci": 194,
	"utf8_romanian_ci": 195,
	"utf8_slovenian_ci": 196,
	"utf8_polish_ci": 197,
	"utf8_estonian_ci": 198,
	"utf8_spanish_ci": 199,
	"utf8_swedish_ci": 200,
	"utf8_turkish_ci": 201,
	"utf8_czech_ci": 202,
	"utf8_danish_ci": 203,
	"utf8_lithuanian_ci": 204,
	"utf8_slovak_ci": 205,
	"utf8_spanish2_ci": 206,
	"utf8_roman_ci": 207,
	"utf8_persian_ci": 208,
	"utf8_esperanto_ci": 209,
	"utf8_hungarian_ci": 210,
	"utf8_sinhala_ci": 211,
	"utf8_german2_ci": 212,
	"utf8_croatian_ci": 213,
	"utf8_unicode_520_ci": 214,
	"utf8_vietnamese_ci": 215,
	"utf8_general_mysql500_ci": 223,

	// IDs 224-255: utf8mb4 collations.
	"utf8mb4_unicode_ci": 224,
	"utf8mb4_icelandic_ci": 225,
	"utf8mb4_latvian_ci": 226,
	"utf8mb4_romanian_ci": 227,
	"utf8mb4_slovenian_ci": 228,
	"utf8mb4_polish_ci": 229,
	"utf8mb4_estonian_ci": 230,
	"utf8mb4_spanish_ci": 231,
	"utf8mb4_swedish_ci": 232,
	"utf8mb4_turkish_ci": 233,
	"utf8mb4_czech_ci": 234,
	"utf8mb4_danish_ci": 235,
	"utf8mb4_lithuanian_ci": 236,
	"utf8mb4_slovak_ci": 237,
	"utf8mb4_spanish2_ci": 238,
	"utf8mb4_roman_ci": 239,
	"utf8mb4_persian_ci": 240,
	"utf8mb4_esperanto_ci": 241,
	"utf8mb4_hungarian_ci": 242,
	"utf8mb4_sinhala_ci": 243,
	"utf8mb4_german2_ci": 244,
	"utf8mb4_croatian_ci": 245,
	"utf8mb4_unicode_520_ci": 246,
	"utf8mb4_vietnamese_ci": 247,
	"utf8mb4_0900_ai_ci": 255,
}
// MySQL charset and collation defaults.
const (
	// Charset names.

	// UTF8Charset is the name of the utf8 charset.
	UTF8Charset = "utf8"
	// UTF8MB4Charset is the name of the utf8mb4 charset.
	UTF8MB4Charset = "utf8mb4"
	// DefaultCharset is utf8mb4.
	DefaultCharset = UTF8MB4Charset

	// Default collation IDs, one per charset; each is a key of Collations.

	// Latin1DefaultCollationID is latin1_bin(47).
	Latin1DefaultCollationID = 47
	// ASCIIDefaultCollationID is ascii_bin(65).
	ASCIIDefaultCollationID = 65
	// UTF8DefaultCollationID is utf8_bin(83).
	UTF8DefaultCollationID = 83
	// UTF8MB4DefaultCollationID is utf8mb4_bin(46).
	UTF8MB4DefaultCollationID = 46
	// BinaryDefaultCollationID is binary(63).
	BinaryDefaultCollationID = 63
	// DefaultCollationID is utf8mb4_bin(46).
	DefaultCollationID = UTF8MB4DefaultCollationID

	// Default collation names.

	// UTF8DefaultCollation is the default collation name of utf8.
	UTF8DefaultCollation = "utf8_bin"
	// UTF8MB4DefaultCollation is the default collation name of utf8mb4.
	UTF8MB4DefaultCollation = "utf8mb4_bin"
	// DefaultCollationName is utf8mb4_bin.
	DefaultCollationName = UTF8MB4DefaultCollation

	// MaxBytesOfCharacter is the maximum byte length of a character.
	// Per RFC 3629, UTF-8 encodes characters from the U+0000..U+10FFFF range
	// (the UTF-16 accessible range) using sequences of 1 to 4 octets.
	MaxBytesOfCharacter = 4
)
// IsUTF8Charset reports whether charset is exactly UTF8Charset or
// UTF8MB4Charset. The comparison is case-sensitive.
func IsUTF8Charset(charset string) bool {
	switch charset {
	case UTF8Charset, UTF8MB4Charset:
		return true
	}
	return false
}
// RangeGraph defines the valid unicode characters to use in column names.
// It strictly follows MySQL's definition. See #3994.
//
// The tables are grouped under the MySQL character-class names shown in the
// comments below. A table can appear under more than one class (for example
// unicode.Nl); such repeats are kept, presumably to mirror each class
// separately, and do not change which characters are matched. Within a single
// class every table is listed only once.
var RangeGraph = []*unicode.RangeTable{
	// _MY_PNT
	unicode.No,
	unicode.Mn,
	unicode.Me,
	unicode.Pc,
	unicode.Pd,
	unicode.Ps,
	unicode.Pe,
	unicode.Pi,
	unicode.Pf,
	unicode.Po,
	unicode.Sm,
	unicode.Sc,
	unicode.Sk,
	unicode.So,
	// _MY_U
	unicode.Lu,
	unicode.Lt,
	unicode.Nl,
	// _MY_L
	unicode.Ll,
	unicode.Lm,
	unicode.Lo,
	unicode.Nl,
	unicode.Mn,
	unicode.Mc,
	unicode.Me,
	// _MY_NMR
	unicode.Nd,
	unicode.Nl,
	unicode.No,
}
1
https://gitee.com/jiangplus/tidb-parser.git
git@gitee.com:jiangplus/tidb-parser.git
jiangplus
tidb-parser
tidb-parser
v3.1.2

搜索帮助