1 Star 0 Fork 0

AllyDale / charset

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
kuten.go 2.11 KB
一键复制 编辑 原始数据 按行查看 历史
AllyDale 提交于 2017-10-27 14:41 . initial commit
package mahonia
import (
"sync"
"unicode/utf8"
)
// A kutenTable holds the data for a double-byte character set, arranged by ku
// (区, zone) and ten (点, position). These can be converted to various actual
// encoding schemes.
//
// Both indices are zero-based in this table; the Decode*/Encode* methods add
// or subtract the per-encoding byte offset (0x21 or 0xa1). The struct embeds a
// sync.Once, so it must be used through a pointer and never copied.
type kutenTable struct {
// Data[ku][ten] is the unicode value for the character at that zone and
// position. The decoders treat a value of 0 as "no character assigned".
Data [94][94]uint16
// FromUnicode holds the ku and ten for each Unicode code point.
// It is not available until Reverse() has been called.
// Reverse allocates it with 65536 entries, indexed by code point.
FromUnicode [][2]byte
// once is used to synchronize the generation of FromUnicode.
once sync.Once
}
// Reverse generates FromUnicode, the inverse of Data, mapping each code point
// in the Basic Multilingual Plane to its zero-based {ku, ten} pair.
//
// The work is done at most once per table (guarded by t.once), so Reverse is
// safe to call repeatedly and from multiple goroutines. Code points that do
// not occur in Data keep the zero pair {0, 0}. If a code point occurs more
// than once in Data, the last occurrence in ku/ten order wins.
func (t *kutenTable) Reverse() {
	t.once.Do(func() {
		t.FromUnicode = make([][2]byte, 65536)
		for ku := range t.Data {
			for ten, unicode := range t.Data[ku] {
				// A zero entry marks an unassigned cell, not U+0000.
				// Recording it would make FromUnicode[0] point at an
				// arbitrary unassigned position.
				if unicode == 0 {
					continue
				}
				t.FromUnicode[unicode] = [2]byte{byte(ku), byte(ten)}
			}
		}
	})
}
// DecodeLow decodes one two-byte character from an encoding whose bytes do
// not have the high bit set: each byte is the zero-based ku/ten index plus
// 0x21.
//
// It returns (0, 0, NO_ROOM) when p holds fewer than two bytes,
// (utf8.RuneError, 1, INVALID_CHAR) when either byte is out of range or the
// table cell is unassigned, and (rune, 2, SUCCESS) otherwise.
func (t *kutenTable) DecodeLow(p []byte) (c rune, size int, status Status) {
	if len(p) < 2 {
		return 0, 0, NO_ROOM
	}

	// Byte subtraction wraps around, so bytes below 0x21 turn into large
	// values and fail the range check below.
	row, col := p[0]-0x21, p[1]-0x21
	if row <= 93 && col <= 93 {
		if code := t.Data[row][col]; code != 0 {
			return rune(code), 2, SUCCESS
		}
	}
	return utf8.RuneError, 1, INVALID_CHAR
}
// DecodeHigh decodes one two-byte character from an encoding whose bytes have
// the high bit set: each byte is the zero-based ku/ten index plus 0xa1.
//
// It returns (0, 0, NO_ROOM) when p holds fewer than two bytes,
// (utf8.RuneError, 1, INVALID_CHAR) when either byte is out of range or the
// table cell is unassigned, and (rune, 2, SUCCESS) otherwise.
func (t *kutenTable) DecodeHigh(p []byte) (c rune, size int, status Status) {
	if len(p) < 2 {
		return 0, 0, NO_ROOM
	}

	// Byte subtraction wraps around, so bytes below the offset become large
	// values and are rejected by the first case.
	const offset = 0xa1
	row, col := p[0]-offset, p[1]-offset

	switch {
	case row > 93 || col > 93:
		return utf8.RuneError, 1, INVALID_CHAR
	case t.Data[row][col] == 0:
		return utf8.RuneError, 1, INVALID_CHAR
	}
	return rune(t.Data[row][col]), 2, SUCCESS
}
// EncodeHigh encodes c into p using an encoding that has the high bit set:
// each output byte is the zero-based ku/ten index plus 0xa1.
//
// It returns (0, NO_ROOM) when p has room for fewer than two bytes. When c
// cannot be represented by this table it writes '?' to p[0] and returns
// (1, INVALID_CHAR). On success it writes two bytes and returns (2, SUCCESS).
//
// The reverse table is built on first use, so calling Reverse beforehand is
// optional.
func (t *kutenTable) EncodeHigh(p []byte, c rune) (size int, status Status) {
	if len(p) < 2 {
		return 0, NO_ROOM
	}

	// FromUnicode only covers U+0001..U+FFFF: a negative rune would index
	// out of range, and 0 is the "unassigned" marker in Data, so U+0000 can
	// never be a table character.
	if c <= 0 || c > 0xffff {
		p[0] = '?'
		return 1, INVALID_CHAR
	}

	// Make sure FromUnicode exists; after the first call this is only the
	// sync.Once fast path.
	t.Reverse()

	kuten := t.FromUnicode[c]
	// {0, 0} is both the "not present" value and the valid position of the
	// first table cell, so check Data[0][0] to tell the two apart.
	if kuten == [2]byte{0, 0} && c != rune(t.Data[0][0]) {
		p[0] = '?'
		return 1, INVALID_CHAR
	}

	p[0] = kuten[0] + 0xa1
	p[1] = kuten[1] + 0xa1
	return 2, SUCCESS
}
Go
1
https://gitee.com/vipally/charset.git
git@gitee.com:vipally/charset.git
vipally
charset
charset
3053f564123c

搜索帮助