代码拉取完成,页面将自动刷新
package s
import (
"bytes"
"fmt"
"io/ioutil"
"unicode/utf16"
"unicode/utf8"
"github.com/saintfish/chardet"
"golang.org/x/text/encoding/simplifiedchinese"
"golang.org/x/text/encoding/traditionalchinese"
"golang.org/x/text/transform"
)
// Message and charset-name values shared by the conversion helpers in this file.
var (
// ERR_UTF16_TO_UTF8 is the message of the error UTF16ToUTF8 returns when its
// input has an odd number of bytes.
ERR_UTF16_TO_UTF8 = "Must have even length byte slice"
// The TP_* values are the charset names that AutoEncodeToUTF8 compares
// against the detector's reported charset to pick a conversion.
TP_ISO_8859_1 = "ISO-8859-1"
TP_GB_18030 = "GB-18030"
TP_BIG_5 = "Big5"
TP_UTF16 = "UTF-16LE"
TP_UTF8 = "UTF-8"
)
// AutoDecode converts fromBytes to UTF-8 via AutoEncodeToUTF8 and returns the
// result as a Str. If the conversion fails, it returns an empty Str together
// with the conversion error.
func AutoDecode(fromBytes []byte) (utfStr Str, err error) {
	var converted []byte
	if converted, err = AutoEncodeToUTF8(fromBytes); err != nil {
		return "", err
	}
	return Str(converted), nil
}
// AutoEncodeToUTF8 detects the charset of output and converts it to UTF-8.
//
// The charset reported by the detector selects the conversion:
//   - TP_UTF8 and TP_ISO_8859_1: the input is returned unchanged.
//   - TP_GB_18030: decoded with GbkToUtf8.
//   - TP_BIG_5: decoded with Big5ToUtf8.
//   - TP_UTF16: decoded with UTF16ToUTF8.
//   - anything else: GbkToUtf8 is tried first and, if it fails, UTF16ToUTF8 is
//     applied to the original input.
//
// It returns the detector's error when detection fails, or the error of the
// selected conversion.
//
// NOTE(review): ISO-8859-1 input is passed through as-is; bytes >= 0x80 in such
// input are not valid UTF-8 on their own. Confirm this pass-through is intended.
func AutoEncodeToUTF8(output []byte) ([]byte, error) {
	detector := chardet.NewTextDetector()
	result, err := detector.DetectBest(output)
	if err != nil {
		return nil, err
	}

	switch result.Charset {
	case TP_UTF8, TP_ISO_8859_1:
		return output, nil
	case TP_GB_18030:
		return GbkToUtf8(output)
	case TP_BIG_5:
		return Big5ToUtf8(output)
	case TP_UTF16:
		return UTF16ToUTF8(output)
	default:
		// Unknown charset: try GBK first. Keep the result in a separate
		// variable so that a failed attempt does not discard the original
		// bytes, which the UTF-16 fallback still needs.
		converted, gbkErr := GbkToUtf8(output)
		if gbkErr == nil {
			return converted, nil
		}
		return UTF16ToUTF8(output)
	}
}
func GbkToUtf8(s []byte) ([]byte, error) {
reader := transform.NewReader(bytes.NewReader(s), simplifiedchinese.GBK.NewDecoder())
d, e := ioutil.ReadAll(reader)
if e != nil {
return nil, e
}
return d, nil
}
// Utf8ToGbk runs s through the GBK encoder and returns the encoded bytes.
// If reading the transformed stream fails, it returns nil and that error.
func Utf8ToGbk(s []byte) ([]byte, error) {
	gbkStream := transform.NewReader(bytes.NewReader(s), simplifiedchinese.GBK.NewEncoder())

	encoded, err := ioutil.ReadAll(gbkStream)
	if err == nil {
		return encoded, nil
	}
	return nil, err
}
// Big5ToUtf8 runs s through the Big5 decoder and returns the decoded bytes.
// If reading the transformed stream fails, it returns nil and that error.
func Big5ToUtf8(s []byte) ([]byte, error) {
	src := bytes.NewReader(s)
	big5Stream := transform.NewReader(src, traditionalchinese.Big5.NewDecoder())

	decoded, err := ioutil.ReadAll(big5Stream)
	if err != nil {
		return nil, err
	}
	return decoded, nil
}
// UTF16ToUTF8 converts little-endian UTF-16 bytes to UTF-8.
//
// b is read as a sequence of 16-bit code units, low byte first. The whole
// sequence is decoded at once so that surrogate pairs are combined into a
// single code point; an unpaired surrogate becomes U+FFFD. A byte order mark
// is not treated specially: if present it is converted like any other unit.
//
// It returns an error carrying the ERR_UTF16_TO_UTF8 message when len(b) is
// odd.
func UTF16ToUTF8(b []byte) ([]byte, error) {
	if len(b)%2 != 0 {
		// Use an explicit verb so the message is never interpreted as a format.
		return nil, fmt.Errorf("%s", ERR_UTF16_TO_UTF8)
	}

	// Assemble all code units first; decoding them one at a time would split
	// surrogate pairs and turn every supplementary character into U+FFFD.
	units := make([]uint16, len(b)/2)
	for i := range units {
		units[i] = uint16(b[2*i]) | uint16(b[2*i+1])<<8
	}

	var ret bytes.Buffer
	ret.Grow(len(b))
	var scratch [utf8.UTFMax]byte
	for _, r := range utf16.Decode(units) {
		n := utf8.EncodeRune(scratch[:], r)
		ret.Write(scratch[:n])
	}
	return ret.Bytes(), nil
}
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。