1 Star 0 Fork 1

王布衣 / pkg

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
string_unicode.go 12.03 KB
一键复制 编辑 原始数据 按行查看 历史
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619
package goja
import (
"errors"
"hash/maphash"
"io"
"math"
"reflect"
"strings"
"unicode/utf16"
"unicode/utf8"
"gitee.com/quant1x/pkg/goja/parser"
"gitee.com/quant1x/pkg/goja/unistring"
"golang.org/x/text/cases"
"golang.org/x/text/language"
)
// unicodeString is an ECMAScript string stored as UTF-16 code units. Index 0 is a
// marker slot (builders in this file store unistring.BOM there) and the string
// content starts at index 1, which is why CharAt and Length offset by one.
type unicodeString []uint16
// unicodeRuneReader reads runes from UTF-16 code units, combining valid surrogate
// pairs and reporting InvalidRuneError for unpaired surrogates (see its ReadRune).
type unicodeRuneReader struct {
// s holds the code units to read.
s unicodeString
// pos is the index in s of the next unread code unit.
pos int
}
// utf16RuneReader reads UTF-16 code units one at a time without combining
// surrogate pairs, either as raw uint16 values (readChar) or as runes (ReadRune).
type utf16RuneReader struct {
// s holds the code units to read.
s unicodeString
// pos is the index in s of the next unread code unit.
pos int
}
// lenientUtf16Decoder decodes UTF-16 code units into runes. Valid surrogate pairs are
// combined; invalid (unpaired) surrogates are passed through as individual runes
// instead of being reported as errors.
type lenientUtf16Decoder struct {
// utf16Reader supplies the code units, one per readChar call.
utf16Reader utf16Reader
// prev holds a code unit that was read while looking for the second half of a
// surrogate pair but did not complete it; the next ReadRune call starts from it.
prev uint16
// prevSet reports whether prev currently holds such a pending code unit.
prevSet bool
}
// StringBuilder serves a similar purpose to strings.Builder, except that it works with ECMAScript Strings.
// Use it to efficiently build 'native' ECMAScript values that either contain invalid UTF-16 surrogate pairs
// (and therefore cannot be represented as UTF-8) or are never expected to be exported to Go. See also
// StringFromUTF16.
type StringBuilder struct {
// asciiBuilder accumulates the content while the builder is in ASCII mode.
asciiBuilder strings.Builder
// unicodeBuilder takes over once the builder switches to Unicode mode. The builder
// counts as ASCII for as long as unicodeBuilder's buffer is empty.
unicodeBuilder unicodeStringBuilder
}
// unicodeStringBuilder accumulates UTF-16 code units in the layout used by
// unicodeString.
type unicodeStringBuilder struct {
// buf holds the code units; once started (see ensureStarted) index 0 is
// unistring.BOM and the content follows.
buf []uint16
// unicode is set when non-ASCII content has been written; String uses it to
// decide between returning a unicodeString and an asciiString.
unicode bool
}
var (
// InvalidRuneError is the error unicodeRuneReader.ReadRune returns together with
// an unpaired UTF-16 surrogate.
InvalidRuneError = errors.New("invalid rune")
)
// readChar returns the next UTF-16 code unit and advances the reader by one.
// Once every code unit has been consumed it returns 0 and io.EOF.
func (rr *utf16RuneReader) readChar() (c uint16, err error) {
	if rr.pos >= len(rr.s) {
		return 0, io.EOF
	}
	c = rr.s[rr.pos]
	rr.pos++
	return c, nil
}
// ReadRune returns the next UTF-16 code unit as a rune with size 1. Surrogates are
// not combined: each half of a pair is returned as its own rune. At the end of the
// input it returns io.EOF with a zero rune and size.
func (rr *utf16RuneReader) ReadRune() (r rune, size int, err error) {
	if rr.pos >= len(rr.s) {
		return 0, 0, io.EOF
	}
	unit := rr.s[rr.pos]
	rr.pos++
	return rune(unit), 1, nil
}
// ReadRune decodes the next rune from the underlying code-unit reader.
//
// A first surrogate followed by a second surrogate is combined into one rune with
// size 2. Any other code unit, including an unpaired surrogate, is returned as is
// with size 1. When a first surrogate is followed by a non-matching unit, that unit
// is kept in prev and becomes the starting point of the next call. Errors from the
// reader are returned unchanged, except that io.EOF right after a first surrogate
// is swallowed so the surrogate itself can be returned.
func (rr *lenientUtf16Decoder) ReadRune() (r rune, size int, err error) {
	var unit uint16
	if rr.prevSet {
		unit, rr.prevSet = rr.prev, false
	} else if unit, err = rr.utf16Reader.readChar(); err != nil {
		return 0, 0, err
	}
	if !isUTF16FirstSurrogate(unit) {
		return rune(unit), 1, nil
	}

	next, nextErr := rr.utf16Reader.readChar()
	switch {
	case nextErr == io.EOF:
		// Trailing first surrogate: hand it out, EOF surfaces on the next call.
		return rune(unit), 1, nil
	case nextErr != nil:
		return 0, 1, nextErr
	case isUTF16SecondSurrogate(next):
		return utf16.DecodeRune(rune(unit), rune(next)), 2, nil
	}
	// Not a pair: remember the unit just read and emit the lone surrogate.
	rr.prev, rr.prevSet = next, true
	return rune(unit), 1, nil
}
// ReadRune decodes the next rune from the UTF-16 code units.
//
// A valid surrogate pair is combined and reported with size 2. An unpaired
// surrogate (a first surrogate not followed by a second one, or a stray second
// surrogate) is returned as a rune of size 1 together with InvalidRuneError. At the
// end of the input it returns io.EOF with a zero rune and size.
func (rr *unicodeRuneReader) ReadRune() (r rune, size int, err error) {
	if rr.pos >= len(rr.s) {
		return 0, 0, io.EOF
	}
	unit := rr.s[rr.pos]
	rr.pos++

	switch {
	case isUTF16FirstSurrogate(unit):
		if rr.pos < len(rr.s) && isUTF16SecondSurrogate(rr.s[rr.pos]) {
			low := rr.s[rr.pos]
			rr.pos++
			return utf16.DecodeRune(rune(unit), rune(low)), 2, nil
		}
		err = InvalidRuneError
	case isUTF16SecondSurrogate(unit):
		err = InvalidRuneError
	}
	return rune(unit), 1, err
}
// Grow makes sure at least n more code units can be appended without another
// allocation. When the buffer is still empty one extra slot is reserved on top of
// n. If the spare capacity is insufficient, the content is copied into a new
// buffer whose capacity is twice the old capacity plus n.
func (b *unicodeStringBuilder) Grow(n int) {
	used := len(b.buf)
	if used == 0 {
		n++
	}
	if spare := cap(b.buf) - used; spare >= n {
		return
	}
	grown := make([]uint16, used, 2*cap(b.buf)+n)
	copy(grown, b.buf)
	b.buf = grown
}
// ensureStarted reserves room for initialSize code units and, if the buffer is
// still empty, writes the leading unistring.BOM marker so that content can be
// appended after it.
func (b *unicodeStringBuilder) ensureStarted(initialSize int) {
	b.Grow(initialSize)
	if len(b.buf) != 0 {
		return
	}
	b.buf = append(b.buf, unistring.BOM)
}
// writeString appends the content of s to the buffer. The buffer must already be
// started (see ensureStarted). A Unicode source is appended without its leading
// marker unit and flags the buffer as Unicode; an ASCII source is widened
// character by character and leaves the flag untouched.
func (b *unicodeStringBuilder) writeString(s String) {
	ascii, uni := devirtualizeString(s)
	if uni == nil {
		for i := 0; i < len(ascii); i++ {
			b.buf = append(b.buf, uint16(ascii[i]))
		}
		return
	}
	b.buf = append(b.buf, uni[1:]...)
	b.unicode = true
}
// String returns the accumulated content as an ECMAScript String. If Unicode
// content was written the buffer itself is returned as a unicodeString. Otherwise
// the code units after the marker are narrowed to bytes and returned as an
// asciiString, or stringEmpty when there is no content.
func (b *unicodeStringBuilder) String() String {
	switch {
	case b.unicode:
		return unicodeString(b.buf)
	case len(b.buf) < 2:
		return stringEmpty
	}
	units := b.buf[1:]
	narrow := make([]byte, len(units))
	for i, u := range units {
		narrow[i] = byte(u)
	}
	return asciiString(narrow)
}
// WriteRune appends r to the buffer. It first makes sure the buffer has been
// started with room for two more code units (the most a single rune can need),
// then delegates to writeRuneFast.
func (b *unicodeStringBuilder) WriteRune(r rune) {
b.ensureStarted(2)
b.writeRuneFast(r)
}
// writeRuneFast appends r to the buffer, which must already be started (see
// ensureStarted). Runes above 0xFFFF are written as a surrogate pair; all others as
// a single code unit. The buffer is flagged as Unicode whenever r is not ASCII.
func (b *unicodeStringBuilder) writeRuneFast(r rune) {
	if r > 0xFFFF {
		hi, lo := utf16.EncodeRune(r)
		b.buf = append(b.buf, uint16(hi), uint16(lo))
		b.unicode = true
		return
	}
	b.buf = append(b.buf, uint16(r))
	if r >= utf8.RuneSelf {
		b.unicode = true
	}
}
// writeASCIIString appends every character of bytes to the buffer as one code unit.
// It never sets the unicode flag, so, as the name says, it is meant for ASCII-only
// input.
func (b *unicodeStringBuilder) writeASCIIString(bytes string) {
for _, c := range bytes {
b.buf = append(b.buf, uint16(c))
}
}
// writeUnicodeString appends the content of str (everything after its leading
// marker unit) to the buffer and flags the buffer as Unicode.
func (b *unicodeStringBuilder) writeUnicodeString(str unicodeString) {
b.buf = append(b.buf, str[1:]...)
b.unicode = true
}
// ascii reports whether the builder is still in ASCII mode, i.e. the Unicode
// builder's buffer has not been started yet.
func (b *StringBuilder) ascii() bool {
return len(b.unicodeBuilder.buf) == 0
}
// WriteString appends the ECMAScript String s. A Unicode source switches the
// builder to Unicode mode first; an ASCII source goes to whichever underlying
// builder is currently active.
func (b *StringBuilder) WriteString(s String) {
	a, u := devirtualizeString(s)
	switch {
	case u != nil:
		b.switchToUnicode(u.Length())
		b.unicodeBuilder.writeUnicodeString(u)
	case b.ascii():
		b.asciiBuilder.WriteString(string(a))
	default:
		b.unicodeBuilder.writeASCIIString(string(a))
	}
}
// WriteUTF8String appends the Go (UTF-8) string s.
//
// While the builder is in ASCII mode, s is scanned for its first non-ASCII byte.
// If there is none, s goes straight into the ASCII builder. Otherwise the builder
// switches to Unicode mode, the ASCII prefix is copied over and the remainder is
// written rune by rune. A builder that is already in Unicode mode writes the whole
// of s rune by rune.
func (b *StringBuilder) WriteUTF8String(s string) {
	rest := s
	if b.ascii() {
		split := -1
		for i := 0; i < len(s); i++ {
			if s[i] >= utf8.RuneSelf {
				split = i
				break
			}
		}
		if split < 0 {
			b.asciiBuilder.WriteString(s)
			return
		}
		b.switchToUnicode(len(s))
		b.unicodeBuilder.writeASCIIString(s[:split])
		rest = s[split:]
	}
	for _, r := range rest {
		b.unicodeBuilder.writeRuneFast(r)
	}
}
// writeASCII appends the ASCII-only string s to whichever underlying builder is
// currently active.
func (b *StringBuilder) writeASCII(s string) {
	if !b.ascii() {
		b.unicodeBuilder.writeASCIIString(s)
		return
	}
	b.asciiBuilder.WriteString(s)
}
// WriteRune appends r. A non-ASCII rune switches the builder to Unicode mode,
// reserving one code unit for runes up to 0xFFFF and two otherwise. An ASCII rune
// is written to whichever underlying builder is currently active.
func (b *StringBuilder) WriteRune(r rune) {
	switch {
	case r >= utf8.RuneSelf:
		units := 2
		if r <= 0xFFFF {
			units = 1
		}
		b.switchToUnicode(units)
		b.unicodeBuilder.writeRuneFast(r)
	case b.ascii():
		b.asciiBuilder.WriteByte(byte(r))
	default:
		b.unicodeBuilder.writeRuneFast(r)
	}
}
// String returns the accumulated content as an ECMAScript String, taken from the
// Unicode builder once the builder has switched modes and from the ASCII builder
// otherwise.
func (b *StringBuilder) String() String {
	if !b.ascii() {
		return b.unicodeBuilder.String()
	}
	return asciiString(b.asciiBuilder.String())
}
// Grow reserves room for n more characters in whichever underlying builder is
// currently active.
func (b *StringBuilder) Grow(n int) {
	if !b.ascii() {
		b.unicodeBuilder.Grow(n)
		return
	}
	b.asciiBuilder.Grow(n)
}
// LikelyUnicode hints to the builder that the resulting string is likely to contain Unicode (non-ASCII) characters.
// The argument is an extra capacity (in characters) to reserve on top of the current length (it's like calling
// Grow() afterwards).
// This method may be called at any point (not just when the buffer is empty), although for efficiency it should
// be called as early as possible. It delegates to switchToUnicode, which does nothing if the builder has
// already switched to Unicode mode.
func (b *StringBuilder) LikelyUnicode(extraLen int) {
b.switchToUnicode(extraLen)
}
// switchToUnicode moves the builder from ASCII to Unicode mode; it is a no-op when
// the builder has already switched. The Unicode buffer is started with room for the
// current ASCII length plus extraLen, or the ASCII builder's capacity if that is
// larger. The ASCII content is then copied over and the ASCII builder is reset.
func (b *StringBuilder) switchToUnicode(extraLen int) {
	if !b.ascii() {
		return
	}
	want := b.asciiBuilder.Len() + extraLen
	if have := b.asciiBuilder.Cap(); want < have {
		want = have
	}
	b.unicodeBuilder.ensureStarted(want)
	b.unicodeBuilder.writeASCIIString(b.asciiBuilder.String())
	b.asciiBuilder.Reset()
}
// WriteSubstring appends the characters of source in the half-open range
// [start, end).
//
// An ASCII source is written to whichever underlying builder is active. For a
// Unicode source, a builder still in ASCII mode checks whether the range is all
// ASCII: if so the characters are narrowed into the ASCII builder, otherwise the
// builder switches to Unicode mode. In Unicode mode the code units are appended
// directly and the buffer is flagged as Unicode.
func (b *StringBuilder) WriteSubstring(source String, start int, end int) {
	a, us := devirtualizeString(source)
	if us == nil {
		if b.ascii() {
			b.asciiBuilder.WriteString(string(a[start:end]))
		} else {
			b.unicodeBuilder.writeASCIIString(string(a[start:end]))
		}
		return
	}

	if b.ascii() {
		allASCII := true
		for i := start; i < end; i++ {
			if us.CharAt(i) >= utf8.RuneSelf {
				allASCII = false
				break
			}
		}
		if allASCII {
			b.asciiBuilder.Grow(end - start + 1)
			for i := start; i < end; i++ {
				b.asciiBuilder.WriteByte(byte(us.CharAt(i)))
			}
			return
		}
		b.switchToUnicode(end - start + 1)
	}

	// Content indices are shifted by one because of the leading marker unit.
	b.unicodeBuilder.buf = append(b.unicodeBuilder.buf, us[start+1:end+1]...)
	b.unicodeBuilder.unicode = true
}
// Reader returns an io.RuneReader over the string content (the leading marker unit
// is skipped). The reader combines surrogate pairs; see unicodeRuneReader.
func (s unicodeString) Reader() io.RuneReader {
return &unicodeRuneReader{
s: s[1:],
}
}
// utf16Reader returns a reader over the string content (the leading marker unit is
// skipped), backed by a utf16RuneReader.
func (s unicodeString) utf16Reader() utf16Reader {
return &utf16RuneReader{
s: s[1:],
}
}
// utf16RuneReader returns an io.RuneReader over the string content (the leading
// marker unit is skipped) that yields every UTF-16 code unit as its own rune,
// without combining surrogate pairs.
func (s unicodeString) utf16RuneReader() io.RuneReader {
return &utf16RuneReader{
s: s[1:],
}
}
// utf16Runes returns the string content as a slice with one rune per UTF-16 code
// unit; surrogate pairs are not combined and the leading marker unit is skipped.
func (s unicodeString) utf16Runes() []rune {
	out := make([]rune, len(s)-1)
	for i := 1; i < len(s); i++ {
		out[i-1] = rune(s[i])
	}
	return out
}
// ToInteger always returns 0 for a unicodeString.
func (s unicodeString) ToInteger() int64 {
return 0
}
// toString returns s itself as a String.
func (s unicodeString) toString() String {
return s
}
// ToString returns s itself as a Value.
func (s unicodeString) ToString() Value {
return s
}
// ToFloat always returns NaN for a unicodeString.
func (s unicodeString) ToFloat() float64 {
return math.NaN()
}
// ToBoolean reports whether the string has any content, following the ECMAScript
// rule that only the empty string converts to false.
func (s unicodeString) ToBoolean() bool {
	// Index 0 is the marker unit, not content (Length is len(s)-1), so a
	// marker-only value is an empty string and must be false.
	return len(s) > 1
}
// toTrimmedUTF8 returns the UTF-8 form of the string with every leading and
// trailing character found in parser.WhitespaceChars removed. A zero-length slice
// yields the empty string.
func (s unicodeString) toTrimmedUTF8() string {
	if len(s) != 0 {
		return strings.Trim(s.String(), parser.WhitespaceChars)
	}
	return ""
}
// ToNumber converts the string to a number by trimming it with toTrimmedUTF8 and
// delegating to asciiString.ToNumber on the result.
func (s unicodeString) ToNumber() Value {
return asciiString(s.toTrimmedUTF8()).ToNumber()
}
// ToObject wraps s in a new string object created by the runtime with the
// runtime's string prototype.
func (s unicodeString) ToObject(r *Runtime) *Object {
return r._newString(s, r.getStringPrototype())
}
// equals reports whether s and other hold exactly the same sequence of code units.
func (s unicodeString) equals(other unicodeString) bool {
	n := len(s)
	if n != len(other) {
		return false
	}
	for i := 0; i < n; i++ {
		if s[i] != other[i] {
			return false
		}
	}
	return true
}
// SameAs reports whether other is the same value as s; for strings this is the
// same check as StrictEquals.
func (s unicodeString) SameAs(other Value) bool {
return s.StrictEquals(other)
}
// Equals implements loose equality: it is true when other is strictly equal to s,
// or when other is an object whose primitive value is loosely equal to s.
func (s unicodeString) Equals(other Value) bool {
	if s.StrictEquals(other) {
		return true
	}
	obj, isObject := other.(*Object)
	return isObject && s.Equals(obj.toPrimitive())
}
// StrictEquals reports whether other is a string with exactly the same code units
// as s. Besides unicodeString it understands *importedString, which is scanned
// first and compared through its Unicode form when it has one.
func (s unicodeString) StrictEquals(other Value) bool {
	switch o := other.(type) {
	case unicodeString:
		return s.equals(o)
	case *importedString:
		o.ensureScanned()
		if o.u != nil {
			return s.equals(o.u)
		}
	}
	return false
}
// baseObject stores s in the runtime's string singleton, refreshes the singleton's
// length and returns the singleton's object value.
// NOTE(review): the singleton appears to be shared per runtime, so the returned
// object presumably only reflects s until the next baseObject call — confirm.
func (s unicodeString) baseObject(r *Runtime) *Object {
ss := r.getStringSingleton()
ss.value = s
ss.setLength()
return ss.val
}
// CharAt returns the UTF-16 code unit at content index idx. The index is shifted
// by one to skip the leading marker unit.
func (s unicodeString) CharAt(idx int) uint16 {
return s[idx+1]
}
// Length returns the number of UTF-16 code units in the string content, not
// counting the leading marker unit.
func (s unicodeString) Length() int {
return len(s) - 1
}
// Concat returns a new unicodeString holding s followed by other. A Unicode
// operand contributes its content without its own marker unit; an ASCII operand is
// widened character by character.
func (s unicodeString) Concat(other String) String {
	a, u := devirtualizeString(other)
	if u == nil {
		joined := make(unicodeString, len(s)+len(a))
		copy(joined, s)
		tail := joined[len(s):]
		for i := range tail {
			tail[i] = uint16(a[i])
		}
		return joined
	}
	joined := make(unicodeString, len(s)+len(u)-1)
	copy(joined, s)
	copy(joined[len(s):], u[1:])
	return joined
}
// Substring returns the content in the half-open range [start, end). If the range
// contains any non-ASCII code unit the result is a new unicodeString with its own
// leading unistring.BOM; otherwise it is narrowed into an asciiString.
func (s unicodeString) Substring(start, end int) String {
	units := s[start+1 : end+1]

	pureASCII := true
	for _, c := range units {
		if c >= utf8.RuneSelf {
			pureASCII = false
			break
		}
	}

	if !pureASCII {
		out := make(unicodeString, end-start+1)
		out[0] = unistring.BOM
		copy(out[1:], units)
		return out
	}

	narrow := make([]byte, end-start)
	for i, c := range units {
		narrow[i] = byte(c)
	}
	return asciiString(narrow)
}
// String returns the content as a Go (UTF-8) string, decoded with utf16.Decode.
// Unpaired surrogates cannot be represented in UTF-8; utf16.Decode turns them into
// U+FFFD, so the conversion is lossy for such strings.
func (s unicodeString) String() string {
return string(utf16.Decode(s[1:]))
}
// CompareTo compares s with other lexicographically by UTF-16 code unit, which is
// the ordering ECMAScript defines for strings. It returns -1 if s sorts before
// other, 1 if it sorts after, and 0 if both hold the same code units.
//
// Comparing code units directly (instead of UTF-8 decodings) keeps unpaired
// surrogates distinct rather than collapsing them to U+FFFD, and orders surrogate
// pairs before U+E000..U+FFFF as code-unit order requires.
func (s unicodeString) CompareTo(other String) int {
	sLen, oLen := s.Length(), other.Length()
	shared := sLen
	if oLen < shared {
		shared = oLen
	}
	for i := 0; i < shared; i++ {
		a, b := s.CharAt(i), other.CharAt(i)
		if a == b {
			continue
		}
		if a < b {
			return -1
		}
		return 1
	}
	// One string is a prefix of the other: the shorter one sorts first.
	switch {
	case sLen < oLen:
		return -1
	case sLen > oLen:
		return 1
	}
	return 0
}
// index returns the content index of the first occurrence of substr at or after
// start, or -1 if there is none. The search is a plain code-unit comparison at
// every candidate position.
func (s unicodeString) index(substr String, start int) int {
	var needle []uint16
	if a, u := devirtualizeString(substr); u != nil {
		needle = u[1:]
	} else {
		// Widen the ASCII pattern so both sides compare as code units.
		needle = make([]uint16, len(a))
		for i := range needle {
			needle[i] = uint16(a[i])
		}
	}

	hay := s[1:]
	// TODO: optimise
	last := len(hay) - len(needle)
	for pos := start; pos <= last; pos++ {
		matched := true
		for i, c := range needle {
			if hay[pos+i] != c {
				matched = false
				break
			}
		}
		if matched {
			return pos
		}
	}
	return -1
}
// lastIndex returns the content index of the last occurrence of substr that
// starts at or before start, or -1 if there is none. start is clamped to the last
// position where substr could still fit.
func (s unicodeString) lastIndex(substr String, start int) int {
	var needle []uint16
	if a, u := devirtualizeString(substr); u != nil {
		needle = u[1:]
	} else {
		// Widen the ASCII pattern so both sides compare as code units.
		needle = make([]uint16, len(a))
		for i := range needle {
			needle[i] = uint16(a[i])
		}
	}

	hay := s[1:]
	pos := start
	if limit := len(hay) - len(needle); pos > limit {
		pos = limit
	}
	// TODO: optimise
	for ; pos >= 0; pos-- {
		matched := true
		for i, c := range needle {
			if hay[pos+i] != c {
				matched = false
				break
			}
		}
		if matched {
			return pos
		}
	}
	return -1
}
// unicodeStringFromRunes builds a unicodeString from r by way of
// unistring.NewFromRunes and its UTF-16 form.
func unicodeStringFromRunes(r []rune) unicodeString {
return unistring.NewFromRunes(r).AsUtf16()
}
// toLower returns the lower-cased form of s as an ECMAScript String, using the
// language-neutral (language.Und) lower caser. The result is an asciiString when
// every resulting rune is ASCII and a unicodeString otherwise. An empty result
// yields an empty asciiString.
func toLower(s string) String {
	caser := cases.Lower(language.Und)
	r := []rune(caser.String(s))
	// Workaround: where U+0345 (combining ypogegrammeni) is followed by U+03C2
	// (final sigma) and is not itself preceded by U+03B1 (alpha), replace the final
	// sigma with U+03C3 (regular sigma).
	// NOTE(review): presumably compensates for a final-sigma rule in the caser that
	// differs from what ECMAScript expects — confirm against the conformance tests.
	ascii := true
	for i := 0; i < len(r)-1; i++ {
		if (i == 0 || r[i-1] != 0x3b1) && r[i] == 0x345 && r[i+1] == 0x3c2 {
			i++
			r[i] = 0x3c3
		}
		if r[i] >= utf8.RuneSelf {
			ascii = false
		}
	}
	// The loop stops one short of the end, so the last rune is checked here. The
	// length guard keeps an empty result from indexing r[-1] and panicking.
	if ascii && len(r) > 0 {
		ascii = r[len(r)-1] < utf8.RuneSelf
	}
	if ascii {
		return asciiString(r)
	}
	return unicodeStringFromRunes(r)
}
// toLower returns the lower-cased form of s by passing its UTF-8 form to the
// package-level toLower.
func (s unicodeString) toLower() String {
return toLower(s.String())
}
// toUpper returns the upper-cased form of s, produced by the language-neutral
// (language.Und) upper caser applied to the UTF-8 form of the string.
func (s unicodeString) toUpper() String {
	return newStringValue(cases.Upper(language.Und).String(s.String()))
}
// Export returns the string as a Go string (see String for how the content is
// decoded).
func (s unicodeString) Export() interface{} {
return s.String()
}
// ExportType returns reflectTypeString as the type of the value produced by Export.
func (s unicodeString) ExportType() reflect.Type {
return reflectTypeString
}
// hash feeds the unistring form of s into the supplied hasher, returns the
// resulting 64-bit sum and resets the hasher so it can be reused.
func (s unicodeString) hash(hash *maphash.Hash) uint64 {
	key := string(unistring.FromUtf16(s))
	// WriteString on a maphash.Hash never fails, so its results are discarded.
	_, _ = hash.WriteString(key)
	sum := hash.Sum64()
	hash.Reset()
	return sum
}
// string returns s converted to a unistring.String via unistring.FromUtf16.
func (s unicodeString) string() unistring.String {
return unistring.FromUtf16(s)
}
1
https://gitee.com/quant1x/pkg.git
git@gitee.com:quant1x/pkg.git
quant1x
pkg
pkg
v0.2.8

搜索帮助

53164aa7 5694891 3bd8fe86 5694891