Ai
1 Star 0 Fork 0

鹏程/studygolang

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
string.go 6.16 KB
一键复制 编辑 原始数据 按行查看 历史
xuxinhua 提交于 2017-09-14 13:51 +08:00 . 搜索结果 html 过滤
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package utf8string provides an efficient way to index strings by rune rather than by byte.
package util
import (
"errors"
"strings"
"unicode"
"unicode/utf8"
"github.com/polaris1119/goutils"
)
// String wraps a regular string with a small structure that provides more
// efficient indexing by code point index, as opposed to byte index.
// Scanning incrementally forwards or backwards is O(1) per index operation
// (although not as fast a range clause going forwards). Random access is
// O(N) in the length of the string, but the overhead is less than always
// scanning from the beginning.
// If the string is ASCII, random access is O(1).
// Unlike the built-in string type, String has internal mutable state and
// is not thread-safe.
type String struct {
str string
numRunes int
// If width > 0, the rune at runePos starts at bytePos and has the specified width.
width int
bytePos int
runePos int
nonASCII int // byte index of the first non-ASCII rune.
}
// NewString returns a new UTF-8 string with the provided contents.
func NewString(contents string) *String {
return new(String).Init(contents)
}
// Init initializes an existing String to hold the provided contents.
// It returns a pointer to the initialized String.
func (s *String) Init(contents string) *String {
s.str = contents
s.bytePos = 0
s.runePos = 0
for i := 0; i < len(contents); i++ {
if contents[i] >= utf8.RuneSelf {
// Not ASCII.
s.numRunes = utf8.RuneCountInString(contents)
_, s.width = utf8.DecodeRuneInString(contents)
s.nonASCII = i
return s
}
}
// ASCII is simple. Also, the empty string is ASCII.
s.numRunes = len(contents)
s.width = 0
s.nonASCII = len(contents)
return s
}
// String returns the contents of the String. This method also means the
// String is directly printable by fmt.Print.
func (s *String) String() string {
return s.str
}
// RuneCount returns the number of runes (Unicode code points) in the String.
func (s *String) RuneCount() int {
return s.numRunes
}
// IsASCII returns a boolean indicating whether the String contains only ASCII bytes.
func (s *String) IsASCII() bool {
return s.width == 0
}
// Slice returns the string sliced at rune positions [i:j].
func (s *String) Slice(i, j int) string {
// ASCII is easy. Let the compiler catch the indexing error if there is one.
if j < s.nonASCII {
return s.str[i:j]
}
if i < 0 || j > s.numRunes || i > j {
panic(sliceOutOfRange)
}
if i == j {
return ""
}
// For non-ASCII, after At(i), bytePos is always the position of the indexed character.
var low, high int
switch {
case i < s.nonASCII:
low = i
case i == s.numRunes:
low = len(s.str)
default:
s.At(i)
low = s.bytePos
}
switch {
case j == s.numRunes:
high = len(s.str)
default:
s.At(j)
high = s.bytePos
}
return s.str[low:high]
}
// At returns the rune with index i in the String. The sequence of runes is the same
// as iterating over the contents with a "for range" clause.
func (s *String) At(i int) rune {
// ASCII is easy. Let the compiler catch the indexing error if there is one.
if i < s.nonASCII {
return rune(s.str[i])
}
// Now we do need to know the index is valid.
if i < 0 || i >= s.numRunes {
panic(outOfRange)
}
var r rune
// Five easy common cases: within 1 spot of bytePos/runePos, or the beginning, or the end.
// With these cases, all scans from beginning or end work in O(1) time per rune.
switch {
case i == s.runePos-1: // backing up one rune
r, s.width = utf8.DecodeLastRuneInString(s.str[0:s.bytePos])
s.runePos = i
s.bytePos -= s.width
return r
case i == s.runePos+1: // moving ahead one rune
s.runePos = i
s.bytePos += s.width
fallthrough
case i == s.runePos:
r, s.width = utf8.DecodeRuneInString(s.str[s.bytePos:])
return r
case i == 0: // start of string
r, s.width = utf8.DecodeRuneInString(s.str)
s.runePos = 0
s.bytePos = 0
return r
case i == s.numRunes-1: // last rune in string
r, s.width = utf8.DecodeLastRuneInString(s.str)
s.runePos = i
s.bytePos = len(s.str) - s.width
return r
}
// We need to do a linear scan. There are three places to start from:
// 1) The beginning
// 2) bytePos/runePos.
// 3) The end
// Choose the closest in rune count, scanning backwards if necessary.
forward := true
if i < s.runePos {
// Between beginning and pos. Which is closer?
// Since both i and runePos are guaranteed >= nonASCII, that's the
// lowest location we need to start from.
if i < (s.runePos-s.nonASCII)/2 {
// Scan forward from beginning
s.bytePos, s.runePos = s.nonASCII, s.nonASCII
} else {
// Scan backwards from where we are
forward = false
}
} else {
// Between pos and end. Which is closer?
if i-s.runePos < (s.numRunes-s.runePos)/2 {
// Scan forward from pos
} else {
// Scan backwards from end
s.bytePos, s.runePos = len(s.str), s.numRunes
forward = false
}
}
if forward {
// TODO: Is it much faster to use a range loop for this scan?
for {
r, s.width = utf8.DecodeRuneInString(s.str[s.bytePos:])
if s.runePos == i {
break
}
s.runePos++
s.bytePos += s.width
}
} else {
for {
r, s.width = utf8.DecodeLastRuneInString(s.str[0:s.bytePos])
s.runePos--
s.bytePos -= s.width
if s.runePos == i {
break
}
}
}
return r
}
// 求子串
func Substring(str string, length int, suffix string) string {
if length >= len(str) {
return str
}
utf8Str := NewString(str)
if length > utf8Str.RuneCount() {
return str
}
return utf8Str.Slice(0, length) + suffix
}
var outOfRange = errors.New("utf8string: index out of range")
var sliceOutOfRange = errors.New("utf8string: slice index out of range")
// 驼峰式写法转为下划线写法
func UnderscoreName(name string) string {
buffer := goutils.NewBuffer()
for i, r := range name {
if unicode.IsUpper(r) {
if i != 0 {
buffer.Append('_')
}
buffer.Append(unicode.ToLower(r))
} else {
buffer.Append(r)
}
}
return buffer.String()
}
func SafeHtml(s string) string {
r := strings.NewReplacer("<input", "&lt;input", "<a ", "&lt; a")
return r.Replace(s)
}
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Go
1
https://gitee.com/barry_zpc/studygolang.git
git@gitee.com:barry_zpc/studygolang.git
barry_zpc
studygolang
studygolang
master

搜索帮助