He3DB/He3Proxy: token.go
// Copyright 2012, Google Inc. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Copyright 2016 The kingshard Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"): you may
// not use this file except in compliance with the License. You may obtain
// a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
package sqlparser

import (
	"bytes"
	"fmt"
	"strings"

	"gitee.com/he3db/he3proxy/sqltypes"
)

// EOFCHAR marks end of input; it lies outside the byte range, so it
// cannot collide with any real input character.
const EOFCHAR = 0x100

// Tokenizer is the struct used to generate SQL
// tokens for the parser.
type Tokenizer struct {
	InStream      *strings.Reader
	AllowComments bool
	ForceEOF      bool
	lastChar      uint16
	Position      int
	errorToken    []byte
	LastError     string
	posVarIndex   int
	ParseTree     Statement
}

// NewStringTokenizer creates a new Tokenizer for the
// sql string.
func NewStringTokenizer(sql string) *Tokenizer {
	return &Tokenizer{InStream: strings.NewReader(sql)}
}

// keywords maps lowercased SQL keywords to their grammar token ids.
var keywords = map[string]int{
	"select":        SELECT,
	"insert":        INSERT,
	"update":        UPDATE,
	"delete":        DELETE,
	"from":          FROM,
	"where":         WHERE,
	"group":         GROUP,
	"having":        HAVING,
	"order":         ORDER,
	"by":            BY,
	"limit":         LIMIT,
	"for":           FOR,
	"union":         UNION,
	"all":           ALL,
	"minus":         MINUS,
	"except":        EXCEPT,
	"intersect":     INTERSECT,
	"join":          JOIN,
	"straight_join": STRAIGHT_JOIN,
	"left":          LEFT,
	"right":         RIGHT,
	"inner":         INNER,
	"outer":         OUTER,
	"cross":         CROSS,
	"natural":       NATURAL,
	"use":           USE,
	"force":         FORCE,
	"on":            ON,
	"into":          INTO,
	"distinct":      DISTINCT,
	"case":          CASE,
	"when":          WHEN,
	"then":          THEN,
	"else":          ELSE,
	"end":           END,
	"as":            AS,
	"and":           AND,
	"or":            OR,
	"not":           NOT,
	"exists":        EXISTS,
	"in":            IN,
	"is":            IS,
	"like":          LIKE,
	"between":       BETWEEN,
	"null":          NULL,
	"asc":           ASC,
	"desc":          DESC,
	"values":        VALUES,
	"duplicate":     DUPLICATE,
	"key":           KEY,
	"default":       DEFAULT,
	"set":           SET,
	"lock":          LOCK,
	"create":        CREATE,
	"alter":         ALTER,
	"rename":        RENAME,
	"drop":          DROP,
	"table":         TABLE,
	"index":         INDEX,
	"view":          VIEW,
	"to":            TO,
	"ignore":        IGNORE,
	"if":            IF,
	"unique":        UNIQUE,
	"using":         USING,
	"begin":         BEGIN,
	"rollback":      ROLLBACK,
	"commit":        COMMIT,
	"names":         NAMES,
	"replace":       REPLACE,
	// for kingshard
	"admin":       ADMIN,
	"help":        HELP,
	"start":       START,
	"transaction": TRANSACTION,
	"collate":     COLLATE,
	"offset":      OFFSET,
	"truncate":    TRUNCATE,
}

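// Note (illustrative addition): lookup is case-insensitive because
// scanIdentifier lowercases identifiers before consulting this map:
//
//	tkn := NewStringTokenizer("SeLeCt 1")
//	typ, val := tkn.Scan() // typ == SELECT, val == []byte("select")
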
// Lex returns the next token from the Tokenizer.
// This function is used by go yacc.
func (tkn *Tokenizer) Lex(lval *yySymType) int {
	typ, val := tkn.Scan()
	for typ == COMMENT {
		if tkn.AllowComments {
			break
		}
		typ, val = tkn.Scan()
	}
	switch typ {
	case ID, STRING, NUMBER, VALUE_ARG, COMMENT:
		lval.bytes = val
	}
	tkn.errorToken = val
	return typ
}

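// Contract sketch (illustrative addition, assuming the goyacc-generated
// yySymType from this package): the parser pulls tokens roughly like
//
//	var lval yySymType
//	for tok := tkn.Lex(&lval); tok != 0; tok = tkn.Lex(&lval) {
//		// tok is a grammar token id; lval.bytes carries the literal
//		// for ID/STRING/NUMBER/VALUE_ARG/COMMENT tokens.
//	}
//
// and calls Error with a message when no production matches.
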
// Error is called by go yacc if there's a parsing error.
func (tkn *Tokenizer) Error(err string) {
	buf := bytes.NewBuffer(make([]byte, 0, 32))
	if tkn.errorToken != nil {
		fmt.Fprintf(buf, "%s at position %v near %s", err, tkn.Position, tkn.errorToken)
	} else {
		fmt.Fprintf(buf, "%s at position %v", err, tkn.Position)
	}
	tkn.LastError = buf.String()
}

// Scan scans the tokenizer for the next token and returns
// the token type and an optional value.
func (tkn *Tokenizer) Scan() (int, []byte) {
	if tkn.ForceEOF {
		return 0, nil
	}
	if tkn.lastChar == 0 {
		tkn.next()
	}
	tkn.skipBlank()
	switch ch := tkn.lastChar; {
	case isLetter(ch):
		return tkn.scanIdentifier()
	case isDigit(ch):
		return tkn.scanNumber(false)
	case ch == ':':
		return tkn.scanBindVar()
	default:
		tkn.next()
		switch ch {
		case EOFCHAR:
			return 0, nil
		case '=', ',', ';', '(', ')', '+', '*', '%', '&', '|', '^', '~':
			return int(ch), nil
		case '?':
			tkn.posVarIndex++
			buf := new(bytes.Buffer)
			fmt.Fprintf(buf, ":v%d", tkn.posVarIndex)
			return VALUE_ARG, buf.Bytes()
		case '.':
			if isDigit(tkn.lastChar) {
				return tkn.scanNumber(true)
			} else {
				return int(ch), nil
			}
		case '/':
			switch tkn.lastChar {
			case '/':
				tkn.next()
				return tkn.scanCommentType1("//")
			case '*':
				tkn.next()
				return tkn.scanCommentType2()
			default:
				return int(ch), nil
			}
		case '-':
			if tkn.lastChar == '-' {
				tkn.next()
				return tkn.scanCommentType1("--")
			} else {
				return int(ch), nil
			}
		case '<':
			switch tkn.lastChar {
			case '>':
				tkn.next()
				return NE, nil
			case '=':
				tkn.next()
				switch tkn.lastChar {
				case '>':
					tkn.next()
					return NULL_SAFE_EQUAL, nil
				default:
					return LE, nil
				}
			default:
				return int(ch), nil
			}
		case '>':
			if tkn.lastChar == '=' {
				tkn.next()
				return GE, nil
			} else {
				return int(ch), nil
			}
		case '!':
			if tkn.lastChar == '=' {
				tkn.next()
				return NE, nil
			} else {
				return LEX_ERROR, []byte("!")
			}
		case '\'', '"':
			return tkn.scanString(ch, STRING)
		case '`':
			return tkn.scanString(ch, ID)
		default:
			return LEX_ERROR, []byte{byte(ch)}
		}
	}
}

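// Example (illustrative addition): scanning "a >= ?" yields the stream
//
//	ID("a"), GE, VALUE_ARG(":v1")
//
// because each '?' placeholder is rewritten into a positional bind
// variable :v1, :v2, ... using posVarIndex.
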
// skipBlank advances past spaces, tabs, and newlines.
func (tkn *Tokenizer) skipBlank() {
	ch := tkn.lastChar
	for ch == ' ' || ch == '\n' || ch == '\r' || ch == '\t' {
		tkn.next()
		ch = tkn.lastChar
	}
}

// scanIdentifier reads an identifier and reports it as a keyword token
// when its lowercased form appears in the keywords map.
func (tkn *Tokenizer) scanIdentifier() (int, []byte) {
	buffer := bytes.NewBuffer(make([]byte, 0, 8))
	buffer.WriteByte(byte(tkn.lastChar))
	for tkn.next(); isLetter(tkn.lastChar) || isDigit(tkn.lastChar); tkn.next() {
		buffer.WriteByte(byte(tkn.lastChar))
	}
	lowered := bytes.ToLower(buffer.Bytes())
	if keywordId, found := keywords[string(lowered)]; found {
		return keywordId, lowered
	}
	return ID, buffer.Bytes()
}

// scanBindVar reads a named bind variable of the form :name; a bare
// ':' with no identifier after it is a lexical error.
func (tkn *Tokenizer) scanBindVar() (int, []byte) {
	buffer := bytes.NewBuffer(make([]byte, 0, 8))
	buffer.WriteByte(byte(tkn.lastChar))
	for tkn.next(); isLetter(tkn.lastChar) || isDigit(tkn.lastChar) || tkn.lastChar == '.'; tkn.next() {
		buffer.WriteByte(byte(tkn.lastChar))
	}
	if buffer.Len() == 1 {
		return LEX_ERROR, buffer.Bytes()
	}
	return VALUE_ARG, buffer.Bytes()
}

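// Examples (illustrative addition):
//
//	":id"  -> VALUE_ARG, ":id"
//	":a.b" -> VALUE_ARG, ":a.b"
//	":"    -> LEX_ERROR, ":" (no identifier after the colon)
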
// scanMantissa consumes consecutive digits that are valid in the
// given base.
func (tkn *Tokenizer) scanMantissa(base int, buffer *bytes.Buffer) {
	for digitVal(tkn.lastChar) < base {
		tkn.ConsumeNext(buffer)
	}
}

// scanNumber reads an integer or floating point literal. A leading
// "0x"/"0X" selects hexadecimal, a bare leading "0" selects octal, and
// seenDecimalPoint indicates a literal that started with '.'.
func (tkn *Tokenizer) scanNumber(seenDecimalPoint bool) (int, []byte) {
	buffer := bytes.NewBuffer(make([]byte, 0, 8))
	if seenDecimalPoint {
		buffer.WriteByte('.')
		tkn.scanMantissa(10, buffer)
		goto exponent
	}
	if tkn.lastChar == '0' {
		// int or float
		tkn.ConsumeNext(buffer)
		if tkn.lastChar == 'x' || tkn.lastChar == 'X' {
			// hexadecimal int
			tkn.ConsumeNext(buffer)
			tkn.scanMantissa(16, buffer)
		} else {
			// octal int or float
			seenDecimalDigit := false
			tkn.scanMantissa(8, buffer)
			if tkn.lastChar == '8' || tkn.lastChar == '9' {
				// illegal octal int or float
				seenDecimalDigit = true
				tkn.scanMantissa(10, buffer)
			}
			if tkn.lastChar == '.' || tkn.lastChar == 'e' || tkn.lastChar == 'E' {
				goto fraction
			}
			// octal int
			if seenDecimalDigit {
				return LEX_ERROR, buffer.Bytes()
			}
		}
		goto exit
	}
	// decimal int or float
	tkn.scanMantissa(10, buffer)

fraction:
	if tkn.lastChar == '.' {
		tkn.ConsumeNext(buffer)
		tkn.scanMantissa(10, buffer)
	}

exponent:
	if tkn.lastChar == 'e' || tkn.lastChar == 'E' {
		tkn.ConsumeNext(buffer)
		if tkn.lastChar == '+' || tkn.lastChar == '-' {
			tkn.ConsumeNext(buffer)
		}
		tkn.scanMantissa(10, buffer)
	}

exit:
	return NUMBER, buffer.Bytes()
}

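// Examples (illustrative addition) of inputs accepted as NUMBER:
//
//	42, 0x1F, 017, 3.14, .5, 1e-9, 0.5E+3
//
// whereas "08" alone is a LEX_ERROR: 8 is not a valid octal digit and
// no fraction or exponent follows to reinterpret the literal as a float.
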
// scanString reads a string bounded by delim, handling doubled
// delimiters and backslash escapes, and returns it as token type typ.
func (tkn *Tokenizer) scanString(delim uint16, typ int) (int, []byte) {
	buffer := bytes.NewBuffer(make([]byte, 0, 8))
	for {
		ch := tkn.lastChar
		tkn.next()
		if ch == delim {
			if tkn.lastChar == delim {
				tkn.next()
			} else {
				break
			}
		} else if ch == '\\' {
			if tkn.lastChar == EOFCHAR {
				return LEX_ERROR, buffer.Bytes()
			}
			if decodedChar := sqltypes.SqlDecodeMap[byte(tkn.lastChar)]; decodedChar == sqltypes.DONTESCAPE {
				ch = tkn.lastChar
			} else {
				ch = uint16(decodedChar)
			}
			tkn.next()
		}
		if ch == EOFCHAR {
			return LEX_ERROR, buffer.Bytes()
		}
		buffer.WriteByte(byte(ch))
	}
	return typ, buffer.Bytes()
}

// scanCommentType1 reads a line comment ("--" or "//") through the
// terminating newline or EOF.
func (tkn *Tokenizer) scanCommentType1(prefix string) (int, []byte) {
	buffer := bytes.NewBuffer(make([]byte, 0, 8))
	buffer.WriteString(prefix)
	for tkn.lastChar != EOFCHAR {
		if tkn.lastChar == '\n' {
			tkn.ConsumeNext(buffer)
			break
		}
		tkn.ConsumeNext(buffer)
	}
	return COMMENT, buffer.Bytes()
}

// scanCommentType2 reads a block comment ("/* ... */"); an
// unterminated comment is a lexical error.
func (tkn *Tokenizer) scanCommentType2() (int, []byte) {
	buffer := bytes.NewBuffer(make([]byte, 0, 8))
	buffer.WriteString("/*")
	for {
		if tkn.lastChar == '*' {
			tkn.ConsumeNext(buffer)
			if tkn.lastChar == '/' {
				tkn.ConsumeNext(buffer)
				break
			}
			continue
		}
		if tkn.lastChar == EOFCHAR {
			return LEX_ERROR, buffer.Bytes()
		}
		tkn.ConsumeNext(buffer)
	}
	return COMMENT, buffer.Bytes()
}

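// Note (illustrative addition): both comment scanners return the text
// including its delimiters, e.g. "-- x\n" or "/* x */". Lex then drops
// COMMENT tokens unless AllowComments is set.
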
// ConsumeNext appends the current character to buffer and advances
// the input by one character.
func (tkn *Tokenizer) ConsumeNext(buffer *bytes.Buffer) {
	if tkn.lastChar == EOFCHAR {
		// This should never happen.
		panic("unexpected EOF")
	}
	buffer.WriteByte(byte(tkn.lastChar))
	tkn.next()
}

// next reads the next byte from the input stream into lastChar,
// recording EOFCHAR at end of input.
func (tkn *Tokenizer) next() {
	if ch, err := tkn.InStream.ReadByte(); err != nil {
		// Only EOF is possible.
		tkn.lastChar = EOFCHAR
	} else {
		tkn.lastChar = uint16(ch)
	}
	tkn.Position++
}

// isLetter reports whether ch can start or continue an identifier.
func isLetter(ch uint16) bool {
	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch == '@'
}

// digitVal returns the numeric value of a hex digit, or 16 for any
// other character.
func digitVal(ch uint16) int {
	switch {
	case '0' <= ch && ch <= '9':
		return int(ch) - '0'
	case 'a' <= ch && ch <= 'f':
		return int(ch) - 'a' + 10
	case 'A' <= ch && ch <= 'F':
		return int(ch) - 'A' + 10
	}
	return 16 // larger than any legal digit val
}

// isDigit reports whether ch is a decimal digit.
func isDigit(ch uint16) bool {
	return '0' <= ch && ch <= '9'
}