// Copyright 2016 The CC Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package cc

import (
	"bytes"
	"fmt"
	"go/token"
	"io"
	"strings"

	"github.com/cznic/golex/lex"
	"github.com/cznic/xc"
)

// Lexer state
const (
	lsZero             = iota
	lsBOL              // Preprocessor: Beginning of line.
	lsDefine           // Preprocessor: Seen ^#define.
	lsSeekRParen       // Preprocessor: Seen ^#define identifier(
	lsTokens           // Preprocessor: Convert anything to PPOTHER until EOL.
	lsUndef            // Preprocessor: Seen ^#undef.
	lsConstExpr0       // Preprocessor: Parsing constant expression.
	lsConstExpr        // Preprocessor: Parsing constant expression.
	lsTranslationUnit0 //
	lsTranslationUnit  //
)

type trigraphsReader struct {
	*lex.Lexer           //
	pos0       token.Pos //
	sc         int       // Start condition.
}
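
// ReadRune implements io.RuneReader. It is a stub that always reports
// io.EOF; actual input is delivered through ReadChar below.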
func (t *trigraphsReader) ReadRune() (rune, int, error) { return lex.RuneEOF, 0, io.EOF }
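
// ReadChar implements lex.CharReader. It scans the next character,
// translating trigraphs when the scTRIGRAPHS start condition is active, and
// derives the character's byte size from the advance of the lookahead
// position.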
func (t *trigraphsReader) ReadChar() (c lex.Char, size int, err error) {
	r := rune(t.scan())
	pos0 := t.pos0
	pos := t.Lookahead().Pos()
	t.pos0 = pos
	c = lex.NewChar(t.First.Pos(), r)
	return c, int(pos - pos0), nil
}
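
// byteReader adapts an io.Reader to an io.RuneReader that delivers the
// input one byte at a time, leaving any UTF-8 decoding to later stages.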
type byteReader struct {
	io.Reader
	b [1]byte
}
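
// ReadRune implements io.RuneReader. It returns the next input byte as a
// rune of size 1.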
func (b *byteReader) ReadRune() (r rune, size int, err error) {
	if _, err = b.Read(b.b[:]); err != nil {
		return -1, 0, err
	}

	return rune(b.b[0]), 1, nil
}

type lexer struct {
	*lex.Lexer         //
	ch                 chan []xc.Token     //
	commentPos0        token.Pos           //
	constExprToks      []xc.Token          //
	constantExpression *ConstantExpression //
	cpp                func([]xc.Token)    //
	encBuf             []byte              // PPTokens
	encBuf1            [30]byte            // Rune, position, optional value ID.
	encPos             token.Pos           // For delta pos encoding
	eof                lex.Char            //
	example            interface{}         //
	exampleRule        int                 //
	externs            map[int]*Declarator //
	file               *token.File         //
	finalNLInjected    bool                //
	fnDeclarator       *Declarator         //
	includePaths       []string            //
	injectFunc         []xc.Token          // [0], 6.4.2.2.
	iota               int64               //
	isPreprocessing    bool                //
	last               xc.Token            //
	model              *Model              //
	preprocessingFile  *PreprocessingFile  //
	report             *xc.Report          //
	sc                 int                 // Start condition.
	scope              *Bindings           //
	scs                int                 // Start condition stack.
	state              int                 // Lexer state.
	sysIncludePaths    []string            //
	t                  *trigraphsReader    //
	textLine           []xc.Token          //
	toC                bool                // Whether to translate preprocessor identifiers to reserved C words.
	tokLast            xc.Token            //
	tokPrev            xc.Token            //
	toks               []xc.Token          // Parsing preprocessor constant expression.
	translationUnit    *TranslationUnit    //
	tweaks             *tweaks             //
	fsm                struct {
		comment int
		pos     token.Pos
		state   int
	}
}
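
// newLexer returns a lexer for the source named nm of size sz, reading from
// r. Errors are reported via report and trigraph translation is enabled
// according to tweaks.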
func newLexer(nm string, sz int, r io.RuneReader, report *xc.Report, tweaks *tweaks, opts ...lex.Option) (*lexer, error) {
	file := fset.AddFile(nm, -1, sz)
	t := &trigraphsReader{}
	lx, err := lex.New(
		file,
		&byteReader{Reader: r.(io.Reader)},
		lex.ErrorFunc(func(pos token.Pos, msg string) {
			report.Err(pos, msg)
		}),
		lex.RuneClass(func(r rune) int { return int(r) }),
	)
	if err != nil {
		return nil, err
	}

	t.Lexer = lx
	t.pos0 = lx.Lookahead().Pos()
	if tweaks.enableTrigraphs {
		t.sc = scTRIGRAPHS
	}
	r = t
	scope := newBindings(nil, ScopeFile)
	lexer := &lexer{
		externs: map[int]*Declarator{},
		file:    file,
		report:  report,
		scope:   scope,
		scs:     -1, // Stack empty
		t:       t,
		tweaks:  tweaks,
	}
	if lexer.Lexer, err = lex.New(
		file,
		r,
		append(opts, lex.RuneClass(rune2class))...,
	); err != nil {
		return nil, err
	}

	return lexer, nil
}
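
// newSimpleLexer returns a lexer without an input source of its own. Tokens
// reach it through the ch channel instead (see scanToken).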
func newSimpleLexer(cpp func([]xc.Token), report *xc.Report, tweaks *tweaks) *lexer {
	return &lexer{
		cpp:     cpp,
		externs: map[int]*Declarator{},
		report:  report,
		scope:   newBindings(nil, ScopeFile),
		tweaks:  tweaks,
	}
}
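
// push saves the current start condition on the one-slot start condition
// stack and makes sc the new start condition. The only nested push accepted
// is a /*-style comment on a line starting with #.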
func (l *lexer) push(sc int) {
	if l.scs >= 0 { // Stack overflow.
		if l.sc != scDIRECTIVE || sc != scCOMMENT {
			panic("internal error")
		}

		// /*-style comment in a line starting with #
		l.pop()
	}

	l.scs = l.sc
	l.sc = sc
}
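
// pop restores the start condition saved by push and marks the stack empty.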
func (l *lexer) pop() {
	if l.scs < 0 { // Stack underflow
		panic("internal error")
	}

	l.sc = l.scs
	l.scs = -1 // Stack empty.
}
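
// pushScope enters a new binding scope of the given kind and returns the
// previously active scope.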
func (l *lexer) pushScope(kind Scope) (old *Bindings) {
	old = l.scope
	l.scope = newBindings(old, kind)
	l.scope.maxAlign = 1
	return old
}
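
// popScope leaves the current scope, reporting an error at tok's position
// when there is no enclosing scope to return to.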
func (l *lexer) popScope(tok xc.Token) (old, new *Bindings) {
	return l.popScopePos(tok.Pos())
}

func (l *lexer) popScopePos(pos token.Pos) (old, new *Bindings) {
	old = l.scope
	new = l.scope.Parent
	if new == nil {
		l.report.Err(pos, "cannot pop scope")
		return nil, old
	}

	l.scope = new
	return old, new
}

const (
	fsmZero = iota
	fsmHasComment
)

var genCommentLeader = []byte("/*")
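
// comment records the text of the current comment token when comment
// collection is enabled. Adjacent comments are coalesced into a single
// entry; general indicates a /*-style comment whose leader must be
// re-attached.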
func (l *lexer) comment(general bool) {
	if l.tweaks.comments != nil {
		b := l.TokenBytes(nil)
		pos := l.First.Pos()
		if general {
			pos = l.commentPos0
			b = append(genCommentLeader, b...)
		}
		if l.Lookahead().Rune == '\n' {
			b = append(b, '\n')
		}

		switch fsm := &l.fsm; fsm.state {
		case fsmHasComment:
			if pos == fsm.pos+token.Pos(len(dict.S(l.fsm.comment))) {
				fsm.comment = dict.ID(append(dict.S(fsm.comment), b...))
				break
			}

			fallthrough
		case fsmZero:
			fsm.state = fsmHasComment
			fsm.comment = dict.ID(b)
			fsm.pos = pos
		}
	}
}
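
// scanChar returns the next character of the input. It skips redundant
// white space, maintains the lexer state and flushes a collected comment
// once input not adjacent to it is seen.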
func (l *lexer) scanChar() (c lex.Char) {
again:
	r := rune(l.scan())
	switch r {
	case ' ':
		if l.state != lsTokens || l.tokLast.Rune == ' ' {
			goto again
		}
	case '\n':
		if l.state == lsTokens {
			l.encodeToken(xc.Token{Char: lex.NewChar(l.First.Pos(), ' '), Val: idSpace})
		}
		l.state = lsBOL
		l.sc = scINITIAL
		l.scs = -1 // Stack empty
	case PREPROCESSING_FILE:
		l.state = lsBOL
		l.isPreprocessing = true
	case CONSTANT_EXPRESSION, TRANSLATION_UNIT: //TODO- CONSTANT_EXPRESSION, then must add some manual yy:examples.
		l.toC = true
	}

	fp := l.First.Pos()
	if l.fsm.state == fsmHasComment {
		switch {
		case r == '\n' && fp == l.fsm.pos+token.Pos(len(dict.S(l.fsm.comment)))-1:
			// keep going
		case r != '\n' && fp == l.fsm.pos+token.Pos(len(dict.S(l.fsm.comment))):
			l.tweaks.comments[fp] = dict.ID(bytes.TrimSpace(dict.S(l.fsm.comment)))
			l.fsm.state = fsmZero
		default:
			l.fsm.state = fsmZero
		}
	}
	return lex.NewChar(l.First.Pos(), r)
}
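
// scanToken returns the next token of the input. Depending on the lexer
// state the token comes from the scanner, from the buffered constant
// expression tokens or from the preprocessor output channel.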
func (l *lexer) scanToken() (tok xc.Token) {
	switch l.state {
	case lsConstExpr0:
		tok = xc.Token{Char: lex.NewChar(0, CONSTANT_EXPRESSION)}
		l.state = lsConstExpr
	case lsConstExpr:
		if len(l.toks) == 0 {
			tok = xc.Token{Char: lex.NewChar(l.tokLast.Pos(), lex.RuneEOF)}
			break
		}

		tok = l.toks[0]
		l.toks = l.toks[1:]
	case lsTranslationUnit0:
		tok = xc.Token{Char: lex.NewChar(0, TRANSLATION_UNIT)}
		l.state = lsTranslationUnit
		l.toC = true
	case lsTranslationUnit:
	again:
		for len(l.textLine) == 0 {
			var ok bool
			if l.textLine, ok = <-l.ch; !ok {
				return xc.Token{Char: lex.NewChar(l.tokLast.Pos(), lex.RuneEOF)}
			}

			if l.cpp != nil {
				l.cpp(l.textLine)
			}
		}

		tok = l.textLine[0]
		l.textLine = l.textLine[1:]
		if tok.Rune == ' ' {
			goto again
		}

		tok = l.scope.lexerHack(tok, l.tokLast)
	default:
		c := l.scanChar()
		if c.Rune == ccEOF {
			c = lex.NewChar(c.Pos(), lex.RuneEOF)
			if l.isPreprocessing && l.last.Rune != '\n' && !l.finalNLInjected {
				l.finalNLInjected = true
				l.eof = c
				c.Rune = '\n'
				l.state = lsBOL
				return xc.Token{Char: c}
			}

			return xc.Token{Char: c}
		}

		val := 0
		if tokHasVal[c.Rune] {
			b := l.TokenBytes(nil)
			val = dict.ID(b)
			//TODO handle ID UCNs
			//TODO- chars := l.Token()
			//TODO- switch c.Rune {
			//TODO- case IDENTIFIER, IDENTIFIER_LPAREN:
			//TODO- 	b := l.TokenBytes(func(buf *bytes.Buffer) {
			//TODO- 		for i := 0; i < len(chars); {
			//TODO- 			switch c := chars[i]; {
			//TODO- 			case c.Rune == '$' && !l.tweaks.enableDlrInIdentifiers:
			//TODO- 				l.report.Err(c.Pos(), "identifier character set extension '$' not enabled")
			//TODO- 				i++
			//TODO- 			case c.Rune == '\\':
			//TODO- 				r, n := decodeUCN(chars[i:])
			//TODO- 				buf.WriteRune(r)
			//TODO- 				i += n
			//TODO- 			case c.Rune < 0x80: // ASCII
			//TODO- 				buf.WriteByte(byte(c.Rune))
			//TODO- 				i++
			//TODO- 			default:
			//TODO- 				panic("internal error")
			//TODO- 			}
			//TODO- 		}
			//TODO- 	})
			//TODO- 	val = dict.ID(b)
			//TODO- default:
			//TODO- 	panic("internal error: " + yySymName(int(c.Rune)))
			//TODO- }
		}

		tok = xc.Token{Char: c, Val: val}
		if !l.isPreprocessing {
			tok = l.scope.lexerHack(tok, l.tokLast)
		}
	}

	if l.toC {
		tok = toC(tok, l.tweaks)
	}
	l.tokPrev = l.tokLast
	l.tokLast = tok
	return tok
}

// Lex implements yyLexer.
func (l *lexer) Lex(lval *yySymType) int {
	var tok xc.Token
	if x := l.injectFunc; l.exampleRule == 0 && len(x) != 0 {
		tok = x[0]
		l.injectFunc = x[1:]
	} else {
		tok = l.scanToken()
	}

	//dbg("Lex %s", PrettyString(tok))
	if l.constExprToks != nil {
		l.constExprToks = append(l.constExprToks, tok)
	}
	l.last = tok
	if tok.Rune == lex.RuneEOF {
		lval.Token = tok
		return 0
	}

	switch l.state {
	case lsBOL:
		switch tok.Rune {
		case PREPROCESSING_FILE, '\n':
			// nop
		case '#':
			l.push(scDIRECTIVE)
			tok = l.scanToken()
			switch tok.Rune {
			case '\n':
				tok.Char = lex.NewChar(tok.Pos(), PPHASH_NL)
			case PPDEFINE:
				l.push(scDEFINE)
				l.state = lsDefine
			case PPELIF, PPENDIF, PPERROR, PPIF, PPLINE, PPPRAGMA:
				l.sc = scINITIAL
				l.state = lsTokens
			case PPELSE, PPIFDEF, PPIFNDEF:
				l.state = lsZero
			case PPUNDEF:
				l.state = lsUndef
			case PPINCLUDE:
				l.sc = scHEADER
				l.state = lsTokens
			case PPINCLUDE_NEXT:
				if l.tweaks.enableIncludeNext {
					l.sc = scHEADER
					l.state = lsTokens
					break
				}

				l.state = lsTokens
				tok.Char = lex.NewChar(tok.Pos(), PPNONDIRECTIVE)
				tok.Val = xc.Dict.SID("include_next")
			default:
				l.state = lsTokens
				tok.Char = lex.NewChar(tok.Pos(), PPNONDIRECTIVE)
				l.pop()
			}
		default:
			l.encodeToken(tok)
			tok.Char = lex.NewChar(tok.Pos(), PPOTHER)
			l.state = lsTokens
		}
	case lsDefine:
		l.pop()
		switch tok.Rune {
		case IDENTIFIER:
			l.state = lsTokens
		case IDENTIFIER_LPAREN:
			l.state = lsSeekRParen
		default:
			l.state = lsZero
		}
	case lsSeekRParen:
		if tok.Rune == ')' {
			l.state = lsTokens
		}
	case lsTokens:
		l.encodeToken(tok)
		tok.Char = lex.NewChar(tok.Pos(), PPOTHER)
	case lsUndef:
		l.state = lsTokens
	}

	lval.Token = tok
	return int(tok.Char.Rune)
}

// Error implements yyLexer.
func (l *lexer) Error(msg string) {
	msg = strings.Replace(msg, "$end", "EOF", -1)
	t := l.last
	parts := strings.Split(msg, ", expected ")
	if len(parts) == 2 && strings.HasPrefix(parts[0], "unexpected ") && tokHasVal[t.Rune] {
		msg = fmt.Sprintf("%s %s, expected %s", parts[0], t.S(), parts[1])
	}
	l.report.ErrTok(t, "%s", msg)
}

// Reduced implements yyLexerEx.
func (l *lexer) Reduced(rule, state int, lval *yySymType) (stop bool) {
	if n := l.exampleRule; n >= 0 && rule != n {
		return false
	}

	switch x := lval.node.(type) {
	case interface {
		fragment() interface{}
	}:
		l.example = x.fragment()
	default:
		l.example = x
	}
	return true
}
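
// parsePPConstExpr0 macro-expands list, drops white space, replaces the
// remaining identifiers with the integer constant 0 and parses the result
// as a preprocessor constant expression, returning its value and type.
// Identifiers that still name a macro after expansion are reported as
// errors and (nil, nil) is returned.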
func (l *lexer) parsePPConstExpr0(list PPTokenList, p *pp) (interface{}, Type) {
	l.toks = l.toks[:0]
	p.expand(&tokenBuf{decodeTokens(list, nil, true)}, true, func(toks []xc.Token) {
		l.toks = append(l.toks, toks...)
	})
	w := 0
	for _, tok := range l.toks {
		switch tok.Rune {
		case ' ':
			// nop
		case IDENTIFIER:
			if p.macros.m[tok.Val] != nil {
				l.report.ErrTok(tok, "expected constant expression")
				return nil, nil
			}

			tok.Rune = INTCONST
			tok.Val = id0
			fallthrough
		default:
			l.toks[w] = tok
			w++
		}
	}
	l.toks = l.toks[:w]
	l.state = lsConstExpr0
	if yyParse(l) == 0 {
		e := l.constantExpression
		return e.Value, e.Type
	}

	return nil, nil
}
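
// parsePPConstExpr reports whether the preprocessor constant expression in
// list evaluates to a non-zero value.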
func (l *lexer) parsePPConstExpr(list PPTokenList, p *pp) bool {
	if v, _ := l.parsePPConstExpr0(list, p); v != nil {
		return isNonZero(v)
	}

	return false
}