// [Web page notice, not part of the source] Code pull complete; the page will refresh automatically.
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package zorm
// sqlSpan is the position range of one fragment of a SQL string.
// The range is half-open: Start is included, End is not.
type sqlSpan struct {
	// Start is the position where the fragment begins (inclusive).
	Start int
	// End is the position just past the fragment (exclusive).
	End int
}
// sqlPart groups the position ranges of the individual pieces of one SQL
// statement: one sqlSpan per clause and one per recognised keyword.
type sqlPart struct {
	// TODO: the WITH clause is not handled for now.
	// With sqlSpan // WITH clause

	// Clauses.
	Select  sqlSpan // SELECT clause
	From    sqlSpan // FROM clause
	Where   sqlSpan // WHERE clause
	GroupBy sqlSpan // GROUP BY clause
	OrderBy sqlSpan // ORDER BY clause

	// Single keywords.
	Distinct  sqlSpan // DISTINCT keyword
	Union     sqlSpan // UNION keyword
	Intersect sqlSpan // INTERSECT keyword
	Except    sqlSpan // EXCEPT keyword
}
// sqlScanner is the cursor state used to walk a SQL string one byte at a
// time.
type sqlScanner struct {
	// sqlStr is the SQL text being scanned.
	sqlStr string
	// index is the current scan position within sqlStr.
	index int
	// sqlLen is the total length of sqlStr.
	sqlLen int
	// depth is the parenthesis nesting level at the current position; it is
	// used to tell sub-queries apart from the outer statement.
	depth int
}
// ================= 基础能力 / Basic Capabilities =================
// isIdentChar reports whether c can be part of an identifier: an ASCII
// letter, an ASCII digit, or an underscore. Every other byte, including any
// byte >= 0x80, is reported as false.
func isIdentChar(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	case '0' <= c && c <= '9':
		return true
	}
	return c == '_'
}
// skipString moves the scanner past a quoted literal. The byte at sc.index
// on entry is taken as the delimiter (the caller passes ' or "), and on
// return sc.index sits just after the closing delimiter.
//
// Two escape forms are recognised inside the literal:
//   - a backslash followed by any byte (for example \') skips both bytes;
//   - a doubled delimiter (for example the ” in 'O”Brien') is part of the
//     literal and does not close it.
//
// An unterminated literal is not an error: the scan simply stops at the end
// of the input. The caller must ensure sc.index is inside the string.
func (sc *sqlScanner) skipString() {
	src, end := sc.sqlStr, sc.sqlLen
	delim := src[sc.index]
	pos := sc.index + 1 // first byte after the opening delimiter

	for pos < end {
		ch := src[pos]
		hasNext := pos+1 < end
		switch {
		case ch == '\\' && hasNext:
			// Backslash escape: drop the backslash and the byte it escapes.
			pos += 2
		case ch != delim:
			// Ordinary byte (or a trailing backslash with nothing after it).
			pos++
		case hasNext && src[pos+1] == delim:
			// Doubled delimiter: still inside the literal.
			pos += 2
		default:
			// Closing delimiter: stop right after it.
			sc.index = pos + 1
			return
		}
	}
	// Ran off the end without finding the closing delimiter.
	sc.index = pos
}
// skipComment skips the SQL comment that starts at sc.index, if there is one,
// and reports whether a comment was skipped.
//
// Two forms are recognised:
//   - "--" line comments: sc.index is left on the terminating '\n' (the
//     newline itself is not consumed) or at the end of the input;
//   - "/* ... */" block comments: sc.index is left just after the closing
//     "*/".
//
// An unterminated block comment is treated as running to the end of the
// input: sc.index is set to sc.sqlLen so no byte of the comment is handed
// back to the caller as SQL. When the bytes at sc.index do not open a
// comment, sc.index is left unchanged and false is returned.
func (sc *sqlScanner) skipComment() bool {
	// Both comment openers are two bytes long.
	if sc.index+1 >= sc.sqlLen {
		return false
	}
	first, second := sc.sqlStr[sc.index], sc.sqlStr[sc.index+1]

	switch {
	case first == '-' && second == '-':
		sc.index += 2 // skip "--"
		// Stop on the newline or at EOF; EOF also ends the comment.
		for sc.index < sc.sqlLen && sc.sqlStr[sc.index] != '\n' {
			sc.index++
		}
		return true

	case first == '/' && second == '*':
		sc.index += 2 // skip "/*"
		for sc.index+1 < sc.sqlLen {
			if sc.sqlStr[sc.index] == '*' && sc.sqlStr[sc.index+1] == '/' {
				sc.index += 2 // skip "*/"
				return true
			}
			sc.index++
		}
		// No closing "*/": the loop above stops one byte short of the end,
		// so consume that last byte as part of the comment too.
		sc.index = sc.sqlLen
		return true
	}

	return false
}
// ================= 关键字匹配 / Keyword Matching =================
// matchKeyword reports whether the keyword word occurs in s at offset i as a
// whole word, ignoring ASCII case.
//
// Only the bytes of s are lower-cased before comparison, so word must be
// given in lower case. A match also requires that the bytes immediately
// before and after the keyword (when present) are not identifier characters:
// "from" matches neither inside "from_addr" nor inside "afrom".
func matchKeyword(s string, i int, word string) bool {
	end := i + len(word)

	// The keyword must fit into what is left of s.
	if end > len(s) {
		return false
	}
	// Left boundary: the keyword must not continue an identifier.
	if i > 0 && isIdentChar(s[i-1]) {
		return false
	}
	// Right boundary: the keyword must not be the prefix of an identifier.
	if end < len(s) && isIdentChar(s[end]) {
		return false
	}

	// Compare byte by byte, folding ASCII upper case in s to lower case.
	for k := 0; k < len(word); k++ {
		ch := s[i+k]
		if 'A' <= ch && ch <= 'Z' {
			ch += 'a' - 'A'
		}
		if ch != word[k] {
			return false
		}
	}
	return true
}
// matchTwoKeywords reports whether s contains, starting at offset i, the
// keyword w1 followed by the keyword w2, as in "group by" or "order by".
//
// Any run of spaces, tabs, carriage returns and newlines may separate the two
// keywords. Each keyword is checked with matchKeyword, so both are matched
// case-insensitively and on word boundaries.
func matchTwoKeywords(s string, i int, w1, w2 string) bool {
	if !matchKeyword(s, i, w1) {
		return false
	}

	// Advance over the whitespace that follows the first keyword.
	next := i + len(w1)
	for ; next < len(s); next++ {
		if ch := s[next]; ch != ' ' && ch != '\t' && ch != '\n' && ch != '\r' {
			break
		}
	}

	// next is now on the first non-blank byte, or at the end of s, in which
	// case matchKeyword rejects it for lack of room.
	return matchKeyword(s, next, w2)
}
// ================= 核心解析 / Core Parsing =================
// parseSQL scans sqlStr once and reports where each top-level clause sits.
// According to the original author's notes it replaces a regex-based approach
// and is used when wrapping a statement in COUNT(*) for pagination.
//
// The returned sqlPart holds half-open position ranges into sqlStr:
//   - Select always starts at 0 and runs up to the first recognised clause
//     keyword, or to the end of the input when there is none.
//   - From, Where, GroupBy and OrderBy start at their keyword and end where
//     the next recognised clause keyword starts; the clause still open when
//     the input runs out ends at len(sqlStr).
//   - Distinct, Union, Intersect and Except cover only the keyword itself.
//
// A clause or keyword that never appears keeps the zero span. When a keyword
// occurs more than once at the top level, the last occurrence is the one
// recorded.
//
// Scanning rules:
//   - Keywords are matched case-insensitively and on word boundaries.
//   - Keywords are only recognised at parenthesis depth 0, so a FROM inside a
//     sub-query or function call does not affect the outer statement.
//   - Text inside '...' and "..." literals, inside back-quoted identifiers
//     (`...`), and inside "--" or "/* */" comments is skipped, so keywords
//     appearing there are ignored.
func parseSQL(sqlStr string) sqlPart {
	sc := &sqlScanner{sqlStr: sqlStr, sqlLen: len(sqlStr)}
	var parts sqlPart

	// current is the clause whose End is not known yet. Scanning begins inside
	// SELECT, whose Start is already 0 by zero value.
	current := &parts.Select

	// openClause ends the running clause at the scan position and makes next
	// the running clause, starting at that same position.
	openClause := func(next *sqlSpan) {
		current.End = sc.index
		next.Start = sc.index
		current = next
	}

	for sc.index < sc.sqlLen {
		c := sqlStr[sc.index]

		switch c {
		case '\'', '"', '`':
			// Quoted literal or quoted identifier: skip it as a whole so
			// keywords inside it are never matched. The back quote is handled
			// the same way as the other two delimiters.
			sc.skipString()
			continue
		case '-', '/':
			// Possibly a comment opener; if not, the byte is an ordinary
			// operator and falls through to the generic handling below.
			if sc.skipComment() {
				continue
			}
		case '(':
			sc.depth++
			sc.index++
			continue
		case ')':
			// Tolerate unbalanced input: never let depth go negative.
			if sc.depth > 0 {
				sc.depth--
			}
			sc.index++
			continue
		}

		// Keywords only count at the outermost level.
		if sc.depth == 0 {
			// Dispatch on the first letter so most bytes cost one comparison.
			switch c {
			case 'f', 'F':
				if matchKeyword(sqlStr, sc.index, "from") {
					openClause(&parts.From)
				}
			case 'w', 'W':
				if matchKeyword(sqlStr, sc.index, "where") {
					openClause(&parts.Where)
				}
			case 'g', 'G':
				if matchTwoKeywords(sqlStr, sc.index, "group", "by") {
					openClause(&parts.GroupBy)
				}
			case 'o', 'O':
				if matchTwoKeywords(sqlStr, sc.index, "order", "by") {
					openClause(&parts.OrderBy)
				}
			case 'd', 'D':
				if matchKeyword(sqlStr, sc.index, "distinct") {
					parts.Distinct = sqlSpan{Start: sc.index, End: sc.index + len("distinct")}
				}
			case 'u', 'U':
				if matchKeyword(sqlStr, sc.index, "union") {
					parts.Union = sqlSpan{Start: sc.index, End: sc.index + len("union")}
				}
			case 'i', 'I':
				if matchKeyword(sqlStr, sc.index, "intersect") {
					parts.Intersect = sqlSpan{Start: sc.index, End: sc.index + len("intersect")}
				}
			case 'e', 'E':
				if matchKeyword(sqlStr, sc.index, "except") {
					parts.Except = sqlSpan{Start: sc.index, End: sc.index + len("except")}
				}
			}
		}
		sc.index++
	}

	// The clause still open at EOF runs to the end of the input. Every other
	// opened clause already received its End when the next clause started, so
	// no further fix-up is needed.
	current.End = sc.sqlLen
	return parts
}
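// [Web page notice, not part of the source] This section may contain content that is unsuitable for display, so the page does not show it. You can review and revise it with the relevant editing tools.
// [Web page notice, not part of the source] If you are sure the content involves no inappropriate language, pure advertising, violence, vulgar or pornographic material, infringement, piracy, false or worthless content, or anything that violates national laws and regulations, you can submit an appeal and it will be handled as soon as possible.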