1 Star 0 Fork 0

AMeT/chainsaw

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
main.go 6.01 KB
一键复制 编辑 原始数据 按行查看 历史
AMeT 提交于 2023-09-10 23:14 . release 1.0.0
package main
import (
"bufio"
"fmt"
"github.com/akamensky/argparse"
"log"
"os"
"path"
"regexp"
"strconv"
"strings"
"sync"
)
// Version is the release number shown in the command-line help text.
const Version = "1.0.0"

// Package-level state shared between main and the saveLog goroutines.
var (
	// waitGroup tracks in-flight saveLog goroutines so main can wait for them.
	waitGroup sync.WaitGroup
	// outputDir is the directory the split log files are written to.
	outputDir string
	// dryRun disables all file writes when true.
	dryRun bool
)
// main parses the command line, streams the input log file line by line and
// groups the lines by the date prefix (YYYY-MM-DD) of each log message. Every
// group is handed to saveLog in its own goroutine; a group that grows beyond
// the chunk size is flushed early as a numbered chunk. Lines without a date
// prefix are treated as continuations of the previous message. After all
// writers have finished, a summary of the line counters is printed.
//
// The process exits with a non-zero status when the arguments are invalid,
// the output directory cannot be created, or the input cannot be opened or
// read to the end.
func main() {
	parser := argparse.NewParser("chainsaw", "Cut large log file into small pieces, version "+Version)
	notBeforeDate := parser.Int("", "not-before", &argparse.Options{Required: false, Help: "Ignore logs before, format 20230908"})
	notAfterDate := parser.Int("", "not-after", &argparse.Options{Required: false, Help: "Ignore logs after, format 20230908"})
	// The amount of text in the log affects the size of the resulting files.
	// For reference: files above 20000 lines (about 2.5 MB) may not be
	// highlighted correctly, and files above 50000 lines (about 6 MB) may not
	// show line numbers correctly.
	// Note that a single output file can still exceed this line count, because
	// chainsaw tries not to split one log message (for example a stack trace)
	// across two files.
	chunkSize := parser.Int("", "chunk-size", &argparse.Options{Required: false, Default: 50000, Help: "Max lines per file"})
	logfile := parser.String("f", "file", &argparse.Options{Required: true, Help: "Original log file"})
	argsOutputDir := parser.String("o", "output", &argparse.Options{Required: false, Default: "cut/", Help: "Output directory"})
	argsDryRun := parser.Flag("", "dry-run", &argparse.Options{Required: false, Help: "No file will be written"})
	if err := parser.Parse(os.Args); err != nil {
		log.Fatal(parser.Usage(err))
	}

	outputDir = *argsOutputDir
	dryRun = *argsDryRun
	if dryRun {
		// A dry run must not touch the file system, so the output directory
		// is not created either.
		fmt.Println("Running in dry-run mode, no file will be written")
	} else if err := os.MkdirAll(outputDir, 0777); err != nil {
		// MkdirAll succeeds when the directory already exists and also
		// creates missing parent directories.
		log.Fatalf("creating output directory %q: %v", outputDir, err)
	}

	// A bound that is zero or negative (flag not given) disables the filter.
	notBeforeFilter := func(messageTimestampAsInt int) bool {
		return *notBeforeDate <= 0 || messageTimestampAsInt >= *notBeforeDate
	}
	notAfterFilter := func(messageTimestampAsInt int) bool {
		return *notAfterDate <= 0 || messageTimestampAsInt <= *notAfterDate
	}
	// dateAsInt turns "2023-09-08" into 20230908 so dates compare as numbers.
	// The date pattern only matches digits and dashes, so Atoi cannot fail.
	dateAsInt := func(date string) int {
		n, _ := strconv.Atoi(strings.ReplaceAll(date, "-", ""))
		return n
	}

	countTotal := 0
	countDropped := 0
	countPassed := 0
	countProcessed := 0
	haveStarted := false
	passLogMessageByDate := false
	notAfterFilterSkipped := false
	chunkCounter := 0
	// A log message starts with something like 2023-06-16 16:26:45.495 INFO ...
	// Only the date part is matched, because files are split per day.
	datePattern := regexp.MustCompile(`^\d{4}-\d{2}-\d{2}`)
	var cursorTimestamp string
	var logBuffer []string

	file, err := os.Open(*logfile)
	if err != nil {
		log.Fatalf("opening log file: %v", err)
	}

	// The default Scanner limit of 64 KiB per line is easily exceeded by log
	// messages carrying large payloads; allow much longer lines.
	const maxLineBytes = 16 * 1024 * 1024
	scanner := bufio.NewScanner(file)
	scanner.Buffer(make([]byte, 0, 64*1024), maxLineBytes)
	for scanner.Scan() {
		countTotal++
		line := scanner.Text()
		messageTimestamp := datePattern.FindString(line)
		isNewLogLine := messageTimestamp != ""
		if !haveStarted {
			// Lines read before the first date prefix cannot be assigned to
			// any file, so they are dropped.
			if !isNewLogLine {
				countDropped++
				continue
			}
			haveStarted = true
			cursorTimestamp = messageTimestamp
		}
		if passLogMessageByDate {
			// Currently skipping: only a new message whose date passes both
			// filters ends the skip mode.
			if !isNewLogLine {
				countPassed++
				continue
			}
			logTimestampInt := dateAsInt(messageTimestamp)
			if !notBeforeFilter(logTimestampInt) || !notAfterFilter(logTimestampInt) {
				countPassed++
				continue
			}
			passLogMessageByDate = false
		} else if isNewLogLine {
			// Not skipping: decide whether this message starts a skip.
			logTimestampInt := dateAsInt(messageTimestamp)
			if !notBeforeFilter(logTimestampInt) {
				passLogMessageByDate = true
				countPassed++
				continue
			}
			if !notAfterFilter(logTimestampInt) {
				// Everything after the upper bound is ignored and not counted.
				notAfterFilterSkipped = true
				break
			}
		}
		countProcessed++
		if !isNewLogLine {
			// Continuation line: it belongs to the message already buffered.
			logBuffer = append(logBuffer, line)
			continue
		}
		if cursorTimestamp == messageTimestamp {
			// Same day as the buffer: flush early only when the buffer has
			// outgrown the chunk size. Flushing happens on message boundaries
			// so a multi-line message is never split.
			if len(logBuffer) > *chunkSize {
				waitGroup.Add(1)
				go saveLog(logBuffer, cursorTimestamp, true, chunkCounter)
				chunkCounter++
				logBuffer = nil
			}
		} else {
			// A new day begins: flush the previous day's buffer. It is named
			// as a chunk only if earlier chunks of that day were written.
			waitGroup.Add(1)
			go saveLog(logBuffer, cursorTimestamp, chunkCounter != 0, chunkCounter)
			chunkCounter = 0
			logBuffer = nil
			cursorTimestamp = messageTimestamp
		}
		logBuffer = append(logBuffer, line)
	}
	scanErr := scanner.Err()
	// The input was only read; a close error carries no useful information.
	_ = file.Close()

	// The loop is over: save whatever is left in the buffer.
	if haveStarted {
		waitGroup.Add(1)
		go saveLog(logBuffer, cursorTimestamp, chunkCounter != 0, chunkCounter)
	}
	waitGroup.Wait()
	fmt.Println()
	if scanErr != nil {
		// Report only after the writers are done so files already handed
		// over are complete on disk.
		log.Fatalf("reading %q: %v (stopped after %d lines)", *logfile, scanErr, countTotal)
	}
	if notAfterFilterSkipped {
		fmt.Println("Not after filter used, lines after specified date skipped")
	}
	fmt.Printf("%d lines dropped\n", countDropped)
	fmt.Printf("%d lines saved\n", countProcessed)
	if !notAfterFilterSkipped {
		// These totals are only meaningful when the whole file was read.
		fmt.Printf("%d lines passed\n", countPassed)
		fmt.Printf("%d lines in given file\n", countTotal)
	}
}
// saveLog writes one group of log lines to a file inside outputDir and marks
// one unit of work on waitGroup as done when it returns, whatever the outcome.
//
// The file is named "<timestamp>.log", or "<timestamp>.<chunkNumber>.log" when
// chunked is true. An empty logBuffer produces no file and no output. In
// dry-run mode nothing is written, but the "saved" line is still printed so
// the user can see what would have been produced. A write failure is logged
// and the "saved" line is suppressed.
func saveLog(logBuffer []string, timestamp string, chunked bool, chunkNumber int) {
	// Deferred so that no return path can leave main blocked in Wait.
	defer waitGroup.Done()
	if len(logBuffer) == 0 {
		return
	}
	var filename string
	if chunked {
		filename = path.Join(outputDir, fmt.Sprintf("%s.%d.log", timestamp, chunkNumber))
	} else {
		filename = path.Join(outputDir, fmt.Sprintf("%s.log", timestamp))
	}
	if !dryRun {
		if err := writeLines(filename, logBuffer); err != nil {
			log.Printf("saving %s: %v", filename, err)
			return
		}
	}
	fmt.Printf("%s saved (%d lines)\n", filename, len(logBuffer))
}

// writeLines creates or truncates filename and writes every element of lines
// followed by a newline. It returns the first error from opening, writing or
// closing the file.
func writeLines(filename string, lines []string) (err error) {
	// O_TRUNC prevents stale content from a previous, longer file of the same
	// name from surviving after the new lines.
	file, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0666)
	if err != nil {
		return err
	}
	defer func() {
		if closeErr := file.Close(); err == nil {
			err = closeErr
		}
	}()
	writer := bufio.NewWriter(file)
	for _, line := range lines {
		// bufio.Writer remembers the first write error and returns it from
		// Flush, so the individual results need no separate check.
		_, _ = writer.WriteString(line)
		_ = writer.WriteByte('\n')
	}
	return writer.Flush()
}
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Go
1
https://gitee.com/amet/chainsaw.git
git@gitee.com:amet/chainsaw.git
amet
chainsaw
chainsaw
master

搜索帮助

Cb406eda 1850385 E526c682 1850385