import type { Tokens } from "@/types/tokens";
import type { TokenTypeVal } from "@/types/tokens-types";

/** One inline Markdown construct: how to find it and how to turn a match into a token. */
interface MarkdownPattern {
  /** Must NOT carry the `g` flag: `String.prototype.match` drops `index` and groups for global regexes. */
  regex: RegExp;
  process: (match: RegExpMatchArray, context: ProcessContext) => Tokens;
}

/** Per-call parsing state handed to every pattern processor. */
interface ProcessContext {
  /** The (sub)string currently being parsed. */
  line: string;
  /** Zero-based line index; emitted positions use `index + 1`. */
  index: number;
  /** Position inside `line` at which the current search started; `match.index` is relative to it. */
  offset: number;
  /** Absolute character offset of `line[0]`. */
  currentOffset: number;
  /** Zero-based column of `line[0]` within the physical line (non-zero only for nested content). */
  columnOffset: number;
  parseInlineElements: (
    line: string,
    index: number,
    currentOffset: number,
    columnOffset?: number,
  ) => Tokens[];
}

/** The earliest match found by `findNextMatch`, together with the pattern that produced it. */
interface NextMatch {
  pattern: MarkdownPattern;
  match: RegExpMatchArray;
  /** `match.index`, already checked to be defined. */
  index: number;
}

/**
 * All supported inline patterns.
 * Declaration order is the tie-breaker when two patterns match at the same index.
 */
const MARKDOWN_PATTERNS: Record<string, MarkdownPattern> = {
  bold: {
    regex: /\*\*(?<content>.*?)\*\*/,
    process: (match, context) => createStandardToken("bold" as TokenTypeVal, match, context),
  },
  italic: {
    regex: /_(?<content>.*?)_/,
    process: (match, context) => createStandardToken("italic" as TokenTypeVal, match, context),
  },
  underline: {
    regex: /-{2}(?<content>.*?)-{2}/,
    process: (match, context) => createStandardToken("underline" as TokenTypeVal, match, context),
  },
  delete: {
    regex: /~{2}(?<content>.*?)~{2}/,
    process: (match, context) => createStandardToken("delete" as TokenTypeVal, match, context),
  },
  inlineCode: {
    regex: /`(?<content>.*?)`/,
    process: (match, context) => {
      const content = match.groups?.content ?? "";
      const start = matchStart(match, context);

      // An empty (or whitespace-only) code span is kept as literal text.
      if (!content.trim()) {
        return createTextToken(match[0], start, context);
      }

      return {
        type: "inlineCode",
        // Code content is never parsed further; it starts right after the opening backtick.
        children: [createTextToken(content, start + 1, context)],
        position: createPosition(match, context),
      };
    },
  },
  image: {
    regex: /!\[(?<alt>.*?)\]\((?<url>.*?)\)/,
    process: (match, context) => ({
      type: "image",
      title: null,
      url: match.groups?.url ?? "",
      alt: match.groups?.alt ?? "",
      position: createPosition(match, context),
    }),
  },
  link: {
    // The lookbehind keeps the `[alt](url)` tail of an image from being read as a link.
    regex: /(?<!!)\[(?<text>[^\]]+)\]\((?<url>[^)]+)\)/,
    process: (match, context) => ({
      type: "link",
      title: null,
      url: match.groups?.url ?? "",
      // The link text starts right after the opening `[`.
      children: [
        createTextToken(match.groups?.text ?? "", matchStart(match, context) + 1, context),
      ],
      position: createPosition(match, context),
    }),
  },
  html: {
    regex: /<(?<tag>[a-zA-Z0-9]+)(?<attrs>[^>]*)>(?<content>.*?)<\/\k<tag>>/,
    process: (match, context) => ({
      type: "html",
      value: match[0],
      position: createPosition(match, context),
    }),
  },
};

/** Index of the first character of `match` inside `context.line`. */
function matchStart(match: RegExpMatchArray, context: ProcessContext): number {
  return context.offset + (match.index ?? 0);
}

/**
 * Builds a position for `length` characters beginning at `start`.
 *
 * @param start Zero-based index inside `context.line`.
 * @returns 1-based line/column and absolute offsets; `end` is exclusive.
 */
function createSpan(context: ProcessContext, start: number, length: number) {
  const line = context.index + 1;
  const end = start + length;

  return {
    start: {
      line,
      column: context.columnOffset + start + 1,
      offset: context.currentOffset + start,
    },
    end: {
      line,
      column: context.columnOffset + end + 1,
      offset: context.currentOffset + end,
    },
  };
}

/** Position covering the whole of `match`, delimiters included. */
function createPosition(match: RegExpMatchArray, context: ProcessContext) {
  return createSpan(context, matchStart(match, context), match[0].length);
}

/**
 * Creates a text node for `value`.
 *
 * @param start Zero-based index of `value` inside `context.line`.
 */
function createTextToken(value: string, start: number, context: ProcessContext): Tokens {
  return {
    type: "text",
    value,
    position: createSpan(context, start, value.length),
  };
}

/**
 * Creates a node for a symmetric-delimiter construct (bold, italic, underline, delete)
 * and recursively parses what lies between the delimiters.
 */
function createStandardToken(
  type: TokenTypeVal,
  match: RegExpMatchArray,
  context: ProcessContext,
): Tokens {
  const innerContent = match.groups?.content ?? match[1] ?? "";
  // Opening and closing delimiters are identical, so each one is half of the non-content part.
  // Deriving the length from the match keeps `--` (underline) correct without a per-type table.
  const delimiterLength = (match[0].length - innerContent.length) / 2;
  const innerStart = matchStart(match, context) + delimiterLength;

  return {
    type,
    children: context.parseInlineElements(
      innerContent,
      context.index,
      context.currentOffset + innerStart,
      context.columnOffset + innerStart,
    ),
    position: createPosition(match, context),
  };
}

/**
 * Finds the pattern whose match begins earliest in `line` at or after `offset`.
 *
 * @returns The winning pattern and its match (indices relative to `offset`), or `null` if nothing matches.
 */
function findNextMatch(line: string, offset: number): NextMatch | null {
  const rest = line.slice(offset);
  let best: NextMatch | null = null;

  for (const pattern of Object.values(MARKDOWN_PATTERNS)) {
    const match = rest.match(pattern.regex);
    // A match without an index cannot be placed in the line, so it is ignored.
    if (match === null || match.index === undefined) continue;

    if (best === null || match.index < best.index) {
      best = { pattern, match, index: match.index };
    }
  }

  return best;
}

/**
 * Parses the inline Markdown constructs of a single line into tokens.
 *
 * @param line Text to parse.
 * @param index Zero-based line index; positions report `index + 1`.
 * @param currentOffset Absolute character offset of `line[0]`.
 * @param columnOffset Zero-based column of `line[0]` in the physical line. Callers parsing a whole
 *   line can omit it; the recursive calls for nested content pass it so nested columns stay absolute.
 * @returns Tokens in source order; text between and after constructs becomes `text` tokens.
 */
export const parseInlineElements = (
  line: string,
  index: number,
  currentOffset: number,
  columnOffset = 0,
): Tokens[] => {
  const children: Tokens[] = [];
  const context: ProcessContext = {
    line,
    index,
    currentOffset,
    columnOffset,
    offset: 0,
    parseInlineElements,
  };

  while (context.offset < line.length) {
    const next = findNextMatch(line, context.offset);
    if (next === null) break;

    const start = context.offset + next.index;

    // Plain text that precedes the match.
    if (start > context.offset) {
      children.push(createTextToken(line.slice(context.offset, start), context.offset, context));
    }

    // The processor reads `context.offset`, so advance only after it has run.
    children.push(next.pattern.process(next.match, context));
    context.offset = start + next.match[0].length;
  }

  // Whatever is left after the last match is plain text.
  if (context.offset < line.length) {
    children.push(createTextToken(line.slice(context.offset), context.offset, context));
  }

  return children;
};