From 017be5622342a103db020be171b0890b2b4d4b15 Mon Sep 17 00:00:00 2001 From: e1chan <876394202@qq.com> Date: Fri, 24 Jan 2025 16:48:08 +0800 Subject: [PATCH 1/2] =?UTF-8?q?fix(mini-markdown-ast-parser):=20=E9=87=8D?= =?UTF-8?q?=E6=9E=84inline=E8=A7=A3=E6=9E=90=E9=80=BB=E8=BE=91=20=E4=BF=AE?= =?UTF-8?q?=E5=A4=8D=E4=BA=86inlineCode=E5=86=85=E9=83=A8=E5=86=85?= =?UTF-8?q?=E5=AE=B9=E4=BB=8D=E4=BC=9A=E8=A2=AB=E8=A7=A3=E6=9E=90=E7=9A=84?= =?UTF-8?q?bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/core/parse/compose/inline/index.ts | 446 ++++++++---------- 1 file changed, 208 insertions(+), 238 deletions(-) diff --git a/packages/mini-markdown-ast-parser/src/core/parse/compose/inline/index.ts b/packages/mini-markdown-ast-parser/src/core/parse/compose/inline/index.ts index a02f880..fcc7a74 100644 --- a/packages/mini-markdown-ast-parser/src/core/parse/compose/inline/index.ts +++ b/packages/mini-markdown-ast-parser/src/core/parse/compose/inline/index.ts @@ -1,265 +1,235 @@ -import { Tokens } from "@/types/tokens" +import { Tokens } from "@/types/tokens"; +import { TokenTypeVal } from "@/types/tokens-types"; + +interface MarkdownPattern { + regex: RegExp; + process: (match: RegExpMatchArray, context: ProcessContext) => Tokens; +} + +interface ProcessContext { + line: string; + index: number; + offset: number; + currentOffset: number; + parseInlineElements: ( + line: string, + index: number, + currentOffset: number + ) => Tokens[]; +} + +// 定义所有 Markdown 模式 +const MARKDOWN_PATTERNS: Record = { + bold: { + regex: /\*\*(?.*?)\*\*/, + process: (match, context) => + createStandardToken("bold" as TokenTypeVal, match, context), + }, + italic: { + regex: /\_(?.*?)\_/, + process: (match, context) => + createStandardToken("italic" as TokenTypeVal, match, context), + }, + underline: { + regex: /\-{2}(?.*?)\-{2}/, + process: (match, context) => + createStandardToken("underline" as TokenTypeVal, match, context), + }, + delete: { + regex: /\~{2}(?.*?)\~{2}/, + process: (match, context) => + createStandardToken("delete" as TokenTypeVal, match, context), + }, + inlineCode: { + regex: /`(?.*?)`/, + process: (match, context) => { + if (!match[1].trim()) { + return createTextToken(match[0], match, context); + } + return { + type: "inlineCode", + value: match.groups?.content || match[1], + position: createPosition(match, context), + }; + }, + }, + image: { + regex: /!\[(?.*?)\]\((?.*?)\)/g, + process: (match, context) => ({ + type: "image", + title: null, + url: match.groups?.url || "", + alt: match.groups?.alt || "", + position: createPosition(match, context), + }), + }, + link: { + regex: /\[(?[^\]]+)\]\((?[^)]+)\)/, + process: (match, context) => ({ + type: "link", + title: null, + url: match.groups?.url || "", + children: [createTextToken(match.groups?.text || "", match, context)], + position: createPosition(match, context), + }), + }, + html: { + regex: /<(?[a-zA-Z0-9]+)(?[^>]*)>(?.*?)<\/\1>/g, + process: (match, context) => ({ + type: "html", + value: match[0], + position: createPosition(match, context), + }), + }, +}; + +// 创建位置信息 +function createPosition(match: RegExpMatchArray, context: ProcessContext) { + const startOffset = + context.currentOffset + context.offset + (match.index ?? 0); + const endOffset = startOffset + match[0].length; + + return { + start: { + line: context.index + 1, + column: context.offset + (match.index ?? 0) + 1, + offset: startOffset, + }, + end: { + line: context.index + 1, + column: context.offset + (match.index ?? 0) + match[0].length + 1, + offset: endOffset, + }, + }; +} + +// 创建文本节点 +function createTextToken( + value: string, + match: RegExpMatchArray, + context: ProcessContext +): Tokens { + return { + type: "text", + value, + position: createPosition(match, context), + }; +} + +// 创建标准内联标记节点 +function createStandardToken( + type: TokenTypeVal, + match: RegExpMatchArray, + context: ProcessContext +): Tokens { + const innerContent = match.groups?.content || match[1]; + const innerOffset = + context.currentOffset + + context.offset + + (match.index ?? 0) + + (type === "bold" || type === "delete" ? 2 : 1); + + return { + type, + children: context.parseInlineElements( + innerContent, + context.index, + innerOffset + ), + position: createPosition(match, context), + }; +} + +// 查找最近的匹配 +function findNextMatch(line: string, offset: number) { + let bestMatch: { type: string; match: RegExpMatchArray } | null = null; + + for (const [type, pattern] of Object.entries(MARKDOWN_PATTERNS)) { + const match = line.slice(offset).match(pattern.regex); + if (!match) continue; + + if ( + !bestMatch || + (match.index ?? Infinity) < (bestMatch.match.index ?? Infinity) + ) { + bestMatch = { type, match }; + } + } + + return bestMatch; +} export const parseInlineElements = ( line: string, index: number, currentOffset: number -) => { - const boldRegex = /\*\*(.*?)\*\*/ - const italicRegex = /_(.*?)_/ - const underlineRegex = /\-\-(.*?)\-\-/ - const deleteRegex = /~~(.*?)~~/ - const inlineCodeRegex = /`(.*?)`/ - const linkRegex = /\[([^\]]+)\]\(([^)]+)\)/ - const imageRegex = /!\[(.*?)\]\((.*?)\)/g - const htmlRegex = /<([a-zA-Z0-9]+)([^>]*)>(.*?)<\/\1>/g - let offset = 0 - let children = [] - let lastIndex = 0 +): Tokens[] => { + let offset = 0; + let children: Tokens[] = []; + let lastIndex = 0; + + const context: ProcessContext = { + line, + index, + currentOffset, + offset, + parseInlineElements, + }; while (offset < line.length) { - const boldMatch = line.slice(offset).match(boldRegex) - const italicMatch = line.slice(offset).match(italicRegex) - const underlineMatch = line.slice(offset).match(underlineRegex) - const deleteMatch = line.slice(offset).match(deleteRegex) - const inlineCodeMatch = line.slice(offset).match(inlineCodeRegex) - const imageMatch = line.slice(offset).match(imageRegex) - const linkMatch = line.slice(offset).match(linkRegex) - const htmlMatch = line.slice(offset).match(htmlRegex) + const nextMatch = findNextMatch(line, offset); - let match: RegExpMatchArray | null = null - let type = '' - let regex = null + if (!nextMatch) break; - if ( - boldMatch && - (!match || - (boldMatch.index ?? Infinity) < - ((match as RegExpMatchArray)?.index ?? Infinity)) - ) { - match = boldMatch - type = 'bold' - regex = boldRegex - } - if ( - italicMatch && - (!match || (italicMatch.index ?? Infinity) < (match?.index ?? Infinity)) - ) { - match = italicMatch - type = 'italic' - regex = italicRegex - } - if ( - underlineMatch && - (!match || - (underlineMatch.index ?? Infinity) < (match?.index ?? Infinity)) - ) { - match = underlineMatch - type = 'underline' - regex = underlineRegex - } - if ( - deleteMatch && - (!match || (deleteMatch.index ?? Infinity) < (match?.index ?? Infinity)) - ) { - match = deleteMatch - type = 'delete' - regex = deleteRegex - } - if ( - inlineCodeMatch && - (!match || - (inlineCodeMatch.index ?? Infinity) < (match?.index ?? Infinity)) - ) { - match = inlineCodeMatch - type = 'inlineCode' - regex = inlineCodeRegex - } - if (htmlMatch && (!match || (htmlMatch.index ?? Infinity) < (match?.index ?? Infinity))) { - match = htmlMatch - type = 'html' - regex = htmlRegex - } + const { type, match } = nextMatch; - if (imageMatch) { - for (let i = 0; i < imageMatch.length; i++) { - const fullMatch = imageMatch[i] - const [alt, url] = fullMatch - .match(/^!\[(.*?)\]\((.*?)\)$/) - ?.slice(1) || ['', ''] - const imageNode = { - type: 'image', - title: null, - url: url, - alt: alt, - position: { - start: { - line: index + 1, - column: offset + 1, - offset: currentOffset + offset - }, - end: { - line: index + 1, - column: offset + fullMatch.length + 1, - offset: currentOffset + offset + fullMatch.length - } - } - } - children.push(imageNode) - offset += fullMatch.length - lastIndex = offset - } - } else { - if ( - linkMatch && - (!match || (linkMatch.index ?? Infinity) < (match?.index ?? Infinity)) - ) { - match = linkMatch - type = 'link' - regex = linkRegex - } + // 处理匹配前的文本 + if (match.index && match.index > 0) { + children.push({ + type: "text", + value: line.slice(offset, offset + match.index), + position: { + start: { + line: index + 1, + column: offset + 1, + offset: currentOffset + offset, + }, + end: { + line: index + 1, + column: offset + match.index + 1, + offset: currentOffset + offset + match.index, + }, + }, + }); } - if (match) { - // 如果是空的行内代码块,直接作为普通文本处理 - if (type === 'inlineCode' && !match[1].trim()) { - children.push({ - type: 'text', - value: match[0], - position: { - start: { - line: index + 1, - column: offset + (match.index ?? 0) + 1, - offset: currentOffset + offset + (match.index ?? 0) - }, - end: { - line: index + 1, - column: offset + (match.index ?? 0) + match[0].length + 1, - offset: currentOffset + offset + (match.index ?? 0) + match[0].length - } - } - }) - offset += (match.index ?? 0) + match[0].length - lastIndex = offset - continue - } + // 处理匹配的标记 + context.offset = offset; + children.push(MARKDOWN_PATTERNS[type].process(match, context)); - if (match.index !== undefined && match.index > 0) { - children.push({ - type: 'text', - value: line.slice(offset, offset + (match.index ?? 0)), - position: { - start: { - line: index + 1, - column: offset + 1, - offset: currentOffset + offset - }, - end: { - line: index + 1, - column: offset + (match.index ?? 0) + 1, - offset: currentOffset + offset + (match.index ?? 0) - } - } - }) - } - if (type === 'link') { - children.push({ - type: type, - title: null, - url: match[2], - children: [ - { - type: 'text', - value: match[1], - position: { - start: { - line: index + 1, - column: offset + (match.index ?? 0) + 1, - offset: currentOffset + offset + (match.index ?? 0) - }, - end: { - line: index + 1, - column: offset + (match.index ?? 0) + match[1].length + 1, - offset: - currentOffset + - offset + - (match.index ?? 0) + - match[1].length - } - } - } - ], - position: { - start: { - line: index + 1, - column: offset + 1, - offset: currentOffset + offset - }, - end: { - line: index + 1, - column: offset + (match.index ?? 0) + match[0].length + 1, - offset: - currentOffset + offset + (match.index ?? 0) + match[0].length - } - } - }) - } else if (type === 'html') { // 添加HTML标签处理逻辑 - children.push({ - type: type, - value: match[0], - position: { - start: { - line: index + 1, - column: offset + 1, - offset: currentOffset + offset - }, - end: { - line: index + 1, - column: offset + match[0].length + 1, - offset: currentOffset + offset + match[0].length - } - } - }) - } else { - // 递归解析内部内容 - const innerContent = match[1] - const innerOffset = currentOffset + offset + (match.index ?? 0) + (type === 'bold' || type === 'delete' ? 2 : 1) - const innerChildren: any = parseInlineElements(innerContent, index, innerOffset) - - children.push({ - type: type, - children: innerChildren, - position: { - start: { line: index + 1, column: offset + (match.index ?? 0) + 1, offset: currentOffset + offset + (match.index ?? 0) }, - end: { line: index + 1, column: offset + (match.index ?? 0) + match[0].length + 1, offset: currentOffset + offset + (match.index ?? 0) + match[0].length } - } - }) - } - - offset += (match.index ?? 0) + match[0].length - lastIndex = offset - } else { - break - } + offset += (match.index ?? 0) + match[0].length; + lastIndex = offset; } + // 处理剩余文本 if (lastIndex < line.length) { children.push({ - type: 'text', + type: "text", value: line.slice(lastIndex), position: { start: { line: index + 1, column: lastIndex + 1, - offset: currentOffset + lastIndex + offset: currentOffset + lastIndex, }, end: { line: index + 1, column: line.length + 1, - offset: currentOffset + line.length - } - } - }) + offset: currentOffset + line.length, + }, + }, + }); } - return children -} + + return children; +}; -- Gitee From 1b15e4451898bfec45f6443b7ff30b4e0f9139bc Mon Sep 17 00:00:00 2001 From: e1chan <876394202@qq.com> Date: Fri, 24 Jan 2025 18:51:13 +0800 Subject: [PATCH 2/2] =?UTF-8?q?fix(mini-markdown-ast-parser):=20=E4=BF=AE?= =?UTF-8?q?=E5=A4=8D=E9=87=8D=E6=9E=84=E5=90=8EinlineCode=E6=A0=91?= =?UTF-8?q?=E7=BB=93=E6=9E=84=E5=BC=82=E5=B8=B8=E7=9A=84=E9=97=AE=E9=A2=98?= =?UTF-8?q?,=E4=BF=AE=E5=A4=8D=E5=9B=BE=E7=89=87=E9=94=99=E8=AF=AF?= =?UTF-8?q?=E5=8C=B9=E9=85=8D=E4=B8=BA=E9=93=BE=E6=8E=A5=E6=A0=BC=E5=BC=8F?= =?UTF-8?q?=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/core/parse/compose/inline/index.ts | 48 ++++++------------- 1 file changed, 14 insertions(+), 34 deletions(-) diff --git a/packages/mini-markdown-ast-parser/src/core/parse/compose/inline/index.ts b/packages/mini-markdown-ast-parser/src/core/parse/compose/inline/index.ts index fcc7a74..852c1bc 100644 --- a/packages/mini-markdown-ast-parser/src/core/parse/compose/inline/index.ts +++ b/packages/mini-markdown-ast-parser/src/core/parse/compose/inline/index.ts @@ -11,34 +11,26 @@ interface ProcessContext { index: number; offset: number; currentOffset: number; - parseInlineElements: ( - line: string, - index: number, - currentOffset: number - ) => Tokens[]; + parseInlineElements: (line: string, index: number, currentOffset: number) => Tokens[]; } // 定义所有 Markdown 模式 const MARKDOWN_PATTERNS: Record = { bold: { regex: /\*\*(?.*?)\*\*/, - process: (match, context) => - createStandardToken("bold" as TokenTypeVal, match, context), + process: (match, context) => createStandardToken("bold" as TokenTypeVal, match, context), }, italic: { regex: /\_(?.*?)\_/, - process: (match, context) => - createStandardToken("italic" as TokenTypeVal, match, context), + process: (match, context) => createStandardToken("italic" as TokenTypeVal, match, context), }, underline: { regex: /\-{2}(?.*?)\-{2}/, - process: (match, context) => - createStandardToken("underline" as TokenTypeVal, match, context), + process: (match, context) => createStandardToken("underline" as TokenTypeVal, match, context), }, delete: { regex: /\~{2}(?.*?)\~{2}/, - process: (match, context) => - createStandardToken("delete" as TokenTypeVal, match, context), + process: (match, context) => createStandardToken("delete" as TokenTypeVal, match, context), }, inlineCode: { regex: /`(?.*?)`/, @@ -48,13 +40,13 @@ const MARKDOWN_PATTERNS: Record = { } return { type: "inlineCode", - value: match.groups?.content || match[1], + children: [createTextToken(match.groups?.content || "", match, context)], position: createPosition(match, context), }; }, }, image: { - regex: /!\[(?.*?)\]\((?.*?)\)/g, + regex: /!\[(?.*?)\]\((?.*?)\)/, process: (match, context) => ({ type: "image", title: null, @@ -64,7 +56,7 @@ const MARKDOWN_PATTERNS: Record = { }), }, link: { - regex: /\[(?[^\]]+)\]\((?[^)]+)\)/, + regex: /(?[^\]]+)\]\((?[^)]+)\)/, process: (match, context) => ({ type: "link", title: null, @@ -85,8 +77,7 @@ const MARKDOWN_PATTERNS: Record = { // 创建位置信息 function createPosition(match: RegExpMatchArray, context: ProcessContext) { - const startOffset = - context.currentOffset + context.offset + (match.index ?? 0); + const startOffset = context.currentOffset + context.offset + (match.index ?? 0); const endOffset = startOffset + match[0].length; return { @@ -104,11 +95,7 @@ function createPosition(match: RegExpMatchArray, context: ProcessContext) { } // 创建文本节点 -function createTextToken( - value: string, - match: RegExpMatchArray, - context: ProcessContext -): Tokens { +function createTextToken(value: string, match: RegExpMatchArray, context: ProcessContext): Tokens { return { type: "text", value, @@ -120,7 +107,7 @@ function createTextToken( function createStandardToken( type: TokenTypeVal, match: RegExpMatchArray, - context: ProcessContext + context: ProcessContext, ): Tokens { const innerContent = match.groups?.content || match[1]; const innerOffset = @@ -131,11 +118,7 @@ function createStandardToken( return { type, - children: context.parseInlineElements( - innerContent, - context.index, - innerOffset - ), + children: context.parseInlineElements(innerContent, context.index, innerOffset), position: createPosition(match, context), }; } @@ -148,10 +131,7 @@ function findNextMatch(line: string, offset: number) { const match = line.slice(offset).match(pattern.regex); if (!match) continue; - if ( - !bestMatch || - (match.index ?? Infinity) < (bestMatch.match.index ?? Infinity) - ) { + if (!bestMatch || (match.index ?? Infinity) < (bestMatch.match.index ?? Infinity)) { bestMatch = { type, match }; } } @@ -162,7 +142,7 @@ function findNextMatch(line: string, offset: number) { export const parseInlineElements = ( line: string, index: number, - currentOffset: number + currentOffset: number, ): Tokens[] => { let offset = 0; let children: Tokens[] = []; -- Gitee