diff --git a/packages/mini-markdown-ast-parser/src/core/parse/compose/blocks/html.ts b/packages/mini-markdown-ast-parser/src/core/parse/compose/blocks/html.ts index ef70f8475c6b7606ceaf7a0ba4ccddff618d319d..24aaec6d4374116047c13138e7d0a08f5c579a34 100644 --- a/packages/mini-markdown-ast-parser/src/core/parse/compose/blocks/html.ts +++ b/packages/mini-markdown-ast-parser/src/core/parse/compose/blocks/html.ts @@ -1,6 +1,24 @@ import type { Tokens } from "@/types/tokens"; import type { ParseFnParams } from ".."; +// 添加单标签列表 +const voidElements = new Set([ + "area", + "base", + "br", + "col", + "embed", + "hr", + "img", + "input", + "link", + "meta", + "param", + "source", + "track", + "wbr", +]); + // 转换 html export const parseHtml = ({ trimmedLine, @@ -12,28 +30,104 @@ export const parseHtml = ({ }: ParseFnParams) => { const htmlBlockStartRegex = /^\s*<([a-zA-Z][a-zA-Z0-9]*)(?![^>]*\/>)[^>]*>/; const htmlBlockEndRegex = /<\/([a-zA-Z][a-zA-Z0-9]*)>\s*$/; + const selfClosingTagRegex = /^\s*<([a-zA-Z][a-zA-Z0-9]*)[^>]*\/>\s*$/; + + // 如果在HTML块内,优先处理内容 + if (currentStatus.inHtmlBlock) { + currentStatus.htmlContent += line + "\n"; + + // 检查是否匹配到最外层标签的结束标签 + if (currentStatus.htmlBlockTag && htmlBlockEndRegex.test(trimmedLine)) { + const match = trimmedLine.match(htmlBlockEndRegex); + const endTag = match?.[1].toLowerCase(); + + if (endTag === currentStatus.htmlBlockTag) { + currentStatus.inHtmlBlock = false; + currentStatus.htmlBlockTag = null; + root.children.push({ + type: "html", + value: currentStatus.htmlContent.trim(), + position: { + start: { + line: index - currentStatus.htmlContent.split("\n").length + 2, + column: 1, + offset: currentOffset - currentStatus.htmlContent.length, + }, + end: { line: index + 1, column: line.length + 1, offset: currentOffset + line.length }, + }, + }); + currentStatus.htmlContent = ""; + } + } + return true; + } + // 检查是否是HTML块的开始 if (!currentStatus.inHtmlBlock && htmlBlockStartRegex.test(trimmedLine)) { + const match = trimmedLine.match(htmlBlockStartRegex); + const tagName = match?.[1].toLowerCase(); + + // 如果是单标签,直接处理并返回 + if (tagName && voidElements.has(tagName)) { + const dataLineAttr = ` data-line="${index + 1}"`; + const content = line.replace(/>/, `${dataLineAttr}>`); + root.children.push({ + type: "html", + value: content.trim(), + position: { + start: { + line: index + 1, + column: 1, + offset: currentOffset, + }, + end: { + line: index + 1, + column: trimmedLine.length + 1, + offset: currentOffset + trimmedLine.length, + }, + }, + }); + return true; + } + currentStatus.inHtmlBlock = true; + currentStatus.htmlBlockTag = tagName; // 记录最外层的标签名 + // 添加 data-line 属性 const dataLineAttr = ` data-line="${index + 1}"`; - if (trimmedLine.endsWith("/>")) { + if (selfClosingTagRegex.test(trimmedLine)) { // 自闭合标签 currentStatus.htmlContent = line.replace("/>", `${dataLineAttr} />`) + "\n"; + currentStatus.inHtmlBlock = false; // 自闭合标签不需要继续处理 + root.children.push({ + type: "html", + value: currentStatus.htmlContent.trim(), + position: { + start: { + line: index + 1, + column: 1, + offset: currentOffset, + }, + end: { + line: index + 1, + column: trimmedLine.length + 1, + offset: currentOffset + trimmedLine.length, + }, + }, + }); + currentStatus.htmlContent = ""; + return true; } else { // 开始标签 currentStatus.htmlContent = line.replace(/>/, `${dataLineAttr}>`) + "\n"; - } - // 检查是否是单行的HTML块 - const singleLineHtmlMatch = trimmedLine.match(htmlBlockStartRegex); - if (singleLineHtmlMatch) { - const tagName = singleLineHtmlMatch[1]; - const endTagRegex = new RegExp(`<\/${tagName}\\s*>$`); - if (endTagRegex.test(trimmedLine)) { + + // 检查是否是单行的完整HTML块 + if (trimmedLine.includes(` void; }; diff --git a/packages/mini-markdown-ast-parser/src/core/parse/tokenizer.ts b/packages/mini-markdown-ast-parser/src/core/parse/tokenizer.ts index f269168aa87c1ce3efe59b5c249434884c9190d1..194b331df4a9bbc4f76b033175ada57b0df5e7b7 100644 --- a/packages/mini-markdown-ast-parser/src/core/parse/tokenizer.ts +++ b/packages/mini-markdown-ast-parser/src/core/parse/tokenizer.ts @@ -29,6 +29,7 @@ export const tokenizer = (lines: string[], root: RootTokens) => { // html htmlContent: "", inHtmlBlock: false, + htmlBlockTag: "", }; const resetCurrentStatus = () => { currentStatus = { @@ -46,6 +47,7 @@ export const tokenizer = (lines: string[], root: RootTokens) => { currentTable: null, htmlContent: "", inHtmlBlock: false, + htmlBlockTag: "", }; };