From c3ecf46b53e753b581c8855114ee4aa6e70ec8d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?wifi=E6=AD=AAf?= <1402772884@qq.com> Date: Tue, 4 Feb 2025 00:24:17 +0800 Subject: [PATCH] =?UTF-8?q?fix(mini-markdown-ast-parser):=20=E4=BF=AE?= =?UTF-8?q?=E5=A4=8Dhtml=E8=A7=A3=E6=9E=90=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/core/parse/compose/blocks/html.ts | 136 +++++++++++++----- .../src/core/parse/compose/index.ts | 1 + .../src/core/parse/tokenizer.ts | 2 + 3 files changed, 106 insertions(+), 33 deletions(-) diff --git a/packages/mini-markdown-ast-parser/src/core/parse/compose/blocks/html.ts b/packages/mini-markdown-ast-parser/src/core/parse/compose/blocks/html.ts index ef70f84..24aaec6 100644 --- a/packages/mini-markdown-ast-parser/src/core/parse/compose/blocks/html.ts +++ b/packages/mini-markdown-ast-parser/src/core/parse/compose/blocks/html.ts @@ -1,6 +1,24 @@ import type { Tokens } from "@/types/tokens"; import type { ParseFnParams } from ".."; +// 添加单标签列表 +const voidElements = new Set([ + "area", + "base", + "br", + "col", + "embed", + "hr", + "img", + "input", + "link", + "meta", + "param", + "source", + "track", + "wbr", +]); + // 转换 html export const parseHtml = ({ trimmedLine, @@ -12,28 +30,104 @@ export const parseHtml = ({ }: ParseFnParams) => { const htmlBlockStartRegex = /^\s*<([a-zA-Z][a-zA-Z0-9]*)(?![^>]*\/>)[^>]*>/; const htmlBlockEndRegex = /<\/([a-zA-Z][a-zA-Z0-9]*)>\s*$/; + const selfClosingTagRegex = /^\s*<([a-zA-Z][a-zA-Z0-9]*)[^>]*\/>\s*$/; + + // 如果在HTML块内,优先处理内容 + if (currentStatus.inHtmlBlock) { + currentStatus.htmlContent += line + "\n"; + + // 检查是否匹配到最外层标签的结束标签 + if (currentStatus.htmlBlockTag && htmlBlockEndRegex.test(trimmedLine)) { + const match = trimmedLine.match(htmlBlockEndRegex); + const endTag = match?.[1].toLowerCase(); + + if (endTag === currentStatus.htmlBlockTag) { + currentStatus.inHtmlBlock = false; + currentStatus.htmlBlockTag = null; + root.children.push({ + type: "html", + value: currentStatus.htmlContent.trim(), + position: { + start: { + line: index - currentStatus.htmlContent.split("\n").length + 2, + column: 1, + offset: currentOffset - currentStatus.htmlContent.length, + }, + end: { line: index + 1, column: line.length + 1, offset: currentOffset + line.length }, + }, + }); + currentStatus.htmlContent = ""; + } + } + return true; + } + // 检查是否是HTML块的开始 if (!currentStatus.inHtmlBlock && htmlBlockStartRegex.test(trimmedLine)) { + const match = trimmedLine.match(htmlBlockStartRegex); + const tagName = match?.[1].toLowerCase(); + + // 如果是单标签,直接处理并返回 + if (tagName && voidElements.has(tagName)) { + const dataLineAttr = ` data-line="${index + 1}"`; + const content = line.replace(/>/, `${dataLineAttr}>`); + root.children.push({ + type: "html", + value: content.trim(), + position: { + start: { + line: index + 1, + column: 1, + offset: currentOffset, + }, + end: { + line: index + 1, + column: trimmedLine.length + 1, + offset: currentOffset + trimmedLine.length, + }, + }, + }); + return true; + } + currentStatus.inHtmlBlock = true; + currentStatus.htmlBlockTag = tagName; // 记录最外层的标签名 + // 添加 data-line 属性 const dataLineAttr = ` data-line="${index + 1}"`; - if (trimmedLine.endsWith("/>")) { + if (selfClosingTagRegex.test(trimmedLine)) { // 自闭合标签 currentStatus.htmlContent = line.replace("/>", `${dataLineAttr} />`) + "\n"; + currentStatus.inHtmlBlock = false; // 自闭合标签不需要继续处理 + root.children.push({ + type: "html", + value: currentStatus.htmlContent.trim(), + position: { + start: { + line: index + 1, + column: 1, + offset: currentOffset, + }, + end: { + line: index + 1, + column: trimmedLine.length + 1, + offset: currentOffset + trimmedLine.length, + }, + }, + }); + currentStatus.htmlContent = ""; + return true; } else { // 开始标签 currentStatus.htmlContent = line.replace(/>/, `${dataLineAttr}>`) + "\n"; - } - // 检查是否是单行的HTML块 - const singleLineHtmlMatch = trimmedLine.match(htmlBlockStartRegex); - if (singleLineHtmlMatch) { - const tagName = singleLineHtmlMatch[1]; - const endTagRegex = new RegExp(`<\/${tagName}\\s*>$`); - if (endTagRegex.test(trimmedLine)) { + + // 检查是否是单行的完整HTML块 + if (trimmedLine.includes(` void; }; diff --git a/packages/mini-markdown-ast-parser/src/core/parse/tokenizer.ts b/packages/mini-markdown-ast-parser/src/core/parse/tokenizer.ts index f269168..194b331 100644 --- a/packages/mini-markdown-ast-parser/src/core/parse/tokenizer.ts +++ b/packages/mini-markdown-ast-parser/src/core/parse/tokenizer.ts @@ -29,6 +29,7 @@ export const tokenizer = (lines: string[], root: RootTokens) => { // html htmlContent: "", inHtmlBlock: false, + htmlBlockTag: "", }; const resetCurrentStatus = () => { currentStatus = { @@ -46,6 +47,7 @@ export const tokenizer = (lines: string[], root: RootTokens) => { currentTable: null, htmlContent: "", inHtmlBlock: false, + htmlBlockTag: "", }; }; -- Gitee