From 017be5622342a103db020be171b0890b2b4d4b15 Mon Sep 17 00:00:00 2001
From: e1chan <876394202@qq.com>
Date: Fri, 24 Jan 2025 16:48:08 +0800
Subject: [PATCH 1/2] =?UTF-8?q?fix(mini-markdown-ast-parser):=20=E9=87=8D?=
 =?UTF-8?q?=E6=9E=84inline=E8=A7=A3=E6=9E=90=E9=80=BB=E8=BE=91=20=E4=BF=AE?=
 =?UTF-8?q?=E5=A4=8D=E4=BA=86inlineCode=E5=86=85=E9=83=A8=E5=86=85?=
 =?UTF-8?q?=E5=AE=B9=E4=BB=8D=E4=BC=9A=E8=A2=AB=E8=A7=A3=E6=9E=90=E7=9A=84?=
 =?UTF-8?q?bug?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../src/core/parse/compose/inline/index.ts    | 446 ++++++++----------
 1 file changed, 208 insertions(+), 238 deletions(-)
diff --git a/packages/mini-markdown-ast-parser/src/core/parse/compose/inline/index.ts b/packages/mini-markdown-ast-parser/src/core/parse/compose/inline/index.ts
index a02f880..fcc7a74 100644
--- a/packages/mini-markdown-ast-parser/src/core/parse/compose/inline/index.ts
+++ b/packages/mini-markdown-ast-parser/src/core/parse/compose/inline/index.ts
@@ -1,265 +1,235 @@
-import { Tokens } from "@/types/tokens"
+import { Tokens } from "@/types/tokens";
+import { TokenTypeVal } from "@/types/tokens-types";
+
+interface MarkdownPattern {
+  regex: RegExp;
+  process: (match: RegExpMatchArray, context: ProcessContext) => Tokens;
+}
+
+interface ProcessContext {
+  line: string;
+  index: number;
+  offset: number;
+  currentOffset: number;
+  parseInlineElements: (
+    line: string,
+    index: number,
+    currentOffset: number
+  ) => Tokens[];
+}
+
+// 定义所有 Markdown 模式
+const MARKDOWN_PATTERNS: Record<string, MarkdownPattern> = {
+  bold: {
+    regex: /\*\*(?<content>.*?)\*\*/,
+    process: (match, context) =>
+      createStandardToken("bold" as TokenTypeVal, match, context),
+  },
+  italic: {
+    regex: /\_(?<content>.*?)\_/,
+    process: (match, context) =>
+      createStandardToken("italic" as TokenTypeVal, match, context),
+  },
+  underline: {
+    regex: /\-{2}(?<content>.*?)\-{2}/,
+    process: (match, context) =>
+      createStandardToken("underline" as TokenTypeVal, match, context),
+  },
+  delete: {
+    regex: /\~{2}(?<content>.*?)\~{2}/,
+    process: (match, context) =>
+      createStandardToken("delete" as TokenTypeVal, match, context),
+  },
+  inlineCode: {
+    regex: /`(?<content>.*?)`/,
+    process: (match, context) => {
+      if (!match[1].trim()) {
+        return createTextToken(match[0], match, context);
+      }
+      return {
+        type: "inlineCode",
+        value: match.groups?.content || match[1],
+        position: createPosition(match, context),
+      };
+    },
+  },
+  image: {
+    regex: /!\[(?<alt>.*?)\]\((?<url>.*?)\)/g,
+    process: (match, context) => ({
+      type: "image",
+      title: null,
+      url: match.groups?.url || "",
+      alt: match.groups?.alt || "",
+      position: createPosition(match, context),
+    }),
+  },
+  link: {
+    regex: /\[(?<text>[^\]]+)\]\((?<url>[^)]+)\)/,
+    process: (match, context) => ({
+      type: "link",
+      title: null,
+      url: match.groups?.url || "",
+      children: [createTextToken(match.groups?.text || "", match, context)],
+      position: createPosition(match, context),
+    }),
+  },
+  html: {
+    regex: /<(?<tag>[a-zA-Z0-9]+)(?<attrs>[^>]*)>(?<content>.*?)<\/\1>/g,
+    process: (match, context) => ({
+      type: "html",
+      value: match[0],
+      position: createPosition(match, context),
+    }),
+  },
+};
+
+// 创建位置信息
+function createPosition(match: RegExpMatchArray, context: ProcessContext) {
+  const startOffset =
+    context.currentOffset + context.offset + (match.index ?? 0);
+  const endOffset = startOffset + match[0].length;
+
+  return {
+    start: {
+      line: context.index + 1,
+      column: context.offset + (match.index ?? 0) + 1,
+      offset: startOffset,
+    },
+    end: {
+      line: context.index + 1,
+      column: context.offset + (match.index ?? 0) + match[0].length + 1,
+      offset: endOffset,
+    },
+  };
+}
+
+// 创建文本节点
+function createTextToken(
+  value: string,
+  match: RegExpMatchArray,
+  context: ProcessContext
+): Tokens {
+  return {
+    type: "text",
+    value,
+    position: createPosition(match, context),
+  };
+}
+
+// 创建标准内联标记节点
+function createStandardToken(
+  type: TokenTypeVal,
+  match: RegExpMatchArray,
+  context: ProcessContext
+): Tokens {
+  const innerContent = match.groups?.content || match[1];
+  const innerOffset =
+    context.currentOffset +
+    context.offset +
+    (match.index ?? 0) +
+    (type === "bold" || type === "delete" ? 2 : 1);
+
+  return {
+    type,
+    children: context.parseInlineElements(
+      innerContent,
+      context.index,
+      innerOffset
+    ),
+    position: createPosition(match, context),
+  };
+}
+
+// 查找最近的匹配
+function findNextMatch(line: string, offset: number) {
+  let bestMatch: { type: string; match: RegExpMatchArray } | null = null;
+
+  for (const [type, pattern] of Object.entries(MARKDOWN_PATTERNS)) {
+    const match = line.slice(offset).match(pattern.regex);
+    if (!match) continue;
+
+    if (
+      !bestMatch ||
+      (match.index ?? Infinity) < (bestMatch.match.index ?? Infinity)
+    ) {
+      bestMatch = { type, match };
+    }
+  }
+
+  return bestMatch;
+}
 
 export const parseInlineElements = (
   line: string,
   index: number,
   currentOffset: number
-) => {
-  const boldRegex = /\*\*(.*?)\*\*/
-  const italicRegex = /_(.*?)_/
-  const underlineRegex = /\-\-(.*?)\-\-/
-  const deleteRegex = /~~(.*?)~~/
-  const inlineCodeRegex = /`(.*?)`/
-  const linkRegex = /\[([^\]]+)\]\(([^)]+)\)/
-  const imageRegex = /!\[(.*?)\]\((.*?)\)/g
-  const htmlRegex = /<([a-zA-Z0-9]+)([^>]*)>(.*?)<\/\1>/g
-  let offset = 0
-  let children = []
-  let lastIndex = 0
+): Tokens[] => {
+  let offset = 0;
+  let children: Tokens[] = [];
+  let lastIndex = 0;
+
+  const context: ProcessContext = {
+    line,
+    index,
+    currentOffset,
+    offset,
+    parseInlineElements,
+  };
 
   while (offset < line.length) {
-    const boldMatch = line.slice(offset).match(boldRegex)
-    const italicMatch = line.slice(offset).match(italicRegex)
-    const underlineMatch = line.slice(offset).match(underlineRegex)
-    const deleteMatch = line.slice(offset).match(deleteRegex)
-    const inlineCodeMatch = line.slice(offset).match(inlineCodeRegex)
-    const imageMatch = line.slice(offset).match(imageRegex)
-    const linkMatch = line.slice(offset).match(linkRegex)
-    const htmlMatch = line.slice(offset).match(htmlRegex)
+    const nextMatch = findNextMatch(line, offset);
 
-    let match: RegExpMatchArray | null = null
-    let type = ''
-    let regex = null
+    if (!nextMatch) break;
 
-    if (
-      boldMatch &&
-      (!match ||
-        (boldMatch.index ?? Infinity) <
-          ((match as RegExpMatchArray)?.index ?? Infinity))
-    ) {
-      match = boldMatch
-      type = 'bold'
-      regex = boldRegex
-    }
-    if (
-      italicMatch &&
-      (!match || (italicMatch.index ?? Infinity) < (match?.index ?? Infinity))
-    ) {
-      match = italicMatch
-      type = 'italic'
-      regex = italicRegex
-    }
-    if (
-      underlineMatch &&
-      (!match ||
-        (underlineMatch.index ?? Infinity) < (match?.index ?? Infinity))
-    ) {
-      match = underlineMatch
-      type = 'underline'
-      regex = underlineRegex
-    }
-    if (
-      deleteMatch &&
-      (!match || (deleteMatch.index ?? Infinity) < (match?.index ?? Infinity))
-    ) {
-      match = deleteMatch
-      type = 'delete'
-      regex = deleteRegex
-    }
-    if (
-      inlineCodeMatch &&
-      (!match ||
-        (inlineCodeMatch.index ?? Infinity) < (match?.index ?? Infinity))
-    ) {
-      match = inlineCodeMatch
-      type = 'inlineCode'
-      regex = inlineCodeRegex
-    }
-    if (htmlMatch && (!match || (htmlMatch.index ?? Infinity) < (match?.index ?? Infinity))) {
-      match = htmlMatch
-      type = 'html'
-      regex = htmlRegex
-    }
+    const { type, match } = nextMatch;
 
-    if (imageMatch) {
-      for (let i = 0; i < imageMatch.length; i++) {
-        const fullMatch = imageMatch[i]
-        const [alt, url] = fullMatch
-          .match(/^!\[(.*?)\]\((.*?)\)$/)
-          ?.slice(1) || ['', '']
-        const imageNode = {
-          type: 'image',
-          title: null,
-          url: url,
-          alt: alt,
-          position: {
-            start: {
-              line: index + 1,
-              column: offset + 1,
-              offset: currentOffset + offset
-            },
-            end: {
-              line: index + 1,
-              column: offset + fullMatch.length + 1,
-              offset: currentOffset + offset + fullMatch.length
-            }
-          }
-        }
-        children.push(imageNode)
-        offset += fullMatch.length
-        lastIndex = offset
-      }
-    } else {
-      if (
-        linkMatch &&
-        (!match || (linkMatch.index ?? Infinity) < (match?.index ?? Infinity))
-      ) {
-        match = linkMatch
-        type = 'link'
-        regex = linkRegex
-      }
+    // 处理匹配前的文本
+    if (match.index && match.index > 0) {
+      children.push({
+        type: "text",
+        value: line.slice(offset, offset + match.index),
+        position: {
+          start: {
+            line: index + 1,
+            column: offset + 1,
+            offset: currentOffset + offset,
+          },
+          end: {
+            line: index + 1,
+            column: offset + match.index + 1,
+            offset: currentOffset + offset + match.index,
+          },
+        },
+      });
     }
 
-    if (match) {
-      // 如果是空的行内代码块，直接作为普通文本处理
-      if (type === 'inlineCode' && !match[1].trim()) {
-        children.push({
-          type: 'text',
-          value: match[0],
-          position: {
-            start: {
-              line: index + 1,
-              column: offset + (match.index ?? 0) + 1,
-              offset: currentOffset + offset + (match.index ?? 0)
-            },
-            end: {
-              line: index + 1,
-              column: offset + (match.index ?? 0) + match[0].length + 1,
-              offset: currentOffset + offset + (match.index ?? 0) + match[0].length
-            }
-          }
-        })
-        offset += (match.index ?? 0) + match[0].length
-        lastIndex = offset
-        continue
-      }
+    // 处理匹配的标记
+    context.offset = offset;
+    children.push(MARKDOWN_PATTERNS[type].process(match, context));
 
-      if (match.index !== undefined && match.index > 0) {
-        children.push({
-          type: 'text',
-          value: line.slice(offset, offset + (match.index ?? 0)),
-          position: {
-            start: {
-              line: index + 1,
-              column: offset + 1,
-              offset: currentOffset + offset
-            },
-            end: {
-              line: index + 1,
-              column: offset + (match.index ?? 0) + 1,
-              offset: currentOffset + offset + (match.index ?? 0)
-            }
-          }
-        })
-      }
-      if (type === 'link') {
-        children.push({
-          type: type,
-          title: null,
-          url: match[2],
-          children: [
-            {
-              type: 'text',
-              value: match[1],
-              position: {
-                start: {
-                  line: index + 1,
-                  column: offset + (match.index ?? 0) + 1,
-                  offset: currentOffset + offset + (match.index ?? 0)
-                },
-                end: {
-                  line: index + 1,
-                  column: offset + (match.index ?? 0) + match[1].length + 1,
-                  offset:
-                    currentOffset +
-                    offset +
-                    (match.index ?? 0) +
-                    match[1].length
-                }
-              }
-            }
-          ],
-          position: {
-            start: {
-              line: index + 1,
-              column: offset + 1,
-              offset: currentOffset + offset
-            },
-            end: {
-              line: index + 1,
-              column: offset + (match.index ?? 0) + match[0].length + 1,
-              offset:
-                currentOffset + offset + (match.index ?? 0) + match[0].length
-            }
-          }
-        })
-      } else if (type === 'html') { // 添加HTML标签处理逻辑
-        children.push({
-          type: type,
-          value: match[0],
-          position: {
-            start: {
-              line: index + 1,
-              column: offset + 1,
-              offset: currentOffset + offset
-            },
-            end: {
-              line: index + 1,
-              column: offset + match[0].length + 1,
-              offset: currentOffset + offset + match[0].length
-            }
-          }
-        })
-      } else {
-        // 递归解析内部内容
-        const innerContent = match[1]
-        const innerOffset = currentOffset + offset + (match.index ?? 0) + (type === 'bold' || type === 'delete' ? 2 : 1)
-        const innerChildren: any = parseInlineElements(innerContent, index, innerOffset)
-        
-        children.push({
-          type: type,
-          children: innerChildren,
-          position: {
-            start: { line: index + 1, column: offset + (match.index ?? 0) + 1, offset: currentOffset + offset + (match.index ?? 0) },
-            end: { line: index + 1, column: offset + (match.index ?? 0) + match[0].length + 1, offset: currentOffset + offset + (match.index ?? 0) + match[0].length }
-          }
-        })
-      }
-
-      offset += (match.index ?? 0) + match[0].length
-      lastIndex = offset
-    } else {
-      break
-    }
+    offset += (match.index ?? 0) + match[0].length;
+    lastIndex = offset;
   }
 
+  // 处理剩余文本
   if (lastIndex < line.length) {
     children.push({
-      type: 'text',
+      type: "text",
       value: line.slice(lastIndex),
       position: {
         start: {
           line: index + 1,
           column: lastIndex + 1,
-          offset: currentOffset + lastIndex
+          offset: currentOffset + lastIndex,
         },
         end: {
           line: index + 1,
           column: line.length + 1,
-          offset: currentOffset + line.length
-        }
-      }
-    })
+          offset: currentOffset + line.length,
+        },
+      },
+    });
   }
-  return children
-}
+
+  return children;
+};
-- 
Gitee


From 1b15e4451898bfec45f6443b7ff30b4e0f9139bc Mon Sep 17 00:00:00 2001
From: e1chan <876394202@qq.com>
Date: Fri, 24 Jan 2025 18:51:13 +0800
Subject: [PATCH 2/2] =?UTF-8?q?fix(mini-markdown-ast-parser):=20=E4=BF=AE?=
 =?UTF-8?q?=E5=A4=8D=E9=87=8D=E6=9E=84=E5=90=8EinlineCode=E6=A0=91?=
 =?UTF-8?q?=E7=BB=93=E6=9E=84=E5=BC=82=E5=B8=B8=E7=9A=84=E9=97=AE=E9=A2=98?=
 =?UTF-8?q?,=E4=BF=AE=E5=A4=8D=E5=9B=BE=E7=89=87=E9=94=99=E8=AF=AF?=
 =?UTF-8?q?=E5=8C=B9=E9=85=8D=E4=B8=BA=E9=93=BE=E6=8E=A5=E6=A0=BC=E5=BC=8F?=
 =?UTF-8?q?=E7=9A=84=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../src/core/parse/compose/inline/index.ts    | 48 ++++++-------------
 1 file changed, 14 insertions(+), 34 deletions(-)

diff --git a/packages/mini-markdown-ast-parser/src/core/parse/compose/inline/index.ts b/packages/mini-markdown-ast-parser/src/core/parse/compose/inline/index.ts
index fcc7a74..852c1bc 100644
--- a/packages/mini-markdown-ast-parser/src/core/parse/compose/inline/index.ts
+++ b/packages/mini-markdown-ast-parser/src/core/parse/compose/inline/index.ts
@@ -11,34 +11,26 @@ interface ProcessContext {
   index: number;
   offset: number;
   currentOffset: number;
-  parseInlineElements: (
-    line: string,
-    index: number,
-    currentOffset: number
-  ) => Tokens[];
+  parseInlineElements: (line: string, index: number, currentOffset: number) => Tokens[];
 }
 
 // 定义所有 Markdown 模式
 const MARKDOWN_PATTERNS: Record<string, MarkdownPattern> = {
   bold: {
     regex: /\*\*(?<content>.*?)\*\*/,
-    process: (match, context) =>
-      createStandardToken("bold" as TokenTypeVal, match, context),
+    process: (match, context) => createStandardToken("bold" as TokenTypeVal, match, context),
   },
   italic: {
     regex: /\_(?<content>.*?)\_/,
-    process: (match, context) =>
-      createStandardToken("italic" as TokenTypeVal, match, context),
+    process: (match, context) => createStandardToken("italic" as TokenTypeVal, match, context),
   },
   underline: {
     regex: /\-{2}(?<content>.*?)\-{2}/,
-    process: (match, context) =>
-      createStandardToken("underline" as TokenTypeVal, match, context),
+    process: (match, context) => createStandardToken("underline" as TokenTypeVal, match, context),
   },
   delete: {
     regex: /\~{2}(?<content>.*?)\~{2}/,
-    process: (match, context) =>
-      createStandardToken("delete" as TokenTypeVal, match, context),
+    process: (match, context) => createStandardToken("delete" as TokenTypeVal, match, context),
   },
   inlineCode: {
     regex: /`(?<content>.*?)`/,
@@ -48,13 +40,13 @@ const MARKDOWN_PATTERNS: Record<string, MarkdownPattern> = {
       }
       return {
         type: "inlineCode",
-        value: match.groups?.content || match[1],
+        children: [createTextToken(match.groups?.content || "", match, context)],
         position: createPosition(match, context),
       };
     },
   },
   image: {
-    regex: /!\[(?<alt>.*?)\]\((?<url>.*?)\)/g,
+    regex: /!\[(?<alt>.*?)\]\((?<url>.*?)\)/,
     process: (match, context) => ({
       type: "image",
       title: null,
@@ -64,7 +56,7 @@ const MARKDOWN_PATTERNS: Record<string, MarkdownPattern> = {
     }),
   },
   link: {
-    regex: /\[(?<text>[^\]]+)\]\((?<url>[^)]+)\)/,
+    regex: /(?<!!)\[(?<text>[^\]]+)\]\((?<url>[^)]+)\)/,
     process: (match, context) => ({
       type: "link",
       title: null,
@@ -85,8 +77,7 @@ const MARKDOWN_PATTERNS: Record<string, MarkdownPattern> = {
 
 // 创建位置信息
 function createPosition(match: RegExpMatchArray, context: ProcessContext) {
-  const startOffset =
-    context.currentOffset + context.offset + (match.index ?? 0);
+  const startOffset = context.currentOffset + context.offset + (match.index ?? 0);
   const endOffset = startOffset + match[0].length;
 
   return {
@@ -104,11 +95,7 @@ function createPosition(match: RegExpMatchArray, context: ProcessContext) {
 }
 
 // 创建文本节点
-function createTextToken(
-  value: string,
-  match: RegExpMatchArray,
-  context: ProcessContext
-): Tokens {
+function createTextToken(value: string, match: RegExpMatchArray, context: ProcessContext): Tokens {
   return {
     type: "text",
     value,
@@ -120,7 +107,7 @@ function createTextToken(
 function createStandardToken(
   type: TokenTypeVal,
   match: RegExpMatchArray,
-  context: ProcessContext
+  context: ProcessContext,
 ): Tokens {
   const innerContent = match.groups?.content || match[1];
   const innerOffset =
@@ -131,11 +118,7 @@ function createStandardToken(
 
   return {
     type,
-    children: context.parseInlineElements(
-      innerContent,
-      context.index,
-      innerOffset
-    ),
+    children: context.parseInlineElements(innerContent, context.index, innerOffset),
     position: createPosition(match, context),
   };
 }
@@ -148,10 +131,7 @@ function findNextMatch(line: string, offset: number) {
     const match = line.slice(offset).match(pattern.regex);
     if (!match) continue;
 
-    if (
-      !bestMatch ||
-      (match.index ?? Infinity) < (bestMatch.match.index ?? Infinity)
-    ) {
+    if (!bestMatch || (match.index ?? Infinity) < (bestMatch.match.index ?? Infinity)) {
       bestMatch = { type, match };
     }
   }
@@ -162,7 +142,7 @@ function findNextMatch(line: string, offset: number) {
 export const parseInlineElements = (
   line: string,
   index: number,
-  currentOffset: number
+  currentOffset: number,
 ): Tokens[] => {
   let offset = 0;
   let children: Tokens[] = [];
-- 
Gitee