# geotools-wiki

**Repository Path**: lzugis15/geotools-wiki

## Basic Information

- **Project Name**: geotools-wiki
- **Description**: No description available
- **Primary Language**: Unknown
- **License**: Not specified
- **Default Branch**: master
- **Homepage**: None
- **GVP Project**: No

## Statistics

- **Stars**: 0
- **Forks**: 0
- **Created**: 2026-02-10
- **Last Updated**: 2026-02-13

## Categories & Tags

**Categories**: Uncategorized

**Tags**: None

## README

## package.json

```json
{
  "name": "playright",
  "version": "1.0.0",
  "description": "",
  "main": "index.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "keywords": [],
  "author": "",
  "license": "ISC",
  "dependencies": {
    "html-to-markdown": "^1.0.0",
    "node-html-markdown": "^2.0.0",
    "parse5": "^8.0.0",
    "playwright": "^1.58.0",
    "turndown": "^7.2.2",
    "turndown-plugin-gfm": "^1.0.2",
    "xmlserializer": "^0.6.1"
  }
}
```

## crawler.js

```js
const { chromium } = require('playwright');
const TurndownService = require('turndown');
const { gfm, strikethrough, tables } = require('turndown-plugin-gfm');
const fs = require('fs').promises;
const path = require('path');

// URLs to process. Empty by default.
// NOTE(review): presumably filled by pasting entries from the URL lists
// in the sections below — confirm.
const urls = [];

// Output directories: Markdown files go to OUTPUT_DIR, SVGs to SVG_DIR.
const OUTPUT_DIR = path.join(__dirname, 'output');
const SVG_DIR = path.join(OUTPUT_DIR, 'svgs');

/**
 * Registers the custom HTML -> Markdown conversion rules on a Turndown instance:
 * strikethrough, fenced code blocks, inline code and tables.
 *
 * @param {TurndownService} turndownServiceObj - Instance to add the rules to.
 */
function addCustomRules(turndownServiceObj) {
  // Strikethrough
  turndownServiceObj.addRule('strikethrough', {
    filter: ['del', 's', 'strike'],
    replacement: (content) => `~~${content}~~`,
  });

  // Fenced code blocks
  turndownServiceObj.addRule('pre', {
    filter: ['pre'],
    replacement: (content, node) => {
      const code = node.querySelector('code');

      // Language lookup order: <pre lang>, then the <code> class, then the <pre> class.
      let language = '';
      if (node.getAttribute('lang')) {
        language = node.getAttribute('lang');
      } else if (code?.className) {
        const langMatch = code.className.match(/language-(\S+)/);
        language = langMatch?.[1] || '';
      } else if (node.className) {
        // A bare `md-fences` class matches without a capture group, i.e. no language.
        const mdFencesMatch = node.className.match(/md-fences|language-(\S+)/);
        language = mdFencesMatch?.[1] || '';
      }

      // `code.textContent` is the raw source text, so it must be emitted verbatim:
      // un-escaping it would strip legitimate backslashes (e.g. `\n`, `\d`).
      // Only the fallback `content` has been through Turndown's Markdown escaping
      // and needs the added backslashes removed again.
      const codeContent = code
        ? code.textContent.trim()
        : content.trim().replace(/\\([^\\])/g, '$1');

      language = language.toLowerCase().replace(/[^a-z0-9+#]+/g, '');
      return `\`\`\`${language}\n${codeContent}\n\`\`\`\n`;
    },
  });

  // Inline code (a <code> that is not the direct child of a <pre>)
  turndownServiceObj.addRule('inlineCode', {
    filter: (node) => node.nodeName === 'CODE' && node.parentNode?.nodeName !== 'PRE',
    replacement: (content) => `\`${content}\``,
  });

  // Tables: the first row becomes the header, every other row a body row.
  turndownServiceObj.addRule('table', {
    filter: 'table',
    replacement: (content, node) => {
      const rows = Array.from(node.rows);
      if (rows.length === 0) {
        return '';
      }

      // Newlines and pipes inside a cell would break the row, so flatten/escape them.
      const cellText = (cell) =>
        (cell.textContent || '')
          .trim()
          .replace(/\s*\n\s*/g, ' ')
          .replace(/\|/g, '\\|');
      // Every row carries its own leading and trailing pipe.
      const toRow = (cells) => `| ${cells.join(' | ')} |`;

      const headerCells = Array.from(rows[0].cells);
      const header = toRow(headerCells.map(cellText));
      const separator = toRow(headerCells.map(() => '---'));
      const body = rows
        .slice(1)
        .map((row) => toRow(Array.from(row.cells).map(cellText)))
        .join('\n');

      return `\n${header}\n${separator}\n${body}\n\n`;
    },
  });
}

// Initialise the Turndown service
const turndownService = new TurndownService({
  codeBlockStyle: 'fenced',
});

// Enable the GFM plugins (tables, strikethrough, ...)
turndownService.use(gfm);
turndownService.use([tables, strikethrough]);

// Custom rules are added last so they take precedence over the plugin rules
addCustomRules(turndownService);

/**
 * Creates the output directories if they do not exist yet.
 *
 * @returns {Promise<void>}
 */
async function ensureDirectories() {
  await fs.mkdir(OUTPUT_DIR, { recursive: true });
  await fs.mkdir(SVG_DIR, { recursive: true });
  console.log('✓ 输出目录已创建:', OUTPUT_DIR);
  console.log('✓ SVG目录已创建:', SVG_DIR);
}

/**
 * Builds a file-system-safe Markdown file name from the last path segment of a URL.
 *
 * @param {string} url - Absolute page URL.
 * @param {string} pageTitle - Page title; currently unused, kept for the existing call signature.
 * @returns {string} File name ending in `.md`; `untitled.md` when the URL has no path segment.
 */
function generateFileName(url, pageTitle) {
  // Skip empty segments so a trailing slash does not hide the real last segment.
  const segments = new URL(url).pathname.split('/').filter(Boolean);
  const lastSegment = segments.pop() || 'untitled';
  // Replace characters that are illegal in file names
  const safeName = lastSegment.replace(/[<>:"/\\|?*]/g, '_');
  return `${safeName}.md`;
}

/**
 * Collects every `<svg id="...">` inside `.prose-custom` and writes each one to
 * `SVG_DIR/<dirName>/<index>.svg`.
 *
 * @param {import('playwright').Page} page - Loaded page to read from.
 * @param {string} dirName - Sub-directory name (the Markdown file's base name).
 * @returns {Promise<Array<{id: string, fileName: string, path: string, index: number}>>}
 *   Descriptors of the SVGs that were written successfully.
 */
async function extractAndSaveSVGs(page, dirName) {
  const svgs = await page.evaluate(() => {
    const proseElement = document.querySelector('.prose-custom');
    if (!proseElement) return [];
    // Only SVGs that carry an id, and only inside .prose-custom
    const svgElements = proseElement.querySelectorAll('svg[id]');
    // Array.from yields [] when nothing matches, so pages without SVGs are safe.
    return Array.from(svgElements).map((svg) => ({
      id: svg.id,
      outerHTML: svg.outerHTML,
    }));
  });

  const savedSvgs = [];

  // The sub-directory is named after the Markdown file
  const pageSvgDir = path.join(SVG_DIR, dirName);
  await fs.mkdir(pageSvgDir, { recursive: true });

  for (let i = 0; i < svgs.length; i++) {
    const svg = svgs[i];
    // The running index is used as the file name
    const svgFileName = `${i}.svg`;
    const svgFilePath = path.join(pageSvgDir, svgFileName);

    try {
      const formattedSvg = formatSVG(svg.outerHTML);
      await fs.writeFile(svgFilePath, formattedSvg, 'utf-8');

      savedSvgs.push({
        id: svg.id,
        fileName: svgFileName,
        path: svgFilePath,
        index: i,
      });

      console.log(` ✓ SVG已保存: ${dirName}/${svgFileName}`);
    } catch (error) {
      // A single failed write must not abort the remaining SVGs.
      console.error(` ✗ 保存SVG失败 (${svg.id}):`, error.message);
    }
  }

  return savedSvgs;
}

/**
 * Normalises serialized SVG markup: empty element pairs of known tags are
 * collapsed into self-closing tags. Whitespace and line breaks are left untouched.
 *
 * @param {string} svgHtml - `outerHTML` of an SVG element.
 * @returns {string} Normalised SVG markup.
 */
function formatSVG(svgHtml) {
  let source = svgHtml.trim();

  // Only empty pairs are rewritten; elements that contain children never match.
  const alwaysSelfClosing = [
    'path',
    'circle',
    'rect',
    'ellipse',
    'line',
    'polygon',
    'polyline',
    'use',
    'image',
    'marker',
    'clipPath',
    'pattern',
    'mask',
    'filter',
    'symbol',
  ];

  for (const tag of alwaysSelfClosing) {
    // Match `<tag ...></tag>` and convert it to `<tag ... />`
    const regex = new RegExp(`<(${tag})([^>]*?)><\\/\\1>`, 'gi');
    source = source.replace(regex, '<$1$2 />');
  }

  // NOTE(review): this statement was garbled in the published listing (pattern and
  // replacement were both rendered as a bare line break). Reconstructed as the
  // `<br>` -> `<br />` normalisation; confirm against the original crawler.js.
  source = source.replace(/<br>/g, '<br />');

  // Spaces and line breaks are intentionally kept as they are
  return source;
}

/**
 * Crawls one URL: loads the page, saves its SVGs and writes `.prose-custom` as Markdown.
 *
 * @param {import('playwright').Browser} browser - Running browser instance.
 * @param {string} url - Page to process.
 * @param {number} index - Zero-based position of the URL (progress output only).
 * @param {number} total - Total number of URLs (progress output only).
 * @returns {Promise<object>} `{ url, fileName, filePath, pageTitle, success: true }` on
 *   success, `{ url, error, success: false }` on failure. Never rejects for page errors.
 */
async function processUrl(browser, url, index, total) {
  console.log(`\n[${index + 1}/${total}] 正在处理: ${url}`);

  let context;

  try {
    // Created inside the try block so a failure here is reported like any other.
    context = await browser.newContext();
    const page = await context.newPage();

    // Navigate to the page
    await page.goto(url, {
      waitUntil: 'networkidle',
      timeout: 60000,
    });

    // Wait for the .prose-custom element to be present
    await page.waitForSelector('.prose-custom', {
      timeout: 30000,
    });

    const pageTitle = await page.title();
    console.log(` ✓ 页面标题: ${pageTitle}`);

    // The file name is computed first because the SVG directory is named after it
    const fileName = generateFileName(url, pageTitle);
    const mdBaseName = fileName.replace(/\.md$/, '');

    const proseElement = page.locator('.prose-custom').first();

    // Extract and save the SVGs into the directory named after the Markdown file
    const savedSvgs = await extractAndSaveSVGs(page, mdBaseName);
    console.log(` ✓ 找到并保存了 ${savedSvgs.length} 个SVG文件`);

    // Convert the HTML to Markdown
    const proseHtml = await proseElement.evaluate((el) => el.outerHTML);
    const markdownContent = turndownService.turndown(proseHtml);

    // Save the Markdown file
    const filePath = path.join(OUTPUT_DIR, fileName);
    await fs.writeFile(filePath, markdownContent, 'utf-8');
    console.log(` ✓ Markdown已保存: ${fileName}`);

    return {
      url,
      fileName,
      filePath,
      pageTitle,
      success: true,
    };
  } catch (error) {
    console.error(` ✗ 处理失败:`, error.message);
    return {
      url,
      error: error.message,
      success: false,
    };
  } finally {
    await context?.close();
  }
}

/**
 * Entry point: processes every entry of `urls` sequentially, prints statistics
 * and writes `summary.json` into OUTPUT_DIR.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('========================================');
  console.log(' OpenLayers 文档爬虫');
  console.log('========================================');
  console.log(`\n共 ${urls.length} 个URL待处理\n`);

  await ensureDirectories();

  console.log('\n正在启动浏览器...');
  const browser = await chromium.launch({
    headless: true,
  });

  const results = {
    successful: [],
    failed: [],
  };

  try {
    // URLs are processed one at a time
    for (let i = 0; i < urls.length; i++) {
      const result = await processUrl(browser, urls[i], i, urls.length);

      if (result.success) {
        results.successful.push(result);
      } else {
        results.failed.push(result);
      }

      // Short pause between requests to avoid putting pressure on the server
      if (i < urls.length - 1) {
        await new Promise((resolve) => setTimeout(resolve, 1000));
      }
    }
  } finally {
    // The browser is closed even when the loop throws unexpectedly.
    await browser.close();
  }

  // Print statistics
  console.log('\n========================================');
  console.log(' 爬取完成统计');
  console.log('========================================');
  console.log(`总URL数: ${urls.length}`);
  console.log(`成功: ${results.successful.length}`);
  console.log(`失败: ${results.failed.length}`);

  if (results.failed.length > 0) {
    console.log('\n失败的URL:');
    results.failed.forEach((item) => {
      console.log(` - ${item.url}: ${item.error}`);
    });
  }

  // Save the result summary
  const summaryPath = path.join(OUTPUT_DIR, 'summary.json');
  await fs.writeFile(summaryPath, JSON.stringify(results, null, 2), 'utf-8');
  console.log(`\n✓ 结果摘要已保存: ${summaryPath}`);
}

// Run the main function
main().catch((error) => {
  console.error('程序执行出错:', error);
  process.exit(1);
});
```

## openlayers

```
'https://deepwiki.com/openlayers/openlayers/1-introduction-to-openlayers',
'https://deepwiki.com/openlayers/openlayers/2-core-architecture',
'https://deepwiki.com/openlayers/openlayers/2.1-map-component',
'https://deepwiki.com/openlayers/openlayers/2.2-view-system',
'https://deepwiki.com/openlayers/openlayers/2.3-layer-architecture',
'https://deepwiki.com/openlayers/openlayers/2.4-source-system',
'https://deepwiki.com/openlayers/openlayers/3-rendering-system',
'https://deepwiki.com/openlayers/openlayers/3.1-canvas-rendering-pipeline',
'https://deepwiki.com/openlayers/openlayers/3.2-webgl-rendering-system',
'https://deepwiki.com/openlayers/openlayers/4-interaction-system',
'https://deepwiki.com/openlayers/openlayers/4.1-event-processing-and-conditions',
'https://deepwiki.com/openlayers/openlayers/4.2-navigation-interactions',
'https://deepwiki.com/openlayers/openlayers/4.3-feature-editing-interactions',
'https://deepwiki.com/openlayers/openlayers/5-data-management',
'https://deepwiki.com/openlayers/openlayers/5.1-spatial-data-and-utilities', 'https://deepwiki.com/openlayers/openlayers/5.2-data-formats-and-parsing', 'https://deepwiki.com/openlayers/openlayers/5.3-tile-system', 'https://deepwiki.com/openlayers/openlayers/6-ui-components', 'https://deepwiki.com/openlayers/openlayers/6.1-controls-system', 'https://deepwiki.com/openlayers/openlayers/6.2-overlays-and-positioning', 'https://deepwiki.com/openlayers/openlayers/7-development-infrastructure', 'https://deepwiki.com/openlayers/openlayers/7.1-build-system-and-tooling', 'https://deepwiki.com/openlayers/openlayers/7.2-example-framework', 'https://deepwiki.com/openlayers/openlayers/7.3-api-evolution-and-versioning', 'https://deepwiki.com/openlayers/openlayers/8-advanced-topics', 'https://deepwiki.com/openlayers/openlayers/8.1-performance-and-optimization', 'https://deepwiki.com/openlayers/openlayers/8.2-integration-patterns', ``` ## Cesium ``` 'https://deepwiki.com/CesiumGS/cesium', 'https://deepwiki.com/CesiumGS/cesium/1.1-packages-and-components', 'https://deepwiki.com/CesiumGS/cesium/1.2-applications-and-examples', 'https://deepwiki.com/CesiumGS/cesium/1.3-external-service-integration', 'https://deepwiki.com/CesiumGS/cesium/2-core-architecture', 'https://deepwiki.com/CesiumGS/cesium/2.1-scene-graph', 'https://deepwiki.com/CesiumGS/cesium/2.2-camera-system', 'https://deepwiki.com/CesiumGS/cesium/2.3-event-handling', 'https://deepwiki.com/CesiumGS/cesium/3-rendering-system', 'https://deepwiki.com/CesiumGS/cesium/3.1-webgl-integration', 'https://deepwiki.com/CesiumGS/cesium/3.2-primitives', 'https://deepwiki.com/CesiumGS/cesium/3.3-materials-and-appearance', 'https://deepwiki.com/CesiumGS/cesium/3.4-shadows-and-lighting', 'https://deepwiki.com/CesiumGS/cesium/4-3d-tiles', 'https://deepwiki.com/CesiumGS/cesium/4.1-tileset-structure-and-loading', 'https://deepwiki.com/CesiumGS/cesium/4.2-content-types', 'https://deepwiki.com/CesiumGS/cesium/4.3-styling', 
'https://deepwiki.com/CesiumGS/cesium/4.4-testing-and-utilities', 'https://deepwiki.com/CesiumGS/cesium/4.5-performance-optimization', 'https://deepwiki.com/CesiumGS/cesium/5-globe-surface', 'https://deepwiki.com/CesiumGS/cesium/5.1-quadtree-system', 'https://deepwiki.com/CesiumGS/cesium/5.2-terrain', 'https://deepwiki.com/CesiumGS/cesium/5.3-imagery-layers', 'https://deepwiki.com/CesiumGS/cesium/5.4-atmosphere-effects', 'https://deepwiki.com/CesiumGS/cesium/6-data-sources', 'https://deepwiki.com/CesiumGS/cesium/6.1-czml', 'https://deepwiki.com/CesiumGS/cesium/6.2-kml', 'https://deepwiki.com/CesiumGS/cesium/6.3-geojson', 'https://deepwiki.com/CesiumGS/cesium/6.4-entity-api', 'https://deepwiki.com/CesiumGS/cesium/7-development-and-tooling', 'https://deepwiki.com/CesiumGS/cesium/7.1-build-system', 'https://deepwiki.com/CesiumGS/cesium/7.2-testing-framework', 'https://deepwiki.com/CesiumGS/cesium/7.3-sandcastle', 'https://deepwiki.com/CesiumGS/cesium/7.4-contributing', ```