# geotools-wiki
**Repository Path**: lzugis15/geotools-wiki
## Basic Information
- **Project Name**: geotools-wiki
- **Description**: No description available
- **Primary Language**: Unknown
- **License**: Not specified
- **Default Branch**: master
- **Homepage**: None
- **GVP Project**: No
## Statistics
- **Stars**: 0
- **Forks**: 0
- **Created**: 2026-02-10
- **Last Updated**: 2026-02-13
## Categories & Tags
**Categories**: Uncategorized
**Tags**: None
## README
## package.json
```
{
"name": "playright",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [],
"author": "",
"license": "ISC",
"dependencies": {
"html-to-markdown": "^1.0.0",
"node-html-markdown": "^2.0.0",
"parse5": "^8.0.0",
"playwright": "^1.58.0",
"turndown": "^7.2.2",
"turndown-plugin-gfm": "^1.0.2",
"xmlserializer": "^0.6.1"
}
}
```
## crawler.js
```js
const { chromium } = require('playwright');
const TurndownService = require('turndown');
const { gfm, strikethrough, tables } = require('turndown-plugin-gfm');
const fs = require('fs').promises;
const path = require('path');
// URLs to crawl. The list is empty here, so main() has nothing to process
// until page URLs are added to it.
const urls = [];
// Output locations: Markdown files are written into OUTPUT_DIR and extracted
// SVG files into per-page subdirectories of SVG_DIR.
const OUTPUT_DIR = path.join(__dirname, 'output');
const SVG_DIR = path.join(OUTPUT_DIR, 'svgs');
/**
 * Registers the crawler's custom HTML-to-Markdown rules on a Turndown service.
 *
 * Rules are added under the keys `strikethrough`, `pre`, `inlineCode` and
 * `table`, in that order.
 *
 * @param {{ addRule: (key: string, rule: object) => unknown }} turndownServiceObj
 *   Service (or compatible object) exposing `addRule(key, rule)`.
 * @returns {void}
 */
function addCustomRules(turndownServiceObj) {
  const LANGUAGE_CLASS = /language-(\S+)/;

  // Normalizes one table cell: trims it, folds line breaks into spaces and
  // escapes pipes so the cell text cannot break the row layout.
  const formatTableCell = (text) =>
    (text ?? '')
      .trim()
      .replace(/\s*\n\s*/g, ' ')
      .replace(/\|/g, '\\|');

  // Strikethrough
  turndownServiceObj.addRule('strikethrough', {
    filter: ['del', 's', 'strike'],
    replacement: (content) => `~~${content}~~`,
  });

  // Fenced code blocks
  turndownServiceObj.addRule('pre', {
    filter: ['pre'],
    replacement: (content, node) => {
      const code = node.querySelector('code');

      // Language lookup order: `lang` attribute, then a `language-*` class on
      // the nested <code>, then a `language-*` class on the <pre> itself.
      // Each step falls through to the next when it yields nothing.
      const rawLanguage =
        node.getAttribute('lang') ||
        code?.className?.match(LANGUAGE_CLASS)?.[1] ||
        node.className?.match(LANGUAGE_CLASS)?.[1] ||
        '';
      const language = rawLanguage.toLowerCase().replace(/[^a-z0-9+#]+/g, '');

      // `textContent` is the raw source text, so it is used verbatim: removing
      // backslashes from it would corrupt code such as "\n". Only the converted
      // `content` fallback is unescaped (presumably it carries Markdown
      // backslash escapes added during conversion — confirm against Turndown).
      const codeContent = code
        ? code.textContent.trim()
        : content.trim().replace(/\\([^\\])/g, '$1');

      return `\`\`\`${language}\n${codeContent}\n\`\`\`\n`;
    },
  });

  // Inline code (a <code> that is not the direct child of a <pre>)
  turndownServiceObj.addRule('inlineCode', {
    filter: (node) => node.nodeName === 'CODE' && node.parentNode?.nodeName !== 'PRE',
    replacement: (content) => `\`${content}\``,
  });

  // Tables: the first row becomes the header, every further row a body row
  turndownServiceObj.addRule('table', {
    filter: 'table',
    replacement: (content, node) => {
      const rows = Array.from(node.rows ?? []);
      if (rows.length === 0) {
        return '';
      }

      const toCells = (row) =>
        Array.from(row.cells ?? [], (cell) => formatTableCell(cell.textContent));
      // Every row gets its own leading and trailing pipe.
      const toLine = (cells) => `| ${cells.join(' | ')} |`;

      const [headerRow, ...bodyRows] = rows;
      const headerCells = toCells(headerRow);
      const lines = [
        toLine(headerCells),
        toLine(headerCells.map(() => '---')),
        ...bodyRows.map((row) => toLine(toCells(row))),
      ];
      return `\n${lines.join('\n')}\n\n`;
    },
  });
}
// Initialize the Turndown service; code blocks are emitted in fenced style.
const turndownService = new TurndownService({
codeBlockStyle: 'fenced',
});
// Register the GFM plugin (tables, strikethrough, etc.)
turndownService.use(gfm);
// NOTE(review): `tables` and `strikethrough` are registered again on top of
// `gfm` — presumably redundant; confirm against the plugin before removing.
turndownService.use([tables, strikethrough]);
// Register the custom rules defined in addCustomRules()
addCustomRules(turndownService);
/**
 * Creates the output directory and the SVG directory when they are missing
 * (existing directories are left alone), then logs both paths.
 *
 * @returns {Promise<void>}
 */
async function ensureDirectories() {
  const mkdirOptions = { recursive: true };
  // Sequential on purpose: same order as the log lines below.
  for (const directory of [OUTPUT_DIR, SVG_DIR]) {
    await fs.mkdir(directory, mkdirOptions);
  }
  console.log('✓ 输出目录已创建:', OUTPUT_DIR);
  console.log('✓ SVG目录已创建:', SVG_DIR);
}
/**
 * Derives a filesystem-safe Markdown file name from the last path segment of
 * a URL. Query string and fragment are ignored.
 *
 * @param {string} url - Absolute page URL.
 * @param {string} pageTitle - Page title; accepted for call-site compatibility
 *   but not used when building the name.
 * @returns {string} `<segment>.md`, or `untitled.md` when the path has no
 *   segments.
 * @throws {TypeError} If `url` cannot be parsed as an absolute URL.
 */
function generateFileName(url, pageTitle) {
  // Drop empty segments so a trailing slash ("/docs/page/") still yields
  // "page" instead of collapsing every such URL onto "untitled".
  const segments = new URL(url).pathname.split('/').filter(Boolean);
  const lastSegment = segments[segments.length - 1] ?? 'untitled';
  // Replace characters that are not allowed in file names
  const safeName = lastSegment.replace(/[<>:"/\\|?*]/g, '_');
  return `${safeName}.md`;
}
/**
 * Extracts the first SVG that has an `id` inside `.prose-custom` and writes it
 * to `<SVG_DIR>/<dirName>/<index>.svg`.
 *
 * A page without `.prose-custom` or without a matching SVG produces an empty
 * result. A failure to write an individual file is logged and skipped.
 *
 * @param {object} page - Page object exposing `evaluate(fn)`.
 * @param {string} dirName - Subdirectory name under SVG_DIR (the Markdown file
 *   name without its extension at the call site).
 * @returns {Promise<Array<{id: string, fileName: string, path: string, index: number}>>}
 *   One entry per SVG file that was written.
 */
async function extractAndSaveSVGs(page, dirName) {
  const svgs = await page.evaluate(() => {
    const proseElement = document.querySelector('.prose-custom');
    if (!proseElement) return [];
    // Only SVGs carrying an id inside .prose-custom are considered
    const svgElements = proseElement.querySelectorAll('svg[id]');
    // Only the first match is kept. slice() is used instead of indexing so a
    // page with no matching SVG yields [] rather than throwing on
    // `undefined.id`. Drop the slice() call to export every SVG.
    return Array.from(svgElements)
      .slice(0, 1)
      .map((svg) => ({
        id: svg.id,
        outerHTML: svg.outerHTML,
      }));
  });

  // Per-page subdirectory named after the Markdown file
  const pageSvgDir = path.join(SVG_DIR, dirName);
  await fs.mkdir(pageSvgDir, { recursive: true });

  const savedSvgs = [];
  for (const [index, svg] of svgs.entries()) {
    // Files are named by their position in the result list
    const svgFileName = `${index}.svg`;
    const svgFilePath = path.join(pageSvgDir, svgFileName);
    try {
      const formattedSvg = formatSVG(svg.outerHTML);
      await fs.writeFile(svgFilePath, formattedSvg, 'utf-8');
      savedSvgs.push({
        id: svg.id,
        fileName: svgFileName,
        path: svgFilePath,
        index,
      });
      console.log(` ✓ SVG已保存: ${dirName}/${svgFileName}`);
    } catch (error) {
      // Best effort: one failed file must not abort the rest of the page
      console.error(` ✗ 保存SVG失败 (${svg.id}):`, error.message);
    }
  }
  return savedSvgs;
}
/**
 * Normalizes serialized SVG markup before it is written to a file.
 *
 * - Surrounding whitespace is trimmed.
 * - Empty `<tag ...></tag>` pairs of the listed elements become `<tag ... />`.
 * - `<br>` is rewritten to `<br />`.
 *
 * Whitespace and line breaks inside the markup are left untouched.
 *
 * @param {string} svgHtml - Serialized SVG markup (e.g. an element's outerHTML).
 * @returns {string} The normalized markup.
 */
function formatSVG(svgHtml) {
  // Elements that are rewritten to self-closing form when they have no content
  const selfClosingTags = [
    'path', 'circle', 'rect', 'ellipse', 'line', 'polygon', 'polyline', 'use',
    'image', 'marker', 'clipPath', 'pattern', 'mask', 'filter', 'symbol',
  ];
  // Matches an empty `<tag ...></tag>` pair of any listed element; the
  // back-reference makes sure the closing tag belongs to the same element.
  const emptyPair = new RegExp(`<(${selfClosingTags.join('|')})([^>]*?)><\\/\\1>`, 'gi');

  const selfClosed = svgHtml.trim().replace(emptyPair, '<$1$2 />');

  // NOTE(review): the original statement here was a regex and a string literal
  // broken across lines (a syntax error). It is reconstructed as the <br>
  // normalization that fits this "self-closing tags" step — presumably for
  // HTML labels embedded in the SVG; confirm against the intended behaviour.
  return selfClosed.replace(/<br>/g, '<br />');
}
/**
 * Crawls one page: loads it, saves its SVG, converts the `.prose-custom`
 * element to Markdown and writes the Markdown file into OUTPUT_DIR.
 *
 * Never rejects for a page-level failure: any error (including a failure to
 * create the browser context or the page) is logged and reported through the
 * returned object, so one bad URL cannot abort the whole run.
 *
 * @param {object} browser - Browser exposing `newContext()`.
 * @param {string} url - Page URL to process.
 * @param {number} index - Zero-based position of `url` in the run (for logging).
 * @param {number} total - Total number of URLs in the run (for logging).
 * @returns {Promise<
 *   {url: string, fileName: string, filePath: string, pageTitle: string, success: true} |
 *   {url: string, error: string, success: false}
 * >}
 */
async function processUrl(browser, url, index, total) {
  console.log(`\n[${index + 1}/${total}] 正在处理: ${url}`);
  // Declared outside the try so `finally` can close it; created inside the try
  // so a failure here is reported like any other page failure.
  let context;
  try {
    context = await browser.newContext();
    const page = await context.newPage();

    // Navigate and wait for the network to go idle
    await page.goto(url, {
      waitUntil: 'networkidle',
      timeout: 60000,
    });
    // Wait for the content container to be present
    await page.waitForSelector('.prose-custom', {
      timeout: 30000,
    });

    const pageTitle = await page.title();
    console.log(` ✓ 页面标题: ${pageTitle}`);

    // The Markdown file name (without extension) doubles as the SVG directory name
    const fileName = generateFileName(url, pageTitle);
    const mdBaseName = fileName.replace(/\.md$/, '');

    const proseElement = page.locator('.prose-custom').first();

    const savedSvgs = await extractAndSaveSVGs(page, mdBaseName);
    console.log(` ✓ 找到并保存了 ${savedSvgs.length} 个SVG文件`);

    // Convert the container's HTML to Markdown
    const proseHtml = await proseElement.evaluate((el) => el.outerHTML);
    const markdownContent = turndownService.turndown(proseHtml);

    const filePath = path.join(OUTPUT_DIR, fileName);
    await fs.writeFile(filePath, markdownContent, 'utf-8');
    console.log(` ✓ Markdown已保存: ${fileName}`);

    return {
      url,
      fileName,
      filePath,
      pageTitle,
      success: true,
    };
  } catch (error) {
    console.error(` ✗ 处理失败:`, error.message);
    return {
      url,
      error: error.message,
      success: false,
    };
  } finally {
    // `context` is undefined when newContext() itself failed
    await context?.close();
  }
}
/**
 * Runs the crawl: prepares the output directories, launches a headless
 * browser, processes every entry of `urls` one after another with a one-second
 * pause in between, prints a summary and writes it to `summary.json` in
 * OUTPUT_DIR.
 *
 * @returns {Promise<void>} Rejects if directory setup, browser launch/close or
 *   writing the summary fails.
 */
async function main() {
  console.log('========================================');
  console.log(' OpenLayers 文档爬虫');
  console.log('========================================');
  console.log(`\n共 ${urls.length} 个URL待处理\n`);

  await ensureDirectories();

  console.log('\n正在启动浏览器...');
  const browser = await chromium.launch({
    headless: true,
  });

  const results = {
    successful: [],
    failed: [],
  };

  try {
    // Pages are processed sequentially
    for (let i = 0; i < urls.length; i++) {
      const result = await processUrl(browser, urls[i], i, urls.length);
      if (result.success) {
        results.successful.push(result);
      } else {
        results.failed.push(result);
      }
      // Short pause between requests to avoid putting pressure on the server
      if (i < urls.length - 1) {
        await new Promise((resolve) => setTimeout(resolve, 1000));
      }
    }
  } finally {
    // Close the browser even when the loop throws unexpectedly
    await browser.close();
  }

  // Print statistics
  console.log('\n========================================');
  console.log(' 爬取完成统计');
  console.log('========================================');
  console.log(`总URL数: ${urls.length}`);
  console.log(`成功: ${results.successful.length}`);
  console.log(`失败: ${results.failed.length}`);
  if (results.failed.length > 0) {
    console.log('\n失败的URL:');
    results.failed.forEach((item) => {
      console.log(` - ${item.url}: ${item.error}`);
    });
  }

  // Persist the result summary
  const summaryPath = path.join(OUTPUT_DIR, 'summary.json');
  await fs.writeFile(
    summaryPath,
    JSON.stringify(results, null, 2),
    'utf-8'
  );
  console.log(`\n✓ 结果摘要已保存: ${summaryPath}`);
}
// Entry point: run main(); if it rejects, log the error and exit with status 1.
main().catch((error) => {
console.error('程序执行出错:', error);
process.exit(1);
});
```
## openlayers
```
'https://deepwiki.com/openlayers/openlayers/1-introduction-to-openlayers',
'https://deepwiki.com/openlayers/openlayers/2-core-architecture',
'https://deepwiki.com/openlayers/openlayers/2.1-map-component',
'https://deepwiki.com/openlayers/openlayers/2.2-view-system',
'https://deepwiki.com/openlayers/openlayers/2.3-layer-architecture',
'https://deepwiki.com/openlayers/openlayers/2.4-source-system',
'https://deepwiki.com/openlayers/openlayers/3-rendering-system',
'https://deepwiki.com/openlayers/openlayers/3.1-canvas-rendering-pipeline',
'https://deepwiki.com/openlayers/openlayers/3.2-webgl-rendering-system',
'https://deepwiki.com/openlayers/openlayers/4-interaction-system',
'https://deepwiki.com/openlayers/openlayers/4.1-event-processing-and-conditions',
'https://deepwiki.com/openlayers/openlayers/4.2-navigation-interactions',
'https://deepwiki.com/openlayers/openlayers/4.3-feature-editing-interactions',
'https://deepwiki.com/openlayers/openlayers/5-data-management',
'https://deepwiki.com/openlayers/openlayers/5.1-spatial-data-and-utilities',
'https://deepwiki.com/openlayers/openlayers/5.2-data-formats-and-parsing',
'https://deepwiki.com/openlayers/openlayers/5.3-tile-system',
'https://deepwiki.com/openlayers/openlayers/6-ui-components',
'https://deepwiki.com/openlayers/openlayers/6.1-controls-system',
'https://deepwiki.com/openlayers/openlayers/6.2-overlays-and-positioning',
'https://deepwiki.com/openlayers/openlayers/7-development-infrastructure',
'https://deepwiki.com/openlayers/openlayers/7.1-build-system-and-tooling',
'https://deepwiki.com/openlayers/openlayers/7.2-example-framework',
'https://deepwiki.com/openlayers/openlayers/7.3-api-evolution-and-versioning',
'https://deepwiki.com/openlayers/openlayers/8-advanced-topics',
'https://deepwiki.com/openlayers/openlayers/8.1-performance-and-optimization',
'https://deepwiki.com/openlayers/openlayers/8.2-integration-patterns',
```
## Cesium
```
'https://deepwiki.com/CesiumGS/cesium',
'https://deepwiki.com/CesiumGS/cesium/1.1-packages-and-components',
'https://deepwiki.com/CesiumGS/cesium/1.2-applications-and-examples',
'https://deepwiki.com/CesiumGS/cesium/1.3-external-service-integration',
'https://deepwiki.com/CesiumGS/cesium/2-core-architecture',
'https://deepwiki.com/CesiumGS/cesium/2.1-scene-graph',
'https://deepwiki.com/CesiumGS/cesium/2.2-camera-system',
'https://deepwiki.com/CesiumGS/cesium/2.3-event-handling',
'https://deepwiki.com/CesiumGS/cesium/3-rendering-system',
'https://deepwiki.com/CesiumGS/cesium/3.1-webgl-integration',
'https://deepwiki.com/CesiumGS/cesium/3.2-primitives',
'https://deepwiki.com/CesiumGS/cesium/3.3-materials-and-appearance',
'https://deepwiki.com/CesiumGS/cesium/3.4-shadows-and-lighting',
'https://deepwiki.com/CesiumGS/cesium/4-3d-tiles',
'https://deepwiki.com/CesiumGS/cesium/4.1-tileset-structure-and-loading',
'https://deepwiki.com/CesiumGS/cesium/4.2-content-types',
'https://deepwiki.com/CesiumGS/cesium/4.3-styling',
'https://deepwiki.com/CesiumGS/cesium/4.4-testing-and-utilities',
'https://deepwiki.com/CesiumGS/cesium/4.5-performance-optimization',
'https://deepwiki.com/CesiumGS/cesium/5-globe-surface',
'https://deepwiki.com/CesiumGS/cesium/5.1-quadtree-system',
'https://deepwiki.com/CesiumGS/cesium/5.2-terrain',
'https://deepwiki.com/CesiumGS/cesium/5.3-imagery-layers',
'https://deepwiki.com/CesiumGS/cesium/5.4-atmosphere-effects',
'https://deepwiki.com/CesiumGS/cesium/6-data-sources',
'https://deepwiki.com/CesiumGS/cesium/6.1-czml',
'https://deepwiki.com/CesiumGS/cesium/6.2-kml',
'https://deepwiki.com/CesiumGS/cesium/6.3-geojson',
'https://deepwiki.com/CesiumGS/cesium/6.4-entity-api',
'https://deepwiki.com/CesiumGS/cesium/7-development-and-tooling',
'https://deepwiki.com/CesiumGS/cesium/7.1-build-system',
'https://deepwiki.com/CesiumGS/cesium/7.2-testing-framework',
'https://deepwiki.com/CesiumGS/cesium/7.3-sandcastle',
'https://deepwiki.com/CesiumGS/cesium/7.4-contributing',
```