diff --git a/Dockerfile b/Dockerfile index e03585e6406609bb155a49becd935f0cfbdbefc1..a59fac91471a3e653e9f820c0dd879581716f0bd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,16 +11,14 @@ RUN apt-get update && \ curl \ && rm -rf /var/lib/apt/lists/* +# 安装uv包管理器 +RUN pip install --no-cache-dir uv + # 复制项目依赖文件 COPY pyproject.toml uv.lock ./ -# 安装uv包管理器并安装Python依赖(在同一层完成) -RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \ - export PATH="/root/.local/bin:$PATH" && \ - uv sync --frozen - -# 设置PATH环境变量 -ENV PATH="/root/.local/bin:${PATH}" +# 安装Python依赖 +RUN uv sync --frozen # 复制应用代码 COPY app/ ./app/ diff --git a/README.md b/README.md index ab07f42f845126fd51a8169ffaff37771a044b65..7af3a277cf6f2ca9e6bb14586e8b0334ea512db7 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,11 @@ - 支持多种文档格式:PDF、Word、Excel、PowerPoint、图片、HTML 等 - 提供简单的 REST API 接口 +- Web 可视化界面(文件上传、转换、管理) - 自动保存源文件和转换结果 - 使用 UUID 避免文件名冲突 - 数据持久化存储 +- 深色模式自动跟随系统 - Docker 一键部署 ## 快速开始 @@ -16,7 +18,7 @@ ### 使用 Docker(推荐) ```bash -# 启动服务 +# 启动服务(包含后端 API 和前端 Web) docker-compose up -d # 查看日志 @@ -26,9 +28,11 @@ docker-compose logs -f docker-compose down ``` -服务启动后访问:http://localhost:8300 +服务启动后访问: -API 文档:http://localhost:8300/docs +- **Web 界面**:http://localhost:8310 +- **API 服务**:http://localhost:8300 +- **API 文档**:http://localhost:8300/docs 或 http://localhost:8300/redoc ### 本地开发 @@ -120,6 +124,8 @@ curl -X POST "http://localhost:8300/convert/url?url=https://example.com" | jq '. | HTML | `.html`, `.htm` | | 文本 | `.txt`, `.json`, `.xml` 等 | +**文件大小限制**:单个文件最大 50MB + ## 数据存储 所有文件存储在 `data` 目录: @@ -148,7 +154,23 @@ Docker 部署时,`data` 目录会自动映射到宿主机,确保数据持久 ```yaml ports: - - "8300:8300" # 宿主机端口:容器端口 + - "8300:8300" # 后端 API 端口 + - "8310:8310" # 前端 Web 端口 +``` + +### 文件上传大小限制 + +默认最大上传 50MB,如需修改,编辑 `web/nginx.conf`: + +```nginx +# 设置最大上传文件大小 +client_max_body_size 100m; # 修改为你需要的大小 +``` + +修改后需要重新构建: + +```bash +docker-compose up -d --build ``` ### 时区配置 @@ -165,14 +187,38 @@ environment: - TZ=America/New_York # 修改为纽约时区 ``` +## 项目结构 + +``` +markitdown-api/ +├── app/ # 后端 API 服务 +│ ├── main.py # FastAPI 应用 +│ ├── routes.py # API 路由 +│ ├── database.py # 数据库操作 +│ └── ... +├── web/ # 前端 Web 界面 +│ ├── src/ # React 源码 +│ ├── Dockerfile # 前端镜像 +│ └── nginx.conf # Nginx 配置 +├── data/ # 数据存储(持久化) +├── docker-compose.yml # Docker 编排 +└── README.md +``` + ## 技术栈 +### 后端 - FastAPI - 高性能 Web 框架 - MarkItDown - Microsoft 开源的文档转换工具 - SQLite - 轻量级数据库 -- Docker - 容器化部署 - Python 3.12 - 运行环境 +### 前端 +- React 19 + TypeScript +- Vite - 构建工具 +- shadcn/ui + Tailwind CSS - UI 组件 +- Nginx - Web 服务器 + ## 常见问题 ### 1. 如何修改时区? diff --git a/app/config.py b/app/config.py index 4666d834f04852ac6212a01f9889e6053bc8106b..8dc1a766de7dd15edf93a1c23f3eed295b93c3c3 100644 --- a/app/config.py +++ b/app/config.py @@ -17,5 +17,5 @@ DB_PATH = str(DATA_DIR / "file_registry.db") # API信息 API_TITLE = "MarkItDown API" -API_VERSION = "0.1.0" +API_VERSION = "1.0.0" API_DESCRIPTION = "API服务用于将各种文档格式转换为Markdown" diff --git a/app/database.py b/app/database.py index 1221f13444f27bb89790db48ad25486308ad8792..b65329c706de6168e40019efcb79f1aa432c18b3 100644 --- a/app/database.py +++ b/app/database.py @@ -53,3 +53,10 @@ async def get_all_files(): async with db.execute("SELECT * FROM files ORDER BY created_at DESC") as cursor: rows = await cursor.fetchall() return [dict(row) for row in rows] + + +async def delete_file_record(file_id: str): + """删除文件记录""" + async with aiosqlite.connect(DB_PATH) as db: + await db.execute("DELETE FROM files WHERE file_id = ?", (file_id,)) + await db.commit() diff --git a/app/routes.py b/app/routes.py index 148e84d916a3955a08b82a77666ea48a482917e8..b4c613d1e230aa5b1db0840256aa8c86eb452393 100644 --- a/app/routes.py +++ b/app/routes.py @@ -8,13 +8,14 @@ import uuid import shutil from app.config import FILES_DIR, API_VERSION, DATA_DIR -from app.database import add_file_record, get_file_record, get_all_files +from app.database import add_file_record, get_file_record, get_all_files, delete_file_record from app.schemas import ( ConvertResponse, ConvertSaveResponse, ConvertUrlResponse, FilesListResponse, - FileInfo + FileInfo, + DeleteResponse ) from app.utils import get_utc_now, convert_utc_to_local @@ -250,3 +251,31 @@ async def convert_url_to_markdown(url: str): status_code=500, detail=f"URL转换失败: {str(e)}" ) + + +@router.delete("/files/{file_id}", response_model=DeleteResponse) +async def delete_file(file_id: str): + """ + 删除指定的文件记录及其相关文件 + """ + file_info = await get_file_record(file_id) + + if not file_info: + raise HTTPException( + status_code=404, + detail=f"文件ID '{file_id}' 不存在" + ) + + # 删除文件目录 + file_dir = FILES_DIR / file_id + if file_dir.exists(): + shutil.rmtree(file_dir) + + # 删除数据库记录 + await delete_file_record(file_id) + + return DeleteResponse( + success=True, + file_id=file_id, + message=f"文件 '{file_info['original_filename']}' 已成功删除" + ) diff --git a/app/schemas.py b/app/schemas.py index a3d298dc34d498cd75188e3d98fe8a7510ccd0f8..110c4e0d17c4a5027a251599e75548fb75716c67 100644 --- a/app/schemas.py +++ b/app/schemas.py @@ -42,3 +42,10 @@ class FileInfo(BaseModel): class FilesListResponse(BaseModel): """文件列表响应""" files: list[FileInfo] = Field(description="文件列表") + + +class DeleteResponse(BaseModel): + """删除响应""" + success: bool = Field(default=True, description="删除是否成功") + file_id: str = Field(description="被删除的文件ID") + message: str = Field(description="删除结果消息") diff --git a/docker-compose.yml b/docker-compose.yml index 880bab137f10983b8b95fb622bb0008eb2f51273..4f31021bc9c30cddad0713a6f7ff592843f2d568 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -18,3 +18,27 @@ services: timeout: 10s retries: 3 start_period: 40s + networks: + - markitdown-network + + markitdown-web: + build: ./web + image: markitdown-web:latest + container_name: markitdown-web + ports: + - "8310:8310" + depends_on: + - markitdown-api + restart: unless-stopped + healthcheck: + test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:8310/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 10s + networks: + - markitdown-network + +networks: + markitdown-network: + driver: bridge diff --git a/pyproject.toml b/pyproject.toml index 4fedb04a5a78b57da2eaf7b15f46360186c08cff..e0ed8e1bc1ac3ad5d6ce04c13705df9bd217981c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "markitdown-api" -version = "0.1.0" +version = "1.0.0" description = "为 markitdown 提供标准的 api 服务" readme = "README.md" requires-python = ">=3.12" diff --git a/web/.dockerignore b/web/.dockerignore new file mode 100644 index 0000000000000000000000000000000000000000..45e24fd026f6ba757b8afb9e43e297d743760f33 --- /dev/null +++ b/web/.dockerignore @@ -0,0 +1,14 @@ +node_modules +dist +.git +.gitignore +README.md +.env +.env.local +.env.*.local +npm-debug.log* +yarn-debug.log* +yarn-error.log* +pnpm-debug.log* +.DS_Store +*.local diff --git a/web/.gitignore b/web/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..a547bf36d8d11a4f89c59c144f24795749086dd1 --- /dev/null +++ b/web/.gitignore @@ -0,0 +1,24 @@ +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +pnpm-debug.log* +lerna-debug.log* + +node_modules +dist +dist-ssr +*.local + +# Editor directories and files +.vscode/* +!.vscode/extensions.json +.idea +.DS_Store +*.suo +*.ntvs* +*.njsproj +*.sln +*.sw? diff --git a/web/Dockerfile b/web/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..48613dca61041ddcecd2b298ab75dc404670ae17 --- /dev/null +++ b/web/Dockerfile @@ -0,0 +1,31 @@ +# 构建阶段 +FROM node:20-alpine AS builder + +WORKDIR /app + +# 复制依赖文件 +COPY package*.json ./ + +# 安装依赖 +RUN npm ci + +# 复制源代码 +COPY . . + +# 构建应用 +RUN npm run build + +# 生产阶段 +FROM nginx:alpine + +# 复制构建产物到 nginx +COPY --from=builder /app/dist /usr/share/nginx/html + +# 复制 nginx 配置 +COPY nginx.conf /etc/nginx/conf.d/default.conf + +# 暴露端口 +EXPOSE 8310 + +# 启动 nginx +CMD ["nginx", "-g", "daemon off;"] diff --git a/web/README.md b/web/README.md new file mode 100644 index 0000000000000000000000000000000000000000..0be616fa32688c94e68874d143c81c07221bbb52 --- /dev/null +++ b/web/README.md @@ -0,0 +1,103 @@ +# MarkItDown Web + +MarkItDown API 服务的前端 Web 界面,提供文件上传转换和文件管理功能。 + +## 技术栈 + +- **框架**: React 19 + TypeScript +- **构建工具**: Vite +- **UI 组件**: shadcn/ui + Tailwind CSS +- **图标**: Lucide React + +## 快速开始 + +### 安装依赖 + +```bash +npm install +``` + +### 开发模式 + +```bash +npm run dev +``` + +访问 http://localhost:5173 + +### 构建生产版本 + +```bash +npm run build +``` + +### Docker 部署 + +```bash +# 在项目根目录使用 docker-compose +cd .. +docker-compose up -d + +# 或单独构建前端镜像 +docker build -t markitdown-web:latest . +docker run -d -p 8310:8310 --name markitdown-web markitdown-web:latest +``` + +访问 http://localhost:8310 + +## 主要功能 + +- **上传转换**:支持拖拽上传、实时转换、一键复制、快速跳转文件列表 +- **文件管理**:查看文件列表、下载源文件/MD文件、删除文件、文件高亮定位 +- **文件大小**:支持最大 50MB 文件上传 + +## 配置说明 + +### 修改上传文件大小限制 + +默认最大上传 50MB,如需修改,编辑 `nginx.conf`: + +```nginx +# 设置最大上传文件大小 +client_max_body_size 100m; # 修改为你需要的大小 +``` + +修改后需要重新构建 Docker 镜像。 + +## API 代理配置 + +开发环境下,前端请求 `/api/*` 会自动代理到后端服务 `http://localhost:8300`。 + +配置位于 `vite.config.ts`: + +```typescript +server: { + proxy: { + '/api': { + target: 'http://localhost:8300', + changeOrigin: true, + rewrite: (path) => path.replace(/^\/api/, ''), + }, + }, +} +``` + +## 项目结构 + +``` +web/ +├── src/ +│ ├── api/ # API 客户端 +│ ├── components/ # UI 组件 +│ │ ├── ui/ # shadcn/ui 组件 +│ │ └── theme-provider.tsx +│ ├── pages/ # 页面组件 +│ │ ├── Upload.tsx # 上传转换页 +│ │ └── FileList.tsx # 文件列表页 +│ ├── types/ # TypeScript 类型定义 +│ ├── App.tsx # 主应用 +│ └── main.tsx # 入口文件 +├── index.html +├── vite.config.ts +└── package.json +``` diff --git a/web/components.json b/web/components.json new file mode 100644 index 0000000000000000000000000000000000000000..2b0833f0977df39fe315f4da463ce4cf2754815f --- /dev/null +++ b/web/components.json @@ -0,0 +1,22 @@ +{ + "$schema": "https://ui.shadcn.com/schema.json", + "style": "new-york", + "rsc": false, + "tsx": true, + "tailwind": { + "config": "", + "css": "src/index.css", + "baseColor": "neutral", + "cssVariables": true, + "prefix": "" + }, + "iconLibrary": "lucide", + "aliases": { + "components": "@/components", + "utils": "@/lib/utils", + "ui": "@/components/ui", + "lib": "@/lib", + "hooks": "@/hooks" + }, + "registries": {} +} diff --git a/web/eslint.config.js b/web/eslint.config.js new file mode 100644 index 0000000000000000000000000000000000000000..21a7e861b5449c1046fd162ee919a2a418a33f72 --- /dev/null +++ b/web/eslint.config.js @@ -0,0 +1,29 @@ +import js from '@eslint/js' +import globals from 'globals' +import reactHooks from 'eslint-plugin-react-hooks' +import reactRefresh from 'eslint-plugin-react-refresh' +import tseslint from 'typescript-eslint' +import { defineConfig, globalIgnores } from 'eslint/config' + +export default defineConfig([ + globalIgnores(['dist']), + { + files: ['**/*.{ts,tsx}'], + extends: [ + js.configs.recommended, + tseslint.configs.recommended, + reactHooks.configs['recommended-latest'], + reactRefresh.configs.vite, + ], + languageOptions: { + ecmaVersion: 2020, + globals: globals.browser, + }, + rules: { + 'react-refresh/only-export-components': [ + 'warn', + { allowExportNames: ['useTheme'] }, + ], + }, + }, +]) diff --git a/web/index.html b/web/index.html new file mode 100644 index 0000000000000000000000000000000000000000..0e172788cf79675cc6fd5353cf80865623b18ff3 --- /dev/null +++ b/web/index.html @@ -0,0 +1,16 @@ + + + +
+ + + ++ 文档转 Markdown 转换服务 +
+{markdown}
+