diff --git a/multimodal/diffusion/stable-diffusion/training/IMG/res.png b/multimodal/diffusion/stable-diffusion/training/IMG/res.png new file mode 100644 index 0000000000000000000000000000000000000000..28855d8cc45f2d9aa76ed63815184314cf62efb4 Binary files /dev/null and b/multimodal/diffusion/stable-diffusion/training/IMG/res.png differ diff --git a/multimodal/diffusion/stable-diffusion/training/README.md b/multimodal/diffusion/stable-diffusion/training/README.md index 5692785588a419aca16bf35a0591df54bb57464c..89f70e6fe62d7a9899af775a0ba2b9306758039b 100755 --- a/multimodal/diffusion/stable-diffusion/training/README.md +++ b/multimodal/diffusion/stable-diffusion/training/README.md @@ -11,16 +11,7 @@ Stable Diffusion is a text-to-image latent diffusion model created by the resear pip3 install -r requirements.txt ``` -### Download - -```bash -$ wget http://10.150.9.95/swapp/datasets/multimodal/stable_diffusion/pokemon-images.zip -$ unzip pokemon-images.zip -$ wget http://10.150.9.95/swapp/pretrained/multimodal/stable-diffusion/stable-diffusion-v1-4.zip -$ unzip stable-diffusion-v1-4.zip -``` - -## Train +## train ### step 1 使用accelerate初始化训练环境 @@ -42,7 +33,22 @@ multi-gpu ### step 2 开始训练 ```bash -accelerate launch --mixed_precision="fp16" train_text_to_image.py --pretrained_model_name_or_path=./stable-diffusion-v1-4 --use_ema --resolution=512 --center_crop --random_flip --train_batch_size=1 --gradient_accumulation_steps=4 --gradient_checkpointing --max_train_steps=15000 --learning_rate=1e-05 --max_grad_norm=1 --lr_scheduler="constant" --lr_warmup_steps=0 --output_dir="sd-pokemon-model" --caption_column 'additional_feature' --train_data_dir pokemon-images/datasets/images/train +export MODEL_NAME="CompVis/stable-diffusion-v1-4" +export dataset_name="lambdalabs/pokemon-blip-captions" + +accelerate launch --mixed_precision="fp16" train_text_to_image.py \ + --pretrained_model_name_or_path=$MODEL_NAME \ + --dataset_name=$dataset_name \ + --use_ema \ + --resolution=512 --center_crop --random_flip \ + --train_batch_size=1 \ + --gradient_accumulation_steps=4 \ + --gradient_checkpointing \ + --max_train_steps=15000 \ + --learning_rate=1e-05 \ + --max_grad_norm=1 \ + --lr_scheduler="constant" --lr_warmup_steps=0 \ + --output_dir="sd-pokemon-model" ``` ## Test @@ -50,6 +56,5 @@ accelerate launch --mixed_precision="fp16" train_text_to_image.py --pretrained_ python3 test.py ``` prompt:A pokemon with green eyes and red legs - -## Result -![image](IMG/pokemon.png) +result: +![image](IMG/res.png)