diff --git a/docs/pytorch/solutions/finetune/instruction_finetune.md b/docs/pytorch/solutions/finetune/instruction_finetune.md
index 3f68217b0c586873151f1031d5797e3d41ebd8aa..ac78ca3597f1744f3e041d26ef703eb039de9a58 100644
--- a/docs/pytorch/solutions/finetune/instruction_finetune.md
+++ b/docs/pytorch/solutions/finetune/instruction_finetune.md
@@ -137,7 +137,7 @@ bash examples/mcore/qwen3/ckpt_convert_qwen3_hf2mcore.sh
 source /usr/local/Ascend/ascend-toolkit/set_env.sh  # change to the real ascend-toolkit path
 ......
 --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet  # path to the raw dataset
---tokenizer-name-or-path ./mdoel_from_hf/qwen3_hf  # path to the HF tokenizer
+--tokenizer-name-or-path ./model_from_hf/qwen3_hf  # path to the HF tokenizer
 --output-prefix ./finetune_dataset/alpaca  # save path
 ......
 ```
diff --git a/docs/pytorch/solutions/pretrain/pretrain.md b/docs/pytorch/solutions/pretrain/pretrain.md
index 80de3f40e3187a60499f351d1628cf7c113f7adf..4e5039a8c572099d197eb299661ea3d5005b066b 100644
--- a/docs/pytorch/solutions/pretrain/pretrain.md
+++ b/docs/pytorch/solutions/pretrain/pretrain.md
@@ -40,7 +40,7 @@ source /usr/local/Ascend/nnal/atb/set_env.sh  # use the actual nnal path
 source /usr/local/Ascend/ascend-toolkit/set_env.sh  # change to the real ascend-toolkit path
 ......
 --input ./dataset/train-00000-of-00042-d964455e17e96d5a.parquet  # path to the raw dataset
---tokenizer-name-or-path ./mdoel_from_hf/qwen3_hf  # path to the HF tokenizer
+--tokenizer-name-or-path ./model_from_hf/qwen3_hf  # path to the HF tokenizer
 --output-prefix ./finetune_dataset/alpaca  # save path
 ......
 ```
diff --git a/docs/pytorch/solutions/pretrain/pretrain_eod.md b/docs/pytorch/solutions/pretrain/pretrain_eod.md
index 1521696d2298453e6c43fe26d20a8f6e23aa649b..4ddfeef6264f33043a515404d10103d2b5583a05 100644
--- a/docs/pytorch/solutions/pretrain/pretrain_eod.md
+++ b/docs/pytorch/solutions/pretrain/pretrain_eod.md
@@ -30,7 +30,7 @@ source /usr/local/Ascend/nnal/atb/set_env.sh  # use the actual nnal path
 source /usr/local/Ascend/ascend-toolkit/set_env.sh  # change to the real ascend-toolkit path
 ......
 --input ./dataset/train-00000-of-00042-d964455e17e96d5a.parquet  # path to the raw dataset
---tokenizer-name-or-path ./mdoel_from_hf/qwen3_hf  # path to the HF tokenizer
+--tokenizer-name-or-path ./model_from_hf/qwen3_hf  # path to the HF tokenizer
 --output-prefix ./finetune_dataset/alpaca  # save path
 --append-eod  # add this flag to enable pack-mode data preprocessing
 ......
diff --git a/examples/mcore/qwen3/data_convert_qwen3_instruction.sh b/examples/mcore/qwen3/data_convert_qwen3_instruction.sh
index 2dda0766e9db1bf03942e724e1cf48f785da21ee..e46fc8bc67ab110fe1150facdfcb0541574f4697 100644
--- a/examples/mcore/qwen3/data_convert_qwen3_instruction.sh
+++ b/examples/mcore/qwen3/data_convert_qwen3_instruction.sh
@@ -4,7 +4,7 @@ mkdir ./finetune_dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/qwen3_hf/ \
+    --tokenizer-name-or-path ./model_from_hf/qwen3_hf/ \
     --output-prefix ./finetune_dataset/alpaca \
     --handler-name AlpacaStyleInstructionHandler \
     --tokenizer-type PretrainedFromHF \
diff --git a/examples/mcore/qwen3_moe/data_convert_qwen3_moe_instruction.sh b/examples/mcore/qwen3_moe/data_convert_qwen3_moe_instruction.sh
index 14d39b128918821802987dd29dec951a202ad311..5958407b1ae1916b5ba4187419fc4bae54a69269 100644
--- a/examples/mcore/qwen3_moe/data_convert_qwen3_moe_instruction.sh
+++ b/examples/mcore/qwen3_moe/data_convert_qwen3_moe_instruction.sh
@@ -4,7 +4,7 @@ mkdir ./finetune_dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/qwen3_moe_hf/ \
+    --tokenizer-name-or-path ./model_from_hf/qwen3_moe_hf/ \
     --output-prefix ./finetune_dataset/alpaca \
     --handler-name AlpacaStyleInstructionHandler \
     --tokenizer-type PretrainedFromHF \
diff --git a/examples/mcore/qwen3_moe/data_convert_qwen3_moe_instruction_pack.sh b/examples/mcore/qwen3_moe/data_convert_qwen3_moe_instruction_pack.sh
index 7928ab093d91a5371dcdaf7390ea11d69b1fcccb..00ed92cb3f84e8adbe47966d07f8dc18c47c7f04 100644
--- a/examples/mcore/qwen3_moe/data_convert_qwen3_moe_instruction_pack.sh
+++ b/examples/mcore/qwen3_moe/data_convert_qwen3_moe_instruction_pack.sh
@@ -4,7 +4,7 @@ mkdir ./finetune_dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/qwen3_moe_hf/ \
+    --tokenizer-name-or-path ./model_from_hf/qwen3_moe_hf/ \
     --output-prefix ./finetune_dataset/alpaca \
     --handler-name AlpacaStyleInstructionHandler \
     --tokenizer-type PretrainedFromHF \
diff --git a/examples/mcore/qwen3_next/data_convert_qwen3_next_instruction.sh b/examples/mcore/qwen3_next/data_convert_qwen3_next_instruction.sh
index 843ae07d1252239840f5db2659c77ed89e007a2a..dda4f1b8289ef384262259526793947cde35d7a0 100644
--- a/examples/mcore/qwen3_next/data_convert_qwen3_next_instruction.sh
+++ b/examples/mcore/qwen3_next/data_convert_qwen3_next_instruction.sh
@@ -4,7 +4,7 @@ mkdir ./finetune_dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/qwen3_next_hf/ \
+    --tokenizer-name-or-path ./model_from_hf/qwen3_next_hf/ \
     --output-prefix ./finetune_dataset/alpaca \
     --handler-name AlpacaStyleInstructionHandler \
     --tokenizer-type PretrainedFromHF \
diff --git a/examples/mcore/seed_oss/data_convert_seed_oss_instruction.sh b/examples/mcore/seed_oss/data_convert_seed_oss_instruction.sh
index c148c41fc6a0e07be8344ff038a637f86ea16d67..309cc988dfad3e98d8ee4224c795fa573a2c14bd 100644
--- a/examples/mcore/seed_oss/data_convert_seed_oss_instruction.sh
+++ b/examples/mcore/seed_oss/data_convert_seed_oss_instruction.sh
@@ -4,7 +4,7 @@ mkdir ./dataset/seed_oss_finetune_dataset_cpm
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Seed-OSS-36B-Base_hf \
+    --tokenizer-name-or-path ./model_from_hf/Seed-OSS-36B-Base_hf \
     --output-prefix ./finetune_dataset/alpaca \
     --handler-name AlpacaStyleInstructionHandler \
     --tokenizer-type PretrainedFromHF \
diff --git a/examples/mcore/seed_oss/data_convert_seed_oss_pretrain.sh b/examples/mcore/seed_oss/data_convert_seed_oss_pretrain.sh
index 7282f501e01622f0e1685b032d25137fc6720738..9f54f46de0e20b6264c270049ffe06ba7116842e 100644
--- a/examples/mcore/seed_oss/data_convert_seed_oss_pretrain.sh
+++ b/examples/mcore/seed_oss/data_convert_seed_oss_pretrain.sh
@@ -4,7 +4,7 @@ mkdir ./dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Seed-OSS-36B-Base_hf \
+    --tokenizer-name-or-path ./model_from_hf/Seed-OSS-36B-Base_hf \
     --output-prefix ./finetune_dataset/alpaca \
     --tokenizer-type PretrainedFromHF \
     --workers 4 \
diff --git a/examples/mindspore/qwen3/data_convert_qwen3_instruction.sh b/examples/mindspore/qwen3/data_convert_qwen3_instruction.sh
index 2dda0766e9db1bf03942e724e1cf48f785da21ee..e46fc8bc67ab110fe1150facdfcb0541574f4697 100644
--- a/examples/mindspore/qwen3/data_convert_qwen3_instruction.sh
+++ b/examples/mindspore/qwen3/data_convert_qwen3_instruction.sh
@@ -4,7 +4,7 @@ mkdir ./finetune_dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/qwen3_hf/ \
+    --tokenizer-name-or-path ./model_from_hf/qwen3_hf/ \
     --output-prefix ./finetune_dataset/alpaca \
     --handler-name AlpacaStyleInstructionHandler \
     --tokenizer-type PretrainedFromHF \
diff --git a/mindspeed_llm/features_manager/finetune/finetune.py b/mindspeed_llm/features_manager/finetune/finetune.py
index a04e05ed6fa75f14cba36c7b2926a5b5083970c3..04923a4921de8783f5fc1ef3f78eb1499d84742b 100644
--- a/mindspeed_llm/features_manager/finetune/finetune.py
+++ b/mindspeed_llm/features_manager/finetune/finetune.py
@@ -1,3 +1,4 @@
+import warnings
 from argparse import ArgumentParser
 
 from mindspeed.features_manager.feature import MindSpeedFeature
@@ -27,6 +28,9 @@ class FinetuneFeature(MindSpeedFeature):
     def pre_validate_args(self, args):
         self.origin_variable_seq_lengths = None
         if args.variable_seq_lengths:
+            if args.log_throughput:
+                args.log_throughput = False
+                warnings.warn("In variable-seq-lengths mode, accurate TFLOPS cannot be calculated, set --log-throughput to False.", RuntimeWarning)
             self.origin_variable_seq_lengths = args.variable_seq_lengths
             args.variable_seq_lengths = False
 
diff --git a/tests/0day/qwen3/ckpt_convert_qwen3_hf2mcore.sh b/tests/0day/qwen3/ckpt_convert_qwen3_hf2mcore.sh
index 5dc6efe55cac6dea8d48852958f54bcc8e9484a3..27020458afa5c209984bca221e5aae2ff0064f4e 100644
--- a/tests/0day/qwen3/ckpt_convert_qwen3_hf2mcore.sh
+++ b/tests/0day/qwen3/ckpt_convert_qwen3_hf2mcore.sh
@@ -8,9 +8,9 @@ python convert_ckpt.py \
     --save-model-type mg \
     --target-tensor-parallel-size 4 \
     --target-pipeline-parallel-size 2 \
-    --load-dir ./mdoel_from_hf/Qwen3-1.7B-Base/ \
+    --load-dir ./model_from_hf/Qwen3-1.7B-Base/ \
     --save-dir ./model_weight/Qwen3-1.7B-mcore \
-    --tokenizer-model ./mdoel_from_hf/Qwen3-1.7B-Base/tokenizer.json \
+    --tokenizer-model ./model_from_hf/Qwen3-1.7B-Base/tokenizer.json \
     --model-type-hf qwen3 \
     --params-dtype bf16 \
     --spec mindspeed_llm.tasks.models.spec.qwen3_spec layer_spec
\ No newline at end of file
diff --git a/tests/0day/qwen3/data_convert_qwen3_pretrain.sh b/tests/0day/qwen3/data_convert_qwen3_pretrain.sh
index 55ac7aa55ff01f7f3d9be11ab7c6f7283c5967ad..e42fc75926ad6fb57641e764d291d089cbae0079 100644
--- a/tests/0day/qwen3/data_convert_qwen3_pretrain.sh
+++ b/tests/0day/qwen3/data_convert_qwen3_pretrain.sh
@@ -4,7 +4,7 @@ mkdir ./dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Qwen3-1.7B-Base/ \
+    --tokenizer-name-or-path ./model_from_hf/Qwen3-1.7B-Base/ \
     --tokenizer-type PretrainedFromHF \
     --handler-name GeneralPretrainHandler \
     --output-prefix ./dataset/enwiki \
diff --git a/tests/0day/qwen3/qwen3-0.6b/ckpt_convert_qwen3_0point6b_hf2mcore.sh b/tests/0day/qwen3/qwen3-0.6b/ckpt_convert_qwen3_0point6b_hf2mcore.sh
index bc0490af1359edcf8c719f7e284cd6ed4e2b4ee5..6820c606406da726bfdac86d62bfdbe561b45c16 100644
--- a/tests/0day/qwen3/qwen3-0.6b/ckpt_convert_qwen3_0point6b_hf2mcore.sh
+++ b/tests/0day/qwen3/qwen3-0.6b/ckpt_convert_qwen3_0point6b_hf2mcore.sh
@@ -5,7 +5,7 @@ python convert_ckpt.py \
     --save-model-type mg \
     --target-tensor-parallel-size 1 \
     --target-pipeline-parallel-size 1 \
-    --load-dir ./mdoel_from_hf/Qwen3-0.6B-Base/ \
+    --load-dir ./model_from_hf/Qwen3-0.6B-Base/ \
     --save-dir ./model_weights/Qwen3-0.6B-mcore \
     --tokenizer-model ./model_from_hf/Qwen3-0.6B-Base/tokenizer.json \
     --model-type-hf qwen3 \
diff --git a/tests/0day/qwen3/qwen3-0.6b/data_convert_qwen3_0point6b_instruction.sh b/tests/0day/qwen3/qwen3-0.6b/data_convert_qwen3_0point6b_instruction.sh
index e4ecd4e4c14f9d549dcbb98869d9b17173dd6a9d..48c251f5f52b0c318616968afcacecc68c335646 100644
--- a/tests/0day/qwen3/qwen3-0.6b/data_convert_qwen3_0point6b_instruction.sh
+++ b/tests/0day/qwen3/qwen3-0.6b/data_convert_qwen3_0point6b_instruction.sh
@@ -4,7 +4,7 @@ mkdir ./finetune_dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Qwen3-0.6B-Base/ \
+    --tokenizer-name-or-path ./model_from_hf/Qwen3-0.6B-Base/ \
     --output-prefix ./finetune_dataset/alpaca \
     --handler-name AlpacaStyleInstructionHandler \
     --tokenizer-type PretrainedFromHF \
diff --git a/tests/0day/qwen3/qwen3-0.6b/data_convert_qwen3_0point6b_pretrain.sh b/tests/0day/qwen3/qwen3-0.6b/data_convert_qwen3_0point6b_pretrain.sh
index dfa1028ab943bc60b13f5307737116f60417c644..0ac2162f99d580bd592fd5192bb35cd6c4d1086f 100644
--- a/tests/0day/qwen3/qwen3-0.6b/data_convert_qwen3_0point6b_pretrain.sh
+++ b/tests/0day/qwen3/qwen3-0.6b/data_convert_qwen3_0point6b_pretrain.sh
@@ -4,7 +4,7 @@ mkdir ./dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Qwen3-0.6B-Base/ \
+    --tokenizer-name-or-path ./model_from_hf/Qwen3-0.6B-Base/ \
     --tokenizer-type PretrainedFromHF \
     --handler-name GeneralPretrainHandler \
     --output-prefix ./dataset/enwiki \
diff --git a/tests/0day/qwen3/qwen3-1.7b/ckpt_convert_qwen3_1point7b_hf2mcore.sh b/tests/0day/qwen3/qwen3-1.7b/ckpt_convert_qwen3_1point7b_hf2mcore.sh
index 7fb4565b8d5900b537e2803743f561112caaa3e3..fc7c39258d154e9a979e94e03c18a6760b000068 100644
--- a/tests/0day/qwen3/qwen3-1.7b/ckpt_convert_qwen3_1point7b_hf2mcore.sh
+++ b/tests/0day/qwen3/qwen3-1.7b/ckpt_convert_qwen3_1point7b_hf2mcore.sh
@@ -5,7 +5,7 @@ python convert_ckpt.py \
     --save-model-type mg \
     --target-tensor-parallel-size 1 \
     --target-pipeline-parallel-size 1 \
-    --load-dir ./mdoel_from_hf/Qwen3-1.7B-Base/ \
+    --load-dir ./model_from_hf/Qwen3-1.7B-Base/ \
     --save-dir ./model_weights/Qwen3-1.7B-mcore \
     --tokenizer-model ./model_from_hf/Qwen3-1.7B-Base/tokenizer.json \
     --model-type-hf qwen3 \
diff --git a/tests/0day/qwen3/qwen3-1.7b/data_convert_qwen3_1point7b_instruction.sh b/tests/0day/qwen3/qwen3-1.7b/data_convert_qwen3_1point7b_instruction.sh
index 4bd69c7faea173c77e787b5183a679cb89d71922..e7ac95e11143f9a634d1b20f61f684c1ff2019bd 100644
--- a/tests/0day/qwen3/qwen3-1.7b/data_convert_qwen3_1point7b_instruction.sh
+++ b/tests/0day/qwen3/qwen3-1.7b/data_convert_qwen3_1point7b_instruction.sh
@@ -4,7 +4,7 @@ mkdir ./finetune_dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Qwen3-1.7B-Base/ \
+    --tokenizer-name-or-path ./model_from_hf/Qwen3-1.7B-Base/ \
     --output-prefix ./finetune_dataset/alpaca \
     --handler-name AlpacaStyleInstructionHandler \
     --tokenizer-type PretrainedFromHF \
diff --git a/tests/0day/qwen3/qwen3-1.7b/data_convert_qwen3_1point7b_pretrain.sh b/tests/0day/qwen3/qwen3-1.7b/data_convert_qwen3_1point7b_pretrain.sh
index fd0577451c7a8810ad8836d8b4d4b3c45840f9e1..f7ad51b47271aa91cd4b269bab2e798da3f420cb 100644
--- a/tests/0day/qwen3/qwen3-1.7b/data_convert_qwen3_1point7b_pretrain.sh
+++ b/tests/0day/qwen3/qwen3-1.7b/data_convert_qwen3_1point7b_pretrain.sh
@@ -4,7 +4,7 @@ mkdir ./dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Qwen3-1.7B-Base/ \
+    --tokenizer-name-or-path ./model_from_hf/Qwen3-1.7B-Base/ \
     --tokenizer-type PretrainedFromHF \
     --handler-name GeneralPretrainHandler \
     --output-prefix ./dataset/enwiki \
diff --git a/tests/0day/qwen3/qwen3-14b/data_convert_qwen3_14b_instruction.sh b/tests/0day/qwen3/qwen3-14b/data_convert_qwen3_14b_instruction.sh
index b388060df77e4e96391657e58880d60f4ba8c8d5..4e5276783c2346f0add4b1e9525a6d3b4341ec0d 100644
--- a/tests/0day/qwen3/qwen3-14b/data_convert_qwen3_14b_instruction.sh
+++ b/tests/0day/qwen3/qwen3-14b/data_convert_qwen3_14b_instruction.sh
@@ -4,7 +4,7 @@ mkdir ./finetune_dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Qwen3-14B-Base/ \
+    --tokenizer-name-or-path ./model_from_hf/Qwen3-14B-Base/ \
     --output-prefix ./finetune_dataset/alpaca \
     --handler-name AlpacaStyleInstructionHandler \
     --tokenizer-type PretrainedFromHF \
diff --git a/tests/0day/qwen3/qwen3-14b/data_convert_qwen3_14b_pretrain.sh b/tests/0day/qwen3/qwen3-14b/data_convert_qwen3_14b_pretrain.sh
index 5a82bc2638c3aaf5d0ccec1deeb614a7357996ea..ef146a177246fe3d5b92437c31c2a48a9bea7114 100644
--- a/tests/0day/qwen3/qwen3-14b/data_convert_qwen3_14b_pretrain.sh
+++ b/tests/0day/qwen3/qwen3-14b/data_convert_qwen3_14b_pretrain.sh
@@ -4,7 +4,7 @@ mkdir ./dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Qwen3-14B-Base/ \
+    --tokenizer-name-or-path ./model_from_hf/Qwen3-14B-Base/ \
     --tokenizer-type PretrainedFromHF \
     --handler-name GeneralPretrainHandler \
     --output-prefix ./dataset/enwiki \
diff --git a/tests/0day/qwen3/qwen3-30b-a3b/data_convert_qwen3_a3b_instruction.sh b/tests/0day/qwen3/qwen3-30b-a3b/data_convert_qwen3_a3b_instruction.sh
index 5191f756122eb3b13519d3c2e2c70726d6553bfa..0c42e7f2af71393988db6df2d2ea4c82e38f3867 100644
--- a/tests/0day/qwen3/qwen3-30b-a3b/data_convert_qwen3_a3b_instruction.sh
+++ b/tests/0day/qwen3/qwen3-30b-a3b/data_convert_qwen3_a3b_instruction.sh
@@ -4,7 +4,7 @@ mkdir ./finetune_dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/qwen3_a3b_hf/ \
+    --tokenizer-name-or-path ./model_from_hf/qwen3_a3b_hf/ \
     --output-prefix ./finetune_dataset/alpaca \
     --handler-name AlpacaStyleInstructionHandler \
     --tokenizer-type PretrainedFromHF \
diff --git a/tests/0day/qwen3/qwen3-4b/data_convert_qwen3_4b_instruction.sh b/tests/0day/qwen3/qwen3-4b/data_convert_qwen3_4b_instruction.sh
index f72cf82f4b63b03ce7132b7c63061ac2332f18a9..fdc8c68705081ba42a57787f3994a889e144c1c1 100644
--- a/tests/0day/qwen3/qwen3-4b/data_convert_qwen3_4b_instruction.sh
+++ b/tests/0day/qwen3/qwen3-4b/data_convert_qwen3_4b_instruction.sh
@@ -4,7 +4,7 @@ mkdir ./finetune_dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Qwen3-4B-Base/ \
+    --tokenizer-name-or-path ./model_from_hf/Qwen3-4B-Base/ \
     --output-prefix ./finetune_dataset/alpaca \
     --handler-name AlpacaStyleInstructionHandler \
     --tokenizer-type PretrainedFromHF \
diff --git a/tests/0day/qwen3/qwen3-4b/data_convert_qwen3_4b_pretrain.sh b/tests/0day/qwen3/qwen3-4b/data_convert_qwen3_4b_pretrain.sh
index 7e03701a27e018980f8795fb5cce5b18e8814766..17f5bf12841d99463f7529d6e6d4227a92f07fae 100644
--- a/tests/0day/qwen3/qwen3-4b/data_convert_qwen3_4b_pretrain.sh
+++ b/tests/0day/qwen3/qwen3-4b/data_convert_qwen3_4b_pretrain.sh
@@ -4,7 +4,7 @@ mkdir ./dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00042-d964455e17e96d5a.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Qwen3-4B-Base/ \
+    --tokenizer-name-or-path ./model_from_hf/Qwen3-4B-Base/ \
     --tokenizer-type PretrainedFromHF \
     --handler-name GeneralPretrainHandler \
     --output-prefix ./dataset/enwiki \
diff --git a/tests/0day/qwen3/qwen3-8b/data_convert_qwen3_8b_instruction.sh b/tests/0day/qwen3/qwen3-8b/data_convert_qwen3_8b_instruction.sh
index e3a528b303f96636e8cdcf518241c0fa5be2fe1c..a06e71ee6d4c0be16abc567be3310ffe3fa8d473 100644
--- a/tests/0day/qwen3/qwen3-8b/data_convert_qwen3_8b_instruction.sh
+++ b/tests/0day/qwen3/qwen3-8b/data_convert_qwen3_8b_instruction.sh
@@ -4,7 +4,7 @@ mkdir ./finetune_dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Qwen3-8B-Base/ \
+    --tokenizer-name-or-path ./model_from_hf/Qwen3-8B-Base/ \
     --output-prefix ./finetune_dataset/alpaca \
     --handler-name AlpacaStyleInstructionHandler \
     --tokenizer-type PretrainedFromHF \
diff --git a/tests/0day/qwen3/qwen3-8b/data_convert_qwen3_8b_pretrain.sh b/tests/0day/qwen3/qwen3-8b/data_convert_qwen3_8b_pretrain.sh
index bda76b554a02d09b1974fadfe6e80a951c4eceff..e4f2d200f76d02fb2f572cfbbd473ee18261e7fb 100644
--- a/tests/0day/qwen3/qwen3-8b/data_convert_qwen3_8b_pretrain.sh
+++ b/tests/0day/qwen3/qwen3-8b/data_convert_qwen3_8b_pretrain.sh
@@ -4,7 +4,7 @@ mkdir ./dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00042-d964455e17e96d5a.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Qwen3-8B-Base/ \
+    --tokenizer-name-or-path ./model_from_hf/Qwen3-8B-Base/ \
     --tokenizer-type PretrainedFromHF \
     --handler-name GeneralPretrainHandler \
     --output-prefix ./dataset/enwiki \
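Apart from the mechanical rename of the mistyped `./mdoel_from_hf/` path prefix to `./model_from_hf/` in the docs and example scripts, the only behavioral change in this patch is the `finetune.py` hunk: when `--variable-seq-lengths` is active, per-step TFLOPS cannot be computed accurately, so `FinetuneFeature.pre_validate_args` now forces `--log-throughput` off and emits a `RuntimeWarning`. Below is a minimal sketch of that check in isolation; the `SimpleNamespace` stand-in for the parsed arguments is illustrative only, not the real MindSpeed-LLM args object.

```python
import warnings
from types import SimpleNamespace


def pre_validate_args(args):
    """Sketch of the check added to FinetuneFeature.pre_validate_args."""
    if args.variable_seq_lengths:
        if args.log_throughput:
            # TFLOPS estimates assume a fixed sequence length, so throughput
            # logging is disabled rather than reporting misleading numbers.
            args.log_throughput = False
            warnings.warn(
                "In variable-seq-lengths mode, accurate TFLOPS cannot be "
                "calculated, set --log-throughput to False.",
                RuntimeWarning,
            )


# Illustrative stand-in for the parsed training arguments.
args = SimpleNamespace(variable_seq_lengths=True, log_throughput=True)
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    pre_validate_args(args)

print(args.log_throughput)          # False
print(caught[0].category.__name__)  # RuntimeWarning
```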