qwen2-function-calling-test
/
RunInstruct.txt

# Serve Qwen1.5-7B-Chat through vLLM's OpenAI API server entrypoint on GPU 1.
# Foreground variant, with --gpu-memory-utilization set to 0.3.
CUDA_VISIBLE_DEVICES=1 python -m vllm.entrypoints.openai.api_server \
    --model /sie/workspaces/gongshutao/qwen/Qwen1.5-7B-Chat \
    --gpu-memory-utilization 0.3

# Background variant: same model and GPU with --gpu-memory-utilization 0.8.
# Started under nohup and backgrounded with '&'; stdout and stderr are both
# written to output.log.
CUDA_VISIBLE_DEVICES=1 nohup python -m vllm.entrypoints.openai.api_server \
    --model /sie/workspaces/gongshutao/qwen/Qwen1.5-7B-Chat \
    --gpu-memory-utilization 0.8 \
    > output.log 2>&1 &

# Recursively copy the QWenFunctionCalling directory from host 192.168.174.10
# (as user gongshutao) into the local /sie/workspaces/gongshutao directory.
scp -r \
    gongshutao@192.168.174.10:/sie/workspaces/gongshutao/QWenFunctionCalling \
    /sie/workspaces/gongshutao

# Serve Qwen2-7B-Instruct with vLLM in the foreground, exposing GPUs 0 and 1
# and using a tensor-parallel size of 2. --gpu-memory-utilization is set to 0.1
# and --max-model-len limits the model context length to 8192.
CUDA_VISIBLE_DEVICES=0,1 python -m vllm.entrypoints.openai.api_server \
    --model /sie/workspaces/gongshutao/qwen/Qwen2-7B-Instruct \
    --tensor-parallel-size 2 \
    --gpu-memory-utilization 0.1 \
    --max-model-len 8192


# Background variant for Qwen2-7B-Instruct. CUDA_VISIBLE_DEVICES is not set
# here, so GPU selection is left to the calling environment. The server runs
# under nohup with '&'; stdout and stderr are both written to output.log.
nohup python -m vllm.entrypoints.openai.api_server \
    --model /sie/workspaces/gongshutao/qwen/Qwen2-7B-Instruct \
    --gpu-memory-utilization 0.3 \
    --max-model-len 8192 > output.log 2>&1 &


# Serve the GPTQ Int4 checkpoint of Qwen2-72B-Instruct with vLLM (foreground).
# --quantization gptq selects the GPTQ weight format; --enforce-eager makes
# vLLM use eager-mode PyTorch.
# --gpu-memory-utilization is the fraction of GPU memory vLLM may use and must
# be greater than 0: a value of 0 leaves no budget for the weights or the KV
# cache, so the server cannot start. 0.9 is vLLM's documented default; tune it
# to the target GPU.
python -m vllm.entrypoints.openai.api_server \
    --model /sie/workspaces/gongshutao/qwen/Qwen2-72B-Instruct-GPTQ-Int4 \
    --quantization gptq \
    --enforce-eager \
    --gpu-memory-utilization 0.9

# Run functioncall_evalution.py in the background under nohup; both stdout and
# stderr are written to eval.log.
nohup python functioncall_evalution.py > eval.log 2>&1 &
# Follow output.log, the file the background server commands in this file write to.
tail -f output.log
# Follow the evaluation log written by the command above.
tail -f eval.log


# Serve Qwen2-7B-Instruct with vLLM on GPU 0 in the background, on port 8085,
# in eager mode with --gpu-memory-utilization 0.5. stdout and stderr are both
# written to output.log.
CUDA_VISIBLE_DEVICES=0 nohup python -m vllm.entrypoints.openai.api_server \
    --model /sie/workspaces/gongshutao/qwen/Qwen2-7B-Instruct \
    --enforce-eager \
    --port 8085 \
    --gpu-memory-utilization 0.5 \
    > output.log 2>&1 &
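# PID 5193 (presumably the process ID reported for the background server started above; confirm before using it with kill)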


# Serve Qwen2-7B-Instruct through the LLaMA-Factory API (llamafactory-cli api)
# on GPU 1 with API_PORT=8085, using the qwen template and the vLLM inference
# backend with the generation/vLLM limits given below.
CUDA_VISIBLE_DEVICES=1 API_PORT=8085 llamafactory-cli api \
    --model_name_or_path /sie/workspaces/gongshutao/qwen/Qwen2-7B-Instruct \
    --template qwen \
    --infer_backend vllm \
    --max_new_tokens 2048 \
    --vllm_maxlen 32768 \
    --vllm_enforce_eager \
    --vllm_gpu_util 0.5

# Start openai_api.py, passing the Qwen2-7B-Instruct checkpoint path via -c
# and 8085 as the server port.
python openai_api.py \
    -c /sie/workspaces/gongshutao/qwen/Qwen2-7B-Instruct \
    --server-port 8085