main

分支 (15)

标签 (34)

管理

管理

main

bump-version

better-lmdeploy-list

0.5.1

plora

torch-radix-tree2

not-skip-special-token

triton-server

pytorch-poc

v0.0.12-rc1

v0.0.11-rc1

check-inf

revert-370-simplify_compile

v0.0.8-rc1

v0.0.2

v0.6.0a0

v0.5.3

v0.5.2.post1

v0.5.2

v0.5.1

v0.5.0

v0.4.2

v0.4.1

v0.4.0

v0.3.0

v0.2.6

v0.2.5

v0.2.4

v0.2.3

v0.2.2

v0.2.1

v0.2.0

v0.1.0

v0.1.0a2

v0.1.0a1

lmdeploy
/
benchmark
/
benchmark_pytorch_engine_a100.sh

#!/bin/bash
#
# Throughput benchmarks for the PyTorch engine backend.
#
# Each section below configures one model (its --tp value, request concurrency
# and --cache-max-entry-count value) and then runs
# benchmark/profile_throughput.py with --backend pytorch, writing the result
# to a CSV file named after that model.
#
# Notes:
#   * The dataset and profiler paths are relative, so the script has to be
#     started from the directory that contains benchmark/.
#   * Model paths and GPU ids (CUDA_VISIBLE_DEVICES) are hard-coded; adjust
#     them to the machine the benchmark runs on.
#   * A failing run does not abort the script; the remaining models are still
#     benchmarked.

dataset_path="benchmark/ShareGPT_V3_unfiltered_cleaned_split.json"
########################################## PyTorch engine: fp16 or bf16 ##########################################
## 7B (1 GPU)
tp=1
max_batch_size=256
cache_max_entry_count=0.95
model_path="/workspace/models-140/llama2/huggingface/llama-2-7b-chat"
CUDA_VISIBLE_DEVICES="6" python3 benchmark/profile_throughput.py \
  "${dataset_path}" "${model_path}" \
  --backend pytorch \
  --tp "${tp}" \
  --concurrency "${max_batch_size}" \
  --cache-max-entry-count "${cache_max_entry_count}" \
  --csv llama2_pt_7b_thr.csv

## 13B (1 GPU)
tp=1
max_batch_size=256
cache_max_entry_count=0.9
model_path="/workspace/models-140/llama2/huggingface/llama-2-13b-chat"
# Each model gets its own CSV so results are not mixed into the 7B file.
CUDA_VISIBLE_DEVICES="6" python3 benchmark/profile_throughput.py \
  "${dataset_path}" "${model_path}" \
  --backend pytorch \
  --tp "${tp}" \
  --concurrency "${max_batch_size}" \
  --cache-max-entry-count "${cache_max_entry_count}" \
  --csv llama2_pt_13b_thr.csv

## 20B (2 GPUs, matching tp=2)
tp=2
max_batch_size=256
cache_max_entry_count=0.9
model_path="/workspace/models-140/InternLM/internlm-chat-20b"
CUDA_VISIBLE_DEVICES="5,6" python3 benchmark/profile_throughput.py \
  "${dataset_path}" "${model_path}" \
  --backend pytorch \
  --tp "${tp}" \
  --concurrency "${max_batch_size}" \
  --cache-max-entry-count "${cache_max_entry_count}" \
  --csv internlm_pt_20b_thr.csv

## 70B (4 GPUs, matching tp=4)
tp=4
max_batch_size=256
cache_max_entry_count=0.9
model_path="/workspace/models-140/llama2/huggingface/llama-2-70b-chat-hf"
CUDA_VISIBLE_DEVICES="4,5,6,7" python3 benchmark/profile_throughput.py \
  "${dataset_path}" "${model_path}" \
  --backend pytorch \
  --tp "${tp}" \
  --concurrency "${max_batch_size}" \
  --cache-max-entry-count "${cache_max_entry_count}" \
  --csv llama2_pt_70b_thr.csv

########################################## PyTorch engine: w8a8 ##########################################