master

分支 (25)

标签 (9)

管理

管理

master

igie_model_dev

fix-vllm-0.17.0-qwen_vl

update_dependent

add_llm_2606_model

add_ixrt_2606_model

fix-igie-vit-mvitv2_base

fix-igie-conformer

fix-test-13639

fix_efficientnet_b1

hackthon

release/26.03

ixrt_model_dev

precommit

release/25.12

release/25.09

check_models_valid

release/25.06

release/25.03

4.1.2

26.03

25.12

25.09

25.06

25.03

24.12

24.09

24.06

24.03

deepsparkinference
/
models
/
nlp
/
llm
/
ernie-4.5-300b-a47b
/
fastdeploy
/
run_demo.py

# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from fastdeploy import LLM, SamplingParams

# Prompts submitted together as one batch.
prompts = ["Hello, my name is", "The largest ocean is"]

# Sampling configuration passed to generate() for the whole batch.
sampling_params = SamplingParams(
    temperature=0.8,
    top_p=0.95,
    max_tokens=256,
)

# Construct the engine from the checkpoint directory on local disk.
llm = LLM(
    model="/home/paddle/ERNIE-4.5-300B-A47B-Paddle",
    tensor_parallel_size=4,
    max_model_len=8192,
    static_decode_blocks=0,
    quantization="wint8",
)

# Run generation for every prompt in a single call.
outputs = llm.generate(prompts, sampling_params)

# Echo each prompt next to the text produced for it.
for output in outputs:
    print(output.prompt, output.outputs.text)