// Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <boost/interprocess/sync/scoped_lock.hpp>

#include "infer_request.h"
#include "pb_utils.h"

#ifdef TRITON_PB_STUB
#include "infer_response.h"
#include "pb_stub.h"
#endif

namespace triton { namespace backend { namespace python {

InferRequest::InferRequest(
const std::string& request_id, uint64_t correlation_id,
const std::vector<std::shared_ptr<PbTensor>>& inputs,
const std::vector<std::string>& requested_output_names,
const std::string& model_name, const int64_t model_version)
: request_id_(request_id), correlation_id_(correlation_id), inputs_(inputs),
requested_output_names_(requested_output_names), model_name_(model_name),
model_version_(model_version)
{
}
const std::vector<std::shared_ptr<PbTensor>>&
InferRequest::Inputs()
{
return inputs_;
}
const std::string&
InferRequest::RequestId()
{
return request_id_;
}
uint64_t
InferRequest::CorrelationId()
{
return correlation_id_;
}
const std::vector<std::string>&
InferRequest::RequestedOutputNames()
{
return requested_output_names_;
}
const std::string&
InferRequest::ModelName()
{
return model_name_;
}
int64_t
InferRequest::ModelVersion()
{
return model_version_;
}
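
// Copy this request's metadata (request id, correlation id, requested output
// names, model name and model version) into the shared-memory Request struct.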
void
InferRequest::SaveToSharedMemory(
std::unique_ptr<SharedMemory>& shm_pool, Request* request_shm)
{
request_shm->correlation_id = this->CorrelationId();
off_t id_offset;
SaveStringToSharedMemory(shm_pool, id_offset, this->RequestId().c_str());
request_shm->id = id_offset;
request_shm->requested_output_count = this->RequestedOutputNames().size();
off_t requested_output_names_offset;
off_t* requested_output_names;
shm_pool->Map(
(char**)&requested_output_names,
sizeof(off_t) * request_shm->requested_output_count,
requested_output_names_offset);
request_shm->requested_output_names = requested_output_names_offset;
size_t i = 0;
for (auto& requested_output_name : requested_output_names_) {
SaveStringToSharedMemory(
shm_pool, requested_output_names[i], requested_output_name.c_str());
i++;
}
request_shm->requested_input_count = this->Inputs().size();
request_shm->model_version = this->model_version_;
SaveStringToSharedMemory(
shm_pool, request_shm->model_name, this->model_name_.c_str());
}
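
// Reconstruct an InferRequest from the Request struct stored at
// request_offset: the request id, input tensors, requested output names and
// model name are all read back from shared memory.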
std::unique_ptr<InferRequest>
InferRequest::LoadFromSharedMemory(
std::unique_ptr<SharedMemory>& shm_pool, off_t request_offset)
{
Request* request;
shm_pool->MapOffset((char**)&request, request_offset);
char* id = nullptr;
LoadStringFromSharedMemory(shm_pool, request->id, id);
uint32_t requested_input_count = request->requested_input_count;
std::vector<std::shared_ptr<PbTensor>> py_input_tensors;
for (size_t input_idx = 0; input_idx < requested_input_count; ++input_idx) {
std::shared_ptr<PbTensor> pb_input_tensor = PbTensor::LoadFromSharedMemory(
shm_pool, request->inputs + sizeof(Tensor) * input_idx);
py_input_tensors.emplace_back(std::move(pb_input_tensor));
}
std::vector<std::string> requested_output_names;
uint32_t requested_output_count = request->requested_output_count;
off_t* output_names;
shm_pool->MapOffset((char**)&output_names, request->requested_output_names);
for (size_t output_idx = 0; output_idx < requested_output_count;
++output_idx) {
char* output_name = nullptr;
LoadStringFromSharedMemory(shm_pool, output_names[output_idx], output_name);
requested_output_names.emplace_back(output_name);
}
char* model_name;
LoadStringFromSharedMemory(shm_pool, request->model_name, model_name);
return std::make_unique<InferRequest>(
id, request->correlation_id, std::move(py_input_tensors),
requested_output_names, model_name, request->model_version);
}
#ifdef TRITON_PB_STUB
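// Execute this request as a BLS (Business Logic Scripting) call: the request
// is serialized to shared memory, handed to the main process over an IPC
// message, and the resulting InferResponse is read back from shared memory.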
std::unique_ptr<InferResponse>
InferRequest::Exec()
{
ResponseBatch* response_batch = nullptr;
bool responses_is_set = false;
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
std::unique_ptr<SharedMemory>& shm_pool = stub->GetSharedMemory();
try {
py::gil_scoped_release release;
std::unique_ptr<IPCMessage> ipc_message =
std::make_unique<IPCMessage>(shm_pool, true /* inline_response */);
bool has_exception = false;
PythonBackendException pb_exception(std::string{});
ipc_message->Command() =
PYTHONSTUB_CommandType::PYTHONSTUB_InferExecRequest;
RequestBatch* request_batch;
shm_pool->Map(
(char**)&request_batch, sizeof(RequestBatch), ipc_message->Args());
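// This BLS request is sent as a batch containing a single request.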
request_batch->batch_size = 1;
Request* request;
shm_pool->Map((char**)&request, sizeof(Request), request_batch->requests);
request->requested_input_count = this->Inputs().size();
Tensor* tensors;
bool has_gpu_tensor = false;
shm_pool->Map(
(char**)&tensors, sizeof(Tensor) * request->requested_input_count,
request->inputs);
size_t i = 0;
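// Save each input tensor to shared memory. CPU tensors are copied here
// (copy_cpu == true); GPU tensors are only described (copy_gpu == false) and
// their data is handled after the main process has replied.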
for (auto& input_tensor : inputs_) {
input_tensor->SaveToSharedMemory(
shm_pool, &tensors[i], true /* copy_cpu */, false /* copy_gpu */);
if (!input_tensor->IsCPU()) {
has_gpu_tensor = true;
}
++i;
}
SaveToSharedMemory(shm_pool, request);
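// Notify the main process that the request is ready and block until it
// signals back on the response condition variable.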
{
bi::scoped_lock<bi::interprocess_mutex> lock{
*(ipc_message->ResponseMutex())};
stub->SendIPCMessage(ipc_message);
ipc_message->ResponseCondition()->wait(lock);
}
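// If any input tensor lives on the GPU, a second handshake with the main
// process is needed so that the GPU tensor data can be exchanged before the
// response is produced.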
if (has_gpu_tensor) {
try {
for (auto& input_tensor : this->Inputs()) {
if (!input_tensor->IsCPU()) {
#ifdef TRITON_ENABLE_GPU
input_tensor->LoadGPUData(shm_pool, stub->GPULoadMutex());
#endif // TRITON_ENABLE_GPU
}
}
}
catch (const PythonBackendException& exception) {
// We need to catch the exception here. Otherwise, we will not notify
// the main process and it will wait for the response forever.
pb_exception = exception;
has_exception = true;
}
{
bi::scoped_lock<bi::interprocess_mutex> lock{
*(ipc_message->ResponseMutex())};
ipc_message->ResponseCondition()->notify_all();
ipc_message->ResponseCondition()->wait(lock);
}
}
// The exception will be thrown after the message has been sent to the main
// process.
if (has_exception) {
throw pb_exception;
}
// Get the response for the current message.
std::unique_ptr<IPCMessage> bls_response = IPCMessage::LoadFromSharedMemory(
shm_pool, ipc_message->RequestOffset());
shm_pool->MapOffset((char**)&response_batch, bls_response->Args());
responses_is_set = true;
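// If the main process reported an error, return an error-only InferResponse
// instead of reading output tensors.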
if (response_batch->has_error) {
if (response_batch->is_error_set) {
char* err_string;
LoadStringFromSharedMemory(shm_pool, response_batch->error, err_string);
return std::make_unique<InferResponse>(
std::vector<std::shared_ptr<PbTensor>>{},
std::make_shared<PbError>(err_string));
} else {
return std::make_unique<InferResponse>(
std::vector<std::shared_ptr<PbTensor>>{},
std::make_shared<PbError>(
"An error occurred while performing BLS request."));
}
}
}
catch (const PythonBackendException& pb_exception) {
return std::make_unique<InferResponse>(
std::vector<std::shared_ptr<PbTensor>>{},
std::make_shared<PbError>(pb_exception.what()));
}
if (responses_is_set) {
std::unique_ptr<InferResponse> infer_response =
InferResponse::LoadFromSharedMemory(
shm_pool, response_batch->responses);
return infer_response;
} else {
return std::make_unique<InferResponse>(
std::vector<std::shared_ptr<PbTensor>>{},
std::make_shared<PbError>(
"An error occurred while performing BLS request."));
}
}
#endif
}}} // namespace triton::backend::python