diff --git a/.cppcheck.suppressions b/.cppcheck.suppressions index a9fb5ecb6d38579417109b85ba9badf84aa7fcf6..d1c685cdbfb51f9bc04f2cb7e9ec31d654b5228a 100644 --- a/.cppcheck.suppressions +++ b/.cppcheck.suppressions @@ -65,6 +65,7 @@ unknownMacro:ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_matrix unknownMacro:ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_x_like.cpp unknownMacro:ccsrc/lib/simulator/vector/detail/cpu_avx_double/cpu_vector_core_matrix_gate.cpp unknownMacro:ccsrc/lib/simulator/densitymatrix/detail/cpu_common/cpu_densitymatrix_core_matrix_gate.cpp +unknownMacro:ccsrc/include/simulator/vector/detail/cpu_vector_policy.hpp unreadVariable:ccsrc/lib/experimental/decompositions/time_evolution.cpp diff --git a/.jenkins/check/config/filter_cppcheck.txt b/.jenkins/check/config/filter_cppcheck.txt index 6da280ddf130b4ea13c4f3ca84ddf5abea8e3715..0024c9d1c5e4bc62c5a2d2af2a50f9eb77d20bc9 100644 --- a/.jenkins/check/config/filter_cppcheck.txt +++ b/.jenkins/check/config/filter_cppcheck.txt @@ -76,6 +76,7 @@ "mindquantum/ccsrc/lib/simulator/densitymatrix/detail/cpu_common/cpu_densitymatrix_core_swap_like.cpp" "unknownMacro" "mindquantum/ccsrc/lib/simulator/densitymatrix/detail/cpu_common/cpu_densitymatrix_core_gate_expect.cpp" "unknownMacro" "mindquantum/ccsrc/lib/simulator/densitymatrix/detail/cpu_common/cpu_densitymatrix_core_matrix_gate.cpp" "unknownMacro" +"mindquantum/ccsrc/include/simulator/vector/detail/cpu_vector_policy.hpp" "unknownMacro" "mindquantum/ccsrc/lib/experimental/decompositions/time_evolution.cpp" "unreadVariable" diff --git a/.jenkins/check/config/filter_cpplint.txt b/.jenkins/check/config/filter_cpplint.txt index e1192817e275aa6ab258254b9a627e2af12a877f..1b8445873c5d66732fd656577514543bae7ccacb 100644 --- a/.jenkins/check/config/filter_cpplint.txt +++ b/.jenkins/check/config/filter_cpplint.txt @@ -62,3 +62,6 @@ "mindquantum/ccsrc/include/simulator/alignedallocator.hpp" "whitespace/parens" "mindquantum/ccsrc/lib/math/operators/transform/bravyi_kitaev_superfast.cpp" "runtime/references" "mindquantum/ccsrc/python/math/lib/bind_math.cpp" "runtime/references" +"mindquantum/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_policy.cpp" "runtime/references" +"mindquantum/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_policy.cu" "runtime/references" +"mindquantum/ccsrc/include/simulator/vector/detail/cpu_vector_policy.hpp" "runtime/references" diff --git a/.jenkins/check/config/whitelizard.txt b/.jenkins/check/config/whitelizard.txt index 2e01ea1b1acb0e282782354b474d6d7ade3e571d..1a0eb7e93c328dd80c1fa0a3461299c8019b5803 100644 --- a/.jenkins/check/config/whitelizard.txt +++ b/.jenkins/check/config/whitelizard.txt @@ -39,6 +39,7 @@ mindquantum/ccsrc/lib/simulator/vector/detail/runtime/cmd.cpp:mindquantum::sim:: mindquantum/ccsrc/include/math/tensor/ops_cpu/advance_math.hpp:tensor::ops::cpu::ElementFunc mindquantum/mindquantum/core/parameterresolver/parameterresolver.py:__init__ mindquantum/ccsrc/python/math/lib/bind_math.cpp:BindQubitOperator +ccsrc/lib/simulator/vector/detail/cpu_avx_double/cpu_vector_core_gate_expect.cpp:mindquantum::sim::vector::detail::CPUVectorPolicyAvxDouble::ExpectDiffSingleQubitMatrix TEST_CASE mindquantum::sim::vector::detail::VectorState::ApplyGate diff --git a/.whitelizard.txt b/.whitelizard.txt index 503e7c94777c3b79e38e7b900dfbe4bc3b7c53c1..49b9e4dc1f56ef6b705e167b1cf49305db566d57 100644 --- a/.whitelizard.txt +++ b/.whitelizard.txt @@ -38,7 +38,7 @@ ccsrc/python/mqbackend/lib/terms_operators.cpp:init_terms_operators ccsrc/lib/simulator/vector/detail/runtime/cmd.cpp:mindquantum::sim::rt::cmd ccsrc/python/math/lib/bind_math.cpp:BindQubitOperator ccsrc/include/math/tensor/ops_cpu/advance_math.hpp:tensor::ops::cpu::ElementFunc - +ccsrc/lib/simulator/vector/detail/cpu_avx_double/cpu_vector_core_gate_expect.cpp:mindquantum::sim::vector::detail::CPUVectorPolicyAvxDouble::ExpectDiffSingleQubitMatrix TEST_CASE mindquantum::sim::vector::detail::VectorState::ApplyGate mindquantum::sim::vector::detail::VectorState::GetExpectationWithGradOneMulti diff --git a/ccsrc/include/core/sparse/algo.hpp b/ccsrc/include/core/sparse/algo.hpp index b1d0bc6bdf7b359ce5872dbe88423b9a13fa11f9..30747b0e63738c8770cbce5b27cb1d7b6abeb804 100644 --- a/ccsrc/include/core/sparse/algo.hpp +++ b/ccsrc/include/core/sparse/algo.hpp @@ -166,6 +166,29 @@ T2 *Csr_Dot_Vec(std::shared_ptr> a, T2 *vec) { return reinterpret_cast(new_vec); } +template +CT ExpectationOfCsr(std::shared_ptr> a, T2 *bra, T2 *ket) { + auto dim = a->dim_; + auto c_bra = reinterpret_cast>(bra); + auto c_ket = reinterpret_cast>(ket); + auto data = a->data_; + auto indptr = a->indptr_; + auto indices = a->indices_; + T2 res_real = 0, res_imag = 0; + THRESHOLD_OMP( + MQ_DO_PRAGMA(omp parallel for reduction(+:res_real, res_imag) schedule(static)), dim, 1UL << nQubitTh, + for (Index i = 0; i < dim; i++) { + CT sum = {0.0, 0.0}; + for (Index j = indptr[i]; j < indptr[i + 1]; j++) { + sum += data[j] * c_ket[indices[j]]; + } + auto tmp = std::conj(c_bra[i]) * sum; + res_real += std::real(tmp); + res_imag += std::imag(tmp); + }) + return {res_real, res_imag}; +} + template T2 *Csr_Dot_Vec(std::shared_ptr> a, std::shared_ptr> b, T2 *vec) { auto dim = a->dim_; @@ -192,5 +215,35 @@ T2 *Csr_Dot_Vec(std::shared_ptr> a, std::shared_ptr(new_vec); } + +template +CT ExpectationOfCsr(std::shared_ptr> a, std::shared_ptr> b, T2 *bra, T2 *ket) { + auto dim = a->dim_; + auto c_bra = reinterpret_cast>(bra); + auto c_ket = reinterpret_cast>(ket); + auto data = a->data_; + auto indptr = a->indptr_; + auto indices = a->indices_; + auto data_b = b->data_; + auto indptr_b = b->indptr_; + auto indices_b = b->indices_; + T2 res_real = 0, res_imag = 0; + + THRESHOLD_OMP( + MQ_DO_PRAGMA(omp parallel for reduction(+:res_real, res_imag) schedule(static)), dim, 1UL << nQubitTh, + for (Index i = 0; i < dim; i++) { + CT sum = {0.0, 0.0}; + for (Index j = indptr[i]; j < indptr[i + 1]; j++) { + sum += data[j] * c_ket[indices[j]]; + } + for (Index j = indptr_b[i]; j < indptr_b[i + 1]; j++) { + sum += data_b[j] * c_ket[indices_b[j]]; + } + auto tmp = std::conj(c_bra[i]) * sum; + res_real += std::real(tmp); + res_imag += std::imag(tmp); + }) + return {res_real, res_imag}; +} } // namespace mindquantum::sparse #endif // MINDQUANTUM_SPARSE_ALGO_H_ diff --git a/ccsrc/include/simulator/vector/detail/CPPLINT.cfg b/ccsrc/include/simulator/vector/detail/CPPLINT.cfg new file mode 100644 index 0000000000000000000000000000000000000000..cd2d10c42d0e3c2b31b0372b9f5fafeb934634a3 --- /dev/null +++ b/ccsrc/include/simulator/vector/detail/CPPLINT.cfg @@ -0,0 +1 @@ +filter=-runtime/reference diff --git a/ccsrc/include/simulator/vector/detail/cpu_vector_avx_double_policy.hpp b/ccsrc/include/simulator/vector/detail/cpu_vector_avx_double_policy.hpp index de70ac5645871d77e98d9c46d35ba184e2646f2e..4cde4e5fec7a27cb2bb369a6030dffe76c0107c7 100644 --- a/ccsrc/include/simulator/vector/detail/cpu_vector_avx_double_policy.hpp +++ b/ccsrc/include/simulator/vector/detail/cpu_vector_avx_double_policy.hpp @@ -54,9 +54,10 @@ namespace mindquantum::sim::vector::detail { struct CPUVectorPolicyAvxDouble : public CPUVectorPolicyBase { using gate_matrix_t = std::vector>>; - static void ApplySingleQubitMatrix(qs_data_p_t src, qs_data_p_t des, qbit_t obj_qubit, const qbits_t& ctrls, - const std::vector>& m, index_t dim); - static qs_data_t ExpectDiffSingleQubitMatrix(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, + static void ApplySingleQubitMatrix(const qs_data_p_t& src, qs_data_p_t* des_p, qbit_t obj_qubit, + const qbits_t& ctrls, const std::vector>& m, + index_t dim); + static qs_data_t ExpectDiffSingleQubitMatrix(const qs_data_p_t& bra, const qs_data_p_t& ket, const qbits_t& objs, const qbits_t& ctrls, const VVT& m, index_t dim); }; } // namespace mindquantum::sim::vector::detail diff --git a/ccsrc/include/simulator/vector/detail/cpu_vector_policy.hpp b/ccsrc/include/simulator/vector/detail/cpu_vector_policy.hpp index 36350ae3620fce1f0e2c2f714529ba232454e484..9eaf35a0d72ee6ff7b979d76687d53a49a5eb99e 100644 --- a/ccsrc/include/simulator/vector/detail/cpu_vector_policy.hpp +++ b/ccsrc/include/simulator/vector/detail/cpu_vector_policy.hpp @@ -52,129 +52,138 @@ struct CPUVectorPolicyBase { // ======================================================================================================== static qs_data_p_t InitState(index_t dim, bool zero_state = true); - static void Reset(qs_data_p_t qs, index_t dim); - static void FreeState(qs_data_p_t qs); - static void Display(qs_data_p_t qs, qbit_t n_qubits, qbit_t q_limit = 10); - static void SetToZeroExcept(qs_data_p_t qs, index_t ctrl_mask, index_t dim); + static void Reset(qs_data_p_t* qs_p, index_t dim); + static void FreeState(qs_data_p_t* qs_p); + static void Display(const qs_data_p_t& qs, qbit_t n_qubits, qbit_t q_limit = 10); + static void SetToZeroExcept(qs_data_p_t* qs_p, index_t ctrl_mask, index_t dim); template - static void ConditionalBinary(qs_data_p_t src, qs_data_p_t des, qs_data_t succ_coeff, qs_data_t fail_coeff, - index_t dim, const binary_op& op); - template - static void ConditionalBinary(qs_data_p_t src, qs_data_p_t des, index_t mask, index_t condi, qs_data_t succ_coeff, + static void ConditionalBinary(const qs_data_p_t& src, qs_data_p_t* des_p, qs_data_t succ_coeff, qs_data_t fail_coeff, index_t dim, const binary_op& op); - static void ConditionalAdd(qs_data_p_t src, qs_data_p_t des, index_t mask, index_t condi, qs_data_t succ_coeff, - qs_data_t fail_coeff, index_t dim); - static void ConditionalMinus(qs_data_p_t src, qs_data_p_t des, index_t mask, index_t condi, qs_data_t succ_coeff, - qs_data_t fail_coeff, index_t dim); - static void ConditionalMul(qs_data_p_t src, qs_data_p_t des, index_t mask, index_t condi, qs_data_t succ_coeff, - qs_data_t fail_coeff, index_t dim); - static void ConditionalDiv(qs_data_p_t src, qs_data_p_t des, index_t mask, index_t condi, qs_data_t succ_coeff, - qs_data_t fail_coeff, index_t dim); - static void QSMulValue(qs_data_p_t src, qs_data_p_t des, qs_data_t value, index_t dim); - static qs_data_t ConditionalCollect(qs_data_p_t qs, index_t mask, index_t condi, bool abs, index_t dim); - static VT GetQS(qs_data_p_t qs, index_t dim); - static void SetQS(qs_data_p_t qs, const VT& qs_out, index_t dim); - static qs_data_p_t ApplyTerms(qs_data_p_t qs, const std::vector>& ham, index_t dim); - static qs_data_p_t Copy(qs_data_p_t qs, index_t dim); + template + static void ConditionalBinary(const qs_data_p_t& src, qs_data_p_t* des_p, index_t mask, index_t condi, + qs_data_t succ_coeff, qs_data_t fail_coeff, index_t dim, const binary_op& op); + static void ConditionalAdd(const qs_data_p_t& src, qs_data_p_t* des_p, index_t mask, index_t condi, + qs_data_t succ_coeff, qs_data_t fail_coeff, index_t dim); + static void ConditionalMinus(const qs_data_p_t& src, qs_data_p_t* des_p, index_t mask, index_t condi, + qs_data_t succ_coeff, qs_data_t fail_coeff, index_t dim); + static void ConditionalMul(const qs_data_p_t& src, qs_data_p_t* des_p, index_t mask, index_t condi, + qs_data_t succ_coeff, qs_data_t fail_coeff, index_t dim); + static void ConditionalDiv(const qs_data_p_t& src, qs_data_p_t* des_p, index_t mask, index_t condi, + qs_data_t succ_coeff, qs_data_t fail_coeff, index_t dim); + static void QSMulValue(const qs_data_p_t& src, qs_data_p_t* des_p, qs_data_t value, index_t dim); + static qs_data_t ConditionalCollect(const qs_data_p_t& qs, index_t mask, index_t condi, bool abs, index_t dim); + static VT GetQS(const qs_data_p_t& qs, index_t dim); + static void SetQS(qs_data_p_t* qs, const VT& qs_out, index_t dim); + static qs_data_p_t ApplyTerms(qs_data_p_t* qs_p, const std::vector>& ham, index_t dim); + static py_qs_data_t ExpectationOfTerms(const qs_data_p_t& bra, const qs_data_p_t& ket, + const std::vector>& ham, index_t dim); + static qs_data_p_t Copy(const qs_data_p_t& qs, index_t dim); template - static py_qs_data_t ConditionVdot(qs_data_p_t bra, qs_data_p_t ket, index_t dim); - static py_qs_data_t OneStateVdot(qs_data_p_t bra, qs_data_p_t ket, qbit_t obj_qubit, index_t dim); - static py_qs_data_t ZeroStateVdot(qs_data_p_t bra, qs_data_p_t ket, qbit_t obj_qubit, index_t dim); - static py_qs_data_t Vdot(qs_data_p_t bra, qs_data_p_t ket, index_t dim); - static qs_data_p_t CsrDotVec(const std::shared_ptr>& a, qs_data_p_t vec, + static py_qs_data_t ConditionVdot(const qs_data_p_t& bra, const qs_data_p_t& ket_p, index_t dim); + static py_qs_data_t OneStateVdot(const qs_data_p_t& bra, const qs_data_p_t& ket, qbit_t obj_qubit, index_t dim); + static py_qs_data_t ZeroStateVdot(const qs_data_p_t& bra, const qs_data_p_t& ket, qbit_t obj_qubit, index_t dim); + static py_qs_data_t Vdot(const qs_data_p_t& bra, const qs_data_p_t& ket, index_t dim); + static qs_data_p_t CsrDotVec(const std::shared_ptr>& a, const qs_data_p_t& vec, index_t dim); static qs_data_p_t CsrDotVec(const std::shared_ptr>& a, - const std::shared_ptr>& b, qs_data_p_t vec, + const std::shared_ptr>& b, const qs_data_p_t& vec, index_t dim); + static py_qs_data_t ExpectationOfCsr(const std::shared_ptr>& a, + const qs_data_p_t& bra, const qs_data_p_t& ket, index_t dim); + static py_qs_data_t ExpectationOfCsr(const std::shared_ptr>& a, + const std::shared_ptr>& b, + const qs_data_p_t& bra, const qs_data_p_t& ket, index_t dim); // X like operator // ======================================================================================================== - static void ApplyXLike(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, qs_data_t v1, qs_data_t v2, + static void ApplyXLike(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, qs_data_t v1, qs_data_t v2, index_t dim); - static void ApplyX(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, index_t dim); - static void ApplyY(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, index_t dim); + static void ApplyX(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim); + static void ApplyY(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim); // Z like operator // ======================================================================================================== - static void ApplyZLike(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, qs_data_t val, index_t dim); - static void ApplyZ(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, index_t dim); + static void ApplyZLike(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, qs_data_t val, index_t dim); + static void ApplyZ(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim); // The crazy code spell check in CI do not allow apply s to name following API, even I set the filter file. - static void ApplySGate(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, index_t dim); - static void ApplyT(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, index_t dim); - static void ApplySdag(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, index_t dim); - static void ApplyTdag(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, index_t dim); - static void ApplyPS(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, + static void ApplySGate(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim); + static void ApplyT(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim); + static void ApplySdag(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim); + static void ApplyTdag(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim); + static void ApplyPS(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff = false); // Single qubit operator // ======================================================================================================== - static void ApplySingleQubitMatrix(qs_data_p_t src, qs_data_p_t des, qbit_t obj_qubit, const qbits_t& ctrls, - const std::vector>& m, index_t dim); - static void ApplyTwoQubitsMatrix(qs_data_p_t src, qs_data_p_t des, const qbits_t& objs, const qbits_t& ctrls, - const std::vector>& m, index_t dim); - static void ApplyNQubitsMatrix(qs_data_p_t src, qs_data_p_t des, const qbits_t& objs, const qbits_t& ctrls, - const std::vector>& m, index_t dim); - static void ApplyMatrixGate(qs_data_p_t src, qs_data_p_t des, const qbits_t& objs, const qbits_t& ctrls, + static void ApplySingleQubitMatrix(const qs_data_p_t& src, qs_data_p_t* des_p, qbit_t obj_qubit, + const qbits_t& ctrls, const std::vector>& m, + index_t dim); + static void ApplyTwoQubitsMatrix(const qs_data_p_t& src, qs_data_p_t* des_p, const qbits_t& objs, + const qbits_t& ctrls, const std::vector>& m, + index_t dim); + static void ApplyNQubitsMatrix(const qs_data_p_t& src, qs_data_p_t* des_p, const qbits_t& objs, + const qbits_t& ctrls, const std::vector>& m, index_t dim); + static void ApplyMatrixGate(const qs_data_p_t& src, qs_data_p_t* des_p, const qbits_t& objs, const qbits_t& ctrls, const std::vector>& m, index_t dim); - static void ApplyH(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, index_t dim); - static void ApplyGP(qs_data_p_t qs, qbit_t obj_qubit, const qbits_t& ctrls, calc_type val, index_t dim, + static void ApplyH(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim); + static void ApplyGP(qs_data_p_t* qs_p, qbit_t obj_qubit, const qbits_t& ctrls, calc_type val, index_t dim, bool diff = false); - static void ApplyRX(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, + static void ApplyRX(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff = false); - static void ApplyRY(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, + static void ApplyRY(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff = false); - static void ApplyRZ(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, + static void ApplyRZ(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff = false); - static void ApplySWAP(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, index_t dim); - static void ApplyISWAP(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, bool daggered, index_t dim); - static void ApplyRxx(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, + static void ApplySWAP(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim); + static void ApplyISWAP(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, bool daggered, index_t dim); + static void ApplyRxx(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff = false); - static void ApplyRyy(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, + static void ApplyRyy(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff = false); - static void ApplyRzz(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, + static void ApplyRzz(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff = false); - static void ApplyRxy(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, + static void ApplyRxy(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff = false); - static void ApplyRxz(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, + static void ApplyRxz(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff = false); - static void ApplyRyz(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, + static void ApplyRyz(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff = false); // gate_expectation // ======================================================================================================== - static qs_data_t ExpectDiffSingleQubitMatrix(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, + static qs_data_t ExpectDiffSingleQubitMatrix(const qs_data_p_t& bra, const qs_data_p_t& ket, const qbits_t& objs, const qbits_t& ctrls, const VVT& m, index_t dim); - static qs_data_t ExpectDiffTwoQubitsMatrix(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, + static qs_data_t ExpectDiffTwoQubitsMatrix(const qs_data_p_t& bra, const qs_data_p_t& ket, const qbits_t& objs, const qbits_t& ctrls, const VVT& m, index_t dim); - static qs_data_t ExpectDiffNQubitsMatrix(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, + static qs_data_t ExpectDiffNQubitsMatrix(const qs_data_p_t& bra, const qs_data_p_t& ket, const qbits_t& objs, const qbits_t& ctrls, const VVT& m, index_t dim); - static qs_data_t ExpectDiffMatrixGate(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, const qbits_t& ctrls, - const VVT& m, index_t dim); - static qs_data_t ExpectDiffRX(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, const qbits_t& ctrls, - calc_type val, index_t dim); - static qs_data_t ExpectDiffRY(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, const qbits_t& ctrls, - calc_type val, index_t dim); - static qs_data_t ExpectDiffRZ(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, const qbits_t& ctrls, - calc_type val, index_t dim); - static qs_data_t ExpectDiffRxx(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, const qbits_t& ctrls, - calc_type val, index_t dim); - static qs_data_t ExpectDiffRyy(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, const qbits_t& ctrls, - calc_type val, index_t dim); - static qs_data_t ExpectDiffRzz(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, const qbits_t& ctrls, - calc_type val, index_t dim); - static qs_data_t ExpectDiffRxy(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, const qbits_t& ctrls, - calc_type val, index_t dim); - static qs_data_t ExpectDiffRxz(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, const qbits_t& ctrls, - calc_type val, index_t dim); - static qs_data_t ExpectDiffRyz(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, const qbits_t& ctrls, - calc_type val, index_t dim); - static qs_data_t ExpectDiffPS(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, const qbits_t& ctrls, - calc_type val, index_t dim); - static qs_data_t ExpectDiffGP(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, const qbits_t& ctrls, - calc_type val, index_t dim); + static qs_data_t ExpectDiffMatrixGate(const qs_data_p_t& bra, const qs_data_p_t& ket, const qbits_t& objs, + const qbits_t& ctrls, const VVT& m, index_t dim); + static qs_data_t ExpectDiffRX(const qs_data_p_t& bra, const qs_data_p_t& ket, const qbits_t& objs, + const qbits_t& ctrls, calc_type val, index_t dim); + static qs_data_t ExpectDiffRY(const qs_data_p_t& bra, const qs_data_p_t& ket, const qbits_t& objs, + const qbits_t& ctrls, calc_type val, index_t dim); + static qs_data_t ExpectDiffRZ(const qs_data_p_t& bra, const qs_data_p_t& ket, const qbits_t& objs, + const qbits_t& ctrls, calc_type val, index_t dim); + static qs_data_t ExpectDiffRxx(const qs_data_p_t& bra, const qs_data_p_t& ket, const qbits_t& objs, + const qbits_t& ctrls, calc_type val, index_t dim); + static qs_data_t ExpectDiffRyy(const qs_data_p_t& bra, const qs_data_p_t& ket, const qbits_t& objs, + const qbits_t& ctrls, calc_type val, index_t dim); + static qs_data_t ExpectDiffRzz(const qs_data_p_t& bra, const qs_data_p_t& ket, const qbits_t& objs, + const qbits_t& ctrls, calc_type val, index_t dim); + static qs_data_t ExpectDiffRxy(const qs_data_p_t& bra, const qs_data_p_t& ket, const qbits_t& objs, + const qbits_t& ctrls, calc_type val, index_t dim); + static qs_data_t ExpectDiffRxz(const qs_data_p_t& bra, const qs_data_p_t& ket, const qbits_t& objs, + const qbits_t& ctrls, calc_type val, index_t dim); + static qs_data_t ExpectDiffRyz(const qs_data_p_t& bra, const qs_data_p_t& ket, const qbits_t& objs, + const qbits_t& ctrls, calc_type val, index_t dim); + static qs_data_t ExpectDiffPS(const qs_data_p_t& bra, const qs_data_p_t& ket, const qbits_t& objs, + const qbits_t& ctrls, calc_type val, index_t dim); + static qs_data_t ExpectDiffGP(const qs_data_p_t& bra, const qs_data_p_t& ket, const qbits_t& objs, + const qbits_t& ctrls, calc_type val, index_t dim); static calc_type GroundStateOfZZs(const std::map& masks_value, qbit_t n_qubits); }; @@ -183,6 +192,9 @@ struct CastTo { static constexpr tensor::TDtype src_dtype = policy_src::dtype; static constexpr tensor::TDtype des_dtype = policy_des::dtype; static typename policy_des::qs_data_p_t cast(typename policy_src::qs_data_p_t qs, size_t dim) { + if (qs == nullptr) { + return nullptr; + } if constexpr (std::is_same_v) { return policy_des::Copy(qs, dim); } else { diff --git a/ccsrc/include/simulator/vector/detail/gpu_vector_policy.cuh b/ccsrc/include/simulator/vector/detail/gpu_vector_policy.cuh index babea8294e2b5e0bbf54dc7dd302f523705d49e1..b8a63f5d2058640f153fe0e057b3b4bddb2bcab8 100644 --- a/ccsrc/include/simulator/vector/detail/gpu_vector_policy.cuh +++ b/ccsrc/include/simulator/vector/detail/gpu_vector_policy.cuh @@ -43,130 +43,138 @@ struct GPUVectorPolicyBase { using py_qs_data_t = std::complex; using py_qs_datas_t = std::vector; static qs_data_p_t InitState(index_t dim, bool zero_state = true); - static void Reset(qs_data_p_t qs, index_t dim); - static void FreeState(qs_data_p_t qs); - static void Display(qs_data_p_t qs, qbit_t n_qubits, qbit_t q_limit = 10); - static void SetToZeroExcept(qs_data_p_t qs, index_t ctrl_mask, index_t dim); + static void Reset(qs_data_p_t* qs_p, index_t dim); + static void FreeState(qs_data_p_t* qs_p); + static void Display(const qs_data_p_t& qs, qbit_t n_qubits, qbit_t q_limit = 10); + static void SetToZeroExcept(qs_data_p_t* qs_p, index_t ctrl_mask, index_t dim); template - static void ConditionalBinary(qs_data_p_t src, qs_data_p_t des, qs_data_t succ_coeff, qs_data_t fail_coeff, - index_t dim, const binary_op& op); - template - static void ConditionalBinary(qs_data_p_t src, qs_data_p_t des, index_t mask, index_t condi, qs_data_t succ_coeff, + static void ConditionalBinary(const qs_data_p_t& src, qs_data_p_t* des_p, qs_data_t succ_coeff, qs_data_t fail_coeff, index_t dim, const binary_op& op); - static void ConditionalAdd(qs_data_p_t src, qs_data_p_t des, index_t mask, index_t condi, qs_data_t succ_coeff, - qs_data_t fail_coeff, index_t dim); - static void ConditionalMinus(qs_data_p_t src, qs_data_p_t des, index_t mask, index_t condi, qs_data_t succ_coeff, - qs_data_t fail_coeff, index_t dim); - static void ConditionalMul(qs_data_p_t src, qs_data_p_t des, index_t mask, index_t condi, qs_data_t succ_coeff, - qs_data_t fail_coeff, index_t dim); - static void ConditionalDiv(qs_data_p_t src, qs_data_p_t des, index_t mask, index_t condi, qs_data_t succ_coeff, - qs_data_t fail_coeff, index_t dim); - static void QSMulValue(qs_data_p_t src, qs_data_p_t des, qs_data_t value, index_t dim); - static qs_data_t ConditionalCollect(qs_data_p_t qs, index_t mask, index_t condi, bool abs, index_t dim); - static py_qs_datas_t GetQS(qs_data_p_t qs, index_t dim); - static void SetQS(qs_data_p_t qs, const py_qs_datas_t& qs_out, index_t dim); - static qs_data_p_t ApplyTerms(qs_data_p_t qs, const std::vector>& ham, index_t dim); - static qs_data_p_t Copy(qs_data_p_t qs, index_t dim); + template + static void ConditionalBinary(const qs_data_p_t& src, qs_data_p_t* des_p, index_t mask, index_t condi, + qs_data_t succ_coeff, qs_data_t fail_coeff, index_t dim, const binary_op& op); + static void ConditionalAdd(const qs_data_p_t& src, qs_data_p_t* des_p, index_t mask, index_t condi, + qs_data_t succ_coeff, qs_data_t fail_coeff, index_t dim); + static void ConditionalMinus(const qs_data_p_t& src, qs_data_p_t* des_p, index_t mask, index_t condi, + qs_data_t succ_coeff, qs_data_t fail_coeff, index_t dim); + static void ConditionalMul(const qs_data_p_t& src, qs_data_p_t* des_p, index_t mask, index_t condi, + qs_data_t succ_coeff, qs_data_t fail_coeff, index_t dim); + static void ConditionalDiv(const qs_data_p_t& src, qs_data_p_t* des_p, index_t mask, index_t condi, + qs_data_t succ_coeff, qs_data_t fail_coeff, index_t dim); + static void QSMulValue(const qs_data_p_t& src, qs_data_p_t* des_p, qs_data_t value, index_t dim); + static qs_data_t ConditionalCollect(const qs_data_p_t& qs, index_t mask, index_t condi, bool abs, index_t dim); + static py_qs_datas_t GetQS(const qs_data_p_t& qs, index_t dim); + static void SetQS(qs_data_p_t* qs_p, const py_qs_datas_t& qs_out, index_t dim); + static qs_data_p_t ApplyTerms(qs_data_p_t* qs_p, const std::vector>& ham, index_t dim); + static py_qs_data_t ExpectationOfTerms(const qs_data_p_t& bra, const qs_data_p_t& ket, + const std::vector>& ham, index_t dim); + static qs_data_p_t Copy(const qs_data_p_t& qs, index_t dim); template - static py_qs_data_t ConditionVdot(qs_data_p_t bra, qs_data_p_t ket, index_t dim); - static py_qs_data_t OneStateVdot(qs_data_p_t bra, qs_data_p_t ket, qbit_t obj_qubit, index_t dim); - static py_qs_data_t ZeroStateVdot(qs_data_p_t bra, qs_data_p_t ket, qbit_t obj_qubit, index_t dim); - static py_qs_data_t Vdot(qs_data_p_t bra, qs_data_p_t ket, index_t dim); - static qs_data_p_t CsrDotVec(const std::shared_ptr>& a, qs_data_p_t vec, + static py_qs_data_t ConditionVdot(const qs_data_p_t& bra, const qs_data_p_t& ket, index_t dim); + static py_qs_data_t OneStateVdot(const qs_data_p_t& bra, const qs_data_p_t& ket, qbit_t obj_qubit, index_t dim); + static py_qs_data_t ZeroStateVdot(const qs_data_p_t& bra, const qs_data_p_t& ket, qbit_t obj_qubit, index_t dim); + static py_qs_data_t Vdot(const qs_data_p_t& bra, const qs_data_p_t& ket, index_t dim); + static qs_data_p_t CsrDotVec(const std::shared_ptr>& a, const qs_data_p_t& vec, index_t dim); static qs_data_p_t CsrDotVec(const std::shared_ptr>& a, - const std::shared_ptr>& b, qs_data_p_t vec, + const std::shared_ptr>& b, const qs_data_p_t& vec, index_t dim); - + static py_qs_data_t ExpectationOfCsr(const std::shared_ptr>& a, + const qs_data_p_t& bra, const qs_data_p_t& ket, index_t dim); + static py_qs_data_t ExpectationOfCsr(const std::shared_ptr>& a, + const std::shared_ptr>& b, + const qs_data_p_t& bra, const qs_data_p_t& ket, index_t dim); // X like operator // ======================================================================================================== - static void ApplyXLike(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, qs_data_t v1, qs_data_t v2, + static void ApplyXLike(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, qs_data_t v1, qs_data_t v2, index_t dim); - static void ApplyX(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, index_t dim); - static void ApplyY(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, index_t dim); + static void ApplyX(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim); + static void ApplyY(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim); // Z like operator // ======================================================================================================== - static void ApplyZLike(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, qs_data_t val, index_t dim); - static void ApplyZ(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, index_t dim); - static void ApplySGate(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, index_t dim); - static void ApplyT(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, index_t dim); - static void ApplySdag(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, index_t dim); - static void ApplyTdag(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, index_t dim); - static void ApplyPS(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, + static void ApplyZLike(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, qs_data_t val, index_t dim); + static void ApplyZ(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim); + static void ApplySGate(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim); + static void ApplyT(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim); + static void ApplySdag(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim); + static void ApplyTdag(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim); + static void ApplyPS(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff = false); // Single qubit operator // ======================================================================================================== - static void ApplySingleQubitMatrix(qs_data_p_t src, qs_data_p_t des, qbit_t obj_qubit, const qbits_t& ctrls, - const std::vector>& m, index_t dim); - static void ApplyTwoQubitsMatrix(qs_data_p_t src, qs_data_p_t des, const qbits_t& objs, const qbits_t& ctrls, - const std::vector>& m, index_t dim); - static void ApplyNQubitsMatrix(qs_data_p_t src, qs_data_p_t des, const qbits_t& objs, const qbits_t& ctrls, - const std::vector>& m, index_t dim); - static void ApplyMatrixGate(qs_data_p_t src, qs_data_p_t des, const qbits_t& objs, const qbits_t& ctrls, + static void ApplySingleQubitMatrix(const qs_data_p_t& src, qs_data_p_t* des_p, qbit_t obj_qubit, + const qbits_t& ctrls, const std::vector>& m, + index_t dim); + static void ApplyTwoQubitsMatrix(const qs_data_p_t& src, qs_data_p_t* des_p, const qbits_t& objs, + const qbits_t& ctrls, const std::vector>& m, + index_t dim); + static void ApplyNQubitsMatrix(const qs_data_p_t& src, qs_data_p_t* des_p, const qbits_t& objs, + const qbits_t& ctrls, const std::vector>& m, index_t dim); + static void ApplyMatrixGate(const qs_data_p_t& src, qs_data_p_t* des_p, const qbits_t& objs, const qbits_t& ctrls, const std::vector>& m, index_t dim); - static void ApplyH(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, index_t dim); - static void ApplyGP(qs_data_p_t qs, qbit_t obj_qubit, const qbits_t& ctrls, calc_type val, index_t dim, + static void ApplyH(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim); + static void ApplyGP(qs_data_p_t* qs_p, qbit_t obj_qubit, const qbits_t& ctrls, calc_type val, index_t dim, bool diff = false); - static void ApplyRX(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, + static void ApplyRX(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff = false); - static void ApplyRY(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, + static void ApplyRY(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff = false); - static void ApplyRZ(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, + static void ApplyRZ(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff = false); - static void ApplySWAP(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, index_t dim); - static void ApplyISWAP(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, bool daggered, index_t dim); - static void ApplyRxx(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, + static void ApplySWAP(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim); + static void ApplyISWAP(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, bool daggered, index_t dim); + static void ApplyRxx(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff = false); - static void ApplyRyy(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, + static void ApplyRyy(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff = false); - static void ApplyRzz(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, + static void ApplyRzz(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff = false); - static void ApplyRxy(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, + static void ApplyRxy(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff = false); - static void ApplyRxz(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, + static void ApplyRxz(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff = false); - static void ApplyRyz(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, + static void ApplyRyz(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff = false); // gate_expec // ======================================================================================================== - static qs_data_t ExpectDiffSingleQubitMatrix(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, + static qs_data_t ExpectDiffSingleQubitMatrix(const qs_data_p_t& bra, const qs_data_p_t& ket, const qbits_t& objs, const qbits_t& ctrls, const std::vector& m, index_t dim); - static qs_data_t ExpectDiffTwoQubitsMatrix(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, + static qs_data_t ExpectDiffTwoQubitsMatrix(const qs_data_p_t& bra, const qs_data_p_t& ket, const qbits_t& objs, const qbits_t& ctrls, const std::vector& m, index_t dim); - static qs_data_t ExpectDiffNQubitsMatrix(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, + static qs_data_t ExpectDiffNQubitsMatrix(const qs_data_p_t& bra, const qs_data_p_t& ket, const qbits_t& objs, const qbits_t& ctrls, const std::vector& m, index_t dim); - static qs_data_t ExpectDiffMatrixGate(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, const qbits_t& ctrls, - const std::vector& m, index_t dim); - static qs_data_t ExpectDiffRX(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, const qbits_t& ctrls, - calc_type val, index_t dim); - static qs_data_t ExpectDiffRY(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, const qbits_t& ctrls, - calc_type val, index_t dim); - static qs_data_t ExpectDiffRZ(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, const qbits_t& ctrls, - calc_type val, index_t dim); - static qs_data_t ExpectDiffRxx(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, const qbits_t& ctrls, - calc_type val, index_t dim); - static qs_data_t ExpectDiffRyy(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, const qbits_t& ctrls, - calc_type val, index_t dim); - static qs_data_t ExpectDiffRzz(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, const qbits_t& ctrls, - calc_type val, index_t dim); - static qs_data_t ExpectDiffRxy(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, const qbits_t& ctrls, - calc_type val, index_t dim); - static qs_data_t ExpectDiffRxz(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, const qbits_t& ctrls, - calc_type val, index_t dim); - static qs_data_t ExpectDiffRyz(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, const qbits_t& ctrls, - calc_type val, index_t dim); - static qs_data_t ExpectDiffPS(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, const qbits_t& ctrls, - calc_type val, index_t dim); - static qs_data_t ExpectDiffGP(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, const qbits_t& ctrls, - calc_type val, index_t dim); + static qs_data_t ExpectDiffMatrixGate(const qs_data_p_t& bra, const qs_data_p_t& ket, const qbits_t& objs, + const qbits_t& ctrls, const std::vector& m, index_t dim); + static qs_data_t ExpectDiffRX(const qs_data_p_t& bra, const qs_data_p_t& ket, const qbits_t& objs, + const qbits_t& ctrls, calc_type val, index_t dim); + static qs_data_t ExpectDiffRY(const qs_data_p_t& bra, const qs_data_p_t& ket, const qbits_t& objs, + const qbits_t& ctrls, calc_type val, index_t dim); + static qs_data_t ExpectDiffRZ(const qs_data_p_t& bra, const qs_data_p_t& ket, const qbits_t& objs, + const qbits_t& ctrls, calc_type val, index_t dim); + static qs_data_t ExpectDiffRxx(const qs_data_p_t& bra, const qs_data_p_t& ket, const qbits_t& objs, + const qbits_t& ctrls, calc_type val, index_t dim); + static qs_data_t ExpectDiffRyy(const qs_data_p_t& bra, const qs_data_p_t& ket, const qbits_t& objs, + const qbits_t& ctrls, calc_type val, index_t dim); + static qs_data_t ExpectDiffRzz(const qs_data_p_t& bra, const qs_data_p_t& ket, const qbits_t& objs, + const qbits_t& ctrls, calc_type val, index_t dim); + static qs_data_t ExpectDiffRxy(const qs_data_p_t& bra, const qs_data_p_t& ket, const qbits_t& objs, + const qbits_t& ctrls, calc_type val, index_t dim); + static qs_data_t ExpectDiffRxz(const qs_data_p_t& bra, const qs_data_p_t& ket, const qbits_t& objs, + const qbits_t& ctrls, calc_type val, index_t dim); + static qs_data_t ExpectDiffRyz(const qs_data_p_t& bra, const qs_data_p_t& ket, const qbits_t& objs, + const qbits_t& ctrls, calc_type val, index_t dim); + static qs_data_t ExpectDiffPS(const qs_data_p_t& bra, const qs_data_p_t& ket, const qbits_t& objs, + const qbits_t& ctrls, calc_type val, index_t dim); + static qs_data_t ExpectDiffGP(const qs_data_p_t& bra, const qs_data_p_t& ket, const qbits_t& objs, + const qbits_t& ctrls, calc_type val, index_t dim); static calc_type GroundStateOfZZs(const std::map& masks_value, qbit_t n_qubits); }; @@ -209,6 +217,9 @@ struct CastTo { static constexpr tensor::TDtype src_dtype = policy_src::dtype; static constexpr tensor::TDtype des_dtype = policy_des::dtype; static typename policy_des::qs_data_p_t cast(typename policy_src::qs_data_p_t qs, size_t dim) { + if (qs == nullptr) { + return nullptr; + } if constexpr (std::is_same_v) { return policy_des::Copy(qs, dim); } diff --git a/ccsrc/include/simulator/vector/vector_state.hpp b/ccsrc/include/simulator/vector/vector_state.hpp index e6252f499608c77218d6d59a88f6175bfc1b66b5..586590f77f1dfd155fcc4509f7fdc8b47c47043e 100644 --- a/ccsrc/include/simulator/vector/vector_state.hpp +++ b/ccsrc/include/simulator/vector/vector_state.hpp @@ -74,7 +74,7 @@ class VectorState { //! dtor ~VectorState() { - qs_policy_t::FreeState(qs); + qs_policy_t::FreeState(&qs); } virtual tensor::TDtype DType(); @@ -116,13 +116,16 @@ class VectorState { //! calculate the expectation of differential form of parameterized gate two quantum state. That is //! - virtual tensor::Matrix ExpectDiffGate(qs_data_p_t bra, qs_data_p_t ket, const std::shared_ptr& gate, - const parameter::ParameterResolver& pr, index_t dim); - - virtual tensor::Matrix ExpectDiffU3(qs_data_p_t bra, qs_data_p_t ket, const std::shared_ptr& gate, - const parameter::ParameterResolver& pr, index_t dim); - virtual tensor::Matrix ExpectDiffFSim(qs_data_p_t bra, qs_data_p_t ket, const std::shared_ptr& gate, - const parameter::ParameterResolver& pr, index_t dim); + virtual tensor::Matrix ExpectDiffGate(const qs_data_p_t& bra, const qs_data_p_t& ket, + const std::shared_ptr& gate, + const parameter::ParameterResolver& pr, index_t dim) const; + + virtual tensor::Matrix ExpectDiffU3(const qs_data_p_t& bra, const qs_data_p_t& ket, + const std::shared_ptr& gate, const parameter::ParameterResolver& pr, + index_t dim) const; + virtual tensor::Matrix ExpectDiffFSim(const qs_data_p_t& bra, const qs_data_p_t& ket, + const std::shared_ptr& gate, + const parameter::ParameterResolver& pr, index_t dim) const; //! Apply a quantum circuit on this quantum state virtual std::map ApplyCircuit(const circuit_t& circ, const parameter::ParameterResolver& pr = parameter::ParameterResolver()); @@ -131,89 +134,68 @@ class VectorState { virtual void ApplyHamiltonian(const Hamiltonian& ham); //! Get the matrix of quantum circuit. - virtual VVT GetCircuitMatrix(const circuit_t& circ, const parameter::ParameterResolver& pr); + virtual VVT GetCircuitMatrix(const circuit_t& circ, const parameter::ParameterResolver& pr) const; //! Get expectation of given hamiltonian virtual py_qs_data_t GetExpectation(const Hamiltonian& ham, const circuit_t& circ, - const parameter::ParameterResolver& pr) { - auto ket = *this; - ket.ApplyCircuit(circ, pr); - auto bra = ket; - ket.ApplyHamiltonian(ham); - return qs_policy_t::Vdot(bra.qs, ket.qs, dim); - } + const parameter::ParameterResolver& pr) const; virtual py_qs_data_t GetExpectation(const Hamiltonian& ham, const circuit_t& circ_right, - const circuit_t& circ_left, const parameter::ParameterResolver& pr) { - auto ket = *this; - auto bra = *this; - ket.ApplyCircuit(circ_right, pr); - ket.ApplyHamiltonian(ham); - bra.ApplyCircuit(circ_left, pr); - return qs_policy_t::Vdot(bra.qs, ket.qs, dim); - } + const circuit_t& circ_left, const parameter::ParameterResolver& pr) const; virtual py_qs_data_t GetExpectation(const Hamiltonian& ham, const circuit_t& circ_right, const circuit_t& circ_left, const derived_t& simulator_left, - const parameter::ParameterResolver& pr) { - auto ket = *this; - auto bra = simulator_left; - ket.ApplyCircuit(circ_right, pr); - ket.ApplyHamiltonian(ham); - bra.ApplyCircuit(circ_left, pr); - return qs_policy_t::Vdot(bra.qs, ket.qs, dim); - } + const parameter::ParameterResolver& pr) const; //! Get the expectation of hamiltonian //! Here a single hamiltonian and single parameter data are needed virtual VT GetExpectationWithGradOneOne(const Hamiltonian& ham, const circuit_t& circ, const circuit_t& herm_circ, const parameter::ParameterResolver& pr, - const MST& p_map); + const MST& p_map) const; //! Get the expectation of hamiltonian //! Here multiple hamiltonian and single parameter data are needed virtual VVT GetExpectationWithGradOneMulti( const std::vector>>& hams, const circuit_t& circ, - const circuit_t& herm_circ, const parameter::ParameterResolver& pr, const MST& p_map, int n_thread); + const circuit_t& herm_circ, const parameter::ParameterResolver& pr, const MST& p_map, + int n_thread) const; + //! Get the expectation of hamiltonian //! Here multiple hamiltonian and multiple parameters are needed virtual VT> GetExpectationWithGradMultiMulti( const std::vector>>& hams, const circuit_t& circ, const circuit_t& herm_circ, const VVT& enc_data, const VT& ans_data, const VS& enc_name, - const VS& ans_name, size_t batch_threads, size_t mea_threads); + const VS& ans_name, size_t batch_threads, size_t mea_threads) const; virtual VVT GetExpectationNonHermitianWithGradOneMulti( const std::vector>>& hams, const std::vector>>& herm_hams, const circuit_t& left_circ, const circuit_t& herm_left_circ, const circuit_t& right_circ, const circuit_t& herm_right_circ, const parameter::ParameterResolver& pr, const MST& p_map, int n_thread, - const derived_t& simulator_left); + const derived_t& simulator_left) const; virtual VVT LeftSizeGradOneMulti(const std::vector>>& hams, const circuit_t& herm_left_circ, const parameter::ParameterResolver& pr, const MST& p_map, int n_thread, const derived_t& simulator_left, - const derived_t& simulator_right); + const derived_t& simulator_right) const; virtual VT> GetExpectationNonHermitianWithGradMultiMulti( const std::vector>>& hams, const std::vector>>& herm_hams, const circuit_t& left_circ, const circuit_t& herm_left_circ, const circuit_t& right_circ, const circuit_t& herm_right_circ, const VVT& enc_data, const VT& ans_data, const VS& enc_name, const VS& ans_name, - const derived_t& simulator_left, size_t batch_threads, size_t mea_threads); + const derived_t& simulator_left, size_t batch_threads, size_t mea_threads) const; virtual VT Sampling(const circuit_t& circ, const parameter::ParameterResolver& pr, size_t shots, - const MST& key_map, unsigned seed); + const MST& key_map, unsigned seed) const; template class cast_policy> - VectorState astype(unsigned seed) { - return VectorState(cast_policy::cast(this->qs, this->dim), this->n_qubits, - seed); - } + VectorState astype(unsigned seed) const; protected: - qs_data_p_t qs = nullptr; + qs_data_p_t qs = nullptr; // nullptr represent zero state. qbit_t n_qubits = 0; index_t dim = 0; unsigned seed = 0; diff --git a/ccsrc/include/simulator/vector/vector_state.tpp b/ccsrc/include/simulator/vector/vector_state.tpp index b384cebf9702dd8bd2ff969131f62f335007e93d..c778a1b26b181549e7ddeb61fe24d5c831be0ac9 100644 --- a/ccsrc/include/simulator/vector/vector_state.tpp +++ b/ccsrc/include/simulator/vector/vector_state.tpp @@ -52,7 +52,6 @@ namespace mindquantum::sim::vector::detail { template VectorState::VectorState(qbit_t n_qubits, unsigned seed) : n_qubits(n_qubits), dim(1UL << n_qubits), seed(seed), rnd_eng_(seed) { - qs = qs_policy_t::InitState(dim); std::uniform_real_distribution dist(0., 1.); rng_ = std::bind(dist, std::ref(rnd_eng_)); } @@ -85,7 +84,7 @@ VectorState::VectorState(const VectorState& sim) { template auto VectorState::operator=(const VectorState& sim) -> derived_t& { - qs_policy_t::FreeState(this->qs); + qs_policy_t::FreeState(&(this->qs)); this->qs = qs_policy_t::Copy(sim.qs, sim.dim); this->dim = sim.dim; this->n_qubits = sim.n_qubits; @@ -110,7 +109,7 @@ VectorState::VectorState(VectorState&& sim) { template auto VectorState::operator=(VectorState&& sim) -> derived_t& { - qs_policy_t::FreeState(this->qs); + qs_policy_t::FreeState(&(this->qs)); this->qs = sim.qs; this->dim = sim.dim; this->n_qubits = sim.n_qubits; @@ -129,7 +128,7 @@ tensor::TDtype VectorState::DType() { template void VectorState::Reset() { - qs_policy_t::Reset(qs, dim); + qs_policy_t::Reset(&qs, dim); } template @@ -144,7 +143,7 @@ auto VectorState::GetQS() const -> VT { template void VectorState::SetQS(const VT& qs_out) { - qs_policy_t::SetQS(qs, qs_out, dim); + qs_policy_t::SetQS(&qs, qs_out, dim); } template @@ -155,35 +154,35 @@ index_t VectorState::ApplyGate(const std::shared_ptr& g case GateID::I: break; case GateID::X: - qs_policy_t::ApplyX(qs, gate->obj_qubits_, gate->ctrl_qubits_, dim); + qs_policy_t::ApplyX(&qs, gate->obj_qubits_, gate->ctrl_qubits_, dim); break; case GateID::Y: - qs_policy_t::ApplyY(qs, gate->obj_qubits_, gate->ctrl_qubits_, dim); + qs_policy_t::ApplyY(&qs, gate->obj_qubits_, gate->ctrl_qubits_, dim); break; case GateID::Z: - qs_policy_t::ApplyZ(qs, gate->obj_qubits_, gate->ctrl_qubits_, dim); + qs_policy_t::ApplyZ(&qs, gate->obj_qubits_, gate->ctrl_qubits_, dim); break; case GateID::H: - qs_policy_t::ApplyH(qs, gate->obj_qubits_, gate->ctrl_qubits_, dim); + qs_policy_t::ApplyH(&qs, gate->obj_qubits_, gate->ctrl_qubits_, dim); break; case GateID::S: - qs_policy_t::ApplySGate(qs, gate->obj_qubits_, gate->ctrl_qubits_, dim); + qs_policy_t::ApplySGate(&qs, gate->obj_qubits_, gate->ctrl_qubits_, dim); break; case GateID::Sdag: - qs_policy_t::ApplySdag(qs, gate->obj_qubits_, gate->ctrl_qubits_, dim); + qs_policy_t::ApplySdag(&qs, gate->obj_qubits_, gate->ctrl_qubits_, dim); break; case GateID::T: - qs_policy_t::ApplyT(qs, gate->obj_qubits_, gate->ctrl_qubits_, dim); + qs_policy_t::ApplyT(&qs, gate->obj_qubits_, gate->ctrl_qubits_, dim); break; case GateID::Tdag: - qs_policy_t::ApplyTdag(qs, gate->obj_qubits_, gate->ctrl_qubits_, dim); + qs_policy_t::ApplyTdag(&qs, gate->obj_qubits_, gate->ctrl_qubits_, dim); break; case GateID::SWAP: - qs_policy_t::ApplySWAP(qs, gate->obj_qubits_, gate->ctrl_qubits_, dim); + qs_policy_t::ApplySWAP(&qs, gate->obj_qubits_, gate->ctrl_qubits_, dim); break; case GateID::ISWAP: { bool daggered = static_cast(gate.get())->daggered_; - qs_policy_t::ApplyISWAP(qs, gate->obj_qubits_, gate->ctrl_qubits_, daggered, dim); + qs_policy_t::ApplyISWAP(&qs, gate->obj_qubits_, gate->ctrl_qubits_, daggered, dim); } break; case GateID::RX: { auto g = static_cast(gate.get()); @@ -191,7 +190,7 @@ index_t VectorState::ApplyGate(const std::shared_ptr& g diff = false; } auto val = tensor::ops::cpu::to_vector(g->prs_[0].Combination(pr).const_value)[0]; - qs_policy_t::ApplyRX(qs, gate->obj_qubits_, gate->ctrl_qubits_, val, dim, diff); + qs_policy_t::ApplyRX(&qs, gate->obj_qubits_, gate->ctrl_qubits_, val, dim, diff); } break; case GateID::RY: { auto g = static_cast(gate.get()); @@ -199,7 +198,7 @@ index_t VectorState::ApplyGate(const std::shared_ptr& g diff = false; } auto val = tensor::ops::cpu::to_vector(g->prs_[0].Combination(pr).const_value)[0]; - qs_policy_t::ApplyRY(qs, gate->obj_qubits_, gate->ctrl_qubits_, val, dim, diff); + qs_policy_t::ApplyRY(&qs, gate->obj_qubits_, gate->ctrl_qubits_, val, dim, diff); } break; case GateID::RZ: { auto g = static_cast(gate.get()); @@ -207,7 +206,7 @@ index_t VectorState::ApplyGate(const std::shared_ptr& g diff = false; } auto val = tensor::ops::cpu::to_vector(g->prs_[0].Combination(pr).const_value)[0]; - qs_policy_t::ApplyRZ(qs, gate->obj_qubits_, gate->ctrl_qubits_, val, dim, diff); + qs_policy_t::ApplyRZ(&qs, gate->obj_qubits_, gate->ctrl_qubits_, val, dim, diff); } break; case GateID::Rxx: { auto g = static_cast(gate.get()); @@ -215,7 +214,7 @@ index_t VectorState::ApplyGate(const std::shared_ptr& g diff = false; } auto val = tensor::ops::cpu::to_vector(g->prs_[0].Combination(pr).const_value)[0]; - qs_policy_t::ApplyRxx(qs, gate->obj_qubits_, gate->ctrl_qubits_, val, dim, diff); + qs_policy_t::ApplyRxx(&qs, gate->obj_qubits_, gate->ctrl_qubits_, val, dim, diff); } break; case GateID::Ryy: { auto g = static_cast(gate.get()); @@ -223,7 +222,7 @@ index_t VectorState::ApplyGate(const std::shared_ptr& g diff = false; } auto val = tensor::ops::cpu::to_vector(g->prs_[0].Combination(pr).const_value)[0]; - qs_policy_t::ApplyRyy(qs, gate->obj_qubits_, gate->ctrl_qubits_, val, dim, diff); + qs_policy_t::ApplyRyy(&qs, gate->obj_qubits_, gate->ctrl_qubits_, val, dim, diff); } break; case GateID::Rzz: { auto g = static_cast(gate.get()); @@ -231,7 +230,7 @@ index_t VectorState::ApplyGate(const std::shared_ptr& g diff = false; } auto val = tensor::ops::cpu::to_vector(g->prs_[0].Combination(pr).const_value)[0]; - qs_policy_t::ApplyRzz(qs, gate->obj_qubits_, gate->ctrl_qubits_, val, dim, diff); + qs_policy_t::ApplyRzz(&qs, gate->obj_qubits_, gate->ctrl_qubits_, val, dim, diff); } break; case GateID::Rxy: { auto g = static_cast(gate.get()); @@ -239,7 +238,7 @@ index_t VectorState::ApplyGate(const std::shared_ptr& g diff = false; } auto val = tensor::ops::cpu::to_vector(g->prs_[0].Combination(pr).const_value)[0]; - qs_policy_t::ApplyRxy(qs, gate->obj_qubits_, gate->ctrl_qubits_, val, dim, diff); + qs_policy_t::ApplyRxy(&qs, gate->obj_qubits_, gate->ctrl_qubits_, val, dim, diff); } break; case GateID::Rxz: { auto g = static_cast(gate.get()); @@ -247,7 +246,7 @@ index_t VectorState::ApplyGate(const std::shared_ptr& g diff = false; } auto val = tensor::ops::cpu::to_vector(g->prs_[0].Combination(pr).const_value)[0]; - qs_policy_t::ApplyRxz(qs, gate->obj_qubits_, gate->ctrl_qubits_, val, dim, diff); + qs_policy_t::ApplyRxz(&qs, gate->obj_qubits_, gate->ctrl_qubits_, val, dim, diff); } break; case GateID::Ryz: { auto g = static_cast(gate.get()); @@ -255,7 +254,7 @@ index_t VectorState::ApplyGate(const std::shared_ptr& g diff = false; } auto val = tensor::ops::cpu::to_vector(g->prs_[0].Combination(pr).const_value)[0]; - qs_policy_t::ApplyRyz(qs, gate->obj_qubits_, gate->ctrl_qubits_, val, dim, diff); + qs_policy_t::ApplyRyz(&qs, gate->obj_qubits_, gate->ctrl_qubits_, val, dim, diff); } break; case GateID::PS: { auto g = static_cast(gate.get()); @@ -263,7 +262,7 @@ index_t VectorState::ApplyGate(const std::shared_ptr& g diff = false; } auto val = tensor::ops::cpu::to_vector(g->prs_[0].Combination(pr).const_value)[0]; - qs_policy_t::ApplyPS(qs, gate->obj_qubits_, gate->ctrl_qubits_, val, dim, diff); + qs_policy_t::ApplyPS(&qs, gate->obj_qubits_, gate->ctrl_qubits_, val, dim, diff); } break; case GateID::GP: { auto g = static_cast(gate.get()); @@ -271,7 +270,7 @@ index_t VectorState::ApplyGate(const std::shared_ptr& g diff = false; } auto val = tensor::ops::cpu::to_vector(g->prs_[0].Combination(pr).const_value)[0]; - qs_policy_t::ApplyGP(qs, gate->obj_qubits_[0], gate->ctrl_qubits_, val, dim, diff); + qs_policy_t::ApplyGP(&qs, gate->obj_qubits_[0], gate->ctrl_qubits_, val, dim, diff); } break; case GateID::U3: { if (diff) { @@ -287,7 +286,7 @@ index_t VectorState::ApplyGate(const std::shared_ptr& g auto lambda = u3->lambda.Combination(pr).const_value; m = U3Matrix(theta, phi, lambda); } - qs_policy_t::ApplySingleQubitMatrix(qs, qs, gate->obj_qubits_[0], gate->ctrl_qubits_, + qs_policy_t::ApplySingleQubitMatrix(qs, &qs, gate->obj_qubits_[0], gate->ctrl_qubits_, tensor::ops::cpu::to_vector(m), dim); } break; case GateID::FSim: { @@ -303,7 +302,7 @@ index_t VectorState::ApplyGate(const std::shared_ptr& g auto phi = fsim->phi.Combination(pr).const_value; m = FSimMatrix(theta, phi); } - qs_policy_t::ApplyTwoQubitsMatrix(qs, qs, gate->obj_qubits_, gate->ctrl_qubits_, + qs_policy_t::ApplyTwoQubitsMatrix(qs, &qs, gate->obj_qubits_, gate->ctrl_qubits_, tensor::ops::cpu::to_vector(m), dim); } break; case GateID::M: @@ -331,7 +330,7 @@ index_t VectorState::ApplyGate(const std::shared_ptr& g mat = g->numba_param_diff_matrix_(val); } } - qs_policy_t::ApplyMatrixGate(qs, qs, gate->obj_qubits_, gate->ctrl_qubits_, + qs_policy_t::ApplyMatrixGate(qs, &qs, gate->obj_qubits_, gate->ctrl_qubits_, tensor::ops::cpu::to_vector(mat), dim); break; } @@ -347,7 +346,7 @@ auto VectorState::ApplyMeasure(const std::shared_ptr& g auto one_amp = qs_policy_t::ConditionalCollect(qs, one_mask, one_mask, true, dim).real(); index_t collapse_mask = (static_cast(rng_() < one_amp) << gate->obj_qubits_[0]); qs_data_t norm_fact = (collapse_mask == 0) ? 1 / std::sqrt(1 - one_amp) : 1 / std::sqrt(one_amp); - qs_policy_t::ConditionalMul(qs, qs, one_mask, collapse_mask, norm_fact, 0.0, dim); + qs_policy_t::ConditionalMul(qs, &qs, one_mask, collapse_mask, norm_fact, 0.0, dim); return static_cast(collapse_mask != 0); } @@ -382,11 +381,11 @@ void VectorState::ApplyPauliChannel(const std::shared_ptrobj_qubits_, gate->ctrl_qubits_, dim); + qs_policy_t::ApplyX(&qs, gate->obj_qubits_, gate->ctrl_qubits_, dim); } else if (gate_index == 1) { - qs_policy_t::ApplyY(qs, gate->obj_qubits_, gate->ctrl_qubits_, dim); + qs_policy_t::ApplyY(&qs, gate->obj_qubits_, gate->ctrl_qubits_, dim); } else if (gate_index == 2) { - qs_policy_t::ApplyZ(qs, gate->obj_qubits_, gate->ctrl_qubits_, dim); + qs_policy_t::ApplyZ(&qs, gate->obj_qubits_, gate->ctrl_qubits_, dim); } } @@ -396,20 +395,20 @@ void VectorState::ApplyKrausChannel(const std::shared_ptr(gate.get()); for (size_t n_kraus = 0; n_kraus < g->kraus_operator_set_.size(); n_kraus++) { - qs_policy_t::ApplySingleQubitMatrix(qs, tmp_qs, gate->obj_qubits_[0], gate->ctrl_qubits_, + qs_policy_t::ApplySingleQubitMatrix(qs, &tmp_qs, gate->obj_qubits_[0], gate->ctrl_qubits_, tensor::ops::cpu::to_vector(g->kraus_operator_set_[n_kraus]), dim); calc_type renormal_factor_square = qs_policy_t::Vdot(tmp_qs, tmp_qs, dim).real(); prob = renormal_factor_square / (1 - prob); calc_type renormal_factor = 1 / std::sqrt(renormal_factor_square); if (static_cast(rng_()) <= prob) { - qs_policy_t::QSMulValue(tmp_qs, tmp_qs, renormal_factor, dim); - qs_policy_t::FreeState(qs); + qs_policy_t::QSMulValue(tmp_qs, &tmp_qs, renormal_factor, dim); + qs_policy_t::FreeState(&qs); qs = tmp_qs; tmp_qs = nullptr; } } - qs_policy_t::FreeState(tmp_qs); + qs_policy_t::FreeState(&tmp_qs); } template @@ -431,23 +430,87 @@ void VectorState::ApplyDampingChannel(const std::shared_ptr m({{0, 1 / reduced_factor_b}, {0, 0}}); - qs_policy_t::ApplySingleQubitMatrix(qs, tmp_qs, gate->obj_qubits_[0], gate->ctrl_qubits_, m, dim); + qs_policy_t::ApplySingleQubitMatrix(qs, &tmp_qs, gate->obj_qubits_[0], gate->ctrl_qubits_, m, dim); } else { - qs_policy_t::ConditionalMul(qs, tmp_qs, (1UL << gate->obj_qubits_[0]), 1, 1 / reduced_factor_b, 0, dim); + qs_policy_t::ConditionalMul(qs, &tmp_qs, (1UL << gate->obj_qubits_[0]), 1, 1 / reduced_factor_b, 0, dim); } qs = tmp_qs; tmp_qs = nullptr; - qs_policy_t::FreeState(tmp_qs); + qs_policy_t::FreeState(&tmp_qs); } else { calc_type coeff_a = 1 / std::sqrt(1 - prob); calc_type coeff_b = std::sqrt(1 - damping_coeff) / std::sqrt(1 - prob); - qs_policy_t::ConditionalMul(qs, qs, (1UL << gate->obj_qubits_[0]), 0, coeff_a, coeff_b, dim); + qs_policy_t::ConditionalMul(qs, &qs, (1UL << gate->obj_qubits_[0]), 0, coeff_a, coeff_b, dim); } } template -auto VectorState::ExpectDiffGate(qs_data_p_t bra, qs_data_p_t ket, const std::shared_ptr& gate, - const parameter::ParameterResolver& pr, index_t dim) -> tensor::Matrix { +auto VectorState::GetExpectation(const Hamiltonian& ham, const circuit_t& circ, + const parameter::ParameterResolver& pr) const -> py_qs_data_t { + py_qs_data_t out; + auto ket = *this; + ket.ApplyCircuit(circ, pr); + if (ham.how_to_ == ORIGIN) { + out = qs_policy_t::ExpectationOfTerms(ket.qs, ket.qs, ham.ham_, dim); + } else if (ham.how_to_ == BACKEND) { + out = qs_policy_t::ExpectationOfCsr(ham.ham_sparse_main_, ham.ham_sparse_second_, ket.qs, ket.qs, dim); + } else { + out = qs_policy_t::ExpectationOfCsr(ham.ham_sparse_main_, ket.qs, ket.qs, dim); + } + return out; +} + +template +auto VectorState::GetExpectation(const Hamiltonian& ham, const circuit_t& circ_right, + const circuit_t& circ_left, const parameter::ParameterResolver& pr) const + -> py_qs_data_t { + py_qs_data_t out; + + auto ket = *this; + auto bra = *this; + ket.ApplyCircuit(circ_right, pr); + bra.ApplyCircuit(circ_left, pr); + if (ham.how_to_ == ORIGIN) { + out = qs_policy_t::ExpectationOfTerms(bra.qs, ket.qs, ham.ham_, dim); + } else if (ham.how_to_ == BACKEND) { + out = qs_policy_t::ExpectationOfCsr(ham.ham_sparse_main_, ham.ham_sparse_second_, bra.qs, ket.qs, dim); + } else { + out = qs_policy_t::ExpectationOfCsr(ham.ham_sparse_main_, bra.qs, ket.qs, dim); + } + return out; +} + +template +auto VectorState::GetExpectation(const Hamiltonian& ham, const circuit_t& circ_right, + const circuit_t& circ_left, const derived_t& simulator_left, + const parameter::ParameterResolver& pr) const -> py_qs_data_t { + auto ket = *this; + auto bra = simulator_left; + ket.ApplyCircuit(circ_right, pr); + bra.ApplyCircuit(circ_left, pr); + py_qs_data_t out; + if (ham.how_to_ == ORIGIN) { + out = qs_policy_t::ExpectationOfTerms(bra.qs, ket.qs, ham.ham_, dim); + } else if (ham.how_to_ == BACKEND) { + out = qs_policy_t::ExpectationOfCsr(ham.ham_sparse_main_, ham.ham_sparse_second_, bra.qs, ket.qs, dim); + } else { + out = qs_policy_t::ExpectationOfCsr(ham.ham_sparse_main_, bra.qs, ket.qs, dim); + } + return out; +} + +template +template class cast_policy> +auto VectorState::astype(unsigned seed) const -> VectorState { + return VectorState(cast_policy::cast(this->qs, this->dim), this->n_qubits, + seed); +} + +template +auto VectorState::ExpectDiffGate(const qs_data_p_t& bra, const qs_data_p_t& ket, + const std::shared_ptr& gate, + const parameter::ParameterResolver& pr, index_t dim) const + -> tensor::Matrix { auto id = gate->id_; auto g = static_cast(gate.get()); auto val = tensor::ops::cpu::to_vector(g->prs_[0].Combination(pr).const_value)[0]; @@ -503,8 +566,10 @@ auto VectorState::ExpectDiffGate(qs_data_p_t bra, qs_data_p_t ket, } template -auto VectorState::ExpectDiffU3(qs_data_p_t bra, qs_data_p_t ket, const std::shared_ptr& gate, - const parameter::ParameterResolver& pr, index_t dim) -> tensor::Matrix { +auto VectorState::ExpectDiffU3(const qs_data_p_t& bra, const qs_data_p_t& ket, + const std::shared_ptr& gate, + const parameter::ParameterResolver& pr, index_t dim) const + -> tensor::Matrix { VT grad = {0, 0, 0}; auto u3 = static_cast(gate.get()); if (u3->parameterized_) { @@ -532,8 +597,10 @@ auto VectorState::ExpectDiffU3(qs_data_p_t bra, qs_data_p_t ket, c } template -auto VectorState::ExpectDiffFSim(qs_data_p_t bra, qs_data_p_t ket, const std::shared_ptr& gate, - const parameter::ParameterResolver& pr, index_t dim) -> tensor::Matrix { +auto VectorState::ExpectDiffFSim(const qs_data_p_t& bra, const qs_data_p_t& ket, + const std::shared_ptr& gate, + const parameter::ParameterResolver& pr, index_t dim) const + -> tensor::Matrix { VT grad = {0, 0}; auto fsim = static_cast(gate.get()); if (fsim->parameterized_) { @@ -572,25 +639,25 @@ template void VectorState::ApplyHamiltonian(const Hamiltonian& ham) { qs_data_p_t new_qs; if (ham.how_to_ == ORIGIN) { - new_qs = qs_policy_t::ApplyTerms(qs, ham.ham_, dim); + new_qs = qs_policy_t::ApplyTerms(&qs, ham.ham_, dim); } else if (ham.how_to_ == BACKEND) { new_qs = qs_policy_t::CsrDotVec(ham.ham_sparse_main_, ham.ham_sparse_second_, qs, dim); } else { new_qs = qs_policy_t::CsrDotVec(ham.ham_sparse_main_, qs, dim); } - qs_policy_t::FreeState(qs); + qs_policy_t::FreeState(&qs); qs = new_qs; } template -auto VectorState::GetCircuitMatrix(const circuit_t& circ, const parameter::ParameterResolver& pr) +auto VectorState::GetCircuitMatrix(const circuit_t& circ, const parameter::ParameterResolver& pr) const -> VVT { VVT> out((1UL << n_qubits), VT>((1UL << n_qubits), 0)); for (size_t i = 0; i < (1UL << n_qubits); i++) { auto sim = VectorState(n_qubits, seed); for (qbit_t j = 0; j < n_qubits; ++j) { if ((i >> j) & 1) { - qs_policy_t_::ApplyX(sim.qs, qbits_t({j}), qbits_t({}), sim.dim); + qs_policy_t_::ApplyX(&(sim.qs), qbits_t({j}), qbits_t({}), sim.dim); } } sim.ApplyCircuit(circ, pr); @@ -606,7 +673,7 @@ template auto VectorState::GetExpectationWithGradOneOne(const Hamiltonian& ham, const circuit_t& circ, const circuit_t& herm_circ, const parameter::ParameterResolver& pr, - const MST& p_map) -> VT { + const MST& p_map) const -> VT { // auto timer = Timer(); // timer.Start("First part"); VT f_and_g(1 + p_map.size(), 0); @@ -640,8 +707,8 @@ auto VectorState::GetExpectationNonHermitianWithGradOneMulti( const std::vector>>& hams, const std::vector>>& herm_hams, const circuit_t& left_circ, const circuit_t& herm_left_circ, const circuit_t& right_circ, const circuit_t& herm_right_circ, - const parameter::ParameterResolver& pr, const MST& p_map, int n_thread, const derived_t& simulator_left) - -> VVT { + const parameter::ParameterResolver& pr, const MST& p_map, int n_thread, + const derived_t& simulator_left) const -> VVT { auto n_hams = hams.size(); VVT f_and_g(n_hams, VT((1 + p_map.size()), 0)); @@ -664,7 +731,7 @@ auto VectorState::LeftSizeGradOneMulti(const std::vector& p_map, int n_thread, const derived_t& simulator_left, - const derived_t& simulator_right) -> VVT { + const derived_t& simulator_right) const -> VVT { auto n_hams = hams.size(); int max_thread = 15; if (n_thread == 0) { @@ -723,7 +790,7 @@ auto VectorState::LeftSizeGradOneMulti(const std::vector auto VectorState::GetExpectationWithGradOneMulti( const std::vector>>& hams, const circuit_t& circ, const circuit_t& herm_circ, - const parameter::ParameterResolver& pr, const MST& p_map, int n_thread) -> VVT { + const parameter::ParameterResolver& pr, const MST& p_map, int n_thread) const -> VVT { auto n_hams = hams.size(); int max_thread = 15; if (n_thread == 0) { @@ -783,7 +850,7 @@ auto VectorState::GetExpectationNonHermitianWithGradMultiMulti( const std::vector>>& herm_hams, const circuit_t& left_circ, const circuit_t& herm_left_circ, const circuit_t& right_circ, const circuit_t& herm_right_circ, const VVT& enc_data, const VT& ans_data, const VS& enc_name, const VS& ans_name, - const derived_t& simulator_left, size_t batch_threads, size_t mea_threads) -> VT> { + const derived_t& simulator_left, size_t batch_threads, size_t mea_threads) const -> VT> { auto n_hams = hams.size(); auto n_prs = enc_data.size(); auto n_params = enc_name.size() + ans_name.size(); @@ -849,7 +916,7 @@ template auto VectorState::GetExpectationWithGradMultiMulti( const std::vector>>& hams, const circuit_t& circ, const circuit_t& herm_circ, const VVT& enc_data, const VT& ans_data, const VS& enc_name, const VS& ans_name, - size_t batch_threads, size_t mea_threads) -> VT> { + size_t batch_threads, size_t mea_threads) const -> VT> { auto n_hams = hams.size(); auto n_prs = enc_data.size(); auto n_params = enc_name.size() + ans_name.size(); @@ -910,7 +977,7 @@ auto VectorState::GetExpectationWithGradMultiMulti( template VT VectorState::Sampling(const circuit_t& circ, const parameter::ParameterResolver& pr, - size_t shots, const MST& key_map, unsigned int seed) { + size_t shots, const MST& key_map, unsigned int seed) const { auto key_size = key_map.size(); VT res(shots * key_size); RndEngine rnd_eng = RndEngine(seed); diff --git a/ccsrc/lib/simulator/vector/detail/cpu_avx_double/cpu_vector_core_gate_expect.cpp b/ccsrc/lib/simulator/vector/detail/cpu_avx_double/cpu_vector_core_gate_expect.cpp index f09044e4ec89556b60d33bd36a46206d41ec4aac..f382a5109a539af9b7946f2f45e8641b2967bfae 100644 --- a/ccsrc/lib/simulator/vector/detail/cpu_avx_double/cpu_vector_core_gate_expect.cpp +++ b/ccsrc/lib/simulator/vector/detail/cpu_avx_double/cpu_vector_core_gate_expect.cpp @@ -18,9 +18,20 @@ #include "simulator/vector/detail/cpu_vector_avx_double_policy.hpp" namespace mindquantum::sim::vector::detail { -auto CPUVectorPolicyAvxDouble::ExpectDiffSingleQubitMatrix(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, - const qbits_t& ctrls, const VVT& m, - index_t dim) -> qs_data_t { +auto CPUVectorPolicyAvxDouble::ExpectDiffSingleQubitMatrix(const qs_data_p_t& bra_out, const qs_data_p_t& ket_out, + const qbits_t& objs, const qbits_t& ctrls, + const VVT& m, index_t dim) -> qs_data_t { + auto bra = bra_out; + auto ket = ket_out; + bool will_free_bra = false, will_free_ket = false; + if (bra == nullptr) { + bra = derived::InitState(dim); + will_free_bra = true; + } + if (ket == nullptr) { + ket = derived::InitState(dim); + will_free_ket = true; + } SingleQubitGateMask mask(objs, ctrls); gate_matrix_t gate = {{m[0][0], m[0][1]}, {m[1][0], m[1][1]}}; __m256d neg = _mm256_setr_pd(1.0, -1.0, 1.0, -1.0); @@ -100,6 +111,12 @@ auto CPUVectorPolicyAvxDouble::ExpectDiffSingleQubitMatrix(qs_data_p_t bra, qs_d // clang-format on } } + if (will_free_bra) { + derived::FreeState(&bra); + } + if (will_free_ket) { + derived::FreeState(&ket); + } return {res_real, res_imag}; }; } // namespace mindquantum::sim::vector::detail diff --git a/ccsrc/lib/simulator/vector/detail/cpu_avx_double/cpu_vector_core_matrix_gate.cpp b/ccsrc/lib/simulator/vector/detail/cpu_avx_double/cpu_vector_core_matrix_gate.cpp index 98081d175259cb454e03d30bb9b17106a9afd629..ce474bd52137f8ae66bb943241c3b279b08a4c6f 100644 --- a/ccsrc/lib/simulator/vector/detail/cpu_avx_double/cpu_vector_core_matrix_gate.cpp +++ b/ccsrc/lib/simulator/vector/detail/cpu_avx_double/cpu_vector_core_matrix_gate.cpp @@ -13,14 +13,27 @@ // limitations under the License. #include "config/openmp.hpp" +#include "core/utils.hpp" #include "math/pr/parameter_resolver.hpp" #include "simulator/utils.hpp" #include "simulator/vector/detail/cpu_vector_avx_double_policy.hpp" namespace mindquantum::sim::vector::detail { -void CPUVectorPolicyAvxDouble::ApplySingleQubitMatrix(qs_data_p_t src, qs_data_p_t des, qbit_t obj_qubit, +void CPUVectorPolicyAvxDouble::ApplySingleQubitMatrix(const qs_data_p_t& src_out, qs_data_p_t* des_p, qbit_t obj_qubit, const qbits_t& ctrls, const std::vector>& m, index_t dim) { + auto& des = (*des_p); + if (des == nullptr) { + des = CPUVectorPolicyAvxDouble::InitState(dim); + } + qs_data_p_t src; + bool will_free = false; + if (src_out == nullptr) { + src = CPUVectorPolicyAvxDouble::InitState(dim); + will_free = true; + } else { + src = src_out; + } SingleQubitGateMask mask({obj_qubit}, ctrls); gate_matrix_t gate = {{m[0][0], m[0][1]}, {m[1][0], m[1][1]}}; __m256d neg = _mm256_setr_pd(1.0, -1.0, 1.0, -1.0); @@ -48,5 +61,8 @@ void CPUVectorPolicyAvxDouble::ApplySingleQubitMatrix(qs_data_p_t src, qs_data_p } }); } + if (will_free) { + CPUVectorPolicyAvxDouble::FreeState(&src); + } } } // namespace mindquantum::sim::vector::detail diff --git a/ccsrc/lib/simulator/vector/detail/cpu_common/CPPLINT.cfg b/ccsrc/lib/simulator/vector/detail/cpu_common/CPPLINT.cfg new file mode 100644 index 0000000000000000000000000000000000000000..cd2d10c42d0e3c2b31b0372b9f5fafeb934634a3 --- /dev/null +++ b/ccsrc/lib/simulator/vector/detail/cpu_common/CPPLINT.cfg @@ -0,0 +1 @@ +filter=-runtime/reference diff --git a/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_condition.cpp b/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_condition.cpp index dcc6322c163e3e57e9e1b60b94806086e3ea1e7b..3ca676bcf8ad89837e691e257a1e20791094eea5 100644 --- a/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_condition.cpp +++ b/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_condition.cpp @@ -26,73 +26,111 @@ namespace mindquantum::sim::vector::detail { template template -void CPUVectorPolicyBase::ConditionalBinary(qs_data_p_t src, qs_data_p_t des, index_t mask, - index_t condi, qs_data_t succ_coeff, +void CPUVectorPolicyBase::ConditionalBinary(const qs_data_p_t& src, qs_data_p_t* des_p, + index_t mask, index_t condi, qs_data_t succ_coeff, qs_data_t fail_coeff, index_t dim, const binary_op& op) { // if index mask satisfied condition, multiply by success_coeff, otherwise multiply fail_coeff - THRESHOLD_OMP_FOR( - dim, DimTh, for (omp::idx_t i = 0; i < dim; i++) { - if ((i & mask) == condi) { - des[i] = op(src[i], succ_coeff); - } else { - des[i] = op(src[i], fail_coeff); - } - }) + auto& des = *des_p; + if (des == nullptr) { + des = derived::InitState(dim); + } + if (src == nullptr) { + if ((0 & mask) == condi) { + des[0] = op(1.0, succ_coeff); + } else { + des[0] = op(1.0, fail_coeff); + } + } else { + THRESHOLD_OMP_FOR( + dim, DimTh, for (omp::idx_t i = 0; i < dim; i++) { + if ((i & mask) == condi) { + des[i] = op(src[i], succ_coeff); + } else { + des[i] = op(src[i], fail_coeff); + } + }) + } } template template -void CPUVectorPolicyBase::ConditionalBinary(qs_data_p_t src, qs_data_p_t des, +void CPUVectorPolicyBase::ConditionalBinary(const qs_data_p_t& src, qs_data_p_t* des_p, qs_data_t succ_coeff, qs_data_t fail_coeff, index_t dim, const binary_op& op) { // if index mask satisfied condition, multiply by success_coeff, otherwise multiply fail_coeff - THRESHOLD_OMP_FOR( - dim, DimTh, for (omp::idx_t i = 0; i < dim; i++) { - if ((i & mask) == condi) { - des[i] = op(src[i], succ_coeff); - } else { - des[i] = op(src[i], fail_coeff); - } - }) + auto& des = *des_p; + if (des == nullptr) { + des = derived::InitState(dim); + } + if (src == nullptr) { + if ((0 & mask) == condi) { + des[0] = op(1.0, succ_coeff); + } else { + des[0] = op(1.0, fail_coeff); + } + } else { + THRESHOLD_OMP_FOR( + dim, DimTh, for (omp::idx_t i = 0; i < dim; i++) { + if ((i & mask) == condi) { + des[i] = op(src[i], succ_coeff); + } else { + des[i] = op(src[i], fail_coeff); + } + }) + } } template -void CPUVectorPolicyBase::QSMulValue(qs_data_p_t src, qs_data_p_t des, qs_data_t value, +void CPUVectorPolicyBase::QSMulValue(const qs_data_p_t& src, qs_data_p_t* des_p, qs_data_t value, index_t dim) { - derived::template ConditionalBinary<0, 0>(src, des, value, 0, dim, std::multiplies()); + derived::template ConditionalBinary<0, 0>(src, des_p, value, 0, dim, std::multiplies()); } template -void CPUVectorPolicyBase::ConditionalAdd(qs_data_p_t src, qs_data_p_t des, index_t mask, +void CPUVectorPolicyBase::ConditionalAdd(const qs_data_p_t& src, qs_data_p_t* des_p, index_t mask, index_t condi, qs_data_t succ_coeff, qs_data_t fail_coeff, index_t dim) { - derived::template ConditionalBinary(src, des, mask, condi, succ_coeff, fail_coeff, dim, std::plus()); + derived::template ConditionalBinary(src, des_p, mask, condi, succ_coeff, fail_coeff, dim, std::plus()); } template -void CPUVectorPolicyBase::ConditionalMinus(qs_data_p_t src, qs_data_p_t des, index_t mask, - index_t condi, qs_data_t succ_coeff, +void CPUVectorPolicyBase::ConditionalMinus(const qs_data_p_t& src, qs_data_p_t* des_p, + index_t mask, index_t condi, qs_data_t succ_coeff, qs_data_t fail_coeff, index_t dim) { - derived::template ConditionalBinary(src, des, mask, condi, succ_coeff, fail_coeff, dim, std::minus()); + derived::template ConditionalBinary(src, des_p, mask, condi, succ_coeff, fail_coeff, dim, std::minus()); } template -void CPUVectorPolicyBase::ConditionalMul(qs_data_p_t src, qs_data_p_t des, index_t mask, +void CPUVectorPolicyBase::ConditionalMul(const qs_data_p_t& src, qs_data_p_t* des_p, index_t mask, index_t condi, qs_data_t succ_coeff, qs_data_t fail_coeff, index_t dim) { - derived::template ConditionalBinary(src, des, mask, condi, succ_coeff, fail_coeff, dim, + derived::template ConditionalBinary(src, des_p, mask, condi, succ_coeff, fail_coeff, dim, std::multiplies()); } template -void CPUVectorPolicyBase::ConditionalDiv(qs_data_p_t src, qs_data_p_t des, index_t mask, +void CPUVectorPolicyBase::ConditionalDiv(const qs_data_p_t& src, qs_data_p_t* des_p, index_t mask, index_t condi, qs_data_t succ_coeff, qs_data_t fail_coeff, index_t dim) { - derived::template ConditionalBinary(src, des, mask, condi, succ_coeff, fail_coeff, dim, std::divides()); + derived::template ConditionalBinary(src, des_p, mask, condi, succ_coeff, fail_coeff, dim, + std::divides()); } template -auto CPUVectorPolicyBase::ConditionalCollect(qs_data_p_t qs, index_t mask, index_t condi, +auto CPUVectorPolicyBase::ConditionalCollect(const qs_data_p_t& qs, index_t mask, index_t condi, bool abs, index_t dim) -> qs_data_t { // collect amplitude with index mask satisfied condition. calc_type res_real = 0, res_imag = 0; + if (qs == nullptr) { + if (abs) { + if ((0 & mask) == condi) { + res_real += qs[0].real() * qs[0].real() + qs[0].imag() * qs[0].imag(); + } + } else { + if ((0 & mask) == condi) { + res_real += qs[0].real(); + res_imag += qs[0].imag(); + } + } + return qs_data_t(res_real, res_imag); + } if (abs) { THRESHOLD_OMP( MQ_DO_PRAGMA(omp parallel for schedule(static) reduction(+: res_real)), dim, DimTh, diff --git a/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_dot_like.cpp b/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_dot_like.cpp index f21c2ab0eb2aeea00de64c8e253b093edd449efc..141945e3ab10e6d487b9af1817f2cf93a9ffdc16 100644 --- a/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_dot_like.cpp +++ b/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_dot_like.cpp @@ -11,6 +11,8 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. +#include + #include "config/openmp.hpp" #include "core/sparse/algo.hpp" @@ -27,7 +29,15 @@ namespace mindquantum::sim::vector::detail { template -auto CPUVectorPolicyBase::Vdot(qs_data_p_t bra, qs_data_p_t ket, index_t dim) -> py_qs_data_t { +auto CPUVectorPolicyBase::Vdot(const qs_data_p_t& bra, const qs_data_p_t& ket, index_t dim) + -> py_qs_data_t { + if (bra == nullptr && ket == nullptr) { + return 1.0; + } else if (bra == nullptr) { + return ket[0]; + } else if (ket == nullptr) { + return std::conj(bra[0]); + } calc_type res_real = 0, res_imag = 0; // clang-format off THRESHOLD_OMP( @@ -42,8 +52,24 @@ auto CPUVectorPolicyBase::Vdot(qs_data_p_t bra, qs_data_p_ template template -auto CPUVectorPolicyBase::ConditionVdot(qs_data_p_t bra, qs_data_p_t ket, index_t dim) - -> py_qs_data_t { +auto CPUVectorPolicyBase::ConditionVdot(const qs_data_p_t& bra, const qs_data_p_t& ket, + index_t dim) -> py_qs_data_t { + if (bra == nullptr && ket == nullptr) { + if ((0 & mask) == condi) { + return 1.0; + } + return 0.0; + } else if (bra == nullptr) { + if ((0 & mask) == condi) { + return ket[0]; + } + return 0.0; + } else if (ket == nullptr) { + if ((0 & mask) == condi) { + return std::conj(bra[0]); + } + return 0.0; + } calc_type res_real = 0, res_imag = 0; // clang-format off THRESHOLD_OMP( @@ -59,8 +85,11 @@ auto CPUVectorPolicyBase::ConditionVdot(qs_data_p_t bra, q } template -auto CPUVectorPolicyBase::OneStateVdot(qs_data_p_t bra, qs_data_p_t ket, qbit_t obj_qubit, - index_t dim) -> py_qs_data_t { +auto CPUVectorPolicyBase::OneStateVdot(const qs_data_p_t& bra, const qs_data_p_t& ket, + qbit_t obj_qubit, index_t dim) -> py_qs_data_t { + if (bra == nullptr || ket == nullptr) { + return 0.0; + } SingleQubitGateMask mask({obj_qubit}, {}); calc_type res_real = 0, res_imag = 0; // clang-format off @@ -76,8 +105,15 @@ auto CPUVectorPolicyBase::OneStateVdot(qs_data_p_t bra, qs } template -auto CPUVectorPolicyBase::ZeroStateVdot(qs_data_p_t bra, qs_data_p_t ket, qbit_t obj_qubit, - index_t dim) -> py_qs_data_t { +auto CPUVectorPolicyBase::ZeroStateVdot(const qs_data_p_t& bra, const qs_data_p_t& ket, + qbit_t obj_qubit, index_t dim) -> py_qs_data_t { + if (bra == nullptr && ket == nullptr) { + return 1.0; + } else if (bra == nullptr) { + return ket[0]; + } else if (ket == nullptr) { + return std::conj(bra[0]); + } SingleQubitGateMask mask({obj_qubit}, {}); calc_type res_real = 0, res_imag = 0; // clang-format off @@ -94,25 +130,99 @@ auto CPUVectorPolicyBase::ZeroStateVdot(qs_data_p_t bra, q template auto CPUVectorPolicyBase::CsrDotVec(const std::shared_ptr>& a, - qs_data_p_t vec, index_t dim) -> qs_data_p_t { + const qs_data_p_t& vec_out, index_t dim) -> qs_data_p_t { if (dim != a->dim_) { throw std::runtime_error("Sparse hamiltonian size not match with quantum state size."); } + auto vec = vec_out; + bool will_free = false; + if (vec == nullptr) { + vec = derived::InitState(dim); + will_free = true; + } auto out = sparse::Csr_Dot_Vec(a, reinterpret_cast(vec)); + if (will_free) { + derived::FreeState(&vec); + } return reinterpret_cast(out); } template auto CPUVectorPolicyBase::CsrDotVec(const std::shared_ptr>& a, const std::shared_ptr>& b, - qs_data_p_t vec, index_t dim) -> qs_data_p_t { + const qs_data_p_t& vec_out, index_t dim) -> qs_data_p_t { if ((dim != a->dim_) || (dim != b->dim_)) { throw std::runtime_error("Sparse hamiltonian size not match with quantum state size."); } + auto vec = vec_out; + bool will_free = false; + if (vec == nullptr) { + vec = derived::InitState(dim); + } auto out = sparse::Csr_Dot_Vec(a, b, reinterpret_cast(vec)); + if (will_free) { + derived::FreeState(&vec); + } return reinterpret_cast(out); } +template +auto CPUVectorPolicyBase::ExpectationOfCsr( + const std::shared_ptr>& a, const qs_data_p_t& bra_out, const qs_data_p_t& ket_out, + index_t dim) -> py_qs_data_t { + if (dim != a->dim_) { + throw std::runtime_error("Sparse hamiltonian size not match with quantum state size."); + } + auto bra = bra_out; + auto ket = ket_out; + bool will_free_bra = false, will_free_ket = false; + if (bra == nullptr) { + bra = derived::InitState(dim); + will_free_bra = true; + } + if (ket == nullptr) { + ket = derived::InitState(dim); + will_free_ket = true; + } + auto res = sparse::ExpectationOfCsr(a, reinterpret_cast(bra), + reinterpret_cast(ket)); + if (will_free_bra) { + derived::FreeState(&bra); + } + if (will_free_ket) { + derived::FreeState(&ket); + } + return res; +} +template +auto CPUVectorPolicyBase::ExpectationOfCsr( + const std::shared_ptr>& a, const std::shared_ptr>& b, + const qs_data_p_t& bra_out, const qs_data_p_t& ket_out, index_t dim) -> py_qs_data_t { + if ((dim != a->dim_) || (dim != b->dim_)) { + throw std::runtime_error("Sparse hamiltonian size not match with quantum state size."); + } + auto bra = bra_out; + auto ket = ket_out; + bool will_free_bra = false, will_free_ket = false; + if (bra == nullptr) { + bra = derived::InitState(dim); + will_free_bra = true; + } + if (ket == nullptr) { + ket = derived::InitState(dim); + will_free_ket = true; + } + auto res = sparse::ExpectationOfCsr(a, b, reinterpret_cast(bra), + reinterpret_cast(ket)); + if (will_free_bra) { + derived::FreeState(&bra); + } + if (will_free_ket) { + derived::FreeState(&ket); + } + return res; +} + #ifdef __x86_64__ template struct CPUVectorPolicyBase; template struct CPUVectorPolicyBase; diff --git a/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_gate_expect.cpp b/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_gate_expect.cpp index 44890e3fa5ca0bed511a4005f5e68b2e72056ca3..338c62f35d9765f31a531c0823e63b5c50ba6553 100644 --- a/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_gate_expect.cpp +++ b/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_gate_expect.cpp @@ -26,10 +26,22 @@ #include "simulator/vector/detail/cpu_vector_policy.hpp" namespace mindquantum::sim::vector::detail { template -auto CPUVectorPolicyBase::ExpectDiffNQubitsMatrix(qs_data_p_t bra, qs_data_p_t ket, - const qbits_t& objs, const qbits_t& ctrls, +auto CPUVectorPolicyBase::ExpectDiffNQubitsMatrix(const qs_data_p_t& bra_out, + const qs_data_p_t& ket_out, const qbits_t& objs, + const qbits_t& ctrls, const VVT& gate, index_t dim) -> qs_data_t { + auto bra = bra_out; + auto ket = ket_out; + bool will_free_bra = false, will_free_ket = false; + if (bra == nullptr) { + bra = derived::InitState(dim); + will_free_bra = true; + } + if (ket == nullptr) { + ket = derived::InitState(dim); + will_free_ket = true; + } size_t n_qubit = objs.size(); size_t m_dim = (1UL << n_qubit); size_t ctrl_mask = 0; @@ -65,14 +77,32 @@ auto CPUVectorPolicyBase::ExpectDiffNQubitsMatrix(qs_data_ } } }) + if (will_free_bra) { + derived::FreeState(&bra); + } + if (will_free_ket) { + derived::FreeState(&ket); + } return {res_real, res_imag}; } template -auto CPUVectorPolicyBase::ExpectDiffTwoQubitsMatrix(qs_data_p_t bra, qs_data_p_t ket, +auto CPUVectorPolicyBase::ExpectDiffTwoQubitsMatrix(const qs_data_p_t& bra_out, + const qs_data_p_t& ket_out, const qbits_t& objs, const qbits_t& ctrls, const VVT& gate, index_t dim) -> qs_data_t { + auto bra = bra_out; + auto ket = ket_out; + bool will_free_bra = false, will_free_ket = false; + if (bra == nullptr) { + bra = derived::InitState(dim); + will_free_bra = true; + } + if (ket == nullptr) { + ket = derived::InitState(dim); + will_free_ket = true; + } DoubleQubitGateMask mask(objs, ctrls); calc_type res_real = 0, res_imag = 0; // clang-format off @@ -122,14 +152,32 @@ auto CPUVectorPolicyBase::ExpectDiffTwoQubitsMatrix(qs_dat }) } // clang-format on + if (will_free_bra) { + derived::FreeState(&bra); + } + if (will_free_ket) { + derived::FreeState(&ket); + } return {res_real, res_imag}; }; template -auto CPUVectorPolicyBase::ExpectDiffSingleQubitMatrix(qs_data_p_t bra, qs_data_p_t ket, +auto CPUVectorPolicyBase::ExpectDiffSingleQubitMatrix(const qs_data_p_t& bra_out, + const qs_data_p_t& ket_out, const qbits_t& objs, const qbits_t& ctrls, const VVT& m, index_t dim) -> qs_data_t { + auto bra = bra_out; + auto ket = ket_out; + bool will_free_bra = false, will_free_ket = false; + if (bra == nullptr) { + bra = derived::InitState(dim); + will_free_bra = true; + } + if (ket == nullptr) { + ket = derived::InitState(dim); + will_free_ket = true; + } SingleQubitGateMask mask(objs, ctrls); calc_type res_real = 0, res_imag = 0; if (!mask.ctrl_mask) { @@ -192,11 +240,17 @@ auto CPUVectorPolicyBase::ExpectDiffSingleQubitMatrix(qs_d // clang-format on } } + if (will_free_bra) { + derived::FreeState(&bra); + } + if (will_free_ket) { + derived::FreeState(&ket); + } return {res_real, res_imag}; }; template -auto CPUVectorPolicyBase::ExpectDiffMatrixGate(qs_data_p_t bra, qs_data_p_t ket, +auto CPUVectorPolicyBase::ExpectDiffMatrixGate(const qs_data_p_t& bra, const qs_data_p_t& ket, const qbits_t& objs, const qbits_t& ctrls, const VVT& m, index_t dim) -> qs_data_t { @@ -210,9 +264,9 @@ auto CPUVectorPolicyBase::ExpectDiffMatrixGate(qs_data_p_t } template -auto CPUVectorPolicyBase::ExpectDiffRX(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, - const qbits_t& ctrls, calc_type val, index_t dim) - -> qs_data_t { +auto CPUVectorPolicyBase::ExpectDiffRX(const qs_data_p_t& bra, const qs_data_p_t& ket, + const qbits_t& objs, const qbits_t& ctrls, calc_type val, + index_t dim) -> qs_data_t { auto c = static_cast(-0.5 * std::sin(val / 2)); auto is = static_cast(0.5 * std::cos(val / 2)) * IMAGE_MI; VVT gate = {{c, is}, {is, c}}; @@ -220,9 +274,9 @@ auto CPUVectorPolicyBase::ExpectDiffRX(qs_data_p_t bra, qs }; template -auto CPUVectorPolicyBase::ExpectDiffRY(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, - const qbits_t& ctrls, calc_type val, index_t dim) - -> qs_data_t { +auto CPUVectorPolicyBase::ExpectDiffRY(const qs_data_p_t& bra, const qs_data_p_t& ket, + const qbits_t& objs, const qbits_t& ctrls, calc_type val, + index_t dim) -> qs_data_t { calc_type c = -0.5 * std::sin(val / 2); calc_type s = 0.5 * std::cos(val / 2); VVT gate = {{c, -s}, {s, c}}; @@ -230,9 +284,9 @@ auto CPUVectorPolicyBase::ExpectDiffRY(qs_data_p_t bra, qs }; template -auto CPUVectorPolicyBase::ExpectDiffRZ(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, - const qbits_t& ctrls, calc_type val, index_t dim) - -> qs_data_t { +auto CPUVectorPolicyBase::ExpectDiffRZ(const qs_data_p_t& bra, const qs_data_p_t& ket, + const qbits_t& objs, const qbits_t& ctrls, calc_type val, + index_t dim) -> qs_data_t { calc_type c = -0.5 * std::sin(val / 2); calc_type s = 0.5 * std::cos(val / 2); auto e0 = c + IMAGE_MI * s; @@ -242,9 +296,9 @@ auto CPUVectorPolicyBase::ExpectDiffRZ(qs_data_p_t bra, qs }; template -auto CPUVectorPolicyBase::ExpectDiffGP(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, - const qbits_t& ctrls, calc_type val, index_t dim) - -> qs_data_t { +auto CPUVectorPolicyBase::ExpectDiffGP(const qs_data_p_t& bra, const qs_data_p_t& ket, + const qbits_t& objs, const qbits_t& ctrls, calc_type val, + index_t dim) -> qs_data_t { auto e = std::complex(0, -1); e *= std::exp(std::complex(0, -val)); VVT gate = {{e, 0}, {0, e}}; @@ -252,9 +306,20 @@ auto CPUVectorPolicyBase::ExpectDiffGP(qs_data_p_t bra, qs } template -auto CPUVectorPolicyBase::ExpectDiffPS(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, - const qbits_t& ctrls, calc_type val, index_t dim) - -> qs_data_t { +auto CPUVectorPolicyBase::ExpectDiffPS(const qs_data_p_t& bra_out, const qs_data_p_t& ket_out, + const qbits_t& objs, const qbits_t& ctrls, calc_type val, + index_t dim) -> qs_data_t { + auto bra = bra_out; + auto ket = ket_out; + bool will_free_bra = false, will_free_ket = false; + if (bra == nullptr) { + bra = derived::InitState(dim); + will_free_bra = true; + } + if (ket == nullptr) { + ket = derived::InitState(dim); + will_free_ket = true; + } SingleQubitGateMask mask(objs, ctrls); calc_type res_real = 0, res_imag = 0; auto e = -std::sin(val) + IMAGE_I * std::cos(val); @@ -286,13 +351,30 @@ auto CPUVectorPolicyBase::ExpectDiffPS(qs_data_p_t bra, qs }) // clang-format on } + if (will_free_bra) { + derived::FreeState(&bra); + } + if (will_free_ket) { + derived::FreeState(&ket); + } return {res_real, res_imag}; }; template -auto CPUVectorPolicyBase::ExpectDiffRxx(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, - const qbits_t& ctrls, calc_type val, index_t dim) - -> qs_data_t { +auto CPUVectorPolicyBase::ExpectDiffRxx(const qs_data_p_t& bra_out, const qs_data_p_t& ket_out, + const qbits_t& objs, const qbits_t& ctrls, calc_type val, + index_t dim) -> qs_data_t { + auto bra = bra_out; + auto ket = ket_out; + bool will_free_bra = false, will_free_ket = false; + if (bra == nullptr) { + bra = derived::InitState(dim); + will_free_bra = true; + } + if (ket == nullptr) { + ket = derived::InitState(dim); + will_free_ket = true; + } DoubleQubitGateMask mask(objs, ctrls); auto c = static_cast(-std::sin(val / 2) / 2); auto s = static_cast(std::cos(val / 2) / 2) * IMAGE_MI; @@ -342,13 +424,30 @@ auto CPUVectorPolicyBase::ExpectDiffRxx(qs_data_p_t bra, q } }) } + if (will_free_bra) { + derived::FreeState(&bra); + } + if (will_free_ket) { + derived::FreeState(&ket); + } return {res_real, res_imag}; }; template -auto CPUVectorPolicyBase::ExpectDiffRxy(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, - const qbits_t& ctrls, calc_type val, index_t dim) - -> qs_data_t { +auto CPUVectorPolicyBase::ExpectDiffRxy(const qs_data_p_t& bra_out, const qs_data_p_t& ket_out, + const qbits_t& objs, const qbits_t& ctrls, calc_type val, + index_t dim) -> qs_data_t { + auto bra = bra_out; + auto ket = ket_out; + bool will_free_bra = false, will_free_ket = false; + if (bra == nullptr) { + bra = derived::InitState(dim); + will_free_bra = true; + } + if (ket == nullptr) { + ket = derived::InitState(dim); + will_free_ket = true; + } DoubleQubitGateMask mask(objs, ctrls); auto c = static_cast(-std::sin(val / 2) / 2); auto s = static_cast(std::cos(val / 2) / 2); @@ -398,13 +497,30 @@ auto CPUVectorPolicyBase::ExpectDiffRxy(qs_data_p_t bra, q } }) } + if (will_free_bra) { + derived::FreeState(&bra); + } + if (will_free_ket) { + derived::FreeState(&ket); + } return {res_real, res_imag}; }; template -auto CPUVectorPolicyBase::ExpectDiffRxz(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, - const qbits_t& ctrls, calc_type val, index_t dim) - -> qs_data_t { +auto CPUVectorPolicyBase::ExpectDiffRxz(const qs_data_p_t& bra_out, const qs_data_p_t& ket_out, + const qbits_t& objs, const qbits_t& ctrls, calc_type val, + index_t dim) -> qs_data_t { + auto bra = bra_out; + auto ket = ket_out; + bool will_free_bra = false, will_free_ket = false; + if (bra == nullptr) { + bra = derived::InitState(dim); + will_free_bra = true; + } + if (ket == nullptr) { + ket = derived::InitState(dim); + will_free_ket = true; + } DoubleQubitGateMask mask(objs, ctrls); auto c = static_cast(-std::sin(val / 2) / 2); auto s = static_cast(std::cos(val / 2) / 2) * IMAGE_MI; @@ -454,13 +570,30 @@ auto CPUVectorPolicyBase::ExpectDiffRxz(qs_data_p_t bra, q } }) } + if (will_free_bra) { + derived::FreeState(&bra); + } + if (will_free_ket) { + derived::FreeState(&ket); + } return {res_real, res_imag}; }; template -auto CPUVectorPolicyBase::ExpectDiffRyz(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, - const qbits_t& ctrls, calc_type val, index_t dim) - -> qs_data_t { +auto CPUVectorPolicyBase::ExpectDiffRyz(const qs_data_p_t& bra_out, const qs_data_p_t& ket_out, + const qbits_t& objs, const qbits_t& ctrls, calc_type val, + index_t dim) -> qs_data_t { + auto bra = bra_out; + auto ket = ket_out; + bool will_free_bra = false, will_free_ket = false; + if (bra == nullptr) { + bra = derived::InitState(dim); + will_free_bra = true; + } + if (ket == nullptr) { + ket = derived::InitState(dim); + will_free_ket = true; + } DoubleQubitGateMask mask(objs, ctrls); auto c = static_cast(-std::sin(val / 2) / 2); auto s = static_cast(std::cos(val / 2) / 2); @@ -510,13 +643,30 @@ auto CPUVectorPolicyBase::ExpectDiffRyz(qs_data_p_t bra, q } }) } + if (will_free_bra) { + derived::FreeState(&bra); + } + if (will_free_ket) { + derived::FreeState(&ket); + } return {res_real, res_imag}; }; template -auto CPUVectorPolicyBase::ExpectDiffRyy(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, - const qbits_t& ctrls, calc_type val, index_t dim) - -> qs_data_t { +auto CPUVectorPolicyBase::ExpectDiffRyy(const qs_data_p_t& bra_out, const qs_data_p_t& ket_out, + const qbits_t& objs, const qbits_t& ctrls, calc_type val, + index_t dim) -> qs_data_t { + auto bra = bra_out; + auto ket = ket_out; + bool will_free_bra = false, will_free_ket = false; + if (bra == nullptr) { + bra = derived::InitState(dim); + will_free_bra = true; + } + if (ket == nullptr) { + ket = derived::InitState(dim); + will_free_ket = true; + } DoubleQubitGateMask mask(objs, ctrls); auto c = static_cast(-std::sin(val / 2) / 2); auto s = static_cast(std::cos(val / 2) / 2) * IMAGE_I; @@ -566,13 +716,30 @@ auto CPUVectorPolicyBase::ExpectDiffRyy(qs_data_p_t bra, q } }) } + if (will_free_bra) { + derived::FreeState(&bra); + } + if (will_free_ket) { + derived::FreeState(&ket); + } return {res_real, res_imag}; }; template -auto CPUVectorPolicyBase::ExpectDiffRzz(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, - const qbits_t& ctrls, calc_type val, index_t dim) - -> qs_data_t { +auto CPUVectorPolicyBase::ExpectDiffRzz(const qs_data_p_t& bra_out, const qs_data_p_t& ket_out, + const qbits_t& objs, const qbits_t& ctrls, calc_type val, + index_t dim) -> qs_data_t { + auto bra = bra_out; + auto ket = ket_out; + bool will_free_bra = false, will_free_ket = false; + if (bra == nullptr) { + bra = derived::InitState(dim); + will_free_bra = true; + } + if (ket == nullptr) { + ket = derived::InitState(dim); + will_free_ket = true; + } DoubleQubitGateMask mask(objs, ctrls); auto c = static_cast(-std::sin(val / 2) / 2); @@ -617,6 +784,12 @@ auto CPUVectorPolicyBase::ExpectDiffRzz(qs_data_p_t bra, q } }) } + if (will_free_bra) { + derived::FreeState(&bra); + } + if (will_free_ket) { + derived::FreeState(&ket); + } return {res_real, res_imag}; }; #ifdef __x86_64__ diff --git a/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_matrix_gate.cpp b/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_matrix_gate.cpp index 9175f31dfd37d722f1e52127014823ce38065bd6..b4b57a50715292a6d0d107a75d307c7341508bf4 100644 --- a/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_matrix_gate.cpp +++ b/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_matrix_gate.cpp @@ -25,10 +25,22 @@ #include "simulator/vector/detail/cpu_vector_policy.hpp" namespace mindquantum::sim::vector::detail { template -void CPUVectorPolicyBase::ApplyNQubitsMatrix(qs_data_p_t src, qs_data_p_t des, +void CPUVectorPolicyBase::ApplyNQubitsMatrix(const qs_data_p_t& src_out, qs_data_p_t* des_p, const qbits_t& objs, const qbits_t& ctrls, const std::vector>& gate, index_t dim) { + auto& des = *des_p; + if (des == nullptr) { + des = derived::InitState(dim); + } + qs_data_p_t src; + bool will_free = false; + if (src_out == nullptr) { + src = derived::InitState(dim); + will_free = true; + } else { + src = src_out; + } size_t n_qubit = objs.size(); size_t m_dim = (1UL << n_qubit); size_t ctrl_mask = 0; @@ -67,13 +79,28 @@ void CPUVectorPolicyBase::ApplyNQubitsMatrix(qs_data_p_t s } } }) + if (will_free) { + derived::FreeState(&src); + } } template -void CPUVectorPolicyBase::ApplyTwoQubitsMatrix(qs_data_p_t src, qs_data_p_t des, +void CPUVectorPolicyBase::ApplyTwoQubitsMatrix(const qs_data_p_t& src_out, qs_data_p_t* des_p, const qbits_t& objs, const qbits_t& ctrls, const std::vector>& gate, index_t dim) { + auto& des = *des_p; + if (des == nullptr) { + des = derived::InitState(dim); + } + qs_data_p_t src; + bool will_free = false; + if (src_out == nullptr) { + src = derived::InitState(dim); + will_free = true; + } else { + src = src_out; + } DoubleQubitGateMask mask(objs, ctrls); size_t mask1 = (1UL << objs[0]); size_t mask2 = (1UL << objs[1]); @@ -118,12 +145,27 @@ void CPUVectorPolicyBase::ApplyTwoQubitsMatrix(qs_data_p_t } }) } + if (will_free) { + derived::FreeState(&src); + } } template -void CPUVectorPolicyBase::ApplySingleQubitMatrix(qs_data_p_t src, qs_data_p_t des, +void CPUVectorPolicyBase::ApplySingleQubitMatrix(const qs_data_p_t& src_out, qs_data_p_t* des_p, qbit_t obj_qubit, const qbits_t& ctrls, const std::vector>& m, index_t dim) { + auto& des = (*des_p); + if (des == nullptr) { + des = derived::InitState(dim); + } + qs_data_p_t src; + bool will_free = false; + if (src_out == nullptr) { + src = derived::InitState(dim); + will_free = true; + } else { + src = src_out; + } SingleQubitGateMask mask({obj_qubit}, ctrls); if (!mask.ctrl_mask) { THRESHOLD_OMP_FOR( @@ -148,19 +190,22 @@ void CPUVectorPolicyBase::ApplySingleQubitMatrix(qs_data_p } }); } + if (will_free) { + derived::FreeState(&src); + } } template -void CPUVectorPolicyBase::ApplyMatrixGate(qs_data_p_t src, qs_data_p_t des, const qbits_t& objs, - const qbits_t& ctrls, +void CPUVectorPolicyBase::ApplyMatrixGate(const qs_data_p_t& src, qs_data_p_t* des_p, + const qbits_t& objs, const qbits_t& ctrls, const std::vector>& m, index_t dim) { if (objs.size() == 1) { - derived::ApplySingleQubitMatrix(src, des, objs[0], ctrls, m, dim); + derived::ApplySingleQubitMatrix(src, des_p, objs[0], ctrls, m, dim); } else if (objs.size() == 2) { - derived::ApplyTwoQubitsMatrix(src, des, objs, ctrls, m, dim); + derived::ApplyTwoQubitsMatrix(src, des_p, objs, ctrls, m, dim); } else { - derived::ApplyNQubitsMatrix(src, des, objs, ctrls, m, dim); + derived::ApplyNQubitsMatrix(src, des_p, objs, ctrls, m, dim); } } diff --git a/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_other_gate.cpp b/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_other_gate.cpp index 93704819101c68ca7a6d329028821ca222eda320..6780531ac5dcad73380922422ffcd902c475492f 100644 --- a/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_other_gate.cpp +++ b/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_other_gate.cpp @@ -23,18 +23,18 @@ #include "simulator/vector/detail/cpu_vector_policy.hpp" namespace mindquantum::sim::vector::detail { template -void CPUVectorPolicyBase::ApplyH(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void CPUVectorPolicyBase::ApplyH(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim) { std::vector> m{{M_SQRT1_2, M_SQRT1_2}, {M_SQRT1_2, -M_SQRT1_2}}; - derived::ApplySingleQubitMatrix(qs, qs, objs[0], ctrls, m, dim); + derived::ApplySingleQubitMatrix((*qs_p), qs_p, objs[0], ctrls, m, dim); } template -void CPUVectorPolicyBase::ApplyGP(qs_data_p_t qs, qbit_t obj_qubit, const qbits_t& ctrls, +void CPUVectorPolicyBase::ApplyGP(qs_data_p_t* qs_p, qbit_t obj_qubit, const qbits_t& ctrls, calc_type val, index_t dim, bool diff) { auto c = std::exp(std::complex(0, -val)); std::vector> m = {{c, 0}, {0, c}}; - derived::ApplySingleQubitMatrix(qs, qs, obj_qubit, ctrls, m, dim); + derived::ApplySingleQubitMatrix(*qs_p, qs_p, obj_qubit, ctrls, m, dim); } #ifdef __x86_64__ diff --git a/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_policy.cpp b/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_policy.cpp index 687b4ff3270fa906ace703814847493b1146fd82..1a4acfa6e01be9d3f648d8e2b8e993f35ade88c7 100644 --- a/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_policy.cpp +++ b/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_policy.cpp @@ -11,6 +11,7 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include @@ -44,32 +45,43 @@ auto CPUVectorPolicyBase::InitState(index_t dim, bool zero } template -void CPUVectorPolicyBase::Reset(qs_data_p_t qs, index_t dim) { - THRESHOLD_OMP_FOR( - dim, DimTh, for (omp::idx_t i = 0; i < dim; i++) { qs[i] = 0; }) - qs[0] = 1; +void CPUVectorPolicyBase::Reset(qs_data_p_t* qs_p, index_t dim) { + derived::FreeState(qs_p); } template -void CPUVectorPolicyBase::FreeState(qs_data_p_t qs) { +void CPUVectorPolicyBase::FreeState(qs_data_p_t* qs_p) { + auto& qs = (*qs_p); if (qs != nullptr) { free(qs); + qs = nullptr; } } template -void CPUVectorPolicyBase::Display(qs_data_p_t qs, qbit_t n_qubits, qbit_t q_limit) { +void CPUVectorPolicyBase::Display(const qs_data_p_t& qs, qbit_t n_qubits, qbit_t q_limit) { if (n_qubits > q_limit) { n_qubits = q_limit; } std::cout << n_qubits << " qubits cpu simulator (little endian)." << std::endl; - for (index_t i = 0; i < (1UL << n_qubits); i++) { - std::cout << "(" << qs[i].real() << ", " << qs[i].imag() << ")" << std::endl; + if (qs == nullptr) { + std::cout << "(" << 1 << ", " << 0 << ")" << std::endl; + for (index_t i = 0; i < (1UL << n_qubits) - 1; i++) { + std::cout << "(" << 0 << ", " << 0 << ")" << std::endl; + } + } else { + for (index_t i = 0; i < (1UL << n_qubits); i++) { + std::cout << "(" << qs[i].real() << ", " << qs[i].imag() << ")" << std::endl; + } } } template -void CPUVectorPolicyBase::SetToZeroExcept(qs_data_p_t qs, index_t ctrl_mask, index_t dim) { +void CPUVectorPolicyBase::SetToZeroExcept(qs_data_p_t* qs_p, index_t ctrl_mask, index_t dim) { + auto& qs = *qs_p; + if (qs == nullptr) { + qs = derived::InitState(dim); + } THRESHOLD_OMP_FOR( dim, DimTh, for (omp::idx_t i = 0; i < dim; i++) { if ((i & ctrl_mask) != ctrl_mask) { @@ -79,33 +91,49 @@ void CPUVectorPolicyBase::SetToZeroExcept(qs_data_p_t qs, } template -auto CPUVectorPolicyBase::Copy(qs_data_p_t qs, index_t dim) -> qs_data_p_t { - qs_data_p_t out = derived::InitState(dim, false); - THRESHOLD_OMP_FOR( - dim, DimTh, for (omp::idx_t i = 0; i < dim; i++) { out[i] = qs[i]; }) +auto CPUVectorPolicyBase::Copy(const qs_data_p_t& qs, index_t dim) -> qs_data_p_t { + qs_data_p_t out = nullptr; + if (qs != nullptr) { + out = derived::InitState(dim, false); + THRESHOLD_OMP_FOR( + dim, DimTh, for (omp::idx_t i = 0; i < dim; i++) { out[i] = qs[i]; }) + } return out; }; template -auto CPUVectorPolicyBase::GetQS(qs_data_p_t qs, index_t dim) -> VT { +auto CPUVectorPolicyBase::GetQS(const qs_data_p_t& qs, index_t dim) -> VT { VT out(dim); - THRESHOLD_OMP_FOR( - dim, DimTh, for (omp::idx_t i = 0; i < dim; i++) { out[i] = qs[i]; }) + if (qs != nullptr) { + THRESHOLD_OMP_FOR( + dim, DimTh, for (omp::idx_t i = 0; i < dim; i++) { out[i] = qs[i]; }) + } else { + out[0] = 1.0; + } return out; } template -void CPUVectorPolicyBase::SetQS(qs_data_p_t qs, const VT& qs_out, index_t dim) { +void CPUVectorPolicyBase::SetQS(qs_data_p_t* qs_p, const VT& qs_out, index_t dim) { + auto& qs = (*qs_p); if (qs_out.size() != dim) { throw std::invalid_argument("state size not match"); } + if (qs == nullptr) { + qs = derived::InitState(dim, false); + } THRESHOLD_OMP_FOR( dim, DimTh, for (omp::idx_t i = 0; i < dim; i++) { qs[i] = qs_out[i]; }) } template -auto CPUVectorPolicyBase::ApplyTerms(qs_data_p_t qs, const std::vector>& ham, - index_t dim) -> qs_data_p_t { +auto CPUVectorPolicyBase::ApplyTerms(qs_data_p_t* qs_p, + const std::vector>& ham, index_t dim) + -> qs_data_p_t { + auto& qs = (*qs_p); + if (qs == nullptr) { + qs = derived::InitState(dim); + } qs_data_p_t out = derived::InitState(dim, false); for (const auto& [pauli_string, coeff_] : ham) { auto mask = GenPauliMask(pauli_string); @@ -129,6 +157,58 @@ auto CPUVectorPolicyBase::ApplyTerms(qs_data_p_t qs, const return out; }; +template +auto CPUVectorPolicyBase::ExpectationOfTerms(const qs_data_p_t& bra_out, + const qs_data_p_t& ket_out, + const std::vector>& ham, + index_t dim) -> py_qs_data_t { + auto bra = bra_out; + auto ket = ket_out; + bool will_free_bra = false, will_free_ket = false; + if (bra == nullptr) { + bra = derived::InitState(dim); + will_free_bra = true; + } + if (ket == nullptr) { + ket = derived::InitState(dim); + will_free_ket = true; + } + py_qs_data_t out = 0.0; + for (const auto& [pauli_string, coeff_] : ham) { + auto mask = GenPauliMask(pauli_string); + auto mask_f = mask.mask_x | mask.mask_y; + auto coeff = coeff_; + calc_type res_real = 0, res_imag = 0; + // clang-format off + THRESHOLD_OMP( + MQ_DO_PRAGMA(omp parallel for reduction(+:res_real, res_imag) schedule(static)), dim, DimTh, + for (omp::idx_t i = 0; i < dim; i++) { + auto j = (i ^ mask_f); + if (i <= j) { + auto axis2power = CountOne(static_cast(i & mask.mask_z)); // -1 + auto axis3power = CountOne(static_cast(i & mask.mask_y)); // -1j + auto c = ComplexCast::apply( + POLAR[static_cast((mask.num_y + 2 * axis3power + 2 * axis2power) & 3)]); + auto tmp = std::conj(bra[j]) * ket[i] * coeff * c; + if (i != j) { + tmp += std::conj(bra[i]) * ket[j] * coeff / c; + } + res_real += std::real(tmp); + res_imag += std::imag(tmp); + } + }) + // clang-format on + out += py_qs_data_t(res_real, res_imag); + } + if (will_free_bra) { + derived::FreeState(&bra); + } + if (will_free_ket) { + derived::FreeState(&ket); + } + return out; +} + template auto CPUVectorPolicyBase::GroundStateOfZZs(const std::map& masks_value, qbit_t n_qubits) -> calc_type { diff --git a/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_rot_pauli.cpp b/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_rot_pauli.cpp index fbbd2e36aed0de99a7c9100ccf643976416ffc69..8872af0b67e79ee0220b50168d024a306067ea0d 100644 --- a/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_rot_pauli.cpp +++ b/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_rot_pauli.cpp @@ -26,8 +26,12 @@ namespace mindquantum::sim::vector::detail { constexpr int ROT_PAULI_FACTOR = 2; template -void CPUVectorPolicyBase::ApplyRxx(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void CPUVectorPolicyBase::ApplyRxx(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff) { + auto& qs = *qs_p; + if (qs == nullptr) { + qs = derived::InitState(dim); + } DoubleQubitGateMask mask(objs, ctrls); auto c = static_cast(std::cos(val / ROT_PAULI_FACTOR)); auto s = static_cast(std::sin(val / ROT_PAULI_FACTOR)) * IMAGE_MI; @@ -74,14 +78,18 @@ void CPUVectorPolicyBase::ApplyRxx(qs_data_p_t qs, const q } }) if (diff) { - derived::SetToZeroExcept(qs, mask.ctrl_mask, dim); + derived::SetToZeroExcept(qs_p, mask.ctrl_mask, dim); } } } template -void CPUVectorPolicyBase::ApplyRxy(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void CPUVectorPolicyBase::ApplyRxy(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff) { + auto& qs = *qs_p; + if (qs == nullptr) { + qs = derived::InitState(dim); + } DoubleQubitGateMask mask(objs, ctrls); auto c = static_cast(std::cos(val / ROT_PAULI_FACTOR)); auto s = static_cast(std::sin(val / ROT_PAULI_FACTOR)); @@ -128,14 +136,18 @@ void CPUVectorPolicyBase::ApplyRxy(qs_data_p_t qs, const q } }) if (diff) { - derived::SetToZeroExcept(qs, mask.ctrl_mask, dim); + derived::SetToZeroExcept(qs_p, mask.ctrl_mask, dim); } } } template -void CPUVectorPolicyBase::ApplyRxz(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void CPUVectorPolicyBase::ApplyRxz(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff) { + auto& qs = *qs_p; + if (qs == nullptr) { + qs = derived::InitState(dim); + } DoubleQubitGateMask mask(objs, ctrls); auto c = static_cast(std::cos(val / ROT_PAULI_FACTOR)); auto s = static_cast(std::sin(val / ROT_PAULI_FACTOR)) * IMAGE_MI; @@ -182,14 +194,18 @@ void CPUVectorPolicyBase::ApplyRxz(qs_data_p_t qs, const q } }) if (diff) { - derived::SetToZeroExcept(qs, mask.ctrl_mask, dim); + derived::SetToZeroExcept(qs_p, mask.ctrl_mask, dim); } } } template -void CPUVectorPolicyBase::ApplyRyz(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void CPUVectorPolicyBase::ApplyRyz(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff) { + auto& qs = *qs_p; + if (qs == nullptr) { + qs = derived::InitState(dim); + } DoubleQubitGateMask mask(objs, ctrls); auto c = static_cast(std::cos(val / ROT_PAULI_FACTOR)); auto s = static_cast(std::sin(val / ROT_PAULI_FACTOR)); @@ -236,14 +252,18 @@ void CPUVectorPolicyBase::ApplyRyz(qs_data_p_t qs, const q } }) if (diff) { - derived::SetToZeroExcept(qs, mask.ctrl_mask, dim); + derived::SetToZeroExcept(qs_p, mask.ctrl_mask, dim); } } } template -void CPUVectorPolicyBase::ApplyRyy(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void CPUVectorPolicyBase::ApplyRyy(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff) { + auto& qs = *qs_p; + if (qs == nullptr) { + qs = derived::InitState(dim); + } DoubleQubitGateMask mask(objs, ctrls); auto c = static_cast(std::cos(val / ROT_PAULI_FACTOR)); auto s = static_cast(std::sin(val / ROT_PAULI_FACTOR)) * IMAGE_I; @@ -290,14 +310,18 @@ void CPUVectorPolicyBase::ApplyRyy(qs_data_p_t qs, const q } }) if (diff) { - derived::SetToZeroExcept(qs, mask.ctrl_mask, dim); + derived::SetToZeroExcept(qs_p, mask.ctrl_mask, dim); } } } template -void CPUVectorPolicyBase::ApplyRzz(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void CPUVectorPolicyBase::ApplyRzz(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff) { + auto& qs = *qs_p; + if (qs == nullptr) { + qs = derived::InitState(dim); + } DoubleQubitGateMask mask(objs, ctrls); auto c = static_cast(std::cos(val / ROT_PAULI_FACTOR)); auto s = static_cast(std::sin(val / ROT_PAULI_FACTOR)); @@ -338,13 +362,13 @@ void CPUVectorPolicyBase::ApplyRzz(qs_data_p_t qs, const q } }) if (diff) { - derived::SetToZeroExcept(qs, mask.ctrl_mask, dim); + derived::SetToZeroExcept(qs_p, mask.ctrl_mask, dim); } } } template -void CPUVectorPolicyBase::ApplyRX(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void CPUVectorPolicyBase::ApplyRX(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff) { SingleQubitGateMask mask(objs, ctrls); auto a = std::cos(val / ROT_PAULI_FACTOR); @@ -354,14 +378,14 @@ void CPUVectorPolicyBase::ApplyRX(qs_data_p_t qs, const qb b = -std::cos(val / ROT_PAULI_FACTOR) / ROT_PAULI_FACTOR; } std::vector> m{{{a, 0}, {0, b}}, {{0, b}, {a, 0}}}; - derived::ApplySingleQubitMatrix(qs, qs, objs[0], ctrls, m, dim); + derived::ApplySingleQubitMatrix(*qs_p, qs_p, objs[0], ctrls, m, dim); if (diff && mask.ctrl_mask) { - derived::SetToZeroExcept(qs, mask.ctrl_mask, dim); + derived::SetToZeroExcept(qs_p, mask.ctrl_mask, dim); } } template -void CPUVectorPolicyBase::ApplyRY(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void CPUVectorPolicyBase::ApplyRY(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff) { SingleQubitGateMask mask(objs, ctrls); auto a = std::cos(val / ROT_PAULI_FACTOR); @@ -371,14 +395,14 @@ void CPUVectorPolicyBase::ApplyRY(qs_data_p_t qs, const qb b = std::cos(val / ROT_PAULI_FACTOR) / ROT_PAULI_FACTOR; } std::vector> m{{{a, 0}, {-b, 0}}, {{b, 0}, {a, 0}}}; - derived::ApplySingleQubitMatrix(qs, qs, objs[0], ctrls, m, dim); + derived::ApplySingleQubitMatrix(*qs_p, qs_p, objs[0], ctrls, m, dim); if (diff && mask.ctrl_mask) { - derived::SetToZeroExcept(qs, mask.ctrl_mask, dim); + derived::SetToZeroExcept(qs_p, mask.ctrl_mask, dim); } } template -void CPUVectorPolicyBase::ApplyRZ(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void CPUVectorPolicyBase::ApplyRZ(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff) { SingleQubitGateMask mask(objs, ctrls); auto a = std::cos(val / ROT_PAULI_FACTOR); @@ -388,9 +412,9 @@ void CPUVectorPolicyBase::ApplyRZ(qs_data_p_t qs, const qb b = std::cos(val / ROT_PAULI_FACTOR) / ROT_PAULI_FACTOR; } std::vector> m{{{a, -b}, {0, 0}}, {{0, 0}, {a, b}}}; - derived::ApplySingleQubitMatrix(qs, qs, objs[0], ctrls, m, dim); + derived::ApplySingleQubitMatrix(*qs_p, qs_p, objs[0], ctrls, m, dim); if (diff && mask.ctrl_mask) { - derived::SetToZeroExcept(qs, mask.ctrl_mask, dim); + derived::SetToZeroExcept(qs_p, mask.ctrl_mask, dim); } } diff --git a/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_swap_like.cpp b/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_swap_like.cpp index 33c0b7d2f8d5d993e0baecff1b2eb3fccf9dce73..950b3ac74d990b0cc45248c124412a49b843bd9c 100644 --- a/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_swap_like.cpp +++ b/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_swap_like.cpp @@ -25,8 +25,12 @@ #include "simulator/vector/detail/cpu_vector_policy.hpp" namespace mindquantum::sim::vector::detail { template -void CPUVectorPolicyBase::ApplySWAP(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void CPUVectorPolicyBase::ApplySWAP(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim) { + auto& qs = *qs_p; + if (qs == nullptr) { + qs = derived::InitState(dim); + } DoubleQubitGateMask mask(objs, ctrls); if (!mask.ctrl_mask) { THRESHOLD_OMP_FOR( @@ -58,8 +62,12 @@ void CPUVectorPolicyBase::ApplySWAP(qs_data_p_t qs, const } template -void CPUVectorPolicyBase::ApplyISWAP(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void CPUVectorPolicyBase::ApplyISWAP(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, bool daggered, index_t dim) { + auto& qs = *qs_p; + if (qs == nullptr) { + qs = derived::InitState(dim); + } DoubleQubitGateMask mask(objs, ctrls); calc_type frac = 1.0; if (daggered) { diff --git a/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_x_like.cpp b/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_x_like.cpp index ff70445274044f8f6c2795492689118946c20f83..b3f0445a215035bc6b85a1a24df060149a7e8907 100644 --- a/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_x_like.cpp +++ b/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_x_like.cpp @@ -25,8 +25,12 @@ #include "simulator/vector/detail/cpu_vector_policy.hpp" namespace mindquantum::sim::vector::detail { template -void CPUVectorPolicyBase::ApplyXLike(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void CPUVectorPolicyBase::ApplyXLike(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, qs_data_t v1, qs_data_t v2, index_t dim) { + auto& qs = *qs_p; + if (qs == nullptr) { + qs = derived::InitState(dim); + } SingleQubitGateMask mask(objs, ctrls); if (!mask.ctrl_mask) { THRESHOLD_OMP_FOR( @@ -52,15 +56,15 @@ void CPUVectorPolicyBase::ApplyXLike(qs_data_p_t qs, const } template -void CPUVectorPolicyBase::ApplyX(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void CPUVectorPolicyBase::ApplyX(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim) { - derived::ApplyXLike(qs, objs, ctrls, 1, 1, dim); + derived::ApplyXLike(qs_p, objs, ctrls, 1, 1, dim); } template -void CPUVectorPolicyBase::ApplyY(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void CPUVectorPolicyBase::ApplyY(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim) { - derived::ApplyXLike(qs, objs, ctrls, IMAGE_MI, IMAGE_I, dim); + derived::ApplyXLike(qs_p, objs, ctrls, IMAGE_MI, IMAGE_I, dim); } #ifdef __x86_64__ template struct CPUVectorPolicyBase; diff --git a/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_z_like.cpp b/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_z_like.cpp index f6f4bb183abe6861048c1d8da80c5d1574471f41..47dfffc722eaffe5ea75639828f8f9b6e3ed95ba 100644 --- a/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_z_like.cpp +++ b/ccsrc/lib/simulator/vector/detail/cpu_common/cpu_vector_core_z_like.cpp @@ -25,8 +25,12 @@ #include "simulator/vector/detail/cpu_vector_policy.hpp" namespace mindquantum::sim::vector::detail { template -void CPUVectorPolicyBase::ApplyZLike(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void CPUVectorPolicyBase::ApplyZLike(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, qs_data_t val, index_t dim) { + auto& qs = *qs_p; + if (qs == nullptr) { + qs = derived::InitState(dim); + } SingleQubitGateMask mask(objs, ctrls); if (!mask.ctrl_mask) { THRESHOLD_OMP_FOR( @@ -46,41 +50,45 @@ void CPUVectorPolicyBase::ApplyZLike(qs_data_p_t qs, const } template -void CPUVectorPolicyBase::ApplyZ(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void CPUVectorPolicyBase::ApplyZ(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim) { - derived::ApplyZLike(qs, objs, ctrls, -1, dim); + derived::ApplyZLike(qs_p, objs, ctrls, -1, dim); } template -void CPUVectorPolicyBase::ApplySGate(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void CPUVectorPolicyBase::ApplySGate(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim) { - derived::ApplyZLike(qs, objs, ctrls, qs_data_t(0, 1), dim); + derived::ApplyZLike(qs_p, objs, ctrls, qs_data_t(0, 1), dim); } template -void CPUVectorPolicyBase::ApplySdag(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void CPUVectorPolicyBase::ApplySdag(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim) { - derived::ApplyZLike(qs, objs, ctrls, qs_data_t(0, -1), dim); + derived::ApplyZLike(qs_p, objs, ctrls, qs_data_t(0, -1), dim); } template -void CPUVectorPolicyBase::ApplyT(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void CPUVectorPolicyBase::ApplyT(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim) { - derived::ApplyZLike(qs, objs, ctrls, qs_data_t(1, 1) / static_cast(std::sqrt(2.0)), dim); + derived::ApplyZLike(qs_p, objs, ctrls, qs_data_t(1, 1) / static_cast(std::sqrt(2.0)), dim); } template -void CPUVectorPolicyBase::ApplyTdag(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void CPUVectorPolicyBase::ApplyTdag(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim) { - derived::ApplyZLike(qs, objs, ctrls, qs_data_t(1, -1) / static_cast(std::sqrt(2.0)), dim); + derived::ApplyZLike(qs_p, objs, ctrls, qs_data_t(1, -1) / static_cast(std::sqrt(2.0)), dim); } template -void CPUVectorPolicyBase::ApplyPS(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void CPUVectorPolicyBase::ApplyPS(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff) { if (!diff) { - derived::ApplyZLike(qs, objs, ctrls, qs_data_t(std::cos(val), std::sin(val)), dim); + derived::ApplyZLike(qs_p, objs, ctrls, qs_data_t(std::cos(val), std::sin(val)), dim); } else { + auto& qs = *qs_p; + if (qs == nullptr) { + qs = derived::InitState(dim); + } SingleQubitGateMask mask(objs, ctrls); auto e = -std::sin(val) + IMAGE_I * std::cos(val); if (!mask.ctrl_mask) { @@ -101,7 +109,7 @@ void CPUVectorPolicyBase::ApplyPS(qs_data_p_t qs, const qb qs[j] *= e; } }) - derived::SetToZeroExcept(qs, mask.ctrl_mask, dim); + derived::SetToZeroExcept(qs_p, mask.ctrl_mask, dim); } } } diff --git a/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_condition.cu b/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_condition.cu index 1259082e111a7b540ea9dde974fca129d3d72c86..3371abbefbdb505f0273cdf36fe34ddc3b7fdefd 100644 --- a/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_condition.cu +++ b/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_condition.cu @@ -27,73 +27,103 @@ namespace mindquantum::sim::vector::detail { template template -void GPUVectorPolicyBase::ConditionalBinary(qs_data_p_t src, qs_data_p_t des, +void GPUVectorPolicyBase::ConditionalBinary(const qs_data_p_t& src, qs_data_p_t* des_p, qs_data_t succ_coeff, qs_data_t fail_coeff, index_t dim, const binary_op& op) { + auto& des = *des_p; + if (des == nullptr) { + des = derived::InitState(dim); + } thrust::counting_iterator i(0); - thrust::for_each(i, i + dim, [=] __device__(size_t i) { - if ((i & mask) == condi) { - des[i] = op(src[i], succ_coeff); - } else { - des[i] = op(src[i], fail_coeff); - } - }); + if (src == nullptr) { + thrust::for_each(i, i + 1, [=] __device__(size_t i) { + if ((i & mask) == condi) { + des[i] = op(1.0, succ_coeff); + } else { + des[i] = op(1.0, fail_coeff); + } + }); + } else { + thrust::for_each(i, i + dim, [=] __device__(size_t i) { + if ((i & mask) == condi) { + des[i] = op(src[i], succ_coeff); + } else { + des[i] = op(src[i], fail_coeff); + } + }); + } } template template -void GPUVectorPolicyBase::ConditionalBinary(qs_data_p_t src, qs_data_p_t des, index_t mask, - index_t condi, qs_data_t succ_coeff, +void GPUVectorPolicyBase::ConditionalBinary(const qs_data_p_t& src, qs_data_p_t* des_p, + index_t mask, index_t condi, qs_data_t succ_coeff, qs_data_t fail_coeff, index_t dim, const binary_op& op) { + auto& des = *des_p; + if (des == nullptr) { + des = derived::InitState(dim); + } thrust::counting_iterator i(0); - thrust::for_each(i, i + dim, [=] __device__(size_t i) { - if ((i & mask) == condi) { - des[i] = op(src[i], succ_coeff); - } else { - des[i] = op(src[i], fail_coeff); - } - }); + if (src == nullptr) { + thrust::for_each(i, i + 1, [=] __device__(size_t i) { + if ((i & mask) == condi) { + des[i] = op(1.0, succ_coeff); + } else { + des[i] = op(1.0, fail_coeff); + } + }); + } else { + thrust::for_each(i, i + dim, [=] __device__(size_t i) { + if ((i & mask) == condi) { + des[i] = op(src[i], succ_coeff); + } else { + des[i] = op(src[i], fail_coeff); + } + }); + } } template -void GPUVectorPolicyBase::ConditionalAdd(qs_data_p_t src, qs_data_p_t des, index_t mask, +void GPUVectorPolicyBase::ConditionalAdd(const qs_data_p_t& src, qs_data_p_t* des_p, index_t mask, index_t condi, qs_data_t succ_coeff, qs_data_t fail_coeff, index_t dim) { - derived::template ConditionalBinary(src, des, mask, condi, succ_coeff, fail_coeff, dim, thrust::plus()); + derived::template ConditionalBinary(src, des_p, mask, condi, succ_coeff, fail_coeff, dim, + thrust::plus()); } template -void GPUVectorPolicyBase::ConditionalMinus(qs_data_p_t src, qs_data_p_t des, index_t mask, - index_t condi, qs_data_t succ_coeff, +void GPUVectorPolicyBase::ConditionalMinus(const qs_data_p_t& src, qs_data_p_t* des_p, + index_t mask, index_t condi, qs_data_t succ_coeff, qs_data_t fail_coeff, index_t dim) { - derived::template ConditionalBinary(src, des, mask, condi, succ_coeff, fail_coeff, dim, thrust::minus()); + derived::template ConditionalBinary(src, des_p, mask, condi, succ_coeff, fail_coeff, dim, + thrust::minus()); } template -void GPUVectorPolicyBase::ConditionalMul(qs_data_p_t src, qs_data_p_t des, index_t mask, +void GPUVectorPolicyBase::ConditionalMul(const qs_data_p_t& src, qs_data_p_t* des_p, index_t mask, index_t condi, qs_data_t succ_coeff, qs_data_t fail_coeff, index_t dim) { - derived::template ConditionalBinary(src, des, mask, condi, succ_coeff, fail_coeff, dim, + derived::template ConditionalBinary(src, des_p, mask, condi, succ_coeff, fail_coeff, dim, thrust::multiplies()); } template -void GPUVectorPolicyBase::ConditionalDiv(qs_data_p_t src, qs_data_p_t des, index_t mask, +void GPUVectorPolicyBase::ConditionalDiv(const qs_data_p_t& src, qs_data_p_t* des_p, index_t mask, index_t condi, qs_data_t succ_coeff, qs_data_t fail_coeff, index_t dim) { - derived::template ConditionalBinary(src, des, mask, condi, succ_coeff, fail_coeff, dim, + derived::template ConditionalBinary(src, des_p, mask, condi, succ_coeff, fail_coeff, dim, thrust::divides()); } template -void GPUVectorPolicyBase::QSMulValue(qs_data_p_t src, qs_data_p_t des, qs_data_t value, +void GPUVectorPolicyBase::QSMulValue(const qs_data_p_t& src, qs_data_p_t* des_p, qs_data_t value, index_t dim) { - derived::template ConditionalBinary<0, 0>(src, des, value, 0, dim, thrust::multiplies()); + derived::template ConditionalBinary<0, 0>(src, des_p, value, 0, dim, thrust::multiplies()); } template -auto GPUVectorPolicyBase::ConditionalCollect(qs_data_p_t qs, index_t mask, index_t condi, +auto GPUVectorPolicyBase::ConditionalCollect(const qs_data_p_t& qs, index_t mask, index_t condi, bool abs, index_t dim) -> qs_data_t { qs_data_t res = 0; thrust::counting_iterator l(0); diff --git a/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_dot_like.cu b/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_dot_like.cu index 074d6368223682037d7cba0ff4e21a96b905f0d6..c30817f7c40ea7b441a3f1a894e5caeed1b09ee8 100644 --- a/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_dot_like.cu +++ b/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_dot_like.cu @@ -26,7 +26,19 @@ namespace mindquantum::sim::vector::detail { template -auto GPUVectorPolicyBase::Vdot(qs_data_p_t bra, qs_data_p_t ket, index_t dim) -> py_qs_data_t { +auto GPUVectorPolicyBase::Vdot(const qs_data_p_t& bra, const qs_data_p_t& ket, index_t dim) + -> py_qs_data_t { + if (bra == nullptr && ket == nullptr) { + return 1.0; + } else if (bra == nullptr) { + py_qs_data_t out; + cudaMemcpy(&out, ket, sizeof(qs_data_t), cudaMemcpyDeviceToHost); + return out; + } else if (ket == nullptr) { + py_qs_data_t out; + cudaMemcpy(&out, bra, sizeof(qs_data_t), cudaMemcpyDeviceToHost); + return std::conj(out); + } thrust::device_ptr dev_bra(bra); thrust::device_ptr dev_ket(ket); qs_data_t res = thrust::inner_product(dev_bra, dev_bra + dim, dev_ket, qs_data_t(0, 0), thrust::plus(), @@ -36,8 +48,28 @@ auto GPUVectorPolicyBase::Vdot(qs_data_p_t bra, qs_data_p_ template template -auto GPUVectorPolicyBase::ConditionVdot(qs_data_p_t bra, qs_data_p_t ket, index_t dim) - -> py_qs_data_t { +auto GPUVectorPolicyBase::ConditionVdot(const qs_data_p_t& bra, const qs_data_p_t& ket, + index_t dim) -> py_qs_data_t { + if (bra == nullptr && ket == nullptr) { + if ((0 & mask) == condi) { + return 1.0; + } + return 0.0; + } else if (bra == nullptr) { + if ((0 & mask) == condi) { + py_qs_data_t out; + cudaMemcpy(&out, ket, sizeof(qs_data_t), cudaMemcpyDeviceToHost); + return out; + } + return 0.0; + } else if (ket == nullptr) { + if ((0 & mask) == condi) { + py_qs_data_t out; + cudaMemcpy(&out, bra, sizeof(qs_data_t), cudaMemcpyDeviceToHost); + return std::conj(out); + } + return 0.0; + } thrust::counting_iterator i(0); return thrust::transform_reduce( i, i + dim, @@ -52,8 +84,11 @@ auto GPUVectorPolicyBase::ConditionVdot(qs_data_p_t bra, q } template -auto GPUVectorPolicyBase::OneStateVdot(qs_data_p_t bra, qs_data_p_t ket, qbit_t obj_qubit, - index_t dim) -> py_qs_data_t { +auto GPUVectorPolicyBase::OneStateVdot(const qs_data_p_t& bra, const qs_data_p_t& ket, + qbit_t obj_qubit, index_t dim) -> py_qs_data_t { + if (bra == nullptr || ket == nullptr) { + return 0.0; + } SingleQubitGateMask mask({obj_qubit}, {}); auto obj_high_mask = mask.obj_high_mask; auto obj_low_mask = mask.obj_low_mask; @@ -69,8 +104,19 @@ auto GPUVectorPolicyBase::OneStateVdot(qs_data_p_t bra, qs } template -auto GPUVectorPolicyBase::ZeroStateVdot(qs_data_p_t bra, qs_data_p_t ket, qbit_t obj_qubit, - index_t dim) -> py_qs_data_t { +auto GPUVectorPolicyBase::ZeroStateVdot(const qs_data_p_t& bra, const qs_data_p_t& ket, + qbit_t obj_qubit, index_t dim) -> py_qs_data_t { + if (bra == nullptr && ket == nullptr) { + return 1.0; + } else if (bra == nullptr) { + py_qs_data_t out; + cudaMemcpy(&out, ket, sizeof(qs_data_t), cudaMemcpyDeviceToHost); + return out; + } else if (ket == nullptr) { + py_qs_data_t out; + cudaMemcpy(&out, bra, sizeof(qs_data_t), cudaMemcpyDeviceToHost); + return std::conj(out); + } SingleQubitGateMask mask({obj_qubit}, {}); auto obj_high_mask = mask.obj_high_mask; auto obj_low_mask = mask.obj_low_mask; @@ -86,10 +132,16 @@ auto GPUVectorPolicyBase::ZeroStateVdot(qs_data_p_t bra, q template auto GPUVectorPolicyBase::CsrDotVec(const std::shared_ptr>& a, - qs_data_p_t vec, index_t dim) -> qs_data_p_t { + const qs_data_p_t& vec_out, index_t dim) -> qs_data_p_t { if (dim != a->dim_) { throw std::runtime_error("Sparse hamiltonian size not match with quantum state size."); } + auto vec = vec_out; + bool will_free = false; + if (vec == nullptr) { + vec = derived::InitState(dim); + will_free = true; + } auto host = reinterpret_cast*>(malloc(dim * sizeof(std::complex))); cudaMemcpy(host, vec, sizeof(qs_data_t) * dim, cudaMemcpyDeviceToHost); auto host_res = sparse::Csr_Dot_Vec(a, reinterpret_cast(host)); @@ -102,15 +154,25 @@ auto GPUVectorPolicyBase::CsrDotVec(const std::shared_ptr< if (host_res != nullptr) { free(host_res); } + if (will_free) { + derived::FreeState(&vec); + } return out; } + template auto GPUVectorPolicyBase::CsrDotVec(const std::shared_ptr>& a, const std::shared_ptr>& b, - qs_data_p_t vec, index_t dim) -> qs_data_p_t { + const qs_data_p_t& vec_out, index_t dim) -> qs_data_p_t { if ((dim != a->dim_) || (dim != b->dim_)) { throw std::runtime_error("Sparse hamiltonian size not match with quantum state size."); } + auto vec = vec_out; + bool will_free = false; + if (vec == nullptr) { + vec = derived::InitState(dim); + will_free = true; + } auto host = reinterpret_cast*>(malloc(dim * sizeof(std::complex))); cudaMemcpy(host, vec, sizeof(qs_data_t) * dim, cudaMemcpyDeviceToHost); auto host_res = sparse::Csr_Dot_Vec(a, b, reinterpret_cast(host)); @@ -123,9 +185,88 @@ auto GPUVectorPolicyBase::CsrDotVec(const std::shared_ptr< if (host_res != nullptr) { free(host_res); } + if (will_free) { + derived::FreeState(&vec); + } + return out; +} +template +auto GPUVectorPolicyBase::ExpectationOfCsr( + const std::shared_ptr>& a, const qs_data_p_t& bra_out, const qs_data_p_t& ket_out, + index_t dim) -> py_qs_data_t { + if (dim != a->dim_) { + throw std::runtime_error("Sparse hamiltonian size not match with quantum state size."); + } + auto bra = bra_out; + auto ket = ket_out; + bool will_free_bra = false, will_free_ket = false; + if (bra == nullptr) { + bra = derived::InitState(dim); + will_free_bra = true; + } + if (ket == nullptr) { + ket = derived::InitState(dim); + will_free_ket = true; + } + auto host_bra = reinterpret_cast*>(malloc(dim * sizeof(std::complex))); + cudaMemcpy(host_bra, bra, sizeof(qs_data_t) * dim, cudaMemcpyDeviceToHost); + auto host_ket = reinterpret_cast*>(malloc(dim * sizeof(std::complex))); + cudaMemcpy(host_bra, ket, sizeof(qs_data_t) * dim, cudaMemcpyDeviceToHost); + auto out = sparse::ExpectationOfCsr(a, reinterpret_cast(host_bra), + reinterpret_cast(host_ket)); + if (host_bra != nullptr) { + free(host_bra); + } + if (host_ket != nullptr) { + free(host_ket); + } + if (will_free_bra) { + derived::FreeState(&bra); + } + if (will_free_ket) { + derived::FreeState(&ket); + } return out; } +template +auto GPUVectorPolicyBase::ExpectationOfCsr( + const std::shared_ptr>& a, const std::shared_ptr>& b, + const qs_data_p_t& bra_out, const qs_data_p_t& ket_out, index_t dim) -> py_qs_data_t { + if ((dim != a->dim_) || (dim != b->dim_)) { + throw std::runtime_error("Sparse hamiltonian size not match with quantum state size."); + } + auto bra = bra_out; + auto ket = ket_out; + bool will_free_bra = false, will_free_ket = false; + if (bra == nullptr) { + bra = derived::InitState(dim); + will_free_bra = true; + } + if (ket == nullptr) { + ket = derived::InitState(dim); + will_free_ket = true; + } + auto host_bra = reinterpret_cast*>(malloc(dim * sizeof(std::complex))); + cudaMemcpy(host_bra, bra, sizeof(qs_data_t) * dim, cudaMemcpyDeviceToHost); + auto host_ket = reinterpret_cast*>(malloc(dim * sizeof(std::complex))); + cudaMemcpy(host_bra, ket, sizeof(qs_data_t) * dim, cudaMemcpyDeviceToHost); + auto out = sparse::ExpectationOfCsr(a, b, reinterpret_cast(host_bra), + reinterpret_cast(host_ket)); + if (host_bra != nullptr) { + free(host_bra); + } + if (host_ket != nullptr) { + free(host_ket); + } + if (will_free_bra) { + derived::FreeState(&bra); + } + if (will_free_ket) { + derived::FreeState(&ket); + } + return out; +} template struct GPUVectorPolicyBase; template struct GPUVectorPolicyBase; diff --git a/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_gate_expect.cu b/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_gate_expect.cu index e9a84fa1b5067c5c9dbef803b7faff64a4560378..5d0ae578e68f3c94b5b41e9ae32a86ca51e66357 100644 --- a/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_gate_expect.cu +++ b/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_gate_expect.cu @@ -26,10 +26,22 @@ namespace mindquantum::sim::vector::detail { template -auto GPUVectorPolicyBase::ExpectDiffNQubitsMatrix(qs_data_p_t bra, qs_data_p_t ket, - const qbits_t& objs, const qbits_t& ctrls, +auto GPUVectorPolicyBase::ExpectDiffNQubitsMatrix(const qs_data_p_t& bra_out, + const qs_data_p_t& ket_out, const qbits_t& objs, + const qbits_t& ctrls, const std::vector& gate, index_t dim) -> qs_data_t { + auto bra = bra_out; + auto ket = ket_out; + bool will_free_bra = false, will_free_ket = false; + if (bra == nullptr) { + bra = derived::InitState(dim); + will_free_bra = true; + } + if (ket == nullptr) { + ket = derived::InitState(dim); + will_free_ket = true; + } size_t n_qubit = objs.size(); size_t m_dim = (1UL << n_qubit); size_t ctrl_mask = 0; @@ -60,7 +72,7 @@ auto GPUVectorPolicyBase::ExpectDiffNQubitsMatrix(qs_data_ auto device_gate_ptr = thrust::raw_pointer_cast(device_gate.data()); thrust::counting_iterator l(0); - return thrust::transform_reduce( + auto res = thrust::transform_reduce( l, l + dim, [=] __device__(size_t l) { qs_data_t res = 0; @@ -76,12 +88,31 @@ auto GPUVectorPolicyBase::ExpectDiffNQubitsMatrix(qs_data_ return res; }, qs_data_t(0, 0), thrust::plus()); + if (will_free_bra) { + derived::FreeState(&bra); + } + if (will_free_ket) { + derived::FreeState(&ket); + } + return res; } template -auto GPUVectorPolicyBase::ExpectDiffTwoQubitsMatrix(qs_data_p_t bra, qs_data_p_t ket, +auto GPUVectorPolicyBase::ExpectDiffTwoQubitsMatrix(const qs_data_p_t& bra_out, + const qs_data_p_t& ket_out, const qbits_t& objs, const qbits_t& ctrls, const std::vector& m, index_t dim) -> qs_data_t { + auto bra = bra_out; + auto ket = ket_out; + bool will_free_bra = false, will_free_ket = false; + if (bra == nullptr) { + bra = derived::InitState(dim); + will_free_bra = true; + } + if (ket == nullptr) { + ket = derived::InitState(dim); + will_free_ket = true; + } DoubleQubitGateMask mask(objs, ctrls); qs_data_t m00 = m[0][0]; qs_data_t m01 = m[0][1]; @@ -108,12 +139,36 @@ auto GPUVectorPolicyBase::ExpectDiffTwoQubitsMatrix(qs_dat auto obj_min_mask = mask.obj_min_mask; auto obj_max_mask = mask.obj_max_mask; auto ctrl_mask = mask.ctrl_mask; + qs_data_t res = 0.0; if (!mask.ctrl_mask) { - return thrust::transform_reduce( + res = thrust::transform_reduce( + l, l + dim / 4, + [=] __device__(size_t l) { + index_t i; + SHIFT_BIT_TWO(obj_low_mask, obj_rev_low_mask, obj_high_mask, obj_rev_high_mask, l, i); + auto m = i + obj_mask; + auto j = i + obj_min_mask; + auto k = i + obj_max_mask; + auto v00 = m00 * ket[i] + m01 * ket[j] + m02 * ket[k] + m03 * ket[m]; + auto v01 = m10 * ket[i] + m11 * ket[j] + m12 * ket[k] + m13 * ket[m]; + auto v10 = m20 * ket[i] + m21 * ket[j] + m22 * ket[k] + m23 * ket[m]; + auto v11 = m30 * ket[i] + m31 * ket[j] + m32 * ket[k] + m33 * ket[m]; + auto this_res = thrust::conj(bra[i]) * v00; + this_res += thrust::conj(bra[j]) * v01; + this_res += thrust::conj(bra[k]) * v10; + this_res += thrust::conj(bra[m]) * v11; + return this_res; + }, + qs_data_t(0, 0), thrust::plus()); + } else { + res = thrust::transform_reduce( l, l + dim / 4, [=] __device__(size_t l) { index_t i; SHIFT_BIT_TWO(obj_low_mask, obj_rev_low_mask, obj_high_mask, obj_rev_high_mask, l, i); + if ((i & ctrl_mask) != ctrl_mask) { + return qs_data_t(0, 0); + } auto m = i + obj_mask; auto j = i + obj_min_mask; auto k = i + obj_max_mask; @@ -129,35 +184,32 @@ auto GPUVectorPolicyBase::ExpectDiffTwoQubitsMatrix(qs_dat }, qs_data_t(0, 0), thrust::plus()); } - return thrust::transform_reduce( - l, l + dim / 4, - [=] __device__(size_t l) { - index_t i; - SHIFT_BIT_TWO(obj_low_mask, obj_rev_low_mask, obj_high_mask, obj_rev_high_mask, l, i); - if ((i & ctrl_mask) != ctrl_mask) { - return qs_data_t(0, 0); - } - auto m = i + obj_mask; - auto j = i + obj_min_mask; - auto k = i + obj_max_mask; - auto v00 = m00 * ket[i] + m01 * ket[j] + m02 * ket[k] + m03 * ket[m]; - auto v01 = m10 * ket[i] + m11 * ket[j] + m12 * ket[k] + m13 * ket[m]; - auto v10 = m20 * ket[i] + m21 * ket[j] + m22 * ket[k] + m23 * ket[m]; - auto v11 = m30 * ket[i] + m31 * ket[j] + m32 * ket[k] + m33 * ket[m]; - auto this_res = thrust::conj(bra[i]) * v00; - this_res += thrust::conj(bra[j]) * v01; - this_res += thrust::conj(bra[k]) * v10; - this_res += thrust::conj(bra[m]) * v11; - return this_res; - }, - qs_data_t(0, 0), thrust::plus()); + if (will_free_bra) { + derived::FreeState(&bra); + } + if (will_free_ket) { + derived::FreeState(&ket); + } + return res; } template -auto GPUVectorPolicyBase::ExpectDiffSingleQubitMatrix(qs_data_p_t bra, qs_data_p_t ket, +auto GPUVectorPolicyBase::ExpectDiffSingleQubitMatrix(const qs_data_p_t& bra_out, + const qs_data_p_t& ket_out, const qbits_t& objs, const qbits_t& ctrls, const std::vector& m, index_t dim) -> qs_data_t { + auto bra = bra_out; + auto ket = ket_out; + bool will_free_bra = false, will_free_ket = false; + if (bra == nullptr) { + bra = derived::InitState(dim); + will_free_bra = true; + } + if (ket == nullptr) { + ket = derived::InitState(dim); + will_free_ket = true; + } SingleQubitGateMask mask(objs, ctrls); qs_data_t m00 = m[0][0]; qs_data_t m01 = m[0][1]; @@ -168,8 +220,9 @@ auto GPUVectorPolicyBase::ExpectDiffSingleQubitMatrix(qs_d auto obj_low_mask = mask.obj_low_mask; auto obj_mask = mask.obj_mask; auto ctrl_mask = mask.ctrl_mask; + qs_data_t res = 0.0; if (!mask.ctrl_mask) { - return thrust::transform_reduce( + res = thrust::transform_reduce( l, l + dim / 2, [=] __device__(size_t l) { auto i = ((l & obj_high_mask) << 1) + (l & obj_low_mask); @@ -193,7 +246,7 @@ auto GPUVectorPolicyBase::ExpectDiffSingleQubitMatrix(qs_d } auto first_high_mask = ~first_low_mask; auto second_high_mask = ~second_low_mask; - return thrust::transform_reduce( + res = thrust::transform_reduce( l, l + dim / 4, [=] __device__(size_t l) { auto i = ((l & first_high_mask) << 1) + (l & first_low_mask); @@ -205,25 +258,33 @@ auto GPUVectorPolicyBase::ExpectDiffSingleQubitMatrix(qs_d return this_res; }, qs_data_t(0, 0), thrust::plus()); + } else { + res = thrust::transform_reduce( + l, l + dim / 2, + [=] __device__(size_t l) { + auto i = ((l & obj_high_mask) << 1) + (l & obj_low_mask); + if ((i & ctrl_mask) != ctrl_mask) { + return qs_data_t(0, 0); + } + auto j = i + obj_mask; + auto t1 = m00 * ket[i] + m01 * ket[j]; + auto t2 = m10 * ket[i] + m11 * ket[j]; + auto this_res = thrust::conj(bra[i]) * t1 + thrust::conj(bra[j]) * t2; + return this_res; + }, + qs_data_t(0, 0), thrust::plus()); } - return thrust::transform_reduce( - l, l + dim / 2, - [=] __device__(size_t l) { - auto i = ((l & obj_high_mask) << 1) + (l & obj_low_mask); - if ((i & ctrl_mask) != ctrl_mask) { - return qs_data_t(0, 0); - } - auto j = i + obj_mask; - auto t1 = m00 * ket[i] + m01 * ket[j]; - auto t2 = m10 * ket[i] + m11 * ket[j]; - auto this_res = thrust::conj(bra[i]) * t1 + thrust::conj(bra[j]) * t2; - return this_res; - }, - qs_data_t(0, 0), thrust::plus()); + if (will_free_bra) { + derived::FreeState(&bra); + } + if (will_free_ket) { + derived::FreeState(&ket); + } + return res; } template -auto GPUVectorPolicyBase::ExpectDiffMatrixGate(qs_data_p_t bra, qs_data_p_t ket, +auto GPUVectorPolicyBase::ExpectDiffMatrixGate(const qs_data_p_t& bra, const qs_data_p_t& ket, const qbits_t& objs, const qbits_t& ctrls, const std::vector& m, index_t dim) -> qs_data_t { @@ -237,9 +298,9 @@ auto GPUVectorPolicyBase::ExpectDiffMatrixGate(qs_data_p_t } template -auto GPUVectorPolicyBase::ExpectDiffRX(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, - const qbits_t& ctrls, calc_type val, index_t dim) - -> qs_data_t { +auto GPUVectorPolicyBase::ExpectDiffRX(const qs_data_p_t& bra, const qs_data_p_t& ket, + const qbits_t& objs, const qbits_t& ctrls, calc_type val, + index_t dim) -> qs_data_t { auto c = static_cast(-0.5 * std::sin(val / 2)); auto is = static_cast(0.5 * std::cos(val / 2)) * qs_data_t(0, -1); std::vector gate = {{c, is}, {is, c}}; @@ -247,9 +308,9 @@ auto GPUVectorPolicyBase::ExpectDiffRX(qs_data_p_t bra, qs } template -auto GPUVectorPolicyBase::ExpectDiffRY(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, - const qbits_t& ctrls, calc_type val, index_t dim) - -> qs_data_t { +auto GPUVectorPolicyBase::ExpectDiffRY(const qs_data_p_t& bra, const qs_data_p_t& ket, + const qbits_t& objs, const qbits_t& ctrls, calc_type val, + index_t dim) -> qs_data_t { SingleQubitGateMask mask(objs, ctrls); auto c = static_cast(-0.5 * std::sin(val / 2)); auto s = static_cast(0.5 * std::cos(val / 2)); @@ -258,9 +319,9 @@ auto GPUVectorPolicyBase::ExpectDiffRY(qs_data_p_t bra, qs } template -auto GPUVectorPolicyBase::ExpectDiffRZ(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, - const qbits_t& ctrls, calc_type val, index_t dim) - -> qs_data_t { +auto GPUVectorPolicyBase::ExpectDiffRZ(const qs_data_p_t& bra, const qs_data_p_t& ket, + const qbits_t& objs, const qbits_t& ctrls, calc_type val, + index_t dim) -> qs_data_t { SingleQubitGateMask mask(objs, ctrls); auto c = static_cast(-0.5 * std::sin(val / 2)); auto s = static_cast(0.5 * std::cos(val / 2)); @@ -271,9 +332,20 @@ auto GPUVectorPolicyBase::ExpectDiffRZ(qs_data_p_t bra, qs } template -auto GPUVectorPolicyBase::ExpectDiffRxx(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, - const qbits_t& ctrls, calc_type val, index_t dim) - -> qs_data_t { +auto GPUVectorPolicyBase::ExpectDiffRxx(const qs_data_p_t& bra_out, const qs_data_p_t& ket_out, + const qbits_t& objs, const qbits_t& ctrls, calc_type val, + index_t dim) -> qs_data_t { + auto bra = bra_out; + auto ket = ket_out; + bool will_free_bra = false, will_free_ket = false; + if (bra == nullptr) { + bra = derived::InitState(dim); + will_free_bra = true; + } + if (ket == nullptr) { + ket = derived::InitState(dim); + will_free_ket = true; + } DoubleQubitGateMask mask(objs, ctrls); auto c = static_cast(-std::sin(val / 2) / 2); auto s = static_cast(std::cos(val / 2) / 2) * qs_data_t(0, -1); @@ -286,12 +358,36 @@ auto GPUVectorPolicyBase::ExpectDiffRxx(qs_data_p_t bra, q auto obj_min_mask = mask.obj_min_mask; auto obj_max_mask = mask.obj_max_mask; auto ctrl_mask = mask.ctrl_mask; + qs_data_t res = 0.0; if (!mask.ctrl_mask) { - return thrust::transform_reduce( + res = thrust::transform_reduce( + l, l + dim / 4, + [=] __device__(size_t l) { + index_t i; + SHIFT_BIT_TWO(obj_low_mask, obj_rev_low_mask, obj_high_mask, obj_rev_high_mask, l, i); + auto m = i + obj_mask; + auto j = i + obj_min_mask; + auto k = i + obj_max_mask; + auto v00 = c * ket[i] + s * ket[m]; + auto v01 = c * ket[j] + s * ket[k]; + auto v10 = c * ket[k] + s * ket[j]; + auto v11 = c * ket[m] + s * ket[i]; + auto this_res = thrust::conj(bra[i]) * v00; + this_res += thrust::conj(bra[j]) * v01; + this_res += thrust::conj(bra[k]) * v10; + this_res += thrust::conj(bra[m]) * v11; + return this_res; + }, + qs_data_t(0, 0), thrust::plus()); + } else { + res = thrust::transform_reduce( l, l + dim / 4, [=] __device__(size_t l) { index_t i; SHIFT_BIT_TWO(obj_low_mask, obj_rev_low_mask, obj_high_mask, obj_rev_high_mask, l, i); + if ((i & ctrl_mask) != ctrl_mask) { + return qs_data_t(0, 0); + } auto m = i + obj_mask; auto j = i + obj_min_mask; auto k = i + obj_max_mask; @@ -307,34 +403,30 @@ auto GPUVectorPolicyBase::ExpectDiffRxx(qs_data_p_t bra, q }, qs_data_t(0, 0), thrust::plus()); } - return thrust::transform_reduce( - l, l + dim / 4, - [=] __device__(size_t l) { - index_t i; - SHIFT_BIT_TWO(obj_low_mask, obj_rev_low_mask, obj_high_mask, obj_rev_high_mask, l, i); - if ((i & ctrl_mask) != ctrl_mask) { - return qs_data_t(0, 0); - } - auto m = i + obj_mask; - auto j = i + obj_min_mask; - auto k = i + obj_max_mask; - auto v00 = c * ket[i] + s * ket[m]; - auto v01 = c * ket[j] + s * ket[k]; - auto v10 = c * ket[k] + s * ket[j]; - auto v11 = c * ket[m] + s * ket[i]; - auto this_res = thrust::conj(bra[i]) * v00; - this_res += thrust::conj(bra[j]) * v01; - this_res += thrust::conj(bra[k]) * v10; - this_res += thrust::conj(bra[m]) * v11; - return this_res; - }, - qs_data_t(0, 0), thrust::plus()); + if (will_free_bra) { + derived::FreeState(&bra); + } + if (will_free_ket) { + derived::FreeState(&ket); + } + return res; } template -auto GPUVectorPolicyBase::ExpectDiffRxy(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, - const qbits_t& ctrls, calc_type val, index_t dim) - -> qs_data_t { +auto GPUVectorPolicyBase::ExpectDiffRxy(const qs_data_p_t& bra_out, const qs_data_p_t& ket_out, + const qbits_t& objs, const qbits_t& ctrls, calc_type val, + index_t dim) -> qs_data_t { + auto bra = bra_out; + auto ket = ket_out; + bool will_free_bra = false, will_free_ket = false; + if (bra == nullptr) { + bra = derived::InitState(dim); + will_free_bra = true; + } + if (ket == nullptr) { + ket = derived::InitState(dim); + will_free_ket = true; + } DoubleQubitGateMask mask(objs, ctrls); auto c = static_cast(-std::sin(val / 2) / 2); auto s = static_cast(std::cos(val / 2) / 2); @@ -347,8 +439,9 @@ auto GPUVectorPolicyBase::ExpectDiffRxy(qs_data_p_t bra, q auto obj_min_mask = mask.obj_min_mask; auto obj_max_mask = mask.obj_max_mask; auto ctrl_mask = mask.ctrl_mask; + qs_data_t res = 0.0; if (!mask.ctrl_mask) { - return thrust::transform_reduce( + res = thrust::transform_reduce( l, l + dim / 4, [=] __device__(size_t l) { index_t i; @@ -367,35 +460,54 @@ auto GPUVectorPolicyBase::ExpectDiffRxy(qs_data_p_t bra, q return this_res; }, qs_data_t(0, 0), thrust::plus()); + } else { + res = thrust::transform_reduce( + l, l + dim / 4, + [=] __device__(size_t l) { + index_t i; + SHIFT_BIT_TWO(obj_low_mask, obj_rev_low_mask, obj_high_mask, obj_rev_high_mask, l, i); + if ((i & ctrl_mask) != ctrl_mask) { + return qs_data_t(0, 0); + } + auto m = i + obj_mask; + auto j = i + obj_min_mask; + auto k = i + obj_max_mask; + auto v00 = c * ket[i] - s * ket[m]; + auto v01 = c * ket[j] - s * ket[k]; + auto v10 = c * ket[k] + s * ket[j]; + auto v11 = c * ket[m] + s * ket[i]; + auto this_res = thrust::conj(bra[i]) * v00; + this_res += thrust::conj(bra[j]) * v01; + this_res += thrust::conj(bra[k]) * v10; + this_res += thrust::conj(bra[m]) * v11; + return this_res; + }, + qs_data_t(0, 0), thrust::plus()); } - return thrust::transform_reduce( - l, l + dim / 4, - [=] __device__(size_t l) { - index_t i; - SHIFT_BIT_TWO(obj_low_mask, obj_rev_low_mask, obj_high_mask, obj_rev_high_mask, l, i); - if ((i & ctrl_mask) != ctrl_mask) { - return qs_data_t(0, 0); - } - auto m = i + obj_mask; - auto j = i + obj_min_mask; - auto k = i + obj_max_mask; - auto v00 = c * ket[i] - s * ket[m]; - auto v01 = c * ket[j] - s * ket[k]; - auto v10 = c * ket[k] + s * ket[j]; - auto v11 = c * ket[m] + s * ket[i]; - auto this_res = thrust::conj(bra[i]) * v00; - this_res += thrust::conj(bra[j]) * v01; - this_res += thrust::conj(bra[k]) * v10; - this_res += thrust::conj(bra[m]) * v11; - return this_res; - }, - qs_data_t(0, 0), thrust::plus()); + if (will_free_bra) { + derived::FreeState(&bra); + } + if (will_free_ket) { + derived::FreeState(&ket); + } + return res; } template -auto GPUVectorPolicyBase::ExpectDiffRxz(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, - const qbits_t& ctrls, calc_type val, index_t dim) - -> qs_data_t { +auto GPUVectorPolicyBase::ExpectDiffRxz(const qs_data_p_t& bra_out, const qs_data_p_t& ket_out, + const qbits_t& objs, const qbits_t& ctrls, calc_type val, + index_t dim) -> qs_data_t { + auto bra = bra_out; + auto ket = ket_out; + bool will_free_bra = false, will_free_ket = false; + if (bra == nullptr) { + bra = derived::InitState(dim); + will_free_bra = true; + } + if (ket == nullptr) { + ket = derived::InitState(dim); + will_free_ket = true; + } DoubleQubitGateMask mask(objs, ctrls); auto c = static_cast(-std::sin(val / 2) / 2); auto s = static_cast(std::cos(val / 2) / 2) * qs_data_t(0, -1); @@ -408,12 +520,36 @@ auto GPUVectorPolicyBase::ExpectDiffRxz(qs_data_p_t bra, q auto obj_min_mask = mask.obj_min_mask; auto obj_max_mask = mask.obj_max_mask; auto ctrl_mask = mask.ctrl_mask; + qs_data_t res = 0.0; if (!mask.ctrl_mask) { - return thrust::transform_reduce( + res = thrust::transform_reduce( + l, l + dim / 4, + [=] __device__(size_t l) { + index_t i; + SHIFT_BIT_TWO(obj_low_mask, obj_rev_low_mask, obj_high_mask, obj_rev_high_mask, l, i); + auto m = i + obj_mask; + auto j = i + obj_min_mask; + auto k = i + obj_max_mask; + auto v00 = c * ket[i] + s * ket[j]; + auto v01 = c * ket[j] + s * ket[i]; + auto v10 = c * ket[k] - s * ket[m]; + auto v11 = c * ket[m] - s * ket[k]; + auto this_res = thrust::conj(bra[i]) * v00; + this_res += thrust::conj(bra[j]) * v01; + this_res += thrust::conj(bra[k]) * v10; + this_res += thrust::conj(bra[m]) * v11; + return this_res; + }, + qs_data_t(0, 0), thrust::plus()); + } else { + res = thrust::transform_reduce( l, l + dim / 4, [=] __device__(size_t l) { index_t i; SHIFT_BIT_TWO(obj_low_mask, obj_rev_low_mask, obj_high_mask, obj_rev_high_mask, l, i); + if ((i & ctrl_mask) != ctrl_mask) { + return qs_data_t(0, 0); + } auto m = i + obj_mask; auto j = i + obj_min_mask; auto k = i + obj_max_mask; @@ -429,34 +565,30 @@ auto GPUVectorPolicyBase::ExpectDiffRxz(qs_data_p_t bra, q }, qs_data_t(0, 0), thrust::plus()); } - return thrust::transform_reduce( - l, l + dim / 4, - [=] __device__(size_t l) { - index_t i; - SHIFT_BIT_TWO(obj_low_mask, obj_rev_low_mask, obj_high_mask, obj_rev_high_mask, l, i); - if ((i & ctrl_mask) != ctrl_mask) { - return qs_data_t(0, 0); - } - auto m = i + obj_mask; - auto j = i + obj_min_mask; - auto k = i + obj_max_mask; - auto v00 = c * ket[i] + s * ket[j]; - auto v01 = c * ket[j] + s * ket[i]; - auto v10 = c * ket[k] - s * ket[m]; - auto v11 = c * ket[m] - s * ket[k]; - auto this_res = thrust::conj(bra[i]) * v00; - this_res += thrust::conj(bra[j]) * v01; - this_res += thrust::conj(bra[k]) * v10; - this_res += thrust::conj(bra[m]) * v11; - return this_res; - }, - qs_data_t(0, 0), thrust::plus()); + if (will_free_bra) { + derived::FreeState(&bra); + } + if (will_free_ket) { + derived::FreeState(&ket); + } + return res; } template -auto GPUVectorPolicyBase::ExpectDiffRyz(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, - const qbits_t& ctrls, calc_type val, index_t dim) - -> qs_data_t { +auto GPUVectorPolicyBase::ExpectDiffRyz(const qs_data_p_t& bra_out, const qs_data_p_t& ket_out, + const qbits_t& objs, const qbits_t& ctrls, calc_type val, + index_t dim) -> qs_data_t { + auto bra = bra_out; + auto ket = ket_out; + bool will_free_bra = false, will_free_ket = false; + if (bra == nullptr) { + bra = derived::InitState(dim); + will_free_bra = true; + } + if (ket == nullptr) { + ket = derived::InitState(dim); + will_free_ket = true; + } DoubleQubitGateMask mask(objs, ctrls); auto c = static_cast(-std::sin(val / 2) / 2); auto s = static_cast(std::cos(val / 2) / 2); @@ -469,8 +601,9 @@ auto GPUVectorPolicyBase::ExpectDiffRyz(qs_data_p_t bra, q auto obj_min_mask = mask.obj_min_mask; auto obj_max_mask = mask.obj_max_mask; auto ctrl_mask = mask.ctrl_mask; + qs_data_t res = 0.0; if (!mask.ctrl_mask) { - return thrust::transform_reduce( + res = thrust::transform_reduce( l, l + dim / 4, [=] __device__(size_t l) { index_t i; @@ -489,35 +622,54 @@ auto GPUVectorPolicyBase::ExpectDiffRyz(qs_data_p_t bra, q return this_res; }, qs_data_t(0, 0), thrust::plus()); + } else { + res = thrust::transform_reduce( + l, l + dim / 4, + [=] __device__(size_t l) { + index_t i; + SHIFT_BIT_TWO(obj_low_mask, obj_rev_low_mask, obj_high_mask, obj_rev_high_mask, l, i); + if ((i & ctrl_mask) != ctrl_mask) { + return qs_data_t(0, 0); + } + auto m = i + obj_mask; + auto j = i + obj_min_mask; + auto k = i + obj_max_mask; + auto v00 = c * ket[i] - s * ket[j]; + auto v01 = c * ket[j] + s * ket[i]; + auto v10 = c * ket[k] + s * ket[m]; + auto v11 = c * ket[m] - s * ket[k]; + auto this_res = thrust::conj(bra[i]) * v00; + this_res += thrust::conj(bra[j]) * v01; + this_res += thrust::conj(bra[k]) * v10; + this_res += thrust::conj(bra[m]) * v11; + return this_res; + }, + qs_data_t(0, 0), thrust::plus()); } - return thrust::transform_reduce( - l, l + dim / 4, - [=] __device__(size_t l) { - index_t i; - SHIFT_BIT_TWO(obj_low_mask, obj_rev_low_mask, obj_high_mask, obj_rev_high_mask, l, i); - if ((i & ctrl_mask) != ctrl_mask) { - return qs_data_t(0, 0); - } - auto m = i + obj_mask; - auto j = i + obj_min_mask; - auto k = i + obj_max_mask; - auto v00 = c * ket[i] - s * ket[j]; - auto v01 = c * ket[j] + s * ket[i]; - auto v10 = c * ket[k] + s * ket[m]; - auto v11 = c * ket[m] - s * ket[k]; - auto this_res = thrust::conj(bra[i]) * v00; - this_res += thrust::conj(bra[j]) * v01; - this_res += thrust::conj(bra[k]) * v10; - this_res += thrust::conj(bra[m]) * v11; - return this_res; - }, - qs_data_t(0, 0), thrust::plus()); + if (will_free_bra) { + derived::FreeState(&bra); + } + if (will_free_ket) { + derived::FreeState(&ket); + } + return res; } template -auto GPUVectorPolicyBase::ExpectDiffRyy(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, - const qbits_t& ctrls, calc_type val, index_t dim) - -> qs_data_t { +auto GPUVectorPolicyBase::ExpectDiffRyy(const qs_data_p_t& bra_out, const qs_data_p_t& ket_out, + const qbits_t& objs, const qbits_t& ctrls, calc_type val, + index_t dim) -> qs_data_t { + auto bra = bra_out; + auto ket = ket_out; + bool will_free_bra = false, will_free_ket = false; + if (bra == nullptr) { + bra = derived::InitState(dim); + will_free_bra = true; + } + if (ket == nullptr) { + ket = derived::InitState(dim); + will_free_ket = true; + } DoubleQubitGateMask mask(objs, ctrls); auto c = static_cast(-std::sin(val / 2) / 2); auto s = static_cast(std::cos(val / 2) / 2) * qs_data_t(0, 1); @@ -530,12 +682,36 @@ auto GPUVectorPolicyBase::ExpectDiffRyy(qs_data_p_t bra, q auto obj_min_mask = mask.obj_min_mask; auto obj_max_mask = mask.obj_max_mask; auto ctrl_mask = mask.ctrl_mask; + qs_data_t res = 0.0; if (!mask.ctrl_mask) { - return thrust::transform_reduce( + res = thrust::transform_reduce( + l, l + dim / 4, + [=] __device__(size_t l) { + index_t i; + SHIFT_BIT_TWO(obj_low_mask, obj_rev_low_mask, obj_high_mask, obj_rev_high_mask, l, i); + auto m = i + obj_mask; + auto j = i + obj_min_mask; + auto k = i + obj_max_mask; + auto v00 = c * ket[i] + s * ket[m]; + auto v01 = c * ket[j] + s * ket[k]; + auto v10 = c * ket[k] + s * ket[j]; + auto v11 = c * ket[m] + s * ket[i]; + auto this_res = thrust::conj(bra[i]) * v00; + this_res += thrust::conj(bra[j]) * v01; + this_res += thrust::conj(bra[k]) * v10; + this_res += thrust::conj(bra[m]) * v11; + return this_res; + }, + qs_data_t(0, 0), thrust::plus()); + } else { + res = thrust::transform_reduce( l, l + dim / 4, [=] __device__(size_t l) { index_t i; SHIFT_BIT_TWO(obj_low_mask, obj_rev_low_mask, obj_high_mask, obj_rev_high_mask, l, i); + if ((i & ctrl_mask) != ctrl_mask) { + return qs_data_t(0, 0); + } auto m = i + obj_mask; auto j = i + obj_min_mask; auto k = i + obj_max_mask; @@ -551,34 +727,30 @@ auto GPUVectorPolicyBase::ExpectDiffRyy(qs_data_p_t bra, q }, qs_data_t(0, 0), thrust::plus()); } - return thrust::transform_reduce( - l, l + dim / 4, - [=] __device__(size_t l) { - index_t i; - SHIFT_BIT_TWO(obj_low_mask, obj_rev_low_mask, obj_high_mask, obj_rev_high_mask, l, i); - if ((i & ctrl_mask) != ctrl_mask) { - return qs_data_t(0, 0); - } - auto m = i + obj_mask; - auto j = i + obj_min_mask; - auto k = i + obj_max_mask; - auto v00 = c * ket[i] + s * ket[m]; - auto v01 = c * ket[j] + s * ket[k]; - auto v10 = c * ket[k] + s * ket[j]; - auto v11 = c * ket[m] + s * ket[i]; - auto this_res = thrust::conj(bra[i]) * v00; - this_res += thrust::conj(bra[j]) * v01; - this_res += thrust::conj(bra[k]) * v10; - this_res += thrust::conj(bra[m]) * v11; - return this_res; - }, - qs_data_t(0, 0), thrust::plus()); + if (will_free_bra) { + derived::FreeState(&bra); + } + if (will_free_ket) { + derived::FreeState(&ket); + } + return res; } template -auto GPUVectorPolicyBase::ExpectDiffRzz(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, - const qbits_t& ctrls, calc_type val, index_t dim) - -> qs_data_t { +auto GPUVectorPolicyBase::ExpectDiffRzz(const qs_data_p_t& bra_out, const qs_data_p_t& ket_out, + const qbits_t& objs, const qbits_t& ctrls, calc_type val, + index_t dim) -> qs_data_t { + auto bra = bra_out; + auto ket = ket_out; + bool will_free_bra = false, will_free_ket = false; + if (bra == nullptr) { + bra = derived::InitState(dim); + will_free_bra = true; + } + if (ket == nullptr) { + ket = derived::InitState(dim); + will_free_ket = true; + } DoubleQubitGateMask mask(objs, ctrls); auto c = static_cast(-std::sin(val / 2) / 2); auto s = static_cast(std::cos(val / 2) / 2); @@ -593,8 +765,9 @@ auto GPUVectorPolicyBase::ExpectDiffRzz(qs_data_p_t bra, q auto obj_min_mask = mask.obj_min_mask; auto obj_max_mask = mask.obj_max_mask; auto ctrl_mask = mask.ctrl_mask; + qs_data_t res = 0.0; if (!mask.ctrl_mask) { - return thrust::transform_reduce( + res = thrust::transform_reduce( l, l + dim / 4, [=] __device__(size_t l) { index_t i; @@ -609,31 +782,39 @@ auto GPUVectorPolicyBase::ExpectDiffRzz(qs_data_p_t bra, q return this_res; }, qs_data_t(0, 0), thrust::plus()); + } else { + res = thrust::transform_reduce( + l, l + dim / 4, + [=] __device__(size_t l) { + index_t i; + SHIFT_BIT_TWO(obj_low_mask, obj_rev_low_mask, obj_high_mask, obj_rev_high_mask, l, i); + if ((i & ctrl_mask) != ctrl_mask) { + return qs_data_t(0, 0); + } + auto m = i + obj_mask; + auto j = i + obj_min_mask; + auto k = i + obj_max_mask; + auto this_res = thrust::conj(bra[i]) * ket[i] * me; + this_res += thrust::conj(bra[j]) * ket[j] * e; + this_res += thrust::conj(bra[k]) * ket[k] * e; + this_res += thrust::conj(bra[m]) * ket[m] * me; + return this_res; + }, + qs_data_t(0, 0), thrust::plus()); } - return thrust::transform_reduce( - l, l + dim / 4, - [=] __device__(size_t l) { - index_t i; - SHIFT_BIT_TWO(obj_low_mask, obj_rev_low_mask, obj_high_mask, obj_rev_high_mask, l, i); - if ((i & ctrl_mask) != ctrl_mask) { - return qs_data_t(0, 0); - } - auto m = i + obj_mask; - auto j = i + obj_min_mask; - auto k = i + obj_max_mask; - auto this_res = thrust::conj(bra[i]) * ket[i] * me; - this_res += thrust::conj(bra[j]) * ket[j] * e; - this_res += thrust::conj(bra[k]) * ket[k] * e; - this_res += thrust::conj(bra[m]) * ket[m] * me; - return this_res; - }, - qs_data_t(0, 0), thrust::plus()); + if (will_free_bra) { + derived::FreeState(&bra); + } + if (will_free_ket) { + derived::FreeState(&ket); + } + return res; } template -auto GPUVectorPolicyBase::ExpectDiffGP(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, - const qbits_t& ctrls, calc_type val, index_t dim) - -> qs_data_t { +auto GPUVectorPolicyBase::ExpectDiffGP(const qs_data_p_t& bra, const qs_data_p_t& ket, + const qbits_t& objs, const qbits_t& ctrls, calc_type val, + index_t dim) -> qs_data_t { SingleQubitGateMask mask(objs, ctrls); auto e = std::complex(0, -1); e *= std::exp(std::complex(0, -val)); @@ -642,9 +823,20 @@ auto GPUVectorPolicyBase::ExpectDiffGP(qs_data_p_t bra, qs } template -auto GPUVectorPolicyBase::ExpectDiffPS(qs_data_p_t bra, qs_data_p_t ket, const qbits_t& objs, - const qbits_t& ctrls, calc_type val, index_t dim) - -> qs_data_t { +auto GPUVectorPolicyBase::ExpectDiffPS(const qs_data_p_t& bra_out, const qs_data_p_t& ket_out, + const qbits_t& objs, const qbits_t& ctrls, calc_type val, + index_t dim) -> qs_data_t { + auto bra = bra_out; + auto ket = ket_out; + bool will_free_bra = false, will_free_ket = false; + if (bra == nullptr) { + bra = derived::InitState(dim); + will_free_bra = true; + } + if (ket == nullptr) { + ket = derived::InitState(dim); + will_free_ket = true; + } SingleQubitGateMask mask(objs, ctrls); auto e = static_cast(std::cos(val)) + qs_data_t(0, 1) * static_cast(std::sin(val)); thrust::counting_iterator l(0); @@ -652,8 +844,9 @@ auto GPUVectorPolicyBase::ExpectDiffPS(qs_data_p_t bra, qs auto obj_low_mask = mask.obj_low_mask; auto obj_mask = mask.obj_mask; auto ctrl_mask = mask.ctrl_mask; + qs_data_t res = 0.0; if (!(mask.ctrl_mask)) { - return thrust::transform_reduce( + res = thrust::transform_reduce( l, l + dim / 2, [=] __device__(size_t l) { auto i = ((l & obj_high_mask) << 1) + (l & obj_low_mask); @@ -675,7 +868,7 @@ auto GPUVectorPolicyBase::ExpectDiffPS(qs_data_p_t bra, qs } auto first_high_mask = ~first_low_mask; auto second_high_mask = ~second_low_mask; - return thrust::transform_reduce( + res = thrust::transform_reduce( l, l + dim / 4, [=] __device__(size_t l) { auto i = ((l & first_high_mask) << 1) + (l & first_low_mask); @@ -685,19 +878,27 @@ auto GPUVectorPolicyBase::ExpectDiffPS(qs_data_p_t bra, qs return this_res; }, qs_data_t(0, 0), thrust::plus()); + } else { + res = thrust::transform_reduce( + l, l + dim / 2, + [=] __device__(size_t l) { + auto i = ((l & obj_high_mask) << 1) + (l & obj_low_mask); + if ((i & ctrl_mask) != ctrl_mask) { + return qs_data_t(0, 0); + } + auto j = i + obj_mask; + auto this_res = thrust::conj(bra[j]) * ket[j] * e; + return this_res; + }, + qs_data_t(0, 0), thrust::plus()); } - return thrust::transform_reduce( - l, l + dim / 2, - [=] __device__(size_t l) { - auto i = ((l & obj_high_mask) << 1) + (l & obj_low_mask); - if ((i & ctrl_mask) != ctrl_mask) { - return qs_data_t(0, 0); - } - auto j = i + obj_mask; - auto this_res = thrust::conj(bra[j]) * ket[j] * e; - return this_res; - }, - qs_data_t(0, 0), thrust::plus()); + if (will_free_bra) { + derived::FreeState(&bra); + } + if (will_free_ket) { + derived::FreeState(&ket); + } + return res; } template struct GPUVectorPolicyBase; diff --git a/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_matrix_gate.cu b/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_matrix_gate.cu index a9d1a65cae4097f2b559807662d3877b1a1d9bc0..d537990e52fa0e8efcac0fd068c49274f5f26ec5 100644 --- a/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_matrix_gate.cu +++ b/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_matrix_gate.cu @@ -25,10 +25,22 @@ #include "thrust/inner_product.h" namespace mindquantum::sim::vector::detail { template -void GPUVectorPolicyBase::ApplyNQubitsMatrix(qs_data_p_t src, qs_data_p_t des, +void GPUVectorPolicyBase::ApplyNQubitsMatrix(const qs_data_p_t& src_out, qs_data_p_t* des_p, const qbits_t& objs, const qbits_t& ctrls, const std::vector>& gate, index_t dim) { + auto& des = *des_p; + if (des == nullptr) { + des = derived::InitState(dim); + } + qs_data_p_t src; + bool will_free = false; + if (src_out == nullptr) { + src = derived::InitState(dim); + will_free = true; + } else { + src = src_out; + } size_t n_qubit = objs.size(); size_t m_dim = (1UL << n_qubit); size_t ctrl_mask = 0; @@ -76,13 +88,28 @@ void GPUVectorPolicyBase::ApplyNQubitsMatrix(qs_data_p_t s free(tmp_des); } }); + if (will_free) { + derived::FreeState(&src); + } } template -void GPUVectorPolicyBase::ApplyTwoQubitsMatrix(qs_data_p_t src, qs_data_p_t des, +void GPUVectorPolicyBase::ApplyTwoQubitsMatrix(const qs_data_p_t& src_out, qs_data_p_t* des_p, const qbits_t& objs, const qbits_t& ctrls, const std::vector>& m, index_t dim) { + auto& des = *des_p; + if (des == nullptr) { + des = derived::InitState(dim); + } + qs_data_p_t src; + bool will_free = false; + if (src_out == nullptr) { + src = derived::InitState(dim); + will_free = true; + } else { + src = src_out; + } DoubleQubitGateMask mask(objs, ctrls); qs_data_t m00 = m[0][0]; qs_data_t m01 = m[0][1]; @@ -144,12 +171,27 @@ void GPUVectorPolicyBase::ApplyTwoQubitsMatrix(qs_data_p_t } }); } + if (will_free) { + derived::FreeState(&src); + } } template -void GPUVectorPolicyBase::ApplySingleQubitMatrix(qs_data_p_t src, qs_data_p_t des, +void GPUVectorPolicyBase::ApplySingleQubitMatrix(const qs_data_p_t& src_out, qs_data_p_t* des_p, qbit_t obj_qubit, const qbits_t& ctrls, const std::vector>& m, index_t dim) { + auto& des = (*des_p); + if (des == nullptr) { + des = derived::InitState(dim); + } + qs_data_p_t src; + bool will_free = false; + if (src_out == nullptr) { + src = derived::InitState(dim); + will_free = true; + } else { + src = src_out; + } SingleQubitGateMask mask({obj_qubit}, ctrls); qs_data_t m00 = m[0][0]; qs_data_t m01 = m[0][1]; @@ -181,19 +223,22 @@ void GPUVectorPolicyBase::ApplySingleQubitMatrix(qs_data_p } }); } + if (will_free) { + derived_::FreeState(&src); + } } template -void GPUVectorPolicyBase::ApplyMatrixGate(qs_data_p_t src, qs_data_p_t des, const qbits_t& objs, - const qbits_t& ctrls, +void GPUVectorPolicyBase::ApplyMatrixGate(const qs_data_p_t& src, qs_data_p_t* des_p, + const qbits_t& objs, const qbits_t& ctrls, const std::vector>& m, index_t dim) { if (objs.size() == 1) { - derived::ApplySingleQubitMatrix(src, des, objs[0], ctrls, m, dim); + derived::ApplySingleQubitMatrix(src, des_p, objs[0], ctrls, m, dim); } else if (objs.size() == 2) { - derived::ApplyTwoQubitsMatrix(src, des, objs, ctrls, m, dim); + derived::ApplyTwoQubitsMatrix(src, des_p, objs, ctrls, m, dim); } else { - derived::ApplyNQubitsMatrix(src, des, objs, ctrls, m, dim); + derived::ApplyNQubitsMatrix(src, des_p, objs, ctrls, m, dim); } } diff --git a/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_other_gate.cu b/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_other_gate.cu index e398b86d0562d0f13eb0cd947fe6ffc3d8a70004..cd0c758d13573106183f7f92c50f0d6d28b3d839 100644 --- a/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_other_gate.cu +++ b/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_other_gate.cu @@ -26,18 +26,18 @@ namespace mindquantum::sim::vector::detail { template -void GPUVectorPolicyBase::ApplyH(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void GPUVectorPolicyBase::ApplyH(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim) { std::vector> m{{M_SQRT1_2, M_SQRT1_2}, {M_SQRT1_2, -M_SQRT1_2}}; - derived::ApplySingleQubitMatrix(qs, qs, objs[0], ctrls, m, dim); + derived::ApplySingleQubitMatrix(*qs_p, qs_p, objs[0], ctrls, m, dim); } template -void GPUVectorPolicyBase::ApplyGP(qs_data_p_t qs, qbit_t obj_qubit, const qbits_t& ctrls, +void GPUVectorPolicyBase::ApplyGP(qs_data_p_t* qs_p, qbit_t obj_qubit, const qbits_t& ctrls, calc_type val, index_t dim, bool diff) { auto c = std::exp(std::complex(0, -val)); std::vector> m = {{c, 0}, {0, c}}; - derived::ApplySingleQubitMatrix(qs, qs, obj_qubit, ctrls, m, dim); + derived::ApplySingleQubitMatrix(*qs_p, qs_p, obj_qubit, ctrls, m, dim); } template struct GPUVectorPolicyBase; diff --git a/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_policy.cu b/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_policy.cu index 02e8c70922c693c219990545a7cbd4ea22f2c0d0..cb5925bbc0376cb7ba178c6e60f5bc21adbec523 100644 --- a/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_policy.cu +++ b/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_policy.cu @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. #include +#include #include @@ -22,15 +23,18 @@ #include "simulator/vector/detail/gpu_vector_float_policy.cuh" #include "simulator/vector/detail/gpu_vector_policy.cuh" #include "thrust/device_ptr.h" +#include "thrust/device_vector.h" #include "thrust/functional.h" #include "thrust/inner_product.h" -#include "thrust/device_vector.h" namespace mindquantum::sim::vector::detail { template auto GPUVectorPolicyBase::InitState(index_t dim, bool zero_state) -> qs_data_p_t { qs_data_p_t qs; - cudaMalloc((void**) &qs, sizeof(qs_data_t) * dim); // NOLINT + auto state = cudaMalloc((void**) &qs, sizeof(qs_data_t) * dim); // NOLINT + if (state != cudaSuccess) { + throw std::runtime_error("GPU out of memory for allocate quantum state."); + } cudaMemset(qs, 0, sizeof(qs_data_t) * dim); if (zero_state) { qs_data_t one = qs_data_t(1.0, 0.0); @@ -40,36 +44,47 @@ auto GPUVectorPolicyBase::InitState(index_t dim, bool zero } template -void GPUVectorPolicyBase::FreeState(qs_data_p_t qs) { +void GPUVectorPolicyBase::FreeState(qs_data_p_t* qs_p) { + auto& qs = (*qs_p); if (qs != nullptr) { cudaFree(qs); + qs = nullptr; } } template -void GPUVectorPolicyBase::Reset(qs_data_p_t qs, index_t dim) { - cudaMemset(qs, 0, sizeof(qs_data_t) * dim); - qs_data_t one(1, 0); - cudaMemcpy(qs, &one, sizeof(qs_data_t), cudaMemcpyHostToDevice); +void GPUVectorPolicyBase::Reset(qs_data_p_t* qs_p, index_t dim) { + derived::FreeState(qs_p); } template -void GPUVectorPolicyBase::Display(qs_data_p_t qs, qbit_t n_qubits, qbit_t q_limit) { +void GPUVectorPolicyBase::Display(const qs_data_p_t& qs, qbit_t n_qubits, qbit_t q_limit) { if (n_qubits > q_limit) { n_qubits = q_limit; } - std::complex* h_qs = reinterpret_cast*>( - malloc((1UL << n_qubits) * sizeof(std::complex))); - cudaMemcpy(h_qs, qs, sizeof(qs_data_t) * (1UL << n_qubits), cudaMemcpyDeviceToHost); - std::cout << n_qubits << " qubits cpu simulator (little endian)." << std::endl; - for (index_t i = 0; i < (1UL << n_qubits); i++) { - std::cout << "(" << h_qs[i].real() << ", " << h_qs[i].imag() << ")" << std::endl; + std::cout << n_qubits << " qubits gpu simulator (little endian)." << std::endl; + if (qs == nullptr) { + std::cout << "(" << 1 << ", " << 0 << ")" << std::endl; + for (index_t i = 0; i < (1UL << n_qubits) - 1; i++) { + std::cout << "(" << 0 << ", " << 0 << ")" << std::endl; + } + } else { + std::complex* h_qs = reinterpret_cast*>( + malloc((1UL << n_qubits) * sizeof(std::complex))); + cudaMemcpy(h_qs, qs, sizeof(qs_data_t) * (1UL << n_qubits), cudaMemcpyDeviceToHost); + for (index_t i = 0; i < (1UL << n_qubits); i++) { + std::cout << "(" << h_qs[i].real() << ", " << h_qs[i].imag() << ")" << std::endl; + } + free(h_qs); } - free(h_qs); } template -void GPUVectorPolicyBase::SetToZeroExcept(qs_data_p_t qs, index_t ctrl_mask, index_t dim) { +void GPUVectorPolicyBase::SetToZeroExcept(qs_data_p_t* qs_p, index_t ctrl_mask, index_t dim) { + auto& qs = *qs_p; + if (qs == nullptr) { + qs = derived::InitState(dim); + } thrust::counting_iterator i(0); thrust::for_each(i, i + dim, [=] __device__(index_t i) { if ((i & ctrl_mask) != ctrl_mask) { @@ -79,23 +94,96 @@ void GPUVectorPolicyBase::SetToZeroExcept(qs_data_p_t qs, } template -auto GPUVectorPolicyBase::GetQS(qs_data_p_t qs, index_t dim) -> py_qs_datas_t { +auto GPUVectorPolicyBase::GetQS(const qs_data_p_t& qs, index_t dim) -> py_qs_datas_t { py_qs_datas_t out(dim); - cudaMemcpy(out.data(), qs, sizeof(qs_data_t) * dim, cudaMemcpyDeviceToHost); + if (qs == nullptr) { + out[0] = 1.0; + } else { + cudaMemcpy(out.data(), qs, sizeof(qs_data_t) * dim, cudaMemcpyDeviceToHost); + } return out; } template -void GPUVectorPolicyBase::SetQS(qs_data_p_t qs, const py_qs_datas_t& qs_out, index_t dim) { +void GPUVectorPolicyBase::SetQS(qs_data_p_t* qs_p, const py_qs_datas_t& qs_out, index_t dim) { + auto& qs = (*qs_p); if (qs_out.size() != dim) { throw std::invalid_argument("state size not match"); } + if (qs == nullptr) { + qs = derived::InitState(dim, false); + } cudaMemcpy(qs, qs_out.data(), sizeof(qs_data_t) * dim, cudaMemcpyHostToDevice); } template -auto GPUVectorPolicyBase::ApplyTerms(qs_data_p_t qs, const std::vector>& ham, - index_t dim) -> qs_data_p_t { +auto GPUVectorPolicyBase::ExpectationOfTerms(const qs_data_p_t& bra_out, + const qs_data_p_t& ket_out, + const std::vector>& ham, + index_t dim) -> py_qs_data_t { + auto bra = bra_out; + auto ket = ket_out; + bool will_free_bra = false, will_free_ket = false; + if (bra == nullptr) { + bra = derived::InitState(dim); + will_free_bra = true; + } + if (ket == nullptr) { + ket = derived::InitState(dim); + will_free_ket = true; + } + qs_data_t out = 0.0; + for (const auto& [pauli_string, coeff] : ham) { + auto mask = GenPauliMask(pauli_string); + auto mask_f = mask.mask_x | mask.mask_y; + auto mask_z = mask.mask_z; + auto mask_y = mask.mask_y; + auto num_y = mask.num_y; + auto this_coeff = qs_data_t(coeff); + thrust::counting_iterator i(0); + out += thrust::transform_reduce( + i, i + dim, + [=] __device__(index_t i) { + auto j = (i ^ mask_f); + qs_data_t tmp = 0.0; + if (i <= j) { + auto axis2power = __popcll(i & mask_z); + auto axis3power = __popcll(i & mask_y); + auto idx = (num_y + 2 * axis3power + 2 * axis2power) & 3; + auto c = qs_data_t(1, 0); + if (idx == 1) { + c = qs_data_t(0, 1); + } else if (idx == 2) { + c = qs_data_t(-1, 0); + } else if (idx == 3) { + c = qs_data_t(0, -1); + } + tmp += thrust::conj(bra[j]) * ket[i] * this_coeff * c; + if (i != j) { + tmp += thrust::conj(bra[i]) * ket[j] * this_coeff / c; + } + } + return tmp; + }, + qs_data_t(0, 0), thrust::plus()); + } + if (will_free_bra) { + derived::FreeState(&bra); + } + if (will_free_ket) { + derived::FreeState(&ket); + } + return out; +} + +template +auto GPUVectorPolicyBase::ApplyTerms(qs_data_p_t* qs_p, + const std::vector>& ham, index_t dim) + -> qs_data_p_t { + auto& qs = (*qs_p); + if (qs == nullptr) { + qs = derived::InitState(dim); + } qs_data_p_t out = derived::InitState(dim, false); for (const auto& [pauli_string, coeff] : ham) { auto mask = GenPauliMask(pauli_string); @@ -166,10 +254,15 @@ auto GPUVectorPolicyBase::GroundStateOfZZs(const std::map< } template -auto GPUVectorPolicyBase::Copy(qs_data_p_t qs, index_t dim) -> qs_data_p_t { - qs_data_p_t out; - cudaMalloc((void**) &out, sizeof(qs_data_t) * dim); // NOLINT - cudaMemcpy(out, qs, sizeof(qs_data_t) * dim, cudaMemcpyDeviceToDevice); +auto GPUVectorPolicyBase::Copy(const qs_data_p_t& qs, index_t dim) -> qs_data_p_t { + qs_data_p_t out = nullptr; + if (qs != nullptr) { + auto state = cudaMalloc((void**) &out, sizeof(qs_data_t) * dim); // NOLINT + if (state != cudaSuccess) { + throw std::runtime_error("GPU out of memory for allocate quantum state."); + } + cudaMemcpy(out, qs, sizeof(qs_data_t) * dim, cudaMemcpyDeviceToDevice); + } return out; }; diff --git a/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_rot_pauli.cu b/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_rot_pauli.cu index a348171a74fbfa0313eb143070d43f87bfb1f4b8..5bbcc53b52defd42af04e52852f76e30445d37cb 100644 --- a/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_rot_pauli.cu +++ b/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_rot_pauli.cu @@ -25,7 +25,7 @@ namespace mindquantum::sim::vector::detail { template -void GPUVectorPolicyBase::ApplyRX(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void GPUVectorPolicyBase::ApplyRX(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff) { SingleQubitGateMask mask(objs, ctrls); auto a = static_cast(std::cos(val / 2)); @@ -35,14 +35,14 @@ void GPUVectorPolicyBase::ApplyRX(qs_data_p_t qs, const qb b = static_cast(-0.5 * std::cos(val / 2)); } std::vector> m{{{a, 0}, {0, b}}, {{0, b}, {a, 0}}}; - derived::ApplySingleQubitMatrix(qs, qs, objs[0], ctrls, m, dim); + derived::ApplySingleQubitMatrix(*qs_p, qs_p, objs[0], ctrls, m, dim); if (diff && mask.ctrl_mask) { - derived::SetToZeroExcept(qs, mask.ctrl_mask, dim); + derived::SetToZeroExcept(qs_p, mask.ctrl_mask, dim); } } template -void GPUVectorPolicyBase::ApplyRY(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void GPUVectorPolicyBase::ApplyRY(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff) { SingleQubitGateMask mask(objs, ctrls); auto a = static_cast(std::cos(val / 2)); @@ -52,14 +52,14 @@ void GPUVectorPolicyBase::ApplyRY(qs_data_p_t qs, const qb b = static_cast(0.5 * std::cos(val / 2)); } std::vector> m{{{a, 0}, {-b, 0}}, {{b, 0}, {a, 0}}}; - derived::ApplySingleQubitMatrix(qs, qs, objs[0], ctrls, m, dim); + derived::ApplySingleQubitMatrix(*qs_p, qs_p, objs[0], ctrls, m, dim); if (diff && mask.ctrl_mask) { - derived::SetToZeroExcept(qs, mask.ctrl_mask, dim); + derived::SetToZeroExcept(qs_p, mask.ctrl_mask, dim); } } template -void GPUVectorPolicyBase::ApplyRZ(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void GPUVectorPolicyBase::ApplyRZ(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff) { SingleQubitGateMask mask(objs, ctrls); auto a = static_cast(std::cos(val / 2)); @@ -69,14 +69,18 @@ void GPUVectorPolicyBase::ApplyRZ(qs_data_p_t qs, const qb b = static_cast(0.5 * std::cos(val / 2)); } std::vector> m{{{a, -b}, {0, 0}}, {{0, 0}, {a, b}}}; - derived::ApplySingleQubitMatrix(qs, qs, objs[0], ctrls, m, dim); + derived::ApplySingleQubitMatrix(*qs_p, qs_p, objs[0], ctrls, m, dim); if (diff && mask.ctrl_mask) { - derived::SetToZeroExcept(qs, mask.ctrl_mask, dim); + derived::SetToZeroExcept(qs_p, mask.ctrl_mask, dim); } } template -void GPUVectorPolicyBase::ApplyRzz(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void GPUVectorPolicyBase::ApplyRzz(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff) { + auto& qs = *qs_p; + if (qs == nullptr) { + qs = derived::InitState(dim); + } DoubleQubitGateMask mask(objs, ctrls); thrust::counting_iterator l(0); auto obj_high_mask = mask.obj_high_mask; @@ -122,14 +126,18 @@ void GPUVectorPolicyBase::ApplyRzz(qs_data_p_t qs, const q } }); if (diff) { - derived::SetToZeroExcept(qs, ctrl_mask, dim); + derived::SetToZeroExcept(&qs, ctrl_mask, dim); } } } template -void GPUVectorPolicyBase::ApplyRxx(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void GPUVectorPolicyBase::ApplyRxx(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff) { + auto& qs = *qs_p; + if (qs == nullptr) { + qs = derived::InitState(dim); + } DoubleQubitGateMask mask(objs, ctrls); thrust::counting_iterator l(0); auto obj_high_mask = mask.obj_high_mask; @@ -181,14 +189,18 @@ void GPUVectorPolicyBase::ApplyRxx(qs_data_p_t qs, const q } }); if (diff) { - derived::SetToZeroExcept(qs, ctrl_mask, dim); + derived::SetToZeroExcept(&qs, ctrl_mask, dim); } } } template -void GPUVectorPolicyBase::ApplyRxy(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void GPUVectorPolicyBase::ApplyRxy(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff) { + auto& qs = *qs_p; + if (qs == nullptr) { + qs = derived::InitState(dim); + } DoubleQubitGateMask mask(objs, ctrls); thrust::counting_iterator l(0); auto obj_high_mask = mask.obj_high_mask; @@ -240,14 +252,18 @@ void GPUVectorPolicyBase::ApplyRxy(qs_data_p_t qs, const q } }); if (diff) { - derived::SetToZeroExcept(qs, ctrl_mask, dim); + derived::SetToZeroExcept(&qs, ctrl_mask, dim); } } } template -void GPUVectorPolicyBase::ApplyRxz(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void GPUVectorPolicyBase::ApplyRxz(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff) { + auto& qs = *qs_p; + if (qs == nullptr) { + qs = derived::InitState(dim); + } DoubleQubitGateMask mask(objs, ctrls); thrust::counting_iterator l(0); auto obj_high_mask = mask.obj_high_mask; @@ -299,14 +315,18 @@ void GPUVectorPolicyBase::ApplyRxz(qs_data_p_t qs, const q } }); if (diff) { - derived::SetToZeroExcept(qs, ctrl_mask, dim); + derived::SetToZeroExcept(&qs, ctrl_mask, dim); } } } template -void GPUVectorPolicyBase::ApplyRyz(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void GPUVectorPolicyBase::ApplyRyz(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff) { + auto& qs = *qs_p; + if (qs == nullptr) { + qs = derived::InitState(dim); + } DoubleQubitGateMask mask(objs, ctrls); thrust::counting_iterator l(0); auto obj_high_mask = mask.obj_high_mask; @@ -358,14 +378,18 @@ void GPUVectorPolicyBase::ApplyRyz(qs_data_p_t qs, const q } }); if (diff) { - derived::SetToZeroExcept(qs, ctrl_mask, dim); + derived::SetToZeroExcept(&qs, ctrl_mask, dim); } } } template -void GPUVectorPolicyBase::ApplyRyy(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void GPUVectorPolicyBase::ApplyRyy(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff) { + auto& qs = *qs_p; + if (qs == nullptr) { + qs = derived::InitState(dim); + } DoubleQubitGateMask mask(objs, ctrls); thrust::counting_iterator l(0); auto obj_high_mask = mask.obj_high_mask; @@ -417,7 +441,7 @@ void GPUVectorPolicyBase::ApplyRyy(qs_data_p_t qs, const q } }); if (diff) { - derived::SetToZeroExcept(qs, ctrl_mask, dim); + derived::SetToZeroExcept(&qs, ctrl_mask, dim); } } } diff --git a/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_swap_like.cu b/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_swap_like.cu index 86d3e04be54291d55bc6252f2be1151082f3556c..2ccb969c0f432a096bd39f4fffc43a93bf3b31fc 100644 --- a/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_swap_like.cu +++ b/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_swap_like.cu @@ -26,8 +26,12 @@ namespace mindquantum::sim::vector::detail { template -void GPUVectorPolicyBase::ApplySWAP(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void GPUVectorPolicyBase::ApplySWAP(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim) { + auto& qs = *qs_p; + if (qs == nullptr) { + qs = derived::InitState(dim); + } DoubleQubitGateMask mask(objs, ctrls); thrust::counting_iterator l(0); auto obj_high_mask = mask.obj_high_mask; @@ -59,8 +63,12 @@ void GPUVectorPolicyBase::ApplySWAP(qs_data_p_t qs, const } template -void GPUVectorPolicyBase::ApplyISWAP(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void GPUVectorPolicyBase::ApplyISWAP(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, bool daggered, index_t dim) { + auto& qs = *qs_p; + if (qs == nullptr) { + qs = derived::InitState(dim); + } DoubleQubitGateMask mask(objs, ctrls); thrust::counting_iterator l(0); auto obj_high_mask = mask.obj_high_mask; diff --git a/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_x_like.cu b/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_x_like.cu index 1e643da2156793c370675eac41d822a860373f02..482f9785e9a80e0767ab057b20faed8b1ed80b06 100644 --- a/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_x_like.cu +++ b/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_x_like.cu @@ -25,8 +25,12 @@ namespace mindquantum::sim::vector::detail { template -void GPUVectorPolicyBase::ApplyXLike(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void GPUVectorPolicyBase::ApplyXLike(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, qs_data_t v1, qs_data_t v2, index_t dim) { + auto& qs = *qs_p; + if (qs == nullptr) { + qs = derived::InitState(dim); + } SingleQubitGateMask mask(objs, ctrls); thrust::counting_iterator l(0); auto obj_high_mask = mask.obj_high_mask; @@ -55,15 +59,15 @@ void GPUVectorPolicyBase::ApplyXLike(qs_data_p_t qs, const } template -void GPUVectorPolicyBase::ApplyX(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void GPUVectorPolicyBase::ApplyX(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim) { - derived::ApplyXLike(qs, objs, ctrls, 1, 1, dim); + derived::ApplyXLike(qs_p, objs, ctrls, 1, 1, dim); } template -void GPUVectorPolicyBase::ApplyY(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void GPUVectorPolicyBase::ApplyY(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim) { - derived::ApplyXLike(qs, objs, ctrls, qs_data_t(0, -1), qs_data_t(0, 1), dim); + derived::ApplyXLike(qs_p, objs, ctrls, qs_data_t(0, -1), qs_data_t(0, 1), dim); } template struct GPUVectorPolicyBase; diff --git a/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_z_like.cu b/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_z_like.cu index 2a18a9560bcf02e71ffc04d884ee99dbe8aff879..fd82186b16c7aaead39c91c24634c86ba264c248 100644 --- a/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_z_like.cu +++ b/ccsrc/lib/simulator/vector/detail/gpu/gpu_vector_core_z_like.cu @@ -25,8 +25,12 @@ namespace mindquantum::sim::vector::detail { template -void GPUVectorPolicyBase::ApplyZLike(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void GPUVectorPolicyBase::ApplyZLike(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, qs_data_t val, index_t dim) { + auto& qs = *qs_p; + if (qs == nullptr) { + qs = derived::InitState(dim); + } SingleQubitGateMask mask(objs, ctrls); thrust::counting_iterator l(0); auto obj_high_mask = mask.obj_high_mask; @@ -49,37 +53,41 @@ void GPUVectorPolicyBase::ApplyZLike(qs_data_p_t qs, const } template -void GPUVectorPolicyBase::ApplyZ(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void GPUVectorPolicyBase::ApplyZ(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim) { - derived::ApplyZLike(qs, objs, ctrls, -1, dim); + derived::ApplyZLike(qs_p, objs, ctrls, -1, dim); } template -void GPUVectorPolicyBase::ApplySGate(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void GPUVectorPolicyBase::ApplySGate(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim) { - derived::ApplyZLike(qs, objs, ctrls, qs_data_t(0, 1), dim); + derived::ApplyZLike(qs_p, objs, ctrls, qs_data_t(0, 1), dim); } template -void GPUVectorPolicyBase::ApplySdag(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void GPUVectorPolicyBase::ApplySdag(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim) { - derived::ApplyZLike(qs, objs, ctrls, qs_data_t(0, -1), dim); + derived::ApplyZLike(qs_p, objs, ctrls, qs_data_t(0, -1), dim); } template -void GPUVectorPolicyBase::ApplyT(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void GPUVectorPolicyBase::ApplyT(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim) { - derived::ApplyZLike(qs, objs, ctrls, qs_data_t(1, 1) / std::sqrt(2.0), dim); + derived::ApplyZLike(qs_p, objs, ctrls, qs_data_t(1, 1) / std::sqrt(2.0), dim); } template -void GPUVectorPolicyBase::ApplyTdag(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void GPUVectorPolicyBase::ApplyTdag(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, index_t dim) { - derived::ApplyZLike(qs, objs, ctrls, qs_data_t(1, -1) / std::sqrt(2.0), dim); + derived::ApplyZLike(qs_p, objs, ctrls, qs_data_t(1, -1) / std::sqrt(2.0), dim); } template -void GPUVectorPolicyBase::ApplyPS(qs_data_p_t qs, const qbits_t& objs, const qbits_t& ctrls, +void GPUVectorPolicyBase::ApplyPS(qs_data_p_t* qs_p, const qbits_t& objs, const qbits_t& ctrls, calc_type val, index_t dim, bool diff) { if (!diff) { - derived::ApplyZLike(qs, objs, ctrls, qs_data_t(std::cos(val), std::sin(val)), dim); + derived::ApplyZLike(qs_p, objs, ctrls, qs_data_t(std::cos(val), std::sin(val)), dim); } else { + auto& qs = *qs_p; + if (qs == nullptr) { + qs = derived::InitState(dim); + } SingleQubitGateMask mask(objs, ctrls); thrust::counting_iterator l(0); auto obj_high_mask = mask.obj_high_mask; @@ -105,7 +113,7 @@ void GPUVectorPolicyBase::ApplyPS(qs_data_p_t qs, const qb } }); } - derived::SetToZeroExcept(qs, ctrl_mask, dim); + derived::SetToZeroExcept(qs_p, ctrl_mask, dim); } } } diff --git a/ccsrc/python/simulator/include/python/vector/bind_vec_state.h b/ccsrc/python/simulator/include/python/vector/bind_vec_state.h index a94e31ee7331cca667c2832a1309578680e9c7f9..b938a5ebfe045d39f5c9e7dd56f75dc5b03ea3ea 100644 --- a/ccsrc/python/simulator/include/python/vector/bind_vec_state.h +++ b/ccsrc/python/simulator/include/python/vector/bind_vec_state.h @@ -65,12 +65,13 @@ auto BindSim(pybind11::module& module, const std::string_view& name) { // NOLIN .def("get_expectation", pybind11::overload_cast&, const circuit_t&, const circuit_t&, const typename sim_t::derived_t&, const parameter::ParameterResolver&>( - &sim_t::GetExpectation)) + &sim_t::GetExpectation, pybind11::const_)) .def("get_expectation", pybind11::overload_cast&, const circuit_t&, const circuit_t&, - const parameter::ParameterResolver&>(&sim_t::GetExpectation)) - .def("get_expectation", pybind11::overload_cast&, const circuit_t&, - const parameter::ParameterResolver&>(&sim_t::GetExpectation)) + const parameter::ParameterResolver&>(&sim_t::GetExpectation, pybind11::const_)) + .def("get_expectation", + pybind11::overload_cast&, const circuit_t&, + const parameter::ParameterResolver&>(&sim_t::GetExpectation, pybind11::const_)) .def("get_expectation_with_grad_one_one", &sim_t::GetExpectationWithGradOneOne) .def("get_expectation_with_grad_one_multi", &sim_t::GetExpectationWithGradOneMulti) .def("get_expectation_with_grad_multi_multi", &sim_t::GetExpectationWithGradMultiMulti)