From d0d81ac6fce23b366c3beeae6c8e5627fb237232 Mon Sep 17 00:00:00 2001 From: chenyiyuan Date: Sun, 14 Jan 2024 14:35:52 +0000 Subject: [PATCH 1/2] constant pool index verification fix issue: : https://gitee.com/openharmony/arkcompiler_runtime_core/issues/I8W0WQ?from=project-issue Signed-off-by: chenyiyuan Change-Id: I530af48e2d478dc4de0af6f95ed0dab42ac30fd6 --- verifier/tests/BUILD.gn | 1 + verifier/tests/ts/test_literal_array.ts | 6 +- verifier/tests/verify_constant_pool_tests.cpp | 6 +- verifier/verifier.cpp | 217 ++++++++++-------- verifier/verifier.h | 20 +- 5 files changed, 141 insertions(+), 109 deletions(-) diff --git a/verifier/tests/BUILD.gn b/verifier/tests/BUILD.gn index 82fc58b223..97ac64db9c 100755 --- a/verifier/tests/BUILD.gn +++ b/verifier/tests/BUILD.gn @@ -75,6 +75,7 @@ host_unittest_action("VerifierTest") { "utils.cpp", "verify_checksum_test.cpp", "verify_constant_pool_tests.cpp", + "verify_register_index_test.cpp", ] include_dirs = [ "$ark_root/verifier" ] diff --git a/verifier/tests/ts/test_literal_array.ts b/verifier/tests/ts/test_literal_array.ts index 45dd11bfc8..aaa19ef078 100644 --- a/verifier/tests/ts/test_literal_array.ts +++ b/verifier/tests/ts/test_literal_array.ts @@ -20,8 +20,8 @@ class ClassB { this.bProperty = bProp; } - bMethod() { - console.log('Method from ClassB'); + bMethod(): void { + console.log('你好: Method from ClassB'); } } @@ -32,7 +32,7 @@ class ClassA { this.classBInstance = new ClassB('Some value for ClassB'); } - aMethod() { + aMethod(): void { console.log('Method from ClassA'); this.classBInstance.bMethod(); } diff --git a/verifier/tests/verify_constant_pool_tests.cpp b/verifier/tests/verify_constant_pool_tests.cpp index 6dd4d901f7..e14b9e9abf 100644 --- a/verifier/tests/verify_constant_pool_tests.cpp +++ b/verifier/tests/verify_constant_pool_tests.cpp @@ -68,7 +68,7 @@ HWTEST_F(VerifierConstantPool, verifier_constant_pool_002, TestSize.Level1) std::vector buffer(std::istreambuf_iterator(base_file), {}); - std::vector new_method_id = {0xff, 0xff}; + std::vector new_method_id = {0x0c, 0x00}; // The known string id in the abc file std::vector method_id = {0x0e, 0x00}; // The known method id in the abc file for (size_t i = buffer.size() - 1; i >= 0; --i) { @@ -109,7 +109,7 @@ HWTEST_F(VerifierConstantPool, verifier_constant_pool_003, TestSize.Level1) std::vector buffer(std::istreambuf_iterator(base_file), {}); - std::vector new_literal_id = {0xac, 0xfc}; + std::vector new_literal_id = {0x0e, 0x00}; // The known method id in the abc file std::vector literal_id = {0x0f, 0x00}; // The known literal id in the abc file for (size_t i = 0; i < buffer.size(); ++i) { @@ -152,7 +152,7 @@ HWTEST_F(VerifierConstantPool, verifier_constant_pool_004, TestSize.Level1) std::vector buffer(std::istreambuf_iterator(base_file), {}); - std::vector new_string_id = {0xff, 0x00}; + std::vector new_string_id = {0x0f, 0x00}; // The known literal id in the abc file std::vector string_id = {0x0c, 0x00}; // The known string id in the abc file for (size_t i = 0; i < buffer.size(); ++i) { diff --git a/verifier/verifier.cpp b/verifier/verifier.cpp index ecb7e77908..08611075e2 100644 --- a/verifier/verifier.cpp +++ b/verifier/verifier.cpp @@ -39,6 +39,10 @@ bool Verifier::Verify() return false; } + if (!VerifyRegisterIndex()) { + return false; + } + return true; } @@ -47,7 +51,7 @@ void Verifier::CollectIdInfos() if (file_ == nullptr) { return; } - GetMethodIds(); + GetConstantPoolIds(); GetLiteralIds(); CheckConstantPool(verifier::ActionType::COLLECTINFOS); } @@ -69,7 +73,11 @@ bool Verifier::VerifyConstantPool() return false; } - if (!CheckConstantPool(verifier::ActionType::CHECKCONSTPOOL)) { + if (!CheckConstantPoolIndex()) { + return false; + } + + if (!CheckConstantPool(verifier::ActionType::CHECKCONSTPOOLCONTENT)) { return false; } @@ -86,7 +94,8 @@ bool Verifier::VerifyRegisterIndex() return false; } - for (const auto &method_id : method_ids_) { + for (const auto id : all_method_ids_) { + const panda_file::File::EntityId method_id = panda_file::File::EntityId(id); panda_file::MethodDataAccessor method_accessor {*file_, method_id}; if (!method_accessor.GetCodeId().has_value()) { continue; @@ -119,7 +128,7 @@ bool Verifier::VerifyConstantPoolIndex() return false; } - if (!CheckConstantPool(verifier::ActionType::CHECKCONSTPOOLIDX)) { + if (!CheckConstantPoolIndex()) { return false; } @@ -143,16 +152,16 @@ bool Verifier::VerifyConstantPoolContent() return true; } -void Verifier::GetMethodIds() +void Verifier::GetConstantPoolIds() { - if (method_ids_.size() != 0) { + if (method_string_literal_ids_.size() != 0) { return; } auto index_headers = file_->GetIndexHeaders(); - for (const auto &header : index_headers) { - auto method_index = file_->GetMethodIndex(&header); - for (auto method_id : method_index) { - method_ids_.emplace_back(method_id); + for (const auto &index_header : index_headers) { + auto region_indexs = file_->GetMethodIndex(&index_header); + for (auto &index : region_indexs) { + method_string_literal_ids_.push_back(index.GetOffset()); } } } @@ -164,27 +173,19 @@ void Verifier::GetLiteralIds() } const auto literal_arrays = file_->GetLiteralArrays(); for (const auto literal_id : literal_arrays) { - literal_ids_.emplace_back(literal_id); + literal_ids_.push_back(literal_id); } } bool Verifier::CheckConstantPoolActions(const verifier::ActionType type, panda_file::File::EntityId method_id) { switch (type) { - case verifier::ActionType::CHECKCONSTPOOL: { - return CheckConstantPoolIndex(method_id) && CheckConstantPoolMethodContent(method_id); - } - case verifier::ActionType::CHECKCONSTPOOLIDX: { - return CheckConstantPoolIndex(method_id); - } case verifier::ActionType::CHECKCONSTPOOLCONTENT: { return CheckConstantPoolMethodContent(method_id); } case verifier::ActionType::COLLECTINFOS: { - if (std::find(method_ids_.begin(), method_ids_.end(), method_id) == method_ids_.end()) { - method_ids_.emplace_back(method_id); - } - return true; + all_method_ids_.push_back(method_id.GetOffset()); + return CollectIdInInstructions(method_id); } default: { return true; @@ -192,6 +193,49 @@ bool Verifier::CheckConstantPoolActions(const verifier::ActionType type, panda_f } } +bool Verifier::CollectIdInInstructions(const panda_file::File::EntityId &method_id) +{ + panda_file::MethodDataAccessor method_accessor(*file_, method_id); + + ASSERT(method_accessor.GetCodeId().has_value()); + panda_file::CodeDataAccessor code_accessor(*file_, method_accessor.GetCodeId().value()); + const auto ins_size = code_accessor.GetCodeSize(); + const auto ins_arr = code_accessor.GetInstructions(); + + auto bc_ins = BytecodeInstruction(ins_arr); + const auto bc_ins_last = bc_ins.JumpTo(ins_size); + + while (bc_ins.GetAddress() < bc_ins_last.GetAddress()) { + if (!bc_ins.IsPrimaryOpcodeValid()) { + LOG(ERROR, VERIFIER) << "Verify primaryOpcode failed!"; + return false; + } + if (bc_ins.HasFlag(BytecodeInstruction::Flags::LITERALARRAY_ID)) { + // the idx of any instruction with a literal id is 0 except defineclasswithbuffer + size_t idx = 0; + if (bc_ins.GetOpcode() == BytecodeInstruction::Opcode::DEFINECLASSWITHBUFFER_IMM8_ID16_ID16_IMM16_V8 || + bc_ins.GetOpcode() == BytecodeInstruction::Opcode::DEFINECLASSWITHBUFFER_IMM16_ID16_ID16_IMM16_V8) { + idx = 1; + } + const auto arg_literal_idx = bc_ins.GetId(idx).AsIndex(); + const auto literal_id = file_->ResolveMethodIndex(method_id, arg_literal_idx); + ins_literal_ids_.insert(literal_id.GetOffset()); + } + if (bc_ins.HasFlag(BytecodeInstruction::Flags::METHOD_ID)) { + const auto arg_method_idx = bc_ins.GetId().AsIndex(); + const auto arg_method_id = file_->ResolveMethodIndex(method_id, arg_method_idx); + ins_method_ids_.insert(arg_method_id.GetOffset()); + } + if (bc_ins.HasFlag(BytecodeInstruction::Flags::STRING_ID)) { + const auto arg_string_idx = bc_ins.GetId().AsIndex(); + const auto string_id = file_->ResolveOffsetByIndex(method_id, arg_string_idx); + ins_string_ids_.insert(string_id.GetOffset()); + } + bc_ins = bc_ins.GetNext(); + } + return true; +} + void Verifier::CollectModuleLiteralId(const panda_file::File::EntityId &field_id) { panda_file::FieldDataAccessor field_accessor(*file_, field_id); @@ -255,13 +299,44 @@ bool Verifier::CheckVRegIdx(const BytecodeInstruction &bc_ins, const size_t coun return true; } -bool Verifier::VerifyMethodId(const BytecodeInstruction &bc_ins, const panda_file::File::EntityId &method_id) +bool Verifier::VerifyMethodId(const uint32_t &method_id) const +{ + auto iter = std::find(method_string_literal_ids_.begin(), method_string_literal_ids_.end(), method_id); + if (iter == method_string_literal_ids_.end() || + (std::find(literal_ids_.begin(), literal_ids_.end(), method_id) != literal_ids_.end()) || + ins_string_ids_.count(method_id)) { + LOG(ERROR, VERIFIER) << "Verify method_id failed. method_id(0x" << std::hex << method_id << ")!"; + return false; + } + return true; +} + +bool Verifier::VerifyLiteralId(const uint32_t &literal_id) const { - const auto arg_method_idx = bc_ins.GetId().AsIndex(); - const auto arg_method_id = file_->ResolveMethodIndex(method_id, arg_method_idx); - auto iter = std::find(method_ids_.begin(), method_ids_.end(), arg_method_id); - if (iter == method_ids_.end()) { - LOG(ERROR, VERIFIER) << "Verify method_id failed. method_id(0x" << std::hex << arg_method_id << ")!"; + auto iter = std::find(literal_ids_.begin(), literal_ids_.end(), literal_id); + if (iter == literal_ids_.end()) { + LOG(ERROR, VERIFIER) << "Verify literal_id failed. literal_id(0x" << std::hex << literal_id << ")!"; + return false; + } + return true; +} + +bool Verifier::VerifyStringId(const uint32_t &string_id) const +{ + + auto iter = std::find(method_string_literal_ids_.begin(), method_string_literal_ids_.end(), string_id); + if (iter == method_string_literal_ids_.end() || + ins_method_ids_.count(string_id) || + (std::find(literal_ids_.begin(), literal_ids_.end(), string_id) != literal_ids_.end())) { + LOG(ERROR, VERIFIER) << "Invalid string_id. string_id(0x" << std::hex << string_id << ")!"; + return false; + } + auto string_data = file_->GetStringData(panda_file::File::EntityId(string_id)); + auto desc = std::string(utf::Mutf8AsCString(string_data.data)); + std::wstring_convert> converter; + std::wstring utf16_desc = converter.from_bytes(desc); + if (string_data.utf16_length != utf16_desc.length()) { + LOG(ERROR, VERIFIER) << "Invalid string_id. string_id(0x" << std::hex << string_id << ")!"; return false; } return true; @@ -299,25 +374,11 @@ std::optional Verifier::GetSlotNumberFromAnnotation(panda_file::Method return slot_number; } -bool Verifier::VerifyLiteralId(const BytecodeInstruction &bc_ins, const panda_file::File::EntityId &method_id, - size_t idx /* = 0 */) -{ - const auto arg_literal_idx = bc_ins.GetId(idx).AsIndex(); - const auto arg_literal_id = file_->ResolveMethodIndex(method_id, arg_literal_idx); - const auto literal_id = panda_file::File::EntityId(arg_literal_id).GetOffset(); - auto iter = std::find(literal_ids_.begin(), literal_ids_.end(), literal_id); - if (iter == literal_ids_.end()) { - LOG(ERROR, VERIFIER) << "Verify literal_id failed. literal_id(0x" << std::hex << arg_literal_id << ")!"; - return false; - } - return true; -} - bool Verifier::VerifyMethodIdInLiteralArray(const uint32_t &id) { - const auto method_id = panda_file::File::EntityId(id); - auto iter = std::find(method_ids_.begin(), method_ids_.end(), method_id); - if (iter == method_ids_.end()) { + const auto method_id = panda_file::File::EntityId(id).GetOffset(); + auto iter = std::find(all_method_ids_.begin(), all_method_ids_.end(), method_id); + if (iter == all_method_ids_.end()) { LOG(ERROR, VERIFIER) << "Invalid method id(0x" << id << ") in literal array"; return false; } @@ -443,25 +504,6 @@ bool Verifier::VerifyLiteralArrays() return true; } -bool Verifier::VerifyStringId(const BytecodeInstruction &bc_ins, const panda_file::File::EntityId &method_id) -{ - const auto arg_string_idx = bc_ins.GetId().AsIndex(); - const auto arg_string_id = file_->ResolveOffsetByIndex(method_id, arg_string_idx); - if (!arg_string_id.IsValid()) { - LOG(ERROR, VERIFIER) << "Invalid string_id. string_id(0x" << std::hex << arg_string_id << ")!"; - return false; - } - auto string_data = file_->GetStringData(arg_string_id); - auto desc = std::string(utf::Mutf8AsCString(string_data.data)); - std::wstring_convert> converter; - std::wstring utf16_desc = converter.from_bytes(desc); - if (string_data.utf16_length != utf16_desc.length()) { - LOG(ERROR, VERIFIER) << "Invalid string_id. string_id(0x" << std::hex << arg_string_id << ")!"; - return false; - } - return true; -} - bool Verifier::IsJumpInstruction(const Opcode &ins_opcode) { bool valid = true; @@ -630,42 +672,29 @@ bool Verifier::CheckConstantPoolMethodContent(const panda_file::File::EntityId & return true; } -bool Verifier::CheckConstantPoolIndex(const panda_file::File::EntityId &method_id) +bool Verifier::CheckConstantPoolIndex() const { - panda_file::MethodDataAccessor method_accessor(*file_, method_id); - - ASSERT(method_accessor.GetCodeId().has_value()); - panda_file::CodeDataAccessor code_accessor(*file_, method_accessor.GetCodeId().value()); - const auto ins_size = code_accessor.GetCodeSize(); - const auto ins_arr = code_accessor.GetInstructions(); - - auto bc_ins = BytecodeInstruction(ins_arr); - const auto bc_ins_last = bc_ins.JumpTo(ins_size); - - while (bc_ins.GetAddress() < bc_ins_last.GetAddress()) { - if (bc_ins.HasFlag(BytecodeInstruction::Flags::LITERALARRAY_ID)) { - // the idx of any instruction with a literal id is 0 except defineclasswithbuffer - size_t idx = 0; - if (bc_ins.GetOpcode() == BytecodeInstruction::Opcode::DEFINECLASSWITHBUFFER_IMM8_ID16_ID16_IMM16_V8 || - bc_ins.GetOpcode() == BytecodeInstruction::Opcode::DEFINECLASSWITHBUFFER_IMM16_ID16_ID16_IMM16_V8) { - idx = 1; - } - if (!VerifyLiteralId(bc_ins, method_id, idx)) { - return false; - } + for(auto &id : ins_method_ids_) { + if (std::find(all_method_ids_.begin(), all_method_ids_.end(), id) == + all_method_ids_.end()) { } - if (bc_ins.HasFlag(BytecodeInstruction::Flags::METHOD_ID)) { - if (!VerifyMethodId(bc_ins, method_id)) { - return false; - } + if(!VerifyMethodId(id)) { + return false; } - if (bc_ins.HasFlag(BytecodeInstruction::Flags::STRING_ID)) { - if (!VerifyStringId(bc_ins, method_id)) { - return false; - } + } + + for(auto &id : ins_literal_ids_) { + if(!VerifyLiteralId(id)) { + return false; } - bc_ins = bc_ins.GetNext(); } + + for(auto &id : ins_string_ids_) { + if(!VerifyStringId(id)) { + return false; + } + } + return true; } } // namespace panda::verifier diff --git a/verifier/verifier.h b/verifier/verifier.h index f9162773ca..82e759c6f2 100644 --- a/verifier/verifier.h +++ b/verifier/verifier.h @@ -34,8 +34,6 @@ namespace panda::verifier { using Opcode = BytecodeInstruction::Opcode; enum class ActionType { - CHECKCONSTPOOL, - CHECKCONSTPOOLIDX, CHECKCONSTPOOLCONTENT, COLLECTINFOS, }; @@ -58,25 +56,25 @@ public: std::unordered_map inner_method_map_; private: - void GetMethodIds(); void GetLiteralIds(); + void GetConstantPoolIds(); + bool CollectIdInInstructions(const panda_file::File::EntityId &method_id); void CollectModuleLiteralId(const panda_file::File::EntityId &field_id); bool CheckConstantPool(const verifier::ActionType type); size_t GetVRegCount(const BytecodeInstruction &bc_ins); bool CheckConstantPoolActions(const verifier::ActionType type, panda_file::File::EntityId method_id); - bool VerifyMethodId(const BytecodeInstruction &bc_ins, const panda_file::File::EntityId &method_id); + bool VerifyMethodId(const uint32_t &method_id) const; + bool VerifyLiteralId(const uint32_t &literal_id) const; + bool VerifyStringId(const uint32_t &literal_id) const; bool CheckVRegIdx(const BytecodeInstruction &bc_ins, const size_t count, const uint32_t max_reg_idx); std::optional GetFirstImmFromInstruction(const BytecodeInstruction &bc_ins); std::optional GetSlotNumberFromAnnotation(panda_file::MethodDataAccessor &method_accessor); - bool VerifyLiteralId(const BytecodeInstruction &bc_ins, const panda_file::File::EntityId &method_id, - size_t idx); bool VerifyMethodIdInLiteralArray(const uint32_t &id); bool VerifyStringIdInLiteralArray(const uint32_t &id); bool VerifyLiteralIdInLiteralArray(const uint32_t &id); bool IsModuleLiteralId(const panda_file::File::EntityId &id) const; bool VerifySingleLiteralArray(const panda_file::File::EntityId &literal_id); bool VerifyLiteralArrays(); - bool VerifyStringId(const BytecodeInstruction &bc_ins, const panda_file::File::EntityId &method_id); bool IsJumpInstruction(const Opcode &ins_opcode); bool VerifyJumpInstruction(const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last, const BytecodeInstruction &bc_ins_first); @@ -85,10 +83,14 @@ private: bool VerifySlotNumber(panda_file::MethodDataAccessor &method_accessor, const uint32_t &slot_number, const panda_file::File::EntityId &method_id); bool CheckConstantPoolMethodContent(const panda_file::File::EntityId &method_id); - bool CheckConstantPoolIndex(const panda_file::File::EntityId &method_id); + bool CheckConstantPoolIndex() const; std::unique_ptr file_; - std::vector method_ids_; + std::vector method_string_literal_ids_; + std::vector all_method_ids_; + std::unordered_set ins_method_ids_; + std::unordered_set ins_literal_ids_; + std::unordered_set ins_string_ids_; std::unordered_set module_literals_; static constexpr size_t DEFAULT_ARGUMENT_NUMBER = 3; static constexpr uint32_t FILE_CONTENT_OFFSET = 12U; -- Gitee From 410e6c79adf53744729f865fbac7db7ae288f42c Mon Sep 17 00:00:00 2001 From: chenyiyuan Date: Tue, 16 Jan 2024 09:16:03 +0000 Subject: [PATCH 2/2] Automated test script for verifier issue: https://gitee.com/openharmony/arkcompiler_runtime_core/issues/I8W0WQ?from=project-issue Signed-off-by: chenyiyuan Change-Id: I0c655d1766e29e74a9f7e728b20c17aae77bbd42 --- BUILD.gn | 3 + verifier/tests/verify_constant_pool_tests.cpp | 42 ------ verifier/tests/verify_sys_hap_abc.py | 132 ++++++++++++++++++ verifier/verifier.cpp | 89 +++++------- verifier/verifier.h | 2 +- 5 files changed, 172 insertions(+), 96 deletions(-) create mode 100644 verifier/tests/verify_sys_hap_abc.py diff --git a/BUILD.gn b/BUILD.gn index 1c6d17b45a..b2055653bf 100755 --- a/BUILD.gn +++ b/BUILD.gn @@ -66,6 +66,7 @@ group("ark_host_windows_tools_packages") { deps += [ "$ark_root/assembler:ark_asm($build_root/toolchain/mingw:mingw_x86_64)", "$ark_root/disassembler:ark_disasm($build_root/toolchain/mingw:mingw_x86_64)", + "$ark_root/verifier:ark_verifier($build_root/toolchain/mingw:mingw_x86_64)", ] } @@ -84,6 +85,7 @@ group("ark_host_mac_tools_packages") { "$ark_root/libpandabase:libarkbase($build_root/toolchain/mac:clang_arm64)", "$ark_root/libpandafile:libarkfile($build_root/toolchain/mac:clang_arm64)", "$ark_root/libziparchive:libarkziparchive($build_root/toolchain/mac:clang_arm64)", + "$ark_root/verifier:ark_verifier($build_root/toolchain/mac:clang_arm64)", ] } else { deps += [ @@ -92,6 +94,7 @@ group("ark_host_mac_tools_packages") { "$ark_root/libpandabase:libarkbase($build_root/toolchain/mac:clang_x64)", "$ark_root/libpandafile:libarkfile($build_root/toolchain/mac:clang_x64)", "$ark_root/libziparchive:libarkziparchive($build_root/toolchain/mac:clang_x64)", + "$ark_root/verifier:ark_verifier($build_root/toolchain/mac:clang_x64)", ] } } diff --git a/verifier/tests/verify_constant_pool_tests.cpp b/verifier/tests/verify_constant_pool_tests.cpp index e14b9e9abf..f323bf4bbd 100644 --- a/verifier/tests/verify_constant_pool_tests.cpp +++ b/verifier/tests/verify_constant_pool_tests.cpp @@ -291,48 +291,6 @@ HWTEST_F(VerifierConstantPool, verifier_constant_pool_008, TestSize.Level1) } } -/** -* @tc.name: verifier_constant_pool_009 -* @tc.desc: Verify the literal string of the abc file. -* @tc.type: FUNC -* @tc.require: file path and name -*/ -HWTEST_F(VerifierConstantPool, verifier_constant_pool_009, TestSize.Level1) -{ - const std::string base_file_name = GRAPH_TEST_ABC_DIR "test_constant_pool_content.abc"; - { - panda::verifier::Verifier ver {base_file_name}; - ver.CollectIdInfos(); - EXPECT_TRUE(ver.VerifyConstantPoolContent()); - } - std::ifstream base_file(base_file_name, std::ios::binary); - EXPECT_TRUE(base_file.is_open()); - - std::vector buffer(std::istreambuf_iterator(base_file), {}); - - std::vector new_str = {'a', 'b', 'c', 'd'}; - std::vector str = {'a', 'g', 'e'}; // The known string in the literal array - for (size_t i = 0; i < buffer.size(); ++i) { - if (buffer[i] == str[0] && buffer[i + 1] == str[1] && buffer[i + 2] == str[2]) { - buffer[i] = new_str[0]; - buffer[i + 1] = new_str[1]; - buffer[i + 2] = new_str[2]; - buffer[i + 3] = new_str[3]; - break; - } - } - - const std::string target_file_name = GRAPH_TEST_ABC_DIR "verifier_constant_pool_009.abc"; - GenerateModifiedAbc(buffer, target_file_name); - base_file.close(); - - { - panda::verifier::Verifier ver {target_file_name}; - ver.CollectIdInfos(); - EXPECT_FALSE(ver.VerifyConstantPoolContent()); - } -} - /** * @tc.name: verifier_constant_pool_010 * @tc.desc: Verify the literal id in the literal array of the abc file. diff --git a/verifier/tests/verify_sys_hap_abc.py b/verifier/tests/verify_sys_hap_abc.py new file mode 100644 index 0000000000..e8587f303e --- /dev/null +++ b/verifier/tests/verify_sys_hap_abc.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Copyright (c) 2024 Huawei Device Co., Ltd. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os +import shutil +import subprocess +import zipfile + + +def parse_args(): + parser = argparse.ArgumentParser(description="Verify abc files in system app.") + parser.add_argument( + "--hap-dir", required=True, help="Path to the HAP files directory.") + parser.add_argument( + "--verifier-dir", required=True, help="Path to the ark_verifier directory.") + return parser.parse_args() + + +def copy_and_rename_hap_files(hap_folder, out_folder): + for file_path in os.listdir(hap_folder): + if file_path.endswith(".hap"): + destination_path = os.path.join(out_folder, file_path.replace(".hap", ".zip")) + shutil.copy(os.path.join(hap_folder, file_path), destination_path) + + +def extract_zip(zip_path, extract_folder): + try: + with zipfile.ZipFile(zip_path, 'r') as zip_ref: + zip_ref.extractall(extract_folder) + except zipfile.BadZipFile as e: + print(f"Error extracting {zip_path}: {e}") + + +def verify_file(file_path, ark_verifier_path): + verification_command = [ark_verifier_path, "--input_file", file_path] + result = subprocess.run(verification_command, capture_output=True, text=True) + status = 'pass' if result.returncode == 0 else 'fail' + print(f"Verifying: {file_path} {status}") + return result.returncode == 0 + + +def process_directory(directory, ark_verifier_path): + total_count = 0 + passed_count = 0 + failed_abc_list = [] + + for root, dirs, files in os.walk(directory): + for file in files: + if not file.endswith(".abc"): + continue + abc_path = os.path.join(root, file) + if verify_file(abc_path, ark_verifier_path): + passed_count += 1 + else: + failed_abc_list.append(os.path.relpath(abc_path, hap_folder)) + total_count += 1 + + return total_count, passed_count, failed_abc_list + +def verify_hap(hap_folder, ark_verifier_path): + failed_abc_list = [] + passed_count = 0 + total_count = 0 + + for file in os.listdir(hap_folder): + if not file.endswith(".zip"): + continue + + zip_path = os.path.join(hap_folder, file) + extract_folder = os.path.join(hap_folder, file.replace(".zip", "")) + extract_zip(zip_path, extract_folder) + + ets_path = os.path.join(extract_folder, "ets") + if not os.path.exists(ets_path): + continue + + modules_abc_path = os.path.join(ets_path, "modules.abc") + if os.path.isfile(modules_abc_path): + if verify_file(modules_abc_path, ark_verifier_path): + passed_count += 1 + else: + failed_abc_list.append(os.path.relpath(modules_abc_path, hap_folder)) + total_count += 1 + else: + total_inc, passed_inc, failed_abc_inc = process_directory(ets_path, ark_verifier_path) + total_count += total_inc + passed_count += passed_inc + failed_abc_list.extend(failed_abc_inc) + + return total_count, passed_count, len(failed_abc_list), failed_abc_list + + +def main(): + args = parse_args() + + hap_folder_path = os.path.abspath(args.hap_dir) + ark_verifier_path = os.path.abspath(os.path.join(args.verifier_dir, "ark_verifier")) + + out_folder = os.path.join(os.path.dirname(__file__), "out") + os.makedirs(out_folder, exist_ok=True) + + copy_and_rename_hap_files(hap_folder_path, out_folder) + + total_count, passed_count, failed_count, failed_abc_list = verify_hap(out_folder, ark_verifier_path) + + print("Summary(abc verification):") + print(f"Total: {total_count}") + print(f"Passed: {passed_count}") + print(f"Failed: {failed_count}") + + if failed_count > 0: + print("\nFailed abc files:") + for failed_abc in failed_abc_list: + print(f" - {failed_abc}") + + +if __name__ == "__main__": + main() diff --git a/verifier/verifier.cpp b/verifier/verifier.cpp index 08611075e2..f5639425d6 100644 --- a/verifier/verifier.cpp +++ b/verifier/verifier.cpp @@ -154,14 +154,14 @@ bool Verifier::VerifyConstantPoolContent() void Verifier::GetConstantPoolIds() { - if (method_string_literal_ids_.size() != 0) { + if (constant_pool_ids_.size() != 0) { return; } auto index_headers = file_->GetIndexHeaders(); for (const auto &index_header : index_headers) { auto region_indexs = file_->GetMethodIndex(&index_header); for (auto &index : region_indexs) { - method_string_literal_ids_.push_back(index.GetOffset()); + constant_pool_ids_.push_back(index.GetOffset()); } } } @@ -196,7 +196,6 @@ bool Verifier::CheckConstantPoolActions(const verifier::ActionType type, panda_f bool Verifier::CollectIdInInstructions(const panda_file::File::EntityId &method_id) { panda_file::MethodDataAccessor method_accessor(*file_, method_id); - ASSERT(method_accessor.GetCodeId().has_value()); panda_file::CodeDataAccessor code_accessor(*file_, method_accessor.GetCodeId().value()); const auto ins_size = code_accessor.GetCodeSize(); @@ -207,7 +206,7 @@ bool Verifier::CollectIdInInstructions(const panda_file::File::EntityId &method_ while (bc_ins.GetAddress() < bc_ins_last.GetAddress()) { if (!bc_ins.IsPrimaryOpcodeValid()) { - LOG(ERROR, VERIFIER) << "Verify primaryOpcode failed!"; + LOG(ERROR, VERIFIER) << "Fail to verify primary opcode!"; return false; } if (bc_ins.HasFlag(BytecodeInstruction::Flags::LITERALARRAY_ID)) { @@ -301,11 +300,11 @@ bool Verifier::CheckVRegIdx(const BytecodeInstruction &bc_ins, const size_t coun bool Verifier::VerifyMethodId(const uint32_t &method_id) const { - auto iter = std::find(method_string_literal_ids_.begin(), method_string_literal_ids_.end(), method_id); - if (iter == method_string_literal_ids_.end() || + auto iter = std::find(constant_pool_ids_.begin(), constant_pool_ids_.end(), method_id); + if (iter == constant_pool_ids_.end() || (std::find(literal_ids_.begin(), literal_ids_.end(), method_id) != literal_ids_.end()) || ins_string_ids_.count(method_id)) { - LOG(ERROR, VERIFIER) << "Verify method_id failed. method_id(0x" << std::hex << method_id << ")!"; + LOG(ERROR, VERIFIER) << "Fail to verify method id. method_id(0x" << std::hex << method_id << ")!"; return false; } return true; @@ -315,7 +314,7 @@ bool Verifier::VerifyLiteralId(const uint32_t &literal_id) const { auto iter = std::find(literal_ids_.begin(), literal_ids_.end(), literal_id); if (iter == literal_ids_.end()) { - LOG(ERROR, VERIFIER) << "Verify literal_id failed. literal_id(0x" << std::hex << literal_id << ")!"; + LOG(ERROR, VERIFIER) << "Fail to verify literal id. literal_id(0x" << std::hex << literal_id << ")!"; return false; } return true; @@ -323,20 +322,11 @@ bool Verifier::VerifyLiteralId(const uint32_t &literal_id) const bool Verifier::VerifyStringId(const uint32_t &string_id) const { - - auto iter = std::find(method_string_literal_ids_.begin(), method_string_literal_ids_.end(), string_id); - if (iter == method_string_literal_ids_.end() || + auto iter = std::find(constant_pool_ids_.begin(), constant_pool_ids_.end(), string_id); + if (iter == constant_pool_ids_.end() || ins_method_ids_.count(string_id) || (std::find(literal_ids_.begin(), literal_ids_.end(), string_id) != literal_ids_.end())) { - LOG(ERROR, VERIFIER) << "Invalid string_id. string_id(0x" << std::hex << string_id << ")!"; - return false; - } - auto string_data = file_->GetStringData(panda_file::File::EntityId(string_id)); - auto desc = std::string(utf::Mutf8AsCString(string_data.data)); - std::wstring_convert> converter; - std::wstring utf16_desc = converter.from_bytes(desc); - if (string_data.utf16_length != utf16_desc.length()) { - LOG(ERROR, VERIFIER) << "Invalid string_id. string_id(0x" << std::hex << string_id << ")!"; + LOG(ERROR, VERIFIER) << "Fail to verify string id. string_id(0x" << std::hex << string_id << ")!"; return false; } return true; @@ -388,6 +378,10 @@ bool Verifier::VerifyMethodIdInLiteralArray(const uint32_t &id) bool Verifier::VerifyStringIdInLiteralArray(const uint32_t &id) { auto string_data = file_->GetStringData(panda_file::File::EntityId(id)); + if (string_data.data == nullptr) { + LOG(ERROR, VERIFIER) << "Invalid string_id. string_id(0x" << std::hex << id << ")!"; + return false; + } auto desc = std::string(utf::Mutf8AsCString(string_data.data)); std::wstring_convert> converter; std::wstring utf16_desc = converter.from_bytes(desc); @@ -457,10 +451,7 @@ bool Verifier::VerifySingleLiteralArray(const panda_file::File::EntityId &litera break; } case panda_file::LiteralTag::STRING: { - const auto value = static_cast(panda_file::helpers::Read(&sp)); - if (!VerifyStringIdInLiteralArray(value)) { - return false; - } + panda_file::helpers::Read(&sp); break; } case panda_file::LiteralTag::METHOD: { @@ -564,7 +555,7 @@ bool Verifier::VerifyJumpInstruction(const BytecodeInstruction &bc_ins, const By // update maximum backward offset const auto bc_ins_backward_size = bc_ins.GetAddress() - bc_ins_first.GetAddress(); if (!bc_ins.IsPrimaryOpcodeValid()) { - LOG(ERROR, VERIFIER) << "Verify primaryOpcode failed!"; + LOG(ERROR, VERIFIER) << "Fail to verify primary opcode!"; return false; } @@ -572,22 +563,16 @@ bool Verifier::VerifyJumpInstruction(const BytecodeInstruction &bc_ins, const By if (IsJumpInstruction(ins_opcode)) { std::optional immdata = GetFirstImmFromInstruction(bc_ins); if (!immdata.has_value()) { - LOG(ERROR, VERIFIER) << "Get immediate data failed!"; + LOG(ERROR, VERIFIER) << "Fail to get immediate data!"; return false; } - - if (immdata.value() > 0) { - // immdata -1,excluding instruction location offset - if (bc_ins_forward_size - 1 <= immdata.value()) { - LOG(ERROR, VERIFIER) << "Jump forward out of boundary"; - return false; - } - } else if (immdata.value() < 0) { - // immdata -1,excluding instruction location offset - if ((bc_ins_backward_size - 1) + immdata.value() < 0) { - LOG(ERROR, VERIFIER) << "Jump backward out of boundary"; - return false; - } + if ((immdata.value() > 0) && (immdata.value() >= bc_ins_forward_size)) { + LOG(ERROR, VERIFIER) << "Jump forward out of boundary"; + return false; + } + if ((immdata.value() < 0) && (bc_ins_backward_size + immdata.value() < 0)) { + LOG(ERROR, VERIFIER) << "Jump backward out of boundary"; + return false; } } return true; @@ -600,7 +585,7 @@ bool Verifier::GetIcSlotFromInstruction(const BytecodeInstruction &bc_ins, uint3 if (bc_ins.HasFlag(BytecodeInstruction::Flags::ONE_SLOT)) { first_imm = GetFirstImmFromInstruction(bc_ins); if (!first_imm.has_value()) { - LOG(ERROR, VERIFIER) << "Get first immediate data failed!"; + LOG(ERROR, VERIFIER) << "Fail to get first immediate data!"; return false; } first_slot_index = first_imm.value(); @@ -609,7 +594,7 @@ bool Verifier::GetIcSlotFromInstruction(const BytecodeInstruction &bc_ins, uint3 } else if (bc_ins.HasFlag(BytecodeInstruction::Flags::TWO_SLOT)) { first_imm = GetFirstImmFromInstruction(bc_ins); if (!first_imm.has_value()) { - LOG(ERROR, VERIFIER) << "Get first immediate data failed!"; + LOG(ERROR, VERIFIER) << "Fail to get first immediate data!"; return false; } first_slot_index = first_imm.value(); @@ -625,8 +610,9 @@ bool Verifier::VerifySlotNumber(panda_file::MethodDataAccessor &method_accessor, { const auto ann_slot_number = GetSlotNumberFromAnnotation(method_accessor); if (!ann_slot_number.has_value()) { - LOG(ERROR, VERIFIER) << "Fail to get slot number from annotation"; - return false; + LOG(INFO, VERIFIER) << "There is no slot number information in annotaion."; + // To be compatible with old abc, slot number verification is not continued + return true; } if (slot_number == ann_slot_number.value()) { return true; @@ -656,7 +642,7 @@ bool Verifier::CheckConstantPoolMethodContent(const panda_file::File::EntityId & return false; } if (!GetIcSlotFromInstruction(bc_ins, ins_slot_num, has_slot, is_two_slot)) { - LOG(ERROR, VERIFIER) << "Get first slot index failed!"; + LOG(ERROR, VERIFIER) << "Fail to get first slot index!"; return false; } bc_ins = bc_ins.GetNext(); @@ -674,23 +660,20 @@ bool Verifier::CheckConstantPoolMethodContent(const panda_file::File::EntityId & bool Verifier::CheckConstantPoolIndex() const { - for(auto &id : ins_method_ids_) { - if (std::find(all_method_ids_.begin(), all_method_ids_.end(), id) == - all_method_ids_.end()) { - } - if(!VerifyMethodId(id)) { + for (auto &id : ins_method_ids_) { + if (!VerifyMethodId(id)) { return false; } } - for(auto &id : ins_literal_ids_) { - if(!VerifyLiteralId(id)) { + for (auto &id : ins_literal_ids_) { + if (!VerifyLiteralId(id)) { return false; } } - for(auto &id : ins_string_ids_) { - if(!VerifyStringId(id)) { + for (auto &id : ins_string_ids_) { + if (!VerifyStringId(id)) { return false; } } diff --git a/verifier/verifier.h b/verifier/verifier.h index 82e759c6f2..167a111245 100644 --- a/verifier/verifier.h +++ b/verifier/verifier.h @@ -86,7 +86,7 @@ private: bool CheckConstantPoolIndex() const; std::unique_ptr file_; - std::vector method_string_literal_ids_; + std::vector constant_pool_ids_; std::vector all_method_ids_; std::unordered_set ins_method_ids_; std::unordered_set ins_literal_ids_; -- Gitee