diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index ebd2439dc660cff1384cda3310ec889ff24a6664..273a58b744703b748d2a0eb15734f9a1bcc6f49f 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2283,6 +2283,14 @@ config ARCH_ENABLE_THP_MIGRATION def_bool y depends on TRANSPARENT_HUGEPAGE +config CVM_GUEST + bool "Enable cvm guest run" + depends on DMA_RESTRICTED_POOL + help + Support CVM guest based on S-EL2 + + If unsure, say N. + menu "Power management options" source "kernel/power/Kconfig" diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index d025bafcce433cfeee2c7b374915a41de900969d..2c4423df23db601f901976d89e62f9ed605b6709 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -110,6 +110,9 @@ CONFIG_ACPI_APEI_MEMORY_FAILURE=y CONFIG_ACPI_APEI_EINJ=y CONFIG_VIRTUALIZATION=y CONFIG_KVM=y +CONFIG_CVM_HOST=y +CONFIG_CVM_GUEST=y +CONFIG_DMA_RESTRICTED_POOL=y CONFIG_ARM64_CRYPTO=y CONFIG_CRYPTO_SHA1_ARM64_CE=y CONFIG_CRYPTO_SHA2_ARM64_CE=y diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index af9890a1700ce594d9b113de2598e4fc9631ec13..cb4f5e03dccd32a899012d3140685c6f99c75254 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -762,6 +762,9 @@ CONFIG_ACPI_PPTT=y CONFIG_IRQ_BYPASS_MANAGER=y CONFIG_VIRTUALIZATION=y CONFIG_KVM=y +CONFIG_CVM_HOST=y +CONFIG_CVM_GUEST=y +CONFIG_DMA_RESTRICTED_POOL=y CONFIG_HAVE_KVM_IRQCHIP=y CONFIG_HAVE_KVM_IRQFD=y CONFIG_HAVE_KVM_IRQ_ROUTING=y diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index a3426b61ff6572a614951671e54b663efb1e01c1..b63d86a52cc987c7fe29e0bd24abc9515bb1a13f 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -2,5 +2,4 @@ generic-y += early_ioremap.h generic-y += mcs_spinlock.h generic-y += qrwlock.h -generic-y += set_memory.h generic-y += user.h diff --git a/arch/arm64/include/asm/cvm_guest.h b/arch/arm64/include/asm/cvm_guest.h new file mode 100644 index 0000000000000000000000000000000000000000..f16c34edb26b10aca0e5c82f4f594d7383b90b35 --- /dev/null +++ b/arch/arm64/include/asm/cvm_guest.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. 
+ */ +#ifndef __CVM_GUEST_H +#define __CVM_GUEST_H + +#ifdef CONFIG_CVM_GUEST +#include + +extern int set_cvm_memory_encrypted(unsigned long addr, int numpages); + +extern int set_cvm_memory_decrypted(unsigned long addr, int numpages); + +extern bool is_cvm_world(void); + +#define is_swiotlb_for_alloc is_swiotlb_for_alloc +static inline bool is_swiotlb_for_alloc(struct device *dev) +{ + /* Force dma alloc by swiotlb in Confidential VMs */ + return is_cvm_world(); +} + +extern void __init swiotlb_cvm_update_mem_attributes(void); + +#else + +static inline int set_cvm_memory_encrypted(unsigned long addr, int numpages) +{ + return 0; +} + +static inline int set_cvm_memory_decrypted(unsigned long addr, int numpages) +{ + return 0; +} + +static inline bool is_cvm_world(void) +{ + return false; +} + +static inline void __init swiotlb_cvm_update_mem_attributes(void) {} + +#endif /* CONFIG_CVM_GUEST */ +#endif /* __CVM_GUEST_H */ diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index fb3e3f6136ecdf1beb09a07985f3782520b1a1bd..5405636230c7bc54e91bb692173250a361680636 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -556,4 +556,22 @@ static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature) return test_bit(feature, vcpu->arch.features); } +#ifdef CONFIG_CVM_HOST +static inline bool kvm_is_cvm(struct kvm *kvm) +{ + if (static_branch_unlikely(&kvm_cvm_is_available)) { + struct cvm *cvm = kvm->arch.cvm; + + return cvm && cvm->is_cvm; + } + return false; +} + +static inline enum cvm_state kvm_cvm_state(struct kvm *kvm) +{ + struct cvm *cvm = kvm->arch.cvm; + + return cvm && READ_ONCE(cvm->state); +} +#endif #endif /* __ARM64_KVM_EMULATE_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 8bb67dfb956455cd7740fe407ed5f669de9306af..9799948ec16df5dd44d71dfacb8e6da51f34e274 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -26,6 +26,9 @@ #include #include #include +#ifdef CONFIG_CVM_HOST +#include +#endif #define __KVM_HAVE_ARCH_INTC_INITIALIZED @@ -124,7 +127,20 @@ struct kvm_arch { #ifdef CONFIG_KVM_HISI_VIRT spinlock_t dvm_lock; - cpumask_t *dvm_cpumask; /* Union of all vcpu's cpus_ptr */ +#endif + +#if defined(CONFIG_KVM_HISI_VIRT) || defined(CONFIG_CVM_HOST) +#ifndef __GENKSYMS__ + union { + cpumask_t *dvm_cpumask; /* Union of all vcpu's cpus_ptr */ + void *cvm; + }; +#else + cpumask_t *dvm_cpumask; /* Union of all vcpu's cpus_ptr */ +#endif +#endif + +#ifdef CONFIG_KVM_HISI_VIRT u64 lsudvmbm_el2; #endif }; @@ -403,8 +419,18 @@ struct kvm_vcpu_arch { #ifdef CONFIG_KVM_HISI_VIRT /* Copy of current->cpus_ptr */ cpumask_t *cpus_ptr; +#endif + +#if defined(CONFIG_KVM_HISI_VIRT) || defined(CONFIG_CVM_HOST) +#ifndef __GENKSYMS__ + union { + cpumask_t *pre_cpus_ptr; + void *tec; + }; +#else cpumask_t *pre_cpus_ptr; #endif +#endif }; /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */ diff --git a/arch/arm64/include/asm/kvm_tmi.h b/arch/arm64/include/asm/kvm_tmi.h new file mode 100644 index 0000000000000000000000000000000000000000..1bba7c7cdb4ea8b2cc5823644258094aa55ae083 --- /dev/null +++ b/arch/arm64/include/asm/kvm_tmi.h @@ -0,0 +1,377 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, The Linux Foundation. All rights reserved. 
+ */ +#ifndef __TMM_TMI_H +#define __TMM_TMI_H +#ifdef CONFIG_CVM_HOST +#include +#include +#include +#include + +#define GRANULE_SIZE 4096 + +#define NO_NUMA -1 + +#define TMM_TTT_LEVEL_3 3 + +#ifdef CONFIG_CVM_HOST_FVP_PLAT +#define CVM_MEM_BASE ULL(0x8800000000) /* choose FVP platform to run cVM */ +#define VQ_NUM 3 +#else +#define CVM_MEM_BASE ULL(0x800000000) /* choose qemu platform to run cVM */ +#define VQ_NUM 3 +#endif + +#define MEM_SEG_NUMS 2 + +/* define in QEMU hw/arm/virt.c */ +#define VIRT_PCIE_MMIO 0x10000000 /* 256MB */ +#define VIRT_PCIE_MMIO_SIZE 0x1000000 /* 16MB */ +#define VIRT_HIGH_PCIE_ECAM 0x8000000000 /* 512GB */ +#define VIRT_HIGH_PCIE_ECAM_SIZE 0x12000000 /* 288MB */ + +/* TMI error codes. */ +#define TMI_SUCCESS 0 +#define TMI_ERROR_INPUT 1 +#define TMI_ERROR_MEMORY 2 +#define TMI_ERROR_ALIAS 3 +#define TMI_ERROR_IN_USE 4 +#define TMI_ERROR_CVM_STATE 5 +#define TMI_ERROR_OWNER 6 +#define TMI_ERROR_TEC 7 +#define TMI_ERROR_TTT_WALK 8 +#define TMI_ERROR_TTT_ENTRY 9 +#define TMI_ERROR_NOT_SUPPORTED 10 +#define TMI_ERROR_INTERNAL 11 +#define TMI_ERROR_CVM_POWEROFF 12 + +#define TMI_RETURN_STATUS(ret) ((ret) & 0xFF) +#define TMI_RETURN_INDEX(ret) (((ret) >> 8) & 0xFF) + +#define TMI_FEATURE_REGISTER_0_S2SZ GENMASK(7, 0) +#define TMI_FEATURE_REGISTER_0_LPA2 BIT(8) +#define TMI_FEATURE_REGISTER_0_SVE_EN BIT(9) +#define TMI_FEATURE_REGISTER_0_SVE_VL GENMASK(13, 10) +#define TMI_FEATURE_REGISTER_0_NUM_BPS GENMASK(17, 14) +#define TMI_FEATURE_REGISTER_0_NUM_WPS GENMASK(21, 18) +#define TMI_FEATURE_REGISTER_0_PMU_EN BIT(22) +#define TMI_FEATURE_REGISTER_0_PMU_NUM_CTRS GENMASK(27, 23) +#define TMI_FEATURE_REGISTER_0_HASH_SHA_256 BIT(28) +#define TMI_FEATURE_REGISTER_0_HASH_SHA_512 BIT(29) + +#define TMI_CVM_PARAM_FLAG_LPA2 BIT(0) +#define TMI_CVM_PARAM_FLAG_SVE BIT(1) +#define TMI_CVM_PARAM_FLAG_PMU BIT(2) + +/* + * Many of these fields are smaller than u64 but all fields have u64 + * alignment, so use u64 to ensure correct alignment. + */ +struct tmi_cvm_params { + u64 flags; + u64 s2sz; + u64 sve_vl; + u64 num_bps; + u64 num_wps; + u64 pmu_num_cnts; + u64 measurement_algo; + u64 vmid; + u64 ns_vtcr; + u64 vttbr_el2; + u64 ttt_base; + s64 ttt_level_start; + u64 ttt_num_start; + u8 rpv[64]; /* Bits 512 */ +}; + +#define TMI_NOT_RUNNABLE 0 +#define TMI_RUNNABLE 1 + +/* + * The number of GPRs (starting from X0) that are + * configured by the host when a TEC is created. + */ +#define TEC_CREATE_NR_GPRS (8U) + +struct tmi_tec_params { + uint64_t gprs[TEC_CREATE_NR_GPRS]; + uint64_t pc; + uint64_t flags; + uint64_t ram_size; +}; + +#define TEC_ENTRY_FLAG_EMUL_MMIO (1UL << 0U) +#define TEC_ENTRY_FLAG_INJECT_SEA (1UL << 1U) +#define TEC_ENTRY_FLAG_TRAP_WFI (1UL << 2U) +#define TEC_ENTRY_FLAG_TRAP_WFE (1UL << 3U) + +#define TMI_EXIT_SYNC 0 +#define TMI_EXIT_IRQ 1 +#define TMI_EXIT_FIQ 2 +#define TMI_EXIT_PSCI 3 +#define TMI_EXIT_HOST_CALL 5 +#define TMI_EXIT_SERROR 6 + +/* + * The number of GPRs (starting from X0) per voluntary exit context. + * Per SMCCC. + */ + #define TEC_EXIT_NR_GPRS (31U) + +/* Maximum number of Interrupt Controller List Registers. 
*/ +#define TEC_GIC_NUM_LRS (16U) + +struct tmi_tec_entry { + uint64_t flags; + uint64_t gprs[TEC_EXIT_NR_GPRS]; + uint64_t gicv3_lrs[TEC_GIC_NUM_LRS]; + uint64_t gicv3_hcr; +}; + +struct tmi_tec_exit { + uint64_t exit_reason; + uint64_t esr; + uint64_t far; + uint64_t hpfar; + uint64_t gprs[TEC_EXIT_NR_GPRS]; + uint64_t gicv3_hcr; + uint64_t gicv3_lrs[TEC_GIC_NUM_LRS]; + uint64_t gicv3_misr; + uint64_t gicv3_vmcr; + uint64_t cntv_ctl; + uint64_t cntv_cval; + uint64_t cntp_ctl; + uint64_t cntp_cval; + uint64_t imm; + uint64_t pmu_ovf_status; +}; + +struct tmi_tec_run { + struct tmi_tec_entry tec_entry; + struct tmi_tec_exit tec_exit; +}; + +#define TMI_FNUM_MIN_VALUE U(0x150) +#define TMI_FNUM_MAX_VALUE U(0x18F) + +/****************************************************************************** + * Bit definitions inside the function id as per the SMC calling convention + ******************************************************************************/ +#define FUNCID_TYPE_SHIFT 31 +#define FUNCID_CC_SHIFT 30 +#define FUNCID_OEN_SHIFT 24 +#define FUNCID_NUM_SHIFT 0 + +#define FUNCID_TYPE_MASK 0x1 +#define FUNCID_CC_MASK 0x1 +#define FUNCID_OEN_MASK 0x3f +#define FUNCID_NUM_MASK 0xffff + +#define FUNCID_TYPE_WIDTH 1 +#define FUNCID_CC_WIDTH 1 +#define FUNCID_OEN_WIDTH 6 +#define FUNCID_NUM_WIDTH 16 + +#define SMC_64 1 +#define SMC_32 0 +#define SMC_TYPE_FAST 1 +#define SMC_TYPE_STD 0 + +/***************************************************************************** + * Owning entity number definitions inside the function id as per the SMC + * calling convention + *****************************************************************************/ +#define OEN_ARM_START 0 +#define OEN_ARM_END 0 +#define OEN_CPU_START 1 +#define OEN_CPU_END 1 +#define OEN_SIP_START 2 +#define OEN_SIP_END 2 +#define OEN_OEM_START 3 +#define OEN_OEM_END 3 +#define OEN_STD_START 4 /* Standard Calls */ +#define OEN_STD_END 4 +#define OEN_TAP_START 48 /* Trusted Applications */ +#define OEN_TAP_END 49 +#define OEN_TOS_START 50 /* Trusted OS */ +#define OEN_TOS_END 63 +#define OEN_LIMIT 64 + +/* Get TMI fastcall std FID from function number */ +#define TMI_FID(smc_cc, func_num) \ + ((SMC_TYPE_FAST << FUNCID_TYPE_SHIFT) | \ + ((smc_cc) << FUNCID_CC_SHIFT) | \ + (OEN_STD_START << FUNCID_OEN_SHIFT) | \ + ((func_num) << FUNCID_NUM_SHIFT)) + +#define U(_x) (_x##U) + +/* + * SMC_TMM_INIT_COMPLETE is the only function in the TMI that originates from + * the CVM world and is handled by the SPMD. The remaining functions are + * always invoked by the Normal world, forward by SPMD and handled by the + * TMM. 
+ */ +#define TMI_FNUM_VERSION U(0x260) +#define TMI_FNUM_MEM_ALLOC U(0x261) +#define TMI_FNUM_MEM_FREE U(0x262) +#define TMI_FNUM_DATA_CREATE U(0x263) +#define TMI_FNUM_DATA_DESTROY U(0x265) +#define TMI_FNUM_CVM_ACTIVATE U(0x267) +#define TMI_FNUM_CVM_CREATE U(0x268) +#define TMI_FNUM_CVM_DESTROY U(0x269) +#define TMI_FNUM_TEC_CREATE U(0x27A) +#define TMI_FNUM_TEC_DESTROY U(0x27B) +#define TMI_FNUM_TEC_ENTER U(0x27C) +#define TMI_FNUM_TTT_CREATE U(0x27D) +#define TMI_FNUM_TTT_DESTROY U(0x27E) +#define TMI_FNUM_TTT_MAP_UNPROTECTED U(0x27F) +#define TMI_FNUM_TTT_MAP_PROTECTED U(0x280) +#define TMI_FNUM_TTT_UNMAP_UNPROTECTED U(0x282) +#define TMI_FNUM_TTT_UNMAP_PROTECTED U(0x283) +#define TMI_FNUM_PSCI_COMPLETE U(0x284) +#define TMI_FNUM_FEATURES U(0x285) +#define TMI_FNUM_TTT_MAP_RANGE U(0x286) +#define TMI_FNUM_TTT_UNMAP_RANGE U(0x287) + +/* TMI SMC64 PIDs handled by the SPMD */ +#define TMI_TMM_VESION TMI_FID(SMC_64, TMI_FNUM_VERSION) +#define TMI_TMM_DATA_CREATE TMI_FID(SMC_64, TMI_FNUM_DATA_CREATE) +#define TMI_TMM_DATA_DESTROY TMI_FID(SMC_64, TMI_FNUM_DATA_DESTROY) +#define TMI_TMM_CVM_ACTIVATE TMI_FID(SMC_64, TMI_FNUM_CVM_ACTIVATE) +#define TMI_TMM_CVM_CREATE TMI_FID(SMC_64, TMI_FNUM_CVM_CREATE) +#define TMI_TMM_CVM_DESTROY TMI_FID(SMC_64, TMI_FNUM_CVM_DESTROY) +#define TMI_TMM_TEC_CREATE TMI_FID(SMC_64, TMI_FNUM_TEC_CREATE) +#define TMI_TMM_TEC_DESTROY TMI_FID(SMC_64, TMI_FNUM_TEC_DESTROY) +#define TMI_TMM_TEC_ENTER TMI_FID(SMC_64, TMI_FNUM_TEC_ENTER) +#define TMI_TMM_TTT_CREATE TMI_FID(SMC_64, TMI_FNUM_TTT_CREATE) +#define TMI_TMM_TTT_DESTROY TMI_FID(SMC_64, TMI_FNUM_TTT_DESTROY) +#define TMI_TMM_TTT_MAP_UNPROTECTED TMI_FID(SMC_64, TMI_FNUM_TTT_MAP_UNPROTECTED) +#define TMI_TMM_TTT_MAP_PROTECTED TMI_FID(SMC_64, TMI_FNUM_TTT_MAP_PROTECTED) +#define TMI_TMM_TTT_UNMAP_UNPROTECTED TMI_FID(SMC_64, TMI_FNUM_TTT_UNMAP_UNPROTECTED) +#define TMI_TMM_TTT_UNMAP_PROTECTED TMI_FID(SMC_64, TMI_FNUM_TTT_UNMAP_PROTECTED) +#define TMI_TMM_PSCI_COMPLETE TMI_FID(SMC_64, TMI_FNUM_PSCI_COMPLETE) +#define TMI_TMM_FEATURES TMI_FID(SMC_64, TMI_FNUM_FEATURES) +#define TMI_TMM_MEM_ALLOC TMI_FID(SMC_64, TMI_FNUM_MEM_ALLOC) +#define TMI_TMM_MEM_FREE TMI_FID(SMC_64, TMI_FNUM_MEM_FREE) +#define TMI_TMM_TTT_MAP_RANGE TMI_FID(SMC_64, TMI_FNUM_TTT_MAP_RANGE) +#define TMI_TMM_TTT_UNMAP_RANGE TMI_FID(SMC_64, TMI_FNUM_TTT_UNMAP_RANGE) + +#define TMI_ABI_VERSION_GET_MAJOR(_version) ((_version) >> 16) +#define TMI_ABI_VERSION_GET_MINOR(_version) ((_version) & 0xFFFF) + +#define TMI_ABI_VERSION_MAJOR U(0x0) + +/* KVM_CAP_ARM_TMM on VM fd */ +#define KVM_CAP_ARM_TMM_CONFIG_CVM_HOST 0 +#define KVM_CAP_ARM_TMM_CREATE_CVM 1 +#define KVM_CAP_ARM_TMM_INIT_IPA_CVM 2 +#define KVM_CAP_ARM_TMM_POPULATE_CVM 3 +#define KVM_CAP_ARM_TMM_ACTIVATE_CVM 4 + +#define KVM_CAP_ARM_TMM_MEASUREMENT_ALGO_SHA256 0 +#define KVM_CAP_ARM_TMM_MEASUREMENT_ALGO_SHA512 1 + +#define KVM_CAP_ARM_TMM_RPV_SIZE 64 + +/* List of configuration items accepted for KVM_CAP_ARM_TMM_CONFIG_CVM_HOST */ +#define KVM_CAP_ARM_TMM_CFG_RPV 0 +#define KVM_CAP_ARM_TMM_CFG_HASH_ALGO 1 +#define KVM_CAP_ARM_TMM_CFG_SVE 2 +#define KVM_CAP_ARM_TMM_CFG_DBG 3 +#define KVM_CAP_ARM_TMM_CFG_PMU 4 + +DECLARE_STATIC_KEY_FALSE(kvm_cvm_is_available); +DECLARE_STATIC_KEY_FALSE(kvm_cvm_is_enable); + +struct kvm_cap_arm_tmm_config_item { + __u32 cfg; + union { + /* cfg == KVM_CAP_ARM_TMM_CFG_RPV */ + struct { + __u8 rpv[KVM_CAP_ARM_TMM_RPV_SIZE]; + }; + + /* cfg == KVM_CAP_ARM_TMM_CFG_HASH_ALGO */ + struct { + __u32 hash_algo; + }; + + /* cfg == KVM_CAP_ARM_TMM_CFG_SVE */ + struct { + 
__u32 sve_vq;
+		};
+
+		/* cfg == KVM_CAP_ARM_TMM_CFG_DBG */
+		struct {
+			__u32 num_brps;
+			__u32 num_wrps;
+		};
+
+		/* cfg == KVM_CAP_ARM_TMM_CFG_PMU */
+		struct {
+			__u32 num_pmu_cntrs;
+		};
+		/* Fix the size of the union */
+		__u8 reserved[256];
+	};
+};
+
+enum tmi_tmm_mem_type {
+	TMM_MEM_TYPE_RD,
+	TMM_MEM_TYPE_TEC,
+	TMM_MEM_TYPE_TTT,
+	TMM_MEM_TYPE_CVM_PA,
+};
+
+enum tmi_tmm_map_size {
+	TMM_MEM_MAP_SIZE_4K,
+	TMM_MEM_MAP_SIZE_2M,
+	TMM_MEM_MAP_SIZE_1G,
+	TMM_MEM_MAP_SIZE_MAX,
+};
+
+static inline bool tmm_is_addr_ttt_level_aligned(uint64_t addr, int level)
+{
+	/* Use a 64-bit constant so the shift cannot overflow an int for low levels */
+	uint64_t mask = (1UL << (12 + 9 * (3 - level))) - 1;
+
+	return (addr & mask) == 0;
+}
+
+u64 phys_to_cvm_phys(u64 phys);
+
+u64 tmi_version(void);
+u64 tmi_data_create(u64 data, u64 rd, u64 map_addr, u64 src, u64 level);
+u64 tmi_data_destroy(u64 rd, u64 map_addr, u64 level);
+u64 tmi_cvm_activate(u64 rd);
+u64 tmi_cvm_create(u64 rd, u64 params_ptr);
+u64 tmi_cvm_destroy(u64 rd);
+u64 tmi_tec_create(u64 tec, u64 rd, u64 mpidr, u64 params_ptr);
+u64 tmi_tec_destroy(u64 tec);
+u64 tmi_tec_enter(u64 tec, u64 run_ptr);
+u64 tmi_ttt_create(u64 ttt, u64 rd, u64 map_addr, u64 level);
+u64 tmi_ttt_destroy(u64 ttt, u64 rd, u64 map_addr, u64 level);
+u64 tmi_ttt_map_unprotected(u64 rd, u64 map_addr, u64 level, u64 ttte);
+u64 tmi_ttt_unmap_unprotected(u64 rd, u64 map_addr, u64 level, u64 ns);
+u64 tmi_ttt_unmap_protected(u64 rd, u64 map_addr, u64 level);
+u64 tmi_psci_complete(u64 calling_tec, u64 target_tec);
+u64 tmi_features(u64 index);
+u64 tmi_ttt_map_range(u64 rd, u64 map_addr, u64 size, u64 cur_node, u64 target_node);
+u64 tmi_ttt_unmap_range(u64 rd, u64 map_addr, u64 size, u64 node_id);
+
+u64 tmi_mem_alloc(u64 rd, u64 numa_id, enum tmi_tmm_mem_type tmm_mem_type,
+	enum tmi_tmm_map_size tmm_map_size);
+u64 tmi_mem_free(u64 pa, u64 numa_id, enum tmi_tmm_mem_type tmm_mem_type,
+	enum tmi_tmm_map_size tmm_map_size);
+
+void kvm_cvm_vcpu_put(struct kvm_vcpu *vcpu);
+int kvm_load_user_data(struct kvm *kvm, unsigned long arg);
+unsigned long cvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu,
+	unsigned long target_affinity, unsigned long lowest_affinity_level);
+int kvm_cvm_vcpu_set_events(struct kvm_vcpu *vcpu,
+	bool serror_pending, bool ext_dabt_pending);
+
+#endif
+#endif
diff --git a/arch/arm64/include/asm/kvm_tmm.h b/arch/arm64/include/asm/kvm_tmm.h
new file mode 100644
index 0000000000000000000000000000000000000000..3452b4429508be76ba195b346e21bbd8ae01369f
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_tmm.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2024, The Linux Foundation. All rights reserved.
+ */ +#ifndef __ASM_KVM_TMM_H +#define __ASM_KVM_TMM_H + +#include + +enum cvm_state { + CVM_STATE_NONE, + CVM_STATE_NEW, + CVM_STATE_ACTIVE, + CVM_STATE_DYING +}; + +struct cvm { + enum cvm_state state; + u32 cvm_vmid; + u64 rd; + u64 loader_start; + u64 initrd_start; + u64 initrd_size; + u64 ram_size; + struct kvm_numa_info numa_info; + struct tmi_cvm_params *params; + bool is_cvm; +}; + +/* + * struct cvm_tec - Additional per VCPU data for a CVM + */ +struct cvm_tec { + u64 tec; + bool tec_created; + void *tec_run; +}; + +int kvm_init_tmm(void); +int kvm_cvm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap); +int kvm_init_cvm_vm(struct kvm *kvm); +void kvm_destroy_cvm(struct kvm *kvm); +int kvm_create_tec(struct kvm_vcpu *vcpu); +void kvm_destroy_tec(struct kvm_vcpu *vcpu); +int kvm_tec_enter(struct kvm_vcpu *vcpu); +int handle_cvm_exit(struct kvm_vcpu *vcpu, int rec_run_status); +int kvm_arm_create_cvm(struct kvm *kvm); +void kvm_free_rd(struct kvm *kvm); +int cvm_create_rd(struct kvm *kvm); +int kvm_arm_cvm_first_run(struct kvm_vcpu *vcpu); +int cvm_psci_complete(struct kvm_vcpu *calling, struct kvm_vcpu *target); +int kvm_arch_tec_init(struct kvm_vcpu *vcpu); + +void kvm_cvm_unmap_destroy_range(struct kvm *kvm); + +#define CVM_TTT_BLOCK_LEVEL 2 +#define CVM_TTT_MAX_LEVEL 3 + +#define CVM_PAGE_SHIFT 12 +#define CVM_PAGE_SIZE BIT(CVM_PAGE_SHIFT) +#define CVM_TTT_LEVEL_SHIFT(l) \ + ((CVM_PAGE_SHIFT - 3) * (4 - (l)) + 3) +#define CVM_L2_BLOCK_SIZE BIT(CVM_TTT_LEVEL_SHIFT(2)) + +static inline unsigned long cvm_ttt_level_mapsize(int level) +{ + if (WARN_ON(level > CVM_TTT_BLOCK_LEVEL)) + return CVM_PAGE_SIZE; + + return (1UL << CVM_TTT_LEVEL_SHIFT(level)); +} + +#endif diff --git a/arch/arm64/include/asm/set_memory.h b/arch/arm64/include/asm/set_memory.h new file mode 100644 index 0000000000000000000000000000000000000000..38cecbf44c5d5437d2fd3825411a3b1fdb7686fa --- /dev/null +++ b/arch/arm64/include/asm/set_memory.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_ARM64_SET_MEMORY_H +#define _ASM_ARM64_SET_MEMORY_H + +#include +#include + +#endif /* _ASM_ARM64_SET_MEMORY_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 312c164db2ed2393ccc5db551f8af0da0713c563..4c6eb5e78eeaa5159e05a6f17bf08533afe38baa 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -73,6 +73,7 @@ obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o obj-$(CONFIG_ARM64_MTE) += mte.o obj-$(CONFIG_MPAM) += mpam/ +obj-$(CONFIG_CVM_GUEST) += cvm_guest.o obj-y += vdso/ probes/ obj-$(CONFIG_COMPAT_VDSO) += vdso32/ diff --git a/arch/arm64/kernel/cvm_guest.c b/arch/arm64/kernel/cvm_guest.c new file mode 100644 index 0000000000000000000000000000000000000000..c1f27992ad6e1703fc0e87a7c22ce73e96316d26 --- /dev/null +++ b/arch/arm64/kernel/cvm_guest.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. 
+ */ +#include +#include +#include +#include +#include + +#include +#include +#include + +#define CVM_PTE_NS_BIT 5 +#define CVM_PTE_NS_MASK (1 << CVM_PTE_NS_BIT) + +static bool cvm_guest_enable __read_mostly; + +/* please use 'cvm_guest=1' to enable cvm guest feature */ +static int __init setup_cvm_guest(char *str) +{ + int ret; + unsigned int val; + + if (!str) + return 0; + + ret = kstrtouint(str, 10, &val); + if (ret) { + pr_warn("Unable to parse cvm_guest.\n"); + } else { + if (val) + cvm_guest_enable = true; + } + return ret; +} +early_param("cvm_guest", setup_cvm_guest); + +bool is_cvm_world(void) +{ + return cvm_guest_enable; +} + +static int change_page_range_cvm(pte_t *ptep, unsigned long addr, void *data) +{ + bool encrypt = (bool)data; + pte_t pte = READ_ONCE(*ptep); + + if (encrypt) { + if (!(pte.pte & CVM_PTE_NS_MASK)) + return 0; + pte.pte = pte.pte & (~CVM_PTE_NS_MASK); + } else { + if (pte.pte & CVM_PTE_NS_MASK) + return 0; + /* Set NS BIT */ + pte.pte = pte.pte | CVM_PTE_NS_MASK; + } + set_pte(ptep, pte); + + return 0; +} + +static int __change_memory_common_cvm(unsigned long start, unsigned long size, bool encrypt) +{ + int ret; + + ret = apply_to_page_range(&init_mm, start, size, change_page_range_cvm, (void *)encrypt); + flush_tlb_kernel_range(start, start + size); + return ret; +} + +static int __set_memory_encrypted(unsigned long addr, + int numpages, + bool encrypt) +{ + if (!is_cvm_world()) + return 0; + + WARN_ON(!__is_lm_address(addr)); + return __change_memory_common_cvm(addr, PAGE_SIZE * numpages, encrypt); +} + +int set_cvm_memory_encrypted(unsigned long addr, int numpages) +{ + return __set_memory_encrypted(addr, numpages, true); +} + +int set_cvm_memory_decrypted(unsigned long addr, int numpages) +{ + return __set_memory_encrypted(addr, numpages, false); +} diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index d984a6041860ff0584e4d6f9ac75485aebd7bb0d..8f908db74db9c3624a863138d86e8eb6db7e220f 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -59,6 +59,13 @@ config KVM_ARM_PMU Adds support for a virtual Performance Monitoring Unit (PMU) in virtual machines. +config CVM_HOST + bool "Enable cvm host feature" + help + Support CVM based on S-EL2 + + If unsure, say N. 
+ endif # KVM endif # VIRTUALIZATION diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 395d651653671c472186c489c5ec90f14c99eaa0..5fd1b8be10920749623f237442e6e6d8a48b95b4 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -24,5 +24,9 @@ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \ vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \ vgic/vgic-its.o vgic/shadow_dev.o vgic/vgic-debug.o +kvm-$(CONFIG_CVM_HOST) += tmi.o +kvm-$(CONFIG_CVM_HOST) += cvm.o +kvm-$(CONFIG_CVM_HOST) += cvm_exit.o + kvm-$(CONFIG_KVM_ARM_PMU) += pmu-emul.o obj-$(CONFIG_KVM_HISI_VIRT) += hisilicon/ diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index f9d6a5cd4daa5c1adb97d2300e3bc1243fc167cc..960f45b4eed681a49ceb096fbcc55ad06bbb0430 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -16,6 +16,10 @@ #include #include +#ifdef CONFIG_CVM_HOST +#include +#endif + #include #include @@ -138,10 +142,80 @@ static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval) } } +#ifdef CONFIG_CVM_HOST +static bool cvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx) +{ + return timer_ctx && + ((timer_get_ctl(timer_ctx) & + (ARCH_TIMER_CTRL_IT_MASK | ARCH_TIMER_CTRL_ENABLE)) == ARCH_TIMER_CTRL_ENABLE); +} + +void kvm_cvm_timers_update(struct kvm_vcpu *vcpu) +{ + int i; + u64 cval, now; + bool status, level; + struct arch_timer_context *timer; + struct arch_timer_cpu *arch_timer = &vcpu->arch.timer_cpu; + + for (i = 0; i < NR_KVM_TIMERS; i++) { + timer = &arch_timer->timers[i]; + + if (!timer->loaded) { + if (!cvm_timer_irq_can_fire(timer)) + continue; + cval = timer_get_cval(timer); + now = kvm_phys_timer_read() - timer_get_offset(timer); + level = (cval <= now); + kvm_timer_update_irq(vcpu, level, timer); + } else { + status = timer_get_ctl(timer) & ARCH_TIMER_CTRL_IT_STAT; + level = cvm_timer_irq_can_fire(timer) && status; + if (level != timer->irq.level) + kvm_timer_update_irq(vcpu, level, timer); + } + } +} + +static void set_cvm_timers_loaded(struct kvm_vcpu *vcpu, bool loaded) +{ + int i; + struct arch_timer_cpu *arch_timer = &vcpu->arch.timer_cpu; + + for (i = 0; i < NR_KVM_TIMERS; i++) { + struct arch_timer_context *timer = &arch_timer->timers[i]; + + timer->loaded = loaded; + } +} + +static void kvm_timer_blocking(struct kvm_vcpu *vcpu); +static void kvm_timer_unblocking(struct kvm_vcpu *vcpu); + +static inline void cvm_vcpu_load_timer_callback(struct kvm_vcpu *vcpu) +{ + kvm_cvm_timers_update(vcpu); + kvm_timer_unblocking(vcpu); + set_cvm_timers_loaded(vcpu, true); +} + +static inline void cvm_vcpu_put_timer_callback(struct kvm_vcpu *vcpu) +{ + set_cvm_timers_loaded(vcpu, false); + if (rcuwait_active(kvm_arch_vcpu_get_wait(vcpu))) + kvm_timer_blocking(vcpu); +} +#endif + static void timer_set_offset(struct arch_timer_context *ctxt, u64 offset) { struct kvm_vcpu *vcpu = ctxt->vcpu; +#ifdef CONFIG_CVM_HOST + if (kvm_is_cvm(vcpu->kvm)) + return; +#endif + switch(arch_timer_ctx_index(ctxt)) { case TIMER_VTIMER: __vcpu_sys_reg(vcpu, CNTVOFF_EL2) = offset; @@ -667,6 +741,13 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) struct arch_timer_cpu *timer = vcpu_timer(vcpu); struct timer_map map; +#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) { + cvm_vcpu_load_timer_callback(vcpu); + return; + } +#endif + if (unlikely(!timer->enabled)) return; @@ -752,6 +833,13 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) struct timer_map map; struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu); +#ifdef CONFIG_CVM_HOST + if 
(vcpu_is_tec(vcpu)) { + cvm_vcpu_put_timer_callback(vcpu); + return; + } +#endif + if (unlikely(!timer->enabled)) return; @@ -898,7 +986,12 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) ptimer->vcpu = vcpu; /* Synchronize cntvoff across all vtimers of a VM. */ - update_vtimer_cntvoff(vcpu, kvm_phys_timer_read()); +#ifdef CONFIG_CVM_HOST + if (kvm_is_cvm(vcpu->kvm)) + update_vtimer_cntvoff(vcpu, 0); + else +#endif + update_vtimer_cntvoff(vcpu, kvm_phys_timer_read()); timer_set_offset(ptimer, 0); hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); @@ -1356,6 +1449,15 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) return -EINVAL; } +#ifdef CONFIG_CVM_HOST + /* + * We don't use mapped IRQs for CVM because the TMI doesn't allow + * us setting the LR.HW bit in the VGIC. + */ + if (vcpu_is_tec(vcpu)) + return 0; +#endif + get_timer_map(vcpu, &map); if (vtimer_is_irqbypass()) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 718f6060bbbf30a78d286ece94be6fe288c187cf..5372a53a6bdf7d2f2bdc4886ed1e56a72f0734ec 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -38,6 +38,10 @@ #include #include #include +#ifdef CONFIG_CVM_HOST +#include +#include +#endif #include #include @@ -108,6 +112,12 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, r = 0; kvm->arch.return_nisv_io_abort_to_user = true; break; +#ifdef CONFIG_CVM_HOST + case KVM_CAP_ARM_TMM: + if (static_branch_unlikely(&kvm_cvm_is_available)) + r = kvm_cvm_enable_cap(kvm, cap); + break; +#endif default: r = -EINVAL; break; @@ -149,13 +159,29 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) return ret; #endif +#ifdef CONFIG_CVM_HOST + if (kvm_arm_cvm_type(type)) { + ret = cvm_create_rd(kvm); + if (ret) + return ret; + } +#endif + ret = kvm_arm_setup_stage2(kvm, type); if (ret) +#ifdef CONFIG_CVM_HOST + goto out_free_rd; +#else return ret; +#endif ret = kvm_init_stage2_mmu(kvm, &kvm->arch.mmu); if (ret) +#ifdef CONFIG_CVM_HOST + goto out_free_rd; +#else return ret; +#endif ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP); if (ret) @@ -167,10 +193,21 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.max_vcpus = kvm_arm_default_max_vcpus(); set_default_csv2(kvm); +#ifdef CONFIG_CVM_HOST + if (kvm_arm_cvm_type(type)) { + ret = kvm_init_cvm_vm(kvm); + if (ret) + goto out_free_stage2_pgd; + } +#endif return ret; out_free_stage2_pgd: kvm_free_stage2_pgd(&kvm->arch.mmu); +#ifdef CONFIG_CVM_HOST +out_free_rd: + kvm_free_rd(kvm); +#endif return ret; } @@ -203,6 +240,10 @@ void kvm_arch_destroy_vm(struct kvm *kvm) } } atomic_set(&kvm->online_vcpus, 0); +#ifdef CONFIG_CVM_HOST + if (kvm_is_cvm(kvm)) + kvm_destroy_cvm(kvm); +#endif } int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) @@ -258,11 +299,21 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = 1; break; case KVM_CAP_STEAL_TIME: - r = kvm_arm_pvtime_supported(); +#ifdef CONFIG_CVM_HOST + if (kvm && kvm_is_cvm(kvm)) + r = 0; + else +#endif + r = kvm_arm_pvtime_supported(); break; case KVM_CAP_ARM_VIRT_MSI_BYPASS: r = sdev_enable; break; +#ifdef CONFIG_CVM_HOST + case KVM_CAP_ARM_TMM: + r = static_key_enabled(&kvm_cvm_is_available); + break; +#endif default: r = kvm_arch_vm_ioctl_check_extension(kvm, ext); break; @@ -358,6 +409,13 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) return err; #endif +#ifdef CONFIG_CVM_HOST + if (kvm_is_cvm(vcpu->kvm)) { + err = kvm_arch_tec_init(vcpu); + if (err) + return err; + } +#endif return create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP); } @@ -444,8 +502,23 @@ 
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vcpu->cpu = cpu; +#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) { + if (single_task_running()) + vcpu_clear_wfx_traps(vcpu); + else + vcpu_set_wfx_traps(vcpu); + } +#endif kvm_vgic_load(vcpu); kvm_timer_vcpu_load(vcpu); +#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) { + if (kvm_arm_is_pvtime_enabled(&vcpu->arch)) + kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu); + return; + } +#endif if (has_vhe()) kvm_vcpu_load_sysregs_vhe(vcpu); kvm_arch_vcpu_load_fp(vcpu); @@ -472,6 +545,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { +#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) { + kvm_cvm_vcpu_put(vcpu); + return; + } +#endif kvm_arch_vcpu_put_fp(vcpu); if (has_vhe()) kvm_vcpu_put_sysregs_vhe(vcpu); @@ -662,6 +741,9 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) * Tell the rest of the code that there are userspace irqchip * VMs in the wild. */ +#ifdef CONFIG_CVM_HOST + if (!kvm_is_cvm(kvm)) +#endif static_branch_inc(&userspace_irqchip_in_use); } @@ -809,6 +891,18 @@ static bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu, int *ret) xfer_to_guest_mode_work_pending(); } +#ifdef CONFIG_CVM_HOST +static inline void update_pmu_phys_irq(struct kvm_vcpu *vcpu, bool *pmu_stopped) +{ + struct kvm_pmu *pmu = &vcpu->arch.pmu; + + if (pmu->irq_level) { + *pmu_stopped = true; + arm_pmu_set_phys_irq(false); + } +} +#endif + /** * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code * @vcpu: The VCPU pointer @@ -830,7 +924,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) ret = kvm_vcpu_first_run_init(vcpu); if (ret) return ret; - +#ifdef CONFIG_CVM_HOST + if (kvm_is_cvm(vcpu->kvm)) { + ret = kvm_arm_cvm_first_run(vcpu); + if (ret) + return ret; + } +#endif if (run->exit_reason == KVM_EXIT_MMIO) { ret = kvm_handle_mmio_return(vcpu); if (ret) @@ -847,6 +947,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) ret = 1; run->exit_reason = KVM_EXIT_UNKNOWN; while (ret > 0) { +#ifdef CONFIG_CVM_HOST + bool pmu_stopped = false; +#endif /* * Check conditions before entering the guest */ @@ -866,6 +969,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) preempt_disable(); kvm_pmu_flush_hwstate(vcpu); +#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) + update_pmu_phys_irq(vcpu, &pmu_stopped); +#endif local_irq_disable(); @@ -905,8 +1012,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) */ trace_kvm_entry(vcpu->vcpu_id, *vcpu_pc(vcpu)); guest_enter_irqoff(); - - ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu); +#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) + ret = kvm_tec_enter(vcpu); + else +#endif + ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu); vcpu->mode = OUTSIDE_GUEST_MODE; vcpu->stat.exits++; @@ -961,12 +1072,21 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) * guest time. 
*/ guest_exit(); - trace_kvm_exit(vcpu->vcpu_id, ret, *vcpu_pc(vcpu)); - - /* Exit types that need handling before we can be preempted */ - handle_exit_early(vcpu, ret); +#ifdef CONFIG_CVM_HOST + if (!vcpu_is_tec(vcpu)) { +#endif + trace_kvm_exit(vcpu->vcpu_id, ret, *vcpu_pc(vcpu)); + /* Exit types that need handling before we can be preempted */ + handle_exit_early(vcpu, ret); +#ifdef CONFIG_CVM_HOST + } +#endif preempt_enable(); +#ifdef CONFIG_CVM_HOST + if (pmu_stopped) + arm_pmu_set_phys_irq(true); +#endif /* * The ARMv8 architecture doesn't give the hypervisor @@ -986,8 +1106,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) vcpu->arch.target = -1; ret = ARM_EXCEPTION_IL; } - - ret = handle_exit(vcpu, ret); +#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) + ret = handle_cvm_exit(vcpu, ret); + else +#endif + ret = handle_exit(vcpu, ret); update_vcpu_stat_time(&vcpu->stat); } @@ -1419,6 +1543,11 @@ long kvm_arch_vm_ioctl(struct file *filp, void __user *argp = (void __user *)arg; switch (ioctl) { +#ifdef CONFIG_CVM_HOST + case KVM_LOAD_USER_DATA: { + return kvm_load_user_data(kvm, arg); + } +#endif case KVM_CREATE_IRQCHIP: { int ret; if (!vgic_present) @@ -1950,7 +2079,13 @@ int kvm_arch_init(void *opaque) kvm_pr_unimpl("CPU unsupported in non-VHE mode, not initializing\n"); return -ENODEV; } - +#ifdef CONFIG_CVM_HOST + if (static_branch_unlikely(&kvm_cvm_is_enable) && in_hyp_mode) { + err = kvm_init_tmm(); + if (err) + return err; + } +#endif if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) || cpus_have_final_cap(ARM64_WORKAROUND_1508412)) kvm_info("Guests without required CPU erratum workarounds can deadlock system!\n" \ diff --git a/arch/arm64/kvm/cvm.c b/arch/arm64/kvm/cvm.c new file mode 100644 index 0000000000000000000000000000000000000000..2b58ebf728d40ab9e6554ae93c96b99595967367 --- /dev/null +++ b/arch/arm64/kvm/cvm.c @@ -0,0 +1,892 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2024, The Linux Foundation. All rights reserved. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Protects access to cvm_vmid_bitmap */ +static DEFINE_SPINLOCK(cvm_vmid_lock); +static unsigned long *cvm_vmid_bitmap; +DEFINE_STATIC_KEY_FALSE(kvm_cvm_is_available); +DEFINE_STATIC_KEY_FALSE(kvm_cvm_is_enable); +#define SIMD_PAGE_SIZE 0x3000 + +static int __init setup_cvm_host(char *str) +{ + int ret; + unsigned int val; + + if (!str) + return 0; + + ret = kstrtouint(str, 10, &val); + if (ret) { + pr_warn("Unable to parse cvm_guest.\n"); + } else { + if (val) + static_branch_enable(&kvm_cvm_is_enable); + } + return ret; +} +early_param("cvm_host", setup_cvm_host); + +u64 cvm_phys_to_phys(u64 phys) +{ + return phys; +} + +u64 phys_to_cvm_phys(u64 phys) +{ + return phys; +} + +static int cvm_vmid_init(void) +{ + unsigned int vmid_count = 1 << kvm_get_vmid_bits(); + + cvm_vmid_bitmap = bitmap_zalloc(vmid_count, GFP_KERNEL); + if (!cvm_vmid_bitmap) { + kvm_err("%s: Couldn't allocate cvm vmid bitmap\n", __func__); + return -ENOMEM; + } + return 0; +} + +static unsigned long tmm_feat_reg0; + +static bool tmm_supports(unsigned long feature) +{ + return !!u64_get_bits(tmm_feat_reg0, feature); +} + +bool kvm_cvm_supports_sve(void) +{ + return tmm_supports(TMI_FEATURE_REGISTER_0_SVE_EN); +} + +bool kvm_cvm_supports_pmu(void) +{ + return tmm_supports(TMI_FEATURE_REGISTER_0_PMU_EN); +} + +u32 kvm_cvm_ipa_limit(void) +{ + return u64_get_bits(tmm_feat_reg0, TMI_FEATURE_REGISTER_0_S2SZ); +} + +u32 kvm_cvm_get_num_brps(void) +{ + return u64_get_bits(tmm_feat_reg0, TMI_FEATURE_REGISTER_0_NUM_BPS); +} + +u32 kvm_cvm_get_num_wrps(void) +{ + return u64_get_bits(tmm_feat_reg0, TMI_FEATURE_REGISTER_0_NUM_WPS); +} + +static int cvm_vmid_reserve(void) +{ + int ret; + unsigned int vmid_count = 1 << kvm_get_vmid_bits(); + + spin_lock(&cvm_vmid_lock); + ret = bitmap_find_free_region(cvm_vmid_bitmap, vmid_count, 0); + spin_unlock(&cvm_vmid_lock); + + return ret; +} + +static void cvm_vmid_release(unsigned int vmid) +{ + spin_lock(&cvm_vmid_lock); + bitmap_release_region(cvm_vmid_bitmap, vmid, 0); + spin_unlock(&cvm_vmid_lock); +} + +static u32 __kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr) +{ + u64 shift = ARM64_HW_PGTABLE_LEVEL_SHIFT(pgt->start_level - 1); + u64 mask = BIT(pgt->ia_bits) - 1; + + return (addr & mask) >> shift; +} + +static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level) +{ + struct kvm_pgtable pgt = { + .ia_bits = ia_bits, + .start_level = start_level, + }; + return __kvm_pgd_page_idx(&pgt, -1ULL) + 1; +} + +int kvm_arm_create_cvm(struct kvm *kvm) +{ + int ret; + struct kvm_pgtable *pgt = kvm->arch.mmu.pgt; + unsigned int pgd_sz; + struct cvm *cvm = (struct cvm *)kvm->arch.cvm; + + if (!kvm_is_cvm(kvm) || kvm_cvm_state(kvm) != CVM_STATE_NONE) + return 0; + + ret = cvm_vmid_reserve(); + if (ret < 0) + return ret; + + cvm->cvm_vmid = ret; + + pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level); + + cvm->params->ttt_base = phys_to_cvm_phys(kvm->arch.mmu.pgd_phys); + cvm->params->measurement_algo = 0; + cvm->params->ttt_level_start = kvm->arch.mmu.pgt->start_level; + cvm->params->ttt_num_start = pgd_sz; + cvm->params->s2sz = VTCR_EL2_IPA(kvm->arch.vtcr); + cvm->params->vmid = cvm->cvm_vmid; + cvm->params->ns_vtcr = kvm->arch.vtcr; + cvm->params->vttbr_el2 = kvm->arch.mmu.pgd_phys; + ret = tmi_cvm_create(cvm->rd, __pa(cvm->params)); + if (!ret) + kvm_info("KVM creates cVM: %d\n", cvm->cvm_vmid); + + WRITE_ONCE(cvm->state, CVM_STATE_NEW); + kfree(cvm->params); + cvm->params = NULL; + 
return ret; +} + +int cvm_create_rd(struct kvm *kvm) +{ + struct cvm *cvm; + + if (!static_key_enabled(&kvm_cvm_is_available)) + return -EFAULT; + + if (kvm->arch.cvm) { + kvm_err("cvm already create.\n"); + return -EFAULT; + } + + kvm->arch.cvm = kzalloc(sizeof(struct cvm), GFP_KERNEL_ACCOUNT); + if (!kvm->arch.cvm) + return -ENOMEM; + + cvm = (struct cvm *)kvm->arch.cvm; + cvm->rd = tmi_mem_alloc(cvm->rd, NO_NUMA, TMM_MEM_TYPE_RD, TMM_MEM_MAP_SIZE_MAX); + if (!cvm->rd) { + kfree(kvm->arch.cvm); + kvm->arch.cvm = NULL; + kvm_err("tmi_mem_alloc for cvm rd failed: %d\n", cvm->cvm_vmid); + return -ENOMEM; + } + cvm->is_cvm = true; + return 0; +} + +void kvm_free_rd(struct kvm *kvm) +{ + int ret; + struct cvm *cvm = (struct cvm *)kvm->arch.cvm; + + if (!cvm->rd) + return; + + ret = tmi_mem_free(cvm->rd, NO_NUMA, TMM_MEM_TYPE_RD, TMM_MEM_MAP_SIZE_MAX); + if (ret) + kvm_err("tmi_mem_free for cvm rd failed: %d\n", cvm->cvm_vmid); + else + cvm->rd = 0; +} + +void kvm_destroy_cvm(struct kvm *kvm) +{ + struct cvm *cvm = (struct cvm *)kvm->arch.cvm; + uint32_t cvm_vmid; + + if (!cvm) + return; + + cvm_vmid = cvm->cvm_vmid; + kfree(cvm->params); + cvm->params = NULL; + + if (kvm_cvm_state(kvm) == CVM_STATE_NONE) + return; + + cvm_vmid_release(cvm_vmid); + + WRITE_ONCE(cvm->state, CVM_STATE_DYING); + + if (!tmi_cvm_destroy(cvm->rd)) + kvm_info("KVM has destroyed cVM: %d\n", cvm->cvm_vmid); + + kvm_free_rd(kvm); + kfree(kvm->arch.cvm); + kvm->arch.cvm = NULL; +} + +static int kvm_cvm_ttt_create(struct cvm *cvm, + unsigned long addr, + int level, + phys_addr_t phys) +{ + addr = ALIGN_DOWN(addr, cvm_ttt_level_mapsize(level - 1)); + return tmi_ttt_create(phys, cvm->rd, addr, level); +} + +int kvm_cvm_create_ttt_levels(struct kvm *kvm, struct cvm *cvm, + unsigned long ipa, + int level, + int max_level, + struct kvm_mmu_memory_cache *mc) +{ + if (WARN_ON(level == max_level)) + return 0; + + while (level++ < max_level) { + phys_addr_t ttt; + + ttt = tmi_mem_alloc(cvm->rd, NO_NUMA, + TMM_MEM_TYPE_TTT, TMM_MEM_MAP_SIZE_MAX); + if (ttt == 0) + return -ENOMEM; + + if (kvm_cvm_ttt_create(cvm, ipa, level, ttt)) { + (void)tmi_mem_free(ttt, NO_NUMA, TMM_MEM_TYPE_TTT, TMM_MEM_MAP_SIZE_MAX); + return -ENXIO; + } + } + + return 0; +} + +static int kvm_cvm_create_protected_data_page(struct kvm *kvm, struct cvm *cvm, + unsigned long ipa, int level, + struct page *src_page, phys_addr_t dst_phys) +{ + phys_addr_t src_phys; + int ret; + + src_phys = page_to_phys(src_page); + ret = tmi_data_create(dst_phys, cvm->rd, ipa, src_phys, level); + if (TMI_RETURN_STATUS(ret) == TMI_ERROR_TTT_WALK) { + /* Create missing RTTs and retry */ + int level_fault = TMI_RETURN_INDEX(ret); + + ret = kvm_cvm_create_ttt_levels(kvm, cvm, ipa, level_fault, + level, NULL); + if (ret) + goto err; + ret = tmi_data_create(dst_phys, cvm->rd, ipa, src_phys, level); + } + WARN_ON(ret); + + if (ret) + goto err; + + return 0; + +err: + return -ENXIO; +} + +static u64 cvm_granule_size(u32 level) +{ + return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(level)); +} + +int kvm_cvm_populate_par_region(struct kvm *kvm, + phys_addr_t ipa_base, + phys_addr_t ipa_end, + phys_addr_t dst_phys) +{ + struct cvm *cvm = (struct cvm *)kvm->arch.cvm; + struct kvm_memory_slot *memslot; + gfn_t base_gfn, end_gfn; + int idx; + phys_addr_t ipa; + int ret = 0; + int level = TMM_TTT_LEVEL_3; + unsigned long map_size = cvm_granule_size(level); + + base_gfn = gpa_to_gfn(ipa_base); + end_gfn = gpa_to_gfn(ipa_end); + + idx = srcu_read_lock(&kvm->srcu); + memslot = gfn_to_memslot(kvm, base_gfn); 
+ if (!memslot) { + ret = -EFAULT; + goto out; + } + + /* We require the region to be contained within a single memslot */ + if (memslot->base_gfn + memslot->npages < end_gfn) { + ret = -EINVAL; + goto out; + } + + mmap_read_lock(current->mm); + + ipa = ipa_base; + while (ipa < ipa_end) { + struct page *page; + kvm_pfn_t pfn; + + /* + * FIXME: This causes over mapping, but there's no good + * solution here with the ABI as it stands + */ + ipa = ALIGN_DOWN(ipa, map_size); + + pfn = gfn_to_pfn_memslot(memslot, gpa_to_gfn(ipa)); + + if (is_error_pfn(pfn)) { + ret = -EFAULT; + break; + } + + page = pfn_to_page(pfn); + + ret = kvm_cvm_create_protected_data_page(kvm, cvm, ipa, level, page, dst_phys); + if (ret) + goto err_release_pfn; + + ipa += map_size; + dst_phys += map_size; + kvm_release_pfn_dirty(pfn); +err_release_pfn: + if (ret) { + kvm_release_pfn_clean(pfn); + break; + } + } + + mmap_read_unlock(current->mm); +out: + srcu_read_unlock(&kvm->srcu, idx); + return ret; +} + +static int kvm_sel2_map_protected_ipa(struct kvm_vcpu *vcpu) +{ + int ret = 0; + gpa_t gpa, gpa_data_end, gpa_end, data_size; + u64 i, map_size, dst_phys; + u64 l2_granule = cvm_granule_size(2); /* 2MB */ + u64 numa_id = NO_NUMA; + int cur_numa_id; + struct cvm *cvm = (struct cvm *)vcpu->kvm->arch.cvm; + struct kvm_numa_info *numa_info; + + /* 2MB alignment below addresses*/ + gpa = cvm->loader_start; + gpa_end = cvm->loader_start + cvm->ram_size; + data_size = cvm->initrd_start - cvm->loader_start + + cvm->initrd_size; + data_size = round_up(data_size, l2_granule); + gpa_data_end = cvm->loader_start + data_size + l2_granule; + gpa = round_down(gpa, l2_granule); + gpa_end = round_up(gpa_end, l2_granule); + gpa_data_end = round_up(gpa_data_end, l2_granule); + numa_info = &cvm->numa_info; + + /* get the first binded numa id */ + if (numa_info->numa_cnt > 0) + numa_id = numa_info->numa_nodes[0].host_numa_node; + map_size = l2_granule; + do { + dst_phys = tmi_mem_alloc(cvm->rd, numa_id, TMM_MEM_TYPE_CVM_PA, map_size); + if (!dst_phys) { + ret = -ENOMEM; + kvm_err("[%s] call tmi_mem_alloc failed.\n", __func__); + goto out; + } + + ret = kvm_cvm_populate_par_region(vcpu->kvm, gpa, gpa + map_size, dst_phys); + if (ret) { + kvm_err("kvm_cvm_populate_par_region fail:%d.\n", ret); + goto out; + } + gpa += map_size; + } while (gpa < gpa_data_end); + + cur_numa_id = numa_node_id(); + if (cur_numa_id < 0) { + ret = -EFAULT; + kvm_err("get current numa node fail\n"); + goto out; + } + + if (numa_info->numa_cnt > 0) + gpa_end = numa_info->numa_nodes[0].ipa_start + numa_info->numa_nodes[0].ipa_size; + /* Map gpa range to secure mem without copy data from host. + * The cvm gpa map pages will free by destroy cvm. 
+ */ + ret = tmi_ttt_map_range(cvm->rd, gpa_data_end, + gpa_end - gpa_data_end, cur_numa_id, numa_id); + if (ret) { + kvm_err("tmi_ttt_map_range fail:%d.\n", ret); + goto out; + } + + for (i = 1; i < numa_info->numa_cnt; i++) { + struct kvm_numa_node *numa_node = &numa_info->numa_nodes[i]; + + ret = tmi_ttt_map_range(cvm->rd, numa_node->ipa_start, + numa_node->ipa_size, cur_numa_id, numa_node->host_numa_node); + if (ret) { + kvm_err("tmi_ttt_map_range fail:%d.\n", ret); + goto out; + } + } +out: + return ret; +} + +int kvm_create_tec(struct kvm_vcpu *vcpu) +{ + int ret; + int i; + struct tmi_tec_params *params_ptr; + struct user_pt_regs *vcpu_regs = vcpu_gp_regs(vcpu); + uint64_t mpidr = kvm_vcpu_get_mpidr_aff(vcpu); + struct cvm *cvm = (struct cvm *)vcpu->kvm->arch.cvm; + struct cvm_tec *tec = (struct cvm_tec *)vcpu->arch.tec; + + params_ptr = kzalloc(PAGE_SIZE, GFP_KERNEL_ACCOUNT); + if (!params_ptr) + return -ENOMEM; + + for (i = 0; i < TEC_CREATE_NR_GPRS; ++i) + params_ptr->gprs[i] = vcpu_regs->regs[i]; + + params_ptr->pc = vcpu_regs->pc; + + if (vcpu->vcpu_id == 0) + params_ptr->flags = TMI_RUNNABLE; + else + params_ptr->flags = TMI_NOT_RUNNABLE; + params_ptr->ram_size = cvm->ram_size; + ret = tmi_tec_create(tec->tec, cvm->rd, mpidr, __pa(params_ptr)); + + kfree(params_ptr); + + return ret; +} + +static int kvm_create_all_tecs(struct kvm *kvm) +{ + int ret = 0; + struct kvm_vcpu *vcpu; + unsigned long i; + struct cvm *cvm = (struct cvm *)kvm->arch.cvm; + + if (READ_ONCE(cvm->state) == CVM_STATE_ACTIVE) + return -1; + + mutex_lock(&kvm->lock); + kvm_for_each_vcpu(i, vcpu, kvm) { + struct cvm_tec *tec = (struct cvm_tec *)vcpu->arch.tec; + + if (!tec->tec_created) { + ret = kvm_create_tec(vcpu); + if (ret) { + mutex_unlock(&kvm->lock); + return ret; + } + tec->tec_created = true; + } + } + mutex_unlock(&kvm->lock); + return ret; +} + +static int config_cvm_sve(struct kvm *kvm, struct kvm_cap_arm_tmm_config_item *cfg) +{ + struct cvm *cvm = (struct cvm *)kvm->arch.cvm; + struct tmi_cvm_params *params; + int max_sve_vq; + + params = cvm->params; + max_sve_vq = u64_get_bits(tmm_feat_reg0, + TMI_FEATURE_REGISTER_0_SVE_VL); + + if (!kvm_cvm_supports_sve()) + return -EINVAL; + + if (cfg->sve_vq > max_sve_vq) + return -EINVAL; + + params->sve_vl = cfg->sve_vq; + params->flags |= TMI_CVM_PARAM_FLAG_SVE; + + return 0; +} + +static int config_cvm_pmu(struct kvm *kvm, struct kvm_cap_arm_tmm_config_item *cfg) +{ + struct cvm *cvm = (struct cvm *)kvm->arch.cvm; + struct tmi_cvm_params *params; + int max_pmu_num_ctrs; + + params = cvm->params; + max_pmu_num_ctrs = u64_get_bits(tmm_feat_reg0, + TMI_FEATURE_REGISTER_0_PMU_NUM_CTRS); + + if (!kvm_cvm_supports_pmu()) + return -EINVAL; + + if (cfg->num_pmu_cntrs > max_pmu_num_ctrs) + return -EINVAL; + + params->pmu_num_cnts = cfg->num_pmu_cntrs; + params->flags |= TMI_CVM_PARAM_FLAG_PMU; + + return 0; +} + +static int kvm_tmm_config_cvm(struct kvm *kvm, struct kvm_enable_cap *cap) +{ + struct kvm_cap_arm_tmm_config_item cfg; + int r = 0; + + if (kvm_cvm_state(kvm) != CVM_STATE_NONE) + return -EBUSY; + + if (copy_from_user(&cfg, (void __user *)cap->args[1], sizeof(cfg))) + return -EFAULT; + + switch (cfg.cfg) { + case KVM_CAP_ARM_TMM_CFG_SVE: + r = config_cvm_sve(kvm, &cfg); + break; + case KVM_CAP_ARM_TMM_CFG_PMU: + r = config_cvm_pmu(kvm, &cfg); + break; + default: + r = -EINVAL; + } + + return r; +} + +int kvm_cvm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) +{ + int r = 0; + + mutex_lock(&kvm->lock); + switch (cap->args[0]) { + case 
KVM_CAP_ARM_TMM_CONFIG_CVM_HOST: + r = kvm_tmm_config_cvm(kvm, cap); + break; + case KVM_CAP_ARM_TMM_CREATE_CVM: + r = kvm_arm_create_cvm(kvm); + break; + default: + r = -EINVAL; + break; + } + mutex_unlock(&kvm->lock); + + return r; +} + +void kvm_destroy_tec(struct kvm_vcpu *vcpu) +{ + int ret = 0; + struct cvm_tec *tec = (struct cvm_tec *)vcpu->arch.tec; + + if (!vcpu_is_tec(vcpu)) + return; + + if (tmi_tec_destroy(tec->tec) != 0) + kvm_err("%s vcpu id : %d failed!\n", __func__, vcpu->vcpu_id); + + ret = tmi_mem_free(tec->tec, NO_NUMA, TMM_MEM_TYPE_TEC, TMM_MEM_MAP_SIZE_MAX); + if (ret != 0) + kvm_err("tmi_mem_free for cvm tec failed\n"); + tec->tec = 0; + kfree(tec->tec_run); + kfree(tec); + vcpu->arch.tec = NULL; +} + +static int tmi_check_version(void) +{ + uint64_t res; + int version_major; + int version_minor; + + res = tmi_version(); + if (res == SMCCC_RET_NOT_SUPPORTED) + return -ENXIO; + + version_major = TMI_ABI_VERSION_GET_MAJOR(res); + version_minor = TMI_ABI_VERSION_GET_MINOR(res); + + if (version_major != TMI_ABI_VERSION_MAJOR) { + kvm_err("Unsupported TMI_ABI (version %d %d)\n", version_major, + version_minor); + return -ENXIO; + } + + kvm_info("TMI ABI version %d,%d\n", version_major, version_minor); + return 0; +} + +static int kvm_kick_boot_vcpu(struct kvm *kvm) +{ + struct kvm_vcpu *vcpu; + unsigned long i; + struct cvm *cvm = (struct cvm *)kvm->arch.cvm; + + if (READ_ONCE(cvm->state) == CVM_STATE_ACTIVE) + return 0; + + mutex_lock(&kvm->lock); + kvm_for_each_vcpu(i, vcpu, kvm) { + if (i == 0) + kvm_vcpu_kick(vcpu); + } + mutex_unlock(&kvm->lock); + return 0; +} + +int kvm_arm_cvm_first_run(struct kvm_vcpu *vcpu) +{ + int ret = 0; + struct cvm *cvm = (struct cvm *)vcpu->kvm->arch.cvm; + + if (READ_ONCE(cvm->state) == CVM_STATE_ACTIVE) + return ret; + + if (vcpu->vcpu_id == 0) { + ret = kvm_create_all_tecs(vcpu->kvm); + if (ret != 0) + return ret; + } else { + kvm_kick_boot_vcpu(vcpu->kvm); + } + + mutex_lock(&vcpu->kvm->lock); + + if (vcpu->vcpu_id == 0) { + ret = kvm_sel2_map_protected_ipa(vcpu); + if (ret) { + kvm_err("Map protected ipa failed!\n"); + goto unlock_exit; + } + ret = tmi_cvm_activate(cvm->rd); + if (ret) { + kvm_err("tmi_cvm_activate failed!\n"); + goto unlock_exit; + } + + WRITE_ONCE(cvm->state, CVM_STATE_ACTIVE); + kvm_info("cVM%d is activated!\n", cvm->cvm_vmid); + } +unlock_exit: + mutex_unlock(&vcpu->kvm->lock); + + return ret; +} + +int kvm_tec_enter(struct kvm_vcpu *vcpu) +{ + struct tmi_tec_run *run; + struct cvm_tec *tec = (struct cvm_tec *)vcpu->arch.tec; + struct cvm *cvm = (struct cvm *)vcpu->kvm->arch.cvm; + + if (READ_ONCE(cvm->state) != CVM_STATE_ACTIVE) + return -EINVAL; + + run = tec->tec_run; + /* set/clear TWI TWE flags */ + if (vcpu->arch.hcr_el2 & HCR_TWI) + run->tec_entry.flags |= TEC_ENTRY_FLAG_TRAP_WFI; + else + run->tec_entry.flags &= ~TEC_ENTRY_FLAG_TRAP_WFI; + + if (vcpu->arch.hcr_el2 & HCR_TWE) + run->tec_entry.flags |= TEC_ENTRY_FLAG_TRAP_WFE; + else + run->tec_entry.flags &= ~TEC_ENTRY_FLAG_TRAP_WFE; + + return tmi_tec_enter(tec->tec, __pa(run)); +} + +int cvm_psci_complete(struct kvm_vcpu *calling, struct kvm_vcpu *target) +{ + int ret; + struct cvm_tec *calling_tec = (struct cvm_tec *)calling->arch.tec; + struct cvm_tec *target_tec = (struct cvm_tec *)target->arch.tec; + + ret = tmi_psci_complete(calling_tec->tec, target_tec->tec); + if (ret) + return -EINVAL; + return 0; +} + +int kvm_arch_tec_init(struct kvm_vcpu *vcpu) +{ + int ret = -ENOMEM; + struct cvm_tec *tec; + struct cvm *cvm = (struct cvm *)vcpu->kvm->arch.cvm; 
+ + if (vcpu->arch.tec) { + kvm_err("tec already create.\n"); + return -EFAULT; + } + vcpu->arch.tec = kzalloc(sizeof(struct cvm_tec), GFP_KERNEL_ACCOUNT); + if (!vcpu->arch.tec) + return -ENOMEM; + + tec = (struct cvm_tec *)vcpu->arch.tec; + tec->tec_run = kzalloc(PAGE_SIZE, GFP_KERNEL_ACCOUNT); + if (!tec->tec_run) + goto tec_free; + + tec->tec = tmi_mem_alloc(cvm->rd, NO_NUMA, TMM_MEM_TYPE_TEC, TMM_MEM_MAP_SIZE_MAX); + if (!tec->tec) { + kvm_err("KVM tmi_mem_alloc failed:%d\n", vcpu->vcpu_id); + goto tec_free; + } + kvm_info("KVM inits cVM VCPU:%d\n", vcpu->vcpu_id); + + return 0; +tec_free: + kfree(tec->tec_run); + kfree(tec); + vcpu->arch.tec = NULL; + return ret; +} + +int kvm_init_tmm(void) +{ + int ret; + + if (PAGE_SIZE != SZ_4K) + return 0; + + if (tmi_check_version()) + return 0; + + ret = cvm_vmid_init(); + if (ret) + return ret; + + tmm_feat_reg0 = tmi_features(0); + kvm_info("TMM feature0: 0x%lx\n", tmm_feat_reg0); + + static_branch_enable(&kvm_cvm_is_available); + + return 0; +} + +int kvm_init_cvm_vm(struct kvm *kvm) +{ + struct tmi_cvm_params *params; + struct cvm *cvm = (struct cvm *)kvm->arch.cvm; + + params = kzalloc(PAGE_SIZE, GFP_KERNEL_ACCOUNT); + if (!params) + return -ENOMEM; + + cvm->params = params; + + return 0; +} + +int kvm_load_user_data(struct kvm *kvm, unsigned long arg) +{ + struct kvm_user_data user_data; + void __user *argp = (void __user *)arg; + struct cvm *cvm = (struct cvm *)kvm->arch.cvm; + struct kvm_numa_info *numa_info; + + if (!kvm_is_cvm(kvm)) + return -EFAULT; + + if (copy_from_user(&user_data, argp, sizeof(user_data))) + return -EFAULT; + + numa_info = &user_data.numa_info; + if (numa_info->numa_cnt > MAX_NUMA_NODE) + return -EFAULT; + + if (numa_info->numa_cnt > 0) { + unsigned long i, total_size = 0; + struct kvm_numa_node *numa_node = &numa_info->numa_nodes[0]; + unsigned long ipa_end = numa_node->ipa_start + numa_node->ipa_size; + + if (user_data.loader_start < numa_node->ipa_start || + user_data.initrd_start + user_data.initrd_size > ipa_end) + return -EFAULT; + for (i = 0; i < numa_info->numa_cnt; i++) + total_size += numa_info->numa_nodes[i].ipa_size; + if (total_size != user_data.ram_size) + return -EFAULT; + } + + cvm->loader_start = user_data.loader_start; + cvm->initrd_start = user_data.initrd_start; + cvm->initrd_size = user_data.initrd_size; + cvm->ram_size = user_data.ram_size; + memcpy(&cvm->numa_info, numa_info, sizeof(struct kvm_numa_info)); + + return 0; +} + +void kvm_cvm_vcpu_put(struct kvm_vcpu *vcpu) +{ + kvm_timer_vcpu_put(vcpu); + kvm_vgic_put(vcpu); + vcpu->cpu = -1; +} + +unsigned long cvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu, + unsigned long target_affinity, unsigned long lowest_affinity_level) +{ + struct kvm_vcpu *target_vcpu; + + if (lowest_affinity_level != 0) + return PSCI_RET_INVALID_PARAMS; + + target_vcpu = kvm_mpidr_to_vcpu(vcpu->kvm, target_affinity); + if (!target_vcpu) + return PSCI_RET_INVALID_PARAMS; + + cvm_psci_complete(vcpu, target_vcpu); + return PSCI_RET_SUCCESS; +} + +int kvm_cvm_vcpu_set_events(struct kvm_vcpu *vcpu, + bool serror_pending, bool ext_dabt_pending) +{ + struct cvm_tec *tec = (struct cvm_tec *)vcpu->arch.tec; + + if (serror_pending) + return -EINVAL; + + if (ext_dabt_pending) { + if (!(((struct tmi_tec_run *)tec->tec_run)->tec_entry.flags & + TEC_ENTRY_FLAG_EMUL_MMIO)) + return -EINVAL; + + ((struct tmi_tec_run *)tec->tec_run)->tec_entry.flags + &= ~TEC_ENTRY_FLAG_EMUL_MMIO; + ((struct tmi_tec_run *)tec->tec_run)->tec_entry.flags + |= TEC_ENTRY_FLAG_INJECT_SEA; + } + return 
0;
+}
diff --git a/arch/arm64/kvm/cvm_exit.c b/arch/arm64/kvm/cvm_exit.c
new file mode 100644
index 0000000000000000000000000000000000000000..7de6dd504a26ac040a12d8336a8b4c31a003db57
--- /dev/null
+++ b/arch/arm64/kvm/cvm_exit.c
@@ -0,0 +1,240 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, The Linux Foundation. All rights reserved.
+ */
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+typedef int (*exit_handler_fn)(struct kvm_vcpu *vcpu);
+
+static void update_arch_timer_irq_lines(struct kvm_vcpu *vcpu, bool unmask_ctl)
+{
+	struct tmi_tec_run *run;
+	struct cvm_tec *tec = (struct cvm_tec *)vcpu->arch.tec;
+
+	run = tec->tec_run;
+	__vcpu_sys_reg(vcpu, CNTV_CTL_EL0) = run->tec_exit.cntv_ctl;
+	__vcpu_sys_reg(vcpu, CNTV_CVAL_EL0) = run->tec_exit.cntv_cval;
+	__vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = run->tec_exit.cntp_ctl;
+	__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = run->tec_exit.cntp_cval;
+
+	/* Because the timer mask is tainted by the TMM, we don't know the
+	 * guest's true intent. Assume the mask is always cleared during
+	 * WFI.
+	 */
+	if (unmask_ctl) {
+		__vcpu_sys_reg(vcpu, CNTV_CTL_EL0) &= ~ARCH_TIMER_CTRL_IT_MASK;
+		__vcpu_sys_reg(vcpu, CNTP_CTL_EL0) &= ~ARCH_TIMER_CTRL_IT_MASK;
+	}
+
+	kvm_cvm_timers_update(vcpu);
+}
+
+static int tec_exit_reason_notimpl(struct kvm_vcpu *vcpu)
+{
+	struct tmi_tec_run *run;
+	struct cvm_tec *tec = (struct cvm_tec *)vcpu->arch.tec;
+
+	run = tec->tec_run;
+	pr_err("[vcpu %d] Unhandled exit reason from cvm (ESR: %#llx)\n",
+		vcpu->vcpu_id, run->tec_exit.esr);
+	return -ENXIO;
+}
+
+/* This follows the same flow as kvm_handle_wfx(), except for the
+ * tracing and the PC update; the logic is duplicated here to avoid
+ * changing kvm_handle_wfx() itself.
+ */ +static int tec_exit_wfx(struct kvm_vcpu *vcpu) +{ + u64 esr = kvm_vcpu_get_esr(vcpu); + + if (esr & ESR_ELx_WFx_ISS_WFE) + vcpu->stat.wfe_exit_stat++; + else + vcpu->stat.wfi_exit_stat++; + + if (esr & ESR_ELx_WFx_ISS_WFxT) { + if (esr & ESR_ELx_WFx_ISS_RV) { + u64 val, now; + + now = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_TIMER_CNT); + val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu)); + + if (now >= val) + goto out; + } else { + /* Treat WFxT as WFx if RN is invalid */ + esr &= ~ESR_ELx_WFx_ISS_WFxT; + } + } + + if (esr & ESR_ELx_WFx_ISS_WFE) { + kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu)); + } else { + vcpu->arch.pvsched.pv_unhalted = false; + if (esr & ESR_ELx_WFx_ISS_WFxT) + vcpu->arch.flags |= KVM_ARM64_WFIT; + kvm_vcpu_block(vcpu); + vcpu->arch.flags &= ~KVM_ARM64_WFIT; + kvm_clear_request(KVM_REQ_UNHALT, vcpu); + } + +out: + return 1; +} + +static int tec_exit_sys_reg(struct kvm_vcpu *vcpu) +{ + int ret; + struct tmi_tec_run *run; + struct cvm_tec *tec = (struct cvm_tec *)vcpu->arch.tec; + unsigned long esr = kvm_vcpu_get_esr(vcpu); + int rt = kvm_vcpu_sys_get_rt(vcpu); + bool is_write = !(esr & 1); + + run = tec->tec_run; + if (is_write) + vcpu_set_reg(vcpu, rt, run->tec_exit.gprs[0]); + + ret = kvm_handle_sys_reg(vcpu); + + if (ret >= 0 && !is_write) + run->tec_entry.gprs[0] = vcpu_get_reg(vcpu, rt); + + return ret; +} + +static int tec_exit_sync_dabt(struct kvm_vcpu *vcpu) +{ + struct tmi_tec_run *run; + struct cvm_tec *tec = (struct cvm_tec *)vcpu->arch.tec; + + run = tec->tec_run; + if (kvm_vcpu_dabt_iswrite(vcpu) && kvm_vcpu_dabt_isvalid(vcpu)) { + vcpu_set_reg(vcpu, kvm_vcpu_dabt_get_rd(vcpu), + run->tec_exit.gprs[0]); + } + return kvm_handle_guest_abort(vcpu); +} + +static int tec_exit_sync_iabt(struct kvm_vcpu *vcpu) +{ + struct tmi_tec_run *run; + struct cvm_tec *tec = (struct cvm_tec *)vcpu->arch.tec; + + run = tec->tec_run; + pr_err("[vcpu %d] Unhandled instruction abort (ESR: %#llx).\n", + vcpu->vcpu_id, run->tec_exit.esr); + + return -ENXIO; +} + +static exit_handler_fn tec_exit_handlers[] = { + [0 ... 
ESR_ELx_EC_MAX] = tec_exit_reason_notimpl, + [ESR_ELx_EC_WFx] = tec_exit_wfx, + [ESR_ELx_EC_SYS64] = tec_exit_sys_reg, + [ESR_ELx_EC_DABT_LOW] = tec_exit_sync_dabt, + [ESR_ELx_EC_IABT_LOW] = tec_exit_sync_iabt +}; + +static int tec_exit_psci(struct kvm_vcpu *vcpu) +{ + int i; + struct tmi_tec_run *run; + struct cvm_tec *tec = (struct cvm_tec *)vcpu->arch.tec; + + run = tec->tec_run; + for (i = 0; i < TEC_EXIT_NR_GPRS; ++i) + vcpu_set_reg(vcpu, i, run->tec_exit.gprs[i]); + + return kvm_psci_call(vcpu); +} + +static int tec_exit_host_call(struct kvm_vcpu *vcpu) +{ + int ret, i; + struct tmi_tec_run *run; + struct cvm_tec *tec = (struct cvm_tec *)vcpu->arch.tec; + + run = tec->tec_run; + vcpu->stat.hvc_exit_stat++; + + for (i = 0; i < TEC_EXIT_NR_GPRS; ++i) + vcpu_set_reg(vcpu, i, run->tec_exit.gprs[i]); + + ret = kvm_hvc_call_handler(vcpu); + + if (ret < 0) { + vcpu_set_reg(vcpu, 0, ~0UL); + ret = 1; + } + for (i = 0; i < TEC_EXIT_NR_GPRS; ++i) + run->tec_entry.gprs[i] = vcpu_get_reg(vcpu, i); + + return ret; +} + +/* + * Return > 0 to return to guest, < 0 on error, 0(and set exit_reason) on + * proper exit to userspace + */ + +int handle_cvm_exit(struct kvm_vcpu *vcpu, int tec_run_ret) +{ + unsigned long status; + struct tmi_tec_run *run; + struct cvm_tec *tec = (struct cvm_tec *)vcpu->arch.tec; + u8 esr_ec; + bool is_wfx; + + run = tec->tec_run; + esr_ec = ESR_ELx_EC(run->tec_exit.esr); + status = TMI_RETURN_STATUS(tec_run_ret); + + if (status == TMI_ERROR_CVM_POWEROFF) { + vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; + vcpu->run->system_event.type = KVM_SYSTEM_EVENT_SHUTDOWN; + return 0; + } + + if (status == TMI_ERROR_CVM_STATE) { + vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; + return 0; + } + + if (tec_run_ret) + return -ENXIO; + + vcpu->arch.fault.esr_el2 = run->tec_exit.esr; + vcpu->arch.fault.far_el2 = run->tec_exit.far; + vcpu->arch.fault.hpfar_el2 = run->tec_exit.hpfar; + + is_wfx = (run->tec_exit.exit_reason == TMI_EXIT_SYNC) && (esr_ec == ESR_ELx_EC_WFx); + update_arch_timer_irq_lines(vcpu, is_wfx); + + run->tec_entry.flags = 0; + + switch (run->tec_exit.exit_reason) { + case TMI_EXIT_FIQ: + case TMI_EXIT_IRQ: + return 1; + case TMI_EXIT_PSCI: + return tec_exit_psci(vcpu); + case TMI_EXIT_SYNC: + return tec_exit_handlers[esr_ec](vcpu); + case TMI_EXIT_HOST_CALL: + return tec_exit_host_call(vcpu); + } + + kvm_pr_unimpl("Unsupported exit reason : 0x%llx\n", + run->tec_exit.exit_reason); + return 0; +} diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index f9c3dbc999e57ba8d1535cc67c56e2085455cde6..ecdd355275afdb5f72b6d000530ec6abeb5e8483 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -27,6 +27,10 @@ #include #include +#ifdef CONFIG_CVM_HOST +#include +#endif + #include "trace.h" struct kvm_stats_debugfs_item debugfs_entries[] = { @@ -818,6 +822,10 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, bool has_esr = events->exception.serror_has_esr; bool ext_dabt_pending = events->exception.ext_dabt_pending; +#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) + return kvm_cvm_vcpu_set_events(vcpu, serror_pending, ext_dabt_pending); +#endif if (serror_pending && has_esr) { if (!cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) return -EINVAL; diff --git a/arch/arm64/kvm/hisilicon/hisi_virt.c b/arch/arm64/kvm/hisilicon/hisi_virt.c index 10233b801896f3ead2fd6d4c73070952b59cc896..ca1227f1f9fca89de9ab2dc31435a2a7c307a199 100644 --- a/arch/arm64/kvm/hisilicon/hisi_virt.c +++ b/arch/arm64/kvm/hisilicon/hisi_virt.c @@ -7,6 +7,9 @@ #include #include #include +#ifdef 
CONFIG_CVM_HOST +#include +#endif #include "hisi_virt.h" static enum hisi_cpu_type cpu_type = UNKNOWN_HI_TYPE; @@ -153,6 +156,10 @@ static void hardware_disable_dvmbm(void *data) bool hisi_dvmbm_supported(void) { +#ifdef CONFIG_CVM_HOST + if (static_branch_unlikely(&kvm_cvm_is_enable)) + return false; +#endif if (cpu_type != HI_IP09) return false; diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 452f4cacd674351d2adb514a5c2f1b1269e2934c..a45e68ef2e9db056e888942c854589315ca3a5d8 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -229,6 +229,25 @@ void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if) } } +#ifdef CONFIG_CVM_HOST +void __vgic_v3_restore_tec_state(struct vgic_v3_cpu_if *cpu_if, + u64 *entry_hcr, + u64 *entry_lrs) +{ + u64 used_lrs = cpu_if->used_lrs; + int i; + + *entry_hcr = cpu_if->vgic_hcr; + for (i = 0; i < kvm_vgic_global_state.nr_lr; ++i) { + if (i < used_lrs) { + entry_lrs[i] = cpu_if->vgic_lr[i]; + } else { + entry_lrs[i] = 0; + } + } +} +#endif + void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if) { u64 used_lrs = cpu_if->used_lrs; diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c index 6a2826f1bf5e99532a08969be845022a2c8e2f76..6bda06ad3196c73711dfe50a6b43ff87ec8236b4 100644 --- a/arch/arm64/kvm/mmio.c +++ b/arch/arm64/kvm/mmio.c @@ -8,6 +8,10 @@ #include #include +#ifdef CONFIG_CVM_HOST +#include +#endif + #include "trace.h" void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data) @@ -109,6 +113,14 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu) &data); data = vcpu_data_host_to_guest(vcpu, data, len); vcpu_set_reg(vcpu, kvm_vcpu_dabt_get_rd(vcpu), data); +#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) { + struct cvm_tec *tec = (struct cvm_tec *)vcpu->arch.tec; + + ((struct tmi_tec_run *)tec->tec_run)-> + tec_entry.gprs[0] = data; + } +#endif } /* @@ -177,7 +189,14 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) run->mmio.phys_addr = fault_ipa; run->mmio.len = len; vcpu->mmio_needed = 1; +#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) { + struct cvm_tec *tec = (struct cvm_tec *)vcpu->arch.tec; + ((struct tmi_tec_run *)tec->tec_run)->tec_entry.flags |= + TEC_ENTRY_FLAG_EMUL_MMIO; + } +#endif if (!ret) { /* We handled the access successfully in the kernel. 
*/ if (!is_write) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 6fa92a1435a5f5580f3ab5f3894bbb93fbdc26d9..30426f6addf1eb4c5aa18e86d1b310963d46f65a 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -474,6 +474,7 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) spin_lock(&kvm->mmu_lock); pgt = mmu->pgt; + if (pgt) { mmu->pgd_phys = 0; mmu->pgt = NULL; @@ -790,6 +791,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, fault_granule = 1UL << ARM64_HW_PGTABLE_LEVEL_SHIFT(fault_level); write_fault = kvm_is_write_fault(vcpu); +#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) { + write_fault = true; + prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W; + } +#endif exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu); VM_BUG_ON(write_fault && exec_fault); vcpu->stat.mabt_exit_stat++; diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 9fdc76c6d50fcaa5ef96761bb1962da3027475ee..67ba1bbdb8e3366a5c6905031fcb381a42da54df 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -13,6 +13,7 @@ #include #include #include +#include static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx); static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx); @@ -370,6 +371,17 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) { u64 reg = 0; +#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) { + struct tmi_tec_run *run; + struct cvm_tec *tec = (struct cvm_tec *)vcpu->arch.tec; + + run = tec->tec_run; + reg = run->tec_exit.pmu_ovf_status; + return reg; + } +#endif + if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) { reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0); reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index 32bb26be8a9b1fbce778c9700eca0d08ef75d2e7..0160ee8d6d7d88c8404dc1dfe1f2abd801bb428b 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -16,6 +16,9 @@ #include #include +#ifdef CONFIG_CVM_HOST +#include +#endif /* * This is an implementation of the Power State Coordination Interface * as described in ARM document number ARM DEN 0022A. 
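The PMU and PSCI changes around here all follow one host-side convention: when a vCPU is a TEC, its architectural state is only visible to KVM through the tmi_tec_run page that the TMM fills in on every TEC exit, so KVM reads from run->tec_exit (or calls the cvm_* helpers) instead of touching the trapped system registers. A minimal sketch of that dispatch, using only names introduced by this series (the example_ helper itself is illustrative, not part of the patch):

static u64 example_read_pmu_overflow(struct kvm_vcpu *vcpu)
{
	if (vcpu_is_tec(vcpu)) {
		struct cvm_tec *tec = (struct cvm_tec *)vcpu->arch.tec;
		struct tmi_tec_run *run = tec->tec_run;

		/* State exported by the TMM at the last TEC exit. */
		return run->tec_exit.pmu_ovf_status;
	}

	/* Simplified: kvm_pmu_overflow_status() additionally masks this value. */
	return __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
}

The same shape appears in the vgic, timer and PSCI paths: test vcpu_is_tec() first, then either use the TMM-provided state or fall back to the existing KVM emulation.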
@@ -78,6 +81,10 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) */ if (!vcpu) return PSCI_RET_INVALID_PARAMS; +#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) + cvm_psci_complete(source_vcpu, vcpu); +#endif if (!vcpu->arch.power_off) { if (kvm_psci_version(source_vcpu, kvm) != KVM_ARM_PSCI_0_1) return PSCI_RET_ALREADY_ON; @@ -133,7 +140,10 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) /* Ignore other bits of target affinity */ target_affinity &= target_affinity_mask; - +#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) + return cvm_psci_vcpu_affinity_info(vcpu, target_affinity, lowest_affinity_level); +#endif /* * If one or more VCPU matching target affinity are running * then ON else OFF diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 51f4c5e85717a56e215196547316640d91f597d4..bb177d58c32033d8a2f2f3c3d63f117b2811c18a 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -30,6 +30,9 @@ #include #include +#ifdef CONFIG_CVM_HOST +#include +#endif /* Maximum phys_shift supported for any VM on this host */ static u32 kvm_ipa_limit; @@ -199,6 +202,9 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu) void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu) { kfree(vcpu->arch.sve_state); +#ifdef CONFIG_CVM_HOST + kvm_destroy_tec(vcpu); +#endif } static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu) @@ -433,7 +439,11 @@ int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type) u32 parange, phys_shift; u8 lvls, pbha = 0xf; +#ifdef CONFIG_CVM_HOST + if ((type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK) && (!kvm_is_cvm(kvm))) +#else if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK) +#endif return -EINVAL; phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type); diff --git a/arch/arm64/kvm/tmi.c b/arch/arm64/kvm/tmi.c new file mode 100644 index 0000000000000000000000000000000000000000..83adfc9f05a18b4cc8338544f89498be2ca47242 --- /dev/null +++ b/arch/arm64/kvm/tmi.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2024, The Linux Foundation. All rights reserved. 
+ */ +#include +#include + +u64 tmi_version(void) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_VESION, &res); + return res.a1; +} + +u64 tmi_data_create(u64 data, u64 rd, u64 map_addr, u64 src, u64 level) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_DATA_CREATE, data, rd, map_addr, src, level, &res); + return res.a1; +} + +u64 tmi_data_destroy(u64 rd, u64 map_addr, u64 level) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_DATA_DESTROY, rd, map_addr, level, &res); + return res.a1; +} + +u64 tmi_cvm_activate(u64 rd) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_CVM_ACTIVATE, rd, &res); + return res.a1; +} + +u64 tmi_cvm_create(u64 rd, u64 params_ptr) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_CVM_CREATE, rd, params_ptr, &res); + return res.a1; +} + +u64 tmi_cvm_destroy(u64 rd) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_CVM_DESTROY, rd, &res); + return res.a1; +} + +u64 tmi_tec_create(u64 tec, u64 rd, u64 mpidr, u64 params_ptr) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_TEC_CREATE, tec, rd, mpidr, params_ptr, &res); + return res.a1; +} + +u64 tmi_tec_destroy(u64 tec) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_TEC_DESTROY, tec, &res); + return res.a1; +} + +u64 tmi_tec_enter(u64 tec, u64 run_ptr) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_TEC_ENTER, tec, run_ptr, &res); + return res.a1; +} + +u64 tmi_ttt_create(u64 ttt, u64 rd, u64 map_addr, u64 level) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_TTT_CREATE, ttt, rd, map_addr, level, &res); + return res.a1; +} + +u64 tmi_ttt_destroy(u64 ttt, u64 rd, u64 map_addr, u64 level) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_TTT_DESTROY, ttt, rd, map_addr, level, &res); + return res.a1; +} + +u64 tmi_ttt_map_unprotected(u64 rd, u64 map_addr, u64 level, u64 ttte) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_TTT_MAP_UNPROTECTED, rd, map_addr, level, ttte, &res); + return res.a1; +} + +u64 tmi_ttt_unmap_unprotected(u64 rd, u64 map_addr, u64 level, u64 ns) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_TTT_UNMAP_UNPROTECTED, rd, map_addr, level, ns, &res); + return res.a1; +} + +u64 tmi_ttt_unmap_protected(u64 rd, u64 map_addr, u64 level) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_TTT_UNMAP_PROTECTED, rd, map_addr, level, &res); + return res.a1; +} + +u64 tmi_psci_complete(u64 calling_tec, u64 target_tec) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_PSCI_COMPLETE, calling_tec, target_tec, &res); + return res.a1; +} + +u64 tmi_features(u64 index) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_FEATURES, index, &res); + return res.a1; +} + +u64 tmi_mem_alloc(u64 rd, u64 numa_id, enum tmi_tmm_mem_type tmm_mem_type, + enum tmi_tmm_map_size tmm_map_size) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_MEM_ALLOC, rd, numa_id, tmm_mem_type, tmm_map_size, &res); + return res.a1; +} + +u64 tmi_mem_free(u64 pa, u64 numa_id, enum tmi_tmm_mem_type tmm_mem_type, + enum tmi_tmm_map_size tmm_map_size) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_MEM_FREE, pa, numa_id, tmm_mem_type, tmm_map_size, &res); + return res.a1; +} + +u64 tmi_ttt_map_range(u64 rd, u64 map_addr, u64 size, u64 cur_node, u64 target_node) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_TTT_MAP_RANGE, rd, map_addr, size, cur_node, target_node, &res); + return res.a1; +} + +u64 tmi_ttt_unmap_range(u64 rd, 
u64 map_addr, u64 size, u64 node_id) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_TTT_UNMAP_RANGE, rd, map_addr, size, node_id, &res); + return res.a1; +} diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 213afce812f245ed40582a2b9373c81ed30a0ef8..53ca5f4d802af25d872d78bdb4484673803843eb 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -10,6 +10,10 @@ #include #include +#ifdef CONFIG_CVM_HOST +#include +#endif + #include "vgic.h" static bool group0_trap; @@ -674,7 +678,10 @@ int vgic_v3_probe(const struct gic_kvm_info *info) kvm_vgic_global_state.vcpu_base = 0; } else { kvm_vgic_global_state.vcpu_base = info->vcpu.start; - kvm_vgic_global_state.can_emulate_gicv2 = true; +#ifdef CONFIG_CVM_HOST + if (!static_branch_unlikely(&kvm_cvm_is_available)) +#endif + kvm_vgic_global_state.can_emulate_gicv2 = true; ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2); if (ret) { kvm_err("Cannot register GICv2 KVM device.\n"); @@ -736,6 +743,15 @@ void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; +#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) { + struct cvm_tec *tec = (struct cvm_tec *)vcpu->arch.tec; + + cpu_if->vgic_vmcr = + ((struct tmi_tec_run *)tec->tec_run)->tec_exit.gicv3_vmcr; + return; + } +#endif if (likely(cpu_if->vgic_sre)) cpu_if->vgic_vmcr = kvm_call_hyp_ret(__vgic_v3_read_vmcr); } diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index 116aa91d5544265dfabef6356177c8f2c95d1c8a..f691ec400daef9fac218119b816cca155eac23b7 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -12,6 +12,10 @@ #include +#ifdef CONFIG_CVM_HOST +#include +#endif + #include "vgic.h" #define CREATE_TRACE_POINTS @@ -872,12 +876,48 @@ static inline bool can_access_vgic_from_kernel(void) return !static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) || has_vhe(); } +#ifdef CONFIG_CVM_HOST +static inline void vgic_tmm_save_state(struct kvm_vcpu *vcpu) +{ + int i; + struct tmi_tec_run *tec_run; + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + struct cvm_tec *tec = (struct cvm_tec *)vcpu->arch.tec; + + tec_run = tec->tec_run; + for (i = 0; i < kvm_vgic_global_state.nr_lr; ++i) { + cpu_if->vgic_lr[i] = tec_run->tec_exit.gicv3_lrs[i]; + tec_run->tec_entry.gicv3_lrs[i] = 0; + } +} + +static inline void vgic_tmm_restore_state(struct kvm_vcpu *vcpu) +{ + int i; + struct tmi_tec_run *tec_run; + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + struct cvm_tec *tec = (struct cvm_tec *)vcpu->arch.tec; + + tec_run = tec->tec_run; + for (i = 0; i < kvm_vgic_global_state.nr_lr; ++i) { + tec_run->tec_entry.gicv3_lrs[i] = cpu_if->vgic_lr[i]; + tec_run->tec_exit.gicv3_lrs[i] = cpu_if->vgic_lr[i]; + } +} +#endif + static inline void vgic_save_state(struct kvm_vcpu *vcpu) { if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) vgic_v2_save_state(vcpu); else - __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3); +#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) + vgic_tmm_save_state(vcpu); + else +#endif + __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3); + } /* Sync back the hardware VGIC state into our emulation after a guest's run. 
*/ @@ -907,7 +947,12 @@ static inline void vgic_restore_state(struct kvm_vcpu *vcpu) if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) vgic_v2_restore_state(vcpu); else - __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3); +#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) + vgic_tmm_restore_state(vcpu); + else +#endif + __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3); } /* Flush our emulation state into the GIC hardware before entering the guest. */ @@ -948,7 +993,10 @@ void kvm_vgic_load(struct kvm_vcpu *vcpu) { if (unlikely(!vgic_initialized(vcpu->kvm))) return; - +#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) + return; +#endif if (kvm_vgic_global_state.type == VGIC_V2) vgic_v2_load(vcpu); else @@ -959,7 +1007,10 @@ void kvm_vgic_put(struct kvm_vcpu *vcpu) { if (unlikely(!vgic_initialized(vcpu->kvm))) return; - +#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) + return; +#endif if (kvm_vgic_global_state.type == VGIC_V2) vgic_v2_put(vcpu); else diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index be67a9c426288342a06be475d84cc0b9ed7fcaf8..afcede934bc317ff1632d0d47569aa863d2a77b8 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -47,6 +47,7 @@ #include #include #include +#include #include "internal.h" @@ -674,6 +675,8 @@ void __init mem_init(void) else swiotlb_force = SWIOTLB_NO_FORCE; + swiotlb_cvm_update_mem_attributes(); + set_max_mapnr(max_pfn - PHYS_PFN_OFFSET); #ifndef CONFIG_SPARSEMEM_VMEMMAP diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 804d5197c1a34cfdf4cd817a1fd640e1f91e85e8..095c192c729b390e2e0d4ca1de877d79d7aa8aa6 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -38,6 +38,7 @@ #include #include #include +#include #define NO_BLOCK_MAPPINGS BIT(0) #define NO_CONT_MAPPINGS BIT(1) @@ -494,7 +495,7 @@ static void __init map_mem(pgd_t *pgdp) int flags = 0, eflags = 0; u64 i; - if (rodata_full || debug_pagealloc_enabled()) + if (rodata_full || debug_pagealloc_enabled() || is_cvm_world()) flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; #ifdef CONFIG_KFENCE @@ -1513,8 +1514,7 @@ int arch_add_memory(int nid, u64 start, u64 size, return -EINVAL; } - - if (rodata_full || debug_pagealloc_enabled()) + if (rodata_full || debug_pagealloc_enabled() || is_cvm_world()) flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; __create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start), diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 0bc12dbf28437151e2fe91265c4636f41a4319c7..e84a57c4db95954c38aabc0c2f08100567c60935 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -188,6 +188,9 @@ int set_direct_map_default_noflush(struct page *page) void __kernel_map_pages(struct page *page, int numpages, int enable) { + if (is_cvm_world()) + return; + if (!debug_pagealloc_enabled() && !rodata_full) return; diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 632444f86314eb31dd3dab17b7bbe9b9fbb8f26c..806359f3376a3ac9ca82e6d35321f0d9691c2eea 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -29,6 +29,10 @@ #include #include #include +#ifdef CONFIG_CVM_GUEST +#include +#include +#endif #include #include @@ -313,6 +317,91 @@ static DEFINE_RAW_SPINLOCK(vmovp_lock); static DEFINE_IDA(its_vpeid_ida); +#ifdef CONFIG_CVM_GUEST +static struct device cvm_alloc_device; +static LIST_HEAD(cvm_its_nodes); +static raw_spinlock_t cvm_its_lock; + +struct its_device_order { + struct its_device *dev; + struct list_head entry; + int itt_order; +}; + 
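The allocation helpers that follow exist because, inside a cVM, the ITS command queue, the baser tables and the per-device ITTs must live in memory the host can access, and the restricted swiotlb pool is the only region that swiotlb_cvm_update_mem_attributes() has already switched to decrypted/shared. They sit on top of the swiotlb_alloc()/swiotlb_free() interface added in the include/linux/swiotlb.h hunk of this series; a minimal sketch of that calling pattern (the cvm_shared_pages_* helpers and the __free_pages() fallback are illustrative, not part of the patch):

static struct page *cvm_shared_pages_get(struct device *dev, unsigned int order)
{
	/* Pages come from the decrypted restricted pool, so the host can see them. */
	return swiotlb_alloc(dev, (1 << order) * PAGE_SIZE);
}

static void cvm_shared_pages_put(struct device *dev, struct page *page,
				 unsigned int order)
{
	/* swiotlb_free() reports whether the pages really came from the pool. */
	if (!swiotlb_free(dev, page, (1 << order) * PAGE_SIZE))
		__free_pages(page, order);
}

swiotlb_free() needs the size handed back, which is also why struct its_device_order above records the ITT order at allocation time: its_free_device() only has the pointer, so get_its_device_order() later recovers the order needed to release the shared pages.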
+static inline struct page *its_alloc_shared_pages_node(int node, gfp_t gfp, + unsigned int order) +{ + return swiotlb_alloc(&cvm_alloc_device, (1 << order) * PAGE_SIZE); +} + +static inline struct page *its_alloc_shared_pages(gfp_t gfp, unsigned int order) +{ + return its_alloc_shared_pages_node(NUMA_NO_NODE, gfp, order); +} + +static void its_free_shared_pages(void *addr, int order) +{ + if (order < 0) + return; + + swiotlb_free(&cvm_alloc_device, (struct page *)addr, (1 << order) * PAGE_SIZE); +} + +static int add_its_device_order(struct its_device *dev, int itt_order) +{ + struct its_device_order *new; + unsigned long flags; + + new = kmalloc(sizeof(struct its_device_order), GFP_KERNEL); + if (!new) + return -ENOMEM; + new->dev = dev; + new->itt_order = itt_order; + raw_spin_lock_irqsave(&cvm_its_lock, flags); + list_add_tail(&new->entry, &cvm_its_nodes); + raw_spin_unlock_irqrestore(&cvm_its_lock, flags); + return 0; +} + +/* get its device order and then free its device order */ +static int get_its_device_order(struct its_device *dev) +{ + struct its_device_order *pos, *tmp; + unsigned long flags; + int itt_order = -1; + + raw_spin_lock_irqsave(&cvm_its_lock, flags); + list_for_each_entry_safe(pos, tmp, &cvm_its_nodes, entry) { + if (pos->dev == dev) { + itt_order = pos->itt_order; + list_del(&pos->entry); + kfree(pos); + goto found; + } + } +found: + raw_spin_unlock_irqrestore(&cvm_its_lock, flags); + return itt_order; +} + +static void *its_alloc_shared_page_address(struct its_device *dev, + struct its_node *its, int sz) +{ + struct page *page; + int itt_order; + + itt_order = get_order(sz); + if (add_its_device_order(dev, itt_order)) + return NULL; + + page = its_alloc_shared_pages_node(its->numa_node, GFP_KERNEL | __GFP_ZERO, + itt_order); + if (!page) + return NULL; + return (void *)page_address(page); +} +#endif + static void free_devid_to_rsv_pools(struct its_device *its_dev) { struct rsv_devid_pool *pool = its_dev->devid_pool; @@ -2447,7 +2536,13 @@ static struct page *its_allocate_prop_table(gfp_t gfp_flags) { struct page *prop_page; - prop_page = alloc_pages(gfp_flags, get_order(LPI_PROPBASE_SZ)); +#ifdef CONFIG_CVM_GUEST + if (is_cvm_world()) + prop_page = its_alloc_shared_pages(gfp_flags, + get_order(LPI_PROPBASE_SZ)); + else +#endif + prop_page = alloc_pages(gfp_flags, get_order(LPI_PROPBASE_SZ)); if (!prop_page) return NULL; @@ -2458,8 +2553,14 @@ static struct page *its_allocate_prop_table(gfp_t gfp_flags) static void its_free_prop_table(struct page *prop_page) { - free_pages((unsigned long)page_address(prop_page), - get_order(LPI_PROPBASE_SZ)); +#ifdef CONFIG_CVM_GUEST + if (is_cvm_world()) + its_free_shared_pages(page_address(prop_page), + get_order(LPI_PROPBASE_SZ)); + else +#endif + free_pages((unsigned long)page_address(prop_page), + get_order(LPI_PROPBASE_SZ)); } static bool gic_check_reserved_range(phys_addr_t addr, unsigned long size) @@ -2581,7 +2682,13 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser, order = get_order(GITS_BASER_PAGES_MAX * psz); } - page = alloc_pages_node(its->numa_node, GFP_KERNEL | __GFP_ZERO, order); +#ifdef CONFIG_CVM_GUEST + if (is_cvm_world()) + page = its_alloc_shared_pages_node(its->numa_node, + GFP_KERNEL | __GFP_ZERO, order); + else +#endif + page = alloc_pages_node(its->numa_node, GFP_KERNEL | __GFP_ZERO, order); if (!page) return -ENOMEM; @@ -2594,7 +2701,12 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser, /* 52bit PA is supported only when PageSize=64K */ if (psz != 
SZ_64K) {
			pr_err("ITS: no 52bit PA support when psz=%d\n", psz);
-			free_pages((unsigned long)base, order);
+#ifdef CONFIG_CVM_GUEST
+			if (is_cvm_world())
+				its_free_shared_pages(base, order);
+			else
+#endif
+				free_pages((unsigned long)base, order);
			return -ENXIO;
		}
@@ -2648,7 +2760,12 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser,
		pr_err("ITS@%pa: %s doesn't stick: %llx %llx\n",
		       &its->phys_base, its_base_type_string[type],
		       val, tmp);
-		free_pages((unsigned long)base, order);
+#ifdef CONFIG_CVM_GUEST
+		if (is_cvm_world())
+			its_free_shared_pages(base, order);
+		else
+#endif
+			free_pages((unsigned long)base, order);
		return -ENXIO;
	}
@@ -2787,8 +2904,14 @@ static void its_free_tables(struct its_node *its)
	for (i = 0; i < GITS_BASER_NR_REGS; i++) {
		if (its->tables[i].base) {
-			free_pages((unsigned long)its->tables[i].base,
-				   its->tables[i].order);
+#ifdef CONFIG_CVM_GUEST
+			if (is_cvm_world())
+				its_free_shared_pages(its->tables[i].base,
+						      its->tables[i].order);
+			else
+#endif
+				free_pages((unsigned long)its->tables[i].base,
+					   its->tables[i].order);
			its->tables[i].base = NULL;
		}
	}
@@ -3051,7 +3174,13 @@ static bool allocate_vpe_l2_table(int cpu, u32 id)
	/* Allocate memory for 2nd level table */
	if (!table[idx]) {
-		page = alloc_pages(GFP_KERNEL | __GFP_ZERO, get_order(psz));
+#ifdef CONFIG_CVM_GUEST
+		if (is_cvm_world())
+			page = its_alloc_shared_pages(GFP_KERNEL | __GFP_ZERO,
+						      get_order(psz));
+		else
+#endif
+			page = alloc_pages(GFP_KERNEL | __GFP_ZERO, get_order(psz));
		if (!page)
			return false;
@@ -3170,7 +3299,13 @@ static int allocate_vpe_l1_table(void)
	pr_debug("np = %d, npg = %lld, psz = %d, epp = %d, esz = %d\n",
		 np, npg, psz, epp, esz);

-	page = alloc_pages(GFP_ATOMIC | __GFP_ZERO, get_order(np * PAGE_SIZE));
+#ifdef CONFIG_CVM_GUEST
+	if (is_cvm_world())
+		page = its_alloc_shared_pages(GFP_ATOMIC | __GFP_ZERO,
+					      get_order(np * PAGE_SIZE));
+	else
+#endif
+		page = alloc_pages(GFP_ATOMIC | __GFP_ZERO, get_order(np * PAGE_SIZE));
	if (!page)
		return -ENOMEM;
@@ -3218,8 +3353,14 @@ static struct page *its_allocate_pending_table(gfp_t gfp_flags)
{
	struct page *pend_page;

-	pend_page = alloc_pages(gfp_flags | __GFP_ZERO,
-				get_order(LPI_PENDBASE_SZ));
+#ifdef CONFIG_CVM_GUEST
+	if (is_cvm_world())
+		pend_page = its_alloc_shared_pages(gfp_flags | __GFP_ZERO,
+						   get_order(LPI_PENDBASE_SZ));
+	else
+#endif
+		pend_page = alloc_pages(gfp_flags | __GFP_ZERO,
+					get_order(LPI_PENDBASE_SZ));
	if (!pend_page)
		return NULL;
@@ -3231,7 +3372,13 @@ static struct page *its_allocate_pending_table(gfp_t gfp_flags)
static void its_free_pending_table(struct page *pt)
{
-	free_pages((unsigned long)page_address(pt), get_order(LPI_PENDBASE_SZ));
+#ifdef CONFIG_CVM_GUEST
+	if (is_cvm_world())
+		its_free_shared_pages(page_address(pt),
+				      get_order(LPI_PENDBASE_SZ));
+	else
+#endif
+		free_pages((unsigned long)page_address(pt), get_order(LPI_PENDBASE_SZ));
}

/*
@@ -3768,8 +3915,15 @@ static bool its_alloc_table_entry(struct its_node *its,
	/* Allocate memory for 2nd level table */
	if (!table[idx]) {
-		page = alloc_pages_node(its->numa_node, GFP_KERNEL | __GFP_ZERO,
-					get_order(baser->psz));
+#ifdef CONFIG_CVM_GUEST
+		if (is_cvm_world())
+			page = its_alloc_shared_pages_node(its->numa_node,
+							   GFP_KERNEL | __GFP_ZERO,
+							   get_order(baser->psz));
+		else
+#endif
+			page = alloc_pages_node(its->numa_node, GFP_KERNEL | __GFP_ZERO,
+						get_order(baser->psz));
		if (!page)
			return false;
@@ -3872,7 +4026,12 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
	nr_ites = max(2,
nvecs); sz = nr_ites * (FIELD_GET(GITS_TYPER_ITT_ENTRY_SIZE, its->typer) + 1); sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1; - itt = kzalloc_node(sz, GFP_KERNEL, its->numa_node); +#ifdef CONFIG_CVM_GUEST + if (is_cvm_world()) + itt = its_alloc_shared_page_address(dev, its, sz); + else +#endif + itt = kzalloc_node(sz, GFP_KERNEL, its->numa_node); if (alloc_lpis) { lpi_map = its_lpi_alloc(nvecs, &lpi_base, &nr_lpis); if (lpi_map) @@ -3886,7 +4045,12 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id, if (!dev || !itt || !col_map || (!lpi_map && alloc_lpis)) { kfree(dev); - kfree(itt); +#ifdef CONFIG_CVM_GUEST + if (is_cvm_world()) + its_free_shared_pages(itt, get_order(sz)); + else +#endif + kfree(itt); kfree(lpi_map); kfree(col_map); return NULL; @@ -3923,7 +4087,12 @@ static void its_free_device(struct its_device *its_dev) list_del(&its_dev->entry); raw_spin_unlock_irqrestore(&its_dev->its->lock, flags); kfree(its_dev->event_map.col_map); - kfree(its_dev->itt); +#ifdef CONFIG_CVM_GUEST + if (is_cvm_world()) + its_free_shared_pages(its_dev->itt, get_its_device_order(its_dev)); + else +#endif + kfree(its_dev->itt); if (its_dev->is_vdev) { WARN_ON(!rsv_devid_pool_cap); @@ -5594,8 +5763,15 @@ static int __init its_probe_one(struct resource *res, its->numa_node = numa_node; - page = alloc_pages_node(its->numa_node, GFP_KERNEL | __GFP_ZERO, - get_order(ITS_CMD_QUEUE_SZ)); +#ifdef CONFIG_CVM_GUEST + if (is_cvm_world()) + page = its_alloc_shared_pages_node(its->numa_node, + GFP_KERNEL | __GFP_ZERO, + get_order(ITS_CMD_QUEUE_SZ)); + else +#endif + page = alloc_pages_node(its->numa_node, GFP_KERNEL | __GFP_ZERO, + get_order(ITS_CMD_QUEUE_SZ)); if (!page) { err = -ENOMEM; goto out_unmap_sgir; @@ -5661,7 +5837,12 @@ static int __init its_probe_one(struct resource *res, out_free_tables: its_free_tables(its); out_free_cmd: - free_pages((unsigned long)its->cmd_base, get_order(ITS_CMD_QUEUE_SZ)); +#ifdef CONFIG_CVM_GUEST + if (is_cvm_world()) + its_free_shared_pages(its->cmd_base, get_order(ITS_CMD_QUEUE_SZ)); + else +#endif + free_pages((unsigned long)its->cmd_base, get_order(ITS_CMD_QUEUE_SZ)); out_unmap_sgir: if (its->sgir_base) iounmap(its->sgir_base); @@ -5957,6 +6138,12 @@ int __init its_init(struct fwnode_handle *handle, struct rdists *rdists, bool has_vtimer_irqbypass = false; int err; +#ifdef CONFIG_CVM_GUEST + if (is_cvm_world()) { + device_initialize(&cvm_alloc_device); + raw_spin_lock_init(&cvm_its_lock); + } +#endif gic_rdists = rdists; its_parent = parent_domain; diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 4ef8aee84eea044ff09b70ceb0811f658c7c4655..743f52d94d92a97fe6770ffa1521d8597f67d87b 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -797,6 +797,23 @@ static int arm_perf_teardown_cpu(unsigned int cpu, struct hlist_node *node) return 0; } +#ifdef CONFIG_CVM_HOST +void arm_pmu_set_phys_irq(bool enable) +{ + int cpu = get_cpu(); + struct arm_pmu *pmu = per_cpu(cpu_armpmu, cpu); + int irq; + + irq = armpmu_get_cpu_irq(pmu, cpu); + if (irq && !enable) + per_cpu(cpu_irq_ops, cpu)->disable_pmuirq(irq); + else if (irq && enable) + per_cpu(cpu_irq_ops, cpu)->enable_pmuirq(irq); + + put_cpu(); +} +#endif + #ifdef CONFIG_CPU_PM static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd) { diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 413d6f9bc3ff1b057a835fb82616d6c13c4f0db3..18ccd16fcefc63565c111a98efcdcb92e0445562 100644 --- a/include/kvm/arm_arch_timer.h +++ 
b/include/kvm/arm_arch_timer.h @@ -117,4 +117,8 @@ void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu, u32 timer_get_ctl(struct arch_timer_context *ctxt); u64 timer_get_cval(struct arch_timer_context *ctxt); +#ifdef CONFIG_CVM_HOST +/* Needed for S-EL2 */ +void kvm_cvm_timers_update(struct kvm_vcpu *vcpu); +#endif #endif diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 595c9da4f46ea38c6933c99695b113cb7ab4ac18..a48dc3a1f335a4a8f062196f018bf0b377996d58 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -345,6 +345,29 @@ struct kvm_vcpu { struct kvm_vcpu_arch arch; }; +#ifdef CONFIG_CVM_HOST +#define KVM_TYPE_CVM_BIT 8 +#define CVM_MAX_HALT_POLL_NS 100000 + +DECLARE_STATIC_KEY_FALSE(kvm_cvm_is_available); + +static __always_inline bool vcpu_is_tec(struct kvm_vcpu *vcpu) +{ + if (static_branch_unlikely(&kvm_cvm_is_available)) { + struct cvm_tec *tec = (struct cvm_tec *)vcpu->arch.tec; + + return tec && tec->tec_run; + } + return false; +} + +static inline bool kvm_arm_cvm_type(unsigned long type) +{ + return type & (1UL << KVM_TYPE_CVM_BIT); +} + +#endif + static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu) { /* diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 6fd58c8f93f921729cf3cf954a98bc9f7237be89..c7a35d32127271d8f2e3a3b8f796f10466436e03 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -189,6 +189,9 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn); static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; } #endif +#ifdef CONFIG_CVM_HOST +void arm_pmu_set_phys_irq(bool enable); +#endif /* Internal functions only for core arm_pmu code */ struct arm_pmu *armpmu_alloc(void); struct arm_pmu *armpmu_alloc_atomic(void); diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 5d2dbe7e04c3cb1773c9da4befee61dbe40f96c0..cb225d0a11eb19e589a4d33b7b981ecc562bcced 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -106,4 +106,21 @@ static inline bool is_swiotlb_active(void) extern void swiotlb_print_info(void); extern void swiotlb_set_max_segment(unsigned int); +#ifdef CONFIG_DMA_RESTRICTED_POOL + +struct page *swiotlb_alloc(struct device *dev, size_t size); +bool swiotlb_free(struct device *dev, struct page *page, size_t size); + +#else +static inline struct page *swiotlb_alloc(struct device *dev, size_t size) +{ + return NULL; +} +static inline bool swiotlb_free(struct device *dev, struct page *page, + size_t size) +{ + return false; +} +#endif /* CONFIG_DMA_RESTRICTED_POOL */ + #endif /* __LINUX_SWIOTLB_H */ diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index a1d8b11843c5ac795eb18d44db2553083e5167ac..476934ec68b309d62de8ed54973daf674579e9cd 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1373,6 +1373,33 @@ struct kvm_master_dev_info { #define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47) #define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64) +#define KVM_LOAD_USER_DATA _IOW(KVMIO, 0x49, struct kvm_user_data) + +#define KVM_CAP_ARM_TMM 300 /* FIXME: Large number to prevent conflicts */ +#define MAX_NUMA_NODE 8 +#define MAX_CPU_BIT_MAP 4 + +struct kvm_numa_node { + __u64 numa_id; + __u64 ipa_start; + __u64 ipa_size; + __u64 host_numa_node; + __u64 cpu_id[MAX_CPU_BIT_MAP]; +}; + +struct kvm_numa_info { + __u64 numa_cnt; + struct kvm_numa_node numa_nodes[MAX_NUMA_NODE]; +}; + +struct kvm_user_data { + __u64 loader_start; + __u64 initrd_start; + __u64 initrd_size; + __u64 ram_size; + struct 
kvm_numa_info numa_info; +}; + /* enable ucontrol for s390 */ struct kvm_s390_ucas_mapping { __u64 user_addr; diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index 07f30651b83d4937c1d9e719b394de6fdd60e2a5..8151fd2b6d64a0125ada53679e4c90f42b44f3de 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -84,6 +84,16 @@ config SWIOTLB bool select NEED_DMA_MAP_STATE +config DMA_RESTRICTED_POOL + bool "DMA Restricted Pool" + depends on OF && OF_RESERVED_MEM && SWIOTLB + help + This enables support for restricted DMA pools which provide a level of + DMA memory protection on systems with limited hardware protection + capabilities, such as those lacking an IOMMU. + + If unsure, say "n". + # # Should be selected if we can mmap non-coherent mappings to userspace. # The only thing that is really required is a way to set an uncached bit diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 2922250f93b44cb32a388d5097f306271b5b0161..f13493c54629be1bcb838432d6701c235660676c 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -22,6 +22,13 @@ */ unsigned int zone_dma_bits __ro_after_init = 24; +#ifndef is_swiotlb_for_alloc +static inline bool is_swiotlb_for_alloc(struct device *dev) +{ + return false; +} +#endif + static inline dma_addr_t phys_to_dma_direct(struct device *dev, phys_addr_t phys) { @@ -75,6 +82,15 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit); } +static void __dma_direct_free_pages(struct device *dev, struct page *page, + size_t size) +{ + if (IS_ENABLED(CONFIG_DMA_RESTRICTED_POOL) && + swiotlb_free(dev, page, size)) + return; + dma_free_contiguous(dev, page, size); +} + static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, gfp_t gfp) { @@ -86,6 +102,16 @@ static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, &phys_limit); + if (IS_ENABLED(CONFIG_DMA_RESTRICTED_POOL) && + is_swiotlb_for_alloc(dev)) { + page = swiotlb_alloc(dev, size); + if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { + __dma_direct_free_pages(dev, page, size); + return NULL; + } + return page; + } + page = dma_alloc_contiguous(dev, size, gfp); if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { dma_free_contiguous(dev, page, size); @@ -142,7 +168,7 @@ void *dma_direct_alloc(struct device *dev, size_t size, gfp |= __GFP_NOWARN; if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) && - !force_dma_unencrypted(dev)) { + !force_dma_unencrypted(dev) && !is_swiotlb_for_alloc(dev)) { page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO); if (!page) return NULL; @@ -155,18 +181,23 @@ void *dma_direct_alloc(struct device *dev, size_t size, } if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && - !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && - !dev_is_dma_coherent(dev)) + !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && !dev_is_dma_coherent(dev) && + !is_swiotlb_for_alloc(dev)) return arch_dma_alloc(dev, size, dma_handle, gfp, attrs); /* * Remapping or decrypting memory may block. If either is required and * we can't block, allocate the memory from the atomic pools. + * If restricted DMA (i.e., is_swiotlb_for_alloc) is required, one must + * set up another device coherent pool by shared-dma-pool and use + * dma_alloc_from_dev_coherent instead. 
*/ if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) && !gfpflags_allow_blocking(gfp) && (force_dma_unencrypted(dev) || - (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && !dev_is_dma_coherent(dev)))) + (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && + !dev_is_dma_coherent(dev))) && + !is_swiotlb_for_alloc(dev)) return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp); /* we always manually zero the memory once we are done */ @@ -237,7 +268,7 @@ void *dma_direct_alloc(struct device *dev, size_t size, return NULL; } out_free_pages: - dma_free_contiguous(dev, page, size); + __dma_direct_free_pages(dev, page, size); return NULL; } @@ -245,15 +276,15 @@ void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) { if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) && - !force_dma_unencrypted(dev)) { + !force_dma_unencrypted(dev) && !is_swiotlb_for_alloc(dev)) { /* cpu_addr is a struct page cookie, not a kernel address */ dma_free_contiguous(dev, cpu_addr, size); return; } if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && - !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && - !dev_is_dma_coherent(dev)) { + !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && !dev_is_dma_coherent(dev) && + !is_swiotlb_for_alloc(dev)) { arch_dma_free(dev, size, cpu_addr, dma_addr, attrs); return; } @@ -271,7 +302,7 @@ void dma_direct_free(struct device *dev, size_t size, else if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_CLEAR_UNCACHED)) arch_dma_clear_uncached(cpu_addr, size); - dma_free_contiguous(dev, dma_direct_to_page(dev, dma_addr), size); + __dma_direct_free_pages(dev, dma_direct_to_page(dev, dma_addr), size); } struct page *dma_direct_alloc_pages(struct device *dev, size_t size, @@ -281,7 +312,8 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size, void *ret; if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) && - force_dma_unencrypted(dev) && !gfpflags_allow_blocking(gfp)) + force_dma_unencrypted(dev) && !gfpflags_allow_blocking(gfp) && + !is_swiotlb_for_alloc(dev)) return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp); page = __dma_direct_alloc_pages(dev, size, gfp); @@ -307,7 +339,7 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size, *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); return page; out_free_pages: - dma_free_contiguous(dev, page, size); + __dma_direct_free_pages(dev, page, size); return NULL; } @@ -325,7 +357,7 @@ void dma_direct_free_pages(struct device *dev, size_t size, if (force_dma_unencrypted(dev)) set_memory_encrypted((unsigned long)vaddr, PFN_UP(size)); - dma_free_contiguous(dev, page, size); + __dma_direct_free_pages(dev, page, size); } #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index d897d161366a429d059aaa6d5fb432d5aa58507a..e321c023ddc25fb421021f7086d16c3231dff67e 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -772,3 +772,95 @@ static int __init swiotlb_create_debugfs(void) late_initcall(swiotlb_create_debugfs); #endif + +#ifdef CONFIG_DMA_RESTRICTED_POOL + +static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, + size_t size) +{ + return find_slots(dev, orig_addr, size); +} + +struct page *swiotlb_alloc(struct device *dev, size_t size) +{ + phys_addr_t tlb_addr; + int index; + + index = swiotlb_find_slots(dev, 0, size); + if (index == -1) + return NULL; + + tlb_addr = slot_addr(io_tlb_start, index); + + return pfn_to_page(PFN_DOWN(tlb_addr)); +} + +static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr, + size_t alloc_size) 
+{ + unsigned long flags; + unsigned int offset = swiotlb_align_offset(dev, tlb_addr); + int i, count, nslots = nr_slots(alloc_size + offset); + int index = (tlb_addr - offset - io_tlb_start) >> IO_TLB_SHIFT; + + /* + * Return the buffer to the free list by setting the corresponding + * entries to indicate the number of contiguous entries available. + * While returning the entries to the free list, we merge the entries + * with slots below and above the pool being returned. + */ + spin_lock_irqsave(&io_tlb_lock, flags); + if (index + nslots < ALIGN(index + 1, IO_TLB_SEGSIZE)) + count = io_tlb_list[index + nslots]; + else + count = 0; + + /* + * Step 1: return the slots to the free list, merging the slots with + * superceeding slots + */ + for (i = index + nslots - 1; i >= index; i--) { + io_tlb_list[i] = ++count; + io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; + } + + /* + * Step 2: merge the returned slots with the preceding slots, if + * available (non zero) + */ + for (i = index - 1; + io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && io_tlb_list[i]; + i--) + io_tlb_list[i] = ++count; + io_tlb_used -= nslots; + spin_unlock_irqrestore(&io_tlb_lock, flags); +} + +bool swiotlb_free(struct device *dev, struct page *page, size_t size) +{ + phys_addr_t tlb_addr = page_to_phys(page); + + if (!is_swiotlb_buffer(tlb_addr)) + return false; + + swiotlb_release_slots(dev, tlb_addr, size); + + return true; +} + +#ifdef CONFIG_CVM_GUEST +void __init swiotlb_cvm_update_mem_attributes(void) +{ + void *vaddr; + unsigned long bytes; + + if (!is_cvm_world() || !io_tlb_start) + return; + vaddr = phys_to_virt(io_tlb_start); + bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT); + set_cvm_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT); + memset(vaddr, 0, bytes); +} +#endif + +#endif /* CONFIG_DMA_RESTRICTED_POOL */ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9166ef044bcf6e3f128e0d7b70cfc16cad7847f8..ef9f6d9df0f12beb395f43ec81d97b354c761a29 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1089,7 +1089,12 @@ static struct kvm *kvm_create_vm(unsigned long type) goto out_err_no_arch_destroy_vm; } - kvm->max_halt_poll_ns = halt_poll_ns; +#ifdef CONFIG_CVM_HOST + if (kvm_arm_cvm_type(type)) + kvm->max_halt_poll_ns = CVM_MAX_HALT_POLL_NS; + else +#endif + kvm->max_halt_poll_ns = halt_poll_ns; r = kvm_arch_init_vm(kvm, type); if (r)