From fc798a3820f272c176c6633de9759fd214ee03a2 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Thu, 28 Jul 2022 11:25:25 +0800 Subject: [PATCH 01/24] iommu: add domain argument to page response ANBZ: #1704 commit 999841cd78d3fe17bf4423c030eb3829312dadf2 intel-github. With mdev, page response needs pdev domain that is different than the device's own domain. Extend iommu_page_response() to support such case. Signed-off-by: Jacob Pan Signed-off-by: Liu Yi L Signed-off-by: Xuchun Shang Signed-off-by: Guanjun Link: https://gitee.com/anolis/cloud-kernel/pulls/562 Reviewed-by: Artie Ding --- drivers/iommu/intel/svm.c | 3 ++- drivers/iommu/io-pgfault.c | 3 ++- drivers/iommu/iommu.c | 8 ++++---- include/linux/intel-iommu.h | 3 ++- include/linux/iommu.h | 11 ++++++++--- 5 files changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 5ce6cbd463cc..7767e3d04031 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -1274,7 +1274,8 @@ u32 intel_svm_get_pasid(struct iommu_sva *sva) return pasid; } -int intel_svm_page_response(struct device *dev, +int intel_svm_page_response(struct iommu_domain *domain, + struct device *dev, struct iommu_fault_event *evt, struct iommu_page_response *msg) { diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index 1df8c1dcae77..aaceb0a953ca 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -55,6 +55,7 @@ struct iopf_group { static int iopf_complete_group(struct device *dev, struct iopf_fault *iopf, enum iommu_page_response_code status) { + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); struct iommu_page_response resp = { .version = IOMMU_PAGE_RESP_VERSION_1, .pasid = iopf->fault.prm.pasid, @@ -66,7 +67,7 @@ static int iopf_complete_group(struct device *dev, struct iopf_fault *iopf, (iopf->fault.prm.flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID)) resp.flags = IOMMU_PAGE_RESP_PASID_VALID; - return 
iommu_page_response(dev, &resp); + return iommu_page_response(domain, dev, &resp); } static enum iommu_page_response_code diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 229e262c28bd..1efa8edb4bf6 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1280,7 +1280,7 @@ int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) EXPORT_SYMBOL_GPL(iommu_report_device_fault); static int iommu_page_response_prepare_msg(void __user *udata, - struct iommu_page_response *msg) + struct iommu_page_response *msg) { unsigned long minsz, maxsz; @@ -1314,7 +1314,8 @@ static int iommu_page_response_prepare_msg(void __user *udata, return 0; } -int iommu_page_response(struct device *dev, +int iommu_page_response(struct iommu_domain *domain, + struct device *dev, void __user *uinfo) { bool needs_pasid; @@ -1323,7 +1324,6 @@ int iommu_page_response(struct device *dev, struct iommu_fault_event *evt; struct iommu_fault_page_request *prm; struct dev_iommu *param = dev->iommu; - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); bool has_pasid; if (!domain || !domain->ops->page_response) @@ -1369,7 +1369,7 @@ int iommu_page_response(struct device *dev, msg.pasid = 0; } - ret = domain->ops->page_response(dev, evt, &msg); + ret = domain->ops->page_response(domain, dev, evt, &msg); trace_dev_page_response(dev, &msg); list_del(&evt->list); kfree(evt); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 37171ae2afe0..743be2a500ef 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -772,7 +772,8 @@ struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata); void intel_svm_unbind(struct iommu_sva *handle); u32 intel_svm_get_pasid(struct iommu_sva *handle); -int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt, +int intel_svm_page_response(struct iommu_domain *domain, struct device *dev, + struct iommu_fault_event *evt, struct 
iommu_page_response *msg); void intel_svm_add_pasid_notifier(void); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index e1b5ce697fec..dbf05815566f 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -324,7 +324,8 @@ struct iommu_ops { void (*sva_unbind)(struct iommu_sva *handle); u32 (*sva_get_pasid)(struct iommu_sva *handle); - int (*page_response)(struct device *dev, + int (*page_response)(struct iommu_domain *domain, + struct device *dev, struct iommu_fault_event *evt, struct iommu_page_response *msg); int (*cache_invalidate)(struct iommu_domain *domain, struct device *dev, @@ -567,7 +568,9 @@ extern int iommu_unregister_device_fault_handler(struct device *dev); extern int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt); -extern int iommu_page_response(struct device *dev, void __user *uinfo); +extern int iommu_page_response(struct iommu_domain *domain, + struct device *dev, + void __user *uinfo); extern int iommu_group_id(struct iommu_group *group); extern struct iommu_domain *iommu_group_default_domain(struct iommu_group *); @@ -970,7 +973,9 @@ int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) return -ENODEV; } -static inline int iommu_page_response(struct device *dev, void __user *uinfo) +static inline int iommu_page_response(struct iommu_domain *domain, + struct device *dev, + void __user *uinfo) { return -ENODEV; } -- Gitee From 3f66daea93f887b85e6b0e98416a88b5e907b084 Mon Sep 17 00:00:00 2001 From: "Liu, Yi L" Date: Thu, 1 Oct 2020 07:55:50 -0700 Subject: [PATCH 02/24] iommu/vt-d: Check pasid ownership in intel_svm_page_response ANBZ: #1704 commit be78cbefee7bdc409720ec416f74ab5d53e46cd4 intel-github. This patch needs to wait for DOMAIN_ATTR_IOASID_SET is ready. 
Signed-off-by: Liu, Yi L Signed-off-by: Xuchun Shang Signed-off-by: Guanjun Link: https://gitee.com/anolis/cloud-kernel/pulls/562 Reviewed-by: Artie Ding --- drivers/iommu/intel/svm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 7767e3d04031..8bcaed10d0e1 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -1280,6 +1280,7 @@ int intel_svm_page_response(struct iommu_domain *domain, struct iommu_page_response *msg) { struct iommu_fault_page_request *prm; + struct dmar_domain *dmar_domain; struct intel_svm_dev *sdev = NULL; struct intel_svm *svm = NULL; struct intel_iommu *iommu; @@ -1318,7 +1319,8 @@ int intel_svm_page_response(struct iommu_domain *domain, goto out; } - ret = pasid_to_svm_sdev(dev, NULL, + dmar_domain = to_dmar_domain(domain); + ret = pasid_to_svm_sdev(dev, NULL, // dmar_domain->pasid_set, prm->pasid, &svm, &sdev); if (ret || !sdev) { ret = -ENODEV; -- Gitee From ee2f1d0aff16f14d3e6ce4e84f7ff23b498fc7bf Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Thu, 3 Oct 2019 15:37:09 -0700 Subject: [PATCH 03/24] iommu: support multiple fault handler data per device ANBZ: #1704 commit d6443ccc901124b8a7dbf454a17dea89be23a4c1 intel-github. With the introduction of mdev, fault reporting must be supported at per PASID-dev granularity. Introduce APIs to support multiple data per handler. 
Signed-off-by: Jacob Pan Signed-off-by: Liu Yi L Signed-off-by: Xuchun Shang Signed-off-by: Guanjun Link: https://gitee.com/anolis/cloud-kernel/pulls/562 Reviewed-by: Artie Ding --- drivers/iommu/iommu.c | 167 +++++++++++++++++++++++++++++++++++++++++- include/linux/iommu.h | 27 ++++++- 2 files changed, 189 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 1efa8edb4bf6..fece7e76260b 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1139,6 +1139,7 @@ int iommu_register_device_fault_handler(struct device *dev, void *data) { struct dev_iommu *param = dev->iommu; + struct iommu_fault_handler_data *hdata; int ret = 0; if (!param) @@ -1158,8 +1159,23 @@ int iommu_register_device_fault_handler(struct device *dev, ret = -ENOMEM; goto done_unlock; } + param->fault_param->handler = handler; - param->fault_param->data = data; + + hdata = kzalloc(sizeof(struct iommu_fault_handler_data), GFP_KERNEL); + if (!hdata) { + kfree(param->fault_param); + put_device(dev); + ret = -ENOMEM; + goto done_unlock; + } + + INIT_LIST_HEAD(&param->fault_param->data); + /* Default handler data uses reserved vector 0 */ + hdata->data = data; + dev_dbg(dev, "Add IOMMU default handler data %llx\n", (u64)data); + list_add(&hdata->list, &param->fault_param->data); + mutex_init(&param->fault_param->lock); INIT_LIST_HEAD(&param->fault_param->faults); @@ -1173,6 +1189,111 @@ int iommu_register_device_fault_handler(struct device *dev, } EXPORT_SYMBOL_GPL(iommu_register_device_fault_handler); + +/** + * iommu_add_device_fault_data() - add handler specific data + * + * For devices with partitioned resources, we may need to have multiple + * handler data that can be identified by IOMMU driver. This function + * allows device drivers to add handler specific data associated with + * a vector. When IOMMU detects device fault and its vector, handlers + * can be invoked with the matching data.
+ * For page request service related to DMA request with PASID, the vector + * is the PASID and the data is PASID associated data such as a mediated + * device. Vector 0 is reserved for default handler data when no per vector + * data is added to device handler data list. + * + * @dev: the device + * @vector: identifies fault reporting data + * @data: opaque device handler data associated with the fault + */ +int iommu_add_device_fault_data(struct device *dev, + int vector, void *data) +{ + struct dev_iommu *param = dev->iommu; + struct iommu_fault_handler_data *hdata; + int ret = 0; + + dev_dbg(dev, "%s: vector: %d data: %llx\n", __func__, vector, (u64)data); + /* + * Fault handler must have been registered before adding handler data. + * Vector 0 is reserved for default data associated with handler. + */ + if (!param || !param->fault_param || !vector) + return -EINVAL; + + mutex_lock(&param->lock); + + /* vector must be unique, check if we have the same vector already */ + list_for_each_entry(hdata, &param->fault_param->data, list) { + if (hdata->vector == vector) { + dev_err(dev, "IOMMU fault handler data exists for vector %d\n", vector); + ret = -EINVAL; + goto unlock; + } + } + + hdata = kzalloc(sizeof(struct iommu_fault_handler_data), GFP_KERNEL); + if (!hdata) { + ret = -ENOMEM; + goto unlock; + } + hdata->vector = vector; + hdata->data = data; + dev_dbg(dev, "Added IOMMU fault handler data %llx for vector %d\n", + (u64)data, vector); + list_add_tail(&hdata->list, &param->fault_param->data); + +unlock: + mutex_unlock(&param->lock); + return ret; +} +EXPORT_SYMBOL_GPL(iommu_add_device_fault_data); + +/** + * iommu_delete_device_fault_data() - delete handler specific data + * + * For devices with partitioned resources, we may need to have multiple + * handler data that can be identified by IOMMU driver. This function + * allows device drivers to delete handler specific data associated with + * a vector.
When IOMMU detects device fault and its vector, handlers + * can be invoked with the matching data. + * For page request service related to DMA request with PASID, the vector + * is the PASID and the data is PASID associated data such as a mediated + * device. + * @dev: the device + * @vector: identifies fault reporting data to be removed + */ +void iommu_delete_device_fault_data(struct device *dev, int vector) +{ + struct dev_iommu *param = dev->iommu; + struct iommu_fault_handler_data *hdata, *tmp; + + dev_dbg(dev, "%s: vector:%d\n", __func__, vector); + /* + * Fault handler must have been registered before deleting handler data. + * Vector 0 is reserved for default data associated with handler. + */ + if (!param || !param->fault_param || !vector) + return; + + mutex_lock(&param->lock); + + list_for_each_entry_safe(hdata, tmp, &param->fault_param->data, list) { + if (hdata->vector == vector) { + list_del(&hdata->list); + kfree(hdata); + dev_dbg(dev, "Deleted IOMMU fault handler data for vector %d\n", vector); + goto unlock; + } + } + dev_err(dev, "Failed to find handler data for vector %d\n", vector); + +unlock: + mutex_unlock(&param->lock); +} +EXPORT_SYMBOL_GPL(iommu_delete_device_fault_data); + /** * iommu_unregister_device_fault_handler() - Unregister the device fault handler * @dev: the device @@ -1186,6 +1307,7 @@ int iommu_unregister_device_fault_handler(struct device *dev) { struct dev_iommu *param = dev->iommu; int ret = 0; + struct iommu_fault_handler_data *hdata, *tmp; if (!param) return -EINVAL; @@ -1199,6 +1321,14 @@ int iommu_unregister_device_fault_handler(struct device *dev) if (!list_empty(&param->fault_param->faults)) { ret = -EBUSY; goto unlock; + + } + /* TODO: Free handler data if any */ + list_for_each_entry_safe(hdata, tmp, &param->fault_param->data, list) { + dev_dbg(dev, "%s: free handler data %llx vector %d\n", __func__, + (u64)hdata->data, hdata->vector); + list_del(&hdata->list); + kfree(hdata); } kfree(param->fault_param); @@ -1226,8 +1356,10 @@ int
iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) { struct dev_iommu *param = dev->iommu; struct iommu_fault_event *evt_pending = NULL; + struct iommu_fault_handler_data *hdata; struct iommu_fault_param *fparam; struct timer_list *tmr; + void *handler_data = NULL; int ret = 0; u64 exp; @@ -1265,7 +1397,38 @@ int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) mutex_unlock(&fparam->lock); } - ret = fparam->handler(&evt->fault, fparam->data); + if (!evt->vector) { + hdata = list_first_entry(&fparam->data, + struct iommu_fault_handler_data, list); + handler_data = hdata->data; + dev_dbg(dev, "%s:default handler data %llx\n", + __func__, (u64)handler_data); + } else { + /* Find data for matching vector */ + list_for_each_entry(hdata, &param->fault_param->data, list) { + dev_dbg(dev, "Searching handler data vector %d to match %llu\n", + hdata->vector, evt->vector); + + if (hdata->vector == evt->vector) { + handler_data = hdata->data; + dev_dbg(dev, "IOMMU report data %llx on fault vector %llu\n", + (u64)handler_data, evt->vector); + break; + } + } + } + if (!handler_data) { + dev_err(dev, "No valid handler data for vector %llu\n", evt->vector); + if (evt_pending) + list_del(&evt_pending->list); + ret = -ENODEV; + goto done_unlock; + } + dev_dbg(dev, "%s: calling handler with data %llx\n", + __func__, (u64)handler_data); + + ret = fparam->handler(&evt->fault, handler_data); + trace_dev_fault(dev, &evt->fault); if (ret && evt_pending) { mutex_lock(&fparam->lock); list_del(&evt_pending->list); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index dbf05815566f..eba9141f84fc 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -395,19 +395,26 @@ struct iommu_fault_event { struct iommu_fault fault; struct list_head list; u64 expire; + u64 vector; +}; + +struct iommu_fault_handler_data { + u32 vector; + void *data; + struct list_head list; }; /** * struct iommu_fault_param - per-device IOMMU fault data
* @handler: Callback function to handle IOMMU faults at device level - * @data: handler private data - * @faults: holds the pending faults which needs response + * @data: handler private data list + * @faults: holds the pending faults which needs response, e.g. page response. * @lock: protect pending faults list * @timer: track page request pending time limit */ struct iommu_fault_param { iommu_dev_fault_handler_t handler; - void *data; + struct list_head data; struct list_head faults; struct timer_list timer; struct mutex lock; @@ -568,6 +575,9 @@ extern int iommu_unregister_device_fault_handler(struct device *dev); extern int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt); +extern int iommu_add_device_fault_data(struct device *dev, + int vector, void *data); +extern void iommu_delete_device_fault_data(struct device *dev, int vector); extern int iommu_page_response(struct iommu_domain *domain, struct device *dev, void __user *uinfo); @@ -973,6 +983,17 @@ int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) return -ENODEV; } +static inline +int iommu_add_device_fault_data(struct device *dev, int vector, void *data) +{ + return -ENODEV; +} + +static inline +void iommu_delete_device_fault_data(struct device *dev, int vector) +{ +} + static inline int iommu_page_response(struct iommu_domain *domain, struct device *dev, void __user *uinfo) -- Gitee From 10f660f5893d93743c4b93ce2e0c682902edf77d Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Wed, 17 Aug 2022 21:42:24 +0800 Subject: [PATCH 04/24] iommu: make sure unregistration of fault handler never fails ANBZ: #1704 commit 34dfc72d9c9535c09dc064a43a3f0fb392042b00 intel-github. Add a temporary fix to flush pending faults, proper code needs to be validated in various race conditions. REVISIT! 
Signed-off-by: Liu Yi L Signed-off-by: Jacob Pan Signed-off-by: Xuchun Shang Signed-off-by: Guanjun Signed-off-by: Aubrey Li Link: https://gitee.com/anolis/cloud-kernel/pulls/562 Reviewed-by: Artie Ding --- drivers/iommu/iommu.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index fece7e76260b..af249afe1634 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1307,6 +1307,7 @@ int iommu_unregister_device_fault_handler(struct device *dev) { struct dev_iommu *param = dev->iommu; int ret = 0; + struct iommu_fault_event *evt, *next; struct iommu_fault_handler_data *hdata, *tmp; if (!param) @@ -1319,9 +1320,21 @@ int iommu_unregister_device_fault_handler(struct device *dev) /* we cannot unregister handler if there are pending faults */ if (!list_empty(&param->fault_param->faults)) { - ret = -EBUSY; - goto unlock; - + /* + * REVISIT: We should not run into pending faults if we do unbind first. + * the proper termination flow will ensure no pending faults as follows: + * 1. pasid disable and tlb flush + * 2. unbind, free, flush and drain + * 3. unregister fault handler.
+ */ + mutex_lock(&param->fault_param->lock); + list_for_each_entry_safe(evt, next, &param->fault_param->faults, list) { + dev_dbg(dev, "%s, free fault event: 0x%lx\n", __func__, + (unsigned long) evt); + list_del(&evt->list); + kfree(evt); + } + mutex_unlock(&param->fault_param->lock); } /* TODO: Free handler data if any */ list_for_each_entry_safe(hdata, tmp, &param->fault_param->data, list) { @@ -2460,6 +2473,7 @@ EXPORT_SYMBOL_GPL(iommu_uapi_sva_bind_gpasid); int iommu_sva_unbind_gpasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid) { + pr_warn("%s: FIXME need to clear all pending faults!\n", __func__); if (unlikely(!domain->ops->sva_unbind_gpasid)) return -ENODEV; -- Gitee From 0ad8d79c765c45329e85997207271cc3c5cd1f7d Mon Sep 17 00:00:00 2001 From: Liu Yi L Date: Thu, 18 Aug 2022 10:14:28 +0800 Subject: [PATCH 05/24] iommu/vt-d: Support bind/unbind guest pt to def PASID (!VFIO) ANBZ: #1704 commit ee046e4fae242321050e8afadcdef7b37085680e intel-github. commit afce51aae289582f54def5f7a5c61d6910755605 intel-github. commit a64c0b29c8331ce0c407e4eb57d63e8b66b93416 intel-github. commit 02640a5be755eb1c2b37e9c524665c626a119320 intel-github. For guest IOVA support under nesting IOMMU, hypervisor needs to bind/ unbind guest's IOVA page table to host. For such bind/unbind request from user space, host should figure out a target PASID (host default PASID) to be bound. This patch adds a flag to indicate host IOMMU driver if it needs to use default PASID in host.
FIXME: pasid_set = NULL; // dmar_domain->pasid_set; [ Xuchun Shang: fix merge conflict in drivers/iommu/intel/iommu.c ] Signed-off-by: Sun, Yi Y Signed-off-by: Liu Yi L Signed-off-by: Xuchun Shang Signed-off-by: Guanjun Signed-off-by: Aubrey Li Link: https://gitee.com/anolis/cloud-kernel/pulls/562 Reviewed-by: Artie Ding --- drivers/iommu/intel/iommu.c | 4 +- drivers/iommu/intel/svm.c | 83 ++++++++++++++++++++++++++++++------- drivers/iommu/iommu.c | 23 +++++++--- include/linux/intel-iommu.h | 9 +++- include/linux/iommu.h | 20 ++++++--- include/uapi/linux/iommu.h | 1 + 6 files changed, 109 insertions(+), 31 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 619e1d0eb576..dec7db3a5e5c 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4777,8 +4777,8 @@ static void intel_iommu_domain_free(struct iommu_domain *domain) * Check whether a @domain could be attached to the @dev through the * aux-domain attach/detach APIs. */ -static inline bool -is_aux_domain(struct device *dev, struct iommu_domain *domain) +inline bool is_aux_domain(struct device *dev, + struct iommu_domain *domain) { struct device_domain_info *info = get_domain_info(dev); diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 8bcaed10d0e1..59640082d463 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -154,14 +154,15 @@ static inline bool intel_svm_capable(struct intel_iommu *iommu) return iommu->flags & VTD_FLAG_SVM_CAPABLE; } -static inline void intel_svm_drop_pasid(ioasid_t pasid) +static inline void intel_svm_drop_pasid(ioasid_t pasid, u64 flags) { /* * Detaching SPID results in UNBIND notification on the set, we must * do this before dropping the IOASID reference, otherwise the * notification chain may get destroyed. 
*/ - ioasid_detach_spid(pasid); + if (!(flags & IOMMU_SVA_HPASID_DEF)) + ioasid_detach_spid(pasid); ioasid_detach_data(pasid); ioasid_put(NULL, pasid); } @@ -194,7 +195,7 @@ static void intel_svm_free_async_fn(struct work_struct *work) * the PASID is in FREE_PENDING state, no one can get new reference. * Therefore, we can safely free the private data svm. */ - intel_svm_drop_pasid(svm->pasid); + intel_svm_drop_pasid(svm->pasid, 0); /* * Free before unbind can only happen with host PASIDs used for @@ -378,8 +379,12 @@ static int pasid_to_svm_sdev(struct device *dev, return -EINVAL; svm = ioasid_find(set, pasid, NULL); - if (IS_ERR(svm)) - return PTR_ERR(svm); + if (IS_ERR(svm)) { + if (pasid == PASID_RID2PASID) + svm = NULL; + else + return PTR_ERR(svm); + } if (!svm) goto out; @@ -399,8 +404,10 @@ static int pasid_to_svm_sdev(struct device *dev, return 0; } -int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, - struct iommu_gpasid_bind_data *data) +int intel_svm_bind_gpasid(struct iommu_domain *domain, + struct device *dev, + struct iommu_gpasid_bind_data *data, + void *fault_data) { struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); struct intel_svm_dev *sdev = NULL; @@ -409,6 +416,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, struct intel_svm *svm = NULL; unsigned long iflags; int ret = 0; + struct ioasid_set *pasid_set; if (WARN_ON(!iommu) || !data) return -EINVAL; @@ -431,21 +439,29 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, if (pci_max_pasids(to_pci_dev(dev)) != PASID_MAX) return -EINVAL; + dmar_domain = to_dmar_domain(domain); + pasid_set = NULL; //dmar_domain->pasid_set; + /* * We only check host PASID range, we have no knowledge to check * guest PASID range. 
*/ - if (data->hpasid <= 0 || data->hpasid >= PASID_MAX) + if (data->flags & IOMMU_SVA_HPASID_DEF) { + ret = domain_get_pasid(domain, dev); + if (ret < 0) + return ret; + data->hpasid = ret; + /* TODO: may consider to use NULL because host_pasid_set is native scope */ + pasid_set = host_pasid_set; + } else if (data->hpasid <= 0 || data->hpasid >= PASID_MAX) return -EINVAL; info = get_domain_info(dev); if (!info) return -EINVAL; - dmar_domain = to_dmar_domain(domain); - mutex_lock(&pasid_mutex); - ret = pasid_to_svm_sdev(dev, NULL, + ret = pasid_to_svm_sdev(dev, pasid_set, data->hpasid, &svm, &sdev); if (ret) goto out; @@ -473,7 +489,14 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, if (data->flags & IOMMU_SVA_GPASID_VAL) { svm->gpasid = data->gpasid; svm->flags |= SVM_FLAG_GUEST_PASID; - ioasid_attach_spid(data->hpasid, data->gpasid); + if (!(data->flags & IOMMU_SVA_HPASID_DEF)) + ioasid_attach_spid(data->hpasid, data->gpasid); + /* + * Partial assignment needs to add fault data per-pasid + */ + if (is_aux_domain(dev, domain) && fault_data) + iommu_add_device_fault_data(dev, data->hpasid, + fault_data); } ioasid_attach_data(data->hpasid, svm); ioasid_get(NULL, svm->pasid); @@ -542,18 +565,32 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, return ret; } -int intel_svm_unbind_gpasid(struct device *dev, u32 pasid) +int intel_svm_unbind_gpasid(struct iommu_domain *domain, + struct device *dev, u32 pasid, u64 user_flags) { struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); struct intel_svm_dev *sdev; struct intel_svm *svm; int ret; + struct dmar_domain *dmar_domain; + struct ioasid_set *pasid_set; if (WARN_ON(!iommu)) return -EINVAL; + dmar_domain = to_dmar_domain(domain); + pasid_set = NULL; // dmar_domain->pasid_set; + + if (user_flags & IOMMU_SVA_HPASID_DEF) { + ret = domain_get_pasid(domain, dev); + if (ret < 0) + return ret; + pasid = ret; + pasid_set = host_pasid_set; + } + 
mutex_lock(&pasid_mutex); - ret = pasid_to_svm_sdev(dev, NULL, pasid, &svm, &sdev); + ret = pasid_to_svm_sdev(dev, pasid_set, pasid, &svm, &sdev); if (ret) goto out; @@ -565,6 +602,11 @@ int intel_svm_unbind_gpasid(struct device *dev, u32 pasid) intel_pasid_tear_down_entry(iommu, dev, svm->pasid, false); intel_svm_drain_prq(dev, svm->pasid); + /* + * Partial assignment needs to delete fault data + */ + if (is_aux_domain(dev, domain)) + iommu_delete_device_fault_data(dev, pasid); kfree_rcu(sdev, rcu); if (list_empty(&svm->devs)) { @@ -577,7 +619,7 @@ int intel_svm_unbind_gpasid(struct device *dev, u32 pasid) * the unbind, IOMMU driver will get notified * and perform cleanup. */ - intel_svm_drop_pasid(pasid); + intel_svm_drop_pasid(pasid, user_flags); kfree(svm); } } @@ -996,6 +1038,7 @@ static int prq_to_iommu_prot(struct page_req_dsc *req) static int intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc) { + struct device_domain_info *info; struct iommu_fault_event event; if (!dev || !dev_is_pci(dev)) @@ -1028,6 +1071,16 @@ intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc) sizeof(desc->priv_data)); } + /* + * If the device supports PASID granu scalable mode, reports the + * PASID as vector such that handlers can be dispatched with per + * vector data. + */ + info = get_domain_info(dev); + if (!list_empty(&info->subdevices)) { + dev_dbg(dev, "Aux domain present, assign vector %d\n", desc->pasid); + event.vector = desc->pasid; + } return iommu_report_device_fault(dev, &event); } diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index af249afe1634..6289fe3b83b1 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1327,6 +1327,10 @@ int iommu_unregister_device_fault_handler(struct device *dev) * 2. unbind, free, flush and drain * 3. unregister fault handler. 
*/ + dev_dbg(dev, "%s, there is pending faults on dev: %s, here we force" + "to free the fault events and unregister the fault" + "handler, but this changes should be reverted when page" + "response path is ready\n", __func__, dev_name(dev)); mutex_lock(&param->fault_param->lock); list_for_each_entry_safe(evt, next, &param->fault_param->faults, list) { dev_dbg(dev, "%s, free fault event: 0x%lx\n", __func__, @@ -2402,7 +2406,7 @@ static int iommu_check_bind_data(struct iommu_gpasid_bind_data *data) return -EINVAL; /* Check all flags */ - mask = IOMMU_SVA_GPASID_VAL; + mask = IOMMU_SVA_GPASID_VAL | IOMMU_SVA_HPASID_DEF; if (data->flags & ~mask) return -EINVAL; @@ -2447,8 +2451,15 @@ static int iommu_sva_prepare_bind_data(void __user *udata, return iommu_check_bind_data(data); } + +/* + * Caller could provide fault_data to differentiate future page + * requests from the device. This is helpful for page request + * handling for partial assignments of physical devices. e.g. + * mediated device assignment or other sub-device solution.
+ */ int iommu_uapi_sva_bind_gpasid(struct iommu_domain *domain, struct device *dev, - void __user *udata) + void __user *udata, void *fault_data) { struct iommu_gpasid_bind_data data = { 0 }; int ret; @@ -2463,7 +2474,7 @@ int iommu_uapi_sva_bind_gpasid(struct iommu_domain *domain, struct device *dev, ret = ioasid_get_if_owned(data.hpasid); if (ret) return ret; - ret = domain->ops->sva_bind_gpasid(domain, dev, &data); + ret = domain->ops->sva_bind_gpasid(domain, dev, &data, fault_data); ioasid_put(NULL, data.hpasid); return ret; @@ -2471,13 +2482,13 @@ int iommu_uapi_sva_bind_gpasid(struct iommu_domain *domain, struct device *dev, EXPORT_SYMBOL_GPL(iommu_uapi_sva_bind_gpasid); int iommu_sva_unbind_gpasid(struct iommu_domain *domain, struct device *dev, - ioasid_t pasid) + ioasid_t pasid, u64 flags) { pr_warn("%s: FIXME need to clear all pending faults!\n", __func__); if (unlikely(!domain->ops->sva_unbind_gpasid)) return -ENODEV; - return domain->ops->sva_unbind_gpasid(dev, pasid); + return domain->ops->sva_unbind_gpasid(domain, dev, pasid, flags); } EXPORT_SYMBOL_GPL(iommu_sva_unbind_gpasid); @@ -2497,7 +2508,7 @@ int iommu_uapi_sva_unbind_gpasid(struct iommu_domain *domain, struct device *dev ret = ioasid_get_if_owned(data.hpasid); if (ret) return ret; - ret = iommu_sva_unbind_gpasid(domain, dev, data.hpasid); + ret = iommu_sva_unbind_gpasid(domain, dev, data.hpasid, data.flags); ioasid_put(NULL, data.hpasid); return ret; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 743be2a500ef..511a139732fc 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -760,14 +760,19 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev); struct dmar_domain *find_domain(struct device *dev); struct device_domain_info *get_domain_info(struct device *dev); struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn); +int domain_get_pasid(struct iommu_domain *domain, struct device *dev); #ifdef 
CONFIG_INTEL_IOMMU_SVM extern void intel_svm_check(struct intel_iommu *iommu); extern int intel_svm_enable_prq(struct intel_iommu *iommu); extern int intel_svm_finish_prq(struct intel_iommu *iommu); +inline bool is_aux_domain(struct device *dev, + struct iommu_domain *domain); int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, - struct iommu_gpasid_bind_data *data); -int intel_svm_unbind_gpasid(struct device *dev, u32 pasid); + struct iommu_gpasid_bind_data *data, + void *fault_data); +int intel_svm_unbind_gpasid(struct iommu_domain *domain, + struct device *dev, u32 pasid, u64 user_flags); struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata); void intel_svm_unbind(struct iommu_sva *handle); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index eba9141f84fc..88370d6b4fa7 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -331,9 +331,12 @@ struct iommu_ops { int (*cache_invalidate)(struct iommu_domain *domain, struct device *dev, struct iommu_cache_invalidate_info *inv_info); int (*sva_bind_gpasid)(struct iommu_domain *domain, - struct device *dev, struct iommu_gpasid_bind_data *data); + struct device *dev, + struct iommu_gpasid_bind_data *data, + void *fault_data); - int (*sva_unbind_gpasid)(struct device *dev, u32 pasid); + int (*sva_unbind_gpasid)(struct iommu_domain *domain, + struct device *dev, u32 pasid, u64 flags); void (*sva_suspend_pasid)(struct device *dev, u32 pasid); @@ -505,11 +508,14 @@ extern int iommu_uapi_cache_invalidate(struct iommu_domain *domain, void __user *uinfo); extern int iommu_uapi_sva_bind_gpasid(struct iommu_domain *domain, - struct device *dev, void __user *udata); + struct device *dev, + void __user *udata, + void *fault_data); extern int iommu_uapi_sva_unbind_gpasid(struct iommu_domain *domain, struct device *dev, void __user *udata); extern int iommu_sva_unbind_gpasid(struct iommu_domain *domain, - struct device *dev, ioasid_t pasid); + struct 
device *dev, ioasid_t pasid, + u64 flags); extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); extern struct iommu_domain *iommu_get_dma_domain(struct device *dev); extern size_t iommu_pgsize(struct iommu_domain *domain, @@ -1199,7 +1205,8 @@ iommu_uapi_cache_invalidate(struct iommu_domain *domain, } static inline int iommu_uapi_sva_bind_gpasid(struct iommu_domain *domain, - struct device *dev, void __user *udata) + struct device *dev, void __user *udata, + void *fault_data) { return -ENODEV; } @@ -1212,7 +1219,8 @@ static inline int iommu_uapi_sva_unbind_gpasid(struct iommu_domain *domain, static inline int iommu_sva_unbind_gpasid(struct iommu_domain *domain, struct device *dev, - ioasid_t pasid) + ioasid_t pasid, + u64 flags) { return -ENODEV; } diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h index e333b7224436..b11536a05c38 100644 --- a/include/uapi/linux/iommu.h +++ b/include/uapi/linux/iommu.h @@ -328,6 +328,7 @@ struct iommu_gpasid_bind_data { __u32 format; __u32 addr_width; #define IOMMU_SVA_GPASID_VAL (1 << 0) /* guest PASID valid */ +#define IOMMU_SVA_HPASID_DEF (1 << 1) /* use default host PASID */ __u64 flags; __u64 gpgd; __u64 hpasid; -- Gitee From c05098fdf6e5aad1cfcb049745108e6c5414dfcf Mon Sep 17 00:00:00 2001 From: "Sun, Yi Y" Date: Wed, 25 Nov 2020 16:43:36 +0800 Subject: [PATCH 06/24] iommu/vt-d: Ignore pte present check if it is not nested PASID entry ANBZ: #1704 commit 600ec25cb73620394835f6628aa5a5f0b0298f32 intel-github. commit a9e982372af1a7e0137583d8940791fef9a4ffc2 intel-github. commit 49b299309a566c5955a8198e530ced6dfa22d1a2 intel-github. We do not bind same PASID to the same device twice. But for non-nested PASID entry, we can ignore present bit check. Because we need modify the entry to do nested setup. This patch supports IOMMU cache invalidation for gIOVA page table. Such invalidation request from guest requires host IOMMU driver to figure out proper PASID (e.g. 
default PASID of aux-domain and etc.). If the pte pgtt is nesting mode, we should not clear it but only reset the pgtt to slt only. Otherwise, there will be page request without pasid happens when the scalable mode guest reboots to legacy mode guest. The root cause is that there will be no chance to setup second level page table on host when legacy guest booting. Signed-off-by: Sun, Yi Y Signed-off-by: Liu Yi L Signed-off-by: Xuchun Shang Signed-off-by: Guanjun Link: https://gitee.com/anolis/cloud-kernel/pulls/562 Reviewed-by: Artie Ding --- drivers/iommu/intel/iommu.c | 21 +++++++++++++++------ drivers/iommu/intel/pasid.c | 30 +++++++++++++++++++++++------- drivers/iommu/intel/pasid.h | 6 +++--- drivers/iommu/intel/svm.c | 1 + include/linux/intel-iommu.h | 1 + 5 files changed, 43 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index dec7db3a5e5c..1acca0e3f72e 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -5155,12 +5155,21 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev, * PASID is stored in different locations based on the * granularity. 
*/ - if (inv_info->granularity == IOMMU_INV_GRANU_PASID && - (inv_info->granu.pasid_info.flags & IOMMU_INV_PASID_FLAGS_PASID)) - pasid = inv_info->granu.pasid_info.pasid; - else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR && - (inv_info->granu.addr_info.flags & IOMMU_INV_ADDR_FLAGS_PASID)) - pasid = inv_info->granu.addr_info.pasid; + if (inv_info->granularity == IOMMU_INV_GRANU_PASID) { + if (inv_info->granu.pasid_info.flags & + IOMMU_INV_PASID_FLAGS_PASID) { + pasid = inv_info->granu.pasid_info.pasid; + } else { + pasid = domain_get_pasid(domain, dev); + } + } else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR) { + if (inv_info->granu.addr_info.flags & + IOMMU_INV_ADDR_FLAGS_PASID) { + pasid = inv_info->granu.addr_info.pasid; + } else { + pasid = domain_get_pasid(domain, dev); + } + } ret = ioasid_get_if_owned(pasid); if (ret) diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 5f3327a87d22..aeb9050693d8 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -309,14 +309,31 @@ static inline void pasid_clear_entry_with_fpd(struct pasid_entry *pe) } static void -intel_pasid_clear_entry(struct device *dev, u32 pasid, bool fault_ignore) +intel_pasid_clear_entry(struct intel_iommu *iommu, struct device *dev, + u32 pasid, bool fault_ignore) { struct pasid_entry *pe; + u64 pe_val; + bool nested; pe = intel_pasid_get_entry(dev, pasid); if (WARN_ON(!pe)) return; + /* + * The guest may reboot from scalable mode to legacy mode. During this + * phase, there is no chance to setup SLT. So, we should only reset PGTT + * from NESTED to SL and keep other bits when unbind gpasid is executed. + */ + pe_val = READ_ONCE(pe->val[0]); + nested = (((pe_val >> 6) & 0x7) == PASID_ENTRY_PGTT_NESTED) ? 
true : false; + if (nested && (iommu->flags & VTD_FLAG_PGTT_SL_ONLY)) { + pe_val &= 0xfffffffffffffebf; + WRITE_ONCE(pe->val[0], pe_val); + iommu->flags &= ~VTD_FLAG_PGTT_SL_ONLY; + return; + } + if (fault_ignore && pasid_pte_is_present(pe)) pasid_clear_entry_with_fpd(pe); else @@ -554,9 +571,7 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev, return; did = pasid_get_domain_id(pte); - pgtt = pasid_pte_get_pgtt(pte); - - intel_pasid_clear_entry(dev, pasid, fault_ignore); + intel_pasid_clear_entry(iommu, dev, pasid, fault_ignore); if (!ecap_coherent(iommu->ecap)) clflush_cache_range(pte, sizeof(*pte)); @@ -873,10 +888,11 @@ int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev, return -EINVAL; /* - * Caller must ensure PASID entry is not in use, i.e. not bind the - * same PASID to the same device twice. + * PASID entries with nesting translation type should not be set + * multiple times. If caller tries to setup nesting for a PASID + * entry which is already nested mode, should fail it. */ - if (pasid_pte_is_present(pte)) + if (pasid_pte_is_present(pte) && pasid_pte_is_nested(pte)) return -EBUSY; pasid_clear_entry(pte); diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index 00a5860d0390..437dfd8e16fa 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -98,10 +98,10 @@ static inline bool pasid_pte_is_present(struct pasid_entry *pte) return READ_ONCE(pte->val[0]) & PASID_PTE_PRESENT; } -/* Get PGTT field of a PASID table entry */ -static inline u16 pasid_pte_get_pgtt(struct pasid_entry *pte) +/* Check if PGTT bits of a PASID table entry is nested. 
*/ +static inline bool pasid_pte_is_nested(struct pasid_entry *pte) { - return (u16)((READ_ONCE(pte->val[0]) >> 6) & 0x7); + return ((READ_ONCE(pte->val[0]) >> 6) & 0x7) == PASID_ENTRY_PGTT_NESTED; } extern unsigned int intel_pasid_max_id; diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 59640082d463..9103ddada32f 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -599,6 +599,7 @@ int intel_svm_unbind_gpasid(struct iommu_domain *domain, sdev->users--; if (!sdev->users) { list_del_rcu(&sdev->list); + iommu->flags |= VTD_FLAG_PGTT_SL_ONLY; intel_pasid_tear_down_entry(iommu, dev, svm->pasid, false); intel_svm_drain_prq(dev, svm->pasid); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 511a139732fc..e021855f4c49 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -479,6 +479,7 @@ enum { #define VTD_FLAG_TRANS_PRE_ENABLED (1 << 0) #define VTD_FLAG_IRQ_REMAP_PRE_ENABLED (1 << 1) #define VTD_FLAG_SVM_CAPABLE (1 << 2) +#define VTD_FLAG_PGTT_SL_ONLY (1 << 3) extern int intel_iommu_sm; extern spinlock_t device_domain_lock; -- Gitee From a88d05f8435bed3f2c4f23505b208af01d1b7547 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Thu, 28 Jul 2022 11:29:43 +0800 Subject: [PATCH 07/24] iommu/vt-d: Calculate and set flags for handle_mm_fault MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #1704 commit 3a8698da4510eb9dea1df6b8c306f99510e3c6ed intel-github. commit 77d7c99a6b30e30d436b01b1af302d9e56b985f8 intel-github. commit 169a5e5b3302b6b3b65f881dfa2cd84155f3cf32 intel-github. commit 4982e28dfaf3a1cab28603619c6fc62f66667cf5 intel-github. Page requests are originated from the user page fault. Therefore, we shall set FAULT_FLAG_USER.  FAULT_FLAG_REMOTE indicates that we are walking an mm which is not guaranteed to be the same as the current->mm and should not be subject to protection key enforcement. 
Therefore, we should set FAULT_FLAG_REMOTE to avoid faults when both SVM and PKEY are used. [ Xuchun Shang: fix merge conflict in drivers/iommu/intel/svm.c ] [ Aubrey Li: align prq_event_thread with BKC implementation, including: - commit 3a8698da4510 iommu/vt-d: Calculate and set flags for handle_mm_fault - commit 77d7c99a6b30 iommu: unresolved merge conflicts - commit 169a5e5b3302 iommu/vt-d: Add PRQ handling latency sampling - commit 4982e28dfaf3 iommu/vt-d: Two bug fix to svm.c ] References: commit 1b2ee1266ea6 ("mm/core: Do not enforce PKEY permissions on remote mm access") Reviewed-by: Raj Ashok Signed-off-by: Jacob Pan Signed-off-by: Xuchun Shang Signed-off-by: Guanjun Signed-off-by: Aubrey Li Link: https://gitee.com/anolis/cloud-kernel/pulls/562 Reviewed-by: Artie Ding --- drivers/iommu/intel/svm.c | 159 +++++++++++++++++++++++++++----------- 1 file changed, 114 insertions(+), 45 deletions(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 9103ddada32f..4c2516198ce0 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -1180,65 +1180,71 @@ static irqreturn_t prq_event_thread(int irq, void *d) struct intel_svm_dev *sdev = NULL; struct intel_iommu *iommu = d; struct intel_svm *svm = NULL; - struct page_req_dsc *req; - int head, tail, handled; - u64 address; + int head, tail, handled = 0; + unsigned int flags = 0; - /* - * Clear PPR bit before reading head/tail registers, to ensure that - * we get a new interrupt if needed. - */ + /* Clear PPR bit before reading head/tail registers, to + * ensure that we get a new interrupt if needed. 
*/ writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG); tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK; head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK; - handled = (head != tail); while (head != tail) { + struct vm_area_struct *vma; + struct page_req_dsc *req; + struct qi_desc resp; + int result; + vm_fault_t ret; + u64 address; + + handled = 1; req = &iommu->prq[head / sizeof(*req)]; + result = QI_RESP_INVALID; address = (u64)req->addr << VTD_PAGE_SHIFT; - - if (unlikely(!req->pasid_present)) { - pr_err("IOMMU: %s: Page request without PASID\n", - iommu->name); -bad_req: - svm = NULL; - sdev = NULL; - handle_bad_prq_event(iommu, req, QI_RESP_INVALID); - goto prq_advance; + if (!req->pasid_present) { + pr_err("%s: Page request without PASID: %08llx %08llx\n", + iommu->name, ((unsigned long long *)req)[0], + ((unsigned long long *)req)[1]); + goto no_pasid; } - - if (unlikely(!is_canonical_address(address))) { - pr_err("IOMMU: %s: Address is not canonical\n", - iommu->name); - goto bad_req; + /* We shall not receive page request for supervisor SVM */ + if (req->pm_req && (req->rd_req | req->wr_req)) { + pr_err("Unexpected page request in Privilege Mode"); + /* No need to find the matching sdev as for bad_req */ + goto no_pasid; } - - if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) { - pr_err("IOMMU: %s: Page request in Privilege Mode\n", - iommu->name); - goto bad_req; - } - - if (unlikely(req->exe_req && req->rd_req)) { - pr_err("IOMMU: %s: Execution request not supported\n", - iommu->name); - goto bad_req; + /* DMA read with exec requeset is not supported. */ + if (req->exe_req && req->rd_req) { + pr_err("Execution request not supported\n"); + goto no_pasid; } - if (!svm || svm->pasid != req->pasid) { - /* - * It can't go away, because the driver is not permitted - * to unbind the mm while any page faults are outstanding. 
- */ + rcu_read_lock(); svm = ioasid_find(NULL, req->pasid, NULL); - if (IS_ERR_OR_NULL(svm) || (svm->flags & SVM_FLAG_SUPERVISOR_MODE)) - goto bad_req; + /* It *can't* go away, because the driver is not permitted + * to unbind the mm while any page faults are outstanding. + * So we only need RCU to protect the internal idr code. */ + rcu_read_unlock(); + if (IS_ERR_OR_NULL(svm)) { + pr_err("%s: Page request for invalid PASID %d: %08llx %08llx\n", + iommu->name, req->pasid, ((unsigned long long *)req)[0], + ((unsigned long long *)req)[1]); + goto no_pasid; + } } if (!sdev || sdev->sid != req->rid) { - sdev = svm_lookup_device_by_sid(svm, req->rid); - if (!sdev) - goto bad_req; + struct intel_svm_dev *t; + + sdev = NULL; + rcu_read_lock(); + list_for_each_entry_rcu(t, &svm->devs, list) { + if (t->sid == req->rid) { + sdev = t; + break; + } + } + rcu_read_unlock(); } /* @@ -1246,13 +1252,76 @@ static irqreturn_t prq_event_thread(int irq, void *d) * the fault notifiers, we skip the page response here. */ if (svm->flags & SVM_FLAG_GUEST_MODE) { - if (!intel_svm_prq_report(sdev->dev, req)) + if (sdev && !intel_svm_prq_report(sdev->dev, req)) goto prq_advance; else goto bad_req; } - handle_single_prq_event(iommu, svm->mm, req); + /* Since we're using init_mm.pgd directly, we should never take + * any faults on kernel addresses. */ + if (!svm->mm) + goto bad_req; + + /* If address is not canonical, return invalid response */ + if (!is_canonical_address(address)) + goto bad_req; + + /* If the mm is already defunct, don't handle faults. 
*/ + if (!mmget_not_zero(svm->mm)) + goto bad_req; + + mmap_read_lock(svm->mm); + vma = find_extend_vma(svm->mm, address); + if (!vma || address < vma->vm_start) + goto invalid; + + if (access_error(vma, req)) + goto invalid; + + flags = FAULT_FLAG_USER | FAULT_FLAG_REMOTE; + if (req->wr_req) + flags |= FAULT_FLAG_WRITE; + + ret = handle_mm_fault(vma, address, flags, NULL); + if (ret & VM_FAULT_ERROR) + goto invalid; + + result = QI_RESP_SUCCESS; +invalid: + mmap_read_unlock(svm->mm); + mmput(svm->mm); +bad_req: + /* We get here in the error case where the PASID lookup failed, + and these can be NULL. Do not use them below this point! */ + sdev = NULL; + svm = NULL; +no_pasid: + if (req->lpig || req->priv_data_present) { + /* + * Per VT-d spec. v3.0 ch7.7, system software must + * respond with page group response if private data + * is present (PDP) or last page in group (LPIG) bit + * is set. This is an additional VT-d feature beyond + * PCI ATS spec. + */ + resp.qw0 = QI_PGRP_PASID(req->pasid) | + QI_PGRP_DID(req->rid) | + QI_PGRP_PASID_P(req->pasid_present) | + QI_PGRP_PDP(req->priv_data_present) | + QI_PGRP_RESP_CODE(result) | + QI_PGRP_RESP_TYPE; + resp.qw1 = QI_PGRP_IDX(req->prg_index) | + QI_PGRP_LPIG(req->lpig); + resp.qw2 = 0; + resp.qw3 = 0; + + if (req->priv_data_present) + memcpy(&resp.qw2, req->priv_data, + sizeof(req->priv_data)); + qi_submit_sync(iommu, &resp, 1, 0); + } + prq_advance: head = (head + sizeof(*req)) & PRQ_RING_MASK; } -- Gitee From b43feafdd7db70136869611bd480801e6a855eac Mon Sep 17 00:00:00 2001 From: Yi Sun Date: Fri, 19 Mar 2021 13:54:11 +0800 Subject: [PATCH 08/24] iommu/vt-d: Fix gpasid bind/unbind issue ANBZ: #1704 commit 52df39844906fcd2b9b16e8a981777327c22b6c6 intel-github. When gpasid 0 binding/unbinding, ioasid_get_if_owned() returns errors because the set type is not valid. We should change api to ioasid_get() to fix it. When doing cache invalidation, we have similar issue. 
Signed-off-by: Yi Sun Signed-off-by: Xuchun Shang Signed-off-by: Guanjun Link: https://gitee.com/anolis/cloud-kernel/pulls/562 Reviewed-by: Artie Ding --- drivers/iommu/intel/iommu.c | 9 ++++++++- drivers/iommu/iommu.c | 5 +++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 1acca0e3f72e..4e0505fd5db3 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -5108,6 +5108,7 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev, u16 did, sid; int ret = 0; u64 size = 0; + bool default_pasid = false; if (!inv_info || !dmar_domain) return -EINVAL; @@ -5161,6 +5162,7 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev, pasid = inv_info->granu.pasid_info.pasid; } else { pasid = domain_get_pasid(domain, dev); + default_pasid = true; } } else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR) { if (inv_info->granu.addr_info.flags & @@ -5168,10 +5170,15 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev, pasid = inv_info->granu.addr_info.pasid; } else { pasid = domain_get_pasid(domain, dev); + default_pasid = true; } } - ret = ioasid_get_if_owned(pasid); + if (default_pasid) + ret = ioasid_get(NULL, pasid); + else + ret = ioasid_get_if_owned(pasid); + if (ret) goto out_unlock; diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 6289fe3b83b1..2dafca037dd9 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2471,9 +2471,10 @@ int iommu_uapi_sva_bind_gpasid(struct iommu_domain *domain, struct device *dev, if (ret) return ret; - ret = ioasid_get_if_owned(data.hpasid); + ret = ioasid_get(NULL, data.hpasid); if (ret) return ret; + ret = domain->ops->sva_bind_gpasid(domain, dev, &data, fault_data); ioasid_put(NULL, data.hpasid); @@ -2505,7 +2506,7 @@ int iommu_uapi_sva_unbind_gpasid(struct iommu_domain *domain, struct device *dev if (ret) return ret; - ret = 
ioasid_get_if_owned(data.hpasid); + ret = ioasid_get(NULL, data.hpasid); if (ret) return ret; ret = iommu_sva_unbind_gpasid(domain, dev, data.hpasid, data.flags); -- Gitee From dfe66294341165dffc5da60097a84138529cbcbc Mon Sep 17 00:00:00 2001 From: Yi Sun Date: Fri, 9 Apr 2021 22:19:51 +0800 Subject: [PATCH 09/24] iommu/vt-d: Fix pasid bind issue ANBZ: #1704 commit 4a6c6b7b179544b3483c492927f26bbdcc6514f9 intel-github. We need set the original hpasid back after binding gpasid. Otherwise, iommu layer will report error because the hpasid is different for ioasid_get/ioasid_put. Signed-off-by: Yi Sun Signed-off-by: Xuchun Shang Signed-off-by: Guanjun Link: https://gitee.com/anolis/cloud-kernel/pulls/562 Reviewed-by: Artie Ding --- drivers/iommu/intel/svm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 4c2516198ce0..18c593b9ceb7 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -417,6 +417,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, unsigned long iflags; int ret = 0; struct ioasid_set *pasid_set; + u64 hpasid_org; if (WARN_ON(!iommu) || !data) return -EINVAL; @@ -450,6 +451,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, ret = domain_get_pasid(domain, dev); if (ret < 0) return ret; + hpasid_org = data->hpasid; data->hpasid = ret; /* TODO: may consider to use NULL because host_pasid_set is native scope */ pasid_set = host_pasid_set; @@ -561,6 +563,9 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, kfree(svm); } + if (data->flags & IOMMU_SVA_HPASID_DEF) + data->hpasid = hpasid_org; + mutex_unlock(&pasid_mutex); return ret; } -- Gitee From 441aa37ac9237b634d5d2b9786877e7ae0e458b2 Mon Sep 17 00:00:00 2001 From: Yi Sun Date: Sat, 23 Oct 2021 07:12:51 +0800 Subject: [PATCH 10/24] iommu/vtd: Fix some gpasid related issues ANBZ: #1704 commit 6c89ec91ff5e0ae1a92999922153df2033a5dbfa intel-github. 
During tests, we found the NIC pf/vf (SRIOV) passthrough cannot work correctly. The NIC cannot get IP in VM. One reason is the iommu driver returns error when binding gpasid. There are some unnecessary check for such devices. Signed-off-by: Yi Sun Signed-off-by: Xuchun Shang Signed-off-by: Guanjun Link: https://gitee.com/anolis/cloud-kernel/pulls/562 Reviewed-by: Artie Ding --- drivers/iommu/intel/svm.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 18c593b9ceb7..2e8eae9ee94f 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -436,8 +436,9 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, if (!dev_is_pci(dev)) return -ENOTSUPP; - /* VT-d supports devices with full 20 bit PASIDs only */ - if (pci_max_pasids(to_pci_dev(dev)) != PASID_MAX) + /* Except gIOVA binding, VT-d supports devices with full 20 bit PASIDs only */ + if ((data->flags & IOMMU_SVA_HPASID_DEF) == 0 && + pci_max_pasids(to_pci_dev(dev)) != PASID_MAX) return -EINVAL; dmar_domain = to_dmar_domain(domain); @@ -522,12 +523,16 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX)) sdev->users = 1; - /* Set up device context entry for PASID if not enabled already */ - ret = intel_iommu_enable_pasid(iommu, sdev->dev); - if (ret) { - dev_err_ratelimited(dev, "Failed to enable PASID capability\n"); - kfree(sdev); - goto out; + /* For legacy device passthr giova usage, do not enable pasid */ + if ((data->flags & IOMMU_SVA_HPASID_DEF) == 0 && + pci_max_pasids(to_pci_dev(dev)) == PASID_MAX) { + /* Set up device context entry for PASID if not enabled already */ + ret = intel_iommu_enable_pasid(iommu, sdev->dev); + if (ret) { + dev_err_ratelimited(dev, "Failed to enable PASID capability\n"); + kfree(sdev); + goto out; + } } /* -- Gitee From 46570709859b44486e10815ec36c3cb2d49bacdc Mon Sep 17 00:00:00 2001 From: Liu Yi L Date: 
Thu, 18 Aug 2022 10:02:33 +0800 Subject: [PATCH 11/24] vfio/type1: Support binding guest page tables to PASID ANBZ: #1704 commit 9861f0079c060b9cb20d049959bdfe84a3ea9f07 intel-github. commit 3ab3a3fa8eed0c861370ef574a5960e8cff83b6f intel-github. commit 54a04b67a76754cf9ac686eec4a41ed68ad5cf5c intel-github. commit f6b1fabccb6345bb8a31c3cde3be519bf87fd89d intel-github. Nesting translation allows two-levels/stages page tables, with 1st level for guest translations (e.g. GVA->GPA), 2nd level for host translations (e.g. GPA->HPA). This patch adds an interface for binding guest page tables to a PASID. This PASID must have been allocated by the userspace before the binding request. e.g. allocated from /dev/ioasid. As the bind data is parsed by the iommu abstract layer, this patch doesn't have the ownership check against the PASID from userspace. It would be done in the iommu subsystem. *) rename subject from "vfio/type1: Bind guest page tables to host" *) remove VFIO_IOMMU_BIND, introduce VFIO_IOMMU_NESTING_OP to support bind/unbind guest page table *) replaced vfio_iommu_for_each_dev() with a group level loop since this series enforces one group per container w/ nesting type as start. *) rename vfio_bind/unbind_gpasid_fn() to vfio_dev_bind/unbind_gpasid_fn() *) vfio_dev_unbind_gpasid() always successful *) use vfio_mm->pasid_lock to avoid race between PASID free and page table bind/unbind vfio/type1: Get per-mdev fault_handler_data for IOMMU-capable mdevs With the introduction of mdev, fault reporting is done at per PASID-dev granularity. This requires VFIO to get fault_handler_data from parent device and pass it to IOMMU driver, thus IOMMU driver could bind PASID with the fault_handler_data to fulfill the per PASID-dev granularity fault reporting rule. vfio/type1: A fix for iommu: forklift v5.12 iommu to v5.15 bkc The iommu part of below commit was merged by forklift v5.12 iommu to v5.15 bkc. But vfio is still using the old API, so it will cause compiling errors.
commit af8e85ec400d4fd01b80cc8cf8654098ffbff6ba Author: Sun, Yi Y Date: Wed Nov 25 16:37:42 2020 +0800 iommu/vt-d: Support bind/unbind guest page table to default PASID For guest IOVA support under nesting IOMMU, hypervisor needs to bind/unbind guest's IOVA page table to host. For such bind/unbind request from user space, host should figure out a target PASID (host default PASID) to be bound. This patch adds a flag to indicate host IOMMU driver if it needs to use default PASID in host. FIXME: pasid_set = NULL; // dmar_domain->pasid_set; Signed-off-by: Sun, Yi Y Signed-off-by: Liu Yi L vfio/type1: Add vSVA support for IOMMU-backed mdevs In recent years, mediated device pass-through framework (e.g. vfio-mdev) is used to achieve flexible device sharing across domains (e.g. VMs). Also there are hardware assisted mediated pass-through solutions from platform vendors. e.g. Intel VT-d scalable mode which supports Intel Scalable I/O Virtualization technology. Such mdevs are called IOMMU-backed mdevs as there is IOMMU enforced DMA isolation for such mdevs. In kernel, IOMMU-backed mdevs are exposed to IOMMU layer by aux-domain concept, which means mdevs are protected by an iommu domain which is auxiliary to the domain that the kernel driver primarily uses for DMA API. Details can be found in the KVM presentation as below: https://events19.linuxfoundation.org/wp-content/uploads/2017/12/Hardware-Assisted-Mediated-Pass-Through-with-VFIO-Kevin-Tian-Intel.pdf This patch extends NESTING_IOMMU ops to IOMMU-backed mdev devices. The main requirement is to use the auxiliary domain associated with mdev.
[ Aubrey Li : fix build error without CONFIG_IOASID ] Cc: Kevin Tian CC: Jacob Pan CC: Jun Tian Cc: Alex Williamson Cc: Eric Auger Cc: Jean-Philippe Brucker Cc: Joerg Roedel Cc: Lu Baolu Signed-off-by: Liu Yi L Reviewed-by: Eric Auger Signed-off-by: Cathy Zhang Signed-off-by: Xuchun Shang Signed-off-by: Guanjun Signed-off-by: Aubrey Li Link: https://gitee.com/anolis/cloud-kernel/pulls/562 Reviewed-by: Artie Ding --- drivers/vfio/vfio_iommu_type1.c | 166 ++++++++++++++++++++++++++++++++ include/uapi/linux/vfio.h | 35 +++++++ 2 files changed, 201 insertions(+) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index b5a7e2a4f47e..a3cce2c70835 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -40,6 +40,7 @@ #include #include #include +#include #define DRIVER_VERSION "0.2" #define DRIVER_AUTHOR "Alex Williamson " @@ -159,6 +160,26 @@ struct domain_capsule { u64 flags; }; +/* iommu->lock must be held */ +static int vfio_prepare_nesting_domain_capsule(struct vfio_iommu *iommu, + struct domain_capsule *dc) +{ + struct vfio_domain *domain; + struct vfio_group *group; + + if (!iommu->nesting_info) + return -EINVAL; + + domain = list_first_entry(&iommu->domain_list, + struct vfio_domain, next); + group = list_first_entry(&domain->group_list, + struct vfio_group, next); + dc->group = group; + dc->domain = domain->domain; + dc->user = true; + return 0; +} + static int put_pfn(unsigned long pfn, int prot); static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu, @@ -2485,6 +2506,67 @@ static int vfio_iommu_resv_refresh(struct vfio_iommu *iommu, return ret; } +static int vfio_dev_bind_gpasid_fn(struct device *dev, void *data) +{ + struct domain_capsule *dc = (struct domain_capsule *)data; + unsigned long arg = *(unsigned long *)dc->data; + struct mdev_device *mdev = to_mdev_device(dev); + struct device *iommu_device; + void *iommu_fault_data = NULL; + + iommu_device = 
vfio_get_iommu_device(dc->group, dev); + if (!iommu_device) + return -EINVAL; + + if (iommu_device != dev) + iommu_fault_data = mdev_get_iommu_fault_data(mdev); + + return iommu_uapi_sva_bind_gpasid(dc->domain, iommu_device, + (void __user *)arg, + iommu_fault_data); +} + +static int vfio_dev_unbind_gpasid_fn(struct device *dev, void *data) +{ + struct domain_capsule *dc = (struct domain_capsule *)data; + struct device *iommu_device; + + iommu_device = vfio_get_iommu_device(dc->group, dev); + if (!iommu_device) + return -EINVAL; + + /* + * dc->user is a toggle for the unbind operation. When user + * set, the dc->data passes in a __user pointer and requires + * to use iommu_uapi_sva_unbind_gpasid(), in which it will + * copy the unbind data from the user buffer. When user is + * clear, the dc->data passes in a pasid which is going to + * be unbind no need to copy data from userspace. + */ + if (dc->user) { + unsigned long arg = *(unsigned long *)dc->data; + + iommu_uapi_sva_unbind_gpasid(dc->domain, iommu_device, + (void __user *)arg); + } else { + ioasid_t pasid = *(ioasid_t *)dc->data; + + iommu_sva_unbind_gpasid(dc->domain, iommu_device, pasid, dc->flags); + } + return 0; +} + +static void vfio_group_unbind_gpasid_fn(ioasid_t pasid, void *data) +{ + struct domain_capsule *dc = (struct domain_capsule *)data; + + dc->user = false; + dc->data = &pasid; + + iommu_group_for_each_dev(dc->group->iommu_group, + dc, vfio_dev_unbind_gpasid_fn); +} + static void vfio_iommu_type1_detach_group(void *iommu_data, struct iommu_group *iommu_group) { @@ -2529,6 +2611,28 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, if (!group) continue; +#if IS_ENABLED(CONFIG_IOASID_USER) + if (iommu->nesting_info && + iommu->nesting_info->features & + IOMMU_NESTING_FEAT_BIND_PGTBL) { + struct domain_capsule dc = { .group = group, + .domain = domain->domain, + .data = NULL }; + struct ioasid_user *iuser; + + /* + * For devices attached to nesting type iommu, + * VFIO should unbind 
page tables bound with the + * devices in the iommu group before detaching. + */ + iuser = ioasid_user_get_from_task(current); + if (!(IS_ERR(iuser) || !iuser)) { + ioasid_user_for_each_id(iuser, &dc, + vfio_group_unbind_gpasid_fn); + ioasid_user_put(iuser); + } + } +#endif vfio_iommu_detach_group(domain, group); update_dirty_scope = !group->pinned_page_dirty_scope; update_iommu_hwdbm = !group->iommu_hwdbm; @@ -3071,6 +3175,66 @@ static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu, return -EINVAL; } +static long vfio_iommu_handle_pgtbl_op(struct vfio_iommu *iommu, + bool is_bind, unsigned long arg) +{ + struct domain_capsule dc = { .data = &arg, .user = true }; + struct iommu_nesting_info *info; + int ret; + + mutex_lock(&iommu->lock); + + info = iommu->nesting_info; + if (!info || !(info->features & IOMMU_NESTING_FEAT_BIND_PGTBL)) { + ret = -EOPNOTSUPP; + goto out_unlock; + } + + ret = vfio_prepare_nesting_domain_capsule(iommu, &dc); + if (ret) + goto out_unlock; + + if (is_bind) + ret = iommu_group_for_each_dev(dc.group->iommu_group, &dc, + vfio_dev_bind_gpasid_fn); + if (ret || !is_bind) + iommu_group_for_each_dev(dc.group->iommu_group, + &dc, vfio_dev_unbind_gpasid_fn); + +out_unlock: + mutex_unlock(&iommu->lock); + return ret; +} + +static long vfio_iommu_type1_nesting_op(struct vfio_iommu *iommu, + unsigned long arg) +{ + struct vfio_iommu_type1_nesting_op hdr; + unsigned int minsz; + int ret; + + minsz = offsetofend(struct vfio_iommu_type1_nesting_op, flags); + + if (copy_from_user(&hdr, (void __user *)arg, minsz)) + return -EFAULT; + + if (hdr.argsz < minsz || hdr.flags & ~VFIO_NESTING_OP_MASK) + return -EINVAL; + + switch (hdr.flags & VFIO_NESTING_OP_MASK) { + case VFIO_IOMMU_NESTING_OP_BIND_PGTBL: + ret = vfio_iommu_handle_pgtbl_op(iommu, true, arg + minsz); + break; + case VFIO_IOMMU_NESTING_OP_UNBIND_PGTBL: + ret = vfio_iommu_handle_pgtbl_op(iommu, false, arg + minsz); + break; + default: + ret = -EINVAL; + } + + return ret; +} + static 
long vfio_iommu_type1_ioctl(void *iommu_data, unsigned int cmd, unsigned long arg) { @@ -3087,6 +3251,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, return vfio_iommu_type1_unmap_dma(iommu, arg); case VFIO_IOMMU_DIRTY_PAGES: return vfio_iommu_type1_dirty_pages(iommu, arg); + case VFIO_IOMMU_NESTING_OP: + return vfio_iommu_type1_nesting_op(iommu, arg); default: return -ENOTTY; } diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 81cd642971d8..ea44df7fac3a 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -1255,6 +1255,41 @@ struct vfio_iommu_type1_dirty_bitmap_get { #define VFIO_IOMMU_DIRTY_PAGES _IO(VFIO_TYPE, VFIO_BASE + 17) +/** + * VFIO_IOMMU_NESTING_OP - _IOW(VFIO_TYPE, VFIO_BASE + 18, + * struct vfio_iommu_type1_nesting_op) + * + * This interface allows userspace to utilize the nesting IOMMU + * capabilities as reported in VFIO_IOMMU_TYPE1_INFO_CAP_NESTING + * cap through VFIO_IOMMU_GET_INFO. For platforms which require + * system wide PASID, PASID will be allocated by VFIO_IOMMU_PASID + * _REQUEST. + * + * @data[] types defined for each op: + * +=================+===============================================+ + * | NESTING OP | @data[] | + * +=================+===============================================+ + * | BIND_PGTBL | struct iommu_gpasid_bind_data | + * +-----------------+-----------------------------------------------+ + * | UNBIND_PGTBL | struct iommu_gpasid_bind_data | + * +-----------------+-----------------------------------------------+ + * + * returns: 0 on success, -errno on failure. 
+ */ +struct vfio_iommu_type1_nesting_op { + __u32 argsz; + __u32 flags; +#define VFIO_NESTING_OP_MASK (0xffff) /* lower 16-bits for op */ + __u8 data[]; +}; + +enum { + VFIO_IOMMU_NESTING_OP_BIND_PGTBL, + VFIO_IOMMU_NESTING_OP_UNBIND_PGTBL, +}; + +#define VFIO_IOMMU_NESTING_OP _IO(VFIO_TYPE, VFIO_BASE + 18) + /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */ /* -- Gitee From 8050eb60bcbb3684fe452e36199ae272abe82434 Mon Sep 17 00:00:00 2001 From: Liu Yi L Date: Thu, 18 Aug 2022 10:07:04 +0800 Subject: [PATCH 12/24] vfio/type1: Allow invalidating first-level/stage IOMMU cache ANBZ: #1704 commit 3116f7f0c27edd58822e526ba3255554507011ed intel-github. This patch provides an interface allowing the userspace to invalidate IOMMU cache for first-level page table. It is required when the first level IOMMU page table is not managed by the host kernel in the nested translation setup. v1 -> v2: *) rename from "vfio/type1: Flush stage-1 IOMMU cache for nesting type" *) rename vfio_cache_inv_fn() to vfio_dev_cache_invalidate_fn() *) vfio_dev_cache_inv_fn() always successful *) remove VFIO_IOMMU_CACHE_INVALIDATE, and reuse VFIO_IOMMU_NESTING_OP [ Xuchun Shang: fix merge conflict in drivers/vfio/vfio_iommu_type1.c ] Cc: Kevin Tian CC: Jacob Pan Cc: Alex Williamson Cc: Eric Auger Cc: Jean-Philippe Brucker Cc: Joerg Roedel Cc: Lu Baolu Signed-off-by: Liu Yi L Signed-off-by: Eric Auger Signed-off-by: Jacob Pan Signed-off-by: Xuchun Shang Signed-off-by: Guanjun Signed-off-by: Aubrey Li Link: https://gitee.com/anolis/cloud-kernel/pulls/562 Reviewed-by: Artie Ding --- drivers/vfio/vfio_iommu_type1.c | 44 +++++++++++++++++++++++++++++++++ include/uapi/linux/vfio.h | 3 +++ 2 files changed, 47 insertions(+) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index a3cce2c70835..87d3b6ccf27f 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -3206,6 +3206,47 @@ static long 
vfio_iommu_handle_pgtbl_op(struct vfio_iommu *iommu, return ret; } +static int vfio_dev_cache_invalidate_fn(struct device *dev, void *data) +{ + struct domain_capsule *dc = (struct domain_capsule *)data; + unsigned long arg = *(unsigned long *)dc->data; + struct device *iommu_device; + + iommu_device = vfio_get_iommu_device(dc->group, dev); + if (!iommu_device) + return -EINVAL; + + iommu_uapi_cache_invalidate(dc->domain, iommu_device, + (void __user *)arg); + return 0; +} + +static long vfio_iommu_invalidate_cache(struct vfio_iommu *iommu, + unsigned long arg) +{ + struct domain_capsule dc = { .data = &arg }; + struct iommu_nesting_info *info; + int ret; + + mutex_lock(&iommu->lock); + info = iommu->nesting_info; + if (!info || !(info->features & IOMMU_NESTING_FEAT_CACHE_INVLD)) { + ret = -EOPNOTSUPP; + goto out_unlock; + } + + ret = vfio_prepare_nesting_domain_capsule(iommu, &dc); + if (ret) + goto out_unlock; + + iommu_group_for_each_dev(dc.group->iommu_group, &dc, + vfio_dev_cache_invalidate_fn); + +out_unlock: + mutex_unlock(&iommu->lock); + return ret; +} + static long vfio_iommu_type1_nesting_op(struct vfio_iommu *iommu, unsigned long arg) { @@ -3228,6 +3269,9 @@ static long vfio_iommu_type1_nesting_op(struct vfio_iommu *iommu, case VFIO_IOMMU_NESTING_OP_UNBIND_PGTBL: ret = vfio_iommu_handle_pgtbl_op(iommu, false, arg + minsz); break; + case VFIO_IOMMU_NESTING_OP_CACHE_INVLD: + ret = vfio_iommu_invalidate_cache(iommu, arg + minsz); + break; default: ret = -EINVAL; } diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index ea44df7fac3a..f7ca2ba9cd0f 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -1273,6 +1273,8 @@ struct vfio_iommu_type1_dirty_bitmap_get { * +-----------------+-----------------------------------------------+ * | UNBIND_PGTBL | struct iommu_gpasid_bind_data | * +-----------------+-----------------------------------------------+ + * | CACHE_INVLD | struct iommu_cache_invalidate_info | + * 
+-----------------+-----------------------------------------------+ * * returns: 0 on success, -errno on failure. */ @@ -1286,6 +1288,7 @@ struct vfio_iommu_type1_nesting_op { enum { VFIO_IOMMU_NESTING_OP_BIND_PGTBL, VFIO_IOMMU_NESTING_OP_UNBIND_PGTBL, + VFIO_IOMMU_NESTING_OP_CACHE_INVLD, }; #define VFIO_IOMMU_NESTING_OP _IO(VFIO_TYPE, VFIO_BASE + 18) -- Gitee From 8703aa41fb8b7d17ee3a483833ba52f7b51f871a Mon Sep 17 00:00:00 2001 From: Liu Yi L Date: Tue, 2 Aug 2022 13:14:16 +0800 Subject: [PATCH 13/24] vfio/type1: Handle page response from VM ANBZ: #1704 commit 4ec21bad7482c0736f437a4e0fb874ecbb010889 intel-github. Signed-off-by: Liu Yi L Signed-off-by: Xuchun Shang Signed-off-by: Guanjun Signed-off-by: Aubrey Li Link: https://gitee.com/anolis/cloud-kernel/pulls/562 Reviewed-by: Artie Ding --- drivers/iommu/intel/iommu.c | 7 ++++++- drivers/vfio/vfio_iommu_type1.c | 37 +++++++++++++++++++++++++++++++++ include/uapi/linux/iommu.h | 4 ++++ include/uapi/linux/vfio.h | 3 +++ 4 files changed, 50 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 4e0505fd5db3..950d8f2d389a 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -5878,8 +5878,13 @@ static int intel_iommu_get_nesting_info(struct iommu_domain *domain, info->addr_width = dmar_domain->gaw; info->format = IOMMU_PASID_FORMAT_INTEL_VTD; + /* REVISIT: + * to be precise, may only report SYSWIDE_PASID when pasid is + * supported, also may only report page_resp when PRS is supported + */ info->features = IOMMU_NESTING_FEAT_BIND_PGTBL | - IOMMU_NESTING_FEAT_CACHE_INVLD; + IOMMU_NESTING_FEAT_CACHE_INVLD | + IOMMU_NESTING_FEAT_PAGE_RESP; info->pasid_bits = ilog2(intel_pasid_max_id); memset(&info->padding, 0x0, 12); diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 87d3b6ccf27f..277c0fff5d88 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -3247,6 +3247,40 @@ static long 
vfio_iommu_invalidate_cache(struct vfio_iommu *iommu, return ret; } +static int vfio_dev_page_resp_fn(struct device *dev, void *data) +{ + struct domain_capsule *dc = (struct domain_capsule *)data; + unsigned long arg = *(unsigned long *) dc->data; + + return iommu_page_response(dc->domain, dev, (void __user *) arg); +} + +static long vfio_iommu_page_response(struct vfio_iommu *iommu, + unsigned long arg) +{ + struct domain_capsule dc = { .data = &arg }; + struct iommu_nesting_info *info; + int ret; + + mutex_lock(&iommu->lock); + info = iommu->nesting_info; + if (!info || !(info->features & IOMMU_NESTING_FEAT_PAGE_RESP)) { + ret = -EOPNOTSUPP; + goto out_unlock; + } + + ret = vfio_prepare_nesting_domain_capsule(iommu, &dc); + if (ret) + goto out_unlock; + + ret = iommu_group_for_each_dev(dc.group->iommu_group, &dc, + vfio_dev_page_resp_fn); + +out_unlock: + mutex_unlock(&iommu->lock); + return ret; +} + static long vfio_iommu_type1_nesting_op(struct vfio_iommu *iommu, unsigned long arg) { @@ -3272,6 +3306,9 @@ static long vfio_iommu_type1_nesting_op(struct vfio_iommu *iommu, case VFIO_IOMMU_NESTING_OP_CACHE_INVLD: ret = vfio_iommu_invalidate_cache(iommu, arg + minsz); break; + case VFIO_IOMMU_NESTING_OP_PAGE_RESP: + ret = vfio_iommu_page_response(iommu, arg + minsz); + break; default: ret = -EINVAL; } diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h index b11536a05c38..ec5ad48704ff 100644 --- a/include/uapi/linux/iommu.h +++ b/include/uapi/linux/iommu.h @@ -387,6 +387,9 @@ struct iommu_nesting_info_vtd { * | | UAPI according to vendor-specific requirement when | * | | changing the 1st level/stage page table. 
| * +---------------+------------------------------------------------------+ + * | PAGE_RESP | IOMMU vendor driver sets it to mandate userspace to | + * | | respond to any page request reported by kernel space | + * +---------------+------------------------------------------------------+ * * data struct types defined for @format: * +================================+=====================================+ @@ -402,6 +405,7 @@ struct iommu_nesting_info { __u32 format; #define IOMMU_NESTING_FEAT_BIND_PGTBL (1 << 0) #define IOMMU_NESTING_FEAT_CACHE_INVLD (1 << 1) +#define IOMMU_NESTING_FEAT_PAGE_RESP (1 << 2) __u32 features; __u16 addr_width; __u16 pasid_bits; diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index f7ca2ba9cd0f..f408c1f4b183 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -1275,6 +1275,8 @@ struct vfio_iommu_type1_dirty_bitmap_get { * +-----------------+-----------------------------------------------+ * | CACHE_INVLD | struct iommu_cache_invalidate_info | * +-----------------+-----------------------------------------------+ + * | PAGE_RESP | struct iommu_page_response | + * +-----------------+-----------------------------------------------+ * * returns: 0 on success, -errno on failure. */ @@ -1289,6 +1291,7 @@ enum { VFIO_IOMMU_NESTING_OP_BIND_PGTBL, VFIO_IOMMU_NESTING_OP_UNBIND_PGTBL, VFIO_IOMMU_NESTING_OP_CACHE_INVLD, + VFIO_IOMMU_NESTING_OP_PAGE_RESP, }; #define VFIO_IOMMU_NESTING_OP _IO(VFIO_TYPE, VFIO_BASE + 18) -- Gitee From 683f069791ef38857daa3c9c549e462b649e3fb3 Mon Sep 17 00:00:00 2001 From: Liu Yi L Date: Sat, 27 Jun 2020 20:49:02 -0700 Subject: [PATCH 14/24] vfio/type1: Support page response for IOMMU-capable Mdev ANBZ: #1704 commit 64e63caa9932b43086dfc5523bacef680443518d intel-github. VFIO should pass the physical device info for IOMMU-capable mdev.
Details can refer to below commit: "vfio/type1: Add vSVA support for IOMMU-backed mdevs" Signed-off-by: Liu Yi L Signed-off-by: Xuchun Shang Signed-off-by: Guanjun Link: https://gitee.com/anolis/cloud-kernel/pulls/562 Reviewed-by: Artie Ding --- drivers/vfio/vfio_iommu_type1.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 277c0fff5d88..e9ad2b2583a0 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -3251,8 +3251,14 @@ static int vfio_dev_page_resp_fn(struct device *dev, void *data) { struct domain_capsule *dc = (struct domain_capsule *)data; unsigned long arg = *(unsigned long *) dc->data; + struct device *iommu_device; + + iommu_device = vfio_get_iommu_device(dc->group, dev); + if (!iommu_device) + return -EINVAL; - return iommu_page_response(dc->domain, dev, (void __user *) arg); + return iommu_page_response(dc->domain, iommu_device, + (void __user *) arg); } static long vfio_iommu_page_response(struct vfio_iommu *iommu, -- Gitee From b8360c8aed11cec6fe70874be724d0b291524549 Mon Sep 17 00:00:00 2001 From: Yi Sun Date: Thu, 4 Aug 2022 13:59:07 +0800 Subject: [PATCH 15/24] vfio: Support bind/unbind guest page table to default PASID ANBZ: #1704 commit 6d37212237974325b998dc8ab106f5a6e8cce798 intel-github. For guest IOVA support under nesting IOMMU, hypervisor needs to bind/ unbind guest's IOVA page table to host. For such bind/unbind request from user space, host should figure out a target PASID (host default PASID) to be bound. This patch adds a flag to indicate host IOMMU driver if it needs to use default PASID in host. FIXME: pasid_set = NULL; // dmar_domain->pasid_set; Intel-SIG: commit 6d3721223797 vfio: Support bind/unbind guest page table to default PASID. 
Signed-off-by: Yi Sun Signed-off-by: Xuchun Shang Signed-off-by: Guanjun [ Aubrey Li: amend commit log ] Signed-off-by: Aubrey Li Link: https://gitee.com/anolis/cloud-kernel/pulls/562 Reviewed-by: Artie Ding --- drivers/vfio/vfio_iommu_type1.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index e9ad2b2583a0..1f0c8a1e6517 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -2567,6 +2567,18 @@ static void vfio_group_unbind_gpasid_fn(ioasid_t pasid, void *data) dc, vfio_dev_unbind_gpasid_fn); } +static void vfio_group_unbind_default_gpasid(ioasid_t pasid, void *data) +{ + struct domain_capsule *dc = (struct domain_capsule *)data; + + dc->user = false; + dc->data = &pasid; + dc->flags = IOMMU_SVA_HPASID_DEF; + + iommu_group_for_each_dev(dc->group->iommu_group, + dc, vfio_dev_unbind_gpasid_fn); +} + static void vfio_iommu_type1_detach_group(void *iommu_data, struct iommu_group *iommu_group) { @@ -2631,6 +2643,11 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, vfio_group_unbind_gpasid_fn); ioasid_user_put(iuser); } + /* + * We should explicitly call interface to unbind default pasid gIOVA + * page table here. + */ + vfio_group_unbind_default_gpasid(0, &dc); } #endif vfio_iommu_detach_group(domain, group); -- Gitee From a0c81c8892a39b1b12b46d54868c06e620daa588 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 4 Aug 2022 14:01:33 +0800 Subject: [PATCH 16/24] dmaengine: idxd: remove shared wq restriction on config load ANBZ: #1704 commit 684faefea881973dbada146515c3902960efed41 intel-github. Shared WQ restriction accidentally left in idxd_wq_load_config(). Not needed when we support swq in VDCM. Intel-SIG: commit 684faefea881 dmaengine: idxd: remove shared wq restriction on config load. 
Signed-off-by: Dave Jiang Signed-off-by: Xuchun Shang Signed-off-by: Guanjun Signed-off-by: Fengqian Gao [ Aubrey Li: amend commit log ] Signed-off-by: Aubrey Li Link: https://gitee.com/anolis/cloud-kernel/pulls/562 Reviewed-by: Artie Ding --- drivers/dma/idxd/device.c | 13 ++++++++----- drivers/dma/idxd/idxd.h | 1 + 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 5533b025ec8c..b2a6546b8e57 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -1193,14 +1193,17 @@ static int idxd_wq_load_config(struct idxd_wq *wq) wq->size = wq->wqcfg->wq_size; wq->threshold = wq->wqcfg->wq_thresh; - /* The driver does not support shared WQ mode in read-only config yet */ - if (wq->wqcfg->mode == 0 || wq->wqcfg->pasid_en) - return -EOPNOTSUPP; - - set_bit(WQ_FLAG_DEDICATED, &wq->flags); + if (wq->wqcfg->mode) + set_bit(WQ_FLAG_DEDICATED, &wq->flags); wq->priority = wq->wqcfg->priority; + if (wq->wqcfg->bof) + set_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags); + + if (wq->wqcfg->mode_support) + set_bit(WQ_FLAG_MODE_1, &wq->flags); + wq->max_xfer_bytes = 1ULL << wq->wqcfg->max_xfer_shift; wq->max_batch_size = 1ULL << wq->wqcfg->max_batch_shift; diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 098cc46010e7..0dc1605789fc 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -142,6 +142,7 @@ enum idxd_wq_state { enum idxd_wq_flag { WQ_FLAG_DEDICATED = 0, WQ_FLAG_BLOCK_ON_FAULT, + WQ_FLAG_MODE_1, }; enum idxd_wq_type { -- Gitee From 250e6df8d14e1fe167e7cc877c4e2a435cc5e9e1 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 13 Jul 2022 21:35:00 +0800 Subject: [PATCH 17/24] dmaengine: idxd: move wq pasid configuration until when needed ANBZ: #1704 commit 098159e3d405124cbc2020d93f529dff0eb7869e intel-github. The main issue is that when we program wq with pasid of 0 but pasid_en set for shared wq, it confuses the VDCM. 
Remove the WQ pasid setting during configuration programming of the device as the driver does not have enough information at that point to make that decision. Program the WQCFG pasid and pasid_en at the time when it is needed. Add support in VDCM to understand what pasid_en=1 and pasid=0 means. Intel-SIG: commit 098159e3d405 dmaengine: idxd: move wq pasid configuration until when needed. Deviation from intel-github: Only apply part of commit. Also integrate commit c2f10b603dc626985c6d3dcce79aee52ef95be2d from intel-github. dmaengine: idxd: set priv bit to 0 when wq is not kernel type Set priv bit to 0 for read-only WQ setup when the WQ type is not kernel. Current code does not touch the priv bit when the WQ is not kernel type. This non-action causes problem wq reconfiguration where the WQ is configured as a kernel type, gets disabled, and then reconfigured to be a user WQ. The priv bit that was set by the kernel type config remains. Signed-off-by: Dave Jiang [ Fengqian Gao: amend commit log ] Signed-off-by: Fengqian Gao Link: https://gitee.com/anolis/cloud-kernel/pulls/562 Reviewed-by: Artie Ding --- drivers/dma/idxd/device.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index b2a6546b8e57..90fe7d3dcd9b 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -1454,26 +1454,27 @@ int __drv_enable_wq(struct idxd_wq *wq) } /* - * In the event that the WQ is configurable for pasid and priv bits. - * For kernel wq, the driver should setup the pasid, pasid_en, and priv bit. - * However, for non-kernel wq, the driver should only set the pasid_en bit for - * shared wq. A dedicated wq that is not 'kernel' type will configure pasid and - * pasid_en later on so there is no need to setup. 
- */ - if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) { - int priv = 0; - - if (device_pasid_enabled(idxd)) { - if (is_idxd_wq_kernel(wq) || wq_shared(wq)) { + * In the event that the WQ is configurable for pasid and priv bits. + * For kernel wq, the driver should setup the pasid, pasid_en, and priv bit. + * However, for non-kernel wq, the driver should only set the pasid_en bit for + * shared wq. A dedicated wq will configure pasid and pasid_en later on so + * there is no need to setup. + */ + if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags) || + test_bit(WQ_FLAG_MODE_1, &wq->flags)) { + if (is_idxd_wq_kernel(wq)) { + if (device_pasid_enabled(idxd)) { u32 pasid = wq_dedicated(wq) ? idxd->pasid : 0; __idxd_wq_set_pasid_locked(wq, pasid); } + __idxd_wq_set_priv_locked(wq, 1); + } else { + if (device_user_pasid_enabled(idxd) && wq_shared(wq)) + __idxd_wq_set_pasid_locked(wq, 0); + __idxd_wq_set_priv_locked(wq, 0); } - if (is_idxd_wq_kernel(wq)) - priv = 1; - __idxd_wq_set_priv_locked(wq, priv); } rc = 0; -- Gitee From 8e3d6d4b3e0e998903e6a3d121aed39c1f043917 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Fri, 1 Apr 2022 08:09:01 -0700 Subject: [PATCH 18/24] iommu/vt-d: Allow disabling PRQ timeout ANBZ: #1704 commit bc603fa1d83379e290421aa15e1c8840f4939a0c intel-github. For debug purpose, if user use cmdline iommu.prq_timeout=0, let's disable PRQ timeout tracking. The downside is that we will not limit mis-behaved guests. Intel-SIG: commit bc603fa1d833 iommu/vt-d: Allow disabling PRQ timeout. 
Signed-off-by: Jacob Pan [ Fengqian Gao: amend commit log ] Signed-off-by: Fengqian Gao Link: https://gitee.com/anolis/cloud-kernel/pulls/562 Reviewed-by: Artie Ding --- drivers/iommu/iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 2dafca037dd9..16c4829fefcf 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1403,7 +1403,7 @@ int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) exp = get_jiffies_64() + prq_timeout; evt_pending->expire = exp; mutex_lock(&fparam->lock); - if (list_empty(&fparam->faults)) { + if (list_empty(&fparam->faults) && prq_timeout) { /* First pending event, start timer */ tmr = &dev->iommu->fault_param->timer; WARN_ON(timer_pending(tmr)); @@ -1557,7 +1557,7 @@ int iommu_page_response(struct iommu_domain *domain, } /* stop response timer if no more pending request */ - if (list_empty(&param->fault_param->faults) && + if (prq_timeout && list_empty(&param->fault_param->faults) && timer_pending(&param->fault_param->timer)) { pr_debug("no pending PRQ, stop timer\n"); del_timer(&param->fault_param->timer); -- Gitee From 2e674252f998f2adb3cf52916a1422c376344649 Mon Sep 17 00:00:00 2001 From: Liu Yi L Date: Thu, 14 Jul 2022 21:44:13 +0800 Subject: [PATCH 19/24] iommu/vt-d: Fix an issue in intel_svm_free_async_fn() ANBZ: #1704 commit 724dca3a9c8962bb66d60fa19bc529d18dabc907 intel-github. For subdevices, intel_svm_free_async_fn() should be the same cleanup work as intel_sva_unbind_gpasid(). Deleting the per-pasid fault_data is part of it. Without this, the next time the same pasid was used, it will report failure when trying to bind it to a fault_data. Intel-SIG: commit 724dca3a9c89 iommu/vt-d: Fix an issue in intel_svm_free_async_fn().
Signed-off-by: Liu Yi L Signed-off-by: Yi Sun [ Fengqian Gao: amend commit log ] Signed-off-by: Fengqian Gao Link: https://gitee.com/anolis/cloud-kernel/pulls/562 Reviewed-by: Artie Ding --- drivers/iommu/intel/svm.c | 7 +++++++ include/linux/intel-iommu.h | 1 + 2 files changed, 8 insertions(+) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 2e8eae9ee94f..5477be649512 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -188,6 +188,11 @@ static void intel_svm_free_async_fn(struct work_struct *work) svm->pasid, true); intel_svm_drain_prq(sdev->dev, svm->pasid); spin_unlock(&sdev->iommu->lock); + /* + * Partial assignment needs to delete fault data + */ + if (is_aux_domain(sdev->dev, &sdev->domain->domain)) + iommu_delete_device_fault_data(sdev->dev, svm->pasid); kfree_rcu(sdev, rcu); } /* @@ -518,6 +523,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, sdev->dev = dev; sdev->sid = PCI_DEVID(info->bus, info->devfn); sdev->iommu = iommu; + sdev->domain = dmar_domain; /* Only count users if device has aux domains */ if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX)) @@ -742,6 +748,7 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, sdev->qdep = 0; } + sdev->domain = info->domain; /* Finish the setup now we know we're keeping it */ sdev->users = 1; init_rcu_head(&sdev->rcu); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index e021855f4c49..454928856f6a 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -788,6 +788,7 @@ struct intel_svm_dev { struct rcu_head rcu; struct device *dev; struct intel_iommu *iommu; + struct dmar_domain *domain; struct iommu_sva sva; u32 pasid; int users; -- Gitee From cad85330a92ef8f2bd1ee152412c0c76aad16652 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 12 Aug 2022 21:37:48 +0800 Subject: [PATCH 20/24] iommu/vt-d: do not clear pte only if it is gIOVA case ANBZ: #1704 commit 77d7c99a6b30e30d436b01b1af302d9e56b985f8 
intel-github. In commit "iommu/vt-d: do not clear pte if pgtt is nesting", we do not clear pte if pgtt is nesting mode. But this also covers guest SVM pasid unbind case. For this case, we should clear the pte. Otherwise it looks confusing by checking iommu debugfs pte output (there is no functional error found). To achieve this, we added one parameter in 'intel_pasid_tear_down_entry' but removed the codes to set VTD_FLAG_PGTT_SL_ONLY into 'iommu->flags'. Because 'iommu->flags' is a global parameter which is not appropriate to handle such special device (assigned to VM) case. Intel-SIG: commit 77d7c99a6b30 iommu: unresolved merge conflicts. Deviation from intel-github: Extract part of commit 77d7c99a6b30 iommu: unresolved merge conflicts. Signed-off-by: Jacob Pan [ Fengqian Gao: amend commit log ] Signed-off-by: Fengqian Gao Signed-off-by: Aubrey Li Link: https://gitee.com/anolis/cloud-kernel/pulls/562 Reviewed-by: Artie Ding --- drivers/iommu/intel/iommu.c | 8 ++++---- drivers/iommu/intel/pasid.c | 9 ++++----- drivers/iommu/intel/pasid.h | 2 +- drivers/iommu/intel/svm.c | 11 ++++++----- include/linux/intel-iommu.h | 1 - 5 files changed, 15 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 950d8f2d389a..d04235c55b9a 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4672,12 +4672,12 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) if (info->dev && !dev_is_real_dma_subdevice(info->dev)) { if (dev_is_pci(info->dev) && sm_supported(iommu)) { intel_pasid_tear_down_entry(iommu, info->dev, - PASID_RID2PASID, false); + PASID_RID2PASID, false, false); pasid = iommu_get_pasid_from_domain(info->dev, &info->domain->domain); if (pasid != INVALID_IOASID) intel_pasid_tear_down_entry(iommu, info->dev, - pasid, false); + pasid, false, false); } iommu_disable_dev_iotlb(info); @@ -4938,7 +4938,7 @@ static void aux_domain_remove_dev(struct dmar_domain *domain, if 
(!auxiliary_unlink_device(domain, dev)) { spin_lock(&iommu->lock); intel_pasid_tear_down_entry(iommu, dev, - domain->default_pasid, false); + domain->default_pasid, false, false); domain_detach_iommu(domain, iommu); spin_unlock(&iommu->lock); } @@ -6492,7 +6492,7 @@ static void intel_iommu_detach_dev_pasid(struct iommu_domain *domain, return; spin_lock_irqsave(&iommu->lock, flags); - intel_pasid_tear_down_entry(iommu, dev, pasid, false); + intel_pasid_tear_down_entry(iommu, dev, pasid, false, false); spin_unlock_irqrestore(&iommu->lock, flags); } diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index aeb9050693d8..fa5b683783f4 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -310,7 +310,7 @@ static inline void pasid_clear_entry_with_fpd(struct pasid_entry *pe) static void intel_pasid_clear_entry(struct intel_iommu *iommu, struct device *dev, - u32 pasid, bool fault_ignore) + u32 pasid, bool fault_ignore, bool keep_pte) { struct pasid_entry *pe; u64 pe_val; @@ -327,10 +327,9 @@ intel_pasid_clear_entry(struct intel_iommu *iommu, struct device *dev, */ pe_val = READ_ONCE(pe->val[0]); nested = (((pe_val >> 6) & 0x7) == PASID_ENTRY_PGTT_NESTED) ? 
true : false; - if (nested && (iommu->flags & VTD_FLAG_PGTT_SL_ONLY)) { + if (nested && keep_pte) { pe_val &= 0xfffffffffffffebf; WRITE_ONCE(pe->val[0], pe_val); - iommu->flags &= ~VTD_FLAG_PGTT_SL_ONLY; return; } @@ -558,7 +557,7 @@ flush_iotlb_all(struct intel_iommu *iommu, struct device *dev, } void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev, - u32 pasid, bool fault_ignore) + u32 pasid, bool fault_ignore, bool keep_pte) { struct pasid_entry *pte; u16 did, pgtt; @@ -571,7 +570,7 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev, return; did = pasid_get_domain_id(pte); - intel_pasid_clear_entry(iommu, dev, pasid, fault_ignore); + intel_pasid_clear_entry(iommu, dev, pasid, fault_ignore, keep_pte); if (!ecap_coherent(iommu->ecap)) clflush_cache_range(pte, sizeof(*pte)); diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index 437dfd8e16fa..80cb29a63538 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -124,7 +124,7 @@ int intel_pasid_setup_nested(struct intel_iommu *iommu, struct dmar_domain *domain, int addr_width); void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev, u32 pasid, - bool fault_ignore); + bool fault_ignore, bool keep_pte); int vcmd_alloc_pasid(struct intel_iommu *iommu, u32 *pasid); void vcmd_free_pasid(struct intel_iommu *iommu, u32 pasid); int intel_pasid_setup_slade(struct device *dev, struct dmar_domain *domain, diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 5477be649512..4674bc502882 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -185,7 +185,7 @@ static void intel_svm_free_async_fn(struct work_struct *work) list_del_rcu(&sdev->list); spin_lock(&sdev->iommu->lock); intel_pasid_tear_down_entry(sdev->iommu, sdev->dev, - svm->pasid, true); + svm->pasid, true, false); intel_svm_drain_prq(sdev->dev, svm->pasid); spin_unlock(&sdev->iommu->lock); /* @@ -355,7 +355,7 
@@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) rcu_read_lock(); list_for_each_entry_rcu(sdev, &svm->devs, list) intel_pasid_tear_down_entry(sdev->iommu, sdev->dev, - svm->pasid, true); + svm->pasid, true, false); rcu_read_unlock(); } @@ -590,6 +590,7 @@ int intel_svm_unbind_gpasid(struct iommu_domain *domain, int ret; struct dmar_domain *dmar_domain; struct ioasid_set *pasid_set; + bool keep_pte = false; if (WARN_ON(!iommu)) return -EINVAL; @@ -603,6 +604,7 @@ int intel_svm_unbind_gpasid(struct iommu_domain *domain, return ret; pasid = ret; pasid_set = host_pasid_set; + keep_pte = true; } mutex_lock(&pasid_mutex); @@ -615,9 +617,8 @@ int intel_svm_unbind_gpasid(struct iommu_domain *domain, sdev->users--; if (!sdev->users) { list_del_rcu(&sdev->list); - iommu->flags |= VTD_FLAG_PGTT_SL_ONLY; intel_pasid_tear_down_entry(iommu, dev, - svm->pasid, false); + svm->pasid, false, keep_pte); intel_svm_drain_prq(dev, svm->pasid); /* * Partial assignment needs to delete fault data @@ -870,7 +871,7 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) * large and has to be physically contiguous. So it's * hard to be as defensive as we might like. 
*/ intel_pasid_tear_down_entry(iommu, dev, - svm->pasid, false); + svm->pasid, false, false); intel_svm_drain_prq(dev, svm->pasid); kfree_rcu(sdev, rcu); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 454928856f6a..e4937dd0a6e2 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -479,7 +479,6 @@ enum { #define VTD_FLAG_TRANS_PRE_ENABLED (1 << 0) #define VTD_FLAG_IRQ_REMAP_PRE_ENABLED (1 << 1) #define VTD_FLAG_SVM_CAPABLE (1 << 2) -#define VTD_FLAG_PGTT_SL_ONLY (1 << 3) extern int intel_iommu_sm; extern spinlock_t device_domain_lock; -- Gitee From 402efcad87325bd664aa35ece0261a50ae08bfd5 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 27 Jul 2022 09:31:17 +0800 Subject: [PATCH 21/24] iommu/vt-d: remove useless parameters of flush_iotlb_all ANBZ: #1704 commit 77d7c99a6b30e30d436b01b1af302d9e56b985f8 intel-github. Sync flush_iotlb_all function with intel-github. Intel-SIG: commit 77d7c99a6b30 iommu: unresolved merge conflicts. Deviation from intel-github: Extract flush_iotlb_all function part from commit 77d7c99a6b30 iommu: unresolved merge conflicts. 
Signed-off-by: Jacob Pan [ Fengqian Gao: amend commit log ] Signed-off-by: Fengqian Gao Signed-off-by: Aubrey Li Link: https://gitee.com/anolis/cloud-kernel/pulls/562 Reviewed-by: Artie Ding --- drivers/iommu/intel/pasid.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index fa5b683783f4..4a18a319f3b0 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -543,13 +543,13 @@ devtlb_invalidation_with_pasid(struct intel_iommu *iommu, static void flush_iotlb_all(struct intel_iommu *iommu, struct device *dev, - u16 did, u16 pgtt, u32 pasid, u64 type) + u16 did, u32 pasid, u64 type) { pasid_cache_invalidation_with_pasid(iommu, did, pasid); if (type) iommu->flush.flush_iotlb(iommu, did, 0, 0, type); - else if (pgtt == PASID_ENTRY_PGTT_PT || pgtt == PASID_ENTRY_PGTT_FL_ONLY) + else qi_flush_piotlb(iommu, did, pasid, 0, -1, 0); if (!cap_caching_mode(iommu->cap)) @@ -560,7 +560,7 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev, u32 pasid, bool fault_ignore, bool keep_pte) { struct pasid_entry *pte; - u16 did, pgtt; + u16 did; pte = intel_pasid_get_entry(dev, pasid); if (WARN_ON(!pte)) @@ -575,7 +575,7 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev, if (!ecap_coherent(iommu->ecap)) clflush_cache_range(pte, sizeof(*pte)); - flush_iotlb_all(iommu, dev, did, pgtt, pasid, 0); + flush_iotlb_all(iommu, dev, did, pasid, 0); } /* @@ -993,7 +993,7 @@ int intel_pasid_setup_slade(struct device *dev, struct dmar_domain *domain, pasid_set_slade(pte, value); - flush_iotlb_all(iommu, dev, did, 0, pasid, DMA_TLB_DSI_FLUSH); + flush_iotlb_all(iommu, dev, did, pasid, DMA_TLB_DSI_FLUSH); return 0; } -- Gitee From f6d8157157dad6c3bed7cafebb6fea6006d09a23 Mon Sep 17 00:00:00 2001 From: Sanjay Kumar Date: Wed, 27 Jul 2022 09:35:23 +0800 Subject: [PATCH 22/24] iommu/vt-d: Fix incorrect invalidation when tearing down a 
pasid table entry ANBZ: #1704 commit 9a4130d8d00ae2c2223983827d2e0485ffa7f051 intel-github. Use domain selective invalidation for SL-Only and Nested PGTT types. Use pasid-selective invalidation for FL-Only and Pass-Through PGTT types. Intel-SIG: commit 9a4130d8d00a Fix incorrect invalidation when tearing down a pasid table entry. [ Fengqian Gao: amend commit log ] Signed-off-by: Fengqian Gao Signed-off-by: Aubrey Li Link: https://gitee.com/anolis/cloud-kernel/pulls/562 Reviewed-by: Artie Ding --- drivers/iommu/intel/pasid.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 4a18a319f3b0..bce8c205173a 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -561,6 +561,8 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev, { struct pasid_entry *pte; u16 did; + u64 pe_val; + u16 pgtt_type; pte = intel_pasid_get_entry(dev, pasid); if (WARN_ON(!pte)) @@ -570,12 +572,19 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev, return; did = pasid_get_domain_id(pte); + pe_val = READ_ONCE(pte->val[0]); + pgtt_type = (pe_val >> 6) & 0x7; + intel_pasid_clear_entry(iommu, dev, pasid, fault_ignore, keep_pte); if (!ecap_coherent(iommu->ecap)) clflush_cache_range(pte, sizeof(*pte)); - flush_iotlb_all(iommu, dev, did, pasid, 0); + if (pgtt_type == PASID_ENTRY_PGTT_FL_ONLY || + pgtt_type == PASID_ENTRY_PGTT_PT) + flush_iotlb_all(iommu, dev, did, pasid, 0); + else + flush_iotlb_all(iommu, dev, did, pasid, DMA_TLB_DSI_FLUSH); } /* -- Gitee From 2a93cfbf8d5f8c9aab45877cc6e86dc5644763c1 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 15 Jul 2022 22:27:59 +0800 Subject: [PATCH 23/24] iommu/vt-d: Remove IOVA domain rcache flushing for CPU offlining ANBZ: #1704 commit 363f266eeff6e22a09483dc922dccd7cd0b9fe9c upstream. Now that the core code handles flushing per-IOVA domain CPU rcaches, remove the handling here. 
Intel-SIG: commit 363f266eeff6 iommu/vt-d: Remove IOVA domain rcache flushing for CPU offlining. Reviewed-by: Lu Baolu Signed-off-by: John Garry Link: https://lore.kernel.org/r/1616675401-151997-3-git-send-email-john.garry@huawei.com Signed-off-by: Joerg Roedel [ Fengqian Gao: amend commit log ] Signed-off-by: Fengqian Gao Link: https://gitee.com/anolis/cloud-kernel/pulls/562 Reviewed-by: Artie Ding --- drivers/iommu/intel/iommu.c | 31 ------------------------------- include/linux/cpuhotplug.h | 1 - 2 files changed, 32 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index d04235c55b9a..ab653181d74b 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4279,35 +4279,6 @@ static struct notifier_block intel_iommu_memory_nb = { .priority = 0 }; -static void free_all_cpu_cached_iovas(unsigned int cpu) -{ - int i; - - for (i = 0; i < g_num_of_iommus; i++) { - struct intel_iommu *iommu = g_iommus[i]; - struct dmar_domain *domain; - int did; - - if (!iommu) - continue; - - for (did = 0; did < cap_ndoms(iommu->cap); did++) { - domain = get_iommu_domain(iommu, (u16)did); - - if (!domain || domain->domain.type != IOMMU_DOMAIN_DMA) - continue; - - iommu_dma_free_cpu_cached_iovas(cpu, &domain->domain); - } - } -} - -static int intel_iommu_cpu_dead(unsigned int cpu) -{ - free_all_cpu_cached_iovas(cpu); - return 0; -} - static void intel_disable_iommus(void) { struct intel_iommu *iommu = NULL; @@ -4602,8 +4573,6 @@ int __init intel_iommu_init(void) bus_set_iommu(&pci_bus_type, &intel_iommu_ops); if (si_domain && !hw_pass_through) register_memory_notifier(&intel_iommu_memory_nb); - cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL, - intel_iommu_cpu_dead); down_read(&dmar_global_lock); if (probe_acpi_namespace_devices()) diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 4c90a92e1f5d..54f8b61d5bce 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -57,7 
+57,6 @@ enum cpuhp_state { CPUHP_PAGE_ALLOC_DEAD, CPUHP_NET_DEV_DEAD, CPUHP_PCI_XGENE_DEAD, - CPUHP_IOMMU_INTEL_DEAD, CPUHP_LUSTRE_CFS_DEAD, CPUHP_AP_ARM_CACHE_B15_RAC_DEAD, CPUHP_PADATA_DEAD, -- Gitee From edc1d4db3b7929a69f30e83324494f019fbac8a2 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 15 Jul 2022 22:38:46 +0800 Subject: [PATCH 24/24] iommu: Delete iommu_dma_free_cpu_cached_iovas() ANBZ: #1704 commit 149448b353e2517ecc6eced7d9f46e9f3e08b89e upstream. Function iommu_dma_free_cpu_cached_iovas() no longer has any caller, so delete it. With that, function free_cpu_cached_iovas() may be made static. Intel-SIG: commit 149448b353e2 iommu: Delete iommu_dma_free_cpu_cached_iovas(). Signed-off-by: John Garry Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/1616675401-151997-4-git-send-email-john.garry@huawei.com Signed-off-by: Joerg Roedel [ Fengqian Gao: amend commit log ] Signed-off-by: Fengqian Gao Link: https://gitee.com/anolis/cloud-kernel/pulls/562 Reviewed-by: Artie Ding --- drivers/iommu/dma-iommu.c | 9 --------- drivers/iommu/iova.c | 3 ++- include/linux/dma-iommu.h | 5 ----- include/linux/iova.h | 5 ----- 4 files changed, 2 insertions(+), 20 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 9f16146951cc..ccd498d8ca96 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -51,15 +51,6 @@ struct iommu_dma_cookie { struct iommu_domain *fq_domain; }; -void iommu_dma_free_cpu_cached_iovas(unsigned int cpu, - struct iommu_domain *domain) -{ - struct iommu_dma_cookie *cookie = domain->iova_cookie; - struct iova_domain *iovad = &cookie->iovad; - - free_cpu_cached_iovas(cpu, iovad); -} - static void iommu_dma_entry_dtor(unsigned long data) { struct page *freelist = (struct page *)data; diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 4600e97acb26..0c8810002404 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -22,6 +22,7 @@ static unsigned long iova_rcache_get(struct 
iova_domain *iovad, unsigned long size, unsigned long limit_pfn); static void init_iova_rcaches(struct iova_domain *iovad); +static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad); static void free_iova_rcaches(struct iova_domain *iovad); static void fq_destroy_all_entries(struct iova_domain *iovad); static void fq_flush_timeout(struct timer_list *t); @@ -1029,7 +1030,7 @@ static void free_iova_rcaches(struct iova_domain *iovad) /* * free all the IOVA ranges cached by a cpu (used when cpu is unplugged) */ -void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad) +static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad) { struct iova_cpu_rcache *cpu_rcache; struct iova_rcache *rcache; diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index bbf29c981bbd..d810139229e2 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -84,10 +84,5 @@ static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_he { } -static inline void iommu_dma_free_cpu_cached_iovas(unsigned int cpu, - struct iommu_domain *domain) -{ -} - #endif /* CONFIG_IOMMU_DMA */ #endif /* __DMA_IOMMU_H */ diff --git a/include/linux/iova.h b/include/linux/iova.h index a0637abffee8..c810a95bfb30 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -162,7 +162,6 @@ struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); void put_iova_domain(struct iova_domain *iovad); struct iova *split_and_remove_iova(struct iova_domain *iovad, struct iova *iova, unsigned long pfn_lo, unsigned long pfn_hi); -void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad); #else static inline int iova_cache_get(void) { @@ -266,10 +265,6 @@ static inline struct iova *split_and_remove_iova(struct iova_domain *iovad, return NULL; } -static inline void free_cpu_cached_iovas(unsigned int cpu, - struct iova_domain *iovad) -{ -} #endif #endif -- Gitee