diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 802c8d9152296c4630991a64c52ca041e0502dbe..3fd0a3888d22abd3b20593ec4fa7cd1b515b6be9 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -7,6 +7,10 @@ config IOMMU_IOVA config IOMMU_API bool +config IOMMUFD_DRIVER + bool + default n + menuconfig IOMMU_SUPPORT bool "IOMMU Hardware Support" depends on MMU diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig index 9b5fc3356bf2d8ac1ca9e3a8398200ae8ccd2940..8bd4c3b183ec6e475b58a1990d7b5c33ab141120 100644 --- a/drivers/iommu/amd/Kconfig +++ b/drivers/iommu/amd/Kconfig @@ -10,6 +10,7 @@ config AMD_IOMMU select IOMMU_API select IOMMU_IOVA select IOMMU_IO_PGTABLE + select IOMMUFD_DRIVER if IOMMUFD depends on X86_64 && PCI && ACPI && HAVE_CMPXCHG_DOUBLE help With this option you can enable support for AMD IOMMU hardware in diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 7dc30c2b56b302d8bd3cd129f410610d536059e6..dec4e5c2b66b8236fcd6faeb8497fdc9b42dfe20 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -97,7 +97,9 @@ #define FEATURE_GATS_MASK (3ULL) #define FEATURE_GAM_VAPIC BIT_ULL(21) #define FEATURE_GIOSUP BIT_ULL(48) +#define FEATURE_HASUP BIT_ULL(49) #define FEATURE_EPHSUP BIT_ULL(50) +#define FEATURE_HDSUP BIT_ULL(52) #define FEATURE_SNP BIT_ULL(63) #define FEATURE_PASID_SHIFT 32 @@ -212,6 +214,7 @@ /* macros and definitions for device table entries */ #define DEV_ENTRY_VALID 0x00 #define DEV_ENTRY_TRANSLATION 0x01 +#define DEV_ENTRY_HAD 0x07 #define DEV_ENTRY_PPR 0x34 #define DEV_ENTRY_IR 0x3d #define DEV_ENTRY_IW 0x3e @@ -370,10 +373,16 @@ #define PTE_LEVEL_PAGE_SIZE(level) \ (1ULL << (12 + (9 * (level)))) +/* + * The IOPTE dirty bit + */ +#define IOMMU_PTE_HD_BIT (6) + /* * Bit value definition for I/O PTE fields */ #define IOMMU_PTE_PR BIT_ULL(0) +#define IOMMU_PTE_HD BIT_ULL(IOMMU_PTE_HD_BIT) #define IOMMU_PTE_U BIT_ULL(59) #define IOMMU_PTE_FC BIT_ULL(60) #define IOMMU_PTE_IR BIT_ULL(61) @@ -384,6 +393,7 @@ */ #define DTE_FLAG_V BIT_ULL(0) #define DTE_FLAG_TV BIT_ULL(1) +#define DTE_FLAG_HAD (3ULL << 7) #define DTE_FLAG_GIOV BIT_ULL(54) #define DTE_FLAG_GV BIT_ULL(55) #define DTE_GLX_SHIFT (56) @@ -413,6 +423,7 @@ #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL) #define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_PR) +#define IOMMU_PTE_DIRTY(pte) ((pte) & IOMMU_PTE_HD) #define IOMMU_PTE_PAGE(pte) (iommu_phys_to_virt((pte) & IOMMU_PAGE_MASK)) #define IOMMU_PTE_MODE(pte) (((pte) >> 9) & 0x07) @@ -563,6 +574,7 @@ struct protection_domain { int nid; /* Node ID */ u64 *gcr3_tbl; /* Guest CR3 table */ unsigned long flags; /* flags to find out type of domain */ + bool dirty_tracking; /* dirty tracking is enabled in the domain */ unsigned dev_cnt; /* devices assigned to this domain */ unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ }; diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 2892aa1b4dc1db1771b9ebe5d418a14da9e5f456..6c0621f6f572a4c4c0fb72ea1bdb5abe9d504311 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -486,6 +486,73 @@ static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned lo return (__pte & ~offset_mask) | (iova & offset_mask); } +static bool pte_test_and_clear_dirty(u64 *ptep, unsigned long size, + unsigned long flags) +{ + bool test_only = flags & IOMMU_DIRTY_NO_CLEAR; + bool dirty = false; + int i, count; + + /* + * 2.2.3.2 Host Dirty Support + * When a non-default page size is used , software must OR the + * Dirty bits in all of the replicated host PTEs used to map + * the page. The IOMMU does not guarantee the Dirty bits are + * set in all of the replicated PTEs. Any portion of the page + * may have been written even if the Dirty bit is set in only + * one of the replicated PTEs. + */ + count = PAGE_SIZE_PTE_COUNT(size); + for (i = 0; i < count && test_only; i++) { + if (test_bit(IOMMU_PTE_HD_BIT, (unsigned long *)&ptep[i])) { + dirty = true; + break; + } + } + + for (i = 0; i < count && !test_only; i++) { + if (test_and_clear_bit(IOMMU_PTE_HD_BIT, + (unsigned long *)&ptep[i])) { + dirty = true; + } + } + + return dirty; +} + +static int iommu_v1_read_and_clear_dirty(struct io_pgtable_ops *ops, + unsigned long iova, size_t size, + unsigned long flags, + struct iommu_dirty_bitmap *dirty) +{ + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); + unsigned long end = iova + size - 1; + + do { + unsigned long pgsize = 0; + u64 *ptep, pte; + + ptep = fetch_pte(pgtable, iova, &pgsize); + if (ptep) + pte = READ_ONCE(*ptep); + if (!ptep || !IOMMU_PTE_PRESENT(pte)) { + pgsize = pgsize ?: PTE_LEVEL_PAGE_SIZE(0); + iova += pgsize; + continue; + } + + /* + * Mark the whole IOVA range as dirty even if only one of + * the replicated PTEs were marked dirty. + */ + if (pte_test_and_clear_dirty(ptep, pgsize, flags)) + iommu_dirty_bitmap_record(dirty, iova, pgsize); + iova += pgsize; + } while (iova < end); + + return 0; +} + /* * ---------------------------------------------------- */ @@ -527,6 +594,7 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo pgtable->iop.ops.map_pages = iommu_v1_map_pages; pgtable->iop.ops.unmap_pages = iommu_v1_unmap_pages; pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys; + pgtable->iop.ops.read_and_clear_dirty = iommu_v1_read_and_clear_dirty; return &pgtable->iop; } diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 95bd7c25ba6f366b5db2582e8cb5318491cbb523..caad10f9cee3f903d38a30df988bd7fa78551655 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "amd_iommu.h" #include "../dma-iommu.h" @@ -65,6 +66,7 @@ LIST_HEAD(hpet_map); LIST_HEAD(acpihid_map); const struct iommu_ops amd_iommu_ops; +const struct iommu_dirty_ops amd_dirty_ops; static ATOMIC_NOTIFIER_HEAD(ppr_notifier); int amd_iommu_max_glx_val = -1; @@ -1610,6 +1612,9 @@ static void set_dte_entry(struct amd_iommu *iommu, u16 devid, pte_root |= 1ULL << DEV_ENTRY_PPR; } + if (domain->dirty_tracking) + pte_root |= DTE_FLAG_HAD; + if (domain->flags & PD_IOMMUV2_MASK) { u64 gcr3 = iommu_virt_to_phys(domain->gcr3_tbl); u64 glx = domain->glx; @@ -2155,28 +2160,76 @@ static inline u64 dma_max_address(void) return ((1ULL << PM_LEVEL_SHIFT(amd_iommu_gpt_level)) - 1); } -static struct iommu_domain *amd_iommu_domain_alloc(unsigned type) +static bool amd_iommu_hd_support(struct amd_iommu *iommu) +{ + return iommu && (iommu->features & FEATURE_HDSUP); +} + +static struct iommu_domain *do_iommu_domain_alloc(unsigned int type, + struct device *dev, u32 flags) { + bool dirty_tracking = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING; struct protection_domain *domain; + struct amd_iommu *iommu = NULL; + + if (dev) { + iommu = rlookup_amd_iommu(dev); + if (!iommu) + return ERR_PTR(-ENODEV); + } /* * Since DTE[Mode]=0 is prohibited on SNP-enabled system, * default to use IOMMU_DOMAIN_DMA[_FQ]. */ if (amd_iommu_snp_en && (type == IOMMU_DOMAIN_IDENTITY)) - return NULL; + return ERR_PTR(-EINVAL); + + if (dirty_tracking && !amd_iommu_hd_support(iommu)) + return ERR_PTR(-EOPNOTSUPP); domain = protection_domain_alloc(type); if (!domain) - return NULL; + return ERR_PTR(-ENOMEM); domain->domain.geometry.aperture_start = 0; domain->domain.geometry.aperture_end = dma_max_address(); domain->domain.geometry.force_aperture = true; + if (iommu) { + domain->domain.type = type; + domain->domain.pgsize_bitmap = iommu->iommu.ops->pgsize_bitmap; + domain->domain.ops = iommu->iommu.ops->default_domain_ops; + + if (dirty_tracking) + domain->domain.dirty_ops = &amd_dirty_ops; + } + return &domain->domain; } +static struct iommu_domain *amd_iommu_domain_alloc(unsigned int type) +{ + struct iommu_domain *domain; + + domain = do_iommu_domain_alloc(type, NULL, 0); + if (IS_ERR(domain)) + return NULL; + + return domain; +} + +static struct iommu_domain *amd_iommu_domain_alloc_user(struct device *dev, + u32 flags) +{ + unsigned int type = IOMMU_DOMAIN_UNMANAGED; + + if (flags & ~IOMMU_HWPT_ALLOC_DIRTY_TRACKING) + return ERR_PTR(-EOPNOTSUPP); + + return do_iommu_domain_alloc(type, dev, flags); +} + static void amd_iommu_domain_free(struct iommu_domain *dom) { struct protection_domain *domain; @@ -2214,6 +2267,13 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, dev_data->defer_attach = false; + /* + * Restrict to devices with compatible IOMMU hardware support + * when enforcement of dirty tracking is enabled. + */ + if (dom->dirty_ops && !amd_iommu_hd_support(iommu)) + return -EINVAL; + if (dev_data->domain) detach_device(dev); @@ -2332,6 +2392,11 @@ static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap) return true; case IOMMU_CAP_DEFERRED_FLUSH: return true; + case IOMMU_CAP_DIRTY_TRACKING: { + struct amd_iommu *iommu = rlookup_amd_iommu(dev); + + return amd_iommu_hd_support(iommu); + } default: break; } @@ -2339,6 +2404,73 @@ static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap) return false; } +static int amd_iommu_set_dirty_tracking(struct iommu_domain *domain, + bool enable) +{ + struct protection_domain *pdomain = to_pdomain(domain); + struct dev_table_entry *dev_table; + struct iommu_dev_data *dev_data; + bool domain_flush = false; + struct amd_iommu *iommu; + unsigned long flags; + u64 pte_root; + + spin_lock_irqsave(&pdomain->lock, flags); + if (!(pdomain->dirty_tracking ^ enable)) { + spin_unlock_irqrestore(&pdomain->lock, flags); + return 0; + } + + list_for_each_entry(dev_data, &pdomain->dev_list, list) { + iommu = rlookup_amd_iommu(dev_data->dev); + if (!iommu) + continue; + + dev_table = get_dev_table(iommu); + pte_root = dev_table[dev_data->devid].data[0]; + + pte_root = (enable ? pte_root | DTE_FLAG_HAD : + pte_root & ~DTE_FLAG_HAD); + + /* Flush device DTE */ + dev_table[dev_data->devid].data[0] = pte_root; + device_flush_dte(dev_data); + domain_flush = true; + } + + /* Flush IOTLB to mark IOPTE dirty on the next translation(s) */ + if (domain_flush) { + amd_iommu_domain_flush_tlb_pde(pdomain); + amd_iommu_domain_flush_complete(pdomain); + } + pdomain->dirty_tracking = enable; + spin_unlock_irqrestore(&pdomain->lock, flags); + + return 0; +} + +static int amd_iommu_read_and_clear_dirty(struct iommu_domain *domain, + unsigned long iova, size_t size, + unsigned long flags, + struct iommu_dirty_bitmap *dirty) +{ + struct protection_domain *pdomain = to_pdomain(domain); + struct io_pgtable_ops *ops = &pdomain->iop.iop.ops; + unsigned long lflags; + + if (!ops || !ops->read_and_clear_dirty) + return -EOPNOTSUPP; + + spin_lock_irqsave(&pdomain->lock, lflags); + if (!pdomain->dirty_tracking && dirty->bitmap) { + spin_unlock_irqrestore(&pdomain->lock, lflags); + return -EINVAL; + } + spin_unlock_irqrestore(&pdomain->lock, lflags); + + return ops->read_and_clear_dirty(ops, iova, size, flags, dirty); +} + static void amd_iommu_get_resv_regions(struct device *dev, struct list_head *head) { @@ -2461,9 +2593,15 @@ static bool amd_iommu_enforce_cache_coherency(struct iommu_domain *domain) return true; } +const struct iommu_dirty_ops amd_dirty_ops = { + .set_dirty_tracking = amd_iommu_set_dirty_tracking, + .read_and_clear_dirty = amd_iommu_read_and_clear_dirty, +}; + const struct iommu_ops amd_iommu_ops = { .capable = amd_iommu_capable, .domain_alloc = amd_iommu_domain_alloc, + .domain_alloc_user = amd_iommu_domain_alloc_user, .probe_device = amd_iommu_probe_device, .release_device = amd_iommu_release_device, .probe_finalize = amd_iommu_probe_finalize, diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index 2e56bd79f589d30c2787e695b5c93750fcb480e0..f5348b80652b65bdc043c2a01168789c65a2e626 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -15,6 +15,7 @@ config INTEL_IOMMU select DMA_OPS select IOMMU_API select IOMMU_IOVA + select IOMMUFD_DRIVER if IOMMUFD select NEED_DMA_MAP_STATE select DMAR_TABLE select SWIOTLB diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 4c3707384bd92bef9d8c9a3a05a6d3aff4b3002f..1af77b46bba00f1116f8ef466306362d17d6cbfa 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -300,6 +300,7 @@ static int iommu_skip_te_disable; #define IDENTMAP_AZALIA 4 const struct iommu_ops intel_iommu_ops; +static const struct iommu_dirty_ops intel_dirty_ops; static bool translation_pre_enabled(struct intel_iommu *iommu) { @@ -4059,6 +4060,48 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) return NULL; } +static struct iommu_domain * +intel_iommu_domain_alloc_user(struct device *dev, u32 flags) +{ + struct iommu_domain *domain; + struct intel_iommu *iommu; + bool dirty_tracking; + + if (flags & + (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING))) + return ERR_PTR(-EOPNOTSUPP); + + iommu = device_to_iommu(dev, NULL, NULL); + if (!iommu) + return ERR_PTR(-ENODEV); + + if ((flags & IOMMU_HWPT_ALLOC_NEST_PARENT) && !nested_supported(iommu)) + return ERR_PTR(-EOPNOTSUPP); + + dirty_tracking = (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING); + if (dirty_tracking && !ssads_supported(iommu)) + return ERR_PTR(-EOPNOTSUPP); + + /* + * domain_alloc_user op needs to fully initialize a domain + * before return, so uses iommu_domain_alloc() here for + * simple. + */ + domain = iommu_domain_alloc(dev->bus); + if (!domain) + domain = ERR_PTR(-ENOMEM); + + if (!IS_ERR(domain) && dirty_tracking) { + if (to_dmar_domain(domain)->use_first_level) { + iommu_domain_free(domain); + return ERR_PTR(-EOPNOTSUPP); + } + domain->dirty_ops = &intel_dirty_ops; + } + + return domain; +} + static void intel_iommu_domain_free(struct iommu_domain *domain) { if (domain != &si_domain->domain && domain != &blocking_domain) @@ -4079,6 +4122,9 @@ static int prepare_domain_attach_device(struct iommu_domain *domain, if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap)) return -EINVAL; + if (domain->dirty_ops && !ssads_supported(iommu)) + return -EINVAL; + /* check if this iommu agaw is sufficient for max mapped address */ addr_width = agaw_to_width(iommu->agaw); if (addr_width > cap_mgaw(iommu->cap)) @@ -4333,6 +4379,8 @@ static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap) return dmar_platform_optin(); case IOMMU_CAP_ENFORCE_CACHE_COHERENCY: return ecap_sc_support(info->iommu->ecap); + case IOMMU_CAP_DIRTY_TRACKING: + return ssads_supported(info->iommu); default: return false; } @@ -4730,6 +4778,9 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain, if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev)) return -EOPNOTSUPP; + if (domain->dirty_ops) + return -EINVAL; + if (context_copied(iommu, info->bus, info->devfn)) return -EBUSY; @@ -4788,10 +4839,88 @@ static void *intel_iommu_hw_info(struct device *dev, u32 *length, u32 *type) return vtd; } +static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain, + bool enable) +{ + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + struct device_domain_info *info; + int ret; + + spin_lock(&dmar_domain->lock); + if (dmar_domain->dirty_tracking == enable) + goto out_unlock; + + list_for_each_entry(info, &dmar_domain->devices, link) { + ret = intel_pasid_setup_dirty_tracking(info->iommu, + info->domain, info->dev, + IOMMU_NO_PASID, enable); + if (ret) + goto err_unwind; + } + + dmar_domain->dirty_tracking = enable; +out_unlock: + spin_unlock(&dmar_domain->lock); + + return 0; + +err_unwind: + list_for_each_entry(info, &dmar_domain->devices, link) + intel_pasid_setup_dirty_tracking(info->iommu, dmar_domain, + info->dev, IOMMU_NO_PASID, + dmar_domain->dirty_tracking); + spin_unlock(&dmar_domain->lock); + return ret; +} + +static int intel_iommu_read_and_clear_dirty(struct iommu_domain *domain, + unsigned long iova, size_t size, + unsigned long flags, + struct iommu_dirty_bitmap *dirty) +{ + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + unsigned long end = iova + size - 1; + unsigned long pgsize; + + /* + * IOMMUFD core calls into a dirty tracking disabled domain without an + * IOVA bitmap set in order to clean dirty bits in all PTEs that might + * have occurred when we stopped dirty tracking. This ensures that we + * never inherit dirtied bits from a previous cycle. + */ + if (!dmar_domain->dirty_tracking && dirty->bitmap) + return -EINVAL; + + do { + struct dma_pte *pte; + int lvl = 0; + + pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &lvl, + GFP_ATOMIC); + pgsize = level_size(lvl) << VTD_PAGE_SHIFT; + if (!pte || !dma_pte_present(pte)) { + iova += pgsize; + continue; + } + + if (dma_sl_pte_test_and_clear_dirty(pte, flags)) + iommu_dirty_bitmap_record(dirty, iova, pgsize); + iova += pgsize; + } while (iova < end); + + return 0; +} + +static const struct iommu_dirty_ops intel_dirty_ops = { + .set_dirty_tracking = intel_iommu_set_dirty_tracking, + .read_and_clear_dirty = intel_iommu_read_and_clear_dirty, +}; + const struct iommu_ops intel_iommu_ops = { .capable = intel_iommu_capable, .hw_info = intel_iommu_hw_info, .domain_alloc = intel_iommu_domain_alloc, + .domain_alloc_user = intel_iommu_domain_alloc_user, .probe_device = intel_iommu_probe_device, .probe_finalize = intel_iommu_probe_finalize, .release_device = intel_iommu_release_device, diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 7dac94f62b4ec661af7030b475103ef4ac184fee..8cf451bf3085487f845ece32d4a6b3256c81a339 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -48,6 +48,9 @@ #define DMA_FL_PTE_DIRTY BIT_ULL(6) #define DMA_FL_PTE_XD BIT_ULL(63) +#define DMA_SL_PTE_DIRTY_BIT 9 +#define DMA_SL_PTE_DIRTY BIT_ULL(DMA_SL_PTE_DIRTY_BIT) + #define ADDR_WIDTH_5LEVEL (57) #define ADDR_WIDTH_4LEVEL (48) @@ -539,6 +542,10 @@ enum { #define sm_supported(iommu) (intel_iommu_sm && ecap_smts((iommu)->ecap)) #define pasid_supported(iommu) (sm_supported(iommu) && \ ecap_pasid((iommu)->ecap)) +#define ssads_supported(iommu) (sm_supported(iommu) && \ + ecap_slads((iommu)->ecap)) +#define nested_supported(iommu) (sm_supported(iommu) && \ + ecap_nest((iommu)->ecap)) struct pasid_entry; struct pasid_state_entry; @@ -592,6 +599,7 @@ struct dmar_domain { * otherwise, goes through the second * level. */ + u8 dirty_tracking:1; /* Dirty tracking is enabled */ spinlock_t lock; /* Protect device tracking lists */ struct list_head devices; /* all devices' list */ @@ -781,6 +789,16 @@ static inline bool dma_pte_present(struct dma_pte *pte) return (pte->val & 3) != 0; } +static inline bool dma_sl_pte_test_and_clear_dirty(struct dma_pte *pte, + unsigned long flags) +{ + if (flags & IOMMU_DIRTY_NO_CLEAR) + return (pte->val & DMA_SL_PTE_DIRTY) != 0; + + return test_and_clear_bit(DMA_SL_PTE_DIRTY_BIT, + (unsigned long *)&pte->val); +} + static inline bool dma_pte_superpage(struct dma_pte *pte) { return (pte->val & DMA_PTE_LARGE_PAGE); diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 8f92b92f3d2aba5ce2455fcf8d3604ea4eeee4ae..b9264b9174e88dde2abbc3de677fa0aa864c16f9 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -277,6 +277,11 @@ static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits) WRITE_ONCE(*ptr, (old & ~mask) | bits); } +static inline u64 pasid_get_bits(u64 *ptr) +{ + return READ_ONCE(*ptr); +} + /* * Setup the DID(Domain Identifier) field (Bit 64~79) of scalable mode * PASID entry. @@ -335,6 +340,36 @@ static inline void pasid_set_fault_enable(struct pasid_entry *pe) pasid_set_bits(&pe->val[0], 1 << 1, 0); } +/* + * Enable second level A/D bits by setting the SLADE (Second Level + * Access Dirty Enable) field (Bit 9) of a scalable mode PASID + * entry. + */ +static inline void pasid_set_ssade(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[0], 1 << 9, 1 << 9); +} + +/* + * Disable second level A/D bits by clearing the SLADE (Second Level + * Access Dirty Enable) field (Bit 9) of a scalable mode PASID + * entry. + */ +static inline void pasid_clear_ssade(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[0], 1 << 9, 0); +} + +/* + * Checks if second level A/D bits specifically the SLADE (Second Level + * Access Dirty Enable) field (Bit 9) of a scalable mode PASID + * entry is set. + */ +static inline bool pasid_get_ssade(struct pasid_entry *pe) +{ + return pasid_get_bits(&pe->val[0]) & (1 << 9); +} + /* * Setup the WPE(Write Protect Enable) field (Bit 132) of a * scalable mode PASID entry. @@ -627,6 +662,8 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY); pasid_set_fault_enable(pte); pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); + if (domain->dirty_tracking) + pasid_set_ssade(pte); pasid_set_present(pte); spin_unlock(&iommu->lock); @@ -636,6 +673,78 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, return 0; } +/* + * Set up dirty tracking on a second only or nested translation type. + */ +int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu, + struct dmar_domain *domain, + struct device *dev, u32 pasid, + bool enabled) +{ + struct pasid_entry *pte; + u16 did, pgtt; + + spin_lock(&iommu->lock); + + pte = intel_pasid_get_entry(dev, pasid); + if (!pte) { + spin_unlock(&iommu->lock); + dev_err_ratelimited( + dev, "Failed to get pasid entry of PASID %d\n", pasid); + return -ENODEV; + } + + did = domain_id_iommu(domain, iommu); + pgtt = pasid_pte_get_pgtt(pte); + if (pgtt != PASID_ENTRY_PGTT_SL_ONLY && + pgtt != PASID_ENTRY_PGTT_NESTED) { + spin_unlock(&iommu->lock); + dev_err_ratelimited( + dev, + "Dirty tracking not supported on translation type %d\n", + pgtt); + return -EOPNOTSUPP; + } + + if (pasid_get_ssade(pte) == enabled) { + spin_unlock(&iommu->lock); + return 0; + } + + if (enabled) + pasid_set_ssade(pte); + else + pasid_clear_ssade(pte); + spin_unlock(&iommu->lock); + + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(pte, sizeof(*pte)); + + /* + * From VT-d spec table 25 "Guidance to Software for Invalidations": + * + * - PASID-selective-within-Domain PASID-cache invalidation + * If (PGTT=SS or Nested) + * - Domain-selective IOTLB invalidation + * Else + * - PASID-selective PASID-based IOTLB invalidation + * - If (pasid is RID_PASID) + * - Global Device-TLB invalidation to affected functions + * Else + * - PASID-based Device-TLB invalidation (with S=1 and + * Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions + */ + pasid_cache_invalidation_with_pasid(iommu, did, pasid); + + iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); + + /* Device IOTLB doesn't need to be flushed in caching mode. */ + if (!cap_caching_mode(iommu->cap)) + devtlb_invalidation_with_pasid(iommu, dev, pasid); + + return 0; +} + /* * Set up the scalable mode pasid entry for passthrough translation type. */ diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index 4e9e68c3c3888f6acd4c3ecff8ebc90f1db39955..958050b093aa24df0d8cebcb327b00371b64cb01 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -106,6 +106,10 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, int intel_pasid_setup_second_level(struct intel_iommu *iommu, struct dmar_domain *domain, struct device *dev, u32 pasid); +int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu, + struct dmar_domain *domain, + struct device *dev, u32 pasid, + bool enabled); int intel_pasid_setup_pass_through(struct intel_iommu *iommu, struct dmar_domain *domain, struct device *dev, u32 pasid); diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile index 8aeba81800c512dc9e9eb1c32b3240080b503db1..34b446146961c29e7b24dc5cc890a5aa557a6ce8 100644 --- a/drivers/iommu/iommufd/Makefile +++ b/drivers/iommu/iommufd/Makefile @@ -11,3 +11,4 @@ iommufd-y := \ iommufd-$(CONFIG_IOMMUFD_TEST) += selftest.o obj-$(CONFIG_IOMMUFD) += iommufd.o +obj-$(CONFIG_IOMMUFD_DRIVER) += iova_bitmap.o diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index ce78c3671539c77d27059d3aa11c7367f493eeff..2a41fd2b6ef8e13b0c39e512dd484ec5c9ebab21 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -540,7 +540,7 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev, } hwpt = iommufd_hw_pagetable_alloc(idev->ictx, ioas, idev, - immediate_attach); + 0, immediate_attach); if (IS_ERR(hwpt)) { destroy_hwpt = ERR_CAST(hwpt); goto out_unlock; @@ -1185,6 +1185,10 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd) */ cmd->data_len = data_len; + cmd->out_capabilities = 0; + if (device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING)) + cmd->out_capabilities |= IOMMU_HW_CAP_DIRTY_TRACKING; + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); out_free: kfree(data); diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index cf2c1504e20d843a6c00741f52c4fa6f66b01a90..72a5269984b0183ba262421486a09c0ccebf1077 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -5,6 +5,7 @@ #include #include +#include "../iommu-priv.h" #include "iommufd_private.h" void iommufd_hw_pagetable_destroy(struct iommufd_object *obj) @@ -60,6 +61,7 @@ int iommufd_hw_pagetable_enforce_cc(struct iommufd_hw_pagetable *hwpt) * @ictx: iommufd context * @ioas: IOAS to associate the domain with * @idev: Device to get an iommu_domain for + * @flags: Flags from userspace * @immediate_attach: True if idev should be attached to the hwpt * * Allocate a new iommu_domain and return it as a hw_pagetable. The HWPT @@ -72,13 +74,18 @@ int iommufd_hw_pagetable_enforce_cc(struct iommufd_hw_pagetable *hwpt) */ struct iommufd_hw_pagetable * iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, - struct iommufd_device *idev, bool immediate_attach) + struct iommufd_device *idev, u32 flags, + bool immediate_attach) { + const struct iommu_ops *ops = dev_iommu_ops(idev->dev); struct iommufd_hw_pagetable *hwpt; int rc; lockdep_assert_held(&ioas->mutex); + if (flags && !ops->domain_alloc_user) + return ERR_PTR(-EOPNOTSUPP); + hwpt = iommufd_object_alloc(ictx, hwpt, IOMMUFD_OBJ_HW_PAGETABLE); if (IS_ERR(hwpt)) return hwpt; @@ -88,10 +95,19 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, refcount_inc(&ioas->obj.users); hwpt->ioas = ioas; - hwpt->domain = iommu_domain_alloc(idev->dev->bus); - if (!hwpt->domain) { - rc = -ENOMEM; - goto out_abort; + if (ops->domain_alloc_user) { + hwpt->domain = ops->domain_alloc_user(idev->dev, flags); + if (IS_ERR(hwpt->domain)) { + rc = PTR_ERR(hwpt->domain); + hwpt->domain = NULL; + goto out_abort; + } + } else { + hwpt->domain = iommu_domain_alloc(idev->dev->bus); + if (!hwpt->domain) { + rc = -ENOMEM; + goto out_abort; + } } /* @@ -141,7 +157,9 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) struct iommufd_ioas *ioas; int rc; - if (cmd->flags || cmd->__reserved) + if ((cmd->flags & ~(IOMMU_HWPT_ALLOC_NEST_PARENT | + IOMMU_HWPT_ALLOC_DIRTY_TRACKING)) || + cmd->__reserved) return -EOPNOTSUPP; idev = iommufd_get_device(ucmd, cmd->dev_id); @@ -155,7 +173,8 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) } mutex_lock(&ioas->mutex); - hwpt = iommufd_hw_pagetable_alloc(ucmd->ictx, ioas, idev, false); + hwpt = iommufd_hw_pagetable_alloc(ucmd->ictx, ioas, + idev, cmd->flags, false); if (IS_ERR(hwpt)) { rc = PTR_ERR(hwpt); goto out_unlock; @@ -177,3 +196,50 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) iommufd_put_object(&idev->obj); return rc; } + +int iommufd_hwpt_set_dirty_tracking(struct iommufd_ucmd *ucmd) +{ + struct iommu_hwpt_set_dirty_tracking *cmd = ucmd->cmd; + struct iommufd_hw_pagetable *hwpt; + struct iommufd_ioas *ioas; + int rc = -EOPNOTSUPP; + bool enable; + + if (cmd->flags & ~IOMMU_HWPT_DIRTY_TRACKING_ENABLE) + return rc; + + hwpt = iommufd_get_hwpt(ucmd, cmd->hwpt_id); + if (IS_ERR(hwpt)) + return PTR_ERR(hwpt); + + ioas = hwpt->ioas; + enable = cmd->flags & IOMMU_HWPT_DIRTY_TRACKING_ENABLE; + + rc = iopt_set_dirty_tracking(&ioas->iopt, hwpt->domain, enable); + + iommufd_put_object(&hwpt->obj); + return rc; +} + +int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd) +{ + struct iommu_hwpt_get_dirty_bitmap *cmd = ucmd->cmd; + struct iommufd_hw_pagetable *hwpt; + struct iommufd_ioas *ioas; + int rc = -EOPNOTSUPP; + + if ((cmd->flags & ~(IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR)) || + cmd->__reserved) + return -EOPNOTSUPP; + + hwpt = iommufd_get_hwpt(ucmd, cmd->hwpt_id); + if (IS_ERR(hwpt)) + return PTR_ERR(hwpt); + + ioas = hwpt->ioas; + rc = iopt_read_and_clear_dirty_data(&ioas->iopt, hwpt->domain, + cmd->flags, cmd); + + iommufd_put_object(&hwpt->obj); + return rc; +} diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c index 117a39ae2e4aa4f6b5708f670357bc15a0bab261..504ac1b01b2d2ab45fbc22fde2bdcf324ce2d973 100644 --- a/drivers/iommu/iommufd/io_pagetable.c +++ b/drivers/iommu/iommufd/io_pagetable.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "io_pagetable.h" #include "double_span.h" @@ -424,6 +425,177 @@ int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt, return 0; } +struct iova_bitmap_fn_arg { + unsigned long flags; + struct io_pagetable *iopt; + struct iommu_domain *domain; + struct iommu_dirty_bitmap *dirty; +}; + +static int __iommu_read_and_clear_dirty(struct iova_bitmap *bitmap, + unsigned long iova, size_t length, + void *opaque) +{ + struct iopt_area *area; + struct iopt_area_contig_iter iter; + struct iova_bitmap_fn_arg *arg = opaque; + struct iommu_domain *domain = arg->domain; + struct iommu_dirty_bitmap *dirty = arg->dirty; + const struct iommu_dirty_ops *ops = domain->dirty_ops; + unsigned long last_iova = iova + length - 1; + unsigned long flags = arg->flags; + int ret; + + iopt_for_each_contig_area(&iter, area, arg->iopt, iova, last_iova) { + unsigned long last = min(last_iova, iopt_area_last_iova(area)); + + ret = ops->read_and_clear_dirty(domain, iter.cur_iova, + last - iter.cur_iova + 1, flags, + dirty); + if (ret) + return ret; + } + + if (!iopt_area_contig_done(&iter)) + return -EINVAL; + return 0; +} + +static int +iommu_read_and_clear_dirty(struct iommu_domain *domain, + struct io_pagetable *iopt, unsigned long flags, + struct iommu_hwpt_get_dirty_bitmap *bitmap) +{ + const struct iommu_dirty_ops *ops = domain->dirty_ops; + struct iommu_iotlb_gather gather; + struct iommu_dirty_bitmap dirty; + struct iova_bitmap_fn_arg arg; + struct iova_bitmap *iter; + int ret = 0; + + if (!ops || !ops->read_and_clear_dirty) + return -EOPNOTSUPP; + + iter = iova_bitmap_alloc(bitmap->iova, bitmap->length, + bitmap->page_size, + u64_to_user_ptr(bitmap->data)); + if (IS_ERR(iter)) + return -ENOMEM; + + iommu_dirty_bitmap_init(&dirty, iter, &gather); + + arg.flags = flags; + arg.iopt = iopt; + arg.domain = domain; + arg.dirty = &dirty; + iova_bitmap_for_each(iter, &arg, __iommu_read_and_clear_dirty); + + if (!(flags & IOMMU_DIRTY_NO_CLEAR)) + iommu_iotlb_sync(domain, &gather); + + iova_bitmap_free(iter); + + return ret; +} + +int iommufd_check_iova_range(struct io_pagetable *iopt, + struct iommu_hwpt_get_dirty_bitmap *bitmap) +{ + size_t iommu_pgsize = iopt->iova_alignment; + u64 last_iova; + + if (check_add_overflow(bitmap->iova, bitmap->length - 1, &last_iova)) + return -EOVERFLOW; + + if (bitmap->iova > ULONG_MAX || last_iova > ULONG_MAX) + return -EOVERFLOW; + + if ((bitmap->iova & (iommu_pgsize - 1)) || + ((last_iova + 1) & (iommu_pgsize - 1))) + return -EINVAL; + + if (!bitmap->page_size) + return -EINVAL; + + if ((bitmap->iova & (bitmap->page_size - 1)) || + ((last_iova + 1) & (bitmap->page_size - 1))) + return -EINVAL; + + return 0; +} + +int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt, + struct iommu_domain *domain, + unsigned long flags, + struct iommu_hwpt_get_dirty_bitmap *bitmap) +{ + int ret; + + ret = iommufd_check_iova_range(iopt, bitmap); + if (ret) + return ret; + + down_read(&iopt->iova_rwsem); + ret = iommu_read_and_clear_dirty(domain, iopt, flags, bitmap); + up_read(&iopt->iova_rwsem); + + return ret; +} + +static int iopt_clear_dirty_data(struct io_pagetable *iopt, + struct iommu_domain *domain) +{ + const struct iommu_dirty_ops *ops = domain->dirty_ops; + struct iommu_iotlb_gather gather; + struct iommu_dirty_bitmap dirty; + struct iopt_area *area; + int ret = 0; + + lockdep_assert_held_read(&iopt->iova_rwsem); + + iommu_dirty_bitmap_init(&dirty, NULL, &gather); + + for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area; + area = iopt_area_iter_next(area, 0, ULONG_MAX)) { + if (!area->pages) + continue; + + ret = ops->read_and_clear_dirty(domain, iopt_area_iova(area), + iopt_area_length(area), 0, + &dirty); + if (ret) + break; + } + + iommu_iotlb_sync(domain, &gather); + return ret; +} + +int iopt_set_dirty_tracking(struct io_pagetable *iopt, + struct iommu_domain *domain, bool enable) +{ + const struct iommu_dirty_ops *ops = domain->dirty_ops; + int ret = 0; + + if (!ops) + return -EOPNOTSUPP; + + down_read(&iopt->iova_rwsem); + + /* Clear dirty bits from PTEs to ensure a clean snapshot */ + if (enable) { + ret = iopt_clear_dirty_data(iopt, domain); + if (ret) + goto out_unlock; + } + + ret = ops->set_dirty_tracking(domain, enable); + +out_unlock: + up_read(&iopt->iova_rwsem); + return ret; +} + int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova, unsigned long length, struct list_head *pages_list) { diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 2c58670011fe979b6da6687a9904408bba5f8a9a..034129130db3757ef58ee8a789747dcaa3586d7e 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -8,6 +8,9 @@ #include #include #include +#include +#include +#include struct iommu_domain; struct iommu_group; @@ -70,6 +73,13 @@ int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova, unsigned long length, unsigned long *unmapped); int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped); +int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt, + struct iommu_domain *domain, + unsigned long flags, + struct iommu_hwpt_get_dirty_bitmap *bitmap); +int iopt_set_dirty_tracking(struct io_pagetable *iopt, + struct iommu_domain *domain, bool enable); + void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova, unsigned long length); int iopt_table_add_domain(struct io_pagetable *iopt, @@ -222,6 +232,8 @@ int iommufd_option_rlimit_mode(struct iommu_option *cmd, struct iommufd_ctx *ictx); int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd); +int iommufd_check_iova_range(struct io_pagetable *iopt, + struct iommu_hwpt_get_dirty_bitmap *bitmap); /* * A HW pagetable is called an iommu_domain inside the kernel. This user object @@ -240,9 +252,20 @@ struct iommufd_hw_pagetable { struct list_head hwpt_item; }; +static inline struct iommufd_hw_pagetable * +iommufd_get_hwpt(struct iommufd_ucmd *ucmd, u32 id) +{ + return container_of(iommufd_get_object(ucmd->ictx, id, + IOMMUFD_OBJ_HW_PAGETABLE), + struct iommufd_hw_pagetable, obj); +} +int iommufd_hwpt_set_dirty_tracking(struct iommufd_ucmd *ucmd); +int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd); + struct iommufd_hw_pagetable * iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, - struct iommufd_device *idev, bool immediate_attach); + struct iommufd_device *idev, u32 flags, + bool immediate_attach); int iommufd_hw_pagetable_enforce_cc(struct iommufd_hw_pagetable *hwpt); int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, struct iommufd_device *idev); diff --git a/drivers/iommu/iommufd/iommufd_test.h b/drivers/iommu/iommufd/iommufd_test.h index 3f3644375bf13c8fa78600f1f9e15d893195af65..1f2e93d3d4e87f758f0eb67e93343bf86f813f16 100644 --- a/drivers/iommu/iommufd/iommufd_test.h +++ b/drivers/iommu/iommufd/iommufd_test.h @@ -19,6 +19,8 @@ enum { IOMMU_TEST_OP_SET_TEMP_MEMORY_LIMIT, IOMMU_TEST_OP_MOCK_DOMAIN_REPLACE, IOMMU_TEST_OP_ACCESS_REPLACE_IOAS, + IOMMU_TEST_OP_MOCK_DOMAIN_FLAGS, + IOMMU_TEST_OP_DIRTY, }; enum { @@ -40,6 +42,10 @@ enum { MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES = 1 << 0, }; +enum { + MOCK_FLAGS_DEVICE_NO_DIRTY = 1 << 0, +}; + struct iommu_test_cmd { __u32 size; __u32 op; @@ -56,6 +62,13 @@ struct iommu_test_cmd { /* out_idev_id is the standard iommufd_bind object */ __u32 out_idev_id; } mock_domain; + struct { + __u32 out_stdev_id; + __u32 out_hwpt_id; + __u32 out_idev_id; + /* Expand mock_domain to set mock device flags */ + __u32 dev_flags; + } mock_domain_flags; struct { __u32 pt_id; } mock_domain_replace; @@ -95,6 +108,14 @@ struct iommu_test_cmd { struct { __u32 ioas_id; } access_replace_ioas; + struct { + __u32 flags; + __aligned_u64 iova; + __aligned_u64 length; + __aligned_u64 page_size; + __aligned_u64 uptr; + __aligned_u64 out_nr_dirty; + } dirty; }; __u32 last; }; diff --git a/drivers/vfio/iova_bitmap.c b/drivers/iommu/iommufd/iova_bitmap.c similarity index 98% rename from drivers/vfio/iova_bitmap.c rename to drivers/iommu/iommufd/iova_bitmap.c index 0848f920efb7c1c13521cc9d59ae96231cbf4481..0a92c9eeaf7f50a6fe05c266b9ec39d1021844a9 100644 --- a/drivers/vfio/iova_bitmap.c +++ b/drivers/iommu/iommufd/iova_bitmap.c @@ -268,6 +268,7 @@ struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, size_t length, iova_bitmap_free(bitmap); return ERR_PTR(rc); } +EXPORT_SYMBOL_NS_GPL(iova_bitmap_alloc, IOMMUFD); /** * iova_bitmap_free() - Frees an IOVA bitmap object @@ -289,6 +290,7 @@ void iova_bitmap_free(struct iova_bitmap *bitmap) kfree(bitmap); } +EXPORT_SYMBOL_NS_GPL(iova_bitmap_free, IOMMUFD); /* * Returns the remaining bitmap indexes from mapped_total_index to process for @@ -387,6 +389,7 @@ int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque, return ret; } +EXPORT_SYMBOL_NS_GPL(iova_bitmap_for_each, IOMMUFD); /** * iova_bitmap_set() - Records an IOVA range in bitmap @@ -420,4 +423,4 @@ void iova_bitmap_set(struct iova_bitmap *bitmap, cur_bit += nbits; } while (cur_bit <= last_bit); } -EXPORT_SYMBOL_GPL(iova_bitmap_set); +EXPORT_SYMBOL_NS_GPL(iova_bitmap_set, IOMMUFD); diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index e71523cbd0de4352479aadeb8f33dd6d2ba87df8..d50f42a730aa36c63b408be1174d8f553880467b 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -307,6 +307,8 @@ union ucmd_buffer { struct iommu_destroy destroy; struct iommu_hw_info info; struct iommu_hwpt_alloc hwpt; + struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap; + struct iommu_hwpt_set_dirty_tracking set_dirty_tracking; struct iommu_ioas_alloc alloc; struct iommu_ioas_allow_iovas allow_iovas; struct iommu_ioas_copy ioas_copy; @@ -342,6 +344,10 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { __reserved), IOCTL_OP(IOMMU_HWPT_ALLOC, iommufd_hwpt_alloc, struct iommu_hwpt_alloc, __reserved), + IOCTL_OP(IOMMU_HWPT_GET_DIRTY_BITMAP, iommufd_hwpt_get_dirty_bitmap, + struct iommu_hwpt_get_dirty_bitmap, data), + IOCTL_OP(IOMMU_HWPT_SET_DIRTY_TRACKING, iommufd_hwpt_set_dirty_tracking, + struct iommu_hwpt_set_dirty_tracking, __reserved), IOCTL_OP(IOMMU_IOAS_ALLOC, iommufd_ioas_alloc_ioctl, struct iommu_ioas_alloc, out_ioas_id), IOCTL_OP(IOMMU_IOAS_ALLOW_IOVAS, iommufd_ioas_allow_iovas, @@ -552,5 +558,6 @@ MODULE_ALIAS_MISCDEV(VFIO_MINOR); MODULE_ALIAS("devname:vfio/vfio"); #endif MODULE_IMPORT_NS(IOMMUFD_INTERNAL); +MODULE_IMPORT_NS(IOMMUFD); MODULE_DESCRIPTION("I/O Address Space Management for passthrough devices"); MODULE_LICENSE("GPL"); diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index 56506d5753f15c9f7079a661773404636d975c9e..22f9fcdfc55afc04031787500948b79934e8f513 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -24,6 +24,7 @@ static struct platform_device *selftest_iommu_dev; size_t iommufd_test_memory_limit = 65536; enum { + MOCK_DIRTY_TRACK = 1, MOCK_IO_PAGE_SIZE = PAGE_SIZE / 2, /* @@ -36,6 +37,7 @@ enum { _MOCK_PFN_START = MOCK_PFN_MASK + 1, MOCK_PFN_START_IOVA = _MOCK_PFN_START, MOCK_PFN_LAST_IOVA = _MOCK_PFN_START, + MOCK_PFN_DIRTY_IOVA = _MOCK_PFN_START << 1, }; /* @@ -86,6 +88,7 @@ void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd, } struct mock_iommu_domain { + unsigned long flags; struct iommu_domain domain; struct xarray pfns; }; @@ -96,6 +99,7 @@ enum selftest_obj_type { struct mock_dev { struct device dev; + unsigned long flags; }; struct selftest_obj { @@ -118,6 +122,11 @@ static void mock_domain_blocking_free(struct iommu_domain *domain) static int mock_domain_nop_attach(struct iommu_domain *domain, struct device *dev) { + struct mock_dev *mdev = container_of(dev, struct mock_dev, dev); + + if (domain->dirty_ops && (mdev->flags & MOCK_FLAGS_DEVICE_NO_DIRTY)) + return -EINVAL; + return 0; } @@ -146,6 +155,69 @@ static void *mock_domain_hw_info(struct device *dev, u32 *length, u32 *type) return info; } +static int mock_domain_set_dirty_tracking(struct iommu_domain *domain, + bool enable) +{ + struct mock_iommu_domain *mock = + container_of(domain, struct mock_iommu_domain, domain); + unsigned long flags = mock->flags; + + if (enable && !domain->dirty_ops) + return -EINVAL; + + /* No change? */ + if (!(enable ^ !!(flags & MOCK_DIRTY_TRACK))) + return 0; + + flags = (enable ? flags | MOCK_DIRTY_TRACK : flags & ~MOCK_DIRTY_TRACK); + + mock->flags = flags; + return 0; +} + +static int mock_domain_read_and_clear_dirty(struct iommu_domain *domain, + unsigned long iova, size_t size, + unsigned long flags, + struct iommu_dirty_bitmap *dirty) +{ + struct mock_iommu_domain *mock = + container_of(domain, struct mock_iommu_domain, domain); + unsigned long i, max = size / MOCK_IO_PAGE_SIZE; + void *ent, *old; + + if (!(mock->flags & MOCK_DIRTY_TRACK) && dirty->bitmap) + return -EINVAL; + + for (i = 0; i < max; i++) { + unsigned long cur = iova + i * MOCK_IO_PAGE_SIZE; + + ent = xa_load(&mock->pfns, cur / MOCK_IO_PAGE_SIZE); + if (ent && (xa_to_value(ent) & MOCK_PFN_DIRTY_IOVA)) { + /* Clear dirty */ + if (!(flags & IOMMU_DIRTY_NO_CLEAR)) { + unsigned long val; + + val = xa_to_value(ent) & ~MOCK_PFN_DIRTY_IOVA; + old = xa_store(&mock->pfns, + cur / MOCK_IO_PAGE_SIZE, + xa_mk_value(val), GFP_KERNEL); + WARN_ON_ONCE(ent != old); + } + iommu_dirty_bitmap_record(dirty, cur, + MOCK_IO_PAGE_SIZE); + } + } + + return 0; +} + +const struct iommu_dirty_ops dirty_ops = { + .set_dirty_tracking = mock_domain_set_dirty_tracking, + .read_and_clear_dirty = mock_domain_read_and_clear_dirty, +}; + +static const struct iommu_ops mock_ops; + static struct iommu_domain *mock_domain_alloc(unsigned int iommu_domain_type) { struct mock_iommu_domain *mock; @@ -162,10 +234,34 @@ static struct iommu_domain *mock_domain_alloc(unsigned int iommu_domain_type) mock->domain.geometry.aperture_start = MOCK_APERTURE_START; mock->domain.geometry.aperture_end = MOCK_APERTURE_LAST; mock->domain.pgsize_bitmap = MOCK_IO_PAGE_SIZE; + mock->domain.ops = mock_ops.default_domain_ops; + mock->domain.type = iommu_domain_type; xa_init(&mock->pfns); return &mock->domain; } +static struct iommu_domain * +mock_domain_alloc_user(struct device *dev, u32 flags) +{ + struct mock_dev *mdev = container_of(dev, struct mock_dev, dev); + struct iommu_domain *domain; + + if (flags & + (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING))) + return ERR_PTR(-EOPNOTSUPP); + + if ((flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) && + (mdev->flags & MOCK_FLAGS_DEVICE_NO_DIRTY)) + return ERR_PTR(-EOPNOTSUPP); + + domain = mock_domain_alloc(IOMMU_DOMAIN_UNMANAGED); + if (domain && !(mdev->flags & MOCK_FLAGS_DEVICE_NO_DIRTY)) + domain->dirty_ops = &dirty_ops; + if (!domain) + domain = ERR_PTR(-ENOMEM); + return domain; +} + static void mock_domain_free(struct iommu_domain *domain) { struct mock_iommu_domain *mock = @@ -243,7 +339,7 @@ static size_t mock_domain_unmap_pages(struct iommu_domain *domain, for (cur = 0; cur != pgsize; cur += MOCK_IO_PAGE_SIZE) { ent = xa_erase(&mock->pfns, iova / MOCK_IO_PAGE_SIZE); - WARN_ON(!ent); + /* * iommufd generates unmaps that must be a strict * superset of the map's performend So every starting @@ -253,13 +349,13 @@ static size_t mock_domain_unmap_pages(struct iommu_domain *domain, * passed to map_pages */ if (first) { - WARN_ON(!(xa_to_value(ent) & - MOCK_PFN_START_IOVA)); + WARN_ON(ent && !(xa_to_value(ent) & + MOCK_PFN_START_IOVA)); first = false; } if (pgcount == 1 && cur + MOCK_IO_PAGE_SIZE == pgsize) - WARN_ON(!(xa_to_value(ent) & - MOCK_PFN_LAST_IOVA)); + WARN_ON(ent && !(xa_to_value(ent) & + MOCK_PFN_LAST_IOVA)); iova += MOCK_IO_PAGE_SIZE; ret += MOCK_IO_PAGE_SIZE; @@ -283,7 +379,18 @@ static phys_addr_t mock_domain_iova_to_phys(struct iommu_domain *domain, static bool mock_domain_capable(struct device *dev, enum iommu_cap cap) { - return cap == IOMMU_CAP_CACHE_COHERENCY; + struct mock_dev *mdev = container_of(dev, struct mock_dev, dev); + + switch (cap) { + case IOMMU_CAP_CACHE_COHERENCY: + return true; + case IOMMU_CAP_DIRTY_TRACKING: + return !(mdev->flags & MOCK_FLAGS_DEVICE_NO_DIRTY); + default: + break; + } + + return false; } static void mock_domain_set_plaform_dma_ops(struct device *dev) @@ -307,6 +414,7 @@ static const struct iommu_ops mock_ops = { .pgsize_bitmap = MOCK_IO_PAGE_SIZE, .hw_info = mock_domain_hw_info, .domain_alloc = mock_domain_alloc, + .domain_alloc_user = mock_domain_alloc_user, .capable = mock_domain_capable, .set_platform_dma_ops = mock_domain_set_plaform_dma_ops, .device_group = generic_device_group, @@ -362,16 +470,20 @@ static void mock_dev_release(struct device *dev) kfree(mdev); } -static struct mock_dev *mock_dev_create(void) +static struct mock_dev *mock_dev_create(unsigned long dev_flags) { struct mock_dev *mdev; int rc; + if (dev_flags & ~(MOCK_FLAGS_DEVICE_NO_DIRTY)) + return ERR_PTR(-EINVAL); + mdev = kzalloc(sizeof(*mdev), GFP_KERNEL); if (!mdev) return ERR_PTR(-ENOMEM); device_initialize(&mdev->dev); + mdev->flags = dev_flags; mdev->dev.release = mock_dev_release; mdev->dev.bus = &iommufd_mock_bus_type.bus; @@ -407,6 +519,7 @@ static int iommufd_test_mock_domain(struct iommufd_ucmd *ucmd, struct iommufd_device *idev; struct selftest_obj *sobj; u32 pt_id = cmd->id; + u32 dev_flags = 0; u32 idev_id; int rc; @@ -417,7 +530,10 @@ static int iommufd_test_mock_domain(struct iommufd_ucmd *ucmd, sobj->idev.ictx = ucmd->ictx; sobj->type = TYPE_IDEV; - sobj->idev.mock_dev = mock_dev_create(); + if (cmd->op == IOMMU_TEST_OP_MOCK_DOMAIN_FLAGS) + dev_flags = cmd->mock_domain_flags.dev_flags; + + sobj->idev.mock_dev = mock_dev_create(dev_flags); if (IS_ERR(sobj->idev.mock_dev)) { rc = PTR_ERR(sobj->idev.mock_dev); goto out_sobj; @@ -977,6 +1093,73 @@ static_assert((unsigned int)MOCK_ACCESS_RW_WRITE == IOMMUFD_ACCESS_RW_WRITE); static_assert((unsigned int)MOCK_ACCESS_RW_SLOW_PATH == __IOMMUFD_ACCESS_RW_SLOW_PATH); +static int iommufd_test_dirty(struct iommufd_ucmd *ucmd, unsigned int mockpt_id, + unsigned long iova, size_t length, + unsigned long page_size, void __user *uptr, + u32 flags) +{ + unsigned long bitmap_size, i, max; + struct iommu_test_cmd *cmd = ucmd->cmd; + struct iommufd_hw_pagetable *hwpt; + struct mock_iommu_domain *mock; + int rc, count = 0; + void *tmp; + + if (!page_size || !length || iova % page_size || length % page_size || + !uptr) + return -EINVAL; + + hwpt = get_md_pagetable(ucmd, mockpt_id, &mock); + if (IS_ERR(hwpt)) + return PTR_ERR(hwpt); + + if (!(mock->flags & MOCK_DIRTY_TRACK)) { + rc = -EINVAL; + goto out_put; + } + + max = length / page_size; + bitmap_size = max / BITS_PER_BYTE; + + tmp = kvzalloc(bitmap_size, GFP_KERNEL_ACCOUNT); + if (!tmp) { + rc = -ENOMEM; + goto out_put; + } + + if (copy_from_user(tmp, uptr, bitmap_size)) { + rc = -EFAULT; + goto out_free; + } + + for (i = 0; i < max; i++) { + unsigned long cur = iova + i * page_size; + void *ent, *old; + + if (!test_bit(i, (unsigned long *)tmp)) + continue; + + ent = xa_load(&mock->pfns, cur / page_size); + if (ent) { + unsigned long val; + + val = xa_to_value(ent) | MOCK_PFN_DIRTY_IOVA; + old = xa_store(&mock->pfns, cur / page_size, + xa_mk_value(val), GFP_KERNEL); + WARN_ON_ONCE(ent != old); + count++; + } + } + + cmd->dirty.out_nr_dirty = count; + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); +out_free: + kvfree(tmp); +out_put: + iommufd_put_object(&hwpt->obj); + return rc; +} + void iommufd_selftest_destroy(struct iommufd_object *obj) { struct selftest_obj *sobj = container_of(obj, struct selftest_obj, obj); @@ -1000,6 +1183,7 @@ int iommufd_test(struct iommufd_ucmd *ucmd) cmd->add_reserved.start, cmd->add_reserved.length); case IOMMU_TEST_OP_MOCK_DOMAIN: + case IOMMU_TEST_OP_MOCK_DOMAIN_FLAGS: return iommufd_test_mock_domain(ucmd, cmd); case IOMMU_TEST_OP_MOCK_DOMAIN_REPLACE: return iommufd_test_mock_domain_replace( @@ -1041,6 +1225,12 @@ int iommufd_test(struct iommufd_ucmd *ucmd) return -EINVAL; iommufd_test_memory_limit = cmd->memory_limit.limit; return 0; + case IOMMU_TEST_OP_DIRTY: + return iommufd_test_dirty(ucmd, cmd->id, cmd->dirty.iova, + cmd->dirty.length, + cmd->dirty.page_size, + u64_to_user_ptr(cmd->dirty.uptr), + cmd->dirty.flags); default: return -EOPNOTSUPP; } diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index c82ea032d3521268138811a1cc1b718755c90c26..68c05705200fce8fc9824a8521bbe554e5c130f7 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -1,8 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_VFIO) += vfio.o -vfio-y += vfio_main.o \ - iova_bitmap.o +vfio-y += vfio_main.o vfio-$(CONFIG_VFIO_DEVICE_CDEV) += device_cdev.o vfio-$(CONFIG_VFIO_GROUP) += group.o vfio-$(CONFIG_IOMMUFD) += iommufd.o diff --git a/drivers/vfio/pci/mlx5/Kconfig b/drivers/vfio/pci/mlx5/Kconfig index 7088edc4fb28d88f5603e8f68462993123eece46..c3ced56b7787650ce8b82039b419413e81deedfa 100644 --- a/drivers/vfio/pci/mlx5/Kconfig +++ b/drivers/vfio/pci/mlx5/Kconfig @@ -3,6 +3,7 @@ config MLX5_VFIO_PCI tristate "VFIO support for MLX5 PCI devices" depends on MLX5_CORE select VFIO_PCI_CORE + select IOMMUFD_DRIVER help This provides migration support for MLX5 devices using the VFIO framework. diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index 42ec574a86221074b2b201d4546e116adfd79179..5cf2b491d15a01467cc82a5df624ffc494da8b20 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -1376,6 +1376,7 @@ static struct pci_driver mlx5vf_pci_driver = { module_pci_driver(mlx5vf_pci_driver); +MODULE_IMPORT_NS(IOMMUFD); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Max Gurtovoy "); MODULE_AUTHOR("Yishai Hadas "); diff --git a/drivers/vfio/pci/pds/Kconfig b/drivers/vfio/pci/pds/Kconfig index 6eceef7b028aae9b8b7a8cb49614e88525f4bade..fec9b167c7b9ac98ae24dddd9265e30d95942e7d 100644 --- a/drivers/vfio/pci/pds/Kconfig +++ b/drivers/vfio/pci/pds/Kconfig @@ -5,6 +5,7 @@ config PDS_VFIO_PCI tristate "VFIO support for PDS PCI devices" depends on PDS_CORE && PCI_IOV select VFIO_PCI_CORE + select IOMMUFD_DRIVER help This provides generic PCI support for PDS devices using the VFIO framework. diff --git a/drivers/vfio/pci/pds/pci_drv.c b/drivers/vfio/pci/pds/pci_drv.c index caffa1a2cf591e9428f4cf960f2d9265ee86c3a9..a34dda5166293583337372fc0059129c726a9f45 100644 --- a/drivers/vfio/pci/pds/pci_drv.c +++ b/drivers/vfio/pci/pds/pci_drv.c @@ -204,6 +204,7 @@ static struct pci_driver pds_vfio_pci_driver = { module_pci_driver(pds_vfio_pci_driver); +MODULE_IMPORT_NS(IOMMUFD); MODULE_DESCRIPTION(PDS_VFIO_DRV_DESCRIPTION); MODULE_AUTHOR("Brett Creeley "); MODULE_LICENSE("GPL"); diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 40732e8ed4c6fb018ddfb27fbeaff37775dcc233..a96d97da367daa87a9e5920f36216d64f2c1afc0 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -1693,6 +1693,7 @@ static void __exit vfio_cleanup(void) module_init(vfio_init); module_exit(vfio_cleanup); +MODULE_IMPORT_NS(IOMMUFD); MODULE_VERSION(DRIVER_VERSION); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR(DRIVER_AUTHOR); diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 1b7a44b35616c7d00cb383425c72fe10ee079ff1..25142a0e2fc2c51d4c7807a1fb87cc21b16a163b 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -166,6 +166,10 @@ struct io_pgtable_ops { struct iommu_iotlb_gather *gather); phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops, unsigned long iova); + int (*read_and_clear_dirty)(struct io_pgtable_ops *ops, + unsigned long iova, size_t size, + unsigned long flags, + struct iommu_dirty_bitmap *dirty); }; /** diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 0225cf7445de2a97bbfa3f33a0cf0ea39153d92a..5398c399fe9dc69f6556fb7ae09e73315758e0da 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #define IOMMU_READ (1 << 0) @@ -37,6 +38,7 @@ struct bus_type; struct device; struct iommu_domain; struct iommu_domain_ops; +struct iommu_dirty_ops; struct notifier_block; struct iommu_sva; struct iommu_fault_event; @@ -95,6 +97,8 @@ struct iommu_domain_geometry { struct iommu_domain { unsigned type; const struct iommu_domain_ops *ops; + const struct iommu_dirty_ops *dirty_ops; + unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */ struct iommu_domain_geometry geometry; struct iommu_dma_cookie *iova_cookie; @@ -133,6 +137,7 @@ enum iommu_cap { * usefully support the non-strict DMA flush queue. */ IOMMU_CAP_DEFERRED_FLUSH, + IOMMU_CAP_DIRTY_TRACKING, /* IOMMU supports dirty tracking */ }; /* These are the possible reserved region types */ @@ -227,6 +232,35 @@ struct iommu_iotlb_gather { bool queued; }; +/** + * struct iommu_dirty_bitmap - Dirty IOVA bitmap state + * @bitmap: IOVA bitmap + * @gather: Range information for a pending IOTLB flush + */ +struct iommu_dirty_bitmap { + struct iova_bitmap *bitmap; + struct iommu_iotlb_gather *gather; +}; + +/* Read but do not clear any dirty bits */ +#define IOMMU_DIRTY_NO_CLEAR (1 << 0) + +/** + * struct iommu_dirty_ops - domain specific dirty tracking operations + * @set_dirty_tracking: Enable or Disable dirty tracking on the iommu domain + * @read_and_clear_dirty: Walk IOMMU page tables for dirtied PTEs marshalled + * into a bitmap, with a bit represented as a page. + * Reads the dirty PTE bits and clears it from IO + * pagetables. + */ +struct iommu_dirty_ops { + int (*set_dirty_tracking)(struct iommu_domain *domain, bool enabled); + int (*read_and_clear_dirty)(struct iommu_domain *domain, + unsigned long iova, size_t size, + unsigned long flags, + struct iommu_dirty_bitmap *dirty); +}; + /** * struct iommu_ops - iommu ops and capabilities * @capable: check capability @@ -234,7 +268,15 @@ struct iommu_iotlb_gather { * op is allocated in the iommu driver and freed by the caller after * use. The information type is one of enum iommu_hw_info_type defined * in include/uapi/linux/iommufd.h. - * @domain_alloc: allocate iommu domain + * @domain_alloc: allocate and return an iommu domain if success. Otherwise + * NULL is returned. The domain is not fully initialized until + * the caller iommu_domain_alloc() returns. + * @domain_alloc_user: Allocate an iommu domain corresponding to the input + * parameters as defined in include/uapi/linux/iommufd.h. + * Unlike @domain_alloc, it is called only by IOMMUFD and + * must fully initialize the new domain before return. + * Upon success, a domain is returned. Upon failure, + * ERR_PTR must be returned. * @probe_device: Add device to iommu driver handling * @release_device: Remove device from iommu driver handling * @probe_finalize: Do final setup work after the device is added to an IOMMU @@ -267,6 +309,7 @@ struct iommu_ops { /* Domain allocation and freeing by the iommu driver */ struct iommu_domain *(*domain_alloc)(unsigned iommu_domain_type); + struct iommu_domain *(*domain_alloc_user)(struct device *dev, u32 flags); struct iommu_device *(*probe_device)(struct device *dev); void (*release_device)(struct device *dev); @@ -632,6 +675,28 @@ static inline bool iommu_iotlb_gather_queued(struct iommu_iotlb_gather *gather) return gather && gather->queued; } +static inline void iommu_dirty_bitmap_init(struct iommu_dirty_bitmap *dirty, + struct iova_bitmap *bitmap, + struct iommu_iotlb_gather *gather) +{ + if (gather) + iommu_iotlb_gather_init(gather); + + dirty->bitmap = bitmap; + dirty->gather = gather; +} + +static inline void iommu_dirty_bitmap_record(struct iommu_dirty_bitmap *dirty, + unsigned long iova, + unsigned long length) +{ + if (dirty->bitmap) + iova_bitmap_set(dirty->bitmap, iova, length); + + if (dirty->gather) + iommu_iotlb_gather_add_range(dirty->gather, iova, length); +} + /* PCI device grouping function */ extern struct iommu_group *pci_device_group(struct device *dev); /* Generic device grouping function */ @@ -738,6 +803,8 @@ struct iommu_fwspec {}; struct iommu_device {}; struct iommu_fault_param {}; struct iommu_iotlb_gather {}; +struct iommu_dirty_bitmap {}; +struct iommu_dirty_ops {}; static inline bool iommu_present(const struct bus_type *bus) { @@ -970,6 +1037,18 @@ static inline bool iommu_iotlb_gather_queued(struct iommu_iotlb_gather *gather) return false; } +static inline void iommu_dirty_bitmap_init(struct iommu_dirty_bitmap *dirty, + struct iova_bitmap *bitmap, + struct iommu_iotlb_gather *gather) +{ +} + +static inline void iommu_dirty_bitmap_record(struct iommu_dirty_bitmap *dirty, + unsigned long iova, + unsigned long length) +{ +} + static inline void iommu_device_unregister(struct iommu_device *iommu) { } diff --git a/include/linux/iova_bitmap.h b/include/linux/iova_bitmap.h index c006cf0a25f3daac2ccc39c67c9a3193245a4077..1c338f5e5b7a62027290b44ad47c4a74d84706ac 100644 --- a/include/linux/iova_bitmap.h +++ b/include/linux/iova_bitmap.h @@ -7,6 +7,7 @@ #define _IOVA_BITMAP_H_ #include +#include struct iova_bitmap; @@ -14,6 +15,7 @@ typedef int (*iova_bitmap_fn_t)(struct iova_bitmap *bitmap, unsigned long iova, size_t length, void *opaque); +#if IS_ENABLED(CONFIG_IOMMUFD_DRIVER) struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, size_t length, unsigned long page_size, u64 __user *data); @@ -22,5 +24,29 @@ int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque, iova_bitmap_fn_t fn); void iova_bitmap_set(struct iova_bitmap *bitmap, unsigned long iova, size_t length); +#else +static inline struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, + size_t length, + unsigned long page_size, + u64 __user *data) +{ + return NULL; +} + +static inline void iova_bitmap_free(struct iova_bitmap *bitmap) +{ +} + +static inline int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque, + iova_bitmap_fn_t fn) +{ + return -EOPNOTSUPP; +} + +static inline void iova_bitmap_set(struct iova_bitmap *bitmap, + unsigned long iova, size_t length) +{ +} +#endif #endif diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index b4ba0c0cbab6b8b1562fa359d34f9835a9cde757..c44eecf5d318e520a3c78c69e60b2d55a4d1618b 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -47,6 +47,8 @@ enum { IOMMUFD_CMD_VFIO_IOAS, IOMMUFD_CMD_HWPT_ALLOC, IOMMUFD_CMD_GET_HW_INFO, + IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING, + IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP, }; /** @@ -347,10 +349,22 @@ struct iommu_vfio_ioas { }; #define IOMMU_VFIO_IOAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VFIO_IOAS) +/** + * enum iommufd_hwpt_alloc_flags - Flags for HWPT allocation + * @IOMMU_HWPT_ALLOC_NEST_PARENT: If set, allocate a HWPT that can serve as + * the parent HWPT in a nesting configuration. + * @IOMMU_HWPT_ALLOC_DIRTY_TRACKING: Dirty tracking support for device IOMMU is + * enforced on device attachment + */ +enum iommufd_hwpt_alloc_flags { + IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0, + IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1, +}; + /** * struct iommu_hwpt_alloc - ioctl(IOMMU_HWPT_ALLOC) * @size: sizeof(struct iommu_hwpt_alloc) - * @flags: Must be 0 + * @flags: Combination of enum iommufd_hwpt_alloc_flags * @dev_id: The device to allocate this HWPT for * @pt_id: The IOAS to connect this HWPT to * @out_hwpt_id: The ID of the new HWPT @@ -404,6 +418,20 @@ enum iommu_hw_info_type { IOMMU_HW_INFO_TYPE_INTEL_VTD, }; +/** + * enum iommufd_hw_capabilities + * @IOMMU_HW_CAP_DIRTY_TRACKING: IOMMU hardware support for dirty tracking + * If available, it means the following APIs + * are supported: + * + * IOMMU_HWPT_GET_DIRTY_BITMAP + * IOMMU_HWPT_SET_DIRTY_TRACKING + * + */ +enum iommufd_hw_capabilities { + IOMMU_HW_CAP_DIRTY_TRACKING = 1 << 0, +}; + /** * struct iommu_hw_info - ioctl(IOMMU_GET_HW_INFO) * @size: sizeof(struct iommu_hw_info) @@ -415,6 +443,8 @@ enum iommu_hw_info_type { * the iommu type specific hardware information data * @out_data_type: Output the iommu hardware info type as defined in the enum * iommu_hw_info_type. + * @out_capabilities: Output the generic iommu capability info type as defined + * in the enum iommu_hw_capabilities. * @__reserved: Must be 0 * * Query an iommu type specific hardware information data from an iommu behind @@ -439,6 +469,81 @@ struct iommu_hw_info { __aligned_u64 data_uptr; __u32 out_data_type; __u32 __reserved; + __aligned_u64 out_capabilities; }; #define IOMMU_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_GET_HW_INFO) + +/* + * enum iommufd_hwpt_set_dirty_tracking_flags - Flags for steering dirty + * tracking + * @IOMMU_HWPT_DIRTY_TRACKING_ENABLE: Enable dirty tracking + */ +enum iommufd_hwpt_set_dirty_tracking_flags { + IOMMU_HWPT_DIRTY_TRACKING_ENABLE = 1, +}; + +/** + * struct iommu_hwpt_set_dirty_tracking - ioctl(IOMMU_HWPT_SET_DIRTY_TRACKING) + * @size: sizeof(struct iommu_hwpt_set_dirty_tracking) + * @flags: Combination of enum iommufd_hwpt_set_dirty_tracking_flags + * @hwpt_id: HW pagetable ID that represents the IOMMU domain + * @__reserved: Must be 0 + * + * Toggle dirty tracking on an HW pagetable. + */ +struct iommu_hwpt_set_dirty_tracking { + __u32 size; + __u32 flags; + __u32 hwpt_id; + __u32 __reserved; +}; +#define IOMMU_HWPT_SET_DIRTY_TRACKING _IO(IOMMUFD_TYPE, \ + IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING) + +/** + * enum iommufd_hwpt_get_dirty_bitmap_flags - Flags for getting dirty bits + * @IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR: Just read the PTEs without clearing + * any dirty bits metadata. This flag + * can be passed in the expectation + * where the next operation is an unmap + * of the same IOVA range. + * + */ +enum iommufd_hwpt_get_dirty_bitmap_flags { + IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR = 1, +}; + +/** + * struct iommu_hwpt_get_dirty_bitmap - ioctl(IOMMU_HWPT_GET_DIRTY_BITMAP) + * @size: sizeof(struct iommu_hwpt_get_dirty_bitmap) + * @hwpt_id: HW pagetable ID that represents the IOMMU domain + * @flags: Combination of enum iommufd_hwpt_get_dirty_bitmap_flags + * @__reserved: Must be 0 + * @iova: base IOVA of the bitmap first bit + * @length: IOVA range size + * @page_size: page size granularity of each bit in the bitmap + * @data: bitmap where to set the dirty bits. The bitmap bits each + * represent a page_size which you deviate from an arbitrary iova. + * + * Checking a given IOVA is dirty: + * + * data[(iova / page_size) / 64] & (1ULL << ((iova / page_size) % 64)) + * + * Walk the IOMMU pagetables for a given IOVA range to return a bitmap + * with the dirty IOVAs. In doing so it will also by default clear any + * dirty bit metadata set in the IOPTE. + */ +struct iommu_hwpt_get_dirty_bitmap { + __u32 size; + __u32 hwpt_id; + __u32 flags; + __u32 __reserved; + __aligned_u64 iova; + __aligned_u64 length; + __aligned_u64 page_size; + __aligned_u64 data; +}; +#define IOMMU_HWPT_GET_DIRTY_BITMAP _IO(IOMMUFD_TYPE, \ + IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP) + #endif diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 33d08600be13d6c23cf7b36ef6ef8ab6145b4ecc..de61447b9558484288c10b21eee5b7de81a59ef3 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -86,12 +86,13 @@ TEST_F(iommufd, cmd_fail) TEST_F(iommufd, cmd_length) { -#define TEST_LENGTH(_struct, _ioctl) \ +#define TEST_LENGTH(_struct, _ioctl, _last) \ { \ + size_t min_size = offsetofend(struct _struct, _last); \ struct { \ struct _struct cmd; \ uint8_t extra; \ - } cmd = { .cmd = { .size = sizeof(struct _struct) - 1 }, \ + } cmd = { .cmd = { .size = min_size - 1 }, \ .extra = UINT8_MAX }; \ int old_errno; \ int rc; \ @@ -112,16 +113,19 @@ TEST_F(iommufd, cmd_length) } \ } - TEST_LENGTH(iommu_destroy, IOMMU_DESTROY); - TEST_LENGTH(iommu_hw_info, IOMMU_GET_HW_INFO); - TEST_LENGTH(iommu_ioas_alloc, IOMMU_IOAS_ALLOC); - TEST_LENGTH(iommu_ioas_iova_ranges, IOMMU_IOAS_IOVA_RANGES); - TEST_LENGTH(iommu_ioas_allow_iovas, IOMMU_IOAS_ALLOW_IOVAS); - TEST_LENGTH(iommu_ioas_map, IOMMU_IOAS_MAP); - TEST_LENGTH(iommu_ioas_copy, IOMMU_IOAS_COPY); - TEST_LENGTH(iommu_ioas_unmap, IOMMU_IOAS_UNMAP); - TEST_LENGTH(iommu_option, IOMMU_OPTION); - TEST_LENGTH(iommu_vfio_ioas, IOMMU_VFIO_IOAS); + TEST_LENGTH(iommu_destroy, IOMMU_DESTROY, id); + TEST_LENGTH(iommu_hw_info, IOMMU_GET_HW_INFO, __reserved); + TEST_LENGTH(iommu_hwpt_alloc, IOMMU_HWPT_ALLOC, __reserved); + TEST_LENGTH(iommu_ioas_alloc, IOMMU_IOAS_ALLOC, out_ioas_id); + TEST_LENGTH(iommu_ioas_iova_ranges, IOMMU_IOAS_IOVA_RANGES, + out_iova_alignment); + TEST_LENGTH(iommu_ioas_allow_iovas, IOMMU_IOAS_ALLOW_IOVAS, + allowed_iovas); + TEST_LENGTH(iommu_ioas_map, IOMMU_IOAS_MAP, iova); + TEST_LENGTH(iommu_ioas_copy, IOMMU_IOAS_COPY, src_iova); + TEST_LENGTH(iommu_ioas_unmap, IOMMU_IOAS_UNMAP, length); + TEST_LENGTH(iommu_option, IOMMU_OPTION, val64); + TEST_LENGTH(iommu_vfio_ioas, IOMMU_VFIO_IOAS, __reserved); #undef TEST_LENGTH } @@ -1404,16 +1408,242 @@ TEST_F(iommufd_mock_domain, alloc_hwpt) int i; for (i = 0; i != variant->mock_domains; i++) { + uint32_t hwpt_id[2]; uint32_t stddev_id; - uint32_t hwpt_id; - test_cmd_hwpt_alloc(self->idev_ids[0], self->ioas_id, &hwpt_id); - test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL); + test_err_hwpt_alloc(EOPNOTSUPP, + self->idev_ids[i], self->ioas_id, + ~IOMMU_HWPT_ALLOC_NEST_PARENT, &hwpt_id[0]); + test_cmd_hwpt_alloc(self->idev_ids[i], self->ioas_id, + 0, &hwpt_id[0]); + test_cmd_hwpt_alloc(self->idev_ids[i], self->ioas_id, + IOMMU_HWPT_ALLOC_NEST_PARENT, &hwpt_id[1]); + + /* Do a hw_pagetable rotation test */ + test_cmd_mock_domain_replace(self->stdev_ids[i], hwpt_id[0]); + EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, hwpt_id[0])); + test_cmd_mock_domain_replace(self->stdev_ids[i], hwpt_id[1]); + EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, hwpt_id[1])); + test_cmd_mock_domain_replace(self->stdev_ids[i], self->ioas_id); + test_ioctl_destroy(hwpt_id[1]); + + test_cmd_mock_domain(hwpt_id[0], &stddev_id, NULL, NULL); test_ioctl_destroy(stddev_id); - test_ioctl_destroy(hwpt_id); + test_ioctl_destroy(hwpt_id[0]); } } +FIXTURE(iommufd_dirty_tracking) +{ + int fd; + uint32_t ioas_id; + uint32_t hwpt_id; + uint32_t stdev_id; + uint32_t idev_id; + unsigned long page_size; + unsigned long bitmap_size; + void *bitmap; + void *buffer; +}; + +FIXTURE_VARIANT(iommufd_dirty_tracking) +{ + unsigned long buffer_size; +}; + +FIXTURE_SETUP(iommufd_dirty_tracking) +{ + void *vrc; + int rc; + + self->fd = open("/dev/iommu", O_RDWR); + ASSERT_NE(-1, self->fd); + + rc = posix_memalign(&self->buffer, HUGEPAGE_SIZE, variant->buffer_size); + if (rc || !self->buffer) { + SKIP(return, "Skipping buffer_size=%lu due to errno=%d", + variant->buffer_size, rc); + } + + assert((uintptr_t)self->buffer % HUGEPAGE_SIZE == 0); + vrc = mmap(self->buffer, variant->buffer_size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + assert(vrc == self->buffer); + + self->page_size = MOCK_PAGE_SIZE; + self->bitmap_size = + variant->buffer_size / self->page_size / BITS_PER_BYTE; + + /* Provision with an extra (MOCK_PAGE_SIZE) for the unaligned case */ + rc = posix_memalign(&self->bitmap, PAGE_SIZE, + self->bitmap_size + MOCK_PAGE_SIZE); + assert(!rc); + assert(self->bitmap); + assert((uintptr_t)self->bitmap % PAGE_SIZE == 0); + + test_ioctl_ioas_alloc(&self->ioas_id); + test_cmd_mock_domain(self->ioas_id, &self->stdev_id, &self->hwpt_id, + &self->idev_id); +} + +FIXTURE_TEARDOWN(iommufd_dirty_tracking) +{ + munmap(self->buffer, variant->buffer_size); + munmap(self->bitmap, self->bitmap_size); + teardown_iommufd(self->fd, _metadata); +} + +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128k) +{ + /* one u32 index bitmap */ + .buffer_size = 128UL * 1024UL, +}; + +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256k) +{ + /* one u64 index bitmap */ + .buffer_size = 256UL * 1024UL, +}; + +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty640k) +{ + /* two u64 index and trailing end bitmap */ + .buffer_size = 640UL * 1024UL, +}; + +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M) +{ + /* 4K bitmap (128M IOVA range) */ + .buffer_size = 128UL * 1024UL * 1024UL, +}; + +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256M) +{ + /* 8K bitmap (256M IOVA range) */ + .buffer_size = 256UL * 1024UL * 1024UL, +}; + +TEST_F(iommufd_dirty_tracking, enforce_dirty) +{ + uint32_t ioas_id, stddev_id, idev_id; + uint32_t hwpt_id, _hwpt_id; + uint32_t dev_flags; + + /* Regular case */ + dev_flags = MOCK_FLAGS_DEVICE_NO_DIRTY; + test_cmd_hwpt_alloc(self->idev_id, self->ioas_id, + IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id); + test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL); + test_err_mock_domain_flags(EINVAL, hwpt_id, dev_flags, &stddev_id, + NULL); + test_ioctl_destroy(stddev_id); + test_ioctl_destroy(hwpt_id); + + /* IOMMU device does not support dirty tracking */ + test_ioctl_ioas_alloc(&ioas_id); + test_cmd_mock_domain_flags(ioas_id, dev_flags, &stddev_id, &_hwpt_id, + &idev_id); + test_err_hwpt_alloc(EOPNOTSUPP, idev_id, ioas_id, + IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id); + test_ioctl_destroy(stddev_id); +} + +TEST_F(iommufd_dirty_tracking, set_dirty_tracking) +{ + uint32_t stddev_id; + uint32_t hwpt_id; + + test_cmd_hwpt_alloc(self->idev_id, self->ioas_id, + IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id); + test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL); + test_cmd_set_dirty_tracking(hwpt_id, true); + test_cmd_set_dirty_tracking(hwpt_id, false); + + test_ioctl_destroy(stddev_id); + test_ioctl_destroy(hwpt_id); +} + +TEST_F(iommufd_dirty_tracking, device_dirty_capability) +{ + uint32_t caps = 0; + uint32_t stddev_id; + uint32_t hwpt_id; + + test_cmd_hwpt_alloc(self->idev_id, self->ioas_id, 0, &hwpt_id); + test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL); + test_cmd_get_hw_capabilities(self->idev_id, caps, + IOMMU_HW_CAP_DIRTY_TRACKING); + ASSERT_EQ(IOMMU_HW_CAP_DIRTY_TRACKING, + caps & IOMMU_HW_CAP_DIRTY_TRACKING); + + test_ioctl_destroy(stddev_id); + test_ioctl_destroy(hwpt_id); +} + +TEST_F(iommufd_dirty_tracking, get_dirty_bitmap) +{ + uint32_t stddev_id; + uint32_t hwpt_id; + uint32_t ioas_id; + + test_ioctl_ioas_alloc(&ioas_id); + test_ioctl_ioas_map_fixed_id(ioas_id, self->buffer, + variant->buffer_size, MOCK_APERTURE_START); + + test_cmd_hwpt_alloc(self->idev_id, ioas_id, + IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id); + test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL); + + test_cmd_set_dirty_tracking(hwpt_id, true); + + test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size, + MOCK_APERTURE_START, self->page_size, + self->bitmap, self->bitmap_size, 0, _metadata); + + /* PAGE_SIZE unaligned bitmap */ + test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size, + MOCK_APERTURE_START, self->page_size, + self->bitmap + MOCK_PAGE_SIZE, + self->bitmap_size, 0, _metadata); + + test_ioctl_destroy(stddev_id); + test_ioctl_destroy(hwpt_id); +} + +TEST_F(iommufd_dirty_tracking, get_dirty_bitmap_no_clear) +{ + uint32_t stddev_id; + uint32_t hwpt_id; + uint32_t ioas_id; + + test_ioctl_ioas_alloc(&ioas_id); + test_ioctl_ioas_map_fixed_id(ioas_id, self->buffer, + variant->buffer_size, MOCK_APERTURE_START); + + test_cmd_hwpt_alloc(self->idev_id, ioas_id, + IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id); + test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL); + + test_cmd_set_dirty_tracking(hwpt_id, true); + + test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size, + MOCK_APERTURE_START, self->page_size, + self->bitmap, self->bitmap_size, + IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR, + _metadata); + + /* Unaligned bitmap */ + test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size, + MOCK_APERTURE_START, self->page_size, + self->bitmap + MOCK_PAGE_SIZE, + self->bitmap_size, + IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR, + _metadata); + + test_ioctl_destroy(stddev_id); + test_ioctl_destroy(hwpt_id); +} + /* VFIO compatibility IOCTLs */ TEST_F(iommufd, simple_ioctls) diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c index a220ca2a689d160c95129d0bc281ed63dfb38c73..1fcd69cb0e416718c85657f5278e75824a0d7da4 100644 --- a/tools/testing/selftests/iommu/iommufd_fail_nth.c +++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c @@ -612,10 +612,10 @@ TEST_FAIL_NTH(basic_fail_nth, device) &idev_id)) return -1; - if (_test_cmd_get_hw_info(self->fd, idev_id, &info, sizeof(info))) + if (_test_cmd_get_hw_info(self->fd, idev_id, &info, sizeof(info), NULL)) return -1; - if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, &hwpt_id)) + if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0, &hwpt_id)) return -1; if (_test_cmd_mock_domain_replace(self->fd, stdev_id, ioas_id2, NULL)) diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index e0753d03ecaa8576005120cf86dd47a529df1f94..70d558e0f0c747ea887d33805c2e7ff78fc6b8fb 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -16,6 +16,25 @@ /* Hack to make assertions more readable */ #define _IOMMU_TEST_CMD(x) IOMMU_TEST_CMD +/* Imported from include/asm-generic/bitops/generic-non-atomic.h */ +#define BITS_PER_BYTE 8 +#define BITS_PER_LONG __BITS_PER_LONG +#define BIT_MASK(nr) (1UL << ((nr) % __BITS_PER_LONG)) +#define BIT_WORD(nr) ((nr) / __BITS_PER_LONG) + +static inline void set_bit(unsigned int nr, unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + + *p |= mask; +} + +static inline bool test_bit(unsigned int nr, unsigned long *addr) +{ + return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG - 1))); +} + static void *buffer; static unsigned long BUFFER_SIZE; @@ -74,6 +93,38 @@ static int _test_cmd_mock_domain(int fd, unsigned int ioas_id, __u32 *stdev_id, EXPECT_ERRNO(_errno, _test_cmd_mock_domain(self->fd, ioas_id, \ stdev_id, hwpt_id, NULL)) +static int _test_cmd_mock_domain_flags(int fd, unsigned int ioas_id, + __u32 stdev_flags, __u32 *stdev_id, + __u32 *hwpt_id, __u32 *idev_id) +{ + struct iommu_test_cmd cmd = { + .size = sizeof(cmd), + .op = IOMMU_TEST_OP_MOCK_DOMAIN_FLAGS, + .id = ioas_id, + .mock_domain_flags = { .dev_flags = stdev_flags }, + }; + int ret; + + ret = ioctl(fd, IOMMU_TEST_CMD, &cmd); + if (ret) + return ret; + if (stdev_id) + *stdev_id = cmd.mock_domain_flags.out_stdev_id; + assert(cmd.id != 0); + if (hwpt_id) + *hwpt_id = cmd.mock_domain_flags.out_hwpt_id; + if (idev_id) + *idev_id = cmd.mock_domain_flags.out_idev_id; + return 0; +} +#define test_cmd_mock_domain_flags(ioas_id, flags, stdev_id, hwpt_id, idev_id) \ + ASSERT_EQ(0, _test_cmd_mock_domain_flags(self->fd, ioas_id, flags, \ + stdev_id, hwpt_id, idev_id)) +#define test_err_mock_domain_flags(_errno, ioas_id, flags, stdev_id, hwpt_id) \ + EXPECT_ERRNO(_errno, \ + _test_cmd_mock_domain_flags(self->fd, ioas_id, flags, \ + stdev_id, hwpt_id, NULL)) + static int _test_cmd_mock_domain_replace(int fd, __u32 stdev_id, __u32 pt_id, __u32 *hwpt_id) { @@ -103,10 +154,11 @@ static int _test_cmd_mock_domain_replace(int fd, __u32 stdev_id, __u32 pt_id, pt_id, NULL)) static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id, - __u32 *hwpt_id) + __u32 flags, __u32 *hwpt_id) { struct iommu_hwpt_alloc cmd = { .size = sizeof(cmd), + .flags = flags, .dev_id = device_id, .pt_id = pt_id, }; @@ -120,8 +172,12 @@ static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id, return 0; } -#define test_cmd_hwpt_alloc(device_id, pt_id, hwpt_id) \ - ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, hwpt_id)) +#define test_cmd_hwpt_alloc(device_id, pt_id, flags, hwpt_id) \ + ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, \ + pt_id, flags, hwpt_id)) +#define test_err_hwpt_alloc(_errno, device_id, pt_id, flags, hwpt_id) \ + EXPECT_ERRNO(_errno, _test_cmd_hwpt_alloc(self->fd, device_id, \ + pt_id, flags, hwpt_id)) static int _test_cmd_access_replace_ioas(int fd, __u32 access_id, unsigned int ioas_id) @@ -142,6 +198,125 @@ static int _test_cmd_access_replace_ioas(int fd, __u32 access_id, #define test_cmd_access_replace_ioas(access_id, ioas_id) \ ASSERT_EQ(0, _test_cmd_access_replace_ioas(self->fd, access_id, ioas_id)) +static int _test_cmd_set_dirty_tracking(int fd, __u32 hwpt_id, bool enabled) +{ + struct iommu_hwpt_set_dirty_tracking cmd = { + .size = sizeof(cmd), + .flags = enabled ? IOMMU_HWPT_DIRTY_TRACKING_ENABLE : 0, + .hwpt_id = hwpt_id, + }; + int ret; + + ret = ioctl(fd, IOMMU_HWPT_SET_DIRTY_TRACKING, &cmd); + if (ret) + return -errno; + return 0; +} +#define test_cmd_set_dirty_tracking(hwpt_id, enabled) \ + ASSERT_EQ(0, _test_cmd_set_dirty_tracking(self->fd, hwpt_id, enabled)) + +static int _test_cmd_get_dirty_bitmap(int fd, __u32 hwpt_id, size_t length, + __u64 iova, size_t page_size, + __u64 *bitmap, __u32 flags) +{ + struct iommu_hwpt_get_dirty_bitmap cmd = { + .size = sizeof(cmd), + .hwpt_id = hwpt_id, + .flags = flags, + .iova = iova, + .length = length, + .page_size = page_size, + .data = (uintptr_t)bitmap, + }; + int ret; + + ret = ioctl(fd, IOMMU_HWPT_GET_DIRTY_BITMAP, &cmd); + if (ret) + return ret; + return 0; +} + +#define test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, page_size, \ + bitmap, flags) \ + ASSERT_EQ(0, _test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, \ + page_size, bitmap, flags)) + +static int _test_cmd_mock_domain_set_dirty(int fd, __u32 hwpt_id, size_t length, + __u64 iova, size_t page_size, + __u64 *bitmap, __u64 *dirty) +{ + struct iommu_test_cmd cmd = { + .size = sizeof(cmd), + .op = IOMMU_TEST_OP_DIRTY, + .id = hwpt_id, + .dirty = { + .iova = iova, + .length = length, + .page_size = page_size, + .uptr = (uintptr_t)bitmap, + } + }; + int ret; + + ret = ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_DIRTY), &cmd); + if (ret) + return -ret; + if (dirty) + *dirty = cmd.dirty.out_nr_dirty; + return 0; +} + +#define test_cmd_mock_domain_set_dirty(fd, hwpt_id, length, iova, page_size, \ + bitmap, nr) \ + ASSERT_EQ(0, \ + _test_cmd_mock_domain_set_dirty(fd, hwpt_id, length, iova, \ + page_size, bitmap, nr)) + +static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length, + __u64 iova, size_t page_size, __u64 *bitmap, + __u64 bitmap_size, __u32 flags, + struct __test_metadata *_metadata) +{ + unsigned long i, nbits = bitmap_size * BITS_PER_BYTE; + unsigned long nr = nbits / 2; + __u64 out_dirty = 0; + + /* Mark all even bits as dirty in the mock domain */ + for (i = 0; i < nbits; i += 2) + set_bit(i, (unsigned long *)bitmap); + + test_cmd_mock_domain_set_dirty(fd, hwpt_id, length, iova, page_size, + bitmap, &out_dirty); + ASSERT_EQ(nr, out_dirty); + + /* Expect all even bits as dirty in the user bitmap */ + memset(bitmap, 0, bitmap_size); + test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, page_size, bitmap, + flags); + /* Beware ASSERT_EQ() is two statements -- braces are not redundant! */ + for (i = 0; i < nbits; i++) { + ASSERT_EQ(!(i % 2), test_bit(i, (unsigned long *)bitmap)); + } + + memset(bitmap, 0, bitmap_size); + test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, page_size, bitmap, + flags); + + /* It as read already -- expect all zeroes */ + for (i = 0; i < nbits; i++) { + ASSERT_EQ(!(i % 2) && (flags & + IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR), + test_bit(i, (unsigned long *)bitmap)); + } + + return 0; +} +#define test_mock_dirty_bitmaps(hwpt_id, length, iova, page_size, bitmap, \ + bitmap_size, flags, _metadata) \ + ASSERT_EQ(0, _test_mock_dirty_bitmaps(self->fd, hwpt_id, length, iova, \ + page_size, bitmap, bitmap_size, \ + flags, _metadata)) + static int _test_cmd_create_access(int fd, unsigned int ioas_id, __u32 *access_id, unsigned int flags) { @@ -266,6 +441,17 @@ static int _test_ioctl_ioas_map(int fd, unsigned int ioas_id, void *buffer, IOMMU_IOAS_MAP_READABLE)); \ }) +#define test_ioctl_ioas_map_fixed_id(ioas_id, buffer, length, iova) \ + ({ \ + __u64 __iova = iova; \ + ASSERT_EQ(0, \ + _test_ioctl_ioas_map( \ + self->fd, ioas_id, buffer, length, &__iova, \ + IOMMU_IOAS_MAP_FIXED_IOVA | \ + IOMMU_IOAS_MAP_WRITEABLE | \ + IOMMU_IOAS_MAP_READABLE)); \ + }) + #define test_err_ioctl_ioas_map_fixed(_errno, buffer, length, iova) \ ({ \ __u64 __iova = iova; \ @@ -354,8 +540,8 @@ static void teardown_iommufd(int fd, struct __test_metadata *_metadata) #endif /* @data can be NULL */ -static int _test_cmd_get_hw_info(int fd, __u32 device_id, - void *data, size_t data_len) +static int _test_cmd_get_hw_info(int fd, __u32 device_id, void *data, + size_t data_len, uint32_t *capabilities) { struct iommu_test_hw_info *info = (struct iommu_test_hw_info *)data; struct iommu_hw_info cmd = { @@ -363,6 +549,7 @@ static int _test_cmd_get_hw_info(int fd, __u32 device_id, .dev_id = device_id, .data_len = data_len, .data_uptr = (uint64_t)data, + .out_capabilities = 0, }; int ret; @@ -399,14 +586,19 @@ static int _test_cmd_get_hw_info(int fd, __u32 device_id, assert(!info->flags); } + if (capabilities) + *capabilities = cmd.out_capabilities; + return 0; } -#define test_cmd_get_hw_info(device_id, data, data_len) \ - ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, \ - data, data_len)) +#define test_cmd_get_hw_info(device_id, data, data_len) \ + ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, data, \ + data_len, NULL)) + +#define test_err_get_hw_info(_errno, device_id, data, data_len) \ + EXPECT_ERRNO(_errno, _test_cmd_get_hw_info(self->fd, device_id, data, \ + data_len, NULL)) -#define test_err_get_hw_info(_errno, device_id, data, data_len) \ - EXPECT_ERRNO(_errno, \ - _test_cmd_get_hw_info(self->fd, device_id, \ - data, data_len)) +#define test_cmd_get_hw_capabilities(device_id, caps, mask) \ + ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, NULL, 0, &caps))