diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index c4aacf42427f569873364f11a6f88186a093ce86..d3927bbc083db1e8e64700a392a488bd633fa2aa 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -101,6 +101,14 @@ struct vfio_dma { unsigned long *bitmap; }; +struct vfio_batch { + struct page **pages; /* for pin_user_pages_remote */ + struct page *fallback_page; /* if pages alloc fails */ + int capacity; /* length of pages array */ + int size; /* of batch currently */ + int offset; /* of next entry in pages */ +}; + struct vfio_group { struct iommu_group *iommu_group; struct list_head next; @@ -464,6 +472,45 @@ static int put_pfn(unsigned long pfn, int prot) return 0; } +#define VFIO_BATCH_MAX_CAPACITY (PAGE_SIZE / sizeof(struct page *)) + +static void vfio_batch_init(struct vfio_batch *batch) +{ + batch->size = 0; + batch->offset = 0; + + if (unlikely(disable_hugepages)) + goto fallback; + + batch->pages = (struct page **) __get_free_page(GFP_KERNEL); + if (!batch->pages) + goto fallback; + + batch->capacity = VFIO_BATCH_MAX_CAPACITY; + return; + +fallback: + batch->pages = &batch->fallback_page; + batch->capacity = 1; +} + +static void vfio_batch_unpin(struct vfio_batch *batch, struct vfio_dma *dma) +{ + while (batch->size) { + unsigned long pfn = page_to_pfn(batch->pages[batch->offset]); + + put_pfn(pfn, dma->prot); + batch->offset++; + batch->size--; + } +} + +static void vfio_batch_fini(struct vfio_batch *batch) +{ + if (batch->capacity == VFIO_BATCH_MAX_CAPACITY) + free_page((unsigned long)batch->pages); +} + static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm, unsigned long vaddr, unsigned long *pfn, bool write_fault) @@ -500,10 +547,14 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm, return ret; } -static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, - int prot, unsigned long *pfn) +/* + * Returns the positive number of pfns successfully obtained or a negative + * error code. + */ +static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr, + long npages, int prot, unsigned long *pfn, + struct page **pages) { - struct page *page[1]; struct vm_area_struct *vma; unsigned int flags = 0; int ret; @@ -512,11 +563,10 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, flags |= FOLL_WRITE; mmap_read_lock(mm); - ret = pin_user_pages_remote(mm, vaddr, 1, flags | FOLL_LONGTERM, - page, NULL, NULL); - if (ret == 1) { - *pfn = page_to_pfn(page[0]); - ret = 0; + ret = pin_user_pages_remote(mm, vaddr, npages, flags | FOLL_LONGTERM, + pages, NULL, NULL); + if (ret > 0) { + *pfn = page_to_pfn(pages[0]); goto done; } @@ -530,8 +580,12 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, if (ret == -EAGAIN) goto retry; - if (!ret && !is_invalid_reserved_pfn(*pfn)) - ret = -EFAULT; + if (!ret) { + if (is_invalid_reserved_pfn(*pfn)) + ret = 1; + else + ret = -EFAULT; + } } done: mmap_read_unlock(mm); @@ -545,76 +599,108 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, */ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, long npage, unsigned long *pfn_base, - unsigned long limit) + unsigned long limit, struct vfio_batch *batch) { - unsigned long pfn = 0; + unsigned long pfn; + struct mm_struct *mm = current->mm; long ret, pinned = 0, lock_acct = 0; bool rsvd; dma_addr_t iova = vaddr - dma->vaddr + dma->iova; /* This code path is only user initiated */ - if (!current->mm) + if (!mm) return -ENODEV; - ret = vaddr_get_pfn(current->mm, vaddr, dma->prot, pfn_base); - if (ret) - return ret; - - pinned++; - rsvd = is_invalid_reserved_pfn(*pfn_base); - - /* - * Reserved pages aren't counted against the user, externally pinned - * pages are already counted against the user. - */ - if (!rsvd && !vfio_find_vpfn(dma, iova)) { - if (!dma->lock_cap && current->mm->locked_vm + 1 > limit) { - put_pfn(*pfn_base, dma->prot); - pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__, - limit << PAGE_SHIFT); - return -ENOMEM; - } - lock_acct++; + if (batch->size) { + /* Leftover pages in batch from an earlier call. */ + *pfn_base = page_to_pfn(batch->pages[batch->offset]); + pfn = *pfn_base; + rsvd = is_invalid_reserved_pfn(*pfn_base); + } else { + *pfn_base = 0; } - if (unlikely(disable_hugepages)) - goto out; + while (npage) { + if (!batch->size) { + /* Empty batch, so refill it. */ + long req_pages = min_t(long, npage, batch->capacity); - /* Lock all the consecutive pages from pfn_base */ - for (vaddr += PAGE_SIZE, iova += PAGE_SIZE; pinned < npage; - pinned++, vaddr += PAGE_SIZE, iova += PAGE_SIZE) { - ret = vaddr_get_pfn(current->mm, vaddr, dma->prot, &pfn); - if (ret) - break; + ret = vaddr_get_pfns(mm, vaddr, req_pages, dma->prot, + &pfn, batch->pages); + if (ret < 0) + goto unpin_out; - if (pfn != *pfn_base + pinned || - rsvd != is_invalid_reserved_pfn(pfn)) { - put_pfn(pfn, dma->prot); - break; + batch->size = ret; + batch->offset = 0; + + if (!*pfn_base) { + *pfn_base = pfn; + rsvd = is_invalid_reserved_pfn(*pfn_base); + } } - if (!rsvd && !vfio_find_vpfn(dma, iova)) { - if (!dma->lock_cap && - current->mm->locked_vm + lock_acct + 1 > limit) { - put_pfn(pfn, dma->prot); - pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", - __func__, limit << PAGE_SHIFT); - ret = -ENOMEM; - goto unpin_out; + /* + * pfn is preset for the first iteration of this inner loop and + * updated at the end to handle a VM_PFNMAP pfn. In that case, + * batch->pages isn't valid (there's no struct page), so allow + * batch->pages to be touched only when there's more than one + * pfn to check, which guarantees the pfns are from a + * !VM_PFNMAP vma. + */ + while (true) { + if (pfn != *pfn_base + pinned || + rsvd != is_invalid_reserved_pfn(pfn)) + goto out; + + /* + * Reserved pages aren't counted against the user, + * externally pinned pages are already counted against + * the user. + */ + if (!rsvd && !vfio_find_vpfn(dma, iova)) { + if (!dma->lock_cap && + mm->locked_vm + lock_acct + 1 > limit) { + pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", + __func__, limit << PAGE_SHIFT); + ret = -ENOMEM; + goto unpin_out; + } + lock_acct++; } - lock_acct++; + + pinned++; + npage--; + vaddr += PAGE_SIZE; + iova += PAGE_SIZE; + batch->offset++; + batch->size--; + + if (!batch->size) + break; + + pfn = page_to_pfn(batch->pages[batch->offset]); } + + if (unlikely(disable_hugepages)) + break; } out: ret = vfio_lock_acct(dma, lock_acct, false); unpin_out: - if (ret) { - if (!rsvd) { + if (batch->size == 1 && !batch->offset) { + /* May be a VM_PFNMAP pfn, which the batch can't remember. */ + put_pfn(pfn, dma->prot); + batch->size = 0; + } + + if (ret < 0) { + if (pinned && !rsvd) { for (pfn = *pfn_base ; pinned ; pfn++, pinned--) put_pfn(pfn, dma->prot); } + vfio_batch_unpin(batch, dma); return ret; } @@ -646,6 +732,7 @@ static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova, static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr, unsigned long *pfn_base, bool do_accounting) { + struct page *pages[1]; struct mm_struct *mm; int ret; @@ -653,8 +740,13 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr, if (!mm) return -ENODEV; - ret = vaddr_get_pfn(mm, vaddr, dma->prot, pfn_base); - if (!ret && do_accounting && !is_invalid_reserved_pfn(*pfn_base)) { + ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, pages); + if (ret != 1) + goto out; + + ret = 0; + + if (do_accounting && !is_invalid_reserved_pfn(*pfn_base)) { ret = vfio_lock_acct(dma, 1, true); if (ret) { put_pfn(*pfn_base, dma->prot); @@ -666,6 +758,7 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr, } } +out: mmput(mm); return ret; } @@ -1383,15 +1476,19 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma, { dma_addr_t iova = dma->iova; unsigned long vaddr = dma->vaddr; + struct vfio_batch batch; size_t size = map_size; long npage; unsigned long pfn, limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; int ret = 0; + vfio_batch_init(&batch); + while (size) { /* Pin a contiguous chunk of memory */ npage = vfio_pin_pages_remote(dma, vaddr + dma->size, - size >> PAGE_SHIFT, &pfn, limit); + size >> PAGE_SHIFT, &pfn, limit, + &batch); if (npage <= 0) { WARN_ON(!npage); ret = (int)npage; @@ -1404,6 +1501,7 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma, if (ret) { vfio_unpin_pages_remote(dma, iova + dma->size, pfn, npage, true); + vfio_batch_unpin(&batch, dma); break; } @@ -1411,6 +1509,7 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma, dma->size += npage << PAGE_SHIFT; } + vfio_batch_fini(&batch); dma->iommu_mapped = true; if (ret) @@ -1569,6 +1668,7 @@ static int vfio_bus_type(struct device *dev, void *data) static int vfio_iommu_replay(struct vfio_iommu *iommu, struct vfio_domain *domain) { + struct vfio_batch batch; struct vfio_domain *d = NULL; struct rb_node *n; unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; @@ -1579,6 +1679,8 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, d = list_first_entry(&iommu->domain_list, struct vfio_domain, next); + vfio_batch_init(&batch); + n = rb_first(&iommu->dma_list); for (; n; n = rb_next(n)) { @@ -1626,7 +1728,8 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, npage = vfio_pin_pages_remote(dma, vaddr, n >> PAGE_SHIFT, - &pfn, limit); + &pfn, limit, + &batch); if (npage <= 0) { WARN_ON(!npage); ret = (int)npage; @@ -1640,11 +1743,13 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, ret = iommu_map(domain->domain, iova, phys, size, dma->prot | domain->prot); if (ret) { - if (!dma->iommu_mapped) + if (!dma->iommu_mapped) { vfio_unpin_pages_remote(dma, iova, phys >> PAGE_SHIFT, size >> PAGE_SHIFT, true); + vfio_batch_unpin(&batch, dma); + } goto unwind; } @@ -1659,6 +1764,7 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, dma->iommu_mapped = true; } + vfio_batch_fini(&batch); return 0; unwind: @@ -1699,6 +1805,7 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, } } + vfio_batch_fini(&batch); return ret; }