diff --git a/drivers/media/platform/davinci/vpif.c b/drivers/media/platform/davinci/vpif.c
index ee610daf90a3c9a93805cd7be44d4996d87e6cdf..9c10b6f33fd26f4abc15751ed245435d16e1b551 100644
--- a/drivers/media/platform/davinci/vpif.c
+++ b/drivers/media/platform/davinci/vpif.c
@@ -47,6 +47,11 @@ EXPORT_SYMBOL_GPL(vpif_lock);
 void __iomem *vpif_base;
 EXPORT_SYMBOL_GPL(vpif_base);
 
+struct vpif_data {
+	struct platform_device *capture;
+	struct platform_device *display;
+};
+
 /*
  * vpif_ch_params: video standard configuration parameters for vpif
  *
@@ -423,11 +428,19 @@ int vpif_channel_getfid(u8 channel_id)
 }
 EXPORT_SYMBOL(vpif_channel_getfid);
 
+static void vpif_pdev_release(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+
+	kfree(pdev);
+}
+
 static int vpif_probe(struct platform_device *pdev)
 {
 	static struct resource *res, *res_irq;
 	struct platform_device *pdev_capture, *pdev_display;
 	struct device_node *endpoint = NULL;
+	struct vpif_data *data;
 	int ret;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -435,6 +448,12 @@ static int vpif_probe(struct platform_device *pdev)
 	if (IS_ERR(vpif_base))
 		return PTR_ERR(vpif_base);
 
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, data);
+
 	pm_runtime_enable(&pdev->dev);
 	pm_runtime_get(&pdev->dev);
 
@@ -463,49 +482,77 @@ static int vpif_probe(struct platform_device *pdev)
 		goto err_put_rpm;
 	}
 
-	pdev_capture = devm_kzalloc(&pdev->dev, sizeof(*pdev_capture),
-				    GFP_KERNEL);
-	if (pdev_capture) {
-		pdev_capture->name = "vpif_capture";
-		pdev_capture->id = -1;
-		pdev_capture->resource = res_irq;
-		pdev_capture->num_resources = 1;
-		pdev_capture->dev.dma_mask = pdev->dev.dma_mask;
-		pdev_capture->dev.coherent_dma_mask = pdev->dev.coherent_dma_mask;
-		pdev_capture->dev.parent = &pdev->dev;
-		platform_device_register(pdev_capture);
-	} else {
-		dev_warn(&pdev->dev, "Unable to allocate memory for pdev_capture.\n");
+	pdev_capture = kzalloc(sizeof(*pdev_capture), GFP_KERNEL);
+	if (!pdev_capture) {
+		ret = -ENOMEM;
+		goto err_put_rpm;
 	}
 
-	pdev_display = devm_kzalloc(&pdev->dev, sizeof(*pdev_display),
-				    GFP_KERNEL);
-	if (pdev_display) {
-		pdev_display->name = "vpif_display";
-		pdev_display->id = -1;
-		pdev_display->resource = res_irq;
-		pdev_display->num_resources = 1;
-		pdev_display->dev.dma_mask = pdev->dev.dma_mask;
-		pdev_display->dev.coherent_dma_mask = pdev->dev.coherent_dma_mask;
-		pdev_display->dev.parent = &pdev->dev;
-		platform_device_register(pdev_display);
-	} else {
-		dev_warn(&pdev->dev, "Unable to allocate memory for pdev_display.\n");
+	pdev_capture->name = "vpif_capture";
+	pdev_capture->id = -1;
+	pdev_capture->resource = res_irq;
+	pdev_capture->num_resources = 1;
+	pdev_capture->dev.dma_mask = pdev->dev.dma_mask;
+	pdev_capture->dev.coherent_dma_mask = pdev->dev.coherent_dma_mask;
+	pdev_capture->dev.parent = &pdev->dev;
+	pdev_capture->dev.release = vpif_pdev_release;
+
+	ret = platform_device_register(pdev_capture);
+	if (ret)
+		goto err_put_pdev_capture;
+
+	pdev_display = kzalloc(sizeof(*pdev_display), GFP_KERNEL);
+	if (!pdev_display) {
+		ret = -ENOMEM;
+		goto err_del_pdev_capture;
 	}
 
+	pdev_display->name = "vpif_display";
+	pdev_display->id = -1;
+	pdev_display->resource = res_irq;
+	pdev_display->num_resources = 1;
+	pdev_display->dev.dma_mask = pdev->dev.dma_mask;
+	pdev_display->dev.coherent_dma_mask = pdev->dev.coherent_dma_mask;
+	pdev_display->dev.parent = &pdev->dev;
+	pdev_display->dev.release = vpif_pdev_release;
+
+	ret = platform_device_register(pdev_display);
+	if (ret)
+		goto err_put_pdev_display;
+
+	data->capture = pdev_capture;
+	data->display = pdev_display;
+
 	return 0;
 
+err_put_pdev_display:
+	platform_device_put(pdev_display);
+err_del_pdev_capture:
+	platform_device_del(pdev_capture);
+err_put_pdev_capture:
+	platform_device_put(pdev_capture);
 err_put_rpm:
 	pm_runtime_put(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
+	kfree(data);
 
 	return ret;
 }
 
 static int vpif_remove(struct platform_device *pdev)
 {
+	struct vpif_data *data = platform_get_drvdata(pdev);
+
+	if (data->capture)
+		platform_device_unregister(data->capture);
+	if (data->display)
+		platform_device_unregister(data->display);
+
 	pm_runtime_put(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
+
+	kfree(data);
+
 	return 0;
 }
 
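A registered platform_device is reference counted, so its backing memory must stay valid until the final reference is dropped, which can happen after the parent's devres allocations are released on unbind. That is why the child devices above move from devm_kzalloc() to plain kzalloc() with a release() callback. A minimal, hedged sketch of that pattern follows; the "foo-child" device and helper names are hypothetical and not part of this patch.

static void foo_child_release(struct device *dev)
{
	/* Runs when the last reference to the child device is dropped. */
	kfree(to_platform_device(dev));
}

static int foo_register_child(struct device *parent, struct platform_device **out)
{
	struct platform_device *child;
	int ret;

	/* Plain kzalloc(): the child may outlive the parent's devres group. */
	child = kzalloc(sizeof(*child), GFP_KERNEL);
	if (!child)
		return -ENOMEM;

	child->name = "foo-child";
	child->id = -1;
	child->dev.parent = parent;
	child->dev.release = foo_child_release;

	ret = platform_device_register(child);
	if (ret) {
		/* The embedded struct device was initialised, so drop the
		 * reference instead of calling kfree() directly. */
		platform_device_put(child);
		return ret;
	}

	*out = child;	/* torn down later with platform_device_unregister() */
	return 0;
}
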
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index b540e5a60ea91b27abbd427370703d076d1d0ed6..ac109c61e882b596e271ac63c87e9521e1f10f1b 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -86,7 +86,7 @@ enum cpuhp_state {
 	CPUHP_ARM_BL_PREPARE,
 	CPUHP_TRACE_RB_PREPARE,
 	CPUHP_MM_ZS_PREPARE,
-	CPUHP_MM_ZSWP_MEM_PREPARE,
+	CPUHP_MM_ZSWP_MEM_PREPARE,	/* keep this entry to preserve kABI */
 	CPUHP_MM_ZSWP_POOL_PREPARE,
 	CPUHP_KVM_PPC_BOOK3S_PREPARE,
 	CPUHP_ZCOMP_PREPARE,
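The comment added above matters because cpuhp_state enumerators take their values implicitly from their position: deleting CPUHP_MM_ZSWP_MEM_PREPARE would renumber every later state and change the kernel ABI seen by out-of-tree consumers. A contrived illustration of the effect, not taken from the patch:

enum example_state {
	EXAMPLE_A,		/* 0 */
	EXAMPLE_B_UNUSED,	/* 1: no longer referenced, but kept in place */
	EXAMPLE_C,		/* 2: would silently become 1 if B were deleted */
};
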
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index b25c809af61265b98c64a4d69fda227775ed15cd..8381d2ef7aa919853252e17cc47a9d09f56ae255 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -210,6 +210,8 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 		unsigned long address, unsigned long end, pgprot_t newprot);
 
 bool is_hugetlb_entry_migration(pte_t pte);
+void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
+void hugetlb_split(struct vm_area_struct *vma, unsigned long addr);
 
 #else /* !CONFIG_HUGETLB_PAGE */
 
@@ -392,6 +394,10 @@ static inline vm_fault_t hugetlb_fault(struct mm_struct *mm,
 	return 0;
 }
 
+static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { }
+
+static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) {}
+
 #endif /* !CONFIG_HUGETLB_PAGE */
 /*
  * hugepages at page global directory. If arch support
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a0a051f9ec293aaa6580ef5f475328b865d6e19e..6e4f58a0470a016afc0292b3588d017eabae7e2c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -89,6 +89,8 @@ int sysctl_hugetlb_pmem_allocall;
 /* Forward declaration */
 static int hugetlb_acct_memory(struct hstate *h, long delta,
 			       struct hugetlbfs_inode_info *info);
+static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
+		unsigned long start, unsigned long end, bool take_locks);
 
 static inline void unlock_or_release_subpool(struct hugepage_subpool *spool,
 						unsigned long irq_flags)
@@ -4215,6 +4217,39 @@ static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr)
 	return 0;
 }
 
+void hugetlb_split(struct vm_area_struct *vma, unsigned long addr)
+{
+	/*
+	 * PMD sharing is only possible for PUD_SIZE-aligned address ranges
+	 * in HugeTLB VMAs. If we will lose PUD_SIZE alignment due to this
+	 * split, unshare PMDs in the PUD_SIZE interval surrounding addr now.
+	 * This function is called in the middle of a VMA split operation, with
+	 * MM, VMA and rmap all write-locked to prevent concurrent page table
+	 * walks (except hardware and gup_fast()).
+	 */
+	mmap_assert_write_locked(vma->vm_mm);
+	i_mmap_assert_write_locked(vma->vm_file->f_mapping);
+
+	if (addr & ~PUD_MASK) {
+		unsigned long floor = addr & PUD_MASK;
+		unsigned long ceil = floor + PUD_SIZE;
+
+		if (floor >= vma->vm_start && ceil <= vma->vm_end) {
+			/*
+			 * Locking:
+			 * Use take_locks=false here.
+			 * The file rmap lock is already held.
+			 * The hugetlb VMA lock can't be taken when we already
+			 * hold the file rmap lock, and we don't need it because
+			 * its purpose is to synchronize against concurrent page
+			 * table walks, which are not possible thanks to the
+			 * locks held by our caller.
+			 */
+			hugetlb_unshare_pmds(vma, floor, ceil, /* take_locks = */ false);
+		}
+	}
+}
+
 static unsigned long hugetlb_vm_op_pagesize(struct vm_area_struct *vma)
 {
 	struct hstate *hstate = hstate_vma(vma);
@@ -6263,6 +6298,63 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason)
 	}
 }
 
+/*
+ * If @take_locks is false, the caller must ensure that no concurrent page table
+ * access can happen (except for gup_fast() and hardware page walks).
+ * If @take_locks is true, we take the hugetlb VMA lock (to lock out things like
+ * concurrent page fault handling) and the file rmap lock.
+ */
+static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
+				unsigned long start,
+				unsigned long end,
+				bool take_locks)
+{
+	struct hstate *h = hstate_vma(vma);
+	unsigned long sz = huge_page_size(h);
+	struct mm_struct *mm = vma->vm_mm;
+	struct mmu_notifier_range range;
+	unsigned long address;
+	spinlock_t *ptl;
+	pte_t *ptep;
+
+	if (!(vma->vm_flags & VM_MAYSHARE))
+		return;
+
+	if (start >= end)
+		return;
+
+	flush_cache_range(vma, start, end);
+	/*
+	 * No need to call adjust_range_if_pmd_sharing_possible(), because
+	 * we have already done the PUD_SIZE alignment.
+	 */
+	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm,
+				start, end);
+	mmu_notifier_invalidate_range_start(&range);
+	if (take_locks) {
+		i_mmap_lock_write(vma->vm_file->f_mapping);
+	} else {
+		i_mmap_assert_write_locked(vma->vm_file->f_mapping);
+	}
+	for (address = start; address < end; address += PUD_SIZE) {
+		ptep = huge_pte_offset(mm, address, sz);
+		if (!ptep)
+			continue;
+		ptl = huge_pte_lock(h, mm, ptep);
+		huge_pmd_unshare(mm, vma, &address, ptep);
+		spin_unlock(ptl);
+	}
+	flush_hugetlb_tlb_range(vma, start, end);
+	if (take_locks) {
+		i_mmap_unlock_write(vma->vm_file->f_mapping);
+	}
+	/*
+	 * No need to call mmu_notifier_invalidate_range(), see
+	 * Documentation/mm/mmu_notifier.rst.
+	 */
+	mmu_notifier_invalidate_range_end(&range);
+}
+
 #ifdef CONFIG_CMA
 static bool cma_reserve_called __initdata;
 
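As a quick sanity check of the alignment handling in hugetlb_split(): with 1 GiB PUDs (the x86-64 value, used here purely for illustration), splitting a VMA at 2.5 GiB loses PUD alignment, so PMD sharing is undone across the surrounding [2 GiB, 3 GiB) window, whereas a split at exactly 2 GiB leaves sharing untouched. A standalone sketch of that arithmetic, with illustrative constants only:

#include <stdio.h>

#define PUD_SHIFT	30			/* illustration: 1 GiB PUDs */
#define PUD_SIZE	(1UL << PUD_SHIFT)
#define PUD_MASK	(~(PUD_SIZE - 1))

int main(void)
{
	unsigned long addr = 0xa0000000UL;	/* 2.5 GiB, not PUD-aligned */

	if (addr & ~PUD_MASK) {
		unsigned long floor = addr & PUD_MASK;	/* 0x80000000 (2 GiB) */
		unsigned long ceil = floor + PUD_SIZE;	/* 0xc0000000 (3 GiB) */

		/* hugetlb_split() unshares PMDs in [floor, ceil) when this
		 * whole window lies inside the VMA. */
		printf("unshare [%#lx, %#lx)\n", floor, ceil);
	}
	return 0;
}
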
diff --git a/mm/mmap.c b/mm/mmap.c
index 6bf5d219aece1e4c07b3b80dbee620ca6941f5bc..c0262befcaae170b74b9684c4fe3182c32b3a014 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -842,7 +842,15 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
 		}
 	}
 again:
+	/*
+	 * Get rid of huge pages and shared page tables straddling the split
+	 * boundary.
+	 */
 	vma_adjust_trans_huge(orig_vma, start, end, adjust_next);
+	if (is_vm_hugetlb_page(orig_vma)) {
+		hugetlb_split(orig_vma, start);
+		hugetlb_split(orig_vma, end);
+	}
 
 	if (file) {
 		mapping = file->f_mapping;
diff --git a/mm/zswap.c b/mm/zswap.c
index d8e8d0084a22e87ff3b6927f5a78655d64b37811..fdddf004bd4f9c744dd75d8f62aa7f72796c6f96 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -135,8 +135,8 @@ struct crypto_acomp_ctx {
 	struct crypto_acomp *acomp;
 	struct acomp_req *req;
 	struct crypto_wait wait;
-	u8 *dstmem;
-	struct mutex *mutex;
+	u8 *buffer;
+	struct mutex mutex;
 };
 
 struct zswap_pool {
@@ -402,76 +402,45 @@ static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
 /*********************************
 * per-cpu code
 **********************************/
-static DEFINE_PER_CPU(u8 *, zswap_dstmem);
-/*
- * If users dynamically change the zpool type and compressor at runtime, i.e.
- * zswap is running, zswap can have more than one zpool on one cpu, but they
- * are sharing dtsmem. So we need this mutex to be per-cpu.
- */
-static DEFINE_PER_CPU(struct mutex *, zswap_mutex);
-
-static int zswap_dstmem_prepare(unsigned int cpu)
-{
-	struct mutex *mutex;
-	u8 *dst;
-
-	dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
-	if (!dst)
-		return -ENOMEM;
-
-	mutex = kmalloc_node(sizeof(*mutex), GFP_KERNEL, cpu_to_node(cpu));
-	if (!mutex) {
-		kfree(dst);
-		return -ENOMEM;
-	}
-
-	mutex_init(mutex);
-	per_cpu(zswap_dstmem, cpu) = dst;
-	per_cpu(zswap_mutex, cpu) = mutex;
-	return 0;
-}
-
-static int zswap_dstmem_dead(unsigned int cpu)
-{
-	struct mutex *mutex;
-	u8 *dst;
-
-	mutex = per_cpu(zswap_mutex, cpu);
-	kfree(mutex);
-	per_cpu(zswap_mutex, cpu) = NULL;
-
-	dst = per_cpu(zswap_dstmem, cpu);
-	kfree(dst);
-	per_cpu(zswap_dstmem, cpu) = NULL;
-
-	return 0;
-}
-
 static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
 {
 	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
 	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
-	struct crypto_acomp *acomp;
-	struct acomp_req *req;
+	struct crypto_acomp *acomp = NULL;
+	struct acomp_req *req = NULL;
+	u8 *buffer = NULL;
+	int ret;
+
+	buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
+	if (!buffer) {
+		ret = -ENOMEM;
+		goto fail;
+	}
 
 	acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
 	if (IS_ERR(acomp)) {
 		pr_err("could not alloc crypto acomp %s : %ld\n",
 				pool->tfm_name, PTR_ERR(acomp));
-		return PTR_ERR(acomp);
+		ret = PTR_ERR(acomp);
+		goto fail;
 	}
-	acomp_ctx->acomp = acomp;
 
-	req = acomp_request_alloc(acomp_ctx->acomp);
+	req = acomp_request_alloc(acomp);
 	if (!req) {
 		pr_err("could not alloc crypto acomp_request %s\n",
 		       pool->tfm_name);
-		crypto_free_acomp(acomp_ctx->acomp);
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto fail;
 	}
-	acomp_ctx->req = req;
 
+	/*
+	 * Only hold the mutex after completing allocations, otherwise we may
+	 * recurse into zswap through reclaim and attempt to hold the mutex
+	 * again resulting in a deadlock.
+	 */
+	mutex_lock(&acomp_ctx->mutex);
 	crypto_init_wait(&acomp_ctx->wait);
+
 	/*
 	 * if the backend of acomp is async zip, crypto_req_done() will wakeup
 	 * crypto_wait_req(); if the backend of acomp is scomp, the callback
@@ -480,27 +449,77 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
 	acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
 				   crypto_req_done, &acomp_ctx->wait);
 
-	acomp_ctx->mutex = per_cpu(zswap_mutex, cpu);
-	acomp_ctx->dstmem = per_cpu(zswap_dstmem, cpu);
-
+	acomp_ctx->buffer = buffer;
+	acomp_ctx->acomp = acomp;
+	acomp_ctx->req = req;
+	mutex_unlock(&acomp_ctx->mutex);
 	return 0;
+
+fail:
+	if (acomp)
+		crypto_free_acomp(acomp);
+	kfree(buffer);
+	return ret;
 }
 
 static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
 {
 	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
 	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
+	struct acomp_req *req;
+	struct crypto_acomp *acomp;
+	u8 *buffer;
 
-	if (!IS_ERR_OR_NULL(acomp_ctx)) {
-		if (!IS_ERR_OR_NULL(acomp_ctx->req))
-			acomp_request_free(acomp_ctx->req);
-		if (!IS_ERR_OR_NULL(acomp_ctx->acomp))
-			crypto_free_acomp(acomp_ctx->acomp);
-	}
+	if (IS_ERR_OR_NULL(acomp_ctx))
+		return 0;
+
+	mutex_lock(&acomp_ctx->mutex);
+	req = acomp_ctx->req;
+	acomp = acomp_ctx->acomp;
+	buffer = acomp_ctx->buffer;
+	acomp_ctx->req = NULL;
+	acomp_ctx->acomp = NULL;
+	acomp_ctx->buffer = NULL;
+	mutex_unlock(&acomp_ctx->mutex);
+
+	/*
+	 * Do the actual freeing after releasing the mutex to avoid subtle
+	 * locking dependencies causing deadlocks.
+	 */
+	if (!IS_ERR_OR_NULL(req))
+		acomp_request_free(req);
+	if (!IS_ERR_OR_NULL(acomp))
+		crypto_free_acomp(acomp);
+	kfree(buffer);
 
 	return 0;
 }
 
+static struct crypto_acomp_ctx *acomp_ctx_get_cpu_lock(struct zswap_pool *pool)
+{
+	struct crypto_acomp_ctx *acomp_ctx;
+
+	for (;;) {
+		acomp_ctx = raw_cpu_ptr(pool->acomp_ctx);
+		mutex_lock(&acomp_ctx->mutex);
+		if (likely(acomp_ctx->req))
+			return acomp_ctx;
+		/*
+		 * It is possible that we were migrated to a different CPU after
+		 * getting the per-CPU ctx but before the mutex was acquired. If
+		 * the old CPU got offlined, zswap_cpu_comp_dead() could have
+		 * already freed ctx->req (among other things) and set it to
+		 * NULL. Just try again on the new CPU that we ended up on.
+		 */
+		mutex_unlock(&acomp_ctx->mutex);
+	}
+}
+
+static void acomp_ctx_put_unlock(struct crypto_acomp_ctx *acomp_ctx)
+{
+	mutex_unlock(&acomp_ctx->mutex);
+}
+
 /*********************************
 * pool functions
 **********************************/
@@ -592,7 +611,7 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
 	struct zswap_pool *pool;
 	char name[38]; /* 'zswap' + 32 char (max) num + \0 */
 	gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
-	int ret;
+	int ret, cpu;
 
 	if (!zswap_has_pool) {
 		/* if either are unset, pool initialization failed, and we
@@ -627,6 +646,9 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
 		goto error;
 	}
 
+	for_each_possible_cpu(cpu)
+		mutex_init(&per_cpu_ptr(pool->acomp_ctx, cpu)->mutex);
+
 	ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE,
 				       &pool->node);
 	if (ret)
@@ -1009,14 +1031,14 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
 		acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
 		dlen = PAGE_SIZE;
 
-		mutex_lock(acomp_ctx->mutex);
+		mutex_lock(&acomp_ctx->mutex);
 		sg_init_one(&input, src, entry->length);
 		sg_init_table(&output, 1);
 		sg_set_page(&output, page, PAGE_SIZE, 0);
 		acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, dlen);
 		ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
 		dlen = acomp_ctx->req->dlen;
-		mutex_unlock(acomp_ctx->mutex);
+		mutex_unlock(&acomp_ctx->mutex);
 
 		BUG_ON(ret);
 		BUG_ON(dlen != PAGE_SIZE);
@@ -1173,15 +1195,17 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
 	}
 
 	/* compress */
-	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
+	acomp_ctx = acomp_ctx_get_cpu_lock(entry->pool);
 
-	mutex_lock(acomp_ctx->mutex);
-
-	dst = acomp_ctx->dstmem;
+	dst = acomp_ctx->buffer;
 	sg_init_table(&input, 1);
 	sg_set_page(&input, page, PAGE_SIZE, 0);
 
-	/* zswap_dstmem is of size (PAGE_SIZE * 2). Reflect same in sg_list */
+	/*
+	 * We need PAGE_SIZE * 2 here since there may be an over-compression
+	 * case, and hardware accelerators may not check the dst buffer size,
+	 * so give the dst buffer enough length to avoid a buffer overflow.
+	 */
 	sg_init_one(&output, dst, PAGE_SIZE * 2);
 	acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);
 	/*
@@ -1222,7 +1246,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
 	memcpy(buf, &zhdr, hlen);
 	memcpy(buf + hlen, dst, dlen);
 	zpool_unmap_handle(entry->pool->zpool, handle);
-	mutex_unlock(acomp_ctx->mutex);
+	acomp_ctx_put_unlock(acomp_ctx);
 
 	/* populate entry */
 	entry->offset = offset;
@@ -1250,7 +1274,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
 	return 0;
 
 put_dstmem:
-	mutex_unlock(acomp_ctx->mutex);
+	acomp_ctx_put_unlock(acomp_ctx);
 	zswap_pool_put(entry->pool);
 freepage:
 	zswap_entry_cache_free(entry);
@@ -1311,19 +1335,18 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
 		zpool_unmap_handle(entry->pool->zpool, entry->handle);
 	}
 
-	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
-	mutex_lock(acomp_ctx->mutex);
+	acomp_ctx = acomp_ctx_get_cpu_lock(entry->pool);
 	sg_init_one(&input, src, entry->length);
 	sg_init_table(&output, 1);
 	sg_set_page(&output, page, PAGE_SIZE, 0);
 	acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, dlen);
 	ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
-	mutex_unlock(acomp_ctx->mutex);
 
 	if (zpool_can_sleep_mapped(entry->pool->zpool))
 		zpool_unmap_handle(entry->pool->zpool, entry->handle);
 	else
 		kfree(tmp);
+	acomp_ctx_put_unlock(acomp_ctx);
 
 	BUG_ON(ret);
 
@@ -1466,13 +1489,6 @@ static int zswap_setup(void)
 		goto cache_fail;
 	}
 
-	ret = cpuhp_setup_state(CPUHP_MM_ZSWP_MEM_PREPARE, "mm/zswap:prepare",
-				zswap_dstmem_prepare, zswap_dstmem_dead);
-	if (ret) {
-		pr_err("dstmem alloc failed\n");
-		goto dstmem_fail;
-	}
-
 	ret = cpuhp_setup_state_multi(CPUHP_MM_ZSWP_POOL_PREPARE,
 				      "mm/zswap_pool:prepare",
 				      zswap_cpu_comp_prepare, zswap_cpu_comp_dead);
@@ -1505,8 +1521,6 @@ static int zswap_setup(void)
 	if (pool)
 		zswap_pool_destroy(pool);
 hp_fail:
-	cpuhp_remove_state(CPUHP_MM_ZSWP_MEM_PREPARE);
-dstmem_fail:
 	zswap_entry_cache_destroy();
 cache_fail:
 	/* if built-in, we aren't unloaded on failure; don't allow use */
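The retry loop in acomp_ctx_get_cpu_lock() is the general pattern for pairing a raw per-CPU lookup with a sleeping lock when the looked-up resource can vanish through CPU hot-unplug: sample the pointer, take the mutex, and only trust the contents after re-checking them under the lock. Reduced to a hedged sketch with illustrative names (these are not the zswap structures):

struct percpu_res {
	struct mutex lock;
	void *payload;		/* cleared once the hot-unplug callback ran */
};

static struct percpu_res *res_get_locked(struct percpu_res __percpu *all)
{
	struct percpu_res *res;

	for (;;) {
		res = raw_cpu_ptr(all);	/* we may migrate right after this */
		mutex_lock(&res->lock);
		if (likely(res->payload))
			return res;	/* still valid, returned locked */
		/*
		 * The CPU we sampled went offline in the window between
		 * raw_cpu_ptr() and mutex_lock(), and its teardown callback
		 * cleared the payload under the same mutex. Unlock and retry
		 * on whichever CPU we are running on now.
		 */
		mutex_unlock(&res->lock);
	}
}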