diff --git a/fs/file.c b/fs/file.c
index fdada5d563c335f10698728dc5b082a663d7e227..f8a5af0d06f14973c1ea7c6dab501e7b2aa2b6a6 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -482,6 +482,15 @@ static unsigned int find_next_fd(struct fdtable *fdt, unsigned int start)
 	unsigned int maxfd = fdt->max_fds; /* always multiple of BITS_PER_LONG */
 	unsigned int maxbit = maxfd / BITS_PER_LONG;
 	unsigned int bitbit = start / BITS_PER_LONG;
+	unsigned int bit;
+
+	/*
+	 * Try to avoid looking at the second level bitmap
+	 */
+	bit = find_next_zero_bit(&fdt->open_fds[bitbit], BITS_PER_LONG,
+				 start & (BITS_PER_LONG - 1));
+	if (bit < BITS_PER_LONG)
+		return bit + bitbit * BITS_PER_LONG;
 
 	bitbit = find_next_zero_bit(fdt->full_fds_bits, maxbit, bitbit) * BITS_PER_LONG;
 	if (bitbit >= maxfd)
@@ -508,7 +517,7 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags)
 	if (fd < files->next_fd)
 		fd = files->next_fd;
 
-	if (fd < fdt->max_fds)
+	if (likely(fd < fdt->max_fds))
 		fd = find_next_fd(fdt, fd);
 
 	/*
@@ -516,19 +525,22 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags)
 	 * will limit the total number of files that can be opened.
 	 */
 	error = -EMFILE;
-	if (fd >= end)
+	if (unlikely(fd >= end))
 		goto out;
 
-	error = expand_files(files, fd);
-	if (error < 0)
-		goto out;
+	if (unlikely(fd >= fdt->max_fds)) {
+		error = expand_files(files, fd);
+		if (error < 0)
+			goto out;
+
+		/*
+		 * If we needed to expand the fs array we
+		 * might have blocked - try again.
+		 */
+		if (error)
+			goto repeat;
+	}
 
-	/*
-	 * If we needed to expand the fs array we
-	 * might have blocked - try again.
-	 */
-	if (error)
-		goto repeat;
 	if (files_cg_alloc_fd(files, 1)) {
 		error = -EMFILE;
 		goto out;
@@ -543,13 +555,6 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags)
 	else
 		__clear_close_on_exec(fd, fdt);
 	error = fd;
-#if 1
-	/* Sanity check */
-	if (rcu_access_pointer(fdt->fd[fd]) != NULL) {
-		printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
-		rcu_assign_pointer(fdt->fd[fd], NULL);
-	}
-#endif
 
 out:
 	spin_unlock(&files->file_lock);
@@ -614,7 +619,7 @@ void fd_install(unsigned int fd, struct file *file)
 		rcu_read_unlock_sched();
 		spin_lock(&files->file_lock);
 		fdt = files_fdtable(files);
-		BUG_ON(fdt->fd[fd] != NULL);
+		WARN_ON(fdt->fd[fd] != NULL);
 		rcu_assign_pointer(fdt->fd[fd], file);
 		spin_unlock(&files->file_lock);
 		return;
diff --git a/fs/namei.c b/fs/namei.c
index deb67b07776e10a6c6598f3566bd961edf103695..13d1ca4f6842dbf9603e57c8ff1e1f64605a3db6 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3433,6 +3433,9 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
 		return dentry;
 	}
 
+	if (open_flag & O_CREAT)
+		audit_inode(nd->name, dir, AUDIT_INODE_PARENT);
+
 	/*
 	 * Checking write permission is tricky, bacuse we don't know if we are
 	 * going to actually need it: O_CREAT opens should work as long as the
@@ -3503,6 +3506,42 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
 	return ERR_PTR(error);
 }
 
+static inline bool trailing_slashes(struct nameidata *nd)
+{
+	return (bool)nd->last.name[nd->last.len];
+}
+
+static struct dentry *lookup_fast_for_open(struct nameidata *nd, int open_flag)
+{
+	struct dentry *dentry;
+
+	if (open_flag & O_CREAT) {
+		if (trailing_slashes(nd))
+			return ERR_PTR(-EISDIR);
+
+		/* Don't bother on an O_EXCL create */
+		if (open_flag & O_EXCL)
+			return NULL;
+	}
+
+	if (trailing_slashes(nd))
+		nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
+
+	dentry = lookup_fast(nd);
+	if (IS_ERR_OR_NULL(dentry))
+		return dentry;
+
+	if (open_flag & O_CREAT) {
+		/* Discard negative dentries. Need inode_lock to do the create */
+		if (!dentry->d_inode) {
+			if (!(nd->flags & LOOKUP_RCU))
+				dput(dentry);
+			dentry = NULL;
+		}
+	}
+	return dentry;
+}
+
 static const char *open_last_lookups(struct nameidata *nd,
 		struct file *file, const struct open_flags *op)
 {
@@ -3520,27 +3559,21 @@
 		return handle_dots(nd, nd->last_type);
 	}
 
-	if (!(open_flag & O_CREAT)) {
-		if (nd->last.name[nd->last.len])
-			nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
-		/* we _can_ be in RCU mode here */
-		dentry = lookup_fast(nd);
-		if (IS_ERR(dentry))
-			return ERR_CAST(dentry);
-		if (likely(dentry))
-			goto finish_lookup;
+	/* We _can_ be in RCU mode here */
+	dentry = lookup_fast_for_open(nd, open_flag);
+	if (IS_ERR(dentry))
+		return ERR_CAST(dentry);
+	if (likely(dentry))
+		goto finish_lookup;
+
+	if (!(open_flag & O_CREAT)) {
 
 		BUG_ON(nd->flags & LOOKUP_RCU);
 	} else {
-		/* create side of things */
 		if (nd->flags & LOOKUP_RCU) {
 			if (!try_to_unlazy(nd))
 				return ERR_PTR(-ECHILD);
 		}
-		audit_inode(nd->name, dir, AUDIT_INODE_PARENT);
-		/* trailing slashes? */
-		if (unlikely(nd->last.name[nd->last.len]))
-			return ERR_PTR(-EISDIR);
 	}
 
 	if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) {
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index 5328e08723d7560124fdf529fb383a479d3739f9..d59d16cf4399e24a71cd30fca7269147c7239d03 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -7618,14 +7618,14 @@ static void mt_validate_nulls(struct maple_tree *mt)
  * 2. The gap is correctly set in the parents
  */
 void mt_validate(struct maple_tree *mt)
+	__must_hold(mas->tree->ma_lock)
 {
 	unsigned char end;
 	MA_STATE(mas, mt, 0, 0);
 
-	rcu_read_lock();
 	mas_start(&mas);
 	if (!mas_is_active(&mas))
-		goto done;
+		return;
 
 	while (!mte_is_leaf(mas.node))
 		mas_descend(&mas);
@@ -7646,9 +7646,6 @@ void mt_validate(struct maple_tree *mt)
 		mas_dfs_postorder(&mas, ULONG_MAX);
 	}
 	mt_validate_nulls(mt);
-done:
-	rcu_read_unlock();
-
 }
 EXPORT_SYMBOL_GPL(mt_validate);
 
diff --git a/mm/internal.h b/mm/internal.h
index 37c17f921daeab31467d5f6c685c900ecde1063d..0478e5dab55bcbd85944c544938ab9247a3a9235 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1446,4 +1446,14 @@ void __meminit __init_single_page(struct page *page, unsigned long pfn,
 #ifdef CONFIG_PAGE_CACHE_LIMIT
 unsigned long shrink_memory(unsigned long nr_to_reclaim, bool may_swap);
 #endif /* CONFIG_PAGE_CACHE_LIMIT */
+
+struct unlink_vma_file_batch {
+	int count;
+	struct vm_area_struct *vmas[8];
+};
+
+void unlink_file_vma_batch_init(struct unlink_vma_file_batch *);
+void unlink_file_vma_batch_add(struct unlink_vma_file_batch *, struct vm_area_struct *);
+void unlink_file_vma_batch_final(struct unlink_vma_file_batch *);
+
 #endif /* __MM_INTERNAL_H */
diff --git a/mm/memory.c b/mm/memory.c
index e248b83384174a0c52ce4b103048eb0745d19fb3..a4f7066d1e68dbe840448252260713b34375f3e3 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -368,6 +368,8 @@ void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
 		   struct vm_area_struct *vma, unsigned long floor,
 		   unsigned long ceiling, bool mm_wr_locked)
 {
+	struct unlink_vma_file_batch vb;
+
 	do {
 		unsigned long addr = vma->vm_start;
 		struct vm_area_struct *next;
@@ -387,12 +389,15 @@ void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
 		if (mm_wr_locked)
 			vma_start_write(vma);
 		unlink_anon_vmas(vma);
-		unlink_file_vma(vma);
 
 		if (is_vm_hugetlb_page(vma)) {
+			unlink_file_vma(vma);
 			hugetlb_free_pgd_range(tlb, addr, vma->vm_end,
 				floor, next ? next->vm_start : ceiling);
 		} else {
+			unlink_file_vma_batch_init(&vb);
+			unlink_file_vma_batch_add(&vb, vma);
+
 			/*
 			 * Optimization: gather nearby vmas into one call down
 			 */
@@ -405,8 +410,9 @@ void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
 				if (mm_wr_locked)
 					vma_start_write(vma);
 				unlink_anon_vmas(vma);
-				unlink_file_vma(vma);
+				unlink_file_vma_batch_add(&vb, vma);
 			}
+			unlink_file_vma_batch_final(&vb);
 			free_pgd_range(tlb, addr, vma->vm_end,
 				floor, next ? next->vm_start : ceiling);
 		}
diff --git a/mm/mmap.c b/mm/mmap.c
index 07ffb6c37b9602d624b400f15b14861d9a5bfdb8..c898103c6d72e1d2975e5a0d1f958c67871dceba 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -132,6 +132,47 @@ void unlink_file_vma(struct vm_area_struct *vma)
 	}
 }
 
+void unlink_file_vma_batch_init(struct unlink_vma_file_batch *vb)
+{
+	vb->count = 0;
+}
+
+static void unlink_file_vma_batch_process(struct unlink_vma_file_batch *vb)
+{
+	struct address_space *mapping;
+	int i;
+
+	mapping = vb->vmas[0]->vm_file->f_mapping;
+	i_mmap_lock_write(mapping);
+	for (i = 0; i < vb->count; i++) {
+		VM_WARN_ON_ONCE(vb->vmas[i]->vm_file->f_mapping != mapping);
+		__remove_shared_vm_struct(vb->vmas[i], mapping);
+	}
+	i_mmap_unlock_write(mapping);
+
+	unlink_file_vma_batch_init(vb);
+}
+
+void unlink_file_vma_batch_add(struct unlink_vma_file_batch *vb,
+			       struct vm_area_struct *vma)
+{
+	if (vma->vm_file == NULL)
+		return;
+
+	if ((vb->count > 0 && vb->vmas[0]->vm_file != vma->vm_file) ||
+	    vb->count == ARRAY_SIZE(vb->vmas))
+		unlink_file_vma_batch_process(vb);
+
+	vb->vmas[vb->count] = vma;
+	vb->count++;
+}
+
+void unlink_file_vma_batch_final(struct unlink_vma_file_batch *vb)
+{
+	if (vb->count > 0)
+		unlink_file_vma_batch_process(vb);
+}
+
 /*
  * Close a vm structure and free it.
  */
diff --git a/mm/rmap.c b/mm/rmap.c
index de385e29916b228b5edf8ac4e86b2ca0b0eb4316..dbcdac9bb7a38e08228f068d5799d34921e052be 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1131,7 +1131,7 @@ static __always_inline unsigned int __folio_add_rmap(struct folio *folio,
 		int *nr_pmdmapped)
 {
 	atomic_t *mapped = &folio->_nr_pages_mapped;
-	int first, nr = 0;
+	int first = 0, nr = 0;
 
 	__folio_rmap_sanity_checks(folio, page, nr_pages, level);
 
@@ -1143,13 +1143,13 @@ static __always_inline unsigned int __folio_add_rmap(struct folio *folio,
 		}
 
 		do {
-			first = atomic_inc_and_test(&page->_mapcount);
-			if (first) {
-				first = atomic_inc_return_relaxed(mapped);
-				if (first < ENTIRELY_MAPPED)
-					nr++;
-			}
+			first += atomic_inc_and_test(&page->_mapcount);
 		} while (page++, --nr_pages > 0);
+
+		if (first &&
+		    atomic_add_return_relaxed(first, mapped) < ENTIRELY_MAPPED)
+			nr = first;
+
 		break;
 	case RMAP_LEVEL_PMD:
 		first = atomic_inc_and_test(&folio->_entire_mapcount);
@@ -1489,7 +1489,7 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
 		enum rmap_level level)
 {
 	atomic_t *mapped = &folio->_nr_pages_mapped;
-	int last, nr = 0, nr_pmdmapped = 0;
+	int last = 0, nr = 0, nr_pmdmapped = 0;
 	bool partially_mapped = false;
 
 	__folio_rmap_sanity_checks(folio, page, nr_pages, level);
@@ -1502,14 +1502,13 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
 		}
 
 		do {
-			last = atomic_add_negative(-1, &page->_mapcount);
-			if (last) {
-				last = atomic_dec_return_relaxed(mapped);
-				if (last < ENTIRELY_MAPPED)
-					nr++;
-			}
+			last += atomic_add_negative(-1, &page->_mapcount);
 		} while (page++, --nr_pages > 0);
 
+		if (last &&
+		    atomic_sub_return_relaxed(last, mapped) < ENTIRELY_MAPPED)
+			nr = last;
+
 		partially_mapped = nr && atomic_read(mapped);
 		break;
 	case RMAP_LEVEL_PMD:
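
A note on the find_next_fd() hunk above: the added fast path scans only the open_fds word that already contains `start` before falling back to the second-level full_fds_bits summary bitmap. The userspace sketch below illustrates that idea only; the helper names and the naive bit scan are invented for the example and are not the kernel implementation.

#include <limits.h>
#include <stdio.h>

#define BITS_PER_LONG ((unsigned int)(sizeof(unsigned long) * CHAR_BIT))

/* Naive stand-in for the kernel's find_next_zero_bit() over a single word. */
static unsigned int word_find_next_zero_bit(unsigned long word, unsigned int size,
					    unsigned int offset)
{
	for (unsigned int bit = offset; bit < size; bit++)
		if (!(word & (1UL << bit)))
			return bit;
	return size;
}

/*
 * Illustrative fast path: look only at the open_fds word holding 'start';
 * on a miss the caller would fall back to the summary-bitmap slow path.
 */
static unsigned int next_free_fd_fast(const unsigned long *open_fds, unsigned int start)
{
	unsigned int word = start / BITS_PER_LONG;
	unsigned int bit = word_find_next_zero_bit(open_fds[word], BITS_PER_LONG,
						   start % BITS_PER_LONG);

	if (bit < BITS_PER_LONG)
		return word * BITS_PER_LONG + bit;	/* found without the second level */
	return UINT_MAX;				/* miss: use the slow path instead */
}

int main(void)
{
	unsigned long open_fds[2] = { 0x7UL, 0 };	/* fds 0-2 already in use */

	printf("%u\n", next_free_fd_fast(open_fds, 0));	/* prints 3 */
	return 0;
}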
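
The unlink_file_vma_batch_{init,add,final} helpers added in mm/internal.h and mm/mmap.c follow a plain accumulate-and-flush pattern: VMAs backed by the same file are queued into an 8-entry array and unlinked under a single i_mmap_lock_write()/i_mmap_unlock_write() round, flushing early whenever the file changes or the array fills. The userspace sketch below mirrors that control flow with made-up names (struct item, batch_add, and so on) purely to show the pattern; it is not the kernel API.

#include <stddef.h>
#include <stdio.h>
#include <string.h>

#define BATCH_SIZE 8	/* mirrors the 8-entry vmas[] array in unlink_vma_file_batch */

/* Hypothetical stand-ins: an item tagged with a key playing the role of vma->vm_file. */
struct item {
	const char *key;
	int id;
};

struct item_batch {
	int count;
	const struct item *items[BATCH_SIZE];
};

static void batch_init(struct item_batch *b)
{
	b->count = 0;
}

/* One "lock, handle everything queued, unlock" round, then reset the batch. */
static void batch_process(struct item_batch *b)
{
	printf("lock(%s)\n", b->items[0]->key);
	for (int i = 0; i < b->count; i++)
		printf("  handle item %d\n", b->items[i]->id);
	printf("unlock(%s)\n", b->items[0]->key);
	batch_init(b);
}

static void batch_add(struct item_batch *b, const struct item *it)
{
	if (!it->key)		/* like vma->vm_file == NULL: nothing to unlink */
		return;

	/* Flush first if the key changes or the array is full, then queue. */
	if ((b->count > 0 && strcmp(b->items[0]->key, it->key) != 0) ||
	    b->count == BATCH_SIZE)
		batch_process(b);

	b->items[b->count++] = it;
}

static void batch_final(struct item_batch *b)
{
	if (b->count > 0)
		batch_process(b);
}

int main(void)
{
	const struct item items[] = {
		{ "a", 1 }, { "a", 2 }, { "b", 3 }, { "b", 4 }, { "b", 5 },
	};
	struct item_batch b;

	batch_init(&b);
	for (size_t i = 0; i < sizeof(items) / sizeof(items[0]); i++)
		batch_add(&b, &items[i]);
	batch_final(&b);	/* flush the trailing partial batch */
	return 0;
}

As in free_pgtables(), the caller runs init once, add per element, and final at the end, so everything queued at any moment shares one key and a single lock round per flush is sufficient.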