diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 14b87bd77376a5a98af95fe7a9583fbc468ef4f1..4be708677ce66180f8adebb09a2c4e468e75a981 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1091,7 +1091,7 @@ static int hstate_next_node_to_alloc(struct hstate *h,
 }
 
 /*
- * helper for free_pool_huge_page() - return the previously saved
+ * helper for remove_pool_huge_page() - return the previously saved
  * node ["this node"] from which to free a huge page. Advance the
  * next node id whether or not we find a free huge page to free so
  * that the next attempt to free addresses the next node.
@@ -1236,24 +1236,33 @@ static inline void destroy_compound_gigantic_page(struct page *page,
 						unsigned int order) { }
 #endif
 
-static void update_and_free_page(struct hstate *h, struct page *page)
+/*
+ * Remove hugetlb page from lists, and update dtor so that page appears
+ * as just a compound page. A reference is held on the page.
+ *
+ * Must be called with hugetlb lock held.
+ */
+static void remove_hugetlb_page(struct hstate *h, struct page *page,
+							bool adjust_surplus)
 {
-	int i;
-	struct page *subpage = page;
+	int nid = page_to_nid(page);
+
+	VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page), page);
 
 	if (hstate_is_gigantic(h) && !gigantic_page_supported())
 		return;
 
-	h->nr_huge_pages--;
-	h->nr_huge_pages_node[page_to_nid(page)]--;
-	for (i = 0; i < pages_per_huge_page(h);
-	     i++, subpage = mem_map_next(subpage, page, i)) {
-		subpage->flags &= ~(1 << PG_locked | 1 << PG_error |
-				1 << PG_referenced | 1 << PG_dirty |
-				1 << PG_active | 1 << PG_private |
-				1 << PG_writeback);
+	list_del(&page->lru);
+
+	if (PageHugeFreed(page)) {
+		h->free_huge_pages--;
+		h->free_huge_pages_node[nid]--;
 	}
-	VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page), page);
+	if (adjust_surplus) {
+		h->surplus_huge_pages--;
+		h->surplus_huge_pages_node[nid]--;
+	}
+
 	/*
 	 * Very subtle
 	 *
@@ -1272,16 +1281,49 @@ static void update_and_free_page(struct hstate *h, struct page *page)
 	 * after update_and_free_page is called.
 	 */
 	set_page_refcounted(page);
-	if (hstate_is_gigantic(h)) {
+	if (hstate_is_gigantic(h))
 		set_compound_page_dtor(page, NULL_COMPOUND_DTOR);
+	else
+		set_compound_page_dtor(page, COMPOUND_PAGE_DTOR);
+
+	h->nr_huge_pages--;
+	h->nr_huge_pages_node[nid]--;
+}
+
+static void update_and_free_page(struct hstate *h, struct page *page)
+{
+	int i;
+	struct page *subpage = page;
+
+	if (hstate_is_gigantic(h) && !gigantic_page_supported())
+		return;
+
+	for (i = 0; i < pages_per_huge_page(h);
+	     i++, subpage = mem_map_next(subpage, page, i)) {
+		subpage->flags &= ~(1 << PG_locked | 1 << PG_error |
+				1 << PG_referenced | 1 << PG_dirty |
+				1 << PG_active | 1 << PG_private |
+				1 << PG_writeback);
+	}
+
+	if (hstate_is_gigantic(h)) {
 		destroy_compound_gigantic_page(page, huge_page_order(h));
 		free_gigantic_page(page, huge_page_order(h));
 	} else {
-		set_compound_page_dtor(page, COMPOUND_PAGE_DTOR);
 		__free_pages(page, huge_page_order(h));
 	}
 }
 
+static void update_and_free_pages_bulk(struct hstate *h, struct list_head *list)
+{
+	struct page *page, *t_page;
+
+	list_for_each_entry_safe(page, t_page, list, lru) {
+		update_and_free_page(h, page);
+		cond_resched();
+	}
+}
+
 struct hstate *size_to_hstate(unsigned long size)
 {
 	struct hstate *h;
@@ -1451,20 +1493,20 @@ void free_huge_page(struct page *page)
 
 	if (PageHugeTemporary(page)) {
 		sp_memcg_uncharge_hpage(page);
-		list_del(&page->lru);
 		ClearPageHugeTemporary(page);
+		remove_hugetlb_page(h, page, false);
+		spin_unlock_irqrestore(&hugetlb_lock, flags);
 		update_and_free_page(h, page);
 	} else if (h->surplus_huge_pages_node[nid]) {
 		/* remove the page from active list */
-		list_del(&page->lru);
+		remove_hugetlb_page(h, page, true);
+		spin_unlock_irqrestore(&hugetlb_lock, flags);
 		update_and_free_page(h, page);
-		h->surplus_huge_pages--;
-		h->surplus_huge_pages_node[nid]--;
 	} else {
 		arch_clear_hugepage_flags(page);
 		enqueue_huge_page(h, page);
+		spin_unlock_irqrestore(&hugetlb_lock, flags);
 	}
-	spin_unlock_irqrestore(&hugetlb_lock, flags);
 }
 
 static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
@@ -1718,16 +1760,18 @@ static int alloc_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
 }
 
 /*
- * Free huge page from pool from next node to free.
- * Attempt to keep persistent huge pages more or less
- * balanced over allowed nodes.
+ * Remove huge page from pool from next node to free. Attempt to keep
+ * persistent huge pages more or less balanced over allowed nodes.
+ * This routine only 'removes' the hugetlb page. The caller must make
+ * an additional call to free the page to low level allocators.
  * Called with hugetlb_lock locked.
  */
-static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
-						 bool acct_surplus)
+static struct page *remove_pool_huge_page(struct hstate *h,
+						nodemask_t *nodes_allowed,
+						 bool acct_surplus)
 {
 	int nr_nodes, node;
-	int ret = 0;
+	struct page *page = NULL;
 
 	for_each_node_mask_to_free(h, nr_nodes, node, nodes_allowed) {
 		/*
@@ -1736,23 +1780,14 @@ static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
 		 */
 		if ((!acct_surplus || h->surplus_huge_pages_node[node]) &&
 		    !list_empty(&h->hugepage_freelists[node])) {
-			struct page *page =
-				list_entry(h->hugepage_freelists[node].next,
+			page = list_entry(h->hugepage_freelists[node].next,
 					  struct page, lru);
-			list_del(&page->lru);
-			h->free_huge_pages--;
-			h->free_huge_pages_node[node]--;
-			if (acct_surplus) {
-				h->surplus_huge_pages--;
-				h->surplus_huge_pages_node[node]--;
-			}
-			update_and_free_page(h, page);
-			ret = 1;
+			remove_hugetlb_page(h, page, acct_surplus);
 			break;
 		}
 	}
 
-	return ret;
+	return page;
 }
 
 /*
@@ -1787,7 +1822,6 @@ int dissolve_free_huge_page(struct page *page)
 	if (!page_count(page)) {
 		struct page *head = compound_head(page);
 		struct hstate *h = page_hstate(head);
-		int nid = page_to_nid(head);
 		if (h->free_huge_pages - h->resv_huge_pages == 0)
 			goto out;
 
@@ -1818,12 +1852,11 @@ int dissolve_free_huge_page(struct page *page)
 			SetPageHWPoison(page);
 			ClearPageHWPoison(head);
 		}
-		list_del(&head->lru);
-		h->free_huge_pages--;
-		h->free_huge_pages_node[nid]--;
+		remove_hugetlb_page(h, head, false);
 		h->max_huge_pages--;
+		spin_unlock_irq(&hugetlb_lock);
 		update_and_free_page(h, head);
-		rc = 0;
+		return 0;
 	}
 out:
 	spin_unlock_irq(&hugetlb_lock);
@@ -2102,17 +2135,16 @@ static int gather_surplus_pages(struct hstate *h, long delta)
  * to the associated reservation map.
  * 2) Free any unused surplus pages that may have been allocated to satisfy
  *    the reservation. As many as unused_resv_pages may be freed.
- *
- * Called with hugetlb_lock held. However, the lock could be dropped (and
- * reacquired) during calls to cond_resched_lock. Whenever dropping the lock,
- * we must make sure nobody else can claim pages we are in the process of
- * freeing. Do this by ensuring resv_huge_page always is greater than the
- * number of huge pages we plan to free when dropping the lock.
  */
 static void return_unused_surplus_pages(struct hstate *h,
 					unsigned long unused_resv_pages)
 {
 	unsigned long nr_pages;
+	struct page *page;
+	LIST_HEAD(page_list);
+
+	/* Uncommit the reservation */
+	h->resv_huge_pages -= unused_resv_pages;
 
 	/* Cannot return gigantic pages currently */
 	if (hstate_is_gigantic(h))
@@ -2129,24 +2161,21 @@ static void return_unused_surplus_pages(struct hstate *h,
 	 * evenly across all nodes with memory. Iterate across these nodes
 	 * until we can no longer free unreserved surplus pages. This occurs
 	 * when the nodes with surplus pages have no free pages.
-	 * free_pool_huge_page() will balance the the freed pages across the
+	 * remove_pool_huge_page() will balance the the freed pages across the
 	 * on-line nodes with memory and will handle the hstate accounting.
-	 *
-	 * Note that we decrement resv_huge_pages as we free the pages. If
-	 * we drop the lock, resv_huge_pages will still be sufficiently large
-	 * to cover subsequent pages we may free.
 	 */
 	while (nr_pages--) {
-		h->resv_huge_pages--;
-		unused_resv_pages--;
-		if (!free_pool_huge_page(h, &node_states[N_MEMORY], 1))
+		page = remove_pool_huge_page(h, &node_states[N_MEMORY], 1);
+		if (!page)
 			goto out;
-		cond_resched_lock(&hugetlb_lock);
+
+		list_add(&page->lru, &page_list);
 	}
 
 out:
-	/* Fully uncommit the reservation */
-	h->resv_huge_pages -= unused_resv_pages;
+	spin_unlock_irq(&hugetlb_lock);
+	update_and_free_pages_bulk(h, &page_list);
+	spin_lock_irq(&hugetlb_lock);
 }
 
 
@@ -2707,24 +2736,31 @@ static void try_to_free_low(struct hstate *h, unsigned long count,
 						nodemask_t *nodes_allowed)
 {
 	int i;
+	LIST_HEAD(page_list);
 
 	if (hstate_is_gigantic(h))
 		return;
 
+	/*
+	 * Collect pages to be freed on a list, and free after dropping lock
+	 */
 	for_each_node_mask(i, *nodes_allowed) {
 		struct page *page, *next;
 		struct list_head *freel = &h->hugepage_freelists[i];
 		list_for_each_entry_safe(page, next, freel, lru) {
 			if (count >= h->nr_huge_pages)
-				return;
+				goto out;
 			if (PageHighMem(page))
 				continue;
-			list_del(&page->lru);
-			update_and_free_page(h, page);
-			h->free_huge_pages--;
-			h->free_huge_pages_node[page_to_nid(page)]--;
+			remove_hugetlb_page(h, page, false);
+			list_add(&page->lru, &page_list);
 		}
 	}
+
+out:
+	spin_unlock_irq(&hugetlb_lock);
+	update_and_free_pages_bulk(h, &page_list);
+	spin_lock_irq(&hugetlb_lock);
 }
 #else
 static inline void try_to_free_low(struct hstate *h, unsigned long count,
@@ -2770,6 +2806,8 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
 					int nid, nodemask_t *nodes_allowed)
 {
 	unsigned long min_count, ret;
+	struct page *page;
+	LIST_HEAD(page_list);
 	NODEMASK_ALLOC(nodemask_t, node_alloc_noretry, GFP_KERNEL);
 
 	/*
@@ -2865,11 +2903,22 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
 	min_count = h->resv_huge_pages + h->nr_huge_pages - h->free_huge_pages;
 	min_count = max(count, min_count);
 	try_to_free_low(h, min_count, nodes_allowed);
+
+	/*
+	 * Collect pages to be removed on list without dropping lock
+	 */
 	while (min_count < persistent_huge_pages(h)) {
-		if (!free_pool_huge_page(h, nodes_allowed, 0))
+		page = remove_pool_huge_page(h, nodes_allowed, 0);
+		if (!page)
 			break;
-		cond_resched_lock(&hugetlb_lock);
+
+		list_add(&page->lru, &page_list);
 	}
+	/* free the pages after dropping lock */
+	spin_unlock_irq(&hugetlb_lock);
+	update_and_free_pages_bulk(h, &page_list);
+	spin_lock_irq(&hugetlb_lock);
+
 	while (count < persistent_huge_pages(h)) {
 		if (!adjust_pool_surplus(h, nodes_allowed, 1))
 			break;
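
Note on the pattern used throughout the hunks above: remove_hugetlb_page() unlinks the page and fixes up the hstate counters while hugetlb_lock is held, the page is parked on a caller-local list, and only after the lock is dropped does update_and_free_pages_bulk() hand the pages back to the low level allocators, with cond_resched() between pages. The user-space sketch below is only an illustrative analogue of that locking pattern, not kernel code; every identifier in it (fake_page, pool_lock, pool_remove_locked(), shrink_pool()) is invented for the example.

/*
 * Illustrative analogue of "remove under the lock, free after dropping
 * the lock". Only the structure mirrors remove_hugetlb_page() +
 * update_and_free_pages_bulk(); all names here are hypothetical.
 */
#include <pthread.h>
#include <stdlib.h>

struct fake_page {
	struct fake_page *next;		/* stands in for the page->lru linkage */
	void *data;
};

static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static struct fake_page *pool_head;	/* protected by pool_lock */
static unsigned long pool_count;	/* protected by pool_lock */

/* Analogue of remove_hugetlb_page(): unlink and adjust counters, lock held. */
static struct fake_page *pool_remove_locked(void)
{
	struct fake_page *p = pool_head;

	if (p) {
		pool_head = p->next;
		pool_count--;
	}
	return p;
}

/* Analogue of update_and_free_pages_bulk(): expensive freeing, lock NOT held. */
static void free_page_list(struct fake_page *list)
{
	while (list) {
		struct fake_page *next = list->next;

		free(list->data);
		free(list);
		list = next;
	}
}

/* Shrink the pool to 'target' entries; pages are freed outside the lock. */
static void shrink_pool(unsigned long target)
{
	struct fake_page *to_free = NULL;

	pthread_mutex_lock(&pool_lock);
	while (pool_count > target) {
		struct fake_page *p = pool_remove_locked();

		if (!p)
			break;
		p->next = to_free;	/* collect on a caller-private list */
		to_free = p;
	}
	pthread_mutex_unlock(&pool_lock);

	free_page_list(to_free);	/* no lock held across free() */
}

Collecting the removed pages on a private list keeps the lock hold time bounded by list manipulation only, which is why the patch can replace the cond_resched_lock() dance with a plain unlock/bulk-free/relock sequence.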