diff --git a/Documentation/networking/page_pool.rst b/Documentation/networking/page_pool.rst index 43088ddf95e4b59ec5272974ac65e82633a7534a..14cb1b727f959c4b22d493c50381036fcfab84c1 100644 --- a/Documentation/networking/page_pool.rst +++ b/Documentation/networking/page_pool.rst @@ -97,6 +97,47 @@ a page will cause no race conditions is enough. * page_pool_get_dma_dir(): Retrieve the stored DMA direction. +* page_pool_get_stats(): Retrieve statistics about the page_pool. This API + is only available if the kernel has been configured with + ``CONFIG_PAGE_POOL_STATS=y``. A pointer to a caller allocated ``struct + page_pool_stats`` structure is passed to this API which is filled in. The + caller can then report those stats to the user (perhaps via ethtool, + debugfs, etc.). See below for an example usage of this API. + +Stats API and structures +------------------------ +If the kernel is configured with ``CONFIG_PAGE_POOL_STATS=y``, the API +``page_pool_get_stats()`` and structures described below are available. It +takes a pointer to a ``struct page_pool`` and a pointer to a ``struct +page_pool_stats`` allocated by the caller. + +The API will fill in the provided ``struct page_pool_stats`` with +statistics about the page_pool. + +The stats structure has the following fields:: + + struct page_pool_stats { + struct page_pool_alloc_stats alloc_stats; + struct page_pool_recycle_stats recycle_stats; + }; + + +The ``struct page_pool_alloc_stats`` has the following fields: + * ``fast``: successful fast path allocations + * ``slow``: slow path order-0 allocations + * ``slow_high_order``: slow path high order allocations + * ``empty``: ptr ring is empty, so a slow path allocation was forced. + * ``refill``: an allocation which triggered a refill of the cache + * ``waive``: pages obtained from the ptr ring that cannot be added to + the cache due to a NUMA mismatch. + +The ``struct page_pool_recycle_stats`` has the following fields: + * ``cached``: recycling placed page in the page pool cache + * ``cache_full``: page pool cache was full + * ``ring``: page placed into the ptr ring + * ``ring_full``: page released from page pool because the ptr ring was full + * ``released_refcnt``: page released (and not recycled) because refcnt > 1 + Coding examples =============== @@ -149,6 +190,21 @@ NAPI poller } } +Stats +----- + +.. code-block:: c + + #ifdef CONFIG_PAGE_POOL_STATS + /* retrieve stats */ + struct page_pool_stats stats = { 0 }; + if (page_pool_get_stats(page_pool, &stats)) { + /* perhaps the driver reports statistics with ethool */ + ethtool_print_allocation_stats(&stats.alloc_stats); + ethtool_print_recycle_stats(&stats.recycle_stats); + } + #endif + Driver unload ------------- diff --git a/include/net/page_pool.h b/include/net/page_pool.h index b9ecabff20325087b6ccc3fd3c7d38a24da107f8..15298098302e088c036faa2402b2e78d1ef24f31 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -83,6 +83,79 @@ struct page_pool_params { unsigned int offset; /* DMA addr offset */ }; +#ifdef CONFIG_PAGE_POOL_STATS +struct page_pool_alloc_stats { + u64 fast; /* fast path allocations */ + u64 slow; /* slow-path order 0 allocations */ + u64 slow_high_order; /* slow-path high order allocations */ + u64 empty; /* failed refills due to empty ptr ring, forcing + * slow path allocation + */ + u64 refill; /* allocations via successful refill */ + u64 waive; /* failed refills due to numa zone mismatch */ +}; + +struct page_pool_recycle_stats { + u64 cached; /* recycling placed page in the cache. */ + u64 cache_full; /* cache was full */ + u64 ring; /* recycling placed page back into ptr ring */ + u64 ring_full; /* page was released from page-pool because + * PTR ring was full. + */ + u64 released_refcnt; /* page released because of elevated + * refcnt + */ +}; + +/* This struct wraps the above stats structs so users of the + * page_pool_get_stats API can pass a single argument when requesting the + * stats for the page pool. + */ +struct page_pool_stats { + struct page_pool_alloc_stats alloc_stats; + struct page_pool_recycle_stats recycle_stats; +}; + +/* To solve the KABI issue, introduce the new statistics structure + * to store the member alloc_stats and recycle_stats. + */ +struct page_pool_raw_stats { + /* these stats are incremented while in softirq context */ + struct page_pool_alloc_stats alloc_stats; + /* recycle stats are per-cpu to avoid locking */ + struct page_pool_recycle_stats __percpu *recycle_stats; +}; + +int page_pool_ethtool_stats_get_count(void); +u8 *page_pool_ethtool_stats_get_strings(u8 *data); +u64 *page_pool_ethtool_stats_get(u64 *data, void *stats); + +/* + * Drivers that wish to harvest page pool stats and report them to users + * (perhaps via ethtool, debugfs, or another mechanism) can allocate a + * struct page_pool_stats call page_pool_get_stats to get stats for the specified pool. + */ +bool page_pool_get_stats(struct page_pool *pool, + struct page_pool_stats *stats); +#else + +static inline int page_pool_ethtool_stats_get_count(void) +{ + return 0; +} + +static inline u8 *page_pool_ethtool_stats_get_strings(u8 *data) +{ + return data; +} + +static inline u64 *page_pool_ethtool_stats_get(u64 *data, void *stats) +{ + return data; +} + +#endif + struct page_pool { struct page_pool_params p; @@ -134,7 +207,11 @@ struct page_pool { u64 destroy_cnt; +#ifdef CONFIG_PAGE_POOL_STATS + KABI_USE(1, struct page_pool_raw_stats *stats) +#else KABI_RESERVE(1) +#endif }; struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); diff --git a/net/Kconfig b/net/Kconfig index a22c3fb885647127589cee0a50cd3f900b2120c2..232075ae15e232084ee33329c75cbc71f5c46e8f 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -436,6 +436,19 @@ config NET_DEVLINK config PAGE_POOL bool +config PAGE_POOL_STATS + default n + bool "Page pool stats" + depends on PAGE_POOL + help + Enable page pool statistics to track page allocation and recycling + in page pools. This option incurs additional CPU cost in allocation + and recycle paths and additional memory cost to store the statistics. + These statistics are only available if this option is enabled and if + the driver using the page pool supports exporting this data. + + If unsure, say N. + config FAILOVER tristate "Generic failover module" help diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 9b60e4301a44fbf3b16c39396b14674bd827e900..3cb6899894751df9f8c036e4b44bf4012b15c230 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -26,6 +26,116 @@ #define BIAS_MAX LONG_MAX +#ifdef CONFIG_PAGE_POOL_STATS +/* alloc_stat_inc is intended to be used in softirq context */ +#define alloc_stat_inc(pool, __stat) (pool->stats->alloc_stats.__stat++) +/* recycle_stat_inc is safe to use when preemption is possible. */ +#define recycle_stat_inc(pool, __stat) \ + do { \ + struct page_pool_recycle_stats __percpu *s = pool->stats->recycle_stats;\ + this_cpu_inc(s->__stat); \ + } while (0) + +#define recycle_stat_add(pool, __stat, val) \ + do { \ + struct page_pool_recycle_stats __percpu *s = pool->stats->recycle_stats;\ + this_cpu_add(s->__stat, val); \ + } while (0) + +/* workaround for macro ETH_GSTRING_LEN, for include the header file ethtool.h + * will cause KABI issue, so define a new one to replace it. + */ +#define PP_ETH_GSTRING_LEN 32 +static const char pp_stats[][PP_ETH_GSTRING_LEN] = { + "rx_pp_alloc_fast", + "rx_pp_alloc_slow", + "rx_pp_alloc_slow_ho", + "rx_pp_alloc_empty", + "rx_pp_alloc_refill", + "rx_pp_alloc_waive", + "rx_pp_recycle_cached", + "rx_pp_recycle_cache_full", + "rx_pp_recycle_ring", + "rx_pp_recycle_ring_full", + "rx_pp_recycle_released_ref", +}; + +bool page_pool_get_stats(struct page_pool *pool, + struct page_pool_stats *stats) +{ + int cpu = 0; + + if (!stats) + return false; + + /* The caller is responsible to initialize stats. */ + stats->alloc_stats.fast += pool->stats->alloc_stats.fast; + stats->alloc_stats.slow += pool->stats->alloc_stats.slow; + stats->alloc_stats.slow_high_order += pool->stats->alloc_stats.slow_high_order; + stats->alloc_stats.empty += pool->stats->alloc_stats.empty; + stats->alloc_stats.refill += pool->stats->alloc_stats.refill; + stats->alloc_stats.waive += pool->stats->alloc_stats.waive; + + for_each_possible_cpu(cpu) { + const struct page_pool_recycle_stats *pcpu = + per_cpu_ptr(pool->stats->recycle_stats, cpu); + + stats->recycle_stats.cached += pcpu->cached; + stats->recycle_stats.cache_full += pcpu->cache_full; + stats->recycle_stats.ring += pcpu->ring; + stats->recycle_stats.ring_full += pcpu->ring_full; + stats->recycle_stats.released_refcnt += pcpu->released_refcnt; + } + + return true; +} +EXPORT_SYMBOL(page_pool_get_stats); + +u8 *page_pool_ethtool_stats_get_strings(u8 *data) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(pp_stats); i++) { + memcpy(data, pp_stats[i], PP_ETH_GSTRING_LEN); + data += PP_ETH_GSTRING_LEN; + } + + return data; +} +EXPORT_SYMBOL(page_pool_ethtool_stats_get_strings); + +int page_pool_ethtool_stats_get_count(void) +{ + return ARRAY_SIZE(pp_stats); +} +EXPORT_SYMBOL(page_pool_ethtool_stats_get_count); + +u64 *page_pool_ethtool_stats_get(u64 *data, void *stats) +{ + struct page_pool_stats *pool_stats = stats; + + *data++ = pool_stats->alloc_stats.fast; + *data++ = pool_stats->alloc_stats.slow; + *data++ = pool_stats->alloc_stats.slow_high_order; + *data++ = pool_stats->alloc_stats.empty; + *data++ = pool_stats->alloc_stats.refill; + *data++ = pool_stats->alloc_stats.waive; + *data++ = pool_stats->recycle_stats.cached; + *data++ = pool_stats->recycle_stats.cache_full; + *data++ = pool_stats->recycle_stats.ring; + *data++ = pool_stats->recycle_stats.ring_full; + *data++ = pool_stats->recycle_stats.released_refcnt; + + return data; +} +EXPORT_SYMBOL(page_pool_ethtool_stats_get); + +#else +#define alloc_stat_inc(pool, __stat) +#define recycle_stat_inc(pool, __stat) +#define recycle_stat_add(pool, __stat, val) +#endif + static int page_pool_init(struct page_pool *pool, const struct page_pool_params *params) { @@ -75,8 +185,17 @@ static int page_pool_init(struct page_pool *pool, */ } - if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) +#ifdef CONFIG_PAGE_POOL_STATS + pool->stats = kzalloc_node(sizeof(*pool->stats), GFP_KERNEL, params->nid); + if (!pool->stats) return -ENOMEM; + pool->stats->recycle_stats = alloc_percpu(struct page_pool_recycle_stats); + if (!pool->stats->recycle_stats) + goto out; +#endif + + if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) + goto out; atomic_set(&pool->pages_state_release_cnt, 0); @@ -87,6 +206,13 @@ static int page_pool_init(struct page_pool *pool, get_device(pool->p.dev); return 0; +out: +#ifdef CONFIG_PAGE_POOL_STATS + free_percpu(pool->stats->recycle_stats); + kfree(pool->stats); + pool->stats = NULL; +#endif + return -ENOMEM; } struct page_pool *page_pool_create(const struct page_pool_params *params) @@ -119,8 +245,10 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool) int pref_nid; /* preferred NUMA node */ /* Quicker fallback, avoid locks when ring is empty */ - if (__ptr_ring_empty(r)) + if (__ptr_ring_empty(r)) { + alloc_stat_inc(pool, empty); return NULL; + } /* Softirq guarantee CPU and thus NUMA node is stable. This, * assumes CPU refilling driver RX-ring will also run RX-NAPI. @@ -150,14 +278,17 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool) * This limit stress on page buddy alloactor. */ page_pool_return_page(pool, page); + alloc_stat_inc(pool, waive); page = NULL; break; } } while (pool->alloc.count < PP_ALLOC_CACHE_REFILL); /* Return last page */ - if (likely(pool->alloc.count > 0)) + if (likely(pool->alloc.count > 0)) { page = pool->alloc.cache[--pool->alloc.count]; + alloc_stat_inc(pool, refill); + } spin_unlock(&r->consumer_lock); return page; @@ -172,6 +303,7 @@ static struct page *__page_pool_get_cached(struct page_pool *pool) if (likely(pool->alloc.count)) { /* Fast-path */ page = pool->alloc.cache[--pool->alloc.count]; + alloc_stat_inc(pool, fast); } else { page = page_pool_refill_alloc_cache(pool); } @@ -243,6 +375,7 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool, return NULL; } + alloc_stat_inc(pool, slow_high_order); page_pool_set_pp_info(pool, page); /* Track how many pages are held 'in-flight' */ @@ -297,10 +430,12 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool, } /* Return last page */ - if (likely(pool->alloc.count > 0)) + if (likely(pool->alloc.count > 0)) { page = pool->alloc.cache[--pool->alloc.count]; - else + alloc_stat_inc(pool, slow); + } else { page = NULL; + } /* When page just alloc'ed is should/must have refcnt 1. */ return page; @@ -398,7 +533,12 @@ static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page) else ret = ptr_ring_produce_bh(&pool->ring, page); - return (ret == 0) ? true : false; + if (!ret) { + recycle_stat_inc(pool, ring); + return true; + } + + return false; } /* Only allow direct recycling in special circumstances, into the @@ -409,11 +549,14 @@ static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page) static bool page_pool_recycle_in_cache(struct page *page, struct page_pool *pool) { - if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE)) + if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE)) { + recycle_stat_inc(pool, cache_full); return false; + } /* Caller MUST have verified/know (page_ref_count(page) == 1) */ pool->alloc.cache[pool->alloc.count++] = page; + recycle_stat_inc(pool, cached); return true; } @@ -468,6 +611,7 @@ __page_pool_put_page(struct page_pool *pool, struct page *page, * doing refcnt based recycle tricks, meaning another process * will be invoking put_page. */ + recycle_stat_inc(pool, released_refcnt); /* Do not replace this with page_pool_return_page() */ page_pool_release_page(pool, page); put_page(page); @@ -481,6 +625,7 @@ void page_pool_put_page(struct page_pool *pool, struct page *page, page = __page_pool_put_page(pool, page, dma_sync_size, allow_direct); if (page && !page_pool_recycle_in_ring(pool, page)) { /* Cache full, fallback to free pages */ + recycle_stat_inc(pool, ring_full); page_pool_return_page(pool, page); } } @@ -507,9 +652,13 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data, /* Bulk producer into ptr_ring page_pool cache */ page_pool_ring_lock(pool); for (i = 0; i < bulk_len; i++) { - if (__ptr_ring_produce(&pool->ring, data[i])) - break; /* ring full */ + if (__ptr_ring_produce(&pool->ring, data[i])) { + /* ring full */ + recycle_stat_inc(pool, ring_full); + break; + } } + recycle_stat_add(pool, ring, i); page_pool_ring_unlock(pool); /* Hopefully all pages was return into ptr_ring */ @@ -575,8 +724,10 @@ struct page *page_pool_alloc_frag(struct page_pool *pool, if (page && *offset + size > max_size) { page = page_pool_drain_frag(pool, page); - if (page) + if (page) { + alloc_stat_inc(pool, fast); goto frag_reset; + } } if (!page) { @@ -598,6 +749,7 @@ struct page *page_pool_alloc_frag(struct page_pool *pool, pool->frag_users++; pool->frag_offset = *offset + size; + alloc_stat_inc(pool, fast); return page; } EXPORT_SYMBOL(page_pool_alloc_frag); @@ -627,6 +779,10 @@ static void page_pool_free(struct page_pool *pool) if (pool->p.flags & PP_FLAG_DMA_MAP) put_device(pool->p.dev); +#ifdef CONFIG_PAGE_POOL_STATS + free_percpu(pool->stats->recycle_stats); + kfree(pool->stats); +#endif kfree(pool); }