diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 5e76af742c807afd3630e630156ab7962c26acdb..c5bf45dead9dd546196ad09a7d8ba7ca0403b2dd 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -35,6 +35,12 @@
 #define PCPU_BITMAP_BLOCK_BITS		(PCPU_BITMAP_BLOCK_SIZE >>	\
					 PCPU_MIN_ALLOC_SHIFT)
 
+#ifdef CONFIG_RANDOM_KMALLOC_CACHES
+#define PERCPU_DYNAMIC_SIZE_SHIFT	12
+#else
+#define PERCPU_DYNAMIC_SIZE_SHIFT	10
+#endif
+
 /*
  * Percpu allocator can serve percpu allocations before slab is
  * initialized which allows slab to depend on the percpu allocator.
@@ -43,7 +49,7 @@
  * larger than PERCPU_DYNAMIC_EARLY_SIZE.
  */
 #define PERCPU_DYNAMIC_EARLY_SLOTS	128
-#define PERCPU_DYNAMIC_EARLY_SIZE	(12 << 10)
+#define PERCPU_DYNAMIC_EARLY_SIZE	(12 << PERCPU_DYNAMIC_SIZE_SHIFT)
 
 /*
  * PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy
@@ -57,9 +63,9 @@
  * intelligent way to determine this would be nice.
  */
 #if BITS_PER_LONG > 32
-#define PERCPU_DYNAMIC_RESERVE		(28 << 10)
+#define PERCPU_DYNAMIC_RESERVE		(28 << PERCPU_DYNAMIC_SIZE_SHIFT)
 #else
-#define PERCPU_DYNAMIC_RESERVE		(20 << 10)
+#define PERCPU_DYNAMIC_RESERVE		(20 << PERCPU_DYNAMIC_SIZE_SHIFT)
 #endif
 
 extern void *pcpu_base_addr;
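The shift bump above quadruples the early and dynamic percpu reserves when the sixteen kmalloc copies are enabled, presumably because each extra cache allocates its own per-CPU state out of this area. A minimal userspace sketch of the macro arithmetic (only the shift values come from the hunk above; nothing here is kernel code):

#include <stdio.h>

int main(void)
{
	/* 10 = default, 12 = CONFIG_RANDOM_KMALLOC_CACHES */
	int shifts[] = { 10, 12 };

	for (int i = 0; i < 2; i++) {
		int s = shifts[i];

		printf("shift %2d: early %3d KiB, reserve %3d KiB (64-bit) / %3d KiB (32-bit)\n",
		       s, (12 << s) >> 10, (28 << s) >> 10, (20 << s) >> 10);
	}
	return 0;	/* 12/28/20 KiB by default; 48/112/80 KiB with the feature */
}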
diff --git a/include/linux/slab.h b/include/linux/slab.h
index b27f7372f365664ae9a05a9ee036f233722cf299..d78d8fbd8c169486e3eb81e8bb8b992477756b81 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -17,6 +17,7 @@
 #include <linux/types.h>
 #include <linux/workqueue.h>
 #include <linux/percpu-refcount.h>
+#include <linux/hash.h>
 
 
 /*
@@ -93,6 +94,18 @@
 /* Avoid kmemleak tracing */
 #define SLAB_NOLEAKTRACE	((slab_flags_t __force)0x00800000U)
 
+/*
+ * Prevent merging with compatible kmem caches. This flag should be used
+ * cautiously. Valid use cases:
+ *
+ * - caches created for self-tests (e.g. kunit)
+ * - general caches created and used by a subsystem, only when a
+ *   (subsystem-specific) debug option is enabled
+ * - performance critical caches, should be very rare and consulted with slab
+ *   maintainers, and not used together with CONFIG_SLUB_TINY
+ */
+#define SLAB_NO_MERGE		((slab_flags_t __force)0x01000000U)
+
 /* Fault injection mark */
 #ifdef CONFIG_FAILSLAB
 # define SLAB_FAILSLAB		((slab_flags_t __force)0x02000000U)
@@ -298,12 +311,26 @@ static inline void __check_heap_object(const void *ptr, unsigned long n,
 #define SLAB_OBJ_MIN_SIZE	(KMALLOC_MIN_SIZE < 16 ? \
				 (KMALLOC_MIN_SIZE) : 16)
 
+#ifdef CONFIG_RANDOM_KMALLOC_CACHES
+#define RANDOM_KMALLOC_CACHES_NR	15 // # of cache copies
+#else
+#define RANDOM_KMALLOC_CACHES_NR	0
+#endif
+
 /*
  * Whenever changing this, take care of that kmalloc_type() and
  * create_kmalloc_caches() still work as intended.
  */
 enum kmalloc_cache_type {
 	KMALLOC_NORMAL = 0,
+	/*
+	 * This config guard, absent in mainline Linux, exists only
+	 * for kABI maintenance.
+	 */
+#ifdef CONFIG_RANDOM_KMALLOC_CACHES
+	KMALLOC_RANDOM_START = KMALLOC_NORMAL,
+	KMALLOC_RANDOM_END = KMALLOC_RANDOM_START + RANDOM_KMALLOC_CACHES_NR,
+#endif
 	KMALLOC_RECLAIM,
 #ifdef CONFIG_ZONE_DMA
 	KMALLOC_DMA,
@@ -315,7 +342,9 @@ enum kmalloc_cache_type {
 extern struct kmem_cache *
 kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1];
 
-static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags)
+extern unsigned long random_kmalloc_seed;
+
+static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags, unsigned long caller)
 {
 #ifdef CONFIG_ZONE_DMA
 	/*
@@ -323,7 +352,13 @@ static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags)
 	 * with a single branch for both flags.
 	 */
 	if (likely((flags & (__GFP_DMA | __GFP_RECLAIMABLE)) == 0))
+#ifdef CONFIG_RANDOM_KMALLOC_CACHES
+		/* RANDOM_KMALLOC_CACHES_NR (=15) copies + the KMALLOC_NORMAL */
+		return KMALLOC_RANDOM_START + hash_64(caller ^ random_kmalloc_seed,
+						      ilog2(RANDOM_KMALLOC_CACHES_NR + 1));
+#else
 		return KMALLOC_NORMAL;
+#endif
 
 	/*
 	 * At least one of the flags has to be set. If both are, __GFP_DMA
@@ -331,7 +366,13 @@ static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags)
 	 */
 	return flags & __GFP_DMA ? KMALLOC_DMA : KMALLOC_RECLAIM;
 #else
-	return flags & __GFP_RECLAIMABLE ? KMALLOC_RECLAIM : KMALLOC_NORMAL;
+	return flags & __GFP_RECLAIMABLE ? KMALLOC_RECLAIM :
+#ifdef CONFIG_RANDOM_KMALLOC_CACHES
+		KMALLOC_RANDOM_START + hash_64(caller ^ random_kmalloc_seed,
+					       ilog2(RANDOM_KMALLOC_CACHES_NR + 1));
+#else
+		KMALLOC_NORMAL;
+#endif
 #endif
 }
 
@@ -561,7 +602,7 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags)
 			return ZERO_SIZE_PTR;
 
 		return kmem_cache_alloc_trace(
-				kmalloc_caches[kmalloc_type(flags)][index],
+				kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index],
 				flags, size);
 #endif
 }
@@ -579,7 +620,7 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 			return ZERO_SIZE_PTR;
 
 		return kmem_cache_alloc_node_trace(
-				kmalloc_caches[kmalloc_type(flags)][i],
+				kmalloc_caches[kmalloc_type(flags, _RET_IP_)][i],
 				flags, node, size);
 	}
 #endif
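For reference, the selection logic added to kmalloc_type() reduces to hash_64(caller ^ random_kmalloc_seed, 4): the caller's return address (supplied as _RET_IP_ at the kmalloc() call sites above) is mixed with a per-boot seed and hashed down to ilog2(15 + 1) = 4 bits, picking one of the 16 normal-kmalloc copies. A hedged userspace model follows; GOLDEN_RATIO_64 is the hash_64() multiplier from include/linux/hash.h, while the seed and caller addresses are made up:

#include <stdint.h>
#include <stdio.h>

#define GOLDEN_RATIO_64 0x61c8864680b583ebULL	/* include/linux/hash.h */

/* hash_64(val, 4): multiply by the golden ratio, keep the top 4 bits */
static unsigned int pick_copy(uint64_t caller, uint64_t seed)
{
	return (unsigned int)(((caller ^ seed) * GOLDEN_RATIO_64) >> (64 - 4));
}

int main(void)
{
	uint64_t seed = 0x243f6a8885a308d3ULL;	/* stand-in for random_kmalloc_seed */
	uint64_t callers[] = { 0xffffffff81234560ULL, 0xffffffff8140aa10ULL,
			       0xffffffffc0001230ULL };

	for (int i = 0; i < 3; i++)
		/* copy 0 is plain kmalloc-<size>; 1..15 are kmalloc-rnd-NN-<size> */
		printf("caller 0x%016llx -> copy %2u\n",
		       (unsigned long long)callers[i], pick_copy(callers[i], seed));
	return 0;
}

Two allocation sites can still collide in the same copy; the hardening claim is only that, without the per-boot seed, an attacker cannot predict which copy a given site will use.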
diff --git a/init/Kconfig b/init/Kconfig
index 9dcc127047296cf828d2df37286ad8fa7e8f8d2a..be284bca406ccc38172d49aa920ff9e9fc2fefc8 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -2203,6 +2203,23 @@ config SLUB_CPU_PARTIAL
	  which requires the taking of locks that may cause latency spikes.
	  Typically one would choose no for a realtime system.
 
+config RANDOM_KMALLOC_CACHES
+	default n
+	depends on SLUB && !SLUB_TINY
+	bool "Randomize slab caches for normal kmalloc"
+	help
+	  A hardening feature that creates multiple copies of slab caches for
+	  normal kmalloc allocation and makes kmalloc randomly pick one based
+	  on code address, which makes it more difficult for attackers to
+	  spray vulnerable memory objects on the heap in order to exploit
+	  memory vulnerabilities.
+
+	  Currently the number of copies is set to 16, a reasonably large value
+	  that effectively diverges the memory objects allocated for different
+	  subsystems or modules into different caches, at the expense of a
+	  limited degree of memory and CPU overhead that depends on hardware
+	  and system workload.
+
 config MMAP_ALLOW_UNINITIALIZED
	bool "Allow mmapped anonymous memory to be uninitialized"
	depends on EXPERT && !MMU
@@ -2605,4 +2622,4 @@ config NOKASLR_MEM_RANGE
	default n
	help
	  Say y here and add kernel parameters as nokaslr=nn[KMG]-ss[KMG] to avoid kaslr
-	  place kernel image in such memory regions
\ No newline at end of file
+	  place kernel image in such memory regions
diff --git a/mm/kfence/kfence_test.c b/mm/kfence/kfence_test.c
index 0acbc736541297e5befceec4b1544f2855caee14..81cac28ca34efb9bff18fbd16a66272e80e1cbdb 100644
--- a/mm/kfence/kfence_test.c
+++ b/mm/kfence/kfence_test.c
@@ -186,11 +186,10 @@ static size_t setup_test_cache(struct kunit *test, size_t size, slab_flags_t fla
	kunit_info(test, "%s: size=%zu, ctor=%ps\n", __func__, size, ctor);
 
	/*
-	 * Use SLAB_NOLEAKTRACE to prevent merging with existing caches. Any
-	 * other flag in SLAB_NEVER_MERGE also works. Use SLAB_ACCOUNT to
-	 * allocate via memcg, if enabled.
+	 * Use SLAB_NO_MERGE to prevent merging with existing caches.
+	 * Use SLAB_ACCOUNT to allocate via memcg, if enabled.
	 */
-	flags |= SLAB_NOLEAKTRACE | SLAB_ACCOUNT;
+	flags |= SLAB_NO_MERGE | SLAB_ACCOUNT;
	test_cache = kmem_cache_create("test", size, 1, flags, ctor);
	KUNIT_ASSERT_TRUE_MSG(test, test_cache, "could not create cache");
 
@@ -208,7 +207,9 @@ static void test_cache_destroy(void)
 
 static inline size_t kmalloc_cache_alignment(size_t size)
 {
-	return kmalloc_caches[kmalloc_type(GFP_KERNEL)][__kmalloc_index(size, false)]->align;
+	/* just to get ->align, so no need to pass in the real caller */
+	enum kmalloc_cache_type type = kmalloc_type(GFP_KERNEL, 0);
+	return kmalloc_caches[type][__kmalloc_index(size, false)]->align;
 }
 
 /* Must always inline to match stack trace against caller. */
@@ -278,8 +279,9 @@ static void *test_alloc(struct kunit *test, size_t size, gfp_t gfp, enum allocat
 
	if (is_kfence_address(alloc)) {
		struct page *page = virt_to_head_page(alloc);
+		enum kmalloc_cache_type type = kmalloc_type(GFP_KERNEL, _RET_IP_);
		struct kmem_cache *s = test_cache ?:
-			kmalloc_caches[kmalloc_type(GFP_KERNEL)][__kmalloc_index(size, false)];
+			kmalloc_caches[type][__kmalloc_index(size, false)];
 
		/*
		 * Verify that various helpers return the right values
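The kfence self-test above becomes the first in-tree user of SLAB_NO_MERGE. For any other cache that must keep its own identity, the pattern is the same; a hedged sketch, with the cache name, object size, and init function invented for illustration:

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/slab.h>

static struct kmem_cache *demo_cache;

static int __init demo_cache_init(void)
{
	/*
	 * SLAB_NO_MERGE opts this cache out of slab merging, so its
	 * objects never share slabs with a compatible foreign cache.
	 */
	demo_cache = kmem_cache_create("demo-no-merge", 128, 0,
				       SLAB_NO_MERGE | SLAB_ACCOUNT, NULL);
	return demo_cache ? 0 : -ENOMEM;
}
late_initcall(demo_cache_init);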
diff --git a/mm/slab.c b/mm/slab.c
index ae84578f3fdea5075d5f6b19ce776c446cbd1bcc..ca71d3f3e34b53b0a98fdb9a1c7ee2a2f27bae00 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1684,7 +1684,7 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
		size_t freelist_size;
 
		freelist_size = num * sizeof(freelist_idx_t);
-		freelist_cache = kmalloc_slab(freelist_size, 0u);
+		freelist_cache = kmalloc_slab(freelist_size, 0u, _RET_IP_);
		if (!freelist_cache)
			continue;
 
@@ -2074,7 +2074,7 @@ int __kmem_cache_create(struct kmem_cache *cachep, slab_flags_t flags)
 
	if (OFF_SLAB(cachep)) {
		cachep->freelist_cache =
-			kmalloc_slab(cachep->freelist_size, 0u);
+			kmalloc_slab(cachep->freelist_size, 0u, _RET_IP_);
	}
 
	err = setup_cpu_cache(cachep, gfp);
@@ -3628,7 +3628,7 @@ __do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
 
	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
		return NULL;
-	cachep = kmalloc_slab(size, flags);
+	cachep = kmalloc_slab(size, flags, _RET_IP_);
	if (unlikely(ZERO_OR_NULL_PTR(cachep)))
		return cachep;
	ret = kmem_cache_alloc_node_trace(cachep, flags, node, size);
@@ -3667,7 +3667,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
 
	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
		return NULL;
-	cachep = kmalloc_slab(size, flags);
+	cachep = kmalloc_slab(size, flags, _RET_IP_);
	if (unlikely(ZERO_OR_NULL_PTR(cachep)))
		return cachep;
	ret = slab_alloc(cachep, flags, size, caller);
diff --git a/mm/slab.h b/mm/slab.h
index 8414c345127b548376187a758ba59418d12cea29..a0e92203e99fb232c90081f26c92c5d0e6cfd3fa 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -86,7 +86,7 @@ void setup_kmalloc_cache_index_table(void);
 void create_kmalloc_caches(slab_flags_t);
 
 /* Find the kmalloc slab corresponding to a certain size */
-struct kmem_cache *kmalloc_slab(size_t, gfp_t);
+struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags, unsigned long caller);
 #endif
 
 gfp_t kmalloc_fix_flags(gfp_t flags);
@@ -142,10 +142,10 @@ static inline slab_flags_t kmem_cache_flags(unsigned int object_size,
 #if defined(CONFIG_SLAB)
 #define SLAB_CACHE_FLAGS (SLAB_MEM_SPREAD | SLAB_NOLEAKTRACE | \
			  SLAB_RECLAIM_ACCOUNT | SLAB_TEMPORARY | \
-			  SLAB_ACCOUNT)
+			  SLAB_ACCOUNT | SLAB_NO_MERGE)
 #elif defined(CONFIG_SLUB)
 #define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \
-			  SLAB_TEMPORARY | SLAB_ACCOUNT)
+			  SLAB_TEMPORARY | SLAB_ACCOUNT | SLAB_NO_MERGE)
 #else
 #define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE)
 #endif
@@ -164,6 +164,7 @@ static inline slab_flags_t kmem_cache_flags(unsigned int object_size,
			      SLAB_NOLEAKTRACE | \
			      SLAB_RECLAIM_ACCOUNT | \
			      SLAB_TEMPORARY | \
+			      SLAB_NO_MERGE | \
			      SLAB_ACCOUNT)
 
 bool __kmem_cache_empty(struct kmem_cache *);
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 5f1a1c38a815e4514eebaefdc1b8f9dca7f21498..fbfc628c5e3a046749a5343774e2c9fba596aa39 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -54,7 +54,7 @@ static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
 */
 #define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
		SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \
-		SLAB_FAILSLAB | SLAB_KASAN)
+		SLAB_FAILSLAB | SLAB_NO_MERGE | SLAB_KASAN)
 
 #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
			 SLAB_CACHE_DMA32 | SLAB_ACCOUNT)
@@ -576,6 +576,11 @@ kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1] __ro_after_init =
 { /* initialization for https://bugs.llvm.org/show_bug.cgi?id=42570 */ };
 EXPORT_SYMBOL(kmalloc_caches);
 
+#ifdef CONFIG_RANDOM_KMALLOC_CACHES
+unsigned long random_kmalloc_seed __ro_after_init;
+EXPORT_SYMBOL(random_kmalloc_seed);
+#endif
+
 /*
  * Conversion table for small slabs sizes / 8 to the index in the
  * kmalloc array. This is necessary for slabs < 192 since we have non power
@@ -618,7 +623,7 @@ static inline unsigned int size_index_elem(unsigned int bytes)
  * Find the kmem_cache structure that serves a given size of
  * allocation
  */
-struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
+struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags, unsigned long caller)
 {
	unsigned int index;
 
@@ -633,25 +638,51 @@ struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
		index = fls(size - 1);
	}
 
-	return kmalloc_caches[kmalloc_type(flags)][index];
+	return kmalloc_caches[kmalloc_type(flags, caller)][index];
 }
 
 #ifdef CONFIG_ZONE_DMA
-#define INIT_KMALLOC_INFO(__size, __short_size)			\
-{								\
-	.name[KMALLOC_NORMAL]  = "kmalloc-" #__short_size,	\
-	.name[KMALLOC_RECLAIM] = "kmalloc-rcl-" #__short_size,	\
-	.name[KMALLOC_DMA]     = "dma-kmalloc-" #__short_size,	\
-	.size = __size,						\
-}
+#define KMALLOC_DMA_NAME(sz)	.name[KMALLOC_DMA] = "dma-kmalloc-" #sz,
+#else
+#define KMALLOC_DMA_NAME(sz)
+#endif
+
+#ifndef CONFIG_SLUB_TINY
+#define KMALLOC_RCL_NAME(sz)	.name[KMALLOC_RECLAIM] = "kmalloc-rcl-" #sz,
 #else
+#define KMALLOC_RCL_NAME(sz)
+#endif
+
+#ifdef CONFIG_RANDOM_KMALLOC_CACHES
+#define __KMALLOC_RANDOM_CONCAT(a, b) a ## b
+#define KMALLOC_RANDOM_NAME(N, sz) __KMALLOC_RANDOM_CONCAT(KMA_RAND_, N)(sz)
+#define KMA_RAND_1(sz)	.name[KMALLOC_RANDOM_START + 1] = "kmalloc-rnd-01-" #sz,
+#define KMA_RAND_2(sz)	KMA_RAND_1(sz)	.name[KMALLOC_RANDOM_START + 2] = "kmalloc-rnd-02-" #sz,
+#define KMA_RAND_3(sz)	KMA_RAND_2(sz)	.name[KMALLOC_RANDOM_START + 3] = "kmalloc-rnd-03-" #sz,
+#define KMA_RAND_4(sz)	KMA_RAND_3(sz)	.name[KMALLOC_RANDOM_START + 4] = "kmalloc-rnd-04-" #sz,
+#define KMA_RAND_5(sz)	KMA_RAND_4(sz)	.name[KMALLOC_RANDOM_START + 5] = "kmalloc-rnd-05-" #sz,
+#define KMA_RAND_6(sz)	KMA_RAND_5(sz)	.name[KMALLOC_RANDOM_START + 6] = "kmalloc-rnd-06-" #sz,
+#define KMA_RAND_7(sz)	KMA_RAND_6(sz)	.name[KMALLOC_RANDOM_START + 7] = "kmalloc-rnd-07-" #sz,
+#define KMA_RAND_8(sz)	KMA_RAND_7(sz)	.name[KMALLOC_RANDOM_START + 8] = "kmalloc-rnd-08-" #sz,
+#define KMA_RAND_9(sz)	KMA_RAND_8(sz)	.name[KMALLOC_RANDOM_START + 9] = "kmalloc-rnd-09-" #sz,
+#define KMA_RAND_10(sz)	KMA_RAND_9(sz)	.name[KMALLOC_RANDOM_START + 10] = "kmalloc-rnd-10-" #sz,
+#define KMA_RAND_11(sz)	KMA_RAND_10(sz)	.name[KMALLOC_RANDOM_START + 11] = "kmalloc-rnd-11-" #sz,
+#define KMA_RAND_12(sz)	KMA_RAND_11(sz)	.name[KMALLOC_RANDOM_START + 12] = "kmalloc-rnd-12-" #sz,
+#define KMA_RAND_13(sz)	KMA_RAND_12(sz)	.name[KMALLOC_RANDOM_START + 13] = "kmalloc-rnd-13-" #sz,
+#define KMA_RAND_14(sz)	KMA_RAND_13(sz)	.name[KMALLOC_RANDOM_START + 14] = "kmalloc-rnd-14-" #sz,
+#define KMA_RAND_15(sz)	KMA_RAND_14(sz)	.name[KMALLOC_RANDOM_START + 15] = "kmalloc-rnd-15-" #sz,
+#else // CONFIG_RANDOM_KMALLOC_CACHES
+#define KMALLOC_RANDOM_NAME(N, sz)
+#endif
+
 #define INIT_KMALLOC_INFO(__size, __short_size)		\
 {							\
	.name[KMALLOC_NORMAL]  = "kmalloc-" #__short_size,	\
-	.name[KMALLOC_RECLAIM] = "kmalloc-rcl-" #__short_size,	\
+	KMALLOC_RCL_NAME(__short_size)				\
+	KMALLOC_DMA_NAME(__short_size)				\
+	KMALLOC_RANDOM_NAME(RANDOM_KMALLOC_CACHES_NR, __short_size)	\
	.size = __size,						\
 }
-#endif
 
 /*
  * kmalloc_info[] is to make slub_debug=,kmalloc-xx option work at boot time.
@@ -740,6 +771,11 @@ new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
	if (type == KMALLOC_RECLAIM)
		flags |= SLAB_RECLAIM_ACCOUNT;
 
+#ifdef CONFIG_RANDOM_KMALLOC_CACHES
+	if (type >= KMALLOC_RANDOM_START && type <= KMALLOC_RANDOM_END)
+		flags |= SLAB_NO_MERGE;
+#endif
+
	kmalloc_caches[type][idx] = create_kmalloc_cache(
					kmalloc_info[idx].name[type],
					kmalloc_info[idx].size, flags, 0,
@@ -774,6 +810,9 @@ void __init create_kmalloc_caches(slab_flags_t flags)
				new_kmalloc_cache(2, type, flags);
		}
	}
+#ifdef CONFIG_RANDOM_KMALLOC_CACHES
+	random_kmalloc_seed = get_random_u64();
+#endif
 
	/* Kmalloc array is now usable */
	slab_state = UP;
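The KMA_RAND_N chain above is a manually unrolled recursion: KMALLOC_RANDOM_NAME(15, sz) expands to KMA_RAND_15(sz), which pulls in KMA_RAND_14(sz), and so on down to KMA_RAND_1(sz), so the copies are named in ascending order. Assuming CONFIG_ZONE_DMA, !CONFIG_SLUB_TINY, and CONFIG_RANDOM_KMALLOC_CACHES, one table entry then expands roughly to the following (an abridged sketch, not verbatim preprocessor output):

/* INIT_KMALLOC_INFO(32, 32) */
{
	.name[KMALLOC_NORMAL]            = "kmalloc-32",
	.name[KMALLOC_RECLAIM]           = "kmalloc-rcl-32",
	.name[KMALLOC_DMA]               = "dma-kmalloc-32",
	.name[KMALLOC_RANDOM_START + 1]  = "kmalloc-rnd-01-32",
	/* ... copies 02 through 14 follow the same pattern ... */
	.name[KMALLOC_RANDOM_START + 15] = "kmalloc-rnd-15-32",
	.size = 32,
}

Copy 0 (KMALLOC_RANDOM_START + 0, i.e. KMALLOC_NORMAL) keeps the unsuffixed name, which is why the chain starts at KMA_RAND_1 and why kmalloc_type() hashes into 16 buckets while only 15 extra names exist.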
diff --git a/mm/slub.c b/mm/slub.c
index ec1c3a376d3678aab2c29654de911f4dd243e257..9dd4cc478ec3f6332b8a545aa5f3ab9b9bb5dd3c 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4012,7 +4012,7 @@ void *__kmalloc(size_t size, gfp_t flags)
	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
		return kmalloc_large(size, flags);
 
-	s = kmalloc_slab(size, flags);
+	s = kmalloc_slab(size, flags, _RET_IP_);
 
	if (unlikely(ZERO_OR_NULL_PTR(s)))
		return s;
@@ -4060,7 +4060,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
		return ret;
	}
 
-	s = kmalloc_slab(size, flags);
+	s = kmalloc_slab(size, flags, _RET_IP_);
 
	if (unlikely(ZERO_OR_NULL_PTR(s)))
		return s;
@@ -4520,7 +4520,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
		return kmalloc_large(size, gfpflags);
 
-	s = kmalloc_slab(size, gfpflags);
+	s = kmalloc_slab(size, gfpflags, _RET_IP_);
 
	if (unlikely(ZERO_OR_NULL_PTR(s)))
		return s;
@@ -4551,7 +4551,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
		return ret;
	}
 
-	s = kmalloc_slab(size, gfpflags);
+	s = kmalloc_slab(size, gfpflags, _RET_IP_);
 
	if (unlikely(ZERO_OR_NULL_PTR(s)))
		return s;
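As a quick sanity check after booting with CONFIG_RANDOM_KMALLOC_CACHES=y, the fifteen extra copies appear in /proc/slabinfo under the kmalloc-rnd-NN-<size> names defined above, and the seed consumed by kmalloc_type() is drawn exactly once, in create_kmalloc_caches(). A small userspace counter (needs root, since /proc/slabinfo is mode 0400):

#include <stdio.h>
#include <string.h>

int main(void)
{
	FILE *f = fopen("/proc/slabinfo", "r");
	char line[512];
	int rnd = 0;

	if (!f) {
		perror("/proc/slabinfo");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		if (!strncmp(line, "kmalloc-rnd-", 12))
			rnd++;	/* e.g. kmalloc-rnd-07-256 */
	fclose(f);
	printf("randomized kmalloc caches: %d\n", rnd);
	return 0;
}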