diff --git a/arch/riscv/kernel/probes/uprobes.c b/arch/riscv/kernel/probes/uprobes.c index 4b3dc8beaf77d31d0c828a7a04cc81e2d78ef2e0..cc15f7ca6cc17b809de60be5dca7653c8f596503 100644 --- a/arch/riscv/kernel/probes/uprobes.c +++ b/arch/riscv/kernel/probes/uprobes.c @@ -167,6 +167,7 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, /* Initialize the slot */ void *kaddr = kmap_atomic(page); void *dst = kaddr + (vaddr & ~PAGE_MASK); + unsigned long start = (unsigned long)dst; memcpy(dst, src, len); @@ -176,13 +177,6 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, *(uprobe_opcode_t *)dst = __BUG_INSN_32; } + flush_icache_range(start, start + len); kunmap_atomic(kaddr); - - /* - * We probably need flush_icache_user_page() but it needs vma. - * This should work on most of architectures by default. If - * architecture needs to do something different it can define - * its own version of the function. - */ - flush_dcache_page(page); } diff --git a/drivers/firmware/arm_scmi/bus.c b/drivers/firmware/arm_scmi/bus.c index dcf774d3edfe4c186f6486909a263c88fa08ceca..51eeaf14367dacf456d2755d8a6dd49d925d84e3 100644 --- a/drivers/firmware/arm_scmi/bus.c +++ b/drivers/firmware/arm_scmi/bus.c @@ -240,6 +240,9 @@ static struct scmi_device *scmi_child_dev_find(struct device *parent, if (!dev) return NULL; + /* Drop the refcnt bumped implicitly by device_find_child */ + put_device(dev); + return to_scmi_dev(dev); } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 60455c84a937427289e8183bdb9c7b2e12ec52d8..1f7a36de69af1e2a4e349f10937e4842d6da1570 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1814,7 +1814,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) */ enum { EXT4_MF_MNTDIR_SAMPLED, - EXT4_MF_FC_INELIGIBLE /* Fast commit ineligible */ + EXT4_MF_FC_INELIGIBLE, /* Fast commit ineligible */ + EXT4_MF_JOURNAL_DESTROY /* Journal is in process of destroying */ }; static inline void ext4_set_mount_flag(struct super_block *sb, int bit) diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 0c77697d5e90d075364bb67dbc281a7fde89ea9d..ada46189b08603a8098ce5d25150870b80942c27 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -513,4 +513,33 @@ static inline int ext4_should_dioread_nolock(struct inode *inode) return 1; } +/* + * Pass journal explicitly as it may not be cached in the sbi->s_journal in some + * cases + */ +static inline int ext4_journal_destroy(struct ext4_sb_info *sbi, journal_t *journal) +{ + int err = 0; + + /* + * At this point only two things can be operating on the journal. + * JBD2 thread performing transaction commit and s_sb_upd_work + * issuing sb update through the journal. Once we set + * EXT4_JOURNAL_DESTROY, new ext4_handle_error() calls will not + * queue s_sb_upd_work and ext4_force_commit() makes sure any + * ext4_handle_error() calls from the running transaction commit are + * finished. Hence no new s_sb_upd_work can be queued after we + * flush it here. + */ + ext4_set_mount_flag(sbi->s_sb, EXT4_MF_JOURNAL_DESTROY); + + ext4_force_commit(sbi->s_sb); + flush_work(&sbi->s_sb_upd_work); + + err = jbd2_journal_destroy(journal); + sbi->s_journal = NULL; + + return err; +} + #endif /* _EXT4_JBD2_H */ diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 270f9d291643e756cde7604d53463e9b7251f584..92d2182f472bff20fcfc05c93edec9d44451de13 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -728,9 +728,13 @@ static void ext4_handle_error(struct super_block *sb, bool force_ro, int error, * In case the fs should keep running, we need to writeout * superblock through the journal. Due to lock ordering * constraints, it may not be safe to do it right here so we - * defer superblock flushing to a workqueue. + * defer superblock flushing to a workqueue. We just need to be + * careful when the journal is already shutting down. If we get + * here in that case, just update the sb directly as the last + * transaction won't commit anyway. */ - if (continue_fs && journal) + if (continue_fs && journal && + !ext4_test_mount_flag(sb, EXT4_MF_JOURNAL_DESTROY)) schedule_work(&EXT4_SB(sb)->s_sb_upd_work); else ext4_commit_super(sb); @@ -1315,18 +1319,17 @@ static void ext4_put_super(struct super_block *sb) ext4_unregister_li_request(sb); ext4_quotas_off(sb, EXT4_MAXQUOTAS); - flush_work(&sbi->s_sb_upd_work); destroy_workqueue(sbi->rsv_conversion_wq); ext4_release_orphan_info(sb); if (sbi->s_journal) { aborted = is_journal_aborted(sbi->s_journal); - err = jbd2_journal_destroy(sbi->s_journal); - sbi->s_journal = NULL; + err = ext4_journal_destroy(sbi, sbi->s_journal); if ((err < 0) && !aborted) { ext4_abort(sb, -err, "Couldn't clean up the journal"); } - } + } else + flush_work(&sbi->s_sb_upd_work); ext4_es_unregister_shrinker(sbi); timer_shutdown_sync(&sbi->s_err_report); @@ -4992,10 +4995,7 @@ static int ext4_load_and_init_journal(struct super_block *sb, return 0; out: - /* flush s_sb_upd_work before destroying the journal. */ - flush_work(&sbi->s_sb_upd_work); - jbd2_journal_destroy(sbi->s_journal); - sbi->s_journal = NULL; + ext4_journal_destroy(sbi, sbi->s_journal); return -EINVAL; } @@ -5664,10 +5664,7 @@ failed_mount8: __maybe_unused sbi->s_ea_block_cache = NULL; if (sbi->s_journal) { - /* flush s_sb_upd_work before journal destroy. */ - flush_work(&sbi->s_sb_upd_work); - jbd2_journal_destroy(sbi->s_journal); - sbi->s_journal = NULL; + ext4_journal_destroy(sbi, sbi->s_journal); } failed_mount3a: ext4_es_unregister_shrinker(sbi); @@ -5975,7 +5972,7 @@ static journal_t *ext4_open_dev_journal(struct super_block *sb, return journal; out_journal: - jbd2_journal_destroy(journal); + ext4_journal_destroy(EXT4_SB(sb), journal); out_bdev: blkdev_put(journal_bdev, sb); return ERR_PTR(errno); @@ -6092,8 +6089,7 @@ static int ext4_load_journal(struct super_block *sb, EXT4_SB(sb)->s_journal = journal; err = ext4_clear_journal_err(sb, es); if (err) { - EXT4_SB(sb)->s_journal = NULL; - jbd2_journal_destroy(journal); + ext4_journal_destroy(EXT4_SB(sb), journal); return err; } @@ -6111,7 +6107,7 @@ static int ext4_load_journal(struct super_block *sb, return 0; err_out: - jbd2_journal_destroy(journal); + ext4_journal_destroy(EXT4_SB(sb), journal); return err; } diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 8f77bb0f4ae0ca4de04ec21e6454e1b0a6fe4f7c..05f8b7d7d1e968747462801191974a2af0b4a34d 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -237,6 +237,7 @@ struct hrtimer_cpu_base { ktime_t softirq_expires_next; struct hrtimer *softirq_next_timer; struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; + call_single_data_t csd; } ____cacheline_aligned; static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index fc34f72702cc406aaaec6c17edb91e3206f871a3..fdc74aeaa16454cc296318ee11b231ee62ecd632 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -197,12 +197,12 @@ static bool htab_is_percpu(const struct bpf_htab *htab) static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size, void __percpu *pptr) { - *(void __percpu **)(l->key + key_size) = pptr; + *(void __percpu **)(l->key + roundup(key_size, 8)) = pptr; } static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size) { - return *(void __percpu **)(l->key + key_size); + return *(void __percpu **)(l->key + roundup(key_size, 8)); } static void *fd_htab_map_get_ptr(const struct bpf_map *map, struct htab_elem *l) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 877535b06e73aa97dc2413ae011325d75f54aac5..6d9da768604d68d22f236dbfc5d5125317a3b7fb 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -58,6 +58,8 @@ #define HRTIMER_ACTIVE_SOFT (HRTIMER_ACTIVE_HARD << MASK_SHIFT) #define HRTIMER_ACTIVE_ALL (HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD) +static void retrigger_next_event(void *arg); + /* * The timer bases: * @@ -111,7 +113,8 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = .clockid = CLOCK_TAI, .get_time = &ktime_get_clocktai, }, - } + }, + .csd = CSD_INIT(retrigger_next_event, NULL) }; static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { @@ -124,6 +127,14 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { [CLOCK_TAI] = HRTIMER_BASE_TAI, }; +static inline bool hrtimer_base_is_online(struct hrtimer_cpu_base *base) +{ + if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) + return true; + else + return likely(base->online); +} + /* * Functions and macros which are different for UP/SMP systems are kept in a * single place @@ -178,27 +189,54 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, } /* - * We do not migrate the timer when it is expiring before the next - * event on the target cpu. When high resolution is enabled, we cannot - * reprogram the target cpu hardware and we would cause it to fire - * late. To keep it simple, we handle the high resolution enabled and - * disabled case similar. + * Check if the elected target is suitable considering its next + * event and the hotplug state of the current CPU. + * + * If the elected target is remote and its next event is after the timer + * to queue, then a remote reprogram is necessary. However there is no + * guarantee the IPI handling the operation would arrive in time to meet + * the high resolution deadline. In this case the local CPU becomes a + * preferred target, unless it is offline. + * + * High and low resolution modes are handled the same way for simplicity. * * Called with cpu_base->lock of target cpu held. */ -static int -hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base) +static bool hrtimer_suitable_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base, + struct hrtimer_cpu_base *new_cpu_base, + struct hrtimer_cpu_base *this_cpu_base) { ktime_t expires; + /* + * The local CPU clockevent can be reprogrammed. Also get_target_base() + * guarantees it is online. + */ + if (new_cpu_base == this_cpu_base) + return true; + + /* + * The offline local CPU can't be the default target if the + * next remote target event is after this timer. Keep the + * elected new base. An IPI will we issued to reprogram + * it as a last resort. + */ + if (!hrtimer_base_is_online(this_cpu_base)) + return true; + expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset); - return expires < new_base->cpu_base->expires_next; + + return expires >= new_base->cpu_base->expires_next; } -static inline -struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, - int pinned) +static inline struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, int pinned) { + if (!hrtimer_base_is_online(base)) { + int cpu = cpumask_any_and(cpu_online_mask, housekeeping_cpumask(HK_TYPE_TIMER)); + + return &per_cpu(hrtimer_bases, cpu); + } + #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) if (static_branch_likely(&timers_migration_enabled) && !pinned) return &per_cpu(hrtimer_bases, get_nohz_timer_target()); @@ -249,8 +287,8 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, raw_spin_unlock(&base->cpu_base->lock); raw_spin_lock(&new_base->cpu_base->lock); - if (new_cpu_base != this_cpu_base && - hrtimer_check_target(timer, new_base)) { + if (!hrtimer_suitable_target(timer, new_base, new_cpu_base, + this_cpu_base)) { raw_spin_unlock(&new_base->cpu_base->lock); raw_spin_lock(&base->cpu_base->lock); new_cpu_base = this_cpu_base; @@ -259,8 +297,7 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, } WRITE_ONCE(timer->base, new_base); } else { - if (new_cpu_base != this_cpu_base && - hrtimer_check_target(timer, new_base)) { + if (!hrtimer_suitable_target(timer, new_base, new_cpu_base, this_cpu_base)) { new_cpu_base = this_cpu_base; goto again; } @@ -720,8 +757,6 @@ static inline int hrtimer_is_hres_enabled(void) return hrtimer_hres_enabled; } -static void retrigger_next_event(void *arg); - /* * Switch to high resolution mode */ @@ -1208,6 +1243,7 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 delta_ns, const enum hrtimer_mode mode, struct hrtimer_clock_base *base) { + struct hrtimer_cpu_base *this_cpu_base = this_cpu_ptr(&hrtimer_bases); struct hrtimer_clock_base *new_base; bool force_local, first; @@ -1219,9 +1255,15 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, * and enforce reprogramming after it is queued no matter whether * it is the new first expiring timer again or not. */ - force_local = base->cpu_base == this_cpu_ptr(&hrtimer_bases); + force_local = base->cpu_base == this_cpu_base; force_local &= base->cpu_base->next_timer == timer; + /* + * Don't force local queuing if this enqueue happens on a unplugged + * CPU after hrtimer_cpu_dying() has been invoked. + */ + force_local &= this_cpu_base->online; + /* * Remove an active timer from the queue. In case it is not queued * on the current CPU, make sure that remove_hrtimer() updates the @@ -1251,8 +1293,27 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, } first = enqueue_hrtimer(timer, new_base, mode); - if (!force_local) - return first; + if (!force_local) { + /* + * If the current CPU base is online, then the timer is + * never queued on a remote CPU if it would be the first + * expiring timer there. + */ + if (hrtimer_base_is_online(this_cpu_base)) + return first; + + /* + * Timer was enqueued remote because the current base is + * already offline. If the timer is the first to expire, + * kick the remote CPU to reprogram the clock event. + */ + if (first) { + struct hrtimer_cpu_base *new_cpu_base = new_base->cpu_base; + + smp_call_function_single_async(new_cpu_base->cpu, &new_cpu_base->csd); + } + return 0; + } /* * Timer was forced to stay on the current CPU to avoid