diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 420f15ddce392696bcd5e9a89ff77baf08846129..826fb16906fe29152c3dee68b3f190956bc24f75 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -851,7 +851,7 @@ struct perf_event {
 	 */
 	__u32				orig_type;
 
-	KABI_USE(1, struct rcuwait pending_work_wait)
+	KABI_RESERVE(1)
 	KABI_RESERVE(2)
 	KABI_RESERVE(3)
 	KABI_RESERVE(4)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 69cdf8aabf52343c7565958354df87ff5bb154c6..f042d61019320fdf5e079bbec0b943403b44bd72 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2294,6 +2294,7 @@ event_sched_out(struct perf_event *event, struct perf_event_context *ctx)
 		    !event->pending_work &&
 		    !task_work_add(current, &event->pending_task, TWA_RESUME)) {
 			event->pending_work = 1;
+			WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount));
 		} else {
 			local_dec(&event->ctx->nr_pending);
 		}
@@ -5187,35 +5188,9 @@ static bool exclusive_event_installable(struct perf_event *event,
 static void perf_addr_filters_splice(struct perf_event *event,
 				       struct list_head *head);
 
-static void perf_pending_task_sync(struct perf_event *event)
-{
-	struct callback_head *head = &event->pending_task;
-
-	if (!event->pending_work)
-		return;
-	/*
-	 * If the task is queued to the current task's queue, we
-	 * obviously can't wait for it to complete. Simply cancel it.
-	 */
-	if (task_work_cancel(current, head)) {
-		event->pending_work = 0;
-		local_dec(&event->ctx->nr_pending);
-		return;
-	}
-
-	/*
-	 * All accesses related to the event are within the same
-	 * non-preemptible section in perf_pending_task(). The RCU
-	 * grace period before the event is freed will make sure all
-	 * those accesses are complete by then.
-	 */
-	rcuwait_wait_event(&event->pending_work_wait, !event->pending_work, TASK_UNINTERRUPTIBLE);
-}
-
 static void _free_event(struct perf_event *event)
 {
 	irq_work_sync(&event->pending_irq);
-	perf_pending_task_sync(event);
 
 	unaccount_event(event);
 
@@ -5340,10 +5315,17 @@ static void perf_remove_from_owner(struct perf_event *event)
 
 static void put_event(struct perf_event *event)
 {
+	struct perf_event *parent;
+
 	if (!atomic_long_dec_and_test(&event->refcount))
 		return;
 
+	parent = event->parent;
 	_free_event(event);
+
+	/* Matches the refcount bump in inherit_event() */
+	if (parent)
+		put_event(parent);
 }
 
 /*
@@ -5427,11 +5409,6 @@ int perf_event_release_kernel(struct perf_event *event)
 		if (tmp == child) {
 			perf_remove_from_context(child, DETACH_GROUP);
 			list_move(&child->child_list, &free_list);
-			/*
-			 * This matches the refcount bump in inherit_event();
-			 * this can't be the last reference.
-			 */
-			put_event(event);
 		} else {
 			var = &ctx->refcount;
 		}
@@ -5457,7 +5434,8 @@ int perf_event_release_kernel(struct perf_event *event)
 		void *var = &child->ctx->refcount;
 
 		list_del(&child->child_list);
-		free_event(child);
+		/* Last reference unless ->pending_task work is pending */
+		put_event(child);
 
 		/*
 		 * Wake any perf_event_free_task() waiting for this event to be
@@ -5468,7 +5446,11 @@ int perf_event_release_kernel(struct perf_event *event)
 	}
 
 no_ctx:
-	put_event(event); /* Must be the 'last' reference */
+	/*
+	 * Last reference unless ->pending_task work is pending on this event
+	 * or any of its children.
+	 */
+	put_event(event);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(perf_event_release_kernel);
@@ -6857,12 +6839,6 @@ static void perf_pending_task(struct callback_head *head)
 	struct perf_event *event = container_of(head, struct perf_event, pending_task);
 	int rctx;
 
-	/*
-	 * All accesses to the event must belong to the same implicit RCU read-side
-	 * critical section as the ->pending_work reset. See comment in
-	 * perf_pending_task_sync().
-	 */
-	preempt_disable_notrace();
 	/*
 	 * If we 'fail' here, that's OK, it means recursion is already disabled
 	 * and we won't recurse 'further'.
@@ -6873,12 +6849,11 @@ static void perf_pending_task(struct callback_head *head)
 		event->pending_work = 0;
 		perf_sigtrap(event);
 		local_dec(&event->ctx->nr_pending);
-		rcuwait_wake_up(&event->pending_work_wait);
 	}
+	put_event(event);
 
 	if (rctx >= 0)
 		perf_swevent_put_recursion_context(rctx);
-	preempt_enable_notrace();
 }
 
 #ifdef CONFIG_GUEST_PERF_EVENTS
@@ -11990,7 +11965,6 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 	init_waitqueue_head(&event->waitq);
 	init_irq_work(&event->pending_irq, perf_pending_irq);
 	init_task_work(&event->pending_task, perf_pending_task);
-	rcuwait_init(&event->pending_work_wait);
 
 	mutex_init(&event->mmap_mutex);
 	raw_spin_lock_init(&event->addr_filters.lock);
@@ -13143,8 +13117,7 @@ perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx)
 		 * Kick perf_poll() for is_event_hup();
 		 */
 		perf_event_wakeup(parent_event);
-		free_event(event);
-		put_event(parent_event);
+		put_event(event);
 		return;
 	}
 
@@ -13262,13 +13235,11 @@ static void perf_free_event(struct perf_event *event,
 	list_del_init(&event->child_list);
 	mutex_unlock(&parent->child_mutex);
 
-	put_event(parent);
-
 	raw_spin_lock_irq(&ctx->lock);
 	perf_group_detach(event);
 	list_del_event(event, ctx);
 	raw_spin_unlock_irq(&ctx->lock);
-	free_event(event);
+	put_event(event);
 }
 
 /*