From 73bd62d22a6ba4c4c68a2ccde6fa71143c66d2b9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 3 Apr 2024 16:35:17 +0200 Subject: [PATCH 001/268] Revert "workqueue: Shorten events_freezable_power_efficient name" stable inclusion from stable-6.6.25 commit 7bff1820bcfaa4337662a17ae86712c4c0988970 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- This reverts commit 8b934390272d50ae0e7e320617437a03e5712baa which is commit 8318d6a6362f5903edb4c904a8dd447e59be4ad1 upstream. The workqueue patches backported to 6.6.y caused some reported regressions, so revert them for now. Reported-by: Thorsten Leemhuis Cc: Tejun Heo Cc: Marek Szyprowski Cc: Nathan Chancellor Cc: Sasha Levin Cc: Audra Mitchell Link: https://lore.kernel.org/all/ce4c2f67-c298-48a0-87a3-f933d646c73b@leemhuis.info/ Signed-off-by: Greg Kroah-Hartman Signed-off-by: zhangshuqi --- kernel/workqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 66a270e235b4..7d231bbd1659 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -7109,7 +7109,7 @@ void __init workqueue_init_early(void) WQ_FREEZABLE, 0); system_power_efficient_wq = alloc_workqueue("events_power_efficient", WQ_POWER_EFFICIENT, 0); - system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_pwr_efficient", + system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_power_efficient", WQ_FREEZABLE | WQ_POWER_EFFICIENT, 0); BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq || -- Gitee From 96a4e65451828cab5e2b8afa5d7563a5b21bf9a3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 3 Apr 2024 16:35:37 +0200 Subject: [PATCH 002/268] Revert "workqueue: Don't call cpumask_test_cpu() with -1 CPU in wq_update_node_max_active()" stable inclusion from stable-6.6.25 commit a75ac2693d734d20724f0e10e039ca85f1fcfc4e category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- This reverts commit 7df62b8cca38aa452b508b477b16544cba615084 which is commit 15930da42f8981dc42c19038042947b475b19f47 upstream. The workqueue patches backported to 6.6.y caused some reported regressions, so revert them for now. Reported-by: Thorsten Leemhuis Cc: Tejun Heo Cc: Marek Szyprowski Cc: Nathan Chancellor Cc: Sasha Levin Cc: Audra Mitchell Link: https://lore.kernel.org/all/ce4c2f67-c298-48a0-87a3-f933d646c73b@leemhuis.info/ Signed-off-by: Greg Kroah-Hartman Signed-off-by: zhangshuqi --- kernel/workqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 7d231bbd1659..b2975e44dffa 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1500,7 +1500,7 @@ static void wq_update_node_max_active(struct workqueue_struct *wq, int off_cpu) lockdep_assert_held(&wq->mutex); - if (off_cpu >= 0 && !cpumask_test_cpu(off_cpu, effective)) + if (!cpumask_test_cpu(off_cpu, effective)) off_cpu = -1; total_cpus = cpumask_weight_and(effective, cpu_online_mask); -- Gitee From a081ecb0fdf2472bdf94fab9075f825c87271907 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 3 Apr 2024 16:36:17 +0200 Subject: [PATCH 003/268] Revert "workqueue: Implement system-wide nr_active enforcement for unbound workqueues" stable inclusion from stable-6.6.25 commit 6741dd3fd38e47c08bc39d11ef5fa141fa6d0441 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- This reverts commit 5a70baec2294e8a7d0fcc4558741c23e752dad5c which is commit 5797b1c18919cd9c289ded7954383e499f729ce0 upstream. The workqueue patches backported to 6.6.y caused some reported regressions, so revert them for now. Reported-by: Thorsten Leemhuis Cc: Tejun Heo Cc: Marek Szyprowski Cc: Nathan Chancellor Cc: Sasha Levin Cc: Audra Mitchell Link: https://lore.kernel.org/all/ce4c2f67-c298-48a0-87a3-f933d646c73b@leemhuis.info/ Signed-off-by: Greg Kroah-Hartman Signed-off-by: zhangshuqi --- include/linux/workqueue.h | 35 +--- kernel/workqueue.c | 341 ++++---------------------------------- 2 files changed, 35 insertions(+), 341 deletions(-) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index ad97453e7c3a..24b1e5070f4d 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -405,13 +405,6 @@ enum { WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ WQ_UNBOUND_MAX_ACTIVE = WQ_MAX_ACTIVE, WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2, - - /* - * Per-node default cap on min_active. Unless explicitly set, min_active - * is set to min(max_active, WQ_DFL_MIN_ACTIVE). For more details, see - * workqueue_struct->min_active definition. - */ - WQ_DFL_MIN_ACTIVE = 8, }; /* @@ -454,33 +447,11 @@ extern struct workqueue_struct *system_freezable_power_efficient_wq; * alloc_workqueue - allocate a workqueue * @fmt: printf format for the name of the workqueue * @flags: WQ_* flags - * @max_active: max in-flight work items, 0 for default + * @max_active: max in-flight work items per CPU, 0 for default * remaining args: args for @fmt * - * For a per-cpu workqueue, @max_active limits the number of in-flight work - * items for each CPU. e.g. @max_active of 1 indicates that each CPU can be - * executing at most one work item for the workqueue. - * - * For unbound workqueues, @max_active limits the number of in-flight work items - * for the whole system. e.g. @max_active of 16 indicates that that there can be - * at most 16 work items executing for the workqueue in the whole system. - * - * As sharing the same active counter for an unbound workqueue across multiple - * NUMA nodes can be expensive, @max_active is distributed to each NUMA node - * according to the proportion of the number of online CPUs and enforced - * independently. - * - * Depending on online CPU distribution, a node may end up with per-node - * max_active which is significantly lower than @max_active, which can lead to - * deadlocks if the per-node concurrency limit is lower than the maximum number - * of interdependent work items for the workqueue. - * - * To guarantee forward progress regardless of online CPU distribution, the - * concurrency limit on every node is guaranteed to be equal to or greater than - * min_active which is set to min(@max_active, %WQ_DFL_MIN_ACTIVE). This means - * that the sum of per-node max_active's may be larger than @max_active. - * - * For detailed information on %WQ_* flags, please refer to + * Allocate a workqueue with the specified parameters. For detailed + * information on WQ_* flags, please refer to * Documentation/core-api/workqueue.rst. * * RETURNS: diff --git a/kernel/workqueue.c b/kernel/workqueue.c index b2975e44dffa..252ad3bfe799 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -122,9 +122,6 @@ enum { * * L: pool->lock protected. Access with pool->lock held. * - * LN: pool->lock and wq_node_nr_active->lock protected for writes. Either for - * reads. - * * K: Only modified by worker while holding pool->lock. Can be safely read by * self, while holding pool->lock or from IRQ context if %current is the * kworker. @@ -246,18 +243,17 @@ struct pool_workqueue { * pwq->inactive_works instead of pool->worklist and marked with * WORK_STRUCT_INACTIVE. * - * All work items marked with WORK_STRUCT_INACTIVE do not participate in - * nr_active and all work items in pwq->inactive_works are marked with - * WORK_STRUCT_INACTIVE. But not all WORK_STRUCT_INACTIVE work items are - * in pwq->inactive_works. Some of them are ready to run in - * pool->worklist or worker->scheduled. Those work itmes are only struct - * wq_barrier which is used for flush_work() and should not participate - * in nr_active. For non-barrier work item, it is marked with - * WORK_STRUCT_INACTIVE iff it is in pwq->inactive_works. + * All work items marked with WORK_STRUCT_INACTIVE do not participate + * in pwq->nr_active and all work items in pwq->inactive_works are + * marked with WORK_STRUCT_INACTIVE. But not all WORK_STRUCT_INACTIVE + * work items are in pwq->inactive_works. Some of them are ready to + * run in pool->worklist or worker->scheduled. Those work itmes are + * only struct wq_barrier which is used for flush_work() and should + * not participate in pwq->nr_active. For non-barrier work item, it + * is marked with WORK_STRUCT_INACTIVE iff it is in pwq->inactive_works. */ int nr_active; /* L: nr of active works */ struct list_head inactive_works; /* L: inactive works */ - struct list_head pending_node; /* LN: node on wq_node_nr_active->pending_pwqs */ struct list_head pwqs_node; /* WR: node on wq->pwqs */ struct list_head mayday_node; /* MD: node on wq->maydays */ @@ -289,19 +285,9 @@ struct wq_device; * on each CPU, in an unbound workqueue, max_active applies to the whole system. * As sharing a single nr_active across multiple sockets can be very expensive, * the counting and enforcement is per NUMA node. - * - * The following struct is used to enforce per-node max_active. When a pwq wants - * to start executing a work item, it should increment ->nr using - * tryinc_node_nr_active(). If acquisition fails due to ->nr already being over - * ->max, the pwq is queued on ->pending_pwqs. As in-flight work items finish - * and decrement ->nr, node_activate_pending_pwq() activates the pending pwqs in - * round-robin order. */ struct wq_node_nr_active { - int max; /* per-node max_active */ - atomic_t nr; /* per-node nr_active */ - raw_spinlock_t lock; /* nests inside pool locks */ - struct list_head pending_pwqs; /* LN: pwqs with inactive works */ + atomic_t nr; /* per-node nr_active count */ }; /* @@ -324,12 +310,8 @@ struct workqueue_struct { struct worker *rescuer; /* MD: rescue worker */ int nr_drainers; /* WQ: drain in progress */ - - /* See alloc_workqueue() function comment for info on min/max_active */ int max_active; /* WO: max active works */ - int min_active; /* WO: min active works */ int saved_max_active; /* WQ: saved max_active */ - int saved_min_active; /* WQ: saved min_active */ struct workqueue_attrs *unbound_attrs; /* PW: only for unbound wqs */ struct pool_workqueue __rcu *dfl_pwq; /* PW: only for unbound wqs */ @@ -675,19 +657,6 @@ static struct pool_workqueue *unbound_pwq(struct workqueue_struct *wq, int cpu) lockdep_is_held(&wq->mutex)); } -/** - * unbound_effective_cpumask - effective cpumask of an unbound workqueue - * @wq: workqueue of interest - * - * @wq->unbound_attrs->cpumask contains the cpumask requested by the user which - * is masked with wq_unbound_cpumask to determine the effective cpumask. The - * default pwq is always mapped to the pool with the current effective cpumask. - */ -static struct cpumask *unbound_effective_cpumask(struct workqueue_struct *wq) -{ - return unbound_pwq(wq, -1)->pool->attrs->__pod_cpumask; -} - static unsigned int work_color_to_flags(int color) { return color << WORK_STRUCT_COLOR_SHIFT; @@ -1482,46 +1451,6 @@ static struct wq_node_nr_active *wq_node_nr_active(struct workqueue_struct *wq, return wq->node_nr_active[node]; } -/** - * wq_update_node_max_active - Update per-node max_actives to use - * @wq: workqueue to update - * @off_cpu: CPU that's going down, -1 if a CPU is not going down - * - * Update @wq->node_nr_active[]->max. @wq must be unbound. max_active is - * distributed among nodes according to the proportions of numbers of online - * cpus. The result is always between @wq->min_active and max_active. - */ -static void wq_update_node_max_active(struct workqueue_struct *wq, int off_cpu) -{ - struct cpumask *effective = unbound_effective_cpumask(wq); - int min_active = READ_ONCE(wq->min_active); - int max_active = READ_ONCE(wq->max_active); - int total_cpus, node; - - lockdep_assert_held(&wq->mutex); - - if (!cpumask_test_cpu(off_cpu, effective)) - off_cpu = -1; - - total_cpus = cpumask_weight_and(effective, cpu_online_mask); - if (off_cpu >= 0) - total_cpus--; - - for_each_node(node) { - int node_cpus; - - node_cpus = cpumask_weight_and(effective, cpumask_of_node(node)); - if (off_cpu >= 0 && cpu_to_node(off_cpu) == node) - node_cpus--; - - wq_node_nr_active(wq, node)->max = - clamp(DIV_ROUND_UP(max_active * node_cpus, total_cpus), - min_active, max_active); - } - - wq_node_nr_active(wq, NUMA_NO_NODE)->max = min_active; -} - /** * get_pwq - get an extra reference on the specified pool_workqueue * @pwq: pool_workqueue to get @@ -1619,98 +1548,35 @@ static bool pwq_activate_work(struct pool_workqueue *pwq, return true; } -static bool tryinc_node_nr_active(struct wq_node_nr_active *nna) -{ - int max = READ_ONCE(nna->max); - - while (true) { - int old, tmp; - - old = atomic_read(&nna->nr); - if (old >= max) - return false; - tmp = atomic_cmpxchg_relaxed(&nna->nr, old, old + 1); - if (tmp == old) - return true; - } -} - /** * pwq_tryinc_nr_active - Try to increment nr_active for a pwq * @pwq: pool_workqueue of interest - * @fill: max_active may have increased, try to increase concurrency level * * Try to increment nr_active for @pwq. Returns %true if an nr_active count is * successfully obtained. %false otherwise. */ -static bool pwq_tryinc_nr_active(struct pool_workqueue *pwq, bool fill) +static bool pwq_tryinc_nr_active(struct pool_workqueue *pwq) { struct workqueue_struct *wq = pwq->wq; struct worker_pool *pool = pwq->pool; struct wq_node_nr_active *nna = wq_node_nr_active(wq, pool->node); - bool obtained = false; + bool obtained; lockdep_assert_held(&pool->lock); - if (!nna) { - /* per-cpu workqueue, pwq->nr_active is sufficient */ - obtained = pwq->nr_active < READ_ONCE(wq->max_active); - goto out; - } - - /* - * Unbound workqueue uses per-node shared nr_active $nna. If @pwq is - * already waiting on $nna, pwq_dec_nr_active() will maintain the - * concurrency level. Don't jump the line. - * - * We need to ignore the pending test after max_active has increased as - * pwq_dec_nr_active() can only maintain the concurrency level but not - * increase it. This is indicated by @fill. - */ - if (!list_empty(&pwq->pending_node) && likely(!fill)) - goto out; - - obtained = tryinc_node_nr_active(nna); - if (obtained) - goto out; - - /* - * Lockless acquisition failed. Lock, add ourself to $nna->pending_pwqs - * and try again. The smp_mb() is paired with the implied memory barrier - * of atomic_dec_return() in pwq_dec_nr_active() to ensure that either - * we see the decremented $nna->nr or they see non-empty - * $nna->pending_pwqs. - */ - raw_spin_lock(&nna->lock); - - if (list_empty(&pwq->pending_node)) - list_add_tail(&pwq->pending_node, &nna->pending_pwqs); - else if (likely(!fill)) - goto out_unlock; - - smp_mb(); - - obtained = tryinc_node_nr_active(nna); + obtained = pwq->nr_active < READ_ONCE(wq->max_active); - /* - * If @fill, @pwq might have already been pending. Being spuriously - * pending in cold paths doesn't affect anything. Let's leave it be. - */ - if (obtained && likely(!fill)) - list_del_init(&pwq->pending_node); - -out_unlock: - raw_spin_unlock(&nna->lock); -out: - if (obtained) + if (obtained) { pwq->nr_active++; + if (nna) + atomic_inc(&nna->nr); + } return obtained; } /** * pwq_activate_first_inactive - Activate the first inactive work item on a pwq * @pwq: pool_workqueue of interest - * @fill: max_active may have increased, try to increase concurrency level * * Activate the first inactive work item of @pwq if available and allowed by * max_active limit. @@ -1718,13 +1584,13 @@ static bool pwq_tryinc_nr_active(struct pool_workqueue *pwq, bool fill) * Returns %true if an inactive work item has been activated. %false if no * inactive work item is found or max_active limit is reached. */ -static bool pwq_activate_first_inactive(struct pool_workqueue *pwq, bool fill) +static bool pwq_activate_first_inactive(struct pool_workqueue *pwq) { struct work_struct *work = list_first_entry_or_null(&pwq->inactive_works, struct work_struct, entry); - if (work && pwq_tryinc_nr_active(pwq, fill)) { + if (work && pwq_tryinc_nr_active(pwq)) { __pwq_activate_work(pwq, work); return true; } else { @@ -1732,93 +1598,11 @@ static bool pwq_activate_first_inactive(struct pool_workqueue *pwq, bool fill) } } -/** - * node_activate_pending_pwq - Activate a pending pwq on a wq_node_nr_active - * @nna: wq_node_nr_active to activate a pending pwq for - * @caller_pool: worker_pool the caller is locking - * - * Activate a pwq in @nna->pending_pwqs. Called with @caller_pool locked. - * @caller_pool may be unlocked and relocked to lock other worker_pools. - */ -static void node_activate_pending_pwq(struct wq_node_nr_active *nna, - struct worker_pool *caller_pool) -{ - struct worker_pool *locked_pool = caller_pool; - struct pool_workqueue *pwq; - struct work_struct *work; - - lockdep_assert_held(&caller_pool->lock); - - raw_spin_lock(&nna->lock); -retry: - pwq = list_first_entry_or_null(&nna->pending_pwqs, - struct pool_workqueue, pending_node); - if (!pwq) - goto out_unlock; - - /* - * If @pwq is for a different pool than @locked_pool, we need to lock - * @pwq->pool->lock. Let's trylock first. If unsuccessful, do the unlock - * / lock dance. For that, we also need to release @nna->lock as it's - * nested inside pool locks. - */ - if (pwq->pool != locked_pool) { - raw_spin_unlock(&locked_pool->lock); - locked_pool = pwq->pool; - if (!raw_spin_trylock(&locked_pool->lock)) { - raw_spin_unlock(&nna->lock); - raw_spin_lock(&locked_pool->lock); - raw_spin_lock(&nna->lock); - goto retry; - } - } - - /* - * $pwq may not have any inactive work items due to e.g. cancellations. - * Drop it from pending_pwqs and see if there's another one. - */ - work = list_first_entry_or_null(&pwq->inactive_works, - struct work_struct, entry); - if (!work) { - list_del_init(&pwq->pending_node); - goto retry; - } - - /* - * Acquire an nr_active count and activate the inactive work item. If - * $pwq still has inactive work items, rotate it to the end of the - * pending_pwqs so that we round-robin through them. This means that - * inactive work items are not activated in queueing order which is fine - * given that there has never been any ordering across different pwqs. - */ - if (likely(tryinc_node_nr_active(nna))) { - pwq->nr_active++; - __pwq_activate_work(pwq, work); - - if (list_empty(&pwq->inactive_works)) - list_del_init(&pwq->pending_node); - else - list_move_tail(&pwq->pending_node, &nna->pending_pwqs); - - /* if activating a foreign pool, make sure it's running */ - if (pwq->pool != caller_pool) - kick_pool(pwq->pool); - } - -out_unlock: - raw_spin_unlock(&nna->lock); - if (locked_pool != caller_pool) { - raw_spin_unlock(&locked_pool->lock); - raw_spin_lock(&caller_pool->lock); - } -} - /** * pwq_dec_nr_active - Retire an active count * @pwq: pool_workqueue of interest * * Decrement @pwq's nr_active and try to activate the first inactive work item. - * For unbound workqueues, this function may temporarily drop @pwq->pool->lock. */ static void pwq_dec_nr_active(struct pool_workqueue *pwq) { @@ -1838,29 +1622,12 @@ static void pwq_dec_nr_active(struct pool_workqueue *pwq) * inactive work item on @pwq itself. */ if (!nna) { - pwq_activate_first_inactive(pwq, false); + pwq_activate_first_inactive(pwq); return; } - /* - * If @pwq is for an unbound workqueue, it's more complicated because - * multiple pwqs and pools may be sharing the nr_active count. When a - * pwq needs to wait for an nr_active count, it puts itself on - * $nna->pending_pwqs. The following atomic_dec_return()'s implied - * memory barrier is paired with smp_mb() in pwq_tryinc_nr_active() to - * guarantee that either we see non-empty pending_pwqs or they see - * decremented $nna->nr. - * - * $nna->max may change as CPUs come online/offline and @pwq->wq's - * max_active gets updated. However, it is guaranteed to be equal to or - * larger than @pwq->wq->min_active which is above zero unless freezing. - * This maintains the forward progress guarantee. - */ - if (atomic_dec_return(&nna->nr) >= READ_ONCE(nna->max)) - return; - - if (!list_empty(&nna->pending_pwqs)) - node_activate_pending_pwq(nna, pool); + atomic_dec(&nna->nr); + pwq_activate_first_inactive(pwq); } /** @@ -2181,7 +1948,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, * @work must also queue behind existing inactive work items to maintain * ordering when max_active changes. See wq_adjust_max_active(). */ - if (list_empty(&pwq->inactive_works) && pwq_tryinc_nr_active(pwq, false)) { + if (list_empty(&pwq->inactive_works) && pwq_tryinc_nr_active(pwq)) { if (list_empty(&pool->worklist)) pool->watchdog_ts = jiffies; @@ -3414,7 +3181,7 @@ static void insert_wq_barrier(struct pool_workqueue *pwq, barr->task = current; - /* The barrier work item does not participate in nr_active. */ + /* The barrier work item does not participate in pwq->nr_active. */ work_flags |= WORK_STRUCT_INACTIVE; /* @@ -4330,8 +4097,6 @@ static void free_node_nr_active(struct wq_node_nr_active **nna_ar) static void init_node_nr_active(struct wq_node_nr_active *nna) { atomic_set(&nna->nr, 0); - raw_spin_lock_init(&nna->lock); - INIT_LIST_HEAD(&nna->pending_pwqs); } /* @@ -4571,15 +4336,6 @@ static void pwq_release_workfn(struct kthread_work *work) mutex_unlock(&wq_pool_mutex); } - if (!list_empty(&pwq->pending_node)) { - struct wq_node_nr_active *nna = - wq_node_nr_active(pwq->wq, pwq->pool->node); - - raw_spin_lock_irq(&nna->lock); - list_del_init(&pwq->pending_node); - raw_spin_unlock_irq(&nna->lock); - } - call_rcu(&pwq->rcu, rcu_free_pwq); /* @@ -4605,7 +4361,6 @@ static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq, pwq->flush_color = -1; pwq->refcnt = 1; INIT_LIST_HEAD(&pwq->inactive_works); - INIT_LIST_HEAD(&pwq->pending_node); INIT_LIST_HEAD(&pwq->pwqs_node); INIT_LIST_HEAD(&pwq->mayday_node); kthread_init_work(&pwq->release_work, pwq_release_workfn); @@ -4813,9 +4568,6 @@ static void apply_wqattrs_commit(struct apply_wqattrs_ctx *ctx) ctx->pwq_tbl[cpu]); ctx->dfl_pwq = install_unbound_pwq(ctx->wq, -1, ctx->dfl_pwq); - /* update node_nr_active->max */ - wq_update_node_max_active(ctx->wq, -1); - mutex_unlock(&ctx->wq->mutex); } @@ -5089,35 +4841,24 @@ static int init_rescuer(struct workqueue_struct *wq) static void wq_adjust_max_active(struct workqueue_struct *wq) { bool activated; - int new_max, new_min; lockdep_assert_held(&wq->mutex); if ((wq->flags & WQ_FREEZABLE) && workqueue_freezing) { - new_max = 0; - new_min = 0; - } else { - new_max = wq->saved_max_active; - new_min = wq->saved_min_active; + WRITE_ONCE(wq->max_active, 0); + return; } - if (wq->max_active == new_max && wq->min_active == new_min) + if (wq->max_active == wq->saved_max_active) return; /* - * Update @wq->max/min_active and then kick inactive work items if more + * Update @wq->max_active and then kick inactive work items if more * active work items are allowed. This doesn't break work item ordering * because new work items are always queued behind existing inactive * work items if there are any. */ - WRITE_ONCE(wq->max_active, new_max); - WRITE_ONCE(wq->min_active, new_min); - - if (wq->flags & WQ_UNBOUND) - wq_update_node_max_active(wq, -1); - - if (new_max == 0) - return; + WRITE_ONCE(wq->max_active, wq->saved_max_active); /* * Round-robin through pwq's activating the first inactive work item @@ -5132,7 +4873,7 @@ static void wq_adjust_max_active(struct workqueue_struct *wq) /* can be called during early boot w/ irq disabled */ raw_spin_lock_irqsave(&pwq->pool->lock, flags); - if (pwq_activate_first_inactive(pwq, true)) { + if (pwq_activate_first_inactive(pwq)) { activated = true; kick_pool(pwq->pool); } @@ -5194,9 +4935,7 @@ struct workqueue_struct *alloc_workqueue(const char *fmt, /* init wq */ wq->flags = flags; wq->max_active = max_active; - wq->min_active = min(max_active, WQ_DFL_MIN_ACTIVE); - wq->saved_max_active = wq->max_active; - wq->saved_min_active = wq->min_active; + wq->saved_max_active = max_active; mutex_init(&wq->mutex); atomic_set(&wq->nr_pwqs_to_flush, 0); INIT_LIST_HEAD(&wq->pwqs); @@ -5362,8 +5101,7 @@ EXPORT_SYMBOL_GPL(destroy_workqueue); * @wq: target workqueue * @max_active: new max_active value. * - * Set max_active of @wq to @max_active. See the alloc_workqueue() function - * comment. + * Set max_active of @wq to @max_active. * * CONTEXT: * Don't call from IRQ context. @@ -5380,9 +5118,6 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) wq->flags &= ~__WQ_ORDERED; wq->saved_max_active = max_active; - if (wq->flags & WQ_UNBOUND) - wq->saved_min_active = min(wq->saved_min_active, max_active); - wq_adjust_max_active(wq); mutex_unlock(&wq->mutex); @@ -6064,10 +5799,6 @@ int workqueue_online_cpu(unsigned int cpu) for_each_cpu(tcpu, pt->pod_cpus[pt->cpu_pod[cpu]]) wq_update_pod(wq, tcpu, cpu, true); - - mutex_lock(&wq->mutex); - wq_update_node_max_active(wq, -1); - mutex_unlock(&wq->mutex); } } @@ -6096,10 +5827,6 @@ int workqueue_offline_cpu(unsigned int cpu) for_each_cpu(tcpu, pt->pod_cpus[pt->cpu_pod[cpu]]) wq_update_pod(wq, tcpu, cpu, false); - - mutex_lock(&wq->mutex); - wq_update_node_max_active(wq, cpu); - mutex_unlock(&wq->mutex); } } mutex_unlock(&wq_pool_mutex); @@ -7296,12 +7023,8 @@ void __init workqueue_init_topology(void) * combinations to apply per-pod sharing. */ list_for_each_entry(wq, &workqueues, list) { - for_each_online_cpu(cpu) + for_each_online_cpu(cpu) { wq_update_pod(wq, cpu, cpu, true); - if (wq->flags & WQ_UNBOUND) { - mutex_lock(&wq->mutex); - wq_update_node_max_active(wq, -1); - mutex_unlock(&wq->mutex); } } -- Gitee From d25a0ae7b3f27019aeb8cff2310821f3339c6141 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 3 Apr 2024 16:36:30 +0200 Subject: [PATCH 004/268] Revert "workqueue: Introduce struct wq_node_nr_active" stable inclusion from stable-6.6.25 commit bfb429f37052ac825d84640c0ddeab85ed17ece5 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- This reverts commit b522229a56941adac1ea1da6593b2b5c734b5359 which is commit 91ccc6e7233bb10a9c176aa4cc70d6f432a441a5 upstream. The workqueue patches backported to 6.6.y caused some reported regressions, so revert them for now. Reported-by: Thorsten Leemhuis Cc: Tejun Heo Cc: Marek Szyprowski Cc: Nathan Chancellor Cc: Sasha Levin Cc: Audra Mitchell Link: https://lore.kernel.org/all/ce4c2f67-c298-48a0-87a3-f933d646c73b@leemhuis.info/ Signed-off-by: Greg Kroah-Hartman Signed-off-by: zhangshuqi --- kernel/workqueue.c | 142 +++------------------------------------------ 1 file changed, 7 insertions(+), 135 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 252ad3bfe799..80c0a84aa147 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -280,16 +280,6 @@ struct wq_flusher { struct wq_device; -/* - * Unlike in a per-cpu workqueue where max_active limits its concurrency level - * on each CPU, in an unbound workqueue, max_active applies to the whole system. - * As sharing a single nr_active across multiple sockets can be very expensive, - * the counting and enforcement is per NUMA node. - */ -struct wq_node_nr_active { - atomic_t nr; /* per-node nr_active count */ -}; - /* * The externally visible workqueue. It relays the issued work items to * the appropriate worker_pool through its pool_workqueues. @@ -336,7 +326,6 @@ struct workqueue_struct { /* hot fields used during command issue, aligned to cacheline */ unsigned int flags ____cacheline_aligned; /* WQ: WQ_* flags */ struct pool_workqueue __percpu __rcu **cpu_pwq; /* I: per-cpu pwqs */ - struct wq_node_nr_active *node_nr_active[]; /* I: per-node nr_active */ }; static struct kmem_cache *pwq_cache; @@ -1426,31 +1415,6 @@ work_func_t wq_worker_last_func(struct task_struct *task) return worker->last_func; } -/** - * wq_node_nr_active - Determine wq_node_nr_active to use - * @wq: workqueue of interest - * @node: NUMA node, can be %NUMA_NO_NODE - * - * Determine wq_node_nr_active to use for @wq on @node. Returns: - * - * - %NULL for per-cpu workqueues as they don't need to use shared nr_active. - * - * - node_nr_active[nr_node_ids] if @node is %NUMA_NO_NODE. - * - * - Otherwise, node_nr_active[@node]. - */ -static struct wq_node_nr_active *wq_node_nr_active(struct workqueue_struct *wq, - int node) -{ - if (!(wq->flags & WQ_UNBOUND)) - return NULL; - - if (node == NUMA_NO_NODE) - node = nr_node_ids; - - return wq->node_nr_active[node]; -} - /** * get_pwq - get an extra reference on the specified pool_workqueue * @pwq: pool_workqueue to get @@ -1532,17 +1496,12 @@ static bool pwq_activate_work(struct pool_workqueue *pwq, struct work_struct *work) { struct worker_pool *pool = pwq->pool; - struct wq_node_nr_active *nna; lockdep_assert_held(&pool->lock); if (!(*work_data_bits(work) & WORK_STRUCT_INACTIVE)) return false; - nna = wq_node_nr_active(pwq->wq, pool->node); - if (nna) - atomic_inc(&nna->nr); - pwq->nr_active++; __pwq_activate_work(pwq, work); return true; @@ -1559,18 +1518,14 @@ static bool pwq_tryinc_nr_active(struct pool_workqueue *pwq) { struct workqueue_struct *wq = pwq->wq; struct worker_pool *pool = pwq->pool; - struct wq_node_nr_active *nna = wq_node_nr_active(wq, pool->node); bool obtained; lockdep_assert_held(&pool->lock); obtained = pwq->nr_active < READ_ONCE(wq->max_active); - if (obtained) { + if (obtained) pwq->nr_active++; - if (nna) - atomic_inc(&nna->nr); - } return obtained; } @@ -1607,26 +1562,10 @@ static bool pwq_activate_first_inactive(struct pool_workqueue *pwq) static void pwq_dec_nr_active(struct pool_workqueue *pwq) { struct worker_pool *pool = pwq->pool; - struct wq_node_nr_active *nna = wq_node_nr_active(pwq->wq, pool->node); lockdep_assert_held(&pool->lock); - /* - * @pwq->nr_active should be decremented for both percpu and unbound - * workqueues. - */ pwq->nr_active--; - - /* - * For a percpu workqueue, it's simple. Just need to kick the first - * inactive work item on @pwq itself. - */ - if (!nna) { - pwq_activate_first_inactive(pwq); - return; - } - - atomic_dec(&nna->nr); pwq_activate_first_inactive(pwq); } @@ -4081,63 +4020,11 @@ static void wq_free_lockdep(struct workqueue_struct *wq) } #endif -static void free_node_nr_active(struct wq_node_nr_active **nna_ar) -{ - int node; - - for_each_node(node) { - kfree(nna_ar[node]); - nna_ar[node] = NULL; - } - - kfree(nna_ar[nr_node_ids]); - nna_ar[nr_node_ids] = NULL; -} - -static void init_node_nr_active(struct wq_node_nr_active *nna) -{ - atomic_set(&nna->nr, 0); -} - -/* - * Each node's nr_active counter will be accessed mostly from its own node and - * should be allocated in the node. - */ -static int alloc_node_nr_active(struct wq_node_nr_active **nna_ar) -{ - struct wq_node_nr_active *nna; - int node; - - for_each_node(node) { - nna = kzalloc_node(sizeof(*nna), GFP_KERNEL, node); - if (!nna) - goto err_free; - init_node_nr_active(nna); - nna_ar[node] = nna; - } - - /* [nr_node_ids] is used as the fallback */ - nna = kzalloc_node(sizeof(*nna), GFP_KERNEL, NUMA_NO_NODE); - if (!nna) - goto err_free; - init_node_nr_active(nna); - nna_ar[nr_node_ids] = nna; - - return 0; - -err_free: - free_node_nr_active(nna_ar); - return -ENOMEM; -} - static void rcu_free_wq(struct rcu_head *rcu) { struct workqueue_struct *wq = container_of(rcu, struct workqueue_struct, rcu); - if (wq->flags & WQ_UNBOUND) - free_node_nr_active(wq->node_nr_active); - wq_free_lockdep(wq); free_percpu(wq->cpu_pwq); free_workqueue_attrs(wq->unbound_attrs); @@ -4889,8 +4776,7 @@ struct workqueue_struct *alloc_workqueue(const char *fmt, { va_list args; struct workqueue_struct *wq; - size_t wq_size; - int name_len; + int len; /* * Unbound && max_active == 1 used to imply ordered, which is no longer @@ -4906,12 +4792,7 @@ struct workqueue_struct *alloc_workqueue(const char *fmt, flags |= WQ_UNBOUND; /* allocate wq and format name */ - if (flags & WQ_UNBOUND) - wq_size = struct_size(wq, node_nr_active, nr_node_ids + 1); - else - wq_size = sizeof(*wq); - - wq = kzalloc(wq_size, GFP_KERNEL); + wq = kzalloc(sizeof(*wq), GFP_KERNEL); if (!wq) return NULL; @@ -4922,12 +4803,11 @@ struct workqueue_struct *alloc_workqueue(const char *fmt, } va_start(args, max_active); - name_len = vsnprintf(wq->name, sizeof(wq->name), fmt, args); + len = vsnprintf(wq->name, sizeof(wq->name), fmt, args); va_end(args); - if (name_len >= WQ_NAME_LEN) - pr_warn_once("workqueue: name exceeds WQ_NAME_LEN. Truncating to: %s\n", - wq->name); + if (len >= WQ_NAME_LEN) + pr_warn_once("workqueue: name exceeds WQ_NAME_LEN. Truncating to: %s\n", wq->name); max_active = max_active ?: WQ_DFL_ACTIVE; max_active = wq_clamp_max_active(max_active, flags, wq->name); @@ -4946,13 +4826,8 @@ struct workqueue_struct *alloc_workqueue(const char *fmt, wq_init_lockdep(wq); INIT_LIST_HEAD(&wq->list); - if (flags & WQ_UNBOUND) { - if (alloc_node_nr_active(wq->node_nr_active) < 0) - goto err_unreg_lockdep; - } - if (alloc_and_link_pwqs(wq) < 0) - goto err_free_node_nr_active; + goto err_unreg_lockdep; if (wq_online && init_rescuer(wq) < 0) goto err_destroy; @@ -4977,9 +4852,6 @@ struct workqueue_struct *alloc_workqueue(const char *fmt, return wq; -err_free_node_nr_active: - if (wq->flags & WQ_UNBOUND) - free_node_nr_active(wq->node_nr_active); err_unreg_lockdep: wq_unregister_lockdep(wq); wq_free_lockdep(wq); -- Gitee From 7a1cf9a598b89f87124bd037ae2e570a07696079 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 3 Apr 2024 16:36:42 +0200 Subject: [PATCH 005/268] Revert "workqueue: RCU protect wq->dfl_pwq and implement accessors for it" stable inclusion from stable-6.6.25 commit f3c11cb27a8b7c71cf48c8990ace6c8a0783f6db category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- This reverts commit bd31fb926dfa02d2ccfb4b79389168b1d16f36b1 which is commit 9f66cff212bb3c1cd25996aaa0dfd0c9e9d8baab upstream. The workqueue patches backported to 6.6.y caused some reported regressions, so revert them for now. Reported-by: Thorsten Leemhuis Cc: Tejun Heo Cc: Marek Szyprowski Cc: Nathan Chancellor Cc: Sasha Levin Cc: Audra Mitchell Link: https://lore.kernel.org/all/ce4c2f67-c298-48a0-87a3-f933d646c73b@leemhuis.info/ Signed-off-by: Greg Kroah-Hartman Signed-off-by: zhangshuqi --- kernel/workqueue.c | 64 +++++++++++++++++----------------------------- 1 file changed, 24 insertions(+), 40 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 80c0a84aa147..76559b63ee1f 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -304,7 +304,7 @@ struct workqueue_struct { int saved_max_active; /* WQ: saved max_active */ struct workqueue_attrs *unbound_attrs; /* PW: only for unbound wqs */ - struct pool_workqueue __rcu *dfl_pwq; /* PW: only for unbound wqs */ + struct pool_workqueue *dfl_pwq; /* PW: only for unbound wqs */ #ifdef CONFIG_SYSFS struct wq_device *wq_dev; /* I: for sysfs interface */ @@ -629,23 +629,6 @@ static int worker_pool_assign_id(struct worker_pool *pool) return ret; } -static struct pool_workqueue __rcu ** -unbound_pwq_slot(struct workqueue_struct *wq, int cpu) -{ - if (cpu >= 0) - return per_cpu_ptr(wq->cpu_pwq, cpu); - else - return &wq->dfl_pwq; -} - -/* @cpu < 0 for dfl_pwq */ -static struct pool_workqueue *unbound_pwq(struct workqueue_struct *wq, int cpu) -{ - return rcu_dereference_check(*unbound_pwq_slot(wq, cpu), - lockdep_is_held(&wq_pool_mutex) || - lockdep_is_held(&wq->mutex)); -} - static unsigned int work_color_to_flags(int color) { return color << WORK_STRUCT_COLOR_SHIFT; @@ -4335,11 +4318,10 @@ static void wq_calc_pod_cpumask(struct workqueue_attrs *attrs, int cpu, "possible intersect\n"); } -/* install @pwq into @wq and return the old pwq, @cpu < 0 for dfl_pwq */ +/* install @pwq into @wq's cpu_pwq and return the old pwq */ static struct pool_workqueue *install_unbound_pwq(struct workqueue_struct *wq, int cpu, struct pool_workqueue *pwq) { - struct pool_workqueue __rcu **slot = unbound_pwq_slot(wq, cpu); struct pool_workqueue *old_pwq; lockdep_assert_held(&wq_pool_mutex); @@ -4348,8 +4330,8 @@ static struct pool_workqueue *install_unbound_pwq(struct workqueue_struct *wq, /* link_pwq() can handle duplicate calls */ link_pwq(pwq); - old_pwq = rcu_access_pointer(*slot); - rcu_assign_pointer(*slot, pwq); + old_pwq = rcu_access_pointer(*per_cpu_ptr(wq->cpu_pwq, cpu)); + rcu_assign_pointer(*per_cpu_ptr(wq->cpu_pwq, cpu), pwq); return old_pwq; } @@ -4449,11 +4431,14 @@ static void apply_wqattrs_commit(struct apply_wqattrs_ctx *ctx) copy_workqueue_attrs(ctx->wq->unbound_attrs, ctx->attrs); - /* save the previous pwqs and install the new ones */ + /* save the previous pwq and install the new one */ for_each_possible_cpu(cpu) ctx->pwq_tbl[cpu] = install_unbound_pwq(ctx->wq, cpu, ctx->pwq_tbl[cpu]); - ctx->dfl_pwq = install_unbound_pwq(ctx->wq, -1, ctx->dfl_pwq); + + /* @dfl_pwq might not have been used, ensure it's linked */ + link_pwq(ctx->dfl_pwq); + swap(ctx->wq->dfl_pwq, ctx->dfl_pwq); mutex_unlock(&ctx->wq->mutex); } @@ -4576,7 +4561,9 @@ static void wq_update_pod(struct workqueue_struct *wq, int cpu, /* nothing to do if the target cpumask matches the current pwq */ wq_calc_pod_cpumask(target_attrs, cpu, off_cpu); - if (wqattrs_equal(target_attrs, unbound_pwq(wq, cpu)->pool->attrs)) + pwq = rcu_dereference_protected(*per_cpu_ptr(wq->cpu_pwq, cpu), + lockdep_is_held(&wq_pool_mutex)); + if (wqattrs_equal(target_attrs, pwq->pool->attrs)) return; /* create a new pwq */ @@ -4594,11 +4581,10 @@ static void wq_update_pod(struct workqueue_struct *wq, int cpu, use_dfl_pwq: mutex_lock(&wq->mutex); - pwq = unbound_pwq(wq, -1); - raw_spin_lock_irq(&pwq->pool->lock); - get_pwq(pwq); - raw_spin_unlock_irq(&pwq->pool->lock); - old_pwq = install_unbound_pwq(wq, cpu, pwq); + raw_spin_lock_irq(&wq->dfl_pwq->pool->lock); + get_pwq(wq->dfl_pwq); + raw_spin_unlock_irq(&wq->dfl_pwq->pool->lock); + old_pwq = install_unbound_pwq(wq, cpu, wq->dfl_pwq); out_unlock: mutex_unlock(&wq->mutex); put_pwq_unlocked(old_pwq); @@ -4636,13 +4622,10 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq) cpus_read_lock(); if (wq->flags & __WQ_ORDERED) { - struct pool_workqueue *dfl_pwq; - ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]); /* there should only be single pwq for ordering guarantee */ - dfl_pwq = rcu_access_pointer(wq->dfl_pwq); - WARN(!ret && (wq->pwqs.next != &dfl_pwq->pwqs_node || - wq->pwqs.prev != &dfl_pwq->pwqs_node), + WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node || + wq->pwqs.prev != &wq->dfl_pwq->pwqs_node), "ordering guarantee broken for workqueue %s\n", wq->name); } else { ret = apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]); @@ -4873,7 +4856,7 @@ static bool pwq_busy(struct pool_workqueue *pwq) if (pwq->nr_in_flight[i]) return true; - if ((pwq != rcu_access_pointer(pwq->wq->dfl_pwq)) && (pwq->refcnt > 1)) + if ((pwq != pwq->wq->dfl_pwq) && (pwq->refcnt > 1)) return true; if (!pwq_is_empty(pwq)) return true; @@ -4957,12 +4940,13 @@ void destroy_workqueue(struct workqueue_struct *wq) rcu_read_lock(); for_each_possible_cpu(cpu) { - put_pwq_unlocked(unbound_pwq(wq, cpu)); - RCU_INIT_POINTER(*unbound_pwq_slot(wq, cpu), NULL); + pwq = rcu_access_pointer(*per_cpu_ptr(wq->cpu_pwq, cpu)); + RCU_INIT_POINTER(*per_cpu_ptr(wq->cpu_pwq, cpu), NULL); + put_pwq_unlocked(pwq); } - put_pwq_unlocked(unbound_pwq(wq, -1)); - RCU_INIT_POINTER(*unbound_pwq_slot(wq, -1), NULL); + put_pwq_unlocked(wq->dfl_pwq); + wq->dfl_pwq = NULL; rcu_read_unlock(); } -- Gitee From 4cf7e5bad4f08def1124a6a57d838be55342fc76 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 3 Apr 2024 16:36:44 +0200 Subject: [PATCH 006/268] Revert "workqueue: Make wq_adjust_max_active() round-robin pwqs while activating" stable inclusion from stable-6.6.25 commit e3ee73b57a2e67010407c9f02514916cab19bbc7 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- This reverts commit 5f99fee6f2dea1228980c3e785ab1a2c69b4da3c which is commit qc5404d4e6df6faba1007544b5f4e62c7c14416dd upstream. The workqueue patches backported to 6.6.y caused some reported regressions, so revert them for now. Reported-by: Thorsten Leemhuis Cc: Tejun Heo Cc: Marek Szyprowski Cc: Nathan Chancellor Cc: Sasha Levin Cc: Audra Mitchell Link: https://lore.kernel.org/all/ce4c2f67-c298-48a0-87a3-f933d646c73b@leemhuis.info/ Signed-off-by: Greg Kroah-Hartman Signed-off-by: zhangshuqi --- kernel/workqueue.c | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 76559b63ee1f..51dc508ac35b 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4710,7 +4710,7 @@ static int init_rescuer(struct workqueue_struct *wq) */ static void wq_adjust_max_active(struct workqueue_struct *wq) { - bool activated; + struct pool_workqueue *pwq; lockdep_assert_held(&wq->mutex); @@ -4730,26 +4730,19 @@ static void wq_adjust_max_active(struct workqueue_struct *wq) */ WRITE_ONCE(wq->max_active, wq->saved_max_active); - /* - * Round-robin through pwq's activating the first inactive work item - * until max_active is filled. - */ - do { - struct pool_workqueue *pwq; + for_each_pwq(pwq, wq) { + unsigned long flags; - activated = false; - for_each_pwq(pwq, wq) { - unsigned long flags; + /* this function can be called during early boot w/ irq disabled */ + raw_spin_lock_irqsave(&pwq->pool->lock, flags); - /* can be called during early boot w/ irq disabled */ - raw_spin_lock_irqsave(&pwq->pool->lock, flags); - if (pwq_activate_first_inactive(pwq)) { - activated = true; - kick_pool(pwq->pool); - } - raw_spin_unlock_irqrestore(&pwq->pool->lock, flags); - } - } while (activated); + while (pwq_activate_first_inactive(pwq)) + ; + + kick_pool(pwq->pool); + + raw_spin_unlock_irqrestore(&pwq->pool->lock, flags); + } } __printf(1, 4) -- Gitee From b2ea83fb4fbe584657984b26effee0a58aac7e8e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 3 Apr 2024 16:36:45 +0200 Subject: [PATCH 007/268] Revert "workqueue: Move nr_active handling into helpers" stable inclusion from stable-6.6.25 commit 5debbff9539c9c536d71e91d4bb8995206672b90 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- This reverts commit 4023a2d95076918abe2757d60810642a8115b586 which is commit 1c270b79ce0b8290f146255ea9057243f6dd3c17 upstream. The workqueue patches backported to 6.6.y caused some reported regressions, so revert them for now. Reported-by: Thorsten Leemhuis Cc: Tejun Heo Cc: Marek Szyprowski Cc: Nathan Chancellor Cc: Sasha Levin Cc: Audra Mitchell Link: https://lore.kernel.org/all/ce4c2f67-c298-48a0-87a3-f933d646c73b@leemhuis.info/ Signed-off-by: Greg Kroah-Hartman Signed-off-by: zhangshuqi --- kernel/workqueue.c | 86 ++++++++++------------------------------------ 1 file changed, 19 insertions(+), 67 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 51dc508ac35b..3436fd266cde 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1458,14 +1458,11 @@ static bool pwq_is_empty(struct pool_workqueue *pwq) static void __pwq_activate_work(struct pool_workqueue *pwq, struct work_struct *work) { - unsigned long *wdb = work_data_bits(work); - - WARN_ON_ONCE(!(*wdb & WORK_STRUCT_INACTIVE)); trace_workqueue_activate_work(work); if (list_empty(&pwq->pool->worklist)) pwq->pool->watchdog_ts = jiffies; move_linked_works(work, &pwq->pool->worklist, NULL); - __clear_bit(WORK_STRUCT_INACTIVE_BIT, wdb); + __clear_bit(WORK_STRUCT_INACTIVE_BIT, work_data_bits(work)); } /** @@ -1490,66 +1487,12 @@ static bool pwq_activate_work(struct pool_workqueue *pwq, return true; } -/** - * pwq_tryinc_nr_active - Try to increment nr_active for a pwq - * @pwq: pool_workqueue of interest - * - * Try to increment nr_active for @pwq. Returns %true if an nr_active count is - * successfully obtained. %false otherwise. - */ -static bool pwq_tryinc_nr_active(struct pool_workqueue *pwq) -{ - struct workqueue_struct *wq = pwq->wq; - struct worker_pool *pool = pwq->pool; - bool obtained; - - lockdep_assert_held(&pool->lock); - - obtained = pwq->nr_active < READ_ONCE(wq->max_active); - - if (obtained) - pwq->nr_active++; - return obtained; -} - -/** - * pwq_activate_first_inactive - Activate the first inactive work item on a pwq - * @pwq: pool_workqueue of interest - * - * Activate the first inactive work item of @pwq if available and allowed by - * max_active limit. - * - * Returns %true if an inactive work item has been activated. %false if no - * inactive work item is found or max_active limit is reached. - */ -static bool pwq_activate_first_inactive(struct pool_workqueue *pwq) -{ - struct work_struct *work = - list_first_entry_or_null(&pwq->inactive_works, - struct work_struct, entry); - - if (work && pwq_tryinc_nr_active(pwq)) { - __pwq_activate_work(pwq, work); - return true; - } else { - return false; - } -} - -/** - * pwq_dec_nr_active - Retire an active count - * @pwq: pool_workqueue of interest - * - * Decrement @pwq's nr_active and try to activate the first inactive work item. - */ -static void pwq_dec_nr_active(struct pool_workqueue *pwq) +static void pwq_activate_first_inactive(struct pool_workqueue *pwq) { - struct worker_pool *pool = pwq->pool; + struct work_struct *work = list_first_entry(&pwq->inactive_works, + struct work_struct, entry); - lockdep_assert_held(&pool->lock); - - pwq->nr_active--; - pwq_activate_first_inactive(pwq); + pwq_activate_work(pwq, work); } /** @@ -1567,8 +1510,14 @@ static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, unsigned long work_ { int color = get_work_color(work_data); - if (!(work_data & WORK_STRUCT_INACTIVE)) - pwq_dec_nr_active(pwq); + if (!(work_data & WORK_STRUCT_INACTIVE)) { + pwq->nr_active--; + if (!list_empty(&pwq->inactive_works)) { + /* one down, submit an inactive one */ + if (pwq->nr_active < READ_ONCE(pwq->wq->max_active)) + pwq_activate_first_inactive(pwq); + } + } pwq->nr_in_flight[color]--; @@ -1870,11 +1819,13 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, * @work must also queue behind existing inactive work items to maintain * ordering when max_active changes. See wq_adjust_max_active(). */ - if (list_empty(&pwq->inactive_works) && pwq_tryinc_nr_active(pwq)) { + if (list_empty(&pwq->inactive_works) && + pwq->nr_active < READ_ONCE(pwq->wq->max_active)) { if (list_empty(&pool->worklist)) pool->watchdog_ts = jiffies; trace_workqueue_activate_work(work); + pwq->nr_active++; insert_work(pwq, work, &pool->worklist, work_flags); kick_pool(pool); } else { @@ -4736,8 +4687,9 @@ static void wq_adjust_max_active(struct workqueue_struct *wq) /* this function can be called during early boot w/ irq disabled */ raw_spin_lock_irqsave(&pwq->pool->lock, flags); - while (pwq_activate_first_inactive(pwq)) - ; + while (!list_empty(&pwq->inactive_works) && + pwq->nr_active < wq->max_active) + pwq_activate_first_inactive(pwq); kick_pool(pwq->pool); -- Gitee From 0d7f6be14fdfc878b8f9da6f466fef191f82d1cf Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 3 Apr 2024 16:36:47 +0200 Subject: [PATCH 008/268] Revert "workqueue: Replace pwq_activate_inactive_work() with [__]pwq_activate_work()" stable inclusion from stable-6.6.25 commit 957578ec33d4d21dced2d0b219bd88b1464307fb category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- This reverts commit 6c592f0bb96815117538491e5ba12e0a8a8c4493 which is commit 4c6380305d21e36581b451f7337a36c93b64e050 upstream. The workqueue patches backported to 6.6.y caused some reported regressions, so revert them for now. Reported-by: Thorsten Leemhuis Cc: Tejun Heo Cc: Marek Szyprowski Cc: Nathan Chancellor Cc: Sasha Levin Cc: Audra Mitchell Link: https://lore.kernel.org/all/ce4c2f67-c298-48a0-87a3-f933d646c73b@leemhuis.info/ Signed-off-by: Greg Kroah-Hartman Signed-off-by: zhangshuqi --- kernel/workqueue.c | 31 ++++++------------------------- 1 file changed, 6 insertions(+), 25 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 3436fd266cde..3eb0408133ad 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1455,36 +1455,16 @@ static bool pwq_is_empty(struct pool_workqueue *pwq) return !pwq->nr_active && list_empty(&pwq->inactive_works); } -static void __pwq_activate_work(struct pool_workqueue *pwq, - struct work_struct *work) +static void pwq_activate_inactive_work(struct work_struct *work) { + struct pool_workqueue *pwq = get_work_pwq(work); + trace_workqueue_activate_work(work); if (list_empty(&pwq->pool->worklist)) pwq->pool->watchdog_ts = jiffies; move_linked_works(work, &pwq->pool->worklist, NULL); __clear_bit(WORK_STRUCT_INACTIVE_BIT, work_data_bits(work)); -} - -/** - * pwq_activate_work - Activate a work item if inactive - * @pwq: pool_workqueue @work belongs to - * @work: work item to activate - * - * Returns %true if activated. %false if already active. - */ -static bool pwq_activate_work(struct pool_workqueue *pwq, - struct work_struct *work) -{ - struct worker_pool *pool = pwq->pool; - - lockdep_assert_held(&pool->lock); - - if (!(*work_data_bits(work) & WORK_STRUCT_INACTIVE)) - return false; - pwq->nr_active++; - __pwq_activate_work(pwq, work); - return true; } static void pwq_activate_first_inactive(struct pool_workqueue *pwq) @@ -1492,7 +1472,7 @@ static void pwq_activate_first_inactive(struct pool_workqueue *pwq) struct work_struct *work = list_first_entry(&pwq->inactive_works, struct work_struct, entry); - pwq_activate_work(pwq, work); + pwq_activate_inactive_work(work); } /** @@ -1630,7 +1610,8 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, * management later on and cause stall. Make sure the work * item is activated before grabbing. */ - pwq_activate_work(pwq, work); + if (*work_data_bits(work) & WORK_STRUCT_INACTIVE) + pwq_activate_inactive_work(work); list_del_init(&work->entry); pwq_dec_nr_in_flight(pwq, *work_data_bits(work)); -- Gitee From 3b184a2a5074e33fbc322bfbca3100c872defd0c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 3 Apr 2024 16:36:48 +0200 Subject: [PATCH 009/268] Revert "workqueue: Factor out pwq_is_empty()" stable inclusion from stable-6.6.25 commit 35bf38dd162bbbb8682b12e31ee78df54f40b7ca category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- This reverts commit bad184d26a4f68aa00ad75502f9669950a790f71 which is commit afa87ce85379e2d93863fce595afdb5771a84004 upstream. The workqueue patches backported to 6.6.y caused some reported regressions, so revert them for now. Reported-by: Thorsten Leemhuis Cc: Tejun Heo Cc: Marek Szyprowski Cc: Nathan Chancellor Cc: Sasha Levin Cc: Audra Mitchell Link: https://lore.kernel.org/all/ce4c2f67-c298-48a0-87a3-f933d646c73b@leemhuis.info/ Signed-off-by: Greg Kroah-Hartman Signed-off-by: zhangshuqi --- kernel/workqueue.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 3eb0408133ad..3aa48ba8c717 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1450,11 +1450,6 @@ static void put_pwq_unlocked(struct pool_workqueue *pwq) } } -static bool pwq_is_empty(struct pool_workqueue *pwq) -{ - return !pwq->nr_active && list_empty(&pwq->inactive_works); -} - static void pwq_activate_inactive_work(struct work_struct *work) { struct pool_workqueue *pwq = get_work_pwq(work); @@ -3324,7 +3319,7 @@ void drain_workqueue(struct workqueue_struct *wq) bool drained; raw_spin_lock_irq(&pwq->pool->lock); - drained = pwq_is_empty(pwq); + drained = !pwq->nr_active && list_empty(&pwq->inactive_works); raw_spin_unlock_irq(&pwq->pool->lock); if (drained) @@ -4784,7 +4779,7 @@ static bool pwq_busy(struct pool_workqueue *pwq) if ((pwq != pwq->wq->dfl_pwq) && (pwq->refcnt > 1)) return true; - if (!pwq_is_empty(pwq)) + if (pwq->nr_active || !list_empty(&pwq->inactive_works)) return true; return false; @@ -5222,7 +5217,7 @@ void show_one_workqueue(struct workqueue_struct *wq) unsigned long flags; for_each_pwq(pwq, wq) { - if (!pwq_is_empty(pwq)) { + if (pwq->nr_active || !list_empty(&pwq->inactive_works)) { idle = false; break; } @@ -5234,7 +5229,7 @@ void show_one_workqueue(struct workqueue_struct *wq) for_each_pwq(pwq, wq) { raw_spin_lock_irqsave(&pwq->pool->lock, flags); - if (!pwq_is_empty(pwq)) { + if (pwq->nr_active || !list_empty(&pwq->inactive_works)) { /* * Defer printing to avoid deadlocks in console * drivers that queue work while holding locks -- Gitee From 3c343fc25dbd8c1c715c7fa8584699b34c7ed807 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 3 Apr 2024 16:36:50 +0200 Subject: [PATCH 010/268] Revert "workqueue: Move pwq->max_active to wq->max_active" stable inclusion from stable-6.6.25 commit d8354f268d928b6f04119d7a7cdd0145f3a6823f category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- This reverts commit 82e098f5bed1ff167332d26f8551662098747ec4 which is commit a045a272d887575da17ad86d6573e82871b50c27 upstream. The workqueue patches backported to 6.6.y caused some reported regressions, so revert them for now. Reported-by: Thorsten Leemhuis Cc: Tejun Heo Cc: Marek Szyprowski Cc: Nathan Chancellor Cc: Sasha Levin Cc: Audra Mitchell Link: https://lore.kernel.org/all/ce4c2f67-c298-48a0-87a3-f933d646c73b@leemhuis.info/ Signed-off-by: Greg Kroah-Hartman Signed-off-by: zhangshuqi --- kernel/workqueue.c | 133 +++++++++++++++++++++++---------------------- 1 file changed, 67 insertions(+), 66 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 3aa48ba8c717..e004e65ae987 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -143,9 +143,6 @@ enum { * * WR: wq->mutex protected for writes. RCU protected for reads. * - * WO: wq->mutex protected for writes. Updated with WRITE_ONCE() and can be read - * with READ_ONCE() without locking. - * * MD: wq_mayday_lock protected. * * WD: Used internally by the watchdog. @@ -253,6 +250,7 @@ struct pool_workqueue { * is marked with WORK_STRUCT_INACTIVE iff it is in pwq->inactive_works. */ int nr_active; /* L: nr of active works */ + int max_active; /* L: max active works */ struct list_head inactive_works; /* L: inactive works */ struct list_head pwqs_node; /* WR: node on wq->pwqs */ struct list_head mayday_node; /* MD: node on wq->maydays */ @@ -300,8 +298,7 @@ struct workqueue_struct { struct worker *rescuer; /* MD: rescue worker */ int nr_drainers; /* WQ: drain in progress */ - int max_active; /* WO: max active works */ - int saved_max_active; /* WQ: saved max_active */ + int saved_max_active; /* WQ: saved pwq max_active */ struct workqueue_attrs *unbound_attrs; /* PW: only for unbound wqs */ struct pool_workqueue *dfl_pwq; /* PW: only for unbound wqs */ @@ -1489,7 +1486,7 @@ static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, unsigned long work_ pwq->nr_active--; if (!list_empty(&pwq->inactive_works)) { /* one down, submit an inactive one */ - if (pwq->nr_active < READ_ONCE(pwq->wq->max_active)) + if (pwq->nr_active < pwq->max_active) pwq_activate_first_inactive(pwq); } } @@ -1790,13 +1787,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, pwq->nr_in_flight[pwq->work_color]++; work_flags = work_color_to_flags(pwq->work_color); - /* - * Limit the number of concurrently active work items to max_active. - * @work must also queue behind existing inactive work items to maintain - * ordering when max_active changes. See wq_adjust_max_active(). - */ - if (list_empty(&pwq->inactive_works) && - pwq->nr_active < READ_ONCE(pwq->wq->max_active)) { + if (likely(pwq->nr_active < pwq->max_active)) { if (list_empty(&pool->worklist)) pool->watchdog_ts = jiffies; @@ -4145,6 +4136,50 @@ static void pwq_release_workfn(struct kthread_work *work) } } +/** + * pwq_adjust_max_active - update a pwq's max_active to the current setting + * @pwq: target pool_workqueue + * + * If @pwq isn't freezing, set @pwq->max_active to the associated + * workqueue's saved_max_active and activate inactive work items + * accordingly. If @pwq is freezing, clear @pwq->max_active to zero. + */ +static void pwq_adjust_max_active(struct pool_workqueue *pwq) +{ + struct workqueue_struct *wq = pwq->wq; + bool freezable = wq->flags & WQ_FREEZABLE; + unsigned long flags; + + /* for @wq->saved_max_active */ + lockdep_assert_held(&wq->mutex); + + /* fast exit for non-freezable wqs */ + if (!freezable && pwq->max_active == wq->saved_max_active) + return; + + /* this function can be called during early boot w/ irq disabled */ + raw_spin_lock_irqsave(&pwq->pool->lock, flags); + + /* + * During [un]freezing, the caller is responsible for ensuring that + * this function is called at least once after @workqueue_freezing + * is updated and visible. + */ + if (!freezable || !workqueue_freezing) { + pwq->max_active = wq->saved_max_active; + + while (!list_empty(&pwq->inactive_works) && + pwq->nr_active < pwq->max_active) + pwq_activate_first_inactive(pwq); + + kick_pool(pwq->pool); + } else { + pwq->max_active = 0; + } + + raw_spin_unlock_irqrestore(&pwq->pool->lock, flags); +} + /* initialize newly allocated @pwq which is associated with @wq and @pool */ static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq, struct worker_pool *pool) @@ -4177,6 +4212,9 @@ static void link_pwq(struct pool_workqueue *pwq) /* set the matching work_color */ pwq->work_color = wq->work_color; + /* sync max_active to the current setting */ + pwq_adjust_max_active(pwq); + /* link in @pwq */ list_add_rcu(&pwq->pwqs_node, &wq->pwqs); } @@ -4627,52 +4665,6 @@ static int init_rescuer(struct workqueue_struct *wq) return 0; } -/** - * wq_adjust_max_active - update a wq's max_active to the current setting - * @wq: target workqueue - * - * If @wq isn't freezing, set @wq->max_active to the saved_max_active and - * activate inactive work items accordingly. If @wq is freezing, clear - * @wq->max_active to zero. - */ -static void wq_adjust_max_active(struct workqueue_struct *wq) -{ - struct pool_workqueue *pwq; - - lockdep_assert_held(&wq->mutex); - - if ((wq->flags & WQ_FREEZABLE) && workqueue_freezing) { - WRITE_ONCE(wq->max_active, 0); - return; - } - - if (wq->max_active == wq->saved_max_active) - return; - - /* - * Update @wq->max_active and then kick inactive work items if more - * active work items are allowed. This doesn't break work item ordering - * because new work items are always queued behind existing inactive - * work items if there are any. - */ - WRITE_ONCE(wq->max_active, wq->saved_max_active); - - for_each_pwq(pwq, wq) { - unsigned long flags; - - /* this function can be called during early boot w/ irq disabled */ - raw_spin_lock_irqsave(&pwq->pool->lock, flags); - - while (!list_empty(&pwq->inactive_works) && - pwq->nr_active < wq->max_active) - pwq_activate_first_inactive(pwq); - - kick_pool(pwq->pool); - - raw_spin_unlock_irqrestore(&pwq->pool->lock, flags); - } -} - __printf(1, 4) struct workqueue_struct *alloc_workqueue(const char *fmt, unsigned int flags, @@ -4680,6 +4672,7 @@ struct workqueue_struct *alloc_workqueue(const char *fmt, { va_list args; struct workqueue_struct *wq; + struct pool_workqueue *pwq; int len; /* @@ -4718,7 +4711,6 @@ struct workqueue_struct *alloc_workqueue(const char *fmt, /* init wq */ wq->flags = flags; - wq->max_active = max_active; wq->saved_max_active = max_active; mutex_init(&wq->mutex); atomic_set(&wq->nr_pwqs_to_flush, 0); @@ -4747,7 +4739,8 @@ struct workqueue_struct *alloc_workqueue(const char *fmt, mutex_lock(&wq_pool_mutex); mutex_lock(&wq->mutex); - wq_adjust_max_active(wq); + for_each_pwq(pwq, wq) + pwq_adjust_max_active(pwq); mutex_unlock(&wq->mutex); list_add_tail_rcu(&wq->list, &workqueues); @@ -4885,6 +4878,8 @@ EXPORT_SYMBOL_GPL(destroy_workqueue); */ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) { + struct pool_workqueue *pwq; + /* disallow meddling with max_active for ordered workqueues */ if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT)) return; @@ -4895,7 +4890,9 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) wq->flags &= ~__WQ_ORDERED; wq->saved_max_active = max_active; - wq_adjust_max_active(wq); + + for_each_pwq(pwq, wq) + pwq_adjust_max_active(pwq); mutex_unlock(&wq->mutex); } @@ -5142,8 +5139,8 @@ static void show_pwq(struct pool_workqueue *pwq) pr_info(" pwq %d:", pool->id); pr_cont_pool_info(pool); - pr_cont(" active=%d refcnt=%d%s\n", - pwq->nr_active, pwq->refcnt, + pr_cont(" active=%d/%d refcnt=%d%s\n", + pwq->nr_active, pwq->max_active, pwq->refcnt, !list_empty(&pwq->mayday_node) ? " MAYDAY" : ""); hash_for_each(pool->busy_hash, bkt, worker, hentry) { @@ -5691,6 +5688,7 @@ EXPORT_SYMBOL_GPL(work_on_cpu_safe_key); void freeze_workqueues_begin(void) { struct workqueue_struct *wq; + struct pool_workqueue *pwq; mutex_lock(&wq_pool_mutex); @@ -5699,7 +5697,8 @@ void freeze_workqueues_begin(void) list_for_each_entry(wq, &workqueues, list) { mutex_lock(&wq->mutex); - wq_adjust_max_active(wq); + for_each_pwq(pwq, wq) + pwq_adjust_max_active(pwq); mutex_unlock(&wq->mutex); } @@ -5764,6 +5763,7 @@ bool freeze_workqueues_busy(void) void thaw_workqueues(void) { struct workqueue_struct *wq; + struct pool_workqueue *pwq; mutex_lock(&wq_pool_mutex); @@ -5775,7 +5775,8 @@ void thaw_workqueues(void) /* restore max_active and repopulate worklist */ list_for_each_entry(wq, &workqueues, list) { mutex_lock(&wq->mutex); - wq_adjust_max_active(wq); + for_each_pwq(pwq, wq) + pwq_adjust_max_active(pwq); mutex_unlock(&wq->mutex); } -- Gitee From a166a69f2e4edf6a8532a0a40b02887569210dc8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 3 Apr 2024 16:36:51 +0200 Subject: [PATCH 011/268] Revert "workqueue.c: Increase workqueue name length" stable inclusion from stable-6.6.25 commit a99d7274a2b1191744d7e905d01982f3225c0563 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- This reverts commit 43a181f8f41aca27e7454cf44a6dfbccc8b14e92 which is commit 31c89007285d365aa36f71d8fb0701581c770a27 upstream. The workqueue patches backported to 6.6.y caused some reported regressions, so revert them for now. Reported-by: Thorsten Leemhuis Cc: Tejun Heo Cc: Marek Szyprowski Cc: Nathan Chancellor Cc: Sasha Levin Cc: Audra Mitchell Link: https://lore.kernel.org/all/ce4c2f67-c298-48a0-87a3-f933d646c73b@leemhuis.info/ Signed-off-by: Greg Kroah-Hartman Signed-off-by: zhangshuqi --- kernel/workqueue.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index e004e65ae987..fd7b84b06d92 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -108,7 +108,7 @@ enum { RESCUER_NICE_LEVEL = MIN_NICE, HIGHPRI_NICE_LEVEL = MIN_NICE, - WQ_NAME_LEN = 32, + WQ_NAME_LEN = 24, }; /* @@ -4673,7 +4673,6 @@ struct workqueue_struct *alloc_workqueue(const char *fmt, va_list args; struct workqueue_struct *wq; struct pool_workqueue *pwq; - int len; /* * Unbound && max_active == 1 used to imply ordered, which is no longer @@ -4700,12 +4699,9 @@ struct workqueue_struct *alloc_workqueue(const char *fmt, } va_start(args, max_active); - len = vsnprintf(wq->name, sizeof(wq->name), fmt, args); + vsnprintf(wq->name, sizeof(wq->name), fmt, args); va_end(args); - if (len >= WQ_NAME_LEN) - pr_warn_once("workqueue: name exceeds WQ_NAME_LEN. Truncating to: %s\n", wq->name); - max_active = max_active ?: WQ_DFL_ACTIVE; max_active = wq_clamp_max_active(max_active, flags, wq->name); -- Gitee From a1da296139f1f648fda7e31ec6994d6c3da20569 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 4 Apr 2024 20:23:07 +0200 Subject: [PATCH 012/268] Linux 6.6.25 stable inclusion from stable-6.6.25^0 commit e475741af1ebe2c92ee4a3f49e55749a84770a12 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- Link: https://lore.kernel.org/r/20240403175126.839589571@linuxfoundation.org Tested-by: Shuah Khan Tested-by: Ron Economos Tested-by: Jon Hunter Tested-by: Takeshi Ogasawara Tested-by: Mark Brown Tested-by: Linux Kernel Functional Testing Signed-off-by: Greg Kroah-Hartman Signed-off-by: zhangshuqi --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b053bae9296a..022af2a9a6d9 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 6 -SUBLEVEL = 24 +SUBLEVEL = 25 EXTRAVERSION = NAME = Hurr durr I'ma ninja sloth -- Gitee From 1c7a6f0a76655117bfcf1cd93992ba030c39c28b Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 25 Oct 2023 12:11:31 +0200 Subject: [PATCH 013/268] drm/i915/display: Use i915_gem_object_get_dma_address to get dma address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-6.6.26 commit 2c07e2437a3e98027c049ca560e4b6e39a975089 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 7054b551de18e9875fbdf8d4f3baade428353545 ] Works better for xe like that. obj is no longer const. Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20231204134946.16219-1-maarten.lankhorst@linux.intel.com Reviewed-by: Jouni Högander Stable-dep-of: 582dc04b0658 ("drm/i915: Pre-populate the cursor physical dma address") Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- drivers/gpu/drm/i915/display/intel_cursor.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c index b342fad180ca..0d21c34f7499 100644 --- a/drivers/gpu/drm/i915/display/intel_cursor.c +++ b/drivers/gpu/drm/i915/display/intel_cursor.c @@ -23,6 +23,8 @@ #include "intel_psr.h" #include "skl_watermark.h" +#include "gem/i915_gem_object.h" + /* Cursor formats */ static const u32 intel_cursor_formats[] = { DRM_FORMAT_ARGB8888, @@ -33,11 +35,11 @@ static u32 intel_cursor_base(const struct intel_plane_state *plane_state) struct drm_i915_private *dev_priv = to_i915(plane_state->uapi.plane->dev); const struct drm_framebuffer *fb = plane_state->hw.fb; - const struct drm_i915_gem_object *obj = intel_fb_obj(fb); + struct drm_i915_gem_object *obj = intel_fb_obj(fb); u32 base; if (DISPLAY_INFO(dev_priv)->cursor_needs_physical) - base = sg_dma_address(obj->mm.pages->sgl); + base = i915_gem_object_get_dma_address(obj, 0); else base = intel_plane_ggtt_offset(plane_state); -- Gitee From d3ad1efc7aab07d3f1402bc8dad6290310e37d22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 25 Mar 2024 19:57:38 +0200 Subject: [PATCH 014/268] drm/i915: Pre-populate the cursor physical dma address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-6.6.26 commit cc696ce93089e3e1bc28d749aee321a37cabe4bd category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 582dc04b0658ef3b90aeb49cbdd9747c2f1eccc3 ] Calling i915_gem_object_get_dma_address() from the vblank evade critical section triggers might_sleep(). While we know that we've already pinned the framebuffer and thus i915_gem_object_get_dma_address() will in fact not sleep in this case, it seems reasonable to keep the unconditional might_sleep() for maximum coverage. So let's instead pre-populate the dma address during fb pinning, which all happens before we enter the vblank evade critical section. We can use u32 for the dma address as this class of hardware doesn't support >32bit addresses. Cc: stable@vger.kernel.org Fixes: 0225a90981c8 ("drm/i915: Make cursor plane registers unlocked") Reported-by: Borislav Petkov Closes: https://lore.kernel.org/intel-gfx/20240227100342.GAZd2zfmYcPS_SndtO@fat_crate.local/ Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240325175738.3440-1-ville.syrjala@linux.intel.com Tested-by: Borislav Petkov (AMD) Reviewed-by: Chaitanya Kumar Borah (cherry picked from commit c1289a5c3594cf04caa94ebf0edeb50c62009f1f) Signed-off-by: Rodrigo Vivi Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- drivers/gpu/drm/i915/display/intel_cursor.c | 4 +--- drivers/gpu/drm/i915/display/intel_display_types.h | 1 + drivers/gpu/drm/i915/display/intel_fb_pin.c | 10 ++++++++++ 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c index 0d21c34f7499..61df6cd3f377 100644 --- a/drivers/gpu/drm/i915/display/intel_cursor.c +++ b/drivers/gpu/drm/i915/display/intel_cursor.c @@ -34,12 +34,10 @@ static u32 intel_cursor_base(const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = to_i915(plane_state->uapi.plane->dev); - const struct drm_framebuffer *fb = plane_state->hw.fb; - struct drm_i915_gem_object *obj = intel_fb_obj(fb); u32 base; if (DISPLAY_INFO(dev_priv)->cursor_needs_physical) - base = i915_gem_object_get_dma_address(obj, 0); + base = plane_state->phys_dma_addr; else base = intel_plane_ggtt_offset(plane_state); diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 7fc92b1474cc..8b0dc2b75da4 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -701,6 +701,7 @@ struct intel_plane_state { #define PLANE_HAS_FENCE BIT(0) struct intel_fb_view view; + u32 phys_dma_addr; /* for cursor_needs_physical */ /* Plane pxp decryption state */ bool decrypt; diff --git a/drivers/gpu/drm/i915/display/intel_fb_pin.c b/drivers/gpu/drm/i915/display/intel_fb_pin.c index fffd568070d4..a131656757f2 100644 --- a/drivers/gpu/drm/i915/display/intel_fb_pin.c +++ b/drivers/gpu/drm/i915/display/intel_fb_pin.c @@ -254,6 +254,16 @@ int intel_plane_pin_fb(struct intel_plane_state *plane_state) return PTR_ERR(vma); plane_state->ggtt_vma = vma; + + /* + * Pre-populate the dma address before we enter the vblank + * evade critical section as i915_gem_object_get_dma_address() + * will trigger might_sleep() even if it won't actually sleep, + * which is the case when the fb has already been pinned. + */ + if (phys_cursor) + plane_state->phys_dma_addr = + i915_gem_object_get_dma_address(intel_fb_obj(fb), 0); } else { struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); -- Gitee From 3bc1dbea405454e59926e5bd56990b798774d55f Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 15 Mar 2024 10:34:43 +0800 Subject: [PATCH 015/268] scripts/bpf_doc: Use silent mode when exec make cmd stable inclusion from stable-6.6.26 commit 54d38a5ca0f7d1c70af5c0ccfba3b1eb3f8926b1 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 5384cc0d1a88c27448a6a4e65b8abe6486de8012 ] When getting kernel version via make, the result may be polluted by other output, like directory change info. e.g. $ export MAKEFLAGS="-w" $ make kernelversion make: Entering directory '/home/net' 6.8.0 make: Leaving directory '/home/net' This will distort the reStructuredText output and make latter rst2man failed like: [...] bpf-helpers.rst:20: (WARNING/2) Field list ends without a blank line; unexpected unindent. [...] Using silent mode would help. e.g. $ make -s --no-print-directory kernelversion 6.8.0 Fixes: fd0a38f9c37d ("scripts/bpf: Set version attribute for bpf-helpers(7) man page") Signed-off-by: Michael Hofmann Signed-off-by: Hangbin Liu Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Acked-by: Alejandro Colomar Link: https://lore.kernel.org/bpf/20240315023443.2364442-1-liuhangbin@gmail.com Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- scripts/bpf_doc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index 0669bac5e900..3f899cc7e99a 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -414,8 +414,8 @@ class PrinterRST(Printer): version = version.stdout.decode().rstrip() except: try: - version = subprocess.run(['make', 'kernelversion'], cwd=linuxRoot, - capture_output=True, check=True) + version = subprocess.run(['make', '-s', '--no-print-directory', 'kernelversion'], + cwd=linuxRoot, capture_output=True, check=True) version = version.stdout.decode().rstrip() except: return 'Linux' -- Gitee From 58739a4911fd62bc2ae757ba1899dfd1839e60e8 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 20 Mar 2024 02:54:12 +0100 Subject: [PATCH 016/268] s390/bpf: Fix bpf_plt pointer arithmetic stable inclusion from stable-6.6.26 commit c3062bdb859b6e2567e7f5c8cde20c0250bb130f category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 7ded842b356d151ece8ac4985940438e6d3998bb ] Kui-Feng Lee reported a crash on s390x triggered by the dummy_st_ops/dummy_init_ptr_arg test [1]: [<0000000000000002>] 0x2 [<00000000009d5cde>] bpf_struct_ops_test_run+0x156/0x250 [<000000000033145a>] __sys_bpf+0xa1a/0xd00 [<00000000003319dc>] __s390x_sys_bpf+0x44/0x50 [<0000000000c4382c>] __do_syscall+0x244/0x300 [<0000000000c59a40>] system_call+0x70/0x98 This is caused by GCC moving memcpy() after assignments in bpf_jit_plt(), resulting in NULL pointers being written instead of the return and the target addresses. Looking at the GCC internals, the reordering is allowed because the alias analysis thinks that the memcpy() destination and the assignments' left-hand-sides are based on different objects: new_plt and bpf_plt_ret/bpf_plt_target respectively, and therefore they cannot alias. This is in turn due to a violation of the C standard: When two pointers are subtracted, both shall point to elements of the same array object, or one past the last element of the array object ... From the C's perspective, bpf_plt_ret and bpf_plt are distinct objects and cannot be subtracted. In the practical terms, doing so confuses the GCC's alias analysis. The code was written this way in order to let the C side know a few offsets defined in the assembly. While nice, this is by no means necessary. Fix the noncompliance by hardcoding these offsets. [1] https://lore.kernel.org/bpf/c9923c1d-971d-4022-8dc8-1364e929d34c@gmail.com/ Fixes: f1d5df84cd8c ("s390/bpf: Implement bpf_arch_text_poke()") Signed-off-by: Ilya Leoshkevich Message-ID: <20240320015515.11883-1-iii@linux.ibm.com> Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- arch/s390/net/bpf_jit_comp.c | 46 ++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index e507692e51e7..8af02176f68b 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -516,11 +516,12 @@ static void bpf_skip(struct bpf_jit *jit, int size) * PLT for hotpatchable calls. The calling convention is the same as for the * ftrace hotpatch trampolines: %r0 is return address, %r1 is clobbered. */ -extern const char bpf_plt[]; -extern const char bpf_plt_ret[]; -extern const char bpf_plt_target[]; -extern const char bpf_plt_end[]; -#define BPF_PLT_SIZE 32 +struct bpf_plt { + char code[16]; + void *ret; + void *target; +} __packed; +extern const struct bpf_plt bpf_plt; asm( ".pushsection .rodata\n" " .balign 8\n" @@ -531,15 +532,14 @@ asm( " .balign 8\n" "bpf_plt_ret: .quad 0\n" "bpf_plt_target: .quad 0\n" - "bpf_plt_end:\n" " .popsection\n" ); -static void bpf_jit_plt(void *plt, void *ret, void *target) +static void bpf_jit_plt(struct bpf_plt *plt, void *ret, void *target) { - memcpy(plt, bpf_plt, BPF_PLT_SIZE); - *(void **)((char *)plt + (bpf_plt_ret - bpf_plt)) = ret; - *(void **)((char *)plt + (bpf_plt_target - bpf_plt)) = target ?: ret; + memcpy(plt, &bpf_plt, sizeof(*plt)); + plt->ret = ret; + plt->target = target; } /* @@ -662,9 +662,9 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) jit->prg = ALIGN(jit->prg, 8); jit->prologue_plt = jit->prg; if (jit->prg_buf) - bpf_jit_plt(jit->prg_buf + jit->prg, + bpf_jit_plt((struct bpf_plt *)(jit->prg_buf + jit->prg), jit->prg_buf + jit->prologue_plt_ret, NULL); - jit->prg += BPF_PLT_SIZE; + jit->prg += sizeof(struct bpf_plt); } static int get_probe_mem_regno(const u8 *insn) @@ -1901,9 +1901,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) struct bpf_jit jit; int pass; - if (WARN_ON_ONCE(bpf_plt_end - bpf_plt != BPF_PLT_SIZE)) - return orig_fp; - if (!fp->jit_requested) return orig_fp; @@ -2009,14 +2006,11 @@ bool bpf_jit_supports_far_kfunc_call(void) int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *old_addr, void *new_addr) { + struct bpf_plt expected_plt, current_plt, new_plt, *plt; struct { u16 opc; s32 disp; } __packed insn; - char expected_plt[BPF_PLT_SIZE]; - char current_plt[BPF_PLT_SIZE]; - char new_plt[BPF_PLT_SIZE]; - char *plt; char *ret; int err; @@ -2035,18 +2029,18 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, */ } else { /* Verify the PLT. */ - plt = (char *)ip + (insn.disp << 1); - err = copy_from_kernel_nofault(current_plt, plt, BPF_PLT_SIZE); + plt = ip + (insn.disp << 1); + err = copy_from_kernel_nofault(¤t_plt, plt, + sizeof(current_plt)); if (err < 0) return err; ret = (char *)ip + 6; - bpf_jit_plt(expected_plt, ret, old_addr); - if (memcmp(current_plt, expected_plt, BPF_PLT_SIZE)) + bpf_jit_plt(&expected_plt, ret, old_addr); + if (memcmp(¤t_plt, &expected_plt, sizeof(current_plt))) return -EINVAL; /* Adjust the call address. */ - bpf_jit_plt(new_plt, ret, new_addr); - s390_kernel_write(plt + (bpf_plt_target - bpf_plt), - new_plt + (bpf_plt_target - bpf_plt), + bpf_jit_plt(&new_plt, ret, new_addr); + s390_kernel_write(&plt->target, &new_plt.target, sizeof(void *)); } -- Gitee From 3ebf63489205dcda6c6108418d303da710c0a8cc Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Tue, 12 Mar 2024 23:59:17 +0000 Subject: [PATCH 017/268] bpf, arm64: fix bug in BPF_LDX_MEMSX stable inclusion from stable-6.6.26 commit aeecb678ec369d888b90d4a4dae9871fd8e56ae8 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 114b5b3b4bde7358624437be2f12cde1b265224e ] A64_LDRSW() takes three registers: Xt, Xn, Xm as arguments and it loads and sign extends the value at address Xn + Xm into register Xt. Currently, the offset is being directly used in place of the tmp register which has the offset already loaded by the last emitted instruction. This will cause JIT failures. The easiest way to reproduce this is to test the following code through test_bpf module: { "BPF_LDX_MEMSX | BPF_W", .u.insns_int = { BPF_LD_IMM64(R1, 0x00000000deadbeefULL), BPF_LD_IMM64(R2, 0xffffffffdeadbeefULL), BPF_STX_MEM(BPF_DW, R10, R1, -7), BPF_LDX_MEMSX(BPF_W, R0, R10, -7), BPF_JMP_REG(BPF_JNE, R0, R2, 1), BPF_ALU64_IMM(BPF_MOV, R0, 0), BPF_EXIT_INSN(), }, INTERNAL, { }, { { 0, 0 } }, .stack_depth = 7, }, We need to use the offset as -7 to trigger this code path, there could be other valid ways to trigger this from proper BPF programs as well. This code is rejected by the JIT because -7 is passed to A64_LDRSW() but it expects a valid register (0 - 31). roott@pjy:~# modprobe test_bpf test_name="BPF_LDX_MEMSX | BPF_W" [11300.490371] test_bpf: test_bpf: set 'test_bpf' as the default test_suite. [11300.491750] test_bpf: #345 BPF_LDX_MEMSX | BPF_W [11300.493179] aarch64_insn_encode_register: unknown register encoding -7 [11300.494133] aarch64_insn_encode_register: unknown register encoding -7 [11300.495292] FAIL to select_runtime err=-524 [11300.496804] test_bpf: Summary: 0 PASSED, 1 FAILED, [0/0 JIT'ed] modprobe: ERROR: could not insert 'test_bpf': Invalid argument Applying this patch fixes the issue. root@pjy:~# modprobe test_bpf test_name="BPF_LDX_MEMSX | BPF_W" [ 292.837436] test_bpf: test_bpf: set 'test_bpf' as the default test_suite. [ 292.839416] test_bpf: #345 BPF_LDX_MEMSX | BPF_W jited:1 156 PASS [ 292.844794] test_bpf: Summary: 1 PASSED, 0 FAILED, [1/1 JIT'ed] Fixes: cc88f540da52 ("bpf, arm64: Support sign-extension load instructions") Signed-off-by: Puranjay Mohan Message-ID: <20240312235917.103626-1-puranjay12@gmail.com> Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- arch/arm64/net/bpf_jit_comp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 150d1c6543f7..5fe4d8b3fdc8 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1189,7 +1189,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, } else { emit_a64_mov_i(1, tmp, off, ctx); if (sign_extend) - emit(A64_LDRSW(dst, src_adj, off_adj), ctx); + emit(A64_LDRSW(dst, src, tmp), ctx); else emit(A64_LDR32(dst, src, tmp), ctx); } -- Gitee From ba2cf13c3af12f13a1ea3921b607bf652351cabc Mon Sep 17 00:00:00 2001 From: Pavel Sakharov Date: Wed, 20 Mar 2024 04:15:23 +0500 Subject: [PATCH 018/268] dma-buf: Fix NULL pointer dereference in sanitycheck() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-6.6.26 commit 156c226cbbdcf5f3bce7b2408a33b59fab7fae2c category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 2295bd846765c766701e666ed2e4b35396be25e6 ] If due to a memory allocation failure mock_chain() returns NULL, it is passed to dma_fence_enable_sw_signaling() resulting in NULL pointer dereference there. Call dma_fence_enable_sw_signaling() only if mock_chain() succeeds. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: d62c43a953ce ("dma-buf: Enable signaling on fence for selftests") Signed-off-by: Pavel Sakharov Reviewed-by: Christian König Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240319231527.1821372-1-p.sakharov@ispras.ru Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- drivers/dma-buf/st-dma-fence-chain.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dma-buf/st-dma-fence-chain.c b/drivers/dma-buf/st-dma-fence-chain.c index c0979c8049b5..661de4add4c7 100644 --- a/drivers/dma-buf/st-dma-fence-chain.c +++ b/drivers/dma-buf/st-dma-fence-chain.c @@ -84,11 +84,11 @@ static int sanitycheck(void *arg) return -ENOMEM; chain = mock_chain(NULL, f, 1); - if (!chain) + if (chain) + dma_fence_enable_sw_signaling(chain); + else err = -ENOMEM; - dma_fence_enable_sw_signaling(chain); - dma_fence_signal(f); dma_fence_put(f); -- Gitee From 6d2308e309e8f5139c09db2d9c18460756a5350d Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Thu, 21 Mar 2024 09:18:09 +0100 Subject: [PATCH 019/268] arm64: bpf: fix 32bit unconditional bswap stable inclusion from stable-6.6.26 commit 087dc50d8bafbf72dd52d3c1c4e140e95f89ab2a category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit a51cd6bf8e10793103c5870ff9e4db295a843604 ] In case when is64 == 1 in emit(A64_REV32(is64, dst, dst), ctx) the generated insn reverses byte order for both high and low 32-bit words, resuling in an incorrect swap as indicated by the jit test: [ 9757.262607] test_bpf: #312 BSWAP 16: 0x0123456789abcdef -> 0xefcd jited:1 8 PASS [ 9757.264435] test_bpf: #313 BSWAP 32: 0x0123456789abcdef -> 0xefcdab89 jited:1 ret 1460850314 != -271733879 (0x5712ce8a != 0xefcdab89)FAIL (1 times) [ 9757.266260] test_bpf: #314 BSWAP 64: 0x0123456789abcdef -> 0x67452301 jited:1 8 PASS [ 9757.268000] test_bpf: #315 BSWAP 64: 0x0123456789abcdef >> 32 -> 0xefcdab89 jited:1 8 PASS [ 9757.269686] test_bpf: #316 BSWAP 16: 0xfedcba9876543210 -> 0x1032 jited:1 8 PASS [ 9757.271380] test_bpf: #317 BSWAP 32: 0xfedcba9876543210 -> 0x10325476 jited:1 ret -1460850316 != 271733878 (0xa8ed3174 != 0x10325476)FAIL (1 times) [ 9757.273022] test_bpf: #318 BSWAP 64: 0xfedcba9876543210 -> 0x98badcfe jited:1 7 PASS [ 9757.274721] test_bpf: #319 BSWAP 64: 0xfedcba9876543210 >> 32 -> 0x10325476 jited:1 9 PASS Fix this by forcing 32bit variant of rev32. Fixes: 1104247f3f979 ("bpf, arm64: Support unconditional bswap") Signed-off-by: Artem Savkov Tested-by: Puranjay Mohan Acked-by: Puranjay Mohan Acked-by: Xu Kuohai Message-ID: <20240321081809.158803-1-asavkov@redhat.com> Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- arch/arm64/net/bpf_jit_comp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 5fe4d8b3fdc8..29196dce9b91 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -876,7 +876,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, emit(A64_UXTH(is64, dst, dst), ctx); break; case 32: - emit(A64_REV32(is64, dst, dst), ctx); + emit(A64_REV32(0, dst, dst), ctx); /* upper 32 bits already cleared */ break; case 64: -- Gitee From 7b3f2577730bfef2067454ce40655e087ac6b47b Mon Sep 17 00:00:00 2001 From: Ryosuke Yasuoka Date: Wed, 20 Mar 2024 09:54:10 +0900 Subject: [PATCH 020/268] nfc: nci: Fix uninit-value in nci_dev_up and nci_ntf_packet stable inclusion from stable-6.6.26 commit a946ebee45b09294c8b0b0e77410b763c4d2817a category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit d24b03535e5eb82e025219c2f632b485409c898f ] syzbot reported the following uninit-value access issue [1][2]: nci_rx_work() parses and processes received packet. When the payload length is zero, each message type handler reads uninitialized payload and KMSAN detects this issue. The receipt of a packet with a zero-size payload is considered unexpected, and therefore, such packets should be silently discarded. This patch resolved this issue by checking payload size before calling each message type handler codes. Fixes: 6a2968aaf50c ("NFC: basic NCI protocol implementation") Reported-and-tested-by: syzbot+7ea9413ea6749baf5574@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+29b5ca705d2e0f4a44d2@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=7ea9413ea6749baf5574 [1] Closes: https://syzkaller.appspot.com/bug?extid=29b5ca705d2e0f4a44d2 [2] Signed-off-by: Ryosuke Yasuoka Reviewed-by: Jeremy Cline Reviewed-by: Krzysztof Kozlowski Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- net/nfc/nci/core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 12684d835cb5..772ddb5824d9 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1516,6 +1516,11 @@ static void nci_rx_work(struct work_struct *work) nfc_send_to_raw_sock(ndev->nfc_dev, skb, RAW_PAYLOAD_NCI, NFC_DIRECTION_RX); + if (!nci_plen(skb->data)) { + kfree_skb(skb); + break; + } + /* Process frame */ switch (nci_mt(skb->data)) { case NCI_MT_RSP_PKT: -- Gitee From a35825074fe7b08e9cc2014d180bd27023532149 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 20 Mar 2024 19:02:14 -0700 Subject: [PATCH 021/268] tools: ynl: fix setting presence bits in simple nests stable inclusion from stable-6.6.26 commit 5c05bdd95f0e07ca3acf4f186771cb312c764703 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit f6c8f5e8694c7a78c94e408b628afa6255cc428a ] When we set members of simple nested structures in requests we need to set "presence" bits for all the nesting layers below. This has nothing to do with the presence type of the last layer. Fixes: be5bea1cc0bf ("net: add basic C code generators for Netlink") Reviewed-by: Breno Leitao Link: https://lore.kernel.org/r/20240321020214.1250202-1-kuba@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- tools/net/ynl/ynl-gen-c.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 897af958cee8..575b7e248e52 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -198,8 +198,11 @@ class Type(SpecAttr): presence = '' for i in range(0, len(ref)): presence = f"{var}->{'.'.join(ref[:i] + [''])}_present.{ref[i]}" - if self.presence_type() == 'bit': - code.append(presence + ' = 1;') + # Every layer below last is a nest, so we know it uses bit presence + # last layer is "self" and may be a complex type + if i == len(ref) - 1 and self.presence_type() != 'bit': + continue + code.append(presence + ' = 1;') code += self._setter_lines(ri, member, presence) func_name = f"{op_prefix(ri, direction, deref=deref)}_set_{'_'.join(ref)}" -- Gitee From 9f43ade96b7b478c862d2c7fb0d677df738c7590 Mon Sep 17 00:00:00 2001 From: David Thompson Date: Wed, 20 Mar 2024 15:31:17 -0400 Subject: [PATCH 022/268] mlxbf_gige: stop PHY during open() error paths stable inclusion from stable-6.6.26 commit 84d30c56786a5c0f6247f94f9d5925076f7b768d category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit d6c30c5a168f8586b8bcc0d8e42e2456eb05209b ] The mlxbf_gige_open() routine starts the PHY as part of normal initialization. The mlxbf_gige_open() routine must stop the PHY during its error paths. Fixes: f92e1869d74e ("Add Mellanox BlueField Gigabit Ethernet driver") Signed-off-by: David Thompson Reviewed-by: Asmaa Mnebhi Reviewed-by: Andrew Lunn Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c index aaf1faed4133..044ff5f87b5e 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c @@ -157,7 +157,7 @@ static int mlxbf_gige_open(struct net_device *netdev) err = mlxbf_gige_tx_init(priv); if (err) - goto free_irqs; + goto phy_deinit; err = mlxbf_gige_rx_init(priv); if (err) goto tx_deinit; @@ -185,6 +185,9 @@ static int mlxbf_gige_open(struct net_device *netdev) tx_deinit: mlxbf_gige_tx_deinit(priv); +phy_deinit: + phy_stop(phydev); + free_irqs: mlxbf_gige_free_irqs(priv); return err; -- Gitee From 3e02098e6351bdc8c05f8c22721efee619832a6e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 19 Mar 2024 10:10:17 +0200 Subject: [PATCH 023/268] wifi: iwlwifi: mvm: rfi: fix potential response leaks stable inclusion from stable-6.6.26 commit c0a40f2f8eba07416f695ffe2011bf3f8b0b6dc8 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 06a093807eb7b5c5b29b6cff49f8174a4e702341 ] If the rx payload length check fails, or if kmemdup() fails, we still need to free the command response. Fix that. Fixes: 21254908cbe9 ("iwlwifi: mvm: add RFI-M support") Co-authored-by: Anjaneyulu Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20240319100755.db2fa0196aa7.I116293b132502ac68a65527330fa37799694b79c@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- drivers/net/wireless/intel/iwlwifi/mvm/rfi.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c b/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c index 2ecd32bed752..045c862a8fc4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c @@ -132,14 +132,18 @@ struct iwl_rfi_freq_table_resp_cmd *iwl_rfi_get_freq_table(struct iwl_mvm *mvm) if (ret) return ERR_PTR(ret); - if (WARN_ON_ONCE(iwl_rx_packet_payload_len(cmd.resp_pkt) != resp_size)) + if (WARN_ON_ONCE(iwl_rx_packet_payload_len(cmd.resp_pkt) != + resp_size)) { + iwl_free_resp(&cmd); return ERR_PTR(-EIO); + } resp = kmemdup(cmd.resp_pkt->data, resp_size, GFP_KERNEL); + iwl_free_resp(&cmd); + if (!resp) return ERR_PTR(-ENOMEM); - iwl_free_resp(&cmd); return resp; } -- Gitee From be3c3d97d30045f685c76739be6b651bc1718c3b Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 17 Oct 2023 12:16:46 +0300 Subject: [PATCH 024/268] wifi: iwlwifi: disable multi rx queue for 9000 stable inclusion from stable-6.6.26 commit 13fd96c975969b28669004a3ed5f2044f7ac3053 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 29fa9a984b6d1075020f12071a89897fd62ed27f ] Multi rx queue allows to spread the load of the Rx streams on different CPUs. 9000 series required complex synchronization mechanisms from the driver side since the hardware / firmware is not able to provide information about duplicate packets and timeouts inside the reordering buffer. Users have complained that for newer devices, all those synchronization mechanisms have caused spurious packet drops. Those packet drops disappeared if we simplify the code, but unfortunately, we can't have RSS enabled on 9000 series without this complex code. Remove support for RSS on 9000 so that we can make the code much simpler for newer devices and fix the bugs for them. The down side of this patch is a that all the Rx path will be routed to a single CPU, but this has never been an issue, the modern CPUs are just fast enough to cope with all the traffic. Signed-off-by: Emmanuel Grumbach Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20231017115047.2917eb8b7af9.Iddd7dcf335387ba46fcbbb6067ef4ff9cd3755a7@changeid Signed-off-by: Johannes Berg Stable-dep-of: e78d78773089 ("wifi: iwlwifi: mvm: include link ID when releasing frames") Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- drivers/net/wireless/intel/iwlwifi/iwl-trans.h | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 4 +++- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 11 ++++++++++- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index 168eda2132fb..9dcc1506bd0b 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -278,7 +278,7 @@ static inline void iwl_free_rxb(struct iwl_rx_cmd_buffer *r) #define IWL_MGMT_TID 15 #define IWL_FRAME_LIMIT 64 #define IWL_MAX_RX_HW_QUEUES 16 -#define IWL_9000_MAX_RX_HW_QUEUES 6 +#define IWL_9000_MAX_RX_HW_QUEUES 1 /** * enum iwl_wowlan_status - WoWLAN image/device status diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index aaa9840d0d4c..ee9d14250a26 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -352,7 +352,9 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) ieee80211_hw_set(hw, HAS_RATE_CONTROL); } - if (iwl_mvm_has_new_rx_api(mvm)) + /* We want to use the mac80211's reorder buffer for 9000 */ + if (iwl_mvm_has_new_rx_api(mvm) && + mvm->trans->trans_cfg->device_family > IWL_DEVICE_FAMILY_9000) ieee80211_hw_set(hw, SUPPORTS_REORDERING_BUFFER); if (fw_has_capa(&mvm->fw->ucode_capa, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index bac0228b8c86..92b3e18dbe87 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -963,6 +963,9 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm, baid = (reorder & IWL_RX_MPDU_REORDER_BAID_MASK) >> IWL_RX_MPDU_REORDER_BAID_SHIFT; + if (mvm->trans->trans_cfg->device_family == IWL_DEVICE_FAMILY_9000) + return false; + /* * This also covers the case of receiving a Block Ack Request * outside a BA session; we'll pass it to mac80211 and that @@ -2621,9 +2624,15 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, if (!iwl_mvm_reorder(mvm, napi, queue, sta, skb, desc) && likely(!iwl_mvm_time_sync_frame(mvm, skb, hdr->addr2)) && - likely(!iwl_mvm_mei_filter_scan(mvm, skb))) + likely(!iwl_mvm_mei_filter_scan(mvm, skb))) { + if (mvm->trans->trans_cfg->device_family == IWL_DEVICE_FAMILY_9000 && + (desc->mac_flags2 & IWL_RX_MPDU_MFLG2_AMSDU) && + !(desc->amsdu_info & IWL_RX_MPDU_AMSDU_LAST_SUBFRAME)) + rx_status->flag |= RX_FLAG_AMSDU_MORE; + iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, queue, sta, link_sta); + } out: rcu_read_unlock(); } -- Gitee From b5fc895e8b208f726b7e9b8a1b737f41ce2ce3e7 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Wed, 20 Mar 2024 23:26:22 +0200 Subject: [PATCH 025/268] wifi: iwlwifi: mvm: include link ID when releasing frames stable inclusion from stable-6.6.26 commit dc1ec9c5efec2df1f249c53cde937d2b938b61fa category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit e78d7877308989ef91b64a3c746ae31324c07caa ] When releasing frames from the reorder buffer, the link ID was not included in the RX status information. This subsequently led mac80211 to drop the frame. Change it so that the link information is set immediately when possible so that it doesn't not need to be filled in anymore when submitting the frame to mac80211. Fixes: b8a85a1d42d7 ("wifi: iwlwifi: mvm: rxmq: report link ID to mac80211") Signed-off-by: Benjamin Berg Tested-by: Emmanuel Grumbach Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20240320232419.bbbd5e9bfe80.Iec1bf5c884e371f7bc5ea2534ed9ea8d3f2c0bf6@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 92b3e18dbe87..e9360b555ac9 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -236,21 +236,13 @@ static void iwl_mvm_add_rtap_sniffer_config(struct iwl_mvm *mvm, static void iwl_mvm_pass_packet_to_mac80211(struct iwl_mvm *mvm, struct napi_struct *napi, struct sk_buff *skb, int queue, - struct ieee80211_sta *sta, - struct ieee80211_link_sta *link_sta) + struct ieee80211_sta *sta) { if (unlikely(iwl_mvm_check_pn(mvm, skb, queue, sta))) { kfree_skb(skb); return; } - if (sta && sta->valid_links && link_sta) { - struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb); - - rx_status->link_valid = 1; - rx_status->link_id = link_sta->link_id; - } - ieee80211_rx_napi(mvm->hw, sta, skb, napi); } @@ -636,7 +628,7 @@ static void iwl_mvm_release_frames(struct iwl_mvm *mvm, while ((skb = __skb_dequeue(skb_list))) { iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, reorder_buf->queue, - sta, NULL /* FIXME */); + sta); reorder_buf->num_stored--; } } @@ -2489,6 +2481,11 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, if (IS_ERR(sta)) sta = NULL; link_sta = rcu_dereference(mvm->fw_id_to_link_sta[id]); + + if (sta && sta->valid_links && link_sta) { + rx_status->link_valid = 1; + rx_status->link_id = link_sta->link_id; + } } } else if (!is_multicast_ether_addr(hdr->addr2)) { /* @@ -2630,8 +2627,7 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, !(desc->amsdu_info & IWL_RX_MPDU_AMSDU_LAST_SUBFRAME)) rx_status->flag |= RX_FLAG_AMSDU_MORE; - iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, queue, sta, - link_sta); + iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, queue, sta); } out: rcu_read_unlock(); -- Gitee From d85e62f93f8bfa6a16ade0fcc9d6c1eba18f8932 Mon Sep 17 00:00:00 2001 From: Simon Trimmer Date: Mon, 25 Mar 2024 14:55:10 +0000 Subject: [PATCH 026/268] ALSA: hda: cs35l56: Set the init_done flag before component_add() stable inclusion from stable-6.6.26 commit 0172edc572b052192eb89ebfb93191f8bb5bf1bf category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit cafe9c6a72cf1ffe96d2561d988a141cb5c093db ] Initialization is completed before adding the component as that can start the process of the device binding and trigger actions that check init_done. Signed-off-by: Simon Trimmer Signed-off-by: Richard Fitzgerald Fixes: 73cfbfa9caea ("ALSA: hda/cs35l56: Add driver for Cirrus Logic CS35L56 amplifier") Message-ID: <20240325145510.328378-1-rf@opensource.cirrus.com> Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- sound/pci/hda/cs35l56_hda.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c index 7adc1d373d65..27848d646963 100644 --- a/sound/pci/hda/cs35l56_hda.c +++ b/sound/pci/hda/cs35l56_hda.c @@ -978,14 +978,14 @@ int cs35l56_hda_common_probe(struct cs35l56_hda *cs35l56, int id) pm_runtime_mark_last_busy(cs35l56->base.dev); pm_runtime_enable(cs35l56->base.dev); + cs35l56->base.init_done = true; + ret = component_add(cs35l56->base.dev, &cs35l56_hda_comp_ops); if (ret) { dev_err(cs35l56->base.dev, "Register component failed: %d\n", ret); goto pm_err; } - cs35l56->base.init_done = true; - return 0; pm_err: -- Gitee From 8ce9c9285a633c3ac2eb8ef435ce193fc6fa08ce Mon Sep 17 00:00:00 2001 From: Steven Zou Date: Wed, 7 Feb 2024 09:49:59 +0800 Subject: [PATCH 027/268] ice: Refactor FW data type and fix bitmap casting issue stable inclusion from stable-6.6.26 commit 493b29930f66b865d969badfb3d0431dae76a1ff category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 817b18965b58a6e5fb6ce97abf01b03a205a6aea ] According to the datasheet, the recipe association data is an 8-byte little-endian value. It is described as 'Bitmap of the recipe indexes associated with this profile', it is from 24 to 31 byte area in FW. Therefore, it is defined to '__le64 recipe_assoc' in struct ice_aqc_recipe_to_profile. And then fix the bitmap casting issue, as we must never ever use castings for bitmap type. Fixes: 1e0f9881ef79 ("ice: Flesh out implementation of support for SRIOV on bonded interface") Reviewed-by: Przemek Kitszel Reviewed-by: Andrii Staikov Reviewed-by: Jan Sokolowski Reviewed-by: Simon Horman Signed-off-by: Steven Zou Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- .../net/ethernet/intel/ice/ice_adminq_cmd.h | 3 ++- drivers/net/ethernet/intel/ice/ice_lag.c | 4 ++-- drivers/net/ethernet/intel/ice/ice_switch.c | 24 +++++++++++-------- drivers/net/ethernet/intel/ice/ice_switch.h | 4 ++-- 4 files changed, 20 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 45f3e351653d..72ca2199c957 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -592,8 +592,9 @@ struct ice_aqc_recipe_data_elem { struct ice_aqc_recipe_to_profile { __le16 profile_id; u8 rsvd[6]; - DECLARE_BITMAP(recipe_assoc, ICE_MAX_NUM_RECIPES); + __le64 recipe_assoc; }; +static_assert(sizeof(struct ice_aqc_recipe_to_profile) == 16); /* Add/Update/Remove/Get switch rules (indirect 0x02A0, 0x02A1, 0x02A2, 0x02A3) */ diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index 23e197c3d02a..4e675c7c199f 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -2000,14 +2000,14 @@ int ice_init_lag(struct ice_pf *pf) /* associate recipes to profiles */ for (n = 0; n < ICE_PROFID_IPV6_GTPU_IPV6_TCP_INNER; n++) { err = ice_aq_get_recipe_to_profile(&pf->hw, n, - (u8 *)&recipe_bits, NULL); + &recipe_bits, NULL); if (err) continue; if (recipe_bits & BIT(ICE_SW_LKUP_DFLT)) { recipe_bits |= BIT(lag->pf_recipe); ice_aq_map_recipe_to_profile(&pf->hw, n, - (u8 *)&recipe_bits, NULL); + recipe_bits, NULL); } } diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 2f77b684ff76..4c6d58bb2690 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -2032,12 +2032,12 @@ ice_update_recipe_lkup_idx(struct ice_hw *hw, * ice_aq_map_recipe_to_profile - Map recipe to packet profile * @hw: pointer to the HW struct * @profile_id: package profile ID to associate the recipe with - * @r_bitmap: Recipe bitmap filled in and need to be returned as response + * @r_assoc: Recipe bitmap filled in and need to be returned as response * @cd: pointer to command details structure or NULL * Recipe to profile association (0x0291) */ int -ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, +ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 r_assoc, struct ice_sq_cd *cd) { struct ice_aqc_recipe_to_profile *cmd; @@ -2049,7 +2049,7 @@ ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, /* Set the recipe ID bit in the bitmask to let the device know which * profile we are associating the recipe to */ - memcpy(cmd->recipe_assoc, r_bitmap, sizeof(cmd->recipe_assoc)); + cmd->recipe_assoc = cpu_to_le64(r_assoc); return ice_aq_send_cmd(hw, &desc, NULL, 0, cd); } @@ -2058,12 +2058,12 @@ ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, * ice_aq_get_recipe_to_profile - Map recipe to packet profile * @hw: pointer to the HW struct * @profile_id: package profile ID to associate the recipe with - * @r_bitmap: Recipe bitmap filled in and need to be returned as response + * @r_assoc: Recipe bitmap filled in and need to be returned as response * @cd: pointer to command details structure or NULL * Associate profile ID with given recipe (0x0293) */ int -ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, +ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 *r_assoc, struct ice_sq_cd *cd) { struct ice_aqc_recipe_to_profile *cmd; @@ -2076,7 +2076,7 @@ ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd); if (!status) - memcpy(r_bitmap, cmd->recipe_assoc, sizeof(cmd->recipe_assoc)); + *r_assoc = le64_to_cpu(cmd->recipe_assoc); return status; } @@ -2121,6 +2121,7 @@ int ice_alloc_recipe(struct ice_hw *hw, u16 *rid) static void ice_get_recp_to_prof_map(struct ice_hw *hw) { DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES); + u64 recp_assoc; u16 i; for (i = 0; i < hw->switch_info->max_used_prof_index + 1; i++) { @@ -2128,8 +2129,9 @@ static void ice_get_recp_to_prof_map(struct ice_hw *hw) bitmap_zero(profile_to_recipe[i], ICE_MAX_NUM_RECIPES); bitmap_zero(r_bitmap, ICE_MAX_NUM_RECIPES); - if (ice_aq_get_recipe_to_profile(hw, i, (u8 *)r_bitmap, NULL)) + if (ice_aq_get_recipe_to_profile(hw, i, &recp_assoc, NULL)) continue; + bitmap_from_arr64(r_bitmap, &recp_assoc, ICE_MAX_NUM_RECIPES); bitmap_copy(profile_to_recipe[i], r_bitmap, ICE_MAX_NUM_RECIPES); for_each_set_bit(j, r_bitmap, ICE_MAX_NUM_RECIPES) @@ -5431,22 +5433,24 @@ ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, */ list_for_each_entry(fvit, &rm->fv_list, list_entry) { DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES); + u64 recp_assoc; u16 j; status = ice_aq_get_recipe_to_profile(hw, fvit->profile_id, - (u8 *)r_bitmap, NULL); + &recp_assoc, NULL); if (status) goto err_unroll; + bitmap_from_arr64(r_bitmap, &recp_assoc, ICE_MAX_NUM_RECIPES); bitmap_or(r_bitmap, r_bitmap, rm->r_bitmap, ICE_MAX_NUM_RECIPES); status = ice_acquire_change_lock(hw, ICE_RES_WRITE); if (status) goto err_unroll; + bitmap_to_arr64(&recp_assoc, r_bitmap, ICE_MAX_NUM_RECIPES); status = ice_aq_map_recipe_to_profile(hw, fvit->profile_id, - (u8 *)r_bitmap, - NULL); + recp_assoc, NULL); ice_release_change_lock(hw); if (status) diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index db7e501b7e0a..89ffa1b51b5a 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -424,10 +424,10 @@ int ice_aq_add_recipe(struct ice_hw *hw, struct ice_aqc_recipe_data_elem *s_recipe_list, u16 num_recipes, struct ice_sq_cd *cd); int -ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, +ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 *r_assoc, struct ice_sq_cd *cd); int -ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, +ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 r_assoc, struct ice_sq_cd *cd); #endif /* _ICE_SWITCH_H_ */ -- Gitee From de2e2410b43b8278c1314d930abaf5a2b8f5cca0 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Tue, 24 Oct 2023 13:09:26 +0200 Subject: [PATCH 028/268] ice: realloc VSI stats arrays stable inclusion from stable-6.6.26 commit feddf6c09c44339a75a54322573ae14ef226404b category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 5995ef88e3a8c2b014f51256a88be8e336532ce7 ] Previously only case when queues amount is lower was covered. Implement realloc for case when queues amount is higher than previous one. Use krealloc() function and zero new allocated elements. It has to be done before ice_vsi_def_cfg(), because stats element for ring is set there. Reviewed-by: Wojciech Drewek Signed-off-by: Michal Swiatkowski Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen Stable-dep-of: 1cb7fdb1dfde ("ice: fix memory corruption bug with suspend and rebuild") Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- drivers/net/ethernet/intel/ice/ice_lib.c | 58 ++++++++++++++++-------- 1 file changed, 39 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 7f4bc110ead4..47298ab675a5 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -3084,27 +3084,26 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi, } /** - * ice_vsi_realloc_stat_arrays - Frees unused stat structures + * ice_vsi_realloc_stat_arrays - Frees unused stat structures or alloc new ones * @vsi: VSI pointer - * @prev_txq: Number of Tx rings before ring reallocation - * @prev_rxq: Number of Rx rings before ring reallocation */ -static void -ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi, int prev_txq, int prev_rxq) +static int +ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi) { + u16 req_txq = vsi->req_txq ? vsi->req_txq : vsi->alloc_txq; + u16 req_rxq = vsi->req_rxq ? vsi->req_rxq : vsi->alloc_rxq; + struct ice_ring_stats **tx_ring_stats; + struct ice_ring_stats **rx_ring_stats; struct ice_vsi_stats *vsi_stat; struct ice_pf *pf = vsi->back; + u16 prev_txq = vsi->alloc_txq; + u16 prev_rxq = vsi->alloc_rxq; int i; - if (!prev_txq || !prev_rxq) - return; - if (vsi->type == ICE_VSI_CHNL) - return; - vsi_stat = pf->vsi_stats[vsi->idx]; - if (vsi->num_txq < prev_txq) { - for (i = vsi->num_txq; i < prev_txq; i++) { + if (req_txq < prev_txq) { + for (i = req_txq; i < prev_txq; i++) { if (vsi_stat->tx_ring_stats[i]) { kfree_rcu(vsi_stat->tx_ring_stats[i], rcu); WRITE_ONCE(vsi_stat->tx_ring_stats[i], NULL); @@ -3112,14 +3111,36 @@ ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi, int prev_txq, int prev_rxq) } } - if (vsi->num_rxq < prev_rxq) { - for (i = vsi->num_rxq; i < prev_rxq; i++) { + tx_ring_stats = vsi_stat->rx_ring_stats; + vsi_stat->tx_ring_stats = + krealloc_array(vsi_stat->tx_ring_stats, req_txq, + sizeof(*vsi_stat->tx_ring_stats), + GFP_KERNEL | __GFP_ZERO); + if (!vsi_stat->tx_ring_stats) { + vsi_stat->tx_ring_stats = tx_ring_stats; + return -ENOMEM; + } + + if (req_rxq < prev_rxq) { + for (i = req_rxq; i < prev_rxq; i++) { if (vsi_stat->rx_ring_stats[i]) { kfree_rcu(vsi_stat->rx_ring_stats[i], rcu); WRITE_ONCE(vsi_stat->rx_ring_stats[i], NULL); } } } + + rx_ring_stats = vsi_stat->rx_ring_stats; + vsi_stat->rx_ring_stats = + krealloc_array(vsi_stat->rx_ring_stats, req_rxq, + sizeof(*vsi_stat->rx_ring_stats), + GFP_KERNEL | __GFP_ZERO); + if (!vsi_stat->rx_ring_stats) { + vsi_stat->rx_ring_stats = rx_ring_stats; + return -ENOMEM; + } + + return 0; } /** @@ -3136,9 +3157,9 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags) { struct ice_vsi_cfg_params params = {}; struct ice_coalesce_stored *coalesce; - int ret, prev_txq, prev_rxq; int prev_num_q_vectors = 0; struct ice_pf *pf; + int ret; if (!vsi) return -EINVAL; @@ -3157,8 +3178,9 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags) prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce); - prev_txq = vsi->num_txq; - prev_rxq = vsi->num_rxq; + ret = ice_vsi_realloc_stat_arrays(vsi); + if (ret) + goto err_vsi_cfg; ice_vsi_decfg(vsi); ret = ice_vsi_cfg_def(vsi, ¶ms); @@ -3176,8 +3198,6 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags) return ice_schedule_reset(pf, ICE_RESET_PFR); } - ice_vsi_realloc_stat_arrays(vsi, prev_txq, prev_rxq); - ice_vsi_rebuild_set_coalesce(vsi, coalesce, prev_num_q_vectors); kfree(coalesce); -- Gitee From 58f201e7c069ab5b29be19fc0708dffce78b85ac Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Tue, 5 Mar 2024 15:02:03 -0800 Subject: [PATCH 029/268] ice: fix memory corruption bug with suspend and rebuild stable inclusion from stable-6.6.26 commit e40a02f06ceb0e0b0183e0b973ac5dbf8f75edec category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 1cb7fdb1dfde1aab66780b4ba44dba6402172111 ] The ice driver would previously panic after suspend. This is caused from the driver *only* calling the ice_vsi_free_q_vectors() function by itself, when it is suspending. Since commit b3e7b3a6ee92 ("ice: prevent NULL pointer deref during reload") the driver has zeroed out num_q_vectors, and only restored it in ice_vsi_cfg_def(). This further causes the ice_rebuild() function to allocate a zero length buffer, after which num_q_vectors is updated, and then the new value of num_q_vectors is used to index into the zero length buffer, which corrupts memory. The fix entails making sure all the code referencing num_q_vectors only does so after it has been reset via ice_vsi_cfg_def(). I didn't perform a full bisect, but I was able to test against 6.1.77 kernel and that ice driver works fine for suspend/resume with no panic, so sometime since then, this problem was introduced. Also clean up an un-needed init of a local variable in the function being modified. PANIC from 6.8.0-rc1: [1026674.915596] PM: suspend exit [1026675.664697] ice 0000:17:00.1: PTP reset successful [1026675.664707] ice 0000:17:00.1: 2755 msecs passed between update to cached PHC time [1026675.667660] ice 0000:b1:00.0: PTP reset successful [1026675.675944] ice 0000:b1:00.0: 2832 msecs passed between update to cached PHC time [1026677.137733] ixgbe 0000:31:00.0 ens787: NIC Link is Up 1 Gbps, Flow Control: None [1026677.190201] BUG: kernel NULL pointer dereference, address: 0000000000000010 [1026677.192753] ice 0000:17:00.0: PTP reset successful [1026677.192764] ice 0000:17:00.0: 4548 msecs passed between update to cached PHC time [1026677.197928] #PF: supervisor read access in kernel mode [1026677.197933] #PF: error_code(0x0000) - not-present page [1026677.197937] PGD 1557a7067 P4D 0 [1026677.212133] ice 0000:b1:00.1: PTP reset successful [1026677.212143] ice 0000:b1:00.1: 4344 msecs passed between update to cached PHC time [1026677.212575] [1026677.243142] Oops: 0000 [#1] PREEMPT SMP NOPTI [1026677.247918] CPU: 23 PID: 42790 Comm: kworker/23:0 Kdump: loaded Tainted: G W 6.8.0-rc1+ #1 [1026677.257989] Hardware name: Intel Corporation M50CYP2SBSTD/M50CYP2SBSTD, BIOS SE5C620.86B.01.01.0005.2202160810 02/16/2022 [1026677.269367] Workqueue: ice ice_service_task [ice] [1026677.274592] RIP: 0010:ice_vsi_rebuild_set_coalesce+0x130/0x1e0 [ice] [1026677.281421] Code: 0f 84 3a ff ff ff 41 0f b7 74 ec 02 66 89 b0 22 02 00 00 81 e6 ff 1f 00 00 e8 ec fd ff ff e9 35 ff ff ff 48 8b 43 30 49 63 ed <41> 0f b7 34 24 41 83 c5 01 48 8b 3c e8 66 89 b7 aa 02 00 00 81 e6 [1026677.300877] RSP: 0018:ff3be62a6399bcc0 EFLAGS: 00010202 [1026677.306556] RAX: ff28691e28980828 RBX: ff28691e41099828 RCX: 0000000000188000 [1026677.314148] RDX: 0000000000000000 RSI: 0000000000000010 RDI: ff28691e41099828 [1026677.321730] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 [1026677.329311] R10: 0000000000000007 R11: ffffffffffffffc0 R12: 0000000000000010 [1026677.336896] R13: 0000000000000000 R14: 0000000000000000 R15: ff28691e0eaa81a0 [1026677.344472] FS: 0000000000000000(0000) GS:ff28693cbffc0000(0000) knlGS:0000000000000000 [1026677.353000] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1026677.359195] CR2: 0000000000000010 CR3: 0000000128df4001 CR4: 0000000000771ef0 [1026677.366779] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1026677.374369] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1026677.381952] PKRU: 55555554 [1026677.385116] Call Trace: [1026677.388023] [1026677.390589] ? __die+0x20/0x70 [1026677.394105] ? page_fault_oops+0x82/0x160 [1026677.398576] ? do_user_addr_fault+0x65/0x6a0 [1026677.403307] ? exc_page_fault+0x6a/0x150 [1026677.407694] ? asm_exc_page_fault+0x22/0x30 [1026677.412349] ? ice_vsi_rebuild_set_coalesce+0x130/0x1e0 [ice] [1026677.418614] ice_vsi_rebuild+0x34b/0x3c0 [ice] [1026677.423583] ice_vsi_rebuild_by_type+0x76/0x180 [ice] [1026677.429147] ice_rebuild+0x18b/0x520 [ice] [1026677.433746] ? delay_tsc+0x8f/0xc0 [1026677.437630] ice_do_reset+0xa3/0x190 [ice] [1026677.442231] ice_service_task+0x26/0x440 [ice] [1026677.447180] process_one_work+0x174/0x340 [1026677.451669] worker_thread+0x27e/0x390 [1026677.455890] ? __pfx_worker_thread+0x10/0x10 [1026677.460627] kthread+0xee/0x120 [1026677.464235] ? __pfx_kthread+0x10/0x10 [1026677.468445] ret_from_fork+0x2d/0x50 [1026677.472476] ? __pfx_kthread+0x10/0x10 [1026677.476671] ret_from_fork_asm+0x1b/0x30 [1026677.481050] Fixes: b3e7b3a6ee92 ("ice: prevent NULL pointer deref during reload") Reported-by: Robert Elliott Signed-off-by: Jesse Brandeburg Reviewed-by: Simon Horman Reviewed-by: Aleksandr Loktionov Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- drivers/net/ethernet/intel/ice/ice_lib.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 47298ab675a5..0b7132a42e35 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -3157,7 +3157,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags) { struct ice_vsi_cfg_params params = {}; struct ice_coalesce_stored *coalesce; - int prev_num_q_vectors = 0; + int prev_num_q_vectors; struct ice_pf *pf; int ret; @@ -3171,13 +3171,6 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags) if (WARN_ON(vsi->type == ICE_VSI_VF && !vsi->vf)) return -EINVAL; - coalesce = kcalloc(vsi->num_q_vectors, - sizeof(struct ice_coalesce_stored), GFP_KERNEL); - if (!coalesce) - return -ENOMEM; - - prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce); - ret = ice_vsi_realloc_stat_arrays(vsi); if (ret) goto err_vsi_cfg; @@ -3187,6 +3180,13 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags) if (ret) goto err_vsi_cfg; + coalesce = kcalloc(vsi->num_q_vectors, + sizeof(struct ice_coalesce_stored), GFP_KERNEL); + if (!coalesce) + return -ENOMEM; + + prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce); + ret = ice_vsi_cfg_tc_lan(pf, vsi); if (ret) { if (vsi_flags & ICE_VSI_FLAG_INIT) { @@ -3205,8 +3205,8 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags) err_vsi_cfg_tc_lan: ice_vsi_decfg(vsi); -err_vsi_cfg: kfree(coalesce); +err_vsi_cfg: return ret; } -- Gitee From c4a9229b379e1d4b2489aa6a8b0f61e22e1f1ecb Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Tue, 5 Mar 2024 17:02:02 +0100 Subject: [PATCH 030/268] ixgbe: avoid sleeping allocation in ixgbe_ipsec_vf_add_sa() stable inclusion from stable-6.6.26 commit 4465b15ae5c522c6d0b1510b2ecc997da4b8e4c3 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit aec806fb4afba5fe80b09e29351379a4292baa43 ] Change kzalloc() flags used in ixgbe_ipsec_vf_add_sa() to GFP_ATOMIC, to avoid sleeping in IRQ context. Dan Carpenter, with the help of Smatch, has found following issue: The patch eda0333ac293: "ixgbe: add VF IPsec management" from Aug 13, 2018 (linux-next), leads to the following Smatch static checker warning: drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c:917 ixgbe_ipsec_vf_add_sa() warn: sleeping in IRQ context The call tree that Smatch is worried about is: ixgbe_msix_other() <- IRQ handler -> ixgbe_msg_task() -> ixgbe_rcv_msg_from_vf() -> ixgbe_ipsec_vf_add_sa() Fixes: eda0333ac293 ("ixgbe: add VF IPsec management") Reported-by: Dan Carpenter Link: https://lore.kernel.org/intel-wired-lan/db31a0b0-4d9f-4e6b-aed8-88266eb5665c@moroto.mountain Reviewed-by: Michal Kubiak Signed-off-by: Przemek Kitszel Reviewed-by: Shannon Nelson Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c index 13a6fca31004..866024f2b9ee 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c @@ -914,7 +914,13 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) goto err_out; } - xs = kzalloc(sizeof(*xs), GFP_KERNEL); + algo = xfrm_aead_get_byname(aes_gcm_name, IXGBE_IPSEC_AUTH_BITS, 1); + if (unlikely(!algo)) { + err = -ENOENT; + goto err_out; + } + + xs = kzalloc(sizeof(*xs), GFP_ATOMIC); if (unlikely(!xs)) { err = -ENOMEM; goto err_out; @@ -930,14 +936,8 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) memcpy(&xs->id.daddr.a4, sam->addr, sizeof(xs->id.daddr.a4)); xs->xso.dev = adapter->netdev; - algo = xfrm_aead_get_byname(aes_gcm_name, IXGBE_IPSEC_AUTH_BITS, 1); - if (unlikely(!algo)) { - err = -ENOENT; - goto err_xs; - } - aead_len = sizeof(*xs->aead) + IXGBE_IPSEC_KEY_BITS / 8; - xs->aead = kzalloc(aead_len, GFP_KERNEL); + xs->aead = kzalloc(aead_len, GFP_ATOMIC); if (unlikely(!xs->aead)) { err = -ENOMEM; goto err_xs; -- Gitee From 5e40d07aca23a2070ac9b8b7d9016155caa1cfcd Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Wed, 13 Mar 2024 14:03:10 +0100 Subject: [PATCH 031/268] igc: Remove stale comment about Tx timestamping stable inclusion from stable-6.6.26 commit 1b1c0f6ce790e5213dd6765a72c789a8aec76047 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 47ce2956c7a61ff354723e28235205fa2012265b ] The initial igc Tx timestamping implementation used only one register for retrieving Tx timestamps. Commit 3ed247e78911 ("igc: Add support for multiple in-flight TX timestamps") added support for utilizing all four of them e.g., for multiple domain support. Remove the stale comment/FIXME. Fixes: 3ed247e78911 ("igc: Add support for multiple in-flight TX timestamps") Signed-off-by: Kurt Kanzenbach Acked-by: Vinicius Costa Gomes Reviewed-by: Przemek Kitszel Tested-by: Naama Meir Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- drivers/net/ethernet/intel/igc/igc_main.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index fc1de116d554..e83700ad7e62 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1640,10 +1640,6 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, if (unlikely(test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags) && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { - /* FIXME: add support for retrieving timestamps from - * the other timer registers before skipping the - * timestamping request. - */ unsigned long flags; u32 tstamp_flags; -- Gitee From dc3a08203112d3448edf29a335eb191e128c77b3 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Thu, 21 Mar 2024 12:53:37 +0100 Subject: [PATCH 032/268] s390/qeth: handle deferred cc1 stable inclusion from stable-6.6.26 commit 21dea1475fd453ac4feaa3c395f51c61b5779260 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit afb373ff3f54c9d909efc7f810dc80a9742807b2 ] The IO subsystem expects a driver to retry a ccw_device_start, when the subsequent interrupt response block (irb) contains a deferred condition code 1. Symptoms before this commit: On the read channel we always trigger the next read anyhow, so no different behaviour here. On the write channel we may experience timeout errors, because the expected reply will never be received without the retry. Other callers of qeth_send_control_data() may wrongly assume that the ccw was successful, which may cause problems later. Note that since commit 2297791c92d0 ("s390/cio: dont unregister subchannel from child-drivers") and commit 5ef1dc40ffa6 ("s390/cio: fix invalid -EBUSY on ccw_device_start") deferred CC1s are much more likely to occur. See the commit message of the latter for more background information. Fixes: 2297791c92d0 ("s390/cio: dont unregister subchannel from child-drivers") Signed-off-by: Alexandra Winter Co-developed-by: Thorsten Winkler Signed-off-by: Thorsten Winkler Reviewed-by: Peter Oberparleiter Link: https://lore.kernel.org/r/20240321115337.3564694-1-wintera@linux.ibm.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- drivers/s390/net/qeth_core_main.c | 38 +++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index cd783290bde5..1148b4ecabdd 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -1179,6 +1179,20 @@ static int qeth_check_irb_error(struct qeth_card *card, struct ccw_device *cdev, } } +/** + * qeth_irq() - qeth interrupt handler + * @cdev: ccw device + * @intparm: expect pointer to iob + * @irb: Interruption Response Block + * + * In the good path: + * corresponding qeth channel is locked with last used iob as active_cmd. + * But this function is also called for error interrupts. + * + * Caller ensures that: + * Interrupts are disabled; ccw device lock is held; + * + */ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, struct irb *irb) { @@ -1220,11 +1234,10 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, iob = (struct qeth_cmd_buffer *) (addr_t)intparm; } - qeth_unlock_channel(card, channel); - rc = qeth_check_irb_error(card, cdev, irb); if (rc) { /* IO was terminated, free its resources. */ + qeth_unlock_channel(card, channel); if (iob) qeth_cancel_cmd(iob, rc); return; @@ -1268,6 +1281,7 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, rc = qeth_get_problem(card, cdev, irb); if (rc) { card->read_or_write_problem = 1; + qeth_unlock_channel(card, channel); if (iob) qeth_cancel_cmd(iob, rc); qeth_clear_ipacmd_list(card); @@ -1276,6 +1290,26 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, } } + if (scsw_cmd_is_valid_cc(&irb->scsw) && irb->scsw.cmd.cc == 1 && iob) { + /* channel command hasn't started: retry. + * active_cmd is still set to last iob + */ + QETH_CARD_TEXT(card, 2, "irqcc1"); + rc = ccw_device_start_timeout(cdev, __ccw_from_cmd(iob), + (addr_t)iob, 0, 0, iob->timeout); + if (rc) { + QETH_DBF_MESSAGE(2, + "ccw retry on %x failed, rc = %i\n", + CARD_DEVID(card), rc); + QETH_CARD_TEXT_(card, 2, " err%d", rc); + qeth_unlock_channel(card, channel); + qeth_cancel_cmd(iob, rc); + } + return; + } + + qeth_unlock_channel(card, channel); + if (iob) { /* sanity check: */ if (irb->scsw.cmd.count > iob->length) { -- Gitee From ded9159ef004a6156476fec572b950ba6120f394 Mon Sep 17 00:00:00 2001 From: Ravi Gunasekaran Date: Fri, 22 Mar 2024 15:34:47 +0530 Subject: [PATCH 033/268] net: hsr: hsr_slave: Fix the promiscuous mode in offload mode stable inclusion from stable-6.6.26 commit 984c3d962c9ef711016e46dc8986cbd9241fdd85 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit b11c81731c810efe592e510bb0110e0db6877419 ] commit e748d0fd66ab ("net: hsr: Disable promiscuous mode in offload mode") disables promiscuous mode of slave devices while creating an HSR interface. But while deleting the HSR interface, it does not take care of it. It decreases the promiscuous mode count, which eventually enables promiscuous mode on the slave devices when creating HSR interface again. Fix this by not decrementing the promiscuous mode count while deleting the HSR interface when offload is enabled. Fixes: e748d0fd66ab ("net: hsr: Disable promiscuous mode in offload mode") Signed-off-by: Ravi Gunasekaran Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240322100447.27615-1-r-gunasekaran@ti.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- net/hsr/hsr_slave.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index e5742f2a2d52..1b6457f357bd 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -220,7 +220,8 @@ void hsr_del_port(struct hsr_port *port) netdev_update_features(master->dev); dev_set_mtu(master->dev, hsr_get_max_mtu(hsr)); netdev_rx_handler_unregister(port->dev); - dev_set_promiscuity(port->dev, -1); + if (!port->hsr->fwd_offloaded) + dev_set_promiscuity(port->dev, -1); netdev_upper_dev_unlink(port->dev, master->dev); } -- Gitee From 70409aef89d0df32760bc9e7cca62d24b5539872 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 22 Mar 2024 13:57:32 +0000 Subject: [PATCH 034/268] tcp: properly terminate timers for kernel sockets stable inclusion from stable-6.6.26 commit c1ae4d1e76eacddaacb958b67cd942082f800c87 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 151c9c724d05d5b0dd8acd3e11cb69ef1f2dbada ] We had various syzbot reports about tcp timers firing after the corresponding netns has been dismantled. Fortunately Josef Bacik could trigger the issue more often, and could test a patch I wrote two years ago. When TCP sockets are closed, we call inet_csk_clear_xmit_timers() to 'stop' the timers. inet_csk_clear_xmit_timers() can be called from any context, including when socket lock is held. This is the reason it uses sk_stop_timer(), aka del_timer(). This means that ongoing timers might finish much later. For user sockets, this is fine because each running timer holds a reference on the socket, and the user socket holds a reference on the netns. For kernel sockets, we risk that the netns is freed before timer can complete, because kernel sockets do not hold reference on the netns. This patch adds inet_csk_clear_xmit_timers_sync() function that using sk_stop_timer_sync() to make sure all timers are terminated before the kernel socket is released. Modules using kernel sockets close them in their netns exit() handler. Also add sock_not_owned_by_me() helper to get LOCKDEP support : inet_csk_clear_xmit_timers_sync() must not be called while socket lock is held. It is very possible we can revert in the future commit 3a58f13a881e ("net: rds: acquire refcount on TCP sockets") which attempted to solve the issue in rds only. (net/smc/af_smc.c and net/mptcp/subflow.c have similar code) We probably can remove the check_net() tests from tcp_out_of_resources() and __tcp_close() in the future. Reported-by: Josef Bacik Closes: https://lore.kernel.org/netdev/20240314210740.GA2823176@perftesting/ Fixes: 26abe14379f8 ("net: Modify sk_alloc to not reference count the netns of kernel sockets.") Fixes: 8a68173691f0 ("net: sk_clone_lock() should only do get_net() if the parent is not a kernel socket") Link: https://lore.kernel.org/bpf/CANn89i+484ffqb93aQm1N-tjxxvb3WDKX0EbD7318RwRgsatjw@mail.gmail.com/ Signed-off-by: Eric Dumazet Tested-by: Josef Bacik Cc: Tetsuo Handa Link: https://lore.kernel.org/r/20240322135732.1535772-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- include/net/inet_connection_sock.h | 1 + include/net/sock.h | 7 +++++++ net/ipv4/inet_connection_sock.c | 14 ++++++++++++++ net/ipv4/tcp.c | 2 ++ 4 files changed, 24 insertions(+) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 01a73bf74fa1..6ecac01115d9 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -173,6 +173,7 @@ void inet_csk_init_xmit_timers(struct sock *sk, void (*delack_handler)(struct timer_list *), void (*keepalive_handler)(struct timer_list *)); void inet_csk_clear_xmit_timers(struct sock *sk); +void inet_csk_clear_xmit_timers_sync(struct sock *sk); static inline void inet_csk_schedule_ack(struct sock *sk) { diff --git a/include/net/sock.h b/include/net/sock.h index e70c903b04f3..25780942ec8b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1808,6 +1808,13 @@ static inline void sock_owned_by_me(const struct sock *sk) #endif } +static inline void sock_not_owned_by_me(const struct sock *sk) +{ +#ifdef CONFIG_LOCKDEP + WARN_ON_ONCE(lockdep_sock_is_held(sk) && debug_locks); +#endif +} + static inline bool sock_owned_by_user(const struct sock *sk) { sock_owned_by_me(sk); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 762817d6c8d7..a587cb6be807 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -774,6 +774,20 @@ void inet_csk_clear_xmit_timers(struct sock *sk) } EXPORT_SYMBOL(inet_csk_clear_xmit_timers); +void inet_csk_clear_xmit_timers_sync(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + /* ongoing timer handlers need to acquire socket lock. */ + sock_not_owned_by_me(sk); + + icsk->icsk_pending = icsk->icsk_ack.pending = 0; + + sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer); + sk_stop_timer_sync(sk, &icsk->icsk_delack_timer); + sk_stop_timer_sync(sk, &sk->sk_timer); +} + void inet_csk_delete_keepalive_timer(struct sock *sk) { sk_stop_timer(sk, &sk->sk_timer); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 68bb8d6bcc11..f8df35f7352a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2931,6 +2931,8 @@ void tcp_close(struct sock *sk, long timeout) lock_sock(sk); __tcp_close(sk, timeout); release_sock(sk); + if (!sk->sk_net_refcnt) + inet_csk_clear_xmit_timers_sync(sk); sock_put(sk); } EXPORT_SYMBOL(tcp_close); -- Gitee From b7d5bd61cb5fd85fdcdee86c5390146bfb5758c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Fri, 22 Mar 2024 15:40:00 +0100 Subject: [PATCH 035/268] net: wwan: t7xx: Split 64bit accesses to fix alignment issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-6.6.26 commit 2e22c9cb618716b8e557fe17c3d4958171288082 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 7d5a7dd5a35876f0ecc286f3602a88887a788217 ] Some of the registers are aligned on a 32bit boundary, causing alignment faults on 64bit platforms. Unable to handle kernel paging request at virtual address ffffffc084a1d004 Mem abort info: ESR = 0x0000000096000061 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x21: alignment fault Data abort info: ISV = 0, ISS = 0x00000061, ISS2 = 0x00000000 CM = 0, WnR = 1, TnD = 0, TagAccess = 0 GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 swapper pgtable: 4k pages, 39-bit VAs, pgdp=0000000046ad6000 [ffffffc084a1d004] pgd=100000013ffff003, p4d=100000013ffff003, pud=100000013ffff003, pmd=0068000020a00711 Internal error: Oops: 0000000096000061 [#1] SMP Modules linked in: mtk_t7xx(+) qcserial pppoe ppp_async option nft_fib_inet nf_flow_table_inet mt7921u(O) mt7921s(O) mt7921e(O) mt7921_common(O) iwlmvm(O) iwldvm(O) usb_wwan rndis_host qmi_wwan pppox ppp_generic nft_reject_ipv6 nft_reject_ipv4 nft_reject_inet nft_reject nft_redir nft_quota nft_numgen nft_nat nft_masq nft_log nft_limit nft_hash nft_flow_offload nft_fib_ipv6 nft_fib_ipv4 nft_fib nft_ct nft_chain_nat nf_tables nf_nat nf_flow_table nf_conntrack mt7996e(O) mt792x_usb(O) mt792x_lib(O) mt7915e(O) mt76_usb(O) mt76_sdio(O) mt76_connac_lib(O) mt76(O) mac80211(O) iwlwifi(O) huawei_cdc_ncm cfg80211(O) cdc_ncm cdc_ether wwan usbserial usbnet slhc sfp rtc_pcf8563 nfnetlink nf_reject_ipv6 nf_reject_ipv4 nf_log_syslog nf_defrag_ipv6 nf_defrag_ipv4 mt6577_auxadc mdio_i2c libcrc32c compat(O) cdc_wdm cdc_acm at24 crypto_safexcel pwm_fan i2c_gpio i2c_smbus industrialio i2c_algo_bit i2c_mux_reg i2c_mux_pca954x i2c_mux_pca9541 i2c_mux_gpio i2c_mux dummy oid_registry tun sha512_arm64 sha1_ce sha1_generic seqiv md5 geniv des_generic libdes cbc authencesn authenc leds_gpio xhci_plat_hcd xhci_pci xhci_mtk_hcd xhci_hcd nvme nvme_core gpio_button_hotplug(O) dm_mirror dm_region_hash dm_log dm_crypt dm_mod dax usbcore usb_common ptp aquantia pps_core mii tpm encrypted_keys trusted CPU: 3 PID: 5266 Comm: kworker/u9:1 Tainted: G O 6.6.22 #0 Hardware name: Bananapi BPI-R4 (DT) Workqueue: md_hk_wq t7xx_fsm_uninit [mtk_t7xx] pstate: 804000c5 (Nzcv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : t7xx_cldma_hw_set_start_addr+0x1c/0x3c [mtk_t7xx] lr : t7xx_cldma_start+0xac/0x13c [mtk_t7xx] sp : ffffffc085d63d30 x29: ffffffc085d63d30 x28: 0000000000000000 x27: 0000000000000000 x26: 0000000000000000 x25: ffffff80c804f2c0 x24: ffffff80ca196c05 x23: 0000000000000000 x22: ffffff80c814b9b8 x21: ffffff80c814b128 x20: 0000000000000001 x19: ffffff80c814b080 x18: 0000000000000014 x17: 0000000055c9806b x16: 000000007c5296d0 x15: 000000000f6bca68 x14: 00000000dbdbdce4 x13: 000000001aeaf72a x12: 0000000000000001 x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000 x8 : ffffff80ca1ef6b4 x7 : ffffff80c814b818 x6 : 0000000000000018 x5 : 0000000000000870 x4 : 0000000000000000 x3 : 0000000000000000 x2 : 000000010a947000 x1 : ffffffc084a1d004 x0 : ffffffc084a1d004 Call trace: t7xx_cldma_hw_set_start_addr+0x1c/0x3c [mtk_t7xx] t7xx_fsm_uninit+0x578/0x5ec [mtk_t7xx] process_one_work+0x154/0x2a0 worker_thread+0x2ac/0x488 kthread+0xe0/0xec ret_from_fork+0x10/0x20 Code: f9400800 91001000 8b214001 d50332bf (f9000022) ---[ end trace 0000000000000000 ]--- The inclusion of io-64-nonatomic-lo-hi.h indicates that all 64bit accesses can be replaced by pairs of nonatomic 32bit access. Fix alignment by forcing all accesses to be 32bit on 64bit platforms. Link: https://forum.openwrt.org/t/fibocom-fm350-gl-support/142682/72 Fixes: 39d439047f1d ("net: wwan: t7xx: Add control DMA interface") Signed-off-by: Bjørn Mork Reviewed-by: Sergey Ryazanov Tested-by: Liviu Dudau Link: https://lore.kernel.org/r/20240322144000.1683822-1-bjorn@mork.no Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- drivers/net/wwan/t7xx/t7xx_cldma.c | 4 ++-- drivers/net/wwan/t7xx/t7xx_hif_cldma.c | 9 +++++---- drivers/net/wwan/t7xx/t7xx_pcie_mac.c | 8 ++++---- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/net/wwan/t7xx/t7xx_cldma.c b/drivers/net/wwan/t7xx/t7xx_cldma.c index 9f43f256db1d..f0a4783baf1f 100644 --- a/drivers/net/wwan/t7xx/t7xx_cldma.c +++ b/drivers/net/wwan/t7xx/t7xx_cldma.c @@ -106,7 +106,7 @@ bool t7xx_cldma_tx_addr_is_set(struct t7xx_cldma_hw *hw_info, unsigned int qno) { u32 offset = REG_CLDMA_UL_START_ADDRL_0 + qno * ADDR_SIZE; - return ioread64(hw_info->ap_pdn_base + offset); + return ioread64_lo_hi(hw_info->ap_pdn_base + offset); } void t7xx_cldma_hw_set_start_addr(struct t7xx_cldma_hw *hw_info, unsigned int qno, u64 address, @@ -117,7 +117,7 @@ void t7xx_cldma_hw_set_start_addr(struct t7xx_cldma_hw *hw_info, unsigned int qn reg = tx_rx == MTK_RX ? hw_info->ap_ao_base + REG_CLDMA_DL_START_ADDRL_0 : hw_info->ap_pdn_base + REG_CLDMA_UL_START_ADDRL_0; - iowrite64(address, reg + offset); + iowrite64_lo_hi(address, reg + offset); } void t7xx_cldma_hw_resume_queue(struct t7xx_cldma_hw *hw_info, unsigned int qno, diff --git a/drivers/net/wwan/t7xx/t7xx_hif_cldma.c b/drivers/net/wwan/t7xx/t7xx_hif_cldma.c index cc70360364b7..554ba4669cc8 100644 --- a/drivers/net/wwan/t7xx/t7xx_hif_cldma.c +++ b/drivers/net/wwan/t7xx/t7xx_hif_cldma.c @@ -139,8 +139,9 @@ static int t7xx_cldma_gpd_rx_from_q(struct cldma_queue *queue, int budget, bool return -ENODEV; } - gpd_addr = ioread64(hw_info->ap_pdn_base + REG_CLDMA_DL_CURRENT_ADDRL_0 + - queue->index * sizeof(u64)); + gpd_addr = ioread64_lo_hi(hw_info->ap_pdn_base + + REG_CLDMA_DL_CURRENT_ADDRL_0 + + queue->index * sizeof(u64)); if (req->gpd_addr == gpd_addr || hwo_polling_count++ >= 100) return 0; @@ -318,8 +319,8 @@ static void t7xx_cldma_txq_empty_hndl(struct cldma_queue *queue) struct t7xx_cldma_hw *hw_info = &md_ctrl->hw_info; /* Check current processing TGPD, 64-bit address is in a table by Q index */ - ul_curr_addr = ioread64(hw_info->ap_pdn_base + REG_CLDMA_UL_CURRENT_ADDRL_0 + - queue->index * sizeof(u64)); + ul_curr_addr = ioread64_lo_hi(hw_info->ap_pdn_base + REG_CLDMA_UL_CURRENT_ADDRL_0 + + queue->index * sizeof(u64)); if (req->gpd_addr != ul_curr_addr) { spin_unlock_irqrestore(&md_ctrl->cldma_lock, flags); dev_err(md_ctrl->dev, "CLDMA%d queue %d is not empty\n", diff --git a/drivers/net/wwan/t7xx/t7xx_pcie_mac.c b/drivers/net/wwan/t7xx/t7xx_pcie_mac.c index 76da4c15e3de..f071ec7ff23d 100644 --- a/drivers/net/wwan/t7xx/t7xx_pcie_mac.c +++ b/drivers/net/wwan/t7xx/t7xx_pcie_mac.c @@ -75,7 +75,7 @@ static void t7xx_pcie_mac_atr_tables_dis(void __iomem *pbase, enum t7xx_atr_src_ for (i = 0; i < ATR_TABLE_NUM_PER_ATR; i++) { offset = ATR_PORT_OFFSET * port + ATR_TABLE_OFFSET * i; reg = pbase + ATR_PCIE_WIN0_T0_ATR_PARAM_SRC_ADDR + offset; - iowrite64(0, reg); + iowrite64_lo_hi(0, reg); } } @@ -112,17 +112,17 @@ static int t7xx_pcie_mac_atr_cfg(struct t7xx_pci_dev *t7xx_dev, struct t7xx_atr_ reg = pbase + ATR_PCIE_WIN0_T0_TRSL_ADDR + offset; value = cfg->trsl_addr & ATR_PCIE_WIN0_ADDR_ALGMT; - iowrite64(value, reg); + iowrite64_lo_hi(value, reg); reg = pbase + ATR_PCIE_WIN0_T0_TRSL_PARAM + offset; iowrite32(cfg->trsl_id, reg); reg = pbase + ATR_PCIE_WIN0_T0_ATR_PARAM_SRC_ADDR + offset; value = (cfg->src_addr & ATR_PCIE_WIN0_ADDR_ALGMT) | (atr_size << 1) | BIT(0); - iowrite64(value, reg); + iowrite64_lo_hi(value, reg); /* Ensure ATR is set */ - ioread64(reg); + ioread64_lo_hi(reg); return 0; } -- Gitee From d8f051190aeed5e96874ae759eb6eb4085671154 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 25 Mar 2024 09:50:30 +0200 Subject: [PATCH 036/268] selftests: vxlan_mdb: Fix failures with old libnet stable inclusion from stable-6.6.26 commit 7fb8b3de7f22adfee1a40562c249974b31e0697e category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit f1425529c33def8b46faae4400dd9e2bbaf16a05 ] Locally generated IP multicast packets (such as the ones used in the test) do not perform routing and simply egress the bound device. However, as explained in commit 8bcfb4ae4d97 ("selftests: forwarding: Fix failing tests with old libnet"), old versions of libnet (used by mausezahn) do not use the "SO_BINDTODEVICE" socket option. Specifically, the library started using the option for IPv6 sockets in version 1.1.6 and for IPv4 sockets in version 1.2. This explains why on Ubuntu - which uses version 1.1.6 - the IPv4 overlay tests are failing whereas the IPv6 ones are passing. Fix by specifying the source and destination MAC of the packets which will cause mausezahn to use a packet socket instead of an IP socket. Fixes: 62199e3f1658 ("selftests: net: Add VXLAN MDB test") Reported-by: Mirsad Todorovac Closes: https://lore.kernel.org/netdev/5bb50349-196d-4892-8ed2-f37543aa863f@alu.unizg.hr/ Tested-by: Mirsad Todorovac Signed-off-by: Ido Schimmel Link: https://lore.kernel.org/r/20240325075030.2379513-1-idosch@nvidia.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- tools/testing/selftests/net/test_vxlan_mdb.sh | 205 +++++++++++------- 1 file changed, 128 insertions(+), 77 deletions(-) diff --git a/tools/testing/selftests/net/test_vxlan_mdb.sh b/tools/testing/selftests/net/test_vxlan_mdb.sh index 31e5f0f8859d..be8e66abc74e 100755 --- a/tools/testing/selftests/net/test_vxlan_mdb.sh +++ b/tools/testing/selftests/net/test_vxlan_mdb.sh @@ -984,6 +984,7 @@ encap_params_common() local plen=$1; shift local enc_ethtype=$1; shift local grp=$1; shift + local grp_dmac=$1; shift local src=$1; shift local mz=$1; shift @@ -1002,11 +1003,11 @@ encap_params_common() run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep2_ip src_vni 10020" run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass" - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Destination IP - match" - run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Destination IP - no match" @@ -1019,20 +1020,20 @@ encap_params_common() run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip dst_port 1111 src_vni 10020" run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 4789 action pass" - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev veth0 ingress" 101 1 log_test $? 0 "Default destination port - match" - run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev veth0 ingress" 101 1 log_test $? 0 "Default destination port - no match" run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 1111 action pass" - run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev veth0 ingress" 101 1 log_test $? 0 "Non-default destination port - match" - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev veth0 ingress" 101 1 log_test $? 0 "Non-default destination port - no match" @@ -1045,11 +1046,11 @@ encap_params_common() run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10020" run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10010 action pass" - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Default destination VNI - match" - run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Default destination VNI - no match" @@ -1057,11 +1058,11 @@ encap_params_common() run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip vni 10010 src_vni 10020" run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10020 action pass" - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Non-default destination VNI - match" - run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Non-default destination VNI - no match" @@ -1079,6 +1080,7 @@ encap_params_ipv4_ipv4() local plen=32 local enc_ethtype="ip" local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local src=192.0.2.129 echo @@ -1086,7 +1088,7 @@ encap_params_ipv4_ipv4() echo "------------------------------------------------------------------" encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ - $grp $src "mausezahn" + $grp $grp_dmac $src "mausezahn" } encap_params_ipv6_ipv4() @@ -1098,6 +1100,7 @@ encap_params_ipv6_ipv4() local plen=32 local enc_ethtype="ip" local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local src=2001:db8:100::1 echo @@ -1105,7 +1108,7 @@ encap_params_ipv6_ipv4() echo "------------------------------------------------------------------" encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ - $grp $src "mausezahn -6" + $grp $grp_dmac $src "mausezahn -6" } encap_params_ipv4_ipv6() @@ -1117,6 +1120,7 @@ encap_params_ipv4_ipv6() local plen=128 local enc_ethtype="ipv6" local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local src=192.0.2.129 echo @@ -1124,7 +1128,7 @@ encap_params_ipv4_ipv6() echo "------------------------------------------------------------------" encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ - $grp $src "mausezahn" + $grp $grp_dmac $src "mausezahn" } encap_params_ipv6_ipv6() @@ -1136,6 +1140,7 @@ encap_params_ipv6_ipv6() local plen=128 local enc_ethtype="ipv6" local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local src=2001:db8:100::1 echo @@ -1143,7 +1148,7 @@ encap_params_ipv6_ipv6() echo "------------------------------------------------------------------" encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ - $grp $src "mausezahn -6" + $grp $grp_dmac $src "mausezahn -6" } starg_exclude_ir_common() @@ -1154,6 +1159,7 @@ starg_exclude_ir_common() local vtep2_ip=$1; shift local plen=$1; shift local grp=$1; shift + local grp_dmac=$1; shift local valid_src=$1; shift local invalid_src=$1; shift local mz=$1; shift @@ -1175,14 +1181,14 @@ starg_exclude_ir_common() run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $vtep2_ip src_vni 10010" # Check that invalid source is not forwarded to any VTEP. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 0 log_test $? 0 "Block excluded source - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 0 log_test $? 0 "Block excluded source - second VTEP" # Check that valid source is forwarded to both VTEPs. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Forward valid source - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 1 @@ -1192,14 +1198,14 @@ starg_exclude_ir_common() run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010" # Check that invalid source is not forwarded to any VTEP. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Block excluded source after removal - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 1 log_test $? 0 "Block excluded source after removal - second VTEP" # Check that valid source is forwarded to the remaining VTEP. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 2 log_test $? 0 "Forward valid source after removal - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 1 @@ -1214,6 +1220,7 @@ starg_exclude_ir_ipv4_ipv4() local vtep2_ip=198.51.100.200 local plen=32 local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local valid_src=192.0.2.129 local invalid_src=192.0.2.145 @@ -1222,7 +1229,7 @@ starg_exclude_ir_ipv4_ipv4() echo "-------------------------------------------------------------" starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ - $valid_src $invalid_src "mausezahn" + $grp_dmac $valid_src $invalid_src "mausezahn" } starg_exclude_ir_ipv6_ipv4() @@ -1233,6 +1240,7 @@ starg_exclude_ir_ipv6_ipv4() local vtep2_ip=198.51.100.200 local plen=32 local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local valid_src=2001:db8:100::1 local invalid_src=2001:db8:200::1 @@ -1241,7 +1249,7 @@ starg_exclude_ir_ipv6_ipv4() echo "-------------------------------------------------------------" starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ - $valid_src $invalid_src "mausezahn -6" + $grp_dmac $valid_src $invalid_src "mausezahn -6" } starg_exclude_ir_ipv4_ipv6() @@ -1252,6 +1260,7 @@ starg_exclude_ir_ipv4_ipv6() local vtep2_ip=2001:db8:2000::1 local plen=128 local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local valid_src=192.0.2.129 local invalid_src=192.0.2.145 @@ -1260,7 +1269,7 @@ starg_exclude_ir_ipv4_ipv6() echo "-------------------------------------------------------------" starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ - $valid_src $invalid_src "mausezahn" + $grp_dmac $valid_src $invalid_src "mausezahn" } starg_exclude_ir_ipv6_ipv6() @@ -1271,6 +1280,7 @@ starg_exclude_ir_ipv6_ipv6() local vtep2_ip=2001:db8:2000::1 local plen=128 local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local valid_src=2001:db8:100::1 local invalid_src=2001:db8:200::1 @@ -1279,7 +1289,7 @@ starg_exclude_ir_ipv6_ipv6() echo "-------------------------------------------------------------" starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ - $valid_src $invalid_src "mausezahn -6" + $grp_dmac $valid_src $invalid_src "mausezahn -6" } starg_include_ir_common() @@ -1290,6 +1300,7 @@ starg_include_ir_common() local vtep2_ip=$1; shift local plen=$1; shift local grp=$1; shift + local grp_dmac=$1; shift local valid_src=$1; shift local invalid_src=$1; shift local mz=$1; shift @@ -1311,14 +1322,14 @@ starg_include_ir_common() run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $vtep2_ip src_vni 10010" # Check that invalid source is not forwarded to any VTEP. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 0 log_test $? 0 "Block excluded source - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 0 log_test $? 0 "Block excluded source - second VTEP" # Check that valid source is forwarded to both VTEPs. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Forward valid source - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 1 @@ -1328,14 +1339,14 @@ starg_include_ir_common() run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010" # Check that invalid source is not forwarded to any VTEP. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Block excluded source after removal - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 1 log_test $? 0 "Block excluded source after removal - second VTEP" # Check that valid source is forwarded to the remaining VTEP. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 2 log_test $? 0 "Forward valid source after removal - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 1 @@ -1350,6 +1361,7 @@ starg_include_ir_ipv4_ipv4() local vtep2_ip=198.51.100.200 local plen=32 local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local valid_src=192.0.2.129 local invalid_src=192.0.2.145 @@ -1358,7 +1370,7 @@ starg_include_ir_ipv4_ipv4() echo "-------------------------------------------------------------" starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ - $valid_src $invalid_src "mausezahn" + $grp_dmac $valid_src $invalid_src "mausezahn" } starg_include_ir_ipv6_ipv4() @@ -1369,6 +1381,7 @@ starg_include_ir_ipv6_ipv4() local vtep2_ip=198.51.100.200 local plen=32 local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local valid_src=2001:db8:100::1 local invalid_src=2001:db8:200::1 @@ -1377,7 +1390,7 @@ starg_include_ir_ipv6_ipv4() echo "-------------------------------------------------------------" starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ - $valid_src $invalid_src "mausezahn -6" + $grp_dmac $valid_src $invalid_src "mausezahn -6" } starg_include_ir_ipv4_ipv6() @@ -1388,6 +1401,7 @@ starg_include_ir_ipv4_ipv6() local vtep2_ip=2001:db8:2000::1 local plen=128 local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local valid_src=192.0.2.129 local invalid_src=192.0.2.145 @@ -1396,7 +1410,7 @@ starg_include_ir_ipv4_ipv6() echo "-------------------------------------------------------------" starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ - $valid_src $invalid_src "mausezahn" + $grp_dmac $valid_src $invalid_src "mausezahn" } starg_include_ir_ipv6_ipv6() @@ -1407,6 +1421,7 @@ starg_include_ir_ipv6_ipv6() local vtep2_ip=2001:db8:2000::1 local plen=128 local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local valid_src=2001:db8:100::1 local invalid_src=2001:db8:200::1 @@ -1415,7 +1430,7 @@ starg_include_ir_ipv6_ipv6() echo "-------------------------------------------------------------" starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ - $valid_src $invalid_src "mausezahn -6" + $grp_dmac $valid_src $invalid_src "mausezahn -6" } starg_exclude_p2mp_common() @@ -1425,6 +1440,7 @@ starg_exclude_p2mp_common() local mcast_grp=$1; shift local plen=$1; shift local grp=$1; shift + local grp_dmac=$1; shift local valid_src=$1; shift local invalid_src=$1; shift local mz=$1; shift @@ -1442,12 +1458,12 @@ starg_exclude_p2mp_common() run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $mcast_grp src_vni 10010 via veth0" # Check that invalid source is not forwarded. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 0 log_test $? 0 "Block excluded source" # Check that valid source is forwarded. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Forward valid source" @@ -1455,7 +1471,7 @@ starg_exclude_p2mp_common() run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0" # Check that valid source is not received anymore. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Receive of valid source after removal from group" } @@ -1467,6 +1483,7 @@ starg_exclude_p2mp_ipv4_ipv4() local mcast_grp=238.1.1.1 local plen=32 local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local valid_src=192.0.2.129 local invalid_src=192.0.2.145 @@ -1474,7 +1491,7 @@ starg_exclude_p2mp_ipv4_ipv4() echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv4 underlay" echo "---------------------------------------------------------------" - starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ $valid_src $invalid_src "mausezahn" } @@ -1485,6 +1502,7 @@ starg_exclude_p2mp_ipv6_ipv4() local mcast_grp=238.1.1.1 local plen=32 local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local valid_src=2001:db8:100::1 local invalid_src=2001:db8:200::1 @@ -1492,7 +1510,7 @@ starg_exclude_p2mp_ipv6_ipv4() echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv4 underlay" echo "---------------------------------------------------------------" - starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ $valid_src $invalid_src "mausezahn -6" } @@ -1503,6 +1521,7 @@ starg_exclude_p2mp_ipv4_ipv6() local mcast_grp=ff0e::2 local plen=128 local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local valid_src=192.0.2.129 local invalid_src=192.0.2.145 @@ -1510,7 +1529,7 @@ starg_exclude_p2mp_ipv4_ipv6() echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv6 underlay" echo "---------------------------------------------------------------" - starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ $valid_src $invalid_src "mausezahn" } @@ -1521,6 +1540,7 @@ starg_exclude_p2mp_ipv6_ipv6() local mcast_grp=ff0e::2 local plen=128 local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local valid_src=2001:db8:100::1 local invalid_src=2001:db8:200::1 @@ -1528,7 +1548,7 @@ starg_exclude_p2mp_ipv6_ipv6() echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv6 underlay" echo "---------------------------------------------------------------" - starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ $valid_src $invalid_src "mausezahn -6" } @@ -1539,6 +1559,7 @@ starg_include_p2mp_common() local mcast_grp=$1; shift local plen=$1; shift local grp=$1; shift + local grp_dmac=$1; shift local valid_src=$1; shift local invalid_src=$1; shift local mz=$1; shift @@ -1556,12 +1577,12 @@ starg_include_p2mp_common() run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $mcast_grp src_vni 10010 via veth0" # Check that invalid source is not forwarded. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 0 log_test $? 0 "Block excluded source" # Check that valid source is forwarded. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Forward valid source" @@ -1569,7 +1590,7 @@ starg_include_p2mp_common() run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0" # Check that valid source is not received anymore. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Receive of valid source after removal from group" } @@ -1581,6 +1602,7 @@ starg_include_p2mp_ipv4_ipv4() local mcast_grp=238.1.1.1 local plen=32 local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local valid_src=192.0.2.129 local invalid_src=192.0.2.145 @@ -1588,7 +1610,7 @@ starg_include_p2mp_ipv4_ipv4() echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv4 underlay" echo "---------------------------------------------------------------" - starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ $valid_src $invalid_src "mausezahn" } @@ -1599,6 +1621,7 @@ starg_include_p2mp_ipv6_ipv4() local mcast_grp=238.1.1.1 local plen=32 local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local valid_src=2001:db8:100::1 local invalid_src=2001:db8:200::1 @@ -1606,7 +1629,7 @@ starg_include_p2mp_ipv6_ipv4() echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv4 underlay" echo "---------------------------------------------------------------" - starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ $valid_src $invalid_src "mausezahn -6" } @@ -1617,6 +1640,7 @@ starg_include_p2mp_ipv4_ipv6() local mcast_grp=ff0e::2 local plen=128 local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local valid_src=192.0.2.129 local invalid_src=192.0.2.145 @@ -1624,7 +1648,7 @@ starg_include_p2mp_ipv4_ipv6() echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv6 underlay" echo "---------------------------------------------------------------" - starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ $valid_src $invalid_src "mausezahn" } @@ -1635,6 +1659,7 @@ starg_include_p2mp_ipv6_ipv6() local mcast_grp=ff0e::2 local plen=128 local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local valid_src=2001:db8:100::1 local invalid_src=2001:db8:200::1 @@ -1642,7 +1667,7 @@ starg_include_p2mp_ipv6_ipv6() echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv6 underlay" echo "---------------------------------------------------------------" - starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ $valid_src $invalid_src "mausezahn -6" } @@ -1654,6 +1679,7 @@ egress_vni_translation_common() local plen=$1; shift local proto=$1; shift local grp=$1; shift + local grp_dmac=$1; shift local src=$1; shift local mz=$1; shift @@ -1689,20 +1715,20 @@ egress_vni_translation_common() # Make sure that packets sent from the first VTEP over VLAN 10 are # received by the SVI corresponding to the L3VNI (14000 / VLAN 4000) on # the second VTEP, since it is configured as PVID. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1 log_test $? 0 "Egress VNI translation - PVID configured" # Remove PVID flag from VLAN 4000 on the second VTEP and make sure # packets are no longer received by the SVI interface. run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0" - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1 log_test $? 0 "Egress VNI translation - no PVID configured" # Reconfigure the PVID and make sure packets are received again. run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0 pvid" - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev br0.4000 ingress" 101 2 log_test $? 0 "Egress VNI translation - PVID reconfigured" } @@ -1715,6 +1741,7 @@ egress_vni_translation_ipv4_ipv4() local plen=32 local proto="ipv4" local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local src=192.0.2.129 echo @@ -1722,7 +1749,7 @@ egress_vni_translation_ipv4_ipv4() echo "----------------------------------------------------------------" egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ - $src "mausezahn" + $grp_dmac $src "mausezahn" } egress_vni_translation_ipv6_ipv4() @@ -1733,6 +1760,7 @@ egress_vni_translation_ipv6_ipv4() local plen=32 local proto="ipv6" local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local src=2001:db8:100::1 echo @@ -1740,7 +1768,7 @@ egress_vni_translation_ipv6_ipv4() echo "----------------------------------------------------------------" egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ - $src "mausezahn -6" + $grp_dmac $src "mausezahn -6" } egress_vni_translation_ipv4_ipv6() @@ -1751,6 +1779,7 @@ egress_vni_translation_ipv4_ipv6() local plen=128 local proto="ipv4" local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local src=192.0.2.129 echo @@ -1758,7 +1787,7 @@ egress_vni_translation_ipv4_ipv6() echo "----------------------------------------------------------------" egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ - $src "mausezahn" + $grp_dmac $src "mausezahn" } egress_vni_translation_ipv6_ipv6() @@ -1769,6 +1798,7 @@ egress_vni_translation_ipv6_ipv6() local plen=128 local proto="ipv6" local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local src=2001:db8:100::1 echo @@ -1776,7 +1806,7 @@ egress_vni_translation_ipv6_ipv6() echo "----------------------------------------------------------------" egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ - $src "mausezahn -6" + $grp_dmac $src "mausezahn -6" } all_zeros_mdb_common() @@ -1789,12 +1819,18 @@ all_zeros_mdb_common() local vtep4_ip=$1; shift local plen=$1; shift local ipv4_grp=239.1.1.1 + local ipv4_grp_dmac=01:00:5e:01:01:01 local ipv4_unreg_grp=239.2.2.2 + local ipv4_unreg_grp_dmac=01:00:5e:02:02:02 local ipv4_ll_grp=224.0.0.100 + local ipv4_ll_grp_dmac=01:00:5e:00:00:64 local ipv4_src=192.0.2.129 local ipv6_grp=ff0e::1 + local ipv6_grp_dmac=33:33:00:00:00:01 local ipv6_unreg_grp=ff0e::2 + local ipv6_unreg_grp_dmac=33:33:00:00:00:02 local ipv6_ll_grp=ff02::1 + local ipv6_ll_grp_dmac=33:33:00:00:00:01 local ipv6_src=2001:db8:100::1 # Install all-zeros (catchall) MDB entries for IPv4 and IPv6 traffic @@ -1830,7 +1866,7 @@ all_zeros_mdb_common() # Send registered IPv4 multicast and make sure it only arrives to the # first VTEP. - run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_grp_dmac -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Registered IPv4 multicast - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 0 @@ -1838,7 +1874,7 @@ all_zeros_mdb_common() # Send unregistered IPv4 multicast that is not link-local and make sure # it arrives to the first and second VTEPs. - run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_unreg_grp_dmac -A $ipv4_src -B $ipv4_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 2 log_test $? 0 "Unregistered IPv4 multicast - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 1 @@ -1846,7 +1882,7 @@ all_zeros_mdb_common() # Send IPv4 link-local multicast traffic and make sure it does not # arrive to any VTEP. - run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_ll_grp_dmac -A $ipv4_src -B $ipv4_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 2 log_test $? 0 "Link-local IPv4 multicast - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 1 @@ -1881,7 +1917,7 @@ all_zeros_mdb_common() # Send registered IPv6 multicast and make sure it only arrives to the # third VTEP. - run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_grp_dmac -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 103 1 log_test $? 0 "Registered IPv6 multicast - third VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 104 0 @@ -1889,7 +1925,7 @@ all_zeros_mdb_common() # Send unregistered IPv6 multicast that is not link-local and make sure # it arrives to the third and fourth VTEPs. - run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_unreg_grp_dmac -A $ipv6_src -B $ipv6_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 103 2 log_test $? 0 "Unregistered IPv6 multicast - third VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 104 1 @@ -1897,7 +1933,7 @@ all_zeros_mdb_common() # Send IPv6 link-local multicast traffic and make sure it does not # arrive to any VTEP. - run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_ll_grp_dmac -A $ipv6_src -B $ipv6_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 103 2 log_test $? 0 "Link-local IPv6 multicast - third VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 104 1 @@ -1972,6 +2008,7 @@ mdb_fdb_common() local plen=$1; shift local proto=$1; shift local grp=$1; shift + local grp_dmac=$1; shift local src=$1; shift local mz=$1; shift @@ -1995,7 +2032,7 @@ mdb_fdb_common() # Send IP multicast traffic and make sure it is forwarded by the MDB # and only arrives to the first VTEP. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "IP multicast - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 0 @@ -2012,7 +2049,7 @@ mdb_fdb_common() # Remove the MDB entry and make sure that IP multicast is now forwarded # by the FDB to the second VTEP. run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010" - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "IP multicast after removal - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 2 @@ -2028,14 +2065,15 @@ mdb_fdb_ipv4_ipv4() local plen=32 local proto="ipv4" local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local src=192.0.2.129 echo echo "Data path: MDB with FDB - IPv4 overlay / IPv4 underlay" echo "------------------------------------------------------" - mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ - "mausezahn" + mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \ + $grp_dmac $src "mausezahn" } mdb_fdb_ipv6_ipv4() @@ -2047,14 +2085,15 @@ mdb_fdb_ipv6_ipv4() local plen=32 local proto="ipv6" local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local src=2001:db8:100::1 echo echo "Data path: MDB with FDB - IPv6 overlay / IPv4 underlay" echo "------------------------------------------------------" - mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ - "mausezahn -6" + mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \ + $grp_dmac $src "mausezahn -6" } mdb_fdb_ipv4_ipv6() @@ -2066,14 +2105,15 @@ mdb_fdb_ipv4_ipv6() local plen=128 local proto="ipv4" local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local src=192.0.2.129 echo echo "Data path: MDB with FDB - IPv4 overlay / IPv6 underlay" echo "------------------------------------------------------" - mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ - "mausezahn" + mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \ + $grp_dmac $src "mausezahn" } mdb_fdb_ipv6_ipv6() @@ -2085,14 +2125,15 @@ mdb_fdb_ipv6_ipv6() local plen=128 local proto="ipv6" local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local src=2001:db8:100::1 echo echo "Data path: MDB with FDB - IPv6 overlay / IPv6 underlay" echo "------------------------------------------------------" - mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ - "mausezahn -6" + mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \ + $grp_dmac $src "mausezahn -6" } mdb_grp1_loop() @@ -2127,7 +2168,9 @@ mdb_torture_common() local vtep1_ip=$1; shift local vtep2_ip=$1; shift local grp1=$1; shift + local grp1_dmac=$1; shift local grp2=$1; shift + local grp2_dmac=$1; shift local src=$1; shift local mz=$1; shift local pid1 @@ -2152,9 +2195,9 @@ mdb_torture_common() pid1=$! mdb_grp2_loop $ns1 $vtep1_ip $vtep2_ip $grp2 & pid2=$! - ip netns exec $ns1 $mz br0.10 -A $src -B $grp1 -t udp sp=12345,dp=54321 -p 100 -c 0 -q & + ip netns exec $ns1 $mz br0.10 -a own -b $grp1_dmac -A $src -B $grp1 -t udp sp=12345,dp=54321 -p 100 -c 0 -q & pid3=$! - ip netns exec $ns1 $mz br0.10 -A $src -B $grp2 -t udp sp=12345,dp=54321 -p 100 -c 0 -q & + ip netns exec $ns1 $mz br0.10 -a own -b $grp2_dmac -A $src -B $grp2 -t udp sp=12345,dp=54321 -p 100 -c 0 -q & pid4=$! sleep 30 @@ -2170,15 +2213,17 @@ mdb_torture_ipv4_ipv4() local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local grp1=239.1.1.1 + local grp1_dmac=01:00:5e:01:01:01 local grp2=239.2.2.2 + local grp2_dmac=01:00:5e:02:02:02 local src=192.0.2.129 echo echo "Data path: MDB torture test - IPv4 overlay / IPv4 underlay" echo "----------------------------------------------------------" - mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ - "mausezahn" + mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \ + $grp2_dmac $src "mausezahn" } mdb_torture_ipv6_ipv4() @@ -2187,15 +2232,17 @@ mdb_torture_ipv6_ipv4() local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local grp1=ff0e::1 + local grp1_dmac=33:33:00:00:00:01 local grp2=ff0e::2 + local grp2_dmac=33:33:00:00:00:02 local src=2001:db8:100::1 echo echo "Data path: MDB torture test - IPv6 overlay / IPv4 underlay" echo "----------------------------------------------------------" - mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ - "mausezahn -6" + mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \ + $grp2_dmac $src "mausezahn -6" } mdb_torture_ipv4_ipv6() @@ -2204,15 +2251,17 @@ mdb_torture_ipv4_ipv6() local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local grp1=239.1.1.1 + local grp1_dmac=01:00:5e:01:01:01 local grp2=239.2.2.2 + local grp2_dmac=01:00:5e:02:02:02 local src=192.0.2.129 echo echo "Data path: MDB torture test - IPv4 overlay / IPv6 underlay" echo "----------------------------------------------------------" - mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ - "mausezahn" + mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \ + $grp2_dmac $src "mausezahn" } mdb_torture_ipv6_ipv6() @@ -2221,15 +2270,17 @@ mdb_torture_ipv6_ipv6() local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local grp1=ff0e::1 + local grp1_dmac=33:33:00:00:00:01 local grp2=ff0e::2 + local grp2_dmac=33:33:00:00:00:02 local src=2001:db8:100::1 echo echo "Data path: MDB torture test - IPv6 overlay / IPv6 underlay" echo "----------------------------------------------------------" - mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ - "mausezahn -6" + mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \ + $grp2_dmac $src "mausezahn -6" } ################################################################################ -- Gitee From 0afafc25124c804f539506d02cedad79f4001be1 Mon Sep 17 00:00:00 2001 From: Nikita Kiryushin Date: Fri, 22 Mar 2024 21:07:53 +0300 Subject: [PATCH 037/268] ACPICA: debugger: check status of acpi_evaluate_object() in acpi_db_walk_for_fields() stable inclusion from stable-6.6.26 commit 77ffc72b497e43e6a8f832729eb2358c3492b9c1 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 40e2710860e57411ab57a1529c5a2748abbe8a19 ] ACPICA commit 9061cd9aa131205657c811a52a9f8325a040c6c9 Errors in acpi_evaluate_object() can lead to incorrect state of buffer. This can lead to access to data in previously ACPI_FREEd buffer and secondary ACPI_FREE to the same buffer later. Handle errors in acpi_evaluate_object the same way it is done earlier with acpi_ns_handle_to_pathname. Found by Linux Verification Center (linuxtesting.org) with SVACE. Link: https://github.com/acpica/acpica/commit/9061cd9a Fixes: 5fd033288a86 ("ACPICA: debugger: add command to dump all fields of particular subtype") Signed-off-by: Nikita Kiryushin Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- drivers/acpi/acpica/dbnames.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/acpica/dbnames.c b/drivers/acpi/acpica/dbnames.c index b91155ea9c34..c9131259f717 100644 --- a/drivers/acpi/acpica/dbnames.c +++ b/drivers/acpi/acpica/dbnames.c @@ -550,8 +550,12 @@ acpi_db_walk_for_fields(acpi_handle obj_handle, ACPI_FREE(buffer.pointer); buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER; - acpi_evaluate_object(obj_handle, NULL, NULL, &buffer); - + status = acpi_evaluate_object(obj_handle, NULL, NULL, &buffer); + if (ACPI_FAILURE(status)) { + acpi_os_printf("Could Not evaluate object %p\n", + obj_handle); + return (AE_OK); + } /* * Since this is a field unit, surround the output in braces */ -- Gitee From 7baac442f26738e634f7a848c33509756d2a82ed Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Mon, 25 Mar 2024 20:43:09 +0800 Subject: [PATCH 038/268] net: hns3: fix index limit to support all queue stats stable inclusion from stable-6.6.26 commit b033da1461c1dd5a3a1ef868b2680cfe06331b0d category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 47e39d213e09c6cae0d6b4d95e454ea404013312 ] Currently, hns hardware supports more than 512 queues and the index limit in hclge_comm_tqps_update_stats is wrong. So this patch removes it. Fixes: 287db5c40d15 ("net: hns3: create new set of common tqp stats APIs for PF and VF reuse") Signed-off-by: Jie Wang Signed-off-by: Jijie Shao Reviewed-by: Michal Kubiak Reviewed-by: Kalesh AP Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- .../ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c index f3c9395d8351..618f66d9586b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c @@ -85,7 +85,7 @@ int hclge_comm_tqps_update_stats(struct hnae3_handle *handle, hclge_comm_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_TX_STATS, true); - desc.data[0] = cpu_to_le32(tqp->index & 0x1ff); + desc.data[0] = cpu_to_le32(tqp->index); ret = hclge_comm_cmd_send(hw, &desc, 1); if (ret) { dev_err(&hw->cmq.csq.pdev->dev, -- Gitee From fa858ad89a75bf28d3da0caa233a95f2f35cd69c Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Mon, 25 Mar 2024 20:43:10 +0800 Subject: [PATCH 039/268] net: hns3: fix kernel crash when devlink reload during pf initialization stable inclusion from stable-6.6.26 commit 1b550dae55901c2cc9075d6a7155a71b4f516e86 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 93305b77ffcb042f1538ecc383505e87d95aa05a ] The devlink reload process will access the hardware resources, but the register operation is done before the hardware is initialized. So, processing the devlink reload during initialization may lead to kernel crash. This patch fixes this by taking devl_lock during initialization. Fixes: b741269b2759 ("net: hns3: add support for registering devlink for PF") Signed-off-by: Yonglong Liu Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index f1ca2cda2961..dfd0c5f4cb9f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -11614,6 +11614,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) if (ret) goto err_pci_uninit; + devl_lock(hdev->devlink); + /* Firmware command queue initialize */ ret = hclge_comm_cmd_queue_init(hdev->pdev, &hdev->hw.hw); if (ret) @@ -11793,6 +11795,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) hclge_task_schedule(hdev, round_jiffies_relative(HZ)); + devl_unlock(hdev->devlink); return 0; err_mdiobus_unreg: @@ -11805,6 +11808,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) err_cmd_uninit: hclge_comm_cmd_uninit(hdev->ae_dev, &hdev->hw.hw); err_devlink_uninit: + devl_unlock(hdev->devlink); hclge_devlink_uninit(hdev); err_pci_uninit: pcim_iounmap(pdev, hdev->hw.hw.io_base); -- Gitee From 136dc1c24f4597666148bff55ea32bde47ec78a9 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Mon, 25 Mar 2024 20:43:11 +0800 Subject: [PATCH 040/268] net: hns3: mark unexcuted loopback test result as UNEXECUTED stable inclusion from stable-6.6.26 commit 872f574f88606121c7c56c0cd26ad305d2960f78 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 5bd088d6c21a45ee70e6116879310e54174d75eb ] Currently, loopback test may be skipped when resetting, but the test result will still show as 'PASS', because the driver doesn't set ETH_TEST_FL_FAILED flag. Fix it by setting the flag and initializating the value to UNEXECUTED. Fixes: 4c8dab1c709c ("net: hns3: reconstruct function hns3_self_test") Signed-off-by: Jian Shen Signed-off-by: Jijie Shao Reviewed-by: Michal Kubiak Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- .../ethernet/hisilicon/hns3/hns3_ethtool.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index 682239f33082..78181eea93c1 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -78,6 +78,9 @@ static const struct hns3_stats hns3_rxq_stats[] = { #define HNS3_NIC_LB_TEST_NO_MEM_ERR 1 #define HNS3_NIC_LB_TEST_TX_CNT_ERR 2 #define HNS3_NIC_LB_TEST_RX_CNT_ERR 3 +#define HNS3_NIC_LB_TEST_UNEXECUTED 4 + +static int hns3_get_sset_count(struct net_device *netdev, int stringset); static int hns3_lp_setup(struct net_device *ndev, enum hnae3_loop loop, bool en) { @@ -418,18 +421,26 @@ static void hns3_do_external_lb(struct net_device *ndev, static void hns3_self_test(struct net_device *ndev, struct ethtool_test *eth_test, u64 *data) { + int cnt = hns3_get_sset_count(ndev, ETH_SS_TEST); struct hns3_nic_priv *priv = netdev_priv(ndev); struct hnae3_handle *h = priv->ae_handle; int st_param[HNAE3_LOOP_NONE][2]; bool if_running = netif_running(ndev); + int i; + + /* initialize the loopback test result, avoid marking an unexcuted + * loopback test as PASS. + */ + for (i = 0; i < cnt; i++) + data[i] = HNS3_NIC_LB_TEST_UNEXECUTED; if (hns3_nic_resetting(ndev)) { netdev_err(ndev, "dev resetting!"); - return; + goto failure; } if (!(eth_test->flags & ETH_TEST_FL_OFFLINE)) - return; + goto failure; if (netif_msg_ifdown(h)) netdev_info(ndev, "self test start\n"); @@ -451,6 +462,10 @@ static void hns3_self_test(struct net_device *ndev, if (netif_msg_ifdown(h)) netdev_info(ndev, "self test end\n"); + return; + +failure: + eth_test->flags |= ETH_TEST_FL_FAILED; } static void hns3_update_limit_promisc_mode(struct net_device *netdev, -- Gitee From 5a0778b8f065ced5bbcd6214900ed78b55adc3d0 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 25 Mar 2024 16:56:45 +0100 Subject: [PATCH 041/268] tls: recv: process_rx_list shouldn't use an offset with kvec stable inclusion from stable-6.6.26 commit dc4bce20fa9e6af3a8b4aaa1eb26198c04fcef8a category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 7608a971fdeb4c3eefa522d1bfe8d4bc6b2481cc ] Only MSG_PEEK needs to copy from an offset during the final process_rx_list call, because the bytes we copied at the beginning of tls_sw_recvmsg were left on the rx_list. In the KVEC case, we removed data from the rx_list as we were copying it, so there's no need to use an offset, just like in the normal case. Fixes: 692d7b5d1f91 ("tls: Fix recvmsg() to be able to peek across multiple records") Signed-off-by: Sabrina Dubroca Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/e5487514f828e0347d2b92ca40002c62b58af73d.1711120964.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- net/tls/tls_sw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index acf5bb74fd38..8e753d10e694 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2152,7 +2152,7 @@ int tls_sw_recvmsg(struct sock *sk, } /* Drain records from the rx_list & copy if required */ - if (is_peek || is_kvec) + if (is_peek) err = process_rx_list(ctx, msg, &control, copied + peeked, decrypted - peeked, is_peek, NULL); else -- Gitee From 8c020ae7ccc792dbab0e4986dce5339c31ed9665 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 25 Mar 2024 16:56:46 +0100 Subject: [PATCH 042/268] tls: adjust recv return with async crypto and failed copy to userspace stable inclusion from stable-6.6.26 commit f19e995b4813a81b63df940ff2f8ba00882016c2 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 85eef9a41d019b59be7bc91793f26251909c0710 ] process_rx_list may not copy as many bytes as we want to the userspace buffer, for example in case we hit an EFAULT during the copy. If this happens, we should only count the bytes that were actually copied, which may be 0. Subtracting async_copy_bytes is correct in both peek and !peek cases, because decrypted == async_copy_bytes + peeked for the peek case: peek is always !ZC, and we can go through either the sync or async path. In the async case, we add chunk to both decrypted and async_copy_bytes. In the sync case, we add chunk to both decrypted and peeked. I missed that in commit 6caaf104423d ("tls: fix peeking with sync+async decryption"). Fixes: 4d42cd6bc2ac ("tls: rx: fix return value for async crypto") Signed-off-by: Sabrina Dubroca Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/1b5a1eaab3c088a9dd5d9f1059ceecd7afe888d1.1711120964.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- net/tls/tls_sw.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 8e753d10e694..925de4caa894 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2158,6 +2158,9 @@ int tls_sw_recvmsg(struct sock *sk, else err = process_rx_list(ctx, msg, &control, 0, async_copy_bytes, is_peek, NULL); + + /* we could have copied less than we wanted, and possibly nothing */ + decrypted += max(err, 0) - async_copy_bytes; } copied += decrypted; -- Gitee From 4e3bb88c06e8bd160160ea9d0e2729954b7515f1 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 25 Mar 2024 16:56:48 +0100 Subject: [PATCH 043/268] tls: get psock ref after taking rxlock to avoid leak stable inclusion from stable-6.6.26 commit f1b7f14130d782433bc98c1e1e41ce6b4d4c3096 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 417e91e856099e9b8a42a2520e2255e6afe024be ] At the start of tls_sw_recvmsg, we take a reference on the psock, and then call tls_rx_reader_lock. If that fails, we return directly without releasing the reference. Instead of adding a new label, just take the reference after locking has succeeded, since we don't need it before. Fixes: 4cbc325ed6b4 ("tls: rx: allow only one reader at a time") Signed-off-by: Sabrina Dubroca Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/fe2ade22d030051ce4c3638704ed58b67d0df643.1711120964.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- net/tls/tls_sw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 925de4caa894..df166f6afad8 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1976,10 +1976,10 @@ int tls_sw_recvmsg(struct sock *sk, if (unlikely(flags & MSG_ERRQUEUE)) return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR); - psock = sk_psock_get(sk); err = tls_rx_reader_lock(sk, ctx, flags & MSG_DONTWAIT); if (err < 0) return err; + psock = sk_psock_get(sk); bpf_strp_enabled = sk_psock_strp_enabled(psock); /* If crypto failed the connection is broken */ -- Gitee From 6a5e10d2ce1c576d7c3e5c62bd200493a5ace0ae Mon Sep 17 00:00:00 2001 From: David Thompson Date: Mon, 25 Mar 2024 14:36:27 -0400 Subject: [PATCH 044/268] mlxbf_gige: call request_irq() after NAPI initialized stable inclusion from stable-6.6.26 commit 867a2f598af6a645c865d1101b58c5e070c6dd9e category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit f7442a634ac06b953fc1f7418f307b25acd4cfbc ] The mlxbf_gige driver encounters a NULL pointer exception in mlxbf_gige_open() when kdump is enabled. The sequence to reproduce the exception is as follows: a) enable kdump b) trigger kdump via "echo c > /proc/sysrq-trigger" c) kdump kernel executes d) kdump kernel loads mlxbf_gige module e) the mlxbf_gige module runs its open() as the the "oob_net0" interface is brought up f) mlxbf_gige module will experience an exception during its open(), something like: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 Mem abort info: ESR = 0x0000000086000004 EC = 0x21: IABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x04: level 0 translation fault user pgtable: 4k pages, 48-bit VAs, pgdp=00000000e29a4000 [0000000000000000] pgd=0000000000000000, p4d=0000000000000000 Internal error: Oops: 0000000086000004 [#1] SMP CPU: 0 PID: 812 Comm: NetworkManager Tainted: G OE 5.15.0-1035-bluefield #37-Ubuntu Hardware name: https://www.mellanox.com BlueField-3 SmartNIC Main Card/BlueField-3 SmartNIC Main Card, BIOS 4.6.0.13024 Jan 19 2024 pstate: 80400009 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : 0x0 lr : __napi_poll+0x40/0x230 sp : ffff800008003e00 x29: ffff800008003e00 x28: 0000000000000000 x27: 00000000ffffffff x26: ffff000066027238 x25: ffff00007cedec00 x24: ffff800008003ec8 x23: 000000000000012c x22: ffff800008003eb7 x21: 0000000000000000 x20: 0000000000000001 x19: ffff000066027238 x18: 0000000000000000 x17: ffff578fcb450000 x16: ffffa870b083c7c0 x15: 0000aaab010441d0 x14: 0000000000000001 x13: 00726f7272655f65 x12: 6769675f6662786c x11: 0000000000000000 x10: 0000000000000000 x9 : ffffa870b0842398 x8 : 0000000000000004 x7 : fe5a48b9069706ea x6 : 17fdb11fc84ae0d2 x5 : d94a82549d594f35 x4 : 0000000000000000 x3 : 0000000000400100 x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff000066027238 Call trace: 0x0 net_rx_action+0x178/0x360 __do_softirq+0x15c/0x428 __irq_exit_rcu+0xac/0xec irq_exit+0x18/0x2c handle_domain_irq+0x6c/0xa0 gic_handle_irq+0xec/0x1b0 call_on_irq_stack+0x20/0x2c do_interrupt_handler+0x5c/0x70 el1_interrupt+0x30/0x50 el1h_64_irq_handler+0x18/0x2c el1h_64_irq+0x7c/0x80 __setup_irq+0x4c0/0x950 request_threaded_irq+0xf4/0x1bc mlxbf_gige_request_irqs+0x68/0x110 [mlxbf_gige] mlxbf_gige_open+0x5c/0x170 [mlxbf_gige] __dev_open+0x100/0x220 __dev_change_flags+0x16c/0x1f0 dev_change_flags+0x2c/0x70 do_setlink+0x220/0xa40 __rtnl_newlink+0x56c/0x8a0 rtnl_newlink+0x58/0x84 rtnetlink_rcv_msg+0x138/0x3c4 netlink_rcv_skb+0x64/0x130 rtnetlink_rcv+0x20/0x30 netlink_unicast+0x2ec/0x360 netlink_sendmsg+0x278/0x490 __sock_sendmsg+0x5c/0x6c ____sys_sendmsg+0x290/0x2d4 ___sys_sendmsg+0x84/0xd0 __sys_sendmsg+0x70/0xd0 __arm64_sys_sendmsg+0x2c/0x40 invoke_syscall+0x78/0x100 el0_svc_common.constprop.0+0x54/0x184 do_el0_svc+0x30/0xac el0_svc+0x48/0x160 el0t_64_sync_handler+0xa4/0x12c el0t_64_sync+0x1a4/0x1a8 Code: bad PC value ---[ end trace 7d1c3f3bf9d81885 ]--- Kernel panic - not syncing: Oops: Fatal exception in interrupt Kernel Offset: 0x2870a7a00000 from 0xffff800008000000 PHYS_OFFSET: 0x80000000 CPU features: 0x0,000005c1,a3332a5a Memory Limit: none ---[ end Kernel panic - not syncing: Oops: Fatal exception in interrupt ]--- The exception happens because there is a pending RX interrupt before the call to request_irq(RX IRQ) executes. Then, the RX IRQ handler fires immediately after this request_irq() completes. The RX IRQ handler runs "napi_schedule()" before NAPI is fully initialized via "netif_napi_add()" and "napi_enable()", both which happen later in the open() logic. The logic in mlxbf_gige_open() must fully initialize NAPI before any calls to request_irq() execute. Fixes: f92e1869d74e ("Add Mellanox BlueField Gigabit Ethernet driver") Signed-off-by: David Thompson Reviewed-by: Asmaa Mnebhi Link: https://lore.kernel.org/r/20240325183627.7641-1-davthompson@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- .../mellanox/mlxbf_gige/mlxbf_gige_main.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c index 044ff5f87b5e..f1fa5f10051f 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c @@ -139,13 +139,10 @@ static int mlxbf_gige_open(struct net_device *netdev) control |= MLXBF_GIGE_CONTROL_PORT_EN; writeq(control, priv->base + MLXBF_GIGE_CONTROL); - err = mlxbf_gige_request_irqs(priv); - if (err) - return err; mlxbf_gige_cache_stats(priv); err = mlxbf_gige_clean_port(priv); if (err) - goto free_irqs; + return err; /* Clear driver's valid_polarity to match hardware, * since the above call to clean_port() resets the @@ -166,6 +163,10 @@ static int mlxbf_gige_open(struct net_device *netdev) napi_enable(&priv->napi); netif_start_queue(netdev); + err = mlxbf_gige_request_irqs(priv); + if (err) + goto napi_deinit; + /* Set bits in INT_EN that we care about */ int_en = MLXBF_GIGE_INT_EN_HW_ACCESS_ERROR | MLXBF_GIGE_INT_EN_TX_CHECKSUM_INPUTS | @@ -182,14 +183,17 @@ static int mlxbf_gige_open(struct net_device *netdev) return 0; +napi_deinit: + netif_stop_queue(netdev); + napi_disable(&priv->napi); + netif_napi_del(&priv->napi); + mlxbf_gige_rx_deinit(priv); + tx_deinit: mlxbf_gige_tx_deinit(priv); phy_deinit: phy_stop(phydev); - -free_irqs: - mlxbf_gige_free_irqs(priv); return err; } -- Gitee From 54a0aae372d62e979d33748e8cb06c728da2b624 Mon Sep 17 00:00:00 2001 From: Andrei Matei Date: Tue, 26 Mar 2024 22:42:45 -0400 Subject: [PATCH 045/268] bpf: Protect against int overflow for stack access size stable inclusion from stable-6.6.26 commit 3f0784b2f1eb9147973d8c43ba085c5fdf44ff69 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit ecc6a2101840177e57c925c102d2d29f260d37c8 ] This patch re-introduces protection against the size of access to stack memory being negative; the access size can appear negative as a result of overflowing its signed int representation. This should not actually happen, as there are other protections along the way, but we should protect against it anyway. One code path was missing such protections (fixed in the previous patch in the series), causing out-of-bounds array accesses in check_stack_range_initialized(). This patch causes the verification of a program with such a non-sensical access size to fail. This check used to exist in a more indirect way, but was inadvertendly removed in a833a17aeac7. Fixes: a833a17aeac7 ("bpf: Fix verification of indirect var-off stack access") Reported-by: syzbot+33f4297b5f927648741a@syzkaller.appspotmail.com Reported-by: syzbot+aafd0513053a1cbf52ef@syzkaller.appspotmail.com Closes: https://lore.kernel.org/bpf/CAADnVQLORV5PT0iTAhRER+iLBTkByCYNBYyvBSgjN1T31K+gOw@mail.gmail.com/ Acked-by: Andrii Nakryiko Signed-off-by: Andrei Matei Link: https://lore.kernel.org/r/20240327024245.318299-3-andreimatei1@gmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- kernel/bpf/verifier.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 396c4c66932f..c9fc734989c6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6637,6 +6637,11 @@ static int check_stack_access_within_bounds( err = check_stack_slot_within_bounds(env, min_off, state, type); if (!err && max_off > 0) err = -EINVAL; /* out of stack access into non-negative offsets */ + if (!err && access_size < 0) + /* access_size should not be negative (or overflow an int); others checks + * along the way should have prevented such an access. + */ + err = -EFAULT; /* invalid negative access size; integer overflow? */ if (err) { if (tnum_is_const(reg->var_off)) { -- Gitee From a88ae43090a714f96531717e8b4e9a45c519ac6d Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 27 Mar 2024 14:13:24 +0000 Subject: [PATCH 046/268] cifs: Fix duplicate fscache cookie warnings stable inclusion from stable-6.6.26 commit 614bc8c71ed5ae9ffdc89886003d20afbf20e86c category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 8876a37277cb832e1861c35f8c661825179f73f5 ] fscache emits a lot of duplicate cookie warnings with cifs because the index key for the fscache cookies does not include everything that the cifs_find_inode() function does. The latter is used with iget5_locked() to distinguish between inodes in the local inode cache. Fix this by adding the creation time and file type to the fscache cookie key. Additionally, add a couple of comments to note that if one is changed the other must be also. Signed-off-by: David Howells Fixes: 70431bfd825d ("cifs: Support fscache indexing rewrite") cc: Shyam Prasad N cc: Rohith Surabattula cc: Jeff Layton cc: linux-cifs@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- fs/smb/client/fscache.c | 16 +++++++++++++++- fs/smb/client/inode.c | 2 ++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/fscache.c b/fs/smb/client/fscache.c index e5cad149f5a2..a4ee801b2939 100644 --- a/fs/smb/client/fscache.c +++ b/fs/smb/client/fscache.c @@ -12,6 +12,16 @@ #include "cifs_fs_sb.h" #include "cifsproto.h" +/* + * Key for fscache inode. [!] Contents must match comparisons in cifs_find_inode(). + */ +struct cifs_fscache_inode_key { + + __le64 uniqueid; /* server inode number */ + __le64 createtime; /* creation time on server */ + u8 type; /* S_IFMT file type */ +} __packed; + static void cifs_fscache_fill_volume_coherency( struct cifs_tcon *tcon, struct cifs_fscache_volume_coherency_data *cd) @@ -97,15 +107,19 @@ void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon) void cifs_fscache_get_inode_cookie(struct inode *inode) { struct cifs_fscache_inode_coherency_data cd; + struct cifs_fscache_inode_key key; struct cifsInodeInfo *cifsi = CIFS_I(inode); struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + key.uniqueid = cpu_to_le64(cifsi->uniqueid); + key.createtime = cpu_to_le64(cifsi->createtime); + key.type = (inode->i_mode & S_IFMT) >> 12; cifs_fscache_fill_coherency(&cifsi->netfs.inode, &cd); cifsi->netfs.cache = fscache_acquire_cookie(tcon->fscache, 0, - &cifsi->uniqueid, sizeof(cifsi->uniqueid), + &key, sizeof(key), &cd, sizeof(cd), i_size_read(&cifsi->netfs.inode)); if (cifsi->netfs.cache) diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index cb9e719e67ae..fa6330d586e8 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -1390,6 +1390,8 @@ cifs_find_inode(struct inode *inode, void *opaque) { struct cifs_fattr *fattr = opaque; + /* [!] The compared values must be the same in struct cifs_fscache_inode_key. */ + /* don't match inode with different uniqueid */ if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid) return 0; -- Gitee From 3a25c9a0120d0fd6d7fa834be968d6815b47c7f5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 21 Mar 2024 01:27:50 +0100 Subject: [PATCH 047/268] netfilter: nf_tables: reject destroy command to remove basechain hooks stable inclusion from stable-6.6.26 commit cddd0480a682426d44fdadb55354367a905cedb7 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit b32ca27fa238ff83427d23bef2a5b741e2a88a1e ] Report EOPNOTSUPP if NFT_MSG_DESTROYCHAIN is used to delete hooks in an existing netdev basechain, thus, only NFT_MSG_DELCHAIN is allowed. Fixes: 7d937b107108f ("netfilter: nf_tables: support for deleting devices in an existing netdev chain") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- net/netfilter/nf_tables_api.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f10419ba6e0b..0653f1e5e892 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2934,7 +2934,8 @@ static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info, nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla); if (nla[NFTA_CHAIN_HOOK]) { - if (chain->flags & NFT_CHAIN_HW_OFFLOAD) + if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYCHAIN || + chain->flags & NFT_CHAIN_HW_OFFLOAD) return -EOPNOTSUPP; if (nft_is_base_chain(chain)) { -- Gitee From 0a0c9b18db93609170b57f75d386bc3bfddaecb5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 21 Mar 2024 01:27:59 +0100 Subject: [PATCH 048/268] netfilter: nf_tables: reject table flag and netdev basechain updates stable inclusion from stable-6.6.26 commit 2d0d1abe119af2595a760ccc404cdb4e8c846de9 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 1e1fb6f00f52812277963365d9bd835b9b0ea4e0 ] netdev basechain updates are stored in the transaction object hook list. When setting on the table dormant flag, it iterates over the existing hooks in the basechain. Thus, skipping the hooks that are being added/deleted in this transaction, which leaves hook registration in inconsistent state. Reject table flag updates in combination with netdev basechain updates in the same batch: - Update table flags and add/delete basechain: Check from basechain update path if there are pending flag updates for this table. - add/delete basechain and update table flags: Iterate over the transaction list to search for basechain updates from the table update path. In both cases, the batch is rejected. Based on suggestion from Florian Westphal. Fixes: b9703ed44ffb ("netfilter: nf_tables: support for adding new devices to an existing netdev chain") Fixes: 7d937b107108f ("netfilter: nf_tables: support for deleting devices in an existing netdev chain") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- net/netfilter/nf_tables_api.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 0653f1e5e892..6e4e22a10a82 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1200,6 +1200,25 @@ static void nf_tables_table_disable(struct net *net, struct nft_table *table) #define __NFT_TABLE_F_UPDATE (__NFT_TABLE_F_WAS_DORMANT | \ __NFT_TABLE_F_WAS_AWAKEN) +static bool nft_table_pending_update(const struct nft_ctx *ctx) +{ + struct nftables_pernet *nft_net = nft_pernet(ctx->net); + struct nft_trans *trans; + + if (ctx->table->flags & __NFT_TABLE_F_UPDATE) + return true; + + list_for_each_entry(trans, &nft_net->commit_list, list) { + if ((trans->msg_type == NFT_MSG_NEWCHAIN || + trans->msg_type == NFT_MSG_DELCHAIN) && + trans->ctx.table == ctx->table && + nft_trans_chain_update(trans)) + return true; + } + + return false; +} + static int nf_tables_updtable(struct nft_ctx *ctx) { struct nft_trans *trans; @@ -1223,7 +1242,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx) return -EOPNOTSUPP; /* No dormant off/on/off/on games in single transaction */ - if (ctx->table->flags & __NFT_TABLE_F_UPDATE) + if (nft_table_pending_update(ctx)) return -EINVAL; trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE, @@ -2621,6 +2640,13 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, } } + if (table->flags & __NFT_TABLE_F_UPDATE && + !list_empty(&hook.list)) { + NL_SET_BAD_ATTR(extack, attr); + err = -EOPNOTSUPP; + goto err_hooks; + } + if (!(table->flags & NFT_TABLE_F_DORMANT) && nft_is_base_chain(chain) && !list_empty(&hook.list)) { @@ -2850,6 +2876,9 @@ static int nft_delchain_hook(struct nft_ctx *ctx, struct nft_trans *trans; int err; + if (ctx->table->flags & __NFT_TABLE_F_UPDATE) + return -EOPNOTSUPP; + err = nft_chain_parse_hook(ctx->net, basechain, nla, &chain_hook, ctx->family, chain->flags, extack); if (err < 0) -- Gitee From f93208eac69875bb8f49116fa983c7352c3eca32 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 21 Mar 2024 01:28:07 +0100 Subject: [PATCH 049/268] netfilter: nf_tables: skip netdev hook unregistration if table is dormant stable inclusion from stable-6.6.26 commit cf893953633db14a749aa6677bed20529cfb13ac category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 216e7bf7402caf73f4939a8e0248392e96d7c0da ] Skip hook unregistration when adding or deleting devices from an existing netdev basechain. Otherwise, commit/abort path try to unregister hooks which not enabled. Fixes: b9703ed44ffb ("netfilter: nf_tables: support for adding new devices to an existing netdev chain") Fixes: 7d937b107108 ("netfilter: nf_tables: support for deleting devices in an existing netdev chain") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- net/netfilter/nf_tables_api.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 6e4e22a10a82..b2ef7e37f11c 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -10083,9 +10083,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) if (nft_trans_chain_update(trans)) { nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN, &nft_trans_chain_hooks(trans)); - nft_netdev_unregister_hooks(net, - &nft_trans_chain_hooks(trans), - true); + if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT)) { + nft_netdev_unregister_hooks(net, + &nft_trans_chain_hooks(trans), + true); + } } else { nft_chain_del(trans->ctx.chain); nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN, @@ -10357,9 +10359,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) { - nft_netdev_unregister_hooks(net, - &nft_trans_chain_hooks(trans), - true); + if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT)) { + nft_netdev_unregister_hooks(net, + &nft_trans_chain_hooks(trans), + true); + } free_percpu(nft_trans_chain_stats(trans)); kfree(nft_trans_chain_name(trans)); nft_trans_destroy(trans); -- Gitee From afffe85a79d8198a0e76a873a1e8c8e08c0d4a58 Mon Sep 17 00:00:00 2001 From: Justin Chen Date: Mon, 25 Mar 2024 12:30:24 -0700 Subject: [PATCH 050/268] net: bcmasp: Bring up unimac after PHY link up stable inclusion from stable-6.6.26 commit 4bb7ad116be79b652e1075727dbc16b541d449c3 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit dfd222e2aef68818320a57b13a1c52a44c22bc80 ] The unimac requires the PHY RX clk during reset or it may be put into a bad state. Bring up the unimac after link up to ensure the PHY RX clk exists. Fixes: 490cb412007d ("net: bcmasp: Add support for ASP2.0 Ethernet controller") Signed-off-by: Justin Chen Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- .../net/ethernet/broadcom/asp2/bcmasp_intf.c | 28 +++++++++++++------ 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c index 9cae5a309000..b3d04f49f77e 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c @@ -391,7 +391,9 @@ static void umac_reset(struct bcmasp_intf *intf) umac_wl(intf, 0x0, UMC_CMD); umac_wl(intf, UMC_CMD_SW_RESET, UMC_CMD); usleep_range(10, 100); - umac_wl(intf, 0x0, UMC_CMD); + /* We hold the umac in reset and bring it out of + * reset when phy link is up. + */ } static void umac_set_hw_addr(struct bcmasp_intf *intf, @@ -411,6 +413,8 @@ static void umac_enable_set(struct bcmasp_intf *intf, u32 mask, u32 reg; reg = umac_rl(intf, UMC_CMD); + if (reg & UMC_CMD_SW_RESET) + return; if (enable) reg |= mask; else @@ -429,7 +433,6 @@ static void umac_init(struct bcmasp_intf *intf) umac_wl(intf, 0x800, UMC_FRM_LEN); umac_wl(intf, 0xffff, UMC_PAUSE_CNTRL); umac_wl(intf, 0x800, UMC_RX_MAX_PKT_SZ); - umac_enable_set(intf, UMC_CMD_PROMISC, 1); } static int bcmasp_tx_poll(struct napi_struct *napi, int budget) @@ -656,6 +659,12 @@ static void bcmasp_adj_link(struct net_device *dev) UMC_CMD_HD_EN | UMC_CMD_RX_PAUSE_IGNORE | UMC_CMD_TX_PAUSE_IGNORE); reg |= cmd_bits; + if (reg & UMC_CMD_SW_RESET) { + reg &= ~UMC_CMD_SW_RESET; + umac_wl(intf, reg, UMC_CMD); + udelay(2); + reg |= UMC_CMD_TX_EN | UMC_CMD_RX_EN | UMC_CMD_PROMISC; + } umac_wl(intf, reg, UMC_CMD); intf->eee.eee_active = phy_init_eee(phydev, 0) >= 0; @@ -1061,9 +1070,6 @@ static int bcmasp_netif_init(struct net_device *dev, bool phy_connect) umac_init(intf); - /* Disable the UniMAC RX/TX */ - umac_enable_set(intf, (UMC_CMD_RX_EN | UMC_CMD_TX_EN), 0); - umac_set_hw_addr(intf, dev->dev_addr); intf->old_duplex = -1; @@ -1083,9 +1089,6 @@ static int bcmasp_netif_init(struct net_device *dev, bool phy_connect) bcmasp_enable_rx(intf, 1); - /* Turn on UniMAC TX/RX */ - umac_enable_set(intf, (UMC_CMD_RX_EN | UMC_CMD_TX_EN), 1); - intf->crc_fwd = !!(umac_rl(intf, UMC_CMD) & UMC_CMD_CRC_FWD); bcmasp_netif_start(dev); @@ -1321,7 +1324,14 @@ static void bcmasp_suspend_to_wol(struct bcmasp_intf *intf) if (intf->wolopts & WAKE_FILTER) bcmasp_netfilt_suspend(intf); - /* UniMAC receive needs to be turned on */ + /* Bring UniMAC out of reset if needed and enable RX */ + reg = umac_rl(intf, UMC_CMD); + if (reg & UMC_CMD_SW_RESET) + reg &= ~UMC_CMD_SW_RESET; + + reg |= UMC_CMD_RX_EN | UMC_CMD_PROMISC; + umac_wl(intf, reg, UMC_CMD); + umac_enable_set(intf, UMC_CMD_RX_EN, 1); if (intf->parent->wol_irq > 0) { -- Gitee From fad832319278df7074f166df1ed6b2ca5a69fdac Mon Sep 17 00:00:00 2001 From: Raju Lakkaraju Date: Tue, 26 Mar 2024 12:28:05 +0530 Subject: [PATCH 051/268] net: lan743x: Add set RFE read fifo threshold for PCI1x1x chips stable inclusion from stable-6.6.26 commit 74a78a00db8fe0b1a777e875ffd923e6aaa7a1d5 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit e4a58989f5c839316ac63675e8800b9eed7dbe96 ] PCI11x1x Rev B0 devices might drop packets when receiving back to back frames at 2.5G link speed. Change the B0 Rev device's Receive filtering Engine FIFO threshold parameter from its hardware default of 4 to 3 dwords to prevent the problem. Rev C0 and later hardware already defaults to 3 dwords. Fixes: bb4f6bffe33c ("net: lan743x: Add PCI11010 / PCI11414 device IDs") Signed-off-by: Raju Lakkaraju Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240326065805.686128-1-Raju.Lakkaraju@microchip.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- drivers/net/ethernet/microchip/lan743x_main.c | 18 ++++++++++++++++++ drivers/net/ethernet/microchip/lan743x_main.h | 4 ++++ 2 files changed, 22 insertions(+) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index c81cdeb4d4e7..0b6174748d2b 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -25,6 +25,8 @@ #define PCS_POWER_STATE_DOWN 0x6 #define PCS_POWER_STATE_UP 0x4 +#define RFE_RD_FIFO_TH_3_DWORDS 0x3 + static void pci11x1x_strap_get_status(struct lan743x_adapter *adapter) { u32 chip_rev; @@ -3223,6 +3225,21 @@ static void lan743x_full_cleanup(struct lan743x_adapter *adapter) lan743x_pci_cleanup(adapter); } +static void pci11x1x_set_rfe_rd_fifo_threshold(struct lan743x_adapter *adapter) +{ + u16 rev = adapter->csr.id_rev & ID_REV_CHIP_REV_MASK_; + + if (rev == ID_REV_CHIP_REV_PCI11X1X_B0_) { + u32 misc_ctl; + + misc_ctl = lan743x_csr_read(adapter, MISC_CTL_0); + misc_ctl &= ~MISC_CTL_0_RFE_READ_FIFO_MASK_; + misc_ctl |= FIELD_PREP(MISC_CTL_0_RFE_READ_FIFO_MASK_, + RFE_RD_FIFO_TH_3_DWORDS); + lan743x_csr_write(adapter, MISC_CTL_0, misc_ctl); + } +} + static int lan743x_hardware_init(struct lan743x_adapter *adapter, struct pci_dev *pdev) { @@ -3238,6 +3255,7 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter, pci11x1x_strap_get_status(adapter); spin_lock_init(&adapter->eth_syslock_spinlock); mutex_init(&adapter->sgmii_rw_lock); + pci11x1x_set_rfe_rd_fifo_threshold(adapter); } else { adapter->max_tx_channels = LAN743X_MAX_TX_CHANNELS; adapter->used_tx_channels = LAN743X_USED_TX_CHANNELS; diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h index 52609fc13ad9..f0b486f85450 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.h +++ b/drivers/net/ethernet/microchip/lan743x_main.h @@ -26,6 +26,7 @@ #define ID_REV_CHIP_REV_MASK_ (0x0000FFFF) #define ID_REV_CHIP_REV_A0_ (0x00000000) #define ID_REV_CHIP_REV_B0_ (0x00000010) +#define ID_REV_CHIP_REV_PCI11X1X_B0_ (0x000000B0) #define FPGA_REV (0x04) #define FPGA_REV_GET_MINOR_(fpga_rev) (((fpga_rev) >> 8) & 0x000000FF) @@ -311,6 +312,9 @@ #define SGMII_CTL_LINK_STATUS_SOURCE_ BIT(8) #define SGMII_CTL_SGMII_POWER_DN_ BIT(1) +#define MISC_CTL_0 (0x920) +#define MISC_CTL_0_RFE_READ_FIFO_MASK_ GENMASK(6, 4) + /* Vendor Specific SGMII MMD details */ #define SR_VSMMD_PCS_ID1 0x0004 #define SR_VSMMD_PCS_ID2 0x0005 -- Gitee From 3b85e807069cb30b507c9a47f95ff8320bb61cde Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Tue, 26 Mar 2024 10:57:20 +0530 Subject: [PATCH 052/268] Octeontx2-af: fix pause frame configuration in GMP mode stable inclusion from stable-6.6.26 commit 54720f68c4ad0ffb5a776e7a14b25f02242068cb category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 40d4b4807cadd83fb3f46cc8cd67a945b5b25461 ] The Octeontx2 MAC block (CGX) has separate data paths (SMU and GMP) for different speeds, allowing for efficient data transfer. The previous patch which added pause frame configuration has a bug due to which pause frame feature is not working in GMP mode. This patch fixes the issue by configurating appropriate registers. Fixes: f7e086e754fe ("octeontx2-af: Pause frame configuration at cgx") Signed-off-by: Hariprasad Kelam Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240326052720.4441-1-hkelam@marvell.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 6c18d3d2442e..2539c985f695 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -808,6 +808,11 @@ static int cgx_lmac_enadis_pause_frm(void *cgxd, int lmac_id, if (!is_lmac_valid(cgx, lmac_id)) return -ENODEV; + cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL); + cfg &= ~CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK; + cfg |= rx_pause ? CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK : 0x0; + cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg); + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); cfg &= ~CGX_SMUX_RX_FRM_CTL_CTL_BCK; cfg |= rx_pause ? CGX_SMUX_RX_FRM_CTL_CTL_BCK : 0x0; -- Gitee From d7535e1a7a707db6022e151821622bc196945eab Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 26 Mar 2024 11:18:41 +0100 Subject: [PATCH 053/268] inet: inet_defrag: prevent sk release while still in use stable inclusion from stable-6.6.26 commit f4877225313d474659ee53150ccc3d553a978727 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 18685451fc4e546fc0e718580d32df3c0e5c8272 ] ip_local_out() and other functions can pass skb->sk as function argument. If the skb is a fragment and reassembly happens before such function call returns, the sk must not be released. This affects skb fragments reassembled via netfilter or similar modules, e.g. openvswitch or ct_act.c, when run as part of tx pipeline. Eric Dumazet made an initial analysis of this bug. Quoting Eric: Calling ip_defrag() in output path is also implying skb_orphan(), which is buggy because output path relies on sk not disappearing. A relevant old patch about the issue was : 8282f27449bf ("inet: frag: Always orphan skbs inside ip_defrag()") [..] net/ipv4/ip_output.c depends on skb->sk being set, and probably to an inet socket, not an arbitrary one. If we orphan the packet in ipvlan, then downstream things like FQ packet scheduler will not work properly. We need to change ip_defrag() to only use skb_orphan() when really needed, ie whenever frag_list is going to be used. Eric suggested to stash sk in fragment queue and made an initial patch. However there is a problem with this: If skb is refragmented again right after, ip_do_fragment() will copy head->sk to the new fragments, and sets up destructor to sock_wfree. IOW, we have no choice but to fix up sk_wmem accouting to reflect the fully reassembled skb, else wmem will underflow. This change moves the orphan down into the core, to last possible moment. As ip_defrag_offset is aliased with sk_buff->sk member, we must move the offset into the FRAG_CB, else skb->sk gets clobbered. This allows to delay the orphaning long enough to learn if the skb has to be queued or if the skb is completing the reasm queue. In the former case, things work as before, skb is orphaned. This is safe because skb gets queued/stolen and won't continue past reasm engine. In the latter case, we will steal the skb->sk reference, reattach it to the head skb, and fix up wmem accouting when inet_frag inflates truesize. Fixes: 7026b1ddb6b8 ("netfilter: Pass socket pointer down through okfn().") Diagnosed-by: Eric Dumazet Reported-by: xingwei lee Reported-by: yue sun Reported-by: syzbot+e5167d7144a62715044c@syzkaller.appspotmail.com Signed-off-by: Florian Westphal Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240326101845.30836-1-fw@strlen.de Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- include/linux/skbuff.h | 7 +-- net/ipv4/inet_fragment.c | 70 ++++++++++++++++++++----- net/ipv4/ip_fragment.c | 2 +- net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +- 4 files changed, 60 insertions(+), 21 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2922059908cc..9e61f6df6bc5 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -736,8 +736,6 @@ typedef unsigned char *sk_buff_data_t; * @list: queue head * @ll_node: anchor in an llist (eg socket defer_list) * @sk: Socket we are owned by - * @ip_defrag_offset: (aka @sk) alternate use of @sk, used in - * fragmentation management * @dev: Device we arrived on/are leaving by * @dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL * @cb: Control buffer. Free for use by every layer. Put private vars here @@ -860,10 +858,7 @@ struct sk_buff { struct llist_node ll_node; }; - union { - struct sock *sk; - int ip_defrag_offset; - }; + struct sock *sk; union { ktime_t tstamp; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 7072fc0783ef..c88c9034d630 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -24,6 +24,8 @@ #include #include +#include "../core/sock_destructor.h" + /* Use skb->cb to track consecutive/adjacent fragments coming at * the end of the queue. Nodes in the rb-tree queue will * contain "runs" of one or more adjacent fragments. @@ -39,6 +41,7 @@ struct ipfrag_skb_cb { }; struct sk_buff *next_frag; int frag_run_len; + int ip_defrag_offset; }; #define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) @@ -396,12 +399,12 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, */ if (!last) fragrun_create(q, skb); /* First fragment. */ - else if (last->ip_defrag_offset + last->len < end) { + else if (FRAG_CB(last)->ip_defrag_offset + last->len < end) { /* This is the common case: skb goes to the end. */ /* Detect and discard overlaps. */ - if (offset < last->ip_defrag_offset + last->len) + if (offset < FRAG_CB(last)->ip_defrag_offset + last->len) return IPFRAG_OVERLAP; - if (offset == last->ip_defrag_offset + last->len) + if (offset == FRAG_CB(last)->ip_defrag_offset + last->len) fragrun_append_to_last(q, skb); else fragrun_create(q, skb); @@ -418,13 +421,13 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, parent = *rbn; curr = rb_to_skb(parent); - curr_run_end = curr->ip_defrag_offset + + curr_run_end = FRAG_CB(curr)->ip_defrag_offset + FRAG_CB(curr)->frag_run_len; - if (end <= curr->ip_defrag_offset) + if (end <= FRAG_CB(curr)->ip_defrag_offset) rbn = &parent->rb_left; else if (offset >= curr_run_end) rbn = &parent->rb_right; - else if (offset >= curr->ip_defrag_offset && + else if (offset >= FRAG_CB(curr)->ip_defrag_offset && end <= curr_run_end) return IPFRAG_DUP; else @@ -438,7 +441,7 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, rb_insert_color(&skb->rbnode, &q->rb_fragments); } - skb->ip_defrag_offset = offset; + FRAG_CB(skb)->ip_defrag_offset = offset; return IPFRAG_OK; } @@ -448,13 +451,28 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, struct sk_buff *parent) { struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments); - struct sk_buff **nextp; + void (*destructor)(struct sk_buff *); + unsigned int orig_truesize = 0; + struct sk_buff **nextp = NULL; + struct sock *sk = skb->sk; int delta; + if (sk && is_skb_wmem(skb)) { + /* TX: skb->sk might have been passed as argument to + * dst->output and must remain valid until tx completes. + * + * Move sk to reassembled skb and fix up wmem accounting. + */ + orig_truesize = skb->truesize; + destructor = skb->destructor; + } + if (head != skb) { fp = skb_clone(skb, GFP_ATOMIC); - if (!fp) - return NULL; + if (!fp) { + head = skb; + goto out_restore_sk; + } FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag; if (RB_EMPTY_NODE(&skb->rbnode)) FRAG_CB(parent)->next_frag = fp; @@ -463,6 +481,12 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, &q->rb_fragments); if (q->fragments_tail == skb) q->fragments_tail = fp; + + if (orig_truesize) { + /* prevent skb_morph from releasing sk */ + skb->sk = NULL; + skb->destructor = NULL; + } skb_morph(skb, head); FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag; rb_replace_node(&head->rbnode, &skb->rbnode, @@ -470,13 +494,13 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, consume_skb(head); head = skb; } - WARN_ON(head->ip_defrag_offset != 0); + WARN_ON(FRAG_CB(head)->ip_defrag_offset != 0); delta = -head->truesize; /* Head of list must not be cloned. */ if (skb_unclone(head, GFP_ATOMIC)) - return NULL; + goto out_restore_sk; delta += head->truesize; if (delta) @@ -492,7 +516,7 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, clone = alloc_skb(0, GFP_ATOMIC); if (!clone) - return NULL; + goto out_restore_sk; skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; skb_frag_list_init(head); for (i = 0; i < skb_shinfo(head)->nr_frags; i++) @@ -509,6 +533,21 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, nextp = &skb_shinfo(head)->frag_list; } +out_restore_sk: + if (orig_truesize) { + int ts_delta = head->truesize - orig_truesize; + + /* if this reassembled skb is fragmented later, + * fraglist skbs will get skb->sk assigned from head->sk, + * and each frag skb will be released via sock_wfree. + * + * Update sk_wmem_alloc. + */ + head->sk = sk; + head->destructor = destructor; + refcount_add(ts_delta, &sk->sk_wmem_alloc); + } + return nextp; } EXPORT_SYMBOL(inet_frag_reasm_prepare); @@ -516,6 +555,8 @@ EXPORT_SYMBOL(inet_frag_reasm_prepare); void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, void *reasm_data, bool try_coalesce) { + struct sock *sk = is_skb_wmem(head) ? head->sk : NULL; + const unsigned int head_truesize = head->truesize; struct sk_buff **nextp = reasm_data; struct rb_node *rbn; struct sk_buff *fp; @@ -579,6 +620,9 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, head->prev = NULL; head->tstamp = q->stamp; head->mono_delivery_time = q->mono_delivery_time; + + if (sk) + refcount_add(sum_truesize - head_truesize, &sk->sk_wmem_alloc); } EXPORT_SYMBOL(inet_frag_reasm_finish); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index a4941f53b523..fb947d1613fe 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -384,6 +384,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) } skb_dst_drop(skb); + skb_orphan(skb); return -EINPROGRESS; insert_error: @@ -487,7 +488,6 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user) struct ipq *qp; __IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS); - skb_orphan(skb); /* Lookup (or create) queue header */ qp = ip_find(net, ip_hdr(skb), user, vif); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index b2dd48911c8d..efbec7ee27d0 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -294,6 +294,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, } skb_dst_drop(skb); + skb_orphan(skb); return -EINPROGRESS; insert_error: @@ -469,7 +470,6 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) hdr = ipv6_hdr(skb); fhdr = (struct frag_hdr *)skb_transport_header(skb); - skb_orphan(skb); fq = fq_find(net, fhdr->identification, user, hdr, skb->dev ? skb->dev->ifindex : 0); if (fq == NULL) { -- Gitee From e52ba4f812a1e4604312ced40eedd5f4d3ddd329 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 16 Aug 2023 14:42:05 -0700 Subject: [PATCH 054/268] drm/i915/dg2: Drop pre-production GT workarounds stable inclusion from stable-6.6.26 commit 0a9901fdb7bb785ec4975aeeebc1428e3abae172 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit eaeb4b3614529bfa8a7edfdd7ecf6977b27f18b2 ] DG2 first production steppings were C0 (for DG2-G10), B1 (for DG2-G11), and A1 (for DG2-G12). Several workarounds that apply onto to pre-production hardware can be dropped. Furthermore, several workarounds that apply to all production steppings can have their conditions simplified to no longer check the GT stepping. v2: - Keep Wa_16011777198 in place for now; it will be removed separately in a follow-up patch to keep review easier. Bspec: 44477 Signed-off-by: Matt Roper Acked-by: Jani Nikula Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20230816214201.534095-10-matthew.d.roper@intel.com Stable-dep-of: 186bce682772 ("drm/i915/mtl: Update workaround 14018575942") Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- drivers/gpu/drm/i915/gt/intel_lrc.c | 34 +--- drivers/gpu/drm/i915/gt/intel_mocs.c | 21 +- drivers/gpu/drm/i915/gt/intel_rc6.c | 6 +- drivers/gpu/drm/i915/gt/intel_workarounds.c | 211 +------------------- drivers/gpu/drm/i915/gt/uc/intel_guc.c | 20 +- drivers/gpu/drm/i915/intel_clock_gating.c | 8 - 6 files changed, 21 insertions(+), 279 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index c378cc7c953c..f297c5808e7c 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1316,29 +1316,6 @@ gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs) return cs; } -/* - * On DG2 during context restore of a preempted context in GPGPU mode, - * RCS restore hang is detected. This is extremely timing dependent. - * To address this below sw wabb is implemented for DG2 A steppings. - */ -static u32 * -dg2_emit_rcs_hang_wabb(const struct intel_context *ce, u32 *cs) -{ - *cs++ = MI_LOAD_REGISTER_IMM(1); - *cs++ = i915_mmio_reg_offset(GEN12_STATE_ACK_DEBUG(ce->engine->mmio_base)); - *cs++ = 0x21; - - *cs++ = MI_LOAD_REGISTER_REG; - *cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base)); - *cs++ = i915_mmio_reg_offset(XEHP_CULLBIT1); - - *cs++ = MI_LOAD_REGISTER_REG; - *cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base)); - *cs++ = i915_mmio_reg_offset(XEHP_CULLBIT2); - - return cs; -} - /* * The bspec's tuning guide asks us to program a vertical watermark value of * 0x3FF. However this register is not saved/restored properly by the @@ -1363,14 +1340,8 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) cs = gen12_emit_cmd_buf_wa(ce, cs); cs = gen12_emit_restore_scratch(ce, cs); - /* Wa_22011450934:dg2 */ - if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_A0, STEP_B0) || - IS_DG2_GRAPHICS_STEP(ce->engine->i915, G11, STEP_A0, STEP_B0)) - cs = dg2_emit_rcs_hang_wabb(ce, cs); - /* Wa_16013000631:dg2 */ - if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) || - IS_DG2_G11(ce->engine->i915)) + if (IS_DG2_G11(ce->engine->i915)) cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0); cs = gen12_emit_aux_table_inv(ce->engine, cs); @@ -1391,8 +1362,7 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs) cs = gen12_emit_restore_scratch(ce, cs); /* Wa_16013000631:dg2 */ - if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) || - IS_DG2_G11(ce->engine->i915)) + if (IS_DG2_G11(ce->engine->i915)) if (ce->engine->class == COMPUTE_CLASS) cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 2c014407225c..bf8b42d2d327 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -404,18 +404,6 @@ static const struct drm_i915_mocs_entry dg2_mocs_table[] = { MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), }; -static const struct drm_i915_mocs_entry dg2_mocs_table_g10_ax[] = { - /* Wa_14011441408: Set Go to Memory for MOCS#0 */ - MOCS_ENTRY(0, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), - /* UC - Coherent; GO:Memory */ - MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), - /* UC - Non-Coherent; GO:Memory */ - MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)), - - /* WB - LC */ - MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), -}; - static const struct drm_i915_mocs_entry pvc_mocs_table[] = { /* Error */ MOCS_ENTRY(0, 0, L3_3_WB), @@ -521,13 +509,8 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, table->wb_index = 2; table->unused_entries_index = 2; } else if (IS_DG2(i915)) { - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { - table->size = ARRAY_SIZE(dg2_mocs_table_g10_ax); - table->table = dg2_mocs_table_g10_ax; - } else { - table->size = ARRAY_SIZE(dg2_mocs_table); - table->table = dg2_mocs_table; - } + table->size = ARRAY_SIZE(dg2_mocs_table); + table->table = dg2_mocs_table; table->uc_index = 1; table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->unused_entries_index = 3; diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index ccdc1afbf11b..b8c9338176bd 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -118,14 +118,12 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) GEN6_RC_CTL_EI_MODE(1); /* - * Wa_16011777198 and BSpec 52698 - Render powergating must be off. + * BSpec 52698 - Render powergating must be off. * FIXME BSpec is outdated, disabling powergating for MTL is just * temporary wa and should be removed after fixing real cause * of forcewake timeouts. */ - if (IS_METEORLAKE(gt->i915) || - IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) || - IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) + if (IS_METEORLAKE(gt->i915)) pg_enable = GEN9_MEDIA_PG_ENABLE | GEN11_MEDIA_SAMPLER_PG_ENABLE; diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 3ae0dbd39eaa..7b426f3015b3 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -764,39 +764,15 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, { dg2_ctx_gt_tuning_init(engine, wal); - /* Wa_16011186671:dg2_g11 */ - if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) { - wa_mcr_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH); - wa_mcr_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE); - } - - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) { - /* Wa_14010469329:dg2_g10 */ - wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3, - XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE); - - /* - * Wa_22010465075:dg2_g10 - * Wa_22010613112:dg2_g10 - * Wa_14010698770:dg2_g10 - */ - wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3, - GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); - } - /* Wa_16013271637:dg2 */ wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1, MSC_MSAA_REODER_BUF_BYPASS_DISABLE); /* Wa_14014947963:dg2 */ - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) || - IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915)) - wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000); + wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000); /* Wa_18018764978:dg2 */ - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_C0, STEP_FOREVER) || - IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915)) - wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL); + wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL); /* Wa_15010599737:dg2 */ wa_mcr_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN); @@ -1606,31 +1582,11 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) static void dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { - struct intel_engine_cs *engine; - int id; - xehp_init_mcr(gt, wal); /* Wa_14011060649:dg2 */ wa_14011060649(gt, wal); - /* - * Although there are per-engine instances of these registers, - * they technically exist outside the engine itself and are not - * impacted by engine resets. Furthermore, they're part of the - * GuC blacklist so trying to treat them as engine workarounds - * will result in GuC initialization failure and a wedged GPU. - */ - for_each_engine(engine, gt, id) { - if (engine->class != VIDEO_DECODE_CLASS) - continue; - - /* Wa_16010515920:dg2_g10 */ - if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) - wa_write_or(wal, VDBOX_CGCTL3F18(engine->mmio_base), - ALNUNIT_CLKGATE_DIS); - } - if (IS_DG2_G10(gt->i915)) { /* Wa_22010523718:dg2 */ wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, @@ -1641,65 +1597,6 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) DSS_ROUTER_CLKGATE_DIS); } - if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0) || - IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) { - /* Wa_14012362059:dg2 */ - wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB); - } - - if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) { - /* Wa_14010948348:dg2_g10 */ - wa_write_or(wal, UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS); - - /* Wa_14011037102:dg2_g10 */ - wa_write_or(wal, UNSLCGCTL9444, LTCDD_CLKGATE_DIS); - - /* Wa_14011371254:dg2_g10 */ - wa_mcr_write_or(wal, XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS); - - /* Wa_14011431319:dg2_g10 */ - wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS | - GAMTLBVDBOX7_CLKGATE_DIS | - GAMTLBVDBOX6_CLKGATE_DIS | - GAMTLBVDBOX5_CLKGATE_DIS | - GAMTLBVDBOX4_CLKGATE_DIS | - GAMTLBVDBOX3_CLKGATE_DIS | - GAMTLBVDBOX2_CLKGATE_DIS | - GAMTLBVDBOX1_CLKGATE_DIS | - GAMTLBVDBOX0_CLKGATE_DIS | - GAMTLBKCR_CLKGATE_DIS | - GAMTLBGUC_CLKGATE_DIS | - GAMTLBBLT_CLKGATE_DIS); - wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS | - GAMTLBGFXA1_CLKGATE_DIS | - GAMTLBCOMPA0_CLKGATE_DIS | - GAMTLBCOMPA1_CLKGATE_DIS | - GAMTLBCOMPB0_CLKGATE_DIS | - GAMTLBCOMPB1_CLKGATE_DIS | - GAMTLBCOMPC0_CLKGATE_DIS | - GAMTLBCOMPC1_CLKGATE_DIS | - GAMTLBCOMPD0_CLKGATE_DIS | - GAMTLBCOMPD1_CLKGATE_DIS | - GAMTLBMERT_CLKGATE_DIS | - GAMTLBVEBOX3_CLKGATE_DIS | - GAMTLBVEBOX2_CLKGATE_DIS | - GAMTLBVEBOX1_CLKGATE_DIS | - GAMTLBVEBOX0_CLKGATE_DIS); - - /* Wa_14010569222:dg2_g10 */ - wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, - GAMEDIA_CLKGATE_DIS); - - /* Wa_14011028019:dg2_g10 */ - wa_mcr_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS); - - /* Wa_14010680813:dg2_g10 */ - wa_mcr_write_or(wal, XEHP_GAMSTLB_CTRL, - CONTROL_BLOCK_CLKGATE_DIS | - EGRESS_BLOCK_CLKGATE_DIS | - TAG_BLOCK_CLKGATE_DIS); - } - /* Wa_14014830051:dg2 */ wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN); @@ -2242,29 +2139,10 @@ static void dg2_whitelist_build(struct intel_engine_cs *engine) switch (engine->class) { case RENDER_CLASS: - /* - * Wa_1507100340:dg2_g10 - * - * This covers 4 registers which are next to one another : - * - PS_INVOCATION_COUNT - * - PS_INVOCATION_COUNT_UDW - * - PS_DEPTH_COUNT - * - PS_DEPTH_COUNT_UDW - */ - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) - whitelist_reg_ext(w, PS_INVOCATION_COUNT, - RING_FORCE_TO_NONPRIV_ACCESS_RD | - RING_FORCE_TO_NONPRIV_RANGE_4); - /* Required by recommended tuning setting (not a workaround) */ whitelist_mcr_reg(w, XEHP_COMMON_SLICE_CHICKEN3); break; - case COMPUTE_CLASS: - /* Wa_16011157294:dg2_g10 */ - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) - whitelist_reg(w, GEN9_CTX_PREEMPT_REG); - break; default: break; } @@ -2415,12 +2293,6 @@ engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) } } -static bool needs_wa_1308578152(struct intel_engine_cs *engine) -{ - return intel_sseu_find_first_xehp_dss(&engine->gt->info.sseu, 0, 0) >= - GEN_DSS_PER_GSLICE; -} - static void rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { @@ -2435,42 +2307,20 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || - IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || - IS_DG2_G11(i915) || IS_DG2_G12(i915)) { + IS_DG2(i915)) { /* Wa_1509727124 */ wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE, SC_DISABLE_POWER_OPTIMIZATION_EBB); } - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || - IS_DG2_G11(i915) || IS_DG2_G12(i915) || - IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0)) { + if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || + IS_DG2(i915)) { /* Wa_22012856258 */ wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_DISABLE_READ_SUPPRESSION); } - if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { - /* Wa_14013392000:dg2_g11 */ - wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE); - } - - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) || - IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { - /* Wa_14012419201:dg2 */ - wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, - GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX); - } - - /* Wa_1308578152:dg2_g10 when first gslice is fused off */ - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) && - needs_wa_1308578152(engine)) { - wa_masked_dis(wal, GEN12_CS_DEBUG_MODE1_CCCSUNIT_BE_COMMON, - GEN12_REPLAY_MODE_GRANULARITY); - } - - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || - IS_DG2_G11(i915) || IS_DG2_G12(i915)) { + if (IS_DG2(i915)) { /* * Wa_22010960976:dg2 * Wa_14013347512:dg2 @@ -2479,34 +2329,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK); } - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { - /* - * Wa_1608949956:dg2_g10 - * Wa_14010198302:dg2_g10 - */ - wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, - MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE); - } - - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) - /* Wa_22010430635:dg2 */ - wa_mcr_masked_en(wal, - GEN9_ROW_CHICKEN4, - GEN12_DISABLE_GRF_CLEAR); - - /* Wa_14013202645:dg2 */ - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) || - IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) - wa_mcr_write_or(wal, RT_CTRL, DIS_NULL_QUERY); - - /* Wa_22012532006:dg2 */ - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) || - IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) - wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, - DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA); - - if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) || - IS_DG2_G10(i915)) { + if (IS_DG2_G11(i915) || IS_DG2_G10(i915)) { /* Wa_22014600077:dg2 */ wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH), @@ -3050,8 +2873,7 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || - IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || - IS_DG2_G11(i915) || IS_DG2_G12(i915)) { + IS_DG2(i915)) { /* Wa_22013037850 */ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DISABLE_128B_EVICTION_COMMAND_UDW); @@ -3072,8 +2894,7 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE); } - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) || - IS_DG2_G11(i915)) { + if (IS_DG2_G11(i915)) { /* * Wa_22012826095:dg2 * Wa_22013059131:dg2 @@ -3087,18 +2908,6 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li FORCE_1_SUB_MESSAGE_PER_FRAGMENT); } - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { - /* - * Wa_14010918519:dg2_g10 - * - * LSC_CHICKEN_BIT_0 always reads back as 0 is this stepping, - * so ignoring verification. - */ - wa_mcr_add(wal, LSC_CHICKEN_BIT_0_UDW, 0, - FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE, - 0, false); - } - if (IS_XEHPSDV(i915)) { /* Wa_1409954639 */ wa_mcr_masked_en(wal, @@ -3131,7 +2940,7 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); } - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_C0) || IS_DG2_G11(i915)) + if (IS_DG2_G11(i915)) /* * Wa_22012654132 * diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 569b5fe94c41..82a2ecc12b21 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -272,18 +272,14 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 50)) flags |= GUC_WA_POLLCS; - /* Wa_16011759253:dg2_g10:a0 */ - if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) - flags |= GUC_WA_GAM_CREDITS; - /* Wa_14014475959 */ if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) || IS_DG2(gt->i915)) flags |= GUC_WA_HOLD_CCS_SWITCHOUT; /* - * Wa_14012197797:dg2_g10:a0,dg2_g11:a0 - * Wa_22011391025:dg2_g10,dg2_g11,dg2_g12 + * Wa_14012197797 + * Wa_22011391025 * * The same WA bit is used for both and 22011391025 is applicable to * all DG2. @@ -297,17 +293,11 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 70))) flags |= GUC_WA_PRE_PARSER; - /* Wa_16011777198:dg2 */ - if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) || - IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) - flags |= GUC_WA_RCS_RESET_BEFORE_RC6; - /* - * Wa_22012727170:dg2_g10[a0-c0), dg2_g11[a0..) - * Wa_22012727685:dg2_g11[a0..) + * Wa_22012727170 + * Wa_22012727685 */ - if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) || - IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_FOREVER)) + if (IS_DG2_G11(gt->i915)) flags |= GUC_WA_CONTEXT_ISOLATION; /* Wa_16015675438 */ diff --git a/drivers/gpu/drm/i915/intel_clock_gating.c b/drivers/gpu/drm/i915/intel_clock_gating.c index 81a4d32734e9..c66eb6abd4a2 100644 --- a/drivers/gpu/drm/i915/intel_clock_gating.c +++ b/drivers/gpu/drm/i915/intel_clock_gating.c @@ -396,14 +396,6 @@ static void dg2_init_clock_gating(struct drm_i915_private *i915) /* Wa_22010954014:dg2 */ intel_uncore_rmw(&i915->uncore, XEHP_CLOCK_GATE_DIS, 0, SGSI_SIDECLK_DIS); - - /* - * Wa_14010733611:dg2_g10 - * Wa_22010146351:dg2_g10 - */ - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) - intel_uncore_rmw(&i915->uncore, XEHP_CLOCK_GATE_DIS, 0, - SGR_DIS | SGGI_DIS); } static void pvc_init_clock_gating(struct drm_i915_private *i915) -- Gitee From 8ae4150c28a5bae82f7842e89144423e5299f6d7 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 16 Aug 2023 14:42:06 -0700 Subject: [PATCH 055/268] drm/i915: Tidy workaround definitions stable inclusion from stable-6.6.26 commit 6b25099eea4b65ba3b750ce49fa1a9a13d158046 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit f1c805716516f9e648e13f0108cea8096e0c7023 ] Removal of the DG2 pre-production workarounds has left duplicate condition blocks in a couple places, as well as some inconsistent platform ordering. Reshuffle and consolidate some of the workarounds to reduce the number of condition blocks and to more consistently follow the "newest platform first" convention. Code movement only; no functional change. Signed-off-by: Matt Roper Acked-by: Jani Nikula Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20230816214201.534095-11-matthew.d.roper@intel.com Stable-dep-of: 186bce682772 ("drm/i915/mtl: Update workaround 14018575942") Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 100 +++++++++----------- 1 file changed, 46 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 7b426f3015b3..69973dc51828 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2337,6 +2337,19 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) true); } + if (IS_DG2(i915) || IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || + IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { + /* + * Wa_1606700617:tgl,dg1,adl-p + * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p + * Wa_14010826681:tgl,dg1,rkl,adl-p + * Wa_18019627453:dg2 + */ + wa_masked_en(wal, + GEN9_CS_DEBUG_MODE1, + FF_DOP_CLOCK_GATE_DISABLE); + } + if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { /* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */ @@ -2350,19 +2363,11 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) */ wa_write_or(wal, GEN7_FF_THREAD_MODE, GEN12_FF_TESSELATION_DOP_GATE_DISABLE); - } - if (IS_ALDERLAKE_P(i915) || IS_DG2(i915) || IS_ALDERLAKE_S(i915) || - IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { - /* - * Wa_1606700617:tgl,dg1,adl-p - * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p - * Wa_14010826681:tgl,dg1,rkl,adl-p - * Wa_18019627453:dg2 - */ - wa_masked_en(wal, - GEN9_CS_DEBUG_MODE1, - FF_DOP_CLOCK_GATE_DISABLE); + /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */ + wa_mcr_masked_en(wal, + GEN10_SAMPLER_MODE, + ENABLE_SMALLPL); } if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || @@ -2389,14 +2394,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN8_RC_SEMA_IDLE_MSG_DISABLE); } - if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) || - IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) { - /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */ - wa_mcr_masked_en(wal, - GEN10_SAMPLER_MODE, - ENABLE_SMALLPL); - } - if (GRAPHICS_VER(i915) == 11) { /* This is not an Wa. Enable for better image quality */ wa_masked_en(wal, @@ -2877,6 +2874,9 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li /* Wa_22013037850 */ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DISABLE_128B_EVICTION_COMMAND_UDW); + + /* Wa_18017747507 */ + wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE); } if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || @@ -2887,11 +2887,20 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE); } - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || - IS_DG2(i915)) { - /* Wa_18017747507 */ - wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE); + if (IS_PONTEVECCHIO(i915) || IS_DG2(i915)) { + /* Wa_14015227452:dg2,pvc */ + wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE); + + /* Wa_16015675438:dg2,pvc */ + wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE); + } + + if (IS_DG2(i915)) { + /* + * Wa_16011620976:dg2_g11 + * Wa_22015475538:dg2 + */ + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); } if (IS_DG2_G11(i915)) { @@ -2906,6 +2915,18 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li /* Wa_22013059131:dg2 */ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, FORCE_1_SUB_MESSAGE_PER_FRAGMENT); + + /* + * Wa_22012654132 + * + * Note that register 0xE420 is write-only and cannot be read + * back for verification on DG2 (due to Wa_14012342262), so + * we need to explicitly skip the readback. + */ + wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, + _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), + 0 /* write-only, so skip validation */, + true); } if (IS_XEHPSDV(i915)) { @@ -2923,35 +2944,6 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1, GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); } - - if (IS_DG2(i915) || IS_PONTEVECCHIO(i915)) { - /* Wa_14015227452:dg2,pvc */ - wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE); - - /* Wa_16015675438:dg2,pvc */ - wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE); - } - - if (IS_DG2(i915)) { - /* - * Wa_16011620976:dg2_g11 - * Wa_22015475538:dg2 - */ - wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); - } - - if (IS_DG2_G11(i915)) - /* - * Wa_22012654132 - * - * Note that register 0xE420 is write-only and cannot be read - * back for verification on DG2 (due to Wa_14012342262), so - * we need to explicitly skip the readback. - */ - wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, - _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), - 0 /* write-only, so skip validation */, - true); } static void -- Gitee From edd94fbc2f5bfd389f9814dd018f111a0b855104 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 21 Aug 2023 11:06:21 -0700 Subject: [PATCH 056/268] drm/i915: Consolidate condition for Wa_22011802037 stable inclusion from stable-6.6.26 commit 67f7fba8a08608cfd42ab354b79df56e9fee8856 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 28c46feec7f8760683ef08f12746630a3598173e ] The workaround bounds for Wa_22011802037 are somewhat complex and are replicated in several places throughout the code. Pull the condition out to a helper function to prevent mistakes if this condition needs to change again in the future. Signed-off-by: Matt Roper Reviewed-by: Gustavo Sousa Link: https://patchwork.freedesktop.org/patch/msgid/20230821180619.650007-12-matthew.d.roper@intel.com Stable-dep-of: 186bce682772 ("drm/i915/mtl: Update workaround 14018575942") Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 4 +--- .../drm/i915/gt/intel_execlists_submission.c | 4 +--- drivers/gpu/drm/i915/gt/intel_reset.c | 18 ++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_reset.h | 2 ++ drivers/gpu/drm/i915/gt/uc/intel_guc.c | 4 +--- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 4 +--- 6 files changed, 24 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index e85d70a62123..84a75c95f3f7 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1616,9 +1616,7 @@ static int __intel_engine_stop_cs(struct intel_engine_cs *engine, * Wa_22011802037: Prior to doing a reset, ensure CS is * stopped, set ring stop bit and prefetch disable bit to halt CS */ - if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || - (GRAPHICS_VER(engine->i915) >= 11 && - GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))) + if (intel_engine_reset_needs_wa_22011802037(engine->gt)) intel_uncore_write_fw(uncore, RING_MODE_GEN7(engine->mmio_base), _MASKED_BIT_ENABLE(GEN12_GFX_PREFETCH_DISABLE)); diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 5a720e252312..42e09f158920 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3001,9 +3001,7 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine) * Wa_22011802037: In addition to stopping the cs, we need * to wait for any pending mi force wakeups */ - if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || - (GRAPHICS_VER(engine->i915) >= 11 && - GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))) + if (intel_engine_reset_needs_wa_22011802037(engine->gt)) intel_engine_wait_for_pending_mi_fw(engine); engine->execlists.reset_ccid = active_ccid(engine); diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 5fa57a34cf4b..3a3f71ce3cb7 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -1632,6 +1632,24 @@ void __intel_fini_wedge(struct intel_wedge_me *w) w->gt = NULL; } +/* + * Wa_22011802037 requires that we (or the GuC) ensure that no command + * streamers are executing MI_FORCE_WAKE while an engine reset is initiated. + */ +bool intel_engine_reset_needs_wa_22011802037(struct intel_gt *gt) +{ + if (GRAPHICS_VER(gt->i915) < 11) + return false; + + if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0)) + return true; + + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) + return false; + + return true; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_reset.c" #include "selftest_hangcheck.c" diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h index 25c975b6e8fc..f615b30b81c5 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.h +++ b/drivers/gpu/drm/i915/gt/intel_reset.h @@ -78,4 +78,6 @@ void __intel_fini_wedge(struct intel_wedge_me *w); bool intel_has_gpu_reset(const struct intel_gt *gt); bool intel_has_reset_engine(const struct intel_gt *gt); +bool intel_engine_reset_needs_wa_22011802037(struct intel_gt *gt); + #endif /* I915_RESET_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 82a2ecc12b21..da967938fea5 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -288,9 +288,7 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) flags |= GUC_WA_DUAL_QUEUE; /* Wa_22011802037: graphics version 11/12 */ - if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) || - (GRAPHICS_VER(gt->i915) >= 11 && - GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 70))) + if (intel_engine_reset_needs_wa_22011802037(gt)) flags |= GUC_WA_PRE_PARSER; /* diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 836e4d9d65ef..7a3e02ea5663 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1690,9 +1690,7 @@ static void guc_engine_reset_prepare(struct intel_engine_cs *engine) * Wa_22011802037: In addition to stopping the cs, we need * to wait for any pending mi force wakeups */ - if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || - (GRAPHICS_VER(engine->i915) >= 11 && - GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))) { + if (intel_engine_reset_needs_wa_22011802037(engine->gt)) { intel_engine_stop_cs(engine); intel_engine_wait_for_pending_mi_fw(engine); } -- Gitee From 338cdf7b3f56dcf3dd2b9f0e4069616bc9657d8e Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 21 Aug 2023 11:06:23 -0700 Subject: [PATCH 057/268] drm/i915/xelpg: Call Xe_LPG workaround functions based on IP version stable inclusion from stable-6.6.26 commit 18e77951e14a73f75d269e54b90c648b1e18b66e category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit f7696ded7c9e358670dae1801660f442f059c7db ] Although some of our Xe_LPG workarounds were already being applied based on IP version correctly, others were matching on MTL as a base platform, which is incorrect. Although MTL is the only platform right now that uses Xe_LPG IP, this may not always be the case. If a future platform re-uses this graphics IP, the same workarounds should be applied, even if it isn't a "MTL" platform. We were also incorrectly applying Xe_LPG workarounds/tuning to the Xe_LPM+ media IP in one or two places; we should make sure that we don't try to apply graphics workarounds to the media GT and vice versa where they don't belong. A new helper macro IS_GT_IP_RANGE() is added to help ensure this is handled properly -- it checks that the GT matches the IP type being tested as well as the IP version falling in the proper range. Note that many of the stepping-based workarounds are still incorrectly checking for a MTL base platform; that will be remedied in a later patch. v2: - Rework macro into a slightly more generic IS_GT_IP_RANGE() that can be used for either GFX or MEDIA checks. v3: - Switch back to separate macros for gfx and media. (Jani) - Move macro to intel_gt.h. (Andi) Cc: Gustavo Sousa Cc: Tvrtko Ursulin Cc: Jani Nikula Cc: Andi Shyti Signed-off-by: Matt Roper Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230821180619.650007-14-matthew.d.roper@intel.com Stable-dep-of: 186bce682772 ("drm/i915/mtl: Update workaround 14018575942") Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- drivers/gpu/drm/i915/gt/intel_gt.h | 11 ++++++ drivers/gpu/drm/i915/gt/intel_workarounds.c | 38 +++++++++++---------- 2 files changed, 31 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 6c34547b58b5..15c25980411d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -14,6 +14,17 @@ struct drm_i915_private; struct drm_printer; +/* + * Check that the GT is a graphics GT and has an IP version within the + * specified range (inclusive). + */ +#define IS_GFX_GT_IP_RANGE(gt, from, until) ( \ + BUILD_BUG_ON_ZERO((from) < IP_VER(2, 0)) + \ + BUILD_BUG_ON_ZERO((until) < (from)) + \ + ((gt)->type != GT_MEDIA && \ + GRAPHICS_VER_FULL((gt)->i915) >= (from) && \ + GRAPHICS_VER_FULL((gt)->i915) <= (until))) + #define GT_TRACE(gt, fmt, ...) do { \ const struct intel_gt *gt__ __maybe_unused = (gt); \ GEM_TRACE("%s " fmt, dev_name(gt__->i915->drm.dev), \ diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 69973dc51828..4c24f3897aee 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -781,8 +781,8 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE); } -static void mtl_ctx_gt_tuning_init(struct intel_engine_cs *engine, - struct i915_wa_list *wal) +static void xelpg_ctx_gt_tuning_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; @@ -793,12 +793,12 @@ static void mtl_ctx_gt_tuning_init(struct intel_engine_cs *engine, wa_add(wal, DRAW_WATERMARK, VERT_WM_VAL, 0x3FF, 0, false); } -static void mtl_ctx_workarounds_init(struct intel_engine_cs *engine, - struct i915_wa_list *wal) +static void xelpg_ctx_workarounds_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; - mtl_ctx_gt_tuning_init(engine, wal); + xelpg_ctx_gt_tuning_init(engine, wal); if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) { @@ -907,8 +907,8 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, if (engine->class != RENDER_CLASS) goto done; - if (IS_METEORLAKE(i915)) - mtl_ctx_workarounds_init(engine, wal); + if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 71))) + xelpg_ctx_workarounds_init(engine, wal); else if (IS_PONTEVECCHIO(i915)) ; /* noop; none at this time */ else if (IS_DG2(i915)) @@ -1688,10 +1688,8 @@ xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) */ static void gt_tuning_settings(struct intel_gt *gt, struct i915_wa_list *wal) { - if (IS_METEORLAKE(gt->i915)) { - if (gt->type != GT_MEDIA) - wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); - + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) { + wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS); } @@ -1723,7 +1721,7 @@ gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal) return; } - if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) xelpg_gt_workarounds_init(gt, wal); else if (IS_PONTEVECCHIO(i915)) pvc_gt_workarounds_init(gt, wal); @@ -2172,7 +2170,7 @@ static void pvc_whitelist_build(struct intel_engine_cs *engine) blacklist_trtt(engine); } -static void mtl_whitelist_build(struct intel_engine_cs *engine) +static void xelpg_whitelist_build(struct intel_engine_cs *engine) { struct i915_wa_list *w = &engine->whitelist; @@ -2194,8 +2192,10 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine) wa_init_start(w, engine->gt, "whitelist", engine->name); - if (IS_METEORLAKE(i915)) - mtl_whitelist_build(engine); + if (engine->gt->type == GT_MEDIA) + ; /* none yet */ + else if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 71))) + xelpg_whitelist_build(engine); else if (IS_PONTEVECCHIO(i915)) pvc_whitelist_build(engine); else if (IS_DG2(i915)) @@ -2795,10 +2795,12 @@ ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * function invoked by __intel_engine_init_ctx_wa(). */ static void -add_render_compute_tuning_settings(struct drm_i915_private *i915, +add_render_compute_tuning_settings(struct intel_gt *gt, struct i915_wa_list *wal) { - if (IS_METEORLAKE(i915) || IS_DG2(i915)) + struct drm_i915_private *i915 = gt->i915; + + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || IS_DG2(i915)) wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512); /* @@ -2828,7 +2830,7 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li { struct drm_i915_private *i915 = engine->i915; - add_render_compute_tuning_settings(i915, wal); + add_render_compute_tuning_settings(engine->gt, wal); if (GRAPHICS_VER(i915) >= 11) { /* This is not a Wa (although referred to as -- Gitee From fa156c9dffa38e96041c356ef726cb3806160130 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 21 Aug 2023 11:06:24 -0700 Subject: [PATCH 058/268] drm/i915: Eliminate IS_MTL_GRAPHICS_STEP stable inclusion from stable-6.6.26 commit b3749611a5e51188d17b4898eed8ecea571bc539 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 5a213086a025349361b5cf75c8fd4591d96a7a99 ] Several workarounds are guarded by IS_MTL_GRAPHICS_STEP. However none of these workarounds are actually tied to MTL as a platform; they only relate to the Xe_LPG graphics IP, regardless of what platform it appears in. At the moment MTL is the only platform that uses Xe_LPG with IP versions 12.70 and 12.71, but we can't count on this being true in the future. Switch these to use a new IS_GFX_GT_IP_STEP() macro instead that is purely based on IP version. IS_GFX_GT_IP_STEP() is also GT-based rather than device-based, which will help prevent mistakes where we accidentally try to apply Xe_LPG graphics workarounds to the Xe_LPM+ media GT and vice-versa. v2: - Switch to a more generic and shorter IS_GT_IP_STEP macro that can be used for both graphics and media IP (and any other kind of GTs that show up in the future). v3: - Switch back to long-form IS_GFX_GT_IP_STEP macro. (Jani) - Move macro to intel_gt.h. (Andi) v4: - Build IS_GFX_GT_IP_STEP on top of IS_GFX_GT_IP_RANGE and IS_GRAPHICS_STEP building blocks and name the parameters from/until rather than begin/fixed. (Jani) - Fix usage examples in comment. v5: - Tweak comment on macro. (Gustavo) Cc: Gustavo Sousa Cc: Tvrtko Ursulin Cc: Andi Shyti Cc: Jani Nikula Signed-off-by: Matt Roper Reviewed-by: Gustavo Sousa Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230821180619.650007-15-matthew.d.roper@intel.com Stable-dep-of: 186bce682772 ("drm/i915/mtl: Update workaround 14018575942") Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- .../drm/i915/display/skl_universal_plane.c | 5 +- drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 11 +++-- drivers/gpu/drm/i915/gt/intel_gt.h | 20 ++++++++ drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 7 ++- drivers/gpu/drm/i915/gt/intel_lrc.c | 4 +- drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- drivers/gpu/drm/i915/gt/intel_workarounds.c | 48 ++++++++++--------- drivers/gpu/drm/i915/gt/uc/intel_guc.c | 2 +- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 4 -- 10 files changed, 62 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index ffc15d278a39..d557ecd4e1eb 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -20,6 +20,7 @@ #include "skl_scaler.h" #include "skl_universal_plane.h" #include "skl_watermark.h" +#include "gt/intel_gt.h" #include "pxp/intel_pxp.h" static const u32 skl_plane_formats[] = { @@ -2169,8 +2170,8 @@ static bool skl_plane_has_rc_ccs(struct drm_i915_private *i915, enum pipe pipe, enum plane_id plane_id) { /* Wa_14017240301 */ - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) + if (IS_GFX_GT_IP_STEP(to_gt(i915), IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(to_gt(i915), IP_VER(12, 71), STEP_A0, STEP_B0)) return false; /* Wa_22011186057 */ diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 7ad36198aab2..3ac3e12d9c52 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -4,9 +4,9 @@ */ #include "gen8_engine_cs.h" -#include "i915_drv.h" #include "intel_engine_regs.h" #include "intel_gpu_commands.h" +#include "intel_gt.h" #include "intel_lrc.h" #include "intel_ring.h" @@ -226,8 +226,8 @@ u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs) static int mtl_dummy_pipe_control(struct i915_request *rq) { /* Wa_14016712196 */ - if (IS_MTL_GRAPHICS_STEP(rq->i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(rq->i915, P, STEP_A0, STEP_B0)) { + if (IS_GFX_GT_IP_STEP(rq->engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(rq->engine->gt, IP_VER(12, 71), STEP_A0, STEP_B0)) { u32 *cs; /* dummy PIPE_CONTROL + depth flush */ @@ -808,6 +808,7 @@ u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs) u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) { struct drm_i915_private *i915 = rq->i915; + struct intel_gt *gt = rq->engine->gt; u32 flags = (PIPE_CONTROL_CS_STALL | PIPE_CONTROL_TLB_INVALIDATE | PIPE_CONTROL_TILE_CACHE_FLUSH | @@ -818,8 +819,8 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) PIPE_CONTROL_FLUSH_ENABLE); /* Wa_14016712196 */ - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) /* dummy PIPE_CONTROL + depth flush */ cs = gen12_emit_pipe_control(cs, 0, PIPE_CONTROL_DEPTH_CACHE_FLUSH, 0); diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 15c25980411d..6e63b46682f7 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -25,6 +25,26 @@ struct drm_printer; GRAPHICS_VER_FULL((gt)->i915) >= (from) && \ GRAPHICS_VER_FULL((gt)->i915) <= (until))) +/* + * Check that the GT is a graphics GT with a specific IP version and has + * a stepping in the range [from, until). The lower stepping bound is + * inclusive, the upper bound is exclusive. The most common use-case of this + * macro is for checking bounds for workarounds, which usually have a stepping + * ("from") at which the hardware issue is first present and another stepping + * ("until") at which a hardware fix is present and the software workaround is + * no longer necessary. E.g., + * + * IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) + * IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B1, STEP_FOREVER) + * + * "STEP_FOREVER" can be passed as "until" for workarounds that have no upper + * stepping bound for the specified IP version. + */ +#define IS_GFX_GT_IP_STEP(gt, ipver, from, until) ( \ + BUILD_BUG_ON_ZERO((until) <= (from)) + \ + (IS_GFX_GT_IP_RANGE((gt), (ipver), (ipver)) && \ + IS_GRAPHICS_STEP((gt)->i915, (from), (until)))) + #define GT_TRACE(gt, fmt, ...) do { \ const struct intel_gt *gt__ __maybe_unused = (gt); \ GEM_TRACE("%s " fmt, dev_name(gt__->i915->drm.dev), \ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index 2c0f1f3e28ff..c6dec485aefb 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -3,8 +3,7 @@ * Copyright © 2022 Intel Corporation */ -#include "i915_drv.h" - +#include "intel_gt.h" #include "intel_gt_mcr.h" #include "intel_gt_print.h" #include "intel_gt_regs.h" @@ -166,8 +165,8 @@ void intel_gt_mcr_init(struct intel_gt *gt) gt->steering_table[OADDRM] = xelpmp_oaddrm_steering_table; } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) { /* Wa_14016747170 */ - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) fuse = REG_FIELD_GET(MTL_GT_L3_EXC_MASK, intel_uncore_read(gt->uncore, MTL_GT_ACTIVITY_FACTOR)); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index f297c5808e7c..b99efa348ad1 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1347,8 +1347,8 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) cs = gen12_emit_aux_table_inv(ce->engine, cs); /* Wa_16014892111 */ - if (IS_MTL_GRAPHICS_STEP(ce->engine->i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(ce->engine->i915, P, STEP_A0, STEP_B0) || + if (IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 71), STEP_A0, STEP_B0) || IS_DG2(ce->engine->i915)) cs = dg2_emit_draw_watermark_setting(cs); diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 3a3f71ce3cb7..63d0892d3c45 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -1641,7 +1641,7 @@ bool intel_engine_reset_needs_wa_22011802037(struct intel_gt *gt) if (GRAPHICS_VER(gt->i915) < 11) return false; - if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0)) + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0)) return true; if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 4c24f3897aee..b6237e999be9 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -784,24 +784,24 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, static void xelpg_ctx_gt_tuning_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { - struct drm_i915_private *i915 = engine->i915; + struct intel_gt *gt = engine->gt; dg2_ctx_gt_tuning_init(engine, wal); - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_B0, STEP_FOREVER) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_B0, STEP_FOREVER)) + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER)) wa_add(wal, DRAW_WATERMARK, VERT_WM_VAL, 0x3FF, 0, false); } static void xelpg_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { - struct drm_i915_private *i915 = engine->i915; + struct intel_gt *gt = engine->gt; xelpg_ctx_gt_tuning_init(engine, wal); - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) { + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) { /* Wa_14014947963 */ wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000); @@ -1644,8 +1644,8 @@ xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) /* Wa_22016670082 */ wa_write_or(wal, GEN12_SQCNT1, GEN12_STRICT_RAR_ENABLE); - if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0)) { + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) { /* Wa_14014830051 */ wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN); @@ -2297,23 +2297,24 @@ static void rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; + struct intel_gt *gt = engine->gt; - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) { + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) { /* Wa_22014600077 */ wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, ENABLE_EU_COUNT_FOR_TDL_FLUSH); } - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) || IS_DG2(i915)) { /* Wa_1509727124 */ wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE, SC_DISABLE_POWER_OPTIMIZATION_EBB); } - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || IS_DG2(i915)) { /* Wa_22012856258 */ wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, @@ -2829,8 +2830,9 @@ static void general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; + struct intel_gt *gt = engine->gt; - add_render_compute_tuning_settings(engine->gt, wal); + add_render_compute_tuning_settings(gt, wal); if (GRAPHICS_VER(i915) >= 11) { /* This is not a Wa (although referred to as @@ -2851,13 +2853,13 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li GEN11_INDIRECT_STATE_BASE_ADDR_OVERRIDE); } - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_B0, STEP_FOREVER) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_B0, STEP_FOREVER)) + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER)) /* Wa_14017856879 */ wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN3, MTL_DISABLE_FIX_FOR_EOT_FLUSH); - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) /* * Wa_14017066071 * Wa_14017654203 @@ -2865,13 +2867,13 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE, MTL_DISABLE_SAMPLER_SC_OOO); - if (IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) /* Wa_22015279794 */ wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_PREFETCH_INTO_IC); - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) || IS_DG2(i915)) { /* Wa_22013037850 */ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, @@ -2881,8 +2883,8 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE); } - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) || IS_PONTEVECCHIO(i915) || IS_DG2(i915)) { /* Wa_22014226127 */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index da967938fea5..861d0c58388c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -273,7 +273,7 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) flags |= GUC_WA_POLLCS; /* Wa_14014475959 */ - if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) || + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || IS_DG2(gt->i915)) flags |= GUC_WA_HOLD_CCS_SWITCHOUT; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 7a3e02ea5663..b5de5a9f5967 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -4297,7 +4297,7 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine) /* Wa_14014475959:dg2 */ if (engine->class == COMPUTE_CLASS) - if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || + if (IS_GFX_GT_IP_STEP(engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) || IS_DG2(engine->i915)) engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7a8ce7239bc9..e0e0493d6c1f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -658,10 +658,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_XEHPSDV_GRAPHICS_STEP(__i915, since, until) \ (IS_XEHPSDV(__i915) && IS_GRAPHICS_STEP(__i915, since, until)) -#define IS_MTL_GRAPHICS_STEP(__i915, variant, since, until) \ - (IS_SUBPLATFORM(__i915, INTEL_METEORLAKE, INTEL_SUBPLATFORM_##variant) && \ - IS_GRAPHICS_STEP(__i915, since, until)) - #define IS_MTL_DISPLAY_STEP(__i915, since, until) \ (IS_METEORLAKE(__i915) && \ IS_DISPLAY_STEP(__i915, since, until)) -- Gitee From 12c7041eec334fe2582fccf4c74cafdc313f667e Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 21 Aug 2023 11:06:29 -0700 Subject: [PATCH 059/268] drm/i915: Replace several IS_METEORLAKE with proper IP version checks stable inclusion from stable-6.6.26 commit ec84b2a44b057b2c51ed9f670b92690904e1106c category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 14128d64090fa88445376cb8ccf91c50c08bd410 ] Many of the IS_METEORLAKE conditions throughout the driver are supposed to be checks for Xe_LPG and/or Xe_LPM+ IP, not for the MTL platform specifically. Update those checks to ensure that the code will still operate properly if/when these IP versions show up on future platforms. v2: - Update two more conditions (one for pg_enable, one for MTL HuC compatibility). v3: - Don't change GuC/HuC compatibility check, which sounds like it truly is specific to the MTL platform. (Gustavo) - Drop a non-lineage workaround number for the OA timestamp frequency workaround. (Gustavo) Cc: Gustavo Sousa Signed-off-by: Matt Roper Reviewed-by: Gustavo Sousa Link: https://patchwork.freedesktop.org/patch/msgid/20230821180619.650007-20-matthew.d.roper@intel.com Stable-dep-of: 186bce682772 ("drm/i915/mtl: Update workaround 14018575942") Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- drivers/gpu/drm/i915/gem/i915_gem_create.c | 4 ++-- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 2 +- drivers/gpu/drm/i915/gt/intel_mocs.c | 2 +- drivers/gpu/drm/i915/gt/intel_rc6.c | 2 +- drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- drivers/gpu/drm/i915/gt/intel_rps.c | 2 +- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_perf.c | 11 +++++------ 8 files changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index d24c0ce8805c..19156ba4b9ef 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -405,8 +405,8 @@ static int ext_set_pat(struct i915_user_extension __user *base, void *data) BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) != offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd)); - /* Limiting the extension only to Meteor Lake */ - if (!IS_METEORLAKE(i915)) + /* Limiting the extension only to Xe_LPG and beyond */ + if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 70)) return -ENODEV; if (copy_from_user(&ext, base, sizeof(ext))) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index a95615b345cd..5a3a5b29d150 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -21,7 +21,7 @@ static void intel_gsc_idle_msg_enable(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; - if (IS_METEORLAKE(i915) && engine->id == GSC0) { + if (MEDIA_VER(i915) >= 13 && engine->id == GSC0) { intel_uncore_write(engine->gt->uncore, RC_PSMI_CTRL_GSCCS, _MASKED_BIT_DISABLE(IDLE_MSG_DISABLE)); diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index bf8b42d2d327..07269ff3be13 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -495,7 +495,7 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, memset(table, 0, sizeof(struct drm_i915_mocs_table)); table->unused_entries_index = I915_MOCS_PTE; - if (IS_METEORLAKE(i915)) { + if (IS_GFX_GT_IP_RANGE(&i915->gt0, IP_VER(12, 70), IP_VER(12, 71))) { table->size = ARRAY_SIZE(mtl_mocs_table); table->table = mtl_mocs_table; table->n_entries = MTL_NUM_MOCS_ENTRIES; diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index b8c9338176bd..9e113e947326 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -123,7 +123,7 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) * temporary wa and should be removed after fixing real cause * of forcewake timeouts. */ - if (IS_METEORLAKE(gt->i915)) + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) pg_enable = GEN9_MEDIA_PG_ENABLE | GEN11_MEDIA_SAMPLER_PG_ENABLE; diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 63d0892d3c45..13fb8e5042c5 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -705,7 +705,7 @@ static int __reset_guc(struct intel_gt *gt) static bool needs_wa_14015076503(struct intel_gt *gt, intel_engine_mask_t engine_mask) { - if (!IS_METEORLAKE(gt->i915) || !HAS_ENGINE(gt, GSC0)) + if (MEDIA_VER_FULL(gt->i915) != IP_VER(13, 0) || !HAS_ENGINE(gt, GSC0)) return false; if (!__HAS_ENGINE(engine_mask, GSC0)) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 092542f53aad..4feef874e6d6 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1161,7 +1161,7 @@ void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *c { struct drm_i915_private *i915 = rps_to_i915(rps); - if (IS_METEORLAKE(i915)) + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) return mtl_get_freq_caps(rps, caps); else return __gen6_rps_get_freq_caps(rps, caps); diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 4de44cf1026d..7a90a2e32c9f 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -144,7 +144,7 @@ static const char *i915_cache_level_str(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = obj_to_i915(obj); - if (IS_METEORLAKE(i915)) { + if (IS_GFX_GT_IP_RANGE(to_gt(i915), IP_VER(12, 70), IP_VER(12, 71))) { switch (obj->pat_index) { case 0: return " WB"; case 1: return " WT"; diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 8f4a25d2cfc2..48ea17b49b3a 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -3255,11 +3255,10 @@ get_sseu_config(struct intel_sseu *out_sseu, */ u32 i915_perf_oa_timestamp_frequency(struct drm_i915_private *i915) { - /* - * Wa_18013179988:dg2 - * Wa_14015846243:mtl - */ - if (IS_DG2(i915) || IS_METEORLAKE(i915)) { + struct intel_gt *gt = to_gt(i915); + + /* Wa_18013179988 */ + if (IS_DG2(i915) || IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) { intel_wakeref_t wakeref; u32 reg, shift; @@ -4564,7 +4563,7 @@ static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { - if (IS_METEORLAKE(perf->i915)) + if (GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 70)) return reg_in_range_table(addr, mtl_oa_mux_regs); else return reg_in_range_table(addr, gen12_oa_mux_regs); -- Gitee From 74f8a77c4a0be6d45c95db3ef15c150ae9bde87e Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Mon, 28 Aug 2023 12:04:50 +0530 Subject: [PATCH 060/268] drm/i915/mtl: Update workaround 14016712196 stable inclusion from stable-6.6.26 commit 338db8193cb2dd93544ac445a7b4b4a7f77094ad category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 7467e1da906468bcbd311023b30708193103ecf9 ] Now this workaround is permanent workaround on MTL and DG2, earlier we used to apply on MTL A0 step only. VLK-45480 Fixes: d922b80b1010 ("drm/i915/gt: Add workaround 14016712196") Signed-off-by: Tejas Upadhyay Acked-by: Nirmoy Das Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230828063450.2642748-1-tejas.upadhyay@intel.com Stable-dep-of: 186bce682772 ("drm/i915/mtl: Update workaround 14018575942") Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 3ac3e12d9c52..ba4c2422b340 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -226,8 +226,8 @@ u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs) static int mtl_dummy_pipe_control(struct i915_request *rq) { /* Wa_14016712196 */ - if (IS_GFX_GT_IP_STEP(rq->engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) || - IS_GFX_GT_IP_STEP(rq->engine->gt, IP_VER(12, 71), STEP_A0, STEP_B0)) { + if (IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 71)) || + IS_DG2(rq->i915)) { u32 *cs; /* dummy PIPE_CONTROL + depth flush */ @@ -819,8 +819,7 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) PIPE_CONTROL_FLUSH_ENABLE); /* Wa_14016712196 */ - if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || - IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || IS_DG2(i915)) /* dummy PIPE_CONTROL + depth flush */ cs = gen12_emit_pipe_control(cs, 0, PIPE_CONTROL_DEPTH_CACHE_FLUSH, 0); -- Gitee From 9c8c800bc8d84c5336a5d4688877bd1b335153ce Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 8 Jan 2024 17:57:38 +0530 Subject: [PATCH 061/268] drm/i915/xelpg: Extend some workarounds/tuning to gfx version 12.74 stable inclusion from stable-6.6.26 commit 798781b43194c6d2bdea0c4ded660f3135c484d3 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit c44d4ef47fdad0a33966de89f9064e19736bb52f ] Some of our existing Xe_LPG workarounds and tuning are also applicable to the version 12.74 variant. Extend the condition bounds accordingly. Also fix the comment on Wa_14018575942 while we're at it. v2: Extend some more workarounds (Harish) Signed-off-by: Matt Roper Signed-off-by: Harish Chegondi Signed-off-by: Haridhar Kalvala Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20240108122738.14399-4-haridhar.kalvala@intel.com Stable-dep-of: 186bce682772 ("drm/i915/mtl: Update workaround 14018575942") Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 4 ++-- drivers/gpu/drm/i915/gt/intel_workarounds.c | 24 +++++++++++++-------- drivers/gpu/drm/i915/i915_perf.c | 2 +- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index ba4c2422b340..cddf8c16e9a7 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -226,7 +226,7 @@ u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs) static int mtl_dummy_pipe_control(struct i915_request *rq) { /* Wa_14016712196 */ - if (IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 71)) || + if (IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_DG2(rq->i915)) { u32 *cs; @@ -819,7 +819,7 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) PIPE_CONTROL_FLUSH_ENABLE); /* Wa_14016712196 */ - if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || IS_DG2(i915)) + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_DG2(i915)) /* dummy PIPE_CONTROL + depth flush */ cs = gen12_emit_pipe_control(cs, 0, PIPE_CONTROL_DEPTH_CACHE_FLUSH, 0); diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index b6237e999be9..37b2b0440923 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -788,8 +788,13 @@ static void xelpg_ctx_gt_tuning_init(struct intel_engine_cs *engine, dg2_ctx_gt_tuning_init(engine, wal); - if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) || - IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER)) + /* + * Due to Wa_16014892111, the DRAW_WATERMARK tuning must be done in + * gen12_emit_indirect_ctx_rcs() rather than here on some early + * steppings. + */ + if (!(IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0))) wa_add(wal, DRAW_WATERMARK, VERT_WM_VAL, 0x3FF, 0, false); } @@ -907,7 +912,7 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, if (engine->class != RENDER_CLASS) goto done; - if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 71))) + if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74))) xelpg_ctx_workarounds_init(engine, wal); else if (IS_PONTEVECCHIO(i915)) ; /* noop; none at this time */ @@ -1638,7 +1643,7 @@ pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) static void xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { - /* Wa_14018778641 / Wa_18018781329 */ + /* Wa_14018575942 / Wa_18018781329 */ wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); /* Wa_22016670082 */ @@ -1688,7 +1693,7 @@ xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) */ static void gt_tuning_settings(struct intel_gt *gt, struct i915_wa_list *wal) { - if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) { + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) { wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS); } @@ -1721,7 +1726,7 @@ gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal) return; } - if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) xelpg_gt_workarounds_init(gt, wal); else if (IS_PONTEVECCHIO(i915)) pvc_gt_workarounds_init(gt, wal); @@ -2194,7 +2199,7 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine) if (engine->gt->type == GT_MEDIA) ; /* none yet */ - else if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 71))) + else if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74))) xelpg_whitelist_build(engine); else if (IS_PONTEVECCHIO(i915)) pvc_whitelist_build(engine); @@ -2801,7 +2806,7 @@ add_render_compute_tuning_settings(struct intel_gt *gt, { struct drm_i915_private *i915 = gt->i915; - if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || IS_DG2(i915)) + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_DG2(i915)) wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512); /* @@ -2854,7 +2859,8 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li } if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) || - IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER)) + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER) || + IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 74), IP_VER(12, 74))) /* Wa_14017856879 */ wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN3, MTL_DISABLE_FIX_FOR_EOT_FLUSH); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 48ea17b49b3a..3f90403d86cb 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -3258,7 +3258,7 @@ u32 i915_perf_oa_timestamp_frequency(struct drm_i915_private *i915) struct intel_gt *gt = to_gt(i915); /* Wa_18013179988 */ - if (IS_DG2(i915) || IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) { + if (IS_DG2(i915) || IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) { intel_wakeref_t wakeref; u32 reg, shift; -- Gitee From 1eef9ae4712a3a136a382358cb37d834e720efc7 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Wed, 28 Feb 2024 16:07:38 +0530 Subject: [PATCH 062/268] drm/i915/mtl: Update workaround 14018575942 stable inclusion from stable-6.6.26 commit 2564623ee0da92ed7f8a87aa3758cbf2c46257bb category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 186bce682772e7346bf7ced5325b5f4ff050ccfb ] Applying WA 14018575942 only on Compute engine has impact on some apps like chrome. Updating this WA to apply on Render engine as well as it is helping with performance on Chrome. Note: There is no concern from media team thus not applying WA on media engines. We will revisit if any issues reported from media team. V2(Matt): - Use correct WA number Fixes: 668f37e1ee11 ("drm/i915/mtl: Update workaround 14018778641") Signed-off-by: Tejas Upadhyay Reviewed-by: Matt Roper Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20240228103738.2018458-1-tejas.upadhyay@intel.com (cherry picked from commit 71271280175aa0ed6673e40cce7c01296bcd05f6) Signed-off-by: Rodrigo Vivi Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 37b2b0440923..0ea52f77b4c7 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1644,6 +1644,7 @@ static void xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { /* Wa_14018575942 / Wa_18018781329 */ + wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); /* Wa_22016670082 */ -- Gitee From 8d2c7198988f6c632aaa06e5ab5c754a3f7df40e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 28 Mar 2024 15:30:39 +0100 Subject: [PATCH 063/268] dm integrity: fix out-of-range warning stable inclusion from stable-6.6.26 commit 7cd73d90856d7edec080b356a33959b57a241598 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 8e91c2342351e0f5ef6c0a704384a7f6fc70c3b2 ] Depending on the value of CONFIG_HZ, clang complains about a pointless comparison: drivers/md/dm-integrity.c:4085:12: error: result of comparison of constant 42949672950 with expression of type 'unsigned int' is always false [-Werror,-Wtautological-constant-out-of-range-compare] if (val >= (uint64_t)UINT_MAX * 1000 / HZ) { As the check remains useful for other configurations, shut up the warning by adding a second type cast to uint64_t. Fixes: 468dfca38b1a ("dm integrity: add a bitmap mode") Signed-off-by: Arnd Bergmann Reviewed-by: Mikulas Patocka Reviewed-by: Justin Stitt Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- drivers/md/dm-integrity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index e7cd27e387df..470add73f7bd 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -4231,7 +4231,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv } else if (sscanf(opt_string, "sectors_per_bit:%llu%c", &llval, &dummy) == 1) { log2_sectors_per_bitmap_bit = !llval ? 0 : __ilog2_u64(llval); } else if (sscanf(opt_string, "bitmap_flush_interval:%u%c", &val, &dummy) == 1) { - if (val >= (uint64_t)UINT_MAX * 1000 / HZ) { + if ((uint64_t)val >= (uint64_t)UINT_MAX * 1000 / HZ) { r = -EINVAL; ti->error = "Invalid bitmap_flush_interval argument"; goto bad; -- Gitee From 6cbd6d912c27b76a9dad7696ca3770cee9d07ae4 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 5 Mar 2024 12:37:48 +0800 Subject: [PATCH 064/268] mm/treewide: replace pud_large() with pud_leaf() stable inclusion from stable-6.6.26 commit 907835e6dee6f77ac30ae50bb3f88bd92055c86e category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 0a845e0f6348ccfa2dcc8c450ffd1c9ffe8c4add ] pud_large() is always defined as pud_leaf(). Merge their usages. Chose pud_leaf() because pud_leaf() is a global API, while pud_large() is not. Link: https://lkml.kernel.org/r/20240305043750.93762-9-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Jason Gunthorpe Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: "Aneesh Kumar K.V" Cc: Borislav Petkov Cc: Christophe Leroy Cc: Dave Hansen Cc: Dmitry Vyukov Cc: Ingo Molnar Cc: Kirill A. Shutemov Cc: Michael Ellerman Cc: Muchun Song Cc: "Naveen N. Rao" Cc: Nicholas Piggin Cc: Thomas Gleixner Cc: Vincenzo Frascino Cc: Yang Shi Signed-off-by: Andrew Morton Stable-dep-of: c567f2948f57 ("Revert "x86/mm/ident_map: Use gbpages only where full GB page should be mapped."") Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- arch/powerpc/mm/book3s64/pgtable.c | 2 +- arch/s390/boot/vmem.c | 2 +- arch/s390/include/asm/pgtable.h | 4 ++-- arch/s390/mm/gmap.c | 2 +- arch/s390/mm/hugetlbpage.c | 4 ++-- arch/s390/mm/pageattr.c | 2 +- arch/s390/mm/pgtable.c | 2 +- arch/s390/mm/vmem.c | 6 +++--- arch/sparc/mm/init_64.c | 2 +- arch/x86/kvm/mmu/mmu.c | 2 +- arch/x86/mm/fault.c | 4 ++-- arch/x86/mm/ident_map.c | 2 +- arch/x86/mm/init_64.c | 4 ++-- arch/x86/mm/kasan_init_64.c | 2 +- arch/x86/mm/mem_encrypt_identity.c | 2 +- arch/x86/mm/pat/set_memory.c | 6 +++--- arch/x86/mm/pgtable.c | 2 +- arch/x86/mm/pti.c | 2 +- arch/x86/power/hibernate.c | 2 +- arch/x86/xen/mmu_pv.c | 4 ++-- 20 files changed, 29 insertions(+), 29 deletions(-) diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 926bec775f41..9822366dc186 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -130,7 +130,7 @@ void set_pud_at(struct mm_struct *mm, unsigned long addr, WARN_ON(pte_hw_valid(pud_pte(*pudp))); assert_spin_locked(pud_lockptr(mm, pudp)); - WARN_ON(!(pud_large(pud))); + WARN_ON(!(pud_leaf(pud))); #endif trace_hugepage_set_pud(addr, pud_val(pud)); return set_pte_at(mm, addr, pudp_ptep(pudp), pud_pte(pud)); diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 442a74f113cb..14e1a73ffcfe 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -360,7 +360,7 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e } pmd = boot_crst_alloc(_SEGMENT_ENTRY_EMPTY); pud_populate(&init_mm, pud, pmd); - } else if (pud_large(*pud)) { + } else if (pud_leaf(*pud)) { continue; } pgtable_pmd_populate(pud, addr, next, mode); diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index fb3ee7758b76..38290b0078c5 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -729,7 +729,7 @@ static inline int pud_bad(pud_t pud) { unsigned long type = pud_val(pud) & _REGION_ENTRY_TYPE_MASK; - if (type > _REGION_ENTRY_TYPE_R3 || pud_large(pud)) + if (type > _REGION_ENTRY_TYPE_R3 || pud_leaf(pud)) return 1; if (type < _REGION_ENTRY_TYPE_R3) return 0; @@ -1396,7 +1396,7 @@ static inline unsigned long pud_deref(pud_t pud) unsigned long origin_mask; origin_mask = _REGION_ENTRY_ORIGIN; - if (pud_large(pud)) + if (pud_leaf(pud)) origin_mask = _REGION3_ENTRY_ORIGIN_LARGE; return (unsigned long)__va(pud_val(pud) & origin_mask); } diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 157e0a8d5157..d17bb1ef63f4 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -596,7 +596,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) pud = pud_offset(p4d, vmaddr); VM_BUG_ON(pud_none(*pud)); /* large puds cannot yet be handled */ - if (pud_large(*pud)) + if (pud_leaf(*pud)) return -EFAULT; pmd = pmd_offset(pud, vmaddr); VM_BUG_ON(pmd_none(*pmd)); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 297a6d897d5a..5f64f3d0fafb 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -224,7 +224,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, if (p4d_present(*p4dp)) { pudp = pud_offset(p4dp, addr); if (pud_present(*pudp)) { - if (pud_large(*pudp)) + if (pud_leaf(*pudp)) return (pte_t *) pudp; pmdp = pmd_offset(pudp, addr); } @@ -240,7 +240,7 @@ int pmd_huge(pmd_t pmd) int pud_huge(pud_t pud) { - return pud_large(pud); + return pud_leaf(pud); } bool __init arch_hugetlb_valid_size(unsigned long size) diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index b87e96c64b61..441f654d048d 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -274,7 +274,7 @@ static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end, if (pud_none(*pudp)) return -EINVAL; next = pud_addr_end(addr, end); - if (pud_large(*pudp)) { + if (pud_leaf(*pudp)) { need_split = !!(flags & SET_MEMORY_4K); need_split |= !!(addr & ~PUD_MASK); need_split |= !!(addr + PUD_SIZE > next); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 5cb92941540b..5e349869590a 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -479,7 +479,7 @@ static int pmd_lookup(struct mm_struct *mm, unsigned long addr, pmd_t **pmdp) return -ENOENT; /* Large PUDs are not supported yet. */ - if (pud_large(*pud)) + if (pud_leaf(*pud)) return -EFAULT; *pmdp = pmd_offset(pud, addr); diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 6d276103c6d5..2d3f65da56ee 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -322,7 +322,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, if (!add) { if (pud_none(*pud)) continue; - if (pud_large(*pud)) { + if (pud_leaf(*pud)) { if (IS_ALIGNED(addr, PUD_SIZE) && IS_ALIGNED(next, PUD_SIZE)) { pud_clear(pud); @@ -343,7 +343,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, if (!pmd) goto out; pud_populate(&init_mm, pud, pmd); - } else if (pud_large(*pud)) { + } else if (pud_leaf(*pud)) { continue; } ret = modify_pmd_table(pud, addr, next, add, direct); @@ -586,7 +586,7 @@ pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc) if (!pmd) goto out; pud_populate(&init_mm, pud, pmd); - } else if (WARN_ON_ONCE(pud_large(*pud))) { + } else if (WARN_ON_ONCE(pud_leaf(*pud))) { goto out; } pmd = pmd_offset(pud, addr); diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index f83017992eaa..d7db4e737218 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -1665,7 +1665,7 @@ bool kern_addr_valid(unsigned long addr) if (pud_none(*pud)) return false; - if (pud_large(*pud)) + if (pud_leaf(*pud)) return pfn_valid(pud_pfn(*pud)); pmd = pmd_offset(pud, addr); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index f7901cb4d2fa..11c484d72eab 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3120,7 +3120,7 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, if (pud_none(pud) || !pud_present(pud)) goto out; - if (pud_large(pud)) { + if (pud_leaf(pud)) { level = PG_LEVEL_1G; goto out; } diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index a9d69ec994b7..e23851796883 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -376,7 +376,7 @@ static void dump_pagetable(unsigned long address) goto bad; pr_cont("PUD %lx ", pud_val(*pud)); - if (!pud_present(*pud) || pud_large(*pud)) + if (!pud_present(*pud) || pud_leaf(*pud)) goto out; pmd = pmd_offset(pud, address); @@ -1037,7 +1037,7 @@ spurious_kernel_fault(unsigned long error_code, unsigned long address) if (!pud_present(*pud)) return 0; - if (pud_large(*pud)) + if (pud_leaf(*pud)) return spurious_kernel_fault_check(error_code, (pte_t *) pud); pmd = pmd_offset(pud, address); diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c index f50cc210a981..a204a332c71f 100644 --- a/arch/x86/mm/ident_map.c +++ b/arch/x86/mm/ident_map.c @@ -33,7 +33,7 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, next = end; /* if this is already a gbpage, this portion is already mapped */ - if (pud_large(*pud)) + if (pud_leaf(*pud)) continue; /* Is using a gbpage allowed? */ diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index a190aae8ceaf..19d209b412d7 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -617,7 +617,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, } if (!pud_none(*pud)) { - if (!pud_large(*pud)) { + if (!pud_leaf(*pud)) { pmd = pmd_offset(pud, 0); paddr_last = phys_pmd_init(pmd, paddr, paddr_end, @@ -1163,7 +1163,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, if (!pud_present(*pud)) continue; - if (pud_large(*pud) && + if (pud_leaf(*pud) && IS_ALIGNED(addr, PUD_SIZE) && IS_ALIGNED(next, PUD_SIZE)) { spin_lock(&init_mm.page_table_lock); diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 0302491d799d..fcf508c52bdc 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -115,7 +115,7 @@ static void __init kasan_populate_p4d(p4d_t *p4d, unsigned long addr, pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); - if (!pud_large(*pud)) + if (!pud_leaf(*pud)) kasan_populate_pud(pud, addr, next, nid); } while (pud++, addr = next, addr != end); } diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index 0166ab1780cc..ead356135924 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -144,7 +144,7 @@ static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) set_pud(pud, __pud(PUD_FLAGS | __pa(pmd))); } - if (pud_large(*pud)) + if (pud_leaf(*pud)) return NULL; return pud; diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index bda9f129835e..f3c4c756fe1e 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -684,7 +684,7 @@ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address, return NULL; *level = PG_LEVEL_1G; - if (pud_large(*pud) || !pud_present(*pud)) + if (pud_leaf(*pud) || !pud_present(*pud)) return (pte_t *)pud; pmd = pmd_offset(pud, address); @@ -743,7 +743,7 @@ pmd_t *lookup_pmd_address(unsigned long address) return NULL; pud = pud_offset(p4d, address); - if (pud_none(*pud) || pud_large(*pud) || !pud_present(*pud)) + if (pud_none(*pud) || pud_leaf(*pud) || !pud_present(*pud)) return NULL; return pmd_offset(pud, address); @@ -1274,7 +1274,7 @@ static void unmap_pud_range(p4d_t *p4d, unsigned long start, unsigned long end) */ while (end - start >= PUD_SIZE) { - if (pud_large(*pud)) + if (pud_leaf(*pud)) pud_clear(pud); else unmap_pmd_range(pud, start, start + PUD_SIZE); diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 9deadf517f14..8e1ef5345b7a 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -774,7 +774,7 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) */ int pud_clear_huge(pud_t *pud) { - if (pud_large(*pud)) { + if (pud_leaf(*pud)) { pud_clear(pud); return 1; } diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 78414c6d1b5e..51b6b78e6b17 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -217,7 +217,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) pud = pud_offset(p4d, address); /* The user page tables do not use large mappings: */ - if (pud_large(*pud)) { + if (pud_leaf(*pud)) { WARN_ON(1); return NULL; } diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c index 6f955eb1e163..d8af46e67750 100644 --- a/arch/x86/power/hibernate.c +++ b/arch/x86/power/hibernate.c @@ -170,7 +170,7 @@ int relocate_restore_code(void) goto out; } pud = pud_offset(p4d, relocated_restore_code); - if (pud_large(*pud)) { + if (pud_leaf(*pud)) { set_pud(pud, __pud(pud_val(*pud) & ~_PAGE_NX)); goto out; } diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index b6830554ff69..9d4a9311e819 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1082,7 +1082,7 @@ static void __init xen_cleanmfnmap_pud(pud_t *pud, bool unpin) pmd_t *pmd_tbl; int i; - if (pud_large(*pud)) { + if (pud_leaf(*pud)) { pa = pud_val(*pud) & PHYSICAL_PAGE_MASK; xen_free_ro_pages(pa, PUD_SIZE); return; @@ -1863,7 +1863,7 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr) if (!pud_present(pud)) return 0; pa = pud_val(pud) & PTE_PFN_MASK; - if (pud_large(pud)) + if (pud_leaf(pud)) return pa + (vaddr & ~PUD_MASK); pmd = native_make_pmd(xen_read_phys_ulong(pa + pmd_index(vaddr) * -- Gitee From 94c7370737816e765de73521bdd0a31a81069553 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 25 Mar 2024 11:47:51 +0100 Subject: [PATCH 065/268] Revert "x86/mm/ident_map: Use gbpages only where full GB page should be mapped." stable inclusion from stable-6.6.26 commit fbc0a833c055a453d4bc1961c980a85ea1b0750d category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit c567f2948f57bdc03ed03403ae0234085f376b7d ] This reverts commit d794734c9bbfe22f86686dc2909c25f5ffe1a572. While the original change tries to fix a bug, it also unintentionally broke existing systems, see the regressions reported at: https://lore.kernel.org/all/3a1b9909-45ac-4f97-ad68-d16ef1ce99db@pavinjoseph.com/ Since d794734c9bbf was also marked for -stable, let's back it out before causing more damage. Note that due to another upstream change the revert was not 100% automatic: 0a845e0f6348 mm/treewide: replace pud_large() with pud_leaf() Signed-off-by: Ingo Molnar Cc: Cc: Russ Anderson Cc: Steve Wahl Cc: Dave Hansen Link: https://lore.kernel.org/all/3a1b9909-45ac-4f97-ad68-d16ef1ce99db@pavinjoseph.com/ Fixes: d794734c9bbf ("x86/mm/ident_map: Use gbpages only where full GB page should be mapped.") Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- arch/x86/mm/ident_map.c | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c index a204a332c71f..968d7005f4a7 100644 --- a/arch/x86/mm/ident_map.c +++ b/arch/x86/mm/ident_map.c @@ -26,31 +26,18 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, for (; addr < end; addr = next) { pud_t *pud = pud_page + pud_index(addr); pmd_t *pmd; - bool use_gbpage; next = (addr & PUD_MASK) + PUD_SIZE; if (next > end) next = end; - /* if this is already a gbpage, this portion is already mapped */ - if (pud_leaf(*pud)) - continue; - - /* Is using a gbpage allowed? */ - use_gbpage = info->direct_gbpages; - - /* Don't use gbpage if it maps more than the requested region. */ - /* at the begining: */ - use_gbpage &= ((addr & ~PUD_MASK) == 0); - /* ... or at the end: */ - use_gbpage &= ((next & ~PUD_MASK) == 0); - - /* Never overwrite existing mappings */ - use_gbpage &= !pud_present(*pud); - - if (use_gbpage) { + if (info->direct_gbpages) { pud_t pudval; + if (pud_present(*pud)) + continue; + + addr &= PUD_MASK; pudval = __pud((addr - info->offset) | info->page_flag); set_pud(pud, pudval); continue; -- Gitee From 8b7f7c4c2ba76cd93649ee94dc2d3fccba4ac581 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 22 Feb 2024 12:29:34 +0000 Subject: [PATCH 066/268] btrfs: ensure fiemap doesn't race with writes when FIEMAP_FLAG_SYNC is given stable inclusion from stable-6.6.26 commit 8cc484e85e0c6bf72ec7c3c8c697865355873cfb category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 418b09027743d9a9fb39116bed46a192f868a3c3 ] When FIEMAP_FLAG_SYNC is given to fiemap the expectation is that that are no concurrent writes and we get a stable view of the inode's extent layout. When the flag is given we flush all IO (and wait for ordered extents to complete) and then lock the inode in shared mode, however that leaves open the possibility that a write might happen right after the flushing and before locking the inode. So fix this by flushing again after locking the inode - we leave the initial flushing before locking the inode to avoid holding the lock and blocking other RO operations while waiting for IO and ordered extents to complete. The second flushing while holding the inode's lock will most of the time do nothing or very little since the time window for new writes to have happened is small. Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba Stable-dep-of: 978b63f7464a ("btrfs: fix race when detecting delalloc ranges during fiemap") Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- fs/btrfs/extent_io.c | 21 ++++++++------------- fs/btrfs/inode.c | 22 +++++++++++++++++++++- 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index fc8eb8d86ca2..45d427c3033d 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2953,17 +2953,15 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, range_end = round_up(start + len, sectorsize); prev_extent_end = range_start; - btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED); - ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end); if (ret < 0) - goto out_unlock; + goto out; btrfs_release_path(path); path->reada = READA_FORWARD; ret = fiemap_search_slot(inode, path, range_start); if (ret < 0) { - goto out_unlock; + goto out; } else if (ret > 0) { /* * No file extent item found, but we may have delalloc between @@ -3010,7 +3008,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, backref_ctx, 0, 0, 0, prev_extent_end, hole_end); if (ret < 0) { - goto out_unlock; + goto out; } else if (ret > 0) { /* fiemap_fill_next_extent() told us to stop. */ stopped = true; @@ -3066,7 +3064,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, extent_gen, backref_ctx); if (ret < 0) - goto out_unlock; + goto out; else if (ret > 0) flags |= FIEMAP_EXTENT_SHARED; } @@ -3077,7 +3075,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, } if (ret < 0) { - goto out_unlock; + goto out; } else if (ret > 0) { /* fiemap_fill_next_extent() told us to stop. */ stopped = true; @@ -3088,12 +3086,12 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, next_item: if (fatal_signal_pending(current)) { ret = -EINTR; - goto out_unlock; + goto out; } ret = fiemap_next_leaf_item(inode, path); if (ret < 0) { - goto out_unlock; + goto out; } else if (ret > 0) { /* No more file extent items for this inode. */ break; @@ -3117,7 +3115,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, &delalloc_cached_state, backref_ctx, 0, 0, 0, prev_extent_end, range_end - 1); if (ret < 0) - goto out_unlock; + goto out; prev_extent_end = range_end; } @@ -3155,9 +3153,6 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, } ret = emit_last_fiemap_cache(fieinfo, &cache); - -out_unlock: - btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); out: free_extent_state(delalloc_cached_state); btrfs_free_backref_share_ctx(backref_ctx); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ca79c2b8adc4..1ac14223ffb5 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7813,6 +7813,7 @@ struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter, static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { + struct btrfs_inode *btrfs_inode = BTRFS_I(inode); int ret; ret = fiemap_prep(inode, fieinfo, start, &len, 0); @@ -7838,7 +7839,26 @@ static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, return ret; } - return extent_fiemap(BTRFS_I(inode), fieinfo, start, len); + btrfs_inode_lock(btrfs_inode, BTRFS_ILOCK_SHARED); + + /* + * We did an initial flush to avoid holding the inode's lock while + * triggering writeback and waiting for the completion of IO and ordered + * extents. Now after we locked the inode we do it again, because it's + * possible a new write may have happened in between those two steps. + */ + if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) { + ret = btrfs_wait_ordered_range(inode, 0, LLONG_MAX); + if (ret) { + btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED); + return ret; + } + } + + ret = extent_fiemap(btrfs_inode, fieinfo, start, len); + btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED); + + return ret; } static int btrfs_writepages(struct address_space *mapping, -- Gitee From 6cf1f11c3b717e503792540a68e33ed4083a9ecd Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 28 Feb 2024 11:37:56 +0000 Subject: [PATCH 067/268] btrfs: fix race when detecting delalloc ranges during fiemap stable inclusion from stable-6.6.26 commit 49d640d2946c35a17b051d54171a032dd95b0f50 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 978b63f7464abcfd364a6c95f734282c50f3decf ] For fiemap we recently stopped locking the target extent range for the whole duration of the fiemap call, in order to avoid a deadlock in a scenario where the fiemap buffer happens to be a memory mapped range of the same file. This use case is very unlikely to be useful in practice but it may be triggered by fuzz testing (syzbot, etc). This however introduced a race that makes us miss delalloc ranges for file regions that are currently holes, so the caller of fiemap will not be aware that there's data for some file regions. This can be quite serious for some use cases - for example in coreutils versions before 9.0, the cp program used fiemap to detect holes and data in the source file, copying only regions with data (extents or delalloc) from the source file to the destination file in order to preserve holes (see the documentation for its --sparse command line option). This means that if cp was used with a source file that had delalloc in a hole, the destination file could end up without that data, which is effectively a data loss issue, if it happened to hit the race described below. The race happens like this: 1) Fiemap is called, without the FIEMAP_FLAG_SYNC flag, for a file that has delalloc in the file range [64M, 65M[, which is currently a hole; 2) Fiemap locks the inode in shared mode, then starts iterating the inode's subvolume tree searching for file extent items, without having the whole fiemap target range locked in the inode's io tree - the change introduced recently by commit b0ad381fa769 ("btrfs: fix deadlock with fiemap and extent locking"). It only locks ranges in the io tree when it finds a hole or prealloc extent since that commit; 3) Note that fiemap clones each leaf before using it, and this is to avoid deadlocks when locking a file range in the inode's io tree and the fiemap buffer is memory mapped to some file, because writing to the page with btrfs_page_mkwrite() will wait on any ordered extent for the page's range and the ordered extent needs to lock the range and may need to modify the same leaf, therefore leading to a deadlock on the leaf; 4) While iterating the file extent items in the cloned leaf before finding the hole in the range [64M, 65M[, the delalloc in that range is flushed and its ordered extent completes - meaning the corresponding file extent item is in the inode's subvolume tree, but not present in the cloned leaf that fiemap is iterating over; 5) When fiemap finds the hole in the [64M, 65M[ range by seeing the gap in the cloned leaf (or a file extent item with disk_bytenr == 0 in case the NO_HOLES feature is not enabled), it will lock that file range in the inode's io tree and then search for delalloc by checking for the EXTENT_DELALLOC bit in the io tree for that range and ordered extents (with btrfs_find_delalloc_in_range()). But it finds nothing since the delalloc in that range was already flushed and the ordered extent completed and is gone - as a result fiemap will not report that there's delalloc or an extent for the range [64M, 65M[, so user space will be mislead into thinking that there's a hole in that range. This could actually be sporadically triggered with test case generic/094 from fstests, which reports a missing extent/delalloc range like this: # generic/094 2s ... - output mismatch (see /home/fdmanana/git/hub/xfstests/results//generic/094.out.bad) # --- tests/generic/094.out 2020-06-10 19:29:03.830519425 +0100 # +++ /home/fdmanana/git/hub/xfstests/results//generic/094.out.bad 2024-02-28 11:00:00.381071525 +0000 # @@ -1,3 +1,9 @@ # QA output created by 094 # fiemap run with sync # fiemap run without sync # +ERROR: couldn't find extent at 7 # +map is 'HHDDHPPDPHPH' # +logical: [ 5.. 6] phys: 301517.. 301518 flags: 0x800 tot: 2 # +logical: [ 8.. 8] phys: 301520.. 301520 flags: 0x800 tot: 1 # ... # (Run 'diff -u /home/fdmanana/git/hub/xfstests/tests/generic/094.out /home/fdmanana/git/hub/xfstests/results//generic/094.out.bad' to see the entire diff) So in order to fix this, while still avoiding deadlocks in the case where the fiemap buffer is memory mapped to the same file, change fiemap to work like the following: 1) Always lock the whole range in the inode's io tree before starting to iterate the inode's subvolume tree searching for file extent items, just like we did before commit b0ad381fa769 ("btrfs: fix deadlock with fiemap and extent locking"); 2) Now instead of writing to the fiemap buffer every time we have an extent to report, write instead to a temporary buffer (1 page), and when that buffer becomes full, stop iterating the file extent items, unlock the range in the io tree, release the search path, submit all the entries kept in that buffer to the fiemap buffer, and then resume the search for file extent items after locking again the remainder of the range in the io tree. The buffer having a size of a page, allows for 146 entries in a system with 4K pages. This is a large enough value to have a good performance by avoiding too many restarts of the search for file extent items. In other words this preserves the huge performance gains made in the last two years to fiemap, while avoiding the deadlocks in case the fiemap buffer is memory mapped to the same file (useless in practice, but possible and exercised by fuzz testing and syzbot). Fixes: b0ad381fa769 ("btrfs: fix deadlock with fiemap and extent locking") Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- fs/btrfs/extent_io.c | 221 +++++++++++++++++++++++++++++++------------ 1 file changed, 160 insertions(+), 61 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 45d427c3033d..5acb2cb79d4b 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2410,12 +2410,65 @@ int try_release_extent_mapping(struct page *page, gfp_t mask) return try_release_extent_state(tree, page, mask); } +struct btrfs_fiemap_entry { + u64 offset; + u64 phys; + u64 len; + u32 flags; +}; + /* - * To cache previous fiemap extent + * Indicate the caller of emit_fiemap_extent() that it needs to unlock the file + * range from the inode's io tree, unlock the subvolume tree search path, flush + * the fiemap cache and relock the file range and research the subvolume tree. + * The value here is something negative that can't be confused with a valid + * errno value and different from 1 because that's also a return value from + * fiemap_fill_next_extent() and also it's often used to mean some btree search + * did not find a key, so make it some distinct negative value. + */ +#define BTRFS_FIEMAP_FLUSH_CACHE (-(MAX_ERRNO + 1)) + +/* + * Used to: + * + * - Cache the next entry to be emitted to the fiemap buffer, so that we can + * merge extents that are contiguous and can be grouped as a single one; * - * Will be used for merging fiemap extent + * - Store extents ready to be written to the fiemap buffer in an intermediary + * buffer. This intermediary buffer is to ensure that in case the fiemap + * buffer is memory mapped to the fiemap target file, we don't deadlock + * during btrfs_page_mkwrite(). This is because during fiemap we are locking + * an extent range in order to prevent races with delalloc flushing and + * ordered extent completion, which is needed in order to reliably detect + * delalloc in holes and prealloc extents. And this can lead to a deadlock + * if the fiemap buffer is memory mapped to the file we are running fiemap + * against (a silly, useless in practice scenario, but possible) because + * btrfs_page_mkwrite() will try to lock the same extent range. */ struct fiemap_cache { + /* An array of ready fiemap entries. */ + struct btrfs_fiemap_entry *entries; + /* Number of entries in the entries array. */ + int entries_size; + /* Index of the next entry in the entries array to write to. */ + int entries_pos; + /* + * Once the entries array is full, this indicates what's the offset for + * the next file extent item we must search for in the inode's subvolume + * tree after unlocking the extent range in the inode's io tree and + * releasing the search path. + */ + u64 next_search_offset; + /* + * This matches struct fiemap_extent_info::fi_mapped_extents, we use it + * to count ourselves emitted extents and stop instead of relying on + * fiemap_fill_next_extent() because we buffer ready fiemap entries at + * the @entries array, and we want to stop as soon as we hit the max + * amount of extents to map, not just to save time but also to make the + * logic at extent_fiemap() simpler. + */ + unsigned int extents_mapped; + /* Fields for the cached extent (unsubmitted, not ready, extent). */ u64 offset; u64 phys; u64 len; @@ -2423,6 +2476,28 @@ struct fiemap_cache { bool cached; }; +static int flush_fiemap_cache(struct fiemap_extent_info *fieinfo, + struct fiemap_cache *cache) +{ + for (int i = 0; i < cache->entries_pos; i++) { + struct btrfs_fiemap_entry *entry = &cache->entries[i]; + int ret; + + ret = fiemap_fill_next_extent(fieinfo, entry->offset, + entry->phys, entry->len, + entry->flags); + /* + * Ignore 1 (reached max entries) because we keep track of that + * ourselves in emit_fiemap_extent(). + */ + if (ret < 0) + return ret; + } + cache->entries_pos = 0; + + return 0; +} + /* * Helper to submit fiemap extent. * @@ -2437,8 +2512,8 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo, struct fiemap_cache *cache, u64 offset, u64 phys, u64 len, u32 flags) { + struct btrfs_fiemap_entry *entry; u64 cache_end; - int ret = 0; /* Set at the end of extent_fiemap(). */ ASSERT((flags & FIEMAP_EXTENT_LAST) == 0); @@ -2451,7 +2526,9 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo, * find an extent that starts at an offset behind the end offset of the * previous extent we processed. This happens if fiemap is called * without FIEMAP_FLAG_SYNC and there are ordered extents completing - * while we call btrfs_next_leaf() (through fiemap_next_leaf_item()). + * after we had to unlock the file range, release the search path, emit + * the fiemap extents stored in the buffer (cache->entries array) and + * the lock the remainder of the range and re-search the btree. * * For example we are in leaf X processing its last item, which is the * file extent item for file range [512K, 1M[, and after @@ -2564,11 +2641,35 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo, emit: /* Not mergeable, need to submit cached one */ - ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys, - cache->len, cache->flags); - cache->cached = false; - if (ret) - return ret; + + if (cache->entries_pos == cache->entries_size) { + /* + * We will need to research for the end offset of the last + * stored extent and not from the current offset, because after + * unlocking the range and releasing the path, if there's a hole + * between that end offset and this current offset, a new extent + * may have been inserted due to a new write, so we don't want + * to miss it. + */ + entry = &cache->entries[cache->entries_size - 1]; + cache->next_search_offset = entry->offset + entry->len; + cache->cached = false; + + return BTRFS_FIEMAP_FLUSH_CACHE; + } + + entry = &cache->entries[cache->entries_pos]; + entry->offset = cache->offset; + entry->phys = cache->phys; + entry->len = cache->len; + entry->flags = cache->flags; + cache->entries_pos++; + cache->extents_mapped++; + + if (cache->extents_mapped == fieinfo->fi_extents_max) { + cache->cached = false; + return 1; + } assign: cache->cached = true; cache->offset = offset; @@ -2694,8 +2795,8 @@ static int fiemap_search_slot(struct btrfs_inode *inode, struct btrfs_path *path * neighbour leaf). * We also need the private clone because holding a read lock on an * extent buffer of the subvolume's b+tree will make lockdep unhappy - * when we call fiemap_fill_next_extent(), because that may cause a page - * fault when filling the user space buffer with fiemap data. + * when we check if extents are shared, as backref walking may need to + * lock the same leaf we are processing. */ clone = btrfs_clone_extent_buffer(path->nodes[0]); if (!clone) @@ -2735,34 +2836,16 @@ static int fiemap_process_hole(struct btrfs_inode *inode, * it beyond i_size. */ while (cur_offset < end && cur_offset < i_size) { - struct extent_state *cached_state = NULL; u64 delalloc_start; u64 delalloc_end; u64 prealloc_start; - u64 lockstart; - u64 lockend; u64 prealloc_len = 0; bool delalloc; - lockstart = round_down(cur_offset, inode->root->fs_info->sectorsize); - lockend = round_up(end, inode->root->fs_info->sectorsize); - - /* - * We are only locking for the delalloc range because that's the - * only thing that can change here. With fiemap we have a lock - * on the inode, so no buffered or direct writes can happen. - * - * However mmaps and normal page writeback will cause this to - * change arbitrarily. We have to lock the extent lock here to - * make sure that nobody messes with the tree while we're doing - * btrfs_find_delalloc_in_range. - */ - lock_extent(&inode->io_tree, lockstart, lockend, &cached_state); delalloc = btrfs_find_delalloc_in_range(inode, cur_offset, end, delalloc_cached_state, &delalloc_start, &delalloc_end); - unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state); if (!delalloc) break; @@ -2930,6 +3013,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { const u64 ino = btrfs_ino(inode); + struct extent_state *cached_state = NULL; struct extent_state *delalloc_cached_state = NULL; struct btrfs_path *path; struct fiemap_cache cache = { 0 }; @@ -2942,26 +3026,33 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, bool stopped = false; int ret; + cache.entries_size = PAGE_SIZE / sizeof(struct btrfs_fiemap_entry); + cache.entries = kmalloc_array(cache.entries_size, + sizeof(struct btrfs_fiemap_entry), + GFP_KERNEL); backref_ctx = btrfs_alloc_backref_share_check_ctx(); path = btrfs_alloc_path(); - if (!backref_ctx || !path) { + if (!cache.entries || !backref_ctx || !path) { ret = -ENOMEM; goto out; } +restart: range_start = round_down(start, sectorsize); range_end = round_up(start + len, sectorsize); prev_extent_end = range_start; + lock_extent(&inode->io_tree, range_start, range_end, &cached_state); + ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end); if (ret < 0) - goto out; + goto out_unlock; btrfs_release_path(path); path->reada = READA_FORWARD; ret = fiemap_search_slot(inode, path, range_start); if (ret < 0) { - goto out; + goto out_unlock; } else if (ret > 0) { /* * No file extent item found, but we may have delalloc between @@ -3008,7 +3099,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, backref_ctx, 0, 0, 0, prev_extent_end, hole_end); if (ret < 0) { - goto out; + goto out_unlock; } else if (ret > 0) { /* fiemap_fill_next_extent() told us to stop. */ stopped = true; @@ -3064,7 +3155,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, extent_gen, backref_ctx); if (ret < 0) - goto out; + goto out_unlock; else if (ret > 0) flags |= FIEMAP_EXTENT_SHARED; } @@ -3075,9 +3166,9 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, } if (ret < 0) { - goto out; + goto out_unlock; } else if (ret > 0) { - /* fiemap_fill_next_extent() told us to stop. */ + /* emit_fiemap_extent() told us to stop. */ stopped = true; break; } @@ -3086,12 +3177,12 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, next_item: if (fatal_signal_pending(current)) { ret = -EINTR; - goto out; + goto out_unlock; } ret = fiemap_next_leaf_item(inode, path); if (ret < 0) { - goto out; + goto out_unlock; } else if (ret > 0) { /* No more file extent items for this inode. */ break; @@ -3100,22 +3191,12 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, } check_eof_delalloc: - /* - * Release (and free) the path before emitting any final entries to - * fiemap_fill_next_extent() to keep lockdep happy. This is because - * once we find no more file extent items exist, we may have a - * non-cloned leaf, and fiemap_fill_next_extent() can trigger page - * faults when copying data to the user space buffer. - */ - btrfs_free_path(path); - path = NULL; - if (!stopped && prev_extent_end < range_end) { ret = fiemap_process_hole(inode, fieinfo, &cache, &delalloc_cached_state, backref_ctx, 0, 0, 0, prev_extent_end, range_end - 1); if (ret < 0) - goto out; + goto out_unlock; prev_extent_end = range_end; } @@ -3123,28 +3204,16 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, const u64 i_size = i_size_read(&inode->vfs_inode); if (prev_extent_end < i_size) { - struct extent_state *cached_state = NULL; u64 delalloc_start; u64 delalloc_end; - u64 lockstart; - u64 lockend; bool delalloc; - lockstart = round_down(prev_extent_end, sectorsize); - lockend = round_up(i_size, sectorsize); - - /* - * See the comment in fiemap_process_hole as to why - * we're doing the locking here. - */ - lock_extent(&inode->io_tree, lockstart, lockend, &cached_state); delalloc = btrfs_find_delalloc_in_range(inode, prev_extent_end, i_size - 1, &delalloc_cached_state, &delalloc_start, &delalloc_end); - unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state); if (!delalloc) cache.flags |= FIEMAP_EXTENT_LAST; } else { @@ -3152,9 +3221,39 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, } } +out_unlock: + unlock_extent(&inode->io_tree, range_start, range_end, &cached_state); + + if (ret == BTRFS_FIEMAP_FLUSH_CACHE) { + btrfs_release_path(path); + ret = flush_fiemap_cache(fieinfo, &cache); + if (ret) + goto out; + len -= cache.next_search_offset - start; + start = cache.next_search_offset; + goto restart; + } else if (ret < 0) { + goto out; + } + + /* + * Must free the path before emitting to the fiemap buffer because we + * may have a non-cloned leaf and if the fiemap buffer is memory mapped + * to a file, a write into it (through btrfs_page_mkwrite()) may trigger + * waiting for an ordered extent that in order to complete needs to + * modify that leaf, therefore leading to a deadlock. + */ + btrfs_free_path(path); + path = NULL; + + ret = flush_fiemap_cache(fieinfo, &cache); + if (ret) + goto out; + ret = emit_last_fiemap_cache(fieinfo, &cache); out: free_extent_state(delalloc_cached_state); + kfree(cache.entries); btrfs_free_backref_share_ctx(backref_ctx); btrfs_free_path(path); return ret; -- Gitee From 43d5d724a6dfd293718966e6304271a1c4dde7a6 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Tue, 31 Oct 2023 23:30:59 +0100 Subject: [PATCH 068/268] x86/CPU/AMD: Add ZenX generations flags stable inclusion from stable-6.6.26 commit 936e59cb56ea1e7d2dc5635b48f72db300379d75 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 30fa92832f405d5ac9f263e99f62445fa3084008 ] Add X86_FEATURE flags for each Zen generation. They should be used from now on instead of checking f/m/s. Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Nikolay Borisov Acked-by: Thomas Gleixner Link: http://lore.kernel.org/r/20231120104152.13740-2-bp@alien8.de Stable-dep-of: c7b2edd8377b ("perf/x86/amd/core: Update and fix stalled-cycles-* events for Zen 2 and later") Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- arch/x86/include/asm/cpufeatures.h | 5 ++- arch/x86/kernel/cpu/amd.c | 70 +++++++++++++++++++++++++++++- 2 files changed, 72 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index bd33f6366c80..1f9db287165a 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -218,7 +218,7 @@ #define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ #define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_ZEN (7*32+28) /* "" CPU based on Zen microarchitecture */ +#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU based on Zen microarchitecture */ #define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ #define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */ #define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */ @@ -312,6 +312,9 @@ #define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */ #define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */ #define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* "" IA32_TSC_DEADLINE and X2APIC MSRs need fencing */ +#define X86_FEATURE_ZEN2 (11*32+28) /* "" CPU based on Zen2 microarchitecture */ +#define X86_FEATURE_ZEN3 (11*32+29) /* "" CPU based on Zen3 microarchitecture */ +#define X86_FEATURE_ZEN4 (11*32+30) /* "" CPU based on Zen4 microarchitecture */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 031bca974fbf..5391385707b3 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -620,6 +620,49 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) } resctrl_cpu_detect(c); + + /* Figure out Zen generations: */ + switch (c->x86) { + case 0x17: { + switch (c->x86_model) { + case 0x00 ... 0x2f: + case 0x50 ... 0x5f: + setup_force_cpu_cap(X86_FEATURE_ZEN); + break; + case 0x30 ... 0x4f: + case 0x60 ... 0x7f: + case 0x90 ... 0x91: + case 0xa0 ... 0xaf: + setup_force_cpu_cap(X86_FEATURE_ZEN2); + break; + default: + goto warn; + } + break; + } + case 0x19: { + switch (c->x86_model) { + case 0x00 ... 0x0f: + case 0x20 ... 0x5f: + setup_force_cpu_cap(X86_FEATURE_ZEN3); + break; + case 0x10 ... 0x1f: + case 0x60 ... 0xaf: + setup_force_cpu_cap(X86_FEATURE_ZEN4); + break; + default: + goto warn; + } + break; + } + default: + break; + } + + return; + +warn: + WARN_ONCE(1, "Family 0x%x, model: 0x%x??\n", c->x86, c->x86_model); } static void early_detect_mem_encrypt(struct cpuinfo_x86 *c) @@ -978,8 +1021,6 @@ void init_spectral_chicken(struct cpuinfo_x86 *c) static void init_amd_zn(struct cpuinfo_x86 *c) { - set_cpu_cap(c, X86_FEATURE_ZEN); - #ifdef CONFIG_NUMA node_reclaim_distance = 32; #endif @@ -1042,6 +1083,22 @@ static void zenbleed_check(struct cpuinfo_x86 *c) } } +static void init_amd_zen(struct cpuinfo_x86 *c) +{ +} + +static void init_amd_zen2(struct cpuinfo_x86 *c) +{ +} + +static void init_amd_zen3(struct cpuinfo_x86 *c) +{ +} + +static void init_amd_zen4(struct cpuinfo_x86 *c) +{ +} + static void init_amd(struct cpuinfo_x86 *c) { early_init_amd(c); @@ -1080,6 +1137,15 @@ static void init_amd(struct cpuinfo_x86 *c) case 0x19: init_amd_zn(c); break; } + if (boot_cpu_has(X86_FEATURE_ZEN)) + init_amd_zen(c); + else if (boot_cpu_has(X86_FEATURE_ZEN2)) + init_amd_zen2(c); + else if (boot_cpu_has(X86_FEATURE_ZEN3)) + init_amd_zen3(c); + else if (boot_cpu_has(X86_FEATURE_ZEN4)) + init_amd_zen4(c); + /* * Enable workaround for FXSAVE leak on CPUs * without a XSaveErPtr feature -- Gitee From c3dea0036bc02e0a5f237e8876093cd055f1e5f5 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Wed, 1 Nov 2023 11:14:59 +0100 Subject: [PATCH 069/268] x86/CPU/AMD: Carve out the erratum 1386 fix stable inclusion from stable-6.6.26 commit eae590201d4a7f9aa64f31db4c3074fb1d6368a6 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit a7c32a1ae9ee43abfe884f5af376877c4301d166 ] Call it on the affected CPU generations. No functional changes. Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Nikolay Borisov Link: http://lore.kernel.org/r/20231120104152.13740-3-bp@alien8.de Stable-dep-of: c7b2edd8377b ("perf/x86/amd/core: Update and fix stalled-cycles-* events for Zen 2 and later") Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- arch/x86/kernel/cpu/amd.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 5391385707b3..28c3a1045b06 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -988,6 +988,19 @@ static void init_amd_bd(struct cpuinfo_x86 *c) clear_rdrand_cpuid_bit(c); } +static void fix_erratum_1386(struct cpuinfo_x86 *c) +{ + /* + * Work around Erratum 1386. The XSAVES instruction malfunctions in + * certain circumstances on Zen1/2 uarch, and not all parts have had + * updated microcode at the time of writing (March 2023). + * + * Affected parts all have no supervisor XSAVE states, meaning that + * the XSAVEC instruction (which works fine) is equivalent. + */ + clear_cpu_cap(c, X86_FEATURE_XSAVES); +} + void init_spectral_chicken(struct cpuinfo_x86 *c) { #ifdef CONFIG_CPU_UNRET_ENTRY @@ -1008,15 +1021,6 @@ void init_spectral_chicken(struct cpuinfo_x86 *c) } } #endif - /* - * Work around Erratum 1386. The XSAVES instruction malfunctions in - * certain circumstances on Zen1/2 uarch, and not all parts have had - * updated microcode at the time of writing (March 2023). - * - * Affected parts all have no supervisor XSAVE states, meaning that - * the XSAVEC instruction (which works fine) is equivalent. - */ - clear_cpu_cap(c, X86_FEATURE_XSAVES); } static void init_amd_zn(struct cpuinfo_x86 *c) @@ -1085,10 +1089,12 @@ static void zenbleed_check(struct cpuinfo_x86 *c) static void init_amd_zen(struct cpuinfo_x86 *c) { + fix_erratum_1386(c); } static void init_amd_zen2(struct cpuinfo_x86 *c) { + fix_erratum_1386(c); } static void init_amd_zen3(struct cpuinfo_x86 *c) -- Gitee From 33659541d6f1e4677895b35374fd92b8dff96a92 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Wed, 1 Nov 2023 12:31:44 +0100 Subject: [PATCH 070/268] x86/CPU/AMD: Move erratum 1076 fix into the Zen1 init function stable inclusion from stable-6.6.26 commit 2577e2a7cae00c786d49ac5ae08d668174d3f38f category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 0da91912fc150d6d321b15e648bead202ced1a27 ] No functional changes. Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Nikolay Borisov Link: http://lore.kernel.org/r/20231120104152.13740-5-bp@alien8.de Stable-dep-of: c7b2edd8377b ("perf/x86/amd/core: Update and fix stalled-cycles-* events for Zen 2 and later") Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- arch/x86/kernel/cpu/amd.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 28c3a1045b06..71503181bffd 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1028,6 +1028,11 @@ static void init_amd_zn(struct cpuinfo_x86 *c) #ifdef CONFIG_NUMA node_reclaim_distance = 32; #endif +} + +static void init_amd_zen(struct cpuinfo_x86 *c) +{ + fix_erratum_1386(c); /* Fix up CPUID bits, but only if not virtualised. */ if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) { @@ -1087,11 +1092,6 @@ static void zenbleed_check(struct cpuinfo_x86 *c) } } -static void init_amd_zen(struct cpuinfo_x86 *c) -{ - fix_erratum_1386(c); -} - static void init_amd_zen2(struct cpuinfo_x86 *c) { fix_erratum_1386(c); -- Gitee From cba776e2674d0be710cd57df989427079c0feee9 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Wed, 1 Nov 2023 12:38:35 +0100 Subject: [PATCH 071/268] x86/CPU/AMD: Move Zenbleed check to the Zen2 init function stable inclusion from stable-6.6.26 commit 702a65272da64980ca2dc9bd8e26e260ad276ac7 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit f69759be251dce722942594fbc62e53a40822a82 ] Prefix it properly so that it is clear which generation it is dealing with. No functional changes. Signed-off-by: Borislav Petkov (AMD) Link: http://lore.kernel.org/r/20231120104152.13740-8-bp@alien8.de Stable-dep-of: c7b2edd8377b ("perf/x86/amd/core: Update and fix stalled-cycles-* events for Zen 2 and later") Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- arch/x86/kernel/cpu/amd.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 71503181bffd..d8a0dc01a7db 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -70,12 +70,6 @@ static const int amd_erratum_383[] = static const int amd_erratum_1054[] = AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf)); -static const int amd_zenbleed[] = - AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x30, 0x0, 0x4f, 0xf), - AMD_MODEL_RANGE(0x17, 0x60, 0x0, 0x7f, 0xf), - AMD_MODEL_RANGE(0x17, 0x90, 0x0, 0x91, 0xf), - AMD_MODEL_RANGE(0x17, 0xa0, 0x0, 0xaf, 0xf)); - static const int amd_div0[] = AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x00, 0x0, 0x2f, 0xf), AMD_MODEL_RANGE(0x17, 0x50, 0x0, 0x5f, 0xf)); @@ -1073,11 +1067,8 @@ static bool cpu_has_zenbleed_microcode(void) return true; } -static void zenbleed_check(struct cpuinfo_x86 *c) +static void zen2_zenbleed_check(struct cpuinfo_x86 *c) { - if (!cpu_has_amd_erratum(c, amd_zenbleed)) - return; - if (cpu_has(c, X86_FEATURE_HYPERVISOR)) return; @@ -1095,6 +1086,7 @@ static void zenbleed_check(struct cpuinfo_x86 *c) static void init_amd_zen2(struct cpuinfo_x86 *c) { fix_erratum_1386(c); + zen2_zenbleed_check(c); } static void init_amd_zen3(struct cpuinfo_x86 *c) @@ -1219,8 +1211,6 @@ static void init_amd(struct cpuinfo_x86 *c) cpu_has(c, X86_FEATURE_AUTOIBRS)) WARN_ON_ONCE(msr_set_bit(MSR_EFER, _EFER_AUTOIBRS)); - zenbleed_check(c); - if (cpu_has_amd_erratum(c, amd_div0)) { pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n"); setup_force_cpu_bug(X86_BUG_DIV0); @@ -1385,7 +1375,7 @@ static void zenbleed_check_cpu(void *unused) { struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); - zenbleed_check(c); + zen2_zenbleed_check(c); } void amd_check_microcode(void) -- Gitee From 02149e033df9341291657acebbb0bb67476375a5 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Wed, 1 Nov 2023 12:52:01 +0100 Subject: [PATCH 072/268] x86/CPU/AMD: Move the DIV0 bug detection to the Zen1 init function stable inclusion from stable-6.6.26 commit 82454981660945fc7022c799c21e9811b0ebfb96 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit bfff3c6692ce64fa9d86eb829d18229c307a0855 ] No functional changes. Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Nikolay Borisov Link: http://lore.kernel.org/r/20231120104152.13740-9-bp@alien8.de Stable-dep-of: c7b2edd8377b ("perf/x86/amd/core: Update and fix stalled-cycles-* events for Zen 2 and later") Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- arch/x86/kernel/cpu/amd.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index d8a0dc01a7db..f4373530c7de 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -70,10 +70,6 @@ static const int amd_erratum_383[] = static const int amd_erratum_1054[] = AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf)); -static const int amd_div0[] = - AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x00, 0x0, 0x2f, 0xf), - AMD_MODEL_RANGE(0x17, 0x50, 0x0, 0x5f, 0xf)); - static const int amd_erratum_1485[] = AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x19, 0x10, 0x0, 0x1f, 0xf), AMD_MODEL_RANGE(0x19, 0x60, 0x0, 0xaf, 0xf)); @@ -1043,6 +1039,9 @@ static void init_amd_zen(struct cpuinfo_x86 *c) if (c->x86 == 0x19 && !cpu_has(c, X86_FEATURE_BTC_NO)) set_cpu_cap(c, X86_FEATURE_BTC_NO); } + + pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n"); + setup_force_cpu_bug(X86_BUG_DIV0); } static bool cpu_has_zenbleed_microcode(void) @@ -1211,11 +1210,6 @@ static void init_amd(struct cpuinfo_x86 *c) cpu_has(c, X86_FEATURE_AUTOIBRS)) WARN_ON_ONCE(msr_set_bit(MSR_EFER, _EFER_AUTOIBRS)); - if (cpu_has_amd_erratum(c, amd_div0)) { - pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n"); - setup_force_cpu_bug(X86_BUG_DIV0); - } - if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has_amd_erratum(c, amd_erratum_1485)) msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT); -- Gitee From 28716c868ffde7b33a07c98aeebdcc9a0aa7130d Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Fri, 3 Nov 2023 19:53:49 +0100 Subject: [PATCH 073/268] x86/CPU/AMD: Get rid of amd_erratum_1054[] stable inclusion from stable-6.6.26 commit 54273025be0cffcef3964d5ebdc64aa678bdb684 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 54c33e23f75d5c9925495231c57d3319336722ef ] No functional changes. Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Nikolay Borisov Link: http://lore.kernel.org/r/20231120104152.13740-10-bp@alien8.de Stable-dep-of: c7b2edd8377b ("perf/x86/amd/core: Update and fix stalled-cycles-* events for Zen 2 and later") Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- arch/x86/kernel/cpu/amd.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index f4373530c7de..98fa23ef97df 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -66,10 +66,6 @@ static const int amd_erratum_400[] = static const int amd_erratum_383[] = AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf)); -/* #1054: Instructions Retired Performance Counter May Be Inaccurate */ -static const int amd_erratum_1054[] = - AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf)); - static const int amd_erratum_1485[] = AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x19, 0x10, 0x0, 0x1f, 0xf), AMD_MODEL_RANGE(0x19, 0x60, 0x0, 0xaf, 0xf)); @@ -1194,7 +1190,7 @@ static void init_amd(struct cpuinfo_x86 *c) * Counter May Be Inaccurate". */ if (cpu_has(c, X86_FEATURE_IRPERF) && - !cpu_has_amd_erratum(c, amd_erratum_1054)) + (boot_cpu_has(X86_FEATURE_ZEN) && c->x86_model > 0x2f)) msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT); check_null_seg_clears_base(c); -- Gitee From 86e62c65a2c9c957f9420c1a17dc393340aad722 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Sat, 2 Dec 2023 12:50:23 +0100 Subject: [PATCH 074/268] x86/CPU/AMD: Add X86_FEATURE_ZEN1 stable inclusion from stable-6.6.26 commit d2be2f872fe7ba865a37bb50d5def771e9cc1901 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 232afb557835d6f6859c73bf610bad308c96b131 ] Add a synthetic feature flag specifically for first generation Zen machines. There's need to have a generic flag for all Zen generations so make X86_FEATURE_ZEN be that flag. Fixes: 30fa92832f40 ("x86/CPU/AMD: Add ZenX generations flags") Suggested-by: Brian Gerst Suggested-by: Tom Lendacky Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/dc3835e3-0731-4230-bbb9-336bbe3d042b@amd.com Stable-dep-of: c7b2edd8377b ("perf/x86/amd/core: Update and fix stalled-cycles-* events for Zen 2 and later") Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- arch/x86/include/asm/cpufeatures.h | 3 ++- arch/x86/kernel/cpu/amd.c | 11 ++++++----- tools/arch/x86/include/asm/cpufeatures.h | 2 +- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 1f9db287165a..bc66aec9139e 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -218,7 +218,7 @@ #define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ #define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU based on Zen microarchitecture */ +#define X86_FEATURE_ZEN ( 7*32+28) /* "" Generic flag for all Zen and newer */ #define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ #define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */ #define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */ @@ -315,6 +315,7 @@ #define X86_FEATURE_ZEN2 (11*32+28) /* "" CPU based on Zen2 microarchitecture */ #define X86_FEATURE_ZEN3 (11*32+29) /* "" CPU based on Zen3 microarchitecture */ #define X86_FEATURE_ZEN4 (11*32+30) /* "" CPU based on Zen4 microarchitecture */ +#define X86_FEATURE_ZEN1 (11*32+31) /* "" CPU based on Zen1 microarchitecture */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 98fa23ef97df..9fd91022d92d 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -613,7 +613,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) switch (c->x86_model) { case 0x00 ... 0x2f: case 0x50 ... 0x5f: - setup_force_cpu_cap(X86_FEATURE_ZEN); + setup_force_cpu_cap(X86_FEATURE_ZEN1); break; case 0x30 ... 0x4f: case 0x60 ... 0x7f: @@ -1011,12 +1011,13 @@ void init_spectral_chicken(struct cpuinfo_x86 *c) static void init_amd_zn(struct cpuinfo_x86 *c) { + setup_force_cpu_cap(X86_FEATURE_ZEN); #ifdef CONFIG_NUMA node_reclaim_distance = 32; #endif } -static void init_amd_zen(struct cpuinfo_x86 *c) +static void init_amd_zen1(struct cpuinfo_x86 *c) { fix_erratum_1386(c); @@ -1130,8 +1131,8 @@ static void init_amd(struct cpuinfo_x86 *c) case 0x19: init_amd_zn(c); break; } - if (boot_cpu_has(X86_FEATURE_ZEN)) - init_amd_zen(c); + if (boot_cpu_has(X86_FEATURE_ZEN1)) + init_amd_zen1(c); else if (boot_cpu_has(X86_FEATURE_ZEN2)) init_amd_zen2(c); else if (boot_cpu_has(X86_FEATURE_ZEN3)) @@ -1190,7 +1191,7 @@ static void init_amd(struct cpuinfo_x86 *c) * Counter May Be Inaccurate". */ if (cpu_has(c, X86_FEATURE_IRPERF) && - (boot_cpu_has(X86_FEATURE_ZEN) && c->x86_model > 0x2f)) + (boot_cpu_has(X86_FEATURE_ZEN1) && c->x86_model > 0x2f)) msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT); check_null_seg_clears_base(c); diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 798e60b5454b..845a4023ba44 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -219,7 +219,7 @@ #define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ #define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_ZEN (7*32+28) /* "" CPU based on Zen microarchitecture */ +#define X86_FEATURE_ZEN ( 7*32+28) /* "" Generic flag for all Zen and newer */ #define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ #define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */ #define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */ -- Gitee From f596b9dfdab7d8922228005acba4663fe9a277c6 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Mon, 25 Mar 2024 13:17:53 +0530 Subject: [PATCH 075/268] perf/x86/amd/core: Update and fix stalled-cycles-* events for Zen 2 and later stable inclusion from stable-6.6.26 commit 69fe5f177ad39c0ed2186e2391cbee1908096616 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit c7b2edd8377be983442c1344cb940cd2ac21b601 ] AMD processors based on Zen 2 and later microarchitectures do not support PMCx087 (instruction pipe stalls) which is used as the backing event for "stalled-cycles-frontend" and "stalled-cycles-backend". Use PMCx0A9 (cycles where micro-op queue is empty) instead to count frontend stalls and remove the entry for backend stalls since there is no direct replacement. Signed-off-by: Sandipan Das Signed-off-by: Ingo Molnar Reviewed-by: Ian Rogers Fixes: 3fe3331bb285 ("perf/x86/amd: Add event map for AMD Family 17h") Link: https://lore.kernel.org/r/03d7fc8fa2a28f9be732116009025bdec1b3ec97.1711352180.git.sandipan.das@amd.com Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- arch/x86/events/amd/core.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 5365d6acbf09..b30349eeb767 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -250,7 +250,7 @@ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] = /* * AMD Performance Monitor Family 17h and later: */ -static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] = +static const u64 amd_zen1_perfmon_event_map[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, @@ -262,10 +262,24 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] = [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187, }; +static const u64 amd_zen2_perfmon_event_map[PERF_COUNT_HW_MAX] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60, + [PERF_COUNT_HW_CACHE_MISSES] = 0x0964, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9, +}; + static u64 amd_pmu_event_map(int hw_event) { - if (boot_cpu_data.x86 >= 0x17) - return amd_f17h_perfmon_event_map[hw_event]; + if (cpu_feature_enabled(X86_FEATURE_ZEN2) || boot_cpu_data.x86 >= 0x19) + return amd_zen2_perfmon_event_map[hw_event]; + + if (cpu_feature_enabled(X86_FEATURE_ZEN1)) + return amd_zen1_perfmon_event_map[hw_event]; return amd_perfmon_event_map[hw_event]; } -- Gitee From ed0b78acf50c90d2d366d8ae3f810d5c6da4eb59 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Mon, 25 Mar 2024 13:01:44 +0530 Subject: [PATCH 076/268] x86/cpufeatures: Add new word for scattered features stable inclusion from stable-6.6.26 commit 56e7373f9a67843caa6cf14fd8a6805aa41dfa11 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 7f274e609f3d5f45c22b1dd59053f6764458b492 ] Add a new word for scattered features because all free bits among the existing Linux-defined auxiliary flags have been exhausted. Signed-off-by: Sandipan Das Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/8380d2a0da469a1f0ad75b8954a79fb689599ff6.1711091584.git.sandipan.das@amd.com Stable-dep-of: 598c2fafc06f ("perf/x86/amd/lbr: Use freeze based on availability") Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- arch/x86/include/asm/cpufeature.h | 6 ++++-- arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/include/asm/disabled-features.h | 3 ++- arch/x86/include/asm/required-features.h | 3 ++- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index a1273698fc43..42157ddcc09d 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -91,8 +91,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 20, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 21, feature_bit) || \ REQUIRED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 21)) + BUILD_BUG_ON_ZERO(NCAPINTS != 22)) #define DISABLED_MASK_BIT_SET(feature_bit) \ ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ @@ -116,8 +117,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 20, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 21, feature_bit) || \ DISABLED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 21)) + BUILD_BUG_ON_ZERO(NCAPINTS != 22)) #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index bc66aec9139e..a42db7bbe593 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -13,7 +13,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 21 /* N 32-bit words worth of info */ +#define NCAPINTS 22 /* N 32-bit words worth of info */ #define NBUGINTS 2 /* N 32-bit bug flags */ /* diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 702d93fdd10e..88fcf08458d9 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -143,6 +143,7 @@ #define DISABLED_MASK18 (DISABLE_IBT) #define DISABLED_MASK19 0 #define DISABLED_MASK20 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) +#define DISABLED_MASK21 0 +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22) #endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index 7ba1726b71c7..e9187ddd3d1f 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -99,6 +99,7 @@ #define REQUIRED_MASK18 0 #define REQUIRED_MASK19 0 #define REQUIRED_MASK20 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) +#define REQUIRED_MASK21 0 +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22) #endif /* _ASM_X86_REQUIRED_FEATURES_H */ -- Gitee From e44f6d4a5e51df57f674100fb73c630321272de1 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Mon, 25 Mar 2024 13:01:45 +0530 Subject: [PATCH 077/268] perf/x86/amd/lbr: Use freeze based on availability stable inclusion from stable-6.6.26 commit 55ed6c477872bac7be7e688a6c3af57654ee0049 category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 598c2fafc06fe5c56a1a415fb7b544b31453d637 ] Currently, the LBR code assumes that LBR Freeze is supported on all processors when X86_FEATURE_AMD_LBR_V2 is available i.e. CPUID leaf 0x80000022[EAX] bit 1 is set. This is incorrect as the availability of the feature is additionally dependent on CPUID leaf 0x80000022[EAX] bit 2 being set, which may not be set for all Zen 4 processors. Define a new feature bit for LBR and PMC freeze and set the freeze enable bit (FLBRI) in DebugCtl (MSR 0x1d9) conditionally. It should still be possible to use LBR without freeze for profile-guided optimization of user programs by using an user-only branch filter during profiling. When the user-only filter is enabled, branches are no longer recorded after the transition to CPL 0 upon PMI arrival. When branch entries are read in the PMI handler, the branch stack does not change. E.g. $ perf record -j any,u -e ex_ret_brn_tkn ./workload Since the feature bit is visible under flags in /proc/cpuinfo, it can be used to determine the feasibility of use-cases which require LBR Freeze to be supported by the hardware such as profile-guided optimization of kernels. Fixes: ca5b7c0d9621 ("perf/x86/amd/lbr: Add LbrExtV2 branch record support") Signed-off-by: Sandipan Das Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/69a453c97cfd11c6f2584b19f937fe6df741510f.1711091584.git.sandipan.das@amd.com Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- arch/x86/events/amd/core.c | 4 ++-- arch/x86/events/amd/lbr.c | 16 ++++++++++------ arch/x86/include/asm/cpufeatures.h | 8 ++++++++ arch/x86/kernel/cpu/scattered.c | 1 + 4 files changed, 21 insertions(+), 8 deletions(-) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index b30349eeb767..8ed10366c4a2 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -918,8 +918,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) if (!status) goto done; - /* Read branch records before unfreezing */ - if (status & GLOBAL_STATUS_LBRS_FROZEN) { + /* Read branch records */ + if (x86_pmu.lbr_nr) { amd_pmu_lbr_read(); status &= ~GLOBAL_STATUS_LBRS_FROZEN; } diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c index eb31f850841a..110e34c59643 100644 --- a/arch/x86/events/amd/lbr.c +++ b/arch/x86/events/amd/lbr.c @@ -400,10 +400,12 @@ void amd_pmu_lbr_enable_all(void) wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select); } - rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); - rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); + if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) { + rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); + wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + } - wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN); } @@ -416,10 +418,12 @@ void amd_pmu_lbr_disable_all(void) return; rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); - rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); - wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN); - wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + + if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) { + rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); + wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + } } __init int amd_pmu_lbr_init(void) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index a42db7bbe593..9b2b99670d36 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -456,6 +456,14 @@ #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */ #define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */ +/* + * Extended auxiliary flags: Linux defined - for features scattered in various + * CPUID levels like 0x80000022, etc. + * + * Reuse free bits when adding new feature flags! + */ +#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */ + /* * BUG word(s) */ diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 0dad49a09b7a..a515328d9d7d 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -49,6 +49,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 }, + { X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 }, { 0, 0, 0, 0, 0 } }; -- Gitee From c9109dadc33e0510b9c32178fd91022d9fdba146 Mon Sep 17 00:00:00 2001 From: Jack Brennen Date: Tue, 26 Sep 2023 08:40:44 -0400 Subject: [PATCH 078/268] modpost: Optimize symbol search from linear to binary search stable inclusion from stable-6.6.26 commit 2bc92c61c5417982e7b92ba628281f411d31013c category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 4074532758c5c367d3fcb8d124150824a254659d ] Modify modpost to use binary search for converting addresses back into symbol references. Previously it used linear search. This change saves a few seconds of wall time for defconfig builds, but can save several minutes on allyesconfigs. Before: $ make LLVM=1 -j128 allyesconfig vmlinux -s KCFLAGS="-Wno-error" $ time scripts/mod/modpost -M -m -a -N -o vmlinux.symvers vmlinux.o 198.38user 1.27system 3:19.71elapsed After: $ make LLVM=1 -j128 allyesconfig vmlinux -s KCFLAGS="-Wno-error" $ time scripts/mod/modpost -M -m -a -N -o vmlinux.symvers vmlinux.o 11.91user 0.85system 0:12.78elapsed Signed-off-by: Jack Brennen Tested-by: Nick Desaulniers Signed-off-by: Masahiro Yamada Stable-dep-of: 1102f9f85bf6 ("modpost: do not make find_tosym() return NULL") Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi create mode 100644 scripts/mod/symsearch.c Signed-off-by: zhangshuqi --- scripts/mod/Makefile | 4 +- scripts/mod/modpost.c | 70 ++------------ scripts/mod/modpost.h | 25 +++++ scripts/mod/symsearch.c | 199 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 232 insertions(+), 66 deletions(-) create mode 100644 scripts/mod/symsearch.c diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile index c9e38ad937fd..3c54125eb373 100644 --- a/scripts/mod/Makefile +++ b/scripts/mod/Makefile @@ -5,7 +5,7 @@ CFLAGS_REMOVE_empty.o += $(CC_FLAGS_LTO) hostprogs-always-y += modpost mk_elfconfig always-y += empty.o -modpost-objs := modpost.o file2alias.o sumversion.o +modpost-objs := modpost.o file2alias.o sumversion.o symsearch.o devicetable-offsets-file := devicetable-offsets.h @@ -16,7 +16,7 @@ targets += $(devicetable-offsets-file) devicetable-offsets.s # dependencies on generated files need to be listed explicitly -$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o: $(obj)/elfconfig.h +$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o $(obj)/symsearch.o: $(obj)/elfconfig.h $(obj)/file2alias.o: $(obj)/$(devicetable-offsets-file) quiet_cmd_elfconfig = MKELF $@ diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 5191fdbd3fa2..66589fb4e9ae 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -22,7 +22,6 @@ #include #include "modpost.h" #include "../../include/linux/license.h" -#include "../../include/linux/module_symbol.h" static bool module_enabled; /* Are we using CONFIG_MODVERSIONS? */ @@ -577,11 +576,14 @@ static int parse_elf(struct elf_info *info, const char *filename) *p = TO_NATIVE(*p); } + symsearch_init(info); + return 1; } static void parse_elf_finish(struct elf_info *info) { + symsearch_finish(info); release_file(info->hdr, info->size); } @@ -1042,71 +1044,10 @@ static int secref_whitelist(const char *fromsec, const char *fromsym, return 1; } -/* - * If there's no name there, ignore it; likewise, ignore it if it's - * one of the magic symbols emitted used by current tools. - * - * Otherwise if find_symbols_between() returns those symbols, they'll - * fail the whitelist tests and cause lots of false alarms ... fixable - * only by merging __exit and __init sections into __text, bloating - * the kernel (which is especially evil on embedded platforms). - */ -static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym) -{ - const char *name = elf->strtab + sym->st_name; - - if (!name || !strlen(name)) - return 0; - return !is_mapping_symbol(name); -} - -/* Look up the nearest symbol based on the section and the address */ -static Elf_Sym *find_nearest_sym(struct elf_info *elf, Elf_Addr addr, - unsigned int secndx, bool allow_negative, - Elf_Addr min_distance) -{ - Elf_Sym *sym; - Elf_Sym *near = NULL; - Elf_Addr sym_addr, distance; - bool is_arm = (elf->hdr->e_machine == EM_ARM); - - for (sym = elf->symtab_start; sym < elf->symtab_stop; sym++) { - if (get_secindex(elf, sym) != secndx) - continue; - if (!is_valid_name(elf, sym)) - continue; - - sym_addr = sym->st_value; - - /* - * For ARM Thumb instruction, the bit 0 of st_value is set - * if the symbol is STT_FUNC type. Mask it to get the address. - */ - if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC) - sym_addr &= ~1; - - if (addr >= sym_addr) - distance = addr - sym_addr; - else if (allow_negative) - distance = sym_addr - addr; - else - continue; - - if (distance <= min_distance) { - min_distance = distance; - near = sym; - } - - if (min_distance == 0) - break; - } - return near; -} - static Elf_Sym *find_fromsym(struct elf_info *elf, Elf_Addr addr, unsigned int secndx) { - return find_nearest_sym(elf, addr, secndx, false, ~0); + return symsearch_find_nearest(elf, addr, secndx, false, ~0); } static Elf_Sym *find_tosym(struct elf_info *elf, Elf_Addr addr, Elf_Sym *sym) @@ -1119,7 +1060,8 @@ static Elf_Sym *find_tosym(struct elf_info *elf, Elf_Addr addr, Elf_Sym *sym) * Strive to find a better symbol name, but the resulting name may not * match the symbol referenced in the original code. */ - return find_nearest_sym(elf, addr, get_secindex(elf, sym), true, 20); + return symsearch_find_nearest(elf, addr, get_secindex(elf, sym), + true, 20); } static bool is_executable_section(struct elf_info *elf, unsigned int secndx) diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h index 5f94c2c9f2d9..6413f26fcb6b 100644 --- a/scripts/mod/modpost.h +++ b/scripts/mod/modpost.h @@ -10,6 +10,7 @@ #include #include #include +#include "../../include/linux/module_symbol.h" #include "list.h" #include "elfconfig.h" @@ -128,6 +129,8 @@ struct elf_info { * take shndx from symtab_shndx_start[N] instead */ Elf32_Word *symtab_shndx_start; Elf32_Word *symtab_shndx_stop; + + struct symsearch *symsearch; }; /* Accessor for sym->st_shndx, hides ugliness of "64k sections" */ @@ -154,6 +157,28 @@ static inline unsigned int get_secindex(const struct elf_info *info, return index; } +/* + * If there's no name there, ignore it; likewise, ignore it if it's + * one of the magic symbols emitted used by current tools. + * + * Internal symbols created by tools should be ignored by modpost. + */ +static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym) +{ + const char *name = elf->strtab + sym->st_name; + + if (!name || !strlen(name)) + return 0; + return !is_mapping_symbol(name); +} + +/* symsearch.c */ +void symsearch_init(struct elf_info *elf); +void symsearch_finish(struct elf_info *elf); +Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr, + unsigned int secndx, bool allow_negative, + Elf_Addr min_distance); + /* file2alias.c */ void handle_moddevtable(struct module *mod, struct elf_info *info, Elf_Sym *sym, const char *symname); diff --git a/scripts/mod/symsearch.c b/scripts/mod/symsearch.c new file mode 100644 index 000000000000..aa4ed51f9960 --- /dev/null +++ b/scripts/mod/symsearch.c @@ -0,0 +1,199 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Helper functions for finding the symbol in an ELF which is "nearest" + * to a given address. + */ + +#include "modpost.h" + +struct syminfo { + unsigned int symbol_index; + unsigned int section_index; + Elf_Addr addr; +}; + +/* + * Container used to hold an entire binary search table. + * Entries in table are ascending, sorted first by section_index, + * then by addr, and last by symbol_index. The sorting by + * symbol_index is used to ensure predictable behavior when + * multiple symbols are present with the same address; all + * symbols past the first are effectively ignored, by eliding + * them in symsearch_fixup(). + */ +struct symsearch { + unsigned int table_size; + struct syminfo table[]; +}; + +static int syminfo_compare(const void *s1, const void *s2) +{ + const struct syminfo *sym1 = s1; + const struct syminfo *sym2 = s2; + + if (sym1->section_index > sym2->section_index) + return 1; + if (sym1->section_index < sym2->section_index) + return -1; + if (sym1->addr > sym2->addr) + return 1; + if (sym1->addr < sym2->addr) + return -1; + if (sym1->symbol_index > sym2->symbol_index) + return 1; + if (sym1->symbol_index < sym2->symbol_index) + return -1; + return 0; +} + +static unsigned int symbol_count(struct elf_info *elf) +{ + unsigned int result = 0; + + for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) { + if (is_valid_name(elf, sym)) + result++; + } + return result; +} + +/* + * Populate the search array that we just allocated. + * Be slightly paranoid here. The ELF file is mmap'd and could + * conceivably change between symbol_count() and symsearch_populate(). + * If we notice any difference, bail out rather than potentially + * propagating errors or crashing. + */ +static void symsearch_populate(struct elf_info *elf, + struct syminfo *table, + unsigned int table_size) +{ + bool is_arm = (elf->hdr->e_machine == EM_ARM); + + for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) { + if (is_valid_name(elf, sym)) { + if (table_size-- == 0) + fatal("%s: size mismatch\n", __func__); + table->symbol_index = sym - elf->symtab_start; + table->section_index = get_secindex(elf, sym); + table->addr = sym->st_value; + + /* + * For ARM Thumb instruction, the bit 0 of st_value is + * set if the symbol is STT_FUNC type. Mask it to get + * the address. + */ + if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC) + table->addr &= ~1; + + table++; + } + } + + if (table_size != 0) + fatal("%s: size mismatch\n", __func__); +} + +/* + * Do any fixups on the table after sorting. + * For now, this just finds adjacent entries which have + * the same section_index and addr, and it propagates + * the first symbol_index over the subsequent entries, + * so that only one symbol_index is seen for any given + * section_index and addr. This ensures that whether + * we're looking at an address from "above" or "below" + * that we see the same symbol_index. + * This does leave some duplicate entries in the table; + * in practice, these are a small fraction of the + * total number of entries, and they are harmless to + * the binary search algorithm other than a few occasional + * unnecessary comparisons. + */ +static void symsearch_fixup(struct syminfo *table, unsigned int table_size) +{ + /* Don't look at index 0, it will never change. */ + for (unsigned int i = 1; i < table_size; i++) { + if (table[i].addr == table[i - 1].addr && + table[i].section_index == table[i - 1].section_index) { + table[i].symbol_index = table[i - 1].symbol_index; + } + } +} + +void symsearch_init(struct elf_info *elf) +{ + unsigned int table_size = symbol_count(elf); + + elf->symsearch = NOFAIL(malloc(sizeof(struct symsearch) + + sizeof(struct syminfo) * table_size)); + elf->symsearch->table_size = table_size; + + symsearch_populate(elf, elf->symsearch->table, table_size); + qsort(elf->symsearch->table, table_size, + sizeof(struct syminfo), syminfo_compare); + + symsearch_fixup(elf->symsearch->table, table_size); +} + +void symsearch_finish(struct elf_info *elf) +{ + free(elf->symsearch); + elf->symsearch = NULL; +} + +/* + * Find the syminfo which is in secndx and "nearest" to addr. + * allow_negative: allow returning a symbol whose address is > addr. + * min_distance: ignore symbols which are further away than this. + * + * Returns a pointer into the symbol table for success. + * Returns NULL if no legal symbol is found within the requested range. + */ +Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr, + unsigned int secndx, bool allow_negative, + Elf_Addr min_distance) +{ + unsigned int hi = elf->symsearch->table_size; + unsigned int lo = 0; + struct syminfo *table = elf->symsearch->table; + struct syminfo target; + + target.addr = addr; + target.section_index = secndx; + target.symbol_index = ~0; /* compares greater than any actual index */ + while (hi > lo) { + unsigned int mid = lo + (hi - lo) / 2; /* Avoids overflow */ + + if (syminfo_compare(&table[mid], &target) > 0) + hi = mid; + else + lo = mid + 1; + } + + /* + * table[hi], if it exists, is the first entry in the array which + * lies beyond target. table[hi - 1], if it exists, is the last + * entry in the array which comes before target, including the + * case where it perfectly matches the section and the address. + * + * Note -- if the address we're looking up falls perfectly + * in the middle of two symbols, this is written to always + * prefer the symbol with the lower address. + */ + Elf_Sym *result = NULL; + + if (allow_negative && + hi < elf->symsearch->table_size && + table[hi].section_index == secndx && + table[hi].addr - addr <= min_distance) { + min_distance = table[hi].addr - addr; + result = &elf->symtab_start[table[hi].symbol_index]; + } + if (hi > 0 && + table[hi - 1].section_index == secndx && + addr - table[hi - 1].addr <= min_distance) { + result = &elf->symtab_start[table[hi - 1].symbol_index]; + } + return result; +} -- Gitee From 44a529753ee1b479865c5b8a467951f582097bc2 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 23 Mar 2024 20:45:11 +0900 Subject: [PATCH 079/268] modpost: do not make find_tosym() return NULL stable inclusion from stable-6.6.26 commit a2671601fa020a124fe9c7d181bf9c9faf17011a category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 1102f9f85bf66b1a7bd6a40afb40efbbe05dfc05 ] As mentioned in commit 397586506c3d ("modpost: Add '.ltext' and '.ltext.*' to TEXT_SECTIONS"), modpost can result in a segmentation fault due to a NULL pointer dereference in default_mismatch_handler(). find_tosym() can return the original symbol pointer instead of NULL if a better one is not found. This fixes the reported segmentation fault. Fixes: a23e7584ecf3 ("modpost: unify 'sym' and 'to' in default_mismatch_handler()") Reported-by: Nathan Chancellor Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- scripts/mod/modpost.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 66589fb4e9ae..7d53942445d7 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1052,6 +1052,8 @@ static Elf_Sym *find_fromsym(struct elf_info *elf, Elf_Addr addr, static Elf_Sym *find_tosym(struct elf_info *elf, Elf_Addr addr, Elf_Sym *sym) { + Elf_Sym *new_sym; + /* If the supplied symbol has a valid name, return it */ if (is_valid_name(elf, sym)) return sym; @@ -1060,8 +1062,9 @@ static Elf_Sym *find_tosym(struct elf_info *elf, Elf_Addr addr, Elf_Sym *sym) * Strive to find a better symbol name, but the resulting name may not * match the symbol referenced in the original code. */ - return symsearch_find_nearest(elf, addr, get_secindex(elf, sym), - true, 20); + new_sym = symsearch_find_nearest(elf, addr, get_secindex(elf, sym), + true, 20); + return new_sym ? new_sym : sym; } static bool is_executable_section(struct elf_info *elf, unsigned int secndx) -- Gitee From 4b0aacd4b28d611d70b5b55c032ce360f833aa3d Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 25 Mar 2024 10:02:42 +0100 Subject: [PATCH 080/268] gpio: cdev: sanitize the label before requesting the interrupt stable inclusion from stable-6.6.26 commit 21bc9b158983992b0709222c60f4bc749919a93d category: bugfix issue: #IAZXS9 CVE: NA Signed-off-by: zhangshuqi --------------------------------------- commit b34490879baa847d16fc529c8ea6e6d34f004b38 upstream. When an interrupt is requested, a procfs directory is created under "/proc/irq//