diff --git a/Kconfig b/Kconfig
index 745bc773f567067a85ce6574fb41ce80833247d9..4a96e16e5f31619cb3549db8de54eaa310fa9d92 100644
--- a/Kconfig
+++ b/Kconfig
@@ -29,4 +29,6 @@ source "lib/Kconfig"
 
 source "lib/Kconfig.debug"
 
+source "lib/Kconfig.openeuler"
+
 source "Documentation/Kconfig"
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index 2002fe8d9ea399d3dba2852bb244d180aa6b0d83..91577cbfb2f0a6751ff06ae6001ad0169be2c597 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -1036,6 +1036,7 @@ CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE=y
 CONFIG_ARCH_HAS_SYSCALL_WRAPPER=y
 CONFIG_PID_MAX_PER_NAMESPACE=y
 CONFIG_FREEZER=y
+CONFIG_KWORKER_NUMA_AFFINITY=y
 
 #
 # Executable file formats
diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig
index 627a08ecbf4adfea6d2b76ddf8a4e7f647c752b6..987a13ca8d5fa25330462a42da1042e8bb19947f 100644
--- a/arch/x86/configs/openeuler_defconfig
+++ b/arch/x86/configs/openeuler_defconfig
@@ -1033,6 +1033,7 @@ CONFIG_ARCH_HAS_SYNC_CORE_BEFORE_USERMODE=y
 CONFIG_ARCH_HAS_SYSCALL_WRAPPER=y
 CONFIG_PID_MAX_PER_NAMESPACE=y
 CONFIG_FREEZER=y
+CONFIG_KWORKER_NUMA_AFFINITY=y
 
 #
 # Executable file formats
diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index 8e14cea15f980829e99afa2c43bf6872fcfd965c..f7ae9de005ec366801c9efb055c8be149388b92a 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -170,6 +170,9 @@ static void iscsi_sw_tcp_data_ready(struct sock *sk)
 	struct iscsi_sw_tcp_conn *tcp_sw_conn;
 	struct iscsi_tcp_conn *tcp_conn;
 	struct iscsi_conn *conn;
+#ifdef CONFIG_KWORKER_NUMA_AFFINITY
+	int current_cpu;
+#endif
 
 	trace_sk_data_ready(sk);
 
@@ -180,6 +183,16 @@ static void iscsi_sw_tcp_data_ready(struct sock *sk)
 		return;
 	}
 	tcp_conn = conn->dd_data;
+
+#ifdef CONFIG_KWORKER_NUMA_AFFINITY
+	/* record the softirq CPU so xmit work can be queued near it */
+	if (!sock_owned_by_user_nocheck(sk)) {
+		current_cpu = smp_processor_id();
+		if (conn->intimate_cpu != current_cpu)
+			conn->intimate_cpu = current_cpu;
+	}
+#endif
+
 	tcp_sw_conn = tcp_conn->dd_data;
 
 	if (tcp_sw_conn->queue_recv)
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 0fda8905eabd8278f578e54a8f502ab2996629ed..edb732d60c90f3fe7ba012f64230487942b289d4 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -89,9 +89,20 @@ inline void iscsi_conn_queue_xmit(struct iscsi_conn *conn)
 {
 	struct Scsi_Host *shost = conn->session->host;
 	struct iscsi_host *ihost = shost_priv(shost);
 
+#ifdef CONFIG_KWORKER_NUMA_AFFINITY
+	int intimate_cpu = conn->intimate_cpu;
+	if (ihost->workq) {
+		/* prefer running the xmit work on the NUMA node of the intimate CPU */
+		if ((intimate_cpu >= 0) && cpu_possible(intimate_cpu))
+			queue_work_on(intimate_cpu, ihost->workq, &conn->xmitwork);
+		else
+			queue_work(ihost->workq, &conn->xmitwork);
+	}
+#else
 	if (ihost->workq)
 		queue_work(ihost->workq, &conn->xmitwork);
+#endif
 }
 EXPORT_SYMBOL_GPL(iscsi_conn_queue_xmit);
 
@@ -2907,9 +2918,15 @@ struct Scsi_Host *iscsi_host_alloc(const struct scsi_host_template *sht,
 	ihost = shost_priv(shost);
 
 	if (xmit_can_sleep) {
+#ifdef CONFIG_KWORKER_NUMA_AFFINITY
+		/* a __WQ_DYNAMIC ordered workqueue only supports a single work */
+		ihost->workq = alloc_ordered_workqueue("iscsi_q_%d",
+			__WQ_LEGACY | WQ_MEM_RECLAIM | __WQ_DYNAMIC, shost->host_no);
+#else
 		ihost->workq = alloc_workqueue("iscsi_q_%d",
 			WQ_SYSFS | __WQ_LEGACY | WQ_MEM_RECLAIM | WQ_UNBOUND,
 			1, shost->host_no);
+#endif
 		if (!ihost->workq)
 			goto free_host;
 	}
@@ -3190,6 +3207,9 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size,
 	conn->c_stage = ISCSI_CONN_INITIAL_STAGE;
 	conn->id = conn_idx;
 	conn->exp_statsn = 0;
+#ifdef CONFIG_KWORKER_NUMA_AFFINITY
+	conn->intimate_cpu = -1;
+#endif
 
 	timer_setup(&conn->transport_timer, iscsi_check_transport_timeouts, 0);
 
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 9619098755fb3d24fb3bf24b4d0ca43c992035ad..485c0f5b251882d5ce3dc30e5c65f02d08043609 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -415,6 +415,7 @@ enum {
 	__WQ_ORDERED		= 1 << 17, /* internal: workqueue is ordered */
 	__WQ_LEGACY		= 1 << 18, /* internal: create*_workqueue() */
 	__WQ_ORDERED_EXPLICIT	= 1 << 19, /* internal: alloc_ordered_workqueue() */
+	__WQ_DYNAMIC		= 1 << 25, /* internal: ordered WQ that only supports a single work */
 
 	WQ_MAX_ACTIVE		= 512,	  /* I like 512, better ideas? */
 	WQ_UNBOUND_MAX_ACTIVE	= WQ_MAX_ACTIVE,
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index d253b82e973e43e08721bfaab14ef2b9503d1000..ead17d2224a1f231b072265ffe8c2bca5705b44c 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -266,6 +266,7 @@ struct iscsi_conn {
 	/* custom statistics */
 	uint32_t		eh_abort_cnt;
 	uint32_t		fmr_unalign_cnt;
+	int			intimate_cpu;
 
 	KABI_RESERVE(1)
 	KABI_RESERVE(2)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 1888741f5edd79e6cbf88f4a82e16e44e36f0f7f..74431968a05cbaebf956e8263e46b015166668d2 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -4355,6 +4355,10 @@ apply_wqattrs_prepare(struct workqueue_struct *wq,
 	 * it even if we don't use it immediately.
 	 */
 	copy_workqueue_attrs(new_attrs, attrs);
+#ifdef CONFIG_KWORKER_NUMA_AFFINITY
+	if (wq->flags & __WQ_DYNAMIC)
+		new_attrs->ordered = false;
+#endif
 	wqattrs_actualize_cpumask(new_attrs, unbound_cpumask);
 	cpumask_copy(new_attrs->__pod_cpumask, new_attrs->cpumask);
 	ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs);
@@ -4591,10 +4595,19 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq)
 	cpus_read_lock();
 	if (wq->flags & __WQ_ORDERED) {
 		ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
+#ifdef CONFIG_KWORKER_NUMA_AFFINITY
+		if (!(wq->flags & __WQ_DYNAMIC)) {
+			/* there should only be single pwq for ordering guarantee */
+			WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node ||
+				      wq->pwqs.prev != &wq->dfl_pwq->pwqs_node),
+			     "ordering guarantee broken for workqueue %s\n", wq->name);
+		}
+#else
 		/* there should only be single pwq for ordering guarantee */
 		WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node ||
 			      wq->pwqs.prev != &wq->dfl_pwq->pwqs_node),
 		     "ordering guarantee broken for workqueue %s\n", wq->name);
+#endif
 	} else {
 		ret = apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
 	}
@@ -5799,7 +5812,11 @@ static int workqueue_apply_unbound_cpumask(const cpumask_var_t unbound_cpumask)
 
 		/* creating multiple pwqs breaks ordering guarantee */
 		if (!list_empty(&wq->pwqs)) {
+#ifdef CONFIG_KWORKER_NUMA_AFFINITY
+			if (wq->flags & __WQ_ORDERED_EXPLICIT && !(wq->flags & __WQ_DYNAMIC))
+#else
 			if (wq->flags & __WQ_ORDERED_EXPLICIT)
+#endif
 				continue;
 			wq->flags &= ~__WQ_ORDERED;
 		}
diff --git a/lib/Kconfig.openeuler b/lib/Kconfig.openeuler
new file mode 100644
index 0000000000000000000000000000000000000000..fc5fdf0f1123f2ede2f9ef9bc3a22a3b6d77b821
--- /dev/null
+++ b/lib/Kconfig.openeuler
@@ -0,0 +1,8 @@
+config KWORKER_NUMA_AFFINITY
+	bool "kworker NUMA affinity"
+	default n
+	help
+	  This feature implements a set of adaptive mechanisms so that the
+	  workqueue can automatically identify the CPU of the soft interrupt
+	  and automatically schedule the workqueue to the corresponding NUMA
+	  node.