diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 90182b7fd9e2267e7968458228c815c1bc7bff6b..3493515422259eeaf52c626cb6cf60b00a43263a 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -15,6 +15,7 @@
 #include
 #include
 #include
+#include <asm/resctrl.h>
 
 #ifdef CONFIG_X86_64
 # include
@@ -568,6 +569,8 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
 			x86_amd_ls_cfg_ssbd_mask = 1ULL << bit;
 		}
 	}
+
+	resctrl_cpu_detect(c);
 }
 
 static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f01c13e55a14a8010c4d5c4f175328efec96e8b9..eb53cb1ab67ea5b92de363dacc8e62409257256e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -52,7 +52,6 @@
 #include
 #include
 #include
-#include <asm/resctrl.h>
 
 #ifdef CONFIG_X86_LOCAL_APIC
 #include
@@ -898,7 +897,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
 
 	init_scattered_cpuid_features(c);
 	init_speculation_control(c);
-	resctrl_cpu_detect(c);
 
 	/*
	 * Clear/Set all flags overridden by options, after probe.
@@ -1413,20 +1411,6 @@ static void generic_identify(struct cpuinfo_x86 *c)
 #endif
 }
 
-static void x86_init_cache_qos(struct cpuinfo_x86 *c)
-{
-	/*
-	 * The heavy lifting of max_rmid and cache_occ_scale are handled
-	 * in get_cpu_cap(). Here we just set the max_rmid for the boot_cpu
-	 * in case CQM bits really aren't there in this CPU.
-	 */
-	if (c != &boot_cpu_data) {
-		boot_cpu_data.x86_cache_max_rmid =
-			min(boot_cpu_data.x86_cache_max_rmid,
-			    c->x86_cache_max_rmid);
-	}
-}
-
 /*
  * Validate that ACPI/mptables have the same information about the
  * effective APIC id and update the package map.
@@ -1542,7 +1526,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 #endif
 
 	x86_init_rdrand(c);
-	x86_init_cache_qos(c);
 	setup_pku(c);
 
 	/*
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index a91d569615aa0ec92a0bbc50feba8236e1c7662c..dc087168d2ec73e4b074b40b00f2a9df048af5bb 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -21,6 +21,7 @@
 #include
 #include
 #include
+#include <asm/resctrl.h>
 
 #ifdef CONFIG_X86_64
 #include
@@ -336,6 +337,11 @@ static void early_init_intel(struct cpuinfo_x86 *c)
 		detect_ht_early(c);
 }
 
+static void bsp_init_intel(struct cpuinfo_x86 *c)
+{
+	resctrl_cpu_detect(c);
+}
+
 #ifdef CONFIG_X86_32
 /*
  * Early probe support logic for ppro memory erratum #50
@@ -1056,6 +1062,7 @@ static const struct cpu_dev intel_cpu_dev = {
 #endif
 	.c_detect_tlb	= intel_detect_tlb,
 	.c_early_init	= early_init_intel,
+	.c_bsp_init	= bsp_init_intel,
 	.c_init		= init_intel,
 	.c_bsp_resume	= intel_bsp_resume,
 	.c_x86_vendor	= X86_VENDOR_INTEL,
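Taken together, the three files above move resctrl_cpu_detect() out of get_cpu_cap(), which runs on every CPU, into the vendor c_bsp_init hooks, which are invoked once while the boot CPU is identified. That is also what makes the x86_init_cache_qos() min() reconciliation across CPUs removable. A minimal userspace sketch of the callback pattern, with struct cpuinfo, bsp_init_vendor() and vendor_dev as illustrative stand-ins for the kernel's cpuinfo_x86 and cpu_dev machinery:

    #include <stdio.h>

    /* Illustrative stand-ins for the kernel's cpuinfo_x86 / cpu_dev. */
    struct cpuinfo {
        unsigned int x86_cache_max_rmid;
    };

    struct cpu_dev {
        void (*c_bsp_init)(struct cpuinfo *c); /* boot CPU only, once */
        void (*c_init)(struct cpuinfo *c);     /* every CPU */
    };

    static void bsp_init_vendor(struct cpuinfo *c)
    {
        /* resctrl_cpu_detect(c) now runs from a hook like this one,
         * exactly once, so no cross-CPU min() is needed afterwards. */
        c->x86_cache_max_rmid = 255;
    }

    static const struct cpu_dev vendor_dev = {
        .c_bsp_init = bsp_init_vendor,
    };

    int main(void)
    {
        struct cpuinfo boot_cpu = { 0 };

        /* Mirrors how boot code fires the hook for the BSP only. */
        if (vendor_dev.c_bsp_init)
            vendor_dev.c_bsp_init(&boot_cpu);

        printf("max_rmid=%u\n", boot_cpu.x86_cache_max_rmid);
        return 0;
    }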
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index b246bd72333b068c422d3899c7fc479e5cf80e7d..b709b2149d1ebad2b1342afa5b4c17aa18219a01 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -971,6 +971,7 @@ static __init void rdt_init_res_defs(void)
 
 static enum cpuhp_state rdt_online;
 
+/* Runs once on the BSP during boot. */
 void resctrl_cpu_detect(struct cpuinfo_x86 *c)
 {
 	if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index 3e2a343f24c54c1552166b6bdf247f40cdade000..15ce4b02de24aa6ea07cb69722bc4c14ad2657a6 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -503,14 +503,16 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of,
 	return ret;
 }
 
-void mon_event_read(struct rmid_read *rr, struct rdt_domain *d,
-		    struct rdtgroup *rdtgrp, int evtid, int first)
+void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
+		    struct rdt_domain *d, struct rdtgroup *rdtgrp,
+		    int evtid, int first)
 {
 	/*
	 * setup the parameters to send to the IPI to read the data.
	 */
 	rr->rgrp = rdtgrp;
 	rr->evtid = evtid;
+	rr->r = r;
 	rr->d = d;
 	rr->val = 0;
 	rr->first = first;
@@ -547,7 +549,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
 		goto out;
 	}
 
-	mon_event_read(&rr, d, rdtgrp, evtid, false);
+	mon_event_read(&rr, r, d, rdtgrp, evtid, false);
 
 	if (rr.val & RMID_VAL_ERROR)
 		seq_puts(m, "Error\n");
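mon_event_read() keeps the existing pattern: pack every parameter of the read into the rmid_read struct and hand a pointer to a CPU in the domain; the new rr->r member is what later lets the counting code reach the per-resource counter width without global state. A standalone sketch of that pattern, with the *_sketch names as assumptions and a direct call standing in for the kernel's IPI:

    #include <stdint.h>
    #include <stdio.h>

    struct resource_sketch {
        unsigned int mbm_width;
    };

    struct rmid_read_sketch {
        const struct resource_sketch *r; /* the member being plumbed in */
        int evtid;
        uint64_t val;
    };

    /* In the kernel this runs on a CPU of the target domain via an IPI;
     * here it is simply called directly. */
    static void mon_event_count_sketch(void *info)
    {
        struct rmid_read_sketch *rr = info;

        /* Stand-in for the MSR read; rr->r->mbm_width is reachable. */
        rr->val = 42;
    }

    int main(void)
    {
        struct resource_sketch l3 = { .mbm_width = 24 };
        struct rmid_read_sketch rr = { .r = &l3, .evtid = 1, .val = 0 };

        mon_event_count_sketch(&rr);
        printf("evtid=%d val=%llu width=%u\n",
               rr.evtid, (unsigned long long)rr.val, rr.r->mbm_width);
        return 0;
    }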
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index b4a6323f63a79899eae282a392603c393d658f90..acc365a9364ab378a84bf550070468c3ff3dbdcf 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -30,7 +30,7 @@
 
 #define CQM_LIMBOCHECK_INTERVAL	1000
 
-#define MBM_CNTR_WIDTH			24
+#define MBM_CNTR_WIDTH_BASE		24
 #define MBM_OVERFLOW_INTERVAL		1000
 #define MAX_MBA_BW			100u
 #define MBA_IS_LINEAR			0x4
@@ -40,8 +40,15 @@
 
 #define RMID_VAL_ERROR			BIT_ULL(63)
 #define RMID_VAL_UNAVAIL		BIT_ULL(62)
+/*
+ * With the above fields in use 62 bits remain in MSR_IA32_QM_CTR for
+ * data to be returned. The counter width is discovered from the hardware
+ * as an offset from MBM_CNTR_WIDTH_BASE.
+ */
+#define MBM_CNTR_WIDTH_OFFSET_MAX (62 - MBM_CNTR_WIDTH_BASE)
 
 DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
+DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key);
 
 /**
  * struct mon_evt - Entry in the event list of a resource
@@ -71,6 +78,7 @@ union mon_data_bits {
 
 struct rmid_read {
 	struct rdtgroup		*rgrp;
+	struct rdt_resource	*r;
 	struct rdt_domain	*d;
 	int			evtid;
 	bool			first;
@@ -442,6 +450,7 @@ struct rdt_resource {
 	struct list_head	evt_list;
 	int			num_rmid;
 	unsigned int		mon_scale;
+	unsigned int		mbm_width;
 	unsigned long		fflags;
 };
 
@@ -569,8 +578,9 @@ void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
 				    unsigned int dom_id);
 void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
 				    struct rdt_domain *d);
-void mon_event_read(struct rmid_read *rr, struct rdt_domain *d,
-		    struct rdtgroup *rdtgrp, int evtid, int first);
+void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
+		    struct rdt_domain *d, struct rdtgroup *rdtgrp,
+		    int evtid, int first);
 void mbm_setup_overflow_handler(struct rdt_domain *dom,
 				unsigned long delay_ms);
 void mbm_handle_overflow(struct work_struct *work);
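For concreteness: with MBM_CNTR_WIDTH_BASE at 24, MBM_CNTR_WIDTH_OFFSET_MAX works out to 62 - 24 = 38, so the widest usable counter is 62 bits, exactly what remains of MSR_IA32_QM_CTR once bits 63 and 62 are reserved for RMID_VAL_ERROR and RMID_VAL_UNAVAIL. The userspace sketch below (effective_mbm_width() is a hypothetical helper, not kernel code) mirrors the clamping that rdt_get_mon_l3_config() applies to the hardware-enumerated offset:

    #include <stdio.h>

    #define MBM_CNTR_WIDTH_BASE       24
    #define MBM_CNTR_WIDTH_OFFSET_MAX (62 - MBM_CNTR_WIDTH_BASE)

    /* Mirrors the clamping in rdt_get_mon_l3_config(). */
    static unsigned int effective_mbm_width(unsigned int mbm_offset)
    {
        unsigned int width = MBM_CNTR_WIDTH_BASE;

        if (mbm_offset > 0 && mbm_offset <= MBM_CNTR_WIDTH_OFFSET_MAX)
            width += mbm_offset;
        else if (mbm_offset > MBM_CNTR_WIDTH_OFFSET_MAX)
            fprintf(stderr, "Ignoring impossible MBM counter offset\n");

        return width;
    }

    int main(void)
    {
        printf("offset max: %d\n", MBM_CNTR_WIDTH_OFFSET_MAX);      /* 38 */
        printf("offset  0 -> width %u\n", effective_mbm_width(0));  /* 24 */
        printf("offset  8 -> width %u\n", effective_mbm_width(8));  /* 32 */
        printf("offset 60 -> width %u\n", effective_mbm_width(60)); /* 24 */
        return 0;
    }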
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index e4af6492d7fca10d4e60d0ea7f9ce7905b727271..179b3d19e355b3a127fb7aeda62f21da988064e3 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -285,9 +285,9 @@ void free_rmid(u32 rmid)
 		list_add_tail(&entry->list, &rmid_free_lru);
 }
 
-static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr)
+static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
 {
-	u64 shift = 64 - MBM_CNTR_WIDTH, chunks;
+	u64 shift = 64 - width, chunks;
 
 	chunks = (cur_msr << shift) - (prev_msr << shift);
 	return chunks >>= shift;
 }
@@ -327,7 +327,7 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr)
 		return 0;
 	}
 
-	chunks = mbm_overflow_count(m->prev_msr, tval);
+	chunks = mbm_overflow_count(m->prev_msr, tval, rr->r->mbm_width);
 	m->chunks += chunks;
 	m->prev_msr = tval;
 
@@ -350,7 +350,7 @@ static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
 	if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
 		return;
 
-	chunks = mbm_overflow_count(m->prev_bw_msr, tval);
+	chunks = mbm_overflow_count(m->prev_bw_msr, tval, rr->r->mbm_width);
 	m->chunks += chunks;
 	cur_bw = (get_corrected_mbm_count(rmid, chunks) * r->mon_scale) >> 20;
 
@@ -504,11 +504,12 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
 	}
 }
 
-static void mbm_update(struct rdt_domain *d, int rmid)
+static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid)
 {
 	struct rmid_read rr;
 
 	rr.first = false;
+	rr.r = r;
 	rr.d = d;
 
 	/*
@@ -583,23 +584,26 @@ void mbm_handle_overflow(struct work_struct *work)
 	struct rdtgroup *prgrp, *crgrp;
 	int cpu = smp_processor_id();
 	struct list_head *head;
+	struct rdt_resource *r;
 	struct rdt_domain *d;
 
 	mutex_lock(&rdtgroup_mutex);
 
-	if (!static_branch_likely(&rdt_enable_key))
+	if (!static_branch_likely(&rdt_mon_enable_key))
 		goto out_unlock;
 
-	d = get_domain_from_cpu(cpu, &rdt_resources_all[RDT_RESOURCE_L3]);
+	r = &rdt_resources_all[RDT_RESOURCE_L3];
+
+	d = get_domain_from_cpu(cpu, r);
 	if (!d)
 		goto out_unlock;
 
 	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
-		mbm_update(d, prgrp->mon.rmid);
+		mbm_update(r, d, prgrp->mon.rmid);
 
 		head = &prgrp->mon.crdtgrp_list;
 		list_for_each_entry(crgrp, head, mon.crdtgrp_list)
-			mbm_update(d, crgrp->mon.rmid);
+			mbm_update(r, d, crgrp->mon.rmid);
 
 		if (is_mba_sc(NULL))
 			update_mba_bw(prgrp, d);
@@ -616,7 +620,7 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
 	unsigned long delay = msecs_to_jiffies(delay_ms);
 	int cpu;
 
-	if (!static_branch_likely(&rdt_enable_key))
+	if (!static_branch_likely(&rdt_mon_enable_key))
 		return;
 	cpu = cpumask_any(&dom->cpu_mask);
 	dom->mbm_work_cpu = cpu;
@@ -687,11 +691,18 @@ static void l3_mon_evt_init(struct rdt_resource *r)
 
 int rdt_get_mon_l3_config(struct rdt_resource *r)
 {
+	unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset;
 	unsigned int cl_size = boot_cpu_data.x86_cache_size;
 	int ret;
 
 	r->mon_scale = boot_cpu_data.x86_cache_occ_scale;
 	r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1;
+	r->mbm_width = MBM_CNTR_WIDTH_BASE;
+
+	if (mbm_offset > 0 && mbm_offset <= MBM_CNTR_WIDTH_OFFSET_MAX)
+		r->mbm_width += mbm_offset;
+	else if (mbm_offset > MBM_CNTR_WIDTH_OFFSET_MAX)
+		pr_warn("Ignoring impossible MBM counter offset\n");
 
 	/*
	 * A reasonable upper limit on the max threshold is the number
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 47ade7e59787c900fff8e882243deac960cfe191..b0cb9a1dec26fc0a23508a2b37ba8d261647e3ae 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -2333,7 +2333,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
 			goto out_destroy;
 
 		if (is_mbm_event(mevt->evtid))
-			mon_event_read(&rr, d, prgrp, mevt->evtid, true);
+			mon_event_read(&rr, r, d, prgrp, mevt->evtid, true);
 	}
 	kernfs_activate(kn);
 	return 0;
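Why mbm_overflow_count() needs the width at all: shifting prev and cur up by (64 - width) pushes the counter's most significant bit to bit 63, so the unsigned subtraction wraps exactly as the hardware counter does, and shifting back down yields the delta modulo 2^width. If the code kept assuming 24 bits on hardware with wider counters, any genuine delta of 2^24 or more would be truncated and bandwidth undercounted. A self-contained example of the arithmetic, with overflow_count() as a local stand-in for the kernel function:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Same arithmetic as mbm_overflow_count() above. */
    static uint64_t overflow_count(uint64_t prev, uint64_t cur,
                                   unsigned int width)
    {
        uint64_t shift = 64 - width;

        return ((cur << shift) - (prev << shift)) >> shift;
    }

    int main(void)
    {
        /* A wrapped 24-bit counter: 0xfffffe -> 0x000001 is a delta of 3
         * (increments through 0xffffff, 0x000000, 0x000001). */
        assert(overflow_count(0xfffffe, 0x000001, 24) == 3);

        /* Interpreted as a 62-bit counter, the same raw values imply a
         * wrap of the much larger counter, so the computed delta is
         * enormous; the width has to match the hardware. */
        printf("%llu\n", (unsigned long long)
               overflow_count(0xfffffe, 0x000001, 62));
        return 0;
    }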