diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-uncore b/Documentation/ABI/testing/sysfs-bus-event_source-devices-uncore new file mode 100644 index 0000000000000000000000000000000000000000..b56e8f019fd4a5b31dc8bcc0610f9bade2cbcb86 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-uncore @@ -0,0 +1,13 @@ +What: /sys/bus/event_source/devices/uncore_*/alias +Date: June 2021 +KernelVersion: 5.15 +Contact: Linux kernel mailing list +Description: Read-only. An attribute to describe the alias name of + the uncore PMU if an alias exists on some platforms. + The 'perf(1)' tool should treat both names the same. + They both can be used to access the uncore PMU. + + Example: + + $ cat /sys/devices/uncore_cha_2/alias + uncore_type_0_2 diff --git a/arch/x86/events/intel/Makefile b/arch/x86/events/intel/Makefile index e67a5886336c101b65c7a83769157fcf3fd3d648..10bde6c5abb2cb442271b41810b2e80885534d47 100644 --- a/arch/x86/events/intel/Makefile +++ b/arch/x86/events/intel/Makefile @@ -3,6 +3,6 @@ obj-$(CONFIG_CPU_SUP_INTEL) += core.o bts.o obj-$(CONFIG_CPU_SUP_INTEL) += ds.o knc.o obj-$(CONFIG_CPU_SUP_INTEL) += lbr.o p4.o p6.o pt.o obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel-uncore.o -intel-uncore-objs := uncore.o uncore_nhmex.o uncore_snb.o uncore_snbep.o +intel-uncore-objs := uncore.o uncore_nhmex.o uncore_snb.o uncore_snbep.o uncore_discovery.o obj-$(CONFIG_PERF_EVENTS_INTEL_CSTATE) += intel-cstate.o intel-cstate-objs := cstate.o diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 80d52cbe2fde549e7d4f4a54f3c9b3195b6448cf..be79fbab4e02fd658ecd986cc4261b953ca6319b 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -4,8 +4,13 @@ #include #include #include "uncore.h" +#include "uncore_discovery.h" -static struct intel_uncore_type *empty_uncore[] = { NULL, }; +static bool uncore_no_discover; +module_param(uncore_no_discover, bool, 0); +MODULE_PARM_DESC(uncore_no_discover, "Don't enable the Intel uncore PerfMon discovery mechanism " + "(default: enable the discovery mechanism)."); +struct intel_uncore_type *empty_uncore[] = { NULL, }; struct intel_uncore_type **uncore_msr_uncores = empty_uncore; struct intel_uncore_type **uncore_pci_uncores = empty_uncore; struct intel_uncore_type **uncore_mmio_uncores = empty_uncore; @@ -31,21 +36,21 @@ struct event_constraint uncore_constraint_empty = MODULE_LICENSE("GPL"); -int uncore_pcibus_to_physid(struct pci_bus *bus) +int uncore_pcibus_to_dieid(struct pci_bus *bus) { struct pci2phy_map *map; - int phys_id = -1; + int die_id = -1; raw_spin_lock(&pci2phy_map_lock); list_for_each_entry(map, &pci2phy_map_head, list) { if (map->segment == pci_domain_nr(bus)) { - phys_id = map->pbus_to_physid[bus->number]; + die_id = map->pbus_to_dieid[bus->number]; break; } } raw_spin_unlock(&pci2phy_map_lock); - return phys_id; + return die_id; } static void uncore_free_pcibus_map(void) @@ -86,7 +91,7 @@ struct pci2phy_map *__find_pci2phy_map(int segment) alloc = NULL; map->segment = segment; for (i = 0; i < 256; i++) - map->pbus_to_physid[i] = -1; + map->pbus_to_dieid[i] = -1; list_add_tail(&map->list, &pci2phy_map_head); end: @@ -332,7 +337,6 @@ static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, uncore_pmu_init_hrtimer(box); box->cpu = -1; - box->pci_phys_id = -1; box->dieid = -1; /* set default hrtimer timeout */ @@ -830,6 +834,45 @@ static const struct attribute_group uncore_pmu_attr_group = { .attrs = uncore_pmu_attrs, }; +void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu) +{ + struct intel_uncore_type *type = pmu->type; + + if (type->num_boxes == 1) + sprintf(pmu_name, "uncore_type_%u", type->type_id); + else { + sprintf(pmu_name, "uncore_type_%u_%d", + type->type_id, type->box_ids[pmu->pmu_idx]); + } +} + +static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu) +{ + struct intel_uncore_type *type = pmu->type; + + /* + * No uncore block name in discovery table. + * Use uncore_type_&typeid_&boxid as name. + */ + if (!type->name) { + uncore_get_alias_name(pmu->name, pmu); + return; + } + + if (type->num_boxes == 1) { + if (strlen(type->name) > 0) + sprintf(pmu->name, "uncore_%s", type->name); + else + sprintf(pmu->name, "uncore"); + } else { + /* + * Use the box ID from the discovery table if applicable. + */ + sprintf(pmu->name, "uncore_%s_%d", type->name, + type->box_ids ? type->box_ids[pmu->pmu_idx] : pmu->pmu_idx); + } +} + static int uncore_pmu_register(struct intel_uncore_pmu *pmu) { int ret; @@ -856,15 +899,7 @@ static int uncore_pmu_register(struct intel_uncore_pmu *pmu) pmu->pmu.attr_update = pmu->type->attr_update; } - if (pmu->type->num_boxes == 1) { - if (strlen(pmu->type->name) > 0) - sprintf(pmu->name, "uncore_%s", pmu->type->name); - else - sprintf(pmu->name, "uncore"); - } else { - sprintf(pmu->name, "uncore_%s_%d", pmu->type->name, - pmu->pmu_idx); - } + uncore_get_pmu_name(pmu); ret = perf_pmu_register(&pmu->pmu, pmu->name, -1); if (!ret) @@ -905,6 +940,10 @@ static void uncore_type_exit(struct intel_uncore_type *type) kfree(type->pmus); type->pmus = NULL; } + if (type->box_ids) { + kfree(type->box_ids); + type->box_ids = NULL; + } kfree(type->events_group); type->events_group = NULL; } @@ -993,28 +1032,48 @@ uncore_types_init(struct intel_uncore_type **types, bool setid) /* * Get the die information of a PCI device. * @pdev: The PCI device. - * @phys_id: The physical socket id which the device maps to. * @die: The die id which the device maps to. */ -static int uncore_pci_get_dev_die_info(struct pci_dev *pdev, - int *phys_id, int *die) +static int uncore_pci_get_dev_die_info(struct pci_dev *pdev, int *die) { - *phys_id = uncore_pcibus_to_physid(pdev->bus); - if (*phys_id < 0) - return -ENODEV; - - *die = (topology_max_die_per_package() > 1) ? *phys_id : - topology_phys_to_logical_pkg(*phys_id); + *die = uncore_pcibus_to_dieid(pdev->bus); if (*die < 0) return -EINVAL; return 0; } +static struct intel_uncore_pmu * +uncore_pci_find_dev_pmu_from_types(struct pci_dev *pdev) +{ + struct intel_uncore_type **types = uncore_pci_uncores; + struct intel_uncore_type *type; + u64 box_ctl; + int i, die; + + for (; *types; types++) { + type = *types; + for (die = 0; die < __uncore_max_dies; die++) { + for (i = 0; i < type->num_boxes; i++) { + if (!type->box_ctls[die]) + continue; + box_ctl = type->box_ctls[die] + type->pci_offsets[i]; + if (pdev->devfn == UNCORE_DISCOVERY_PCI_DEVFN(box_ctl) && + pdev->bus->number == UNCORE_DISCOVERY_PCI_BUS(box_ctl) && + pci_domain_nr(pdev->bus) == UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl)) + return &type->pmus[i]; + } + } + } + + return NULL; +} + /* * Find the PMU of a PCI device. * @pdev: The PCI device. * @ids: The ID table of the available PCI devices with a PMU. + * If NULL, search the whole uncore_pci_uncores. */ static struct intel_uncore_pmu * uncore_pci_find_dev_pmu(struct pci_dev *pdev, const struct pci_device_id *ids) @@ -1024,6 +1083,9 @@ uncore_pci_find_dev_pmu(struct pci_dev *pdev, const struct pci_device_id *ids) kernel_ulong_t data; unsigned int devfn; + if (!ids) + return uncore_pci_find_dev_pmu_from_types(pdev); + while (ids && ids->vendor) { if ((ids->vendor == pdev->vendor) && (ids->device == pdev->device)) { @@ -1046,13 +1108,12 @@ uncore_pci_find_dev_pmu(struct pci_dev *pdev, const struct pci_device_id *ids) * @pdev: The PCI device. * @type: The corresponding PMU type of the device. * @pmu: The corresponding PMU of the device. - * @phys_id: The physical socket id which the device maps to. * @die: The die id which the device maps to. */ static int uncore_pci_pmu_register(struct pci_dev *pdev, struct intel_uncore_type *type, struct intel_uncore_pmu *pmu, - int phys_id, int die) + int die) { struct intel_uncore_box *box; int ret; @@ -1070,7 +1131,6 @@ static int uncore_pci_pmu_register(struct pci_dev *pdev, WARN_ON_ONCE(pmu->func_id != pdev->devfn); atomic_inc(&box->refcnt); - box->pci_phys_id = phys_id; box->dieid = die; box->pci_dev = pdev; box->pmu = pmu; @@ -1097,9 +1157,9 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id { struct intel_uncore_type *type; struct intel_uncore_pmu *pmu = NULL; - int phys_id, die, ret; + int die, ret; - ret = uncore_pci_get_dev_die_info(pdev, &phys_id, &die); + ret = uncore_pci_get_dev_die_info(pdev, &die); if (ret) return ret; @@ -1132,7 +1192,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)]; } - ret = uncore_pci_pmu_register(pdev, type, pmu, phys_id, die); + ret = uncore_pci_pmu_register(pdev, type, pmu, die); pci_set_drvdata(pdev, pmu->boxes[die]); @@ -1142,17 +1202,12 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id /* * Unregister the PMU of a PCI device * @pmu: The corresponding PMU is unregistered. - * @phys_id: The physical socket id which the device maps to. * @die: The die id which the device maps to. */ -static void uncore_pci_pmu_unregister(struct intel_uncore_pmu *pmu, - int phys_id, int die) +static void uncore_pci_pmu_unregister(struct intel_uncore_pmu *pmu, int die) { struct intel_uncore_box *box = pmu->boxes[die]; - if (WARN_ON_ONCE(phys_id != box->pci_phys_id)) - return; - pmu->boxes[die] = NULL; if (atomic_dec_return(&pmu->activeboxes) == 0) uncore_pmu_unregister(pmu); @@ -1164,9 +1219,9 @@ static void uncore_pci_remove(struct pci_dev *pdev) { struct intel_uncore_box *box; struct intel_uncore_pmu *pmu; - int i, phys_id, die; + int i, die; - if (uncore_pci_get_dev_die_info(pdev, &phys_id, &die)) + if (uncore_pci_get_dev_die_info(pdev, &die)) return; box = pci_get_drvdata(pdev); @@ -1185,35 +1240,43 @@ static void uncore_pci_remove(struct pci_dev *pdev) pci_set_drvdata(pdev, NULL); - uncore_pci_pmu_unregister(pmu, phys_id, die); + uncore_pci_pmu_unregister(pmu, die); } static int uncore_bus_notify(struct notifier_block *nb, - unsigned long action, void *data) + unsigned long action, void *data, + const struct pci_device_id *ids) { struct device *dev = data; struct pci_dev *pdev = to_pci_dev(dev); struct intel_uncore_pmu *pmu; - int phys_id, die; + int die; /* Unregister the PMU when the device is going to be deleted. */ if (action != BUS_NOTIFY_DEL_DEVICE) return NOTIFY_DONE; - pmu = uncore_pci_find_dev_pmu(pdev, uncore_pci_sub_driver->id_table); + pmu = uncore_pci_find_dev_pmu(pdev, ids); if (!pmu) return NOTIFY_DONE; - if (uncore_pci_get_dev_die_info(pdev, &phys_id, &die)) + if (uncore_pci_get_dev_die_info(pdev, &die)) return NOTIFY_DONE; - uncore_pci_pmu_unregister(pmu, phys_id, die); + uncore_pci_pmu_unregister(pmu, die); return NOTIFY_OK; } -static struct notifier_block uncore_notifier = { - .notifier_call = uncore_bus_notify, +static int uncore_pci_sub_bus_notify(struct notifier_block *nb, + unsigned long action, void *data) +{ + return uncore_bus_notify(nb, action, data, + uncore_pci_sub_driver->id_table); +} + +static struct notifier_block uncore_pci_sub_notifier = { + .notifier_call = uncore_pci_sub_bus_notify, }; static void uncore_pci_sub_driver_init(void) @@ -1224,7 +1287,7 @@ static void uncore_pci_sub_driver_init(void) struct pci_dev *pci_sub_dev; bool notify = false; unsigned int devfn; - int phys_id, die; + int die; while (ids && ids->vendor) { pci_sub_dev = NULL; @@ -1244,24 +1307,65 @@ static void uncore_pci_sub_driver_init(void) if (!pmu) continue; - if (uncore_pci_get_dev_die_info(pci_sub_dev, - &phys_id, &die)) + if (uncore_pci_get_dev_die_info(pci_sub_dev, &die)) continue; if (!uncore_pci_pmu_register(pci_sub_dev, type, pmu, - phys_id, die)) + die)) notify = true; } ids++; } - if (notify && bus_register_notifier(&pci_bus_type, &uncore_notifier)) + if (notify && bus_register_notifier(&pci_bus_type, &uncore_pci_sub_notifier)) notify = false; if (!notify) uncore_pci_sub_driver = NULL; } +static int uncore_pci_bus_notify(struct notifier_block *nb, + unsigned long action, void *data) +{ + return uncore_bus_notify(nb, action, data, NULL); +} + +static struct notifier_block uncore_pci_notifier = { + .notifier_call = uncore_pci_bus_notify, +}; + + +static void uncore_pci_pmus_register(void) +{ + struct intel_uncore_type **types = uncore_pci_uncores; + struct intel_uncore_type *type; + struct intel_uncore_pmu *pmu; + struct pci_dev *pdev; + u64 box_ctl; + int i, die; + + for (; *types; types++) { + type = *types; + for (die = 0; die < __uncore_max_dies; die++) { + for (i = 0; i < type->num_boxes; i++) { + if (!type->box_ctls[die]) + continue; + box_ctl = type->box_ctls[die] + type->pci_offsets[i]; + pdev = pci_get_domain_bus_and_slot(UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl), + UNCORE_DISCOVERY_PCI_BUS(box_ctl), + UNCORE_DISCOVERY_PCI_DEVFN(box_ctl)); + if (!pdev) + continue; + pmu = &type->pmus[i]; + + uncore_pci_pmu_register(pdev, type, pmu, die); + } + } + } + + bus_register_notifier(&pci_bus_type, &uncore_pci_notifier); +} + static int __init uncore_pci_init(void) { size_t size; @@ -1278,12 +1382,15 @@ static int __init uncore_pci_init(void) if (ret) goto errtype; - uncore_pci_driver->probe = uncore_pci_probe; - uncore_pci_driver->remove = uncore_pci_remove; + if (uncore_pci_driver) { + uncore_pci_driver->probe = uncore_pci_probe; + uncore_pci_driver->remove = uncore_pci_remove; - ret = pci_register_driver(uncore_pci_driver); - if (ret) - goto errtype; + ret = pci_register_driver(uncore_pci_driver); + if (ret) + goto errtype; + } else + uncore_pci_pmus_register(); if (uncore_pci_sub_driver) uncore_pci_sub_driver_init(); @@ -1306,8 +1413,11 @@ static void uncore_pci_exit(void) if (pcidrv_registered) { pcidrv_registered = false; if (uncore_pci_sub_driver) - bus_unregister_notifier(&pci_bus_type, &uncore_notifier); - pci_unregister_driver(uncore_pci_driver); + bus_unregister_notifier(&pci_bus_type, &uncore_pci_sub_notifier); + if (uncore_pci_driver) + pci_unregister_driver(uncore_pci_driver); + else + bus_unregister_notifier(&pci_bus_type, &uncore_pci_notifier); uncore_types_exit(uncore_pci_uncores); kfree(uncore_extra_pci_dev); uncore_free_pcibus_map(); @@ -1556,6 +1666,7 @@ struct intel_uncore_init_fun { void (*cpu_init)(void); int (*pci_init)(void); void (*mmio_init)(void); + bool use_discovery; }; static const struct intel_uncore_init_fun nhm_uncore_init __initconst = { @@ -1648,6 +1759,19 @@ static const struct intel_uncore_init_fun snr_uncore_init __initconst = { .mmio_init = snr_uncore_mmio_init, }; +static const struct intel_uncore_init_fun spr_uncore_init __initconst = { + .cpu_init = spr_uncore_cpu_init, + .pci_init = spr_uncore_pci_init, + .mmio_init = spr_uncore_mmio_init, + .use_discovery = true, +}; + +static const struct intel_uncore_init_fun generic_uncore_init __initconst = { + .cpu_init = intel_uncore_generic_uncore_cpu_init, + .pci_init = intel_uncore_generic_uncore_pci_init, + .mmio_init = intel_uncore_generic_uncore_mmio_init, +}; + static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &nhm_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &nhm_uncore_init), @@ -1683,6 +1807,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &icx_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &tgl_l_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &tgl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init), {}, }; @@ -1694,17 +1819,26 @@ static int __init intel_uncore_init(void) struct intel_uncore_init_fun *uncore_init; int pret = 0, cret = 0, mret = 0, ret; - id = x86_match_cpu(intel_uncore_match); - if (!id) - return -ENODEV; - if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) return -ENODEV; __uncore_max_dies = topology_max_packages() * topology_max_die_per_package(); - uncore_init = (struct intel_uncore_init_fun *)id->driver_data; + id = x86_match_cpu(intel_uncore_match); + if (!id) { + if (!uncore_no_discover && intel_uncore_has_discovery_tables()) + uncore_init = (struct intel_uncore_init_fun *)&generic_uncore_init; + else + return -ENODEV; + } else { + uncore_init = (struct intel_uncore_init_fun *)id->driver_data; + if (uncore_no_discover && uncore_init->use_discovery) + return -ENODEV; + if (uncore_init->use_discovery && !intel_uncore_has_discovery_tables()) + return -ENODEV; + } + if (uncore_init->pci_init) { pret = uncore_init->pci_init(); if (!pret) @@ -1721,8 +1855,10 @@ static int __init intel_uncore_init(void) mret = uncore_mmio_init(); } - if (cret && pret && mret) - return -ENODEV; + if (cret && pret && mret) { + ret = -ENODEV; + goto free_discovery; + } /* Install hotplug callbacks to setup the targets for each package */ ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE, @@ -1737,6 +1873,8 @@ static int __init intel_uncore_init(void) uncore_types_exit(uncore_msr_uncores); uncore_types_exit(uncore_mmio_uncores); uncore_pci_exit(); +free_discovery: + intel_uncore_clear_discovery_tables(); return ret; } module_init(intel_uncore_init); @@ -1747,5 +1885,6 @@ static void __exit intel_uncore_exit(void) uncore_types_exit(uncore_msr_uncores); uncore_types_exit(uncore_mmio_uncores); uncore_pci_exit(); + intel_uncore_clear_discovery_tables(); } module_exit(intel_uncore_exit); diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 9efea154349d3984798d5105145a1fe97d3afcf3..59aa26e4ede2dcd01cf210152807947baf1c5572 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -50,6 +50,7 @@ struct intel_uncore_type { int perf_ctr_bits; int fixed_ctr_bits; int num_freerunning_types; + int type_id; unsigned perf_ctr; unsigned event_ctl; unsigned event_mask; @@ -57,6 +58,7 @@ struct intel_uncore_type { unsigned fixed_ctr; unsigned fixed_ctl; unsigned box_ctl; + u64 *box_ctls; /* Unit ctrl addr of the first box of each die */ union { unsigned msr_offset; unsigned mmio_offset; @@ -65,7 +67,12 @@ struct intel_uncore_type { unsigned num_shared_regs:8; unsigned single_fixed:1; unsigned pair_ctr_ctl:1; - unsigned *msr_offsets; + union { + unsigned *msr_offsets; + unsigned *pci_offsets; + unsigned *mmio_offsets; + }; + unsigned *box_ids; struct event_constraint unconstrainted; struct event_constraint *constraints; struct intel_uncore_pmu *pmus; @@ -124,7 +131,6 @@ struct intel_uncore_extra_reg { }; struct intel_uncore_box { - int pci_phys_id; int dieid; /* Logical die ID */ int n_active; /* number of active events */ int n_events; @@ -173,11 +179,11 @@ struct freerunning_counters { struct pci2phy_map { struct list_head list; int segment; - int pbus_to_physid[256]; + int pbus_to_dieid[256]; }; struct pci2phy_map *__find_pci2phy_map(int segment); -int uncore_pcibus_to_physid(struct pci_bus *bus); +int uncore_pcibus_to_dieid(struct pci_bus *bus); ssize_t uncore_event_show(struct device *dev, struct device_attribute *attr, char *buf); @@ -547,7 +553,9 @@ struct event_constraint * uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event); void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event); u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx); +void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu); +extern struct intel_uncore_type *empty_uncore[]; extern struct intel_uncore_type **uncore_msr_uncores; extern struct intel_uncore_type **uncore_pci_uncores; extern struct intel_uncore_type **uncore_mmio_uncores; @@ -592,6 +600,9 @@ void snr_uncore_mmio_init(void); int icx_uncore_pci_init(void); void icx_uncore_cpu_init(void); void icx_uncore_mmio_init(void); +int spr_uncore_pci_init(void); +void spr_uncore_cpu_init(void); +void spr_uncore_mmio_init(void); /* uncore_nhmex.c */ void nhmex_uncore_cpu_init(void); diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c new file mode 100644 index 0000000000000000000000000000000000000000..3049c646fa209f4c962c5126f2253a8ebe74c092 --- /dev/null +++ b/arch/x86/events/intel/uncore_discovery.c @@ -0,0 +1,622 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Support Intel uncore PerfMon discovery mechanism. + * Copyright(c) 2021 Intel Corporation. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include "uncore.h" +#include "uncore_discovery.h" + +static struct rb_root discovery_tables = RB_ROOT; +static int num_discovered_types[UNCORE_ACCESS_MAX]; + +static bool has_generic_discovery_table(void) +{ + struct pci_dev *dev; + int dvsec; + + dev = pci_get_device(PCI_VENDOR_ID_INTEL, UNCORE_DISCOVERY_TABLE_DEVICE, NULL); + if (!dev) + return false; + + /* A discovery table device has the unique capability ID. */ + dvsec = pci_find_next_ext_capability(dev, 0, UNCORE_EXT_CAP_ID_DISCOVERY); + pci_dev_put(dev); + if (dvsec) + return true; + + return false; +} + +static int logical_die_id; + +static int get_device_die_id(struct pci_dev *dev) +{ + int cpu, node = pcibus_to_node(dev->bus); + + /* + * If the NUMA info is not available, assume that the logical die id is + * continuous in the order in which the discovery table devices are + * detected. + */ + if (node < 0) + return logical_die_id++; + + for_each_cpu(cpu, cpumask_of_node(node)) { + struct cpuinfo_x86 *c = &cpu_data(cpu); + + if (c->initialized && cpu_to_node(cpu) == node) + return c->logical_die_id; + } + + /* + * All CPUs of a node may be offlined. For this case, + * the PCI and MMIO type of uncore blocks which are + * enumerated by the device will be unavailable. + */ + return -1; +} + +#define __node_2_type(cur) \ + rb_entry((cur), struct intel_uncore_discovery_type, node) + +static inline int __type_cmp(const void *key, const struct rb_node *b) +{ + struct intel_uncore_discovery_type *type_b = __node_2_type(b); + const u16 *type_id = key; + + if (type_b->type > *type_id) + return -1; + else if (type_b->type < *type_id) + return 1; + + return 0; +} + +static inline struct intel_uncore_discovery_type * +search_uncore_discovery_type(u16 type_id) +{ + struct rb_node *node = rb_find(&type_id, &discovery_tables, __type_cmp); + + return (node) ? __node_2_type(node) : NULL; +} + +static inline bool __type_less(struct rb_node *a, const struct rb_node *b) +{ + return (__node_2_type(a)->type < __node_2_type(b)->type); +} + +static struct intel_uncore_discovery_type * +add_uncore_discovery_type(struct uncore_unit_discovery *unit) +{ + struct intel_uncore_discovery_type *type; + + if (unit->access_type >= UNCORE_ACCESS_MAX) { + pr_warn("Unsupported access type %d\n", unit->access_type); + return NULL; + } + + type = kzalloc(sizeof(struct intel_uncore_discovery_type), GFP_KERNEL); + if (!type) + return NULL; + + type->box_ctrl_die = kcalloc(__uncore_max_dies, sizeof(u64), GFP_KERNEL); + if (!type->box_ctrl_die) + goto free_type; + + type->access_type = unit->access_type; + num_discovered_types[type->access_type]++; + type->type = unit->box_type; + + rb_add(&type->node, &discovery_tables, __type_less); + + return type; + +free_type: + kfree(type); + + return NULL; + +} + +static struct intel_uncore_discovery_type * +get_uncore_discovery_type(struct uncore_unit_discovery *unit) +{ + struct intel_uncore_discovery_type *type; + + type = search_uncore_discovery_type(unit->box_type); + if (type) + return type; + + return add_uncore_discovery_type(unit); +} + +static void +uncore_insert_box_info(struct uncore_unit_discovery *unit, + int die, bool parsed) +{ + struct intel_uncore_discovery_type *type; + unsigned int *box_offset, *ids; + int i; + + if (WARN_ON_ONCE(!unit->ctl || !unit->ctl_offset || !unit->ctr_offset)) + return; + + if (parsed) { + type = search_uncore_discovery_type(unit->box_type); + if (WARN_ON_ONCE(!type)) + return; + /* Store the first box of each die */ + if (!type->box_ctrl_die[die]) + type->box_ctrl_die[die] = unit->ctl; + return; + } + + type = get_uncore_discovery_type(unit); + if (!type) + return; + + box_offset = kcalloc(type->num_boxes + 1, sizeof(unsigned int), GFP_KERNEL); + if (!box_offset) + return; + + ids = kcalloc(type->num_boxes + 1, sizeof(unsigned int), GFP_KERNEL); + if (!ids) + goto free_box_offset; + + /* Store generic information for the first box */ + if (!type->num_boxes) { + type->box_ctrl = unit->ctl; + type->box_ctrl_die[die] = unit->ctl; + type->num_counters = unit->num_regs; + type->counter_width = unit->bit_width; + type->ctl_offset = unit->ctl_offset; + type->ctr_offset = unit->ctr_offset; + *ids = unit->box_id; + goto end; + } + + for (i = 0; i < type->num_boxes; i++) { + ids[i] = type->ids[i]; + box_offset[i] = type->box_offset[i]; + + if (WARN_ON_ONCE(unit->box_id == ids[i])) + goto free_ids; + } + ids[i] = unit->box_id; + box_offset[i] = unit->ctl - type->box_ctrl; + kfree(type->ids); + kfree(type->box_offset); +end: + type->ids = ids; + type->box_offset = box_offset; + type->num_boxes++; + return; + +free_ids: + kfree(ids); + +free_box_offset: + kfree(box_offset); + +} + +static int parse_discovery_table(struct pci_dev *dev, int die, + u32 bar_offset, bool *parsed) +{ + struct uncore_global_discovery global; + struct uncore_unit_discovery unit; + void __iomem *io_addr; + resource_size_t addr; + unsigned long size; + u32 val; + int i; + + pci_read_config_dword(dev, bar_offset, &val); + + if (val & UNCORE_DISCOVERY_MASK) + return -EINVAL; + + addr = (resource_size_t)(val & ~UNCORE_DISCOVERY_MASK); + size = UNCORE_DISCOVERY_GLOBAL_MAP_SIZE; + io_addr = ioremap(addr, size); + if (!io_addr) + return -ENOMEM; + + /* Read Global Discovery State */ + memcpy_fromio(&global, io_addr, sizeof(struct uncore_global_discovery)); + if (uncore_discovery_invalid_unit(global)) { + pr_info("Invalid Global Discovery State: 0x%llx 0x%llx 0x%llx\n", + global.table1, global.ctl, global.table3); + iounmap(io_addr); + return -EINVAL; + } + iounmap(io_addr); + + size = (1 + global.max_units) * global.stride * 8; + io_addr = ioremap(addr, size); + if (!io_addr) + return -ENOMEM; + + /* Parsing Unit Discovery State */ + for (i = 0; i < global.max_units; i++) { + memcpy_fromio(&unit, io_addr + (i + 1) * (global.stride * 8), + sizeof(struct uncore_unit_discovery)); + + if (uncore_discovery_invalid_unit(unit)) + continue; + + if (unit.access_type >= UNCORE_ACCESS_MAX) + continue; + + uncore_insert_box_info(&unit, die, *parsed); + } + + *parsed = true; + iounmap(io_addr); + return 0; +} + +bool intel_uncore_has_discovery_tables(void) +{ + u32 device, val, entry_id, bar_offset; + int die, dvsec = 0, ret = true; + struct pci_dev *dev = NULL; + bool parsed = false; + + if (has_generic_discovery_table()) + device = UNCORE_DISCOVERY_TABLE_DEVICE; + else + device = PCI_ANY_ID; + + /* + * Start a new search and iterates through the list of + * the discovery table devices. + */ + while ((dev = pci_get_device(PCI_VENDOR_ID_INTEL, device, dev)) != NULL) { + while ((dvsec = pci_find_next_ext_capability(dev, dvsec, UNCORE_EXT_CAP_ID_DISCOVERY))) { + pci_read_config_dword(dev, dvsec + UNCORE_DISCOVERY_DVSEC_OFFSET, &val); + entry_id = val & UNCORE_DISCOVERY_DVSEC_ID_MASK; + if (entry_id != UNCORE_DISCOVERY_DVSEC_ID_PMON) + continue; + + pci_read_config_dword(dev, dvsec + UNCORE_DISCOVERY_DVSEC2_OFFSET, &val); + + if (val & ~UNCORE_DISCOVERY_DVSEC2_BIR_MASK) { + ret = false; + goto err; + } + bar_offset = UNCORE_DISCOVERY_BIR_BASE + + (val & UNCORE_DISCOVERY_DVSEC2_BIR_MASK) * UNCORE_DISCOVERY_BIR_STEP; + + die = get_device_die_id(dev); + if (die < 0) + continue; + + parse_discovery_table(dev, die, bar_offset, &parsed); + } + } + + /* None of the discovery tables are available */ + if (!parsed) + ret = false; +err: + pci_dev_put(dev); + + return ret; +} + +void intel_uncore_clear_discovery_tables(void) +{ + struct intel_uncore_discovery_type *type, *next; + + rbtree_postorder_for_each_entry_safe(type, next, &discovery_tables, node) { + kfree(type->box_ctrl_die); + kfree(type); + } +} + +DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); +DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); +DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); +DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23"); +DEFINE_UNCORE_FORMAT_ATTR(thresh, thresh, "config:24-31"); + +static struct attribute *generic_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh.attr, + NULL, +}; + +static const struct attribute_group generic_uncore_format_group = { + .name = "format", + .attrs = generic_uncore_formats_attr, +}; + +void intel_generic_uncore_msr_init_box(struct intel_uncore_box *box) +{ + wrmsrl(uncore_msr_box_ctl(box), GENERIC_PMON_BOX_CTL_INT); +} + +void intel_generic_uncore_msr_disable_box(struct intel_uncore_box *box) +{ + wrmsrl(uncore_msr_box_ctl(box), GENERIC_PMON_BOX_CTL_FRZ); +} + +void intel_generic_uncore_msr_enable_box(struct intel_uncore_box *box) +{ + wrmsrl(uncore_msr_box_ctl(box), 0); +} + +static void intel_generic_uncore_msr_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + wrmsrl(hwc->config_base, hwc->config); +} + +static void intel_generic_uncore_msr_disable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + wrmsrl(hwc->config_base, 0); +} + +static struct intel_uncore_ops generic_uncore_msr_ops = { + .init_box = intel_generic_uncore_msr_init_box, + .disable_box = intel_generic_uncore_msr_disable_box, + .enable_box = intel_generic_uncore_msr_enable_box, + .disable_event = intel_generic_uncore_msr_disable_event, + .enable_event = intel_generic_uncore_msr_enable_event, + .read_counter = uncore_msr_read_counter, +}; + +void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + int box_ctl = uncore_pci_box_ctl(box); + + __set_bit(UNCORE_BOX_FLAG_CTL_OFFS8, &box->flags); + pci_write_config_dword(pdev, box_ctl, GENERIC_PMON_BOX_CTL_INT); +} + +void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + int box_ctl = uncore_pci_box_ctl(box); + + pci_write_config_dword(pdev, box_ctl, GENERIC_PMON_BOX_CTL_FRZ); +} + +void intel_generic_uncore_pci_enable_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + int box_ctl = uncore_pci_box_ctl(box); + + pci_write_config_dword(pdev, box_ctl, 0); +} + +static void intel_generic_uncore_pci_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + + pci_write_config_dword(pdev, hwc->config_base, hwc->config); +} + +void intel_generic_uncore_pci_disable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + + pci_write_config_dword(pdev, hwc->config_base, 0); +} + +u64 intel_generic_uncore_pci_read_counter(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + u64 count = 0; + + pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count); + pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1); + + return count; +} + +static struct intel_uncore_ops generic_uncore_pci_ops = { + .init_box = intel_generic_uncore_pci_init_box, + .disable_box = intel_generic_uncore_pci_disable_box, + .enable_box = intel_generic_uncore_pci_enable_box, + .disable_event = intel_generic_uncore_pci_disable_event, + .enable_event = intel_generic_uncore_pci_enable_event, + .read_counter = intel_generic_uncore_pci_read_counter, +}; + +#define UNCORE_GENERIC_MMIO_SIZE 0x4000 + +static unsigned int generic_uncore_mmio_box_ctl(struct intel_uncore_box *box) +{ + struct intel_uncore_type *type = box->pmu->type; + + if (!type->box_ctls || !type->box_ctls[box->dieid] || !type->mmio_offsets) + return 0; + + return type->box_ctls[box->dieid] + type->mmio_offsets[box->pmu->pmu_idx]; +} + +void intel_generic_uncore_mmio_init_box(struct intel_uncore_box *box) +{ + unsigned int box_ctl = generic_uncore_mmio_box_ctl(box); + struct intel_uncore_type *type = box->pmu->type; + resource_size_t addr; + + if (!box_ctl) { + pr_warn("Uncore type %d box %d: Invalid box control address.\n", + type->type_id, type->box_ids[box->pmu->pmu_idx]); + return; + } + + addr = box_ctl; + box->io_addr = ioremap(addr, UNCORE_GENERIC_MMIO_SIZE); + if (!box->io_addr) { + pr_warn("Uncore type %d box %d: ioremap error for 0x%llx.\n", + type->type_id, type->box_ids[box->pmu->pmu_idx], + (unsigned long long)addr); + return; + } + + writel(GENERIC_PMON_BOX_CTL_INT, box->io_addr); +} + +void intel_generic_uncore_mmio_disable_box(struct intel_uncore_box *box) +{ + if (!box->io_addr) + return; + + writel(GENERIC_PMON_BOX_CTL_FRZ, box->io_addr); +} + +void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box) +{ + if (!box->io_addr) + return; + + writel(0, box->io_addr); +} + +static void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (!box->io_addr) + return; + + writel(hwc->config, box->io_addr + hwc->config_base); +} + +void intel_generic_uncore_mmio_disable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (!box->io_addr) + return; + + writel(0, box->io_addr + hwc->config_base); +} + +static struct intel_uncore_ops generic_uncore_mmio_ops = { + .init_box = intel_generic_uncore_mmio_init_box, + .exit_box = uncore_mmio_exit_box, + .disable_box = intel_generic_uncore_mmio_disable_box, + .enable_box = intel_generic_uncore_mmio_enable_box, + .disable_event = intel_generic_uncore_mmio_disable_event, + .enable_event = intel_generic_uncore_mmio_enable_event, + .read_counter = uncore_mmio_read_counter, +}; + +static bool uncore_update_uncore_type(enum uncore_access_type type_id, + struct intel_uncore_type *uncore, + struct intel_uncore_discovery_type *type) +{ + uncore->type_id = type->type; + uncore->num_boxes = type->num_boxes; + uncore->num_counters = type->num_counters; + uncore->perf_ctr_bits = type->counter_width; + uncore->box_ids = type->ids; + + switch (type_id) { + case UNCORE_ACCESS_MSR: + uncore->ops = &generic_uncore_msr_ops; + uncore->perf_ctr = (unsigned int)type->box_ctrl + type->ctr_offset; + uncore->event_ctl = (unsigned int)type->box_ctrl + type->ctl_offset; + uncore->box_ctl = (unsigned int)type->box_ctrl; + uncore->msr_offsets = type->box_offset; + break; + case UNCORE_ACCESS_PCI: + uncore->ops = &generic_uncore_pci_ops; + uncore->perf_ctr = (unsigned int)UNCORE_DISCOVERY_PCI_BOX_CTRL(type->box_ctrl) + type->ctr_offset; + uncore->event_ctl = (unsigned int)UNCORE_DISCOVERY_PCI_BOX_CTRL(type->box_ctrl) + type->ctl_offset; + uncore->box_ctl = (unsigned int)UNCORE_DISCOVERY_PCI_BOX_CTRL(type->box_ctrl); + uncore->box_ctls = type->box_ctrl_die; + uncore->pci_offsets = type->box_offset; + break; + case UNCORE_ACCESS_MMIO: + uncore->ops = &generic_uncore_mmio_ops; + uncore->perf_ctr = (unsigned int)type->ctr_offset; + uncore->event_ctl = (unsigned int)type->ctl_offset; + uncore->box_ctl = (unsigned int)type->box_ctrl; + uncore->box_ctls = type->box_ctrl_die; + uncore->mmio_offsets = type->box_offset; + uncore->mmio_map_size = UNCORE_GENERIC_MMIO_SIZE; + break; + default: + return false; + } + + return true; +} + +struct intel_uncore_type ** +intel_uncore_generic_init_uncores(enum uncore_access_type type_id, int num_extra) +{ + struct intel_uncore_discovery_type *type; + struct intel_uncore_type **uncores; + struct intel_uncore_type *uncore; + struct rb_node *node; + int i = 0; + + uncores = kcalloc(num_discovered_types[type_id] + num_extra + 1, + sizeof(struct intel_uncore_type *), GFP_KERNEL); + if (!uncores) + return empty_uncore; + + for (node = rb_first(&discovery_tables); node; node = rb_next(node)) { + type = rb_entry(node, struct intel_uncore_discovery_type, node); + if (type->access_type != type_id) + continue; + + uncore = kzalloc(sizeof(struct intel_uncore_type), GFP_KERNEL); + if (!uncore) + break; + + uncore->event_mask = GENERIC_PMON_RAW_EVENT_MASK; + uncore->format_group = &generic_uncore_format_group; + + if (!uncore_update_uncore_type(type_id, uncore, type)) { + kfree(uncore); + continue; + } + uncores[i++] = uncore; + } + + return uncores; +} + +void intel_uncore_generic_uncore_cpu_init(void) +{ + uncore_msr_uncores = intel_uncore_generic_init_uncores(UNCORE_ACCESS_MSR, 0); +} + +int intel_uncore_generic_uncore_pci_init(void) +{ + uncore_pci_uncores = intel_uncore_generic_init_uncores(UNCORE_ACCESS_PCI, 0); + + return 0; +} + +void intel_uncore_generic_uncore_mmio_init(void) +{ + uncore_mmio_uncores = intel_uncore_generic_init_uncores(UNCORE_ACCESS_MMIO, 0); +} diff --git a/arch/x86/events/intel/uncore_discovery.h b/arch/x86/events/intel/uncore_discovery.h new file mode 100644 index 0000000000000000000000000000000000000000..6d735611c281c1d4a3eccbb8aa652791247f6941 --- /dev/null +++ b/arch/x86/events/intel/uncore_discovery.h @@ -0,0 +1,152 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/* Generic device ID of a discovery table device */ +#define UNCORE_DISCOVERY_TABLE_DEVICE 0x09a7 +/* Capability ID for a discovery table device */ +#define UNCORE_EXT_CAP_ID_DISCOVERY 0x23 +/* First DVSEC offset */ +#define UNCORE_DISCOVERY_DVSEC_OFFSET 0x8 +/* Mask of the supported discovery entry type */ +#define UNCORE_DISCOVERY_DVSEC_ID_MASK 0xffff +/* PMON discovery entry type ID */ +#define UNCORE_DISCOVERY_DVSEC_ID_PMON 0x1 +/* Second DVSEC offset */ +#define UNCORE_DISCOVERY_DVSEC2_OFFSET 0xc +/* Mask of the discovery table BAR offset */ +#define UNCORE_DISCOVERY_DVSEC2_BIR_MASK 0x7 +/* Discovery table BAR base offset */ +#define UNCORE_DISCOVERY_BIR_BASE 0x10 +/* Discovery table BAR step */ +#define UNCORE_DISCOVERY_BIR_STEP 0x4 +/* Mask of the discovery table offset */ +#define UNCORE_DISCOVERY_MASK 0xf +/* Global discovery table size */ +#define UNCORE_DISCOVERY_GLOBAL_MAP_SIZE 0x20 + +#define UNCORE_DISCOVERY_PCI_DOMAIN(data) ((data >> 28) & 0x7) +#define UNCORE_DISCOVERY_PCI_BUS(data) ((data >> 20) & 0xff) +#define UNCORE_DISCOVERY_PCI_DEVFN(data) ((data >> 12) & 0xff) +#define UNCORE_DISCOVERY_PCI_BOX_CTRL(data) (data & 0xfff) + + +#define uncore_discovery_invalid_unit(unit) \ + (!unit.table1 || !unit.ctl || \ + unit.table1 == -1ULL || unit.ctl == -1ULL || \ + unit.table3 == -1ULL) + +#define GENERIC_PMON_CTL_EV_SEL_MASK 0x000000ff +#define GENERIC_PMON_CTL_UMASK_MASK 0x0000ff00 +#define GENERIC_PMON_CTL_EDGE_DET (1 << 18) +#define GENERIC_PMON_CTL_INVERT (1 << 23) +#define GENERIC_PMON_CTL_TRESH_MASK 0xff000000 +#define GENERIC_PMON_RAW_EVENT_MASK (GENERIC_PMON_CTL_EV_SEL_MASK | \ + GENERIC_PMON_CTL_UMASK_MASK | \ + GENERIC_PMON_CTL_EDGE_DET | \ + GENERIC_PMON_CTL_INVERT | \ + GENERIC_PMON_CTL_TRESH_MASK) + +#define GENERIC_PMON_BOX_CTL_FRZ (1 << 0) +#define GENERIC_PMON_BOX_CTL_RST_CTRL (1 << 8) +#define GENERIC_PMON_BOX_CTL_RST_CTRS (1 << 9) +#define GENERIC_PMON_BOX_CTL_INT (GENERIC_PMON_BOX_CTL_RST_CTRL | \ + GENERIC_PMON_BOX_CTL_RST_CTRS) + +enum uncore_access_type { + UNCORE_ACCESS_MSR = 0, + UNCORE_ACCESS_MMIO, + UNCORE_ACCESS_PCI, + + UNCORE_ACCESS_MAX, +}; + +struct uncore_global_discovery { + union { + u64 table1; + struct { + u64 type : 8, + stride : 8, + max_units : 10, + __reserved_1 : 36, + access_type : 2; + }; + }; + + u64 ctl; /* Global Control Address */ + + union { + u64 table3; + struct { + u64 status_offset : 8, + num_status : 16, + __reserved_2 : 40; + }; + }; +}; + +struct uncore_unit_discovery { + union { + u64 table1; + struct { + u64 num_regs : 8, + ctl_offset : 8, + bit_width : 8, + ctr_offset : 8, + status_offset : 8, + __reserved_1 : 22, + access_type : 2; + }; + }; + + u64 ctl; /* Unit Control Address */ + + union { + u64 table3; + struct { + u64 box_type : 16, + box_id : 16, + __reserved_2 : 32; + }; + }; +}; + +struct intel_uncore_discovery_type { + struct rb_node node; + enum uncore_access_type access_type; + u64 box_ctrl; /* Unit ctrl addr of the first box */ + u64 *box_ctrl_die; /* Unit ctrl addr of the first box of each die */ + u16 type; /* Type ID of the uncore block */ + u8 num_counters; + u8 counter_width; + u8 ctl_offset; /* Counter Control 0 offset */ + u8 ctr_offset; /* Counter 0 offset */ + u16 num_boxes; /* number of boxes for the uncore block */ + unsigned int *ids; /* Box IDs */ + unsigned int *box_offset; /* Box offset */ +}; + +bool intel_uncore_has_discovery_tables(void); +void intel_uncore_clear_discovery_tables(void); +void intel_uncore_generic_uncore_cpu_init(void); +int intel_uncore_generic_uncore_pci_init(void); +void intel_uncore_generic_uncore_mmio_init(void); + +void intel_generic_uncore_msr_init_box(struct intel_uncore_box *box); +void intel_generic_uncore_msr_disable_box(struct intel_uncore_box *box); +void intel_generic_uncore_msr_enable_box(struct intel_uncore_box *box); + +void intel_generic_uncore_mmio_init_box(struct intel_uncore_box *box); +void intel_generic_uncore_mmio_disable_box(struct intel_uncore_box *box); +void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box); +void intel_generic_uncore_mmio_disable_event(struct intel_uncore_box *box, + struct perf_event *event); + +void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box); +void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box); +void intel_generic_uncore_pci_enable_box(struct intel_uncore_box *box); +void intel_generic_uncore_pci_disable_event(struct intel_uncore_box *box, + struct perf_event *event); +u64 intel_generic_uncore_pci_read_counter(struct intel_uncore_box *box, + struct perf_event *event); + +struct intel_uncore_type ** +intel_uncore_generic_init_uncores(enum uncore_access_type type_id, int num_extra); diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index bbd1120ae16101a835766c2c76e05d0b1c1b449d..f90b80957192f6c9d17aa883c2b3fdf25a209c7f 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -649,7 +649,7 @@ int snb_pci2phy_map_init(int devid) pci_dev_put(dev); return -ENOMEM; } - map->pbus_to_physid[bus] = 0; + map->pbus_to_dieid[bus] = 0; raw_spin_unlock(&pci2phy_map_lock); pci_dev_put(dev); diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 03c8047bebb38f83cf83f07ce412f5053e1eba14..f239b80184e0c10c8bc0e36a65c787fda14d5c4a 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* SandyBridge-EP/IvyTown uncore support */ #include "uncore.h" +#include "uncore_discovery.h" /* SNB-EP pci bus to socket mapping */ #define SNBEP_CPUNODEID 0x40 @@ -447,6 +448,17 @@ #define ICX_NUMBER_IMC_CHN 3 #define ICX_IMC_MEM_STRIDE 0x4 +/* SPR */ +#define SPR_RAW_EVENT_MASK_EXT 0xffffff + +/* SPR CHA */ +#define SPR_CHA_PMON_CTL_TID_EN (1 << 16) +#define SPR_CHA_PMON_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \ + SPR_CHA_PMON_CTL_TID_EN) +#define SPR_CHA_PMON_BOX_FILTER_TID 0x3ff + +#define SPR_C0_MSR_PMON_BOX_FILTER0 0x200e + DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); DEFINE_UNCORE_FORMAT_ATTR(event2, event, "config:0-6"); DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21"); @@ -459,6 +471,7 @@ DEFINE_UNCORE_FORMAT_ATTR(umask_ext4, umask, "config:8-15,32-55"); DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16"); DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19"); +DEFINE_UNCORE_FORMAT_ATTR(tid_en2, tid_en, "config:16"); DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23"); DEFINE_UNCORE_FORMAT_ATTR(thresh9, thresh, "config:24-35"); DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31"); @@ -1358,7 +1371,7 @@ static struct pci_driver snbep_uncore_pci_driver = { static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool reverse) { struct pci_dev *ubox_dev = NULL; - int i, bus, nodeid, segment; + int i, bus, nodeid, segment, die_id; struct pci2phy_map *map; int err = 0; u32 config = 0; @@ -1369,36 +1382,79 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool if (!ubox_dev) break; bus = ubox_dev->bus->number; - /* get the Node ID of the local register */ - err = pci_read_config_dword(ubox_dev, nodeid_loc, &config); - if (err) - break; - nodeid = config & NODE_ID_MASK; - /* get the Node ID mapping */ - err = pci_read_config_dword(ubox_dev, idmap_loc, &config); - if (err) - break; + /* + * The nodeid and idmap registers only contain enough + * information to handle 8 nodes. On systems with more + * than 8 nodes, we need to rely on NUMA information, + * filled in from BIOS supplied information, to determine + * the topology. + */ + if (nr_node_ids <= 8) { + /* get the Node ID of the local register */ + err = pci_read_config_dword(ubox_dev, nodeid_loc, &config); + if (err) + break; + nodeid = config & NODE_ID_MASK; + /* get the Node ID mapping */ + err = pci_read_config_dword(ubox_dev, idmap_loc, &config); + if (err) + break; - segment = pci_domain_nr(ubox_dev->bus); - raw_spin_lock(&pci2phy_map_lock); - map = __find_pci2phy_map(segment); - if (!map) { + segment = pci_domain_nr(ubox_dev->bus); + raw_spin_lock(&pci2phy_map_lock); + map = __find_pci2phy_map(segment); + if (!map) { + raw_spin_unlock(&pci2phy_map_lock); + err = -ENOMEM; + break; + } + + /* + * every three bits in the Node ID mapping register maps + * to a particular node. + */ + for (i = 0; i < 8; i++) { + if (nodeid == ((config >> (3 * i)) & 0x7)) { + if (topology_max_die_per_package() > 1) + die_id = i; + else + die_id = topology_phys_to_logical_pkg(i); + if (die_id < 0) + die_id = -ENODEV; + map->pbus_to_dieid[bus] = die_id; + break; + } + } raw_spin_unlock(&pci2phy_map_lock); - err = -ENOMEM; - break; - } + } else { + int node = pcibus_to_node(ubox_dev->bus); + int cpu; + + segment = pci_domain_nr(ubox_dev->bus); + raw_spin_lock(&pci2phy_map_lock); + map = __find_pci2phy_map(segment); + if (!map) { + raw_spin_unlock(&pci2phy_map_lock); + err = -ENOMEM; + break; + } - /* - * every three bits in the Node ID mapping register maps - * to a particular node. - */ - for (i = 0; i < 8; i++) { - if (nodeid == ((config >> (3 * i)) & 0x7)) { - map->pbus_to_physid[bus] = i; + die_id = -1; + for_each_cpu(cpu, cpumask_of_pcibus(ubox_dev->bus)) { + struct cpuinfo_x86 *c = &cpu_data(cpu); + + if (c->initialized && cpu_to_node(cpu) == node) { + map->pbus_to_dieid[bus] = die_id = c->logical_die_id; + break; + } + } + raw_spin_unlock(&pci2phy_map_lock); + + if (WARN_ON_ONCE(die_id == -1)) { + err = -EINVAL; break; } } - raw_spin_unlock(&pci2phy_map_lock); } if (!err) { @@ -1411,17 +1467,17 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool i = -1; if (reverse) { for (bus = 255; bus >= 0; bus--) { - if (map->pbus_to_physid[bus] >= 0) - i = map->pbus_to_physid[bus]; + if (map->pbus_to_dieid[bus] != -1) + i = map->pbus_to_dieid[bus]; else - map->pbus_to_physid[bus] = i; + map->pbus_to_dieid[bus] = i; } } else { for (bus = 0; bus <= 255; bus++) { - if (map->pbus_to_physid[bus] >= 0) - i = map->pbus_to_physid[bus]; + if (map->pbus_to_dieid[bus] != -1) + i = map->pbus_to_dieid[bus]; else - map->pbus_to_physid[bus] = i; + map->pbus_to_dieid[bus] = i; } } } @@ -4640,37 +4696,35 @@ int snr_uncore_pci_init(void) return 0; } -static struct pci_dev *snr_uncore_get_mc_dev(int id) +#define SNR_MC_DEVICE_ID 0x3451 + +static struct pci_dev *snr_uncore_get_mc_dev(unsigned int device, int id) { struct pci_dev *mc_dev = NULL; - int phys_id, pkg; + int pkg; while (1) { - mc_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3451, mc_dev); + mc_dev = pci_get_device(PCI_VENDOR_ID_INTEL, device, mc_dev); if (!mc_dev) break; - phys_id = uncore_pcibus_to_physid(mc_dev->bus); - if (phys_id < 0) - continue; - pkg = topology_phys_to_logical_pkg(phys_id); - if (pkg < 0) - continue; - else if (pkg == id) + pkg = uncore_pcibus_to_dieid(mc_dev->bus); + if (pkg == id) break; } return mc_dev; } -static void __snr_uncore_mmio_init_box(struct intel_uncore_box *box, - unsigned int box_ctl, int mem_offset) +static int snr_uncore_mmio_map(struct intel_uncore_box *box, + unsigned int box_ctl, int mem_offset, + unsigned int device) { - struct pci_dev *pdev = snr_uncore_get_mc_dev(box->dieid); + struct pci_dev *pdev = snr_uncore_get_mc_dev(device, box->dieid); struct intel_uncore_type *type = box->pmu->type; resource_size_t addr; u32 pci_dword; if (!pdev) - return; + return -ENODEV; pci_read_config_dword(pdev, SNR_IMC_MMIO_BASE_OFFSET, &pci_dword); addr = ((resource_size_t)pci_dword & SNR_IMC_MMIO_BASE_MASK) << 23; @@ -4683,16 +4737,25 @@ static void __snr_uncore_mmio_init_box(struct intel_uncore_box *box, box->io_addr = ioremap(addr, type->mmio_map_size); if (!box->io_addr) { pr_warn("perf uncore: Failed to ioremap for %s.\n", type->name); - return; + return -EINVAL; } - writel(IVBEP_PMON_BOX_CTL_INT, box->io_addr); + return 0; +} + +static void __snr_uncore_mmio_init_box(struct intel_uncore_box *box, + unsigned int box_ctl, int mem_offset, + unsigned int device) +{ + if (!snr_uncore_mmio_map(box, box_ctl, mem_offset, device)) + writel(IVBEP_PMON_BOX_CTL_INT, box->io_addr); } static void snr_uncore_mmio_init_box(struct intel_uncore_box *box) { __snr_uncore_mmio_init_box(box, uncore_mmio_box_ctl(box), - SNR_IMC_MMIO_MEM0_OFFSET); + SNR_IMC_MMIO_MEM0_OFFSET, + SNR_MC_DEVICE_ID); } static void snr_uncore_mmio_disable_box(struct intel_uncore_box *box) @@ -5218,7 +5281,8 @@ static void icx_uncore_imc_init_box(struct intel_uncore_box *box) int mem_offset = (box->pmu->pmu_idx / ICX_NUMBER_IMC_CHN) * ICX_IMC_MEM_STRIDE + SNR_IMC_MMIO_MEM0_OFFSET; - __snr_uncore_mmio_init_box(box, box_ctl, mem_offset); + __snr_uncore_mmio_init_box(box, box_ctl, mem_offset, + SNR_MC_DEVICE_ID); } static struct intel_uncore_ops icx_uncore_mmio_ops = { @@ -5288,7 +5352,8 @@ static void icx_uncore_imc_freerunning_init_box(struct intel_uncore_box *box) int mem_offset = box->pmu->pmu_idx * ICX_IMC_MEM_STRIDE + SNR_IMC_MMIO_MEM0_OFFSET; - __snr_uncore_mmio_init_box(box, uncore_mmio_box_ctl(box), mem_offset); + snr_uncore_mmio_map(box, uncore_mmio_box_ctl(box), + mem_offset, SNR_MC_DEVICE_ID); } static struct intel_uncore_ops icx_uncore_imc_freerunning_ops = { @@ -5322,3 +5387,507 @@ void icx_uncore_mmio_init(void) } /* end of ICX uncore support */ + +/* SPR uncore support */ + +static void spr_uncore_msr_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + + if (reg1->idx != EXTRA_REG_NONE) + wrmsrl(reg1->reg, reg1->config); + + wrmsrl(hwc->config_base, hwc->config); +} + +static void spr_uncore_msr_disable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + + if (reg1->idx != EXTRA_REG_NONE) + wrmsrl(reg1->reg, 0); + + wrmsrl(hwc->config_base, 0); +} + +static int spr_cha_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + bool tie_en = !!(event->hw.config & SPR_CHA_PMON_CTL_TID_EN); + struct intel_uncore_type *type = box->pmu->type; + + if (tie_en) { + reg1->reg = SPR_C0_MSR_PMON_BOX_FILTER0 + + HSWEP_CBO_MSR_OFFSET * type->box_ids[box->pmu->pmu_idx]; + reg1->config = event->attr.config1 & SPR_CHA_PMON_BOX_FILTER_TID; + reg1->idx = 0; + } + + return 0; +} + +static struct intel_uncore_ops spr_uncore_chabox_ops = { + .init_box = intel_generic_uncore_msr_init_box, + .disable_box = intel_generic_uncore_msr_disable_box, + .enable_box = intel_generic_uncore_msr_enable_box, + .disable_event = spr_uncore_msr_disable_event, + .enable_event = spr_uncore_msr_enable_event, + .read_counter = uncore_msr_read_counter, + .hw_config = spr_cha_hw_config, + .get_constraint = uncore_get_constraint, + .put_constraint = uncore_put_constraint, +}; + +static struct attribute *spr_uncore_cha_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask_ext4.attr, + &format_attr_tid_en2.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + &format_attr_filter_tid5.attr, + NULL, +}; +static const struct attribute_group spr_uncore_chabox_format_group = { + .name = "format", + .attrs = spr_uncore_cha_formats_attr, +}; + +static ssize_t alias_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct intel_uncore_pmu *pmu = dev_to_uncore_pmu(dev); + char pmu_name[UNCORE_PMU_NAME_LEN]; + + uncore_get_alias_name(pmu_name, pmu); + return sysfs_emit(buf, "%s\n", pmu_name); +} + +static DEVICE_ATTR_RO(alias); + +static struct attribute *uncore_alias_attrs[] = { + &dev_attr_alias.attr, + NULL +}; + +ATTRIBUTE_GROUPS(uncore_alias); + +static struct intel_uncore_type spr_uncore_chabox = { + .name = "cha", + .event_mask = SPR_CHA_PMON_EVENT_MASK, + .event_mask_ext = SPR_RAW_EVENT_MASK_EXT, + .num_shared_regs = 1, + .constraints = skx_uncore_chabox_constraints, + .ops = &spr_uncore_chabox_ops, + .format_group = &spr_uncore_chabox_format_group, + .attr_update = uncore_alias_groups, +}; + +static struct intel_uncore_type spr_uncore_iio = { + .name = "iio", + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .event_mask_ext = SNR_IIO_PMON_RAW_EVENT_MASK_EXT, + .format_group = &snr_uncore_iio_format_group, + .attr_update = uncore_alias_groups, + .constraints = icx_uncore_iio_constraints, +}; + +static struct attribute *spr_uncore_raw_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask_ext4.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static const struct attribute_group spr_uncore_raw_format_group = { + .name = "format", + .attrs = spr_uncore_raw_formats_attr, +}; + +#define SPR_UNCORE_COMMON_FORMAT() \ + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, \ + .event_mask_ext = SPR_RAW_EVENT_MASK_EXT, \ + .format_group = &spr_uncore_raw_format_group, \ + .attr_update = uncore_alias_groups + +static struct intel_uncore_type spr_uncore_irp = { + SPR_UNCORE_COMMON_FORMAT(), + .name = "irp", + +}; + +static struct event_constraint spr_uncore_m2pcie_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x14, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2d, 0x3), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type spr_uncore_m2pcie = { + SPR_UNCORE_COMMON_FORMAT(), + .name = "m2pcie", + .constraints = spr_uncore_m2pcie_constraints, +}; + +static struct intel_uncore_type spr_uncore_pcu = { + .name = "pcu", + .attr_update = uncore_alias_groups, +}; + +static void spr_uncore_mmio_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (!box->io_addr) + return; + + if (uncore_pmc_fixed(hwc->idx)) + writel(SNBEP_PMON_CTL_EN, box->io_addr + hwc->config_base); + else + writel(hwc->config, box->io_addr + hwc->config_base); +} + +static struct intel_uncore_ops spr_uncore_mmio_ops = { + .init_box = intel_generic_uncore_mmio_init_box, + .exit_box = uncore_mmio_exit_box, + .disable_box = intel_generic_uncore_mmio_disable_box, + .enable_box = intel_generic_uncore_mmio_enable_box, + .disable_event = intel_generic_uncore_mmio_disable_event, + .enable_event = spr_uncore_mmio_enable_event, + .read_counter = uncore_mmio_read_counter, +}; + +static struct intel_uncore_type spr_uncore_imc = { + SPR_UNCORE_COMMON_FORMAT(), + .name = "imc", + .fixed_ctr_bits = 48, + .fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR, + .fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL, + .ops = &spr_uncore_mmio_ops, +}; + +static void spr_uncore_pci_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + + pci_write_config_dword(pdev, hwc->config_base + 4, (u32)(hwc->config >> 32)); + pci_write_config_dword(pdev, hwc->config_base, (u32)hwc->config); +} + +static struct intel_uncore_ops spr_uncore_pci_ops = { + .init_box = intel_generic_uncore_pci_init_box, + .disable_box = intel_generic_uncore_pci_disable_box, + .enable_box = intel_generic_uncore_pci_enable_box, + .disable_event = intel_generic_uncore_pci_disable_event, + .enable_event = spr_uncore_pci_enable_event, + .read_counter = intel_generic_uncore_pci_read_counter, +}; + +#define SPR_UNCORE_PCI_COMMON_FORMAT() \ + SPR_UNCORE_COMMON_FORMAT(), \ + .ops = &spr_uncore_pci_ops + +static struct intel_uncore_type spr_uncore_m2m = { + SPR_UNCORE_PCI_COMMON_FORMAT(), + .name = "m2m", +}; + +static struct intel_uncore_type spr_uncore_upi = { + SPR_UNCORE_PCI_COMMON_FORMAT(), + .name = "upi", +}; + +static struct intel_uncore_type spr_uncore_m3upi = { + SPR_UNCORE_PCI_COMMON_FORMAT(), + .name = "m3upi", + .constraints = icx_uncore_m3upi_constraints, +}; + +static struct intel_uncore_type spr_uncore_mdf = { + SPR_UNCORE_COMMON_FORMAT(), + .name = "mdf", +}; + +#define UNCORE_SPR_NUM_UNCORE_TYPES 12 +#define UNCORE_SPR_IIO 1 +#define UNCORE_SPR_IMC 6 + +static struct intel_uncore_type *spr_uncores[UNCORE_SPR_NUM_UNCORE_TYPES] = { + &spr_uncore_chabox, + &spr_uncore_iio, + &spr_uncore_irp, + &spr_uncore_m2pcie, + &spr_uncore_pcu, + NULL, + &spr_uncore_imc, + &spr_uncore_m2m, + &spr_uncore_upi, + &spr_uncore_m3upi, + NULL, + &spr_uncore_mdf, +}; + +enum perf_uncore_spr_iio_freerunning_type_id { + SPR_IIO_MSR_IOCLK, + SPR_IIO_MSR_BW_IN, + SPR_IIO_MSR_BW_OUT, + + SPR_IIO_FREERUNNING_TYPE_MAX, +}; + +static struct freerunning_counters spr_iio_freerunning[] = { + [SPR_IIO_MSR_IOCLK] = { 0x340e, 0x1, 0x10, 1, 48 }, + [SPR_IIO_MSR_BW_IN] = { 0x3800, 0x1, 0x10, 8, 48 }, + [SPR_IIO_MSR_BW_OUT] = { 0x3808, 0x1, 0x10, 8, 48 }, +}; + +static struct uncore_event_desc spr_uncore_iio_freerunning_events[] = { + /* Free-Running IIO CLOCKS Counter */ + INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"), + /* Free-Running IIO BANDWIDTH IN Counters */ + INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"), + INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"), + /* Free-Running IIO BANDWIDTH OUT Counters */ + INTEL_UNCORE_EVENT_DESC(bw_out_port0, "event=0xff,umask=0x30"), + INTEL_UNCORE_EVENT_DESC(bw_out_port0.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port0.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port1, "event=0xff,umask=0x31"), + INTEL_UNCORE_EVENT_DESC(bw_out_port1.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port1.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port2, "event=0xff,umask=0x32"), + INTEL_UNCORE_EVENT_DESC(bw_out_port2.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port2.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port3, "event=0xff,umask=0x33"), + INTEL_UNCORE_EVENT_DESC(bw_out_port3.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port3.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port4, "event=0xff,umask=0x34"), + INTEL_UNCORE_EVENT_DESC(bw_out_port4.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port4.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port5, "event=0xff,umask=0x35"), + INTEL_UNCORE_EVENT_DESC(bw_out_port5.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port5.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port6, "event=0xff,umask=0x36"), + INTEL_UNCORE_EVENT_DESC(bw_out_port6.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port6.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port7, "event=0xff,umask=0x37"), + INTEL_UNCORE_EVENT_DESC(bw_out_port7.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port7.unit, "MiB"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_type spr_uncore_iio_free_running = { + .name = "iio_free_running", + .num_counters = 17, + .num_freerunning_types = SPR_IIO_FREERUNNING_TYPE_MAX, + .freerunning = spr_iio_freerunning, + .ops = &skx_uncore_iio_freerunning_ops, + .event_descs = spr_uncore_iio_freerunning_events, + .format_group = &skx_uncore_iio_freerunning_format_group, +}; + +enum perf_uncore_spr_imc_freerunning_type_id { + SPR_IMC_DCLK, + SPR_IMC_PQ_CYCLES, + + SPR_IMC_FREERUNNING_TYPE_MAX, +}; + +static struct freerunning_counters spr_imc_freerunning[] = { + [SPR_IMC_DCLK] = { 0x22b0, 0x0, 0, 1, 48 }, + [SPR_IMC_PQ_CYCLES] = { 0x2318, 0x8, 0, 2, 48 }, +}; + +static struct uncore_event_desc spr_uncore_imc_freerunning_events[] = { + INTEL_UNCORE_EVENT_DESC(dclk, "event=0xff,umask=0x10"), + + INTEL_UNCORE_EVENT_DESC(rpq_cycles, "event=0xff,umask=0x20"), + INTEL_UNCORE_EVENT_DESC(wpq_cycles, "event=0xff,umask=0x21"), + { /* end: all zeroes */ }, +}; + +#define SPR_MC_DEVICE_ID 0x3251 + +static void spr_uncore_imc_freerunning_init_box(struct intel_uncore_box *box) +{ + int mem_offset = box->pmu->pmu_idx * ICX_IMC_MEM_STRIDE + SNR_IMC_MMIO_MEM0_OFFSET; + + snr_uncore_mmio_map(box, uncore_mmio_box_ctl(box), + mem_offset, SPR_MC_DEVICE_ID); +} + +static struct intel_uncore_ops spr_uncore_imc_freerunning_ops = { + .init_box = spr_uncore_imc_freerunning_init_box, + .exit_box = uncore_mmio_exit_box, + .read_counter = uncore_mmio_read_counter, + .hw_config = uncore_freerunning_hw_config, +}; + +static struct intel_uncore_type spr_uncore_imc_free_running = { + .name = "imc_free_running", + .num_counters = 3, + .mmio_map_size = SNR_IMC_MMIO_SIZE, + .num_freerunning_types = SPR_IMC_FREERUNNING_TYPE_MAX, + .freerunning = spr_imc_freerunning, + .ops = &spr_uncore_imc_freerunning_ops, + .event_descs = spr_uncore_imc_freerunning_events, + .format_group = &skx_uncore_iio_freerunning_format_group, +}; + +#define UNCORE_SPR_MSR_EXTRA_UNCORES 1 +#define UNCORE_SPR_MMIO_EXTRA_UNCORES 1 + +static struct intel_uncore_type *spr_msr_uncores[UNCORE_SPR_MSR_EXTRA_UNCORES] = { + &spr_uncore_iio_free_running, +}; + +static struct intel_uncore_type *spr_mmio_uncores[UNCORE_SPR_MMIO_EXTRA_UNCORES] = { + &spr_uncore_imc_free_running, +}; + +static void uncore_type_customized_copy(struct intel_uncore_type *to_type, + struct intel_uncore_type *from_type) +{ + if (!to_type || !from_type) + return; + + if (from_type->name) + to_type->name = from_type->name; + if (from_type->fixed_ctr_bits) + to_type->fixed_ctr_bits = from_type->fixed_ctr_bits; + if (from_type->event_mask) + to_type->event_mask = from_type->event_mask; + if (from_type->event_mask_ext) + to_type->event_mask_ext = from_type->event_mask_ext; + if (from_type->fixed_ctr) + to_type->fixed_ctr = from_type->fixed_ctr; + if (from_type->fixed_ctl) + to_type->fixed_ctl = from_type->fixed_ctl; + if (from_type->fixed_ctr_bits) + to_type->fixed_ctr_bits = from_type->fixed_ctr_bits; + if (from_type->num_shared_regs) + to_type->num_shared_regs = from_type->num_shared_regs; + if (from_type->constraints) + to_type->constraints = from_type->constraints; + if (from_type->ops) + to_type->ops = from_type->ops; + if (from_type->event_descs) + to_type->event_descs = from_type->event_descs; + if (from_type->format_group) + to_type->format_group = from_type->format_group; + if (from_type->attr_update) + to_type->attr_update = from_type->attr_update; +} + +static struct intel_uncore_type ** +uncore_get_uncores(enum uncore_access_type type_id, int num_extra, + struct intel_uncore_type **extra) +{ + struct intel_uncore_type **types, **start_types; + int i; + + start_types = types = intel_uncore_generic_init_uncores(type_id, num_extra); + + /* Only copy the customized features */ + for (; *types; types++) { + if ((*types)->type_id >= UNCORE_SPR_NUM_UNCORE_TYPES) + continue; + uncore_type_customized_copy(*types, spr_uncores[(*types)->type_id]); + } + + for (i = 0; i < num_extra; i++, types++) + *types = extra[i]; + + return start_types; +} + +static struct intel_uncore_type * +uncore_find_type_by_id(struct intel_uncore_type **types, int type_id) +{ + for (; *types; types++) { + if (type_id == (*types)->type_id) + return *types; + } + + return NULL; +} + +static int uncore_type_max_boxes(struct intel_uncore_type **types, + int type_id) +{ + struct intel_uncore_type *type; + int i, max = 0; + + type = uncore_find_type_by_id(types, type_id); + if (!type) + return 0; + + for (i = 0; i < type->num_boxes; i++) { + if (type->box_ids[i] > max) + max = type->box_ids[i]; + } + + return max + 1; +} + +void spr_uncore_cpu_init(void) +{ + uncore_msr_uncores = uncore_get_uncores(UNCORE_ACCESS_MSR, + UNCORE_SPR_MSR_EXTRA_UNCORES, + spr_msr_uncores); + + spr_uncore_iio_free_running.num_boxes = uncore_type_max_boxes(uncore_msr_uncores, UNCORE_SPR_IIO); +} + +int spr_uncore_pci_init(void) +{ + uncore_pci_uncores = uncore_get_uncores(UNCORE_ACCESS_PCI, 0, NULL); + return 0; +} + +void spr_uncore_mmio_init(void) +{ + int ret = snbep_pci2phy_map_init(0x3250, SKX_CPUNODEID, SKX_GIDNIDMAP, true); + + if (ret) + uncore_mmio_uncores = uncore_get_uncores(UNCORE_ACCESS_MMIO, 0, NULL); + else { + uncore_mmio_uncores = uncore_get_uncores(UNCORE_ACCESS_MMIO, + UNCORE_SPR_MMIO_EXTRA_UNCORES, + spr_mmio_uncores); + + spr_uncore_imc_free_running.num_boxes = uncore_type_max_boxes(uncore_mmio_uncores, UNCORE_SPR_IMC) / 2; + } +} + +/* end of SPR uncore support */ diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 9abe842dbd843a2d569a10e0131c937282ed1b46..f56af54256c80782ee29d1e3b4dab5680e3f0d44 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -32,7 +32,9 @@ * _EP - 2 socket server parts * _EX - 4+ socket server parts * - * The #define line may optionally include a comment including platform names. + * The #define line may optionally include a comment including platform or core + * names. An exception is made for kabylake where steppings seem to have gotten + * their own names :-( */ /* Wildcard match for FAM6 so X86_MATCH_INTEL_FAM6_MODEL(ANY) works */ @@ -69,35 +71,40 @@ #define INTEL_FAM6_BROADWELL_X 0x4F #define INTEL_FAM6_BROADWELL_D 0x56 -#define INTEL_FAM6_SKYLAKE_L 0x4E -#define INTEL_FAM6_SKYLAKE 0x5E -#define INTEL_FAM6_SKYLAKE_X 0x55 -#define INTEL_FAM6_KABYLAKE_L 0x8E -#define INTEL_FAM6_KABYLAKE 0x9E +#define INTEL_FAM6_SKYLAKE_L 0x4E /* Sky Lake */ +#define INTEL_FAM6_SKYLAKE 0x5E /* Sky Lake */ +#define INTEL_FAM6_SKYLAKE_X 0x55 /* Sky Lake */ -#define INTEL_FAM6_CANNONLAKE_L 0x66 +#define INTEL_FAM6_KABYLAKE_L 0x8E /* Sky Lake */ +/* AMBERLAKE_L 0x8E Sky Lake -- s: 9 */ +/* COFFEELAKE_L 0x8E Sky Lake -- s: 10 */ +/* WHISKEYLAKE_L 0x8E Sky Lake -- s: 11,12 */ -#define INTEL_FAM6_ICELAKE_X 0x6A -#define INTEL_FAM6_ICELAKE_D 0x6C -#define INTEL_FAM6_ICELAKE 0x7D -#define INTEL_FAM6_ICELAKE_L 0x7E -#define INTEL_FAM6_ICELAKE_NNPI 0x9D +#define INTEL_FAM6_KABYLAKE 0x9E /* Sky Lake */ +/* COFFEELAKE 0x9E Sky Lake -- s: 10-13 */ -#define INTEL_FAM6_TIGERLAKE_L 0x8C -#define INTEL_FAM6_TIGERLAKE 0x8D +#define INTEL_FAM6_COMETLAKE 0xA5 /* Sky Lake */ +#define INTEL_FAM6_COMETLAKE_L 0xA6 /* Sky Lake */ -#define INTEL_FAM6_COMETLAKE 0xA5 -#define INTEL_FAM6_COMETLAKE_L 0xA6 +#define INTEL_FAM6_CANNONLAKE_L 0x66 /* Palm Cove */ -#define INTEL_FAM6_ROCKETLAKE 0xA7 +#define INTEL_FAM6_ICELAKE_X 0x6A /* Sunny Cove */ +#define INTEL_FAM6_ICELAKE_D 0x6C /* Sunny Cove */ +#define INTEL_FAM6_ICELAKE 0x7D /* Sunny Cove */ +#define INTEL_FAM6_ICELAKE_L 0x7E /* Sunny Cove */ +#define INTEL_FAM6_ICELAKE_NNPI 0x9D /* Sunny Cove */ -#define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F +#define INTEL_FAM6_LAKEFIELD 0x8A /* Sunny Cove / Tremont */ -/* Hybrid Core/Atom Processors */ +#define INTEL_FAM6_ROCKETLAKE 0xA7 /* Cypress Cove */ -#define INTEL_FAM6_LAKEFIELD 0x8A -#define INTEL_FAM6_ALDERLAKE 0x97 -#define INTEL_FAM6_ALDERLAKE_L 0x9A +#define INTEL_FAM6_TIGERLAKE_L 0x8C /* Willow Cove */ +#define INTEL_FAM6_TIGERLAKE 0x8D /* Willow Cove */ + +#define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F /* Golden Cove */ + +#define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */ +#define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */ /* "Small Core" Processors (Atom) */ diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 9226ec130ba342e6c3563a16586b80bcb6cbd383..bae6cf6402629c5bfc16febad04ccee600b70920 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -307,7 +307,7 @@ void css_task_iter_end(struct css_task_iter *it); * Inline functions. */ -static inline u64 cgroup_id(struct cgroup *cgrp) +static inline u64 cgroup_id(const struct cgroup *cgrp) { return cgrp->kn->id; } @@ -701,7 +701,7 @@ void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen); struct cgroup_subsys_state; struct cgroup; -static inline u64 cgroup_id(struct cgroup *cgrp) { return 1; } +static inline u64 cgroup_id(const struct cgroup *cgrp) { return 1; } static inline void css_get(struct cgroup_subsys_state *css) {} static inline void css_put(struct cgroup_subsys_state *css) {} static inline int cgroup_attach_task_all(struct task_struct *from, diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index e0b300de8f3fa2078eb90a4bcb2acfae6d5c9a90..d31ecaf4fdd3db7bac101d6b5fa89ab379993454 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -141,12 +141,18 @@ static inline void rb_insert_color_cached(struct rb_node *node, rb_insert_color(node, &root->rb_root); } -static inline void rb_erase_cached(struct rb_node *node, - struct rb_root_cached *root) + +static inline struct rb_node * +rb_erase_cached(struct rb_node *node, struct rb_root_cached *root) { + struct rb_node *leftmost = NULL; + if (root->rb_leftmost == node) - root->rb_leftmost = rb_next(node); + leftmost = root->rb_leftmost = rb_next(node); + rb_erase(node, &root->rb_root); + + return leftmost; } static inline void rb_replace_node_cached(struct rb_node *victim, @@ -179,8 +185,10 @@ static inline void rb_replace_node_cached(struct rb_node *victim, * @node: node to insert * @tree: leftmost cached tree to insert @node into * @less: operator defining the (partial) node order + * + * Returns @node when it is the new leftmost, or NULL. */ -static __always_inline void +static __always_inline struct rb_node * rb_add_cached(struct rb_node *node, struct rb_root_cached *tree, bool (*less)(struct rb_node *, const struct rb_node *)) { @@ -200,6 +208,8 @@ rb_add_cached(struct rb_node *node, struct rb_root_cached *tree, rb_link_node(node, parent, link); rb_insert_color_cached(node, tree, leftmost); + + return leftmost ? node : NULL; } /** diff --git a/kernel/events/core.c b/kernel/events/core.c index 68dc8a8e7990a97c675b547f593056630e46b59d..cba6aa2b9044ee5f9681f56ce13a998e27ec15fe 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1594,50 +1594,91 @@ static void perf_event_groups_init(struct perf_event_groups *groups) groups->index = 0; } +static inline struct cgroup *event_cgroup(const struct perf_event *event) +{ + struct cgroup *cgroup = NULL; + +#ifdef CONFIG_CGROUP_PERF + if (event->cgrp) + cgroup = event->cgrp->css.cgroup; +#endif + + return cgroup; +} + /* * Compare function for event groups; * * Implements complex key that first sorts by CPU and then by virtual index * which provides ordering when rotating groups for the same CPU. */ -static bool -perf_event_groups_less(struct perf_event *left, struct perf_event *right) +static __always_inline int +perf_event_groups_cmp(const int left_cpu, const struct cgroup *left_cgroup, + const u64 left_group_index, const struct perf_event *right) { - if (left->cpu < right->cpu) - return true; - if (left->cpu > right->cpu) - return false; + if (left_cpu < right->cpu) + return -1; + if (left_cpu > right->cpu) + return 1; #ifdef CONFIG_CGROUP_PERF - if (left->cgrp != right->cgrp) { - if (!left->cgrp || !left->cgrp->css.cgroup) { - /* - * Left has no cgroup but right does, no cgroups come - * first. - */ - return true; - } - if (!right->cgrp || !right->cgrp->css.cgroup) { - /* - * Right has no cgroup but left does, no cgroups come - * first. - */ - return false; - } - /* Two dissimilar cgroups, order by id. */ - if (left->cgrp->css.cgroup->kn->id < right->cgrp->css.cgroup->kn->id) - return true; + { + const struct cgroup *right_cgroup = event_cgroup(right); - return false; + if (left_cgroup != right_cgroup) { + if (!left_cgroup) { + /* + * Left has no cgroup but right does, no + * cgroups come first. + */ + return -1; + } + if (!right_cgroup) { + /* + * Right has no cgroup but left does, no + * cgroups come first. + */ + return 1; + } + /* Two dissimilar cgroups, order by id. */ + if (cgroup_id(left_cgroup) < cgroup_id(right_cgroup)) + return -1; + + return 1; + } } #endif - if (left->group_index < right->group_index) - return true; - if (left->group_index > right->group_index) - return false; + if (left_group_index < right->group_index) + return -1; + if (left_group_index > right->group_index) + return 1; - return false; + return 0; +} + +#define __node_2_pe(node) \ + rb_entry((node), struct perf_event, group_node) + +static inline bool __group_less(struct rb_node *a, const struct rb_node *b) +{ + struct perf_event *e = __node_2_pe(a); + return perf_event_groups_cmp(e->cpu, event_cgroup(e), e->group_index, + __node_2_pe(b)) < 0; +} + +struct __group_key { + int cpu; + struct cgroup *cgroup; +}; + +static inline int __group_cmp(const void *key, const struct rb_node *node) +{ + const struct __group_key *a = key; + const struct perf_event *b = __node_2_pe(node); + + /* partial/subtree match: @cpu, @cgroup; ignore: @group_index */ + return perf_event_groups_cmp(a->cpu, a->cgroup, b->group_index, b); } /* @@ -1649,27 +1690,9 @@ static void perf_event_groups_insert(struct perf_event_groups *groups, struct perf_event *event) { - struct perf_event *node_event; - struct rb_node *parent; - struct rb_node **node; - event->group_index = ++groups->index; - node = &groups->tree.rb_node; - parent = *node; - - while (*node) { - parent = *node; - node_event = container_of(*node, struct perf_event, group_node); - - if (perf_event_groups_less(event, node_event)) - node = &parent->rb_left; - else - node = &parent->rb_right; - } - - rb_link_node(&event->group_node, parent, node); - rb_insert_color(&event->group_node, &groups->tree); + rb_add(&event->group_node, &groups->tree, __group_less); } /* @@ -1717,45 +1740,17 @@ static struct perf_event * perf_event_groups_first(struct perf_event_groups *groups, int cpu, struct cgroup *cgrp) { - struct perf_event *node_event = NULL, *match = NULL; - struct rb_node *node = groups->tree.rb_node; -#ifdef CONFIG_CGROUP_PERF - u64 node_cgrp_id, cgrp_id = 0; - - if (cgrp) - cgrp_id = cgrp->kn->id; -#endif - - while (node) { - node_event = container_of(node, struct perf_event, group_node); - - if (cpu < node_event->cpu) { - node = node->rb_left; - continue; - } - if (cpu > node_event->cpu) { - node = node->rb_right; - continue; - } -#ifdef CONFIG_CGROUP_PERF - node_cgrp_id = 0; - if (node_event->cgrp && node_event->cgrp->css.cgroup) - node_cgrp_id = node_event->cgrp->css.cgroup->kn->id; + struct __group_key key = { + .cpu = cpu, + .cgroup = cgrp, + }; + struct rb_node *node; - if (cgrp_id < node_cgrp_id) { - node = node->rb_left; - continue; - } - if (cgrp_id > node_cgrp_id) { - node = node->rb_right; - continue; - } -#endif - match = node_event; - node = node->rb_left; - } + node = rb_find_first(&key, &groups->tree, __group_cmp); + if (node) + return __node_2_pe(node); - return match; + return NULL; } /* @@ -1764,27 +1759,17 @@ perf_event_groups_first(struct perf_event_groups *groups, int cpu, static struct perf_event * perf_event_groups_next(struct perf_event *event) { - struct perf_event *next; -#ifdef CONFIG_CGROUP_PERF - u64 curr_cgrp_id = 0; - u64 next_cgrp_id = 0; -#endif - - next = rb_entry_safe(rb_next(&event->group_node), typeof(*event), group_node); - if (next == NULL || next->cpu != event->cpu) - return NULL; - -#ifdef CONFIG_CGROUP_PERF - if (event->cgrp && event->cgrp->css.cgroup) - curr_cgrp_id = event->cgrp->css.cgroup->kn->id; + struct __group_key key = { + .cpu = event->cpu, + .cgroup = event_cgroup(event), + }; + struct rb_node *next; - if (next->cgrp && next->cgrp->css.cgroup) - next_cgrp_id = next->cgrp->css.cgroup->kn->id; + next = rb_next_match(&key, &event->group_node, __group_cmp); + if (next) + return __node_2_pe(next); - if (curr_cgrp_id != next_cgrp_id) - return NULL; -#endif - return next; + return NULL; } /* diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index e1bbb3b92921d8eb084d2844cd2ae85ebaf5e14f..8546113763af23e8bdf9bea0dd9ac51b17a2abce 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -613,41 +613,56 @@ static void put_uprobe(struct uprobe *uprobe) } } -static int match_uprobe(struct uprobe *l, struct uprobe *r) +static __always_inline +int uprobe_cmp(const struct inode *l_inode, const loff_t l_offset, + const struct uprobe *r) { - if (l->inode < r->inode) + if (l_inode < r->inode) return -1; - if (l->inode > r->inode) + if (l_inode > r->inode) return 1; - if (l->offset < r->offset) + if (l_offset < r->offset) return -1; - if (l->offset > r->offset) + if (l_offset > r->offset) return 1; return 0; } +#define __node_2_uprobe(node) \ + rb_entry((node), struct uprobe, rb_node) + +struct __uprobe_key { + struct inode *inode; + loff_t offset; +}; + +static inline int __uprobe_cmp_key(const void *key, const struct rb_node *b) +{ + const struct __uprobe_key *a = key; + return uprobe_cmp(a->inode, a->offset, __node_2_uprobe(b)); +} + +static inline int __uprobe_cmp(struct rb_node *a, const struct rb_node *b) +{ + struct uprobe *u = __node_2_uprobe(a); + return uprobe_cmp(u->inode, u->offset, __node_2_uprobe(b)); +} + static struct uprobe *__find_uprobe(struct inode *inode, loff_t offset) { - struct uprobe u = { .inode = inode, .offset = offset }; - struct rb_node *n = uprobes_tree.rb_node; - struct uprobe *uprobe; - int match; + struct __uprobe_key key = { + .inode = inode, + .offset = offset, + }; + struct rb_node *node = rb_find(&key, &uprobes_tree, __uprobe_cmp_key); - while (n) { - uprobe = rb_entry(n, struct uprobe, rb_node); - match = match_uprobe(&u, uprobe); - if (!match) - return get_uprobe(uprobe); + if (node) + return get_uprobe(__node_2_uprobe(node)); - if (match < 0) - n = n->rb_left; - else - n = n->rb_right; - } return NULL; } @@ -668,32 +683,15 @@ static struct uprobe *find_uprobe(struct inode *inode, loff_t offset) static struct uprobe *__insert_uprobe(struct uprobe *uprobe) { - struct rb_node **p = &uprobes_tree.rb_node; - struct rb_node *parent = NULL; - struct uprobe *u; - int match; + struct rb_node *node; - while (*p) { - parent = *p; - u = rb_entry(parent, struct uprobe, rb_node); - match = match_uprobe(uprobe, u); - if (!match) - return get_uprobe(u); + node = rb_find_add(&uprobe->rb_node, &uprobes_tree, __uprobe_cmp); + if (node) + return get_uprobe(__node_2_uprobe(node)); - if (match < 0) - p = &parent->rb_left; - else - p = &parent->rb_right; - - } - - u = NULL; - rb_link_node(&uprobe->rb_node, parent, p); - rb_insert_color(&uprobe->rb_node, &uprobes_tree); /* get access + creation ref */ refcount_set(&uprobe->ref, 2); - - return u; + return NULL; } /* diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index f00dd928fc7113c6a31aaa05d6214c08f3fee201..a82d1176e7c6a9d7b0edb5434b986ac568dff623 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -267,27 +267,18 @@ rt_mutex_waiter_equal(struct rt_mutex_waiter *left, return 1; } +#define __node_2_waiter(node) \ + rb_entry((node), struct rt_mutex_waiter, tree_entry) + +static inline bool __waiter_less(struct rb_node *a, const struct rb_node *b) +{ + return rt_mutex_waiter_less(__node_2_waiter(a), __node_2_waiter(b)); +} + static void rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) { - struct rb_node **link = &lock->waiters.rb_root.rb_node; - struct rb_node *parent = NULL; - struct rt_mutex_waiter *entry; - bool leftmost = true; - - while (*link) { - parent = *link; - entry = rb_entry(parent, struct rt_mutex_waiter, tree_entry); - if (rt_mutex_waiter_less(waiter, entry)) { - link = &parent->rb_left; - } else { - link = &parent->rb_right; - leftmost = false; - } - } - - rb_link_node(&waiter->tree_entry, parent, link); - rb_insert_color_cached(&waiter->tree_entry, &lock->waiters, leftmost); + rb_add_cached(&waiter->tree_entry, &lock->waiters, __waiter_less); } static void @@ -300,27 +291,18 @@ rt_mutex_dequeue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) RB_CLEAR_NODE(&waiter->tree_entry); } +#define __node_2_pi_waiter(node) \ + rb_entry((node), struct rt_mutex_waiter, pi_tree_entry) + +static inline bool __pi_waiter_less(struct rb_node *a, const struct rb_node *b) +{ + return rt_mutex_waiter_less(__node_2_pi_waiter(a), __node_2_pi_waiter(b)); +} + static void rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter) { - struct rb_node **link = &task->pi_waiters.rb_root.rb_node; - struct rb_node *parent = NULL; - struct rt_mutex_waiter *entry; - bool leftmost = true; - - while (*link) { - parent = *link; - entry = rb_entry(parent, struct rt_mutex_waiter, pi_tree_entry); - if (rt_mutex_waiter_less(waiter, entry)) { - link = &parent->rb_left; - } else { - link = &parent->rb_right; - leftmost = false; - } - } - - rb_link_node(&waiter->pi_tree_entry, parent, link); - rb_insert_color_cached(&waiter->pi_tree_entry, &task->pi_waiters, leftmost); + rb_add_cached(&waiter->pi_tree_entry, &task->pi_waiters, __pi_waiter_less); } static void diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index cb487d7d33e159924d5fa9a7a76bdc3cac496f39..3c25d92eb3a8d1a45411081b5dfb35d56613a3de 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -501,58 +501,44 @@ static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) update_dl_migration(dl_rq); } +#define __node_2_pdl(node) \ + rb_entry((node), struct task_struct, pushable_dl_tasks) + +static inline bool __pushable_less(struct rb_node *a, const struct rb_node *b) +{ + return dl_entity_preempt(&__node_2_pdl(a)->dl, &__node_2_pdl(b)->dl); +} + /* * The list of pushable -deadline task is not a plist, like in * sched_rt.c, it is an rb-tree with tasks ordered by deadline. */ static void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p) { - struct dl_rq *dl_rq = &rq->dl; - struct rb_node **link = &dl_rq->pushable_dl_tasks_root.rb_root.rb_node; - struct rb_node *parent = NULL; - struct task_struct *entry; - bool leftmost = true; + struct rb_node *leftmost; BUG_ON(!RB_EMPTY_NODE(&p->pushable_dl_tasks)); - while (*link) { - parent = *link; - entry = rb_entry(parent, struct task_struct, - pushable_dl_tasks); - if (dl_entity_preempt(&p->dl, &entry->dl)) - link = &parent->rb_left; - else { - link = &parent->rb_right; - leftmost = false; - } - } - + leftmost = rb_add_cached(&p->pushable_dl_tasks, + &rq->dl.pushable_dl_tasks_root, + __pushable_less); if (leftmost) - dl_rq->earliest_dl.next = p->dl.deadline; - - rb_link_node(&p->pushable_dl_tasks, parent, link); - rb_insert_color_cached(&p->pushable_dl_tasks, - &dl_rq->pushable_dl_tasks_root, leftmost); + rq->dl.earliest_dl.next = p->dl.deadline; } static void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p) { struct dl_rq *dl_rq = &rq->dl; + struct rb_root_cached *root = &dl_rq->pushable_dl_tasks_root; + struct rb_node *leftmost; if (RB_EMPTY_NODE(&p->pushable_dl_tasks)) return; - if (dl_rq->pushable_dl_tasks_root.rb_leftmost == &p->pushable_dl_tasks) { - struct rb_node *next_node; - - next_node = rb_next(&p->pushable_dl_tasks); - if (next_node) { - dl_rq->earliest_dl.next = rb_entry(next_node, - struct task_struct, pushable_dl_tasks)->dl.deadline; - } - } + leftmost = rb_erase_cached(&p->pushable_dl_tasks, root); + if (leftmost) + dl_rq->earliest_dl.next = __node_2_pdl(leftmost)->dl.deadline; - rb_erase_cached(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root); RB_CLEAR_NODE(&p->pushable_dl_tasks); } @@ -1459,29 +1445,21 @@ void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) dec_dl_migration(dl_se, dl_rq); } +#define __node_2_dle(node) \ + rb_entry((node), struct sched_dl_entity, rb_node) + +static inline bool __dl_less(struct rb_node *a, const struct rb_node *b) +{ + return dl_time_before(__node_2_dle(a)->deadline, __node_2_dle(b)->deadline); +} + static void __enqueue_dl_entity(struct sched_dl_entity *dl_se) { struct dl_rq *dl_rq = dl_rq_of_se(dl_se); - struct rb_node **link = &dl_rq->root.rb_root.rb_node; - struct rb_node *parent = NULL; - struct sched_dl_entity *entry; - int leftmost = 1; BUG_ON(!RB_EMPTY_NODE(&dl_se->rb_node)); - while (*link) { - parent = *link; - entry = rb_entry(parent, struct sched_dl_entity, rb_node); - if (dl_time_before(dl_se->deadline, entry->deadline)) - link = &parent->rb_left; - else { - link = &parent->rb_right; - leftmost = 0; - } - } - - rb_link_node(&dl_se->rb_node, parent, link); - rb_insert_color_cached(&dl_se->rb_node, &dl_rq->root, leftmost); + rb_add_cached(&dl_se->rb_node, &dl_rq->root, __dl_less); inc_dl_tasks(dl_se, dl_rq); } @@ -1494,6 +1472,7 @@ static void __dequeue_dl_entity(struct sched_dl_entity *dl_se) return; rb_erase_cached(&dl_se->rb_node, &dl_rq->root); + RB_CLEAR_NODE(&dl_se->rb_node); dec_dl_tasks(dl_se, dl_rq); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 20b482688e3a6132fcb1319309db910594489397..fe2f527c71edd776f7b934785cb22fbf2bdedea4 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -505,12 +505,15 @@ static inline u64 min_vruntime(u64 min_vruntime, u64 vruntime) return min_vruntime; } -static inline int entity_before(struct sched_entity *a, +static inline bool entity_before(struct sched_entity *a, struct sched_entity *b) { return (s64)(a->vruntime - b->vruntime) < 0; } +#define __node_2_se(node) \ + rb_entry((node), struct sched_entity, run_node) + static void update_min_vruntime(struct cfs_rq *cfs_rq) { struct sched_entity *curr = cfs_rq->curr; @@ -526,8 +529,7 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq) } if (leftmost) { /* non-empty tree */ - struct sched_entity *se; - se = rb_entry(leftmost, struct sched_entity, run_node); + struct sched_entity *se = __node_2_se(leftmost); if (!curr) vruntime = se->vruntime; @@ -543,37 +545,17 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq) #endif } +static inline bool __entity_less(struct rb_node *a, const struct rb_node *b) +{ + return entity_before(__node_2_se(a), __node_2_se(b)); +} + /* * Enqueue an entity into the rb-tree: */ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) { - struct rb_node **link = &cfs_rq->tasks_timeline.rb_root.rb_node; - struct rb_node *parent = NULL; - struct sched_entity *entry; - bool leftmost = true; - - /* - * Find the right place in the rbtree: - */ - while (*link) { - parent = *link; - entry = rb_entry(parent, struct sched_entity, run_node); - /* - * We dont care about collisions. Nodes with - * the same key stay together. - */ - if (entity_before(se, entry)) { - link = &parent->rb_left; - } else { - link = &parent->rb_right; - leftmost = false; - } - } - - rb_link_node(&se->run_node, parent, link); - rb_insert_color_cached(&se->run_node, - &cfs_rq->tasks_timeline, leftmost); + rb_add_cached(&se->run_node, &cfs_rq->tasks_timeline, __entity_less); } static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) @@ -588,7 +570,7 @@ struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq) if (!left) return NULL; - return rb_entry(left, struct sched_entity, run_node); + return __node_2_se(left); } static struct sched_entity *__pick_next_entity(struct sched_entity *se) @@ -598,7 +580,7 @@ static struct sched_entity *__pick_next_entity(struct sched_entity *se) if (!next) return NULL; - return rb_entry(next, struct sched_entity, run_node); + return __node_2_se(next); } #ifdef CONFIG_SCHED_DEBUG @@ -609,7 +591,7 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) if (!last) return NULL; - return rb_entry(last, struct sched_entity, run_node); + return __node_2_se(last); } /************************************************************** diff --git a/lib/timerqueue.c b/lib/timerqueue.c index c5271096459306fb22f63699422fa4b7495783ab..cdb9c7658478f0505e2d1bdc8e6ede6a812fa958 100644 --- a/lib/timerqueue.c +++ b/lib/timerqueue.c @@ -14,6 +14,14 @@ #include #include +#define __node_2_tq(_n) \ + rb_entry((_n), struct timerqueue_node, node) + +static inline bool __timerqueue_less(struct rb_node *a, const struct rb_node *b) +{ + return __node_2_tq(a)->expires < __node_2_tq(b)->expires; +} + /** * timerqueue_add - Adds timer to timerqueue. * @@ -26,28 +34,10 @@ */ bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node) { - struct rb_node **p = &head->rb_root.rb_root.rb_node; - struct rb_node *parent = NULL; - struct timerqueue_node *ptr; - bool leftmost = true; - /* Make sure we don't add nodes that are already added */ WARN_ON_ONCE(!RB_EMPTY_NODE(&node->node)); - while (*p) { - parent = *p; - ptr = rb_entry(parent, struct timerqueue_node, node); - if (node->expires < ptr->expires) { - p = &(*p)->rb_left; - } else { - p = &(*p)->rb_right; - leftmost = false; - } - } - rb_link_node(&node->node, parent, p); - rb_insert_color_cached(&node->node, &head->rb_root, leftmost); - - return leftmost; + return rb_add_cached(&node->node, &head->rb_root, __timerqueue_less); } EXPORT_SYMBOL_GPL(timerqueue_add); diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 21dae7d7f2966549aa62547f2166297553ce51ed..a0cfab213b5fbf11de3fbdee8f68a81261458b49 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -325,6 +325,8 @@ static int read_symbols(struct elf *elf) { struct section *symtab, *symtab_shndx, *sec; struct symbol *sym, *pfunc; + struct list_head *entry; + struct rb_node *pnode; int symbols_nr, i; char *coldstr; Elf_Data *shndx_data = NULL; @@ -352,6 +354,7 @@ static int read_symbols(struct elf *elf) return -1; } memset(sym, 0, sizeof(*sym)); + sym->alias = sym; sym->idx = i; @@ -368,6 +371,9 @@ static int read_symbols(struct elf *elf) goto err; } + sym->type = GELF_ST_TYPE(sym->sym.st_info); + sym->bind = GELF_ST_BIND(sym->sym.st_info); + if ((sym->sym.st_shndx > SHN_UNDEF && sym->sym.st_shndx < SHN_LORESERVE) || (shndx_data && sym->sym.st_shndx == SHN_XINDEX)) { @@ -380,14 +386,32 @@ static int read_symbols(struct elf *elf) sym->name); goto err; } - if (GELF_ST_TYPE(sym->sym.st_info) == STT_SECTION) { + if (sym->type == STT_SECTION) { sym->name = sym->sec->name; sym->sec->sym = sym; } } else sym->sec = find_section_by_index(elf, 0); - elf_add_symbol(elf, sym); + sym->offset = sym->sym.st_value; + sym->len = sym->sym.st_size; + + rb_add(&sym->node, &sym->sec->symbol_tree, symbol_to_offset); + pnode = rb_prev(&sym->node); + if (pnode) + entry = &rb_entry(pnode, struct symbol, node)->list; + else + entry = &sym->sec->symbol_list; + list_add(&sym->list, entry); + elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx); + elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name)); + + /* + * Don't store empty STT_NOTYPE symbols in the rbtree. They + * can exist within a function, confusing the sorting. + */ + if (!sym->len) + rb_erase(&sym->node, &sym->sec->symbol_tree); } if (stats)