diff --git a/Documentation/admin-guide/perf/dwc_pcie_pmu.rst b/Documentation/admin-guide/perf/dwc_pcie_pmu.rst new file mode 100644 index 0000000000000000000000000000000000000000..d47cd229d7106f26ac47b85b66e6fc28a95479ad --- /dev/null +++ b/Documentation/admin-guide/perf/dwc_pcie_pmu.rst @@ -0,0 +1,94 @@ +====================================================================== +Synopsys DesignWare Cores (DWC) PCIe Performance Monitoring Unit (PMU) +====================================================================== + +DesignWare Cores (DWC) PCIe PMU +=============================== + +The PMU is a PCIe configuration space register block provided by each PCIe Root +Port in a Vendor-Specific Extended Capability named RAS D.E.S (Debug, Error +injection, and Statistics). + +As the name indicates, the RAS DES capability supports system level +debugging, AER error injection, and collection of statistics. To facilitate +collection of statistics, Synopsys DesignWare Cores PCIe controller +provides the following two features: + +- one 64-bit counter for Time Based Analysis (RX/TX data throughput and + time spent in each low-power LTSSM state) and +- one 32-bit counter for Event Counting (error and non-error events for + a specified lane) + +Note: There is no interrupt for counter overflow. + +Time Based Analysis +------------------- + +Using this feature you can obtain information regarding RX/TX data +throughput and time spent in each low-power LTSSM state by the controller. +The PMU measures data in two categories: + +- Group#0: Percentage of time the controller stays in LTSSM states. +- Group#1: Amount of data processed (Units of 16 bytes). + +Lane Event counters +------------------- + +Using this feature you can obtain Error and Non-Error information in +specific lane by the controller. The PMU event is selected by all of: + +- Group i +- Event j within the Group i +- Lane k + +Some of the events only exist for specific configurations. + +DesignWare Cores (DWC) PCIe PMU Driver +======================================= + +This driver adds PMU devices for each PCIe Root Port named based on the BDF of +the Root Port. For example, + + 30:03.0 PCI bridge: Device 1ded:8000 (rev 01) + +the PMU device name for this Root Port is dwc_rootport_3018. + +The DWC PCIe PMU driver registers a perf PMU driver, which provides +description of available events and configuration options in sysfs, see +/sys/bus/event_source/devices/dwc_rootport_{bdf}. + +The "format" directory describes format of the config fields of the +perf_event_attr structure. The "events" directory provides configuration +templates for all documented events. For example, +"Rx_PCIe_TLP_Data_Payload" is an equivalent of "eventid=0x22,type=0x1". + +The "perf list" command shall list the available events from sysfs, e.g.:: + + $# perf list | grep dwc_rootport + <...> + dwc_rootport_3018/Rx_PCIe_TLP_Data_Payload/ [Kernel PMU event] + <...> + dwc_rootport_3018/rx_memory_read,lane=?/ [Kernel PMU event] + +Time Based Analysis Event Usage +------------------------------- + +Example usage of counting PCIe RX TLP data payload (Units of bytes):: + + $# perf stat -a -e dwc_rootport_3018/Rx_PCIe_TLP_Data_Payload/ + +The average RX/TX bandwidth can be calculated using the following formula: + + PCIe RX Bandwidth = Rx_PCIe_TLP_Data_Payload / Measure_Time_Window + PCIe TX Bandwidth = Tx_PCIe_TLP_Data_Payload / Measure_Time_Window + +Lane Event Usage +------------------------------- + +Each lane has the same event set and to avoid generating a list of hundreds +of events, the user need to specify the lane ID explicitly, e.g.:: + + $# perf stat -a -e dwc_rootport_3018/rx_memory_read,lane=4/ + +The driver does not support sampling, therefore "perf record" will not +work. Per-task (without "-a") perf sessions are not supported. diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst index f60be04e4e336ecfc3d00eca73b609c3d20a909d..6bc7739fddb5e0b95a83d2f15edcf8ce1a11200d 100644 --- a/Documentation/admin-guide/perf/index.rst +++ b/Documentation/admin-guide/perf/index.rst @@ -19,6 +19,7 @@ Performance monitor support arm_dsu_pmu thunderx2-pmu alibaba_pmu + dwc_pcie_pmu nvidia-pmu meson-ddr-pmu cxl diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h index 9d316fdc6f9a55a5055f793197117424800ccb01..a155519a862f83e6e4bec2dea2dec9fbc27eb141 100644 --- a/drivers/infiniband/hw/erdma/erdma_hw.h +++ b/drivers/infiniband/hw/erdma/erdma_hw.h @@ -11,8 +11,6 @@ #include /* PCIe device related definition. */ -#define PCI_VENDOR_ID_ALIBABA 0x1ded - #define ERDMA_PCI_WIDTH 64 #define ERDMA_FUNC_BAR 0 #define ERDMA_MISX_BAR 2 diff --git a/drivers/pci/access.c b/drivers/pci/access.c index 6554a2e89d3612dcb8e232802269a34c42d28e83..6449056b57dd3032b0a1fbd991f4248be0801d84 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -598,3 +598,15 @@ int pci_write_config_dword(const struct pci_dev *dev, int where, return pci_bus_write_config_dword(dev->bus, dev->devfn, where, val); } EXPORT_SYMBOL(pci_write_config_dword); + +void pci_clear_and_set_config_dword(const struct pci_dev *dev, int pos, + u32 clear, u32 set) +{ + u32 val; + + pci_read_config_dword(dev, pos, &val); + val &= ~clear; + val |= set; + pci_write_config_dword(dev, pos, val); +} +EXPORT_SYMBOL(pci_clear_and_set_config_dword); diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index fc18e42f0a6edff2ded292aeccfbff7be661908a..5875f4f2f11075569099ac33f13b46268f0a1fb6 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -423,17 +423,6 @@ static void pcie_aspm_check_latency(struct pci_dev *endpoint) } } -static void pci_clear_and_set_dword(struct pci_dev *pdev, int pos, - u32 clear, u32 set) -{ - u32 val; - - pci_read_config_dword(pdev, pos, &val); - val &= ~clear; - val |= set; - pci_write_config_dword(pdev, pos, val); -} - /* Calculate L1.2 PM substate timing parameters */ static void aspm_calc_l12_info(struct pcie_link_state *link, u32 parent_l1ss_cap, u32 child_l1ss_cap) @@ -494,10 +483,12 @@ static void aspm_calc_l12_info(struct pcie_link_state *link, cl1_2_enables = cctl1 & PCI_L1SS_CTL1_L1_2_MASK; if (pl1_2_enables || cl1_2_enables) { - pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1, - PCI_L1SS_CTL1_L1_2_MASK, 0); - pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1, - PCI_L1SS_CTL1_L1_2_MASK, 0); + pci_clear_and_set_config_dword(child, + child->l1ss + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_L1_2_MASK, 0); + pci_clear_and_set_config_dword(parent, + parent->l1ss + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_L1_2_MASK, 0); } /* Program T_POWER_ON times in both ports */ @@ -505,22 +496,26 @@ static void aspm_calc_l12_info(struct pcie_link_state *link, pci_write_config_dword(child, child->l1ss + PCI_L1SS_CTL2, ctl2); /* Program Common_Mode_Restore_Time in upstream device */ - pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1, - PCI_L1SS_CTL1_CM_RESTORE_TIME, ctl1); + pci_clear_and_set_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_CM_RESTORE_TIME, ctl1); /* Program LTR_L1.2_THRESHOLD time in both ports */ - pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1, - PCI_L1SS_CTL1_LTR_L12_TH_VALUE | - PCI_L1SS_CTL1_LTR_L12_TH_SCALE, ctl1); - pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1, - PCI_L1SS_CTL1_LTR_L12_TH_VALUE | - PCI_L1SS_CTL1_LTR_L12_TH_SCALE, ctl1); + pci_clear_and_set_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_LTR_L12_TH_VALUE | + PCI_L1SS_CTL1_LTR_L12_TH_SCALE, + ctl1); + pci_clear_and_set_config_dword(child, child->l1ss + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_LTR_L12_TH_VALUE | + PCI_L1SS_CTL1_LTR_L12_TH_SCALE, + ctl1); if (pl1_2_enables || cl1_2_enables) { - pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1, 0, - pl1_2_enables); - pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1, 0, - cl1_2_enables); + pci_clear_and_set_config_dword(parent, + parent->l1ss + PCI_L1SS_CTL1, 0, + pl1_2_enables); + pci_clear_and_set_config_dword(child, + child->l1ss + PCI_L1SS_CTL1, 0, + cl1_2_enables); } } @@ -680,10 +675,10 @@ static void pcie_config_aspm_l1ss(struct pcie_link_state *link, u32 state) */ /* Disable all L1 substates */ - pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1, - PCI_L1SS_CTL1_L1SS_MASK, 0); - pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1, - PCI_L1SS_CTL1_L1SS_MASK, 0); + pci_clear_and_set_config_dword(child, child->l1ss + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_L1SS_MASK, 0); + pci_clear_and_set_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_L1SS_MASK, 0); /* * If needed, disable L1, and it gets enabled later * in pcie_config_aspm_link(). @@ -706,10 +701,10 @@ static void pcie_config_aspm_l1ss(struct pcie_link_state *link, u32 state) val |= PCI_L1SS_CTL1_PCIPM_L1_2; /* Enable what we need to enable */ - pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1, - PCI_L1SS_CTL1_L1SS_MASK, val); - pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1, - PCI_L1SS_CTL1_L1SS_MASK, val); + pci_clear_and_set_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_L1SS_MASK, val); + pci_clear_and_set_config_dword(child, child->l1ss + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_L1SS_MASK, val); } static void pcie_config_aspm_dev(struct pci_dev *pdev, u32 val) diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index 273d67ecf6d2530f0f31c8dd393b31b1fde560ee..ec6e0d9194a1c577c1378444470a23ff614fc101 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -217,6 +217,13 @@ config MARVELL_CN10K_DDR_PMU Enable perf support for Marvell DDR Performance monitoring event on CN10K platform. +config DWC_PCIE_PMU + tristate "Synopsys DesignWare PCIe PMU" + depends on PCI + help + Enable perf support for Synopsys DesignWare PCIe PMU Performance + monitoring event on platform including the Alibaba Yitian 710. + source "drivers/perf/arm_cspmu/Kconfig" source "drivers/perf/amlogic/Kconfig" diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index 16b3ec4db916d0bcb5f3895a82c7d9b1a77e18a2..a06338e3401c9f6ebbe9593a5176e220f760a357 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -23,6 +23,7 @@ obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o obj-$(CONFIG_MARVELL_CN10K_DDR_PMU) += marvell_cn10k_ddr_pmu.o obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o obj-$(CONFIG_ALIBABA_UNCORE_DRW_PMU) += alibaba_uncore_drw_pmu.o +obj-$(CONFIG_DWC_PCIE_PMU) += dwc_pcie_pmu.o obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu/ obj-$(CONFIG_MESON_DDR_PMU) += amlogic/ obj-$(CONFIG_CXL_PMU) += cxl_pmu.o diff --git a/drivers/perf/dwc_pcie_pmu.c b/drivers/perf/dwc_pcie_pmu.c new file mode 100644 index 0000000000000000000000000000000000000000..957058ad0099e2faa97c28d22153285983abf045 --- /dev/null +++ b/drivers/perf/dwc_pcie_pmu.c @@ -0,0 +1,792 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Synopsys DesignWare PCIe PMU driver + * + * Copyright (C) 2021-2023 Alibaba Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DWC_PCIE_VSEC_RAS_DES_ID 0x02 +#define DWC_PCIE_EVENT_CNT_CTL 0x8 + +/* + * Event Counter Data Select includes two parts: + * - 27-24: Group number(4-bit: 0..0x7) + * - 23-16: Event number(8-bit: 0..0x13) within the Group + * + * Put them together as in TRM. + */ +#define DWC_PCIE_CNT_EVENT_SEL GENMASK(27, 16) +#define DWC_PCIE_CNT_LANE_SEL GENMASK(11, 8) +#define DWC_PCIE_CNT_STATUS BIT(7) +#define DWC_PCIE_CNT_ENABLE GENMASK(4, 2) +#define DWC_PCIE_PER_EVENT_OFF 0x1 +#define DWC_PCIE_PER_EVENT_ON 0x3 +#define DWC_PCIE_EVENT_CLEAR GENMASK(1, 0) +#define DWC_PCIE_EVENT_PER_CLEAR 0x1 + +#define DWC_PCIE_EVENT_CNT_DATA 0xC + +#define DWC_PCIE_TIME_BASED_ANAL_CTL 0x10 +#define DWC_PCIE_TIME_BASED_REPORT_SEL GENMASK(31, 24) +#define DWC_PCIE_TIME_BASED_DURATION_SEL GENMASK(15, 8) +#define DWC_PCIE_DURATION_MANUAL_CTL 0x0 +#define DWC_PCIE_DURATION_1MS 0x1 +#define DWC_PCIE_DURATION_10MS 0x2 +#define DWC_PCIE_DURATION_100MS 0x3 +#define DWC_PCIE_DURATION_1S 0x4 +#define DWC_PCIE_DURATION_2S 0x5 +#define DWC_PCIE_DURATION_4S 0x6 +#define DWC_PCIE_DURATION_4US 0xFF +#define DWC_PCIE_TIME_BASED_TIMER_START BIT(0) +#define DWC_PCIE_TIME_BASED_CNT_ENABLE 0x1 + +#define DWC_PCIE_TIME_BASED_ANAL_DATA_REG_LOW 0x14 +#define DWC_PCIE_TIME_BASED_ANAL_DATA_REG_HIGH 0x18 + +/* Event attributes */ +#define DWC_PCIE_CONFIG_EVENTID GENMASK(15, 0) +#define DWC_PCIE_CONFIG_TYPE GENMASK(19, 16) +#define DWC_PCIE_CONFIG_LANE GENMASK(27, 20) + +#define DWC_PCIE_EVENT_ID(event) FIELD_GET(DWC_PCIE_CONFIG_EVENTID, (event)->attr.config) +#define DWC_PCIE_EVENT_TYPE(event) FIELD_GET(DWC_PCIE_CONFIG_TYPE, (event)->attr.config) +#define DWC_PCIE_EVENT_LANE(event) FIELD_GET(DWC_PCIE_CONFIG_LANE, (event)->attr.config) + +enum dwc_pcie_event_type { + DWC_PCIE_TIME_BASE_EVENT, + DWC_PCIE_LANE_EVENT, + DWC_PCIE_EVENT_TYPE_MAX, +}; + +#define DWC_PCIE_LANE_EVENT_MAX_PERIOD GENMASK_ULL(31, 0) +#define DWC_PCIE_MAX_PERIOD GENMASK_ULL(63, 0) + +struct dwc_pcie_pmu { + struct pmu pmu; + struct pci_dev *pdev; /* Root Port device */ + u16 ras_des_offset; + u32 nr_lanes; + + struct list_head pmu_node; + struct hlist_node cpuhp_node; + struct perf_event *event[DWC_PCIE_EVENT_TYPE_MAX]; + int on_cpu; +}; + +#define to_dwc_pcie_pmu(p) (container_of(p, struct dwc_pcie_pmu, pmu)) + +static int dwc_pcie_pmu_hp_state; +static struct list_head dwc_pcie_dev_info_head = + LIST_HEAD_INIT(dwc_pcie_dev_info_head); +static bool notify; + +struct dwc_pcie_dev_info { + struct platform_device *plat_dev; + struct pci_dev *pdev; + struct list_head dev_node; +}; + +struct dwc_pcie_vendor_id { + int vendor_id; +}; + +static const struct dwc_pcie_vendor_id dwc_pcie_vendor_ids[] = { + {.vendor_id = PCI_VENDOR_ID_ALIBABA }, + {} /* terminator */ +}; + +static ssize_t cpumask_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(dev_get_drvdata(dev)); + + return cpumap_print_to_pagebuf(true, buf, cpumask_of(pcie_pmu->on_cpu)); +} +static DEVICE_ATTR_RO(cpumask); + +static struct attribute *dwc_pcie_pmu_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL +}; + +static struct attribute_group dwc_pcie_cpumask_attr_group = { + .attrs = dwc_pcie_pmu_cpumask_attrs, +}; + +struct dwc_pcie_format_attr { + struct device_attribute attr; + u64 field; + int config; +}; + +PMU_FORMAT_ATTR(eventid, "config:0-15"); +PMU_FORMAT_ATTR(type, "config:16-19"); +PMU_FORMAT_ATTR(lane, "config:20-27"); + +static struct attribute *dwc_pcie_format_attrs[] = { + &format_attr_type.attr, + &format_attr_eventid.attr, + &format_attr_lane.attr, + NULL, +}; + +static struct attribute_group dwc_pcie_format_attrs_group = { + .name = "format", + .attrs = dwc_pcie_format_attrs, +}; + +struct dwc_pcie_event_attr { + struct device_attribute attr; + enum dwc_pcie_event_type type; + u16 eventid; + u8 lane; +}; + +static ssize_t dwc_pcie_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dwc_pcie_event_attr *eattr; + + eattr = container_of(attr, typeof(*eattr), attr); + + if (eattr->type == DWC_PCIE_LANE_EVENT) + return sysfs_emit(buf, "eventid=0x%x,type=0x%x,lane=?\n", + eattr->eventid, eattr->type); + else if (eattr->type == DWC_PCIE_TIME_BASE_EVENT) + return sysfs_emit(buf, "eventid=0x%x,type=0x%x\n", + eattr->eventid, eattr->type); + + return 0; +} + +#define DWC_PCIE_EVENT_ATTR(_name, _type, _eventid, _lane) \ + (&((struct dwc_pcie_event_attr[]) {{ \ + .attr = __ATTR(_name, 0444, dwc_pcie_event_show, NULL), \ + .type = _type, \ + .eventid = _eventid, \ + .lane = _lane, \ + }})[0].attr.attr) + +#define DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(_name, _eventid) \ + DWC_PCIE_EVENT_ATTR(_name, DWC_PCIE_TIME_BASE_EVENT, _eventid, 0) +#define DWC_PCIE_PMU_LANE_EVENT_ATTR(_name, _eventid) \ + DWC_PCIE_EVENT_ATTR(_name, DWC_PCIE_LANE_EVENT, _eventid, 0) + +static struct attribute *dwc_pcie_pmu_time_event_attrs[] = { + /* Group #0 */ + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(one_cycle, 0x00), + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(TX_L0S, 0x01), + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(RX_L0S, 0x02), + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L0, 0x03), + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1, 0x04), + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1_1, 0x05), + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1_2, 0x06), + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(CFG_RCVRY, 0x07), + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(TX_RX_L0S, 0x08), + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1_AUX, 0x09), + + /* Group #1 */ + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(Tx_PCIe_TLP_Data_Payload, 0x20), + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(Rx_PCIe_TLP_Data_Payload, 0x21), + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(Tx_CCIX_TLP_Data_Payload, 0x22), + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(Rx_CCIX_TLP_Data_Payload, 0x23), + + /* + * Leave it to the user to specify the lane ID to avoid generating + * a list of hundreds of events. + */ + DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_ack_dllp, 0x600), + DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_update_fc_dllp, 0x601), + DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_ack_dllp, 0x602), + DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_update_fc_dllp, 0x603), + DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_nulified_tlp, 0x604), + DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_nulified_tlp, 0x605), + DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_duplicate_tl, 0x606), + DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_memory_write, 0x700), + DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_memory_read, 0x701), + DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_configuration_write, 0x702), + DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_configuration_read, 0x703), + DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_io_write, 0x704), + DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_io_read, 0x705), + DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_completion_without_data, 0x706), + DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_completion_with_data, 0x707), + DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_message_tlp, 0x708), + DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_atomic, 0x709), + DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_tlp_with_prefix, 0x70A), + DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_memory_write, 0x70B), + DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_memory_read, 0x70C), + DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_io_write, 0x70F), + DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_io_read, 0x710), + DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_completion_without_data, 0x711), + DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_completion_with_data, 0x712), + DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_message_tlp, 0x713), + DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_atomic, 0x714), + DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_tlp_with_prefix, 0x715), + DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_ccix_tlp, 0x716), + DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_ccix_tlp, 0x717), + NULL +}; + +static const struct attribute_group dwc_pcie_event_attrs_group = { + .name = "events", + .attrs = dwc_pcie_pmu_time_event_attrs, +}; + +static const struct attribute_group *dwc_pcie_attr_groups[] = { + &dwc_pcie_event_attrs_group, + &dwc_pcie_format_attrs_group, + &dwc_pcie_cpumask_attr_group, + NULL +}; + +static void dwc_pcie_pmu_lane_event_enable(struct dwc_pcie_pmu *pcie_pmu, + bool enable) +{ + struct pci_dev *pdev = pcie_pmu->pdev; + u16 ras_des_offset = pcie_pmu->ras_des_offset; + + if (enable) + pci_clear_and_set_config_dword(pdev, + ras_des_offset + DWC_PCIE_EVENT_CNT_CTL, + DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_ON); + else + pci_clear_and_set_config_dword(pdev, + ras_des_offset + DWC_PCIE_EVENT_CNT_CTL, + DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_OFF); +} + +static void dwc_pcie_pmu_time_based_event_enable(struct dwc_pcie_pmu *pcie_pmu, + bool enable) +{ + struct pci_dev *pdev = pcie_pmu->pdev; + u16 ras_des_offset = pcie_pmu->ras_des_offset; + + pci_clear_and_set_config_dword(pdev, + ras_des_offset + DWC_PCIE_TIME_BASED_ANAL_CTL, + DWC_PCIE_TIME_BASED_TIMER_START, enable); +} + +static u64 dwc_pcie_pmu_read_lane_event_counter(struct perf_event *event) +{ + struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu); + struct pci_dev *pdev = pcie_pmu->pdev; + u16 ras_des_offset = pcie_pmu->ras_des_offset; + u32 val; + + pci_read_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_DATA, &val); + + return val; +} + +static u64 dwc_pcie_pmu_read_time_based_counter(struct perf_event *event) +{ + struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu); + struct pci_dev *pdev = pcie_pmu->pdev; + int event_id = DWC_PCIE_EVENT_ID(event); + u16 ras_des_offset = pcie_pmu->ras_des_offset; + u32 lo, hi, ss; + u64 val; + + /* + * The 64-bit value of the data counter is spread across two + * registers that are not synchronized. In order to read them + * atomically, ensure that the high 32 bits match before and after + * reading the low 32 bits. + */ + pci_read_config_dword(pdev, + ras_des_offset + DWC_PCIE_TIME_BASED_ANAL_DATA_REG_HIGH, &hi); + do { + /* snapshot the high 32 bits */ + ss = hi; + + pci_read_config_dword( + pdev, ras_des_offset + DWC_PCIE_TIME_BASED_ANAL_DATA_REG_LOW, + &lo); + pci_read_config_dword( + pdev, ras_des_offset + DWC_PCIE_TIME_BASED_ANAL_DATA_REG_HIGH, + &hi); + } while (hi != ss); + + val = ((u64)hi << 32) | lo; + /* + * The Group#1 event measures the amount of data processed in 16-byte + * units. Simplify the end-user interface by multiplying the counter + * at the point of read. + */ + if (event_id >= 0x20 && event_id <= 0x23) + val *= 16; + + return val; +} + +static void dwc_pcie_pmu_event_update(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event); + u64 delta, prev, now = 0; + + do { + prev = local64_read(&hwc->prev_count); + + if (type == DWC_PCIE_LANE_EVENT) + now = dwc_pcie_pmu_read_lane_event_counter(event); + else if (type == DWC_PCIE_TIME_BASE_EVENT) + now = dwc_pcie_pmu_read_time_based_counter(event); + + } while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev); + + delta = (now - prev) & DWC_PCIE_MAX_PERIOD; + /* 32-bit counter for Lane Event Counting */ + if (type == DWC_PCIE_LANE_EVENT) + delta &= DWC_PCIE_LANE_EVENT_MAX_PERIOD; + + local64_add(delta, &event->count); +} + +static int dwc_pcie_pmu_event_init(struct perf_event *event) +{ + struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu); + enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event); + struct perf_event *sibling; + u32 lane; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* We don't support sampling */ + if (is_sampling_event(event)) + return -EINVAL; + + /* We cannot support task bound events */ + if (event->cpu < 0 || event->attach_state & PERF_ATTACH_TASK) + return -EINVAL; + + if (event->group_leader != event && + !is_software_event(event->group_leader)) + return -EINVAL; + + for_each_sibling_event(sibling, event->group_leader) { + if (sibling->pmu != event->pmu && !is_software_event(sibling)) + return -EINVAL; + } + + if (type < 0 || type >= DWC_PCIE_EVENT_TYPE_MAX) + return -EINVAL; + + if (type == DWC_PCIE_LANE_EVENT) { + lane = DWC_PCIE_EVENT_LANE(event); + if (lane < 0 || lane >= pcie_pmu->nr_lanes) + return -EINVAL; + } + + event->cpu = pcie_pmu->on_cpu; + + return 0; +} + +static void dwc_pcie_pmu_event_start(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu); + enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event); + + hwc->state = 0; + local64_set(&hwc->prev_count, 0); + + if (type == DWC_PCIE_LANE_EVENT) + dwc_pcie_pmu_lane_event_enable(pcie_pmu, true); + else if (type == DWC_PCIE_TIME_BASE_EVENT) + dwc_pcie_pmu_time_based_event_enable(pcie_pmu, true); +} + +static void dwc_pcie_pmu_event_stop(struct perf_event *event, int flags) +{ + struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu); + enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event); + struct hw_perf_event *hwc = &event->hw; + + if (event->hw.state & PERF_HES_STOPPED) + return; + + if (type == DWC_PCIE_LANE_EVENT) + dwc_pcie_pmu_lane_event_enable(pcie_pmu, false); + else if (type == DWC_PCIE_TIME_BASE_EVENT) + dwc_pcie_pmu_time_based_event_enable(pcie_pmu, false); + + dwc_pcie_pmu_event_update(event); + hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; +} + +static int dwc_pcie_pmu_event_add(struct perf_event *event, int flags) +{ + struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu); + struct pci_dev *pdev = pcie_pmu->pdev; + struct hw_perf_event *hwc = &event->hw; + enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event); + int event_id = DWC_PCIE_EVENT_ID(event); + int lane = DWC_PCIE_EVENT_LANE(event); + u16 ras_des_offset = pcie_pmu->ras_des_offset; + u32 ctrl; + + /* one counter for each type and it is in use */ + if (pcie_pmu->event[type]) + return -ENOSPC; + + pcie_pmu->event[type] = event; + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + + if (type == DWC_PCIE_LANE_EVENT) { + /* EVENT_COUNTER_DATA_REG needs clear manually */ + ctrl = FIELD_PREP(DWC_PCIE_CNT_EVENT_SEL, event_id) | + FIELD_PREP(DWC_PCIE_CNT_LANE_SEL, lane) | + FIELD_PREP(DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_OFF) | + FIELD_PREP(DWC_PCIE_EVENT_CLEAR, DWC_PCIE_EVENT_PER_CLEAR); + pci_write_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_CTL, + ctrl); + } else if (type == DWC_PCIE_TIME_BASE_EVENT) { + /* + * TIME_BASED_ANAL_DATA_REG is a 64 bit register, we can safely + * use it with any manually controlled duration. And it is + * cleared when next measurement starts. + */ + ctrl = FIELD_PREP(DWC_PCIE_TIME_BASED_REPORT_SEL, event_id) | + FIELD_PREP(DWC_PCIE_TIME_BASED_DURATION_SEL, + DWC_PCIE_DURATION_MANUAL_CTL) | + DWC_PCIE_TIME_BASED_CNT_ENABLE; + pci_write_config_dword( + pdev, ras_des_offset + DWC_PCIE_TIME_BASED_ANAL_CTL, ctrl); + } + + if (flags & PERF_EF_START) + dwc_pcie_pmu_event_start(event, PERF_EF_RELOAD); + + perf_event_update_userpage(event); + + return 0; +} + +static void dwc_pcie_pmu_event_del(struct perf_event *event, int flags) +{ + struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu); + enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event); + + dwc_pcie_pmu_event_stop(event, flags | PERF_EF_UPDATE); + perf_event_update_userpage(event); + pcie_pmu->event[type] = NULL; +} + +static void dwc_pcie_pmu_remove_cpuhp_instance(void *hotplug_node) +{ + cpuhp_state_remove_instance_nocalls(dwc_pcie_pmu_hp_state, hotplug_node); +} + +/* + * Find the binded DES capability device info of a PCI device. + * @pdev: The PCI device. + */ +static struct dwc_pcie_dev_info *dwc_pcie_find_dev_info(struct pci_dev *pdev) +{ + struct dwc_pcie_dev_info *dev_info; + + list_for_each_entry(dev_info, &dwc_pcie_dev_info_head, dev_node) + if (dev_info->pdev == pdev) + return dev_info; + + return NULL; +} + +static void dwc_pcie_unregister_pmu(void *data) +{ + struct dwc_pcie_pmu *pcie_pmu = data; + + perf_pmu_unregister(&pcie_pmu->pmu); +} + +static bool dwc_pcie_match_des_cap(struct pci_dev *pdev) +{ + const struct dwc_pcie_vendor_id *vid; + u16 vsec = 0; + u32 val; + + if (!pci_is_pcie(pdev) || !(pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT)) + return false; + + for (vid = dwc_pcie_vendor_ids; vid->vendor_id; vid++) { + vsec = pci_find_vsec_capability(pdev, vid->vendor_id, + DWC_PCIE_VSEC_RAS_DES_ID); + if (vsec) + break; + } + if (!vsec) + return false; + + pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, &val); + if (PCI_VNDR_HEADER_REV(val) != 0x04) + return false; + + pci_dbg(pdev, + "Detected PCIe Vendor-Specific Extended Capability RAS DES\n"); + return true; +} + +static void dwc_pcie_unregister_dev(struct dwc_pcie_dev_info *dev_info) +{ + platform_device_unregister(dev_info->plat_dev); + list_del(&dev_info->dev_node); + kfree(dev_info); +} + +static int dwc_pcie_register_dev(struct pci_dev *pdev) +{ + struct platform_device *plat_dev; + struct dwc_pcie_dev_info *dev_info; + u32 bdf; + + bdf = PCI_DEVID(pdev->bus->number, pdev->devfn); + plat_dev = platform_device_register_data(NULL, "dwc_pcie_pmu", bdf, + pdev, sizeof(*pdev)); + + if (IS_ERR(plat_dev)) + return PTR_ERR(plat_dev); + + dev_info = kzalloc(sizeof(*dev_info), GFP_KERNEL); + if (!dev_info) + return -ENOMEM; + + /* Cache platform device to handle pci device hotplug */ + dev_info->plat_dev = plat_dev; + dev_info->pdev = pdev; + list_add(&dev_info->dev_node, &dwc_pcie_dev_info_head); + + return 0; +} + +static int dwc_pcie_pmu_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + struct pci_dev *pdev = to_pci_dev(dev); + struct dwc_pcie_dev_info *dev_info; + + switch (action) { + case BUS_NOTIFY_ADD_DEVICE: + if (!dwc_pcie_match_des_cap(pdev)) + return NOTIFY_DONE; + if (dwc_pcie_register_dev(pdev)) + return NOTIFY_BAD; + break; + case BUS_NOTIFY_DEL_DEVICE: + dev_info = dwc_pcie_find_dev_info(pdev); + if (!dev_info) + return NOTIFY_DONE; + dwc_pcie_unregister_dev(dev_info); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block dwc_pcie_pmu_nb = { + .notifier_call = dwc_pcie_pmu_notifier, +}; + +static int dwc_pcie_pmu_probe(struct platform_device *plat_dev) +{ + struct pci_dev *pdev = plat_dev->dev.platform_data; + struct dwc_pcie_pmu *pcie_pmu; + char *name; + u32 bdf, val; + u16 vsec; + int ret; + + vsec = pci_find_vsec_capability(pdev, pdev->vendor, + DWC_PCIE_VSEC_RAS_DES_ID); + pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, &val); + bdf = PCI_DEVID(pdev->bus->number, pdev->devfn); + name = devm_kasprintf(&plat_dev->dev, GFP_KERNEL, "dwc_rootport_%x", bdf); + if (!name) + return -ENOMEM; + + pcie_pmu = devm_kzalloc(&plat_dev->dev, sizeof(*pcie_pmu), GFP_KERNEL); + if (!pcie_pmu) + return -ENOMEM; + + pcie_pmu->pdev = pdev; + pcie_pmu->ras_des_offset = vsec; + pcie_pmu->nr_lanes = pcie_get_width_cap(pdev); + pcie_pmu->on_cpu = -1; + pcie_pmu->pmu = (struct pmu){ + .name = name, + .parent = &pdev->dev, + .module = THIS_MODULE, + .attr_groups = dwc_pcie_attr_groups, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + .task_ctx_nr = perf_invalid_context, + .event_init = dwc_pcie_pmu_event_init, + .add = dwc_pcie_pmu_event_add, + .del = dwc_pcie_pmu_event_del, + .start = dwc_pcie_pmu_event_start, + .stop = dwc_pcie_pmu_event_stop, + .read = dwc_pcie_pmu_event_update, + }; + + /* Add this instance to the list used by the offline callback */ + ret = cpuhp_state_add_instance(dwc_pcie_pmu_hp_state, + &pcie_pmu->cpuhp_node); + if (ret) { + pci_err(pdev, "Error %d registering hotplug @%x\n", ret, bdf); + return ret; + } + + /* Unwind when platform driver removes */ + ret = devm_add_action_or_reset(&plat_dev->dev, + dwc_pcie_pmu_remove_cpuhp_instance, + &pcie_pmu->cpuhp_node); + if (ret) + return ret; + + ret = perf_pmu_register(&pcie_pmu->pmu, name, -1); + if (ret) { + pci_err(pdev, "Error %d registering PMU @%x\n", ret, bdf); + return ret; + } + ret = devm_add_action_or_reset(&plat_dev->dev, dwc_pcie_unregister_pmu, + pcie_pmu); + if (ret) + return ret; + + return 0; +} + +static int dwc_pcie_pmu_online_cpu(unsigned int cpu, struct hlist_node *cpuhp_node) +{ + struct dwc_pcie_pmu *pcie_pmu; + + pcie_pmu = hlist_entry_safe(cpuhp_node, struct dwc_pcie_pmu, cpuhp_node); + if (pcie_pmu->on_cpu == -1) + pcie_pmu->on_cpu = cpumask_local_spread( + 0, dev_to_node(&pcie_pmu->pdev->dev)); + + return 0; +} + +static int dwc_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_node) +{ + struct dwc_pcie_pmu *pcie_pmu; + struct pci_dev *pdev; + int node; + cpumask_t mask; + unsigned int target; + + pcie_pmu = hlist_entry_safe(cpuhp_node, struct dwc_pcie_pmu, cpuhp_node); + /* Nothing to do if this CPU doesn't own the PMU */ + if (cpu != pcie_pmu->on_cpu) + return 0; + + pcie_pmu->on_cpu = -1; + pdev = pcie_pmu->pdev; + node = dev_to_node(&pdev->dev); + if (cpumask_and(&mask, cpumask_of_node(node), cpu_online_mask) && + cpumask_andnot(&mask, &mask, cpumask_of(cpu))) + target = cpumask_any(&mask); + else + target = cpumask_any_but(cpu_online_mask, cpu); + + if (target >= nr_cpu_ids) { + pci_err(pdev, "There is no CPU to set\n"); + return 0; + } + + /* This PMU does NOT support interrupt, just migrate context. */ + perf_pmu_migrate_context(&pcie_pmu->pmu, cpu, target); + pcie_pmu->on_cpu = target; + + return 0; +} + +static struct platform_driver dwc_pcie_pmu_driver = { + .probe = dwc_pcie_pmu_probe, + .driver = {.name = "dwc_pcie_pmu",}, +}; + +static int __init dwc_pcie_pmu_init(void) +{ + struct pci_dev *pdev = NULL; + bool found = false; + int ret; + + for_each_pci_dev(pdev) { + if (!dwc_pcie_match_des_cap(pdev)) + continue; + + ret = dwc_pcie_register_dev(pdev); + if (ret) { + pci_dev_put(pdev); + return ret; + } + + found = true; + } + if (!found) + return -ENODEV; + + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + "perf/dwc_pcie_pmu:online", + dwc_pcie_pmu_online_cpu, + dwc_pcie_pmu_offline_cpu); + if (ret < 0) + return ret; + + dwc_pcie_pmu_hp_state = ret; + + ret = platform_driver_register(&dwc_pcie_pmu_driver); + if (ret) + goto platform_driver_register_err; + + ret = bus_register_notifier(&pci_bus_type, &dwc_pcie_pmu_nb); + if (ret) + goto platform_driver_register_err; + notify = true; + + return 0; + +platform_driver_register_err: + cpuhp_remove_multi_state(dwc_pcie_pmu_hp_state); + + return ret; +} + +static void __exit dwc_pcie_pmu_exit(void) +{ + struct dwc_pcie_dev_info *dev_info, *tmp; + + if (notify) + bus_unregister_notifier(&pci_bus_type, &dwc_pcie_pmu_nb); + list_for_each_entry_safe(dev_info, tmp, &dwc_pcie_dev_info_head, dev_node) + dwc_pcie_unregister_dev(dev_info); + platform_driver_unregister(&dwc_pcie_pmu_driver); + cpuhp_remove_multi_state(dwc_pcie_pmu_hp_state); +} + +module_init(dwc_pcie_pmu_init); +module_exit(dwc_pcie_pmu_exit); + +MODULE_DESCRIPTION("PMU driver for DesignWare Cores PCI Express Controller"); +MODULE_AUTHOR("Shuai Xue "); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/pci.h b/include/linux/pci.h index b56417276042dc9dd5707e24f9d5c6cc09bd6bfd..94d65fa39cec4dc88488618d6deed57d87395d14 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1213,6 +1213,8 @@ int pci_read_config_dword(const struct pci_dev *dev, int where, u32 *val); int pci_write_config_byte(const struct pci_dev *dev, int where, u8 val); int pci_write_config_word(const struct pci_dev *dev, int where, u16 val); int pci_write_config_dword(const struct pci_dev *dev, int where, u32 val); +void pci_clear_and_set_config_dword(const struct pci_dev *dev, int pos, + u32 clear, u32 set); int pcie_capability_read_word(struct pci_dev *dev, int pos, u16 *val); int pcie_capability_read_dword(struct pci_dev *dev, int pos, u32 *val); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 3fb4124dca0e35260fa5b4934d6e0185d91fb410..fd76350bfafef9676ed14358846d99c38f1c6afb 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2606,6 +2606,8 @@ #define PCI_VENDOR_ID_TEKRAM 0x1de1 #define PCI_DEVICE_ID_TEKRAM_DC290 0xdc29 +#define PCI_VENDOR_ID_ALIBABA 0x1ded + #define PCI_VENDOR_ID_TEHUTI 0x1fc9 #define PCI_DEVICE_ID_TEHUTI_3009 0x3009 #define PCI_DEVICE_ID_TEHUTI_3010 0x3010