diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 4a021d86fc578d56b3151beafaabd009de2788f2..8739e15c137b491c536b69f7bd481bf7986a4d95 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -184,6 +184,7 @@ config LOONGARCH select USE_PERCPU_NUMA_NODE_ID select USER_STACKTRACE_SUPPORT select ZONE_DMA32 + select CPUMASK_OFFSTACK if NR_CPUS > 256 config 32BIT bool @@ -419,6 +420,15 @@ config SCHED_SMT Improves scheduler's performance when there are multiple threads in one physical core. +config SCHED_MC + bool "Multi-core scheduler support" + depends on SMP + default y + help + Multi-core scheduler support improves the CPU scheduler's decision + making when dealing with multi-core CPU chips at a cost of slightly + increased overhead in some places. + config SMP bool "Multi-Processing support" help @@ -447,10 +457,10 @@ config HOTPLUG_CPU Say N if you want to disable CPU hotplug. config NR_CPUS - int "Maximum number of CPUs (2-256)" - range 2 256 + int "Maximum number of CPUs (2-2048)" + range 2 2048 depends on SMP - default "64" + default "2048" help This allows you to specify the maximum number of CPUs which this kernel will support. 
diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 106b2029211b56edb20c3ecab2bbd37bfc4d136e..6844a6c59de664aed2dd1a0ad6645337b1031638 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -42,7 +42,6 @@ CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y CONFIG_CMDLINE="vfio_iommu_type1.allow_unsafe_interrupts=1 nokaslr" CONFIG_CMDLINE_EXTEND=y -CONFIG_NR_CPUS=256 CONFIG_NUMA=y CONFIG_ARCH_IOREMAP=y CONFIG_CPU_HAS_LSX=y @@ -847,6 +846,7 @@ CONFIG_I40E_DCB=y CONFIG_I40EVF=m CONFIG_ICE=m CONFIG_FM10K=m +CONFIG_IGC=m # CONFIG_NET_VENDOR_MARVELL is not set CONFIG_MLX4_EN=m # CONFIG_MLX4_CORE_GEN2 is not set @@ -1179,6 +1179,7 @@ CONFIG_PINCTRL_LOONGSON2=y CONFIG_GPIO_SYSFS=y CONFIG_GPIO_AMDPT=m CONFIG_GPIO_LOONGSON_64BIT=y +CONFIG_GPIO_PCA953X=m CONFIG_GPIO_VIPERBOARD=m CONFIG_POWER_RESET=y CONFIG_SENSORS_AD7414=m diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h index 630e5ebec21cb3700dc1d22b003111ef69906daa..06a0ca03d2ce786691b5be0aa583642db891772e 100644 --- a/arch/loongarch/include/asm/smp.h +++ b/arch/loongarch/include/asm/smp.h @@ -23,6 +23,7 @@ extern int smp_num_siblings; extern int num_processors; extern int disabled_cpus; extern cpumask_t cpu_sibling_map[]; +extern cpumask_t cpu_llc_shared_map[]; extern cpumask_t cpu_core_map[]; extern cpumask_t cpu_foreign_map[]; diff --git a/arch/loongarch/include/asm/topology.h b/arch/loongarch/include/asm/topology.h index 379f5e4830ebd5a96f4e4d33c6b40dd03f9c7031..3b21a6d4f5220f94ac2c7423b4398f6934beebe5 100644 --- a/arch/loongarch/include/asm/topology.h +++ b/arch/loongarch/include/asm/topology.h @@ -30,6 +30,15 @@ void numa_set_distance(int from, int to, int distance); #ifdef CONFIG_SMP extern unsigned int __max_packages; + +/* + * Return cpus that share the last level cache.
+ */ +static inline const struct cpumask *cpu_coregroup_mask(int cpu) +{ + return &cpu_llc_shared_map[cpu]; +} + #define topology_max_packages() (__max_packages) #define topology_physical_package_id(cpu) (cpu_data[cpu].package) #define topology_core_id(cpu) (cpu_data[cpu].core) diff --git a/arch/loongarch/kernel/kfpu.c b/arch/loongarch/kernel/kfpu.c index ec5b28e570c963482d18e50f28043b066a425ffc..4c476904227f953bab5c1a89c9fe1175bfcc6647 100644 --- a/arch/loongarch/kernel/kfpu.c +++ b/arch/loongarch/kernel/kfpu.c @@ -18,11 +18,28 @@ static unsigned int euen_mask = CSR_EUEN_FPEN; static DEFINE_PER_CPU(bool, in_kernel_fpu); static DEFINE_PER_CPU(unsigned int, euen_current); +static inline void fpregs_lock(void) +{ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_disable(); + else + local_bh_disable(); +} + +static inline void fpregs_unlock(void) +{ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_enable(); + else + local_bh_enable(); +} + void kernel_fpu_begin(void) { unsigned int *euen_curr; - preempt_disable(); + if (!irqs_disabled()) + fpregs_lock(); WARN_ON(this_cpu_read(in_kernel_fpu)); @@ -73,7 +90,8 @@ void kernel_fpu_end(void) this_cpu_write(in_kernel_fpu, false); - preempt_enable(); + if (!irqs_disabled()) + fpregs_unlock(); } EXPORT_SYMBOL_GPL(kernel_fpu_end); diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 7f693a24687d1a364dc7eeae85dc02bad637a88a..5e9416106f425dd9944a79ca54de5078f1ff5f23 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -45,6 +45,10 @@ EXPORT_SYMBOL(__cpu_logical_map); cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_sibling_map); +/* Representing the last level cache shared map of each logical CPU */ +cpumask_t cpu_llc_shared_map[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(cpu_llc_shared_map); + /* Representing the core map of multi-core chips of each logical CPU */ cpumask_t cpu_core_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_core_map); @@ -62,6 +66,9 @@ 
EXPORT_SYMBOL(cpu_foreign_map); /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; +/* representing cpus for which llc shared maps can be computed */ +static cpumask_t cpu_llc_shared_setup_map; + /* representing cpus for which core maps can be computed */ static cpumask_t cpu_core_setup_map; @@ -103,6 +110,34 @@ static inline void set_cpu_core_map(int cpu) } } +static inline void set_cpu_llc_shared_map(int cpu) +{ + int i; + + cpumask_set_cpu(cpu, &cpu_llc_shared_setup_map); + + for_each_cpu(i, &cpu_llc_shared_setup_map) { + if (cpu_to_node(cpu) == cpu_to_node(i)) { + cpumask_set_cpu(i, &cpu_llc_shared_map[cpu]); + cpumask_set_cpu(cpu, &cpu_llc_shared_map[i]); + } + } +} + +static inline void clear_cpu_llc_shared_map(int cpu) +{ + int i; + + for_each_cpu(i, &cpu_llc_shared_setup_map) { + if (cpu_to_node(cpu) == cpu_to_node(i)) { + cpumask_clear_cpu(i, &cpu_llc_shared_map[cpu]); + cpumask_clear_cpu(cpu, &cpu_llc_shared_map[i]); + } + } + + cpumask_clear_cpu(cpu, &cpu_llc_shared_setup_map); +} + static inline void set_cpu_sibling_map(int cpu) { int i; @@ -402,6 +437,7 @@ int loongson_cpu_disable(void) #endif set_cpu_online(cpu, false); clear_cpu_sibling_map(cpu); + clear_cpu_llc_shared_map(cpu); calculate_cpu_foreign_map(); local_irq_save(flags); fixup_irqs(); @@ -523,6 +559,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) current_thread_info()->cpu = 0; loongson_prepare_cpus(max_cpus); set_cpu_sibling_map(0); + set_cpu_llc_shared_map(0); set_cpu_core_map(0); calculate_cpu_foreign_map(); #ifndef CONFIG_HOTPLUG_CPU @@ -564,6 +601,7 @@ asmlinkage void start_secondary(void) loongson_init_secondary(); set_cpu_sibling_map(cpu); + set_cpu_llc_shared_map(cpu); set_cpu_core_map(cpu); notify_cpu_starting(cpu); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index 357d09656a7a9e3a179f1f7c2f097b9a19c6fda3..0bb4081c8c41630b1a489114216ff945b07761d1 
100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -64,13 +64,15 @@ DMA_STATUS_TPS | DMA_STATUS_TI | \ DMA_STATUS_MSK_COMMON_LOONGSON) -#define PCI_DEVICE_ID_LOONGSON_GMAC 0x7a03 +#define PCI_DEVICE_ID_LOONGSON_GMAC1 0x7a03 +#define PCI_DEVICE_ID_LOONGSON_GMAC2 0x7a23 #define PCI_DEVICE_ID_LOONGSON_GNET 0x7a13 -#define LOONGSON_DWMAC_CORE_1_00 0x10 /* Loongson custom IP */ -#define CHANNEL_NUM 8 +#define DWMAC_CORE_MULTICHAN_V1 0x10 /* Loongson custom ID 0x10 */ +#define DWMAC_CORE_MULTICHAN_V2 0x12 /* Loongson custom ID 0x12 */ struct loongson_data { - u32 gmac_verion; + u32 multichan; + u32 loongson_id; struct device *dev; }; @@ -81,6 +83,8 @@ struct stmmac_pci_info { static void loongson_default_data(struct pci_dev *pdev, struct plat_stmmacenet_data *plat) { + struct loongson_data *ld = plat->bsp_priv; + /* Get bus_id, this can be overloaded later */ plat->bus_id = (pci_domain_nr(pdev->bus) << 16) | PCI_DEVID(pdev->bus->number, pdev->devfn); @@ -114,6 +118,31 @@ static void loongson_default_data(struct pci_dev *pdev, plat->dma_cfg->pblx8 = true; plat->multicast_filter_bins = 256; + + switch (ld->loongson_id) { + case DWMAC_CORE_MULTICHAN_V1: + ld->multichan = 1; + plat->rx_queues_to_use = 8; + plat->tx_queues_to_use = 8; + + /* Only channel 0 supports checksum, + * so turn off checksum to enable multiple channels. 
+ */ + for (int i = 1; i < 8; i++) + plat->tx_queues_cfg[i].coe_unsupported = 1; + + break; + case DWMAC_CORE_MULTICHAN_V2: + ld->multichan = 1; + plat->rx_queues_to_use = 4; + plat->tx_queues_to_use = 4; + break; + default: + ld->multichan = 0; + plat->tx_queues_to_use = 1; + plat->rx_queues_to_use = 1; + break; + } } static int loongson_gmac_data(struct pci_dev *pdev, @@ -336,9 +365,11 @@ static int loongson_dwmac_config_msi(struct pci_dev *pdev, struct stmmac_resources *res, struct device_node *np) { - int i, ret, vecs; + int i, ch_num, ret, vecs; + + ch_num = min(plat->tx_queues_to_use, plat->rx_queues_to_use); - vecs = roundup_pow_of_two(CHANNEL_NUM * 2 + 1); + vecs = roundup_pow_of_two(ch_num * 2 + 1); ret = pci_alloc_irq_vectors(pdev, vecs, vecs, PCI_IRQ_MSI); if (ret < 0) { dev_info(&pdev->dev, @@ -353,10 +384,10 @@ static int loongson_dwmac_config_msi(struct pci_dev *pdev, * --------- ----- -------- -------- ... -------- -------- * IRQ NUM | 0 | 1 | 2 | ... | 15 | 16 | */ - for (i = 0; i < CHANNEL_NUM; i++) { - res->rx_irq[CHANNEL_NUM - 1 - i] = + for (i = 0; i < ch_num; i++) { + res->rx_irq[ch_num - 1 - i] = pci_irq_vector(pdev, 1 + i * 2); - res->tx_irq[CHANNEL_NUM - 1 - i] = + res->tx_irq[ch_num - 1 - i] = pci_irq_vector(pdev, 2 + i * 2); } @@ -391,7 +422,7 @@ static struct mac_device_info *loongson_dwmac_setup(void *apriv) * AV feature and GMAC_INT_STATUS CSR flags layout. Get back the * original value so the correct HW-interface would be selected. */ - if (ld->gmac_verion == LOONGSON_DWMAC_CORE_1_00) { + if (ld->multichan) { priv->synopsys_id = DWMAC_CORE_3_70; *dma = dwmac1000_dma_ops; dma->init_chan = loongson_gnet_dma_init_channel; @@ -415,10 +446,10 @@ static struct mac_device_info *loongson_dwmac_setup(void *apriv) /* The GMAC devices with PCI ID 0x7a03 does not support any pause mode. * The GNET devices without CORE ID 0x10 does not support half-duplex. 
*/ - if (pdev->device == PCI_DEVICE_ID_LOONGSON_GMAC) { + if (pdev->device != PCI_DEVICE_ID_LOONGSON_GNET) { mac->link.caps = MAC_10 | MAC_100 | MAC_1000; } else { - if (ld->gmac_verion == LOONGSON_DWMAC_CORE_1_00) + if (ld->multichan) mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | MAC_10 | MAC_100 | MAC_1000; else @@ -504,6 +535,15 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id pci_set_master(pdev); + plat->bsp_priv = ld; + plat->setup = loongson_dwmac_setup; + plat->fix_soc_reset = loongson_fix_soc_reset; + ld->dev = &pdev->dev; + + memset(&res, 0, sizeof(res)); + res.addr = pcim_iomap_table(pdev)[0]; + ld->loongson_id = readl(res.addr + GMAC_VERSION) & 0xff; + info = (struct stmmac_pci_info *)id->driver_data; ret = info->setup(pdev, plat); if (ret) @@ -529,35 +569,12 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id plat->phy_interface = phy_mode; } - plat->bsp_priv = ld; - plat->setup = loongson_dwmac_setup; - plat->fix_soc_reset = loongson_fix_soc_reset; - ld->dev = &pdev->dev; - - memset(&res, 0, sizeof(res)); - res.addr = pcim_iomap_table(pdev)[0]; - ld->gmac_verion = readl(res.addr + GMAC_VERSION) & 0xff; - - switch (ld->gmac_verion) { - case LOONGSON_DWMAC_CORE_1_00: - plat->rx_queues_to_use = CHANNEL_NUM; - plat->tx_queues_to_use = CHANNEL_NUM; - - /* Only channel 0 supports checksum, - * so turn off checksum to enable multiple channels. - */ - for (i = 1; i < CHANNEL_NUM; i++) - plat->tx_queues_cfg[i].coe_unsupported = 1; - + if (ld->multichan) ret = loongson_dwmac_config_msi(pdev, plat, &res, np); - break; - default: /* 0x35 device and 0x37 device. 
*/ - plat->tx_queues_to_use = 1; - plat->rx_queues_to_use = 1; - + else ret = loongson_dwmac_config_legacy(pdev, plat, &res, np); - break; - } + if (ret) + goto err_disable_device; ret = stmmac_dvr_probe(&pdev->dev, plat, &res); if (ret) @@ -631,7 +648,8 @@ static SIMPLE_DEV_PM_OPS(loongson_dwmac_pm_ops, loongson_dwmac_suspend, loongson_dwmac_resume); static const struct pci_device_id loongson_dwmac_id_table[] = { - { PCI_DEVICE_DATA(LOONGSON, GMAC, &loongson_gmac_pci_info) }, + { PCI_DEVICE_DATA(LOONGSON, GMAC1, &loongson_gmac_pci_info) }, + { PCI_DEVICE_DATA(LOONGSON, GMAC2, &loongson_gmac_pci_info) }, { PCI_DEVICE_DATA(LOONGSON, GNET, &loongson_gnet_pci_info) }, {} };