diff --git a/arch/sw_64/Kbuild b/arch/sw_64/Kbuild index aa0bf0507406c9790b10d0b0c0b6dd57d22286ae..474e3479b5cb98b250083f9113b52beafcd30b15 100644 --- a/arch/sw_64/Kbuild +++ b/arch/sw_64/Kbuild @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-y += kernel/ mm/ platform/ +obj-y += kernel/ mm/ obj-$(CONFIG_NET) += net/ obj-$(CONFIG_KVM) += kvm/ obj-$(CONFIG_MATHEMU) += math-emu/ diff --git a/arch/sw_64/Kconfig b/arch/sw_64/Kconfig index 9371df267fd376db85b2d1add940bb76dad7c80a..ae4b450170935c8f890ba06095fa8f4468c9b290 100644 --- a/arch/sw_64/Kconfig +++ b/arch/sw_64/Kconfig @@ -66,8 +66,9 @@ config SW64 select GENERIC_ARCH_TOPOLOGY select GENERIC_CLOCKEVENTS select GENERIC_GETTIMEOFDAY if HAVE_GENERIC_VDSO + select GENERIC_IOREMAP if SUBARCH_C4 + select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP select GENERIC_IRQ_LEGACY - select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP select GENERIC_IRQ_MIGRATION if SMP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW @@ -91,13 +92,18 @@ config SW64 select HAVE_C_RECORDMCOUNT select HAVE_DEBUG_BUGVERBOSE select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_EBPF_JIT + select HAVE_EFFICIENT_UNALIGNED_ACCESS if !SUBARCH_C3B select HAVE_FAST_GUP select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FUNCTION_ERROR_INJECTION + select HAVE_FUNCTION_GRAPH_RETVAL if HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER select HAVE_GENERIC_VDSO if MMU && 64BIT + select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI select HAVE_IDE select HAVE_KPROBES select HAVE_KPROBES_ON_FTRACE @@ -110,12 +116,14 @@ config SW64 select HAVE_PCSPKR_PLATFORM select HAVE_PERF_EVENTS select HAVE_PERF_EVENTS_NMI if SUBARCH_C4 - select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE if STACKTRACE + 
select HAVE_RETHOOK select HAVE_RSEQ + select HAVE_SAMPLE_FTRACE_DIRECT + select HAVE_SAMPLE_FTRACE_DIRECT_MULTI select HAVE_SYSCALL_TRACEPOINTS select IRQ_DOMAIN select IRQ_FORCED_THREADING @@ -446,6 +454,44 @@ config NUMA Access). This option is for configuring high-end multiprocessor server machines. If in doubt, say N. +config NUMA_AWARE_SPINLOCKS + bool "Numa-aware spinlocks" + depends on NUMA + depends on QUEUED_SPINLOCKS + depends on 64BIT + depends on PARAVIRT_SPINLOCKS + default y + help + Introduce NUMA (Non Uniform Memory Access) awareness into + the slow path of spinlocks. + + In this variant of qspinlock, the kernel will try to keep the lock + on the same node, thus reducing the number of remote cache misses, + while trading some of the short term fairness for better performance. + + Say N if you want absolute first come first serve fairness. + +config PARAVIRT_SPINLOCKS + bool "Paravirtualization layer for spinlocks" + depends on PARAVIRT && SMP + help + Paravirtualized spinlocks allow a pvops backend to replace the + spinlock implementation with something virtualization-friendly + (for example, block the virtual CPU rather than spinning). + + It has a minimal impact on native kernels and gives a nice performance + benefit on paravirtualized KVM kernels. + + If you are unsure how to answer this question, answer Y. + +config PARAVIRT + bool "Enable paravirtualization code" + select PARAVIRT_SPINLOCKS + help + This changes the kernel so it can modify itself when it is run + under a hypervisor, potentially improving performance significantly + over full virtualization. 
+ config USE_PERCPU_NUMA_NODE_ID def_bool y depends on NUMA @@ -656,6 +702,14 @@ config ARCH_HIBERNATION_POSSIBLE config ARCH_SELECT_MEMORY_MODEL def_bool ARCH_SPARSEMEM_ENABLE +config SW64_POWERCAP + bool "Sunway powercap driver" + select IPMI_SI + depends on SW64 && CPU_FREQ && ACPI && IPMI_HANDLER + help + This enables support for the sunway powercap driver + based on BMC and IPMI system interface. + source "drivers/cpuidle/Kconfig" source "drivers/idle/Kconfig" @@ -663,3 +717,5 @@ source "drivers/idle/Kconfig" endmenu source "arch/sw_64/kvm/Kconfig" + +source "arch/sw_64/mm/Kconfig" diff --git a/arch/sw_64/include/asm/Kbuild b/arch/sw_64/include/asm/Kbuild index e9cbabda1516366e28336ecce546604bdb253b29..bbd410bda0a4b516030acbc5527c58f2de71a6ac 100644 --- a/arch/sw_64/include/asm/Kbuild +++ b/arch/sw_64/include/asm/Kbuild @@ -5,7 +5,6 @@ generic-y += export.h generic-y += mcs_spinlock.h generic-y += param.h generic-y += qrwlock.h -generic-y += qspinlock.h generic-y += rwsem.h generic-y += seccomp.h generic-y += segment.h diff --git a/arch/sw_64/include/asm/cpu.h b/arch/sw_64/include/asm/cpu.h index ab67d723c069d93da3d1a394aaaaf04cfd25e50f..37368a9d4d522943b22ae5947312bd197ff3dc7f 100644 --- a/arch/sw_64/include/asm/cpu.h +++ b/arch/sw_64/include/asm/cpu.h @@ -21,6 +21,7 @@ enum hmcall_cpuid_cmd { #define CPU_FEAT_FPU 0x1 #define CPU_FEAT_SIMD 0x2 #define CPU_FEAT_UNA 0x4 +#define CPU_FEAT_VINT 0x8 enum sunway_cpu_model { CPU_SW3231 = 0x31, diff --git a/arch/sw_64/include/asm/cpufreq.h b/arch/sw_64/include/asm/cpufreq.h deleted file mode 100644 index d89db83b42a66dae469c9547a2d0c5924c21aa7f..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/asm/cpufreq.h +++ /dev/null @@ -1,89 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#ifndef _ASM_SW64_CPUFREQ_H -#define _ASM_SW64_CPUFREQ_H - -#include -#include -#include -#include -#include -#include - -struct clk; - -struct clk_ops { - void (*init)(struct clk *clk); - void (*enable)(struct clk *clk); - 
void (*disable)(struct clk *clk); - void (*recalc)(struct clk *clk); - int (*set_rate)(struct clk *clk, unsigned long rate, int algo_id); - long (*round_rate)(struct clk *clk, unsigned long rate); -}; - -struct clk { - struct list_head node; - const char *name; - int id; - struct module *owner; - - struct clk *parent; - const struct clk_ops *ops; - - struct kref kref; - - unsigned long rate; - unsigned long flags; -}; - -#define CLK_ALWAYS_ENABLED (1 << 0) -#define CLK_RATE_PROPAGATES (1 << 1) - -#define CORE_CLK0_VALID (0x1UL << 1) -#define CORE_CLK0_RESET (0x1UL << 2) - -#define CLK_LV1_SEL_PROTECT (0x1UL << 0) -#define CLK_LV1_SEL_MUXA (0x1UL << 2) -#define CLK_LV1_SEL_MUXB (0x1UL << 3) - -#ifdef CONFIG_UNCORE_JUNZHANG -#define CLK0_PROTECT (0x1UL << 0) -#define CLK2_PROTECT (0x1UL << 32) -#define CORE_CLK2_VALID (0x1UL << 33) -#define CORE_CLK2_RESET (0x1UL << 34) -#define CORE_CLK2_LOCK (0x1UL << 35) -#define CORE_PLL0_CFG_SHIFT 4 -#define CORE_PLL1_CFG_SHIFT 20 -#define CORE_PLL2_CFG_SHIFT 36 -#define CORE_PLL2_CFG_MASK 0x1f -#define STARTUP_RATE (2000UL * 1000 * 1000) -#endif - -#ifdef CONFIG_UNCORE_XUELANG -#define CLK_PROTECT (0x1UL << 0) -#define CLK0_PROTECT CLK_PROTECT -#define CLK2_PROTECT CLK_PROTECT -#define CORE_CLK2_VALID (0x1UL << 15) -#define CORE_CLK2_RESET (0x1UL << 16) -#define CORE_CLK2_LOCK (0x1UL << 17) -#define CORE_PLL0_CFG_SHIFT 4 -#define CORE_PLL1_CFG_SHIFT 11 -#define CORE_PLL2_CFG_SHIFT 18 -#define CORE_PLL2_CFG_MASK 0xf -#define STARTUP_RATE (2400UL * 1000 * 1000) -#endif - -#define OFFSET_CLU_LV1_SEL 0x3a80UL -#define OFFSET_CLK_CTL 0x3b80UL - -extern struct cpufreq_frequency_table freq_table[]; - -int clk_init(void); -int sw64_set_rate(unsigned int index); - -struct clk *sw64_clk_get(struct device *dev, const char *id); - -void sw64_update_clockevents(unsigned long cpu, u32 freq); - -unsigned int __sw64_cpufreq_get(struct cpufreq_policy *policy); -#endif /* _ASM_SW64_CPUFREQ_H */ diff --git a/arch/sw_64/include/asm/elf.h 
b/arch/sw_64/include/asm/elf.h index f94d69a5b1108d4957c9b5dfa50f6d3fd1174d20..7aec6327901ff01e0c50aabe8067b08a88540af4 100644 --- a/arch/sw_64/include/asm/elf.h +++ b/arch/sw_64/include/asm/elf.h @@ -91,7 +91,11 @@ typedef struct user_fpsimd_state elf_fpregset_t; * that it will "exec", and that there is sufficient room for the brk. */ -#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000) +#ifdef CONFIG_SW64_FORCE_52BIT +#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3) +#else +#define ELF_ET_DYN_BASE (2 * DEFAULT_MAP_WINDOW / 3) +#endif /* * $0 is set by ld.so to a pointer to a function which might be diff --git a/arch/sw_64/include/asm/ftrace.h b/arch/sw_64/include/asm/ftrace.h index 871c0b7b0aef29590d626978a2943456f26449fd..33122cd7f91b762f894e53e4a55e97e8de0c2563 100644 --- a/arch/sw_64/include/asm/ftrace.h +++ b/arch/sw_64/include/asm/ftrace.h @@ -46,5 +46,36 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) return addr; } +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +static inline void +__arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr) +{ + regs->regs[2] = addr; +} + +#define arch_ftrace_set_direct_caller(fregs, addr) \ + __arch_ftrace_set_direct_caller(&(fregs)->regs, addr) +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ + #endif /* ifndef __ASSEMBLY__ */ + +#ifndef __ASSEMBLY__ +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +struct fgraph_ret_regs { + unsigned long ax; + unsigned long bp; +}; + +static inline unsigned long fgraph_ret_regs_return_value(struct fgraph_ret_regs *ret_regs) +{ + return ret_regs->ax; +} + +static inline unsigned long fgraph_ret_regs_frame_pointer(struct fgraph_ret_regs *ret_regs) +{ + return ret_regs->bp; +} +#endif /* ifdef CONFIG_FUNCTION_GRAPH_TRACER */ +#endif + #endif /* _ASM_SW64_FTRACE_H */ diff --git a/arch/sw_64/include/asm/io.h b/arch/sw_64/include/asm/io.h index 1695801e6c4bf9f3c0b0c71a478784c0f61fc479..121e909a2cf943e439606555a2f64487f5aee337 100644 --- 
a/arch/sw_64/include/asm/io.h +++ b/arch/sw_64/include/asm/io.h @@ -30,6 +30,7 @@ extern void outl(u32 b, unsigned long port); #define outw outw #define outl outl +#ifndef CONFIG_GENERIC_IOREMAP static inline void __iomem *__ioremap(phys_addr_t addr, size_t size, pgprot_t prot) { @@ -37,19 +38,21 @@ static inline void __iomem *__ioremap(phys_addr_t addr, size_t size, return (void __iomem *)(tmp); } - #define ioremap(addr, size) __ioremap((addr), (size), PAGE_KERNEL) -#define ioremap_nocache(addr, size) __ioremap((addr), (size), PAGE_KERNEL) -#define ioremap_cache(addr, size) __ioremap((addr), (size), PAGE_KERNEL) + +static inline void __iounmap(volatile void __iomem *addr) +{ +} +#define iounmap __iounmap +#endif + +#define ioremap_nocache(addr, size) ioremap((addr), (size)) +#define ioremap_cache(addr, size) ioremap((addr), (size)) #define ioremap_uc ioremap_nocache #define ioport_map ioport_map extern void __iomem *ioport_map(unsigned long port, unsigned int nr); -static inline void iounmap(volatile void __iomem *addr) -{ -} - /* * String version of IO memory access ops: */ diff --git a/arch/sw_64/include/asm/kvm_host.h b/arch/sw_64/include/asm/kvm_host.h index 221a7db26f0f0c37407ce227ec4ee1e293828466..63b344ed0359fa4eda38b81bc4fadd9fa03fc02b 100644 --- a/arch/sw_64/include/asm/kvm_host.h +++ b/arch/sw_64/include/asm/kvm_host.h @@ -184,6 +184,12 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu); int kvm_sw64_perf_init(void); int kvm_sw64_perf_teardown(void); void kvm_flush_tlb_all(void); +int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); +int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); +int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number); +void vcpu_send_ipi(struct kvm_vcpu *vcpu, int target_vcpuid, int type); +void sw64_kvm_clear_irq(struct kvm_vcpu *vcpu); +void sw64_kvm_try_deliver_interrupt(struct kvm_vcpu *vcpu); void kvm_sw64_update_vpn(struct kvm_vcpu *vcpu, unsigned long vpn); int kvm_sw64_init_vm(struct kvm *kvm); void 
kvm_sw64_destroy_vm(struct kvm *kvm); diff --git a/arch/sw_64/include/asm/page.h b/arch/sw_64/include/asm/page.h index 1ba314b342a0a3a15ce7f2387bc554ac322a51e1..89bf4ba56f229a1a7954f35c5ab9c92073c9ff72 100644 --- a/arch/sw_64/include/asm/page.h +++ b/arch/sw_64/include/asm/page.h @@ -67,6 +67,9 @@ extern unsigned long __boot_phys_addr(unsigned long addr); #define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC #include #include + +#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA + #endif #endif /* _ASM_SW64_PAGE_H */ diff --git a/arch/sw_64/include/asm/paravirt.h b/arch/sw_64/include/asm/paravirt.h new file mode 100644 index 0000000000000000000000000000000000000000..0ca6befc53ee08059074613629e57b469da94602 --- /dev/null +++ b/arch/sw_64/include/asm/paravirt.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_PARAVIRT_H +#define _ASM_SW64_PARAVIRT_H + +#ifdef CONFIG_PARAVIRT +struct static_key; +extern struct static_key paravirt_steal_enabled; +extern struct static_key paravirt_steal_rq_enabled; + +struct pv_time_ops { + unsigned long long (*steal_clock)(int cpu); +}; + +struct pv_lock_ops { + void (*wait)(u8 *ptr, u8 val); + void (*kick)(int cpu); + void (*queued_spin_lock_slowpath)(struct qspinlock *lock, u32 val); + void (*queued_spin_unlock)(struct qspinlock *lock); + bool (*vcpu_is_preempted)(int cpu); +}; + +struct paravirt_patch_template { + struct pv_time_ops time; + struct pv_lock_ops lock; +}; + +extern struct paravirt_patch_template pv_ops; + +static inline u64 paravirt_steal_clock(int cpu) +{ + return pv_ops.time.steal_clock(cpu); +} + +__visible bool __native_vcpu_is_preempted(int cpu); + +static inline bool pv_vcpu_is_preempted(int cpu) +{ + return pv_ops.lock.vcpu_is_preempted(cpu); +} + +#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) +bool pv_is_native_spin_unlock(void); +void __init pv_qspinlock_init(void); + +static inline void pv_wait(u8 *ptr, u8 val) +{ + return pv_ops.lock.wait(ptr, val); +} + +static inline void 
pv_kick(int cpu) +{ + return pv_ops.lock.kick(cpu); +} + +static inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) +{ + return pv_ops.lock.queued_spin_lock_slowpath(lock, val); +} + +static inline void pv_queued_spin_unlock(struct qspinlock *lock) +{ + return pv_ops.lock.queued_spin_unlock(lock); +} +#endif /* CONFIG_PARAVIRT_SPINLOCKS */ + +#else + +#define pv_qspinlock_init() do {} while (0) + +#endif /* CONFIG_PARAVIRT */ + +#endif /* _ASM_SW64_PARAVIRT_H */ diff --git a/arch/sw_64/include/asm/paravirt_api_clock.h b/arch/sw_64/include/asm/paravirt_api_clock.h new file mode 100644 index 0000000000000000000000000000000000000000..65ac7cee0dad748dcbfbc47c31833622d08a1a8f --- /dev/null +++ b/arch/sw_64/include/asm/paravirt_api_clock.h @@ -0,0 +1 @@ +#include diff --git a/arch/sw_64/include/asm/pci.h b/arch/sw_64/include/asm/pci.h index 74d33777f05d8fa1a0c80a93efa98a47d9a4cd9c..6e8d0ed37567cb8e51b3aaced661845fa586cd9e 100644 --- a/arch/sw_64/include/asm/pci.h +++ b/arch/sw_64/include/asm/pci.h @@ -48,6 +48,7 @@ enum SUNWAY_PIU_IOR0 { PCACHE_ACCESS = 0xb880UL, PCACHE_ITEM_TAG = 0xb900UL, PCACHE_ITEM_DATA0 = 0xb980UL, + SUNWAY_PIU_IOR0_SIZE = 0xba00UL, }; enum SUNWAY_PIU_IOR1 { @@ -57,6 +58,7 @@ enum SUNWAY_PIU_IOR1 { RCDEBUGINF1 = 0xc80UL, DCACONTROL = 0x1a00UL, DEVICEID0 = 0x1a80UL, + SUNWAY_PIU_IOR1_SIZE = 0x1b00UL, }; enum SUNWAY_RC { @@ -78,6 +80,7 @@ enum SUNWAY_RC { RC_PHY_INT_REG = 0x80000UL, RC_PHY_EXT_GEN1 = 0x82400UL, RC_PHY_EXT_GEN2 = 0x82480UL, + SUNWAY_RC_SIZE = 0x82500UL, }; struct pci_dev; diff --git a/arch/sw_64/include/asm/pgtable.h b/arch/sw_64/include/asm/pgtable.h index 0b9b65685e7443827905e36b7e2f4c47393b7075..0ad6f12c32753ad796e3f06f7ffd1d7fe1cd79c9 100644 --- a/arch/sw_64/include/asm/pgtable.h +++ b/arch/sw_64/include/asm/pgtable.h @@ -188,6 +188,7 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) #define PAGE_NONE __pgprot(__ACCESS_BITS | _PAGE_FOR | _PAGE_FOW | _PAGE_FOE | _PAGE_LEAF | _PAGE_PROTNONE) #define 
PAGE_KERNEL __pgprot(_PAGE_VALID | _PAGE_KERN | _PAGE_LEAF) #define _PAGE_NORMAL(x) __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_LEAF | (x)) +#define _PAGE_IOREMAP pgprot_val(PAGE_KERNEL) #define page_valid_kern(x) ((x & (_PAGE_VALID | _PAGE_KERN)) == (_PAGE_VALID | _PAGE_KERN)) #endif @@ -829,7 +830,7 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) pr_err("%s: %d: bad pgd %016lx.\n", __FILE__, __LINE__, pgd_val(e)) extern void paging_init(void); -/* We have our own get_unmapped_area to cope with ADDR_LIMIT_32BIT. */ #define HAVE_ARCH_UNMAPPED_AREA +#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN #endif /* _ASM_SW64_PGTABLE_H */ diff --git a/arch/sw_64/include/asm/processor.h b/arch/sw_64/include/asm/processor.h index f66a77f233c9cc773f039d1b659c27213d59b1b6..4360140e953506fd491c26ae6f23e60ebb86ffac 100644 --- a/arch/sw_64/include/asm/processor.h +++ b/arch/sw_64/include/asm/processor.h @@ -8,7 +8,6 @@ #ifndef _ASM_SW64_PROCESSOR_H #define _ASM_SW64_PROCESSOR_H -#include /* for ADDR_LIMIT_32BIT */ #include #define task_pt_regs(task) \ @@ -27,19 +26,47 @@ /* * We have a 52-bit user address space: 4PB user VM... + * 20230728(mcw): + * To make sure that arch_get_unmapped_area_topdown and old + * software, e.g. golang runtime and v8 jit, works well at + * the same time, just providing 47-bit VAs unless a hint is + * supplied to mmap. */ -#define TASK_SIZE (0x10000000000000UL) -#define UNMAPPED_BASE (TASK_SIZE >> 6) -#define STACK_TOP \ - (current->personality & ADDR_LIMIT_32BIT ? 0x80000000 : 0x00120000000UL) -#define STACK_TOP_MAX 0x00120000000UL +#define VA_BITS (CONFIG_SW64_VA_BITS) +#if VA_BITS > 47 +#define VA_BITS_MIN (47) +#else +#define VA_BITS_MIN (VA_BITS) +#endif -/* This decides where the kernel will search for a free chunk of vm - * space during mmap's. - */ -#define TASK_UNMAPPED_BASE \ - ((current->personality & ADDR_LIMIT_32BIT) ? 
0x40000000 : UNMAPPED_BASE) +#define DEFAULT_MAP_WINDOW_64 (1UL << VA_BITS_MIN) +#define TASK_SIZE_64 (1UL << VA_BITS) + +#define TASK_SIZE_MAX TASK_SIZE_64 +#define TASK_SIZE TASK_SIZE_64 +#define DEFAULT_MAP_WINDOW DEFAULT_MAP_WINDOW_64 + +#ifdef CONFIG_SW64_FORCE_52BIT +#define STACK_TOP_MAX TASK_SIZE +#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 4)) +#else +#define STACK_TOP_MAX DEFAULT_MAP_WINDOW +#define TASK_UNMAPPED_BASE (PAGE_ALIGN(DEFAULT_MAP_WINDOW / 4)) +#endif + +#define STACK_TOP STACK_TOP_MAX + +#ifndef CONFIG_SW64_FORCE_52BIT +#define arch_get_mmap_end(addr, len, flags) \ + (((addr) > DEFAULT_MAP_WINDOW) ? TASK_SIZE : DEFAULT_MAP_WINDOW) +#define arch_get_mmap_base(addr, base) ((addr > DEFAULT_MAP_WINDOW) ? \ + base + TASK_SIZE - DEFAULT_MAP_WINDOW : \ + base) +#else +#define arch_get_mmap_end(addr, len, flags) (TASK_SIZE) +#define arch_get_mmap_base(addr, base) (base) +#endif struct thread_struct { struct user_fpsimd_state fpstate; diff --git a/arch/sw_64/include/asm/ptrace.h b/arch/sw_64/include/asm/ptrace.h index 0bbe85297f6288722779e1c234956ec934dce006..3e5730c8b6a23279bbf1df250d2b9b684981d696 100644 --- a/arch/sw_64/include/asm/ptrace.h +++ b/arch/sw_64/include/asm/ptrace.h @@ -87,6 +87,11 @@ static inline long regs_return_value(struct pt_regs *regs) return -regs->regs[0]; } +static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) +{ + regs->regs[0] = rc; +} + #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/sw_64/include/asm/qspinlock.h b/arch/sw_64/include/asm/qspinlock.h new file mode 100644 index 0000000000000000000000000000000000000000..15ae23e22cc6633e96aaf79b410544306e62df82 --- /dev/null +++ b/arch/sw_64/include/asm/qspinlock.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_QSPINLOCK_H +#define _ASM_SW64_QSPINLOCK_H + +#include +#include + +#ifdef CONFIG_NUMA_AWARE_SPINLOCKS +extern void cna_configure_spin_lock_slowpath(void); +#endif + +#ifdef 
CONFIG_PARAVIRT_SPINLOCKS +/* keep the same as x86 */ +#define _Q_PENDING_LOOPS (1 << 9) + +extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +extern void __pv_init_lock_hash(void); +extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); + +#define queued_spin_unlock queued_spin_unlock +/* + * queued_spin_unlock - release a queued spinlock + * @lock : Pointer to queued spinlock structure + * + * A smp_store_release() on the least-significant byte. + */ +static inline void native_queued_spin_unlock(struct qspinlock *lock) +{ + /* + * Now that we have a reference to the (likely) + * blocked pv_node, release the lock. + */ + smp_store_release(&lock->locked, 0); +} + +static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) +{ + pv_queued_spin_lock_slowpath(lock, val); +} + +static inline void queued_spin_unlock(struct qspinlock *lock) +{ + pv_queued_spin_unlock(lock); +} +#endif + +#include + +#endif /* _ASM_SW64_QSPINLOCK_H */ diff --git a/arch/sw_64/include/asm/qspinlock_paravirt.h b/arch/sw_64/include/asm/qspinlock_paravirt.h new file mode 100644 index 0000000000000000000000000000000000000000..e504b135e2b55351008f88aabb46dfa5f9c60e75 --- /dev/null +++ b/arch/sw_64/include/asm/qspinlock_paravirt.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_QSPINLOCK_PARAVIRT_H +#define _ASM_SW64_QSPINLOCK_PARAVIRT_H + +extern void __pv_queued_spin_unlock(struct qspinlock *lock); + +#endif /* _ASM_SW64_QSPINLOCK_PARAVIRT_H */ diff --git a/arch/sw_64/include/asm/smp.h b/arch/sw_64/include/asm/smp.h index 4249b10dc5501d16ec3998059b488a5da9f3ec92..0b1ebf2143f291885ed2a2cfaa0cc921bcbac544 100644 --- a/arch/sw_64/include/asm/smp.h +++ b/arch/sw_64/include/asm/smp.h @@ -34,6 +34,7 @@ struct smp_rcb_struct { unsigned long restart_args; unsigned long ready; unsigned long init_done; + unsigned long feat_vint; }; extern bool __init is_rcid_duplicate(int rcid); diff --git 
a/arch/sw_64/include/asm/spinlock.h b/arch/sw_64/include/asm/spinlock.h index 64358f32cd9a80b587a023dae6d5eecb1cf270e6..1770cd9a33762c515a31bbbd01052d916d9ff1dc 100644 --- a/arch/sw_64/include/asm/spinlock.h +++ b/arch/sw_64/include/asm/spinlock.h @@ -17,6 +17,10 @@ #include #include +#include + +/* How long a lock should spin before we consider blocking */ +#define SPIN_THRESHOLD (1 << 15) /* See include/linux/spinlock.h */ #define smp_mb__after_spinlock() smp_mb() diff --git a/arch/sw_64/include/asm/vcpu.h b/arch/sw_64/include/asm/vcpu.h index b2ab8c7f8b865bf0b40cd99023df466367021bcb..b069031add3935fe2a42a1cbe4808b2419bd5408 100644 --- a/arch/sw_64/include/asm/vcpu.h +++ b/arch/sw_64/include/asm/vcpu.h @@ -56,6 +56,7 @@ struct vcpucb { }; #elif CONFIG_SUBARCH_C4 +#define CORE4VM_IRQS 256 struct vcpucb { unsigned long ktp; @@ -109,7 +110,10 @@ struct vcpucb { unsigned long csr_earg2; unsigned long csr_scratch; unsigned long atc; - unsigned long reserved[45]; + unsigned long reserved1[10]; + /* Pending virtual interrupts */ + DECLARE_BITMAP(irqs_pending, CORE4VM_IRQS); + unsigned long reserved2[31]; }; #endif diff --git a/arch/sw_64/kernel/Makefile b/arch/sw_64/kernel/Makefile index 539b3011c854eea47dc5c3078735a255dff59bd9..caf6de81dbdea9d00d74f17ef2b768be9e97599c 100644 --- a/arch/sw_64/kernel/Makefile +++ b/arch/sw_64/kernel/Makefile @@ -66,3 +66,4 @@ obj-$(CONFIG_UPROBES) += uprobes.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o +obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt-spinlocks.o diff --git a/arch/sw_64/kernel/cacheinfo.c b/arch/sw_64/kernel/cacheinfo.c index fb4c9ca7650b812fe649e2b576ff888c5aa66e18..a6e8298cff502566aee00e3af6dafd707a353b72 100644 --- a/arch/sw_64/kernel/cacheinfo.c +++ b/arch/sw_64/kernel/cacheinfo.c @@ -116,7 +116,7 @@ static void setup_shared_cpu_map(unsigned int cpu) { unsigned int index; unsigned int rcid = cpu_to_rcid(cpu); - struct cacheinfo *this_leaf; + 
struct cacheinfo *this_leaf, *sib_leaf; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); for (index = 0; index < this_cpu_ci->num_leaves; index++) { @@ -126,16 +126,20 @@ static void setup_shared_cpu_map(unsigned int cpu) cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map); - for_each_possible_cpu(i) { + for_each_online_cpu(i) { unsigned int sib_rcid = cpu_to_rcid(i); + struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i); if ((rcid_to_domain_id(sib_rcid) != rcid_to_domain_id(rcid)) || (i == cpu)) continue; + sib_leaf = sib_cpu_ci->info_list + index; if ((rcid_to_core_id(rcid) == rcid_to_core_id(sib_rcid)) || - (this_leaf->level == 3)) + (this_leaf->level == 3)) { + cpumask_set_cpu(cpu, &sib_leaf->shared_cpu_map); cpumask_set_cpu(i, &this_leaf->shared_cpu_map); + } } } } diff --git a/arch/sw_64/kernel/cpuautoplug.c b/arch/sw_64/kernel/cpuautoplug.c index 0c9a41940c8d1abf0c1e735adbb8f008ce5fab72..3e0a0ad7a25ab399a217fd3a46015a7cf83cb4e9 100644 --- a/arch/sw_64/kernel/cpuautoplug.c +++ b/arch/sw_64/kernel/cpuautoplug.c @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -13,10 +14,8 @@ #include #include -#include #include #include -#include "../../../kernel/sched/sched.h" int autoplug_enabled; int autoplug_verbose; @@ -388,9 +387,10 @@ static void do_autoplug_timer(struct work_struct *work) long active; atomic_long_t calc_load_tasks; #endif - if (!policy || IS_ERR(policy->clk)) { - pr_err("%s: No %s associated to cpu: %d\n", - __func__, policy ? "clk" : "policy", 0); + + if (!policy) { + pr_err("%s: no policy associated to cpu: %d\n", + __func__, smp_processor_id()); return; } @@ -451,7 +451,7 @@ static void do_autoplug_timer(struct work_struct *work) #else active = atomic_long_read(&calc_load_tasks); active = active > 0 ? 
active * FIXED_1 : 0; - CALC_LOAD(avenrun[0], EXP_1, active); + calc_load(avenrun[0], EXP_1, active); load = avenrun[0] / 2; #endif diff --git a/arch/sw_64/kernel/dup_print.c b/arch/sw_64/kernel/dup_print.c index eef31527dc13045f1fb3efbf021d91aae2e691c3..fa604abef38c77bb97d96d5cd7f4c13a735cdee4 100644 --- a/arch/sw_64/kernel/dup_print.c +++ b/arch/sw_64/kernel/dup_print.c @@ -12,33 +12,88 @@ static DEFINE_SPINLOCK(printk_lock); -unsigned long sw64_printk_offset; +static unsigned long sw64_printk_offset; #define PRINTK_SIZE 0x100000UL -void sw64_printk(const char *fmt, va_list args) +static bool rrk_last_newline_end; +static unsigned long rrk_last_id; +static const char * const level_str[] = { + "(0)EMERG", + "(1)ALERT", + "(2)CRIT", + "(3)ERR", + "(4)WARNING", + "(5)NOTICE", + "(6)INFO", + "(7)DEBUG" +}; +#define LEVEL_STR_MAX_LEN 10 // length of "(4)WARNING" + +void sw64_rrk_store(const char *text, u16 text_len, u64 ts_nsec, int level, + unsigned long id, bool newline_end) { char *sw64_printk_buf; unsigned long flags; - char textbuf[1024]; - const char *text; - size_t text_len; + size_t __maybe_unused rrk_len; + char header_buf[128]; + /* same time fmt as print_time() in printk.c */ + char header_fmt[] = "[%5llu.%06llu %-"__stringify(LEVEL_STR_MAX_LEN)"s] "; + size_t header_len; + char *newline; + /* if writing a new entry while the last one did not end with '\n', print '\n' first */ + bool newline_first = rrk_last_id && (rrk_last_id != id) && (!rrk_last_newline_end); + bool wrap = false; + unsigned long max_offset_allowed; spin_lock_irqsave(&printk_lock, flags); - sw64_printk_buf = (char *)(KERNEL_PRINTK_BUFF_BASE + sw64_printk_offset); + header_len = scnprintf(header_buf, sizeof(header_buf), header_fmt, + ts_nsec / NSEC_PER_SEC, (ts_nsec % NSEC_PER_SEC) / NSEC_PER_USEC, + level >= 0 ? 
level_str[level] : "CONT"); - text_len = vscnprintf(textbuf, sizeof(textbuf), fmt, args); - text = printk_skip_headers(textbuf); - text_len -= text - textbuf; - - if (sw64_printk_offset >= (PRINTK_SIZE - 1024)) { + max_offset_allowed = PRINTK_SIZE - text_len - header_len - (newline_first ? 1 : 0); + if (unlikely(sw64_printk_offset >= max_offset_allowed)) { sw64_printk_offset = 0; - sw64_printk_buf = (char *)(KERNEL_PRINTK_BUFF_BASE + sw64_printk_offset); memset(sw64_printk_buf, 0, PRINTK_SIZE); + wrap = true; + } + sw64_printk_buf = (char *)(KERNEL_PRINTK_BUFF_BASE + sw64_printk_offset); + + if (unlikely(newline_first)) { + sw64_printk_buf[0] = '\n'; + sw64_printk_buf++; + sw64_printk_offset++; + } + + if (likely(level != -1) || unlikely(wrap)) { + memcpy(sw64_printk_buf, header_buf, header_len); + sw64_printk_offset += header_len; + sw64_printk_buf += header_len; + } + + while (unlikely((newline = strnchr(text, text_len, '\n')))) { + size_t len; + + /* copy the first line */ + newline++; + len = newline - text; + memcpy(sw64_printk_buf, text, len); + + /* add padding for next line */ + memset(&sw64_printk_buf[len], ' ', header_len); + + text += len; + text_len -= len; + sw64_printk_buf += len + header_len; + sw64_printk_offset += len + header_len; } memcpy(sw64_printk_buf, text, text_len); sw64_printk_offset += text_len; + if (likely(sw64_printk_buf[text_len - 1] != '\n' && newline_end)) { + sw64_printk_buf[text_len] = '\n'; + sw64_printk_offset++; + } if (is_in_emul()) { void __iomem *addr = __va(QEMU_PRINTF_BUFF_BASE); @@ -47,6 +102,9 @@ void sw64_printk(const char *fmt, va_list args) *(u64 *)addr = data; } + rrk_last_id = id; + rrk_last_newline_end = newline_end; + spin_unlock_irqrestore(&printk_lock, flags); } #endif @@ -55,8 +113,8 @@ void sw64_printk(const char *fmt, va_list args) #include static DEFINE_SPINLOCK(printf_lock); -#define USER_PRINT_BUFF_BASE (0x600000UL + __START_KERNEL_map) -#define USER_PRINT_BUFF_LEN 0x100000UL +#define USER_PRINT_BUFF_BASE 
(0x600000UL + __START_KERNEL_map) +#define USER_PRINT_BUFF_LEN 0x100000UL #define USER_MESSAGE_MAX_LEN 0x100000UL unsigned long sw64_printf_offset; int sw64_user_printf(const char __user *buf, int len) diff --git a/arch/sw_64/kernel/entry-ftrace.S b/arch/sw_64/kernel/entry-ftrace.S index 00e9d25931eca96ffc8e37206c715be942860ecf..5dbae49918757623c90d00138a3ca6fc1f550dd2 100644 --- a/arch/sw_64/kernel/entry-ftrace.S +++ b/arch/sw_64/kernel/entry-ftrace.S @@ -91,14 +91,6 @@ /* save pt_regs */ ldi $sp, -PT_REGS_SIZE($sp) stl $0, PT_REGS_R0($sp) - stl $1, PT_REGS_R1($sp) - stl $2, PT_REGS_R2($sp) - stl $3, PT_REGS_R3($sp) - stl $4, PT_REGS_R4($sp) - stl $5, PT_REGS_R5($sp) - stl $6, PT_REGS_R6($sp) - stl $7, PT_REGS_R7($sp) - stl $8, PT_REGS_R8($sp) stl $9, PT_REGS_R9($sp) stl $10, PT_REGS_R10($sp) stl $11, PT_REGS_R11($sp) @@ -122,19 +114,15 @@ stl $29, PT_REGS_GP($sp) ldi $0, PT_REGS_SIZE($sp) stl $0, PT_REGS_R30($sp) + + /* save direct caller reg */ + ldi $2, 0($31) + stl $2, PT_REGS_R2($sp) .endm .macro RESTORE_PT_REGS /* restore pt_regs */ ldl $0, PT_REGS_R0($sp) - ldl $1, PT_REGS_R1($sp) - ldl $2, PT_REGS_R2($sp) - ldl $3, PT_REGS_R3($sp) - ldl $4, PT_REGS_R4($sp) - ldl $5, PT_REGS_R5($sp) - ldl $6, PT_REGS_R6($sp) - ldl $7, PT_REGS_R7($sp) - ldl $8, PT_REGS_R8($sp) ldl $9, PT_REGS_R9($sp) ldl $10, PT_REGS_R10($sp) ldl $11, PT_REGS_R11($sp) @@ -173,14 +161,16 @@ /* save return value regs*/ .macro save_return_regs - subl $sp, 0x8, $sp - stl $0, 0x0($sp) + subl $sp, 16, $sp + stl $0, 0($sp) + stl $15, 8($sp) .endm /* restore return value regs*/ .macro restore_return_regs - ldl $0, 0x0($sp) - addl $sp, 0x8, $sp + ldl $15, 8($sp) + ldl $0, 0($sp) + addl $sp, 16, $sp .endm @@ -231,7 +221,7 @@ ENTRY(return_to_handler) br $27, 1f 1: ldgp $29, 0($27) save_return_regs - bis $31, $15, $16 /* parent's fp */ + bis $31, $sp, $16 /* ret_regs */ ldi $27, ftrace_return_to_handler call $26, ($27) bis $31, $0, $26 @@ -331,6 +321,7 @@ ftrace_regs_caller: 1: ldgp $29, 0($27) subl 
$28, MCOUNT_INSN_SIZE, $16 + stl $16, PT_REGS_PC($sp) bis $26, $31, $17 ldi $4, function_trace_op ldl $18, 0($4) @@ -351,7 +342,10 @@ ftrace_regs_call: call ftrace_graph_caller #endif RESTORE_PT_REGS - ret $31, ($28), 1 + bne $2, .Ldirect + ret $31, ($28), 1 +.Ldirect: + ret $31, ($2), 1 .end ftrace_regs_caller #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ diff --git a/arch/sw_64/kernel/kprobes/Makefile b/arch/sw_64/kernel/kprobes/Makefile index 110ba2bf7752361442022553269447ceb802d465..f07b3513c3a63e19c6961d252ecdcdb12bcd3521 100644 --- a/arch/sw_64/kernel/kprobes/Makefile +++ b/arch/sw_64/kernel/kprobes/Makefile @@ -1,3 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_KPROBES) += kprobes.o decode-insn.o obj-$(CONFIG_KPROBES_ON_FTRACE) += kprobes-ftrace.o +obj-$(CONFIG_RETHOOK) += rethook.o rethook_trampoline.o +CFLAGS_REMOVE_rethook.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_rethook_trampoline.o = $(CC_FLAGS_FTRACE) diff --git a/arch/sw_64/kernel/kprobes/kprobes-ftrace.c b/arch/sw_64/kernel/kprobes/kprobes-ftrace.c index a0b33a52a9e4101aba6ca5e173265e90018f6ddf..8c462789dbdcbd310f72579aa27ab3d379a316ef 100644 --- a/arch/sw_64/kernel/kprobes/kprobes-ftrace.c +++ b/arch/sw_64/kernel/kprobes/kprobes-ftrace.c @@ -33,6 +33,16 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, kprobes_inc_nmissed_count(p); } else { regs->regs[28] -= MCOUNT_INSN_SIZE; + if (in_task()) { + regs->orig_r0 = current_pt_regs()->orig_r0; + regs->orig_r19 = current_pt_regs()->orig_r19; + regs->cause = current_pt_regs()->cause; + } + if (in_irq()) { + regs->orig_r0 = get_irq_regs()->orig_r0; + regs->orig_r19 = get_irq_regs()->orig_r19; + regs->cause = get_irq_regs()->cause; + } __this_cpu_write(current_kprobe, p); kcb->kprobe_status = KPROBE_HIT_ACTIVE; diff --git a/arch/sw_64/kernel/kprobes/kprobes.c b/arch/sw_64/kernel/kprobes/kprobes.c index 2ee18260f2a3e04066f4747e904811a9601ddb49..662f65fb33817567929f17ad15a48e66b4bbaaa9 100644 --- 
a/arch/sw_64/kernel/kprobes/kprobes.c +++ b/arch/sw_64/kernel/kprobes/kprobes.c @@ -240,35 +240,6 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, } return ret; } -/* - * Function return probe trampoline: - * - init_kprobes() establishes a probepoint here - * - When the probed function returns, this probe causes the - * handlers to fire - */ -static void __used kretprobe_trampoline_holder(void) -{ - asm volatile( - /* Keep the assembler from reordering and placing JR here. */ - ".set noreorder\n\t" - "nop\n\t" - ".global __kretprobe_trampoline\n" - "__kretprobe_trampoline:\n\t" - "nop\n\t" - : : : "memory"); -} - -void __kretprobe_trampoline(void); - -void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, - struct pt_regs *regs) -{ - ri->ret_addr = (kprobe_opcode_t *) regs->regs[26]; - ri->fp = NULL; - - /* Replace the return addr with trampoline addr */ - regs->regs[26] = (unsigned long)__kretprobe_trampoline; -} /* * Provide a blacklist of symbols identifying ranges which cannot be kprobed. 
@@ -283,40 +254,12 @@ int __init arch_populate_kprobe_blacklist(void) return ret; } -/* - * Called when the probe at kretprobe trampoline is hit - */ -static int __kprobes trampoline_probe_handler(struct kprobe *p, - struct pt_regs *regs) -{ - unsigned long orig_ret_address; - - orig_ret_address = __kretprobe_trampoline_handler(regs, NULL); - instruction_pointer(regs) = orig_ret_address; - regs->regs[26] = orig_ret_address; - - /* - * By returning a non-zero value, we are telling - * kprobe_handler() that we don't want the post_handler - * to run (and have re-enabled preemption) - */ - return 1; -} - int __kprobes arch_trampoline_kprobe(struct kprobe *p) { - if (p->addr == (kprobe_opcode_t *)__kretprobe_trampoline) - return 1; - return 0; } -static struct kprobe trampoline_p = { - .addr = (kprobe_opcode_t *)__kretprobe_trampoline, - .pre_handler = trampoline_probe_handler -}; - int __init arch_init_kprobes(void) { - return register_kprobe(&trampoline_p); + return 0; } diff --git a/arch/sw_64/kernel/kprobes/rethook.c b/arch/sw_64/kernel/kprobes/rethook.c new file mode 100644 index 0000000000000000000000000000000000000000..982311b335d376dd1b37fe9cbe5738089c980a77 --- /dev/null +++ b/arch/sw_64/kernel/kprobes/rethook.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Generic return hook for sw64. 
+ */ + +#include +#include +#include "rethook.h" + +/* This is called from arch_rethook_trampoline() */ +unsigned long __used arch_rethook_trampoline_callback(struct pt_regs *regs) +{ + return rethook_trampoline_handler(regs, regs->regs[15]); +} + +NOKPROBE_SYMBOL(arch_rethook_trampoline_callback); + +void arch_rethook_prepare(struct rethook_node *rhn, struct pt_regs *regs, bool mcount) +{ + rhn->ret_addr = regs->regs[26]; + rhn->frame = regs->regs[15]; + + /* replace return addr with trampoline */ + regs->regs[26] = (unsigned long)arch_rethook_trampoline; +} + +NOKPROBE_SYMBOL(arch_rethook_prepare); diff --git a/arch/sw_64/kernel/kprobes/rethook.h b/arch/sw_64/kernel/kprobes/rethook.h new file mode 100644 index 0000000000000000000000000000000000000000..b4e2e5fe81c25b70948b2a79bf2c6cf453a35148 --- /dev/null +++ b/arch/sw_64/kernel/kprobes/rethook.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __SW64_RETHOOK_H +#define __SW64_RETHOOK_H + +unsigned long arch_rethook_trampoline_callback(struct pt_regs *regs); +void arch_rethook_prepare(struct rethook_node *rhn, struct pt_regs *regs, bool mcount); + +#endif diff --git a/arch/sw_64/kernel/kprobes/rethook_trampoline.S b/arch/sw_64/kernel/kprobes/rethook_trampoline.S new file mode 100644 index 0000000000000000000000000000000000000000..f194237751c33b51b36379ff7aa5803c6000af64 --- /dev/null +++ b/arch/sw_64/kernel/kprobes/rethook_trampoline.S @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +#include +#include +#include + + .text + .set noat + .align 4 + + .macro save_all_base_regs + /* save $26 & fp of the function before caller */ + subl $sp, 0x10, $sp + stl $26, 0($sp) + stl $15, 0x8($sp) + ldi $15, 0($sp) + + /* save $28 & fp of caller */ + subl $sp, 0x10, $sp + stl $28, 0($sp) + stl $15, 0x8($sp) + ldi $15, 0($sp) + + /* save pt_regs */ + ldi $sp, -PT_REGS_SIZE($sp) + stl $0, PT_REGS_R0($sp) + stl $1, PT_REGS_R1($sp) + stl $2, PT_REGS_R2($sp) + stl $3, PT_REGS_R3($sp) + stl $4, 
PT_REGS_R4($sp) + stl $5, PT_REGS_R5($sp) + stl $6, PT_REGS_R6($sp) + stl $7, PT_REGS_R7($sp) + stl $8, PT_REGS_R8($sp) + stl $9, PT_REGS_R9($sp) + stl $10, PT_REGS_R10($sp) + stl $11, PT_REGS_R11($sp) + stl $12, PT_REGS_R12($sp) + stl $13, PT_REGS_R13($sp) + stl $14, PT_REGS_R14($sp) + stl $15, PT_REGS_R15($sp) + stl $16, PT_REGS_R16($sp) + stl $17, PT_REGS_R17($sp) + stl $18, PT_REGS_R18($sp) + stl $19, PT_REGS_R19($sp) + stl $20, PT_REGS_R20($sp) + stl $21, PT_REGS_R21($sp) + stl $22, PT_REGS_R22($sp) + stl $23, PT_REGS_R23($sp) + stl $24, PT_REGS_R24($sp) + stl $25, PT_REGS_R25($sp) + stl $26, PT_REGS_R26($sp) + stl $27, PT_REGS_R27($sp) + stl $28, PT_REGS_R28($sp) + stl $29, PT_REGS_GP($sp) + ldi $0, PT_REGS_SIZE($sp) + stl $0, PT_REGS_R30($sp) + .endm + + .macro restore_all_base_regs + /* restore pt_regs */ + ldl $0, PT_REGS_R0($sp) + ldl $1, PT_REGS_R1($sp) + ldl $2, PT_REGS_R2($sp) + ldl $3, PT_REGS_R3($sp) + ldl $4, PT_REGS_R4($sp) + ldl $5, PT_REGS_R5($sp) + ldl $6, PT_REGS_R6($sp) + ldl $7, PT_REGS_R7($sp) + ldl $8, PT_REGS_R8($sp) + ldl $9, PT_REGS_R9($sp) + ldl $10, PT_REGS_R10($sp) + ldl $11, PT_REGS_R11($sp) + ldl $12, PT_REGS_R12($sp) + ldl $13, PT_REGS_R13($sp) + ldl $14, PT_REGS_R14($sp) + ldl $15, PT_REGS_R15($sp) + ldl $16, PT_REGS_R16($sp) + ldl $17, PT_REGS_R17($sp) + ldl $18, PT_REGS_R18($sp) + ldl $19, PT_REGS_R19($sp) + ldl $20, PT_REGS_R20($sp) + ldl $21, PT_REGS_R21($sp) + ldl $22, PT_REGS_R22($sp) + ldl $23, PT_REGS_R23($sp) + ldl $24, PT_REGS_R24($sp) + ldl $25, PT_REGS_R25($sp) + ldl $27, PT_REGS_R27($sp) + ldl $28, PT_REGS_R28($sp) + ldl $29, PT_REGS_GP($sp) + ldi $sp, PT_REGS_SIZE($sp) + + /* only restore $fp */ + ldl $15, 0x18($sp) + addl $sp, 0x20, $sp + .endm + +ENTRY(arch_rethook_trampoline) + save_all_base_regs + + mov $sp, $16 + call arch_rethook_trampoline_callback + mov $0, $26 + + restore_all_base_regs + + ret +ENDPROC(arch_rethook_trampoline) diff --git a/arch/sw_64/kernel/paravirt-spinlocks.c 
b/arch/sw_64/kernel/paravirt-spinlocks.c new file mode 100644 index 0000000000000000000000000000000000000000..cd4bd3e68721c648ff9dc693af8604eb86a6d770 --- /dev/null +++ b/arch/sw_64/kernel/paravirt-spinlocks.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include + +__visible bool __native_vcpu_is_preempted(int cpu) +{ + return false; +} + +bool pv_is_native_spin_unlock(void) +{ + return false; +} diff --git a/arch/sw_64/kernel/paravirt.c b/arch/sw_64/kernel/paravirt.c new file mode 100644 index 0000000000000000000000000000000000000000..e22a718fc5252c71c782dde5d43e84a9c80a9f46 --- /dev/null +++ b/arch/sw_64/kernel/paravirt.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +struct static_key paravirt_steal_enabled; +struct static_key paravirt_steal_rq_enabled; + +struct paravirt_patch_template pv_ops = { +#ifdef CONFIG_PARAVIRT_SPINLOCKS + .lock.queued_spin_lock_slowpath = native_queued_spin_lock_slowpath, + .lock.queued_spin_unlock = native_queued_spin_unlock, +#endif + .lock.vcpu_is_preempted = __native_vcpu_is_preempted, +}; +EXPORT_SYMBOL_GPL(pv_ops); + +#ifdef CONFIG_PARAVIRT_SPINLOCKS +static bool pvqspinlock; + +static __init int parse_pvqspinlock(char *arg) +{ + pvqspinlock = true; + return 0; +} +early_param("pvqspinlock", parse_pvqspinlock); + +void __init pv_qspinlock_init(void) +{ + /* Don't use the PV qspinlock code if there is only 1 vCPU. 
*/ + if (num_possible_cpus() == 1) + return; + + if (!pvqspinlock) { + pr_info("PV qspinlocks disabled\n"); + return; + } + + pr_info("PV qspinlocks enabled\n"); + + __pv_init_lock_hash(); + pv_ops.lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; + pv_ops.lock.queued_spin_unlock = __pv_queued_spin_unlock; + /* TODO: wait and kick */ + pv_ops.lock.wait = NULL; + pv_ops.lock.kick = NULL; +} +#endif diff --git a/arch/sw_64/kernel/setup.c b/arch/sw_64/kernel/setup.c index 16cdad9b8a940d9f67ec673cbc2f6450f6a21bc1..6eee740ce7e5784efa7ccee05269b62a0d320b4d 100644 --- a/arch/sw_64/kernel/setup.c +++ b/arch/sw_64/kernel/setup.c @@ -463,6 +463,31 @@ bool sunway_machine_is_compatible(const char *compat) return !fdt_node_check_compatible(fdt, offset, compat); } +#ifndef CONFIG_SUBARCH_C3B +static void __init setup_run_mode(void) +{ + static_branch_disable(&run_mode_host_key); + static_branch_disable(&run_mode_guest_key); + static_branch_disable(&run_mode_emul_key); + + if (sunway_machine_is_compatible("sunway,emulator")) { + static_branch_enable(&run_mode_emul_key); + pr_info("Mode: Emul\n"); + return; + } + + if (sunway_machine_is_compatible("sunway,virtual-machine") || + (rvpcr() >> VPCR_SHIFT)) { + static_branch_enable(&run_mode_guest_key); + pr_info("Mode: Guest\n"); + return; + } + + static_branch_enable(&run_mode_host_key); + pr_info("Mode: Host\n"); +} +#endif + static void __init setup_firmware_fdt(void) { void *dt_virt; @@ -491,6 +516,10 @@ static void __init setup_firmware_fdt(void) cpu_relax(); } +#ifndef CONFIG_SUBARCH_C3B + setup_run_mode(); +#endif + if (sunway_boot_magic == 0xDEED2024UL) { /* Parse MCLK(Hz) from firmware DTB */ early_parse_fdt_property(dt_virt, "/soc/clocks/mclk", @@ -547,6 +576,12 @@ static void __init setup_firmware_fdt(void) } } +static void __init setup_cpu_caps(void) +{ + if (!IS_ENABLED(CONFIG_SUBARCH_C3B) && !is_junzhang_v1()) + static_branch_enable(&hw_una_enabled); +} + static void __init setup_legacy_io(void) { if 
(is_guest_or_emul()) { @@ -606,7 +641,7 @@ static void __init device_tree_init(void) } #ifdef CONFIG_SUBARCH_C3B -static void __init setup_run_mode(void) +static void __init setup_run_mode_legacy(void) { if (*(unsigned long *)MM_SIZE) { static_branch_disable(&run_mode_host_key); @@ -627,28 +662,6 @@ static void __init setup_run_mode(void) static_branch_disable(&run_mode_emul_key); } } -#elif CONFIG_SUBARCH_C4 -static void __init setup_run_mode(void) -{ - if (rvpcr() >> VPCR_SHIFT) { - pr_info("run mode: guest\n"); - static_branch_disable(&run_mode_host_key); - static_branch_disable(&run_mode_emul_key); - static_branch_enable(&run_mode_guest_key); - } else if (sunway_boot_magic == 0xA2024) { - pr_info("run mode: emul\n"); - static_branch_disable(&run_mode_host_key); - static_branch_disable(&run_mode_guest_key); - static_branch_enable(&run_mode_emul_key); - sunway_boot_magic = 0xDEED2024; - } else { - pr_info("run mode: host\n"); - static_branch_disable(&run_mode_guest_key); - static_branch_disable(&run_mode_emul_key); - static_branch_enable(&run_mode_host_key); - } - -} #endif void __init @@ -661,7 +674,10 @@ setup_arch(char **cmdline_p) trap_init(); jump_label_init(); - setup_run_mode(); + +#ifdef CONFIG_SUBARCH_C3B + setup_run_mode_legacy(); +#endif setup_chip_ops(); setup_sched_clock(); @@ -669,6 +685,9 @@ setup_arch(char **cmdline_p) /* Early initialization for device tree */ setup_firmware_fdt(); + /* Now we know who we are, setup caps */ + setup_cpu_caps(); + /* Now we get the final boot_command_line */ *cmdline_p = boot_command_line; @@ -746,6 +765,10 @@ setup_arch(char **cmdline_p) /* Default root filesystem to sda2. 
*/ ROOT_DEV = MKDEV(SCSI_DISK0_MAJOR, 2); + +#ifdef CONFIG_PARAVIRT_SPINLOCKS + pv_qspinlock_init(); +#endif } static int diff --git a/arch/sw_64/kernel/smp.c b/arch/sw_64/kernel/smp.c index eac7c2c093f1990d4fbde8df780031e959cdae70..08e0123956a37bda0d2c91de1bfbcf5a260d89d9 100644 --- a/arch/sw_64/kernel/smp.c +++ b/arch/sw_64/kernel/smp.c @@ -500,6 +500,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus) for_each_possible_cpu(cpu) { numa_store_cpu_info(cpu); } +#ifdef CONFIG_NUMA_AWARE_SPINLOCKS + cna_configure_spin_lock_slowpath(); +#endif /* Nothing to do on a UP box, or when told not to. */ if (nr_cpu_ids == 1 || max_cpus == 0) { @@ -852,7 +855,7 @@ void arch_cpu_idle_dead(void) } #ifdef CONFIG_SUSPEND - if (!is_junzhang_v1()) { + if (is_in_host() && !is_junzhang_v1()) { sleepen(); send_sleep_interrupt(smp_processor_id()); while (1) @@ -862,7 +865,6 @@ void arch_cpu_idle_dead(void) while (1) asm("nop"); } - #else asm volatile("memb"); asm volatile("halt"); diff --git a/arch/sw_64/kernel/suspend.c b/arch/sw_64/kernel/suspend.c index 9947937c71b5d379037035fd972de4dad3ef8850..c647f9d3d8a76a319bb19a686fbe2e8a0d02638a 100644 --- a/arch/sw_64/kernel/suspend.c +++ b/arch/sw_64/kernel/suspend.c @@ -56,7 +56,7 @@ int sw64_suspend_enter(void) static int native_suspend_enter(suspend_state_t state) { - if (is_in_guest()) + if (!is_in_host()) return 0; /* processor specific suspend */ diff --git a/arch/sw_64/kernel/sys_sw64.c b/arch/sw_64/kernel/sys_sw64.c index 7efebce4af11e634876f0f90cea3ed0f194d5101..c11c02285bf3ef4f518e375751bb6d50b9b5dd3e 100644 --- a/arch/sw_64/kernel/sys_sw64.c +++ b/arch/sw_64/kernel/sys_sw64.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 #include + #include +#include SYSCALL_DEFINE5(getsysinfo, unsigned long, op, void __user *, buffer, unsigned long, nbytes, int __user *, start, void __user *, arg) @@ -335,3 +337,42 @@ SYSCALL_DEFINE0(pfh_ops) } #endif /* CONFIG_SUBARCH_C4 */ + +asmlinkage void noinstr do_entSys(struct pt_regs *regs) +{ 
+ long ret = -ENOSYS; + unsigned long nr; + unsigned long ti_flags = current_thread_info()->flags; + + regs->orig_r0 = regs->regs[0]; + regs->orig_r19 = regs->regs[19]; + nr = regs->regs[0]; + + if (ti_flags & _TIF_SYSCALL_WORK) { + nr = syscall_trace_enter(); + if (nr == NO_SYSCALL) + goto syscall_out; + regs->orig_r0 = regs->regs[0]; + regs->orig_r19 = regs->regs[19]; + } + + if (nr < __NR_syscalls) { + syscall_fn_t syscall_fn = sys_call_table[nr]; + + ret = syscall_fn(regs->regs[16], regs->regs[17], regs->regs[18], + regs->regs[19], regs->regs[20], regs->regs[21]); + } + + if ((nr != __NR_sigreturn) && (nr != __NR_rt_sigreturn)) { + if (likely((ret >= 0) || regs->orig_r0 == NO_SYSCALL)) + syscall_set_return_value(current, regs, 0, ret); + else + syscall_set_return_value(current, regs, ret, 0); + } + +syscall_out: + rseq_syscall(regs); + + if (ti_flags & _TIF_SYSCALL_WORK) + syscall_trace_leave(); +} diff --git a/arch/sw_64/kernel/traps.c b/arch/sw_64/kernel/traps.c index a84441c79b61f395e6f484a30e01c4a3206529bb..c2433236fb2de8b99ad8e3953f8fe65121e68bb0 100644 --- a/arch/sw_64/kernel/traps.c +++ b/arch/sw_64/kernel/traps.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include @@ -31,7 +30,6 @@ #include #include #include -#include #include #include "proto.h" @@ -386,45 +384,6 @@ asmlinkage void noinstr do_entIF(struct pt_regs *regs) force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)regs->pc); } -asmlinkage void noinstr do_entSys(struct pt_regs *regs) -{ - long ret = -ENOSYS; - unsigned long nr; - unsigned long ti_flags = current_thread_info()->flags; - - regs->orig_r0 = regs->regs[0]; - regs->orig_r19 = regs->regs[19]; - nr = regs->regs[0]; - - if (ti_flags & _TIF_SYSCALL_WORK) { - nr = syscall_trace_enter(); - if (nr == NO_SYSCALL) - goto syscall_out; - regs->orig_r0 = regs->regs[0]; - regs->orig_r19 = regs->regs[19]; - } - - if (nr < __NR_syscalls) { - syscall_fn_t syscall_fn = sys_call_table[nr]; - - ret = syscall_fn(regs->regs[16], 
regs->regs[17], regs->regs[18], - regs->regs[19], regs->regs[20], regs->regs[21]); - } - - if ((nr != __NR_sigreturn) && (nr != __NR_rt_sigreturn)) { - if (likely((ret >= 0) || regs->orig_r0 == NO_SYSCALL)) - syscall_set_return_value(current, regs, 0, ret); - else - syscall_set_return_value(current, regs, ret, 0); - } - -syscall_out: - rseq_syscall(regs); - - if (ti_flags & _TIF_SYSCALL_WORK) - syscall_trace_leave(); -} - struct nmi_ctx { unsigned long csr_sp; unsigned long csr_scratch; diff --git a/arch/sw_64/kvm/handle_exit.c b/arch/sw_64/kvm/handle_exit.c index 728d323a9f4dba5ed73ea95750c10fb2d5a02da6..8c6c7325ccfcf790823081563b37fbaea6b403ef 100644 --- a/arch/sw_64/kvm/handle_exit.c +++ b/arch/sw_64/kvm/handle_exit.c @@ -47,7 +47,7 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, case SW64_KVM_EXIT_STOP: vcpu->stat.stop_exits++; vcpu->arch.halted = 1; - memset(&vcpu->arch.irqs_pending, 0, sizeof(vcpu->arch.irqs_pending)); + sw64_kvm_clear_irq(vcpu); kvm_vcpu_block(vcpu); return 1; case SW64_KVM_EXIT_TIMER: diff --git a/arch/sw_64/kvm/kvm_core3.c b/arch/sw_64/kvm/kvm_core3.c index 142602c678312d18ff73b2e86cd275f3562d5bfd..26c27c69ba7b2d5735972eca104c2a1c5074fc35 100644 --- a/arch/sw_64/kvm/kvm_core3.c +++ b/arch/sw_64/kvm/kvm_core3.c @@ -286,6 +286,60 @@ long kvm_sw64_set_vcb(struct file *filp, unsigned long arg) return 0; } +int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) +{ + return test_bit(SW64_KVM_IRQ_TIMER, vcpu->arch.irqs_pending); +} + +int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.restart) + return 1; + + if (vcpu->arch.vcb.vcpu_irq_disabled) + return 0; + + return ((!bitmap_empty(vcpu->arch.irqs_pending, SWVM_IRQS) || !vcpu->arch.halted) + && !vcpu->arch.power_off); +} + +int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number) +{ + set_bit(number, (vcpu->arch.irqs_pending)); + kvm_vcpu_kick(vcpu); + return 0; +} + +void vcpu_send_ipi(struct kvm_vcpu *vcpu, int target_vcpuid, int type) +{ + struct kvm_vcpu 
*target_vcpu = kvm_get_vcpu(vcpu->kvm, target_vcpuid); + + if (target_vcpu == NULL) + return; + if (type == II_RESET) { + target_vcpu->arch.restart = 1; + kvm_vcpu_kick(target_vcpu); + } else + vcpu_interrupt_line(target_vcpu, 1); +} + +void sw64_kvm_clear_irq(struct kvm_vcpu *vcpu) +{ + memset(&vcpu->arch.irqs_pending, 0, sizeof(vcpu->arch.irqs_pending)); +} + +void sw64_kvm_try_deliver_interrupt(struct kvm_vcpu *vcpu) +{ + int irq; + bool more; + + clear_vcpu_irq(vcpu); + irq = interrupt_pending(vcpu, &more); + + if (irq < SWVM_IRQS) + try_deliver_interrupt(vcpu, irq, more); +} + void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu) { } diff --git a/arch/sw_64/kvm/kvm_core4.c b/arch/sw_64/kvm/kvm_core4.c index 6d518f28950c171628979b86e55f5b84ba8218e9..57f1176472f3a047f928874c3560a8023a07c869 100644 --- a/arch/sw_64/kvm/kvm_core4.c +++ b/arch/sw_64/kvm/kvm_core4.c @@ -20,6 +20,8 @@ #include #include "trace.h" +extern bool feature_vint; + static unsigned long shtclock_offset; void update_aptp(unsigned long pgd) @@ -57,7 +59,10 @@ int kvm_sw64_vcpu_reset(struct kvm_vcpu *vcpu) hrtimer_cancel(&vcpu->arch.hrt); vcpu->arch.pcpu_id = -1; /* force flush tlb for the first time */ vcpu->arch.power_off = 0; - memset(&vcpu->arch.irqs_pending, 0, sizeof(vcpu->arch.irqs_pending)); + if (feature_vint) + memset(&vcpu->arch.vcb.irqs_pending, 0, sizeof(vcpu->arch.vcb.irqs_pending)); + else + memset(&vcpu->arch.irqs_pending, 0, sizeof(vcpu->arch.irqs_pending)); return 0; } @@ -95,6 +100,104 @@ long kvm_sw64_set_vcb(struct file *filp, unsigned long arg) return 0; } +int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) +{ + if (feature_vint) + return test_bit(SW64_KVM_IRQ_TIMER, vcpu->arch.vcb.irqs_pending); + else + return test_bit(SW64_KVM_IRQ_TIMER, vcpu->arch.irqs_pending); +} + +int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.restart) + return 1; + + if (vcpu->arch.vcb.vcpu_irq_disabled) + return 0; + + if (feature_vint) + return 
((!bitmap_empty(vcpu->arch.vcb.irqs_pending, SWVM_IRQS) || + !vcpu->arch.halted) && !vcpu->arch.power_off); + else + return ((!bitmap_empty(vcpu->arch.irqs_pending, SWVM_IRQS) || + !vcpu->arch.halted) && !vcpu->arch.power_off); +} + +static int feat_vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number) +{ + int cpu = vcpu->cpu; + int me = smp_processor_id(); + + set_bit(number, (vcpu->arch.vcb.irqs_pending)); + + if (vcpu->mode == IN_GUEST_MODE) { + if (cpu != me && (unsigned int)cpu < nr_cpu_ids + && cpu_online(cpu)) { + if (vcpu->arch.vcb.vcpu_irq_disabled) + return 0; + send_ipi(cpu, II_II1); + } + } else + kvm_vcpu_kick(vcpu); + return 0; +} + +int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number) +{ + if (feature_vint) + return feat_vcpu_interrupt_line(vcpu, number); + /* + * Next time the Guest runs, the core code will see if it can deliver + * this interrupt. + */ + set_bit(number, (vcpu->arch.irqs_pending)); + /* + * Make sure it sees it; it might be asleep (eg. halted), or running + * the Guest right now, in which case kick_process() will knock it out. 
+ */ + kvm_vcpu_kick(vcpu); + return 0; +} + +void vcpu_send_ipi(struct kvm_vcpu *vcpu, int target_vcpuid, int type) +{ + struct kvm_vcpu *target_vcpu = kvm_get_vcpu(vcpu->kvm, target_vcpuid); + + if (target_vcpu == NULL) + return; + if (type == II_RESET) { + target_vcpu->arch.restart = 1; + kvm_vcpu_kick(target_vcpu); + } else + vcpu_interrupt_line(target_vcpu, 1); +} + +void sw64_kvm_clear_irq(struct kvm_vcpu *vcpu) +{ + if (feature_vint) + memset(&vcpu->arch.vcb.irqs_pending, 0, sizeof(vcpu->arch.vcb.irqs_pending)); + else + memset(&vcpu->arch.irqs_pending, 0, sizeof(vcpu->arch.irqs_pending)); +} + +void sw64_kvm_try_deliver_interrupt_orig(struct kvm_vcpu *vcpu) +{ + int irq; + bool more; + + clear_vcpu_irq(vcpu); + irq = interrupt_pending(vcpu, &more); + if (irq < SWVM_IRQS) + try_deliver_interrupt(vcpu, irq, more); +} + +void sw64_kvm_try_deliver_interrupt(struct kvm_vcpu *vcpu) +{ + if (!feature_vint) + sw64_kvm_try_deliver_interrupt_orig(vcpu); +} + int kvm_arch_prepare_memory_region(struct kvm *kvm, const struct kvm_memory_slot *old, struct kvm_memory_slot *new, diff --git a/arch/sw_64/kvm/kvm_timer.c b/arch/sw_64/kvm/kvm_timer.c index 895be63cd8d132b316b02388764744d863c36131..04a4709c996d6d42862097e574806571f8572616 100644 --- a/arch/sw_64/kvm/kvm_timer.c +++ b/arch/sw_64/kvm/kvm_timer.c @@ -50,17 +50,7 @@ void set_timer(struct kvm_vcpu *vcpu, unsigned long delta) /* And this is the routine when we want to set an interrupt for the Guest. */ void set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) { - /* - * Next time the Guest runs, the core code will see if it can deliver - * this interrupt. - */ - set_bit(irq, (vcpu->arch.irqs_pending)); - - /* - * Make sure it sees it; it might be asleep (eg. halted), or running - * the Guest right now, in which case kick_process() will knock it out. 
- */ - kvm_vcpu_kick(vcpu); + vcpu_interrupt_line(vcpu, irq); } enum hrtimer_restart clockdev_fn(struct hrtimer *timer) diff --git a/arch/sw_64/kvm/sw64.c b/arch/sw_64/kvm/sw64.c index b70f50e5610f94c77d92497243ea7928598e99da..053115c8284cf8e65b00fbd6131d51e0ba03b3e8 100644 --- a/arch/sw_64/kvm/sw64.c +++ b/arch/sw_64/kvm/sw64.c @@ -22,6 +22,8 @@ #include "irq.h" bool set_msi_flag; +bool feature_vint; +extern struct smp_rcb_struct *smp_rcb; #define VCPU_STAT(n, x, ...) \ { n, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ } @@ -58,13 +60,6 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, return -EWOULDBLOCK; } -int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level) -{ - set_bit(number, (vcpu->arch.irqs_pending)); - kvm_vcpu_kick(vcpu); - return 0; -} - int kvm_arch_check_processor_compat(void *opaque) { return 0; @@ -83,7 +78,7 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq if (!vcpu) return -EINVAL; - return vcpu_interrupt_line(vcpu, vector, true); + return vcpu_interrupt_line(vcpu, vector); } void sw64_kvm_switch_vpn(struct kvm_vcpu *vcpu) @@ -188,18 +183,6 @@ struct dfx_sw64_kvm_stats_debugfs_item dfx_sw64_debugfs_entries[] = { { NULL } }; -int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) -{ - if (vcpu->arch.restart) - return 1; - - if (vcpu->arch.vcb.vcpu_irq_disabled) - return 0; - - return ((!bitmap_empty(vcpu->arch.irqs_pending, SWVM_IRQS) || !vcpu->arch.halted) - && !vcpu->arch.power_off); -} - int kvm_arch_hardware_enable(void) { return 0; @@ -251,11 +234,6 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) { } -int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) -{ - return test_bit(SW64_KVM_IRQ_TIMER, vcpu->arch.irqs_pending); -} - int kvm_arch_hardware_setup(void *opaque) { return 0; @@ -266,6 +244,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (type) return -EINVAL; + feature_vint = (cpuid(GET_FEATURES, 0) & 
CPU_FEAT_VINT); + smp_rcb->feat_vint = 1; return kvm_sw64_init_vm(kvm); } @@ -428,8 +408,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) struct kvm_run *run = vcpu->run; struct vcpucb *vcb = &(vcpu->arch.vcb); struct hcall_args hargs; - int irq, ret; - bool more; + int ret; sigset_t sigsaved; /* Set guest vcb */ @@ -465,17 +444,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) memset(&hargs, 0, sizeof(hargs)); - clear_vcpu_irq(vcpu); - if (vcpu->arch.restart == 1) { /* handle reset vCPU */ vcpu->arch.regs.pc = GUEST_RESET_PC; vcpu->arch.restart = 0; } - irq = interrupt_pending(vcpu, &more); - if (irq < SWVM_IRQS) - try_deliver_interrupt(vcpu, irq, more); + sw64_kvm_try_deliver_interrupt(vcpu); vcpu->arch.halted = 0; @@ -627,24 +602,12 @@ int kvm_dev_ioctl_check_extension(long ext) return r; } -void vcpu_send_ipi(struct kvm_vcpu *vcpu, int target_vcpuid, int type) -{ - struct kvm_vcpu *target_vcpu = kvm_get_vcpu(vcpu->kvm, target_vcpuid); - - if (type == II_RESET) - target_vcpu->arch.restart = 1; - - if (target_vcpu != NULL) - vcpu_interrupt_line(target_vcpu, 1, 1); -} - int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, bool line_status) { u32 irq = irq_level->irq; unsigned int irq_num; struct kvm_vcpu *vcpu = NULL; - bool level = irq_level->level; irq_num = irq; trace_kvm_irq_line(0, irq_num, irq_level->level); @@ -654,7 +617,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, if (!vcpu) return -EINVAL; - return vcpu_interrupt_line(vcpu, irq_num, level); + return vcpu_interrupt_line(vcpu, irq_num); } diff --git a/arch/sw_64/lib/Makefile b/arch/sw_64/lib/Makefile index aa6d8eee29c20f1da87b9dcc3f8fd1338dd3af46..c2927c1e98dd2d36ac91b42795e5bfea09c1e2b4 100644 --- a/arch/sw_64/lib/Makefile +++ b/arch/sw_64/lib/Makefile @@ -40,6 +40,8 @@ lib-y += $(lib-clear_page-y) $(lib-clear_user-y) $(lib-copy_page-y) $(lib-copy_u obj-y = iomap.o obj-y += iomap_copy.o +obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += 
error-inject.o + ifeq ($(CONFIG_SUBARCH_C3B),y) # The division routines are built from single source, with different defines. AFLAGS___divlu.o = -DDIV diff --git a/arch/sw_64/lib/error-inject.c b/arch/sw_64/lib/error-inject.c new file mode 100644 index 0000000000000000000000000000000000000000..b1549c4529e58baa3c1d4dbf31dbda677f8ef999 --- /dev/null +++ b/arch/sw_64/lib/error-inject.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include + +void override_function_with_return(struct pt_regs *regs) +{ + instruction_pointer_set(regs, regs->regs[26]); +} +NOKPROBE_SYMBOL(override_function_with_return); diff --git a/arch/sw_64/lib/iomap.c b/arch/sw_64/lib/iomap.c index c73004a87dcd8ac288305870a1526cdc1ceb5fd3..51e7600b0fec03551b9e967c646efb71e5e95805 100644 --- a/arch/sw_64/lib/iomap.c +++ b/arch/sw_64/lib/iomap.c @@ -262,8 +262,8 @@ EXPORT_SYMBOL(_memset_c_io); void __iomem *ioport_map(unsigned long port, unsigned int size) { if (port >= 0x100000) - return __va(port); + return ioremap(port, size); - return __va((port << legacy_io_shift) | legacy_io_base); + return ioremap((port << legacy_io_shift) | legacy_io_base, size); } EXPORT_SYMBOL(ioport_map); diff --git a/arch/sw_64/mm/Kconfig b/arch/sw_64/mm/Kconfig new file mode 100644 index 0000000000000000000000000000000000000000..4f5f7f4599d7d73c7977caa4addfcbeb82443ef8 --- /dev/null +++ b/arch/sw_64/mm/Kconfig @@ -0,0 +1,41 @@ +choice + prompt "Virtual address space size" + default SW64_VA_BITS_47 + help + Allows choosing one of multiple possible userspace virtual address space size. + +config SW64_VA_BITS_47 + bool "47-bit" + help + Limit the userspace VAs to 47-bit size. + + It may waste some userspace VA range, but is safe and enough for current + userland to works wells. + +config SW64_VA_BITS_52 + bool "52-bit" + help + Use the full 52-bit size userspace VAs. + + The kernel will attempt to maintain compatibility with older software by + providing 47-bit VAs unless a hint is supplied to mmap. 
+ +endchoice + +config SW64_VA_BITS + int + default 47 if SW64_VA_BITS_47 + default 52 if SW64_VA_BITS_52 + default 47 + help + The size of userspace VA. + +config SW64_FORCE_52BIT + bool "Force 52-bit virtual address for userspace" + depends on SW64_VA_BITS_52 && EXPERT + help + This configuration option disable the 47-bit compatibility logic, and forces + all userspace address to be 52-bit if possible. + + Note, it is not 100% safe for software that unable to handle VA bigger than + 47-bit to works well with this option enabled. Do not enable it for now. diff --git a/arch/sw_64/mm/hugetlbpage.c b/arch/sw_64/mm/hugetlbpage.c index f986422398ef36671e6577613a3d585033a29d5d..d723a506e9b38ef3bbedd08a28377f86a135e010 100644 --- a/arch/sw_64/mm/hugetlbpage.c +++ b/arch/sw_64/mm/hugetlbpage.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -217,25 +218,24 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, info.flags = 0; info.length = len; - info.low_limit = current->mm->mmap_legacy_base; - info.high_limit = TASK_SIZE; + info.low_limit = current->mm->mmap_base; + info.high_limit = arch_get_mmap_end(addr, len, flags); info.align_mask = PAGE_MASK & ~huge_page_mask(h); info.align_offset = 0; return vm_unmapped_area(&info); } static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, - unsigned long addr0, unsigned long len, + unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct hstate *h = hstate_file(file); struct vm_unmapped_area_info info; - unsigned long addr; info.flags = VM_UNMAPPED_AREA_TOPDOWN; info.length = len; info.low_limit = PAGE_SIZE; - info.high_limit = current->mm->mmap_base; + info.high_limit = arch_get_mmap_base(addr, current->mm->mmap_base); info.align_mask = PAGE_MASK & ~huge_page_mask(h); info.align_offset = 0; addr = vm_unmapped_area(&info); @@ -250,7 +250,7 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, VM_BUG_ON(addr 
!= -ENOMEM); info.flags = 0; info.low_limit = TASK_UNMAPPED_BASE; - info.high_limit = TASK_SIZE; + info.high_limit = arch_get_mmap_end(addr, len, flags); addr = vm_unmapped_area(&info); } diff --git a/arch/sw_64/mm/mmap.c b/arch/sw_64/mm/mmap.c index a7a189fc36d675c44bdf4f9192867de1409dc480..d4bf9a7d262794e69ee88fb2f29657c586aac08b 100644 --- a/arch/sw_64/mm/mmap.c +++ b/arch/sw_64/mm/mmap.c @@ -5,10 +5,20 @@ #include #include #include +#include #include +#include #include +/* + * Top of mmap area (just below the process stack). + * Leave at least a ~128 MB hole. + */ + +#define MIN_GAP (SZ_128M) +#define MAX_GAP (STACK_TOP / 6 * 5) + unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, @@ -17,15 +27,9 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, struct mm_struct *mm = current->mm; struct vm_area_struct *vma; struct vm_unmapped_area_info info; - unsigned long limit; + const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags); - /* Support 32 bit heap. 
*/ - if (current->personality & ADDR_LIMIT_32BIT) - limit = 0x80000000; - else - limit = TASK_SIZE; - - if (len > limit) + if (unlikely(len > mmap_end - mmap_min_addr)) return -ENOMEM; if (flags & MAP_FIXED) { @@ -47,26 +51,100 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, info.flags = 0; info.length = len; info.low_limit = mm->mmap_base; - info.high_limit = limit; + info.high_limit = mmap_end; info.align_mask = 0; info.align_offset = pgoff << PAGE_SHIFT; return vm_unmapped_area(&info); } -unsigned long arch_mmap_rnd(void) +unsigned long +arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags) { - unsigned long rnd; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + struct vm_unmapped_area_info info; + const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags); + + if (unlikely(len > mmap_end - mmap_min_addr)) + return -ENOMEM; - /* 8MB for 32bit, 256MB for 64bit */ - if (current->personality & ADDR_LIMIT_32BIT) - rnd = get_random_long() & 0x7ffffful; - else - rnd = get_random_long() & 0xffffffful; + if (flags & MAP_FIXED) { + if (addr + len > TASK_SIZE) + return -EINVAL; + + return addr; + } + + if (addr) { + addr = PAGE_ALIGN(addr); + + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && + (!vma || addr + len <= vm_start_gap(vma))) + return addr; + } + + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.length = len; + info.low_limit = FIRST_USER_ADDRESS; + info.high_limit = arch_get_mmap_base(addr, mm->mmap_base); + info.align_mask = 0; + info.align_offset = pgoff << PAGE_SHIFT; + addr = vm_unmapped_area(&info); + + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. 
+ */ + if (addr & ~PAGE_MASK) { + VM_BUG_ON(addr != -ENOMEM); + info.flags = 0; + info.low_limit = mm->mmap_base; + info.high_limit = mmap_end; + addr = vm_unmapped_area(&info); + } + return addr; +} + +unsigned long arch_mmap_rnd(void) +{ + unsigned long rnd = get_random_long() & 0x7fffffful; return rnd << PAGE_SHIFT; } +unsigned long mmap_is_legacy(struct rlimit *rlim_stack) +{ + if (current->personality & ADDR_COMPAT_LAYOUT) + return 1; + if (rlim_stack->rlim_cur == RLIM_INFINITY) + return 1; + + return sysctl_legacy_va_layout; +} + +static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack) +{ + unsigned long gap = rlim_stack->rlim_cur; + unsigned long pad = stack_guard_gap; + + /* Account for stack randomization if necessary. 8M of VA. */ + if (current->flags & PF_RANDOMIZE) + pad += 0x7ff00; + /* Values close to RLIM_INFINITY can overflow. */ + if (gap + pad > gap) + gap = MIN_GAP; + else if (gap > MAX_GAP) + gap = MAX_GAP; + + return PAGE_ALIGN(STACK_TOP - gap - rnd); +} + /* * This function, called very early during the creation of a new process VM * image, sets up which VM layout function to use: @@ -82,8 +160,13 @@ void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) * Fall back to the standard layout if the personality bit is set, or * if the expected stack growth is unlimited: */ - mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; - mm->get_unmapped_area = arch_get_unmapped_area; + if (mmap_is_legacy(rlim_stack)) { + mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; + mm->get_unmapped_area = arch_get_unmapped_area; + } else { + mm->mmap_base = mmap_base(random_factor, rlim_stack); + mm->get_unmapped_area = arch_get_unmapped_area_topdown; + } } SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, diff --git a/arch/sw_64/net/bpf_jit_comp.c b/arch/sw_64/net/bpf_jit_comp.c index ed78ae0a23c9085fe8fcea32a23e23eb42305ba9..5a6b461c383224c157f7d892fba3f6c2a21399f7 100644 --- a/arch/sw_64/net/bpf_jit_comp.c 
+++ b/arch/sw_64/net/bpf_jit_comp.c @@ -227,10 +227,9 @@ static void emit_sw64_ldu64(const int dst, const u64 imm, struct jit_ctx *ctx) } /* constant insn count */ -static void emit_sw64_load_call_addr(u64 addr, struct jit_ctx *ctx) +static void emit_sw64_load_call_addr(u8 dst, u64 addr, struct jit_ctx *ctx) { u16 imm_tmp; - u8 dst = SW64_BPF_REG_PV; u8 reg_tmp = get_tmp_reg(ctx); imm_tmp = (addr >> 60) & 0xf; @@ -1192,7 +1191,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, if (ret < 0) return ret; - emit_sw64_load_call_addr(func, ctx); + emit_sw64_load_call_addr(SW64_BPF_REG_PV, func, ctx); emit(SW64_BPF_CALL(SW64_BPF_REG_RA, SW64_BPF_REG_PV), ctx); break; } @@ -1224,7 +1223,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, case BPF_LD | BPF_IMM | BPF_DW: insn1 = insn[1]; imm64 = ((u64)insn1.imm << 32) | (u32)imm; - emit_sw64_ldu64(dst, imm64, ctx); + if (bpf_pseudo_func(insn)) + emit_sw64_load_call_addr(SW64_BPF_REG_A1, imm64, ctx); + else + emit_sw64_ldu64(dst, imm64, ctx); put_tmp_reg(ctx); put_tmp_reg(ctx); return 1; diff --git a/arch/sw_64/platform/Makefile b/arch/sw_64/platform/Makefile deleted file mode 100644 index 09ed3eb0f743fc0675238cdbad40a9b8b9f1fb64..0000000000000000000000000000000000000000 --- a/arch/sw_64/platform/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -obj-y += cpufreq.o diff --git a/drivers/clocksource/timer-sw64.c b/drivers/clocksource/timer-sw64.c index 8b022f455c784636add3efcf17b2fffd7ac25239..9af19cb362fc8c8222e7073014afaf5214bf4df6 100644 --- a/drivers/clocksource/timer-sw64.c +++ b/drivers/clocksource/timer-sw64.c @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -385,18 +386,36 @@ static int timer_set_oneshot(struct clock_event_device *evt) return 0; } -void sw64_update_clockevents(unsigned long cpu, u32 freq) +static void sw64_update_clockevents(void *data) { - struct clock_event_device *swevt = &per_cpu(timer_events, cpu); 
+ struct cpufreq_freqs *freqs = (struct cpufreq_freqs *)data; + struct clock_event_device *swevt = this_cpu_ptr(&timer_events); - if (cpu == smp_processor_id()) - clockevents_update_freq(swevt, freq); - else { - clockevents_calc_mult_shift(swevt, freq, 4); - swevt->min_delta_ns = clockevent_delta2ns(swevt->min_delta_ticks, swevt); - swevt->max_delta_ns = clockevent_delta2ns(swevt->max_delta_ticks, swevt); - } + clockevents_update_freq(swevt, freqs->new * 1000); +} + +static int sw64_cpufreq_notifier(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct cpufreq_freqs *freqs = (struct cpufreq_freqs *)data; + + if (val == CPUFREQ_POSTCHANGE) + on_each_cpu_mask(freqs->policy->cpus, + sw64_update_clockevents, data, 1); + + return NOTIFY_OK; +} + +static struct notifier_block sw64_cpufreq_notifier_block = { + .notifier_call = sw64_cpufreq_notifier +}; + +static int __init register_cpufreq_notifier(void) +{ + return cpufreq_register_notifier(&sw64_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); } +arch_initcall(register_cpufreq_notifier); /* * Setup the local timer for this CPU. Copy the initialized values diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 80d3dc2505bc4ca8ffdcdf6be786193806360770..4c13741a744f9f9e197e6685673718e1c370f04b 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -325,15 +325,6 @@ config SW64_CPUFREQ For details, take a look at . If unsure, say N. - -config SW64_CPUFREQ_DEBUGFS - bool "SW64 CPU Frequency debugfs interface" - depends on SW64_CPUFREQ && DEBUG_FS - default y - help - Turns on the DebugFS interface for CPU Frequency. - - If you don't know what to do here, say N. 
endif config QORIQ_CPUFREQ diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index f9c1c9012ce7b20ca229044e1eb7b902ef20f5b7..7deb3b194e5ceafb18cf4ec00c162b0ea4d5770e 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -108,5 +108,4 @@ obj-$(CONFIG_LOONGSON3_ACPI_CPUFREQ) += loongson3-acpi-cpufreq.o obj-$(CONFIG_SH_CPU_FREQ) += sh-cpufreq.o obj-$(CONFIG_SPARC_US2E_CPUFREQ) += sparc-us2e-cpufreq.o obj-$(CONFIG_SPARC_US3_CPUFREQ) += sparc-us3-cpufreq.o -obj-$(CONFIG_SW64_CPUFREQ) += sw64_cpufreq.o -obj-$(CONFIG_SW64_CPUFREQ_DEBUGFS) += sw64_cpufreq_debugfs.o +obj-$(CONFIG_SW64_CPUFREQ) += sunway-cpufreq.o diff --git a/arch/sw_64/platform/cpufreq.c b/drivers/cpufreq/sunway-cpufreq.c similarity index 54% rename from arch/sw_64/platform/cpufreq.c rename to drivers/cpufreq/sunway-cpufreq.c index e18201069f6756e314c22f2908e3cd1fe24b2add..41a6b21ff8276984f4e4d51ddcff1bc93e42b197 100644 --- a/arch/sw_64/platform/cpufreq.c +++ b/drivers/cpufreq/sunway-cpufreq.c @@ -1,24 +1,25 @@ // SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2025 WXIAT + */ + +#define pr_fmt(fmt) "cpufreq: " fmt -#include #include -#include -#include +#include #include -#include #include #include -#include -#include -#include #define MAX_RETRY 10 -static struct platform_device sw64_cpufreq_device = { - .name = "sw64_cpufreq", - .id = -1, -}; +#define CLK_LV1_SEL_PROTECT (0x1UL << 0) +#define CLK_LV1_SEL_MUXA (0x1UL << 2) +#define CLK_LV1_SEL_MUXB (0x1UL << 3) + +#define OFFSET_CLU_LV1_SEL 0x3a80UL +#define OFFSET_CLK_CTL 0x3b80UL /* * frequency in MHz, volts in mV and stored as "driver_data" in the structure. 
@@ -31,6 +32,16 @@ static struct platform_device sw64_cpufreq_device = { } #ifdef CONFIG_PLATFORM_JUNZHANG +#define CLK0_PROTECT (0x1UL << 0) +#define CLK2_PROTECT (0x1UL << 32) +#define CORE_CLK2_VALID (0x1UL << 33) +#define CORE_CLK2_RESET (0x1UL << 34) +#define CORE_CLK2_LOCK (0x1UL << 35) +#define CORE_PLL0_CFG_SHIFT 4 +#define CORE_PLL1_CFG_SHIFT 20 +#define CORE_PLL2_CFG_SHIFT 36 +#define CORE_PLL2_CFG_MASK 0x1f + struct cpufreq_frequency_table freq_table[] = { {0, 0, CPUFREQ_ENTRY_INVALID}, /* 200Mhz is ignored */ FV(1200, 850), @@ -69,7 +80,20 @@ struct cpufreq_frequency_table freq_table[] = { static void __init fill_freq_table(struct cpufreq_frequency_table *ft) { } -#elif CONFIG_PLATFORM_XUELANG +#endif + +#ifdef CONFIG_PLATFORM_XUELANG +#define CLK_PROTECT (0x1UL << 0) +#define CLK0_PROTECT CLK_PROTECT +#define CLK2_PROTECT CLK_PROTECT +#define CORE_CLK2_VALID (0x1UL << 15) +#define CORE_CLK2_RESET (0x1UL << 16) +#define CORE_CLK2_LOCK (0x1UL << 17) +#define CORE_PLL0_CFG_SHIFT 4 +#define CORE_PLL1_CFG_SHIFT 11 +#define CORE_PLL2_CFG_SHIFT 18 +#define CORE_PLL2_CFG_MASK 0xf + struct cpufreq_frequency_table freq_table[] = { {0, 0, CPUFREQ_ENTRY_INVALID}, /* 200Mhz is ignored */ {0, 0, CPUFREQ_ENTRY_INVALID}, /* 1200Mhz is ignored */ @@ -95,6 +119,7 @@ static void __init fill_freq_table(struct cpufreq_frequency_table *ft) int i; unsigned long freq_off; unsigned char external_clk; + external_clk = *((unsigned char *)__va(MB_EXTCLK)); if (external_clk == 240) @@ -108,59 +133,29 @@ static void __init fill_freq_table(struct cpufreq_frequency_table *ft) } #endif -static int __init sw64_cpufreq_init(void) +static unsigned int sunway_get_rate(struct cpufreq_policy *policy) { int i; - unsigned long max_rate = get_cpu_freq() / 1000; - - fill_freq_table(freq_table); - for (i = 0; freq_table[i].frequency != CPUFREQ_TABLE_END; i++) { - if (max_rate == freq_table[i].frequency) - freq_table[i+1].frequency = CPUFREQ_TABLE_END; - } - return 
platform_device_register(&sw64_cpufreq_device); -} -arch_initcall(sw64_cpufreq_init); - -static struct clk cpu_clk = { - .name = "cpu_clk", - .flags = CLK_ALWAYS_ENABLED | CLK_RATE_PROPAGATES, - .rate = STARTUP_RATE, -}; - -struct clk *sw64_clk_get(struct device *dev, const char *id) -{ - return &cpu_clk; -} -EXPORT_SYMBOL(sw64_clk_get); - -unsigned int __sw64_cpufreq_get(struct cpufreq_policy *policy) -{ - int i, clu_lv1_sel; u64 val; void __iomem *spbu_base = misc_platform_get_spbu_base(0); struct cpufreq_frequency_table *ft = policy->freq_table; - clu_lv1_sel = (readq(spbu_base + OFFSET_CLU_LV1_SEL) >> 2) & 0x3; - - if (clu_lv1_sel == 0) - val = readq(spbu_base + OFFSET_CLK_CTL) >> CORE_PLL0_CFG_SHIFT; - else if (clu_lv1_sel == 2) - val = readq(spbu_base + OFFSET_CLK_CTL) >> CORE_PLL1_CFG_SHIFT; - else - val = readq(spbu_base + OFFSET_CLK_CTL) >> CORE_PLL2_CFG_SHIFT; - + /* PLL2 provides working frequency for core */ + val = readq(spbu_base + OFFSET_CLK_CTL) >> CORE_PLL2_CFG_SHIFT; val &= CORE_PLL2_CFG_MASK; for (i = 0; ft[i].frequency != CPUFREQ_TABLE_END; i++) { - if (val == i) + if (val == i) { + if (ft[i].frequency == CPUFREQ_ENTRY_INVALID) + return cpuid(GET_CPU_FREQ, 0) * 1000UL; return ft[i].frequency; + } } + return 0; } -EXPORT_SYMBOL(__sw64_cpufreq_get); -int sw64_set_rate(unsigned int index) +static int sunway_set_rate(unsigned int index) { int i, retry, cpu_num; void __iomem *spbu_base; @@ -196,4 +191,94 @@ int sw64_set_rate(unsigned int index) } return 0; } -EXPORT_SYMBOL_GPL(sw64_set_rate); + +static unsigned int sunway_cpufreq_get(unsigned int cpu) +{ + struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu); + + if (!policy) { + pr_err("%s: no policy associated to cpu: %d\n", + __func__, cpu); + return 0; + } + + return sunway_get_rate(policy); +} + +/* + * Here we notify other drivers of the proposed change and the final change. 
+ */ +static int sunway_cpufreq_target(struct cpufreq_policy *policy, + unsigned int index) +{ + int ret; + unsigned int cpu = policy->cpu; + + if (!cpu_online(cpu)) + return -ENODEV; + + /* setting the cpu frequency */ + ret = sunway_set_rate(index); + if (ret) + return ret; + update_cpu_freq(freq_table[index].frequency); + + return 0; +} + +static int sunway_cpufreq_init(struct cpufreq_policy *policy) +{ + cpufreq_generic_init(policy, freq_table, 0); + + return 0; +} + +static int sunway_cpufreq_verify(struct cpufreq_policy_data *policy) +{ + return cpufreq_frequency_table_verify(policy, freq_table); +} + +static int sunway_cpufreq_exit(struct cpufreq_policy *policy) +{ + return 0; +} + +static struct freq_attr *sunway_table_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, NULL, +}; + +static struct cpufreq_driver sunway_cpufreq_driver = { + .name = "sunway-cpufreq", + .flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK, + .init = sunway_cpufreq_init, + .verify = sunway_cpufreq_verify, + .target_index = sunway_cpufreq_target, + .get = sunway_cpufreq_get, + .exit = sunway_cpufreq_exit, + .attr = sunway_table_attr, +}; + +static int __init cpufreq_init(void) +{ + int i, ret; + unsigned long max_rate = get_cpu_freq() / 1000; + + if (!is_in_host()) { + pr_warn("cpufreq driver of Sunway platforms is only supported in host mode\n"); + return -ENODEV; + } + + fill_freq_table(freq_table); + for (i = 0; freq_table[i].frequency != CPUFREQ_TABLE_END; i++) { + if (max_rate == freq_table[i].frequency) + freq_table[i+1].frequency = CPUFREQ_TABLE_END; + } + + ret = cpufreq_register_driver(&sunway_cpufreq_driver); + if (ret) + return ret; + + return 0; +} +device_initcall(cpufreq_init); + diff --git a/drivers/cpufreq/sw64_cpufreq.c b/drivers/cpufreq/sw64_cpufreq.c deleted file mode 100644 index 93163da904a2b2605eb2a18a719477c891737424..0000000000000000000000000000000000000000 --- a/drivers/cpufreq/sw64_cpufreq.c +++ /dev/null @@ -1,178 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 
-/* - * linux/arch/sw/kernel/setup.c - * - * Copyright (C) 1995 Linus Torvalds - */ - -/* - * Cpufreq driver for the sw64 processors - * - */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include /* set_cpus_allowed() */ -#include -#include -#include - -#include -#include -#include - -static uint nowait; - -static struct clk *cpuclk; - - -static int sw64_cpu_freq_notifier(struct notifier_block *nb, - unsigned long val, void *data); - -static struct notifier_block sw64_cpufreq_notifier_block = { - .notifier_call = sw64_cpu_freq_notifier -}; - -static int sw64_cpu_freq_notifier(struct notifier_block *nb, - unsigned long val, void *data) -{ - struct cpufreq_freqs *freqs = (struct cpufreq_freqs *)data; - unsigned long cpu = freqs->policy->cpu; - - if (val == CPUFREQ_POSTCHANGE) - sw64_update_clockevents(cpu, freqs->new * 1000); - - return 0; -} - -static unsigned int sw64_cpufreq_get(unsigned int cpu) -{ - struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu); - - if (!policy || IS_ERR(policy->clk)) { - pr_err("%s: No %s associated to cpu: %d\n", - __func__, policy ? "clk" : "policy", cpu); - return 0; - } - - return __sw64_cpufreq_get(policy); -} - -/* - * Here we notify other drivers of the proposed change and the final change. 
- */ -static int sw64_cpufreq_target(struct cpufreq_policy *policy, - unsigned int index) -{ - int ret; - unsigned int cpu = policy->cpu; - - if (!cpu_online(cpu)) - return -ENODEV; - - /* setting the cpu frequency */ - ret = sw64_set_rate(index); - if (ret) - return ret; - update_cpu_freq(freq_table[index].frequency); - - return 0; -} - -static int sw64_cpufreq_cpu_init(struct cpufreq_policy *policy) -{ - cpuclk = sw64_clk_get(NULL, "cpu_clk"); - if (IS_ERR(cpuclk)) { - pr_err("couldn't get CPU clk\n"); - return PTR_ERR(cpuclk); - } - - policy->clk = cpuclk; - - cpufreq_generic_init(policy, freq_table, 0); - - return 0; -} - -static int sw64_cpufreq_verify(struct cpufreq_policy_data *policy) -{ - return cpufreq_frequency_table_verify(policy, freq_table); -} - -static int sw64_cpufreq_exit(struct cpufreq_policy *policy) -{ - return 0; -} - -static struct freq_attr *sw64_table_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, NULL, -}; - -static struct cpufreq_driver sw64_cpufreq_driver = { - .name = "sw64", - .init = sw64_cpufreq_cpu_init, - .verify = sw64_cpufreq_verify, - .target_index = sw64_cpufreq_target, - .get = sw64_cpufreq_get, - .exit = sw64_cpufreq_exit, - .attr = sw64_table_attr, -}; - -static const struct platform_device_id platform_device_ids[] = { - { - .name = "sw64_cpufreq", - }, - {} -}; - -MODULE_DEVICE_TABLE(platform, platform_device_ids); - -static struct platform_driver platform_driver = { - .driver = { - .name = "sw64_cpufreq", - }, - .id_table = platform_device_ids, -}; - - -static int __init cpufreq_init(void) -{ - int ret; - - if (is_in_guest()) { - pr_warn("Now sw_64 CPUFreq does not support virtual machines\n"); - return -ENODEV; - } - - /* Register platform stuff */ - ret = platform_driver_register(&platform_driver); - if (ret) - return ret; - - pr_info("SW-64 CPU frequency driver\n"); - - cpufreq_register_notifier(&sw64_cpufreq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); - - return 
cpufreq_register_driver(&sw64_cpufreq_driver); -} - -static void __exit cpufreq_exit(void) -{ - cpufreq_unregister_driver(&sw64_cpufreq_driver); - cpufreq_unregister_notifier(&sw64_cpufreq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); - - platform_driver_unregister(&platform_driver); -} - -module_init(cpufreq_init); -module_exit(cpufreq_exit); - -module_param(nowait, uint, 0644); -MODULE_PARM_DESC(nowait, "Disable SW-64 specific wait"); - -MODULE_DESCRIPTION("cpufreq driver for sw64"); -MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/sw64_cpufreq_debugfs.c b/drivers/cpufreq/sw64_cpufreq_debugfs.c deleted file mode 100644 index e09c63495a0eaf77fc45fc6cea56a13d4f7a48cc..0000000000000000000000000000000000000000 --- a/drivers/cpufreq/sw64_cpufreq_debugfs.c +++ /dev/null @@ -1,94 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include -#include -#include - -#include -#include -#include - -/* Show cpufreq in Mhz */ -static int cpufreq_show(struct seq_file *m, void *v) -{ - int i; - u64 val; - void __iomem *spbu_base = misc_platform_get_spbu_base(0); - - val = readq(spbu_base + OFFSET_CLK_CTL) >> CORE_PLL2_CFG_SHIFT; - val &= CORE_PLL2_CFG_MASK; - seq_puts(m, "CPU frequency in Mhz:\n"); - for (i = 0; freq_table[i].frequency != CPUFREQ_TABLE_END; i++) { - if (freq_table[i].frequency == CPUFREQ_ENTRY_INVALID) - continue; - if (val == i) - seq_printf(m, "[%d] ", freq_table[i].frequency / 1000); - else - seq_printf(m, "%d ", freq_table[i].frequency / 1000); - } - seq_puts(m, "\n"); - - return 0; -} - -static int cpufreq_open(struct inode *inode, struct file *file) -{ - return single_open(file, cpufreq_show, NULL); -} - -static ssize_t cpufreq_set(struct file *file, const char __user *user_buf, - size_t len, loff_t *ppos) -{ - char buf[5]; - size_t size; - int cf, i, err, index, freq; - - size = min(sizeof(buf) - 1, len); - if (copy_from_user(buf, user_buf, size)) - return -EFAULT; - buf[size] = '\0'; - - err = kstrtoint(buf, 10, &cf); - if (err) - return err; - cf *= 
1000; /* convert Mhz to khz */ - index = -1; - for (i = 0; freq_table[i].frequency != CPUFREQ_TABLE_END; i++) { - if (cf == freq_table[i].frequency) { - index = i; - break; - } - } - - if (index < 0) - return -EINVAL; - - sw64_set_rate(index); - update_cpu_freq(freq); - return len; -} - -static const struct file_operations set_cpufreq_fops = { - .open = cpufreq_open, - .read = seq_read, - .write = cpufreq_set, - .llseek = seq_lseek, - .release = single_release, -}; - -static int __init cpufreq_debugfs_init(void) -{ - struct dentry *cpufreq_entry; - - if (!sw64_debugfs_dir) - return -ENODEV; - - cpufreq_entry = debugfs_create_file("cpufreq", 0600, - sw64_debugfs_dir, NULL, - &set_cpufreq_fops); - if (!cpufreq_entry) - return -ENOMEM; - - return 0; -} -late_initcall(cpufreq_debugfs_init); diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c index 6566d08c5fac2deea2d15e59b6b60f7ba76d554f..2eb1636c560e3e2280587d4f58c72dc74333aada 100644 --- a/drivers/gpu/drm/radeon/radeon_vce.c +++ b/drivers/gpu/drm/radeon/radeon_vce.c @@ -238,7 +238,7 @@ int radeon_vce_resume(struct radeon_device *rdev) return r; } -#ifdef __sw_64__ +#if IS_ENABLED(CONFIG_SW64) memset_io(cpu_addr, 0, radeon_bo_size(rdev->vce.vcpu_bo)); #else memset(cpu_addr, 0, radeon_bo_size(rdev->vce.vcpu_bo)); diff --git a/drivers/iommu/sw64/iommu.c b/drivers/iommu/sw64/iommu.c index dfb0c79fc0add37d46497cdd617c18d7768737de..de135a7e648bab15109d2d7091e95597e699c4c8 100644 --- a/drivers/iommu/sw64/iommu.c +++ b/drivers/iommu/sw64/iommu.c @@ -857,7 +857,6 @@ sunway_iommu_unmap_page(struct sunway_iommu_domain *sunway_domain, unsigned long *pte_l2, unmapped; pr_debug("%s iova %#lx, page_size %#lx\n", __func__, iova, page_size); - BUG_ON(!is_power_of_2(page_size)); unmapped = 0; while (unmapped < page_size) { @@ -1162,39 +1161,73 @@ sunway_iommu_iova_to_phys(struct iommu_domain *dom, dma_addr_t iova) return paddr; } -static int -sunway_iommu_map(struct iommu_domain *dom, unsigned long 
iova, - phys_addr_t paddr, size_t page_size, int iommu_prot, gfp_t gfp) +static int sunway_iommu_map_pages(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped) { - struct sunway_iommu_domain *sdomain = to_sunway_domain(dom); - int ret; + struct sunway_iommu_domain *sdomain = to_sunway_domain(domain); + size_t size = pgcount << __ffs(pgsize); + unsigned long mapped_size = 0; + int ret = 0; /* * As VFIO cannot distinguish between normal DMA request * and pci device BAR, check should be introduced manually * to avoid VFIO trying to map pci config space. */ - if (iova >= SW64_BAR_ADDRESS) + if (iova >= SW64_BAR_ADDRESS) { + if (mapped) + *mapped = size; return 0; + } + + if (!(pgsize & domain->pgsize_bitmap)) { + pr_err("pgsize: %lx not supported.\n", pgsize); + return -EINVAL; + } + + while (mapped_size < size) { + ret = sunway_iommu_map_page(sdomain, iova, paddr, pgsize, prot); + if (ret) + goto out; + + iova += pgsize; + paddr += pgsize; + mapped_size += pgsize; + } - ret = sunway_iommu_map_page(sdomain, iova, paddr, page_size, iommu_prot); + if (mapped) + *mapped = size; +out: return ret; } -static size_t -sunway_iommu_unmap(struct iommu_domain *dom, unsigned long iova, - size_t page_size, struct iommu_iotlb_gather *gather) +static size_t sunway_iommu_unmap_pages(struct iommu_domain *domain, unsigned long iova, + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *iotlb_gather) { - struct sunway_iommu_domain *sdomain = to_sunway_domain(dom); - size_t unmap_size; + struct sunway_iommu_domain *sdomain = to_sunway_domain(domain); + size_t size = pgcount << __ffs(pgsize); + unsigned long unmapped_size = 0; + unsigned long unmap_size; if (iova >= SW64_BAR_ADDRESS) - return page_size; + return size; - unmap_size = sunway_iommu_unmap_page(sdomain, iova, page_size); + if (!(pgsize & domain->pgsize_bitmap)) { + pr_err("pgsize: %lx not supported.\n", pgsize); + return -EINVAL; + } 
+ + while (unmapped_size < size) { + unmap_size = sunway_iommu_unmap_page(sdomain, iova, pgsize); + + iova += unmap_size; + unmapped_size += unmap_size; + } - return unmap_size; + return size; } static struct iommu_group *sunway_iommu_device_group(struct device *dev) @@ -1364,8 +1397,8 @@ const struct iommu_ops sunway_iommu_ops = { .def_domain_type = sunway_iommu_def_domain_type, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = sunway_iommu_attach_device, - .map = sunway_iommu_map, - .unmap = sunway_iommu_unmap, + .map_pages = sunway_iommu_map_pages, + .unmap_pages = sunway_iommu_unmap_pages, .iova_to_phys = sunway_iommu_iova_to_phys, .free = sunway_iommu_domain_free, } diff --git a/drivers/iommu/sw64/iommu_v2.c b/drivers/iommu/sw64/iommu_v2.c index 7ac13d6732f091c42a7cf8a643094d02488b13d7..7e363bfbdaafb8e0cc73a43796cc25581f577e1d 100644 --- a/drivers/iommu/sw64/iommu_v2.c +++ b/drivers/iommu/sw64/iommu_v2.c @@ -1117,7 +1117,6 @@ sunway_iommu_unmap_page(struct sunway_iommu_domain *sunway_domain, int tmp = 1; pr_debug("%s iova %#lx, page_size %#lx\n", __func__, iova, page_size); - BUG_ON(!is_power_of_2(page_size)); switch (page_size) { case (1UL << 33): @@ -1389,10 +1388,6 @@ sunway_iommu_iova_to_phys(struct iommu_domain *dom, dma_addr_t iova) unsigned long paddr, grn; unsigned long is_last; - if ((iova > SW64_32BIT_DMA_LIMIT) - && (iova <= DMA_BIT_MASK(32))) - return iova; - if (iova >= SW64_BAR_ADDRESS) return iova; @@ -1449,12 +1444,15 @@ sunway_iommu_iova_to_phys(struct iommu_domain *dom, dma_addr_t iova) paddr += iova & ~PAGE_MASK; return paddr; } -static int -sunway_iommu_map(struct iommu_domain *dom, unsigned long iova, - phys_addr_t paddr, size_t page_size, int iommu_prot, gfp_t gfp) + +static int sunway_iommu_map_pages(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped) { - struct sunway_iommu_domain *sdomain = to_sunway_domain(dom); - int ret; + 
struct sunway_iommu_domain *sdomain = to_sunway_domain(domain); + size_t size = pgcount << __ffs(pgsize); + unsigned long mapped_size = 0; + int ret = 0; /* * 3.5G ~ 4G currently is seen as PCI 32-bit MEMIO space. In theory, @@ -1470,14 +1468,14 @@ sunway_iommu_map(struct iommu_domain *dom, unsigned long iova, * and buggy. * * We manage to find a compromise solution, which is allow these IOVA - * being allocated and "mapped" as usual, but with a warning issued to + * being allocated and mapped as usual, and with a warning issued to * users at the same time. So users can quickly learn if they are using * these "illegal" IOVA and thus change their strategies accordingly. */ - if ((iova > SW64_32BIT_DMA_LIMIT) + if ((SW64_32BIT_DMA_LIMIT < iova + size) && (iova <= DMA_BIT_MASK(32))) { - pr_warn_once("Domain %d are using IOVA: %lx\n", sdomain->id, iova); - return 0; + pr_warn_once("process %s (pid:%d) is using domain %d with IOVA: %lx\n", + current->comm, current->pid, sdomain->id, iova); } /* @@ -1486,6 +1484,8 @@ sunway_iommu_map(struct iommu_domain *dom, unsigned long iova, */ if (iova >= SW64_BAR_ADDRESS) { pr_warn_once("Domain %d are using IOVA: %lx\n", sdomain->id, iova); + if (mapped) + *mapped = size; return 0; } @@ -1495,24 +1495,39 @@ sunway_iommu_map(struct iommu_domain *dom, unsigned long iova, return -EFAULT; } - ret = sunway_iommu_map_page(sdomain, iova, paddr, page_size, iommu_prot); + if (!(pgsize & domain->pgsize_bitmap)) { + pr_err("pgsize: %lx not supported\n", pgsize); + return -EINVAL; + } + + while (mapped_size < size) { + ret = sunway_iommu_map_page(sdomain, iova, paddr, pgsize, prot); + if (ret) + goto out; + + iova += pgsize; + paddr += pgsize; + mapped_size += pgsize; + } + if (mapped) + *mapped = size; + +out: return ret; } -static size_t -sunway_iommu_unmap(struct iommu_domain *dom, unsigned long iova, - size_t page_size, struct iommu_iotlb_gather *gather) +static size_t sunway_iommu_unmap_pages(struct iommu_domain *domain, unsigned long 
iova, + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *iotlb_gather) { - struct sunway_iommu_domain *sdomain = to_sunway_domain(dom); - size_t unmap_size; - - if ((iova > SW64_32BIT_DMA_LIMIT) - && (iova <= DMA_BIT_MASK(32))) - return page_size; + struct sunway_iommu_domain *sdomain = to_sunway_domain(domain); + size_t size = pgcount << __ffs(pgsize); + unsigned long unmapped_size = 0; + unsigned long unmap_size; if (iova >= SW64_BAR_ADDRESS) - return page_size; + return size; /* IOMMU v2 supports 42 bit mapped address width */ if (iova >= MAX_IOVA_WIDTH) { @@ -1520,9 +1535,19 @@ sunway_iommu_unmap(struct iommu_domain *dom, unsigned long iova, return 0; } - unmap_size = sunway_iommu_unmap_page(sdomain, iova, page_size); + if (!(pgsize & domain->pgsize_bitmap)) { + pr_err("pgsize: %lx not supported\n", pgsize); + return -EINVAL; + } + + while (unmapped_size < size) { + unmap_size = sunway_iommu_unmap_page(sdomain, iova, pgsize); + + iova += unmap_size; + unmapped_size += unmap_size; + } - return unmap_size; + return size; } static struct iommu_group *sunway_iommu_device_group(struct device *dev) @@ -1708,8 +1733,8 @@ const struct iommu_ops sunway_iommu_ops = { .def_domain_type = sunway_iommu_def_domain_type, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = sunway_iommu_attach_device, - .map = sunway_iommu_map, - .unmap = sunway_iommu_unmap, + .map_pages = sunway_iommu_map_pages, + .unmap_pages = sunway_iommu_unmap_pages, .iova_to_phys = sunway_iommu_iova_to_phys, .free = sunway_iommu_domain_free, } diff --git a/drivers/irqchip/irq-sunway-cpu.c b/drivers/irqchip/irq-sunway-cpu.c index 58579c1044e06f12f22843cbe8b6d478289ab681..28b346f87bf34e575c3e25868300351306b46d29 100644 --- a/drivers/irqchip/irq-sunway-cpu.c +++ b/drivers/irqchip/irq-sunway-cpu.c @@ -94,7 +94,7 @@ asmlinkage void noinstr do_entInt(struct pt_regs *regs) old_regs = set_irq_regs(regs); #ifdef CONFIG_PM - if (is_junzhang_v1()) { + if (is_in_host() && 
is_junzhang_v1()) { if (pme_state == PME_WFW) { pme_state = PME_PENDING; goto out; diff --git a/drivers/pci/controller/pci-sunway.c b/drivers/pci/controller/pci-sunway.c index cd6b4680d1c3e54a9f5bd82f88d6c5a02be1ea42..5c7346efd5a3effb085cb48dd71109a47d7c4e34 100644 --- a/drivers/pci/controller/pci-sunway.c +++ b/drivers/pci/controller/pci-sunway.c @@ -865,10 +865,9 @@ static int pci_prepare_controller(struct pci_controller *hose, hose->dense_mem_base = props[PROP_PCIE_IO_BASE]; hose->dense_io_base = props[PROP_EP_IO_BASE]; - hose->rc_config_space_base = __va(props[PROP_RC_CONFIG_BASE]); - hose->ep_config_space_base = __va(props[PROP_EP_CONFIG_BASE]); - hose->piu_ior0_base = __va(props[PROP_PIU_IOR0_BASE]); - hose->piu_ior1_base = __va(props[PROP_PIU_IOR1_BASE]); + hose->rc_config_space_base = ioremap(props[PROP_RC_CONFIG_BASE], SUNWAY_RC_SIZE); + hose->piu_ior0_base = ioremap(props[PROP_PIU_IOR0_BASE], SUNWAY_PIU_IOR0_SIZE); + hose->piu_ior1_base = ioremap(props[PROP_PIU_IOR1_BASE], SUNWAY_PIU_IOR1_SIZE); hose->first_busno = 0xff; hose->last_busno = 0xff; @@ -964,6 +963,8 @@ static int sunway_pci_ecam_init(struct pci_config_window *cfg) if (!hose) return -ENOMEM; + hose->ep_config_space_base = cfg->win; + /* Init pci_controller */ ret = pci_prepare_controller(hose, fwnode); if (ret) { diff --git a/drivers/pci/ecam.c b/drivers/pci/ecam.c index 1c40d2506aef347a57b4f11989ce455c8529f0dd..e568ca75052213318e1b6471f4481a8415fc6c71 100644 --- a/drivers/pci/ecam.c +++ b/drivers/pci/ecam.c @@ -79,7 +79,12 @@ struct pci_config_window *pci_ecam_create(struct device *dev, if (!cfg->winp) goto err_exit_malloc; } else { +#if IS_ENABLED(CONFIG_SW64) + cfg->win = pci_remap_cfgspace(cfgres->start, + (unsigned long)bus_range * (unsigned long)bsz); +#else cfg->win = pci_remap_cfgspace(cfgres->start, bus_range * bsz); +#endif if (!cfg->win) goto err_exit_iomap; } diff --git a/drivers/platform/sw64/Makefile b/drivers/platform/sw64/Makefile index 
65dead29600ec6e0061650dd689551800fd543da..6cd6fa0307e019bdbca6d0178f0ea64549ad6cdd 100644 --- a/drivers/platform/sw64/Makefile +++ b/drivers/platform/sw64/Makefile @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_SW64_POWERCAP) += powercap.o obj-y += misc-platform.o diff --git a/drivers/platform/sw64/powercap.c b/drivers/platform/sw64/powercap.c new file mode 100644 index 0000000000000000000000000000000000000000..3f380ace560172536bb13b66f53c8120e5154813 --- /dev/null +++ b/drivers/platform/sw64/powercap.c @@ -0,0 +1,745 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2025 WXIAT + */ + +#define pr_fmt(fmt) "sunway-powercap: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +#define SUNWAY_POWERCAP_NETFN 0x3A + +#define SUNWAY_POWERCAP_ACPI_NOTIFY_VALUE 0x84 + +enum sunway_powercap_version { + SUNWAY_POWERCAP_V1 = 1, + SUNWAY_POWERCAP_VERSION_MAX, +}; + +enum sunway_powercap_mode { + SUNWAY_POWERCAP_MODE_POLL = 0, + SUNWAY_POWERCAP_MODE_INTERRUPT, +}; + +enum sunway_powercap_poll_interval { + SUNWAY_POWERCAP_POLL_INTERVAL0 = 50, + SUNWAY_POWERCAP_POLL_INTERVAL1 = 100, + SUNWAY_POWERCAP_POLL_INTERVAL2 = 200, + SUNWAY_POWERCAP_POLL_INTERVAL3 = 250, +}; + +enum sunway_powercap_cmd { + SUNWAY_POWERCAP_CMD_GET_CFG = 0x30, + SUNWAY_POWERCAP_CMD_GET_FREQ = 0x31, + SUNWAY_POWERCAP_CMD_ACK = 0x32, +}; + +enum sunway_powercap_state { + SUNWAY_POWERCAP_STATE_FREE = 0x0F, + SUNWAY_POWERCAP_STATE_LIMIT = 0xF0, +}; + +#pragma pack(1) + +struct sunway_powercap_cfg { + u8 version; + u8 mode; + u8 poll_interval; + u8 reserved; +}; + +#define FREQ_FLAG_ENABLE (1 << 0) +#define FREQ_FLAG_FREE (1 << 1) +#define FREQ_FLAG_TERMINATE (1 << 2) + +struct sunway_powercap_freq { + u32 target_freq; + u16 target_core; + u16 flags; +}; + +#define ACK_FLAG_VALID_SIZE (1 << 0) +#define ACK_FLAG_VALID_VERSION (1 << 1) +#define ACK_FLAG_VALID_MODE (1 << 2) +#define ACK_FLAG_VALID_INTERVAL (1 << 3) +#define ACK_FLAG_VALID_FREQ (1 << 
4) +#define ACK_FLAG_VALID_NODE (1 << 5) +#define ACK_FLAG_VALID_CORE (1 << 6) + +struct sunway_powercap_ack { + u8 cmd; + u8 flags; + u16 reserved; +}; + +/* Reset to default packing */ +#pragma pack() + +struct sunway_powercap_bmc_data { + struct device *bmc_device; + struct ipmi_addr address; + struct ipmi_user *user; + struct completion complete; + int interface; + + struct kernel_ipmi_msg tx_message; + unsigned char tx_msg_data[IPMI_MAX_MSG_LENGTH]; + long tx_msgid; + + unsigned char rx_msg_data[IPMI_MAX_MSG_LENGTH]; + unsigned short rx_msg_len; + unsigned char rx_result; + int rx_recv_type; + + bool initialized; +}; + +struct sunway_powercap_driver_data { + struct device *dev; + + unsigned char version; + unsigned char mode; + unsigned char poll_interval; + + struct timer_list timer; + struct work_struct work; + + struct ipmi_smi_watcher bmc_events; + struct ipmi_user_hndl ipmi_hndlrs; + struct sunway_powercap_bmc_data bmc_data; +}; + +struct sunway_powercap_cpu { + unsigned int state; + unsigned int node; + unsigned int core; + struct cpufreq_policy *policy; + struct freq_qos_request *qos_req; +}; + +static void sunway_powercap_register_bmc(int iface, struct device *dev); +static void sunway_powercap_bmc_gone(int iface); +static void sunway_powercap_msg_handler(struct ipmi_recv_msg *msg, + void *user_msg_data); + +static struct sunway_powercap_driver_data driver_data = { + .bmc_events = { + .new_smi = sunway_powercap_register_bmc, + .smi_gone = sunway_powercap_bmc_gone, + }, + + .ipmi_hndlrs = { + .ipmi_recv_hndl = sunway_powercap_msg_handler, + }, +}; + +static struct sunway_powercap_cpu powercap_cpu_data[NR_CPUS]; + +static unsigned char powercap_freq_ack[IPMI_MAX_MSG_LENGTH]; + +static int sunway_powercap_send_message(struct sunway_powercap_bmc_data *bmc_data) +{ + int ret; + + ret = ipmi_validate_addr(&bmc_data->address, sizeof(bmc_data->address)); + if (ret) { + dev_err(bmc_data->bmc_device, "invalid ipmi addr (%d)\n", ret); + return ret; + } + + 
bmc_data->tx_msgid++; + ret = ipmi_request_settime(bmc_data->user, &bmc_data->address, + bmc_data->tx_msgid, &bmc_data->tx_message, + bmc_data, 0, 0, 0); + if (ret) { + dev_err(bmc_data->bmc_device, + "unable to send message (%d)\n", ret); + return ret; + } + + return 0; +} + +static int sunway_powercap_send_cmd(struct sunway_powercap_bmc_data *bmc_data, + unsigned char cmd, const unsigned char *data, unsigned short data_len) +{ + bmc_data->tx_message.cmd = cmd; + bmc_data->tx_message.data_len = data_len; + + if (data_len) + memcpy(bmc_data->tx_msg_data, data, data_len); + + return sunway_powercap_send_message(bmc_data); +} + +static int sunway_powercap_query(struct sunway_powercap_bmc_data *bmc_data, + unsigned char cmd, const char *info) +{ + int ret; + + ret = sunway_powercap_send_cmd(bmc_data, cmd, NULL, 0); + if (ret) { + dev_err(bmc_data->bmc_device, "unable to query %s\n", info); + return ret; + } + + wait_for_completion(&bmc_data->complete); + + if (bmc_data->rx_result) { + dev_err(bmc_data->bmc_device, "rx error 0x%x when query %s\n", + bmc_data->rx_result, info); + return -EINVAL; + } + + return 0; +} + +static int sunway_powercap_ack_bmc(struct sunway_powercap_bmc_data *bmc_data, + const struct sunway_powercap_ack *ack, int num) +{ + unsigned char cmd = SUNWAY_POWERCAP_CMD_ACK; + int ret; + + ret = sunway_powercap_send_cmd(bmc_data, cmd, + (const char *)ack, sizeof(*ack) * num); + if (ret) { + dev_err(bmc_data->bmc_device, "unable to send ack\n"); + return ret; + } + + wait_for_completion(&bmc_data->complete); + + return 0; +} + +static inline unsigned int +powercap_target_node(const struct sunway_powercap_freq *freq) +{ + return freq->target_core & 0x3F; +} + +static inline unsigned int +powercap_target_core(const struct sunway_powercap_freq *freq) +{ + return (freq->target_core >> 6) & 0x3FF; +} + +static inline bool is_powercap_cpu_match(const struct sunway_powercap_cpu *data, + unsigned int node, unsigned int core) +{ + if ((node != 0x3F) && 
(data->node != node)) + return false; + + if ((core != 0x3FF) && (data->core != core)) + return false; + + return true; +} + +static int sunway_powercap_validate_freq(const struct sunway_powercap_freq *freq, + struct sunway_powercap_ack *ack) +{ + unsigned int node = powercap_target_node(freq); + unsigned int core = powercap_target_core(freq); + int i; + + /* Currently, core must be 0x3FF(all bits are 1) */ + if (core != 0x3FF) + goto out_validate_freq; + + for (i = 0; i < ARRAY_SIZE(powercap_cpu_data); i++) { + struct cpufreq_policy *policy = powercap_cpu_data[i].policy; + unsigned int target_freq = freq->target_freq; + + if (!policy) + continue; + + if (!is_powercap_cpu_match(&powercap_cpu_data[i], node, core)) + continue; + + /* Now we confirm that core and node are valid */ + ack->flags |= ACK_FLAG_VALID_NODE; + ack->flags |= ACK_FLAG_VALID_CORE; + + if (cpufreq_frequency_table_get_index(policy, target_freq) < 0) { + pr_err("invalid target freq %u\n", target_freq); + return -EINVAL; + } + + ack->flags |= ACK_FLAG_VALID_FREQ; + + return 0; + } + +out_validate_freq: + pr_err("invalid core %u on node %u\n", core, node); + + return -EINVAL; +} + +static inline bool is_powercap_enabled(const struct sunway_powercap_freq *freq) +{ + return !!(freq->flags & FREQ_FLAG_ENABLE); +} + +static inline bool is_powercap_no_limit(const struct sunway_powercap_freq *freq) +{ + return !!(freq->flags & FREQ_FLAG_FREE); +} + +static inline bool +is_powercap_freq_data_terminate(const struct sunway_powercap_freq *freq) +{ + return !!(freq->flags & FREQ_FLAG_TERMINATE); +} + +static int sunway_powercap_handle_free_cpus(struct cpufreq_policy *policy, + struct freq_qos_request *req, const struct sunway_powercap_freq *freq) +{ + int ret, related_cpu; + + if (!is_powercap_enabled(freq) || is_powercap_no_limit(freq)) + return 0; + + ret = freq_qos_add_request(&policy->constraints, + req, FREQ_QOS_MAX, freq->target_freq); + if (ret < 0) { + pr_err("unable to add qos request on cpus %*pbl\n", 
+ cpumask_pr_args(policy->related_cpus)); + return ret; + } + + for_each_cpu(related_cpu, policy->related_cpus) + powercap_cpu_data[related_cpu].state = SUNWAY_POWERCAP_STATE_LIMIT; + + return 0; +} + +static int sunway_powercap_handle_limit_cpus(struct cpufreq_policy *policy, + struct freq_qos_request *req, const struct sunway_powercap_freq *freq) +{ + int ret, related_cpu; + + if (is_powercap_enabled(freq) && !is_powercap_no_limit(freq)) { + ret = freq_qos_update_request(req, freq->target_freq); + if (ret < 0) + pr_err("unable to update qos request on cpus %*pbl\n", + cpumask_pr_args(policy->related_cpus)); + return ret; + } + + ret = freq_qos_remove_request(req); + if (ret < 0) { + pr_err("unable to remove qos request on cpus %*pbl\n", + cpumask_pr_args(policy->related_cpus)); + return ret; + } + + for_each_cpu(related_cpu, policy->related_cpus) + powercap_cpu_data[related_cpu].state = SUNWAY_POWERCAP_STATE_FREE; + + return 0; +} + +static int sunway_powercap_handle_one_freq(const struct sunway_powercap_freq *freq, + struct sunway_powercap_ack *ack) +{ + int i; + unsigned int node = powercap_target_node(freq); + unsigned int core = powercap_target_core(freq); + unsigned int state; + struct freq_qos_request *req; + struct cpufreq_policy *policy; + cpumask_var_t done; + + /* Ack freq */ + ack->cmd = SUNWAY_POWERCAP_CMD_GET_FREQ; + + /* Size must be valid here */ + ack->flags |= ACK_FLAG_VALID_SIZE; + + if (sunway_powercap_validate_freq(freq, ack)) + return -EINVAL; + + if (!alloc_cpumask_var(&done, GFP_KERNEL)) + return -ENOMEM; + + cpumask_clear(done); + + for (i = 0; i < ARRAY_SIZE(powercap_cpu_data); i++) { + policy = powercap_cpu_data[i].policy; + + if (!policy || policy_is_inactive(policy)) + continue; + + if (cpumask_test_cpu(i, done)) + continue; + + if (!is_powercap_cpu_match(&powercap_cpu_data[i], node, core)) + continue; + + state = powercap_cpu_data[i].state; + req = powercap_cpu_data[i].qos_req; + + if (state == SUNWAY_POWERCAP_STATE_FREE) + 
sunway_powercap_handle_free_cpus(policy, req, freq); + else if (state == SUNWAY_POWERCAP_STATE_LIMIT) + sunway_powercap_handle_limit_cpus(policy, req, freq); + else + pr_err("cpu %d with invalid state 0x%x\n", i, state); + + cpumask_or(done, done, policy->related_cpus); + } + + free_cpumask_var(done); + + return 0; +} + +static int sunway_powercap_poll_once(struct sunway_powercap_bmc_data *bmc_data) +{ + struct sunway_powercap_ack *ack; + struct sunway_powercap_freq *freq; + unsigned char cmd = SUNWAY_POWERCAP_CMD_GET_FREQ; + int ret, num, i; + +query_freq: + /* Clean ACK data */ + memset(powercap_freq_ack, 0, sizeof(powercap_freq_ack)); + + ret = sunway_powercap_query(bmc_data, cmd, "freq"); + if (ret) + return ret; + + ack = (struct sunway_powercap_ack *)&powercap_freq_ack[0]; + freq = (struct sunway_powercap_freq *)&bmc_data->rx_msg_data[0]; + + /* Number of freq data */ + num = bmc_data->rx_msg_len >> 3; + + if (!num || (bmc_data->rx_msg_len & 0x7)) { + dev_err(bmc_data->bmc_device, "invalid freq size %d\n", + bmc_data->rx_msg_len); + + /** + * The size must be multiple of 8 bytes, otherwise + * send only one ack with invalid size. 
+ */ + ack->cmd = cmd; + ack->flags &= ~ACK_FLAG_VALID_SIZE; + sunway_powercap_ack_bmc(bmc_data, ack, 1); + + return -EINVAL; + } + + /* Handle freq data one by one */ + for (i = 0; i < num; i++) + sunway_powercap_handle_one_freq(freq + i, ack + i); + + sunway_powercap_ack_bmc(bmc_data, ack, num); + + /* More freq Data needs to be queried */ + if (!is_powercap_freq_data_terminate(&freq[num - 1])) + goto query_freq; + + return 0; +} + +static inline bool is_legal_poll_interval(u8 interval) +{ + return (interval == SUNWAY_POWERCAP_POLL_INTERVAL0) || + (interval == SUNWAY_POWERCAP_POLL_INTERVAL1) || + (interval == SUNWAY_POWERCAP_POLL_INTERVAL2) || + (interval == SUNWAY_POWERCAP_POLL_INTERVAL3); +} + +static int sunway_powercap_validate_cfg(const struct sunway_powercap_cfg *cfg, + struct sunway_powercap_ack *ack) +{ + bool valid = true; + + if (!cfg->version || (cfg->version >= SUNWAY_POWERCAP_VERSION_MAX)) { + pr_err("invalid version %d\n", cfg->version); + valid = false; + } else + ack->flags |= ACK_FLAG_VALID_VERSION; + + if (cfg->mode > SUNWAY_POWERCAP_MODE_INTERRUPT) { + pr_err("invalid mode %d\n", cfg->mode); + valid = false; + } else + ack->flags |= ACK_FLAG_VALID_MODE; + + if ((cfg->mode == SUNWAY_POWERCAP_MODE_POLL) && + !is_legal_poll_interval(cfg->poll_interval)) { + pr_err("invalid poll interval %dms\n", cfg->poll_interval); + valid = false; + } else + ack->flags |= ACK_FLAG_VALID_INTERVAL; + + return valid ? 
0 : -EINVAL; +} + +static void sunway_powercap_add_timer(void) +{ + unsigned long expire; + struct timer_list *timer = &driver_data.timer; + + expire = jiffies + msecs_to_jiffies(driver_data.poll_interval); + timer->expires = round_jiffies_relative(expire); + add_timer(timer); +} + +static void sunway_powercap_poll_func(struct timer_list *t) +{ + struct work_struct *work = &driver_data.work; + + schedule_work(work); + sunway_powercap_add_timer(); +} + +static void sunway_powercap_acpi_notify(acpi_handle device, u32 value, void *data) +{ + struct device *dev = driver_data.dev; + struct work_struct *work = &driver_data.work; + + if (value != SUNWAY_POWERCAP_ACPI_NOTIFY_VALUE) { + dev_err(dev, "unknown acpi notify value\n"); + return; + } + + schedule_work(work); +} + +static int sunway_powercap_setup_cfg(const struct sunway_powercap_cfg *cfg) +{ + bool is_poll_mode = (cfg->mode == SUNWAY_POWERCAP_MODE_POLL); + struct device *dev = driver_data.dev; + struct acpi_device *adev; + acpi_status status; + + driver_data.version = cfg->version; + driver_data.mode = cfg->mode; + driver_data.poll_interval = cfg->poll_interval; + + if (is_poll_mode) { + timer_setup(&driver_data.timer, sunway_powercap_poll_func, 0); + sunway_powercap_add_timer(); + } else { + /* Must be interrupt mode */ + + adev = ACPI_COMPANION(dev); + if (WARN_ON(!adev)) + return -EINVAL; + + status = acpi_install_notify_handler(adev->handle, + ACPI_DEVICE_NOTIFY, + sunway_powercap_acpi_notify, + NULL); + if (ACPI_FAILURE(status)) { + dev_err(dev, "unable to register notifier %08x\n", + status); + return -EINVAL; + } + } + + dev_info(dev, "found with version %d and %s mode\n", + driver_data.version, + is_poll_mode ? 
"polling" : "interrupt"); + + return 0; +} + +static int sunway_powercap_init_cfg(struct sunway_powercap_bmc_data *bmc_data) +{ + struct sunway_powercap_cfg cfg = { 0 }; + struct sunway_powercap_ack ack = { 0 }; + unsigned char cmd = SUNWAY_POWERCAP_CMD_GET_CFG; + int ret; + + ret = sunway_powercap_query(bmc_data, cmd, "cfg"); + if (ret) + return ret; + + ack.cmd = cmd; + + if (bmc_data->rx_msg_len != sizeof(cfg)) { + dev_err(bmc_data->bmc_device, "invalid cfg size %d\n", + bmc_data->rx_msg_len); + ret = -EINVAL; + } + + if (!ret) { + ack.flags |= ACK_FLAG_VALID_SIZE; + memcpy(&cfg, bmc_data->rx_msg_data, sizeof(cfg)); + + ret = sunway_powercap_validate_cfg(&cfg, &ack); + if (!ret) + ret = sunway_powercap_setup_cfg(&cfg); + } + + sunway_powercap_ack_bmc(bmc_data, &ack, 1); + + return ret; +} + +static void sunway_powercap_register_bmc(int iface, struct device *dev) +{ + struct sunway_powercap_bmc_data *bmc_data = &driver_data.bmc_data; + int ret; + + /* Multiple BMC for suwnay powercap are not supported */ + if (bmc_data->initialized) { + dev_err(dev, "unable to register sunway-powercap repeatedly\n"); + return; + } + + bmc_data->address.addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE; + bmc_data->address.channel = IPMI_BMC_CHANNEL; + bmc_data->address.data[0] = 0; + bmc_data->interface = iface; + bmc_data->bmc_device = dev; + + /* Create IPMI user */ + ret = ipmi_create_user(bmc_data->interface, &driver_data.ipmi_hndlrs, + bmc_data, &bmc_data->user); + if (ret) { + dev_err(dev, "unable to register user with IPMI interface %d", + bmc_data->interface); + return; + } + + /* Initialize message */ + bmc_data->tx_msgid = 0; + bmc_data->tx_message.netfn = SUNWAY_POWERCAP_NETFN; + bmc_data->tx_message.data = bmc_data->tx_msg_data; + + init_completion(&bmc_data->complete); + + ret = sunway_powercap_init_cfg(bmc_data); + if (ret) { + dev_err(dev, "unable to initialize powercap configuration\n"); + goto out_destroy_user; + } + + bmc_data->initialized = true; + + return; + 
+out_destroy_user: + ipmi_destroy_user(bmc_data->user); +} + +static void sunway_powercap_bmc_gone(int iface) +{ + struct sunway_powercap_bmc_data *bmc_data = &driver_data.bmc_data; + + if (WARN_ON(bmc_data->interface != iface)) + return; + + ipmi_destroy_user(bmc_data->user); +} + +static void sunway_powercap_msg_handler(struct ipmi_recv_msg *msg, + void *user_msg_data) +{ + struct sunway_powercap_bmc_data *bmc_data = user_msg_data; + + if (msg->msgid != bmc_data->tx_msgid) { + dev_err(bmc_data->bmc_device, + "mismatch between rx msgid (0x%lx) and tx msgid (0x%lx)!\n", + msg->msgid, + bmc_data->tx_msgid); + ipmi_free_recv_msg(msg); + return; + } + + bmc_data->rx_recv_type = msg->recv_type; + if (msg->msg.data_len > 0) + bmc_data->rx_result = msg->msg.data[0]; + else + bmc_data->rx_result = IPMI_UNKNOWN_ERR_COMPLETION_CODE; + + if (msg->msg.data_len > 1) { + bmc_data->rx_msg_len = msg->msg.data_len - 1; + memcpy(bmc_data->rx_msg_data, msg->msg.data + 1, + bmc_data->rx_msg_len); + } else + bmc_data->rx_msg_len = 0; + + ipmi_free_recv_msg(msg); + complete(&bmc_data->complete); +} + +static void do_powercap(struct work_struct *work) +{ + sunway_powercap_poll_once(&driver_data.bmc_data); +} + +static int sunway_powercap_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct cpufreq_policy *policy; + int cpu; + + driver_data.dev = dev; + + INIT_WORK(&driver_data.work, do_powercap); + + for_each_possible_cpu(cpu) { + int related_cpu, rcid = cpu_physical_id(cpu); + struct freq_qos_request *req; + + /* Initial state */ + powercap_cpu_data[cpu].state = SUNWAY_POWERCAP_STATE_FREE; + + powercap_cpu_data[cpu].core = rcid_to_core_id(rcid); + powercap_cpu_data[cpu].node = rcid_to_domain_id(rcid); + + if (powercap_cpu_data[cpu].policy) + continue; + + policy = cpufreq_cpu_get(cpu); + if (!policy) + continue; + + req = devm_kzalloc(dev, sizeof(*req), GFP_KERNEL); + if (!req) + return -ENOMEM; + + for_each_cpu(related_cpu, 
policy->related_cpus) { + powercap_cpu_data[related_cpu].policy = policy; + powercap_cpu_data[related_cpu].qos_req = req; + } + } + + return ipmi_smi_watcher_register(&driver_data.bmc_events); +} + +#ifdef CONFIG_ACPI +static const struct acpi_device_id sunway_powercap_acpi_match[] = { + { "SUNW0203", 0 }, + {}, +}; +#endif + +static struct platform_driver sunway_powercap_driver = { + .probe = sunway_powercap_probe, + .driver = { + .name = "sunway-powercap", + .acpi_match_table = ACPI_PTR(sunway_powercap_acpi_match), + }, +}; + +static int __init sunway_powercap_driver_init(void) +{ + return platform_driver_register(&sunway_powercap_driver); +} +late_initcall(sunway_powercap_driver_init); diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c index 80e5ca7b940f719aa9d0d9a9546bb7347ff64e06..88c493b60d9a517b872fe2451dcac344f0f01193 100644 --- a/drivers/usb/host/pci-quirks.c +++ b/drivers/usb/host/pci-quirks.c @@ -1328,7 +1328,7 @@ fixup_usb_xhci_reset(struct pci_dev *dev) if (offset == 0) return; - base = (void *)__va(SW64_PCI_IO_BASE(hose->node, hose->index) | offset); + base = ioremap(SW64_PCI_IO_BASE(hose->node, hose->index) | offset, SZ_8K); ext_cap_offset = xhci_find_next_ext_cap(base, 0, XHCI_EXT_CAPS_LEGACY); if (!ext_cap_offset) @@ -1415,6 +1415,8 @@ fixup_usb_xhci_reset(struct pci_dev *dev) pci_read_config_dword(dev, PCI_COMMAND, &tmp); tmp &= ~(PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); pci_write_config_dword(dev, PCI_COMMAND, tmp); + + iounmap(base); } DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_SERIAL_USB_XHCI, 0, fixup_usb_xhci_reset); diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 4386683f9c609948f825e6b2f7afdccd814ca3f0..de9f3fc6ec2bdf1c5983a64b01fd3ff954aa0feb 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2184,6 +2184,10 @@ static u16 printk_sprint(char *text, u16 size, int facility, return text_len; } +#ifdef CONFIG_SW64_RRK +extern void sw64_rrk_store(const char *text, 
u16 text_len, u64 ts_nsec, int level, + unsigned long id, bool final); +#endif __printf(4, 0) int vprintk_store(int facility, int level, const struct dev_printk_info *dev_info, @@ -2202,17 +2206,10 @@ int vprintk_store(int facility, int level, u16 text_len; int ret = 0; u64 ts_nsec; -#ifdef CONFIG_SW64_RRK - extern void sw64_printk(const char *fmt, va_list args); -#endif if (!printk_enter_irqsave(recursion_ptr, irqflags)) return 0; -#ifdef CONFIG_SW64_RRK - sw64_printk(fmt, args); -#endif - /* * Since the duration of printk() can vary depending on the message * and state of the ringbuffer, grab the timestamp now so that it is @@ -2260,6 +2257,12 @@ int vprintk_store(int facility, int level, prb_commit(&e); } +#ifdef CONFIG_SW64_RRK + sw64_rrk_store(&r.text_buf[r.info->text_len], text_len, + r.info->ts_nsec, -1, e.id, + !!(flags & LOG_NEWLINE)); +#endif + ret = text_len; goto out; } @@ -2299,6 +2302,11 @@ int vprintk_store(int facility, int level, else prb_final_commit(&e); +#ifdef CONFIG_SW64_RRK + sw64_rrk_store(&r.text_buf[0], r.info->text_len, r.info->ts_nsec, r.info->level, + e.id, !!(flags & LOG_NEWLINE)); +#endif + ret = text_len + trunc_msg_len; out: printk_exit_irqrestore(recursion_ptr, irqflags); diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c index e2a6a69352dfb775ebc7d6954c98943ec2a3097f..23ea4ac065822f44c6f1a0aef0adf738fbaca034 100644 --- a/samples/ftrace/ftrace-direct-modify.c +++ b/samples/ftrace/ftrace-direct-modify.c @@ -164,6 +164,74 @@ asm ( #endif /* CONFIG_LOONGARCH */ +#ifdef CONFIG_SW64 + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp1, @function\n" +" .globl my_tramp1\n" +" my_tramp1:\n" +" subl $sp, 0x10, $sp\n" +" stl $26, 0($sp)\n" +" stl $15, 0x8($sp)\n" +" ldi $15, 0($sp)\n" + /* save $28 & fp of caller */ +" subl $sp, 0x10, $sp\n" +" stl $28, 0($sp)\n" +" stl $15, 0x8($sp)\n" +" ldi $15, 0($sp)\n" + /* save other regs */ +" subl $sp, 0x18, $sp\n" +" stl $26, 0($sp)\n" +" 
stl $28, 0x8($sp)\n" +" stl $29, 0x10($sp)\n" +" br $27, 1f\n" +"1: ldgp $29, 0($27)\n" +" call my_direct_func1\n" + /* restore all regs */ +" ldl $26, 0x0($sp)\n" +" ldl $28, 0x8($sp)\n" +" ldl $29, 0x10($sp)\n" +" addl $sp, 0x18, $sp\n" +" ldl $15, 0x18($sp)\n" +" addl $sp, 0x20, $sp\n" +" ret $31, ($28), 1\n" +" .size my_tramp1, .-my_tramp1\n" + +" .type my_tramp2, @function\n" +" .globl my_tramp2\n" +" my_tramp2:\n" +" subl $sp, 0x10, $sp\n" +" stl $26, 0($sp)\n" +" stl $15, 0x8($sp)\n" +" ldi $15, 0($sp)\n" + /* save $28 & fp of caller */ +" subl $sp, 0x10, $sp\n" +" stl $28, 0($sp)\n" +" stl $15, 0x8($sp)\n" +" ldi $15, 0($sp)\n" + /* save other regs */ +" subl $sp, 0x18, $sp\n" +" stl $26, 0($sp)\n" +" stl $28, 0x8($sp)\n" +" stl $29, 0x10($sp)\n" +" br $27, 1f\n" +"1: ldgp $29, 0($27)\n" +" call my_direct_func2\n" + /* restore all regs */ +" ldl $26, 0x0($sp)\n" +" ldl $28, 0x8($sp)\n" +" ldl $29, 0x10($sp)\n" +" addl $sp, 0x18, $sp\n" +" ldl $15, 0x18($sp)\n" +" addl $sp, 0x20, $sp\n" +" ret $31, ($28), 1\n" +" .size my_tramp2, .-my_tramp2\n" +" .popsection\n" +); + +#endif /* CONFIG_SW64 */ + static struct ftrace_ops direct; static unsigned long my_tramp = (unsigned long)my_tramp1; diff --git a/samples/ftrace/ftrace-direct-multi-modify.c b/samples/ftrace/ftrace-direct-multi-modify.c index 2e349834d63c386ef54a8d3fecb2df713e7e4f2e..455da065d89f28192425ca9affa14aa3135b1845 100644 --- a/samples/ftrace/ftrace-direct-multi-modify.c +++ b/samples/ftrace/ftrace-direct-multi-modify.c @@ -184,6 +184,80 @@ asm ( #endif /* CONFIG_LOONGARCH */ +#ifdef CONFIG_SW64 + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp1, @function\n" +" .globl my_tramp1\n" +" my_tramp1:\n" +" subl $sp, 0x10, $sp\n" +" stl $26, 0($sp)\n" +" stl $15, 0x8($sp)\n" +" ldi $15, 0($sp)\n" + /* save $28 & fp of caller */ +" subl $sp, 0x10, $sp\n" +" stl $28, 0($sp)\n" +" stl $15, 0x8($sp)\n" +" ldi $15, 0($sp)\n" + /* save other regs */ +" subl $sp, 0x20, $sp\n" +" stl $16, 
0($sp)\n" +" stl $26, 0x8($sp)\n" +" stl $28, 0x10($sp)\n" +" stl $29, 0x18($sp)\n" +" mov $28, $16\n" +" br $27, 1f\n" +"1: ldgp $29, 0($27)\n" +" call my_direct_func1\n" + /* restore all regs */ +" ldl $16, 0($sp)\n" +" ldl $26, 0x8($sp)\n" +" ldl $28, 0x10($sp)\n" +" ldl $29, 0x18($sp)\n" +" addl $sp, 0x20, $sp\n" +" ldl $15, 0x18($sp)\n" +" addl $sp, 0x20, $sp\n" +" ret $31, ($28), 1\n" +" .size my_tramp1, .-my_tramp1\n" + +" .type my_tramp2, @function\n" +" .globl my_tramp2\n" +" my_tramp2:\n" +" subl $sp, 0x10, $sp\n" +" stl $26, 0($sp)\n" +" stl $15, 0x8($sp)\n" +" ldi $15, 0($sp)\n" + /* save $28 & fp of caller */ +" subl $sp, 0x10, $sp\n" +" stl $28, 0($sp)\n" +" stl $15, 0x8($sp)\n" +" ldi $15, 0($sp)\n" + /* save other regs */ +" subl $sp, 0x20, $sp\n" +" stl $16, 0($sp)\n" +" stl $26, 0x8($sp)\n" +" stl $28, 0x10($sp)\n" +" stl $29, 0x18($sp)\n" +" mov $28, $16\n" +" br $27, 1f\n" +"1: ldgp $29, 0($27)\n" +" call my_direct_func2\n" + /* restore all regs */ +" ldl $16, 0($sp)\n" +" ldl $26, 0x8($sp)\n" +" ldl $28, 0x10($sp)\n" +" ldl $29, 0x18($sp)\n" +" addl $sp, 0x20, $sp\n" +" ldl $15, 0x18($sp)\n" +" addl $sp, 0x20, $sp\n" +" ret $31, ($28), 1\n" +" .size my_tramp2, .-my_tramp2\n" +" .popsection\n" +); + +#endif /* CONFIG_SW64 */ + static unsigned long my_tramp = (unsigned long)my_tramp1; static unsigned long tramps[2] = { (unsigned long)my_tramp1, diff --git a/samples/ftrace/ftrace-direct-multi.c b/samples/ftrace/ftrace-direct-multi.c index 9243dbfe4d0c1f72f7e9d55f5d7fb9631482018f..91b03bcf2f9c7dee054cee246604615f74b602cd 100644 --- a/samples/ftrace/ftrace-direct-multi.c +++ b/samples/ftrace/ftrace-direct-multi.c @@ -116,6 +116,47 @@ asm ( #endif /* CONFIG_LOONGARCH */ + +#ifdef CONFIG_SW64 + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp, @function\n" +" .globl my_tramp\n" +" my_tramp:\n" +" subl $sp, 0x10, $sp\n" +" stl $26, 0($sp)\n" +" stl $15, 0x8($sp)\n" +" ldi $15, 0($sp)\n" + /* save $28 & fp of caller */ +" subl $sp, 
0x10, $sp\n" +" stl $28, 0($sp)\n" +" stl $15, 0x8($sp)\n" +" ldi $15, 0($sp)\n" + /* save other regs */ +" subl $sp, 0x20, $sp\n" +" stl $16, 0($sp)\n" +" stl $26, 0x8($sp)\n" +" stl $28, 0x10($sp)\n" +" stl $29, 0x18($sp)\n" +" mov $28, $16\n" +" br $27, 1f\n" +"1: ldgp $29, 0($27)\n" +" call my_direct_func\n" + /* restore all regs */ +" ldl $16, 0($sp)\n" +" ldl $26, 0x8($sp)\n" +" ldl $28, 0x10($sp)\n" +" ldl $29, 0x18($sp)\n" +" addl $sp, 0x20, $sp\n" +" ldl $15, 0x18($sp)\n" +" addl $sp, 0x20, $sp\n" +" ret $31, ($28), 1\n" +" .size my_tramp, .-my_tramp\n" +" .popsection\n" +); + +#endif /* CONFIG_SW64 */ + static struct ftrace_ops direct; static int __init ftrace_direct_multi_init(void) diff --git a/samples/ftrace/ftrace-direct-too.c b/samples/ftrace/ftrace-direct-too.c index e39c3563ae4e42845aa8028aafa8fce394ab7759..8d4bf161bda8b11325cbb17f46738105ccd22c98 100644 --- a/samples/ftrace/ftrace-direct-too.c +++ b/samples/ftrace/ftrace-direct-too.c @@ -125,6 +125,52 @@ asm ( #endif /* CONFIG_LOONGARCH */ +#ifdef CONFIG_SW64 + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp, @function\n" +" .globl my_tramp\n" +" my_tramp:\n" +" subl $sp, 0x10, $sp\n" +" stl $26, 0($sp)\n" +" stl $15, 0x8($sp)\n" +" ldi $15, 0($sp)\n" + /* save $28 & fp of caller */ +" subl $sp, 0x10, $sp\n" +" stl $28, 0($sp)\n" +" stl $15, 0x8($sp)\n" +" ldi $15, 0($sp)\n" + /* save other regs */ +" subl $sp, 0x38, $sp\n" +" stl $16, 0($sp)\n" +" stl $17, 0x8($sp)\n" +" stl $18, 0x10($sp)\n" +" stl $19, 0x18($sp)\n" +" stl $26, 0x20($sp)\n" +" stl $28, 0x28($sp)\n" +" stl $29, 0x30($sp)\n" +" br $27, 1f\n" +"1: ldgp $29, 0($27)\n" +" call my_direct_func\n" + /* restore all regs */ +" ldl $16, 0($sp)\n" +" ldl $17, 0x8($sp)\n" +" ldl $18, 0x10($sp)\n" +" stl $19, 0x18($sp)\n" +" ldl $26, 0x20($sp)\n" +" ldl $28, 0x28($sp)\n" +" ldl $29, 0x30($sp)\n" +" addl $sp, 0x38, $sp\n" +" ldl $15, 0x18($sp)\n" +" addl $sp, 0x20, $sp\n" +" ret $31, ($28), 1\n" +" .size my_tramp, 
.-my_tramp\n" +" .popsection\n" +); + +#endif /* CONFIG_SW64 */ + static struct ftrace_ops direct; static int __init ftrace_direct_init(void) diff --git a/samples/ftrace/ftrace-direct.c b/samples/ftrace/ftrace-direct.c index 32c477da1e9aa3719cd1ac6997d8e774e7d84658..4e062e97fe010877a92d5d9c670de1d7844a278e 100644 --- a/samples/ftrace/ftrace-direct.c +++ b/samples/ftrace/ftrace-direct.c @@ -110,6 +110,46 @@ asm ( #endif /* CONFIG_LOONGARCH */ +#ifdef CONFIG_SW64 + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp, @function\n" +" .globl my_tramp\n" +" my_tramp:\n" +" subl $sp, 0x10, $sp\n" +" stl $26, 0($sp)\n" +" stl $15, 0x8($sp)\n" +" ldi $15, 0($sp)\n" + /* save $28 & fp of caller */ +" subl $sp, 0x10, $sp\n" +" stl $28, 0($sp)\n" +" stl $15, 0x8($sp)\n" +" ldi $15, 0($sp)\n" + /* save other regs */ +" subl $sp, 0x20, $sp\n" +" stl $16, 0($sp)\n" +" stl $26, 0x8($sp)\n" +" stl $28, 0x10($sp)\n" +" stl $29, 0x18($sp)\n" +" br $27, 1f\n" +"1: ldgp $29, 0($27)\n" +" call my_direct_func\n" + /* restore all regs */ +" ldl $16, 0($sp)\n" +" ldl $26, 0x8($sp)\n" +" ldl $28, 0x10($sp)\n" +" ldl $29, 0x18($sp)\n" +" addl $sp, 0x20, $sp\n" +" ldl $15, 0x18($sp)\n" +" addl $sp, 0x20, $sp\n" +" ret $31, ($28), 1\n" +" .size my_tramp, .-my_tramp\n" +" .popsection\n" +); + +#endif /* CONFIG_SW64 */ + static struct ftrace_ops direct; static int __init ftrace_direct_init(void)