diff --git a/arch/sw_64/include/asm/cpufreq.h b/arch/sw_64/include/asm/cpufreq.h
new file mode 100644
index 0000000000000000000000000000000000000000..cf47f1fc6866860b56ec2112abc1a1449ff66d72
--- /dev/null
+++ b/arch/sw_64/include/asm/cpufreq.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_SW64_CPUFREQ_H
+#define _ASM_SW64_CPUFREQ_H
+
+#include
+#include
+#include
+#include
+#include
+
+struct clk; /* forward declaration, the full definition follows clk_ops */
+
+extern char curruent_policy[CPUFREQ_NAME_LEN]; /* NOTE(review): "curruent" looks like a typo of "current"; a rename must also touch the definition and all users */
+
+struct clk_ops { /* callback table a clock points at through clk->ops */
+	void (*init)(struct clk *clk);
+	void (*enable)(struct clk *clk);
+	void (*disable)(struct clk *clk);
+	void (*recalc)(struct clk *clk);
+	int (*set_rate)(struct clk *clk, unsigned long rate, int algo_id);
+	long (*round_rate)(struct clk *clk, unsigned long rate);
+};
+
+struct clk { /* one clock object */
+	struct list_head node; /* list linkage; the list head is not visible in this header */
+	const char *name;
+	int id;
+	struct module *owner;
+
+	struct clk *parent; /* another clock this one refers to as its parent */
+	const struct clk_ops *ops; /* operations for this clock */
+
+	struct kref kref; /* reference count */
+
+	unsigned long rate;
+	unsigned long flags; /* presumably the CLK_* bits below - TODO confirm */
+};
+
+#define CLK_ALWAYS_ENABLED (1 << 0)
+#define CLK_RATE_PROPAGATES (1 << 1)
+
+#define CLK_PRT 0x1UL /* bit 0 */
+#define CORE_CLK0_V (0x1UL << 1)
+#define CORE_CLK0_R (0x1UL << 2)
+#define CORE_CLK2_V (0x1UL << 15)
+#define CORE_CLK2_R (0x1UL << 16)
+
+#define CLK_LV1_SEL_PRT 0x1UL /* bit 0 */
+#define CLK_LV1_SEL_MUXA (0x1UL << 2)
+#define CLK_LV1_SEL_MUXB (0x1UL << 3)
+
+#define CORE_PLL0_CFG_SHIFT 4 /* bit offset, presumably of the PLL0 config field - TODO confirm */
+#define CORE_PLL2_CFG_SHIFT 18 /* bit offset, presumably of the PLL2 config field - TODO confirm */
+
+extern struct cpufreq_frequency_table freq_table[]; /* defined outside this header */
+
+int clk_init(void);
+void sw64_set_rate(unsigned int index); /* presumably @index selects a freq_table[] entry - TODO confirm */
+
+struct clk *sw64_clk_get(struct device *dev, const char *id);
+
+void sw64_update_clockevents(unsigned long cpu, u32 freq);
+
+unsigned int __sw64_cpufreq_get(struct cpufreq_policy *policy);
+#endif /* _ASM_SW64_CPUFREQ_H */
diff --git a/arch/sw_64/include/asm/cputime.h b/arch/sw_64/include/asm/cputime.h
new file mode 100644
index 0000000000000000000000000000000000000000..cdd46b05e22840bbbe033ca200951269afa0b98f
--- /dev/null
+++ b/arch/sw_64/include/asm/cputime.h
@@
-0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SW64_CPUTIME_H
+#define _ASM_SW64_CPUTIME_H
+
+typedef u64 __nocast cputime64_t; /* 64-bit CPU time value */
+
+#define jiffies64_to_cputime64(__jif) ((__force cputime64_t)(__jif)) /* plain cast, no unit conversion */
+
+#endif /* _ASM_SW64_CPUTIME_H */
diff --git a/arch/sw_64/include/asm/ftrace.h b/arch/sw_64/include/asm/ftrace.h
new file mode 100644
index 0000000000000000000000000000000000000000..7ed6e3c06a333e81d96e881de91c0d1aac670ae7
--- /dev/null
+++ b/arch/sw_64/include/asm/ftrace.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * arch/sw_64/include/asm/ftrace.h
+ *
+ * Copyright (C) 2019, serveros, linyue
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef _ASM_SW64_FTRACE_H
+#define _ASM_SW64_FTRACE_H
+
+#define MCOUNT_ADDR ((unsigned long)_mcount) /* address of the _mcount entry point declared below */
+#define MCOUNT_INSN_SIZE 20 /* 5 * SW64_INSN_SIZE */
+#define MCOUNT_LDGP_SIZE 8 /* 2 * SW64_INSN_SIZE */
+
+#define ARCH_SUPPORTS_FTRACE_OPS 1
+
+#ifndef __ASSEMBLY__
+#include
+#include
+
+
+extern void _mcount(unsigned long);
+
+struct dyn_arch_ftrace {
+	/* No extra data needed for sw64 */
+};
+
+extern unsigned long ftrace_graph_call;
+
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+	/*
+	 * addr is the address of the mcount call instruction.
+	 * recordmcount does the necessary offset calculation.
+	 */
+	return addr; /* returned unchanged */
+}
+
+#endif /* ifndef __ASSEMBLY__ */
+#endif /* _ASM_SW64_FTRACE_H */
diff --git a/arch/sw_64/include/asm/jump_label.h b/arch/sw_64/include/asm/jump_label.h
new file mode 100644
index 0000000000000000000000000000000000000000..32fbf7573b206bb2c935cc173de392b100d02010
--- /dev/null
+++ b/arch/sw_64/include/asm/jump_label.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_SW64_JUMP_LABEL_H
+#define _ASM_SW64_JUMP_LABEL_H
+
+#ifndef __ASSEMBLY__
+
+#include
+#include
+
+#define JUMP_LABEL_NOP_SIZE SW64_INSN_SIZE /* the patch site is one instruction wide */
+
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
+{
+	asm_volatile_goto("1: nop\n\t" /* patch site, emitted as a nop */
+		".pushsection __jump_table, \"aw\"\n\t"
+		".align 3\n\t"
+		".quad 1b, %l[l_yes], %0\n\t" /* record: site address, target label, key address offset by @branch */
+		".popsection\n\t"
+		: : "i"(&((char *)key)[branch]) : : l_yes);
+
+	return false; /* reached while the site is still the nop */
+l_yes:
+	return true;
+}
+
+static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+{
+	asm_volatile_goto("1: br %l[l_yes]\n\t" /* patch site, emitted as a branch to l_yes */
+		".pushsection __jump_table, \"aw\"\n\t"
+		".align 3\n\t"
+		".quad 1b, %l[l_yes], %0\n\t" /* same record layout as arch_static_branch() */
+		".popsection\n\t"
+		: : "i"(&((char *)key)[branch]) : : l_yes);
+
+	return false;
+l_yes:
+	return true;
+}
+
+typedef u64 jump_label_t;
+
+struct jump_entry { /* three 64-bit words, same order as the .quad record above */
+	jump_label_t code;
+	jump_label_t target;
+	jump_label_t key;
+};
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_SW64_JUMP_LABEL_H */
diff --git a/arch/sw_64/include/asm/kexec.h b/arch/sw_64/include/asm/kexec.h
new file mode 100644
index 0000000000000000000000000000000000000000..25e0d8da84f8dbe98908179bb061ea5f4759aa6e
--- /dev/null
+++ b/arch/sw_64/include/asm/kexec.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SW64_KEXEC_H
+#define _ASM_SW64_KEXEC_H
+
+#ifdef CONFIG_KEXEC
+
+/* Maximum physical address we can use pages from */
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+/* Maximum address we can reach in physical address mode */
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+/* Maximum
address we can use for the control code buffer */
+#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
+
+#define KEXEC_CONTROL_PAGE_SIZE 8192
+
+#define KEXEC_ARCH KEXEC_ARCH_SW64
+
+#define KEXEC_SW64_ATAGS_OFFSET 0x1000
+#define KEXEC_SW64_ZIMAGE_OFFSET 0x8000
+
+#ifndef __ASSEMBLY__
+
+/**
+ * crash_setup_regs() - save registers for the panic kernel
+ * @newregs: registers are saved here
+ * @oldregs: registers to be saved (may be %NULL)
+ *
+ * Function copies machine registers from @oldregs to @newregs. If @oldregs is
+ * %NULL then current registers are stored there.
+ */
+static inline void crash_setup_regs(struct pt_regs *newregs,
+				    struct pt_regs *oldregs)
+{
+	if (oldregs) {
+		memcpy(newregs, oldregs, sizeof(*newregs));
+	} else { /* no saved frame: store the live registers $0..$30 into regs[0..30] one by one */
+		__asm__ __volatile__ ("stl $0, %0" : "=m" (newregs->regs[0]));
+		__asm__ __volatile__ ("stl $1, %0" : "=m" (newregs->regs[1]));
+		__asm__ __volatile__ ("stl $2, %0" : "=m" (newregs->regs[2]));
+		__asm__ __volatile__ ("stl $3, %0" : "=m" (newregs->regs[3]));
+		__asm__ __volatile__ ("stl $4, %0" : "=m" (newregs->regs[4]));
+		__asm__ __volatile__ ("stl $5, %0" : "=m" (newregs->regs[5]));
+		__asm__ __volatile__ ("stl $6, %0" : "=m" (newregs->regs[6]));
+		__asm__ __volatile__ ("stl $7, %0" : "=m" (newregs->regs[7]));
+		__asm__ __volatile__ ("stl $8, %0" : "=m" (newregs->regs[8]));
+		__asm__ __volatile__ ("stl $9, %0" : "=m" (newregs->regs[9]));
+		__asm__ __volatile__ ("stl $10, %0" : "=m" (newregs->regs[10]));
+		__asm__ __volatile__ ("stl $11, %0" : "=m" (newregs->regs[11]));
+		__asm__ __volatile__ ("stl $12, %0" : "=m" (newregs->regs[12]));
+		__asm__ __volatile__ ("stl $13, %0" : "=m" (newregs->regs[13]));
+		__asm__ __volatile__ ("stl $14, %0" : "=m" (newregs->regs[14]));
+		__asm__ __volatile__ ("stl $15, %0" : "=m" (newregs->regs[15]));
+		__asm__ __volatile__ ("stl $16, %0" : "=m" (newregs->regs[16]));
+		__asm__ __volatile__ ("stl $17, %0" : "=m" (newregs->regs[17]));
+		__asm__ __volatile__ ("stl $18, %0" : "=m" (newregs->regs[18]));
+		__asm__ __volatile__ ("stl $19, %0" : "=m" (newregs->regs[19]));
+		__asm__ __volatile__ ("stl $20, %0" : "=m" (newregs->regs[20]));
+		__asm__ __volatile__ ("stl $21, %0" : "=m" (newregs->regs[21]));
+		__asm__ __volatile__ ("stl $22, %0" : "=m" (newregs->regs[22]));
+		__asm__ __volatile__ ("stl $23, %0" : "=m" (newregs->regs[23]));
+		__asm__ __volatile__ ("stl $24, %0" : "=m" (newregs->regs[24]));
+		__asm__ __volatile__ ("stl $25, %0" : "=m" (newregs->regs[25]));
+		__asm__ __volatile__ ("stl $26, %0" : "=m" (newregs->regs[26]));
+		__asm__ __volatile__ ("stl $27, %0" : "=m" (newregs->regs[27]));
+		__asm__ __volatile__ ("stl $28, %0" : "=m" (newregs->regs[28]));
+		__asm__ __volatile__ ("stl $29, %0" : "=m" (newregs->regs[29]));
+		__asm__ __volatile__ ("stl $30, %0" : "=m" (newregs->regs[30]));
+		newregs->pc = (unsigned long)current_text_addr(); /* pc is whatever current_text_addr() yields at this point */
+	}
+}
+
+/* Function pointer to optional machine-specific reinitialization */
+extern void (*kexec_reinit)(void);
+
+#endif /* __ASSEMBLY__ */
+
+struct kimage;
+extern unsigned long kexec_args[4]; /* defined outside this header */
+
+#endif /* CONFIG_KEXEC */
+
+#endif /* _ASM_SW64_KEXEC_H */
diff --git a/arch/sw_64/include/asm/kgdb.h b/arch/sw_64/include/asm/kgdb.h
new file mode 100644
index 0000000000000000000000000000000000000000..a00a45ce767ca74361319836d3b188db9178285b
--- /dev/null
+++ b/arch/sw_64/include/asm/kgdb.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * sw64 KGDB support
+ *
+ * Based on arch/arm64/include/kgdb.h
+ *
+ * Copyright (C) Xia Bin
+ * Author: Xia Bin
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef _ASM_SW64_KGDB_H +#define _ASM_SW64_KGDB_H + +#include +#include + +#ifndef __ASSEMBLY__ + + +#define GDB_ADJUSTS_BREAK_OFFSET +#define BREAK_INSTR_SIZE 4 +#define CACHE_FLUSH_IS_SAFE 0 + +static inline void arch_kgdb_breakpoint(void) +{ + __asm__ __volatile__("sys_call %0" : : "i"(HMC_bpt)); +} + +void sw64_task_to_gdb_regs(struct task_struct *task, unsigned long *regs); + +extern void kgdb_handle_bus_error(void); +extern int kgdb_fault_expected; +extern unsigned long get_reg(struct task_struct *task, unsigned long regno); + +#endif /* !__ASSEMBLY__ */ + +/* + * general purpose registers size in bytes. + */ +#define DBG_MAX_REG_NUM (67) + +/* + * Size of I/O buffer for gdb packet. + * considering to hold all register contents, size is set + */ + +#define BUFMAX 4096 + +/* + * Number of bytes required for gdb_regs buffer. + * _GP_REGS: 8 bytes, _FP_REGS: 16 bytes and _EXTRA_REGS: 4 bytes each + * GDB fails to connect for size beyond this with error + * "'g' packet reply is too long" + */ +#define NUMREGBYTES (DBG_MAX_REG_NUM * 8) + +#endif /* _ASM_SW64_KGDB_H */ diff --git a/arch/sw_64/include/asm/kprobes.h b/arch/sw_64/include/asm/kprobes.h new file mode 100644 index 0000000000000000000000000000000000000000..0c7be8109ed29423cadec91e4f0ffc9d65e7ab0b --- /dev/null +++ b/arch/sw_64/include/asm/kprobes.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Kernel Probes (KProbes) + * Based on arch/mips/include/asm/kprobes.h + */ + +#ifndef _ASM_SW64_KPROBES_H +#define _ASM_SW64_KPROBES_H + +#include + +#define BREAK_KPROBE 0x40ffffff +#define BREAK_KPROBE_SS 0x40fffeff + +#ifdef CONFIG_KPROBES +#include +#include + +#include +#include + +#define __ARCH_WANT_KPROBES_INSN_SLOT + +struct kprobe; +struct pt_regs; + +typedef u32 kprobe_opcode_t; + +#define MAX_INSN_SIZE 2 + +#define flush_insn_slot(p) \ +do { \ + if (p->addr) \ + 
flush_icache_range((unsigned long)p->addr, \ + (unsigned long)p->addr + \ + (MAX_INSN_SIZE * sizeof(kprobe_opcode_t))); \ +} while (0) + + +#define kretprobe_blacklist_size 0 + +void arch_remove_kprobe(struct kprobe *p); + +/* Architecture specific copy of original instruction*/ +struct arch_specific_insn { + /* copy of the original instruction */ + kprobe_opcode_t *insn; + /* + * Set in kprobes code, initially to 0. If the instruction can be + * eumulated, this is set to 1, if not, to -1. + */ + int boostable; +}; + +struct prev_kprobe { + struct kprobe *kp; + unsigned long status; +}; + +#define SKIP_DELAYSLOT 0x0001 + +/* per-cpu kprobe control block */ +struct kprobe_ctlblk { + unsigned long kprobe_status; + /* Per-thread fields, used while emulating branches */ + unsigned long flags; + unsigned long target_pc; + struct prev_kprobe prev_kprobe; +}; +extern int kprobe_handler(struct pt_regs *regs); +extern int post_kprobe_handler(struct pt_regs *regs); +extern int kprobe_fault_handler(struct pt_regs *regs, unsigned long mmcsr); + + +#endif /* CONFIG_KPROBES */ +#endif /* _ASM_SW64_KPROBES_H */ diff --git a/arch/sw_64/include/asm/livepatch.h b/arch/sw_64/include/asm/livepatch.h new file mode 100644 index 0000000000000000000000000000000000000000..1feec0f6be76ddad2c1e65e0bfacf3d511510af0 --- /dev/null +++ b/arch/sw_64/include/asm/livepatch.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * livepatch.h - sw64-specific Kernel Live Patching Core + */ + +#ifndef _ASM_SW64_LIVEPATCH_H +#define _ASM_SW64_LIVEPATCH_H + +#include + +static inline int klp_check_compiler_support(void) +{ + return 0; +} + +static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) +{ + regs->regs[27] = ip; + regs->regs[28] = ip; +} + +#endif /* _ASM_SW64_LIVEPATCH_H */ diff --git a/arch/sw_64/include/asm/perf_event.h b/arch/sw_64/include/asm/perf_event.h new file mode 100644 index 
0000000000000000000000000000000000000000..dc55a361babd015aa92fbf7b0387f1e2beeecc40 --- /dev/null +++ b/arch/sw_64/include/asm/perf_event.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_PERF_EVENT_H +#define _ASM_SW64_PERF_EVENT_H + +#include +#include + +#ifdef CONFIG_PERF_EVENTS +struct pt_regs; +extern unsigned long perf_instruction_pointer(struct pt_regs *regs); +extern unsigned long perf_misc_flags(struct pt_regs *regs); +#define perf_misc_flags(regs) perf_misc_flags(regs) +#define perf_arch_bpf_user_pt_regs(regs) ®s->user_regs +#endif + +#endif /* _ASM_SW64_PERF_EVENT_H */ diff --git a/arch/sw_64/include/asm/pmc.h b/arch/sw_64/include/asm/pmc.h new file mode 100644 index 0000000000000000000000000000000000000000..d5672dd940a791c62e0edbe1e5e2356183cdd131 --- /dev/null +++ b/arch/sw_64/include/asm/pmc.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Definitions for use with the sw64 PMC interface. + */ + +#ifndef _ASM_SW64_PMC_H +#define _ASM_SW64_PMC_H + +#define PMC_PC0 0 +#define PMC_PC1 1 + +/* Following commands are implemented on all CPUs */ +#define PMC_CMD_DISABLE 0 +#define PMC_CMD_ENABLE 1 +#define PMC_CMD_EVENT_BASE 2 +#define PMC_CMD_PM 4 +#define PMC_CMD_READ 5 +#define PMC_CMD_READ_CLEAR 6 +#define PMC_CMD_WRITE_BASE 7 + +#define PMC_DISABLE_BASE 1 + +#define PMC_ENABLE_BASE 1 + +#define PC0_RAW_BASE 0x0 +#define PC1_RAW_BASE 0x100 +#define PC0_MAX 0xF +#define PC1_MAX 0x3D + +#define SW64_PERFCTRL_KM 2 +#define SW64_PERFCTRL_UM 3 +#define SW64_PERFCTRL_AM 4 + +/* pc0 events */ +#define PC0_INSTRUCTIONS 0x0 +#define PC0_BRANCH_INSTRUCTIONS 0x3 +#define PC0_CPU_CYCLES 0x8 +#define PC0_ITB_READ 0x9 +#define PC0_DTB_READ 0xA +#define PC0_ICACHE_READ 0xB +#define PC0_DCACHE_READ 0xC +#define PC0_SCACHE_REFERENCES 0xD + +/* pc1 events */ +#define PC1_BRANCH_MISSES 0xB +#define PC1_SCACHE_MISSES 0x10 +#define PC1_ICACHE_READ_MISSES 0x16 +#define PC1_ITB_MISSES 0x17 +#define PC1_DTB_SINGLE_MISSES 0x30 
+#define PC1_DCACHE_MISSES 0x32
+
+#define MAX_HWEVENTS 2 /* presumably one event per counter (PC0 and PC1) - TODO confirm */
+#define PMC_COUNT_MASK ((1UL << 58) - 1) /* low 58 bits set */
+
+#endif /* _ASM_SW64_PMC_H */
diff --git a/arch/sw_64/include/asm/spinlock.h b/arch/sw_64/include/asm/spinlock.h
new file mode 100644
index 0000000000000000000000000000000000000000..64358f32cd9a80b587a023dae6d5eecb1cf270e6
--- /dev/null
+++ b/arch/sw_64/include/asm/spinlock.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _ASM_SW64_SPINLOCK_H
+#define _ASM_SW64_SPINLOCK_H
+
+#include
+#include
+
+/* See include/linux/spinlock.h */
+#define smp_mb__after_spinlock() smp_mb() /* implemented as a full smp_mb() */
+
+#endif /* _ASM_SW64_SPINLOCK_H */
diff --git a/arch/sw_64/include/asm/spinlock_types.h b/arch/sw_64/include/asm/spinlock_types.h
new file mode 100644
index 0000000000000000000000000000000000000000..62e554e4f48c35b2d4578072231b58c75b202a4b
--- /dev/null
+++ b/arch/sw_64/include/asm/spinlock_types.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SW64_SPINLOCK_TYPES_H
+#define _ASM_SW64_SPINLOCK_TYPES_H
+
+#include
+#include
+
+#endif /* _ASM_SW64_SPINLOCK_TYPES_H */
diff --git a/arch/sw_64/include/asm/stacktrace.h b/arch/sw_64/include/asm/stacktrace.h
new file mode 100644
index 0000000000000000000000000000000000000000..958c9892fd6d0943bf78484c7e870323694fbde8
--- /dev/null
+++ b/arch/sw_64/include/asm/stacktrace.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef
_ASM_SW64_STACKTRACE_H
+#define _ASM_SW64_STACKTRACE_H
+
+#include
+#include
+#include
+#include
+#include
+
+struct stackframe { /* unwinder cursor handed to unwind_frame() */
+	unsigned long pc;
+	unsigned long fp;
+};
+
+enum stack_type {
+	STACK_TYPE_UNKNOWN,
+	STACK_TYPE_TASK,
+};
+
+struct stack_info { /* bounds and kind of a stack, filled in by on_task_stack() */
+	unsigned long low; /* lowest address of the stack */
+	unsigned long high; /* first address past the stack */
+	enum stack_type type;
+};
+
+/* The form of the top of the frame on the stack */
+struct stack_frame {
+	unsigned long return_address;
+	struct stack_frame *next_frame;
+};
+
+extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame);
+extern void walk_stackframe(struct task_struct *tsk, struct pt_regs *regs,
+			    int (*fn)(unsigned long, void *), void *data);
+
+static inline bool on_task_stack(struct task_struct *tsk, unsigned long sp,
+				 struct stack_info *info)
+{
+	unsigned long low = (unsigned long)task_stack_page(tsk);
+	unsigned long high = low + THREAD_SIZE;
+
+	if (sp < low || sp >= high) /* @sp lies outside [low, high) */
+		return false;
+
+	if (info) { /* @info is optional */
+		info->low = low;
+		info->high = high;
+		info->type = STACK_TYPE_TASK;
+	}
+
+	return true;
+}
+
+/*
+ * We can only safely access per-cpu stacks from current in a non-preemptible
+ * context.
+ */
+static inline bool on_accessible_stack(struct task_struct *tsk,
+				       unsigned long sp,
+				       struct stack_info *info)
+{
+	if (on_task_stack(tsk, sp, info))
+		return true;
+	if (tsk != current || preemptible()) /* NOTE(review): both paths below return false, so this test changes nothing yet */
+		return false;
+
+	return false; /* the task stack is the only stack recognised here */
+}
+
+#endif /* _ASM_SW64_STACKTRACE_H */
diff --git a/arch/sw_64/include/asm/suspend.h b/arch/sw_64/include/asm/suspend.h
new file mode 100644
index 0000000000000000000000000000000000000000..833e27f9d5e14a729a285406234e90fc03afbdfe
--- /dev/null
+++ b/arch/sw_64/include/asm/suspend.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SW64_SUSPEND_H
+#define _ASM_SW64_SUSPEND_H
+
+#include
+#include
+#include
+#define SOFTINF_SLEEP_MAGIC 0x0123456789ABCDEFUL
+
+#ifdef CONFIG_HIBERNATION
+#include
+#include
+#endif
+
+struct callee_saved_regs { /* integer registers kept in struct processor_state */
+	unsigned long r9;
+	unsigned long r10;
+	unsigned long r11;
+	unsigned long r12;
+	unsigned long r13;
+	unsigned long r14;
+	unsigned long r15;
+	unsigned long ra;
+};
+
+struct callee_saved_fpregs { /* four 64-bit words, i.e. 256 bits, per register */
+	unsigned long f2[4];
+	unsigned long f3[4];
+	unsigned long f4[4];
+	unsigned long f5[4];
+	unsigned long f6[4];
+	unsigned long f7[4];
+	unsigned long f8[4];
+	unsigned long f9[4];
+} __aligned(32); /* 256 bits aligned for simd */
+
+struct processor_state { /* state block passed to sw64_suspend_deep_sleep() */
+	struct callee_saved_regs regs;
+	struct callee_saved_fpregs fpregs;
+	unsigned long fpcr;
+	unsigned long ktp;
+#ifdef CONFIG_HIBERNATION
+	unsigned long sp;
+	struct vcpucb vcb;
+#endif
+};
+
+extern void sw64_suspend_deep_sleep(struct processor_state *state);
+extern const struct platform_suspend_ops native_suspend_ops;
+#endif /* _ASM_SW64_SUSPEND_H */
diff --git a/arch/sw_64/include/asm/uprobes.h b/arch/sw_64/include/asm/uprobes.h
new file mode 100644
index 0000000000000000000000000000000000000000..fcd2026c3622e20a781107c70d414f075d1bf588
--- /dev/null
+++ b/arch/sw_64/include/asm/uprobes.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This file is subject to the terms and conditions of
the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#ifndef _ASM_SW64_UPROBES_H
+#define _ASM_SW64_UPROBES_H
+
+#include
+#include
+#include
+
+/*
+ * We want this to be defined as union sw64_instruction but that makes the
+ * generic code blow up.
+ */
+typedef u32 uprobe_opcode_t;
+
+#define MAX_UINSN_BYTES SW64_INSN_SIZE
+#define UPROBE_XOL_SLOT_BYTES SW64_INSN_SIZE
+
+#define UPROBE_BRK_UPROBE 0x000d000d /* break 13 */
+#define UPROBE_BRK_UPROBE_XOL 0x000e000d /* break 14 */
+
+#define UPROBE_SWBP_INSN UPROBE_BRK_UPROBE /* the software breakpoint is the "break 13" encoding */
+#define UPROBE_SWBP_INSN_SIZE MAX_UINSN_BYTES
+
+struct arch_uprobe {
+	u32 insn;
+	u32 ixol[2]; /* two instruction words; presumably the out-of-line slot contents - TODO confirm */
+};
+
+struct arch_uprobe_task {
+	unsigned long saved_trap_nr;
+};
+
+#ifdef CONFIG_UPROBES
+void sw64_fix_uretprobe(struct pt_regs *regs, unsigned long exc_pc);
+#else /* !CONFIG_UPROBES: empty stub so callers need no #ifdef */
+static inline void
+sw64_fix_uretprobe(struct pt_regs *regs, unsigned long exc_pc) {}
+#endif
+
+#endif /* _ASM_SW64_UPROBES_H */
diff --git a/arch/sw_64/include/uapi/asm/bpf_perf_event.h b/arch/sw_64/include/uapi/asm/bpf_perf_event.h
new file mode 100644
index 0000000000000000000000000000000000000000..52f6f1e555f162ef7668965386cc758125726224
--- /dev/null
+++ b/arch/sw_64/include/uapi/asm/bpf_perf_event.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_ASM_SW64_BPF_PERF_EVENT_H
+#define _UAPI_ASM_SW64_BPF_PERF_EVENT_H
+
+#include
+
+typedef struct user_pt_regs bpf_user_pt_regs_t; /* BPF perf-event programs see struct user_pt_regs */
+
+#endif /* _UAPI_ASM_SW64_BPF_PERF_EVENT_H */
diff --git a/arch/sw_64/include/uapi/asm/perf_regs.h b/arch/sw_64/include/uapi/asm/perf_regs.h
new file mode 100644
index 0000000000000000000000000000000000000000..871ad4663d1dbd29cd23395b977615323c67d81e
--- /dev/null
+++ b/arch/sw_64/include/uapi/asm/perf_regs.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+
+#ifndef _UAPI_ASM_SW64_PERF_REGS_H
+#define _UAPI_ASM_SW64_PERF_REGS_H
+
+enum
perf_event_sw64_regs { + PERF_REG_SW64_R0, + PERF_REG_SW64_R1, + PERF_REG_SW64_R2, + PERF_REG_SW64_R3, + PERF_REG_SW64_R4, + PERF_REG_SW64_R5, + PERF_REG_SW64_R6, + PERF_REG_SW64_R7, + PERF_REG_SW64_R8, + PERF_REG_SW64_R9, + PERF_REG_SW64_R10, + PERF_REG_SW64_R11, + PERF_REG_SW64_R12, + PERF_REG_SW64_R13, + PERF_REG_SW64_R14, + PERF_REG_SW64_R15, + PERF_REG_SW64_R16, + PERF_REG_SW64_R17, + PERF_REG_SW64_R18, + PERF_REG_SW64_R19, + PERF_REG_SW64_R20, + PERF_REG_SW64_R21, + PERF_REG_SW64_R22, + PERF_REG_SW64_R23, + PERF_REG_SW64_R24, + PERF_REG_SW64_R25, + PERF_REG_SW64_R26, + PERF_REG_SW64_R27, + PERF_REG_SW64_R28, + PERF_REG_SW64_GP, + PERF_REG_SW64_SP, + PERF_REG_SW64_PC, + PERF_REG_SW64_MAX, +}; +#endif /* _UAPI_ASM_SW64_PERF_REGS_H */ diff --git a/arch/sw_64/kernel/cpuautoplug.c b/arch/sw_64/kernel/cpuautoplug.c new file mode 100644 index 0000000000000000000000000000000000000000..a7571a77a72c6ed4733fc179610e5b98853c1510 --- /dev/null +++ b/arch/sw_64/kernel/cpuautoplug.c @@ -0,0 +1,485 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +int autoplug_enabled; +int autoplug_verbose; +int autoplug_adjusting; + +DEFINE_PER_CPU(int, cpu_adjusting); + +struct cpu_autoplug_info { + cputime64_t prev_idle; + cputime64_t prev_wall; + struct delayed_work work; + unsigned int sampling_rate; + int maxcpus; /* max cpus for autoplug */ + int mincpus; /* min cpus for autoplug */ + int dec_reqs; /* continuous core-decreasing requests */ + int inc_reqs; /* continuous core-increasing requests */ +}; + +struct cpu_autoplug_info ap_info; + +static ssize_t enabled_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", autoplug_enabled); +} + + +static ssize_t enabled_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + char val[5]; + int n; + + memcpy(val, buf, count); + n = kstrtol(val, 0, 0); + + if (n > 1 || 
n < 0) + return -EINVAL; + + autoplug_enabled = n; + + return count; +} + +static ssize_t verbose_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", autoplug_verbose); +} + +static ssize_t verbose_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + char val[5]; + int n; + + memcpy(val, buf, count); + n = kstrtol(val, 0, 0); + + if (n > 1 || n < 0) + return -EINVAL; + + autoplug_verbose = n; + + return count; +} + +static ssize_t maxcpus_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", ap_info.maxcpus); +} + +static ssize_t maxcpus_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + char val[5]; + int n; + + memcpy(val, buf, count); + n = kstrtol(val, 0, 0); + + if (n > num_possible_cpus() || n < ap_info.mincpus) + return -EINVAL; + + ap_info.maxcpus = n; + + return count; +} + +static ssize_t mincpus_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", ap_info.mincpus); +} + +static ssize_t mincpus_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + char val[5]; + int n; + + memcpy(val, buf, count); + n = kstrtol(val, 0, 0); + + if (n > ap_info.maxcpus || n < 1) + return -EINVAL; + + ap_info.mincpus = n; + + return count; +} + +static ssize_t sampling_rate_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", ap_info.sampling_rate); +} + +#define SAMPLING_RATE_MAX 1000 +#define SAMPLING_RATE_MIN 600 + +static ssize_t sampling_rate_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + char val[6]; + int n; + + memcpy(val, buf, count); + n = kstrtol(val, 0, 0); + + if (n > SAMPLING_RATE_MAX || n < SAMPLING_RATE_MIN) + return -EINVAL; + + ap_info.sampling_rate = n; + + return count; +} + +static 
ssize_t available_value_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "enabled: 0-1\nverbose: 0-1\nmaxcpus:" + "1-%d\nmincpus: 1-%d\nsampling_rate: %d-%d\n", + num_possible_cpus(), num_possible_cpus(), + SAMPLING_RATE_MIN, SAMPLING_RATE_MAX); +} + +static DEVICE_ATTR_RW(enabled); +static DEVICE_ATTR_RW(verbose); +static DEVICE_ATTR_RW(maxcpus); +static DEVICE_ATTR_RW(mincpus); +static DEVICE_ATTR_RW(sampling_rate); +static DEVICE_ATTR_RO(available_value); + +static struct attribute *cpuclass_default_attrs[] = { + &dev_attr_enabled.attr, + &dev_attr_verbose.attr, + &dev_attr_maxcpus.attr, + &dev_attr_mincpus.attr, + &dev_attr_sampling_rate.attr, + &dev_attr_available_value.attr, + NULL +}; + +static struct attribute_group cpuclass_attr_group = { + .attrs = cpuclass_default_attrs, + .name = "cpuautoplug", +}; + +static int __init setup_autoplug(char *str) +{ + if (!strcmp(str, "off")) + autoplug_enabled = 0; + else if (!strcmp(str, "on")) + autoplug_enabled = 1; + else + return 0; + return 1; +} + +__setup("autoplug=", setup_autoplug); + +static cputime64_t calc_busy_time(unsigned int cpu) +{ + cputime64_t busy_time; + + busy_time = kcpustat_cpu(cpu).cpustat[CPUTIME_USER]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE]; + busy_time += 1; + + return busy_time; +} + +static inline cputime64_t get_idle_time_jiffy(cputime64_t *wall) +{ + unsigned int cpu; + cputime64_t idle_time = 0; + cputime64_t cur_wall_time; + cputime64_t busy_time; + + cur_wall_time = jiffies64_to_cputime64(get_jiffies_64()); + + for_each_online_cpu(cpu) { + busy_time = calc_busy_time(cpu); + + idle_time += cur_wall_time - busy_time; + } + + if (wall) + *wall = (cputime64_t)jiffies_to_usecs(cur_wall_time); + + return 
(cputime64_t)jiffies_to_usecs(idle_time); +} + +static inline cputime64_t get_idle_time(cputime64_t *wall) +{ + unsigned int cpu; + u64 idle_time = 0; + + for_each_online_cpu(cpu) { + idle_time += get_cpu_idle_time_us(cpu, wall); + if (idle_time == -1ULL) + return get_idle_time_jiffy(wall); + } + + return idle_time; +} + +static cputime64_t get_min_busy_time(cputime64_t arr[], int size) +{ + int i, min_cpu_idx; + cputime64_t min_time = arr[0]; + + for (i = 0; i < size; i++) { + if (arr[i] > 0 && arr[i] < min_time) { + min_time = arr[i]; + min_cpu_idx = i; + } + } + + return min_cpu_idx; +} + +static int find_min_busy_cpu(void) +{ + int nr_all_cpus = num_possible_cpus(); + unsigned int cpus, target_cpu; + cputime64_t busy_time; + cputime64_t b_time[NR_CPUS]; + + memset(b_time, 0, sizeof(b_time)); + for_each_online_cpu(cpus) { + busy_time = calc_busy_time(cpus); + b_time[cpus] = busy_time; + } + target_cpu = get_min_busy_time(b_time, nr_all_cpus); + return target_cpu; +} + +static void increase_cores(int cur_cpus) +{ + struct device *dev; + + if (cur_cpus == ap_info.maxcpus) + return; + + cur_cpus = cpumask_next_zero(0, cpu_online_mask); + + dev = get_cpu_device(cur_cpus); + + per_cpu(cpu_adjusting, dev->id) = 1; + lock_device_hotplug(); + cpu_device_up(dev); + pr_info("The target_cpu is %d, After cpu_up, the cpu_num is %d\n", + dev->id, num_online_cpus()); + get_cpu_device(dev->id)->offline = false; + unlock_device_hotplug(); + per_cpu(cpu_adjusting, dev->id) = 0; +} + +static void decrease_cores(int cur_cpus) +{ + struct device *dev; + + if (cur_cpus == ap_info.mincpus) + return; + + cur_cpus = find_min_busy_cpu(); + + dev = get_cpu_device(cur_cpus); + + if (dev->id > 0) { + per_cpu(cpu_adjusting, dev->id) = -1; + lock_device_hotplug(); + cpu_device_down(dev); + pr_info("The target_cpu is %d. 
After cpu_down, the cpu_num is %d\n", + cur_cpus, num_online_cpus()); + get_cpu_device(dev->id)->offline = true; + unlock_device_hotplug(); + per_cpu(cpu_adjusting, dev->id) = 0; + } +} + +#define INC_THRESHOLD 80 +#define DEC_THRESHOLD 40 + +static void do_autoplug_timer(struct work_struct *work) +{ + cputime64_t cur_wall_time = 0, cur_idle_time; + unsigned long idle_time, wall_time; + int delay, load; + int nr_cur_cpus = num_online_cpus(); + int nr_all_cpus = num_possible_cpus(); + int inc_req = 1, dec_req = 2; + struct cpufreq_policy *policy = cpufreq_cpu_get_raw(smp_processor_id()); + + if (!policy || IS_ERR(policy->clk)) { + pr_err("%s: No %s associated to cpu: %d\n", + __func__, policy ? "clk" : "policy", 0); + return; + } + + ap_info.maxcpus = + setup_max_cpus > nr_cpu_ids ? nr_cpu_ids : setup_max_cpus; + ap_info.mincpus = ap_info.maxcpus / 4; + + if (strcmp(policy->governor->name, "performance") == 0) { + ap_info.mincpus = ap_info.maxcpus; + } else if (strcmp(policy->governor->name, "powersave") == 0) { + ap_info.maxcpus = ap_info.mincpus; + } else if (strcmp(policy->governor->name, "ondemand") == 0) { + ap_info.sampling_rate = 500; + inc_req = 0; + dec_req = 2; + } else if (strcmp(policy->governor->name, "conservative") == 0) { + inc_req = 1; + dec_req = 3; + ap_info.sampling_rate = 1000; /* 1s */ + } + + BUG_ON(smp_processor_id() != 0); + delay = msecs_to_jiffies(ap_info.sampling_rate); + if (!autoplug_enabled || system_state != SYSTEM_RUNNING) + goto out; + + autoplug_adjusting = 1; + + if (nr_cur_cpus > ap_info.maxcpus) { + decrease_cores(nr_cur_cpus); + autoplug_adjusting = 0; + goto out; + } + if (nr_cur_cpus < ap_info.mincpus) { + increase_cores(nr_cur_cpus); + autoplug_adjusting = 0; + goto out; + } + + cur_idle_time = get_idle_time(&cur_wall_time); + if (cur_wall_time == 0) + cur_wall_time = jiffies64_to_cputime64(get_jiffies_64()); + + wall_time = (unsigned int)(cur_wall_time - ap_info.prev_wall); + ap_info.prev_wall = cur_wall_time; + + idle_time 
= (unsigned int)(cur_idle_time - ap_info.prev_idle); + idle_time += wall_time * (nr_all_cpus - nr_cur_cpus); + ap_info.prev_wall = cur_idle_time; + + if (unlikely(!wall_time || wall_time * nr_all_cpus < idle_time)) { + autoplug_adjusting = 0; + goto out; + } + + load = 100 * (wall_time * nr_all_cpus - idle_time) / wall_time; + + if (load < (nr_cur_cpus - 1) * 100 - DEC_THRESHOLD) { + ap_info.inc_reqs = 0; + if (ap_info.dec_reqs < dec_req) + ap_info.dec_reqs++; + else { + ap_info.dec_reqs = 0; + decrease_cores(nr_cur_cpus); + } + } else { + ap_info.dec_reqs = 0; + if (load > (nr_cur_cpus - 1) * 100 + INC_THRESHOLD) { + if (ap_info.inc_reqs < inc_req) + ap_info.inc_reqs++; + else { + ap_info.inc_reqs = 0; + increase_cores(nr_cur_cpus); + } + } + } + + autoplug_adjusting = 0; +out: + schedule_delayed_work_on(0, &ap_info.work, delay); +} + +static struct platform_device_id platform_device_ids[] = { + { + .name = "sw64_cpuautoplug", + }, + {} +}; + +MODULE_DEVICE_TABLE(platform, platform_device_ids); + +static struct platform_driver platform_driver = { + .driver = { + .name = "sw64_cpuautoplug", + .owner = THIS_MODULE, + }, + .id_table = platform_device_ids, +}; + +static int __init cpuautoplug_init(void) +{ + int i, ret, delay; + + ret = sysfs_create_group(&cpu_subsys.dev_root->kobj, + &cpuclass_attr_group); + if (ret) + return ret; + + ret = platform_driver_register(&platform_driver); + if (ret) + return ret; + + pr_info("cpuautoplug: SW64 CPU autoplug driver.\n"); + + ap_info.maxcpus = + setup_max_cpus > nr_cpu_ids ? 
nr_cpu_ids : setup_max_cpus; + ap_info.mincpus = ap_info.maxcpus / 4; + ap_info.dec_reqs = 0; + ap_info.inc_reqs = 0; + ap_info.sampling_rate = 720; /* 720ms */ + if (setup_max_cpus == 0) { /* boot with npsmp */ + ap_info.maxcpus = 1; + autoplug_enabled = 0; + } + if (setup_max_cpus > num_possible_cpus()) + ap_info.maxcpus = num_possible_cpus(); + + pr_info("mincpu = %d, maxcpu = %d, autoplug_enabled = %d, rate = %d\n", + ap_info.mincpus, ap_info.maxcpus, autoplug_enabled, + ap_info.sampling_rate); + + for_each_possible_cpu(i) + per_cpu(cpu_adjusting, i) = 0; + delay = msecs_to_jiffies(ap_info.sampling_rate * 24); + INIT_DEFERRABLE_WORK(&ap_info.work, do_autoplug_timer); + schedule_delayed_work_on(0, &ap_info.work, delay); + + if (!autoplug_enabled) + cancel_delayed_work_sync(&ap_info.work); + + return ret; +} + +late_initcall(cpuautoplug_init); diff --git a/arch/sw_64/kernel/crash_dump.c b/arch/sw_64/kernel/crash_dump.c new file mode 100644 index 0000000000000000000000000000000000000000..4484673823b8e6065d9efb5f2299a21df67d421a --- /dev/null +++ b/arch/sw_64/kernel/crash_dump.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * arch/sw_64/kernel/crash_dump.c + * + * Copyright (C) 2019 JN + * Author: He Sheng + * + * This code is taken from arch/x86/kernel/crash_dump_64.c + * Created by: Hariprasad Nellitheertha (hari@in.ibm.com) + * Copyright (C) IBM Corporation, 2004. All rights reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include + +/** + * copy_oldmem_page() - copy one page from old kernel memory + * @pfn: page frame number to be copied + * @buf: buffer where the copied page is placed + * @csize: number of bytes to copy + * @offset: offset in bytes into the page + * @userbuf: if set, @buf is int he user address space + * + * This function copies one page from old kernel memory into buffer pointed by + * @buf. If @buf is in userspace, set @userbuf to %1. Returns number of bytes + * copied or negative error in case of failure. + */ +ssize_t copy_oldmem_page(unsigned long pfn, char *buf, + size_t csize, unsigned long offset, + int userbuf) +{ + void *vaddr; + + if (!csize) + return 0; + + vaddr = ioremap(__pfn_to_phys(pfn), PAGE_SIZE); + if (!vaddr) + return -ENOMEM; + + if (userbuf) { + if (copy_to_user(buf, vaddr + offset, csize)) { + iounmap(vaddr); + return -EFAULT; + } + } else { + memcpy(buf, vaddr + offset, csize); + } + + iounmap(vaddr); + return csize; +} diff --git a/arch/sw_64/kernel/entry-ftrace.S b/arch/sw_64/kernel/entry-ftrace.S new file mode 100644 index 0000000000000000000000000000000000000000..73e8e043fc9d14fbbaa50bb164fcc4326329001b --- /dev/null +++ b/arch/sw_64/kernel/entry-ftrace.S @@ -0,0 +1,326 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/sw_64/kernel/entry-ftrace.S + * + * Author: linyue + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ +#include +#include +#include + + .text + .set noat + .align 4 + +#define FTRACE_SP_OFF 0x50 + .macro mcount_enter + subl $sp, FTRACE_SP_OFF, $sp + stl $16, 0($sp) + stl $17, 0x8($sp) + stl $18, 0x10($sp) + stl $26, 0x18($sp) +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + stl $9, 0x20($sp) +#endif + stl $28, 0x28($sp) + stl $29, 0x30($sp) + stl $19, 0x38($sp) + stl $20, 0x40($sp) + stl $21, 0x48($sp) + .endm + + .macro mcount_end + ldl $16, 0($sp) + ldl $17, 0x8($sp) + ldl $18, 0x10($sp) + ldl $26, 0x18($sp) +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + ldl $9, 0x20($sp) +#endif + ldl $28, 0x28($sp) + ldl $29, 0x30($sp) + ldl $19, 0x38($sp) + ldl $20, 0x40($sp) + ldl $21, 0x48($sp) + addl $sp, FTRACE_SP_OFF, $sp + .endm + + .macro RESTORE_GRAPH_ARGS + ldi $16, 0x18($sp) /* &ra */ + bis $31, $9, $17 /* pc */ + #ifdef HAVE_FUNCTION_GRAPH_FP_TEST + bis $31, $15, $18 /* fp */ + #endif + .endm + + .macro SAVE_PT_REGS + ldi $sp, -PT_REGS_SIZE($sp) + stl $0, PT_REGS_R0($sp) + stl $1, PT_REGS_R1($sp) + stl $2, PT_REGS_R2($sp) + stl $3, PT_REGS_R3($sp) + stl $4, PT_REGS_R4($sp) + stl $5, PT_REGS_R5($sp) + stl $6, PT_REGS_R6($sp) + stl $7, PT_REGS_R7($sp) + stl $8, PT_REGS_R8($sp) + stl $9, PT_REGS_R9($sp) + stl $10, PT_REGS_R10($sp) + stl $11, PT_REGS_R11($sp) + stl $12, PT_REGS_R12($sp) + stl $13, PT_REGS_R13($sp) + stl $14, PT_REGS_R14($sp) + stl $15, PT_REGS_R15($sp) + stl $16, PT_REGS_R16($sp) + stl $17, PT_REGS_R17($sp) + stl $18, PT_REGS_R18($sp) + stl $19, PT_REGS_R19($sp) + stl $20, PT_REGS_R20($sp) + stl $21, PT_REGS_R21($sp) + stl $22, PT_REGS_R22($sp) + stl $23, PT_REGS_R23($sp) + stl $24, PT_REGS_R24($sp) + stl $25, PT_REGS_R25($sp) + stl $26, PT_REGS_R26($sp) + stl $27, PT_REGS_R27($sp) + stl $28, PT_REGS_R28($sp) + stl $29, PT_REGS_GP($sp) + ldi $0, PT_REGS_SIZE($sp) + stl $0, PT_REGS_SP($sp) + .endm + + .macro RESTORE_PT_REGS + ldl $0, PT_REGS_R0($sp) + ldl $1, PT_REGS_R1($sp) + ldl $2, PT_REGS_R2($sp) + ldl $3, PT_REGS_R3($sp) + ldl $4, PT_REGS_R4($sp) + ldl $5, 
PT_REGS_R5($sp) + ldl $6, PT_REGS_R6($sp) + ldl $7, PT_REGS_R7($sp) + ldl $8, PT_REGS_R8($sp) + ldl $9, PT_REGS_R9($sp) + ldl $10, PT_REGS_R10($sp) + ldl $11, PT_REGS_R11($sp) + ldl $12, PT_REGS_R12($sp) + ldl $13, PT_REGS_R13($sp) + ldl $14, PT_REGS_R14($sp) + ldl $15, PT_REGS_R15($sp) + ldl $16, PT_REGS_R16($sp) + ldl $17, PT_REGS_R17($sp) + ldl $18, PT_REGS_R18($sp) + ldl $19, PT_REGS_R19($sp) + ldl $20, PT_REGS_R20($sp) + ldl $21, PT_REGS_R21($sp) + ldl $22, PT_REGS_R22($sp) + ldl $23, PT_REGS_R23($sp) + ldl $24, PT_REGS_R24($sp) + ldl $25, PT_REGS_R25($sp) + ldl $26, PT_REGS_R26($sp) + ldl $27, PT_REGS_R27($sp) + ldl $28, PT_REGS_R28($sp) + ldl $29, PT_REGS_GP($sp) + ldi $sp, PT_REGS_SIZE($sp) + .endm + + .macro RESTORE_GRAPH_REG_ARGS + ldi $16, PT_REGS_R26($sp) + bis $31, $9, $17 +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST + bis $31, $15, $18 +#endif + .endm + + /* save return value regs*/ + .macro save_return_regs + subl $sp, 0x8, $sp + stl $0, 0x0($sp) + .endm + + /* restore return value regs*/ + .macro restore_return_regs + ldl $0, 0x0($sp) + addl $sp, 0x8, $sp + .endm + + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* + * void ftrace_graph_caller(void) + * + * Called from ftrace_caller() or ftrace_regs_caller() when function_graph + * tracer is selected. + * This function prepare_ftrace_return() fakes ra's value on the call + * stack in order to intercept instrumented function's return path and + * run return_to_handler() later on its exit. + */ + +ENTRY(ftrace_graph_caller) + ldgp $29, 0($27) + ldi $sp, -16($sp) + stl $26, 0($sp) + stl $15, 8($sp) + bis $31, $sp, $15 + + ldi $27, prepare_ftrace_return +ftrace_graph_call: + .global ftrace_graph_call + /* + * Calling ftrace_enable/disable_ftrace_graph_caller would overwrite + * the nop below. 
/*
 * void return_to_handler(void)
 *
 * Run ftrace_return_to_handler() before going back to parent.
 * @fp is checked against the value passed by ftrace_graph_caller()
 * only when HAVE_FUNCTION_GRAPH_FP_TEST is enabled.
 *
 * It is entered through a "ret" instruction, which does not modify $27,
 * so $27 has to be recalculated before ldgp can use it.
 */
ENTRY(return_to_handler)
	br	$27, 1f			/* presumably leaves the address of 1: in $27 */
1:	ldgp	$29, 0($27)
	save_return_regs		/* keep the traced function's $0 on the stack */
	bis	$31, $15, $16		/* parent's fp */
	ldi	$27, ftrace_return_to_handler
	call	$26, ($27)
	bis	$31, $0, $26		/* $26 = value handed back in $0, used as the return target below */
	restore_return_regs		/* reload the traced function's $0 */
	ret	$31, ($26), 1
END(return_to_handler)
+ */ + bis $31, $16, $9 +#endif + ldi $4, current_tracer + ldl $27, 0($4) + + .global ftrace_call +ftrace_call: /* tracer(pc, ra); */ + nop + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + RESTORE_GRAPH_ARGS + call ftrace_graph_caller +#endif + mcount_end + ret $31, ($28), 1 + .end ftrace_caller +#else /* !CONFIG_DYNAMIC_FTRACE */ + + .global _mcount + .ent _mcount +_mcount: + mcount_enter + br $27, 1f +1: ldgp $29, 0($27) + + ldl $27, ftrace_trace_function // if (ftrace_trace_function + ldi $5, ftrace_stub // != ftrace_stub) + cmpeq $27, $5, $6 // + bne $6, skip_ftrace + + subl $28, MCOUNT_INSN_SIZE, $16 // function's pc +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + bis $31, $16, $9 +#endif + bis $26, $31, $17 // function's ra (parent's pc) + call $26, ($27) // (*ftrace_trace_function)(pc, ra); + +skip_ftrace: +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + ldl $4, ftrace_graph_return // if ((ftrace_graph_return + cmpeq $4, $5, $6 // != ftrace_stub) + beq $6, 2f + ldl $4, ftrace_graph_entry // || (ftrace_graph_entry + ldi $5, ftrace_graph_entry_stub // != ftrace_graph_entry_stub)) + cmpeq $4, $5, $6 + bne $6, 3f +2: RESTORE_GRAPH_ARGS + call ftrace_graph_caller // ftrace_graph_caller(); +#endif +3: mcount_end + ret $31, ($28), 1 + .end _mcount + +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + .global ftrace_regs_caller + .ent ftrace_regs_caller +ftrace_regs_caller: + SAVE_PT_REGS + br $27, 1f +1: ldgp $29, 0($27) + + subl $28, MCOUNT_INSN_SIZE, $16 + bis $26, $31, $17 + ldi $4, function_trace_op + ldl $18, 0($4) + mov $sp, $19 + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + bis $31, $16, $9 +#endif + ldi $4, current_tracer + ldl $27, 0($4) + + .global ftrace_regs_call +ftrace_regs_call: + nop + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + RESTORE_GRAPH_REG_ARGS + call ftrace_graph_caller +#endif + RESTORE_PT_REGS + ret $31, ($28), 1 + .end ftrace_regs_caller +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ + + .global ftrace_stub + .ent ftrace_stub +ftrace_stub: + ret 
$31, ($26), 1 + .end ftrace_stub diff --git a/arch/sw_64/kernel/ftrace.c b/arch/sw_64/kernel/ftrace.c new file mode 100644 index 0000000000000000000000000000000000000000..fb25ffe3dbdaf4f26bf4389e63d37fd1aaaa754b --- /dev/null +++ b/arch/sw_64/kernel/ftrace.c @@ -0,0 +1,176 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Based on arch/arm64/kernel/ftrace.c + * + * Copyright (C) 2019 os kernel team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include + +#include + +#ifdef CONFIG_FUNCTION_TRACER +EXPORT_SYMBOL(_mcount); +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE + +#define TI_FTRACE_ADDR (offsetof(struct thread_info, dyn_ftrace_addr)) +#define TI_FTRACE_REGS_ADDR \ + (offsetof(struct thread_info, dyn_ftrace_regs_addr)) + +unsigned long current_tracer = (unsigned long)ftrace_stub; + +/* + * Replace a single instruction, which may be a branch or NOP. 
+ */ +static int ftrace_modify_code(unsigned long pc, u32 new) +{ + if (sw64_insn_write((void *)pc, new)) + return -EPERM; + return 0; +} + +/* + * Replace tracer function in ftrace_caller() + */ +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + unsigned long pc; + u32 new; + int ret; + + current_tracer = (unsigned long)func; + pc = (unsigned long)&ftrace_call; + new = SW64_CALL(R26, R27, 0); + ret = ftrace_modify_code(pc, new); + + if (!ret) { + pc = (unsigned long)&ftrace_regs_call; + new = SW64_CALL(R26, R27, 0); + ret = ftrace_modify_code(pc, new); + } + + return ret; +} + +/* + * Turn on the call to ftrace_caller() in instrumented function + */ +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned int insn[3]; + unsigned long pc = rec->ip + MCOUNT_LDGP_SIZE; + unsigned long offset; + + if (addr == FTRACE_ADDR) + offset = TI_FTRACE_ADDR; + else + offset = TI_FTRACE_REGS_ADDR; + + insn[0] = SW64_NOP; + /* ldl r28,(ftrace_addr_offset)(r8) */ + insn[1] = (0x23U << 26) | (28U << 21) | (8U << 16) | offset; + insn[2] = SW64_CALL(R28, R28, 0); + + /* replace the 3 mcount instructions at once */ + return copy_to_kernel_nofault((void *)pc, insn, 3 * SW64_INSN_SIZE); +} + +/* + * Turn off the call to ftrace_caller() in instrumented function + */ +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, + unsigned long addr) +{ + unsigned long pc = rec->ip + MCOUNT_LDGP_SIZE; + unsigned int insn[3] = {SW64_NOP, SW64_NOP, SW64_NOP}; + + return copy_to_kernel_nofault((void *)pc, insn, 3 * SW64_INSN_SIZE); +} + +void arch_ftrace_update_code(int command) +{ + ftrace_modify_all_code(command); +} + +int __init ftrace_dyn_arch_init(void) +{ + struct thread_info *ti = task_thread_info(&init_task); + + ti->dyn_ftrace_addr = FTRACE_ADDR; + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + ti->dyn_ftrace_regs_addr = FTRACE_REGS_ADDR; +#endif + return 0; +} +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +int 
ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + return 0; +} +#endif + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* + * function_graph tracer expects ftrace_return_to_handler() to be called + * on the way back to parent. For this purpose, this function is called + * in _mcount() or ftrace_caller() to replace return address (*parent) on + * the call stack to return_to_handler. + * + * Note that @frame_pointer is used only for sanity check later. + */ +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, + unsigned long frame_pointer) +{ + unsigned long return_hooker = (unsigned long)&return_to_handler; + unsigned long old; + + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + return; + + /* + * Note: + * No protection against faulting at *parent, which may be seen + * on other archs. It's unlikely on AArch64. + */ + old = *parent; + + if (!function_graph_enter(old, self_addr, frame_pointer, NULL)) + *parent = return_hooker; +} + +#ifdef CONFIG_DYNAMIC_FTRACE +/* + * Turn on/off the call to ftrace_graph_caller() in ftrace_caller() + * depending on @enable. 
/* Architecture specific part of the hibernation image header. */
struct restore_data_record {
	unsigned long magic;
};

#define RESTORE_MAGIC	0x0123456789ABCDEFUL

/**
 * arch_hibernation_header_save - populate the architecture specific part
 *		of a hibernation image header
 * @addr: address to save the data at
 * @max_size: number of bytes available at @addr
 *
 * Return: 0 on success, -EOVERFLOW if @max_size cannot hold the record.
 */
int arch_hibernation_header_save(void *addr, unsigned int max_size)
{
	struct restore_data_record *record = addr;

	if (sizeof(*record) > max_size)
		return -EOVERFLOW;

	record->magic = RESTORE_MAGIC;
	return 0;
}

/**
 * arch_hibernation_header_restore - read the architecture specific data
 *		from the hibernation image header
 * @addr: address to read the data from
 *
 * Return: 0 if the stored magic matches RESTORE_MAGIC, -EINVAL otherwise.
 */
int arch_hibernation_header_restore(void *addr)
{
	struct restore_data_record *record = addr;

	if (record->magic != RESTORE_MAGIC)
		return -EINVAL;

	return 0;
}
destination */ + ldi t3, PAGE_SIZE + addl t1, t3, t3 +$cpyloop: + ldl t8, 0(t1) + stl t8, 0(t2) + addl t1, 8, t1 + addl t2, 8, t2 + cmpeq t1, t3, t4 + beq t4, $cpyloop + + /* progress to the next pbe */ + ldl t0, PBE_NEXT(t0) + bne t0, $loop +$done: + + /* tell the hibernation core that we've just restored the memory */ + ldi $0, in_suspend + stl $31, 0($0) + + ldi $16, hibernate_state + ldi $1, PSTATE_REGS($16) + + ldl $9, CALLEE_R9($1) + ldl $10, CALLEE_R10($1) + ldl $11, CALLEE_R11($1) + ldl $12, CALLEE_R12($1) + ldl $13, CALLEE_R13($1) + ldl $14, CALLEE_R14($1) + ldl $15, CALLEE_R15($1) + ldl $26, CALLEE_RA($1) + /* SIMD-FP */ + fldd $f0, PSTATE_FPCR($16) + wfpcr $f0 + fimovd $f0, $2 + and $2, 0x3, $2 + beq $2, $hibernate_setfpec_0 + subl $2, 0x1, $2 + beq $2, $hibernate_setfpec_1 + subl $2, 0x1, $2 + beq $2, $hibernate_setfpec_2 + setfpec3 + br $hibernate_setfpec_over +$hibernate_setfpec_0: + setfpec0 + br $hibernate_setfpec_over +$hibernate_setfpec_1: + setfpec1 + br $hibernate_setfpec_over +$hibernate_setfpec_2: + setfpec2 +$hibernate_setfpec_over: + ldi $1, PSTATE_FPREGS($16) + vldd $f2, CALLEE_F2($1) + vldd $f3, CALLEE_F3($1) + vldd $f4, CALLEE_F4($1) + vldd $f5, CALLEE_F5($1) + vldd $f6, CALLEE_F6($1) + vldd $f7, CALLEE_F7($1) + vldd $f8, CALLEE_F8($1) + vldd $f9, CALLEE_F9($1) + + ldl sp, PSTATE_SP($16) + ldl $8, PSTATE_KTP($16) + sys_call HMC_wrktp + + ldi $0, 0($31) + + ret +END(restore_image) diff --git a/arch/sw_64/kernel/insn.c b/arch/sw_64/kernel/insn.c new file mode 100644 index 0000000000000000000000000000000000000000..281578e1bfc03b708be124e0e3d28644d811b512 --- /dev/null +++ b/arch/sw_64/kernel/insn.c @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019, serveros, linyue + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#include +#include + +//static DEFINE_RAW_SPINLOCK(patch_lock); + +int __kprobes sw64_insn_read(void *addr, u32 *insnp) +{ + int ret; + __le32 val; + + ret = copy_from_kernel_nofault(&val, addr, SW64_INSN_SIZE); + if (!ret) + *insnp = le32_to_cpu(val); + + return ret; +} + +static int __kprobes __sw64_insn_write(void *addr, __le32 insn) +{ + void *waddr = addr; + int ret; + + //raw_spin_lock_irqsave(&patch_lock, flags); + + ret = copy_to_kernel_nofault(waddr, &insn, SW64_INSN_SIZE); + + //raw_spin_unlock_irqrestore(&patch_lock, flags); + + return ret; +} + +static int __kprobes __sw64_insn_double_write(void *addr, __le64 insn) +{ + void *waddr = addr; + //unsigned long flags = 0; + int ret; + + //raw_spin_lock_irqsave(&patch_lock, flags); + + ret = copy_to_kernel_nofault(waddr, &insn, 2 * SW64_INSN_SIZE); + + //raw_spin_unlock_irqrestore(&patch_lock, flags); + + return ret; +} + +int __kprobes sw64_insn_write(void *addr, u32 insn) +{ + u32 *tp = addr; + /* SW64 instructions must be word aligned */ + if ((uintptr_t)tp & 0x3) + return -EINVAL; + return __sw64_insn_write(addr, cpu_to_le32(insn)); +} + +int __kprobes sw64_insn_double_write(void *addr, u64 insn) +{ + u32 *tp = addr; + /* SW64 instructions must be word aligned */ + if ((uintptr_t)tp & 0x3) + return -EINVAL; + return __sw64_insn_double_write(addr, cpu_to_le64(insn)); +} +unsigned int __kprobes sw64_insn_nop(void) +{ + return SW64_BIS(R31, R31, R31); +} + +unsigned int __kprobes sw64_insn_call(unsigned int ra, unsigned int rb) +{ + return SW64_CALL(ra, rb, 0); +} + +unsigned int __kprobes sw64_insn_sys_call(unsigned int num) +{ + return 
SW64_SYS_CALL(num); +} + +/* 'pc' is the address of br instruction, not the +4 PC. 'new_pc' is the target address. */ +unsigned int __kprobes sw64_insn_br(unsigned int ra, unsigned long pc, unsigned long new_pc) +{ + int offset = new_pc - pc; + unsigned int disp, minus = 0x1fffff; + + if (!(offset <= BR_MAX_DISP && offset >= -BR_MAX_DISP)) + return -1; + if (offset > 0) + disp = (offset - 4) / 4; + else + disp = ~(-offset / 4) & minus; + + return SW64_BR(ra, disp); + +} diff --git a/arch/sw_64/kernel/jump_label.c b/arch/sw_64/kernel/jump_label.c new file mode 100644 index 0000000000000000000000000000000000000000..f3bc40370e4de9b77889343338b509d6bdcad8c6 --- /dev/null +++ b/arch/sw_64/kernel/jump_label.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +#include +#include + +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + u32 *insnp = (u32 *)entry->code; + u32 insn; + + if (type == JUMP_LABEL_JMP) { + insn = sw64_insn_br(R31, (entry->code), entry->target); + BUG_ON(insn == -1); + } else { + insn = sw64_insn_nop(); + } + + *insnp = insn; + + flush_icache_range(entry->code, entry->code + SW64_INSN_SIZE); +} + +void arch_jump_label_transform_static(struct jump_entry *entry, + enum jump_label_type type) +{ + /* + * no need to rewrite NOP + */ +} diff --git a/arch/sw_64/kernel/kgdb.c b/arch/sw_64/kernel/kgdb.c new file mode 100644 index 0000000000000000000000000000000000000000..833f72a1577ca8f2d2f01113c0443739a9a9c025 --- /dev/null +++ b/arch/sw_64/kernel/kgdb.c @@ -0,0 +1,233 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * sw64 KGDB support + * + * Based on arch/arm64/kernel/kgdb.c + * + * Copyright (C) Xia Bin + * Author: Xia Bin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { + { "r0", 8, offsetof(struct pt_regs, regs[0])}, + { "r1", 8, offsetof(struct pt_regs, regs[1])}, + { "r2", 8, offsetof(struct pt_regs, regs[2])}, + { "r3", 8, offsetof(struct pt_regs, regs[3])}, + { "r4", 8, offsetof(struct pt_regs, regs[4])}, + { "r5", 8, offsetof(struct pt_regs, regs[5])}, + { "r6", 8, offsetof(struct pt_regs, regs[6])}, + { "r7", 8, offsetof(struct pt_regs, regs[7])}, + { "r8", 8, offsetof(struct pt_regs, regs[8])}, + + { "r9", 8, offsetof(struct pt_regs, regs[9])}, + { "r10", 8, offsetof(struct pt_regs, regs[10])}, + { "r11", 8, offsetof(struct pt_regs, regs[11])}, + { "r12", 8, offsetof(struct pt_regs, regs[12])}, + { "r13", 8, offsetof(struct pt_regs, regs[13])}, + { "r14", 8, offsetof(struct pt_regs, regs[14])}, + { "r15", 8, offsetof(struct pt_regs, regs[15])}, + + { "r16", 8, offsetof(struct pt_regs, regs[16])}, + { "r17", 8, offsetof(struct pt_regs, regs[17])}, + { "r18", 8, offsetof(struct pt_regs, regs[18])}, + + { "r19", 8, offsetof(struct pt_regs, regs[19])}, + { "r20", 8, offsetof(struct pt_regs, regs[20])}, + { "r21", 8, offsetof(struct pt_regs, regs[21])}, + { "r22", 8, offsetof(struct pt_regs, regs[22])}, + { "r23", 8, offsetof(struct pt_regs, regs[23])}, + { "r24", 8, offsetof(struct pt_regs, regs[24])}, + { "r25", 8, offsetof(struct pt_regs, regs[25])}, + { "r26", 8, offsetof(struct pt_regs, regs[26])}, + { "r27", 8, offsetof(struct pt_regs, regs[27])}, + { "at", 8, offsetof(struct pt_regs, regs[28])}, + { "gp", 8, offsetof(struct pt_regs, regs[29])}, + { "sp", 8, offsetof(struct 
pt_regs, regs[30])}, + { "zero", 8, -1 }, + + { "f0", 8, -1 }, + { "f1", 8, -1 }, + { "f2", 8, -1 }, + { "f3", 8, -1 }, + { "f4", 8, -1 }, + { "f5", 8, -1 }, + { "f6", 8, -1 }, + { "f7", 8, -1 }, + { "f8", 8, -1 }, + { "f9", 8, -1 }, + { "f10", 8, -1 }, + { "f11", 8, -1 }, + { "f12", 8, -1 }, + { "f13", 8, -1 }, + { "f14", 8, -1 }, + { "f15", 8, -1 }, + { "f16", 8, -1 }, + { "f17", 8, -1 }, + { "f18", 8, -1 }, + { "f19", 8, -1 }, + { "f20", 8, -1 }, + { "f21", 8, -1 }, + { "f22", 8, -1 }, + { "f23", 8, -1 }, + { "f24", 8, -1 }, + { "f25", 8, -1 }, + { "f26", 8, -1 }, + { "f27", 8, -1 }, + { "f28", 8, -1 }, + { "f29", 8, -1 }, + { "f30", 8, -1 }, + { "fpcr", 8, -1 }, + + { "pc", 8, offsetof(struct pt_regs, pc)}, + { "", 8, -1 }, + { "tp", 8, -1}, +}; + +char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) +{ + if (regno >= DBG_MAX_REG_NUM || regno < 0) + return NULL; + + if (dbg_reg_def[regno].offset != -1) + memcpy(mem, (void *)regs + dbg_reg_def[regno].offset, + dbg_reg_def[regno].size); + else + memset(mem, 0, dbg_reg_def[regno].size); + return dbg_reg_def[regno].name; +} + +int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) +{ + if (regno >= DBG_MAX_REG_NUM || regno < 0) + return -EINVAL; + + if (dbg_reg_def[regno].offset != -1) + memcpy((void *)regs + dbg_reg_def[regno].offset, mem, + dbg_reg_def[regno].size); + return 0; +} + +void +sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task) +{ + int i; + /* Initialize to zero */ + memset((char *)gdb_regs, 0, NUMREGBYTES); + for (i = 0; i < DBG_MAX_REG_NUM; i++) + gdb_regs[i] = get_reg(task, i); +} + +void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) +{ + pr_info("BEFORE SET PC WITH %lx\n", pc); + instruction_pointer(regs) = pc; + pr_info("AFTER SET PC IS %lx\n", instruction_pointer(regs)); +} + +void kgdb_call_nmi_hook(void *ignored) +{ + kgdb_nmicallback(raw_smp_processor_id(), NULL); +} + +void kgdb_roundup_cpus(void) +{ + local_irq_enable(); + 
smp_call_function(kgdb_call_nmi_hook, NULL, 0); + local_irq_disable(); +} + +int kgdb_arch_handle_exception(int exception_vector, int signo, + int err_code, char *remcom_in_buffer, + char *remcom_out_buffer, + struct pt_regs *linux_regs) +{ + char *ptr; + unsigned long address = -1; + + switch (remcom_in_buffer[0]) { + case 'c': + ptr = &remcom_in_buffer[1]; + if (kgdb_hex2long(&ptr, &address)) + kgdb_arch_set_pc(linux_regs, address); + return 0; + } + return -1; +} + +static int __kgdb_notify(struct die_args *args, unsigned long cmd) +{ + struct pt_regs *regs = args->regs; + + /* Userspace events, ignore. */ + if (user_mode(regs)) + return NOTIFY_DONE; + + if (kgdb_handle_exception(1, args->signr, cmd, regs)) + return NOTIFY_DONE; + + return NOTIFY_STOP; +} + +static int +kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr) +{ + unsigned long flags; + int ret; + + local_irq_save(flags); + ret = __kgdb_notify(ptr, cmd); + local_irq_restore(flags); + + return ret; +} + +static struct notifier_block kgdb_notifier = { + .notifier_call = kgdb_notify, +}; + +/* + * kgdb_arch_init - Perform any architecture specific initalization. + * This function will handle the initalization of any architecture + * specific callbacks. + */ +int kgdb_arch_init(void) +{ + int ret = register_die_notifier(&kgdb_notifier); + + if (ret != 0) + return ret; + return 0; +} + +/* + * kgdb_arch_exit - Perform any architecture specific uninitalization. + * This function will handle the uninitalization of any architecture + * specific callbacks, for dynamic registration and unregistration. + */ +void kgdb_arch_exit(void) +{ + unregister_die_notifier(&kgdb_notifier); +} + +/* + * sw64 instructions are always in LE. 
+ * Break instruction is encoded in LE format + */ +const struct kgdb_arch arch_kgdb_ops = { + .gdb_bpt_instr = {0x80, 00, 00, 00} +}; diff --git a/arch/sw_64/kernel/kprobes/Makefile b/arch/sw_64/kernel/kprobes/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..110ba2bf7752361442022553269447ceb802d465 --- /dev/null +++ b/arch/sw_64/kernel/kprobes/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_KPROBES) += kprobes.o decode-insn.o +obj-$(CONFIG_KPROBES_ON_FTRACE) += kprobes-ftrace.o diff --git a/arch/sw_64/kernel/kprobes/common.h b/arch/sw_64/kernel/kprobes/common.h new file mode 100644 index 0000000000000000000000000000000000000000..de10058f0376ea342c973e0e03a8ef1bd9faa72c --- /dev/null +++ b/arch/sw_64/kernel/kprobes/common.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _SW64_KERNEL_KPROBES_COMMON_H +#define _SW64_KERNEL_KPROBES_COMMON_H + + +extern bool sw64_insn_can_kprobe(kprobe_opcode_t *addr); + + +#endif /* _SW64_KERNEL_KPROBES_COMMON_H */ diff --git a/arch/sw_64/kernel/kprobes/decode-insn.c b/arch/sw_64/kernel/kprobes/decode-insn.c new file mode 100644 index 0000000000000000000000000000000000000000..91c31111f2b73273d186d6b0c1cb9961e12dd68a --- /dev/null +++ b/arch/sw_64/kernel/kprobes/decode-insn.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Based on arch/arm64/kernel/probes/decode-insn.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ + +#include + +#include "common.h" + +static bool __kprobes sw64_insn_is_steppable(u32 insn) +{ + /* + * Branch instructions will write a new value into the PC which is + * likely to be relative to the XOL address and therefore invalid. + * Deliberate generation of an exception during stepping is also not + * currently safe. Lastly, MSR instructions can do any number of nasty + * things we can't handle during single-stepping. + */ + if (sw64_insn_is_sys_call_b(insn) || + sw64_insn_is_sys_call(insn) || + sw64_insn_is_call(insn) || + sw64_insn_is_ret(insn) || + sw64_insn_is_jmp(insn) || + sw64_insn_is_br(insn) || + sw64_insn_is_bsr(insn) || + sw64_insn_is_memb(insn) || + sw64_insn_is_imemb(insn) || + sw64_insn_is_rtc(insn) || + sw64_insn_is_lldl(insn) || + sw64_insn_is_lldw(insn) || + sw64_insn_is_beq(insn) || + sw64_insn_is_bne(insn) || + sw64_insn_is_blt(insn) || + sw64_insn_is_ble(insn) || + sw64_insn_is_bgt(insn) || + sw64_insn_is_bge(insn) || + sw64_insn_is_blbc(insn) || + sw64_insn_is_blbs(insn) || + sw64_insn_is_fbeq(insn) || + sw64_insn_is_fbne(insn) || + sw64_insn_is_fblt(insn) || + sw64_insn_is_fble(insn) || + sw64_insn_is_fbgt(insn) || + sw64_insn_is_fbge(insn)) + return false; + + return true; +} + + +#ifdef CONFIG_KPROBES +// lldl rd_f +static bool __kprobes is_probed_between_atomic(kprobe_opcode_t *addr) +{ + int count = 0; + unsigned long size = 0, offset = 0; + kprobe_opcode_t *scan_start = NULL; + + if (kallsyms_lookup_size_offset((unsigned long)addr, &size, &offset)) + scan_start = addr - (offset / sizeof(kprobe_opcode_t)); + + while (scan_start < addr) { + if (sw64_insn_is_lldl(le32_to_cpu(*scan_start)) || + sw64_insn_is_lldw(le32_to_cpu(*scan_start))) + count++; + if (sw64_insn_is_rd_f(le32_to_cpu(*scan_start))) + count--; + scan_start++; + } + if (count) + return false; + + return true; +} + +bool __kprobes sw64_insn_can_kprobe(kprobe_opcode_t *addr) +{ + u32 insn = le32_to_cpu(*addr); + + if (!sw64_insn_is_steppable(insn)) { + pr_warn("addr 
/*
 * Ftrace callback handler for kprobes.
 *
 * @ip:        address used to look up the registered kprobe
 * @parent_ip: not used by this handler
 * @ops:       not used by this handler
 * @regs:      register state passed on to the probe's handlers
 *
 * Returns without doing anything when no kprobe is registered at @ip or
 * when the probe found there is disabled.
 */
void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
		struct ftrace_ops *ops, struct pt_regs *regs)
{
	struct kprobe *p;
	struct kprobe_ctlblk *kcb;

	p = get_kprobe((kprobe_opcode_t *)ip);
	if (unlikely(!p) || kprobe_disabled(p))
		return;

	kcb = get_kprobe_ctlblk();
	if (kprobe_running()) {
		/* A kprobe is already active on this CPU: only record the miss. */
		kprobes_inc_nmissed_count(p);
	} else {
		/*
		 * Rewind regs[28] by one mcount instruction while the
		 * pre-handler runs.
		 * NOTE(review): regs[28] is presumably the saved PC/return
		 * slot the handlers inspect; confirm against the sw64
		 * pt_regs layout.
		 */
		regs->regs[28] -= MCOUNT_INSN_SIZE;

		__this_cpu_write(current_kprobe, p);
		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
		if (!p->pre_handler || !p->pre_handler(p, regs)) {
			/*
			 * No pre-handler, or it returned 0: undo the rewind
			 * and run the post-handler, if any. When the
			 * pre-handler returns non-zero, regs[28] is left as
			 * the handler set it.
			 */
			regs->regs[28] += MCOUNT_INSN_SIZE;
			if (unlikely(p->post_handler)) {
				kcb->kprobe_status = KPROBE_HIT_SSDONE;
				p->post_handler(p, regs, 0);
			}
		}
		/* This probe is finished; no kprobe is current any more. */
		__this_cpu_write(current_kprobe, NULL);
	}
}
+#include + +#include "common.h" + +static u32 breakpoint_insn = BREAK_KPROBE; +static u32 breakpoint2_insn = BREAK_KPROBE_SS; + +int post_kprobe_handler(struct pt_regs *regs); + +DEFINE_PER_CPU(struct kprobe *, current_kprobe); +DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); + +int __kprobes arch_prepare_kprobe(struct kprobe *p) +{ + int ret = 0; + extern char __start_rodata[]; + extern char __end_rodata[]; + unsigned long probe_addr = (unsigned long)p->addr; + + if (probe_addr & 0x3) + return -EINVAL; + + if (!sw64_insn_can_kprobe(p->addr)) + return -EINVAL; + /* copy instruction */ + p->opcode = le32_to_cpu(*p->addr); + + + if (probe_addr >= (unsigned long) __start_rodata && + probe_addr <= (unsigned long) __end_rodata) + return -EINVAL; + + + /* insn: must be on special executable page on mips. */ + p->ainsn.insn = get_insn_slot(); + if (!p->ainsn.insn) { + ret = -ENOMEM; + goto out; + } + /* + * In the kprobe->ainsn.insn[] array we store the original + * instruction at index zero and a break trap instruction at + * index one. 
+ */ + p->ainsn.insn[0] = p->opcode; + p->ainsn.insn[1] = breakpoint2_insn; +out: + return ret; +} + +void __kprobes arch_arm_kprobe(struct kprobe *p) +{ + sw64_insn_write(p->addr, breakpoint_insn); + flush_insn_slot(p); +} + +void __kprobes arch_disarm_kprobe(struct kprobe *p) +{ + sw64_insn_write(p->addr, p->opcode); + flush_insn_slot(p); +} + +void __kprobes arch_remove_kprobe(struct kprobe *p) +{ + if (p->ainsn.insn) { + free_insn_slot(p->ainsn.insn, 0); + p->ainsn.insn = NULL; + } +} + +static void save_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + kcb->prev_kprobe.kp = kprobe_running(); + kcb->prev_kprobe.status = kcb->kprobe_status; +} + +static void restore_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); + kcb->kprobe_status = kcb->prev_kprobe.status; +} + +static void __kprobes set_current_kprobe(struct kprobe *p) +{ + __this_cpu_write(current_kprobe, p); +} + + +static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb, int reenter) +{ + if (reenter) { + save_previous_kprobe(kcb); + set_current_kprobe(p); + kcb->kprobe_status = KPROBE_REENTER; + } else { + kcb->kprobe_status = KPROBE_HIT_SS; + } + + /* insn simulation */ + kcb->target_pc = regs->pc; + regs->pc = (unsigned long)&p->ainsn.insn[0]; +} + +static int __kprobes reenter_kprobe(struct kprobe *p, + struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + switch (kcb->kprobe_status) { + case KPROBE_HIT_SSDONE: + case KPROBE_HIT_ACTIVE: + kprobes_inc_nmissed_count(p); + setup_singlestep(p, regs, kcb, 1); + break; + case KPROBE_HIT_SS: + case KPROBE_REENTER: + pr_warn("Unrecoverable kprobe detected.\n"); + dump_kprobe(p); + BUG(); + break; + default: + WARN_ON(1); + return 0; + } + return 1; +} + +int __kprobes kprobe_handler(struct pt_regs *regs) +{ + struct kprobe *p; + struct kprobe_ctlblk *kcb; + unsigned long addr = instruction_pointer(regs); + + if (user_mode(regs)) + return 0; + /* + * We 
don't want to be preempted for the entire + * duration of kprobe processing + */ + preempt_disable(); + kcb = get_kprobe_ctlblk(); + p = get_kprobe((kprobe_opcode_t *)(addr - 4)); + + if (p) { + if (kprobe_running()) { + if (reenter_kprobe(p, regs, kcb)) + return 1; + } else { + set_current_kprobe(p); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + + /* + * If we have no pre-handler or it returned 0, we + * continue with normal processing. If we have a + * pre-handler and it returned non-zero, that means + * user handler setup registers to exit to another + * instruction, we must skip the single stepping. + */ + if (!p->pre_handler || !p->pre_handler(p, regs)) + setup_singlestep(p, regs, kcb, 0); + else + reset_current_kprobe(); + return 1; + } + } + return 0; + +} +int __kprobes post_kprobe_handler(struct pt_regs *regs) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (!cur) + return 0; + + if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + cur->post_handler(cur, regs, 0); + } + + // resume_execution(cur, regs, kcb); + regs->pc = kcb->target_pc; + + + /* Restore back the original saved kprobes variables and continue. */ + if (kcb->kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(kcb); + goto out; + } + reset_current_kprobe(); +out: + preempt_enable_no_resched(); + + return 1; +} + +int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned long mmcsr) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (kcb->kprobe_status & KPROBE_HIT_SS) { + regs->pc = kcb->target_pc; + + reset_current_kprobe(); + preempt_enable_no_resched(); + } + return 0; +} + +/* + * Wrapper routine for handling exceptions. 
/*
 * Called when the probe at kretprobe trampoline is hit.
 *
 * Obtains the original return address from __kretprobe_trampoline_handler()
 * and writes it to both the PC and regs[26], the register that
 * arch_prepare_kretprobe() replaced with the trampoline address.
 */
static int __kprobes trampoline_probe_handler(struct kprobe *p,
						struct pt_regs *regs)
{
	unsigned long orig_ret_address;

	orig_ret_address = __kretprobe_trampoline_handler(regs, NULL);
	instruction_pointer(regs) = orig_ret_address;
	regs->regs[26] = orig_ret_address;

	/*
	 * A non-zero return makes kprobe_handler() skip single-stepping,
	 * so execution continues at the address set above.
	 * NOTE(review): nothing in this function re-enables preemption;
	 * check how the preempt count is balanced on this path.
	 */
	return 1;
}
trampoline_probe_handler +}; + +int __init arch_init_kprobes(void) +{ + return register_kprobe(&trampoline_p); +} diff --git a/arch/sw_64/kernel/machine_kexec.c b/arch/sw_64/kernel/machine_kexec.c new file mode 100644 index 0000000000000000000000000000000000000000..950998476cdaced4b7368cb4712a1d7081e11047 --- /dev/null +++ b/arch/sw_64/kernel/machine_kexec.c @@ -0,0 +1,209 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * machine_kexec.c for kexec + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ +#include +#include +#include +#include +#include + +#include + +extern void *kexec_control_page; +extern const unsigned char relocate_new_kernel[]; +extern const size_t relocate_new_kernel_size; + +extern unsigned long kexec_start_address; +extern unsigned long kexec_indirection_page; + +static atomic_t waiting_for_crash_ipi; + +#ifdef CONFIG_SMP +extern struct smp_rcb_struct *smp_rcb; + +/* + * Wait for relocation code is prepared and send + * secondary CPUs to spin until kernel is relocated. 
+ */ +static void kexec_smp_down(void *ignored) +{ + int cpu = smp_processor_id(); + + local_irq_disable(); + while (READ_ONCE(smp_rcb->ready) != 0) + mdelay(1); + set_cpu_online(cpu, false); + reset_cpu(cpu); +} +#endif + +int machine_kexec_prepare(struct kimage *kimage) +{ + return 0; +} + +void machine_kexec_cleanup(struct kimage *kimage) +{ +} + +void machine_shutdown(void) +{ +#ifdef CONFIG_SMP + WRITE_ONCE(smp_rcb->ready, 0); + smp_call_function(kexec_smp_down, NULL, 0); + smp_wmb(); + while (num_online_cpus() > 1) { + cpu_relax(); + mdelay(1); + } +#endif +} + +#ifdef CONFIG_SMP +static void machine_crash_nonpanic_core(void *unused) +{ + int cpu; + struct pt_regs regs; + + cpu = smp_processor_id(); + + local_irq_disable(); + crash_setup_regs(®s, NULL); + pr_debug("CPU %u will stop doing anything useful since another CPU has crashed\n", cpu); + crash_save_cpu(®s, cpu); + flush_cache_all(); + + set_cpu_online(cpu, false); + atomic_dec(&waiting_for_crash_ipi); + while (READ_ONCE(smp_rcb->ready) != 0) + mdelay(1); + if (cpu != 0) + reset_cpu(cpu); + else + machine_kexec(kexec_crash_image); +} +#else +static inline void machine_crash_nonpanic_core(void *unused) { } +#endif + +static void machine_kexec_mask_interrupts(void) +{ + unsigned int i; + struct irq_desc *desc; + + for_each_irq_desc(i, desc) { + struct irq_chip *chip; + + chip = irq_desc_get_chip(desc); + if (!chip) + continue; + + if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data)) + chip->irq_eoi(&desc->irq_data); + + if (chip->irq_mask) + chip->irq_mask(&desc->irq_data); + + if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) + chip->irq_disable(&desc->irq_data); + } +} + +void machine_crash_shutdown(struct pt_regs *regs) +{ + int cpu; + unsigned long msecs; + + cpu = smp_processor_id(); + local_irq_disable(); + kernel_restart_prepare(NULL); + atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); + smp_call_function(machine_crash_nonpanic_core, NULL, false); + msecs = 1000; /* Wait 
at most a second for the other cpus to stop */ + while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) { + mdelay(1); + msecs--; + } + if (atomic_read(&waiting_for_crash_ipi) > 0) + pr_warn("Non-crashing CPUs did not react to IPI\n"); + + crash_save_cpu(regs, cpu); + machine_kexec_mask_interrupts(); + pr_info("Loading crashdump kernel...\n"); +#ifdef CONFIG_SMP + WRITE_ONCE(smp_rcb->ready, 0); + if (cpu != 0) + reset_cpu(cpu); +#endif +} + +#define phys_to_ktext(pa) (__START_KERNEL_map + (pa)) + +typedef void (*noretfun_t)(void) __noreturn; + +void machine_kexec(struct kimage *image) +{ + void *reboot_code_buffer; + unsigned long entry; + unsigned long *ptr; + struct boot_params *params = sunway_boot_params; + + + reboot_code_buffer = kexec_control_page; + pr_info("reboot_code_buffer = %px\n", reboot_code_buffer); + kexec_start_address = phys_to_ktext(image->start); + pr_info("kexec_start_address = %#lx\n", kexec_start_address); + if (image->type == KEXEC_TYPE_DEFAULT) + kexec_indirection_page = + (unsigned long) phys_to_virt(image->head & PAGE_MASK); + else + kexec_indirection_page = (unsigned long)&image->head; + + pr_info("kexec_indirection_page = %#lx, image->head=%#lx\n", + kexec_indirection_page, image->head); + + params->cmdline = kexec_start_address - COMMAND_LINE_OFF; + params->initrd_start = *(__u64 *)(kexec_start_address - INITRD_START_OFF); + params->initrd_size = *(__u64 *)(kexec_start_address - INITRD_SIZE_OFF); + + pr_info("initrd_start = %#llx, initrd_size = %#llx\n" + "dtb_start = %#llx, efi_systab = %#llx\n" + "efi_memmap = %#llx, efi_memmap_size = %#llx\n" + "efi_memdesc_size = %#llx, efi_memdesc_version = %#llx\n" + "cmdline = %#llx\n", + params->initrd_start, params->initrd_size, + params->dtb_start, params->efi_systab, + params->efi_memmap, params->efi_memmap_size, + params->efi_memdesc_size, params->efi_memdesc_version, + params->cmdline); + + memcpy(reboot_code_buffer, relocate_new_kernel, relocate_new_kernel_size); + + /* + * The 
generic kexec code builds a page list with physical + * addresses. they are directly accessible through KSEG0 (or + * CKSEG0 or XPHYS if on 64bit system), hence the + * phys_to_virt() call. + */ + for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); + ptr = (entry & IND_INDIRECTION) ? + phys_to_virt(entry & PAGE_MASK) : ptr + 1) { + if (*ptr & IND_SOURCE || *ptr & IND_INDIRECTION || + *ptr & IND_DESTINATION) + *ptr = (unsigned long) phys_to_virt(*ptr); + } + + /* + * we do not want to be bothered. + */ + local_irq_disable(); + + pr_info("Will call new kernel at %08lx\n", image->start); + pr_info("Bye ...\n"); + smp_wmb(); + ((noretfun_t) reboot_code_buffer)(); +} diff --git a/arch/sw_64/kernel/perf_event.c b/arch/sw_64/kernel/perf_event.c new file mode 100644 index 0000000000000000000000000000000000000000..83bb051be9de4767779d8783b31c8eda4277caa2 --- /dev/null +++ b/arch/sw_64/kernel/perf_event.c @@ -0,0 +1,787 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Performance events support for SW64 platforms. + * + * This code is based upon riscv and sparc perf event code. + */ + +#include +#include + +/* For tracking PMCs and the hw events they monitor on each CPU. */ +struct cpu_hw_events { + /* + * Set the bit (indexed by the counter number) when the counter + * is used for an event. + */ + unsigned long used_mask[BITS_TO_LONGS(MAX_HWEVENTS)]; + /* Array of events current scheduled on this cpu. */ + struct perf_event *event[MAX_HWEVENTS]; +}; + +DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); + +struct sw64_perf_event { + /* pmu index */ + int counter; + /* events selector */ + int event; +}; + +/* + * A structure to hold the description of the PMCs available on a particular + * type of SW64 CPU. 
+ */ +struct sw64_pmu_t { + /* generic hw/cache events table */ + const struct sw64_perf_event *hw_events; + const struct sw64_perf_event (*cache_events)[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; + + /* method used to map hw/cache events */ + const struct sw64_perf_event *(*map_hw_event)(u64 config); + const struct sw64_perf_event *(*map_cache_event)(u64 config); + + /* The number of entries in the hw_event_map */ + int max_events; + + /* The number of counters on this pmu */ + int num_pmcs; + + /* + * All PMC counters reside in the IBOX register PCTR. This is the + * LSB of the counter. + */ + int pmc_count_shift[MAX_HWEVENTS]; + + /* + * The mask that isolates the PMC bits when the LSB of the counter + * is shifted to bit 0. + */ + unsigned long pmc_count_mask; + + /* The maximum period the PMC can count. */ + unsigned long pmc_max_period; + + /* + * The maximum value that may be written to the counter due to + * hardware restrictions is pmc_max_period - pmc_left. + */ + long pmc_left; + + /* Subroutine for checking validity of a raw event for this PMU. */ + bool (*raw_event_valid)(u64 config); +}; + +/* + * The SW64 PMU description currently in operation. This is set during + * the boot process to the specific CPU of the machine. + */ +static const struct sw64_pmu_t *sw64_pmu; + +/* + * SW64 PMC event types + * + * There is no one-to-one mapping of the possible hw event types to the + * actual codes that are used to program the PMCs hence we introduce our + * own hw event type identifiers. 
+ */ +#define SW64_OP_UNSUP {-1, -1} + +/* Mapping of the hw event types to the perf tool interface */ +static const struct sw64_perf_event core3_hw_event_map[] = { + [PERF_COUNT_HW_CPU_CYCLES] = {PMC_PC0, PC0_CPU_CYCLES}, + [PERF_COUNT_HW_INSTRUCTIONS] = {PMC_PC0, PC0_INSTRUCTIONS}, + [PERF_COUNT_HW_CACHE_REFERENCES] = {PMC_PC0, PC0_SCACHE_REFERENCES}, + [PERF_COUNT_HW_CACHE_MISSES] = {PMC_PC1, PC1_SCACHE_MISSES}, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = {PMC_PC0, PC0_BRANCH_INSTRUCTIONS}, + [PERF_COUNT_HW_BRANCH_MISSES] = {PMC_PC1, PC1_BRANCH_MISSES}, +}; + +/* Mapping of the hw cache event types to the perf tool interface */ +#define C(x) PERF_COUNT_HW_CACHE_##x +static const struct sw64_perf_event core3_cache_event_map + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = {PMC_PC0, PC0_DCACHE_READ}, + [C(RESULT_MISS)] = {PMC_PC1, PC1_DCACHE_MISSES} + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = SW64_OP_UNSUP, + [C(RESULT_MISS)] = SW64_OP_UNSUP, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = SW64_OP_UNSUP, + [C(RESULT_MISS)] = SW64_OP_UNSUP, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = {PMC_PC0, PC0_ICACHE_READ}, + [C(RESULT_MISS)] = {PMC_PC1, PC1_ICACHE_READ_MISSES}, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = SW64_OP_UNSUP, + [C(RESULT_MISS)] = SW64_OP_UNSUP, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = SW64_OP_UNSUP, + [C(RESULT_MISS)] = SW64_OP_UNSUP, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = SW64_OP_UNSUP, + [C(RESULT_MISS)] = SW64_OP_UNSUP, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = SW64_OP_UNSUP, + [C(RESULT_MISS)] = SW64_OP_UNSUP, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = SW64_OP_UNSUP, + [C(RESULT_MISS)] = SW64_OP_UNSUP, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = {PMC_PC0, PC0_DTB_READ}, + [C(RESULT_MISS)] = {PMC_PC1, PC1_DTB_SINGLE_MISSES}, + }, + 
[C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = SW64_OP_UNSUP, + [C(RESULT_MISS)] = SW64_OP_UNSUP, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = SW64_OP_UNSUP, + [C(RESULT_MISS)] = SW64_OP_UNSUP, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = {PMC_PC0, PC0_ITB_READ}, + [C(RESULT_MISS)] = {PMC_PC1, PC1_ITB_MISSES}, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = SW64_OP_UNSUP, + [C(RESULT_MISS)] = SW64_OP_UNSUP, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = SW64_OP_UNSUP, + [C(RESULT_MISS)] = SW64_OP_UNSUP, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = SW64_OP_UNSUP, + [C(RESULT_MISS)] = SW64_OP_UNSUP, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = SW64_OP_UNSUP, + [C(RESULT_MISS)] = SW64_OP_UNSUP, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = SW64_OP_UNSUP, + [C(RESULT_MISS)] = SW64_OP_UNSUP, + }, + }, + [C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = SW64_OP_UNSUP, + [C(RESULT_MISS)] = SW64_OP_UNSUP, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = SW64_OP_UNSUP, + [C(RESULT_MISS)] = SW64_OP_UNSUP, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = SW64_OP_UNSUP, + [C(RESULT_MISS)] = SW64_OP_UNSUP, + }, + }, + +}; + +static const struct sw64_perf_event *core3_map_hw_event(u64 config) +{ + return &sw64_pmu->hw_events[config]; +} + +static const struct sw64_perf_event *core3_map_cache_event(u64 config) +{ + unsigned int cache_type, cache_op, cache_result; + const struct sw64_perf_event *perf_event; + + cache_type = (config >> 0) & 0xff; + if (cache_type >= PERF_COUNT_HW_CACHE_MAX) + return ERR_PTR(-EINVAL); + + cache_op = (config >> 8) & 0xff; + if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) + return ERR_PTR(-EINVAL); + + cache_result = (config >> 16) & 0xff; + if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return ERR_PTR(-EINVAL); + + perf_event = &((*sw64_pmu->cache_events)[cache_type][cache_op][cache_result]); + if (perf_event->counter == -1) /* SW64_OP_UNSUP */ + return ERR_PTR(-ENOENT); + + return 
perf_event; +} + +/* + * r0xx for counter0, r1yy for counter1. + * According to the datasheet, 00 <= xx <= 0F, 00 <= yy <= 3D + */ +static bool core3_raw_event_valid(u64 config) +{ + if ((config >= PC0_RAW_BASE && config <= (PC0_RAW_BASE + PC0_MAX)) || + (config >= PC1_RAW_BASE && config <= (PC1_RAW_BASE + PC1_MAX))) + return true; + + pr_info("sw64 pmu: invalid raw event config %#llx\n", config); + return false; +} + +static const struct sw64_pmu_t core3_pmu = { + .max_events = ARRAY_SIZE(core3_hw_event_map), + .hw_events = core3_hw_event_map, + .map_hw_event = core3_map_hw_event, + .cache_events = &core3_cache_event_map, + .map_cache_event = core3_map_cache_event, + .num_pmcs = MAX_HWEVENTS, + .pmc_count_mask = PMC_COUNT_MASK, + .pmc_max_period = PMC_COUNT_MASK, + .pmc_left = 4, + .raw_event_valid = core3_raw_event_valid, +}; + +/* + * Low-level functions: reading/writing counters + */ +static void sw64_write_pmc(int idx, unsigned long val) +{ + wrperfmon(PMC_CMD_WRITE_BASE + idx, val); +} + +static unsigned long sw64_read_pmc(int idx) +{ + return wrperfmon(PMC_CMD_READ, idx); +} + +/* Set a new period to sample over */ +static int sw64_perf_event_set_period(struct perf_event *event, + struct hw_perf_event *hwc, int idx) +{ + long left = local64_read(&hwc->period_left); + long period = hwc->sample_period; + int overflow = 0; + unsigned long value; + + if (unlikely(left <= -period)) { + left = period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + overflow = 1; + } + + if (unlikely(left <= 0)) { + left += period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + overflow = 1; + } + + if (left > (long)sw64_pmu->pmc_max_period) + left = sw64_pmu->pmc_max_period; + + value = sw64_pmu->pmc_max_period - left; + local64_set(&hwc->prev_count, value); + sw64_write_pmc(idx, value); + + perf_event_update_userpage(event); + + return overflow; +} + +/* + * Calculates the count (the 'delta') since the last time the PMC was read. 
+ * + * As the PMCs' full period can easily be exceeded within the perf system + * sampling period we cannot use any high order bits as a guard bit in the + * PMCs to detect overflow as is done by other architectures. The code here + * calculates the delta on the basis that there is no overflow when ovf is + * zero. The value passed via ovf by the interrupt handler corrects for + * overflow. + * + * This can be racey on rare occasions -- a call to this routine can occur + * with an overflowed counter just before the PMI service routine is called. + * The check for delta negative hopefully always rectifies this situation. + */ +static unsigned long sw64_perf_event_update(struct perf_event *event, + struct hw_perf_event *hwc, int idx, long ovf) +{ + long prev_raw_count, new_raw_count; + long delta; + +again: + prev_raw_count = local64_read(&hwc->prev_count); + new_raw_count = sw64_read_pmc(idx); + + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) + goto again; + + delta = (new_raw_count - (prev_raw_count & sw64_pmu->pmc_count_mask)) + ovf; + + /* It is possible on very rare occasions that the PMC has overflowed + * but the interrupt is yet to come. Detect and fix this situation. + */ + if (unlikely(delta < 0)) + delta += sw64_pmu->pmc_max_period + 1; + + local64_add(delta, &event->count); + local64_sub(delta, &hwc->period_left); + + return new_raw_count; +} + +/* + * State transition functions: + * + * add()/del() & start()/stop() + * + */ + +/* + * pmu->start: start the event. 
+ */ +static void sw64_pmu_start(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) + return; + + if (flags & PERF_EF_RELOAD) { + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + sw64_perf_event_set_period(event, hwc, hwc->idx); + } + + hwc->state = 0; + + /* counting in selected modes, for both counters */ + wrperfmon(PMC_CMD_PM, hwc->config_base); + wrperfmon(PMC_CMD_EVENT_BASE + hwc->idx, hwc->event_base); + wrperfmon(PMC_CMD_ENABLE, PMC_ENABLE_BASE + hwc->idx); +} + +/* + * pmu->stop: stop the counter + */ +static void sw64_pmu_stop(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + if (!(hwc->state & PERF_HES_STOPPED)) { + wrperfmon(PMC_CMD_DISABLE, PMC_DISABLE_BASE + hwc->idx); + hwc->state |= PERF_HES_STOPPED; + barrier(); + } + + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + sw64_perf_event_update(event, hwc, hwc->idx, 0); + hwc->state |= PERF_HES_UPTODATE; + } +} + +/* + * pmu->add: add the event to PMU. + */ +static int sw64_pmu_add(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int err = 0; + unsigned long irq_flags; + + local_irq_save(irq_flags); + + if (__test_and_set_bit(hwc->idx, cpuc->used_mask)) { + err = -ENOSPC; + goto out; + } + + cpuc->event[hwc->idx] = event; + + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + if (flags & PERF_EF_START) + sw64_pmu_start(event, PERF_EF_RELOAD); + + /* Propagate our changes to the userspace mapping. */ + perf_event_update_userpage(event); + +out: + local_irq_restore(irq_flags); + + return err; +} + +/* + * pmu->del: delete the event from PMU. 
+ */ +static void sw64_pmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + unsigned long irq_flags; + + local_irq_save(irq_flags); + + sw64_pmu_stop(event, PERF_EF_UPDATE); + cpuc->event[hwc->idx] = NULL; + __clear_bit(event->hw.idx, cpuc->used_mask); + + /* Absorb the final count and turn off the event. */ + perf_event_update_userpage(event); + + local_irq_restore(irq_flags); +} + +/* + * pmu->read: read and update the counter + */ +static void sw64_pmu_read(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + sw64_perf_event_update(event, hwc, hwc->idx, 0); +} + +static bool supported_cpu(void) +{ + return true; +} + +static void hw_perf_event_destroy(struct perf_event *event) +{ + /* Nothing to be done! */ +} + +static int __hw_perf_event_init(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + const struct sw64_perf_event *event_type; + + + /* + * SW64 does not have per-counter usr/os/guest/host bits, + * we can distinguish exclude_user and exclude_kernel by + * sample mode. 
+ */ + if (event->attr.exclude_hv || event->attr.exclude_idle || + event->attr.exclude_host || event->attr.exclude_guest) + return -EINVAL; + + /* + * SW64 does not support precise ip feature, and system hang when + * detecting precise_ip by perf_event_attr__set_max_precise_ip + * in userspace + */ + if (attr->precise_ip != 0) + return -EOPNOTSUPP; + + /* SW64 has fixed counter for given event type */ + if (attr->type == PERF_TYPE_HARDWARE) { + if (attr->config >= sw64_pmu->max_events) + return -EINVAL; + event_type = sw64_pmu->map_hw_event(attr->config); + hwc->idx = event_type->counter; + hwc->event_base = event_type->event; + } else if (attr->type == PERF_TYPE_HW_CACHE) { + event_type = sw64_pmu->map_cache_event(attr->config); + if (IS_ERR(event_type)) /* */ + return PTR_ERR(event_type); + hwc->idx = event_type->counter; + hwc->event_base = event_type->event; + } else { /* PERF_TYPE_RAW */ + if (!sw64_pmu->raw_event_valid(attr->config)) + return -EINVAL; + hwc->idx = attr->config >> 8; /* counter selector */ + hwc->event_base = attr->config & 0xff; /* event selector */ + } + + hwc->config_base = SW64_PERFCTRL_AM; + + if (attr->exclude_user) + hwc->config_base = SW64_PERFCTRL_KM; + if (attr->exclude_kernel) + hwc->config_base = SW64_PERFCTRL_UM; + + hwc->config = attr->config; + + if (!is_sampling_event(event)) + pr_debug("not sampling event\n"); + + event->destroy = hw_perf_event_destroy; + + if (!hwc->sample_period) { + hwc->sample_period = sw64_pmu->pmc_max_period; + hwc->last_period = hwc->sample_period; + local64_set(&hwc->period_left, hwc->sample_period); + } + + return 0; +} + +/* + * Main entry point to initialise a HW performance event. 
+ */ +static int sw64_pmu_event_init(struct perf_event *event) +{ + int err; + + /* does not support taken branch sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + break; + default: + return -ENOENT; + } + + if (!sw64_pmu) + return -ENODEV; + + /* Do the real initialisation work. */ + err = __hw_perf_event_init(event); + + return err; +} + +static struct pmu pmu = { + .name = "core3-base", + .capabilities = PERF_PMU_CAP_NO_NMI, + .event_init = sw64_pmu_event_init, + .add = sw64_pmu_add, + .del = sw64_pmu_del, + .start = sw64_pmu_start, + .stop = sw64_pmu_stop, + .read = sw64_pmu_read, +}; + +void perf_event_print_debug(void) +{ + unsigned long flags; + unsigned long pcr0, pcr1; + int cpu; + + if (!supported_cpu()) + return; + + local_irq_save(flags); + + cpu = smp_processor_id(); + + pcr0 = wrperfmon(PMC_CMD_READ, PMC_PC0); + pcr1 = wrperfmon(PMC_CMD_READ, PMC_PC1); + + pr_info("CPU#%d: PCTR0[%lx] PCTR1[%lx]\n", cpu, pcr0, pcr1); + + local_irq_restore(flags); +} + +static void sw64_perf_event_irq_handler(unsigned long idx, + struct pt_regs *regs) +{ + struct cpu_hw_events *cpuc; + struct perf_sample_data data; + struct perf_event *event; + struct hw_perf_event *hwc; + + __this_cpu_inc(irq_pmi_count); + cpuc = this_cpu_ptr(&cpu_hw_events); + + event = cpuc->event[idx]; + + if (unlikely(!event)) { + irq_err_count++; + return; + } + + hwc = &event->hw; + sw64_perf_event_update(event, hwc, idx, sw64_pmu->pmc_max_period + 1); + perf_sample_data_init(&data, 0, hwc->last_period); + + if (sw64_perf_event_set_period(event, hwc, idx)) { + if (perf_event_overflow(event, &data, regs)) { + /* Interrupts coming too quickly; "throttle" the + * counter, i.e., disable it for a little while. 
+ */ + sw64_pmu_stop(event, 0); + } + } +} + +bool valid_utext_addr(unsigned long addr) +{ + return addr >= current->mm->start_code && addr <= current->mm->end_code; +} + +bool valid_dy_addr(unsigned long addr) +{ + bool ret = false; + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + + if (addr > TASK_SIZE || addr < TASK_UNMAPPED_BASE) + return ret; + vma = find_vma(mm, addr); + if (vma && vma->vm_start <= addr && (vma->vm_flags & VM_EXEC)) + ret = true; + return ret; +} + +#ifdef CONFIG_FRAME_POINTER +void perf_callchain_user(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{ + + struct stack_frame frame; + unsigned long __user *fp; + int err; + + perf_callchain_store(entry, regs->pc); + + fp = (unsigned long __user *)regs->regs[15]; + + while (entry->nr < entry->max_stack && (unsigned long)fp < current->mm->start_stack) { + if (!access_ok(fp, sizeof(frame))) + break; + + pagefault_disable(); + err = __copy_from_user_inatomic(&frame, fp, sizeof(frame)); + pagefault_enable(); + + if (err) + break; + + if (valid_utext_addr(frame.return_address) || valid_dy_addr(frame.return_address)) + perf_callchain_store(entry, frame.return_address); + fp = (void __user *)frame.next_frame; + } +} +#else /* !CONFIG_FRAME_POINTER */ +void perf_callchain_user(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{ + unsigned long usp = rdusp(); + unsigned long user_addr; + int err; + + perf_callchain_store(entry, regs->pc); + + while (entry->nr < entry->max_stack && usp < current->mm->start_stack) { + if (!access_ok((const void __user *)usp, 8)) + break; + + pagefault_disable(); + err = __get_user(user_addr, (unsigned long *)usp); + pagefault_enable(); + + if (err) + break; + + if (valid_utext_addr(user_addr) || valid_dy_addr(user_addr)) + perf_callchain_store(entry, user_addr); + usp = usp + 8; + } +} +#endif/* CONFIG_FRAME_POINTER */ + +/* + * Gets called by walk_stackframe() for every stackframe. 
This will be called + * whilst unwinding the stackframe and is like a subroutine return so we use + * the PC. + */ +static int callchain_trace(unsigned long pc, void *data) +{ + struct perf_callchain_entry_ctx *entry = data; + + perf_callchain_store(entry, pc); + return 0; +} + +void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{ + walk_stackframe(NULL, regs, callchain_trace, entry); +} + +/* + * Gets the perf_instruction_pointer and perf_misc_flags for guest os. + */ + +unsigned long perf_instruction_pointer(struct pt_regs *regs) +{ + if (perf_guest_state()) + return perf_guest_get_ip(); + + return instruction_pointer(regs); +} + +unsigned long perf_misc_flags(struct pt_regs *regs) +{ + unsigned int guest_state = perf_guest_state(); + int misc = 0; + + if (guest_state) { + if (guest_state & PERF_GUEST_USER) + misc |= PERF_RECORD_MISC_GUEST_USER; + else + misc |= PERF_RECORD_MISC_GUEST_KERNEL; + } else { + if (user_mode(regs)) + misc |= PERF_RECORD_MISC_USER; + else + misc |= PERF_RECORD_MISC_KERNEL; + } + + return misc; +} + +/* + * Init call to initialise performance events at kernel startup. 
+ */ +int __init init_hw_perf_events(void) +{ + if (!supported_cpu()) { + pr_info("Performance events: Unsupported CPU type!\n"); + return 0; + } + + pr_info("Performance events: Supported CPU type!\n"); + + /* Override performance counter IRQ vector */ + + perf_irq = sw64_perf_event_irq_handler; + + /* And set up PMU specification */ + sw64_pmu = &core3_pmu; + + perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); + + return 0; +} +early_initcall(init_hw_perf_events); diff --git a/arch/sw_64/kernel/perf_regs.c b/arch/sw_64/kernel/perf_regs.c new file mode 100644 index 0000000000000000000000000000000000000000..b036f213936bc6d79214c9b7bdf1ab9a82a40b69 --- /dev/null +++ b/arch/sw_64/kernel/perf_regs.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include + +u64 perf_reg_value(struct pt_regs *regs, int idx) +{ + if (WARN_ON_ONCE((u32)idx >= PERF_REG_SW64_MAX)) + return 0; + + return ((unsigned long *)regs)[idx]; +} + +#define REG_RESERVED (~((1ULL << PERF_REG_SW64_MAX) - 1)) + +int perf_reg_validate(u64 mask) +{ + if (!mask || mask & REG_RESERVED) + return -EINVAL; + return 0; +} + +u64 perf_reg_abi(struct task_struct *task) +{ + return PERF_SAMPLE_REGS_ABI_64; +} + +void perf_get_regs_user(struct perf_regs *regs_user, + struct pt_regs *regs) +{ + regs_user->regs = task_pt_regs(current); + regs_user->abi = perf_reg_abi(current); +} diff --git a/arch/sw_64/kernel/pm.c b/arch/sw_64/kernel/pm.c new file mode 100644 index 0000000000000000000000000000000000000000..f0a35e5d0486167340b44f3bac1c80104f25649e --- /dev/null +++ b/arch/sw_64/kernel/pm.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +#include + +struct syscore_ops io_syscore_ops; + +static int __init sw64_pm_init(void) +{ +#ifdef CONFIG_SUSPEND + suspend_set_ops(&native_suspend_ops); +#endif + register_syscore_ops(&io_syscore_ops); + + return 0; +} +device_initcall(sw64_pm_init); diff --git a/arch/sw_64/kernel/relocate.c b/arch/sw_64/kernel/relocate.c new file mode 
100644 index 0000000000000000000000000000000000000000..ebdf7d894805e8f2c0a1853d853d5d2ef8bf6c09 --- /dev/null +++ b/arch/sw_64/kernel/relocate.c @@ -0,0 +1,284 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Support for kernel relocation at boot time. + * + * Based on arch/mips/kernel/relocate.c + * + * Copyright (C) 2019 He Sheng + * Authors: He Sheng (hesheng05@gmail.com) + */ +#include +#include +#include + +#include + +#define KTEXT_MAX 0xffffffffa0000000UL +#define RELOCATED(x) ((void *)((unsigned long)x + offset)) + +extern unsigned long _got_start[]; +extern unsigned long _got_end[]; +extern char pre_start_kernel[]; + +extern unsigned int _relocation_start[]; /* End kernel image / start relocation table */ +extern unsigned int _relocation_end[]; /* End relocation table */ + +extern unsigned long __start___ex_table; /* Start exception table */ +extern unsigned long __stop___ex_table; /* End exception table */ +extern union thread_union init_thread_union; + +/* + * This function may be defined for a platform to perform any post-relocation + * fixup necessary. 
+ * Return non-zero to abort relocation + */ +int __weak plat_post_relocation(long offset) +{ + return 0; +} + +static int __init apply_r_sw64_refquad(unsigned long *loc_orig, unsigned long *loc_new, unsigned int offset) +{ + *(unsigned long *)loc_new += offset; + + return 0; +} + +static int (*reloc_handlers_rel[]) (unsigned long *, unsigned long *, unsigned int) __initdata = { + [R_SW64_REFQUAD] = apply_r_sw64_refquad, +}; + +int __init do_relocations(void *kbase_old, void *kbase_new, unsigned int offset) +{ + unsigned int *r; + unsigned long *loc_orig; + unsigned long *loc_new; + int type; + int res; + + for (r = _relocation_start; r < _relocation_end; r++) { + /* Sentinel for last relocation */ + if (*r == 0) + break; + + type = (*r >> 24) & 0xff; /* top byte: relocation type */ + loc_orig = kbase_old + ((*r & 0x00ffffff) << 2); /* low 24 bits: word offset from kbase_old */ + loc_new = RELOCATED(loc_orig); + + if (type >= ARRAY_SIZE(reloc_handlers_rel) || reloc_handlers_rel[type] == NULL) { + /* Type is outside the handler table or has no handler */ + pr_err("Unhandled relocation type %d at 0x%pK\n", + type, loc_orig); + return -ENOEXEC; + } + + res = reloc_handlers_rel[type](loc_orig, loc_new, offset); + if (res) + return res; + } + + return 0; +} + +static int __init relocate_got(unsigned int offset) +{ + unsigned long *got_start, *got_end, *e; + + got_start = RELOCATED(&_got_start); + got_end = RELOCATED(&_got_end); + + for (e = got_start; e < got_end; e++) + *e += offset; + + return 0; +} + +#ifdef CONFIG_RANDOMIZE_BASE + +static inline __init unsigned long rotate_xor(unsigned long hash, + const void *area, size_t size) +{ + size_t i; + unsigned long start, *ptr; + /* Make sure start is 8 byte aligned */ + start = ALIGN((unsigned long)area, 8); + size -= (start - (unsigned long)area); + ptr = (unsigned long *) start; + for (i = 0; i < size / sizeof(hash); i++) { + /* Rotate by odd number of bits and XOR. 
*/ + hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7); + hash ^= ptr[i]; + } + return hash; +} + +static inline __init unsigned long get_random_boot(void) +{ + unsigned long entropy = random_get_entropy(); + unsigned long hash = 0; + + /* Attempt to create a simple but unpredictable starting entropy. */ + hash = rotate_xor(hash, linux_banner, strlen(linux_banner)); + + /* Add in any runtime entropy we can get */ + hash = rotate_xor(hash, &entropy, sizeof(entropy)); + + return hash; +} + +static inline __init bool kaslr_disabled(void) +{ + char *str; + + str = strstr(COMMAND_LINE, "nokaslr"); + if (str == COMMAND_LINE || (str > COMMAND_LINE && *(str - 1) == ' ')) + return true; + + return false; +} + +static unsigned long __init determine_relocation_offset(void) +{ + /* Choose a new address for the kernel */ + unsigned long kernel_length; + unsigned long offset; + + if (kaslr_disabled()) + return 0; + + kernel_length = (unsigned long)_end - (unsigned long)(&_text); + + /* TODO: offset is 64KB aligned; 8KB alignment may be enough. */ + offset = get_random_boot() << 16; + offset &= (CONFIG_RANDOMIZE_BASE_MAX_OFFSET - 1); + if (offset < kernel_length) + offset += ALIGN(kernel_length, 0x10000); + + /* + * TODO: the new location should not overlap initrd, dtb, acpi + * tables, etc. + */ + + if ((KTEXT_MAX - (unsigned long)_end) < offset) + offset = 0; + + return offset; +} + +#else + +static inline unsigned long __init determine_relocation_offset(void) +{ + /* + * Choose a new address for the kernel + * For now we'll hard code the destination offset. 
+ */ + return 0; +} + +#endif + +static inline int __init relocation_offset_valid(unsigned long offset) +{ + unsigned long loc_new = (unsigned long)_text + offset; + + if (loc_new & 0x0000ffff) { + /* Inappropriately aligned new location */ + return 0; + } + if (loc_new < (unsigned long)&_end) { + /* New location overlaps original kernel */ + return 0; + } + return 1; +} + +unsigned int __init relocate_kernel(void) +{ + void *loc_new; + unsigned long kernel_length; + unsigned long bss_length; + unsigned int offset = 0; + int res = 1; + + kernel_length = (unsigned long)(&_relocation_start) - (long)(&_text); + bss_length = (unsigned long)&__bss_stop - (long)&__bss_start; + + offset = determine_relocation_offset(); + /* Reset the command line now so we don't end up with a duplicate */ + + /* Sanity check relocation address */ + if (offset && relocation_offset_valid(offset)) { + + loc_new = RELOCATED(&_text); + /* Copy the kernel to its new location */ + memcpy(loc_new, &_text, kernel_length); + + /* Perform relocations on the new kernel */ + res = do_relocations(&_text, loc_new, offset); + if (res < 0) + goto out; + + res = relocate_got(offset); + if (res < 0) + goto out; + + /* + * The original .bss has already been cleared, and + * some variables such as command line parameters + * stored to it so make a copy in the new location. + */ + memcpy(RELOCATED(&__bss_start), &__bss_start, bss_length); + + /* + * Last chance for the platform to abort relocation. + * This may also be used by the platform to perform any + * initialisation required now that the new kernel is + * resident in memory and ready to be executed. + */ + if (plat_post_relocation(offset)) + goto out; + + /* Return the new kernel's offset */ + return offset; + } +out: + return 0; +} + +/* + * Show relocation information on panic. 
+ */ +void show_kernel_relocation(const char *level) +{ + unsigned long offset; + + offset = __pa_symbol(_text) - __pa_symbol(_TEXT_START); + + if (IS_ENABLED(CONFIG_RELOCATABLE) && offset > 0) { + printk(level); + pr_cont("Kernel relocated by 0x%pK\n", (void *)offset); + pr_cont(" .text @ 0x%pK\n", _text); + pr_cont(" .data @ 0x%pK\n", _sdata); + pr_cont(" .bss @ 0x%pK\n", __bss_start); + } +} + +static int kernel_location_notifier_fn(struct notifier_block *self, + unsigned long v, void *p) +{ + show_kernel_relocation(KERN_EMERG); + return NOTIFY_DONE; +} + +static struct notifier_block kernel_location_notifier = { + .notifier_call = kernel_location_notifier_fn +}; + +static int __init register_kernel_offset_dumper(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, + &kernel_location_notifier); + return 0; +} +device_initcall(register_kernel_offset_dumper); diff --git a/arch/sw_64/kernel/relocate_kernel.S b/arch/sw_64/kernel/relocate_kernel.S new file mode 100644 index 0000000000000000000000000000000000000000..f1a160636212fed8e73dd32616edaea155c51154 --- /dev/null +++ b/arch/sw_64/kernel/relocate_kernel.S @@ -0,0 +1,176 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * relocate_kernel.S for kexec + * Created by Jul 2 2019 + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. 
+ */ + +#include +#include + + .align 3 + .globl relocate_new_kernel + .ent relocate_new_kernel + +relocate_new_kernel: + .prologue 0 + ldl a0, arg0 + ldl a1, arg1 + ldl a2, arg2 + ldl a3, arg3 + + ldl s0, kexec_indirection_page + ldl s1, kexec_start_address + +process_entry: + ldl s2, 0(s0) + addl s0, 8, s0 + + /* + * In case of a kdump/crash kernel, the indirection page is not + * populated as the kernel is directly copied to a reserved location + */ + beq s2, done + + /* destination page */ + and s2, 0x1, s3 + beq s3, 1f + bic s2, 0x1, s4/* store destination addr in s4 */ + br $31, process_entry + +1: + /* indirection page, update s0*/ + and s2, 0x2, s3 + beq s3, 1f + bic s2, 0x2, s0 + br $31, process_entry + +1: + /* done page */ + and s2, 0x4, s3 + beq s3, 1f + br $31, done +1: + /* source page */ + and s2, 0x8, s3 + beq s3, process_entry + bic s2, 0x8, s2 + ldi s6, 0x1 + sll s6, (PAGE_SHIFT - 3), s6 + +copy_word: + /* copy page word by word */ + ldl s5, 0(s2) + stl s5, 0(s4) + addl s4, 8, s4 + addl s2, 8, s2 + subl s6, 1, s6 + beq s6, process_entry + br $31, copy_word + br $31, process_entry + +done: +#ifdef CONFIG_CRASH_SMP /* unsupported now!!!! */ + /* kexec_flag reset is the signal to other CPUs that the kernel + * was moved to its location. Note - we need the relocated address + * of kexec_flag. + */ + + br ra, 1f +1: mov ra, t1 + ldi t2, 1b + ldi t0, kexec_flag + subl t0, t2, t0 + addl t1, t0, t0 + stl zero, 0(t0) +#endif + memb + jmp ra, (s1) + .end relocate_new_kernel + .size relocate_new_kernel, .-relocate_new_kernel + +#ifdef CONFIG_CRASH_SMP + /* + * Other CPUs should wait until code is relocated and + * then start at entry (?) point. + */ + .align 3 + .globl kexec_smp_wait + .ent kexec_smp_wait +kexec_smp_wait: + ldl a0, s_arg0 + ldl a1, s_arg1 + ldl a2, s_arg2 + ldl a3, s_arg3 + ldl s1, kexec_start_address + + /* Non-relocated address works for args and kexec_start_address (old + * kernel is not overwritten). But we need relocated address of + * kexec_flag. 
+ */ + + bsr ra, 1f +1: mov ra, t1 + ldi t2, 1b + ldi t0, kexec_flag + subl t0, t2, t0 + addl t1, t0, t0 + +1: ldl s0, 0(t0) /* poll kexec_flag until relocate_new_kernel stores zero to it */ + bne s0, 1b + memb + jmp ra, (s1) + .end kexec_smp_wait + .size kexec_smp_wait, .-kexec_smp_wait +#endif + + .align 3 + + /* All parameters to new kernel are passed in registers a0-a3. + * kexec_args[0..3] are used to prepare register values. + */ + +kexec_args: + .globl kexec_args +arg0: .quad 0x0 +arg1: .quad 0x0 +arg2: .quad 0x0 +arg3: .quad 0x0 + .size kexec_args, 8*4 + +#ifdef CONFIG_CRASH_SMP + /* + * Secondary CPUs may have different kernel parameters in + * their registers a0-a3. secondary_kexec_args[0..3] are used + * to prepare register values. + */ +secondary_kexec_args: + .globl secondary_kexec_args +s_arg0: .quad 0x0 +s_arg1: .quad 0x0 +s_arg2: .quad 0x0 +s_arg3: .quad 0x0 + .size secondary_kexec_args, 8*4 + +kexec_flag: + .quad 0x1 +#endif + +kexec_start_address: + .globl kexec_start_address + .quad 0x0 + .size kexec_start_address, 8 + +kexec_indirection_page: + .globl kexec_indirection_page + .quad 0 + .size kexec_indirection_page, 8 + +relocate_new_kernel_end: + +relocate_new_kernel_size: + .global relocate_new_kernel_size + .quad relocate_new_kernel_end - relocate_new_kernel + .size relocate_new_kernel_size, 8 diff --git a/arch/sw_64/kernel/stacktrace.c b/arch/sw_64/kernel/stacktrace.c new file mode 100644 index 0000000000000000000000000000000000000000..ff00506d5b824727161449fa8c5f3602574c1e6e --- /dev/null +++ b/arch/sw_64/kernel/stacktrace.c @@ -0,0 +1,247 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Stack trace management functions + * + * Copyright (C) 2018 snyh + */ +#include +#include +#include +#include +#include +#include +#include + +#include + +/* + * sw_64 PCS assigns the frame pointer to r15. 
+ * + * A simple function prologue looks like this: + * ldi sp,-xx(sp) + * stl ra,0(sp) + * stl fp,8(sp) + * mov sp,fp + * + * A simple function epilogue looks like this: + * mov fp,sp + * ldl ra,0(sp) + * ldl fp,8(sp) + * ldi sp,+xx(sp) + */ + +#ifdef CONFIG_FRAME_POINTER + +int unwind_frame(struct task_struct *tsk, struct stackframe *frame) +{ + unsigned long fp = frame->fp; + + if (fp & 0x7) + return -EINVAL; + + if (!tsk) + tsk = current; + + if (!on_accessible_stack(tsk, fp, NULL)) + return -EINVAL; + + frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); + frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8)); + + /* + * Frames created upon entry from user have NULL FP and PC values, so + * don't bother reporting these. Frames created by __noreturn functions + * might have a valid FP even if PC is bogus, so only terminate where + * both are NULL. + */ + if (!frame->fp && !frame->pc) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL_GPL(unwind_frame); + +void walk_stackframe(struct task_struct *tsk, struct pt_regs *regs, + int (*fn)(unsigned long, void *), void *data) +{ + unsigned long pc, fp; + + struct stackframe frame; + + if (regs) { + unsigned long offset; + + pc = regs->pc; + fp = regs->regs[15]; + if (kallsyms_lookup_size_offset(pc, NULL, &offset) + && offset < 16) { + /* call stack has not been setup + * store pc first then loop from ra + */ + if (fn(pc, data)) + return; + pc = regs->regs[26]; + } + } else if (tsk == current || tsk == NULL) { + fp = (unsigned long)__builtin_frame_address(0); + pc = (unsigned long)walk_stackframe; + } else { + fp = tsk->thread.s[6]; + pc = tsk->thread.ra; + } + + if (!__kernel_text_address(pc) || fn(pc, data)) + return; + + frame.pc = pc; + frame.fp = fp; + while (1) { + int ret; + + ret = unwind_frame(tsk, &frame); + if (ret < 0) + break; + + if (fn(frame.pc, data)) + break; + } +} +EXPORT_SYMBOL_GPL(walk_stackframe); + +#else /* !CONFIG_FRAME_POINTER */ +void walk_stackframe(struct task_struct *tsk, struct 
pt_regs *regs, + int (*fn)(unsigned long, void *), void *data) +{ + unsigned long *ksp; + unsigned long sp, pc; + + if (regs) { + sp = (unsigned long)(regs+1); + pc = regs->pc; + } else if (tsk == current || tsk == NULL) { + register unsigned long current_sp __asm__ ("$30"); + sp = current_sp; + pc = (unsigned long)walk_stackframe; + } else { + sp = tsk->thread.sp; + pc = tsk->thread.ra; + } + + ksp = (unsigned long *)sp; + + while (!kstack_end(ksp)) { + if (__kernel_text_address(pc) && fn(pc, data)) + break; + pc = *ksp++; + } +} +EXPORT_SYMBOL_GPL(walk_stackframe); + +#endif/* CONFIG_FRAME_POINTER */ + +static int print_address_trace(unsigned long pc, void *data) +{ + print_ip_sym((const char *)data, pc); + return 0; +} + +void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) +{ + pr_info("Trace:\n"); + walk_stackframe(task, NULL, print_address_trace, (void *)loglvl); +} + +#ifdef CONFIG_STACKTRACE +/* + * Save stack-backtrace addresses into a stack_trace buffer. 
+ */ +struct stack_trace_data { + struct stack_trace *trace; + unsigned int nosched; +}; + +int save_trace(unsigned long pc, void *d) +{ + struct stack_trace_data *data = d; + struct stack_trace *trace = data->trace; + + if (data->nosched && in_sched_functions(pc)) + return 0; + if (trace->skip > 0) { + trace->skip--; + return 0; + } + + trace->entries[trace->nr_entries++] = pc; + return (trace->nr_entries >= trace->max_entries); +} + +void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) +{ + struct stack_trace_data data; + + data.trace = trace; + data.nosched = 0; + + walk_stackframe(current, regs, save_trace, &data); + + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = ULONG_MAX; +} + +static void __save_stack_trace(struct task_struct *tsk, + struct stack_trace *trace, unsigned int nosched) +{ + struct stack_trace_data data; + + data.trace = trace; + data.nosched = nosched; + + walk_stackframe(tsk, NULL, save_trace, &data); + + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = ULONG_MAX; +} + +void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) +{ + __save_stack_trace(tsk, trace, 1); +} +EXPORT_SYMBOL_GPL(save_stack_trace_tsk); + +void save_stack_trace(struct stack_trace *trace) +{ + __save_stack_trace(current, trace, 0); +} +EXPORT_SYMBOL_GPL(save_stack_trace); +#endif + +static int save_pc(unsigned long pc, void *data) +{ + unsigned long *p = data; + *p = 0; + + if (!in_sched_functions(pc)) + *p = pc; + + return *p; +} + +unsigned long __get_wchan(struct task_struct *tsk) +{ + unsigned long pc; + + if (!tsk || tsk == current || task_is_running(tsk)) + return 0; + walk_stackframe(tsk, NULL, save_pc, &pc); + + return pc; +} + +#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE +int save_stack_trace_tsk_reliable(struct task_struct *tsk, + struct stack_trace *trace) +{ + return 0; +} +#endif diff --git a/arch/sw_64/kernel/suspend.c b/arch/sw_64/kernel/suspend.c 
new file mode 100644 index 0000000000000000000000000000000000000000..27a240e6614955835f7abe8c21558b956898da43 --- /dev/null +++ b/arch/sw_64/kernel/suspend.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +#include +#include + +struct processor_state suspend_state; + +static int native_suspend_state_valid(suspend_state_t pm_state) +{ + switch (pm_state) { + case PM_SUSPEND_ON: + case PM_SUSPEND_STANDBY: + case PM_SUSPEND_MEM: + return 1; + default: + return 0; + } +} + +void disable_local_timer(void) +{ + wrtimer(0); +} + +extern struct pci_controller *hose_head; + +/* + * Boot Core will enter suspend state here. + */ +void sw64_suspend_enter(void) +{ + /* The boot processor enters deep sleep mode from here. + * After the boot processor wakes up, execution resumes here. + */ + disable_local_timer(); + current_thread_info()->pcb.tp = rtid(); + + sw64_suspend_deep_sleep(&suspend_state); + wrtp(current_thread_info()->pcb.tp); + + disable_local_timer(); +} + +static int native_suspend_enter(suspend_state_t state) +{ + if (is_in_guest()) + return 0; + /* processor specific suspend */ + sw64_suspend_enter(); + return 0; +} + +const struct platform_suspend_ops native_suspend_ops = { + .valid = native_suspend_state_valid, + .enter = native_suspend_enter, +}; diff --git a/arch/sw_64/kernel/suspend_asm.S b/arch/sw_64/kernel/suspend_asm.S new file mode 100644 index 0000000000000000000000000000000000000000..34ee349515a7c1278f24bc9c64dc3e8a6e864137 --- /dev/null +++ b/arch/sw_64/kernel/suspend_asm.S @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include +#include +#include + + .text + .set noat +ENTRY(sw64_suspend_deep_sleep) + /* a0 $16 will be the address of suspend_state */ + ldi $1, PSTATE_REGS($16) + stl $9, CALLEE_R9($1) + stl $10, CALLEE_R10($1) + stl $11, CALLEE_R11($1) + stl $12, CALLEE_R12($1) + stl $13, CALLEE_R13($1) + stl $14, CALLEE_R14($1) + stl $15, CALLEE_R15($1) + stl $26, CALLEE_RA($1) + /* SIMD-FP */ + ldi $1, PSTATE_FPREGS($16) + 
vstd $f2, CALLEE_F2($1) + vstd $f3, CALLEE_F3($1) + vstd $f4, CALLEE_F4($1) + vstd $f5, CALLEE_F5($1) + vstd $f6, CALLEE_F6($1) + vstd $f7, CALLEE_F7($1) + vstd $f8, CALLEE_F8($1) + vstd $f9, CALLEE_F9($1) + rfpcr $f0 + fstd $f0, PSTATE_FPCR($16) + stl $8, PSTATE_KTP($16) + + /* save the address of suspend_state to $18 */ + mov $16, $18 + + /* + * Now will Go to Deep Sleep + * HMcode should save pc, gp, ps, r16, r17, r18 + */ + + sys_call HMC_sleepen + sys_call HMC_whami + bis $0, $0, $16 + ldi $17, 0x2($31) + sys_call HMC_sendii + + /* wait for a while to receive interrupt */ + ldi $16, 0x1($31) + sll $16, 24, $16 +$subloop: + subl $16, 1, $16 + bis $16, $16, $16 + bis $16, $16, $16 + bne $16, $subloop + + + ldl $8, PSTATE_KTP($18) + ldi $1, PSTATE_REGS($18) + ldl $9, CALLEE_R9($1) + ldl $10, CALLEE_R10($1) + ldl $11, CALLEE_R11($1) + ldl $12, CALLEE_R12($1) + ldl $13, CALLEE_R13($1) + ldl $14, CALLEE_R14($1) + ldl $15, CALLEE_R15($1) + ldl $26, CALLEE_RA($1) + /* SIMD-FP */ + fldd $f0, PSTATE_FPCR($18) + wfpcr $f0 + fimovd $f0, $2 + and $2, 0x3, $2 + beq $2, $suspend_setfpec_0 + subl $2, 0x1, $2 + beq $2, $suspend_setfpec_1 + subl $2, 0x1, $2 + beq $2, $suspend_setfpec_2 + setfpec3 + br $suspend_setfpec_over +$suspend_setfpec_0: + setfpec0 + br $suspend_setfpec_over +$suspend_setfpec_1: + setfpec1 + br $suspend_setfpec_over +$suspend_setfpec_2: + setfpec2 +$suspend_setfpec_over: + ldi $1, PSTATE_FPREGS($18) + vldd $f2, CALLEE_F2($1) + vldd $f3, CALLEE_F3($1) + vldd $f4, CALLEE_F4($1) + vldd $f5, CALLEE_F5($1) + vldd $f6, CALLEE_F6($1) + vldd $f7, CALLEE_F7($1) + vldd $f8, CALLEE_F8($1) + vldd $f9, CALLEE_F9($1) + ret +END(sw64_suspend_deep_sleep) diff --git a/arch/sw_64/kernel/uprobes.c b/arch/sw_64/kernel/uprobes.c new file mode 100644 index 0000000000000000000000000000000000000000..928312d62cfd172f20edcc113fea897a5c054a56 --- /dev/null +++ b/arch/sw_64/kernel/uprobes.c @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include 
+#include + +/** + * arch_uprobe_analyze_insn - instruction analysis including validity and fixups. + * @mm: the probed address space. + * @arch_uprobe: the probepoint information. + * @addr: virtual address at which to install the probepoint + * Return 0 on success or a -ve number on error. + */ +int arch_uprobe_analyze_insn(struct arch_uprobe *aup, + struct mm_struct *mm, unsigned long addr) +{ + u32 inst; + + if (addr & 0x03) + return -EINVAL; + + inst = aup->insn; + + aup->ixol[0] = aup->insn; + aup->ixol[1] = UPROBE_BRK_UPROBE_XOL; /* NOP */ + + return 0; +} + +void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, + void *src, unsigned long len) +{ + unsigned long kaddr, kstart; + + /* Initialize the slot */ + kaddr = (unsigned long)kmap_local_page(page); + kstart = kaddr + (vaddr & ~PAGE_MASK); + memcpy((void *)kstart, src, len); + flush_icache_range(kstart, kstart + len); + kunmap_local((void *)kaddr); +} + +/* + * arch_uprobe_pre_xol - prepare to execute out of line. + * @auprobe: the probepoint information. + * @regs: reflects the saved user state of current task. + */ +int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs) +{ + struct uprobe_task *utask = current->utask; + + /* Instruction points to execute ol */ + instruction_pointer_set(regs, utask->xol_vaddr); + + return 0; +} + +int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs) +{ + struct uprobe_task *utask = current->utask; + + /* Instruction points to execute next to breakpoint address */ + instruction_pointer_set(regs, utask->vaddr + 4); + + return 0; +} + +/* + * If xol insn itself traps and generates a signal(Say, + * SIGILL/SIGSEGV/etc), then detect the case where a singlestepped + * instruction jumps back to its own address. It is assumed that anything + * like do_page_fault/do_trap/etc sets thread.trap_nr != -1. 
+ * + * arch_uprobe_pre_xol/arch_uprobe_post_xol save/restore thread.trap_nr, + * arch_uprobe_xol_was_trapped() simply checks that ->trap_nr is not equal to + * UPROBE_TRAP_NR == -1 set by arch_uprobe_pre_xol(). + */ +bool arch_uprobe_xol_was_trapped(struct task_struct *tsk) +{ + return false; +} + +int arch_uprobe_exception_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct die_args *args = data; + struct pt_regs *regs = args->regs; + + /* regs == NULL is a kernel bug */ + if (WARN_ON(!regs)) + return NOTIFY_DONE; + + /* We are only interested in userspace traps */ + if (!user_mode(regs)) + return NOTIFY_DONE; + + switch (val) { + case DIE_UPROBE: + if (uprobe_pre_sstep_notifier(regs)) + return NOTIFY_STOP; + break; + case DIE_UPROBE_XOL: + if (uprobe_post_sstep_notifier(regs)) + return NOTIFY_STOP; + default: + break; + } + + return 0; +} + +/* + * This function gets called when XOL instruction either gets trapped or + * the thread has a fatal signal. Reset the instruction pointer to its + * probed address for the potential restart or for post mortem analysis. + */ +void arch_uprobe_abort_xol(struct arch_uprobe *aup, + struct pt_regs *regs) +{ + struct uprobe_task *utask = current->utask; + + instruction_pointer_set(regs, utask->vaddr); +} + +unsigned long arch_uretprobe_hijack_return_addr( + unsigned long trampoline_vaddr, struct pt_regs *regs) +{ + unsigned long ra; + + ra = regs->regs[26]; + + /* Replace the return address with the trampoline address */ + regs->regs[26] = trampoline_vaddr; + + return ra; +} + +/* + * See if the instruction can be emulated. + * Returns true if instruction was emulated, false otherwise. + * + * No instruction is emulated at present, so this always returns false. + */ +bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + return 0; +} + +/* + * struct xol_area and get_trampoline_vaddr() are copied from + * kernel/events/uprobes.c to avoid modifying arch-independent + * code. 
+ */ +struct xol_area { + wait_queue_head_t wq; + atomic_t slot_count; + unsigned long *bitmap; + struct vm_special_mapping xol_mapping; + struct page *pages[2]; + unsigned long vaddr; +}; + +static unsigned long get_trampoline_vaddr(void) +{ + struct xol_area *area; + unsigned long trampoline_vaddr = -1; + + area = READ_ONCE(current->mm->uprobes_state.xol_area); + if (area) + trampoline_vaddr = area->vaddr; + + return trampoline_vaddr; +} + +void sw64_fix_uretprobe(struct pt_regs *regs, unsigned long exc_pc) +{ + /* + * regs->pc has been changed to orig_ret_vaddr in handle_trampoline(). + */ + if (exc_pc == get_trampoline_vaddr()) + regs->regs[26] = regs->pc; +} diff --git a/arch/sw_64/net/Makefile b/arch/sw_64/net/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d4663b4bf509894e62c3b02c69726ee5717c2dd4 --- /dev/null +++ b/arch/sw_64/net/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Arch-specific network modules +# +obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o diff --git a/arch/sw_64/net/bpf_jit.h b/arch/sw_64/net/bpf_jit.h new file mode 100644 index 0000000000000000000000000000000000000000..929036d8ea6b10daec13166c1e87f63165d99f1a --- /dev/null +++ b/arch/sw_64/net/bpf_jit.h @@ -0,0 +1,368 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * BPF JIT compiler for SW64 + * + * Copyright (C) Mao Minkai + * Author: Mao Minkai + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef _SW64_NET_BPF_JIT_H +#define _SW64_NET_BPF_JIT_H + +/* SW64 instruction field shift */ +#define SW64_BPF_OPCODE_OFFSET 26 +#define SW64_BPF_RA_OFFSET 21 +#define SW64_BPF_RB_OFFSET 16 +#define SW64_BPF_SIMPLE_ALU_IMM_OFFSET 13 +#define SW64_BPF_SIMPLE_ALU_FUNC_OFFSET 5 +#define SW64_BPF_SIMPLE_ALU_RC_OFFSET 0 +#define SW64_BPF_LS_FUNC_OFFSET 12 + +/* SW64 instruction opcodes */ +#define SW64_BPF_OPCODE_CALL 0x01 +#define SW64_BPF_OPCODE_RET 0x02 +#define SW64_BPF_OPCODE_JMP 0x03 +#define SW64_BPF_OPCODE_BR 0x04 +#define SW64_BPF_OPCODE_BSR 0x05 +#define SW64_BPF_OPCODE_MISC 0x06 +#define SW64_BPF_OPCODE_LOCK 0x08 +#define SW64_BPF_OPCODE_ALU_REG 0x10 +#define SW64_BPF_OPCODE_ALU_IMM 0x12 +#define SW64_BPF_OPCODE_LDBU 0x20 +#define SW64_BPF_OPCODE_LDHU 0x21 +#define SW64_BPF_OPCODE_LDW 0x22 +#define SW64_BPF_OPCODE_LDL 0x23 +#define SW64_BPF_OPCODE_STB 0x28 +#define SW64_BPF_OPCODE_STH 0x29 +#define SW64_BPF_OPCODE_STW 0x2A +#define SW64_BPF_OPCODE_STL 0x2B +#define SW64_BPF_OPCODE_BEQ 0x30 +#define SW64_BPF_OPCODE_BNE 0x31 +#define SW64_BPF_OPCODE_BLT 0x32 +#define SW64_BPF_OPCODE_BLE 0x33 +#define SW64_BPF_OPCODE_BGT 0x34 +#define SW64_BPF_OPCODE_BGE 0x35 +#define SW64_BPF_OPCODE_BLBC 0x36 +#define SW64_BPF_OPCODE_BLBS 0x37 +#define SW64_BPF_OPCODE_LDI 0x3E +#define SW64_BPF_OPCODE_LDIH 0x3F + +/* SW64 MISC instructions function codes */ +#define SW64_BPF_FUNC_MISC_RD_F 0x1000 +#define SW64_BPF_FUNC_MISC_WR_F 0x1020 + +/* SW64 LOCK instructions function codes */ +#define SW64_BPF_FUNC_LOCK_LLDW 0x0 +#define SW64_BPF_FUNC_LOCK_LLDL 0x1 +#define SW64_BPF_FUNC_LOCK_LSTW 0x8 +#define SW64_BPF_FUNC_LOCK_LSTL 0x9 + +/* SW64 ALU instructions function codes */ +#define SW64_BPF_FUNC_ALU_ADDW 0x00 +#define SW64_BPF_FUNC_ALU_SUBW 0x01 +#define SW64_BPF_FUNC_ALU_ADDL 0x08 +#define SW64_BPF_FUNC_ALU_SUBL 0x09 +#define SW64_BPF_FUNC_ALU_MULW 0x10 +#define SW64_BPF_FUNC_ALU_MULL 0x18 +#define SW64_BPF_FUNC_ALU_CMPEQ 0x28 +#define SW64_BPF_FUNC_ALU_CMPLT 0x29 
/*
 * SW64 general-purpose register numbers, used as the ra/rb/rc operands of
 * the instruction encoders in the BPF JIT.
 *
 * Two numbers carry two names each: 15 is both S6 and FP, and 27 is both
 * T12 and PV.
 */
enum sw64_bpf_registers {
	SW64_BPF_REG_V0		= 0,	/* holds the return value */
	SW64_BPF_REG_T0		= 1,
	SW64_BPF_REG_T1		= 2,
	SW64_BPF_REG_T2		= 3,
	SW64_BPF_REG_T3		= 4,
	SW64_BPF_REG_T4		= 5,
	SW64_BPF_REG_T5		= 6,
	SW64_BPF_REG_T6		= 7,
	SW64_BPF_REG_T7		= 8,
	SW64_BPF_REG_S0		= 9,	/* callee saved */
	SW64_BPF_REG_S1		= 10,	/* callee saved */
	SW64_BPF_REG_S2		= 11,	/* callee saved */
	SW64_BPF_REG_S3		= 12,	/* callee saved */
	SW64_BPF_REG_S4		= 13,	/* callee saved */
	SW64_BPF_REG_S5		= 14,	/* callee saved */
	SW64_BPF_REG_S6		= 15,	/* callee saved */
	SW64_BPF_REG_FP		= 15,	/* frame pointer if necessary (same number as S6) */
	SW64_BPF_REG_A0		= 16,	/* argument 0 */
	SW64_BPF_REG_A1		= 17,	/* argument 1 */
	SW64_BPF_REG_A2		= 18,	/* argument 2 */
	SW64_BPF_REG_A3		= 19,	/* argument 3 */
	SW64_BPF_REG_A4		= 20,	/* argument 4 */
	SW64_BPF_REG_A5		= 21,	/* argument 5 */
	SW64_BPF_REG_T8		= 22,
	SW64_BPF_REG_T9		= 23,
	SW64_BPF_REG_T10	= 24,
	SW64_BPF_REG_T11	= 25,
	SW64_BPF_REG_RA		= 26,	/* callee saved, holds the return address */
	SW64_BPF_REG_T12	= 27,
	SW64_BPF_REG_PV		= 27,	/* same number as T12 */
	SW64_BPF_REG_AT		= 28,	/* reserved by assembler */
	SW64_BPF_REG_GP		= 29,	/* global pointer */
	SW64_BPF_REG_SP		= 30,	/* callee saved, stack pointer */
	SW64_BPF_REG_ZR		= 31	/* reads as 0 */
};
store instructions */ +#define SW64_BPF_LDBU(dst, rb, offset16) \ + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LDBU, dst, rb, offset16) +#define SW64_BPF_LDHU(dst, rb, offset16) \ + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LDHU, dst, rb, offset16) +#define SW64_BPF_LDW(dst, rb, offset16) \ + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LDW, dst, rb, offset16) +#define SW64_BPF_LDL(dst, rb, offset16) \ + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LDL, dst, rb, offset16) +#define SW64_BPF_STB(src, rb, offset16) \ + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_STB, src, rb, offset16) +#define SW64_BPF_STH(src, rb, offset16) \ + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_STH, src, rb, offset16) +#define SW64_BPF_STW(src, rb, offset16) \ + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_STW, src, rb, offset16) +#define SW64_BPF_STL(src, rb, offset16) \ + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_STL, src, rb, offset16) +#define SW64_BPF_LDI(dst, rb, imm16) \ + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LDI, dst, rb, imm16) +#define SW64_BPF_LDIH(dst, rb, imm16) \ + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LDIH, dst, rb, imm16) + +/* SW64 lock instructions */ +#define SW64_BPF_LLDW(ra, rb, offset16) \ + sw64_bpf_gen_format_ls_func(SW64_BPF_OPCODE_LOCK, \ + ra, rb, offset16, SW64_BPF_FUNC_LOCK_LLDW) +#define SW64_BPF_LLDL(ra, rb, offset16) \ + sw64_bpf_gen_format_ls_func(SW64_BPF_OPCODE_LOCK, \ + ra, rb, offset16, SW64_BPF_FUNC_LOCK_LLDL) +#define SW64_BPF_LSTW(ra, rb, offset16) \ + sw64_bpf_gen_format_ls_func(SW64_BPF_OPCODE_LOCK, \ + ra, rb, offset16, SW64_BPF_FUNC_LOCK_LSTW) +#define SW64_BPF_LSTL(ra, rb, offset16) \ + sw64_bpf_gen_format_ls_func(SW64_BPF_OPCODE_LOCK, \ + ra, rb, offset16, SW64_BPF_FUNC_LOCK_LSTL) +#define SW64_BPF_RD_F(ra) \ + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_MISC, \ + ra, SW64_BPF_REG_ZR, SW64_BPF_FUNC_MISC_RD_F) +#define SW64_BPF_WR_F(ra) \ + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_MISC, \ + ra, SW64_BPF_REG_ZR, SW64_BPF_FUNC_MISC_WR_F) + +/* SW64 ALU instructions REG format */ +#define 
SW64_BPF_ADDW_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_ADDW) +#define SW64_BPF_ADDL_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_ADDL) +#define SW64_BPF_SUBW_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_SUBW) +#define SW64_BPF_SUBL_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_SUBL) +#define SW64_BPF_MULW_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_MULW) +#define SW64_BPF_MULL_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_MULL) +#define SW64_BPF_ZAP_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_ZAP) +#define SW64_BPF_ZAPNOT_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_ZAPNOT) +#define SW64_BPF_SEXTB_REG(rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + SW64_BPF_REG_ZR, rb, dst, SW64_BPF_FUNC_ALU_SEXTB) +#define SW64_BPF_SEXTH_REG(rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + SW64_BPF_REG_ZR, rb, dst, SW64_BPF_FUNC_ALU_SEXTH) + +/* SW64 ALU instructions IMM format */ +#define SW64_BPF_ADDW_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_ADDW) +#define SW64_BPF_ADDL_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_ADDL) +#define SW64_BPF_SUBW_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_SUBW) +#define SW64_BPF_SUBL_IMM(ra, imm8, dst) \ + 
sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_SUBL) +#define SW64_BPF_MULW_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_MULW) +#define SW64_BPF_MULL_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_MULL) +#define SW64_BPF_ZAP_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_ZAP) +#define SW64_BPF_ZAPNOT_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_ZAPNOT) +#define SW64_BPF_SEXTB_IMM(imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + SW64_BPF_REG_ZR, imm8, dst, SW64_BPF_FUNC_ALU_SEXTB) +#define SW64_BPF_SEXTH_IMM(imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + SW64_BPF_REG_ZR, imm8, dst, SW64_BPF_FUNC_ALU_SEXTH) + +/* SW64 bit shift instructions REG format */ +#define SW64_BPF_SLL_REG(src, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + src, rb, dst, SW64_BPF_FUNC_ALU_SLL) +#define SW64_BPF_SRL_REG(src, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + src, rb, dst, SW64_BPF_FUNC_ALU_SRL) +#define SW64_BPF_SRA_REG(src, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + src, rb, dst, SW64_BPF_FUNC_ALU_SRA) + +/* SW64 bit shift instructions IMM format */ +#define SW64_BPF_SLL_IMM(src, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + src, imm8, dst, SW64_BPF_FUNC_ALU_SLL) +#define SW64_BPF_SRL_IMM(src, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + src, imm8, dst, SW64_BPF_FUNC_ALU_SRL) +#define SW64_BPF_SRA_IMM(src, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + src, imm8, dst, SW64_BPF_FUNC_ALU_SRA) 
+ +/* SW64 control instructions */ +#define SW64_BPF_CALL(ra, rb) \ + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_CALL, ra, rb, 0) +#define SW64_BPF_RET(rb) \ + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_RET, SW64_BPF_REG_ZR, rb, 0) +#define SW64_BPF_JMP(ra, rb) \ + sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_JMP, ra, rb, 0) +#define SW64_BPF_BR(ra, offset) \ + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR, ra, offset) +#define SW64_BPF_BSR(ra, offset) \ + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BSR, ra, offset) +#define SW64_BPF_BEQ(ra, offset) \ + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BEQ, ra, offset) +#define SW64_BPF_BNE(ra, offset) \ + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BNE, ra, offset) +#define SW64_BPF_BLT(ra, offset) \ + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BLT, ra, offset) +#define SW64_BPF_BLE(ra, offset) \ + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BLE, ra, offset) +#define SW64_BPF_BGT(ra, offset) \ + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BGT, ra, offset) +#define SW64_BPF_BGE(ra, offset) \ + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BGE, ra, offset) +#define SW64_BPF_BLBC(ra, offset) \ + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BLBC, ra, offset) +#define SW64_BPF_BLBS(ra, offset) \ + sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BLBS, ra, offset) + +/* SW64 bit logic instructions REG format */ +#define SW64_BPF_AND_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_AND) +#define SW64_BPF_ANDNOT_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_BIC) +#define SW64_BPF_BIS_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_BIS) +#define SW64_BPF_ORNOT_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_ORNOT) +#define SW64_BPF_XOR_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, 
dst, SW64_BPF_FUNC_ALU_XOR) +#define SW64_BPF_EQV_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_EQV) + +/* SW64 bit logic instructions IMM format */ +#define SW64_BPF_AND_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_AND) +#define SW64_BPF_ANDNOT_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_BIC) +#define SW64_BPF_BIS_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_BIS) +#define SW64_BPF_ORNOT_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_ORNOT) +#define SW64_BPF_XOR_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_XOR) +#define SW64_BPF_EQV_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_EQV) + +/* SW64 compare instructions REG format */ +#define SW64_BPF_CMPEQ_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_CMPEQ) +#define SW64_BPF_CMPLT_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_CMPLT) +#define SW64_BPF_CMPLE_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_CMPLE) +#define SW64_BPF_CMPULT_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_CMPULT) +#define SW64_BPF_CMPULE_REG(ra, rb, dst) \ + sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \ + ra, rb, dst, SW64_BPF_FUNC_ALU_CMPULE) + +/* SW64 compare instructions imm format */ +#define SW64_BPF_CMPEQ_IMM(ra, imm8, dst) \ + 
sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_CMPEQ) +#define SW64_BPF_CMPLT_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_CMPLT) +#define SW64_BPF_CMPLE_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_CMPLE) +#define SW64_BPF_CMPULT_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_CMPULT) +#define SW64_BPF_CMPULE_IMM(ra, imm8, dst) \ + sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \ + ra, imm8, dst, SW64_BPF_FUNC_ALU_CMPULE) + +#endif /* _SW64_NET_BPF_JIT_H */ diff --git a/arch/sw_64/net/bpf_jit_comp.c b/arch/sw_64/net/bpf_jit_comp.c new file mode 100644 index 0000000000000000000000000000000000000000..31202dd0f9cf8dd8fd51d0d30c94ea422d74c8b7 --- /dev/null +++ b/arch/sw_64/net/bpf_jit_comp.c @@ -0,0 +1,1455 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * BPF JIT compiler for SW64 + * + * Copyright (C) Mao Minkai + * Author: Mao Minkai + * + * This file is taken from arch/arm64/net/bpf_jit_comp.c + * Copyright (C) 2014-2016 Zi Shen Lim + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include + +#include + +#include "bpf_jit.h" + +#define TCALL_CNT (MAX_BPF_JIT_REG + 0) + +static const int bpf2sw64[] = { + /* return value from in-kernel function, and exit value from eBPF */ + [BPF_REG_0] = SW64_BPF_REG_V0, + /* arguments from eBPF program to in-kernel function */ + [BPF_REG_1] = SW64_BPF_REG_A0, + [BPF_REG_2] = SW64_BPF_REG_A1, + [BPF_REG_3] = SW64_BPF_REG_A2, + [BPF_REG_4] = SW64_BPF_REG_A3, + [BPF_REG_5] = SW64_BPF_REG_A4, + /* callee saved registers that in-kernel function will preserve */ + [BPF_REG_6] = SW64_BPF_REG_S0, + [BPF_REG_7] = SW64_BPF_REG_S1, + [BPF_REG_8] = SW64_BPF_REG_S2, + [BPF_REG_9] = SW64_BPF_REG_S3, + /* read-only frame pointer to access stack */ + [BPF_REG_FP] = SW64_BPF_REG_FP, + /* tail_call_cnt */ + [TCALL_CNT] = SW64_BPF_REG_S4, + /* temporary register for blinding constants */ + [BPF_REG_AX] = SW64_BPF_REG_T11, +}; + +struct jit_ctx { + const struct bpf_prog *prog; + int idx; // JITed instruction index + int current_tmp_reg; + int epilogue_offset; + int *insn_offset; // [bpf_insn_idx] = jited_insn_idx + int exentry_idx; + u32 *image; // JITed instruction + u32 stack_size; +}; + +struct sw64_jit_data { + struct bpf_binary_header *header; + u8 *image; // bpf instruction + struct jit_ctx ctx; +}; + +static inline u32 sw64_bpf_gen_format_br(int opcode, enum sw64_bpf_registers ra, u32 disp) +{ + opcode = opcode << SW64_BPF_OPCODE_OFFSET; + ra = ra << SW64_BPF_RA_OFFSET; + return opcode | ra | (disp & 0x1fffff); +} + +static inline u32 sw64_bpf_gen_format_ls(int opcode, enum sw64_bpf_registers ra, + enum sw64_bpf_registers rb, u16 disp) +{ + opcode = opcode << SW64_BPF_OPCODE_OFFSET; + ra = ra << SW64_BPF_RA_OFFSET; + rb = rb << SW64_BPF_RB_OFFSET; + return opcode | ra | rb | (disp & 0xffff); +} + +static inline u32 sw64_bpf_gen_format_ls_func(int opcode, enum sw64_bpf_registers ra, + enum sw64_bpf_registers rb, u16 disp, int function) +{ + opcode = opcode << SW64_BPF_OPCODE_OFFSET; + ra = ra 
<< SW64_BPF_RA_OFFSET; + rb = rb << SW64_BPF_RB_OFFSET; + function = function << SW64_BPF_LS_FUNC_OFFSET; + return opcode | ra | rb | function | (disp & 0xfff); +} + +static inline u32 sw64_bpf_gen_format_simple_alu_reg(int opcode, enum sw64_bpf_registers ra, + enum sw64_bpf_registers rb, enum sw64_bpf_registers rc, int function) +{ + opcode = opcode << SW64_BPF_OPCODE_OFFSET; + ra = ra << SW64_BPF_RA_OFFSET; + rb = rb << SW64_BPF_RB_OFFSET; + rc = rc << SW64_BPF_SIMPLE_ALU_RC_OFFSET; + function = function << SW64_BPF_SIMPLE_ALU_FUNC_OFFSET; + return opcode | ra | rb | function | rc; +} + +static inline u32 sw64_bpf_gen_format_simple_alu_imm(int opcode, enum sw64_bpf_registers ra, + u32 imm, enum sw64_bpf_registers rc, int function) +{ + opcode = opcode << SW64_BPF_OPCODE_OFFSET; + ra = ra << SW64_BPF_RA_OFFSET; + imm = (imm & 0xff) << SW64_BPF_SIMPLE_ALU_IMM_OFFSET; + rc = rc << SW64_BPF_SIMPLE_ALU_RC_OFFSET; + function = function << SW64_BPF_SIMPLE_ALU_FUNC_OFFSET; + return opcode | ra | imm | function | rc; +} + +static inline void emit(const u32 insn, struct jit_ctx *ctx) +{ + if (ctx->image != NULL) + ctx->image[ctx->idx] = insn; + + ctx->idx++; +} + +static inline int get_tmp_reg(struct jit_ctx *ctx) +{ + ctx->current_tmp_reg++; + /* Do not use 22-25. Should be more than enough. 
*/ + if (unlikely(ctx->current_tmp_reg == 8)) { + pr_err("eBPF JIT %s[%d]: not enough temporary registers!\n", + current->comm, current->pid); + return -1; + } + return ctx->current_tmp_reg; +} + +static inline void put_tmp_reg(struct jit_ctx *ctx) +{ + ctx->current_tmp_reg--; + if (ctx->current_tmp_reg == 21) + ctx->current_tmp_reg = 7; +} + +static void emit_sw64_ldu32(const int dst, const u32 imm, struct jit_ctx *ctx) +{ + u16 imm_tmp; + u8 reg_tmp = get_tmp_reg(ctx); + + if (!imm) { + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, dst), ctx); + put_tmp_reg(ctx); + return; + } + + if (imm <= S16_MAX) { + emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm), ctx); + put_tmp_reg(ctx); + return; + } + + if (imm >= U32_MAX - S16_MAX) { + emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + put_tmp_reg(ctx); + return; + } + + imm_tmp = (imm >> 30) & 3; + emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm_tmp), ctx); + if (imm_tmp) + emit(SW64_BPF_SLL_IMM(dst, 30, dst), ctx); + + imm_tmp = (imm >> 15) & 0x7fff; + if (imm_tmp) { + emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx); + emit(SW64_BPF_SLL_IMM(reg_tmp, 15, reg_tmp), ctx); + emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx); + } + + imm_tmp = imm & 0x7fff; + if (imm_tmp) + emit(SW64_BPF_LDI(dst, dst, imm_tmp), ctx); + + put_tmp_reg(ctx); +} + +static void emit_sw64_lds32(const int dst, const s32 imm, struct jit_ctx *ctx) +{ + s16 hi = imm >> 16; + s16 lo = imm & 0xffff; + u8 reg_tmp = get_tmp_reg(ctx); + + if (!imm) { + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, dst), ctx); + put_tmp_reg(ctx); + return; + } + + if (imm >= S16_MIN && imm <= S16_MAX) { + emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm), ctx); + put_tmp_reg(ctx); + return; + } + + emit(SW64_BPF_LDIH(dst, SW64_BPF_REG_ZR, hi), ctx); + if (lo & 0x8000) { // sign bit is 1 + lo = lo & 0x7fff; + emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, 1), ctx); + emit(SW64_BPF_SLL_IMM(reg_tmp, 15, 
reg_tmp), ctx); + emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx); + if (lo) + emit(SW64_BPF_LDI(dst, dst, lo), ctx); + } else { // sign bit is 0 + if (lo) + emit(SW64_BPF_LDI(dst, dst, lo), ctx); + } + + put_tmp_reg(ctx); +} + +static void emit_sw64_ldu64(const int dst, const u64 imm, struct jit_ctx *ctx) +{ + u16 imm_tmp; + u8 reg_tmp = get_tmp_reg(ctx); + + if (!imm) { + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, dst), ctx); + put_tmp_reg(ctx); + return; + } + + if (imm <= U32_MAX) { + put_tmp_reg(ctx); + return emit_sw64_ldu32(dst, (u32)imm, ctx); + } + + if (imm >= (U64_MAX - S16_MAX) || imm <= S16_MAX) { + emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm), ctx); + put_tmp_reg(ctx); + return; + } + + imm_tmp = (imm >> 60) & 0xf; + emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm_tmp), ctx); + if (imm_tmp) + emit(SW64_BPF_SLL_IMM(dst, 60, dst), ctx); + + imm_tmp = (imm >> 45) & 0x7fff; + if (imm_tmp) { + emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx); + emit(SW64_BPF_SLL_IMM(reg_tmp, 45, reg_tmp), ctx); + emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx); + } + + imm_tmp = (imm >> 30) & 0x7fff; + if (imm_tmp) { + emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx); + emit(SW64_BPF_SLL_IMM(reg_tmp, 30, reg_tmp), ctx); + emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx); + } + + imm_tmp = (imm >> 15) & 0x7fff; + if (imm_tmp) { + emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx); + emit(SW64_BPF_SLL_IMM(reg_tmp, 15, reg_tmp), ctx); + emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx); + } + + imm_tmp = imm & 0x7fff; + if (imm_tmp) + emit(SW64_BPF_LDI(dst, dst, imm_tmp), ctx); + + put_tmp_reg(ctx); +} + +/* Do not change!!! 
See arch/sw_64/lib/divide.S for more detail */ +#define REG(x) "$"str(x) +#define str(x) #x +#define DIV_RET_ADDR 23 +#define DIVIDEND 24 +#define DIVISOR 25 +#define RESULT 27 + +#include +static void emit_sw64_divmod(const int dst, const int src, struct jit_ctx *ctx, u8 code) +{ + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, dst, DIVIDEND), ctx); + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, src, DIVISOR), ctx); + switch (BPF_CLASS(code)) { + case BPF_ALU: + switch (BPF_OP(code)) { + case BPF_DIV: + emit_sw64_ldu64(SW64_BPF_REG_PV, (u64)__divwu, ctx); + break; + case BPF_MOD: + emit_sw64_ldu64(SW64_BPF_REG_PV, (u64)__remwu, ctx); + break; + } + emit(SW64_BPF_CALL(DIV_RET_ADDR, SW64_BPF_REG_PV), ctx); + emit(SW64_BPF_ZAP_IMM(RESULT, 0xf0, dst), ctx); + break; + case BPF_ALU64: + switch (BPF_OP(code)) { + case BPF_DIV: + emit_sw64_ldu64(SW64_BPF_REG_PV, (u64)__divlu, ctx); + break; + case BPF_MOD: + emit_sw64_ldu64(SW64_BPF_REG_PV, (u64)__remlu, ctx); + break; + } + emit(SW64_BPF_CALL(DIV_RET_ADDR, SW64_BPF_REG_PV), ctx); + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, RESULT, dst), ctx); + break; + } +} + +#undef REG +#undef str +#undef DIVIDEND +#undef DIVISOR +#undef RESULT + +/* STX XADD: lock *(u32 *)(dst + off) += src */ +static void emit_sw64_xadd32(const int src, int dst, s16 off, struct jit_ctx *ctx) +{ + int atomic_start; + int atomic_end; + u8 tmp1 = get_tmp_reg(ctx); + u8 tmp2 = get_tmp_reg(ctx); + u8 tmp3 = get_tmp_reg(ctx); + + if (off < -0x800 || off > 0x7ff) { + emit(SW64_BPF_LDI(tmp1, dst, off), ctx); + dst = tmp1; + off = 0; + } + + atomic_start = ctx->idx; + emit(SW64_BPF_LLDW(tmp2, dst, off), ctx); + emit(SW64_BPF_LDI(tmp3, SW64_BPF_REG_ZR, 1), ctx); + emit(SW64_BPF_WR_F(tmp3), ctx); + emit(SW64_BPF_ADDW_REG(tmp2, src, tmp2), ctx); + if (ctx->idx & 1) + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, SW64_BPF_REG_ZR), ctx); + emit(SW64_BPF_LSTW(tmp2, dst, off), ctx); + emit(SW64_BPF_RD_F(tmp3), ctx); + atomic_end = ctx->idx; + emit(SW64_BPF_BEQ(tmp3, 
atomic_start - atomic_end - 1), ctx); + + put_tmp_reg(ctx); + put_tmp_reg(ctx); + put_tmp_reg(ctx); +} + +/* STX XADD: lock *(u64 *)(dst + off) += src */ +static void emit_sw64_xadd64(const int src, int dst, s16 off, struct jit_ctx *ctx) +{ + int atomic_start; + int atomic_end; + u8 tmp1 = get_tmp_reg(ctx); + u8 tmp2 = get_tmp_reg(ctx); + u8 tmp3 = get_tmp_reg(ctx); + + if (off < -0x800 || off > 0x7ff) { + emit(SW64_BPF_LDI(tmp1, dst, off), ctx); + dst = tmp1; + off = 0; + } + + atomic_start = ctx->idx; + emit(SW64_BPF_LLDL(tmp2, dst, off), ctx); + emit(SW64_BPF_LDI(tmp3, SW64_BPF_REG_ZR, 1), ctx); + emit(SW64_BPF_WR_F(tmp3), ctx); + emit(SW64_BPF_ADDL_REG(tmp2, src, tmp2), ctx); + if (ctx->idx & 1) + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, SW64_BPF_REG_ZR), ctx); + emit(SW64_BPF_LSTL(tmp2, dst, off), ctx); + emit(SW64_BPF_RD_F(tmp3), ctx); + atomic_end = ctx->idx; + emit(SW64_BPF_BEQ(tmp3, atomic_start - atomic_end - 1), ctx); + + put_tmp_reg(ctx); + put_tmp_reg(ctx); + put_tmp_reg(ctx); +} + +static void emit_sw64_htobe16(const int dst, struct jit_ctx *ctx) +{ + u8 tmp = get_tmp_reg(ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x2, tmp), ctx); + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x1, dst), ctx); + emit(SW64_BPF_SRL_IMM(tmp, 8, tmp), ctx); + emit(SW64_BPF_SLL_IMM(dst, 8, dst), ctx); + emit(SW64_BPF_BIS_REG(dst, tmp, dst), ctx); + + put_tmp_reg(ctx); +} + +static void emit_sw64_htobe32(const int dst, struct jit_ctx *ctx) +{ + u8 tmp1 = get_tmp_reg(ctx); + u8 tmp2 = get_tmp_reg(ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x8, tmp1), ctx); + emit(SW64_BPF_SRL_IMM(tmp1, 24, tmp2), ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x4, tmp1), ctx); + emit(SW64_BPF_SRL_IMM(tmp1, 8, tmp1), ctx); + emit(SW64_BPF_BIS_REG(tmp2, tmp1, tmp2), ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x2, tmp1), ctx); + emit(SW64_BPF_SLL_IMM(tmp1, 8, tmp1), ctx); + emit(SW64_BPF_BIS_REG(tmp2, tmp1, tmp2), ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x1, dst), ctx); + emit(SW64_BPF_SLL_IMM(dst, 24, dst), 
ctx); + emit(SW64_BPF_BIS_REG(dst, tmp2, dst), ctx); + + put_tmp_reg(ctx); + put_tmp_reg(ctx); +} + +static void emit_sw64_htobe64(const int dst, struct jit_ctx *ctx) +{ + u8 tmp1 = get_tmp_reg(ctx); + u8 tmp2 = get_tmp_reg(ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x80, tmp1), ctx); + emit(SW64_BPF_SRL_IMM(tmp1, 56, tmp2), ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x40, tmp1), ctx); + emit(SW64_BPF_SRL_IMM(tmp1, 40, tmp1), ctx); + emit(SW64_BPF_BIS_REG(tmp2, tmp1, tmp2), ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x20, tmp1), ctx); + emit(SW64_BPF_SRL_IMM(tmp1, 24, tmp1), ctx); + emit(SW64_BPF_BIS_REG(tmp2, tmp1, tmp2), ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x10, tmp1), ctx); + emit(SW64_BPF_SRL_IMM(tmp1, 8, tmp1), ctx); + emit(SW64_BPF_BIS_REG(tmp2, tmp1, tmp2), ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x08, tmp1), ctx); + emit(SW64_BPF_SLL_IMM(tmp1, 8, tmp1), ctx); + emit(SW64_BPF_BIS_REG(tmp2, tmp1, tmp2), ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x04, tmp1), ctx); + emit(SW64_BPF_SLL_IMM(tmp1, 24, tmp1), ctx); + emit(SW64_BPF_BIS_REG(tmp2, tmp1, tmp2), ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x02, tmp1), ctx); + emit(SW64_BPF_SLL_IMM(tmp1, 40, tmp1), ctx); + emit(SW64_BPF_BIS_REG(tmp2, tmp1, tmp2), ctx); + + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x01, dst), ctx); + emit(SW64_BPF_SLL_IMM(dst, 56, dst), ctx); + emit(SW64_BPF_BIS_REG(dst, tmp2, dst), ctx); + + put_tmp_reg(ctx); + put_tmp_reg(ctx); +} + +static void jit_fill_hole(void *area, unsigned int size) +{ + unsigned long c = SW64_BPF_ILLEGAL_INSN; + + c |= c << 32; + __constant_c_memset(area, c, size); +} + +static int offset_to_epilogue(const struct jit_ctx *ctx); +static int bpf2sw64_offset(int bpf_idx, s32 off, const struct jit_ctx *ctx) +{ + int from = ctx->insn_offset[bpf_idx + 1]; + int to = ctx->insn_offset[bpf_idx + 1 + off]; + + if (ctx->image == NULL) + return 0; + + return to - from; +} + +static int offset_to_epilogue(const struct jit_ctx *ctx) +{ + if (ctx->image == NULL) + return 0; + + return 
ctx->epilogue_offset - ctx->idx; +} + +/* For tail call, jump to set up function call stack */ +#define PROLOGUE_OFFSET 11 + +static void build_prologue(struct jit_ctx *ctx, bool was_classic) +{ + const u8 r6 = bpf2sw64[BPF_REG_6]; + const u8 r7 = bpf2sw64[BPF_REG_7]; + const u8 r8 = bpf2sw64[BPF_REG_8]; + const u8 r9 = bpf2sw64[BPF_REG_9]; + const u8 fp = bpf2sw64[BPF_REG_FP]; + const u8 tcc = bpf2sw64[TCALL_CNT]; + + /* Save callee-saved registers */ + emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, -64), ctx); + emit(SW64_BPF_STL(SW64_BPF_REG_RA, SW64_BPF_REG_SP, 0), ctx); + emit(SW64_BPF_STL(fp, SW64_BPF_REG_SP, 8), ctx); + emit(SW64_BPF_STL(r6, SW64_BPF_REG_SP, 16), ctx); + emit(SW64_BPF_STL(r7, SW64_BPF_REG_SP, 24), ctx); + emit(SW64_BPF_STL(r8, SW64_BPF_REG_SP, 32), ctx); + emit(SW64_BPF_STL(r9, SW64_BPF_REG_SP, 40), ctx); + emit(SW64_BPF_STL(tcc, SW64_BPF_REG_SP, 48), ctx); + emit(SW64_BPF_STL(SW64_BPF_REG_GP, SW64_BPF_REG_SP, 56), ctx); + + /* Set up BPF prog stack base register */ + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_SP, fp), ctx); + if (!was_classic) + /* Initialize tail_call_cnt */ + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, tcc), ctx); + + /* Set up function call stack */ + ctx->stack_size = (ctx->prog->aux->stack_depth + 15) & (~15); + emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, -ctx->stack_size), ctx); +} + +static void build_epilogue(struct jit_ctx *ctx) +{ + const u8 r6 = bpf2sw64[BPF_REG_6]; + const u8 r7 = bpf2sw64[BPF_REG_7]; + const u8 r8 = bpf2sw64[BPF_REG_8]; + const u8 r9 = bpf2sw64[BPF_REG_9]; + const u8 fp = bpf2sw64[BPF_REG_FP]; + const u8 tcc = bpf2sw64[TCALL_CNT]; + + /* Destroy function call stack */ + emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, ctx->stack_size), ctx); + + /* Restore callee-saved registers */ + emit(SW64_BPF_LDL(SW64_BPF_REG_RA, SW64_BPF_REG_SP, 0), ctx); + emit(SW64_BPF_LDL(fp, SW64_BPF_REG_SP, 8), ctx); + emit(SW64_BPF_LDL(r6, SW64_BPF_REG_SP, 16), ctx); + 
emit(SW64_BPF_LDL(r7, SW64_BPF_REG_SP, 24), ctx); + emit(SW64_BPF_LDL(r8, SW64_BPF_REG_SP, 32), ctx); + emit(SW64_BPF_LDL(r9, SW64_BPF_REG_SP, 40), ctx); + emit(SW64_BPF_LDL(tcc, SW64_BPF_REG_SP, 48), ctx); + emit(SW64_BPF_LDL(SW64_BPF_REG_GP, SW64_BPF_REG_SP, 56), ctx); + emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, 64), ctx); + + /* Return */ + emit(SW64_BPF_RET(SW64_BPF_REG_RA), ctx); +} + +static int emit_bpf_tail_call(struct jit_ctx *ctx) +{ + /* bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index) */ + const u8 r2 = bpf2sw64[BPF_REG_2]; /* struct bpf_array *array */ + const u8 r3 = bpf2sw64[BPF_REG_3]; /* u32 index */ + + const u8 tmp = get_tmp_reg(ctx); + const u8 prg = get_tmp_reg(ctx); + const u8 tcc = bpf2sw64[TCALL_CNT]; + u64 offset; + static int out_idx; +#define out_offset (ctx->image ? (out_idx - ctx->idx - 1) : 0) + + /* if (index >= array->map.max_entries) + * goto out; + */ + offset = offsetof(struct bpf_array, map.max_entries); + emit_sw64_ldu64(tmp, offset, ctx); + emit(SW64_BPF_ADDL_REG(r2, tmp, tmp), ctx); /* tmp = r2 + tmp = &map.max_entries */ + emit(SW64_BPF_LDW(tmp, tmp, 0), ctx); /* tmp = *tmp = map.max_entries */ + emit(SW64_BPF_ZAP_IMM(tmp, 0xf0, tmp), ctx); /* map.max_entries is u32 */ + emit(SW64_BPF_ZAP_IMM(r3, 0xf0, r3), ctx); /* index is u32 */ + emit(SW64_BPF_CMPULE_REG(tmp, r3, tmp), ctx); + emit(SW64_BPF_BNE(tmp, out_offset), ctx); + + /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) + * goto out; + * tail_call_cnt++; + */ + emit_sw64_ldu64(tmp, MAX_TAIL_CALL_CNT, ctx); + emit(SW64_BPF_CMPULT_REG(tmp, tcc, tmp), ctx); + emit(SW64_BPF_BNE(tmp, out_offset), ctx); + emit(SW64_BPF_ADDL_IMM(tcc, 1, tcc), ctx); + + /* prog = array->ptrs[index]; + * if (prog == NULL) + * goto out; + */ + offset = offsetof(struct bpf_array, ptrs); + emit_sw64_ldu64(tmp, offset, ctx); + emit(SW64_BPF_ADDL_REG(r2, tmp, tmp), ctx); /* tmp = r2 + tmp = &ptrs[0] */ + emit(SW64_BPF_SLL_IMM(r3, 3, prg), ctx); /* prg = r3 * 8, each entry is a 
pointer */ + emit(SW64_BPF_ADDL_REG(tmp, prg, prg), ctx); /* prg = tmp + prg = &ptrs[index] */ + emit(SW64_BPF_LDL(prg, prg, 0), ctx); /* prg = *prg = ptrs[index] = prog */ + emit(SW64_BPF_BEQ(prg, out_offset), ctx); + + /* goto *(prog->bpf_func + prologue_offset); */ + offset = offsetof(struct bpf_prog, bpf_func); + emit_sw64_ldu64(tmp, offset, ctx); + emit(SW64_BPF_ADDL_REG(prg, tmp, tmp), ctx); /* tmp = prg + tmp = &bpf_func */ + emit(SW64_BPF_LDL(tmp, tmp, 0), ctx); /* tmp = *tmp = bpf_func */ + emit(SW64_BPF_BEQ(tmp, out_offset), ctx); + emit(SW64_BPF_LDI(tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx); + emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, ctx->stack_size), ctx); + emit(SW64_BPF_JMP(SW64_BPF_REG_ZR, tmp), ctx); + + put_tmp_reg(ctx); + put_tmp_reg(ctx); + + /* out */ + if (ctx->image == NULL) + out_idx = ctx->idx; + if (ctx->image != NULL && out_idx <= 0) + return -1; +#undef out_offset + return 0; +} + +/* For accesses to BTF pointers, add an entry to the exception table */ +static int add_exception_handler(const struct bpf_insn *insn, + struct jit_ctx *ctx, + int dst_reg) +{ + off_t offset; + unsigned long pc; + struct exception_table_entry *ex; + + if (!ctx->image) + /* First pass */ + return 0; + + if (!ctx->prog->aux->extable || BPF_MODE(insn->code) != BPF_PROBE_MEM) + return 0; + + if (WARN_ON_ONCE(ctx->exentry_idx >= ctx->prog->aux->num_exentries)) + return -EINVAL; + + ex = &ctx->prog->aux->extable[ctx->exentry_idx]; + pc = (unsigned long)&ctx->image[ctx->idx - 1]; + + offset = (long)&ex->insn - pc; + ex->insn = offset; + + ex->fixup.bits.nextinsn = sizeof(u32); + ex->fixup.bits.valreg = dst_reg; + ex->fixup.bits.errreg = SW64_BPF_REG_ZR; + + ctx->exentry_idx++; + return 0; +} + +/* JITs an eBPF instruction. + * Returns: + * 0 - successfully JITed an 8-byte eBPF instruction. + * >0 - successfully JITed a 16-byte eBPF instruction. + * <0 - failed to JIT. 
+ */ +static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) +{ + const u8 code = insn->code; + u8 dst = bpf2sw64[insn->dst_reg]; + u8 src = bpf2sw64[insn->src_reg]; + const u8 tmp1 __maybe_unused = get_tmp_reg(ctx); + const u8 tmp2 __maybe_unused = get_tmp_reg(ctx); + const s16 off = insn->off; + const s32 imm = insn->imm; + const int bpf_idx = insn - ctx->prog->insnsi; + s32 jmp_offset; + u64 func; + struct bpf_insn insn1; + u64 imm64; + int ret; + + switch (code) { + case BPF_ALU | BPF_MOV | BPF_X: + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, src, dst), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; + case BPF_ALU64 | BPF_MOV | BPF_X: + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, src, dst), ctx); + break; + case BPF_ALU | BPF_ADD | BPF_X: + emit(SW64_BPF_ADDW_REG(dst, src, dst), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; + case BPF_ALU64 | BPF_ADD | BPF_X: + emit(SW64_BPF_ADDL_REG(dst, src, dst), ctx); + break; + case BPF_ALU | BPF_SUB | BPF_X: + emit(SW64_BPF_SUBW_REG(dst, src, dst), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; + case BPF_ALU64 | BPF_SUB | BPF_X: + emit(SW64_BPF_SUBL_REG(dst, src, dst), ctx); + break; + case BPF_ALU | BPF_MUL | BPF_X: + emit(SW64_BPF_MULW_REG(dst, src, dst), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; + case BPF_ALU64 | BPF_MUL | BPF_X: + emit(SW64_BPF_MULL_REG(dst, src, dst), ctx); + break; + case BPF_ALU | BPF_DIV | BPF_X: + emit_sw64_divmod(dst, src, ctx, code); + break; + case BPF_ALU64 | BPF_DIV | BPF_X: + emit_sw64_divmod(dst, src, ctx, code); + break; + case BPF_ALU | BPF_MOD | BPF_X: + emit_sw64_divmod(dst, src, ctx, code); + break; + case BPF_ALU64 | BPF_MOD | BPF_X: + emit_sw64_divmod(dst, src, ctx, code); + break; + case BPF_ALU | BPF_LSH | BPF_X: + emit(SW64_BPF_SLL_REG(dst, src, dst), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; + case BPF_ALU64 | BPF_LSH | BPF_X: + emit(SW64_BPF_SLL_REG(dst, src, dst), ctx); + break; + 
case BPF_ALU | BPF_RSH | BPF_X: + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + fallthrough; + case BPF_ALU64 | BPF_RSH | BPF_X: + emit(SW64_BPF_SRL_REG(dst, src, dst), ctx); + break; + case BPF_ALU | BPF_ARSH | BPF_X: + emit(SW64_BPF_ADDW_REG(SW64_BPF_REG_ZR, dst, dst), ctx); + emit(SW64_BPF_SRA_REG(dst, src, dst), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; + case BPF_ALU64 | BPF_ARSH | BPF_X: + emit(SW64_BPF_SRA_REG(dst, src, dst), ctx); + break; + case BPF_ALU | BPF_AND | BPF_X: + emit(SW64_BPF_AND_REG(dst, src, dst), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; + case BPF_ALU64 | BPF_AND | BPF_X: + emit(SW64_BPF_AND_REG(dst, src, dst), ctx); + break; + case BPF_ALU | BPF_OR | BPF_X: + emit(SW64_BPF_BIS_REG(dst, src, dst), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; + case BPF_ALU64 | BPF_OR | BPF_X: + emit(SW64_BPF_BIS_REG(dst, src, dst), ctx); + break; + case BPF_ALU | BPF_XOR | BPF_X: + emit(SW64_BPF_XOR_REG(dst, src, dst), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; + case BPF_ALU64 | BPF_XOR | BPF_X: + emit(SW64_BPF_XOR_REG(dst, src, dst), ctx); + break; + case BPF_ALU | BPF_NEG: + emit(SW64_BPF_SUBW_REG(SW64_BPF_REG_ZR, dst, dst), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; + case BPF_ALU64 | BPF_NEG: + emit(SW64_BPF_SUBL_REG(SW64_BPF_REG_ZR, dst, dst), ctx); + break; + case BPF_ALU | BPF_END | BPF_TO_LE: + switch (imm) { + case 16: + emit(SW64_BPF_ZAPNOT_IMM(dst, 0x3, dst), ctx); + break; + case 32: + emit(SW64_BPF_ZAPNOT_IMM(dst, 0xf, dst), ctx); + break; + case 64: + break; + default: + pr_err("eBPF JIT %s[%d]: BPF_TO_LE unknown size\n", + current->comm, current->pid); + return -EINVAL; + } + break; + case BPF_ALU | BPF_END | BPF_TO_BE: + switch (imm) { + case 16: + emit_sw64_htobe16(dst, ctx); + break; + case 32: + emit_sw64_htobe32(dst, ctx); + break; + case 64: + emit_sw64_htobe64(dst, ctx); + break; + default: + pr_err("eBPF JIT %s[%d]: BPF_TO_BE unknown 
size\n", + current->comm, current->pid); + return -EINVAL; + } + break; + + case BPF_ALU | BPF_MOV | BPF_K: + if (imm >= S16_MIN && imm <= S16_MAX) + emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm), ctx); + else + emit_sw64_ldu32(dst, imm, ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; + case BPF_ALU64 | BPF_MOV | BPF_K: + if (imm >= S16_MIN && imm <= S16_MAX) + emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm), ctx); + else + emit_sw64_lds32(dst, imm, ctx); + break; + case BPF_ALU | BPF_ADD | BPF_K: + if (imm >= S16_MIN && imm <= S16_MAX) { + emit(SW64_BPF_LDI(dst, dst, imm), ctx); + } else { + emit_sw64_ldu32(tmp1, imm, ctx); + emit(SW64_BPF_ADDW_REG(dst, tmp1, dst), ctx); + } + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; + case BPF_ALU64 | BPF_ADD | BPF_K: + if (imm >= S16_MIN && imm <= S16_MAX) { + emit(SW64_BPF_LDI(dst, dst, imm), ctx); + } else { + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_ADDL_REG(dst, tmp1, dst), ctx); + } + break; + case BPF_ALU | BPF_SUB | BPF_K: + if (imm >= -S16_MAX && imm <= -S16_MIN) { + emit(SW64_BPF_LDI(dst, dst, -imm), ctx); + } else { + emit_sw64_ldu32(tmp1, imm, ctx); + emit(SW64_BPF_SUBL_REG(dst, tmp1, dst), ctx); + } + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; + case BPF_ALU64 | BPF_SUB | BPF_K: + if (imm >= -S16_MAX && imm <= -S16_MIN) { + emit(SW64_BPF_LDI(dst, dst, -imm), ctx); + } else { + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_SUBL_REG(dst, tmp1, dst), ctx); + } + break; + case BPF_ALU | BPF_MUL | BPF_K: + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_MULL_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_ldu32(tmp1, imm, ctx); + emit(SW64_BPF_MULL_REG(dst, tmp1, dst), ctx); + } + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; + case BPF_ALU64 | BPF_MUL | BPF_K: + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_MULL_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_MULL_REG(dst, tmp1, dst), ctx); + } + break; + case BPF_ALU | 
BPF_DIV | BPF_K: + emit_sw64_ldu32(tmp1, imm, ctx); + emit_sw64_divmod(dst, tmp1, ctx, code); + break; + case BPF_ALU64 | BPF_DIV | BPF_K: + emit_sw64_lds32(tmp1, imm, ctx); + emit_sw64_divmod(dst, tmp1, ctx, code); + break; + case BPF_ALU | BPF_MOD | BPF_K: + emit_sw64_ldu32(tmp1, imm, ctx); + emit_sw64_divmod(dst, tmp1, ctx, code); + break; + case BPF_ALU64 | BPF_MOD | BPF_K: + emit_sw64_lds32(tmp1, imm, ctx); + emit_sw64_divmod(dst, tmp1, ctx, code); + break; + case BPF_ALU | BPF_LSH | BPF_K: + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_SLL_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_ldu32(tmp1, imm, ctx); + emit(SW64_BPF_SLL_REG(dst, tmp1, dst), ctx); + } + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; + case BPF_ALU64 | BPF_LSH | BPF_K: + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_SLL_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_SLL_REG(dst, tmp1, dst), ctx); + } + break; + case BPF_ALU | BPF_RSH | BPF_K: + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_SRL_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_ldu32(tmp1, imm, ctx); + emit(SW64_BPF_SRL_REG(dst, tmp1, dst), ctx); + } + break; + case BPF_ALU64 | BPF_RSH | BPF_K: + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_SRL_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_SRL_REG(dst, tmp1, dst), ctx); + } + break; + case BPF_ALU | BPF_ARSH | BPF_K: + emit(SW64_BPF_ADDW_REG(SW64_BPF_REG_ZR, dst, dst), ctx); + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_SRA_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_ldu32(tmp1, imm, ctx); + emit(SW64_BPF_SRA_REG(dst, tmp1, dst), ctx); + } + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; + case BPF_ALU64 | BPF_ARSH | BPF_K: + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_SRA_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_SRA_REG(dst, tmp1, dst), ctx); + } + 
break; + case BPF_ALU | BPF_AND | BPF_K: + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_AND_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_ldu32(tmp1, imm, ctx); + emit(SW64_BPF_AND_REG(dst, tmp1, dst), ctx); + } + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; + case BPF_ALU64 | BPF_AND | BPF_K: + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_AND_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_AND_REG(dst, tmp1, dst), ctx); + } + break; + case BPF_ALU | BPF_OR | BPF_K: + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_BIS_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_ldu32(tmp1, imm, ctx); + emit(SW64_BPF_BIS_REG(dst, tmp1, dst), ctx); + } + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; + case BPF_ALU64 | BPF_OR | BPF_K: + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_BIS_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_BIS_REG(dst, tmp1, dst), ctx); + } + break; + case BPF_ALU | BPF_XOR | BPF_K: + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_XOR_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_ldu32(tmp1, imm, ctx); + emit(SW64_BPF_XOR_REG(dst, tmp1, dst), ctx); + } + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; + case BPF_ALU64 | BPF_XOR | BPF_K: + if (imm >= 0 && imm <= U8_MAX) { + emit(SW64_BPF_XOR_IMM(dst, imm, dst), ctx); + } else { + emit_sw64_lds32(tmp1, imm, ctx); + emit(SW64_BPF_XOR_REG(dst, tmp1, dst), ctx); + } + break; + + case BPF_JMP | BPF_JA: + jmp_offset = bpf2sw64_offset(bpf_idx, off, ctx); + if (jmp_offset >= -0x100000 && jmp_offset <= 0xfffff) { + emit(SW64_BPF_BR(SW64_BPF_REG_ZR, jmp_offset), ctx); + } else { + pr_err("eBPF JIT %s[%d]: BPF_JMP out of range, %d instructions\n", + current->comm, current->pid, jmp_offset); + return -EINVAL; + } + break; + + case BPF_JMP32 | BPF_JEQ | BPF_X: + case BPF_JMP32 | BPF_JGT | BPF_X: + case BPF_JMP32 | BPF_JLT | BPF_X: + case BPF_JMP32 | BPF_JGE | BPF_X: + case BPF_JMP32 | BPF_JLE | 
BPF_X: + case BPF_JMP32 | BPF_JNE | BPF_X: + case BPF_JMP32 | BPF_JSGT | BPF_X: + case BPF_JMP32 | BPF_JSLT | BPF_X: + case BPF_JMP32 | BPF_JSGE | BPF_X: + case BPF_JMP32 | BPF_JSLE | BPF_X: + case BPF_JMP32 | BPF_JSET | BPF_X: + emit(SW64_BPF_ADDW_REG(SW64_BPF_REG_ZR, src, tmp1), ctx); + src = tmp1; + emit(SW64_BPF_ADDW_REG(SW64_BPF_REG_ZR, dst, tmp2), ctx); + dst = tmp2; + fallthrough; + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JLT | BPF_X: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JLE | BPF_X: + case BPF_JMP | BPF_JNE | BPF_X: + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP | BPF_JSLT | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_X: + case BPF_JMP | BPF_JSLE | BPF_X: + case BPF_JMP | BPF_JSET | BPF_X: + switch (BPF_OP(code)) { + case BPF_JEQ: + emit(SW64_BPF_CMPEQ_REG(dst, src, tmp1), ctx); + break; + case BPF_JGT: + emit(SW64_BPF_CMPULT_REG(src, dst, tmp1), ctx); + break; + case BPF_JLT: + emit(SW64_BPF_CMPULT_REG(dst, src, tmp1), ctx); + break; + case BPF_JGE: + emit(SW64_BPF_CMPULE_REG(src, dst, tmp1), ctx); + break; + case BPF_JLE: + emit(SW64_BPF_CMPULE_REG(dst, src, tmp1), ctx); + break; + case BPF_JNE: + emit(SW64_BPF_CMPEQ_REG(dst, src, tmp1), ctx); + emit(SW64_BPF_XOR_IMM(tmp1, 1, tmp1), ctx); + break; + case BPF_JSGT: + emit(SW64_BPF_CMPLT_REG(src, dst, tmp1), ctx); + break; + case BPF_JSLT: + emit(SW64_BPF_CMPLT_REG(dst, src, tmp1), ctx); + break; + case BPF_JSGE: + emit(SW64_BPF_CMPLE_REG(src, dst, tmp1), ctx); + break; + case BPF_JSLE: + emit(SW64_BPF_CMPLE_REG(dst, src, tmp1), ctx); + break; + case BPF_JSET: + emit(SW64_BPF_AND_REG(dst, src, tmp1), ctx); + break; + } + jmp_offset = bpf2sw64_offset(bpf_idx, off, ctx); + if (jmp_offset >= -0x100000 && jmp_offset <= 0xfffff) { + emit(SW64_BPF_BNE(tmp1, jmp_offset), ctx); + } else { + pr_err("eBPF JIT %s[%d]: BPF_JMP out of range, %d instructions\n", + current->comm, current->pid, jmp_offset); + return -EINVAL; + } + break; + + case BPF_JMP32 | 
BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_K: + case BPF_JMP32 | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_K: + case BPF_JMP32 | BPF_JSET | BPF_K: + emit(SW64_BPF_ADDW_REG(SW64_BPF_REG_ZR, dst, tmp2), ctx); + dst = tmp2; + fallthrough; + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JLT | BPF_K: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JLE | BPF_K: + case BPF_JMP | BPF_JNE | BPF_K: + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP | BPF_JSLT | BPF_K: + case BPF_JMP | BPF_JSGE | BPF_K: + case BPF_JMP | BPF_JSLE | BPF_K: + case BPF_JMP | BPF_JSET | BPF_K: + emit_sw64_lds32(tmp1, imm, ctx); + switch (BPF_OP(code)) { + case BPF_JEQ: + emit(SW64_BPF_CMPEQ_REG(dst, tmp1, tmp2), ctx); + break; + case BPF_JGT: + emit(SW64_BPF_CMPULT_REG(tmp1, dst, tmp2), ctx); + break; + case BPF_JLT: + emit(SW64_BPF_CMPULT_REG(dst, tmp1, tmp2), ctx); + break; + case BPF_JGE: + emit(SW64_BPF_CMPULE_REG(tmp1, dst, tmp2), ctx); + break; + case BPF_JLE: + emit(SW64_BPF_CMPULE_REG(dst, tmp1, tmp2), ctx); + break; + case BPF_JNE: + emit(SW64_BPF_CMPEQ_REG(dst, tmp1, tmp2), ctx); + emit(SW64_BPF_XOR_IMM(tmp2, 1, tmp2), ctx); + break; + case BPF_JSGT: + emit(SW64_BPF_CMPLT_REG(tmp1, dst, tmp2), ctx); + break; + case BPF_JSLT: + emit(SW64_BPF_CMPLT_REG(dst, tmp1, tmp2), ctx); + break; + case BPF_JSGE: + emit(SW64_BPF_CMPLE_REG(tmp1, dst, tmp2), ctx); + break; + case BPF_JSLE: + emit(SW64_BPF_CMPLE_REG(dst, tmp1, tmp2), ctx); + break; + case BPF_JSET: + emit(SW64_BPF_AND_REG(dst, tmp1, tmp2), ctx); + break; + } + jmp_offset = bpf2sw64_offset(bpf_idx, off, ctx); + if (jmp_offset >= -0x100000 && jmp_offset <= 0xfffff) { + emit(SW64_BPF_BNE(tmp2, jmp_offset), ctx); + } else { + pr_err("eBPF JIT %s[%d]: BPF_JMP out of 
range, %d instructions\n", + current->comm, current->pid, jmp_offset); + return -EINVAL; + } + break; + + case BPF_JMP | BPF_CALL: + func = (u64)__bpf_call_base + imm; + if ((func & ~(KERNEL_IMAGE_SIZE - 1)) != __START_KERNEL_map) + /* calling bpf program, switch to vmalloc addr */ + func = (func & U32_MAX) | VMALLOC_START; + emit_sw64_ldu64(SW64_BPF_REG_PV, func, ctx); + emit(SW64_BPF_CALL(SW64_BPF_REG_RA, SW64_BPF_REG_PV), ctx); + break; + + case BPF_JMP | BPF_TAIL_CALL: + if (emit_bpf_tail_call(ctx)) + return -EFAULT; + break; + + case BPF_JMP | BPF_EXIT: + // if this is the last bpf instruction, skip to epilogue + if (bpf_idx == ctx->prog->len - 1) + break; + jmp_offset = offset_to_epilogue(ctx) - 1; + // epilogue is always at the end, must jump forward + if (jmp_offset >= -1 && jmp_offset <= 0xfffff) { + if (ctx->image && !jmp_offset) + // if this is the last jited instruction, generate nop + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, SW64_BPF_REG_ZR), ctx); + else + emit(SW64_BPF_BR(SW64_BPF_REG_ZR, jmp_offset), ctx); + } else { + pr_err("eBPF JIT %s[%d]: BPF_EXIT out of range, %d instructions\n", + current->comm, current->pid, jmp_offset); + return -EINVAL; + } + break; + + case BPF_LD | BPF_IMM | BPF_DW: + insn1 = insn[1]; + imm64 = ((u64)insn1.imm << 32) | (u32)imm; + emit_sw64_ldu64(dst, imm64, ctx); + put_tmp_reg(ctx); + put_tmp_reg(ctx); + return 1; + + /* LDX: dst = *(size *)(src + off) */ + case BPF_LDX | BPF_MEM | BPF_W: + case BPF_LDX | BPF_MEM | BPF_H: + case BPF_LDX | BPF_MEM | BPF_B: + case BPF_LDX | BPF_MEM | BPF_DW: + case BPF_LDX | BPF_PROBE_MEM | BPF_DW: + case BPF_LDX | BPF_PROBE_MEM | BPF_W: + case BPF_LDX | BPF_PROBE_MEM | BPF_H: + case BPF_LDX | BPF_PROBE_MEM | BPF_B: + switch (BPF_SIZE(code)) { + case BPF_W: + emit(SW64_BPF_LDW(dst, src, off), ctx); + emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx); + break; + case BPF_H: + emit(SW64_BPF_LDHU(dst, src, off), ctx); + break; + case BPF_B: + emit(SW64_BPF_LDBU(dst, src, off), ctx); 
+ break; + case BPF_DW: + emit(SW64_BPF_LDL(dst, src, off), ctx); + break; + } + + ret = add_exception_handler(insn, ctx, dst); + if (ret) + return ret; + break; + + /* ST: *(size *)(dst + off) = imm */ + case BPF_ST | BPF_MEM | BPF_W: + case BPF_ST | BPF_MEM | BPF_H: + case BPF_ST | BPF_MEM | BPF_B: + case BPF_ST | BPF_MEM | BPF_DW: + /* Load imm to a register then store it */ + emit_sw64_lds32(tmp1, imm, ctx); + switch (BPF_SIZE(code)) { + case BPF_W: + emit(SW64_BPF_STW(tmp1, dst, off), ctx); + break; + case BPF_H: + emit(SW64_BPF_STH(tmp1, dst, off), ctx); + break; + case BPF_B: + emit(SW64_BPF_STB(tmp1, dst, off), ctx); + break; + case BPF_DW: + emit(SW64_BPF_STL(tmp1, dst, off), ctx); + break; + } + break; + + /* STX: *(size *)(dst + off) = src */ + case BPF_STX | BPF_MEM | BPF_W: + emit(SW64_BPF_STW(src, dst, off), ctx); + break; + case BPF_STX | BPF_MEM | BPF_H: + emit(SW64_BPF_STH(src, dst, off), ctx); + break; + case BPF_STX | BPF_MEM | BPF_B: + emit(SW64_BPF_STB(src, dst, off), ctx); + break; + case BPF_STX | BPF_MEM | BPF_DW: + emit(SW64_BPF_STL(src, dst, off), ctx); + break; + + /* STX XADD: lock *(u32 *)(dst + off) += src */ + case BPF_STX | BPF_XADD | BPF_W: + emit_sw64_xadd32(src, dst, off, ctx); + break; + /* STX XADD: lock *(u64 *)(dst + off) += src */ + case BPF_STX | BPF_XADD | BPF_DW: + emit_sw64_xadd64(src, dst, off, ctx); + break; + + default: + pr_err("eBPF JIT %s[%d]: unknown opcode 0x%02x\n", + current->comm, current->pid, code); + return -EINVAL; + } + + put_tmp_reg(ctx); + put_tmp_reg(ctx); + return 0; +} + +static int build_body(struct jit_ctx *ctx) +{ + const struct bpf_prog *prog = ctx->prog; + int i; + + for (i = 0; i < prog->len; i++) { + const struct bpf_insn *insn = &prog->insnsi[i]; + int ret; + + if (ctx->image == NULL) + ctx->insn_offset[i] = ctx->idx; + ret = build_insn(insn, ctx); + if (ret < 0) + return ret; + while (ret > 0) { + i++; + if (ctx->image == NULL) + ctx->insn_offset[i] = ctx->insn_offset[i - 1]; + ret--; + } + } 
+ + return 0; +} + +static int validate_code(struct jit_ctx *ctx) +{ + int i; + + for (i = 0; i < ctx->idx; i++) { + if (ctx->image[i] == SW64_BPF_ILLEGAL_INSN) + return -1; + } + + if (WARN_ON_ONCE(ctx->exentry_idx != ctx->prog->aux->num_exentries)) + return -1; + + return 0; +} + +static inline void bpf_flush_icache(void *start, void *end) +{ + flush_icache_range((unsigned long)start, (unsigned long)end); +} + +struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) +{ + struct bpf_prog *tmp, *orig_prog = prog; + struct bpf_binary_header *header; + struct sw64_jit_data *jit_data; + bool was_classic = bpf_prog_was_classic(prog); + bool tmp_blinded = false; + bool extra_pass = false; + struct jit_ctx ctx; + int image_size, prog_size, extable_size; + u8 *image_ptr; + + if (!prog->jit_requested) + return orig_prog; + + tmp = bpf_jit_blind_constants(prog); + /* If blinding was requested and we failed during blinding, + * we must fall back to the interpreter. + */ + if (IS_ERR(tmp)) + return orig_prog; + if (tmp != prog) { + tmp_blinded = true; + prog = tmp; + } + + jit_data = prog->aux->jit_data; + if (!jit_data) { + jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); + if (!jit_data) { + prog = orig_prog; + goto out; + } + prog->aux->jit_data = jit_data; + } + if (jit_data->ctx.insn_offset) { + ctx = jit_data->ctx; + image_ptr = jit_data->image; + header = jit_data->header; + extra_pass = true; + prog_size = sizeof(u32) * ctx.idx; + goto skip_init_ctx; + } + memset(&ctx, 0, sizeof(ctx)); + ctx.prog = prog; + + ctx.insn_offset = kcalloc(prog->len + 1, sizeof(int), GFP_KERNEL); + if (ctx.insn_offset == NULL) { + prog = orig_prog; + goto out_off; + } + + /* 1. Initial fake pass to compute ctx->idx. */ + + /* Fake pass to fill in ctx->offset. 
*/ + build_prologue(&ctx, was_classic); + + if (build_body(&ctx)) { + prog = orig_prog; + goto out_off; + } + + ctx.insn_offset[prog->len] = ctx.epilogue_offset = ctx.idx; + build_epilogue(&ctx); + + extable_size = prog->aux->num_exentries * + sizeof(struct exception_table_entry); + + /* Now we know the actual image size. */ + /* And we need extra 8 bytes for lock instructions alignment */ + prog_size = sizeof(u32) * ctx.idx + 8; + image_size = prog_size + extable_size; + header = bpf_jit_binary_alloc(image_size, &image_ptr, + sizeof(u32), jit_fill_hole); + if (header == NULL) { + prog = orig_prog; + goto out_off; + } + + /* 2. Now, the actual pass. */ + + /* lock instructions need 8-byte alignment */ + ctx.image = (u32 *)(((unsigned long)image_ptr + 7) & (~7)); + if (extable_size) + prog->aux->extable = (void *)image_ptr + prog_size; +skip_init_ctx: + ctx.idx = 0; + ctx.exentry_idx = 0; + + build_prologue(&ctx, was_classic); + + if (build_body(&ctx)) { + bpf_jit_binary_free(header); + prog = orig_prog; + goto out_off; + } + + build_epilogue(&ctx); + + /* 3. Extra pass to validate JITed code. */ + if (validate_code(&ctx)) { + bpf_jit_binary_free(header); + prog = orig_prog; + goto out_off; + } + + /* And we're done. */ + if (bpf_jit_enable > 1) + bpf_jit_dump(prog->len, prog_size, 2, ctx.image); + + bpf_flush_icache(header, ctx.image + ctx.idx); + + if (!prog->is_func || extra_pass) { + bpf_jit_binary_lock_ro(header); + } else { + jit_data->ctx = ctx; + jit_data->image = image_ptr; + jit_data->header = header; + } + prog->bpf_func = (void *)ctx.image; + prog->jited = 1; + prog->jited_len = prog_size; + if (ctx.current_tmp_reg) { + pr_err("eBPF JIT %s[%d]: unreleased temporary regsters %d\n", + current->comm, current->pid, ctx.current_tmp_reg); + } + + if (!prog->is_func || extra_pass) { +out_off: + kfree(ctx.insn_offset); + kfree(jit_data); + prog->aux->jit_data = NULL; + } +out: + if (tmp_blinded) + bpf_jit_prog_release_other(prog, prog == orig_prog ? 
+ tmp : orig_prog); + return prog; +} diff --git a/arch/sw_64/platform/Makefile b/arch/sw_64/platform/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..4c0edceb4a2c1f4f7c8a5ee16617e80161b771a1 --- /dev/null +++ b/arch/sw_64/platform/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_PLATFORM_XUELANG) += cpufreq_xuelang.o diff --git a/arch/sw_64/platform/cpufreq_xuelang.c b/arch/sw_64/platform/cpufreq_xuelang.c new file mode 100644 index 0000000000000000000000000000000000000000..1259e58dc874ffa691d189dfd46d4e120cdb2cef --- /dev/null +++ b/arch/sw_64/platform/cpufreq_xuelang.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +#include +#include +#include + +/* Minimum CLK support */ +enum { + DC_0, DC_1, DC_2, DC_3, DC_4, DC_5, DC_6, DC_7, DC_8, + DC_9, DC_10, DC_11, DC_12, DC_13, DC_14, DC_15, DC_RESV +}; + +struct cpufreq_frequency_table freq_table[] = { + {0, 200, CPUFREQ_ENTRY_INVALID}, + {0, DC_1, CPUFREQ_ENTRY_INVALID}, + {0, DC_2, 0}, + {0, DC_3, 0}, + {0, DC_4, 0}, + {0, DC_5, 0}, + {0, DC_6, 0}, + {0, DC_7, 0}, + {0, DC_8, 0}, + {0, DC_9, 0}, + {0, DC_10, 0}, + {0, DC_11, 0}, + {0, DC_12, 0}, + {0, DC_13, 0}, + {0, DC_14, 0}, + {0, DC_15, 0}, + {-1, DC_RESV, CPUFREQ_TABLE_END}, +}; + + +static struct platform_device sw64_cpufreq_device = { + .name = "sw64_cpufreq", + .id = -1, +}; + +static int __init sw64_cpufreq_init(void) +{ + int i; + unsigned char external_clk; + unsigned long max_rate, freq_off; + + max_rate = get_cpu_freq() / 1000; + + external_clk = *((unsigned char *)__va(MB_EXTCLK)); + + if (external_clk == 240) + freq_off = 60000; + else + freq_off = 50000; + + /* clock table init */ + for (i = 0; freq_table[i].frequency != CPUFREQ_TABLE_END; i++) { + if (i == 1) + freq_table[i].driver_data = freq_off * 24; + if (i == 2) + freq_table[i].frequency = freq_off * 36; + if (i > 2) + freq_table[i].frequency = freq_off * 38 + ((i - 3) * freq_off); + + if (freq_table[i].frequency == 
max_rate) + freq_table[i + 1].frequency = CPUFREQ_TABLE_END; + } + + return platform_device_register(&sw64_cpufreq_device); +} +arch_initcall(sw64_cpufreq_init); + +char curruent_policy[CPUFREQ_NAME_LEN]; + +static struct clk cpu_clk = { + .name = "cpu_clk", + .flags = CLK_ALWAYS_ENABLED | CLK_RATE_PROPAGATES, + .rate = 2400000000, +}; + +struct clk *sw64_clk_get(struct device *dev, const char *id) +{ + return &cpu_clk; +} +EXPORT_SYMBOL(sw64_clk_get); + +unsigned int __sw64_cpufreq_get(struct cpufreq_policy *policy) +{ + int i; + u64 val; + struct cpufreq_frequency_table *ft = policy->freq_table; + + val = sw64_io_read(0, CLK_CTL) >> CORE_PLL2_CFG_SHIFT; + + for (i = 0; ft[i].frequency != CPUFREQ_TABLE_END; i++) { + if (val == i) + return ft[i].frequency; + } + return 0; +} +EXPORT_SYMBOL(__sw64_cpufreq_get); + +void sw64_set_rate(unsigned int index) +{ + unsigned int i, val; + int cpu_num; + + cpu_num = sw64_chip->get_cpu_num(); + + for (i = 0; i < cpu_num; i++) { + sw64_io_write(i, CLK_CTL, CORE_CLK2_R | CORE_CLK2_V | CLK_PRT); + val = sw64_io_read(i, CLK_CTL); + + sw64_io_write(i, CLK_CTL, val | index << CORE_PLL2_CFG_SHIFT); + + udelay(1); + + sw64_io_write(i, CLK_CTL, CORE_CLK2_V | CLK_PRT + | index << CORE_PLL2_CFG_SHIFT); + val = sw64_io_read(i, CLK_CTL); + + /* LV1 select PLL1/PLL2 */ + sw64_io_write(i, CLU_LV1_SEL, CLK_LV1_SEL_MUXA | CLK_LV1_SEL_PRT); + + /* Set CLK_CTL PLL0 */ + sw64_io_write(i, CLK_CTL, val | CORE_CLK0_R | CORE_CLK0_V); + + sw64_io_write(i, CLK_CTL, val | CORE_CLK0_R | CORE_CLK0_V + | index << CORE_PLL0_CFG_SHIFT); + + udelay(1); + + sw64_io_write(i, CLK_CTL, val | CORE_CLK0_V + | index << CORE_PLL0_CFG_SHIFT); + + /* LV1 select PLL0/PLL1 */ + sw64_io_write(i, CLU_LV1_SEL, CLK_LV1_SEL_MUXB | CLK_LV1_SEL_PRT); + } +} +EXPORT_SYMBOL_GPL(sw64_set_rate); diff --git a/arch/sw_64/tools/.gitignore b/arch/sw_64/tools/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..f73e86272b7616f0c2ce1d704e1966da94aed182 --- 
/dev/null +++ b/arch/sw_64/tools/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +relocs diff --git a/arch/sw_64/tools/Makefile b/arch/sw_64/tools/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..66f55b035e223cc3f9073c6fbd252385293e4475 --- /dev/null +++ b/arch/sw_64/tools/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0 + +hostprogs += relocs +relocs-objs += relocs.o +relocs-objs += relocs_main.o +PHONY += relocs +relocs: $(obj)/relocs + @: diff --git a/arch/sw_64/tools/relocs.c b/arch/sw_64/tools/relocs.c new file mode 100644 index 0000000000000000000000000000000000000000..ec0ed422a8369172d2db92550dfed98619419961 --- /dev/null +++ b/arch/sw_64/tools/relocs.c @@ -0,0 +1,635 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "relocs.h" + +#define ELF_BITS 64 + +#define ELF_MACHINE EM_SW64 +#define ELF_MACHINE_NAME "SW64" +#define SHT_REL_TYPE SHT_RELA +#define Elf_Rel Elf64_Rela + +#define ELF_CLASS ELFCLASS64 +#define ELF_R_SYM(val) ELF64_R_SYM(val) +#define ELF_R_TYPE(val) ELF64_R_TYPE(val) +#define ELF_ST_TYPE(o) ELF64_ST_TYPE(o) +#define ELF_ST_BIND(o) ELF64_ST_BIND(o) +#define ELF_ST_VISIBILITY(o) ELF64_ST_VISIBILITY(o) + +#define ElfW(type) _ElfW(ELF_BITS, type) +#define _ElfW(bits, type) __ElfW(bits, type) +#define __ElfW(bits, type) Elf##bits##_##type + +#define Elf_Addr ElfW(Addr) +#define Elf_Ehdr ElfW(Ehdr) +#define Elf_Phdr ElfW(Phdr) +#define Elf_Shdr ElfW(Shdr) +#define Elf_Sym ElfW(Sym) + +static Elf_Ehdr ehdr; + +struct relocs { + uint32_t *offset; + unsigned long count; + unsigned long size; +}; + +static struct relocs relocs; + +struct section { + Elf_Shdr shdr; + struct section *link; + Elf_Sym *symtab; + Elf_Rel *reltab; + char *strtab; + long shdr_offset; +}; +static struct section *secs; + +static const char * const regex_sym_kernel = { +/* Symbols matching these regex's should never be relocated */ + "^(__crc_)", +}; + +static regex_t sym_regex_c; + +static int regex_skip_reloc(const char 
*sym_name) +{ + return !regexec(&sym_regex_c, sym_name, 0, NULL, 0); +} + +static void regex_init(void) +{ + char errbuf[128]; + int err; + + err = regcomp(&sym_regex_c, regex_sym_kernel, + REG_EXTENDED|REG_NOSUB); + + if (err) { + regerror(err, &sym_regex_c, errbuf, sizeof(errbuf)); + die("%s", errbuf); + } +} + +static const char *rel_type(unsigned int type) +{ + static const char * const type_name[] = { +#define REL_TYPE(X)[X] = #X + REL_TYPE(R_SW64_NONE), + REL_TYPE(R_SW64_REFQUAD), + REL_TYPE(R_SW64_LITERAL), + REL_TYPE(R_SW64_LITUSE), + REL_TYPE(R_SW64_GPDISP), + REL_TYPE(R_SW64_BRADDR), + REL_TYPE(R_SW64_HINT), + REL_TYPE(R_SW64_SREL32), + REL_TYPE(R_SW64_GPRELHIGH), + REL_TYPE(R_SW64_GPRELLOW), +#undef REL_TYPE + }; + const char *name = "unknown type rel type name"; + + if (type < ARRAY_SIZE(type_name) && type_name[type]) + name = type_name[type]; + return name; +} + +static const char *sec_name(unsigned int shndx) +{ + const char *sec_strtab; + const char *name; + + sec_strtab = secs[ehdr.e_shstrndx].strtab; + if (shndx < ehdr.e_shnum) + name = sec_strtab + secs[shndx].shdr.sh_name; + else if (shndx == SHN_ABS) + name = "ABSOLUTE"; + else if (shndx == SHN_COMMON) + name = "COMMON"; + else + name = ""; + return name; +} + +static struct section *sec_lookup(const char *secname) +{ + int i; + + for (i = 0; i < ehdr.e_shnum; i++) + if (strcmp(secname, sec_name(i)) == 0) + return &secs[i]; + + return NULL; +} + +static const char *sym_name(const char *sym_strtab, Elf_Sym *sym) +{ + const char *name; + + if (sym->st_name) + name = sym_strtab + sym->st_name; + else + name = sec_name(sym->st_shndx); + return name; +} + +#define le16_to_cpu(val) (val) +#define le32_to_cpu(val) (val) +#define le64_to_cpu(val) (val) + +#define cpu_to_le16(val) (val) +#define cpu_to_le32(val) (val) +#define cpu_to_le64(val) (val) + +static uint16_t elf16_to_cpu(uint16_t val) +{ + return le16_to_cpu(val); +} + +static uint32_t elf32_to_cpu(uint32_t val) +{ + return le32_to_cpu(val); +} 
+ +static uint32_t cpu_to_elf32(uint32_t val) +{ + return cpu_to_le32(val); +} + +#define elf_half_to_cpu(x) elf16_to_cpu(x) +#define elf_word_to_cpu(x) elf32_to_cpu(x) + +#if ELF_BITS == 64 +static uint64_t elf64_to_cpu(uint64_t val) +{ + return le64_to_cpu(val); +} +#define elf_addr_to_cpu(x) elf64_to_cpu(x) +#define elf_off_to_cpu(x) elf64_to_cpu(x) +#define elf_xword_to_cpu(x) elf64_to_cpu(x) +#else +#define elf_addr_to_cpu(x) elf32_to_cpu(x) +#define elf_off_to_cpu(x) elf32_to_cpu(x) +#define elf_xword_to_cpu(x) elf32_to_cpu(x) +#endif + +static void read_ehdr(FILE *fp) +{ + if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) + die("Cannot read ELF header: %s\n", strerror(errno)); + + if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0) + die("No ELF magic\n"); + + if (ehdr.e_ident[EI_CLASS] != ELF_CLASS) + die("Not a %d bit executable\n", ELF_BITS); + + if ((ehdr.e_ident[EI_DATA] != ELFDATA2LSB) && + (ehdr.e_ident[EI_DATA] != ELFDATA2MSB)) + die("Unknown ELF Endianness\n"); + + if (ehdr.e_ident[EI_VERSION] != EV_CURRENT) + die("Unknown ELF version\n"); + + /* Convert the fields to native endian */ + ehdr.e_type = elf_half_to_cpu(ehdr.e_type); + ehdr.e_machine = elf_half_to_cpu(ehdr.e_machine); + ehdr.e_version = elf_word_to_cpu(ehdr.e_version); + ehdr.e_entry = elf_addr_to_cpu(ehdr.e_entry); + ehdr.e_phoff = elf_off_to_cpu(ehdr.e_phoff); + ehdr.e_shoff = elf_off_to_cpu(ehdr.e_shoff); + ehdr.e_flags = elf_word_to_cpu(ehdr.e_flags); + ehdr.e_ehsize = elf_half_to_cpu(ehdr.e_ehsize); + ehdr.e_phentsize = elf_half_to_cpu(ehdr.e_phentsize); + ehdr.e_phnum = elf_half_to_cpu(ehdr.e_phnum); + ehdr.e_shentsize = elf_half_to_cpu(ehdr.e_shentsize); + ehdr.e_shnum = elf_half_to_cpu(ehdr.e_shnum); + ehdr.e_shstrndx = elf_half_to_cpu(ehdr.e_shstrndx); + + if ((ehdr.e_type != ET_EXEC) && (ehdr.e_type != ET_DYN)) + die("Unsupported ELF header type\n"); + + if (ehdr.e_machine != ELF_MACHINE) + die("Not for %s\n", ELF_MACHINE_NAME); + + if (ehdr.e_version != EV_CURRENT) + die("Unknown ELF 
version\n"); + + if (ehdr.e_ehsize != sizeof(Elf_Ehdr)) + die("Bad Elf header size\n"); + + if (ehdr.e_phentsize != sizeof(Elf_Phdr)) + die("Bad program header entry\n"); + + if (ehdr.e_shentsize != sizeof(Elf_Shdr)) + die("Bad section header entry\n"); + + if (ehdr.e_shstrndx >= ehdr.e_shnum) + die("String table index out of bounds\n"); +} + +static void read_shdrs(FILE *fp) +{ + int i; + Elf_Shdr shdr; + + secs = calloc(ehdr.e_shnum, sizeof(struct section)); + if (!secs) + die("Unable to allocate %d section headers\n", ehdr.e_shnum); + + if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0) + die("Seek to %d failed: %s\n", ehdr.e_shoff, strerror(errno)); + + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + + sec->shdr_offset = ftell(fp); + if (fread(&shdr, sizeof(shdr), 1, fp) != 1) + die("Cannot read ELF section headers %d/%d: %s\n", + i, ehdr.e_shnum, strerror(errno)); + sec->shdr.sh_name = elf_word_to_cpu(shdr.sh_name); + sec->shdr.sh_type = elf_word_to_cpu(shdr.sh_type); + sec->shdr.sh_flags = elf_xword_to_cpu(shdr.sh_flags); + sec->shdr.sh_addr = elf_addr_to_cpu(shdr.sh_addr); + sec->shdr.sh_offset = elf_off_to_cpu(shdr.sh_offset); + sec->shdr.sh_size = elf_xword_to_cpu(shdr.sh_size); + sec->shdr.sh_link = elf_word_to_cpu(shdr.sh_link); + sec->shdr.sh_info = elf_word_to_cpu(shdr.sh_info); + sec->shdr.sh_addralign = elf_xword_to_cpu(shdr.sh_addralign); + sec->shdr.sh_entsize = elf_xword_to_cpu(shdr.sh_entsize); + if (sec->shdr.sh_link < ehdr.e_shnum) + sec->link = &secs[sec->shdr.sh_link]; + } +} + +static void read_strtabs(FILE *fp) +{ + int i; + + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + + if (sec->shdr.sh_type != SHT_STRTAB) + continue; + + sec->strtab = malloc(sec->shdr.sh_size); + if (!sec->strtab) + die("malloc of %d bytes for strtab failed\n", + sec->shdr.sh_size); + + if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) + die("Seek to %d failed: %s\n", + sec->shdr.sh_offset, strerror(errno)); + + if 
(fread(sec->strtab, 1, sec->shdr.sh_size, fp) != + sec->shdr.sh_size) + die("Cannot read symbol table: %s\n", strerror(errno)); + } +} + +static void read_symtabs(FILE *fp) +{ + int i, j; + + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + + if (sec->shdr.sh_type != SHT_SYMTAB) + continue; + + sec->symtab = malloc(sec->shdr.sh_size); + if (!sec->symtab) + die("malloc of %d bytes for symtab failed\n", + sec->shdr.sh_size); + + if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) + die("Seek to %d failed: %s\n", + sec->shdr.sh_offset, strerror(errno)); + + if (fread(sec->symtab, 1, sec->shdr.sh_size, fp) != + sec->shdr.sh_size) + die("Cannot read symbol table: %s\n", strerror(errno)); + + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf_Sym); j++) { + Elf_Sym *sym = &sec->symtab[j]; + + sym->st_name = elf_word_to_cpu(sym->st_name); + sym->st_value = elf_addr_to_cpu(sym->st_value); + sym->st_size = elf_xword_to_cpu(sym->st_size); + sym->st_shndx = elf_half_to_cpu(sym->st_shndx); + } + } +} + +static void read_relocs(FILE *fp) +{ + static unsigned long base; + int i, j; + + if (!base) { + struct section *sec = sec_lookup(".text"); + + if (!sec) + die("Could not find .text section\n"); + + base = sec->shdr.sh_addr; + } + + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + + if (sec->shdr.sh_type != SHT_REL_TYPE) + continue; + + sec->reltab = malloc(sec->shdr.sh_size); + if (!sec->reltab) + die("malloc of %d bytes for relocs failed\n", + sec->shdr.sh_size); + + if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) + die("Seek to %d failed: %s\n", + sec->shdr.sh_offset, strerror(errno)); + + if (fread(sec->reltab, 1, sec->shdr.sh_size, fp) != + sec->shdr.sh_size) + die("Cannot read symbol table: %s\n", strerror(errno)); + + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf_Rel); j++) { + Elf_Rel *rel = &sec->reltab[j]; + + rel->r_offset = elf_addr_to_cpu(rel->r_offset); + /* Set offset into kernel image */ + rel->r_offset -= base; + /* 
Convert SW64 RELA format - only the symbol + * index needs converting to native endianness + */ + rel->r_info = elf_xword_to_cpu(rel->r_info); +#if (SHT_REL_TYPE == SHT_RELA) + rel->r_addend = elf_xword_to_cpu(rel->r_addend); +#endif + } + } +} + +static void remove_relocs(FILE *fp) +{ + int i; + Elf_Shdr shdr; + + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + + if (sec->shdr.sh_type != SHT_REL_TYPE) + continue; + + if (fseek(fp, sec->shdr_offset, SEEK_SET) < 0) + die("Seek to %d failed: %s\n", + sec->shdr_offset, strerror(errno)); + + if (fread(&shdr, sizeof(shdr), 1, fp) != 1) + die("Cannot read ELF section headers %d/%d: %s\n", + i, ehdr.e_shnum, strerror(errno)); + + /* Set relocation section size to 0, effectively removing it. + * This is necessary due to lack of support for relocations + * in objcopy when creating 32bit elf from 64bit elf. + */ + shdr.sh_size = 0; + + if (fseek(fp, sec->shdr_offset, SEEK_SET) < 0) + die("Seek to %d failed: %s\n", + sec->shdr_offset, strerror(errno)); + + if (fwrite(&shdr, sizeof(shdr), 1, fp) != 1) + die("Cannot write ELF section headers %d/%d: %s\n", + i, ehdr.e_shnum, strerror(errno)); + } +} + +static void add_reloc(struct relocs *r, uint32_t offset, unsigned int type) +{ + /* Relocation representation in binary table: + * |76543210|76543210|76543210|76543210| + * | Type | offset from _text >> 2 | + */ + offset >>= 2; + if (offset > 0x00FFFFFF) + die("Kernel image exceeds maximum size for relocation!\n"); + + offset = (offset & 0x00FFFFFF) | ((type & 0xFF) << 24); + + if (r->count == r->size) { + unsigned long newsize = r->size + 50000; + void *mem = realloc(r->offset, newsize * sizeof(r->offset[0])); + + if (!mem) + die("realloc failed\n"); + + r->offset = mem; + r->size = newsize; + } + r->offset[r->count++] = offset; +} + +static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel, + Elf_Sym *sym, const char *symname)) +{ + int i; + + /* Walk through the relocations */ + for (i = 
0; i < ehdr.e_shnum; i++) { + char *sym_strtab; + Elf_Sym *sh_symtab; + struct section *sec_applies, *sec_symtab; + int j; + struct section *sec = &secs[i]; + + if (sec->shdr.sh_type != SHT_REL_TYPE) + continue; + sec_symtab = sec->link; + sec_applies = &secs[sec->shdr.sh_info]; + if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) + continue; + + sh_symtab = sec_symtab->symtab; + sym_strtab = sec_symtab->link->strtab; + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf_Rel); j++) { + Elf_Rel *rel = &sec->reltab[j]; + Elf_Sym *sym = &sh_symtab[ELF_R_SYM(rel->r_info)]; + const char *symname = sym_name(sym_strtab, sym); + + process(sec, rel, sym, symname); + } + } +} + +static int do_reloc(struct section *sec, Elf_Rel *rel, Elf_Sym *sym, + const char *symname) +{ + unsigned int r_type = ELF_R_TYPE(rel->r_info); + unsigned int bind = ELF_ST_BIND(sym->st_info); + + if ((bind == STB_WEAK) && (sym->st_value == 0)) { + /* Don't relocate weak symbols without a target */ + return 0; + } + + if (regex_skip_reloc(symname)) + return 0; + + switch (r_type) { + case R_SW64_NONE: + case R_SW64_LITERAL: /* relocated by GOT */ + case R_SW64_LITUSE: + case R_SW64_GPDISP: + case R_SW64_BRADDR: + case R_SW64_HINT: + case R_SW64_SREL32: + case R_SW64_GPRELHIGH: + case R_SW64_GPRELLOW: + case R_SW64_LITERAL_GOT: + /* + * NONE can be ignored and PC relative relocations don't + * need to be adjusted. 
+ */ + break; + + case R_SW64_REFQUAD: + add_reloc(&relocs, rel->r_offset, r_type); + break; + + default: + die("Unsupported relocation type: %s (%d)\n", + rel_type(r_type), r_type); + break; + } + + return 0; +} + +static int write_reloc_as_bin(uint32_t v, FILE *f) +{ + unsigned char buf[4]; + + v = cpu_to_elf32(v); + + memcpy(buf, &v, sizeof(uint32_t)); + return fwrite(buf, 1, 4, f); +} + +static int write_reloc_as_text(uint32_t v, FILE *f) +{ + int res; + + res = fprintf(f, "\t.long 0x%08"PRIx32"\n", v); + if (res < 0) + return res; + else + return sizeof(uint32_t); +} + +static void emit_relocs(int as_text, int as_bin, FILE *outf) +{ + int i; + int (*write_reloc)(uint32_t, FILE *) = write_reloc_as_bin; + int size = 0; + int size_reserved; + struct section *sec_reloc; + + sec_reloc = sec_lookup(".data.reloc"); + if (!sec_reloc) + die("Could not find relocation section\n"); + + size_reserved = sec_reloc->shdr.sh_size; + /* Collect up the relocations */ + walk_relocs(do_reloc); + + /* Print the relocations */ + if (as_text) { + /* Print the relocations in a form suitable that + * gas will like. + */ + printf(".section \".data.reloc\",\"a\"\n"); + printf(".balign 8\n"); + /* Output text to stdout */ + write_reloc = write_reloc_as_text; + outf = stdout; + } else if (as_bin) { + /* Output raw binary to stdout */ + outf = stdout; + } else { + /* + * Seek to offset of the relocation section. + * Each relocation is then written into the + * vmlinux kernel image. 
+ */ + if (fseek(outf, sec_reloc->shdr.sh_offset, SEEK_SET) < 0) { + die("Seek to %d failed: %s\n", + sec_reloc->shdr.sh_offset, strerror(errno)); + } + } + + for (i = 0; i < relocs.count; i++) + size += write_reloc(relocs.offset[i], outf); + + /* Print a stop, but only if we've actually written some relocs */ + if (size) + size += write_reloc(0, outf); + + if (size > size_reserved) + /* + * Die, but suggest a value for CONFIG_RELOCATION_TABLE_SIZE + * which will fix this problem and allow a bit of headroom + * if more kernel features are enabled + */ + die("Relocations overflow available space!\n" + "Please adjust CONFIG_RELOCATION_TABLE_SIZE " + "to at least 0x%08x\n", (size + 0x1000) & ~0xFFF); +} + +/* + * As an aid to debugging problems with different linkers + * print summary information about the relocs. + * Since different linkers tend to emit the sections in + * different orders we use the section names in the output. + */ +static int do_reloc_info(struct section *sec, Elf_Rel *rel, ElfW(Sym) * sym, + const char *symname) +{ + printf("%16s 0x%x %16s %40s %16s\n", + sec_name(sec->shdr.sh_info), + (unsigned int)rel->r_offset, + rel_type(ELF_R_TYPE(rel->r_info)), + symname, + sec_name(sym->st_shndx)); + return 0; +} + +static void print_reloc_info(void) +{ + printf("%16s %10s %16s %40s %16s\n", + "reloc section", + "offset", + "reloc type", + "symbol", + "symbol section"); + walk_relocs(do_reloc_info); +} + +void process(FILE *fp, int as_text, int as_bin, + int show_reloc_info, int keep_relocs) +{ + regex_init(); + read_ehdr(fp); + read_shdrs(fp); + read_strtabs(fp); + read_symtabs(fp); + read_relocs(fp); + if (show_reloc_info) { + print_reloc_info(); + return; + } + emit_relocs(as_text, as_bin, fp); + if (!keep_relocs) + remove_relocs(fp); +} diff --git a/arch/sw_64/tools/relocs.h b/arch/sw_64/tools/relocs.h new file mode 100644 index 0000000000000000000000000000000000000000..17c7e31113a0e5f93ac2b596d54e31bc9de7fe58 --- /dev/null +++ 
b/arch/sw_64/tools/relocs.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _SW64_TOOLS_RELOCS_H +#define _SW64_TOOLS_RELOCS_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#define USE_BSD +#include +#include + +#define EM_SW64 0x9916 +/* + * SW64 ELF relocation types + */ +#define R_SW64_NONE 0 /* No reloc */ +#define R_SW64_REFLONG 1 /* Direct 32 bit */ +#define R_SW64_REFQUAD 2 /* Direct 64 bit */ +#define R_SW64_GPREL32 3 /* GP relative 32 bit */ +#define R_SW64_LITERAL 4 /* GP relative 16 bit w/optimization */ +#define R_SW64_LITUSE 5 /* Optimization hint for LITERAL */ +#define R_SW64_GPDISP 6 /* Add displacement to GP */ +#define R_SW64_BRADDR 7 /* PC+4 relative 23 bit shifted */ +#define R_SW64_HINT 8 /* PC+4 relative 16 bit shifted */ +#define R_SW64_SREL16 9 /* PC relative 16 bit */ +#define R_SW64_SREL32 10 /* PC relative 32 bit */ +#define R_SW64_SREL64 11 /* PC relative 64 bit */ +#define R_SW64_GPRELHIGH 17 /* GP relative 32 bit, high 16 bits */ +#define R_SW64_GPRELLOW 18 /* GP relative 32 bit, low 16 bits */ +#define R_SW64_GPREL16 19 /* GP relative 16 bit */ +#define R_SW64_COPY 24 /* Copy symbol at runtime */ +#define R_SW64_GLOB_DAT 25 /* Create GOT entry */ +#define R_SW64_JMP_SLOT 26 /* Create PLT entry */ +#define R_SW64_RELATIVE 27 /* Adjust by program base */ +#define R_SW64_BRSGP 28 +#define R_SW64_TLSGD 29 +#define R_SW64_TLS_LDM 30 +#define R_SW64_DTPMOD64 31 +#define R_SW64_GOTDTPREL 32 +#define R_SW64_DTPREL64 33 +#define R_SW64_DTPRELHI 34 +#define R_SW64_DTPRELLO 35 +#define R_SW64_DTPREL16 36 +#define R_SW64_GOTTPREL 37 +#define R_SW64_TPREL64 38 +#define R_SW64_TPRELHI 39 +#define R_SW64_TPRELLO 40 +#define R_SW64_TPREL16 41 +#define R_SW64_LITERAL_GOT 43 /* GP relative */ + +void die(char *fmt, ...); + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +enum symtype { + S_ABS, + S_REL, + S_SEG, + S_LIN, + S_NSYMTYPES +}; + +void process(FILE *fp, int 
as_text, int as_bin, + int show_reloc_info, int keep_relocs); +#endif /* _SW64_TOOLS_RELOCS_H */ diff --git a/arch/sw_64/tools/relocs_main.c b/arch/sw_64/tools/relocs_main.c new file mode 100644 index 0000000000000000000000000000000000000000..30a830a070dbe98b4b0b4770b9fc3ca1d3406941 --- /dev/null +++ b/arch/sw_64/tools/relocs_main.c @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "relocs.h" + +void die(char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + exit(1); +} + +static void usage(void) +{ + die("relocs [--reloc-info|--text|--bin|--keep] vmlinux\n"); +} + +int main(int argc, char **argv) +{ + int show_reloc_info, as_text, as_bin, keep_relocs; + const char *fname; + FILE *fp; + int i; + unsigned char e_ident[EI_NIDENT]; + + show_reloc_info = 0; + as_text = 0; + as_bin = 0; + keep_relocs = 0; + fname = NULL; + for (i = 1; i < argc; i++) { + char *arg = argv[i]; + + if (*arg == '-') { + if (strcmp(arg, "--reloc-info") == 0) { + show_reloc_info = 1; + continue; + } + if (strcmp(arg, "--text") == 0) { + as_text = 1; + continue; + } + if (strcmp(arg, "--bin") == 0) { + as_bin = 1; + continue; + } + if (strcmp(arg, "--keep") == 0) { + keep_relocs = 1; + continue; + } + } else if (!fname) { + fname = arg; + continue; + } + usage(); + } + if (!fname) + usage(); + + fp = fopen(fname, "r+"); + if (!fp) + die("Cannot open %s: %s\n", fname, strerror(errno)); + + if (fread(&e_ident, 1, EI_NIDENT, fp) != EI_NIDENT) + die("Cannot read %s: %s", fname, strerror(errno)); + + rewind(fp); + if (e_ident[EI_CLASS] == ELFCLASS64) + process(fp, as_text, as_bin, show_reloc_info, keep_relocs); + else + die("Unsupport ELF class on SW64: %s", fname); + //process_32(fp, as_text, as_bin, show_reloc_info, keep_relocs); + fclose(fp); + return 0; +} diff --git a/scripts/package/buildtar b/scripts/package/buildtar index 
65b4ea50296219e2cfed406dddd3cb4eac0737ea..93158943a4f73d868660e80eb488a3475512e7ac 100755 --- a/scripts/package/buildtar +++ b/scripts/package/buildtar @@ -64,6 +64,9 @@ case "${ARCH}" in alpha) [ -f "${objtree}/arch/alpha/boot/vmlinux.gz" ] && cp -v -- "${objtree}/arch/alpha/boot/vmlinux.gz" "${tmpdir}/boot/vmlinuz-${KERNELRELEASE}" ;; + sw_64) + [ -f "${objtree}/arch/sw_64/boot/vmlinux.bin" ] && cp -v -- "${objtree}/arch/sw_64/boot/vmlinux.bin" "${tmpdir}/boot/vmlinux-bin-${KERNELRELEASE}" + ;; parisc*) [ -f "${KBUILD_IMAGE}" ] && cp -v -- "${KBUILD_IMAGE}" "${tmpdir}/boot/vmlinux-${KERNELRELEASE}" [ -f "${objtree}/lifimage" ] && cp -v -- "${objtree}/lifimage" "${tmpdir}/boot/lifimage-${KERNELRELEASE}" diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian index 5044224cf6714b3e5738f1e6d30dda05c589e3ff..2586bcd5f43a3ab28b3ac512e6136e7c1bede7fd 100755 --- a/scripts/package/mkdebian +++ b/scripts/package/mkdebian @@ -26,7 +26,7 @@ set_debarch() { # Attempt to find the correct Debian architecture case "$UTS_MACHINE" in - i386|ia64|alpha|m68k|riscv*) + i386|ia64|alpha|m68k|riscv*|sw_64) debarch="$UTS_MACHINE" ;; x86_64) debarch=amd64 ;;