diff --git a/arch/sw_64/include/asm/cpufreq.h b/arch/sw_64/include/asm/cpufreq.h
new file mode 100644
index 0000000000000000000000000000000000000000..cf47f1fc6866860b56ec2112abc1a1449ff66d72
--- /dev/null
+++ b/arch/sw_64/include/asm/cpufreq.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_SW64_CPUFREQ_H
+#define _ASM_SW64_CPUFREQ_H
+
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/seq_file.h>
+#include <linux/clk.h>
+#include <linux/cpufreq.h>
+
+struct clk;
+
+extern char curruent_policy[CPUFREQ_NAME_LEN];
+
+struct clk_ops {	/* callbacks reached through struct clk's ->ops pointer */
+	void (*init)(struct clk *clk);
+	void (*enable)(struct clk *clk);
+	void (*disable)(struct clk *clk);
+	void (*recalc)(struct clk *clk);
+	int (*set_rate)(struct clk *clk, unsigned long rate, int algo_id);
+	long (*round_rate)(struct clk *clk, unsigned long rate);
+};
+
+struct clk {
+	struct list_head node;
+	const char *name;
+	int id;
+	struct module *owner;
+
+	struct clk *parent;
+	const struct clk_ops *ops;	/* callback table, see struct clk_ops */
+
+	struct kref kref;
+
+	unsigned long rate;
+	unsigned long flags;	/* presumably CLK_* bits from this header - confirm */
+};
+
+#define CLK_ALWAYS_ENABLED	(1 << 0)
+#define CLK_RATE_PROPAGATES	(1 << 1)
+
+#define CLK_PRT         0x1UL
+#define CORE_CLK0_V     (0x1UL << 1)
+#define CORE_CLK0_R     (0x1UL << 2)
+#define CORE_CLK2_V     (0x1UL << 15)
+#define CORE_CLK2_R     (0x1UL << 16)
+
+#define CLK_LV1_SEL_PRT         0x1UL
+#define CLK_LV1_SEL_MUXA        (0x1UL << 2)
+#define CLK_LV1_SEL_MUXB        (0x1UL << 3)
+
+#define CORE_PLL0_CFG_SHIFT     4
+#define CORE_PLL2_CFG_SHIFT     18
+
+extern struct cpufreq_frequency_table freq_table[];
+
+int clk_init(void);
+void sw64_set_rate(unsigned int index);
+
+struct clk *sw64_clk_get(struct device *dev, const char *id);
+
+void sw64_update_clockevents(unsigned long cpu, u32 freq);
+
+unsigned int __sw64_cpufreq_get(struct cpufreq_policy *policy);
+#endif /* _ASM_SW64_CPUFREQ_H */
diff --git a/arch/sw_64/include/asm/cputime.h b/arch/sw_64/include/asm/cputime.h
new file mode 100644
index 0000000000000000000000000000000000000000..cdd46b05e22840bbbe033ca200951269afa0b98f
--- /dev/null
+++ b/arch/sw_64/include/asm/cputime.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SW64_CPUTIME_H
+#define _ASM_SW64_CPUTIME_H
+
+typedef u64 __nocast cputime64_t;
+
+#define jiffies64_to_cputime64(__jif)  ((__force cputime64_t)(__jif))
+
+#endif /* _ASM_SW64_CPUTIME_H */
diff --git a/arch/sw_64/include/asm/ftrace.h b/arch/sw_64/include/asm/ftrace.h
new file mode 100644
index 0000000000000000000000000000000000000000..7ed6e3c06a333e81d96e881de91c0d1aac670ae7
--- /dev/null
+++ b/arch/sw_64/include/asm/ftrace.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * arch/sw_64/include/asm/ftrace.h
+ *
+ * Copyright (C) 2019, serveros, linyue
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef _ASM_SW64_FTRACE_H
+#define _ASM_SW64_FTRACE_H
+
+#define MCOUNT_ADDR		((unsigned long)_mcount)
+#define MCOUNT_INSN_SIZE	20	/* 5 * SW64_INSN_SIZE */
+#define MCOUNT_LDGP_SIZE	8	/* 2 * SW64_INSN_SIZE */
+
+#define ARCH_SUPPORTS_FTRACE_OPS 1
+
+#ifndef __ASSEMBLY__
+#include <linux/compat.h>
+#include <asm/insn.h>
+
+
+extern void _mcount(unsigned long);
+
+struct dyn_arch_ftrace {
+	/* No extra data needed for sw64 */
+};
+
+extern unsigned long ftrace_graph_call;
+
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+	/*
+	 * addr is the address of the mcount call instruction and is returned
+	 * unchanged: recordmcount does the necessary offset calculation.
+	 */
+	return addr;
+}
+
+#endif /* ifndef __ASSEMBLY__ */
+#endif /* _ASM_SW64_FTRACE_H */
diff --git a/arch/sw_64/include/asm/jump_label.h b/arch/sw_64/include/asm/jump_label.h
new file mode 100644
index 0000000000000000000000000000000000000000..32fbf7573b206bb2c935cc173de392b100d02010
--- /dev/null
+++ b/arch/sw_64/include/asm/jump_label.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_SW64_JUMP_LABEL_H
+#define _ASM_SW64_JUMP_LABEL_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <asm/insn.h>
+
+#define JUMP_LABEL_NOP_SIZE		SW64_INSN_SIZE
+
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
+{
+	asm_volatile_goto("1: nop\n\t"	/* label 1: the site starts out as a nop */
+			".pushsection __jump_table,  \"aw\"\n\t"
+			".align 3\n\t"
+			".quad 1b, %l[l_yes], %0\n\t"	/* code, target, key: cf. struct jump_entry */
+			".popsection\n\t"
+			:  :  "i"(&((char *)key)[branch]) :  : l_yes);	/* key address offset by 'branch' bytes */
+
+	return false;	/* reached when the asm falls through */
+l_yes:
+	return true;	/* reached when control arrives at l_yes */
+}
+
+static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+{
+	asm_volatile_goto("1: br %l[l_yes]\n\t"	/* label 1: the site starts out as a branch to l_yes */
+			".pushsection __jump_table,  \"aw\"\n\t"
+			".align 3\n\t"
+			".quad 1b, %l[l_yes], %0\n\t"	/* code, target, key: cf. struct jump_entry */
+			".popsection\n\t"
+			:  :  "i"(&((char *)key)[branch]) :  : l_yes);	/* key address offset by 'branch' bytes */
+
+	return false;	/* reached when the asm falls through */
+l_yes:
+	return true;	/* reached when control arrives at l_yes */
+}
+
+typedef u64 jump_label_t;
+
+struct jump_entry {	/* three 64-bit words, matching the .quad triples emitted into __jump_table */
+	jump_label_t code;
+	jump_label_t target;
+	jump_label_t key;
+};
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_SW64_JUMP_LABEL_H */
diff --git a/arch/sw_64/include/asm/kexec.h b/arch/sw_64/include/asm/kexec.h
new file mode 100644
index 0000000000000000000000000000000000000000..25e0d8da84f8dbe98908179bb061ea5f4759aa6e
--- /dev/null
+++ b/arch/sw_64/include/asm/kexec.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SW64_KEXEC_H
+#define _ASM_SW64_KEXEC_H
+
+#ifdef CONFIG_KEXEC
+
+/* Maximum physical address we can use pages from */
+#define KEXEC_SOURCE_MEMORY_LIMIT	(-1UL)
+/* Maximum address we can reach in physical address mode */
+#define KEXEC_DESTINATION_MEMORY_LIMIT	(-1UL)
+/* Maximum address we can use for the control code buffer */
+#define KEXEC_CONTROL_MEMORY_LIMIT	(-1UL)
+
+#define KEXEC_CONTROL_PAGE_SIZE		8192
+
+#define KEXEC_ARCH			KEXEC_ARCH_SW64
+
+#define KEXEC_SW64_ATAGS_OFFSET		0x1000
+#define KEXEC_SW64_ZIMAGE_OFFSET	0x8000
+
+#ifndef __ASSEMBLY__
+
+/**
+ * crash_setup_regs() - save registers for the panic kernel
+ * @newregs: registers are saved here
+ * @oldregs: registers to be saved (may be %NULL)
+ *
+ * Copies the machine registers from @oldregs to @newregs. If @oldregs is
+ * %NULL, the live registers $0-$30 and the current pc are stored in @newregs.
+ */
+static inline void crash_setup_regs(struct pt_regs *newregs,
+				    struct pt_regs *oldregs)
+{
+	if (oldregs) {
+		memcpy(newregs, oldregs, sizeof(*newregs));
+	} else {
+		__asm__ __volatile__ ("stl $0, %0" : "=m" (newregs->regs[0]));
+		__asm__ __volatile__ ("stl $1, %0" : "=m" (newregs->regs[1]));
+		__asm__ __volatile__ ("stl $2, %0" : "=m" (newregs->regs[2]));
+		__asm__ __volatile__ ("stl $3, %0" : "=m" (newregs->regs[3]));
+		__asm__ __volatile__ ("stl $4, %0" : "=m" (newregs->regs[4]));
+		__asm__ __volatile__ ("stl $5, %0" : "=m" (newregs->regs[5]));
+		__asm__ __volatile__ ("stl $6, %0" : "=m" (newregs->regs[6]));
+		__asm__ __volatile__ ("stl $7, %0" : "=m" (newregs->regs[7]));
+		__asm__ __volatile__ ("stl $8, %0" : "=m" (newregs->regs[8]));
+		__asm__ __volatile__ ("stl $9, %0" : "=m" (newregs->regs[9]));
+		__asm__ __volatile__ ("stl $10, %0" : "=m" (newregs->regs[10]));
+		__asm__ __volatile__ ("stl $11, %0" : "=m" (newregs->regs[11]));
+		__asm__ __volatile__ ("stl $12, %0" : "=m" (newregs->regs[12]));
+		__asm__ __volatile__ ("stl $13, %0" : "=m" (newregs->regs[13]));
+		__asm__ __volatile__ ("stl $14, %0" : "=m" (newregs->regs[14]));
+		__asm__ __volatile__ ("stl $15, %0" : "=m" (newregs->regs[15]));
+		__asm__ __volatile__ ("stl $16, %0" : "=m" (newregs->regs[16]));
+		__asm__ __volatile__ ("stl $17, %0" : "=m" (newregs->regs[17]));
+		__asm__ __volatile__ ("stl $18, %0" : "=m" (newregs->regs[18]));
+		__asm__ __volatile__ ("stl $19, %0" : "=m" (newregs->regs[19]));
+		__asm__ __volatile__ ("stl $20, %0" : "=m" (newregs->regs[20]));
+		__asm__ __volatile__ ("stl $21, %0" : "=m" (newregs->regs[21]));
+		__asm__ __volatile__ ("stl $22, %0" : "=m" (newregs->regs[22]));
+		__asm__ __volatile__ ("stl $23, %0" : "=m" (newregs->regs[23]));
+		__asm__ __volatile__ ("stl $24, %0" : "=m" (newregs->regs[24]));
+		__asm__ __volatile__ ("stl $25, %0" : "=m" (newregs->regs[25]));
+		__asm__ __volatile__ ("stl $26, %0" : "=m" (newregs->regs[26]));
+		__asm__ __volatile__ ("stl $27, %0" : "=m" (newregs->regs[27]));
+		__asm__ __volatile__ ("stl $28, %0" : "=m" (newregs->regs[28]));
+		__asm__ __volatile__ ("stl $29, %0" : "=m" (newregs->regs[29]));
+		__asm__ __volatile__ ("stl $30, %0" : "=m" (newregs->regs[30]));
+		newregs->pc = (unsigned long)current_text_addr();
+	}
+}
+
+/* Function pointer to optional machine-specific reinitialization */
+extern void (*kexec_reinit)(void);
+
+#endif /* __ASSEMBLY__ */
+
+struct kimage;
+extern unsigned long kexec_args[4];
+
+#endif /* CONFIG_KEXEC */
+
+#endif /* _ASM_SW64_KEXEC_H */
diff --git a/arch/sw_64/include/asm/kgdb.h b/arch/sw_64/include/asm/kgdb.h
new file mode 100644
index 0000000000000000000000000000000000000000..a00a45ce767ca74361319836d3b188db9178285b
--- /dev/null
+++ b/arch/sw_64/include/asm/kgdb.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * sw64 KGDB support
+ *
+ * Based on arch/arm64/include/kgdb.h
+ *
+ * Copyright (C) Xia Bin
+ * Author: Xia Bin
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _ASM_SW64_KGDB_H
+#define _ASM_SW64_KGDB_H
+
+#include <asm/ptrace.h>
+#include <linux/sched.h>
+
+#ifndef __ASSEMBLY__
+
+
+#define GDB_ADJUSTS_BREAK_OFFSET
+#define BREAK_INSTR_SIZE	4
+#define CACHE_FLUSH_IS_SAFE	0
+
+static inline void arch_kgdb_breakpoint(void)
+{
+	__asm__ __volatile__("sys_call %0" : : "i"(HMC_bpt));	/* HMC_bpt is passed as an immediate operand */
+}
+
+void sw64_task_to_gdb_regs(struct task_struct *task, unsigned long *regs);
+
+extern void kgdb_handle_bus_error(void);
+extern int kgdb_fault_expected;
+extern unsigned long get_reg(struct task_struct *task, unsigned long regno);
+
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * Number of register slots exchanged with gdb (8 bytes each, see NUMREGBYTES).
+ */
+#define DBG_MAX_REG_NUM		(67)
+
+/*
+ * Size of I/O buffer for gdb packet.
+ * It is sized to hold the contents of all registers.
+ */
+
+#define BUFMAX			4096
+
+/*
+ * Number of bytes required for gdb_regs buffer.
+ * Every one of the DBG_MAX_REG_NUM slots is counted as 8 bytes.
+ * GDB fails to connect for size beyond this with error
+ * "'g' packet reply is too long"
+ */
+#define NUMREGBYTES		(DBG_MAX_REG_NUM * 8)
+
+#endif /* _ASM_SW64_KGDB_H */
diff --git a/arch/sw_64/include/asm/kprobes.h b/arch/sw_64/include/asm/kprobes.h
new file mode 100644
index 0000000000000000000000000000000000000000..0c7be8109ed29423cadec91e4f0ffc9d65e7ab0b
--- /dev/null
+++ b/arch/sw_64/include/asm/kprobes.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  Kernel Probes (KProbes)
+ *  Based on arch/mips/include/asm/kprobes.h
+ */
+
+#ifndef _ASM_SW64_KPROBES_H
+#define _ASM_SW64_KPROBES_H
+
+#include <asm-generic/kprobes.h>
+
+#define BREAK_KPROBE	0x40ffffff
+#define BREAK_KPROBE_SS	0x40fffeff
+
+#ifdef CONFIG_KPROBES
+#include <linux/ptrace.h>
+#include <linux/types.h>
+
+#include <asm/cacheflush.h>
+#include <asm/kdebug.h>
+
+#define __ARCH_WANT_KPROBES_INSN_SLOT
+
+struct kprobe;
+struct pt_regs;
+
+typedef u32 kprobe_opcode_t;
+
+#define MAX_INSN_SIZE 2
+/* Flush the icache over the MAX_INSN_SIZE opcodes at (p)->addr, if it is set. */
+#define flush_insn_slot(p)						\
+do {									\
+	if ((p)->addr)							\
+		flush_icache_range((unsigned long)(p)->addr,		\
+			(unsigned long)(p)->addr +			\
+			(MAX_INSN_SIZE * sizeof(kprobe_opcode_t)));	\
+} while (0)
+
+
+#define kretprobe_blacklist_size 0
+
+void arch_remove_kprobe(struct kprobe *p);
+
+/* Architecture specific copy of original instruction */
+struct arch_specific_insn {
+	/* copy of the original instruction */
+	kprobe_opcode_t *insn;
+	/*
+	 * Set in kprobes code, initially to 0. If the instruction can be
+	 * emulated, this is set to 1, if not, to -1.
+	 */
+	int boostable;
+};
+
+struct prev_kprobe {	/* embedded in struct kprobe_ctlblk as .prev_kprobe */
+	struct kprobe *kp;
+	unsigned long status;
+};
+
+#define SKIP_DELAYSLOT 0x0001
+
+/* per-cpu kprobe control block */
+struct kprobe_ctlblk {
+	unsigned long kprobe_status;
+	/* Per-thread fields, used while emulating branches */
+	unsigned long flags;	/* presumably SKIP_DELAYSLOT - confirm */
+	unsigned long target_pc;
+	struct prev_kprobe prev_kprobe;	/* a kprobe pointer plus a status word */
+};
+extern int kprobe_handler(struct pt_regs *regs);
+extern int post_kprobe_handler(struct pt_regs *regs);
+extern int kprobe_fault_handler(struct pt_regs *regs, unsigned long mmcsr);
+
+
+#endif /* CONFIG_KPROBES */
+#endif /* _ASM_SW64_KPROBES_H */
diff --git a/arch/sw_64/include/asm/livepatch.h b/arch/sw_64/include/asm/livepatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..1feec0f6be76ddad2c1e65e0bfacf3d511510af0
--- /dev/null
+++ b/arch/sw_64/include/asm/livepatch.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * livepatch.h - sw64-specific Kernel Live Patching Core
+ */
+
+#ifndef _ASM_SW64_LIVEPATCH_H
+#define _ASM_SW64_LIVEPATCH_H
+
+#include <asm/ptrace.h>
+
+static inline int klp_check_compiler_support(void)
+{
+	return 0;	/* unconditional; presumably 0 means "supported" - confirm */
+}
+
+static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip)
+{
+	/* No pc field is written here: @ip goes into register slots 27 and 28. */
+	regs->regs[28] = regs->regs[27] = ip;
+}
+
+#endif /* _ASM_SW64_LIVEPATCH_H */
diff --git a/arch/sw_64/include/asm/perf_event.h b/arch/sw_64/include/asm/perf_event.h
new file mode 100644
index 0000000000000000000000000000000000000000..dc55a361babd015aa92fbf7b0387f1e2beeecc40
--- /dev/null
+++ b/arch/sw_64/include/asm/perf_event.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SW64_PERF_EVENT_H
+#define _ASM_SW64_PERF_EVENT_H
+
+#include <asm/pmc.h>
+#include <asm/ptrace.h>
+
+#ifdef CONFIG_PERF_EVENTS
+struct pt_regs;
+extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
+extern unsigned long perf_misc_flags(struct pt_regs *regs);
+#define perf_misc_flags(regs)  perf_misc_flags(regs)
+#define perf_arch_bpf_user_pt_regs(regs) (&(regs)->user_regs)	/* address of the user_regs member */
+#endif
+
+#endif /* _ASM_SW64_PERF_EVENT_H */
diff --git a/arch/sw_64/include/asm/pmc.h b/arch/sw_64/include/asm/pmc.h
new file mode 100644
index 0000000000000000000000000000000000000000..d5672dd940a791c62e0edbe1e5e2356183cdd131
--- /dev/null
+++ b/arch/sw_64/include/asm/pmc.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Definitions for use with the sw64 PMC interface.
+ */
+
+#ifndef _ASM_SW64_PMC_H
+#define _ASM_SW64_PMC_H
+
+#define PMC_PC0			0
+#define PMC_PC1			1
+
+/* Following commands are implemented on all CPUs */
+#define PMC_CMD_DISABLE		0
+#define PMC_CMD_ENABLE		1
+#define PMC_CMD_EVENT_BASE	2
+#define PMC_CMD_PM		4
+#define PMC_CMD_READ		5
+#define PMC_CMD_READ_CLEAR	6
+#define PMC_CMD_WRITE_BASE	7
+
+#define PMC_DISABLE_BASE	1
+
+#define PMC_ENABLE_BASE		1
+
+#define PC0_RAW_BASE		0x0
+#define PC1_RAW_BASE		0x100
+#define PC0_MAX			0xF
+#define PC1_MAX			0x3D
+
+#define SW64_PERFCTRL_KM	2
+#define SW64_PERFCTRL_UM	3
+#define SW64_PERFCTRL_AM	4
+
+/* pc0 events */
+#define PC0_INSTRUCTIONS		0x0
+#define PC0_BRANCH_INSTRUCTIONS		0x3
+#define PC0_CPU_CYCLES			0x8
+#define PC0_ITB_READ			0x9
+#define PC0_DTB_READ			0xA
+#define PC0_ICACHE_READ			0xB
+#define PC0_DCACHE_READ			0xC
+#define PC0_SCACHE_REFERENCES		0xD
+
+/* pc1 events */
+#define PC1_BRANCH_MISSES		0xB
+#define PC1_SCACHE_MISSES		0x10
+#define PC1_ICACHE_READ_MISSES		0x16
+#define PC1_ITB_MISSES			0x17
+#define PC1_DTB_SINGLE_MISSES		0x30
+#define PC1_DCACHE_MISSES		0x32
+
+#define MAX_HWEVENTS			2
+#define PMC_COUNT_MASK			((1UL << 58) - 1)
+
+#endif /* _ASM_SW64_PMC_H */
diff --git a/arch/sw_64/include/asm/spinlock.h b/arch/sw_64/include/asm/spinlock.h
new file mode 100644
index 0000000000000000000000000000000000000000..64358f32cd9a80b587a023dae6d5eecb1cf270e6
--- /dev/null
+++ b/arch/sw_64/include/asm/spinlock.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _ASM_SW64_SPINLOCK_H
+#define _ASM_SW64_SPINLOCK_H
+
+#include <asm/qspinlock.h>
+#include <asm/qrwlock.h>
+
+/* See include/linux/spinlock.h */
+#define smp_mb__after_spinlock()	smp_mb()
+
+#endif /* _ASM_SW64_SPINLOCK_H */
diff --git a/arch/sw_64/include/asm/spinlock_types.h b/arch/sw_64/include/asm/spinlock_types.h
new file mode 100644
index 0000000000000000000000000000000000000000..62e554e4f48c35b2d4578072231b58c75b202a4b
--- /dev/null
+++ b/arch/sw_64/include/asm/spinlock_types.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SW64_SPINLOCK_TYPES_H
+#define _ASM_SW64_SPINLOCK_TYPES_H
+
+#include <asm-generic/qspinlock_types.h>
+#include <asm-generic/qrwlock_types.h>
+
+#endif /* _ASM_SW64_SPINLOCK_TYPES_H */
diff --git a/arch/sw_64/include/asm/stacktrace.h b/arch/sw_64/include/asm/stacktrace.h
new file mode 100644
index 0000000000000000000000000000000000000000..958c9892fd6d0943bf78484c7e870323694fbde8
--- /dev/null
+++ b/arch/sw_64/include/asm/stacktrace.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_SW64_STACKTRACE_H
+#define _ASM_SW64_STACKTRACE_H
+
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+#include <asm/memory.h>
+#include <asm/ptrace.h>
+
+struct stackframe {	/* pc/fp pair handed to unwind_frame() */
+	unsigned long pc;
+	unsigned long fp;
+};
+
+enum stack_type {
+	STACK_TYPE_UNKNOWN,
+	STACK_TYPE_TASK,	/* reported by on_task_stack() */
+};
+
+struct stack_info {	/* filled in by on_task_stack() when sp is on the stack */
+	unsigned long low;	/* lowest address of the stack (inclusive) */
+	unsigned long high;	/* first address past the stack (exclusive) */
+	enum stack_type type;
+};
+
+/* The form of the top of the frame on the stack */
+struct stack_frame {
+	unsigned long return_address;
+	struct stack_frame *next_frame;
+};
+
+extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame);
+extern void walk_stackframe(struct task_struct *tsk, struct pt_regs *regs,
+			    int (*fn)(unsigned long, void *), void *data);
+
+static inline bool on_task_stack(struct task_struct *tsk, unsigned long sp,
+				struct stack_info *info)
+{
+	/* The task stack spans [base, base + THREAD_SIZE). */
+	const unsigned long base = (unsigned long)task_stack_page(tsk);
+	const unsigned long top = base + THREAD_SIZE;
+	const bool inside = sp >= base && sp < top;
+
+	/* Report the bounds only for a hit, and only when the caller asks. */
+	if (inside && info) {
+		info->low = base;
+		info->high = top;
+		info->type = STACK_TYPE_TASK;
+	}
+
+	return inside;
+}
+
+/*
+ * Only the task stack is known here (see enum stack_type), so this reduces to
+ * on_task_stack(). Per-cpu stacks, should they be added, can only be accessed
+ * safely from current in a non-preemptible context.
+ */
+static inline bool on_accessible_stack(struct task_struct *tsk,
+					unsigned long sp,
+					struct stack_info *info)
+{
+	/*
+	 * No further stack type to try: a miss on the task stack is final,
+	 * whatever tsk and the preemption state are.
+	 */
+	return on_task_stack(tsk, sp, info);
+}
+
+#endif /* _ASM_SW64_STACKTRACE_H */
diff --git a/arch/sw_64/include/asm/suspend.h b/arch/sw_64/include/asm/suspend.h
new file mode 100644
index 0000000000000000000000000000000000000000..833e27f9d5e14a729a285406234e90fc03afbdfe
--- /dev/null
+++ b/arch/sw_64/include/asm/suspend.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SW64_SUSPEND_H
+#define _ASM_SW64_SUSPEND_H
+
+#include <asm/hmcall.h>
+#include <asm/ptrace.h>
+#include <asm/processor.h>
+#define SOFTINF_SLEEP_MAGIC 0x0123456789ABCDEFUL
+
+#ifdef CONFIG_HIBERNATION
+#include <asm/vcpu.h>
+#include <asm/thread_info.h>
+#endif
+
+struct callee_saved_regs {	/* embedded in struct processor_state as .regs */
+	unsigned long r9;
+	unsigned long r10;
+	unsigned long r11;
+	unsigned long r12;
+	unsigned long r13;
+	unsigned long r14;
+	unsigned long r15;
+	unsigned long ra;
+};
+
+struct callee_saved_fpregs {	/* f2-f9, four unsigned longs apiece */
+	unsigned long f2[4];
+	unsigned long f3[4];
+	unsigned long f4[4];
+	unsigned long f5[4];
+	unsigned long f6[4];
+	unsigned long f7[4];
+	unsigned long f8[4];
+	unsigned long f9[4];
+} __aligned(32);	/* 256 bits aligned for simd */
+
+struct processor_state {	/* argument type of sw64_suspend_deep_sleep() */
+	struct callee_saved_regs regs;
+	struct callee_saved_fpregs fpregs;
+	unsigned long fpcr;
+	unsigned long ktp;
+#ifdef CONFIG_HIBERNATION
+	unsigned long sp;	/* present only with CONFIG_HIBERNATION */
+	struct vcpucb vcb;	/* present only with CONFIG_HIBERNATION */
+#endif
+};
+
+extern void sw64_suspend_deep_sleep(struct processor_state *state);
+extern const struct platform_suspend_ops native_suspend_ops;
+#endif /* _ASM_SW64_SUSPEND_H */
diff --git a/arch/sw_64/include/asm/uprobes.h b/arch/sw_64/include/asm/uprobes.h
new file mode 100644
index 0000000000000000000000000000000000000000..fcd2026c3622e20a781107c70d414f075d1bf588
--- /dev/null
+++ b/arch/sw_64/include/asm/uprobes.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#ifndef _ASM_SW64_UPROBES_H
+#define _ASM_SW64_UPROBES_H
+
+#include <linux/notifier.h>
+#include <linux/types.h>
+#include <asm/insn.h>
+
+/*
+ * We want this to be defined as union sw64_instruction but that makes the
+ * generic code blow up.
+ */
+typedef u32 uprobe_opcode_t;
+
+#define MAX_UINSN_BYTES			SW64_INSN_SIZE
+#define UPROBE_XOL_SLOT_BYTES		SW64_INSN_SIZE
+
+#define UPROBE_BRK_UPROBE		0x000d000d	/* break 13 */
+#define UPROBE_BRK_UPROBE_XOL		0x000e000d	/* break 14 */
+
+#define UPROBE_SWBP_INSN		UPROBE_BRK_UPROBE
+#define UPROBE_SWBP_INSN_SIZE		MAX_UINSN_BYTES
+
+struct arch_uprobe {
+	u32	insn;
+	u32	ixol[2];	/* NOTE(review): two u32 slots, while UPROBE_XOL_SLOT_BYTES is SW64_INSN_SIZE - confirm */
+};
+
+struct arch_uprobe_task {
+	unsigned long saved_trap_nr;
+};
+
+#ifdef CONFIG_UPROBES
+void sw64_fix_uretprobe(struct pt_regs *regs, unsigned long exc_pc);
+#else
+static inline void	/* empty stub so that callers build without CONFIG_UPROBES */
+sw64_fix_uretprobe(struct pt_regs *regs, unsigned long exc_pc) {}
+#endif
+
+#endif /* _ASM_SW64_UPROBES_H */
diff --git a/arch/sw_64/include/uapi/asm/bpf_perf_event.h b/arch/sw_64/include/uapi/asm/bpf_perf_event.h
new file mode 100644
index 0000000000000000000000000000000000000000..52f6f1e555f162ef7668965386cc758125726224
--- /dev/null
+++ b/arch/sw_64/include/uapi/asm/bpf_perf_event.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_ASM_SW64_BPF_PERF_EVENT_H
+#define _UAPI_ASM_SW64_BPF_PERF_EVENT_H
+
+#include <asm/ptrace.h>
+
+typedef struct user_pt_regs bpf_user_pt_regs_t;
+
+#endif /* _UAPI_ASM_SW64_BPF_PERF_EVENT_H */
diff --git a/arch/sw_64/include/uapi/asm/perf_regs.h b/arch/sw_64/include/uapi/asm/perf_regs.h
new file mode 100644
index 0000000000000000000000000000000000000000..871ad4663d1dbd29cd23395b977615323c67d81e
--- /dev/null
+++ b/arch/sw_64/include/uapi/asm/perf_regs.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+
+#ifndef _UAPI_ASM_SW64_PERF_REGS_H
+#define _UAPI_ASM_SW64_PERF_REGS_H
+
+enum perf_event_sw64_regs {	/* values start at 0 and increase by one */
+	PERF_REG_SW64_R0,
+	PERF_REG_SW64_R1,
+	PERF_REG_SW64_R2,
+	PERF_REG_SW64_R3,
+	PERF_REG_SW64_R4,
+	PERF_REG_SW64_R5,
+	PERF_REG_SW64_R6,
+	PERF_REG_SW64_R7,
+	PERF_REG_SW64_R8,
+	PERF_REG_SW64_R9,
+	PERF_REG_SW64_R10,
+	PERF_REG_SW64_R11,
+	PERF_REG_SW64_R12,
+	PERF_REG_SW64_R13,
+	PERF_REG_SW64_R14,
+	PERF_REG_SW64_R15,
+	PERF_REG_SW64_R16,
+	PERF_REG_SW64_R17,
+	PERF_REG_SW64_R18,
+	PERF_REG_SW64_R19,
+	PERF_REG_SW64_R20,
+	PERF_REG_SW64_R21,
+	PERF_REG_SW64_R22,
+	PERF_REG_SW64_R23,
+	PERF_REG_SW64_R24,
+	PERF_REG_SW64_R25,
+	PERF_REG_SW64_R26,
+	PERF_REG_SW64_R27,
+	PERF_REG_SW64_R28,
+	PERF_REG_SW64_GP,	/* 29 */
+	PERF_REG_SW64_SP,	/* 30 */
+	PERF_REG_SW64_PC,	/* 31 */
+	PERF_REG_SW64_MAX,	/* 32: number of entries above */
+};
+#endif /* _UAPI_ASM_SW64_PERF_REGS_H */
diff --git a/arch/sw_64/kernel/cpuautoplug.c b/arch/sw_64/kernel/cpuautoplug.c
new file mode 100644
index 0000000000000000000000000000000000000000..a7571a77a72c6ed4733fc179610e5b98853c1510
--- /dev/null
+++ b/arch/sw_64/kernel/cpuautoplug.c
@@ -0,0 +1,485 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/module.h>
+#include <linux/cpu.h>
+#include <linux/sched.h>
+#include <linux/tick.h>
+#include <linux/kernel_stat.h>
+#include <linux/platform_device.h>
+
+#include <asm/cpufreq.h>
+#include <asm/cputime.h>
+#include <asm/smp.h>
+
+int autoplug_enabled;
+int autoplug_verbose;
+int autoplug_adjusting;
+
+DEFINE_PER_CPU(int, cpu_adjusting);
+
+struct cpu_autoplug_info {
+	cputime64_t prev_idle;
+	cputime64_t prev_wall;
+	struct delayed_work work;
+	unsigned int sampling_rate;	/* in ms: fed to msecs_to_jiffies() */
+	int maxcpus;   /* max cpus for autoplug */
+	int mincpus;   /* min cpus for autoplug */
+	int dec_reqs;  /* continuous core-decreasing requests */
+	int inc_reqs;  /* continuous core-increasing requests */
+};
+
+struct cpu_autoplug_info ap_info;
+
+static ssize_t enabled_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", autoplug_enabled);	/* decimal value plus newline */
+}
+
+
+/* sysfs store: parse @buf as 0 or 1 and update autoplug_enabled. */
+static ssize_t enabled_store(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf, size_t count)
+{
+	int n;
+	int ret = kstrtoint(buf, 0, &n);	/* parses @buf in place, no copy */
+
+	if (ret)
+		return ret;
+	if (n > 1 || n < 0)
+		return -EINVAL;
+
+	autoplug_enabled = n;
+
+	return count;
+}
+
+static ssize_t verbose_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", autoplug_verbose);	/* decimal value plus newline */
+}
+
+/* sysfs store: parse @buf as 0 or 1 and update autoplug_verbose. */
+static ssize_t verbose_store(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf, size_t count)
+{
+	int n;
+	int ret = kstrtoint(buf, 0, &n);	/* parses @buf in place, no copy */
+
+	if (ret)
+		return ret;
+	if (n > 1 || n < 0)
+		return -EINVAL;
+
+	autoplug_verbose = n;
+
+	return count;
+}
+
+static ssize_t maxcpus_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", ap_info.maxcpus);	/* decimal value plus newline */
+}
+
+/* sysfs store: set maxcpus to a value in [mincpus, num_possible_cpus()]. */
+static ssize_t maxcpus_store(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf, size_t count)
+{
+	int n;
+	int ret = kstrtoint(buf, 0, &n);	/* parses @buf in place, no copy */
+
+	if (ret)
+		return ret;
+	if (n > num_possible_cpus() || n < ap_info.mincpus)
+		return -EINVAL;
+
+	ap_info.maxcpus = n;
+
+	return count;
+}
+
+static ssize_t mincpus_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", ap_info.mincpus);	/* decimal value plus newline */
+}
+
+/* sysfs store: set mincpus to a value in [1, maxcpus]. */
+static ssize_t mincpus_store(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf, size_t count)
+{
+	int n;
+	int ret = kstrtoint(buf, 0, &n);	/* parses @buf in place, no copy */
+
+	if (ret)
+		return ret;
+	if (n > ap_info.maxcpus || n < 1)
+		return -EINVAL;
+
+	ap_info.mincpus = n;
+
+	return count;
+}
+
+static ssize_t sampling_rate_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%u\n", ap_info.sampling_rate);	/* unsigned field, hence %u */
+}
+
+#define SAMPLING_RATE_MAX 1000
+#define SAMPLING_RATE_MIN 600
+
+/* sysfs store: accept SAMPLING_RATE_MIN..SAMPLING_RATE_MAX for sampling_rate. */
+static ssize_t sampling_rate_store(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf, size_t count)
+{
+	int n;
+	int ret = kstrtoint(buf, 0, &n);	/* parses @buf in place, no copy */
+
+	if (ret)
+		return ret;
+	if (n > SAMPLING_RATE_MAX || n < SAMPLING_RATE_MIN)
+		return -EINVAL;
+
+	ap_info.sampling_rate = n;
+
+	return count;
+}
+
+static ssize_t available_value_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	/* One "name: range" line per writable attribute of this group. */
+	return sprintf(buf, "enabled: 0-1\nverbose: 0-1\nmaxcpus: 1-%u\n"
+		"mincpus: 1-%u\nsampling_rate: %d-%d\n", num_possible_cpus(),
+		num_possible_cpus(), SAMPLING_RATE_MIN, SAMPLING_RATE_MAX);
+}
+
+static DEVICE_ATTR_RW(enabled);
+static DEVICE_ATTR_RW(verbose);
+static DEVICE_ATTR_RW(maxcpus);
+static DEVICE_ATTR_RW(mincpus);
+static DEVICE_ATTR_RW(sampling_rate);
+static DEVICE_ATTR_RO(available_value);
+
+static struct attribute *cpuclass_default_attrs[] = {
+	&dev_attr_enabled.attr,
+	&dev_attr_verbose.attr,
+	&dev_attr_maxcpus.attr,
+	&dev_attr_mincpus.attr,
+	&dev_attr_sampling_rate.attr,
+	&dev_attr_available_value.attr,
+	NULL	/* terminator */
+};
+
+static struct attribute_group cpuclass_attr_group = {
+	.attrs = cpuclass_default_attrs,
+	.name = "cpuautoplug",	/* named group: its files sit in a "cpuautoplug" subdirectory */
+};
+
+static int __init setup_autoplug(char *str)
+{
+	/* Accepts exactly "on" or "off"; any other value returns 0. */
+	const int want_on = strcmp(str, "on") == 0;
+
+	if (!want_on && strcmp(str, "off") != 0)
+		return 0;
+	autoplug_enabled = want_on;
+	return 1;
+}
+
+__setup("autoplug=", setup_autoplug);
+
+static cputime64_t calc_busy_time(unsigned int cpu)
+{
+	/* The cpustat buckets that are summed up as "busy" time. */
+	static const int busy_idx[] = {
+		CPUTIME_USER, CPUTIME_SYSTEM, CPUTIME_IRQ,
+		CPUTIME_SOFTIRQ, CPUTIME_STEAL, CPUTIME_NICE,
+	};
+	cputime64_t total = 1;	/* the result carries a constant +1 bias */
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(busy_idx); i++)
+		total += kcpustat_cpu(cpu).cpustat[busy_idx[i]];
+	return total;
+}
+
+/* Jiffy-based fallback: idle = sum over online cpus of (wall - busy). */
+static inline cputime64_t get_idle_time_jiffy(cputime64_t *wall)
+{
+	const cputime64_t now = jiffies64_to_cputime64(get_jiffies_64());
+	cputime64_t idle_sum = 0;
+	unsigned int cpu;
+
+	/*
+	 * NOTE(review): calc_busy_time() sums raw cpustat[] values while
+	 * "now" is in jiffies - confirm both use the same unit.
+	 */
+	for_each_online_cpu(cpu)
+		idle_sum += now - calc_busy_time(cpu);
+
+	/* The wall clock is optional; it goes through jiffies_to_usecs() too. */
+	if (wall)
+		*wall = (cputime64_t)jiffies_to_usecs(now);
+
+	return (cputime64_t)jiffies_to_usecs(idle_sum);
+}
+
+static inline cputime64_t get_idle_time(cputime64_t *wall)
+{
+	unsigned int cpu;
+	u64 idle_time = 0, cpu_idle;
+
+	for_each_online_cpu(cpu) {	/* sum the per-cpu idle time */
+		cpu_idle = get_cpu_idle_time_us(cpu, wall);
+		if (cpu_idle == -1ULL)	/* this cpu has no data: use the jiffy path */
+			return get_idle_time_jiffy(wall);
+		idle_time += cpu_idle;
+	}
+	return idle_time;
+}
+
+/* Index of the smallest non-zero entry of @arr (0 when there is none). */
+static cputime64_t get_min_busy_time(cputime64_t arr[], int size)
+{
+	int i, min_cpu_idx = 0;		/* defined result even without a match */
+	cputime64_t min_time = 0;	/* 0: no candidate seen yet */
+
+	for (i = 0; i < size; i++) {
+		if (arr[i] > 0 && (!min_time || arr[i] < min_time)) {
+			min_time = arr[i];
+			min_cpu_idx = i;
+		}
+	}
+	return min_cpu_idx;
+}
+
+/* Pick the online cpu with the smallest busy time, see get_min_busy_time(). */
+static int find_min_busy_cpu(void)
+{
+	unsigned int cpu;
+	/* NOTE(review): NR_CPUS u64s on the stack - consider a static buffer. */
+	cputime64_t b_time[NR_CPUS];
+
+	/* Offline cpus keep 0, which get_min_busy_time() skips. */
+	memset(b_time, 0, sizeof(b_time));
+	for_each_online_cpu(cpu)
+		b_time[cpu] = calc_busy_time(cpu);
+
+	/* cpu ids may be sparse: scan every possible id, not just a count. */
+	return get_min_busy_time(b_time, nr_cpu_ids);
+}
+
+/* Bring one more cpu online: the lowest-numbered offline cpu above cpu 0. */
+static void increase_cores(int cur_cpus)
+{
+	unsigned int cpu = cpumask_next_zero(0, cpu_online_mask);
+	struct device *dev = cpu < nr_cpu_ids ? get_cpu_device(cpu) : NULL;
+
+	/* At the limit already, or no offline cpu (device) left to bring up. */
+	if (cur_cpus >= ap_info.maxcpus || !dev)
+		return;
+
+	per_cpu(cpu_adjusting, cpu) = 1;
+	lock_device_hotplug();
+	if (!cpu_device_up(dev)) {	/* log and record success only */
+		pr_info("The target_cpu is %d, After cpu_up, the cpu_num is %d\n",
+				dev->id, num_online_cpus());
+		dev->offline = false;
+	}
+	unlock_device_hotplug();
+	per_cpu(cpu_adjusting, cpu) = 0;
+}
+
+/* Take the least busy cpu offline; cpu 0 is never unplugged. */
+static void decrease_cores(int cur_cpus)
+{
+	struct device *dev;
+
+	if (cur_cpus <= ap_info.mincpus)
+		return;
+
+	dev = get_cpu_device(find_min_busy_cpu());
+	if (!dev || dev->id == 0)	/* no device, or the choice fell on cpu 0 */
+		return;
+
+	per_cpu(cpu_adjusting, dev->id) = -1;
+	lock_device_hotplug();
+	if (!cpu_device_down(dev)) {	/* log and record success only */
+		pr_info("The target_cpu is %d. After cpu_down, the cpu_num is %d\n",
+				dev->id, num_online_cpus());
+		dev->offline = true;
+	}
+	unlock_device_hotplug();
+	per_cpu(cpu_adjusting, dev->id) = 0;
+}
+
+#define INC_THRESHOLD 80
+#define DEC_THRESHOLD 40
+
+/*
+ * Periodic autoplug work: estimate the load from the idle/wall time deltas
+ * since the previous run, plug or unplug at most one CPU, then re-arm.
+ */
+static void do_autoplug_timer(struct work_struct *work)
+{
+	cputime64_t cur_wall_time = 0, cur_idle_time;
+	unsigned long idle_time, wall_time;
+	int delay, load;
+	int nr_cur_cpus = num_online_cpus();
+	int nr_all_cpus = num_possible_cpus();
+	int inc_req = 1, dec_req = 2;
+	struct cpufreq_policy *policy = cpufreq_cpu_get_raw(smp_processor_id());
+
+	if (!policy || IS_ERR(policy->clk)) {
+		pr_err("%s: No %s associated to cpu: %d\n",
+			__func__, policy ? "clk" : "policy", 0);
+		return;
+	}
+
+	ap_info.maxcpus =
+		setup_max_cpus > nr_cpu_ids ? nr_cpu_ids : setup_max_cpus;
+	ap_info.mincpus = ap_info.maxcpus / 4;
+
+	if (strcmp(policy->governor->name, "performance") == 0) {
+		ap_info.mincpus = ap_info.maxcpus;
+	} else if (strcmp(policy->governor->name, "powersave") == 0) {
+		ap_info.maxcpus = ap_info.mincpus;
+	} else if (strcmp(policy->governor->name, "ondemand") == 0) {
+		ap_info.sampling_rate = 500;
+		inc_req = 0;	/* dec_req keeps its default of 2 */
+	} else if (strcmp(policy->governor->name, "conservative") == 0) {
+		dec_req = 3;	/* inc_req keeps its default of 1 */
+		ap_info.sampling_rate = 1000;  /* 1s */
+	}
+
+	BUG_ON(smp_processor_id() != 0);
+	delay = msecs_to_jiffies(ap_info.sampling_rate);
+	if (!autoplug_enabled || system_state != SYSTEM_RUNNING)
+		goto out;
+
+	autoplug_adjusting = 1;
+	if (nr_cur_cpus > ap_info.maxcpus) {
+		decrease_cores(nr_cur_cpus);
+		autoplug_adjusting = 0;
+		goto out;
+	}
+	if (nr_cur_cpus < ap_info.mincpus) {
+		increase_cores(nr_cur_cpus);
+		autoplug_adjusting = 0;
+		goto out;
+	}
+
+	cur_idle_time = get_idle_time(&cur_wall_time);
+	if (cur_wall_time == 0)
+		cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
+
+	wall_time = (unsigned int)(cur_wall_time - ap_info.prev_wall);
+	ap_info.prev_wall = cur_wall_time;
+	/* Offline CPUs are accounted as idle for the whole interval. */
+	idle_time = (unsigned int)(cur_idle_time - ap_info.prev_idle);
+	idle_time += wall_time * (nr_all_cpus - nr_cur_cpus);
+	ap_info.prev_idle = cur_idle_time;	/* baseline for the next delta */
+
+	if (unlikely(!wall_time || wall_time * nr_all_cpus < idle_time)) {
+		autoplug_adjusting = 0;
+		goto out;
+	}
+
+	load = 100 * (wall_time * nr_all_cpus - idle_time) / wall_time;
+	if (load < (nr_cur_cpus - 1) * 100 - DEC_THRESHOLD) {
+		ap_info.inc_reqs = 0;
+		if (ap_info.dec_reqs < dec_req)
+			ap_info.dec_reqs++;
+		else {
+			ap_info.dec_reqs = 0;
+			decrease_cores(nr_cur_cpus);
+		}
+	} else {
+		ap_info.dec_reqs = 0;
+		if (load > (nr_cur_cpus - 1) * 100 + INC_THRESHOLD) {
+			if (ap_info.inc_reqs < inc_req)
+				ap_info.inc_reqs++;
+			else {
+				ap_info.inc_reqs = 0;
+				increase_cores(nr_cur_cpus);
+			}
+		}
+	}
+
+	autoplug_adjusting = 0;
+out:
+	schedule_delayed_work_on(0, &ap_info.work, delay);
+}
+
+static struct platform_device_id platform_device_ids[] = {
+	{
+		.name = "sw64_cpuautoplug",
+	},
+	{}	/* empty entry terminates the table */
+};
+/* Export the id table so module tooling can match on it. */
+MODULE_DEVICE_TABLE(platform, platform_device_ids);
+/* Only the name, owner and id table are set; no probe/remove callbacks. */
+static struct platform_driver platform_driver = {
+	.driver = {
+		.name = "sw64_cpuautoplug",
+		.owner = THIS_MODULE,
+	},
+	.id_table = platform_device_ids,
+};
+
+/* Register the sysfs group and driver, set defaults, arm the autoplug work. */
+static int __init cpuautoplug_init(void)
+{
+	int i, ret, delay;
+
+	ret = sysfs_create_group(&cpu_subsys.dev_root->kobj,
+					&cpuclass_attr_group);
+	if (ret)
+		return ret;
+
+	ret = platform_driver_register(&platform_driver);
+	if (ret) {	/* do not leave the sysfs group behind */
+		sysfs_remove_group(&cpu_subsys.dev_root->kobj,
+					&cpuclass_attr_group);
+		return ret;
+	}
+	pr_info("cpuautoplug: SW64 CPU autoplug driver.\n");
+
+	ap_info.maxcpus =
+		setup_max_cpus > nr_cpu_ids ? nr_cpu_ids : setup_max_cpus;
+	ap_info.mincpus = ap_info.maxcpus / 4;
+	ap_info.dec_reqs = 0;
+	ap_info.inc_reqs = 0;
+	ap_info.sampling_rate = 720;  /* 720ms */
+	if (setup_max_cpus == 0) {    /* boot with nosmp */
+		ap_info.maxcpus = 1;
+		autoplug_enabled = 0;
+	}
+	if (setup_max_cpus > num_possible_cpus())
+		ap_info.maxcpus = num_possible_cpus();
+
+	pr_info("mincpu = %d, maxcpu = %d, autoplug_enabled = %d, rate = %d\n",
+			ap_info.mincpus, ap_info.maxcpus, autoplug_enabled,
+			ap_info.sampling_rate);
+	for_each_possible_cpu(i)
+		per_cpu(cpu_adjusting, i) = 0;
+	delay = msecs_to_jiffies(ap_info.sampling_rate * 24);
+	INIT_DEFERRABLE_WORK(&ap_info.work, do_autoplug_timer);
+	schedule_delayed_work_on(0, &ap_info.work, delay);
+	/* A disabled autoplug must not keep the first run pending. */
+	if (!autoplug_enabled)
+		cancel_delayed_work_sync(&ap_info.work);
+	return ret;
+}
+
+late_initcall(cpuautoplug_init);
diff --git a/arch/sw_64/kernel/crash_dump.c b/arch/sw_64/kernel/crash_dump.c
new file mode 100644
index 0000000000000000000000000000000000000000..4484673823b8e6065d9efb5f2299a21df67d421a
--- /dev/null
+++ b/arch/sw_64/kernel/crash_dump.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * arch/sw_64/kernel/crash_dump.c
+ *
+ * Copyright (C) 2019 JN
+ * Author: He Sheng
+ *
+ * This code is taken from arch/x86/kernel/crash_dump_64.c
+ *   Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
+ *   Copyright (C) IBM Corporation, 2004. All rights reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/uaccess.h>
+#include <linux/io.h>
+
+/**
+ * copy_oldmem_page() - copy one page from old kernel memory
+ * @pfn: page frame number to be copied
+ * @buf: buffer where the copied page is placed
+ * @csize: number of bytes to copy
+ * @offset: offset in bytes into the page
+ * @userbuf: if set, @buf is in the user address space
+ *
+ * This function copies one page from old kernel memory into buffer pointed by
+ * @buf. If @buf is in userspace, set @userbuf to %1. Returns number of bytes
+ * copied or negative error in case of failure.
+ */
+ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
+			 size_t csize, unsigned long offset,
+			 int userbuf)
+{
+	ssize_t copied = csize;
+	void *page;
+
+	if (csize == 0)
+		return 0;
+
+	page = ioremap(__pfn_to_phys(pfn), PAGE_SIZE);
+	if (page == NULL)
+		return -ENOMEM;
+
+	/*
+	 * A failed user copy yields -EFAULT; the mapping is always released.
+	 */
+	if (!userbuf)
+		memcpy(buf, page + offset, csize);
+	else if (copy_to_user(buf, page + offset, csize))
+		copied = -EFAULT;
+
+	iounmap(page);
+	return copied;
+}
diff --git a/arch/sw_64/kernel/entry-ftrace.S b/arch/sw_64/kernel/entry-ftrace.S
new file mode 100644
index 0000000000000000000000000000000000000000..73e8e043fc9d14fbbaa50bb164fcc4326329001b
--- /dev/null
+++ b/arch/sw_64/kernel/entry-ftrace.S
@@ -0,0 +1,326 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * arch/sw_64/kernel/entry-ftrace.S
+ *
+ * Author: linyue
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/ftrace.h>
+
+	.text
+	.set noat
+	.align 4
+
+#define FTRACE_SP_OFF	0x50
+	.macro mcount_enter
+	subl	$sp, FTRACE_SP_OFF, $sp
+	stl	$16, 0($sp)
+	stl	$17, 0x8($sp)
+	stl	$18, 0x10($sp)
+	stl	$26, 0x18($sp)
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	stl	$9, 0x20($sp)
+#endif
+	stl	$28, 0x28($sp)
+	stl	$29, 0x30($sp)
+	stl	$19, 0x38($sp)
+	stl	$20, 0x40($sp)
+	stl	$21, 0x48($sp)
+	.endm
+
+	.macro mcount_end
+	ldl	$16, 0($sp)
+	ldl	$17, 0x8($sp)
+	ldl	$18, 0x10($sp)
+	ldl	$26, 0x18($sp)
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	ldl	$9, 0x20($sp)
+#endif
+	ldl	$28, 0x28($sp)
+	ldl	$29, 0x30($sp)
+	ldl	$19, 0x38($sp)
+	ldl	$20, 0x40($sp)
+	ldl	$21, 0x48($sp)
+	addl	$sp, FTRACE_SP_OFF, $sp
+	.endm
+
+	.macro RESTORE_GRAPH_ARGS
+	ldi	$16, 0x18($sp)			/* &ra */
+	bis	$31, $9, $17			/* pc */
+ #ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+	bis	$31, $15, $18			/* fp */
+ #endif
+	.endm
+
+	.macro SAVE_PT_REGS
+	ldi	$sp, -PT_REGS_SIZE($sp)
+	stl	$0, PT_REGS_R0($sp)
+	stl	$1, PT_REGS_R1($sp)
+	stl	$2, PT_REGS_R2($sp)
+	stl	$3, PT_REGS_R3($sp)
+	stl	$4, PT_REGS_R4($sp)
+	stl	$5, PT_REGS_R5($sp)
+	stl	$6, PT_REGS_R6($sp)
+	stl	$7, PT_REGS_R7($sp)
+	stl	$8, PT_REGS_R8($sp)
+	stl	$9, PT_REGS_R9($sp)
+	stl	$10, PT_REGS_R10($sp)
+	stl	$11, PT_REGS_R11($sp)
+	stl	$12, PT_REGS_R12($sp)
+	stl	$13, PT_REGS_R13($sp)
+	stl	$14, PT_REGS_R14($sp)
+	stl	$15, PT_REGS_R15($sp)
+	stl	$16, PT_REGS_R16($sp)
+	stl	$17, PT_REGS_R17($sp)
+	stl	$18, PT_REGS_R18($sp)
+	stl	$19, PT_REGS_R19($sp)
+	stl	$20, PT_REGS_R20($sp)
+	stl	$21, PT_REGS_R21($sp)
+	stl	$22, PT_REGS_R22($sp)
+	stl	$23, PT_REGS_R23($sp)
+	stl	$24, PT_REGS_R24($sp)
+	stl	$25, PT_REGS_R25($sp)
+	stl	$26, PT_REGS_R26($sp)
+	stl	$27, PT_REGS_R27($sp)
+	stl	$28, PT_REGS_R28($sp)
+	stl	$29, PT_REGS_GP($sp)
+	ldi	$0, PT_REGS_SIZE($sp)
+	stl	$0, PT_REGS_SP($sp)
+	.endm
+
+	.macro RESTORE_PT_REGS
+	ldl	$0, PT_REGS_R0($sp)
+	ldl	$1, PT_REGS_R1($sp)
+	ldl	$2, PT_REGS_R2($sp)
+	ldl	$3, PT_REGS_R3($sp)
+	ldl	$4, PT_REGS_R4($sp)
+	ldl	$5, PT_REGS_R5($sp)
+	ldl	$6, PT_REGS_R6($sp)
+	ldl	$7, PT_REGS_R7($sp)
+	ldl	$8, PT_REGS_R8($sp)
+	ldl	$9, PT_REGS_R9($sp)
+	ldl	$10, PT_REGS_R10($sp)
+	ldl	$11, PT_REGS_R11($sp)
+	ldl	$12, PT_REGS_R12($sp)
+	ldl	$13, PT_REGS_R13($sp)
+	ldl	$14, PT_REGS_R14($sp)
+	ldl	$15, PT_REGS_R15($sp)
+	ldl	$16, PT_REGS_R16($sp)
+	ldl	$17, PT_REGS_R17($sp)
+	ldl	$18, PT_REGS_R18($sp)
+	ldl	$19, PT_REGS_R19($sp)
+	ldl	$20, PT_REGS_R20($sp)
+	ldl	$21, PT_REGS_R21($sp)
+	ldl	$22, PT_REGS_R22($sp)
+	ldl	$23, PT_REGS_R23($sp)
+	ldl	$24, PT_REGS_R24($sp)
+	ldl	$25, PT_REGS_R25($sp)
+	ldl	$26, PT_REGS_R26($sp)
+	ldl	$27, PT_REGS_R27($sp)
+	ldl	$28, PT_REGS_R28($sp)
+	ldl	$29, PT_REGS_GP($sp)
+	ldi	$sp, PT_REGS_SIZE($sp)
+	.endm
+
+	.macro RESTORE_GRAPH_REG_ARGS
+	ldi	$16, PT_REGS_R26($sp)
+	bis	$31, $9, $17
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+	bis	$31, $15, $18
+#endif
+         .endm
+
+	/* save return value regs*/
+	.macro save_return_regs
+	subl	$sp, 0x8, $sp
+	stl	$0, 0x0($sp)
+	.endm
+
+	/* restore return value regs*/
+	.macro restore_return_regs
+	ldl	$0, 0x0($sp)
+	addl	$sp, 0x8, $sp
+	.endm
+
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/*
+ * void ftrace_graph_caller(void)
+ *
+ * Called from ftrace_caller() or ftrace_regs_caller() when function_graph
+ * tracer is selected.
+ * This function prepare_ftrace_return() fakes ra's value on the call
+ * stack in order to intercept instrumented function's return path and
+ * run return_to_handler() later on its exit.
+ */
+
+ENTRY(ftrace_graph_caller)
+	ldgp	$29, 0($27)
+	ldi	$sp, -16($sp)		/* 16-byte frame for $26 and $15 */
+	stl	$26, 0($sp)
+	stl	$15, 8($sp)
+	bis	$31, $sp, $15		/* $15 = $sp */
+
+	ldi	$27, prepare_ftrace_return	/* target of the patched-in call */
+ftrace_graph_call:
+	.global ftrace_graph_call
+	/*
+	 * Calling ftrace_enable/disable_ftrace_graph_caller would overwrite
+	 * the nop below.
+	 */
+	nop	/* nop, or call prepare_ftrace_return() */
+
+	ldl	$26, 0($sp)		/* restore $26 and $15, drop the frame */
+	ldl	$15, 8($sp)
+	ldi	$sp, 16($sp)
+	ret	$31, ($26), 1
+ENDPROC(ftrace_graph_caller)
+
+/*
+ * void return_to_handler(void)
+ *
+ * Run ftrace_return_to_handler() before going back to parent.
+ * @fp is checked against the value passed by ftrace_graph_caller()
+ * only when HAVE_FUNCTION_GRAPH_FP_TEST is enabled.
+ *
+ * It is entered via a "ret" instruction, which does not set $27, so it
+ * has to recalculate $27 before ldgp.
+ */
+ENTRY(return_to_handler)
+	br	$27, 1f			/* recompute $27 for the ldgp below */
+1:	ldgp	$29, 0($27)
+	save_return_regs		/* push $0 */
+	bis	$31, $15, $16	/* parent's fp */
+	ldi	$27, ftrace_return_to_handler
+	call	$26, ($27)
+	bis	$31, $0, $26		/* result in $0 becomes the return address */
+	restore_return_regs		/* pop $0 */
+	ret	$31, ($26), 1
+END(return_to_handler)
+
+#endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+	.global _mcount
+	.ent _mcount
+_mcount:
+	ret	$31, ($28), 1
+	.end _mcount
+
+
+	.global ftrace_caller
+	.ent ftrace_caller
+ftrace_caller:
+	mcount_enter
+	br	$27, 1f
+1:	ldgp	$29, 0($27)
+
+	subl	$28, MCOUNT_INSN_SIZE, $16
+	bis	$26, $31, $17
+	ldl	$18, function_trace_op
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	/*
+	 * the graph tracer (specifically, prepare_ftrace_return) needs these
+	 * arguments but for now the function tracer occupies the regs, so we
+	 * save them in callee-saved regs to recover later.
+	 */
+	bis	$31, $16, $9
+#endif
+	ldi	$4, current_tracer
+	ldl	$27, 0($4)
+
+	.global ftrace_call
+ftrace_call:					/* tracer(pc, ra); */
+	nop
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	RESTORE_GRAPH_ARGS
+	call    ftrace_graph_caller
+#endif
+	mcount_end
+	ret	$31, ($28), 1
+	.end ftrace_caller
+#else /* !CONFIG_DYNAMIC_FTRACE */
+
+	.global _mcount
+	.ent _mcount
+_mcount:
+	mcount_enter
+	br	$27, 1f
+1:	ldgp	$29, 0($27)
+
+	ldl	$27, ftrace_trace_function	// if (ftrace_trace_function
+	ldi	$5, ftrace_stub			//	!= ftrace_stub)
+	cmpeq	$27, $5, $6			//
+	bne	$6, skip_ftrace
+
+	subl	$28, MCOUNT_INSN_SIZE, $16	// function's pc
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	bis	$31, $16, $9
+#endif
+	bis	$26, $31, $17		// function's ra (parent's pc)
+	call	$26, ($27)		// (*ftrace_trace_function)(pc, ra);
+
+skip_ftrace:
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	ldl	$4, ftrace_graph_return		// if ((ftrace_graph_return
+	cmpeq	$4, $5, $6			//	!= ftrace_stub)
+	beq	$6, 2f
+	ldl	$4, ftrace_graph_entry		// || (ftrace_graph_entry
+	ldi     $5, ftrace_graph_entry_stub	//	!= ftrace_graph_entry_stub))
+	cmpeq	$4, $5, $6
+	bne	$6, 3f
+2:	RESTORE_GRAPH_ARGS
+	call	ftrace_graph_caller		// ftrace_graph_caller();
+#endif
+3:	mcount_end
+	ret	$31, ($28), 1
+	.end _mcount
+
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+	.global ftrace_regs_caller
+	.ent ftrace_regs_caller
+ftrace_regs_caller:
+	SAVE_PT_REGS
+	br	$27, 1f
+1:	ldgp	$29, 0($27)
+
+	subl	$28, MCOUNT_INSN_SIZE, $16
+	bis	$26, $31, $17
+	ldi	$4, function_trace_op
+	ldl	$18, 0($4)
+	mov	$sp, $19
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	bis	$31, $16, $9
+#endif
+	ldi	$4, current_tracer
+	ldl	$27, 0($4)
+
+	.global ftrace_regs_call
+ftrace_regs_call:
+	nop
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	RESTORE_GRAPH_REG_ARGS
+	call    ftrace_graph_caller
+#endif
+	RESTORE_PT_REGS
+	ret $31, ($28), 1
+	.end ftrace_regs_caller
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+
+	.global ftrace_stub
+	.ent ftrace_stub
+ftrace_stub:
+	ret	$31, ($26), 1		/* does nothing: return via $26 */
+	.end ftrace_stub
diff --git a/arch/sw_64/kernel/ftrace.c b/arch/sw_64/kernel/ftrace.c
new file mode 100644
index 0000000000000000000000000000000000000000..fb25ffe3dbdaf4f26bf4389e63d37fd1aaaa754b
--- /dev/null
+++ b/arch/sw_64/kernel/ftrace.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Based on arch/arm64/kernel/ftrace.c
+ *
+ * Copyright (C) 2019 os kernel team
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/ftrace.h>
+
+#include <asm/ftrace.h>
+
+#ifdef CONFIG_FUNCTION_TRACER
+EXPORT_SYMBOL(_mcount);
+#endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+#define TI_FTRACE_ADDR	(offsetof(struct thread_info, dyn_ftrace_addr))
+#define TI_FTRACE_REGS_ADDR \
+			(offsetof(struct thread_info, dyn_ftrace_regs_addr))
+
+unsigned long current_tracer = (unsigned long)ftrace_stub;
+
+/*
+ * Replace a single instruction, which may be a branch or NOP.
+ */
+static int ftrace_modify_code(unsigned long pc, u32 new)
+{
+	int err = sw64_insn_write((void *)pc, new);
+
+	return err ? -EPERM : 0;	/* any write failure becomes -EPERM */
+}
+
+/*
+ * Replace tracer function in ftrace_caller()
+ */
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	const u32 call_insn = SW64_CALL(R26, R27, 0);
+	int err;
+
+	/*
+	 * Record the new tracer in current_tracer before the call slots
+	 * are rewritten.
+	 */
+	current_tracer = (unsigned long)func;
+
+	/* Patch ftrace_call, and ftrace_regs_call only if that succeeded. */
+	err = ftrace_modify_code((unsigned long)&ftrace_call, call_insn);
+	if (err)
+		return err;
+
+	return ftrace_modify_code((unsigned long)&ftrace_regs_call,
+				  call_insn);
+}
+
+/*
+ * Turn on the call to ftrace_caller() in instrumented function
+ */
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned int insn[3];
+	unsigned long pc = rec->ip + MCOUNT_LDGP_SIZE;
+	unsigned long offset;
+	/* Pick the thread_info field whose value the patched code calls. */
+	if (addr == FTRACE_ADDR)
+		offset = TI_FTRACE_ADDR;
+	else
+		offset = TI_FTRACE_REGS_ADDR;
+
+	insn[0] = SW64_NOP;
+	/* ldl r28,(ftrace_addr_offset)(r8) */
+	insn[1] = (0x23U << 26) | (28U << 21) | (8U << 16) | offset;
+	insn[2] = SW64_CALL(R28, R28, 0);	/* call through the loaded address */
+
+	/* replace the 3 mcount instructions at once */
+	return copy_to_kernel_nofault((void *)pc, insn, 3 * SW64_INSN_SIZE);
+}
+
+/*
+ * Turn off the call to ftrace_caller() in instrumented function
+ */
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
+		    unsigned long addr)
+{
+	const unsigned int nops[3] = {SW64_NOP, SW64_NOP, SW64_NOP};
+	void *site = (void *)(rec->ip + MCOUNT_LDGP_SIZE);
+
+	return copy_to_kernel_nofault(site, nops, 3 * SW64_INSN_SIZE);
+}
+
+void arch_ftrace_update_code(int command)
+{
+	ftrace_modify_all_code(command);
+}
+
+int __init ftrace_dyn_arch_init(void)
+{
+	struct thread_info *ti = task_thread_info(&init_task);
+
+	/* Only init_task's thread_info fields are filled in here. */
+	ti->dyn_ftrace_addr = FTRACE_ADDR;
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+	ti->dyn_ftrace_regs_addr = FTRACE_REGS_ADDR;
+#endif
+	return 0;
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+		       unsigned long addr)
+{
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/*
+ * function_graph tracer expects ftrace_return_to_handler() to be called
+ * on the way back to parent. For this purpose, this function is called
+ * in _mcount() or ftrace_caller() to replace return address (*parent) on
+ * the call stack to return_to_handler.
+ *
+ * Note that @frame_pointer is used only for sanity check later.
+ */
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+			   unsigned long frame_pointer)
+{
+	unsigned long return_hooker = (unsigned long)&return_to_handler;
+	unsigned long old;
+
+	if (unlikely(atomic_read(&current->tracing_graph_pause)))
+		return;
+
+	/*
+	 * Note:
+	 * No protection against faulting at *parent, which may be seen
+	 * on other archs. (Remark inherited from the arm64 original.)
+	 */
+	old = *parent;
+	/* Redirect the return only when function_graph_enter() returns 0. */
+	if (!function_graph_enter(old, self_addr, frame_pointer, NULL))
+		*parent = return_hooker;
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+/*
+ * Turn on/off the call to ftrace_graph_caller() in ftrace_caller()
+ * depending on @enable.
+ */
+static int ftrace_modify_graph_caller(bool enable)
+{
+	/*
+	 * Enabled: a call instruction; disabled: a NOP.
+	 */
+	u32 insn = enable ? SW64_CALL(R26, R27, 0) : SW64_NOP;
+
+	return ftrace_modify_code((unsigned long)&ftrace_graph_call, insn);
+}
+
+int ftrace_enable_ftrace_graph_caller(void)
+{
+	return ftrace_modify_graph_caller(true);
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+	return ftrace_modify_graph_caller(false);
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/sw_64/kernel/hibernate.c b/arch/sw_64/kernel/hibernate.c
new file mode 100644
index 0000000000000000000000000000000000000000..644ea85043136066c1129b059735d3feb7dc9f71
--- /dev/null
+++ b/arch/sw_64/kernel/hibernate.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/suspend.h>
+#include <asm/hmcall.h>
+#include <asm/suspend.h>
+
+struct processor_state hibernate_state;
+/* Defined in hibernate_asm.S */
+extern int restore_image(void);
+
+void save_processor_state(void)
+{
+	struct vcpucb *vcb = &(hibernate_state.vcb);
+
+	vcb->ksp = rdksp();
+	vcb->usp = rdusp();
+	vcb->soft_tid = rtid();
+	vcb->ptbr = rdptbr();
+}
+
+void restore_processor_state(void)
+{
+	struct vcpucb *vcb = &(hibernate_state.vcb);
+
+	wrksp(vcb->ksp);
+	wrusp(vcb->usp);
+	wrtp(vcb->soft_tid);	/* NOTE(review): saved via rtid(); confirm wrtp() pairs with it */
+	wrptbr(vcb->ptbr);
+	sflush();	/* presumably flushes stale state after the ptbr switch - TODO confirm */
+	tbiv();
+}
+
+int swsusp_arch_resume(void)
+{
+	restore_image();
+	return 0;
+}
+/* References to section boundaries */
+extern const void __nosave_begin, __nosave_end;
+int pfn_is_nosave(unsigned long pfn)
+{
+	const unsigned long first = PFN_DOWN(__pa(&__nosave_begin));
+	const unsigned long limit = PFN_UP(__pa(&__nosave_end));
+
+	return first <= pfn && pfn < limit;	/* pfn in [first, limit) */
+}
+
+struct restore_data_record {
+	unsigned long magic;
+};
+
+#define RESTORE_MAGIC	0x0123456789ABCDEFUL
+
+/**
+ *	arch_hibernation_header_save - populate the architecture specific part
+ *		of a hibernation image header
+ *	@addr: address to save the data at
+ */
+int arch_hibernation_header_save(void *addr, unsigned int max_size)
+{
+	struct restore_data_record *hdr = addr;
+
+	if (sizeof(*hdr) > max_size)	/* record does not fit */
+		return -EOVERFLOW;
+	hdr->magic = RESTORE_MAGIC;
+	return 0;
+}
+
+/**
+ *	arch_hibernation_header_restore - read the architecture specific data
+ *		from the hibernation image header
+ *	@addr: address to read the data from
+ */
+int arch_hibernation_header_restore(void *addr)
+{
+	struct restore_data_record *rdr = addr;
+
+	return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL;
+}
diff --git a/arch/sw_64/kernel/hibernate_asm.S b/arch/sw_64/kernel/hibernate_asm.S
new file mode 100644
index 0000000000000000000000000000000000000000..ff997cd76c5aef4bb9fa2eaaced2f57c21a0c631
--- /dev/null
+++ b/arch/sw_64/kernel/hibernate_asm.S
@@ -0,0 +1,122 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/page.h>
+#include <asm/regdef.h>
+
+	.text
+	.set noat
+ENTRY(swsusp_arch_suspend)
+	ldi	$16, hibernate_state
+	ldi	$1, PSTATE_REGS($16)
+	stl	$9, CALLEE_R9($1)
+	stl	$10, CALLEE_R10($1)
+	stl	$11, CALLEE_R11($1)
+	stl	$12, CALLEE_R12($1)
+	stl	$13, CALLEE_R13($1)
+	stl	$14, CALLEE_R14($1)
+	stl	$15, CALLEE_R15($1)
+	stl	$26, CALLEE_RA($1)
+	/* SIMD-FP */
+	ldi	$1, PSTATE_FPREGS($16)
+	vstd	$f2, CALLEE_F2($1)
+	vstd	$f3, CALLEE_F3($1)
+	vstd	$f4, CALLEE_F4($1)
+	vstd	$f5, CALLEE_F5($1)
+	vstd	$f6, CALLEE_F6($1)
+	vstd	$f7, CALLEE_F7($1)
+	vstd	$f8, CALLEE_F8($1)
+	vstd	$f9, CALLEE_F9($1)
+	rfpcr	$f0
+	fstd	$f0, PSTATE_FPCR($16)
+
+	stl	$8, PSTATE_KTP($16)
+	stl	sp, PSTATE_SP($16)
+	call	swsusp_save
+	ldi	$16, hibernate_state
+	ldi	$1, PSTATE_REGS($16)
+	ldl	$26, CALLEE_RA($1)
+
+	/* save current_thread_info()->pcbb */
+	ret
+END(swsusp_arch_suspend)
+
+ENTRY(restore_image)
+	/* prepare to copy image data to their original locations */
+	ldi	t0, restore_pblist
+	ldl	t0, 0(t0)
+$loop:
+	beq	t0, $done
+
+	/* get addresses from the pbe and copy the page */
+	ldl	t1, PBE_ADDR(t0)  /* source */
+	ldl	t2, PBE_ORIG_ADDR(t0) /* destination */
+	ldi	t3, PAGE_SIZE
+	addl	t1, t3, t3		/* t3 = end of the source page */
+$cpyloop:
+	ldl	t8, 0(t1)
+	stl	t8, 0(t2)
+	addl	t1, 8, t1
+	addl	t2, 8, t2
+	cmpeq	t1, t3, t4
+	beq	t4, $cpyloop		/* repeat until t1 reaches t3 */
+
+	/* progress to the next pbe */
+	ldl	t0, PBE_NEXT(t0)
+	bne	t0, $loop
+$done:
+
+	/* tell the hibernation core that we've just restored the memory */
+	ldi	$0, in_suspend
+	stl	$31, 0($0)	/* NOTE(review): confirm stl's store width matches in_suspend's type */
+
+	ldi	$16, hibernate_state
+	ldi	$1, PSTATE_REGS($16)
+
+	ldl	$9, CALLEE_R9($1)
+	ldl	$10, CALLEE_R10($1)
+	ldl	$11, CALLEE_R11($1)
+	ldl	$12, CALLEE_R12($1)
+	ldl	$13, CALLEE_R13($1)
+	ldl	$14, CALLEE_R14($1)
+	ldl	$15, CALLEE_R15($1)
+	ldl	$26, CALLEE_RA($1)
+	/* SIMD-FP */
+	fldd	$f0, PSTATE_FPCR($16)
+	wfpcr	$f0
+	fimovd	$f0, $2
+	and	$2, 0x3, $2		/* low two bits select setfpec0..3 */
+	beq	$2, $hibernate_setfpec_0
+	subl	$2, 0x1, $2
+	beq	$2, $hibernate_setfpec_1
+	subl	$2, 0x1, $2
+	beq	$2, $hibernate_setfpec_2
+	setfpec3
+	br	$hibernate_setfpec_over
+$hibernate_setfpec_0:
+	setfpec0
+	br	$hibernate_setfpec_over
+$hibernate_setfpec_1:
+	setfpec1
+	br	$hibernate_setfpec_over
+$hibernate_setfpec_2:
+	setfpec2
+$hibernate_setfpec_over:
+	ldi	$1, PSTATE_FPREGS($16)
+	vldd	$f2, CALLEE_F2($1)
+	vldd	$f3, CALLEE_F3($1)
+	vldd	$f4, CALLEE_F4($1)
+	vldd	$f5, CALLEE_F5($1)
+	vldd	$f6, CALLEE_F6($1)
+	vldd	$f7, CALLEE_F7($1)
+	vldd	$f8, CALLEE_F8($1)
+	vldd	$f9, CALLEE_F9($1)
+
+	ldl	sp, PSTATE_SP($16)
+	ldl	$8, PSTATE_KTP($16)
+	sys_call HMC_wrktp
+
+	ldi	$0, 0($31)		/* $0 = 0 */
+
+	ret
+END(restore_image)
diff --git a/arch/sw_64/kernel/insn.c b/arch/sw_64/kernel/insn.c
new file mode 100644
index 0000000000000000000000000000000000000000..281578e1bfc03b708be124e0e3d28644d811b512
--- /dev/null
+++ b/arch/sw_64/kernel/insn.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019, serveros, linyue
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/spinlock.h>
+#include <linux/kprobes.h>
+
+//static DEFINE_RAW_SPINLOCK(patch_lock);
+
+int __kprobes sw64_insn_read(void *addr, u32 *insnp)
+{
+	__le32 raw;
+	int err = copy_from_kernel_nofault(&raw, addr, SW64_INSN_SIZE);
+
+	/* *insnp is only written when the read succeeded. */
+	if (err)
+		return err;
+	*insnp = le32_to_cpu(raw);
+	return 0;
+}
+
+/*
+ * Write one instruction word, already converted to little-endian, at @addr
+ * and return the copy_to_kernel_nofault() result.
+ *
+ * NOTE(review): no lock is taken around the store; confirm that callers
+ * serialise concurrent patching of the same location.
+ */
+static int __kprobes __sw64_insn_write(void *addr, __le32 insn)
+{
+	void *dst = addr;
+
+	return copy_to_kernel_nofault(dst, &insn, SW64_INSN_SIZE);
+}
+
+/*
+ * Write two consecutive instruction words, passed as one little-endian
+ * 64-bit value, at @addr and return the copy_to_kernel_nofault() result.
+ *
+ * NOTE(review): no lock is taken around the store; confirm that callers
+ * serialise concurrent patching of the same location.
+ */
+static int __kprobes __sw64_insn_double_write(void *addr, __le64 insn)
+{
+	void *dst = addr;
+	const size_t len = 2 * SW64_INSN_SIZE;
+
+	return copy_to_kernel_nofault(dst, &insn, len);
+}
+
+int __kprobes sw64_insn_write(void *addr, u32 insn)
+{
+	/* Instructions are 4-byte aligned; refuse anything else. */
+	if (((uintptr_t)addr & 0x3) != 0)
+		return -EINVAL;
+
+	return __sw64_insn_write(addr, cpu_to_le32(insn));
+}
+
+int __kprobes sw64_insn_double_write(void *addr, u64 insn)
+{
+	/* Only 4-byte alignment is checked, as for a single instruction. */
+	if (((uintptr_t)addr & 0x3) != 0)
+		return -EINVAL;
+
+	return __sw64_insn_double_write(addr, cpu_to_le64(insn));
+}
+unsigned int __kprobes sw64_insn_nop(void)
+{
+	return SW64_BIS(R31, R31, R31);
+}
+
+unsigned int __kprobes sw64_insn_call(unsigned int ra, unsigned int rb)
+{
+	return SW64_CALL(ra, rb, 0);
+}
+
+unsigned int __kprobes sw64_insn_sys_call(unsigned int num)
+{
+	return  SW64_SYS_CALL(num);
+}
+
+/*
+ * Encode a "br" from @pc (the address of the branch itself, not pc + 4) to
+ * @new_pc. Returns -1 converted to unsigned int if the target is out of range.
+ */
+unsigned int __kprobes sw64_insn_br(unsigned int ra, unsigned long pc, unsigned long new_pc)
+{
+	/* Full-width difference: an int would wrap for far-away targets. */
+	long offset = (long)(new_pc - pc);
+	unsigned int disp, minus = 0x1fffff;
+
+	if (!(offset <= BR_MAX_DISP && offset >= -BR_MAX_DISP))
+		return -1;
+	/* The displacement is counted in words from pc + 4, 21 bits wide. */
+	disp = offset > 0 ? (offset - 4) / 4 : ~(-offset / 4) & minus;
+	return SW64_BR(ra, disp);
+}
diff --git a/arch/sw_64/kernel/jump_label.c b/arch/sw_64/kernel/jump_label.c
new file mode 100644
index 0000000000000000000000000000000000000000..f3bc40370e4de9b77889343338b509d6bdcad8c6
--- /dev/null
+++ b/arch/sw_64/kernel/jump_label.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/jump_label.h>
+
+#include <asm/bug.h>
+#include <asm/cacheflush.h>
+
+void arch_jump_label_transform(struct jump_entry *entry,
+			       enum jump_label_type type)
+{
+	u32 *insnp = (u32 *)entry->code;
+	u32 insn;
+
+	if (type == JUMP_LABEL_JMP) {
+		insn = sw64_insn_br(R31, (entry->code), entry->target);
+		BUG_ON(insn == -1);	/* -1: target out of branch range */
+	} else {
+		insn = sw64_insn_nop();
+	}
+	/* NOTE(review): plain store to kernel text; confirm it is writable here. */
+	*insnp = insn;
+	/* Flush the instruction cache for the rewritten instruction. */
+	flush_icache_range(entry->code, entry->code + SW64_INSN_SIZE);
+}
+
+void arch_jump_label_transform_static(struct jump_entry *entry,
+				      enum jump_label_type type)
+{
+	/*
+	 * no need to rewrite NOP
+	 */
+}
diff --git a/arch/sw_64/kernel/kgdb.c b/arch/sw_64/kernel/kgdb.c
new file mode 100644
index 0000000000000000000000000000000000000000..833f72a1577ca8f2d2f01113c0443739a9a9c025
--- /dev/null
+++ b/arch/sw_64/kernel/kgdb.c
@@ -0,0 +1,233 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * sw64 KGDB support
+ *
+ * Based on arch/arm64/kernel/kgdb.c
+ *
+ * Copyright (C) Xia Bin
+ * Author: Xia Bin
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kdebug.h>
+#include <linux/kgdb.h>
+
+struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
+	{ "r0", 8, offsetof(struct pt_regs, regs[0])},
+	{ "r1", 8, offsetof(struct pt_regs, regs[1])},
+	{ "r2", 8, offsetof(struct pt_regs, regs[2])},
+	{ "r3", 8, offsetof(struct pt_regs, regs[3])},
+	{ "r4", 8, offsetof(struct pt_regs, regs[4])},
+	{ "r5", 8, offsetof(struct pt_regs, regs[5])},
+	{ "r6", 8, offsetof(struct pt_regs, regs[6])},
+	{ "r7", 8, offsetof(struct pt_regs, regs[7])},
+	{ "r8", 8, offsetof(struct pt_regs, regs[8])},
+
+	{ "r9",  8, offsetof(struct pt_regs, regs[9])},
+	{ "r10", 8, offsetof(struct pt_regs, regs[10])},
+	{ "r11", 8, offsetof(struct pt_regs, regs[11])},
+	{ "r12", 8, offsetof(struct pt_regs, regs[12])},
+	{ "r13", 8, offsetof(struct pt_regs, regs[13])},
+	{ "r14", 8, offsetof(struct pt_regs, regs[14])},
+	{ "r15", 8, offsetof(struct pt_regs, regs[15])},
+
+	{ "r16", 8, offsetof(struct pt_regs, regs[16])},
+	{ "r17", 8, offsetof(struct pt_regs, regs[17])},
+	{ "r18", 8, offsetof(struct pt_regs, regs[18])},
+
+	{ "r19", 8, offsetof(struct pt_regs, regs[19])},
+	{ "r20", 8, offsetof(struct pt_regs, regs[20])},
+	{ "r21", 8, offsetof(struct pt_regs, regs[21])},
+	{ "r22", 8, offsetof(struct pt_regs, regs[22])},
+	{ "r23", 8, offsetof(struct pt_regs, regs[23])},
+	{ "r24", 8, offsetof(struct pt_regs, regs[24])},
+	{ "r25", 8, offsetof(struct pt_regs, regs[25])},
+	{ "r26", 8, offsetof(struct pt_regs, regs[26])},
+	{ "r27", 8, offsetof(struct pt_regs, regs[27])},
+	{ "at", 8, offsetof(struct pt_regs, regs[28])},
+	{ "gp", 8, offsetof(struct pt_regs, regs[29])},
+	{ "sp", 8, offsetof(struct pt_regs, regs[30])},
+	{ "zero", 8, -1 },
+
+	{ "f0", 8, -1 },
+	{ "f1", 8, -1 },
+	{ "f2", 8, -1 },
+	{ "f3", 8, -1 },
+	{ "f4", 8, -1 },
+	{ "f5", 8, -1 },
+	{ "f6", 8, -1 },
+	{ "f7", 8, -1 },
+	{ "f8", 8, -1 },
+	{ "f9", 8, -1 },
+	{ "f10", 8, -1 },
+	{ "f11", 8, -1 },
+	{ "f12", 8, -1 },
+	{ "f13", 8, -1 },
+	{ "f14", 8, -1 },
+	{ "f15", 8, -1 },
+	{ "f16", 8, -1 },
+	{ "f17", 8, -1 },
+	{ "f18", 8, -1 },
+	{ "f19", 8, -1 },
+	{ "f20", 8, -1 },
+	{ "f21", 8, -1 },
+	{ "f22", 8, -1 },
+	{ "f23", 8, -1 },
+	{ "f24", 8, -1 },
+	{ "f25", 8, -1 },
+	{ "f26", 8, -1 },
+	{ "f27", 8, -1 },
+	{ "f28", 8, -1 },
+	{ "f29", 8, -1 },
+	{ "f30", 8, -1 },
+	{ "fpcr", 8, -1 },
+
+	{ "pc", 8, offsetof(struct pt_regs, pc)},
+	{ "", 8, -1 },
+	{ "tp", 8, -1},
+};
+
+char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
+{
+	if (regno < 0 || regno >= DBG_MAX_REG_NUM)
+		return NULL;
+	/* An offset of -1 marks a register not kept in pt_regs: report 0. */
+	if (dbg_reg_def[regno].offset == -1)
+		memset(mem, 0, dbg_reg_def[regno].size);
+	else
+		memcpy(mem, (void *)regs + dbg_reg_def[regno].offset,
+				dbg_reg_def[regno].size);
+	return dbg_reg_def[regno].name;
+}
+
+int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
+{
+	if (regno < 0 || regno >= DBG_MAX_REG_NUM)
+		return -EINVAL;
+	if (dbg_reg_def[regno].offset == -1)	/* no pt_regs slot: ignore */
+		return 0;
+	memcpy((void *)regs + dbg_reg_def[regno].offset, mem,
+			dbg_reg_def[regno].size);
+	return 0;
+}
+
+void
+sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task)
+{
+	int i;
+	/* Initialize to zero */
+	memset((char *)gdb_regs, 0, NUMREGBYTES);
+	for (i = 0; i < DBG_MAX_REG_NUM; i++)
+		gdb_regs[i] = get_reg(task, i);
+}
+
+void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
+{
+	pr_info("BEFORE SET PC WITH %lx\n", pc);
+	instruction_pointer(regs) = pc;
+	pr_info("AFTER SET PC IS %lx\n", instruction_pointer(regs));
+}
+
+void kgdb_call_nmi_hook(void *ignored)
+{
+	kgdb_nmicallback(raw_smp_processor_id(), NULL);
+}
+
+void kgdb_roundup_cpus(void)
+{
+	local_irq_enable();
+	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
+	local_irq_disable();
+}
+
+int kgdb_arch_handle_exception(int exception_vector, int signo,
+			       int err_code, char *remcom_in_buffer,
+			       char *remcom_out_buffer,
+			       struct pt_regs *linux_regs)
+{
+	unsigned long resume_pc = -1;
+	char *arg = &remcom_in_buffer[1];
+
+	/* Only 'c', optionally followed by a hex resume address, is handled. */
+	if (remcom_in_buffer[0] != 'c')
+		return -1;
+
+	if (kgdb_hex2long(&arg, &resume_pc))
+		kgdb_arch_set_pc(linux_regs, resume_pc);
+
+	return 0;
+}
+
+static int __kgdb_notify(struct die_args *args, unsigned long cmd)
+{
+	struct pt_regs *regs = args->regs;
+
+	/*
+	 * Events from user mode are not ours, and a non-zero result from
+	 * kgdb_handle_exception() also ends in NOTIFY_DONE.
+	 */
+	if (user_mode(regs) ||
+	    kgdb_handle_exception(1, args->signr, cmd, regs))
+		return NOTIFY_DONE;
+	return NOTIFY_STOP;
+}
+
+static int
+kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr)
+{
+	unsigned long flags;
+	int ret;
+
+	local_irq_save(flags);
+	ret = __kgdb_notify(ptr, cmd);
+	local_irq_restore(flags);
+
+	return ret;
+}
+
+static struct notifier_block kgdb_notifier = {
+	.notifier_call  = kgdb_notify,
+};
+
+/*
+ * kgdb_arch_init - Perform any architecture specific initalization.
+ * This function will handle the initalization of any architecture
+ * specific callbacks.
+ */
+int kgdb_arch_init(void)
+{
+	/*
+	 * The original returned the non-zero registration result as-is and
+	 * 0 otherwise, which is the same as handing it back unchanged.
+	 */
+	return register_die_notifier(&kgdb_notifier);
+}
+
+/*
+ * kgdb_arch_exit - Perform any architecture specific uninitalization.
+ * This function will handle the uninitalization of any architecture
+ * specific callbacks, for dynamic registration and unregistration.
+ */
+void kgdb_arch_exit(void)
+{
+	unregister_die_notifier(&kgdb_notifier);
+}
+
+/*
+ * sw64 instructions are always in LE.
+ * Break instruction is encoded in LE format
+ */
+const struct kgdb_arch arch_kgdb_ops = {
+	.gdb_bpt_instr = {0x80, 00, 00, 00}
+};
diff --git a/arch/sw_64/kernel/kprobes/Makefile b/arch/sw_64/kernel/kprobes/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..110ba2bf7752361442022553269447ceb802d465
--- /dev/null
+++ b/arch/sw_64/kernel/kprobes/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_KPROBES)           += kprobes.o decode-insn.o
+obj-$(CONFIG_KPROBES_ON_FTRACE) += kprobes-ftrace.o
diff --git a/arch/sw_64/kernel/kprobes/common.h b/arch/sw_64/kernel/kprobes/common.h
new file mode 100644
index 0000000000000000000000000000000000000000..de10058f0376ea342c973e0e03a8ef1bd9faa72c
--- /dev/null
+++ b/arch/sw_64/kernel/kprobes/common.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SW64_KERNEL_KPROBES_COMMON_H
+#define _SW64_KERNEL_KPROBES_COMMON_H
+
+#include <linux/kprobes.h>	/* kprobe_opcode_t: keeps this header self-contained */
+
+/* Returns true when a kprobe may be placed on the instruction at @addr. */
+extern bool sw64_insn_can_kprobe(kprobe_opcode_t *addr);
+#endif /* _SW64_KERNEL_KPROBES_COMMON_H */
diff --git a/arch/sw_64/kernel/kprobes/decode-insn.c b/arch/sw_64/kernel/kprobes/decode-insn.c
new file mode 100644
index 0000000000000000000000000000000000000000..91c31111f2b73273d186d6b0c1cb9961e12dd68a
--- /dev/null
+++ b/arch/sw_64/kernel/kprobes/decode-insn.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Based on arch/arm64/kernel/probes/decode-insn.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/kprobes.h>
+
+#include "common.h"
+
+static bool __kprobes sw64_insn_is_steppable(u32 insn)
+{
+	/*
+	 * An instruction that writes the PC cannot be stepped out of line:
+	 * the value it produces would likely be relative to the XOL slot.
+	 * Deliberately raising an exception while stepping is not safe
+	 * either.  The predicates run in the same order as they always have.
+	 */
+
+	/* sys_call_b/sys_call, then call/ret/jmp, then br/bsr. */
+	if (sw64_insn_is_sys_call_b(insn) || sw64_insn_is_sys_call(insn))
+		return false;
+	if (sw64_insn_is_call(insn) || sw64_insn_is_ret(insn) ||
+	    sw64_insn_is_jmp(insn))
+		return false;
+	if (sw64_insn_is_br(insn) || sw64_insn_is_bsr(insn))
+		return false;
+
+	/* memb/imemb, rtc and the lldl/lldw encodings are refused too. */
+	if (sw64_insn_is_memb(insn) || sw64_insn_is_imemb(insn) ||
+	    sw64_insn_is_rtc(insn))
+		return false;
+	if (sw64_insn_is_lldl(insn) || sw64_insn_is_lldw(insn))
+		return false;
+
+	/* The beq ... blbs family of predicates. */
+	if (sw64_insn_is_beq(insn) || sw64_insn_is_bne(insn) ||
+	    sw64_insn_is_blt(insn) || sw64_insn_is_ble(insn) ||
+	    sw64_insn_is_bgt(insn) || sw64_insn_is_bge(insn) ||
+	    sw64_insn_is_blbc(insn) || sw64_insn_is_blbs(insn))
+		return false;
+
+	/* The fbeq ... fbge family of predicates. */
+	if (sw64_insn_is_fbeq(insn) || sw64_insn_is_fbne(insn) ||
+	    sw64_insn_is_fblt(insn) || sw64_insn_is_fble(insn) ||
+	    sw64_insn_is_fbgt(insn) || sw64_insn_is_fbge(insn))
+		return false;
+	return true;
+}
+
+
+#ifdef CONFIG_KPROBES
+/*
+ * Returns true when @addr is safe to probe: no lldl/lldw earlier in the
+ * enclosing symbol is still waiting for its rd_f.  Returns false when the
+ * symbol cannot be resolved, instead of scanning upwards from NULL.
+ */
+static bool __kprobes is_probed_between_atomic(kprobe_opcode_t *addr)
+{
+	int count = 0;
+	unsigned long size = 0, offset = 0;
+	kprobe_opcode_t *scan;
+
+	if (!kallsyms_lookup_size_offset((unsigned long)addr, &size, &offset))
+		return false;
+	for (scan = addr - offset / sizeof(*scan); scan < addr; scan++) {
+		u32 insn = le32_to_cpu(*scan);
+
+		if (sw64_insn_is_lldl(insn) || sw64_insn_is_lldw(insn))
+			count++;
+		if (sw64_insn_is_rd_f(insn))
+			count--;
+	}
+	return count == 0;
+}
+
+/* Decide whether a kprobe may be planted on the instruction at @addr. */
+bool __kprobes sw64_insn_can_kprobe(kprobe_opcode_t *addr)
+{
+	bool ok = sw64_insn_is_steppable(le32_to_cpu(*addr));
+
+	if (!ok)
+		pr_warn("addr is not steppable\n");
+#ifdef CONFIG_SUBARCH_C3B
+	/* C3B only: also consult is_probed_between_atomic(). */
+	if (ok && !is_probed_between_atomic(addr)) {
+		pr_warn("addr between atomic can't probe\n");
+		ok = false;
+	}
+#endif
+	return ok;
+}
+#endif
diff --git a/arch/sw_64/kernel/kprobes/kprobes-ftrace.c b/arch/sw_64/kernel/kprobes/kprobes-ftrace.c
new file mode 100644
index 0000000000000000000000000000000000000000..89d7dba9dc25c7938019f8362ef461ab2607b4c6
--- /dev/null
+++ b/arch/sw_64/kernel/kprobes/kprobes-ftrace.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Dynamic Ftrace based Kprobes Optimization
+ */
+
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/hardirq.h>
+#include <linux/preempt.h>
+#include <linux/ftrace.h>
+
+/* Ftrace callback handler for kprobes: runs the handlers of the probe at @ip. */
+void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
+			   struct ftrace_ops *ops, struct pt_regs *regs)
+{
+	struct kprobe *p;
+	struct kprobe_ctlblk *kcb;
+
+	p = get_kprobe((kprobe_opcode_t *)ip);
+	if (unlikely(!p) || kprobe_disabled(p))
+		return;	/* no probe registered at @ip, or it is disabled */
+
+	kcb = get_kprobe_ctlblk();
+	if (kprobe_running()) {
+		kprobes_inc_nmissed_count(p);	/* a probe is already active: count a miss */
+	} else {
+		regs->regs[28] -= MCOUNT_INSN_SIZE;	/* NOTE(review): regs[28] presumably holds the pc - confirm */
+
+		__this_cpu_write(current_kprobe, p);
+		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+		if (!p->pre_handler || !p->pre_handler(p, regs)) {
+			regs->regs[28] += MCOUNT_INSN_SIZE;	/* undo the adjustment made above */
+			if (unlikely(p->post_handler)) {
+				kcb->kprobe_status = KPROBE_HIT_SSDONE;
+				p->post_handler(p, regs, 0);
+			}
+		}
+		__this_cpu_write(current_kprobe, NULL);	/* done: no probe is current any more */
+	}
+}
+NOKPROBE_SYMBOL(kprobe_ftrace_handler);
+
+int arch_prepare_kprobe_ftrace(struct kprobe *p)
+{
+	p->ainsn.insn = NULL;	/* no instruction slot is allocated on this path */
+	p->ainsn.boostable = -1;
+	return 0;	/* always succeeds */
+}
diff --git a/arch/sw_64/kernel/kprobes/kprobes.c b/arch/sw_64/kernel/kprobes/kprobes.c
new file mode 100644
index 0000000000000000000000000000000000000000..024ce7d99e61688b7b95c5120e9432a030c65735
--- /dev/null
+++ b/arch/sw_64/kernel/kprobes/kprobes.c
@@ -0,0 +1,309 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Kernel Probes (KProbes)
+ *  arch/sw_64/kernel/kprobes.c
+ */
+
+#include <linux/kprobes.h>
+#include <linux/kdebug.h>
+#include <linux/slab.h>
+
+#include "common.h"
+
+static u32 breakpoint_insn = BREAK_KPROBE;
+static u32 breakpoint2_insn = BREAK_KPROBE_SS;
+
+int post_kprobe_handler(struct pt_regs *regs);
+
+DEFINE_PER_CPU(struct kprobe *, current_kprobe);
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
+/*
+ * Validate the probe address and prepare the out-of-line slot for @p.
+ * Returns 0 on success, -EINVAL for an address that cannot be probed
+ * and -ENOMEM when no instruction slot is available.
+ */
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
+{
+	extern char __start_rodata[];
+	extern char __end_rodata[];
+	unsigned long probe_addr = (unsigned long)p->addr;
+
+	/* Probe addresses must be 4-byte aligned. */
+	if (probe_addr & 0x3)
+		return -EINVAL;
+
+	/* Refuse read-only data before its contents are decoded as code. */
+	if (probe_addr >= (unsigned long) __start_rodata &&
+			probe_addr <= (unsigned long) __end_rodata)
+		return -EINVAL;
+
+	if (!sw64_insn_can_kprobe(p->addr))
+		return -EINVAL;
+
+	/* Keep the original instruction; arch_disarm_kprobe() restores it. */
+	p->opcode = le32_to_cpu(*p->addr);
+	p->ainsn.insn = get_insn_slot();
+	if (!p->ainsn.insn)
+		return -ENOMEM;
+
+	/*
+	 * Slot layout: the original instruction at index zero and a
+	 * break trap instruction at index one.
+	 */
+	p->ainsn.insn[0] = p->opcode;
+	p->ainsn.insn[1] = breakpoint2_insn;
+	return 0;
+}
+
+void __kprobes arch_arm_kprobe(struct kprobe *p)
+{
+	sw64_insn_write(p->addr, breakpoint_insn);	/* BREAK_KPROBE replaces the probed insn */
+	flush_insn_slot(p);
+}
+
+void __kprobes arch_disarm_kprobe(struct kprobe *p)
+{
+	sw64_insn_write(p->addr, p->opcode);	/* put back the opcode saved by arch_prepare_kprobe() */
+	flush_insn_slot(p);
+}
+
+void __kprobes arch_remove_kprobe(struct kprobe *p)
+{
+	if (p->ainsn.insn) {	/* nothing to release when no slot was allocated */
+		free_insn_slot(p->ainsn.insn, 0);
+		p->ainsn.insn = NULL;	/* a repeated call then finds nothing to free */
+	}
+}
+
+static void save_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+	kcb->prev_kprobe.kp = kprobe_running();	/* the probe being interrupted */
+	kcb->prev_kprobe.status = kcb->kprobe_status;
+}
+
+static void restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+	__this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);	/* undoes save_previous_kprobe() */
+	kcb->kprobe_status = kcb->prev_kprobe.status;
+}
+
+static void __kprobes set_current_kprobe(struct kprobe *p)
+{
+	__this_cpu_write(current_kprobe, p);	/* per-CPU variable: only this CPU is affected */
+}
+
+
+static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
+		struct kprobe_ctlblk *kcb, int reenter)
+{
+	if (!reenter) {
+		kcb->kprobe_status = KPROBE_HIT_SS;
+	} else {
+		/* Nested hit: park the active probe before taking over. */
+		save_previous_kprobe(kcb);
+		set_current_kprobe(p);
+		kcb->kprobe_status = KPROBE_REENTER;
+	}
+	/* Save the trap-time pc, then continue in the out-of-line copy. */
+	kcb->target_pc = regs->pc;
+	regs->pc = (unsigned long)&p->ainsn.insn[0];
+}
+
+static int __kprobes reenter_kprobe(struct kprobe *p,
+		struct pt_regs *regs,
+		struct kprobe_ctlblk *kcb)
+{
+	switch (kcb->kprobe_status) {
+	case KPROBE_HIT_SS:
+	case KPROBE_REENTER:
+		/* Hit while stepping, or while already nested: give up. */
+		pr_warn("Unrecoverable kprobe detected.\n");
+		dump_kprobe(p);
+		BUG();
+		return 1;
+	case KPROBE_HIT_ACTIVE:
+	case KPROBE_HIT_SSDONE:
+		kprobes_inc_nmissed_count(p);
+		setup_singlestep(p, regs, kcb, 1);
+		return 1;
+	default:
+		WARN_ON(1);
+		return 0;
+	}
+}
+
+/*
+ * DIE_BREAK entry: returns 1 for a kprobe hit, 0 otherwise.  Preemption
+ * stays off only while a single-step is pending.
+ */
+int __kprobes kprobe_handler(struct pt_regs *regs)
+{
+	struct kprobe *p;
+	struct kprobe_ctlblk *kcb;
+	unsigned long addr = instruction_pointer(regs);
+
+	if (user_mode(regs))
+		return 0;
+
+	/* No preemption for the entire duration of kprobe processing. */
+	preempt_disable();
+	kcb = get_kprobe_ctlblk();
+	p = get_kprobe((kprobe_opcode_t *)(addr - 4));
+	if (!p)
+		goto not_ours;
+	if (kprobe_running()) {
+		if (reenter_kprobe(p, regs, kcb))
+			return 1;
+		goto not_ours;
+	}
+
+	set_current_kprobe(p);
+	kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+	/* No pre-handler, or it returned 0: step the original instruction. */
+	if (!p->pre_handler || !p->pre_handler(p, regs)) {
+		setup_singlestep(p, regs, kcb, 0);
+		return 1;
+	}
+	/* The pre-handler redirected execution: no step, so rebalance now. */
+	reset_current_kprobe();
+	preempt_enable_no_resched();
+	return 1;
+
+not_ours:
+	preempt_enable_no_resched();
+	return 0;
+}
+/* DIE_SSTEPBP entry: finish the single step; 0 when no kprobe is active. */
+int __kprobes post_kprobe_handler(struct pt_regs *regs)
+{
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	struct kprobe *cur = kprobe_running();
+
+	/* Nothing in flight on this CPU: the trap is not ours. */
+	if (cur == NULL)
+		return 0;
+
+	/* A re-entered probe does not get its post_handler run. */
+	if (cur->post_handler && kcb->kprobe_status != KPROBE_REENTER) {
+		kcb->kprobe_status = KPROBE_HIT_SSDONE;
+		cur->post_handler(cur, regs, 0);
+	}
+
+	/* Continue at the pc saved by setup_singlestep(). */
+	regs->pc = kcb->target_pc;
+
+	/* Re-entered: reinstate the interrupted probe; otherwise clear it. */
+	if (kcb->kprobe_status == KPROBE_REENTER)
+		restore_previous_kprobe(kcb);
+	else
+		reset_current_kprobe();
+
+	preempt_enable_no_resched();
+	return 1;
+}
+
+int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned long mmcsr)
+{
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+	if (kcb->kprobe_status & KPROBE_HIT_SS) {	/* note: bitwise test, not == */
+		regs->pc = kcb->target_pc;	/* pc saved by setup_singlestep() */
+
+		reset_current_kprobe();
+		preempt_enable_no_resched();	/* pairs with preempt_disable() in kprobe_handler() */
+	}
+	return 0;	/* always 0; @mmcsr is not used */
+}
+
+/*
+ * Wrapper routine for handling exceptions.
+ *
+ * @self: notifier block, not used
+ * @val:  die event code
+ * @data: struct die_args describing the event
+ *
+ * DIE_BREAK goes to kprobe_handler(), DIE_SSTEPBP to
+ * post_kprobe_handler().  NOTIFY_STOP is returned when the handler
+ * claimed the event, NOTIFY_DONE in every other case.
+ */
+int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
+		unsigned long val, void *data)
+{
+	struct die_args *args = data;
+	int claimed = 0;
+
+	if (val == DIE_BREAK)
+		claimed = kprobe_handler(args->regs);
+	else if (val == DIE_SSTEPBP)
+		claimed = post_kprobe_handler(args->regs);
+
+	return claimed ? NOTIFY_STOP : NOTIFY_DONE;
+}
+/*
+ * Function return probe trampoline:
+ *	- arch_init_kprobes() registers a kprobe on __kretprobe_trampoline
+ *	- When the probed function returns, this probe causes the
+ *	  handlers to fire
+ */
+static void __used kretprobe_trampoline_holder(void)
+{
+	asm volatile(
+			/* Keep the assembler from reordering around the label. */
+			".set noreorder\n\t"
+			"nop\n\t"
+			".global __kretprobe_trampoline\n"
+			"__kretprobe_trampoline:\n\t"
+			"nop\n\t"
+			: : : "memory");
+}
+
+void __kretprobe_trampoline(void);
+
+void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
+		struct pt_regs *regs)
+{
+	ri->ret_addr = (kprobe_opcode_t *) regs->regs[26];	/* remember the real return address */
+	ri->fp = NULL;
+
+	/* Replace the return addr with trampoline addr */
+	regs->regs[26] = (unsigned long)__kretprobe_trampoline;
+}
+
+/*
+ * Called when the probe at kretprobe trampoline is hit
+ */
+static int __kprobes trampoline_probe_handler(struct kprobe *p,
+		struct pt_regs *regs)
+{
+	unsigned long orig_ret_address;
+
+	orig_ret_address = __kretprobe_trampoline_handler(regs, NULL);
+	instruction_pointer(regs) = orig_ret_address;	/* resume at the original return address */
+	regs->regs[26] = orig_ret_address;
+
+	/*
+	 * By returning a non-zero value, we are telling
+	 * kprobe_handler() to skip single-stepping and the
+	 * post_handler.  This function does not touch the preempt count.
+	 */
+	return 1;
+}
+
+/* Returns 1 when @p sits on the kretprobe trampoline, 0 otherwise. */
+int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+{
+	kprobe_opcode_t *tramp = (kprobe_opcode_t *)__kretprobe_trampoline;
+
+	return p->addr == tramp;
+}
+
+static struct kprobe trampoline_p = {
+	.addr = (kprobe_opcode_t *)__kretprobe_trampoline,	/* label emitted by kretprobe_trampoline_holder() */
+	.pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init_kprobes(void)
+{
+	return register_kprobe(&trampoline_p);	/* registers the kretprobe trampoline probe */
+}
diff --git a/arch/sw_64/kernel/machine_kexec.c b/arch/sw_64/kernel/machine_kexec.c
new file mode 100644
index 0000000000000000000000000000000000000000..950998476cdaced4b7368cb4712a1d7081e11047
--- /dev/null
+++ b/arch/sw_64/kernel/machine_kexec.c
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * machine_kexec.c for kexec
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+#include <linux/kexec.h>
+#include <linux/mm.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/reboot.h>
+
+#include <asm/cacheflush.h>
+
+extern void *kexec_control_page;
+extern const unsigned char relocate_new_kernel[];
+extern const size_t relocate_new_kernel_size;
+
+extern unsigned long kexec_start_address;
+extern unsigned long kexec_indirection_page;
+
+static atomic_t waiting_for_crash_ipi;
+
+#ifdef CONFIG_SMP
+extern struct smp_rcb_struct *smp_rcb;
+
+/*
+ * Run on the other CPUs by machine_shutdown(): wait, interrupts off, for
+ * smp_rcb->ready to read 0, then go offline and call reset_cpu().
+ */
+static void kexec_smp_down(void *ignored)
+{
+	int cpu = smp_processor_id();
+
+	local_irq_disable();
+	while (READ_ONCE(smp_rcb->ready) != 0)
+		mdelay(1);
+	set_cpu_online(cpu, false);	/* machine_shutdown() polls num_online_cpus() */
+	reset_cpu(cpu);
+}
+#endif
+
+int machine_kexec_prepare(struct kimage *kimage)
+{
+	return 0;	/* no architecture-specific preparation is done */
+}
+
+void machine_kexec_cleanup(struct kimage *kimage)
+{	/* intentionally empty: machine_kexec_prepare() sets nothing up */
+}
+
+void machine_shutdown(void)
+{
+#ifdef CONFIG_SMP
+	WRITE_ONCE(smp_rcb->ready, 0);	/* kexec_smp_down() spins until it reads 0 */
+	smp_call_function(kexec_smp_down, NULL, 0);	/* wait == 0: do not block here */
+	smp_wmb();
+	while (num_online_cpus() > 1) {	/* until only this CPU is still online */
+		cpu_relax();
+		mdelay(1);
+	}
+#endif
+}
+
+#ifdef CONFIG_SMP
+static void machine_crash_nonpanic_core(void *unused)
+{
+	int cpu;
+	struct pt_regs regs;
+
+	cpu = smp_processor_id();
+	/* Sent to the other CPUs by machine_crash_shutdown(). */
+	local_irq_disable();
+	crash_setup_regs(&regs, NULL);	/* capture this CPU's own registers */
+	pr_debug("CPU %u will stop doing anything useful since another CPU has crashed\n", cpu);
+	crash_save_cpu(&regs, cpu);
+	flush_cache_all();
+
+	set_cpu_online(cpu, false);
+	atomic_dec(&waiting_for_crash_ipi);	/* machine_crash_shutdown() waits on this counter */
+	while (READ_ONCE(smp_rcb->ready) != 0)
+		mdelay(1);
+	if (cpu != 0)
+		reset_cpu(cpu);
+	else
+		machine_kexec(kexec_crash_image);	/* CPU 0 itself starts the crash kernel */
+}
+#else
+static inline void machine_crash_nonpanic_core(void *unused) { }
+#endif
+
+static void machine_kexec_mask_interrupts(void)
+{
+	struct irq_desc *desc;
+	unsigned int irq;
+
+	for_each_irq_desc(irq, desc) {
+		struct irq_data *data = &desc->irq_data;
+		struct irq_chip *chip = irq_desc_get_chip(desc);
+
+		if (chip == NULL)
+			continue;
+		/* Finish an interrupt that is still marked in progress. */
+		if (chip->irq_eoi && irqd_irq_inprogress(data))
+			chip->irq_eoi(data);
+
+		if (chip->irq_mask)
+			chip->irq_mask(data);
+
+		if (chip->irq_disable && !irqd_irq_disabled(data))
+			chip->irq_disable(data);
+	}
+}
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+	int cpu;
+	unsigned long msecs;
+
+	cpu = smp_processor_id();
+	local_irq_disable();
+	kernel_restart_prepare(NULL);
+	atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);	/* one per other online CPU */
+	smp_call_function(machine_crash_nonpanic_core, NULL, false);
+	msecs = 1000; /* Wait at most a second for the other cpus to stop */
+	while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
+		mdelay(1);
+		msecs--;
+	}
+	if (atomic_read(&waiting_for_crash_ipi) > 0)
+		pr_warn("Non-crashing CPUs did not react to IPI\n");
+
+	crash_save_cpu(regs, cpu);
+	machine_kexec_mask_interrupts();
+	pr_info("Loading crashdump kernel...\n");
+#ifdef CONFIG_SMP
+	WRITE_ONCE(smp_rcb->ready, 0);	/* releases the CPUs spinning in machine_crash_nonpanic_core() */
+	if (cpu != 0)
+		reset_cpu(cpu);	/* NOTE(review): CPU 0 then runs machine_kexec() from its IPI handler - confirm */
+#endif
+}
+
+#define phys_to_ktext(pa)    (__START_KERNEL_map + (pa))
+
+typedef void (*noretfun_t)(void) __noreturn;
+
+void machine_kexec(struct kimage *image)
+{
+	void *reboot_code_buffer;
+	unsigned long entry;
+	unsigned long *ptr;
+	struct boot_params *params = sunway_boot_params;
+
+
+	reboot_code_buffer = kexec_control_page;
+	pr_info("reboot_code_buffer = %px\n", reboot_code_buffer);
+	kexec_start_address = phys_to_ktext(image->start);
+	pr_info("kexec_start_address = %#lx\n", kexec_start_address);
+	if (image->type == KEXEC_TYPE_DEFAULT)
+		kexec_indirection_page =
+			(unsigned long) phys_to_virt(image->head & PAGE_MASK);
+	else
+		kexec_indirection_page = (unsigned long)&image->head;
+
+	pr_info("kexec_indirection_page = %#lx, image->head=%#lx\n",
+			kexec_indirection_page, image->head);
+
+	params->cmdline = kexec_start_address - COMMAND_LINE_OFF;	/* fixed offsets below the entry point */
+	params->initrd_start = *(__u64 *)(kexec_start_address - INITRD_START_OFF);
+	params->initrd_size = *(__u64 *)(kexec_start_address - INITRD_SIZE_OFF);
+
+	pr_info("initrd_start = %#llx, initrd_size = %#llx\n"
+		"dtb_start = %#llx, efi_systab = %#llx\n"
+		"efi_memmap = %#llx, efi_memmap_size = %#llx\n"
+		"efi_memdesc_size = %#llx, efi_memdesc_version = %#llx\n"
+		"cmdline = %#llx\n",
+		params->initrd_start, params->initrd_size,
+		params->dtb_start, params->efi_systab,
+		params->efi_memmap, params->efi_memmap_size,
+		params->efi_memdesc_size, params->efi_memdesc_version,
+		params->cmdline);
+
+	memcpy(reboot_code_buffer, relocate_new_kernel, relocate_new_kernel_size);	/* stage the relocation stub */
+
+	/*
+	 * The generic kexec code builds a page list with physical
+	 * addresses.  Rewrite every source, indirection and destination
+	 * entry to its phys_to_virt() address.  NOTE(review): the
+	 * relocation code presumably expects virtual addresses - confirm.
+	 */
+	for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE);
+	     ptr = (entry & IND_INDIRECTION) ?
+	       phys_to_virt(entry & PAGE_MASK) : ptr + 1) {
+		if (*ptr & IND_SOURCE || *ptr & IND_INDIRECTION ||
+		    *ptr & IND_DESTINATION)
+			*ptr = (unsigned long) phys_to_virt(*ptr);
+	}
+
+	/*
+	 * Interrupts stay off from here on.
+	 */
+	local_irq_disable();
+
+	pr_info("Will call new kernel at %08lx\n", image->start);
+	pr_info("Bye ...\n");
+	smp_wmb();
+	((noretfun_t) reboot_code_buffer)();	/* jump into the copied stub; declared __noreturn */
+}
diff --git a/arch/sw_64/kernel/perf_event.c b/arch/sw_64/kernel/perf_event.c
new file mode 100644
index 0000000000000000000000000000000000000000..83bb051be9de4767779d8783b31c8eda4277caa2
--- /dev/null
+++ b/arch/sw_64/kernel/perf_event.c
@@ -0,0 +1,787 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Performance events support for SW64 platforms.
+ *
+ * This code is based upon riscv and sparc perf event code.
+ */
+
+#include <linux/perf_event.h>
+#include <asm/stacktrace.h>
+
+/* For tracking PMCs and the hw events they monitor on each CPU. */
+struct cpu_hw_events {
+	/*
+	 * Set the bit (indexed by the counter number) when the counter
+	 * is used for an event.
+	 */
+	unsigned long		used_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
+	/* Array of events currently scheduled on this cpu, indexed by counter. */
+	struct perf_event	*event[MAX_HWEVENTS];
+};
+
+DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
+
+struct sw64_perf_event {
+	/* index of the counter (PMC) used for this event; -1 when unsupported */
+	int counter;
+	/* event selector value for that counter */
+	int event;
+};
+
+/*
+ * A structure to hold the description of the PMCs available on a particular
+ * type of SW64 CPU.
+ */
+struct sw64_pmu_t {
+	/* tables of generic hw events and of generic cache events */
+	const struct sw64_perf_event *hw_events;
+	const struct sw64_perf_event (*cache_events)[PERF_COUNT_HW_CACHE_MAX]
+		[PERF_COUNT_HW_CACHE_OP_MAX]
+		[PERF_COUNT_HW_CACHE_RESULT_MAX];
+
+	/* methods used to map a perf config value to a table entry */
+	const struct sw64_perf_event *(*map_hw_event)(u64 config);
+	const struct sw64_perf_event *(*map_cache_event)(u64 config);
+
+	/* The number of entries in the hw_events table */
+	int  max_events;
+
+	/* The number of counters on this pmu */
+	int  num_pmcs;
+
+	/*
+	 * All PMC counters reside in the IBOX register PCTR.  This is the
+	 * LSB of the counter.  NOTE(review): not initialised by core3_pmu.
+	 */
+	int  pmc_count_shift[MAX_HWEVENTS];
+
+	/*
+	 * The mask that isolates the PMC bits when the LSB of the counter
+	 * is shifted to bit 0.
+	 */
+	unsigned long pmc_count_mask;
+
+	/* The maximum period the PMC can count. */
+	unsigned long pmc_max_period;
+
+	/*
+	 * The maximum value that may be written to the counter due to
+	 * hardware restrictions is pmc_max_period - pmc_left.
+	 */
+	long pmc_left;
+
+	/* Subroutine for checking validity of a raw event for this PMU. */
+	bool (*raw_event_valid)(u64 config);
+};
+
+/*
+ * The SW64 PMU description currently in operation.  This is set during
+ * the boot process to the specific CPU of the machine.
+ */
+static const struct sw64_pmu_t *sw64_pmu;
+
+/*
+ * SW64 PMC event types
+ *
+ * There is no one-to-one mapping of the possible hw event types to the
+ * actual codes that are used to program the PMCs hence we introduce our
+ * own hw event type identifiers.
+ */
+#define SW64_OP_UNSUP {-1, -1}
+
+/* Maps each generic PERF_COUNT_HW_* id to its {counter, event selector} pair. */
+static const struct sw64_perf_event core3_hw_event_map[] = {
+	[PERF_COUNT_HW_CPU_CYCLES]		= {PMC_PC0, PC0_CPU_CYCLES},
+	[PERF_COUNT_HW_INSTRUCTIONS]		= {PMC_PC0, PC0_INSTRUCTIONS},
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= {PMC_PC0, PC0_SCACHE_REFERENCES},
+	[PERF_COUNT_HW_CACHE_MISSES]		= {PMC_PC1, PC1_SCACHE_MISSES},
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= {PMC_PC0, PC0_BRANCH_INSTRUCTIONS},
+	[PERF_COUNT_HW_BRANCH_MISSES]		= {PMC_PC1, PC1_BRANCH_MISSES},
+};
+
+/* Mapping of the hw cache event types to the perf tool interface */
+#define C(x) PERF_COUNT_HW_CACHE_##x
+static const struct sw64_perf_event core3_cache_event_map
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	[C(L1D)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= {PMC_PC0, PC0_DCACHE_READ},
+			[C(RESULT_MISS)]	= {PMC_PC1, PC1_DCACHE_MISSES}
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= SW64_OP_UNSUP,
+			[C(RESULT_MISS)]	= SW64_OP_UNSUP,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= SW64_OP_UNSUP,
+			[C(RESULT_MISS)]	= SW64_OP_UNSUP,
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= {PMC_PC0, PC0_ICACHE_READ},
+			[C(RESULT_MISS)]	= {PMC_PC1, PC1_ICACHE_READ_MISSES},
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= SW64_OP_UNSUP,
+			[C(RESULT_MISS)]	= SW64_OP_UNSUP,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= SW64_OP_UNSUP,
+			[C(RESULT_MISS)]	= SW64_OP_UNSUP,
+		},
+	},
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= SW64_OP_UNSUP,
+			[C(RESULT_MISS)]	= SW64_OP_UNSUP,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= SW64_OP_UNSUP,
+			[C(RESULT_MISS)]	= SW64_OP_UNSUP,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= SW64_OP_UNSUP,
+			[C(RESULT_MISS)]	= SW64_OP_UNSUP,
+		},
+	},
+	[C(DTLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= {PMC_PC0, PC0_DTB_READ},
+			[C(RESULT_MISS)]	= {PMC_PC1, PC1_DTB_SINGLE_MISSES},
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= SW64_OP_UNSUP,
+			[C(RESULT_MISS)]	= SW64_OP_UNSUP,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= SW64_OP_UNSUP,
+			[C(RESULT_MISS)]	= SW64_OP_UNSUP,
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= {PMC_PC0, PC0_ITB_READ},
+			[C(RESULT_MISS)]	= {PMC_PC1, PC1_ITB_MISSES},
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= SW64_OP_UNSUP,
+			[C(RESULT_MISS)]	= SW64_OP_UNSUP,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= SW64_OP_UNSUP,
+			[C(RESULT_MISS)]	= SW64_OP_UNSUP,
+		},
+	},
+	[C(BPU)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= SW64_OP_UNSUP,
+			[C(RESULT_MISS)]	= SW64_OP_UNSUP,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= SW64_OP_UNSUP,
+			[C(RESULT_MISS)]	= SW64_OP_UNSUP,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= SW64_OP_UNSUP,
+			[C(RESULT_MISS)]	= SW64_OP_UNSUP,
+		},
+	},
+	[C(NODE)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= SW64_OP_UNSUP,
+			[C(RESULT_MISS)]	= SW64_OP_UNSUP,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= SW64_OP_UNSUP,
+			[C(RESULT_MISS)]	= SW64_OP_UNSUP,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= SW64_OP_UNSUP,
+			[C(RESULT_MISS)]	= SW64_OP_UNSUP,
+		},
+	},
+
+};
+
+static const struct sw64_perf_event *core3_map_hw_event(u64 config)
+{
+	return &sw64_pmu->hw_events[config];	/* no bounds check here: the caller compares config with max_events */
+}
+
+/*
+ * Look up a PERF_TYPE_HW_CACHE config.  ERR_PTR(-EINVAL) for a field out
+ * of range, ERR_PTR(-ENOENT) for a combination marked SW64_OP_UNSUP.
+ */
+static const struct sw64_perf_event *core3_map_cache_event(u64 config)
+{
+	/* config layout: bits 0-7 type, bits 8-15 op, bits 16-23 result */
+	const unsigned int type = (config >> 0) & 0xff;
+	const unsigned int op = (config >> 8) & 0xff;
+	const unsigned int result = (config >> 16) & 0xff;
+	const struct sw64_perf_event *ev;
+
+	if (type >= PERF_COUNT_HW_CACHE_MAX ||
+	    op >= PERF_COUNT_HW_CACHE_OP_MAX ||
+	    result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+		return ERR_PTR(-EINVAL);
+
+	ev = &(*sw64_pmu->cache_events)[type][op][result];
+	if (ev->counter == -1) /* SW64_OP_UNSUP */
+		return ERR_PTR(-ENOENT);
+
+	return ev;
+}
+
+/*
+ * r0xx for counter0, r1yy for counter1.
+ * According to the datasheet, 00 <= xx <= 0F, 00 <= yy <= 3D
+ */
+static bool core3_raw_event_valid(u64 config)
+{
+	bool on_pc0 = config >= PC0_RAW_BASE && config <= (PC0_RAW_BASE + PC0_MAX);
+	bool on_pc1 = config >= PC1_RAW_BASE && config <= (PC1_RAW_BASE + PC1_MAX);
+
+	if (!on_pc0 && !on_pc1)
+		pr_info("sw64 pmu: invalid raw event config %#llx\n", config);
+	return on_pc0 || on_pc1;
+}
+
+static const struct sw64_pmu_t core3_pmu = {
+	.max_events = ARRAY_SIZE(core3_hw_event_map),	/* bound for PERF_TYPE_HARDWARE configs */
+	.hw_events = core3_hw_event_map,
+	.map_hw_event = core3_map_hw_event,
+	.cache_events = &core3_cache_event_map,
+	.map_cache_event = core3_map_cache_event,
+	.num_pmcs = MAX_HWEVENTS,
+	.pmc_count_mask = PMC_COUNT_MASK,
+	.pmc_max_period = PMC_COUNT_MASK,	/* same value as pmc_count_mask */
+	.pmc_left = 4,
+	.raw_event_valid = core3_raw_event_valid,
+};
+
+/*
+ * Low-level functions: reading/writing counters
+ */
+static void sw64_write_pmc(int idx, unsigned long val)
+{
+	wrperfmon(PMC_CMD_WRITE_BASE + idx, val);	/* the write command is selected by counter index */
+}
+
+static unsigned long sw64_read_pmc(int idx)
+{
+	return wrperfmon(PMC_CMD_READ, idx);	/* the counter index is passed as the argument */
+}
+
+/* Set a new period to sample over; returns 1 when a new period was begun, else 0 */
+static int sw64_perf_event_set_period(struct perf_event *event,
+				struct hw_perf_event *hwc, int idx)
+{
+	long left = local64_read(&hwc->period_left);
+	long period = hwc->sample_period;
+	int overflow = 0;
+	unsigned long value;
+
+	if (unlikely(left <= -period)) {	/* a whole period or more behind: start afresh */
+		left = period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		overflow = 1;
+	}
+
+	if (unlikely(left <= 0)) {	/* period used up: carry the overshoot into the next one */
+		left += period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		overflow = 1;
+	}
+
+	if (left > (long)sw64_pmu->pmc_max_period)
+		left = sw64_pmu->pmc_max_period;	/* clamp to pmc_max_period */
+
+	value = sw64_pmu->pmc_max_period - left;	/* start 'left' counts below pmc_max_period */
+	local64_set(&hwc->prev_count, value);
+	sw64_write_pmc(idx, value);
+
+	perf_event_update_userpage(event);
+
+	return overflow;
+}
+
+/*
+ * Calculates the count (the 'delta') since the last time the PMC was read,
+ * folds it into event->count and hwc->period_left, returns the raw value read.
+ * As the PMCs' full period can easily be exceeded within the perf system
+ * sampling period we cannot use any high order bits as a guard bit in the
+ * PMCs to detect overflow as is done by other architectures.  The code here
+ * calculates the delta on the basis that there is no overflow when ovf is
+ * zero.  The value passed via ovf by the interrupt handler corrects for
+ * overflow.
+ *
+ * This can be racy on rare occasions -- a call to this routine can occur
+ * with an overflowed counter just before the PMI service routine is called.
+ * The check for delta negative hopefully always rectifies this situation.
+ */
+static unsigned long sw64_perf_event_update(struct perf_event *event,
+					struct hw_perf_event *hwc, int idx, long ovf)
+{
+	long prev_raw_count, new_raw_count;
+	long delta;
+
+again:
+	prev_raw_count = local64_read(&hwc->prev_count);
+	new_raw_count = sw64_read_pmc(idx);
+
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+			     new_raw_count) != prev_raw_count)
+		goto again;	/* prev_count changed under us: retry */
+
+	delta = (new_raw_count - (prev_raw_count & sw64_pmu->pmc_count_mask)) + ovf;
+
+	/* It is possible on very rare occasions that the PMC has overflowed
+	 * but the interrupt is yet to come.  Detect and fix this situation.
+	 */
+	if (unlikely(delta < 0))
+		delta += sw64_pmu->pmc_max_period + 1;
+
+	local64_add(delta, &event->count);
+	local64_sub(delta, &hwc->period_left);
+
+	return new_raw_count;
+}
+
+/*
+ * State transition functions:
+ *
+ * add()/del() & start()/stop()
+ *
+ */
+
+/*
+ * pmu->start: start the event.
+ */
+static void sw64_pmu_start(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+		return;	/* only a stopped event may be started */
+
+	if (flags & PERF_EF_RELOAD) {
+		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+		sw64_perf_event_set_period(event, hwc, hwc->idx);	/* reprogram the counter */
+	}
+
+	hwc->state = 0;	/* neither stopped nor up to date from here on */
+
+	/* counting in selected modes, for both counters */
+	wrperfmon(PMC_CMD_PM, hwc->config_base);
+	wrperfmon(PMC_CMD_EVENT_BASE + hwc->idx, hwc->event_base);
+	wrperfmon(PMC_CMD_ENABLE, PMC_ENABLE_BASE + hwc->idx);
+}
+
+/*
+ * pmu->stop: stop the counter
+ */
+static void sw64_pmu_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (!(hwc->state & PERF_HES_STOPPED)) {	/* nothing to disable if already stopped */
+		wrperfmon(PMC_CMD_DISABLE, PMC_DISABLE_BASE + hwc->idx);
+		hwc->state |= PERF_HES_STOPPED;
+		barrier();
+	}
+
+	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+		sw64_perf_event_update(event, hwc, hwc->idx, 0);	/* fold in the final count */
+		hwc->state |= PERF_HES_UPTODATE;
+	}
+}
+
+/*
+ * pmu->add: add the event to PMU.
+ *
+ * Claims the counter the event is bound to (hwc->idx) on this CPU and,
+ * when PERF_EF_START is set, starts it right away.
+ *
+ * Return: 0 on success, -ENOSPC when that counter is already in use.
+ */
+static int sw64_pmu_add(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	unsigned long irq_flags;
+	int err = -ENOSPC;
+
+	local_irq_save(irq_flags);
+	if (!__test_and_set_bit(hwc->idx, cpuc->used_mask)) {
+		cpuc->event[hwc->idx] = event;
+		/* Freshly added events start out stopped and up to date. */
+		hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+		if (flags & PERF_EF_START)
+			sw64_pmu_start(event, PERF_EF_RELOAD);
+
+		/* Propagate our changes to the userspace mapping. */
+		perf_event_update_userpage(event);
+		err = 0;
+	}
+
+	local_irq_restore(irq_flags);
+	return err;
+}
+
+/*
+ * pmu->del: delete the event from PMU.
+ */
+static void sw64_pmu_del(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	const int idx = event->hw.idx;
+	unsigned long irq_flags;
+
+	local_irq_save(irq_flags);
+	/* Stop counting and fold the final count into the event. */
+	sw64_pmu_stop(event, PERF_EF_UPDATE);
+
+	/* Release the counter slot on this CPU. */
+	cpuc->event[idx] = NULL;
+	__clear_bit(idx, cpuc->used_mask);
+
+	perf_event_update_userpage(event);
+	local_irq_restore(irq_flags);
+}
+
+/*
+ * pmu->read: read and update the counter
+ * The raw value returned by sw64_perf_event_update() is not needed here.
+ */
+static void sw64_pmu_read(struct perf_event *event)
+{
+	/* ovf == 0: no overflow correction is applied on this path. */
+	sw64_perf_event_update(event, &event->hw, event->hw.idx, 0);
+}
+
+static bool supported_cpu(void)
+{
+	return true;	/* unconditional: no CPU model check is made here */
+}
+
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+	/* Nothing to be done: __hw_perf_event_init() acquires no resources. */
+}
+
+static int __hw_perf_event_init(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+	struct hw_perf_event *hwc = &event->hw;
+	const struct sw64_perf_event *event_type;
+
+
+	/*
+	 * SW64 does not have per-counter usr/os/guest/host bits,
+	 * we can distinguish exclude_user and exclude_kernel by
+	 * sample mode.
+	 */
+	if (event->attr.exclude_hv || event->attr.exclude_idle ||
+			event->attr.exclude_host || event->attr.exclude_guest)
+		return -EINVAL;
+
+	/*
+	 * SW64 does not support precise ip feature, and system hang when
+	 * detecting precise_ip by perf_event_attr__set_max_precise_ip
+	 * in userspace
+	 */
+	if (attr->precise_ip != 0)
+		return -EOPNOTSUPP;
+
+	/* SW64 has fixed counter for given event type */
+	if (attr->type == PERF_TYPE_HARDWARE) {
+		if (attr->config >= sw64_pmu->max_events)
+			return -EINVAL;	/* config would index past the hw_events table */
+		event_type = sw64_pmu->map_hw_event(attr->config);
+		hwc->idx = event_type->counter;
+		hwc->event_base = event_type->event;
+	} else if (attr->type == PERF_TYPE_HW_CACHE) {
+		event_type = sw64_pmu->map_cache_event(attr->config);
+		if (IS_ERR(event_type))	/* pass the mapper's error code on to the caller */
+			return PTR_ERR(event_type);
+		hwc->idx = event_type->counter;
+		hwc->event_base = event_type->event;
+	} else { /* PERF_TYPE_RAW */
+		if (!sw64_pmu->raw_event_valid(attr->config))
+			return -EINVAL;
+		hwc->idx = attr->config >> 8;	/* counter selector */
+		hwc->event_base = attr->config & 0xff;	/* event selector */
+	}
+
+	hwc->config_base = SW64_PERFCTRL_AM;
+	/* NOTE(review): with exclude_user and exclude_kernel both set, UM wins. */
+	if (attr->exclude_user)
+		hwc->config_base = SW64_PERFCTRL_KM;
+	if (attr->exclude_kernel)
+		hwc->config_base = SW64_PERFCTRL_UM;
+
+	hwc->config = attr->config;
+
+	if (!is_sampling_event(event))
+		pr_debug("not sampling event\n");
+
+	event->destroy = hw_perf_event_destroy;
+
+	if (!hwc->sample_period) {	/* no period requested: use the largest one */
+		hwc->sample_period = sw64_pmu->pmc_max_period;
+		hwc->last_period = hwc->sample_period;
+		local64_set(&hwc->period_left, hwc->sample_period);
+	}
+
+	return 0;
+}
+
+/*
+ * pmu->event_init entry point: filter out events this PMU cannot
+ * handle, then initialise the hardware event.
+ */
+static int sw64_pmu_event_init(struct perf_event *event)
+{
+	const u32 type = event->attr.type;
+
+	/* does not support taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
+	/* Only raw, generic hardware and hardware cache events are accepted. */
+	if (type != PERF_TYPE_RAW &&
+	    type != PERF_TYPE_HARDWARE &&
+	    type != PERF_TYPE_HW_CACHE)
+		return -ENOENT;
+
+	/* No PMU description has been installed. */
+	if (!sw64_pmu)
+		return -ENODEV;
+
+	/*
+	 * Do the real initialisation work; its status is handed back
+	 * to the caller unchanged.
+	 */
+	return __hw_perf_event_init(event);
+}
+
+static struct pmu pmu = {	/* callbacks handed to perf_pmu_register() in init_hw_perf_events() */
+	.name		= "core3-base",
+	.capabilities   = PERF_PMU_CAP_NO_NMI,
+	.event_init	= sw64_pmu_event_init,
+	.add		= sw64_pmu_add,
+	.del		= sw64_pmu_del,
+	.start		= sw64_pmu_start,
+	.stop		= sw64_pmu_stop,
+	.read		= sw64_pmu_read,
+};
+
+void perf_event_print_debug(void)
+{
+	unsigned long irq_state, pc0_val, pc1_val;
+	int this_cpu;
+
+	/* Nothing to dump when the CPU is not supported. */
+	if (!supported_cpu())
+		return;
+
+	/* Read PC0/PC1 through wrperfmon() and log them, all with local IRQs off. */
+	local_irq_save(irq_state);
+
+	this_cpu = smp_processor_id();
+	pc0_val = wrperfmon(PMC_CMD_READ, PMC_PC0);
+	pc1_val = wrperfmon(PMC_CMD_READ, PMC_PC1);
+
+	pr_info("CPU#%d: PCTR0[%lx] PCTR1[%lx]\n", this_cpu, pc0_val, pc1_val);
+
+	local_irq_restore(irq_state);
+}
+
+static void sw64_perf_event_irq_handler(unsigned long idx,
+					struct pt_regs *regs)
+{
+	struct cpu_hw_events *cpuc;
+	struct perf_sample_data data;
+	struct perf_event *event;
+	struct hw_perf_event *hwc;
+	/* Installed as perf_irq by init_hw_perf_events(); @idx indexes this CPU's event table. */
+	__this_cpu_inc(irq_pmi_count);
+	cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	event = cpuc->event[idx];
+	/* No event bound to this slot: account the interrupt as an error and bail out. */
+	if (unlikely(!event)) {
+		irq_err_count++;
+		return;
+	}
+
+	hwc = &event->hw;
+	sw64_perf_event_update(event, hwc, idx, sw64_pmu->pmc_max_period + 1);
+	perf_sample_data_init(&data, 0, hwc->last_period);
+	/* Re-arm the period; the sample reaches perf core only if set_period returns non-zero. */
+	if (sw64_perf_event_set_period(event, hwc, idx)) {
+		if (perf_event_overflow(event, &data, regs)) {
+			/* Interrupts coming too quickly; "throttle" the
+			 * counter, i.e., disable it for a little while.
+			 */
+			sw64_pmu_stop(event, 0);
+		}
+	}
+}
+
+bool valid_utext_addr(unsigned long addr)
+{	/* Inclusive range test against the current mm's [start_code, end_code]. */
+	return !(addr < current->mm->start_code || addr > current->mm->end_code);
+}
+
+bool valid_dy_addr(unsigned long addr)
+{
+	struct vm_area_struct *vma;
+
+	/* Only addresses within [TASK_UNMAPPED_BASE, TASK_SIZE] are considered. */
+	if (addr < TASK_UNMAPPED_BASE || addr > TASK_SIZE)
+		return false;
+
+	/* The VMA found must really contain addr and be executable. */
+	vma = find_vma(current->mm, addr);
+	/* NOTE(review): no mmap lock is taken around find_vma() here - confirm this is safe. */
+	return vma && vma->vm_start <= addr && (vma->vm_flags & VM_EXEC);
+}
+
+#ifdef CONFIG_FRAME_POINTER
+void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
+		struct pt_regs *regs)
+{
+	struct stack_frame frame;
+	unsigned long __user *fp;
+	int err;
+	/* Record the user PC, then follow the frame-pointer (r15) chain up the user stack. */
+	perf_callchain_store(entry, regs->pc);
+	fp = (unsigned long __user *)regs->regs[15];
+
+	while (entry->nr < entry->max_stack && (unsigned long)fp < current->mm->start_stack) {
+		if (!access_ok(fp, sizeof(frame)))
+			break;
+
+		pagefault_disable();
+		err =  __copy_from_user_inatomic(&frame, fp, sizeof(frame));
+		pagefault_enable();
+
+		if (err)
+			break;
+		/* Only addresses in the text segment or an executable mapping are recorded. */
+		if (valid_utext_addr(frame.return_address) || valid_dy_addr(frame.return_address))
+			perf_callchain_store(entry, frame.return_address);
+		/* Frames must ascend: a cyclic chain of unrecorded frames would never end. */
+		if ((unsigned long)frame.next_frame <= (unsigned long)fp)
+			break;
+		fp = (void __user *)frame.next_frame;
+	}
+}
+#else /* !CONFIG_FRAME_POINTER */
+void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
+		struct pt_regs *regs)
+{
+	unsigned long usp = rdusp();
+	unsigned long user_addr;
+	int err;
+	/* Without frame pointers: scan the user stack word by word for code addresses. */
+	perf_callchain_store(entry, regs->pc);
+	/* Walk from the user SP up to mm->start_stack, or until the entry is full. */
+	while (entry->nr < entry->max_stack && usp < current->mm->start_stack) {
+		if (!access_ok((const void __user *)usp, 8))
+			break;
+
+		pagefault_disable();
+		err = __get_user(user_addr, (unsigned long *)usp);
+		pagefault_enable();
+
+		if (err)
+			break;
+		/* Keep only words that pass the text-segment or executable-mapping check. */
+		if (valid_utext_addr(user_addr) || valid_dy_addr(user_addr))
+			perf_callchain_store(entry, user_addr);
+		usp = usp + 8;
+	}
+}
+#endif/* CONFIG_FRAME_POINTER */
+
+/*
+ * Callback handed to walk_stackframe(): invoked once for every stack frame
+ * whilst unwinding. Each frame is like a subroutine return, so the PC is
+ * what gets recorded.
+ */
+static int callchain_trace(unsigned long pc, void *data)
+{
+	/* @data is the callchain context passed in by perf_callchain_kernel(). */
+	perf_callchain_store((struct perf_callchain_entry_ctx *)data, pc);
+
+	return 0;	/* zero: never asks the walker to stop */
+}
+
+void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
+			   struct pt_regs *regs)
+{	/* Unwind from @regs (NULL task) and store every reported PC into @entry. */
+	walk_stackframe(NULL, regs, callchain_trace, entry);
+}
+
+/*
+ * perf_instruction_pointer and perf_misc_flags take the guest os
+ * state into account.
+ */
+
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
+	/* A non-zero perf_guest_state() selects the guest's instruction pointer. */
+	return perf_guest_state() ? perf_guest_get_ip()
+				  : instruction_pointer(regs);
+}
+
+unsigned long perf_misc_flags(struct pt_regs *regs)
+{
+	unsigned int guest_state = perf_guest_state();
+
+	/*
+	 * Exactly one PERF_RECORD_MISC_* value is returned: a non-zero guest
+	 * state takes precedence over the host's user/kernel mode.
+	 */
+	if (guest_state) {
+		if (guest_state & PERF_GUEST_USER)
+			return PERF_RECORD_MISC_GUEST_USER;
+		return PERF_RECORD_MISC_GUEST_KERNEL;
+	}
+
+	if (user_mode(regs))
+		return PERF_RECORD_MISC_USER;
+
+	return PERF_RECORD_MISC_KERNEL;
+}
+
+/*
+ * Init call to initialise performance events at kernel startup; returns perf_pmu_register()'s status.
+ */
+int __init init_hw_perf_events(void)
+{
+	int err;
+
+	if (!supported_cpu()) {
+		pr_info("Performance events: Unsupported CPU type!\n");
+		return 0;
+	}
+
+	pr_info("Performance events: Supported CPU type!\n");
+	/* Override performance counter IRQ vector */
+	perf_irq = sw64_perf_event_irq_handler;
+	/* And set up PMU specification */
+	sw64_pmu = &core3_pmu;
+
+	err = perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
+	if (err)	/* do not hide a failed registration from the initcall machinery */
+		pr_err("Performance events: PMU registration failed (%d)\n", err);
+
+	return err;
+}
+early_initcall(init_hw_perf_events);
diff --git a/arch/sw_64/kernel/perf_regs.c b/arch/sw_64/kernel/perf_regs.c
new file mode 100644
index 0000000000000000000000000000000000000000..b036f213936bc6d79214c9b7bdf1ab9a82a40b69
--- /dev/null
+++ b/arch/sw_64/kernel/perf_regs.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/errno.h>
+#include <linux/perf_event.h>
+
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+	if (WARN_ON_ONCE((u32)idx >= PERF_REG_SW64_MAX))	/* the u32 cast also catches idx < 0 */
+		return 0;
+	/* Treat *regs as a flat array of unsigned long and return word @idx. */
+	return ((unsigned long *)regs)[idx];
+}
+
+#define REG_RESERVED (~((1ULL << PERF_REG_SW64_MAX) - 1))
+/* A register mask is valid when non-empty and free of bits at or above PERF_REG_SW64_MAX. */
+int perf_reg_validate(u64 mask)
+{
+	if (mask && !(mask & REG_RESERVED))
+		return 0;
+	return -EINVAL;
+}
+
+u64 perf_reg_abi(struct task_struct *task)
+{	/* @task is ignored: the 64-bit ABI is always reported. */
+	return PERF_SAMPLE_REGS_ABI_64;
+}
+
+void perf_get_regs_user(struct perf_regs *regs_user,
+			struct pt_regs *regs)
+{	/* @regs is unused: the user registers always come from task_pt_regs(current). */
+	regs_user->regs = task_pt_regs(current);
+	regs_user->abi = perf_reg_abi(current);
+}
diff --git a/arch/sw_64/kernel/pm.c b/arch/sw_64/kernel/pm.c
new file mode 100644
index 0000000000000000000000000000000000000000..f0a35e5d0486167340b44f3bac1c80104f25649e
--- /dev/null
+++ b/arch/sw_64/kernel/pm.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/suspend.h>
+#include <linux/syscore_ops.h>
+
+#include <asm/suspend.h>
+
+struct syscore_ops io_syscore_ops;	/* NOTE(review): no initializer here - presumably filled in elsewhere; confirm */
+/* Install native_suspend_ops when CONFIG_SUSPEND is set, then register io_syscore_ops. */
+static int __init sw64_pm_init(void)
+{
+#ifdef CONFIG_SUSPEND
+	suspend_set_ops(&native_suspend_ops);
+#endif
+	register_syscore_ops(&io_syscore_ops);
+
+	return 0;
+}
+device_initcall(sw64_pm_init);
diff --git a/arch/sw_64/kernel/relocate.c b/arch/sw_64/kernel/relocate.c
new file mode 100644
index 0000000000000000000000000000000000000000..ebdf7d894805e8f2c0a1853d853d5d2ef8bf6c09
--- /dev/null
+++ b/arch/sw_64/kernel/relocate.c
@@ -0,0 +1,284 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Support for kernel relocation at boot time.
+ *
+ * Based on arch/mips/kernel/relocate.c
+ *
+ * Copyright (C) 2019 He Sheng
+ * Authors: He Sheng (hesheng05@gmail.com)
+ */
+#include <linux/elf.h>
+#include <linux/notifier.h>
+#include <linux/mm.h>
+
+#include <asm/sections.h>
+
+#define KTEXT_MAX    0xffffffffa0000000UL
+#define RELOCATED(x) ((void *)((unsigned long)x + offset))
+
+extern unsigned long _got_start[];
+extern unsigned long _got_end[];
+extern char pre_start_kernel[];
+
+extern unsigned int _relocation_start[];	/* End kernel image / start relocation table */
+extern unsigned int _relocation_end[];	/* End relocation table */
+
+extern unsigned long __start___ex_table;	/* Start exception table */
+extern unsigned long __stop___ex_table;	/* End exception table */
+extern union thread_union init_thread_union;
+
+/*
+ * A platform may define this function to perform any post-relocation
+ * fixup it needs; this weak default does nothing.
+ * Return non-zero to abort relocation.
+ */
+int __weak plat_post_relocation(long offset)
+{
+	return 0;
+}
+
+static int __init apply_r_sw64_refquad(unsigned long *loc_orig, unsigned long *loc_new, unsigned int offset)
+{
+	/* R_SW64_REFQUAD: add the offset to the 64-bit word at the new location; @loc_orig is unused. */
+	loc_new[0] += offset;
+	return 0;
+}
+
+static int (*reloc_handlers_rel[]) (unsigned long *, unsigned long *, unsigned int) __initdata = {
+	[R_SW64_REFQUAD]		= apply_r_sw64_refquad,	/* the only relocation type with a handler */
+};
+
+int __init do_relocations(void *kbase_old, void *kbase_new, unsigned int offset)
+{
+	unsigned int *r;
+	unsigned long *loc_orig;
+	unsigned long *loc_new;
+	int type;
+	int res;
+	/* Table word: type in bits 31..24, word offset from @kbase_old in bits 23..0; @kbase_new is unused. */
+	for (r = _relocation_start; r < _relocation_end; r++) {
+		/* Sentinel for last relocation */
+		if (*r == 0)
+			break;
+
+		type = (*r >> 24) & 0xff;
+		loc_orig = kbase_old + ((*r & 0x00ffffff) << 2);
+		loc_new = RELOCATED(loc_orig);
+		/* type can be 0..255, but the handler table ends at its last initializer. */
+		if (type >= ARRAY_SIZE(reloc_handlers_rel) || reloc_handlers_rel[type] == NULL) {
+			/* Unsupported relocation */
+			pr_err("Unhandled relocation type %d at 0x%pK\n",
+			       type, loc_orig);
+			return -ENOEXEC;
+		}
+
+		res = reloc_handlers_rel[type](loc_orig, loc_new, offset);
+		if (res)
+			return res;
+	}
+
+	return 0;
+}
+
+static int __init relocate_got(unsigned int offset)
+{
+	unsigned long *slot = RELOCATED(&_got_start);
+	unsigned long *limit = RELOCATED(&_got_end);
+
+	/* Walk the GOT inside the relocated image and bias every
+	 * entry by the relocation offset. */
+	for (; slot < limit; slot++)
+		*slot += offset;
+
+	return 0;
+}
+
+#ifdef CONFIG_RANDOMIZE_BASE
+
+static inline __init unsigned long rotate_xor(unsigned long hash,
+					      const void *area, size_t size)
+{
+	/* Skip leading bytes so that words are read from an 8-byte boundary. */
+	unsigned long first = ALIGN((unsigned long)area, 8);
+	const unsigned long *word = (const unsigned long *)first;
+	size_t words;
+
+	size -= first - (unsigned long)area;
+	words = size / sizeof(hash);
+	/* Per word: rotate the hash right by 7 bits, then XOR the word in. */
+	for (; words; words--, word++)
+		hash = ((hash >> 7) | (hash << (sizeof(hash) * 8 - 7))) ^ *word;
+
+	return hash;
+}
+
+static inline __init unsigned long get_random_boot(void)
+{
+	unsigned long seed = random_get_entropy();
+	unsigned long mix;
+
+	/*
+	 * Hash the kernel banner first for a simple but unpredictable start,
+	 * then stir in whatever runtime entropy is available.
+	 */
+	mix = rotate_xor(0, linux_banner, strlen(linux_banner));
+
+	return rotate_xor(mix, &seed, sizeof(seed));
+}
+
+static inline __init bool kaslr_disabled(void)
+{
+	char *str = COMMAND_LINE;
+
+	/* Test every "nokaslr" hit, not just the first: it must start the line or follow a space. */
+	for (; (str = strstr(str, "nokaslr")) != NULL; str += strlen("nokaslr"))
+		if (str == COMMAND_LINE || *(str - 1) == ' ')
+			return true;
+	return false;
+}
+
+static unsigned long __init determine_relocation_offset(void)
+{
+	/* Pick a random 64KB-aligned offset for the kernel; 0 means "do not relocate". */
+	unsigned long kernel_length;
+	unsigned long offset;
+
+	if (kaslr_disabled())
+		return 0;
+
+	kernel_length = (unsigned long)_end - (unsigned long)(&_text);
+
+	/* TODO: the offset is 64KB aligned; 8KB alignment may be enough. */
+	offset = get_random_boot() << 16;
+	offset &= (CONFIG_RANDOMIZE_BASE_MAX_OFFSET - 1);
+	if (offset < kernel_length)	/* too small: bump it by the 64KB-rounded image size */
+		offset += ALIGN(kernel_length, 0x10000);
+
+	/*
+	 * TODO: the new location should not overlap the initrd, dtb,
+	 * ACPI tables, etc.
+	 */
+	/* Give up (offset 0) if the moved image would end above KTEXT_MAX. */
+	if ((KTEXT_MAX - (unsigned long)_end) < offset)
+		offset = 0;
+
+	return offset;
+}
+
+#else
+
+static inline unsigned long __init determine_relocation_offset(void)
+{
+	/*
+	 * Without CONFIG_RANDOMIZE_BASE the offset is hard coded to 0,
+	 * which makes relocate_kernel() leave the kernel where it is.
+	 */
+	return 0;
+}
+
+#endif
+
+static inline int __init relocation_offset_valid(unsigned long offset)
+{
+	const unsigned long dest = (unsigned long)_text + offset;
+
+	/*
+	 * Reject a target that is not 64KB aligned, and one that starts
+	 * below the end of the original image and would therefore overlap
+	 * the kernel being copied.
+	 */
+	if ((dest & 0x0000ffff) || dest < (unsigned long)&_end)
+		return 0;
+
+	return 1;
+}
+
+unsigned int __init relocate_kernel(void)
+{
+	void *loc_new;
+	unsigned long kernel_length;
+	unsigned long bss_length;
+	unsigned int offset = 0;
+	int res = 1;
+	/* Copy and relocate the kernel; returns the offset applied, or 0 if it stays in place. */
+	kernel_length = (unsigned long)(&_relocation_start) - (long)(&_text);
+	bss_length = (unsigned long)&__bss_stop - (long)&__bss_start;
+
+	offset = determine_relocation_offset();	/* NOTE(review): unsigned long narrowed to unsigned int */
+	/* NOTE(review): a "reset the command line" step was announced here, but no code follows. */
+
+	/* Sanity check relocation address */
+	if (offset && relocation_offset_valid(offset)) {
+
+		loc_new = RELOCATED(&_text);
+		/* Copy the kernel to its new location */
+		memcpy(loc_new, &_text, kernel_length);
+
+		/* Perform relocations on the new kernel */
+		res = do_relocations(&_text, loc_new, offset);
+		if (res < 0)
+			goto out;
+
+		res = relocate_got(offset);
+		if (res < 0)
+			goto out;
+
+		/*
+		 * The original .bss has already been cleared, and
+		 * some variables such as command line parameters
+		 * are stored in it, so make a copy in the new location.
+		 */
+		memcpy(RELOCATED(&__bss_start), &__bss_start, bss_length);
+
+		/*
+		 * Last chance for the platform to abort relocation.
+		 * This may also be used by the platform to perform any
+		 * initialisation required now that the new kernel is
+		 * resident in memory and ready to be executed.
+		 */
+		if (plat_post_relocation(offset))
+			goto out;
+
+		/* Return the new kernel's offset */
+		return offset;
+	}
+out:
+	return 0;
+}
+
+/*
+ * Show relocation information on panic: the offset and section addresses, at log level @level.
+ */
+void show_kernel_relocation(const char *level)
+{
+	unsigned long offset;
+	/* Offset = physical address of _text minus that of _TEXT_START. */
+	offset = __pa_symbol(_text) - __pa_symbol(_TEXT_START);
+	/* Only report when the kernel is relocatable and the offset is non-zero. */
+	if (IS_ENABLED(CONFIG_RELOCATABLE) && offset > 0) {
+		printk(level);	/* emit the level prefix; the pr_cont() lines below continue it */
+		pr_cont("Kernel relocated by 0x%pK\n", (void *)offset);
+		pr_cont(" .text @ 0x%pK\n", _text);
+		pr_cont(" .data @ 0x%pK\n", _sdata);
+		pr_cont(" .bss  @ 0x%pK\n", __bss_start);
+	}
+}
+
+static int kernel_location_notifier_fn(struct notifier_block *self,
+				       unsigned long v, void *p)
+{	/* Notifier callback: dump the relocation info at KERN_EMERG; @self, @v and @p are unused. */
+	show_kernel_relocation(KERN_EMERG);
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block kernel_location_notifier = {
+	.notifier_call = kernel_location_notifier_fn	/* registered on panic_notifier_list below */
+};
+
+static int __init register_kernel_offset_dumper(void)
+{	/* Hook the relocation dump into the panic notifier chain; always returns 0. */
+	atomic_notifier_chain_register(&panic_notifier_list,
+				       &kernel_location_notifier);
+	return 0;
+}
+device_initcall(register_kernel_offset_dumper);
diff --git a/arch/sw_64/kernel/relocate_kernel.S b/arch/sw_64/kernel/relocate_kernel.S
new file mode 100644
index 0000000000000000000000000000000000000000..f1a160636212fed8e73dd32616edaea155c51154
--- /dev/null
+++ b/arch/sw_64/kernel/relocate_kernel.S
@@ -0,0 +1,176 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * relocate_kernel.S for kexec
+ * Created by <hesheng05@gmail.com> Jul 2 2019
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+
+#include <asm/regdef.h>
+#include <asm/page.h>
+
+	.align 3
+	.globl relocate_new_kernel
+	.ent relocate_new_kernel
+	/* Walk the kexec indirection list, copy the pages, then jump to kexec_start_address. */
+relocate_new_kernel:
+	.prologue 0
+	ldl	a0, arg0
+	ldl	a1, arg1
+	ldl	a2, arg2
+	ldl	a3, arg3
+
+	ldl	s0, kexec_indirection_page
+	ldl	s1, kexec_start_address
+	/* s0: cursor into the indirection list, s2: current entry, s4: copy destination */
+process_entry:
+	ldl	s2, 0(s0)
+	addl	s0, 8, s0
+
+	/*
+	 * In case of a kdump/crash kernel, the indirection page is not
+	 * populated as the kernel is directly copied to a reserved location
+	 */
+	beq	s2, done
+
+	/* destination page */
+	and	s2, 0x1, s3
+	beq	s3, 1f
+	bic	s2, 0x1, s4/* store destination addr in s4 */
+	br	$31, process_entry
+
+1:
+	/* indirection page, update s0*/
+	and	s2, 0x2, s3
+	beq	s3, 1f
+	bic	s2, 0x2, s0
+	br	$31, process_entry
+
+1:
+	/* done page */
+	and	s2, 0x4, s3
+	beq	s3, 1f
+	br	$31, done
+1:
+	/* source page */
+	and	s2, 0x8, s3
+	beq	s3, process_entry
+	bic	s2, 0x8, s2
+	ldi	s6, 0x1
+	sll	s6, (PAGE_SHIFT - 3), s6
+	/* s6 = 1 << (PAGE_SHIFT - 3): number of 8-byte words in one page */
+copy_word:
+	/* copy page word by word */
+	ldl	s5, 0(s2)
+	stl	s5, 0(s4)
+	addl	s4, 8, s4
+	addl	s2, 8, s2
+	subl	s6, 1, s6
+	beq	s6, process_entry
+	br	$31, copy_word
+	br	$31, process_entry	/* not reached: the branch above is unconditional */
+
+done:
+#ifdef CONFIG_CRASH_SMP /* unsupported now!!!! */
+	/* Clearing kexec_flag signals the other CPUs that the kernel
+	 * was moved to its location. Note - we need the relocated address
+	 * of kexec_flag.
+	 */
+
+	br	ra, 1f
+1:	mov	ra, t1
+	ldi	t2, 1b
+	ldi	t0, kexec_flag
+	subl	t0, t2, t0
+	addl	t1, t0, t0
+	stl	zero, 0(t0)
+#endif
+	memb
+	jmp	ra, (s1)
+	.end relocate_new_kernel
+	.size relocate_new_kernel, .-relocate_new_kernel
+
+#ifdef CONFIG_CRASH_SMP
+	/*
+	 * Other CPUs should wait until code is relocated and
+	 * then start at entry (?) point.
+	 */
+	.align 3
+	.globl kexec_smp_wait
+	.ent kexec_smp_wait
+kexec_smp_wait:
+	ldl	a0, s_arg0
+	ldl	a1, s_arg1
+	ldl	a2, s_arg2
+	ldl	a3, s_arg3
+	ldl	s1, kexec_start_address
+
+	/* Non-relocated address works for args and kexec_start_address (old
+	 * kernel is not overwritten). But we need relocated address of
+	 * kexec_flag.
+	 */
+
+	bsr	ra, 1f
+1:	mov	ra, t1
+	ldi	t2, 1b
+	ldi	t0, kexec_flag
+	subl	t0, t2, t0
+	addl	t1, t0, t0
+	/* Spin until relocate_new_kernel clears kexec_flag: the flag must be loaded, not stored. */
+1:	ldl	s0, 0(t0)
+	bne	s0, 1b
+	memb
+	jmp	ra, (s1)
+	.end kexec_smp_wait
+	.size kexec_smp_wait, .-kexec_smp_wait
+#endif
+
+	.align 3
+
+	/* All parameters to new kernel are passed in registers a0-a3.
+	 * kexec_args[0..3] are used to prepare register values.
+	 */
+
+kexec_args:
+	.globl kexec_args
+arg0:	.quad 0x0
+arg1:	.quad 0x0
+arg2:	.quad 0x0
+arg3:	.quad 0x0
+	.size kexec_args, 8*4
+
+#ifdef CONFIG_CRASH_SMP
+	/*
+	 * Secondary CPUs may have different kernel parameters in
+	 * their registers a0-a3. secondary_kexec_args[0..3] are used
+	 * to prepare register values.
+	 */
+secondary_kexec_args:
+	.globl secondary_kexec_args
+s_arg0:	.quad 0x0
+s_arg1:	.quad 0x0
+s_arg2:	.quad 0x0
+s_arg3:	.quad 0x0
+	.size secondary_kexec_args, 8*4
+	/* Starts at 1; relocate_new_kernel stores zero to it once the copy is done. */
+kexec_flag:
+	.quad 0x1
+#endif
+	/* Address relocate_new_kernel jumps to after processing the indirection list. */
+kexec_start_address:
+	.globl kexec_start_address
+	.quad 0x0
+	.size kexec_start_address, 8
+	/* First entry of the indirection list walked by relocate_new_kernel. */
+kexec_indirection_page:
+	.globl kexec_indirection_page
+	.quad 0
+	.size kexec_indirection_page, 8
+	/* Size in bytes from relocate_new_kernel up to here, data words included. */
+relocate_new_kernel_end:
+
+relocate_new_kernel_size:
+	.global relocate_new_kernel_size
+	.quad relocate_new_kernel_end - relocate_new_kernel
+	.size relocate_new_kernel_size, 8
diff --git a/arch/sw_64/kernel/stacktrace.c b/arch/sw_64/kernel/stacktrace.c
new file mode 100644
index 0000000000000000000000000000000000000000..ff00506d5b824727161449fa8c5f3602574c1e6e
--- /dev/null
+++ b/arch/sw_64/kernel/stacktrace.c
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Stack trace management functions
+ *
+ *  Copyright (C) 2018 snyh <xiabin@deepin.com>
+ */
+#include <linux/sched.h>
+#include <linux/stacktrace.h>
+#include <linux/sched/task_stack.h>
+#include <linux/sched/debug.h>
+#include <linux/ftrace.h>
+#include <linux/perf_event.h>
+#include <linux/kallsyms.h>
+
+#include <asm/stacktrace.h>
+
+/*
+ * sw_64 PCS assigns the frame pointer to r15.
+ *
+ * A simple function prologue looks like this:
+ *	ldi     sp,-xx(sp)
+ *	stl     ra,0(sp)
+ *	stl     fp,8(sp)
+ *	mov     sp,fp
+ *
+ * A simple function epilogue looks like this:
+ *	mov     fp,sp
+ *	ldl     ra,0(sp)
+ *	ldl     fp,8(sp)
+ *	ldi     sp,+xx(sp)
+ */
+
+#ifdef CONFIG_FRAME_POINTER
+
+int unwind_frame(struct task_struct *tsk, struct stackframe *frame)
+{
+	unsigned long fp = frame->fp;
+	/* Step @frame to its caller; 0 on success, -EINVAL when the walk must stop. */
+	if (fp & 0x7)
+		return -EINVAL;
+
+	if (!tsk)
+		tsk = current;
+
+	if (!on_accessible_stack(tsk, fp, NULL))
+		return -EINVAL;
+	/* Per the prologue shown above: saved ra is at 0(fp), the caller's fp at 8(fp). */
+	frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
+	frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
+
+	/*
+	 * Frames created upon entry from user have NULL FP and PC values, so
+	 * don't bother reporting these. Frames created by __noreturn functions
+	 * might have a valid FP even if PC is bogus, so only terminate where
+	 * both are NULL.
+	 */
+	if (!frame->fp && !frame->pc)
+		return -EINVAL;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(unwind_frame);
+
+void walk_stackframe(struct task_struct *tsk, struct pt_regs *regs,
+		     int (*fn)(unsigned long, void *), void *data)
+{
+	unsigned long pc, fp;
+	/* Call @fn(pc, @data) for each frame of the chosen kernel stack until it returns non-zero. */
+	struct stackframe frame;
+
+	if (regs) {
+		unsigned long offset;
+
+		pc = regs->pc;
+		fp = regs->regs[15];
+		if (kallsyms_lookup_size_offset(pc, NULL, &offset)
+				&& offset < 16) {
+			/* pc is within the first 16 bytes of its function, so
+			 * the frame is not set up yet: report pc, continue from ra
+			 */
+			if (fn(pc, data))
+				return;
+			pc = regs->regs[26];
+		}
+	} else if (tsk == current || tsk == NULL) {
+		fp = (unsigned long)__builtin_frame_address(0);
+		pc = (unsigned long)walk_stackframe;
+	} else {
+		fp = tsk->thread.s[6];	/* presumably the saved r15 (fp) of the task - confirm */
+		pc = tsk->thread.ra;
+	}
+	/* Report the starting pc; stop if it is not kernel text or @fn asks to stop. */
+	if (!__kernel_text_address(pc) || fn(pc, data))
+		return;
+
+	frame.pc = pc;
+	frame.fp = fp;
+	while (1) {
+		int ret;
+
+		ret = unwind_frame(tsk, &frame);
+		if (ret < 0)
+			break;
+
+		if (fn(frame.pc, data))
+			break;
+	}
+}
+EXPORT_SYMBOL_GPL(walk_stackframe);
+
+#else /* !CONFIG_FRAME_POINTER */
+void walk_stackframe(struct task_struct *tsk, struct pt_regs *regs,
+		     int (*fn)(unsigned long, void *), void *data)
+{
+	unsigned long *ksp;
+	unsigned long sp, pc;
+	/* No frame pointers: scan the stack and report every word that is a kernel text address. */
+	if (regs) {
+		sp = (unsigned long)(regs+1);	/* start just past the saved pt_regs */
+		pc = regs->pc;
+	} else if (tsk == current || tsk == NULL) {
+		register unsigned long current_sp __asm__ ("$30");
+		sp = current_sp;
+		pc = (unsigned long)walk_stackframe;
+	} else {
+		sp = tsk->thread.sp;
+		pc = tsk->thread.ra;
+	}
+
+	ksp = (unsigned long *)sp;
+	/* Stop at the end of the kernel stack or as soon as @fn returns non-zero. */
+	while (!kstack_end(ksp)) {
+		if (__kernel_text_address(pc) && fn(pc, data))
+			break;
+		pc = *ksp++;
+	}
+}
+EXPORT_SYMBOL_GPL(walk_stackframe);
+
+#endif/* CONFIG_FRAME_POINTER */
+
+static int print_address_trace(unsigned long pc, void *data)
+{
+	print_ip_sym((const char *)data, pc);	/* @data carries the log level string */
+	return 0;
+}
+
+void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl)
+{
+	printk("%sTrace:\n", loglvl);	/* same log level as the per-frame lines; @sp is unused */
+	walk_stackframe(task, NULL, print_address_trace, (void *)loglvl);
+}
+
+#ifdef CONFIG_STACKTRACE
+/*
+ * Context passed through walk_stackframe() to save_trace() to fill a stack_trace buffer.
+ */
+struct stack_trace_data {
+	struct stack_trace *trace;	/* buffer that save_trace() appends to */
+	unsigned int nosched;	/* non-zero: save_trace() skips scheduler functions */
+};
+
+int save_trace(unsigned long pc, void *d)
+{
+	struct stack_trace_data *ctx = d;
+	struct stack_trace *tr = ctx->trace;
+
+	/* Drop scheduler frames when asked to, then honour the skip count. */
+	if (ctx->nosched && in_sched_functions(pc))
+		return 0;
+	if (tr->skip > 0) {
+		tr->skip--;
+		return 0;
+	}
+	tr->entries[tr->nr_entries++] = pc;
+	return tr->nr_entries >= tr->max_entries;	/* non-zero once the buffer is full */
+}
+
+void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
+{
+	struct stack_trace_data ctx = {
+		.trace = trace,
+		.nosched = 0,
+	};
+	/* Unwind the current task starting from @regs. */
+	walk_stackframe(current, regs, save_trace, &ctx);
+	/* Terminate the list with ULONG_MAX when a slot is left. */
+	if (trace->nr_entries < trace->max_entries)
+		trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+
+static void __save_stack_trace(struct task_struct *tsk,
+		struct stack_trace *trace, unsigned int nosched)
+{
+	struct stack_trace_data ctx = {
+		.trace = trace,
+		.nosched = nosched,
+	};
+
+	/* No pt_regs: walk_stackframe() picks its start point from @tsk; ULONG_MAX ends the list. */
+	walk_stackframe(tsk, NULL, save_trace, &ctx);
+	if (trace->nr_entries < trace->max_entries)
+		trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+
+void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+{
+	__save_stack_trace(tsk, trace, 1);	/* nosched = 1: scheduler functions are skipped */
+}
+EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
+
+void save_stack_trace(struct stack_trace *trace)
+{
+	__save_stack_trace(current, trace, 0);	/* nosched = 0: no frames are filtered out */
+}
+EXPORT_SYMBOL_GPL(save_stack_trace);
+#endif
+
+static int save_pc(unsigned long pc, void *data)
+{
+	unsigned long *p = data;
+
+	/* Record the first PC outside the scheduler; 0 means "none found yet". */
+	*p = in_sched_functions(pc) ? 0 : pc;
+
+	/* Compare explicitly: returning *p would truncate the unsigned long PC to int. */
+	return *p != 0;
+}
+
+unsigned long __get_wchan(struct task_struct *tsk)
+{
+	unsigned long pc = 0;	/* walk_stackframe() may return without ever calling save_pc() */
+
+	if (!tsk || tsk == current || task_is_running(tsk))
+		return 0;
+	walk_stackframe(tsk, NULL, save_pc, &pc);
+
+	return pc;
+}
+
+#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE
+int save_stack_trace_tsk_reliable(struct task_struct *tsk,
+				  struct stack_trace *trace)
+{
+	return 0;	/* NOTE(review): stub - reports success without touching @trace */
+}
+#endif
diff --git a/arch/sw_64/kernel/suspend.c b/arch/sw_64/kernel/suspend.c
new file mode 100644
index 0000000000000000000000000000000000000000..27a240e6614955835f7abe8c21558b956898da43
--- /dev/null
+++ b/arch/sw_64/kernel/suspend.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/suspend.h>
+
+#include <asm/suspend.h>
+#include <asm/sw64_init.h>
+
+struct processor_state suspend_state;
+
+static int native_suspend_state_valid(suspend_state_t pm_state)
+{
+	/*
+	 * .valid callback of native_suspend_ops: returns 1 for the
+	 * PM_SUSPEND_ON, PM_SUSPEND_STANDBY and PM_SUSPEND_MEM states
+	 * and 0 for any other state.
+	 */
+	return pm_state == PM_SUSPEND_ON ||
+	       pm_state == PM_SUSPEND_STANDBY ||
+	       pm_state == PM_SUSPEND_MEM;
+}
+
+void disable_local_timer(void)
+{
+	wrtimer(0);	/* writes 0 to the local timer via wrtimer() */
+}
+
+extern struct pci_controller *hose_head;
+
+/*
+ * The boot core enters the suspend state here.
+ */
+void sw64_suspend_enter(void)
+{
+	/* The boot processor goes to deep sleep mode from here;
+	 * after it is woken up, execution continues here.
+	 */
+	disable_local_timer();
+	current_thread_info()->pcb.tp = rtid();
+	/* Sleep; afterwards write the value saved in pcb.tp above back with wrtp(). */
+	sw64_suspend_deep_sleep(&suspend_state);
+	wrtp(current_thread_info()->pcb.tp);
+	/* NOTE(review): the timer is disabled a second time after wake-up - confirm this is intended. */
+	disable_local_timer();
+}
+
+static int native_suspend_enter(suspend_state_t state)
+{
+	/* Guests return success without suspending; otherwise do the processor specific suspend. */
+	if (!is_in_guest())
+		sw64_suspend_enter();
+
+	return 0;
+}
+
+const struct platform_suspend_ops native_suspend_ops = {	/* passed to suspend_set_ops() in pm.c */
+	.valid = native_suspend_state_valid,
+	.enter = native_suspend_enter,
+};
diff --git a/arch/sw_64/kernel/suspend_asm.S b/arch/sw_64/kernel/suspend_asm.S
new file mode 100644
index 0000000000000000000000000000000000000000..34ee349515a7c1278f24bc9c64dc3e8a6e864137
--- /dev/null
+++ b/arch/sw_64/kernel/suspend_asm.S
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/page.h>
+#include <asm/regdef.h>
+
+	.text
+	.set	noat
+ENTRY(sw64_suspend_deep_sleep)
+	/* $16 (a0) is the address of suspend_state: save $9-$15 and ra ($26) into it */
+	ldi	$1, PSTATE_REGS($16)
+	stl	$9, CALLEE_R9($1)
+	stl	$10, CALLEE_R10($1)
+	stl	$11, CALLEE_R11($1)
+	stl	$12, CALLEE_R12($1)
+	stl	$13, CALLEE_R13($1)
+	stl	$14, CALLEE_R14($1)
+	stl	$15, CALLEE_R15($1)
+	stl	$26, CALLEE_RA($1)
+	/* SIMD-FP: save $f2-$f9, then the FPCR (read through $f0) and $8 */
+	ldi	$1, PSTATE_FPREGS($16)
+	vstd	$f2, CALLEE_F2($1)
+	vstd	$f3, CALLEE_F3($1)
+	vstd	$f4, CALLEE_F4($1)
+	vstd	$f5, CALLEE_F5($1)
+	vstd	$f6, CALLEE_F6($1)
+	vstd	$f7, CALLEE_F7($1)
+	vstd	$f8, CALLEE_F8($1)
+	vstd	$f9, CALLEE_F9($1)
+	rfpcr	$f0
+	fstd	$f0, PSTATE_FPCR($16)
+	stl	$8, PSTATE_KTP($16)
+
+	/* save the address of suspend_state to $18 */
+	mov	$16, $18
+
+	/*
+	 * Now go to deep sleep.
+	 * HMcode should save pc, gp, ps, r16, r17, r18
+	 */
+	/* HMC_sendii is called with $16 = result of HMC_whami and $17 = 2 */
+	sys_call HMC_sleepen
+	sys_call HMC_whami
+	bis	$0, $0, $16
+	ldi	$17, 0x2($31)
+	sys_call HMC_sendii
+
+	/* wait for a while to receive interrupt */
+	ldi	$16, 0x1($31)
+	sll	$16, 24, $16	/* busy-wait counter starts at 1 << 24 */
+$subloop:
+	subl	$16, 1, $16
+	bis	$16, $16, $16
+	bis	$16, $16, $16
+	bne	$16, $subloop
+
+	/* Restore path: reload $8, $9-$15 and ra from suspend_state (address kept in $18) */
+	ldl	$8, PSTATE_KTP($18)
+	ldi	$1, PSTATE_REGS($18)
+	ldl	$9, CALLEE_R9($1)
+	ldl	$10, CALLEE_R10($1)
+	ldl	$11, CALLEE_R11($1)
+	ldl	$12, CALLEE_R12($1)
+	ldl	$13, CALLEE_R13($1)
+	ldl	$14, CALLEE_R14($1)
+	ldl	$15, CALLEE_R15($1)
+	ldl	$26, CALLEE_RA($1)
+	/* SIMD-FP: restore the FPCR, then issue the setfpec0..3 matching its low two bits */
+	fldd	$f0, PSTATE_FPCR($18)
+	wfpcr	$f0
+	fimovd	$f0, $2
+	and	$2, 0x3, $2
+	beq	$2, $suspend_setfpec_0
+	subl	$2, 0x1, $2
+	beq	$2, $suspend_setfpec_1
+	subl	$2, 0x1, $2
+	beq	$2, $suspend_setfpec_2
+	setfpec3
+	br	$suspend_setfpec_over
+$suspend_setfpec_0:
+	setfpec0
+	br	$suspend_setfpec_over
+$suspend_setfpec_1:
+	setfpec1
+	br	$suspend_setfpec_over
+$suspend_setfpec_2:
+	setfpec2
+$suspend_setfpec_over:
+	ldi	$1, PSTATE_FPREGS($18)
+	vldd	$f2, CALLEE_F2($1)
+	vldd	$f3, CALLEE_F3($1)
+	vldd	$f4, CALLEE_F4($1)
+	vldd	$f5, CALLEE_F5($1)
+	vldd	$f6, CALLEE_F6($1)
+	vldd	$f7, CALLEE_F7($1)
+	vldd	$f8, CALLEE_F8($1)
+	vldd	$f9, CALLEE_F9($1)
+	ret
+END(sw64_suspend_deep_sleep)
diff --git a/arch/sw_64/kernel/uprobes.c b/arch/sw_64/kernel/uprobes.c
new file mode 100644
index 0000000000000000000000000000000000000000..928312d62cfd172f20edcc113fea897a5c054a56
--- /dev/null
+++ b/arch/sw_64/kernel/uprobes.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/highmem.h>
+#include <linux/kdebug.h>
+#include <linux/uprobes.h>
+#include <linux/ptrace.h>
+
+/**
+ * arch_uprobe_analyze_insn - instruction analysis including validity and fixups.
+ * @aup: the probepoint information.
+ * @mm: the probed address space.
+ * @addr: virtual address at which to install the probepoint
+ * Return 0 on success, or -EINVAL if @addr is not 4-byte aligned.
+ */
+int arch_uprobe_analyze_insn(struct arch_uprobe *aup,
+		struct mm_struct *mm, unsigned long addr)
+{
+	if (addr & 0x03)
+		return -EINVAL;
+
+	/*
+	 * Fill the out-of-line slot image: the copied instruction followed
+	 * by UPROBE_BRK_UPROBE_XOL (presumably the break that ends the step).
+	 */
+	aup->ixol[0] = aup->insn;
+	aup->ixol[1] = UPROBE_BRK_UPROBE_XOL;
+
+	return 0;
+}
+
+void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
+		void *src, unsigned long len)
+{
+	/* Map the page and find the slot at the offset vaddr & ~PAGE_MASK */
+	void *base = kmap_local_page(page);
+	unsigned long slot = (unsigned long)base + (vaddr & ~PAGE_MASK);
+
+	memcpy((void *)slot, src, len);
+	/* Flush the instruction cache over the bytes just written */
+	flush_icache_range(slot, slot + len);
+	kunmap_local(base);
+}
+
+/*
+ * arch_uprobe_pre_xol - prepare to execute out of line.
+ * @aup: the probepoint information (not used here).
+ * @regs: reflects the saved user state of current task.
+ */
+int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs)
+{
+	unsigned long slot = current->utask->xol_vaddr;
+
+	/* Point the saved PC at the out-of-line slot; always returns 0 */
+	instruction_pointer_set(regs, slot);
+
+	return 0;
+}
+
+int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs)
+{
+	unsigned long next_pc = current->utask->vaddr + 4;
+
+	/* Continue at the instruction that follows the probed address */
+	instruction_pointer_set(regs, next_pc);
+
+	return 0;
+}
+
+/*
+ * Report whether the xol insn itself trapped and generated a signal (say,
+ * SIGILL/SIGSEGV/etc).
+ *
+ * The scheme described for this hook has arch_uprobe_pre_xol() and
+ * arch_uprobe_post_xol() save/restore thread.trap_nr so that ->trap_nr can
+ * be compared with UPROBE_TRAP_NR here. Neither hook in this file touches
+ * trap_nr, so no such detection is done and this always returns false.
+ * NOTE(review): a trapping xol instruction is therefore not reported.
+ */
+bool arch_uprobe_xol_was_trapped(struct task_struct *tsk)
+{
+	return false;
+}
+
+/*
+ * Die-notifier callback: hand uprobe breakpoint traps to the uprobes core.
+ */
+int arch_uprobe_exception_notify(struct notifier_block *self,
+		unsigned long val, void *data)
+{
+	struct die_args *args = data;
+	struct pt_regs *regs = args->regs;
+
+	/* regs == NULL is a kernel bug; only userspace traps are of interest */
+	if (WARN_ON(!regs) || !user_mode(regs))
+		return NOTIFY_DONE;
+
+	switch (val) {
+	case DIE_UPROBE:
+		if (uprobe_pre_sstep_notifier(regs))
+			return NOTIFY_STOP;
+		break;
+	case DIE_UPROBE_XOL:
+		if (uprobe_post_sstep_notifier(regs))
+			return NOTIFY_STOP;
+		break;
+	default:
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+/*
+ * Called when the XOL instruction either gets trapped or the thread has
+ * a fatal signal. Put the instruction pointer back to the probed address
+ * for the potential restart or for post mortem analysis.
+ */
+void arch_uprobe_abort_xol(struct arch_uprobe *aup,
+		struct pt_regs *regs)
+{
+	unsigned long probed = current->utask->vaddr;
+
+	instruction_pointer_set(regs, probed);
+}
+
+unsigned long arch_uretprobe_hijack_return_addr(
+		unsigned long trampoline_vaddr, struct pt_regs *regs)
+{
+	/* Remember the current value of register 26 before overwriting it */
+	unsigned long orig_ra = regs->regs[26];
+
+	/* Replace the return address with the trampoline address */
+	regs->regs[26] = trampoline_vaddr;
+
+	/* The original return address goes back to the caller */
+	return orig_ra;
+}
+
+/*
+ * See if the instruction can be emulated.
+ * Returns true if instruction was emulated, false otherwise.
+ *
+ * No instruction is emulated here, so this always returns false.
+ */
+bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	return false;
+}
+
+/*
+ * struct xol_area and get_trampoline_vaddr() are copied from
+ * kernel/events/uprobes.c to avoid modifying arch-independent code.
+ * NOTE(review): presumably this copy must stay in sync with the original.
+ */
+struct xol_area {
+	wait_queue_head_t		wq;
+	atomic_t			slot_count;
+	unsigned long			*bitmap;
+	struct vm_special_mapping	xol_mapping;
+	struct page			*pages[2];
+	unsigned long			vaddr;
+};
+
+static unsigned long get_trampoline_vaddr(void)
+{
+	struct xol_area *area;
+
+	/* -1 (all bits set) is returned when the mm has no xol_area */
+	area = READ_ONCE(current->mm->uprobes_state.xol_area);
+	if (!area)
+		return -1;
+
+	return area->vaddr;
+}
+
+void sw64_fix_uretprobe(struct pt_regs *regs, unsigned long exc_pc)
+{
+	if (exc_pc != get_trampoline_vaddr())
+		return;
+
+	/* regs->pc was changed to orig_ret_vaddr in handle_trampoline() */
+	regs->regs[26] = regs->pc;
+}
diff --git a/arch/sw_64/net/Makefile b/arch/sw_64/net/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..d4663b4bf509894e62c3b02c69726ee5717c2dd4
--- /dev/null
+++ b/arch/sw_64/net/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Arch-specific network modules
+#
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o
diff --git a/arch/sw_64/net/bpf_jit.h b/arch/sw_64/net/bpf_jit.h
new file mode 100644
index 0000000000000000000000000000000000000000..929036d8ea6b10daec13166c1e87f63165d99f1a
--- /dev/null
+++ b/arch/sw_64/net/bpf_jit.h
@@ -0,0 +1,368 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * BPF JIT compiler for SW64
+ *
+ * Copyright (C) Mao Minkai
+ * Author: Mao Minkai
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SW64_NET_BPF_JIT_H
+#define _SW64_NET_BPF_JIT_H
+
+/* SW64 instruction field shift */
+#define SW64_BPF_OPCODE_OFFSET		26
+#define SW64_BPF_RA_OFFSET		21
+#define SW64_BPF_RB_OFFSET		16
+#define SW64_BPF_SIMPLE_ALU_IMM_OFFSET	13
+#define SW64_BPF_SIMPLE_ALU_FUNC_OFFSET	5
+#define SW64_BPF_SIMPLE_ALU_RC_OFFSET	0
+#define SW64_BPF_LS_FUNC_OFFSET		12
+
+/* SW64 instruction opcodes */
+#define SW64_BPF_OPCODE_CALL		0x01
+#define SW64_BPF_OPCODE_RET		0x02
+#define SW64_BPF_OPCODE_JMP		0x03
+#define SW64_BPF_OPCODE_BR		0x04
+#define SW64_BPF_OPCODE_BSR		0x05
+#define SW64_BPF_OPCODE_MISC		0x06
+#define SW64_BPF_OPCODE_LOCK		0x08
+#define SW64_BPF_OPCODE_ALU_REG		0x10
+#define SW64_BPF_OPCODE_ALU_IMM		0x12
+#define SW64_BPF_OPCODE_LDBU		0x20
+#define SW64_BPF_OPCODE_LDHU		0x21
+#define SW64_BPF_OPCODE_LDW		0x22
+#define SW64_BPF_OPCODE_LDL		0x23
+#define SW64_BPF_OPCODE_STB		0x28
+#define SW64_BPF_OPCODE_STH		0x29
+#define SW64_BPF_OPCODE_STW		0x2A
+#define SW64_BPF_OPCODE_STL		0x2B
+#define SW64_BPF_OPCODE_BEQ		0x30
+#define SW64_BPF_OPCODE_BNE		0x31
+#define SW64_BPF_OPCODE_BLT		0x32
+#define SW64_BPF_OPCODE_BLE		0x33
+#define SW64_BPF_OPCODE_BGT		0x34
+#define SW64_BPF_OPCODE_BGE		0x35
+#define SW64_BPF_OPCODE_BLBC		0x36
+#define SW64_BPF_OPCODE_BLBS		0x37
+#define SW64_BPF_OPCODE_LDI		0x3E
+#define SW64_BPF_OPCODE_LDIH		0x3F
+
+/* SW64 MISC instructions function codes */
+#define SW64_BPF_FUNC_MISC_RD_F		0x1000
+#define SW64_BPF_FUNC_MISC_WR_F		0x1020
+
+/* SW64 LOCK instructions function codes */
+#define SW64_BPF_FUNC_LOCK_LLDW		0x0
+#define SW64_BPF_FUNC_LOCK_LLDL		0x1
+#define SW64_BPF_FUNC_LOCK_LSTW		0x8
+#define SW64_BPF_FUNC_LOCK_LSTL		0x9
+
+/* SW64 ALU instructions function codes */
+#define SW64_BPF_FUNC_ALU_ADDW		0x00
+#define SW64_BPF_FUNC_ALU_SUBW		0x01
+#define SW64_BPF_FUNC_ALU_ADDL		0x08
+#define SW64_BPF_FUNC_ALU_SUBL		0x09
+#define SW64_BPF_FUNC_ALU_MULW		0x10
+#define SW64_BPF_FUNC_ALU_MULL		0x18
+#define SW64_BPF_FUNC_ALU_CMPEQ		0x28
+#define SW64_BPF_FUNC_ALU_CMPLT		0x29
+#define SW64_BPF_FUNC_ALU_CMPLE		0x2A
+#define SW64_BPF_FUNC_ALU_CMPULT	0x2B
+#define SW64_BPF_FUNC_ALU_CMPULE	0x2C
+#define SW64_BPF_FUNC_ALU_AND		0x38
+#define SW64_BPF_FUNC_ALU_BIC		0x39
+#define SW64_BPF_FUNC_ALU_BIS		0x3A
+#define SW64_BPF_FUNC_ALU_ORNOT		0x3B
+#define SW64_BPF_FUNC_ALU_XOR		0x3C
+#define SW64_BPF_FUNC_ALU_EQV		0x3D
+#define SW64_BPF_FUNC_ALU_SLL		0x48
+#define SW64_BPF_FUNC_ALU_SRL		0x49
+#define SW64_BPF_FUNC_ALU_SRA		0x4A
+#define SW64_BPF_FUNC_ALU_ZAP		0x68
+#define SW64_BPF_FUNC_ALU_ZAPNOT	0x69
+#define SW64_BPF_FUNC_ALU_SEXTB		0x6A
+#define SW64_BPF_FUNC_ALU_SEXTH		0x6B
+
+/* special instruction used in jit_fill_hole() */
+#define SW64_BPF_ILLEGAL_INSN	(0x1ff00000)	/* pri_ret/b	$31 */
+
+enum sw64_bpf_registers {
+	SW64_BPF_REG_V0		= 0,	/* keep return value */
+	SW64_BPF_REG_T0		= 1,
+	SW64_BPF_REG_T1		= 2,
+	SW64_BPF_REG_T2		= 3,
+	SW64_BPF_REG_T3		= 4,
+	SW64_BPF_REG_T4		= 5,
+	SW64_BPF_REG_T5		= 6,
+	SW64_BPF_REG_T6		= 7,
+	SW64_BPF_REG_T7		= 8,
+	SW64_BPF_REG_S0		= 9,	/* callee saved */
+	SW64_BPF_REG_S1		= 10,	/* callee saved */
+	SW64_BPF_REG_S2		= 11,	/* callee saved */
+	SW64_BPF_REG_S3		= 12,	/* callee saved */
+	SW64_BPF_REG_S4		= 13,	/* callee saved */
+	SW64_BPF_REG_S5		= 14,	/* callee saved */
+	SW64_BPF_REG_S6		= 15,	/* callee saved */
+	SW64_BPF_REG_FP		= 15,	/* frame pointer if necessary (same number as S6) */
+	SW64_BPF_REG_A0		= 16,	/* argument 0 */
+	SW64_BPF_REG_A1		= 17,	/* argument 1 */
+	SW64_BPF_REG_A2		= 18,	/* argument 2 */
+	SW64_BPF_REG_A3		= 19,	/* argument 3 */
+	SW64_BPF_REG_A4		= 20,	/* argument 4 */
+	SW64_BPF_REG_A5		= 21,	/* argument 5 */
+	SW64_BPF_REG_T8		= 22,
+	SW64_BPF_REG_T9		= 23,
+	SW64_BPF_REG_T10	= 24,
+	SW64_BPF_REG_T11	= 25,
+	SW64_BPF_REG_RA		= 26,	/* callee saved, keep return address */
+	SW64_BPF_REG_T12	= 27,
+	SW64_BPF_REG_PV		= 27,	/* same number as T12 */
+	SW64_BPF_REG_AT		= 28,	/* reserved by assembler */
+	SW64_BPF_REG_GP		= 29,	/* global pointer */
+	SW64_BPF_REG_SP		= 30,	/* callee saved, stack pointer */
+	SW64_BPF_REG_ZR		= 31	/* read 0 */
+};
+
+/* SW64 load and store instructions */
+#define SW64_BPF_LDBU(dst, rb, offset16) \
+	sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LDBU, dst, rb, offset16)
+#define SW64_BPF_LDHU(dst, rb, offset16) \
+	sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LDHU, dst, rb, offset16)
+#define SW64_BPF_LDW(dst, rb, offset16) \
+	sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LDW, dst, rb, offset16)
+#define SW64_BPF_LDL(dst, rb, offset16) \
+	sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LDL, dst, rb, offset16)
+#define SW64_BPF_STB(src, rb, offset16) \
+	sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_STB, src, rb, offset16)
+#define SW64_BPF_STH(src, rb, offset16) \
+	sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_STH, src, rb, offset16)
+#define SW64_BPF_STW(src, rb, offset16) \
+	sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_STW, src, rb, offset16)
+#define SW64_BPF_STL(src, rb, offset16) \
+	sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_STL, src, rb, offset16)
+#define SW64_BPF_LDI(dst, rb, imm16) \
+	sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LDI, dst, rb, imm16)
+#define SW64_BPF_LDIH(dst, rb, imm16) \
+	sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_LDIH, dst, rb, imm16)
+
+/* SW64 lock instructions */
+#define SW64_BPF_LLDW(ra, rb, offset16) \
+	sw64_bpf_gen_format_ls_func(SW64_BPF_OPCODE_LOCK, \
+			ra, rb, offset16, SW64_BPF_FUNC_LOCK_LLDW)
+#define SW64_BPF_LLDL(ra, rb, offset16) \
+	sw64_bpf_gen_format_ls_func(SW64_BPF_OPCODE_LOCK, \
+			ra, rb, offset16, SW64_BPF_FUNC_LOCK_LLDL)
+#define SW64_BPF_LSTW(ra, rb, offset16) \
+	sw64_bpf_gen_format_ls_func(SW64_BPF_OPCODE_LOCK, \
+			ra, rb, offset16, SW64_BPF_FUNC_LOCK_LSTW)
+#define SW64_BPF_LSTL(ra, rb, offset16) \
+	sw64_bpf_gen_format_ls_func(SW64_BPF_OPCODE_LOCK, \
+			ra, rb, offset16, SW64_BPF_FUNC_LOCK_LSTL)
+#define SW64_BPF_RD_F(ra) \
+	sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_MISC, \
+			ra, SW64_BPF_REG_ZR, SW64_BPF_FUNC_MISC_RD_F)
+#define SW64_BPF_WR_F(ra) \
+	sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_MISC, \
+			ra, SW64_BPF_REG_ZR, SW64_BPF_FUNC_MISC_WR_F)
+
+/* SW64 ALU instructions REG format */
+#define SW64_BPF_ADDW_REG(ra, rb, dst) \
+	sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
+			ra, rb, dst, SW64_BPF_FUNC_ALU_ADDW)
+#define SW64_BPF_ADDL_REG(ra, rb, dst) \
+	sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
+			ra, rb, dst, SW64_BPF_FUNC_ALU_ADDL)
+#define SW64_BPF_SUBW_REG(ra, rb, dst) \
+	sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
+			ra, rb, dst, SW64_BPF_FUNC_ALU_SUBW)
+#define SW64_BPF_SUBL_REG(ra, rb, dst) \
+	sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
+			ra, rb, dst, SW64_BPF_FUNC_ALU_SUBL)
+#define SW64_BPF_MULW_REG(ra, rb, dst) \
+	sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
+			ra, rb, dst, SW64_BPF_FUNC_ALU_MULW)
+#define SW64_BPF_MULL_REG(ra, rb, dst) \
+	sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
+			ra, rb, dst, SW64_BPF_FUNC_ALU_MULL)
+#define SW64_BPF_ZAP_REG(ra, rb, dst) \
+	sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
+			ra, rb, dst, SW64_BPF_FUNC_ALU_ZAP)
+#define SW64_BPF_ZAPNOT_REG(ra, rb, dst) \
+	sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
+			ra, rb, dst, SW64_BPF_FUNC_ALU_ZAPNOT)
+#define SW64_BPF_SEXTB_REG(rb, dst) \
+	sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
+			SW64_BPF_REG_ZR, rb, dst, SW64_BPF_FUNC_ALU_SEXTB)
+#define SW64_BPF_SEXTH_REG(rb, dst) \
+	sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
+			SW64_BPF_REG_ZR, rb, dst, SW64_BPF_FUNC_ALU_SEXTH)
+
+/* SW64 ALU instructions IMM format */
+#define SW64_BPF_ADDW_IMM(ra, imm8, dst) \
+	sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
+			ra, imm8, dst, SW64_BPF_FUNC_ALU_ADDW)
+#define SW64_BPF_ADDL_IMM(ra, imm8, dst) \
+	sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
+			ra, imm8, dst, SW64_BPF_FUNC_ALU_ADDL)
+#define SW64_BPF_SUBW_IMM(ra, imm8, dst) \
+	sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
+			ra, imm8, dst, SW64_BPF_FUNC_ALU_SUBW)
+#define SW64_BPF_SUBL_IMM(ra, imm8, dst) \
+	sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
+			ra, imm8, dst, SW64_BPF_FUNC_ALU_SUBL)
+#define SW64_BPF_MULW_IMM(ra, imm8, dst) \
+	sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
+			ra, imm8, dst, SW64_BPF_FUNC_ALU_MULW)
+#define SW64_BPF_MULL_IMM(ra, imm8, dst) \
+	sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
+			ra, imm8, dst, SW64_BPF_FUNC_ALU_MULL)
+#define SW64_BPF_ZAP_IMM(ra, imm8, dst) \
+	sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
+			ra, imm8, dst, SW64_BPF_FUNC_ALU_ZAP)
+#define SW64_BPF_ZAPNOT_IMM(ra, imm8, dst) \
+	sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
+			ra, imm8, dst, SW64_BPF_FUNC_ALU_ZAPNOT)
+#define SW64_BPF_SEXTB_IMM(imm8, dst) \
+	sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
+			SW64_BPF_REG_ZR, imm8, dst, SW64_BPF_FUNC_ALU_SEXTB)
+#define SW64_BPF_SEXTH_IMM(imm8, dst) \
+	sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
+			SW64_BPF_REG_ZR, imm8, dst, SW64_BPF_FUNC_ALU_SEXTH)
+
+/* SW64 bit shift instructions REG format */
+#define SW64_BPF_SLL_REG(src, rb, dst) \
+	sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
+			src, rb, dst, SW64_BPF_FUNC_ALU_SLL)
+#define SW64_BPF_SRL_REG(src, rb, dst) \
+	sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
+			src, rb, dst, SW64_BPF_FUNC_ALU_SRL)
+#define SW64_BPF_SRA_REG(src, rb, dst) \
+	sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
+			src, rb, dst, SW64_BPF_FUNC_ALU_SRA)
+
+/* SW64 bit shift instructions IMM format */
+#define SW64_BPF_SLL_IMM(src, imm8, dst) \
+	sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
+			src, imm8, dst, SW64_BPF_FUNC_ALU_SLL)
+#define SW64_BPF_SRL_IMM(src, imm8, dst) \
+	sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
+			src, imm8, dst, SW64_BPF_FUNC_ALU_SRL)
+#define SW64_BPF_SRA_IMM(src, imm8, dst) \
+	sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
+			src, imm8, dst, SW64_BPF_FUNC_ALU_SRA)
+
+/* SW64 control instructions */
+#define SW64_BPF_CALL(ra, rb) \
+	sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_CALL, ra, rb, 0)
+#define SW64_BPF_RET(rb) \
+	sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_RET, SW64_BPF_REG_ZR, rb, 0)
+#define SW64_BPF_JMP(ra, rb) \
+	sw64_bpf_gen_format_ls(SW64_BPF_OPCODE_JMP, ra, rb, 0)
+#define SW64_BPF_BR(ra, offset) \
+	sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BR, ra, offset)
+#define SW64_BPF_BSR(ra, offset) \
+	sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BSR, ra, offset)
+#define SW64_BPF_BEQ(ra, offset) \
+	sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BEQ, ra, offset)
+#define SW64_BPF_BNE(ra, offset) \
+	sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BNE, ra, offset)
+#define SW64_BPF_BLT(ra, offset) \
+	sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BLT, ra, offset)
+#define SW64_BPF_BLE(ra, offset) \
+	sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BLE, ra, offset)
+#define SW64_BPF_BGT(ra, offset) \
+	sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BGT, ra, offset)
+#define SW64_BPF_BGE(ra, offset) \
+	sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BGE, ra, offset)
+#define SW64_BPF_BLBC(ra, offset) \
+	sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BLBC, ra, offset)
+#define SW64_BPF_BLBS(ra, offset) \
+	sw64_bpf_gen_format_br(SW64_BPF_OPCODE_BLBS, ra, offset)
+
+/* SW64 bit logic instructions REG format */
+#define SW64_BPF_AND_REG(ra, rb, dst) \
+	sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
+			ra, rb, dst, SW64_BPF_FUNC_ALU_AND)
+#define SW64_BPF_ANDNOT_REG(ra, rb, dst) \
+	sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
+			ra, rb, dst, SW64_BPF_FUNC_ALU_BIC)
+#define SW64_BPF_BIS_REG(ra, rb, dst) \
+	sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
+			ra, rb, dst, SW64_BPF_FUNC_ALU_BIS)
+#define SW64_BPF_ORNOT_REG(ra, rb, dst) \
+	sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
+			ra, rb, dst, SW64_BPF_FUNC_ALU_ORNOT)
+#define SW64_BPF_XOR_REG(ra, rb, dst) \
+	sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
+			ra, rb, dst, SW64_BPF_FUNC_ALU_XOR)
+#define SW64_BPF_EQV_REG(ra, rb, dst) \
+	sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
+			ra, rb, dst, SW64_BPF_FUNC_ALU_EQV)
+
+/* SW64 bit logic instructions IMM format */
+#define SW64_BPF_AND_IMM(ra, imm8, dst) \
+	sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
+			ra, imm8, dst, SW64_BPF_FUNC_ALU_AND)
+#define SW64_BPF_ANDNOT_IMM(ra, imm8, dst) \
+	sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
+			ra, imm8, dst, SW64_BPF_FUNC_ALU_BIC)
+#define SW64_BPF_BIS_IMM(ra, imm8, dst) \
+	sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
+			ra, imm8, dst, SW64_BPF_FUNC_ALU_BIS)
+#define SW64_BPF_ORNOT_IMM(ra, imm8, dst) \
+	sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
+			ra, imm8, dst, SW64_BPF_FUNC_ALU_ORNOT)
+#define SW64_BPF_XOR_IMM(ra, imm8, dst) \
+	sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
+			ra, imm8, dst, SW64_BPF_FUNC_ALU_XOR)
+#define SW64_BPF_EQV_IMM(ra, imm8, dst) \
+	sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
+			ra, imm8, dst, SW64_BPF_FUNC_ALU_EQV)
+
+/* SW64 compare instructions REG format */
+#define SW64_BPF_CMPEQ_REG(ra, rb, dst) \
+	sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
+			ra, rb, dst, SW64_BPF_FUNC_ALU_CMPEQ)
+#define SW64_BPF_CMPLT_REG(ra, rb, dst) \
+	sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
+			ra, rb, dst, SW64_BPF_FUNC_ALU_CMPLT)
+#define SW64_BPF_CMPLE_REG(ra, rb, dst) \
+	sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
+			ra, rb, dst, SW64_BPF_FUNC_ALU_CMPLE)
+#define SW64_BPF_CMPULT_REG(ra, rb, dst) \
+	sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
+			ra, rb, dst, SW64_BPF_FUNC_ALU_CMPULT)
+#define SW64_BPF_CMPULE_REG(ra, rb, dst) \
+	sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
+			ra, rb, dst, SW64_BPF_FUNC_ALU_CMPULE)
+
+/* SW64 compare instructions imm format */
+#define SW64_BPF_CMPEQ_IMM(ra, imm8, dst) \
+	sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
+			ra, imm8, dst, SW64_BPF_FUNC_ALU_CMPEQ)
+#define SW64_BPF_CMPLT_IMM(ra, imm8, dst) \
+	sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
+			ra, imm8, dst, SW64_BPF_FUNC_ALU_CMPLT)
+#define SW64_BPF_CMPLE_IMM(ra, imm8, dst) \
+	sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
+			ra, imm8, dst, SW64_BPF_FUNC_ALU_CMPLE)
+#define SW64_BPF_CMPULT_IMM(ra, imm8, dst) \
+	sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
+			ra, imm8, dst, SW64_BPF_FUNC_ALU_CMPULT)
+#define SW64_BPF_CMPULE_IMM(ra, imm8, dst) \
+	sw64_bpf_gen_format_simple_alu_imm(SW64_BPF_OPCODE_ALU_IMM, \
+			ra, imm8, dst, SW64_BPF_FUNC_ALU_CMPULE)
+
+#endif /* _SW64_NET_BPF_JIT_H */
diff --git a/arch/sw_64/net/bpf_jit_comp.c b/arch/sw_64/net/bpf_jit_comp.c
new file mode 100644
index 0000000000000000000000000000000000000000..31202dd0f9cf8dd8fd51d0d30c94ea422d74c8b7
--- /dev/null
+++ b/arch/sw_64/net/bpf_jit_comp.c
@@ -0,0 +1,1455 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * BPF JIT compiler for SW64
+ *
+ * Copyright (C) Mao Minkai
+ * Author: Mao Minkai
+ *
+ * This file is taken from arch/arm64/net/bpf_jit_comp.c
+ *	Copyright (C) 2014-2016 Zi Shen Lim <zlim.lnx@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/printk.h>
+
+#include <asm/cacheflush.h>
+
+#include "bpf_jit.h"
+
+#define TCALL_CNT (MAX_BPF_JIT_REG + 0)
+
+static const int bpf2sw64[] = {
+	/* return value from in-kernel function, and exit value from eBPF */
+	[BPF_REG_0] = SW64_BPF_REG_V0,
+	/* arguments from eBPF program to in-kernel function */
+	[BPF_REG_1] = SW64_BPF_REG_A0,
+	[BPF_REG_2] = SW64_BPF_REG_A1,
+	[BPF_REG_3] = SW64_BPF_REG_A2,
+	[BPF_REG_4] = SW64_BPF_REG_A3,
+	[BPF_REG_5] = SW64_BPF_REG_A4,
+	/* callee saved registers that in-kernel function will preserve */
+	[BPF_REG_6] = SW64_BPF_REG_S0,
+	[BPF_REG_7] = SW64_BPF_REG_S1,
+	[BPF_REG_8] = SW64_BPF_REG_S2,
+	[BPF_REG_9] = SW64_BPF_REG_S3,
+	/* read-only frame pointer to access stack */
+	[BPF_REG_FP] = SW64_BPF_REG_FP,
+	/* tail_call_cnt */
+	[TCALL_CNT] = SW64_BPF_REG_S4,
+	/* temporary register for blinding constants */
+	[BPF_REG_AX] = SW64_BPF_REG_T11,
+};
+
+struct jit_ctx {
+	const struct bpf_prog *prog;
+	int idx;		// next slot in image; advanced by every emit()
+	int current_tmp_reg;	// last number handed out by get_tmp_reg()
+	int epilogue_offset;
+	int *insn_offset;	// [bpf_insn_idx] = jited_insn_idx
+	int exentry_idx;
+	u32 *image;		// JITed instructions; when NULL emit() only counts
+	u32 stack_size;
+};
+
+struct sw64_jit_data {
+	struct bpf_binary_header *header;
+	u8 *image;	// bpf instruction
+	struct jit_ctx ctx;
+};
+
+static inline u32 sw64_bpf_gen_format_br(int opcode, enum sw64_bpf_registers ra, u32 disp)
+{
+	/* opcode | ra | 21-bit displacement; shifts are done in u32, not int */
+	return ((u32)opcode << SW64_BPF_OPCODE_OFFSET) |
+		((u32)ra << SW64_BPF_RA_OFFSET) | (disp & 0x1fffff);
+}
+
+static inline u32 sw64_bpf_gen_format_ls(int opcode, enum sw64_bpf_registers ra,
+		enum sw64_bpf_registers rb, u16 disp)
+{
+	/* opcode | ra | rb | 16-bit displacement; shifts are done in u32 */
+	return ((u32)opcode << SW64_BPF_OPCODE_OFFSET) |
+		((u32)ra << SW64_BPF_RA_OFFSET) |
+		((u32)rb << SW64_BPF_RB_OFFSET) | (disp & 0xffff);
+}
+
+static inline u32 sw64_bpf_gen_format_ls_func(int opcode, enum sw64_bpf_registers ra,
+		enum sw64_bpf_registers rb, u16 disp, int function)
+{
+	/* opcode | ra | rb | function code | 12-bit displacement */
+	return ((u32)opcode << SW64_BPF_OPCODE_OFFSET) |
+		((u32)ra << SW64_BPF_RA_OFFSET) |
+		((u32)rb << SW64_BPF_RB_OFFSET) |
+		((u32)function << SW64_BPF_LS_FUNC_OFFSET) | (disp & 0xfff);
+}
+
+static inline u32 sw64_bpf_gen_format_simple_alu_reg(int opcode, enum sw64_bpf_registers ra,
+		enum sw64_bpf_registers rb, enum sw64_bpf_registers rc, int function)
+{
+	/* Register form: opcode | ra | rb | function code | rc */
+	return ((u32)opcode << SW64_BPF_OPCODE_OFFSET) |
+		((u32)ra << SW64_BPF_RA_OFFSET) |
+		((u32)rb << SW64_BPF_RB_OFFSET) |
+		((u32)function << SW64_BPF_SIMPLE_ALU_FUNC_OFFSET) |
+		((u32)rc << SW64_BPF_SIMPLE_ALU_RC_OFFSET);
+}
+
+static inline u32 sw64_bpf_gen_format_simple_alu_imm(int opcode, enum sw64_bpf_registers ra,
+		u32 imm, enum sw64_bpf_registers rc, int function)
+{
+	/* Immediate form: opcode | ra | low 8 bits of imm | function code | rc */
+	return ((u32)opcode << SW64_BPF_OPCODE_OFFSET) |
+		((u32)ra << SW64_BPF_RA_OFFSET) |
+		((imm & 0xff) << SW64_BPF_SIMPLE_ALU_IMM_OFFSET) |
+		((u32)function << SW64_BPF_SIMPLE_ALU_FUNC_OFFSET) |
+		((u32)rc << SW64_BPF_SIMPLE_ALU_RC_OFFSET);
+}
+
+static inline void emit(const u32 insn, struct jit_ctx *ctx)
+{
+	/* Without an image buffer only the instruction index advances */
+	if (ctx->image)
+		ctx->image[ctx->idx] = insn;
+	ctx->idx += 1;
+}
+
+static inline int get_tmp_reg(struct jit_ctx *ctx)
+{
+	ctx->current_tmp_reg++;
+	/* 8 and above are never handed out (nor 22-25); -1 means none left */
+	if (unlikely(ctx->current_tmp_reg >= 8)) {
+		pr_err("eBPF JIT %s[%d]: not enough temporary registers!\n",
+				current->comm, current->pid);
+		return -1;
+	}
+	return ctx->current_tmp_reg;
+}
+
+static inline void put_tmp_reg(struct jit_ctx *ctx)
+{
+	/* Give back the most recent temporary; a count of 21 is reset to 7 */
+	if (--ctx->current_tmp_reg == 21)
+		ctx->current_tmp_reg = 7;
+}
+
+/* Emit the instructions that load the unsigned 32-bit constant imm into dst */
+static void emit_sw64_ldu32(const int dst, const u32 imm, struct jit_ctx *ctx)
+{
+	u16 imm_tmp;
+	u8 reg_tmp;
+
+	if (!imm) {
+		emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, dst), ctx);
+		return;
+	}
+
+	if (imm <= S16_MAX) {
+		emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm), ctx);
+		return;
+	}
+
+	/* Upper 17 bits all set: LDI the low 16 bits, then ZAP with mask 0xf0 */
+	if (imm >= U32_MAX - S16_MAX) {
+		emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm), ctx);
+		emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
+		return;
+	}
+
+	/* General case: bits 31:30 first, then 29:15 and 14:0 are added in */
+	imm_tmp = (imm >> 30) & 3;
+	emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm_tmp), ctx);
+	if (imm_tmp)
+		emit(SW64_BPF_SLL_IMM(dst, 30, dst), ctx);
+
+	imm_tmp = (imm >> 15) & 0x7fff;
+	if (imm_tmp) {
+		reg_tmp = get_tmp_reg(ctx);
+		emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx);
+		emit(SW64_BPF_SLL_IMM(reg_tmp, 15, reg_tmp), ctx);
+		emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx);
+		put_tmp_reg(ctx);
+	}
+
+	imm_tmp = imm & 0x7fff;
+	if (imm_tmp)
+		emit(SW64_BPF_LDI(dst, dst, imm_tmp), ctx);
+}
+
+/* Emit the instructions that load the signed 32-bit constant imm into dst */
+static void emit_sw64_lds32(const int dst, const s32 imm, struct jit_ctx *ctx)
+{
+	s16 hi = imm >> 16;
+	s16 lo = imm & 0xffff;
+	u8 reg_tmp;
+
+	if (!imm) {
+		emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, dst), ctx);
+		return;
+	}
+
+	/* Fits a signed 16-bit immediate: a single LDI is enough */
+	if (imm >= S16_MIN && imm <= S16_MAX) {
+		emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm), ctx);
+		return;
+	}
+
+	/* Upper half via LDIH; the low half is then added on top */
+	emit(SW64_BPF_LDIH(dst, SW64_BPF_REG_ZR, hi), ctx);
+	if (lo & 0x8000) {
+		/* Add bit 15 on its own (1 << 15) and keep 15 low bits for LDI */
+		reg_tmp = get_tmp_reg(ctx);
+		lo = lo & 0x7fff;
+		emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, 1), ctx);
+		emit(SW64_BPF_SLL_IMM(reg_tmp, 15, reg_tmp), ctx);
+		emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx);
+		put_tmp_reg(ctx);
+	}
+
+	if (lo)
+		emit(SW64_BPF_LDI(dst, dst, lo), ctx);
+}
+
+/* Emit the instructions that load the unsigned 64-bit constant imm into dst */
+static void emit_sw64_ldu64(const int dst, const u64 imm, struct jit_ctx *ctx)
+{
+	u16 imm_tmp;
+	u8 reg_tmp;
+
+	/* Values that fit in 32 bits, zero included, go through the u32 loader */
+	if (imm <= U32_MAX) {
+		emit_sw64_ldu32(dst, (u32)imm, ctx);
+		return;
+	}
+
+	/* Upper 49 bits all set: a single LDI of the low 16 bits */
+	if (imm >= (U64_MAX - S16_MAX)) {
+		emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm), ctx);
+		return;
+	}
+
+	/* General case: bits 63:60 first, then each 15-bit chunk is added in */
+	reg_tmp = get_tmp_reg(ctx);
+
+	imm_tmp = (imm >> 60) & 0xf;
+	emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm_tmp), ctx);
+	if (imm_tmp)
+		emit(SW64_BPF_SLL_IMM(dst, 60, dst), ctx);
+
+	imm_tmp = (imm >> 45) & 0x7fff;
+	if (imm_tmp) {
+		emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx);
+		emit(SW64_BPF_SLL_IMM(reg_tmp, 45, reg_tmp), ctx);
+		emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx);
+	}
+
+	imm_tmp = (imm >> 30) & 0x7fff;
+	if (imm_tmp) {
+		emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx);
+		emit(SW64_BPF_SLL_IMM(reg_tmp, 30, reg_tmp), ctx);
+		emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx);
+	}
+
+	imm_tmp = (imm >> 15) & 0x7fff;
+	if (imm_tmp) {
+		emit(SW64_BPF_LDI(reg_tmp, SW64_BPF_REG_ZR, imm_tmp), ctx);
+		emit(SW64_BPF_SLL_IMM(reg_tmp, 15, reg_tmp), ctx);
+		emit(SW64_BPF_ADDL_REG(dst, reg_tmp, dst), ctx);
+	}
+
+	/* Finally the low 15 bits */
+	imm_tmp = imm & 0x7fff;
+	if (imm_tmp)
+		emit(SW64_BPF_LDI(dst, dst, imm_tmp), ctx);
+
+	put_tmp_reg(ctx);
+}
+
+/* Do not change!!! See arch/sw_64/lib/divide.S for more detail */
+#define REG(x)		"$"str(x)
+#define str(x)		#x
+#define DIV_RET_ADDR	23
+#define DIVIDEND	24
+#define DIVISOR		25
+#define RESULT		27
+
+#include <asm/asm-prototypes.h>
+/*
+ * Emit a divide or modulo of dst by src, with the result written to dst.
+ * BPF_ALU uses the __divwu/__remwu helpers, BPF_ALU64 uses __divlu/__remlu;
+ * operands and result travel in the fixed registers defined above.
+ */
+static void emit_sw64_divmod(const int dst, const int src, struct jit_ctx *ctx, u8 code)
+{
+	const u8 op = BPF_OP(code);
+
+	/* Copy the operands into the DIVIDEND and DIVISOR registers */
+	emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, dst, DIVIDEND), ctx);
+	emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, src, DIVISOR), ctx);
+
+	if (BPF_CLASS(code) == BPF_ALU) {
+		if (op == BPF_DIV)
+			emit_sw64_ldu64(SW64_BPF_REG_PV, (u64)__divwu, ctx);
+		else if (op == BPF_MOD)
+			emit_sw64_ldu64(SW64_BPF_REG_PV, (u64)__remwu, ctx);
+		emit(SW64_BPF_CALL(DIV_RET_ADDR, SW64_BPF_REG_PV), ctx);
+		/* 32-bit op: RESULT is copied to dst through ZAP with mask 0xf0 */
+		emit(SW64_BPF_ZAP_IMM(RESULT, 0xf0, dst), ctx);
+	} else if (BPF_CLASS(code) == BPF_ALU64) {
+		if (op == BPF_DIV)
+			emit_sw64_ldu64(SW64_BPF_REG_PV, (u64)__divlu, ctx);
+		else if (op == BPF_MOD)
+			emit_sw64_ldu64(SW64_BPF_REG_PV, (u64)__remlu, ctx);
+		emit(SW64_BPF_CALL(DIV_RET_ADDR, SW64_BPF_REG_PV), ctx);
+		/* 64-bit op: RESULT is copied to dst unchanged */
+		emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, RESULT, dst), ctx);
+	}
+}
+
+#undef REG
+#undef str
+#undef DIVIDEND
+#undef DIVISOR
+#undef RESULT
+
+/* STX XADD: lock *(u32 *)(dst + off) += src */
+static void emit_sw64_xadd32(const int src, int dst, s16 off, struct jit_ctx *ctx)
+{
+	u8 addr = get_tmp_reg(ctx);
+	u8 val = get_tmp_reg(ctx);
+	u8 flag = get_tmp_reg(ctx);
+	int retry;
+
+	/* The lock instructions only encode a 12-bit displacement */
+	if (off < -0x800 || off > 0x7ff) {
+		emit(SW64_BPF_LDI(addr, dst, off), ctx);
+		dst = addr;
+		off = 0;
+	}
+
+	retry = ctx->idx;
+	emit(SW64_BPF_LLDW(val, dst, off), ctx);
+	emit(SW64_BPF_LDI(flag, SW64_BPF_REG_ZR, 1), ctx);
+	emit(SW64_BPF_WR_F(flag), ctx);
+	emit(SW64_BPF_ADDW_REG(val, src, val), ctx);
+	if (ctx->idx & 1)
+		emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, SW64_BPF_REG_ZR), ctx);
+	emit(SW64_BPF_LSTW(val, dst, off), ctx);
+	emit(SW64_BPF_RD_F(flag), ctx);
+	/* BEQ on the read-back flag targets the locked load (presumably a retry) */
+	emit(SW64_BPF_BEQ(flag, retry - ctx->idx - 1), ctx);
+
+	put_tmp_reg(ctx);
+	put_tmp_reg(ctx);
+	put_tmp_reg(ctx);
+}
+
+/* STX XADD: lock *(u64 *)(dst + off) += src */
+static void emit_sw64_xadd64(const int src, int dst, s16 off, struct jit_ctx *ctx)
+{
+	u8 addr = get_tmp_reg(ctx);
+	u8 val = get_tmp_reg(ctx);
+	u8 flag = get_tmp_reg(ctx);
+	int retry;
+
+	/* The lock instructions only encode a 12-bit displacement */
+	if (off < -0x800 || off > 0x7ff) {
+		emit(SW64_BPF_LDI(addr, dst, off), ctx);
+		dst = addr;
+		off = 0;
+	}
+
+	retry = ctx->idx;
+	emit(SW64_BPF_LLDL(val, dst, off), ctx);
+	emit(SW64_BPF_LDI(flag, SW64_BPF_REG_ZR, 1), ctx);
+	emit(SW64_BPF_WR_F(flag), ctx);
+	emit(SW64_BPF_ADDL_REG(val, src, val), ctx);
+	if (ctx->idx & 1)
+		emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, SW64_BPF_REG_ZR), ctx);
+	emit(SW64_BPF_LSTL(val, dst, off), ctx);
+	emit(SW64_BPF_RD_F(flag), ctx);
+	/* BEQ on the read-back flag targets the locked load (presumably a retry) */
+	emit(SW64_BPF_BEQ(flag, retry - ctx->idx - 1), ctx);
+
+	put_tmp_reg(ctx);
+	put_tmp_reg(ctx);
+	put_tmp_reg(ctx);
+}
+
+/*
+ * Emit code that swaps the two low bytes of @dst in place; every other byte
+ * of the register ends up zero.
+ */
+static void emit_sw64_htobe16(const int dst, struct jit_ctx *ctx)
+{
+	const u8 upper = get_tmp_reg(ctx);
+
+	/* Split: byte 1 goes to the scratch register, byte 0 stays in dst */
+	emit(SW64_BPF_ZAPNOT_IMM(dst, 0x2, upper), ctx);
+	emit(SW64_BPF_ZAPNOT_IMM(dst, 0x1, dst), ctx);
+
+	/* Exchange their positions and merge */
+	emit(SW64_BPF_SRL_IMM(upper, 8, upper), ctx);
+	emit(SW64_BPF_SLL_IMM(dst, 8, dst), ctx);
+	emit(SW64_BPF_BIS_REG(dst, upper, dst), ctx);
+
+	put_tmp_reg(ctx);
+}
+
+/*
+ * Emit code that reverses the four low bytes of @dst in place; the upper
+ * four bytes of the register end up zero.
+ */
+static void emit_sw64_htobe32(const int dst, struct jit_ctx *ctx)
+{
+	const u8 lane = get_tmp_reg(ctx);
+	const u8 acc = get_tmp_reg(ctx);
+
+	/* byte 3 -> byte 0, seeds the accumulator */
+	emit(SW64_BPF_ZAPNOT_IMM(dst, 0x8, lane), ctx);
+	emit(SW64_BPF_SRL_IMM(lane, 24, acc), ctx);
+
+	/* byte 2 -> byte 1 */
+	emit(SW64_BPF_ZAPNOT_IMM(dst, 0x4, lane), ctx);
+	emit(SW64_BPF_SRL_IMM(lane, 8, lane), ctx);
+	emit(SW64_BPF_BIS_REG(acc, lane, acc), ctx);
+
+	/* byte 1 -> byte 2 */
+	emit(SW64_BPF_ZAPNOT_IMM(dst, 0x2, lane), ctx);
+	emit(SW64_BPF_SLL_IMM(lane, 8, lane), ctx);
+	emit(SW64_BPF_BIS_REG(acc, lane, acc), ctx);
+
+	/* byte 0 -> byte 3, then fold in everything collected so far */
+	emit(SW64_BPF_ZAPNOT_IMM(dst, 0x1, dst), ctx);
+	emit(SW64_BPF_SLL_IMM(dst, 24, dst), ctx);
+	emit(SW64_BPF_BIS_REG(dst, acc, dst), ctx);
+
+	put_tmp_reg(ctx);
+	put_tmp_reg(ctx);
+}
+
+/*
+ * Emit code that reverses all eight bytes of @dst in place.
+ *
+ * Byte 7 seeds an accumulator, bytes 6..1 are isolated one at a time, moved
+ * to their mirrored position and OR-ed in, and byte 0 is finally shifted to
+ * the top of @dst and merged with the accumulator.
+ */
+static void emit_sw64_htobe64(const int dst, struct jit_ctx *ctx)
+{
+	const u8 lane = get_tmp_reg(ctx);
+	const u8 acc = get_tmp_reg(ctx);
+	int byte;
+
+	/* byte 7 -> byte 0 */
+	emit(SW64_BPF_ZAPNOT_IMM(dst, 0x80, lane), ctx);
+	emit(SW64_BPF_SRL_IMM(lane, 56, acc), ctx);
+
+	/* bytes 6, 5, 4 move down by 40, 24 and 8 bits */
+	for (byte = 6; byte >= 4; byte--) {
+		const int mask = 1 << byte;
+		const int shift = 8 * (2 * byte - 7);
+
+		emit(SW64_BPF_ZAPNOT_IMM(dst, mask, lane), ctx);
+		emit(SW64_BPF_SRL_IMM(lane, shift, lane), ctx);
+		emit(SW64_BPF_BIS_REG(acc, lane, acc), ctx);
+	}
+
+	/* bytes 3, 2, 1 move up by 8, 24 and 40 bits */
+	for (byte = 3; byte >= 1; byte--) {
+		const int mask = 1 << byte;
+		const int shift = 8 * (7 - 2 * byte);
+
+		emit(SW64_BPF_ZAPNOT_IMM(dst, mask, lane), ctx);
+		emit(SW64_BPF_SLL_IMM(lane, shift, lane), ctx);
+		emit(SW64_BPF_BIS_REG(acc, lane, acc), ctx);
+	}
+
+	/* byte 0 -> byte 7, then fold in everything collected so far */
+	emit(SW64_BPF_ZAPNOT_IMM(dst, 0x01, dst), ctx);
+	emit(SW64_BPF_SLL_IMM(dst, 56, dst), ctx);
+	emit(SW64_BPF_BIS_REG(dst, acc, dst), ctx);
+
+	put_tmp_reg(ctx);
+	put_tmp_reg(ctx);
+}
+
+/*
+ * Fill callback handed to bpf_jit_binary_alloc(): pads @area with
+ * SW64_BPF_ILLEGAL_INSN, replicated into both 32-bit halves of the
+ * 64-bit fill pattern.
+ */
+static void jit_fill_hole(void *area, unsigned int size)
+{
+	const unsigned long insn = SW64_BPF_ILLEGAL_INSN;
+	const unsigned long pattern = insn | (insn << 32);
+
+	__constant_c_memset(area, pattern, size);
+}
+
+static int offset_to_epilogue(const struct jit_ctx *ctx);
+
+/*
+ * Translate a BPF branch offset into a distance in JITed instructions.
+ *
+ * BPF offsets are relative to the instruction following the branch, so both
+ * ends are looked up in ctx->insn_offset starting from @bpf_idx + 1.
+ * Returns 0 while there is no image yet.
+ */
+static int bpf2sw64_offset(int bpf_idx, s32 off, const struct jit_ctx *ctx)
+{
+	const int next = bpf_idx + 1;
+
+	if (!ctx->image)
+		return 0;
+
+	return ctx->insn_offset[next + off] - ctx->insn_offset[next];
+}
+
+/*
+ * Number of JITed instructions between the current emit position and the
+ * epilogue; 0 while there is no image yet.
+ */
+static int offset_to_epilogue(const struct jit_ctx *ctx)
+{
+	return ctx->image ? ctx->epilogue_offset - ctx->idx : 0;
+}
+
+/*
+ * For tail call, jump to set up function call stack: the callee is entered
+ * PROLOGUE_OFFSET instructions past its start, i.e. after the frame
+ * allocation, the eight register saves, the frame pointer setup and the
+ * tail call counter reset emitted below.
+ */
+#define PROLOGUE_OFFSET	11
+
+/*
+ * Emit the program prologue: allocate a 64-byte save area, spill RA, the BPF
+ * frame pointer, r6-r9, the tail call counter and GP into it, point the BPF
+ * frame pointer at it and reserve the (16-byte aligned) BPF stack below.
+ * The tail call counter is only zeroed for programs that were not classic BPF.
+ */
+static void build_prologue(struct jit_ctx *ctx, bool was_classic)
+{
+	const u8 frame = bpf2sw64[BPF_REG_FP];
+	const u8 tail_cnt = bpf2sw64[TCALL_CNT];
+	/* One 8-byte slot per register, in save-area order */
+	const u8 saved[] = {
+		SW64_BPF_REG_RA,
+		frame,
+		bpf2sw64[BPF_REG_6],
+		bpf2sw64[BPF_REG_7],
+		bpf2sw64[BPF_REG_8],
+		bpf2sw64[BPF_REG_9],
+		tail_cnt,
+		SW64_BPF_REG_GP,
+	};
+	int slot;
+
+	/* Save callee-saved registers */
+	emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, -64), ctx);
+	for (slot = 0; slot < (int)ARRAY_SIZE(saved); slot++) {
+		const int disp = slot * 8;
+
+		emit(SW64_BPF_STL(saved[slot], SW64_BPF_REG_SP, disp), ctx);
+	}
+
+	/* Set up BPF prog stack base register */
+	emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_SP, frame), ctx);
+
+	/* Initialize tail_call_cnt */
+	if (!was_classic)
+		emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, tail_cnt), ctx);
+
+	/* Set up function call stack */
+	ctx->stack_size = (ctx->prog->aux->stack_depth + 15) & (~15);
+	emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, -ctx->stack_size), ctx);
+}
+
+/*
+ * Emit the program epilogue: drop the BPF stack, reload RA, the BPF frame
+ * pointer, r6-r9, the tail call counter and GP from the 64-byte save area,
+ * release that area and return through RA.
+ */
+static void build_epilogue(struct jit_ctx *ctx)
+{
+	/* One 8-byte slot per register, in save-area order */
+	const u8 restored[] = {
+		SW64_BPF_REG_RA,
+		bpf2sw64[BPF_REG_FP],
+		bpf2sw64[BPF_REG_6],
+		bpf2sw64[BPF_REG_7],
+		bpf2sw64[BPF_REG_8],
+		bpf2sw64[BPF_REG_9],
+		bpf2sw64[TCALL_CNT],
+		SW64_BPF_REG_GP,
+	};
+	int slot;
+
+	/* Destroy function call stack */
+	emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, ctx->stack_size), ctx);
+
+	/* Restore callee-saved registers */
+	for (slot = 0; slot < (int)ARRAY_SIZE(restored); slot++) {
+		const int disp = slot * 8;
+
+		emit(SW64_BPF_LDL(restored[slot], SW64_BPF_REG_SP, disp), ctx);
+	}
+	emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, 64), ctx);
+
+	/* Return */
+	emit(SW64_BPF_RET(SW64_BPF_REG_RA), ctx);
+}
+
+/*
+ * Emit the instruction sequence of one bpf_tail_call() site.
+ *
+ * Every "goto out" in the sequence branches to instruction index @out_idx.
+ * While ctx->image is NULL the branch displacements are placeholders (0);
+ * the number of emitted instructions is the same either way.
+ */
+static void emit_bpf_tail_call_seq(struct jit_ctx *ctx, int out_idx)
+{
+	/* bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index) */
+	const u8 r2 = bpf2sw64[BPF_REG_2];	/* struct bpf_array *array */
+	const u8 r3 = bpf2sw64[BPF_REG_3];	/* u32 index */
+
+	const u8 tmp = get_tmp_reg(ctx);
+	const u8 prg = get_tmp_reg(ctx);
+	const u8 tcc = bpf2sw64[TCALL_CNT];
+	u64 offset;
+#define out_offset	(ctx->image ? (out_idx - ctx->idx - 1) : 0)
+
+	/* if (index >= array->map.max_entries)
+	 *     goto out;
+	 */
+	offset = offsetof(struct bpf_array, map.max_entries);
+	emit_sw64_ldu64(tmp, offset, ctx);
+	emit(SW64_BPF_ADDL_REG(r2, tmp, tmp), ctx);	/* tmp = r2 + tmp = &map.max_entries */
+	emit(SW64_BPF_LDW(tmp, tmp, 0), ctx);		/* tmp = *tmp = map.max_entries */
+	emit(SW64_BPF_ZAP_IMM(tmp, 0xf0, tmp), ctx);	/* map.max_entries is u32 */
+	emit(SW64_BPF_ZAP_IMM(r3, 0xf0, r3), ctx);	/* index is u32 */
+	emit(SW64_BPF_CMPULE_REG(tmp, r3, tmp), ctx);
+	emit(SW64_BPF_BNE(tmp, out_offset), ctx);
+
+	/* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
+	 *     goto out;
+	 * tail_call_cnt++;
+	 */
+	emit_sw64_ldu64(tmp, MAX_TAIL_CALL_CNT, ctx);
+	emit(SW64_BPF_CMPULT_REG(tmp, tcc, tmp), ctx);
+	emit(SW64_BPF_BNE(tmp, out_offset), ctx);
+	emit(SW64_BPF_ADDL_IMM(tcc, 1, tcc), ctx);
+
+	/* prog = array->ptrs[index];
+	 * if (prog == NULL)
+	 *     goto out;
+	 */
+	offset = offsetof(struct bpf_array, ptrs);
+	emit_sw64_ldu64(tmp, offset, ctx);
+	emit(SW64_BPF_ADDL_REG(r2, tmp, tmp), ctx);	/* tmp = r2 + tmp = &ptrs[0] */
+	emit(SW64_BPF_SLL_IMM(r3, 3, prg), ctx);	/* prg = r3 * 8, each entry is a pointer */
+	emit(SW64_BPF_ADDL_REG(tmp, prg, prg), ctx);	/* prg = tmp + prg = &ptrs[index] */
+	emit(SW64_BPF_LDL(prg, prg, 0), ctx);		/* prg = *prg = ptrs[index] = prog */
+	emit(SW64_BPF_BEQ(prg, out_offset), ctx);
+
+	/* goto *(prog->bpf_func + prologue_offset); */
+	offset = offsetof(struct bpf_prog, bpf_func);
+	emit_sw64_ldu64(tmp, offset, ctx);
+	emit(SW64_BPF_ADDL_REG(prg, tmp, tmp), ctx);	/* tmp = prg + tmp = &bpf_func */
+	emit(SW64_BPF_LDL(tmp, tmp, 0), ctx);		/* tmp = *tmp = bpf_func */
+	emit(SW64_BPF_BEQ(tmp, out_offset), ctx);
+	/* Skip the callee's register saves; it sets up its own BPF stack */
+	emit(SW64_BPF_LDI(tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx);
+	/* Release this program's BPF stack before jumping away */
+	emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, ctx->stack_size), ctx);
+	emit(SW64_BPF_JMP(SW64_BPF_REG_ZR, tmp), ctx);
+
+	put_tmp_reg(ctx);
+	put_tmp_reg(ctx);
+#undef out_offset
+}
+
+/*
+ * JIT one bpf_tail_call() site.
+ *
+ * The "out" label of the sequence is the instruction following it, so its
+ * index is only known once the sequence length is known.  The length is
+ * measured per call site on a scratch copy of the context with no image
+ * attached (emit() then only advances idx, exactly as in the sizing pass of
+ * bpf_int_jit_compile()), which keeps the target correct for programs with
+ * several tail calls and needs no state shared between JIT runs.
+ *
+ * Returns 0 on success, -1 if the emitted length disagrees with the
+ * measured one.
+ */
+static int emit_bpf_tail_call(struct jit_ctx *ctx)
+{
+	int out_idx = 0;
+
+	if (ctx->image != NULL) {
+		struct jit_ctx probe = *ctx;
+
+		probe.image = NULL;
+		emit_bpf_tail_call_seq(&probe, 0);
+		out_idx = probe.idx;
+	}
+
+	emit_bpf_tail_call_seq(ctx, out_idx);
+
+	/* out */
+	if (ctx->image != NULL && ctx->idx != out_idx)
+		return -1;
+	return 0;
+}
+
+/*
+ * For accesses to BTF pointers, add an entry to the exception table.
+ *
+ * Does nothing (returns 0) while there is no image, when the program has no
+ * extable, or when @insn is not a BPF_PROBE_MEM access.  Otherwise fills the
+ * next free extable slot for the most recently emitted instruction
+ * (image[idx - 1]) and returns 0, or -EINVAL if all slots are already used.
+ *
+ * NOTE(review): callers must emit the faultable load as the last instruction
+ * before calling this.
+ * NOTE(review): the stored value is &ex->insn - pc; confirm this sign matches
+ * how the sw64 extable lookup decodes ->insn.
+ */
+static int add_exception_handler(const struct bpf_insn *insn,
+				 struct jit_ctx *ctx,
+				 int dst_reg)
+{
+	struct exception_table_entry *entry;
+	unsigned long insn_addr;
+	off_t rel;
+
+	/* First pass */
+	if (ctx->image == NULL)
+		return 0;
+
+	if (BPF_MODE(insn->code) != BPF_PROBE_MEM || !ctx->prog->aux->extable)
+		return 0;
+
+	if (WARN_ON_ONCE(ctx->exentry_idx >= ctx->prog->aux->num_exentries))
+		return -EINVAL;
+
+	entry = ctx->prog->aux->extable + ctx->exentry_idx;
+	insn_addr = (unsigned long)(ctx->image + (ctx->idx - 1));
+
+	rel = (long)&entry->insn - insn_addr;
+	entry->insn = rel;
+
+	/* Resume one instruction later; dst_reg is recorded as the value register */
+	entry->fixup.bits.nextinsn = sizeof(u32);
+	entry->fixup.bits.valreg = dst_reg;
+	entry->fixup.bits.errreg = SW64_BPF_REG_ZR;
+
+	ctx->exentry_idx += 1;
+	return 0;
+}
+
+/* JITs an eBPF instruction.
+ *
+ * @insn: the eBPF instruction to translate (for BPF_LD | BPF_IMM | BPF_DW the
+ *        following slot insn[1] is consumed as well).
+ * @ctx:  JIT context; instructions are appended at ctx->idx.
+ *
+ * Two scratch registers are reserved on entry and released on every
+ * successful return.  32-bit ALU results are zero-extended with
+ * ZAP_IMM(dst, 0xf0), 32-bit jumps compare sign-extended copies of their
+ * operands.
+ *
+ * Returns:
+ * 0  - successfully JITed an 8-byte eBPF instruction.
+ * >0 - successfully JITed a 16-byte eBPF instruction.
+ * <0 - failed to JIT.
+ */
+static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
+{
+	const u8 code = insn->code;
+	u8 dst = bpf2sw64[insn->dst_reg];
+	u8 src = bpf2sw64[insn->src_reg];
+	const u8 tmp1 __maybe_unused = get_tmp_reg(ctx);
+	const u8 tmp2 __maybe_unused = get_tmp_reg(ctx);
+	const s16 off = insn->off;
+	const s32 imm = insn->imm;
+	const int bpf_idx = insn - ctx->prog->insnsi;
+	s32 jmp_offset;
+	u64 func;
+	struct bpf_insn insn1;
+	u64 imm64;
+	int ret;
+
+	switch (code) {
+	/* ALU/ALU64 with a register source operand */
+	case BPF_ALU | BPF_MOV | BPF_X:
+		emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, src, dst), ctx);
+		emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
+		break;
+	case BPF_ALU64 | BPF_MOV | BPF_X:
+		emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, src, dst), ctx);
+		break;
+	case BPF_ALU | BPF_ADD | BPF_X:
+		emit(SW64_BPF_ADDW_REG(dst, src, dst), ctx);
+		emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
+		break;
+	case BPF_ALU64 | BPF_ADD | BPF_X:
+		emit(SW64_BPF_ADDL_REG(dst, src, dst), ctx);
+		break;
+	case BPF_ALU | BPF_SUB | BPF_X:
+		emit(SW64_BPF_SUBW_REG(dst, src, dst), ctx);
+		emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
+		break;
+	case BPF_ALU64 | BPF_SUB | BPF_X:
+		emit(SW64_BPF_SUBL_REG(dst, src, dst), ctx);
+		break;
+	case BPF_ALU | BPF_MUL | BPF_X:
+		emit(SW64_BPF_MULW_REG(dst, src, dst), ctx);
+		emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
+		break;
+	case BPF_ALU64 | BPF_MUL | BPF_X:
+		emit(SW64_BPF_MULL_REG(dst, src, dst), ctx);
+		break;
+	case BPF_ALU | BPF_DIV | BPF_X:
+		emit_sw64_divmod(dst, src, ctx, code);
+		break;
+	case BPF_ALU64 | BPF_DIV | BPF_X:
+		emit_sw64_divmod(dst, src, ctx, code);
+		break;
+	case BPF_ALU | BPF_MOD | BPF_X:
+		emit_sw64_divmod(dst, src, ctx, code);
+		break;
+	case BPF_ALU64 | BPF_MOD | BPF_X:
+		emit_sw64_divmod(dst, src, ctx, code);
+		break;
+	case BPF_ALU | BPF_LSH | BPF_X:
+		emit(SW64_BPF_SLL_REG(dst, src, dst), ctx);
+		emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
+		break;
+	case BPF_ALU64 | BPF_LSH | BPF_X:
+		emit(SW64_BPF_SLL_REG(dst, src, dst), ctx);
+		break;
+	case BPF_ALU | BPF_RSH | BPF_X:
+		/* Clear the upper half first so only 32-bit data is shifted in */
+		emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
+		fallthrough;
+	case BPF_ALU64 | BPF_RSH | BPF_X:
+		emit(SW64_BPF_SRL_REG(dst, src, dst), ctx);
+		break;
+	case BPF_ALU | BPF_ARSH | BPF_X:
+		/* Sign-extend the 32-bit value before the arithmetic shift */
+		emit(SW64_BPF_ADDW_REG(SW64_BPF_REG_ZR, dst, dst), ctx);
+		emit(SW64_BPF_SRA_REG(dst, src, dst), ctx);
+		emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
+		break;
+	case BPF_ALU64 | BPF_ARSH | BPF_X:
+		emit(SW64_BPF_SRA_REG(dst, src, dst), ctx);
+		break;
+	case BPF_ALU | BPF_AND | BPF_X:
+		emit(SW64_BPF_AND_REG(dst, src, dst), ctx);
+		emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
+		break;
+	case BPF_ALU64 | BPF_AND | BPF_X:
+		emit(SW64_BPF_AND_REG(dst, src, dst), ctx);
+		break;
+	case BPF_ALU | BPF_OR | BPF_X:
+		emit(SW64_BPF_BIS_REG(dst, src, dst), ctx);
+		emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
+		break;
+	case BPF_ALU64 | BPF_OR | BPF_X:
+		emit(SW64_BPF_BIS_REG(dst, src, dst), ctx);
+		break;
+	case BPF_ALU | BPF_XOR | BPF_X:
+		emit(SW64_BPF_XOR_REG(dst, src, dst), ctx);
+		emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
+		break;
+	case BPF_ALU64 | BPF_XOR | BPF_X:
+		emit(SW64_BPF_XOR_REG(dst, src, dst), ctx);
+		break;
+	case BPF_ALU | BPF_NEG:
+		emit(SW64_BPF_SUBW_REG(SW64_BPF_REG_ZR, dst, dst), ctx);
+		emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
+		break;
+	case BPF_ALU64 | BPF_NEG:
+		emit(SW64_BPF_SUBL_REG(SW64_BPF_REG_ZR, dst, dst), ctx);
+		break;
+	/* Byte order conversions: to-LE only truncates, to-BE swaps bytes */
+	case BPF_ALU | BPF_END | BPF_TO_LE:
+		switch (imm) {
+		case 16:
+			emit(SW64_BPF_ZAPNOT_IMM(dst, 0x3, dst), ctx);
+			break;
+		case 32:
+			emit(SW64_BPF_ZAPNOT_IMM(dst, 0xf, dst), ctx);
+			break;
+		case 64:
+			break;
+		default:
+			pr_err("eBPF JIT %s[%d]: BPF_TO_LE unknown size\n",
+					current->comm, current->pid);
+			return -EINVAL;
+		}
+		break;
+	case BPF_ALU | BPF_END | BPF_TO_BE:
+		switch (imm) {
+		case 16:
+			emit_sw64_htobe16(dst, ctx);
+			break;
+		case 32:
+			emit_sw64_htobe32(dst, ctx);
+			break;
+		case 64:
+			emit_sw64_htobe64(dst, ctx);
+			break;
+		default:
+			pr_err("eBPF JIT %s[%d]: BPF_TO_BE unknown size\n",
+					current->comm, current->pid);
+			return -EINVAL;
+		}
+		break;
+
+	/* ALU/ALU64 with an immediate operand: small immediates are encoded
+	 * directly, anything else is first loaded into tmp1.
+	 */
+	case BPF_ALU | BPF_MOV | BPF_K:
+		if (imm >= S16_MIN && imm <= S16_MAX)
+			emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm), ctx);
+		else
+			emit_sw64_ldu32(dst, imm, ctx);
+		emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
+		break;
+	case BPF_ALU64 | BPF_MOV | BPF_K:
+		if (imm >= S16_MIN && imm <= S16_MAX)
+			emit(SW64_BPF_LDI(dst, SW64_BPF_REG_ZR, imm), ctx);
+		else
+			emit_sw64_lds32(dst, imm, ctx);
+		break;
+	case BPF_ALU | BPF_ADD | BPF_K:
+		if (imm >= S16_MIN && imm <= S16_MAX) {
+			emit(SW64_BPF_LDI(dst, dst, imm), ctx);
+		} else {
+			emit_sw64_ldu32(tmp1, imm, ctx);
+			emit(SW64_BPF_ADDW_REG(dst, tmp1, dst), ctx);
+		}
+		emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
+		break;
+	case BPF_ALU64 | BPF_ADD | BPF_K:
+		if (imm >= S16_MIN && imm <= S16_MAX) {
+			emit(SW64_BPF_LDI(dst, dst, imm), ctx);
+		} else {
+			emit_sw64_lds32(tmp1, imm, ctx);
+			emit(SW64_BPF_ADDL_REG(dst, tmp1, dst), ctx);
+		}
+		break;
+	case BPF_ALU | BPF_SUB | BPF_K:
+		/* Subtract by adding -imm when -imm fits the LDI immediate */
+		if (imm >= -S16_MAX && imm <= -S16_MIN) {
+			emit(SW64_BPF_LDI(dst, dst, -imm), ctx);
+		} else {
+			emit_sw64_ldu32(tmp1, imm, ctx);
+			emit(SW64_BPF_SUBL_REG(dst, tmp1, dst), ctx);
+		}
+		emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
+		break;
+	case BPF_ALU64 | BPF_SUB | BPF_K:
+		if (imm >= -S16_MAX && imm <= -S16_MIN) {
+			emit(SW64_BPF_LDI(dst, dst, -imm), ctx);
+		} else {
+			emit_sw64_lds32(tmp1, imm, ctx);
+			emit(SW64_BPF_SUBL_REG(dst, tmp1, dst), ctx);
+		}
+		break;
+	case BPF_ALU | BPF_MUL | BPF_K:
+		if (imm >= 0 && imm <= U8_MAX) {
+			emit(SW64_BPF_MULL_IMM(dst, imm, dst), ctx);
+		} else {
+			emit_sw64_ldu32(tmp1, imm, ctx);
+			emit(SW64_BPF_MULL_REG(dst, tmp1, dst), ctx);
+		}
+		emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
+		break;
+	case BPF_ALU64 | BPF_MUL | BPF_K:
+		if (imm >= 0 && imm <= U8_MAX) {
+			emit(SW64_BPF_MULL_IMM(dst, imm, dst), ctx);
+		} else {
+			emit_sw64_lds32(tmp1, imm, ctx);
+			emit(SW64_BPF_MULL_REG(dst, tmp1, dst), ctx);
+		}
+		break;
+	case BPF_ALU | BPF_DIV | BPF_K:
+		emit_sw64_ldu32(tmp1, imm, ctx);
+		emit_sw64_divmod(dst, tmp1, ctx, code);
+		break;
+	case BPF_ALU64 | BPF_DIV | BPF_K:
+		emit_sw64_lds32(tmp1, imm, ctx);
+		emit_sw64_divmod(dst, tmp1, ctx, code);
+		break;
+	case BPF_ALU | BPF_MOD | BPF_K:
+		emit_sw64_ldu32(tmp1, imm, ctx);
+		emit_sw64_divmod(dst, tmp1, ctx, code);
+		break;
+	case BPF_ALU64 | BPF_MOD | BPF_K:
+		emit_sw64_lds32(tmp1, imm, ctx);
+		emit_sw64_divmod(dst, tmp1, ctx, code);
+		break;
+	case BPF_ALU | BPF_LSH | BPF_K:
+		if (imm >= 0 && imm <= U8_MAX) {
+			emit(SW64_BPF_SLL_IMM(dst, imm, dst), ctx);
+		} else {
+			emit_sw64_ldu32(tmp1, imm, ctx);
+			emit(SW64_BPF_SLL_REG(dst, tmp1, dst), ctx);
+		}
+		emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
+		break;
+	case BPF_ALU64 | BPF_LSH | BPF_K:
+		if (imm >= 0 && imm <= U8_MAX) {
+			emit(SW64_BPF_SLL_IMM(dst, imm, dst), ctx);
+		} else {
+			emit_sw64_lds32(tmp1, imm, ctx);
+			emit(SW64_BPF_SLL_REG(dst, tmp1, dst), ctx);
+		}
+		break;
+	case BPF_ALU | BPF_RSH | BPF_K:
+		emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
+		if (imm >= 0 && imm <= U8_MAX) {
+			emit(SW64_BPF_SRL_IMM(dst, imm, dst), ctx);
+		} else {
+			emit_sw64_ldu32(tmp1, imm, ctx);
+			emit(SW64_BPF_SRL_REG(dst, tmp1, dst), ctx);
+		}
+		break;
+	case BPF_ALU64 | BPF_RSH | BPF_K:
+		if (imm >= 0 && imm <= U8_MAX) {
+			emit(SW64_BPF_SRL_IMM(dst, imm, dst), ctx);
+		} else {
+			emit_sw64_lds32(tmp1, imm, ctx);
+			emit(SW64_BPF_SRL_REG(dst, tmp1, dst), ctx);
+		}
+		break;
+	case BPF_ALU | BPF_ARSH | BPF_K:
+		emit(SW64_BPF_ADDW_REG(SW64_BPF_REG_ZR, dst, dst), ctx);
+		if (imm >= 0 && imm <= U8_MAX) {
+			emit(SW64_BPF_SRA_IMM(dst, imm, dst), ctx);
+		} else {
+			emit_sw64_ldu32(tmp1, imm, ctx);
+			emit(SW64_BPF_SRA_REG(dst, tmp1, dst), ctx);
+		}
+		emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
+		break;
+	case BPF_ALU64 | BPF_ARSH | BPF_K:
+		if (imm >= 0 && imm <= U8_MAX) {
+			emit(SW64_BPF_SRA_IMM(dst, imm, dst), ctx);
+		} else {
+			emit_sw64_lds32(tmp1, imm, ctx);
+			emit(SW64_BPF_SRA_REG(dst, tmp1, dst), ctx);
+		}
+		break;
+	case BPF_ALU | BPF_AND | BPF_K:
+		if (imm >= 0 && imm <= U8_MAX) {
+			emit(SW64_BPF_AND_IMM(dst, imm, dst), ctx);
+		} else {
+			emit_sw64_ldu32(tmp1, imm, ctx);
+			emit(SW64_BPF_AND_REG(dst, tmp1, dst), ctx);
+		}
+		emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
+		break;
+	case BPF_ALU64 | BPF_AND | BPF_K:
+		if (imm >= 0 && imm <= U8_MAX) {
+			emit(SW64_BPF_AND_IMM(dst, imm, dst), ctx);
+		} else {
+			emit_sw64_lds32(tmp1, imm, ctx);
+			emit(SW64_BPF_AND_REG(dst, tmp1, dst), ctx);
+		}
+		break;
+	case BPF_ALU | BPF_OR | BPF_K:
+		if (imm >= 0 && imm <= U8_MAX) {
+			emit(SW64_BPF_BIS_IMM(dst, imm, dst), ctx);
+		} else {
+			emit_sw64_ldu32(tmp1, imm, ctx);
+			emit(SW64_BPF_BIS_REG(dst, tmp1, dst), ctx);
+		}
+		emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
+		break;
+	case BPF_ALU64 | BPF_OR | BPF_K:
+		if (imm >= 0 && imm <= U8_MAX) {
+			emit(SW64_BPF_BIS_IMM(dst, imm, dst), ctx);
+		} else {
+			emit_sw64_lds32(tmp1, imm, ctx);
+			emit(SW64_BPF_BIS_REG(dst, tmp1, dst), ctx);
+		}
+		break;
+	case BPF_ALU | BPF_XOR | BPF_K:
+		if (imm >= 0 && imm <= U8_MAX) {
+			emit(SW64_BPF_XOR_IMM(dst, imm, dst), ctx);
+		} else {
+			emit_sw64_ldu32(tmp1, imm, ctx);
+			emit(SW64_BPF_XOR_REG(dst, tmp1, dst), ctx);
+		}
+		emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
+		break;
+	case BPF_ALU64 | BPF_XOR | BPF_K:
+		if (imm >= 0 && imm <= U8_MAX) {
+			emit(SW64_BPF_XOR_IMM(dst, imm, dst), ctx);
+		} else {
+			emit_sw64_lds32(tmp1, imm, ctx);
+			emit(SW64_BPF_XOR_REG(dst, tmp1, dst), ctx);
+		}
+		break;
+
+	/* Unconditional jump */
+	case BPF_JMP | BPF_JA:
+		jmp_offset = bpf2sw64_offset(bpf_idx, off, ctx);
+		if (jmp_offset >= -0x100000 && jmp_offset <= 0xfffff) {
+			emit(SW64_BPF_BR(SW64_BPF_REG_ZR, jmp_offset), ctx);
+		} else {
+			pr_err("eBPF JIT %s[%d]: BPF_JMP out of range, %d instructions\n",
+					current->comm, current->pid, jmp_offset);
+			return -EINVAL;
+		}
+		break;
+
+	/* Conditional jumps, register operand.  The 32-bit forms compare
+	 * sign-extended copies held in the scratch registers.
+	 */
+	case BPF_JMP32 | BPF_JEQ | BPF_X:
+	case BPF_JMP32 | BPF_JGT | BPF_X:
+	case BPF_JMP32 | BPF_JLT | BPF_X:
+	case BPF_JMP32 | BPF_JGE | BPF_X:
+	case BPF_JMP32 | BPF_JLE | BPF_X:
+	case BPF_JMP32 | BPF_JNE | BPF_X:
+	case BPF_JMP32 | BPF_JSGT | BPF_X:
+	case BPF_JMP32 | BPF_JSLT | BPF_X:
+	case BPF_JMP32 | BPF_JSGE | BPF_X:
+	case BPF_JMP32 | BPF_JSLE | BPF_X:
+	case BPF_JMP32 | BPF_JSET | BPF_X:
+		emit(SW64_BPF_ADDW_REG(SW64_BPF_REG_ZR, src, tmp1), ctx);
+		src = tmp1;
+		emit(SW64_BPF_ADDW_REG(SW64_BPF_REG_ZR, dst, tmp2), ctx);
+		dst = tmp2;
+		fallthrough;
+	case BPF_JMP | BPF_JEQ | BPF_X:
+	case BPF_JMP | BPF_JGT | BPF_X:
+	case BPF_JMP | BPF_JLT | BPF_X:
+	case BPF_JMP | BPF_JGE | BPF_X:
+	case BPF_JMP | BPF_JLE | BPF_X:
+	case BPF_JMP | BPF_JNE | BPF_X:
+	case BPF_JMP | BPF_JSGT | BPF_X:
+	case BPF_JMP | BPF_JSLT | BPF_X:
+	case BPF_JMP | BPF_JSGE | BPF_X:
+	case BPF_JMP | BPF_JSLE | BPF_X:
+	case BPF_JMP | BPF_JSET | BPF_X:
+		/* tmp1 becomes non-zero when the branch is to be taken */
+		switch (BPF_OP(code)) {
+		case BPF_JEQ:
+			emit(SW64_BPF_CMPEQ_REG(dst, src, tmp1), ctx);
+			break;
+		case BPF_JGT:
+			emit(SW64_BPF_CMPULT_REG(src, dst, tmp1), ctx);
+			break;
+		case BPF_JLT:
+			emit(SW64_BPF_CMPULT_REG(dst, src, tmp1), ctx);
+			break;
+		case BPF_JGE:
+			emit(SW64_BPF_CMPULE_REG(src, dst, tmp1), ctx);
+			break;
+		case BPF_JLE:
+			emit(SW64_BPF_CMPULE_REG(dst, src, tmp1), ctx);
+			break;
+		case BPF_JNE:
+			emit(SW64_BPF_CMPEQ_REG(dst, src, tmp1), ctx);
+			emit(SW64_BPF_XOR_IMM(tmp1, 1, tmp1), ctx);
+			break;
+		case BPF_JSGT:
+			emit(SW64_BPF_CMPLT_REG(src, dst, tmp1), ctx);
+			break;
+		case BPF_JSLT:
+			emit(SW64_BPF_CMPLT_REG(dst, src, tmp1), ctx);
+			break;
+		case BPF_JSGE:
+			emit(SW64_BPF_CMPLE_REG(src, dst, tmp1), ctx);
+			break;
+		case BPF_JSLE:
+			emit(SW64_BPF_CMPLE_REG(dst, src, tmp1), ctx);
+			break;
+		case BPF_JSET:
+			emit(SW64_BPF_AND_REG(dst, src, tmp1), ctx);
+			break;
+		}
+		jmp_offset = bpf2sw64_offset(bpf_idx, off, ctx);
+		if (jmp_offset >= -0x100000 && jmp_offset <= 0xfffff) {
+			emit(SW64_BPF_BNE(tmp1, jmp_offset), ctx);
+		} else {
+			pr_err("eBPF JIT %s[%d]: BPF_JMP out of range, %d instructions\n",
+					current->comm, current->pid, jmp_offset);
+			return -EINVAL;
+		}
+		break;
+
+	/* Conditional jumps, immediate operand (sign-extended into tmp1) */
+	case BPF_JMP32 | BPF_JEQ | BPF_K:
+	case BPF_JMP32 | BPF_JGT | BPF_K:
+	case BPF_JMP32 | BPF_JLT | BPF_K:
+	case BPF_JMP32 | BPF_JGE | BPF_K:
+	case BPF_JMP32 | BPF_JLE | BPF_K:
+	case BPF_JMP32 | BPF_JNE | BPF_K:
+	case BPF_JMP32 | BPF_JSGT | BPF_K:
+	case BPF_JMP32 | BPF_JSLT | BPF_K:
+	case BPF_JMP32 | BPF_JSGE | BPF_K:
+	case BPF_JMP32 | BPF_JSLE | BPF_K:
+	case BPF_JMP32 | BPF_JSET | BPF_K:
+		emit(SW64_BPF_ADDW_REG(SW64_BPF_REG_ZR, dst, tmp2), ctx);
+		dst = tmp2;
+		fallthrough;
+	case BPF_JMP | BPF_JEQ | BPF_K:
+	case BPF_JMP | BPF_JGT | BPF_K:
+	case BPF_JMP | BPF_JLT | BPF_K:
+	case BPF_JMP | BPF_JGE | BPF_K:
+	case BPF_JMP | BPF_JLE | BPF_K:
+	case BPF_JMP | BPF_JNE | BPF_K:
+	case BPF_JMP | BPF_JSGT | BPF_K:
+	case BPF_JMP | BPF_JSLT | BPF_K:
+	case BPF_JMP | BPF_JSGE | BPF_K:
+	case BPF_JMP | BPF_JSLE | BPF_K:
+	case BPF_JMP | BPF_JSET | BPF_K:
+		emit_sw64_lds32(tmp1, imm, ctx);
+		/* tmp2 becomes non-zero when the branch is to be taken */
+		switch (BPF_OP(code)) {
+		case BPF_JEQ:
+			emit(SW64_BPF_CMPEQ_REG(dst, tmp1, tmp2), ctx);
+			break;
+		case BPF_JGT:
+			emit(SW64_BPF_CMPULT_REG(tmp1, dst, tmp2), ctx);
+			break;
+		case BPF_JLT:
+			emit(SW64_BPF_CMPULT_REG(dst, tmp1, tmp2), ctx);
+			break;
+		case BPF_JGE:
+			emit(SW64_BPF_CMPULE_REG(tmp1, dst, tmp2), ctx);
+			break;
+		case BPF_JLE:
+			emit(SW64_BPF_CMPULE_REG(dst, tmp1, tmp2), ctx);
+			break;
+		case BPF_JNE:
+			emit(SW64_BPF_CMPEQ_REG(dst, tmp1, tmp2), ctx);
+			emit(SW64_BPF_XOR_IMM(tmp2, 1, tmp2), ctx);
+			break;
+		case BPF_JSGT:
+			emit(SW64_BPF_CMPLT_REG(tmp1, dst, tmp2), ctx);
+			break;
+		case BPF_JSLT:
+			emit(SW64_BPF_CMPLT_REG(dst, tmp1, tmp2), ctx);
+			break;
+		case BPF_JSGE:
+			emit(SW64_BPF_CMPLE_REG(tmp1, dst, tmp2), ctx);
+			break;
+		case BPF_JSLE:
+			emit(SW64_BPF_CMPLE_REG(dst, tmp1, tmp2), ctx);
+			break;
+		case BPF_JSET:
+			emit(SW64_BPF_AND_REG(dst, tmp1, tmp2), ctx);
+			break;
+		}
+		jmp_offset = bpf2sw64_offset(bpf_idx, off, ctx);
+		if (jmp_offset >= -0x100000 && jmp_offset <= 0xfffff) {
+			emit(SW64_BPF_BNE(tmp2, jmp_offset), ctx);
+		} else {
+			pr_err("eBPF JIT %s[%d]: BPF_JMP out of range, %d instructions\n",
+					current->comm, current->pid, jmp_offset);
+			return -EINVAL;
+		}
+		break;
+
+	/* Function call: the target address is loaded into PV */
+	case BPF_JMP | BPF_CALL:
+		func = (u64)__bpf_call_base + imm;
+		if ((func & ~(KERNEL_IMAGE_SIZE - 1)) != __START_KERNEL_map)
+			/* calling bpf program, switch to vmalloc addr */
+			func = (func & U32_MAX) | VMALLOC_START;
+		emit_sw64_ldu64(SW64_BPF_REG_PV, func, ctx);
+		emit(SW64_BPF_CALL(SW64_BPF_REG_RA, SW64_BPF_REG_PV), ctx);
+		break;
+
+	case BPF_JMP | BPF_TAIL_CALL:
+		if (emit_bpf_tail_call(ctx))
+			return -EFAULT;
+		break;
+
+	case BPF_JMP | BPF_EXIT:
+		// if this is the last bpf instruction, skip to epilogue
+		if (bpf_idx == ctx->prog->len - 1)
+			break;
+		jmp_offset = offset_to_epilogue(ctx) - 1;
+		// epilogue is always at the end, must jump forward
+		if (jmp_offset >= -1 && jmp_offset <= 0xfffff) {
+			if (ctx->image && !jmp_offset)
+				// if this is the last jited instruction, generate nop
+				emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, SW64_BPF_REG_ZR), ctx);
+			else
+				emit(SW64_BPF_BR(SW64_BPF_REG_ZR, jmp_offset), ctx);
+		} else {
+			pr_err("eBPF JIT %s[%d]: BPF_EXIT out of range, %d instructions\n",
+					current->comm, current->pid, jmp_offset);
+			return -EINVAL;
+		}
+		break;
+
+	/* 64-bit immediate load, spread over two eBPF instruction slots */
+	case BPF_LD | BPF_IMM | BPF_DW:
+		insn1 = insn[1];
+		imm64 = ((u64)insn1.imm << 32) | (u32)imm;
+		emit_sw64_ldu64(dst, imm64, ctx);
+		put_tmp_reg(ctx);
+		put_tmp_reg(ctx);
+		return 1;
+
+	/* LDX: dst = *(size *)(src + off) */
+	case BPF_LDX | BPF_MEM | BPF_W:
+	case BPF_LDX | BPF_MEM | BPF_H:
+	case BPF_LDX | BPF_MEM | BPF_B:
+	case BPF_LDX | BPF_MEM | BPF_DW:
+	case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
+	case BPF_LDX | BPF_PROBE_MEM | BPF_W:
+	case BPF_LDX | BPF_PROBE_MEM | BPF_H:
+	case BPF_LDX | BPF_PROBE_MEM | BPF_B:
+		switch (BPF_SIZE(code)) {
+		case BPF_W:
+			emit(SW64_BPF_LDW(dst, src, off), ctx);
+			break;
+		case BPF_H:
+			emit(SW64_BPF_LDHU(dst, src, off), ctx);
+			break;
+		case BPF_B:
+			emit(SW64_BPF_LDBU(dst, src, off), ctx);
+			break;
+		case BPF_DW:
+			emit(SW64_BPF_LDL(dst, src, off), ctx);
+			break;
+		}
+
+		/* add_exception_handler() records the address of the most
+		 * recently emitted instruction, so it has to run while that
+		 * is still the load itself, i.e. before the zero-extension
+		 * of a 32-bit load is emitted.
+		 */
+		ret = add_exception_handler(insn, ctx, dst);
+		if (ret)
+			return ret;
+
+		if (BPF_SIZE(code) == BPF_W)
+			emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
+		break;
+
+	/* ST: *(size *)(dst + off) = imm */
+	case BPF_ST | BPF_MEM | BPF_W:
+	case BPF_ST | BPF_MEM | BPF_H:
+	case BPF_ST | BPF_MEM | BPF_B:
+	case BPF_ST | BPF_MEM | BPF_DW:
+		/* Load imm to a register then store it */
+		emit_sw64_lds32(tmp1, imm, ctx);
+		switch (BPF_SIZE(code)) {
+		case BPF_W:
+			emit(SW64_BPF_STW(tmp1, dst, off), ctx);
+			break;
+		case BPF_H:
+			emit(SW64_BPF_STH(tmp1, dst, off), ctx);
+			break;
+		case BPF_B:
+			emit(SW64_BPF_STB(tmp1, dst, off), ctx);
+			break;
+		case BPF_DW:
+			emit(SW64_BPF_STL(tmp1, dst, off), ctx);
+			break;
+		}
+		break;
+
+	/* STX: *(size *)(dst + off) = src */
+	case BPF_STX | BPF_MEM | BPF_W:
+		emit(SW64_BPF_STW(src, dst, off), ctx);
+		break;
+	case BPF_STX | BPF_MEM | BPF_H:
+		emit(SW64_BPF_STH(src, dst, off), ctx);
+		break;
+	case BPF_STX | BPF_MEM | BPF_B:
+		emit(SW64_BPF_STB(src, dst, off), ctx);
+		break;
+	case BPF_STX | BPF_MEM | BPF_DW:
+		emit(SW64_BPF_STL(src, dst, off), ctx);
+		break;
+
+	/* STX XADD: lock *(u32 *)(dst + off) += src */
+	case BPF_STX | BPF_XADD | BPF_W:
+		emit_sw64_xadd32(src, dst, off, ctx);
+		break;
+	/* STX XADD: lock *(u64 *)(dst + off) += src */
+	case BPF_STX | BPF_XADD | BPF_DW:
+		emit_sw64_xadd64(src, dst, off, ctx);
+		break;
+
+	default:
+		pr_err("eBPF JIT %s[%d]: unknown opcode 0x%02x\n",
+				current->comm, current->pid, code);
+		return -EINVAL;
+	}
+
+	/* Release the two scratch registers reserved on entry */
+	put_tmp_reg(ctx);
+	put_tmp_reg(ctx);
+	return 0;
+}
+
+/*
+ * JIT every eBPF instruction of the program in order.
+ *
+ * While there is no image yet, the JITed index at which each eBPF
+ * instruction starts is recorded in ctx->insn_offset; the extra slot(s) of a
+ * multi-slot instruction inherit the offset of the slot before them.
+ *
+ * Returns 0 on success or the negative error reported by build_insn().
+ */
+static int build_body(struct jit_ctx *ctx)
+{
+	const struct bpf_prog *prog = ctx->prog;
+	const bool sizing = (ctx->image == NULL);
+	int pos = 0;
+
+	while (pos < prog->len) {
+		int extra;
+
+		if (sizing)
+			ctx->insn_offset[pos] = ctx->idx;
+
+		extra = build_insn(&prog->insnsi[pos], ctx);
+		if (extra < 0)
+			return extra;
+
+		/* Step over the additional slots the instruction consumed */
+		for (; extra > 0; extra--) {
+			pos++;
+			if (sizing)
+				ctx->insn_offset[pos] = ctx->insn_offset[pos - 1];
+		}
+
+		pos++;
+	}
+
+	return 0;
+}
+
+/*
+ * Sanity-check the finished image: none of the ctx->idx emitted words may
+ * still be the SW64_BPF_ILLEGAL_INSN filler, and exactly num_exentries
+ * exception table entries must have been produced.
+ *
+ * Returns 0 if the image looks sane, -1 otherwise.
+ */
+static int validate_code(struct jit_ctx *ctx)
+{
+	int pos = ctx->idx;
+
+	while (pos-- > 0) {
+		if (ctx->image[pos] == SW64_BPF_ILLEGAL_INSN)
+			return -1;
+	}
+
+	return WARN_ON_ONCE(ctx->exentry_idx != ctx->prog->aux->num_exentries) ? -1 : 0;
+}
+
+/* Pointer-typed convenience wrapper around flush_icache_range() */
+static inline void bpf_flush_icache(void *start, void *end)
+{
+	const unsigned long first = (unsigned long)start;
+	const unsigned long last = (unsigned long)end;
+
+	flush_icache_range(first, last);
+}
+
+/*
+ * JIT-compile an eBPF program for sw64.
+ *
+ * Returns the program to run: the JITed program on success (possibly the
+ * constant-blinded clone), or the original @prog unchanged when JITing was
+ * not requested or any step failed.
+ *
+ * The work is done in passes over the same emitters:
+ *   1. a sizing pass with ctx.image == NULL that fills ctx.insn_offset and
+ *      yields the instruction count,
+ *   2. the real pass writing into the allocated image,
+ *   3. a validation scan of the result.
+ * For programs with prog->is_func set, the context is parked in
+ * prog->aux->jit_data after the first call and a later call re-runs pass 2
+ * and 3 on the same image ("extra pass").
+ */
+struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
+{
+	struct bpf_prog *tmp, *orig_prog = prog;
+	struct bpf_binary_header *header;
+	struct sw64_jit_data *jit_data;
+	bool was_classic = bpf_prog_was_classic(prog);
+	bool tmp_blinded = false;
+	bool extra_pass = false;
+	struct jit_ctx ctx;
+	int image_size, prog_size, extable_size;
+	u8 *image_ptr;
+
+	if (!prog->jit_requested)
+		return orig_prog;
+
+	tmp = bpf_jit_blind_constants(prog);
+	/* If blinding was requested and we failed during blinding,
+	 * we must fall back to the interpreter.
+	 */
+	if (IS_ERR(tmp))
+		return orig_prog;
+	if (tmp != prog) {
+		tmp_blinded = true;
+		prog = tmp;
+	}
+
+	/* Per-program JIT state; allocated on the first call */
+	jit_data = prog->aux->jit_data;
+	if (!jit_data) {
+		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
+		if (!jit_data) {
+			prog = orig_prog;
+			goto out;
+		}
+		prog->aux->jit_data = jit_data;
+	}
+	/* A saved insn_offset table means an earlier call already sized and
+	 * emitted this program: reuse its context and image.
+	 * NOTE(review): prog_size here omits the 8 alignment bytes that the
+	 * first call adds below.
+	 */
+	if (jit_data->ctx.insn_offset) {
+		ctx = jit_data->ctx;
+		image_ptr = jit_data->image;
+		header = jit_data->header;
+		extra_pass = true;
+		prog_size = sizeof(u32) * ctx.idx;
+		goto skip_init_ctx;
+	}
+	memset(&ctx, 0, sizeof(ctx));
+	ctx.prog = prog;
+
+	/* One slot per eBPF instruction plus one for the epilogue */
+	ctx.insn_offset = kcalloc(prog->len + 1, sizeof(int), GFP_KERNEL);
+	if (ctx.insn_offset == NULL) {
+		prog = orig_prog;
+		goto out_off;
+	}
+
+	/* 1. Initial fake pass to compute ctx->idx. */
+
+	/* Fake pass to fill in ctx->insn_offset. */
+	build_prologue(&ctx, was_classic);
+
+	if (build_body(&ctx)) {
+		prog = orig_prog;
+		goto out_off;
+	}
+
+	ctx.insn_offset[prog->len] = ctx.epilogue_offset = ctx.idx;
+	build_epilogue(&ctx);
+
+	extable_size = prog->aux->num_exentries *
+		sizeof(struct exception_table_entry);
+
+	/* Now we know the actual image size. */
+	/* And we need extra 8 bytes for lock instructions alignment */
+	prog_size = sizeof(u32) * ctx.idx + 8;
+	image_size = prog_size + extable_size;
+	header = bpf_jit_binary_alloc(image_size, &image_ptr,
+				      sizeof(u32), jit_fill_hole);
+	if (header == NULL) {
+		prog = orig_prog;
+		goto out_off;
+	}
+
+	/* 2. Now, the actual pass. */
+
+	/* lock instructions need 8-byte alignment */
+	ctx.image = (u32 *)(((unsigned long)image_ptr + 7) & (~7));
+	/* The exception table lives right behind the code area */
+	if (extable_size)
+		prog->aux->extable = (void *)image_ptr + prog_size;
+skip_init_ctx:
+	/* Restart emission from the top of the image */
+	ctx.idx = 0;
+	ctx.exentry_idx = 0;
+
+	build_prologue(&ctx, was_classic);
+
+	if (build_body(&ctx)) {
+		bpf_jit_binary_free(header);
+		prog = orig_prog;
+		goto out_off;
+	}
+
+	build_epilogue(&ctx);
+
+	/* 3. Extra pass to validate JITed code. */
+	if (validate_code(&ctx)) {
+		bpf_jit_binary_free(header);
+		prog = orig_prog;
+		goto out_off;
+	}
+
+	/* And we're done. */
+	if (bpf_jit_enable > 1)
+		bpf_jit_dump(prog->len, prog_size, 2, ctx.image);
+
+	bpf_flush_icache(header, ctx.image + ctx.idx);
+
+	/* Final image: lock it read-only.  Otherwise (first call for an
+	 * is_func program) park the state for the extra pass.
+	 */
+	if (!prog->is_func || extra_pass) {
+		bpf_jit_binary_lock_ro(header);
+	} else {
+		jit_data->ctx = ctx;
+		jit_data->image = image_ptr;
+		jit_data->header = header;
+	}
+	prog->bpf_func = (void *)ctx.image;
+	prog->jited = 1;
+	prog->jited_len = prog_size;
+	/* Every get_tmp_reg() should have been paired with a put_tmp_reg() */
+	if (ctx.current_tmp_reg) {
+		pr_err("eBPF JIT %s[%d]: unreleased temporary regsters %d\n",
+				current->comm, current->pid, ctx.current_tmp_reg);
+	}
+
+	/* Free the JIT state once no further pass will follow; the error
+	 * paths above jump straight to out_off inside this block.
+	 */
+	if (!prog->is_func || extra_pass) {
+out_off:
+		kfree(ctx.insn_offset);
+		kfree(jit_data);
+		prog->aux->jit_data = NULL;
+	}
+out:
+	/* Drop whichever of the blinded clone / original is not returned */
+	if (tmp_blinded)
+		bpf_jit_prog_release_other(prog, prog == orig_prog ?
+					   tmp : orig_prog);
+	return prog;
+}
diff --git a/arch/sw_64/platform/Makefile b/arch/sw_64/platform/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..4c0edceb4a2c1f4f7c8a5ee16617e80161b771a1
--- /dev/null
+++ b/arch/sw_64/platform/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_PLATFORM_XUELANG)	+= cpufreq_xuelang.o
diff --git a/arch/sw_64/platform/cpufreq_xuelang.c b/arch/sw_64/platform/cpufreq_xuelang.c
new file mode 100644
index 0000000000000000000000000000000000000000..1259e58dc874ffa691d189dfd46d4e120cdb2cef
--- /dev/null
+++ b/arch/sw_64/platform/cpufreq_xuelang.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/platform_device.h>
+
+#include <asm/cpufreq.h>
+#include <asm/delay.h>
+#include <asm/sw64_init.h>
+
+/* Minimum CLK support */
+enum {
+	DC_0, DC_1, DC_2, DC_3, DC_4, DC_5, DC_6, DC_7, DC_8,
+	DC_9, DC_10, DC_11, DC_12, DC_13, DC_14, DC_15, DC_RESV
+};
+
+struct cpufreq_frequency_table freq_table[] = {
+	{0, 200, CPUFREQ_ENTRY_INVALID},
+	{0, DC_1, CPUFREQ_ENTRY_INVALID},
+	{0, DC_2, 0},
+	{0, DC_3, 0},
+	{0, DC_4, 0},
+	{0, DC_5, 0},
+	{0, DC_6, 0},
+	{0, DC_7, 0},
+	{0, DC_8, 0},
+	{0, DC_9, 0},
+	{0, DC_10, 0},
+	{0, DC_11, 0},
+	{0, DC_12, 0},
+	{0, DC_13, 0},
+	{0, DC_14, 0},
+	{0, DC_15, 0},
+	{-1, DC_RESV, CPUFREQ_TABLE_END},
+};
+
+
+static struct platform_device sw64_cpufreq_device = {
+	.name = "sw64_cpufreq",
+	.id = -1,
+};
+
+static int __init sw64_cpufreq_init(void)
+{
+	int i;
+	unsigned char external_clk;
+	unsigned long max_rate, freq_off;
+
+	max_rate = get_cpu_freq() / 1000;
+
+	external_clk = *((unsigned char *)__va(MB_EXTCLK));
+
+	if (external_clk == 240)
+		freq_off = 60000;
+	else
+		freq_off = 50000;
+
+	/* clock table init */
+	for (i = 0; freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
+		if (i == 1)
+			freq_table[i].driver_data = freq_off * 24;
+		if (i == 2)
+			freq_table[i].frequency = freq_off * 36;
+		if (i > 2)
+			freq_table[i].frequency = freq_off * 38 + ((i - 3) * freq_off);
+
+		if (freq_table[i].frequency == max_rate)
+			freq_table[i + 1].frequency = CPUFREQ_TABLE_END;
+	}
+
+	return platform_device_register(&sw64_cpufreq_device);
+}
+arch_initcall(sw64_cpufreq_init);
+
+char curruent_policy[CPUFREQ_NAME_LEN];
+
+static struct clk cpu_clk = {
+	.name = "cpu_clk",
+	.flags = CLK_ALWAYS_ENABLED | CLK_RATE_PROPAGATES,
+	.rate = 2400000000,
+};
+
+struct clk *sw64_clk_get(struct device *dev, const char *id)
+{
+	return &cpu_clk;
+}
+EXPORT_SYMBOL(sw64_clk_get);
+
+/* Return ft[idx].frequency for idx == CLK_CTL >> CORE_PLL2_CFG_SHIFT, else 0. */
+unsigned int __sw64_cpufreq_get(struct cpufreq_policy *policy)
+{
+	struct cpufreq_frequency_table *entry = policy->freq_table;
+	u64 sel = sw64_io_read(0, CLK_CTL) >> CORE_PLL2_CFG_SHIFT;
+	int pos = 0;
+
+	for (; entry->frequency != CPUFREQ_TABLE_END; entry++, pos++) {
+		if (sel == pos)
+			return entry->frequency;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(__sw64_cpufreq_get);
+
+void sw64_set_rate(unsigned int index)
+{
+	unsigned int i, val;
+	int cpu_num;
+
+	cpu_num = sw64_chip->get_cpu_num();
+
+	for (i = 0; i < cpu_num; i++) {
+		sw64_io_write(i, CLK_CTL, CORE_CLK2_R | CORE_CLK2_V | CLK_PRT);
+		val = sw64_io_read(i, CLK_CTL);
+
+		sw64_io_write(i, CLK_CTL, val | index << CORE_PLL2_CFG_SHIFT);
+
+		udelay(1);
+
+		sw64_io_write(i, CLK_CTL, CORE_CLK2_V | CLK_PRT
+				| index << CORE_PLL2_CFG_SHIFT);
+		val = sw64_io_read(i, CLK_CTL);
+
+		/* LV1 select PLL1/PLL2 */
+		sw64_io_write(i, CLU_LV1_SEL, CLK_LV1_SEL_MUXA | CLK_LV1_SEL_PRT);
+
+		/* Set CLK_CTL PLL0 */
+		sw64_io_write(i, CLK_CTL, val | CORE_CLK0_R | CORE_CLK0_V);
+
+		sw64_io_write(i, CLK_CTL, val | CORE_CLK0_R | CORE_CLK0_V
+				| index << CORE_PLL0_CFG_SHIFT);
+
+		udelay(1);
+
+		sw64_io_write(i, CLK_CTL, val | CORE_CLK0_V
+				| index << CORE_PLL0_CFG_SHIFT);
+
+		/* LV1 select PLL0/PLL1 */
+		sw64_io_write(i, CLU_LV1_SEL, CLK_LV1_SEL_MUXB | CLK_LV1_SEL_PRT);
+	}
+}
+EXPORT_SYMBOL_GPL(sw64_set_rate);
diff --git a/arch/sw_64/tools/.gitignore b/arch/sw_64/tools/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..f73e86272b7616f0c2ce1d704e1966da94aed182
--- /dev/null
+++ b/arch/sw_64/tools/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+relocs
diff --git a/arch/sw_64/tools/Makefile b/arch/sw_64/tools/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..66f55b035e223cc3f9073c6fbd252385293e4475
--- /dev/null
+++ b/arch/sw_64/tools/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+
+hostprogs	+= relocs
+relocs-objs	+= relocs.o
+relocs-objs	+= relocs_main.o
+PHONY += relocs
+relocs: $(obj)/relocs
+	@:
diff --git a/arch/sw_64/tools/relocs.c b/arch/sw_64/tools/relocs.c
new file mode 100644
index 0000000000000000000000000000000000000000..ec0ed422a8369172d2db92550dfed98619419961
--- /dev/null
+++ b/arch/sw_64/tools/relocs.c
@@ -0,0 +1,635 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "relocs.h"
+
+#define ELF_BITS 64
+
+#define ELF_MACHINE             EM_SW64
+#define ELF_MACHINE_NAME        "SW64"
+#define SHT_REL_TYPE            SHT_RELA
+#define Elf_Rel                 Elf64_Rela
+
+#define ELF_CLASS               ELFCLASS64
+#define ELF_R_SYM(val)          ELF64_R_SYM(val)
+#define ELF_R_TYPE(val)         ELF64_R_TYPE(val)
+#define ELF_ST_TYPE(o)          ELF64_ST_TYPE(o)
+#define ELF_ST_BIND(o)          ELF64_ST_BIND(o)
+#define ELF_ST_VISIBILITY(o)    ELF64_ST_VISIBILITY(o)
+
+#define ElfW(type)		_ElfW(ELF_BITS, type)
+#define _ElfW(bits, type)	__ElfW(bits, type)
+#define __ElfW(bits, type)	Elf##bits##_##type
+
+#define Elf_Addr		ElfW(Addr)
+#define Elf_Ehdr		ElfW(Ehdr)
+#define Elf_Phdr		ElfW(Phdr)
+#define Elf_Shdr		ElfW(Shdr)
+#define Elf_Sym			ElfW(Sym)
+
+static Elf_Ehdr ehdr;
+
+struct relocs {
+	uint32_t	*offset;
+	unsigned long	count;
+	unsigned long	size;
+};
+
+static struct relocs relocs;
+
+struct section {
+	Elf_Shdr       shdr;
+	struct section *link;
+	Elf_Sym        *symtab;
+	Elf_Rel        *reltab;
+	char           *strtab;
+	long           shdr_offset;
+};
+static struct section *secs;
+
+static const char * const regex_sym_kernel = {
+/* Symbols matching these regex's should never be relocated */
+	"^(__crc_)",
+};
+
+static regex_t sym_regex_c;
+
+static int regex_skip_reloc(const char *sym_name)
+{
+	return !regexec(&sym_regex_c, sym_name, 0, NULL, 0);
+}
+
+static void regex_init(void)
+{
+	char errbuf[128];
+	int err;
+
+	err = regcomp(&sym_regex_c, regex_sym_kernel,
+			REG_EXTENDED|REG_NOSUB);
+
+	if (err) {
+		regerror(err, &sym_regex_c, errbuf, sizeof(errbuf));
+		die("%s", errbuf);
+	}
+}
+
+/* Map a relocation type number to its macro name for diagnostics. */
+static const char *rel_type(unsigned int type)
+{
+	static const char * const type_name[] = {
+#define REL_TYPE(X)[X] = #X
+	REL_TYPE(R_SW64_NONE),
+	REL_TYPE(R_SW64_REFQUAD),
+	REL_TYPE(R_SW64_LITERAL),
+	REL_TYPE(R_SW64_LITUSE),
+	REL_TYPE(R_SW64_GPDISP),
+	REL_TYPE(R_SW64_BRADDR),
+	REL_TYPE(R_SW64_HINT),
+	REL_TYPE(R_SW64_SREL32),
+	REL_TYPE(R_SW64_GPRELHIGH),
+	REL_TYPE(R_SW64_GPRELLOW),
+	REL_TYPE(R_SW64_LITERAL_GOT),	/* accepted by do_reloc() */
+#undef REL_TYPE
+	};
+
+	return (type < ARRAY_SIZE(type_name) && type_name[type]) ?
+		type_name[type] : "unknown type rel type name";
+}
+
+static const char *sec_name(unsigned int shndx)
+{
+	const char *sec_strtab;
+	const char *name;
+
+	sec_strtab = secs[ehdr.e_shstrndx].strtab;
+	if (shndx < ehdr.e_shnum)
+		name = sec_strtab + secs[shndx].shdr.sh_name;
+	else if (shndx == SHN_ABS)
+		name = "ABSOLUTE";
+	else if (shndx == SHN_COMMON)
+		name = "COMMON";
+	else
+		name = "<noname>";
+	return name;
+}
+
+static struct section *sec_lookup(const char *secname)
+{
+	int i;
+
+	for (i = 0; i < ehdr.e_shnum; i++)
+		if (strcmp(secname, sec_name(i)) == 0)
+			return &secs[i];
+
+	return NULL;
+}
+
+static const char *sym_name(const char *sym_strtab, Elf_Sym *sym)
+{
+	const char *name;
+
+	if (sym->st_name)
+		name = sym_strtab + sym->st_name;
+	else
+		name = sec_name(sym->st_shndx);
+	return name;
+}
+
+#define le16_to_cpu(val) (val)
+#define le32_to_cpu(val) (val)
+#define le64_to_cpu(val) (val)
+
+#define cpu_to_le16(val) (val)
+#define cpu_to_le32(val) (val)
+#define cpu_to_le64(val) (val)
+
+static uint16_t elf16_to_cpu(uint16_t val)
+{
+	return le16_to_cpu(val);
+}
+
+static uint32_t elf32_to_cpu(uint32_t val)
+{
+	return le32_to_cpu(val);
+}
+
+static uint32_t cpu_to_elf32(uint32_t val)
+{
+	return cpu_to_le32(val);
+}
+
+#define elf_half_to_cpu(x)	elf16_to_cpu(x)
+#define elf_word_to_cpu(x)	elf32_to_cpu(x)
+
+#if ELF_BITS == 64
+static uint64_t elf64_to_cpu(uint64_t val)
+{
+	return le64_to_cpu(val);
+}
+#define elf_addr_to_cpu(x)	elf64_to_cpu(x)
+#define elf_off_to_cpu(x)	elf64_to_cpu(x)
+#define elf_xword_to_cpu(x)	elf64_to_cpu(x)
+#else
+#define elf_addr_to_cpu(x)	elf32_to_cpu(x)
+#define elf_off_to_cpu(x)	elf32_to_cpu(x)
+#define elf_xword_to_cpu(x)	elf32_to_cpu(x)
+#endif
+
+static void read_ehdr(FILE *fp)
+{
+	if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1)
+		die("Cannot read ELF header: %s\n", strerror(errno));
+
+	if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0)
+		die("No ELF magic\n");
+
+	if (ehdr.e_ident[EI_CLASS] != ELF_CLASS)
+		die("Not a %d bit executable\n", ELF_BITS);
+
+	if ((ehdr.e_ident[EI_DATA] != ELFDATA2LSB) &&
+	    (ehdr.e_ident[EI_DATA] != ELFDATA2MSB))
+		die("Unknown ELF Endianness\n");
+
+	if (ehdr.e_ident[EI_VERSION] != EV_CURRENT)
+		die("Unknown ELF version\n");
+
+	/* Convert the fields to native endian */
+	ehdr.e_type      = elf_half_to_cpu(ehdr.e_type);
+	ehdr.e_machine   = elf_half_to_cpu(ehdr.e_machine);
+	ehdr.e_version   = elf_word_to_cpu(ehdr.e_version);
+	ehdr.e_entry     = elf_addr_to_cpu(ehdr.e_entry);
+	ehdr.e_phoff     = elf_off_to_cpu(ehdr.e_phoff);
+	ehdr.e_shoff     = elf_off_to_cpu(ehdr.e_shoff);
+	ehdr.e_flags     = elf_word_to_cpu(ehdr.e_flags);
+	ehdr.e_ehsize    = elf_half_to_cpu(ehdr.e_ehsize);
+	ehdr.e_phentsize = elf_half_to_cpu(ehdr.e_phentsize);
+	ehdr.e_phnum     = elf_half_to_cpu(ehdr.e_phnum);
+	ehdr.e_shentsize = elf_half_to_cpu(ehdr.e_shentsize);
+	ehdr.e_shnum     = elf_half_to_cpu(ehdr.e_shnum);
+	ehdr.e_shstrndx  = elf_half_to_cpu(ehdr.e_shstrndx);
+
+	if ((ehdr.e_type != ET_EXEC) && (ehdr.e_type != ET_DYN))
+		die("Unsupported ELF header type\n");
+
+	if (ehdr.e_machine != ELF_MACHINE)
+		die("Not for %s\n", ELF_MACHINE_NAME);
+
+	if (ehdr.e_version != EV_CURRENT)
+		die("Unknown ELF version\n");
+
+	if (ehdr.e_ehsize != sizeof(Elf_Ehdr))
+		die("Bad Elf header size\n");
+
+	if (ehdr.e_phentsize != sizeof(Elf_Phdr))
+		die("Bad program header entry\n");
+
+	if (ehdr.e_shentsize != sizeof(Elf_Shdr))
+		die("Bad section header entry\n");
+
+	if (ehdr.e_shstrndx >= ehdr.e_shnum)
+		die("String table index out of bounds\n");
+}
+
+/* Load all e_shnum section headers into secs[], recording each file offset. */
+static void read_shdrs(FILE *fp)
+{
+	int i;
+	Elf_Shdr shdr;
+	struct section *sec;
+
+	secs = calloc(ehdr.e_shnum, sizeof(struct section));
+	if (!secs)
+		die("Unable to allocate %d section headers\n", ehdr.e_shnum);
+	if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0)
+		die("Seek to %" PRIu64 " failed: %s\n",
+		    (uint64_t)ehdr.e_shoff, strerror(errno));
+
+	for (i = 0, sec = secs; i < ehdr.e_shnum; i++, sec++) {
+		sec->shdr_offset = ftell(fp);
+		if (fread(&shdr, sizeof(shdr), 1, fp) != 1)
+			die("Cannot read ELF section headers %d/%d: %s\n",
+			    i, ehdr.e_shnum, strerror(errno));
+		sec->shdr.sh_name      = elf_word_to_cpu(shdr.sh_name);
+		sec->shdr.sh_type      = elf_word_to_cpu(shdr.sh_type);
+		sec->shdr.sh_flags     = elf_xword_to_cpu(shdr.sh_flags);
+		sec->shdr.sh_addr      = elf_addr_to_cpu(shdr.sh_addr);
+		sec->shdr.sh_offset    = elf_off_to_cpu(shdr.sh_offset);
+		sec->shdr.sh_size      = elf_xword_to_cpu(shdr.sh_size);
+		sec->shdr.sh_link      = elf_word_to_cpu(shdr.sh_link);
+		sec->shdr.sh_info      = elf_word_to_cpu(shdr.sh_info);
+		sec->shdr.sh_addralign = elf_xword_to_cpu(shdr.sh_addralign);
+		sec->shdr.sh_entsize   = elf_xword_to_cpu(shdr.sh_entsize);
+		if (sec->shdr.sh_link < ehdr.e_shnum)
+			sec->link = &secs[sec->shdr.sh_link];
+	}
+}
+
+/* Read the contents of every SHT_STRTAB section into sec->strtab. */
+static void read_strtabs(FILE *fp)
+{
+	int i;
+	struct section *sec;
+
+	for (i = 0, sec = secs; i < ehdr.e_shnum; i++, sec++) {
+		if (sec->shdr.sh_type != SHT_STRTAB)
+			continue;
+
+		sec->strtab = malloc(sec->shdr.sh_size);
+		if (!sec->strtab)
+			die("malloc of %" PRIu64 " bytes for strtab failed\n",
+			    (uint64_t)sec->shdr.sh_size);
+
+		if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0)
+			die("Seek to %" PRIu64 " failed: %s\n",
+			    (uint64_t)sec->shdr.sh_offset, strerror(errno));
+
+		if (fread(sec->strtab, 1, sec->shdr.sh_size, fp) !=
+		    sec->shdr.sh_size)
+			die("Cannot read string table: %s\n", strerror(errno));
+	}
+}
+
+/* Read every SHT_SYMTAB section into sec->symtab and convert its entries. */
+static void read_symtabs(FILE *fp)
+{
+	int i, j;
+	struct section *sec;
+
+	for (i = 0, sec = secs; i < ehdr.e_shnum; i++, sec++) {
+		if (sec->shdr.sh_type != SHT_SYMTAB)
+			continue;
+
+		sec->symtab = malloc(sec->shdr.sh_size);
+		if (!sec->symtab)
+			die("malloc of %" PRIu64 " bytes for symtab failed\n",
+			    (uint64_t)sec->shdr.sh_size);
+
+		if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0)
+			die("Seek to %" PRIu64 " failed: %s\n",
+			    (uint64_t)sec->shdr.sh_offset, strerror(errno));
+
+		if (fread(sec->symtab, 1, sec->shdr.sh_size, fp) !=
+		    sec->shdr.sh_size)
+			die("Cannot read symbol table: %s\n", strerror(errno));
+
+		for (j = 0; j < sec->shdr.sh_size/sizeof(Elf_Sym); j++) {
+			Elf_Sym *sym = &sec->symtab[j];
+
+			sym->st_name  = elf_word_to_cpu(sym->st_name);
+			sym->st_value = elf_addr_to_cpu(sym->st_value);
+			sym->st_size  = elf_xword_to_cpu(sym->st_size);
+			sym->st_shndx = elf_half_to_cpu(sym->st_shndx);
+		}
+	}
+}
+
+/* Load every relocation section and rebase r_offset relative to .text. */
+static void read_relocs(FILE *fp)
+{
+	static unsigned long base;
+	int i, j;
+
+	/* Look up the .text address once; it is cached in the static 'base' */
+	if (!base) {
+		struct section *sec = sec_lookup(".text");
+
+		if (!sec)
+			die("Could not find .text section\n");
+
+		base = sec->shdr.sh_addr;
+	}
+
+	for (i = 0; i < ehdr.e_shnum; i++) {
+		struct section *sec = &secs[i];
+
+		if (sec->shdr.sh_type != SHT_REL_TYPE)
+			continue;
+
+		sec->reltab = malloc(sec->shdr.sh_size);
+		if (!sec->reltab)
+			die("malloc of %" PRIu64 " bytes for relocs failed\n",
+			    (uint64_t)sec->shdr.sh_size);
+
+		if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0)
+			die("Seek to %" PRIu64 " failed: %s\n",
+			    (uint64_t)sec->shdr.sh_offset, strerror(errno));
+
+		if (fread(sec->reltab, 1, sec->shdr.sh_size, fp) !=
+		    sec->shdr.sh_size)
+			die("Cannot read relocation table: %s\n", strerror(errno));
+
+		for (j = 0; j < sec->shdr.sh_size/sizeof(Elf_Rel); j++) {
+			Elf_Rel *rel = &sec->reltab[j];
+
+			rel->r_offset = elf_addr_to_cpu(rel->r_offset);
+			/* Set offset into kernel image */
+			rel->r_offset -= base;
+			/* r_info (symbol index and type) is converted as a whole */
+			rel->r_info   = elf_xword_to_cpu(rel->r_info);
+#if (SHT_REL_TYPE == SHT_RELA)
+			rel->r_addend = elf_xword_to_cpu(rel->r_addend);
+#endif
+		}
+	}
+}
+
+/* Zero sh_size in the on-disk header of every relocation section. */
+static void remove_relocs(FILE *fp)
+{
+	int i;
+	Elf_Shdr shdr;
+	struct section *sec;
+
+	for (i = 0, sec = secs; i < ehdr.e_shnum; i++, sec++) {
+		if (sec->shdr.sh_type != SHT_REL_TYPE)
+			continue;
+
+		if (fseek(fp, sec->shdr_offset, SEEK_SET) < 0)
+			die("Seek to %ld failed: %s\n",
+			    sec->shdr_offset, strerror(errno));
+
+		if (fread(&shdr, sizeof(shdr), 1, fp) != 1)
+			die("Cannot read ELF section headers %d/%d: %s\n",
+			    i, ehdr.e_shnum, strerror(errno));
+
+		/* Set relocation section size to 0, effectively removing it.
+		 * This is necessary due to lack of support for relocations
+		 * in objcopy when creating 32bit elf from 64bit elf.
+		 */
+		shdr.sh_size = 0;
+
+		if (fseek(fp, sec->shdr_offset, SEEK_SET) < 0)
+			die("Seek to %ld failed: %s\n",
+			    sec->shdr_offset, strerror(errno));
+
+		if (fwrite(&shdr, sizeof(shdr), 1, fp) != 1)
+			die("Cannot write ELF section headers %d/%d: %s\n",
+			    i, ehdr.e_shnum, strerror(errno));
+	}
+}
+
+static void add_reloc(struct relocs *r, uint64_t offset, unsigned int type)
+{
+	/* Append one entry to r; representation in the binary table:
+	 * |76543210|76543210|76543210|76543210|
+	 * |  Type  |  offset from _text >> 2  |
+	 */
+	offset >>= 2;
+	/* offset is 64-bit so this check sees the untruncated r_offset */
+	if (offset > 0x00FFFFFF)
+		die("Kernel image exceeds maximum size for relocation!\n");
+
+	offset = (offset & 0x00FFFFFF) | ((type & 0xFF) << 24);
+
+	if (r->count == r->size) {
+		unsigned long newsize = r->size + 50000;
+		void *mem = realloc(r->offset, newsize * sizeof(r->offset[0]));
+
+		if (!mem)
+			die("realloc failed\n");
+		r->offset = mem;
+		r->size = newsize;
+	}
+	r->offset[r->count++] = (uint32_t)offset;
+}
+
+static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel,
+			Elf_Sym *sym, const char *symname))
+{
+	int i;
+
+	/* Walk through the relocations */
+	for (i = 0; i < ehdr.e_shnum; i++) {
+		char *sym_strtab;
+		Elf_Sym *sh_symtab;
+		struct section *sec_applies, *sec_symtab;
+		int j;
+		struct section *sec = &secs[i];
+
+		if (sec->shdr.sh_type != SHT_REL_TYPE)
+			continue;
+		sec_symtab  = sec->link;
+		sec_applies = &secs[sec->shdr.sh_info];
+		if (!(sec_applies->shdr.sh_flags & SHF_ALLOC))
+			continue;
+
+		sh_symtab = sec_symtab->symtab;
+		sym_strtab = sec_symtab->link->strtab;
+		for (j = 0; j < sec->shdr.sh_size/sizeof(Elf_Rel); j++) {
+			Elf_Rel *rel = &sec->reltab[j];
+			Elf_Sym *sym = &sh_symtab[ELF_R_SYM(rel->r_info)];
+			const char *symname = sym_name(sym_strtab, sym);
+
+			process(sec, rel, sym, symname);
+		}
+	}
+}
+
+static int do_reloc(struct section *sec, Elf_Rel *rel, Elf_Sym *sym,
+		      const char *symname)
+{
+	unsigned int r_type = ELF_R_TYPE(rel->r_info);
+	unsigned int bind = ELF_ST_BIND(sym->st_info);
+
+	if ((bind == STB_WEAK) && (sym->st_value == 0)) {
+		/* Don't relocate weak symbols without a target */
+		return 0;
+	}
+
+	if (regex_skip_reloc(symname))
+		return 0;
+
+	switch (r_type) {
+	case R_SW64_NONE:
+	case R_SW64_LITERAL:   /* relocated by GOT */
+	case R_SW64_LITUSE:
+	case R_SW64_GPDISP:
+	case R_SW64_BRADDR:
+	case R_SW64_HINT:
+	case R_SW64_SREL32:
+	case R_SW64_GPRELHIGH:
+	case R_SW64_GPRELLOW:
+	case R_SW64_LITERAL_GOT:
+		/*
+		 * NONE can be ignored and PC relative relocations don't
+		 * need to be adjusted.
+		 */
+		break;
+
+	case R_SW64_REFQUAD:
+		add_reloc(&relocs, rel->r_offset, r_type);
+		break;
+
+	default:
+		die("Unsupported relocation type: %s (%d)\n",
+		    rel_type(r_type), r_type);
+		break;
+	}
+
+	return 0;
+}
+
+static int write_reloc_as_bin(uint32_t v, FILE *f)
+{
+	unsigned char buf[4];
+
+	v = cpu_to_elf32(v);
+
+	memcpy(buf, &v, sizeof(uint32_t));
+	return fwrite(buf, 1, 4, f);
+}
+
+static int write_reloc_as_text(uint32_t v, FILE *f)
+{
+	int res;
+
+	res = fprintf(f, "\t.long 0x%08"PRIx32"\n", v);
+	if (res < 0)
+		return res;
+	else
+		return sizeof(uint32_t);
+}
+
+/* Gather the relocations and emit them as asm text, raw binary or in place. */
+static void emit_relocs(int as_text, int as_bin, FILE *outf)
+{
+	int i;
+	int (*write_reloc)(uint32_t, FILE *) = write_reloc_as_bin;
+	int size = 0, size_reserved;
+	struct section *sec_reloc;
+
+	sec_reloc = sec_lookup(".data.reloc");
+	if (!sec_reloc)
+		die("Could not find relocation section\n");
+
+	size_reserved = sec_reloc->shdr.sh_size;
+	/* Collect up the relocations */
+	walk_relocs(do_reloc);
+
+	/* Print the relocations */
+	if (as_text) {
+		/* Print the relocations in a form that
+		 * gas will accept.
+		 */
+		printf(".section \".data.reloc\",\"a\"\n");
+		printf(".balign 8\n");
+		/* Output text to stdout */
+		write_reloc = write_reloc_as_text;
+		outf = stdout;
+	} else if (as_bin) {
+		/* Output raw binary to stdout */
+		outf = stdout;
+	} else {
+		/*
+		 * Seek to offset of the relocation section.
+		 * Each relocation is then written into the
+		 * vmlinux kernel image.
+		 */
+		if (fseek(outf, sec_reloc->shdr.sh_offset, SEEK_SET) < 0)
+			die("Seek to %" PRIu64 " failed: %s\n",
+			    (uint64_t)sec_reloc->shdr.sh_offset,
+			    strerror(errno));
+	}
+
+	for (i = 0; i < relocs.count; i++)
+		size += write_reloc(relocs.offset[i], outf);
+
+	/* Print a stop, but only if we've actually written some relocs */
+	if (size)
+		size += write_reloc(0, outf);
+
+	if (size > size_reserved)
+		/*
+		 * Die, but suggest a value for CONFIG_RELOCATION_TABLE_SIZE
+		 * which will fix this problem and allow a bit of headroom
+		 * if more kernel features are enabled
+		 */
+		die("Relocations overflow available space!\n"
+		    "Please adjust CONFIG_RELOCATION_TABLE_SIZE "
+		    "to at least 0x%08x\n", (size + 0x1000) & ~0xFFF);
+}
+
+/*
+ * As an aid to debugging problems with different linkers
+ * print summary information about the relocs.
+ * Since different linkers tend to emit the sections in
+ * different orders we use the section names in the output.
+ */
+static int do_reloc_info(struct section *sec, Elf_Rel *rel, ElfW(Sym) * sym,
+				const char *symname)
+{
+	printf("%16s  0x%x  %16s  %40s  %16s\n",
+		sec_name(sec->shdr.sh_info),
+		(unsigned int)rel->r_offset,
+		rel_type(ELF_R_TYPE(rel->r_info)),
+		symname,
+		sec_name(sym->st_shndx));
+	return 0;
+}
+
+static void print_reloc_info(void)
+{
+	printf("%16s  %10s  %16s  %40s  %16s\n",
+		"reloc section",
+		"offset",
+		"reloc type",
+		"symbol",
+		"symbol section");
+	walk_relocs(do_reloc_info);
+}
+
+void process(FILE *fp, int as_text, int as_bin,
+	     int show_reloc_info, int keep_relocs)
+{
+	regex_init();
+	read_ehdr(fp);
+	read_shdrs(fp);
+	read_strtabs(fp);
+	read_symtabs(fp);
+	read_relocs(fp);
+	if (show_reloc_info) {
+		print_reloc_info();
+		return;
+	}
+	emit_relocs(as_text, as_bin, fp);
+	if (!keep_relocs)
+		remove_relocs(fp);
+}
diff --git a/arch/sw_64/tools/relocs.h b/arch/sw_64/tools/relocs.h
new file mode 100644
index 0000000000000000000000000000000000000000..17c7e31113a0e5f93ac2b596d54e31bc9de7fe58
--- /dev/null
+++ b/arch/sw_64/tools/relocs.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SW64_TOOLS_RELOCS_H
+#define _SW64_TOOLS_RELOCS_H
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <elf.h>
+#include <byteswap.h>
+#define USE_BSD
+#include <endian.h>
+#include <regex.h>
+
+#define EM_SW64        0x9916
+/*
+ * SW64 ELF relocation types
+ */
+#define R_SW64_NONE		0       /* No reloc */
+#define R_SW64_REFLONG		1       /* Direct 32 bit */
+#define R_SW64_REFQUAD		2       /* Direct 64 bit */
+#define R_SW64_GPREL32		3       /* GP relative 32 bit */
+#define R_SW64_LITERAL		4       /* GP relative 16 bit w/optimization */
+#define R_SW64_LITUSE		5       /* Optimization hint for LITERAL */
+#define R_SW64_GPDISP		6       /* Add displacement to GP */
+#define R_SW64_BRADDR		7       /* PC+4 relative 23 bit shifted */
+#define R_SW64_HINT		8       /* PC+4 relative 16 bit shifted */
+#define R_SW64_SREL16		9       /* PC relative 16 bit */
+#define R_SW64_SREL32		10      /* PC relative 32 bit */
+#define R_SW64_SREL64		11      /* PC relative 64 bit */
+#define R_SW64_GPRELHIGH	17      /* GP relative 32 bit, high 16 bits */
+#define R_SW64_GPRELLOW		18      /* GP relative 32 bit, low 16 bits */
+#define R_SW64_GPREL16		19      /* GP relative 16 bit */
+#define R_SW64_COPY		24      /* Copy symbol at runtime */
+#define R_SW64_GLOB_DAT		25      /* Create GOT entry */
+#define R_SW64_JMP_SLOT		26      /* Create PLT entry */
+#define R_SW64_RELATIVE		27      /* Adjust by program base */
+#define R_SW64_BRSGP		28
+#define R_SW64_TLSGD		29
+#define R_SW64_TLS_LDM		30
+#define R_SW64_DTPMOD64		31
+#define R_SW64_GOTDTPREL	32
+#define R_SW64_DTPREL64		33
+#define R_SW64_DTPRELHI		34
+#define R_SW64_DTPRELLO		35
+#define R_SW64_DTPREL16		36
+#define R_SW64_GOTTPREL		37
+#define R_SW64_TPREL64		38
+#define R_SW64_TPRELHI		39
+#define R_SW64_TPRELLO		40
+#define R_SW64_TPREL16		41
+#define R_SW64_LITERAL_GOT	43	/* GP relative */
+
+void die(char *fmt, ...);
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+enum symtype {
+	S_ABS,
+	S_REL,
+	S_SEG,
+	S_LIN,
+	S_NSYMTYPES
+};
+
+void process(FILE *fp, int as_text, int as_bin,
+		int show_reloc_info, int keep_relocs);
+#endif /* _SW64_TOOLS_RELOCS_H */
diff --git a/arch/sw_64/tools/relocs_main.c b/arch/sw_64/tools/relocs_main.c
new file mode 100644
index 0000000000000000000000000000000000000000..30a830a070dbe98b4b0b4770b9fc3ca1d3406941
--- /dev/null
+++ b/arch/sw_64/tools/relocs_main.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <endian.h>
+#include <elf.h>
+
+#include "relocs.h"
+
+void die(char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	exit(1);
+}
+
+static void usage(void)
+{
+	die("relocs [--reloc-info|--text|--bin|--keep] vmlinux\n");
+}
+
+/* Parse the options, open the vmlinux read-write and process its relocations. */
+int main(int argc, char **argv)
+{
+	int show_reloc_info, as_text, as_bin, keep_relocs;
+	const char *fname;
+	FILE *fp;
+	int i;
+	unsigned char e_ident[EI_NIDENT];
+
+	show_reloc_info = 0;
+	as_text = 0;
+	as_bin = 0;
+	keep_relocs = 0;
+	fname = NULL;
+	for (i = 1; i < argc; i++) {
+		char *arg = argv[i];
+
+		if (*arg == '-') {
+			if (strcmp(arg, "--reloc-info") == 0) {
+				show_reloc_info = 1;
+				continue;
+			}
+			if (strcmp(arg, "--text") == 0) {
+				as_text = 1;
+				continue;
+			}
+			if (strcmp(arg, "--bin") == 0) {
+				as_bin = 1;
+				continue;
+			}
+			if (strcmp(arg, "--keep") == 0) {
+				keep_relocs = 1;
+				continue;
+			}
+		} else if (!fname) {
+			fname = arg;
+			continue;
+		}
+		usage();
+	}
+	if (!fname)
+		usage();
+
+	fp = fopen(fname, "r+");
+	if (!fp)
+		die("Cannot open %s: %s\n", fname, strerror(errno));
+
+	if (fread(&e_ident, 1, EI_NIDENT, fp) != EI_NIDENT)
+		die("Cannot read %s: %s\n", fname, strerror(errno));
+
+	rewind(fp);
+	if (e_ident[EI_CLASS] != ELFCLASS64)
+		die("Unsupported ELF class on SW64: %s\n", fname);
+	process(fp, as_text, as_bin, show_reloc_info, keep_relocs);
+	if (fclose(fp) != 0)
+		die("Cannot close %s: %s\n", fname, strerror(errno));
+	return 0;
+}
diff --git a/scripts/package/buildtar b/scripts/package/buildtar
index 65b4ea50296219e2cfed406dddd3cb4eac0737ea..93158943a4f73d868660e80eb488a3475512e7ac 100755
--- a/scripts/package/buildtar
+++ b/scripts/package/buildtar
@@ -64,6 +64,9 @@ case "${ARCH}" in
 	alpha)
 		[ -f "${objtree}/arch/alpha/boot/vmlinux.gz" ] && cp -v -- "${objtree}/arch/alpha/boot/vmlinux.gz" "${tmpdir}/boot/vmlinuz-${KERNELRELEASE}"
 		;;
+	sw_64)
+		[ -f "${objtree}/arch/sw_64/boot/vmlinux.bin" ] && cp -v -- "${objtree}/arch/sw_64/boot/vmlinux.bin" "${tmpdir}/boot/vmlinux-bin-${KERNELRELEASE}"
+		;;
 	parisc*)
 		[ -f "${KBUILD_IMAGE}" ] && cp -v -- "${KBUILD_IMAGE}" "${tmpdir}/boot/vmlinux-${KERNELRELEASE}"
 		[ -f "${objtree}/lifimage" ] && cp -v -- "${objtree}/lifimage" "${tmpdir}/boot/lifimage-${KERNELRELEASE}"
diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian
index 5044224cf6714b3e5738f1e6d30dda05c589e3ff..2586bcd5f43a3ab28b3ac512e6136e7c1bede7fd 100755
--- a/scripts/package/mkdebian
+++ b/scripts/package/mkdebian
@@ -26,7 +26,7 @@ set_debarch() {
 
 	# Attempt to find the correct Debian architecture
 	case "$UTS_MACHINE" in
-	i386|ia64|alpha|m68k|riscv*)
+	i386|ia64|alpha|m68k|riscv*|sw_64)
 		debarch="$UTS_MACHINE" ;;
 	x86_64)
 		debarch=amd64 ;;