From 18c5e8073739dbda047019dffff34fb99f382cb4 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 7 Sep 2022 14:02:38 +0800 Subject: [PATCH 01/36] LoongArch: Add vector extensions support LoongArch inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP -------------------------------- Add LoongArch's vector extensions support, which includes 128-bit LSX (i.e., Loongson SIMD eXtension) and 256-bit LASX (i.e., Loongson Advanced SIMD eXtension). Signed-off-by: Huacai Chen Change-Id: Icbdcc8c0657d85198accb9050f90dd41c9fa456c --- arch/loongarch/Kconfig | 32 ++ arch/loongarch/configs/loongson3_defconfig | 2 + arch/loongarch/include/asm/asmmacro.h | 624 +++++++++++++++++++++ arch/loongarch/include/asm/fpu.h | 198 ++++++- arch/loongarch/kernel/cpu-probe.c | 12 + arch/loongarch/kernel/fpu.S | 288 ++++++++++ arch/loongarch/kernel/process.c | 10 +- arch/loongarch/kernel/ptrace.c | 110 ++++ arch/loongarch/kernel/signal.c | 330 ++++++++++- arch/loongarch/kernel/traps.c | 82 ++- 10 files changed, 1678 insertions(+), 10 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 068515a599e7..76f0be990526 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -75,6 +75,8 @@ config LOONGARCH select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL + select CPU_SUPPORTS_LSX + select CPU_SUPPORTS_LASX select GPIOLIB select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_MMAP_RND_BITS if MMU @@ -145,6 +147,36 @@ config CPU_HAS_PREFETCH bool default y +config CPU_HAS_LSX + bool "Support for the Loongson SIMD Extension" + depends on CPU_SUPPORTS_LSX + depends on 64BIT + help + Loongson SIMD Extension (LSX) introduces 128 bit wide vector registers + and a set of SIMD instructions to operate on them. When this option + is enabled the kernel will support allocating & switching LSX + vector register contexts. 
If you know that your kernel will only be + running on CPUs which do not support LSX or that your userland will + not be making use of it then you may wish to say N here to reduce + the size & complexity of your kernel. + + If unsure, say Y. + +config CPU_HAS_LASX + bool "Support for the Loongson Advanced SIMD Extension" + depends on CPU_SUPPORTS_LASX + depends on 64BIT && CPU_HAS_LSX + help + Loongson Advanced SIMD Extension (LASX) is a 256-bit wide SIMD extension. + + If unsure, say Y. + +config CPU_SUPPORTS_LSX + bool + +config CPU_SUPPORTS_LASX + bool + config GENERIC_CALIBRATE_DELAY def_bool y diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 7bdb125ff4d1..ace3efc63b08 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -35,6 +35,8 @@ CONFIG_BPF_SYSCALL=y CONFIG_USERFAULTFD=y CONFIG_PERF_EVENTS=y # CONFIG_COMPAT_BRK is not set +CONFIG_CPU_HAS_LSX=y +CONFIG_CPU_HAS_LASX=y CONFIG_HOTPLUG_CPU=y CONFIG_NUMA=y CONFIG_ACPI_SPCR_TABLE=y diff --git a/arch/loongarch/include/asm/asmmacro.h b/arch/loongarch/include/asm/asmmacro.h index be037a40580d..3a629b545174 100644 --- a/arch/loongarch/include/asm/asmmacro.h +++ b/arch/loongarch/include/asm/asmmacro.h @@ -117,6 +117,212 @@ .endif .endm + .macro parse_vr var vr + \var = -1 + .ifc \vr, $vr0 + \var = 0 + .endif + .ifc \vr, $vr1 + \var = 1 + .endif + .ifc \vr, $vr2 + \var = 2 + .endif + .ifc \vr, $vr3 + \var = 3 + .endif + .ifc \vr, $vr4 + \var = 4 + .endif + .ifc \vr, $vr5 + \var = 5 + .endif + .ifc \vr, $vr6 + \var = 6 + .endif + .ifc \vr, $vr7 + \var = 7 + .endif + .ifc \vr, $vr8 + \var = 8 + .endif + .ifc \vr, $vr9 + \var = 9 + .endif + .ifc \vr, $vr10 + \var = 10 + .endif + .ifc \vr, $vr11 + \var = 11 + .endif + .ifc \vr, $vr12 + \var = 12 + .endif + .ifc \vr, $vr13 + \var = 13 + .endif + .ifc \vr, $vr14 + \var = 14 + .endif + .ifc \vr, $vr15 + \var = 15 + .endif + .ifc \vr, $vr16 + \var = 16 + .endif + .ifc \vr, 
$vr17 + \var = 17 + .endif + .ifc \vr, $vr18 + \var = 18 + .endif + .ifc \vr, $vr19 + \var = 19 + .endif + .ifc \vr, $vr20 + \var = 20 + .endif + .ifc \vr, $vr21 + \var = 21 + .endif + .ifc \vr, $vr22 + \var = 22 + .endif + .ifc \vr, $vr23 + \var = 23 + .endif + .ifc \vr, $vr24 + \var = 24 + .endif + .ifc \vr, $vr25 + \var = 25 + .endif + .ifc \vr, $vr26 + \var = 26 + .endif + .ifc \vr, $vr27 + \var = 27 + .endif + .ifc \vr, $vr28 + \var = 28 + .endif + .ifc \vr, $vr29 + \var = 29 + .endif + .ifc \vr, $vr30 + \var = 30 + .endif + .ifc \vr, $vr31 + \var = 31 + .endif + .iflt \var /* still -1: no $vrN name matched */ + .error "Unable to parse register name \vr" + .endif + .endm + + .macro parse_xr var xr /* var := N when xr is $xrN (N = 0..31); assembly error otherwise */ + \var = -1 + .ifc \xr, $xr0 + \var = 0 + .endif + .ifc \xr, $xr1 + \var = 1 + .endif + .ifc \xr, $xr2 + \var = 2 + .endif + .ifc \xr, $xr3 + \var = 3 + .endif + .ifc \xr, $xr4 + \var = 4 + .endif + .ifc \xr, $xr5 + \var = 5 + .endif + .ifc \xr, $xr6 + \var = 6 + .endif + .ifc \xr, $xr7 + \var = 7 + .endif + .ifc \xr, $xr8 + \var = 8 + .endif + .ifc \xr, $xr9 + \var = 9 + .endif + .ifc \xr, $xr10 + \var = 10 + .endif + .ifc \xr, $xr11 + \var = 11 + .endif + .ifc \xr, $xr12 + \var = 12 + .endif + .ifc \xr, $xr13 + \var = 13 + .endif + .ifc \xr, $xr14 + \var = 14 + .endif + .ifc \xr, $xr15 + \var = 15 + .endif + .ifc \xr, $xr16 + \var = 16 + .endif + .ifc \xr, $xr17 + \var = 17 + .endif + .ifc \xr, $xr18 + \var = 18 + .endif + .ifc \xr, $xr19 + \var = 19 + .endif + .ifc \xr, $xr20 + \var = 20 + .endif + .ifc \xr, $xr21 + \var = 21 + .endif + .ifc \xr, $xr22 + \var = 22 + .endif + .ifc \xr, $xr23 + \var = 23 + .endif + .ifc \xr, $xr24 + \var = 24 + .endif + .ifc \xr, $xr25 + \var = 25 + .endif + .ifc \xr, $xr26 + \var = 26 + .endif + .ifc \xr, $xr27 + \var = 27 + .endif + .ifc \xr, $xr28 + \var = 28 + .endif + .ifc \xr, $xr29 + \var = 29 + .endif + .ifc \xr, $xr30 + \var = 30 + .endif + .ifc \xr, $xr31 + \var = 31 + .endif + .iflt \var /* still -1: no $xrN name matched */ + .error "Unable to parse register name \xr" + .endif + .endm + .macro
cpu_save_nonscratch thread stptr.d s0, \thread, THREAD_REG23 stptr.d s1, \thread, THREAD_REG24 @@ -270,6 +476,424 @@ fld.d $f31, \tmp, THREAD_FPR31 - THREAD_FPR0 .endm + .macro lsx_save_data thread tmp + parse_r __tmp, \tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \tmp, \thread, \tmp + /* vst opcode is 0xb1 */ + .word (0xb1 << 22 | ((THREAD_FPR0-THREAD_FPR0) << 10) | __tmp << 5 | 0) + .word (0xb1 << 22 | ((THREAD_FPR1-THREAD_FPR0) << 10) | __tmp << 5 | 1) + .word (0xb1 << 22 | ((THREAD_FPR2-THREAD_FPR0) << 10) | __tmp << 5 | 2) + .word (0xb1 << 22 | ((THREAD_FPR3-THREAD_FPR0) << 10) | __tmp << 5 | 3) + .word (0xb1 << 22 | ((THREAD_FPR4-THREAD_FPR0) << 10) | __tmp << 5 | 4) + .word (0xb1 << 22 | ((THREAD_FPR5-THREAD_FPR0) << 10) | __tmp << 5 | 5) + .word (0xb1 << 22 | ((THREAD_FPR6-THREAD_FPR0) << 10) | __tmp << 5 | 6) + .word (0xb1 << 22 | ((THREAD_FPR7-THREAD_FPR0) << 10) | __tmp << 5 | 7) + .word (0xb1 << 22 | ((THREAD_FPR8-THREAD_FPR0) << 10) | __tmp << 5 | 8) + .word (0xb1 << 22 | ((THREAD_FPR9-THREAD_FPR0) << 10) | __tmp << 5 | 9) + .word (0xb1 << 22 | ((THREAD_FPR10-THREAD_FPR0) << 10) | __tmp << 5 | 10) + .word (0xb1 << 22 | ((THREAD_FPR11-THREAD_FPR0) << 10) | __tmp << 5 | 11) + .word (0xb1 << 22 | ((THREAD_FPR12-THREAD_FPR0) << 10) | __tmp << 5 | 12) + .word (0xb1 << 22 | ((THREAD_FPR13-THREAD_FPR0) << 10) | __tmp << 5 | 13) + .word (0xb1 << 22 | ((THREAD_FPR14-THREAD_FPR0) << 10) | __tmp << 5 | 14) + .word (0xb1 << 22 | ((THREAD_FPR15-THREAD_FPR0) << 10) | __tmp << 5 | 15) + .word (0xb1 << 22 | ((THREAD_FPR16-THREAD_FPR0) << 10) | __tmp << 5 | 16) + .word (0xb1 << 22 | ((THREAD_FPR17-THREAD_FPR0) << 10) | __tmp << 5 | 17) + .word (0xb1 << 22 | ((THREAD_FPR18-THREAD_FPR0) << 10) | __tmp << 5 | 18) + .word (0xb1 << 22 | ((THREAD_FPR19-THREAD_FPR0) << 10) | __tmp << 5 | 19) + .word (0xb1 << 22 | ((THREAD_FPR20-THREAD_FPR0) << 10) | __tmp << 5 | 20) + .word (0xb1 << 22 | ((THREAD_FPR21-THREAD_FPR0) << 10) | __tmp << 5 | 21) + .word (0xb1 << 22 | 
((THREAD_FPR22-THREAD_FPR0) << 10) | __tmp << 5 | 22) + .word (0xb1 << 22 | ((THREAD_FPR23-THREAD_FPR0) << 10) | __tmp << 5 | 23) + .word (0xb1 << 22 | ((THREAD_FPR24-THREAD_FPR0) << 10) | __tmp << 5 | 24) + .word (0xb1 << 22 | ((THREAD_FPR25-THREAD_FPR0) << 10) | __tmp << 5 | 25) + .word (0xb1 << 22 | ((THREAD_FPR26-THREAD_FPR0) << 10) | __tmp << 5 | 26) + .word (0xb1 << 22 | ((THREAD_FPR27-THREAD_FPR0) << 10) | __tmp << 5 | 27) + .word (0xb1 << 22 | ((THREAD_FPR28-THREAD_FPR0) << 10) | __tmp << 5 | 28) + .word (0xb1 << 22 | ((THREAD_FPR29-THREAD_FPR0) << 10) | __tmp << 5 | 29) + .word (0xb1 << 22 | ((THREAD_FPR30-THREAD_FPR0) << 10) | __tmp << 5 | 30) + .word (0xb1 << 22 | ((THREAD_FPR31-THREAD_FPR0) << 10) | __tmp << 5 | 31) + .endm + + .macro lsx_restore_data thread tmp + parse_r __tmp, \tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \tmp, \thread, \tmp + /* vld opcode is 0xb0 */ + .word (0xb0 << 22 | ((THREAD_FPR0-THREAD_FPR0) << 10) | __tmp << 5 | 0) + .word (0xb0 << 22 | ((THREAD_FPR1-THREAD_FPR0) << 10) | __tmp << 5 | 1) + .word (0xb0 << 22 | ((THREAD_FPR2-THREAD_FPR0) << 10) | __tmp << 5 | 2) + .word (0xb0 << 22 | ((THREAD_FPR3-THREAD_FPR0) << 10) | __tmp << 5 | 3) + .word (0xb0 << 22 | ((THREAD_FPR4-THREAD_FPR0) << 10) | __tmp << 5 | 4) + .word (0xb0 << 22 | ((THREAD_FPR5-THREAD_FPR0) << 10) | __tmp << 5 | 5) + .word (0xb0 << 22 | ((THREAD_FPR6-THREAD_FPR0) << 10) | __tmp << 5 | 6) + .word (0xb0 << 22 | ((THREAD_FPR7-THREAD_FPR0) << 10) | __tmp << 5 | 7) + .word (0xb0 << 22 | ((THREAD_FPR8-THREAD_FPR0) << 10) | __tmp << 5 | 8) + .word (0xb0 << 22 | ((THREAD_FPR9-THREAD_FPR0) << 10) | __tmp << 5 | 9) + .word (0xb0 << 22 | ((THREAD_FPR10-THREAD_FPR0) << 10) | __tmp << 5 | 10) + .word (0xb0 << 22 | ((THREAD_FPR11-THREAD_FPR0) << 10) | __tmp << 5 | 11) + .word (0xb0 << 22 | ((THREAD_FPR12-THREAD_FPR0) << 10) | __tmp << 5 | 12) + .word (0xb0 << 22 | ((THREAD_FPR13-THREAD_FPR0) << 10) | __tmp << 5 | 13) + .word (0xb0 << 22 | ((THREAD_FPR14-THREAD_FPR0) << 10) | __tmp 
<< 5 | 14) + .word (0xb0 << 22 | ((THREAD_FPR15-THREAD_FPR0) << 10) | __tmp << 5 | 15) + .word (0xb0 << 22 | ((THREAD_FPR16-THREAD_FPR0) << 10) | __tmp << 5 | 16) + .word (0xb0 << 22 | ((THREAD_FPR17-THREAD_FPR0) << 10) | __tmp << 5 | 17) + .word (0xb0 << 22 | ((THREAD_FPR18-THREAD_FPR0) << 10) | __tmp << 5 | 18) + .word (0xb0 << 22 | ((THREAD_FPR19-THREAD_FPR0) << 10) | __tmp << 5 | 19) + .word (0xb0 << 22 | ((THREAD_FPR20-THREAD_FPR0) << 10) | __tmp << 5 | 20) + .word (0xb0 << 22 | ((THREAD_FPR21-THREAD_FPR0) << 10) | __tmp << 5 | 21) + .word (0xb0 << 22 | ((THREAD_FPR22-THREAD_FPR0) << 10) | __tmp << 5 | 22) + .word (0xb0 << 22 | ((THREAD_FPR23-THREAD_FPR0) << 10) | __tmp << 5 | 23) + .word (0xb0 << 22 | ((THREAD_FPR24-THREAD_FPR0) << 10) | __tmp << 5 | 24) + .word (0xb0 << 22 | ((THREAD_FPR25-THREAD_FPR0) << 10) | __tmp << 5 | 25) + .word (0xb0 << 22 | ((THREAD_FPR26-THREAD_FPR0) << 10) | __tmp << 5 | 26) + .word (0xb0 << 22 | ((THREAD_FPR27-THREAD_FPR0) << 10) | __tmp << 5 | 27) + .word (0xb0 << 22 | ((THREAD_FPR28-THREAD_FPR0) << 10) | __tmp << 5 | 28) + .word (0xb0 << 22 | ((THREAD_FPR29-THREAD_FPR0) << 10) | __tmp << 5 | 29) + .word (0xb0 << 22 | ((THREAD_FPR30-THREAD_FPR0) << 10) | __tmp << 5 | 30) + .word (0xb0 << 22 | ((THREAD_FPR31-THREAD_FPR0) << 10) | __tmp << 5 | 31) + .endm + + .macro lsx_save_all thread tmp0 tmp1 + fpu_save_cc \thread, \tmp0, \tmp1 + fpu_save_csr \thread, \tmp0 + lsx_save_data \thread, \tmp0 + .endm + + .macro lsx_restore_all thread tmp0 tmp1 + lsx_restore_data \thread, \tmp0 + fpu_restore_cc \thread, \tmp0, \tmp1 + fpu_restore_csr \thread, \tmp0 + .endm + + .macro lsx_save_upper vd base tmp off + parse_vr __vd, \vd + parse_r __tmp, \tmp + /* vpickve2gr opcode is 0xe5dfe */ + .word (0xe5dfe << 11 | 1 << 10 | __vd << 5 | __tmp) + st.d \tmp, \base, (\off+8) + .endm + + .macro lsx_save_all_upper thread base tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \base, \thread, \tmp + lsx_save_upper $vr0, \base, \tmp, (THREAD_FPR0-THREAD_FPR0) + 
lsx_save_upper $vr1, \base, \tmp, (THREAD_FPR1-THREAD_FPR0) + lsx_save_upper $vr2, \base, \tmp, (THREAD_FPR2-THREAD_FPR0) + lsx_save_upper $vr3, \base, \tmp, (THREAD_FPR3-THREAD_FPR0) + lsx_save_upper $vr4, \base, \tmp, (THREAD_FPR4-THREAD_FPR0) + lsx_save_upper $vr5, \base, \tmp, (THREAD_FPR5-THREAD_FPR0) + lsx_save_upper $vr6, \base, \tmp, (THREAD_FPR6-THREAD_FPR0) + lsx_save_upper $vr7, \base, \tmp, (THREAD_FPR7-THREAD_FPR0) + lsx_save_upper $vr8, \base, \tmp, (THREAD_FPR8-THREAD_FPR0) + lsx_save_upper $vr9, \base, \tmp, (THREAD_FPR9-THREAD_FPR0) + lsx_save_upper $vr10, \base, \tmp, (THREAD_FPR10-THREAD_FPR0) + lsx_save_upper $vr11, \base, \tmp, (THREAD_FPR11-THREAD_FPR0) + lsx_save_upper $vr12, \base, \tmp, (THREAD_FPR12-THREAD_FPR0) + lsx_save_upper $vr13, \base, \tmp, (THREAD_FPR13-THREAD_FPR0) + lsx_save_upper $vr14, \base, \tmp, (THREAD_FPR14-THREAD_FPR0) + lsx_save_upper $vr15, \base, \tmp, (THREAD_FPR15-THREAD_FPR0) + lsx_save_upper $vr16, \base, \tmp, (THREAD_FPR16-THREAD_FPR0) + lsx_save_upper $vr17, \base, \tmp, (THREAD_FPR17-THREAD_FPR0) + lsx_save_upper $vr18, \base, \tmp, (THREAD_FPR18-THREAD_FPR0) + lsx_save_upper $vr19, \base, \tmp, (THREAD_FPR19-THREAD_FPR0) + lsx_save_upper $vr20, \base, \tmp, (THREAD_FPR20-THREAD_FPR0) + lsx_save_upper $vr21, \base, \tmp, (THREAD_FPR21-THREAD_FPR0) + lsx_save_upper $vr22, \base, \tmp, (THREAD_FPR22-THREAD_FPR0) + lsx_save_upper $vr23, \base, \tmp, (THREAD_FPR23-THREAD_FPR0) + lsx_save_upper $vr24, \base, \tmp, (THREAD_FPR24-THREAD_FPR0) + lsx_save_upper $vr25, \base, \tmp, (THREAD_FPR25-THREAD_FPR0) + lsx_save_upper $vr26, \base, \tmp, (THREAD_FPR26-THREAD_FPR0) + lsx_save_upper $vr27, \base, \tmp, (THREAD_FPR27-THREAD_FPR0) + lsx_save_upper $vr28, \base, \tmp, (THREAD_FPR28-THREAD_FPR0) + lsx_save_upper $vr29, \base, \tmp, (THREAD_FPR29-THREAD_FPR0) + lsx_save_upper $vr30, \base, \tmp, (THREAD_FPR30-THREAD_FPR0) + lsx_save_upper $vr31, \base, \tmp, (THREAD_FPR31-THREAD_FPR0) + .endm + + .macro 
lsx_restore_upper vd base tmp off + parse_vr __vd, \vd + parse_r __tmp, \tmp + ld.d \tmp, \base, (\off+8) + /* vinsgr2vr opcode is 0xe5d7e */ + .word (0xe5d7e << 11 | 1 << 10 | __tmp << 5 | __vd) + .endm + + .macro lsx_restore_all_upper thread base tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \base, \thread, \tmp + lsx_restore_upper $vr0, \base, \tmp, (THREAD_FPR0-THREAD_FPR0) + lsx_restore_upper $vr1, \base, \tmp, (THREAD_FPR1-THREAD_FPR0) + lsx_restore_upper $vr2, \base, \tmp, (THREAD_FPR2-THREAD_FPR0) + lsx_restore_upper $vr3, \base, \tmp, (THREAD_FPR3-THREAD_FPR0) + lsx_restore_upper $vr4, \base, \tmp, (THREAD_FPR4-THREAD_FPR0) + lsx_restore_upper $vr5, \base, \tmp, (THREAD_FPR5-THREAD_FPR0) + lsx_restore_upper $vr6, \base, \tmp, (THREAD_FPR6-THREAD_FPR0) + lsx_restore_upper $vr7, \base, \tmp, (THREAD_FPR7-THREAD_FPR0) + lsx_restore_upper $vr8, \base, \tmp, (THREAD_FPR8-THREAD_FPR0) + lsx_restore_upper $vr9, \base, \tmp, (THREAD_FPR9-THREAD_FPR0) + lsx_restore_upper $vr10, \base, \tmp, (THREAD_FPR10-THREAD_FPR0) + lsx_restore_upper $vr11, \base, \tmp, (THREAD_FPR11-THREAD_FPR0) + lsx_restore_upper $vr12, \base, \tmp, (THREAD_FPR12-THREAD_FPR0) + lsx_restore_upper $vr13, \base, \tmp, (THREAD_FPR13-THREAD_FPR0) + lsx_restore_upper $vr14, \base, \tmp, (THREAD_FPR14-THREAD_FPR0) + lsx_restore_upper $vr15, \base, \tmp, (THREAD_FPR15-THREAD_FPR0) + lsx_restore_upper $vr16, \base, \tmp, (THREAD_FPR16-THREAD_FPR0) + lsx_restore_upper $vr17, \base, \tmp, (THREAD_FPR17-THREAD_FPR0) + lsx_restore_upper $vr18, \base, \tmp, (THREAD_FPR18-THREAD_FPR0) + lsx_restore_upper $vr19, \base, \tmp, (THREAD_FPR19-THREAD_FPR0) + lsx_restore_upper $vr20, \base, \tmp, (THREAD_FPR20-THREAD_FPR0) + lsx_restore_upper $vr21, \base, \tmp, (THREAD_FPR21-THREAD_FPR0) + lsx_restore_upper $vr22, \base, \tmp, (THREAD_FPR22-THREAD_FPR0) + lsx_restore_upper $vr23, \base, \tmp, (THREAD_FPR23-THREAD_FPR0) + lsx_restore_upper $vr24, \base, \tmp, (THREAD_FPR24-THREAD_FPR0) + lsx_restore_upper $vr25, \base, 
\tmp, (THREAD_FPR25-THREAD_FPR0) + lsx_restore_upper $vr26, \base, \tmp, (THREAD_FPR26-THREAD_FPR0) + lsx_restore_upper $vr27, \base, \tmp, (THREAD_FPR27-THREAD_FPR0) + lsx_restore_upper $vr28, \base, \tmp, (THREAD_FPR28-THREAD_FPR0) + lsx_restore_upper $vr29, \base, \tmp, (THREAD_FPR29-THREAD_FPR0) + lsx_restore_upper $vr30, \base, \tmp, (THREAD_FPR30-THREAD_FPR0) + lsx_restore_upper $vr31, \base, \tmp, (THREAD_FPR31-THREAD_FPR0) + .endm + + .macro lsx_init_upper vd tmp + parse_vr __vd, \vd + parse_r __tmp, \tmp + /* vinsgr2vr opcode is 0xe5d7e */ + .word (0xe5d7e << 11 | 1 << 10 | __tmp << 5 | __vd) + .endm + + .macro lsx_init_all_upper tmp + not \tmp, zero + lsx_init_upper $vr0 \tmp + lsx_init_upper $vr1 \tmp + lsx_init_upper $vr2 \tmp + lsx_init_upper $vr3 \tmp + lsx_init_upper $vr4 \tmp + lsx_init_upper $vr5 \tmp + lsx_init_upper $vr6 \tmp + lsx_init_upper $vr7 \tmp + lsx_init_upper $vr8 \tmp + lsx_init_upper $vr9 \tmp + lsx_init_upper $vr10 \tmp + lsx_init_upper $vr11 \tmp + lsx_init_upper $vr12 \tmp + lsx_init_upper $vr13 \tmp + lsx_init_upper $vr14 \tmp + lsx_init_upper $vr15 \tmp + lsx_init_upper $vr16 \tmp + lsx_init_upper $vr17 \tmp + lsx_init_upper $vr18 \tmp + lsx_init_upper $vr19 \tmp + lsx_init_upper $vr20 \tmp + lsx_init_upper $vr21 \tmp + lsx_init_upper $vr22 \tmp + lsx_init_upper $vr23 \tmp + lsx_init_upper $vr24 \tmp + lsx_init_upper $vr25 \tmp + lsx_init_upper $vr26 \tmp + lsx_init_upper $vr27 \tmp + lsx_init_upper $vr28 \tmp + lsx_init_upper $vr29 \tmp + lsx_init_upper $vr30 \tmp + lsx_init_upper $vr31 \tmp + .endm + + .macro lasx_save_data thread tmp + parse_r __tmp, \tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \tmp, \thread, \tmp + /* xvst opcode is 0xb3 */ + .word (0xb3 << 22 | ((THREAD_FPR0-THREAD_FPR0) << 10) | __tmp << 5 | 0) + .word (0xb3 << 22 | ((THREAD_FPR1-THREAD_FPR0) << 10) | __tmp << 5 | 1) + .word (0xb3 << 22 | ((THREAD_FPR2-THREAD_FPR0) << 10) | __tmp << 5 | 2) + .word (0xb3 << 22 | ((THREAD_FPR3-THREAD_FPR0) << 10) | __tmp << 5 | 3) 
+ .word (0xb3 << 22 | ((THREAD_FPR4-THREAD_FPR0) << 10) | __tmp << 5 | 4) + .word (0xb3 << 22 | ((THREAD_FPR5-THREAD_FPR0) << 10) | __tmp << 5 | 5) + .word (0xb3 << 22 | ((THREAD_FPR6-THREAD_FPR0) << 10) | __tmp << 5 | 6) + .word (0xb3 << 22 | ((THREAD_FPR7-THREAD_FPR0) << 10) | __tmp << 5 | 7) + .word (0xb3 << 22 | ((THREAD_FPR8-THREAD_FPR0) << 10) | __tmp << 5 | 8) + .word (0xb3 << 22 | ((THREAD_FPR9-THREAD_FPR0) << 10) | __tmp << 5 | 9) + .word (0xb3 << 22 | ((THREAD_FPR10-THREAD_FPR0) << 10) | __tmp << 5 | 10) + .word (0xb3 << 22 | ((THREAD_FPR11-THREAD_FPR0) << 10) | __tmp << 5 | 11) + .word (0xb3 << 22 | ((THREAD_FPR12-THREAD_FPR0) << 10) | __tmp << 5 | 12) + .word (0xb3 << 22 | ((THREAD_FPR13-THREAD_FPR0) << 10) | __tmp << 5 | 13) + .word (0xb3 << 22 | ((THREAD_FPR14-THREAD_FPR0) << 10) | __tmp << 5 | 14) + .word (0xb3 << 22 | ((THREAD_FPR15-THREAD_FPR0) << 10) | __tmp << 5 | 15) + .word (0xb3 << 22 | ((THREAD_FPR16-THREAD_FPR0) << 10) | __tmp << 5 | 16) + .word (0xb3 << 22 | ((THREAD_FPR17-THREAD_FPR0) << 10) | __tmp << 5 | 17) + .word (0xb3 << 22 | ((THREAD_FPR18-THREAD_FPR0) << 10) | __tmp << 5 | 18) + .word (0xb3 << 22 | ((THREAD_FPR19-THREAD_FPR0) << 10) | __tmp << 5 | 19) + .word (0xb3 << 22 | ((THREAD_FPR20-THREAD_FPR0) << 10) | __tmp << 5 | 20) + .word (0xb3 << 22 | ((THREAD_FPR21-THREAD_FPR0) << 10) | __tmp << 5 | 21) + .word (0xb3 << 22 | ((THREAD_FPR22-THREAD_FPR0) << 10) | __tmp << 5 | 22) + .word (0xb3 << 22 | ((THREAD_FPR23-THREAD_FPR0) << 10) | __tmp << 5 | 23) + .word (0xb3 << 22 | ((THREAD_FPR24-THREAD_FPR0) << 10) | __tmp << 5 | 24) + .word (0xb3 << 22 | ((THREAD_FPR25-THREAD_FPR0) << 10) | __tmp << 5 | 25) + .word (0xb3 << 22 | ((THREAD_FPR26-THREAD_FPR0) << 10) | __tmp << 5 | 26) + .word (0xb3 << 22 | ((THREAD_FPR27-THREAD_FPR0) << 10) | __tmp << 5 | 27) + .word (0xb3 << 22 | ((THREAD_FPR28-THREAD_FPR0) << 10) | __tmp << 5 | 28) + .word (0xb3 << 22 | ((THREAD_FPR29-THREAD_FPR0) << 10) | __tmp << 5 | 29) + .word (0xb3 << 22 | 
((THREAD_FPR30-THREAD_FPR0) << 10) | __tmp << 5 | 30) + .word (0xb3 << 22 | ((THREAD_FPR31-THREAD_FPR0) << 10) | __tmp << 5 | 31) + .endm + + .macro lasx_restore_data thread tmp + parse_r __tmp, \tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \tmp, \thread, \tmp + /* xvld opcode is 0xb2 */ + .word (0xb2 << 22 | ((THREAD_FPR0-THREAD_FPR0) << 10) | __tmp << 5 | 0) + .word (0xb2 << 22 | ((THREAD_FPR1-THREAD_FPR0) << 10) | __tmp << 5 | 1) + .word (0xb2 << 22 | ((THREAD_FPR2-THREAD_FPR0) << 10) | __tmp << 5 | 2) + .word (0xb2 << 22 | ((THREAD_FPR3-THREAD_FPR0) << 10) | __tmp << 5 | 3) + .word (0xb2 << 22 | ((THREAD_FPR4-THREAD_FPR0) << 10) | __tmp << 5 | 4) + .word (0xb2 << 22 | ((THREAD_FPR5-THREAD_FPR0) << 10) | __tmp << 5 | 5) + .word (0xb2 << 22 | ((THREAD_FPR6-THREAD_FPR0) << 10) | __tmp << 5 | 6) + .word (0xb2 << 22 | ((THREAD_FPR7-THREAD_FPR0) << 10) | __tmp << 5 | 7) + .word (0xb2 << 22 | ((THREAD_FPR8-THREAD_FPR0) << 10) | __tmp << 5 | 8) + .word (0xb2 << 22 | ((THREAD_FPR9-THREAD_FPR0) << 10) | __tmp << 5 | 9) + .word (0xb2 << 22 | ((THREAD_FPR10-THREAD_FPR0) << 10) | __tmp << 5 | 10) + .word (0xb2 << 22 | ((THREAD_FPR11-THREAD_FPR0) << 10) | __tmp << 5 | 11) + .word (0xb2 << 22 | ((THREAD_FPR12-THREAD_FPR0) << 10) | __tmp << 5 | 12) + .word (0xb2 << 22 | ((THREAD_FPR13-THREAD_FPR0) << 10) | __tmp << 5 | 13) + .word (0xb2 << 22 | ((THREAD_FPR14-THREAD_FPR0) << 10) | __tmp << 5 | 14) + .word (0xb2 << 22 | ((THREAD_FPR15-THREAD_FPR0) << 10) | __tmp << 5 | 15) + .word (0xb2 << 22 | ((THREAD_FPR16-THREAD_FPR0) << 10) | __tmp << 5 | 16) + .word (0xb2 << 22 | ((THREAD_FPR17-THREAD_FPR0) << 10) | __tmp << 5 | 17) + .word (0xb2 << 22 | ((THREAD_FPR18-THREAD_FPR0) << 10) | __tmp << 5 | 18) + .word (0xb2 << 22 | ((THREAD_FPR19-THREAD_FPR0) << 10) | __tmp << 5 | 19) + .word (0xb2 << 22 | ((THREAD_FPR20-THREAD_FPR0) << 10) | __tmp << 5 | 20) + .word (0xb2 << 22 | ((THREAD_FPR21-THREAD_FPR0) << 10) | __tmp << 5 | 21) + .word (0xb2 << 22 | ((THREAD_FPR22-THREAD_FPR0) << 10) | __tmp 
<< 5 | 22) + .word (0xb2 << 22 | ((THREAD_FPR23-THREAD_FPR0) << 10) | __tmp << 5 | 23) + .word (0xb2 << 22 | ((THREAD_FPR24-THREAD_FPR0) << 10) | __tmp << 5 | 24) + .word (0xb2 << 22 | ((THREAD_FPR25-THREAD_FPR0) << 10) | __tmp << 5 | 25) + .word (0xb2 << 22 | ((THREAD_FPR26-THREAD_FPR0) << 10) | __tmp << 5 | 26) + .word (0xb2 << 22 | ((THREAD_FPR27-THREAD_FPR0) << 10) | __tmp << 5 | 27) + .word (0xb2 << 22 | ((THREAD_FPR28-THREAD_FPR0) << 10) | __tmp << 5 | 28) + .word (0xb2 << 22 | ((THREAD_FPR29-THREAD_FPR0) << 10) | __tmp << 5 | 29) + .word (0xb2 << 22 | ((THREAD_FPR30-THREAD_FPR0) << 10) | __tmp << 5 | 30) + .word (0xb2 << 22 | ((THREAD_FPR31-THREAD_FPR0) << 10) | __tmp << 5 | 31) + .endm + + .macro lasx_save_all thread tmp0 tmp1 + fpu_save_cc \thread, \tmp0, \tmp1 + fpu_save_csr \thread, \tmp0 + lasx_save_data \thread, \tmp0 + .endm + + .macro lasx_restore_all thread tmp0 tmp1 + lasx_restore_data \thread, \tmp0 + fpu_restore_cc \thread, \tmp0, \tmp1 + fpu_restore_csr \thread, \tmp0 + .endm + + .macro lasx_save_upper xd base tmp off + /* Nothing */ + .endm + + .macro lasx_save_all_upper thread base tmp + /* Nothing */ + .endm + + .macro lasx_restore_upper xd base tmp off + parse_xr __xd, \xd + parse_xr __xt, \tmp + parse_r __base, \base + /* vld opcode is 0xb0 */ + .word (0xb0 << 22 | (\off+16) << 10 | __base << 5 | __xt) + /* xvpermi.q opcode is 0x1dfb */ + .word (0x1dfb << 18 | 0x2 << 10 | __xt << 5 | __xd) + .endm + + .macro lasx_restore_all_upper thread base tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \base, \thread, \tmp + /* Save $vr31, xvpickve2gr opcode is 0x76efe */ + .word (0x76efe << 12 | 0 << 10 | 31 << 5 | 0x11) + .word (0x76efe << 12 | 1 << 10 | 31 << 5 | 0x12) + lasx_restore_upper $xr0, \base, $xr31, (THREAD_FPR0-THREAD_FPR0) + lasx_restore_upper $xr1, \base, $xr31, (THREAD_FPR1-THREAD_FPR0) + lasx_restore_upper $xr2, \base, $xr31, (THREAD_FPR2-THREAD_FPR0) + lasx_restore_upper $xr3, \base, $xr31, (THREAD_FPR3-THREAD_FPR0) + lasx_restore_upper $xr4, 
\base, $xr31, (THREAD_FPR4-THREAD_FPR0) + lasx_restore_upper $xr5, \base, $xr31, (THREAD_FPR5-THREAD_FPR0) + lasx_restore_upper $xr6, \base, $xr31, (THREAD_FPR6-THREAD_FPR0) + lasx_restore_upper $xr7, \base, $xr31, (THREAD_FPR7-THREAD_FPR0) + lasx_restore_upper $xr8, \base, $xr31, (THREAD_FPR8-THREAD_FPR0) + lasx_restore_upper $xr9, \base, $xr31, (THREAD_FPR9-THREAD_FPR0) + lasx_restore_upper $xr10, \base, $xr31, (THREAD_FPR10-THREAD_FPR0) + lasx_restore_upper $xr11, \base, $xr31, (THREAD_FPR11-THREAD_FPR0) + lasx_restore_upper $xr12, \base, $xr31, (THREAD_FPR12-THREAD_FPR0) + lasx_restore_upper $xr13, \base, $xr31, (THREAD_FPR13-THREAD_FPR0) + lasx_restore_upper $xr14, \base, $xr31, (THREAD_FPR14-THREAD_FPR0) + lasx_restore_upper $xr15, \base, $xr31, (THREAD_FPR15-THREAD_FPR0) + lasx_restore_upper $xr16, \base, $xr31, (THREAD_FPR16-THREAD_FPR0) + lasx_restore_upper $xr17, \base, $xr31, (THREAD_FPR17-THREAD_FPR0) + lasx_restore_upper $xr18, \base, $xr31, (THREAD_FPR18-THREAD_FPR0) + lasx_restore_upper $xr19, \base, $xr31, (THREAD_FPR19-THREAD_FPR0) + lasx_restore_upper $xr20, \base, $xr31, (THREAD_FPR20-THREAD_FPR0) + lasx_restore_upper $xr21, \base, $xr31, (THREAD_FPR21-THREAD_FPR0) + lasx_restore_upper $xr22, \base, $xr31, (THREAD_FPR22-THREAD_FPR0) + lasx_restore_upper $xr23, \base, $xr31, (THREAD_FPR23-THREAD_FPR0) + lasx_restore_upper $xr24, \base, $xr31, (THREAD_FPR24-THREAD_FPR0) + lasx_restore_upper $xr25, \base, $xr31, (THREAD_FPR25-THREAD_FPR0) + lasx_restore_upper $xr26, \base, $xr31, (THREAD_FPR26-THREAD_FPR0) + lasx_restore_upper $xr27, \base, $xr31, (THREAD_FPR27-THREAD_FPR0) + lasx_restore_upper $xr28, \base, $xr31, (THREAD_FPR28-THREAD_FPR0) + lasx_restore_upper $xr29, \base, $xr31, (THREAD_FPR29-THREAD_FPR0) + lasx_restore_upper $xr30, \base, $xr31, (THREAD_FPR30-THREAD_FPR0) + lasx_restore_upper $xr31, \base, $xr31, (THREAD_FPR31-THREAD_FPR0) + /* Restore $vr31, xvinsgr2vr opcode is 0x76ebe */ + .word (0x76ebe << 12 | 0 << 10 | 0x11 << 5 | 31) + 
.word (0x76ebe << 12 | 1 << 10 | 0x12 << 5 | 31) + .endm + + .macro lasx_init_upper xd tmp + parse_xr __xd, \xd + parse_r __tmp, \tmp + /* xvinsgr2vr opcode is 0x76ebe */ + .word (0x76ebe << 12 | 2 << 10 | __tmp << 5 | __xd) + .word (0x76ebe << 12 | 3 << 10 | __tmp << 5 | __xd) + .endm + + .macro lasx_init_all_upper tmp + not \tmp, zero + lasx_init_upper $xr0 \tmp + lasx_init_upper $xr1 \tmp + lasx_init_upper $xr2 \tmp + lasx_init_upper $xr3 \tmp + lasx_init_upper $xr4 \tmp + lasx_init_upper $xr5 \tmp + lasx_init_upper $xr6 \tmp + lasx_init_upper $xr7 \tmp + lasx_init_upper $xr8 \tmp + lasx_init_upper $xr9 \tmp + lasx_init_upper $xr10 \tmp + lasx_init_upper $xr11 \tmp + lasx_init_upper $xr12 \tmp + lasx_init_upper $xr13 \tmp + lasx_init_upper $xr14 \tmp + lasx_init_upper $xr15 \tmp + lasx_init_upper $xr16 \tmp + lasx_init_upper $xr17 \tmp + lasx_init_upper $xr18 \tmp + lasx_init_upper $xr19 \tmp + lasx_init_upper $xr20 \tmp + lasx_init_upper $xr21 \tmp + lasx_init_upper $xr22 \tmp + lasx_init_upper $xr23 \tmp + lasx_init_upper $xr24 \tmp + lasx_init_upper $xr25 \tmp + lasx_init_upper $xr26 \tmp + lasx_init_upper $xr27 \tmp + lasx_init_upper $xr28 \tmp + lasx_init_upper $xr29 \tmp + lasx_init_upper $xr30 \tmp + lasx_init_upper $xr31 \tmp + .endm + .macro not dst src nor \dst, \src, zero .endm diff --git a/arch/loongarch/include/asm/fpu.h b/arch/loongarch/include/asm/fpu.h index 358b254d9c1d..eaa8030f2d9c 100644 --- a/arch/loongarch/include/asm/fpu.h +++ b/arch/loongarch/include/asm/fpu.h @@ -25,6 +25,26 @@ extern void _init_fpu(unsigned int); extern void _save_fp(struct loongarch_fpu *); extern void _restore_fp(struct loongarch_fpu *); +extern void _save_lsx(struct loongarch_fpu *fpu); +extern void _restore_lsx(struct loongarch_fpu *fpu); +extern void _init_lsx_upper(void); +extern void _restore_lsx_upper(struct loongarch_fpu *fpu); + +extern void _save_lasx(struct loongarch_fpu *fpu); +extern void _restore_lasx(struct loongarch_fpu *fpu); +extern void 
_init_lasx_upper(void); +extern void _restore_lasx_upper(struct loongarch_fpu *fpu); + +static inline void enable_lsx(void); +static inline void disable_lsx(void); +static inline void save_lsx(struct task_struct *t); +static inline void restore_lsx(struct task_struct *t); + +static inline void enable_lasx(void); +static inline void disable_lasx(void); +static inline void save_lasx(struct task_struct *t); +static inline void restore_lasx(struct task_struct *t); + /* * Mask the FCSR Cause bits according to the Enable bits, observing * that Unimplemented is always enabled. @@ -41,6 +61,29 @@ static inline int is_fp_enabled(void) 1 : 0; } +static inline int is_lsx_enabled(void) +{ + if (!cpu_has_lsx) + return 0; + + return (csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_LSXEN) ? + 1 : 0; +} + +static inline int is_lasx_enabled(void) +{ + if (!cpu_has_lasx) + return 0; + + return (csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_LASXEN) ? + 1 : 0; +} + +static inline int is_simd_enabled(void) +{ + return is_lsx_enabled() | is_lasx_enabled(); +} + #define enable_fpu() set_csr_euen(CSR_EUEN_FPEN) #define disable_fpu() clear_csr_euen(CSR_EUEN_FPEN) @@ -78,9 +121,22 @@ static inline void own_fpu(int restore) static inline void lose_fpu_inatomic(int save, struct task_struct *tsk) { if (is_fpu_owner()) { - if (save) - _save_fp(&tsk->thread.fpu); - disable_fpu(); + if (is_simd_enabled()) { + if (save) { + if (is_lasx_enabled()) + save_lasx(tsk); + else + save_lsx(tsk); + } + disable_fpu(); + disable_lsx(); + disable_lasx(); + clear_tsk_thread_flag(tsk, TIF_USEDSIMD); + } else { + if (save) + _save_fp(&tsk->thread.fpu); + disable_fpu(); + } clear_tsk_thread_flag(tsk, TIF_USEDFPU); } KSTK_EUEN(tsk) &= ~(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN); @@ -126,4 +182,140 @@ static inline union fpureg *get_fpu_regs(struct task_struct *tsk) return tsk->thread.fpu.fpr; } +enum { + CTX_LSX = 1, + CTX_LASX = 2, +}; + +static inline int is_simd_owner(void) +{ + return 
test_thread_flag(TIF_USEDSIMD); +} + +#ifdef CONFIG_CPU_HAS_LSX + +static inline void enable_lsx(void) +{ + if (cpu_has_lsx) + csr_xchg32(CSR_EUEN_LSXEN, CSR_EUEN_LSXEN, LOONGARCH_CSR_EUEN); +} + +static inline void disable_lsx(void) +{ + if (cpu_has_lsx) + csr_xchg32(0, CSR_EUEN_LSXEN, LOONGARCH_CSR_EUEN); +} + +static inline void save_lsx(struct task_struct *t) +{ + if (cpu_has_lsx) + _save_lsx(&t->thread.fpu); +} + +static inline void restore_lsx(struct task_struct *t) +{ + if (cpu_has_lsx) + _restore_lsx(&t->thread.fpu); +} + +static inline void init_lsx_upper(void) +{ + /* + * Check cpu_has_lsx only if it's a constant. This will allow the + * compiler to optimise out code for CPUs without LSX without adding + * an extra redundant check for CPUs with LSX. + */ + if (__builtin_constant_p(cpu_has_lsx) && !cpu_has_lsx) + return; + + _init_lsx_upper(); +} + +static inline void restore_lsx_upper(struct task_struct *t) +{ + if (cpu_has_lsx) + _restore_lsx_upper(&t->thread.fpu); +} + +#else +static inline void enable_lsx(void) {} +static inline void disable_lsx(void) {} +static inline void save_lsx(struct task_struct *t) {} +static inline void restore_lsx(struct task_struct *t) {} +static inline void init_lsx_upper(void) {} +static inline void restore_lsx_upper(struct task_struct *t) {} +#endif + +#ifdef CONFIG_CPU_HAS_LASX + +static inline void enable_lasx(void) +{ + + if (cpu_has_lasx) + csr_xchg32(CSR_EUEN_LASXEN, CSR_EUEN_LASXEN, LOONGARCH_CSR_EUEN); +} + +static inline void disable_lasx(void) +{ + if (cpu_has_lasx) + csr_xchg32(0, CSR_EUEN_LASXEN, LOONGARCH_CSR_EUEN); +} + +static inline void save_lasx(struct task_struct *t) +{ + if (cpu_has_lasx) + _save_lasx(&t->thread.fpu); +} + +static inline void restore_lasx(struct task_struct *t) +{ + if (cpu_has_lasx) + _restore_lasx(&t->thread.fpu); +} + +static inline void init_lasx_upper(void) +{ + if (cpu_has_lasx) + _init_lasx_upper(); +} + +static inline void restore_lasx_upper(struct task_struct *t) +{ + if 
(cpu_has_lasx) + _restore_lasx_upper(&t->thread.fpu); +} + +#else +static inline void enable_lasx(void) {} +static inline void disable_lasx(void) {} +static inline void save_lasx(struct task_struct *t) {} +static inline void restore_lasx(struct task_struct *t) {} +static inline void init_lasx_upper(void) {} +static inline void restore_lasx_upper(struct task_struct *t) {} +#endif + +static inline int thread_lsx_context_live(void) +{ + int ret = 0; + + if (__builtin_constant_p(cpu_has_lsx) && !cpu_has_lsx) + goto out; + + ret = test_thread_flag(TIF_LSX_CTX_LIVE) ? CTX_LSX : 0; +out: + return ret; +} + +static inline int thread_lasx_context_live(void) +{ + int ret = 0; + + if (__builtin_constant_p(cpu_has_lasx) && !cpu_has_lasx) + goto out; + + ret = test_thread_flag(TIF_LASX_CTX_LIVE) ? CTX_LASX : 0; +out: + return ret; +} + #endif /* _ASM_FPU_H */ diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index 529ab8f44ec6..3dd31afb2ae6 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -111,6 +111,18 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) c->options |= LOONGARCH_CPU_FPU; elf_hwcap |= HWCAP_LOONGARCH_FPU; } +#ifdef CONFIG_CPU_HAS_LSX + if (config & CPUCFG2_LSX) { + c->options |= LOONGARCH_CPU_LSX; + elf_hwcap |= HWCAP_LOONGARCH_LSX; + } +#endif +#ifdef CONFIG_CPU_HAS_LASX + if (config & CPUCFG2_LASX) { + c->options |= LOONGARCH_CPU_LASX; + elf_hwcap |= HWCAP_LOONGARCH_LASX; + } +#endif if (config & CPUCFG2_COMPLEX) { c->options |= LOONGARCH_CPU_COMPLEX; elf_hwcap |= HWCAP_LOONGARCH_COMPLEX; diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S index 576b3370a296..65d245a0f409 100644 --- a/arch/loongarch/kernel/fpu.S +++ b/arch/loongarch/kernel/fpu.S @@ -26,6 +26,32 @@ .previous .endm + .macro EX_V insn, reg, src, offs + parse_v __insn, \insn + parse_v __offs, \offs + parse_r __src, \src + parse_vr __reg, \reg + +.ex\@: + .word __insn << 22 | __offs << 10 | __src << 
5 | __reg + .section __ex_table,"a" + PTR .ex\@, fault + .previous + .endm + + .macro EX_XV insn, reg, src, offs + parse_v __insn, \insn + parse_v __offs, \offs + parse_r __src, \src + parse_xr __reg, \reg + +.ex\@: + .word __insn << 22 | __offs << 10 | __src << 5 | __reg + .section __ex_table,"a" + PTR .ex\@, fault + .previous + .endm + .macro sc_save_fp base EX fst.d $f0, \base, (0 * FPU_REG_WIDTH) EX fst.d $f1, \base, (1 * FPU_REG_WIDTH) @@ -146,6 +172,146 @@ movgr2fcsr fcsr0, \tmp0 .endm + .macro sc_save_lsx base + EX_V 0xb1 $vr0, \base, (0 * LSX_REG_WIDTH) + EX_V 0xb1 $vr1, \base, (1 * LSX_REG_WIDTH) + EX_V 0xb1 $vr2, \base, (2 * LSX_REG_WIDTH) + EX_V 0xb1 $vr3, \base, (3 * LSX_REG_WIDTH) + EX_V 0xb1 $vr4, \base, (4 * LSX_REG_WIDTH) + EX_V 0xb1 $vr5, \base, (5 * LSX_REG_WIDTH) + EX_V 0xb1 $vr6, \base, (6 * LSX_REG_WIDTH) + EX_V 0xb1 $vr7, \base, (7 * LSX_REG_WIDTH) + EX_V 0xb1 $vr8, \base, (8 * LSX_REG_WIDTH) + EX_V 0xb1 $vr9, \base, (9 * LSX_REG_WIDTH) + EX_V 0xb1 $vr10, \base, (10 * LSX_REG_WIDTH) + EX_V 0xb1 $vr11, \base, (11 * LSX_REG_WIDTH) + EX_V 0xb1 $vr12, \base, (12 * LSX_REG_WIDTH) + EX_V 0xb1 $vr13, \base, (13 * LSX_REG_WIDTH) + EX_V 0xb1 $vr14, \base, (14 * LSX_REG_WIDTH) + EX_V 0xb1 $vr15, \base, (15 * LSX_REG_WIDTH) + EX_V 0xb1 $vr16, \base, (16 * LSX_REG_WIDTH) + EX_V 0xb1 $vr17, \base, (17 * LSX_REG_WIDTH) + EX_V 0xb1 $vr18, \base, (18 * LSX_REG_WIDTH) + EX_V 0xb1 $vr19, \base, (19 * LSX_REG_WIDTH) + EX_V 0xb1 $vr20, \base, (20 * LSX_REG_WIDTH) + EX_V 0xb1 $vr21, \base, (21 * LSX_REG_WIDTH) + EX_V 0xb1 $vr22, \base, (22 * LSX_REG_WIDTH) + EX_V 0xb1 $vr23, \base, (23 * LSX_REG_WIDTH) + EX_V 0xb1 $vr24, \base, (24 * LSX_REG_WIDTH) + EX_V 0xb1 $vr25, \base, (25 * LSX_REG_WIDTH) + EX_V 0xb1 $vr26, \base, (26 * LSX_REG_WIDTH) + EX_V 0xb1 $vr27, \base, (27 * LSX_REG_WIDTH) + EX_V 0xb1 $vr28, \base, (28 * LSX_REG_WIDTH) + EX_V 0xb1 $vr29, \base, (29 * LSX_REG_WIDTH) + EX_V 0xb1 $vr30, \base, (30 * LSX_REG_WIDTH) + EX_V 0xb1 $vr31, \base, (31 * 
LSX_REG_WIDTH) + .endm + + .macro sc_restore_lsx base + EX_V 0xb0 $vr0, \base, (0 * LSX_REG_WIDTH) + EX_V 0xb0 $vr1, \base, (1 * LSX_REG_WIDTH) + EX_V 0xb0 $vr2, \base, (2 * LSX_REG_WIDTH) + EX_V 0xb0 $vr3, \base, (3 * LSX_REG_WIDTH) + EX_V 0xb0 $vr4, \base, (4 * LSX_REG_WIDTH) + EX_V 0xb0 $vr5, \base, (5 * LSX_REG_WIDTH) + EX_V 0xb0 $vr6, \base, (6 * LSX_REG_WIDTH) + EX_V 0xb0 $vr7, \base, (7 * LSX_REG_WIDTH) + EX_V 0xb0 $vr8, \base, (8 * LSX_REG_WIDTH) + EX_V 0xb0 $vr9, \base, (9 * LSX_REG_WIDTH) + EX_V 0xb0 $vr10, \base, (10 * LSX_REG_WIDTH) + EX_V 0xb0 $vr11, \base, (11 * LSX_REG_WIDTH) + EX_V 0xb0 $vr12, \base, (12 * LSX_REG_WIDTH) + EX_V 0xb0 $vr13, \base, (13 * LSX_REG_WIDTH) + EX_V 0xb0 $vr14, \base, (14 * LSX_REG_WIDTH) + EX_V 0xb0 $vr15, \base, (15 * LSX_REG_WIDTH) + EX_V 0xb0 $vr16, \base, (16 * LSX_REG_WIDTH) + EX_V 0xb0 $vr17, \base, (17 * LSX_REG_WIDTH) + EX_V 0xb0 $vr18, \base, (18 * LSX_REG_WIDTH) + EX_V 0xb0 $vr19, \base, (19 * LSX_REG_WIDTH) + EX_V 0xb0 $vr20, \base, (20 * LSX_REG_WIDTH) + EX_V 0xb0 $vr21, \base, (21 * LSX_REG_WIDTH) + EX_V 0xb0 $vr22, \base, (22 * LSX_REG_WIDTH) + EX_V 0xb0 $vr23, \base, (23 * LSX_REG_WIDTH) + EX_V 0xb0 $vr24, \base, (24 * LSX_REG_WIDTH) + EX_V 0xb0 $vr25, \base, (25 * LSX_REG_WIDTH) + EX_V 0xb0 $vr26, \base, (26 * LSX_REG_WIDTH) + EX_V 0xb0 $vr27, \base, (27 * LSX_REG_WIDTH) + EX_V 0xb0 $vr28, \base, (28 * LSX_REG_WIDTH) + EX_V 0xb0 $vr29, \base, (29 * LSX_REG_WIDTH) + EX_V 0xb0 $vr30, \base, (30 * LSX_REG_WIDTH) + EX_V 0xb0 $vr31, \base, (31 * LSX_REG_WIDTH) + .endm + + .macro sc_save_lasx base + EX_XV 0xb3 $xr0, \base, (0 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr1, \base, (1 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr2, \base, (2 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr3, \base, (3 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr4, \base, (4 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr5, \base, (5 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr6, \base, (6 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr7, \base, (7 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr8, \base, (8 * LASX_REG_WIDTH) + 
EX_XV 0xb3 $xr9, \base, (9 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr10, \base, (10 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr11, \base, (11 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr12, \base, (12 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr13, \base, (13 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr14, \base, (14 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr15, \base, (15 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr16, \base, (16 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr17, \base, (17 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr18, \base, (18 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr19, \base, (19 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr20, \base, (20 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr21, \base, (21 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr22, \base, (22 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr23, \base, (23 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr24, \base, (24 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr25, \base, (25 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr26, \base, (26 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr27, \base, (27 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr28, \base, (28 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr29, \base, (29 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr30, \base, (30 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr31, \base, (31 * LASX_REG_WIDTH) + .endm + + .macro sc_restore_lasx base + EX_XV 0xb2 $xr0, \base, (0 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr1, \base, (1 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr2, \base, (2 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr3, \base, (3 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr4, \base, (4 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr5, \base, (5 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr6, \base, (6 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr7, \base, (7 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr8, \base, (8 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr9, \base, (9 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr10, \base, (10 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr11, \base, (11 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr12, \base, (12 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr13, \base, (13 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr14, \base, (14 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr15, \base, (15 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr16, \base, (16 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr17, 
\base, (17 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr18, \base, (18 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr19, \base, (19 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr20, \base, (20 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr21, \base, (21 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr22, \base, (22 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr23, \base, (23 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr24, \base, (24 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr25, \base, (25 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr26, \base, (26 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr27, \base, (27 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr28, \base, (28 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr29, \base, (29 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr30, \base, (30 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr31, \base, (31 * LASX_REG_WIDTH) + .endm + /* * Save a thread's fp context. */ @@ -167,6 +333,76 @@ SYM_FUNC_START(_restore_fp) jr ra SYM_FUNC_END(_restore_fp) +#ifdef CONFIG_CPU_HAS_LSX + +/* + * Save a thread's LSX vector context. + */ +SYM_FUNC_START(_save_lsx) + lsx_save_all a0 t1 t2 + jirl zero, ra, 0 +SYM_FUNC_END(_save_lsx) +EXPORT_SYMBOL(_save_lsx) + +/* + * Restore a thread's LSX vector context. + */ +SYM_FUNC_START(_restore_lsx) + lsx_restore_all a0 t1 t2 + jirl zero, ra, 0 +SYM_FUNC_END(_restore_lsx) + +SYM_FUNC_START(_save_lsx_upper) + lsx_save_all_upper a0 t0 t1 + jirl zero, ra, 0 +SYM_FUNC_END(_save_lsx_upper) + +SYM_FUNC_START(_restore_lsx_upper) + lsx_restore_all_upper a0 t0 t1 + jirl zero, ra, 0 +SYM_FUNC_END(_restore_lsx_upper) + +SYM_FUNC_START(_init_lsx_upper) + lsx_init_all_upper t1 + jirl zero, ra, 0 +SYM_FUNC_END(_init_lsx_upper) +#endif + +#ifdef CONFIG_CPU_HAS_LASX + +/* + * Save a thread's LASX vector context. + */ +SYM_FUNC_START(_save_lasx) + lasx_save_all a0 t1 t2 + jirl zero, ra, 0 +SYM_FUNC_END(_save_lasx) +EXPORT_SYMBOL(_save_lasx) + +/* + * Restore a thread's LASX vector context. 
+ */ +SYM_FUNC_START(_restore_lasx) + lasx_restore_all a0 t1 t2 + jirl zero, ra, 0 +SYM_FUNC_END(_restore_lasx) + +SYM_FUNC_START(_save_lasx_upper) + lasx_save_all_upper a0 t0 t1 + jirl zero, ra, 0 +SYM_FUNC_END(_save_lasx_upper) + +SYM_FUNC_START(_restore_lasx_upper) + lasx_restore_all_upper a0 t0 t1 + jirl zero, ra, 0 +SYM_FUNC_END(_restore_lasx_upper) + +SYM_FUNC_START(_init_lasx_upper) + lasx_init_all_upper t1 + jirl zero, ra, 0 +SYM_FUNC_END(_init_lasx_upper) +#endif + /* * Load the FPU with signalling NANS. This bit pattern we're using has * the property that no matter whether considered as single or as double @@ -245,6 +481,58 @@ SYM_FUNC_START(_restore_fp_context) jr ra SYM_FUNC_END(_restore_fp_context) +/* + * a0: fpregs + * a1: fcc + * a2: fcsr + */ +SYM_FUNC_START(_save_lsx_context) + sc_save_fcc a1, t0, t1 + sc_save_fcsr a2, t0 + sc_save_lsx a0 + li.w a0, 0 # success + jirl zero, ra, 0 +SYM_FUNC_END(_save_lsx_context) + +/* + * a0: fpregs + * a1: fcc + * a2: fcsr + */ +SYM_FUNC_START(_restore_lsx_context) + sc_restore_lsx a0 + sc_restore_fcc a1, t1, t2 + sc_restore_fcsr a2, t1 + li.w a0, 0 # success + jirl zero, ra, 0 +SYM_FUNC_END(_restore_lsx_context) + +/* + * a0: fpregs + * a1: fcc + * a2: fcsr + */ +SYM_FUNC_START(_save_lasx_context) + sc_save_fcc a1, t0, t1 + sc_save_fcsr a2, t0 + sc_save_lasx a0 + li.w a0, 0 # success + jirl zero, ra, 0 +SYM_FUNC_END(_save_lasx_context) + +/* + * a0: fpregs + * a1: fcc + * a2: fcsr + */ +SYM_FUNC_START(_restore_lasx_context) + sc_restore_lasx a0 + sc_restore_fcc a1, t1, t2 + sc_restore_fcsr a2, t1 + li.w a0, 0 # success + jirl zero, ra, 0 +SYM_FUNC_END(_restore_lasx_context) + SYM_FUNC_START(fault) li.w a0, -EFAULT # failure jr ra diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index 95a124058e32..86600e39799e 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -105,8 +105,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct 
*src) */ preempt_disable(); - if (is_fpu_owner()) - save_fp(current); + if (is_fpu_owner()) { + if (is_lasx_enabled()) + save_lasx(current); + else if (is_lsx_enabled()) + save_lsx(current); + else + save_fp(current); + } preempt_enable(); diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c index dc2b82ea894c..bee4194177fd 100644 --- a/arch/loongarch/kernel/ptrace.c +++ b/arch/loongarch/kernel/ptrace.c @@ -246,6 +246,90 @@ static int cfg_set(struct task_struct *target, return 0; } +#ifdef CONFIG_CPU_HAS_LSX + +static void copy_pad_fprs(struct task_struct *target, + const struct user_regset *regset, + struct membuf *to, unsigned int live_sz) +{ + int i, j; + unsigned long long fill = ~0ull; + unsigned int cp_sz, pad_sz; + + cp_sz = min(regset->size, live_sz); + pad_sz = regset->size - cp_sz; + WARN_ON(pad_sz % sizeof(fill)); + + for (i = 0; i < NUM_FPU_REGS; i++) { + membuf_write(to, &target->thread.fpu.fpr[i], cp_sz); + for (j = 0; j < (pad_sz / sizeof(fill)); j++) { + membuf_store(to, fill); + } + } +} + +static int simd_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + const unsigned int wr_size = NUM_FPU_REGS * regset->size; + + if (!tsk_used_math(target)) { + /* The task hasn't used FP or LSX, fill with 0xff */ + copy_pad_fprs(target, regset, &to, 0); + } else if (!test_tsk_thread_flag(target, TIF_LSX_CTX_LIVE)) { + /* Copy scalar FP context, fill the rest with 0xff */ + copy_pad_fprs(target, regset, &to, 8); +#ifdef CONFIG_CPU_HAS_LASX + } else if (!test_tsk_thread_flag(target, TIF_LASX_CTX_LIVE)) { + /* Copy LSX 128 Bit context, fill the rest with 0xff */ + copy_pad_fprs(target, regset, &to, 16); +#endif + } else if (sizeof(target->thread.fpu.fpr[0]) == regset->size) { + /* Trivially copy the vector registers */ + membuf_write(&to, &target->thread.fpu.fpr, wr_size); + } else { + /* Copy as much context as possible, fill the rest with 0xff */ + copy_pad_fprs(target, regset, &to, 
sizeof(target->thread.fpu.fpr[0])); + } + + return 0; +} + +static int simd_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + const unsigned int wr_size = NUM_FPU_REGS * regset->size; + unsigned int cp_sz; + int i, err, start; + + init_fp_ctx(target); + + if (sizeof(target->thread.fpu.fpr[0]) == regset->size) { + /* Trivially copy the vector registers */ + err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.fpu.fpr, + 0, wr_size); + } else { + /* Copy as much context as possible */ + cp_sz = min_t(unsigned int, regset->size, + sizeof(target->thread.fpu.fpr[0])); + + i = start = err = 0; + for (; i < NUM_FPU_REGS; i++, start += regset->size) { + err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.fpu.fpr[i], + start, start + cp_sz); + } + } + + return err; +} + +#endif /* CONFIG_CPU_HAS_LSX */ + struct pt_regs_offset { const char *name; int offset; @@ -319,6 +403,12 @@ enum loongarch_regset { REGSET_GPR, REGSET_FPR, REGSET_CPUCFG, +#ifdef CONFIG_CPU_HAS_LSX + REGSET_LSX, +#endif +#ifdef CONFIG_CPU_HAS_LASX + REGSET_LASX, +#endif }; static const struct user_regset loongarch64_regsets[] = { @@ -346,6 +436,26 @@ static const struct user_regset loongarch64_regsets[] = { .regset_get = cfg_get, .set = cfg_set, }, +#ifdef CONFIG_CPU_HAS_LSX + [REGSET_LSX] = { + .core_note_type = NT_LOONGARCH_LSX, + .n = NUM_FPU_REGS, + .size = 16, + .align = 16, + .regset_get = simd_get, + .set = simd_set, + }, +#endif +#ifdef CONFIG_CPU_HAS_LASX + [REGSET_LASX] = { + .core_note_type = NT_LOONGARCH_LASX, + .n = NUM_FPU_REGS, + .size = 32, + .align = 32, + .regset_get = simd_get, + .set = simd_set, + }, +#endif }; static const struct user_regset_view user_loongarch64_view = { diff --git a/arch/loongarch/kernel/signal.c b/arch/loongarch/kernel/signal.c index 2d6eb75b11f1..8c2ef6598328 100644 --- a/arch/loongarch/kernel/signal.c +++ 
b/arch/loongarch/kernel/signal.c @@ -50,6 +50,16 @@ extern asmlinkage int _save_fp_context(void __user *fpregs, void __user *fcc, void __user *csr); extern asmlinkage int _restore_fp_context(void __user *fpregs, void __user *fcc, void __user *csr); +extern asmlinkage int +_save_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); +extern asmlinkage int +_restore_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); +extern asmlinkage int +_save_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); +extern asmlinkage int +_restore_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); +extern asmlinkage int _save_lsx_all_upper(void __user *buf); +extern asmlinkage int _restore_lsx_all_upper(void __user *buf); struct rt_sigframe { struct siginfo rs_info; @@ -65,9 +75,29 @@ struct extctx_layout { unsigned long size; unsigned int flags; struct _ctx_layout fpu; + struct _ctx_layout lsx; + struct _ctx_layout lasx; struct _ctx_layout end; }; +/* LSX context */ +#define LSX_CTX_MAGIC 0x53580001 +#define LSX_CTX_ALIGN 16 +struct lsx_context { + __u64 regs[2*32]; + __u64 fcc; + __u32 fcsr; +}; + +/* LASX context */ +#define LASX_CTX_MAGIC 0x41535801 +#define LASX_CTX_ALIGN 32 +struct lasx_context { + __u64 regs[4*32]; + __u64 fcc; + __u32 fcsr; +}; + static void __user *get_ctx_through_ctxinfo(struct sctx_info *info) { return (void __user *)((char *)info + sizeof(struct sctx_info)); @@ -115,6 +145,96 @@ static int copy_fpu_from_sigcontext(struct fpu_context __user *ctx) return err; } +static int copy_lsx_to_sigcontext(struct lsx_context __user *ctx) +{ + int i; + int err = 0; + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + for (i = 0; i < NUM_FPU_REGS; i++) { + err |= __put_user(get_fpr64(&current->thread.fpu.fpr[i], 0), + &regs[2*i]); + err |= __put_user(get_fpr64(&current->thread.fpu.fpr[i], 1), + &regs[2*i+1]); + } + err |=
__put_user(current->thread.fpu.fcc, fcc); + err |= __put_user(current->thread.fpu.fcsr, fcsr); + + return err; +} + +static int copy_lsx_from_sigcontext(struct lsx_context __user *ctx) +{ + int i; + int err = 0; + u64 fpr_val; + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + for (i = 0; i < NUM_FPU_REGS; i++) { + err |= __get_user(fpr_val, &regs[2*i]); + set_fpr64(&current->thread.fpu.fpr[i], 0, fpr_val); + err |= __get_user(fpr_val, &regs[2*i+1]); + set_fpr64(&current->thread.fpu.fpr[i], 1, fpr_val); + } + err |= __get_user(current->thread.fpu.fcc, fcc); + err |= __get_user(current->thread.fpu.fcsr, fcsr); + + return err; +} + +static int copy_lasx_to_sigcontext(struct lasx_context __user *ctx) +{ + int i; + int err = 0; + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + for (i = 0; i < NUM_FPU_REGS; i++) { + err |= __put_user(get_fpr64(&current->thread.fpu.fpr[i], 0), + &regs[4*i]); + err |= __put_user(get_fpr64(&current->thread.fpu.fpr[i], 1), + &regs[4*i+1]); + err |= __put_user(get_fpr64(&current->thread.fpu.fpr[i], 2), + &regs[4*i+2]); + err |= __put_user(get_fpr64(&current->thread.fpu.fpr[i], 3), + &regs[4*i+3]); + } + err |= __put_user(current->thread.fpu.fcc, fcc); + err |= __put_user(current->thread.fpu.fcsr, fcsr); + + return err; +} + +static int copy_lasx_from_sigcontext(struct lasx_context __user *ctx) +{ + int i; + int err = 0; + u64 fpr_val; + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + for (i = 0; i < NUM_FPU_REGS; i++) { + err |= __get_user(fpr_val, &regs[4*i]); + set_fpr64(&current->thread.fpu.fpr[i], 0, fpr_val); + err |= __get_user(fpr_val, &regs[4*i+1]); + set_fpr64(&current->thread.fpu.fpr[i], 1, fpr_val); + err |= __get_user(fpr_val, &regs[4*i+2]); + set_fpr64(&current->thread.fpu.fpr[i], 2, fpr_val); + err |= __get_user(fpr_val, &regs[4*i+3]); + set_fpr64(&current->thread.fpu.fpr[i], 3,
fpr_val); + } + err |= __get_user(current->thread.fpu.fcc, fcc); + err |= __get_user(current->thread.fpu.fcsr, fcsr); + + return err; +} + /* * Wrappers for the assembly _{save,restore}_fp_context functions. */ @@ -136,6 +256,42 @@ static int restore_hw_fpu_context(struct fpu_context __user *ctx) return _restore_fp_context(regs, fcc, fcsr); } +static int save_hw_lsx_context(struct lsx_context __user *ctx) +{ + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + return _save_lsx_context(regs, fcc, fcsr); +} + +static int restore_hw_lsx_context(struct lsx_context __user *ctx) +{ + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + return _restore_lsx_context(regs, fcc, fcsr); +} + +static int save_hw_lasx_context(struct lasx_context __user *ctx) +{ + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + return _save_lasx_context(regs, fcc, fcsr); +} + +static int restore_hw_lasx_context(struct lasx_context __user *ctx) +{ + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + return _restore_lasx_context(regs, fcc, fcsr); +} + static int fcsr_pending(unsigned int __user *fcsr) { int err, sig = 0; @@ -227,6 +383,146 @@ static int protected_restore_fpu_context(struct extctx_layout *extctx) return err ?: sig; } +static int protected_save_lsx_context(struct extctx_layout *extctx) +{ + int err = 0; + struct sctx_info __user *info = extctx->lsx.addr; + struct lsx_context __user *lsx_ctx = (struct lsx_context *)get_ctx_through_ctxinfo(info); + uint64_t __user *regs = (uint64_t *)&lsx_ctx->regs; + uint64_t __user *fcc = &lsx_ctx->fcc; + uint32_t __user *fcsr = &lsx_ctx->fcsr; + + while (1) { + lock_fpu_owner(); + if (is_lsx_enabled()) + err = save_hw_lsx_context(lsx_ctx); + else + err = 
copy_lsx_to_sigcontext(lsx_ctx); + unlock_fpu_owner(); + + err |= __put_user(LSX_CTX_MAGIC, &info->magic); + err |= __put_user(extctx->lsx.size, &info->size); + + if (likely(!err)) + break; + /* Touch the LSX context and try again */ + err = __put_user(0, &regs[0]) | + __put_user(0, &regs[32*2-1]) | + __put_user(0, fcc) | + __put_user(0, fcsr); + if (err) + return err; /* really bad sigcontext */ + } + + return err; +} + +static int protected_restore_lsx_context(struct extctx_layout *extctx) +{ + int err = 0, sig = 0, tmp __maybe_unused; + struct sctx_info __user *info = extctx->lsx.addr; + struct lsx_context __user *lsx_ctx = (struct lsx_context *)get_ctx_through_ctxinfo(info); + uint64_t __user *regs = (uint64_t *)&lsx_ctx->regs; + uint64_t __user *fcc = &lsx_ctx->fcc; + uint32_t __user *fcsr = &lsx_ctx->fcsr; + + err = sig = fcsr_pending(fcsr); + if (err < 0) + return err; + + while (1) { + lock_fpu_owner(); + if (is_lsx_enabled()) + err = restore_hw_lsx_context(lsx_ctx); + else + err = copy_lsx_from_sigcontext(lsx_ctx); + unlock_fpu_owner(); + + if (likely(!err)) + break; + /* Touch the LSX context and try again */ + err = __get_user(tmp, &regs[0]) | + __get_user(tmp, &regs[32*2-1]) | + __get_user(tmp, fcc) | + __get_user(tmp, fcsr); + if (err) + break; /* really bad sigcontext */ + } + + return err ?: sig; +} + +static int protected_save_lasx_context(struct extctx_layout *extctx) +{ + int err = 0; + struct sctx_info __user *info = extctx->lasx.addr; + struct lasx_context __user *lasx_ctx = + (struct lasx_context *)get_ctx_through_ctxinfo(info); + uint64_t __user *regs = (uint64_t *)&lasx_ctx->regs; + uint64_t __user *fcc = &lasx_ctx->fcc; + uint32_t __user *fcsr = &lasx_ctx->fcsr; + + while (1) { + lock_fpu_owner(); + if (is_lasx_enabled()) + err = save_hw_lasx_context(lasx_ctx); + else + err = copy_lasx_to_sigcontext(lasx_ctx); + unlock_fpu_owner(); + + err |= __put_user(LASX_CTX_MAGIC, &info->magic); + err |= __put_user(extctx->lasx.size, &info->size); + + if
(likely(!err)) + break; + /* Touch the LASX context and try again */ + err = __put_user(0, &regs[0]) | + __put_user(0, &regs[32*4-1]) | + __put_user(0, fcc) | + __put_user(0, fcsr); + if (err) + return err; /* really bad sigcontext */ + } + + return err; +} + +static int protected_restore_lasx_context(struct extctx_layout *extctx) +{ + int err = 0, sig = 0, tmp __maybe_unused; + struct sctx_info __user *info = extctx->lasx.addr; + struct lasx_context __user *lasx_ctx = + (struct lasx_context *)get_ctx_through_ctxinfo(info); + uint64_t __user *regs = (uint64_t *)&lasx_ctx->regs; + uint64_t __user *fcc = &lasx_ctx->fcc; + uint32_t __user *fcsr = &lasx_ctx->fcsr; + + err = sig = fcsr_pending(fcsr); + if (err < 0) + return err; + + while (1) { + lock_fpu_owner(); + if (is_lasx_enabled()) + err = restore_hw_lasx_context(lasx_ctx); + else + err = copy_lasx_from_sigcontext(lasx_ctx); + unlock_fpu_owner(); + + if (likely(!err)) + break; + /* Touch the LASX context and try again */ + err = __get_user(tmp, &regs[0]) | + __get_user(tmp, &regs[32*4-1]) | + __get_user(tmp, fcc) | + __get_user(tmp, fcsr); + if (err) + break; /* really bad sigcontext */ + } + + return err ?: sig; +} + static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, struct extctx_layout *extctx) { @@ -240,7 +536,11 @@ static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, for (i = 1; i < 32; i++) err |= __put_user(regs->regs[i], &sc->sc_regs[i]); - if (extctx->fpu.addr) + if (extctx->lasx.addr) + err |= protected_save_lasx_context(extctx); + else if (extctx->lsx.addr) + err |= protected_save_lsx_context(extctx); + else if (extctx->fpu.addr) err |= protected_save_fpu_context(extctx); /* Set the "end" magic */ @@ -274,6 +574,20 @@ static int parse_extcontext(struct sigcontext __user *sc, struct extctx_layout * extctx->fpu.addr = info; break; + case LSX_CTX_MAGIC: + if (size < (sizeof(struct sctx_info) + + sizeof(struct lsx_context))) + goto invalid; + extctx->lsx.addr =
info; + break; + + case LASX_CTX_MAGIC: + if (size < (sizeof(struct sctx_info) + + sizeof(struct lasx_context))) + goto invalid; + extctx->lasx.addr = info; + break; + default: goto invalid; } @@ -319,7 +633,11 @@ static int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc for (i = 1; i < 32; i++) err |= __get_user(regs->regs[i], &sc->sc_regs[i]); - if (extctx.fpu.addr) + if (extctx.lasx.addr) + err |= protected_restore_lasx_context(&extctx); + else if (extctx.lsx.addr) + err |= protected_restore_lsx_context(&extctx); + else if (extctx.fpu.addr) err |= protected_restore_fpu_context(&extctx); bad: @@ -375,7 +693,13 @@ static unsigned long setup_extcontext(struct extctx_layout *extctx, unsigned lon extctx->size += extctx->end.size; if (extctx->flags & SC_USED_FP) { - if (cpu_has_fpu) + if (cpu_has_lasx && thread_lasx_context_live()) + new_sp = extframe_alloc(extctx, &extctx->lasx, + sizeof(struct lasx_context), LASX_CTX_ALIGN, new_sp); + else if (cpu_has_lsx && thread_lsx_context_live()) + new_sp = extframe_alloc(extctx, &extctx->lsx, + sizeof(struct lsx_context), LSX_CTX_ALIGN, new_sp); + else if (cpu_has_fpu) new_sp = extframe_alloc(extctx, &extctx->fpu, sizeof(struct fpu_context), FPU_CTX_ALIGN, new_sp); } diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 94616e677e23..4840358c5341 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -514,12 +514,67 @@ static void init_restore_fp(void) BUG_ON(!is_fp_enabled()); } +static void init_restore_lsx(void) +{ + enable_lsx(); + + if (!thread_lsx_context_live()) { + /* First time LSX context user */ + init_restore_fp(); + init_lsx_upper(); + set_thread_flag(TIF_LSX_CTX_LIVE); + } else { + if (!is_simd_owner()) { + if (is_fpu_owner()) { + restore_lsx_upper(current); + } else { + __own_fpu(); + restore_lsx(current); + } + } + } + + set_thread_flag(TIF_USEDSIMD); + + BUG_ON(!is_fp_enabled()); + BUG_ON(!is_lsx_enabled()); +} + +static void 
init_restore_lasx(void) +{ + enable_lasx(); + + if (!thread_lasx_context_live()) { + /* First time LASX context user */ + init_restore_lsx(); + init_lasx_upper(); + set_thread_flag(TIF_LASX_CTX_LIVE); + } else { + if (is_fpu_owner() || is_simd_owner()) { + init_restore_lsx(); + restore_lasx_upper(current); + } else { + __own_fpu(); + enable_lsx(); + restore_lasx(current); + } + } + + set_thread_flag(TIF_USEDSIMD); + + BUG_ON(!is_fp_enabled()); + BUG_ON(!is_lsx_enabled()); + BUG_ON(!is_lasx_enabled()); +} + asmlinkage void noinstr do_fpu(struct pt_regs *regs) { irqentry_state_t state = irqentry_enter(regs); local_irq_enable(); die_if_kernel("do_fpu invoked from kernel context!", regs); + BUG_ON(is_lsx_enabled()); + BUG_ON(is_lasx_enabled()); preempt_disable(); init_restore_fp(); @@ -534,7 +589,19 @@ asmlinkage void noinstr do_lsx(struct pt_regs *regs) irqentry_state_t state = irqentry_enter(regs); local_irq_enable(); - force_sig(SIGILL); + if (!cpu_has_lsx) { + force_sig(SIGILL); + goto out; + } + + die_if_kernel("do_lsx invoked from kernel context!", regs); + BUG_ON(is_lasx_enabled()); + + preempt_disable(); + init_restore_lsx(); + preempt_enable(); + +out: local_irq_disable(); irqentry_exit(regs, state); @@ -545,7 +612,18 @@ asmlinkage void noinstr do_lasx(struct pt_regs *regs) irqentry_state_t state = irqentry_enter(regs); local_irq_enable(); - force_sig(SIGILL); + if (!cpu_has_lasx) { + force_sig(SIGILL); + goto out; + } + + die_if_kernel("do_lasx invoked from kernel context!", regs); + + preempt_disable(); + init_restore_lasx(); + preempt_enable(); + +out: local_irq_disable(); irqentry_exit(regs, state); -- Gitee From f3b12c258754e1df8cc7562cfaf97c743da06d33 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 3 Sep 2022 09:42:07 +0800 Subject: [PATCH 02/36] LoongArch: Fix section mismatch due to acpi_os_ioremap() mainline inclusion from mainline-v6.0-rc4 commit e0fba87c854347007fb9fc873e890b686cc61302 category: feature bugzilla: 
https://gitee.com/openeuler/kernel/issues/I6BWFP CVE: NA -------------------------------- Now acpi_os_ioremap() is marked with __init because it calls memblock_is_memory() which is also marked with __init in the !ARCH_KEEP_MEMBLOCK case. However, acpi_os_ioremap() is called by ordinary functions such as acpi_os_{read, write}_memory() and causes section mismatch warnings: WARNING: modpost: vmlinux.o: section mismatch in reference: acpi_os_read_memory (section: .text) -> acpi_os_ioremap (section: .init.text) WARNING: modpost: vmlinux.o: section mismatch in reference: acpi_os_write_memory (section: .text) -> acpi_os_ioremap (section: .init.text) Fix these warnings by selecting ARCH_KEEP_MEMBLOCK unconditionally and removing the __init modifier of acpi_os_ioremap(). This can also give a chance to track "memory" and "reserved" memblocks after early boot. Signed-off-by: Huacai Chen Change-Id: If0cd1baeb01c8d627c70c2f1b4569d6ac68bf696 --- arch/loongarch/Kconfig | 1 + arch/loongarch/include/asm/acpi.h | 2 +- arch/loongarch/kernel/acpi.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 76f0be990526..94a722515400 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -39,6 +39,7 @@ config LOONGARCH select ARCH_INLINE_SPIN_UNLOCK_BH if !PREEMPTION select ARCH_INLINE_SPIN_UNLOCK_IRQ if !PREEMPTION select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPTION + select ARCH_KEEP_MEMBLOCK select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select ARCH_SPARSEMEM_ENABLE diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h index 62044cd5b7bc..825c2519b9d1 100644 --- a/arch/loongarch/include/asm/acpi.h +++ b/arch/loongarch/include/asm/acpi.h @@ -15,7 +15,7 @@ extern int acpi_pci_disabled; extern int acpi_noirq; #define acpi_os_ioremap acpi_os_ioremap -void __init __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size); +void __iomem
*acpi_os_ioremap(acpi_physical_address phys, acpi_size size); static inline void disable_acpi(void) { diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 61221e6b2cac..0d6a4de10f6c 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -49,7 +49,7 @@ void __init __acpi_unmap_table(void __iomem *map, unsigned long size) early_memunmap(map, size); } -void __init __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) +void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) { if (!memblock_is_memory(phys)) return ioremap(phys, size); -- Gitee From 6def0ea3615d03f079f16f77252a1dd0804a298f Mon Sep 17 00:00:00 2001 From: liuyun Date: Mon, 5 Sep 2022 07:25:41 +0800 Subject: [PATCH 03/36] cpufreq: Add cpufreq driver for LoongArch LoongArch inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP -------------------------------- Signed-off-by: zhangtianyang Change-Id: Ic9390a1c78e1670422be1b03d35e3f0fdec1367f --- arch/loongarch/Kconfig | 6 + arch/loongarch/configs/loongson3_defconfig | 4 + arch/loongarch/include/asm/fpu.h | 13 +- drivers/cpufreq/Kconfig | 11 + drivers/cpufreq/Makefile | 1 + drivers/cpufreq/loongson3-acpi-cpufreq.c | 1564 ++++++++++++++++++++ 6 files changed, 1598 insertions(+), 1 deletion(-) create mode 100644 drivers/cpufreq/loongson3-acpi-cpufreq.c diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 94a722515400..77de7a1370f7 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -514,4 +514,10 @@ source "drivers/acpi/Kconfig" endmenu +menu "CPU Power Management" + +source "drivers/cpufreq/Kconfig" + +endmenu + source "drivers/firmware/Kconfig" diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index ace3efc63b08..df6b5c5a59c8 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -43,6 +43,10 @@ 
CONFIG_ACPI_SPCR_TABLE=y CONFIG_ACPI_DOCK=y CONFIG_ACPI_IPMI=m CONFIG_ACPI_PCI_SLOT=y +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_STAT=y +CONFIG_CPU_FREQ_GOV_POWERSAVE=y +CONFIG_LOONGSON3_ACPI_CPUFREQ=y CONFIG_EFI_CAPSULE_LOADER=m CONFIG_EFI_TEST=m CONFIG_MODULES=y diff --git a/arch/loongarch/include/asm/fpu.h b/arch/loongarch/include/asm/fpu.h index eaa8030f2d9c..1363f92929df 100644 --- a/arch/loongarch/include/asm/fpu.h +++ b/arch/loongarch/include/asm/fpu.h @@ -45,6 +45,10 @@ static inline void disable_lasx(void); static inline void save_lasx(struct task_struct *t); static inline void restore_lasx(struct task_struct *t); +#ifdef CONFIG_LOONGSON3_ACPI_CPUFREQ +DECLARE_PER_CPU(unsigned long, msa_count); +DECLARE_PER_CPU(unsigned long, lasx_count); +#endif /* * Mask the FCSR Cause bits according to the Enable bits, observing * that Unimplemented is always enabled. @@ -198,6 +202,9 @@ static inline void enable_lsx(void) { if (cpu_has_lsx) csr_xchg32(CSR_EUEN_LSXEN, CSR_EUEN_LSXEN, LOONGARCH_CSR_EUEN); +#ifdef CONFIG_LOONGSON3_ACPI_CPUFREQ + per_cpu(msa_count, raw_smp_processor_id())++; +#endif } static inline void disable_lsx(void) @@ -251,8 +258,12 @@ static inline void restore_lsx_upper(struct task_struct *t) {} static inline void enable_lasx(void) { - if (cpu_has_lasx) + if (cpu_has_lasx) { csr_xchg32(CSR_EUEN_LASXEN, CSR_EUEN_LASXEN, LOONGARCH_CSR_EUEN); +#ifdef CONFIG_LOONGSON3_ACPI_CPUFREQ + per_cpu(lasx_count, raw_smp_processor_id())++; +#endif + } } static inline void disable_lasx(void) diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 85de313ddec2..0d3ec7e7b972 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -287,6 +287,17 @@ config LOONGSON1_CPUFREQ If in doubt, say N. endif +if LOONGARCH +config LOONGSON3_ACPI_CPUFREQ + bool "Loongson3 ACPI cpufreq driver" + depends on ACPI_PROCESSOR + help + This driver adds a CPUFreq driver which utilizes the ACPI + Processor Performance States. 
+ This driver supports Loongson 3A5000 compatible CPUs. + If in doubt, say N. +endif + if SPARC64 config SPARC_US3_CPUFREQ tristate "UltraSPARC-III CPU Frequency driver" diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 7c762e105146..97e34fa3efe1 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -106,6 +106,7 @@ obj-$(CONFIG_BMIPS_CPUFREQ) += bmips-cpufreq.o obj-$(CONFIG_IA64_ACPI_CPUFREQ) += ia64-acpi-cpufreq.o obj-$(CONFIG_LOONGSON2_CPUFREQ) += loongson2_cpufreq.o obj-$(CONFIG_LOONGSON1_CPUFREQ) += loongson1-cpufreq.o +obj-$(CONFIG_LOONGSON3_ACPI_CPUFREQ) += loongson3-acpi-cpufreq.o obj-$(CONFIG_SH_CPU_FREQ) += sh-cpufreq.o obj-$(CONFIG_SPARC_US2E_CPUFREQ) += sparc-us2e-cpufreq.o obj-$(CONFIG_SPARC_US3_CPUFREQ) += sparc-us3-cpufreq.o diff --git a/drivers/cpufreq/loongson3-acpi-cpufreq.c b/drivers/cpufreq/loongson3-acpi-cpufreq.c new file mode 100644 index 000000000000..5bfbe3867c0a --- /dev/null +++ b/drivers/cpufreq/loongson3-acpi-cpufreq.c @@ -0,0 +1,1564 @@ +/* + * loongson3-acpi-cpufreq.c - Loongson ACPI Processor P-States Driver + * + * Copyright (C) 2020 lvjianmin + * Yijun + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include "cpufreq_governor.h" + +#include +#define CPU_ID_FIELD 0xf + +#define COMPLETE_STATUS 0x80000000 +#define VOLTAGE_COMMAND 0x21 + +#define DVFS_INFO 0x22 +#define DVFS_INFO_BOOST_LEVEL 0x23 +#define DVFS_INFO_MIN_FREQ 0xf +#define DVFS_INFO_MAX_FREQ 0xf0 +#define DVFS_INFO_BOOST_CORE_FREQ 0xff00 +#define DVFS_INFO_NORMAL_CORE_UPPER_LIMIT 0xf0000 +#define DVFS_INFO_BOOST_CORES 0xf00000 + +#define BOOST_MODE 0x80000 +#define NORMAL_MODE 0x40000 + +MODULE_DESCRIPTION("Loongson 3A5000 ACPI Processor P-States Driver"); + +MODULE_LICENSE("GPL"); + +#define CPUFREQ_SAMPLING_INTERVAL (2 * TICK_NSEC / NSEC_PER_USEC) +#define LOONGSON_CONTROL_MASK (0xFF) +#define FACTOR (0xeac0c6e8) +#define BOOST_THRESHOLD (900) +#define MAX_CORES_PER_PACKAGE 64 +#define CPU_ID_FIELD 0xf +#define VOLTAGE_COMMAND 0x21 +#define MAX_READY_TIMEOUT 300000000 +#define RESERVED_FREQ 3 + +#define LOONGSON_BOOST_FREQ_MASK (0x7 << 8) +#define FREQ_STEP (25) + +static struct mutex boost_mutex[MAX_PACKAGES]; +static bool cpufreq_has_boost_freq; +static int max_boost_cores; +static int boost_gears; +static int boost_freqs[NR_CPUS + 1]; +struct package_data; +struct core_data; +static struct acpi_processor_performance __percpu *acpi_perf_data; +static struct cpufreq_driver loongson3_cpufreq_driver; +static struct freq_attr *loongson3_cpufreq_attr[]; +DECLARE_PER_CPU(struct clock_event_device, stable_clockevent_device); +static inline struct core_data *get_core_data(int cpu); +extern struct clk *cpu_clk_get(int 
cpu); + +static int min_freq_level; +static int max_freq_level; +static int max_upper_index; +static int max_boost_freq; + +/* threshold of core's get into msa */ +static int msa_count_threshold = 200; +/* threshold of core's get into lasx */ +static int lasx_count_threshold = 200; +/* other cores' upper load threshold when 1 core get into boost mode and enable msa/lasx */ +static int load_threshold = 60; + +DEFINE_PER_CPU(unsigned long, msa_count); +EXPORT_PER_CPU_SYMBOL(msa_count); + +#if defined(CONFIG_CPU_HAS_LASX) +DEFINE_PER_CPU(unsigned long, lasx_count); +EXPORT_PER_CPU_SYMBOL(lasx_count); +#endif + +struct ce_update_data { + struct clock_event_device *cd; + unsigned int new_freq; +}; + +static struct kthread_worker cpufreq_worker; +static struct task_struct *cpufreq_thread; +/** + * struct core_data - Store core related information + * @in_boost: the core is boosting to boost_freq + * @cpu: logical cpu of the core + * @update_util The update_util_data pointer of @cpu, is passed to the callback + * function, which will be called by cpufreq_update_util() + * @package The package_data structure the core belonged to + * @work_in_progress @work is busy + * @irq_work to enqueue callback handling on irq workqueue + * @work to enqueue work from irq workqueue on system workqueue + * @perf store frequency table related information from ACPI table + * @max_freq max normal freq of cpu + * @boost_freq max boost freq of cpu + * @clock_scale clock scale to calculate cpu_data[cpu].udelay_val in boost mode + * @package_id package id of core + * @shift clock shift to calculate cpu_data[cpu].udelay_val in boost mode + * @update_util_set if callback has been set for cpufreq_update_util() + * @load current load of the core + * @last_freq_update_time last freq update time + * @freq_update_delay_ns min interval of freq update, which is + * transition_latency configured in ACPI table + * + * following elements are used to calculate load of the core + * @prev_update_time + * 
@prev_cpu_idle + * @prev_load + * @sampling_rate + * + */ +struct core_data { + bool in_boost; + int cpu; + struct update_util_data update_util; + struct package_data *package; + bool work_in_progress; + struct irq_work irq_work; + struct kthread_work work; + struct acpi_processor_performance *perf; + unsigned int normal_max_freq; + unsigned int *boost_freq; + unsigned int *clock_scale; + unsigned int package_id; + unsigned int *shift; + bool update_util_set; + unsigned long long load; + + u64 last_freq_update_time; + s64 freq_update_delay_ns; + u64 prev_update_time; + u64 prev_cpu_idle; + u32 prev_load; + u32 sampling_rate; +}; + +struct package_data { + int boost_cores; + int max_boost_cores; + int nr_cores; + char in_boost; + int nr_full_load_cores; + struct core_data core[MAX_CORES_PER_PACKAGE]; +} all_package_data[MAX_PACKAGES]; + +static bool boost_supported(void) +{ + return loongson3_cpufreq_driver.set_boost; +} + +/* + * Check if target_freq is a boost freq + * + * target_freq must be a freq in freq table when + * calling the function. 
+ * */ +static int boost_level(struct acpi_processor_performance *perf, unsigned int target_freq) +{ + int i; + + for (i = 0; i < perf->state_count; i++) { + if (target_freq == (perf->states[i].core_frequency * 1000)) { + return (perf->states[i].control & LOONGSON_BOOST_FREQ_MASK) >> 8; + } + } + return 0; +} + +#ifdef CONFIG_SMP +static int loongson3_cpu_freq_notifier(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct cpufreq_freqs *freqs; + struct clock_event_device __maybe_unused *cd; + struct core_data *core; + unsigned int __maybe_unused new_freq; + unsigned long cpu; + struct ce_update_data __maybe_unused ce_data; + int cur_boost_level; + + if (val == CPUFREQ_POSTCHANGE) { + freqs = (struct cpufreq_freqs *)data; + cpu = freqs->policy->cpu; + core = get_core_data(cpu); + cur_boost_level = boost_level(core->perf, freqs->new); + if (cur_boost_level != 0) { + lpj_fine = (unsigned int) (((int64_t)core->clock_scale[cur_boost_level] * + cpufreq_scale(loops_per_jiffy, boost_freqs[cur_boost_level] * 1000, + freqs->new)) / core->shift[cur_boost_level]); + } else { + lpj_fine = + cpufreq_scale(loops_per_jiffy, core->normal_max_freq * 1000, freqs->new); + } + } + + return 0; +} +#else /* !CONFIG_SMP: lpj_fine is derived from loops_per_jiffy without cpufreq_scale() */ +static int loongson3_cpu_freq_notifier(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct cpufreq_freqs *freqs; + struct clock_event_device __maybe_unused *cd; + struct core_data *core; + unsigned int __maybe_unused new_freq; + unsigned long cpu; + int cur_boost_level; + + if (val == CPUFREQ_POSTCHANGE) { + + freqs = (struct cpufreq_freqs *)data; + cpu = freqs->policy->cpu; /* same CPU lookup as the CONFIG_SMP variant */ + core = get_core_data(cpu); + cur_boost_level = boost_level(core->perf, freqs->new); /* no target_freq exists in this scope; the frequency just switched to is freqs->new */ + + if (cur_boost_level != 0) { + lpj_fine = (unsigned int) (((int64_t)core->clock_scale[cur_boost_level] * + loops_per_jiffy) / core->shift[cur_boost_level]); + } else { + lpj_fine = loops_per_jiffy; + } + } + + return 0; +} +#endif /* CONFIG_SMP */ +static struct notifier_block loongson3_cpufreq_notifier_block = { +
.notifier_call = loongson3_cpu_freq_notifier +}; + +static int cpufreq_perf_find_level(struct acpi_processor_performance *perf, + unsigned int target_freq, + unsigned int boost_level) +{ + int i; + for (i = 0; i < perf->state_count; i++) { + if (boost_level) { + if (perf->states[i].control & LOONGSON_BOOST_FREQ_MASK) { + if (target_freq == (perf->states[i].core_frequency * 1000)) + return perf->states[i].control & LOONGSON_CONTROL_MASK; + } + } else { + if (!(perf->states[i].control & LOONGSON_BOOST_FREQ_MASK)) + if (target_freq == (perf->states[i].core_frequency * 1000)) + return perf->states[i].control; + } + } + return 0; +} + +static int cpufreq_perf_find_freq(struct acpi_processor_performance *perf, + unsigned int target_index, + unsigned int boost_level) +{ + int i; + for (i = 0; i < perf->state_count; i++) { + if (boost_level) { + if (perf->states[i].control & LOONGSON_BOOST_FREQ_MASK) + if (target_index == (perf->states[i].control & LOONGSON_CONTROL_MASK)) + return perf->states[i].core_frequency; + } else { + if (!(perf->states[i].control & LOONGSON_BOOST_FREQ_MASK)) + if (target_index == perf->states[i].control) + return perf->states[i].core_frequency; + } + } + return 0; +} + + +static inline struct core_data *get_core_data(int cpu) +{ + int package_id = cpu_data[cpu].package; + struct package_data *package = &all_package_data[package_id]; + int core_id = cpu_logical_map(cpu) % package->nr_cores; + return &package->core[core_id]; +} + +static bool package_boost(struct package_data *package) +{ + int i; + int cur_full_load = 0; + +#if defined(CONFIG_CPU_HAS_LASX) + int lasx_enable_count = 0; + unsigned long lasx_num; + bool clear_lasx = false; +#endif + + int msa_enable_count = 0; + unsigned long msa_num; + bool clear_msa = false; + + for (i = 0; i < package->nr_cores; i++) { + +#if defined(CONFIG_CPU_HAS_LASX) + lasx_num = per_cpu(lasx_count, package->core[i].cpu); + + if (lasx_num) { + lasx_enable_count++; + } + + if (lasx_num >= lasx_count_threshold) { 
+ clear_lasx = true; + } + + pr_debug("file %s, line %d, lasx enabled, i %d, cpu %d, lasx_num %lu\n", + __FILE__, __LINE__, i, package->core[i].cpu, lasx_num); +#endif + msa_num = per_cpu(msa_count, package->core[i].cpu); + + if (msa_num) { + msa_enable_count++; + } + + if (msa_num >= msa_count_threshold) { + clear_msa = true; + } + + pr_debug("file %s, line %d, msa enabled, i %d, cpu %d, msa_num %lu\n", + __FILE__, __LINE__, i, package->core[i].cpu, msa_num); + + if (package->core[i].prev_load >= load_threshold) { + cur_full_load++; + } + } + +#if defined(CONFIG_CPU_HAS_LASX) + if (clear_lasx) { + for (i = 0; i < package->nr_cores; i++) { + per_cpu(lasx_count, package->core[i].cpu) = 0; + } + } +#endif + + if (clear_msa) { + for (i = 0; i < package->nr_cores; i++) { + per_cpu(msa_count, package->core[i].cpu) = 0; + } + } + +#if defined(CONFIG_CPU_HAS_LASX) + if (lasx_enable_count > 1 + || (lasx_enable_count && package->nr_full_load_cores > 1) + || (lasx_enable_count && cur_full_load > 1)) { + return false; + } +#endif + + if (msa_enable_count > 1 + || (msa_enable_count && package->nr_full_load_cores > 1) + || (msa_enable_count && cur_full_load > 1)) { + return false; + } + + if (package->nr_full_load_cores && + package->nr_full_load_cores <= package->max_boost_cores) + return true; + + return false; +} + +/* + * check if the cpu can be boosted. + * + * call the function after load of cpu updated. 
+ * */ +static bool cpu_can_boost(int cpu) +{ + struct core_data *core = get_core_data(cpu); + struct package_data *package = core->package; + if (package->boost_cores >= package->max_boost_cores) + return false; + if (core->load > BOOST_THRESHOLD) { + return true; + } + return false; +} + +static void do_set_freq_level(int cpu, int freq_level) +{ + uint32_t message; + uint32_t val; + + message = (0 << 31) | (VOLTAGE_COMMAND << 24) + | ((uint32_t)freq_level << 4) + | (cpu & CPU_ID_FIELD); + iocsr_write32(message, 0x51c); + val = iocsr_read32(0x420); + + val |= 1 << 10; + iocsr_write32(val, 0x420); +} + +static int wait_for_ready_timeout(int64_t timeout) +{ + int ret; + struct timespec64 prev_ts; + struct timespec64 curr_ts; + ktime_t delay = ktime_set(0, 100); + + ktime_get_ts64(&prev_ts); + ktime_get_ts64(&curr_ts); + + ret = -EPERM; + while (((curr_ts.tv_sec - prev_ts.tv_sec) * 1000000000 + (curr_ts.tv_nsec - prev_ts.tv_nsec)) < timeout) { + ktime_get_ts64(&curr_ts); + + if (iocsr_read32(0x51c) & COMPLETE_STATUS) { + ret = 0; + break; + } + + __set_current_state(TASK_UNINTERRUPTIBLE); + schedule_hrtimeout(&delay, HRTIMER_MODE_REL); + } + return ret; +} + +/* Find closest freq to target in a table in ascending order */ +static int cpufreq_table_find_freq_ac(struct cpufreq_policy *policy, + unsigned int target_freq, + int boost_level) +{ + struct cpufreq_frequency_table *table = policy->freq_table; + struct cpufreq_frequency_table *pos; + unsigned int freq; + unsigned int best_freq = 0; + int idx, best = -1; + cpufreq_for_each_valid_entry_idx(pos, table, idx) { + freq = pos->frequency; + + if (pos->driver_data != boost_level) + continue; + if (freq > policy->max || freq < policy->min) + continue; + if (freq == target_freq) + return freq; + + if (freq < target_freq) { + best = idx; + best_freq = freq; + continue; + } + + /* No freq found below target_freq, return freq above target_freq */ + if (best == -1) + return freq; + + /* Choose the closest freq */ + if 
(target_freq - table[best].frequency > freq - target_freq) + return freq; + + return best_freq; + } + + return best_freq; +} + +/* Find closest freq to target in a table in descending order */ +static int cpufreq_table_find_freq_dc(struct cpufreq_policy *policy, + unsigned int target_freq, + int boost_level) +{ + struct cpufreq_frequency_table *table = policy->freq_table; + struct cpufreq_frequency_table *pos; + unsigned int freq; + unsigned int best_freq = 0; + int idx, best = -1; + + cpufreq_for_each_valid_entry_idx(pos, table, idx) { + freq = pos->frequency; + + if (pos->driver_data != boost_level) + continue; + if (freq > policy->max || freq < policy->min) + continue; + + if (freq == target_freq) { + + return freq; + } + if (freq > target_freq) { + best = idx; + best_freq = freq; + continue; + } + + /* No freq found above target_freq, return freq below target_freq */ + if (best == -1) { + return freq; + } + /* Choose the closest freq */ + if (table[best].frequency - target_freq > target_freq - freq) { + + return freq; + } + return best_freq; + } + + return best_freq; +} + +/* Works only on sorted freq-tables */ +static int cpufreq_table_find_freq(struct cpufreq_policy *policy, + unsigned int target_freq, + int boost_level) +{ + target_freq = clamp_val(target_freq, policy->min, policy->max); + if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) + return cpufreq_table_find_freq_ac(policy, target_freq, boost_level); + else + return cpufreq_table_find_freq_dc(policy, target_freq, boost_level); +} + +static void transition_end(struct cpufreq_policy *policy, + struct cpufreq_freqs *freqs, bool failed) +{ + if (unlikely(!policy->transition_ongoing)) { + return; + } + cpufreq_freq_transition_end(policy, freqs, failed); +} +static void transition_begin(struct cpufreq_policy *policy, + struct cpufreq_freqs *freqs) +{ + if (unlikely(policy->transition_ongoing)) { + cpufreq_freq_transition_end(policy, freqs, true); + } + cpufreq_freq_transition_begin(policy, 
freqs); +} + +static void update_core_boost_info(struct core_data *core, bool boost_set) +{ + core->in_boost = boost_set; + if (boost_set) + core->package->boost_cores++; + else + core->package->boost_cores--; +} + +static unsigned int cores_freq_trans_notify(struct package_data *package, + bool before_trans, + bool trans_failed, + int find_level, + int find_freq, + unsigned int skip_cpumask) +{ + int i; + struct cpufreq_policy *policy; + struct cpufreq_freqs freqs; + unsigned int cores_level = 0; + unsigned int core_level; + + for (i = 0; i < package->nr_cores; i++) { + struct core_data *core = &package->core[i]; + policy = cpufreq_cpu_get_raw(core->cpu); + if (((1 << i) & skip_cpumask) || !policy) { + continue; + } + freqs.old = policy->cur; + freqs.flags = 0; + + /* find level from normal levels */ + core_level = cpufreq_perf_find_level(core->perf, policy->cur, find_level); + if (!core_level) { + pr_debug("cpu%d policy->cur=%d find_level=%d freq=%d skip_cpumask=%x \n", + policy->cpu, policy->cur, find_level, find_freq, skip_cpumask); + } + freqs.new = cpufreq_perf_find_freq(core->perf, core_level, find_freq) * 1000; + if (!freqs.new) { + pr_debug("file %s, line %d, find freq error\n", __FILE__, __LINE__); + } + + pr_debug("file %s, line %d, cpu %d, old freq %d, new freq %d, find_level %d, find_freq %d\n", + __FILE__, __LINE__, policy->cpu, freqs.old, freqs.new, find_level, find_freq); + cores_level |= (core_level << (i << 2)); + + if (before_trans) + transition_begin(policy, &freqs); + else { + transition_end(policy, &freqs, trans_failed); + } + } + return cores_level; +} +static int loongson3_set_freq(struct core_data *core, unsigned long freq, int boost_level) +{ + int ret = 0; + int freq_level; + int phy_cpu; + int target_freq; + struct cpufreq_freqs freqs; + struct cpufreq_policy *policy = cpufreq_cpu_get_raw(core->cpu); + + if (!policy) + return -EINVAL; + + ret = wait_for_ready_timeout(MAX_READY_TIMEOUT); + if (ret) + return ret; + + phy_cpu = 
cpu_logical_map(core->cpu); + target_freq = cpufreq_table_find_freq(policy, freq, boost_level); + if (!target_freq) + return -1; + if (target_freq == policy->cur) + return -1; + + freqs.flags = 0; + freqs.old = policy->cur; + freqs.new = target_freq; + freq_level = cpufreq_perf_find_level(core->perf, target_freq, boost_level); + if (!freq_level) { + pr_debug("loongson3_set_freq cpu%d freq=%lu targetfreq=%d boost_level=%d find level error\n", + core->cpu, freq, target_freq, boost_level); + } + + transition_begin(policy, &freqs); + do_set_freq_level(phy_cpu, freq_level); + ret = wait_for_ready_timeout(MAX_READY_TIMEOUT); + transition_end(policy, &freqs, !!ret); + + return ret; +} + +int loongson3_set_mode(int mode, int freq_level) +{ + uint32_t val; + int ret = 0; + uint32_t message; + + ret = wait_for_ready_timeout(MAX_READY_TIMEOUT); + if (ret) + return ret; + + message = mode | (VOLTAGE_COMMAND << 24) | freq_level; + iocsr_write32(message, 0x51c); + val = iocsr_read32(0x420); + val |= 1 << 10; + iocsr_write32(val, 0x420); + return wait_for_ready_timeout(MAX_READY_TIMEOUT); +} + +enum freq_adjust_action{ + FAA_NORMAL, + FAA_N2B, + FAA_B2N, + FAA_BOOST, +}; + +static int faa_normal(struct cpufreq_policy *policy, int load) +{ + int ret; + unsigned int freq_next, min_f, max_f; + struct core_data *core = get_core_data(policy->cpu); + if (!core) + return -1; + + pr_debug("file %s, line %d, func %s\n", __FILE__, __LINE__, __func__); + + min_f = policy->min; + max_f = policy->max; + freq_next = min_f + load * (max_f - min_f) / 100; + ret = loongson3_set_freq(core, freq_next, 0); + return ret; +} + +static void handle_boost_cores(struct core_data *core, struct package_data *package, + unsigned long target_freq, bool skip_update_and_notify, bool update_core, bool inc_boost) +{ + int boost_level; + int find_level; + int find_freq; + int ret; + int inc_core = inc_boost ? 
1 : -1; + + if (boost_gears == 1) { + find_level = 0; + boost_level = boost_gears; + } else { + find_level = package->boost_cores; + if (update_core) + boost_level = package->boost_cores + inc_core; + else + boost_level = package->boost_cores; + } + find_freq = boost_level; + ret = loongson3_set_freq(core, target_freq, boost_level); + if (ret) + return; + + if (skip_update_and_notify) { + if (update_core) + update_core_boost_info(core, inc_boost); + return; + } + + if (boost_gears != 1) { + cores_freq_trans_notify(package, true, false, + find_level, find_freq, 1 << core->cpu); + cores_freq_trans_notify(package, false, false, + find_level, find_freq, 1 << core->cpu); + } + if (update_core) + update_core_boost_info(core, inc_boost); +} + +static void faa_boost(struct cpufreq_policy *policy, int load) +{ + unsigned int min_f, max_f; + struct core_data *core = get_core_data(policy->cpu); + struct package_data *package = core->package; + unsigned long target_freq; + + pr_debug("file %s, line %d, func %s\n", __FILE__, __LINE__, __func__); + + /* boost cores form n to n + 1 */ + if (core->load > BOOST_THRESHOLD) { + if (package->boost_cores < package->max_boost_cores + && !core->in_boost) { + if (boost_gears == 1) { + target_freq = policy->max; + } else { + target_freq = cpufreq_table_find_freq(policy, policy->max, package->boost_cores + 1); + if (!target_freq) { + pr_debug("file %s, line %d, find freq error ,boost_level %d, cur freq %d\n", + __FILE__, __LINE__, package->boost_cores, policy->max); + } + } + handle_boost_cores(core, package, target_freq, false, true, true); + } + } else { + /* 1. core not in boost, level up but not change pll + * 2. 
core in boost, boost cores from n to n - 1 */ + min_f = policy->min; + max_f = policy->max; + target_freq = min_f + load * (max_f - min_f) / 100; + handle_boost_cores(core, package, target_freq, !core->in_boost, core->in_boost, false); + } + + +} + +static void get_boost_cores(struct package_data *package, int *boost_cores, int *boost_count) +{ + struct core_data *core; + struct cpufreq_policy *policy; + int i; + + /* count boost cores */ + for (i = 0; i < package->nr_cores; i++) { + core = &package->core[i]; + policy = cpufreq_cpu_get_raw(core->cpu); + if (!policy) + continue; + + if (cpu_can_boost(core->cpu)) { + if (boost_cores) + *boost_cores |= (1 << i); + + (*boost_count)++; + } + } +} + +static void faa_n2b(struct package_data *package, struct core_data *core) +{ + int boost_cores = 0; + int boost_count = 0; + int freq_level; + + pr_debug("file %s, line %d func %s\n", __FILE__, __LINE__, __func__); + + get_boost_cores(package, &boost_cores, &boost_count); + + if (boost_gears == 1) { + boost_count = 1; + } + + freq_level = cores_freq_trans_notify(package, true, false, + 0, boost_count, 0); + if (!loongson3_set_mode(BOOST_MODE, freq_level)) { + int i; + cores_freq_trans_notify(package, false, false, + 0, boost_count, 0); + package->in_boost = true; + for (i = 0; i < package->nr_cores; i++) { + if (boost_cores & (1 << i)) + update_core_boost_info(&package->core[i], true); + } + } else + cores_freq_trans_notify(package, false, true, + 0, boost_count, 0); +} + +static void faa_b2n(struct package_data *package) +{ + int i; + int boost_count = package->boost_cores; + + if (boost_gears == 1) { + boost_count = 1; + } + + pr_debug("file %s, line %d, func %s\n", __FILE__, __LINE__, __func__); + + cores_freq_trans_notify(package, true, false, + boost_count, 0, 0); + if (!loongson3_set_mode(NORMAL_MODE, 0)) { + cores_freq_trans_notify(package, false, false, + boost_count, 0, 0); + for (i = 0; i < package->nr_cores; i++) { + if (package->core[i].in_boost) + 
update_core_boost_info(&package->core[i], false); + } + package->in_boost = false; + } else + cores_freq_trans_notify(package, false, true, + boost_count, 0, 0); +} + + +unsigned int load_update(struct core_data *core) +{ + int i; + u64 update_time, cur_idle_time; + unsigned int idle_time, time_elapsed; + unsigned int load = 0; + struct package_data *package = core->package; + + cur_idle_time = get_cpu_idle_time(core->cpu, &update_time, true); + + time_elapsed = update_time - core->prev_update_time; + core->prev_update_time = update_time; + + idle_time = cur_idle_time - core->prev_cpu_idle; + core->prev_cpu_idle = cur_idle_time; + + if (unlikely(!time_elapsed)) { + /* + * That can only happen when this function is called + * twice in a row with a very short interval between the + * calls, so the previous load value can be used then. + */ + load = core->prev_load; + } else if (unlikely((int)idle_time > 2 * core->sampling_rate && + core->prev_load)) { + + load = core->prev_load; + core->prev_load = 0; + } else { + if (time_elapsed >= idle_time) { + load = 100 * (time_elapsed - idle_time) / time_elapsed; + } else { + load = (int)idle_time < 0 ? 
100 : 0; + } + core->prev_load = load; + } + + package->nr_full_load_cores = 0; + for (i = 0; i < package->nr_cores; i++) { + if (package->core[i].load > BOOST_THRESHOLD) { + package->nr_full_load_cores++; + } + } + + return load; +} + +static bool cpufreq_should_update_freq(struct core_data *core, u64 time) +{ + s64 delta_ns; + delta_ns = time - core->last_freq_update_time; + return delta_ns >= core->freq_update_delay_ns; +} + +static void cpufreq_update(struct cpufreq_policy *policy) +{ + int action; + struct core_data *core; + struct package_data *package; + unsigned long int load; + bool should_be_boost = 0; + + core = get_core_data(policy->cpu); + package = core->package; + + mutex_lock(&boost_mutex[core->package_id]); + + if (!core->update_util_set) { + mutex_unlock(&boost_mutex[core->package_id]); + return; + } + + load = load_update(core); + core->load = (u64)load + ((core->load * FACTOR) >> 32); + + if (cpufreq_boost_enabled()) { + should_be_boost = package_boost(package); + } else { + if (package->in_boost) + should_be_boost = false; + } + + action = (package->in_boost << 1) | should_be_boost; + switch (action) { + case FAA_NORMAL: + faa_normal(policy, load); + break; + case FAA_B2N: + faa_b2n(package); + break; + case FAA_N2B: + faa_n2b(package, core); + break; + case FAA_BOOST: + faa_boost(policy, load); + break; + } + mutex_unlock(&boost_mutex[core->package_id]); +} + +static void set_max_within_limits(struct cpufreq_policy *policy) +{ + struct core_data *core = get_core_data(policy->cpu); + /* + * policy->max <= cpu->pstate.max_freq indecates that + * the boost is disabled, so max freq is in normal range + * + * Skip performance policy with boost enabled!!! 
+ * + * */ + if (policy->max <= (core->normal_max_freq * 1000)) { + mutex_lock(&boost_mutex[core->package_id]); + if (!loongson3_set_freq(core, policy->max, 0)) + pr_debug("Set cpu %d to performance mode under normal range.\n", policy->cpu); + mutex_unlock(&boost_mutex[core->package_id]); + } +} + +static void clear_update_util_hook(unsigned int cpu) +{ + struct core_data *core = get_core_data(cpu); + + if (!core->update_util_set) + return; + + cpufreq_remove_update_util_hook(cpu); + core->update_util_set = false; + synchronize_rcu(); +} + +static void update_util_handler(struct update_util_data *data, u64 time, + unsigned int flags) +{ + struct core_data *core = container_of(data, struct core_data, update_util); + + if (!cpufreq_should_update_freq(core, time)) + return; + if (!core->work_in_progress) { + core->last_freq_update_time = time; + core->work_in_progress = true; + irq_work_queue(&core->irq_work); + } +} +static void set_update_util_hook(unsigned int cpu) +{ + struct core_data *core = get_core_data(cpu); + if (core->update_util_set) + return; + + cpufreq_add_update_util_hook(cpu, &core->update_util, + update_util_handler); + core->update_util_set = true; +} +static int loongson3_cpufreq_set_policy(struct cpufreq_policy *policy) +{ + if (!policy->cpuinfo.max_freq) + return -ENODEV; + + if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) { + clear_update_util_hook(policy->cpu); + set_max_within_limits(policy); + } else { + set_update_util_hook(policy->cpu); + } + + return 0; +} + +static int loongson3_cpufreq_verify_policy(struct cpufreq_policy_data *policy) +{ + cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, policy->cpuinfo.max_freq); + + return 0; +} + +static void set_boost_freq(bool has) +{ + cpufreq_has_boost_freq = has; +} + +static bool has_boost_freq(void) +{ + return cpufreq_has_boost_freq; +} + +static int compute_scale(int *shift, int dividor, int dividee) +{ + int i; + int result = 0; + int remainder = 0; + int scale_resolution = 
8; + + result = dividor / dividee; + remainder = (dividor % dividee) * 10; + + for (i = 0; i < scale_resolution; i++) { + result = result * 10 + remainder / dividee; + remainder = (remainder % dividee) * 10; + *shift *= 10; + } + + return result; +} + +static void cpufreq_work_handler(struct kthread_work *work) +{ + struct core_data *core; + struct cpufreq_policy *policy; + + core = container_of(work, struct core_data, work); + policy = cpufreq_cpu_get_raw(core->cpu); + + if (policy) + cpufreq_update(policy); + + core->work_in_progress = false; /* clear even without a policy: update_util_handler() only queues new work while this flag is false */ +} + +static void cpufreq_irq_work(struct irq_work *irq_work) +{ + struct core_data *core = container_of(irq_work, struct core_data, irq_work); + kthread_queue_work(&cpufreq_worker, &core->work); +} + +static void cpufreq_kthread_stop(void) +{ + kthread_flush_worker(&cpufreq_worker); + kthread_stop(cpufreq_thread); +} +static int cpufreq_kthread_create(void) +{ + struct sched_attr attr = { + .size = sizeof(struct sched_attr), + .sched_policy = SCHED_DEADLINE, + .sched_flags = 0x10000000, + .sched_nice = 0, + .sched_priority = 0, + .sched_runtime = 1000000, + .sched_deadline = 10000000, + .sched_period = 10000000, + }; + int ret; + + kthread_init_worker(&cpufreq_worker); + cpufreq_thread = kthread_create(kthread_worker_fn, &cpufreq_worker, "lsfrq:%d", 0); + if (IS_ERR(cpufreq_thread)) { + return PTR_ERR(cpufreq_thread); + } + + ret = sched_setattr_nocheck(cpufreq_thread, &attr); + if (ret) { + kthread_stop(cpufreq_thread); + pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__); + return ret; + } + + wake_up_process(cpufreq_thread); + + return 0; +} + +static int init_acpi(struct acpi_processor_performance *perf) +{ + int result = 0; + int i; + + perf->shared_type = 0; + perf->state_count = (max_freq_level - min_freq_level + 1) * (boost_gears + 1); + + perf->states = + kmalloc_array(perf->state_count, + sizeof(struct acpi_processor_px), + GFP_KERNEL); + + if (!perf->states) { + result = -ENOMEM; + return result; + } + + 
for (i = 0; i < perf->state_count; i++) { + perf->states[i].power = 0x3A98; + perf->states[i].transition_latency = 10000; + perf->states[i].bus_master_latency = 10000; + perf->states[i].status = (RESERVED_FREQ + i / (boost_gears + 1)); + perf->states[i].control = (RESERVED_FREQ + i / (boost_gears + 1)); + + switch (i % (boost_gears + 1)) { + case 0: + perf->states[i].core_frequency = (cpu_clock_freq / 1000000) * (8 - i / (boost_gears + 1)) / 8; + break; + case 1: + case 2: + case 3: + case 4: + perf->states[i].core_frequency = + boost_freqs[i % (boost_gears + 1)] * (8 - i / (boost_gears + 1)) / 8; + perf->states[i].control |= ((i % (boost_gears + 1)) << 8); + break; + default: + pr_info("file %s, line %d, i %d freq table error\n", __FILE__, __LINE__, i); + } + } + + return result; +} + +static int loongson3_cpufreq_cpu_init(struct cpufreq_policy *policy) +{ + unsigned int i; + struct acpi_processor_performance *perf; + struct cpufreq_frequency_table *freq_table; + struct core_data *core; + int package_id; + unsigned int cpu = policy->cpu; + unsigned int result = 0; + + perf = per_cpu_ptr(acpi_perf_data, cpu); + package_id = cpu_data[cpu].package; + core = get_core_data(cpu); + all_package_data[package_id].nr_cores = loongson_sysconf.cores_per_package; + all_package_data[package_id].max_boost_cores = max_boost_cores; + core->normal_max_freq = 0; + all_package_data[package_id].nr_full_load_cores = 0; + core->cpu = cpu; + core->work_in_progress = false; + core->last_freq_update_time = 0; + core->perf = perf; + core->package_id = package_id; + core->package = &all_package_data[package_id]; + + core->boost_freq = kmalloc_array(boost_gears + 1, sizeof(typeof(core->boost_freq)), GFP_KERNEL); + core->clock_scale = kmalloc_array(boost_gears + 1, sizeof(typeof(core->clock_scale)), GFP_KERNEL); + core->shift = kmalloc_array(boost_gears + 1, sizeof(typeof(core->shift)), GFP_KERNEL); + + for (i = 0; i < boost_gears + 1; i++) { + core->boost_freq[i] = boost_freqs[i]; + 
core->shift[i] = 1; + } + + if (!acpi_disabled) + result = acpi_processor_register_performance(perf, cpu); + else { + result = init_acpi(perf); + policy->shared_type = perf->shared_type; + } + + if (result) { + pr_info("CPU%d acpi_processor_register_performance failed.\n", cpu); + return result; + } + + for (i = 0; i < MAX_PACKAGES; i++) { + mutex_init(&boost_mutex[i]); + } + + /* capability check */ + if (perf->state_count <= 1) { + pr_debug("No P-States\n"); + result = -ENODEV; + goto err_unreg; + } + + freq_table = kcalloc(perf->state_count + 1, sizeof(*freq_table), + GFP_KERNEL); + if (!freq_table) { + result = -ENOMEM; + goto err_unreg; + } + + /* detect transition latency */ + policy->cpuinfo.transition_latency = 0; + for (i = 0; i < perf->state_count; i++) { + if ((perf->states[i].transition_latency * 1000) > + policy->cpuinfo.transition_latency) + policy->cpuinfo.transition_latency = + perf->states[i].transition_latency * 1000; + if (perf->states[i].control & LOONGSON_BOOST_FREQ_MASK) { + set_boost_freq(true); + } else { + if (perf->states[i].core_frequency > core->normal_max_freq) + core->normal_max_freq = perf->states[i].core_frequency; + } + } + + core->freq_update_delay_ns = policy->cpuinfo.transition_latency; + + for (i = 0; i < boost_gears + 1; i++) { + core->clock_scale[i] = compute_scale(&core->shift[i], boost_freqs[i], core->normal_max_freq); + pr_debug("file %s, line %d, boost_freqs[%d] %d, normal_max_freq %d, scale %d, shift %d\n", + __FILE__, __LINE__, i, boost_freqs[i], core->normal_max_freq, core->clock_scale[i], core->shift[i]); + } + + /* table init */ + for (i = 0; i < perf->state_count; i++) { + freq_table[i].driver_data = (perf->states[i].control & LOONGSON_BOOST_FREQ_MASK) >> 8; + if (freq_table[i].driver_data) + freq_table[i].flags |= CPUFREQ_BOOST_FREQ; + freq_table[i].frequency = + perf->states[i].core_frequency * 1000; + } + freq_table[i].frequency = CPUFREQ_TABLE_END; + policy->freq_table = freq_table; + perf->state = 0; + + /* add 
boost-attr if supported. */ + if (has_boost_freq() && boost_supported()) + loongson3_cpufreq_attr[1] = &cpufreq_freq_attr_scaling_boost_freqs; + + pr_info("CPU%u - ACPI performance management activated.\n", cpu); + for (i = 0; i < perf->state_count; i++) + pr_debug(" %cP%d: %d MHz, %d mW, %d uS %d level\n", + (i == perf->state ? '*' : ' '), i, + (u32) perf->states[i].core_frequency, + (u32) perf->states[i].power, + (u32) perf->states[i].transition_latency, + (u32) perf->states[i].control); + + /* + * the first call to ->target() should result in us actually + * writing something to the appropriate registers. + */ + policy->fast_switch_possible = false; + + init_irq_work(&core->irq_work, cpufreq_irq_work); + kthread_init_work(&core->work, cpufreq_work_handler); + core->sampling_rate = max_t(unsigned int, + CPUFREQ_SAMPLING_INTERVAL, + cpufreq_policy_transition_delay_us(policy)); + return result; + +err_unreg: + if (!acpi_disabled) + acpi_processor_unregister_performance(cpu); + + return result; +} + +static int loongson3_cpufreq_cpu_exit(struct cpufreq_policy *policy) +{ + struct core_data *core = get_core_data(policy->cpu); + clear_update_util_hook(policy->cpu); + irq_work_sync(&core->irq_work); + kthread_cancel_work_sync(&core->work); + core->work_in_progress = false; + policy->fast_switch_possible = false; + if (!acpi_disabled) + acpi_processor_unregister_performance(policy->cpu); + kfree(policy->freq_table); + kfree(core->boost_freq); + kfree(core->clock_scale); + kfree(core->shift); + return 0; +} + +static struct freq_attr *loongson3_cpufreq_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, /* Extra space for boost-attr if supported */ + NULL, +}; + +static struct cpufreq_driver loongson3_cpufreq_driver = { + .verify = loongson3_cpufreq_verify_policy, + .setpolicy = loongson3_cpufreq_set_policy, + .init = loongson3_cpufreq_cpu_init, + .exit = loongson3_cpufreq_cpu_exit, + .name = "acpi-cpufreq", + .attr = loongson3_cpufreq_attr, +}; + +static 
void free_acpi_perf_data(void) +{ + unsigned int i; + + /* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */ + for_each_possible_cpu(i) + free_cpumask_var(per_cpu_ptr(acpi_perf_data, i) + ->shared_cpu_map); + free_percpu(acpi_perf_data); +} + +static int __init loongson3_cpufreq_early_init(void) +{ + unsigned int i; + pr_debug("acpi_cpufreq_early_init\n"); + + acpi_perf_data = alloc_percpu(struct acpi_processor_performance); + if (!acpi_perf_data) { + return -ENOMEM; + } + for_each_possible_cpu(i) { + if (!zalloc_cpumask_var_node( + &per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map, + GFP_KERNEL, cpu_to_node(i))) { + free_acpi_perf_data(); + return -ENOMEM; + } + } + return 0; +} + +static bool support_boost(void) +{ + int message; + int val; + int i; + + if (wait_for_ready_timeout(MAX_READY_TIMEOUT)) + return false; + message = DVFS_INFO << 24; + iocsr_write32(message, 0x51c); + val = iocsr_read32(0x420); + + val |= 1 << 10; + iocsr_write32(val, 0x420); + if (wait_for_ready_timeout(MAX_READY_TIMEOUT)) { + pr_info("file %s, line %d, not support boost\n", __FILE__, __LINE__); + return false; + } + + val = iocsr_read32(0x51c); + + min_freq_level = val & DVFS_INFO_MIN_FREQ; + max_freq_level = (val & DVFS_INFO_MAX_FREQ) >> 4; + + if ((val & DVFS_INFO_BOOST_CORE_FREQ) && ((val & DVFS_INFO_BOOST_CORES) >> 20)) { + max_boost_cores = (val & DVFS_INFO_BOOST_CORES) >> 20; + max_boost_freq = ((val & DVFS_INFO_BOOST_CORE_FREQ) >> 8) * 25; + max_upper_index = (val & DVFS_INFO_NORMAL_CORE_UPPER_LIMIT) >> 16; + } else { + boost_gears = 0; + return false; + } + + /* Read boost levels */ + if (wait_for_ready_timeout(MAX_READY_TIMEOUT)) + return false; + + /* for version 1, single boost freq boost */ + message = DVFS_INFO_BOOST_LEVEL << 24; + iocsr_write32(message, 0x51c); + val = iocsr_read32(0x420); + + val |= 1 << 10; + iocsr_write32(val, 0x420); + + if (wait_for_ready_timeout(MAX_READY_TIMEOUT)) { + pr_info("file %s, line %d, single boost mode\n", __FILE__, __LINE__); + 
boost_gears = 1; + boost_freqs[0] = calc_const_freq() / 1000000; + for (i = 1; i < boost_gears + 1; i++) { + boost_freqs[i] = max_boost_freq; + } + + /* set 0x51c complete */ + iocsr_write32(COMPLETE_STATUS, 0x51c); + } else { + pr_info("file %s, line %d, multi boost mode\n", __FILE__, __LINE__); + boost_gears = max_boost_cores; + val = iocsr_read32(0x51c); + + boost_freqs[0] = calc_const_freq() / 1000000; + boost_freqs[1] = max_boost_freq; + + if (boost_gears > 1) { + for (i = 2; i < boost_gears + 1; i++) { + boost_freqs[i] = max_boost_freq - (((val >> ((i-2) * 4)) & 0xf) * FREQ_STEP); + } + } + } + + pr_info("file %s, line %d, min_freq_level %d, max_freq_level %d, max_boost_cores %d, boost_gears %d\n", + __FILE__, __LINE__, min_freq_level, max_freq_level, max_boost_cores, boost_gears); + + return true; +} + +static int cpufreq_table_cpuinfo(struct cpufreq_policy *policy, + struct cpufreq_frequency_table *table, + bool boost) +{ + struct cpufreq_frequency_table *pos; + unsigned int min_freq = ~0; + unsigned int max_freq = 0; + unsigned int freq; + + cpufreq_for_each_valid_entry(pos, table) { + freq = pos->frequency; + + if (!boost) { + if (pos->driver_data) + continue; + } + if (freq < min_freq) + min_freq = freq; + if (freq > max_freq) + max_freq = freq; + } + + policy->min = policy->cpuinfo.min_freq = min_freq; + policy->max = policy->cpuinfo.max_freq = max_freq; + if (policy->min == ~0) + return -EINVAL; + else + return 0; +} + +static int set_boost(struct cpufreq_policy *policy, int state) +{ + if (!has_boost_freq()) + return -EINVAL; + + if (!policy) + return -EINVAL; + + if (!state) { + if (policy->policy == CPUFREQ_POLICY_POWERSAVE) { + cpufreq_update(policy); + } + } + if (!policy->freq_table) + return -EINVAL; + + cpufreq_table_cpuinfo(policy, policy->freq_table, state); + down_write(&policy->rwsem); + up_write(&policy->rwsem); + + if (!state) { + set_max_within_limits(policy); + } + + + return 0; +} + +static void __init 
loongson3_cpufreq_boost_init(void) +{ + if (!support_boost()) { + pr_info("Boost capabilities not present in the processor\n"); + return; + } + + loongson3_cpufreq_driver.set_boost = set_boost; +} + +static int cpufreq_supported_detect(void) +{ + return wait_for_ready_timeout(MAX_READY_TIMEOUT); +} + +static int __init loongson3_cpufreq_init(void) +{ + int ret; + if (!cpu_has_csr || !cpu_has_scalefreq) + return -ENODEV; + + /* don't keep reloading if cpufreq_driver exists */ + if (cpufreq_get_current_driver()) + return -EEXIST; + + pr_debug("loongson3_cpufreq_init\n"); + if (cpufreq_supported_detect()) { + pr_info("loongson3_cpufreq_init failed!\n"); + return -ENODEV; + } + + ret = loongson3_cpufreq_early_init(); + if (ret) + return ret; + loongson3_cpufreq_boost_init(); + + cpufreq_register_notifier(&loongson3_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + ret = cpufreq_register_driver(&loongson3_cpufreq_driver); + cpufreq_kthread_create(); + if (ret) { + free_acpi_perf_data(); + } + return ret; +} + +static void __exit loongson3_cpufreq_exit(void) +{ + pr_debug("loongson3_cpufreq_exit\n"); + + cpufreq_unregister_driver(&loongson3_cpufreq_driver); + free_acpi_perf_data(); + cpufreq_kthread_stop(); +} + +late_initcall(loongson3_cpufreq_init); +module_exit(loongson3_cpufreq_exit); + +static const struct acpi_device_id processor_device_ids[] = { + {ACPI_PROCESSOR_OBJECT_HID, }, + {ACPI_PROCESSOR_DEVICE_HID, }, + {}, +}; +MODULE_DEVICE_TABLE(acpi, processor_device_ids); + +MODULE_ALIAS("acpi"); -- Gitee From eb8c6f754365027463541b05b87c34e10c4208d0 Mon Sep 17 00:00:00 2001 From: Liu Peibao Date: Mon, 1 Aug 2022 09:23:27 +0800 Subject: [PATCH 04/36] LoongArch: convert p v s cache to l1 l2 l3 format LoongArch inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP -------------------------------- Change-Id: I0d38cd552c26b92a587465d3b5fb5e992f67ff9b Signed-off-by: Liu Peibao --- arch/loongarch/include/asm/cacheflush.h | 43 +--- 
arch/loongarch/include/asm/cacheops.h | 36 +-- arch/loongarch/include/asm/cpu-features.h | 5 - arch/loongarch/include/asm/cpu-info.h | 20 +- arch/loongarch/include/asm/loongarch.h | 7 + arch/loongarch/kernel/cacheinfo.c | 109 +++------ arch/loongarch/mm/cache.c | 274 +++++++++++++++------- arch/loongarch/pci/pci.c | 7 +- 8 files changed, 277 insertions(+), 224 deletions(-) diff --git a/arch/loongarch/include/asm/cacheflush.h b/arch/loongarch/include/asm/cacheflush.h index 670900141b7c..a9fa98b79405 100644 --- a/arch/loongarch/include/asm/cacheflush.h +++ b/arch/loongarch/include/asm/cacheflush.h @@ -9,8 +9,11 @@ #include #include -extern void local_flush_icache_range(unsigned long start, unsigned long end); +void local_flush_icache_range(unsigned long start, unsigned long end); +void flush_cache_line_hit(unsigned long addr); +asmlinkage void cpu_flush_caches(void); +#define invalid_cache_line_hit(addr) flush_cache_line_hit(addr) #define flush_icache_range local_flush_icache_range #define flush_icache_user_range local_flush_icache_range @@ -35,46 +38,26 @@ extern void local_flush_icache_range(unsigned long start, unsigned long end); : \ : "i" (op), "ZC" (*(unsigned char *)(addr))) -static inline void flush_icache_line_indexed(unsigned long addr) +static inline bool cache_present(struct cache_desc *cdesc) { - cache_op(Index_Invalidate_I, addr); + return cdesc->flags & CACHE_PRESENT; } -static inline void flush_dcache_line_indexed(unsigned long addr) +static inline bool cache_private(struct cache_desc *cdesc) { - cache_op(Index_Writeback_Inv_D, addr); + return cdesc->flags & CACHE_PRIVATE; } -static inline void flush_vcache_line_indexed(unsigned long addr) +static inline bool cache_inclusive(struct cache_desc *cdesc) { - cache_op(Index_Writeback_Inv_V, addr); + return cdesc->flags & CACHE_INCLUSIVE; } -static inline void flush_scache_line_indexed(unsigned long addr) +static inline unsigned int cpu_last_level_cache_line_size(void) { - cache_op(Index_Writeback_Inv_S, 
addr); -} + unsigned int cache_present = current_cpu_data.cache_leaves_present; -static inline void flush_icache_line(unsigned long addr) -{ - cache_op(Hit_Invalidate_I, addr); + return current_cpu_data.cache_leaves[cache_present - 1].linesz; } - -static inline void flush_dcache_line(unsigned long addr) -{ - cache_op(Hit_Writeback_Inv_D, addr); -} - -static inline void flush_vcache_line(unsigned long addr) -{ - cache_op(Hit_Writeback_Inv_V, addr); -} - -static inline void flush_scache_line(unsigned long addr) -{ - cache_op(Hit_Writeback_Inv_S, addr); -} - #include - #endif /* _ASM_CACHEFLUSH_H */ diff --git a/arch/loongarch/include/asm/cacheops.h b/arch/loongarch/include/asm/cacheops.h index dc280efecebd..0f4a86f8e2be 100644 --- a/arch/loongarch/include/asm/cacheops.h +++ b/arch/loongarch/include/asm/cacheops.h @@ -8,16 +8,18 @@ #define __ASM_CACHEOPS_H /* - * Most cache ops are split into a 2 bit field identifying the cache, and a 3 + * Most cache ops are split into a 3 bit field identifying the cache, and a 2 * bit field identifying the cache operation. 
*/ -#define CacheOp_Cache 0x03 -#define CacheOp_Op 0x1c +#define CacheOp_Cache 0x07 +#define CacheOp_Op 0x18 -#define Cache_I 0x00 -#define Cache_D 0x01 -#define Cache_V 0x02 -#define Cache_S 0x03 +#define Cache_LEAF0 0x00 +#define Cache_LEAF1 0x01 +#define Cache_LEAF2 0x02 +#define Cache_LEAF3 0x03 +#define Cache_LEAF4 0x04 +#define Cache_LEAF5 0x05 #define Index_Invalidate 0x08 #define Index_Writeback_Inv 0x08 @@ -25,13 +27,17 @@ #define Hit_Writeback_Inv 0x10 #define CacheOp_User_Defined 0x18 -#define Index_Invalidate_I (Cache_I | Index_Invalidate) -#define Index_Writeback_Inv_D (Cache_D | Index_Writeback_Inv) -#define Index_Writeback_Inv_V (Cache_V | Index_Writeback_Inv) -#define Index_Writeback_Inv_S (Cache_S | Index_Writeback_Inv) -#define Hit_Invalidate_I (Cache_I | Hit_Invalidate) -#define Hit_Writeback_Inv_D (Cache_D | Hit_Writeback_Inv) -#define Hit_Writeback_Inv_V (Cache_V | Hit_Writeback_Inv) -#define Hit_Writeback_Inv_S (Cache_S | Hit_Writeback_Inv) +#define Index_Writeback_Inv_LEAF0 (Cache_LEAF0 | Index_Writeback_Inv) +#define Index_Writeback_Inv_LEAF1 (Cache_LEAF1 | Index_Writeback_Inv) +#define Index_Writeback_Inv_LEAF2 (Cache_LEAF2 | Index_Writeback_Inv) +#define Index_Writeback_Inv_LEAF3 (Cache_LEAF3 | Index_Writeback_Inv) +#define Index_Writeback_Inv_LEAF4 (Cache_LEAF4 | Index_Writeback_Inv) +#define Index_Writeback_Inv_LEAF5 (Cache_LEAF5 | Index_Writeback_Inv) +#define Hit_Writeback_Inv_LEAF0 (Cache_LEAF0 | Hit_Writeback_Inv) +#define Hit_Writeback_Inv_LEAF1 (Cache_LEAF1 | Hit_Writeback_Inv) +#define Hit_Writeback_Inv_LEAF2 (Cache_LEAF2 | Hit_Writeback_Inv) +#define Hit_Writeback_Inv_LEAF3 (Cache_LEAF3 | Hit_Writeback_Inv) +#define Hit_Writeback_Inv_LEAF4 (Cache_LEAF4 | Hit_Writeback_Inv) +#define Hit_Writeback_Inv_LEAF5 (Cache_LEAF5 | Hit_Writeback_Inv) #endif /* __ASM_CACHEOPS_H */ diff --git a/arch/loongarch/include/asm/cpu-features.h b/arch/loongarch/include/asm/cpu-features.h index a8d87c40a0eb..b07974218393 100644 --- 
a/arch/loongarch/include/asm/cpu-features.h +++ b/arch/loongarch/include/asm/cpu-features.h @@ -19,11 +19,6 @@ #define cpu_has_loongarch32 (cpu_data[0].isa_level & LOONGARCH_CPU_ISA_32BIT) #define cpu_has_loongarch64 (cpu_data[0].isa_level & LOONGARCH_CPU_ISA_64BIT) -#define cpu_icache_line_size() cpu_data[0].icache.linesz -#define cpu_dcache_line_size() cpu_data[0].dcache.linesz -#define cpu_vcache_line_size() cpu_data[0].vcache.linesz -#define cpu_scache_line_size() cpu_data[0].scache.linesz - #ifdef CONFIG_32BIT # define cpu_has_64bits (cpu_data[0].isa_level & LOONGARCH_CPU_ISA_64BIT) # define cpu_vabits 31 diff --git a/arch/loongarch/include/asm/cpu-info.h b/arch/loongarch/include/asm/cpu-info.h index b6c4f96079df..3fce27d92fcb 100644 --- a/arch/loongarch/include/asm/cpu-info.h +++ b/arch/loongarch/include/asm/cpu-info.h @@ -9,19 +9,28 @@ #include #include +/* cache_desc->flags */ +enum { + CACHE_PRESENT = (1 << 0), + CACHE_PRIVATE = (1 << 1), /* core private cache */ + CACHE_INCLUSIVE = (1 << 2), /* include the lower level caches */ +}; /* * Descriptor for a cache */ struct cache_desc { - unsigned int waysize; /* Bytes per way */ + unsigned char type; + unsigned char level; unsigned short sets; /* Number of lines per set */ unsigned char ways; /* Number of ways */ unsigned char linesz; /* Size of line in bytes */ - unsigned char waybit; /* Bits to select in a cache set */ unsigned char flags; /* Flags describing cache properties */ }; +#define CACHE_LEAVES_MAX 6 + +#define CACHE_LEVEL_MAX 3 struct cpuinfo_loongarch { u64 asid_cache; unsigned long asid_mask; @@ -40,11 +49,8 @@ struct cpuinfo_loongarch { int tlbsizemtlb; int tlbsizestlbsets; int tlbsizestlbways; - struct cache_desc icache; /* Primary I-cache */ - struct cache_desc dcache; /* Primary D or combined I/D cache */ - struct cache_desc vcache; /* Victim cache, between pcache and scache */ - struct cache_desc scache; /* Secondary cache */ - struct cache_desc tcache; /* Tertiary/split secondary cache */ 
+ unsigned int cache_leaves_present; /* number of cache_leaves[] elements */ + struct cache_desc cache_leaves[CACHE_LEAVES_MAX]; int core; /* physical core number in package */ int package;/* physical package number */ int vabits; /* Virtual Address size in bits */ diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index 3ba4f7e87cd2..7ead7a1f8aa0 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -224,6 +224,13 @@ static inline u32 read_cpucfg(u32 reg) #define CPUCFG48_VFPU_CG BIT(2) #define CPUCFG48_RAM_CG BIT(3) +#define CACHE_WAYS_M GENMASK(15, 0) +#define CACHE_SETS_M GENMASK(23, 16) +#define CACHE_LSIZE_M GENMASK(30, 24) +#define CACHE_WAYS 0 +#define CACHE_SETS 16 +#define CACHE_LSIZE 24 + #ifndef __ASSEMBLY__ /* CSR */ diff --git a/arch/loongarch/kernel/cacheinfo.c b/arch/loongarch/kernel/cacheinfo.c index 4662b06269f4..0d97a970ad4e 100644 --- a/arch/loongarch/kernel/cacheinfo.c +++ b/arch/loongarch/kernel/cacheinfo.c @@ -5,69 +5,28 @@ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ #include +#include #include #include -/* Populates leaf and increments to next leaf */ -#define populate_cache(cache, leaf, c_level, c_type) \ -do { \ - leaf->type = c_type; \ - leaf->level = c_level; \ - leaf->coherency_line_size = c->cache.linesz; \ - leaf->number_of_sets = c->cache.sets; \ - leaf->ways_of_associativity = c->cache.ways; \ - leaf->size = c->cache.linesz * c->cache.sets * \ - c->cache.ways; \ - if (leaf->level > 2) \ - leaf->size *= nodes_per_package; \ - leaf++; \ -} while (0) - int init_cache_level(unsigned int cpu) { - struct cpuinfo_loongarch *c = ¤t_cpu_data; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); - int levels = 0, leaves = 0; - - /* - * If Dcache is not set, we assume the cache structures - * are not properly initialized. 
- */ - if (c->dcache.waysize) - levels += 1; - else - return -ENOENT; - - - leaves += (c->icache.waysize) ? 2 : 1; - - if (c->vcache.waysize) { - levels++; - leaves++; - } - - if (c->scache.waysize) { - levels++; - leaves++; - } - - if (c->tcache.waysize) { - levels++; - leaves++; - } - - this_cpu_ci->num_levels = levels; - this_cpu_ci->num_leaves = leaves; + unsigned int cache_present = current_cpu_data.cache_leaves_present; + this_cpu_ci->num_levels = + current_cpu_data.cache_leaves[cache_present - 1].level; + this_cpu_ci->num_leaves = cache_present; return 0; } static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf, - struct cacheinfo *sib_leaf) + struct cacheinfo *sib_leaf) { - return !((this_leaf->level == 1) || (this_leaf->level == 2)); + return (!(*(unsigned char *)(this_leaf->priv) & CACHE_PRIVATE) && + !(*(unsigned char *)(sib_leaf->priv) & CACHE_PRIVATE)); } -static void cache_cpumap_setup(unsigned int cpu) +static void __cache_cpumap_setup(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct cacheinfo *this_leaf, *sib_leaf; @@ -85,8 +44,11 @@ static void cache_cpumap_setup(unsigned int cpu) for_each_online_cpu(i) { struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i); - if (i == cpu || !sib_cpu_ci->info_list) - continue;/* skip if itself or no cacheinfo */ + /* skip if itself or no cacheinfo or not in one + * physical node. 
*/ + if (i == cpu || !sib_cpu_ci->info_list || + (cpu_to_node(i) != cpu_to_node(cpu))) + continue; sib_leaf = sib_cpu_ci->info_list + index; if (cache_leaves_are_shared(this_leaf, sib_leaf)) { cpumask_set_cpu(cpu, &sib_leaf->shared_cpu_map); @@ -98,33 +60,30 @@ static void cache_cpumap_setup(unsigned int cpu) int populate_cache_leaves(unsigned int cpu) { - int level = 1, nodes_per_package = 1; - struct cpuinfo_loongarch *c = ¤t_cpu_data; + struct cache_desc *cdesc_tmp, *cdesc = current_cpu_data.cache_leaves; + unsigned int cache_present = current_cpu_data.cache_leaves_present; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct cacheinfo *this_leaf = this_cpu_ci->info_list; - - if (loongson_sysconf.nr_nodes > 1) - nodes_per_package = loongson_sysconf.cores_per_package - / loongson_sysconf.cores_per_node; - - if (c->icache.waysize) { - populate_cache(dcache, this_leaf, level, CACHE_TYPE_DATA); - populate_cache(icache, this_leaf, level++, CACHE_TYPE_INST); - } else { - populate_cache(dcache, this_leaf, level++, CACHE_TYPE_UNIFIED); + int i; + + for (i = 0; i < cache_present; i++) { + cdesc_tmp = cdesc + i; + + this_leaf->type = cdesc_tmp->type; + this_leaf->level = cdesc_tmp->level; + this_leaf->coherency_line_size = cdesc_tmp->linesz; + this_leaf->number_of_sets = cdesc_tmp->sets; + this_leaf->ways_of_associativity = cdesc_tmp->ways; + this_leaf->size = + cdesc_tmp->linesz * cdesc_tmp->sets * cdesc_tmp->ways; + this_leaf->priv = &cdesc_tmp->flags; + this_leaf++; } - if (c->vcache.waysize) - populate_cache(vcache, this_leaf, level++, CACHE_TYPE_UNIFIED); - - if (c->scache.waysize) - populate_cache(scache, this_leaf, level++, CACHE_TYPE_UNIFIED); - - if (c->tcache.waysize) - populate_cache(tcache, this_leaf, level++, CACHE_TYPE_UNIFIED); - - cache_cpumap_setup(cpu); - this_cpu_ci->cpu_map_populated = true; + if (!of_have_populated_dt()) { + __cache_cpumap_setup(cpu); + this_cpu_ci->cpu_map_populated = true; + } return 0; } diff --git 
a/arch/loongarch/mm/cache.c b/arch/loongarch/mm/cache.c index 9e5ce5aa73f7..e08ce053d969 100644 --- a/arch/loongarch/mm/cache.c +++ b/arch/loongarch/mm/cache.c @@ -1,11 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) 2020-2022 Loongson Technology Corporation Limited - * - * Derived from MIPS: - * Copyright (C) 1994 - 2003, 06, 07 by Ralf Baechle (ralf@linux-mips.org) - * Copyright (C) 2007 MIPS Technologies, Inc. - */ +* Copyright (C) 2020-2022 Loongson Technology Corporation Limited +* +* Derived from MIPS: +* Copyright (C) 1994 - 2003, 06, 07 by Ralf Baechle (ralf@linux-mips.org) +* Copyright (C) 2007 MIPS Technologies, Inc. +*/ #include #include #include @@ -15,127 +15,225 @@ #include #include #include +#include -#include #include +#include #include #include #include #include #include +extern struct loongson_system_configuration loongson_sysconf; +void cache_error_setup(void) +{ + extern char __weak except_vec_cex; + set_merr_handler(0x0, &except_vec_cex, 0x80); +} -/* - * LoongArch maintains ICache/DCache coherency by hardware, - * we just need "ibar" to avoid instruction hazard here. - */ +/* Cache operations. 
*/ void local_flush_icache_range(unsigned long start, unsigned long end) { asm volatile ("\tibar 0\n"::); } -EXPORT_SYMBOL(local_flush_icache_range); -void cache_error_setup(void) +static inline void __flush_cache_line_hit(int leaf, unsigned long addr) { - extern char __weak except_vec_cex; - set_merr_handler(0x0, &except_vec_cex, 0x80); + switch (leaf) { + case Cache_LEAF0: + cache_op(Hit_Writeback_Inv_LEAF0, addr); + break; + case Cache_LEAF1: + cache_op(Hit_Writeback_Inv_LEAF1, addr); + break; + case Cache_LEAF2: + cache_op(Hit_Writeback_Inv_LEAF2, addr); + break; + case Cache_LEAF3: + cache_op(Hit_Writeback_Inv_LEAF3, addr); + break; + case Cache_LEAF4: + cache_op(Hit_Writeback_Inv_LEAF4, addr); + break; + case Cache_LEAF5: + cache_op(Hit_Writeback_Inv_LEAF5, addr); + break; + default: + break; + } } -static unsigned long icache_size __read_mostly; -static unsigned long dcache_size __read_mostly; -static unsigned long vcache_size __read_mostly; -static unsigned long scache_size __read_mostly; +static inline void __flush_cache_line_indexed(int leaf, unsigned long addr) +{ + switch (leaf) { + case Cache_LEAF0: + cache_op(Index_Writeback_Inv_LEAF0, addr); + break; + case Cache_LEAF1: + cache_op(Index_Writeback_Inv_LEAF1, addr); + break; + case Cache_LEAF2: + cache_op(Index_Writeback_Inv_LEAF2, addr); + break; + case Cache_LEAF3: + cache_op(Index_Writeback_Inv_LEAF3, addr); + break; + case Cache_LEAF4: + cache_op(Index_Writeback_Inv_LEAF4, addr); + break; + case Cache_LEAF5: + cache_op(Index_Writeback_Inv_LEAF5, addr); + break; + default: + break; + } +} -static char *way_string[] = { NULL, "direct mapped", "2-way", - "3-way", "4-way", "5-way", "6-way", "7-way", "8-way", - "9-way", "10-way", "11-way", "12-way", - "13-way", "14-way", "15-way", "16-way", -}; +void flush_cache_line_hit(unsigned long addr) +{ + int leaf; + struct cache_desc *cdesc = current_cpu_data.cache_leaves; + unsigned int cache_present = current_cpu_data.cache_leaves_present; + + /* If last level 
cache is inclusive, no need to flush other caches. */ + leaf = cache_present - 1; + if (cache_inclusive(cdesc + leaf)) { + __flush_cache_line_hit(leaf, addr); + return; + } + + for (leaf = 0; leaf < cache_present; leaf++) + __flush_cache_line_hit(leaf, addr); +} -static void probe_pcache(void) +static void flush_cache_leaf(unsigned int leaf) +{ + u64 line; + int i, j, nr_nodes; + struct cache_desc *cdesc = current_cpu_data.cache_leaves + leaf; + + nr_nodes = loongson_sysconf.nr_nodes; + if (cache_private(cdesc)) + nr_nodes = 1; + + line = CSR_DMW0_BASE; + do { + for (i = 0; i < cdesc->sets; i++) { + for (j = 0; j < cdesc->ways; j++) { + __flush_cache_line_indexed(leaf, line); + line++; + } + + line -= cdesc->ways; + line += cdesc->linesz; + } + line += 0x100000000000; + } while (--nr_nodes > 0); +} + +asmlinkage __visible void cpu_flush_caches(void) +{ + int leaf; + struct cache_desc *cdesc = current_cpu_data.cache_leaves; + unsigned int cache_present = current_cpu_data.cache_leaves_present; + + /* If last level cache is inclusive, no need to flush other caches. 
*/ + leaf = cache_present - 1; + if (cache_inclusive(cdesc + leaf)) { + flush_cache_leaf(leaf); + return; + } + + for (leaf = 0; leaf < cache_present; leaf++) + flush_cache_leaf(leaf); +} + +static inline void set_cache_basics(struct cache_desc *cdesc, unsigned int leaf) { - struct cpuinfo_loongarch *c = ¤t_cpu_data; - unsigned int lsize, sets, ways; unsigned int config; - config = read_cpucfg(LOONGARCH_CPUCFG17); - lsize = 1 << ((config & CPUCFG17_L1I_SIZE_M) >> CPUCFG17_L1I_SIZE); - sets = 1 << ((config & CPUCFG17_L1I_SETS_M) >> CPUCFG17_L1I_SETS); - ways = ((config & CPUCFG17_L1I_WAYS_M) >> CPUCFG17_L1I_WAYS) + 1; + config = read_cpucfg(LOONGARCH_CPUCFG17 + leaf); + cdesc->linesz = 1 << ((config & CACHE_LSIZE_M) >> CACHE_LSIZE); + cdesc->sets = 1 << ((config & CACHE_SETS_M) >> CACHE_SETS); + cdesc->ways = ((config & CACHE_WAYS_M) >> CACHE_WAYS) + 1; +} - c->icache.linesz = lsize; - c->icache.sets = sets; - c->icache.ways = ways; - icache_size = sets * ways * lsize; - c->icache.waysize = icache_size / c->icache.ways; +#define populate_cache_properties(conifg, cdesc, level, leaf) \ +{ \ + if (level == 1) { \ + cdesc->flags |= CACHE_PRIVATE; \ + } else { \ + if (config & IUPRIV) \ + cdesc->flags |= CACHE_PRIVATE; \ + if (config & IUINCL) \ + cdesc->flags |= CACHE_INCLUSIVE; \ + } \ + cdesc->flags |= CACHE_PRESENT; \ + cdesc->level = level; \ + set_cache_basics(cdesc, leaf); \ + cdesc++; \ + leaf++; \ +} - config = read_cpucfg(LOONGARCH_CPUCFG18); - lsize = 1 << ((config & CPUCFG18_L1D_SIZE_M) >> CPUCFG18_L1D_SIZE); - sets = 1 << ((config & CPUCFG18_L1D_SETS_M) >> CPUCFG18_L1D_SETS); - ways = ((config & CPUCFG18_L1D_WAYS_M) >> CPUCFG18_L1D_WAYS) + 1; +/* +* Each level cache occupies 7bits in order in CPUCFG16 +* except level 1 cache with bit0~2. 
+* +*/ +static void probe_cache_hierarchy(void) +{ + struct cache_desc *cdesc = current_cpu_data.cache_leaves; + unsigned int leaf = 0, level; + unsigned int config = read_cpucfg(LOONGARCH_CPUCFG16); - c->dcache.linesz = lsize; - c->dcache.sets = sets; - c->dcache.ways = ways; - dcache_size = sets * ways * lsize; - c->dcache.waysize = dcache_size / c->dcache.ways; +#define IUPRE (1 << 0) +#define IUUNIFY (1 << 1) +#define IUPRIV (1 << 2) +#define IUINCL (1 << 3) +#define DPRE (1 << 4) +#define DPRIV (1 << 5) +#define DINCL (1 << 6) - c->options |= LOONGARCH_CPU_PREFETCH; +#define L1DPRE (1 << 2) - pr_info("Primary instruction cache %ldkB, %s, %s, linesize %d bytes.\n", - icache_size >> 10, way_string[c->icache.ways], "VIPT", c->icache.linesz); + for (level = 1; level <= CACHE_LEVEL_MAX; level++) { + if (config & IUPRE) { + if (config & IUUNIFY) + cdesc->type = CACHE_TYPE_UNIFIED; + else + cdesc->type = CACHE_TYPE_INST; - pr_info("Primary data cache %ldkB, %s, %s, %s, linesize %d bytes\n", - dcache_size >> 10, way_string[c->dcache.ways], "VIPT", "no aliases", c->dcache.linesz); -} + populate_cache_properties(config, cdesc, level, leaf); + } -static void probe_vcache(void) -{ - struct cpuinfo_loongarch *c = ¤t_cpu_data; - unsigned int lsize, sets, ways; - unsigned int config; + if ((level == 1 && (config & L1DPRE)) || + (level != 1 && (config & DPRE))) { + cdesc->type = CACHE_TYPE_DATA; - config = read_cpucfg(LOONGARCH_CPUCFG19); - lsize = 1 << ((config & CPUCFG19_L2_SIZE_M) >> CPUCFG19_L2_SIZE); - sets = 1 << ((config & CPUCFG19_L2_SETS_M) >> CPUCFG19_L2_SETS); - ways = ((config & CPUCFG19_L2_WAYS_M) >> CPUCFG19_L2_WAYS) + 1; + populate_cache_properties(config, cdesc, level, leaf); + } - c->vcache.linesz = lsize; - c->vcache.sets = sets; - c->vcache.ways = ways; - vcache_size = lsize * sets * ways; - c->vcache.waysize = vcache_size / c->vcache.ways; + if (level == 1) + config = config >> 3; + else + config = config >> 7; - pr_info("Unified victim cache %ldkB %s, 
linesize %d bytes.\n", - vcache_size >> 10, way_string[c->vcache.ways], c->vcache.linesz); -} + if (!config) + break; -static void probe_scache(void) -{ - struct cpuinfo_loongarch *c = ¤t_cpu_data; - unsigned int lsize, sets, ways; - unsigned int config; + } - config = read_cpucfg(LOONGARCH_CPUCFG20); - lsize = 1 << ((config & CPUCFG20_L3_SIZE_M) >> CPUCFG20_L3_SIZE); - sets = 1 << ((config & CPUCFG20_L3_SETS_M) >> CPUCFG20_L3_SETS); - ways = ((config & CPUCFG20_L3_WAYS_M) >> CPUCFG20_L3_WAYS) + 1; + if (leaf > 0) + current_cpu_data.options |= LOONGARCH_CPU_PREFETCH; - c->scache.linesz = lsize; - c->scache.sets = sets; - c->scache.ways = ways; - /* 4 cores. scaches are shared */ - scache_size = lsize * sets * ways; - c->scache.waysize = scache_size / c->scache.ways; + BUG_ON(leaf > CACHE_LEAVES_MAX); - pr_info("Unified secondary cache %ldkB %s, linesize %d bytes.\n", - scache_size >> 10, way_string[c->scache.ways], c->scache.linesz); + current_cpu_data.cache_leaves_present = leaf; } void cpu_cache_init(void) { - probe_pcache(); - probe_vcache(); - probe_scache(); + probe_cache_hierarchy(); shm_align_mask = PAGE_SIZE - 1; } diff --git a/arch/loongarch/pci/pci.c b/arch/loongarch/pci/pci.c index 4456fafc243a..a552b9da72ec 100644 --- a/arch/loongarch/pci/pci.c +++ b/arch/loongarch/pci/pci.c @@ -10,6 +10,7 @@ #include #include #include +#include #define PCI_DEVICE_ID_LOONGSON_HOST 0x7a00 #define PCI_DEVICE_ID_LOONGSON_DC1 0x7a06 @@ -45,12 +46,10 @@ static int __init pcibios_init(void) unsigned int lsize; /* - * Set PCI cacheline size to that of the highest level in the + * Set PCI cacheline size to that of the last level in the * cache hierarchy. */ - lsize = cpu_dcache_line_size(); - lsize = cpu_vcache_line_size() ? : lsize; - lsize = cpu_scache_line_size() ? 
: lsize; + lsize = cpu_last_level_cache_line_size(); BUG_ON(!lsize); -- Gitee From b47b57e8360cff2eea687064a6ceb6d921014383 Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Sat, 3 Sep 2022 17:34:53 +0800 Subject: [PATCH 05/36] rtc: ls2x: Add support for the Loongson-2K/LS7A RTC LoongArch inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP -------------------------------- This RTC module is integrated into the Loongson-2K SoC and the LS7A bridge chip. This version is almost entirely rewritten to make use of current kernel API, and it supports both ACPI and DT. This patch also make CONFIG_RTC_DRV_EFI=m. The purpose of this is to make rtc-ls2x device to /dev/rtc0. Signed-off-by: Huacai Chen Signed-off-by: WANG Xuerui Signed-off-by: Binbin Zhou Cc: Alexandre Belloni Cc: "Rafael J. Wysocki" Cc: devicetree@vger.kernel.org Cc: linux-acpi@vger.kernel.org Cc: linux-mips@vger.kernel.org Cc: loongarch@lists.linux.dev Signed-off-by: Ming Wang Change-Id: If950a9b210fd1ffb32a2d4d7f0ad6c1fb5058303 --- arch/loongarch/configs/loongson3_defconfig | 3 +- drivers/rtc/Kconfig | 11 + drivers/rtc/Makefile | 1 + drivers/rtc/rtc-ls2x.c | 325 +++++++++++++++++++++ 4 files changed, 339 insertions(+), 1 deletion(-) create mode 100644 drivers/rtc/rtc-ls2x.c diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index df6b5c5a59c8..3b4b63235fa8 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -655,7 +655,8 @@ CONFIG_USB_SERIAL_OPTION=m CONFIG_USB_GADGET=y CONFIG_INFINIBAND=m CONFIG_RTC_CLASS=y -CONFIG_RTC_DRV_EFI=y +CONFIG_RTC_DRV_EFI=m +CONFIG_RTC_DRV_LS2X=y CONFIG_DMADEVICES=y CONFIG_UIO=m CONFIG_UIO_PDRV_GENIRQ=m diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index d5512b18a3ae..1d005a001a71 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -1320,6 +1320,17 @@ config RTC_DRV_CROS_EC This driver can also be built as a module. 
If so, the module will be called rtc-cros-ec. +config RTC_DRV_LS2X + tristate "Loongson LS2X RTC" + depends on (ACPI || OF) && MACH_LOONGSON64 || COMPILE_TEST + select REGMAP_MMIO + help + If you say yes here you get support for the RTC on the Loongson-2K + SoC and LS7A bridge, which first appeared on the Loongson-2H. + + This driver can also be built as a module. If so, the module + will be called rtc-ls2x. + comment "on-CPU RTC drivers" config RTC_DRV_ASM9260 diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index fd1d53e789b7..ec0bed7fd4e4 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -88,6 +88,7 @@ obj-$(CONFIG_RTC_DRV_LOONGSON1) += rtc-ls1x.o obj-$(CONFIG_RTC_DRV_LP8788) += rtc-lp8788.o obj-$(CONFIG_RTC_DRV_LPC24XX) += rtc-lpc24xx.o obj-$(CONFIG_RTC_DRV_LPC32XX) += rtc-lpc32xx.o +obj-$(CONFIG_RTC_DRV_LS2X) += rtc-ls2x.o obj-$(CONFIG_RTC_DRV_M41T80) += rtc-m41t80.o obj-$(CONFIG_RTC_DRV_M41T93) += rtc-m41t93.o obj-$(CONFIG_RTC_DRV_M41T94) += rtc-m41t94.o diff --git a/drivers/rtc/rtc-ls2x.c b/drivers/rtc/rtc-ls2x.c new file mode 100644 index 000000000000..962fec171154 --- /dev/null +++ b/drivers/rtc/rtc-ls2x.c @@ -0,0 +1,325 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Loongson-2K/7A RTC driver + * + * Based on the original out-of-tree Loongson-2H RTC driver for Linux 2.6.32, + * by Shaozong Liu . + * + * Maintained out-of-tree by Huacai Chen . + * + * Rewritten for mainline by WANG Xuerui . 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#define TOY_TRIM_REG 0x20 +#define TOY_WRITE0_REG 0x24 +#define TOY_WRITE1_REG 0x28 +#define TOY_READ0_REG 0x2c +#define TOY_READ1_REG 0x30 +#define TOY_MATCH0_REG 0x34 +#define TOY_MATCH1_REG 0x38 +#define TOY_MATCH2_REG 0x3c +#define RTC_CTRL_REG 0x40 +#define RTC_TRIM_REG 0x60 +#define RTC_WRITE0_REG 0x64 +#define RTC_READ0_REG 0x68 +#define RTC_MATCH0_REG 0x6c +#define RTC_MATCH1_REG 0x70 +#define RTC_MATCH2_REG 0x74 + +#define TOY_MON GENMASK(31, 26) +#define TOY_DAY GENMASK(25, 21) +#define TOY_HOUR GENMASK(20, 16) +#define TOY_MIN GENMASK(15, 10) +#define TOY_SEC GENMASK(9, 4) +#define TOY_MSEC GENMASK(3, 0) + +#define TOY_MATCH_YEAR GENMASK(31, 26) +#define TOY_MATCH_MON GENMASK(25, 22) +#define TOY_MATCH_DAY GENMASK(21, 17) +#define TOY_MATCH_HOUR GENMASK(16, 12) +#define TOY_MATCH_MIN GENMASK(11, 6) +#define TOY_MATCH_SEC GENMASK(5, 0) + +/* ACPI and RTC offset */ +#define ACPI_RTC_OFFSET 0x100 + +/* support rtc wakeup */ +#define ACPI_PM1_STS_REG 0x0c +#define ACPI_PM1_EN_REG 0x10 +#define RTC_EN BIT(10) +#define RTC_STS BIT(10) + +struct ls2x_rtc_priv { + struct regmap *regmap; + spinlock_t rtc_reglock; + void __iomem *acpi_base; + struct rtc_device *rtcdev; +}; + +static const struct regmap_config ls2x_rtc_regmap_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, +}; + +struct ls2x_rtc_regs { + u32 reg0; + u32 reg1; +}; + +#if defined(CONFIG_ACPI) +static u32 ls2x_acpi_fix_handler(void *id) +{ + int ret; + struct ls2x_rtc_priv *priv = (struct ls2x_rtc_priv *)id; + + spin_lock(&priv->rtc_reglock); + + /* Disable acpi rtc enabled */ + ret = readl(priv->acpi_base + ACPI_PM1_EN_REG) & ~RTC_EN; + writel(ret, priv->acpi_base + ACPI_PM1_EN_REG); + + /* Clear acpi rtc interrupt Status */ + writel(RTC_STS, priv->acpi_base + ACPI_PM1_STS_REG); + + spin_unlock(&priv->rtc_reglock); + + /* + * The TOY_MATCH0_REG should be cleared 0 here, + * otherwise the interrupt cannot 
be cleared. + * Because the match condition is still satisfied + */ + ret = regmap_write(priv->regmap, TOY_MATCH0_REG, 0); + if (unlikely(ret)) + return ret; + + return 0; +} +#endif + +static inline void ls2x_rtc_regs_to_time(struct ls2x_rtc_regs *regs, + struct rtc_time *tm) +{ + tm->tm_year = regs->reg1; + tm->tm_sec = FIELD_GET(TOY_SEC, regs->reg0); + tm->tm_min = FIELD_GET(TOY_MIN, regs->reg0); + tm->tm_hour = FIELD_GET(TOY_HOUR, regs->reg0); + tm->tm_mday = FIELD_GET(TOY_DAY, regs->reg0); + tm->tm_mon = FIELD_GET(TOY_MON, regs->reg0) - 1; +} + +static inline void ls2x_rtc_time_to_regs(struct rtc_time *tm, + struct ls2x_rtc_regs *regs) +{ + regs->reg0 = FIELD_PREP(TOY_SEC, tm->tm_sec); + regs->reg0 |= FIELD_PREP(TOY_MIN, tm->tm_min); + regs->reg0 |= FIELD_PREP(TOY_HOUR, tm->tm_hour); + regs->reg0 |= FIELD_PREP(TOY_DAY, tm->tm_mday); + regs->reg0 |= FIELD_PREP(TOY_MON, tm->tm_mon + 1); + regs->reg1 = tm->tm_year; +} + +static inline void ls2x_rtc_alarm_regs_to_time(struct ls2x_rtc_regs *regs, + struct rtc_time *tm) +{ + tm->tm_sec = FIELD_GET(TOY_MATCH_SEC, regs->reg0); + tm->tm_min = FIELD_GET(TOY_MATCH_MIN, regs->reg0); + tm->tm_hour = FIELD_GET(TOY_MATCH_HOUR, regs->reg0); + tm->tm_mday = FIELD_GET(TOY_MATCH_DAY, regs->reg0); + tm->tm_mon = FIELD_GET(TOY_MATCH_MON, regs->reg0) - 1; + /* + * The rtc SYS_TOYMATCH0/YEAR bit field is only 6 bits, + * so it means 63 years at most. Therefore, The RTC alarm + * years can be set from 1900 to 1963. + * This causes the initialization of alarm fail during + * call __rtc_read_alarm. We add 64 years offset to + * ls2x_rtc_read_alarm. After adding the offset, + * the RTC alarm clock can be set from 1964 to 2027. 
+ */ + tm->tm_year = FIELD_GET(TOY_MATCH_YEAR, regs->reg0) + 64; +} + +static inline void ls2x_rtc_time_to_alarm_regs(struct rtc_time *tm, + struct ls2x_rtc_regs *regs) +{ + regs->reg0 = FIELD_PREP(TOY_MATCH_SEC, tm->tm_sec); + regs->reg0 |= FIELD_PREP(TOY_MATCH_MIN, tm->tm_min); + regs->reg0 |= FIELD_PREP(TOY_MATCH_HOUR, tm->tm_hour); + regs->reg0 |= FIELD_PREP(TOY_MATCH_DAY, tm->tm_mday); + regs->reg0 |= FIELD_PREP(TOY_MATCH_MON, tm->tm_mon + 1); + regs->reg0 |= FIELD_PREP(TOY_MATCH_YEAR, tm->tm_year); +} + +static int ls2x_rtc_read_time(struct device *dev, struct rtc_time *tm) +{ + int ret; + struct ls2x_rtc_regs regs; + struct ls2x_rtc_priv *priv = dev_get_drvdata(dev); + + ret = regmap_read(priv->regmap, TOY_READ1_REG, &regs.reg1); + if (unlikely(ret)) + return ret; + + ret = regmap_read(priv->regmap, TOY_READ0_REG, &regs.reg0); + if (unlikely(ret)) + return ret; + + ls2x_rtc_regs_to_time(&regs, tm); + + return 0; +} + +static int ls2x_rtc_set_time(struct device *dev, struct rtc_time *tm) +{ + int ret; + struct ls2x_rtc_regs regs; + struct ls2x_rtc_priv *priv = dev_get_drvdata(dev); + + ls2x_rtc_time_to_regs(tm, &regs); + + ret = regmap_write(priv->regmap, TOY_WRITE0_REG, regs.reg0); + if (unlikely(ret)) + return ret; + + return regmap_write(priv->regmap, TOY_WRITE1_REG, regs.reg1); +} + +static int ls2x_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + int ret; + struct ls2x_rtc_regs regs; + struct ls2x_rtc_priv *priv = dev_get_drvdata(dev); + + ret = regmap_read(priv->regmap, TOY_MATCH0_REG, &regs.reg0); + if (unlikely(ret)) + return ret; + + ls2x_rtc_alarm_regs_to_time(&regs, &alrm->time); + +#if defined(CONFIG_ACPI) + ret = readl(priv->acpi_base + ACPI_PM1_EN_REG); + alrm->enabled = !!(ret & RTC_EN); +#endif + + return 0; +} + +static int ls2x_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + struct ls2x_rtc_regs regs; + struct ls2x_rtc_priv *priv = dev_get_drvdata(dev); + + ls2x_rtc_time_to_alarm_regs(&alrm->time, &regs); + + return 
regmap_write(priv->regmap, TOY_MATCH0_REG, regs.reg0); +} + +static struct rtc_class_ops ls2x_rtc_ops = { + .read_time = ls2x_rtc_read_time, + .set_time = ls2x_rtc_set_time, + .read_alarm = ls2x_rtc_read_alarm, + .set_alarm = ls2x_rtc_set_alarm, +}; + +static int ls2x_rtc_probe(struct platform_device *pdev) +{ + int ret; + void __iomem *regs; + struct ls2x_rtc_priv *priv; + struct device *dev = &pdev->dev; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (unlikely(!priv)) + return -ENOMEM; + + spin_lock_init(&priv->rtc_reglock); + + platform_set_drvdata(pdev, priv); + + regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(regs)) + return PTR_ERR(regs); + + priv->regmap = devm_regmap_init_mmio(dev, regs, + &ls2x_rtc_regmap_config); + if (IS_ERR(priv->regmap)) + return PTR_ERR(priv->regmap); + + priv->rtcdev = devm_rtc_allocate_device(dev); + if (IS_ERR(priv->rtcdev)) + return PTR_ERR(priv->rtcdev); + + /* Due to hardware erratum, all years multiple of 4 are considered + * leap year, so only years 2000 through 2099 are usable. + * + * Previous out-of-tree versions of this driver wrote tm_year directly + * into the year register, so epoch 2000 must be used to preserve + * semantics on shipped systems. 
+ */ + priv->rtcdev->range_min = RTC_TIMESTAMP_BEGIN_2000; + priv->rtcdev->range_max = RTC_TIMESTAMP_END_2099; + priv->rtcdev->ops = &ls2x_rtc_ops; + +#ifdef CONFIG_ACPI + priv->acpi_base = regs - ACPI_RTC_OFFSET; + acpi_install_fixed_event_handler(ACPI_EVENT_RTC, + ls2x_acpi_fix_handler, priv); +#endif + + if (!device_can_wakeup(&pdev->dev)) + device_init_wakeup(dev, 1); + + ret = rtc_register_device(priv->rtcdev); + if (unlikely(ret)) + return ret; + + /* An offset of -0.9s will call RTC set for wall clock time 10.0 s at 10.9 s */ + priv->rtcdev->set_offset_nsec = -900000000; + + /* If not cause hwclock huang */ + priv->rtcdev->uie_unsupported = 1; + + return ret; +} + +#ifdef CONFIG_OF +static const struct of_device_id ls2x_rtc_of_match[] = { + { .compatible = "loongson,ls2x-rtc" }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, ls2x_rtc_of_match); +#endif + +#ifdef CONFIG_ACPI +static const struct acpi_device_id ls2x_rtc_acpi_match[] = { + {"LOON0001"}, + {} +}; +MODULE_DEVICE_TABLE(acpi, ls2x_rtc_acpi_match); +#endif + +static struct platform_driver ls2x_rtc_driver = { + .probe = ls2x_rtc_probe, + .driver = { + .name = "ls2x-rtc", + .of_match_table = of_match_ptr(ls2x_rtc_of_match), + .acpi_match_table = ACPI_PTR(ls2x_rtc_acpi_match), + }, +}; + +module_platform_driver(ls2x_rtc_driver); + +MODULE_DESCRIPTION("LS2X RTC driver"); +MODULE_AUTHOR("WANG Xuerui"); +MODULE_AUTHOR("Huacai Chen"); +MODULE_AUTHOR("Binbin Zhou"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:ls2x-rtc"); -- Gitee From 3c16558bebf94d27ac746687fd498eef0f3bef7a Mon Sep 17 00:00:00 2001 From: Juxin Gao Date: Thu, 8 Sep 2022 16:32:52 +0800 Subject: [PATCH 06/36] gpio: loongson: Add 3A/3B/3C/7A gpio driver support LoongArch inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP -------------------------------- Change-Id: Ib1adc61f5279bba8020f26acc32a4de4dee95df5 Signed-off-by: Juxin Gao --- drivers/gpio/Kconfig | 3 +- drivers/gpio/gpio-loongson.c | 413 
++++++++++++++++++++++++++++------- 2 files changed, 340 insertions(+), 76 deletions(-) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index c91355c89ec6..c9401840422a 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -370,7 +370,8 @@ config GPIO_LOGICVC config GPIO_LOONGSON bool "Loongson-2/3 GPIO support" - depends on CPU_LOONGSON2EF || CPU_LOONGSON64 + depends on CPU_LOONGSON2EF || CPU_LOONGSON64 || LOONGARCH + default m help driver for GPIO functionality on Loongson-2F/3A/3B processors. diff --git a/drivers/gpio/gpio-loongson.c b/drivers/gpio/gpio-loongson.c index a42145873cc9..217243027a94 100644 --- a/drivers/gpio/gpio-loongson.c +++ b/drivers/gpio/gpio-loongson.c @@ -1,13 +1,13 @@ -// SPDX-License-Identifier: GPL-2.0-or-later /* - * Loongson-2F/3A/3B GPIO Support + * Loongson-3A/3B/3C/7A GPIO Support * - * Copyright (c) 2008 Richard Liu, STMicroelectronics - * Copyright (c) 2008-2010 Arnaud Patard - * Copyright (c) 2013 Hongbing Hu - * Copyright (c) 2014 Huacai Chen + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
*/ +#include #include #include #include @@ -17,119 +17,382 @@ #include #include #include -#include -#define STLS2F_N_GPIO 4 -#define STLS3A_N_GPIO 16 +/* ============== Data structures =============== */ -#ifdef CONFIG_CPU_LOONGSON64 -#define LOONGSON_N_GPIO STLS3A_N_GPIO -#else -#define LOONGSON_N_GPIO STLS2F_N_GPIO -#endif +/* gpio data */ +struct platform_gpio_data { + u32 gpio_conf; + u32 gpio_out; + u32 gpio_in; + u32 in_start_bit; + u32 support_irq; + char *label; + int gpio_base; + int ngpio; +}; + +#define GPIO_IO_CONF(x) (x->base + x->conf_offset) +#define GPIO_OUT(x) (x->base + x->out_offset) +#define GPIO_IN(x) (x->base + x->in_offset) + +#define LS7A_GPIO_OEN_BYTE(x, gpio) (x->base + x->conf_offset + gpio) +#define LS7A_GPIO_OUT_BYTE(x, gpio) (x->base + x->out_offset + gpio) +#define LS7A_GPIO_IN_BYTE(x, gpio) (x->base + x->in_offset + gpio) + +struct loongson_gpio_chip { + struct gpio_chip chip; + spinlock_t lock; + void __iomem *base; + int conf_offset; + int out_offset; + int in_offset; + int in_start_bit; + u16 *gsi_idx_map; + u16 mapsize; + bool support_irq; +}; /* - * Offset into the register where we read lines, we write them from offset 0. - * This offset is the only thing that stand between us and using - * GPIO_GENERIC. + * GPIO primitives. 
*/ -#define LOONGSON_GPIO_IN_OFFSET 16 +static int loongson_gpio_request(struct gpio_chip *chip, unsigned pin) +{ + if (pin >= chip->ngpio) + return -EINVAL; + else + return 0; +} + +static inline void +__set_direction(struct loongson_gpio_chip *lgpio, unsigned pin, int input) +{ + u64 temp; + u8 value; -static DEFINE_SPINLOCK(gpio_lock); + if (!strcmp(lgpio->chip.label, "loongson,loongson3-gpio") || + !strncmp(lgpio->chip.label, "LOON0007", 8)) { + temp = readq(GPIO_IO_CONF(lgpio)); + if (input) + temp |= 1ULL << pin; + else + temp &= ~(1ULL << pin); + writeq(temp, GPIO_IO_CONF(lgpio)); + return ; + } + if (!strcmp(lgpio->chip.label,"loongson,ls7a-gpio") || + !strncmp(lgpio->chip.label, "LOON0002", 8)){ + if (input) + value = 1; + else + value = 0; + writeb(value, LS7A_GPIO_OEN_BYTE(lgpio, pin)); + return ; + } +} -static int loongson_gpio_get_value(struct gpio_chip *chip, unsigned gpio) +static void __set_level(struct loongson_gpio_chip *lgpio, unsigned pin, int high) { - u32 val; + u64 temp; + u8 value; - spin_lock(&gpio_lock); - val = LOONGSON_GPIODATA; - spin_unlock(&gpio_lock); + /* If GPIO controller is on 3A,then... 
*/ + if (!strcmp(lgpio->chip.label, "loongson,loongson3-gpio") || + !strncmp(lgpio->chip.label, "LOON0007", 8)) { + temp = readq(GPIO_OUT(lgpio)); + if (high) + temp |= 1ULL << pin; + else + temp &= ~(1ULL << pin); + writeq(temp, GPIO_OUT(lgpio)); + return; + } - return !!(val & BIT(gpio + LOONGSON_GPIO_IN_OFFSET)); + if (!strcmp(lgpio->chip.label,"loongson,ls7a-gpio") || + !strncmp(lgpio->chip.label,"LOON0002", 8)){ + if (high) + value = 1; + else + value = 0; + writeb(value, LS7A_GPIO_OUT_BYTE(lgpio, pin)); + return; + } } -static void loongson_gpio_set_value(struct gpio_chip *chip, - unsigned gpio, int value) +static int loongson_gpio_direction_input(struct gpio_chip *chip, unsigned pin) { - u32 val; + unsigned long flags; + struct loongson_gpio_chip *lgpio = + container_of(chip, struct loongson_gpio_chip, chip); - spin_lock(&gpio_lock); - val = LOONGSON_GPIODATA; - if (value) - val |= BIT(gpio); - else - val &= ~BIT(gpio); - LOONGSON_GPIODATA = val; - spin_unlock(&gpio_lock); + spin_lock_irqsave(&lgpio->lock, flags); + __set_direction(lgpio, pin, 1); + spin_unlock_irqrestore(&lgpio->lock, flags); + + return 0; } -static int loongson_gpio_direction_input(struct gpio_chip *chip, unsigned gpio) +static int loongson_gpio_direction_output(struct gpio_chip *chip, + unsigned pin, int value) { - u32 temp; + struct loongson_gpio_chip *lgpio = + container_of(chip, struct loongson_gpio_chip, chip); + unsigned long flags; - spin_lock(&gpio_lock); - temp = LOONGSON_GPIOIE; - temp |= BIT(gpio); - LOONGSON_GPIOIE = temp; - spin_unlock(&gpio_lock); + spin_lock_irqsave(&lgpio->lock, flags); + __set_level(lgpio, pin, value); + __set_direction(lgpio, pin, 0); + spin_unlock_irqrestore(&lgpio->lock, flags); return 0; } -static int loongson_gpio_direction_output(struct gpio_chip *chip, - unsigned gpio, int level) +static int loongson_gpio_get(struct gpio_chip *chip, unsigned pin) { - u32 temp; + struct loongson_gpio_chip *lgpio = + container_of(chip, struct loongson_gpio_chip, 
chip); + u64 temp; + u8 value; - loongson_gpio_set_value(chip, gpio, level); - spin_lock(&gpio_lock); - temp = LOONGSON_GPIOIE; - temp &= ~BIT(gpio); - LOONGSON_GPIOIE = temp; - spin_unlock(&gpio_lock); + /* GPIO controller in 3A is different for 7A */ + if (!strcmp(lgpio->chip.label, "loongson,loongson3-gpio") || + !strncmp(lgpio->chip.label, "LOON0007", 8)) { + temp = readq(GPIO_IN(lgpio)); + return ((temp & (1ULL << (pin + lgpio->in_start_bit))) != 0); + } + + if (!strcmp(lgpio->chip.label,"loongson,ls7a-gpio") || + !strncmp(lgpio->chip.label, "LOON0002", 8)){ + value = readb(LS7A_GPIO_IN_BYTE(lgpio, pin)); + return (value & 1); + } + + return -ENXIO; +} + +static void loongson_gpio_set(struct gpio_chip *chip, unsigned pin, int value) +{ + struct loongson_gpio_chip *lgpio = + container_of(chip, struct loongson_gpio_chip, chip); + unsigned long flags; + + spin_lock_irqsave(&lgpio->lock, flags); + __set_level(lgpio, pin, value); + spin_unlock_irqrestore(&lgpio->lock, flags); +} + +static int loongson_gpio_to_irq(struct gpio_chip *chip, unsigned int offset) +{ + struct platform_device *pdev = + container_of(chip->parent, struct platform_device, dev); + struct loongson_gpio_chip *lgpio = + container_of(chip, struct loongson_gpio_chip, chip); + + if (offset >= chip->ngpio) + return -EINVAL; + + if ((lgpio->gsi_idx_map != NULL) && (offset < lgpio->mapsize)) + offset = lgpio->gsi_idx_map[offset]; + + return platform_get_irq(pdev, offset); +} + +static int loongson_gpio_init(struct device *dev, struct loongson_gpio_chip *lgpio, struct device_node *np, + void __iomem *base) +{ + lgpio->chip.request = loongson_gpio_request; + lgpio->chip.direction_input = loongson_gpio_direction_input; + lgpio->chip.get = loongson_gpio_get; + lgpio->chip.direction_output = loongson_gpio_direction_output; + lgpio->chip.set = loongson_gpio_set; + lgpio->chip.can_sleep = 0; + lgpio->chip.of_node = np; + lgpio->chip.parent = dev; + spin_lock_init(&lgpio->lock); + lgpio->base = (void __iomem 
*)base; + + if (!strcmp(lgpio->chip.label, "loongson,ls7a-gpio") || + !strncmp(lgpio->chip.label, "LOON0002", 8) || + !strcmp(lgpio->chip.label, "loongson,loongson3-gpio") || + !strncmp(lgpio->chip.label, "LOON0007", 8)) { + + lgpio->chip.to_irq = loongson_gpio_to_irq; + } + gpiochip_add(&lgpio->chip); return 0; } + +static void of_loongson_gpio_get_props(struct device_node *np, + struct loongson_gpio_chip *lgpio) +{ + const char *name; + + of_property_read_u32(np, "ngpios", (u32 *)&lgpio->chip.ngpio); + of_property_read_u32(np, "gpio_base", (u32 *)&lgpio->chip.base); + of_property_read_u32(np, "conf_offset", (u32 *)&lgpio->conf_offset); + of_property_read_u32(np, "out_offset", (u32 *)&lgpio->out_offset); + of_property_read_u32(np, "in_offset", (u32 *)&lgpio->in_offset); + of_property_read_string(np, "compatible", &name); + if (!strcmp(name, "loongson,loongson3-gpio")) { + of_property_read_u32(np, "in_start_bit", + (u32 *)&lgpio->in_start_bit); + if (of_property_read_bool(np, "support_irq")) + lgpio->support_irq = true; + } + lgpio->chip.label = kstrdup(name, GFP_KERNEL); +} + +static void acpi_loongson_gpio_get_props(struct platform_device *pdev, + struct loongson_gpio_chip *lgpio) +{ + + struct device *dev = &pdev->dev; + int rval; + + device_property_read_u32(dev, "ngpios", (u32 *)&lgpio->chip.ngpio); + device_property_read_u32(dev, "gpio_base", (u32 *)&lgpio->chip.base); + device_property_read_u32(dev, "conf_offset", (u32 *)&lgpio->conf_offset); + device_property_read_u32(dev, "out_offset", (u32 *)&lgpio->out_offset); + device_property_read_u32(dev, "in_offset", (u32 *)&lgpio->in_offset); + rval = device_property_read_u16_array(dev, "gsi_idx_map", NULL, 0); + if (rval > 0) { + lgpio->gsi_idx_map = + kmalloc_array(rval, sizeof(*lgpio->gsi_idx_map), + GFP_KERNEL); + if (unlikely(!lgpio->gsi_idx_map)) { + dev_err(dev, "Alloc gsi_idx_map fail!\n"); + } else { + lgpio->mapsize = rval; + device_property_read_u16_array(dev, "gsi_idx_map", + lgpio->gsi_idx_map, 
lgpio->mapsize); + } + } + if (!strcmp(pdev->name, "LOON0007")) { + device_property_read_u32(dev, "in_start_bit", + (u32 *)&lgpio->in_start_bit); + if (device_property_read_bool(dev, "support_irq")) + lgpio->support_irq = true; + } + lgpio->chip.label = kstrdup(pdev->name, GFP_KERNEL); +} + +static void platform_loongson_gpio_get_props(struct platform_device *pdev, + struct loongson_gpio_chip *lgpio) +{ + struct platform_gpio_data *gpio_data = + (struct platform_gpio_data *)pdev->dev.platform_data; + + lgpio->chip.ngpio = gpio_data->ngpio; + lgpio->chip.base = gpio_data->gpio_base; + lgpio->conf_offset = gpio_data->gpio_conf; + lgpio->out_offset = gpio_data->gpio_out; + lgpio->in_offset = gpio_data->gpio_in; + if (!strcmp(gpio_data->label, "loongson,loongson3-gpio")) { + lgpio->in_start_bit = gpio_data->in_start_bit; + lgpio->support_irq = gpio_data->support_irq; + } + lgpio->chip.label = kstrdup(gpio_data->label, GFP_KERNEL); +} + static int loongson_gpio_probe(struct platform_device *pdev) { - struct gpio_chip *gc; + struct resource *iores; + void __iomem *base; + struct loongson_gpio_chip *lgpio; + struct device_node *np = pdev->dev.of_node; struct device *dev = &pdev->dev; + int ret = 0; - gc = devm_kzalloc(dev, sizeof(*gc), GFP_KERNEL); - if (!gc) + lgpio = kzalloc(sizeof(struct loongson_gpio_chip), GFP_KERNEL); + if (!lgpio) return -ENOMEM; - gc->label = "loongson-gpio-chip"; - gc->base = 0; - gc->ngpio = LOONGSON_N_GPIO; - gc->get = loongson_gpio_get_value; - gc->set = loongson_gpio_set_value; - gc->direction_input = loongson_gpio_direction_input; - gc->direction_output = loongson_gpio_direction_output; + if (np){ + of_loongson_gpio_get_props(np,lgpio); + } else if (ACPI_COMPANION(&pdev->dev)) { + acpi_loongson_gpio_get_props(pdev,lgpio); + } else { + platform_loongson_gpio_get_props(pdev,lgpio); + } + + iores = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!iores) { + ret = -ENODEV; + goto out; + } + if (!request_mem_region(iores->start, 
resource_size(iores), + pdev->name)) { + ret = -EBUSY; + goto out; + } + base = ioremap(iores->start, resource_size(iores)); + if (!base) { + ret = -ENOMEM; + goto out; + } + platform_set_drvdata(pdev, lgpio); + loongson_gpio_init(dev,lgpio, np, base); - return gpiochip_add_data(gc, NULL); + return 0; +out: + pr_err("%s: %s: missing mandatory property\n", __func__, np->name); + return ret; } -static struct platform_driver loongson_gpio_driver = { +static int loongson_gpio_remove(struct platform_device *pdev) +{ + struct loongson_gpio_chip *lgpio = platform_get_drvdata(pdev); + struct resource *mem; + + platform_set_drvdata(pdev, NULL); + gpiochip_remove(&lgpio->chip); + iounmap(lgpio->base); + kfree(lgpio->gsi_idx_map); + kfree(lgpio); + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + release_mem_region(mem->start, resource_size(mem)); + return 0; +} + +static const struct of_device_id loongson_gpio_dt_ids[] = { + { .compatible = "loongson,loongson3-gpio"}, + { .compatible = "loongson,ls7a-gpio"}, + {} +}; +MODULE_DEVICE_TABLE(of, loongson_gpio_dt_ids); + +static const struct acpi_device_id loongson_gpio_acpi_match[] = { + {"LOON0002"}, + {"LOON0007"}, + {} +}; +MODULE_DEVICE_TABLE(acpi, loongson_gpio_acpi_match); + +static struct platform_driver ls_gpio_driver = { .driver = { .name = "loongson-gpio", + .owner = THIS_MODULE, + .of_match_table = loongson_gpio_dt_ids, + .acpi_match_table = ACPI_PTR(loongson_gpio_acpi_match), }, .probe = loongson_gpio_probe, + .remove = loongson_gpio_remove, }; static int __init loongson_gpio_setup(void) { - struct platform_device *pdev; - int ret; - - ret = platform_driver_register(&loongson_gpio_driver); - if (ret) { - pr_err("error registering loongson GPIO driver\n"); - return ret; - } + return platform_driver_register(&ls_gpio_driver); +} +subsys_initcall(loongson_gpio_setup); - pdev = platform_device_register_simple("loongson-gpio", -1, NULL, 0); - return PTR_ERR_OR_ZERO(pdev); +static void __exit 
loongson_gpio_driver(void) +{ + platform_driver_unregister(&ls_gpio_driver); } -postcore_initcall(loongson_gpio_setup); +module_exit(loongson_gpio_driver); +MODULE_AUTHOR("Loongson Technology Corporation Limited"); +MODULE_DESCRIPTION("LOONGSON GPIO"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:loongson_gpio"); -- Gitee From 2c1c0b0ca61d5055dddb48deaa3e4af32af9fc13 Mon Sep 17 00:00:00 2001 From: Jianmin Lv Date: Wed, 27 Jul 2022 19:03:32 +0800 Subject: [PATCH 07/36] irqchip/ACPI: Introduce ACPI_IRQ_MODEL_LPIC for LoongArch LoongArch inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP -------------------------------- For LoongArch, ACPI_IRQ_MODEL_LPIC is introduced, and high level trigger type is set for PCI devices legacy irq. Change-Id: I5f9d44e2d9c7d3492fedf89b64becac5fb5cc270 Signed-off-by: Jianmin Lv --- arch/loongarch/kernel/acpi.c | 1 + drivers/acpi/pci_irq.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 0d6a4de10f6c..3d9759c3ccb5 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -165,6 +165,7 @@ static void __init acpi_process_madt(void) acpi_table_parse_madt(ACPI_MADT_TYPE_EIO_PIC, acpi_parse_eio_master, MAX_IO_PICS); + acpi_irq_model = ACPI_IRQ_MODEL_LPIC; loongson_sysconf.nr_cpus = num_processors; } diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index 14ee631cb7cf..0684ab9b8f62 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -405,8 +405,14 @@ int acpi_pci_irq_enable(struct pci_dev *dev) * controller and must therefore be considered active high * as default. */ +#ifdef CONFIG_LOONGARCH + int polarity = acpi_irq_model == ACPI_IRQ_MODEL_GIC || + acpi_irq_model == ACPI_IRQ_MODEL_LPIC ? + ACPI_ACTIVE_HIGH : ACPI_ACTIVE_LOW; +#else int polarity = acpi_irq_model == ACPI_IRQ_MODEL_GIC ? 
ACPI_ACTIVE_HIGH : ACPI_ACTIVE_LOW; +#endif char *link = NULL; char link_desc[16]; int rc; -- Gitee From 901aefa6e55b06734f246bb73088873b97254a7b Mon Sep 17 00:00:00 2001 From: liuyun Date: Wed, 21 Sep 2022 18:05:48 +0800 Subject: [PATCH 08/36] LoongArch: Call irq_find_matching_fwnode get irq domain LoongArch inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP -------------------------------- Fix patch "LoongArch: Old BPI compatibility" Signed-off-by: liuyun Change-Id: I67c2535a86213e620546889a081511eaa499289a --- arch/loongarch/kernel/legacy_boot.c | 12 ++++++++++-- drivers/irqchip/irq-loongarch-cpu.c | 5 ----- drivers/irqchip/irq-loongson-pch-pic.c | 5 ----- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/arch/loongarch/kernel/legacy_boot.c b/arch/loongarch/kernel/legacy_boot.c index f0934639de3e..d8f749653939 100644 --- a/arch/loongarch/kernel/legacy_boot.c +++ b/arch/loongarch/kernel/legacy_boot.c @@ -239,7 +239,11 @@ int setup_legacy_IRQ(void) printk("CPU domain init eror!\n"); return -1; } - cpu_domain = get_cpudomain(); + cpu_domain = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY); + if (!cpu_domain) { + printk("CPU domain error!\n"); + return -1; + } ret = liointc_acpi_init(cpu_domain, acpi_liointc); if (ret) { printk("Liointc domain init eror!\n"); @@ -269,7 +273,11 @@ int setup_legacy_IRQ(void) pch_msi_parse_madt((union acpi_subtable_headers *)acpi_pchmsi[0], 0); } - pic_domain = get_pchpic_irq_domain(); + pic_domain = irq_find_matching_fwnode(pch_pic_handle[0], DOMAIN_BUS_ANY); + if (!pic_domain) { + printk("Pic domain error!\n"); + return -1; + } if (pic_domain) pch_lpc_acpi_init(pic_domain, acpi_pchlpc); diff --git a/drivers/irqchip/irq-loongarch-cpu.c b/drivers/irqchip/irq-loongarch-cpu.c index d3a0bbe4a9f7..adbd32b58f77 100644 --- a/drivers/irqchip/irq-loongarch-cpu.c +++ b/drivers/irqchip/irq-loongarch-cpu.c @@ -119,11 +119,6 @@ static int __init acpi_cascade_irqdomain_init(void) return 0; 
} -struct irq_domain *get_cpudomain(void) -{ - return irq_domain; -} - int __init cpuintc_acpi_init(union acpi_subtable_headers *header, const unsigned long end) { diff --git a/drivers/irqchip/irq-loongson-pch-pic.c b/drivers/irqchip/irq-loongson-pch-pic.c index cd8b16293f39..e09078e1ac01 100644 --- a/drivers/irqchip/irq-loongson-pch-pic.c +++ b/drivers/irqchip/irq-loongson-pch-pic.c @@ -52,11 +52,6 @@ static struct pch_pic *pch_pic_priv[MAX_IO_PICS]; struct fwnode_handle *pch_pic_handle[MAX_IO_PICS]; -struct irq_domain *get_pchpic_irq_domain(void) -{ - return pch_pic_priv[0]->pic_domain; -} - static void pch_pic_bitset(struct pch_pic *priv, int offset, int bit) { u32 reg; -- Gitee From fda2ad5e116d3acac6ac303722795d2720c13540 Mon Sep 17 00:00:00 2001 From: Juxin Gao Date: Fri, 2 Sep 2022 17:51:31 +0800 Subject: [PATCH 09/36] i2c: loongson: Add LS7A i2c driver support LoongArch inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP -------------------------------- Change-Id: Ia57bb17a0663d038c61cdd52ce5b6fe63a979dde Signed-off-by: Juxin Gao --- drivers/i2c/busses/Kconfig | 8 + drivers/i2c/busses/Makefile | 1 + drivers/i2c/busses/i2c-loongson.c | 592 ++++++++++++++++++++++++++++++ 3 files changed, 601 insertions(+) create mode 100644 drivers/i2c/busses/i2c-loongson.c diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 9535e995ecc9..5763a1e9360b 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -866,6 +866,14 @@ config I2C_OWL Say Y here if you want to use the I2C bus controller on the Actions Semiconductor Owl SoC's. +config I2C_LOONGSON + tristate "Loongson I2C adapter" + depends on LOONGARCH + default m + help + If you say yes to this option, support will be included for the + I2C interface on the Loongson's LS7A Platform-Bridge. 
+ config I2C_PASEMI tristate "PA Semi SMBus interface" depends on PPC_PASEMI && PCI diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile index e6d5d108e22b..280e05622d50 100644 --- a/drivers/i2c/busses/Makefile +++ b/drivers/i2c/busses/Makefile @@ -89,6 +89,7 @@ obj-$(CONFIG_I2C_NPCM7XX) += i2c-npcm7xx.o obj-$(CONFIG_I2C_OCORES) += i2c-ocores.o obj-$(CONFIG_I2C_OMAP) += i2c-omap.o obj-$(CONFIG_I2C_OWL) += i2c-owl.o +obj-$(CONFIG_I2C_LOONGSON) += i2c-loongson.o obj-$(CONFIG_I2C_PASEMI) += i2c-pasemi.o obj-$(CONFIG_I2C_PCA_PLATFORM) += i2c-pca-platform.o obj-$(CONFIG_I2C_PMCMSP) += i2c-pmcmsp.o diff --git a/drivers/i2c/busses/i2c-loongson.c b/drivers/i2c/busses/i2c-loongson.c new file mode 100644 index 000000000000..a2dc0dd1e4f8 --- /dev/null +++ b/drivers/i2c/busses/i2c-loongson.c @@ -0,0 +1,592 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Loongson-7A I2C master mode driver + * + * Copyright (C) 2013 Loongson Technology Corporation Limited + * Copyright (C) 2014-2017 Lemote, Inc. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRIVER_NAME "loongson_i2c" + +#define LOONGSON_I2C_PRER_LO_REG 0x0 +#define LOONGSON_I2C_PRER_HI_REG 0x1 +#define LOONGSON_I2C_CTR_REG 0x2 +#define LOONGSON_I2C_TXR_REG 0x3 +#define LOONGSON_I2C_RXR_REG 0x3 +#define LOONGSON_I2C_CR_REG 0x4 +#define LOONGSON_I2C_SR_REG 0x4 +#define LOONGSON_I2C_BLTOP_REG 0x5 +#define LOONGSON_I2C_SADDR_REG 0x7 + +#define CTR_EN 0x80 +#define CTR_IEN 0x40 +#define CTR_TXROK 0x90 +#define CTR_RXROK 0x88 + +#define CR_START 0x81 +#define CR_STOP 0x41 +#define CR_READ 0x21 +#define CR_WRITE 0x11 +#define CR_ACK 0x8 +#define CR_IACK 0x1 + +#define SR_NOACK 0x80 +#define SR_BUSY 0x40 +#define SR_AL 0x20 +#define SR_SLAVE_ADDRESSED 0x10 +#define SR_SLAVE_RW 0x8 +#define SR_TIP 0x2 +#define SR_IF 0x1 + +#define i2c_readb(addr) readb(dev->base + addr) +#define i2c_writeb(val, addr) writeb(val, dev->base + addr) + +#ifdef LOONGSON_I2C_DEBUG +#define i2c_debug(fmt, args...) printk(KERN_CRIT fmt, ##args) +#else +#define i2c_debug(fmt, args...) 
+#endif + +static bool repeated_start = 1; +module_param(repeated_start, bool, S_IRUSR | S_IWUSR); +MODULE_PARM_DESC(repeated_start, "Compatible with devices that support repeated start"); + +enum loongson_i2c_slave_state { + LOONGSON_I2C_SLAVE_STOP, + LOONGSON_I2C_SLAVE_START, + LOONGSON_I2C_SLAVE_READ_REQUESTED, + LOONGSON_I2C_SLAVE_READ_PROCESSED, + LOONGSON_I2C_SLAVE_WRITE_REQUESTED, + LOONGSON_I2C_SLAVE_WRITE_RECEIVED, +}; + +struct loongson_i2c_dev { + spinlock_t lock; + unsigned int suspended:1; + struct device *dev; + void __iomem *base; + int irq; + struct completion cmd_complete; + struct resource *ioarea; + struct i2c_adapter adapter; +#if IS_ENABLED(CONFIG_I2C_SLAVE) + struct i2c_client *slave; + enum loongson_i2c_slave_state slave_state; +#endif /* CONFIG_I2C_SLAVE */ +}; + +static int i2c_stop(struct loongson_i2c_dev *dev) +{ + unsigned long time_left; + +again: + i2c_writeb(CR_STOP, LOONGSON_I2C_CR_REG); + time_left = wait_for_completion_timeout( + &dev->cmd_complete, + (&dev->adapter)->timeout); + if (!time_left) { + pr_info("Timeout abort message cmd\n"); + return -1; + } + + i2c_readb(LOONGSON_I2C_SR_REG); + while (i2c_readb(LOONGSON_I2C_SR_REG) & SR_BUSY) + goto again; + + return 0; +} + +static int i2c_start(struct loongson_i2c_dev *dev, + int dev_addr, int flags) +{ + unsigned long time_left; + int retry = 5; + unsigned char addr = (dev_addr & 0x7f) << 1; + addr |= (flags & I2C_M_RD)? 
1:0; + +start: + mdelay(1); + i2c_writeb(addr, LOONGSON_I2C_TXR_REG); + i2c_debug("%s : i2c device address: 0x%x\n", + __func__, __LINE__, addr); + i2c_writeb((CR_START | CR_WRITE), LOONGSON_I2C_CR_REG); + time_left = wait_for_completion_timeout( + &dev->cmd_complete, + (&dev->adapter)->timeout); + if (!time_left) { + pr_info("Timeout abort message cmd\n"); + return -1; + } + + if (i2c_readb(LOONGSON_I2C_SR_REG) & SR_NOACK) { + if (i2c_stop(dev) < 0) + return -1; + while (retry--) + goto start; + pr_debug("There is no i2c device ack\n"); + return 0; + } + return 1; +} + +#if IS_ENABLED(CONFIG_I2C_SLAVE) +static void __loongson_i2c_reg_slave(struct loongson_i2c_dev *dev, u16 slave_addr) +{ + /* Set slave addr. */ + i2c_writeb(slave_addr & 0x7f, LOONGSON_I2C_SADDR_REG); + + /* Turn on slave mode. */ + i2c_writeb(0xc0, LOONGSON_I2C_CTR_REG); +} + +static int loongson_i2c_reg_slave(struct i2c_client *client) +{ + struct loongson_i2c_dev *dev = i2c_get_adapdata(client->adapter); + unsigned long flags; + + if (dev->slave) { + return -EINVAL; + } + + __loongson_i2c_reg_slave(dev, client->addr); + + dev->slave = client; + dev->slave_state = LOONGSON_I2C_SLAVE_STOP; + + return 0; +} + +static int loongson_i2c_unreg_slave(struct i2c_client *client) +{ + struct loongson_i2c_dev *dev = i2c_get_adapdata(client->adapter); + unsigned long flags; + + if (!dev->slave) { + return -EINVAL; + } + + /* Turn off slave mode. 
*/ + i2c_writeb(0xa0, LOONGSON_I2C_CTR_REG); + + dev->slave = NULL; + + return 0; +} +#endif /* CONFIG_I2C_SLAVE */ + +static void loongson_i2c_reginit(struct loongson_i2c_dev *dev) +{ +#if IS_ENABLED(CONFIG_I2C_SLAVE) + if (dev->slave) { + __loongson_i2c_reg_slave(dev, dev->slave->addr); + return; + } +#endif /* CONFIG_I2C_SLAVE */ + i2c_writeb(i2c_readb(LOONGSON_I2C_CR_REG) | 0x01, LOONGSON_I2C_CR_REG); + i2c_writeb(i2c_readb(LOONGSON_I2C_CTR_REG) & ~0x80, LOONGSON_I2C_CTR_REG); + i2c_writeb(0x2c, LOONGSON_I2C_PRER_LO_REG); + i2c_writeb(0x1, LOONGSON_I2C_PRER_HI_REG); + i2c_writeb(i2c_readb(LOONGSON_I2C_CTR_REG) | 0xe0, LOONGSON_I2C_CTR_REG); +} + +static int i2c_read(struct loongson_i2c_dev *dev, + unsigned char *buf, int count) +{ + int i; + unsigned long time_left; + + for (i = 0; i < count; i++) { + i2c_writeb((i == count - 1)? + (CR_READ | CR_ACK) : CR_READ, + LOONGSON_I2C_CR_REG); + time_left = wait_for_completion_timeout( + &dev->cmd_complete, + (&dev->adapter)->timeout); + if (!time_left) { + pr_info("Timeout abort message cmd\n"); + return -1; + } + + buf[i] = i2c_readb(LOONGSON_I2C_RXR_REG); + i2c_debug("%s : read buf[%d] <= %02x\n", + __func__, __LINE__, i, buf[i]); + } + + return i; +} + +static int i2c_write(struct loongson_i2c_dev *dev, + unsigned char *buf, int count) +{ + int i; + unsigned long time_left; + + for (i = 0; i < count; i++) { + i2c_writeb(buf[i], LOONGSON_I2C_TXR_REG); + i2c_debug("%s : write buf[%d] => %02x\n", + __func__, __LINE__, i, buf[i]); + i2c_writeb(CR_WRITE, LOONGSON_I2C_CR_REG); + time_left = wait_for_completion_timeout( + &dev->cmd_complete, + (&dev->adapter)->timeout); + if (!time_left) { + pr_info("Timeout abort message cmd\n"); + return -1; + } + + if (i2c_readb(LOONGSON_I2C_SR_REG) & SR_NOACK) { + i2c_debug("%s : device no ack\n", + __func__, __LINE__); + if (i2c_stop(dev) < 0) + return -1; + return 0; + } + } + + return i; +} + +static int i2c_doxfer(struct loongson_i2c_dev *dev, + struct i2c_msg *msgs, int num) +{ + 
struct i2c_msg *m = msgs; + int i, err; + + for (i = 0; i < num; i++) { + reinit_completion(&dev->cmd_complete); + err = i2c_start(dev, m->addr, m->flags); + if (err <= 0) + return err; + + if (m->flags & I2C_M_RD) { + if (i2c_read(dev, m->buf, m->len) < 0) + return -1; + } else { + if (i2c_write(dev, m->buf, m->len) < 0) + return -1; + } + ++m; + if (!repeated_start && i2c_stop(dev) < 0) + return -1; + } + if (repeated_start && i2c_stop(dev) < 0) + return -1; + return i; +} + +static int i2c_xfer(struct i2c_adapter *adap, + struct i2c_msg *msgs, int num) +{ + int ret; + int retry; + struct loongson_i2c_dev *dev; + + dev = i2c_get_adapdata(adap); + for (retry = 0; retry < adap->retries; retry++) { + ret = i2c_doxfer(dev, msgs, num); + if (ret != -EAGAIN) + return ret; + + udelay(100); + } + + return -EREMOTEIO; +} + +static unsigned int i2c_func(struct i2c_adapter *adap) +{ + return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; +} + +static const struct i2c_algorithm loongson_i2c_algo = { + .master_xfer = i2c_xfer, + .functionality = i2c_func, +#if IS_ENABLED(CONFIG_I2C_SLAVE) + .reg_slave = loongson_i2c_reg_slave, + .unreg_slave = loongson_i2c_unreg_slave, +#endif /* CONFIG_I2C_SLAVE */ +}; + +#if IS_ENABLED(CONFIG_I2C_SLAVE) +static bool loongson_i2c_slave_irq(struct loongson_i2c_dev *dev) +{ + u32 stat; + struct i2c_client *slave = dev->slave; + u8 value; + + stat = i2c_readb(LOONGSON_I2C_SR_REG); + + /* Slave was requested, restart state machine. */ + if (stat & SR_SLAVE_ADDRESSED) { + dev->slave_state = LOONGSON_I2C_SLAVE_START; + i2c_writeb(CTR_RXROK | CTR_IEN, LOONGSON_I2C_CTR_REG); + } + + /* Slave is not currently active, irq was for someone else. */ + if (dev->slave_state == LOONGSON_I2C_SLAVE_STOP) { + return IRQ_NONE; + } + + /* Handle address frame. 
*/ + if (dev->slave_state == LOONGSON_I2C_SLAVE_START) { + if (stat & SR_SLAVE_RW) //slave be read + dev->slave_state = + LOONGSON_I2C_SLAVE_READ_REQUESTED; + else + dev->slave_state = + LOONGSON_I2C_SLAVE_WRITE_REQUESTED; + } + + /* Slave was asked to stop. */ + if (stat & SR_NOACK) { + dev->slave_state = LOONGSON_I2C_SLAVE_STOP; + } + + value = i2c_readb(LOONGSON_I2C_RXR_REG); + switch (dev->slave_state) { + case LOONGSON_I2C_SLAVE_READ_REQUESTED: + dev->slave_state = LOONGSON_I2C_SLAVE_READ_PROCESSED; + i2c_slave_event(slave, I2C_SLAVE_READ_REQUESTED, &value); + i2c_writeb(value, LOONGSON_I2C_TXR_REG); + i2c_writeb(CTR_TXROK | CTR_IEN, LOONGSON_I2C_CTR_REG); + break; + case LOONGSON_I2C_SLAVE_READ_PROCESSED: + i2c_slave_event(slave, I2C_SLAVE_READ_PROCESSED, &value); + i2c_writeb(value, LOONGSON_I2C_TXR_REG); + i2c_writeb(CTR_TXROK | CTR_IEN, LOONGSON_I2C_CTR_REG); + break; + case LOONGSON_I2C_SLAVE_WRITE_REQUESTED: + dev->slave_state = LOONGSON_I2C_SLAVE_WRITE_RECEIVED; + i2c_slave_event(slave, I2C_SLAVE_WRITE_REQUESTED, &value); + break; + case LOONGSON_I2C_SLAVE_WRITE_RECEIVED: + i2c_slave_event(slave, I2C_SLAVE_WRITE_RECEIVED, &value); + i2c_writeb(CTR_RXROK | CTR_IEN, LOONGSON_I2C_CTR_REG); + break; + case LOONGSON_I2C_SLAVE_STOP: + i2c_slave_event(slave, I2C_SLAVE_STOP, &value); + i2c_writeb(0, LOONGSON_I2C_TXR_REG); + i2c_writeb(CTR_TXROK | CTR_IEN, LOONGSON_I2C_CTR_REG); + break; + default: + dev_err(dev->dev, "unhandled slave_state: %d\n", + dev->slave_state); + break; + } + +out: + return IRQ_HANDLED; +} +#endif /* CONFIG_I2C_SLAVE */ + +/* + * Interrupt service routine. This gets called whenever an I2C interrupt + * occurs. 
+ */ +static irqreturn_t i2c_loongson_isr(int this_irq, void *dev_id) +{ + unsigned char iflag; + struct loongson_i2c_dev *dev = dev_id; + + iflag = i2c_readb(LOONGSON_I2C_SR_REG); + + if (iflag & SR_IF) { + i2c_writeb(CR_IACK, LOONGSON_I2C_CR_REG); +#if IS_ENABLED(CONFIG_I2C_SLAVE) + if (dev->slave) { + loongson_i2c_slave_irq(dev); + } +#endif + if (!(iflag & SR_TIP)) + complete(&dev->cmd_complete); + } else + return IRQ_NONE; + + return IRQ_HANDLED; +} + +static int loongson_i2c_probe(struct platform_device *pdev) +{ + struct loongson_i2c_dev *dev; + struct i2c_adapter *adap; + struct resource *mem, *ioarea; + int r, irq; + + /* NOTE: driver uses the static register mapping */ + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!mem) { + dev_err(&pdev->dev, "no mem resource?\n"); + return -ENODEV; + } + irq = platform_get_irq(pdev, 0); + if (irq <= 0) { + dev_err(&pdev->dev, "no irq resource?\n"); + return -ENODEV; + } + + ioarea = request_mem_region(mem->start, resource_size(mem), + pdev->name); + if (!ioarea) { + dev_err(&pdev->dev, "I2C region already claimed\n"); + return -EBUSY; + } + + dev = kzalloc(sizeof(struct loongson_i2c_dev), GFP_KERNEL); + if (!dev) { + r = -ENOMEM; + goto err_release_region; + } + + init_completion(&dev->cmd_complete); + + dev->dev = &pdev->dev; + dev->irq = irq; + dev->base = ioremap(mem->start, resource_size(mem)); + if (!dev->base) { + r = -ENOMEM; + goto err_free_mem; + } + + platform_set_drvdata(pdev, dev); + + loongson_i2c_reginit(dev); + + r = request_irq(dev->irq, i2c_loongson_isr, IRQF_SHARED, DRIVER_NAME, dev); + if (r) + dev_err(&pdev->dev, "failure requesting irq %i\n", dev->irq); + + adap = &dev->adapter; + i2c_set_adapdata(adap, dev); + adap->nr = pdev->id; + strlcpy(adap->name, pdev->name, sizeof(adap->name)); + adap->owner = THIS_MODULE; + adap->class = I2C_CLASS_HWMON; + adap->retries = 5; + adap->algo = &loongson_i2c_algo; + adap->dev.parent = &pdev->dev; + adap->dev.of_node = pdev->dev.of_node; + 
ACPI_COMPANION_SET(&adap->dev, ACPI_COMPANION(&pdev->dev)); + adap->timeout = msecs_to_jiffies(100); + + /* i2c device drivers may be active on return from add_adapter() */ + r = i2c_add_adapter(adap); + if (r) { + dev_err(dev->dev, "failure adding adapter\n"); + goto err_iounmap; + } + + return 0; + +err_iounmap: + iounmap(dev->base); +err_free_mem: + platform_set_drvdata(pdev, NULL); + kfree(dev); +err_release_region: + release_mem_region(mem->start, resource_size(mem)); + + return r; +} + +static int loongson_i2c_remove(struct platform_device *pdev) +{ + struct loongson_i2c_dev *dev = platform_get_drvdata(pdev); + struct resource *mem; + + platform_set_drvdata(pdev, NULL); + i2c_del_adapter(&dev->adapter); + iounmap(dev->base); + kfree(dev); + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + release_mem_region(mem->start, resource_size(mem)); + return 0; +} + +#ifdef CONFIG_PM +static int loongson_i2c_suspend_noirq(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct loongson_i2c_dev *i2c_dev = platform_get_drvdata(pdev); + + i2c_dev->suspended = 1; + + return 0; +} + +static int loongson_i2c_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct loongson_i2c_dev *i2c_dev = platform_get_drvdata(pdev); + + i2c_dev->suspended = 0; + loongson_i2c_reginit(i2c_dev); + + return 0; +} + +static const struct dev_pm_ops loongson_i2c_dev_pm_ops = { + .suspend_noirq = loongson_i2c_suspend_noirq, + .resume = loongson_i2c_resume, +}; + +#define LOONGSON_DEV_PM_OPS (&loongson_i2c_dev_pm_ops) +#else +#define LOONGSON_DEV_PM_OPS NULL +#endif + +#ifdef CONFIG_OF +static struct of_device_id loongson_i2c_id_table[] = { + {.compatible = "loongson,ls7a-i2c"}, + {}, +}; +MODULE_DEVICE_TABLE(of, loongson_i2c_id_table); +#endif +static const struct acpi_device_id loongson_i2c_acpi_match[] = { + {"LOON0004"}, + {} +}; +MODULE_DEVICE_TABLE(acpi, loongson_i2c_acpi_match); + +static struct platform_driver 
loongson_i2c_driver = { + .probe = loongson_i2c_probe, + .remove = loongson_i2c_remove, + .driver = { + .name = "loongson-i2c", + .owner = THIS_MODULE, + .pm = LOONGSON_DEV_PM_OPS, +#ifdef CONFIG_OF + .of_match_table = of_match_ptr(loongson_i2c_id_table), +#endif + .acpi_match_table = ACPI_PTR(loongson_i2c_acpi_match), + }, +}; + +static int __init loongson_i2c_init_driver(void) +{ + return platform_driver_register(&loongson_i2c_driver); +} +subsys_initcall(loongson_i2c_init_driver); + +static void __exit loongson_i2c_exit_driver(void) +{ + platform_driver_unregister(&loongson_i2c_driver); +} +module_exit(loongson_i2c_exit_driver); + +MODULE_AUTHOR("Loongson Technology Corporation Limited"); +MODULE_DESCRIPTION("Loongson LOONGSON I2C bus adapter"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:loongson-i2c"); -- Gitee From 9f0b0ecfd6dfd97bc9fef35297367d85d56ada3e Mon Sep 17 00:00:00 2001 From: Juxin Gao Date: Wed, 31 Aug 2022 11:48:00 +0800 Subject: [PATCH 10/36] spi: loongson: Add LS7A spi driver support LoongArch inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP -------------------------------- Change-Id: Ib4552613f7c7706a0773839ddec7384d4a1d40a0 Signed-off-by: Juxin Gao --- drivers/spi/Kconfig | 7 + drivers/spi/Makefile | 1 + drivers/spi/spi-loongson.c | 540 +++++++++++++++++++++++++++++++++++++ 3 files changed, 548 insertions(+) create mode 100644 drivers/spi/spi-loongson.c diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index 1bc68c0547c1..2c98fe65ab1a 100644 --- a/drivers/spi/Kconfig +++ b/drivers/spi/Kconfig @@ -1030,6 +1030,13 @@ config SPI_TLE62X0 sysfs interface, with each line presented as a kind of GPIO exposing both switch control and diagnostic feedback. +config SPI_LOONGSON + tristate "Loongson SPI Controller Support" + depends on LOONGARCH + default m + help + This is the driver for Loongson spi master controller. 
+ # # Add new SPI protocol masters in alphabetical order above this line # diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile index e1f88bd47ded..f21f7fd3673c 100644 --- a/drivers/spi/Makefile +++ b/drivers/spi/Makefile @@ -137,3 +137,4 @@ obj-$(CONFIG_SPI_AMD) += spi-amd.o # SPI slave protocol handlers obj-$(CONFIG_SPI_SLAVE_TIME) += spi-slave-time.o obj-$(CONFIG_SPI_SLAVE_SYSTEM_CONTROL) += spi-slave-system-control.o +obj-$(CONFIG_SPI_LOONGSON) += spi-loongson.o diff --git a/drivers/spi/spi-loongson.c b/drivers/spi/spi-loongson.c new file mode 100644 index 000000000000..dd5812bedfd2 --- /dev/null +++ b/drivers/spi/spi-loongson.c @@ -0,0 +1,540 @@ +/* + * Loongson SPI driver + * + * Copyright (C) 2013 Loongson Technology Corporation Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/*define spi register */ +#define SPCR 0x00 +#define SPSR 0x01 +#define FIFO 0x02 +#define SPER 0x03 +#define PARA 0x04 +#define SPCS 0x04 +#define SFCS 0x05 +#define TIMI 0x06 + +#define PARA_MEM_EN 0x01 +#define SPSR_SPIF 0x80 +#define SPSR_WCOL 0x40 +#define SPCR_SPE 0x40 + +extern unsigned long bus_clock; +struct loongson_spi { + struct work_struct work; + spinlock_t lock; + + struct list_head msg_queue; + struct spi_master *master; + void __iomem *base; + int cs_active; + unsigned int hz; + unsigned char spcr, sper, spsr; + unsigned char para, sfcs, timi; + struct workqueue_struct *wq; + unsigned int mode; +} *loongson_spi_dev; + +static inline int set_cs(struct loongson_spi *loongson_spi, struct spi_device *spi, int val); + +static void loongson_spi_write_reg(struct loongson_spi *spi, + unsigned char reg, unsigned char data) +{ + writeb(data, spi->base +reg); +} + +static char loongson_spi_read_reg(struct loongson_spi 
*spi,
+				unsigned char reg)
+{
+	return readb(spi->base + reg);
+}
+
+static int loongson_spi_update_state(struct loongson_spi *loongson_spi,struct spi_device *spi,
+				struct spi_transfer *t)
+{
+	unsigned int hz;
+	unsigned int div, div_tmp;
+	unsigned int bit;
+	unsigned long clk;
+	unsigned char val;
+	const char rdiv[12] = {0, 1, 4, 2, 3, 5, 6, 7, 8, 9, 10, 11};	/* indexed by bit; result is split across SPCR[1:0] and SPER[1:0] below */
+
+	hz = t ? t->speed_hz : spi->max_speed_hz;	/* per-transfer speed wins over the device default */
+
+	if (!hz)
+		hz = spi->max_speed_hz;
+
+	if ((hz && loongson_spi->hz != hz) || ((spi->mode ^ loongson_spi->mode) & (SPI_CPOL | SPI_CPHA))) {
+		clk = 100000000;	/* reference clock is hard-coded to 100000000 */
+		div = DIV_ROUND_UP(clk, hz);
+
+		if (div < 2)
+			div = 2;
+
+		if (div > 4096)
+			div = 4096;
+
+		bit = fls(div) - 1;
+		if ((1<<bit) == div)	/* exact power of two: step down so bit stays within rdiv[0..11] */
+			bit--;
+		div_tmp = rdiv[bit];
+
+		dev_dbg(&spi->dev, "clk = %ld hz = %d div_tmp = %d bit = %d\n",
+				clk, hz, div_tmp, bit);
+
+		loongson_spi->hz = hz;
+		loongson_spi->spcr = div_tmp & 3;
+		loongson_spi->sper = (div_tmp >> 2) & 3;
+
+		val = loongson_spi_read_reg(loongson_spi, SPCR);
+		val &= ~0xc;	/* clear the two mode bits before applying CPOL/CPHA */
+		if (spi->mode & SPI_CPOL)
+			val |= 8;
+		if (spi->mode & SPI_CPHA)
+			val |= 4;
+		loongson_spi_write_reg(loongson_spi, SPCR, (val & ~3) | loongson_spi->spcr);
+		val = loongson_spi_read_reg(loongson_spi, SPER);
+		loongson_spi_write_reg(loongson_spi, SPER, (val & ~3) | loongson_spi->sper);
+		loongson_spi->mode &= SPI_NO_CS;	/* keep only the controller-level SPI_NO_CS flag */
+		loongson_spi->mode |= spi->mode;
+	}
+
+	return 0;
+}
+
+
+
+static int loongson_spi_setup(struct spi_device *spi)
+{
+	struct loongson_spi *loongson_spi;
+
+	loongson_spi = spi_master_get_devdata(spi->master);
+	if (spi->bits_per_word %8)
+		return -EINVAL;
+
+	if(spi->chip_select >= spi->master->num_chipselect)
+		return -EINVAL;
+
+	loongson_spi_update_state(loongson_spi, spi, NULL);
+
+	set_cs(loongson_spi, spi, 1);
+
+	return 0;
+}
+
+static int loongson_spi_write_read_8bit( struct spi_device *spi,
+  const u8 **tx_buf, u8 **rx_buf, unsigned int num)
+{
+	struct loongson_spi *loongson_spi;
+	loongson_spi = spi_master_get_devdata(spi->master);
+
+	if (tx_buf && *tx_buf){
+		loongson_spi_write_reg(loongson_spi,
FIFO, *((*tx_buf)++)); + while((loongson_spi_read_reg(loongson_spi, SPSR) & 0x1) == 1); + }else{ + loongson_spi_write_reg(loongson_spi, FIFO, 0); + while((loongson_spi_read_reg(loongson_spi, SPSR) & 0x1) == 1); + } + + if (rx_buf && *rx_buf) { + *(*rx_buf)++ = loongson_spi_read_reg(loongson_spi, FIFO); + }else{ + loongson_spi_read_reg(loongson_spi, FIFO); + } + + return 1; +} + + +static unsigned int loongson_spi_write_read(struct spi_device *spi, struct spi_transfer *xfer) +{ + struct loongson_spi *loongson_spi; + unsigned int count; + const u8 *tx = xfer->tx_buf; + u8 *rx = xfer->rx_buf; + + loongson_spi = spi_master_get_devdata(spi->master); + count = xfer->len; + + do { + if (loongson_spi_write_read_8bit(spi, &tx, &rx, count) < 0) + goto out; + count--; + } while (count); + +out: + return xfer->len - count; + +} + +static inline int set_cs(struct loongson_spi *loongson_spi, struct spi_device *spi, int val) +{ + if (spi->mode & SPI_CS_HIGH) + val = !val; + if (loongson_spi->mode & SPI_NO_CS) { + loongson_spi_write_reg(loongson_spi, SPCS, val); + } else { + int cs = loongson_spi_read_reg(loongson_spi, SFCS) & ~(0x11 << spi->chip_select); + loongson_spi_write_reg(loongson_spi, SFCS, (val ? 
(0x11 << spi->chip_select):(0x1 << spi->chip_select)) | cs); + } + return 0; +} + +static void loongson_spi_work(struct work_struct *work) +{ + struct loongson_spi *loongson_spi = + container_of(work, struct loongson_spi, work); + int param; + + spin_lock(&loongson_spi->lock); + param = loongson_spi_read_reg(loongson_spi, PARA); + loongson_spi_write_reg(loongson_spi, PARA, param&~1); + while (!list_empty(&loongson_spi->msg_queue)) { + + struct spi_message *m; + struct spi_device *spi; + struct spi_transfer *t = NULL; + + m = container_of(loongson_spi->msg_queue.next, struct spi_message, queue); + + list_del_init(&m->queue); + spin_unlock(&loongson_spi->lock); + + spi = m->spi; + + /*in here set cs*/ + set_cs(loongson_spi, spi, 0); + + list_for_each_entry(t, &m->transfers, transfer_list) { + + /*setup spi clock*/ + loongson_spi_update_state(loongson_spi, spi, t); + + if (t->len) + m->actual_length += + loongson_spi_write_read(spi, t); + } + + set_cs(loongson_spi, spi, 1); + m->complete(m->context); + + + spin_lock(&loongson_spi->lock); + } + + loongson_spi_write_reg(loongson_spi, PARA, param); + spin_unlock(&loongson_spi->lock); +} + + + +static int loongson_spi_transfer(struct spi_device *spi, struct spi_message *m) +{ + struct loongson_spi *loongson_spi; + struct spi_transfer *t = NULL; + + m->actual_length = 0; + m->status = 0; + if (list_empty(&m->transfers) || !m->complete) + return -EINVAL; + + loongson_spi = spi_master_get_devdata(spi->master); + + list_for_each_entry(t, &m->transfers, transfer_list) { + + if (t->tx_buf == NULL && t->rx_buf == NULL && t->len) { + dev_err(&spi->dev, + "message rejected : " + "invalid transfer data buffers\n"); + goto msg_rejected; + } + /*other things not check*/ + } + + spin_lock(&loongson_spi->lock); + list_add_tail(&m->queue, &loongson_spi->msg_queue); + queue_work(loongson_spi->wq, &loongson_spi->work); + spin_unlock(&loongson_spi->lock); + + return 0; +msg_rejected: + + m->status = -EINVAL; + if (m->complete) + 
m->complete(m->context);
+	return -EINVAL;
+}
+
+static void loongson_spi_reginit(void)
+{
+	unsigned char val;
+
+	val = loongson_spi_read_reg(loongson_spi_dev, SPCR);
+	val &= ~SPCR_SPE;
+	loongson_spi_write_reg(loongson_spi_dev, SPCR, val);
+
+	loongson_spi_write_reg(loongson_spi_dev, SPSR, (SPSR_SPIF | SPSR_WCOL));
+
+	val = loongson_spi_read_reg(loongson_spi_dev, SPCR);
+	val |= SPCR_SPE;
+	loongson_spi_write_reg(loongson_spi_dev, SPCR, val);
+}
+
+static int loongson_spi_probe(struct platform_device *pdev)
+{
+	struct spi_master *master;
+	struct loongson_spi *spi;
+	struct resource *res;
+	int ret;
+	master = spi_alloc_master(&pdev->dev, sizeof(struct loongson_spi));
+
+	if (master == NULL) {
+		dev_dbg(&pdev->dev, "master allocation failed\n");
+		return-ENOMEM;
+	}
+
+	if (pdev->id != -1)
+		master->bus_num	= pdev->id;
+
+	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH ;
+	master->setup = loongson_spi_setup;
+	master->transfer = loongson_spi_transfer;
+	master->num_chipselect = 4;
+#ifdef CONFIG_OF
+	master->dev.of_node = of_node_get(pdev->dev.of_node);
+#endif
+	dev_set_drvdata(&pdev->dev, master);
+
+	spi = spi_master_get_devdata(master);
+
+	loongson_spi_dev = spi;
+
+	spi->wq	= create_singlethread_workqueue(pdev->name);
+
+	spi->master = master;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (res == NULL) {
+		dev_err(&pdev->dev, "Cannot get IORESOURCE_MEM\n");
+		ret = -ENOENT;
+		goto free_master;
+	}
+
+	spi->base = ioremap(res->start, (res->end - res->start)+1);
+	if (spi->base == NULL) {
+		dev_err(&pdev->dev, "Cannot map IO\n");
+		ret = -ENXIO;
+		goto free_master;	/* nothing is mapped yet, so skip iounmap() */
+	}
+
+	loongson_spi_reginit();
+
+	spi->mode = 0;
+	if (of_get_property(pdev->dev.of_node, "spi-nocs", NULL))
+		spi->mode |= SPI_NO_CS;
+
+	INIT_WORK(&spi->work, loongson_spi_work);
+
+	spin_lock_init(&spi->lock);
+	INIT_LIST_HEAD(&spi->msg_queue);
+
+	ret = spi_register_master(master);
+	if (ret < 0)
+		goto unmap_io;
+
+	return ret;
+
+unmap_io:
+	iounmap(spi->base);
+free_master:
+	/* spi_master_put() releases the spi_alloc_master() allocation; a kfree() here would free it twice */
+	spi_master_put(master);
+	return ret;
+
+}
+
+#ifdef CONFIG_PM
+static int loongson_spi_suspend(struct device *dev)
+{
+	struct loongson_spi *loongson_spi;
+	struct spi_master *master;
+
+	master = dev_get_drvdata(dev);
+	loongson_spi = spi_master_get_devdata(master);
+
+	loongson_spi->spcr = loongson_spi_read_reg(loongson_spi, SPCR);
+	loongson_spi->sper = loongson_spi_read_reg(loongson_spi, SPER);
+	loongson_spi->spsr = loongson_spi_read_reg(loongson_spi, SPSR);
+	loongson_spi->para = loongson_spi_read_reg(loongson_spi, PARA);
+	loongson_spi->sfcs = loongson_spi_read_reg(loongson_spi, SFCS);
+	loongson_spi->timi = loongson_spi_read_reg(loongson_spi, TIMI);
+
+	return 0;
+}
+
+static int loongson_spi_resume(struct device *dev)
+{
+	struct loongson_spi *loongson_spi;
+	struct spi_master *master;
+
+	master = dev_get_drvdata(dev);
+	loongson_spi = spi_master_get_devdata(master);
+
+	loongson_spi_write_reg(loongson_spi, SPCR, loongson_spi->spcr);
+	loongson_spi_write_reg(loongson_spi, SPER, loongson_spi->sper);
+	loongson_spi_write_reg(loongson_spi, SPSR, loongson_spi->spsr);
+	loongson_spi_write_reg(loongson_spi, PARA, loongson_spi->para);
+	loongson_spi_write_reg(loongson_spi, SFCS, loongson_spi->sfcs);
+	loongson_spi_write_reg(loongson_spi, TIMI, loongson_spi->timi);
+
+	return 0;
+}
+
+static const struct dev_pm_ops loongson_spi_dev_pm_ops = {
+	.suspend	= loongson_spi_suspend,
+	.resume		= loongson_spi_resume,
+};
+
+#define LS_DEV_PM_OPS (&loongson_spi_dev_pm_ops)
+#else
+#define LS_DEV_PM_OPS NULL
+#endif
+
+
+#ifdef CONFIG_OF
+static struct of_device_id loongson_spi_id_table[] = {
+	{ .compatible = "loongson,ls7a-spi", },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, loongson_spi_id_table);
+#endif
+static struct platform_driver loongson_spi_driver = {
+	.probe = loongson_spi_probe,
+	.driver	= {
+		.name	= "loongson-spi",
+		.owner	= THIS_MODULE,
+		.bus = &platform_bus_type,
+		.pm = LS_DEV_PM_OPS,
+#ifdef CONFIG_OF
+		.of_match_table =
of_match_ptr(loongson_spi_id_table), +#endif + }, +}; + +#ifdef CONFIG_PCI +static struct resource loongson_spi_resources[] = { + [0] = { + .flags = IORESOURCE_MEM, + }, + [1] = { + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device loongson_spi_device = { + .name = "loongson-spi", + .id = 0, + .num_resources = ARRAY_SIZE(loongson_spi_resources), + .resource = loongson_spi_resources, +}; + + +static int loongson_spi_pci_register(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + int ret; + unsigned char v8; + + pr_debug("loongson_spi_pci_register BEGIN\n"); + /* Enable device in PCI config */ + ret = pci_enable_device(pdev); + if (ret < 0) { + printk(KERN_ERR "loongson-pci (%s): Cannot enable PCI device\n", + pci_name(pdev)); + goto err_out; + } + + /* request the mem regions */ + ret = pci_request_region(pdev, 0, "loongson-spi io"); + if (ret < 0) { + printk( KERN_ERR "loongson-spi (%s): cannot request region 0.\n", + pci_name(pdev)); + goto err_out; + } + + loongson_spi_resources[0].start = pci_resource_start (pdev, 0); + loongson_spi_resources[0].end = pci_resource_end(pdev, 0); + /* need api from pci irq */ + ret = pci_read_config_byte(pdev, PCI_INTERRUPT_LINE, &v8); + + if (ret == PCIBIOS_SUCCESSFUL) { + + loongson_spi_resources[1].start = v8; + loongson_spi_resources[1].end = v8; + platform_device_register(&loongson_spi_device); + } + +err_out: + return ret; +} + +static void loongson_spi_pci_unregister(struct pci_dev *pdev) +{ + pci_release_region(pdev, 0); +} + +static struct pci_device_id loongson_spi_devices[] = { + {PCI_DEVICE(0x14, 0x7a0b)}, + {0, 0, 0, 0, 0, 0, 0} +}; + +static struct pci_driver loongson_spi_pci_driver = { + .name = "loongson-spi-pci", + .id_table = loongson_spi_devices, + .probe = loongson_spi_pci_register, + .remove = loongson_spi_pci_unregister, +}; +#endif + + +static int __init loongson_spi_init(void) +{ + int ret; + + ret = platform_driver_register(&loongson_spi_driver); +#ifdef CONFIG_PCI + if(!ret) + 
ret = pci_register_driver(&loongson_spi_pci_driver);
+#endif
+	return ret;
+}
+
+static void __exit loongson_spi_exit(void)
+{
+	platform_driver_unregister(&loongson_spi_driver);
+#ifdef CONFIG_PCI
+	pci_unregister_driver(&loongson_spi_pci_driver);
+#endif
+}
+
+subsys_initcall(loongson_spi_init);
+module_exit(loongson_spi_exit);
+
+MODULE_AUTHOR("Loongson Technology Corporation Limited");
+MODULE_DESCRIPTION("Loongson SPI driver");
+MODULE_LICENSE("GPL");
-- 
Gitee


From d1c564ec2caf3397821e747c783bbc3600fcc782 Mon Sep 17 00:00:00 2001
From: zhangtianyang
Date: Mon, 19 Sep 2022 20:07:07 +0800
Subject: [PATCH 11/36] ACPI: Support ACPI_MACHINE_WIDTH for 64

LoongArch inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP

--------------------------------

Signed-off-by: zhangtianyang
Change-Id: Ie79e0c0a3ebcc1c9016b2be1e434ad3d57bf334f
---
 include/acpi/platform/aclinux.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h
index 72f52a1342a0..1a60441ccc16 100644
--- a/include/acpi/platform/aclinux.h
+++ b/include/acpi/platform/aclinux.h
@@ -194,7 +194,7 @@
 
 #if defined(__ia64__) || (defined(__x86_64__) && !defined(__ILP32__)) ||\
 	defined(__aarch64__) || defined(__PPC64__) ||\
-	defined(__s390x__) ||\
+	defined(__s390x__) || defined(__loongarch__) ||\
 	(defined(__riscv) && (defined(__LP64__) || defined(_LP64)))
 #define ACPI_MACHINE_WIDTH 64
 #define COMPILER_DEPENDENT_INT64 long
-- 
Gitee


From 42ac1b636c127d3a08b7de59232f4d5f4aca4e03 Mon Sep 17 00:00:00 2001
From: zhangtianyang
Date: Fri, 23 Sep 2022 16:03:11 +0800
Subject: [PATCH 12/36] LoongArch: Adapted SECTION_SIZE_BITS with page size

LoongArch inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP

--------------------------------

Signed-off-by: zhangtianyang
Change-Id: Ib32d4a2fc458fedd939e76e86d8bdddfe3c43914
---
 arch/loongarch/include/asm/sparsemem.h | 2 +-
 1 file changed, 1
insertion(+), 1 deletion(-) diff --git a/arch/loongarch/include/asm/sparsemem.h b/arch/loongarch/include/asm/sparsemem.h index ee55cdf933c6..0f6e76fb3174 100644 --- a/arch/loongarch/include/asm/sparsemem.h +++ b/arch/loongarch/include/asm/sparsemem.h @@ -8,7 +8,7 @@ * SECTION_SIZE_BITS 2^N: how big each section will be * MAX_PHYSMEM_BITS 2^N: how much memory we can have in that space */ -#define SECTION_SIZE_BITS 29 /* 2^29 = Largest Huge Page Size */ +#define SECTION_SIZE_BITS 28 #define MAX_PHYSMEM_BITS 48 #ifndef CONFIG_SPARSEMEM_VMEMMAP -- Gitee From 8d43ae43fa7c08d850127397d1026a9aff530e40 Mon Sep 17 00:00:00 2001 From: zhangtianyang Date: Mon, 19 Sep 2022 10:19:31 +0800 Subject: [PATCH 13/36] platform: Support loongson laptop generic driver LoongArch inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP -------------------------------- Signed-off-by: zhangtianyang Change-Id: Ide7fce6caeffea9b001afd43cfaccfef1889d3a5 --- drivers/platform/Kconfig | 1 + drivers/platform/Makefile | 1 + drivers/platform/loongarch/Kconfig | 30 + drivers/platform/loongarch/Makefile | 1 + .../loongarch/loongson_generic_laptop.c | 663 ++++++++++++++++++ 5 files changed, 696 insertions(+) create mode 100644 drivers/platform/loongarch/Kconfig create mode 100644 drivers/platform/loongarch/Makefile create mode 100644 drivers/platform/loongarch/loongson_generic_laptop.c diff --git a/drivers/platform/Kconfig b/drivers/platform/Kconfig index 971426bb4302..47063d4f4deb 100644 --- a/drivers/platform/Kconfig +++ b/drivers/platform/Kconfig @@ -13,3 +13,4 @@ source "drivers/platform/chrome/Kconfig" source "drivers/platform/mellanox/Kconfig" source "drivers/platform/olpc/Kconfig" +source "drivers/platform/loongarch/Kconfig" diff --git a/drivers/platform/Makefile b/drivers/platform/Makefile index 6fda58c021ca..3cb2af6ff64a 100644 --- a/drivers/platform/Makefile +++ b/drivers/platform/Makefile @@ -9,3 +9,4 @@ obj-$(CONFIG_MIPS) += mips/ obj-$(CONFIG_OLPC_EC) += olpc/ 
obj-$(CONFIG_GOLDFISH) += goldfish/ obj-$(CONFIG_CHROME_PLATFORMS) += chrome/ +obj-$(CONFIG_LOONGARCH) += loongarch/ diff --git a/drivers/platform/loongarch/Kconfig b/drivers/platform/loongarch/Kconfig new file mode 100644 index 000000000000..f444d41fd03a --- /dev/null +++ b/drivers/platform/loongarch/Kconfig @@ -0,0 +1,30 @@ +# +# LOONGARCH Platform Specific Drivers +# + +menuconfig LOONGARCH_PLATFORM_DEVICES + bool "LOONGARCH Platform Specific Device Drivers" + default y + help + Say Y here to get to see options for device drivers of various + LOONGARCH platforms, including vendor-specific netbook/laptop/desktop + extension and hardware monitor drivers. This option itself does + not add any kernel code. + + If you say N, all options in this submenu will be skipped and disabled. + +if LOONGARCH_PLATFORM_DEVICES + +config LOONGSON_GENERIC_LAPTOP + tristate "Generic Loongson-3 Laptop Driver" + depends on ACPI + depends on BACKLIGHT_CLASS_DEVICE + depends on INPUT + depends on MACH_LOONGSON64 + select ACPI_VIDEO + select INPUT_SPARSEKMAP + default y + help + ACPI-based Loongson-3 family laptops generic driver. 
+ +endif # LOONGARCH_PLATFORM_DEVICES diff --git a/drivers/platform/loongarch/Makefile b/drivers/platform/loongarch/Makefile new file mode 100644 index 000000000000..f912a53b66a7 --- /dev/null +++ b/drivers/platform/loongarch/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_LOONGSON_GENERIC_LAPTOP) += loongson_generic_laptop.o diff --git a/drivers/platform/loongarch/loongson_generic_laptop.c b/drivers/platform/loongarch/loongson_generic_laptop.c new file mode 100644 index 000000000000..41520e90a98e --- /dev/null +++ b/drivers/platform/loongarch/loongson_generic_laptop.c @@ -0,0 +1,663 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * loongson_generic_laptop.c - Loongson processor + * based LAPTOP/ALL-IN-ONE driver + * + * lvjianmin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#define LSACPI_VERSION "1.0" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* ACPI HIDs */ +#define ACPI_LOONGSON_HKEY_HID "LOON0000" +#define ACPI_EC_HID "PNP0C09" + +/**************************************************************************** + * Main driver + */ + +#define LSACPI_NAME "loongson-laptop" +#define LSACPI_DESC "Loongson Laptop/all-in-one ACPI Driver" +#define LSACPI_FILE LSACPI_NAME "_acpi" +#define LSACPI_DRVR_NAME LSACPI_FILE +#define LSACPI_ACPI_EVENT_PREFIX "loongson_generic" +/**************************************************************************** + * Driver-wide structs and misc. variables + */ + +struct generic_sub_driver { + u32 type; + char *name; + acpi_handle *handle; + struct acpi_device *device; + struct platform_driver *driver; + int (*init)(struct generic_sub_driver *sub_driver); + void (*notify)(struct generic_sub_driver *sub_driver, u32 event); + u8 acpi_notify_installed; +}; + +static u32 input_device_registered; + +static int hotkey_status_get(int *status); + +static int loongson_laptop_backlight_update(struct backlight_device *bd); +/**************************************************************************** + **************************************************************************** + * + * ACPI Helpers and device model + * + **************************************************************************** + ****************************************************************************/ + +/************************************************************************* + * ACPI basic handles + */ + +static int acpi_evalf(acpi_handle handle, + int *res, char *method, char *fmt, ...); +static acpi_handle hkey_handle; + +static void dispatch_acpi_notify(acpi_handle handle, u32 event, void *data) +{ + struct generic_sub_driver *sub_driver = data; + + if (!sub_driver || 
!sub_driver->notify) + return; + sub_driver->notify(sub_driver, event); +} + +static int __init setup_acpi_notify(struct generic_sub_driver *sub_driver) +{ + acpi_status status; + int rc; + + if (!*sub_driver->handle) + return 0; + + rc = acpi_bus_get_device(*sub_driver->handle, &sub_driver->device); + if (rc < 0) { + pr_err("acpi_bus_get_device(%s) failed: %d\n", + sub_driver->name, rc); + return -ENODEV; + } + + sub_driver->device->driver_data = sub_driver; + sprintf(acpi_device_class(sub_driver->device), "%s/%s", + LSACPI_ACPI_EVENT_PREFIX, sub_driver->name); + + status = acpi_install_notify_handler(*sub_driver->handle, + sub_driver->type, dispatch_acpi_notify, sub_driver); + if (ACPI_FAILURE(status)) { + if (status == AE_ALREADY_EXISTS) { + pr_notice("another device driver is already " + "handling %s events\n", sub_driver->name); + } else { + pr_err("acpi_install_notify_handler(%s) failed: %s\n", + sub_driver->name, acpi_format_exception(status)); + } + return -ENODEV; + } + sub_driver->acpi_notify_installed = 1; + return 0; +} + +static struct input_dev *generic_inputdev; + +#ifdef CONFIG_PM +static int loongson_hkey_suspend(struct device *dev) +{ + return 0; +} +static int loongson_hkey_resume(struct device *dev) +{ + int status = 0; + struct key_entry ke; + + struct backlight_device *bd; + bd = backlight_device_get_by_type(BACKLIGHT_PLATFORM); + if (bd) { + loongson_laptop_backlight_update(bd) ? + pr_warn("Loongson_backlight:resume brightness failed") : + pr_info("Loongson_backlight:resume brightness %d\n", bd->props.brightness); + } + /* + * Only if the firmware supports SW_LID event model, we can handle the + * event. This is for the consideration of development board without + * EC. + */ + if (test_bit(SW_LID, generic_inputdev->swbit)) { + if (hotkey_status_get(&status)) + return -EIO; + /* + * The input device sw element records the last lid status. + * When the system is awakened by other wake-up sources, + * the lid event will also be reported. 
The judgment of + * adding SW_LID bit which in sw element can avoid this + * case. + * + * input system will drop lid event when current lid event + * value and last lid status in the same data set,which + * data set inclue zero set and no zero set. so laptop + * driver doesn't report repeated events. + * + * Lid status is generally 0, but hardware exception is + * considered. So add lid status confirmation. + */ + if (test_bit(SW_LID, generic_inputdev->sw) && !(status & (1 << SW_LID))) { + ke.type = KE_SW; + ke.sw.value = (u8)status; + ke.sw.code = SW_LID; + sparse_keymap_report_entry(generic_inputdev, &ke, + 1, true); + } + } + + return 0; +} + +static const struct dev_pm_ops loongson_hkey_dev_pm_ops = { + .suspend_noirq = loongson_hkey_suspend, + .resume = loongson_hkey_resume, +}; + +#define LOONGSON_HKEY_DEV_PM_OPS (&loongson_hkey_dev_pm_ops) +#else +#define LOONGSON_HKEY_DEV_PM_OPS NULL +#endif +static int loongson_hkey_probe(struct platform_device *pdev) +{ + hkey_handle = ACPI_HANDLE(&pdev->dev); + + if (!hkey_handle) + return -ENODEV; + + return 0; +} + +static const struct acpi_device_id loongson_htk_device_ids[] = { + {ACPI_LOONGSON_HKEY_HID, 0}, + {"", 0}, +}; + +static struct platform_driver loongson_hkey_driver = { + .probe = loongson_hkey_probe, + .driver = { + .name = "loongson-hkey", + .owner = THIS_MODULE, + .pm = LOONGSON_HKEY_DEV_PM_OPS, + .acpi_match_table = ACPI_PTR(loongson_htk_device_ids), + }, +}; + +/* + * Loongson generic laptop firmware event model + * + */ + +#define GENERIC_HOTKEY_MAP_MAX 64 +#define METHOD_NAME__KMAP "KMAP" +static struct key_entry hotkey_keycode_map[GENERIC_HOTKEY_MAP_MAX]; +static int hkey_map(void) +{ + struct acpi_buffer buf; + union acpi_object *pack; + acpi_status status; + u32 index; + + buf.length = ACPI_ALLOCATE_BUFFER; + status = acpi_evaluate_object_typed(hkey_handle, + METHOD_NAME__KMAP, NULL, &buf, ACPI_TYPE_PACKAGE); + if (status != AE_OK) { + printk(KERN_ERR ": ACPI exception: %s\n", + 
acpi_format_exception(status));
+		return -1;
+	}
+	pack = buf.pointer;
+	/*
+	 * hotkey_keycode_map[] is a static array of GENERIC_HOTKEY_MAP_MAX
+	 * entries.  Copy at most GENERIC_HOTKEY_MAP_MAX - 1 of them so that a
+	 * firmware package of any size cannot overflow the array and the last,
+	 * still zeroed slot remains as the end-of-table (KE_END) marker.
+	 */
+	for (index = 0; index < pack->package.count &&
+			index < GENERIC_HOTKEY_MAP_MAX - 1; index++) {
+		union acpi_object *sub_pack = &pack->package.elements[index];
+		union acpi_object *element;
+
+		/* Every entry must be a package of { type, code, keycode } */
+		if (sub_pack->type != ACPI_TYPE_PACKAGE ||
+		    sub_pack->package.count < 3) {
+			ACPI_FREE(buf.pointer);
+			return -1;
+		}
+
+		element = &sub_pack->package.elements[0];
+		hotkey_keycode_map[index].type = element->integer.value;
+		element = &sub_pack->package.elements[1];
+		hotkey_keycode_map[index].code = element->integer.value;
+		element = &sub_pack->package.elements[2];
+		hotkey_keycode_map[index].keycode = element->integer.value;
+	}
+	/* buf was requested with ACPI_ALLOCATE_BUFFER, so it is ours to free */
+	ACPI_FREE(buf.pointer);
+	return 0;
+}
+
+/*
+ * Evaluate the "VCBL" method on the hotkey device with 1 (enable) or
+ * 0 (disable).  Returns 0 on success and -EIO if the evaluation fails.
+ */
+static int hotkey_backlight_set(bool enable)
+{
+	if (!acpi_evalf(hkey_handle, NULL, "VCBL", "vd", enable ? 1 : 0))
+		return -EIO;
+
+	return 0;
+}
+
+/*
+ * Sub-driver init hook: read the key map from firmware, attach it to
+ * generic_inputdev and tell the firmware whether this driver handles
+ * backlight events.  Returns 0 on success or the error of the failing step.
+ */
+static int event_init(struct generic_sub_driver *sub_driver)
+{
+	int ret;
+
+	ret = hkey_map();
+	if (ret) {
+		printk(KERN_ERR "Fail to parse keymap from DSDT.\n");
+		return ret;
+	}
+
+	ret = sparse_keymap_setup(generic_inputdev, hotkey_keycode_map, NULL);
+	if (ret) {
+		printk(KERN_ERR "Fail to setup input device keymap\n");
+		/*
+		 * generic_inputdev must not be freed here: the module init
+		 * code keeps using the pointer after this hook returns and
+		 * generic_acpi_module_exit() frees or unregisters it, so
+		 * freeing it here caused a use-after-free and a double free.
+		 */
+		return ret;
+	}
+
+	/*
+	 * This hotkey driver handle backlight event when
+	 * acpi_video_get_backlight_type() gets acpi_backlight_vendor
+	 */
+	if (acpi_video_get_backlight_type() != acpi_backlight_vendor)
+		hotkey_backlight_set(false);
+	else
+		hotkey_backlight_set(true);
+
+	printk("ACPI:enabling firmware HKEY event interface...\n");
+	return ret;
+
+}
+
+#define GENERIC_EVENT_TYPE_OFF		12
+#define GENERIC_EVENT_MASK		0xFFF
+#define TPACPI_MAX_ACPI_ARGS 3
+static int acpi_evalf(acpi_handle handle,
+		int *res, char *method, char *fmt, ...)
+{ + char *fmt0 = fmt; + struct acpi_object_list params; + union acpi_object in_objs[TPACPI_MAX_ACPI_ARGS]; + struct acpi_buffer result, *resultp; + union acpi_object out_obj; + acpi_status status; + va_list ap; + char res_type; + int success; + int quiet; + + if (!*fmt) { + pr_err("acpi_evalf() called with empty format\n"); + return 0; + } + + if (*fmt == 'q') { + quiet = 1; + fmt++; + } else + quiet = 0; + + res_type = *(fmt++); + + params.count = 0; + params.pointer = &in_objs[0]; + + va_start(ap, fmt); + while (*fmt) { + char c = *(fmt++); + switch (c) { + case 'd': /* int */ + in_objs[params.count].integer.value = va_arg(ap, int); + in_objs[params.count++].type = ACPI_TYPE_INTEGER; + break; + /* add more types as needed */ + default: + pr_err("acpi_evalf() called with invalid format character '%c'\n", + c); + va_end(ap); + return 0; + } + } + va_end(ap); + + if (res_type != 'v') { + result.length = sizeof(out_obj); + result.pointer = &out_obj; + resultp = &result; + } else + resultp = NULL; + + status = acpi_evaluate_object(handle, method, ¶ms, resultp); + + switch (res_type) { + case 'd': /* int */ + success = (status == AE_OK && + out_obj.type == ACPI_TYPE_INTEGER); + if (success && res) + *res = out_obj.integer.value; + break; + case 'v': /* void */ + success = status == AE_OK; + break; + /* add more types as needed */ + default: + pr_err("acpi_evalf() called with invalid format character '%c'\n", + res_type); + return 0; + } + + if (!success && !quiet) + pr_err("acpi_evalf(%s, %s, ...) 
failed: %s\n",
+		       method, fmt0, acpi_format_exception(status));
+
+	return success;
+}
+
+/*
+ * Evaluate the "ECBG" method on the hotkey device.
+ * Returns the integer reported by the method, -ENXIO when no hotkey
+ * handle has been bound yet, or -EIO when the evaluation fails.
+ */
+int ec_get_brightness(void)
+{
+	int status = 0;
+
+	if (!hkey_handle)
+		return -ENXIO;
+
+	if (!acpi_evalf(hkey_handle, &status, "ECBG", "d"))
+		return -EIO;
+
+	return status;
+}
+EXPORT_SYMBOL(ec_get_brightness);
+
+/*
+ * Pass @level to the "ECBS" method on the hotkey device.
+ * Returns 0 on success, -ENXIO without a hotkey handle, -EIO on failure.
+ */
+int ec_set_brightness(int level)
+{
+
+	int ret = 0;
+	if (!hkey_handle)
+		return -ENXIO;
+
+	if (!acpi_evalf(hkey_handle, NULL, "ECBS", "vd", level))
+		ret = -EIO;
+
+	return ret;
+}
+EXPORT_SYMBOL(ec_set_brightness);
+
+/*
+ * Clamp @level to the range reported by firmware: the "ECLL" result is
+ * used as the upper bound and the "ECSL" result as the lower bound.
+ * A negative method result is returned unchanged.  Returns -ENXIO without
+ * a hotkey handle and -EIO when either evaluation fails.
+ */
+int ec_bl_level(u8 level)
+{
+	int status = 0;
+
+	if (!hkey_handle)
+		return -ENXIO;
+
+	if (!acpi_evalf(hkey_handle, &status, "ECLL", "d"))
+		return -EIO;
+	if (status < 0 || level > status)
+		return status;
+
+	if (!acpi_evalf(hkey_handle, &status, "ECSL", "d"))
+		return -EIO;
+
+	if (status < 0 || level < status)
+		return status;
+
+	return level;
+}
+EXPORT_SYMBOL(ec_bl_level);
+
+/*
+ * backlight_ops.update_status: clamp the requested brightness with
+ * ec_bl_level() and hand it to ec_set_brightness().  Any failure is
+ * reported as -EIO.
+ */
+static int loongson_laptop_backlight_update(struct backlight_device *bd)
+{
+	int lvl = ec_bl_level(bd->props.brightness);
+	if (lvl < 0)
+		return -EIO;
+	if (ec_set_brightness(lvl))
+		return -EIO;
+	return 0;
+}
+
+/*
+ * backlight_ops.get_brightness: return the value from ec_get_brightness(),
+ * or -EIO when that call fails.
+ */
+static int loongson_laptop_get_brightness(struct backlight_device *bd)
+{
+	/*
+	 * Keep the result in an int: with the former u8 the error check
+	 * could never fire and -ENXIO/-EIO were returned as brightness.
+	 */
+	int level = ec_get_brightness();
+
+	if (level < 0)
+		return -EIO;
+
+	return level;
+}
+
+static const struct backlight_ops ls_backlight_laptop_ops = {
+	.update_status = loongson_laptop_backlight_update,
+	.get_brightness = loongson_laptop_get_brightness,
+};
+
+/*
+ * Register the "loongson_laptop" platform backlight device, using the
+ * "ECLL" result as max_brightness.  Returns 0 on success, -EIO when ECLL
+ * cannot be evaluated, or the error from backlight_device_register().
+ */
+static int ls_laptop_backlight_register(void)
+{
+	struct backlight_properties props;
+	struct backlight_device *bd;
+	int status = 0;
+
+	memset(&props, 0, sizeof(props));
+	props.type = BACKLIGHT_PLATFORM;
+
+	if (!acpi_evalf(hkey_handle, &status, "ECLL", "d"))
+		return -EIO;
+	props.max_brightness = status;
+	props.brightness = 1;
+
+	/* Failure is reported as an ERR_PTR, not NULL, so test with IS_ERR() */
+	bd = backlight_device_register("loongson_laptop", NULL, NULL,
+				       &ls_backlight_laptop_ops, &props);
+	if (IS_ERR(bd))
+		return PTR_ERR(bd);
+
+	return 0;
+}
+
+static int
hotkey_status_get(int *status)
+{
+	/* Evaluate "GSWS" on the hotkey device; the integer result goes to *status */
+	return acpi_evalf(hkey_handle, status, "GSWS", "d") ? 0 : -EIO;
+}
+
+/*
+ * Evaluate the "\BLSW" method with @value as its single integer argument.
+ * Returns 0 on success and -ENODEV when the evaluation fails.
+ */
+static int loongson_lvds_switch(u64 value)
+{
+	union acpi_object arg = {
+		.integer = {
+			.type = ACPI_TYPE_INTEGER,
+			.value = value,
+		},
+	};
+	struct acpi_object_list arg_list = {
+		.count = 1,
+		.pointer = &arg,
+	};
+	acpi_status rc;
+
+	rc = acpi_evaluate_object(NULL, "\\BLSW", &arg_list, NULL);
+	if (ACPI_SUCCESS(rc))
+		return 0;
+
+	pr_info("Loongson lvds error:0x%x\n", rc);
+	return -ENODEV;
+}
+
+/* Call "\BLSW" with argument 0. */
+int turn_off_lvds(void)
+{
+	return loongson_lvds_switch(0);
+}
+
+/* Call "\BLSW" with argument 1. */
+int turn_on_lvds(void)
+{
+	return loongson_lvds_switch(1);
+}
+
+/*
+ * ACPI notify hook of the hotkey sub-driver.  The low bits of @event
+ * (GENERIC_EVENT_MASK) select the keymap entry, the four bits starting at
+ * GENERIC_EVENT_TYPE_OFF carry the event type.  For switch events the
+ * current state is read with hotkey_status_get() before reporting.
+ */
+static void event_notify(struct generic_sub_driver *sub_driver, u32 event)
+{
+	int scan_code = event & GENERIC_EVENT_MASK;
+	int type = (event >> GENERIC_EVENT_TYPE_OFF) & 0xF;
+	struct key_entry *entry;
+
+	entry = sparse_keymap_entry_from_scancode(generic_inputdev, scan_code);
+	if (!entry)
+		return;
+
+	if (type == KE_SW) {
+		int sw_state = 0;
+
+		/* Without a valid state there is nothing to report */
+		if (hotkey_status_get(&sw_state))
+			return;
+		entry->sw.value = !!(sw_state & (1 << entry->sw.code));
+	}
+
+	sparse_keymap_report_entry(generic_inputdev, entry, 1, true);
+}
+
+/****************************************************************************
+ ****************************************************************************
+ *
+ * Infrastructure
+ *
+ ****************************************************************************
+ ****************************************************************************/
+
+/* Remove the ACPI notify handler of @sub_driver if one was installed. */
+static void generic_exit(struct generic_sub_driver *sub_driver)
+{
+	if (!sub_driver->acpi_notify_installed)
+		return;
+
+	acpi_remove_notify_handler(*sub_driver->handle,
+				   sub_driver->type, dispatch_acpi_notify);
+	sub_driver->acpi_notify_installed = 0;
+}
+
+static int __init generic_subdriver_init(struct generic_sub_driver *sub_driver)
+{
+
int ret; + + if (!sub_driver || !sub_driver->driver) + return -EINVAL; + + ret = platform_driver_register(sub_driver->driver); + if (ret) + return -EINVAL; + + if (sub_driver->init) + sub_driver->init(sub_driver); + + if (sub_driver->notify) { + ret = setup_acpi_notify(sub_driver); + if (ret == -ENODEV) { + ret = 0; + goto err_out; + } + if (ret < 0) + goto err_out; + } + + return 0; + +err_out: + generic_exit(sub_driver); + return (ret < 0) ? ret : 0; +} + +/* Module init, exit, parameters */ +static struct generic_sub_driver generic_sub_drivers[] = { + { + .name = "hkey", + .init = event_init, + .notify = event_notify, + .handle = &hkey_handle, + .type = ACPI_DEVICE_NOTIFY, + .driver = &loongson_hkey_driver, + }, +}; + +static void generic_acpi_module_exit(void) +{ + if (generic_inputdev) { + if (input_device_registered) + input_unregister_device(generic_inputdev); + else + input_free_device(generic_inputdev); + } +} + +static int __init generic_acpi_module_init(void) +{ + int ret, i; + int status; + bool ec_found; + + if (acpi_disabled) + return -ENODEV; + + /* The EC device is required */ + ec_found = acpi_dev_found(ACPI_EC_HID); + if (!ec_found) + return -ENODEV; + + generic_inputdev = input_allocate_device(); + if (!generic_inputdev) { + pr_err("unable to allocate input device\n"); + generic_acpi_module_exit(); + return -ENOMEM; + } + + /* Prepare input device, but don't register */ + generic_inputdev->name = + "Loongson Generic Laptop/All-in-one Extra Buttons"; + generic_inputdev->phys = LSACPI_DRVR_NAME "/input0"; + generic_inputdev->id.bustype = BUS_HOST; + generic_inputdev->dev.parent = NULL; + + /* Init subdrivers */ + for (i = 0; i < ARRAY_SIZE(generic_sub_drivers); i++) { + ret = generic_subdriver_init(&generic_sub_drivers[i]); + if (ret < 0) { + generic_acpi_module_exit(); + return ret; + } + } + + ret = input_register_device(generic_inputdev); + if (ret < 0) { + pr_err("unable to register input device\n"); + generic_acpi_module_exit(); + return ret; 
+ } + + input_device_registered = 1; + + if (acpi_evalf(hkey_handle, &status, "ECBG", "d")) { + pr_info("Loongson Laptop used, init brightness is 0x%x\n", status); + ret = ls_laptop_backlight_register(); + if (ret < 0) + pr_err("Loongson Laptop:laptop-backlight device register failed\n"); + } else + pr_info("Loongson Laptop :laptop-backlight device is not in use\n"); + return 0; +} + +MODULE_ALIAS("platform:ls-laptop"); +MODULE_AUTHOR("lvjianmin "); +MODULE_DESCRIPTION(LSACPI_DESC); +MODULE_VERSION(LSACPI_VERSION); +MODULE_LICENSE("GPL"); + +module_init(generic_acpi_module_init); +module_exit(generic_acpi_module_exit); -- Gitee From f20a2dd13689eff911cbfc2badca52868801b551 Mon Sep 17 00:00:00 2001 From: Tianli Xiong Date: Fri, 28 Oct 2022 17:36:41 +0800 Subject: [PATCH 14/36] irqchip/loongson-liointc: Set different isr for differnt core LoongArch inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP -------------------------------- Signed-off-by: Tianli Xiong Change-Id: I397b141f87598267d52917cb273334ce647495cf --- drivers/irqchip/irq-loongson-liointc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-loongson-liointc.c b/drivers/irqchip/irq-loongson-liointc.c index 1ba889a165f2..9f0b2ec2a2d2 100644 --- a/drivers/irqchip/irq-loongson-liointc.c +++ b/drivers/irqchip/irq-loongson-liointc.c @@ -28,7 +28,7 @@ #define LIOINTC_INTC_CHIP_START 0x20 -#define LIOINTC_REG_INTC_STATUS (LIOINTC_INTC_CHIP_START + 0x20) +#define LIOINTC_REG_INTC_STATUS(cpuid) (LIOINTC_INTC_CHIP_START + 0x20 + (cpuid) * 8) #define LIOINTC_REG_INTC_EN_STATUS (LIOINTC_INTC_CHIP_START + 0x04) #define LIOINTC_REG_INTC_ENABLE (LIOINTC_INTC_CHIP_START + 0x08) #define LIOINTC_REG_INTC_DISABLE (LIOINTC_INTC_CHIP_START + 0x0c) @@ -196,7 +196,7 @@ static int liointc_init(phys_addr_t addr, unsigned long size, int revision, goto out_free_priv; for (i = 0; i < LIOINTC_NUM_CORES; i++) - priv->core_isr[i] = base + LIOINTC_REG_INTC_STATUS; 
+ priv->core_isr[i] = base + LIOINTC_REG_INTC_STATUS(i); for (i = 0; i < LIOINTC_NUM_PARENT; i++) priv->handler[i].parent_int_map = parent_int_map[i]; -- Gitee From f0c990f834a966733b589a16d5d045180052c8a8 Mon Sep 17 00:00:00 2001 From: zhangtianyang Date: Mon, 19 Sep 2022 11:31:22 +0800 Subject: [PATCH 15/36] LoongArch: Support Power Manager LoongArch inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP -------------------------------- Signed-off-by: yangqiming Signed-off-by: zhangtianyang Change-Id: If6112fafdf5968ea731fb97ab6c764b34e4a7740 --- arch/loongarch/Kconfig | 8 ++ arch/loongarch/Makefile | 4 + arch/loongarch/configs/loongson3_defconfig | 2 +- arch/loongarch/include/asm/acpi.h | 8 +- arch/loongarch/include/asm/bootinfo.h | 4 + arch/loongarch/include/asm/loongson.h | 32 ----- arch/loongarch/include/asm/suspend.h | 10 ++ arch/loongarch/include/asm/time.h | 1 + arch/loongarch/kernel/Makefile | 3 +- arch/loongarch/kernel/acpi.c | 32 +++++ arch/loongarch/kernel/asm-offsets.c | 12 ++ arch/loongarch/kernel/platform.c | 45 +++++++ arch/loongarch/kernel/setup.c | 16 +++ arch/loongarch/kernel/smp.c | 1 + arch/loongarch/kernel/time.c | 7 +- arch/loongarch/power/Makefile | 4 + arch/loongarch/power/cpu.c | 45 +++++++ arch/loongarch/power/hibernate.c | 21 +++ arch/loongarch/power/hibernate_asm.S | 68 ++++++++++ arch/loongarch/power/suspend.c | 145 +++++++++++++++++++++ arch/loongarch/power/suspend_asm.S | 120 +++++++++++++++++ 21 files changed, 552 insertions(+), 36 deletions(-) create mode 100644 arch/loongarch/include/asm/suspend.h create mode 100644 arch/loongarch/kernel/platform.c create mode 100644 arch/loongarch/power/Makefile create mode 100644 arch/loongarch/power/cpu.c create mode 100644 arch/loongarch/power/hibernate.c create mode 100644 arch/loongarch/power/hibernate_asm.S create mode 100644 arch/loongarch/power/suspend.c create mode 100644 arch/loongarch/power/suspend_asm.S diff --git a/arch/loongarch/Kconfig 
b/arch/loongarch/Kconfig index 77de7a1370f7..c8619a3c16f8 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -510,6 +510,14 @@ config ARCH_MMAP_RND_BITS_MAX menu "Power management options" +config ARCH_HIBERNATION_POSSIBLE + def_bool y + +config ARCH_SUSPEND_POSSIBLE + def_bool y + +source "kernel/power/Kconfig" + source "drivers/acpi/Kconfig" endmenu diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index 3ab3625946a9..964b779b130d 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -96,9 +96,13 @@ endif head-y := arch/loongarch/kernel/head.o core-y += arch/loongarch/ + libs-y += arch/loongarch/lib/ libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a +# suspend and hibernation support +drivers-$(CONFIG_PM) += arch/loongarch/power/ + ifeq ($(KBUILD_EXTMOD),) prepare: vdso_prepare vdso_prepare: prepare0 diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 3b4b63235fa8..def76987a4c5 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -37,8 +37,8 @@ CONFIG_PERF_EVENTS=y # CONFIG_COMPAT_BRK is not set CONFIG_CPU_HAS_LSX=y CONFIG_CPU_HAS_LASX=y -CONFIG_HOTPLUG_CPU=y CONFIG_NUMA=y +CONFIG_HIBERNATION=y CONFIG_ACPI_SPCR_TABLE=y CONFIG_ACPI_DOCK=y CONFIG_ACPI_IPMI=m diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h index 825c2519b9d1..31f140d4ef26 100644 --- a/arch/loongarch/include/asm/acpi.h +++ b/arch/loongarch/include/asm/acpi.h @@ -7,7 +7,7 @@ #ifndef _ASM_LOONGARCH_ACPI_H #define _ASM_LOONGARCH_ACPI_H - +#include #ifdef CONFIG_ACPI extern int acpi_strict; extern int acpi_disabled; @@ -35,4 +35,10 @@ extern struct list_head acpi_wakeup_device_list; #define ACPI_TABLE_UPGRADE_MAX_PHYS ARCH_LOW_ADDRESS_LIMIT +static inline unsigned long acpi_get_wakeup_address(void) +{ + return (unsigned long)loongarch_wakeup_start; +} +extern int loongarch_acpi_suspend(void); 
+extern int (*acpi_suspend_lowlevel)(void); #endif /* _ASM_LOONGARCH_ACPI_H */ diff --git a/arch/loongarch/include/asm/bootinfo.h b/arch/loongarch/include/asm/bootinfo.h index 8e5881bc5ad1..068ea523260e 100644 --- a/arch/loongarch/include/asm/bootinfo.h +++ b/arch/loongarch/include/asm/bootinfo.h @@ -33,6 +33,10 @@ struct loongson_system_configuration { int cores_per_package; unsigned long cores_io_master; const char *cpuname; + u64 suspend_addr; + u64 gpe0_ena_reg; + u8 pcie_wake_enabled; + u8 is_soc_cpu; }; extern u64 efi_system_table; diff --git a/arch/loongarch/include/asm/loongson.h b/arch/loongarch/include/asm/loongson.h index 6e8f6972ceb6..e4108f674c4e 100644 --- a/arch/loongarch/include/asm/loongson.h +++ b/arch/loongarch/include/asm/loongson.h @@ -70,8 +70,6 @@ static inline void xconf_writeq(u64 val64, volatile void __iomem *addr) #define LS7A_CHIPCFG_REG_BASE (LS7A_PCH_REG_BASE + 0x00010000) /* MISC reg base */ #define LS7A_MISC_REG_BASE (LS7A_PCH_REG_BASE + 0x00080000) -/* ACPI regs */ -#define LS7A_ACPI_REG_BASE (LS7A_MISC_REG_BASE + 0x00050000) /* RTC regs */ #define LS7A_RTC_REG_BASE (LS7A_MISC_REG_BASE + 0x00050100) @@ -93,36 +91,6 @@ static inline void xconf_writeq(u64 val64, volatile void __iomem *addr) #define LS7A_LPC_INT_CLR (volatile void *)TO_UNCACHE(LS7A_PCH_REG_BASE + 0x200c) #define LS7A_LPC_INT_POL (volatile void *)TO_UNCACHE(LS7A_PCH_REG_BASE + 0x2010) -#define LS7A_PMCON_SOC_REG (volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x000) -#define LS7A_PMCON_RESUME_REG (volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x004) -#define LS7A_PMCON_RTC_REG (volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x008) -#define LS7A_PM1_EVT_REG (volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x00c) -#define LS7A_PM1_ENA_REG (volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x010) -#define LS7A_PM1_CNT_REG (volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x014) -#define LS7A_PM1_TMR_REG (volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x018) -#define 
LS7A_P_CNT_REG (volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x01c) -#define LS7A_GPE0_STS_REG (volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x028) -#define LS7A_GPE0_ENA_REG (volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x02c) -#define LS7A_RST_CNT_REG (volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x030) -#define LS7A_WD_SET_REG (volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x034) -#define LS7A_WD_TIMER_REG (volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x038) -#define LS7A_THSENS_CNT_REG (volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x04c) -#define LS7A_GEN_RTC_1_REG (volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x050) -#define LS7A_GEN_RTC_2_REG (volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x054) -#define LS7A_DPM_CFG_REG (volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x400) -#define LS7A_DPM_STS_REG (volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x404) -#define LS7A_DPM_CNT_REG (volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x408) - -typedef enum { - ACPI_PCI_HOTPLUG_STATUS = 1 << 1, - ACPI_CPU_HOTPLUG_STATUS = 1 << 2, - ACPI_MEM_HOTPLUG_STATUS = 1 << 3, - ACPI_POWERBUTTON_STATUS = 1 << 8, - ACPI_RTC_WAKE_STATUS = 1 << 10, - ACPI_PCI_WAKE_STATUS = 1 << 14, - ACPI_ANY_WAKE_STATUS = 1 << 15, -} AcpiEventStatusBits; - #define HT1LO_OFFSET 0xe0000000000UL /* PCI Configuration Space Base */ diff --git a/arch/loongarch/include/asm/suspend.h b/arch/loongarch/include/asm/suspend.h new file mode 100644 index 000000000000..9c42f0fee92f --- /dev/null +++ b/arch/loongarch/include/asm/suspend.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_LOONGARCH_SUSPEND_H +#define _ASM_LOONGARCH_SUSPEND_H + +void arch_common_resume(void); +void arch_common_suspend(void); +extern void loongarch_suspend_enter(void); +extern void loongarch_wakeup_start(void); + +#endif /* _ASM_LOONGARCH_SUSPEND_H */ diff --git a/arch/loongarch/include/asm/time.h b/arch/loongarch/include/asm/time.h index 2eae219301d0..d160d2cf6831 100644 --- 
a/arch/loongarch/include/asm/time.h +++ b/arch/loongarch/include/asm/time.h @@ -13,6 +13,7 @@ extern u64 cpu_clock_freq; extern u64 const_clock_freq; extern void sync_counter(void); +extern void save_counter(void); static inline unsigned int calc_const_freq(void) { diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 0b2aea3d94ef..11cc09679b40 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -7,7 +7,8 @@ extra-y := head.o vmlinux.lds obj-y += cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \ traps.o irq.o idle.o process.o dma.o mem.o io.o reset.o switch.o \ - elf.o legacy_boot.o syscall.o signal.o time.o topology.o inst.o ptrace.o vdso.o + elf.o legacy_boot.o syscall.o signal.o time.o topology.o inst.o ptrace.o vdso.o\ + platform.o obj-$(CONFIG_ACPI) += acpi.o obj-$(CONFIG_EFI) += efi.o diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 3d9759c3ccb5..1a2e4c478c9e 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -74,6 +74,30 @@ void __init acpi_boot_table_init(void) } } +static int __init acpi_parse_fadt(struct acpi_table_header *table) +{ + u64 gpe0_ena; + + if (acpi_gbl_reduced_hardware) + return 0; + + if (acpi_gbl_FADT.xgpe0_block.space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY) + goto err; + gpe0_ena = acpi_gbl_FADT.xgpe0_block.address + + acpi_gbl_FADT.gpe0_block_length / 2; + if (!gpe0_ena) + goto err; + + loongson_sysconf.gpe0_ena_reg = TO_UNCACHE(gpe0_ena); + + return 0; +err: + pr_err(PREFIX "Invalid BIOS FADT, disabling ACPI\n"); + disable_acpi(); + return -1; +} + + #ifdef CONFIG_SMP int set_processor_mask(u32 id, u32 flags) { @@ -169,6 +193,12 @@ static void __init acpi_process_madt(void) loongson_sysconf.nr_cpus = num_processors; } +#ifdef CONFIG_ACPI_SLEEP +int (*acpi_suspend_lowlevel)(void) = loongarch_acpi_suspend; +#else +int (*acpi_suspend_lowlevel)(void); +#endif + int __init acpi_boot_init(void) { /* @@ -179,6 +209,8 @@ int 
__init acpi_boot_init(void) loongson_sysconf.boot_cpu_id = read_csr_cpuid(); + acpi_table_parse(ACPI_SIG_FADT, acpi_parse_fadt); + /* * Process the Multiple APIC Description Table (MADT), if present */ diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c index 812b0002dbc8..8733fc347b3e 100644 --- a/arch/loongarch/kernel/asm-offsets.c +++ b/arch/loongarch/kernel/asm-offsets.c @@ -257,3 +257,15 @@ void output_smpboot_defines(void) BLANK(); } #endif +#ifdef CONFIG_HIBERNATION +void output_pbe_defines(void) +{ + COMMENT(" Linux struct pbe offsets. "); + OFFSET(PBE_ADDRESS, pbe, address); + OFFSET(PBE_ORIG_ADDRESS, pbe, orig_address); + OFFSET(PBE_NEXT, pbe, next); + DEFINE(PBE_SIZE, sizeof(struct pbe)); + BLANK(); +} +#endif + diff --git a/arch/loongarch/kernel/platform.c b/arch/loongarch/kernel/platform.c new file mode 100644 index 000000000000..da158221fae1 --- /dev/null +++ b/arch/loongarch/kernel/platform.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2009 Lemote Inc. + * Author: Wu Zhangjin, wuzhangjin@gmail.com + * Copyright (C) 2020 Loongson Technology Co., Ltd. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +extern int loongson_acpi_init(void); + +static int __init loongson3_acpi_suspend_init(void) +{ +#ifdef CONFIG_ACPI + acpi_status status; + unsigned long long suspend_addr = 0; + + if (acpi_disabled || acpi_gbl_reduced_hardware) + return 0; + + acpi_write_bit_register(ACPI_BITREG_SCI_ENABLE, 1); + + status = acpi_evaluate_integer(NULL, "\\SADR", NULL, &suspend_addr); + if (ACPI_FAILURE(status) || !suspend_addr) { + pr_err("ACPI S3 is not support!\n"); + return -1; + } + loongson_sysconf.suspend_addr = (u64)phys_to_virt(TO_PHYS(suspend_addr)); +#endif + return 0; +} + +device_initcall(loongson3_acpi_suspend_init); diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index eca98e9cca25..887ae4a92438 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -198,6 +198,21 @@ static int __init early_parse_mem(char *p) return 0; } early_param("mem", early_parse_mem); +static void __init set_pcie_wakeup(void) +{ + acpi_status status; + u32 value; + + if (loongson_sysconf.is_soc_cpu || acpi_gbl_reduced_hardware) + return; + + status = acpi_read_bit_register(ACPI_BITREG_PCIEXP_WAKE_DISABLE, &value); + if (ACPI_FAILURE(status)) { + return; + } + loongson_sysconf.pcie_wake_enabled = !value; +} + void __init platform_init(void) { @@ -210,6 +225,7 @@ void __init platform_init(void) acpi_boot_table_init(); acpi_boot_init(); #endif + set_pcie_wakeup(); #ifdef CONFIG_NUMA init_numa_memory(); diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 883d8d540f67..7e6dde83e6ac 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index 786735dcc8d6..8d331a5fae5a 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -115,7 +115,12 @@ 
static unsigned long __init get_loops_per_jiffy(void) return lpj; } -static long init_timeval; +static long init_timeval __nosavedata; + +void save_counter(void) +{ + init_timeval = drdtime(); +} void sync_counter(void) { diff --git a/arch/loongarch/power/Makefile b/arch/loongarch/power/Makefile new file mode 100644 index 000000000000..a77c31a96178 --- /dev/null +++ b/arch/loongarch/power/Makefile @@ -0,0 +1,4 @@ +OBJECT_FILES_NON_STANDARD_suspend_asm.o := y + +obj-$(CONFIG_SUSPEND) += suspend.o suspend_asm.o +obj-$(CONFIG_HIBERNATION) += cpu.o hibernate.o hibernate_asm.o diff --git a/arch/loongarch/power/cpu.c b/arch/loongarch/power/cpu.c new file mode 100644 index 000000000000..e3d8fc1099e2 --- /dev/null +++ b/arch/loongarch/power/cpu.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Suspend support specific for loongarch. + * + * Licensed under the GPLv2 + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#include +#include + +static u64 saved_crmd; +static u64 saved_prmd; +static u64 saved_euen; +static u64 saved_ecfg; +struct pt_regs saved_regs; + +void save_processor_state(void) +{ + saved_crmd = csr_read32(LOONGARCH_CSR_CRMD); + saved_prmd = csr_read32(LOONGARCH_CSR_PRMD); + saved_euen = csr_read32(LOONGARCH_CSR_EUEN); + saved_ecfg = csr_read32(LOONGARCH_CSR_ECFG); + + if (is_fpu_owner()) + save_fp(current); +} + +void restore_processor_state(void) +{ + csr_write32(saved_crmd, LOONGARCH_CSR_CRMD); + csr_write32(saved_prmd, LOONGARCH_CSR_PRMD); + csr_write32(saved_euen, LOONGARCH_CSR_EUEN); + csr_write32(saved_ecfg, LOONGARCH_CSR_ECFG); + + if (is_fpu_owner()) + restore_fp(current); +} + +int pfn_is_nosave(unsigned long pfn) +{ + unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin)); + unsigned long nosave_end_pfn = PFN_UP(__pa(&__nosave_end)); + + return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); +} diff --git a/arch/loongarch/power/hibernate.c b/arch/loongarch/power/hibernate.c new file mode 100644 index 
000000000000..9050225d0d34 --- /dev/null +++ b/arch/loongarch/power/hibernate.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +extern int restore_image(void); +extern void enable_pcie_wakeup(void); +extern void swsusp_arch_save(void); + +int swsusp_arch_suspend(void) +{ + enable_pcie_wakeup(); + swsusp_arch_save(); + + return 0; +} + +int swsusp_arch_resume(void) +{ + /* Avoid TLB mismatch during and after kernel resume */ + local_flush_tlb_all(); + return restore_image(); +} diff --git a/arch/loongarch/power/hibernate_asm.S b/arch/loongarch/power/hibernate_asm.S new file mode 100644 index 000000000000..1874e473b293 --- /dev/null +++ b/arch/loongarch/power/hibernate_asm.S @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Hibernation support specific for loongarch - temporary page tables + * + * Licensed under the GPLv2 + * + * Copyright (C) 2009 Lemote Inc. + * Author: Hu Hongbing + * Wu Zhangjin + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#include +#include +#include +#include + +.text +SYM_FUNC_START(swsusp_arch_save) + la.abs t0, saved_regs + PTR_S ra, t0, PT_R1 + PTR_S sp, t0, PT_R3 + PTR_S fp, t0, PT_R22 + PTR_S tp, t0, PT_R2 + PTR_S s0, t0, PT_R23 + PTR_S s1, t0, PT_R24 + PTR_S s2, t0, PT_R25 + PTR_S s3, t0, PT_R26 + PTR_S s4, t0, PT_R27 + PTR_S s5, t0, PT_R28 + PTR_S s6, t0, PT_R29 + PTR_S s7, t0, PT_R30 + PTR_S s8, t0, PT_R31 + b swsusp_save +SYM_FUNC_END(swsusp_arch_save) + +SYM_FUNC_START(restore_image) + la.pcrel t0, restore_pblist + PTR_L t0, t0, 0 +0: + PTR_L t1, t0, PBE_ADDRESS /* source */ + PTR_L t2, t0, PBE_ORIG_ADDRESS /* destination */ + PTR_LI t3, _PAGE_SIZE + PTR_ADD t3, t3, t1 +1: + REG_L t8, t1, 0 + REG_S t8, t2, 0 + PTR_ADDI t1, t1, SZREG + PTR_ADDI t2, t2, SZREG + bne t1, t3, 1b + PTR_L t0, t0, PBE_NEXT + bnez t0, 0b + la.pcrel t0, saved_regs + PTR_L ra, t0, PT_R1 + PTR_L sp, t0, PT_R3 + PTR_L fp, t0, PT_R22 + PTR_L tp, t0, PT_R2 + PTR_L s0, t0, PT_R23 + PTR_L s1, t0, PT_R24 + PTR_L s2, t0, 
PT_R25 + PTR_L s3, t0, PT_R26 + PTR_L s4, t0, PT_R27 + PTR_L s5, t0, PT_R28 + PTR_L s6, t0, PT_R29 + PTR_L s7, t0, PT_R30 + PTR_L s8, t0, PT_R31 + PTR_LI a0, 0x0 + jirl zero, ra, 0 +SYM_FUNC_END(restore_image) diff --git a/arch/loongarch/power/suspend.c b/arch/loongarch/power/suspend.c new file mode 100644 index 000000000000..3d0b0291e8f0 --- /dev/null +++ b/arch/loongarch/power/suspend.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * loongson-specific suspend support + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. + * Author: Huacai Chen + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +u64 loongarch_suspend_addr; + +extern unsigned long eentry; +extern unsigned long tlbrentry; +struct saved_registers { + u32 ecfg; + u64 pgd; + u64 kpgd; + u32 pwctl0; + u32 pwctl1; + u32 euen; +}; +static struct saved_registers saved_regs; + +void arch_common_suspend(void) +{ + save_counter(); + saved_regs.pgd = csr_read64(LOONGARCH_CSR_PGDL); + saved_regs.kpgd = csr_read64(LOONGARCH_CSR_PGDH); + saved_regs.pwctl0 = csr_read32(LOONGARCH_CSR_PWCTL0); + saved_regs.pwctl1 = csr_read32(LOONGARCH_CSR_PWCTL1); + saved_regs.ecfg = csr_read32(LOONGARCH_CSR_ECFG); + saved_regs.euen = csr_read32(LOONGARCH_CSR_EUEN); + + loongarch_suspend_addr = loongson_sysconf.suspend_addr; +} + +void arch_common_resume(void) +{ + sync_counter(); + local_flush_tlb_all(); + csr_write64(per_cpu_offset(0), PERCPU_BASE_KS); + + csr_write64(saved_regs.pgd, LOONGARCH_CSR_PGDL); + csr_write64(saved_regs.kpgd, LOONGARCH_CSR_PGDH); + csr_write32(saved_regs.pwctl0, LOONGARCH_CSR_PWCTL0); + csr_write32(saved_regs.pwctl1, LOONGARCH_CSR_PWCTL1); + 
csr_write32(saved_regs.ecfg, LOONGARCH_CSR_ECFG); + csr_write32(saved_regs.euen, LOONGARCH_CSR_EUEN); + csr_write64(eentry, LOONGARCH_CSR_EENTRY); + csr_write64(tlbrentry, LOONGARCH_CSR_TLBRENTRY); + csr_write64(eentry, LOONGARCH_CSR_MERRENTRY); +} + +static void enable_gpe_wakeup(void) +{ + struct list_head *node, *next; + u32 data = 0; + + data = readl((volatile void *)loongson_sysconf.gpe0_ena_reg); + + list_for_each_safe(node, next, &acpi_wakeup_device_list) { + struct acpi_device *dev = + container_of(node, struct acpi_device, wakeup_list); + + if (!dev->wakeup.flags.valid + || ACPI_STATE_S3 > (u32) dev->wakeup.sleep_state + || !(device_may_wakeup(&dev->dev) + || dev->wakeup.prepare_count)) + continue; + + data |= (1 << dev->wakeup.gpe_number); + } + writel(data, (volatile void *)loongson_sysconf.gpe0_ena_reg); +} + +void enable_pcie_wakeup(void) +{ + u16 value; + + if (loongson_sysconf.is_soc_cpu || acpi_gbl_reduced_hardware) + return; + + acpi_write_bit_register(ACPI_BITREG_PCIEXP_WAKE_STATUS, 1); + + if (loongson_sysconf.pcie_wake_enabled) { + acpi_write_bit_register(ACPI_BITREG_PCIEXP_WAKE_DISABLE, 0); + } +} +EXPORT_SYMBOL_GPL(enable_pcie_wakeup); + +int loongarch_acpi_suspend(void) +{ + arch_common_suspend(); + enable_gpe_wakeup(); + enable_pcie_wakeup(); + /* processor specific suspend */ + loongarch_suspend_enter(); + arch_common_resume(); + + return 0; +} + +static int plat_pm_callback(struct notifier_block *nb, unsigned long action, void *ptr) +{ + int ret = 0; + + switch (action) { + case PM_POST_SUSPEND: + enable_gpe_wakeup(); + break; + default: + break; + } + + return notifier_from_errno(ret); +} + +static int __init plat_pm_post_init(void) +{ + if (loongson_sysconf.is_soc_cpu || acpi_gbl_reduced_hardware) + return 0; + + enable_gpe_wakeup(); + pm_notifier(plat_pm_callback, -INT_MAX); + return 0; +} + +late_initcall_sync(plat_pm_post_init); diff --git a/arch/loongarch/power/suspend_asm.S b/arch/loongarch/power/suspend_asm.S new file mode 100644 
index 000000000000..781e38cd35e7 --- /dev/null +++ b/arch/loongarch/power/suspend_asm.S @@ -0,0 +1,120 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Sleep helper for Loongson-3 sleep mode. + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. + * Author: Huacai Chen + */ + +#include +#include +#include +#include +#include + + .extern loongarch_nr_nodes + .extern loongarch_suspend_addr + .extern loongarch_pcache_ways + .extern loongarch_pcache_sets + .extern loongarch_pcache_linesz + .extern loongarch_scache_ways + .extern loongarch_scache_sets + .extern loongarch_scache_linesz + + .text + .align 5 + +/* preparatory stuff */ +.macro SETUP_SLEEP + addi.d sp, sp, -PT_SIZE + st.d $r1, sp, PT_R1 + st.d $r2, sp, PT_R2 + st.d $r3, sp, PT_R3 + st.d $r4, sp, PT_R4 + st.d $r5, sp, PT_R5 + st.d $r6, sp, PT_R6 + st.d $r7, sp, PT_R7 + st.d $r8, sp, PT_R8 + st.d $r9, sp, PT_R9 + st.d $r10, sp, PT_R10 + st.d $r11, sp, PT_R11 + st.d $r20, sp, PT_R20 + st.d $r21, sp, PT_R21 + st.d $r22, sp, PT_R22 + st.d $r23, sp, PT_R23 + st.d $r24, sp, PT_R24 + st.d $r25, sp, PT_R25 + st.d $r26, sp, PT_R26 + st.d $r27, sp, PT_R27 + st.d $r28, sp, PT_R28 + st.d $r29, sp, PT_R29 + st.d $r30, sp, PT_R30 + st.d $r31, sp, PT_R31 + +#ifdef CONFIG_ACPI + la.pcrel t0, acpi_saved_sp + st.d sp, t0, 0 +#endif +.endm +/* Sleep code for Loongson-3 */ +SYM_CODE_START(loongarch_suspend_enter) + SETUP_SLEEP + bl cpu_flush_caches + + /* Pass RA and SP to BIOS, for machines without CMOS RAM */ + addi.d a1, sp, 0 + la.pcrel a0, loongarch_wakeup_start + + la.pcrel t0, loongarch_suspend_addr + ld.d t0, t0, 0 /* Call BIOS's STR sleep routine */ + jr t0 + nop +SYM_CODE_END(loongarch_suspend_enter) + +.macro SETUP_WAKEUP + + nop + ld.d $r1, sp, PT_R1 + ld.d $r2, sp, PT_R2 + ld.d $r3, sp, PT_R3 + ld.d $r4, sp, PT_R4 + ld.d $r5, sp, PT_R5 
+ ld.d $r6, sp, PT_R6 + ld.d $r7, sp, PT_R7 + ld.d $r8, sp, PT_R8 + ld.d $r9, sp, PT_R9 + ld.d $r10, sp, PT_R10 + ld.d $r11, sp, PT_R11 + ld.d $r20, sp, PT_R20 + ld.d $r21, sp, PT_R21 + ld.d $r22, sp, PT_R22 + ld.d $r23, sp, PT_R23 + ld.d $r24, sp, PT_R24 + ld.d $r25, sp, PT_R25 + ld.d $r26, sp, PT_R26 + ld.d $r27, sp, PT_R27 + ld.d $r28, sp, PT_R28 + ld.d $r29, sp, PT_R29 + ld.d $r30, sp, PT_R30 + ld.d $r31, sp, PT_R31 +.endm + + /* This is where we return upon wakeup. + * Reload all of the registers and return. + */ +SYM_CODE_START(loongarch_wakeup_start) + li.d t0, CSR_DMW0_INIT # UC, PLV0 + csrwr t0, LOONGARCH_CSR_DMWIN0 + li.d t0, CSR_DMW1_INIT # CA, PLV0 + csrwr t0, LOONGARCH_CSR_DMWIN1 + + la.pcrel t0, acpi_saved_sp + ld.d sp, t0, 0 + SETUP_WAKEUP + addi.d sp, sp, PT_SIZE + jr ra +SYM_CODE_END(loongarch_wakeup_start) -- Gitee From 12f8311e0dd29e08cc5bae6ca09de56b37829543 Mon Sep 17 00:00:00 2001 From: Jun Yi Date: Fri, 11 Nov 2022 17:41:49 +0800 Subject: [PATCH 16/36] LoongArch: Remove redudant csr save/restore LoongArch inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP -------------------------------- Signed-off-by: Jun Yi Change-Id: Ib034b577cbebbbe56e936bf541348db271c2a9db --- arch/loongarch/include/asm/stackframe.h | 9 --------- arch/loongarch/kernel/entry.S | 8 -------- arch/loongarch/kernel/switch.S | 6 ------ 3 files changed, 23 deletions(-) diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h index 4ca953062b5b..733dc9e96241 100644 --- a/arch/loongarch/include/asm/stackframe.h +++ b/arch/loongarch/include/asm/stackframe.h @@ -114,14 +114,6 @@ LONG_S zero, sp, PT_R0 csrrd t0, LOONGARCH_CSR_PRMD LONG_S t0, sp, PT_PRMD - csrrd t0, LOONGARCH_CSR_CRMD - LONG_S t0, sp, PT_CRMD - csrrd t0, LOONGARCH_CSR_EUEN - LONG_S t0, sp, PT_EUEN - csrrd t0, LOONGARCH_CSR_ECFG - LONG_S t0, sp, PT_ECFG - csrrd t0, LOONGARCH_CSR_ESTAT - PTR_S t0, sp, PT_ESTAT cfi_st ra, PT_R1, \docfi cfi_st a0, 
PT_R4, \docfi cfi_st a1, PT_R5, \docfi @@ -140,7 +132,6 @@ cfi_st fp, PT_R22, \docfi /* Set thread_info if we're coming from user mode */ - csrrd t0, LOONGARCH_CSR_PRMD andi t0, t0, 0x3 /* extract pplv bit */ beqz t0, 9f diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S index d53b631c9022..893e632e76da 100644 --- a/arch/loongarch/kernel/entry.S +++ b/arch/loongarch/kernel/entry.S @@ -31,14 +31,6 @@ SYM_FUNC_START(handle_syscall) st.d zero, sp, PT_R0 csrrd t2, LOONGARCH_CSR_PRMD st.d t2, sp, PT_PRMD - csrrd t2, LOONGARCH_CSR_CRMD - st.d t2, sp, PT_CRMD - csrrd t2, LOONGARCH_CSR_EUEN - st.d t2, sp, PT_EUEN - csrrd t2, LOONGARCH_CSR_ECFG - st.d t2, sp, PT_ECFG - csrrd t2, LOONGARCH_CSR_ESTAT - st.d t2, sp, PT_ESTAT cfi_st ra, PT_R1 cfi_st a0, PT_R4 cfi_st a1, PT_R5 diff --git a/arch/loongarch/kernel/switch.S b/arch/loongarch/kernel/switch.S index 43ebbc3990f7..d0363437d25f 100644 --- a/arch/loongarch/kernel/switch.S +++ b/arch/loongarch/kernel/switch.S @@ -16,9 +16,6 @@ */ .align 5 SYM_FUNC_START(__switch_to) - csrrd t1, LOONGARCH_CSR_PRMD - stptr.d t1, a0, THREAD_CSRPRMD - cpu_save_nonscratch a0 stptr.d ra, a0, THREAD_REG01 stptr.d a3, a0, THREAD_SCHED_RA @@ -30,8 +27,5 @@ SYM_FUNC_START(__switch_to) PTR_ADD t0, t0, tp set_saved_sp t0, t1, t2 - ldptr.d t1, a1, THREAD_CSRPRMD - csrwr t1, LOONGARCH_CSR_PRMD - jr ra SYM_FUNC_END(__switch_to) -- Gitee From 4e992fb1cac710cf845c1945d5618ccf58954a3c Mon Sep 17 00:00:00 2001 From: Jianmin Lv Date: Thu, 8 Sep 2022 16:58:31 +0800 Subject: [PATCH 17/36] LoongArch: Enhance booting and resume compatibility LoongArch inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP -------------------------------- - For bootstrap processor booting, firmware legacy mmu environment is used on calling kernel_entry(), if the first instruction and next instructions of it are not in one page built by firmware, the instructions outside the page containing first entry instruction will trigger
tlb refill exception, which will cause a bug because the tlb refill ebase has not been configured. To avoid the disaster, we should ensure the kernel_entry to be page aligned with 4k page size, so that for pages of size >= 4k built by firmware, the instructions of kernel_entry() will be in one firmware's page. - For S3 resume from firmware, firmware legacy mmu environment is also used on calling wakeup entry, so the wakeup entry is required to be 4k page aligned too. And for compatibility with calling wakeup entry by using physical address, a jump to itself in the wakeup entry is required. REBASE: 1 rebase changes at head.S to commit a8fc1e90de478fb711ccf7e1a7115370388ab7fc LoongArch: Add boot and setup routines 2 rebase changes at suspend_asm.S to b4206b674b85d365097a40485c87187dfbea9a39 LoongArch:Support Power Manager Change-Id: I5304f271c9440a8526466cfe6be44da43466cd90 Signed-off-by: Jianmin Lv --- arch/loongarch/kernel/head.S | 1 + arch/loongarch/power/suspend_asm.S | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S index e2074cd4fff4..58254fd1999d 100644 --- a/arch/loongarch/kernel/head.S +++ b/arch/loongarch/kernel/head.S @@ -34,6 +34,7 @@ SYM_DATA(kernel_offset, .long kernel_offset - _text); __REF +.align 12 SYM_CODE_START(kernel_entry) # kernel entry point /* Config direct window and set PG */ diff --git a/arch/loongarch/power/suspend_asm.S b/arch/loongarch/power/suspend_asm.S index 781e38cd35e7..ceac577c3794 100644 --- a/arch/loongarch/power/suspend_asm.S +++ b/arch/loongarch/power/suspend_asm.S @@ -106,12 +106,16 @@ SYM_CODE_END(loongarch_suspend_enter) /* This is where we return upon wakeup. * Reload all of the registers and return.
*/ + .align 12 SYM_CODE_START(loongarch_wakeup_start) li.d t0, CSR_DMW0_INIT # UC, PLV0 csrwr t0, LOONGARCH_CSR_DMWIN0 li.d t0, CSR_DMW1_INIT # CA, PLV0 csrwr t0, LOONGARCH_CSR_DMWIN1 + la.abs t0, 0f + jirl zero, t0, 0 +0: la.pcrel t0, acpi_saved_sp ld.d sp, t0, 0 SETUP_WAKEUP -- Gitee From a51ad547c331cd683dec0657f480c265a1b9e8cd Mon Sep 17 00:00:00 2001 From: yangqiming Date: Mon, 12 Dec 2022 14:37:46 +0800 Subject: [PATCH 18/36] LoongArch: Support loader and kernel interface V40 LoongArch inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP -------------------------------- 1. LoongArch uses the new interface parameter form: a0: efi flag, a1: command line, a2: system table 2. Boot memmap and initrd are passed to kernel via GUID. Signed-off-by: Qiming Yang Signed-off-by: Jianmin Lv Change-Id: I193db4238e28962bdc4ae82a232d0c343f4f32f3 --- arch/loongarch/include/asm/efi.h | 17 +++++++++ arch/loongarch/kernel/efi.c | 55 ++++++++++++++++++++++++++++- arch/loongarch/kernel/env.c | 17 ++++++++- arch/loongarch/kernel/legacy_boot.c | 2 +- arch/loongarch/kernel/setup.c | 2 +- 5 files changed, 89 insertions(+), 4 deletions(-) diff --git a/arch/loongarch/include/asm/efi.h b/arch/loongarch/include/asm/efi.h index 85176fc629b0..9499abaa6baf 100644 --- a/arch/loongarch/include/asm/efi.h +++ b/arch/loongarch/include/asm/efi.h @@ -18,6 +18,23 @@ void __init efi_runtime_init(void); #define EFI_ALLOC_ALIGN SZ_64K #define EFI_RT_VIRTUAL_OFFSET CSR_DMW0_BASE +#define LINUX_EFI_INITRD_MEDIA_GUID EFI_GUID(0x5568e427, 0x68fc, 0x4f3d, 0xac, 0x74, 0xca, 0x55, 0x52, 0x31, 0xcc, 0x68) +#define LINUX_EFI_NEW_MEMMAP_GUID EFI_GUID(0x800f683f, 0xd08b, 0x423a, 0xa2, 0x93, 0x96, 0x5c, 0x3c, 0x6f, 0xe2, 0xb4) + +struct linux_efi_initrd { + unsigned long base; + unsigned long size; +}; + +struct efi_new_memmap { + unsigned long map_size; + unsigned long desc_size; + u32 desc_ver; + unsigned long map_key; + unsigned long buff_size; + efi_memory_desc_t map[]; +}; + 
static inline struct screen_info *alloc_screen_info(void) { return &screen_info; diff --git a/arch/loongarch/kernel/efi.c b/arch/loongarch/kernel/efi.c index f48ef0d8d518..7cf4a11732d3 100644 --- a/arch/loongarch/kernel/efi.c +++ b/arch/loongarch/kernel/efi.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -27,11 +28,18 @@ #include #include "legacy_boot.h" +static __initdata unsigned long new_memmap = EFI_INVALID_TABLE_ADDR; +static __initdata unsigned long initrd = EFI_INVALID_TABLE_ADDR; + static unsigned long efi_nr_tables; static unsigned long efi_config_table; static efi_system_table_t *efi_systab; -static efi_config_table_type_t arch_tables[] __initdata = {{},}; +static efi_config_table_type_t arch_tables[] __initdata = { + {LINUX_EFI_NEW_MEMMAP_GUID, &new_memmap, "NEWMEM"}, + {LINUX_EFI_INITRD_MEDIA_GUID, &initrd, "INITRD"}, + {}, +}; static __initdata pgd_t *pgd_efi; static int __init efimap_populate_hugepages( @@ -184,6 +192,9 @@ static int __init set_virtual_map(void) (efi_memory_desc_t *)TO_PHYS((unsigned long)runtime_map)); efi_unmap_pgt(); + if (status != EFI_SUCCESS) + return -1; + return 0; } @@ -213,6 +224,44 @@ void __init efi_runtime_init(void) set_bit(EFI_RUNTIME_SERVICES, &efi.flags); } +static void __init get_initrd(void) +{ + if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && + initrd != EFI_INVALID_TABLE_ADDR && phys_initrd_size == 0) { + struct linux_efi_initrd *tbl; + + tbl = early_memremap(initrd, sizeof(*tbl)); + if (tbl) { + phys_initrd_start = tbl->base; + phys_initrd_size = tbl->size; + early_memunmap(tbl, sizeof(*tbl)); + } + } +} + +static void __init init_new_memmap(void) +{ + struct efi_new_memmap *tbl; + + if (new_memmap == EFI_INVALID_TABLE_ADDR) + return; + + tbl = early_memremap_ro(new_memmap, sizeof(*tbl)); + if (tbl) { + struct efi_memory_map_data data; + + data.phys_map = new_memmap + sizeof(*tbl); + data.size = tbl->map_size; + data.desc_size = tbl->desc_size; + data.desc_version = tbl->desc_ver; + + if 
(efi_memmap_init_early(&data) < 0) + panic("Unable to map EFI memory map.\n"); + + early_memunmap(tbl, sizeof(*tbl)); + } +} + void __init loongson_efi_init(void) { int size; @@ -237,6 +286,10 @@ void __init loongson_efi_init(void) efi_config_parse_tables(config_tables, efi_systab->nr_tables, arch_tables); early_memunmap(config_tables, efi_nr_tables * size); + get_initrd(); + + init_new_memmap(); + if (screen_info.orig_video_isVGA == VIDEO_TYPE_EFI) memblock_reserve(screen_info.lfb_base, screen_info.lfb_size); } diff --git a/arch/loongarch/kernel/env.c b/arch/loongarch/kernel/env.c index 596e6635368e..64490a2764d2 100644 --- a/arch/loongarch/kernel/env.c +++ b/arch/loongarch/kernel/env.c @@ -22,7 +22,8 @@ void __init init_environ(void) { int efi_boot = fw_arg0; struct efi_memory_map_data data; - void *fdt_ptr = early_memremap_ro(fw_arg1, SZ_64K); + char *cmdline; + void *fdt_ptr; if (efi_bp) return; @@ -32,6 +33,20 @@ void __init init_environ(void) else clear_bit(EFI_BOOT, &efi.flags); + if (fw_arg2 == 0) + goto parse_fdt; + + cmdline = early_memremap_ro(fw_arg1, COMMAND_LINE_SIZE); + strscpy(boot_command_line, cmdline, COMMAND_LINE_SIZE); + early_memunmap(cmdline, COMMAND_LINE_SIZE); + + efi_system_table = fw_arg2; + + return; + +parse_fdt: + fdt_ptr = early_memremap_ro(fw_arg1, SZ_64K); + early_init_dt_scan(fdt_ptr); early_init_fdt_reserve_self(); efi_system_table = efi_get_fdt_params(&data); diff --git a/arch/loongarch/kernel/legacy_boot.c b/arch/loongarch/kernel/legacy_boot.c index d8f749653939..99b69173460d 100644 --- a/arch/loongarch/kernel/legacy_boot.c +++ b/arch/loongarch/kernel/legacy_boot.c @@ -525,7 +525,7 @@ unsigned long legacy_boot_init(unsigned long argc, unsigned long cmdptr, unsigne { int ret; - if (!bpi) + if (!bpi || (argc < 2)) return -1; efi_bp = (struct boot_params *)bpi; bpi_version = get_bpi_version(&efi_bp->signature); diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 887ae4a92438..35d99525485b 100644 --- 
a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -216,7 +216,6 @@ static void __init set_pcie_wakeup(void) void __init platform_init(void) { - loongson_efi_init(); #ifdef CONFIG_ACPI_TABLE_UPGRADE acpi_table_upgrade(); #endif @@ -378,6 +377,7 @@ void __init setup_arch(char **cmdline_p) legacy_boot_init(fw_arg0, fw_arg1, fw_arg2); init_environ(); + loongson_efi_init(); memblock_init(); pagetable_init(); parse_early_param(); -- Gitee From 0dde507c5edf256b557dbd8d1dccb50972474fc8 Mon Sep 17 00:00:00 2001 From: Hongchen Zhang Date: Wed, 30 Nov 2022 18:38:34 +0800 Subject: [PATCH 19/36] LoongArch: use 40 bits address space for user LoongArch inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP -------------------------------- default to use 40 bits address space for user. Signed-off-by: Hongchen Zhang Change-Id: I23088f43e700ed9b4eb15df2cc9c58299a0582ff --- arch/loongarch/Kconfig | 7 +++++++ arch/loongarch/include/asm/pgtable.h | 4 ++++ 2 files changed, 11 insertions(+) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index c8619a3c16f8..7ff49eba631b 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -426,6 +426,13 @@ config NODES_SHIFT default "6" depends on NUMA +config VA_BITS_40 + bool "40-bits" + default y + depends on 64BIT + help + Support a maximum at least 40 bits of application virtual memory. 
+ config FORCE_MAX_ZONEORDER int "Maximum zone order" range 14 64 if PAGE_SIZE_64KB diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index e5ed54fce402..27aa3d03162d 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -42,7 +42,11 @@ #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) +#ifdef CONFIG_VA_BITS_40 +#define VA_BITS 40 +#else #define VA_BITS (PGDIR_SHIFT + (PAGE_SHIFT - 3)) +#endif #define PTRS_PER_PGD (PAGE_SIZE >> 3) #if CONFIG_PGTABLE_LEVELS > 3 -- Gitee From d8911a28f79ac88b385c6b99aace7278848e139c Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Fri, 9 Dec 2022 18:05:54 +0800 Subject: [PATCH 20/36] LoongArch: refresh usage of sync LoongArch inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP -------------------------------- A sync is currently used after each ll/sc pair as a read barrier. However, only the ll path requires the read barrier; the sc path does not need it. This patch fixes the issue.
Change-Id: Ib8a42f50d494e3e80cda0843d069668604b486e6 Signed-off-by: Bibo Mao --- arch/loongarch/include/asm/atomic.h | 8 ++++++++ arch/loongarch/include/asm/cmpxchg.h | 2 ++ arch/loongarch/include/asm/futex.h | 2 ++ arch/loongarch/include/asm/pgtable.h | 3 ++- 4 files changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/include/asm/atomic.h b/arch/loongarch/include/asm/atomic.h index 98a24f221746..aad2df1af323 100644 --- a/arch/loongarch/include/asm/atomic.h +++ b/arch/loongarch/include/asm/atomic.h @@ -162,8 +162,10 @@ static inline int arch_atomic_sub_if_positive(int i, atomic_t *v) " bltz %0, 2f \n" " sc.w %1, %2 \n" " beqz %1, 1b \n" + " b 3f \n" "2: \n" __WEAK_LLSC_MB + "3: \n" : "=&r" (result), "=&r" (temp), "+ZC" (v->counter) : "I" (-i)); } else { @@ -174,8 +176,10 @@ static inline int arch_atomic_sub_if_positive(int i, atomic_t *v) " bltz %0, 2f \n" " sc.w %1, %2 \n" " beqz %1, 1b \n" + " b 3f \n" "2: \n" __WEAK_LLSC_MB + "3: \n" : "=&r" (result), "=&r" (temp), "+ZC" (v->counter) : "r" (i)); } @@ -323,8 +327,10 @@ static inline long arch_atomic64_sub_if_positive(long i, atomic64_t *v) " bltz %0, 2f \n" " sc.d %1, %2 \n" " beqz %1, 1b \n" + " b 3f \n" "2: \n" __WEAK_LLSC_MB + "3: \n" : "=&r" (result), "=&r" (temp), "+ZC" (v->counter) : "I" (-i)); } else { @@ -335,8 +341,10 @@ static inline long arch_atomic64_sub_if_positive(long i, atomic64_t *v) " bltz %0, 2f \n" " sc.d %1, %2 \n" " beqz %1, 1b \n" + " b 3f \n" "2: \n" __WEAK_LLSC_MB + "3: \n" : "=&r" (result), "=&r" (temp), "+ZC" (v->counter) : "r" (i)); } diff --git a/arch/loongarch/include/asm/cmpxchg.h b/arch/loongarch/include/asm/cmpxchg.h index ae19e33c7754..1a47d5ee1196 100644 --- a/arch/loongarch/include/asm/cmpxchg.h +++ b/arch/loongarch/include/asm/cmpxchg.h @@ -102,8 +102,10 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x, " move $t0, %z4 \n" \ " " st " $t0, %1 \n" \ " beqz $t0, 1b \n" \ + " b 3f \n" \ "2: \n" \ __WEAK_LLSC_MB \ + "3: \n" \ : "=&r" 
(__ret), "=ZB"(*m) \ : "ZB"(*m), "Jr" (old), "Jr" (new) \ : "t0", "memory"); \ diff --git a/arch/loongarch/include/asm/futex.h b/arch/loongarch/include/asm/futex.h index feb6658c84ff..f0208f4aec3e 100644 --- a/arch/loongarch/include/asm/futex.h +++ b/arch/loongarch/include/asm/futex.h @@ -84,8 +84,10 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newv " move $t0, %z5 \n" "2: sc.w $t0, %2 \n" " beqz $t0, 1b \n" + " b 5f \n" "3: \n" __WEAK_LLSC_MB + "5: \n" " .section .fixup,\"ax\" \n" "4: li.d %0, %6 \n" " b 3b \n" diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 27aa3d03162d..eb3318927644 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -298,9 +298,10 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) " or %[tmp], %[tmp], %[global] \n" __SC "%[tmp], %[buddy] \n" " beqz %[tmp], 1b \n" - " nop \n" + " b 3f \n" "2: \n" __WEAK_LLSC_MB + "3: \n" : [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp) : [global] "r" (page_global)); #else /* !CONFIG_SMP */ -- Gitee From eba5f68c3ed74e413cddd83f1a9506928d0aca86 Mon Sep 17 00:00:00 2001 From: Hongchen Zhang Date: Wed, 14 Dec 2022 18:15:42 +0800 Subject: [PATCH 21/36] LoongArch: optimize for syscall return LoongArch inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP -------------------------------- If the syscall is not rt_sigreturn,there is no need to do RESTORE_STATIC and RESTORE_TEMP. 
Signed-off-by: Hongchen Zhang Change-Id: I61804bb16ce678dd39e9f197bd88d91e13b972cb --- arch/loongarch/kernel/entry.S | 17 +++++++++++++---- arch/loongarch/kernel/syscall.c | 10 +++++----- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S index 893e632e76da..8670e9d128ab 100644 --- a/arch/loongarch/kernel/entry.S +++ b/arch/loongarch/kernel/entry.S @@ -14,13 +14,14 @@ #include #include #include +#include .text .cfi_sections .debug_frame .align 5 SYM_FUNC_START(handle_syscall) csrrd t0, PERCPU_BASE_KS - la.abs t1, kernelsp + la.pcrel t1, kernelsp add.d t1, t1, t0 move t2, sp ld.d sp, t1, 0 @@ -28,11 +29,10 @@ SYM_FUNC_START(handle_syscall) addi.d sp, sp, -PT_SIZE cfi_st t2, PT_R3 cfi_rel_offset sp, PT_R3 - st.d zero, sp, PT_R0 csrrd t2, LOONGARCH_CSR_PRMD st.d t2, sp, PT_PRMD cfi_st ra, PT_R1 - cfi_st a0, PT_R4 + cfi_st a0, PT_ORIG_A0 cfi_st a1, PT_R5 cfi_st a2, PT_R6 cfi_st a3, PT_R7 @@ -41,6 +41,7 @@ SYM_FUNC_START(handle_syscall) cfi_st a6, PT_R10 cfi_st a7, PT_R11 csrrd ra, LOONGARCH_CSR_ERA + addi.d ra, ra, 4 st.d ra, sp, PT_ERA cfi_rel_offset ra, PT_ERA @@ -55,9 +56,17 @@ SYM_FUNC_START(handle_syscall) and tp, tp, sp move a0, sp + move a1, a7 bl do_syscall - RESTORE_ALL_AND_RET + addi.w t0, zero, __NR_rt_sigreturn + bne a0, t0, 1f + + RESTORE_STATIC + RESTORE_TEMP +1: + RESTORE_SOME + RESTORE_SP_AND_RET SYM_FUNC_END(handle_syscall) SYM_CODE_START(ret_from_fork) diff --git a/arch/loongarch/kernel/syscall.c b/arch/loongarch/kernel/syscall.c index 3fc4211db989..796fcdcaa6a7 100644 --- a/arch/loongarch/kernel/syscall.c +++ b/arch/loongarch/kernel/syscall.c @@ -37,18 +37,16 @@ void *sys_call_table[__NR_syscalls] = { typedef long (*sys_call_fn)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); -void noinstr do_syscall(struct pt_regs *regs) +unsigned long noinstr do_syscall(struct pt_regs *regs, unsigned long nr) { - unsigned long nr; sys_call_fn 
syscall_fn; - nr = regs->regs[11]; /* Set for syscall restarting */ if (nr < NR_syscalls) regs->regs[0] = nr + 1; + else + regs->regs[0] = 0; - regs->csr_era += 4; - regs->orig_a0 = regs->regs[4]; regs->regs[4] = -ENOSYS; nr = syscall_enter_from_user_mode(regs, nr); @@ -60,4 +58,6 @@ void noinstr do_syscall(struct pt_regs *regs) } syscall_exit_to_user_mode(regs); + + return nr; } -- Gitee From f5069ef5125fda578a873da305001e940e7c2a59 Mon Sep 17 00:00:00 2001 From: Hongchen Zhang Date: Wed, 14 Dec 2022 18:26:13 +0800 Subject: [PATCH 22/36] LoongArch: save one instruction for arch_local_irq_{enable,disable} LoongArch inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP -------------------------------- Signed-off-by: Hongchen Zhang Change-Id: I607e48f5abea1184b22326e772d809f6994603cc --- arch/loongarch/include/asm/irqflags.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/loongarch/include/asm/irqflags.h b/arch/loongarch/include/asm/irqflags.h index 319a8c616f1f..53eb33b2c258 100644 --- a/arch/loongarch/include/asm/irqflags.h +++ b/arch/loongarch/include/asm/irqflags.h @@ -17,16 +17,15 @@ static inline void arch_local_irq_enable(void) __asm__ __volatile__( "csrxchg %[val], %[mask], %[reg]\n\t" : [val] "+r" (flags) - : [mask] "r" (CSR_CRMD_IE), [reg] "i" (LOONGARCH_CSR_CRMD) + : [mask] "r" (flags), [reg] "i" (LOONGARCH_CSR_CRMD) : "memory"); } static inline void arch_local_irq_disable(void) { - u32 flags = 0; __asm__ __volatile__( - "csrxchg %[val], %[mask], %[reg]\n\t" - : [val] "+r" (flags) + "csrxchg $zero, %[mask], %[reg]\n\t" + : : [mask] "r" (CSR_CRMD_IE), [reg] "i" (LOONGARCH_CSR_CRMD) : "memory"); } -- Gitee From f3f1f3b964ea4d9daffb10564cd6b867239ebca1 Mon Sep 17 00:00:00 2001 From: Hongchen Zhang Date: Thu, 15 Dec 2022 18:09:15 +0800 Subject: [PATCH 23/36] LoongArch: enable hugetlbfs support LoongArch inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP 
-------------------------------- Signed-off-by: Hongchen Zhang Change-Id: I4631175847064093af8f143d0c46b267a82de069 --- arch/loongarch/Kconfig | 3 +++ arch/loongarch/configs/loongson3_defconfig | 1 + arch/loongarch/mm/hugetlbpage.c | 4 ++-- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 7ff49eba631b..0436e79e3928 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -497,6 +497,9 @@ config ARCH_SPARSEMEM_ENABLE or have huge holes in the physical address space for other reasons. See for more. +config SYS_SUPPORTS_HUGETLBFS + def_bool y + config ARCH_ENABLE_THP_MIGRATION def_bool y depends on TRANSPARENT_HUGEPAGE diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index def76987a4c5..8e15593b052a 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -733,6 +733,7 @@ CONFIG_FAT_DEFAULT_IOCHARSET="gb2312" CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y CONFIG_CONFIGFS_FS=y CONFIG_HFS_FS=m CONFIG_HFSPLUS_FS=m diff --git a/arch/loongarch/mm/hugetlbpage.c b/arch/loongarch/mm/hugetlbpage.c index ba138117b124..97ed6f1d1c9b 100644 --- a/arch/loongarch/mm/hugetlbpage.c +++ b/arch/loongarch/mm/hugetlbpage.c @@ -13,8 +13,8 @@ #include #include -pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long addr, unsigned long sz) +pte_t *huge_pte_alloc(struct mm_struct *mm, + unsigned long addr, unsigned long sz) { pgd_t *pgd; p4d_t *p4d; -- Gitee From e3d1d0a97dd88d37487c12d95cfd37542f95d4df Mon Sep 17 00:00:00 2001 From: Hongchen Zhang Date: Fri, 30 Dec 2022 03:06:32 +0800 Subject: [PATCH 24/36] LoongArch: fix vmlinux ld error LoongArch inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP -------------------------------- When CONFIG_DEBUG_INFO_BTF is enabled, ld load error. 
To fix this issue, we should use -mdirect-extern-access. Signed-off-by: Xing Li Signed-off-by: Hongchen Zhang Change-Id: I698169b2af48369d531ef3aa4f9c53b97096da83 --- arch/loongarch/Makefile | 1 + drivers/firmware/efi/libstub/Makefile | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index 964b779b130d..cb14e7f96401 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -54,6 +54,7 @@ LDFLAGS_vmlinux += -G0 -static -n -nostdlib # upgrade the compiler or downgrade the assembler. ifdef CONFIG_AS_HAS_EXPLICIT_RELOCS cflags-y += -mexplicit-relocs +KBUILD_CFLAGS_KERNEL += -mdirect-extern-access else cflags-y += $(call cc-option,-mno-explicit-relocs) KBUILD_AFLAGS_KERNEL += -Wa,-mla-global-with-pcrel diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index 770d5807c744..44c1bf5a1236 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -27,7 +27,7 @@ cflags-$(CONFIG_ARM) := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \ cflags-$(CONFIG_RISCV) := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \ -fpic cflags-$(CONFIG_LOONGARCH) := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \ - -fpic + -fpie cflags-$(CONFIG_EFI_GENERIC_STUB) += -I$(srctree)/scripts/dtc/libfdt -- Gitee From b8690293f724171cf3a1993d2aa6de24be0e04c0 Mon Sep 17 00:00:00 2001 From: Hongchen Zhang Date: Sat, 31 Dec 2022 00:58:44 +0800 Subject: [PATCH 25/36] LoongArch: fix SECCOMP test error LoongArch inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP -------------------------------- When running the LTP testcase prctl04, the test fails. The reason is that _TIF_SECCOMP is not defined for LoongArch, so define _TIF_SECCOMP for LoongArch.
Signed-off-by: Hongchen Zhang Change-Id: I6e890cf347839f301b45036693a0cce3736d3ce3 --- arch/loongarch/include/asm/thread_info.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/loongarch/include/asm/thread_info.h b/arch/loongarch/include/asm/thread_info.h index c9030464cbb5..43e74b37df60 100644 --- a/arch/loongarch/include/asm/thread_info.h +++ b/arch/loongarch/include/asm/thread_info.h @@ -112,6 +112,7 @@ static inline unsigned long current_stack_pointer(void) #define _TIF_LASX_CTX_LIVE (1< Date: Sat, 10 Dec 2022 22:39:48 +0800 Subject: [PATCH 26/36] LoongArch: Consolidate __ex_table construction mainline inclusion from mainline-v6.2-rc1 commit 508f28c67171e276356650f407dd87d42b6913ef category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP CVE: NA -------------------------------- Consolidate all the __ex_table construction code with a _ASM_EXTABLE or _asm_extable helper. There should be no functional change as a result of this patch. Signed-off-by: Youling Tang Signed-off-by: Huacai Chen Change-Id: I24db0d24f7ec183f687221f9d99c4eb8f17cebd2 --- arch/loongarch/include/asm/asm-extable.h | 35 ++++++++++++++++++++++++ arch/loongarch/include/asm/futex.h | 13 ++++----- arch/loongarch/include/asm/uaccess.h | 9 ++---- arch/loongarch/kernel/fpu.S | 13 +++------ arch/loongarch/lib/clear_user.S | 5 ++-- arch/loongarch/lib/copy_user.S | 5 ++-- arch/loongarch/lib/strncpy_user.S | 6 ++-- arch/loongarch/lib/strnlen_user.S | 5 ++-- 8 files changed, 55 insertions(+), 36 deletions(-) create mode 100644 arch/loongarch/include/asm/asm-extable.h diff --git a/arch/loongarch/include/asm/asm-extable.h b/arch/loongarch/include/asm/asm-extable.h new file mode 100644 index 000000000000..4f615bf56727 --- /dev/null +++ b/arch/loongarch/include/asm/asm-extable.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_ASM_EXTABLE_H +#define __ASM_ASM_EXTABLE_H + +#ifdef __ASSEMBLY__ + +#define __ASM_EXTABLE_RAW(insn, fixup) \ + .pushsection
__ex_table, "a"; \ + .balign 8; \ + .quad (insn); \ + .quad (fixup); \ + .popsection; + + .macro _asm_extable, insn, fixup + __ASM_EXTABLE_RAW(\insn, \fixup) + .endm + +#else /* __ASSEMBLY__ */ + +#include +#include + +#define __ASM_EXTABLE_RAW(insn, fixup) \ + ".pushsection __ex_table, \"a\"\n" \ + ".balign 8\n" \ + ".quad ((" insn "))\n" \ + ".quad ((" fixup "))\n" \ + ".popsection\n" + +#define _ASM_EXTABLE(insn, fixup) \ + __ASM_EXTABLE_RAW(#insn, #fixup) + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_ASM_EXTABLE_H */ diff --git a/arch/loongarch/include/asm/futex.h b/arch/loongarch/include/asm/futex.h index f0208f4aec3e..056f89a87d1f 100644 --- a/arch/loongarch/include/asm/futex.h +++ b/arch/loongarch/include/asm/futex.h @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -22,10 +23,8 @@ "4: li.w %0, %6 \n" \ " b 3b \n" \ " .previous \n" \ - " .section __ex_table,\"a\" \n" \ - " "__UA_ADDR "\t1b, 4b \n" \ - " "__UA_ADDR "\t2b, 4b \n" \ - " .previous \n" \ + _ASM_EXTABLE(1b, 4b) \ + _ASM_EXTABLE(2b, 4b) \ : "=r" (ret), "=&r" (oldval), \ "=ZC" (*uaddr) \ : "0" (0), "ZC" (*uaddr), "Jr" (oparg), \ @@ -92,10 +91,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newv "4: li.d %0, %6 \n" " b 3b \n" " .previous \n" - " .section __ex_table,\"a\" \n" - " "__UA_ADDR "\t1b, 4b \n" - " "__UA_ADDR "\t2b, 4b \n" - " .previous \n" + _ASM_EXTABLE(1b, 4b) + _ASM_EXTABLE(2b, 4b) : "+r" (ret), "=&r" (val), "=ZC" (*uaddr) : "ZC" (*uaddr), "Jr" (oldval), "Jr" (newval), "i" (-EFAULT) diff --git a/arch/loongarch/include/asm/uaccess.h b/arch/loongarch/include/asm/uaccess.h index f124b307122e..58545c8686e3 100644 --- a/arch/loongarch/include/asm/uaccess.h +++ b/arch/loongarch/include/asm/uaccess.h @@ -15,6 +15,7 @@ #include #include #include +#include #include extern u64 __ua_limit; @@ -207,9 +208,7 @@ do { \ " move %1, $zero \n" \ " b 2b \n" \ " .previous \n" \ - " .section __ex_table,\"a\" \n" \ - " "__UA_ADDR "\t1b, 3b \n" \ - " .previous 
\n" \ + _ASM_EXTABLE(1b, 3b) \ : "+r" (__gu_err), "=r" (__gu_tmp) \ : "m" (__m(ptr)), "i" (-EFAULT)); \ \ @@ -238,9 +237,7 @@ do { \ "3: li.w %0, %3 \n" \ " b 2b \n" \ " .previous \n" \ - " .section __ex_table,\"a\" \n" \ - " " __UA_ADDR " 1b, 3b \n" \ - " .previous \n" \ + _ASM_EXTABLE(1b, 3b) \ : "+r" (__pu_err), "=m" (__m(ptr)) \ : "Jr" (__pu_val), "i" (-EFAULT)); \ } diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S index 65d245a0f409..2e45dfb1b5e2 100644 --- a/arch/loongarch/kernel/fpu.S +++ b/arch/loongarch/kernel/fpu.S @@ -8,6 +8,7 @@ */ #include #include +#include #include #include #include @@ -21,9 +22,7 @@ .macro EX insn, reg, src, offs .ex\@: \insn \reg, \src, \offs - .section __ex_table,"a" - PTR .ex\@, fault - .previous + _asm_extable .ex\@, fault .endm .macro EX_V insn, reg, src, offs @@ -34,9 +33,7 @@ .ex\@: .word __insn << 22 | __offs << 10 | __src << 5 | __reg - .section __ex_table,"a" - PTR .ex\@, fault - .previous + _asm_extable .ex\@, fault .endm .macro EX_XV insn, reg, src, offs @@ -47,9 +44,7 @@ .ex\@: .word __insn << 22 | __offs << 10 | __src << 5 | __reg - .section __ex_table,"a" - PTR .ex\@, fault - .previous + _asm_extable .ex\@, fault .endm .macro sc_save_fp base diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S index 16ba2b8dd68a..7a066d6a41b8 100644 --- a/arch/loongarch/lib/clear_user.S +++ b/arch/loongarch/lib/clear_user.S @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -15,9 +16,7 @@ jr ra .previous .endif - .section __ex_table, "a" - PTR \from\()b, \to\()b - .previous + _asm_extable \from\()b, \to\()b .endm /* diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S index 97d20327a69e..f8ace04586c2 100644 --- a/arch/loongarch/lib/copy_user.S +++ b/arch/loongarch/lib/copy_user.S @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -15,9 +16,7 @@ jr ra .previous .endif - .section __ex_table, "a" - PTR \from\()b, \to\()b - .previous + 
_asm_extable \from\()b, \to\()b .endm /* diff --git a/arch/loongarch/lib/strncpy_user.S b/arch/loongarch/lib/strncpy_user.S index b1dcfc1e1fc0..90d8df2b6ecf 100644 --- a/arch/loongarch/lib/strncpy_user.S +++ b/arch/loongarch/lib/strncpy_user.S @@ -7,6 +7,7 @@ #include #include #include +#include /* * long __strncpy_from_user(char *to, const char *from, long len) @@ -39,10 +40,7 @@ SYM_FUNC_START(__strncpy_from_user) 3: li.w a0, -EFAULT jr ra .previous - - .section __ex_table, "a" - PTR 1b, 3b - .previous + _asm_extable 1b, 3b SYM_FUNC_END(__strncpy_from_user) EXPORT_SYMBOL(__strncpy_from_user) diff --git a/arch/loongarch/lib/strnlen_user.S b/arch/loongarch/lib/strnlen_user.S index bf9d5ad6d047..795aef7441ea 100644 --- a/arch/loongarch/lib/strnlen_user.S +++ b/arch/loongarch/lib/strnlen_user.S @@ -6,6 +6,7 @@ #include #include #include +#include /* * long __strnlen_user(const char *s, long n) @@ -36,9 +37,7 @@ SYM_FUNC_START(__strnlen_user) jr ra .previous - .section __ex_table, "a" - PTR 1b, 3b - .previous + _asm_extable 1b, 3b SYM_FUNC_END(__strnlen_user) EXPORT_SYMBOL(__strnlen_user) -- Gitee From a2603778e57ece2a4f0ff1594361ff37a0d4bd22 Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Sat, 10 Dec 2022 22:39:48 +0800 Subject: [PATCH 27/36] LoongArch: Switch to relative exception tables mainline inclusion from mainline-v6.2-rc1 commit 3d36f4298ba91fbdec6bc56aa7bb0663cba6ab0c category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP CVE: NA -------------------------------- Similar to other architectures such as arm64, x86, riscv and so on, use offsets relative to the exception table entry values rather than their absolute addresses for both the exception location and the fixup. However, LoongArch label difference because it will actually produce two relocations, a pair of R_LARCH_ADD32 and R_LARCH_SUB32. Take simple code below for example: $ cat test_ex_table.S .section .text 1: nop .section __ex_table,"a" .balign 4 .long (1b - .) 
.previous $ loongarch64-unknown-linux-gnu-gcc -c test_ex_table.S $ loongarch64-unknown-linux-gnu-readelf -Wr test_ex_table.o Relocation section '.rela__ex_table' at offset 0x100 contains 2 entries: Offset Info Type Symbol's Value Symbol's Name + Addend 0000000000000000 0000000600000032 R_LARCH_ADD32 0000000000000000 .L1^B1 + 0 0000000000000000 0000000500000037 R_LARCH_SUB32 0000000000000000 L0^A + 0 The modpost will complain the R_LARCH_SUB32 relocation, so we need to patch modpost.c to skip this relocation for .rela__ex_table section. Signed-off-by: Youling Tang Signed-off-by: Huacai Chen Change-Id: If42e54904f36a0d7cafc40a75fd89b582fbaa09e --- arch/loongarch/include/asm/asm-extable.h | 12 ++++----- arch/loongarch/include/asm/extable.h | 26 +++++++++++++++++++ arch/loongarch/include/asm/uaccess.h | 2 +- arch/loongarch/mm/extable.c | 32 ++++++++++++++++-------- scripts/mod/modpost.c | 15 +++++++++++ scripts/sorttable.c | 2 +- 6 files changed, 71 insertions(+), 18 deletions(-) create mode 100644 arch/loongarch/include/asm/extable.h diff --git a/arch/loongarch/include/asm/asm-extable.h b/arch/loongarch/include/asm/asm-extable.h index 4f615bf56727..74f8bc75472a 100644 --- a/arch/loongarch/include/asm/asm-extable.h +++ b/arch/loongarch/include/asm/asm-extable.h @@ -6,9 +6,9 @@ #define __ASM_EXTABLE_RAW(insn, fixup) \ .pushsection __ex_table, "a"; \ - .balign 8; \ - .quad (insn); \ - .quad (fixup); \ + .balign 4; \ + .long ((insn) - .); \ + .long ((fixup) - .); \ .popsection; .macro _asm_extable, insn, fixup @@ -22,9 +22,9 @@ #define __ASM_EXTABLE_RAW(insn, fixup) \ ".pushsection __ex_table, \"a\"\n" \ - ".balign 8\n" \ - ".quad ((" insn "))\n" \ - ".quad ((" fixup "))\n" \ + ".balign 4\n" \ + ".long ((" insn ") - .)\n" \ + ".long ((" fixup ") - .)\n" \ ".popsection\n" #define _ASM_EXTABLE(insn, fixup) \ diff --git a/arch/loongarch/include/asm/extable.h b/arch/loongarch/include/asm/extable.h new file mode 100644 index 000000000000..b571c89705d1 --- /dev/null +++ 
b/arch/loongarch/include/asm/extable.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_LOONGARCH_EXTABLE_H +#define _ASM_LOONGARCH_EXTABLE_H + +/* + * The exception table consists of pairs of relative offsets: the first + * is the relative offset to an instruction that is allowed to fault, + * and the second is the relative offset at which the program should + * continue. No registers are modified, so it is entirely up to the + * continuation code to figure out what to do. + * + * All the routines below use bits of fixup code that are out of line + * with the main instruction path. This means when everything is well, + * we don't even have to jump over them. Further, they do not intrude + * on our cache or tlb entries. + */ + +struct exception_table_entry { + int insn, fixup; +}; + +#define ARCH_HAS_RELATIVE_EXTABLE + +bool fixup_exception(struct pt_regs *regs); + +#endif diff --git a/arch/loongarch/include/asm/uaccess.h b/arch/loongarch/include/asm/uaccess.h index 58545c8686e3..33d9d7be8a02 100644 --- a/arch/loongarch/include/asm/uaccess.h +++ b/arch/loongarch/include/asm/uaccess.h @@ -15,8 +15,8 @@ #include #include #include +#include #include -#include extern u64 __ua_limit; diff --git a/arch/loongarch/mm/extable.c b/arch/loongarch/mm/extable.c index bc20988f2b87..08a9a7d6357a 100644 --- a/arch/loongarch/mm/extable.c +++ b/arch/loongarch/mm/extable.c @@ -3,20 +3,32 @@ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ #include -#include -#include #include +#include +#include + +static inline unsigned long +get_ex_fixup(const struct exception_table_entry *ex) +{ + return ((unsigned long)&ex->fixup + ex->fixup); +} -int fixup_exception(struct pt_regs *regs) +static bool ex_handler_fixup(const struct exception_table_entry *ex, + struct pt_regs *regs) { - const struct exception_table_entry *fixup; + regs->csr_era = get_ex_fixup(ex); - fixup = search_exception_tables(exception_era(regs)); - if (fixup) { - regs->csr_era = 
fixup->fixup; + return true; +} + + +bool fixup_exception(struct pt_regs *regs) +{ + const struct exception_table_entry *ex; - return 1; - } + ex = search_exception_tables(exception_era(regs)); + if (!ex) + return false; - return 0; + return ex_handler_fixup(ex, regs); } diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index e48742760fec..8545e49cfb77 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1842,6 +1842,14 @@ static int addend_mips_rel(struct elf_info *elf, Elf_Shdr *sechdr, Elf_Rela *r) return 0; } +#ifndef EM_LOONGARCH +#define EM_LOONGARCH 258 +#endif + +#ifndef R_LARCH_SUB32 +#define R_LARCH_SUB32 55 +#endif + static void section_rela(const char *modname, struct elf_info *elf, Elf_Shdr *sechdr) { @@ -1878,6 +1886,13 @@ static void section_rela(const char *modname, struct elf_info *elf, r_sym = ELF_R_SYM(r.r_info); #endif r.r_addend = TO_NATIVE(rela->r_addend); + switch (elf->hdr->e_machine) { + case EM_LOONGARCH: + if (!strcmp("__ex_table", fromsec) && + ELF_R_TYPE(r.r_info) == R_LARCH_SUB32) + continue; + break; + } sym = elf->symtab_start + r_sym; /* Skip special sections */ if (is_shndx_special(sym->st_shndx)) diff --git a/scripts/sorttable.c b/scripts/sorttable.c index 3edef1c6aaa0..5ce49722d5bc 100644 --- a/scripts/sorttable.c +++ b/scripts/sorttable.c @@ -347,11 +347,11 @@ static int do_file(char const *const fname, void *addr) case EM_PPC: case EM_ARM: case EM_PPC64: + case EM_LOONGARCH: custom_sort = sort_relative_table; break; case EM_ARCOMPACT: case EM_ARCV2: - case EM_LOONGARCH: case EM_MICROBLAZE: case EM_MIPS: case EM_XTENSA: -- Gitee From 82f69454de796b638ea76b7841e7e659d8de52fa Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Sat, 10 Dec 2022 22:39:59 +0800 Subject: [PATCH 28/36] LoongArch: extable: Add `type` and `data` fields mainline inclusion from mainline-v6.2-rc1 commit 26bc82441250f2e01621f5b26606a4f6926ee3ad category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP CVE: NA 
-------------------------------- This is a LoongArch port of commit d6e2cc564775 ("arm64: extable: add `type` and `data` fields"). Subsequent patches will add specialized handlers for fixups, in addition to the simple PC fixup we have today. In preparation, this patch adds a new `type` field to struct exception_table_entry, and uses this to distinguish the fixup and other cases. A `data` field is also added so that subsequent patches can associate data specific to each exception site (e.g. register numbers). Handlers are named ex_handler_*() for consistency, following the example of x86. At the same time, get_ex_fixup() is split out into a helper so that it can be used by other ex_handler_*() functions in the subsequent patches. Signed-off-by: Youling Tang Signed-off-by: Huacai Chen Change-Id: I95ac7a755a8a3389b63b92c1e98a851c1993c0aa --- arch/loongarch/include/asm/asm-extable.h | 15 ++++++++--- arch/loongarch/include/asm/extable.h | 11 ++++++++ arch/loongarch/kernel/vmlinux.lds.S | 3 +-- arch/loongarch/mm/extable.c | 7 +++++- scripts/sorttable.c | 32 +++++++++++++++++++++++- 5 files changed, 60 insertions(+), 8 deletions(-) diff --git a/arch/loongarch/include/asm/asm-extable.h b/arch/loongarch/include/asm/asm-extable.h index 74f8bc75472a..634bd770e3c4 100644 --- a/arch/loongarch/include/asm/asm-extable.h +++ b/arch/loongarch/include/asm/asm-extable.h @@ -2,17 +2,22 @@ #ifndef __ASM_ASM_EXTABLE_H #define __ASM_ASM_EXTABLE_H +#define EX_TYPE_NONE 0 +#define EX_TYPE_FIXUP 1 + #ifdef __ASSEMBLY__ -#define __ASM_EXTABLE_RAW(insn, fixup) \ +#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ .pushsection __ex_table, "a"; \ .balign 4; \ .long ((insn) - .); \ .long ((fixup) - .); \ + .short (type); \ + .short (data); \ .popsection; .macro _asm_extable, insn, fixup - __ASM_EXTABLE_RAW(\insn, \fixup) + __ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_FIXUP, 0) .endm #else /* __ASSEMBLY__ */ @@ -20,15 +25,17 @@ #include #include -#define __ASM_EXTABLE_RAW(insn, fixup) \ +#define 
__ASM_EXTABLE_RAW(insn, fixup, type, data) \ ".pushsection __ex_table, \"a\"\n" \ ".balign 4\n" \ ".long ((" insn ") - .)\n" \ ".long ((" fixup ") - .)\n" \ + ".short (" type ")\n" \ + ".short (" data ")\n" \ ".popsection\n" #define _ASM_EXTABLE(insn, fixup) \ - __ASM_EXTABLE_RAW(#insn, #fixup) + __ASM_EXTABLE_RAW(#insn, #fixup, __stringify(EX_TYPE_FIXUP), "0") #endif /* __ASSEMBLY__ */ diff --git a/arch/loongarch/include/asm/extable.h b/arch/loongarch/include/asm/extable.h index b571c89705d1..92612b4364a1 100644 --- a/arch/loongarch/include/asm/extable.h +++ b/arch/loongarch/include/asm/extable.h @@ -17,10 +17,21 @@ struct exception_table_entry { int insn, fixup; + short type, data; }; #define ARCH_HAS_RELATIVE_EXTABLE +#define swap_ex_entry_fixup(a, b, tmp, delta) \ +do { \ + (a)->fixup = (b)->fixup + (delta); \ + (b)->fixup = (tmp).fixup - (delta); \ + (a)->type = (b)->type; \ + (b)->type = (tmp).type; \ + (a)->data = (b)->data; \ + (b)->data = (tmp).data; \ +} while (0) + bool fixup_exception(struct pt_regs *regs); #endif diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S index 36d042739f3c..e956e2e3c510 100644 --- a/arch/loongarch/kernel/vmlinux.lds.S +++ b/arch/loongarch/kernel/vmlinux.lds.S @@ -4,6 +4,7 @@ #include #define PAGE_SIZE _PAGE_SIZE +#define RO_EXCEPTION_TABLE_ALIGN 4 /* * Put .bss..swapper_pg_dir as the first thing in .bss. This will @@ -53,8 +54,6 @@ SECTIONS . = ALIGN(PECOFF_SEGMENT_ALIGN); _etext = .; - EXCEPTION_TABLE(16) - . 
= ALIGN(PECOFF_SEGMENT_ALIGN); __init_begin = .; __inittext_begin = .; diff --git a/arch/loongarch/mm/extable.c b/arch/loongarch/mm/extable.c index 08a9a7d6357a..fd2395221cff 100644 --- a/arch/loongarch/mm/extable.c +++ b/arch/loongarch/mm/extable.c @@ -30,5 +30,10 @@ bool fixup_exception(struct pt_regs *regs) if (!ex) return false; - return ex_handler_fixup(ex, regs); + switch (ex->type) { + case EX_TYPE_FIXUP: + return ex_handler_fixup(ex, regs); + } + + BUG(); } diff --git a/scripts/sorttable.c b/scripts/sorttable.c index 5ce49722d5bc..92f1951b46f6 100644 --- a/scripts/sorttable.c +++ b/scripts/sorttable.c @@ -231,6 +231,34 @@ static void sort_relative_table(char *extab_image, int image_size) } } +static void sort_relative_table_with_data(char *extab_image, int image_size) +{ + int i = 0; + + while (i < image_size) { + uint32_t *loc = (uint32_t *)(extab_image + i); + + w(r(loc) + i, loc); + w(r(loc + 1) + i + 4, loc + 1); + /* Don't touch the fixup type or data */ + + i += sizeof(uint32_t) * 3; + } + + qsort(extab_image, image_size / 12, 12, compare_relative_table); + + i = 0; + while (i < image_size) { + uint32_t *loc = (uint32_t *)(extab_image + i); + + w(r(loc) - i, loc); + w(r(loc + 1) - (i + 4), loc + 1); + /* Don't touch the fixup type or data */ + + i += sizeof(uint32_t) * 3; + } +} + static void x86_sort_relative_table(char *extab_image, int image_size) { int i = 0; @@ -339,6 +367,9 @@ static int do_file(char const *const fname, void *addr) case EM_X86_64: custom_sort = x86_sort_relative_table; break; + case EM_LOONGARCH: + custom_sort = sort_relative_table_with_data; + break; case EM_S390: custom_sort = s390_sort_relative_table; break; @@ -347,7 +378,6 @@ static int do_file(char const *const fname, void *addr) case EM_PPC: case EM_ARM: case EM_PPC64: - case EM_LOONGARCH: custom_sort = sort_relative_table; break; case EM_ARCOMPACT: -- Gitee From 7e828480af7879932e00c9be21628d1377e80191 Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Sat, 10 Dec 2022 
22:39:59 +0800 Subject: [PATCH 29/36] LoongArch: extable: Add a dedicated uaccess handler mainline inclusion from mainline-v6.2-rc1 commit 672999cfae3e830a64c4996362a26934fd555ff9 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP CVE: NA -------------------------------- Inspired by commit 2e77a62cb3a6("arm64: extable: add a dedicated uaccess handler"), do similar to LoongArch to add a dedicated uaccess exception handler to update registers in exception context and subsequently return back into the function which faulted, so we remove the need for fixups specialized to each faulting instruction. Add gpr-num.h here because we need to map the same GPR names to integer constants, so that we can use this to build meta-data for the exception fixups. The compiler treats gpr 0 as zero rather than $r0, so set it separately to .L__gpr_num_zero, otherwise the following assembly error will occurs: {standard input}: Assembler messages: {standard input}:1074: Error: invalid operands (*UND* and *ABS* sections) for `<<' {standard input}:1160: Error: invalid operands (*UND* and *ABS* sections) for `<<' make[1]: *** [scripts/Makefile.build:249: fs/fcntl.o] Error 1 Signed-off-by: Youling Tang Signed-off-by: Huacai Chen Change-Id: Ifda81e961c8147c6f652891802dc6b5354bb5491 --- arch/loongarch/include/asm/asm-extable.h | 22 ++++++++++++++++++++++ arch/loongarch/include/asm/futex.h | 22 ++++++---------------- arch/loongarch/include/asm/gpr-num.h | 22 ++++++++++++++++++++++ arch/loongarch/include/asm/uaccess.h | 17 ++++------------- arch/loongarch/mm/extable.c | 22 ++++++++++++++++++++++ 5 files changed, 76 insertions(+), 29 deletions(-) create mode 100644 arch/loongarch/include/asm/gpr-num.h diff --git a/arch/loongarch/include/asm/asm-extable.h b/arch/loongarch/include/asm/asm-extable.h index 634bd770e3c4..f5502cb50c6e 100644 --- a/arch/loongarch/include/asm/asm-extable.h +++ b/arch/loongarch/include/asm/asm-extable.h @@ -4,6 +4,7 @@ #define EX_TYPE_NONE 0 
#define EX_TYPE_FIXUP 1 +#define EX_TYPE_UACCESS_ERR_ZERO 2 #ifdef __ASSEMBLY__ @@ -24,6 +25,7 @@ #include #include +#include #define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ ".pushsection __ex_table, \"a\"\n" \ @@ -37,6 +39,26 @@ #define _ASM_EXTABLE(insn, fixup) \ __ASM_EXTABLE_RAW(#insn, #fixup, __stringify(EX_TYPE_FIXUP), "0") +#define EX_DATA_REG_ERR_SHIFT 0 +#define EX_DATA_REG_ERR GENMASK(4, 0) +#define EX_DATA_REG_ZERO_SHIFT 5 +#define EX_DATA_REG_ZERO GENMASK(9, 5) + +#define EX_DATA_REG(reg, gpr) \ + "((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")" + +#define _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero) \ + __DEFINE_ASM_GPR_NUMS \ + __ASM_EXTABLE_RAW(#insn, #fixup, \ + __stringify(EX_TYPE_UACCESS_ERR_ZERO), \ + "(" \ + EX_DATA_REG(ERR, err) " | " \ + EX_DATA_REG(ZERO, zero) \ + ")") + +#define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \ + _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero) + #endif /* __ASSEMBLY__ */ #endif /* __ASM_ASM_EXTABLE_H */ diff --git a/arch/loongarch/include/asm/futex.h b/arch/loongarch/include/asm/futex.h index 056f89a87d1f..af6ee7a8ee88 100644 --- a/arch/loongarch/include/asm/futex.h +++ b/arch/loongarch/include/asm/futex.h @@ -19,16 +19,11 @@ "2: sc.w $t0, %2 \n" \ " beqz $t0, 1b \n" \ "3: \n" \ - " .section .fixup,\"ax\" \n" \ - "4: li.w %0, %6 \n" \ - " b 3b \n" \ - " .previous \n" \ - _ASM_EXTABLE(1b, 4b) \ - _ASM_EXTABLE(2b, 4b) \ + _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %0) \ + _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %0) \ : "=r" (ret), "=&r" (oldval), \ "=ZC" (*uaddr) \ - : "0" (0), "ZC" (*uaddr), "Jr" (oparg), \ - "i" (-EFAULT) \ + : "0" (0), "ZC" (*uaddr), "Jr" (oparg) \ : "memory", "t0"); \ } @@ -87,15 +82,10 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newv "3: \n" __WEAK_LLSC_MB "5: \n" - " .section .fixup,\"ax\" \n" - "4: li.d %0, %6 \n" - " b 3b \n" - " .previous \n" - _ASM_EXTABLE(1b, 4b) - _ASM_EXTABLE(2b, 4b) + _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %0) + 
_ASM_EXTABLE_UACCESS_ERR(2b, 3b, %0) : "+r" (ret), "=&r" (val), "=ZC" (*uaddr) - : "ZC" (*uaddr), "Jr" (oldval), "Jr" (newval), - "i" (-EFAULT) + : "ZC" (*uaddr), "Jr" (oldval), "Jr" (newval) : "memory", "t0"); *uval = val; diff --git a/arch/loongarch/include/asm/gpr-num.h b/arch/loongarch/include/asm/gpr-num.h new file mode 100644 index 000000000000..e0941af20c7e --- /dev/null +++ b/arch/loongarch/include/asm/gpr-num.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_GPR_NUM_H +#define __ASM_GPR_NUM_H + +#ifdef __ASSEMBLY__ + + .equ .L__gpr_num_zero, 0 + .irp num,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 + .equ .L__gpr_num_$r\num, \num + .endr + +#else /* __ASSEMBLY__ */ + +#define __DEFINE_ASM_GPR_NUMS \ +" .equ .L__gpr_num_zero, 0\n" \ +" .irp num,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31\n" \ +" .equ .L__gpr_num_$r\\num, \\num\n" \ +" .endr\n" \ + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_GPR_NUM_H */ diff --git a/arch/loongarch/include/asm/uaccess.h b/arch/loongarch/include/asm/uaccess.h index 33d9d7be8a02..306bef8f4972 100644 --- a/arch/loongarch/include/asm/uaccess.h +++ b/arch/loongarch/include/asm/uaccess.h @@ -203,14 +203,9 @@ do { \ __asm__ __volatile__( \ "1: " insn " %1, %2 \n" \ "2: \n" \ - " .section .fixup,\"ax\" \n" \ - "3: li.w %0, %3 \n" \ - " move %1, $zero \n" \ - " b 2b \n" \ - " .previous \n" \ - _ASM_EXTABLE(1b, 3b) \ + _ASM_EXTABLE_UACCESS_ERR_ZERO(1b, 2b, %0, %1) \ : "+r" (__gu_err), "=r" (__gu_tmp) \ - : "m" (__m(ptr)), "i" (-EFAULT)); \ + : "m" (__m(ptr))); \ \ (val) = (__typeof__(*(ptr))) __gu_tmp; \ } @@ -233,13 +228,9 @@ do { \ __asm__ __volatile__( \ "1: " insn " %z2, %1 # __put_user_asm\n" \ "2: \n" \ - " .section .fixup,\"ax\" \n" \ - "3: li.w %0, %3 \n" \ - " b 2b \n" \ - " .previous \n" \ - _ASM_EXTABLE(1b, 3b) \ + _ASM_EXTABLE_UACCESS_ERR(1b, 2b,%0) \ : "+r" (__pu_err), "=m" (__m(ptr)) \ - : "Jr" (__pu_val), "i" 
(-EFAULT)); \ + : "Jr" (__pu_val)); \ } #define HAVE_GET_KERNEL_NOFAULT diff --git a/arch/loongarch/mm/extable.c b/arch/loongarch/mm/extable.c index fd2395221cff..9b0cfd898940 100644 --- a/arch/loongarch/mm/extable.c +++ b/arch/loongarch/mm/extable.c @@ -2,6 +2,7 @@ /* * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ +#include #include #include #include @@ -13,6 +14,13 @@ get_ex_fixup(const struct exception_table_entry *ex) return ((unsigned long)&ex->fixup + ex->fixup); } +static inline void regs_set_gpr(struct pt_regs *regs, + unsigned int offset, unsigned long val) +{ + if (offset && offset <= MAX_REG_OFFSET) + *(unsigned long *)((unsigned long)regs + offset) = val; +} + static bool ex_handler_fixup(const struct exception_table_entry *ex, struct pt_regs *regs) { @@ -21,6 +29,18 @@ static bool ex_handler_fixup(const struct exception_table_entry *ex, return true; } +static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex, + struct pt_regs *regs) +{ + int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data); + int reg_zero = FIELD_GET(EX_DATA_REG_ZERO, ex->data); + + regs_set_gpr(regs, reg_err * sizeof(unsigned long), -EFAULT); + regs_set_gpr(regs, reg_zero * sizeof(unsigned long), 0); + regs->csr_era = get_ex_fixup(ex); + + return true; +} bool fixup_exception(struct pt_regs *regs) { @@ -33,6 +53,8 @@ bool fixup_exception(struct pt_regs *regs) switch (ex->type) { case EX_TYPE_FIXUP: return ex_handler_fixup(ex, regs); + case EX_TYPE_UACCESS_ERR_ZERO: + return ex_handler_uaccess_err_zero(ex, regs); } BUG(); -- Gitee From 070fb8af1b70ea6357ffa3aa82c3388cdd05fb4f Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Sat, 10 Dec 2022 22:39:59 +0800 Subject: [PATCH 30/36] LoongArch: Remove the .fixup section usage mainline inclusion from mainline-v6.2-rc1 commit 912bcfaf36771a2bf7a83799ce5454850d1c3f40 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP CVE: NA -------------------------------- Use the `.L_xxx` 
label to improve fixup code and then remove the .fixup section usage. Signed-off-by: Youling Tang Signed-off-by: Huacai Chen Change-Id: I01c14c9bbc99baffe611d7e27f4c87d494b73026 --- arch/loongarch/lib/clear_user.S | 14 +++++--------- arch/loongarch/lib/copy_user.S | 16 ++++++---------- 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S index 7a066d6a41b8..d5c9e44ac8c4 100644 --- a/arch/loongarch/lib/clear_user.S +++ b/arch/loongarch/lib/clear_user.S @@ -9,15 +9,11 @@ #include #include -.macro fixup_ex from, to, offset, fix -.if \fix - .section .fixup, "ax" -\to: addi.d a0, a1, \offset +.irp to, 0 +.L_fixup_handle_\to\(): + addi.d a0, a1, (\to) * (-8) jr ra - .previous -.endif - _asm_extable \from\()b, \to\()b -.endm +.endr /* * unsigned long __clear_user(void *addr, size_t size) @@ -36,7 +32,7 @@ SYM_FUNC_START(__clear_user) 2: move a0, a1 jr ra - fixup_ex 1, 3, 0, 1 + _asm_extable 1b, .L_fixup_handle_0 SYM_FUNC_END(__clear_user) EXPORT_SYMBOL(__clear_user) diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S index f8ace04586c2..61933d964da0 100644 --- a/arch/loongarch/lib/copy_user.S +++ b/arch/loongarch/lib/copy_user.S @@ -9,15 +9,11 @@ #include #include -.macro fixup_ex from, to, offset, fix -.if \fix - .section .fixup, "ax" -\to: addi.d a0, a2, \offset +.irp to, 0 +.L_fixup_handle_\to\(): + addi.d a0, a2, (\to) * (-8) jr ra - .previous -.endif - _asm_extable \from\()b, \to\()b -.endm +.endr /* * unsigned long __copy_user(void *to, const void *from, size_t n) @@ -39,8 +35,8 @@ SYM_FUNC_START(__copy_user) 3: move a0, a2 jr ra - fixup_ex 1, 4, 0, 1 - fixup_ex 2, 4, 0, 0 + _asm_extable 1b, .L_fixup_handle_0 + _asm_extable 2b, .L_fixup_handle_0 SYM_FUNC_END(__copy_user) EXPORT_SYMBOL(__copy_user) -- Gitee From caac8f6ff9f5ff121e80e71c0b6ee5718ac75e27 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 10 Dec 2022 22:39:59 +0800 Subject: [PATCH 31/36] LoongArch: 
Add alternative runtime patching mechanism mainline inclusion from mainline-v6.2-rc1 commit 19e5eb15b00c5841b4b9bd9777af2865a40d2f39 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP CVE: NA -------------------------------- Introduce the "alternative" mechanism from ARM64 and x86 for LoongArch to apply runtime patching. The main purpose of this patch is to provide a framework. In future we can use this mechanism (i.e., the ALTERNATIVE and ALTERNATIVE_2 macros) to optimize hotspot functions according to cpu features. Signed-off-by: Jun Yi Signed-off-by: Huacai Chen Change-Id: I204372e8f1ce7cf3c7bc7de9e8f6f16dbf06239a --- arch/loongarch/include/asm/alternative-asm.h | 82 +++++++ arch/loongarch/include/asm/alternative.h | 111 +++++++++ arch/loongarch/include/asm/bugs.h | 15 ++ arch/loongarch/include/asm/inst.h | 27 ++ arch/loongarch/kernel/Makefile | 4 +- arch/loongarch/kernel/alternative.c | 246 +++++++++++++++++++ arch/loongarch/kernel/module.c | 15 ++ arch/loongarch/kernel/setup.c | 7 + arch/loongarch/kernel/vmlinux.lds.S | 12 + 9 files changed, 517 insertions(+), 2 deletions(-) create mode 100644 arch/loongarch/include/asm/alternative-asm.h create mode 100644 arch/loongarch/include/asm/alternative.h create mode 100644 arch/loongarch/include/asm/bugs.h create mode 100644 arch/loongarch/kernel/alternative.c diff --git a/arch/loongarch/include/asm/alternative-asm.h b/arch/loongarch/include/asm/alternative-asm.h new file mode 100644 index 000000000000..ff3d10ac393f --- /dev/null +++ b/arch/loongarch/include/asm/alternative-asm.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ALTERNATIVE_ASM_H +#define _ASM_ALTERNATIVE_ASM_H + +#ifdef __ASSEMBLY__ + +#include + +/* + * Issue one struct alt_instr descriptor entry (need to put it into + * the section .altinstructions, see below). This entry contains + * enough information for the alternatives patching code to patch an + * instruction. See apply_alternatives(). 
+ */ +.macro altinstruction_entry orig alt feature orig_len alt_len + .long \orig - . + .long \alt - . + .short \feature + .byte \orig_len + .byte \alt_len +.endm + +/* + * Define an alternative between two instructions. If @feature is + * present, early code in apply_alternatives() replaces @oldinstr with + * @newinstr. ".fill" directive takes care of proper instruction padding + * in case @newinstr is longer than @oldinstr. + */ +.macro ALTERNATIVE oldinstr, newinstr, feature +140 : + \oldinstr +141 : + .fill - (((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)) / 4, 4, 0x03400000 +142 : + + .pushsection .altinstructions, "a" + altinstruction_entry 140b, 143f, \feature, 142b-140b, 144f-143f + .popsection + + .subsection 1 +143 : + \newinstr +144 : + .previous +.endm + +#define old_len (141b-140b) +#define new_len1 (144f-143f) +#define new_len2 (145f-144f) + +#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) + +/* + * Same as ALTERNATIVE macro above but for two alternatives. If CPU + * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has + * @feature2, it replaces @oldinstr with @newinstr2. 
+ */ +.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 +140 : + \oldinstr +141 : + .fill - ((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \ + (alt_max_short(new_len1, new_len2) - (old_len)) / 4, 4, 0x03400000 +142 : + + .pushsection .altinstructions, "a" + altinstruction_entry 140b, 143f, \feature1, 142b-140b, 144f-143f + altinstruction_entry 140b, 144f, \feature2, 142b-140b, 145f-144f + .popsection + + .subsection 1 +143 : + \newinstr1 +144 : + \newinstr2 +145 : + .previous +.endm + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_ALTERNATIVE_ASM_H */ diff --git a/arch/loongarch/include/asm/alternative.h b/arch/loongarch/include/asm/alternative.h new file mode 100644 index 000000000000..cee7b29785ab --- /dev/null +++ b/arch/loongarch/include/asm/alternative.h @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ALTERNATIVE_H +#define _ASM_ALTERNATIVE_H + +#ifndef __ASSEMBLY__ + +#include +#include +#include +#include + +struct alt_instr { + s32 instr_offset; /* offset to original instruction */ + s32 replace_offset; /* offset to replacement instruction */ + u16 feature; /* feature bit set for replacement */ + u8 instrlen; /* length of original instruction */ + u8 replacementlen; /* length of new instruction */ +} __packed; + +/* + * Debug flag that can be tested to see whether alternative + * instructions were patched in already: + */ +extern int alternatives_patched; +extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; + +extern void alternative_instructions(void); +extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); + +#define b_replacement(num) "664"#num +#define e_replacement(num) "665"#num + +#define alt_end_marker "663" +#define alt_slen "662b-661b" +#define alt_total_slen alt_end_marker"b-661b" +#define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f" + +#define __OLDINSTR(oldinstr, num) \ + "661:\n\t" oldinstr "\n662:\n" \ + ".fill 
-(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \ + "((" alt_rlen(num) ")-(" alt_slen ")) / 4, 4, 0x03400000\n" + +#define OLDINSTR(oldinstr, num) \ + __OLDINSTR(oldinstr, num) \ + alt_end_marker ":\n" + +#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")))))" + +/* + * Pad the second replacement alternative with additional NOPs if it is + * additionally longer than the first replacement alternative. + */ +#define OLDINSTR_2(oldinstr, num1, num2) \ + "661:\n\t" oldinstr "\n662:\n" \ + ".fill -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \ + "(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) / 4, " \ + "4, 0x03400000\n" \ + alt_end_marker ":\n" + +#define ALTINSTR_ENTRY(feature, num) \ + " .long 661b - .\n" /* label */ \ + " .long " b_replacement(num)"f - .\n" /* new instruction */ \ + " .short " __stringify(feature) "\n" /* feature bit */ \ + " .byte " alt_total_slen "\n" /* source len */ \ + " .byte " alt_rlen(num) "\n" /* replacement len */ + +#define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \ + b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n\t" + +/* alternative assembly primitive: */ +#define ALTERNATIVE(oldinstr, newinstr, feature) \ + OLDINSTR(oldinstr, 1) \ + ".pushsection .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY(feature, 1) \ + ".popsection\n" \ + ".subsection 1\n" \ + ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ + ".previous\n" + +#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ + OLDINSTR_2(oldinstr, 1, 2) \ + ".pushsection .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY(feature1, 1) \ + ALTINSTR_ENTRY(feature2, 2) \ + ".popsection\n" \ + ".subsection 1\n" \ + ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ + ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ + ".previous\n" + +/* + * Alternative instructions for different CPU types or capabilities. 
+ * + * This allows using optimized instructions even on generic binary + * kernels. + * + * The length of oldinstr must be at least the length of newinstr. + * It can be padded with nops as needed. + * + * For non-barrier-like inlines, please define new variants + * without volatile and memory clobber. + */ +#define alternative(oldinstr, newinstr, feature) \ + asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory") + +#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \ + asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory") + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_ALTERNATIVE_H */ diff --git a/arch/loongarch/include/asm/bugs.h b/arch/loongarch/include/asm/bugs.h new file mode 100644 index 000000000000..98396535163b --- /dev/null +++ b/arch/loongarch/include/asm/bugs.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This is included by init/main.c to check for architecture-dependent bugs. 
+ * + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ +#ifndef _ASM_BUGS_H +#define _ASM_BUGS_H + +#include +#include + +extern void check_bugs(void); + +#endif /* _ASM_BUGS_H */ diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index 7b07cbb3188c..a74e221b73bc 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -8,6 +8,8 @@ #include #include +#define INSN_NOP 0x03400000 + #define ADDR_IMMMASK_LU52ID 0xFFF0000000000000 #define ADDR_IMMMASK_LU32ID 0x000FFFFF00000000 #define ADDR_IMMMASK_ADDU16ID 0x00000000FFFF0000 @@ -18,14 +20,25 @@ #define ADDR_IMM(addr, INSN) ((addr & ADDR_IMMMASK_##INSN) >> ADDR_IMMSHIFT_##INSN) +enum reg0i26_op { + b_op = 0x14, + bl_op = 0x15, +}; + enum reg1i20_op { lu12iw_op = 0x0a, lu32id_op = 0x0b, + pcaddi_op = 0x0c, + pcalau12i_op = 0x0d, + pcaddu12i_op = 0x0e, + pcaddu18i_op = 0x0f, }; enum reg1i21_op { beqz_op = 0x10, bnez_op = 0x11, + bceqz_op = 0x12, /* bits[9:8] = 0x00 */ + bcnez_op = 0x12, /* bits[9:8] = 0x01 */ }; enum reg2i12_op { @@ -138,6 +151,20 @@ static inline bool is_imm_negative(unsigned long val, unsigned int bit) return val & (1UL << (bit - 1)); } +static inline unsigned long sign_extend(unsigned long val, unsigned int idx) +{ + if (!is_imm_negative(val, idx + 1)) + return ((1UL << idx) - 1) & val; + else + return ~((1UL << idx) - 1) | val; +} + +static inline bool is_pc_ins(union loongarch_instruction *ip) +{ + return ip->reg1i20_format.opcode >= pcaddi_op && + ip->reg1i20_format.opcode <= pcaddu18i_op; +} + static inline bool is_branch_ins(union loongarch_instruction *ip) { return ip->reg1i21_format.opcode >= beqz_op && diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 11cc09679b40..6c940e41d676 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -7,8 +7,8 @@ extra-y := head.o vmlinux.lds obj-y += cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \ traps.o 
irq.o idle.o process.o dma.o mem.o io.o reset.o switch.o \ - elf.o legacy_boot.o syscall.o signal.o time.o topology.o inst.o ptrace.o vdso.o\ - platform.o + elf.o legacy_boot.o syscall.o signal.o time.o topology.o inst.o ptrace.o vdso.o \ + alternative.o platform.o obj-$(CONFIG_ACPI) += acpi.o obj-$(CONFIG_EFI) += efi.o diff --git a/arch/loongarch/kernel/alternative.c b/arch/loongarch/kernel/alternative.c new file mode 100644 index 000000000000..c5aebeac960b --- /dev/null +++ b/arch/loongarch/kernel/alternative.c @@ -0,0 +1,246 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include +#include + +int __read_mostly alternatives_patched; + +EXPORT_SYMBOL_GPL(alternatives_patched); + +#define MAX_PATCH_SIZE (((u8)(-1)) / LOONGARCH_INSN_SIZE) + +static int __initdata_or_module debug_alternative; + +static int __init debug_alt(char *str) +{ + debug_alternative = 1; + return 1; +} +__setup("debug-alternative", debug_alt); + +#define DPRINTK(fmt, args...) \ +do { \ + if (debug_alternative) \ + printk(KERN_DEBUG "%s: " fmt "\n", __func__, ##args); \ +} while (0) + +#define DUMP_WORDS(buf, count, fmt, args...) \ +do { \ + if (unlikely(debug_alternative)) { \ + int _j; \ + union loongarch_instruction *_buf = buf; \ + \ + if (!(count)) \ + break; \ + \ + printk(KERN_DEBUG fmt, ##args); \ + for (_j = 0; _j < count - 1; _j++) \ + printk(KERN_CONT "<%08x> ", _buf[_j].word); \ + printk(KERN_CONT "<%08x>\n", _buf[_j].word); \ + } \ +} while (0) + +/* Use this to add nops to a buffer, then text_poke the whole buffer. 
*/ +static void __init_or_module add_nops(union loongarch_instruction *insn, int count) +{ + while (count--) { + insn->word = INSN_NOP; + insn++; + } +} + +/* Is the jump addr in local .altinstructions */ +static inline bool in_alt_jump(unsigned long jump, void *start, void *end) +{ + return jump >= (unsigned long)start && jump < (unsigned long)end; +} + +static void __init_or_module recompute_jump(union loongarch_instruction *buf, + union loongarch_instruction *dest, union loongarch_instruction *src, + void *start, void *end) +{ + unsigned int si, si_l, si_h; + unsigned long cur_pc, jump_addr, pc; + long offset; + + cur_pc = (unsigned long)src; + pc = (unsigned long)dest; + + si_l = src->reg0i26_format.immediate_l; + si_h = src->reg0i26_format.immediate_h; + switch (src->reg0i26_format.opcode) { + case b_op: + case bl_op: + jump_addr = cur_pc + sign_extend((si_h << 16 | si_l) << 2, 27); + if (in_alt_jump(jump_addr, start, end)) + return; + offset = jump_addr - pc; + BUG_ON(offset < -SZ_128M || offset >= SZ_128M); + offset >>= 2; + buf->reg0i26_format.immediate_h = offset >> 16; + buf->reg0i26_format.immediate_l = offset; + return; + } + + si_l = src->reg1i21_format.immediate_l; + si_h = src->reg1i21_format.immediate_h; + switch (src->reg1i21_format.opcode) { + case bceqz_op: /* bceqz_op = bcnez_op */ + BUG_ON(buf->reg1i21_format.rj & BIT(4)); + fallthrough; + case beqz_op: + case bnez_op: + jump_addr = cur_pc + sign_extend((si_h << 16 | si_l) << 2, 22); + if (in_alt_jump(jump_addr, start, end)) + return; + offset = jump_addr - pc; + BUG_ON(offset < -SZ_4M || offset >= SZ_4M); + offset >>= 2; + buf->reg1i21_format.immediate_h = offset >> 16; + buf->reg1i21_format.immediate_l = offset; + return; + } + + si = src->reg2i16_format.immediate; + switch (src->reg2i16_format.opcode) { + case beq_op: + case bne_op: + case blt_op: + case bge_op: + case bltu_op: + case bgeu_op: + jump_addr = cur_pc + sign_extend(si << 2, 17); + if (in_alt_jump(jump_addr, start, end)) + 
return; + offset = jump_addr - pc; + BUG_ON(offset < -SZ_128K || offset >= SZ_128K); + offset >>= 2; + buf->reg2i16_format.immediate = offset; + return; + } +} + +static int __init_or_module copy_alt_insns(union loongarch_instruction *buf, + union loongarch_instruction *dest, union loongarch_instruction *src, int nr) +{ + int i; + + for (i = 0; i < nr; i++) { + buf[i].word = src[i].word; + + if (is_pc_ins(&src[i])) { + pr_err("Not support pcrel instruction at present!"); + return -EINVAL; + } + + if (is_branch_ins(&src[i]) && + src[i].reg2i16_format.opcode != jirl_op) { + recompute_jump(&buf[i], &dest[i], &src[i], src, src + nr); + } + } + + return 0; +} + +/* + * text_poke_early - Update instructions on a live kernel at boot time + * + * When you use this code to patch more than one byte of an instruction + * you need to make sure that other CPUs cannot execute this code in parallel. + * Also no thread must be currently preempted in the middle of these + * instructions. And on the local CPU you need to be protected against NMI or MCE + * handlers seeing an inconsistent instruction while you patch. + */ +static void *__init_or_module text_poke_early(union loongarch_instruction *insn, + union loongarch_instruction *buf, unsigned int nr) +{ + int i; + unsigned long flags; + + local_irq_save(flags); + + for (i = 0; i < nr; i++) + insn[i].word = buf[i].word; + + local_irq_restore(flags); + + wbflush(); + flush_icache_range((unsigned long)insn, (unsigned long)(insn + nr)); + + return insn; +} + +/* + * Replace instructions with better alternatives for this CPU type. This runs + * before SMP is initialized to avoid SMP problems with self modifying code. + * This implies that asymmetric systems where APs have fewer capabilities than + * the boot processor are not handled. Tough. Make sure you disable such + * features by hand. 
+ */ +void __init_or_module apply_alternatives(struct alt_instr *start, struct alt_instr *end) +{ + struct alt_instr *a; + unsigned int nr_instr, nr_repl, nr_insnbuf; + union loongarch_instruction *instr, *replacement; + union loongarch_instruction insnbuf[MAX_PATCH_SIZE]; + + DPRINTK("alt table %px, -> %px", start, end); + /* + * The scan order should be from start to end. A later scanned + * alternative code can overwrite previously scanned alternative code. + * Some kernel functions (e.g. memcpy, memset, etc) use this order to + * patch code. + * + * So be careful if you want to change the scan order to any other + * order. + */ + for (a = start; a < end; a++) { + nr_insnbuf = 0; + + instr = (void *)&a->instr_offset + a->instr_offset; + replacement = (void *)&a->replace_offset + a->replace_offset; + + BUG_ON(a->instrlen > sizeof(insnbuf)); + BUG_ON(a->instrlen & 0x3); + BUG_ON(a->replacementlen & 0x3); + + nr_instr = a->instrlen / LOONGARCH_INSN_SIZE; + nr_repl = a->replacementlen / LOONGARCH_INSN_SIZE; + + if (!cpu_has(a->feature)) { + DPRINTK("feat not exist: %d, old: (%px len: %d), repl: (%px, len: %d)", + a->feature, instr, a->instrlen, + replacement, a->replacementlen); + + continue; + } + + DPRINTK("feat: %d, old: (%px len: %d), repl: (%px, len: %d)", + a->feature, instr, a->instrlen, + replacement, a->replacementlen); + + DUMP_WORDS(instr, nr_instr, "%px: old_insn: ", instr); + DUMP_WORDS(replacement, nr_repl, "%px: rpl_insn: ", replacement); + + copy_alt_insns(insnbuf, instr, replacement, nr_repl); + nr_insnbuf = nr_repl; + + if (nr_instr > nr_repl) { + add_nops(insnbuf + nr_repl, nr_instr - nr_repl); + nr_insnbuf += nr_instr - nr_repl; + } + DUMP_WORDS(insnbuf, nr_insnbuf, "%px: final_insn: ", instr); + + text_poke_early(instr, insnbuf, nr_insnbuf); + } +} + +void __init alternative_instructions(void) +{ + apply_alternatives(__alt_instructions, __alt_instructions_end); + + alternatives_patched = 1; +} diff --git a/arch/loongarch/kernel/module.c 
b/arch/loongarch/kernel/module.c index bee7457db804..8127df5dd0ad 100644 --- a/arch/loongarch/kernel/module.c +++ b/arch/loongarch/kernel/module.c @@ -17,6 +17,7 @@ #include #include #include +#include static inline bool signed_imm_check(long val, unsigned int bit) { @@ -466,3 +467,17 @@ void *module_alloc(unsigned long size) return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); } + +int module_finalize(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, struct module *mod) +{ + const Elf_Shdr *s, *se; + const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + + for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) { + if (!strcmp(".altinstructions", secstrs + s->sh_name)) + apply_alternatives((void *)s->sh_addr, (void *)s->sh_addr + s->sh_size); + } + + return 0; +} diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 35d99525485b..075fd4f778ea 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -29,7 +29,9 @@ #include #include +#include #include +#include #include #include #include @@ -79,6 +81,11 @@ const char *get_system_type(void) return "generic-loongson-machine"; } +void __init check_bugs(void) +{ + alternative_instructions(); +} + static const char *dmi_string_parse(const struct dmi_header *dm, u8 s) { const u8 *bp = ((u8 *) dm) + dm->length; diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S index e956e2e3c510..c673347a80bb 100644 --- a/arch/loongarch/kernel/vmlinux.lds.S +++ b/arch/loongarch/kernel/vmlinux.lds.S @@ -54,6 +54,18 @@ SECTIONS . = ALIGN(PECOFF_SEGMENT_ALIGN); _etext = .; + /* + * struct alt_inst entries. From the header (alternative.h): + * "Alternative instructions for different CPU types or capabilities" + * Think locking instructions on spinlocks. + */ + . 
= ALIGN(4); + .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { + __alt_instructions = .; + *(.altinstructions) + __alt_instructions_end = .; + } + . = ALIGN(PECOFF_SEGMENT_ALIGN); __init_begin = .; __inittext_begin = .; -- Gitee From ffe6c674caab29ec78c526f459932d23121310ee Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 10 Dec 2022 22:39:59 +0800 Subject: [PATCH 32/36] LoongArch: Use alternative to optimize libraries mainline inclusion from mainline-v6.2-rc1 commit a275a82dcd4024c75337db15d59ed039c31e21da category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP CVE: NA -------------------------------- Use the alternative to optimize common libraries according whether CPU has UAL (hardware unaligned access support) feature, including memset(), memcopy(), memmove(), copy_user() and clear_user(). We have tested UnixBench on a Loongson-3A5000 quad-core machine (1.6GHz): 1, One copy, before patch: System Benchmarks Index Values BASELINE RESULT INDEX Dhrystone 2 using register variables 116700.0 9566582.0 819.8 Double-Precision Whetstone 55.0 2805.3 510.1 Execl Throughput 43.0 2120.0 493.0 File Copy 1024 bufsize 2000 maxblocks 3960.0 209833.0 529.9 File Copy 256 bufsize 500 maxblocks 1655.0 89400.0 540.2 File Copy 4096 bufsize 8000 maxblocks 5800.0 320036.0 551.8 Pipe Throughput 12440.0 340624.0 273.8 Pipe-based Context Switching 4000.0 109939.1 274.8 Process Creation 126.0 4728.7 375.3 Shell Scripts (1 concurrent) 42.4 2223.1 524.3 Shell Scripts (8 concurrent) 6.0 883.1 1471.9 System Call Overhead 15000.0 518639.1 345.8 ======== System Benchmarks Index Score 500.2 2, One copy, after patch: System Benchmarks Index Values BASELINE RESULT INDEX Dhrystone 2 using register variables 116700.0 9567674.7 819.9 Double-Precision Whetstone 55.0 2805.5 510.1 Execl Throughput 43.0 2392.7 556.4 File Copy 1024 bufsize 2000 maxblocks 3960.0 417804.0 1055.1 File Copy 256 bufsize 500 maxblocks 1655.0 112909.5 682.2 File Copy 4096 bufsize 8000 
maxblocks 5800.0 1255207.4 2164.2 Pipe Throughput 12440.0 555712.0 446.7 Pipe-based Context Switching 4000.0 99964.5 249.9 Process Creation 126.0 5192.5 412.1 Shell Scripts (1 concurrent) 42.4 2302.4 543.0 Shell Scripts (8 concurrent) 6.0 919.6 1532.6 System Call Overhead 15000.0 511159.3 340.8 ======== System Benchmarks Index Score 640.1 3, Four copies, before patch: System Benchmarks Index Values BASELINE RESULT INDEX Dhrystone 2 using register variables 116700.0 38268610.5 3279.2 Double-Precision Whetstone 55.0 11222.2 2040.4 Execl Throughput 43.0 7892.0 1835.3 File Copy 1024 bufsize 2000 maxblocks 3960.0 235149.6 593.8 File Copy 256 bufsize 500 maxblocks 1655.0 74959.6 452.9 File Copy 4096 bufsize 8000 maxblocks 5800.0 545048.5 939.7 Pipe Throughput 12440.0 1337359.0 1075.0 Pipe-based Context Switching 4000.0 473663.9 1184.2 Process Creation 126.0 17491.2 1388.2 Shell Scripts (1 concurrent) 42.4 6865.7 1619.3 Shell Scripts (8 concurrent) 6.0 1015.9 1693.1 System Call Overhead 15000.0 1899535.2 1266.4 ======== System Benchmarks Index Score 1278.3 4, Four copies, after patch: System Benchmarks Index Values BASELINE RESULT INDEX Dhrystone 2 using register variables 116700.0 38272815.5 3279.6 Double-Precision Whetstone 55.0 11222.8 2040.5 Execl Throughput 43.0 8839.2 2055.6 File Copy 1024 bufsize 2000 maxblocks 3960.0 313912.9 792.7 File Copy 256 bufsize 500 maxblocks 1655.0 80976.1 489.3 File Copy 4096 bufsize 8000 maxblocks 5800.0 1176594.3 2028.6 Pipe Throughput 12440.0 2100941.9 1688.9 Pipe-based Context Switching 4000.0 476696.4 1191.7 Process Creation 126.0 18394.7 1459.9 Shell Scripts (1 concurrent) 42.4 7172.2 1691.6 Shell Scripts (8 concurrent) 6.0 1058.3 1763.9 System Call Overhead 15000.0 1874714.7 1249.8 ======== System Benchmarks Index Score 1488.8 Signed-off-by: Jun Yi Signed-off-by: Huacai Chen Change-Id: I5dc8f19abfe6fcdfbe473b1433e65a7d4d5d110b --- arch/loongarch/include/asm/string.h | 5 ++ arch/loongarch/lib/Makefile | 3 +- 
arch/loongarch/lib/clear_user.S | 70 ++++++++++++++-- arch/loongarch/lib/copy_user.S | 91 +++++++++++++++++++-- arch/loongarch/lib/memcpy.S | 95 ++++++++++++++++++++++ arch/loongarch/lib/memmove.S | 121 ++++++++++++++++++++++++++++ arch/loongarch/lib/memset.S | 91 +++++++++++++++++++++ 7 files changed, 465 insertions(+), 11 deletions(-) create mode 100644 arch/loongarch/lib/memcpy.S create mode 100644 arch/loongarch/lib/memmove.S create mode 100644 arch/loongarch/lib/memset.S diff --git a/arch/loongarch/include/asm/string.h b/arch/loongarch/include/asm/string.h index b07e60ded957..7b29cc9c70aa 100644 --- a/arch/loongarch/include/asm/string.h +++ b/arch/loongarch/include/asm/string.h @@ -5,8 +5,13 @@ #ifndef _ASM_STRING_H #define _ASM_STRING_H +#define __HAVE_ARCH_MEMSET extern void *memset(void *__s, int __c, size_t __count); + +#define __HAVE_ARCH_MEMCPY extern void *memcpy(void *__to, __const__ void *__from, size_t __n); + +#define __HAVE_ARCH_MEMMOVE extern void *memmove(void *__dest, __const__ void *__src, size_t __n); #endif /* _ASM_STRING_H */ diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile index 1bb75cd8caf7..8263fe4ca6a2 100644 --- a/arch/loongarch/lib/Makefile +++ b/arch/loongarch/lib/Makefile @@ -3,4 +3,5 @@ # Makefile for LoongArch-specific library files. 
# -lib-y += delay.o clear_user.o strnlen_user.o strncpy_user.o copy_user.o dump_tlb.o +lib-y += delay.o clear_user.o strnlen_user.o strncpy_user.o copy_user.o dump_tlb.o \ + memset.o memcpy.o memmove.o diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S index d5c9e44ac8c4..2dc48e61a2c8 100644 --- a/arch/loongarch/lib/clear_user.S +++ b/arch/loongarch/lib/clear_user.S @@ -3,25 +3,37 @@ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ +#include #include #include #include +#include #include #include -.irp to, 0 +.irp to, 0, 1, 2, 3, 4, 5, 6, 7 .L_fixup_handle_\to\(): addi.d a0, a1, (\to) * (-8) jr ra .endr +SYM_FUNC_START(__clear_user) + /* + * Some CPUs support hardware unaligned access + */ + ALTERNATIVE "b __clear_user_generic", \ + "b __clear_user_fast", CPU_FEATURE_UAL +SYM_FUNC_END(__clear_user) + +EXPORT_SYMBOL(__clear_user) + /* - * unsigned long __clear_user(void *addr, size_t size) + * unsigned long __clear_user_generic(void *addr, size_t size) * * a0: addr * a1: size */ -SYM_FUNC_START(__clear_user) +SYM_FUNC_START(__clear_user_generic) beqz a1, 2f 1: st.b zero, a0, 0 @@ -33,6 +45,54 @@ SYM_FUNC_START(__clear_user) jr ra _asm_extable 1b, .L_fixup_handle_0 -SYM_FUNC_END(__clear_user) +SYM_FUNC_END(__clear_user_generic) -EXPORT_SYMBOL(__clear_user) +/* + * unsigned long __clear_user_fast(void *addr, unsigned long size) + * + * a0: addr + * a1: size + */ +SYM_FUNC_START(__clear_user_fast) + beqz a1, 10f + + ori a2, zero, 64 + blt a1, a2, 9f + + /* set 64 bytes at a time */ +1: st.d zero, a0, 0 +2: st.d zero, a0, 8 +3: st.d zero, a0, 16 +4: st.d zero, a0, 24 +5: st.d zero, a0, 32 +6: st.d zero, a0, 40 +7: st.d zero, a0, 48 +8: st.d zero, a0, 56 + + addi.d a0, a0, 64 + addi.d a1, a1, -64 + bge a1, a2, 1b + + beqz a1, 10f + + /* set the remaining bytes */ +9: st.b zero, a0, 0 + addi.d a0, a0, 1 + addi.d a1, a1, -1 + bgt a1, zero, 9b + + /* return */ +10: move a0, a1 + jr ra + + /* fixup and ex_table */ + _asm_extable 
1b, .L_fixup_handle_0 + _asm_extable 2b, .L_fixup_handle_1 + _asm_extable 3b, .L_fixup_handle_2 + _asm_extable 4b, .L_fixup_handle_3 + _asm_extable 5b, .L_fixup_handle_4 + _asm_extable 6b, .L_fixup_handle_5 + _asm_extable 7b, .L_fixup_handle_6 + _asm_extable 8b, .L_fixup_handle_7 + _asm_extable 9b, .L_fixup_handle_0 +SYM_FUNC_END(__clear_user_fast) diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S index 61933d964da0..55ac6020a1ad 100644 --- a/arch/loongarch/lib/copy_user.S +++ b/arch/loongarch/lib/copy_user.S @@ -3,26 +3,38 @@ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ +#include #include #include #include +#include #include #include -.irp to, 0 +.irp to, 0, 1, 2, 3, 4, 5, 6, 7 .L_fixup_handle_\to\(): addi.d a0, a2, (\to) * (-8) jr ra .endr +SYM_FUNC_START(__copy_user) + /* + * Some CPUs support hardware unaligned access + */ + ALTERNATIVE "b __copy_user_generic", \ + "b __copy_user_fast", CPU_FEATURE_UAL +SYM_FUNC_END(__copy_user) + +EXPORT_SYMBOL(__copy_user) + /* - * unsigned long __copy_user(void *to, const void *from, size_t n) + * unsigned long __copy_user_generic(void *to, const void *from, size_t n) * * a0: to * a1: from * a2: n */ -SYM_FUNC_START(__copy_user) +SYM_FUNC_START(__copy_user_generic) beqz a2, 3f 1: ld.b t0, a1, 0 @@ -37,6 +49,75 @@ SYM_FUNC_START(__copy_user) _asm_extable 1b, .L_fixup_handle_0 _asm_extable 2b, .L_fixup_handle_0 -SYM_FUNC_END(__copy_user) +SYM_FUNC_END(__copy_user_generic) -EXPORT_SYMBOL(__copy_user) +/* + * unsigned long __copy_user_fast(void *to, const void *from, unsigned long n) + * + * a0: to + * a1: from + * a2: n + */ +SYM_FUNC_START(__copy_user_fast) + beqz a2, 19f + + ori a3, zero, 64 + blt a2, a3, 17f + + /* copy 64 bytes at a time */ +1: ld.d t0, a1, 0 +2: ld.d t1, a1, 8 +3: ld.d t2, a1, 16 +4: ld.d t3, a1, 24 +5: ld.d t4, a1, 32 +6: ld.d t5, a1, 40 +7: ld.d t6, a1, 48 +8: ld.d t7, a1, 56 +9: st.d t0, a0, 0 +10: st.d t1, a0, 8 +11: st.d t2, a0, 16 +12: st.d t3, a0, 
24 +13: st.d t4, a0, 32 +14: st.d t5, a0, 40 +15: st.d t6, a0, 48 +16: st.d t7, a0, 56 + + addi.d a0, a0, 64 + addi.d a1, a1, 64 + addi.d a2, a2, -64 + bge a2, a3, 1b + + beqz a2, 19f + + /* copy the remaining bytes */ +17: ld.b t0, a1, 0 +18: st.b t0, a0, 0 + addi.d a0, a0, 1 + addi.d a1, a1, 1 + addi.d a2, a2, -1 + bgt a2, zero, 17b + + /* return */ +19: move a0, a2 + jr ra + + /* fixup and ex_table */ + _asm_extable 1b, .L_fixup_handle_0 + _asm_extable 2b, .L_fixup_handle_1 + _asm_extable 3b, .L_fixup_handle_2 + _asm_extable 4b, .L_fixup_handle_3 + _asm_extable 5b, .L_fixup_handle_4 + _asm_extable 6b, .L_fixup_handle_5 + _asm_extable 7b, .L_fixup_handle_6 + _asm_extable 8b, .L_fixup_handle_7 + _asm_extable 9b, .L_fixup_handle_0 + _asm_extable 10b, .L_fixup_handle_1 + _asm_extable 11b, .L_fixup_handle_2 + _asm_extable 12b, .L_fixup_handle_3 + _asm_extable 13b, .L_fixup_handle_4 + _asm_extable 14b, .L_fixup_handle_5 + _asm_extable 15b, .L_fixup_handle_6 + _asm_extable 16b, .L_fixup_handle_7 + _asm_extable 17b, .L_fixup_handle_0 + _asm_extable 18b, .L_fixup_handle_0 +SYM_FUNC_END(__copy_user_fast) diff --git a/arch/loongarch/lib/memcpy.S b/arch/loongarch/lib/memcpy.S new file mode 100644 index 000000000000..7c07d595ee89 --- /dev/null +++ b/arch/loongarch/lib/memcpy.S @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include +#include +#include + +SYM_FUNC_START(memcpy) + /* + * Some CPUs support hardware unaligned access + */ + ALTERNATIVE "b __memcpy_generic", \ + "b __memcpy_fast", CPU_FEATURE_UAL +SYM_FUNC_END(memcpy) + +EXPORT_SYMBOL(memcpy) + +/* + * void *__memcpy_generic(void *dst, const void *src, size_t n) + * + * a0: dst + * a1: src + * a2: n + */ +SYM_FUNC_START(__memcpy_generic) + move a3, a0 + beqz a2, 2f + +1: ld.b t0, a1, 0 + st.b t0, a0, 0 + addi.d a0, a0, 1 + addi.d a1, a1, 1 + addi.d a2, a2, -1 + bgt a2, zero, 1b + +2: move a0, a3 
+ jr ra +SYM_FUNC_END(__memcpy_generic) + +/* + * void *__memcpy_fast(void *dst, const void *src, size_t n) + * + * a0: dst + * a1: src + * a2: n + */ +SYM_FUNC_START(__memcpy_fast) + move a3, a0 + beqz a2, 3f + + ori a4, zero, 64 + blt a2, a4, 2f + + /* copy 64 bytes at a time */ +1: ld.d t0, a1, 0 + ld.d t1, a1, 8 + ld.d t2, a1, 16 + ld.d t3, a1, 24 + ld.d t4, a1, 32 + ld.d t5, a1, 40 + ld.d t6, a1, 48 + ld.d t7, a1, 56 + st.d t0, a0, 0 + st.d t1, a0, 8 + st.d t2, a0, 16 + st.d t3, a0, 24 + st.d t4, a0, 32 + st.d t5, a0, 40 + st.d t6, a0, 48 + st.d t7, a0, 56 + + addi.d a0, a0, 64 + addi.d a1, a1, 64 + addi.d a2, a2, -64 + bge a2, a4, 1b + + beqz a2, 3f + + /* copy the remaining bytes */ +2: ld.b t0, a1, 0 + st.b t0, a0, 0 + addi.d a0, a0, 1 + addi.d a1, a1, 1 + addi.d a2, a2, -1 + bgt a2, zero, 2b + + /* return */ +3: move a0, a3 + jr ra +SYM_FUNC_END(__memcpy_fast) diff --git a/arch/loongarch/lib/memmove.S b/arch/loongarch/lib/memmove.S new file mode 100644 index 000000000000..6ffdb46da78f --- /dev/null +++ b/arch/loongarch/lib/memmove.S @@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include +#include +#include + +SYM_FUNC_START(memmove) + blt a0, a1, 1f /* dst < src, memcpy */ + blt a1, a0, 3f /* src < dst, rmemcpy */ + jr ra /* dst == src, return */ + + /* if (src - dst) < 64, copy 1 byte at a time */ +1: ori a3, zero, 64 + sub.d t0, a1, a0 + blt t0, a3, 2f + b memcpy +2: b __memcpy_generic + + /* if (dst - src) < 64, copy 1 byte at a time */ +3: ori a3, zero, 64 + sub.d t0, a0, a1 + blt t0, a3, 4f + b rmemcpy +4: b __rmemcpy_generic +SYM_FUNC_END(memmove) + +EXPORT_SYMBOL(memmove) + +SYM_FUNC_START(rmemcpy) + /* + * Some CPUs support hardware unaligned access + */ + ALTERNATIVE "b __rmemcpy_generic", \ + "b __rmemcpy_fast", CPU_FEATURE_UAL +SYM_FUNC_END(rmemcpy) + +/* + * void *__rmemcpy_generic(void *dst, const void *src, size_t n) + * + 
* a0: dst + * a1: src + * a2: n + */ +SYM_FUNC_START(__rmemcpy_generic) + move a3, a0 + beqz a2, 2f + + add.d a0, a0, a2 + add.d a1, a1, a2 + +1: ld.b t0, a1, -1 + st.b t0, a0, -1 + addi.d a0, a0, -1 + addi.d a1, a1, -1 + addi.d a2, a2, -1 + bgt a2, zero, 1b + +2: move a0, a3 + jr ra +SYM_FUNC_END(__rmemcpy_generic) + +/* + * void *__rmemcpy_fast(void *dst, const void *src, size_t n) + * + * a0: dst + * a1: src + * a2: n + */ +SYM_FUNC_START(__rmemcpy_fast) + move a3, a0 + beqz a2, 3f + + add.d a0, a0, a2 + add.d a1, a1, a2 + + ori a4, zero, 64 + blt a2, a4, 2f + + /* copy 64 bytes at a time */ +1: ld.d t0, a1, -8 + ld.d t1, a1, -16 + ld.d t2, a1, -24 + ld.d t3, a1, -32 + ld.d t4, a1, -40 + ld.d t5, a1, -48 + ld.d t6, a1, -56 + ld.d t7, a1, -64 + st.d t0, a0, -8 + st.d t1, a0, -16 + st.d t2, a0, -24 + st.d t3, a0, -32 + st.d t4, a0, -40 + st.d t5, a0, -48 + st.d t6, a0, -56 + st.d t7, a0, -64 + + addi.d a0, a0, -64 + addi.d a1, a1, -64 + addi.d a2, a2, -64 + bge a2, a4, 1b + + beqz a2, 3f + + /* copy the remaining bytes */ +2: ld.b t0, a1, -1 + st.b t0, a0, -1 + addi.d a0, a0, -1 + addi.d a1, a1, -1 + addi.d a2, a2, -1 + bgt a2, zero, 2b + + /* return */ +3: move a0, a3 + jr ra +SYM_FUNC_END(__rmemcpy_fast) diff --git a/arch/loongarch/lib/memset.S b/arch/loongarch/lib/memset.S new file mode 100644 index 000000000000..e7cb4ea3747d --- /dev/null +++ b/arch/loongarch/lib/memset.S @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include +#include +#include + +.macro fill_to_64 r0 + bstrins.d \r0, \r0, 15, 8 + bstrins.d \r0, \r0, 31, 16 + bstrins.d \r0, \r0, 63, 32 +.endm + +SYM_FUNC_START(memset) + /* + * Some CPUs support hardware unaligned access + */ + ALTERNATIVE "b __memset_generic", \ + "b __memset_fast", CPU_FEATURE_UAL +SYM_FUNC_END(memset) + +EXPORT_SYMBOL(memset) + +/* + * void *__memset_generic(void *s, int c, size_t n) + * + * a0: s + * a1: 
c + * a2: n + */ +SYM_FUNC_START(__memset_generic) + move a3, a0 + beqz a2, 2f + +1: st.b a1, a0, 0 + addi.d a0, a0, 1 + addi.d a2, a2, -1 + bgt a2, zero, 1b + +2: move a0, a3 + jr ra +SYM_FUNC_END(__memset_generic) + +/* + * void *__memset_fast(void *s, int c, size_t n) + * + * a0: s + * a1: c + * a2: n + */ +SYM_FUNC_START(__memset_fast) + move a3, a0 + beqz a2, 3f + + ori a4, zero, 64 + blt a2, a4, 2f + + /* fill a1 to 64 bits */ + fill_to_64 a1 + + /* set 64 bytes at a time */ +1: st.d a1, a0, 0 + st.d a1, a0, 8 + st.d a1, a0, 16 + st.d a1, a0, 24 + st.d a1, a0, 32 + st.d a1, a0, 40 + st.d a1, a0, 48 + st.d a1, a0, 56 + + addi.d a0, a0, 64 + addi.d a2, a2, -64 + bge a2, a4, 1b + + beqz a2, 3f + + /* set the remaining bytes */ +2: st.b a1, a0, 0 + addi.d a0, a0, 1 + addi.d a2, a2, -1 + bgt a2, zero, 2b + + /* return */ +3: move a0, a3 + jr ra +SYM_FUNC_END(__memset_fast) -- Gitee From 80764bc784413eb604c7d472db55b1ca72d4bbc5 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 22 May 2018 16:54:04 +0800 Subject: [PATCH 33/36] drm/loongson: add kernel modesetting driver support for ls7a1000/ls7a2000 LoongArch inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP -------------------------------- 1) Support Double Screen HW Cursor 2) Support LS7A1000/LS7A2000 + 3A5000 3) CRTC's DMA Step is remain 256 bytes Change-Id: Id3c3a7bebf1e95dcc882f69ba20ff3b7e57d275d Signed-off-by: Jingfeng Sui Signed-off-by: Huacai Chen --- arch/loongarch/configs/loongson3_defconfig | 1 + drivers/gpu/drm/Kconfig | 2 + drivers/gpu/drm/Makefile | 1 + drivers/gpu/drm/loongson/Kconfig | 26 + drivers/gpu/drm/loongson/Makefile | 16 + drivers/gpu/drm/loongson/lsdc_crtc.c | 420 +++++++++++++++ drivers/gpu/drm/loongson/lsdc_debugfs.c | 176 ++++++ drivers/gpu/drm/loongson/lsdc_debugfs.h | 17 + drivers/gpu/drm/loongson/lsdc_drv.c | 427 +++++++++++++++ drivers/gpu/drm/loongson/lsdc_drv.h | 184 +++++++ drivers/gpu/drm/loongson/lsdc_i2c.c | 288 ++++++++++ 
drivers/gpu/drm/loongson/lsdc_i2c.h | 38 ++ drivers/gpu/drm/loongson/lsdc_irq.c | 57 ++ drivers/gpu/drm/loongson/lsdc_irq.h | 17 + drivers/gpu/drm/loongson/lsdc_output.c | 412 ++++++++++++++ drivers/gpu/drm/loongson/lsdc_output.h | 21 + drivers/gpu/drm/loongson/lsdc_pci_drv.c | 352 ++++++++++++ drivers/gpu/drm/loongson/lsdc_plane.c | 476 ++++++++++++++++ drivers/gpu/drm/loongson/lsdc_pll.c | 599 +++++++++++++++++++++ drivers/gpu/drm/loongson/lsdc_pll.h | 87 +++ drivers/gpu/drm/loongson/lsdc_regs.h | 252 +++++++++ 21 files changed, 3869 insertions(+) create mode 100644 drivers/gpu/drm/loongson/Kconfig create mode 100644 drivers/gpu/drm/loongson/Makefile create mode 100644 drivers/gpu/drm/loongson/lsdc_crtc.c create mode 100644 drivers/gpu/drm/loongson/lsdc_debugfs.c create mode 100644 drivers/gpu/drm/loongson/lsdc_debugfs.h create mode 100644 drivers/gpu/drm/loongson/lsdc_drv.c create mode 100644 drivers/gpu/drm/loongson/lsdc_drv.h create mode 100644 drivers/gpu/drm/loongson/lsdc_i2c.c create mode 100644 drivers/gpu/drm/loongson/lsdc_i2c.h create mode 100644 drivers/gpu/drm/loongson/lsdc_irq.c create mode 100644 drivers/gpu/drm/loongson/lsdc_irq.h create mode 100644 drivers/gpu/drm/loongson/lsdc_output.c create mode 100644 drivers/gpu/drm/loongson/lsdc_output.h create mode 100644 drivers/gpu/drm/loongson/lsdc_pci_drv.c create mode 100644 drivers/gpu/drm/loongson/lsdc_plane.c create mode 100644 drivers/gpu/drm/loongson/lsdc_pll.c create mode 100644 drivers/gpu/drm/loongson/lsdc_pll.h create mode 100644 drivers/gpu/drm/loongson/lsdc_regs.h diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 8e15593b052a..e4eb90e9d86d 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -594,6 +594,7 @@ CONFIG_DRM_AMDGPU_SI=y CONFIG_DRM_AMDGPU_CIK=y CONFIG_DRM_AMDGPU_USERPTR=y CONFIG_DRM_AST=y +CONFIG_DRM_LOONGSON=y CONFIG_DRM_QXL=m CONFIG_DRM_VIRTIO_GPU=m CONFIG_FB_EFI=y diff 
--git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index ca868271f4c4..af407d01e1c4 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -312,6 +312,8 @@ source "drivers/gpu/drm/udl/Kconfig" source "drivers/gpu/drm/ast/Kconfig" +source "drivers/gpu/drm/loongson/Kconfig" + source "drivers/gpu/drm/mgag200/Kconfig" source "drivers/gpu/drm/armada/Kconfig" diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 81569009f884..e9dd6847c9fa 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -86,6 +86,7 @@ obj-$(CONFIG_DRM_ROCKCHIP) +=rockchip/ obj-$(CONFIG_DRM_GMA500) += gma500/ obj-$(CONFIG_DRM_UDL) += udl/ obj-$(CONFIG_DRM_AST) += ast/ +obj-$(CONFIG_DRM_LOONGSON) +=loongson/ obj-$(CONFIG_DRM_ARMADA) += armada/ obj-$(CONFIG_DRM_ATMEL_HLCDC) += atmel-hlcdc/ obj-y += rcar-du/ diff --git a/drivers/gpu/drm/loongson/Kconfig b/drivers/gpu/drm/loongson/Kconfig new file mode 100644 index 000000000000..5d747271ffaf --- /dev/null +++ b/drivers/gpu/drm/loongson/Kconfig @@ -0,0 +1,26 @@ +# SPDX-License-Identifier: GPL-2.0 + +config DRM_LOONGSON + tristate "DRM Support for loongson's display controller" + depends on DRM && PCI + depends on MACH_LOONGSON64 || LOONGARCH || MIPS || COMPILE_TEST + select OF + select CMA if HAVE_DMA_CONTIGUOUS + select DMA_CMA if HAVE_DMA_CONTIGUOUS + select DRM_KMS_HELPER + select DRM_KMS_FB_HELPER + select DRM_KMS_CMA_HELPER + select DRM_GEM_CMA_HELPER + select DRM_TTM + select DRM_TTM_HELPER + select DRM_VRAM_HELPER + select VIDEOMODE_HELPERS + select DRM_BRIDGE + select DRM_PANEL_BRIDGE + default y + help + This is a KMS driver for the display controller in the LS7A1000 + bridge chip and LS2K1000/LS2K0500 SoC. + If "M" is selected, the module will be called loongson. + + If in doubt, say "Y". 
diff --git a/drivers/gpu/drm/loongson/Makefile b/drivers/gpu/drm/loongson/Makefile new file mode 100644 index 000000000000..cf6391b8eb17 --- /dev/null +++ b/drivers/gpu/drm/loongson/Makefile @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 + +loongson-y := \ + lsdc_drv.o \ + lsdc_crtc.o \ + lsdc_irq.o \ + lsdc_plane.o \ + lsdc_pll.o \ + lsdc_i2c.o \ + lsdc_output.o \ + lsdc_pci_drv.o \ + lsdc_debugfs.o \ + +lsdc-$(CONFIG_DEBUG_FS) += lsdc_debugfs.o + +obj-$(CONFIG_DRM_LOONGSON) += loongson.o diff --git a/drivers/gpu/drm/loongson/lsdc_crtc.c b/drivers/gpu/drm/loongson/lsdc_crtc.c new file mode 100644 index 000000000000..e59b75d79098 --- /dev/null +++ b/drivers/gpu/drm/loongson/lsdc_crtc.c @@ -0,0 +1,420 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Loongson Corporation + */ + +/* + * Authors: + * Sui Jingfeng + */ +#include +#include +#include + +#include "lsdc_drv.h" +#include "lsdc_regs.h" +#include "lsdc_pll.h" + +static int lsdc_crtc_enable_vblank(struct drm_crtc *crtc) +{ + struct lsdc_device *ldev = to_lsdc(crtc->dev); + unsigned int index = drm_crtc_index(crtc); + struct drm_crtc_state *state = crtc->state; + u32 val; + + if (state->enable) { + val = readl(ldev->reg_base + LSDC_INT_REG); + + if (index == 0) + val |= INT_CRTC0_VS_EN; + else if (index == 1) + val |= INT_CRTC1_VS_EN; + + writel(val, ldev->reg_base + LSDC_INT_REG); + } + + return 0; +} + +static void lsdc_crtc_disable_vblank(struct drm_crtc *crtc) +{ + struct lsdc_device *ldev = to_lsdc(crtc->dev); + unsigned int index = drm_crtc_index(crtc); + u32 val; + + val = readl(ldev->reg_base + LSDC_INT_REG); + + if (index == 0) + val &= ~INT_CRTC0_VS_EN; + else if (index == 1) + val &= ~INT_CRTC1_VS_EN; + + writel(val, ldev->reg_base + LSDC_INT_REG); +} + +static void lsdc_crtc_reset(struct drm_crtc *crtc) +{ + struct drm_device *ddev = crtc->dev; + struct lsdc_device *ldev = to_lsdc(ddev); + unsigned int index = drm_crtc_index(crtc); + struct lsdc_crtc_state *priv_crtc_state; + 
u32 val = CFG_RESET_BIT | CFG_OUTPUT_EN_BIT | LSDC_PF_XRGB8888; + + if (ldev->enable_gamma) + val |= CFG_GAMMAR_EN_BIT; + + /* align to 64 */ + if (ldev->desc->chip == LSDC_CHIP_7A2000) { + val &= ~LS7A2000_DMA_STEP_MASK; + val |= DMA_STEP_256_BYTE; + } + + if (index == 0) + writel(val, ldev->reg_base + LSDC_CRTC0_CFG_REG); + else if (index == 1) + writel(val, ldev->reg_base + LSDC_CRTC1_CFG_REG); + + if (crtc->state) { + priv_crtc_state = to_lsdc_crtc_state(crtc->state); + __drm_atomic_helper_crtc_destroy_state(&priv_crtc_state->base); + kfree(priv_crtc_state); + } + + priv_crtc_state = kzalloc(sizeof(*priv_crtc_state), GFP_KERNEL); + if (!priv_crtc_state) + return; + + __drm_atomic_helper_crtc_reset(crtc, &priv_crtc_state->base); + + drm_dbg(ddev, "crtc%u reset\n", index); +} + +static void lsdc_crtc_atomic_destroy_state(struct drm_crtc *crtc, struct drm_crtc_state *state) +{ + struct lsdc_crtc_state *priv_crtc_state = to_lsdc_crtc_state(state); + + __drm_atomic_helper_crtc_destroy_state(&priv_crtc_state->base); + + kfree(priv_crtc_state); +} + +static struct drm_crtc_state *lsdc_crtc_atomic_duplicate_state(struct drm_crtc *crtc) +{ + struct lsdc_crtc_state *new_priv_state; + struct lsdc_crtc_state *old_priv_state; + struct drm_device *ddev = crtc->dev; + + if (drm_WARN_ON(ddev, !crtc->state)) + return NULL; + + new_priv_state = kmalloc(sizeof(*new_priv_state), GFP_KERNEL); + if (!new_priv_state) + return NULL; + + __drm_atomic_helper_crtc_duplicate_state(crtc, &new_priv_state->base); + + old_priv_state = to_lsdc_crtc_state(crtc->state); + + memcpy(&new_priv_state->pparams, &old_priv_state->pparams, sizeof(new_priv_state->pparams)); + + return &new_priv_state->base; +} + +static const struct drm_crtc_funcs lsdc_crtc_funcs = { + .reset = lsdc_crtc_reset, + .destroy = drm_crtc_cleanup, + .set_config = drm_atomic_helper_set_config, + .page_flip = drm_atomic_helper_page_flip, + .atomic_duplicate_state = lsdc_crtc_atomic_duplicate_state, + .atomic_destroy_state = 
lsdc_crtc_atomic_destroy_state, + .enable_vblank = lsdc_crtc_enable_vblank, + .disable_vblank = lsdc_crtc_disable_vblank, +}; + +static enum drm_mode_status +lsdc_crtc_helper_mode_valid(struct drm_crtc *crtc, + const struct drm_display_mode *mode) +{ + struct drm_device *ddev = crtc->dev; + struct lsdc_device *ldev = to_lsdc(ddev); + const struct lsdc_chip_desc * const descp = ldev->desc; + + if (mode->hdisplay > descp->max_width) + return MODE_BAD_HVALUE; + if (mode->vdisplay > descp->max_height) + return MODE_BAD_VVALUE; + + if (mode->clock > descp->max_pixel_clk) { + drm_dbg(ddev, "mode %dx%d, pixel clock=%d is too high\n", + mode->hdisplay, mode->vdisplay, mode->clock); + return MODE_CLOCK_HIGH; + } + + /* The CRTC hardware dma take 256 bytes once a time, + * this is a limitation of the CRTC. + * TODO: check RGB565 support + */ + if (!ldev->relax_alignment) { + if ((mode->hdisplay * 4) % descp->stride_alignment) { + drm_dbg(ddev, "mode %dx%d, stride is not %u bytes aligned\n", + mode->hdisplay, mode->vdisplay, descp->stride_alignment); + return MODE_BAD; + } + } + + return MODE_OK; +} + +static int lsdc_pixpll_atomic_check(struct drm_crtc *crtc, + struct drm_crtc_state *state) +{ + struct lsdc_display_pipe * const dispipe = drm_crtc_to_dispipe(crtc); + struct lsdc_pll * const pixpll = &dispipe->pixpll; + const struct lsdc_pixpll_funcs * const pfuncs = pixpll->funcs; + struct lsdc_crtc_state *priv_state = to_lsdc_crtc_state(state); + bool ret; + + ret = pfuncs->compute(pixpll, state->mode.clock, &priv_state->pparams); + if (ret) + return 0; + + drm_warn(crtc->dev, "failed find PLL parameters for %u\n", state->mode.clock); + + return -EINVAL; +} + +static int lsdc_crtc_helper_atomic_check(struct drm_crtc *crtc, + struct drm_crtc_state *state) +{ + if (!state->enable) + return 0; /* no mode checks if CRTC is being disabled */ + + if (state->mode_changed || state->active_changed || state->connectors_changed) + return lsdc_pixpll_atomic_check(crtc, state); + + 
return 0; +} + +static void lsdc_update_pixclk(struct drm_crtc *crtc) +{ + struct lsdc_display_pipe * const dispipe = drm_crtc_to_dispipe(crtc); + struct lsdc_pll * const pixpll = &dispipe->pixpll; + const struct lsdc_pixpll_funcs * const clkfun = pixpll->funcs; + struct lsdc_crtc_state *priv_state = to_lsdc_crtc_state(crtc->state); + + clkfun->update(pixpll, &priv_state->pparams); +} + +static void lsdc_crtc_helper_mode_set_nofb(struct drm_crtc *crtc) +{ + struct drm_device *ddev = crtc->dev; + struct lsdc_device *ldev = to_lsdc(ddev); + struct drm_display_mode *mode = &crtc->state->adjusted_mode; + unsigned int index = drm_crtc_index(crtc); + u32 h_sync, v_sync, h_val, v_val; + + /* 26:16 total pixels, 10:0 visiable pixels, in horizontal */ + h_val = (mode->crtc_htotal << 16) | mode->crtc_hdisplay; + /* Hack to support non 256 bytes aligned stride, for example: + * 800x480 DPI panel. In this case userspace do the work to + * guarantee the horizontal pixel size is aligned by padding it. + * In actual, We allocate 832x480x4 bytes in size. 
+ */ + if (ldev->relax_alignment) + h_val = (h_val + 63) & ~63; + + /* 26:16 total pixels, 10:0 visiable pixels, in vertical */ + v_val = (mode->crtc_vtotal << 16) | mode->crtc_vdisplay; + /* 26:16 hsync end, 10:0 hsync start, bit 30 is hsync enable */ + h_sync = (mode->crtc_hsync_end << 16) | mode->crtc_hsync_start | EN_HSYNC_BIT; + if (mode->flags & DRM_MODE_FLAG_NHSYNC) + h_sync |= INV_HSYNC_BIT; + + /* 26:16 vsync end, 10:0 vsync start, bit 30 is vsync enable */ + v_sync = (mode->crtc_vsync_end << 16) | mode->crtc_vsync_start | EN_VSYNC_BIT; + if (mode->flags & DRM_MODE_FLAG_NVSYNC) + v_sync |= INV_VSYNC_BIT; + + if (index == 0) { + writel(0, ldev->reg_base + LSDC_CRTC0_FB_ORIGIN_REG); + writel(h_val, ldev->reg_base + LSDC_CRTC0_HDISPLAY_REG); + writel(v_val, ldev->reg_base + LSDC_CRTC0_VDISPLAY_REG); + writel(h_sync, ldev->reg_base + LSDC_CRTC0_HSYNC_REG); + writel(v_sync, ldev->reg_base + LSDC_CRTC0_VSYNC_REG); + } else if (index == 1) { + writel(0, ldev->reg_base + LSDC_CRTC1_FB_ORIGIN_REG); + writel(h_val, ldev->reg_base + LSDC_CRTC1_HDISPLAY_REG); + writel(v_val, ldev->reg_base + LSDC_CRTC1_VDISPLAY_REG); + writel(h_sync, ldev->reg_base + LSDC_CRTC1_HSYNC_REG); + writel(v_sync, ldev->reg_base + LSDC_CRTC1_VSYNC_REG); + } + + drm_dbg(ddev, "%s modeset: %ux%u\n", crtc->name, mode->hdisplay, mode->vdisplay); + + lsdc_update_pixclk(crtc); +} + +static void lsdc_enable_display(struct lsdc_device *ldev, unsigned int index) +{ + u32 val; + + if (index == 0) { + val = readl(ldev->reg_base + LSDC_CRTC0_CFG_REG); + val |= CFG_OUTPUT_EN_BIT; + writel(val, ldev->reg_base + LSDC_CRTC0_CFG_REG); + } else if (index == 1) { + val = readl(ldev->reg_base + LSDC_CRTC1_CFG_REG); + val |= CFG_OUTPUT_EN_BIT; + writel(val, ldev->reg_base + LSDC_CRTC1_CFG_REG); + } +} + +static void lsdc_disable_display(struct lsdc_device *ldev, unsigned int index) +{ + u32 val; + + if (index == 0) { + val = readl(ldev->reg_base + LSDC_CRTC0_CFG_REG); + val &= ~CFG_OUTPUT_EN_BIT; + writel(val, 
ldev->reg_base + LSDC_CRTC0_CFG_REG); + } else if (index == 1) { + val = readl(ldev->reg_base + LSDC_CRTC1_CFG_REG); + val &= ~CFG_OUTPUT_EN_BIT; + writel(val, ldev->reg_base + LSDC_CRTC1_CFG_REG); + } +} + +/* + * @lsdc_crtc_helper_atomic_enable: + * + * This callback should be used to enable the CRTC. With the atomic + * drivers it is called before all encoders connected to this CRTC are + * enabled through the encoder's own &drm_encoder_helper_funcs.enable + * hook. If that sequence is too simple drivers can just add their own + * hooks and call it from this CRTC callback here by looping over all + * encoders connected to it using for_each_encoder_on_crtc(). + * + * This hook is used only by atomic helpers, for symmetry with + * @atomic_disable. Atomic drivers don't need to implement it if there's + * no need to enable anything at the CRTC level. To ensure that runtime + * PM handling (using either DPMS or the new "ACTIVE" property) works + * @atomic_enable must be the inverse of @atomic_disable for atomic + * drivers. + * + * Drivers can use the @old_crtc_state input parameter if the operations + * needed to enable the CRTC don't depend solely on the new state but + * also on the transition between the old state and the new state. + * + * This function is optional. 
+ */ +static void lsdc_crtc_helper_atomic_enable(struct drm_crtc *crtc, + struct drm_crtc_state *old_crtc_state) +{ + struct drm_device *ddev = crtc->dev; + struct lsdc_device *ldev = to_lsdc(ddev); + + drm_crtc_vblank_on(crtc); + + lsdc_enable_display(ldev, drm_crtc_index(crtc)); + + drm_dbg(ddev, "%s: enabled\n", crtc->name); +} + +static void lsdc_crtc_helper_atomic_disable(struct drm_crtc *crtc, + struct drm_crtc_state *old_crtc_state) +{ + struct drm_device *ddev = crtc->dev; + struct lsdc_device *ldev = to_lsdc(ddev); + + drm_crtc_vblank_off(crtc); + + lsdc_disable_display(ldev, drm_crtc_index(crtc)); + + drm_dbg(ddev, "%s: disabled\n", crtc->name); +} + +static void lsdc_crtc_update_clut(struct drm_crtc *crtc) +{ + struct lsdc_device *ldev = to_lsdc(crtc->dev); + unsigned int index = drm_crtc_index(crtc); + struct drm_color_lut *lut; + unsigned int i; + + if (!ldev->enable_gamma) + return; + + if (!crtc->state->color_mgmt_changed || !crtc->state->gamma_lut) + return; + + lut = (struct drm_color_lut *)crtc->state->gamma_lut->data; + /* Reset the gamma index register of the CRTC whose table is written below */ + writel(0, ldev->reg_base + (index == 1 ? LSDC_CRTC1_GAMMA_INDEX_REG : + LSDC_CRTC0_GAMMA_INDEX_REG)); + for (i = 0; i < 256; i++) { + u32 val = ((lut->red << 8) & 0xff0000) | + (lut->green & 0xff00) | + (lut->blue >> 8); + + if (index == 0) + writel(val, ldev->reg_base + LSDC_CRTC0_GAMMA_DATA_REG); + else if (index == 1) + writel(val, ldev->reg_base + LSDC_CRTC1_GAMMA_DATA_REG); + + lut++; + } +} + +static void lsdc_crtc_atomic_flush(struct drm_crtc *crtc, + struct drm_crtc_state *old_crtc_state) +{ + struct drm_pending_vblank_event *event = crtc->state->event; + + lsdc_crtc_update_clut(crtc); + + if (event) { + crtc->state->event = NULL; + + spin_lock_irq(&crtc->dev->event_lock); + if (drm_crtc_vblank_get(crtc) == 0) + drm_crtc_arm_vblank_event(crtc, event); + else + drm_crtc_send_vblank_event(crtc, event); + spin_unlock_irq(&crtc->dev->event_lock); + } +} + +static const struct drm_crtc_helper_funcs lsdc_crtc_helper_funcs = { + .mode_valid = lsdc_crtc_helper_mode_valid, 
+ .mode_set_nofb = lsdc_crtc_helper_mode_set_nofb, + .atomic_enable = lsdc_crtc_helper_atomic_enable, + .atomic_disable = lsdc_crtc_helper_atomic_disable, + .atomic_check = lsdc_crtc_helper_atomic_check, + .atomic_flush = lsdc_crtc_atomic_flush, +}; + +int lsdc_crtc_init(struct drm_device *ddev, + struct drm_crtc *crtc, + unsigned int index, + struct drm_plane *primary, + struct drm_plane *cursor) +{ + int ret; + + ret = drm_crtc_init_with_planes(ddev, crtc, primary, cursor, + &lsdc_crtc_funcs, "crtc-%d", index); + + if (ret) { + drm_err(ddev, "crtc init with planes failed: %d\n", ret); + return ret; + } + + drm_crtc_helper_add(crtc, &lsdc_crtc_helper_funcs); + + ret = drm_mode_crtc_set_gamma_size(crtc, 256); + if (ret) + drm_warn(ddev, "set the gamma table size failed\n"); + + drm_crtc_enable_color_mgmt(crtc, 0, false, 256); + + return 0; +} diff --git a/drivers/gpu/drm/loongson/lsdc_debugfs.c b/drivers/gpu/drm/loongson/lsdc_debugfs.c new file mode 100644 index 000000000000..1aad413ee97c --- /dev/null +++ b/drivers/gpu/drm/loongson/lsdc_debugfs.c @@ -0,0 +1,176 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Loongson Corporation + */ + +/* + * Authors: + * Sui Jingfeng + */ + +#include +#include +#include +#include +#include + +#include "lsdc_drv.h" +#include "lsdc_pll.h" +#include "lsdc_regs.h" +#include "lsdc_debugfs.h" + +#ifdef CONFIG_DEBUG_FS + +static int lsdc_show_clock(struct seq_file *m, void *arg) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *ddev = node->minor->dev; + struct drm_crtc *crtc; + + drm_for_each_crtc(crtc, ddev) { + struct lsdc_display_pipe *pipe; + struct lsdc_pll *pixpll; + const struct lsdc_pixpll_funcs *funcs; + struct lsdc_pll_core_values params; + unsigned int out_khz; + struct drm_display_mode *adj; + + pipe = container_of(crtc, struct lsdc_display_pipe, crtc); + if (!pipe->available) + continue; + + adj = &crtc->state->adjusted_mode; + + pixpll = &pipe->pixpll; + funcs = 
pixpll->funcs; + out_khz = funcs->get_clock_rate(pixpll, &params); + + seq_printf(m, "Display pipe %u: %dx%d\n", + pipe->index, adj->hdisplay, adj->vdisplay); + /* achieved vs. requested pixel clock, and their signed difference */ + seq_printf(m, "Frequency actually output: %u kHz\n", out_khz); + seq_printf(m, "Pixel clock required: %d kHz\n", adj->clock); + seq_printf(m, "diff: %d kHz\n", (int)out_khz - adj->clock); + + seq_printf(m, "div_ref=%u, loopc=%u, div_out=%u\n", + params.div_ref, params.loopc, params.div_out); + + seq_printf(m, "hsync_start=%d, hsync_end=%d, htotal=%d\n", + adj->hsync_start, adj->hsync_end, adj->htotal); + seq_printf(m, "vsync_start=%d, vsync_end=%d, vtotal=%d\n\n", + adj->vsync_start, adj->vsync_end, adj->vtotal); + } + + return 0; +} + +static int lsdc_show_mm(struct seq_file *m, void *arg) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *ddev = node->minor->dev; + struct drm_printer p = drm_seq_file_printer(m); + + drm_mm_print(&ddev->vma_offset_manager->vm_addr_space_mm, &p); + + return 0; +} + +#define REGDEF(reg) { __stringify_1(LSDC_##reg##_REG), LSDC_##reg##_REG } +static const struct { + const char *name; + u32 reg_offset; +} lsdc_regs_array[] = { + REGDEF(INT), + REGDEF(CRTC0_CFG), + REGDEF(CRTC0_FB_ADDR0), + REGDEF(CRTC0_FB_ADDR1), + REGDEF(CRTC0_FB_HI_ADDR), + REGDEF(CRTC0_STRIDE), + REGDEF(CRTC0_FB_ORIGIN), + REGDEF(CRTC0_HDISPLAY), + REGDEF(CRTC0_HSYNC), + REGDEF(CRTC0_VDISPLAY), + REGDEF(CRTC0_VSYNC), + REGDEF(CRTC0_GAMMA_INDEX), + REGDEF(CRTC0_GAMMA_DATA), + REGDEF(CRTC1_CFG), + REGDEF(CRTC1_FB_ADDR0), + REGDEF(CRTC1_FB_ADDR1), + REGDEF(CRTC1_FB_HI_ADDR), + REGDEF(CRTC1_STRIDE), + REGDEF(CRTC1_FB_ORIGIN), + REGDEF(CRTC1_HDISPLAY), + REGDEF(CRTC1_HSYNC), + REGDEF(CRTC1_VDISPLAY), + REGDEF(CRTC1_VSYNC), + REGDEF(CRTC1_GAMMA_INDEX), + REGDEF(CRTC1_GAMMA_DATA), + REGDEF(CURSOR0_CFG), + REGDEF(CURSOR0_ADDR), + REGDEF(CURSOR0_POSITION), + REGDEF(CURSOR0_BG_COLOR), + REGDEF(CURSOR0_FG_COLOR), +}; + +static int lsdc_show_regs(struct seq_file *m, void *arg) +{ + struct 
drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *ddev = node->minor->dev; + struct lsdc_device *ldev = to_lsdc(ddev); + int i; + + for (i = 0; i < ARRAY_SIZE(lsdc_regs_array); i++) { + u32 offset = lsdc_regs_array[i].reg_offset; + const char *name = lsdc_regs_array[i].name; + + seq_printf(m, "%s (0x%04x): 0x%08x\n", + name, offset, + readl(ldev->reg_base + offset)); + } + + return 0; +} + +static const struct drm_info_list lsdc_debugfs_list[] = { + { "clocks", lsdc_show_clock, 0 }, + { "mm", lsdc_show_mm, 0, NULL }, + { "regs", lsdc_show_regs, 0 }, +}; + +void lsdc_debugfs_init(struct drm_minor *minor) +{ + drm_debugfs_create_files(lsdc_debugfs_list, + ARRAY_SIZE(lsdc_debugfs_list), + minor->debugfs_root, + minor); +} + +/* + * vram debugfs related. + */ +static int lsdc_vram_mm_show(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_vram_mm *vmm = node->minor->dev->vram_mm; + struct ttm_resource_manager *man = ttm_manager_type(&vmm->bdev, TTM_PL_VRAM); + struct drm_printer p = drm_seq_file_printer(m); + + ttm_resource_manager_debug(man, &p); + return 0; +} + +static const struct drm_info_list lsdc_vram_mm_debugfs_list[] = { + { "clocks", lsdc_show_clock, 0 }, + { "vram-mm", lsdc_vram_mm_show, 0, NULL }, + { "regs", lsdc_show_regs, 0 }, +}; + +void lsdc_vram_mm_debugfs_init(struct drm_minor *minor) +{ + drm_debugfs_create_files(lsdc_vram_mm_debugfs_list, + ARRAY_SIZE(lsdc_vram_mm_debugfs_list), + minor->debugfs_root, + minor); +} + +#endif diff --git a/drivers/gpu/drm/loongson/lsdc_debugfs.h b/drivers/gpu/drm/loongson/lsdc_debugfs.h new file mode 100644 index 000000000000..d5e630534f83 --- /dev/null +++ b/drivers/gpu/drm/loongson/lsdc_debugfs.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022 Loongson Corporation + */ + +/* + * Authors: + * Sui Jingfeng + */ + +#ifndef __LSDC_DEBUGFS_H__ +#define __LSDC_DEBUGFS_H__ + +void 
lsdc_debugfs_init(struct drm_minor *minor); +void lsdc_vram_mm_debugfs_init(struct drm_minor *minor); + +#endif diff --git a/drivers/gpu/drm/loongson/lsdc_drv.c b/drivers/gpu/drm/loongson/lsdc_drv.c new file mode 100644 index 000000000000..306b0de0d2e8 --- /dev/null +++ b/drivers/gpu/drm/loongson/lsdc_drv.c @@ -0,0 +1,427 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2022 Loongson Corporation + */ + +/* + * Authors: + * Sui Jingfeng + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "lsdc_drv.h" +#include "lsdc_irq.h" +#include "lsdc_output.h" +#include "lsdc_debugfs.h" + +static const struct lsdc_chip_desc dc_in_ls2k1000 = { + .chip = LSDC_CHIP_2K1000, + .num_of_crtc = LSDC_NUM_CRTC, + /* ls2k1000 user manual say the max pixel clock can be about 200MHz */ + .max_pixel_clk = 200000, + .max_width = 2560, + .max_height = 2048, + .num_of_hw_cursor = 1, + .hw_cursor_w = 32, + .hw_cursor_h = 32, + .stride_alignment = 256, + .has_builtin_i2c = false, + .has_vram = false, + .broken_gamma = true, +}; + +static const struct lsdc_chip_desc dc_in_ls2k0500 = { + .chip = LSDC_CHIP_2K0500, + .num_of_crtc = LSDC_NUM_CRTC, + .max_pixel_clk = 200000, + .max_width = 2048, + .max_height = 2048, + .num_of_hw_cursor = 1, + .hw_cursor_w = 32, + .hw_cursor_h = 32, + .stride_alignment = 256, + .has_builtin_i2c = false, + .has_vram = false, + .broken_gamma = true, +}; + +static const struct lsdc_chip_desc dc_in_ls7a1000 = { + .chip = LSDC_CHIP_7A1000, + .num_of_crtc = LSDC_NUM_CRTC, + .max_pixel_clk = 200000, + .max_width = 2048, + .max_height = 2048, + .num_of_hw_cursor = 1, + .hw_cursor_w = 32, + .hw_cursor_h = 32, + .stride_alignment = 256, + .has_builtin_i2c = true, + .has_vram = true, + .broken_gamma = true, +}; + +static const struct lsdc_chip_desc dc_in_ls7a2000 = { + .chip = LSDC_CHIP_7A2000, + .num_of_crtc = LSDC_NUM_CRTC, + .max_pixel_clk = 200000, + .max_width = 2048, + .max_height = 
2048, + .num_of_hw_cursor = 2, + .hw_cursor_w = 64, + .hw_cursor_h = 64, + .stride_alignment = 256, + .has_builtin_i2c = true, + .has_vram = true, + .broken_gamma = true, +}; + +static enum drm_mode_status +lsdc_device_mode_valid(struct drm_device *ddev, const struct drm_display_mode *mode) +{ + struct lsdc_device *ldev = to_lsdc(ddev); + + if (ldev->use_vram_helper) + return drm_vram_helper_mode_valid(ddev, mode); + + return MODE_OK; +} + +static const struct drm_mode_config_funcs lsdc_mode_config_funcs = { + .fb_create = drm_gem_fb_create, + .output_poll_changed = drm_fb_helper_output_poll_changed, + .atomic_check = drm_atomic_helper_check, + .atomic_commit = drm_atomic_helper_commit, + .mode_valid = lsdc_device_mode_valid, +}; + +static int lsdc_gem_cma_dumb_create(struct drm_file *file, + struct drm_device *ddev, + struct drm_mode_create_dumb *args) +{ + struct lsdc_device *ldev = to_lsdc(ddev); + const struct lsdc_chip_desc *desc = ldev->desc; + unsigned int bytes_per_pixel = (args->bpp + 7) / 8; + unsigned int pitch = bytes_per_pixel * args->width; + + /* + * The dc in ls7a1000/ls2k1000/ls2k0500 require the stride be a + * multiple of 256 bytes which is for sake of optimize dma data + * transfer. 
+ */ + args->pitch = roundup(pitch, desc->stride_alignment); + + return drm_gem_cma_dumb_create_internal(file, ddev, args); +} + +DEFINE_DRM_GEM_CMA_FOPS(lsdc_drv_fops); + +static struct drm_driver lsdc_drm_driver_cma_stub = { + .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC, + .lastclose = drm_fb_helper_lastclose, + .fops = &lsdc_drv_fops, + + .name = "lsdc", + .desc = DRIVER_DESC, + .date = DRIVER_DATE, + .major = DRIVER_MAJOR, + .minor = DRIVER_MINOR, + .patchlevel = DRIVER_PATCHLEVEL, + + DRM_GEM_CMA_DRIVER_OPS_WITH_DUMB_CREATE(lsdc_gem_cma_dumb_create), + +#ifdef CONFIG_DEBUG_FS + .debugfs_init = lsdc_debugfs_init, +#endif +}; + +DEFINE_DRM_GEM_FOPS(lsdc_gem_fops); + +static struct drm_driver lsdc_vram_driver_stub = { + .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC, + .fops = &lsdc_gem_fops, + + .name = "loongson-drm", + .desc = DRIVER_DESC, + .date = DRIVER_DATE, + .major = DRIVER_MAJOR, + .minor = DRIVER_MINOR, + .patchlevel = DRIVER_PATCHLEVEL, +#ifdef CONFIG_DEBUG_FS + .debugfs_init = lsdc_vram_mm_debugfs_init, +#endif + .dumb_create = drm_gem_vram_driver_dumb_create, + .dumb_map_offset = drm_gem_vram_driver_dumb_mmap_offset, + .gem_prime_mmap = drm_gem_prime_mmap, +}; + +static int lsdc_modeset_init(struct lsdc_device *ldev, uint32_t num_crtc) +{ + struct drm_device *ddev = ldev->ddev; + unsigned int i; + int ret; + + if (ldev->has_ports_node) { + drm_info(ddev, "Has OF graph support\n"); + ret = lsdc_attach_output(ldev, num_crtc); + if (ret) + return ret; + } else { + drm_info(ddev, "No OF graph support\n"); + for (i = 0; i < num_crtc; i++) { + ret = lsdc_create_output(ldev, i, num_crtc); + if (ret) + return ret; + } + } + + for (i = 0; i < num_crtc; i++) { + struct lsdc_display_pipe * const dispipe = &ldev->dispipe[i]; + struct drm_plane * const primary = &dispipe->primary; + struct drm_plane * const cursor = &dispipe->cursor; + struct drm_crtc * const crtc = &dispipe->crtc; + struct lsdc_pll * const pixpll = 
&dispipe->pixpll; + + dispipe->index = i; + + ret = lsdc_pixpll_init(pixpll, ddev, i); + if (ret) + return ret; + + ret = lsdc_plane_init(ldev, primary, DRM_PLANE_TYPE_PRIMARY, i); + if (ret) + return ret; + + ret = lsdc_plane_init(ldev, cursor, DRM_PLANE_TYPE_CURSOR, i); + if (ret) + return ret; + + /* + * Initialize all of the available CRTCs; this way the crtc + * index is equal to the hardware crtc index. That is: + * display pipe 0 => crtc0 + dvo0 + encoder0 + * display pipe 1 => crtc1 + dvo1 + encoder1 + */ + ret = lsdc_crtc_init(ddev, crtc, i, primary, cursor); + if (ret) + return ret; + + drm_info(ddev, "display pipe %u initialized\n", i); + } + + return 0; +} + +static int lsdc_mode_config_init(struct lsdc_device *ldev) +{ + const struct lsdc_chip_desc * const descp = ldev->desc; + struct drm_device *ddev = ldev->ddev; + int ret; + + ret = drmm_mode_config_init(ddev); + if (ret) + return ret; + + ddev->mode_config.funcs = &lsdc_mode_config_funcs; + ddev->mode_config.min_width = 1; + ddev->mode_config.min_height = 1; + ddev->mode_config.max_width = 4096; + ddev->mode_config.max_height = 4096; + ddev->mode_config.preferred_depth = 24; + ddev->mode_config.prefer_shadow = ldev->use_vram_helper; + /* cursor size comes from the chip descriptor: width from _w, height from _h */ + ddev->mode_config.cursor_width = descp->hw_cursor_w; + ddev->mode_config.cursor_height = descp->hw_cursor_h; + + if (ldev->vram_base) + ddev->mode_config.fb_base = ldev->vram_base; + + return lsdc_modeset_init(ldev, descp->num_of_crtc); +} + +static void lsdc_mode_config_fini(struct drm_device *ddev) +{ + drm_atomic_helper_shutdown(ddev); + + drm_mode_config_cleanup(ddev); +} + +/* + * lsdc_detect_chip - a function to tell different chips apart. 
+ */ +const struct lsdc_chip_desc * +lsdc_detect_chip(struct pci_dev *pdev, const struct pci_device_id * const ent) +{ + static const struct lsdc_match { + char name[128]; + const void *data; + } compat[] = { + { .name = "loongson,ls7a1000-dc", .data = &dc_in_ls7a1000 }, + { .name = "loongson,ls2k1000-dc", .data = &dc_in_ls2k1000 }, + { .name = "loongson,ls2k0500-dc", .data = &dc_in_ls2k0500 }, + { .name = "loongson,ls7a2000-dc", .data = &dc_in_ls7a2000 }, + { .name = "loongson,loongson64c-4core-ls7a", .data = &dc_in_ls7a1000 }, + { .name = "loongson,loongson64g-4core-ls7a", .data = &dc_in_ls7a1000 }, + { .name = "loongson,loongson64-2core-2k1000", .data = &dc_in_ls2k1000 }, + { .name = "loongson,loongson2k1000", .data = &dc_in_ls2k1000 }, + { /* sentinel */ }, + }; + + struct device_node *np; + unsigned int i; + + if (ent->driver_data == LSDC_CHIP_7A2000) + return &dc_in_ls7a2000; + + if (ent->driver_data == LSDC_CHIP_7A1000) + return &dc_in_ls7a1000; + + /* Deduce DC variant information from the DC device node */ + for (i = 0; i < ARRAY_SIZE(compat); ++i) { + np = of_find_compatible_node(NULL, NULL, compat[i].name); + if (!np) + continue; + + of_node_put(np); + + return compat[i].data; + } + + dev_info(&pdev->dev, "No Compatible Device Node Found\n"); + + if (pci_get_device(PCI_VENDOR_ID_LOONGSON, 0x7A15, NULL)) + return &dc_in_ls7a1000; + else if (pci_get_device(PCI_VENDOR_ID_LOONGSON, 0x7A05, NULL)) + return &dc_in_ls2k1000; + + return NULL; +} + +static int lsdc_remove_conflicting_framebuffers(const struct drm_driver *drv) +{ + struct apertures_struct *ap; + + ap = alloc_apertures(1); + if (!ap) + return -ENOMEM; + + /* lsdc is a pci device, but it don't have a dedicate vram bar because + * of historic reason. The display controller is ported from Loongson + * 2H series SoC which date back to 2012. + * And simplefb node may have been located anywhere in memory. 
+ */ + + ap->ranges[0].base = 0; + ap->ranges[0].size = ~0; + + return drm_fb_helper_remove_conflicting_framebuffers(ap, "loongsondrmfb", false); +} + +static int lsdc_platform_probe(struct platform_device *pdev) +{ + struct lsdc_device *ldev = dev_get_drvdata(pdev->dev.parent); + struct drm_driver *driver; + struct drm_device *ddev; + int ret; + + if (ldev->use_vram_helper) + driver = &lsdc_vram_driver_stub; + else + driver = &lsdc_drm_driver_cma_stub; + + lsdc_remove_conflicting_framebuffers(driver); + + ddev = drm_dev_alloc(driver, &pdev->dev); + if (IS_ERR(ddev)) + return PTR_ERR(ddev); + + platform_set_drvdata(pdev, ddev); + ldev->ddev = ddev; + ddev->dev_private = ldev; + + if (ldev->use_vram_helper) { + ret = drmm_vram_helper_init(ddev, ldev->vram_base, ldev->vram_size); + if (ret) { + drm_err(ddev, "vram helper init failed: %d\n", ret); + goto err_kms; + } + }; + + ret = lsdc_mode_config_init(ldev); + if (ret) { + drm_dbg(ddev, "%s: %d\n", __func__, ret); + goto err_kms; + } + + ret = devm_request_threaded_irq(&pdev->dev, ldev->irq, + lsdc_irq_handler_cb, + lsdc_irq_thread_cb, + IRQF_ONESHOT, NULL, + ddev); + if (ret) { + drm_err(ddev, "Failed to register lsdc interrupt\n"); + goto err_kms; + } + + ret = drm_vblank_init(ddev, ldev->desc->num_of_crtc); + if (ret) + goto err_kms; + + drm_mode_config_reset(ddev); + + drm_kms_helper_poll_init(ddev); + + ret = drm_dev_register(ddev, 0); + if (ret) + goto err_poll_fini; + + drm_fbdev_generic_setup(ddev, 32); + + return 0; + +err_poll_fini: + drm_kms_helper_poll_fini(ddev); +err_kms: + drm_dev_put(ddev); + + return ret; +} + +static int lsdc_platform_remove(struct platform_device *pdev) +{ + struct drm_device *ddev = platform_get_drvdata(pdev); + struct lsdc_device *ldev = to_lsdc(ddev); + + drm_dev_unregister(ddev); + + drm_kms_helper_poll_fini(ddev); + + devm_free_irq(ddev->dev, ldev->irq, ddev); + + lsdc_mode_config_fini(ddev); + + platform_set_drvdata(pdev, NULL); + + drm_dev_put(ddev); + + return 0; +} + 
+struct platform_driver lsdc_platform_driver = { + .probe = lsdc_platform_probe, + .remove = lsdc_platform_remove, + .driver = { + .name = "lsdc", + }, +}; diff --git a/drivers/gpu/drm/loongson/lsdc_drv.h b/drivers/gpu/drm/loongson/lsdc_drv.h new file mode 100644 index 000000000000..e07b049dcd21 --- /dev/null +++ b/drivers/gpu/drm/loongson/lsdc_drv.h @@ -0,0 +1,184 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022 Loongson Corporation + */ + +/* + * Authors: + * Sui Jingfeng + */ + +#ifndef __LSDC_DRV_H__ +#define __LSDC_DRV_H__ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "lsdc_pll.h" + +#define DRIVER_AUTHOR "Sui Jingfeng " +#define DRIVER_NAME "lsdc" +#define DRIVER_DESC "drm driver for loongson's display controller" +#define DRIVER_DATE "20200701" +#define DRIVER_MAJOR 1 +#define DRIVER_MINOR 0 +#define DRIVER_PATCHLEVEL 0 + +#define LSDC_NUM_CRTC 2 + +enum loongson_dc_family { + LSDC_CHIP_UNKNOWN = 0, + LSDC_CHIP_2K1000 = 1, /* 2-Core Mips64r2 compatible SoC */ + LSDC_CHIP_7A1000 = 2, /* North bridge of LS3A3000/LS3A4000/LS3A5000 */ + LSDC_CHIP_2K0500 = 3, /* Single core, reduced version of LS2K1000 */ + LSDC_CHIP_7A2000 = 4, /* Enhancement version of LS7a1000 */ + LSDC_CHIP_LAST, +}; + +struct lsdc_chip_desc { + enum loongson_dc_family chip; + u32 num_of_crtc; + u32 max_pixel_clk; + u32 max_width; + u32 max_height; + u32 num_of_hw_cursor; + u32 hw_cursor_w; + u32 hw_cursor_h; + /* DMA alignment constraint (must be multiple of 256 bytes) */ + u32 stride_alignment; + bool has_builtin_i2c; + bool has_vram; + bool broken_gamma; +}; + +/* There is only a 1:1 mapping of encoders and connectors for lsdc */ +struct lsdc_output { + struct drm_encoder encoder; + struct drm_connector connector; + struct lsdc_i2c *li2c; +}; + +static inline struct lsdc_output * +drm_connector_to_lsdc_output(struct drm_connector *connp) +{ + return container_of(connp, struct lsdc_output, connector); +} + +/* + * struct 
/*
 * struct lsdc_display_pipe - Abstraction of hardware display pipeline.
 * @crtc: CRTC control structure
 * @primary: primary plane control structure
 * @cursor: cursor plane control structure
 * @pixpll: pixel PLL control structure
 * @output: pointer to the output (encoder plus connector) this display
 *          pipe is bound to
 * @index: the index corresponding to the hardware display pipe
 * @available: whether this display pipe is available on the motherboard.
 *             A downstream motherboard manufacturer may use only one of
 *             them. For example, the LEMOTE LX-6901 board has only one
 *             VGA output.
 *
 * Display pipeline with planes, crtc, pll and output collapsed into one entity.
 */
struct lsdc_display_pipe {
	struct drm_crtc crtc;
	struct drm_plane primary;
	struct drm_plane cursor;
	struct lsdc_pll pixpll;
	struct lsdc_output *output;
	int index;
	bool available;
};
The DC scanout from the VRAM is + * proved to be more reliable, but graphic application is may + * become slow when using this driver mode. + */ + bool use_vram_helper; + /* + * @enable_gamma: control whether hardware gamma support should be + * enabled or not. It is broken though, but you can know that only + * when you can enable it. + */ + bool enable_gamma; + /* @relax_alignment: for 800x480, 1366x768 resulotion support */ + bool relax_alignment; + /* @has_dt: true if there are DT support*/ + bool has_dt; + /* @has_ports_node: true if there are OF graph in the DT */ + bool has_ports_node; +}; + +static inline struct lsdc_device *to_lsdc(struct drm_device *ddev) +{ + return ddev->dev_private; +} + +static inline struct lsdc_crtc_state * +to_lsdc_crtc_state(struct drm_crtc_state *base) +{ + return container_of(base, struct lsdc_crtc_state, base); +} + +int lsdc_crtc_init(struct drm_device *ddev, + struct drm_crtc *crtc, + unsigned int index, + struct drm_plane *primary, + struct drm_plane *cursor); + +int lsdc_plane_init(struct lsdc_device *ldev, struct drm_plane *plane, + enum drm_plane_type type, unsigned int index); + +const struct lsdc_chip_desc * +lsdc_detect_chip(struct pci_dev *pdev, const struct pci_device_id * const ent); + +extern struct platform_driver lsdc_platform_driver; + +#endif diff --git a/drivers/gpu/drm/loongson/lsdc_i2c.c b/drivers/gpu/drm/loongson/lsdc_i2c.c new file mode 100644 index 000000000000..925bb8b866ad --- /dev/null +++ b/drivers/gpu/drm/loongson/lsdc_i2c.c @@ -0,0 +1,288 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Loongson Corporation + */ + +/* + * Authors: + * Sui Jingfeng + */ + +#include + +#include +#include +#include + +#include "lsdc_regs.h" +#include "lsdc_i2c.h" + +/* + * ls7a_gpio_i2c_set - set the state of a gpio pin, either high or low. 
+ * @mask: gpio pin mask indicate which pin to set + */ +static void ls7a_gpio_i2c_set(struct lsdc_i2c * const li2c, int mask, int state) +{ + unsigned long flags; + u8 val; + + spin_lock_irqsave(&li2c->reglock, flags); + + if (state) { + /* + * The high state is achieved by setting the direction as + * input, because the GPIO is open drained with external + * pull up resistance. + */ + val = readb(li2c->dir_reg); + val |= mask; + writeb(val, li2c->dir_reg); + } else { + /* First, set this pin as output */ + val = readb(li2c->dir_reg); + val &= ~mask; + writeb(val, li2c->dir_reg); + + /* Then, set the state to it */ + val = readb(li2c->dat_reg); + val &= ~mask; + writeb(val, li2c->dat_reg); + } + + spin_unlock_irqrestore(&li2c->reglock, flags); +} + +/* + * ls7a_gpio_i2c_get - read value back from gpio pin + * @mask: gpio pin mask indicate which pin to read from + */ +static int ls7a_gpio_i2c_get(struct lsdc_i2c * const li2c, int mask) +{ + unsigned long flags; + u8 val; + + spin_lock_irqsave(&li2c->reglock, flags); + + /* First, set this pin as input */ + val = readb(li2c->dir_reg); + val |= mask; + writeb(val, li2c->dir_reg); + + /* Then, get level state from this pin */ + val = readb(li2c->dat_reg); + + spin_unlock_irqrestore(&li2c->reglock, flags); + + return (val & mask) ? 
1 : 0; +} + +/* set the state on the i2c->sda pin */ +static void ls7a_i2c_set_sda(void *i2c, int state) +{ + struct lsdc_i2c * const li2c = (struct lsdc_i2c *)i2c; + + return ls7a_gpio_i2c_set(li2c, li2c->sda, state); +} + +/* set the state on the i2c->scl pin */ +static void ls7a_i2c_set_scl(void *i2c, int state) +{ + struct lsdc_i2c * const li2c = (struct lsdc_i2c *)i2c; + + return ls7a_gpio_i2c_set(li2c, li2c->scl, state); +} + +/* read the value from the i2c->sda pin */ +static int ls7a_i2c_get_sda(void *i2c) +{ + struct lsdc_i2c * const li2c = (struct lsdc_i2c *)i2c; + + return ls7a_gpio_i2c_get(li2c, li2c->sda); +} + +/* read the value from the i2c->scl pin */ +static int ls7a_i2c_get_scl(void *i2c) +{ + struct lsdc_i2c * const li2c = (struct lsdc_i2c *)i2c; + + return ls7a_gpio_i2c_get(li2c, li2c->scl); +} + +/* + * Mainly for dc in ls7a1000 which have dedicated gpio hardware + */ +static void lsdc_of_release_i2c_adapter(void *res) +{ + struct lsdc_i2c *li2c = res; + struct i2c_adapter *adapter; + struct device_node *i2c_np; + + adapter = &li2c->adapter; + i2c_np = adapter->dev.of_node; + if (i2c_np) + of_node_put(i2c_np); + + i2c_del_adapter(adapter); + + kfree(li2c); +} + +struct lsdc_i2c *lsdc_of_create_i2c_adapter(struct device *parent, + void *reg_base, + struct device_node *i2c_np) +{ + unsigned int udelay = 5; + unsigned int timeout = 2200; + int nr = -1; + struct i2c_adapter *adapter; + struct lsdc_i2c *li2c; + u32 sda, scl; + int ret; + + li2c = kzalloc(sizeof(*li2c), GFP_KERNEL); + if (!li2c) + return ERR_PTR(-ENOMEM); + + spin_lock_init(&li2c->reglock); + + ret = of_property_read_u32(i2c_np, "loongson,sda", &sda); + if (ret) { + dev_err(parent, "No sda pin number provided\n"); + return ERR_PTR(ret); + } + + ret = of_property_read_u32(i2c_np, "loongson,scl", &scl); + if (ret) { + dev_err(parent, "No scl pin number provided\n"); + return ERR_PTR(ret); + } + + ret = of_property_read_u32(i2c_np, "loongson,nr", &nr); + if (ret) { + int id; + + if (ret 
== -EINVAL) + dev_dbg(parent, "no nr provided\n"); + + id = of_alias_get_id(i2c_np, "i2c"); + if (id >= 0) + nr = id; + } + + li2c->sda = 1 << sda; + li2c->scl = 1 << scl; + + /* Optional properties which made the driver more flexible */ + of_property_read_u32(i2c_np, "loongson,udelay", &udelay); + of_property_read_u32(i2c_np, "loongson,timeout", &timeout); + + li2c->dir_reg = reg_base + LS7A_DC_GPIO_DIR_REG; + li2c->dat_reg = reg_base + LS7A_DC_GPIO_DAT_REG; + + li2c->bit.setsda = ls7a_i2c_set_sda; + li2c->bit.setscl = ls7a_i2c_set_scl; + li2c->bit.getsda = ls7a_i2c_get_sda; + li2c->bit.getscl = ls7a_i2c_get_scl; + li2c->bit.udelay = udelay; + li2c->bit.timeout = usecs_to_jiffies(timeout); + li2c->bit.data = li2c; + + adapter = &li2c->adapter; + adapter->algo_data = &li2c->bit; + adapter->owner = THIS_MODULE; + adapter->class = I2C_CLASS_DDC; + adapter->dev.parent = parent; + adapter->nr = nr; + adapter->dev.of_node = i2c_np; + + snprintf(adapter->name, sizeof(adapter->name), "gpio-i2c-%d", nr); + + i2c_set_adapdata(adapter, li2c); + + ret = i2c_bit_add_numbered_bus(adapter); + if (ret) { + if (i2c_np) + of_node_put(i2c_np); + + kfree(li2c); + return ERR_PTR(ret); + } + + dev_info(parent, "sda=%u, scl=%u, nr=%d, udelay=%u, timeout=%u\n", + li2c->sda, li2c->scl, nr, udelay, timeout); + + ret = devm_add_action_or_reset(parent, lsdc_of_release_i2c_adapter, li2c); + if (ret) + return NULL; + + return li2c; +} + +static void lsdc_release_i2c_chan(struct drm_device *dev, void *res) +{ + struct lsdc_i2c *li2c = res; + + i2c_del_adapter(&li2c->adapter); + + kfree(li2c); +} + +struct lsdc_i2c *lsdc_create_i2c_chan(struct drm_device *ddev, + void *reg_base, + unsigned int index) +{ + struct i2c_adapter *adapter; + struct lsdc_i2c *li2c; + int ret; + + li2c = kzalloc(sizeof(*li2c), GFP_KERNEL); + if (!li2c) + return ERR_PTR(-ENOMEM); + + if (index == 0) { + li2c->sda = 0x01; + li2c->scl = 0x02; + } else if (index == 1) { + li2c->sda = 0x04; + li2c->scl = 0x08; + } + + 
spin_lock_init(&li2c->reglock); + + li2c->dir_reg = reg_base + LS7A_DC_GPIO_DIR_REG; + li2c->dat_reg = reg_base + LS7A_DC_GPIO_DAT_REG; + + li2c->bit.setsda = ls7a_i2c_set_sda; + li2c->bit.setscl = ls7a_i2c_set_scl; + li2c->bit.getsda = ls7a_i2c_get_sda; + li2c->bit.getscl = ls7a_i2c_get_scl; + li2c->bit.udelay = 5; + li2c->bit.timeout = usecs_to_jiffies(2200); + li2c->bit.data = li2c; + + adapter = &li2c->adapter; + adapter->algo_data = &li2c->bit; + adapter->owner = THIS_MODULE; + adapter->class = I2C_CLASS_DDC; + adapter->dev.parent = ddev->dev; + adapter->nr = -1; + + snprintf(adapter->name, sizeof(adapter->name), "gpio-i2c-%d", index); + + i2c_set_adapdata(adapter, li2c); + + ret = i2c_bit_add_bus(adapter); + if (ret) { + kfree(li2c); + return ERR_PTR(ret); + } + + ret = drmm_add_action_or_reset(ddev, lsdc_release_i2c_chan, li2c); + if (ret) + return NULL; + + drm_info(ddev, "%s: sda=%u, scl=%u\n", + adapter->name, li2c->sda, li2c->scl); + + return li2c; +} diff --git a/drivers/gpu/drm/loongson/lsdc_i2c.h b/drivers/gpu/drm/loongson/lsdc_i2c.h new file mode 100644 index 000000000000..62cbf2aaab2e --- /dev/null +++ b/drivers/gpu/drm/loongson/lsdc_i2c.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022 Loongson Corporation + */ + +/* + * Authors: + * Sui Jingfeng + */ + +#ifndef __LSDC_I2C__ +#define __LSDC_I2C__ + +#include +#include +#include + +struct lsdc_i2c { + struct i2c_adapter adapter; + struct i2c_algo_bit_data bit; + /* @reglock: protects concurrent register access */ + spinlock_t reglock; + void __iomem *dir_reg; + void __iomem *dat_reg; + /* pin bit mask */ + u8 sda; + u8 scl; +}; + +struct lsdc_i2c *lsdc_create_i2c_chan(struct drm_device *ddev, + void *reg_base, + unsigned int index); + +struct lsdc_i2c *lsdc_of_create_i2c_adapter(struct device *dev, + void *reg_base, + struct device_node *i2c_np); + +#endif diff --git a/drivers/gpu/drm/loongson/lsdc_irq.c b/drivers/gpu/drm/loongson/lsdc_irq.c new file mode 100644 
index 000000000000..35616f976fad --- /dev/null +++ b/drivers/gpu/drm/loongson/lsdc_irq.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Loongson Corporation + */ + +/* + * Authors: + * Sui Jingfeng + */ + +#include + +#include "lsdc_drv.h" +#include "lsdc_regs.h" +#include "lsdc_irq.h" + +/* Function to be called in a threaded interrupt context. */ +irqreturn_t lsdc_irq_thread_cb(int irq, void *arg) +{ + struct drm_device *ddev = arg; + struct lsdc_device *ldev = to_lsdc(ddev); + struct drm_crtc *crtc; + + /* trigger the vblank event */ + if (ldev->irq_status & INT_CRTC0_VS) { + crtc = drm_crtc_from_index(ddev, 0); + drm_crtc_handle_vblank(crtc); + } + + if (ldev->irq_status & INT_CRTC1_VS) { + crtc = drm_crtc_from_index(ddev, 1); + drm_crtc_handle_vblank(crtc); + } + + writel(INT_CRTC0_VS_EN | INT_CRTC1_VS_EN, ldev->reg_base + LSDC_INT_REG); + + return IRQ_HANDLED; +} + +/* Function to be called when the IRQ occurs */ +irqreturn_t lsdc_irq_handler_cb(int irq, void *arg) +{ + struct drm_device *ddev = arg; + struct lsdc_device *ldev = to_lsdc(ddev); + + /* Read & Clear the interrupt status */ + ldev->irq_status = readl(ldev->reg_base + LSDC_INT_REG); + if ((ldev->irq_status & INT_STATUS_MASK) == 0) { + drm_warn(ddev, "no interrupt occurs\n"); + return IRQ_NONE; + } + + /* clear all interrupt */ + writel(ldev->irq_status, ldev->reg_base + LSDC_INT_REG); + + return IRQ_WAKE_THREAD; +} diff --git a/drivers/gpu/drm/loongson/lsdc_irq.h b/drivers/gpu/drm/loongson/lsdc_irq.h new file mode 100644 index 000000000000..528a74d9bf83 --- /dev/null +++ b/drivers/gpu/drm/loongson/lsdc_irq.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022 Loongson Corporation + */ + +/* + * Authors: + * Sui Jingfeng + */ + +#ifndef __LSDC_IRQ_H__ +#define __LSDC_IRQ_H__ + +irqreturn_t lsdc_irq_thread_cb(int irq, void *arg); +irqreturn_t lsdc_irq_handler_cb(int irq, void *arg); + +#endif diff --git 
a/drivers/gpu/drm/loongson/lsdc_output.c b/drivers/gpu/drm/loongson/lsdc_output.c new file mode 100644 index 000000000000..e8cefe6edd4d --- /dev/null +++ b/drivers/gpu/drm/loongson/lsdc_output.c @@ -0,0 +1,412 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Loongson Corporation + */ + +/* + * Authors: + * Sui Jingfeng + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "lsdc_drv.h" +#include "lsdc_i2c.h" +#include "lsdc_output.h" +#include "lsdc_regs.h" + +static int lsdc_get_modes(struct drm_connector *connector) +{ + unsigned int num = 0; + struct lsdc_output *lop = drm_connector_to_lsdc_output(connector); + struct lsdc_i2c *li2c = lop->li2c; + struct i2c_adapter *ddc = &li2c->adapter; + + if (ddc) { + struct edid *edid; + + edid = drm_get_edid(connector, ddc); + if (edid) { + drm_connector_update_edid_property(connector, edid); + num = drm_add_edid_modes(connector, edid); + kfree(edid); + } + + return num; + } + + drm_dbg(connector->dev, "Failed to get mode from ddc\n"); + + num = drm_add_modes_noedid(connector, 1920, 1200); + + drm_set_preferred_mode(connector, 1024, 768); + + return num; +} + +static enum drm_connector_status +lsdc_connector_detect(struct drm_connector *connector, bool force) +{ + struct lsdc_output *lop = drm_connector_to_lsdc_output(connector); + struct lsdc_i2c *li2c = lop->li2c; + struct i2c_adapter *ddc = &li2c->adapter; + + if (ddc && drm_probe_ddc(ddc)) + return connector_status_connected; + + if (connector->connector_type == DRM_MODE_CONNECTOR_VIRTUAL) + return connector_status_connected; + + if (connector->connector_type == DRM_MODE_CONNECTOR_DVIA || + connector->connector_type == DRM_MODE_CONNECTOR_DVID || + connector->connector_type == DRM_MODE_CONNECTOR_DVII) + return connector_status_disconnected; + + if (connector->connector_type == DRM_MODE_CONNECTOR_HDMIA || + connector->connector_type == DRM_MODE_CONNECTOR_HDMIB) + return connector_status_disconnected; + + return 
connector_status_unknown; +} + +static void lsdc_connector_destroy(struct drm_connector *connector) +{ + drm_connector_cleanup(connector); +} + +static const struct drm_connector_helper_funcs lsdc_connector_helpers = { + .get_modes = lsdc_get_modes, +}; + +static const struct drm_connector_funcs lsdc_connector_funcs = { + .dpms = drm_helper_connector_dpms, + .detect = lsdc_connector_detect, + .fill_modes = drm_helper_probe_single_connector_modes, + .destroy = lsdc_connector_destroy, + .reset = drm_atomic_helper_connector_reset, + .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, +}; + +static enum drm_mode_status +ls7a2000_hdmi_encoder_mode_valid(struct drm_encoder *crtc, + const struct drm_display_mode *mode) +{ + return MODE_OK; +} + +static void ls7a2000_hdmi_encoder_disable(struct drm_encoder *encoder) +{ + int index = encoder->index; + struct lsdc_device *ldev = to_lsdc(encoder->dev); + + if (index == 0) { + /* Enable hdmi */ + writel(0, ldev->reg_base + HDMI0_CTRL_REG); + + } else if (index == 1) { + /* Enable hdmi */ + writel(0, ldev->reg_base + HDMI1_CTRL_REG); + } + + drm_dbg(encoder->dev, "HDMI%d disable\n", index); +} + +static void ls7a2000_hdmi_encoder_enable(struct drm_encoder *encoder) +{ + int index = encoder->index; + struct lsdc_device *ldev = to_lsdc(encoder->dev); + + if (index == 0) { + /* Enable hdmi */ + writel(0x280 | HDMI_EN | HDMI_PACKET_EN, ldev->reg_base + HDMI0_CTRL_REG); + + /* hdmi zone idle */ + writel(0x00400040, ldev->reg_base + HDMI0_ZONE_REG); + } else if (index == 1) { + /* Enable hdmi */ + writel(0x280 | HDMI_EN | HDMI_PACKET_EN, ldev->reg_base + HDMI1_CTRL_REG); + + /* hdmi zone idle */ + writel(0x00400040, ldev->reg_base + HDMI1_ZONE_REG); + } + + drm_dbg(encoder->dev, "HDMI%d enable\n", index); +} + +static void +ls7a2000_hdmi_encoder_mode_set(struct drm_encoder *encoder, + struct drm_display_mode *mode, + struct drm_display_mode 
*adjusted_mode) +{ + int index = encoder->index; + struct drm_device *ddev = encoder->dev; + struct lsdc_device *ldev = to_lsdc(ddev); + int clock = mode->clock; + u32 val; + int counter; + + if (index == 0) { + writel(0x0, ldev->reg_base + HDMI0_PLL_REG); + writel(0x0, ldev->reg_base + HDMI0_PHY_CTRL_REG); + } else { + writel(0x0, ldev->reg_base + HDMI1_PLL_REG); + writel(0x0, ldev->reg_base + HDMI1_PHY_CTRL_REG); + } + + if (clock >= 170000) + val = (0x0 << 13) | (0x28 << 6) | (0x10 << 1) | HDMI_PLL_EN; + else if (clock >= 85000 && clock < 170000) + val = (0x1 << 13) | (0x28 << 6) | (0x8 << 1) | HDMI_PLL_EN; + else if (clock >= 42500 && clock < 85000) + val = (0x2 << 13) | (0x28 << 6) | (0x4 << 1) | HDMI_PLL_EN; + else if (clock >= 21250 && clock < 42500) + val = (0x3 << 13) | (0x28 << 6) | (0x2 << 1) | HDMI_PLL_EN; + + if (index == 0) { + writel(val, ldev->reg_base + HDMI0_PLL_REG); + } else { + writel(val, ldev->reg_base + HDMI1_PLL_REG); + } + + do { + /* wait pll lock */ + if (index == 0) + val = readl(ldev->reg_base + HDMI0_PLL_REG); + else if (index == 1) + val = readl(ldev->reg_base + HDMI1_PLL_REG); + + ++counter; + } while (((val & HDMI_PLL_LOCKED) == 0) && (counter < 1000)); + + drm_dbg(ddev, "HDMI%d modeset, PLL: %u loop waited\n", index, counter); + + if (index == 0) { + writel(0x0f03, ldev->reg_base + HDMI0_PHY_CTRL_REG); + } else if (index == 1) { + writel(0x0f03, ldev->reg_base + HDMI1_PHY_CTRL_REG); + } +} + +static const struct drm_encoder_helper_funcs ls7a2000_hdmi_encoder_helper_funcs = { + .mode_valid = ls7a2000_hdmi_encoder_mode_valid, + .disable = ls7a2000_hdmi_encoder_disable, + .enable = ls7a2000_hdmi_encoder_enable, + .mode_set = ls7a2000_hdmi_encoder_mode_set, +}; + +static void lsdc_encoder_reset(struct drm_encoder *encoder) +{ + struct lsdc_device *ldev = to_lsdc(encoder->dev); + + if (ldev->desc->chip == LSDC_CHIP_7A2000) + ls7a2000_hdmi_encoder_enable(encoder); +} + +static const struct drm_encoder_funcs lsdc_encoder_funcs = { + 
.reset = lsdc_encoder_reset, + .destroy = drm_encoder_cleanup, +}; + +static int lsdc_attach_bridges(struct lsdc_device *ldev, + struct device_node *ports, + unsigned int i) +{ + struct lsdc_display_pipe * const dispipe = &ldev->dispipe[i]; + struct drm_device *ddev = ldev->ddev; + struct drm_bridge *bridge; + struct drm_panel *panel; + struct drm_connector *connector; + struct drm_encoder *encoder; + struct lsdc_output *output; + int ret; + + ret = drm_of_find_panel_or_bridge(ports, i, 0, &panel, &bridge); + + if (panel) { + bridge = devm_drm_panel_bridge_add_typed(ddev->dev, panel, DRM_MODE_CONNECTOR_DPI); + drm_info(ddev, "output-%u is a DPI panel\n", i); + } + + if (!bridge) + return ret; + + output = devm_kzalloc(ddev->dev, sizeof(*output), GFP_KERNEL); + if (!output) + return -ENOMEM; + + encoder = &output->encoder; + + ret = drm_encoder_init(ddev, encoder, &lsdc_encoder_funcs, + DRM_MODE_ENCODER_DPI, "encoder-%u", i); + + if (ret) { + drm_err(ddev, "Failed to init encoder: %d\n", ret); + return ret; + } + + encoder->possible_crtcs = BIT(i); + + ret = drm_bridge_attach(encoder, bridge, NULL, DRM_BRIDGE_ATTACH_NO_CONNECTOR); + if (ret) { + drm_err(ddev, + "failed to attach bridge %pOF for output %u (%d)\n", + bridge->of_node, i, ret); + return ret; + } + + connector = drm_bridge_connector_init(ddev, encoder); + if (IS_ERR(connector)) { + drm_err(ddev, "Unable to init connector\n"); + return PTR_ERR(connector); + } + + drm_connector_attach_encoder(connector, encoder); + + drm_info(ddev, "bridge-%u attached to %s\n", i, encoder->name); + + dispipe->output = output; + + return 0; +} + +int lsdc_attach_output(struct lsdc_device *ldev, uint32_t num_crtc) +{ + struct drm_device *ddev = ldev->ddev; + struct device_node *ports; + struct lsdc_display_pipe *disp; + unsigned int i; + int ret; + + ldev->num_output = 0; + + ports = of_get_child_by_name(ldev->dev->of_node, "ports"); + + for (i = 0; i < num_crtc; i++) { + struct drm_bridge *b; + struct drm_panel *p; + + disp 
= &ldev->dispipe[i]; + disp->available = false; + + ret = drm_of_find_panel_or_bridge(ports, i, 0, &p, &b); + if (ret) { + if (ret == -ENODEV) { + drm_dbg(ddev, "No active panel or bridge for port%u\n", i); + disp->available = false; + continue; + } + + if (ret == -EPROBE_DEFER) + drm_dbg(ddev, "Bridge for port%d is defer probed\n", i); + + goto RET; + } + + disp->available = true; + ldev->num_output++; + } + + if (ldev->num_output == 0) { + drm_err(ddev, "No valid output, abort\n"); + ret = -ENODEV; + goto RET; + } + + for (i = 0; i < num_crtc; i++) { + disp = &ldev->dispipe[i]; + if (disp->available) { + ret = lsdc_attach_bridges(ldev, ports, i); + if (ret) + goto RET; + } else { + drm_info(ddev, "output-%u is not available\n", i); + } + } + + drm_info(ddev, "number of outputs: %u\n", ldev->num_output); +RET: + of_node_put(ports); + return ret; +} + +/* No DT support, provide a minimal support */ +int lsdc_create_output(struct lsdc_device *ldev, + unsigned int index, + unsigned int num_crtc) +{ + const struct lsdc_chip_desc * const descp = ldev->desc; + struct lsdc_display_pipe * const dispipe = &ldev->dispipe[index]; + struct drm_device *ddev = ldev->ddev; + int encoder_type = DRM_MODE_ENCODER_DPI; + int connector_type = DRM_MODE_CONNECTOR_DPI; + struct lsdc_output *output; + struct drm_encoder *encoder; + struct drm_connector *connector; + int ret; + + output = devm_kzalloc(ddev->dev, sizeof(*output), GFP_KERNEL); + if (!output) + return -ENOMEM; + + encoder = &output->encoder; + + if (descp->chip == LSDC_CHIP_7A2000) { + encoder_type = DRM_MODE_ENCODER_TMDS; + connector_type = DRM_MODE_CONNECTOR_HDMIA; + } + + ret = drm_encoder_init(ddev, encoder, &lsdc_encoder_funcs, + encoder_type, "encoder-%u", index); + + if (ret) { + drm_err(ddev, "Failed to init encoder: %d\n", ret); + return ret; + } + + if (descp->chip == LSDC_CHIP_7A2000) + drm_encoder_helper_add(encoder, &ls7a2000_hdmi_encoder_helper_funcs); + + encoder->possible_crtcs = BIT(index); + + if 
(descp->has_builtin_i2c) { + output->li2c = lsdc_create_i2c_chan(ddev, ldev->reg_base, index); + if (IS_ERR(output->li2c)) { + drm_err(ddev, "Failed to create i2c adapter\n"); + return PTR_ERR(output->li2c); + } + } else { + drm_warn(ddev, "output-%u don't has ddc\n", index); + output->li2c = NULL; + } + + connector = &output->connector; + + ret = drm_connector_init_with_ddc(ddev, + connector, + &lsdc_connector_funcs, + connector_type, + &output->li2c->adapter); + if (ret) { + drm_err(ddev, "Init connector%d failed\n", index); + return ret; + } + + drm_connector_helper_add(connector, &lsdc_connector_helpers); + + connector->polled = DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT; + + drm_connector_attach_encoder(connector, encoder); + + dispipe->available = true; + dispipe->output = output; + ldev->num_output++; + + return 0; +} diff --git a/drivers/gpu/drm/loongson/lsdc_output.h b/drivers/gpu/drm/loongson/lsdc_output.h new file mode 100644 index 000000000000..97ad9345ec10 --- /dev/null +++ b/drivers/gpu/drm/loongson/lsdc_output.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022 Loongson Corporation + */ + +/* + * Authors: + * Sui Jingfeng + */ + +#ifndef __LSDC_OUTPUT_H__ +#define __LSDC_OUTPUT_H__ + +#include +#include + +int lsdc_create_output(struct lsdc_device *ldev, unsigned int i, unsigned int num_crtc); + +int lsdc_attach_output(struct lsdc_device *ldev, uint32_t num_crtc); + +#endif diff --git a/drivers/gpu/drm/loongson/lsdc_pci_drv.c b/drivers/gpu/drm/loongson/lsdc_pci_drv.c new file mode 100644 index 000000000000..e202da6621bc --- /dev/null +++ b/drivers/gpu/drm/loongson/lsdc_pci_drv.c @@ -0,0 +1,352 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * KMS driver for Loongson display controller + * Copyright (C) 2022 Loongson Corporation + */ + +/* + * Authors: + * Sui Jingfeng + */ + +#include +#include +#include +#include + +#include + +#include "lsdc_drv.h" +#include "lsdc_i2c.h" + +static int 
lsdc_use_vram_helper = -1;
+MODULE_PARM_DESC(use_vram_helper, "Using vram helper based driver(0 = disabled)");
+module_param_named(use_vram_helper, lsdc_use_vram_helper, int, 0644);
+
+static int lsdc_gamma = -1;
+MODULE_PARM_DESC(gamma, "enable gamma (-1 = disabled (default), >0 = enabled)");
+module_param_named(gamma, lsdc_gamma, int, 0644);
+
+static int lsdc_relax_alignment = -1;
+MODULE_PARM_DESC(relax_alignment,
+		 "relax crtc stride alignment (-1 = disabled (default), >0 = enabled)");
+module_param_named(relax_alignment, lsdc_relax_alignment, int, 0644);
+
+
+/*
+ * Allocate a platform device called @name, parent it to @parent, attach the
+ * optional platform data (@descp) and the optional single resource (@res),
+ * then register it.
+ *
+ * Returns the registered device, or an ERR_PTR() on failure; a partially
+ * set up device is released with platform_device_put().
+ */
+static struct platform_device *
+lsdc_create_platform_device(const char *name,
+			    struct device *parent,
+			    const struct lsdc_chip_desc *descp,
+			    struct resource *res)
+{
+	struct device *dev;
+	struct platform_device *pdev;
+	int ret;
+
+	pdev = platform_device_alloc(name, PLATFORM_DEVID_NONE);
+	if (!pdev) {
+		dev_err(parent, "can not create platform device\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	dev_info(parent, "platform device %s created\n", name);
+
+	dev = &pdev->dev;
+	dev->parent = parent;
+
+	if (descp) {
+		ret = platform_device_add_data(pdev, descp, sizeof(*descp));
+		if (ret) {
+			dev_err(parent, "add platform data failed: %d\n", ret);
+			goto ERROR_RET;
+		}
+	}
+
+	if (res) {
+		ret = platform_device_add_resources(pdev, res, 1);
+		if (ret) {
+			dev_err(parent, "add platform resources failed: %d\n", ret);
+			goto ERROR_RET;
+		}
+	}
+
+	ret = platform_device_add(pdev);
+	if (ret) {
+		dev_err(parent, "add platform device failed: %d\n", ret);
+		goto ERROR_RET;
+	}
+
+	return pdev;
+
+ERROR_RET:
+	platform_device_put(pdev);
+	return ERR_PTR(ret);
+}
+
+/*
+ * Record the base address and size of the VRAM in @ldev.
+ *
+ * The VRAM is BAR 2 of the GPU PCI function, which is looked up by device
+ * ID according to the chip described by ldev->desc.
+ *
+ * Returns 0 on success, -ENOENT if the chip is unknown or the GPU function
+ * is not present.
+ */
+static int lsdc_vram_init(struct lsdc_device *ldev)
+{
+	const struct lsdc_chip_desc * const descp = ldev->desc;
+	struct pci_dev *gpu;
+	resource_size_t base, size;
+
+	if (descp->chip == LSDC_CHIP_7A2000) {
+		/* BAR 2 of LS7A2000's GPU contain VRAM */
+		gpu = pci_get_device(PCI_VENDOR_ID_LOONGSON, 0x7A25, NULL);
+	} else if (descp->chip == LSDC_CHIP_7A1000) {
+		/* BAR 2 of LS7A1000's GPU(GC1000) contain VRAM */
+		gpu = pci_get_device(PCI_VENDOR_ID_LOONGSON, 0x7A15, NULL);
+	} else {
+		dev_err(ldev->dev, "Unknown chip, the driver need update\n");
+		return -ENOENT;
+	}
+
+	/* pci_get_device() reports "not found" as NULL, never as ERR_PTR() */
+	if (!gpu) {
+		dev_err(ldev->dev, "Can not get VRAM\n");
+		return -ENOENT;
+	}
+
+	base = pci_resource_start(gpu, 2);
+	size = pci_resource_len(gpu, 2);
+
+	/* Only the BAR values are kept, so drop the lookup reference again */
+	pci_dev_put(gpu);
+
+	ldev->vram_base = base;
+	ldev->vram_size = size;
+
+	dev_info(ldev->dev, "vram start: 0x%llx, size: %uMB\n",
+		 (u64)base, (u32)(size >> 20));
+
+	return 0;
+}
+
+/*
+ * Record in @ldev whether a device tree node (@np) is attached to the
+ * device and whether that node has a "ports" child.
+ */
+static void lsdc_of_probe(struct lsdc_device *ldev, struct device_node *np)
+{
+	struct device_node *ports;
+
+	if (!np) {
+		ldev->has_dt = false;
+		ldev->has_ports_node = false;
+		dev_info(ldev->dev, "don't has DT support\n");
+		return;
+	}
+
+	/*
+	 * Without this the flag stays at its zero-initialised value and the
+	 * reserved memory lookup in the probe routine can never run.
+	 */
+	ldev->has_dt = true;
+
+	ports = of_get_child_by_name(np, "ports");
+	ldev->has_ports_node = ports ? true : false;
+	of_node_put(ports);
+}
+
+/*
+ * PCI probe: identify the display controller, map its registers (BAR 0),
+ * optionally create the GPIO emulated i2c adapters and locate the VRAM,
+ * then create the "lsdc" child platform device and register the platform
+ * driver that binds to it.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int lsdc_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	struct device *dev = &pdev->dev;
+	const struct lsdc_chip_desc *descp;
+	struct lsdc_device *ldev;
+	int ret;
+
+	descp = lsdc_detect_chip(pdev, ent);
+	if (!descp) {
+		dev_info(dev, "unknown dc ip core, abort\n");
+		return -ENOENT;
+	}
+
+	/* devm_kzalloc() returns NULL on failure, not an ERR_PTR() */
+	ldev = devm_kzalloc(dev, sizeof(*ldev), GFP_KERNEL);
+	if (!ldev)
+		return -ENOMEM;
+
+	ldev->desc = descp;
+	ldev->dev = dev;
+
+	/* A negative module parameter means "follow the chip description" */
+	if (lsdc_use_vram_helper > 0)
+		ldev->use_vram_helper = true;
+	else if ((lsdc_use_vram_helper < 0) && descp->has_vram)
+		ldev->use_vram_helper = true;
+	else
+		ldev->use_vram_helper = false;
+
+	if (!descp->broken_gamma)
+		ldev->enable_gamma = true;
+	else
+		ldev->enable_gamma = lsdc_gamma > 0 ? true : false;
+
+	ldev->relax_alignment = lsdc_relax_alignment > 0 ? true : false;
+
+	lsdc_of_probe(ldev, dev->of_node);
+
+	ret = pcim_enable_device(pdev);
+	if (ret)
+		return ret;
+
+	pci_set_master(pdev);
+
+	/* BAR 0 contains registers */
+	ldev->reg_base = devm_ioremap_resource(dev, &pdev->resource[0]);
+	if (IS_ERR(ldev->reg_base))
+		return PTR_ERR(ldev->reg_base);
+
+	/* Create GPIO emulated i2c driver as early as possible */
+	if (descp->has_builtin_i2c && ldev->has_ports_node) {
+		struct device_node *i2c_node;
+
+		for_each_compatible_node(i2c_node, NULL, "loongson,gpio-i2c") {
+			if (!of_device_is_available(i2c_node))
+				continue;
+
+			lsdc_of_create_i2c_adapter(dev, ldev->reg_base, i2c_node);
+		}
+	}
+
+	if (ldev->has_dt) {
+		/* Get the optional framebuffer memory resource */
+		ret = of_reserved_mem_device_init(dev);
+		if (ret && (ret != -ENODEV))
+			return ret;
+	}
+
+	if (descp->has_vram && ldev->use_vram_helper) {
+		ret = lsdc_vram_init(ldev);
+		if (ret) {
+			dev_err(dev, "VRAM is unavailable\n");
+			ldev->use_vram_helper = false;
+		}
+	}
+
+	ldev->irq = pdev->irq;
+
+	dev_set_drvdata(dev, ldev);
+
+	/*
+	 * No memory resource is attached to the child device: the VRAM range
+	 * is kept in ldev->vram_base / ldev->vram_size, and ldev is the
+	 * drvdata of the child's parent device.
+	 */
+	ldev->dc = lsdc_create_platform_device("lsdc", dev, descp, NULL);
+	if (IS_ERR(ldev->dc))
+		return PTR_ERR(ldev->dc);
+
+	ret = platform_driver_register(&lsdc_platform_driver);
+	if (ret) {
+		/* Do not leave the child device behind without a driver */
+		platform_device_unregister(ldev->dc);
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * PCI remove: undo what the probe routine registered by hand. The device
+ * enable state, the register mapping and ldev itself are device managed
+ * (pcim_/devm_ helpers) and are released by the driver core.
+ */
+static void lsdc_pci_remove(struct pci_dev *pdev)
+{
+	struct lsdc_device *ldev = pci_get_drvdata(pdev);
+
+	platform_device_unregister(ldev->dc);
+
+	/* Counterpart of the platform_driver_register() done while probing */
+	platform_driver_unregister(&lsdc_platform_driver);
+
+	pci_set_drvdata(pdev, NULL);
+
+	pci_clear_master(pdev);
+}
+
+static int lsdc_drm_suspend(struct device *dev)
+{
+	struct lsdc_device *ldev = dev_get_drvdata(dev);
+
+	return drm_mode_config_helper_suspend(ldev->ddev);
+}
+
+static int lsdc_drm_resume(struct device *dev)
+{
+	struct lsdc_device *ldev = dev_get_drvdata(dev);
+
+	return
drm_mode_config_helper_resume(ldev->ddev); +} + +static int lsdc_pm_freeze(struct device *dev) +{ + return lsdc_drm_suspend(dev); +} + +static int lsdc_pm_thaw(struct device *dev) +{ + return lsdc_drm_resume(dev); +} + +static int lsdc_pm_suspend(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int error; + + error = lsdc_pm_freeze(dev); + if (error) + return error; + + pci_save_state(pdev); + /* Shut down the device */ + pci_disable_device(pdev); + pci_set_power_state(pdev, PCI_D3hot); + + return 0; +} + +static int lsdc_pm_resume(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + if (pcim_enable_device(pdev)) + return -EIO; + + pci_set_power_state(pdev, PCI_D0); + + pci_restore_state(pdev); + + return lsdc_pm_thaw(dev); +} + +static const struct dev_pm_ops lsdc_pm_ops = { + .suspend = lsdc_pm_suspend, + .resume = lsdc_pm_resume, + .freeze = lsdc_pm_freeze, + .thaw = lsdc_pm_thaw, + .poweroff = lsdc_pm_freeze, + .restore = lsdc_pm_resume, +}; + +static const struct pci_device_id lsdc_pciid_list[] = { + {PCI_VENDOR_ID_LOONGSON, 0x7a06, PCI_ANY_ID, PCI_ANY_ID, 0, 0, (kernel_ulong_t)LSDC_CHIP_7A1000}, + {PCI_VENDOR_ID_LOONGSON, 0x7a36, PCI_ANY_ID, PCI_ANY_ID, 0, 0, (kernel_ulong_t)LSDC_CHIP_7A2000}, + {0, 0, 0, 0, 0, 0, 0} +}; + +static struct pci_driver lsdc_pci_driver = { + .name = DRIVER_NAME, + .id_table = lsdc_pciid_list, + .probe = lsdc_pci_probe, + .remove = lsdc_pci_remove, + .driver.pm = &lsdc_pm_ops, +}; + +static int __init lsdc_drm_init(void) +{ + struct pci_dev *pdev = NULL; + + while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev))) { + /* + * Multiple video card workaround + * + * This integrated video card will always be selected as + * default boot device by vgaarb subsystem. 
+ */ + if (pdev->vendor != PCI_VENDOR_ID_LOONGSON) { + pr_info("Discrete graphic card detected, abort\n"); + return 0; + } + } + + return pci_register_driver(&lsdc_pci_driver); +} +module_init(lsdc_drm_init); + +static void __exit lsdc_drm_exit(void) +{ + pci_unregister_driver(&lsdc_pci_driver); +} +module_exit(lsdc_drm_exit); + +MODULE_DEVICE_TABLE(pci, lsdc_pciid_list); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/gpu/drm/loongson/lsdc_plane.c b/drivers/gpu/drm/loongson/lsdc_plane.c new file mode 100644 index 000000000000..6f65c9fd687e --- /dev/null +++ b/drivers/gpu/drm/loongson/lsdc_plane.c @@ -0,0 +1,476 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Loongson Corporation + */ + +/* + * Authors: + * Sui Jingfeng + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "lsdc_drv.h" +#include "lsdc_regs.h" + +static const u32 lsdc_primary_formats[] = { + DRM_FORMAT_RGB565, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, +}; + +static const u32 lsdc_cursor_formats[] = { + DRM_FORMAT_ARGB8888, +}; + +static const u64 lsdc_fb_format_modifiers[] = { + DRM_FORMAT_MOD_LINEAR, + DRM_FORMAT_MOD_INVALID +}; + +static void lsdc_update_fb_format(struct lsdc_device *ldev, + struct drm_crtc *crtc, + const struct drm_format_info *fmt_info) +{ + unsigned int index = drm_crtc_index(crtc); + u32 val = 0; + u32 fmt; + + switch (fmt_info->format) { + case DRM_FORMAT_RGB565: + fmt = LSDC_PF_RGB565; + break; + case DRM_FORMAT_XRGB8888: + fmt = LSDC_PF_XRGB8888; + break; + case DRM_FORMAT_ARGB8888: + fmt = LSDC_PF_XRGB8888; + break; + default: + fmt = LSDC_PF_XRGB8888; + break; + } + + if (index == 0) { + val = readl(ldev->reg_base + LSDC_CRTC0_CFG_REG); + val = (val & ~CFG_PIX_FMT_MASK) | fmt; + writel(val, ldev->reg_base + LSDC_CRTC0_CFG_REG); + } else if (index == 1) { + val = readl(ldev->reg_base + LSDC_CRTC1_CFG_REG); + val = (val & 
~CFG_PIX_FMT_MASK) | fmt;
+		writel(val, ldev->reg_base + LSDC_CRTC1_CFG_REG);
+	}
+}
+
+/*
+ * Program the scanout address @paddr for @crtc and trigger a page flip.
+ *
+ * Each CRTC has two low address registers; the address is written to the
+ * one selected from the state of CFG_FB_IDX_BIT, and the page flip bit is
+ * then set in the CRTC config register.
+ */
+static void lsdc_update_fb_start_addr(struct lsdc_device *ldev,
+				      struct drm_crtc *crtc,
+				      u64 paddr)
+{
+	unsigned int index = drm_crtc_index(crtc);
+	u32 lo32_addr_reg;
+	u32 hi32_addr_reg;
+	u32 cfg_reg;
+	u32 val;
+
+	/*
+	 * Find which framebuffer address register should update.
+	 * if FB_ADDR0_REG is in using, we write the addr to FB_ADDR1_REG,
+	 * if FB_ADDR1_REG is in using, we write the addr to FB_ADDR0_REG
+	 */
+	if (index == 0) {
+		/* CRTC0 */
+		val = readl(ldev->reg_base + LSDC_CRTC0_CFG_REG);
+
+		cfg_reg = LSDC_CRTC0_CFG_REG;
+		hi32_addr_reg = LSDC_CRTC0_FB_HI_ADDR_REG;
+
+		if (val & CFG_FB_IDX_BIT)
+			lo32_addr_reg = LSDC_CRTC0_FB_ADDR0_REG;
+		else
+			lo32_addr_reg = LSDC_CRTC0_FB_ADDR1_REG;
+	} else if (index == 1) {
+		/* CRTC1 */
+		val = readl(ldev->reg_base + LSDC_CRTC1_CFG_REG);
+
+		cfg_reg = LSDC_CRTC1_CFG_REG;
+		hi32_addr_reg = LSDC_CRTC1_FB_HI_ADDR_REG;
+
+		if (val & CFG_FB_IDX_BIT)
+			lo32_addr_reg = LSDC_CRTC1_FB_ADDR0_REG;
+		else
+			lo32_addr_reg = LSDC_CRTC1_FB_ADDR1_REG;
+	} else {
+		/*
+		 * Only CRTC0 and CRTC1 are handled here; without this the
+		 * writes below would use uninitialised register offsets.
+		 */
+		return;
+	}
+
+	drm_dbg(ldev->ddev, "crtc%u scantout from 0x%llx\n", index, paddr);
+
+	/* The bridge's bus width is 40 */
+	writel(paddr, ldev->reg_base + lo32_addr_reg);
+	writel((paddr >> 32) & 0xFF, ldev->reg_base + hi32_addr_reg);
+	/*
+	 * Then, we triger the fb switch, the switch of the framebuffer
+	 * to be scanout will complete at the next vblank.
+	 */
+	writel(val | CFG_PAGE_FLIP_BIT, ldev->reg_base + cfg_reg);
+}
+
+/*
+ * Byte offset of the first visible pixel of @plane within @fb, derived
+ * from the framebuffer offset and the integer part of the 16.16 fixed
+ * point source coordinates in @state.
+ */
+static unsigned int lsdc_get_fb_offset(struct drm_framebuffer *fb,
+				       struct drm_plane_state *state,
+				       unsigned int plane)
+{
+	unsigned int offset = fb->offsets[plane];
+
+	offset += fb->format->cpp[plane] * (state->src_x >> 16);
+	offset += fb->pitches[plane] * (state->src_y >> 16);
+
+	return offset;
+}
+
+/* Offset of the VRAM buffer object backing the first plane of @fb */
+static s64 lsdc_get_vram_bo_offset(struct drm_framebuffer *fb)
+{
+	struct drm_gem_vram_object *gbo;
+	s64 gpu_addr;
+
+	gbo = drm_gem_vram_of_gem(fb->obj[0]);
+	gpu_addr = drm_gem_vram_offset(gbo);
+
+	return gpu_addr;
+}
+
+/*
+ * Validate the new primary plane state: no scaling, no positioning, the
+ * plane may be updated while the CRTC is disabled.
+ *
+ * Returns 0 if the state is acceptable or the plane is being disabled,
+ * otherwise a negative errno.
+ */
+static int lsdc_primary_plane_atomic_check(struct drm_plane *plane,
+					   struct drm_plane_state *state)
+{
+	struct drm_crtc *crtc = state->crtc;
+	struct drm_crtc_state *crtc_state;
+	struct drm_framebuffer *fb = state->fb;
+
+	/* no need for further checks if the plane is being disabled */
+	if (!crtc || !fb)
+		return 0;
+
+	/* Failure is reported as an ERR_PTR(), never as NULL */
+	crtc_state = drm_atomic_get_crtc_state(state->state, crtc);
+	if (IS_ERR(crtc_state))
+		return PTR_ERR(crtc_state);
+
+	/* Check the state being committed, not the current plane->state */
+	return drm_atomic_helper_check_plane_state(state,
+						   crtc_state,
+						   DRM_PLANE_HELPER_NO_SCALING,
+						   DRM_PLANE_HELPER_NO_SCALING,
+						   false, true);
+}
+
+/* Write the framebuffer @stride to the stride register of @crtc */
+static void lsdc_update_stride(struct lsdc_device *ldev,
+			       struct drm_crtc *crtc,
+			       unsigned int stride)
+{
+	unsigned int index = drm_crtc_index(crtc);
+
+	if (index == 0)
+		writel(stride, ldev->reg_base + LSDC_CRTC0_STRIDE_REG);
+	else if (index == 1)
+		writel(stride, ldev->reg_base + LSDC_CRTC1_STRIDE_REG);
+
+	drm_dbg(ldev->ddev, "update stride to %u\n", stride);
+}
+
+static void lsdc_primary_plane_atomic_update(struct drm_plane *plane,
+					     struct drm_plane_state *old_state)
+{
+	struct drm_device *ddev = plane->dev;
+	struct lsdc_device *ldev = to_lsdc(ddev);
+	struct drm_plane_state *new_plane_state = plane->state;
+	struct drm_crtc *crtc = new_plane_state->crtc;
+	struct drm_framebuffer *fb = new_plane_state->fb;
+	u32 fb_offset = lsdc_get_fb_offset(fb, new_plane_state, 0);
+
dma_addr_t fb_addr;
+
+	if (ldev->use_vram_helper) {
+		s64 gpu_addr;
+
+		gpu_addr = lsdc_get_vram_bo_offset(fb);
+		if (gpu_addr < 0)
+			return;
+
+		fb_addr = ldev->vram_base + gpu_addr + fb_offset;
+	} else {
+		struct drm_gem_cma_object *obj = drm_fb_cma_get_gem_obj(fb, 0);
+
+		fb_addr = obj->paddr + fb_offset;
+	}
+
+	lsdc_update_fb_start_addr(ldev, crtc, fb_addr);
+
+	lsdc_update_stride(ldev, crtc, fb->pitches[0]);
+
+	if (drm_atomic_crtc_needs_modeset(crtc->state))
+		lsdc_update_fb_format(ldev, crtc, fb->format);
+}
+
+static void lsdc_primary_plane_atomic_disable(struct drm_plane *plane,
+					      struct drm_plane_state *old_state)
+{
+	drm_dbg(plane->dev, "%s disabled\n", plane->name);
+}
+
+/*
+ * Prepare the framebuffer of @new_state for scanout, using the VRAM helper
+ * when the device is driven through it and the generic GEM framebuffer
+ * helper otherwise.
+ */
+static int lsdc_plane_prepare_fb(struct drm_plane *plane,
+				 struct drm_plane_state *new_state)
+{
+	struct lsdc_device *ldev = to_lsdc(plane->dev);
+
+	if (ldev->use_vram_helper)
+		return drm_gem_vram_plane_helper_prepare_fb(plane, new_state);
+
+	return drm_gem_fb_prepare_fb(plane, new_state);
+}
+
+/*
+ * Counterpart of lsdc_plane_prepare_fb(); only the VRAM helper path has
+ * cleanup work to do.
+ */
+static void lsdc_plane_cleanup_fb(struct drm_plane *plane,
+				  struct drm_plane_state *old_state)
+{
+	struct drm_device *ddev = plane->dev;
+	struct lsdc_device *ldev = to_lsdc(ddev);
+
+	if (ldev->use_vram_helper)
+		drm_gem_vram_plane_helper_cleanup_fb(plane, old_state);
+}
+
+static const struct drm_plane_helper_funcs lsdc_primary_plane_helpers = {
+	.prepare_fb = lsdc_plane_prepare_fb,
+	.cleanup_fb = lsdc_plane_cleanup_fb,
+	.atomic_check = lsdc_primary_plane_atomic_check,
+	.atomic_update = lsdc_primary_plane_atomic_update,
+	.atomic_disable = lsdc_primary_plane_atomic_disable,
+};
+
+/*
+ * Validate the new cursor plane state: no scaling, the cursor may be
+ * positioned freely and updated while the CRTC is disabled.
+ *
+ * Returns 0 if the state is acceptable or needs no checking, otherwise a
+ * negative errno.
+ */
+static int lsdc_cursor_atomic_check(struct drm_plane *plane,
+				    struct drm_plane_state *state)
+{
+	struct drm_framebuffer *fb = state->fb;
+	struct drm_crtc *crtc = state->crtc;
+	struct drm_crtc_state *crtc_state;
+
+	/* no need for further checks if the plane is being disabled */
+	if (!crtc || !fb)
+		return 0;
+
+	/*
+	 * NOTE(review): 'visible' is read before the helper below has had a
+	 * chance to recompute it for this state - confirm this is intended.
+	 */
+	if (!state->visible)
+		return 0;
+
+	/* Failure is reported as an ERR_PTR(), never as NULL */
+	crtc_state = drm_atomic_get_crtc_state(state->state, crtc);
+	if (IS_ERR(crtc_state))
+		return PTR_ERR(crtc_state);
+
+	return drm_atomic_helper_check_plane_state(state,
+						   crtc_state,
+						   DRM_PLANE_HELPER_NO_SCALING,
+						   DRM_PLANE_HELPER_NO_SCALING,
+						   true,
+						   true);
+}
+
+static void lsdc_cursor_atomic_update(struct drm_plane *plane,
+				      struct drm_plane_state *old_plane_state)
+{
+	struct lsdc_display_pipe * const dispipe = lsdc_cursor_to_dispipe(plane);
+	struct drm_device *ddev = plane->dev;
+	struct lsdc_device *ldev = to_lsdc(ddev);
+	const struct lsdc_chip_desc * const descp = ldev->desc;
+	struct drm_plane_state *new_plane_state = plane->state;
+	struct drm_framebuffer *new_fb = new_plane_state->fb;
+	struct drm_framebuffer *old_fb = old_plane_state->fb;
+	int dst_x = new_plane_state->crtc_x;
+	int dst_y = new_plane_state->crtc_y;
+	u32 val;
+
+	if (new_fb != old_fb) {
+		u64 cursor_addr;
+
+		if (ldev->use_vram_helper) {
+			s64 offset;
+
+			offset = lsdc_get_vram_bo_offset(new_fb);
+			cursor_addr = ldev->vram_base + offset;
+
+			drm_dbg(ddev, "%s offset: %llx\n", plane->name, offset);
+		} else {
+			struct drm_gem_cma_object *cursor_obj;
+
+			cursor_obj = drm_fb_cma_get_gem_obj(new_fb, 0);
+			if (!cursor_obj)
+				return;
+
+			cursor_addr = cursor_obj->paddr;
+		}
+
+		if ((descp->chip == LSDC_CHIP_7A2000) && (dispipe->index == 1))
+			writel(cursor_addr, ldev->reg_base + LSDC_CURSOR1_ADDR_REG);
+		else
+			writel(cursor_addr, ldev->reg_base + LSDC_CURSOR0_ADDR_REG);
+	}
+
+	/* Update cursor's position */
+	if (dst_x < 0)
+		dst_x = 0;
+
+	if (dst_y < 0)
+		dst_y = 0;
+
+	val = (dst_y << 16) | dst_x;
+
+	if ((descp->chip == LSDC_CHIP_7A2000) && (dispipe->index == 1))
+		writel(val, ldev->reg_base + LSDC_CURSOR1_POSITION_REG);
+	else
+		writel(val, ldev->reg_base + LSDC_CURSOR0_POSITION_REG);
+
+	/* Update cursor's location and format */
+	val = CURSOR_FORMAT_ARGB8888;
+
+	if (descp->chip == LSDC_CHIP_7A2000) {
+		/* LS7A2000 support 64x64 and 32x32 */
+		val |= CURSOR_SIZE_64X64;
+		if (dispipe->index == 1) {
+
val |= CURSOR_LOCATION_BIT; + writel(val, ldev->reg_base + LSDC_CURSOR1_CFG_REG); + } else if (dispipe->index == 0) { + val &= ~CURSOR_LOCATION_BIT; + writel(val, ldev->reg_base + LSDC_CURSOR0_CFG_REG); + } + } else { + /* + * Update the location of the cursor + * if bit 4 of LSDC_CURSOR_CFG_REG is 1, then the cursor will be + * locate at CRTC1, if bit 4 of LSDC_CURSOR_CFG_REG is 0, then + * the cursor will be locate at CRTC0. + */ + if (dispipe->index) + val |= CURSOR_LOCATION_BIT; + + writel(val, ldev->reg_base + LSDC_CURSOR0_CFG_REG); + } +} + +static void lsdc_cursor_atomic_disable(struct drm_plane *plane, + struct drm_plane_state *old_state) +{ + const struct lsdc_display_pipe * const dispipe = lsdc_cursor_to_dispipe(plane); + struct drm_device *ddev = plane->dev; + struct lsdc_device *ldev = to_lsdc(ddev); + const struct lsdc_chip_desc * const descp = ldev->desc; + u32 val; + + if ((descp->chip == LSDC_CHIP_7A2000) && (dispipe->index == 1)) { + val = readl(ldev->reg_base + LSDC_CURSOR1_CFG_REG); + val &= ~CURSOR_FORMAT_MASK; + val |= CURSOR_FORMAT_DISABLE; + writel(val, ldev->reg_base + LSDC_CURSOR1_CFG_REG); + } else { + val = readl(ldev->reg_base + LSDC_CURSOR0_CFG_REG); + val &= ~CURSOR_FORMAT_MASK; + val |= CURSOR_FORMAT_DISABLE; + writel(val, ldev->reg_base + LSDC_CURSOR0_CFG_REG); + } + + drm_dbg(ddev, "%s disabled\n", plane->name); +} + +static const struct drm_plane_helper_funcs lsdc_cursor_plane_helpers = { + .prepare_fb = lsdc_plane_prepare_fb, + .cleanup_fb = lsdc_plane_cleanup_fb, + .atomic_check = lsdc_cursor_atomic_check, + .atomic_update = lsdc_cursor_atomic_update, + .atomic_disable = lsdc_cursor_atomic_disable, +}; + +static int lsdc_plane_get_default_zpos(enum drm_plane_type type) +{ + switch (type) { + case DRM_PLANE_TYPE_PRIMARY: + return 0; + case DRM_PLANE_TYPE_OVERLAY: + return 1; + case DRM_PLANE_TYPE_CURSOR: + return 7; + } + + return 0; +} + +static void lsdc_plane_reset(struct drm_plane *plane) +{ + 
drm_atomic_helper_plane_reset(plane); + + plane->state->zpos = lsdc_plane_get_default_zpos(plane->type); + + drm_dbg(plane->dev, "%s reset\n", plane->name); +} + +static const struct drm_plane_funcs lsdc_plane_funcs = { + .update_plane = drm_atomic_helper_update_plane, + .disable_plane = drm_atomic_helper_disable_plane, + .destroy = drm_plane_cleanup, + .reset = lsdc_plane_reset, + .atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_plane_destroy_state, +}; + +int lsdc_plane_init(struct lsdc_device *ldev, + struct drm_plane *plane, + enum drm_plane_type type, + unsigned int index) +{ + struct drm_device *ddev = ldev->ddev; + int zpos = lsdc_plane_get_default_zpos(type); + unsigned int format_count; + const u32 *formats; + const char *name; + int ret; + + switch (type) { + case DRM_PLANE_TYPE_PRIMARY: + formats = lsdc_primary_formats; + format_count = ARRAY_SIZE(lsdc_primary_formats); + name = "primary-%u"; + break; + case DRM_PLANE_TYPE_CURSOR: + formats = lsdc_cursor_formats; + format_count = ARRAY_SIZE(lsdc_cursor_formats); + name = "cursor-%u"; + break; + case DRM_PLANE_TYPE_OVERLAY: + drm_err(ddev, "overlay plane is not supported\n"); + break; + } + + ret = drm_universal_plane_init(ddev, plane, 1 << index, + &lsdc_plane_funcs, + formats, format_count, + lsdc_fb_format_modifiers, + type, name, index); + if (ret) { + drm_err(ddev, "%s failed: %d\n", __func__, ret); + return ret; + } + + switch (type) { + case DRM_PLANE_TYPE_PRIMARY: + drm_plane_helper_add(plane, &lsdc_primary_plane_helpers); + drm_plane_create_zpos_property(plane, zpos, 0, 6); + break; + case DRM_PLANE_TYPE_CURSOR: + drm_plane_helper_add(plane, &lsdc_cursor_plane_helpers); + drm_plane_create_zpos_immutable_property(plane, zpos); + break; + case DRM_PLANE_TYPE_OVERLAY: + drm_err(ddev, "overlay plane is not supported\n"); + break; + } + + drm_plane_create_alpha_property(plane); + + return 0; +} diff --git 
a/drivers/gpu/drm/loongson/lsdc_pll.c b/drivers/gpu/drm/loongson/lsdc_pll.c new file mode 100644 index 000000000000..4b3e7650c99a --- /dev/null +++ b/drivers/gpu/drm/loongson/lsdc_pll.c @@ -0,0 +1,599 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Loongson Corporation + */ + +/* + * Authors: + * Sui Jingfeng + */ + +#include "lsdc_drv.h" +#include "lsdc_regs.h" +#include "lsdc_pll.h" + +/* + * The structure of the pixel PLL register is evolved with times. + * All loongson's cpu is little endian. + */ + +/* u64 */ +struct ls7a1000_pixpll_bitmap { + /* Byte 0 ~ Byte 3 */ + unsigned div_out : 7; /* 0 : 6 output clock divider */ + unsigned reserved_1 : 14; /* 7 : 20 */ + unsigned loopc : 9; /* 21 : 29 */ + unsigned reserved_2 : 2; /* 30 : 31 */ + + /* Byte 4 ~ Byte 7 */ + unsigned div_ref : 7; /* 0 : 6 input clock divider */ + unsigned locked : 1; /* 7 PLL locked flag */ + unsigned sel_out : 1; /* 8 output clk selector */ + unsigned reserved_3 : 2; /* 9 : 10 reserved */ + unsigned set_param : 1; /* 11 set pll param */ + unsigned bypass : 1; /* 12 */ + unsigned powerdown : 1; /* 13 */ + unsigned reserved_4 : 18; /* 14 : 31 */ +}; + +/* u128 */ +struct ls2k1000_pixpll_bitmap { + /* Byte 0 ~ Byte 3 */ + unsigned sel_out : 1; /* 0 select this PLL */ + unsigned reserved_1 : 1; /* 1 */ + unsigned sw_adj_en : 1; /* 2 allow software adjust */ + unsigned bypass : 1; /* 3 bypass L1 PLL */ + unsigned reserved_2 : 3; /* 4:6 */ + unsigned lock_en : 1; /* 7 enable lock L1 PLL */ + unsigned reserved_3 : 2; /* 8:9 */ + unsigned lock_check : 2; /* 10:11 precision check */ + unsigned reserved_4 : 4; /* 12:15 */ + + unsigned locked : 1; /* 16 PLL locked flag bit */ + unsigned reserved_5 : 2; /* 17:18 */ + unsigned powerdown : 1; /* 19 powerdown the pll if set */ + unsigned reserved_6 : 6; /* 20:25 */ + unsigned div_ref : 6; /* 26:31 L1 Prescaler */ + + /* Byte 4 ~ Byte 7 */ + unsigned loopc : 10; /* 32:41 Clock Multiplier */ + unsigned l1_div : 6; /* 42:47 not used */ 
+ unsigned reserved_7 : 16; /* 48:63 */ + + /* Byte 8 ~ Byte 15 */ + unsigned div_out : 6; /* 0 : 5 output clock divider */ + unsigned reserved_8 : 26; /* 6 : 31 */ + unsigned reserved_9 : 32; /* 70: 127 */ +}; + +/* u32 */ +struct ls2k0500_pixpll_bitmap { + /* Byte 0 ~ Byte 1 */ + unsigned sel_out : 1; + unsigned reserved_1 : 2; + unsigned sw_adj_en : 1; /* allow software adjust */ + unsigned bypass : 1; /* bypass L1 PLL */ + unsigned powerdown : 1; /* write 1 to powerdown the PLL */ + unsigned reserved_2 : 1; + unsigned locked : 1; /* 7 Is L1 PLL locked, read only */ + unsigned div_ref : 6; /* 8:13 ref clock divider */ + unsigned reserved_3 : 2; /* 14:15 */ + /* Byte 2 ~ Byte 3 */ + unsigned loopc : 8; /* 16:23 Clock Multiplier */ + unsigned div_out : 6; /* 24:29 output clock divider */ + unsigned reserved_4 : 2; /* 30:31 */ +}; + +union lsdc_pixpll_bitmap { + struct ls7a1000_pixpll_bitmap ls7a2000; + struct ls7a1000_pixpll_bitmap ls7a1000; + struct ls2k1000_pixpll_bitmap ls2k1000; + struct ls2k0500_pixpll_bitmap ls2k0500; + + u32 dword[4]; +}; + +struct pixclk_to_pll_parm { + /* kHz */ + unsigned int clock; + + /* unrelated information */ + unsigned short width; + unsigned short height; + unsigned short vrefresh; + + /* Stores parameters for programming the Hardware PLLs */ + unsigned short div_out; + unsigned short loopc; + unsigned short div_ref; +}; + +/* + * Pixel clock to PLL parameters translation table. + * Small static cached value to speed up PLL parameters calculation. 
+ */ +static const struct pixclk_to_pll_parm pll_param_table[] = { + {148500, 1920, 1080, 60, 11, 49, 3}, /* 1920x1080@60Hz */ + /* 1920x1080@50Hz */ + {174500, 1920, 1080, 75, 17, 89, 3}, /* 1920x1080@75Hz */ + {181250, 2560, 1080, 75, 8, 58, 4}, /* 2560x1080@75Hz */ + {146250, 1680, 1050, 60, 16, 117, 5}, /* 1680x1050@60Hz */ + {135000, 1280, 1024, 75, 10, 54, 4}, /* 1280x1024@75Hz */ + + {108000, 1600, 900, 60, 15, 81, 5}, /* 1600x900@60Hz */ + /* 1280x1024@60Hz */ + /* 1280x960@60Hz */ + /* 1152x864@75Hz */ + + {106500, 1440, 900, 60, 19, 81, 4}, /* 1440x900@60Hz */ + {88750, 1440, 900, 60, 16, 71, 5}, /* 1440x900@60Hz */ + {83500, 1280, 800, 60, 17, 71, 5}, /* 1280x800@60Hz */ + {71000, 1280, 800, 60, 20, 71, 5}, /* 1280x800@60Hz */ + + {74250, 1280, 720, 60, 22, 49, 3}, /* 1280x720@60Hz */ + /* 1280x720@50Hz */ + + {78750, 1024, 768, 75, 16, 63, 5}, /* 1024x768@75Hz */ + {75000, 1024, 768, 70, 29, 87, 4}, /* 1024x768@70Hz */ + {65000, 1024, 768, 60, 20, 39, 3}, /* 1024x768@60Hz */ + + {51200, 1024, 600, 60, 25, 64, 5}, /* 1024x600@60Hz */ + + {57284, 832, 624, 75, 24, 55, 4}, /* 832x624@75Hz */ + {49500, 800, 600, 75, 40, 99, 5}, /* 800x600@75Hz */ + {50000, 800, 600, 72, 44, 88, 4}, /* 800x600@72Hz */ + {40000, 800, 600, 60, 30, 36, 3}, /* 800x600@60Hz */ + {36000, 800, 600, 56, 50, 72, 4}, /* 800x600@56Hz */ + {31500, 640, 480, 75, 40, 63, 5}, /* 640x480@75Hz */ + /* 640x480@73Hz */ + + {30240, 640, 480, 67, 62, 75, 4}, /* 640x480@67Hz */ + {27000, 720, 576, 50, 50, 54, 4}, /* 720x576@60Hz */ + {25175, 640, 480, 60, 85, 107, 5}, /* 640x480@60Hz */ + {25200, 640, 480, 60, 50, 63, 5}, /* 640x480@60Hz */ + /* 720x480@60Hz */ +}; + +/** + * lsdc_pixpll_setup - ioremap the device dependent PLL registers + * + * @this: point to the object which this function is called from + */ +static int lsdc_pixpll_setup(struct lsdc_pll * const this) +{ + this->mmio = ioremap(this->reg_base, this->reg_size); + + return 0; +} + +/* + * Find a set of pll parameters (to generate 
pixel clock) from a static + * local table, which avoid to compute the pll parameter eachtime a + * modeset is triggered. + * + * @this: point to the object which this function is called from + * @clock: the desired output pixel clock, the unit is kHz + * @pout: point to where the parameters to store if found + * + * Return true if hit, otherwise return false. + */ +static bool lsdc_pixpll_find(struct lsdc_pll * const this, + unsigned int clock, + struct lsdc_pll_core_values * const pout) +{ + unsigned int num = ARRAY_SIZE(pll_param_table); + unsigned int i; + + for (i = 0; i < num; i++) { + if (clock != pll_param_table[i].clock) + continue; + + pout->div_ref = pll_param_table[i].div_ref; + pout->loopc = pll_param_table[i].loopc; + pout->div_out = pll_param_table[i].div_out; + + return true; + } + + drm_dbg(this->ddev, "pixel clock %u: miss\n", clock); + + return false; +} + +/* + * Find a set of pll parameters which have minimal difference with the desired + * pixel clock frequency. It does that by computing all of the possible + * combination. Compute the diff and find the combination with minimal diff. 
+ * + * clock_out = refclk / div_ref * loopc / div_out + * + * refclk is fixed as 100MHz in ls7a1000, ls2k1000 and ls2k0500 + * + * @this: point to the object from which this function is called + * @clk: the desired output pixel clock, the unit is kHz + * @pout: point to where the parameters to store if success + * + * Return true if a parameter is found, otherwise return false + */ +static bool lsdc_pixpll_compute(struct lsdc_pll * const this, + unsigned int clk, + struct lsdc_pll_core_values *pout) +{ + unsigned int refclk = this->ref_clock; + const unsigned int tolerance = 1000; + unsigned int min = tolerance; + unsigned int div_out, loopc, div_ref; + + if (lsdc_pixpll_find(this, clk, pout)) + return true; + + for (div_out = 6; div_out < 64; div_out++) { + for (div_ref = 3; div_ref < 6; div_ref++) { + for (loopc = 6; loopc < 161; loopc++) { + int diff; + + if (loopc < 12 * div_ref) + continue; + if (loopc > 32 * div_ref) + continue; + + diff = clk * div_out - refclk * loopc / div_ref; + + if (diff < 0) + diff = -diff; + + if (diff < min) { + min = diff; + pout->div_ref = div_ref; + pout->div_out = div_out; + pout->loopc = loopc; + + if (diff == 0) + return true; + } + } + } + } + + return min < tolerance; +} + +/* + * Update the pll parameters to hardware, target to the pixpll in ls7a1000 + * + * @this: point to the object from which this function is called + * @param: point to the core parameters passed in + * + * return 0 if successful. 
+ */ +static int ls7a1000_pixpll_param_update(struct lsdc_pll * const this, + struct lsdc_pll_core_values const *param) +{ + void __iomem *reg = this->mmio; + unsigned int counter = 0; + bool locked; + u32 val; + + /* Bypass the software configured PLL, using refclk directly */ + val = readl(reg + 0x4); + val &= ~(1 << 8); + writel(val, reg + 0x4); + + /* Powerdown the PLL */ + val = readl(reg + 0x4); + val |= (1 << 13); + writel(val, reg + 0x4); + + /* Clear the pll parameters */ + val = readl(reg + 0x4); + val &= ~(1 << 11); + writel(val, reg + 0x4); + + /* clear old value & config new value */ + val = readl(reg + 0x04); + val &= ~0x7F; + val |= param->div_ref; /* div_ref */ + writel(val, reg + 0x4); + + val = readl(reg); + val &= ~0x7f; + val |= param->div_out; /* div_out */ + + val &= ~(0x1ff << 21); + val |= param->loopc << 21; /* loopc */ + writel(val, reg); + + /* Set the pll the parameters */ + val = readl(reg + 0x4); + val |= (1 << 11); + writel(val, reg + 0x4); + + /* Powerup the PLL */ + val = readl(reg + 0x4); + val &= ~(1 << 13); + writel(val, reg + 0x4); + + /* Wait the PLL lock */ + do { + val = readl(reg + 0x4); + locked = val & 0x80; + counter++; + } while (!locked && (counter < 10000)); + + drm_dbg(this->ddev, "%u loop waited\n", counter); + + /* Switch to the software configured pll */ + val = readl(reg + 0x4); + val |= (1UL << 8); + writel(val, reg + 0x4); + + return 0; +} + +/* + * Update the pll parameters to hardware, target to the pixpll in ls2k1000 + * + * @this: point to the object from which this function is called + * @param: pointer to where the parameter is passed in + * + * return 0 if successful. 
+ */ +static int ls2k1000_pixpll_param_update(struct lsdc_pll * const this, + struct lsdc_pll_core_values const *param) +{ + void __iomem *reg = this->mmio; + unsigned int counter = 0; + bool locked = false; + u32 val; + + val = readl(reg); + /* Bypass the software configured PLL, using refclk directly */ + val &= ~(1 << 0); + writel(val, reg); + + /* Powerdown the PLL */ + val |= (1 << 19); + writel(val, reg); + + /* Allow the software configuration */ + val &= ~(1 << 2); + writel(val, reg); + + /* allow L1 PLL lock */ + val = (1L << 7) | (3L << 10); + writel(val, reg); + + /* clear div_ref bit field */ + val &= ~(0x3f << 26); + /* set div_ref bit field */ + val = val | (param->div_ref << 26); + writel(val, reg); + + val = readl(reg + 4); + /* clear loopc bit field */ + val &= ~0x0fff; + /* set loopc bit field */ + val |= param->loopc; + writel(val, reg + 4); + + /* set div_out */ + writel(param->div_out, reg + 8); + + val = readl(reg); + /* use the software configure param */ + val |= (1 << 2); + /* powerup the PLL */ + val &= ~(1 << 19); + writel(val, reg); + + /* wait pll setup and locked */ + do { + val = readl(reg); + locked = val & 0x10000; + counter++; + } while (!locked && (counter < 10000)); + + drm_dbg(this->ddev, "%u loop waited\n", counter); + + /* Switch to the above software configured PLL instead of refclk */ + val |= 1; + writel(val, reg); + + return 0; +} + +/* + * Update the pll parameters to hardware, target to the pixpll in ls2k0500 + * + * @this: point to the object which calling this function + * @param: pointer to where the parameters passed in + * + * return 0 if successful. 
+ */ +static int ls2k0500_pixpll_param_update(struct lsdc_pll * const this, + struct lsdc_pll_core_values const *param) +{ + void __iomem *reg = this->mmio; + unsigned int counter = 0; + bool locked = false; + u32 val; + + /* Bypass the software configured PLL, using refclk directly */ + val = readl(reg); + val &= ~(1 << 0); + writel(val, reg); + + /* Powerdown the PLL */ + val = readl(reg); + val |= (1 << 5); + writel(val, reg); + + /* Allow the software configuration */ + val |= (1 << 3); + writel(val, reg); + + /* Update the pll params */ + val = (param->div_out << 24) | + (param->loopc << 16) | + (param->div_ref << 8); + + writel(val, reg); + + /* Powerup the PLL */ + val = readl(reg); + val &= ~(1 << 5); + writel(val, reg); + + /* wait pll setup and locked */ + do { + val = readl(reg); + locked = val & 0x80; + counter++; + } while (!locked && (counter < 10000)); + + drm_dbg(this->ddev, "%u loop waited\n", counter); + + /* Switch to the above software configured PLL instead of refclk */ + writel((val | 1), reg); + + return 0; +} + +static unsigned int lsdc_get_clock_rate(struct lsdc_pll * const this, + struct lsdc_pll_core_values *pout) +{ + struct drm_device *ddev = this->ddev; + struct lsdc_device *ldev = to_lsdc(ddev); + const struct lsdc_chip_desc * const desc = ldev->desc; + unsigned int out; + union lsdc_pixpll_bitmap parms; + + if (desc->chip == LSDC_CHIP_7A2000) { + struct ls7a1000_pixpll_bitmap *obj = &parms.ls7a2000; + + parms.dword[0] = readl(this->mmio); + parms.dword[1] = readl(this->mmio + 4); + out = this->ref_clock / obj->div_ref * obj->loopc / obj->div_out; + if (pout) { + pout->div_ref = obj->div_ref; + pout->loopc = obj->loopc; + pout->div_out = obj->div_out; + } + } else if (desc->chip == LSDC_CHIP_7A1000) { + struct ls7a1000_pixpll_bitmap *obj = &parms.ls7a1000; + + parms.dword[0] = readl(this->mmio); + parms.dword[1] = readl(this->mmio + 4); + out = this->ref_clock / obj->div_ref * obj->loopc / obj->div_out; + if (pout) { + pout->div_ref = 
obj->div_ref; + pout->loopc = obj->loopc; + pout->div_out = obj->div_out; + } + } else if (desc->chip == LSDC_CHIP_2K1000) { + struct ls2k1000_pixpll_bitmap *obj = &parms.ls2k1000; + + parms.dword[0] = readl(this->mmio); + parms.dword[1] = readl(this->mmio + 4); + parms.dword[2] = readl(this->mmio + 8); + parms.dword[3] = readl(this->mmio + 12); + out = this->ref_clock / obj->div_ref * obj->loopc / obj->div_out; + if (pout) { + pout->div_ref = obj->div_ref; + pout->loopc = obj->loopc; + pout->div_out = obj->div_out; + } + } else if (desc->chip == LSDC_CHIP_2K0500) { + struct ls2k0500_pixpll_bitmap *obj = &parms.ls2k0500; + + parms.dword[0] = readl(this->mmio); + out = this->ref_clock / obj->div_ref * obj->loopc / obj->div_out; + if (pout) { + pout->div_ref = obj->div_ref; + pout->loopc = obj->loopc; + pout->div_out = obj->div_out; + } + } else { + drm_err(ddev, "unknown chip, the driver need update\n"); + return 0; + } + + return out; +} + +static const struct lsdc_pixpll_funcs ls7a2000_pixpll_funcs = { + .setup = lsdc_pixpll_setup, + .compute = lsdc_pixpll_compute, + .update = ls7a1000_pixpll_param_update, + .get_clock_rate = lsdc_get_clock_rate, +}; + +static const struct lsdc_pixpll_funcs ls7a1000_pixpll_funcs = { + .setup = lsdc_pixpll_setup, + .compute = lsdc_pixpll_compute, + .update = ls7a1000_pixpll_param_update, + .get_clock_rate = lsdc_get_clock_rate, +}; + +static const struct lsdc_pixpll_funcs ls2k1000_pixpll_funcs = { + .setup = lsdc_pixpll_setup, + .compute = lsdc_pixpll_compute, + .update = ls2k1000_pixpll_param_update, + .get_clock_rate = lsdc_get_clock_rate, +}; + +static const struct lsdc_pixpll_funcs ls2k0500_pixpll_funcs = { + .setup = lsdc_pixpll_setup, + .compute = lsdc_pixpll_compute, + .update = ls2k0500_pixpll_param_update, + .get_clock_rate = lsdc_get_clock_rate, +}; + +int lsdc_pixpll_init(struct lsdc_pll * const this, + struct drm_device *ddev, + unsigned int index) +{ + struct lsdc_device *ldev = to_lsdc(ddev); + const struct 
lsdc_chip_desc * const descp = ldev->desc; + + this->ddev = ddev; + this->index = index; + this->ref_clock = LSDC_PLL_REF_CLK; + + if (descp->chip == LSDC_CHIP_7A2000) { + if (index == 0) + this->reg_base = LS7A1000_CFG_REG_BASE + LS7A1000_PIX_PLL0_REG; + else if (index == 1) + this->reg_base = LS7A1000_CFG_REG_BASE + LS7A1000_PIX_PLL1_REG; + this->reg_size = 8; + this->funcs = &ls7a2000_pixpll_funcs; + } else if (descp->chip == LSDC_CHIP_7A1000) { + if (index == 0) + this->reg_base = LS7A1000_CFG_REG_BASE + LS7A1000_PIX_PLL0_REG; + else if (index == 1) + this->reg_base = LS7A1000_CFG_REG_BASE + LS7A1000_PIX_PLL1_REG; + this->reg_size = 8; + this->funcs = &ls7a1000_pixpll_funcs; + } else if (descp->chip == LSDC_CHIP_2K1000) { + if (index == 0) + this->reg_base = LS2K1000_CFG_REG_BASE + LS2K1000_PIX_PLL0_REG; + else if (index == 1) + this->reg_base = LS2K1000_CFG_REG_BASE + LS2K1000_PIX_PLL1_REG; + + this->reg_size = 16; + this->funcs = &ls2k1000_pixpll_funcs; + } else if (descp->chip == LSDC_CHIP_2K0500) { + if (index == 0) + this->reg_base = LS2K0500_CFG_REG_BASE + LS2K0500_PIX_PLL0_REG; + else if (index == 1) + this->reg_base = LS2K0500_CFG_REG_BASE + LS2K0500_PIX_PLL1_REG; + + this->reg_size = 4; + this->funcs = &ls2k0500_pixpll_funcs; + } else { + drm_err(this->ddev, "unknown chip, the driver need update\n"); + return -ENOENT; + } + + return this->funcs->setup(this); +} diff --git a/drivers/gpu/drm/loongson/lsdc_pll.h b/drivers/gpu/drm/loongson/lsdc_pll.h new file mode 100644 index 000000000000..2b9101f56097 --- /dev/null +++ b/drivers/gpu/drm/loongson/lsdc_pll.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022 Loongson Corporation + */ + +/* + * Authors: + * Sui Jingfeng + */ + +#ifndef __LSDC_PLL_H__ +#define __LSDC_PLL_H__ + +#include + +/* + * PIXEL PLL hardware structure + * + * refclk: reference frequency, 100 MHz from external oscillator + * outclk: output frequency desired. 
+ * + * + * L1 Fref Fvco L2 + * refclk +-----------+ +------------------+ +---------+ outclk + * ---+---> | Prescaler | ---> | Clock Multiplier | ---> | divider | --------> + * | +-----------+ +------------------+ +---------+ ^ + * | ^ ^ ^ | + * | | | | | + * | | | | | + * | div_ref loopc div_out | + * | | + * +---- sel_out (bypass above software configurable clock if==1) ----+ + * + * sel_out: PLL clock output selector. + * + * If sel_out == 1, it will take refclk as output directly, + * the L1 Prescaler and the out divider are bypassed. + * + * If sel_out == 0, then: + * outclk = refclk / div_ref * loopc / div_out; + * + * PLL hardware working requirements: + * + * 1) 20 MHz <= refclk / div_ref <= 40 MHz + * 2) 1.2 GHz <= refclk / div_ref * loopc <= 3.2 GHz + * + */ + +struct lsdc_pll_core_values { + unsigned int div_ref; + unsigned int loopc; + unsigned int div_out; +}; + +struct lsdc_pll; + +struct lsdc_pixpll_funcs { + int (*setup)(struct lsdc_pll * const this); + bool (*compute)(struct lsdc_pll * const this, unsigned int clock, + struct lsdc_pll_core_values *params_out); + int (*update)(struct lsdc_pll * const this, + struct lsdc_pll_core_values const *params_in); + unsigned int (*get_clock_rate)(struct lsdc_pll * const this, + struct lsdc_pll_core_values *pout); +}; + +struct lsdc_pll { + const struct lsdc_pixpll_funcs *funcs; + struct drm_device *ddev; + void __iomem *mmio; + + /* PLL register offset */ + u32 reg_base; + /* PLL register size in bytes */ + u32 reg_size; + + /* 100000kHz, fixed on all boards found */ + unsigned int ref_clock; + + unsigned int index; +}; + +int lsdc_pixpll_init(struct lsdc_pll * const this, + struct drm_device *ddev, + unsigned int index); + +#endif diff --git a/drivers/gpu/drm/loongson/lsdc_regs.h b/drivers/gpu/drm/loongson/lsdc_regs.h new file mode 100644 index 000000000000..ffa6285530d7 --- /dev/null +++ b/drivers/gpu/drm/loongson/lsdc_regs.h @@ -0,0 +1,252 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022
Loongson Corporation + */ + +/* + * Authors: + * Sui Jingfeng + */ + +#ifndef __LSDC_REGS_H__ +#define __LSDC_REGS_H__ + +#include +#include + +/* + * PIXEL PLL + */ +#define LSDC_PLL_REF_CLK 100000 /* kHz */ + +/* + * Those PLL registers are not located at DC reg bar space, + * they are relative to LSXXXXX_CFG_REG_BASE. + * XXXXX = 7A1000, 2K1000, 2K0500 + */ + +/* LS2K1000 */ +#define LS2K1000_PIX_PLL0_REG 0x04B0 +#define LS2K1000_PIX_PLL1_REG 0x04C0 +#define LS2K1000_CFG_REG_BASE 0x1fe10000 + +/* LS7A1000 */ +#define LS7A1000_PIX_PLL0_REG 0x04B0 +#define LS7A1000_PIX_PLL1_REG 0x04C0 +#define LS7A1000_CFG_REG_BASE 0x10010000 + +/* LS2K0500 */ +#define LS2K0500_PIX_PLL0_REG 0x0418 +#define LS2K0500_PIX_PLL1_REG 0x0420 +#define LS2K0500_CFG_REG_BASE 0x1fe10000 + +/* + * CRTC CFG REG + */ +#define CFG_PIX_FMT_MASK GENMASK(2, 0) + +enum lsdc_pixel_format { + LSDC_PF_NONE = 0, + LSDC_PF_ARGB4444 = 1, /* ARGB A:4 bits R/G/B: 4 bits each [16 bits] */ + LSDC_PF_ARGB1555 = 2, /* ARGB A:1 bit RGB:15 bits [16 bits] */ + LSDC_PF_RGB565 = 3, /* RGB [16 bits] */ + LSDC_PF_XRGB8888 = 4, /* XRGB [32 bits] */ +}; + +/* Each CRTC has two FB address registers, CFG_FB_IDX_BIT specifies + * which fb address register is currently in use by the CRTC. + * Setting CFG_PAGE_FLIP_BIT bit will trigger the switch. The switch + * finishes at the vblank and if you want to switch back you can set + * CFG_PAGE_FLIP_BIT again.
+ */ +#define CFG_PAGE_FLIP_BIT BIT(7) +#define CFG_OUTPUT_EN_BIT BIT(8) +/* CRTC0 clone from CRTC1 or CRTC1 clone from CRTC0 using hardware logic */ +#define CFG_PANEL_SWITCH BIT(9) +/* Indicates which fb addr reg is currently in use */ +#define CFG_FB_IDX_BIT BIT(11) +#define CFG_GAMMAR_EN_BIT BIT(12) + +/* CRTC get soft reset if voltage level change from 1 -> 0 */ +#define CFG_RESET_BIT BIT(20) + +#define EN_HSYNC_BIT BIT(30) +#define INV_HSYNC_BIT BIT(31) +#define EN_VSYNC_BIT BIT(30) +#define INV_VSYNC_BIT BIT(31) + +/******** CRTC0 & DVO0 ********/ +#define LSDC_CRTC0_CFG_REG 0x1240 +#define LSDC_CRTC0_FB_ADDR0_REG 0x1260 +#define LSDC_CRTC0_FB_ADDR1_REG 0x1580 +#define LSDC_CRTC0_FB_HI_ADDR_REG 0x15A0 +#define LSDC_CRTC0_STRIDE_REG 0x1280 +#define LSDC_CRTC0_FB_ORIGIN_REG 0x1300 +#define LSDC_CRTC0_HDISPLAY_REG 0x1400 +#define LSDC_CRTC0_HSYNC_REG 0x1420 +#define LSDC_CRTC0_VDISPLAY_REG 0x1480 +#define LSDC_CRTC0_VSYNC_REG 0x14A0 +#define LSDC_CRTC0_GAMMA_INDEX_REG 0x14E0 +#define LSDC_CRTC0_GAMMA_DATA_REG 0x1500 + +/******** CRTC1 & DVO1 ********/ +#define LSDC_CRTC1_CFG_REG 0x1250 +#define LSDC_CRTC1_FB_ADDR0_REG 0x1270 +#define LSDC_CRTC1_FB_ADDR1_REG 0x1590 +#define LSDC_CRTC1_FB_HI_ADDR_REG 0x15C0 +#define LSDC_CRTC1_STRIDE_REG 0x1290 +#define LSDC_CRTC1_FB_ORIGIN_REG 0x1310 +#define LSDC_CRTC1_HDISPLAY_REG 0x1410 +#define LSDC_CRTC1_HSYNC_REG 0x1430 +#define LSDC_CRTC1_VDISPLAY_REG 0x1490 +#define LSDC_CRTC1_VSYNC_REG 0x14B0 +#define LSDC_CRTC1_GAMMA_INDEX_REG 0x14F0 +#define LSDC_CRTC1_GAMMA_DATA_REG 0x1510 + +#define LSDC_REGS_OFFSET 0x0010 + +/* + * Hardware cursor + * There is only one hardware cursor shared by two CRTC in ls7a1000, + * ls2k1000 and ls2k0500.
+ */ +#define LSDC_CURSOR0_CFG_REG 0x1520 +#define LSDC_CURSOR0_ADDR_REG 0x1530 +#define LSDC_CURSOR0_POSITION_REG 0x1540 +#define LSDC_CURSOR0_BG_COLOR_REG 0x1550 /* background color */ +#define LSDC_CURSOR0_FG_COLOR_REG 0x1560 /* foreground color */ + +#define LSDC_CURS_MIN_SIZE 1 +#define LSDC_CURS_MAX_SIZE 64 +#define CURSOR_FORMAT_MASK GENMASK(1, 0) +#define CURSOR_FORMAT_DISABLE 0 +#define CURSOR_FORMAT_MONOCHROME 1 +#define CURSOR_FORMAT_ARGB8888 2 +#define CURSOR_SIZE_64X64 BIT(2) +#define CURSOR_LOCATION_BIT BIT(4) + +/* LS7A2000 has two hardware cursors */ + +#define LSDC_CURSOR1_CFG_REG 0x1670 +#define LSDC_CURSOR1_ADDR_REG 0x1680 +#define LSDC_CURSOR1_POSITION_REG 0x1690 +#define LSDC_CURSOR1_BG_COLOR_REG 0x16A0 /* background color */ +#define LSDC_CURSOR1_FG_COLOR_REG 0x16B0 /* foreground color */ + +/* + * DC Interrupt Control Register, 32bit, Address Offset: 1570 + * + * Bits 0:10 indicate the interrupt type, read only + * Bits 16:26 control if the specific interrupt corresponding to bit 0~10 + * is enabled or not. Write 1 to enable, write 0 to disable + * + * RF: Read Finished + * IDBU : Internal Data Buffer Underflow + * IDBFU : Internal Data Buffer Fatal Underflow + * + * + * +-------+-------------------------------+-------+--------+--------+-------+ + * | 31:27 | 26:16 | 15:11 | 10 | 9 | 8 | + * +-------+-------------------------------+-------+--------+--------+-------+ + * | N/A | Interrupt Enable Control Bits | N/A | IDBFU0 | IDBFU1 | IDBU0 | + * +-------+-------------------------------+-------+--------+--------+-------+ + * + * Bit 4 is cursor buffer read finished, no use.
+ * + * +-------+-----+-----+-----+--------+--------+--------+--------+ + * | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | + * +-------+-----+-----+-----+--------+--------+--------+--------+ + * | IDBU1 | RF0 | RF1 | | HSYNC0 | VSYNC0 | HSYNC1 | VSYNC1 | + * +-------+-----+-----+-----+--------+--------+--------+--------+ + * + */ + +#define LSDC_INT_REG 0x1570 + +#define INT_CRTC0_VS BIT(2) +#define INT_CRTC0_HS BIT(3) +#define INT_CRTC0_RF BIT(6) +#define INT_CRTC0_IDBU BIT(8) +#define INT_CRTC0_IDBFU BIT(10) + +#define INT_CURSOR_RF BIT(4) + +#define INT_CRTC1_VS BIT(0) +#define INT_CRTC1_HS BIT(1) +#define INT_CRTC1_RF BIT(5) +#define INT_CRTC1_IDBU BIT(7) +#define INT_CRTC1_IDBFU BIT(9) + +#define INT_CRTC0_VS_EN BIT(18) +#define INT_CRTC0_HS_EN BIT(19) +#define INT_CRTC0_RF_EN BIT(22) +#define INT_CRTC0_IDBU_EN BIT(24) +#define INT_CRTC0_IDBFU_EN BIT(26) + +#define INT_CURSOR_RF_EN BIT(20) + +#define INT_CRTC1_VS_EN BIT(16) +#define INT_CRTC1_HS_EN BIT(17) +#define INT_CRTC1_RF_EN BIT(21) +#define INT_CRTC1_IDBU_EN BIT(23) +#define INT_CRTC1_IDBFU_EN BIT(25) + +#define INT_STATUS_MASK GENMASK(10, 0) + +/* + * LS7A1000 has 4 gpios which are under control of the LS7A_DC_GPIO_DAT_REG + * and LS7A_DC_GPIO_DIR_REG register, it has no relationship with the general + * GPIO hardware. Those registers are in the DC register space on LS7A1000. + * + * Those GPIOs are used to emulate I2C, for reading edid and monitor detection + * + * LS2k1000 and LS2K0500 don't have those registers, they use hardware i2c or + * general GPIO emulated i2c from other modules.
+ * + * GPIO data register + * Address offset: 0x1650 + * +---------------+-----------+-----------+ + * | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | + * +---------------+-----------+-----------+ + * | | DVO1 | DVO0 | + * + N/A +-----------+-----------+ + * | | SCL | SDA | SCL | SDA | + * +---------------+-----------+-----------+ + */ +#define LS7A_DC_GPIO_DAT_REG 0x1650 + +/* + * GPIO Input/Output direction control register + * Address offset: 0x1660 + * write 1 for Input, 0 for Output. + */ +#define LS7A_DC_GPIO_DIR_REG 0x1660 + +/* + * LS7A2000 Built-in HDMI Encoder + */ +#define HDMI_EN BIT(0) +#define HDMI_PACKET_EN BIT(1) + +#define HDMI0_ZONE_REG 0x1700 +#define HDMI1_ZONE_REG 0x1710 + +#define HDMI0_CTRL_REG 0x1720 +#define HDMI1_CTRL_REG 0x1730 + +#define HDMI_PLL_EN BIT(0) +#define HDMI_PLL_LOCKED BIT(16) + +#define HDMI0_PHY_CTRL_REG 0x1800 +#define HDMI0_PLL_REG 0x1820 + +#define HDMI1_PHY_CTRL_REG 0x1810 +#define HDMI1_PLL_REG 0x1830 + +#define LS7A2000_DMA_STEP_MASK GENMASK(17, 16) +#define DMA_STEP_256_BYTE (0 << 16) +#define DMA_STEP_128_BYTE (1 << 16) +#define DMA_STEP_64_BYTE (2 << 16) +#define DMA_STEP_32_BYTE (3 << 16) + +#endif -- Gitee From 923ddb84d8f127bf97d450c79a1e7b0cfa789838 Mon Sep 17 00:00:00 2001 From: Hongchen Zhang Date: Tue, 31 Jan 2023 09:28:41 +0800 Subject: [PATCH 34/36] LoongArch: Change definition of cpu_relax() for Loongson-3 LoongArch inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP -------------------------------- This patch changes the definition of cpu_relax() to smp_mb() for Loongson-3, forcing a flush of the SFB on SMP systems which will cause any pending writes to make it as far as the L1 caches where they will become visible to other CPUs. If the kernel is not compiled for SMP support, this will expand to a barrier() as before. Reference commit a30718868915 ("MIPS: Change definition of cpu_relax() for Loongson-3") for detail. 
Signed-off-by: Huacai Chen Signed-off-by: Hongchen Zhang Change-Id: I1ad7efc19b77ead5932ff8429819db0856fb8e33 --- arch/loongarch/include/asm/vdso/processor.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/include/asm/vdso/processor.h b/arch/loongarch/include/asm/vdso/processor.h index ef5770b343a0..fa3d08bbde94 100644 --- a/arch/loongarch/include/asm/vdso/processor.h +++ b/arch/loongarch/include/asm/vdso/processor.h @@ -7,7 +7,16 @@ #ifndef __ASSEMBLY__ -#define cpu_relax() barrier() +/* + * Loongson-3's SFB (Store-Fill-Buffer) may buffer writes indefinitely when a + * tight read loop is executed, because reads take priority over writes & the + * hardware (incorrectly) doesn't ensure that writes will eventually occur. + * + * Since spin loops of any kind should have a cpu_relax() in them, force an SFB + * flush from cpu_relax() such that any pending writes will become visible as + * expected. + */ +#define cpu_relax() smp_mb() #endif /* __ASSEMBLY__ */ -- Gitee From 7f3981abc9b38c69305a450a982d0157f43820b8 Mon Sep 17 00:00:00 2001 From: Hongchen Zhang Date: Tue, 31 Jan 2023 20:09:00 +0800 Subject: [PATCH 35/36] LoongArch: modify defconfig according to other architectures LoongArch inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP -------------------------------- 1. enable watchdog support 2. enable some net class support, used by tc 3. use voluntary preemption 4.
compile r8169 driver as m Signed-off-by: Hongchen Zhang Change-Id: I59315f265bfe54a00936b5c47aaa3c2e805d3ee6 --- arch/loongarch/configs/loongson3_defconfig | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index e4eb90e9d86d..705b8344e562 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -3,7 +3,7 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y -CONFIG_PREEMPT=y +CONFIG_PREEMPT_VOLUNTARY=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_TASKSTATS=y @@ -281,10 +281,17 @@ CONFIG_NET_SCH_TBF=m CONFIG_NET_SCH_NETEM=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_CLS_BASIC=m +CONFIG_NET_CLS_TCINDEX=m +CONFIG_NET_CLS_ROUTE4=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_U32=m -CONFIG_NET_CLS_CGROUP=m +CONFIG_NET_CLS_RSVP=m +CONFIG_NET_CLS_RSVP6=m +CONFIG_NET_CLS_FLOW=m +CONFIG_NET_CLS_CGROUP=y CONFIG_NET_CLS_BPF=m +CONFIG_NET_CLS_FLOWER=m +CONFIG_NET_CLS_MATCHALL=m CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=m CONFIG_NET_ACT_GACT=m @@ -470,7 +477,7 @@ CONFIG_TXGBE=m # CONFIG_NET_VENDOR_RDC is not set CONFIG_8139CP=m CONFIG_8139TOO=m -CONFIG_R8169=y +CONFIG_R8169=m # CONFIG_NET_VENDOR_RENESAS is not set # CONFIG_NET_VENDOR_ROCKER is not set # CONFIG_NET_VENDOR_SAMSUNG is not set @@ -567,6 +574,11 @@ CONFIG_SENSORS_LM75=m CONFIG_SENSORS_LM93=m CONFIG_SENSORS_W83795=m CONFIG_SENSORS_W83627HF=m +CONFIG_WATCHDOG=y +CONFIG_WATCHDOG_CORE=y +CONFIG_WATCHDOG_SYSFS=y +CONFIG_SOFT_WATCHDOG=m +CONFIG_GPIO_WATCHDOG=m CONFIG_RC_CORE=m CONFIG_LIRC=y CONFIG_RC_DECODERS=y -- Gitee From ef895cd613a736758e1516e49159e242d676cdf3 Mon Sep 17 00:00:00 2001 From: Hongchen Zhang Date: Wed, 15 Feb 2023 09:50:05 +0800 Subject: [PATCH 36/36] LoongArch: remove __ARCH_WANT_NEW_STAT LoongArch inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6BWFP 
-------------------------------- remove fstatat and fstat syscall, not used by glibc now. Signed-off-by: Hongchen Zhang Change-Id: Ib54607ad15bdb7fa59626caebd851312c490d897 --- arch/loongarch/include/uapi/asm/unistd.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/loongarch/include/uapi/asm/unistd.h b/arch/loongarch/include/uapi/asm/unistd.h index b344b1f91715..fcb668984f03 100644 --- a/arch/loongarch/include/uapi/asm/unistd.h +++ b/arch/loongarch/include/uapi/asm/unistd.h @@ -1,5 +1,4 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#define __ARCH_WANT_NEW_STAT #define __ARCH_WANT_SYS_CLONE #define __ARCH_WANT_SYS_CLONE3 -- Gitee