diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index d586225cb96e0ae7fe0132f990b2d2d8d81f89fc..77de7a1370f7e0ed2ba378be461fdbc947e4402e 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -39,6 +39,7 @@ config LOONGARCH select ARCH_INLINE_SPIN_UNLOCK_BH if !PREEMPTION select ARCH_INLINE_SPIN_UNLOCK_IRQ if !PREEMPTION select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPTION + select ARCH_KEEP_MEMBLOCK select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select ARCH_SPARSEMEM_ENABLE @@ -75,6 +76,8 @@ config LOONGARCH select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL + select CPU_SUPPORTS_LSX + select CPU_SUPPORTS_LASX select GPIOLIB select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_MMAP_RND_BITS if MMU @@ -145,6 +148,36 @@ config CPU_HAS_PREFETCH bool default y +config CPU_HAS_LSX + bool "Support for the Loongson SIMD Extension" + depends on CPU_SUPPORTS_LSX + depends on 64BIT + help + Loongson SIMD Extension (LSX) introduces 128 bit wide vector registers + and a set of SIMD instructions to operate on them. When this option + is enabled the kernel will support allocating & switching LSX + vector register contexts. If you know that your kernel will only be + running on CPUs which do not support LSX or that your userland will + not be making use of it then you may wish to say N here to reduce + the size & complexity of your kernel. + + If unsure, say Y. + +config CPU_HAS_LASX + bool "Support for the Loongson Advanced SIMD Extension" + depends on CPU_SUPPORTS_LASX + depends on 64BIT && CPU_HAS_LSX + help + Loongson Advanced SIMD Extension is 256 bit wide SIMD extension. + + If unsure, say Y. 
+ +config CPU_SUPPORTS_LSX + bool + +config CPU_SUPPORTS_LASX + bool + config GENERIC_CALIBRATE_DELAY def_bool y @@ -206,6 +239,9 @@ config SCHED_OMIT_FRAME_POINTER bool default y +config AS_HAS_EXPLICIT_RELOCS + def_bool $(as-instr,x:pcalau12i \$t0$(comma)%pc_hi20(x)) + menu "Kernel type and options" source "kernel/Kconfig.hz" @@ -478,4 +514,10 @@ source "drivers/acpi/Kconfig" endmenu +menu "CPU Power Management" + +source "drivers/cpufreq/Kconfig" + +endmenu + source "drivers/firmware/Kconfig" diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index fa2689a7da73e8f6177e626ac6631403edab51c9..3ab3625946a900dd68d80b536d7e02a843abbcc1 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -40,10 +40,27 @@ endif cflags-y += -G0 -pipe -msoft-float LDFLAGS_vmlinux += -G0 -static -n -nostdlib + +# When the assembler supports explicit relocation hint, we must use it. +# GCC may have -mexplicit-relocs off by default if it was built with an old +# assembler, so we force it via an option. +# +# When the assembler does not support explicit relocation hint, we can't use +# it. Disable it if the compiler supports it. +# +# If you've seen "unknown reloc hint" message building the kernel and you are +# now wondering why "-mexplicit-relocs" is not wrapped with cc-option: the +# combination of a "new" assembler and "old" compiler is not supported. Either +# upgrade the compiler or downgrade the assembler. 
+ifdef CONFIG_AS_HAS_EXPLICIT_RELOCS +cflags-y += -mexplicit-relocs +else +cflags-y += $(call cc-option,-mno-explicit-relocs) KBUILD_AFLAGS_KERNEL += -Wa,-mla-global-with-pcrel KBUILD_CFLAGS_KERNEL += -Wa,-mla-global-with-pcrel KBUILD_AFLAGS_MODULE += -Wa,-mla-global-with-abs KBUILD_CFLAGS_MODULE += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs +endif cflags-y += -ffreestanding cflags-y += $(call cc-option, -mno-check-zero-division) diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 3712552e18d39dfe3164fc2c90771db26f1cd46d..3b4b63235fa88964987d906a0dbb817da2854f93 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -3,7 +3,6 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y -CONFIG_BPF_SYSCALL=y CONFIG_PREEMPT=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y @@ -19,7 +18,6 @@ CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y CONFIG_CGROUP_PIDS=y CONFIG_CGROUP_FREEZER=y -CONFIG_CGROUP_HUGETLB=y CONFIG_CPUSETS=y CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y @@ -33,29 +31,22 @@ CONFIG_SYSFS_DEPRECATED=y CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y +CONFIG_BPF_SYSCALL=y CONFIG_USERFAULTFD=y CONFIG_PERF_EVENTS=y # CONFIG_COMPAT_BRK is not set -CONFIG_LOONGARCH=y -CONFIG_64BIT=y -CONFIG_MACH_LOONGSON64=y -CONFIG_DMI=y -CONFIG_EFI=y -CONFIG_SMP=y +CONFIG_CPU_HAS_LSX=y +CONFIG_CPU_HAS_LASX=y CONFIG_HOTPLUG_CPU=y -CONFIG_NR_CPUS=64 CONFIG_NUMA=y -CONFIG_PAGE_SIZE_16KB=y -CONFIG_HZ_250=y -CONFIG_ACPI=y CONFIG_ACPI_SPCR_TABLE=y -CONFIG_ACPI_HOTPLUG_CPU=y -CONFIG_ACPI_TAD=y CONFIG_ACPI_DOCK=y CONFIG_ACPI_IPMI=m CONFIG_ACPI_PCI_SLOT=y -CONFIG_ACPI_HOTPLUG_MEMORY=y -CONFIG_EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER=y +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_STAT=y +CONFIG_CPU_FREQ_GOV_POWERSAVE=y +CONFIG_LOONGSON3_ACPI_CPUFREQ=y CONFIG_EFI_CAPSULE_LOADER=m CONFIG_EFI_TEST=m CONFIG_MODULES=y @@ -68,13 +59,8 @@ 
CONFIG_PARTITION_ADVANCED=y CONFIG_IOSCHED_BFQ=y CONFIG_BFQ_GROUP_IOSCHED=y CONFIG_BINFMT_MISC=m -CONFIG_MEMORY_HOTPLUG=y -CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE=y -CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y -CONFIG_ZSWAP=y -CONFIG_ZSWAP_COMPRESSOR_DEFAULT_ZSTD=y CONFIG_ZPOOL=y CONFIG_ZBUD=y CONFIG_Z3FOLD=y @@ -102,19 +88,37 @@ CONFIG_TCP_CONG_ADVANCED=y CONFIG_TCP_CONG_BBR=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IPV6_MROUTE=y CONFIG_NETWORK_PHY_TIMESTAMPING=y CONFIG_NETFILTER=y CONFIG_BRIDGE_NETFILTER=m -CONFIG_NETFILTER_NETLINK_LOG=m CONFIG_NF_CONNTRACK=m CONFIG_NF_LOG_NETDEV=m +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_ZONES=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CONNTRACK_TIMEOUT=y +CONFIG_NF_CONNTRACK_TIMESTAMP=y CONFIG_NF_CONNTRACK_AMANDA=m CONFIG_NF_CONNTRACK_FTP=m +CONFIG_NF_CONNTRACK_H323=m +CONFIG_NF_CONNTRACK_IRC=m CONFIG_NF_CONNTRACK_NETBIOS_NS=m +CONFIG_NF_CONNTRACK_SNMP=m +CONFIG_NF_CONNTRACK_PPTP=m +CONFIG_NF_CONNTRACK_SANE=m +CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_CT_NETLINK=m +CONFIG_NF_CT_NETLINK_TIMEOUT=m +CONFIG_NF_CT_NETLINK_HELPER=m +CONFIG_NETFILTER_NETLINK_GLUE_CT=y CONFIG_NF_TABLES=m +CONFIG_NF_TABLES_INET=y +CONFIG_NF_TABLES_NETDEV=y +CONFIG_NFT_NUMGEN=m +CONFIG_NFT_CT=m CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m @@ -129,22 +133,30 @@ CONFIG_NFT_QUOTA=m CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NFT_HASH=m +CONFIG_NFT_FIB_INET=m CONFIG_NFT_SOCKET=m CONFIG_NFT_OSF=m CONFIG_NFT_TPROXY=m +CONFIG_NFT_DUP_NETDEV=m +CONFIG_NFT_FWD_NETDEV=m +CONFIG_NFT_FIB_NETDEV=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_AUDIT=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m CONFIG_NETFILTER_XT_TARGET_CONNMARK=m -CONFIG_NETFILTER_XT_TARGET_CT=m +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_HMARK=m 
CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m CONFIG_NETFILTER_XT_TARGET_LED=m CONFIG_NETFILTER_XT_TARGET_LOG=m CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m +CONFIG_NETFILTER_XT_TARGET_NOTRACK=m +CONFIG_NETFILTER_XT_TARGET_TEE=m +CONFIG_NETFILTER_XT_TARGET_TPROXY=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_SECMARK=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m @@ -178,10 +190,12 @@ CONFIG_NETFILTER_XT_MATCH_NFACCT=m CONFIG_NETFILTER_XT_MATCH_OSF=m CONFIG_NETFILTER_XT_MATCH_OWNER=m CONFIG_NETFILTER_XT_MATCH_POLICY=m +CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m CONFIG_NETFILTER_XT_MATCH_QUOTA=m CONFIG_NETFILTER_XT_MATCH_RATEEST=m CONFIG_NETFILTER_XT_MATCH_REALM=m +CONFIG_NETFILTER_XT_MATCH_RECENT=m CONFIG_NETFILTER_XT_MATCH_SOCKET=m CONFIG_NETFILTER_XT_MATCH_STATE=m CONFIG_NETFILTER_XT_MATCH_STATISTIC=m @@ -196,7 +210,6 @@ CONFIG_IP_VS_PROTO_TCP=y CONFIG_IP_VS_PROTO_UDP=y CONFIG_IP_VS_RR=m CONFIG_IP_VS_NFCT=y -CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -222,12 +235,14 @@ CONFIG_IP_NF_SECURITY=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NF_TABLES_IPV6=y +CONFIG_NFT_DUP_IPV6=m +CONFIG_NFT_FIB_IPV6=m CONFIG_IP6_NF_IPTABLES=y CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m CONFIG_IP6_NF_MATCH_FRAG=m CONFIG_IP6_NF_MATCH_OPTS=m +CONFIG_IP6_NF_MATCH_HL=m CONFIG_IP6_NF_MATCH_IPV6HEADER=m CONFIG_IP6_NF_MATCH_MH=m CONFIG_IP6_NF_MATCH_RPFILTER=m @@ -277,6 +292,7 @@ CONFIG_NET_ACT_MIRRED=m CONFIG_NET_ACT_IPT=m CONFIG_NET_ACT_NAT=m CONFIG_NET_ACT_BPF=m +CONFIG_DCB=y CONFIG_OPENVSWITCH=m CONFIG_VSOCKETS=m CONFIG_VIRTIO_VSOCKETS=m @@ -292,7 +308,6 @@ CONFIG_RFKILL=m CONFIG_RFKILL_INPUT=y CONFIG_NET_9P=y CONFIG_NET_9P_VIRTIO=y -CONFIG_CEPH_LIB=m CONFIG_PCIEPORTBUS=y CONFIG_HOTPLUG_PCI_PCIE=y CONFIG_PCIEAER=y @@ -312,7 +327,6 @@ CONFIG_UEVENT_HELPER=y CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y 
CONFIG_FW_LOADER_COMPRESS=y -CONFIG_FW_LOADER_COMPRESS_ZSTD=y CONFIG_MTD=m CONFIG_MTD_BLOCK=m CONFIG_MTD_CFI=m @@ -327,7 +341,6 @@ CONFIG_PARPORT_PC=y CONFIG_PARPORT_SERIAL=y CONFIG_PARPORT_PC_FIFO=y CONFIG_ZRAM=m -CONFIG_ZRAM_DEF_COMP_ZSTD=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_CRYPTOLOOP=y CONFIG_BLK_DEV_NBD=m @@ -374,7 +387,6 @@ CONFIG_SCSI_QLOGIC_1280=m CONFIG_SCSI_QLA_FC=m CONFIG_TCM_QLA2XXX=m CONFIG_SCSI_QLA_ISCSI=m -CONFIG_SCSI_LPFC=m CONFIG_SCSI_VIRTIO=m CONFIG_ATA=y CONFIG_SATA_AHCI=y @@ -382,12 +394,7 @@ CONFIG_SATA_AHCI_PLATFORM=y CONFIG_PATA_ATIIXP=y CONFIG_PATA_PCMCIA=m CONFIG_MD=y -CONFIG_BLK_DEV_MD=m CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m -CONFIG_MD_RAID1=m -CONFIG_MD_RAID10=m -CONFIG_MD_RAID456=m CONFIG_MD_MULTIPATH=m CONFIG_BCACHE=m CONFIG_BLK_DEV_DM=y @@ -438,7 +445,6 @@ CONFIG_CHELSIO_T1=m CONFIG_CHELSIO_T1_1G=y CONFIG_CHELSIO_T3=m CONFIG_CHELSIO_T4=m -# CONFIG_NET_VENDOR_CIRRUS is not set # CONFIG_NET_VENDOR_CISCO is not set # CONFIG_NET_VENDOR_DEC is not set # CONFIG_NET_VENDOR_DLINK is not set @@ -450,6 +456,7 @@ CONFIG_E1000E=y CONFIG_IGB=y CONFIG_IXGB=y CONFIG_IXGBE=y +CONFIG_TXGBE=m # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MELLANOX is not set # CONFIG_NET_VENDOR_MICREL is not set @@ -556,7 +563,6 @@ CONFIG_I2C_PIIX4=y CONFIG_I2C_GPIO=y CONFIG_SPI=y CONFIG_GPIO_SYSFS=y -CONFIG_GPIO_LOONGSON=y CONFIG_SENSORS_LM75=m CONFIG_SENSORS_LM93=m CONFIG_SENSORS_W83795=m @@ -590,10 +596,8 @@ CONFIG_DRM_AMDGPU_USERPTR=y CONFIG_DRM_AST=y CONFIG_DRM_QXL=m CONFIG_DRM_VIRTIO_GPU=m -CONFIG_FB=y CONFIG_FB_EFI=y CONFIG_FB_RADEON=y -CONFIG_LCD_PLATFORM=m # CONFIG_VGA_CONSOLE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y @@ -602,7 +606,6 @@ CONFIG_SOUND=y CONFIG_SND=y CONFIG_SND_SEQUENCER=m CONFIG_SND_SEQ_DUMMY=m -# CONFIG_SND_ISA is not set CONFIG_SND_BT87X=m CONFIG_SND_BT87X_OVERCLOCK=y CONFIG_SND_HDA_INTEL=y @@ -628,7 +631,6 @@ CONFIG_HID_MULTITOUCH=m CONFIG_HID_SUNPLUS=m CONFIG_USB_HIDDEV=y CONFIG_USB=y 
-CONFIG_USB_OTG=y CONFIG_USB_MON=y CONFIG_USB_XHCI_HCD=y CONFIG_USB_EHCI_HCD=y @@ -653,7 +655,8 @@ CONFIG_USB_SERIAL_OPTION=m CONFIG_USB_GADGET=y CONFIG_INFINIBAND=m CONFIG_RTC_CLASS=y -CONFIG_RTC_DRV_EFI=y +CONFIG_RTC_DRV_EFI=m +CONFIG_RTC_DRV_LS2X=y CONFIG_DMADEVICES=y CONFIG_UIO=m CONFIG_UIO_PDRV_GENIRQ=m @@ -669,6 +672,7 @@ CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y CONFIG_VHOST_NET=m CONFIG_VHOST_SCSI=m CONFIG_VHOST_VSOCK=m +CONFIG_STAGING=y CONFIG_COMEDI=m CONFIG_COMEDI_PCI_DRIVERS=m CONFIG_COMEDI_8255_PCI=m @@ -686,7 +690,6 @@ CONFIG_COMEDI_ADV_PCI_DIO=m CONFIG_COMEDI_NI_LABPC_PCI=m CONFIG_COMEDI_NI_PCIDIO=m CONFIG_COMEDI_NI_PCIMIO=m -CONFIG_STAGING=y CONFIG_R8188EU=m # CONFIG_88EU_AP_MODE is not set CONFIG_PM_DEVFREQ=y @@ -730,7 +733,6 @@ CONFIG_FAT_DEFAULT_IOCHARSET="gb2312" CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y -CONFIG_HUGETLBFS=y CONFIG_CONFIGFS_FS=y CONFIG_HFS_FS=m CONFIG_HFSPLUS_FS=m @@ -785,7 +787,6 @@ CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_DEFLATE=m -CONFIG_CRYPTO_LZO=m CONFIG_CRYPTO_842=m CONFIG_CRYPTO_LZ4=m CONFIG_CRYPTO_LZ4HC=m @@ -799,5 +800,3 @@ CONFIG_STRIP_ASM_SYMS=y CONFIG_MAGIC_SYSRQ=y # CONFIG_SCHED_DEBUG is not set CONFIG_SCHEDSTATS=y -# CONFIG_DEBUG_PREEMPT is not set -# CONFIG_FTRACE is not set diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h index 62044cd5b7bc54458cc98881c86c1f3ff0aa52a0..825c2519b9d1f7c0eedabf118f20375b4493604e 100644 --- a/arch/loongarch/include/asm/acpi.h +++ b/arch/loongarch/include/asm/acpi.h @@ -15,7 +15,7 @@ extern int acpi_pci_disabled; extern int acpi_noirq; #define acpi_os_ioremap acpi_os_ioremap -void __init __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size); +void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size); static inline void disable_acpi(void) { diff --git a/arch/loongarch/include/asm/addrspace.h b/arch/loongarch/include/asm/addrspace.h index 
d342935e5a72d1de92c496087ba3f2e96e46d352..ba2d71b74a04c614a2df6dd82900c77f82120c3b 100644 --- a/arch/loongarch/include/asm/addrspace.h +++ b/arch/loongarch/include/asm/addrspace.h @@ -124,5 +124,6 @@ extern unsigned long vm_map_base; #define PCI_IOSIZE SZ_32M #define ISA_IOSIZE SZ_16K #define IO_SPACE_LIMIT (PCI_IOSIZE - 1) +#define ISA_PHY_IOBASE LOONGSON_LIO_BASE #endif /* _ASM_ADDRSPACE_H */ diff --git a/arch/loongarch/include/asm/asmmacro.h b/arch/loongarch/include/asm/asmmacro.h index be037a40580d4b017725564e59ec0c511373228a..3a629b5451749687daed24748cc8f0fc59088398 100644 --- a/arch/loongarch/include/asm/asmmacro.h +++ b/arch/loongarch/include/asm/asmmacro.h @@ -117,6 +117,212 @@ .endif .endm + .macro parse_vr var vr + \var = -1 /* sentinel: stays -1 if no name below matches */ + .ifc \vr, $vr0 + \var = 0 + .endif + .ifc \vr, $vr1 + \var = 1 + .endif + .ifc \vr, $vr2 + \var = 2 + .endif + .ifc \vr, $vr3 + \var = 3 + .endif + .ifc \vr, $vr4 + \var = 4 + .endif + .ifc \vr, $vr5 + \var = 5 + .endif + .ifc \vr, $vr6 + \var = 6 + .endif + .ifc \vr, $vr7 + \var = 7 + .endif + .ifc \vr, $vr8 + \var = 8 + .endif + .ifc \vr, $vr9 + \var = 9 + .endif + .ifc \vr, $vr10 + \var = 10 + .endif + .ifc \vr, $vr11 + \var = 11 + .endif + .ifc \vr, $vr12 + \var = 12 + .endif + .ifc \vr, $vr13 + \var = 13 + .endif + .ifc \vr, $vr14 + \var = 14 + .endif + .ifc \vr, $vr15 + \var = 15 + .endif + .ifc \vr, $vr16 + \var = 16 + .endif + .ifc \vr, $vr17 + \var = 17 + .endif + .ifc \vr, $vr18 + \var = 18 + .endif + .ifc \vr, $vr19 + \var = 19 + .endif + .ifc \vr, $vr20 + \var = 20 + .endif + .ifc \vr, $vr21 + \var = 21 + .endif + .ifc \vr, $vr22 + \var = 22 + .endif + .ifc \vr, $vr23 + \var = 23 + .endif + .ifc \vr, $vr24 + \var = 24 + .endif + .ifc \vr, $vr25 + \var = 25 + .endif + .ifc \vr, $vr26 + \var = 26 + .endif + .ifc \vr, $vr27 + \var = 27 + .endif + .ifc \vr, $vr28 + \var = 28 + .endif + .ifc \vr, $vr29 + \var = 29 + .endif + .ifc \vr, $vr30 + \var = 30 + .endif + .ifc \vr, $vr31 + \var = 31 + .endif + .iflt \var + .error 
"Unable to parse register name \vr" + .endif + .endm + + .macro parse_xr var xr + \var = -1 /* sentinel: stays -1 if no name below matches */ + .ifc \xr, $xr0 + \var = 0 + .endif + .ifc \xr, $xr1 + \var = 1 + .endif + .ifc \xr, $xr2 + \var = 2 + .endif + .ifc \xr, $xr3 + \var = 3 + .endif + .ifc \xr, $xr4 + \var = 4 + .endif + .ifc \xr, $xr5 + \var = 5 + .endif + .ifc \xr, $xr6 + \var = 6 + .endif + .ifc \xr, $xr7 + \var = 7 + .endif + .ifc \xr, $xr8 + \var = 8 + .endif + .ifc \xr, $xr9 + \var = 9 + .endif + .ifc \xr, $xr10 + \var = 10 + .endif + .ifc \xr, $xr11 + \var = 11 + .endif + .ifc \xr, $xr12 + \var = 12 + .endif + .ifc \xr, $xr13 + \var = 13 + .endif + .ifc \xr, $xr14 + \var = 14 + .endif + .ifc \xr, $xr15 + \var = 15 + .endif + .ifc \xr, $xr16 + \var = 16 + .endif + .ifc \xr, $xr17 + \var = 17 + .endif + .ifc \xr, $xr18 + \var = 18 + .endif + .ifc \xr, $xr19 + \var = 19 + .endif + .ifc \xr, $xr20 + \var = 20 + .endif + .ifc \xr, $xr21 + \var = 21 + .endif + .ifc \xr, $xr22 + \var = 22 + .endif + .ifc \xr, $xr23 + \var = 23 + .endif + .ifc \xr, $xr24 + \var = 24 + .endif + .ifc \xr, $xr25 + \var = 25 + .endif + .ifc \xr, $xr26 + \var = 26 + .endif + .ifc \xr, $xr27 + \var = 27 + .endif + .ifc \xr, $xr28 + \var = 28 + .endif + .ifc \xr, $xr29 + \var = 29 + .endif + .ifc \xr, $xr30 + \var = 30 + .endif + .ifc \xr, $xr31 + \var = 31 + .endif + .iflt \var + .error "Unable to parse register name \xr" + .endif + .endm + + .macro cpu_save_nonscratch thread stptr.d s0, \thread, THREAD_REG23 stptr.d s1, \thread, THREAD_REG24 @@ -270,6 +476,424 @@ fld.d $f31, \tmp, THREAD_FPR31 - THREAD_FPR0 .endm + .macro lsx_save_data thread tmp + parse_r __tmp, \tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \tmp, \thread, \tmp + /* vst opcode is 0xb1 */ + .word (0xb1 << 22 | ((THREAD_FPR0-THREAD_FPR0) << 10) | __tmp << 5 | 0) + .word (0xb1 << 22 | ((THREAD_FPR1-THREAD_FPR0) << 10) | __tmp << 5 | 1) + .word (0xb1 << 22 | ((THREAD_FPR2-THREAD_FPR0) << 10) | __tmp << 5 | 2) + .word (0xb1 << 22 | ((THREAD_FPR3-THREAD_FPR0) << 10) | __tmp << 5 
| 3) + .word (0xb1 << 22 | ((THREAD_FPR4-THREAD_FPR0) << 10) | __tmp << 5 | 4) + .word (0xb1 << 22 | ((THREAD_FPR5-THREAD_FPR0) << 10) | __tmp << 5 | 5) + .word (0xb1 << 22 | ((THREAD_FPR6-THREAD_FPR0) << 10) | __tmp << 5 | 6) + .word (0xb1 << 22 | ((THREAD_FPR7-THREAD_FPR0) << 10) | __tmp << 5 | 7) + .word (0xb1 << 22 | ((THREAD_FPR8-THREAD_FPR0) << 10) | __tmp << 5 | 8) + .word (0xb1 << 22 | ((THREAD_FPR9-THREAD_FPR0) << 10) | __tmp << 5 | 9) + .word (0xb1 << 22 | ((THREAD_FPR10-THREAD_FPR0) << 10) | __tmp << 5 | 10) + .word (0xb1 << 22 | ((THREAD_FPR11-THREAD_FPR0) << 10) | __tmp << 5 | 11) + .word (0xb1 << 22 | ((THREAD_FPR12-THREAD_FPR0) << 10) | __tmp << 5 | 12) + .word (0xb1 << 22 | ((THREAD_FPR13-THREAD_FPR0) << 10) | __tmp << 5 | 13) + .word (0xb1 << 22 | ((THREAD_FPR14-THREAD_FPR0) << 10) | __tmp << 5 | 14) + .word (0xb1 << 22 | ((THREAD_FPR15-THREAD_FPR0) << 10) | __tmp << 5 | 15) + .word (0xb1 << 22 | ((THREAD_FPR16-THREAD_FPR0) << 10) | __tmp << 5 | 16) + .word (0xb1 << 22 | ((THREAD_FPR17-THREAD_FPR0) << 10) | __tmp << 5 | 17) + .word (0xb1 << 22 | ((THREAD_FPR18-THREAD_FPR0) << 10) | __tmp << 5 | 18) + .word (0xb1 << 22 | ((THREAD_FPR19-THREAD_FPR0) << 10) | __tmp << 5 | 19) + .word (0xb1 << 22 | ((THREAD_FPR20-THREAD_FPR0) << 10) | __tmp << 5 | 20) + .word (0xb1 << 22 | ((THREAD_FPR21-THREAD_FPR0) << 10) | __tmp << 5 | 21) + .word (0xb1 << 22 | ((THREAD_FPR22-THREAD_FPR0) << 10) | __tmp << 5 | 22) + .word (0xb1 << 22 | ((THREAD_FPR23-THREAD_FPR0) << 10) | __tmp << 5 | 23) + .word (0xb1 << 22 | ((THREAD_FPR24-THREAD_FPR0) << 10) | __tmp << 5 | 24) + .word (0xb1 << 22 | ((THREAD_FPR25-THREAD_FPR0) << 10) | __tmp << 5 | 25) + .word (0xb1 << 22 | ((THREAD_FPR26-THREAD_FPR0) << 10) | __tmp << 5 | 26) + .word (0xb1 << 22 | ((THREAD_FPR27-THREAD_FPR0) << 10) | __tmp << 5 | 27) + .word (0xb1 << 22 | ((THREAD_FPR28-THREAD_FPR0) << 10) | __tmp << 5 | 28) + .word (0xb1 << 22 | ((THREAD_FPR29-THREAD_FPR0) << 10) | __tmp << 5 | 29) + .word (0xb1 << 22 | 
((THREAD_FPR30-THREAD_FPR0) << 10) | __tmp << 5 | 30) + .word (0xb1 << 22 | ((THREAD_FPR31-THREAD_FPR0) << 10) | __tmp << 5 | 31) + .endm + + .macro lsx_restore_data thread tmp + parse_r __tmp, \tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \tmp, \thread, \tmp + /* vld opcode is 0xb0 */ + .word (0xb0 << 22 | ((THREAD_FPR0-THREAD_FPR0) << 10) | __tmp << 5 | 0) + .word (0xb0 << 22 | ((THREAD_FPR1-THREAD_FPR0) << 10) | __tmp << 5 | 1) + .word (0xb0 << 22 | ((THREAD_FPR2-THREAD_FPR0) << 10) | __tmp << 5 | 2) + .word (0xb0 << 22 | ((THREAD_FPR3-THREAD_FPR0) << 10) | __tmp << 5 | 3) + .word (0xb0 << 22 | ((THREAD_FPR4-THREAD_FPR0) << 10) | __tmp << 5 | 4) + .word (0xb0 << 22 | ((THREAD_FPR5-THREAD_FPR0) << 10) | __tmp << 5 | 5) + .word (0xb0 << 22 | ((THREAD_FPR6-THREAD_FPR0) << 10) | __tmp << 5 | 6) + .word (0xb0 << 22 | ((THREAD_FPR7-THREAD_FPR0) << 10) | __tmp << 5 | 7) + .word (0xb0 << 22 | ((THREAD_FPR8-THREAD_FPR0) << 10) | __tmp << 5 | 8) + .word (0xb0 << 22 | ((THREAD_FPR9-THREAD_FPR0) << 10) | __tmp << 5 | 9) + .word (0xb0 << 22 | ((THREAD_FPR10-THREAD_FPR0) << 10) | __tmp << 5 | 10) + .word (0xb0 << 22 | ((THREAD_FPR11-THREAD_FPR0) << 10) | __tmp << 5 | 11) + .word (0xb0 << 22 | ((THREAD_FPR12-THREAD_FPR0) << 10) | __tmp << 5 | 12) + .word (0xb0 << 22 | ((THREAD_FPR13-THREAD_FPR0) << 10) | __tmp << 5 | 13) + .word (0xb0 << 22 | ((THREAD_FPR14-THREAD_FPR0) << 10) | __tmp << 5 | 14) + .word (0xb0 << 22 | ((THREAD_FPR15-THREAD_FPR0) << 10) | __tmp << 5 | 15) + .word (0xb0 << 22 | ((THREAD_FPR16-THREAD_FPR0) << 10) | __tmp << 5 | 16) + .word (0xb0 << 22 | ((THREAD_FPR17-THREAD_FPR0) << 10) | __tmp << 5 | 17) + .word (0xb0 << 22 | ((THREAD_FPR18-THREAD_FPR0) << 10) | __tmp << 5 | 18) + .word (0xb0 << 22 | ((THREAD_FPR19-THREAD_FPR0) << 10) | __tmp << 5 | 19) + .word (0xb0 << 22 | ((THREAD_FPR20-THREAD_FPR0) << 10) | __tmp << 5 | 20) + .word (0xb0 << 22 | ((THREAD_FPR21-THREAD_FPR0) << 10) | __tmp << 5 | 21) + .word (0xb0 << 22 | ((THREAD_FPR22-THREAD_FPR0) << 10) | __tmp 
<< 5 | 22) + .word (0xb0 << 22 | ((THREAD_FPR23-THREAD_FPR0) << 10) | __tmp << 5 | 23) + .word (0xb0 << 22 | ((THREAD_FPR24-THREAD_FPR0) << 10) | __tmp << 5 | 24) + .word (0xb0 << 22 | ((THREAD_FPR25-THREAD_FPR0) << 10) | __tmp << 5 | 25) + .word (0xb0 << 22 | ((THREAD_FPR26-THREAD_FPR0) << 10) | __tmp << 5 | 26) + .word (0xb0 << 22 | ((THREAD_FPR27-THREAD_FPR0) << 10) | __tmp << 5 | 27) + .word (0xb0 << 22 | ((THREAD_FPR28-THREAD_FPR0) << 10) | __tmp << 5 | 28) + .word (0xb0 << 22 | ((THREAD_FPR29-THREAD_FPR0) << 10) | __tmp << 5 | 29) + .word (0xb0 << 22 | ((THREAD_FPR30-THREAD_FPR0) << 10) | __tmp << 5 | 30) + .word (0xb0 << 22 | ((THREAD_FPR31-THREAD_FPR0) << 10) | __tmp << 5 | 31) + .endm + + .macro lsx_save_all thread tmp0 tmp1 + fpu_save_cc \thread, \tmp0, \tmp1 + fpu_save_csr \thread, \tmp0 + lsx_save_data \thread, \tmp0 + .endm + + .macro lsx_restore_all thread tmp0 tmp1 + lsx_restore_data \thread, \tmp0 + fpu_restore_cc \thread, \tmp0, \tmp1 + fpu_restore_csr \thread, \tmp0 + .endm + + .macro lsx_save_upper vd base tmp off + parse_vr __vd, \vd + parse_r __tmp, \tmp + /* vpickve2gr opcode is 0xe5dfe */ + .word (0xe5dfe << 11 | 1 << 10 | __vd << 5 | __tmp) + st.d \tmp, \base, (\off+8) + .endm + + .macro lsx_save_all_upper thread base tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \base, \thread, \tmp + lsx_save_upper $vr0, \base, \tmp, (THREAD_FPR0-THREAD_FPR0) + lsx_save_upper $vr1, \base, \tmp, (THREAD_FPR1-THREAD_FPR0) + lsx_save_upper $vr2, \base, \tmp, (THREAD_FPR2-THREAD_FPR0) + lsx_save_upper $vr3, \base, \tmp, (THREAD_FPR3-THREAD_FPR0) + lsx_save_upper $vr4, \base, \tmp, (THREAD_FPR4-THREAD_FPR0) + lsx_save_upper $vr5, \base, \tmp, (THREAD_FPR5-THREAD_FPR0) + lsx_save_upper $vr6, \base, \tmp, (THREAD_FPR6-THREAD_FPR0) + lsx_save_upper $vr7, \base, \tmp, (THREAD_FPR7-THREAD_FPR0) + lsx_save_upper $vr8, \base, \tmp, (THREAD_FPR8-THREAD_FPR0) + lsx_save_upper $vr9, \base, \tmp, (THREAD_FPR9-THREAD_FPR0) + lsx_save_upper $vr10, \base, \tmp, 
(THREAD_FPR10-THREAD_FPR0) + lsx_save_upper $vr11, \base, \tmp, (THREAD_FPR11-THREAD_FPR0) + lsx_save_upper $vr12, \base, \tmp, (THREAD_FPR12-THREAD_FPR0) + lsx_save_upper $vr13, \base, \tmp, (THREAD_FPR13-THREAD_FPR0) + lsx_save_upper $vr14, \base, \tmp, (THREAD_FPR14-THREAD_FPR0) + lsx_save_upper $vr15, \base, \tmp, (THREAD_FPR15-THREAD_FPR0) + lsx_save_upper $vr16, \base, \tmp, (THREAD_FPR16-THREAD_FPR0) + lsx_save_upper $vr17, \base, \tmp, (THREAD_FPR17-THREAD_FPR0) + lsx_save_upper $vr18, \base, \tmp, (THREAD_FPR18-THREAD_FPR0) + lsx_save_upper $vr19, \base, \tmp, (THREAD_FPR19-THREAD_FPR0) + lsx_save_upper $vr20, \base, \tmp, (THREAD_FPR20-THREAD_FPR0) + lsx_save_upper $vr21, \base, \tmp, (THREAD_FPR21-THREAD_FPR0) + lsx_save_upper $vr22, \base, \tmp, (THREAD_FPR22-THREAD_FPR0) + lsx_save_upper $vr23, \base, \tmp, (THREAD_FPR23-THREAD_FPR0) + lsx_save_upper $vr24, \base, \tmp, (THREAD_FPR24-THREAD_FPR0) + lsx_save_upper $vr25, \base, \tmp, (THREAD_FPR25-THREAD_FPR0) + lsx_save_upper $vr26, \base, \tmp, (THREAD_FPR26-THREAD_FPR0) + lsx_save_upper $vr27, \base, \tmp, (THREAD_FPR27-THREAD_FPR0) + lsx_save_upper $vr28, \base, \tmp, (THREAD_FPR28-THREAD_FPR0) + lsx_save_upper $vr29, \base, \tmp, (THREAD_FPR29-THREAD_FPR0) + lsx_save_upper $vr30, \base, \tmp, (THREAD_FPR30-THREAD_FPR0) + lsx_save_upper $vr31, \base, \tmp, (THREAD_FPR31-THREAD_FPR0) + .endm + + .macro lsx_restore_upper vd base tmp off + parse_vr __vd, \vd + parse_r __tmp, \tmp + ld.d \tmp, \base, (\off+8) + /* vinsgr2vr opcode is 0xe5d7e */ + .word (0xe5d7e << 11 | 1 << 10 | __tmp << 5 | __vd) + .endm + + .macro lsx_restore_all_upper thread base tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \base, \thread, \tmp + lsx_restore_upper $vr0, \base, \tmp, (THREAD_FPR0-THREAD_FPR0) + lsx_restore_upper $vr1, \base, \tmp, (THREAD_FPR1-THREAD_FPR0) + lsx_restore_upper $vr2, \base, \tmp, (THREAD_FPR2-THREAD_FPR0) + lsx_restore_upper $vr3, \base, \tmp, (THREAD_FPR3-THREAD_FPR0) + lsx_restore_upper $vr4, \base, \tmp, 
(THREAD_FPR4-THREAD_FPR0) + lsx_restore_upper $vr5, \base, \tmp, (THREAD_FPR5-THREAD_FPR0) + lsx_restore_upper $vr6, \base, \tmp, (THREAD_FPR6-THREAD_FPR0) + lsx_restore_upper $vr7, \base, \tmp, (THREAD_FPR7-THREAD_FPR0) + lsx_restore_upper $vr8, \base, \tmp, (THREAD_FPR8-THREAD_FPR0) + lsx_restore_upper $vr9, \base, \tmp, (THREAD_FPR9-THREAD_FPR0) + lsx_restore_upper $vr10, \base, \tmp, (THREAD_FPR10-THREAD_FPR0) + lsx_restore_upper $vr11, \base, \tmp, (THREAD_FPR11-THREAD_FPR0) + lsx_restore_upper $vr12, \base, \tmp, (THREAD_FPR12-THREAD_FPR0) + lsx_restore_upper $vr13, \base, \tmp, (THREAD_FPR13-THREAD_FPR0) + lsx_restore_upper $vr14, \base, \tmp, (THREAD_FPR14-THREAD_FPR0) + lsx_restore_upper $vr15, \base, \tmp, (THREAD_FPR15-THREAD_FPR0) + lsx_restore_upper $vr16, \base, \tmp, (THREAD_FPR16-THREAD_FPR0) + lsx_restore_upper $vr17, \base, \tmp, (THREAD_FPR17-THREAD_FPR0) + lsx_restore_upper $vr18, \base, \tmp, (THREAD_FPR18-THREAD_FPR0) + lsx_restore_upper $vr19, \base, \tmp, (THREAD_FPR19-THREAD_FPR0) + lsx_restore_upper $vr20, \base, \tmp, (THREAD_FPR20-THREAD_FPR0) + lsx_restore_upper $vr21, \base, \tmp, (THREAD_FPR21-THREAD_FPR0) + lsx_restore_upper $vr22, \base, \tmp, (THREAD_FPR22-THREAD_FPR0) + lsx_restore_upper $vr23, \base, \tmp, (THREAD_FPR23-THREAD_FPR0) + lsx_restore_upper $vr24, \base, \tmp, (THREAD_FPR24-THREAD_FPR0) + lsx_restore_upper $vr25, \base, \tmp, (THREAD_FPR25-THREAD_FPR0) + lsx_restore_upper $vr26, \base, \tmp, (THREAD_FPR26-THREAD_FPR0) + lsx_restore_upper $vr27, \base, \tmp, (THREAD_FPR27-THREAD_FPR0) + lsx_restore_upper $vr28, \base, \tmp, (THREAD_FPR28-THREAD_FPR0) + lsx_restore_upper $vr29, \base, \tmp, (THREAD_FPR29-THREAD_FPR0) + lsx_restore_upper $vr30, \base, \tmp, (THREAD_FPR30-THREAD_FPR0) + lsx_restore_upper $vr31, \base, \tmp, (THREAD_FPR31-THREAD_FPR0) + .endm + + .macro lsx_init_upper vd tmp + parse_vr __vd, \vd + parse_r __tmp, \tmp + /* vinsgr2vr opcode is 0xe5d7e */ + .word (0xe5d7e << 11 | 1 << 10 | __tmp << 5 | __vd) 
+ .endm + + .macro lsx_init_all_upper tmp + not \tmp, zero + lsx_init_upper $vr0 \tmp + lsx_init_upper $vr1 \tmp + lsx_init_upper $vr2 \tmp + lsx_init_upper $vr3 \tmp + lsx_init_upper $vr4 \tmp + lsx_init_upper $vr5 \tmp + lsx_init_upper $vr6 \tmp + lsx_init_upper $vr7 \tmp + lsx_init_upper $vr8 \tmp + lsx_init_upper $vr9 \tmp + lsx_init_upper $vr10 \tmp + lsx_init_upper $vr11 \tmp + lsx_init_upper $vr12 \tmp + lsx_init_upper $vr13 \tmp + lsx_init_upper $vr14 \tmp + lsx_init_upper $vr15 \tmp + lsx_init_upper $vr16 \tmp + lsx_init_upper $vr17 \tmp + lsx_init_upper $vr18 \tmp + lsx_init_upper $vr19 \tmp + lsx_init_upper $vr20 \tmp + lsx_init_upper $vr21 \tmp + lsx_init_upper $vr22 \tmp + lsx_init_upper $vr23 \tmp + lsx_init_upper $vr24 \tmp + lsx_init_upper $vr25 \tmp + lsx_init_upper $vr26 \tmp + lsx_init_upper $vr27 \tmp + lsx_init_upper $vr28 \tmp + lsx_init_upper $vr29 \tmp + lsx_init_upper $vr30 \tmp + lsx_init_upper $vr31 \tmp + .endm + + .macro lasx_save_data thread tmp + parse_r __tmp, \tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \tmp, \thread, \tmp + /* xvst opcode is 0xb3 */ + .word (0xb3 << 22 | ((THREAD_FPR0-THREAD_FPR0) << 10) | __tmp << 5 | 0) + .word (0xb3 << 22 | ((THREAD_FPR1-THREAD_FPR0) << 10) | __tmp << 5 | 1) + .word (0xb3 << 22 | ((THREAD_FPR2-THREAD_FPR0) << 10) | __tmp << 5 | 2) + .word (0xb3 << 22 | ((THREAD_FPR3-THREAD_FPR0) << 10) | __tmp << 5 | 3) + .word (0xb3 << 22 | ((THREAD_FPR4-THREAD_FPR0) << 10) | __tmp << 5 | 4) + .word (0xb3 << 22 | ((THREAD_FPR5-THREAD_FPR0) << 10) | __tmp << 5 | 5) + .word (0xb3 << 22 | ((THREAD_FPR6-THREAD_FPR0) << 10) | __tmp << 5 | 6) + .word (0xb3 << 22 | ((THREAD_FPR7-THREAD_FPR0) << 10) | __tmp << 5 | 7) + .word (0xb3 << 22 | ((THREAD_FPR8-THREAD_FPR0) << 10) | __tmp << 5 | 8) + .word (0xb3 << 22 | ((THREAD_FPR9-THREAD_FPR0) << 10) | __tmp << 5 | 9) + .word (0xb3 << 22 | ((THREAD_FPR10-THREAD_FPR0) << 10) | __tmp << 5 | 10) + .word (0xb3 << 22 | ((THREAD_FPR11-THREAD_FPR0) << 10) | __tmp << 5 | 11) + .word 
(0xb3 << 22 | ((THREAD_FPR12-THREAD_FPR0) << 10) | __tmp << 5 | 12) + .word (0xb3 << 22 | ((THREAD_FPR13-THREAD_FPR0) << 10) | __tmp << 5 | 13) + .word (0xb3 << 22 | ((THREAD_FPR14-THREAD_FPR0) << 10) | __tmp << 5 | 14) + .word (0xb3 << 22 | ((THREAD_FPR15-THREAD_FPR0) << 10) | __tmp << 5 | 15) + .word (0xb3 << 22 | ((THREAD_FPR16-THREAD_FPR0) << 10) | __tmp << 5 | 16) + .word (0xb3 << 22 | ((THREAD_FPR17-THREAD_FPR0) << 10) | __tmp << 5 | 17) + .word (0xb3 << 22 | ((THREAD_FPR18-THREAD_FPR0) << 10) | __tmp << 5 | 18) + .word (0xb3 << 22 | ((THREAD_FPR19-THREAD_FPR0) << 10) | __tmp << 5 | 19) + .word (0xb3 << 22 | ((THREAD_FPR20-THREAD_FPR0) << 10) | __tmp << 5 | 20) + .word (0xb3 << 22 | ((THREAD_FPR21-THREAD_FPR0) << 10) | __tmp << 5 | 21) + .word (0xb3 << 22 | ((THREAD_FPR22-THREAD_FPR0) << 10) | __tmp << 5 | 22) + .word (0xb3 << 22 | ((THREAD_FPR23-THREAD_FPR0) << 10) | __tmp << 5 | 23) + .word (0xb3 << 22 | ((THREAD_FPR24-THREAD_FPR0) << 10) | __tmp << 5 | 24) + .word (0xb3 << 22 | ((THREAD_FPR25-THREAD_FPR0) << 10) | __tmp << 5 | 25) + .word (0xb3 << 22 | ((THREAD_FPR26-THREAD_FPR0) << 10) | __tmp << 5 | 26) + .word (0xb3 << 22 | ((THREAD_FPR27-THREAD_FPR0) << 10) | __tmp << 5 | 27) + .word (0xb3 << 22 | ((THREAD_FPR28-THREAD_FPR0) << 10) | __tmp << 5 | 28) + .word (0xb3 << 22 | ((THREAD_FPR29-THREAD_FPR0) << 10) | __tmp << 5 | 29) + .word (0xb3 << 22 | ((THREAD_FPR30-THREAD_FPR0) << 10) | __tmp << 5 | 30) + .word (0xb3 << 22 | ((THREAD_FPR31-THREAD_FPR0) << 10) | __tmp << 5 | 31) + .endm + + .macro lasx_restore_data thread tmp + parse_r __tmp, \tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \tmp, \thread, \tmp + /* xvld opcode is 0xb2 */ + .word (0xb2 << 22 | ((THREAD_FPR0-THREAD_FPR0) << 10) | __tmp << 5 | 0) + .word (0xb2 << 22 | ((THREAD_FPR1-THREAD_FPR0) << 10) | __tmp << 5 | 1) + .word (0xb2 << 22 | ((THREAD_FPR2-THREAD_FPR0) << 10) | __tmp << 5 | 2) + .word (0xb2 << 22 | ((THREAD_FPR3-THREAD_FPR0) << 10) | __tmp << 5 | 3) + .word (0xb2 << 22 | 
((THREAD_FPR4-THREAD_FPR0) << 10) | __tmp << 5 | 4) + .word (0xb2 << 22 | ((THREAD_FPR5-THREAD_FPR0) << 10) | __tmp << 5 | 5) + .word (0xb2 << 22 | ((THREAD_FPR6-THREAD_FPR0) << 10) | __tmp << 5 | 6) + .word (0xb2 << 22 | ((THREAD_FPR7-THREAD_FPR0) << 10) | __tmp << 5 | 7) + .word (0xb2 << 22 | ((THREAD_FPR8-THREAD_FPR0) << 10) | __tmp << 5 | 8) + .word (0xb2 << 22 | ((THREAD_FPR9-THREAD_FPR0) << 10) | __tmp << 5 | 9) + .word (0xb2 << 22 | ((THREAD_FPR10-THREAD_FPR0) << 10) | __tmp << 5 | 10) + .word (0xb2 << 22 | ((THREAD_FPR11-THREAD_FPR0) << 10) | __tmp << 5 | 11) + .word (0xb2 << 22 | ((THREAD_FPR12-THREAD_FPR0) << 10) | __tmp << 5 | 12) + .word (0xb2 << 22 | ((THREAD_FPR13-THREAD_FPR0) << 10) | __tmp << 5 | 13) + .word (0xb2 << 22 | ((THREAD_FPR14-THREAD_FPR0) << 10) | __tmp << 5 | 14) + .word (0xb2 << 22 | ((THREAD_FPR15-THREAD_FPR0) << 10) | __tmp << 5 | 15) + .word (0xb2 << 22 | ((THREAD_FPR16-THREAD_FPR0) << 10) | __tmp << 5 | 16) + .word (0xb2 << 22 | ((THREAD_FPR17-THREAD_FPR0) << 10) | __tmp << 5 | 17) + .word (0xb2 << 22 | ((THREAD_FPR18-THREAD_FPR0) << 10) | __tmp << 5 | 18) + .word (0xb2 << 22 | ((THREAD_FPR19-THREAD_FPR0) << 10) | __tmp << 5 | 19) + .word (0xb2 << 22 | ((THREAD_FPR20-THREAD_FPR0) << 10) | __tmp << 5 | 20) + .word (0xb2 << 22 | ((THREAD_FPR21-THREAD_FPR0) << 10) | __tmp << 5 | 21) + .word (0xb2 << 22 | ((THREAD_FPR22-THREAD_FPR0) << 10) | __tmp << 5 | 22) + .word (0xb2 << 22 | ((THREAD_FPR23-THREAD_FPR0) << 10) | __tmp << 5 | 23) + .word (0xb2 << 22 | ((THREAD_FPR24-THREAD_FPR0) << 10) | __tmp << 5 | 24) + .word (0xb2 << 22 | ((THREAD_FPR25-THREAD_FPR0) << 10) | __tmp << 5 | 25) + .word (0xb2 << 22 | ((THREAD_FPR26-THREAD_FPR0) << 10) | __tmp << 5 | 26) + .word (0xb2 << 22 | ((THREAD_FPR27-THREAD_FPR0) << 10) | __tmp << 5 | 27) + .word (0xb2 << 22 | ((THREAD_FPR28-THREAD_FPR0) << 10) | __tmp << 5 | 28) + .word (0xb2 << 22 | ((THREAD_FPR29-THREAD_FPR0) << 10) | __tmp << 5 | 29) + .word (0xb2 << 22 | ((THREAD_FPR30-THREAD_FPR0) << 10) 
| __tmp << 5 | 30) + .word (0xb2 << 22 | ((THREAD_FPR31-THREAD_FPR0) << 10) | __tmp << 5 | 31) + .endm + + .macro lasx_save_all thread tmp0 tmp1 + fpu_save_cc \thread, \tmp0, \tmp1 + fpu_save_csr \thread, \tmp0 + lasx_save_data \thread, \tmp0 + .endm + + .macro lasx_restore_all thread tmp0 tmp1 + lasx_restore_data \thread, \tmp0 + fpu_restore_cc \thread, \tmp0, \tmp1 + fpu_restore_csr \thread, \tmp0 + .endm + + .macro lasx_save_upper xd base tmp off + /* Nothing */ + .endm + + .macro lasx_save_all_upper thread base tmp + /* Nothing */ + .endm + + .macro lasx_restore_upper xd base tmp off + parse_xr __xd, \xd + parse_xr __xt, \tmp + parse_r __base, \base + /* vld opcode is 0xb0 */ + .word (0xb0 << 22 | (\off+16) << 10 | __base << 5 | __xt) + /* xvpermi.q opcode is 0x1dfb */ + .word (0x1dfb << 18 | 0x2 << 10 | __xt << 5 | __xd) + .endm + + .macro lasx_restore_all_upper thread base tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \base, \thread, \tmp + /* Save $vr31, xvpickve2gr opcode is 0x76efe */ + .word (0x76efe << 12 | 0 << 10 | 31 << 5 | 0x11) + .word (0x76efe << 12 | 1 << 10 | 31 << 5 | 0x12) + lasx_restore_upper $xr0, \base, $xr31, (THREAD_FPR0-THREAD_FPR0) + lasx_restore_upper $xr1, \base, $xr31, (THREAD_FPR1-THREAD_FPR0) + lasx_restore_upper $xr2, \base, $xr31, (THREAD_FPR2-THREAD_FPR0) + lasx_restore_upper $xr3, \base, $xr31, (THREAD_FPR3-THREAD_FPR0) + lasx_restore_upper $xr4, \base, $xr31, (THREAD_FPR4-THREAD_FPR0) + lasx_restore_upper $xr5, \base, $xr31, (THREAD_FPR5-THREAD_FPR0) + lasx_restore_upper $xr6, \base, $xr31, (THREAD_FPR6-THREAD_FPR0) + lasx_restore_upper $xr7, \base, $xr31, (THREAD_FPR7-THREAD_FPR0) + lasx_restore_upper $xr8, \base, $xr31, (THREAD_FPR8-THREAD_FPR0) + lasx_restore_upper $xr9, \base, $xr31, (THREAD_FPR9-THREAD_FPR0) + lasx_restore_upper $xr10, \base, $xr31, (THREAD_FPR10-THREAD_FPR0) + lasx_restore_upper $xr11, \base, $xr31, (THREAD_FPR11-THREAD_FPR0) + lasx_restore_upper $xr12, \base, $xr31, (THREAD_FPR12-THREAD_FPR0) + 
lasx_restore_upper $xr13, \base, $xr31, (THREAD_FPR13-THREAD_FPR0) + lasx_restore_upper $xr14, \base, $xr31, (THREAD_FPR14-THREAD_FPR0) + lasx_restore_upper $xr15, \base, $xr31, (THREAD_FPR15-THREAD_FPR0) + lasx_restore_upper $xr16, \base, $xr31, (THREAD_FPR16-THREAD_FPR0) + lasx_restore_upper $xr17, \base, $xr31, (THREAD_FPR17-THREAD_FPR0) + lasx_restore_upper $xr18, \base, $xr31, (THREAD_FPR18-THREAD_FPR0) + lasx_restore_upper $xr19, \base, $xr31, (THREAD_FPR19-THREAD_FPR0) + lasx_restore_upper $xr20, \base, $xr31, (THREAD_FPR20-THREAD_FPR0) + lasx_restore_upper $xr21, \base, $xr31, (THREAD_FPR21-THREAD_FPR0) + lasx_restore_upper $xr22, \base, $xr31, (THREAD_FPR22-THREAD_FPR0) + lasx_restore_upper $xr23, \base, $xr31, (THREAD_FPR23-THREAD_FPR0) + lasx_restore_upper $xr24, \base, $xr31, (THREAD_FPR24-THREAD_FPR0) + lasx_restore_upper $xr25, \base, $xr31, (THREAD_FPR25-THREAD_FPR0) + lasx_restore_upper $xr26, \base, $xr31, (THREAD_FPR26-THREAD_FPR0) + lasx_restore_upper $xr27, \base, $xr31, (THREAD_FPR27-THREAD_FPR0) + lasx_restore_upper $xr28, \base, $xr31, (THREAD_FPR28-THREAD_FPR0) + lasx_restore_upper $xr29, \base, $xr31, (THREAD_FPR29-THREAD_FPR0) + lasx_restore_upper $xr30, \base, $xr31, (THREAD_FPR30-THREAD_FPR0) + lasx_restore_upper $xr31, \base, $xr31, (THREAD_FPR31-THREAD_FPR0) + /* Restore $vr31, xvinsgr2vr opcode is 0x76ebe */ + .word (0x76ebe << 12 | 0 << 10 | 0x11 << 5 | 31) + .word (0x76ebe << 12 | 1 << 10 | 0x12 << 5 | 31) + .endm + + .macro lasx_init_upper xd tmp + parse_xr __xd, \xd + parse_r __tmp, \tmp + /* xvinsgr2vr opcode is 0x76ebe */ + .word (0x76ebe << 12 | 2 << 10 | __tmp << 5 | __xd) + .word (0x76ebe << 12 | 3 << 10 | __tmp << 5 | __xd) + .endm + + .macro lasx_init_all_upper tmp + not \tmp, zero + lasx_init_upper $xr0 \tmp + lasx_init_upper $xr1 \tmp + lasx_init_upper $xr2 \tmp + lasx_init_upper $xr3 \tmp + lasx_init_upper $xr4 \tmp + lasx_init_upper $xr5 \tmp + lasx_init_upper $xr6 \tmp + lasx_init_upper $xr7 \tmp + lasx_init_upper $xr8 
\tmp + lasx_init_upper $xr9 \tmp + lasx_init_upper $xr10 \tmp + lasx_init_upper $xr11 \tmp + lasx_init_upper $xr12 \tmp + lasx_init_upper $xr13 \tmp + lasx_init_upper $xr14 \tmp + lasx_init_upper $xr15 \tmp + lasx_init_upper $xr16 \tmp + lasx_init_upper $xr17 \tmp + lasx_init_upper $xr18 \tmp + lasx_init_upper $xr19 \tmp + lasx_init_upper $xr20 \tmp + lasx_init_upper $xr21 \tmp + lasx_init_upper $xr22 \tmp + lasx_init_upper $xr23 \tmp + lasx_init_upper $xr24 \tmp + lasx_init_upper $xr25 \tmp + lasx_init_upper $xr26 \tmp + lasx_init_upper $xr27 \tmp + lasx_init_upper $xr28 \tmp + lasx_init_upper $xr29 \tmp + lasx_init_upper $xr30 \tmp + lasx_init_upper $xr31 \tmp + .endm + .macro not dst src nor \dst, \src, zero .endm diff --git a/arch/loongarch/include/asm/bootinfo.h b/arch/loongarch/include/asm/bootinfo.h index e02ac4af7f6e86515e9cfffd320be41cace56028..8e5881bc5ad19c57b426f5e0b97a1bcc940a008b 100644 --- a/arch/loongarch/include/asm/bootinfo.h +++ b/arch/loongarch/include/asm/bootinfo.h @@ -36,7 +36,7 @@ struct loongson_system_configuration { }; extern u64 efi_system_table; -extern unsigned long fw_arg0, fw_arg1; +extern unsigned long fw_arg0, fw_arg1, fw_arg2; extern struct loongson_board_info b_info; extern struct loongson_system_configuration loongson_sysconf; diff --git a/arch/loongarch/include/asm/cache.h b/arch/loongarch/include/asm/cache.h index 1b6d0961719989b9352aa90d55578df21e256c8d..6c2f81bca2a316c851f8a68acbe5489d7323f317 100644 --- a/arch/loongarch/include/asm/cache.h +++ b/arch/loongarch/include/asm/cache.h @@ -10,4 +10,7 @@ #define __read_mostly __section(".data..read_mostly") +extern struct loongson_system_configuration loongson_sysconf; +extern char __weak except_vec_cex; + #endif /* _ASM_CACHE_H */ diff --git a/arch/loongarch/include/asm/cacheflush.h b/arch/loongarch/include/asm/cacheflush.h index 670900141b7c80df0ae2041ef086c7e921cdc1a6..a9fa98b79405fdc5809844abf5df33fc4f0f8b6a 100644 --- a/arch/loongarch/include/asm/cacheflush.h +++ 
b/arch/loongarch/include/asm/cacheflush.h @@ -9,8 +9,11 @@ #include #include -extern void local_flush_icache_range(unsigned long start, unsigned long end); +void local_flush_icache_range(unsigned long start, unsigned long end); +void flush_cache_line_hit(unsigned long addr); +asmlinkage void cpu_flush_caches(void); +#define invalid_cache_line_hit(addr) flush_cache_line_hit(addr) #define flush_icache_range local_flush_icache_range #define flush_icache_user_range local_flush_icache_range @@ -35,46 +38,26 @@ extern void local_flush_icache_range(unsigned long start, unsigned long end); : \ : "i" (op), "ZC" (*(unsigned char *)(addr))) -static inline void flush_icache_line_indexed(unsigned long addr) +static inline bool cache_present(struct cache_desc *cdesc) { - cache_op(Index_Invalidate_I, addr); + return cdesc->flags & CACHE_PRESENT; } -static inline void flush_dcache_line_indexed(unsigned long addr) +static inline bool cache_private(struct cache_desc *cdesc) { - cache_op(Index_Writeback_Inv_D, addr); + return cdesc->flags & CACHE_PRIVATE; } -static inline void flush_vcache_line_indexed(unsigned long addr) +static inline bool cache_inclusive(struct cache_desc *cdesc) { - cache_op(Index_Writeback_Inv_V, addr); + return cdesc->flags & CACHE_INCLUSIVE; } -static inline void flush_scache_line_indexed(unsigned long addr) +static inline unsigned int cpu_last_level_cache_line_size(void) { - cache_op(Index_Writeback_Inv_S, addr); -} + unsigned int cache_present = current_cpu_data.cache_leaves_present; -static inline void flush_icache_line(unsigned long addr) -{ - cache_op(Hit_Invalidate_I, addr); + return current_cpu_data.cache_leaves[cache_present - 1].linesz; } - -static inline void flush_dcache_line(unsigned long addr) -{ - cache_op(Hit_Writeback_Inv_D, addr); -} - -static inline void flush_vcache_line(unsigned long addr) -{ - cache_op(Hit_Writeback_Inv_V, addr); -} - -static inline void flush_scache_line(unsigned long addr) -{ - cache_op(Hit_Writeback_Inv_S, addr); -} - 
#include - #endif /* _ASM_CACHEFLUSH_H */ diff --git a/arch/loongarch/include/asm/cacheops.h b/arch/loongarch/include/asm/cacheops.h index dc280efecebd8d5c091ef9957904128824f636a5..0f4a86f8e2bea78cd0796fb0848acd50111d79a3 100644 --- a/arch/loongarch/include/asm/cacheops.h +++ b/arch/loongarch/include/asm/cacheops.h @@ -8,16 +8,18 @@ #define __ASM_CACHEOPS_H /* - * Most cache ops are split into a 2 bit field identifying the cache, and a 3 + * Most cache ops are split into a 3 bit field identifying the cache, and a 2 * bit field identifying the cache operation. */ -#define CacheOp_Cache 0x03 -#define CacheOp_Op 0x1c +#define CacheOp_Cache 0x07 +#define CacheOp_Op 0x18 -#define Cache_I 0x00 -#define Cache_D 0x01 -#define Cache_V 0x02 -#define Cache_S 0x03 +#define Cache_LEAF0 0x00 +#define Cache_LEAF1 0x01 +#define Cache_LEAF2 0x02 +#define Cache_LEAF3 0x03 +#define Cache_LEAF4 0x04 +#define Cache_LEAF5 0x05 #define Index_Invalidate 0x08 #define Index_Writeback_Inv 0x08 @@ -25,13 +27,17 @@ #define Hit_Writeback_Inv 0x10 #define CacheOp_User_Defined 0x18 -#define Index_Invalidate_I (Cache_I | Index_Invalidate) -#define Index_Writeback_Inv_D (Cache_D | Index_Writeback_Inv) -#define Index_Writeback_Inv_V (Cache_V | Index_Writeback_Inv) -#define Index_Writeback_Inv_S (Cache_S | Index_Writeback_Inv) -#define Hit_Invalidate_I (Cache_I | Hit_Invalidate) -#define Hit_Writeback_Inv_D (Cache_D | Hit_Writeback_Inv) -#define Hit_Writeback_Inv_V (Cache_V | Hit_Writeback_Inv) -#define Hit_Writeback_Inv_S (Cache_S | Hit_Writeback_Inv) +#define Index_Writeback_Inv_LEAF0 (Cache_LEAF0 | Index_Writeback_Inv) +#define Index_Writeback_Inv_LEAF1 (Cache_LEAF1 | Index_Writeback_Inv) +#define Index_Writeback_Inv_LEAF2 (Cache_LEAF2 | Index_Writeback_Inv) +#define Index_Writeback_Inv_LEAF3 (Cache_LEAF3 | Index_Writeback_Inv) +#define Index_Writeback_Inv_LEAF4 (Cache_LEAF4 | Index_Writeback_Inv) +#define Index_Writeback_Inv_LEAF5 (Cache_LEAF5 | Index_Writeback_Inv) +#define 
Hit_Writeback_Inv_LEAF0 (Cache_LEAF0 | Hit_Writeback_Inv) +#define Hit_Writeback_Inv_LEAF1 (Cache_LEAF1 | Hit_Writeback_Inv) +#define Hit_Writeback_Inv_LEAF2 (Cache_LEAF2 | Hit_Writeback_Inv) +#define Hit_Writeback_Inv_LEAF3 (Cache_LEAF3 | Hit_Writeback_Inv) +#define Hit_Writeback_Inv_LEAF4 (Cache_LEAF4 | Hit_Writeback_Inv) +#define Hit_Writeback_Inv_LEAF5 (Cache_LEAF5 | Hit_Writeback_Inv) #endif /* __ASM_CACHEOPS_H */ diff --git a/arch/loongarch/include/asm/cpu-features.h b/arch/loongarch/include/asm/cpu-features.h index a8d87c40a0eb0606bc06cb90e1cf5263f5f8392d..b07974218393d1dd6f47fa45f429363134f4d605 100644 --- a/arch/loongarch/include/asm/cpu-features.h +++ b/arch/loongarch/include/asm/cpu-features.h @@ -19,11 +19,6 @@ #define cpu_has_loongarch32 (cpu_data[0].isa_level & LOONGARCH_CPU_ISA_32BIT) #define cpu_has_loongarch64 (cpu_data[0].isa_level & LOONGARCH_CPU_ISA_64BIT) -#define cpu_icache_line_size() cpu_data[0].icache.linesz -#define cpu_dcache_line_size() cpu_data[0].dcache.linesz -#define cpu_vcache_line_size() cpu_data[0].vcache.linesz -#define cpu_scache_line_size() cpu_data[0].scache.linesz - #ifdef CONFIG_32BIT # define cpu_has_64bits (cpu_data[0].isa_level & LOONGARCH_CPU_ISA_64BIT) # define cpu_vabits 31 diff --git a/arch/loongarch/include/asm/cpu-info.h b/arch/loongarch/include/asm/cpu-info.h index b6c4f96079dfedf2bc63e96a20f9feded8cff238..3fce27d92fcb2b4b1e2b2f27c6cab3c50e81e16d 100644 --- a/arch/loongarch/include/asm/cpu-info.h +++ b/arch/loongarch/include/asm/cpu-info.h @@ -9,19 +9,28 @@ #include #include +/* cache_desc->flags */ +enum { + CACHE_PRESENT = (1 << 0), + CACHE_PRIVATE = (1 << 1), /* core private cache */ + CACHE_INCLUSIVE = (1 << 2), /* include the lower level caches */ +}; /* * Descriptor for a cache */ struct cache_desc { - unsigned int waysize; /* Bytes per way */ + unsigned char type; + unsigned char level; unsigned short sets; /* Number of lines per set */ unsigned char ways; /* Number of ways */ unsigned char linesz; /* Size 
of line in bytes */ - unsigned char waybit; /* Bits to select in a cache set */ unsigned char flags; /* Flags describing cache properties */ }; +#define CACHE_LEAVES_MAX 6 + +#define CACHE_LEVEL_MAX 3 struct cpuinfo_loongarch { u64 asid_cache; unsigned long asid_mask; @@ -40,11 +49,8 @@ struct cpuinfo_loongarch { int tlbsizemtlb; int tlbsizestlbsets; int tlbsizestlbways; - struct cache_desc icache; /* Primary I-cache */ - struct cache_desc dcache; /* Primary D or combined I/D cache */ - struct cache_desc vcache; /* Victim cache, between pcache and scache */ - struct cache_desc scache; /* Secondary cache */ - struct cache_desc tcache; /* Tertiary/split secondary cache */ + unsigned int cache_leaves_present; /* number of cache_leaves[] elements */ + struct cache_desc cache_leaves[CACHE_LEAVES_MAX]; int core; /* physical core number in package */ int package;/* physical package number */ int vabits; /* Virtual Address size in bits */ diff --git a/arch/loongarch/include/asm/efi.h b/arch/loongarch/include/asm/efi.h index 720ac34499b66eb5644241a22e857ae5b25425fd..85176fc629b0d88790dd828eac3015c47252f363 100644 --- a/arch/loongarch/include/asm/efi.h +++ b/arch/loongarch/include/asm/efi.h @@ -7,7 +7,7 @@ #include -void __init efi_init(void); +void __init loongson_efi_init(void); void __init efi_runtime_init(void); #define ARCH_EFI_IRQ_FLAGS_MASK 0x00000004 /* Bit 2: CSR.CRMD.IE */ @@ -40,5 +40,6 @@ static inline unsigned long efi_get_max_initrd_addr(unsigned long image_addr) { return ULONG_MAX; } +extern void *early_memremap_ro(resource_size_t phys_addr, unsigned long size); #endif /* _ASM_LOONGARCH_EFI_H */ diff --git a/arch/loongarch/include/asm/elf.h b/arch/loongarch/include/asm/elf.h index 5f3ff4781fda83fd9e861a862f1a4ea8611d8619..7af0cebf28d73c5d3551eb10ecc36beb6b08d2d7 100644 --- a/arch/loongarch/include/asm/elf.h +++ b/arch/loongarch/include/asm/elf.h @@ -74,6 +74,43 @@ #define R_LARCH_SUB64 56 #define R_LARCH_GNU_VTINHERIT 57 #define R_LARCH_GNU_VTENTRY 58 +#define 
R_LARCH_B16 64 +#define R_LARCH_B21 65 +#define R_LARCH_B26 66 +#define R_LARCH_ABS_HI20 67 +#define R_LARCH_ABS_LO12 68 +#define R_LARCH_ABS64_LO20 69 +#define R_LARCH_ABS64_HI12 70 +#define R_LARCH_PCALA_HI20 71 +#define R_LARCH_PCALA_LO12 72 +#define R_LARCH_PCALA64_LO20 73 +#define R_LARCH_PCALA64_HI12 74 +#define R_LARCH_GOT_PC_HI20 75 +#define R_LARCH_GOT_PC_LO12 76 +#define R_LARCH_GOT64_PC_LO20 77 +#define R_LARCH_GOT64_PC_HI12 78 +#define R_LARCH_GOT_HI20 79 +#define R_LARCH_GOT_LO12 80 +#define R_LARCH_GOT64_LO20 81 +#define R_LARCH_GOT64_HI12 82 +#define R_LARCH_TLS_LE_HI20 83 +#define R_LARCH_TLS_LE_LO12 84 +#define R_LARCH_TLS_LE64_LO20 85 +#define R_LARCH_TLS_LE64_HI12 86 +#define R_LARCH_TLS_IE_PC_HI20 87 +#define R_LARCH_TLS_IE_PC_LO12 88 +#define R_LARCH_TLS_IE64_PC_LO20 89 +#define R_LARCH_TLS_IE64_PC_HI12 90 +#define R_LARCH_TLS_IE_HI20 91 +#define R_LARCH_TLS_IE_LO12 92 +#define R_LARCH_TLS_IE64_LO20 93 +#define R_LARCH_TLS_IE64_HI12 94 +#define R_LARCH_TLS_LD_PC_HI20 95 +#define R_LARCH_TLS_LD_HI20 96 +#define R_LARCH_TLS_GD_PC_HI20 97 +#define R_LARCH_TLS_GD_HI20 98 +#define R_LARCH_32_PCREL 99 +#define R_LARCH_RELAX 100 #ifndef ELF_ARCH diff --git a/arch/loongarch/include/asm/entry-common.h b/arch/loongarch/include/asm/entry-common.h index 0fe2a098ded96caec4e556d53552392a9fba2e60..a1d2ff1d05923c0f91bdac12b6250d07adf8bb76 100644 --- a/arch/loongarch/include/asm/entry-common.h +++ b/arch/loongarch/include/asm/entry-common.h @@ -7,7 +7,7 @@ static inline bool on_thread_stack(void) { - return !(((unsigned long)(current->stack) ^ current_stack_pointer) & ~(THREAD_SIZE - 1)); + return !(((unsigned long)(current->stack) ^ current_stack_pointer()) & ~(THREAD_SIZE - 1)); } #endif diff --git a/arch/loongarch/include/asm/fpu.h b/arch/loongarch/include/asm/fpu.h index 358b254d9c1d665e3891a8c21c9a34a886871c3b..1363f92929df2a552724df0399484da8217861d9 100644 --- a/arch/loongarch/include/asm/fpu.h +++ b/arch/loongarch/include/asm/fpu.h @@ -25,6 +25,30 @@ 
extern void _init_fpu(unsigned int); extern void _save_fp(struct loongarch_fpu *); extern void _restore_fp(struct loongarch_fpu *); +extern void _save_lsx(struct loongarch_fpu *fpu); +extern void _restore_lsx(struct loongarch_fpu *fpu); +extern void _init_lsx_upper(void); +extern void _restore_lsx_upper(struct loongarch_fpu *fpu); + +extern void _save_lasx(struct loongarch_fpu *fpu); +extern void _restore_lasx(struct loongarch_fpu *fpu); +extern void _init_lasx_upper(void); +extern void _restore_lasx_upper(struct loongarch_fpu *fpu); + +static inline void enable_lsx(void); +static inline void disable_lsx(void); +static inline void save_lsx(struct task_struct *t); +static inline void restore_lsx(struct task_struct *t); + +static inline void enable_lasx(void); +static inline void disable_lasx(void); +static inline void save_lasx(struct task_struct *t); +static inline void restore_lasx(struct task_struct *t); + +#ifdef CONFIG_LOONGSON3_ACPI_CPUFREQ +DECLARE_PER_CPU(unsigned long, msa_count); +DECLARE_PER_CPU(unsigned long, lasx_count); +#endif /* * Mask the FCSR Cause bits according to the Enable bits, observing * that Unimplemented is always enabled. @@ -41,6 +65,29 @@ static inline int is_fp_enabled(void) 1 : 0; } +static inline int is_lsx_enabled(void) +{ + if (!cpu_has_lsx) + return 0; + + return (csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_LSXEN) ? + 1 : 0; +} + +static inline int is_lasx_enabled(void) +{ + if (!cpu_has_lasx) + return 0; + + return (csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_LASXEN) ? 
+ 1 : 0; +} + +static inline int is_simd_enabled(void) +{ + return is_lsx_enabled() | is_lasx_enabled(); +} + #define enable_fpu() set_csr_euen(CSR_EUEN_FPEN) #define disable_fpu() clear_csr_euen(CSR_EUEN_FPEN) @@ -78,9 +125,22 @@ static inline void own_fpu(int restore) static inline void lose_fpu_inatomic(int save, struct task_struct *tsk) { if (is_fpu_owner()) { - if (save) - _save_fp(&tsk->thread.fpu); - disable_fpu(); + if (is_simd_enabled()) { + if (save) { + if (is_lasx_enabled()) + save_lasx(tsk); + else + save_lsx(tsk); + } + disable_fpu(); + disable_lsx(); + disable_lasx(); + clear_tsk_thread_flag(tsk, TIF_USEDSIMD); + } else { + if (save) + _save_fp(&tsk->thread.fpu); + disable_fpu(); + } clear_tsk_thread_flag(tsk, TIF_USEDFPU); } KSTK_EUEN(tsk) &= ~(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN); @@ -126,4 +186,147 @@ static inline union fpureg *get_fpu_regs(struct task_struct *tsk) return tsk->thread.fpu.fpr; } +enum { + CTX_LSX = 1, + CTX_LASX = 2, +}; + +static inline int is_simd_owner(void) +{ + return test_thread_flag(TIF_USEDSIMD); +} + +#ifdef CONFIG_CPU_HAS_LSX + +static inline void enable_lsx(void) +{ + if (cpu_has_lsx) + csr_xchg32(CSR_EUEN_LSXEN, CSR_EUEN_LSXEN, LOONGARCH_CSR_EUEN); +#ifdef CONFIG_LOONGSON3_ACPI_CPUFREQ + per_cpu(msa_count, raw_smp_processor_id())++; +#endif +} + +static inline void disable_lsx(void) +{ + if (cpu_has_lsx) + csr_xchg32(0, CSR_EUEN_LSXEN, LOONGARCH_CSR_EUEN); +} + +static inline void save_lsx(struct task_struct *t) +{ + if (cpu_has_lsx) + _save_lsx(&t->thread.fpu); +} + +static inline void restore_lsx(struct task_struct *t) +{ + if (cpu_has_lsx) + _restore_lsx(&t->thread.fpu); +} + +static inline void init_lsx_upper(void) +{ + /* + * Check cpu_has_lsx only if it's a constant. This will allow the + * compiler to optimise out code for CPUs without LSX without adding + * an extra redundant check for CPUs with LSX. 
+ */ + if (__builtin_constant_p(cpu_has_lsx) && !cpu_has_lsx) + return; + + _init_lsx_upper(); +} + +static inline void restore_lsx_upper(struct task_struct *t) +{ + if (cpu_has_lsx) + _restore_lsx_upper(&t->thread.fpu); +} + +#else +static inline void enable_lsx(void) {} +static inline void disable_lsx(void) {} +static inline void save_lsx(struct task_struct *t) {} +static inline void restore_lsx(struct task_struct *t) {} +static inline void init_lsx_upper(void) {} +static inline void restore_lsx_upper(struct task_struct *t) {} +#endif + +#ifdef CONFIG_CPU_HAS_LASX + +static inline void enable_lasx(void) +{ + + if (cpu_has_lasx) { + csr_xchg32(CSR_EUEN_LASXEN, CSR_EUEN_LASXEN, LOONGARCH_CSR_EUEN); +#ifdef CONFIG_LOONGSON3_ACPI_CPUFREQ + per_cpu(lasx_count, raw_smp_processor_id())++; +#endif + } +} + +static inline void disable_lasx(void) +{ + if (cpu_has_lasx) + csr_xchg32(0, CSR_EUEN_LASXEN, LOONGARCH_CSR_EUEN); +} + +static inline void save_lasx(struct task_struct *t) +{ + if (cpu_has_lasx) + _save_lasx(&t->thread.fpu); +} + +static inline void restore_lasx(struct task_struct *t) +{ + if (cpu_has_lasx) + _restore_lasx(&t->thread.fpu); +} + +static inline void init_lasx_upper(void) +{ + if (cpu_has_lasx) + _init_lasx_upper(); +} + +static inline void restore_lasx_upper(struct task_struct *t) +{ + if (cpu_has_lasx) + _restore_lasx_upper(&t->thread.fpu); +} + +#else +static inline void enable_lasx(void) {} +static inline void disable_lasx(void) {} +static inline void save_lasx(struct task_struct *t) {} +static inline void restore_lasx(struct task_struct *t) {} +static inline void init_lasx_upper(void) {} +static inline void restore_lasx_upper(struct task_struct *t) {} +#endif + +static inline int thread_lsx_context_live(void) +{ + int ret = 0; + + if (__builtin_constant_p(cpu_has_lsx) && !cpu_has_lsx) + goto out; + + ret = test_thread_flag(TIF_LSX_CTX_LIVE) ? 
CTX_LSX : 0; +out: + return ret; +} + +static inline int thread_lasx_context_live(void) +{ + int ret = 0; + + if (__builtin_constant_p(cpu_has_lasx) && !cpu_has_lasx) + goto out; + + ret = test_thread_flag(TIF_LASX_CTX_LIVE) ? CTX_LASX : 0; +out: + return ret; +} + #endif /* _ASM_FPU_H */ diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index 3ba4f7e87cd254404b223395b9567f09fd718821..e82119198ce3c56848182d2fc51dc53e0d9ed114 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -224,6 +224,13 @@ static inline u32 read_cpucfg(u32 reg) #define CPUCFG48_VFPU_CG BIT(2) #define CPUCFG48_RAM_CG BIT(3) +#define CACHE_WAYS_M GENMASK(15, 0) +#define CACHE_SETS_M GENMASK(23, 16) +#define CACHE_LSIZE_M GENMASK(30, 24) +#define CACHE_WAYS 0 +#define CACHE_SETS 16 +#define CACHE_LSIZE 24 + #ifndef __ASSEMBLY__ /* CSR */ diff --git a/arch/loongarch/include/asm/module.h b/arch/loongarch/include/asm/module.h index 9f6718df18547e3a29e81c5a44b01b2de7a22c1d..b29b19a46f4270553c03ce85d1a5d59be78b9d26 100644 --- a/arch/loongarch/include/asm/module.h +++ b/arch/loongarch/include/asm/module.h @@ -17,10 +17,15 @@ struct mod_section { }; struct mod_arch_specific { + struct mod_section got; struct mod_section plt; struct mod_section plt_idx; }; +struct got_entry { + Elf_Addr symbol_addr; +}; + struct plt_entry { u32 inst_lu12iw; u32 inst_lu32id; @@ -29,10 +34,16 @@ struct plt_entry { }; struct plt_idx_entry { - unsigned long symbol_addr; + Elf_Addr symbol_addr; }; -Elf_Addr module_emit_plt_entry(struct module *mod, unsigned long val); +Elf_Addr module_emit_got_entry(struct module *mod, Elf_Addr val); +Elf_Addr module_emit_plt_entry(struct module *mod, Elf_Addr val); + +static inline struct got_entry emit_got_entry(Elf_Addr val) +{ + return (struct got_entry) { val }; +} static inline struct plt_entry emit_plt_entry(unsigned long val) { @@ -77,4 +88,16 @@ static inline struct plt_entry 
*get_plt_entry(unsigned long val, return plt + plt_idx; } +static inline struct got_entry *get_got_entry(Elf_Addr val, + const struct mod_section *sec) +{ + struct got_entry *got = (struct got_entry *)sec->shdr->sh_addr; + int i; + + for (i = 0; i < sec->num_entries; i++) + if (got[i].symbol_addr == val) + return &got[i]; + return NULL; +} + #endif /* _ASM_MODULE_H */ diff --git a/arch/loongarch/include/asm/module.lds.h b/arch/loongarch/include/asm/module.lds.h index 31c1c0db11a3a3690a36252271cfdbdfae8dcdcb..a3d1bc0fcc72e99dcab35aa623f8956fdbec41f3 100644 --- a/arch/loongarch/include/asm/module.lds.h +++ b/arch/loongarch/include/asm/module.lds.h @@ -2,6 +2,7 @@ /* Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ SECTIONS { . = ALIGN(4); + .got : { BYTE(0) } .plt : { BYTE(0) } .plt.idx : { BYTE(0) } } diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h index 0bd6b0110198f776a7bc154f052cfc0d370ac1d8..e4872bcd851cb8c688d5544d0fd0ade028283665 100644 --- a/arch/loongarch/include/asm/percpu.h +++ b/arch/loongarch/include/asm/percpu.h @@ -8,15 +8,29 @@ #include #include +/* + * The "address" (in fact, offset from $r21) of a per-CPU variable is close to + * the loading address of main kernel image, but far from where the modules are + * loaded. Tell the compiler this fact when using explicit relocs. 
+ */ +#if defined(MODULE) && defined(CONFIG_AS_HAS_EXPLICIT_RELOCS) +#define PER_CPU_ATTRIBUTES __attribute__((model("extreme"))) +#endif + /* Use r21 for fast access */ -register unsigned long __my_cpu_offset __asm__("$r21"); +static inline unsigned long __kern_my_cpu_offset(void) +{ + register unsigned long off __asm__("$r21"); + + return off; +} +#define __my_cpu_offset __kern_my_cpu_offset() static inline void set_my_cpu_offset(unsigned long off) { - __my_cpu_offset = off; + asm volatile("move $r21, %0"::"r"(off)); csr_write64(off, PERCPU_BASE_KS); } -#define __my_cpu_offset __my_cpu_offset #define PERCPU_OP(op, asm_op, c_op) \ static inline unsigned long __percpu_##op(void *ptr, \ diff --git a/arch/loongarch/include/asm/thread_info.h b/arch/loongarch/include/asm/thread_info.h index 1be904ab37926ea555a2bc78870f730de9882462..c9030464cbb59272c2eb1e02f896382e9f46b731 100644 --- a/arch/loongarch/include/asm/thread_info.h +++ b/arch/loongarch/include/asm/thread_info.h @@ -44,14 +44,19 @@ struct thread_info { } /* How to get the thread information struct from C. 
*/ -register struct thread_info *__current_thread_info __asm__("$tp"); - static inline struct thread_info *current_thread_info(void) { + register struct thread_info *__current_thread_info __asm__("$tp"); + return __current_thread_info; } -register unsigned long current_stack_pointer __asm__("$sp"); +static inline unsigned long current_stack_pointer(void) +{ + register unsigned long __current_stack_pointer __asm__("$sp"); + + return __current_stack_pointer; +} #endif /* !__ASSEMBLY__ */ diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 7225916dd37815352c8699892cceb6d66297f868..0b2aea3d94efbcbe4a291d5af1440f8e6b6ae127 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -7,7 +7,7 @@ extra-y := head.o vmlinux.lds obj-y += cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \ traps.o irq.o idle.o process.o dma.o mem.o io.o reset.o switch.o \ - elf.o syscall.o signal.o time.o topology.o inst.o ptrace.o vdso.o + elf.o legacy_boot.o syscall.o signal.o time.o topology.o inst.o ptrace.o vdso.o obj-$(CONFIG_ACPI) += acpi.o obj-$(CONFIG_EFI) += efi.o diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index f1c928648a4a6178b4dc5449126429292374af27..0d6a4de10f6c52c85d886f66a5df11469a0f2414 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -16,6 +16,7 @@ #include #include #include +#include "legacy_boot.h" int acpi_disabled; EXPORT_SYMBOL(acpi_disabled); @@ -48,7 +49,7 @@ void __init __acpi_unmap_table(void __iomem *map, unsigned long size) early_memunmap(map, size); } -void __init __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) +void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) { if (!memblock_is_memory(phys)) return ioremap(phys, size); @@ -74,7 +75,7 @@ void __init acpi_boot_table_init(void) } #ifdef CONFIG_SMP -static int set_processor_mask(u32 id, u32 flags) +int set_processor_mask(u32 id, u32 flags) { int cpu, 
cpuid = id; @@ -139,6 +140,7 @@ acpi_parse_eio_master(union acpi_subtable_headers *header, const unsigned long e static void __init acpi_process_madt(void) { + int error; #ifdef CONFIG_SMP int i; @@ -147,6 +149,16 @@ static void __init acpi_process_madt(void) __cpu_logical_map[i] = -1; } #endif + + if (efi_bp && bpi_version <= BPI_VERSION_V1) { + error = legacy_madt_table_init(); + if (error < 0) { + disable_acpi(); + pr_err(PREFIX "Invalid BIOS MADT (legacy), ACPI disabled\n"); + } + return; + } + acpi_table_parse_madt(ACPI_MADT_TYPE_CORE_PIC, acpi_parse_processor, MAX_CORE_PIC); diff --git a/arch/loongarch/kernel/cacheinfo.c b/arch/loongarch/kernel/cacheinfo.c index 4662b06269f42eea27830bd2439fc693b4e487b5..f8eb4c7e4ddf15a959f4744bf302864e1df52e39 100644 --- a/arch/loongarch/kernel/cacheinfo.c +++ b/arch/loongarch/kernel/cacheinfo.c @@ -5,69 +5,29 @@ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ #include +#include #include #include -/* Populates leaf and increments to next leaf */ -#define populate_cache(cache, leaf, c_level, c_type) \ -do { \ - leaf->type = c_type; \ - leaf->level = c_level; \ - leaf->coherency_line_size = c->cache.linesz; \ - leaf->number_of_sets = c->cache.sets; \ - leaf->ways_of_associativity = c->cache.ways; \ - leaf->size = c->cache.linesz * c->cache.sets * \ - c->cache.ways; \ - if (leaf->level > 2) \ - leaf->size *= nodes_per_package; \ - leaf++; \ -} while (0) - int init_cache_level(unsigned int cpu) { - struct cpuinfo_loongarch *c = ¤t_cpu_data; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); - int levels = 0, leaves = 0; - - /* - * If Dcache is not set, we assume the cache structures - * are not properly initialized. - */ - if (c->dcache.waysize) - levels += 1; - else - return -ENOENT; - - - leaves += (c->icache.waysize) ? 
2 : 1; - - if (c->vcache.waysize) { - levels++; - leaves++; - } - - if (c->scache.waysize) { - levels++; - leaves++; - } + unsigned int cache_present = current_cpu_data.cache_leaves_present; - if (c->tcache.waysize) { - levels++; - leaves++; - } - - this_cpu_ci->num_levels = levels; - this_cpu_ci->num_leaves = leaves; + this_cpu_ci->num_levels = + current_cpu_data.cache_leaves[cache_present - 1].level; + this_cpu_ci->num_leaves = cache_present; return 0; } static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf, - struct cacheinfo *sib_leaf) + struct cacheinfo *sib_leaf) { - return !((this_leaf->level == 1) || (this_leaf->level == 2)); + return (!(*(unsigned char *)(this_leaf->priv) & CACHE_PRIVATE) && + !(*(unsigned char *)(sib_leaf->priv) & CACHE_PRIVATE)); } -static void cache_cpumap_setup(unsigned int cpu) +static void __cache_cpumap_setup(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct cacheinfo *this_leaf, *sib_leaf; @@ -85,8 +45,9 @@ static void cache_cpumap_setup(unsigned int cpu) for_each_online_cpu(i) { struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i); - if (i == cpu || !sib_cpu_ci->info_list) - continue;/* skip if itself or no cacheinfo */ + if (i == cpu || !sib_cpu_ci->info_list || + (cpu_to_node(i) != cpu_to_node(cpu))) + continue; sib_leaf = sib_cpu_ci->info_list + index; if (cache_leaves_are_shared(this_leaf, sib_leaf)) { cpumask_set_cpu(cpu, &sib_leaf->shared_cpu_map); @@ -98,33 +59,30 @@ static void cache_cpumap_setup(unsigned int cpu) int populate_cache_leaves(unsigned int cpu) { - int level = 1, nodes_per_package = 1; - struct cpuinfo_loongarch *c = ¤t_cpu_data; + struct cache_desc *cdesc_tmp, *cdesc = current_cpu_data.cache_leaves; + unsigned int cache_present = current_cpu_data.cache_leaves_present; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct cacheinfo *this_leaf = this_cpu_ci->info_list; - - if (loongson_sysconf.nr_nodes > 1) - nodes_per_package = 
loongson_sysconf.cores_per_package - / loongson_sysconf.cores_per_node; - - if (c->icache.waysize) { - populate_cache(dcache, this_leaf, level, CACHE_TYPE_DATA); - populate_cache(icache, this_leaf, level++, CACHE_TYPE_INST); - } else { - populate_cache(dcache, this_leaf, level++, CACHE_TYPE_UNIFIED); + int i; + + for (i = 0; i < cache_present; i++) { + cdesc_tmp = cdesc + i; + + this_leaf->type = cdesc_tmp->type; + this_leaf->level = cdesc_tmp->level; + this_leaf->coherency_line_size = cdesc_tmp->linesz; + this_leaf->number_of_sets = cdesc_tmp->sets; + this_leaf->ways_of_associativity = cdesc_tmp->ways; + this_leaf->size = + cdesc_tmp->linesz * cdesc_tmp->sets * cdesc_tmp->ways; + this_leaf->priv = &cdesc_tmp->flags; + this_leaf++; } - if (c->vcache.waysize) - populate_cache(vcache, this_leaf, level++, CACHE_TYPE_UNIFIED); - - if (c->scache.waysize) - populate_cache(scache, this_leaf, level++, CACHE_TYPE_UNIFIED); - - if (c->tcache.waysize) - populate_cache(tcache, this_leaf, level++, CACHE_TYPE_UNIFIED); - - cache_cpumap_setup(cpu); - this_cpu_ci->cpu_map_populated = true; + if (!of_have_populated_dt()) { + __cache_cpumap_setup(cpu); + this_cpu_ci->cpu_map_populated = true; + } return 0; } diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index 529ab8f44ec6d9fcecbba29099c224fa087d9c08..3dd31afb2ae6982e7ff971958387a74c0e4a18f0 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -111,6 +111,18 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) c->options |= LOONGARCH_CPU_FPU; elf_hwcap |= HWCAP_LOONGARCH_FPU; } +#ifdef CONFIG_CPU_HAS_LSX + if (config & CPUCFG2_LSX) { + c->options |= LOONGARCH_CPU_LSX; + elf_hwcap |= HWCAP_LOONGARCH_LSX; + } +#endif +#ifdef CONFIG_CPU_HAS_LASX + if (config & CPUCFG2_LASX) { + c->options |= LOONGARCH_CPU_LASX; + elf_hwcap |= HWCAP_LOONGARCH_LASX; + } +#endif if (config & CPUCFG2_COMPLEX) { c->options |= LOONGARCH_CPU_COMPLEX; elf_hwcap |= 
HWCAP_LOONGARCH_COMPLEX; diff --git a/arch/loongarch/kernel/efi.c b/arch/loongarch/kernel/efi.c index 1f1f755fb425514df023d86c219dc289258b07c9..8719096ac4864b8f248d60b145a4cf9f02b2d7ce 100644 --- a/arch/loongarch/kernel/efi.c +++ b/arch/loongarch/kernel/efi.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -22,16 +23,175 @@ #include #include +#include #include +#include "legacy_boot.h" static unsigned long efi_nr_tables; static unsigned long efi_config_table; static efi_system_table_t *efi_systab; static efi_config_table_type_t arch_tables[] __initdata = {{},}; +static __initdata pgd_t *pgd_efi; + +static int __init efimap_populate_hugepages( + unsigned long start, unsigned long end, + pgprot_t prot) +{ + unsigned long addr; + unsigned long next; + pmd_t entry; + pud_t *pud; + pmd_t *pmd; + + for (addr = start; addr < end; addr = next) { + next = pmd_addr_end(addr, end); + pud = pud_offset((p4d_t *)pgd_efi + pgd_index(addr), addr); + if (pud_none(*pud)) { + void *p = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); + + if (!p) + return -1; + pmd_init(p); + pud_populate(&init_mm, pud, p); + } + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + entry = pfn_pmd((addr >> PAGE_SHIFT), prot); + entry = pmd_mkhuge(entry); + set_pmd_at(&init_mm, addr, pmd, entry); + } + } + return 0; +} + +static void __init efi_map_pgt(void) +{ + unsigned long node; + unsigned long start, end; + unsigned long start_pfn, end_pfn; + + pgd_efi = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); + if (!pgd_efi) { + pr_err("alloc efi pgd failed!\n"); + return; + } + pgd_init(pgd_efi); + csr_write64((long)pgd_efi, LOONGARCH_CSR_PGDL); + + /* Low Memory, Cached */ + efimap_populate_hugepages(0, SZ_256M, PAGE_KERNEL); + + for_each_node_mask(node, node_possible_map) { + /* MMIO Registers, Uncached */ + efimap_populate_hugepages(SZ_256M | (node << 44), + SZ_512M | (node << 44), PAGE_KERNEL_SUC); + + get_pfn_range_for_nid(node, &start_pfn, &end_pfn); + start = 
ALIGN_DOWN(start_pfn << PAGE_SHIFT, PMD_SIZE); + end = ALIGN(end_pfn << PAGE_SHIFT, PMD_SIZE); + + /* System memory, Cached */ + efimap_populate_hugepages(node ? start : SZ_512M, end, PAGE_KERNEL); + } +} + +static int __init efimap_free_pgt(unsigned long start, unsigned long end) +{ + unsigned long addr; + unsigned long next; + pud_t *pud; + pmd_t *pmd; + + for (addr = start; addr < end; addr = next) { + next = pmd_addr_end(addr, end); + + pud = pud_offset((p4d_t *)pgd_efi + pgd_index(addr), addr); + if (!pud_present(*pud)) + continue; + pmd = pmd_offset(pud, addr); + memblock_free_early(virt_to_phys((void *)pmd), PAGE_SIZE); + pud_clear(pud); + } + return 0; +} + +static void __init efi_unmap_pgt(void) +{ + unsigned long node; + unsigned long start, end; + unsigned long start_pfn, end_pfn; + + for_each_node_mask(node, node_possible_map) { + get_pfn_range_for_nid(node, &start_pfn, &end_pfn); + start = ALIGN_DOWN(start_pfn << PAGE_SHIFT, PMD_SIZE); + end = ALIGN(end_pfn << PAGE_SHIFT, PMD_SIZE); + + /* Free pagetable memory */ + efimap_free_pgt(start, end); + } + + memblock_free_early(virt_to_phys((void *)pgd_efi), PAGE_SIZE); + csr_write64((long)invalid_pg_dir, LOONGARCH_CSR_PGDL); + local_flush_tlb_all(); + + return; +} + +/* + * set_virtual_map() - create a virtual mapping for the EFI memory map and call + * efi_set_virtual_address_map enter virtual for runtime service + * + * This function populates the virt_addr fields of all memory region descriptors + * in @memory_map whose EFI_MEMORY_RUNTIME attribute is set. Those descriptors + * are also copied to @runtime_map, and their total count is returned in @count. 
+ */ +static int __init set_virtual_map(void) +{ + efi_status_t status; + int count = 0; + unsigned int size; + unsigned long attr; + efi_runtime_services_t *rt; + efi_set_virtual_address_map_t *svam; + efi_memory_desc_t *in, runtime_map[32]; + + if (efi_bp) + return EFI_SUCCESS; + + size = sizeof(efi_memory_desc_t); + + for_each_efi_memory_desc(in) { + attr = in->attribute; + if (!(attr & EFI_MEMORY_RUNTIME) || count >= ARRAY_SIZE(runtime_map)) + continue; /* not a runtime region, or runtime_map[] is already full */ + + if (attr & (EFI_MEMORY_WB | EFI_MEMORY_WT)) + in->virt_addr = TO_CACHE(in->phys_addr); + else + in->virt_addr = TO_UNCACHE(in->phys_addr); + + memcpy(&runtime_map[count++], in, size); + } + + rt = early_memremap_ro((unsigned long)efi_systab->runtime, sizeof(*rt)); + + /* Install the new virtual address map */ + svam = rt->set_virtual_address_map; + + efi_map_pgt(); + + status = svam(size * count, size, efi.memmap.desc_version, + (efi_memory_desc_t *)TO_PHYS((unsigned long)runtime_map)); + + efi_unmap_pgt(); + return status == EFI_SUCCESS ? 0 : -EIO; /* report a failed SetVirtualAddressMap() to the caller */ +} void __init efi_runtime_init(void) { + int status; /* signed, so the "status < 0" test below can actually fire */ + if (!efi_enabled(EFI_BOOT)) return; @@ -40,6 +200,13 @@ void __init efi_runtime_init(void) return; } + if (!efi_systab->runtime) + return; + + status = set_virtual_map(); + if (status < 0) + return; + efi.runtime = (efi_runtime_services_t *)efi_systab->runtime; efi.runtime_version = (unsigned int)efi.runtime->hdr.revision; @@ -47,15 +214,18 @@ void __init efi_runtime_init(void) set_bit(EFI_RUNTIME_SERVICES, &efi.flags); } -void __init efi_init(void) +void __init loongson_efi_init(void) { int size; void *config_tables; - if (!efi_system_table) - return; + if (efi_system_table) { + efi_systab = (efi_system_table_t *)early_memremap_ro(efi_system_table, + sizeof(*efi_systab)); + } else { + efi_systab = (efi_system_table_t *)efi_bp->systemtable; + } - efi_systab = (efi_system_table_t *)early_memremap_ro(efi_system_table, sizeof(*efi_systab)); if (!efi_systab) { pr_err("Can't find EFI system table.\n"); return; diff --git a/arch/loongarch/kernel/env.c
b/arch/loongarch/kernel/env.c index 82b478a5c665746e90ed72dc17b00d6c6089807e..596e6635368ed8a0f2bda6d37a481cc93a4521a3 100644 --- a/arch/loongarch/kernel/env.c +++ b/arch/loongarch/kernel/env.c @@ -12,6 +12,7 @@ #include #include #include +#include "legacy_boot.h" u64 efi_system_table; struct loongson_system_configuration loongson_sysconf; @@ -23,6 +24,9 @@ void __init init_environ(void) struct efi_memory_map_data data; void *fdt_ptr = early_memremap_ro(fw_arg1, SZ_64K); + if (efi_bp) + return; + if (efi_boot) set_bit(EFI_BOOT, &efi.flags); else diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S index 576b3370a296da0e0db3b710df2f682e6b628a12..65d245a0f409d6f69dd34b8d7a62236a9021b1c4 100644 --- a/arch/loongarch/kernel/fpu.S +++ b/arch/loongarch/kernel/fpu.S @@ -26,6 +26,32 @@ .previous .endm + .macro EX_V insn, reg, src, offs + parse_v __insn, \insn + parse_v __offs, \offs + parse_r __src, \src + parse_vr __reg, \reg + +.ex\@: + .word __insn << 22 | __offs << 10 | __src << 5 | __reg + .section __ex_table,"a" + PTR .ex\@, fault + .previous + .endm + + .macro EX_XV insn, reg, src, offs + parse_v __insn, \insn + parse_v __offs, \offs + parse_r __src, \src + parse_xr __reg, \reg + +.ex\@: + .word __insn << 22 | __offs << 10 | __src << 5 | __reg + .section __ex_table,"a" + PTR .ex\@, fault + .previous + .endm + .macro sc_save_fp base EX fst.d $f0, \base, (0 * FPU_REG_WIDTH) EX fst.d $f1, \base, (1 * FPU_REG_WIDTH) @@ -146,6 +172,146 @@ movgr2fcsr fcsr0, \tmp0 .endm + .macro sc_save_lsx base + EX_V 0xb1 $vr0, \base, (0 * LSX_REG_WIDTH) + EX_V 0xb1 $vr1, \base, (1 * LSX_REG_WIDTH) + EX_V 0xb1 $vr2, \base, (2 * LSX_REG_WIDTH) + EX_V 0xb1 $vr3, \base, (3 * LSX_REG_WIDTH) + EX_V 0xb1 $vr4, \base, (4 * LSX_REG_WIDTH) + EX_V 0xb1 $vr5, \base, (5 * LSX_REG_WIDTH) + EX_V 0xb1 $vr6, \base, (6 * LSX_REG_WIDTH) + EX_V 0xb1 $vr7, \base, (7 * LSX_REG_WIDTH) + EX_V 0xb1 $vr8, \base, (8 * LSX_REG_WIDTH) + EX_V 0xb1 $vr9, \base, (9 * LSX_REG_WIDTH) + EX_V 0xb1 $vr10, 
\base, (10 * LSX_REG_WIDTH) + EX_V 0xb1 $vr11, \base, (11 * LSX_REG_WIDTH) + EX_V 0xb1 $vr12, \base, (12 * LSX_REG_WIDTH) + EX_V 0xb1 $vr13, \base, (13 * LSX_REG_WIDTH) + EX_V 0xb1 $vr14, \base, (14 * LSX_REG_WIDTH) + EX_V 0xb1 $vr15, \base, (15 * LSX_REG_WIDTH) + EX_V 0xb1 $vr16, \base, (16 * LSX_REG_WIDTH) + EX_V 0xb1 $vr17, \base, (17 * LSX_REG_WIDTH) + EX_V 0xb1 $vr18, \base, (18 * LSX_REG_WIDTH) + EX_V 0xb1 $vr19, \base, (19 * LSX_REG_WIDTH) + EX_V 0xb1 $vr20, \base, (20 * LSX_REG_WIDTH) + EX_V 0xb1 $vr21, \base, (21 * LSX_REG_WIDTH) + EX_V 0xb1 $vr22, \base, (22 * LSX_REG_WIDTH) + EX_V 0xb1 $vr23, \base, (23 * LSX_REG_WIDTH) + EX_V 0xb1 $vr24, \base, (24 * LSX_REG_WIDTH) + EX_V 0xb1 $vr25, \base, (25 * LSX_REG_WIDTH) + EX_V 0xb1 $vr26, \base, (26 * LSX_REG_WIDTH) + EX_V 0xb1 $vr27, \base, (27 * LSX_REG_WIDTH) + EX_V 0xb1 $vr28, \base, (28 * LSX_REG_WIDTH) + EX_V 0xb1 $vr29, \base, (29 * LSX_REG_WIDTH) + EX_V 0xb1 $vr30, \base, (30 * LSX_REG_WIDTH) + EX_V 0xb1 $vr31, \base, (31 * LSX_REG_WIDTH) + .endm + + .macro sc_restore_lsx base + EX_V 0xb0 $vr0, \base, (0 * LSX_REG_WIDTH) + EX_V 0xb0 $vr1, \base, (1 * LSX_REG_WIDTH) + EX_V 0xb0 $vr2, \base, (2 * LSX_REG_WIDTH) + EX_V 0xb0 $vr3, \base, (3 * LSX_REG_WIDTH) + EX_V 0xb0 $vr4, \base, (4 * LSX_REG_WIDTH) + EX_V 0xb0 $vr5, \base, (5 * LSX_REG_WIDTH) + EX_V 0xb0 $vr6, \base, (6 * LSX_REG_WIDTH) + EX_V 0xb0 $vr7, \base, (7 * LSX_REG_WIDTH) + EX_V 0xb0 $vr8, \base, (8 * LSX_REG_WIDTH) + EX_V 0xb0 $vr9, \base, (9 * LSX_REG_WIDTH) + EX_V 0xb0 $vr10, \base, (10 * LSX_REG_WIDTH) + EX_V 0xb0 $vr11, \base, (11 * LSX_REG_WIDTH) + EX_V 0xb0 $vr12, \base, (12 * LSX_REG_WIDTH) + EX_V 0xb0 $vr13, \base, (13 * LSX_REG_WIDTH) + EX_V 0xb0 $vr14, \base, (14 * LSX_REG_WIDTH) + EX_V 0xb0 $vr15, \base, (15 * LSX_REG_WIDTH) + EX_V 0xb0 $vr16, \base, (16 * LSX_REG_WIDTH) + EX_V 0xb0 $vr17, \base, (17 * LSX_REG_WIDTH) + EX_V 0xb0 $vr18, \base, (18 * LSX_REG_WIDTH) + EX_V 0xb0 $vr19, \base, (19 * LSX_REG_WIDTH) + EX_V 0xb0 $vr20, \base, 
(20 * LSX_REG_WIDTH) + EX_V 0xb0 $vr21, \base, (21 * LSX_REG_WIDTH) + EX_V 0xb0 $vr22, \base, (22 * LSX_REG_WIDTH) + EX_V 0xb0 $vr23, \base, (23 * LSX_REG_WIDTH) + EX_V 0xb0 $vr24, \base, (24 * LSX_REG_WIDTH) + EX_V 0xb0 $vr25, \base, (25 * LSX_REG_WIDTH) + EX_V 0xb0 $vr26, \base, (26 * LSX_REG_WIDTH) + EX_V 0xb0 $vr27, \base, (27 * LSX_REG_WIDTH) + EX_V 0xb0 $vr28, \base, (28 * LSX_REG_WIDTH) + EX_V 0xb0 $vr29, \base, (29 * LSX_REG_WIDTH) + EX_V 0xb0 $vr30, \base, (30 * LSX_REG_WIDTH) + EX_V 0xb0 $vr31, \base, (31 * LSX_REG_WIDTH) + .endm + + .macro sc_save_lasx base + EX_XV 0xb3 $xr0, \base, (0 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr1, \base, (1 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr2, \base, (2 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr3, \base, (3 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr4, \base, (4 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr5, \base, (5 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr6, \base, (6 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr7, \base, (7 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr8, \base, (8 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr9, \base, (9 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr10, \base, (10 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr11, \base, (11 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr12, \base, (12 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr13, \base, (13 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr14, \base, (14 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr15, \base, (15 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr16, \base, (16 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr17, \base, (17 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr18, \base, (18 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr19, \base, (19 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr20, \base, (20 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr21, \base, (21 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr22, \base, (22 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr23, \base, (23 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr24, \base, (24 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr25, \base, (25 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr26, \base, (26 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr27, \base, (27 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr28, \base, (28 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr29, 
\base, (29 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr30, \base, (30 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr31, \base, (31 * LASX_REG_WIDTH) + .endm + + .macro sc_restore_lasx base + EX_XV 0xb2 $xr0, \base, (0 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr1, \base, (1 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr2, \base, (2 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr3, \base, (3 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr4, \base, (4 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr5, \base, (5 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr6, \base, (6 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr7, \base, (7 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr8, \base, (8 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr9, \base, (9 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr10, \base, (10 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr11, \base, (11 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr12, \base, (12 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr13, \base, (13 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr14, \base, (14 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr15, \base, (15 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr16, \base, (16 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr17, \base, (17 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr18, \base, (18 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr19, \base, (19 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr20, \base, (20 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr21, \base, (21 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr22, \base, (22 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr23, \base, (23 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr24, \base, (24 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr25, \base, (25 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr26, \base, (26 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr27, \base, (27 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr28, \base, (28 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr29, \base, (29 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr30, \base, (30 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr31, \base, (31 * LASX_REG_WIDTH) + .endm + /* * Save a thread's fp context. */ @@ -167,6 +333,76 @@ SYM_FUNC_START(_restore_fp) jr ra SYM_FUNC_END(_restore_fp) +#ifdef CONFIG_CPU_HAS_LSX + +/* + * Save a thread's LSX vector context. 
+ */ +SYM_FUNC_START(_save_lsx) + lsx_save_all a0 t1 t2 + jirl zero, ra, 0 +SYM_FUNC_END(_save_lsx) +EXPORT_SYMBOL(_save_lsx) + +/* + * Restore a thread's LSX vector context. + */ +SYM_FUNC_START(_restore_lsx) + lsx_restore_all a0 t1 t2 + jirl zero, ra, 0 +SYM_FUNC_END(_restore_lsx) + +SYM_FUNC_START(_save_lsx_upper) + lsx_save_all_upper a0 t0 t1 + jirl zero, ra, 0 +SYM_FUNC_END(_save_lsx_upper) + +SYM_FUNC_START(_restore_lsx_upper) + lsx_restore_all_upper a0 t0 t1 + jirl zero, ra, 0 +SYM_FUNC_END(_restore_lsx_upper) + +SYM_FUNC_START(_init_lsx_upper) + lsx_init_all_upper t1 + jirl zero, ra, 0 +SYM_FUNC_END(_init_lsx_upper) +#endif + +#ifdef CONFIG_CPU_HAS_LASX + +/* + * Save a thread's LASX vector context. + */ +SYM_FUNC_START(_save_lasx) + lasx_save_all a0 t1 t2 + jirl zero, ra, 0 +SYM_FUNC_END(_save_lasx) +EXPORT_SYMBOL(_save_lasx) + +/* + * Restore a thread's LASX vector context. + */ +SYM_FUNC_START(_restore_lasx) + lasx_restore_all a0 t1 t2 + jirl zero, ra, 0 +SYM_FUNC_END(_restore_lasx) + +SYM_FUNC_START(_save_lasx_upper) + lasx_save_all_upper a0 t0 t1 + jirl zero, ra, 0 +SYM_FUNC_END(_save_lasx_upper) + +SYM_FUNC_START(_restore_lasx_upper) + lasx_restore_all_upper a0 t0 t1 + jirl zero, ra, 0 +SYM_FUNC_END(_restore_lasx_upper) + +SYM_FUNC_START(_init_lasx_upper) + lasx_init_all_upper t1 + jirl zero, ra, 0 +SYM_FUNC_END(_init_lasx_upper) +#endif + /* * Load the FPU with signalling NANS. 
This bit pattern we're using has * the property that no matter whether considered as single or as double @@ -245,6 +481,58 @@ SYM_FUNC_START(_restore_fp_context) jr ra SYM_FUNC_END(_restore_fp_context) +/* + * a0: fpregs + * a1: fcc + * a2: fcsr + */ +SYM_FUNC_START(_save_lsx_context) + sc_save_fcc a1, t0, t1 + sc_save_fcsr a2, t0 + sc_save_lsx a0 + li.w a0, 0 # success + jirl zero, ra, 0 +SYM_FUNC_END(_save_lsx_context) + +/* + * a0: fpregs + * a1: fcc + * a2: fcsr + */ +SYM_FUNC_START(_restore_lsx_context) + sc_restore_lsx a0 + sc_restore_fcc a1, t1, t2 + sc_restore_fcsr a2, t1 + li.w a0, 0 # success + jirl zero, ra, 0 +SYM_FUNC_END(_restore_lsx_context) + +/* + * a0: fpregs + * a1: fcc + * a2: fcsr + */ +SYM_FUNC_START(_save_lasx_context) + sc_save_fcc a1, t0, t1 + sc_save_fcsr a2, t0 + sc_save_lasx a0 + li.w a0, 0 # success + jirl zero, ra, 0 +SYM_FUNC_END(_save_lasx_context) + +/* + * a0: fpregs + * a1: fcc + * a2: fcsr + */ +SYM_FUNC_START(_restore_lasx_context) + sc_restore_lasx a0 + sc_restore_fcc a1, t1, t2 + sc_restore_fcsr a2, t1 + li.w a0, 0 # success + jirl zero, ra, 0 +SYM_FUNC_END(_restore_lasx_context) + SYM_FUNC_START(fault) li.w a0, -EFAULT # failure jr ra diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S index 01bac62a644273d3951e147fdc1f8077701745b9..e2074cd4fff4dd4ba1d7cfcd5077318fbc3ec37c 100644 --- a/arch/loongarch/kernel/head.S +++ b/arch/loongarch/kernel/head.S @@ -55,25 +55,27 @@ SYM_CODE_START(kernel_entry) # kernel entry point li.w t0, 0x00 # FPE=0, SXE=0, ASXE=0, BTE=0 csrwr t0, LOONGARCH_CSR_EUEN - la t0, __bss_start # clear .bss + la.pcrel t0, __bss_start # clear .bss st.d zero, t0, 0 - la t1, __bss_stop - LONGSIZE + la.pcrel t1, __bss_stop - LONGSIZE 1: addi.d t0, t0, LONGSIZE st.d zero, t0, 0 bne t0, t1, 1b - la t0, fw_arg0 + la.pcrel t0, fw_arg0 st.d a0, t0, 0 # firmware arguments - la t0, fw_arg1 + la.pcrel t0, fw_arg1 st.d a1, t0, 0 + la.pcrel t0, fw_arg2 + st.d a2, t0, 0 /* KSave3 used for percpu base, 
initialized as 0 */ csrwr zero, PERCPU_BASE_KS /* GPR21 used for percpu base (runtime), initialized as 0 */ move u0, zero - la tp, init_thread_union + la.pcrel tp, init_thread_union /* Set the SP after an empty pt_regs. */ PTR_LI sp, (_THREAD_SIZE - 32 - PT_SIZE) PTR_ADD sp, sp, tp diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c index 1ba19c76563e9bd7fe0381f54bde68f67f7a7768..f55910ab85038864808326db9846f8b2f6a39e06 100644 --- a/arch/loongarch/kernel/irq.c +++ b/arch/loongarch/kernel/irq.c @@ -20,6 +20,7 @@ #include #include #include +#include "legacy_boot.h" DEFINE_PER_CPU(unsigned long, irq_stack); DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); @@ -61,6 +62,12 @@ static int __init early_pci_mcfg_parse(struct acpi_table_header *header) if (header->length < sizeof(struct acpi_table_mcfg)) return -EINVAL; + for (i = 0; i < MAX_IO_PICS; i++) { + msi_group[i].pci_segment = -1; + msi_group[i].node = -1; + pch_group[i].node = -1; + } + n = (header->length - sizeof(struct acpi_table_mcfg)) / sizeof(struct acpi_mcfg_allocation); mcfg = (struct acpi_table_mcfg *)header; @@ -76,14 +83,6 @@ static int __init early_pci_mcfg_parse(struct acpi_table_header *header) static void __init init_vec_parent_group(void) { - int i; - - for (i = 0; i < MAX_IO_PICS; i++) { - msi_group[i].pci_segment = -1; - msi_group[i].node = -1; - pch_group[i].node = -1; - } - acpi_table_parse(ACPI_SIG_MCFG, early_pci_mcfg_parse); } @@ -99,7 +98,7 @@ static int __init get_ipi_irq(void) void __init init_IRQ(void) { - int i; + int i, ret; #ifdef CONFIG_SMP int r, ipi_irq; static int ipi_dummy_dev; @@ -111,7 +110,13 @@ void __init init_IRQ(void) clear_csr_estat(ESTATF_IP); init_vec_parent_group(); - irqchip_init(); + if (efi_bp && bpi_version <= BPI_VERSION_V1) { + ret = setup_legacy_IRQ(); + if (ret) + panic("IRQ domain init error!\n"); + } else { + irqchip_init(); + } #ifdef CONFIG_SMP ipi_irq = get_ipi_irq(); if (ipi_irq < 0) diff --git 
a/arch/loongarch/kernel/legacy_boot.c b/arch/loongarch/kernel/legacy_boot.c new file mode 100644 index 0000000000000000000000000000000000000000..c670b5ea4ce5b01093082a5fe8567b9a383444cb --- /dev/null +++ b/arch/loongarch/kernel/legacy_boot.c @@ -0,0 +1,617 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Yun Liu, liuyun@loongson.cn + * Copyright (C) 2020 Loongson Technology Co., Ltd. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "legacy_boot.h" + +#define MAX_CORE_PIC 256 +#define PREFIX "ACPI: " + +#define MSI_MSG_ADDRESS 0x2FF00000 +#define MSI_MSG_DEFAULT_COUNT 0xC0 + +struct boot_params *efi_bp; +struct loongsonlist_mem_map *g_mmap; +struct acpi_madt_lio_pic *acpi_liointc; +struct acpi_madt_eio_pic *acpi_eiointc[MAX_IO_PICS]; + +struct acpi_madt_ht_pic *acpi_htintc; +struct acpi_madt_lpc_pic *acpi_pchlpc; +struct acpi_madt_msi_pic *acpi_pchmsi[MAX_IO_PICS]; +struct acpi_madt_bio_pic *acpi_pchpic[MAX_IO_PICS]; + +struct irq_domain *cpu_domain; +struct irq_domain *liointc_domain; +struct irq_domain *pch_lpc_domain; +struct irq_domain *pch_msi_domain[MAX_IO_PICS]; +struct irq_domain *pch_pic_domain[MAX_IO_PICS]; + +char arcs_cmdline[COMMAND_LINE_SIZE]; +int nr_io_pics; +int bpi_version; + +struct acpi_madt_lio_pic liointc_default = { + .address = LOONGSON_REG_BASE + 0x1400, + .size = 256, + .cascade = {2, 3}, + .cascade_map = {0x00FFFFFF, 0xff000000}, +}; + +struct acpi_madt_lpc_pic pchlpc_default = { + .address = LS7A_LPC_REG_BASE, + .size = SZ_4K, + .cascade = 19, +}; + +struct acpi_madt_eio_pic eiointc_default[MAX_IO_PICS]; +struct acpi_madt_msi_pic pchmsi_default[MAX_IO_PICS]; +struct acpi_madt_bio_pic pchpic_default[MAX_IO_PICS]; + +static 
int +acpi_parse_lapic(union acpi_subtable_headers *header, const unsigned long end) +{ + struct acpi_madt_local_apic *processor = NULL; + + processor = (struct acpi_madt_local_apic *)header; + if (BAD_MADT_ENTRY(processor, end)) + return -EINVAL; + + acpi_table_print_madt_entry(&header->common); + set_processor_mask(processor->id, processor->lapic_flags); + + return 0; +} + +static int acpi_parse_madt_lapic(void) +{ + int ret; + struct acpi_subtable_proc madt_proc[1]; + + memset(madt_proc, 0, sizeof(madt_proc)); + madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC; + madt_proc[0].handler = acpi_parse_lapic; + ret = acpi_table_parse_entries_array(ACPI_SIG_MADT, + sizeof(struct acpi_table_madt), + madt_proc, ARRAY_SIZE(madt_proc), + MAX_CORE_PIC); + if (ret < 0) { + pr_err(PREFIX "Error parsing LAPIC entries\n"); + return ret; + } + + return 0; +} + +static int bad_pch_pic(unsigned long address) +{ + if (nr_io_pics >= MAX_IO_PICS) { + pr_warn("WARNING: Max # of I/O PCH_PICs (%d) exceeded (found %d), skipping\n", + MAX_IO_PICS, nr_io_pics); + return 1; + } + if (!address) { + pr_warn("WARNING: Bogus (zero) I/O PCH_PIC address found in table, skipping!\n"); + return 1; + } + return 0; +} + +void register_default_pic(int id, u32 address, u32 irq_base) +{ + int idx, entries; + unsigned long addr; + + if (bad_pch_pic(address)) + return; + + idx = nr_io_pics; + + pchpic_default[idx].address = address; + if (idx) + pchpic_default[idx].address |= nid_to_addrbase(id) | HT1LO_OFFSET; + pchpic_default[idx].id = id; + pchpic_default[idx].version = 0; + pchpic_default[idx].size = 0x1000; + pchpic_default[idx].gsi_base = irq_base; + + msi_group[nr_io_pics].pci_segment = nr_io_pics; + pch_group[nr_io_pics].node = msi_group[nr_io_pics].node = id; + + addr = pchpic_default[idx].address; + entries = (((unsigned long)ls7a_readq(address) >> 48) & 0xff) + 1; + pchmsi_default[idx].msg_address = MSI_MSG_ADDRESS; + pchmsi_default[idx].start = entries; + pchmsi_default[idx].count = 
MSI_MSG_DEFAULT_COUNT; + + eiointc_default[idx].cascade = 3; + eiointc_default[idx].node = id; + eiointc_default[idx].node_map = 1; + + if (idx) { + eiointc_default[idx].cascade = 0x4; + eiointc_default[0].node_map = 0x1DF; + eiointc_default[idx].node_map = 0xFE20; + } + + acpi_pchpic[idx] = &pchpic_default[idx]; + acpi_pchmsi[idx] = &pchmsi_default[idx]; + acpi_eiointc[idx] = &eiointc_default[idx]; + + nr_io_pics++; +} + +static int +acpi_parse_legacy_pch_pic(struct acpi_subtable_header *header, + const unsigned long end) +{ + struct acpi_madt_io_apic *pch_pic = NULL; + + pch_pic = (struct acpi_madt_io_apic *)header; + + if (BAD_MADT_ENTRY(pch_pic, end)) + return -EINVAL; + + acpi_table_print_madt_entry(header); + + register_default_pic(pch_pic->id, pch_pic->address, + pch_pic->global_irq_base); + + return 0; +} + +/* + * Parse PCH_PIC related entries in MADT + * returns 0 on success, < 0 on error + */ +static int acpi_parse_madt_pch_pic_entries(void) +{ + int count; + + /* + * ACPI interpreter is required to complete interrupt setup, + * so if it is off, don't enumerate the io-apics with ACPI. 
+ * If MPS is present, it will handle them, + * otherwise the system will stay in PIC mode + */ + if (acpi_disabled || acpi_noirq) + return -ENODEV; + + count = acpi_table_parse_madt(ACPI_MADT_TYPE_IO_APIC, + (void *)acpi_parse_legacy_pch_pic, MAX_IO_PICS); + if (!count) { + pr_err("No PCH_PIC entries present\n"); + return -ENODEV; + } else if (count < 0) { + pr_err("Error parsing PCH_PIC entry\n"); + return count; + } + + return 0; +} + +int legacy_madt_table_init(void) +{ + int error; + + /* Parse MADT LAPIC entries */ + error = acpi_parse_madt_lapic(); + if (!error) { + acpi_liointc = &liointc_default; + acpi_pchlpc = &pchlpc_default; + acpi_parse_madt_pch_pic_entries(); + } + if (error == -EINVAL) { + pr_err(PREFIX + "Invalid BIOS MADT, disabling ACPI\n"); + disable_acpi(); + return -1; + } + + loongson_sysconf.nr_cpus = num_processors; + return 0; +} + +int setup_legacy_IRQ(void) +{ + int i, ret; + struct irq_domain *pic_domain; + + if (!acpi_eiointc[0]) + cpu_data[0].options &= ~LOONGARCH_CPU_EXTIOI; + + ret = cpuintc_acpi_init(NULL, 0); + if (ret) { + pr_err("CPU domain init error!\n"); + return -1; + } + cpu_domain = get_cpudomain(); + ret = liointc_acpi_init(cpu_domain, acpi_liointc); + if (ret) { + pr_err("Liointc domain init error!\n"); + return -1; + } + liointc_domain = irq_find_matching_fwnode(liointc_handle, DOMAIN_BUS_ANY); + if (cpu_has_extioi) { + pr_info("Using EIOINTC interrupt mode\n"); + for (i = 0; i < nr_io_pics; i++) { + ret = eiointc_acpi_init(cpu_domain, acpi_eiointc[i]); + if (ret) { + pr_err("Eiointc domain init error!\n"); + return -1; + } + pch_pic_parse_madt((union acpi_subtable_headers *)acpi_pchpic[i], 0); + pch_msi_parse_madt((union acpi_subtable_headers *)acpi_pchmsi[i], 0); + } + /* HTVECINTC maybe not use */ + } else { + pr_info("Using HTVECINTC interrupt mode\n"); + ret = htvec_acpi_init(liointc_domain, acpi_htintc); + if (ret) { + pr_err("HTVECintc domain init error!\n"); + return -1; + } + pch_pic_parse_madt((union 
acpi_subtable_headers *)acpi_pchpic[0], 0); + pch_msi_parse_madt((union acpi_subtable_headers *)acpi_pchmsi[0], 0); + } + + pic_domain = get_pchpic_irq_domain(); + if (pic_domain) + pch_lpc_acpi_init(pic_domain, acpi_pchlpc); + + return 0; +} + +/* + * Manage initrd + */ +#ifdef CONFIG_BLK_DEV_INITRD +static unsigned long init_initrd(unsigned long ps, unsigned long z) +{ + static int initialized; + + if (!ps || !z) + return 0; + + initrd_start = (unsigned long)__va(ps); + initrd_end = initrd_start + z; + /* + * Board specific code or command line parser should have + * already set up initrd_start and initrd_end. In these cases + * perfom sanity checks and use them if all looks good. + */ + if (initrd_start < PAGE_OFFSET || initrd_end <= initrd_start) { + pr_err("initrd start load address error!"); + goto disable; + } + + if (initrd_start & ~PAGE_MASK) { + pr_err("initrd start must be page aligned\n"); + goto disable; + } + + memblock_reserve(__pa(initrd_start), z); + initrd_below_start_ok = 1; + + if (!initialized) + pr_info("Initial ramdisk at: 0x%lx (%lu bytes)\n", + initrd_start, z); + initialized = 1; + + return 0; +disable: + pr_err("disabling initrd\n"); + initrd_start = 0; + initrd_end = 0; + return 0; +} + +static int early_initrd(char *p) +{ + unsigned long start, size; + char *endp; + + if (!efi_bp) + return 0; + start = memparse(p, &endp); + if (*endp == ',') + size = memparse(endp + 1, NULL); + + if (start + size > PFN_PHYS(max_low_pfn)) { + pr_err("Initrd physical address is out of memory!"); + return 0; + } + + init_initrd(start, size); + + return 0; +} +early_param("initrd", early_initrd); + +static int rd_start_early(char *p) +{ + unsigned long start; + + if (!efi_bp) + return 0; + + start = memparse(p, &p); + initrd_start = start; + initrd_end += start; + init_initrd(__pa(start), initrd_end - start); + + return 0; +} +early_param("rd_start", rd_start_early); + +static int rd_size_early(char *p) +{ + unsigned long size; + + if (!efi_bp) + return 0; 
+ size = memparse(p, &p); + initrd_end += size; + + init_initrd(__pa(initrd_start), size); + return 0; +} +early_param("rd_size", rd_size_early); + +#else /* !CONFIG_BLK_DEV_INITRD */ +static unsigned long init_initrd(void) +{ + return 0; +} +#endif + +void fw_init_cmdline(unsigned long argc, unsigned long cmdp) +{ + int i; + char **_fw_argv; + + _fw_argv = (char **)cmdp; + + arcs_cmdline[0] = '\0'; + for (i = 1; i < argc; i++) { + strlcat(arcs_cmdline, _fw_argv[i], COMMAND_LINE_SIZE); + if (i < (argc - 1)) + strlcat(arcs_cmdline, " ", COMMAND_LINE_SIZE); + } + strlcat(boot_command_line, arcs_cmdline, COMMAND_LINE_SIZE); +} + +static u8 ext_listhdr_checksum(u8 *buffer, u32 length) +{ + u8 sum = 0; + u8 *end = buffer + length; + + while (buffer < end) + sum = (u8)(sum + *(buffer++)); + + return sum; +} + +static int parse_mem(struct _extension_list_hdr *head) +{ + g_mmap = (struct loongsonlist_mem_map *)head; + if (ext_listhdr_checksum((u8 *)g_mmap, head->length)) { + pr_err("mem checksum error\n"); + return -EPERM; + } + return 0; +} + +/* legacy firmware passed, add use this info if need vbios */ +static int parse_vbios(struct _extension_list_hdr *head) +{ + struct loongsonlist_vbios *pvbios; + + pvbios = (struct loongsonlist_vbios *)head; + + if (ext_listhdr_checksum((u8 *)pvbios, head->length)) { + pr_err("vbios_addr checksum error\n"); + return -EPERM; + } + return 0; +} + +/* legacy firmware passed, add use this info if need screeninfo KVM? 
*/ +static int parse_screeninfo(struct _extension_list_hdr *head) +{ + struct loongsonlist_screeninfo *pscreeninfo; + + pscreeninfo = (struct loongsonlist_screeninfo *)head; + if (ext_listhdr_checksum((u8 *)pscreeninfo, head->length)) { + pr_err("screeninfo_addr checksum error\n"); + return -EPERM; + } + + memcpy(&screen_info, &pscreeninfo->si, sizeof(screen_info)); + return 0; +} + +static int list_find(struct boot_params *bp) +{ + struct _extension_list_hdr *fhead = NULL; + unsigned long index; + + fhead = bp->extlist; + if (!fhead) { + pr_err("the bp ext struct empty!\n"); + return -1; + } + do { + if (memcmp(&(fhead->signature), LOONGSON_MEM_SIGNATURE, 3) == 0) { + if (parse_mem(fhead) != 0) { + pr_err("parse mem failed\n"); + return -EPERM; + } + } else if (memcmp(&(fhead->signature), LOONGSON_VBIOS_SIGNATURE, 5) == 0) { + if (parse_vbios(fhead) != 0) { + pr_err("parse vbios failed\n"); + return -EPERM; + } + } else if (memcmp(&(fhead->signature), LOONGSON_SCREENINFO_SIGNATURE, 5) == 0) { + if (parse_screeninfo(fhead) != 0) { + pr_err("parse screeninfo failed\n"); + return -EPERM; + } + } + fhead = (struct _extension_list_hdr *)fhead->next; + index = (unsigned long)fhead; + } while (index); + return 0; +} + +unsigned int bpi_init(void) +{ + return list_find(efi_bp); +} + +static void register_addrs_set(u64 *registers, const u64 addr, int num) +{ + u64 i; + + for (i = 0; i < num; i++) { + *registers = (i << 44) | addr; + registers++; + } +} + +static int get_bpi_version(u64 *signature) +{ + u8 data[9]; + int version = BPI_VERSION_NONE; + + data[8] = 0; + memcpy(data, signature, sizeof(*signature)); + if (kstrtoint(&data[3], 10, &version)) + return BPI_VERSION_NONE; + return version; +} + +static void __init parse_bpi_flags(void) +{ + if (efi_bp->flags & BPI_FLAGS_UEFI_SUPPORTED) + set_bit(EFI_BOOT, &efi.flags); + else + clear_bit(EFI_BOOT, &efi.flags); +} + +unsigned long legacy_boot_init(unsigned long argc, unsigned long cmdptr, unsigned long bpi) +{ + int 
ret; + + if (!bpi) + return -1; + efi_bp = (struct boot_params *)bpi; + bpi_version = get_bpi_version(&efi_bp->signature); + pr_info("BPI%d with boot flags %llx.\n", bpi_version, efi_bp->flags); + if (bpi_version == BPI_VERSION_NONE) + panic("Fatal error, bpi ver BONE!\n"); + else if (bpi_version == BPI_VERSION_V2) + parse_bpi_flags(); + + fw_init_cmdline(argc, cmdptr); + ret = bpi_init(); + if (ret) { + pr_err("init legacy firmware error!\n"); + return -1; + } + + return 0; +} + +static int __init add_legacy_isa_io(struct fwnode_handle *fwnode, unsigned long isa_base) +{ + int ret = 0; + unsigned long vaddr; + struct logic_pio_hwaddr *range; + + range = kzalloc(sizeof(*range), GFP_ATOMIC); + if (!range) + return -ENOMEM; + + range->fwnode = fwnode; + range->size = ISA_IOSIZE; + range->hw_start = isa_base; + range->flags = LOGIC_PIO_CPU_MMIO; + + ret = logic_pio_register_range(range); + if (ret) { + kfree(range); + return ret; + } + + if (range->io_start != 0) { + logic_pio_unregister_range(range); + kfree(range); + return -EINVAL; + } + + vaddr = (unsigned long)(PCI_IOBASE + range->io_start); + ret = ioremap_page_range(vaddr, vaddr + range->size, + range->hw_start, pgprot_device(PAGE_KERNEL)); + return ret; +} + +static struct fwnode_handle * __init parse_isa_base(u64 *cpu_addr) +{ + struct device_node *np; + const __be32 *ranges = NULL; + int len; + struct device_node *node; + + for_each_node_by_name(np, "isa") { + node = of_node_get(np); + + if (!node) + break; + + ranges = of_get_property(node, "ranges", &len); + + if (!ranges || (ranges && len > 0)) + break; + } + if (ranges) { + ranges += 2; + *cpu_addr = of_translate_address(np, ranges); + return &np->fwnode; + } + + return NULL; +} + +static int __init register_legacy_isa_io(void) +{ + struct fwnode_handle *fwnode; + u64 cpu_addr; + + if (!acpi_disabled) { + cpu_addr = ISA_PHY_IOBASE; + fwnode = kzalloc(sizeof(*fwnode), GFP_ATOMIC); + } else { + fwnode = parse_isa_base(&cpu_addr); + } + + if (fwnode) + 
add_legacy_isa_io(fwnode, cpu_addr); + + return 0; +} +arch_initcall(register_legacy_isa_io); diff --git a/arch/loongarch/kernel/legacy_boot.h b/arch/loongarch/kernel/legacy_boot.h new file mode 100644 index 0000000000000000000000000000000000000000..7b7ec8a013c5ef4a1c8eeca3f6d4c6d509baecb7 --- /dev/null +++ b/arch/loongarch/kernel/legacy_boot.h @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LEGACY_BOOT_H_ +#define __LEGACY_BOOT_H_ +#include +#include +#define ADDRESS_TYPE_SYSRAM 1 +#define ADDRESS_TYPE_RESERVED 2 +#define ADDRESS_TYPE_ACPI 3 +#define ADDRESS_TYPE_NVS 4 +#define ADDRESS_TYPE_PMEM 5 + +#define LOONGSON3_BOOT_MEM_MAP_MAX 128 +#define RT_MAP_START 100 +#define FIX_MAP_ENTRY 32 + +/* mask of the flags in bootparamsinterface */ +#define BPI_FLAGS_UEFI_SUPPORTED BIT(0) +#define BPI_FLAGS_SOC_CPU BIT(1) + +#define LOONGSON_DMA_MASK_BIT 64 +#define LOONGSON_MEM_SIGNATURE "MEM" +#define LOONGSON_VBIOS_SIGNATURE "VBIOS" +#define LOONGSON_EFIBOOT_SIGNATURE "BPI" +#define LOONGSON_SCREENINFO_SIGNATURE "SINFO" +#define LOONGSON_EFIBOOT_VERSION 1000 + +/* Values for Version firmware */ + +enum bpi_vers { + BPI_VERSION_NONE = 0, + BPI_VERSION_V1 = 1000, + BPI_VERSION_V2 = 1001, +}; + +struct boot_params { + u64 signature; /* {"BPIXXXXX"} */ + void *systemtable; + struct _extension_list_hdr *extlist; + u64 flags; +} __packed; + +struct _extension_list_hdr { + u64 signature; + u32 length; + u8 revision; + u8 checksum; + struct _extension_list_hdr *next; +} __packed; + +struct loongsonlist_mem_map { + struct _extension_list_hdr header; /*{"M", "E", "M"}*/ + u8 map_count; + struct _loongson_mem_map { + u32 mem_type; + u64 mem_start; + u64 mem_size; + } __packed map[LOONGSON3_BOOT_MEM_MAP_MAX]; +} __packed; + +struct loongsonlist_vbios { + struct _extension_list_hdr header; /* {VBIOS} */ + u64 vbios_addr; +} __packed; + +struct loongsonlist_screeninfo { + struct _extension_list_hdr header; + struct screen_info si; +}; +unsigned long 
legacy_boot_init(unsigned long argc, + unsigned long cmdptr, unsigned long bpi); +extern int bpi_version; +extern struct boot_params *efi_bp; +extern struct loongsonlist_mem_map *g_mmap; +extern int set_processor_mask(u32 id, u32 flags); +extern int __init setup_legacy_IRQ(void); +extern struct loongson_system_configuration loongson_sysconf; +extern unsigned long long smp_group[MAX_PACKAGES]; +extern int legacy_madt_table_init(void); +extern struct pch_pic *pch_pic_priv[MAX_IO_PICS]; +extern struct irq_domain *get_cpudomain(void); +extern int __init cpuintc_acpi_init(union acpi_subtable_headers *header, + const unsigned long end); +extern int __init +pch_pic_parse_madt(union acpi_subtable_headers *header, + const unsigned long end); +extern int __init +pch_msi_parse_madt(union acpi_subtable_headers *header, + const unsigned long end); +extern struct irq_domain *get_pchpic_irq_domain(void); +#endif diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c index 7423361b0ebc9b69864b9cec624b48081d20ee37..5bdb1fe8e0586c805b83878855b2ad6ba1c57d9e 100644 --- a/arch/loongarch/kernel/mem.c +++ b/arch/loongarch/kernel/mem.c @@ -9,13 +9,36 @@ #include #include #include - +#include "legacy_boot.h" void __init memblock_init(void) { - u32 mem_type; + u32 i, mem_type; u64 mem_start, mem_end, mem_size; efi_memory_desc_t *md; + if (g_mmap) { + /* parse memory information */ + for (i = 0; i < g_mmap->map_count; i++) { + mem_type = g_mmap->map[i].mem_type; + mem_start = g_mmap->map[i].mem_start; + mem_size = g_mmap->map[i].mem_size; + mem_end = mem_start + mem_size; + + switch (mem_type) { + case ADDRESS_TYPE_SYSRAM: + pr_info("add memory region memblock - base: %llx size: %llx\n", + mem_start, mem_size); + memblock_add(mem_start, mem_size); + if (max_low_pfn < (mem_end >> PAGE_SHIFT)) + max_low_pfn = mem_end >> PAGE_SHIFT; + break; + } + } + memblock_set_current_limit(PFN_PHYS(max_low_pfn)); + memblock_reserve(__pa_symbol(&_text), + __pa_symbol(&_end) -
__pa_symbol(&_text)); + return; + } /* Parse memory information */ for_each_efi_memory_desc(md) { mem_type = md->type; diff --git a/arch/loongarch/kernel/module-sections.c b/arch/loongarch/kernel/module-sections.c index 6d498288977d2a44df89dc979fad84e3571b0066..80b46a4e57572f72e18b9d680bd3b4c7cbe994ac 100644 --- a/arch/loongarch/kernel/module-sections.c +++ b/arch/loongarch/kernel/module-sections.c @@ -7,7 +7,33 @@ #include #include -Elf_Addr module_emit_plt_entry(struct module *mod, unsigned long val) +Elf_Addr module_emit_got_entry(struct module *mod, Elf_Addr val) +{ + struct mod_section *got_sec = &mod->arch.got; + int i = got_sec->num_entries; + struct got_entry *got = get_got_entry(val, got_sec); + + if (got) + return (Elf_Addr)got; + + /* There is no GOT entry for val yet, create a new one. */ + got = (struct got_entry *)got_sec->shdr->sh_addr; + got[i] = emit_got_entry(val); + + got_sec->num_entries++; + if (got_sec->num_entries > got_sec->max_entries) { + /* + * This may happen when the module contains a GOT_HI20 without + * a paired GOT_LO12. Such a module is broken, reject it. + */ + pr_err("%s: module contains bad GOT relocation\n", mod->name); + return 0; + } + + return (Elf_Addr)&got[i]; +} + +Elf_Addr module_emit_plt_entry(struct module *mod, Elf_Addr val) { int nr; struct mod_section *plt_sec = &mod->arch.plt; @@ -50,15 +76,26 @@ static bool duplicate_rela(const Elf_Rela *rela, int idx) return false; } -static void count_max_entries(Elf_Rela *relas, int num, unsigned int *plts) +static void count_max_entries(Elf_Rela *relas, int num, + unsigned int *plts, unsigned int *gots) { unsigned int i, type; for (i = 0; i < num; i++) { type = ELF_R_TYPE(relas[i].r_info); - if (type == R_LARCH_SOP_PUSH_PLT_PCREL) { + switch (type) { + case R_LARCH_SOP_PUSH_PLT_PCREL: + case R_LARCH_B26: if (!duplicate_rela(relas, i)) (*plts)++; + break; + case R_LARCH_GOT_PC_HI20: + if (!duplicate_rela(relas, i)) + (*gots)++; + break; + default: + /* Do nothing. 
*/ + break; } } } @@ -66,18 +103,24 @@ static void count_max_entries(Elf_Rela *relas, int num, unsigned int *plts) int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, struct module *mod) { - unsigned int i, num_plts = 0; + unsigned int i, num_plts = 0, num_gots = 0; /* * Find the empty .plt sections. */ for (i = 0; i < ehdr->e_shnum; i++) { - if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt")) + if (!strcmp(secstrings + sechdrs[i].sh_name, ".got")) + mod->arch.got.shdr = sechdrs + i; + else if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt")) mod->arch.plt.shdr = sechdrs + i; else if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt.idx")) mod->arch.plt_idx.shdr = sechdrs + i; } + if (!mod->arch.got.shdr) { + pr_err("%s: module GOT section(s) missing\n", mod->name); + return -ENOEXEC; + } if (!mod->arch.plt.shdr) { pr_err("%s: module PLT section(s) missing\n", mod->name); return -ENOEXEC; @@ -100,9 +143,16 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, if (!(dst_sec->sh_flags & SHF_EXECINSTR)) continue; - count_max_entries(relas, num_rela, &num_plts); + count_max_entries(relas, num_rela, &num_plts, &num_gots); } + mod->arch.got.shdr->sh_type = SHT_NOBITS; + mod->arch.got.shdr->sh_flags = SHF_ALLOC; + mod->arch.got.shdr->sh_addralign = L1_CACHE_BYTES; + mod->arch.got.shdr->sh_size = (num_gots + 1) * sizeof(struct got_entry); + mod->arch.got.num_entries = 0; + mod->arch.got.max_entries = num_gots; + mod->arch.plt.shdr->sh_type = SHT_NOBITS; mod->arch.plt.shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC; mod->arch.plt.shdr->sh_addralign = L1_CACHE_BYTES; diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c index 638427ff0d5150cbffacef7d9ac75c0f10b661b8..bee7457db804326ef7b01c2f57067bd90c859312 100644 --- a/arch/loongarch/kernel/module.c +++ b/arch/loongarch/kernel/module.c @@ -281,6 +281,96 @@ static int apply_r_larch_add_sub(struct module *mod, u32 *location, Elf_Addr v, } } +static int 
apply_r_larch_b26(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top, unsigned int type) +{ + ptrdiff_t offset = (void *)v - (void *)location; + union loongarch_instruction *insn = (union loongarch_instruction *)location; + + if (offset >= SZ_128M) + v = module_emit_plt_entry(mod, v); + + if (offset < -SZ_128M) + v = module_emit_plt_entry(mod, v); + + offset = (void *)v - (void *)location; + + if (offset & 3) { + pr_err("module %s: jump offset = 0x%llx unaligned! dangerous R_LARCH_B26 (%u) relocation\n", + mod->name, (long long)offset, type); + return -ENOEXEC; + } + + if (!signed_imm_check(offset, 28)) { + pr_err("module %s: jump offset = 0x%llx overflow! dangerous R_LARCH_B26 (%u) relocation\n", + mod->name, (long long)offset, type); + return -ENOEXEC; + } + + offset >>= 2; + insn->reg0i26_format.immediate_l = offset & 0xffff; + insn->reg0i26_format.immediate_h = (offset >> 16) & 0x3ff; + + return 0; +} + +static int apply_r_larch_pcala(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top, unsigned int type) +{ + union loongarch_instruction *insn = (union loongarch_instruction *)location; + /* Use s32 for a sign-extension deliberately. 
*/ + s32 offset_hi20 = (void *)((v + 0x800) & ~0xfff) - + (void *)((Elf_Addr)location & ~0xfff); + Elf_Addr anchor = (((Elf_Addr)location) & ~0xfff) + offset_hi20; + ptrdiff_t offset_rem = (void *)v - (void *)anchor; + + switch (type) { + case R_LARCH_PCALA_LO12: + insn->reg2i12_format.immediate = v & 0xfff; + break; + case R_LARCH_PCALA_HI20: + v = offset_hi20 >> 12; + insn->reg1i20_format.immediate = v & 0xfffff; + break; + case R_LARCH_PCALA64_LO20: + v = offset_rem >> 32; + insn->reg1i20_format.immediate = v & 0xfffff; + break; + case R_LARCH_PCALA64_HI12: + v = offset_rem >> 52; + insn->reg2i12_format.immediate = v & 0xfff; + break; + default: + pr_err("%s: Unsupport relocation type %u\n", mod->name, type); + return -EINVAL; + } + + return 0; +} + +static int apply_r_larch_got_pc(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top, unsigned int type) +{ + Elf_Addr got = module_emit_got_entry(mod, v); + + if (!got) + return -EINVAL; + + switch (type) { + case R_LARCH_GOT_PC_LO12: + type = R_LARCH_PCALA_LO12; + break; + case R_LARCH_GOT_PC_HI20: + type = R_LARCH_PCALA_HI20; + break; + default: + pr_err("%s: Unsupport relocation type %u\n", mod->name, type); + return -EINVAL; + } + + return apply_r_larch_pcala(mod, location, got, rela_stack, rela_stack_top, type); +} + /* * reloc_handlers_rela() - Apply a particular relocation to a module * @mod: the module to apply the reloc to @@ -296,7 +386,7 @@ typedef int (*reloc_rela_handler)(struct module *mod, u32 *location, Elf_Addr v, /* The handlers for known reloc types */ static reloc_rela_handler reloc_rela_handlers[] = { - [R_LARCH_NONE ... R_LARCH_SUB64] = apply_r_larch_error, + [R_LARCH_NONE ... R_LARCH_RELAX] = apply_r_larch_error, [R_LARCH_NONE] = apply_r_larch_none, [R_LARCH_32] = apply_r_larch_32, @@ -310,6 +400,9 @@ static reloc_rela_handler reloc_rela_handlers[] = { [R_LARCH_SOP_SUB ... R_LARCH_SOP_IF_ELSE] = apply_r_larch_sop, [R_LARCH_SOP_POP_32_S_10_5 ... 
R_LARCH_SOP_POP_32_U] = apply_r_larch_sop_imm_field, [R_LARCH_ADD32 ... R_LARCH_SUB64] = apply_r_larch_add_sub, + [R_LARCH_B26] = apply_r_larch_b26, + [R_LARCH_PCALA_HI20...R_LARCH_PCALA64_HI12] = apply_r_larch_pcala, + [R_LARCH_GOT_PC_HI20...R_LARCH_GOT_PC_LO12] = apply_r_larch_got_pc, }; int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c index eb5d3a4c8a7ad7726ae8e2704d0b0f1f1f4b63ab..1098fb1fc0ccfd4adcfb07fd2ffb9c4a8d89cfdc 100644 --- a/arch/loongarch/kernel/numa.c +++ b/arch/loongarch/kernel/numa.c @@ -25,6 +25,7 @@ #include #include #include +#include "legacy_boot.h" int numa_off; struct pglist_data *node_data[MAX_NUMNODES]; @@ -37,7 +38,6 @@ static struct numa_meminfo numa_meminfo; cpumask_t cpus_on_node[MAX_NUMNODES]; cpumask_t phys_cpus_on_node[MAX_NUMNODES]; EXPORT_SYMBOL(cpus_on_node); - /* * apicid, cpu, node mappings */ @@ -333,10 +333,45 @@ static void __init add_numamem_region(u64 start, u64 end, u32 type) static void __init init_node_memblock(void) { - u32 mem_type; + u32 i, mem_type; u64 mem_end, mem_start, mem_size; efi_memory_desc_t *md; + if (g_mmap) { + for (i = 0; i < g_mmap->map_count; i++) { + mem_type = g_mmap->map[i].mem_type; + mem_start = g_mmap->map[i].mem_start; + mem_size = g_mmap->map[i].mem_size; + mem_end = g_mmap->map[i].mem_start + mem_size; + + switch (mem_type) { + case ADDRESS_TYPE_SYSRAM: + mem_start = PFN_ALIGN(mem_start); + mem_end = PFN_ALIGN(mem_end - PAGE_SIZE + 1); + if (mem_start >= mem_end) + break; + add_numamem_region(mem_start, mem_end, EFI_PERSISTENT_MEMORY); + break; + + case ADDRESS_TYPE_ACPI: + mem_start = PFN_ALIGN(mem_start - PAGE_SIZE + 1); + mem_end = PFN_ALIGN(mem_end); + mem_size = mem_end - mem_start; + memblock_add(mem_start, mem_size); + memblock_mark_nomap(mem_start, mem_size); + memblock_set_node(mem_start, mem_size, + &memblock.memory, 0); + memblock_reserve(mem_start, mem_size); + break; + + case 
ADDRESS_TYPE_RESERVED: + memblock_reserve(mem_start, mem_size); + break; + } + } + return; + } + /* Parse memory information and activate */ for_each_efi_memory_desc(md) { mem_type = md->type; diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index 95a124058e32ee350393ac51e8a93aab444981ee..86600e39799e4e5e438fa56143b95c88f0a27d3f 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -105,8 +105,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) */ preempt_disable(); - if (is_fpu_owner()) - save_fp(current); + if (is_fpu_owner()) { + if (is_lasx_enabled()) + save_lasx(current); + else if (is_lsx_enabled()) + save_lsx(current); + else + save_fp(current); + } preempt_enable(); diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c index dc2b82ea894cd26c1c3d662e488145d82d1fba0d..bee4194177fdda50814491d94470b8d92e5333fd 100644 --- a/arch/loongarch/kernel/ptrace.c +++ b/arch/loongarch/kernel/ptrace.c @@ -246,6 +246,90 @@ static int cfg_set(struct task_struct *target, return 0; } +#ifdef CONFIG_CPU_HAS_LSX + +static void copy_pad_fprs(struct task_struct *target, + const struct user_regset *regset, + struct membuf *to, unsigned int live_sz) +{ + int i, j; + unsigned long long fill = ~0ull; + unsigned int cp_sz, pad_sz; + + cp_sz = min(regset->size, live_sz); + pad_sz = regset->size - cp_sz; + WARN_ON(pad_sz % sizeof(fill)); + + for (i = 0; i < NUM_FPU_REGS; i++) { + membuf_write(to, &target->thread.fpu.fpr[i], cp_sz); + for (j = 0; j < (pad_sz / sizeof(fill)); j++) { + membuf_store(to, fill); + } + } +} + +static int simd_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + const unsigned int wr_size = NUM_FPU_REGS * regset->size; + + if (!tsk_used_math(target)) { + /* The task hasn't used FP or LSX, fill with 0xff */ + copy_pad_fprs(target, regset, &to, 0); + } else if (!test_tsk_thread_flag(target, TIF_LSX_CTX_LIVE)) { + 
/* Copy scalar FP context, fill the rest with 0xff */ + copy_pad_fprs(target, regset, &to, 8); +#ifdef CONFIG_CPU_HAS_LASX + } else if (!test_tsk_thread_flag(target, TIF_LASX_CTX_LIVE)) { + /* Copy LSX 128 Bit context, fill the rest with 0xff */ + copy_pad_fprs(target, regset, &to, 16); +#endif + } else if (sizeof(target->thread.fpu.fpr[0]) == regset->size) { + /* Trivially copy the vector registers */ + membuf_write(&to, &target->thread.fpu.fpr, wr_size); + } else { + /* Copy as much context as possible, fill the rest with 0xff */ + copy_pad_fprs(target, regset, &to, sizeof(target->thread.fpu.fpr[0])); + } + + return 0; +} + +static int simd_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + const unsigned int wr_size = NUM_FPU_REGS * regset->size; + unsigned int cp_sz; + int i, err, start; + + init_fp_ctx(target); + + if (sizeof(target->thread.fpu.fpr[0]) == regset->size) { + /* Trivially copy the vector registers */ + err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.fpu.fpr, + 0, wr_size); + } else { + /* Copy as much context as possible */ + cp_sz = min_t(unsigned int, regset->size, + sizeof(target->thread.fpu.fpr[0])); + + i = start = err = 0; + for (; i < NUM_FPU_REGS; i++, start += regset->size) { + err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.fpu.fpr[i], + start, start + cp_sz); + } + } + + return err; +} + +#endif /* CONFIG_CPU_HAS_LSX */ + struct pt_regs_offset { const char *name; int offset; @@ -319,6 +403,12 @@ enum loongarch_regset { REGSET_GPR, REGSET_FPR, REGSET_CPUCFG, +#ifdef CONFIG_CPU_HAS_LSX + REGSET_LSX, +#endif +#ifdef CONFIG_CPU_HAS_LASX + REGSET_LASX, +#endif }; static const struct user_regset loongarch64_regsets[] = { @@ -346,6 +436,26 @@ static const struct user_regset loongarch64_regsets[] = { .regset_get = cfg_get, .set = cfg_set, }, +#ifdef CONFIG_CPU_HAS_LSX + [REGSET_LSX] = { + 
.core_note_type = NT_LOONGARCH_LSX, + .n = NUM_FPU_REGS, + .size = 16, + .align = 16, + .regset_get = simd_get, + .set = simd_set, + }, +#endif +#ifdef CONFIG_CPU_HAS_LASX + [REGSET_LASX] = { + .core_note_type = NT_LOONGARCH_LASX, + .n = NUM_FPU_REGS, + .size = 32, + .align = 32, + .regset_get = simd_get, + .set = simd_set, + }, +#endif }; static const struct user_regset_view user_loongarch64_view = { diff --git a/arch/loongarch/kernel/reset.c b/arch/loongarch/kernel/reset.c index 693f555af8fc2fe5f506d569a19cdeecff20f943..4adf89f9a076415adc3dbe1de144f0c8f2cc506e 100644 --- a/arch/loongarch/kernel/reset.c +++ b/arch/loongarch/kernel/reset.c @@ -42,9 +42,11 @@ void machine_power_off(void) preempt_disable(); smp_send_stop(); #endif - + if (pm_power_off) /* hook stays NULL until a driver registers one */ + pm_power_off(); #ifdef CONFIG_EFI - efi.reset_system(EFI_RESET_SHUTDOWN, EFI_SUCCESS, 0, NULL); + if (efi.reset_system) + efi.reset_system(EFI_RESET_SHUTDOWN, EFI_SUCCESS, 0, NULL); #endif while (true) { __arch_cpu_idle(); diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 39a2e5816d9fd11b4b1d3645f49980b0e6252bec..957e720e12c8a67d915f88dcae3b75ea731c5be9 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -40,6 +40,7 @@ #include #include #include +#include "legacy_boot.h" #define SMBIOS_BIOSSIZE_OFFSET 0x09 #define SMBIOS_BIOSEXTERN_OFFSET 0x13 @@ -51,7 +52,7 @@ struct screen_info screen_info __section(".data"); -unsigned long fw_arg0, fw_arg1; +unsigned long fw_arg0, fw_arg1, fw_arg2; DEFINE_PER_CPU(unsigned long, kernelsp); struct cpuinfo_loongarch cpu_data[NR_CPUS] __read_mostly; @@ -120,9 +121,23 @@ static void __init parse_cpu_table(const struct dmi_header *dm) static void __init parse_bios_table(const struct dmi_header *dm) { + int bios_extern; char *dmi_data = (char *)dm; + bios_extern = *(dmi_data + SMBIOS_BIOSEXTERN_OFFSET); b_info.bios_size = (*(dmi_data + SMBIOS_BIOSSIZE_OFFSET) + 1) << 6; + + if (bpi_version == BPI_VERSION_V2) { + if ((!!(efi_bp->flags &
BPI_FLAGS_UEFI_SUPPORTED)) != + (!!(bios_extern & LOONGSON_EFI_ENABLE))) + pr_err("There is a conflict of definitions between efi_bp->flags and smbios\n"); + return; + } + + if (bios_extern & LOONGSON_EFI_ENABLE) + set_bit(EFI_BOOT, &efi.flags); + else + clear_bit(EFI_BOOT, &efi.flags); } static void __init find_tokens(const struct dmi_header *dm, void *dummy) @@ -187,7 +202,7 @@ early_param("mem", early_parse_mem); void __init platform_init(void) { - efi_init(); + loongson_efi_init(); #ifdef CONFIG_ACPI_TABLE_UPGRADE acpi_table_upgrade(); #endif @@ -345,6 +360,7 @@ void __init setup_arch(char **cmdline_p) { cpu_probe(); *cmdline_p = boot_command_line; + legacy_boot_init(fw_arg0, fw_arg1, fw_arg2); init_environ(); memblock_init(); diff --git a/arch/loongarch/kernel/signal.c b/arch/loongarch/kernel/signal.c index 2d6eb75b11f18bd0c7d06cc379d7892c904750e8..8c2ef65983287bc3d7e95c2f7b431d13ffed1925 100644 --- a/arch/loongarch/kernel/signal.c +++ b/arch/loongarch/kernel/signal.c @@ -50,6 +50,16 @@ extern asmlinkage int _save_fp_context(void __user *fpregs, void __user *fcc, void __user *csr); extern asmlinkage int _restore_fp_context(void __user *fpregs, void __user *fcc, void __user *csr); +extern asmlinkage int +_save_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); +extern asmlinkage int +_restore_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); +extern asmlinkage int +_save_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); +extern asmlinkage int +_restore_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); +extern asmlinkage int _save_lsx_all_upper(void __user *buf); +extern asmlinkage int _restore_lsx_all_upper(void __user *buf); struct rt_sigframe { struct siginfo rs_info; @@ -65,9 +75,29 @@ struct extctx_layout { unsigned long size; unsigned int flags; struct _ctx_layout fpu; + struct _ctx_layout lsx; + struct _ctx_layout lasx; struct _ctx_layout end; }; +/* LSX context */ 
+#define LSX_CTX_MAGIC 0x53580001 +#define LSX_CTX_ALIGN 16 +struct lsx_context { + __u64 regs[2*32]; + __u64 fcc; + __u32 fcsr; +}; + +/* LASX context */ +#define LASX_CTX_MAGIC 0x41535801 +#define LASX_CTX_ALIGN 32 +struct lasx_context { + __u64 regs[4*32]; + __u64 fcc; + __u32 fcsr; +}; + static void __user *get_ctx_through_ctxinfo(struct sctx_info *info) { return (void __user *)((char *)info + sizeof(struct sctx_info)); @@ -115,6 +145,96 @@ static int copy_fpu_from_sigcontext(struct fpu_context __user *ctx) return err; } +static int copy_lsx_to_sigcontext(struct lsx_context __user *ctx) +{ + int i; + int err = 0; + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + for (i = 0; i < NUM_FPU_REGS; i++) { + err |= __put_user(get_fpr64(&current->thread.fpu.fpr[i], 0), + &regs[2*i]); + err |= __put_user(get_fpr64(&current->thread.fpu.fpr[i], 1), + &regs[2*i+1]); + } + err |= __put_user(current->thread.fpu.fcc, fcc); + err |= __put_user(current->thread.fpu.fcsr, fcsr); + + return err; +} + +static int copy_lsx_from_sigcontext(struct lsx_context __user *ctx) +{ + int i; + int err = 0; + u64 fpr_val; + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + for (i = 0; i < NUM_FPU_REGS; i++) { + err |= __get_user(fpr_val, &regs[2*i]); + set_fpr64(&current->thread.fpu.fpr[i], 0, fpr_val); + err |= __get_user(fpr_val, &regs[2*i+1]); + set_fpr64(&current->thread.fpu.fpr[i], 1, fpr_val); + } + err |= __get_user(current->thread.fpu.fcc, fcc); + err |= __get_user(current->thread.fpu.fcsr, fcsr); + + return err; +} + +static int copy_lasx_to_sigcontext(struct lasx_context __user *ctx) +{ + int i; + int err = 0; + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + for (i = 0; i < NUM_FPU_REGS; i++) { + err |= __put_user(get_fpr64(&current->thread.fpu.fpr[i], 0), + &regs[4*i]); + err |=
__put_user(get_fpr64(&current->thread.fpu.fpr[i], 1), + &regs[4*i+1]); + err |= __put_user(get_fpr64(&current->thread.fpu.fpr[i], 2), + &regs[4*i+2]); + err |= __put_user(get_fpr64(&current->thread.fpu.fpr[i], 3), + &regs[4*i+3]); + } + err |= __put_user(current->thread.fpu.fcc, fcc); + err |= __put_user(current->thread.fpu.fcsr, fcsr); + + return err; +} + +static int copy_lasx_from_sigcontext(struct lasx_context __user *ctx) +{ + int i; + int err = 0; + u64 fpr_val; + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + for (i = 0; i < NUM_FPU_REGS; i++) { + err |= __get_user(fpr_val, &regs[4*i]); + set_fpr64(&current->thread.fpu.fpr[i], 0, fpr_val); + err |= __get_user(fpr_val, &regs[4*i+1]); + set_fpr64(&current->thread.fpu.fpr[i], 1, fpr_val); + err |= __get_user(fpr_val, &regs[4*i+2]); + set_fpr64(&current->thread.fpu.fpr[i], 2, fpr_val); + err |= __get_user(fpr_val, &regs[4*i+3]); + set_fpr64(&current->thread.fpu.fpr[i], 3, fpr_val); + } + err |= __get_user(current->thread.fpu.fcc, fcc); + err |= __get_user(current->thread.fpu.fcsr, fcsr); + + return err; +} + /* * Wrappers for the assembly _{save,restore}_fp_context functions.
*/ @@ -136,6 +256,42 @@ static int restore_hw_fpu_context(struct fpu_context __user *ctx) return _restore_fp_context(regs, fcc, fcsr); } +static int save_hw_lsx_context(struct lsx_context __user *ctx) +{ + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + return _save_lsx_context(regs, fcc, fcsr); +} + +static int restore_hw_lsx_context(struct lsx_context __user *ctx) +{ + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + return _restore_lsx_context(regs, fcc, fcsr); +} + +static int save_hw_lasx_context(struct lasx_context __user *ctx) +{ + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + return _save_lasx_context(regs, fcc, fcsr); +} + +static int restore_hw_lasx_context(struct lasx_context __user *ctx) +{ + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + return _restore_lasx_context(regs, fcc, fcsr); +} + static int fcsr_pending(unsigned int __user *fcsr) { int err, sig = 0; @@ -227,6 +383,146 @@ static int protected_restore_fpu_context(struct extctx_layout *extctx) return err ?: sig; } +static int protected_save_lsx_context(struct extctx_layout *extctx) +{ + int err = 0; + struct sctx_info __user *info = extctx->lsx.addr; + struct lsx_context __user *lsx_ctx = (struct lsx_context *)get_ctx_through_ctxinfo(info); + uint64_t __user *regs = (uint64_t *)&lsx_ctx->regs; + uint64_t __user *fcc = &lsx_ctx->fcc; + uint32_t __user *fcsr = &lsx_ctx->fcsr; + + while (1) { + lock_fpu_owner(); + if (is_lsx_enabled()) + err = save_hw_lsx_context(lsx_ctx); + else + err = copy_lsx_to_sigcontext(lsx_ctx); + unlock_fpu_owner(); + + err |= __put_user(LSX_CTX_MAGIC, &info->magic); + err |= __put_user(extctx->lsx.size, &info->size); + + if (likely(!err)) + break; + /* Touch the 
LSX context and try again */ + err = __put_user(0, &regs[0]) | + __put_user(0, &regs[32*2-1]) | + __put_user(0, fcc) | + __put_user(0, fcsr); + if (err) + return err; /* really bad sigcontext */ + } + + return err; +} + +static int protected_restore_lsx_context(struct extctx_layout *extctx) +{ + int err = 0, sig = 0, tmp __maybe_unused; + struct sctx_info __user *info = extctx->lsx.addr; + struct lsx_context __user *lsx_ctx = (struct lsx_context *)get_ctx_through_ctxinfo(info); + uint64_t __user *regs = (uint64_t *)&lsx_ctx->regs; + uint64_t __user *fcc = &lsx_ctx->fcc; + uint32_t __user *fcsr = &lsx_ctx->fcsr; + + err = sig = fcsr_pending(fcsr); + if (err < 0) + return err; + + while (1) { + lock_fpu_owner(); + if (is_lsx_enabled()) + err = restore_hw_lsx_context(lsx_ctx); + else + err = copy_lsx_from_sigcontext(lsx_ctx); + unlock_fpu_owner(); + + if (likely(!err)) + break; + /* Touch the LSX context and try again */ + err = __get_user(tmp, &regs[0]) | + __get_user(tmp, &regs[32*2-1]) | + __get_user(tmp, fcc) | + __get_user(tmp, fcsr); + if (err) + break; /* really bad sigcontext */ + } + + return err ?: sig; +} + +static int protected_save_lasx_context(struct extctx_layout *extctx) +{ + int err = 0; + struct sctx_info __user *info = extctx->lasx.addr; + struct lasx_context __user *lasx_ctx = + (struct lasx_context *)get_ctx_through_ctxinfo(info); + uint64_t __user *regs = (uint64_t *)&lasx_ctx->regs; + uint64_t __user *fcc = &lasx_ctx->fcc; + uint32_t __user *fcsr = &lasx_ctx->fcsr; + + while (1) { + lock_fpu_owner(); + if (is_lasx_enabled()) + err = save_hw_lasx_context(lasx_ctx); + else + err = copy_lasx_to_sigcontext(lasx_ctx); + unlock_fpu_owner(); + + err |= __put_user(LASX_CTX_MAGIC, &info->magic); + err |= __put_user(extctx->lasx.size, &info->size); + + if (likely(!err)) + break; + /* Touch the LASX context and try again */ + err = __put_user(0, &regs[0]) | + __put_user(0, &regs[32*4-1]) | + __put_user(0, fcc) | + __put_user(0, fcsr); + if (err) + return err; /* really bad
sigcontext */ + } + + return err; +} + +static int protected_restore_lasx_context(struct extctx_layout *extctx) +{ + int err = 0, sig = 0, tmp __maybe_unused; + struct sctx_info __user *info = extctx->lasx.addr; + struct lasx_context __user *lasx_ctx = + (struct lasx_context *)get_ctx_through_ctxinfo(info); + uint64_t __user *regs = (uint64_t *)&lasx_ctx->regs; + uint64_t __user *fcc = &lasx_ctx->fcc; + uint32_t __user *fcsr = &lasx_ctx->fcsr; + + err = sig = fcsr_pending(fcsr); + if (err < 0) + return err; + + while (1) { + lock_fpu_owner(); + if (is_lasx_enabled()) + err = restore_hw_lasx_context(lasx_ctx); + else + err = copy_lasx_from_sigcontext(lasx_ctx); + unlock_fpu_owner(); + + if (likely(!err)) + break; + /* Touch the LASX context and try again */ + err = __get_user(tmp, &regs[0]) | + __get_user(tmp, &regs[32*4-1]) | + __get_user(tmp, fcc) | + __get_user(tmp, fcsr); + if (err) + break; /* really bad sigcontext */ + } + + return err ?: sig; +} + static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, struct extctx_layout *extctx) { @@ -240,7 +536,11 @@ static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, for (i = 1; i < 32; i++) err |= __put_user(regs->regs[i], &sc->sc_regs[i]); - if (extctx->fpu.addr) + if (extctx->lasx.addr) + err |= protected_save_lasx_context(extctx); + else if (extctx->lsx.addr) + err |= protected_save_lsx_context(extctx); + else if (extctx->fpu.addr) err |= protected_save_fpu_context(extctx); /* Set the "end" magic */ @@ -274,6 +574,20 @@ static int parse_extcontext(struct sigcontext __user *sc, struct extctx_layout * extctx->fpu.addr = info; break; + case LSX_CTX_MAGIC: + if (size < (sizeof(struct sctx_info) + + sizeof(struct lsx_context))) + goto invalid; + extctx->lsx.addr = info; + break; + + case LASX_CTX_MAGIC: + if (size < (sizeof(struct sctx_info) + + sizeof(struct lasx_context))) + goto invalid; + extctx->lasx.addr = info; + break; + default: goto invalid; } @@ -319,7 +633,11 @@
static int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc for (i = 1; i < 32; i++) err |= __get_user(regs->regs[i], &sc->sc_regs[i]); - if (extctx.fpu.addr) + if (extctx.lasx.addr) + err |= protected_restore_lasx_context(&extctx); + else if (extctx.lsx.addr) + err |= protected_restore_lsx_context(&extctx); + else if (extctx.fpu.addr) err |= protected_restore_fpu_context(&extctx); bad: @@ -375,7 +693,13 @@ static unsigned long setup_extcontext(struct extctx_layout *extctx, unsigned lon extctx->size += extctx->end.size; if (extctx->flags & SC_USED_FP) { - if (cpu_has_fpu) + if (cpu_has_lasx && thread_lasx_context_live()) + new_sp = extframe_alloc(extctx, &extctx->lasx, + sizeof(struct lasx_context), LASX_CTX_ALIGN, new_sp); + else if (cpu_has_lsx && thread_lsx_context_live()) + new_sp = extframe_alloc(extctx, &extctx->lsx, + sizeof(struct lsx_context), LSX_CTX_ALIGN, new_sp); + else if (cpu_has_fpu) new_sp = extframe_alloc(extctx, &extctx->fpu, sizeof(struct fpu_context), FPU_CTX_ALIGN, new_sp); } diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index b5fab308dcf25a3693ded821c584cee6952ead1d..883d8d540f67186dd689800bab920752b3cb599f 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -29,6 +29,7 @@ #include #include #include +#include "legacy_boot.h" int __cpu_number_map[NR_CPUS]; /* Map physical to logical */ EXPORT_SYMBOL(__cpu_number_map); @@ -195,11 +196,11 @@ void __init loongson3_prepare_cpus(unsigned int max_cpus) */ void loongson3_boot_secondary(int cpu, struct task_struct *idle) { - unsigned long entry; + unsigned long entry = (unsigned long)&smpboot_entry; pr_info("Booting CPU#%d...\n", cpu); - - entry = __pa_symbol((unsigned long)&smpboot_entry); + if (!efi_bp) + entry = __pa_symbol((unsigned long)&smpboot_entry); cpuboot_data.stack = (unsigned long)__KSTK_TOS(idle); cpuboot_data.thread_info = (unsigned long)task_thread_info(idle); diff --git a/arch/loongarch/kernel/traps.c 
b/arch/loongarch/kernel/traps.c index 94616e677e238ea684ff28f4036f9800a5421f92..4840358c534103d0b9d1b28ffd25bbf664fe629e 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -514,12 +514,67 @@ static void init_restore_fp(void) BUG_ON(!is_fp_enabled()); } +static void init_restore_lsx(void) +{ + enable_lsx(); + + if (!thread_lsx_context_live()) { + /* First time LSX context user */ + init_restore_fp(); + init_lsx_upper(); + set_thread_flag(TIF_LSX_CTX_LIVE); + } else { + if (!is_simd_owner()) { + if (is_fpu_owner()) { + restore_lsx_upper(current); + } else { + __own_fpu(); + restore_lsx(current); + } + } + } + + set_thread_flag(TIF_USEDSIMD); + + BUG_ON(!is_fp_enabled()); + BUG_ON(!is_lsx_enabled()); +} + +static void init_restore_lasx(void) +{ + enable_lasx(); + + if (!thread_lasx_context_live()) { + /* First time LASX context user */ + init_restore_lsx(); + init_lasx_upper(); + set_thread_flag(TIF_LASX_CTX_LIVE); + } else { + if (is_fpu_owner() || is_simd_owner()) { + init_restore_lsx(); + restore_lasx_upper(current); + } else { + __own_fpu(); + enable_lsx(); + restore_lasx(current); + } + } + + set_thread_flag(TIF_USEDSIMD); + + BUG_ON(!is_fp_enabled()); + BUG_ON(!is_lsx_enabled()); + BUG_ON(!is_lasx_enabled()); +} + asmlinkage void noinstr do_fpu(struct pt_regs *regs) { irqentry_state_t state = irqentry_enter(regs); local_irq_enable(); die_if_kernel("do_fpu invoked from kernel context!", regs); + BUG_ON(is_lsx_enabled()); + BUG_ON(is_lasx_enabled()); preempt_disable(); init_restore_fp(); @@ -534,7 +589,19 @@ asmlinkage void noinstr do_lsx(struct pt_regs *regs) irqentry_state_t state = irqentry_enter(regs); local_irq_enable(); - force_sig(SIGILL); + if (!cpu_has_lsx) { + force_sig(SIGILL); + goto out; + } + + die_if_kernel("do_lsx invoked from kernel context!", regs); + BUG_ON(is_lasx_enabled()); + + preempt_disable(); + init_restore_lsx(); + preempt_enable(); + +out: local_irq_disable(); irqentry_exit(regs, state); @@ -545,7 +612,18 
@@ asmlinkage void noinstr do_lasx(struct pt_regs *regs) irqentry_state_t state = irqentry_enter(regs); local_irq_enable(); - force_sig(SIGILL); + if (!cpu_has_lasx) { + force_sig(SIGILL); + goto out; + } + + die_if_kernel("do_lasx invoked from kernel context!", regs); + + preempt_disable(); + init_restore_lasx(); + preempt_enable(); + +out: local_irq_disable(); irqentry_exit(regs, state); diff --git a/arch/loongarch/mm/cache.c b/arch/loongarch/mm/cache.c index 9e5ce5aa73f740f181e4ea779527a71c7234ba43..4a076ff404cdc5252d1cc7117daa8cd08a013f57 100644 --- a/arch/loongarch/mm/cache.c +++ b/arch/loongarch/mm/cache.c @@ -1,11 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) 2020-2022 Loongson Technology Corporation Limited - * - * Derived from MIPS: - * Copyright (C) 1994 - 2003, 06, 07 by Ralf Baechle (ralf@linux-mips.org) - * Copyright (C) 2007 MIPS Technologies, Inc. - */ +* Copyright (C) 2020-2022 Loongson Technology Corporation Limited +* +* Derived from MIPS: +* Copyright (C) 1994 - 2003, 06, 07 by Ralf Baechle (ralf@linux-mips.org) +* Copyright (C) 2007 MIPS Technologies, Inc. +*/ #include #include #include @@ -15,127 +15,224 @@ #include #include #include +#include -#include #include +#include #include #include #include #include #include +#include -/* - * LoongArch maintains ICache/DCache coherency by hardware, - * we just need "ibar" to avoid instruction hazard here. - */ +void cache_error_setup(void) +{ + set_merr_handler(0x0, &except_vec_cex, 0x80); +} + +/* Cache operations. 
*/ void local_flush_icache_range(unsigned long start, unsigned long end) { asm volatile ("\tibar 0\n"::); } -EXPORT_SYMBOL(local_flush_icache_range); -void cache_error_setup(void) +static inline void __flush_cache_line_hit(int leaf, unsigned long addr) { - extern char __weak except_vec_cex; - set_merr_handler(0x0, &except_vec_cex, 0x80); + switch (leaf) { + case Cache_LEAF0: + cache_op(Hit_Writeback_Inv_LEAF0, addr); + break; + case Cache_LEAF1: + cache_op(Hit_Writeback_Inv_LEAF1, addr); + break; + case Cache_LEAF2: + cache_op(Hit_Writeback_Inv_LEAF2, addr); + break; + case Cache_LEAF3: + cache_op(Hit_Writeback_Inv_LEAF3, addr); + break; + case Cache_LEAF4: + cache_op(Hit_Writeback_Inv_LEAF4, addr); + break; + case Cache_LEAF5: + cache_op(Hit_Writeback_Inv_LEAF5, addr); + break; + default: + break; + } } -static unsigned long icache_size __read_mostly; -static unsigned long dcache_size __read_mostly; -static unsigned long vcache_size __read_mostly; -static unsigned long scache_size __read_mostly; +static inline void __flush_cache_line_indexed(int leaf, unsigned long addr) +{ + switch (leaf) { + case Cache_LEAF0: + cache_op(Index_Writeback_Inv_LEAF0, addr); + break; + case Cache_LEAF1: + cache_op(Index_Writeback_Inv_LEAF1, addr); + break; + case Cache_LEAF2: + cache_op(Index_Writeback_Inv_LEAF2, addr); + break; + case Cache_LEAF3: + cache_op(Index_Writeback_Inv_LEAF3, addr); + break; + case Cache_LEAF4: + cache_op(Index_Writeback_Inv_LEAF4, addr); + break; + case Cache_LEAF5: + cache_op(Index_Writeback_Inv_LEAF5, addr); + break; + default: + break; + } +} -static char *way_string[] = { NULL, "direct mapped", "2-way", - "3-way", "4-way", "5-way", "6-way", "7-way", "8-way", - "9-way", "10-way", "11-way", "12-way", - "13-way", "14-way", "15-way", "16-way", -}; +void flush_cache_line_hit(unsigned long addr) +{ + int leaf; + struct cache_desc *cdesc = current_cpu_data.cache_leaves; + unsigned int cache_present = current_cpu_data.cache_leaves_present; + + /* If last level 
cache is inclusive, no need to flush other caches. */ + leaf = cache_present - 1; + if (cache_inclusive(cdesc + leaf)) { + __flush_cache_line_hit(leaf, addr); + return; + } + + for (leaf = 0; leaf < cache_present; leaf++) + __flush_cache_line_hit(leaf, addr); +} -static void probe_pcache(void) +static void flush_cache_leaf(unsigned int leaf) +{ + u64 line; + int i, j, nr_nodes; + struct cache_desc *cdesc = current_cpu_data.cache_leaves + leaf; + + nr_nodes = loongson_sysconf.nr_nodes; + if (cache_private(cdesc)) + nr_nodes = 1; + + line = CSR_DMW0_BASE; + do { + for (i = 0; i < cdesc->sets; i++) { + for (j = 0; j < cdesc->ways; j++) { + __flush_cache_line_indexed(leaf, line); + line++; + } + + line -= cdesc->ways; + line += cdesc->linesz; + } + line += 0x100000000000; + } while (--nr_nodes > 0); +} + +asmlinkage __visible void cpu_flush_caches(void) +{ + int leaf; + struct cache_desc *cdesc = current_cpu_data.cache_leaves; + unsigned int cache_present = current_cpu_data.cache_leaves_present; + + /* If last level cache is inclusive, no need to flush other caches. 
*/ + leaf = cache_present - 1; + if (cache_inclusive(cdesc + leaf)) { + flush_cache_leaf(leaf); + return; + } + + for (leaf = 0; leaf < cache_present; leaf++) + flush_cache_leaf(leaf); +} + +static inline void set_cache_basics(struct cache_desc *cdesc, unsigned int leaf) { - struct cpuinfo_loongarch *c = &current_cpu_data; - unsigned int lsize, sets, ways; unsigned int config; - config = read_cpucfg(LOONGARCH_CPUCFG17); - lsize = 1 << ((config & CPUCFG17_L1I_SIZE_M) >> CPUCFG17_L1I_SIZE); - sets = 1 << ((config & CPUCFG17_L1I_SETS_M) >> CPUCFG17_L1I_SETS); - ways = ((config & CPUCFG17_L1I_WAYS_M) >> CPUCFG17_L1I_WAYS) + 1; + config = read_cpucfg(LOONGARCH_CPUCFG17 + leaf); + cdesc->linesz = 1 << ((config & CACHE_LSIZE_M) >> CACHE_LSIZE); + cdesc->sets = 1 << ((config & CACHE_SETS_M) >> CACHE_SETS); + cdesc->ways = ((config & CACHE_WAYS_M) >> CACHE_WAYS) + 1; +} - c->icache.linesz = lsize; - c->icache.sets = sets; - c->icache.ways = ways; - icache_size = sets * ways * lsize; - c->icache.waysize = icache_size / c->icache.ways; +#define populate_cache_properties(config, cdesc, level, leaf) \ +do { \ + if (level == 1) { \ + cdesc->flags |= CACHE_PRIVATE; \ + } else { \ + if (config & IUPRIV) \ + cdesc->flags |= CACHE_PRIVATE; \ + if (config & IUINCL) \ + cdesc->flags |= CACHE_INCLUSIVE; \ + } \ + cdesc->flags |= CACHE_PRESENT; \ + cdesc->level = level; \ + set_cache_basics(cdesc, leaf); \ + cdesc++; \ + leaf++; \ +} while (0) - config = read_cpucfg(LOONGARCH_CPUCFG18); - lsize = 1 << ((config & CPUCFG18_L1D_SIZE_M) >> CPUCFG18_L1D_SIZE); - sets = 1 << ((config & CPUCFG18_L1D_SETS_M) >> CPUCFG18_L1D_SETS); - ways = ((config & CPUCFG18_L1D_WAYS_M) >> CPUCFG18_L1D_WAYS) + 1; +/* + * Each level cache occupies 7bits in order in CPUCFG16 + * except level 1 cache with bit0~2.
+ */ +static void probe_cache_hierarchy(void) +{ + struct cache_desc *cdesc = current_cpu_data.cache_leaves; + unsigned int leaf = 0, level; + unsigned int config = read_cpucfg(LOONGARCH_CPUCFG16); - c->dcache.linesz = lsize; - c->dcache.sets = sets; - c->dcache.ways = ways; - dcache_size = sets * ways * lsize; - c->dcache.waysize = dcache_size / c->dcache.ways; +#define IUPRE (1 << 0) +#define IUUNIFY (1 << 1) +#define IUPRIV (1 << 2) +#define IUINCL (1 << 3) +#define DPRE (1 << 4) +#define DPRIV (1 << 5) +#define DINCL (1 << 6) - c->options |= LOONGARCH_CPU_PREFETCH; +#define L1DPRE (1 << 2) - pr_info("Primary instruction cache %ldkB, %s, %s, linesize %d bytes.\n", - icache_size >> 10, way_string[c->icache.ways], "VIPT", c->icache.linesz); + for (level = 1; level <= CACHE_LEVEL_MAX; level++) { + if (config & IUPRE) { + if (config & IUUNIFY) + cdesc->type = CACHE_TYPE_UNIFIED; + else + cdesc->type = CACHE_TYPE_INST; - pr_info("Primary data cache %ldkB, %s, %s, %s, linesize %d bytes\n", - dcache_size >> 10, way_string[c->dcache.ways], "VIPT", "no aliases", c->dcache.linesz); -} + populate_cache_properties(config, cdesc, level, leaf); + } -static void probe_vcache(void) -{ - struct cpuinfo_loongarch *c = &current_cpu_data; - unsigned int lsize, sets, ways; - unsigned int config; + if ((level == 1 && (config & L1DPRE)) || + (level != 1 && (config & DPRE))) { + cdesc->type = CACHE_TYPE_DATA; - config = read_cpucfg(LOONGARCH_CPUCFG19); - lsize = 1 << ((config & CPUCFG19_L2_SIZE_M) >> CPUCFG19_L2_SIZE); - sets = 1 << ((config & CPUCFG19_L2_SETS_M) >> CPUCFG19_L2_SETS); - ways = ((config & CPUCFG19_L2_WAYS_M) >> CPUCFG19_L2_WAYS) + 1; + populate_cache_properties(config, cdesc, level, leaf); + } - c->vcache.linesz = lsize; - c->vcache.sets = sets; - c->vcache.ways = ways; - vcache_size = lsize * sets * ways; - c->vcache.waysize = vcache_size / c->vcache.ways; + if (level == 1) + config = config >> 3; + else + config = config >> 7; - pr_info("Unified victim cache %ldkB %s,
linesize %d bytes.\n", - vcache_size >> 10, way_string[c->vcache.ways], c->vcache.linesz); -} + if (!config) + break; -static void probe_scache(void) -{ - struct cpuinfo_loongarch *c = &current_cpu_data; - unsigned int lsize, sets, ways; - unsigned int config; + } - config = read_cpucfg(LOONGARCH_CPUCFG20); - lsize = 1 << ((config & CPUCFG20_L3_SIZE_M) >> CPUCFG20_L3_SIZE); - sets = 1 << ((config & CPUCFG20_L3_SETS_M) >> CPUCFG20_L3_SETS); - ways = ((config & CPUCFG20_L3_WAYS_M) >> CPUCFG20_L3_WAYS) + 1; + if (leaf > 0) + current_cpu_data.options |= LOONGARCH_CPU_PREFETCH; - c->scache.linesz = lsize; - c->scache.sets = sets; - c->scache.ways = ways; - /* 4 cores. scaches are shared */ - scache_size = lsize * sets * ways; - c->scache.waysize = scache_size / c->scache.ways; + WARN_ON(leaf > CACHE_LEAVES_MAX); - pr_info("Unified secondary cache %ldkB %s, linesize %d bytes.\n", - scache_size >> 10, way_string[c->scache.ways], c->scache.linesz); + current_cpu_data.cache_leaves_present = leaf; } void cpu_cache_init(void) { - probe_pcache(); - probe_vcache(); - probe_scache(); + probe_cache_hierarchy(); shm_align_mask = PAGE_SIZE - 1; } diff --git a/arch/loongarch/pci/acpi.c b/arch/loongarch/pci/acpi.c index ac18ca7a900a37de212661a216dc6568198af2a4..9450e7aac9cfd76047b384e73fa9f2ddcfc0dd19 100644 --- a/arch/loongarch/pci/acpi.c +++ b/arch/loongarch/pci/acpi.c @@ -55,13 +55,159 @@ static void acpi_release_root_info(struct acpi_pci_root_info *ci) kfree(info); } +static void arch_pci_root_validate_resources(struct device *dev, + struct list_head *resources, + unsigned long type) +{ + LIST_HEAD(list); + struct resource *res1, *res2, *root = NULL; + struct resource_entry *tmp, *entry, *entry2; + + WARN_ON((type & (IORESOURCE_MEM | IORESOURCE_IO)) == 0); + root = (type & IORESOURCE_MEM) ?
&iomem_resource : &ioport_resource; + + list_splice_init(resources, &list); + resource_list_for_each_entry_safe(entry, tmp, &list) { + bool free = false; + resource_size_t end; + + res1 = entry->res; + if (!(res1->flags & type)) + goto next; + + /* Exclude non-addressable range or non-addressable portion */ + end = min(res1->end, root->end); + if (end <= res1->start) { + dev_info(dev, "host bridge window %pR (ignored, not CPU addressable)\n", + res1); + free = true; + goto next; + } else if (res1->end != end) { + dev_info(dev, "host bridge window %pR ([%#llx-%#llx] ignored, not CPU addressable)\n", + res1, (unsigned long long)end + 1, + (unsigned long long)res1->end); + res1->end = end; + } + + resource_list_for_each_entry(entry2, resources) { + res2 = entry2->res; + if (!(res2->flags & type)) + continue; + + /* + * I don't like throwing away windows because then + * our resources no longer match the ACPI _CRS, but + * the kernel resource tree doesn't allow overlaps. + */ + if (resource_overlaps(res1, res2)) { + res2->start = min(res1->start, res2->start); + res2->end = max(res1->end, res2->end); + dev_info(dev, "host bridge window expanded to %pR; %pR ignored\n", + res2, res1); + free = true; + goto next; + } + } + +next: + resource_list_del(entry); + if (free) + resource_list_free_entry(entry); + else + resource_list_add_tail(entry, resources); + } +} +static void arch_pci_root_remap_iospace(struct fwnode_handle *fwnode, + struct resource_entry *entry) +{ + struct resource *res = entry->res; + resource_size_t cpu_addr = res->start; + resource_size_t pci_addr = cpu_addr - entry->offset; + resource_size_t length = resource_size(res); + unsigned long port; + + if (pci_register_io_range(fwnode, cpu_addr, length)) { + res->start += ISA_IOSIZE; + cpu_addr = res->start; + pci_addr = cpu_addr - entry->offset; + length = resource_size(res); + if (pci_register_io_range(fwnode, cpu_addr, length)) + goto err; + } + + port = pci_address_to_pio(cpu_addr); + if (port == 
(unsigned long)-1) + goto err; + + res->start = port; + res->end = port + length - 1; + entry->offset = port - pci_addr; + + if (pci_remap_iospace(res, cpu_addr) < 0) + goto err; + + pr_info("Remapped I/O %pa to %pR\n", &cpu_addr, res); + return; +err: + res->flags |= IORESOURCE_DISABLED; +} + +static int arch_pci_probe_root_resources(struct acpi_pci_root_info *info) +{ + int ret; + struct list_head *list = &info->resources; + struct acpi_device *device = info->bridge; + struct resource_entry *entry, *tmp; + unsigned long flags; + struct resource *res; + + flags = IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_MEM_8AND16BIT; + ret = acpi_dev_get_resources(device, list, + acpi_dev_filter_resource_type_cb, + (void *)flags); + if (ret < 0) + dev_warn(&device->dev, + "failed to parse _CRS method, error code %d\n", ret); + else if (ret == 0) + dev_dbg(&device->dev, + "no IO and memory resources present in _CRS\n"); + else { + resource_list_for_each_entry_safe(entry, tmp, list) { + if (entry->res->flags & IORESOURCE_IO) { + res = entry->res; + res->start = PFN_ALIGN(res->start); + res->end += 1; + res->end = PFN_ALIGN(res->end); + res->end -= 1; + if (!entry->offset) { + entry->offset = LOONGSON_LIO_BASE; + res->start |= LOONGSON_LIO_BASE; + res->end |= LOONGSON_LIO_BASE; + } + arch_pci_root_remap_iospace(&device->fwnode, + entry); + } + if (entry->res->flags & IORESOURCE_DISABLED) + resource_list_destroy_entry(entry); + else + entry->res->name = info->name; + } + arch_pci_root_validate_resources(&device->dev, list, + IORESOURCE_MEM); + arch_pci_root_validate_resources(&device->dev, list, + IORESOURCE_IO); + } + + return ret; +} + static int acpi_prepare_root_resources(struct acpi_pci_root_info *ci) { int status; struct resource_entry *entry, *tmp; struct acpi_device *device = ci->bridge; - status = acpi_pci_probe_root_resources(ci); + status = arch_pci_probe_root_resources(ci); if (status > 0) { resource_list_for_each_entry_safe(entry, tmp, &ci->resources) { if 
(entry->res->flags & IORESOURCE_MEM) { diff --git a/arch/loongarch/pci/pci.c b/arch/loongarch/pci/pci.c index 4456fafc243ad770a29d9fbfafddc8bd88e3f67d..a552b9da72eccdec6fa65aa6e52cb1a12f84deda 100644 --- a/arch/loongarch/pci/pci.c +++ b/arch/loongarch/pci/pci.c @@ -10,6 +10,7 @@ #include #include #include +#include #define PCI_DEVICE_ID_LOONGSON_HOST 0x7a00 #define PCI_DEVICE_ID_LOONGSON_DC1 0x7a06 @@ -45,12 +46,10 @@ static int __init pcibios_init(void) unsigned int lsize; /* - * Set PCI cacheline size to that of the highest level in the + * Set PCI cacheline size to that of the last level in the * cache hierarchy. */ - lsize = cpu_dcache_line_size(); - lsize = cpu_vcache_line_size() ? : lsize; - lsize = cpu_scache_line_size() ? : lsize; + lsize = cpu_last_level_cache_line_size(); BUG_ON(!lsize); diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 3d08cce7c62f3d5076eddc0265af4a87ebd4ceb3..82378d7208fa52abae4e0fc8b77f2d0a7c5ce30e 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -287,6 +287,17 @@ config LOONGSON1_CPUFREQ If in doubt, say N. endif +if LOONGARCH +config LOONGSON3_ACPI_CPUFREQ + bool "Loongson3 ACPI cpufreq driver" + depends on ACPI_PROCESSOR + help + This driver adds a CPUFreq driver which utilizes the ACPI + Processor Performance States. + This driver supports Loongson 3A5000 compatible CPUs. + If in doubt, say N. 
+endif + if SPARC64 config SPARC_US3_CPUFREQ tristate "UltraSPARC-III CPU Frequency driver" diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 7335b9ed53234ce0631a9527960510e71ac26f52..250bc70febc859e456a8a3230e566617abeaf08f 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -111,6 +111,7 @@ obj-$(CONFIG_BMIPS_CPUFREQ) += bmips-cpufreq.o obj-$(CONFIG_IA64_ACPI_CPUFREQ) += ia64-acpi-cpufreq.o obj-$(CONFIG_LOONGSON2_CPUFREQ) += loongson2_cpufreq.o obj-$(CONFIG_LOONGSON1_CPUFREQ) += loongson1-cpufreq.o +obj-$(CONFIG_LOONGSON3_ACPI_CPUFREQ) += loongson3-acpi-cpufreq.o obj-$(CONFIG_SH_CPU_FREQ) += sh-cpufreq.o obj-$(CONFIG_SPARC_US2E_CPUFREQ) += sparc-us2e-cpufreq.o obj-$(CONFIG_SPARC_US3_CPUFREQ) += sparc-us3-cpufreq.o diff --git a/drivers/cpufreq/loongson3-acpi-cpufreq.c b/drivers/cpufreq/loongson3-acpi-cpufreq.c new file mode 100644 index 0000000000000000000000000000000000000000..282c48f89b58404689775536744da772b5729af4 --- /dev/null +++ b/drivers/cpufreq/loongson3-acpi-cpufreq.c @@ -0,0 +1,1530 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include "cpufreq_governor.h" + +#include +#define CPU_ID_FIELD 0xf + +#define COMPLETE_STATUS 0x80000000 +#define VOLTAGE_COMMAND 0x21 + +#define DVFS_INFO 0x22 +#define DVFS_INFO_BOOST_LEVEL 0x23 +#define DVFS_INFO_MIN_FREQ 0xf +#define DVFS_INFO_MAX_FREQ 0xf0 +#define DVFS_INFO_BOOST_CORE_FREQ 0xff00 +#define DVFS_INFO_NORMAL_CORE_UPPER_LIMIT 0xf0000 +#define DVFS_INFO_BOOST_CORES 0xf00000 + +#define BOOST_MODE 0x80000 +#define NORMAL_MODE 0x40000 + +MODULE_DESCRIPTION("Loongson 3A5000 ACPI Processor P-States Driver"); + +MODULE_LICENSE("GPL"); + +#define 
CPUFREQ_SAMPLING_INTERVAL (2 * TICK_NSEC / NSEC_PER_USEC) +#define LOONGSON_CONTROL_MASK (0xFF) +#define FACTOR (0xeac0c6e8) +#define BOOST_THRESHOLD (900) +#define MAX_CORES_PER_PACKAGE 64 +#define CPU_ID_FIELD 0xf +#define VOLTAGE_COMMAND 0x21 +#define MAX_READY_TIMEOUT 300000000 +#define RESERVED_FREQ 3 + +#define LOONGSON_BOOST_FREQ_MASK (0x7 << 8) +#define FREQ_STEP (25) + +static struct mutex boost_mutex[MAX_PACKAGES]; +static bool cpufreq_has_boost_freq; +static int max_boost_cores; +static int boost_gears; +static int boost_freqs[NR_CPUS + 1]; +struct package_data; +struct core_data; +static struct acpi_processor_performance __percpu *acpi_perf_data; +static struct cpufreq_driver loongson3_cpufreq_driver; +static struct freq_attr *loongson3_cpufreq_attr[]; +DECLARE_PER_CPU(struct clock_event_device, stable_clockevent_device); +static inline struct core_data *get_core_data(int cpu); + +static int min_freq_level; +static int max_freq_level; +static int max_upper_index; +static int max_boost_freq; + +/* threshold of core's get into msa */ +static int msa_count_threshold = 200; +/* threshold of core's get into lasx */ +static int lasx_count_threshold = 200; +/* other cores' upper load threshold when 1 core get into boost mode and enable msa/lasx */ +static int load_threshold = 60; + +DEFINE_PER_CPU(unsigned long, msa_count); +EXPORT_PER_CPU_SYMBOL(msa_count); + +#if defined(CONFIG_CPU_HAS_LASX) +DEFINE_PER_CPU(unsigned long, lasx_count); +EXPORT_PER_CPU_SYMBOL(lasx_count); +#endif + +struct ce_update_data { + struct clock_event_device *cd; + unsigned int new_freq; +}; + +static struct kthread_worker cpufreq_worker; +static struct task_struct *cpufreq_thread; +/** + * struct core_data - Store core related information + * @in_boost: the core is boosting to boost_freq + * @cpu: logical cpu of the core + * @update_util The update_util_data pointer of @cpu, is passed to the callback + * function, which will be called by cpufreq_update_util() + * @package The 
package_data structure the core belongs to + * @work_in_progress: @work is busy + * @irq_work: to enqueue callback handling on irq workqueue + * @work: to enqueue work from irq workqueue on system workqueue + * @perf: store frequency table related information from ACPI table + * @normal_max_freq: max normal freq of cpu + * @boost_freq: max boost freq of cpu + * @clock_scale: clock scale to calculate cpu_data[cpu].udelay_val in boost mode + * @package_id: package id of core + * @shift: clock shift to calculate cpu_data[cpu].udelay_val in boost mode + * @update_util_set: if callback has been set for cpufreq_update_util() + * @load: current load of the core + * @last_freq_update_time: last freq update time + * @freq_update_delay_ns: min interval of freq update, which is + * transition_latency configured in ACPI table + * + * following elements are used to calculate load of the core + * @prev_update_time: + * @prev_cpu_idle: + * @prev_load: + * @sampling_rate: + * + */ +struct core_data { + bool in_boost; + int cpu; + struct update_util_data update_util; + struct package_data *package; + bool work_in_progress; + struct irq_work irq_work; + struct kthread_work work; + struct acpi_processor_performance *perf; + unsigned int normal_max_freq; + unsigned int *boost_freq; + unsigned int *clock_scale; + unsigned int package_id; + unsigned int *shift; + bool update_util_set; + unsigned long long load; + + u64 last_freq_update_time; + s64 freq_update_delay_ns; + u64 prev_update_time; + u64 prev_cpu_idle; + u32 prev_load; + u32 sampling_rate; +}; + +struct package_data { + int boost_cores; + int max_boost_cores; + int nr_cores; + char in_boost; + int nr_full_load_cores; + struct core_data core[MAX_CORES_PER_PACKAGE]; +} all_package_data[MAX_PACKAGES]; + +static bool boost_supported(void) +{ + return loongson3_cpufreq_driver.set_boost; +} + +/* + * Return the boost level of target_freq (0 if it is not a boost freq) + * + * target_freq must be a freq in freq table when + * calling the function.
+ */ +static int boost_level(struct acpi_processor_performance *perf, unsigned int target_freq) +{ + int i; + + for (i = 0; i < perf->state_count; i++) { + if (target_freq == (perf->states[i].core_frequency * 1000)) + return (perf->states[i].control & LOONGSON_BOOST_FREQ_MASK) >> 8; + } + return 0; +} + +#ifdef CONFIG_SMP +static int loongson3_cpu_freq_notifier(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct cpufreq_freqs *freqs; + struct clock_event_device __maybe_unused *cd; + struct core_data *core; + unsigned int __maybe_unused new_freq; + unsigned long cpu; + struct ce_update_data __maybe_unused ce_data; + int cur_boost_level; + + if (val == CPUFREQ_POSTCHANGE) { + freqs = (struct cpufreq_freqs *)data; + cpu = freqs->policy->cpu; + core = get_core_data(cpu); + cur_boost_level = boost_level(core->perf, freqs->new); + if (cur_boost_level != 0) { + lpj_fine = (unsigned int) (((int64_t)core->clock_scale[cur_boost_level] * + cpufreq_scale(loops_per_jiffy, + boost_freqs[cur_boost_level] * 1000, + freqs->new)) / core->shift[cur_boost_level]); + } else { + lpj_fine = cpufreq_scale(loops_per_jiffy, + core->normal_max_freq * 1000, freqs->new); + } + } + + return 0; +} +#else +static int loongson3_cpu_freq_notifier(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct cpufreq_freqs *freqs; + struct clock_event_device __maybe_unused *cd; + struct core_data *core; + unsigned int __maybe_unused new_freq; + unsigned long cpu; + int cur_boost_level; + + if (val == CPUFREQ_POSTCHANGE) { + + freqs = (struct cpufreq_freqs *)data; + cpu = freqs->policy->cpu; + core = get_core_data(cpu); + cur_boost_level = boost_level(core->perf, freqs->new); + + if (cur_boost_level != 0) { + lpj_fine = (unsigned int) (((int64_t)core->clock_scale[cur_boost_level] * + loops_per_jiffy) / core->shift[cur_boost_level]); + } else { + lpj_fine = loops_per_jiffy; + } + } + + return 0; +} +#endif +static struct notifier_block loongson3_cpufreq_notifier_block = { +
.notifier_call = loongson3_cpu_freq_notifier +}; + +static int cpufreq_perf_find_level(struct acpi_processor_performance *perf, + unsigned int target_freq, + unsigned int boost_level) +{ + int i; + + for (i = 0; i < perf->state_count; i++) { + if (boost_level) { + if (perf->states[i].control & LOONGSON_BOOST_FREQ_MASK) { + if (target_freq == (perf->states[i].core_frequency * 1000)) + return perf->states[i].control & LOONGSON_CONTROL_MASK; + } + } else { + if (!(perf->states[i].control & LOONGSON_BOOST_FREQ_MASK)) + if (target_freq == (perf->states[i].core_frequency * 1000)) + return perf->states[i].control; + } + } + return 0; +} + +static int cpufreq_perf_find_freq(struct acpi_processor_performance *perf, + unsigned int target_index, + unsigned int boost_level) +{ + int i; + + for (i = 0; i < perf->state_count; i++) { + if (boost_level) { + if (perf->states[i].control & LOONGSON_BOOST_FREQ_MASK) + if (target_index == (perf->states[i].control & + LOONGSON_CONTROL_MASK)) + return perf->states[i].core_frequency; + } else { + if (!(perf->states[i].control & LOONGSON_BOOST_FREQ_MASK)) + if (target_index == perf->states[i].control) + return perf->states[i].core_frequency; + } + } + return 0; +} + + +static inline struct core_data *get_core_data(int cpu) +{ + int package_id = cpu_data[cpu].package; + struct package_data *package = &all_package_data[package_id]; + int core_id = cpu_logical_map(cpu) % package->nr_cores; + + return &package->core[core_id]; +} + +static bool package_boost(struct package_data *package) +{ + int i; + int cur_full_load = 0; + +#if defined(CONFIG_CPU_HAS_LASX) + int lasx_enable_count = 0; + unsigned long lasx_num; + bool clear_lasx = false; +#endif + + int msa_enable_count = 0; + unsigned long msa_num; + bool clear_msa = false; + + for (i = 0; i < package->nr_cores; i++) { + +#if defined(CONFIG_CPU_HAS_LASX) + lasx_num = per_cpu(lasx_count, package->core[i].cpu); + + if (lasx_num) + lasx_enable_count++; + + if (lasx_num >= lasx_count_threshold) 
+ clear_lasx = true; + + pr_debug("file %s, line %d, lasx enabled, i %d, cpu %d, lasx_num %lu\n", + __FILE__, __LINE__, i, package->core[i].cpu, lasx_num); +#endif + msa_num = per_cpu(msa_count, package->core[i].cpu); + + if (msa_num) + msa_enable_count++; + + if (msa_num >= msa_count_threshold) + clear_msa = true; + + pr_debug("file %s, line %d, msa enabled, i %d, cpu %d, msa_num %lu\n", + __FILE__, __LINE__, i, package->core[i].cpu, msa_num); + + if (package->core[i].prev_load >= load_threshold) + cur_full_load++; + } + +#if defined(CONFIG_CPU_HAS_LASX) + if (clear_lasx) { + for (i = 0; i < package->nr_cores; i++) + per_cpu(lasx_count, package->core[i].cpu) = 0; + } +#endif + + if (clear_msa) { + for (i = 0; i < package->nr_cores; i++) + per_cpu(msa_count, package->core[i].cpu) = 0; + } + +#if defined(CONFIG_CPU_HAS_LASX) + if (lasx_enable_count > 1 + || (lasx_enable_count && package->nr_full_load_cores > 1) + || (lasx_enable_count && cur_full_load > 1)) { + return false; + } +#endif + + if (msa_enable_count > 1 + || (msa_enable_count && package->nr_full_load_cores > 1) + || (msa_enable_count && cur_full_load > 1)) { + return false; + } + + if (package->nr_full_load_cores && + package->nr_full_load_cores <= package->max_boost_cores) + return true; + + return false; +} + +/* + * check if the cpu can be boosted. + * + * call the function after load of cpu updated. 
+ */
+static bool cpu_can_boost(int cpu)
+{
+	struct core_data *core = get_core_data(cpu);
+	struct package_data *package = core->package;
+
+	if (package->boost_cores >= package->max_boost_cores)
+		return false;
+	if (core->load > BOOST_THRESHOLD)
+		return true;
+
+	return false;
+}
+
+/*
+ * Post a "set frequency level" command for @cpu: the command word is
+ * written to IOCSR 0x51c and bit 10 of IOCSR 0x420 is then set.
+ * Completion is not awaited here.
+ */
+static void do_set_freq_level(int cpu, int freq_level)
+{
+	uint32_t message;
+	uint32_t val;
+
+	message = (0 << 31) | (VOLTAGE_COMMAND << 24)
+		| ((uint32_t)freq_level << 4)
+		| (cpu & CPU_ID_FIELD);
+	iocsr_write32(message, 0x51c);
+	val = iocsr_read32(0x420);
+
+	val |= 1 << 10;
+	iocsr_write32(val, 0x420);
+}
+
+/*
+ * Poll IOCSR 0x51c until COMPLETE_STATUS is set, sleeping on a 100ns
+ * relative hrtimer between polls, for at most @timeout nanoseconds.
+ *
+ * Returns 0 once the status bit is seen, -EPERM if the deadline expires.
+ */
+static int wait_for_ready_timeout(int64_t timeout)
+{
+	int ret;
+	struct timespec64 prev_ts;
+	struct timespec64 curr_ts;
+	ktime_t delay = ktime_set(0, 100);
+
+	ktime_get_ts64(&prev_ts);
+	ktime_get_ts64(&curr_ts);
+
+	ret = -EPERM;
+
+	while (((curr_ts.tv_sec - prev_ts.tv_sec) * 1000000000 +
+			(curr_ts.tv_nsec - prev_ts.tv_nsec)) < timeout) {
+		ktime_get_ts64(&curr_ts);
+
+		if (iocsr_read32(0x51c) & COMPLETE_STATUS) {
+			ret = 0;
+			break;
+		}
+
+		__set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_hrtimeout(&delay, HRTIMER_MODE_REL);
+	}
+	return ret;
+}
+
+/*
+ * Find closest freq to target in a table in ascending order.
+ * Only entries whose driver_data equals @boost_level and whose frequency
+ * lies in [policy->min, policy->max] are considered; 0 means none matched.
+ */
+static int cpufreq_table_find_freq_ac(struct cpufreq_policy *policy,
+		unsigned int target_freq,
+		int boost_level)
+{
+	struct cpufreq_frequency_table *table = policy->freq_table;
+	struct cpufreq_frequency_table *pos;
+	unsigned int freq;
+	unsigned int best_freq = 0;
+	int idx, best = -1;
+
+	cpufreq_for_each_valid_entry_idx(pos, table, idx) {
+		freq = pos->frequency;
+
+		if (pos->driver_data != boost_level)
+			continue;
+		if (freq > policy->max || freq < policy->min)
+			continue;
+		if (freq == target_freq)
+			return freq;
+
+		if (freq < target_freq) {
+			best = idx;
+			best_freq = freq;
+			continue;
+		}
+
+		/* No freq found below target_freq, return freq above target_freq */
+		if (best == -1)
+			return freq;
+
+		/* Choose the closest freq */
+		if (target_freq - table[best].frequency > freq - target_freq)
+			return freq;
+
+		return best_freq;
+	}
+
+	return best_freq;
+}
+
+/*
+ * Find closest freq to target in a table in descending order.
+ * Same filtering and return convention as the ascending variant.
+ */
+static int cpufreq_table_find_freq_dc(struct cpufreq_policy *policy,
+		unsigned int target_freq,
+		int boost_level)
+{
+	struct cpufreq_frequency_table *table = policy->freq_table;
+	struct cpufreq_frequency_table *pos;
+	unsigned int freq;
+	unsigned int best_freq = 0;
+	int idx, best = -1;
+
+	cpufreq_for_each_valid_entry_idx(pos, table, idx) {
+		freq = pos->frequency;
+
+		if (pos->driver_data != boost_level)
+			continue;
+		if (freq > policy->max || freq < policy->min)
+			continue;
+
+		if (freq == target_freq)
+			return freq;
+
+		if (freq > target_freq) {
+			best = idx;
+			best_freq = freq;
+			continue;
+		}
+
+		/* No freq found above target_freq, return freq below target_freq */
+		if (best == -1)
+			return freq;
+
+		/* Choose the closest freq */
+		if (table[best].frequency - target_freq > target_freq - freq)
+			return freq;
+
+		return best_freq;
+	}
+
+	return best_freq;
+}
+
+/*
+ * Works only on sorted freq-tables.
+ * @target_freq is clamped to [policy->min, policy->max] before the lookup.
+ */
+static int cpufreq_table_find_freq(struct cpufreq_policy *policy,
+		unsigned int target_freq,
+		int boost_level)
+{
+	target_freq = clamp_val(target_freq, policy->min, policy->max);
+	if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING)
+		return cpufreq_table_find_freq_ac(policy, target_freq, boost_level);
+	else
+		return cpufreq_table_find_freq_dc(policy, target_freq, boost_level);
+}
+
+/* End a transition only if the policy still has one marked as ongoing. */
+static void transition_end(struct cpufreq_policy *policy,
+		struct cpufreq_freqs *freqs, bool failed)
+{
+	if (unlikely(!policy->transition_ongoing))
+		return;
+	cpufreq_freq_transition_end(policy, freqs, failed);
+}
+
+/*
+ * Begin a transition. A transition that is still marked ongoing is first
+ * closed as failed so that the new one can start.
+ */
+static void transition_begin(struct cpufreq_policy *policy,
+		struct cpufreq_freqs *freqs)
+{
+	if (unlikely(policy->transition_ongoing))
+		cpufreq_freq_transition_end(policy, freqs, true);
+
+	cpufreq_freq_transition_begin(policy, freqs);
+}
+
+/* Record @core's boost state and keep the package's boost_cores count in step. */
+static void update_core_boost_info(struct core_data *core, bool boost_set)
+{
+	core->in_boost = boost_set;
+	if (boost_set)
+		core->package->boost_cores++;
+	else
+		core->package->boost_cores--;
+}
+
+/*
+ * Begin (@before_trans) or end a cpufreq transition on every core of
+ * @package that has a policy and whose package-local index bit is clear
+ * in @skip_cpumask.
+ *
+ * For each such core the level matching policy->cur is looked up with
+ * @find_level, and the announced new frequency with @find_freq.
+ * @trans_failed is forwarded when ending a transition.
+ *
+ * Returns the per-core levels packed four bits per package-local index.
+ */
+static unsigned int cores_freq_trans_notify(struct package_data *package,
+		bool before_trans,
+		bool trans_failed,
+		int find_level,
+		int find_freq,
+		unsigned int skip_cpumask)
+{
+	int i;
+	struct cpufreq_policy *policy;
+	struct cpufreq_freqs freqs;
+	unsigned int cores_level = 0;
+	unsigned int core_level;
+
+	for (i = 0; i < package->nr_cores; i++) {
+		struct core_data *core = &package->core[i];
+
+		policy = cpufreq_cpu_get_raw(core->cpu);
+		if (((1 << i) & skip_cpumask) || !policy)
+			continue;
+
+		freqs.old = policy->cur;
+		freqs.flags = 0;
+
+		/* find level from normal levels */
+		core_level = cpufreq_perf_find_level(core->perf, policy->cur, find_level);
+		if (!core_level) {
+			pr_debug("cpu%d policy->cur=%d find_level=%d",
+				policy->cpu, policy->cur, find_level);
+			pr_debug("freq=%d skip_cpumask=%x\n", find_freq, skip_cpumask);
+		}
+
+		freqs.new = cpufreq_perf_find_freq(core->perf, core_level, find_freq) * 1000;
+		if (!freqs.new)
+			pr_debug("file %s, line %d, find freq error\n", __FILE__, __LINE__);
+
+		cores_level |= (core_level << (i << 2));
+
+		if (before_trans)
+			transition_begin(policy, &freqs);
+		else
+			transition_end(policy, &freqs, trans_failed);
+	}
+	return cores_level;
+}
+
+/*
+ * Move @core to the table frequency closest to @freq at @boost_level.
+ *
+ * Returns 0 on success, -EINVAL when the CPU has no policy, the error from
+ * wait_for_ready_timeout() when the mailbox does not become ready, and -1
+ * when no table entry matches or the core already runs at the target.
+ */
+static int loongson3_set_freq(struct core_data *core, unsigned long freq, int boost_level)
+{
+	int ret = 0;
+	int freq_level;
+	int phy_cpu;
+	int target_freq;
+	struct cpufreq_freqs freqs;
+	struct cpufreq_policy *policy = cpufreq_cpu_get_raw(core->cpu);
+
+	if (!policy)
+		return -EINVAL;
+
+	ret = wait_for_ready_timeout(MAX_READY_TIMEOUT);
+	if (ret)
+		return ret;
+
+	phy_cpu = cpu_logical_map(core->cpu);
+	target_freq = cpufreq_table_find_freq(policy, freq, boost_level);
+	if (!target_freq)
+		return -1;
+	if (target_freq == policy->cur)
+		return -1;
+
+	freqs.flags = 0;
+	freqs.old = policy->cur;
+	freqs.new = target_freq;
+	freq_level = cpufreq_perf_find_level(core->perf, target_freq, boost_level);
+	if (!freq_level) {
+		pr_debug("%s... cpu%d freq=%lu targetfreq=%d boost_level=%d find level error\n",
+			__func__, core->cpu, freq, target_freq, boost_level);
+	}
+
+	transition_begin(policy, &freqs);
+	do_set_freq_level(phy_cpu, freq_level);
+	ret = wait_for_ready_timeout(MAX_READY_TIMEOUT);
+	transition_end(policy, &freqs, !!ret);
+
+	return ret;
+}
+
+/*
+ * Request @mode with @freq_level through the IOCSR mailbox and wait for
+ * completion. Returns 0 on success or the wait_for_ready_timeout() error.
+ */
+int loongson3_set_mode(int mode, int freq_level)
+{
+	uint32_t val;
+	int ret = 0;
+	uint32_t message;
+
+	ret = wait_for_ready_timeout(MAX_READY_TIMEOUT);
+	if (ret)
+		return ret;
+
+	message = mode | (VOLTAGE_COMMAND << 24) | freq_level;
+	iocsr_write32(message, 0x51c);
+	val = iocsr_read32(0x420);
+	val |= 1 << 10;
+	iocsr_write32(val, 0x420);
+	return wait_for_ready_timeout(MAX_READY_TIMEOUT);
+}
+
+/*
+ * Action selector built in cpufreq_update() as
+ * (package->in_boost << 1) | should_be_boost, so the enumerator order
+ * must not change.
+ */
+enum freq_adjust_action {
+	FAA_NORMAL,
+	FAA_N2B,
+	FAA_B2N,
+	FAA_BOOST,
+};
+
+/*
+ * Non-boost scaling: pick a frequency between policy->min and policy->max
+ * in proportion to @load (a percentage) and program it at boost level 0.
+ */
+static int faa_normal(struct cpufreq_policy *policy, int load)
+{
+	int ret;
+	unsigned int freq_next, min_f, max_f;
+	struct core_data *core = get_core_data(policy->cpu);
+
+	if (!core)
+		return -1;
+
+	pr_debug("file %s, line %d, func %s\n", __FILE__, __LINE__, __func__);
+
+	min_f = policy->min;
+	max_f = policy->max;
+	freq_next = min_f + load * (max_f - min_f) / 100;
+	ret = loongson3_set_freq(core, freq_next, 0);
+	return ret;
+}
+
+/*
+ * Build the skip mask cores_freq_trans_notify() expects for @core: one bit
+ * at the core's index inside @package->core[], not at its global CPU number.
+ * Returns 0 (skip nothing) when the core is not found in the package.
+ */
+static unsigned int core_skip_mask(struct package_data *package,
+		struct core_data *core)
+{
+	int i;
+
+	for (i = 0; i < package->nr_cores; i++) {
+		if (package->core[i].cpu == core->cpu)
+			return 1U << i;
+	}
+
+	return 0;
+}
+
+/*
+ * Change @core's frequency while the package is in boost mode.
+ *
+ * @target_freq:            frequency handed to loongson3_set_freq()
+ * @skip_update_and_notify: do not send transition notifications to the
+ *                          other cores of the package
+ * @update_core:            update the core/package boost bookkeeping
+ * @inc_boost:              true when the core enters boost, false when it
+ *                          leaves
+ *
+ * Nothing is updated when loongson3_set_freq() reports a failure.
+ */
+static void handle_boost_cores(struct core_data *core, struct package_data *package,
+		unsigned long target_freq, bool skip_update_and_notify,
+		bool update_core, bool inc_boost)
+{
+	int boost_level;
+	int find_level;
+	int find_freq;
+	int ret;
+	int inc_core = inc_boost ? 1 : -1;
+
+	if (boost_gears == 1) {
+		find_level = 0;
+		boost_level = boost_gears;
+	} else {
+		find_level = package->boost_cores;
+		if (update_core)
+			boost_level = package->boost_cores + inc_core;
+		else
+			boost_level = package->boost_cores;
+	}
+	find_freq = boost_level;
+	ret = loongson3_set_freq(core, target_freq, boost_level);
+	if (ret)
+		return;
+
+	if (skip_update_and_notify) {
+		if (update_core)
+			update_core_boost_info(core, inc_boost);
+		return;
+	}
+
+	if (boost_gears != 1) {
+		/*
+		 * The mask is indexed by position inside the package; using
+		 * the global CPU number skipped the wrong core on every
+		 * package but the first.
+		 */
+		unsigned int skip_mask = core_skip_mask(package, core);
+
+		cores_freq_trans_notify(package, true, false,
+			find_level, find_freq, skip_mask);
+		cores_freq_trans_notify(package, false, false,
+			find_level, find_freq, skip_mask);
+	}
+	if (update_core)
+		update_core_boost_info(core, inc_boost);
+}
+
+/*
+ * Per-core adjustment while the package is already in boost mode.
+ * @load is a percentage used to scale between policy->min and policy->max
+ * when the core is not above BOOST_THRESHOLD.
+ */
+static void faa_boost(struct cpufreq_policy *policy, int load)
+{
+	unsigned int min_f, max_f;
+	struct core_data *core = get_core_data(policy->cpu);
+	struct package_data *package = core->package;
+	unsigned long target_freq;
+
+	/* boost cores from n to n + 1 */
+	if (core->load > BOOST_THRESHOLD) {
+		if (package->boost_cores < package->max_boost_cores
+				&& !core->in_boost) {
+			if (boost_gears == 1) {
+				target_freq = policy->max;
+			} else {
+				target_freq = cpufreq_table_find_freq(policy,
+						policy->max, package->boost_cores + 1);
+				if (!target_freq) {
+					pr_debug("find freq error ,boost_level %d, cur freq %d\n",
+						package->boost_cores, policy->max);
+					/*
+					 * No entry for this boost level: the
+					 * request below would fail the same
+					 * lookup, so stop here.
+					 */
+					return;
+				}
+			}
+			handle_boost_cores(core, package, target_freq, false, true, true);
+		}
+	} else {
+		/*
+		 * 1. core not in boost, level up but not change pll
+		 * 2. core in boost, boost cores from n to n - 1
+		 */
+		min_f = policy->min;
+		max_f = policy->max;
+		target_freq = min_f + load * (max_f - min_f) / 100;
+		handle_boost_cores(core, package, target_freq,
+			!core->in_boost, core->in_boost, false);
+	}
+}
+
+/*
+ * Count the cores of @package that have a policy and pass cpu_can_boost().
+ * @boost_count is incremented per such core; when @boost_cores is not NULL
+ * the core's package-local index bit is set in it as well.
+ */
+static void get_boost_cores(struct package_data *package, int *boost_cores, int *boost_count)
+{
+	struct core_data *core;
+	struct cpufreq_policy *policy;
+	int i;
+
+	/* count boost cores */
+	for (i = 0; i < package->nr_cores; i++) {
+		core = &package->core[i];
+		policy = cpufreq_cpu_get_raw(core->cpu);
+		if (!policy)
+			continue;
+
+		if (cpu_can_boost(core->cpu)) {
+			if (boost_cores)
+				*boost_cores |= (1 << i);
+
+			(*boost_count)++;
+		}
+	}
+}
+
+/*
+ * Normal -> boost: announce the transition on all cores, switch the package
+ * to BOOST_MODE and, on success, mark the package and the selected cores as
+ * boosted. On failure the transitions are ended as failed.
+ */
+static void faa_n2b(struct package_data *package, struct core_data *core)
+{
+	int boost_cores = 0;
+	int boost_count = 0;
+	int freq_level;
+
+	get_boost_cores(package, &boost_cores, &boost_count);
+
+	if (boost_gears == 1)
+		boost_count = 1;
+
+	freq_level = cores_freq_trans_notify(package, true, false,
+			0, boost_count, 0);
+	if (!loongson3_set_mode(BOOST_MODE, freq_level)) {
+		int i;
+
+		cores_freq_trans_notify(package, false, false,
+			0, boost_count, 0);
+		package->in_boost = true;
+		for (i = 0; i < package->nr_cores; i++) {
+			if (boost_cores & (1 << i))
+				update_core_boost_info(&package->core[i], true);
+		}
+	} else
+		cores_freq_trans_notify(package, false, true,
+			0, boost_count, 0);
+}
+
+/*
+ * Boost -> normal: announce the transition on all cores, switch the package
+ * to NORMAL_MODE and, on success, clear the boost state of every boosted
+ * core and of the package. On failure the transitions are ended as failed.
+ */
+static void faa_b2n(struct package_data *package)
+{
+	int i;
+	int boost_count = package->boost_cores;
+
+	if (boost_gears == 1)
+		boost_count = 1;
+
+	cores_freq_trans_notify(package, true, false,
+		boost_count, 0, 0);
+	if (!loongson3_set_mode(NORMAL_MODE, 0)) {
+		cores_freq_trans_notify(package, false, false,
+			boost_count, 0, 0);
+		for (i = 0; i < package->nr_cores; i++) {
+			if (package->core[i].in_boost)
+				update_core_boost_info(&package->core[i], false);
+		}
+		package->in_boost = false;
+	} else
+		cores_freq_trans_notify(package, false, true,
+			boost_count, 0, 0);
+}
+
+
+/*
+ * Sample the idle time of @core and return its load in percent since the
+ * previous call. The count of cores in the package whose ->load exceeds
+ * BOOST_THRESHOLD is refreshed as a side effect.
+ */
+unsigned int load_update(struct core_data *core)
+{
+	int i;
+	u64 update_time, cur_idle_time;
+	unsigned int idle_time, time_elapsed;
+	unsigned int load = 0;
+	struct package_data *package = core->package;
+
+	cur_idle_time = get_cpu_idle_time(core->cpu, &update_time, true);
+
+	time_elapsed = update_time - core->prev_update_time;
+	core->prev_update_time = update_time;
+
+	idle_time = cur_idle_time - core->prev_cpu_idle;
+	core->prev_cpu_idle = cur_idle_time;
+
+	if (unlikely(!time_elapsed)) {
+		/*
+		 * That can only happen when this function is called
+		 * twice in a row with a very short interval between the
+		 * calls, so the previous load value can be used then.
+		 */
+		load = core->prev_load;
+	} else if (unlikely((int)idle_time > 2 * core->sampling_rate &&
+			core->prev_load)) {
+		/* Long idle period: reuse the previous load once, then drop it. */
+		load = core->prev_load;
+		core->prev_load = 0;
+	} else {
+		if (time_elapsed >= idle_time)
+			load = 100 * (time_elapsed - idle_time) / time_elapsed;
+		else
+			load = (int)idle_time < 0 ? 100 : 0;
+		core->prev_load = load;
+	}
+
+	package->nr_full_load_cores = 0;
+	for (i = 0; i < package->nr_cores; i++) {
+		if (package->core[i].load > BOOST_THRESHOLD)
+			package->nr_full_load_cores++;
+	}
+
+	return load;
+}
+
+/* True once at least freq_update_delay_ns has passed since the last update. */
+static bool cpufreq_should_update_freq(struct core_data *core, u64 time)
+{
+	s64 delta_ns;
+
+	delta_ns = time - core->last_freq_update_time;
+	return delta_ns >= core->freq_update_delay_ns;
+}
+
+/*
+ * Re-evaluate the load of the policy's core and run the matching
+ * normal/boost action, all under the package's boost mutex. Does nothing
+ * when the core's update-util hook is not installed.
+ */
+static void cpufreq_update(struct cpufreq_policy *policy)
+{
+	int action;
+	struct core_data *core;
+	struct package_data *package;
+	unsigned long load;
+	bool should_be_boost;
+
+	core = get_core_data(policy->cpu);
+	package = core->package;
+
+	mutex_lock(&boost_mutex[core->package_id]);
+
+	if (!core->update_util_set) {
+		mutex_unlock(&boost_mutex[core->package_id]);
+		return;
+	}
+
+	load = load_update(core);
+	/* New sample plus the previous value scaled by FACTOR / 2^32. */
+	core->load = (u64)load + ((core->load * FACTOR) >> 32);
+
+	/* package_boost() is only consulted while boosting is enabled. */
+	should_be_boost = cpufreq_boost_enabled() && package_boost(package);
+
+	action = (package->in_boost << 1) | should_be_boost;
+	switch (action) {
+	case FAA_NORMAL:
+		faa_normal(policy, load);
+		break;
+	case FAA_B2N:
+		faa_b2n(package);
+		break;
+	case FAA_N2B:
+		faa_n2b(package, core);
+		break;
+	case FAA_BOOST:
+		faa_boost(policy, load);
+		break;
+	}
+	mutex_unlock(&boost_mutex[core->package_id]);
+}
+
+static void set_max_within_limits(struct cpufreq_policy *policy)
+{
+	struct core_data *core = get_core_data(policy->cpu);
+	/*
+	 * policy->max <= core->normal_max_freq (in kHz) indicates that
+	 * the boost is disabled, so max freq is in normal range
+	 *
+	 * Skip performance policy with boost enabled!!!
+	 *
+	 */
+	if (policy->max <= (core->normal_max_freq * 1000)) {
+		mutex_lock(&boost_mutex[core->package_id]);
+		if (!loongson3_set_freq(core, policy->max, 0))
+			pr_debug("Set cpu %d to performance mode under normal range.\n",
+				policy->cpu);
+		mutex_unlock(&boost_mutex[core->package_id]);
+	}
+}
+
+/* Remove the scheduler utilisation hook of @cpu, if installed, and wait for RCU readers. */
+static void clear_update_util_hook(unsigned int cpu)
+{
+	struct core_data *core = get_core_data(cpu);
+
+	if (!core->update_util_set)
+		return;
+
+	cpufreq_remove_update_util_hook(cpu);
+	core->update_util_set = false;
+	synchronize_rcu();
+}
+
+/*
+ * Scheduler utilisation callback: once the update delay has elapsed and no
+ * work is pending for the core, queue the irq_work that hands the update
+ * over to the worker thread.
+ */
+static void update_util_handler(struct update_util_data *data, u64 time,
+		unsigned int flags)
+{
+	struct core_data *core = container_of(data, struct core_data, update_util);
+
+	if (!cpufreq_should_update_freq(core, time))
+		return;
+	if (!core->work_in_progress) {
+		core->last_freq_update_time = time;
+		core->work_in_progress = true;
+		irq_work_queue(&core->irq_work);
+	}
+}
+
+/* Install the scheduler utilisation hook of @cpu unless already installed. */
+static void set_update_util_hook(unsigned int cpu)
+{
+	struct core_data *core = get_core_data(cpu);
+
+	if (core->update_util_set)
+		return;
+
+	cpufreq_add_update_util_hook(cpu, &core->update_util,
+		update_util_handler);
+	core->update_util_set = true;
+}
+
+/*
+ * ->setpolicy(): the performance policy drops the load-tracking hook and
+ * pins the core to policy->max (within the normal range); every other
+ * policy enables load-driven scaling.
+ */
+static int loongson3_cpufreq_set_policy(struct cpufreq_policy *policy)
+{
+	if (!policy->cpuinfo.max_freq)
+		return -ENODEV;
+
+	if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
+		clear_update_util_hook(policy->cpu);
+		set_max_within_limits(policy);
+	} else {
+		set_update_util_hook(policy->cpu);
+	}
+
+	return 0;
+}
+
+/* ->verify(): clamp the requested limits to the cpuinfo range. */
+static int loongson3_cpufreq_verify_policy(struct cpufreq_policy_data *policy)
+{
+	cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, policy->cpuinfo.max_freq);
+
+	return 0;
+}
+
+static void set_boost_freq(bool has)
+{
+	cpufreq_has_boost_freq = has;
+}
+
+static bool has_boost_freq(void)
+{
+	return cpufreq_has_boost_freq;
+}
+
+/*
+ * Decimal fixed-point division: returns @dividor / @dividee carried to
+ * eight decimal places and multiplies *@shift by 10^8 accordingly.
+ * Returns 0 and leaves *@shift untouched when @dividee is 0.
+ */
+static int compute_scale(int *shift, int dividor, int dividee)
+{
+	int i;
+	int result = 0;
+	int remainder = 0;
+	int scale_resolution = 8;
+
+	/* The caller passes normal_max_freq, which stays 0 without normal states. */
+	if (!dividee)
+		return 0;
+
+	result = dividor / dividee;
+	remainder = (dividor % dividee) * 10;
+
+	for (i = 0; i < scale_resolution; i++) {
+		result = result * 10 + remainder / dividee;
+		remainder = (remainder % dividee) * 10;
+		*shift *= 10;
+	}
+
+	return result;
+}
+
+/* Worker-thread half of an update: run cpufreq_update() for the core's policy. */
+static void cpufreq_work_handler(struct kthread_work *work)
+{
+	struct core_data *core;
+	struct cpufreq_policy *policy;
+
+	core = container_of(work, struct core_data, work);
+	policy = cpufreq_cpu_get_raw(core->cpu);
+
+	if (policy)
+		cpufreq_update(policy);
+
+	/*
+	 * Always re-arm: update_util_handler() queues nothing while this
+	 * flag is set, so leaving it set when no policy was found would
+	 * stop all further updates for the core.
+	 */
+	core->work_in_progress = false;
+}
+
+/* irq_work half of an update: hand the core's work item to the worker thread. */
+static void cpufreq_irq_work(struct irq_work *irq_work)
+{
+	struct core_data *core = container_of(irq_work, struct core_data, irq_work);
+
+	kthread_queue_work(&cpufreq_worker, &core->work);
+}
+
+static void cpufreq_kthread_stop(void)
+{
+	kthread_flush_worker(&cpufreq_worker);
+	kthread_stop(cpufreq_thread);
+}
+
+/*
+ * Create and start the worker thread that runs the frequency updates,
+ * scheduled as SCHED_DEADLINE (1ms runtime every 10ms).
+ * Returns 0 on success or a negative errno; the thread is stopped again
+ * when the scheduling attributes cannot be applied.
+ */
+static int cpufreq_kthread_create(void)
+{
+	struct sched_attr attr = {
+		.size = sizeof(struct sched_attr),
+		.sched_policy = SCHED_DEADLINE,
+		/* NOTE(review): looks like the scheduler-internal SCHED_FLAG_SUGOV value - confirm */
+		.sched_flags = 0x10000000,
+		.sched_nice = 0,
+		.sched_priority = 0,
+		.sched_runtime = 1000000,
+		.sched_deadline = 10000000,
+		.sched_period = 10000000,
+	};
+	int ret;
+
+	kthread_init_worker(&cpufreq_worker);
+	cpufreq_thread = kthread_create(kthread_worker_fn, &cpufreq_worker, "lsfrq:%d", 0);
+	if (IS_ERR(cpufreq_thread))
+		return PTR_ERR(cpufreq_thread);
+
+	ret = sched_setattr_nocheck(cpufreq_thread, &attr);
+	if (ret) {
+		kthread_stop(cpufreq_thread);
+		pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__);
+		return ret;
+	}
+
+	wake_up_process(cpufreq_thread);
+
+	return 0;
+}
+
+/*
+ * Build the P-state table by hand (used when ACPI is disabled): one row per
+ * frequency level, each row holding the normal state followed by one state
+ * per boost gear. Frequencies scale down in eighths per level.
+ *
+ * Returns 0 or -ENOMEM. perf->states is owned by the caller on success.
+ */
+static int init_acpi(struct acpi_processor_performance *perf)
+{
+	int result = 0;
+	int i;
+
+	perf->shared_type = 0;
+	perf->state_count = (max_freq_level - min_freq_level + 1) * (boost_gears + 1);
+
+	/* Zeroed so that states hitting the default branch below hold no garbage. */
+	perf->states =
+		kcalloc(perf->state_count,
+			sizeof(struct acpi_processor_px),
+			GFP_KERNEL);
+
+	if (!perf->states) {
+		result = -ENOMEM;
+		return result;
+	}
+
+	for (i = 0; i < perf->state_count; i++) {
+		perf->states[i].power = 0x3A98;
+		perf->states[i].transition_latency = 10000;
+		perf->states[i].bus_master_latency = 10000;
+		perf->states[i].status = (RESERVED_FREQ + i / (boost_gears + 1));
+		perf->states[i].control = (RESERVED_FREQ + i / (boost_gears + 1));
+
+		switch (i % (boost_gears + 1)) {
+		case 0:
+			perf->states[i].core_frequency =
+				(cpu_clock_freq / 1000000) * (8 - i / (boost_gears + 1)) / 8;
+			break;
+		case 1:
+		case 2:
+		case 3:
+		case 4:
+			perf->states[i].core_frequency =
+				boost_freqs[i % (boost_gears + 1)] *
+				(8 - i / (boost_gears + 1)) / 8;
+			/* Boost gear number goes into bits 8 and up of control. */
+			perf->states[i].control |= ((i % (boost_gears + 1)) << 8);
+			break;
+		default:
+			pr_info("file %s, line %d, i %d freq table error\n", __FILE__, __LINE__, i);
+		}
+	}
+
+	return result;
+}
+
+/*
+ * ->init(): set up the per-core data, obtain the P-state table (from ACPI,
+ * or built by init_acpi() when ACPI is disabled) and convert it into the
+ * policy's frequency table.
+ *
+ * Returns 0 on success or a negative errno; everything allocated by this
+ * call is released again on failure.
+ */
+static int loongson3_cpufreq_cpu_init(struct cpufreq_policy *policy)
+{
+	/* assumes ->init() calls are serialised by the cpufreq core - TODO confirm */
+	static bool boost_mutex_ready;
+	unsigned int i;
+	struct acpi_processor_performance *perf;
+	struct cpufreq_frequency_table *freq_table;
+	struct core_data *core;
+	int package_id;
+	unsigned int cpu = policy->cpu;
+	int result = 0;
+
+	perf = per_cpu_ptr(acpi_perf_data, cpu);
+	package_id = cpu_data[cpu].package;
+	core = get_core_data(cpu);
+	all_package_data[package_id].nr_cores = loongson_sysconf.cores_per_package;
+	all_package_data[package_id].max_boost_cores = max_boost_cores;
+	core->normal_max_freq = 0;
+	all_package_data[package_id].nr_full_load_cores = 0;
+	core->cpu = cpu;
+	core->work_in_progress = false;
+	core->last_freq_update_time = 0;
+	core->perf = perf;
+	core->package_id = package_id;
+	core->package = &all_package_data[package_id];
+
+	/* One element per gear, sized by element type rather than pointer type. */
+	core->boost_freq = kmalloc_array(boost_gears + 1,
+			sizeof(*core->boost_freq), GFP_KERNEL);
+	core->clock_scale = kmalloc_array(boost_gears + 1,
+			sizeof(*core->clock_scale), GFP_KERNEL);
+	core->shift = kmalloc_array(boost_gears + 1,
+			sizeof(*core->shift), GFP_KERNEL);
+	if (!core->boost_freq || !core->clock_scale || !core->shift) {
+		result = -ENOMEM;
+		goto err_free_core;
+	}
+
+	for (i = 0; i < boost_gears + 1; i++) {
+		core->boost_freq[i] = boost_freqs[i];
+		core->shift[i] = 1;
+	}
+
+	if (!acpi_disabled)
+		result = acpi_processor_register_performance(perf, cpu);
+	else {
+		result = init_acpi(perf);
+		policy->shared_type = perf->shared_type;
+	}
+
+	if (result) {
+		pr_info("CPU%d acpi_processor_register_performance failed.\n", cpu);
+		goto err_free_core;
+	}
+
+	/*
+	 * Initialise the package mutexes only once: a later CPU coming up
+	 * must not reset a mutex another core's worker may be holding.
+	 */
+	if (!boost_mutex_ready) {
+		for (i = 0; i < MAX_PACKAGES; i++)
+			mutex_init(&boost_mutex[i]);
+		boost_mutex_ready = true;
+	}
+
+	/* capability check */
+	if (perf->state_count <= 1) {
+		pr_debug("No P-States\n");
+		result = -ENODEV;
+		goto err_unreg;
+	}
+
+	freq_table = kcalloc(perf->state_count + 1, sizeof(*freq_table),
+			GFP_KERNEL);
+	if (!freq_table) {
+		result = -ENOMEM;
+		goto err_unreg;
+	}
+
+	/* detect transition latency */
+	policy->cpuinfo.transition_latency = 0;
+	for (i = 0; i < perf->state_count; i++) {
+		if ((perf->states[i].transition_latency * 1000) >
+				policy->cpuinfo.transition_latency)
+			policy->cpuinfo.transition_latency =
+				perf->states[i].transition_latency * 1000;
+		if (perf->states[i].control & LOONGSON_BOOST_FREQ_MASK) {
+			set_boost_freq(true);
+		} else {
+			if (perf->states[i].core_frequency > core->normal_max_freq)
+				core->normal_max_freq = perf->states[i].core_frequency;
+		}
+	}
+
+	core->freq_update_delay_ns = policy->cpuinfo.transition_latency;
+
+	for (i = 0; i < boost_gears + 1; i++)
+		core->clock_scale[i] = compute_scale(&core->shift[i],
+				boost_freqs[i], core->normal_max_freq);
+
+	/* table init */
+	for (i = 0; i < perf->state_count; i++) {
+		freq_table[i].driver_data = (perf->states[i].control
+				& LOONGSON_BOOST_FREQ_MASK) >> 8;
+		if (freq_table[i].driver_data)
+			freq_table[i].flags |= CPUFREQ_BOOST_FREQ;
+		freq_table[i].frequency =
+			perf->states[i].core_frequency * 1000;
+	}
+	freq_table[i].frequency = CPUFREQ_TABLE_END;
+	policy->freq_table = freq_table;
+	perf->state = 0;
+
+	/* add boost-attr if supported. */
+	if (has_boost_freq() && boost_supported())
+		loongson3_cpufreq_attr[1] = &cpufreq_freq_attr_scaling_boost_freqs;
+
+	pr_info("CPU%u - ACPI performance management activated.\n", cpu);
+	for (i = 0; i < perf->state_count; i++)
+		pr_debug(" %cP%d: %d MHz, %d mW, %d uS %d level\n",
+			(i == perf->state ? '*' : ' '), i,
+			(u32) perf->states[i].core_frequency,
+			(u32) perf->states[i].power,
+			(u32) perf->states[i].transition_latency,
+			(u32) perf->states[i].control);
+
+	/*
+	 * the first call to ->target() should result in us actually
+	 * writing something to the appropriate registers.
+	 */
+	policy->fast_switch_possible = false;
+
+	init_irq_work(&core->irq_work, cpufreq_irq_work);
+	kthread_init_work(&core->work, cpufreq_work_handler);
+	core->sampling_rate = max_t(unsigned int,
+			CPUFREQ_SAMPLING_INTERVAL,
+			cpufreq_policy_transition_delay_us(policy));
+	return result;
+
+err_unreg:
+	if (!acpi_disabled) {
+		acpi_processor_unregister_performance(cpu);
+	} else {
+		/* Table built by init_acpi() above; nothing else frees it. */
+		kfree(perf->states);
+		perf->states = NULL;
+	}
+err_free_core:
+	kfree(core->shift);
+	kfree(core->clock_scale);
+	kfree(core->boost_freq);
+	core->shift = NULL;
+	core->clock_scale = NULL;
+	core->boost_freq = NULL;
+
+	return result;
+}
+
+/*
+ * ->exit(): stop load tracking for the policy's CPU, wait for pending work
+ * and release everything loongson3_cpufreq_cpu_init() allocated.
+ */
+static int loongson3_cpufreq_cpu_exit(struct cpufreq_policy *policy)
+{
+	struct core_data *core = get_core_data(policy->cpu);
+
+	clear_update_util_hook(policy->cpu);
+	irq_work_sync(&core->irq_work);
+	kthread_cancel_work_sync(&core->work);
+	core->work_in_progress = false;
+	policy->fast_switch_possible = false;
+	if (!acpi_disabled) {
+		acpi_processor_unregister_performance(policy->cpu);
+	} else if (core->perf) {
+		/* Without ACPI the state table came from init_acpi(). */
+		kfree(core->perf->states);
+		core->perf->states = NULL;
+	}
+	kfree(policy->freq_table);
+	kfree(core->boost_freq);
+	kfree(core->clock_scale);
+	kfree(core->shift);
+	return 0;
+}
+
+static struct freq_attr *loongson3_cpufreq_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL, /* Extra space for boost-attr if supported */
+	NULL,
+};
+
+static struct cpufreq_driver loongson3_cpufreq_driver = {
+	.verify = loongson3_cpufreq_verify_policy,
+	.setpolicy = loongson3_cpufreq_set_policy,
+	.init = loongson3_cpufreq_cpu_init,
+	.exit = loongson3_cpufreq_cpu_exit,
+	.name = "acpi-cpufreq",
+	.attr = loongson3_cpufreq_attr,
+};
+
+static void free_acpi_perf_data(void)
+{
+	unsigned int i;
+
+	/* Freeing a NULL pointer is OK, and alloc_percpu zeroes.
*/ + for_each_possible_cpu(i) + free_cpumask_var(per_cpu_ptr(acpi_perf_data, i) + ->shared_cpu_map); + free_percpu(acpi_perf_data); +} + +static int __init loongson3_cpufreq_early_init(void) +{ + unsigned int i; + + acpi_perf_data = alloc_percpu(struct acpi_processor_performance); + if (!acpi_perf_data) + return -ENOMEM; + for_each_possible_cpu(i) { + if (!zalloc_cpumask_var_node( + &per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map, + GFP_KERNEL, cpu_to_node(i))) { + free_acpi_perf_data(); + return -ENOMEM; + } + } + return 0; +} + +static bool support_boost(void) +{ + int message; + int val; + int i; + + if (wait_for_ready_timeout(MAX_READY_TIMEOUT)) + return false; + message = DVFS_INFO << 24; + iocsr_write32(message, 0x51c); + val = iocsr_read32(0x420); + + val |= 1 << 10; + iocsr_write32(val, 0x420); + if (wait_for_ready_timeout(MAX_READY_TIMEOUT)) { + pr_info("file %s, line %d, not support boost\n", __FILE__, __LINE__); + return false; + } + + val = iocsr_read32(0x51c); + + min_freq_level = val & DVFS_INFO_MIN_FREQ; + max_freq_level = (val & DVFS_INFO_MAX_FREQ) >> 4; + + if ((val & DVFS_INFO_BOOST_CORE_FREQ) && ((val & DVFS_INFO_BOOST_CORES) >> 20)) { + max_boost_cores = (val & DVFS_INFO_BOOST_CORES) >> 20; + max_boost_freq = ((val & DVFS_INFO_BOOST_CORE_FREQ) >> 8) * 25; + max_upper_index = (val & DVFS_INFO_NORMAL_CORE_UPPER_LIMIT) >> 16; + } else { + boost_gears = 0; + return false; + } + + /* Read boost levels */ + if (wait_for_ready_timeout(MAX_READY_TIMEOUT)) + return false; + + /* for version 1, single boost freq boost */ + message = DVFS_INFO_BOOST_LEVEL << 24; + iocsr_write32(message, 0x51c); + val = iocsr_read32(0x420); + + val |= 1 << 10; + iocsr_write32(val, 0x420); + + if (wait_for_ready_timeout(MAX_READY_TIMEOUT)) { + pr_info("file %s, line %d, single boost mode\n", __FILE__, __LINE__); + boost_gears = 1; + boost_freqs[0] = calc_const_freq() / 1000000; + for (i = 1; i < boost_gears + 1; i++) + boost_freqs[i] = max_boost_freq; + + /* set 0x51c 
complete */ + iocsr_write32(COMPLETE_STATUS, 0x51c); + } else { + pr_info("file %s, line %d, multi boost mode\n", __FILE__, __LINE__); + boost_gears = max_boost_cores; + val = iocsr_read32(0x51c); + + boost_freqs[0] = calc_const_freq() / 1000000; + boost_freqs[1] = max_boost_freq; + + if (boost_gears > 1) { + for (i = 2; i < boost_gears + 1; i++) + boost_freqs[i] = max_boost_freq - + (((val >> ((i-2) * 4)) & 0xf) * FREQ_STEP); + } + } + + return true; +} + +static int cpufreq_table_cpuinfo(struct cpufreq_policy *policy, + struct cpufreq_frequency_table *table, + bool boost) +{ + struct cpufreq_frequency_table *pos; + unsigned int min_freq = ~0; + unsigned int max_freq = 0; + unsigned int freq; + + cpufreq_for_each_valid_entry(pos, table) { + freq = pos->frequency; + + if (!boost) { + if (pos->driver_data) + continue; + } + if (freq < min_freq) + min_freq = freq; + if (freq > max_freq) + max_freq = freq; + } + + policy->min = policy->cpuinfo.min_freq = min_freq; + policy->max = policy->cpuinfo.max_freq = max_freq; + if (policy->min == ~0) + return -EINVAL; + else + return 0; +} + +static int set_boost(struct cpufreq_policy *policy, int state) +{ + if (!has_boost_freq()) + return -EINVAL; + + if (!policy) + return -EINVAL; + + if (!state) { + if (policy->policy == CPUFREQ_POLICY_POWERSAVE) + cpufreq_update(policy); + } + if (!policy->freq_table) + return -EINVAL; + + cpufreq_table_cpuinfo(policy, policy->freq_table, state); + down_write(&policy->rwsem); + up_write(&policy->rwsem); + + if (!state) + set_max_within_limits(policy); + + return 0; +} + +static void __init loongson3_cpufreq_boost_init(void) +{ + if (!support_boost()) { + pr_info("Boost capabilities not present in the processor\n"); + return; + } + + loongson3_cpufreq_driver.set_boost = set_boost; +} + +static int cpufreq_supported_detect(void) +{ + return wait_for_ready_timeout(MAX_READY_TIMEOUT); +} + +static int __init loongson3_cpufreq_init(void) +{ + int ret; + + if (!cpu_has_csr || !cpu_has_scalefreq) 
+ return -ENODEV; + + /* don't keep reloading if cpufreq_driver exists */ + if (cpufreq_get_current_driver()) + return -EEXIST; + + if (cpufreq_supported_detect()) + return -ENODEV; + + ret = loongson3_cpufreq_early_init(); + if (ret) + return ret; + loongson3_cpufreq_boost_init(); + + cpufreq_register_notifier(&loongson3_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + ret = cpufreq_register_driver(&loongson3_cpufreq_driver); + cpufreq_kthread_create(); + if (ret) + free_acpi_perf_data(); + + return ret; +} + +static void __exit loongson3_cpufreq_exit(void) +{ + cpufreq_unregister_driver(&loongson3_cpufreq_driver); + free_acpi_perf_data(); + cpufreq_kthread_stop(); +} + +late_initcall(loongson3_cpufreq_init); +module_exit(loongson3_cpufreq_exit); + +static const struct acpi_device_id processor_device_ids[] = { + {ACPI_PROCESSOR_OBJECT_HID, }, + {ACPI_PROCESSOR_DEVICE_HID, }, + {}, +}; +MODULE_DEVICE_TABLE(acpi, processor_device_ids); + +MODULE_ALIAS("acpi"); diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile index 3a0770d7dd36bb8db384d157e1834d2fbc4b8cf0..eebc886b461553b206d2d82c22aff3736f917215 100644 --- a/drivers/firmware/efi/Makefile +++ b/drivers/firmware/efi/Makefile @@ -40,6 +40,7 @@ sw64-obj-$(CONFIG_EFI) := sunway-init.o sunway-runtime.o obj-$(CONFIG_SW64) += $(sw64-obj-y) riscv-obj-$(CONFIG_EFI) := efi-init.o riscv-runtime.o obj-$(CONFIG_RISCV) += $(riscv-obj-y) +obj-$(CONFIG_LOONGARCH) += efi-init.o obj-$(CONFIG_EFI_CAPSULE_LOADER) += capsule-loader.o obj-$(CONFIG_EFI_EARLYCON) += earlycon.o obj-$(CONFIG_UEFI_CPER_ARM) += cper-arm.o diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index aa8da0a4982941958faef955582c505815a952e5..2e8d44c03d325e706bc94df73a2c86b557d25a35 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -22,7 +22,7 @@ bool efi_nochunk; bool efi_nokaslr = 
!IS_ENABLED(CONFIG_RANDOMIZE_BASE); bool efi_noinitrd; int efi_loglevel = CONSOLE_LOGLEVEL_DEFAULT; -bool efi_novamap; +bool efi_novamap = IS_ENABLED(CONFIG_LOONGARCH); /* LoongArch call svam() in kernel */; static bool efi_nosoftreserve; static bool efi_disable_pci_dma = IS_ENABLED(CONFIG_EFI_DISABLE_PCI_DMA); diff --git a/drivers/firmware/efi/libstub/loongarch-stub.c b/drivers/firmware/efi/libstub/loongarch-stub.c index 9e71a27bac924d346069fc6726c55906489e01d9..8727daa90d649ca198ff946689250f78946eb0da 100644 --- a/drivers/firmware/efi/libstub/loongarch-stub.c +++ b/drivers/firmware/efi/libstub/loongarch-stub.c @@ -9,7 +9,7 @@ #include #include "efistub.h" -typedef void __noreturn (*kernel_entry_t)(bool efi, unsigned long fdt); +typedef void __noreturn (*kernel_entry_t)(bool efi, unsigned long fdt, int flags); extern int kernel_asize; extern int kernel_fsize; @@ -52,8 +52,5 @@ void __noreturn efi_enter_kernel(unsigned long entrypoint, unsigned long fdt, un real_kernel_entry = (kernel_entry_t) ((unsigned long)&kernel_entry - entrypoint + VMLINUX_LOAD_ADDRESS); - if (!efi_novamap) - real_kernel_entry(true, fdt); - else - real_kernel_entry(false, fdt); + real_kernel_entry(true, fdt, 0); } diff --git a/drivers/irqchip/irq-loongarch-cpu.c b/drivers/irqchip/irq-loongarch-cpu.c index 0ac9587446dbb2fd9b60eaf64a614f6b0532f15e..d3a0bbe4a9f747e731967bfbb7b493b7a1a9e31b 100644 --- a/drivers/irqchip/irq-loongarch-cpu.c +++ b/drivers/irqchip/irq-loongarch-cpu.c @@ -119,7 +119,12 @@ static int __init acpi_cascade_irqdomain_init(void) return 0; } -static int __init cpuintc_acpi_init(union acpi_subtable_headers *header, +struct irq_domain *get_cpudomain(void) +{ + return irq_domain; +} + +int __init cpuintc_acpi_init(union acpi_subtable_headers *header, const unsigned long end) { if (irq_domain) diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c index 472e029766a87da54fc4426240a075f424954beb..ca32e73bb92414916a7f064d5ad99674740bf8c4 100644 
--- a/drivers/irqchip/irq-loongson-eiointc.c +++ b/drivers/irqchip/irq-loongson-eiointc.c @@ -333,7 +333,7 @@ static struct syscore_ops eiointc_syscore_ops = { .resume = eiointc_resume, }; -static int __init +int __init pch_pic_parse_madt(union acpi_subtable_headers *header, const unsigned long end) { @@ -347,7 +347,7 @@ pch_pic_parse_madt(union acpi_subtable_headers *header, return -EINVAL; } -static int __init +int __init pch_msi_parse_madt(union acpi_subtable_headers *header, const unsigned long end) { diff --git a/drivers/irqchip/irq-loongson-pch-pic.c b/drivers/irqchip/irq-loongson-pch-pic.c index e09078e1ac0161dce262983fde4136fa2c1b057e..cd8b16293f39cbef200433275d597bd0f0f62ca3 100644 --- a/drivers/irqchip/irq-loongson-pch-pic.c +++ b/drivers/irqchip/irq-loongson-pch-pic.c @@ -52,6 +52,11 @@ static struct pch_pic *pch_pic_priv[MAX_IO_PICS]; struct fwnode_handle *pch_pic_handle[MAX_IO_PICS]; +struct irq_domain *get_pchpic_irq_domain(void) +{ + return pch_pic_priv[0]->pic_domain; +} + static void pch_pic_bitset(struct pch_pic *priv, int offset, int bit) { u32 reg; diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 6133e9f61d0b2441c88b24132644133eb1628657..afe5efb37471730e8ded52e6320ad521671a4ac6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -241,6 +241,15 @@ config DWMAC_INTEL This selects the Intel platform specific bus support for the stmmac driver. This driver is used for Intel Quark/EHL/TGL. +config DWMAC_LOONGSON + tristate "Loongson PCI DWMAC support" + default MACH_LOONGSON64 + depends on STMMAC_ETH && PCI + depends on COMMON_CLK + help + This selects the LOONGSON PCI bus support for the stmmac driver, + Support for ethernet controller on Loongson-2K1000 SoC and LS7A1000 bridge. 
+ config STMMAC_PCI tristate "STMMAC PCI bus support" depends on STMMAC_ETH && PCI diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index 8aa75d837ec430377f6c459e28467739fcd613c1..e2c038c3341974d6569eb463fcebfee1a3b129c1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -35,4 +35,5 @@ dwmac-altr-socfpga-objs := altr_tse_pcs.o dwmac-socfpga.o obj-$(CONFIG_STMMAC_PCI) += stmmac-pci.o obj-$(CONFIG_DWMAC_INTEL) += dwmac-intel.o +obj-$(CONFIG_DWMAC_LOONGSON) += dwmac-loongson.o stmmac-pci-objs:= stmmac_pci.o diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c new file mode 100644 index 0000000000000000000000000000000000000000..cc8dc8c2b97d6d2cc470b4e0b84b9396fc7bb736 --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -0,0 +1,291 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020, Loongson Corporation + */ + +#include +#include +#include +#include +#include +#include "stmmac.h" + +struct stmmac_pci_info { + int (*setup)(struct pci_dev *pdev, struct plat_stmmacenet_data *plat); +}; + +static void common_default_data(struct pci_dev *pdev, + struct plat_stmmacenet_data *plat) +{ + plat->bus_id = PCI_DEVID(pdev->bus->number, pdev->devfn); + plat->interface = PHY_INTERFACE_MODE_GMII; + + plat->clk_csr = 2; /* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */ + plat->has_gmac = 1; + plat->force_sf_dma_mode = 1; + + /* Set default value for multicast hash bins */ + plat->multicast_filter_bins = 256; + + /* Set default value for unicast filter entries */ + plat->unicast_filter_entries = 1; + + /* Set the maxmtu to a default of JUMBO_LEN */ + plat->maxmtu = JUMBO_LEN; + + /* Set default number of RX and TX queues to use */ + plat->tx_queues_to_use = 1; + plat->rx_queues_to_use = 1; + + /* Disable Priority config by default */ + plat->tx_queues_cfg[0].use_prio = 
false; + plat->rx_queues_cfg[0].use_prio = false; + + /* Disable RX queues routing by default */ + plat->rx_queues_cfg[0].pkt_route = 0x0; + + plat->dma_cfg->pbl = 32; + plat->dma_cfg->pblx8 = true; + + plat->clk_ref_rate = 125000000; + plat->clk_ptp_rate = 125000000; +} + +static int loongson_gmac_data(struct pci_dev *pdev, + struct plat_stmmacenet_data *plat) +{ + common_default_data(pdev, plat); + + plat->mdio_bus_data->phy_mask = 0; + + plat->phy_addr = -1; + plat->phy_interface = PHY_INTERFACE_MODE_RGMII_ID; + + return 0; +} + +static struct stmmac_pci_info loongson_gmac_pci_info = { + .setup = loongson_gmac_data, +}; + +static void loongson_gnet_fix_speed(void *priv, unsigned int speed) +{ + struct net_device *ndev = (struct net_device *)(*(unsigned long *)priv); + struct stmmac_priv *ptr = netdev_priv(ndev); + + if (speed == SPEED_1000) { + if (readl(ptr->ioaddr + MAC_CTRL_REG) & (1 << 15) /* PS */) { + /* reset phy */ + phy_set_bits(ndev->phydev, 0 /*MII_BMCR*/, + 0x200 /*BMCR_ANRESTART*/); + } + } +} + +static int loongson_gnet_data(struct pci_dev *pdev, + struct plat_stmmacenet_data *plat) +{ + common_default_data(pdev, plat); + + plat->mdio_bus_data->phy_mask = 0xfffffffb; + + plat->phy_addr = 2; + plat->phy_interface = PHY_INTERFACE_MODE_GMII; + + /* GNET 1000M speed need workaround */ + plat->fix_mac_speed = loongson_gnet_fix_speed; + + /* Get netdev pointer address */ + plat->bsp_priv = &pdev->dev.driver_data; + + return 0; +} + +static struct stmmac_pci_info loongson_gnet_pci_info = { + .setup = loongson_gnet_data, +}; + +static int loongson_dwmac_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct plat_stmmacenet_data *plat; + struct stmmac_pci_info *info; + struct stmmac_resources res; + struct device_node *np; + int ret, i, bus_id, phy_mode; + bool mdio = false; + + np = dev_of_node(&pdev->dev); + if (np && !of_device_is_compatible(np, "loongson, pci-gmac")) { + pr_info("dwmac_loongson_pci: Incompatible OF node\n"); + return 
-ENODEV; + } + + plat = devm_kzalloc(&pdev->dev, sizeof(*plat), GFP_KERNEL); + if (!plat) + return -ENOMEM; + + if (plat->mdio_node) { + dev_err(&pdev->dev, "Found MDIO subnode\n"); + mdio = true; + } + + plat->mdio_bus_data = devm_kzalloc(&pdev->dev, + sizeof(*plat->mdio_bus_data), + GFP_KERNEL); + if (!plat->mdio_bus_data) + return -ENOMEM; + + if (mdio) + plat->mdio_bus_data->needs_reset = true; + + plat->dma_cfg = devm_kzalloc(&pdev->dev, sizeof(*plat->dma_cfg), GFP_KERNEL); + if (!plat->dma_cfg) + return -ENOMEM; + + /* Enable pci device */ + ret = pci_enable_device(pdev); + if (ret) { + dev_err(&pdev->dev, "%s: ERROR: failed to enable device\n", __func__); + return ret; + } + + /* Get the base address of device */ + for (i = 0; i < PCI_STD_NUM_BARS; i++) { + if (pci_resource_len(pdev, i) == 0) + continue; + ret = pcim_iomap_regions(pdev, BIT(0), pci_name(pdev)); + if (ret) + return ret; + break; + } + + pci_set_master(pdev); + + info = (struct stmmac_pci_info *)id->driver_data; + ret = info->setup(pdev, plat); + if (ret) + return ret; + + if (np) { + bus_id = of_alias_get_id(np, "ethernet"); + if (bus_id >= 0) + plat->bus_id = bus_id; + + phy_mode = device_get_phy_mode(&pdev->dev); + if (phy_mode < 0) { + dev_err(&pdev->dev, "phy_mode not found\n"); + return phy_mode; + } + plat->phy_interface = phy_mode; + } + + pci_enable_msi(pdev); + + memset(&res, 0, sizeof(res)); + res.addr = pcim_iomap_table(pdev)[0]; + if (np) { + res.irq = of_irq_get_byname(np, "macirq"); + if (res.irq < 0) { + dev_err(&pdev->dev, "IRQ macirq not found\n"); + ret = -ENODEV; + } + + res.wol_irq = of_irq_get_byname(np, "eth_wake_irq"); + if (res.wol_irq < 0) { + dev_info(&pdev->dev, + "IRQ eth_wake_irq not found, using macirq\n"); + res.wol_irq = res.irq; + } + + res.lpi_irq = of_irq_get_byname(np, "eth_lpi"); + if (res.lpi_irq < 0) { + dev_err(&pdev->dev, "IRQ eth_lpi not found\n"); + ret = -ENODEV; + } + } else { + res.irq = pdev->irq; + res.wol_irq = pdev->irq; + } + + return 
stmmac_dvr_probe(&pdev->dev, plat, &res); +} + +static void loongson_dwmac_remove(struct pci_dev *pdev) +{ + int i; + + stmmac_dvr_remove(&pdev->dev); + + for (i = 0; i < PCI_STD_NUM_BARS; i++) { + if (pci_resource_len(pdev, i) == 0) + continue; + pcim_iounmap_regions(pdev, BIT(i)); + break; + } + + pci_disable_device(pdev); +} + +static int __maybe_unused loongson_dwmac_suspend(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int ret; + + ret = stmmac_suspend(dev); + if (ret) + return ret; + + ret = pci_save_state(pdev); + if (ret) + return ret; + + pci_disable_device(pdev); + pci_wake_from_d3(pdev, true); + return 0; +} + +static int __maybe_unused loongson_dwmac_resume(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int ret; + + pci_restore_state(pdev); + pci_set_power_state(pdev, PCI_D0); + + ret = pci_enable_device(pdev); + if (ret) + return ret; + + pci_set_master(pdev); + + return stmmac_resume(dev); +} + +static SIMPLE_DEV_PM_OPS(loongson_dwmac_pm_ops, loongson_dwmac_suspend, + loongson_dwmac_resume); + +#define PCI_DEVICE_ID_LOONGSON_GMAC 0x7a03 +#define PCI_DEVICE_ID_LOONGSON_GNET 0x7a13 + +static const struct pci_device_id loongson_dwmac_id_table[] = { + { PCI_DEVICE_DATA(LOONGSON, GMAC, &loongson_gmac_pci_info) }, + { PCI_DEVICE_DATA(LOONGSON, GNET, &loongson_gnet_pci_info) }, + {} +}; +MODULE_DEVICE_TABLE(pci, loongson_dwmac_id_table); + +static struct pci_driver loongson_dwmac_driver = { + .name = "dwmac-loongson-pci", + .id_table = loongson_dwmac_id_table, + .probe = loongson_dwmac_probe, + .remove = loongson_dwmac_remove, + .driver = { + .pm = &loongson_dwmac_pm_ops, + }, +}; + +module_pci_driver(loongson_dwmac_driver); + +MODULE_DESCRIPTION("Loongson DWMAC PCI driver"); +MODULE_AUTHOR("Qing Zhang "); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 
27b7bb64a028172c44f928b5283c4792298d676c..8cbc3774ce43fc8c33d3da05d357645ef6453c32 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -74,7 +74,7 @@ MODULE_PARM_DESC(phyaddr, "Physical device address"); #define STMMAC_TX_THRESH(x) ((x)->dma_tx_size / 4) #define STMMAC_RX_THRESH(x) ((x)->dma_rx_size / 4) -static int flow_ctrl = FLOW_AUTO; +static int flow_ctrl = FLOW_OFF; module_param(flow_ctrl, int, 0644); MODULE_PARM_DESC(flow_ctrl, "Flow control ability [on/off]"); diff --git a/drivers/pci/controller/pci-loongson.c b/drivers/pci/controller/pci-loongson.c index d77b11b38df53fe4a9335bf2b1ab249e695448fe..679ff66f9c29e23f39b128ac781c878d15f72a51 100644 --- a/drivers/pci/controller/pci-loongson.c +++ b/drivers/pci/controller/pci-loongson.c @@ -27,6 +27,7 @@ #define DEV_LS7A_CONF 0x7a10 #define DEV_LS7A_GNET 0x7a13 #define DEV_LS7A_EHCI 0x7a14 +#define DEV_LS7A_OHCI 0x7a24 #define DEV_LS7A_DC2 0x7a36 #define DEV_LS7A_HDMI 0x7a37 @@ -129,6 +130,13 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, DEV_LS7A_HDMI, loongson_pci_pin_quirk); +static void loongson_ohci_quirk(struct pci_dev *dev) +{ + if (dev->revision == 0x2) + dev->resource[0].start += 0x1000; +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, DEV_LS7A_OHCI, loongson_ohci_quirk); + static struct loongson_pci *pci_bus_to_loongson_pci(struct pci_bus *bus) { struct pci_config_window *cfg; diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 310b3a9bdc7785770ca7fc93c9b77473a30c9362..7746d7a749c508aea539be31dc30057ac1251cdf 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -1320,6 +1320,17 @@ config RTC_DRV_CROS_EC This driver can also be built as a module. If so, the module will be called rtc-cros-ec. 
+config RTC_DRV_LS2X + tristate "Loongson LS2X RTC" + depends on (ACPI || OF) && MACH_LOONGSON64 || COMPILE_TEST + select REGMAP_MMIO + help + If you say yes here you get support for the RTC on the Loongson-2K + SoC and LS7A bridge, which first appeared on the Loongson-2H. + + This driver can also be built as a module. If so, the module + will be called rtc-ls2x. + comment "on-CPU RTC drivers" config RTC_DRV_ASM9260 diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index 885a0b5c83f3009971e66c90de56dbc33375ee07..6e06e4af25af58eeb514ec8435117626c4a39734 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -88,6 +88,7 @@ obj-$(CONFIG_RTC_DRV_LOONGSON1) += rtc-ls1x.o obj-$(CONFIG_RTC_DRV_LP8788) += rtc-lp8788.o obj-$(CONFIG_RTC_DRV_LPC24XX) += rtc-lpc24xx.o obj-$(CONFIG_RTC_DRV_LPC32XX) += rtc-lpc32xx.o +obj-$(CONFIG_RTC_DRV_LS2X) += rtc-ls2x.o obj-$(CONFIG_RTC_DRV_M41T80) += rtc-m41t80.o obj-$(CONFIG_RTC_DRV_M41T93) += rtc-m41t93.o obj-$(CONFIG_RTC_DRV_M41T94) += rtc-m41t94.o diff --git a/drivers/rtc/rtc-ls2x.c b/drivers/rtc/rtc-ls2x.c new file mode 100644 index 0000000000000000000000000000000000000000..962fec171154a08a92ccfa7d4ca01067183df8ba --- /dev/null +++ b/drivers/rtc/rtc-ls2x.c @@ -0,0 +1,325 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Loongson-2K/7A RTC driver + * + * Based on the original out-of-tree Loongson-2H RTC driver for Linux 2.6.32, + * by Shaozong Liu . + * + * Maintained out-of-tree by Huacai Chen . + * + * Rewritten for mainline by WANG Xuerui . 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#define TOY_TRIM_REG 0x20 +#define TOY_WRITE0_REG 0x24 +#define TOY_WRITE1_REG 0x28 +#define TOY_READ0_REG 0x2c +#define TOY_READ1_REG 0x30 +#define TOY_MATCH0_REG 0x34 +#define TOY_MATCH1_REG 0x38 +#define TOY_MATCH2_REG 0x3c +#define RTC_CTRL_REG 0x40 +#define RTC_TRIM_REG 0x60 +#define RTC_WRITE0_REG 0x64 +#define RTC_READ0_REG 0x68 +#define RTC_MATCH0_REG 0x6c +#define RTC_MATCH1_REG 0x70 +#define RTC_MATCH2_REG 0x74 + +#define TOY_MON GENMASK(31, 26) +#define TOY_DAY GENMASK(25, 21) +#define TOY_HOUR GENMASK(20, 16) +#define TOY_MIN GENMASK(15, 10) +#define TOY_SEC GENMASK(9, 4) +#define TOY_MSEC GENMASK(3, 0) + +#define TOY_MATCH_YEAR GENMASK(31, 26) +#define TOY_MATCH_MON GENMASK(25, 22) +#define TOY_MATCH_DAY GENMASK(21, 17) +#define TOY_MATCH_HOUR GENMASK(16, 12) +#define TOY_MATCH_MIN GENMASK(11, 6) +#define TOY_MATCH_SEC GENMASK(5, 0) + +/* ACPI and RTC offset */ +#define ACPI_RTC_OFFSET 0x100 + +/* support rtc wakeup */ +#define ACPI_PM1_STS_REG 0x0c +#define ACPI_PM1_EN_REG 0x10 +#define RTC_EN BIT(10) +#define RTC_STS BIT(10) + +struct ls2x_rtc_priv { + struct regmap *regmap; + spinlock_t rtc_reglock; + void __iomem *acpi_base; + struct rtc_device *rtcdev; +}; + +static const struct regmap_config ls2x_rtc_regmap_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, +}; + +struct ls2x_rtc_regs { + u32 reg0; + u32 reg1; +}; + +#if defined(CONFIG_ACPI) +static u32 ls2x_acpi_fix_handler(void *id) +{ + int ret; + struct ls2x_rtc_priv *priv = (struct ls2x_rtc_priv *)id; + + spin_lock(&priv->rtc_reglock); + + /* Disable acpi rtc enabled */ + ret = readl(priv->acpi_base + ACPI_PM1_EN_REG) & ~RTC_EN; + writel(ret, priv->acpi_base + ACPI_PM1_EN_REG); + + /* Clear acpi rtc interrupt Status */ + writel(RTC_STS, priv->acpi_base + ACPI_PM1_STS_REG); + + spin_unlock(&priv->rtc_reglock); + + /* + * The TOY_MATCH0_REG should be cleared 0 here, + * otherwise the interrupt cannot 
be cleared. + * Because the match condition is still satisfied + */ + ret = regmap_write(priv->regmap, TOY_MATCH0_REG, 0); + if (unlikely(ret)) + return ret; + + return 0; +} +#endif + +static inline void ls2x_rtc_regs_to_time(struct ls2x_rtc_regs *regs, + struct rtc_time *tm) +{ + tm->tm_year = regs->reg1; + tm->tm_sec = FIELD_GET(TOY_SEC, regs->reg0); + tm->tm_min = FIELD_GET(TOY_MIN, regs->reg0); + tm->tm_hour = FIELD_GET(TOY_HOUR, regs->reg0); + tm->tm_mday = FIELD_GET(TOY_DAY, regs->reg0); + tm->tm_mon = FIELD_GET(TOY_MON, regs->reg0) - 1; +} + +static inline void ls2x_rtc_time_to_regs(struct rtc_time *tm, + struct ls2x_rtc_regs *regs) +{ + regs->reg0 = FIELD_PREP(TOY_SEC, tm->tm_sec); + regs->reg0 |= FIELD_PREP(TOY_MIN, tm->tm_min); + regs->reg0 |= FIELD_PREP(TOY_HOUR, tm->tm_hour); + regs->reg0 |= FIELD_PREP(TOY_DAY, tm->tm_mday); + regs->reg0 |= FIELD_PREP(TOY_MON, tm->tm_mon + 1); + regs->reg1 = tm->tm_year; +} + +static inline void ls2x_rtc_alarm_regs_to_time(struct ls2x_rtc_regs *regs, + struct rtc_time *tm) +{ + tm->tm_sec = FIELD_GET(TOY_MATCH_SEC, regs->reg0); + tm->tm_min = FIELD_GET(TOY_MATCH_MIN, regs->reg0); + tm->tm_hour = FIELD_GET(TOY_MATCH_HOUR, regs->reg0); + tm->tm_mday = FIELD_GET(TOY_MATCH_DAY, regs->reg0); + tm->tm_mon = FIELD_GET(TOY_MATCH_MON, regs->reg0) - 1; + /* + * The rtc SYS_TOYMATCH0/YEAR bit field is only 6 bits, + * so it means 63 years at most. Therefore, The RTC alarm + * years can be set from 1900 to 1963. + * This causes the initialization of alarm fail during + * call __rtc_read_alarm. We add 64 years offset to + * ls2x_rtc_read_alarm. After adding the offset, + * the RTC alarm clock can be set from 1964 to 2027. 
+ */ + tm->tm_year = FIELD_GET(TOY_MATCH_YEAR, regs->reg0) + 64; +} + +static inline void ls2x_rtc_time_to_alarm_regs(struct rtc_time *tm, + struct ls2x_rtc_regs *regs) +{ + regs->reg0 = FIELD_PREP(TOY_MATCH_SEC, tm->tm_sec); + regs->reg0 |= FIELD_PREP(TOY_MATCH_MIN, tm->tm_min); + regs->reg0 |= FIELD_PREP(TOY_MATCH_HOUR, tm->tm_hour); + regs->reg0 |= FIELD_PREP(TOY_MATCH_DAY, tm->tm_mday); + regs->reg0 |= FIELD_PREP(TOY_MATCH_MON, tm->tm_mon + 1); + regs->reg0 |= FIELD_PREP(TOY_MATCH_YEAR, tm->tm_year); +} + +static int ls2x_rtc_read_time(struct device *dev, struct rtc_time *tm) +{ + int ret; + struct ls2x_rtc_regs regs; + struct ls2x_rtc_priv *priv = dev_get_drvdata(dev); + + ret = regmap_read(priv->regmap, TOY_READ1_REG, &regs.reg1); + if (unlikely(ret)) + return ret; + + ret = regmap_read(priv->regmap, TOY_READ0_REG, &regs.reg0); + if (unlikely(ret)) + return ret; + + ls2x_rtc_regs_to_time(&regs, tm); + + return 0; +} + +static int ls2x_rtc_set_time(struct device *dev, struct rtc_time *tm) +{ + int ret; + struct ls2x_rtc_regs regs; + struct ls2x_rtc_priv *priv = dev_get_drvdata(dev); + + ls2x_rtc_time_to_regs(tm, &regs); + + ret = regmap_write(priv->regmap, TOY_WRITE0_REG, regs.reg0); + if (unlikely(ret)) + return ret; + + return regmap_write(priv->regmap, TOY_WRITE1_REG, regs.reg1); +} + +static int ls2x_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + int ret; + struct ls2x_rtc_regs regs; + struct ls2x_rtc_priv *priv = dev_get_drvdata(dev); + + ret = regmap_read(priv->regmap, TOY_MATCH0_REG, &regs.reg0); + if (unlikely(ret)) + return ret; + + ls2x_rtc_alarm_regs_to_time(&regs, &alrm->time); + +#if defined(CONFIG_ACPI) + ret = readl(priv->acpi_base + ACPI_PM1_EN_REG); + alrm->enabled = !!(ret & RTC_EN); +#endif + + return 0; +} + +static int ls2x_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + struct ls2x_rtc_regs regs; + struct ls2x_rtc_priv *priv = dev_get_drvdata(dev); + + ls2x_rtc_time_to_alarm_regs(&alrm->time, &regs); + + return 
regmap_write(priv->regmap, TOY_MATCH0_REG, regs.reg0); +} + +static struct rtc_class_ops ls2x_rtc_ops = { + .read_time = ls2x_rtc_read_time, + .set_time = ls2x_rtc_set_time, + .read_alarm = ls2x_rtc_read_alarm, + .set_alarm = ls2x_rtc_set_alarm, +}; + +static int ls2x_rtc_probe(struct platform_device *pdev) +{ + int ret; + void __iomem *regs; + struct ls2x_rtc_priv *priv; + struct device *dev = &pdev->dev; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (unlikely(!priv)) + return -ENOMEM; + + spin_lock_init(&priv->rtc_reglock); + + platform_set_drvdata(pdev, priv); + + regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(regs)) + return PTR_ERR(regs); + + priv->regmap = devm_regmap_init_mmio(dev, regs, + &ls2x_rtc_regmap_config); + if (IS_ERR(priv->regmap)) + return PTR_ERR(priv->regmap); + + priv->rtcdev = devm_rtc_allocate_device(dev); + if (IS_ERR(priv->rtcdev)) + return PTR_ERR(priv->rtcdev); + + /* Due to hardware erratum, all years multiple of 4 are considered + * leap year, so only years 2000 through 2099 are usable. + * + * Previous out-of-tree versions of this driver wrote tm_year directly + * into the year register, so epoch 2000 must be used to preserve + * semantics on shipped systems. 
+ */ + priv->rtcdev->range_min = RTC_TIMESTAMP_BEGIN_2000; + priv->rtcdev->range_max = RTC_TIMESTAMP_END_2099; + priv->rtcdev->ops = &ls2x_rtc_ops; + +#ifdef CONFIG_ACPI + priv->acpi_base = regs - ACPI_RTC_OFFSET; + acpi_install_fixed_event_handler(ACPI_EVENT_RTC, + ls2x_acpi_fix_handler, priv); +#endif + + if (!device_can_wakeup(&pdev->dev)) + device_init_wakeup(dev, 1); + + ret = rtc_register_device(priv->rtcdev); + if (unlikely(ret)) + return ret; + + /* An offset of -0.9s will call RTC set for wall clock time 10.0 s at 10.9 s */ + priv->rtcdev->set_offset_nsec = -900000000; + + /* If not cause hwclock huang */ + priv->rtcdev->uie_unsupported = 1; + + return ret; +} + +#ifdef CONFIG_OF +static const struct of_device_id ls2x_rtc_of_match[] = { + { .compatible = "loongson,ls2x-rtc" }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, ls2x_rtc_of_match); +#endif + +#ifdef CONFIG_ACPI +static const struct acpi_device_id ls2x_rtc_acpi_match[] = { + {"LOON0001"}, + {} +}; +MODULE_DEVICE_TABLE(acpi, ls2x_rtc_acpi_match); +#endif + +static struct platform_driver ls2x_rtc_driver = { + .probe = ls2x_rtc_probe, + .driver = { + .name = "ls2x-rtc", + .of_match_table = of_match_ptr(ls2x_rtc_of_match), + .acpi_match_table = ACPI_PTR(ls2x_rtc_acpi_match), + }, +}; + +module_platform_driver(ls2x_rtc_driver); + +MODULE_DESCRIPTION("LS2X RTC driver"); +MODULE_AUTHOR("WANG Xuerui"); +MODULE_AUTHOR("Huacai Chen"); +MODULE_AUTHOR("Binbin Zhou"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:ls2x-rtc"); diff --git a/scripts/Makefile b/scripts/Makefile index cedc1f0e21d875751f03d0d510dab765dbd4dbd3..9fa609edc38d9d896fb11cffabe5a2220b6ebeb4 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -25,10 +25,17 @@ HOSTCFLAGS_extract-cert.o = $(CRYPTO_CFLAGS) HOSTLDLIBS_extract-cert = $(CRYPTO_LIBS) ifdef CONFIG_UNWINDER_ORC +# Additional ARCH settings for x86 ifeq ($(ARCH),x86_64) ARCH := x86 endif -HOSTCFLAGS_sorttable.o += -I$(srctree)/tools/arch/x86/include + +# Additional ARCH 
settings for loongarch +ifeq ($(ARCH),loongarch64) +ARCH := loongarch +endif + +HOSTCFLAGS_sorttable.o += -I$(srctree)/tools/arch/$(ARCH)/include HOSTCFLAGS_sorttable.o += -DUNWINDER_ORC_ENABLED endif diff --git a/tools/arch/loongarch/include/uapi/asm/perf_regs.h b/tools/arch/loongarch/include/uapi/asm/perf_regs.h new file mode 100644 index 0000000000000000000000000000000000000000..9943d418e01d3c47f1a205b4d0c18d3d8a902204 --- /dev/null +++ b/tools/arch/loongarch/include/uapi/asm/perf_regs.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_LOONGARCH_PERF_REGS_H +#define _ASM_LOONGARCH_PERF_REGS_H + +enum perf_event_loongarch_regs { + PERF_REG_LOONGARCH_PC, + PERF_REG_LOONGARCH_R1, + PERF_REG_LOONGARCH_R2, + PERF_REG_LOONGARCH_R3, + PERF_REG_LOONGARCH_R4, + PERF_REG_LOONGARCH_R5, + PERF_REG_LOONGARCH_R6, + PERF_REG_LOONGARCH_R7, + PERF_REG_LOONGARCH_R8, + PERF_REG_LOONGARCH_R9, + PERF_REG_LOONGARCH_R10, + PERF_REG_LOONGARCH_R11, + PERF_REG_LOONGARCH_R12, + PERF_REG_LOONGARCH_R13, + PERF_REG_LOONGARCH_R14, + PERF_REG_LOONGARCH_R15, + PERF_REG_LOONGARCH_R16, + PERF_REG_LOONGARCH_R17, + PERF_REG_LOONGARCH_R18, + PERF_REG_LOONGARCH_R19, + PERF_REG_LOONGARCH_R20, + PERF_REG_LOONGARCH_R21, + PERF_REG_LOONGARCH_R22, + PERF_REG_LOONGARCH_R23, + PERF_REG_LOONGARCH_R24, + PERF_REG_LOONGARCH_R25, + PERF_REG_LOONGARCH_R26, + PERF_REG_LOONGARCH_R27, + PERF_REG_LOONGARCH_R28, + PERF_REG_LOONGARCH_R29, + PERF_REG_LOONGARCH_R30, + PERF_REG_LOONGARCH_R31, + PERF_REG_LOONGARCH_MAX = PERF_REG_LOONGARCH_R31 + 1, +}; +#endif /* _ASM_LOONGARCH_PERF_REGS_H */ diff --git a/tools/arch/loongarch/include/uapi/asm/unistd.h b/tools/arch/loongarch/include/uapi/asm/unistd.h new file mode 100644 index 0000000000000000000000000000000000000000..d3666a55f7a66df540671e42f272c261d353b677 --- /dev/null +++ b/tools/arch/loongarch/include/uapi/asm/unistd.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 
2020-2021 Loongson Technology Corporation Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#define __ARCH_WANT_NEW_STAT +#define __ARCH_WANT_SYS_CLONE +#define __ARCH_WANT_SYS_CLONE3 + +#include diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 6baa16fbee14e2938751bfb7f7f00003fec93bcf..4e290fe5a526826c9d904cf662bcaa9b3b4a41dc 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -74,6 +74,13 @@ ifeq ($(SRCARCH),arm64) LIBUNWIND_LIBS = -lunwind -lunwind-aarch64 endif +ifeq ($(SRCARCH),loongarch) + NO_PERF_REGS := 0 + CFLAGS += -I$(OUTPUT)arch/loongarch/include/generated + CFLAGS += -I$(OUTPUT)../arch/loongarch/include/uapi + LIBUNWIND_LIBS = -lunwind -lunwind-loongarch +endif + ifeq ($(SRCARCH),riscv) NO_PERF_REGS := 0 endif diff --git a/tools/perf/arch/loongarch/Build b/tools/perf/arch/loongarch/Build new file mode 100644 index 0000000000000000000000000000000000000000..e4e5f33c84d862aaba8e05ded73e0c336d6e90bd --- /dev/null +++ b/tools/perf/arch/loongarch/Build @@ -0,0 +1 @@ +perf-y += util/ diff --git a/tools/perf/arch/loongarch/Makefile b/tools/perf/arch/loongarch/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..1229157b09e1e6db3be6986d32ed700ca459ec85 --- /dev/null +++ b/tools/perf/arch/loongarch/Makefile @@ -0,0 +1,27 @@ +# SPDX-License-Identifier: GPL-2.0 +ifndef NO_DWARF +PERF_HAVE_DWARF_REGS := 1 +endif +PERF_HAVE_JITDUMP := 1 + +# +# Syscall table generation for 
perf +# + +out := $(OUTPUT)arch/loongarch/include/generated/asm +header := $(out)/syscalls_64.c +incpath := $(srctree)/tools +sysdef := $(srctree)/tools/arch/loongarch/include/uapi/asm/unistd.h +sysprf := $(srctree)/tools/perf/arch/loongarch/entry/syscalls/ +systbl := $(sysprf)/mksyscalltbl + +# Create output directory if not already present +_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') + +$(header): $(sysdef) $(systbl) + $(Q)$(SHELL) '$(systbl)' '$(CC)' '$(HOSTCC)' $(incpath) $(sysdef) > $@ + +clean:: + $(call QUIET_CLEAN, loongarch) $(RM) $(header) + +archheaders: $(header) diff --git a/tools/perf/arch/loongarch/annotate/instructions.c b/tools/perf/arch/loongarch/annotate/instructions.c new file mode 100644 index 0000000000000000000000000000000000000000..206e1fca38a47350cde878f8f40f50039d50e0b6 --- /dev/null +++ b/tools/perf/arch/loongarch/annotate/instructions.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Perf annotate functions. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ + +static +struct ins_ops *loongarch__associate_ins_ops(struct arch *arch, const char *name) +{ + struct ins_ops *ops = NULL; + + if (!strncmp(name, "beqz", 4) || + !strncmp(name, "bnez", 4) || + !strncmp(name, "beq", 3) || + !strncmp(name, "bne", 3) || + !strncmp(name, "blt", 3) || + !strncmp(name, "bge", 3) || + !strncmp(name, "bltu", 4) || + !strncmp(name, "bgeu", 4) || + !strncmp(name, "bl", 2)) + ops = &call_ops; + else if (!strncmp(name, "jirl", 4)) + ops = &ret_ops; + else if (name[0] == 'b') + ops = &jump_ops; + else + return NULL; + + arch__associate_ins_ops(arch, name, ops); + + return ops; +} + +static +int loongarch__annotate_init(struct arch *arch, char *cpuid __maybe_unused) +{ + if (!arch->initialized) { + arch->associate_instruction_ops = loongarch__associate_ins_ops; + arch->initialized = true; + arch->objdump.comment_char = '#'; + } + + return 0; +} diff --git a/tools/perf/arch/loongarch/entry/syscalls/mksyscalltbl 
b/tools/perf/arch/loongarch/entry/syscalls/mksyscalltbl new file mode 100755 index 0000000000000000000000000000000000000000..86dad5a018b73335855428b400656e73cbbc55bf --- /dev/null +++ b/tools/perf/arch/loongarch/entry/syscalls/mksyscalltbl @@ -0,0 +1,60 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Generate system call table for perf. Derived from +# powerpc script. +# +# Copyright (C) 2020 Loongson Technology Co., Ltd. +# Author(s): Ming Wang + +gcc=$1 +hostcc=$2 +incpath=$3 +input=$4 + +if ! test -r $input; then + echo "Could not read input file" >&2 + exit 1 +fi + +create_table_from_c() +{ + local sc nr last_sc + + create_table_exe=`mktemp ${TMPDIR:-/tmp}/create-table-XXXXXX` + + { + + cat <<-_EoHEADER + #include + #include "$input" + int main(int argc, char *argv[]) + { + _EoHEADER + + while read sc nr; do + printf "%s\n" " printf(\"\\t[%d] = \\\"$sc\\\",\\n\", $nr);" + last_sc=$nr + done + + printf "%s\n" " printf(\"#define SYSCALLTBL_LOONGARCH_MAX_ID %d\\n\", $last_sc);" + printf "}\n" + + } | $hostcc -I $incpath/include/uapi -o $create_table_exe -x c - + + $create_table_exe + + rm -f $create_table_exe +} + +create_table() +{ + echo "static const char *syscalltbl_loongarch[] = {" + create_table_from_c + echo "};" +} + +$gcc -E -dM -x c -I $incpath/include/uapi $input \ + |sed -ne 's/^#define __NR_//p' \ + |sort -t' ' -k2 -n \ + |create_table diff --git a/tools/perf/arch/loongarch/include/dwarf-regs-table.h b/tools/perf/arch/loongarch/include/dwarf-regs-table.h new file mode 100644 index 0000000000000000000000000000000000000000..676c54a226a5d56854ea8ed029e87a5a02315d55 --- /dev/null +++ b/tools/perf/arch/loongarch/include/dwarf-regs-table.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * dwarf-regs-table.h : Mapping of DWARF debug register numbers into + * register names. 
+ * + * Copyright (C) 2020 Loongson Technology Corporation Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifdef DEFINE_DWARF_REGSTR_TABLE +static const char * const loongarch_regstr_tbl[] = { + "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$9", + "$10", "$11", "$12", "$13", "$14", "$15", "$16", "$17", "$18", "$19", + "$20", "$21", "$22", "$23", "$24", "$25", "$26", "$27", "$28", "$29", + "$30", "$31", +}; +#endif diff --git a/tools/perf/arch/loongarch/include/perf_regs.h b/tools/perf/arch/loongarch/include/perf_regs.h new file mode 100644 index 0000000000000000000000000000000000000000..82d531dcd90fb82b5d63d468f0d36eda88afa6c3 --- /dev/null +++ b/tools/perf/arch/loongarch/include/perf_regs.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ARCH_PERF_REGS_H +#define ARCH_PERF_REGS_H + +#include +#include +#include + +#define PERF_REGS_MAX PERF_REG_LOONGARCH_MAX +#define PERF_REG_IP PERF_REG_LOONGARCH_PC +#define PERF_REG_SP PERF_REG_LOONGARCH_R3 + +#define PERF_REGS_MASK ((1ULL << PERF_REG_LOONGARCH_MAX) - 1) + +static inline const char *__perf_reg_name(int id) +{ + switch (id) { + case PERF_REG_LOONGARCH_PC: + return "PC"; + case PERF_REG_LOONGARCH_R1: + return "$1"; + case PERF_REG_LOONGARCH_R2: + return "$2"; + case PERF_REG_LOONGARCH_R3: + return "$3"; + case PERF_REG_LOONGARCH_R4: + return "$4"; + case PERF_REG_LOONGARCH_R5: + return "$5"; + case PERF_REG_LOONGARCH_R6: + return "$6"; + case PERF_REG_LOONGARCH_R7: + return "$7"; + case PERF_REG_LOONGARCH_R8: + 
return "$8"; + case PERF_REG_LOONGARCH_R9: + return "$9"; + case PERF_REG_LOONGARCH_R10: + return "$10"; + case PERF_REG_LOONGARCH_R11: + return "$11"; + case PERF_REG_LOONGARCH_R12: + return "$12"; + case PERF_REG_LOONGARCH_R13: + return "$13"; + case PERF_REG_LOONGARCH_R14: + return "$14"; + case PERF_REG_LOONGARCH_R15: + return "$15"; + case PERF_REG_LOONGARCH_R16: + return "$16"; + case PERF_REG_LOONGARCH_R17: + return "$17"; + case PERF_REG_LOONGARCH_R18: + return "$18"; + case PERF_REG_LOONGARCH_R19: + return "$19"; + case PERF_REG_LOONGARCH_R20: + return "$20"; + case PERF_REG_LOONGARCH_R21: + return "$21"; + case PERF_REG_LOONGARCH_R22: + return "$22"; + case PERF_REG_LOONGARCH_R23: + return "$23"; + case PERF_REG_LOONGARCH_R24: + return "$24"; + case PERF_REG_LOONGARCH_R25: + return "$25"; + case PERF_REG_LOONGARCH_R26: + return "$26"; + case PERF_REG_LOONGARCH_R27: + return "$27"; + case PERF_REG_LOONGARCH_R28: + return "$28"; + case PERF_REG_LOONGARCH_R29: + return "$29"; + case PERF_REG_LOONGARCH_R30: + return "$30"; + case PERF_REG_LOONGARCH_R31: + return "$31"; + default: + break; + } + return NULL; +} + +#endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/loongarch/util/Build b/tools/perf/arch/loongarch/util/Build new file mode 100644 index 0000000000000000000000000000000000000000..fab48acf21c59ea02d08d2ceea46ce8ad91e0310 --- /dev/null +++ b/tools/perf/arch/loongarch/util/Build @@ -0,0 +1,4 @@ +perf-y += perf_regs.o + +perf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o diff --git a/tools/perf/arch/loongarch/util/dwarf-regs.c b/tools/perf/arch/loongarch/util/dwarf-regs.c new file mode 100644 index 0000000000000000000000000000000000000000..bbd343e3191f8102fca3dcef3fefe21f0229f8d3 --- /dev/null +++ b/tools/perf/arch/loongarch/util/dwarf-regs.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * dwarf-regs.c : Mapping of DWARF debug register numbers into register names. 
+ * + * Copyright (C) 2013 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include + +static const char *loongarch_gpr_names[32] = { + "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$9", + "$10", "$11", "$12", "$13", "$14", "$15", "$16", "$17", "$18", "$19", + "$20", "$21", "$22", "$23", "$24", "$25", "$26", "$27", "$28", "$29", + "$30", "$31" +}; + +const char *get_arch_regstr(unsigned int n) +{ + n %= 32; + return loongarch_gpr_names[n]; +} diff --git a/tools/perf/arch/loongarch/util/perf_regs.c b/tools/perf/arch/loongarch/util/perf_regs.c new file mode 100644 index 0000000000000000000000000000000000000000..2833e101a7c6407263130e9948a06a2caa32bc4b --- /dev/null +++ b/tools/perf/arch/loongarch/util/perf_regs.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../../util/perf_regs.h" + +const struct sample_reg sample_reg_masks[] = { + SMPL_REG_END +}; diff --git a/tools/perf/arch/loongarch/util/unwind-libunwind.c b/tools/perf/arch/loongarch/util/unwind-libunwind.c new file mode 100644 index 0000000000000000000000000000000000000000..abcd9e2c6624b7ec376190b205ca4a94a1393acf --- /dev/null +++ b/tools/perf/arch/loongarch/util/unwind-libunwind.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include "perf_regs.h" +#include "../../util/unwind.h" +#include "util/debug.h" + +int libunwind__arch_reg_id(int regnum) +{ + switch (regnum) { + case UNW_LOONGARCH_R1: + return PERF_REG_LOONGARCH_R1; + case UNW_LOONGARCH_R2: + return 
PERF_REG_LOONGARCH_R2; + case UNW_LOONGARCH_R3: + return PERF_REG_LOONGARCH_R3; + case UNW_LOONGARCH_R4: + return PERF_REG_LOONGARCH_R4; + case UNW_LOONGARCH_R5: + return PERF_REG_LOONGARCH_R5; + case UNW_LOONGARCH_R6: + return PERF_REG_LOONGARCH_R6; + case UNW_LOONGARCH_R7: + return PERF_REG_LOONGARCH_R7; + case UNW_LOONGARCH_R8: + return PERF_REG_LOONGARCH_R8; + case UNW_LOONGARCH_R9: + return PERF_REG_LOONGARCH_R9; + case UNW_LOONGARCH_R10: + return PERF_REG_LOONGARCH_R10; + case UNW_LOONGARCH_R11: + return PERF_REG_LOONGARCH_R11; + case UNW_LOONGARCH_R12: + return PERF_REG_LOONGARCH_R12; + case UNW_LOONGARCH_R13: + return PERF_REG_LOONGARCH_R13; + case UNW_LOONGARCH_R14: + return PERF_REG_LOONGARCH_R14; + case UNW_LOONGARCH_R15: + return PERF_REG_LOONGARCH_R15; + case UNW_LOONGARCH_R16: + return PERF_REG_LOONGARCH_R16; + case UNW_LOONGARCH_R17: + return PERF_REG_LOONGARCH_R17; + case UNW_LOONGARCH_R18: + return PERF_REG_LOONGARCH_R18; + case UNW_LOONGARCH_R19: + return PERF_REG_LOONGARCH_R19; + case UNW_LOONGARCH_R20: + return PERF_REG_LOONGARCH_R20; + case UNW_LOONGARCH_R21: + return PERF_REG_LOONGARCH_R21; + case UNW_LOONGARCH_R22: + return PERF_REG_LOONGARCH_R22; + case UNW_LOONGARCH_R23: + return PERF_REG_LOONGARCH_R23; + case UNW_LOONGARCH_R24: + return PERF_REG_LOONGARCH_R24; + case UNW_LOONGARCH_R25: + return PERF_REG_LOONGARCH_R25; + case UNW_LOONGARCH_R26: + return PERF_REG_LOONGARCH_R26; + case UNW_LOONGARCH_R27: + return PERF_REG_LOONGARCH_R27; + case UNW_LOONGARCH_R28: + return PERF_REG_LOONGARCH_R28; + case UNW_LOONGARCH_R29: + return PERF_REG_LOONGARCH_R29; + case UNW_LOONGARCH_R30: + return PERF_REG_LOONGARCH_R30; + case UNW_LOONGARCH_R31: + return PERF_REG_LOONGARCH_R31; + case UNW_LOONGARCH_PC: + return PERF_REG_LOONGARCH_PC; + default: + pr_err("unwind: invalid reg id %d\n", regnum); + return -EINVAL; + } + + return -EINVAL; +} diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 
9e0f3f912c276ce1b7e5c05db288f8b98401c65b..b9edf33e4c6992db516c6076d03c9b133bf86bf5 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -156,6 +156,7 @@ static int arch__associate_ins_ops(struct arch* arch, const char *name, struct i #include "arch/powerpc/annotate/instructions.c" #include "arch/s390/annotate/instructions.c" #include "arch/sparc/annotate/instructions.c" +#include "arch/loongarch/annotate/instructions.c" static struct arch architectures[] = { { @@ -205,6 +206,13 @@ static struct arch architectures[] = { .comment_char = '#', }, }, + { + .name = "loongarch", + .init = loongarch__annotate_init, + .objdump = { + .comment_char = '#', + }, + }, }; static void ins__delete(struct ins_operands *ops) diff --git a/tools/perf/util/dwarf-regs.c b/tools/perf/util/dwarf-regs.c index 1b49ecee5affd2b19d10ce6790eaa7190e107bc1..43a77aeeb3100730d03504cb8afbe865c33ec751 100644 --- a/tools/perf/util/dwarf-regs.c +++ b/tools/perf/util/dwarf-regs.c @@ -14,6 +14,10 @@ #define EM_AARCH64 183 /* ARM 64 bit */ #endif +#ifndef EM_LOONGARCH +#define EM_LOONGARCH 258 /* LoongArch */ +#endif + /* Define const char * {arch}_register_tbl[] */ #define DEFINE_DWARF_REGSTR_TABLE #include "../arch/x86/include/dwarf-regs-table.h" @@ -24,6 +28,7 @@ #include "../arch/s390/include/dwarf-regs-table.h" #include "../arch/sparc/include/dwarf-regs-table.h" #include "../arch/xtensa/include/dwarf-regs-table.h" +#include "../arch/loongarch/include/dwarf-regs-table.h" #define __get_dwarf_regstr(tbl, n) (((n) < ARRAY_SIZE(tbl)) ? 
(tbl)[(n)] : NULL) @@ -53,6 +58,8 @@ const char *get_dwarf_regstr(unsigned int n, unsigned int machine) return __get_dwarf_regstr(sparc_regstr_tbl, n); case EM_XTENSA: return __get_dwarf_regstr(xtensa_regstr_tbl, n); + case EM_LOONGARCH: + return __get_dwarf_regstr(loongarch_regstr_tbl, n); default: pr_err("ELF MACHINE %x is not supported.\n", machine); } diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index a059d927b475ad048915d0827a6841d99275b743..b66e24fff0e4e19b5993abcf3998ec8721d0c30d 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -434,6 +434,8 @@ static const char *normalize_arch(char *arch) return "mips"; if (!strncmp(arch, "sh", 2) && isdigit(arch[2])) return "sh"; + if (!strncmp(arch, "loongarch", 9)) + return "loongarch"; return arch; } diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h index d4137559be0537fe49e6ef923cdce447619016f0..fdeef73cf85f4458cce1d8e040ce6d6fa2291c43 100644 --- a/tools/perf/util/genelf.h +++ b/tools/perf/util/genelf.h @@ -38,6 +38,9 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent #elif defined(__s390x__) #define GEN_ELF_ARCH EM_S390 #define GEN_ELF_CLASS ELFCLASS64 +#elif defined(__loongarch__) +#define GEN_ELF_ARCH EM_LOONGARCH +#define GEN_ELF_CLASS ELFCLASS64 #else #error "unsupported architecture" #endif diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index ab21c9e250a7b8b0da1f8f6c64c8352c8480be6a..61b27ee56a9f85503e031fb23703a547daf8d81f 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -16,6 +16,7 @@ #include "map_symbol.h" #include "branch.h" #include "mem-events.h" +#include "path.h" #include "srcline.h" #include "symbol.h" #include "sort.h" @@ -1387,7 +1388,7 @@ static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, i struct stat st; /*sshfs might return bad dent->d_type, so we have to stat*/ - snprintf(path, sizeof(path), "%s/%s", dir_name, dent->d_name); + path__join(path, 
sizeof(path), dir_name, dent->d_name); if (stat(path, &st)) continue; diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c index 03bd99d3be16f352cc7ae09d304340d9a8b2cb87..e441f12c7272274abea56c926274e22e50eada9b 100644 --- a/tools/perf/util/syscalltbl.c +++ b/tools/perf/util/syscalltbl.c @@ -34,6 +34,10 @@ static const char **syscalltbl_native = syscalltbl_powerpc_32; #include const int syscalltbl_native_max_id = SYSCALLTBL_ARM64_MAX_ID; static const char **syscalltbl_native = syscalltbl_arm64; +#elif defined(__loongarch__) +#include +const int syscalltbl_native_max_id = SYSCALLTBL_LOONGARCH_MAX_ID; +static const char **syscalltbl_native = syscalltbl_loongarch; #endif struct syscall { diff --git a/tools/scripts/Makefile.arch b/tools/scripts/Makefile.arch index b10b7a27c33fd16b10fa3d7fcfd7a5fa372f2d59..0b2af44c167fcc6a2b1faeecb935f32f9dc1db57 100644 --- a/tools/scripts/Makefile.arch +++ b/tools/scripts/Makefile.arch @@ -4,7 +4,8 @@ HOSTARCH := $(shell uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ \ -e /arm64/!s/arm.*/arm/ -e s/sa110/arm/ \ -e s/s390x/s390/ -e s/parisc64/parisc/ \ -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \ - -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ ) + -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ \ + -e s/loongarch.*/loongarch/) ifndef ARCH ARCH := $(HOSTARCH) @@ -33,6 +34,15 @@ ifeq ($(ARCH),sh64) SRCARCH := sh endif +# Additional ARCH settings for loongarch +ifeq ($(ARCH),loongarch32) + SRCARCH := loongarch +endif + +ifeq ($(ARCH),loongarch64) + SRCARCH := loongarch +endif + LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1) ifeq ($(LP64), 1) IS_64_BIT := 1