From 85740d3cc56fda699beae689b5d73233d16097af Mon Sep 17 00:00:00 2001 From: bule Date: Thu, 8 Jul 2021 11:52:47 +0800 Subject: [PATCH 01/13] [libquadmath] Enable libquadmath on kunpeng This enables libquadmath on the Kunpeng platform for the convenience of users migrating from the x86 platform. libquadmath uses "__float128" as its quad-precision floating-point type and provides math functions with a "q" suffix, such as "cosq". Users who do not need x86 compatibility can instead use "long double" as the quad-precision floating-point type, together with the "l"-suffixed math functions in libm, such as "cosl", for quad-precision math. --- libquadmath/Makefile.in | 353 ++++++++++++++++++++-------------------- libquadmath/quadmath.h | 6 +- 2 files changed, 181 insertions(+), 178 deletions(-) diff --git a/libquadmath/Makefile.in b/libquadmath/Makefile.in index 8c011212258..66df9c922f8 100644 --- a/libquadmath/Makefile.in +++ b/libquadmath/Makefile.in @@ -90,7 +90,7 @@ POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ -@BUILD_LIBQUADMATH_FALSE@libquadmath_la_DEPENDENCIES = +#libquadmath_la_DEPENDENCIES = subdir = . 
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \ @@ -147,68 +147,68 @@ am__installdirs = "$(DESTDIR)$(toolexeclibdir)" "$(DESTDIR)$(infodir)" \ "$(DESTDIR)$(libsubincludedir)" LTLIBRARIES = $(toolexeclib_LTLIBRARIES) am__dirstamp = $(am__leading_dot)dirstamp -@BUILD_LIBQUADMATH_TRUE@am_libquadmath_la_OBJECTS = math/x2y2m1q.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/acoshq.lo math/fmodq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/acosq.lo math/frexpq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/rem_pio2q.lo math/asinhq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/hypotq.lo math/remainderq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/asinq.lo math/rintq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/atan2q.lo math/isinfq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/roundq.lo math/atanhq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/isnanq.lo math/scalblnq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/atanq.lo math/j0q.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/scalbnq.lo math/cbrtq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/j1q.lo math/signbitq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/ceilq.lo math/jnq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/sincos_table.lo math/complex.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/ldexpq.lo math/sincosq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/copysignq.lo math/lgammaq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/sincosq_kernel.lo math/coshq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/llroundq.lo math/sinhq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/cosq.lo math/log10q.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/sinq.lo math/cosq_kernel.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/log1pq.lo math/sinq_kernel.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/erfq.lo math/logq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/sqrtq.lo math/expm1q.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/lroundq.lo math/tanhq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/expq.lo math/modfq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/tanq.lo math/fabsq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/nanq.lo math/tgammaq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/finiteq.lo math/nextafterq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/truncq.lo 
math/floorq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/powq.lo math/fmaq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/logbq.lo math/exp2q.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/issignalingq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/lgammaq_neg.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/lgammaq_product.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/tanq_kernel.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/tgammaq_product.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/casinhq_kernel.lo math/cacoshq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/cacosq.lo math/casinhq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/casinq.lo math/catanhq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/catanq.lo math/cimagq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/conjq.lo math/cprojq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/crealq.lo math/fdimq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/fmaxq.lo math/fminq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/ilogbq.lo math/llrintq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/log2q.lo math/lrintq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/nearbyintq.lo math/remquoq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/ccoshq.lo math/cexpq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/clog10q.lo math/clogq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/csinq.lo math/csinhq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/csqrtq.lo math/ctanq.lo \ -@BUILD_LIBQUADMATH_TRUE@ math/ctanhq.lo printf/addmul_1.lo \ -@BUILD_LIBQUADMATH_TRUE@ printf/add_n.lo printf/cmp.lo \ -@BUILD_LIBQUADMATH_TRUE@ printf/divrem.lo printf/flt1282mpn.lo \ -@BUILD_LIBQUADMATH_TRUE@ printf/fpioconst.lo printf/lshift.lo \ -@BUILD_LIBQUADMATH_TRUE@ printf/mul_1.lo printf/mul_n.lo \ -@BUILD_LIBQUADMATH_TRUE@ printf/mul.lo printf/printf_fphex.lo \ -@BUILD_LIBQUADMATH_TRUE@ printf/printf_fp.lo \ -@BUILD_LIBQUADMATH_TRUE@ printf/quadmath-printf.lo \ -@BUILD_LIBQUADMATH_TRUE@ printf/rshift.lo printf/submul_1.lo \ -@BUILD_LIBQUADMATH_TRUE@ printf/sub_n.lo strtod/strtoflt128.lo \ -@BUILD_LIBQUADMATH_TRUE@ strtod/mpn2flt128.lo \ -@BUILD_LIBQUADMATH_TRUE@ strtod/tens_in_limb.lo +am_libquadmath_la_OBJECTS = math/x2y2m1q.lo \ + math/acoshq.lo math/fmodq.lo \ + math/acosq.lo 
math/frexpq.lo \ + math/rem_pio2q.lo math/asinhq.lo \ + math/hypotq.lo math/remainderq.lo \ + math/asinq.lo math/rintq.lo \ + math/atan2q.lo math/isinfq.lo \ + math/roundq.lo math/atanhq.lo \ + math/isnanq.lo math/scalblnq.lo \ + math/atanq.lo math/j0q.lo \ + math/scalbnq.lo math/cbrtq.lo \ + math/j1q.lo math/signbitq.lo \ + math/ceilq.lo math/jnq.lo \ + math/sincos_table.lo math/complex.lo \ + math/ldexpq.lo math/sincosq.lo \ + math/copysignq.lo math/lgammaq.lo \ + math/sincosq_kernel.lo math/coshq.lo \ + math/llroundq.lo math/sinhq.lo \ + math/cosq.lo math/log10q.lo \ + math/sinq.lo math/cosq_kernel.lo \ + math/log1pq.lo math/sinq_kernel.lo \ + math/erfq.lo math/logq.lo \ + math/sqrtq.lo math/expm1q.lo \ + math/lroundq.lo math/tanhq.lo \ + math/expq.lo math/modfq.lo \ + math/tanq.lo math/fabsq.lo \ + math/nanq.lo math/tgammaq.lo \ + math/finiteq.lo math/nextafterq.lo \ + math/truncq.lo math/floorq.lo \ + math/powq.lo math/fmaq.lo \ + math/logbq.lo math/exp2q.lo \ + math/issignalingq.lo \ + math/lgammaq_neg.lo \ + math/lgammaq_product.lo \ + math/tanq_kernel.lo \ + math/tgammaq_product.lo \ + math/casinhq_kernel.lo math/cacoshq.lo \ + math/cacosq.lo math/casinhq.lo \ + math/casinq.lo math/catanhq.lo \ + math/catanq.lo math/cimagq.lo \ + math/conjq.lo math/cprojq.lo \ + math/crealq.lo math/fdimq.lo \ + math/fmaxq.lo math/fminq.lo \ + math/ilogbq.lo math/llrintq.lo \ + math/log2q.lo math/lrintq.lo \ + math/nearbyintq.lo math/remquoq.lo \ + math/ccoshq.lo math/cexpq.lo \ + math/clog10q.lo math/clogq.lo \ + math/csinq.lo math/csinhq.lo \ + math/csqrtq.lo math/ctanq.lo \ + math/ctanhq.lo printf/addmul_1.lo \ + printf/add_n.lo printf/cmp.lo \ + printf/divrem.lo printf/flt1282mpn.lo \ + printf/fpioconst.lo printf/lshift.lo \ + printf/mul_1.lo printf/mul_n.lo \ + printf/mul.lo printf/printf_fphex.lo \ + printf/printf_fp.lo \ + printf/quadmath-printf.lo \ + printf/rshift.lo printf/submul_1.lo \ + printf/sub_n.lo strtod/strtoflt128.lo \ + strtod/mpn2flt128.lo \ + 
strtod/tens_in_limb.lo libquadmath_la_OBJECTS = $(am_libquadmath_la_OBJECTS) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) @@ -218,8 +218,8 @@ libquadmath_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ $(AM_CFLAGS) $(CFLAGS) $(libquadmath_la_LDFLAGS) $(LDFLAGS) -o \ $@ -@BUILD_LIBQUADMATH_TRUE@am_libquadmath_la_rpath = -rpath \ -@BUILD_LIBQUADMATH_TRUE@ $(toolexeclibdir) +am_libquadmath_la_rpath = -rpath \ + $(toolexeclibdir) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false @@ -337,7 +337,7 @@ CFLAGS = @CFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CYGPATH_W = @CYGPATH_W@ -DEFS = @DEFS@ +DEFS = @DEFS@ -D__float128="long double" DEPDIR = @DEPDIR@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ @@ -409,7 +409,7 @@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ dvidir = @dvidir@ -enable_shared = @enable_shared@ +enable_shared = yes enable_static = @enable_static@ exec_prefix = @exec_prefix@ get_gcc_base_ver = @get_gcc_base_ver@ @@ -451,109 +451,109 @@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign info-in-builddir -@BUILD_LIBQUADMATH_TRUE@ACLOCAL_AMFLAGS = -I .. 
-I ../config -@BUILD_LIBQUADMATH_TRUE@AM_CPPFLAGS = -I $(top_srcdir)/../include -@BUILD_LIBQUADMATH_TRUE@AM_CFLAGS = $(XCFLAGS) -@BUILD_LIBQUADMATH_TRUE@gcc_version := $(shell @get_gcc_base_ver@ $(top_srcdir)/../gcc/BASE-VER) -@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_FALSE@version_arg = -@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_GNU_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_arg = -Wl,--version-script=$(srcdir)/quadmath.map -@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_arg = -Wl,-M,quadmath.map-sun -@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_FALSE@version_dep = -@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_GNU_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_dep = $(srcdir)/quadmath.map -@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_dep = quadmath.map-sun -@BUILD_LIBQUADMATH_TRUE@toolexeclib_LTLIBRARIES = libquadmath.la -@BUILD_LIBQUADMATH_TRUE@libquadmath_la_LIBADD = -@BUILD_LIBQUADMATH_TRUE@libquadmath_la_LDFLAGS = -version-info `grep -v '^\#' $(srcdir)/libtool-version` \ -@BUILD_LIBQUADMATH_TRUE@ $(version_arg) $(lt_host_flags) -lm - -@BUILD_LIBQUADMATH_TRUE@libquadmath_la_DEPENDENCIES = $(version_dep) $(libquadmath_la_LIBADD) -@BUILD_LIBQUADMATH_TRUE@nodist_libsubinclude_HEADERS = quadmath.h quadmath_weak.h -@BUILD_LIBQUADMATH_TRUE@libsubincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include -@BUILD_LIBQUADMATH_TRUE@libquadmath_la_SOURCES = \ -@BUILD_LIBQUADMATH_TRUE@ math/x2y2m1q.c math/acoshq.c math/fmodq.c \ -@BUILD_LIBQUADMATH_TRUE@ math/acosq.c math/frexpq.c \ -@BUILD_LIBQUADMATH_TRUE@ math/rem_pio2q.c math/asinhq.c math/hypotq.c math/remainderq.c \ -@BUILD_LIBQUADMATH_TRUE@ math/asinq.c math/rintq.c math/atan2q.c math/isinfq.c \ -@BUILD_LIBQUADMATH_TRUE@ math/roundq.c math/atanhq.c math/isnanq.c math/scalblnq.c math/atanq.c \ -@BUILD_LIBQUADMATH_TRUE@ math/j0q.c math/scalbnq.c math/cbrtq.c math/j1q.c math/signbitq.c \ -@BUILD_LIBQUADMATH_TRUE@ math/ceilq.c math/jnq.c 
math/sincos_table.c math/complex.c math/ldexpq.c \ -@BUILD_LIBQUADMATH_TRUE@ math/sincosq.c math/copysignq.c math/lgammaq.c math/sincosq_kernel.c \ -@BUILD_LIBQUADMATH_TRUE@ math/coshq.c math/llroundq.c math/sinhq.c math/cosq.c math/log10q.c \ -@BUILD_LIBQUADMATH_TRUE@ math/sinq.c math/cosq_kernel.c math/log1pq.c math/sinq_kernel.c \ -@BUILD_LIBQUADMATH_TRUE@ math/erfq.c math/logq.c math/sqrtq.c math/expm1q.c math/lroundq.c \ -@BUILD_LIBQUADMATH_TRUE@ math/tanhq.c math/expq.c math/modfq.c math/tanq.c math/fabsq.c \ -@BUILD_LIBQUADMATH_TRUE@ math/nanq.c math/tgammaq.c math/finiteq.c math/nextafterq.c \ -@BUILD_LIBQUADMATH_TRUE@ math/truncq.c math/floorq.c math/powq.c math/fmaq.c math/logbq.c \ -@BUILD_LIBQUADMATH_TRUE@ math/exp2q.c math/issignalingq.c math/lgammaq_neg.c math/lgammaq_product.c \ -@BUILD_LIBQUADMATH_TRUE@ math/tanq_kernel.c math/tgammaq_product.c math/casinhq_kernel.c \ -@BUILD_LIBQUADMATH_TRUE@ math/cacoshq.c math/cacosq.c math/casinhq.c math/casinq.c \ -@BUILD_LIBQUADMATH_TRUE@ math/catanhq.c math/catanq.c math/cimagq.c math/conjq.c math/cprojq.c \ -@BUILD_LIBQUADMATH_TRUE@ math/crealq.c math/fdimq.c math/fmaxq.c math/fminq.c math/ilogbq.c \ -@BUILD_LIBQUADMATH_TRUE@ math/llrintq.c math/log2q.c math/lrintq.c math/nearbyintq.c math/remquoq.c \ -@BUILD_LIBQUADMATH_TRUE@ math/ccoshq.c math/cexpq.c math/clog10q.c math/clogq.c math/csinq.c \ -@BUILD_LIBQUADMATH_TRUE@ math/csinhq.c math/csqrtq.c math/ctanq.c math/ctanhq.c \ -@BUILD_LIBQUADMATH_TRUE@ printf/addmul_1.c printf/add_n.c printf/cmp.c printf/divrem.c \ -@BUILD_LIBQUADMATH_TRUE@ printf/flt1282mpn.c printf/fpioconst.c printf/lshift.c printf/mul_1.c \ -@BUILD_LIBQUADMATH_TRUE@ printf/mul_n.c printf/mul.c printf/printf_fphex.c printf/printf_fp.c \ -@BUILD_LIBQUADMATH_TRUE@ printf/quadmath-printf.c printf/rshift.c printf/submul_1.c printf/sub_n.c \ -@BUILD_LIBQUADMATH_TRUE@ strtod/strtoflt128.c strtod/mpn2flt128.c strtod/tens_in_limb.c +ACLOCAL_AMFLAGS = -I .. 
-I ../config +AM_CPPFLAGS = -I $(top_srcdir)/../include +AM_CFLAGS = $(XCFLAGS) +gcc_version := $(shell @get_gcc_base_ver@ $(top_srcdir)/../gcc/BASE-VER) +@LIBQUAD_USE_SYMVER_FALSE@version_arg = +@LIBQUAD_USE_SYMVER_GNU_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_arg = -Wl,--version-script=$(srcdir)/quadmath.map +@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_arg = -Wl,-M,quadmath.map-sun +@LIBQUAD_USE_SYMVER_FALSE@version_dep = +@LIBQUAD_USE_SYMVER_GNU_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_dep = $(srcdir)/quadmath.map +@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_dep = quadmath.map-sun +toolexeclib_LTLIBRARIES = libquadmath.la +libquadmath_la_LIBADD = +libquadmath_la_LDFLAGS = -version-info `grep -v '^\#' $(srcdir)/libtool-version` \ + $(version_arg) $(lt_host_flags) -lm + +libquadmath_la_DEPENDENCIES = $(version_dep) $(libquadmath_la_LIBADD) +nodist_libsubinclude_HEADERS = quadmath.h quadmath_weak.h +libsubincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include +libquadmath_la_SOURCES = \ + math/x2y2m1q.c math/acoshq.c math/fmodq.c \ + math/acosq.c math/frexpq.c \ + math/rem_pio2q.c math/asinhq.c math/hypotq.c math/remainderq.c \ + math/asinq.c math/rintq.c math/atan2q.c math/isinfq.c \ + math/roundq.c math/atanhq.c math/isnanq.c math/scalblnq.c math/atanq.c \ + math/j0q.c math/scalbnq.c math/cbrtq.c math/j1q.c math/signbitq.c \ + math/ceilq.c math/jnq.c math/sincos_table.c math/complex.c math/ldexpq.c \ + math/sincosq.c math/copysignq.c math/lgammaq.c math/sincosq_kernel.c \ + math/coshq.c math/llroundq.c math/sinhq.c math/cosq.c math/log10q.c \ + math/sinq.c math/cosq_kernel.c math/log1pq.c math/sinq_kernel.c \ + math/erfq.c math/logq.c math/sqrtq.c math/expm1q.c math/lroundq.c \ + math/tanhq.c math/expq.c math/modfq.c math/tanq.c math/fabsq.c \ + math/nanq.c math/tgammaq.c math/finiteq.c math/nextafterq.c \ + math/truncq.c math/floorq.c math/powq.c math/fmaq.c math/logbq.c \ + math/exp2q.c math/issignalingq.c 
math/lgammaq_neg.c math/lgammaq_product.c \ + math/tanq_kernel.c math/tgammaq_product.c math/casinhq_kernel.c \ + math/cacoshq.c math/cacosq.c math/casinhq.c math/casinq.c \ + math/catanhq.c math/catanq.c math/cimagq.c math/conjq.c math/cprojq.c \ + math/crealq.c math/fdimq.c math/fmaxq.c math/fminq.c math/ilogbq.c \ + math/llrintq.c math/log2q.c math/lrintq.c math/nearbyintq.c math/remquoq.c \ + math/ccoshq.c math/cexpq.c math/clog10q.c math/clogq.c math/csinq.c \ + math/csinhq.c math/csqrtq.c math/ctanq.c math/ctanhq.c \ + printf/addmul_1.c printf/add_n.c printf/cmp.c printf/divrem.c \ + printf/flt1282mpn.c printf/fpioconst.c printf/lshift.c printf/mul_1.c \ + printf/mul_n.c printf/mul.c printf/printf_fphex.c printf/printf_fp.c \ + printf/quadmath-printf.c printf/rshift.c printf/submul_1.c printf/sub_n.c \ + strtod/strtoflt128.c strtod/mpn2flt128.c strtod/tens_in_limb.c # Work around what appears to be a GNU make bug handling MAKEFLAGS # values defined in terms of make variables, as is the case for CC and # friends when we are called from the top level Makefile. 
-@BUILD_LIBQUADMATH_TRUE@AM_MAKEFLAGS = \ -@BUILD_LIBQUADMATH_TRUE@ "AR_FLAGS=$(AR_FLAGS)" \ -@BUILD_LIBQUADMATH_TRUE@ "CC_FOR_BUILD=$(CC_FOR_BUILD)" \ -@BUILD_LIBQUADMATH_TRUE@ "CFLAGS=$(CFLAGS)" \ -@BUILD_LIBQUADMATH_TRUE@ "CXXFLAGS=$(CXXFLAGS)" \ -@BUILD_LIBQUADMATH_TRUE@ "CFLAGS_FOR_BUILD=$(CFLAGS_FOR_BUILD)" \ -@BUILD_LIBQUADMATH_TRUE@ "CFLAGS_FOR_TARGET=$(CFLAGS_FOR_TARGET)" \ -@BUILD_LIBQUADMATH_TRUE@ "INSTALL=$(INSTALL)" \ -@BUILD_LIBQUADMATH_TRUE@ "INSTALL_DATA=$(INSTALL_DATA)" \ -@BUILD_LIBQUADMATH_TRUE@ "INSTALL_PROGRAM=$(INSTALL_PROGRAM)" \ -@BUILD_LIBQUADMATH_TRUE@ "INSTALL_SCRIPT=$(INSTALL_SCRIPT)" \ -@BUILD_LIBQUADMATH_TRUE@ "JC1FLAGS=$(JC1FLAGS)" \ -@BUILD_LIBQUADMATH_TRUE@ "LDFLAGS=$(LDFLAGS)" \ -@BUILD_LIBQUADMATH_TRUE@ "LIBCFLAGS=$(LIBCFLAGS)" \ -@BUILD_LIBQUADMATH_TRUE@ "LIBCFLAGS_FOR_TARGET=$(LIBCFLAGS_FOR_TARGET)" \ -@BUILD_LIBQUADMATH_TRUE@ "MAKE=$(MAKE)" \ -@BUILD_LIBQUADMATH_TRUE@ "MAKEINFO=$(MAKEINFO) $(MAKEINFOFLAGS)" \ -@BUILD_LIBQUADMATH_TRUE@ "PICFLAG=$(PICFLAG)" \ -@BUILD_LIBQUADMATH_TRUE@ "PICFLAG_FOR_TARGET=$(PICFLAG_FOR_TARGET)" \ -@BUILD_LIBQUADMATH_TRUE@ "SHELL=$(SHELL)" \ -@BUILD_LIBQUADMATH_TRUE@ "RUNTESTFLAGS=$(RUNTESTFLAGS)" \ -@BUILD_LIBQUADMATH_TRUE@ "exec_prefix=$(exec_prefix)" \ -@BUILD_LIBQUADMATH_TRUE@ "infodir=$(infodir)" \ -@BUILD_LIBQUADMATH_TRUE@ "libdir=$(libdir)" \ -@BUILD_LIBQUADMATH_TRUE@ "prefix=$(prefix)" \ -@BUILD_LIBQUADMATH_TRUE@ "includedir=$(includedir)" \ -@BUILD_LIBQUADMATH_TRUE@ "AR=$(AR)" \ -@BUILD_LIBQUADMATH_TRUE@ "AS=$(AS)" \ -@BUILD_LIBQUADMATH_TRUE@ "CC=$(CC)" \ -@BUILD_LIBQUADMATH_TRUE@ "CXX=$(CXX)" \ -@BUILD_LIBQUADMATH_TRUE@ "LD=$(LD)" \ -@BUILD_LIBQUADMATH_TRUE@ "LIBCFLAGS=$(LIBCFLAGS)" \ -@BUILD_LIBQUADMATH_TRUE@ "NM=$(NM)" \ -@BUILD_LIBQUADMATH_TRUE@ "PICFLAG=$(PICFLAG)" \ -@BUILD_LIBQUADMATH_TRUE@ "RANLIB=$(RANLIB)" \ -@BUILD_LIBQUADMATH_TRUE@ "DESTDIR=$(DESTDIR)" +AM_MAKEFLAGS = \ + "AR_FLAGS=$(AR_FLAGS)" \ + "CC_FOR_BUILD=$(CC_FOR_BUILD)" \ + "CFLAGS=$(CFLAGS)" \ + "CXXFLAGS=$(CXXFLAGS)" 
\ + "CFLAGS_FOR_BUILD=$(CFLAGS_FOR_BUILD)" \ + "CFLAGS_FOR_TARGET=$(CFLAGS_FOR_TARGET)" \ + "INSTALL=$(INSTALL)" \ + "INSTALL_DATA=$(INSTALL_DATA)" \ + "INSTALL_PROGRAM=$(INSTALL_PROGRAM)" \ + "INSTALL_SCRIPT=$(INSTALL_SCRIPT)" \ + "JC1FLAGS=$(JC1FLAGS)" \ + "LDFLAGS=$(LDFLAGS)" \ + "LIBCFLAGS=$(LIBCFLAGS)" \ + "LIBCFLAGS_FOR_TARGET=$(LIBCFLAGS_FOR_TARGET)" \ + "MAKE=$(MAKE)" \ + "MAKEINFO=$(MAKEINFO) $(MAKEINFOFLAGS)" \ + "PICFLAG=$(PICFLAG)" \ + "PICFLAG_FOR_TARGET=$(PICFLAG_FOR_TARGET)" \ + "SHELL=$(SHELL)" \ + "RUNTESTFLAGS=$(RUNTESTFLAGS)" \ + "exec_prefix=$(exec_prefix)" \ + "infodir=$(infodir)" \ + "libdir=$(libdir)" \ + "prefix=$(prefix)" \ + "includedir=$(includedir)" \ + "AR=$(AR)" \ + "AS=$(AS)" \ + "CC=$(CC)" \ + "CXX=$(CXX)" \ + "LD=$(LD)" \ + "LIBCFLAGS=$(LIBCFLAGS)" \ + "NM=$(NM)" \ + "PICFLAG=$(PICFLAG)" \ + "RANLIB=$(RANLIB)" \ + "DESTDIR=$(DESTDIR)" # Subdir rules rely on $(FLAGS_TO_PASS) -@BUILD_LIBQUADMATH_TRUE@FLAGS_TO_PASS = $(AM_MAKEFLAGS) -@BUILD_LIBQUADMATH_TRUE@MAKEOVERRIDES = -@BUILD_LIBQUADMATH_TRUE@@GENINSRC_FALSE@STAMP_GENINSRC = +FLAGS_TO_PASS = $(AM_MAKEFLAGS) +MAKEOVERRIDES = +@GENINSRC_FALSE@STAMP_GENINSRC = # AM_CONDITIONAL on configure option --generated-files-in-srcdir -@BUILD_LIBQUADMATH_TRUE@@GENINSRC_TRUE@STAMP_GENINSRC = stamp-geninsrc -@BUILD_LIBQUADMATH_TRUE@ALL_LOCAL_DEPS = $(STAMP_GENINSRC) -@BUILD_INFO_FALSE@@BUILD_LIBQUADMATH_TRUE@STAMP_BUILD_INFO = +@GENINSRC_TRUE@STAMP_GENINSRC = stamp-geninsrc +ALL_LOCAL_DEPS = $(STAMP_GENINSRC) +@BUILD_INFO_FALSE@STAMP_BUILD_INFO = # AM_CONDITIONAL on configure check ACX_CHECK_PROG_VER([MAKEINFO]) -@BUILD_INFO_TRUE@@BUILD_LIBQUADMATH_TRUE@STAMP_BUILD_INFO = stamp-build-info -@BUILD_LIBQUADMATH_TRUE@CLEANFILES = $(STAMP_GENINSRC) $(STAMP_BUILD_INFO) -@BUILD_LIBQUADMATH_TRUE@MAINTAINERCLEANFILES = $(srcdir)/libquadmath.info +@BUILD_INFO_TRUE@STAMP_BUILD_INFO = stamp-build-info +CLEANFILES = $(STAMP_GENINSRC) $(STAMP_BUILD_INFO) +MAINTAINERCLEANFILES = $(srcdir)/libquadmath.info # 
Automake Documentation: # If your package has Texinfo files in many directories, you can use the @@ -564,8 +564,8 @@ TEXINFO_TEX = ../gcc/doc/include/texinfo.tex # Defines info, dvi, pdf and html targets MAKEINFOFLAGS = -I $(srcdir)/../gcc/doc/include -@BUILD_LIBQUADMATH_FALSE@info_TEXINFOS = -@BUILD_LIBQUADMATH_TRUE@info_TEXINFOS = libquadmath.texi +info_TEXINFOS = +info_TEXINFOS = libquadmath.texi libquadmath_TEXINFOS = libquadmath-vers.texi MULTISRCTOP = MULTIBUILDTOP = @@ -1187,6 +1187,7 @@ distclean-tags: -rm -f cscope.out cscope.in.out cscope.po.out cscope.files check-am: all-am check: check-am +#all-local all-am: Makefile $(INFO_DEPS) $(LTLIBRARIES) $(HEADERS) config.h \ all-local installdirs: @@ -1425,22 +1426,22 @@ uninstall-am: uninstall-dvi-am uninstall-html-am uninstall-info-am \ .PRECIOUS: Makefile -@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@quadmath.map-sun : $(srcdir)/quadmath.map \ -@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(top_srcdir)/../contrib/make_sunver.pl \ -@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(libquadmath_la_OBJECTS) $(libquadmath_la_LIBADD) -@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ perl $(top_srcdir)/../contrib/make_sunver.pl \ -@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(srcdir)/quadmath.map \ -@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ `echo $(libquadmath_la_OBJECTS) $(libquadmath_la_LIBADD) | \ -@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ sed 's,\([^/ ]*\)\.l\([ao]\),.libs/\1.\2,g'` \ -@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ > $@ || (rm -f $@ ; exit 1) - -@BUILD_LIBQUADMATH_TRUE@stamp-geninsrc: libquadmath.info -@BUILD_LIBQUADMATH_TRUE@ cp -p $(top_builddir)/libquadmath.info $(srcdir)/libquadmath.info -@BUILD_LIBQUADMATH_TRUE@ @touch $@ - 
-@BUILD_LIBQUADMATH_TRUE@stamp-build-info: libquadmath.texi $(libquadmath_TEXINFOS) -@BUILD_LIBQUADMATH_TRUE@ $(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir) -o libquadmath.info $(srcdir)/libquadmath.texi -@BUILD_LIBQUADMATH_TRUE@ @touch $@ +@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@quadmath.map-sun : $(srcdir)/quadmath.map \ +@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(top_srcdir)/../contrib/make_sunver.pl \ +@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(libquadmath_la_OBJECTS) $(libquadmath_la_LIBADD) +@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ perl $(top_srcdir)/../contrib/make_sunver.pl \ +@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(srcdir)/quadmath.map \ +@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ `echo $(libquadmath_la_OBJECTS) $(libquadmath_la_LIBADD) | \ +@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ sed 's,\([^/ ]*\)\.l\([ao]\),.libs/\1.\2,g'` \ +@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ > $@ || (rm -f $@ ; exit 1) + +stamp-geninsrc: libquadmath.info + cp -p $(top_builddir)/libquadmath.info $(srcdir)/libquadmath.info + @touch $@ + +stamp-build-info: libquadmath.texi $(libquadmath_TEXINFOS) + $(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir) -o libquadmath.info $(srcdir)/libquadmath.texi + @touch $@ all-local: $(ALL_LOCAL_DEPS) diff --git a/libquadmath/quadmath.h b/libquadmath/quadmath.h index 81eb957d2fa..faa5977cbc9 100644 --- a/libquadmath/quadmath.h +++ b/libquadmath/quadmath.h @@ -27,6 +27,9 @@ Boston, MA 02110-1301, USA. 
*/ extern "C" { #endif +#ifdef AARCH64_QUADMATH +typedef long double __float128; +#endif /* Define the complex type corresponding to __float128 ("_Complex __float128" is not allowed) */ #if (!defined(_ARCH_PPC)) || defined(__LONG_DOUBLE_IEEE128__) @@ -160,10 +163,9 @@ extern int quadmath_snprintf (char *str, size_t size, #define FLT128_MAX_10_EXP 4932 -#define HUGE_VALQ __builtin_huge_valq() /* The following alternative is valid, but brings the warning: (floating constant exceeds range of ‘__float128’) */ -/* #define HUGE_VALQ (__extension__ 0x1.0p32767Q) */ + #define HUGE_VALQ (__extension__ 0x1.0p32767Q) #define M_Eq 2.718281828459045235360287471352662498Q /* e */ #define M_LOG2Eq 1.442695040888963407359924681001892137Q /* log_2 e */ -- Gitee From d1e1ec0cd539f96be5a86b369b8c20b36ce9567f Mon Sep 17 00:00:00 2001 From: yangyang Date: Thu, 8 Jul 2021 14:38:39 +0800 Subject: [PATCH 02/13] [Backport] cselim: Extend to check non-trapping for more references Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=54ecfb182bc32140722022c1d9818dee4bdc0e45 If there is a dominating store, a store to the same reference can not be trapped. But previously, it only supports such check on MEM_REFs. So this patch extends it to support ARRAY_REFs and COMPONENT_REFs. This patch also supports a special case: if there is a dominating load of local variable without address escape, a store is not trapped, as local stack is always writable. Other loads are ignored for simplicity, as they don't help to check if a store can be trapped (the memory may be read-only). 
--- gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c | 2 +- .../gcc.dg/tree-ssa/pr89430-7-comp-ref.c | 17 +++ .../gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c | 15 +++ gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c | 2 +- gcc/tree-ssa-phiopt.c | 127 ++++++++++-------- 8 files changed, 106 insertions(+), 63 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c index ce242ba569b..8ee1850ac63 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c @@ -9,4 +9,4 @@ unsigned test(unsigned k, unsigned b) { return a[0]+a[1]; } -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c index 90ae36bfce2..9b96875ac7a 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c @@ -11,4 +11,4 @@ unsigned test(unsigned k, unsigned b) { return a[0]+a[1]; } -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c index c633cbe947d..b2d04119381 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c @@ -13,4 +13,4 @@ int test(int b, int k) { return a.data[0] + a.data[1]; } -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */ +/* { dg-final { 
scan-tree-dump "Conditional store replacement" "cselim" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c index 7cad563128d..8d3c4f7cc6a 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c @@ -16,4 +16,4 @@ int test(int b, int k) { return a.data[0].x + a.data[1].x; } -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c new file mode 100644 index 00000000000..c35a2afc70b --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-cselim-details" } */ + +typedef union { + int i; + float f; +} U; + +int foo(U *u, int b, int i) +{ + u->i = 0; + if (b) + u->i = i; + return u->i; +} + +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c new file mode 100644 index 00000000000..f9e66aefb13 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-cselim-details" } */ + +int *t; + +int f1 (int tt) +{ + int *t1 = t; + *t1 = -5; + if (*t1 < tt) + *((unsigned *) t1) = 5; + return *t1; +} + +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c index 09313716598..a06f339f0bb 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-pre-stats" } */ +/* { dg-options "-O2 
-fdump-tree-pre-stats -fno-tree-cselim" } */ typedef union { int i; diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c index b1e0dce93d8..3b5b6907679 100644 --- a/gcc/tree-ssa-phiopt.c +++ b/gcc/tree-ssa-phiopt.c @@ -1986,26 +1986,33 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb, ??? We currently are very conservative and assume that a load might trap even if a store doesn't (write-only memory). This probably is - overly conservative. */ + overly conservative. -/* A hash-table of SSA_NAMEs, and in which basic block an MEM_REF - through it was seen, which would constitute a no-trap region for - same accesses. */ -struct name_to_bb + We currently support a special case that for !TREE_ADDRESSABLE automatic + variables, it could ignore whether something is a load or store because the + local stack should be always writable. */ + +/* A hash-table of references (MEM_REF/ARRAY_REF/COMPONENT_REF), and in which + basic block an *_REF through it was seen, which would constitute a + no-trap region for same accesses. + + Size is needed to support 2 MEM_REFs of different types, like + MEM<double>(s_1) and MEM<long>(s_1), which would compare equal with + OEP_ADDRESS_OF. */ +struct ref_to_bb { - unsigned int ssa_name_ver; + tree exp; + HOST_WIDE_INT size; unsigned int phase; - bool store; - HOST_WIDE_INT offset, size; basic_block bb; }; /* Hashtable helpers. */ -struct ssa_names_hasher : free_ptr_hash +struct refs_hasher : free_ptr_hash { - static inline hashval_t hash (const name_to_bb *); - static inline bool equal (const name_to_bb *, const name_to_bb *); + static inline hashval_t hash (const ref_to_bb *); + static inline bool equal (const ref_to_bb *, const ref_to_bb *); }; /* Used for quick clearing of the hash-table when we see calls. @@ -2015,28 +2022,29 @@ static unsigned int nt_call_phase; /* The hash function. 
*/ inline hashval_t -ssa_names_hasher::hash (const name_to_bb *n) +refs_hasher::hash (const ref_to_bb *n) { - return n->ssa_name_ver ^ (((hashval_t) n->store) << 31) - ^ (n->offset << 6) ^ (n->size << 3); + inchash::hash hstate; + inchash::add_expr (n->exp, hstate, OEP_ADDRESS_OF); + hstate.add_hwi (n->size); + return hstate.end (); } /* The equality function of *P1 and *P2. */ inline bool -ssa_names_hasher::equal (const name_to_bb *n1, const name_to_bb *n2) +refs_hasher::equal (const ref_to_bb *n1, const ref_to_bb *n2) { - return n1->ssa_name_ver == n2->ssa_name_ver - && n1->store == n2->store - && n1->offset == n2->offset - && n1->size == n2->size; + return operand_equal_p (n1->exp, n2->exp, OEP_ADDRESS_OF) + && n1->size == n2->size; } class nontrapping_dom_walker : public dom_walker { public: nontrapping_dom_walker (cdi_direction direction, hash_set *ps) - : dom_walker (direction), m_nontrapping (ps), m_seen_ssa_names (128) {} + : dom_walker (direction), m_nontrapping (ps), m_seen_refs (128) + {} virtual edge before_dom_children (basic_block); virtual void after_dom_children (basic_block); @@ -2053,7 +2061,7 @@ private: hash_set *m_nontrapping; /* The hash table for remembering what we've seen. */ - hash_table m_seen_ssa_names; + hash_table m_seen_refs; }; /* Called by walk_dominator_tree, when entering the block BB. */ @@ -2102,65 +2110,68 @@ nontrapping_dom_walker::after_dom_children (basic_block bb) } /* We see the expression EXP in basic block BB. If it's an interesting - expression (an MEM_REF through an SSA_NAME) possibly insert the - expression into the set NONTRAP or the hash table of seen expressions. - STORE is true if this expression is on the LHS, otherwise it's on - the RHS. */ + expression of: + 1) MEM_REF + 2) ARRAY_REF + 3) COMPONENT_REF + possibly insert the expression into the set NONTRAP or the hash table + of seen expressions. STORE is true if this expression is on the LHS, + otherwise it's on the RHS. 
*/ void nontrapping_dom_walker::add_or_mark_expr (basic_block bb, tree exp, bool store) { HOST_WIDE_INT size; - if (TREE_CODE (exp) == MEM_REF - && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME - && tree_fits_shwi_p (TREE_OPERAND (exp, 1)) + if ((TREE_CODE (exp) == MEM_REF || TREE_CODE (exp) == ARRAY_REF + || TREE_CODE (exp) == COMPONENT_REF) && (size = int_size_in_bytes (TREE_TYPE (exp))) > 0) { - tree name = TREE_OPERAND (exp, 0); - struct name_to_bb map; - name_to_bb **slot; - struct name_to_bb *n2bb; + struct ref_to_bb map; + ref_to_bb **slot; + struct ref_to_bb *r2bb; basic_block found_bb = 0; - /* Try to find the last seen MEM_REF through the same - SSA_NAME, which can trap. */ - map.ssa_name_ver = SSA_NAME_VERSION (name); - map.phase = 0; - map.bb = 0; - map.store = store; - map.offset = tree_to_shwi (TREE_OPERAND (exp, 1)); - map.size = size; + if (!store) + { + tree base = get_base_address (exp); + /* Only record a LOAD of a local variable without address-taken, as + the local stack is always writable. This allows cselim on a STORE + with a dominating LOAD. */ + if (!auto_var_p (base) || TREE_ADDRESSABLE (base)) + return; + } - slot = m_seen_ssa_names.find_slot (&map, INSERT); - n2bb = *slot; - if (n2bb && n2bb->phase >= nt_call_phase) - found_bb = n2bb->bb; + /* Try to find the last seen *_REF, which can trap. */ + map.exp = exp; + map.size = size; + slot = m_seen_refs.find_slot (&map, INSERT); + r2bb = *slot; + if (r2bb && r2bb->phase >= nt_call_phase) + found_bb = r2bb->bb; - /* If we've found a trapping MEM_REF, _and_ it dominates EXP - (it's in a basic block on the path from us to the dominator root) + /* If we've found a trapping *_REF, _and_ it dominates EXP + (it's in a basic block on the path from us to the dominator root) then we can't trap. */ if (found_bb && (((size_t)found_bb->aux) & 1) == 1) { m_nontrapping->add (exp); } else - { + { /* EXP might trap, so insert it into the hash table. 
*/ - if (n2bb) + if (r2bb) { - n2bb->phase = nt_call_phase; - n2bb->bb = bb; + r2bb->phase = nt_call_phase; + r2bb->bb = bb; } else { - n2bb = XNEW (struct name_to_bb); - n2bb->ssa_name_ver = SSA_NAME_VERSION (name); - n2bb->phase = nt_call_phase; - n2bb->bb = bb; - n2bb->store = store; - n2bb->offset = map.offset; - n2bb->size = size; - *slot = n2bb; + r2bb = XNEW (struct ref_to_bb); + r2bb->phase = nt_call_phase; + r2bb->bb = bb; + r2bb->exp = exp; + r2bb->size = size; + *slot = r2bb; } } } -- Gitee From 309f459021a3681d728e5cf644a288ecf2b95175 Mon Sep 17 00:00:00 2001 From: zhanghaijian Date: Mon, 12 Jul 2021 09:42:11 +0800 Subject: [PATCH 03/13] [version] Set version to 10.3.1 Set version to 10.3.1 and clear DATESTAMP_s. --- gcc/BASE-VER | 2 +- gcc/Makefile.in | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/gcc/BASE-VER b/gcc/BASE-VER index 0719d810258..a9368325816 100644 --- a/gcc/BASE-VER +++ b/gcc/BASE-VER @@ -1 +1 @@ -10.3.0 +10.3.1 diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 646db219460..fdc2857d44a 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -885,8 +885,7 @@ PATCHLEVEL_c := \ # significant - do not remove it. BASEVER_s := "\"$(BASEVER_c)\"" DEVPHASE_s := "\"$(if $(DEVPHASE_c), ($(DEVPHASE_c)))\"" -DATESTAMP_s := \ - "\"$(if $(DEVPHASE_c)$(filter-out 0,$(PATCHLEVEL_c)), $(DATESTAMP_c))\"" +DATESTAMP_s := "\"\"" PKGVERSION_s:= "\"@PKGVERSION@\"" BUGURL_s := "\"@REPORT_BUGS_TO@\"" -- Gitee From bdb0f40cea4aa1a92ead381b645363ae0571c065 Mon Sep 17 00:00:00 2001 From: zhanghaijian Date: Mon, 12 Jul 2021 10:36:15 +0800 Subject: [PATCH 04/13] [Backport]tree-optimization: Avoid issueing loads in SM when possible Reference:https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=f9e1ea10e657af9fb02fafecf1a600740fd34409 Currently store-motion emits a load of the value in the loop preheader even when the original loop does not contain any read of the reference. This avoids doing this. 
In the conditional store-motion case we need to mark the sunk stores with no-warning since the control dependence is too tricky to figure out for the uninit warning. --- gcc/testsuite/gcc.dg/tree-ssa/pr39612.c | 21 +++++++++++ gcc/tree-ssa-loop-im.c | 47 ++++++++++++++++++++----- 2 files changed, 60 insertions(+), 8 deletions(-) create mode 100755 gcc/testsuite/gcc.dg/tree-ssa/pr39612.c diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr39612.c b/gcc/testsuite/gcc.dg/tree-ssa/pr39612.c new file mode 100755 index 00000000000..884f905148f --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr39612.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-lim2-details -Wuninitialized" } */ + +void foo(int *); +void f2(int dst[3], int R) +{ + int i, inter[2]; + + for (i = 1; i < R; i++) { + if (i & 8) + { + inter[0] = 1; + inter[1] = 1; + } + } + + foo(inter); +} + +/* { dg-final { scan-tree-dump-times "Executing store motion" 2 "lim2" } } */ +/* { dg-final { scan-tree-dump-not " = inter\\\[\[0-1\]\\\];" "lim2" } } */ diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c index abd5f702b91..b3fd1647fbd 100644 --- a/gcc/tree-ssa-loop-im.c +++ b/gcc/tree-ssa-loop-im.c @@ -127,6 +127,8 @@ public: bitmap stored; /* The set of loops in that this memory location is stored to. */ + bitmap loaded; /* The set of loops in that this memory location + is loaded from. */ vec accesses_in_loop; /* The locations of the accesses. Vector indexed by the loop number. */ @@ -1395,6 +1397,7 @@ mem_ref_alloc (ao_ref *mem, unsigned hash, unsigned id) ref->ref_decomposed = false; ref->hash = hash; ref->stored = NULL; + ref->loaded = NULL; bitmap_initialize (&ref->indep_loop, &lim_bitmap_obstack); bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack); ref->accesses_in_loop.create (1); @@ -1435,6 +1438,27 @@ mark_ref_stored (im_mem_ref *ref, class loop *loop) loop = loop_outer (loop); } +/* Set the LOOP bit in REF loaded bitmap and allocate that if + necessary. 
Return whether a bit was changed. */ + +static bool +set_ref_loaded_in_loop (im_mem_ref *ref, class loop *loop) +{ + if (!ref->loaded) + ref->loaded = BITMAP_ALLOC (&lim_bitmap_obstack); + return bitmap_set_bit (ref->loaded, loop->num); +} + +/* Marks reference REF as loaded in LOOP. */ + +static void +mark_ref_loaded (im_mem_ref *ref, class loop *loop) +{ + while (loop != current_loops->tree_root + && set_ref_loaded_in_loop (ref, loop)) + loop = loop_outer (loop); +} + /* Gathers memory references in statement STMT in LOOP, storing the information about them in the memory_accesses structure. Marks the vops accessed through unrecognized statements there as @@ -1571,6 +1595,8 @@ gather_mem_refs_stmt (class loop *loop, gimple *stmt) bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id); mark_ref_stored (ref, loop); } + else + mark_ref_loaded (ref, loop); init_lim_data (stmt)->ref = ref->id; return; } @@ -1968,6 +1994,8 @@ execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag, gsi = gsi_start_bb (then_bb); /* Insert actual store. */ stmt = gimple_build_assign (unshare_expr (mem), tmp_var); + /* Make sure to not warn about maybe-uninit uses of tmp_var here. */ + gimple_set_no_warning (stmt, true); gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); edge e1 = single_succ_edge (new_bb); @@ -2115,14 +2143,17 @@ execute_sm (class loop *loop, vec exits, im_mem_ref *ref) by move_computations after all dependencies. */ gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt); - /* FIXME/TODO: For the multi-threaded variant, we could avoid this - load altogether, since the store is predicated by a flag. We - could, do the load only if it was originally in the loop. */ - load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref)); - lim_data = init_lim_data (load); - lim_data->max_loop = loop; - lim_data->tgt_loop = loop; - gsi_insert_before (&gsi, load, GSI_SAME_STMT); + /* Avoid doing a load if there was no load of the ref in the loop. 
+ Esp. when the ref is not always stored we cannot optimize it + away later. */ + if (ref->loaded && bitmap_bit_p (ref->loaded, loop->num)) + { + load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref)); + lim_data = init_lim_data (load); + lim_data->max_loop = loop; + lim_data->tgt_loop = loop; + gsi_insert_before (&gsi, load, GSI_SAME_STMT); + } if (multi_threaded_model_p) { -- Gitee From dc238e97a75835231939e77e8568ccd9bc5187d5 Mon Sep 17 00:00:00 2001 From: zhanghaijian Date: Mon, 12 Jul 2021 10:46:16 +0800 Subject: [PATCH 05/13] [Backport]tree-optimization: Fix load eliding in SM Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0424a5ece5307cc22bbc0fe97edf4707d7a798ed This fixes the case of not using the multithreaded model when only conditionally storing to the destination. We cannot elide the load in this case. --- gcc/testsuite/gcc.dg/torture/pr94949.c | 17 +++++++++++++++++ gcc/tree-ssa-loop-im.c | 10 ++++++---- 2 files changed, 23 insertions(+), 4 deletions(-) create mode 100755 gcc/testsuite/gcc.dg/torture/pr94949.c diff --git a/gcc/testsuite/gcc.dg/torture/pr94949.c b/gcc/testsuite/gcc.dg/torture/pr94949.c new file mode 100755 index 00000000000..6182d77b3cd --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr94949.c @@ -0,0 +1,17 @@ +/* { dg-do run } */ +/* { dg-additional-options "-fallow-store-data-races" } */ + +static int x = 1; +static volatile int y = -1; +int +main() +{ + for (int i = 0; i < 128; ++i) + { + if (i == y) + x = i; + } + if (x != 1) + __builtin_abort (); + return 0; +} diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c index b3fd1647fbd..8c33735b1fa 100644 --- a/gcc/tree-ssa-loop-im.c +++ b/gcc/tree-ssa-loop-im.c @@ -2128,9 +2128,9 @@ execute_sm (class loop *loop, vec exits, im_mem_ref *ref) fmt_data.orig_loop = loop; for_each_index (&ref->mem.ref, force_move_till, &fmt_data); + bool always_stored = ref_always_accessed_p (loop, ref, true); if (bb_in_transaction (loop_preheader_edge (loop)->src) - || (! 
flag_store_data_races - && ! ref_always_accessed_p (loop, ref, true))) + || (! flag_store_data_races && ! always_stored)) multi_threaded_model_p = true; if (multi_threaded_model_p) @@ -2145,8 +2145,10 @@ execute_sm (class loop *loop, vec exits, im_mem_ref *ref) /* Avoid doing a load if there was no load of the ref in the loop. Esp. when the ref is not always stored we cannot optimize it - away later. */ - if (ref->loaded && bitmap_bit_p (ref->loaded, loop->num)) + away later. But when it is not always stored we must use a conditional + store then. */ + if ((!always_stored && !multi_threaded_model_p) + || (ref->loaded && bitmap_bit_p (ref->loaded, loop->num))) { load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref)); lim_data = init_lim_data (load); -- Gitee From cfd6920125f7968f0c1f5cb225f9fbd5bc8988b9 Mon Sep 17 00:00:00 2001 From: bule Date: Tue, 13 Jul 2021 15:26:54 +0800 Subject: [PATCH 06/13] [simdmath] Enable simdmath on kunpeng This enables the simd math functions supported by libmathlib for fortran/c/c++. Use -fsimdmath to turn on the generation of simdmath functions. The supported functions can be found in simdmath.h. Add more simd declarations if you need more kinds of math functions. -msimdmath-64 is used to turn on 64-bit simd math functions, which are not supported by libmathlib. Therefore, this option is off by default.
--- gcc/c-family/c-opts.c | 4 ++++ gcc/common.opt | 4 ++++ gcc/config/aarch64/aarch64.c | 8 +++++-- gcc/config/aarch64/aarch64.opt | 6 +++++ gcc/fortran/scanner.c | 4 ++++ gcc/opts.c | 17 +++++++++++++++ libgomp/Makefile.am | 4 ++-- libgomp/Makefile.in | 10 ++++++--- libgomp/configure | 4 +++- libgomp/configure.ac | 2 +- libgomp/simdmath.h.in | 40 ++++++++++++++++++++++++++++++++++ libgomp/simdmath_f.h.in | 11 ++++++++++ 12 files changed, 105 insertions(+), 9 deletions(-) create mode 100644 libgomp/simdmath.h.in create mode 100644 libgomp/simdmath_f.h.in diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c index c51d6d34726..dc1a8984871 100644 --- a/gcc/c-family/c-opts.c +++ b/gcc/c-family/c-opts.c @@ -780,6 +780,10 @@ c_common_post_options (const char **pfilename) if (cpp_opts->deps.style == DEPS_NONE) check_deps_environment_vars (); + if (flag_simdmath) + { + defer_opt (OPT_include, "simdmath.h"); + } handle_deferred_opts (); sanitize_cpp_opts (); diff --git a/gcc/common.opt b/gcc/common.opt index ec5235c3a41..8eb05570418 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1977,6 +1977,10 @@ fmath-errno Common Report Var(flag_errno_math) Init(1) Optimization SetByCombined Set errno after built-in math functions. +fsimdmath +Common Report Var(flag_simdmath) Init(0) Optimization +Enable auto-vectorize math functions for mathlib. This option will turn on -fno-math-errno and -fopenmp-simd. + fmax-errors= Common Joined RejectNegative UInteger Var(flag_max_errors) -fmax-errors= Maximum number of errors to report. diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 9b400c49ac6..79dc8f186f4 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -23077,8 +23077,12 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, elt_bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type)); if (clonei->simdlen == 0) { - count = 2; - vec_bits = (num == 0 ? 
64 : 128); + /* Currently mathlib or sleef hasn't provide function for V2SF mode + simdclone of single precision functions. (e.g._ZGVnN2v_expf) + Therefore this mode is disabled by default to avoid link error. + Use -msimdmath-64 option to enable this mode. */ + count = flag_simdmath_64 ? 2 : 1; + vec_bits = ((num == 0 && flag_simdmath_64) ? 64 : 128); clonei->simdlen = vec_bits / elt_bits; } else diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt index 1b3d942e0f5..4539156d6f4 100644 --- a/gcc/config/aarch64/aarch64.opt +++ b/gcc/config/aarch64/aarch64.opt @@ -190,6 +190,12 @@ precision of square root results to about 16 bits for single precision and to 32 bits for double precision. If enabled, it implies -mlow-precision-recip-sqrt. +msimdmath-64 +Target Var(flag_simdmath_64) Optimization +Allow compiler to generate V2SF 64 bits simdclone of math functions, +which is not currently supported in mathlib or sleef. +Therefore this option is disabled by default. + mlow-precision-div Target Var(flag_mlow_precision_div) Optimization Enable the division approximation. Enabling this reduces diff --git a/gcc/fortran/scanner.c b/gcc/fortran/scanner.c index 6f93508f934..42fd5a8be1e 100644 --- a/gcc/fortran/scanner.c +++ b/gcc/fortran/scanner.c @@ -2737,6 +2737,10 @@ gfc_new_file (void) && !load_file (flag_pre_include, NULL, false)) exit (FATAL_EXIT_CODE); + if (flag_simdmath + && !load_file ("simdmath_f.h", NULL, false)) + exit (FATAL_EXIT_CODE); + if (gfc_cpp_enabled ()) { result = gfc_cpp_preprocess (gfc_source_file); diff --git a/gcc/opts.c b/gcc/opts.c index 73162528938..e31aa560564 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -189,6 +189,7 @@ static const char use_diagnosed_msg[] = N_("Uses of this option are diagnosed.") typedef char *char_p; /* For DEF_VEC_P.
*/ +static void set_simdmath_flags (struct gcc_options *opts, int set); static void set_debug_level (enum debug_info_type type, int extended, const char *arg, struct gcc_options *opts, struct gcc_options *opts_set, @@ -2469,6 +2470,10 @@ common_handle_option (struct gcc_options *opts, dc->min_margin_width = value; break; + case OPT_fsimdmath: + set_simdmath_flags (opts, value); + break; + case OPT_fdump_: /* Deferred. */ break; @@ -2847,6 +2852,18 @@ common_handle_option (struct gcc_options *opts, return true; } +/* The following routines are used to set -fno-math-errno and -fopenmp-simd + to enable vector mathlib. */ +static void +set_simdmath_flags (struct gcc_options *opts, int set) +{ + if (set) + { + opts->x_flag_errno_math = 0; + opts->x_flag_openmp_simd = 1; + } +} + /* Used to set the level of strict aliasing warnings in OPTS, when no level is specified (i.e., when -Wstrict-aliasing, and not -Wstrict-aliasing=level was given). diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am index 669b9e4defd..0d9cc96481c 100644 --- a/libgomp/Makefile.am +++ b/libgomp/Makefile.am @@ -74,10 +74,10 @@ libgomp_la_SOURCES += openacc.f90 endif nodist_noinst_HEADERS = libgomp_f.h -nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h +nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h simdmath.h if USE_FORTRAN nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \ - openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod + openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod simdmath_f.h endif LTLDFLAGS = $(shell $(SHELL) $(top_srcdir)/../libtool-ldflags $(LDFLAGS)) diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in index ae5d9d54705..dd4b334895e 100644 --- a/libgomp/Makefile.in +++ b/libgomp/Makefile.in @@ -148,7 +148,7 @@ am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ configure.lineno config.status.lineno mkinstalldirs = $(SHELL) $(top_srcdir)/../mkinstalldirs CONFIG_HEADER = config.h -CONFIG_CLEAN_FILES = 
omp.h omp_lib.h omp_lib.f90 libgomp_f.h \ +CONFIG_CLEAN_FILES = omp.h omp_lib.h simdmath.h simdmath_f.h omp_lib.f90 libgomp_f.h \ libgomp.spec CONFIG_CLEAN_VPATH_FILES = am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; @@ -609,9 +609,9 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \ @PLUGIN_GCN_TRUE@libgomp_plugin_gcn_la_LIBADD = libgomp.la $(PLUGIN_GCN_LIBS) @PLUGIN_GCN_TRUE@libgomp_plugin_gcn_la_LIBTOOLFLAGS = --tag=disable-static nodist_noinst_HEADERS = libgomp_f.h -nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h +nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h simdmath.h @USE_FORTRAN_TRUE@nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \ -@USE_FORTRAN_TRUE@ openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod +@USE_FORTRAN_TRUE@ openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod simdmath_f.h LTLDFLAGS = $(shell $(SHELL) $(top_srcdir)/../libtool-ldflags $(LDFLAGS)) LINK = $(LIBTOOL) --tag CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ @@ -702,6 +702,10 @@ omp.h: $(top_builddir)/config.status $(srcdir)/omp.h.in cd $(top_builddir) && $(SHELL) ./config.status $@ omp_lib.h: $(top_builddir)/config.status $(srcdir)/omp_lib.h.in cd $(top_builddir) && $(SHELL) ./config.status $@ +simdmath_f.h: $(top_builddir)/config.status $(srcdir)/simdmath_f.h.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +simdmath.h: $(top_builddir)/config.status $(srcdir)/simdmath.h.in + cd $(top_builddir) && $(SHELL) ./config.status $@ omp_lib.f90: $(top_builddir)/config.status $(srcdir)/omp_lib.f90.in cd $(top_builddir) && $(SHELL) ./config.status $@ libgomp_f.h: $(top_builddir)/config.status $(srcdir)/libgomp_f.h.in diff --git a/libgomp/configure b/libgomp/configure index 5240f7e9d39..b03036c2738 100644 --- a/libgomp/configure +++ b/libgomp/configure @@ -17050,7 +17050,7 @@ fi -ac_config_files="$ac_config_files omp.h omp_lib.h omp_lib.f90 libgomp_f.h" 
+ac_config_files="$ac_config_files omp.h omp_lib.h simdmath.h simdmath_f.h omp_lib.f90 libgomp_f.h" ac_config_files="$ac_config_files Makefile testsuite/Makefile libgomp.spec" @@ -18205,6 +18205,8 @@ do "libtool") CONFIG_COMMANDS="$CONFIG_COMMANDS libtool" ;; "omp.h") CONFIG_FILES="$CONFIG_FILES omp.h" ;; "omp_lib.h") CONFIG_FILES="$CONFIG_FILES omp_lib.h" ;; + "simdmath.h") CONFIG_FILES="$CONFIG_FILES simdmath.h" ;; + "simdmath_f.h") CONFIG_FILES="$CONFIG_FILES simdmath_f.h" ;; "omp_lib.f90") CONFIG_FILES="$CONFIG_FILES omp_lib.f90" ;; "libgomp_f.h") CONFIG_FILES="$CONFIG_FILES libgomp_f.h" ;; "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; diff --git a/libgomp/configure.ac b/libgomp/configure.ac index ef5d293c31e..569c2065a66 100644 --- a/libgomp/configure.ac +++ b/libgomp/configure.ac @@ -433,7 +433,7 @@ CFLAGS="$save_CFLAGS" # Determine what GCC version number to use in filesystem paths. GCC_BASE_VER -AC_CONFIG_FILES(omp.h omp_lib.h omp_lib.f90 libgomp_f.h) +AC_CONFIG_FILES(omp.h omp_lib.h simdmath.h simdmath_f.h omp_lib.f90 libgomp_f.h) AC_CONFIG_FILES(Makefile testsuite/Makefile libgomp.spec) AC_CONFIG_FILES([testsuite/libgomp-test-support.pt.exp:testsuite/libgomp-test-support.exp.in]) AC_CONFIG_FILES([testsuite/libgomp-site-extra.exp]) diff --git a/libgomp/simdmath.h.in b/libgomp/simdmath.h.in new file mode 100644 index 00000000000..ab91a4ec317 --- /dev/null +++ b/libgomp/simdmath.h.in @@ -0,0 +1,40 @@ +#ifdef __cplusplus +extern "C" { +#endif + +#pragma omp declare simd simdlen(2) notinbranch +double cos (double x); + +#pragma omp declare simd simdlen(4) notinbranch +float cosf (float x); + +#pragma omp declare simd simdlen(2) notinbranch +double sin (double x); + +#pragma omp declare simd simdlen(4) notinbranch +float sinf (float x); + +#pragma omp declare simd simdlen(2) notinbranch +double exp (double x); + +#pragma omp declare simd simdlen(4) notinbranch +float expf (float x); + +#pragma omp declare simd simdlen(2) notinbranch +double log (double 
x); + +#pragma omp declare simd simdlen(4) notinbranch +float logf (float x); + +#pragma omp declare simd simdlen(2) notinbranch +double pow (double x, double y); + +#pragma omp declare simd simdlen(4) notinbranch +float powf (float x, float y); + +#pragma omp declare simd simdlen(4) notinbranch +float exp2f (float x); + +#ifdef __cplusplus +} // extern "C" +#endif diff --git a/libgomp/simdmath_f.h.in b/libgomp/simdmath_f.h.in new file mode 100644 index 00000000000..550595015db --- /dev/null +++ b/libgomp/simdmath_f.h.in @@ -0,0 +1,11 @@ +!GCC$ builtin (cos) attributes simd (notinbranch) +!GCC$ builtin (cosf) attributes simd (notinbranch) +!GCC$ builtin (sin) attributes simd (notinbranch) +!GCC$ builtin (sinf) attributes simd (notinbranch) +!GCC$ builtin (exp) attributes simd (notinbranch) +!GCC$ builtin (expf) attributes simd (notinbranch) +!GCC$ builtin (exp2f) attributes simd (notinbranch) +!GCC$ builtin (log) attributes simd (notinbranch) +!GCC$ builtin (logf) attributes simd (notinbranch) +!GCC$ builtin (pow) attributes simd (notinbranch) +!GCC$ builtin (powf) attributes simd (notinbranch) -- Gitee From 07033bcc5b9e4c03846cd84b4587cd493fcf7d53 Mon Sep 17 00:00:00 2001 From: zhoukaipeng Date: Wed, 14 Jul 2021 11:24:06 +0800 Subject: [PATCH 07/13] [Vect] Enable skipping vectorization on reduction chains Sometimes either vectorization on reduction chains or reductions is possible. But the latter is better. The option "-ftree-vect-analyze-slp-group" skips the former.
--- gcc/common.opt | 4 ++++ gcc/testsuite/gcc.dg/vect/vect-reduc-12.c | 20 ++++++++++++++++++++ gcc/tree-vect-slp.c | 3 ++- 3 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.dg/vect/vect-reduc-12.c diff --git a/gcc/common.opt b/gcc/common.opt index 8eb05570418..55d4eb5a351 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -2968,6 +2968,10 @@ ftree-slp-vectorize Common Report Var(flag_tree_slp_vectorize) Optimization EnabledBy(ftree-vectorize) Enable basic block vectorization (SLP) on trees. +ftree-vect-analyze-slp-group +Common Report Var(flag_tree_slp_group) Init(0) +Disable SLP vectorization for reduction chain on tree. + fvect-cost-model= Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_cost_model) Init(VECT_COST_MODEL_DEFAULT) Optimization -fvect-cost-model=[unlimited|dynamic|cheap] Specifies the cost model for vectorization. diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-12.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-12.c new file mode 100644 index 00000000000..913f1ef28df --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-12.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -funsafe-math-optimizations -fno-tree-reassoc -ftree-vect-analyze-slp-group" } */ +void f(double *a, double *res, double m) { + double res1, res0; + res1 = 0; + res0 = 0; + for (int i = 0; i < 1000; i+=8) { + res0 += a[i] * m; + res1 += a[i+1] * m; + res0 += a[i+2] * m; + res1 += a[i+3] * m; + res0 += a[i+4] * m; + res1 += a[i+5] * m; + res0 += a[i+6] * m; + res1 += a[i+7] * m; + } + res[0] += res0; + res[1] += res1; +} +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */ diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index adc579ff544..476b3237054 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -2480,7 +2480,8 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size) { /* Find SLP sequences starting from reduction chains. 
*/ FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element) - if (! vect_analyze_slp_instance (vinfo, bst_map, first_element, + if (flag_tree_slp_group + || ! vect_analyze_slp_instance (vinfo, bst_map, first_element, max_tree_size)) { /* Dissolve reduction chain group. */ -- Gitee From 79d1ed2d7f166a498662f6111a4defc55f0061c7 Mon Sep 17 00:00:00 2001 From: yangyang Date: Thu, 15 Jul 2021 09:27:27 +0800 Subject: [PATCH 08/13] [Backport]tree-optimization: Add checks to avoid spoiling if-conversion Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=33d114f570b4a3583421c700396fd5945acebc28 Add some checks in pass_split_paths, so that pass_split_paths can recognize the missed if-conversion opportunity and does not duplicate the corresponding block. --- gcc/gimple-ssa-split-paths.c | 39 +++++++++++++++++++ gcc/testsuite/gcc.dg/tree-ssa/split-path-12.c | 19 +++++++++ 2 files changed, 58 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/split-path-12.c diff --git a/gcc/gimple-ssa-split-paths.c b/gcc/gimple-ssa-split-paths.c index b3efd43c7ef..9c32da76369 100644 --- a/gcc/gimple-ssa-split-paths.c +++ b/gcc/gimple-ssa-split-paths.c @@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. If not see #include "gimple-ssa.h" #include "tree-phinodes.h" #include "ssa-iterators.h" +#include "fold-const.h" /* Given LATCH, the latch block in a loop, see if the shape of the path reaching LATCH is suitable for being split by duplication. @@ -254,6 +255,44 @@ is_feasible_trace (basic_block bb) } } + /* Canonicalize the form. */ + if (single_pred_p (pred1) && single_pred (pred1) == pred2 + && num_stmts_in_pred1 == 0) + std::swap (pred1, pred2); + + /* This is meant to catch another kind of cases that are likely opportunities + for if-conversion. After canonicalizing, PRED2 must be an empty block and + PRED1 must be the only predecessor of PRED2. Moreover, PRED1 is supposed + to end with a cond_stmt which has the same args with the PHI in BB.
*/ + if (single_pred_p (pred2) && single_pred (pred2) == pred1 + && num_stmts_in_pred2 == 0) + { + gimple *cond_stmt = last_stmt (pred1); + if (cond_stmt && gimple_code (cond_stmt) == GIMPLE_COND) + { + tree lhs = gimple_cond_lhs (cond_stmt); + tree rhs = gimple_cond_rhs (cond_stmt); + + gimple_stmt_iterator gsi; + for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *phi = gsi_stmt (gsi); + if ((operand_equal_p (gimple_phi_arg_def (phi, 0), lhs) + && operand_equal_p (gimple_phi_arg_def (phi, 1), rhs)) + || (operand_equal_p (gimple_phi_arg_def (phi, 0), rhs) + && (operand_equal_p (gimple_phi_arg_def (phi, 1), lhs)))) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, + "Block %d appears to be optimized to a join " + "point for if-convertable half-diamond.\n", + bb->index); + return false; + } + } + } + } + /* If the joiner has no PHIs with useful uses there is zero chance of CSE/DCE/jump-threading possibilities exposed by duplicating it. */ bool found_useful_phi = false; diff --git a/gcc/testsuite/gcc.dg/tree-ssa/split-path-12.c b/gcc/testsuite/gcc.dg/tree-ssa/split-path-12.c new file mode 100644 index 00000000000..19a130d9bf1 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/split-path-12.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fsplit-paths -fdump-tree-split-paths-details " } */ + +double +foo(double *d1, double *d2, double *d3, int num, double *ip) +{ + double dmax[3]; + + for (int i = 0; i < num; i++) { + dmax[0] = d1[i] < dmax[0] ? dmax[0] : d1[i]; + dmax[1] = d2[i] < dmax[1] ? dmax[1] : d2[i]; + dmax[2] = d3[i] < dmax[2] ? 
dmax[2] : d3[i]; + ip[i] = dmax[2]; + } + + return dmax[0] + dmax[1] + dmax[2]; +} + +/* { dg-final { scan-tree-dump "appears to be optimized to a join point for if-convertable half-diamond" "split-paths" } } */ -- Gitee From 7bc78d0ab13c37e2b11adb385d9916181ec4cc20 Mon Sep 17 00:00:00 2001 From: zhanghaijian Date: Thu, 15 Jul 2021 09:04:55 +0800 Subject: [PATCH 09/13] [Backport]expand: Simplify removing subregs when expanding a copy [PR95254] Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=9a182ef9ee011935d827ab5c6c9a7cd8e22257d8 In rtl expand, if we have a copy that matches one of the following patterns: (set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...))) (set (subreg:M1 (reg:M2 ...)) (mem:M1 ADDR)) (set (mem:M1 ADDR) (subreg:M1 (reg:M2 ...))) (set (subreg:M1 (reg:M2 ...)) (constant C)) where mode M1 is equal in size to M2, try to detect whether the mode change involves an implicit round trip through memory. If so, see if we can avoid that by removing the subregs and doing the move in mode M2 instead. --- gcc/expr.c | 72 ++++++++++++++++++++++ gcc/testsuite/gcc.target/aarch64/pr95254.c | 19 ++++++ gcc/testsuite/gcc.target/i386/pr67609.c | 2 +- 3 files changed, 92 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/pr95254.c diff --git a/gcc/expr.c b/gcc/expr.c index 991b26f3341..d66fdd4e93d 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -3814,6 +3814,78 @@ emit_move_insn (rtx x, rtx y) gcc_assert (mode != BLKmode && (GET_MODE (y) == mode || GET_MODE (y) == VOIDmode)); + /* If we have a copy that looks like one of the following patterns: + (set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...))) + (set (subreg:M1 (reg:M2 ...)) (mem:M1 ADDR)) + (set (mem:M1 ADDR) (subreg:M1 (reg:M2 ...))) + (set (subreg:M1 (reg:M2 ...)) (constant C)) + where mode M1 is equal in size to M2, try to detect whether the + mode change involves an implicit round trip through memory. 
+ If so, see if we can avoid that by removing the subregs and + doing the move in mode M2 instead. */ + + rtx x_inner = NULL_RTX; + rtx y_inner = NULL_RTX; + +#define CANDIDATE_SUBREG_P(subreg) \ + (REG_P (SUBREG_REG (subreg)) \ + && known_eq (GET_MODE_SIZE (GET_MODE (SUBREG_REG (subreg))), \ + GET_MODE_SIZE (GET_MODE (subreg))) \ + && optab_handler (mov_optab, GET_MODE (SUBREG_REG (subreg))) \ + != CODE_FOR_nothing) + +#define CANDIDATE_MEM_P(innermode, mem) \ + (!targetm.can_change_mode_class ((innermode), GET_MODE (mem), ALL_REGS) \ + && !push_operand ((mem), GET_MODE (mem)) \ + /* Not a candiate if innermode requires too much alignment. */ \ + && (MEM_ALIGN (mem) >= GET_MODE_ALIGNMENT (innermode) \ + || targetm.slow_unaligned_access (GET_MODE (mem), \ + MEM_ALIGN (mem)) \ + || !targetm.slow_unaligned_access ((innermode), \ + MEM_ALIGN (mem)))) + + if (SUBREG_P (x) && CANDIDATE_SUBREG_P (x)) + x_inner = SUBREG_REG (x); + + if (SUBREG_P (y) && CANDIDATE_SUBREG_P (y)) + y_inner = SUBREG_REG (y); + + if (x_inner != NULL_RTX + && y_inner != NULL_RTX + && GET_MODE (x_inner) == GET_MODE (y_inner) + && !targetm.can_change_mode_class (GET_MODE (x_inner), mode, ALL_REGS)) + { + x = x_inner; + y = y_inner; + mode = GET_MODE (x_inner); + } + else if (x_inner != NULL_RTX + && MEM_P (y) + && CANDIDATE_MEM_P (GET_MODE (x_inner), y)) + { + x = x_inner; + y = adjust_address (y, GET_MODE (x_inner), 0); + mode = GET_MODE (x_inner); + } + else if (y_inner != NULL_RTX + && MEM_P (x) + && CANDIDATE_MEM_P (GET_MODE (y_inner), x)) + { + x = adjust_address (x, GET_MODE (y_inner), 0); + y = y_inner; + mode = GET_MODE (y_inner); + } + else if (x_inner != NULL_RTX + && CONSTANT_P (y) + && !targetm.can_change_mode_class (GET_MODE (x_inner), + mode, ALL_REGS) + && (y_inner = simplify_subreg (GET_MODE (x_inner), y, mode, 0))) + { + x = x_inner; + y = y_inner; + mode = GET_MODE (x_inner); + } + if (CONSTANT_P (y)) { if (optimize diff --git a/gcc/testsuite/gcc.target/aarch64/pr95254.c 
b/gcc/testsuite/gcc.target/aarch64/pr95254.c new file mode 100644 index 00000000000..10bfc868197 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr95254.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-slp-vectorize -march=armv8.2-a+sve -msve-vector-bits=256" } */ + +typedef short __attribute__((vector_size (8))) v4hi; + +typedef union U4HI { v4hi v; short a[4]; } u4hi; + +short b[4]; + +void pass_v4hi (v4hi v) +{ + int i; + u4hi u; + u.v = v; + for (i = 0; i < 4; i++) + b[i] = u.a[i]; +}; + +/* { dg-final { scan-assembler-not "ptrue" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr67609.c b/gcc/testsuite/gcc.target/i386/pr67609.c index 518071bdd86..398cdba5d5f 100644 --- a/gcc/testsuite/gcc.target/i386/pr67609.c +++ b/gcc/testsuite/gcc.target/i386/pr67609.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-O2 -msse2" } */ /* { dg-require-effective-target lp64 } */ -/* { dg-final { scan-assembler "movdqa" } } */ +/* { dg-final { scan-assembler "movq\t%xmm0" } } */ #include __m128d reg; -- Gitee From b8b3e29e4cceae2bab6e0774b1af994dbe713d97 Mon Sep 17 00:00:00 2001 From: zhanghaijian Date: Thu, 15 Jul 2021 09:13:11 +0800 Subject: [PATCH 10/13] [Backport]tree-optimization/94963 - avoid bogus uninit warning with store-motion Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=371905d12259c180efb9b1f1b5716e969feb60f9 Eliding the load for store-motion causes an uninitialized variable flowing into the loop, conditionally initialized and used. The uninit warning cannot relate the flag used to guard the initialization and use with the actual initialization so the following robustifies the previous approach of marking the conditional store as not to be warned on by instead initializing the variable on loop entry from an uninitialized variable we mark as not to be warned for. 
--- gcc/testsuite/gcc.dg/pr94963.c | 35 ++++++++++++++++++++++++++++++++++ gcc/tree-ssa-loop-im.c | 18 ++++++++++------- 2 files changed, 46 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/pr94963.c diff --git a/gcc/testsuite/gcc.dg/pr94963.c b/gcc/testsuite/gcc.dg/pr94963.c new file mode 100644 index 00000000000..09c0524fb3a --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr94963.c @@ -0,0 +1,35 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -Wall" } */ + +typedef struct +{ + int p1; + int p2; + int p3; +} P; +struct S +{ + int field; +}; +extern int v2; +extern void foo (struct S *map); +static struct S var; +const P *pv; +int ps; +void +f (void) +{ + if (pv != 0) + for (const P *ph = pv; ph < &pv[ps]; ++ph) + switch (ph->p1) + { + case 1: + v2 = ph->p2; + break; + case 2: + var.field = ph->p3; + break; + } + if (var.field != 0) /* { dg-bogus "uninitialized" } */ + foo (&var); +} diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c index 8c33735b1fa..d74a46ef352 100644 --- a/gcc/tree-ssa-loop-im.c +++ b/gcc/tree-ssa-loop-im.c @@ -1994,8 +1994,6 @@ execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag, gsi = gsi_start_bb (then_bb); /* Insert actual store. */ stmt = gimple_build_assign (unshare_expr (mem), tmp_var); - /* Make sure to not warn about maybe-uninit uses of tmp_var here. */ - gimple_set_no_warning (stmt, true); gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); edge e1 = single_succ_edge (new_bb); @@ -2149,13 +2147,19 @@ execute_sm (class loop *loop, vec exits, im_mem_ref *ref) store then. 
*/ if ((!always_stored && !multi_threaded_model_p) || (ref->loaded && bitmap_bit_p (ref->loaded, loop->num))) + load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref)); + else { - load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref)); - lim_data = init_lim_data (load); - lim_data->max_loop = loop; - lim_data->tgt_loop = loop; - gsi_insert_before (&gsi, load, GSI_SAME_STMT); + /* If not emitting a load mark the uninitialized state on the + loop entry as not to be warned for. */ + tree uninit = create_tmp_reg (TREE_TYPE (tmp_var)); + TREE_NO_WARNING (uninit) = 1; + load = gimple_build_assign (tmp_var, uninit); } + lim_data = init_lim_data (load); + lim_data->max_loop = loop; + lim_data->tgt_loop = loop; + gsi_insert_before (&gsi, load, GSI_SAME_STMT); if (multi_threaded_model_p) { -- Gitee From 78cf3b95d7b895cfe8d6f1c2a48ebc08a662eef0 Mon Sep 17 00:00:00 2001 From: bule Date: Sat, 17 Jul 2021 16:38:10 +0800 Subject: [PATCH 11/13] [simdmath] Enable 64-bits simd when test simd_pcs_attribute-3 Enable 64-bits simd when test simd_pcs_attribute-3. The 64-bits simd is default to off without specify the -msimdmath-64. --- gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c b/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c index 95f6a6803e8..e0e0efa9d7e 100644 --- a/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c +++ b/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-Ofast" } */ +/* { dg-options "-Ofast -msimdmath-64" } */ __attribute__ ((__simd__)) __attribute__ ((__nothrow__ , __leaf__ , __const__)) -- Gitee From 26ea42402eede6a441c9d74ec6b6086e5bf0bf79 Mon Sep 17 00:00:00 2001 From: bule Date: Mon, 19 Jul 2021 12:04:08 +0800 Subject: [PATCH 12/13] [fp-model] Enable fp-model on kunpeng Enable fp-model options on kunpeng for precision control. 
--- gcc/common.opt | 26 +++++ gcc/config/aarch64/aarch64-linux.h | 3 +- gcc/flag-types.h | 9 ++ gcc/fortran/options.c | 8 ++ gcc/opts-common.c | 146 ++++++++++++++++++++++++++++- gcc/opts.c | 68 ++++++++++++++ 6 files changed, 256 insertions(+), 4 deletions(-) diff --git a/gcc/common.opt b/gcc/common.opt index 55d4eb5a351..79c9ef6615b 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1545,6 +1545,32 @@ ffp-int-builtin-inexact Common Report Var(flag_fp_int_builtin_inexact) Init(1) Optimization Allow built-in functions ceil, floor, round, trunc to raise \"inexact\" exceptions. +fftz +Common Report Var(flag_ftz) Optimization +Control fpcr register for flush to zero. + +fp-model= +Common Joined RejectNegative Enum(fp_model) Var(flag_fp_model) Init(FP_MODEL_NORMAL) Optimization +-fp-model=[normal|fast|precise|except|strict] Perform floating-point precision control. + +Enum +Name(fp_model) Type(enum fp_model) UnknownError(unknown floating point precision model %qs) + +EnumValue +Enum(fp_model) String(normal) Value(FP_MODEL_NORMAL) + +EnumValue +Enum(fp_model) String(fast) Value(FP_MODEL_FAST) + +EnumValue +Enum(fp_model) String(precise) Value(FP_MODEL_PRECISE) + +EnumValue +Enum(fp_model) String(except) Value(FP_MODEL_EXCEPT) + +EnumValue +Enum(fp_model) String(strict) Value(FP_MODEL_STRICT) + ; Nonzero means don't put addresses of constant functions in registers. ; Used for compiling the Unix kernel, where strange substitutions are ; done on the assembly output. 
diff --git a/gcc/config/aarch64/aarch64-linux.h b/gcc/config/aarch64/aarch64-linux.h index e587e2e9ad6..331b12c8702 100644 --- a/gcc/config/aarch64/aarch64-linux.h +++ b/gcc/config/aarch64/aarch64-linux.h @@ -50,7 +50,8 @@ #define LINK_SPEC LINUX_TARGET_LINK_SPEC AARCH64_ERRATA_LINK_SPEC #define GNU_USER_TARGET_MATHFILE_SPEC \ - "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}" + "%{Ofast|ffast-math|funsafe-math-optimizations|fp-model=fast|fftz:\ + %{!fno-ftz:crtfastmath.o%s}}" #undef ENDFILE_SPEC #define ENDFILE_SPEC \ diff --git a/gcc/flag-types.h b/gcc/flag-types.h index 852ea76eaa2..5832298251e 100644 --- a/gcc/flag-types.h +++ b/gcc/flag-types.h @@ -223,6 +223,15 @@ enum fp_contract_mode { FP_CONTRACT_FAST = 2 }; +/* Floating-point precision mode. */ +enum fp_model { + FP_MODEL_NORMAL = 0, + FP_MODEL_FAST = 1, + FP_MODEL_PRECISE = 2, + FP_MODEL_EXCEPT = 3, + FP_MODEL_STRICT = 4 +}; + /* Scalar storage order kind. */ enum scalar_storage_order_kind { SSO_NATIVE = 0, diff --git a/gcc/fortran/options.c b/gcc/fortran/options.c index 4cc8a908417..c59dcf63781 100644 --- a/gcc/fortran/options.c +++ b/gcc/fortran/options.c @@ -250,6 +250,7 @@ form_from_filename (const char *filename) return f_form; } +static void gfc_handle_fpe_option (const char *arg, bool trap); /* Finalize commandline options. */ @@ -277,6 +278,13 @@ gfc_post_options (const char **pfilename) if (flag_protect_parens == -1) flag_protect_parens = !optimize_fast; + /* If fp-model=precise/strict, turn on all ffpe-trap and ffpe-summary. */ + if (flag_fp_model == FP_MODEL_EXCEPT || flag_fp_model == FP_MODEL_STRICT) + { + gfc_handle_fpe_option ("all", false); + gfc_handle_fpe_option ("invalid,zero,overflow,underflow", true); + } + /* -Ofast sets implies -fstack-arrays unless an explicit size is set for stack arrays. 
*/ if (flag_stack_arrays == -1 && flag_max_stack_var_size == -2) diff --git a/gcc/opts-common.c b/gcc/opts-common.c index de9510abd64..bf82b05c8a2 100644 --- a/gcc/opts-common.c +++ b/gcc/opts-common.c @@ -26,7 +26,8 @@ along with GCC; see the file COPYING3. If not see #include "diagnostic.h" #include "spellcheck.h" -static void prune_options (struct cl_decoded_option **, unsigned int *); +static void prune_options (struct cl_decoded_option **, unsigned int *, + unsigned int); /* An option that is undocumented, that takes a joined argument, and that doesn't fit any of the classes of uses (language/common, @@ -988,7 +989,7 @@ decode_cmdline_options_to_array (unsigned int argc, const char **argv, *decoded_options = opt_array; *decoded_options_count = num_decoded_options; - prune_options (decoded_options, decoded_options_count); + prune_options (decoded_options, decoded_options_count, lang_mask); } /* Return true if NEXT_OPT_IDX cancels OPT_IDX. Return false if the @@ -1009,11 +1010,109 @@ cancel_option (int opt_idx, int next_opt_idx, int orig_next_opt_idx) return false; } +/* Check whether opt_idx exists in decoded_options array bewteen index + start and end. If found, return its index in decoded_options, + else return end. */ +static unsigned int +find_opt_idx (const struct cl_decoded_option *decoded_options, + unsigned int decoded_options_count, + unsigned int start, unsigned int end, unsigned int opt_idx) +{ + gcc_assert (end <= decoded_options_count); + gcc_assert (opt_idx < cl_options_count); + unsigned int k; + for (k = start; k < end; k++) + { + if (decoded_options[k].opt_index == opt_idx) + { + return k; + } + } + return k; +} + +/* remove the opt_index element from decoded_options array. 
*/ +static unsigned int +remove_option (struct cl_decoded_option *decoded_options, + unsigned int decoded_options_count, + unsigned int opt_index) +{ + gcc_assert (opt_index < decoded_options_count); + unsigned int i; + for (i = opt_index; i < decoded_options_count - 1; i++) + { + decoded_options[i] = decoded_options[i + 1]; + } + return decoded_options_count - 1; +} + +/* Handle the priority between fp-model, Ofast, and + ffast-math. */ +static unsigned int +handle_fp_model_driver (struct cl_decoded_option *decoded_options, + unsigned int decoded_options_count, + unsigned int fp_model_index, + unsigned int lang_mask) +{ + struct cl_decoded_option fp_model_opt = decoded_options[fp_model_index]; + enum fp_model model = (enum fp_model) fp_model_opt.value; + if (model == FP_MODEL_PRECISE || model == FP_MODEL_STRICT) + { + /* If found Ofast, override Ofast with O3. */ + unsigned int Ofast_index; + Ofast_index = find_opt_idx (decoded_options, decoded_options_count, + 0, decoded_options_count, OPT_Ofast); + while (Ofast_index != decoded_options_count) + { + const char *tmp_argv = "-O3"; + decode_cmdline_option (&tmp_argv, lang_mask, + &decoded_options[Ofast_index]); + warning (0, "%<-Ofast%> is degraded to %<-O3%> due to %qs", + fp_model_opt.orig_option_with_args_text); + Ofast_index = find_opt_idx (decoded_options, decoded_options_count, + 0, decoded_options_count, OPT_Ofast); + } + /* If found ffast-math before fp-model=precise/strict + it, cancel it. */ + unsigned int ffast_math_index; + ffast_math_index + = find_opt_idx (decoded_options, decoded_options_count, 0, + fp_model_index, OPT_ffast_math); + if (ffast_math_index != fp_model_index) + { + decoded_options_count + = remove_option (decoded_options, decoded_options_count, + ffast_math_index); + warning (0, "%<-ffast-math%> before %qs is canceled", + fp_model_opt.orig_option_with_args_text); + } + } + if (model == FP_MODEL_FAST) + { + /* If found -fno-fast-math after fp-model=fast, cancel this one. 
*/ + unsigned int fno_fast_math_index; + fno_fast_math_index + = find_opt_idx (decoded_options, decoded_options_count, fp_model_index, + decoded_options_count, OPT_ffast_math); + if (fno_fast_math_index != decoded_options_count + && decoded_options[fno_fast_math_index].value == 0) + { + decoded_options_count + = remove_option (decoded_options, decoded_options_count, + fp_model_index); + warning (0, + "%<-fp-model=fast%> before %<-fno-fast-math%> is canceled"); + } + } + return decoded_options_count; +} + /* Filter out options canceled by the ones after them. */ static void prune_options (struct cl_decoded_option **decoded_options, - unsigned int *decoded_options_count) + unsigned int *decoded_options_count, + unsigned int lang_mask) { unsigned int old_decoded_options_count = *decoded_options_count; struct cl_decoded_option *old_decoded_options = *decoded_options; @@ -1024,7 +1123,12 @@ prune_options (struct cl_decoded_option **decoded_options, const struct cl_option *option; unsigned int fdiagnostics_color_idx = 0; + if (!diagnostic_ready_p ()) + diagnostic_initialize (global_dc, 0); + /* Remove arguments which are negated by others after them. */ + + unsigned int fp_model_index = old_decoded_options_count; new_decoded_options_count = 0; for (i = 0; i < old_decoded_options_count; i++) { @@ -1048,6 +1152,34 @@ prune_options (struct cl_decoded_option **decoded_options, fdiagnostics_color_idx = i; continue; + case OPT_fp_model_: + /* Only the last fp-model option will take effect. */ + unsigned int next_fp_model_idx; + next_fp_model_idx = find_opt_idx (old_decoded_options, + old_decoded_options_count, + i + 1, + old_decoded_options_count, + OPT_fp_model_); + if (next_fp_model_idx != old_decoded_options_count) + { + /* Found more than one fp-model, cancel this one. */ + if (old_decoded_options[i].value + != old_decoded_options[next_fp_model_idx].value) + { + warning (0, "%qs is overrided by %qs", + old_decoded_options[i]. 
+ orig_option_with_args_text, + old_decoded_options[next_fp_model_idx]. + orig_option_with_args_text); + } + break; + } + else + { + /* Found the last fp-model option. */ + fp_model_index = new_decoded_options_count; + } + /* FALLTHRU. */ default: gcc_assert (opt_idx < cl_options_count); option = &cl_options[opt_idx]; @@ -1087,6 +1219,14 @@ keep: break; } } + if (fp_model_index < new_decoded_options_count) + { + new_decoded_options_count + = handle_fp_model_driver (new_decoded_options, + new_decoded_options_count, + fp_model_index, + lang_mask); + } if (fdiagnostics_color_idx >= 1) { diff --git a/gcc/opts.c b/gcc/opts.c index e31aa560564..6924a973a5b 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -195,6 +195,7 @@ static void set_debug_level (enum debug_info_type type, int extended, struct gcc_options *opts_set, location_t loc); static void set_fast_math_flags (struct gcc_options *opts, int set); +static void set_fp_model_flags (struct gcc_options *opts, int set); static void decode_d_option (const char *arg, struct gcc_options *opts, location_t loc, diagnostic_context *dc); static void set_unsafe_math_optimizations_flags (struct gcc_options *opts, @@ -2482,6 +2483,10 @@ common_handle_option (struct gcc_options *opts, set_fast_math_flags (opts, value); break; + case OPT_fp_model_: + set_fp_model_flags (opts, value); + break; + case OPT_funsafe_math_optimizations: set_unsafe_math_optimizations_flags (opts, value); break; @@ -2908,6 +2913,69 @@ set_fast_math_flags (struct gcc_options *opts, int set) } } +/* Handle fp-model options. */ +static void +set_fp_model_flags (struct gcc_options *opts, int set) +{ + enum fp_model model = (enum fp_model) set; + switch (model) + { + case FP_MODEL_FAST: + /* Equivalent to open ffast-math. */ + set_fast_math_flags (opts, 1); + break; + + case FP_MODEL_PRECISE: + /* Equivalent to close ffast-math. */ + set_fast_math_flags (opts, 0); + /* Turn on -frounding-math -fsignaling-nans. 
*/ + if (!opts->frontend_set_flag_signaling_nans) + opts->x_flag_signaling_nans = 1; + if (!opts->frontend_set_flag_rounding_math) + opts->x_flag_rounding_math = 1; + opts->x_flag_expensive_optimizations = 0; + opts->x_flag_code_hoisting = 0; + opts->x_flag_predictive_commoning = 0; + opts->x_flag_fp_contract_mode = FP_CONTRACT_OFF; + break; + + case FP_MODEL_EXCEPT: + if (!opts->frontend_set_flag_signaling_nans) + opts->x_flag_signaling_nans = 1; + if (!opts->frontend_set_flag_errno_math) + opts->x_flag_errno_math = 1; + if (!opts->frontend_set_flag_trapping_math) + opts->x_flag_trapping_math = 1; + opts->x_flag_fp_int_builtin_inexact = 1; + /* Also turn on ffpe-trap in fortran. */ + break; + + case FP_MODEL_STRICT: + /* Turn on both precise and except. */ + if (!opts->frontend_set_flag_signaling_nans) + opts->x_flag_signaling_nans = 1; + if (!opts->frontend_set_flag_rounding_math) + opts->x_flag_rounding_math = 1; + opts->x_flag_expensive_optimizations = 0; + opts->x_flag_code_hoisting = 0; + opts->x_flag_predictive_commoning = 0; + if (!opts->frontend_set_flag_errno_math) + opts->x_flag_errno_math = 1; + if (!opts->frontend_set_flag_trapping_math) + opts->x_flag_trapping_math = 1; + opts->x_flag_fp_int_builtin_inexact = 1; + opts->x_flag_fp_contract_mode = FP_CONTRACT_OFF; + break; + + case FP_MODEL_NORMAL: + /* Do nothing. */ + break; + + default: + gcc_unreachable (); + } +} + /* When -funsafe-math-optimizations is set the following flags are set as well. */ static void -- Gitee From 0d14a2b7a3defc82ed16c99a18c2bc2e6be9f5b1 Mon Sep 17 00:00:00 2001 From: xiezhiheng Date: Fri, 16 Jul 2021 23:21:38 -0400 Subject: [PATCH 13/13] [LoopElim] Redundant loop elimination optimization Introduce redundant loop elimination optimization controlled by -floop-elim. And it's often used with -ffinite-loops. 
--- gcc/common.opt | 4 + gcc/tree-ssa-phiopt.c | 448 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 452 insertions(+) diff --git a/gcc/common.opt b/gcc/common.opt index 79c9ef6615b..b2b0aac7fdf 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1169,6 +1169,10 @@ fcompare-elim Common Report Var(flag_compare_elim_after_reload) Optimization Perform comparison elimination after register allocation has finished. +floop-elim +Common Report Var(flag_loop_elim) Init(0) Optimization +Perform redundant loop elimination. + fconserve-stack Common Var(flag_conserve_stack) Optimization Do not perform optimizations increasing noticeably stack usage. diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c index 3b5b6907679..591b6435f78 100644 --- a/gcc/tree-ssa-phiopt.c +++ b/gcc/tree-ssa-phiopt.c @@ -69,6 +69,7 @@ static hash_set * get_non_trapping (); static void replace_phi_edge_with_variable (basic_block, edge, gimple *, tree); static void hoist_adjacent_loads (basic_block, basic_block, basic_block, basic_block); +static bool do_phiopt_pattern (basic_block, basic_block, basic_block); static bool gate_hoist_loads (void); /* This pass tries to transform conditional stores into unconditional @@ -257,6 +258,10 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) hoist_adjacent_loads (bb, bb1, bb2, bb3); continue; } + else if (flag_loop_elim && do_phiopt_pattern (bb, bb1, bb2)) + { + continue; + } else continue; @@ -2819,6 +2824,449 @@ hoist_adjacent_loads (basic_block bb0, basic_block bb1, } } +static bool check_uses (tree, hash_set *); + +/* Check SSA_NAME is used in + if (SSA_NAME == 0) + ... + or + if (SSA_NAME != 0) + ... 
+*/ +static bool +check_uses_cond (const_tree ssa_name, gimple *stmt, + hash_set *hset ATTRIBUTE_UNUSED) +{ + tree_code code = gimple_cond_code (stmt); + if (code != EQ_EXPR && code != NE_EXPR) + { + return false; + } + + tree lhs = gimple_cond_lhs (stmt); + tree rhs = gimple_cond_rhs (stmt); + if ((lhs == ssa_name && integer_zerop (rhs)) + || (rhs == ssa_name && integer_zerop (lhs))) + { + return true; + } + + return false; +} + +/* Check SSA_NAME is used in + _tmp = SSA_NAME == 0; + or + _tmp = SSA_NAME != 0; + or + _tmp = SSA_NAME | _tmp2; +*/ +static bool +check_uses_assign (const_tree ssa_name, gimple *stmt, hash_set *hset) +{ + tree_code code = gimple_assign_rhs_code (stmt); + tree lhs, rhs1, rhs2; + + switch (code) + { + case EQ_EXPR: + case NE_EXPR: + rhs1 = gimple_assign_rhs1 (stmt); + rhs2 = gimple_assign_rhs2 (stmt); + if ((rhs1 == ssa_name && integer_zerop (rhs2)) + || (rhs2 == ssa_name && integer_zerop (rhs1))) + { + return true; + } + break; + + case BIT_IOR_EXPR: + lhs = gimple_assign_lhs (stmt); + if (hset->contains (lhs)) + { + return false; + } + /* We should check the use of _tmp further. */ + return check_uses (lhs, hset); + + default: + break; + } + return false; +} + +/* Check SSA_NAME is used in + # result = PHI +*/ +static bool +check_uses_phi (const_tree ssa_name, gimple *stmt, hash_set *hset) +{ + for (unsigned i = 0; i < gimple_phi_num_args (stmt); i++) + { + tree arg = gimple_phi_arg_def (stmt, i); + if (!integer_zerop (arg) && arg != ssa_name) + { + return false; + } + } + + tree result = gimple_phi_result (stmt); + + /* It is used to avoid infinite recursion, + + if (cond) + goto + else + goto + + + # _tmp2 = PHI <0 (bb 1), _tmp3 (bb 3)> + {BODY} + if (cond) + goto + else + goto + + + # _tmp3 = PHI <0 (bb 1), _tmp2 (bb 2)> + {BODY} + if (cond) + goto + else + goto + + + ... 
+ */ + if (hset->contains (result)) + { + return false; + } + + return check_uses (result, hset); +} + +/* Check the use of SSA_NAME, it should only be used in comparison + operation and PHI node. HSET is used to record the ssa_names + that have been already checked. */ +static bool +check_uses (tree ssa_name, hash_set *hset) +{ + imm_use_iterator imm_iter; + use_operand_p use_p; + + if (TREE_CODE (ssa_name) != SSA_NAME) + { + return false; + } + + if (SSA_NAME_VAR (ssa_name) + && is_global_var (SSA_NAME_VAR (ssa_name))) + { + return false; + } + + hset->add (ssa_name); + + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_name) + { + gimple *stmt = USE_STMT (use_p); + + /* Ignore debug gimple statements. */ + if (is_gimple_debug (stmt)) + { + continue; + } + + switch (gimple_code (stmt)) + { + case GIMPLE_COND: + if (!check_uses_cond (ssa_name, stmt, hset)) + { + return false; + } + break; + + case GIMPLE_ASSIGN: + if (!check_uses_assign (ssa_name, stmt, hset)) + { + return false; + } + break; + + case GIMPLE_PHI: + if (!check_uses_phi (ssa_name, stmt, hset)) + { + return false; + } + break; + + default: + return false; + } + } + return true; +} + +static bool +check_def_gimple (gimple *def1, gimple *def2, const_tree result) +{ + /* def1 and def2 should be POINTER_PLUS_EXPR. */ + if (!is_gimple_assign (def1) || !is_gimple_assign (def2) + || gimple_assign_rhs_code (def1) != POINTER_PLUS_EXPR + || gimple_assign_rhs_code (def2) != POINTER_PLUS_EXPR) + { + return false; + } + + tree rhs12 = gimple_assign_rhs2 (def1); + + tree rhs21 = gimple_assign_rhs1 (def2); + tree rhs22 = gimple_assign_rhs2 (def2); + + if (rhs21 != result) + { + return false; + } + + /* We should have a positive pointer-plus constant to ensure + that the pointer value is continuously increasing. 
*/ + if (TREE_CODE (rhs12) != INTEGER_CST || TREE_CODE (rhs22) != INTEGER_CST + || compare_tree_int (rhs12, 0) <= 0 || compare_tree_int (rhs22, 0) <= 0) + { + return false; + } + + return true; +} + +static bool +check_loop_body (basic_block bb0, basic_block bb2, const_tree result) +{ + gimple *g01 = first_stmt (bb0); + if (!g01 || !is_gimple_assign (g01) + || gimple_assign_rhs_code (g01) != MEM_REF + || TREE_OPERAND (gimple_assign_rhs1 (g01), 0) != result) + { + return false; + } + + gimple *g02 = g01->next; + /* GIMPLE_COND would be the last gimple in a basic block, + and have no other side effects on RESULT. */ + if (!g02 || gimple_code (g02) != GIMPLE_COND) + { + return false; + } + + if (first_stmt (bb2) != last_stmt (bb2)) + { + return false; + } + + return true; +} + +/* Pattern is like +
+   arg1 = base (rhs11) + cst (rhs12); [def1]
+   goto <bb 0>
+
+   <bb 2>
+   arg2 = result (rhs21) + cst (rhs22); [def2]
+
+   <bb 0>
+   # result = PHI <arg1 (pre bb), arg2 (bb 2)>
+   _v = *result;  [g01]
+   if (_v == 0)   [g02]
+     goto <bb 1>
+   else
+     goto <bb 2>
+
+   <bb 1>
+   _1 = result - base;     [g1]
+   _2 = _1 /[ex] cst;      [g2]
+   _3 = (unsigned int) _2; [g3]
+   if (_3 == 0)
+   ...
+*/
+static bool
+check_bb_order (basic_block bb0, basic_block &bb1, basic_block &bb2,
+		gphi *phi_stmt, gimple *&output)
+{
+  /* BB1 and BB2 are taken by reference and may be exchanged below (the
+     exchange is kept even when the match fails later); OUTPUT is only
+     written when the whole pattern matches.  */
+
+  /* Only a two-argument PHI of a real (non-virtual) operand in BB0 can
+     be the pointer that the loop advances.  */
+  if (gimple_phi_num_args (phi_stmt) != 2)
+    return false;
+
+  tree iv = gimple_phi_result (phi_stmt);
+  if (virtual_operand_p (iv))
+    return false;
+
+  /* Argument 0 is expected to be the initial value and argument 1 the
+     value coming round the back edge; both must be SSA names that are
+     not default definitions.  */
+  tree init = gimple_phi_arg_def (phi_stmt, 0);
+  tree next = gimple_phi_arg_def (phi_stmt, 1);
+  if (TREE_CODE (init) != SSA_NAME || TREE_CODE (next) != SSA_NAME)
+    return false;
+  if (SSA_NAME_IS_DEFAULT_DEF (init) || SSA_NAME_IS_DEFAULT_DEF (next))
+    return false;
+
+  gimple *init_def = SSA_NAME_DEF_STMT (init);
+  gimple *next_def = SSA_NAME_DEF_STMT (next);
+  basic_block init_bb = gimple_bb (init_def);
+
+  /* The caller may have passed the two successors the other way round,
+     i.e. the branch ending BB0 reads
+       if (_v != 0)
+	 goto <bb 2>
+       else
+	 goto <bb 1>
+     In that case exchange them so that BB2 names the latch block.  */
+  if (gimple_bb (next_def) == bb1 && EDGE_SUCC (bb1, 0)->dest == bb0)
+    std::swap (bb1, bb2);
+
+  /* Required CFG shape:  pre-bb [init_def] --> bb0 <-- bb2 [next_def].  */
+  if (!init_bb || EDGE_SUCC (init_bb, 0)->dest != bb0)
+    return false;
+  if (gimple_bb (next_def) != bb2 || EDGE_SUCC (bb2, 0)->dest != bb0)
+    return false;
+
+  /* Both definitions must be pointer increments by a positive constant.  */
+  if (!check_def_gimple (init_def, next_def, iv))
+    return false;
+
+  /* The loop formed by BB0 and BB2 must have the expected shape.  */
+  if (!check_loop_body (bb0, bb2, iv))
+    return false;
+
+  /* Hand the initial-value definition back to the caller.  */
+  output = init_def;
+  return true;
+}
+
+/* Check that BB1 begins with the statements
+   <bb 1>
+   _1 = result - base;     [g1]
+   _2 = _1 /[ex] cst2;     [g2]
+   _3 = (unsigned int) _2; [g3]
+   and that _3 is only tested against zero (see check_uses).  CST is the
+   constant of the initial pointer increment; CST / cst2 must be at least
+   one.  On success store g3 in OUTPUT and return true.  */
+static bool
+check_gimple_order (basic_block bb1, const_tree base, const_tree cst,
+		    const_tree result, gimple *&output)
+{
+  /* g1: the pointer difference RESULT - BASE.  */
+  gimple *g1 = first_stmt (bb1);
+  if (!g1 || !is_gimple_assign (g1)
+      || gimple_assign_rhs_code (g1) != POINTER_DIFF_EXPR
+      || gimple_assign_rhs1 (g1) != result
+      || gimple_assign_rhs2 (g1) != base)
+    return false;
+
+  /* Step with gsi_next_nondebug rather than the raw `next' link so that
+     debug statements between g1, g2 and g3 cannot make the match (and
+     therefore the generated code) depend on -g.  */
+  gimple_stmt_iterator gsi = gsi_for_stmt (g1);
+  gsi_next_nondebug (&gsi);
+  gimple *g2 = gsi_stmt (gsi);
+  if (!g2 || !is_gimple_assign (g2)
+      || gimple_assign_rhs_code (g2) != EXACT_DIV_EXPR
+      || gimple_assign_lhs (g1) != gimple_assign_rhs1 (g2)
+      || TREE_CODE (gimple_assign_rhs2 (g2)) != INTEGER_CST)
+    return false;
+
+  /* INTEGER_CST cst in gimple def1.  */
+  HOST_WIDE_INT num1 = TREE_INT_CST_LOW (cst);
+  /* INTEGER_CST cst in gimple g2.  */
+  HOST_WIDE_INT num2 = TREE_INT_CST_LOW (gimple_assign_rhs2 (g2));
+  /* _2 must be at least one: require 0 < NUM2 <= NUM1.  Checking the
+     signs first also avoids a host division by zero and the overflow of
+     HOST_WIDE_INT_MIN / -1.  */
+  if (num2 <= 0 || num1 < num2)
+    return false;
+
+  /* g3: conversion of the quotient, defining an SSA name.  */
+  gsi_next_nondebug (&gsi);
+  gimple *g3 = gsi_stmt (gsi);
+  if (!g3 || !is_gimple_assign (g3)
+      || gimple_assign_rhs_code (g3) != NOP_EXPR
+      || gimple_assign_lhs (g2) != gimple_assign_rhs1 (g3)
+      || TREE_CODE (gimple_assign_lhs (g3)) != SSA_NAME)
+    return false;
+
+  /* _3 should only be used in comparison operation or PHI node.  The set
+     of already visited names is an automatic object, so it is released
+     on every return path without an explicit delete.  */
+  hash_set<tree> visited;
+  if (!check_uses (gimple_assign_lhs (g3), &visited))
+    return false;
+
+  output = g3;
+  return true;
+}
+
+/* For the first PHI in BB0 for which both check_bb_order and
+   check_gimple_order succeed, replace the operand of the conversion
+   statement found in BB1 by the constant 1 and return true.  Return
+   false when no PHI in BB0 qualifies.  */
+static bool
+do_phiopt_pattern (basic_block bb0, basic_block bb1, basic_block bb2)
+{
+  const bool dump_p = dump_file && (dump_flags & TDF_DETAILS);
+
+  for (gphi_iterator psi = gsi_start_phis (bb0); !gsi_end_p (psi);
+       gsi_next (&psi))
+    {
+      gphi *phi = psi.phi ();
+
+      /* INIT_DEF receives the statement computing the pointer's initial
+	 value, BASE + CST.  check_bb_order may exchange BB1 and BB2.  */
+      gimple *init_def = NULL;
+      if (!check_bb_order (bb0, bb1, bb2, phi, init_def))
+	continue;
+
+      gimple *conv = NULL;
+      if (!check_gimple_order (bb1, gimple_assign_rhs1 (init_def),
+			       gimple_assign_rhs2 (init_def),
+			       gimple_phi_result (phi), conv))
+	continue;
+
+      gcc_assert (conv);
+
+      if (dump_p)
+	{
+	  fprintf (dump_file, "PHIOPT pattern optimization (1) - Rewrite:\n");
+	  print_gimple_stmt (dump_file, conv, 0);
+	  fprintf (dump_file, "to\n");
+	}
+
+      /* Turn
+	   _3 = (unsigned int) _2;
+	 into
+	   _3 = (unsigned int) 1;
+	 keeping the type of the replaced operand for the constant.  */
+      tree one = build_int_cst (TREE_TYPE (gimple_assign_rhs1 (conv)), 1);
+      gimple_assign_set_rhs1 (conv, one);
+      update_stmt (conv);
+
+      if (dump_p)
+	{
+	  print_gimple_stmt (dump_file, conv, 0);
+	  fprintf (dump_file, "\n");
+	}
+
+      return true;
+    }
+  return false;
+}
+
 /* Determine whether we should attempt to hoist adjacent loads out of
    diamond patterns in pass_phiopt.  Always hoist loads if
    -fhoist-adjacent-loads is specified and the target machine has
-- 
Gitee