1 Star 0 Fork 0

lindorx / stream2

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
stream2.f 5.84 KB
一键复制 编辑 原始数据 按行查看 历史
代凡 提交于 2021-03-04 17:09 . 完成初步改写,尚未实现openmp。
* Program: STREAM2 *
* Revision: 0.1, 99.10.26 *
* Author: John McCalpin *
* john@mccalpin.com *
* Copyright 1991-2003: John D. McCalpin
* License:
* 1. You are free to use this program and/or to redistribute
* this program.
* 2. You are free to modify this program for your own use,
* including commercial use, subject to the publication
* restrictions in item 3.
* 3. You are free to publish results obtained from running this
* program, or from works that you derive from this program,
* with the following limitations:
* 3a. In order to be referred to as "STREAM2 benchmark results",
* published results must be in conformance to the STREAM
* Run Rules, (briefly reviewed below) published at
* http://www.cs.virginia.edu/stream/ref.html
* and incorporated herein by reference.
* As the copyright holder, John McCalpin retains the
* right to determine conformity with the Run Rules.
* 3b. Results based on modified source code or on runs not in
* accordance with the STREAM Run Rules must be clearly
* labelled whenever they are published. Examples of
* proper labelling include:
* "tuned STREAM2 benchmark results"
* "based on a variant of the STREAM2 benchmark code"
* Other comparable, clear and reasonable labelling is
* acceptable.
* 3c. Submission of results to the STREAM benchmark web site
* is encouraged, but not required.
* 4. Use of this program or creation of derived works based on this
* program constitutes acceptance of these licensing restrictions.
* 5. Absolutely no warranty is expressed or implied.
* This program measures sustained bandwidth *
* using four computational kernels: *
* *
* FILL: a(i) = q *
* COPY: b(i) = a(i) *
* DAXPY: b(i) = b(i) + q*a(i) *
* DOT: sum += a(i) * b(i) *
* *
* Results are presented in MB/s, assuming *
* 8 Bytes per iteration for FILL, *
* 16 Bytes per iteration for COPY & DOT, and *
* 24 Bytes per iteration for DAXPY *
* STREAM2 main program: times the FILL, COPY, DAXPY and DOT kernels
* for NUMSIZES vector lengths spaced logarithmically between NMIN and
* NMAX, and prints the best observed rate of each kernel per length.
*
* Source layout: fixed form.  Statements start in column 7, the label
* of the FORMAT statement sits in columns 1-5, and a '$' in column 6
* marks a continuation line.
*
* Corrections made in this version of the program unit:
*   - restored the 'do j=1,NUMSIZES' header of the problem-size loop
*     (j was used undefined and the closing 'end do' was unmatched);
*   - restored the terminating 'end' statement;
*   - 'inner' is declared explicitly instead of implicitly.
*
* mysecond is an external function that is not defined in this
* program unit.  It is used as a real*8 time stamp; the rates below
* are only in MB/s if it counts seconds (TODO confirm against the
* timer implementation that is linked in).
      program stream2
      parameter (NMIN=30,NMAX=2000000)
      parameter (NTIMES=10,NUMSIZES=32)
      integer NPAD
      parameter (NPAD=5)
      real*8 a(NMAX+NPAD),b(NMAX+NPAD)
      real*8 time(4,NTIMES),mysecond,scalar
      real*8 sum,start,finish
      real*8 sum0,sum1,sum2,sum3,sum4,sum5,sum6,sum7
      real*8 rate(4),besttime(4),bytes(4)
      real*8 exp,tdelta
      logical ALLTIMES
      integer i,j,k,l,M
      integer inner
      external mysecond
* Bytes moved per loop iteration by FILL, COPY, DAXPY and DOT
      data bytes/8,16,24,16/
* Set ALLTIMES to .true. to print every individual timing
      data ALLTIMES/.false./

* check timer granularity
* The first min(10000,NMAX) elements of a are touched once, then
* filled with consecutive time stamps; tdelta ends up as the
* smallest non-zero difference between two neighbouring stamps.
      do i=1,min(10000,NMAX)
          a(i) = 0.0d0
      end do
      do i=1,min(10000,NMAX)
          a(i) = mysecond()
      end do
      tdelta = 1.d36
      do i=1,min(10000,NMAX)-1
          if (a(i+1).ne.a(i)) then
              tdelta = min(tdelta,abs(a(i+1)-a(i)))
          end if
      end do
      print *,'Smallest time delta is ',tdelta
      print *,' Size Iter FILL COPY DAXPY DOT'

* Loop over problem size
      do j=1,NUMSIZES
* exp runs linearly from log10(NMIN) at j=1 to log10(NMAX) at
* j=NUMSIZES, so M = 10**exp grows geometrically from NMIN to NMAX.
          exp = log10(dble(NMIN)) + dble(j-1)/dble(NUMSIZES-1)*
     $          (log10(dble(NMAX))-log10(dble(NMIN)))
          M = NINT(10.**exp)

* Initialize Arrays
          do i=1,M
              a(i) = 0.0d0
              b(i) = 0.0d0
          end do

          do k=1,NTIMES
* Each kernel is repeated 'inner' times, so that about NMAX elements
* are processed per timing; the elapsed time is divided by 'inner'
* to give the time of a single pass over M elements.
              inner = NMAX/M

* FILL: a(i) = scalar
              start = mysecond()
              do l=1,inner
                  scalar = dble(k+l)
                  do i=1,M
                      a(i) = scalar
                  end do
              end do
              finish = mysecond()
              time(1,k) = (finish-start)/dble(inner)

* COPY: b(i) = a(i)
* One element of a is rewritten before every pass; presumably this
* keeps the repetitions from being recognised as redundant.
              start = mysecond()
              do l=1,inner
                  a(l) = 1.0d0
                  do i=1,M
                      b(i) = a(i)
                  end do
              end do
              finish = mysecond()
              time(2,k) = (finish-start)/dble(inner)

* DAXPY: b(i) = b(i) + scalar*a(i)
* scalar still holds the last value assigned in the FILL kernel.
              start = mysecond()
              do l=1,inner
                  a(l) = 1.0d0
                  do i=1,M
                      b(i) = b(i) + scalar*a(i)
                  end do
              end do
              finish = mysecond()
              time(3,k) = (finish-start)/dble(inner)

* DOT: eight partial sums of a(i)*b(i), unrolled by hand.
* NOTE(review): the stride-8 loop reads up to a(i+7)/b(i+7), i.e.
* up to 7 elements past M whenever M is not a multiple of 8.
              start = mysecond()
              do l=1,inner
                  b(l) = 1.0d0
                  sum0 = 0.0d0
                  sum1 = 0.0d0
                  sum2 = 0.0d0
                  sum3 = 0.0d0
                  sum4 = 0.0d0
                  sum5 = 0.0d0
                  sum6 = 0.0d0
                  sum7 = 0.0d0
                  do i=1,M,8
                      sum0 = sum0 + a(i+0)*b(i+0)
                      sum1 = sum1 + a(i+1)*b(i+1)
                      sum2 = sum2 + a(i+2)*b(i+2)
                      sum3 = sum3 + a(i+3)*b(i+3)
                      sum4 = sum4 + a(i+4)*b(i+4)
                      sum5 = sum5 + a(i+5)*b(i+5)
                      sum6 = sum6 + a(i+6)*b(i+6)
                      sum7 = sum7 + a(i+7)*b(i+7)
                  end do
              end do
* Only the partial sums of the last repetition are combined.
              sum = sum0 + sum1 + sum2 + sum3
     $            + sum4 + sum5 + sum6 + sum7
              finish = mysecond()
              time(4,k) = (finish-start)/dble(inner)
          end do

* Best (smallest) time of each kernel over the NTIMES trials, and the
* resulting rate: M elements * bytes per element / time / 1.d6.
          do i=1,4
              besttime(i) = 1.d+36
              do k=1,NTIMES
                  besttime(i) = min(besttime(i),time(i,k))
                  if (ALLTIMES) print *,i,k,time(i,k)
              end do
              rate(i) = dble(M)* bytes(i)/besttime(i) / 1.d6
          end do

* The fifth value printed is the timer granularity relative to the
* best FILL time.
          write (*,1) M,NTIMES,rate(1),rate(2),rate(3),rate(4),
     $                tdelta/besttime(1)

* The DOT result is written to unit 3 (no file name is given, so the
* file used is chosen by the Fortran run-time); presumably this keeps
* the DOT computation from being optimised away.
          open (unit=3,form='unformatted')
          write (3) sum
          close (unit=3)
      end do
    1 format (1x,i8,2x,i4,1x,5(f8.1,2x))
      end
马建仓 AI 助手


344bd9b3 5694891 D2dac590 5694891