diff --git a/lib/decoder.f90 b/lib/decoder.f90 index 44aba257b..855d76746 100644 --- a/lib/decoder.f90 +++ b/lib/decoder.f90 @@ -1,12 +1,7 @@ subroutine decoder(ss,id2) use prog_args - - !$ interface - !$ subroutine omp_set_dynamic (flag) - !$ logical flag - !$ end subroutine omp_set_dynamic - !$ end interface + !$ use omp_lib include 'constants.f90' real ss(184,NSMAX) @@ -17,6 +12,9 @@ subroutine decoder(ss,id2) common/npar/nutc,ndiskdat,ntrperiod,nfqso,newdat,npts8,nfa,nfsplit,nfb, & ntol,kin,nzhsym,nsave,nagain,ndepth,ntxmode,nmode,datetime common/tracer/limtrace,lu + integer onlevel(0:10) + common/tracer_priv/level,onlevel + !$omp threadprivate(/tracer_priv/) save nfreqs0=0 @@ -43,7 +41,7 @@ subroutine decoder(ss,id2) ntol65=20 !$ call omp_set_dynamic(.true.) - !$omp parallel sections num_threads(2) + !$omp parallel sections num_threads(2) copyin(/tracer_priv/) !$omp section if(nmode.eq.65 .or. (nmode.gt.65 .and. ntxmode.eq.65)) then diff --git a/lib/timer.f90 b/lib/timer.f90 index 3534c87c3..9fc4840da 100644 --- a/lib/timer.f90 +++ b/lib/timer.f90 @@ -1,141 +1,194 @@ -subroutine timer(dname,k) - -! Times procedure number n between a call with k=0 (tstart) and with -! k=1 (tstop). Accumulates sums of these times in array ut (user time). -! Also traces all calls (for debugging purposes) if limtrace.gt.0 - - !$ interface - !$ integer function omp_get_thread_num() - !$ end function - !$ end interface - - parameter (MAXCALL=100) - character*8 dname - !$ character thread - character*11 tname,ename - character*16 sname - character*11, save :: space - integer, save :: level, nmax - character*11, save :: name(MAXCALL) - logical, save :: on(MAXCALL) - real, save :: ut(MAXCALL),ut0(MAXCALL),dut(MAXCALL) - integer, save :: ncall(MAXCALL),nlevel(MAXCALL),nparent(MAXCALL) - integer, save :: onlevel(0:10) - common/tracer/ limtrace,lu - data eps/0.000001/,ntrace/0/ - data level/0/,nmax/0/,space/' '/ - data limtrace/0/,lu/-1/ - !$omp threadprivate(level,space,onlevel) - -! ! currently this module is broken if called from multiple threads -! !$ return ! diable if usinh OpenMP - - tname=dname - !$ write(thread,'(i1)') omp_get_thread_num() - !$ tname=trim(dname)//'('//thread//')' ! decorate name with thread number - - !$omp critical(timer) - if(limtrace.lt.0) go to 999 - if(lu.lt.1) lu=6 - if(k.gt.1) go to 40 !Check for "all done" (k>1) - onlevel(0)=0 - - do n=1,nmax !Check for existing name - if(name(n).eq.tname) go to 20 - enddo - - nmax=nmax+1 !This is a new one - n=nmax - ncall(n)=0 - on(n)=.false. - ut(n)=eps - name(n)=tname - -20 if(k.eq.0) then !Get start times (k=0) - if(on(n)) print*,'Error in timer: ',tname,' already on.' - level=level+1 !Increment the level - on(n)=.true. -! call system_clock(icount,irate) -! ut0(n)=float(icount)/irate -! call cpu_time(ut0(n)) - ut0(n)=secnds(0.0) - - ncall(n)=ncall(n)+1 - if(ncall(n).gt.1.and.nlevel(n).ne.level) then - nlevel(n)=-1 - else - nlevel(n)=level - endif - nparent(n)=onlevel(level-1) - onlevel(level)=n - - else if(k.eq.1) then !Get stop times and accumulate sums. (k=1) - if(on(n)) then - on(n)=.false. -! call system_clock(icount,irate) -! ut1=float(icount)/irate -! call cpu_time(ut1) - ut1=secnds(0.0) - - ut(n)=ut(n)+ut1-ut0(n) - endif - level=level-1 - endif - - ntrace=ntrace+1 - if(ntrace.lt.limtrace) write(lu,1020) ntrace,tname,k,level,nparent(n) -1020 format(i8,': ',a11,3i5) - go to 998 - -! Write out the timer statistics - -40 write(lu,1040) -1040 format(/' name time frac dtime', & - ' dfrac calls level parent'/73('-')) - - if(k.gt.100) then - ndiv=k-100 - do i=1,nmax - ncall(i)=ncall(i)/ndiv - ut(i)=ut(i)/ndiv - enddo - endif - - total=ut(1) - sum=0. - sumf=0. - do i=1,nmax - dut(i)=ut(i) - do j=i,nmax - if(nparent(j).eq.i) dut(i)=dut(i)-ut(j) - enddo - if(dut(i).lt.0.0) dut(i)=0.0 - utf=ut(i)/total - dutf=dut(i)/total - sum=sum+dut(i) - sumf=sumf+dutf - kk=nlevel(i) - sname=space(1:kk)//name(i)//space(1:11-kk) - ename=space - if(nparent(i).ge.1) ename=name(nparent(i)) - write(lu,1060) float(i),sname,ut(i),utf,dut(i),dutf, & - ncall(i),nlevel(i),ename -1060 format(f4.0,a16,2(f10.3,f6.2),i7,i5,2x,a11) - enddo - - write(lu,1070) sum,sumf -1070 format(/36x,f10.2,f6.2) - nmax=0 - eps=0.000001 - ntrace=0 - level=0 - space=' ' - onlevel(0)=0 - -998 flush(lu) - -999 continue - - !$omp end critical(timer) - return -end subroutine timer +subroutine timer(dname,k) + +! Times procedure number n between a call with k=0 (tstart) and with +! k=1 (tstop). Accumulates sums of these times in array ut (user time). +! Also traces all calls (for debugging purposes) if limtrace.gt.0 +! +! If this is used with OpenMP than the /tracer_priv/ common block must +! be copyed into each thread of a thread team by using the copyin() +! clause on the !$omp parallel directive that creates the team. + + !$ use omp_lib + + character*8 dname + !$ integer tid + integer onlevel(0:10) + common/tracer/ limtrace,lu + common/tracer_priv/level,onlevel + + parameter (MAXCALL=100) + character*8 name(MAXCALL),space + logical on(MAXCALL) + real ut(MAXCALL),ut0(MAXCALL) + !$ integer ntid(MAXCALL) + integer nmax,ncall(MAXCALL),nlevel(MAXCALL),nparent(MAXCALL) + common/data/nmax,name,on,ut,ut0,dut,ntid,ncall,nlevel,nparent,total,sum,sumf,space + + data eps/0.000001/,ntrace/0/ + data level/0/,nmax/0/,space/' '/ + data limtrace/0/,lu/-1/ + + !$omp threadprivate(/tracer_priv/) + + !$omp critical(timer) + if(limtrace.lt.0) go to 999 + if(lu.lt.1) lu=6 + if(k.gt.1) go to 40 !Check for "all done" (k>1) + onlevel(0)=0 + + !$ tid=omp_get_thread_num() + do n=1,nmax !Check for existing name/parent[/thread] + if(name(n).eq.dname & + !$ .and.ntid(n).eq.tid & + ) then + if (on(n)) then + if (nparent(n).eq.onlevel(level-1)) goto 20 + else + if (nparent(n).eq.onlevel(level)) goto 20 + end if + end if + enddo + + nmax=nmax+1 !This is a new one + n=nmax + !$ ntid(n)=tid + ncall(n)=0 + on(n)=.false. + ut(n)=eps + name(n)=dname + +20 if(k.eq.0) then !Get start times (k=0) + if(on(n)) then + print*,'Error in timer: ',dname,' already on.' + end if + level=level+1 !Increment the level + on(n)=.true. +! call system_clock(icount,irate) +! ut0(n)=float(icount)/irate +! call cpu_time(ut0(n)) + ut0(n)=secnds(0.0) + + ncall(n)=ncall(n)+1 + if(ncall(n).gt.1.and.nlevel(n).ne.level) then + !recursion is happening + ! + !TODO: somehow need to account for this deeper call at the + !shallowest instance in the call chain and this needs to be + !done without incrementing anything here other than counters + !and timers + ! + nlevel(n)=-1 + else + nlevel(n)=level + endif + nparent(n)=onlevel(level-1) + onlevel(level)=n + + else if(k.eq.1) then !Get stop times and accumulate sums. (k=1) + if(on(n)) then + on(n)=.false. +! call system_clock(icount,irate) +! ut1=float(icount)/irate +! call cpu_time(ut1) + ut1=secnds(0.0) + + ut(n)=ut(n)+ut1-ut0(n) + endif + level=level-1 + endif + + ntrace=ntrace+1 + if(ntrace.lt.limtrace) write(lu,1020) ntrace,tname,k,level,nparent(n) +1020 format(i8,': ',a8,3i5) + go to 998 + +! Write out the timer statistics + +40 write(lu,1040) +1040 format(/' name time frac dtime', & + ' dfrac calls'/56('-')) + + !$ !walk backwards through the database rolling up thread data by call chain + !$ do i=nmax,1,-1 + !$ do j=1,i-1 + !$ l=j + !$ m=i + !$ do while (name(l).eq.name(m)) + !$ l=nparent(l) + !$ m=nparent(m) + !$ if (l.eq.0.or.m.eq.0) exit + !$ end do + !$ if (l.eq.0.and.m.eq.0) then + !$ !same call chain so roll up data + !$ ncall(j)=ncall(j)+ncall(i) + !$ ut(j)=ut(j)+ut(i) + !$ name(i)=space + !$ exit + !$ end if + !$ end do + !$ end do + + if(k.gt.100) then + ndiv=k-100 + do i=1,nmax + ncall(i)=ncall(i)/ndiv + ut(i)=ut(i)/ndiv + enddo + endif + + total=ut(1) + sum=0. + sumf=0. + i=1 + call print_root(i) + write(lu,1070) sum,sumf +1070 format(/32x,f10.3,f6.2) + nmax=0 + eps=0.000001 + ntrace=0 + level=0 + onlevel(0)=0 + +998 flush(lu) + +999 continue + + !$omp end critical(timer) + return +end subroutine timer + +recursive subroutine print_root(i) + character*16 sname + + common/tracer/ limtrace,lu + + parameter (MAXCALL=100) + character*8 name(MAXCALL),space + logical on(MAXCALL) + real ut(MAXCALL),ut0(MAXCALL) + !$ integer ntid(MAXCALL) + integer nmax,ncall(MAXCALL),nlevel(MAXCALL),nparent(MAXCALL) + common/data/nmax,name,on,ut,ut0,dut,ntid,ncall,nlevel,nparent,total,sum,sumf,space + + if (i.le.nmax) then + if (name(i).ne.space) then + dut=ut(i) + do j=i,nmax + if(nparent(j).eq.i) dut=dut-ut(j) + enddo + if(dut.lt.0.0) dut=0.0 + utf=ut(i)/total + dutf=dut/total + sum=sum+dut + sumf=sumf+dutf + kk=nlevel(i) + sname=space(1:kk)//name(i)//space(1:8-kk) + write(lu,2000) sname,ut(i),utf,dut,dutf,ncall(i) + do j=i,nmax + if(nparent(j).eq.i) call print_root(j) + enddo + end if + end if +2000 format(a16,2(f10.3,f6.2),i7,i5) + return +end subroutine print_root