From ad9475296770653d2062a82ec620996efcadacca Mon Sep 17 00:00:00 2001 From: Bill Somerville Date: Thu, 5 Feb 2015 22:07:19 +0000 Subject: [PATCH] Thread safe lib/timer.f90 Accounts for each traced call per thread and accumulates by rolling up calls with an identical call chain before printing the statistics. The print now accounts for function calls in their call chain so the same function will be reported more than once if it is called in different places. git-svn-id: svn+ssh://svn.code.sf.net/p/wsjt/wsjt/branches/wsjtx@4937 ab8295b8-cf94-4d9e-aec4-7959e3be5d79 --- lib/decoder.f90 | 12 +- lib/timer.f90 | 335 ++++++++++++++++++++++++++++-------------------- 2 files changed, 199 insertions(+), 148 deletions(-) diff --git a/lib/decoder.f90 b/lib/decoder.f90 index 44aba257b..855d76746 100644 --- a/lib/decoder.f90 +++ b/lib/decoder.f90 @@ -1,12 +1,7 @@ subroutine decoder(ss,id2) use prog_args - - !$ interface - !$ subroutine omp_set_dynamic (flag) - !$ logical flag - !$ end subroutine omp_set_dynamic - !$ end interface + !$ use omp_lib include 'constants.f90' real ss(184,NSMAX) @@ -17,6 +12,9 @@ subroutine decoder(ss,id2) common/npar/nutc,ndiskdat,ntrperiod,nfqso,newdat,npts8,nfa,nfsplit,nfb, & ntol,kin,nzhsym,nsave,nagain,ndepth,ntxmode,nmode,datetime common/tracer/limtrace,lu + integer onlevel(0:10) + common/tracer_priv/level,onlevel + !$omp threadprivate(/tracer_priv/) save nfreqs0=0 @@ -43,7 +41,7 @@ subroutine decoder(ss,id2) ntol65=20 !$ call omp_set_dynamic(.true.) - !$omp parallel sections num_threads(2) + !$omp parallel sections num_threads(2) copyin(/tracer_priv/) !$omp section if(nmode.eq.65 .or. (nmode.gt.65 .and. ntxmode.eq.65)) then diff --git a/lib/timer.f90 b/lib/timer.f90 index 3534c87c3..9fc4840da 100644 --- a/lib/timer.f90 +++ b/lib/timer.f90 @@ -1,141 +1,194 @@ -subroutine timer(dname,k) - -! Times procedure number n between a call with k=0 (tstart) and with -! k=1 (tstop). Accumulates sums of these times in array ut (user time). -! Also traces all calls (for debugging purposes) if limtrace.gt.0 - - !$ interface - !$ integer function omp_get_thread_num() - !$ end function - !$ end interface - - parameter (MAXCALL=100) - character*8 dname - !$ character thread - character*11 tname,ename - character*16 sname - character*11, save :: space - integer, save :: level, nmax - character*11, save :: name(MAXCALL) - logical, save :: on(MAXCALL) - real, save :: ut(MAXCALL),ut0(MAXCALL),dut(MAXCALL) - integer, save :: ncall(MAXCALL),nlevel(MAXCALL),nparent(MAXCALL) - integer, save :: onlevel(0:10) - common/tracer/ limtrace,lu - data eps/0.000001/,ntrace/0/ - data level/0/,nmax/0/,space/' '/ - data limtrace/0/,lu/-1/ - !$omp threadprivate(level,space,onlevel) - -! ! currently this module is broken if called from multiple threads -! !$ return ! diable if usinh OpenMP - - tname=dname - !$ write(thread,'(i1)') omp_get_thread_num() - !$ tname=trim(dname)//'('//thread//')' ! decorate name with thread number - - !$omp critical(timer) - if(limtrace.lt.0) go to 999 - if(lu.lt.1) lu=6 - if(k.gt.1) go to 40 !Check for "all done" (k>1) - onlevel(0)=0 - - do n=1,nmax !Check for existing name - if(name(n).eq.tname) go to 20 - enddo - - nmax=nmax+1 !This is a new one - n=nmax - ncall(n)=0 - on(n)=.false. - ut(n)=eps - name(n)=tname - -20 if(k.eq.0) then !Get start times (k=0) - if(on(n)) print*,'Error in timer: ',tname,' already on.' - level=level+1 !Increment the level - on(n)=.true. -! call system_clock(icount,irate) -! ut0(n)=float(icount)/irate -! call cpu_time(ut0(n)) - ut0(n)=secnds(0.0) - - ncall(n)=ncall(n)+1 - if(ncall(n).gt.1.and.nlevel(n).ne.level) then - nlevel(n)=-1 - else - nlevel(n)=level - endif - nparent(n)=onlevel(level-1) - onlevel(level)=n - - else if(k.eq.1) then !Get stop times and accumulate sums. (k=1) - if(on(n)) then - on(n)=.false. -! call system_clock(icount,irate) -! ut1=float(icount)/irate -! call cpu_time(ut1) - ut1=secnds(0.0) - - ut(n)=ut(n)+ut1-ut0(n) - endif - level=level-1 - endif - - ntrace=ntrace+1 - if(ntrace.lt.limtrace) write(lu,1020) ntrace,tname,k,level,nparent(n) -1020 format(i8,': ',a11,3i5) - go to 998 - -! Write out the timer statistics - -40 write(lu,1040) -1040 format(/' name time frac dtime', & - ' dfrac calls level parent'/73('-')) - - if(k.gt.100) then - ndiv=k-100 - do i=1,nmax - ncall(i)=ncall(i)/ndiv - ut(i)=ut(i)/ndiv - enddo - endif - - total=ut(1) - sum=0. - sumf=0. - do i=1,nmax - dut(i)=ut(i) - do j=i,nmax - if(nparent(j).eq.i) dut(i)=dut(i)-ut(j) - enddo - if(dut(i).lt.0.0) dut(i)=0.0 - utf=ut(i)/total - dutf=dut(i)/total - sum=sum+dut(i) - sumf=sumf+dutf - kk=nlevel(i) - sname=space(1:kk)//name(i)//space(1:11-kk) - ename=space - if(nparent(i).ge.1) ename=name(nparent(i)) - write(lu,1060) float(i),sname,ut(i),utf,dut(i),dutf, & - ncall(i),nlevel(i),ename -1060 format(f4.0,a16,2(f10.3,f6.2),i7,i5,2x,a11) - enddo - - write(lu,1070) sum,sumf -1070 format(/36x,f10.2,f6.2) - nmax=0 - eps=0.000001 - ntrace=0 - level=0 - space=' ' - onlevel(0)=0 - -998 flush(lu) - -999 continue - - !$omp end critical(timer) - return -end subroutine timer +subroutine timer(dname,k) + +! Times procedure number n between a call with k=0 (tstart) and with +! k=1 (tstop). Accumulates sums of these times in array ut (user time). +! Also traces all calls (for debugging purposes) if limtrace.gt.0 +! +! If this is used with OpenMP than the /tracer_priv/ common block must +! be copyed into each thread of a thread team by using the copyin() +! clause on the !$omp parallel directive that creates the team. + + !$ use omp_lib + + character*8 dname + !$ integer tid + integer onlevel(0:10) + common/tracer/ limtrace,lu + common/tracer_priv/level,onlevel + + parameter (MAXCALL=100) + character*8 name(MAXCALL),space + logical on(MAXCALL) + real ut(MAXCALL),ut0(MAXCALL) + !$ integer ntid(MAXCALL) + integer nmax,ncall(MAXCALL),nlevel(MAXCALL),nparent(MAXCALL) + common/data/nmax,name,on,ut,ut0,dut,ntid,ncall,nlevel,nparent,total,sum,sumf,space + + data eps/0.000001/,ntrace/0/ + data level/0/,nmax/0/,space/' '/ + data limtrace/0/,lu/-1/ + + !$omp threadprivate(/tracer_priv/) + + !$omp critical(timer) + if(limtrace.lt.0) go to 999 + if(lu.lt.1) lu=6 + if(k.gt.1) go to 40 !Check for "all done" (k>1) + onlevel(0)=0 + + !$ tid=omp_get_thread_num() + do n=1,nmax !Check for existing name/parent[/thread] + if(name(n).eq.dname & + !$ .and.ntid(n).eq.tid & + ) then + if (on(n)) then + if (nparent(n).eq.onlevel(level-1)) goto 20 + else + if (nparent(n).eq.onlevel(level)) goto 20 + end if + end if + enddo + + nmax=nmax+1 !This is a new one + n=nmax + !$ ntid(n)=tid + ncall(n)=0 + on(n)=.false. + ut(n)=eps + name(n)=dname + +20 if(k.eq.0) then !Get start times (k=0) + if(on(n)) then + print*,'Error in timer: ',dname,' already on.' + end if + level=level+1 !Increment the level + on(n)=.true. +! call system_clock(icount,irate) +! ut0(n)=float(icount)/irate +! call cpu_time(ut0(n)) + ut0(n)=secnds(0.0) + + ncall(n)=ncall(n)+1 + if(ncall(n).gt.1.and.nlevel(n).ne.level) then + !recursion is happening + ! + !TODO: somehow need to account for this deeper call at the + !shallowest instance in the call chain and this needs to be + !done without incrementing anything here other than counters + !and timers + ! + nlevel(n)=-1 + else + nlevel(n)=level + endif + nparent(n)=onlevel(level-1) + onlevel(level)=n + + else if(k.eq.1) then !Get stop times and accumulate sums. (k=1) + if(on(n)) then + on(n)=.false. +! call system_clock(icount,irate) +! ut1=float(icount)/irate +! call cpu_time(ut1) + ut1=secnds(0.0) + + ut(n)=ut(n)+ut1-ut0(n) + endif + level=level-1 + endif + + ntrace=ntrace+1 + if(ntrace.lt.limtrace) write(lu,1020) ntrace,tname,k,level,nparent(n) +1020 format(i8,': ',a8,3i5) + go to 998 + +! Write out the timer statistics + +40 write(lu,1040) +1040 format(/' name time frac dtime', & + ' dfrac calls'/56('-')) + + !$ !walk backwards through the database rolling up thread data by call chain + !$ do i=nmax,1,-1 + !$ do j=1,i-1 + !$ l=j + !$ m=i + !$ do while (name(l).eq.name(m)) + !$ l=nparent(l) + !$ m=nparent(m) + !$ if (l.eq.0.or.m.eq.0) exit + !$ end do + !$ if (l.eq.0.and.m.eq.0) then + !$ !same call chain so roll up data + !$ ncall(j)=ncall(j)+ncall(i) + !$ ut(j)=ut(j)+ut(i) + !$ name(i)=space + !$ exit + !$ end if + !$ end do + !$ end do + + if(k.gt.100) then + ndiv=k-100 + do i=1,nmax + ncall(i)=ncall(i)/ndiv + ut(i)=ut(i)/ndiv + enddo + endif + + total=ut(1) + sum=0. + sumf=0. + i=1 + call print_root(i) + write(lu,1070) sum,sumf +1070 format(/32x,f10.3,f6.2) + nmax=0 + eps=0.000001 + ntrace=0 + level=0 + onlevel(0)=0 + +998 flush(lu) + +999 continue + + !$omp end critical(timer) + return +end subroutine timer + +recursive subroutine print_root(i) + character*16 sname + + common/tracer/ limtrace,lu + + parameter (MAXCALL=100) + character*8 name(MAXCALL),space + logical on(MAXCALL) + real ut(MAXCALL),ut0(MAXCALL) + !$ integer ntid(MAXCALL) + integer nmax,ncall(MAXCALL),nlevel(MAXCALL),nparent(MAXCALL) + common/data/nmax,name,on,ut,ut0,dut,ntid,ncall,nlevel,nparent,total,sum,sumf,space + + if (i.le.nmax) then + if (name(i).ne.space) then + dut=ut(i) + do j=i,nmax + if(nparent(j).eq.i) dut=dut-ut(j) + enddo + if(dut.lt.0.0) dut=0.0 + utf=ut(i)/total + dutf=dut/total + sum=sum+dut + sumf=sumf+dutf + kk=nlevel(i) + sname=space(1:kk)//name(i)//space(1:8-kk) + write(lu,2000) sname,ut(i),utf,dut,dutf,ncall(i) + do j=i,nmax + if(nparent(j).eq.i) call print_root(j) + enddo + end if + end if +2000 format(a16,2(f10.3,f6.2),i7,i5) + return +end subroutine print_root