From 98d665c6780eeaa36c2d168a33cd3f1b0ec8974b Mon Sep 17 00:00:00 2001 From: Joe Taylor Date: Fri, 1 Feb 2019 10:47:47 -0500 Subject: [PATCH] Add subroutine clockit() and use it for profiling ft4_decode(). --- CMakeLists.txt | 1 + lib/ft4/clockit.f90 | 104 +++++++++++++++++++++++++++++++++++++++++ lib/ft4/ft4_decode.f90 | 24 +++++++++- 3 files changed, 128 insertions(+), 1 deletion(-) create mode 100644 lib/ft4/clockit.f90 diff --git a/CMakeLists.txt b/CMakeLists.txt index c4374779e..38699265c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -394,6 +394,7 @@ set (wsjt_FSRCS lib/chkhist.f90 lib/chkmsg.f90 lib/chkss2.f90 + lib/ft4/clockit.f90 lib/ft8/compress.f90 lib/coord.f90 lib/db.f90 diff --git a/lib/ft4/clockit.f90 b/lib/ft4/clockit.f90 new file mode 100644 index 000000000..69d9690d7 --- /dev/null +++ b/lib/ft4/clockit.f90 @@ -0,0 +1,104 @@ +subroutine clockit(dname,k) + +! Times procedure number n between a call with k=0 (tstart) and with +! k=1 (tstop). Accumulates sums of these times in array ut (user time). +! Also traces all calls (for debugging purposes) if limtrace.gt.0 + + character*8 dname,name(50),space,ename + character*16 sname + logical first,on(50) + real ut(50),ut0(50),dut(50),tt(2) + integer ncall(50),nlevel(50),nparent(50) + integer onlevel(0:10) +! common/tracer/ limtrace,lu,ntimer + data first/.true./,eps/0.000001/,ntrace/0/ + data level/0/,nmax/0/,space/' '/ + data limtrace/0/,lu/29/,ntimer/1/ + save + + if(ntimer.eq.0) return + if(lu.lt.1) lu=6 + if(k.gt.1) go to 40 !Check for "all done" (k>1) + onlevel(0)=0 + + do n=1,nmax !Check for existing name + if(name(n).eq.dname) go to 20 + enddo + + nmax=nmax+1 !This is a new one + n=nmax + ncall(n)=0 + on(n)=.false. + ut(n)=eps + name(n)=dname + +20 if(k.eq.0) then !Get start times (k=0) + if(on(n)) print*,'Error in timer: ',dname,' already on.' + level=level+1 !Increment the level + on(n)=.true. + ut0(n)=etime(tt) + ncall(n)=ncall(n)+1 + if(ncall(n).gt.1.and.nlevel(n).ne.level) then + nlevel(n)=-1 + else + nlevel(n)=level + endif + nparent(n)=onlevel(level-1) + onlevel(level)=n + + else if(k.eq.1) then !Get stop times and accumulate sums. (k=1) + if(on(n)) then + on(n)=.false. + ut1=etime(tt) + ut(n)=ut(n)+ut1-ut0(n) + endif + level=level-1 + endif + + ntrace=ntrace+1 + if(ntrace.lt.limtrace) write(lu,1020) ntrace,dname,k,level,nparent(n) +1020 format(i5,': ',a8,3i5) + return + +! Write out the timer statistics + +40 open(lu,file='clockit.out',status='unknown') + write(lu,1040) +1040 format(/' name time frac dtime', & + ' dfrac calls level parent'/73('-')) + + if(k.gt.100) then + ndiv=k-100 + do i=1,nmax + ncall(i)=ncall(i)/ndiv + ut(i)=ut(i)/ndiv + enddo + endif + + total=ut(1) + sum=0. + sumf=0. + do i=1,nmax + dut(i)=ut(i) + do j=i,nmax + if(nparent(j).eq.i) dut(i)=dut(i)-ut(j) + enddo + utf=ut(i)/total + dutf=dut(i)/total + sum=sum+dut(i) + sumf=sumf+dutf + kk=nlevel(i) + sname=space(1:kk)//name(i)//space(1:8-kk) + ename=space + if(i.ge.2) ename=name(nparent(i)) + write(lu,1060) float(i),sname,ut(i),utf,dut(i),dutf, & + ncall(i),nlevel(i),ename +1060 format(f4.0,a16,2(f10.2,f6.2),i7,i5,2x,a8) + enddo + + write(lu,1070) sum,sumf +1070 format(/36x,f10.2,f6.2) + close(lu) + + return +end subroutine clockit diff --git a/lib/ft4/ft4_decode.f90 b/lib/ft4/ft4_decode.f90 index 83d9531d1..5ef9527a9 100644 --- a/lib/ft4/ft4_decode.f90 +++ b/lib/ft4/ft4_decode.f90 @@ -44,6 +44,7 @@ subroutine ft4_decode(cdatetime0,tbuf,nfa,nfb,nfqso,iwave,ndecodes,mycall, & data first/.true./ save one,first,nrxx,linex + call clockit('ft4_deco',0) hhmmss=cdatetime0(8:13) fs=12000.0/NDOWN !Sample rate after downsampling dt=1/fs !Sample interval after downsample (s) @@ -72,15 +73,20 @@ subroutine ft4_decode(cdatetime0,tbuf,nfa,nfb,nfqso,iwave,ndecodes,mycall, & fa=nfa fb=nfb + call clockit('getcand4',0) call getcandidates4(iwave,fa,fb,syncmin,nfqso,maxcand,savg,candidate, & ncand,sbase) + call clockit('getcand4',1) ndecodes=0 do icand=1,ncand f0=candidate(1,icand) xsnr=10*log10(candidate(3,icand))-18.0 if( f0.le.375.0 .or. f0.ge.(5000.0-375.0) ) cycle - call ft4_downsample(iwave,f0,cd2) ! downsample from 512 Sa/Symbol to 32 Sa/Symbol + call clockit('ft4_down',0) + call ft4_downsample(iwave,f0,cd2) !Downsample from 512 to 32 Sa/Symbol + call clockit('ft4_down',1) + sum2=sum(cd2*conjg(cd2))/(real(NMAX)/real(NDOWN)) if(sum2.gt.0.0) cd2=cd2/sqrt(sum2) ! Sample rate is now 12000/16 = 750 samples/second @@ -107,7 +113,11 @@ subroutine ft4_decode(cdatetime0,tbuf,nfa,nfb,nfqso,iwave,ndecodes,mycall, & a=0. a(1)=real(idf) ctwk=1. + call clockit('twkfreq1',0) call twkfreq1(ctwk,4*NSS,fs,a,ctwk2) + call clockit('twkfreq1',1) + + call clockit('sync4d ',0) do istart=ibmin,ibmax,ibstp call sync4d(cd2,istart,ctwk2,1,sync) !Find sync power if(sync.gt.smax) then @@ -116,14 +126,19 @@ subroutine ft4_decode(cdatetime0,tbuf,nfa,nfb,nfqso,iwave,ndecodes,mycall, & idfbest=idf endif enddo + call clockit('sync4d ',1) + enddo enddo f0=f0+real(idfbest) + call clockit('ft4down ',0) call ft4_downsample(iwave,f0,cb) !Final downsample with corrected f0 + call clockit('ft4down ',1) sum2=sum(abs(cb)**2)/(real(NSS)*NN) if(sum2.gt.0.0) cb=cb/sqrt(sum2) cd=cb(ibest:ibest+NN*NSS-1) + call clockit('four2a ',0) do k=1,NN i1=(k-1)*NSS csymb=cd(i1:i1+NSS-1) @@ -131,6 +146,7 @@ subroutine ft4_decode(cdatetime0,tbuf,nfa,nfb,nfqso,iwave,ndecodes,mycall, & cs(0:3,k)=csymb(1:4) s4(0:3,k)=abs(csymb(1:4)) enddo + call clockit('four2a ',1) ! Sync quality check is1=0 @@ -195,9 +211,11 @@ subroutine ft4_decode(cdatetime0,tbuf,nfa,nfb,nfqso,iwave,ndecodes,mycall, & enddo enddo + call clockit('normaliz',0) call normalizebmet(bmeta,2*NN) call normalizebmet(bmetb,2*NN) call normalizebmet(bmetc,2*NN) + call clockit('normaliz',1) hbits=0 where(bmeta.ge.0) hbits=1 @@ -233,8 +251,10 @@ subroutine ft4_decode(cdatetime0,tbuf,nfa,nfb,nfqso,iwave,ndecodes,mycall, & llr2=llr if(ibias.eq.1) llr2=llr+0.4 if(ibias.eq.2) llr2=llr-0.4 + call clockit('bpdecode',0) call bpdecode174_91(llr2,apmask,max_iterations,message77, & cw,nharderror,niterations) + call clockit('bpdecode',1) if(nharderror.ge.0) exit enddo if(sum(message77).eq.0) cycle @@ -286,6 +306,8 @@ subroutine ft4_decode(cdatetime0,tbuf,nfa,nfb,nfqso,iwave,ndecodes,mycall, & endif enddo !Sequence estimation enddo !Candidate list + call clockit('ft4_deco',1) + call clockit('ft4_deco',101) return entry get_ft4msg(idecode,nrx,line)