diff --git a/libm65/CMakeLists.txt b/libm65/CMakeLists.txt index a887e6a7e..f139e89f7 100644 --- a/libm65/CMakeLists.txt +++ b/libm65/CMakeLists.txt @@ -22,7 +22,8 @@ get_filename_component (Fortran_COMPILER_NAME ${CMAKE_Fortran_COMPILER} NAME) if (Fortran_COMPILER_NAME MATCHES "gfortran.*") # gfortran - set (CMAKE_Fortran_FLAGS_RELEASE "-funroll-all-loops -fno-f2c -O3") + # set (CMAKE_Fortran_FLAGS_RELEASE "-funroll-all-loops -fno-f2c -O3") + set (CMAKE_Fortran_FLAGS_RELEASE "-O2 -fbounds-check") set (CMAKE_Fortran_FLAGS_DEBUG "-fno-f2c -O0 -g") elseif (Fortran_COMPILER_NAME MATCHES "ifort.*") # ifort (untested) @@ -110,6 +111,7 @@ set (FSRCS packtext.f90 pctile.f90 pfxdump.f90 + qra64b.f90 recvpkt.f90 rfile3a.f90 s3avg.f90 diff --git a/libm65/fftw3.f90 b/libm65/fftw3.f90 new file mode 100644 index 000000000..f1c8775bc --- /dev/null +++ b/libm65/fftw3.f90 @@ -0,0 +1,64 @@ + INTEGER FFTW_R2HC + PARAMETER (FFTW_R2HC=0) + INTEGER FFTW_HC2R + PARAMETER (FFTW_HC2R=1) + INTEGER FFTW_DHT + PARAMETER (FFTW_DHT=2) + INTEGER FFTW_REDFT00 + PARAMETER (FFTW_REDFT00=3) + INTEGER FFTW_REDFT01 + PARAMETER (FFTW_REDFT01=4) + INTEGER FFTW_REDFT10 + PARAMETER (FFTW_REDFT10=5) + INTEGER FFTW_REDFT11 + PARAMETER (FFTW_REDFT11=6) + INTEGER FFTW_RODFT00 + PARAMETER (FFTW_RODFT00=7) + INTEGER FFTW_RODFT01 + PARAMETER (FFTW_RODFT01=8) + INTEGER FFTW_RODFT10 + PARAMETER (FFTW_RODFT10=9) + INTEGER FFTW_RODFT11 + PARAMETER (FFTW_RODFT11=10) + INTEGER FFTW_FORWARD + PARAMETER (FFTW_FORWARD=-1) + INTEGER FFTW_BACKWARD + PARAMETER (FFTW_BACKWARD=+1) + INTEGER FFTW_MEASURE + PARAMETER (FFTW_MEASURE=0) + INTEGER FFTW_DESTROY_INPUT + PARAMETER (FFTW_DESTROY_INPUT=1) + INTEGER FFTW_UNALIGNED + PARAMETER (FFTW_UNALIGNED=2) + INTEGER FFTW_CONSERVE_MEMORY + PARAMETER (FFTW_CONSERVE_MEMORY=4) + INTEGER FFTW_EXHAUSTIVE + PARAMETER (FFTW_EXHAUSTIVE=8) + INTEGER FFTW_PRESERVE_INPUT + PARAMETER (FFTW_PRESERVE_INPUT=16) + INTEGER FFTW_PATIENT + PARAMETER (FFTW_PATIENT=32) + INTEGER FFTW_ESTIMATE + PARAMETER (FFTW_ESTIMATE=64) + INTEGER FFTW_ESTIMATE_PATIENT + PARAMETER (FFTW_ESTIMATE_PATIENT=128) + INTEGER FFTW_BELIEVE_PCOST + PARAMETER (FFTW_BELIEVE_PCOST=256) + INTEGER FFTW_DFT_R2HC_ICKY + PARAMETER (FFTW_DFT_R2HC_ICKY=512) + INTEGER FFTW_NONTHREADED_ICKY + PARAMETER (FFTW_NONTHREADED_ICKY=1024) + INTEGER FFTW_NO_BUFFERING + PARAMETER (FFTW_NO_BUFFERING=2048) + INTEGER FFTW_NO_INDIRECT_OP + PARAMETER (FFTW_NO_INDIRECT_OP=4096) + INTEGER FFTW_ALLOW_LARGE_GENERIC + PARAMETER (FFTW_ALLOW_LARGE_GENERIC=8192) + INTEGER FFTW_NO_RANK_SPLITS + PARAMETER (FFTW_NO_RANK_SPLITS=16384) + INTEGER FFTW_NO_VRANK_SPLITS + PARAMETER (FFTW_NO_VRANK_SPLITS=32768) + INTEGER FFTW_NO_VRECURSE + PARAMETER (FFTW_NO_VRECURSE=65536) + INTEGER FFTW_NO_SIMD + PARAMETER (FFTW_NO_SIMD=131072) diff --git a/libm65/filbig.f90 b/libm65/filbig.f90 index 7ca1a92ed..12b3299d3 100644 --- a/libm65/filbig.f90 +++ b/libm65/filbig.f90 @@ -14,6 +14,7 @@ subroutine filbig(dd,nmax,nfast,f0,newdat,nfsample,xpol,c4a,c4b,n4) integer*8 plan1,plan2,plan3,plan4,plan5 logical first,xpol include 'fftw3.f' + common/cacb/ca,cb equivalence (rfilt,cfilt) data first/.true./,npatience/1/,nfast0/0/ data halfpulse/114.97547150,36.57879257,-20.93789101, & diff --git a/libm65/four2a.f90 b/libm65/four2a.f90 index e1fd72fdd..57c7239e1 100644 --- a/libm65/four2a.f90 +++ b/libm65/four2a.f90 @@ -1,101 +1,115 @@ subroutine four2a(a,nfft,ndim,isign,iform) -! IFORM = 1, 0 or -1, as data is -! complex, real, or the first half of a complex array. Transform -! values are returned in array DATA. They are complex, real, or -! the first half of a complex array, as IFORM = 1, -1 or 0. +! IFORM = 1, 0 or -1, as data is +! complex, real, or the first half of a complex array. Transform +! values are returned in array DATA. They are complex, real, or +! the first half of a complex array, as IFORM = 1, -1 or 0. -! The transform of a real array (IFORM = 0) dimensioned N(1) by N(2) -! by ... will be returned in the same array, now considered to -! be complex of dimensions N(1)/2+1 by N(2) by .... Note that if -! IFORM = 0 or -1, N(1) must be even, and enough room must be -! reserved. The missing values may be obtained by complex conjugation. +! The transform of a real array (IFORM = 0) dimensioned N(1) by N(2) +! by ... will be returned in the same array, now considered to +! be complex of dimensions N(1)/2+1 by N(2) by .... Note that if +! IFORM = 0 or -1, N(1) must be even, and enough room must be +! reserved. The missing values may be obtained by complex conjugation. -! The reverse transformation of a half complex array dimensioned -! N(1)/2+1 by N(2) by ..., is accomplished by setting IFORM -! to -1. In the N array, N(1) must be the true N(1), not N(1)/2+1. -! The transform will be real and returned to the input array. +! The reverse transformation of a half complex array dimensioned +! N(1)/2+1 by N(2) by ..., is accomplished by setting IFORM +! to -1. In the N array, N(1) must be the true N(1), not N(1)/2+1. +! The transform will be real and returned to the input array. - parameter (NPMAX=100) - parameter (NSMALL=16384) - complex a(nfft) - complex aa(NSMALL) - integer nn(NPMAX),ns(NPMAX),nf(NPMAX),nl(NPMAX) - integer*8 plan(NPMAX) !Actually should be i*8, but no matter -! character cfftw*12 - data nplan/0/,npatience/1/ - include 'fftw3.f' +! This version of four2a makes calls to the FFTW library to do the +! actual computations. + + parameter (NPMAX=2100) !Max numberf of stored plans + parameter (NSMALL=16384) !Max size of "small" FFTs + complex a(nfft) !Array to be transformed + complex aa(NSMALL) !Local copy of "small" a() + integer nn(NPMAX),ns(NPMAX),nf(NPMAX) !Params of stored plans + integer*8 nl(NPMAX),nloc !More params of plans + integer*8 plan(NPMAX) !Pointers to stored plans + logical found_plan + data nplan/0/ !Number of stored plans + common/patience/npatience,nthreads !Patience and threads for FFTW plans + include 'fftw3.f90' !FFTW definitions save plan,nplan,nn,ns,nf,nl if(nfft.lt.0) go to 999 nloc=loc(a) + + found_plan = .false. + !$omp critical(four2a_setup) do i=1,nplan if(nfft.eq.nn(i) .and. isign.eq.ns(i) .and. & - iform.eq.nf(i) .and. nloc.eq.nl(i)) go to 10 + iform.eq.nf(i) .and. nloc.eq.nl(i)) then + found_plan = .true. + exit + end if enddo -! if(nplan.ge.NPMAX) stop 'Too many FFTW plans requested.' - if(nplan.ge.NPMAX) call exit(1) - nplan=nplan+1 - i=nplan - nn(i)=nfft - ns(i)=isign - nf(i)=iform - nl(i)=nloc -! cfftw(1:2)='ci' -! if(nf(i).ne.1) cfftw(1:2)='ri' -! cfftw(3:3)='f' -! if(ns(i).eq.1) cfftw(3:3)='b' -! write(cfftw(4:),*) nn(i) -! cfftw=cfftw(1:3)//cfftw(5:) -! write(13,3999) i,nn(i),ns(i),nf(i),cfftw -!3999 format(4i10,2x,a12) -! flush(13) + if(i.ge.NPMAX) stop 'Too many FFTW plans requested.' + + if (.not. found_plan) then + nplan=nplan+1 + i=nplan + + nn(i)=nfft + ns(i)=isign + nf(i)=iform + nl(i)=nloc ! Planning: FFTW_ESTIMATE, FFTW_ESTIMATE_PATIENT, FFTW_MEASURE, ! FFTW_PATIENT, FFTW_EXHAUSTIVE - nflags=FFTW_ESTIMATE - if(npatience.eq.1) nflags=FFTW_ESTIMATE_PATIENT - if(npatience.eq.2) nflags=FFTW_MEASURE - if(npatience.eq.3) nflags=FFTW_PATIENT - if(npatience.eq.4) nflags=FFTW_EXHAUSTIVE + nflags=FFTW_ESTIMATE + if(npatience.eq.1) nflags=FFTW_ESTIMATE_PATIENT + if(npatience.eq.2) nflags=FFTW_MEASURE + if(npatience.eq.3) nflags=FFTW_PATIENT + if(npatience.eq.4) nflags=FFTW_EXHAUSTIVE - if(nfft.le.NSMALL) then - jz=nfft - if(iform.eq.0) jz=nfft/2 - do j=1,jz - aa(j)=a(j) - enddo - endif - if(isign.eq.-1 .and. iform.eq.1) then - call sfftw_plan_dft_1d(plan(i),nfft,a,a,FFTW_FORWARD,nflags) - else if(isign.eq.1 .and. iform.eq.1) then - call sfftw_plan_dft_1d(plan(i),nfft,a,a,FFTW_BACKWARD,nflags) - else if(isign.eq.-1 .and. iform.eq.0) then - call sfftw_plan_dft_r2c_1d(plan(i),nfft,a,a,nflags) - else if(isign.eq.1 .and. iform.eq.-1) then - call sfftw_plan_dft_c2r_1d(plan(i),nfft,a,a,nflags) - else -! stop 'Unsupported request in four2a' - call exit(1) - endif - i=nplan - if(nfft.le.NSMALL) then - jz=nfft - if(iform.eq.0) jz=nfft/2 - do j=1,jz - a(j)=aa(j) - enddo - endif + if(nfft.le.NSMALL) then + jz=nfft + if(iform.eq.0) jz=nfft/2 + aa(1:jz)=a(1:jz) + endif + + !$omp critical(fftw) ! serialize non thread-safe FFTW3 calls + if(isign.eq.-1 .and. iform.eq.1) then + call sfftw_plan_dft_1d(plan(i),nfft,a,a,FFTW_FORWARD,nflags) + else if(isign.eq.1 .and. iform.eq.1) then + call sfftw_plan_dft_1d(plan(i),nfft,a,a,FFTW_BACKWARD,nflags) + else if(isign.eq.-1 .and. iform.eq.0) then + call sfftw_plan_dft_r2c_1d(plan(i),nfft,a,a,nflags) + else if(isign.eq.1 .and. iform.eq.-1) then + call sfftw_plan_dft_c2r_1d(plan(i),nfft,a,a,nflags) + else + stop 'Unsupported request in four2a' + endif + !$omp end critical(fftw) + + if(nfft.le.NSMALL) then + jz=nfft + if(iform.eq.0) jz=nfft/2 + a(1:jz)=aa(1:jz) + endif + end if + !$omp end critical(four2a_setup) -10 continue call sfftw_execute(plan(i)) return -999 do i=1,nplan +999 continue + + !$omp critical(four2a) + do i=1,nplan ! The test is only to silence a compiler warning: - if(ndim.ne.-999) call sfftw_destroy_plan(plan(i)) + if(ndim.ne.-999) then + !$omp critical(fftw) ! serialize non thread-safe FFTW3 calls + call sfftw_destroy_plan(plan(i)) + !$omp end critical(fftw) + end if enddo + + nplan=0 + !$omp end critical(four2a) + return end subroutine four2a diff --git a/libm65/map65a.f90 b/libm65/map65a.f90 index 55ff2c7f8..55145abe7 100644 --- a/libm65/map65a.f90 +++ b/libm65/map65a.f90 @@ -74,7 +74,6 @@ subroutine map65a(dd,ss,savg,newdat,nutc,fcenter,ntol,idphi,nfa,nfb, & ib=nint(fb/df) + 16385 ia=max(51,ia) ib=min(32768-51,ib) - write(66,*) 'A',nqd,ia,ib km=0 nkm=1 @@ -209,7 +208,6 @@ subroutine map65a(dd,ss,savg,newdat,nutc,fcenter,ntol,idphi,nfa,nfb, & call flush(13) go to 999 endif - write(66,*) 'B',nqd,f00 call timer('decode1a',0) ifreq=i ikHz=nint(freq+0.5*(nfa+nfb)-foffset)-nfshift @@ -220,7 +218,7 @@ subroutine map65a(dd,ss,savg,newdat,nutc,fcenter,ntol,idphi,nfa,nfb, & a,dt,pol,nkv,nhist,nsum,nsave,qual,decoded) call timer('decode1a',1) if(nqd.eq.2) then - write(66,*) 'C call QRA64 decoder here...' + call qra64b(nutc,ikhz) cycle endif diff --git a/libm65/qra64b.f90 b/libm65/qra64b.f90 new file mode 100644 index 000000000..9723f2cdd --- /dev/null +++ b/libm65/qra64b.f90 @@ -0,0 +1,31 @@ +subroutine qra64b(nutc,ikhz) + + parameter (NFFT1=5376000) !56*96000 + parameter (NFFT2=336000) !56*6000 (downsampled by 1/16) + complex cx(0:NFFT2-1),cy(0:NFFT2-1) + complex ca(NFFT1),cb(NFFT1) !FFTs of raw x,y data + common/cacb/ca,cb + +!### + if(nutc.ne.-999) return +!### + + df=96000.0/NFFT1 + k0=(ikhz-75.7)*1000.0/df + nh=nfft2/2 + fac=1.0/NFFT2 + cx(0:nh)=ca(k0:k0+nh) + cx(nh+1:NFFT2-1)=ca(k0-nh+1:k0-1) + cx=fac*cx + cy(0:nh)=cb(k0:k0+nh) + cy(nh+1:NFFT2-1)=cb(k0-nh+1:k0-1) + cy=fac*cy + +! Transform back to time domain with sample rate 6000 Hz. + call four2a(cx,NFFT2,1,-1,1) + call four2a(cy,NFFT2,1,-1,1) + + write(67) nutc,cx,cy + + return +end subroutine qra64b