      program pingtestc this is a network performance ping-pong testc where one processor sends data and another echoes it backc on each iteration, the length of data sent doublesc iter = number of different message lengthsc nter = number of interations within timing loopc mter = number of times test is repeated to accumulate statistics      implicit none      integer iter, maxl, nter, mter      parameter(iter=21,maxl=2**(iter-1),nter=1,mter=10)      integer isbuf(maxl), irbuf(maxl)      real rate(iter), var(iter), rmin(iter), rmax(iter)      integer nerr(iter), mist(iter)      integer i, j, k      integer lint, idproc, nvp, id0, id1, lmsg, is, ierr, msid      real abytes, altime, time, r, anorm      character*16 label      character*36 namec common block for parallel processing      integer nproc, lgrp, mreal, mint, mcplx, istatc lgrp = current communicatorc mint = default datatype for integers      common /pparms/ nproc, lgrp, mreal, mint, mcplx      dimension istat(8)c SET MACHINE SPECIFIC DATAc lint = number of bytes in an integer      lint = 4c machine name and type      label = 'Mac G3 Cluster'c initialize data      abytes = real(2*lint*nter)*1.0e-6      do 10 i = 1, maxl      isbuf(i) = i      irbuf(i) = 0   10 continuec initialize for parallel processing      nvp = 0      call ppinit(idproc,nvp)c set ids for participating processors      id0 = 2*(idproc/2)      id1 = id0 + 1c suppress last node if number of nodes is odd      if (id1.eq.nvp) id0 = id1c perform ping-pong      altime = 0.c clear out counters      do 20 i = 1, iter      rate(i) = 0.      var(i) = 0.      rmin(i) = -1.      rmax(i) = 0.      nerr(i) = 0      mist(i) = 0   20 continue      do 60 k = 1, mter      write(name,'(i2)') k      call LOGNAME('Ping-Pong Test, Pass '//name(1:2))c loop over different message lengths      do 50 i = 1, iter      lmsg = 2**(i-1)c     if (idproc.eq.0) write (6,*) 'data length = ', lmsg      call timera(-1,'pingpong',time)      do 30 j = 1, nter      if (idproc.eq.id0) then         call MPI_SEND(isbuf,lmsg,mint,id1,i,lgrp,ierr)         call MPI_RECV(irbuf,lmsg,mint,id1,i+1,lgrp,istat,ierr)      elseif (idproc.eq.id1) then         call MPI_RECV(irbuf,lmsg,mint,id0,i,lgrp,istat,ierr)         call MPI_SEND(irbuf,lmsg,mint,id0,i+1,lgrp,ierr)      endif   30 continue      call timera(1,'pingpong',time)      if (idproc.eq.0) thenc compare result with expected sum         ierr = 0         do 40 j = 1, lmsg         if (isbuf(j).ne.irbuf(j)) ierr = ierr + 1   40    continue         nerr(i) = nerr(i) + ierrc calculate result         if (time.gt.0.) then            r = (abytes*real(lmsg))/time            rate(i) = rate(i) + r            var(i) = var(i) + r*r            if (rmin(i).lt.0) then               rmin(i) = r            else               rmin(i) = min(r,rmin(i))            endif            rmax(i) = max(r,rmax(i))         else            mist(i) = mist(i) + 1         endifc        write (6,*) 'rate(MBS) = ', r, ' errors = ', ierr         if (i.eq.1) altime = altime + time      endif   50 continue   60 continuec write out results to file      if (idproc.eq.0) then         if (mter.gt.mist(1)) then            anorm = 1./real(mter - mist(1))         else            anorm = 0.         endif         altime = 500.*altime*anorm/real(nter)         open(unit=7,file='PING.DATA',form='formatted',status='unknown')         write (7,*) '# Ping-Pong Test on ', label         write (7,*) '# Average latency(msec) = ', altime         write (7,*) 'Rate(MBS) variance ln2(data) data_length min max'     1, ' errors'         do 70 i = 1, iter         if (mter.gt.mist(i)) then            anorm = 1./real(mter - mist(i))         else            anorm = 0.         endif         rate(i) = rate(i)*anorm         var(i) = sqrt(max((var(i)*anorm - rate(i)**2),0.))         is = i - 1         lmsg = 2**is         write (7,*) rate(i), var(i), is, lmsg, rmin(i), rmax(i),nerr(i)   70    continue         close(unit=7)      endifc perform swap      altime = 0.c clear out counters      do 80 i = 1, iter      rate(i) = 0.      var(i) = 0.      rmin(i) = -1.      rmax(i) = 0.      nerr(i) = 0      mist(i) = 0   80 continue      do 120 k = 1, mter      write(name,'(i2)') k      call LOGNAME('Swap Test, Pass '//name(1:2))c loop over different message lengths      do 110 i = 1, iter      lmsg = 2**(i-1)c     if (idproc.eq.0) write (6,*) 'data length = ', lmsg      call timera(-1,'swap    ',time)      do 90 j = 1, nter      if (idproc.eq.id0) then         call MPI_IRECV(irbuf,lmsg,mint,id1,i+1,lgrp,msid,ierr)         call MPI_SEND(isbuf,lmsg,mint,id1,i,lgrp,ierr)         call MPI_WAIT(msid,istat,ierr)      elseif (idproc.eq.id1) then         call MPI_IRECV(irbuf,lmsg,mint,id0,i,lgrp,msid,ierr)         call MPI_SEND(isbuf,lmsg,mint,id0,i+1,lgrp,ierr)         call MPI_WAIT(msid,istat,ierr)      endif   90 continue      call timera(1,'swap    ',time)      if (idproc.eq.0) thenc compare result with expected sum         ierr = 0         do 100 j = 1, lmsg         if (isbuf(j).ne.irbuf(j)) ierr = ierr + 1  100    continue         nerr(i) = nerr(i) + ierrc calculate result         if (time.gt.0.) then            r = (abytes*real(lmsg))/time            rate(i) = rate(i) + r            var(i) = var(i) + r*r            if (rmin(i).lt.0) then               rmin(i) = r            else               rmin(i) = min(r,rmin(i))            endif            rmax(i) = max(r,rmax(i))         else            mist(i) = mist(i) + 1         endifc        write (6,*) 'rate(MBS) = ', r, ' errors = ', ierr         if (i.eq.1) altime = altime + time      endif  110 continue  120 continuec write out results to file      if (idproc.eq.0) then         if (mter.gt.mist(1)) then            anorm = 1./real(mter - mist(1))         else            anorm = 0.         endif         altime = 500.*altime*anorm/real(nter)         open(unit=8,file='SWAP.DATA',form='formatted',status='unknown')         write (8,*) '# Swap Test on ', label         write (8,*) '# Average latency(msec) = ', altime         write (8,*) 'Rate(MBS) variance ln2(data) data_length min max'     1, ' errors'         do 130 i = 1, iter         if (mter.gt.mist(i)) then            anorm = 1./real(mter - mist(i))         else            anorm = 0.         endif         rate(i) = rate(i)*anorm         var(i) = sqrt(max((var(i)*anorm - rate(i)**2),0.))         is = i - 1         lmsg = 2**is         write (8,*) rate(i), var(i), is, lmsg, rmin(i), rmax(i),nerr(i)  130    continue         close(unit=8)      endifc terminate parallel processing      call ppexit      stop      endc-----------------------------------------------------------------------      subroutine ppinit(idproc,nvp)c this subroutine initializes parallel processingc input: nvp, output: idprocc idproc = processor idc nvp = number of real or virtual processors requested      implicit none      integer idproc, nvpc get definition of MPI constants      include 'mpif.h'c common block for parallel processing      integer nproc, lgrp, lstat, mreal, mint, mcplxc lstat = length of status array      parameter(lstat=8)c nproc = number of real or virtual processors obtainedc lgrp = current communicatorc mreal = default datatype for realsc mint = default datatype for integersc mcplx = default datatype for complex type      common /pparms/ nproc, lgrp, mreal, mint, mcplxc local data      integer ierror, ndprec      save /pparms/c ndprec = (0,1) = (no,yes) use (normal,autodouble) precision      data ndprec /0/      if (MPI_STATUS_SIZE.gt.lstat) then         write (2,*) ' status size too small, actual/required = ', lstat     1, MPI_STATUS_SIZE         stop      endifc initialize the MPI execution environment      call MPI_INIT(ierror)      if (ierror.ne.0) stop      lgrp = MPI_COMM_WORLDc determine the rank of the calling process in the communicator      call MPI_COMM_RANK(lgrp,idproc,ierror)c determine the size of the group associated with a communicator      call MPI_COMM_SIZE(lgrp,nproc,ierror)c set default datatypes         mint = MPI_INTEGERc single precision      if (ndprec.eq.0) then         mreal = MPI_REAL         mcplx = MPI_COMPLEXc double precision      else         mreal = MPI_DOUBLE_PRECISION         mcplx = MPI_DOUBLE_COMPLEX      endif      if (nvp.eq.0) then         nvp = nprocc requested number of processors not obtained      elseif (nproc.ne.nvp) then         write (2,*) ' processor number error: nvp, nproc=', nvp, nproc         call ppexit         stop      endif      return      endc-----------------------------------------------------------------------      subroutine ppexitc this subroutine terminates parallel processing      implicit nonec common block for parallel processing      integer nproc, lgrp, mreal, mint, mcplxc lgrp = current communicator      common /pparms/ nproc, lgrp, mreal, mint, mcplx      integer ierrorc synchronize processes      call MPI_BARRIER(lgrp,ierror)c terminate MPI execution environment      call MPI_FINALIZE(ierror)      return      endc-----------------------------------------------------------------------      subroutine timera(icntrl,chr,time)c this subroutine performs timingc input: icntrl, chrc icntrl = (-1,0,1) = (initialize,ignore,read) clockc clock should be initialized before it is read!c chr = character variable for labeling timingsc time = elapsed time in secondsc written for mpi      implicit none      integer icntrl      character*8 chr      real timec get definition of MPI constants      include 'mpif.h'c common block for parallel processing      integer nproc, lgrp, mreal, mint, mcplxc lgrp = current communicatorc mreal = default datatype for reals      common /pparms/ nproc, lgrp, mreal, mint, mcplxc local data      integer idproc, ierr      real nclock, mclock      double precision jclock      save jclock   91 format (1x,a8,1x,'max/min real time = ',e14.7,1x,e14.7,1x,'sec')      data jclock /0.0d0/      if (icntrl.eq.0) return      if (icntrl.eq.1) go to 10c initialize clock      call MPI_BARRIER(lgrp,ierr)      jclock = MPI_WTIME()      returnc read clock and write time difference from last clock initialization   10 nclock = real(MPI_WTIME() - jclock)      call MPI_ALLREDUCE(nclock,time,1,mreal,MPI_MIN,lgrp,ierr)      mclock = time      call MPI_ALLREDUCE(nclock,time,1,mreal,MPI_MAX,lgrp,ierr)      call MPI_COMM_RANK(lgrp,idproc,ierr)c     if (idproc.eq.0) write (6,91) chr, time, mclock      return      end