!{\src2tex{textfont=tt}}
!!****f* ABINIT/fresid
!!
!! NAME
!! fresid
!!
!! FUNCTION
!! If option=1, compute the forces due to the residual of the potential
!! If option=2, generate approximate new density from old one,
!!  old atomic positions, and new atomic positions
!!
!! COPYRIGHT
!! Copyright (C) 1998-2007 ABINIT group (XG, MM, MT)
!! This file is distributed under the terms of the
!! GNU General Public License, see ~abinit/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!! For the initials of contributors, see ~abinit/doc/developers/contributors.txt .
!!
!! INPUTS
!! dtset <type(dataset_type)>=all input variables in this dataset
!!  | icoultrtmt=0 periodic treatment of Hartree potential, 1 use of Poisson solver
!!  | natom=number of atoms in cell.
!!  | nspden=number of spin-density components
!!  | typat(natom)=integer type for each atom in cell
!! gmet(3,3)=reciprocal space metric
!! gsqcut=cutoff value on G**2 for sphere inside fft box
!! izero=if 1, unbalanced components of Vhartree(g) are set to zero
!! kxc(nfft,nkxc)=exchange-correlation kernel, needed only if nkxc>0
!! mpi_enreg=informations about MPI parallelization
!! nfft=(effective) number of FFT grid points (for this processor)
!! ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/input_variables/vargs.htm#ngfft
!! nkxc=second dimension of the array kxc, see rhohxc.f for a description
!! ntypat=number of types of atoms in cell.
!! option=see below
!! optres=used only if option=1
!!        =0 if residual array (work) contains the potential residual
!!        =1 if residual array (work) contains the density residual
!! rhor_slice(nfft,nspden)=slices of electron density in electrons/bohr**3.
!! rprimd(3,3)=dimensional primitive translation vectors (bohr)
!! ucvol=unit cell volume (bohr**3).
!! xred_new(3,natom)=new reduced coordinates for atoms in unit cell
!! xred_old(3,natom)=old reduced coordinates for atoms in unit cell
!! znucl(ntypat)=real(dp), atomic number of atom type
!!
!! OUTPUT
!! gresid(3,natom)=forces due to the residual of the potential (f optres=0) or
!!                 the density (if optres=1)
!!
!! SIDE EFFECTS
!! work_slice(nfft,nspden)=different functions defined on slices of the fft grid:
!!  if option==1, the potential residual (resp. density residual) is input,
!!                if optres=0 (resp. optres=1)
!!  if option==2, the interpolated density is output
!!
!! NOTES
!! In order to carry out some comparisons between the sequential and parallelized/MPI_FFT code
!! some arguments and variables are changed.    
!! For example, vreswork is replaced by vreswork_slice
!!              rhor                 by rhor_slice
!!          and work                 by work_slice
!! At the beginning of this routine, the plane-waves are ditributed over all the processors.
!! In the main part, all the processors perform the same calculations over the whole FFT grid.
!! At the end, each processor gets its part of the whole FFT grid.
!! These modifications are not efficient when large FFT grids are used.
!! So they have to be considered as a first step before a comprehensive parallelization of this routine.
!!
!! PARENTS
!!      forces,prcref,scfcv
!!
!! CHILDREN
!!      atmdata,fourdp,hartre,leave_new,mean_fftr,wrtout
!!
!! SOURCE

#if defined HAVE_CONFIG_H
#include "config.h"
#endif

subroutine fresid(dtset,gmet,gresid,gsqcut,izero,kxc,mpi_enreg,nfft,ngfft,nkxc,&
&                  ntypat,option,optres,rhor_slice,rprimd,&
&                  ucvol,work_slice,xred_new,xred_old,znucl)

 use defs_basis
 use defs_datatypes

!This section has been created automatically by the script Abilint (TD). Do not modify these by hand.
#ifdef HAVE_FORTRAN_INTERFACES
 use interfaces_01manage_mpi
 use interfaces_11util
 use interfaces_12ffts
 use interfaces_12poisson
 use interfaces_12spacepar
 use interfaces_13xc
#endif
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: izero,nfft,nkxc,ntypat,option,optres
 real(dp),intent(in) :: gsqcut,ucvol
 type(MPI_type),intent(inout) :: mpi_enreg
 type(dataset_type),intent(in) :: dtset
!arrays
 integer,intent(in) :: ngfft(18)
 real(dp),intent(in) :: gmet(3,3),kxc(nfft,nkxc),rhor_slice(nfft,dtset%nspden)
 real(dp),intent(in) :: rprimd(3,3),xred_new(3,dtset%natom)
 real(dp),intent(in) :: xred_old(3,dtset%natom),znucl(ntypat)
 real(dp),intent(inout) :: work_slice(nfft,dtset%nspden)
 real(dp),intent(out) :: gresid(3,dtset%natom)

!Local variables-------------------------------
!real(dp), parameter :: app_remain=0.001_dp
!scalars
 integer,parameter :: natnum=110
 integer :: atmove,i1,i1_new,i1m,i1p,i2,i2_new,i2m,i2p,i3,i3_new,i3m,i3p,iatnum
 integer :: iatom,ifft,ifft_new,iloop,ind2m,ind2m3m,ind2p,ind2p3p,ind3m,ind3p
 integer :: index,index_new,ishift,ishift1,ishift2,ishift3,ispden,ixp,mshift,mu
 integer :: n1,n2,n3,nfft_tmp,nfftot,nu,quit
 real(dp),parameter :: app_remain=0.01_dp
 real(dp) :: aa,amu,bb,cc,cross,diff_rem1,diff_rem2,diff_rem3,difmag,difmag2
 real(dp) :: difmag2_fact,difmag2_part,drho1,drho1dn,drho1tot,drho1up,drho2
 real(dp) :: drho2dn,drho2tot,drho2up,drho3,drho3dn,drho3tot,drho3up,drhox00
 real(dp) :: drhox01,drhox10,drhox11,drhoxy0,drhoxy1,drhoxyz,fact,factor,g1
 real(dp) :: range,range2,rcov,rcov2,rcovm1,rdiff1,rdiff2,rdiff3,rho1_tot
 real(dp) :: rho2_tot,vresid1,vresid2,x2,xx,yy,zz
 character(len=2) :: symbol
 character(len=500) :: message
!arrays
 integer :: diff_igrid(3),igrid(3),irange(3)
 integer,allocatable :: ii(:,:)
 real(dp) :: diff_grid(3),diff_rem(3),diff_tau(3),diff_xred(3),lencp(3),qq(3)
 real(dp) :: rhosum(4),rmet(3,3),scale(3),tau(3),tsec(2)
 real(dp),allocatable :: approp(:),atmrho(:,:),rhor(:,:),rrdiff(:,:)
 real(dp),allocatable :: vreswork(:,:),vreswork_slice(:,:),work(:,:),workg(:,:)
 logical,allocatable :: my_sphere(:)

! *************************************************************************

!Define magnitude of cross product of two vectors
 cross(xx,yy,zz,aa,bb,cc)=&
&        sqrt((yy*cc-zz*bb)**2+(zz*aa-xx*cc)**2+(xx*bb-yy*aa)**2)

!DEBUG
!write(6,*)' fresid : enter'
!stop
!ENDDEBUG

 if(dtset%nspden==4 .and. option>=2)then
  write(message, '(4a)') ch10,&
&   ' fresid : ERROR -',ch10,&
    '   option=2 is not compatible with nspden=4 !'
  call wrtout(6,message,'PERS')
  call leave_new('PERS')
 end if
 if(dtset%nspden==4 .and. optres==1)then
  write(message, '(4a)') ch10,&
&   ' fresid : ERROR -',ch10,&
    '   optres=1 is not compatible with nspden=4 !'
  call wrtout(6,message,'PERS')
  call leave_new('PERS')
 end if
 if(option==1.and.optres==1.and.nkxc==0)then
  write(message, '(4a)') ch10,&
&   ' fresid : BUG -',ch10,&
    '   nkxc should be greater than zero !'
  call wrtout(6,message,'PERS')
  call leave_new('PERS')
 end if

!Compute lengths of cross products for pairs of primitive
!translation vectors (used in setting index search range below)
 lencp(1)=cross(rprimd(1,2),rprimd(2,2),rprimd(3,2),&
&               rprimd(1,3),rprimd(2,3),rprimd(3,3))
 lencp(2)=cross(rprimd(1,3),rprimd(2,3),rprimd(3,3),&
&               rprimd(1,1),rprimd(2,1),rprimd(3,1))
 lencp(3)=cross(rprimd(1,1),rprimd(2,1),rprimd(3,1),&
&               rprimd(1,2),rprimd(2,2),rprimd(3,2))

!Compute factor R1.(R2xR3)/|R2xR3| etc for 1, 2, 3
!(recall ucvol=R1.(R2xR3))
 scale(:)=ucvol/lencp(:)

!Compute metric tensor in real space rmet
 do nu=1,3
  rmet(:,nu)=rprimd(1,:)*rprimd(1,nu)+rprimd(2,:)*rprimd(2,nu)+rprimd(3,:)*rprimd(3,nu)
 end do

!If residual is a density residual, has to convert it to a potential residual
!                                   with Vresid=dV/dN.Nresid=VH(Nresid)+dVxc/dN.Nresid
 if (option==1.and.optres==1) then
  ! In present case, work_slice is the density residual in real space
  if (dtset%icoultrtmt == 0) then
    ! We work in reciprocal space, then need to move density residual
    ! to Fourier space before applying hartre().
    allocate(vreswork_slice(nfft,dtset%nspden),workg(2,nfft));qq=zero
    do ispden=1,dtset%nspden
     call fourdp(1,workg,work_slice(:,ispden),-1,mpi_enreg,nfft,ngfft,0)
     call hartre(1,gmet,gsqcut,izero,mpi_enreg,nfft,ngfft,qq,workg,vreswork_slice(:,ispden))
    end do
    deallocate(workg)
  else
    ! We work in real space, applying directly Poisson's solver.
    call PSolver_hartree(dtset, 1, nfft, ngfft, work_slice(:, ispden), rprimd, vreswork_slice(:,ispden))
  end if
  if (dtset%nspden==1) then
!$OMP PARALLEL DO PRIVATE(ifft) &
!$OMP&SHARED(nfft,kxc,vreswork_slice,work_slice)
   do ifft=1,nfft
    if (abs(kxc(ifft,1))<=tol6) vreswork_slice(ifft,1)=vreswork_slice(ifft,1)+kxc(ifft,1)*work_slice(ifft,1)
   end do
!$OMP END PARALLEL DO
  else
!$OMP PARALLEL DO PRIVATE(ifft) &
!$OMP&SHARED(nfft,kxc,vreswork_slice,work_slice)
   do ifft=1,nfft
    if (abs(kxc(ifft,1))<=tol6) vreswork_slice(ifft,1)=vreswork_slice(ifft,1)+kxc(ifft,1)*work_slice(ifft,2)
    if (abs(kxc(ifft,2))<=tol6) vreswork_slice(ifft,1)=vreswork_slice(ifft,1)+kxc(ifft,2)*(work_slice(ifft,1)-work_slice(ifft,2))
   end do
!$OMP END PARALLEL DO
!$OMP PARALLEL DO PRIVATE(ifft) &
!$OMP&SHARED(nfft,kxc,vreswork_slice,work_slice)
   do ifft=1,nfft
    if (abs(kxc(ifft,2))<=tol6) vreswork_slice(ifft,2)=vreswork_slice(ifft,2)+kxc(ifft,2)*work_slice(ifft,2)
    if (abs(kxc(ifft,3))<=tol6) vreswork_slice(ifft,2)=vreswork_slice(ifft,2)+kxc(ifft,3)*(work_slice(ifft,1)-work_slice(ifft,2))
   end do
!$OMP END PARALLEL DO
  end if
 end if

!band-FFT parallelization: For testing purposes
!Starting from now, calculations are performed on the whole FFT grid and no more on slices.
!The nfft variable becomes nfft_tmp until the end

 n1=ngfft(1)
 n2=ngfft(2)
 n3=ngfft(3)
 nfftot=n1*n2*n3
 nfft_tmp=nfftot
 allocate(rhor(nfftot,dtset%nspden),work(nfftot,dtset%nspden))
 if(mpi_enreg%mode_para=='b') then 
  call pre_scatter(rhor_slice,rhor,ngfft(1),ngfft(2),ngfft(3),mpi_enreg,'gather')
  call pre_scatter(work_slice,work,ngfft(1),ngfft(2),ngfft(3),mpi_enreg,'gather')
  if (option==1.and.optres==1) then
   allocate(vreswork(nfftot,dtset%nspden))
   call pre_scatter(vreswork_slice,vreswork,ngfft(1),ngfft(2),ngfft(3),mpi_enreg,'gather')
  end if  
 else
  rhor(:,:)=rhor_slice(:,:)
  work(:,:)=work_slice(:,:)
  if (option==1.and.optres==1) then
   allocate(vreswork(nfftot,dtset%nspden))
   vreswork(:,:)=vreswork_slice(:,:)
  end if 
 end if

 gresid(1:3,1:dtset%natom)=0.0_dp

 quit=0

!Initialize appropriation function
 allocate(approp(nfft_tmp),atmrho(nfft_tmp,dtset%nspden),my_sphere(nfft_tmp))

 approp(:)=app_remain

!First loop over atoms in unit cell : build appropriation function
!Second loop : compute forces
 do iloop=1,2

! Take into account the remaining density
  if(option==2 .and. iloop==2)then
   do ispden=1,dtset%nspden
    do ifft=1,nfft_tmp
     work(ifft,ispden)=rhor(ifft,ispden)*approp(ifft)*app_remain
    end do
   end do
   call mean_fftr(work,rhosum,mpi_enreg,nfft_tmp,nfftot,min(dtset%nspden,2))

!band-FFT parallelization: For testing purposes
!This part is not parallelized in the case of mode_para=='b'.
!All the processors perform the same calculation.  
!We divided by nproc_fft in order to "remove" the xsum_mpi made in mean_fftr
   if(mpi_enreg%mode_para=='b') rhosum(:)=rhosum(:)/mpi_enreg%nproc_fft

!  This will be used to restore proper normalization of density
   rho1_tot=rhosum(1)*nfftot
   if(dtset%nspden==2)rho2_tot=rhosum(2)*nfftot
  end if
  do iatom=1,dtset%natom

!  Get the covalent radius
   call atmdata(amu,rcov,symbol,znucl(dtset%typat(iatom)))

!  Set search range
   rcov2=rcov**2
   range=2._dp*rcov
   range2=range**2
   rcovm1=1.0_dp/rcov

!  Use range to compute an index range along R(1:3)
!  (add 1 to make sure it covers full range)
   irange(1)=1+nint((range/scale(1))*dble(ngfft(1)))
   irange(2)=1+nint((range/scale(2))*dble(ngfft(2)))
   irange(3)=1+nint((range/scale(3))*dble(ngfft(3)))

!  Allocate ii and rrdiff
   mshift=2*maxval(irange(1:3))+1
   allocate(ii(mshift,3),rrdiff(mshift,3))

!  Consider each component in turn
   do mu=1,3

!   Convert reduced coord of given atom to [0,1)
    tau(mu)=mod(xred_old(mu,iatom)+1._dp-aint(xred_old(mu,iatom)),1._dp)

!   Use tau to find nearest grid point along R(mu)
!   (igrid=0 is the origin; shift by 1 to agree with usual index)
    igrid(mu)=nint(tau(mu)*dble(ngfft(mu)))

!   Set up a counter that explore the relevant range
!   of points around the atom
    ishift=0
    do ixp=igrid(mu)-irange(mu),igrid(mu)+irange(mu)
     ishift=ishift+1
     ii(ishift,mu)=1+mod(ngfft(mu)+mod(ixp,ngfft(mu)),ngfft(mu))
     rrdiff(ishift,mu)=dble(ixp)/dble(ngfft(mu))-tau(mu)
    end do

!   If option 2, set up quantities related with the change of atomic coordinates
    if(option==2 .and. iloop==2)then
     diff_xred(mu)=xred_new(mu,iatom)-xred_old(mu,iatom)
!    Convert to [0,1)
     diff_tau(mu)=mod(diff_xred(mu)+1._dp-aint(diff_xred(mu)),1._dp)
!    Convert to [0,ngfft)
     diff_grid(mu)=diff_tau(mu)*dble(ngfft(mu))
!    Integer part
     diff_igrid(mu)=int(diff_grid(mu))
!    Compute remainder
     diff_rem(mu)=diff_grid(mu)-diff_igrid(mu)

!DEBUG
!    write(6,*)' mu,diff',mu,diff_igrid(mu),diff_rem(mu)
!ENDDEBUG

    end if

!  End loop on mu
   end do

!  May be the atom is fixed
   if(option==2 .and. iloop==2)then
    atmove=1
    if(diff_xred(1)**2+diff_xred(2)**2+diff_xred(3)**2 < 1.0d-24)then
     atmove=0
    else
     diff_rem1=diff_rem(1)
     diff_rem2=diff_rem(2)
     diff_rem3=diff_rem(3)
    end if
   end if

!  If second loop, initialize atomic density, and the variable
!  that says whether a fft point belongs to the sphere of the atom
   if(iloop==2) then
    atmrho(:,:)=0.0_dp
    my_sphere(:)=.false.
   end if

!  Conduct triple loop over restricted range of grid points for iatom

   do ishift3=1,1+2*irange(3)
!   map back to [1,ngfft(3)] for usual fortran index in unit cell
    i3=ii(ishift3,3)
    i3m=i3-1 ; if(i3==1)i3m=n3
    i3p=i3+1 ; if(i3==n3)i3p=1

!   find vector from atom location to grid point (reduced)
    rdiff3=rrdiff(ishift3,3)

    do ishift2=1,1+2*irange(2)
     i2=ii(ishift2,2)
     i2m=i2-1 ; if(i2==1)i2m=n2
     i2p=i2+1 ; if(i2==n2)i2p=1
     index=n1*(i2-1+n2*(i3-1))
     ind3m=n1*(i2-1+n2*(i3m-1))
     ind3p=n1*(i2-1+n2*(i3p-1))
     ind2m=n1*(i2m-1+n2*(i3-1))
     ind2p=n1*(i2p-1+n2*(i3-1))
     ind2p3p=n1*(i2p-1+n2*(i3p-1))

     rdiff2=rrdiff(ishift2,2)
!    Prepare the computation of difmag2
     difmag2_part=rmet(3,3)*rdiff3**2+rmet(2,2)*rdiff2**2&
&                +2.0_dp*rmet(3,2)*rdiff3*rdiff2
     difmag2_fact=2.0_dp*(rmet(3,1)*rdiff3+rmet(2,1)*rdiff2)

     do ishift1=1,1+2*irange(1)
      rdiff1=rrdiff(ishift1,1)

!     Compute (rgrid-tau-Rprim)**2
      difmag2= difmag2_part+rdiff1*(difmag2_fact+rmet(1,1)*rdiff1)

!     Only accept contribution inside defined range
!     This condition means that x, calculated below, cannot exceed 2.0_dp
      if (difmag2<range2) then

!      Will compute contribution to appropriation function based on
!      rcov2, range2 and difmag2
       i1=ii(ishift1,1)
       ifft=i1+index

       if(iloop==1)then

!       Build appropriation function
        if (difmag2<rcov2)then
         approp(ifft)=approp(ifft)+1.0_dp
        else
         difmag=sqrt(difmag2)
         xx=difmag*rcovm1
!        The following function is 1. at xx=1, 0. at xx=2, with vanishing
!        derivatives at these points.
         approp(ifft)=approp(ifft)+((2.0_dp*xx-9.0_dp)*xx+12.0_dp)*xx-4.0_dp
        end if

       else

!       Build atomic density
        if (difmag2<rcov2)then
         atmrho(ifft,1)=atmrho(ifft,1)+rhor(ifft,1)*approp(ifft)
         if(dtset%nspden==2)atmrho(ifft,2)=atmrho(ifft,2)+rhor(ifft,2)*approp(ifft)
        else
         difmag=sqrt(difmag2)
         xx=difmag*rcovm1
         fact=((2.0_dp*xx-9.0_dp)*xx+12.0_dp)*xx-4.0_dp
         atmrho(ifft,1)=atmrho(ifft,1)+rhor(ifft,1)*fact*approp(ifft)
         if(dtset%nspden==2)&
&            atmrho(ifft,2)=atmrho(ifft,2)+rhor(ifft,2)*fact*approp(ifft)
        end if

!       Compute the sphere of the atom : it is different for
!       option 1 and for option 2
        i1p=i1+1 ; if(i1==n1)i1p=1
        if(option==1)then
         i1m=i1-1 ; if(i1==1)i1m=n1
         my_sphere(ifft)=.true.
         my_sphere(i1p+index)=.true. ; my_sphere(i1m+index)=.true.
         my_sphere(i1+ind2p)=.true. ; my_sphere(i1+ind2m)=.true.
         my_sphere(i1+ind3p)=.true. ; my_sphere(i1+ind3m)=.true.
        else
         my_sphere(ifft)=.true. ; my_sphere(i1p+index)=.true.
         my_sphere(i1+ind2p)=.true. ; my_sphere(i1p+ind2p)=.true.
         my_sphere(i1+ind3p)=.true. ; my_sphere(i1p+ind3p)=.true.
         my_sphere(i1+ind2p3p)=.true. ; my_sphere(i1p+ind2p3p)=.true.
        end if

       end if

!     End of condition on the range
      end if

!    End loop on ishift1
     end do

!   End loop on ishift2
    end do

!  End loop on ishift3
   end do

!  At the end of the second loop for each atom, compute the force
!  from the atomic densities, or translate density.
!  In the first case, use a two-point finite-difference approximation,
!  since this calculation serves only to decrease the error,
!  and should not be very accurate, but fast.
!  In the second case, using a crude trilinear interpolation scheme
!  for the same reason.
!
!  The section is skipped if option==2 and the atom is fixed
   if(iloop==2 .and. (option==1 .or. atmove==1) )then

    do i3=1,n3
     i3m=i3-1 ; if(i3==1)i3m=n3
     i3p=i3+1 ; if(i3==n3)i3p=1
     i3_new=i3+diff_igrid(3) ; if(i3_new > n3)i3_new=i3_new-n3
     do i2=1,n2
      i2m=i2-1 ; if(i2==1)i2m=n2
      i2p=i2+1 ; if(i2==n2)i2p=1
      i2_new=i2+diff_igrid(2) ; if(i2_new > n2)i2_new=i2_new-n2
      index=n1*(i2-1+n2*(i3-1))
      index_new=n1*(i2_new-1+n2*(i3_new-1))
      ind3m=n1*(i2-1+n2*(i3m-1))
      ind3p=n1*(i2-1+n2*(i3p-1))
      ind2m=n1*(i2m-1+n2*(i3-1))
      ind2p=n1*(i2p-1+n2*(i3-1))
      ind2m3m=n1*(i2m-1+n2*(i3m-1))
      do i1=1,n1
       ifft=i1+index
       if(my_sphere(ifft))then

        i1m=i1-1 ; if(i1==1)i1m=n1

        if(option==1)then
!        Treat option 1 : computation of residual forces
         i1p=i1+1 ; if(i1==n1)i1p=1
!        Distinguish spin-unpolarized and spin-polarized
         if(dtset%nspden==1)then
!         Note that the factor needed to obtain a true finite difference
!         estimation of the derivative will be applied afterwards, for speed
          drho1=atmrho(i1p+index,1)-atmrho(i1m+index,1)
          drho2=atmrho(i1+ind2p,1) -atmrho(i1+ind2m,1)
          drho3=atmrho(i1+ind3p,1) -atmrho(i1+ind3m,1)
          if (optres==0) then
           vresid1=work(ifft,1)
          else
           vresid1=vreswork(ifft,1)
          end if
          gresid(1,iatom)=gresid(1,iatom)+drho1*vresid1
          gresid(2,iatom)=gresid(2,iatom)+drho2*vresid1
          gresid(3,iatom)=gresid(3,iatom)+drho3*vresid1
         else
          drho1tot=atmrho(i1p+index,1)-atmrho(i1m+index,1)
          drho2tot=atmrho(i1+ind2p,1) -atmrho(i1+ind2m,1)
          drho3tot=atmrho(i1+ind3p,1) -atmrho(i1+ind3m,1)
          drho1up=atmrho(i1p+index,2)-atmrho(i1m+index,2)
          drho2up=atmrho(i1+ind2p,2) -atmrho(i1+ind2m,2)
          drho3up=atmrho(i1+ind3p,2) -atmrho(i1+ind3m,2)
          drho1dn=drho1tot-drho1up
          drho2dn=drho2tot-drho2up
          drho3dn=drho3tot-drho3up
          if (optres==0) then
           vresid1=work(ifft,1)
           vresid2=work(ifft,2)
          else
           vresid1=vreswork(ifft,1)
           vresid2=vreswork(ifft,2)
          end if
          gresid(1,iatom)=gresid(1,iatom)+drho1up*vresid1+drho1dn*vresid2
          gresid(2,iatom)=gresid(2,iatom)+drho2up*vresid1+drho2dn*vresid2
          gresid(3,iatom)=gresid(3,iatom)+drho3up*vresid1+drho3dn*vresid2
         end if

!       Treat the case option==2 now : trilinear interpolation of the density
        else
         i1_new=i1+diff_igrid(1) ; if(i1_new > n1)i1_new=i1_new-n1
         ifft_new=i1_new+index_new
         drhox00=(atmrho(i1m+index,1)-atmrho(i1+index,1))*diff_rem1+&
&                                            atmrho(i1+index,1)
         drhox10=(atmrho(i1m+ind2m,1)-atmrho(i1+ind2m,1))*diff_rem1+&
&                                            atmrho(i1+ind2m,1)
         drhox01=(atmrho(i1m+ind3m,1)-atmrho(i1+ind3m,1))*diff_rem1+&
&                                            atmrho(i1+ind3m,1)
         drhox11=(atmrho(i1m+ind2m3m,1)-atmrho(i1+ind2m3m,1))*diff_rem1+&
&                                            atmrho(i1+ind2m3m,1)
         drhoxy0=(drhox10-drhox00)*diff_rem2+drhox00
         drhoxy1=(drhox11-drhox01)*diff_rem2+drhox01
         drhoxyz=(drhoxy1-drhoxy0)*diff_rem3+drhoxy0
         work(ifft_new,1)=work(ifft_new,1)+drhoxyz
         rho1_tot=rho1_tot+drhoxyz

         if(dtset%nspden==2)then
          drhox00=(atmrho(i1m+index,2)-atmrho(i1+index,2))*diff_rem1+&
&                                             atmrho(i1+index,2)
          drhox10=(atmrho(i1m+ind2m,2)-atmrho(i1+ind2m,2))*diff_rem1+&
&                                             atmrho(i1+ind2m,2)
          drhox01=(atmrho(i1m+ind3m,2)-atmrho(i1+ind3m,2))*diff_rem1+&
&                                             atmrho(i1+ind3m,2)
          drhox11=(atmrho(i1m+ind2m3m,2)-atmrho(i1+ind2m3m,2))*diff_rem1+&
&                                             atmrho(i1+ind2m3m,2)
          drhoxy0=(drhox10-drhox00)*diff_rem2+drhox00
          drhoxy1=(drhox11-drhox01)*diff_rem2+drhox01
          drhoxyz=(drhoxy1-drhoxy0)*diff_rem3+drhoxy0
          work(ifft_new,2)=work(ifft_new,2)+drhoxyz
          rho2_tot=rho2_tot+drhoxyz
         end if
        end if

!      End condition of belonging to the sphere of influence of the atom
       end if
      end do
     end do
    end do
!   The finite-difference factor applied here also take
!   into account diverse factors
    fact=-ucvol/dble(nfftot)
    gresid(1,iatom)=gresid(1,iatom)*dble(n1)*.5_dp*fact
    gresid(2,iatom)=gresid(2,iatom)*dble(n2)*.5_dp*fact
    gresid(3,iatom)=gresid(3,iatom)*dble(n3)*.5_dp*fact

   end if

!  Update work if the atom is fixed.
   if(iloop==2 .and. option==2 .and. atmove==0)then
    do ispden=1,dtset%nspden
     do ifft=1,nfft_tmp
      work(ifft,ispden)=work(ifft,ispden)+atmrho(ifft,ispden)
     end do
    end do
    call mean_fftr(atmrho,rhosum,mpi_enreg,nfft_tmp,nfftot,min(dtset%nspden,2))

!band-FFT parallelization: For testing purposes
!This part is not parallelized in the case of mode_para=='b'.
!All the processors perform the same calculation.  
!We divided by nproc_fft in order to "remove" the xsum_mpi made in mean_fftr
    if(mpi_enreg%mode_para=='b') rhosum(:)=rhosum(:)/mpi_enreg%nproc_fft

    rho1_tot=rho1_tot+rhosum(1)*nfftot
    if(dtset%nspden==2)rho2_tot=rho2_tot+rhosum(2)*nfftot
   end if

   deallocate(ii,rrdiff)

! End loop on atoms
  end do

!DEBUG
!if(option==2)then
! if(iloop==1)then
!  write(6,*)' fresid : rhor, approp'
!  do ifft=1,n1
!   write(6,*)ifft,rhor(ifft,1),approp(ifft)
!  end do
! end if
! if(iloop==2)then
!  write(6,*)' fresid : rhor, approp, work(:,:)'
!  do ifft=1,n1
!   write(6, '(i4,3es18.8)' )ifft,rhor(ifft,1),approp(ifft),work(ifft,1)
!  end do
!  do ifft=1,nfft_tmp
!   if(work(ifft,1)<0.0_dp)then
!    write(6,*)' f_fft negative value',work(ifft,1),' for ifft=',ifft
!   end if
!   if(rhor(ifft,1)<0.0_dp)then
!    write(6,*)' rhor  negative value',rhor(ifft,1),' for ifft=',ifft
!   end if
!  end do
! end if
!end if
!ENDDEBUG

  if(quit==1)exit

! At the end of the first loop, where the appropriation function is generated,
! invert it, to save cpu time later.
  if(iloop==1)approp(:)=1.0_dp/approp(:)

!End first or second pass through atoms
 end do

!Restore proper normalisation of density
 if(option==2)then
  call mean_fftr(rhor,rhosum,mpi_enreg,nfft_tmp,nfftot,min(dtset%nspden,2))

!band-FFT parallelization: For testing purposes
!This part is not parallelized in the case of mode_para=='b'.
!All the processors perform the same calculation.  
!We divided by nproc_fft in order to "remove" the xsum_mpi made in mean_fftr
  if(mpi_enreg%mode_para=='b') rhosum(:)=rhosum(:)/mpi_enreg%nproc_fft

!"!OCL NOPREEX" to avoid zero division after optimization (-Of) by MM
!(Even if nspden=1, "1.0/rho2_tot" will appear on vpp fujitsu
!OCL NOPREEX
  do ispden=1,dtset%nspden
   if(ispden==1)factor=rhosum(1)*dble(nfftot)/rho1_tot
   if(ispden==2)factor=rhosum(2)*dble(nfftot)/rho2_tot
   work(:,ispden)=factor*work(:,ispden)
  end do
!DEBUG
! Here, zero all the hard work, for checking purposes !
! work(:,:)=rhor(:,:)
!ENDDEBUG
 end if

 deallocate(approp,atmrho,my_sphere)
 if (option==1.and.optres==1) deallocate(vreswork,vreswork_slice)

 if(mpi_enreg%mode_para=='b') then 
  call pre_scatter(work_slice,work,ngfft(1),ngfft(2),ngfft(3),mpi_enreg,'scatter')
 else
  work_slice(:,:)=work(:,:)
 end if
 deallocate(rhor,work)

!DEBUG
!write(6,*)' fresid : exit '
!ENDDEBUG

end subroutine fresid

!!***
