/**********************************************************************
  Set_ProExpn_VNA.c:

     Set_ProExpn_VNA.c is a subroutine to calculate matrix elements and
     the derivatives of VNA projector expansion in the momentum space.

  Log of Set_ProExpn_VNA.c:

     8/Apr/2004  Released by T.Ozaki

***********************************************************************/

#define  measure_time   0

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <time.h>
#include <sys/types.h>
#include <sys/times.h>
#include <sys/time.h> 
#include <sys/stat.h>
#include <unistd.h>
#include "openmx_common.h"

#ifdef nompi
#include "mimic_mpi.h"
#else
#include "mpi.h"
#endif

static double VNA_BesselF(int Gensi, int L1, int Mul1, double R);
static double Set_ProExpn(double ****HVNA, Type_DS_VNA *****DS_VNA);
static double Set_VNA12(double ****HVNA, double *****HVNA2);

double Set_ProExpn_VNA(double ****HVNA, double *****HVNA2, Type_DS_VNA *****DS_VNA)
{
  static double time1,time2;

  /* separable form */
  time1 = Set_ProExpn(HVNA, DS_VNA);

  /* one-center (orbitals) but two-center integrals */
  time2 = Set_VNA12(HVNA, HVNA2);

  if (measure_time){
    printf("Time Set_ProExpn=%7.3f Set_VNA12=%7.3f\n",time1,time2);
  }

  return time1+time2;
}



double Set_ProExpn(double ****HVNA, Type_DS_VNA *****DS_VNA)
{
  /****************************************************
   evaluate matrix elements of neutral atom potential
   in the KB or Blochl separable form in momentum space.
  ****************************************************/
  static int firsttime=1;
  int LL,L0,L1,L2,L3,L,GL,Mul0,Mul1,M0,M1,num0,num1;
  int k,kk,kl,h_AN,Gh_AN,Rnh,Ls,m,n;
  int tno0,tno1,tno2,i,j,i1,j1,p,q,po1;
  int l,spe,Cwan,Hwan,fan,jg,kg,wakg,Lmax;
  int size_NLH,size_SumNL0,size_TmpNL;
  int Mc_AN,Mc_AN2,Gc_AN,Gc_AN2,Mj_AN,num,size1,size2;
  int *Snd_DS_VNA_Size,*Rcv_DS_VNA_Size;  
  double time0;
  int Lmax_Four_Int;
  double rcutA,rcutB,rcut,dmp;
  double siT,coT,siP,coP;
  double dx,dy,dz,Normk,kmin,kmax,Sk,Dk;
  double gant,r,theta,phi,SH[2],dSHt[2],dSHp[2];
  double **NLH;
  Type_DS_VNA *tmp_array;
  Type_DS_VNA *tmp_array2;
  double S_coordinate[3];
  double tmp0,tmp1,tmp2,tmp3,tmp4,tmp5,tmp10;
  double ****Bessel_Pro00;
  double ****Bessel_Pro01;
  double *Bessel_Pro1;
  double *Bes00;
  double *Bes01;
  double ene,sum,h,coe0,sj,sy,sjp,syp;
  double ***SumNL0;
  double ***SumNLr0;
  dcomplex sum0,sum1,sum2; 
  dcomplex *****TmpNL;
  dcomplex *****TmpNLr;
  dcomplex *****TmpNLt;
  dcomplex *****TmpNLp;
  dcomplex CY,CYt,CYp,CY1,CYt1,CYp1;
  dcomplex CsumNL0,CsumNLr,CsumNLt,CsumNLp;
  dcomplex Ctmp0,Ctmp1,Ctmp2,Cpow;
  dcomplex **CmatNL0;
  dcomplex **CmatNLr;
  dcomplex **CmatNLt;
  dcomplex **CmatNLp;
  double SphB[30][GL_Mesh];
  double SphBp[30][GL_Mesh];
  double tmp_SphB[30];
  double tmp_SphBp[30];
  double TStime,TEtime;
  int numprocs,myid,tag=999,ID,IDS,IDR;
  double Stime_atom, Etime_atom;
  int *VNA_List;
  int *VNA_List2;
  int **SP_VNA;
  int Num_RVNA;
  double stime, etime;
  double time1,time2,time3,time4,time5;

  MPI_Status stat;
  MPI_Request request;

  /* MPI */
  MPI_Comm_size(mpi_comm_level1,&numprocs);
  MPI_Comm_rank(mpi_comm_level1,&myid);

  dtime(&TStime);

  /****************************************************
  allocation of arrays:

  int Snd_DS_VNA_Size[Num_Procs];
  int Rcv_DS_VNA_Size[Num_Procs];

  int  VNA_List[List_YOUSO[34]*(List_YOUSO[35] + 1)+2];
  int VNA_List2[List_YOUSO[34]*(List_YOUSO[35] + 1)+2];

  dcomplex  TmpNL[YOUSO25+1][YOUSO24][2*(YOUSO25+1)+1]
                        [Num_RVNA][2*List_YOUSO[35]+1];
  dcomplex TmpNLr[YOUSO25+1][YOUSO24][2*(YOUSO25+1)+1]
                        [Num_RVNA][2*List_YOUSO[35]+1];
  dcomplex TmpNLt[YOUSO25+1][YOUSO24][2*(YOUSO25+1)+1]
                        [Num_RVNA][2*List_YOUSO[35]+1];
  dcomplex TmpNLp[YOUSO25+1][YOUSO24][2*(YOUSO25+1)+1]
                        [Num_RVNA][2*List_YOUSO[35]+1];

  double  SumNL0[YOUSO25+1][YOUSO24][Num_RVNA];
  double SumNLr0[YOUSO25+1][YOUSO24][Num_RVNA];

  double NLH[List_YOUSO[7]][List_YOUSO[7]]

  dcomplex CmatNL0[2*(YOUSO25+1)+1][2*List_YOUSO[35]+1];
  dcomplex CmatNLr[2*(YOUSO25+1)+1][2*List_YOUSO[35]+1];
  dcomplex CmatNLt[2*(YOUSO25+1)+1][2*List_YOUSO[35]+1];
  dcomplex CmatNLp[2*(YOUSO25+1)+1][2*List_YOUSO[35]+1];
  ****************************************************/

  Num_RVNA = List_YOUSO[34]*(List_YOUSO[35] + 1);

  Snd_DS_VNA_Size = (int*)malloc(sizeof(int)*Num_Procs);
  Rcv_DS_VNA_Size = (int*)malloc(sizeof(int)*Num_Procs);

  VNA_List  = (int*)malloc(sizeof(int)*(List_YOUSO[34]*(List_YOUSO[35] + 1)+2) ); 
  VNA_List2 = (int*)malloc(sizeof(int)*(List_YOUSO[34]*(List_YOUSO[35] + 1)+2) ); 

  size_TmpNL = (List_YOUSO[25]+1)*List_YOUSO[24]*
               (2*(List_YOUSO[25]+1)+1)*Num_RVNA*(2*List_YOUSO[30]+1);
  TmpNL = (dcomplex*****)malloc(sizeof(dcomplex****)*(List_YOUSO[25]+1));
  for (i=0; i<(List_YOUSO[25]+1); i++){
    TmpNL[i] = (dcomplex****)malloc(sizeof(dcomplex***)*List_YOUSO[24]);
    for (j=0; j<List_YOUSO[24]; j++){
      TmpNL[i][j] = (dcomplex***)malloc(sizeof(dcomplex**)*(2*(List_YOUSO[25]+1)+1));
      for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
        TmpNL[i][j][k] = (dcomplex**)malloc(sizeof(dcomplex*)*Num_RVNA);
        for (l=0; l<Num_RVNA; l++){
          TmpNL[i][j][k][l] = (dcomplex*)malloc(sizeof(dcomplex)*(2*List_YOUSO[35]+1));
	}
      }
    }
  }

  TmpNLr = (dcomplex*****)malloc(sizeof(dcomplex****)*(List_YOUSO[25]+1));
  for (i=0; i<(List_YOUSO[25]+1); i++){
    TmpNLr[i] = (dcomplex****)malloc(sizeof(dcomplex***)*List_YOUSO[24]);
    for (j=0; j<List_YOUSO[24]; j++){
      TmpNLr[i][j] = (dcomplex***)malloc(sizeof(dcomplex**)*(2*(List_YOUSO[25]+1)+1));
      for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
        TmpNLr[i][j][k] = (dcomplex**)malloc(sizeof(dcomplex*)*Num_RVNA);
        for (l=0; l<Num_RVNA; l++){
          TmpNLr[i][j][k][l] = (dcomplex*)malloc(sizeof(dcomplex)*(2*List_YOUSO[35]+1));
	}
      }
    }
  }

  TmpNLt = (dcomplex*****)malloc(sizeof(dcomplex****)*(List_YOUSO[25]+1));
  for (i=0; i<(List_YOUSO[25]+1); i++){
    TmpNLt[i] = (dcomplex****)malloc(sizeof(dcomplex***)*List_YOUSO[24]);
    for (j=0; j<List_YOUSO[24]; j++){
      TmpNLt[i][j] = (dcomplex***)malloc(sizeof(dcomplex**)*(2*(List_YOUSO[25]+1)+1));
      for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
        TmpNLt[i][j][k] = (dcomplex**)malloc(sizeof(dcomplex*)*Num_RVNA);
        for (l=0; l<Num_RVNA; l++){
          TmpNLt[i][j][k][l] = (dcomplex*)malloc(sizeof(dcomplex)*(2*List_YOUSO[35]+1));
	}
      }
    }
  }

  TmpNLp = (dcomplex*****)malloc(sizeof(dcomplex****)*(List_YOUSO[25]+1));
  for (i=0; i<(List_YOUSO[25]+1); i++){
    TmpNLp[i] = (dcomplex****)malloc(sizeof(dcomplex***)*List_YOUSO[24]);
    for (j=0; j<List_YOUSO[24]; j++){
      TmpNLp[i][j] = (dcomplex***)malloc(sizeof(dcomplex**)*(2*(List_YOUSO[25]+1)+1));
      for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
        TmpNLp[i][j][k] = (dcomplex**)malloc(sizeof(dcomplex*)*Num_RVNA);
        for (l=0; l<Num_RVNA; l++){
          TmpNLp[i][j][k][l] = (dcomplex*)malloc(sizeof(dcomplex)*(2*List_YOUSO[35]+1));
	}
      }
    }
  }

  size_SumNL0 = (List_YOUSO[25]+1)*List_YOUSO[24]*(Num_RVNA+1);
  SumNL0 = (double***)malloc(sizeof(double**)*(List_YOUSO[25]+1));
  for (i=0; i<(List_YOUSO[25]+1); i++){
    SumNL0[i] = (double**)malloc(sizeof(double*)*List_YOUSO[24]);
    for (j=0; j<List_YOUSO[24]; j++){
      SumNL0[i][j] = (double*)malloc(sizeof(double)*Num_RVNA);
    }
  }

  SumNLr0 = (double***)malloc(sizeof(double**)*(List_YOUSO[25]+1));
  for (i=0; i<(List_YOUSO[25]+1); i++){
    SumNLr0[i] = (double**)malloc(sizeof(double*)*List_YOUSO[24]);
    for (j=0; j<List_YOUSO[24]; j++){
      SumNLr0[i][j] = (double*)malloc(sizeof(double)*Num_RVNA);
    }
  }

  size_NLH = 0;
  NLH = (double**)malloc(sizeof(double*)*List_YOUSO[7]); 
  for (i=0; i<List_YOUSO[7]; i++){
    NLH[i] = (double*)malloc(sizeof(double)*List_YOUSO[7]); 
  }

  CmatNL0 = (dcomplex**)malloc(sizeof(dcomplex*)*(2*(List_YOUSO[25]+1)+1));
  for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
    CmatNL0[i] = (dcomplex*)malloc(sizeof(dcomplex)*(2*List_YOUSO[35]+1));
  }

  CmatNLr = (dcomplex**)malloc(sizeof(dcomplex*)*(2*(List_YOUSO[25]+1)+1));
  for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
    CmatNLr[i] = (dcomplex*)malloc(sizeof(dcomplex)*(2*List_YOUSO[35]+1));
  }

  CmatNLt = (dcomplex**)malloc(sizeof(dcomplex*)*(2*(List_YOUSO[25]+1)+1));
  for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
    CmatNLt[i] = (dcomplex*)malloc(sizeof(dcomplex)*(2*List_YOUSO[35]+1));
  }

  CmatNLp = (dcomplex**)malloc(sizeof(dcomplex*)*(2*(List_YOUSO[25]+1)+1));
  for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
    CmatNLp[i] = (dcomplex*)malloc(sizeof(dcomplex)*(2*List_YOUSO[35]+1));
  }

  Bessel_Pro1 = (double*)malloc(sizeof(double)*Num_RVNA); 

  Bes00 = (double*)malloc(sizeof(double)*GL_Mesh); 
  Bes01 = (double*)malloc(sizeof(double)*GL_Mesh); 

  Bessel_Pro00 = (double****)malloc(sizeof(double***)*SpeciesNum); 
  for (spe=0; spe<SpeciesNum; spe++){
    Bessel_Pro00[spe] = (double***)malloc(sizeof(double**)*(Spe_MaxL_Basis[spe]+1)); 
    for (L0=0; L0<=Spe_MaxL_Basis[spe]; L0++){
      Bessel_Pro00[spe][L0] = (double**)malloc(sizeof(double*)*Spe_Num_Basis[spe][L0]); 
      for (Mul0=0; Mul0<Spe_Num_Basis[spe][L0]; Mul0++){
        Bessel_Pro00[spe][L0][Mul0] = (double*)malloc(sizeof(double)*GL_Mesh); 
      }
    }
  }

  Bessel_Pro01 = (double****)malloc(sizeof(double***)*SpeciesNum); 
  for (spe=0; spe<SpeciesNum; spe++){
    Bessel_Pro01[spe] = (double***)malloc(sizeof(double**)*(Spe_MaxL_Basis[spe]+1)); 
    for (L0=0; L0<=Spe_MaxL_Basis[spe]; L0++){
      Bessel_Pro01[spe][L0] = (double**)malloc(sizeof(double*)*Spe_Num_Basis[spe][L0]); 
      for (Mul0=0; Mul0<Spe_Num_Basis[spe][L0]; Mul0++){
        Bessel_Pro01[spe][L0][Mul0] = (double*)malloc(sizeof(double)*GL_Mesh); 
      }
    }
  }

  SP_VNA = (int**)malloc(sizeof(int*)*(MatomnumF+1));
  for (i=0; i<(MatomnumF+1); i++){
    SP_VNA[i] = (int*)malloc(sizeof(int)*List_YOUSO[8]);
  }

  /* PrintMemory */
  if (firsttime) {
    PrintMemory("Set_ProExpn_VNA: SumNL0",sizeof(double)*size_SumNL0,NULL);
    PrintMemory("Set_ProExpn_VNA: SumNLr0",sizeof(double)*size_SumNL0,NULL);
    PrintMemory("Set_ProExpn_VNA: TmpNL", sizeof(dcomplex)*size_TmpNL,NULL);
    PrintMemory("Set_ProExpn_VNA: TmpNLr",sizeof(dcomplex)*size_TmpNL,NULL);
    PrintMemory("Set_ProExpn_VNA: TmpNLt",sizeof(dcomplex)*size_TmpNL,NULL);
    PrintMemory("Set_ProExpn_VNA: TmpNLp",sizeof(dcomplex)*size_TmpNL,NULL);
    firsttime=0;
  }

  if (measure_time){
    time1 = 0.0;
    time2 = 0.0;
    time3 = 0.0;
    time4 = 0.0;
    time5 = 0.0;
  }

  /************************************************************
   Spe_Num_RVPS[Hwan]     ->  Num_RVNA = List_YOUSO[34]*(List_YOUSO[35]+1) 
   Spe_VPS_List[Hwan][L]  ->  VNA_List[L] = 0,0,0,.., 1,1,1,.., 2,2,2,..,
   List_YOUSO[19]         ->  Num_RVNA
   List_YOUSO[30]         ->  List_YOUSO[35]
  *************************************************************/

  L = 0;
  for (i=0; i<=List_YOUSO[35]; i++){    /* max L */
    for (j=0; j<List_YOUSO[34]; j++){   /* # of radial projectors */
      VNA_List[L]  = i;
      VNA_List2[L] = j;
      L++;
    }
  }

  /************************************************************
   pre-calculate the time consuming part in the 'time1' part
  *************************************************************/

  kmin = Radial_kmin;
  kmax = PAO_Nkmax;
  Sk = kmax + kmin;
  Dk = kmax - kmin;

  for (spe=0; spe<SpeciesNum; spe++){

    for (L0=0; L0<=Spe_MaxL_Basis[spe]; L0++){
      for (Mul0=0; Mul0<Spe_Num_Basis[spe][L0]; Mul0++){
	for (i=0; i<GL_Mesh; i++){
	  Normk = GL_NormK[i];
	  tmp10 = 0.50*Dk*GL_Weight[i]*Normk*Normk;

	  Bessel_Pro00[spe][L0][Mul0][i] = RF_BesselF(spe,L0,Mul0,Normk)*tmp10;
	  Bessel_Pro01[spe][L0][Mul0][i] = Bessel_Pro00[spe][L0][Mul0][i]*Normk;
	}
      }
    }
  }

  /************************************************************
    start the main calculation
  *************************************************************/

  for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){

    dtime(&Stime_atom);

    Gc_AN = M2G[Mc_AN];
    Cwan = WhatSpecies[Gc_AN];
    rcutA = Spe_Atom_Cut1[Cwan];

    for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){

      Gh_AN = natn[Gc_AN][h_AN];        
      Rnh = ncn[Gc_AN][h_AN];
      Hwan = WhatSpecies[Gh_AN];
      rcutB = Spe_Atom_Cut1[Hwan];
      rcut = rcutA + rcutB;

      dx = Gxyz[Gc_AN][1] - Gxyz[Gh_AN][1] - atv[Rnh][1]; 
      dy = Gxyz[Gc_AN][2] - Gxyz[Gh_AN][2] - atv[Rnh][2]; 
      dz = Gxyz[Gc_AN][3] - Gxyz[Gh_AN][3] - atv[Rnh][3];

      xyz2spherical(dx,dy,dz,0.0,0.0,0.0,S_coordinate); 
      r     = S_coordinate[0];
      theta = S_coordinate[1];
      phi   = S_coordinate[2];

      /* for empty atoms or finite elemens basis */

      if (r<1.0e-10) r = 1.0e-10;

      /* precalculation of sin and cos */

      siT = sin(theta);
      coT = cos(theta);
      siP = sin(phi);
      coP = cos(phi);

      /****************************************************
         evaluate ovelap integrals <chi0|P> between PAOs
         and progectors of nonlocal VNA potentials. 
      ****************************************************/
      /****************************************************
              \int RL(k)*RL'(k)*jl(k*R) k^2 dk^3 
      ****************************************************/

      kmin = Radial_kmin;
      kmax = PAO_Nkmax;
      Sk = kmax + kmin;
      Dk = kmax - kmin;

      for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
        for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){
          for (L=0; L<Num_RVNA; L++){
            L1 = VNA_List[L];
            for (M0=-L0; M0<=L0; M0++){
	      for (M1=-L1; M1<=L1; M1++){
		 TmpNL[L0][Mul0][L0+M0][L][L1+M1] = Complex(0.0,0.0); 
		TmpNLr[L0][Mul0][L0+M0][L][L1+M1] = Complex(0.0,0.0); 
		TmpNLt[L0][Mul0][L0+M0][L][L1+M1] = Complex(0.0,0.0); 
		TmpNLp[L0][Mul0][L0+M0][L][L1+M1] = Complex(0.0,0.0); 
	      }
	    }
	  } 
	}
      }

      Lmax = -10;
      for (L=0; L<Num_RVNA; L++){
	if (Lmax<VNA_List[L]) Lmax = VNA_List[L];
      }
      if (Spe_MaxL_Basis[Cwan]<Lmax)
	Lmax_Four_Int = 2*Lmax;
      else 
        Lmax_Four_Int = 2*Spe_MaxL_Basis[Cwan];

      /* calculate SphB and SphBp */

      for (i=0; i<GL_Mesh; i++){
        Normk = GL_NormK[i];
        Spherical_Bessel(Normk*r,Lmax_Four_Int,tmp_SphB,tmp_SphBp);
        for(LL=0; LL<=Lmax_Four_Int; LL++){ 
          SphB[LL][i]  = tmp_SphB[LL]; 
          SphBp[LL][i] = tmp_SphBp[LL]; 
	}
      }

      /* LL loop */
      
      for(LL=0; LL<=Lmax_Four_Int; LL++){ 

	for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
	  for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){
	    for (L=0; L<Num_RVNA; L++){
	       SumNL0[L0][Mul0][L] = 0.0;
	      SumNLr0[L0][Mul0][L] = 0.0;
	    }
	  }
	}

        /* Gauss-Legendre quadrature */

        if (measure_time) dtime(&stime);

        for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
	  for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){

            for (i=0; i<GL_Mesh; i++){
              Bes00[i] = Bessel_Pro00[Cwan][L0][Mul0][i]*SphB[LL][i]; 
              Bes01[i] = Bessel_Pro01[Cwan][L0][Mul0][i]*SphBp[LL][i]; 
            }

            L = 0;
            for (L1=0; L1<=List_YOUSO[35]; L1++){ 
              for (Mul1=0; Mul1<List_YOUSO[34]; Mul1++){         

                tmp1 = 0.0;
                tmp2 = 0.0;

                for (i=0; i<GL_Mesh; i++){
                  tmp1 += Bes00[i]*Spe_VNA_Bessel[Hwan][L1][Mul1][i];
                  tmp2 += Bes01[i]*Spe_VNA_Bessel[Hwan][L1][Mul1][i];
		}

 		 SumNL0[L0][Mul0][L] = tmp1;
		SumNLr0[L0][Mul0][L] = tmp2;

                L++; 
	      }
	    }
	  }
	}

        if (measure_time){
          dtime(&etime);
          time1 += etime - stime;
	}

	/* derivatives of "on site" */

	if (h_AN==0){ 
	  for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
	    for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){
	      for (L=0; L<Num_RVNA; L++){
		SumNLr0[L0][Mul0][L] = 0.0;
	      }
	    }
	  }
	}

	/****************************************************
         for "overlap",
	    sum_m 8*(-i)^{-L0+L1+l}*
	          C_{L0,-M0,L1,M1,l,m}*
	          \int RL(k)*RL'(k)*jl(k*R) k^2 dk^3,
	****************************************************/

        if (measure_time) dtime(&stime);

	for(m=-LL; m<=LL; m++){ 

	  ComplexSH(LL,m,theta,phi,SH,dSHt,dSHp);

	  SH[1]   = -SH[1];
	  dSHt[1] = -dSHt[1];
	  dSHp[1] = -dSHp[1];

	  CY  = Complex(SH[0],SH[1]);
	  CYt = Complex(dSHt[0],dSHt[1]);
	  CYp = Complex(dSHp[0],dSHp[1]);

	  for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
	    for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){
	      for (L=0; L<Num_RVNA; L++){

		L1 = VNA_List[L];
		Ls = -L0 + L1 + LL;

		Cpow = Im_pow(-1,Ls);
		CY1  = Cmul(Cpow,CY);
		CYt1 = Cmul(Cpow,CYt);
		CYp1 = Cmul(Cpow,CYp);

		for (M0=-L0; M0<=L0; M0++){
		  for (M1=-L1; M1<=L1; M1++){

		    if ( (M1+m)==M0 && abs(L1-LL)<=L0 && L0<=(L1+LL) ){

		      gant = Gaunt(L0,M0,L1,M1,LL,m);

		      tmp2 = gant*SumNL0[L0][Mul0][L];

		      /* S */ 

		      TmpNL[L0][Mul0][L0+M0][L][L1+M1].r += CY1.r*tmp2;
		      TmpNL[L0][Mul0][L0+M0][L][L1+M1].i += CY1.i*tmp2;

		      /* dS/dr */ 

		      tmp0 = gant*SumNLr0[L0][Mul0][L];

		      TmpNLr[L0][Mul0][L0+M0][L][L1+M1].r += CY1.r*tmp0;
		      TmpNLr[L0][Mul0][L0+M0][L][L1+M1].i += CY1.i*tmp0;

		      /* dS/dt */ 

		      TmpNLt[L0][Mul0][L0+M0][L][L1+M1].r += CYt1.r*tmp2;
		      TmpNLt[L0][Mul0][L0+M0][L][L1+M1].i += CYt1.i*tmp2;

		      /* dS/dp */ 

		      TmpNLp[L0][Mul0][L0+M0][L][L1+M1].r += CYp1.r*tmp2;
		      TmpNLp[L0][Mul0][L0+M0][L][L1+M1].i += CYp1.i*tmp2;
		    }
		  }
		}

	      }
	    }
	  }
	} /* m */

        if (measure_time){
          dtime(&etime);
          time2 += etime - stime;
	}

      } /* LL */

      /****************************************************
                        Complex to Real
      ****************************************************/

      if (measure_time) dtime(&stime);

      num0 = 0;
      for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
	for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){

	  num1 = 0;
	  for (L=0; L<Num_RVNA; L++){
	    L1 = VNA_List[L];

	    for (M0=-L0; M0<=L0; M0++){
	      for (M1=-L1; M1<=L1; M1++){
		CsumNL0 = Complex(0.0,0.0);
		CsumNLr = Complex(0.0,0.0);
		CsumNLt = Complex(0.0,0.0);
		CsumNLp = Complex(0.0,0.0);

		for (k=-L0; k<=L0; k++){

		  Ctmp1 = Conjg(Comp2Real[L0][L0+M0][L0+k]);

		  /* S */

		  Ctmp0 = TmpNL[L0][Mul0][L0+k][L][L1+M1];
		  Ctmp2 = Cmul(Ctmp1,Ctmp0);
		  CsumNL0 = Cadd(CsumNL0,Ctmp2);

		  /* dS/dr */

		  Ctmp0 = TmpNLr[L0][Mul0][L0+k][L][L1+M1];
		  Ctmp2 = Cmul(Ctmp1,Ctmp0);
		  CsumNLr = Cadd(CsumNLr,Ctmp2);

		  /* dS/dt */

		  Ctmp0 = TmpNLt[L0][Mul0][L0+k][L][L1+M1];
		  Ctmp2 = Cmul(Ctmp1,Ctmp0);
		  CsumNLt = Cadd(CsumNLt,Ctmp2);

		  /* dS/dp */

		  Ctmp0 = TmpNLp[L0][Mul0][L0+k][L][L1+M1];
		  Ctmp2 = Cmul(Ctmp1,Ctmp0);
		  CsumNLp = Cadd(CsumNLp,Ctmp2);

		}

		CmatNL0[L0+M0][L1+M1] = CsumNL0;
		CmatNLr[L0+M0][L1+M1] = CsumNLr;
		CmatNLt[L0+M0][L1+M1] = CsumNLt;
		CmatNLp[L0+M0][L1+M1] = CsumNLp;

	      }
	    }

	    for (M0=-L0; M0<=L0; M0++){
	      for (M1=-L1; M1<=L1; M1++){

		CsumNL0 = Complex(0.0,0.0);
		CsumNLr = Complex(0.0,0.0);
		CsumNLt = Complex(0.0,0.0);
		CsumNLp = Complex(0.0,0.0);

		for (k=-L1; k<=L1; k++){

		  /* S */ 

		  CsumNL0.r += (CmatNL0[L0+M0][L1+k].r*Comp2Real[L1][L1+M1][L1+k].r
                              - CmatNL0[L0+M0][L1+k].i*Comp2Real[L1][L1+M1][L1+k].i);

		  CsumNL0.i += (CmatNL0[L0+M0][L1+k].r*Comp2Real[L1][L1+M1][L1+k].i
                              + CmatNL0[L0+M0][L1+k].i*Comp2Real[L1][L1+M1][L1+k].r);

		  /* dS/dr */ 

		  CsumNLr.r += (CmatNLr[L0+M0][L1+k].r*Comp2Real[L1][L1+M1][L1+k].r
                              - CmatNLr[L0+M0][L1+k].i*Comp2Real[L1][L1+M1][L1+k].i);

		  CsumNLr.i += (CmatNLr[L0+M0][L1+k].r*Comp2Real[L1][L1+M1][L1+k].i
                              + CmatNLr[L0+M0][L1+k].i*Comp2Real[L1][L1+M1][L1+k].r);

		  /* dS/dt */ 

		  CsumNLt.r += (CmatNLt[L0+M0][L1+k].r*Comp2Real[L1][L1+M1][L1+k].r
                              - CmatNLt[L0+M0][L1+k].i*Comp2Real[L1][L1+M1][L1+k].i);

		  CsumNLt.i += (CmatNLt[L0+M0][L1+k].r*Comp2Real[L1][L1+M1][L1+k].i
                              + CmatNLt[L0+M0][L1+k].i*Comp2Real[L1][L1+M1][L1+k].r);

		  /* dS/dp */ 

		  CsumNLp.r += (CmatNLp[L0+M0][L1+k].r*Comp2Real[L1][L1+M1][L1+k].r
                              - CmatNLp[L0+M0][L1+k].i*Comp2Real[L1][L1+M1][L1+k].i);

		  CsumNLp.i += (CmatNLp[L0+M0][L1+k].r*Comp2Real[L1][L1+M1][L1+k].i
                              + CmatNLp[L0+M0][L1+k].i*Comp2Real[L1][L1+M1][L1+k].r);

		}

		DS_VNA[0][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 8.0*(Type_DS_VNA)CsumNL0.r;

		if (h_AN!=0){

		  if (fabs(siT)<10e-14){

		    DS_VNA[1][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
		      8.0*(Type_DS_VNA)(siT*coP*CsumNLr.r + coT*coP/r*CsumNLt.r);

		    DS_VNA[2][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
		      8.0*(Type_DS_VNA)(siT*siP*CsumNLr.r + coT*siP/r*CsumNLt.r);

		    DS_VNA[3][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
		      8.0*(Type_DS_VNA)(coT*CsumNLr.r - siT/r*CsumNLt.r);
		  }

		  else{

		    DS_VNA[1][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
		      8.0*(Type_DS_VNA)(siT*coP*CsumNLr.r + coT*coP/r*CsumNLt.r
			   - siP/siT/r*CsumNLp.r);

		    DS_VNA[2][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
		      8.0*(Type_DS_VNA)(siT*siP*CsumNLr.r + coT*siP/r*CsumNLt.r
			   + coP/siT/r*CsumNLp.r);

		    DS_VNA[3][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
		      8.0*(Type_DS_VNA)(coT*CsumNLr.r - siT/r*CsumNLt.r);
		  }

		}

		else{
		  DS_VNA[1][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 0.0;
		  DS_VNA[2][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 0.0;
		  DS_VNA[3][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 0.0;
		} 

	      }
	    }

	    num1 = num1 + 2*L1 + 1; 
	  }

	  num0 = num0 + 2*L0 + 1; 
	} /* M0 */
      } /* L0 */

      if (measure_time){
	dtime(&etime);
	time3 += etime - stime;
      }

    } /* h_AN */

    dtime(&Etime_atom);
    time_per_atom[Gc_AN] += Etime_atom - Stime_atom;

  } /* Mc_AN */

  /****************************************************
   MPI: communicate for each of k=0,1,2,3

   DS_VNA
  ****************************************************/

  MPI_Barrier(mpi_comm_level1);
  if (measure_time) dtime(&stime);

  /***********************************
               set data size
  ************************************/

  for (ID=0; ID<numprocs; ID++){

    IDS = (myid + ID) % numprocs;
    IDR = (myid - ID + numprocs) % numprocs;

    /* find data size to send block data */
    if (F_Snd_Num[IDS]!=0){

      size1 = 0;
      for (n=0; n<F_Snd_Num[IDS]; n++){
	Mc_AN = Snd_MAN[IDS][n];
	Gc_AN = Snd_GAN[IDS][n];
	Cwan = WhatSpecies[Gc_AN]; 
	tno1 = Spe_Total_NO[Cwan];
	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
	  Gh_AN = natn[Gc_AN][h_AN];        
	  Hwan = WhatSpecies[Gh_AN];
	  tno2 = (List_YOUSO[35]+1)*(List_YOUSO[35]+1)*List_YOUSO[34];
	  size1 += tno1*tno2; 
	}
      }
 
      Snd_DS_VNA_Size[IDS] = size1;
      MPI_Isend(&size1, 1, MPI_INT, IDS, tag, mpi_comm_level1, &request);
    }
    else{
      Snd_DS_VNA_Size[IDS] = 0;
    }

    /* receiving of size of data */

    if (F_Rcv_Num[IDR]!=0){
      MPI_Recv(&size2, 1, MPI_INT, IDR, tag, mpi_comm_level1, &stat);
      Rcv_DS_VNA_Size[IDR] = size2;
    }
    else{
      Rcv_DS_VNA_Size[IDR] = 0;
    }

    if (F_Snd_Num[IDS]!=0) MPI_Wait(&request,&stat);
  }

  /****************************************************
                         MPI_Barrier
  ****************************************************/

  MPI_Barrier(mpi_comm_level1);

  /***********************************
             data transfer
  ************************************/

  for (ID=0; ID<numprocs; ID++){

    IDS = (myid + ID) % numprocs;
    IDR = (myid - ID + numprocs) % numprocs;

    /*****************************
         sending of data 
    *****************************/

    if (F_Snd_Num[IDS]!=0){

      size1 = Snd_DS_VNA_Size[IDS];

      /* allocation of array */

      tmp_array = (Type_DS_VNA*)malloc(sizeof(Type_DS_VNA)*size1);

      /* multidimentional array to vector array */

      num = 0;
      for (n=0; n<F_Snd_Num[IDS]; n++){
	Mc_AN = Snd_MAN[IDS][n];
	Gc_AN = Snd_GAN[IDS][n];
	Cwan = WhatSpecies[Gc_AN]; 
	tno1 = Spe_Total_NO[Cwan];
	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
	  Gh_AN = natn[Gc_AN][h_AN];        
	  Hwan = WhatSpecies[Gh_AN];
	  tno2 = (List_YOUSO[35]+1)*(List_YOUSO[35]+1)*List_YOUSO[34];

	  for (i=0; i<tno1; i++){
	    for (j=0; j<tno2; j++){
	      tmp_array[num] = DS_VNA[0][Mc_AN][h_AN][i][j];
	      num++;
	    } 
	  } 
	}
      }

      MPI_Isend(&tmp_array[0], size1, MPI_Type_DS_VNA, IDS, tag, mpi_comm_level1, &request);
    }

    /*****************************
       receiving of block data
    *****************************/

    if (F_Rcv_Num[IDR]!=0){
        
      size2 = Rcv_DS_VNA_Size[IDR];
      tmp_array2 = (Type_DS_VNA*)malloc(sizeof(Type_DS_VNA)*size2);
        
      MPI_Recv(&tmp_array2[0], size2, MPI_Type_DS_VNA, IDR, tag, mpi_comm_level1, &stat);

      /* store */

      num = 0;
      Mc_AN = F_TopMAN[IDR] - 1;
      for (n=0; n<F_Rcv_Num[IDR]; n++){
	Mc_AN++;

	Gc_AN = Rcv_GAN[IDR][n];
	Cwan = WhatSpecies[Gc_AN]; 
	tno1 = Spe_Total_NO[Cwan];
	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
	  Gh_AN = natn[Gc_AN][h_AN];        
	  Hwan = WhatSpecies[Gh_AN];
	  tno2 = (List_YOUSO[35]+1)*(List_YOUSO[35]+1)*List_YOUSO[34];
	  for (i=0; i<tno1; i++){
	    for (j=0; j<tno2; j++){
	      DS_VNA[0][Mc_AN][h_AN][i][j] = tmp_array2[num];
	      num++;
	    }
	  }
	}
      }

      /* free tmp_array2 */

      free(tmp_array2);
    }

    if (F_Snd_Num[IDS]!=0){
      MPI_Wait(&request,&stat);
      free(tmp_array);  /* freeing of array */
    } 
  }

  if (measure_time){
    dtime(&etime);
    time4 += etime - stime;
  }

  /****************************************************
            multiplying overlap integrals 
  ****************************************************/

  if (measure_time) dtime(&stime);

  for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){

    dtime(&Stime_atom);
    
    Gc_AN = M2G[Mc_AN];
    Cwan = WhatSpecies[Gc_AN];
    fan = FNAN[Gc_AN];
    rcutA = Spe_Atom_Cut1[Cwan];
  
    for (j=0; j<=fan; j++){
  
      jg = natn[Gc_AN][j];
      Mj_AN = F_G2M[jg];
      Hwan = WhatSpecies[jg];

      rcutB = Spe_Atom_Cut1[Hwan];
      rcut = rcutA + rcutB;

      for (k=0; k<=fan; k++){

	kg = natn[Gc_AN][k];
	wakg = WhatSpecies[kg];
	kl = RMI1[Mc_AN][j][k];

	if (0<=kl){

	  for (m=0; m<Spe_Total_NO[Cwan]; m++){
	    for (n=0; n<Spe_Total_NO[Hwan]; n++){

	      sum = 0.0;
	      L = 0;

	      for (L1=0; L1<Num_RVNA; L1++){

		GL = VNA_List[L1]; 
		Mul1 = VNA_List2[L1];
            
		ene = VNA_proj_ene[wakg][GL][Mul1];
		L2 = 2*VNA_List[L1];

		tmp0 = 0.0;

		for (L3=0; L3<=L2; L3++){
		  tmp0 += DS_VNA[0][Mc_AN][k][m][L]*DS_VNA[0][Mj_AN][kl][n][L]; 
		  L++;
		}

		sum += ene*tmp0; 

	      }

	      if (k==0)  NLH[m][n]  = sum;  
	      else       NLH[m][n] += sum; 

	    }
	  }
	}
      } 
       
      /****************************************************
                          NLH to HVNA
      ****************************************************/

      dmp = dampingF(rcut,Dis[Gc_AN][j]);

      for (i1=0; i1<Spe_Total_NO[Cwan]; i1++){
	for (j1=0; j1<Spe_Total_NO[Hwan]; j1++){
	  HVNA[Mc_AN][j][i1][j1] = dmp*NLH[i1][j1];
	}
      }

    } /* j */

    dtime(&Etime_atom);
    time_per_atom[Gc_AN] += Etime_atom - Stime_atom;

  } /* Mc_AN */

  if (measure_time){
    dtime(&etime);
    time5 += etime - stime;
  }

  if (measure_time){
    printf("Set_ProExpn_VNA myid=%2d time1=%7.3f time2=%7.3f time3=%7.3f time4=%7.3f time5=%7.3f\n",
            myid,time1,time2,time3,time4,time5);fflush(stdout); 
  }

  /****************************************************
  freeing of arrays:

  int Snd_DS_VNA_Size[Num_Procs];
  int Rcv_DS_VNA_Size[Num_Procs];

  int VNA_List[List_YOUSO[34]*(List_YOUSO[35] + 1)+2];
  int VNA_List2[List_YOUSO[34]*(List_YOUSO[35] + 1)+2];

  dcomplex  TmpNL[YOUSO25+1][YOUSO24][2*(YOUSO25+1)+1]
                        [Num_RVNA][2*List_YOUSO[35]+1];
  dcomplex TmpNLr[YOUSO25+1][YOUSO24][2*(YOUSO25+1)+1]
                        [Num_RVNA][2*List_YOUSO[35]+1];
  dcomplex TmpNLt[YOUSO25+1][YOUSO24][2*(YOUSO25+1)+1]
                        [Num_RVNA][2*List_YOUSO[35]+1];
  dcomplex TmpNLp[YOUSO25+1][YOUSO24][2*(YOUSO25+1)+1]
                        [Num_RVNA][2*List_YOUSO[35]+1];

  double  SumNL0[YOUSO25+1][YOUSO24][Num_RVNA];
  double SumNLr0[YOUSO25+1][YOUSO24][Num_RVNA];

  double NLH[List_YOUSO[7]][List_YOUSO[7]]

  dcomplex CmatNL0[2*(YOUSO25+1)+1][2*List_YOUSO[35]+1];
  dcomplex CmatNLr[2*(YOUSO25+1)+1][2*List_YOUSO[35]+1];
  dcomplex CmatNLt[2*(YOUSO25+1)+1][2*List_YOUSO[35]+1];
  dcomplex CmatNLp[2*(YOUSO25+1)+1][2*List_YOUSO[35]+1];
  ****************************************************/

  free(Snd_DS_VNA_Size);
  free(Rcv_DS_VNA_Size);

  free(VNA_List);
  free(VNA_List2);

  for (i=0; i<(List_YOUSO[25]+1); i++){
    for (j=0; j<List_YOUSO[24]; j++){
      for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
        for (l=0; l<Num_RVNA; l++){
          free(TmpNL[i][j][k][l]);
	}
        free(TmpNL[i][j][k]);
      }
      free(TmpNL[i][j]);
    }
    free(TmpNL[i]);
  }
  free(TmpNL);

  for (i=0; i<(List_YOUSO[25]+1); i++){
    for (j=0; j<List_YOUSO[24]; j++){
      for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
        for (l=0; l<Num_RVNA; l++){
          free(TmpNLr[i][j][k][l]);
	}
        free(TmpNLr[i][j][k]);
      }
      free(TmpNLr[i][j]);
    }
    free(TmpNLr[i]);
  }
  free(TmpNLr);

  for (i=0; i<(List_YOUSO[25]+1); i++){
    for (j=0; j<List_YOUSO[24]; j++){
      for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
        for (l=0; l<Num_RVNA; l++){
          free(TmpNLt[i][j][k][l]);
	}
        free(TmpNLt[i][j][k]);
      }
      free(TmpNLt[i][j]);
    }
    free(TmpNLt[i]);
  }
 free(TmpNLt);

  for (i=0; i<(List_YOUSO[25]+1); i++){
    for (j=0; j<List_YOUSO[24]; j++){
      for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
        for (l=0; l<Num_RVNA; l++){
          free(TmpNLp[i][j][k][l]);
	}
        free(TmpNLp[i][j][k]);
      }
      free(TmpNLp[i][j]);
    }
    free(TmpNLp[i]);
  }
  free(TmpNLp);

  for (i=0; i<(List_YOUSO[25]+1); i++){
    for (j=0; j<List_YOUSO[24]; j++){
      free(SumNL0[i][j]);
    }
    free(SumNL0[i]);
  }
  free(SumNL0);

  for (i=0; i<(List_YOUSO[25]+1); i++){
    for (j=0; j<List_YOUSO[24]; j++){
      free(SumNLr0[i][j]);
    }
    free(SumNLr0[i]);
  }
  free(SumNLr0);

  for (i=0; i<List_YOUSO[7]; i++){
    free(NLH[i]);
  }
  free(NLH);

  for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
    free(CmatNL0[i]);
  }
  free(CmatNL0);

  for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
    free(CmatNLr[i]);
  }
  free(CmatNLr);

  for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
    free(CmatNLt[i]);
  }
  free(CmatNLt);

  for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
    free(CmatNLp[i]);
  }
  free(CmatNLp);

  free(Bessel_Pro1);


  for (spe=0; spe<SpeciesNum; spe++){
    for (L0=0; L0<=Spe_MaxL_Basis[spe]; L0++){
      for (Mul0=0; Mul0<Spe_Num_Basis[spe][L0]; Mul0++){
        free(Bessel_Pro00[spe][L0][Mul0]);
      }
      free(Bessel_Pro00[spe][L0]);
    }
    free(Bessel_Pro00[spe]);
  }
  free(Bessel_Pro00);

  for (spe=0; spe<SpeciesNum; spe++){
    for (L0=0; L0<=Spe_MaxL_Basis[spe]; L0++){
      for (Mul0=0; Mul0<Spe_Num_Basis[spe][L0]; Mul0++){
        free(Bessel_Pro01[spe][L0][Mul0]);
      }
      free(Bessel_Pro01[spe][L0]);
    }
    free(Bessel_Pro01[spe]);
  }
  free(Bessel_Pro01);

  free(Bes00);
  free(Bes01);

  for (i=0; i<(MatomnumF+1); i++){
    free(SP_VNA[i]);
  }
  free(SP_VNA);

  /* for time */
  dtime(&TEtime);
  time0 = TEtime - TStime;

  return time0;
} 




double Set_VNA12(double ****HVNA, double *****HVNA2)
{
  /****************************************************
   Evaluate matrix elements of one-center (orbitals)
   but two-center integrals for neutral atom potentials
   in the momentum space.
  ****************************************************/
  static int firsttime=1;
  int LL,L0,L1,L2,L3,L,GL,Mul0,Mul1,M0,M1,num0,num1;
  int k,kl,h_AN,Gh_AN,Rnh,Ls,m,n;
  int tno0,tno1,tno2,i,j,i1,j1,p;
  int l,Cwan,Hwan,fan,jg,kg,wakg,Lmax;
  int size_TmpHNA;
  int Mc_AN,Gc_AN,Mj_AN,num,size1,size2;
  int *Snd_HVNA2_Size,*Rcv_HVNA2_Size;  
  double time0;
  int Lmax_Four_Int;
  double siT,coT,siP,coP;
  double dx,dy,dz,Normk,kmin,kmax,Sk,Dk;
  double gant,r,theta,phi,SH[2],dSHt[2],dSHp[2];
  double *tmp_array;
  double *tmp_array2;
  double **SphB,**SphBp;
  double *tmp_SphB,*tmp_SphBp;
  double S_coordinate[3];
  double tmp0,tmp1,tmp2,tmp3,tmp4,tmp5,tmp10;
  double ene,sum,sumr,h,coe0,sj,sy,sjp,syp;
  dcomplex ******TmpHNA;
  dcomplex ******TmpHNAr;
  dcomplex ******TmpHNAt;
  dcomplex ******TmpHNAp;
  dcomplex **CmatS0;
  dcomplex **CmatSr;
  dcomplex **CmatSt;
  dcomplex **CmatSp;
  dcomplex CY,CYt,CYp,CY1,CYt1,CYp1;
  dcomplex CsumNL0,CsumNLr,CsumNLt,CsumNLp;
  dcomplex Ctmp0,Ctmp1,Ctmp2,Cpow;
  dcomplex Csum,Csumr,Csumt,Csump;
  dcomplex CsumS0,CsumSr,CsumSt,CsumSp;
  double TStime,TEtime;
  int numprocs,myid,tag=999,ID,IDS,IDR;
  double Stime_atom, Etime_atom;
  int *VNA_List;
  int *VNA_List2;
  int Num_RVNA;

  MPI_Status stat;
  MPI_Request request;

  /* MPI */
  MPI_Comm_size(mpi_comm_level1,&numprocs);
  MPI_Comm_rank(mpi_comm_level1,&myid);

  dtime(&TStime);

  /****************************************************
  allocation of arrays:

  int Snd_HVNA2_Size[Num_Procs];
  int Rcv_HVNA2_Size[Num_Procs];

  dcomplex  TmpHNA[YOUSO25+1][YOUSO24][2*(YOUSO25+1)+1]
                         [YOUSO25+1][YOUSO24][2*(YOUSO25+1)+1]; 
  dcomplex TmpHNAr[YOUSO25+1][YOUSO24][2*(YOUSO25+1)+1]
                         [YOUSO25+1][YOUSO24][2*(YOUSO25+1)+1];
  dcomplex TmpHNAt[YOUSO25+1][YOUSO24][2*(YOUSO25+1)+1]
                         [YOUSO25+1][YOUSO24][2*(YOUSO25+1)+1];
  dcomplex TmpHNAp[YOUSO25+1][YOUSO24][2*(YOUSO25+1)+1]
                         [YOUSO25+1][YOUSO24][2*(YOUSO25+1)+1];

  dcomplex CmatS0[2*(YOUSO25+1)+1][2*(YOUSO25+1)+1];
  dcomplex CmatSr[2*(YOUSO25+1)+1][2*(YOUSO25+1)+1];
  dcomplex CmatSt[2*(YOUSO25+1)+1][2*(YOUSO25+1)+1];
  dcomplex CmatSp[2*(YOUSO25+1)+1][2*(YOUSO25+1)+1];
  dcomplex CmatK0[2*(YOUSO25+1)+1][2*(YOUSO25+1)+1];
  dcomplex CmatKr[2*(YOUSO25+1)+1][2*(YOUSO25+1)+1];
  dcomplex CmatKt[2*(YOUSO25+1)+1][2*(YOUSO25+1)+1];
  dcomplex CmatKp[2*(YOUSO25+1)+1][2*(YOUSO25+1)+1];
  ****************************************************/

  Snd_HVNA2_Size = (int*)malloc(sizeof(int)*Num_Procs);
  Rcv_HVNA2_Size = (int*)malloc(sizeof(int)*Num_Procs);

  size_TmpHNA = (List_YOUSO[25]+1)*List_YOUSO[24]*
                (2*(List_YOUSO[25]+1)+1)*
                (List_YOUSO[25]+1)*List_YOUSO[24]*
                (2*(List_YOUSO[25]+1)+1);

  TmpHNA = (dcomplex******)malloc(sizeof(dcomplex*****)*(List_YOUSO[25]+1));
  for (i=0; i<(List_YOUSO[25]+1); i++){
    TmpHNA[i] = (dcomplex*****)malloc(sizeof(dcomplex****)*List_YOUSO[24]);
    for (j=0; j<List_YOUSO[24]; j++){
      TmpHNA[i][j] = (dcomplex****)malloc(sizeof(dcomplex***)*(2*(List_YOUSO[25]+1)+1));
      for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
        TmpHNA[i][j][k] = (dcomplex***)malloc(sizeof(dcomplex**)*(List_YOUSO[25]+1));
        for (l=0; l<(List_YOUSO[25]+1); l++){
          TmpHNA[i][j][k][l] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[24]);
          for (m=0; m<List_YOUSO[24]; m++){
            TmpHNA[i][j][k][l][m] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
          }
	}
      }
    }
  }

  TmpHNAr = (dcomplex******)malloc(sizeof(dcomplex*****)*(List_YOUSO[25]+1));
  for (i=0; i<(List_YOUSO[25]+1); i++){
    TmpHNAr[i] = (dcomplex*****)malloc(sizeof(dcomplex****)*List_YOUSO[24]);
    for (j=0; j<List_YOUSO[24]; j++){
      TmpHNAr[i][j] = (dcomplex****)malloc(sizeof(dcomplex***)*(2*(List_YOUSO[25]+1)+1));
      for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
        TmpHNAr[i][j][k] = (dcomplex***)malloc(sizeof(dcomplex**)*(List_YOUSO[25]+1));
        for (l=0; l<(List_YOUSO[25]+1); l++){
          TmpHNAr[i][j][k][l] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[24]);
          for (m=0; m<List_YOUSO[24]; m++){
            TmpHNAr[i][j][k][l][m] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
          }
	}
      }
    }
  }

  TmpHNAt = (dcomplex******)malloc(sizeof(dcomplex*****)*(List_YOUSO[25]+1));
  for (i=0; i<(List_YOUSO[25]+1); i++){
    TmpHNAt[i] = (dcomplex*****)malloc(sizeof(dcomplex****)*List_YOUSO[24]);
    for (j=0; j<List_YOUSO[24]; j++){
      TmpHNAt[i][j] = (dcomplex****)malloc(sizeof(dcomplex***)*(2*(List_YOUSO[25]+1)+1));
      for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
        TmpHNAt[i][j][k] = (dcomplex***)malloc(sizeof(dcomplex**)*(List_YOUSO[25]+1));
        for (l=0; l<(List_YOUSO[25]+1); l++){
          TmpHNAt[i][j][k][l] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[24]);
          for (m=0; m<List_YOUSO[24]; m++){
            TmpHNAt[i][j][k][l][m] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
          }
	}
      }
    }
  }

  TmpHNAp = (dcomplex******)malloc(sizeof(dcomplex*****)*(List_YOUSO[25]+1));
  for (i=0; i<(List_YOUSO[25]+1); i++){
    TmpHNAp[i] = (dcomplex*****)malloc(sizeof(dcomplex****)*List_YOUSO[24]);
    for (j=0; j<List_YOUSO[24]; j++){
      TmpHNAp[i][j] = (dcomplex****)malloc(sizeof(dcomplex***)*(2*(List_YOUSO[25]+1)+1));
      for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
        TmpHNAp[i][j][k] = (dcomplex***)malloc(sizeof(dcomplex**)*(List_YOUSO[25]+1));
        for (l=0; l<(List_YOUSO[25]+1); l++){
          TmpHNAp[i][j][k][l] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[24]);
          for (m=0; m<List_YOUSO[24]; m++){
            TmpHNAp[i][j][k][l][m] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
          }
	}
      }
    }
  }

  CmatS0 = (dcomplex**)malloc(sizeof(dcomplex*)*(2*(List_YOUSO[25]+1)+1));
  for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
    CmatS0[i] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
  }

  CmatSr = (dcomplex**)malloc(sizeof(dcomplex*)*(2*(List_YOUSO[25]+1)+1));
  for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
    CmatSr[i] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
  }

  CmatSt = (dcomplex**)malloc(sizeof(dcomplex*)*(2*(List_YOUSO[25]+1)+1));
  for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
    CmatSt[i] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
  }

  CmatSp = (dcomplex**)malloc(sizeof(dcomplex*)*(2*(List_YOUSO[25]+1)+1));
  for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
    CmatSp[i] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
  }

  /* PrintMemory */
  if (firsttime) {
    PrintMemory("Set_ProExpn_VNA: TmpHNA",sizeof(double)*size_TmpHNA,NULL);
    PrintMemory("Set_ProExpn_VNA: TmpHNAr",sizeof(double)*size_TmpHNA,NULL);
    PrintMemory("Set_ProExpn_VNA: TmpHNAt",sizeof(double)*size_TmpHNA,NULL);
    PrintMemory("Set_ProExpn_VNA: TmpHNAp",sizeof(double)*size_TmpHNA,NULL);

    firsttime=0;
  }

  for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){

    dtime(&Stime_atom);

    Gc_AN = M2G[Mc_AN];
    Cwan = WhatSpecies[Gc_AN];

    for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){

      Gh_AN = natn[Gc_AN][h_AN];        
      Rnh = ncn[Gc_AN][h_AN];
      Hwan = WhatSpecies[Gh_AN];

      dx = (Gxyz[Gc_AN][1] - Gxyz[Gh_AN][1] - atv[Rnh][1]); 
      dy = (Gxyz[Gc_AN][2] - Gxyz[Gh_AN][2] - atv[Rnh][2]); 
      dz = (Gxyz[Gc_AN][3] - Gxyz[Gh_AN][3] - atv[Rnh][3]);

      xyz2spherical(dx,dy,dz,0.0,0.0,0.0,S_coordinate); 
      r     = S_coordinate[0];
      theta = S_coordinate[1];
      phi   = S_coordinate[2];

      /* for empty atoms or finite elemens basis */

      if (r<1.0e-10) r = 1.0e-10;

      /* precalculation of sin and cos */

      siT = sin(theta);
      coT = cos(theta);
      siP = sin(phi);
      coP = cos(phi);

      /****************************************************
         evaluate ovelap integrals <VNA|Phi_{LM,L'M'}>
         between VNA and PAO.
      ****************************************************/
      /****************************************************
       \int VNA(k)*Spe_ProductRF_Bessel(k)*jl(k*R) k^2 dk^3 
      ****************************************************/

      kmin = Radial_kmin;
      kmax = PAO_Nkmax;
      Sk = kmax + kmin;
      Dk = kmax - kmin;

      for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
        for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){
          for (L1=0; L1<=Spe_MaxL_Basis[Cwan]; L1++){
            for (Mul1=0; Mul1<Spe_Num_Basis[Cwan][L1]; Mul1++){
              for (M0=-L0; M0<=L0; M0++){
  	        for (M1=-L1; M1<=L1; M1++){
		   TmpHNA[L0][Mul0][L0+M0][L1][Mul1][L1+M1] = Complex(0.0,0.0); 
		  TmpHNAr[L0][Mul0][L0+M0][L1][Mul1][L1+M1] = Complex(0.0,0.0); 
		  TmpHNAt[L0][Mul0][L0+M0][L1][Mul1][L1+M1] = Complex(0.0,0.0); 
		  TmpHNAp[L0][Mul0][L0+M0][L1][Mul1][L1+M1] = Complex(0.0,0.0); 
		}
	      }
	    }
	  }
	}
      }

      /* allocate SphB and SphBp */

      Lmax_Four_Int = 2*Spe_MaxL_Basis[Cwan];

      SphB = (double**)malloc(sizeof(double*)*(Lmax_Four_Int+3));
      for(LL=0; LL<(Lmax_Four_Int+3); LL++){ 
        SphB[LL] = (double*)malloc(sizeof(double)*GL_Mesh);
      }

      SphBp = (double**)malloc(sizeof(double*)*(Lmax_Four_Int+3));
      for(LL=0; LL<(Lmax_Four_Int+3); LL++){ 
        SphBp[LL] = (double*)malloc(sizeof(double)*GL_Mesh);
      }

      tmp_SphB  = (double*)malloc(sizeof(double)*(Lmax_Four_Int+3));
      tmp_SphBp = (double*)malloc(sizeof(double)*(Lmax_Four_Int+3));

      /* calculate SphB and SphBp */

      for (i=0; i<GL_Mesh; i++){
        Normk = GL_NormK[i];
        Spherical_Bessel(Normk*r,Lmax_Four_Int,tmp_SphB,tmp_SphBp);
        for(LL=0; LL<=Lmax_Four_Int; LL++){ 
          SphB[LL][i]  = tmp_SphB[LL]; 
          SphBp[LL][i] = tmp_SphBp[LL]; 
  	}
      }

      free(tmp_SphB);
      free(tmp_SphBp);

      /* loops for L0, Mul0, L1, and Mul1 */

      for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
        for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){
          for (L1=0; L1<=Spe_MaxL_Basis[Cwan]; L1++){
            for (Mul1=0; Mul1<Spe_Num_Basis[Cwan][L1]; Mul1++){

              if (L0<=L1){
                Lmax_Four_Int = 2*L1;

                /* sum over LL */

                for(LL=0; LL<=Lmax_Four_Int; LL++){ 

                  sum  = 0.0;
                  sumr = 0.0;

                  /* Gauss-Legendre quadrature */

                  for (i=0; i<GL_Mesh; i++){

		    Normk = GL_NormK[i];

                    sj  =  SphB[LL][i];
                    sjp = SphBp[LL][i];

  		    tmp0 = Spe_CrudeVNA_Bessel[Hwan][i];
                    tmp1 = Spe_ProductRF_Bessel[Cwan][L0][Mul0][L1][Mul1][LL][i]; 
                    tmp10 = 0.50*Dk*tmp0*tmp1*GL_Weight[i]*Normk*Normk;

                    sum  += tmp10*sj;
		    sumr += tmp10*sjp*Normk;
		  }

                  /* sum over m */

                  for (M0=-L0; M0<=L0; M0++){
		    for (M1=-L1; M1<=L1; M1++){

                      Csum = Complex(0.0,0.0);
                      Csumr = Complex(0.0,0.0);
                      Csumt = Complex(0.0,0.0);
                      Csump = Complex(0.0,0.0);

                      for(m=-LL; m<=LL; m++){ 

			ComplexSH(LL,m,theta,phi,SH,dSHt,dSHp);
			CY  = Complex(SH[0],SH[1]);
			CYt = Complex(dSHt[0],dSHt[1]);
			CYp = Complex(dSHp[0],dSHp[1]);

			if ( (M1-m)==M0 && abs(L1-LL)<=L0 && L0<=(L1+LL) ){

			  gant = pow(-1.0,(double)abs(m))*Gaunt(L0,M0,L1,M1,LL,-m);
 
			  /* S */

			  Ctmp2 = CRmul(CY,gant);          
			  Csum = Cadd(Csum,Ctmp2);             

			  /* dS/dt */
                      
			  Ctmp2 = CRmul(CYt,gant);          
			  Csumt = Cadd(Csumt,Ctmp2);             
                      
			  /* dS/dp */
                      
			  Ctmp2 = CRmul(CYp,gant);          
			  Csump = Cadd(Csump,Ctmp2);             
			}

		      }

                      /* S */
                      Ctmp1 = CRmul(Csum,sum);
                      TmpHNA[L0][Mul0][L0+M0][L1][Mul1][L1+M1] 
                            = Cadd(TmpHNA[L0][Mul0][L0+M0][L1][Mul1][L1+M1],Ctmp1);

                      /* dS/dr */
                      Ctmp1 = CRmul(Csum,sumr);
                      TmpHNAr[L0][Mul0][L0+M0][L1][Mul1][L1+M1] 
                            = Cadd(TmpHNAr[L0][Mul0][L0+M0][L1][Mul1][L1+M1],Ctmp1);

                      /* dS/dt */
                      Ctmp1 = CRmul(Csumt,sum);
                      TmpHNAt[L0][Mul0][L0+M0][L1][Mul1][L1+M1] 
                            = Cadd(TmpHNAt[L0][Mul0][L0+M0][L1][Mul1][L1+M1],Ctmp1);

                      /* dS/dp */
                      Ctmp1 = CRmul(Csump,sum);
                      TmpHNAp[L0][Mul0][L0+M0][L1][Mul1][L1+M1] 
                            = Cadd(TmpHNAp[L0][Mul0][L0+M0][L1][Mul1][L1+M1],Ctmp1);
		    }
		  }
		}
	      } /* if (L0<=L1) */
	    }
	  }
	}
      }

      /* free SphB and SphBp */
      
      for(LL=0; LL<(Lmax_Four_Int+3); LL++){ 
        free(SphB[LL]);
      }
      free(SphB);

      for(LL=0; LL<(Lmax_Four_Int+3); LL++){ 
        free(SphBp[LL]);
      }
      free(SphBp);

      /* copy the upper part to the lower part */

      for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
        for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){
          for (L1=0; L1<=Spe_MaxL_Basis[Cwan]; L1++){
            for (Mul1=0; Mul1<Spe_Num_Basis[Cwan][L1]; Mul1++){
              if (L0<=L1){
		for (M0=-L0; M0<=L0; M0++){
		  for (M1=-L1; M1<=L1; M1++){

		    TmpHNA[L1][Mul1][L1+M1][L0][Mul0][L0+M0] 
                          = Conjg(TmpHNA[L0][Mul0][L0+M0][L1][Mul1][L1+M1]); 

		    TmpHNAr[L1][Mul1][L1+M1][L0][Mul0][L0+M0]
                          = Conjg(TmpHNAr[L0][Mul0][L0+M0][L1][Mul1][L1+M1]);
 
		    TmpHNAt[L1][Mul1][L1+M1][L0][Mul0][L0+M0]
                          = Conjg(TmpHNAt[L0][Mul0][L0+M0][L1][Mul1][L1+M1]); 

		    TmpHNAp[L1][Mul1][L1+M1][L0][Mul0][L0+M0]
                          = Conjg(TmpHNAp[L0][Mul0][L0+M0][L1][Mul1][L1+M1]); 

		  }
		}
	      }
	    }
	  }
	}
      }

      /****************************************************
 	                 Complex to Real
      ****************************************************/

      num0 = 0;
      for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
	for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){
       
	  num1 = 0;
	  for (L1=0; L1<=Spe_MaxL_Basis[Cwan]; L1++){
	    for (Mul1=0; Mul1<Spe_Num_Basis[Cwan][L1]; Mul1++){

	      for (M0=-L0; M0<=L0; M0++){
		for (M1=-L1; M1<=L1; M1++){
		  CsumS0 = Complex(0.0,0.0);
		  CsumSr = Complex(0.0,0.0);
		  CsumSt = Complex(0.0,0.0);
		  CsumSp = Complex(0.0,0.0);

		  for (k=-L0; k<=L0; k++){

		    Ctmp1 = Conjg(Comp2Real[L0][L0+M0][L0+k]);

		    /* S */

		    Ctmp0 = TmpHNA[L0][Mul0][L0+k][L1][Mul1][L1+M1];
		    Ctmp2 = Cmul(Ctmp1,Ctmp0);
		    CsumS0 = Cadd(CsumS0,Ctmp2);

		    /* dS/dr */

		    Ctmp0 = TmpHNAr[L0][Mul0][L0+k][L1][Mul1][L1+M1];
		    Ctmp2 = Cmul(Ctmp1,Ctmp0);
		    CsumSr = Cadd(CsumSr,Ctmp2);

		    /* dS/dt */

		    Ctmp0 = TmpHNAt[L0][Mul0][L0+k][L1][Mul1][L1+M1];
		    Ctmp2 = Cmul(Ctmp1,Ctmp0);
		    CsumSt = Cadd(CsumSt,Ctmp2);

		    /* dS/dp */

		    Ctmp0 = TmpHNAp[L0][Mul0][L0+k][L1][Mul1][L1+M1];
		    Ctmp2 = Cmul(Ctmp1,Ctmp0);
		    CsumSp = Cadd(CsumSp,Ctmp2);

		  }

		  CmatS0[L0+M0][L1+M1] = CsumS0;
		  CmatSr[L0+M0][L1+M1] = CsumSr;
		  CmatSt[L0+M0][L1+M1] = CsumSt;
		  CmatSp[L0+M0][L1+M1] = CsumSp;

		}
	      }

	      for (M0=-L0; M0<=L0; M0++){
		for (M1=-L1; M1<=L1; M1++){

		  CsumS0 = Complex(0.0,0.0);
		  CsumSr = Complex(0.0,0.0);
		  CsumSt = Complex(0.0,0.0);
		  CsumSp = Complex(0.0,0.0);

		  for (k=-L1; k<=L1; k++){

		    /* S */ 

		    Ctmp1 = Cmul(CmatS0[L0+M0][L1+k],Comp2Real[L1][L1+M1][L1+k]);
		    CsumS0 = Cadd(CsumS0,Ctmp1);

		    /* dS/dr */ 

		    Ctmp1 = Cmul(CmatSr[L0+M0][L1+k],Comp2Real[L1][L1+M1][L1+k]);
		    CsumSr = Cadd(CsumSr,Ctmp1);

		    /* dS/dt */ 

		    Ctmp1 = Cmul(CmatSt[L0+M0][L1+k],Comp2Real[L1][L1+M1][L1+k]);
		    CsumSt = Cadd(CsumSt,Ctmp1);

		    /* dS/dp */ 

		    Ctmp1 = Cmul(CmatSp[L0+M0][L1+k],Comp2Real[L1][L1+M1][L1+k]);
		    CsumSp = Cadd(CsumSp,Ctmp1);

		  }

		  HVNA2[0][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 8.0*CsumS0.r;

		  if (h_AN!=0){

		    if (fabs(siT)<10e-14){

		      HVNA2[1][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
			8.0*(siT*coP*CsumSr.r + coT*coP/r*CsumSt.r);

		      HVNA2[2][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
			8.0*(siT*siP*CsumSr.r + coT*siP/r*CsumSt.r);

		      HVNA2[3][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
			8.0*(coT*CsumSr.r - siT/r*CsumSt.r);
		    }

		    else{

		      HVNA2[1][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
			8.0*(siT*coP*CsumSr.r + coT*coP/r*CsumSt.r
			     - siP/siT/r*CsumSp.r);

		      HVNA2[2][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
			8.0*(siT*siP*CsumSr.r + coT*siP/r*CsumSt.r
			     + coP/siT/r*CsumSp.r);

		      HVNA2[3][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
			8.0*(coT*CsumSr.r - siT/r*CsumSt.r);
		    }
		  }
		  else{
		    HVNA2[1][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 0.0;
		    HVNA2[2][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 0.0;
		    HVNA2[3][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 0.0;
		  }

		}
	      }

	      num1 = num1 + 2*L1 + 1; 
	    }
	  }

	  num0 = num0 + 2*L0 + 1; 
	}
      }
    } /* h_AN */

    dtime(&Etime_atom);
    time_per_atom[Gc_AN] += Etime_atom - Stime_atom;

  } /* Mc_AN */

  /****************************************************
    MPI:

     HVNA2
  ****************************************************/

  /***********************************
             set data size
  ************************************/

  for (ID=0; ID<numprocs; ID++){

    IDS = (myid + ID) % numprocs;
    IDR = (myid - ID + numprocs) % numprocs;

    tag = 999;

    /* find data size to send block data */
    if (F_Snd_Num[IDS]!=0){

      size1 = 0;
      for (k=0; k<=3; k++){
	for (n=0; n<F_Snd_Num[IDS]; n++){
	  Mc_AN = Snd_MAN[IDS][n];
	  Gc_AN = Snd_GAN[IDS][n];
	  Cwan = WhatSpecies[Gc_AN]; 
	  tno1 = Spe_Total_NO[Cwan];
	  size1 += tno1*tno1*(FNAN[Gc_AN]+1);
	}
      }
 
      Snd_HVNA2_Size[IDS] = size1;
      MPI_Isend(&size1, 1, MPI_INT, IDS, tag, mpi_comm_level1, &request);
    }
    else{
      Snd_HVNA2_Size[IDS] = 0;
    }

    /* receiving of size of data */

    if (F_Rcv_Num[IDR]!=0){
      MPI_Recv(&size2, 1, MPI_INT, IDR, tag, mpi_comm_level1, &stat);
      Rcv_HVNA2_Size[IDR] = size2;
    }
    else{
      Rcv_HVNA2_Size[IDR] = 0;
    }

    if (F_Snd_Num[IDS]!=0) MPI_Wait(&request,&stat);
  }

  /***********************************
             data transfer
  ************************************/

  tag = 999;
  for (ID=0; ID<numprocs; ID++){

    IDS = (myid + ID) % numprocs;
    IDR = (myid - ID + numprocs) % numprocs;

    /*****************************
              sending of data 
    *****************************/

    if (F_Snd_Num[IDS]!=0){

      size1 = Snd_HVNA2_Size[IDS];

      /* allocation of array */

      tmp_array = (double*)malloc(sizeof(double)*size1);

      /* multidimentional array to vector array */

      num = 0;
      for (k=0; k<=3; k++){
	for (n=0; n<F_Snd_Num[IDS]; n++){
	  Mc_AN = Snd_MAN[IDS][n];
	  Gc_AN = Snd_GAN[IDS][n];
	  Cwan = WhatSpecies[Gc_AN]; 
	  tno1 = Spe_Total_NO[Cwan];

	  for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
	    tno2 = tno1;
	    for (i=0; i<tno1; i++){
	      for (j=0; j<tno2; j++){
		tmp_array[num] = HVNA2[k][Mc_AN][h_AN][i][j];
		num++;
	      } 
	    } 
	  }
	}
      }

      MPI_Isend(&tmp_array[0], size1, MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request);

    }

    /*****************************
       receiving of block data
    *****************************/

    if (F_Rcv_Num[IDR]!=0){
        
      size2 = Rcv_HVNA2_Size[IDR];
        
      /* allocation of array */
      tmp_array2 = (double*)malloc(sizeof(double)*size2);
        
      MPI_Recv(&tmp_array2[0], size2, MPI_DOUBLE, IDR, tag, mpi_comm_level1, &stat);
        
      num = 0;
      for (k=0; k<=3; k++){
	Mc_AN = F_TopMAN[IDR] - 1;
	for (n=0; n<F_Rcv_Num[IDR]; n++){
	  Mc_AN++;
	  Gc_AN = Rcv_GAN[IDR][n];
	  Cwan = WhatSpecies[Gc_AN]; 
	  tno1 = Spe_Total_NO[Cwan];
	  for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
	    tno2 = tno1;
	    for (i=0; i<tno1; i++){
	      for (j=0; j<tno2; j++){
		HVNA2[k][Mc_AN][h_AN][i][j] = tmp_array2[num];
		num++;
	      }
	    }
	  }
	}        
      }

      /* freeing of array */
      free(tmp_array2);

    }

    if (F_Snd_Num[IDS]!=0){
      MPI_Wait(&request,&stat);
      free(tmp_array);  /* freeing of array */
    } 
  }

  /****************************************************
    HVNA[Mc_AN][0] = sum_{h_AN} HVNA2
  ****************************************************/

  for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){

    Gc_AN = M2G[Mc_AN];
    Cwan = WhatSpecies[Gc_AN];
    for (i=0; i<Spe_Total_NO[Cwan]; i++){
      for (j=0; j<Spe_Total_NO[Cwan]; j++){
	HVNA[Mc_AN][0][i][j] = 0.0;
      }
    }

    for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
      for (i=0; i<Spe_Total_NO[Cwan]; i++){
        for (j=0; j<Spe_Total_NO[Cwan]; j++){
          HVNA[Mc_AN][0][i][j] += HVNA2[0][Mc_AN][h_AN][i][j];
	}
      }
    }
  }  

  /****************************************************
  freeing of arrays:

  int Snd_HVNA2_Size[Num_Procs];
  int Rcv_HVNA2_Size[Num_Procs];

  dcomplex  TmpHNA[YOUSO25+1][YOUSO24][2*(YOUSO25+1)+1]
                         [YOUSO25+1][YOUSO24][2*(YOUSO25+1)+1]; 
  dcomplex TmpHNAr[YOUSO25+1][YOUSO24][2*(YOUSO25+1)+1]
                         [YOUSO25+1][YOUSO24][2*(YOUSO25+1)+1];
  dcomplex TmpHNAt[YOUSO25+1][YOUSO24][2*(YOUSO25+1)+1]
                         [YOUSO25+1][YOUSO24][2*(YOUSO25+1)+1];
  dcomplex TmpHNAp[YOUSO25+1][YOUSO24][2*(YOUSO25+1)+1]
                         [YOUSO25+1][YOUSO24][2*(YOUSO25+1)+1];

  dcomplex CmatS0[2*(YOUSO25+1)+1][2*(YOUSO25+1)+1];
  dcomplex CmatSr[2*(YOUSO25+1)+1][2*(YOUSO25+1)+1];
  dcomplex CmatSt[2*(YOUSO25+1)+1][2*(YOUSO25+1)+1];
  dcomplex CmatSp[2*(YOUSO25+1)+1][2*(YOUSO25+1)+1];
  dcomplex CmatK0[2*(YOUSO25+1)+1][2*(YOUSO25+1)+1];
  dcomplex CmatKr[2*(YOUSO25+1)+1][2*(YOUSO25+1)+1];
  dcomplex CmatKt[2*(YOUSO25+1)+1][2*(YOUSO25+1)+1];
  dcomplex CmatKp[2*(YOUSO25+1)+1][2*(YOUSO25+1)+1];
  ****************************************************/

  free(Snd_HVNA2_Size);
  free(Rcv_HVNA2_Size);

  for (i=0; i<(List_YOUSO[25]+1); i++){
    for (j=0; j<List_YOUSO[24]; j++){
      for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
        for (l=0; l<(List_YOUSO[25]+1); l++){
          for (m=0; m<List_YOUSO[24]; m++){
            free(TmpHNA[i][j][k][l][m]);
          }
          free(TmpHNA[i][j][k][l]);
	}
        free(TmpHNA[i][j][k]);
      }
      free(TmpHNA[i][j]);
    }
    free(TmpHNA[i]);
  }
  free(TmpHNA);

  for (i=0; i<(List_YOUSO[25]+1); i++){
    for (j=0; j<List_YOUSO[24]; j++){
      for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
        for (l=0; l<(List_YOUSO[25]+1); l++){
          for (m=0; m<List_YOUSO[24]; m++){
            free(TmpHNAr[i][j][k][l][m]);
          }
          free(TmpHNAr[i][j][k][l]);
	}
        free(TmpHNAr[i][j][k]);
      }
      free(TmpHNAr[i][j]);
    }
    free(TmpHNAr[i]);
  }
  free(TmpHNAr);

  for (i=0; i<(List_YOUSO[25]+1); i++){
    for (j=0; j<List_YOUSO[24]; j++){
      for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
        for (l=0; l<(List_YOUSO[25]+1); l++){
          for (m=0; m<List_YOUSO[24]; m++){
            free(TmpHNAt[i][j][k][l][m]);
          }
          free(TmpHNAt[i][j][k][l]);
	}
        free(TmpHNAt[i][j][k]);
      }
      free(TmpHNAt[i][j]);
    }
    free(TmpHNAt[i]);
  }
  free(TmpHNAt);

  for (i=0; i<(List_YOUSO[25]+1); i++){
    for (j=0; j<List_YOUSO[24]; j++){
      for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
        for (l=0; l<(List_YOUSO[25]+1); l++){
          for (m=0; m<List_YOUSO[24]; m++){
            free(TmpHNAp[i][j][k][l][m]);
          }
          free(TmpHNAp[i][j][k][l]);
	}
        free(TmpHNAp[i][j][k]);
      }
      free(TmpHNAp[i][j]);
    }
    free(TmpHNAp[i]);
  }
  free(TmpHNAp);

  for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
    free(CmatS0[i]);
  }
  free(CmatS0);

  for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
    free(CmatSr[i]);
  }
  free(CmatSr);

  for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
    free(CmatSt[i]);
  }
  free(CmatSt);

  for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
    free(CmatSp[i]);
  }
  free(CmatSp);

  /****************************************************
   MPI_Barrier
  ****************************************************/

  MPI_Barrier(mpi_comm_level1);

  /* for time */
  dtime(&TEtime);
  time0 = TEtime - TStime;
  return time0;
} 



double VNA_BesselF(int Gensi, int L1, int Mul1, double R)
{
  int mp_min,mp_max,m,po;
  double h1,h2,h3,f1,f2,f3,f4;
  double g1,g2,x1,x2,y1,y2,f;
  double result;

  mp_min = 0;
  mp_max = Ngrid_NormK - 1;
  po = 0;

  if (R<NormK[0]){
    m = 1;
  }
  else if (NormK[mp_max]<R){
    result = 0.0;
    po = 1;
  }
  else{
    do{
      m = (mp_min + mp_max)/2;
      if (NormK[m]<R)
        mp_min = m;
      else 
        mp_max = m;
    }
    while((mp_max-mp_min)!=1);
    m = mp_max;
  }

  /****************************************************
                 Spline like interpolation
  ****************************************************/

  if (po==0){

    if (m==1){
      h2 = NormK[m]   - NormK[m-1];
      h3 = NormK[m+1] - NormK[m];

      f2 = Spe_VNA_Bessel[Gensi][L1][Mul1][m-1];
      f3 = Spe_VNA_Bessel[Gensi][L1][Mul1][m];
      f4 = Spe_VNA_Bessel[Gensi][L1][Mul1][m+1];

      h1 = -(h2+h3);
      f1 = f4;
    }
    else if (m==(Ngrid_NormK-1)){
      h1 = NormK[m-1] - NormK[m-2];
      h2 = NormK[m]   - NormK[m-1];

      f1 = Spe_VNA_Bessel[Gensi][L1][Mul1][m-2];
      f2 = Spe_VNA_Bessel[Gensi][L1][Mul1][m-1];
      f3 = Spe_VNA_Bessel[Gensi][L1][Mul1][m];

      h3 = -(h1+h2);
      f4 = f1;
    }
    else{
      h1 = NormK[m-1] - NormK[m-2];
      h2 = NormK[m]   - NormK[m-1];
      h3 = NormK[m+1] - NormK[m];

      f1 = Spe_VNA_Bessel[Gensi][L1][Mul1][m-2];
      f2 = Spe_VNA_Bessel[Gensi][L1][Mul1][m-1];
      f3 = Spe_VNA_Bessel[Gensi][L1][Mul1][m];
      f4 = Spe_VNA_Bessel[Gensi][L1][Mul1][m+1];
    }

    /****************************************************
                Calculate the value at R
    ****************************************************/

    g1 = ((f3-f2)*h1/h2 + (f2-f1)*h2/h1)/(h1+h2);
    g2 = ((f4-f3)*h2/h3 + (f3-f2)*h3/h2)/(h2+h3);

    x1 = R - NormK[m-1];
    x2 = R - NormK[m];
    y1 = x1/h2;
    y2 = x2/h2;

    f =  y2*y2*(3.0*f2 + h2*g1 + (2.0*f2 + h2*g1)*y2)
       + y1*y1*(3.0*f3 - h2*g2 - (2.0*f3 - h2*g2)*y1);

    result = f;
  }
  
  return result;
}
