Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions examples/gpmdk/src/gpmdcov_kernel.F90
Original file line number Diff line number Diff line change
Expand Up @@ -1149,9 +1149,15 @@ subroutine gpmdcov_rankN_update_byParts(myqn,myn,mysyprt,mysyprtk,maxRanks,KK0Re
call gpmdStartRange("prg_get_hscf",7)
#endif
!ptaux_bml corresponds to H0 which is 0 in this case.
#ifdef USE_OFFLOAD
call prg_get_hscf(ptaux_bml,mysyprt(ipt)%estr%over,ptham_bml,mysyprt(ipt)%spindex,&
mysyprt(ipt)%estr%hindex,tb%hubbardu,ptnet_charge,&
ptcoul_pot_r,ptcoul_pot_k,norbs,lt%threshold)
#else
call prg_get_hscf_v2(ptaux_bml,mysyprt(ipt)%estr%over,ptham_bml,mysyprt(ipt)%spindex,&
mysyprt(ipt)%estr%hindex,tb%hubbardu,ptnet_charge,&
ptcoul_pot_r,ptcoul_pot_k,norbs,lt%threshold)
#endif
#ifdef USE_NVTX
call gpmdEndRange
#endif
Expand Down
100 changes: 84 additions & 16 deletions examples/gpmdk/src/gpmdcov_part.F90
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,10 @@ subroutine gpmdcov_Part(ipreMD)
use gpmdcov_allocation_mod
implicit none
integer, allocatable :: graph_h(:,:)
integer, allocatable, save :: graph_p(:,:), graph_p_flat(:)
integer, allocatable, save :: graph_p(:,:)
integer, allocatable, save :: graph_p_old(:,:)
integer, allocatable, save :: G_added(:,:), G_removed(:,:)
integer, allocatable, save :: N_added(:), N_removed(:), NNZ1(:), NNZ2(:), NNZ_updated(:)
logical, allocatable, save :: v(:), v_check(:)
integer :: n_atoms, max_updates, k, ktot_a, ktot_r
real(dp) :: mls_ii
real, allocatable :: onesMat(:,:)
Expand All @@ -31,7 +30,10 @@ subroutine gpmdcov_Part(ipreMD)
integer :: coreHaloP1, coreP1
integer :: myMdim,parteach_offset
logical :: check_chi,check_graph,graphs_end

logical, allocatable, save :: v(:)
#ifdef USE_OFFLOAD
logical, allocatable, save :: vacc(:,:)
#endif
if(gsp2%mdim < 0)then
myMdim = sy%nats
elseif(gsp2%mdim > sy%nats)then
Expand All @@ -54,7 +56,6 @@ subroutine gpmdcov_Part(ipreMD)

#ifdef DO_MPI
n_atoms = sy%nats

max_updates = gpmdt%max_updates
if(.not.allocated(graph_p_old))then
allocate(graph_p(myMdim,n_atoms))
Expand All @@ -69,7 +70,12 @@ subroutine gpmdcov_Part(ipreMD)
allocate(NNZ2(n_atoms))
allocate(NNZ_updated(n_atoms))
allocate(v(n_atoms))
allocate(v_check(n_atoms))
#ifdef USE_OFFLOAD
allocate(vacc(n_atoms,n_atoms))
!$acc enter data copyin(graph_p(:,:),graph_p_old(:,:)) &
!$acc create(G_added(:,:),G_removed(:,:),N_added(:),N_removed(:)) &
!$acc create(NNZ1(:),NNZ2(:),NNZ_updated(:),vacc(:,:))
#endif
endif
#endif
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
Expand Down Expand Up @@ -117,7 +123,6 @@ subroutine gpmdcov_Part(ipreMD)
mls_i = mls()

! call gpmdcov_mat2VectInt(graph_p,auxVectInt,sy%nats,myMdim)
if(.not.allocated(graph_p_flat))allocate(graph_p_flat(myMdim*sy%nats))
parteach_offset = 0
if(gsp2%partition_type=="Box")then
parteach_offset = 1
Expand All @@ -126,9 +131,6 @@ subroutine gpmdcov_Part(ipreMD)
if (getNRanks() > 1) then
call prg_barrierParallel
if((gsp2%parteach == 1) .or. (mod(mdstep,gsp2%parteach)==parteach_offset) .or. (mdstep <= 1))then
!graph_p_flat = RESHAPE(graph_p,shape(graph_p_flat))
!call prg_sumIntReduceN(graph_p_flat, size(graph_p_flat))
!graph_p = RESHAPE(graph_p_flat,shape(graph_p))
write(*,*)"DEBUG: Doing full graph reduction at mdstep ",mdstep
if(any(graph_p.gt.sy%nats))then
write(*,*)"DEBUG: GPMDCOV_PART before reduction: graph_p has elems > nats"
Expand All @@ -138,19 +140,33 @@ subroutine gpmdcov_Part(ipreMD)
write(*,*)"DEBUG: GPMDCOV_PART after reduction: graph_p has elems > nats"
endif
graph_p_old = graph_p
NNZ1 = count(graph_p_old.ne.0,DIM=1)
#ifdef USE_OFFLOAD
!$acc update device(graph_p_old(:,:),NNZ1(:))
#endif
else
#ifdef USE_NVTX
call gpmdStartRange("Fast graph update",2)
#endif

write(*,*)"DEBUG: Doing graph update reduction at mdstep ",mdstep
!$omp parallel do shared(graph_p_old,graph_p,n_atoms)
do i =1,n_atoms
NNZ1(i) = count(graph_p_old(:,i).ne.0)
NNZ2(i) = count(graph_p(:,i).ne.0)
enddo
! NNZ1 = count(graph_p_old.ne.0,DIM=1)
! NNZ2 = count(graph_p.ne.0,DIM=1)

#ifdef USE_OFFLOAD
!$acc update device(NNZ1(:))
#endif

!Added edges
ktot_a = 0
NNZ1 = count(graph_p_old.ne.0,DIM=1)
NNZ2 = count(graph_p.ne.0,DIM=1)
!$omp parallel do
do i = 1,n_atoms
G_added(:,i) = 0
G_added(:,i) = 0
enddo
N_added = 0
v = .false.
Expand Down Expand Up @@ -181,8 +197,6 @@ subroutine gpmdcov_Part(ipreMD)
do j = 1,NNZ2(i)
v(graph_p(j,i)) = .false.
end do
! v(graph_p_old(1:NNZ1(i),i)) = .false.
! v(graph_p(1:NNZ2(i),i)) = .false.
end do
enddo
! Removed edges
Expand Down Expand Up @@ -220,8 +234,6 @@ subroutine gpmdcov_Part(ipreMD)
do j = 1,NNZ2(i)
v(graph_p(j,i)) = .false.
enddo
! v(graph_p_old(1:NNZ1(i),i)) = .false.
! v(graph_p(1:NNZ2(i),i)) = .false.
end do
enddo
! % Check NNZ_Updated: NNZ_Updated = NNZ1 + N_Added - N_Removed
Expand All @@ -237,6 +249,60 @@ subroutine gpmdcov_Part(ipreMD)
! %% Use G_removed and G_added to update from G1 to G2
call prg_sumIntReduceN(G_added,n_atoms*max_updates)
call prg_sumIntReduceN(G_removed,n_atoms*max_updates)
#ifdef USE_OFFLOAD
NNZ_updated = 0
!$acc update device(G_added(:,:),G_removed(:,:)) &
!$acc device(N_added(:),N_removed(:),NNZ_updated(:)) &
!$acc device(NNZ2(:))

!$acc parallel loop gang present(vacc)
do i = 1,n_atoms
!$acc loop vector
do j = 1,n_atoms
vacc(j,i) = .false.
enddo
enddo

!$acc parallel loop gang &
!$acc private(i,j,k) &
!$acc present(vacc) &
!$acc present(G_added,G_removed,N_added,N_removed) &
!$acc present(graph_p_old,graph_p,NNZ1,NNZ2,NNZ_updated)
do i = 1,n_atoms
!$acc loop vector
do j = 1,NNZ1(i)
vacc(graph_p_old(j,i),i) = .true.
end do
!$acc loop vector
do j = 1,N_removed(i)
vacc(G_removed(j,i),i) = .false. ! % Remove edges
end do
k = 0
!!$omp loop reduction(+:k)
!$acc loop vector
do j = 1,mymdim
graph_p(j,i) = 0
enddo
do j = 1,NNZ1(i)
if (vacc(graph_p_old(j,i),i) .eqv. .true.)then ! % Account only for the remaining edges
k = k + 1;
graph_p(k,i) = graph_p_old(j,i);
end if
end do
NNZ_updated(i) = k + N_added(i)
!$acc loop vector
do j = k+1,NNZ_updated(i)
graph_p(j,i) = G_added(j-k,i) ! Add new edges at the end
end do
k = max(NNZ1(i),NNZ2(i))
!NNZ1(i) = NNZ2(i)
!$acc loop vector
do j = 1,k
graph_p_old(j,i) = graph_p(j,i)
enddo
end do
!$acc update self(graph_p(:,:),graph_p_old(:,:),NNZ_updated(:))
#else
NNZ_updated = 0
v = .false.
!$omp parallel do &
Expand Down Expand Up @@ -272,12 +338,14 @@ subroutine gpmdcov_Part(ipreMD)
graph_p(j,i) = G_added(j-k,i) ! Add new edges at the end
end do
k = max(NNZ1(i),NNZ2(i))
!NNZ1(i) = NNZ2(i)
!$omp loop
do j = 1,k
graph_p_old(j,i) = graph_p(j,i)
enddo
end do
!$omp end parallel do
#endif
else
write(*,*)"GPMDCOV_PART: WARNING: Number of changes exceeds max_updates. System might be unstable. Doing full reduction."
call prg_sumIntReduceN(graph_p, myMdim*sy%nats)
Expand Down
2 changes: 0 additions & 2 deletions examples/gpmdk/src/gpmdcov_writeout.F90
Original file line number Diff line number Diff line change
Expand Up @@ -281,8 +281,6 @@ end function cudaMemGetInfo
endif
endif
endif
#else
print *, message//": No GPU memory report available without USE_NVTX build"
#endif
end subroutine gpmdcov_msMemGPU

Expand Down