From e90d438842ac767038018c80f663eaf38ed74c03 Mon Sep 17 00:00:00 2001 From: christian Date: Tue, 26 Feb 2019 23:20:54 -0700 Subject: [PATCH 1/4] Attempt to add test for orthogonal TB --- src/bodirectprogress.F90 | 5 +++ tests/energy.fullscf.ortho.out | 21 +++++++++ tests/fullscf.ortho.dat | 28 ++++++++++++ tests/latte.fullscf.ortho.in | 75 +++++++++++++++++++++++++++++++ tests/ortho.params/bondints.ortho | 41 +++++++++++++++++ tests/ortho.params/electrons.dat | 7 +++ tests/ortho.params/ppots.ortho | 12 +++++ tests/run_test.sh | 3 +- 8 files changed, 191 insertions(+), 1 deletion(-) create mode 100644 tests/energy.fullscf.ortho.out create mode 100644 tests/fullscf.ortho.dat create mode 100644 tests/latte.fullscf.ortho.in create mode 100755 tests/ortho.params/bondints.ortho create mode 100644 tests/ortho.params/electrons.dat create mode 100644 tests/ortho.params/ppots.ortho diff --git a/src/bodirectprogress.F90 b/src/bodirectprogress.F90 index 32d7875..e0e2b9d 100644 --- a/src/bodirectprogress.F90 +++ b/src/bodirectprogress.F90 @@ -54,6 +54,11 @@ SUBROUTINE BOEVECSPRG !! Convert Hamiltonian to bml format !! 
H should be in orthogonal form, ORTHOH + + IF (BASISTYPE == "ORTHO") THEN + ORTHOH = H + ENDIF + CALL BML_ZERO_MATRIX(BML_MATRIX_DENSE, BML_ELEMENT_REAL, & LATTEPREC, HDIM, HDIM, ORTHOH_BML) CALL BML_ZERO_MATRIX(BML_MATRIX_DENSE, BML_ELEMENT_REAL, & diff --git a/tests/energy.fullscf.ortho.out b/tests/energy.fullscf.ortho.out new file mode 100644 index 0000000..3a72149 --- /dev/null +++ b/tests/energy.fullscf.ortho.out @@ -0,0 +1,21 @@ +-404.503855 +-404.525030 +-404.725674 +-405.582288 +-408.059757 +-410.620284 +-406.007885 +-402.776780 +-403.819356 +-403.608768 +-403.804797 +-408.527217 +-409.741533 +-406.748274 +-405.063504 +-404.568807 +-404.498053 +-404.513288 +-404.526646 +-404.534376 +-404.620906 diff --git a/tests/fullscf.ortho.dat b/tests/fullscf.ortho.dat new file mode 100644 index 0000000..f07c23b --- /dev/null +++ b/tests/fullscf.ortho.dat @@ -0,0 +1,28 @@ + 24 + 6.26700 0.00000 0.00000 + 0.00000 6.26700 0.00000 + 0.00000 0.00000 6.26700 +O 3.08800 3.70000 3.12400 +H 4.05800 3.70000 3.12400 +H 2.76400 3.13200 3.84100 +O 2.47000 0.39000 1.36000 +H 1.54000 0.37000 1.73000 +H 2.48000 0.00000 0.44000 +O 1.99300 0.41700 5.25000 +H 2.39300 1.32700 5.16000 +H 0.99300 0.49700 5.31000 +O 2.05300 6.09700 3.48000 +H 2.12300 5.20700 3.02000 +H 1.11300 0.17000 3.40000 +O 4.90000 5.37700 2.14000 +H 5.51000 6.17700 2.18000 +H 3.95000 5.68700 2.21000 +O 0.92000 3.82700 0.56000 +H 0.00000 3.54700 0.27000 +H 1.23000 4.59700 0.00000 +O 0.89000 2.03700 3.41000 +H 0.72000 2.86700 2.87000 +H 1.79000 1.66700 3.19000 +O 4.45000 4.61700 5.43000 +H 4.75000 3.89700 4.81000 +H 4.06000 4.21700 6.26000 diff --git a/tests/latte.fullscf.ortho.in b/tests/latte.fullscf.ortho.in new file mode 100644 index 0000000..ab19f72 --- /dev/null +++ b/tests/latte.fullscf.ortho.in @@ -0,0 +1,75 @@ +LATTE INPUT FILE +================ +#This input file resumes the content of MDcontroller and TBparam/control.in +#The parser will only read it if it is present inside the running folder. 
+#In case this file is not present Latte will read the two files as original. +#The order of the keywords is not important in this file. + +#General controls +CONTROL{ + XCONTROL= 1 + BASISTYPE= ORTHO + PARAMPATH= "tests/ortho.params" + VERBOSE= 0 + DEBUGON= 0 + FERMIM= 6 + CGORLIB= 1 CGTOL= 1.0e-6 + KBT= 0.0 + NORECS= 1 + ENTROPYKIND= 1 + PPOTON= 1 VDWON= 0 + SPINON= 0 SPINTOL= 1.0e-4 + ELECTRO= 1 ELECMETH= 0 ELEC_ETOL= 0.001 ELEC_QTOL= 1.0e-4 + COULACC= 1.0e-6 COULCUT= -500.0 COULR1= 500.0 + MAXSCF= 250 + BREAKTOL= 1.0E-12 MINSP2ITER= 22 SP2CONV= REL + FULLQCONV= 1 QITER= 0 + QMIX= 0.25 SPINMIX= 0.25 MDMIX= 0.25 + ORDERNMOL= 0 + SPARSEON= 0 THRESHOLDON= 1 NUMTHRESH= 1.0e-6 FILLINSTOP= 100 BLKSZ= 4 + MSPARSE= 3000 + LCNON= 0 LCNITER= 4 CHTOL= 0.01 + SKIN= 1.0 + RELAX= 0 RELAXTYPE= SD MAXITER= 100 RLXFTOL= 0.00001 + MDON= 1 + PBCON= 1 + RESTART= 0 + CHARGE= 0 + XBO= 1 + XBODISON= 1 + XBODISORDER= 5 + NGPU= 2 + KON= 0 + COMPFORCE= 1 + DOSFIT= 0 INTS2FIT= 1 BETA= 1000.0 NFITSTEP= 5000 QFIT= 0 MCSIGMA= 0.2 + PPFITON= 0 + ALLFITON= 0 + PPSTEP= 500 BISTEP= 500 PP2FIT= 2 BINT2FIT= 6 + PPBETA= 1000.0 PPSIGMA= 0.01 PPNMOL= 10 PPNGEOM= 200 + PARREP= 0 + ER= 1.0 +} + +#Controls for QMD +MDCONTROL{ +MAXITER= 20 +UDNEIGH= 1 +DT= 0.25 +TEMPERATURE= 1.0e-30 RNDIST= GAUSSIAN SEEDINIT= UNIFORM +DUMPFREQ= 250 +RSFREQ= 500 +WRTFREQ= 1 +TOINITTEMP5= 1 +THERMPER= 500 +THERMRUN= 50000 +NVTON= 0 NPTON= 0 AVEPER= 1000 FRICTION= 1000.0 SEED= 54 +PTARGET= 0.0 NPTTYPE= ISO +SHOCKON= 0 +SHOCKSTART= 100000 +SHOCKDIR= 1 +UPARTICLE= 500.0 USHOCK= -4590.0 C0= 1300.0 +MDADAPT= 0 +GETHUG= 0 E0= -795.725 V0= 896.984864 P0= 0.083149 +} + + diff --git a/tests/ortho.params/bondints.ortho b/tests/ortho.params/bondints.ortho new file mode 100755 index 0000000..5a4554b --- /dev/null +++ b/tests/ortho.params/bondints.ortho @@ -0,0 +1,41 @@ +Noints= 39 +Element1 Element2 Kind H0 B1 B2 B3 B4 B5 R1 Rcut H0 B1 B2 B3 B4 B5 R1 Rcut +N O sss -9.360078 -1.293118 -0.379415 0.000000 0.000000 1.200000 3.500000 
4.000000 0.340064 -1.703613 -0.622348 0.036738 -0.040158 1.200000 3.500000 4.000000 +N O sps 10.309052 -0.981652 -0.828497 0.000000 0.000000 1.200000 3.500000 4.000000 -0.370946 -1.040947 -0.931097 0.252441 -0.115450 1.200000 3.500000 4.000000 +O N sps 10.723048 -0.454312 -0.916563 0.000000 0.000000 1.200000 3.500000 4.000000 -0.420014 -1.107918 -0.905594 0.188424 -0.088365 1.200000 3.500000 4.000000 +N O pps 9.259131 -0.734112 -1.023762 0.000000 0.000000 1.200000 3.500000 4.000000 -0.314073 0.499050 -2.914288 2.067657 -0.738439 1.200000 3.500000 4.000000 +N O ppp -4.532623 -1.999631 -0.286275 0.000000 0.000000 1.200000 3.500000 4.000000 0.223937 -1.991867 -0.537630 -0.081270 -0.004130 1.200000 3.500000 4.000000 +C N sss -7.409712 -1.940942 -0.219762 0.000000 0.000000 1.500000 3.500000 4.000000 0.263438 -1.754525 -0.584215 -0.007801 -0.021729 1.500000 3.500000 4.000000 +C N sps 7.501761 -1.211169 -0.373905 0.000000 0.000000 1.500000 3.500000 4.000000 -0.326609 -1.197485 -0.807786 0.134891 -0.084373 1.500000 3.500000 4.000000 +N C sps 8.697591 -1.267240 -0.178484 0.000000 0.000000 1.500000 3.500000 4.000000 -0.337943 -1.335442 -0.769693 0.119373 -0.079493 1.500000 3.500000 4.000000 +C N pps 6.954600 -1.188456 -0.808043 0.000000 0.000000 1.500000 3.500000 4.000000 -0.350240 -0.467439 -1.849316 1.854403 -0.988471 1.500000 3.500000 4.000000 +C N ppp -2.921605 -2.203548 -0.409424 0.000000 0.000000 1.500000 3.500000 4.000000 0.158424 -2.114409 -0.582346 -0.051076 -0.006183 1.500000 3.500000 4.000000 +C O sss -13.986685 -1.931973 -0.432011 0.000000 0.000000 1.200000 3.500000 4.000000 0.375339 -1.547372 -0.642492 0.020614 -0.026699 1.200000 3.500000 4.000000 +C O sps 10.718738 -1.389459 -0.182128 0.000000 0.000000 1.200000 3.500000 4.000000 -0.373027 -0.776043 -1.019920 0.257539 -0.102838 1.200000 3.500000 4.000000 +O C sps 14.194791 -1.371650 -0.248285 0.000000 0.000000 1.200000 3.500000 4.000000 -0.458068 -1.035067 -0.937868 0.190562 -0.077841 1.200000 3.500000 4.000000 
+C O pps 8.622023 -0.557144 -0.938551 0.000000 0.000000 1.200000 3.500000 4.000000 -0.322293 0.795473 -3.476601 2.589965 -0.897800 1.200000 3.500000 4.000000 +C O ppp -5.327397 -2.190160 -0.089303 0.000000 0.000000 1.200000 3.500000 4.000000 0.244570 -1.922717 -0.573671 -0.057280 -0.004108 1.200000 3.500000 4.000000 +N N sss -7.165811 -2.348869 -0.541905 0.000000 0.000000 1.500000 3.500000 4.000000 0.231654 -1.879002 -0.572765 -0.004579 -0.031106 1.500000 3.500000 4.000000 +N N sps 8.212268 -1.499123 -0.526440 0.000000 0.000000 1.500000 3.500000 4.000000 -0.305271 -1.385158 -0.751032 0.114531 -0.090839 1.500000 3.500000 4.000000 +N N pps 7.102331 -1.252366 -0.552533 0.000000 0.000000 1.500000 3.500000 4.000000 -0.324668 -0.547805 -1.638658 1.495168 -0.827868 1.500000 3.500000 4.000000 +N N ppp -2.828938 -2.376886 -0.560898 0.000000 0.000000 1.500000 3.500000 4.000000 0.142909 -2.162036 -0.571942 -0.071640 -0.004682 1.500000 3.500000 4.000000 +O O sss -14.387756 -2.244278 -1.645605 0.000000 0.000000 1.200000 3.500000 4.000000 0.296445 -1.911896 -0.663451 0.038054 -0.046608 1.200000 3.500000 4.000000 +O O sps 13.699127 -1.602358 -0.114474 0.000000 0.000000 1.200000 3.500000 4.000000 -0.362143 -1.285274 -0.939591 0.204641 -0.106438 1.200000 3.500000 4.000000 +O O pps 9.235469 -1.131474 -0.924535 0.000000 0.000000 1.200000 3.500000 4.000000 -0.312044 0.121814 -2.519352 1.681266 -0.644566 1.200000 3.500000 4.000000 +O O ppp -4.526526 -2.487174 -0.201464 0.000000 0.000000 1.200000 3.500000 4.000000 0.193010 -2.168462 -0.580629 -0.105104 0.004891 1.200000 3.500000 4.000000 +H O sss -12.189103 -1.800097 -0.325933 0.000000 0.000000 1.000000 3.500000 4.000000 0.404725 -1.702546 -0.707938 0.074904 -0.039922 1.000000 3.500000 4.000000 +H O sps 9.518733 -1.333235 -0.393710 0.000000 0.000000 1.000000 3.500000 4.000000 -0.447660 -0.952979 -1.163537 0.400616 -0.156965 1.000000 3.500000 4.000000 +H N sss -12.631030 -1.585597 -0.250969 0.000000 0.000000 1.000000 3.500000 4.000000 
0.446693 -1.500463 -0.657448 0.065741 -0.037004 1.000000 3.500000 4.000000 +H N sps 9.837852 -1.234850 -0.324283 0.000000 0.000000 1.000000 3.500000 4.000000 -0.501530 -0.785734 -1.123232 0.394878 -0.148501 1.000000 3.500000 4.000000 +C C sss -9.197237 -1.607050 -0.535057 0.000000 0.000000 1.400000 3.500000 4.000000 0.346977 -1.519820 -0.570812 -0.013518 -0.015829 1.400000 3.500000 4.000000 +C C sps 8.562436 -0.980182 -0.646929 0.000000 0.000000 1.400000 3.500000 4.000000 -0.400467 -0.984048 -0.853949 0.157178 -0.073381 1.400000 3.500000 4.000000 +C C pps 6.614756 -0.528591 -0.951460 0.000000 0.000000 1.400000 3.500000 4.000000 -0.382417 0.102889 -2.786680 2.646356 -1.134320 1.400000 3.500000 4.000000 +C C ppp -3.678302 -1.881668 -0.255951 0.000000 0.000000 1.400000 3.500000 4.000000 0.214357 -1.948923 -0.578323 -0.034356 -0.007257 1.400000 3.500000 4.000000 +H C sss -9.235812 -1.372683 -0.408433 0.000000 0.000000 1.100000 3.500000 4.000000 0.416003 -1.459596 -0.654874 0.009140 -0.012658 1.100000 3.500000 4.000000 +H C sps 8.104851 -0.936099 -0.626219 0.000000 0.000000 1.100000 3.500000 4.000000 -0.495695 -0.901626 -1.007214 0.189808 -0.057087 1.100000 3.500000 4.000000 +H H sss -9.400000 -1.145903 -0.391777 0.000000 0.000000 0.750000 3.500000 4.000000 0.575007 -1.391261 -0.778831 0.080209 -0.017759 0.750000 3.500000 4.000000 +W W sss -2.63332044 -0.71100562 -0.27081645 0.03306840 -0.00393097 2.73 6.00 7.00 0.28895119 -0.91180491 -0.22236240 0.00062879 0.00223537 2.73 6.00 7.00 +W W sds -1.48671751 -0.42509143 -0.65004572 0.29520069 -0.06194951 2.73 6.00 7.00 0.14376383 -0.45157088 -0.59271709 0.18666281 -0.03366047 2.73 6.00 7.00 +W W dds -1.70672948 -1.10134419 -0.01972556 -0.04301193 0.00482517 2.73 6.00 7.00 0.11587117 -0.53362062 -0.52285554 0.14742450 -0.02563750 2.73 6.00 7.00 +W W ddp 1.41731714 -1.52300320 -0.03534115 -0.03114721 0.00393409 2.73 6.00 7.00 -0.11180069 -1.33376345 -0.26519523 0.05629322 -0.01154228 2.73 6.00 7.00 +W W ddd -0.32269993 
-1.97500297 -0.00061059 -0.03674107 0.00594063 2.73 6.00 7.00 0.02602319 -1.93947083 -0.12231373 0.00539735 -0.00134363 2.73 6.00 7.00 diff --git a/tests/ortho.params/electrons.dat b/tests/ortho.params/electrons.dat new file mode 100644 index 0000000..9946312 --- /dev/null +++ b/tests/ortho.params/electrons.dat @@ -0,0 +1,7 @@ +Noelem= 5 +Element basis Numel Es Ep Ed Ef Mass HubbardU Wss Wpp Wdd Wff +N sp 5.000000 -18.556500 -7.062500 0.000000 0.000000 14.006700 17.372900 0.000000 -0.693400 0.000000 0.000000 +O sp 6.000000 -23.937700 -9.003500 0.000000 0.000000 15.999400 11.876141 0.000000 -0.7576500 0.000000 0.000000 +H s 1.000000 -6.483500 0.000000 0.000000 0.000000 1.007900 12.054683 -2.23400 0.000000 0.000000 0.000000 +C sp 4.000000 -13.719900 -5.254100 0.000000 0.000000 12.010000 14.240811 0.000000 -0.6181000 0.000000 0.000000 +W sd 6.0 -4.05 0.0 -2.12 0.0 183.84 7.048 0.0 0.0 0.0 0.0 diff --git a/tests/ortho.params/ppots.ortho b/tests/ortho.params/ppots.ortho new file mode 100644 index 0000000..6d0f806 --- /dev/null +++ b/tests/ortho.params/ppots.ortho @@ -0,0 +1,12 @@ +Nopps= 10 +Ele1 Ele2 A0 A1 A2 A3 A4 A5 A6 C R1 Rcut +C C 3.927770 24.439989 -51.156433 39.032536 -11.321277 0.000000 0.000000 0.000000 1.600000 1.700000 +N O 14.005908 19.769009 -46.607006 38.399015 -12.656658 0.000000 0.000000 0.000000 1.600000 1.700000 +C N 98.283078 10.289077 -27.709052 22.099235 -6.796462 0.000000 0.000000 0.000000 1.600000 1.700000 +N N 40.335850 14.958977 -36.644093 29.219613 -8.918783 0.000000 0.000000 0.000000 1.600000 1.700000 +C O 0.916287 30.115416 -59.612502 45.114207 -13.200384 0.000000 0.000000 0.000000 1.500000 1.600000 +O O 11.833452 19.281518 -45.763767 37.924165 -12.006535 0.000000 0.000000 0.000000 1.500000 1.600000 +N H 0.664002 28.086622 -63.415978 53.301425 -17.343446 0.000000 0.000000 0.000000 1.300000 1.400000 +O H 0.484351 33.176296 -81.154354 74.931992 -26.796460 0.000000 0.000000 0.000000 1.200000 1.300000 +C H 1.094168 28.606497 -71.558353 65.967464 
-23.372892 0.000000 0.000000 0.000000 1.200000 1.300000 +H H 8.194700 16.371100 -75.246500 106.703000 -59.105700 0.000000 0.000000 0.000000 0.800000 0.900000 diff --git a/tests/run_test.sh b/tests/run_test.sh index 6db34e8..4c88cde 100755 --- a/tests/run_test.sh +++ b/tests/run_test.sh @@ -16,6 +16,7 @@ performanceExpectedTimes["tableread"]=1.700 performanceExpectedTimes["0scf"]=0.127 performanceExpectedTimes["2scf"]=0.120 performanceExpectedTimes["fullscf"]=0.158 +performanceExpectedTimes["fullscf.ortho"]=1.050 performanceExpectedTimes["fullscf.etemp"]=0.160 performanceExpectedTimes["sp2"]=0.161 performanceExpectedTimes["sp2.sparse"]=0.579 @@ -125,7 +126,7 @@ done # Testing for MD simulations: -for name in tableread 0scf 2scf fullscf fullscf.etemp sp2 sp2.sparse fullscf.nvt \ +for name in tableread 0scf 2scf fullscf fullscf.ortho fullscf.etemp sp2 sp2.sparse fullscf.nvt \ fullscf.npt fullscf.vdw fullscf.spin fullscf.kon fullscf.rspace ; do INLATTEFILE="latte."$name".in" From 9c0bf2edfdb13248ee5f54a054d28fadae75f076 Mon Sep 17 00:00:00 2001 From: cnegre Date: Wed, 27 Feb 2019 17:37:58 -0500 Subject: [PATCH 2/4] Added missing files --- MATRIX/F_mmlatte.cu | 28 +++++++++++++++++++++ MATRIX/M_Multiply.cu | 18 +++++++++++++ MATRIX/Makefile | 2 +- MATRIX/Matrix.h | 3 +++ MATRIX/SolveMatrixCG.cu | 4 ++- MATRIX/genmatmult.cu | 56 +++++++++++++++++++++++++++++++++++++++++ Makefile | 2 +- makefile.CHOICES | 50 +++++++++++------------------------- 8 files changed, 125 insertions(+), 38 deletions(-) create mode 100644 MATRIX/F_mmlatte.cu create mode 100644 MATRIX/genmatmult.cu diff --git a/MATRIX/F_mmlatte.cu b/MATRIX/F_mmlatte.cu new file mode 100644 index 0000000..7745a6a --- /dev/null +++ b/MATRIX/F_mmlatte.cu @@ -0,0 +1,28 @@ +/*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! Copyright 2010. Los Alamos National Security, LLC. This material was ! +! produced under U.S. Government contract DE-AC52-06NA25396 for Los Alamos ! +! 
National Laboratory (LANL), which is operated by Los Alamos National ! +! Security, LLC for the U.S. Department of Energy. The U.S. Government has ! +! rights to use, reproduce, and distribute this software. NEITHER THE ! +! GOVERNMENT NOR LOS ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, ! +! EXPRESS OR IMPLIED, OR ASSUMES ANY LIABILITY FOR THE USE OF THIS ! +! SOFTWARE. If software is modified to produce derivative works, such ! +! modified software should be clearly marked, so as not to confuse it ! +! with the version available from LANL. ! +! ! +! Additionally, this program is free software; you can redistribute it ! +! and/or modify it under the terms of the GNU General Public License as ! +! published by the Free Software Foundation; version 2.0 of the License. ! +! Accordingly, this program is distributed in the hope that it will be ! +! useful, but WITHOUT ANY WARRANTY; without even the implied warranty of ! +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General ! +! Public License for more details. ! 
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/ + +#include "Matrix.h" + +extern "C" void mmlatte_(int *hdim, int *tposea, int *tposeb, void *alpha, void *beta, void *amat_pointer, void *bmat_pointer, void *cmat_pointer) { /* C-linkage wrapper: every argument arrives as a pointer; the scalars are dereferenced and the matrix pointers cast, then everything is forwarded to genmatmult. NOTE(review): the casts hard-code double although genmatmult is declared with REAL; confirm REAL is double wherever this file is built. */ + genmatmult(*hdim, *tposea, *tposeb, *((double *)alpha), *((double *)beta), (double *)amat_pointer, (double *)bmat_pointer, (double *)cmat_pointer); + } + + diff --git a/MATRIX/M_Multiply.cu b/MATRIX/M_Multiply.cu index 630e5de..60fa8c7 100644 --- a/MATRIX/M_Multiply.cu +++ b/MATRIX/M_Multiply.cu @@ -154,6 +154,24 @@ void M_Multiply(REAL *scalar1, Matrix A, Matrix B, REAL *scalar2, Matrix C) { } +void M_Multiply(int tposea, int tposeb, REAL *alpha, Matrix A, Matrix B, REAL *beta, Matrix C) { /* Transpose-aware GEMM overload: issues one cublasDgemm on device 0 (handle[0], Device[0]) with CUBLAS_OP_N or CUBLAS_OP_T chosen per operand (0 = N, 1 = T); alpha and beta are handed to cuBLAS as pointers. NOTE(review): m, n, k and the leading dimensions are A.DM, B.DN, A.DN, A.DM, B.DM, C.DM in every branch, which only lines up with the transposed cases when the matrices are square; confirm callers only pass square matrices. */ + + cudaSetDevice(0); + +// printf("tposea = %d tposeb = %d \n", tposea, tposeb); + if (tposea == 0 && tposeb == 0 ) { + cublasDgemm(handle[0], CUBLAS_OP_N, CUBLAS_OP_N, A.DM, B.DN, A.DN, alpha, + A.Device[0], A.DM, B.Device[0], B.DM, beta, C.Device[0], C.DM); + } else if (tposea == 1 && tposeb == 0 ) { + cublasDgemm(handle[0], CUBLAS_OP_T, CUBLAS_OP_N, A.DM, B.DN, A.DN, alpha, + A.Device[0], A.DM, B.Device[0], B.DM, beta, C.Device[0], C.DM); + } else if (tposea == 0 && tposeb == 1 ) { + cublasDgemm(handle[0], CUBLAS_OP_N, CUBLAS_OP_T, A.DM, B.DN, A.DN, alpha, + A.Device[0], A.DM, B.Device[0], B.DM, beta, C.Device[0], C.DM); + } /* NOTE(review): tposea == 1 && tposeb == 1 (and any flag value other than 0 or 1) matches no branch, so no GEMM is issued and C.Device[0] is left as it was; confirm this is intended. */ + +} + void M_Multiply(REAL k, Matrix A, Matrix B) { int msize = A.DM * A.DN; diff --git a/MATRIX/Makefile b/MATRIX/Makefile index 14e4154..af097cc 100644 --- a/MATRIX/Makefile +++ b/MATRIX/Makefile @@ -19,7 +19,7 @@ KERNEL_SOURCES=$(wildcard Kernels/*.cu) KERNEL_OBJECTS=$(KERNEL_SOURCES:.cu=.$(REALSIZE).o) #MATRIX_SOURCES=$(wildcard M_*.cu) $(wildcard F_*.cu) SP2Pure.cu SP2Fermi.cu SolveMatrixCG.cu Allocate.cu TestMultiply.cu -MATRIX_SOURCES=$(wildcard M_*.cu) $(wildcard F_*.cu) SP2Pure.cu SP2Fermi.cu SolveMatrixCG.cu Allocate.cu runmatmult.cu +MATRIX_SOURCES=$(wildcard M_*.cu) $(wildcard F_*.cu) 
SP2Pure.cu SP2Fermi.cu SolveMatrixCG.cu Allocate.cu runmatmult.cu genmatmult.cu MATRIX_CUDA_OBJECTS=$(MATRIX_SOURCES:.cu=.cuda.$(REALSIZE).o) all: libmatrix_cuda.$(PRECISION).a diff --git a/MATRIX/Matrix.h b/MATRIX/Matrix.h index 4f13f16..de511cc 100644 --- a/MATRIX/Matrix.h +++ b/MATRIX/Matrix.h @@ -108,6 +108,7 @@ void M_MultiplyTranspose(Matrix A, Matrix B, Matrix C); void M_Multiply(REAL *scalar, Matrix A, Matrix B, REAL *scalar2, Matrix C); void M_MultiplyMgpu(REAL *scalar, Matrix A, Matrix B, REAL *scalar2, Matrix C); void M_Multiply(REAL scalar, Matrix A, Matrix B); // B=scalar*A +void M_Multiply(int tposea, int tposeb, REAL *scalar1, Matrix A, Matrix B, REAL *scalar2, Matrix C); void M_MultiplyAdd(REAL scalar, Matrix A, REAL scalar2, Matrix B, Matrix C); // C = scalar*A + scalar2*B void M_MultiplySub(REAL scalar, Matrix A, REAL scalar2, Matrix B, Matrix C); // C = scalar*A - scalar2*B void M_MultiplyAdd(REAL scalar, Matrix A, Matrix B, Matrix C); // C = scalar*A + B @@ -153,6 +154,8 @@ void *Allocate(const char Label[], void *Pointer, size_t Size); void runmatmult(int hdim, REAL *bo_pointer, REAL *h_pointer); +void genmatmult(int hdim, int tposea, int tposeb, REAL alpha, REAL beta, REAL *amat_pointer, REAL *bmat_pointer, REAL *cmat_pointer); + void sp2pure_nospin3(REAL bndfil, int hdim, REAL *bo_pointer, REAL maxeval, REAL *h_pointer, REAL maxminusmin, int minsp2iter, int sp2convint); void sp2pure_nospin4(REAL bndfil, int hdim, REAL *bo_pointer, REAL maxeval, REAL *h_pointer, REAL maxminusmin, int minsp2iter, int sp2convint); diff --git a/MATRIX/SolveMatrixCG.cu b/MATRIX/SolveMatrixCG.cu index dc2b724..b02b060 100644 --- a/MATRIX/SolveMatrixCG.cu +++ b/MATRIX/SolveMatrixCG.cu @@ -69,6 +69,8 @@ void solve_matrix_cg(REAL *bo_ptr, int hdim, REAL cgtol2, int fermim) { iter++; +// printf("%d \n", iter); + // A * P0 - intermediate term used in CG M_Multiply( a, p0, tmpmat); @@ -105,7 +107,7 @@ void solve_matrix_cg(REAL *bo_ptr, int hdim, REAL cgtol2, int fermim) 
{ // p0 = -1.0 * r0 + p0 M_MultiplyScalarSum( &MINUS1, r0, p0); - //printf("iter = %d error2 = %e cgtol2= %e \n", iter, error2, cgtol2); +// printf("iter = %d error2 = %e cgtol2= %e \n", iter, error2, cgtol2); if (error2 < cgtol2) breakloop = 1; diff --git a/MATRIX/genmatmult.cu b/MATRIX/genmatmult.cu new file mode 100644 index 0000000..f638ce7 --- /dev/null +++ b/MATRIX/genmatmult.cu @@ -0,0 +1,56 @@ +/*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! Copyright 2010. Los Alamos National Security, LLC. This material was ! +! produced under U.S. Government contract DE-AC52-06NA25396 for Los Alamos ! +! National Laboratory (LANL), which is operated by Los Alamos National ! +! Security, LLC for the U.S. Department of Energy. The U.S. Government has ! +! rights to use, reproduce, and distribute this software. NEITHER THE ! +! GOVERNMENT NOR LOS ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, ! +! EXPRESS OR IMPLIED, OR ASSUMES ANY LIABILITY FOR THE USE OF THIS ! +! SOFTWARE. If software is modified to produce derivative works, such ! +! modified software should be clearly marked, so as not to confuse it ! +! with the version available from LANL. ! +! ! +! Additionally, this program is free software; you can redistribute it ! +! and/or modify it under the terms of the GNU General Public License as ! +! published by the Free Software Foundation; version 2.0 of the License. ! +! Accordingly, this program is distributed in the hope that it will be ! +! useful, but WITHOUT ANY WARRANTY; without even the implied warranty of ! +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General ! +! Public License for more details. ! 
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/ + +#include +#include +#include +#include + +#include "Matrix.h" + +extern int ndevices; +extern int nblocks; + +void genmatmult(int hdim, int tposea, int tposeb, REAL alpha, REAL beta, REAL *amat_pointer, REAL *bmat_pointer, REAL *cmat_pointer) { + // General matrix multiply driver: wraps the three hdim x hdim arrays as Matrix objects, pushes the inputs, calls the transpose-aware M_Multiply with alpha and beta forwarded by address, pulls cmat back and calls M_DeallocateDevice on all three. + + Matrix amat, bmat, cmat; + + M_InitWithLocal(amat, amat_pointer, hdim, hdim); + M_InitWithLocal(bmat, bmat_pointer, hdim, hdim); + M_InitWithLocal(cmat, cmat_pointer, hdim, hdim); + + // Copy matrices to the GPU(s). C is only pushed when |beta| > 1.0e-6 (the test below uses fabs, so a negative beta counts too). NOTE(review): for 0 < |beta| <= 1.0e-6 beta is still forwarded unchanged to M_Multiply although cmat was not pushed; confirm that is intended. + + M_Push( amat ); + M_Push( bmat ); + + if (fabs(beta) > 1.0e-6) M_Push( cmat ); + + M_Multiply(tposea, tposeb, &alpha, amat, bmat, &beta, cmat); + + M_Pull(cmat); + + M_DeallocateDevice(amat); + M_DeallocateDevice(bmat); + M_DeallocateDevice(cmat); + +} diff --git a/Makefile b/Makefile index ff06030..44ddf4e 100644 --- a/Makefile +++ b/Makefile @@ -14,7 +14,7 @@ endif lammps : (rm liblatte.a; cd src; make; cd ..) - (cd $(HOME)/lammps/src; touch fix_latte.cpp; make serial; cd -) + (cd $(HOME)/lammps/src; touch fix_latte.cpp; make mpi; cd -) src : (rm liblatte.a; cd src; make; cd ..) diff --git a/makefile.CHOICES b/makefile.CHOICES index 9000389..3a1b9ea 100644 --- a/makefile.CHOICES +++ b/makefile.CHOICES @@ -15,15 +15,15 @@ RANLIB = /usr/bin/ranlib # Use PROGRESS and BML libraries PROGRESS = OFF -PROGRESS_PATH= $(HOME)/qmd-progress/install/lib -BML_PATH= $(HOME)/bml/install/lib +PROGRESS_PATH= $(HOME)/qmd-progress/install/lib64 +BML_PATH= $(HOME)/bml/install/lib64 # Use METIS library for graph partitioning METIS = OFF METIS_PATH= $(HOME)/metis/metis-5.1.0/install # GPU available - OFF or ON -GPUOPT = OFF +GPUOPT = ON # Using DBCSR library from cp2k? 
OFF or ON DBCSR_OPT = OFF @@ -35,35 +35,17 @@ MPIOPT = OFF # CPU Fortran options # -#For GNU compiler: -#FC = mpif90 -FC = gfortran +FC = xlf90_r FCL = $(FC) -FFLAGS = -O3 -fopenmp -cpp -#FFLAGS = -fast -Mpreprocess -mp -LINKFLAG = -fopenmp +# Optimization flags: +FFLAGS = -qessl -qstrict=all -qsmp=omp -O2 -qextname +# Debug flags: +FFLAGS += -g0 -qreport +LINKFLAG = -qsmp=omp -#For intel compiler: -#FC = ifort -#FCL = $(FC) -#FFLAGS = -O3 -fpp -qopenmp -#LINKFLAG = -qopenmp -#LIB = -mkl=parallel - -#GNU BLAS/LAPACK libraries: -LIB = -llapack -lblas - -#Intel MKL BLAS/LAPACK libraries: -#LIB = -Wl,--no-as-needed -L${MKLROOT}/lib/intel64 \ -# -lmkl_lapack95_lp64 -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core \ -# -lmkl_gnu_thread -lmkl_core -ldl -lpthread -lm - -#Alternative flags for MKL: -#LIB += -mkl=parallel - -#Other BLAS/LAPACK vendors: -#LIB = -framework Accelerate -#LIB = -L/usr/projects/hpcsoft/toss2/common/acml/5.3.1/gfortran64/lib -lacml +#BLAS/LAPACK libraries: +LIB = -L${OLCF_ESSL_ROOT}/lib64/ -lessl -lesslsmp -lesslsmpcuda -qextname +LIB += -lxlopt -lxlf90_r -lxlfmath -lxl -lxlsmp # Uncomment for coverage #CVR = OFF @@ -82,12 +64,10 @@ ifeq ($(GRAPH), ON) FFLAGS += -I$(METIS_PATH)/include endif -#DBCSR_LIB = -L/home/cawkwell/cp2k/lib/cawkwell/popt -lcp2k_dbcsr_lib -#DBCSR_MOD = -I/home/cawkwell/cp2k/obj/cawkwell/popt - # # GPU options # -GPU_CUDA_LIB = -L/opt/cudatoolkit-5.5/lib64 -lcublas -lcudart -GPU_ARCH = sm_20 +GPU_CUDA_LIB = -L${OLCF_CUDA_ROOT}/lib64/ -lcublas -lcudart + +GPU_ARCH = sm_70 From 585c009f132af9a8a986c304c1f6ddf5dfb7638e Mon Sep 17 00:00:00 2001 From: cnegre Date: Wed, 27 Feb 2019 17:59:36 -0500 Subject: [PATCH 3/4] Added Makefile and makefile.CHOICES back --- Makefile | 2 +- makefile.CHOICES | 50 +++++++++++++++++++++++++++++++++--------------- 2 files changed, 36 insertions(+), 16 deletions(-) diff --git a/Makefile b/Makefile index 44ddf4e..ff06030 100644 --- a/Makefile +++ b/Makefile @@ -14,7 +14,7 @@ endif lammps : (rm liblatte.a; cd 
src; make; cd ..) - (cd $(HOME)/lammps/src; touch fix_latte.cpp; make mpi; cd -) + (cd $(HOME)/lammps/src; touch fix_latte.cpp; make serial; cd -) src : (rm liblatte.a; cd src; make; cd ..) diff --git a/makefile.CHOICES b/makefile.CHOICES index 3a1b9ea..9000389 100644 --- a/makefile.CHOICES +++ b/makefile.CHOICES @@ -15,15 +15,15 @@ RANLIB = /usr/bin/ranlib # Use PROGRESS and BML libraries PROGRESS = OFF -PROGRESS_PATH= $(HOME)/qmd-progress/install/lib64 -BML_PATH= $(HOME)/bml/install/lib64 +PROGRESS_PATH= $(HOME)/qmd-progress/install/lib +BML_PATH= $(HOME)/bml/install/lib # Use METIS library for graph partitioning METIS = OFF METIS_PATH= $(HOME)/metis/metis-5.1.0/install # GPU available - OFF or ON -GPUOPT = ON +GPUOPT = OFF # Using DBCSR library from cp2k? OFF or ON DBCSR_OPT = OFF @@ -35,17 +35,35 @@ MPIOPT = OFF # CPU Fortran options # -FC = xlf90_r +#For GNU compiler: +#FC = mpif90 +FC = gfortran FCL = $(FC) -# Optimization flags: -FFLAGS = -qessl -qstrict=all -qsmp=omp -O2 -qextname -# Debug flags: -FFLAGS += -g0 -qreport -LINKFLAG = -qsmp=omp +FFLAGS = -O3 -fopenmp -cpp +#FFLAGS = -fast -Mpreprocess -mp +LINKFLAG = -fopenmp -#BLAS/LAPACK libraries: -LIB = -L${OLCF_ESSL_ROOT}/lib64/ -lessl -lesslsmp -lesslsmpcuda -qextname -LIB += -lxlopt -lxlf90_r -lxlfmath -lxl -lxlsmp +#For intel compiler: +#FC = ifort +#FCL = $(FC) +#FFLAGS = -O3 -fpp -qopenmp +#LINKFLAG = -qopenmp +#LIB = -mkl=parallel + +#GNU BLAS/LAPACK libraries: +LIB = -llapack -lblas + +#Intel MKL BLAS/LAPACK libraries: +#LIB = -Wl,--no-as-needed -L${MKLROOT}/lib/intel64 \ +# -lmkl_lapack95_lp64 -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core \ +# -lmkl_gnu_thread -lmkl_core -ldl -lpthread -lm + +#Alternative flags for MKL: +#LIB += -mkl=parallel + +#Other BLAS/LAPACK vendors: +#LIB = -framework Accelerate +#LIB = -L/usr/projects/hpcsoft/toss2/common/acml/5.3.1/gfortran64/lib -lacml # Uncomment for coverage #CVR = OFF @@ -64,10 +82,12 @@ ifeq ($(GRAPH), ON) FFLAGS += -I$(METIS_PATH)/include endif 
+#DBCSR_LIB = -L/home/cawkwell/cp2k/lib/cawkwell/popt -lcp2k_dbcsr_lib +#DBCSR_MOD = -I/home/cawkwell/cp2k/obj/cawkwell/popt + # # GPU options # -GPU_CUDA_LIB = -L${OLCF_CUDA_ROOT}/lib64/ -lcublas -lcudart - -GPU_ARCH = sm_70 +GPU_CUDA_LIB = -L/opt/cudatoolkit-5.5/lib64 -lcublas -lcudart +GPU_ARCH = sm_20 From ee9c677bbf4fc472dd80289614400c36f7571e8c Mon Sep 17 00:00:00 2001 From: cnegre Date: Mon, 4 Mar 2019 13:56:41 -0500 Subject: [PATCH 4/4] Added missing lines from getforce left after merging with MJC_GPU --- src/getforce.F90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/getforce.F90 b/src/getforce.F90 index d39950f..3311523 100644 --- a/src/getforce.F90 +++ b/src/getforce.F90 @@ -24,20 +24,20 @@ SUBROUTINE GETFORCE USE CONSTANTS_MOD USE SETUPARRAY USE MYPRECISION + USE TIMER_MOD IMPLICIT NONE + REAL(LATTEPREC) :: MLSI IF (EXISTERROR) RETURN FTOT = ZERO IF (KON .EQ. 0) THEN - IF (SPONLY .EQ. 0) THEN CALL GRADHSP ELSE CALL GRADH ENDIF - FTOT = TWO * F IF (BASISTYPE .EQ. "NONORTHO") THEN