File: qs_ks_atom.F

package info (click to toggle)
cp2k 6.1-2
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 204,532 kB
  • sloc: fortran: 835,196; f90: 59,605; python: 9,861; sh: 7,882; cpp: 4,868; ansic: 2,807; xml: 2,185; lisp: 733; pascal: 612; perl: 547; makefile: 497; csh: 16
file content (814 lines) | stat: -rw-r--r-- 41,265 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
!--------------------------------------------------------------------------------------------------!
!   CP2K: A general program to perform molecular dynamics simulations                              !
!   Copyright (C) 2000 - 2018  CP2K developers group                                               !
!--------------------------------------------------------------------------------------------------!

! **************************************************************************************************
!> \brief routines that build the Kohn-Sham matrix  contributions
!>      coming from local atomic densities
! **************************************************************************************************
MODULE qs_ks_atom

   USE ao_util,                         ONLY: trace_r_AxB
   USE atomic_kind_types,               ONLY: atomic_kind_type,&
                                              get_atomic_kind,&
                                              get_atomic_kind_set
   USE basis_set_types,                 ONLY: get_gto_basis_set,&
                                              gto_basis_set_p_type,&
                                              gto_basis_set_type
   USE cp_array_utils,                  ONLY: cp_2d_r_p_type
   USE cp_control_types,                ONLY: dft_control_type
   USE cp_para_types,                   ONLY: cp_para_env_type
   USE dbcsr_api,                       ONLY: dbcsr_get_block_p,&
                                              dbcsr_p_type
   USE kinds,                           ONLY: dp,&
                                              int_8
   USE kpoint_types,                    ONLY: get_kpoint_info,&
                                              kpoint_type
   USE message_passing,                 ONLY: mp_bcast
   USE qs_environment_types,            ONLY: get_qs_env,&
                                              qs_environment_type
   USE qs_force_types,                  ONLY: qs_force_type
   USE qs_kind_types,                   ONLY: get_qs_kind,&
                                              get_qs_kind_set,&
                                              qs_kind_type
   USE qs_neighbor_list_types,          ONLY: get_iterator_task,&
                                              neighbor_list_iterate,&
                                              neighbor_list_iterator_create,&
                                              neighbor_list_iterator_p_type,&
                                              neighbor_list_iterator_release,&
                                              neighbor_list_set_p_type,&
                                              neighbor_list_task_type
   USE qs_nl_hash_table_types,          ONLY: nl_hash_table_add,&
                                              nl_hash_table_create,&
                                              nl_hash_table_get_from_index,&
                                              nl_hash_table_is_null,&
                                              nl_hash_table_obj,&
                                              nl_hash_table_release,&
                                              nl_hash_table_status
   USE qs_oce_methods,                  ONLY: prj_gather
   USE qs_oce_types,                    ONLY: oce_matrix_type
   USE qs_p_env_types,                  ONLY: qs_p_env_type
   USE qs_rho_atom_types,               ONLY: get_rho_atom,&
                                              rho_atom_coeff,&
                                              rho_atom_type
   USE sap_kind_types,                  ONLY: alist_post_align_blk,&
                                              alist_pre_align_blk,&
                                              alist_type,&
                                              get_alist
   USE util,                            ONLY: get_limit
   USE virial_methods,                  ONLY: virial_pair_force
   USE virial_types,                    ONLY: virial_type

!$ USE OMP_LIB, ONLY: omp_get_max_threads, &
!$    omp_get_thread_num, &
!$    omp_lock_kind, &
!$    omp_init_lock, omp_set_lock, &
!$    omp_unset_lock, omp_destroy_lock

#include "./base/base_uses.f90"

   IMPLICIT NONE

   PRIVATE

   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_ks_atom'

   PUBLIC :: update_ks_atom

CONTAINS

! **************************************************************************************************
!> \brief The correction to the KS matrix due to the GAPW local terms to the hartree and
!>      XC contributions is here added. The correspondig forces contribution are also calculated
!>      if required. Each sparse-matrix block A-B is corrected by all the atomic contributions
!>      centered on atoms C for which the triplet A-C-B exists (they are close enough)
!>      To this end special lists are used
!> \param qs_env qs enviroment, for the lists, the contraction coefficients and the
!>               pre calculated integrals of the potential with the atomic orbitals
!> \param ksmat KS matrix, sparse matrix
!> \param pmat density matrix, sparse matrix, needed only for the forces
!> \param forces switch for the calculation of the forces on atoms
!> \param tddft switch for TDDFT linear response
!> \param p_env perturbation theory environment
!> \par History
!>      created [MI]
!>      the loop over the spins is done internally [03-05,MI]
!>      Rewrite using new OCE matrices [08.02.09, jhu]
!>      Add OpenMP [Apr 2016, EPCC]
! **************************************************************************************************
   SUBROUTINE update_ks_atom(qs_env, ksmat, pmat, forces, tddft, p_env)

      TYPE(qs_environment_type), POINTER                 :: qs_env
      TYPE(dbcsr_p_type), DIMENSION(:, :), INTENT(INOUT) :: ksmat, pmat
      LOGICAL, INTENT(IN)                                :: forces
      LOGICAL, INTENT(IN), OPTIONAL                      :: tddft
      TYPE(qs_p_env_type), OPTIONAL, POINTER             :: p_env

      CHARACTER(len=*), PARAMETER :: routineN = 'update_ks_atom', routineP = moduleN//':'//routineN

      INTEGER :: bo(2), handle, ia_kind, iac, iat, iatom, ibc, ikind, img, ip, ispin, ja_kind, &
         jatom, jkind, ka_kind, kac, katom, kbc, kkind, ldCPC, max_gau, max_nsgf, maxsoc, mepos, &
         n_cont_a, n_cont_b, nat, natom, nimages, nkind, nl_table_num_slots, nsetc, nsoctot, &
         nspins, num_pe, slot_num
      INTEGER(KIND=int_8)                                :: nl_table_key
      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: atom_of_kind
      INTEGER, DIMENSION(3)                              :: cell_b
      INTEGER, DIMENSION(:), POINTER                     :: atom_list, list_a, list_b
      INTEGER, DIMENSION(:, :, :), POINTER               :: cell_to_index
      LOGICAL                                            :: dista, distb, found, is_entry_null, &
                                                            is_task_valid, my_tddft, paw_atom, &
                                                            use_virial
      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: a_matrix, p_matrix
      REAL(dp), DIMENSION(3)                             :: rac, rbc
      REAL(dp), DIMENSION(3, 3)                          :: force_tmp
      REAL(kind=dp)                                      :: eps_cpc, factor
      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: C_int_h, C_int_s, coc
      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: dCPC_h, dCPC_s
      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: PC_h, PC_s
      REAL(KIND=dp), DIMENSION(3, 3)                     :: pv_virial_thread
      REAL(KIND=dp), DIMENSION(:, :, :), POINTER         :: C_coeff_hh_a, C_coeff_hh_b, &
                                                            C_coeff_ss_a, C_coeff_ss_b
      TYPE(alist_type), POINTER                          :: alist_ac, alist_bc
      TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
      TYPE(cp_2d_r_p_type), DIMENSION(:), POINTER        :: mat_h, mat_p
      TYPE(cp_para_env_type), POINTER                    :: para_env
      TYPE(dft_control_type), POINTER                    :: dft_control
      TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER  :: basis_set_list
      TYPE(gto_basis_set_type), POINTER                  :: basis_set_a, basis_set_b, orb_basis
      TYPE(kpoint_type), POINTER                         :: kpoints
      TYPE(neighbor_list_iterator_p_type), &
         DIMENSION(:), POINTER                           :: nl_iterator
      TYPE(neighbor_list_set_p_type), DIMENSION(:), &
         POINTER                                         :: sab_orb
      TYPE(neighbor_list_task_type), POINTER             :: next_task, task
      TYPE(nl_hash_table_obj)                            :: nl_hash_table
      TYPE(oce_matrix_type), POINTER                     :: oce
      TYPE(qs_force_type), DIMENSION(:), POINTER         :: force
      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
      TYPE(rho_atom_type), DIMENSION(:), POINTER         :: rho_atom
      TYPE(rho_atom_type), POINTER                       :: rho_at
      TYPE(virial_type), POINTER                         :: virial

!$    INTEGER(kind=omp_lock_kind), ALLOCATABLE, DIMENSION(:) :: locks
!$    INTEGER                                            :: lock_num

      CALL timeset(routineN, handle)

      NULLIFY (qs_kind_set, atomic_kind_set, force, oce, para_env, rho_atom, sab_orb)
      NULLIFY (mat_h, mat_p, orb_basis, dft_control)

      CALL get_qs_env(qs_env=qs_env, &
                      qs_kind_set=qs_kind_set, &
                      atomic_kind_set=atomic_kind_set, &
                      force=force, &
                      oce=oce, &
                      para_env=para_env, &
                      rho_atom_set=rho_atom, &
                      virial=virial, &
                      sab_orb=sab_orb, &
                      dft_control=dft_control)

      nspins = dft_control%nspins
      nimages = dft_control%nimages

      my_tddft = .FALSE.
      IF (PRESENT(tddft)) my_tddft = tddft
      factor = 1.0_dp
      IF (my_tddft) THEN
         IF (nspins == 1) factor = 2.0_dp
         CPASSERT(nimages == 1)
      END IF

      ! kpoint images
      NULLIFY (cell_to_index)
      IF (nimages > 1) THEN
         CALL get_qs_env(qs_env=qs_env, kpoints=kpoints)
         CALL get_kpoint_info(kpoint=kpoints, cell_to_index=cell_to_index)
      END IF

      IF (my_tddft) THEN
         rho_atom => p_env%local_rho_set%rho_atom_set
      END IF

      eps_cpc = dft_control%qs_control%gapw_control%eps_cpc

      CALL get_atomic_kind_set(atomic_kind_set, natom=natom)
      CALL get_qs_kind_set(qs_kind_set, maxsgf=max_nsgf, maxgtops=max_gau)

      IF (forces) THEN
         ALLOCATE (atom_of_kind(natom))
         CALL get_atomic_kind_set(atomic_kind_set, atom_of_kind=atom_of_kind)
         ldCPC = max_gau
         use_virial = virial%pv_availability .AND. (.NOT. virial%pv_numer)
         IF (use_virial) pv_virial_thread(:, :) = 0.0_dp
      ELSE
         use_virial = .FALSE.
      END IF

      nkind = SIZE(qs_kind_set, 1)
      ! Collect the local potential contributions from all the processors
      mepos = para_env%mepos
      num_pe = para_env%num_pe
      DO ikind = 1, nkind
         NULLIFY (atom_list)
         CALL get_atomic_kind(atomic_kind_set(ikind), atom_list=atom_list, natom=nat)
         CALL get_qs_kind(qs_kind_set(ikind), paw_atom=paw_atom)
         IF (paw_atom) THEN
            ! gather the atomic block integrals in a more compressed format
            bo = get_limit(nat, num_pe, mepos)
            DO iat = bo(1), bo(2)
               iatom = atom_list(iat)
               DO ispin = 1, nspins
                  CALL prj_gather(rho_atom(iatom)%ga_Vlocal_gb_h(ispin)%r_coef, &
                                  rho_atom(iatom)%cpc_h(ispin)%r_coef, qs_kind_set(ikind))
                  CALL prj_gather(rho_atom(iatom)%ga_Vlocal_gb_s(ispin)%r_coef, &
                                  rho_atom(iatom)%cpc_s(ispin)%r_coef, qs_kind_set(ikind))
               END DO
            END DO
            ! broadcast the CPC arrays to all processors (replicated data)
            DO ip = 0, num_pe-1
               bo = get_limit(nat, num_pe, ip)
               DO iat = bo(1), bo(2)
                  iatom = atom_list(iat)
                  DO ispin = 1, nspins
                     CALL mp_bcast(rho_atom(iatom)%cpc_h(ispin)%r_coef, ip, para_env%group)
                     CALL mp_bcast(rho_atom(iatom)%cpc_s(ispin)%r_coef, ip, para_env%group)
                  END DO
               END DO
            END DO
         END IF
      END DO

      ALLOCATE (basis_set_list(nkind))
      DO ikind = 1, nkind
         CALL get_qs_kind(qs_kind_set(ikind), basis_set=basis_set_a)
         IF (ASSOCIATED(basis_set_a)) THEN
            basis_set_list(ikind)%gto_basis_set => basis_set_a
         ELSE
            NULLIFY (basis_set_list(ikind)%gto_basis_set)
         END IF
      END DO

      ! build the hash table in serial...
      ! ... creation ...
      CALL neighbor_list_iterator_create(nl_iterator, sab_orb)
      nl_table_num_slots = natom*natom/2 ! this is probably not optimal, but it seems a good start
      CALL nl_hash_table_create(nl_hash_table, nmax=nl_table_num_slots)
      ! ... and population
      DO WHILE (neighbor_list_iterate(nl_iterator) == 0) ! build hash table in serial, so don't pass mepos
         ALLOCATE (task) ! They must be deallocated before the nl_hash_table is released
         CALL get_iterator_task(nl_iterator, task) ! build hash table in serial, so don't pass mepos
         ! tasks with the same key access the same blocks of H & P
         IF (task%iatom <= task%jatom) THEN
            nl_table_key = natom*task%iatom+task%jatom
         ELSE
            nl_table_key = natom*task%jatom+task%iatom
         ENDIF
         CALL nl_hash_table_add(nl_hash_table, nl_table_key, task)
      END DO
      CALL neighbor_list_iterator_release(nl_iterator)

      ! Get the total number of (possibly empty) entries in the table
      CALL nl_hash_table_status(nl_hash_table, nmax=nl_table_num_slots)

!$OMP PARALLEL DEFAULT(NONE)                                         &
!$OMP           SHARED(nl_table_num_slots, nl_hash_table             &
!$OMP                 , max_gau, max_nsgf, nspins, forces            &
!$OMP                 , basis_set_list, nimages, cell_to_index       &
!$OMP                 , ksmat, pmat, natom, nkind, qs_kind_set, oce  &
!$OMP                 , rho_atom, factor, use_virial, atom_of_kind   &
!$OMP                 , ldCPC, force                                 &
!$OMP                 , locks                                        &
!$OMP                 )                                              &
!$OMP          PRIVATE( slot_num, is_entry_null, TASK, is_task_valid &
!$OMP                 , C_int_h, C_int_s, coc, a_matrix, p_matrix    &
!$OMP                 , mat_h, mat_p, dCPC_h, dCPC_s, PC_h, PC_s     &
!$OMP                 , ikind, jkind, iatom, jatom, cell_b           &
!$OMP                 , basis_set_a, basis_set_b, img                &
!$OMP                 , found, next_task                             &
!$OMP                 , kkind, orb_basis, paw_atom, nsetc, maxsoc    &
!$OMP                 , iac, alist_ac, kac, n_cont_a, list_a         &
!$OMP                 , ibc, alist_bc, kbc, n_cont_b, list_b         &
!$OMP                 , katom, rho_at, nsoctot                       &
!$OMP                 , C_coeff_hh_a, C_coeff_ss_a, dista, rac       &
!$OMP                 , C_coeff_hh_b, C_coeff_ss_b, distb, rbc       &
!$OMP                 , ia_kind, ja_kind, ka_kind, force_tmp         &
!$OMP                 )                                              &
!$OMP        REDUCTION(+:pv_virial_thread                            &
!$OMP                 )

      ALLOCATE (C_int_h(max_gau*max_nsgf), C_int_s(max_gau*max_nsgf), coc(max_gau*max_gau), &
                a_matrix(max_gau, max_gau), p_matrix(max_nsgf, max_nsgf))

      ALLOCATE (mat_h(nspins), mat_p(nspins))
      DO ispin = 1, nspins
         NULLIFY (mat_h(ispin)%array, mat_p(ispin)%array)
      END DO

      IF (forces) THEN
         ALLOCATE (dCPC_h(max_gau, max_gau), dCPC_s(max_gau, max_gau), &
                   PC_h(max_nsgf, max_gau, nspins), PC_s(max_nsgf, max_gau, nspins))
!$OMP SINGLE
!$       ALLOCATE (locks(natom*nkind))
!$OMP END SINGLE

!$OMP DO
!$       do lock_num = 1, natom*nkind
!$          call omp_init_lock(locks(lock_num))
!$       end do
!$OMP END DO
      END IF

      ! Dynamic schedule to take account of the fact that some slots may be empty
      ! or contain 1 or more tasks
!$OMP DO SCHEDULE(DYNAMIC,5)
      DO slot_num = 1, nl_table_num_slots
         CALL nl_hash_table_is_null(nl_hash_table, slot_num, is_entry_null)

         IF (.NOT. is_entry_null) THEN
            CALL nl_hash_table_get_from_index(nl_hash_table, slot_num, task)

            is_task_valid = ASSOCIATED(task)
            DO WHILE (is_task_valid)

               ikind = task%ikind
               jkind = task%jkind
               iatom = task%iatom
               jatom = task%jatom
               cell_b(:) = task%cell(:)

               basis_set_a => basis_set_list(ikind)%gto_basis_set
               IF (.NOT. ASSOCIATED(basis_set_a)) CYCLE
               basis_set_b => basis_set_list(jkind)%gto_basis_set
               IF (.NOT. ASSOCIATED(basis_set_b)) CYCLE

               IF (nimages > 1) THEN
                  img = cell_to_index(cell_b(1), cell_b(2), cell_b(3))
                  CPASSERT(img > 0)
               ELSE
                  img = 1
               END IF

               DO ispin = 1, nspins
                  NULLIFY (mat_h(ispin)%array, mat_p(ispin)%array)

                  IF (iatom <= jatom) THEN
                     CALL dbcsr_get_block_p(matrix=ksmat(ispin, img)%matrix, &
                                            row=iatom, col=jatom, &
                                            BLOCK=mat_h(ispin)%array, found=found)
                  ELSE
                     CALL dbcsr_get_block_p(matrix=ksmat(ispin, img)%matrix, &
                                            row=jatom, col=iatom, &
                                            BLOCK=mat_h(ispin)%array, found=found)
                  END IF

                  IF (forces) THEN
                     IF (iatom <= jatom) THEN
                        CALL dbcsr_get_block_p(matrix=pmat(ispin, img)%matrix, &
                                               row=iatom, col=jatom, &
                                               BLOCK=mat_p(ispin)%array, found=found)
                     ELSE
                        CALL dbcsr_get_block_p(matrix=pmat(ispin, img)%matrix, &
                                               row=jatom, col=iatom, &
                                               BLOCK=mat_p(ispin)%array, found=found)
                     END IF
                  END IF
               END DO

               DO kkind = 1, nkind
                  CALL get_qs_kind(qs_kind_set(kkind), basis_set=orb_basis, paw_atom=paw_atom)

                  IF (.NOT. paw_atom) CYCLE

                  CALL get_gto_basis_set(gto_basis_set=orb_basis, nset=nsetc, maxso=maxsoc)

                  iac = ikind+nkind*(kkind-1)
                  ibc = jkind+nkind*(kkind-1)
                  IF (.NOT. ASSOCIATED(oce%intac(iac)%alist)) CYCLE
                  IF (.NOT. ASSOCIATED(oce%intac(ibc)%alist)) CYCLE

                  CALL get_alist(oce%intac(iac), alist_ac, iatom)
                  CALL get_alist(oce%intac(ibc), alist_bc, jatom)
                  IF (.NOT. ASSOCIATED(alist_ac)) CYCLE
                  IF (.NOT. ASSOCIATED(alist_bc)) CYCLE

                  DO kac = 1, alist_ac%nclist
                     DO kbc = 1, alist_bc%nclist
                        IF (alist_ac%clist(kac)%catom /= alist_bc%clist(kbc)%catom) CYCLE

                        IF (ALL(cell_b+alist_bc%clist(kbc)%cell-alist_ac%clist(kac)%cell == 0)) THEN
                           n_cont_a = alist_ac%clist(kac)%nsgf_cnt
                           n_cont_b = alist_bc%clist(kbc)%nsgf_cnt
                           IF (n_cont_a .EQ. 0 .OR. n_cont_b .EQ. 0) CYCLE

                           list_a => alist_ac%clist(kac)%sgf_list
                           list_b => alist_bc%clist(kbc)%sgf_list

                           katom = alist_ac%clist(kac)%catom

                           IF (iatom == katom .AND. ALL(alist_ac%clist(kac)%cell == 0)) THEN
                              C_coeff_hh_a => alist_ac%clist(kac)%achint
                              C_coeff_ss_a => alist_ac%clist(kac)%acint
                              dista = .FALSE.
                           ELSE
                              C_coeff_hh_a => alist_ac%clist(kac)%acint
                              C_coeff_ss_a => alist_ac%clist(kac)%acint
                              dista = .TRUE.
                           END IF

                           IF (jatom == katom .AND. ALL(alist_bc%clist(kbc)%cell == 0)) THEN
                              C_coeff_hh_b => alist_bc%clist(kbc)%achint
                              C_coeff_ss_b => alist_bc%clist(kbc)%acint
                              distb = .FALSE.
                           ELSE
                              C_coeff_hh_b => alist_bc%clist(kbc)%acint
                              C_coeff_ss_b => alist_bc%clist(kbc)%acint
                              distb = .TRUE.
                           END IF

                           rho_at => rho_atom(katom)
                           nsoctot = SIZE(C_coeff_ss_a, 2)

                           CALL add_vhxca_to_ks(mat_h, C_coeff_hh_a, C_coeff_hh_b, C_coeff_ss_a, C_coeff_ss_b, &
                                                rho_at, nspins, iatom, jatom, nsoctot, factor, &
                                                list_a, n_cont_a, list_b, n_cont_b, C_int_h, C_int_s, a_matrix, dista, distb, coc)

                           IF (forces) THEN
                              IF (use_virial) THEN
                                 rac = alist_ac%clist(kac)%rac
                                 rbc = alist_bc%clist(kbc)%rac
                              END IF
                              ia_kind = atom_of_kind(iatom)
                              ja_kind = atom_of_kind(jatom)
                              ka_kind = atom_of_kind(katom)
                              rho_at => rho_atom(katom)
                              force_tmp(1:3, 1:3) = 0.0_dp

                              IF (iatom <= jatom) THEN
                                 CALL add_vhxca_forces(mat_p, C_coeff_hh_a, C_coeff_hh_b, C_coeff_ss_a, C_coeff_ss_b, &
                                                       rho_at, force_tmp, nspins, iatom, jatom, nsoctot, &
                                                       list_a, n_cont_a, list_b, n_cont_b, dCPC_h, dCPC_s, ldCPC, &
                                                       PC_h, PC_s, p_matrix)
!$                               CALL omp_set_lock(locks((ka_kind-1)*nkind+kkind))
                                 force(kkind)%vhxc_atom(1:3, ka_kind) = force(kkind)%vhxc_atom(1:3, ka_kind)+force_tmp(1:3, 3)
!$                               CALL omp_unset_lock(locks((ka_kind-1)*nkind+kkind))
!$                               CALL omp_set_lock(locks((ia_kind-1)*nkind+ikind))
                                 force(ikind)%vhxc_atom(1:3, ia_kind) = force(ikind)%vhxc_atom(1:3, ia_kind)+force_tmp(1:3, 1)
!$                               CALL omp_unset_lock(locks((ia_kind-1)*nkind+ikind))
!$                               CALL omp_set_lock(locks((ja_kind-1)*nkind+jkind))
                                 force(jkind)%vhxc_atom(1:3, ja_kind) = force(jkind)%vhxc_atom(1:3, ja_kind)+force_tmp(1:3, 2)
!$                               CALL omp_unset_lock(locks((ja_kind-1)*nkind+jkind))
                                 IF (use_virial) THEN
                                    CALL virial_pair_force(pv_virial_thread, 1._dp, force_tmp(1:3, 1), rac)
                                    CALL virial_pair_force(pv_virial_thread, 1._dp, force_tmp(1:3, 2), rbc)
                                 END IF
                              ELSE
                                 CALL add_vhxca_forces(mat_p, C_coeff_hh_b, C_coeff_hh_a, C_coeff_ss_b, C_coeff_ss_a, &
                                                       rho_at, force_tmp, nspins, jatom, iatom, nsoctot, &
                                                       list_b, n_cont_b, list_a, n_cont_a, dCPC_h, dCPC_s, ldCPC, &
                                                       PC_h, PC_s, p_matrix)
!$                               CALL omp_set_lock(locks((ka_kind-1)*nkind+kkind))
                                 force(kkind)%vhxc_atom(1:3, ka_kind) = force(kkind)%vhxc_atom(1:3, ka_kind)+force_tmp(1:3, 3)
!$                               CALL omp_unset_lock(locks((ka_kind-1)*nkind+kkind))
!$                               CALL omp_set_lock(locks((ia_kind-1)*nkind+ikind))
                                 force(ikind)%vhxc_atom(1:3, ia_kind) = force(ikind)%vhxc_atom(1:3, ia_kind)+force_tmp(1:3, 2)
!$                               CALL omp_unset_lock(locks((ia_kind-1)*nkind+ikind))
!$                               CALL omp_set_lock(locks((ja_kind-1)*nkind+jkind))
                                 force(jkind)%vhxc_atom(1:3, ja_kind) = force(jkind)%vhxc_atom(1:3, ja_kind)+force_tmp(1:3, 1)
!$                               CALL omp_unset_lock(locks((ja_kind-1)*nkind+jkind))
                                 IF (use_virial) THEN
                                    CALL virial_pair_force(pv_virial_thread, 1._dp, force_tmp(1:3, 2), rac)
                                    CALL virial_pair_force(pv_virial_thread, 1._dp, force_tmp(1:3, 1), rbc)
                                 END IF
                              END IF

                           END IF
                           EXIT ! search loop over jatom-katom list
                        END IF
                     END DO ! kbc
                  END DO ! kac

               ENDDO ! kkind

               next_task => task%next
               ! We are done with this task, we can deallocate it
               DEALLOCATE (task)
               is_task_valid = ASSOCIATED(next_task)
               IF (is_task_valid) task => next_task

            END DO

         ELSE
            ! NO KEY/VALUE
         ENDIF
      END DO
!$OMP END DO

      DO ispin = 1, nspins
         NULLIFY (mat_h(ispin)%array, mat_p(ispin)%array)
      END DO
      DEALLOCATE (mat_h, mat_p, C_int_h, C_int_s, a_matrix, p_matrix, coc)

      IF (forces) THEN
         DEALLOCATE (dCPC_h, dCPC_s, PC_h, PC_s)

         ! Implicit barrier at end of OMP DO, so locks can be freed
!$OMP DO
!$       DO lock_num = 1, natom*nkind
!$          call omp_destroy_lock(locks(lock_num))
!$       END DO
!$OMP END DO

!$OMP SINGLE
!$       DEALLOCATE (locks)
!$OMP END SINGLE NOWAIT
      END IF

!$OMP END PARALLEL

      IF (use_virial) THEN
         virial%pv_virial(1:3, 1:3) = virial%pv_virial(1:3, 1:3)+pv_virial_thread(1:3, 1:3)
      END IF

      CALL nl_hash_table_release(nl_hash_table)

      DEALLOCATE (basis_set_list)
      IF (forces) THEN
         DEALLOCATE (atom_of_kind)
      END IF

      CALL timestop(handle)

   END SUBROUTINE update_ks_atom

! **************************************************************************************************
!> \brief ...
!> \param mat_h ...
!> \param C_hh_a ...
!> \param C_hh_b ...
!> \param C_ss_a ...
!> \param C_ss_b ...
!> \param rho_atom ...
!> \param nspins ...
!> \param ia ...
!> \param ja ...
!> \param nsp ...
!> \param factor ...
!> \param lista ...
!> \param nconta ...
!> \param listb ...
!> \param ncontb ...
!> \param C_int_h ...
!> \param C_int_s ...
!> \param a_matrix ...
!> \param dista ...
!> \param distb ...
!> \param coc ...
! **************************************************************************************************
   SUBROUTINE add_vhxca_to_ks(mat_h, C_hh_a, C_hh_b, C_ss_a, C_ss_b, &
                              rho_atom, nspins, ia, ja, nsp, factor, lista, nconta, listb, ncontb, &
                              C_int_h, C_int_s, a_matrix, dista, distb, coc)
      TYPE(cp_2d_r_p_type), DIMENSION(:), POINTER        :: mat_h
      REAL(KIND=dp), DIMENSION(:, :, :), INTENT(IN)      :: C_hh_a, C_hh_b, C_ss_a, C_ss_b
      TYPE(rho_atom_type), INTENT(IN), POINTER           :: rho_atom
      INTEGER, INTENT(IN)                                :: nspins, ia, ja, nsp
      REAL(KIND=dp), INTENT(IN)                          :: factor
      INTEGER, DIMENSION(:), INTENT(IN)                  :: lista
      INTEGER, INTENT(IN)                                :: nconta
      INTEGER, DIMENSION(:), INTENT(IN)                  :: listb
      INTEGER, INTENT(IN)                                :: ncontb
      REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: C_int_h, C_int_s
      REAL(dp), DIMENSION(:, :)                          :: a_matrix
      LOGICAL, INTENT(IN)                                :: dista, distb
      REAL(dp), DIMENSION(:)                             :: coc

      CHARACTER(len=*), PARAMETER :: routineN = 'add_vhxca_to_ks', &
         routineP = moduleN//':'//routineN

      INTEGER                                            :: i, ispin, j, k
      REAL(KIND=dp), DIMENSION(:, :), POINTER            :: h_block, int_hard, int_soft
      TYPE(rho_atom_coeff), DIMENSION(:), POINTER        :: int_local_h, int_local_s

      NULLIFY (int_local_h, int_local_s, int_hard, int_soft)
      CALL get_rho_atom(rho_atom=rho_atom, cpc_h=int_local_h, cpc_s=int_local_s)

      DO ispin = 1, nspins
         h_block => mat_h(ispin)%array
         !
         int_hard => int_local_h(ispin)%r_coef
         int_soft => int_local_s(ispin)%r_coef
         !
         IF (ia <= ja) THEN
            IF (dista .AND. distb) THEN
               k = 0
               DO i = 1, nsp
                  DO j = 1, nsp
                     k = k+1
                     coc(k) = int_hard(j, i)-int_soft(j, i)
                  END DO
               END DO
               CALL DGEMM('N', 'T', nsp, ncontb, nsp, 1.0_dp, coc, nsp, C_hh_b(1, 1, 1), SIZE(C_hh_b, 1), &
                          0.0_dp, C_int_h(1), nsp)
               CALL DGEMM('N', 'N', nconta, ncontb, nsp, factor, C_hh_a(1, 1, 1), SIZE(C_hh_a, 1), &
                          C_int_h(1), nsp, 0.0_dp, a_matrix(1, 1), SIZE(a_matrix, 1))
            ELSEIF (dista) THEN
               CALL DGEMM('N', 'T', nsp, ncontb, nsp, 1.0_dp, int_hard(1, 1), SIZE(int_hard, 1), &
                          C_hh_b(1, 1, 1), SIZE(C_hh_b, 1), 0.0_dp, coc(1), nsp)
               CALL DGEMM('N', 'T', nsp, ncontb, nsp, -1.0_dp, int_soft(1, 1), SIZE(int_soft, 1), &
                          C_ss_b(1, 1, 1), SIZE(C_ss_b, 1), 1.0_dp, coc(1), nsp)
               CALL DGEMM('N', 'N', nconta, ncontb, nsp, factor, C_hh_a(1, 1, 1), SIZE(C_hh_a, 1), &
                          coc(1), nsp, 0.0_dp, a_matrix(1, 1), SIZE(a_matrix, 1))
            ELSEIF (distb) THEN
               CALL DGEMM('N', 'N', nconta, nsp, nsp, factor, C_hh_a(1, 1, 1), SIZE(C_hh_a, 1), &
                          int_hard(1, 1), SIZE(int_hard, 1), 0.0_dp, coc(1), nconta)
               CALL DGEMM('N', 'N', nconta, nsp, nsp, -factor, C_ss_a(1, 1, 1), SIZE(C_hh_a, 1), &
                          int_soft(1, 1), SIZE(int_soft, 1), 1.0_dp, coc(1), nconta)
               CALL DGEMM('N', 'T', nconta, ncontb, nsp, 1.0_dp, coc(1), nconta, &
                          C_hh_b(1, 1, 1), SIZE(C_hh_b, 1), 0.0_dp, a_matrix(1, 1), SIZE(a_matrix, 1))
            ELSE
               CALL DGEMM('N', 'T', nsp, ncontb, nsp, 1.0_dp, int_hard(1, 1), SIZE(int_hard, 1), &
                          C_hh_b(1, 1, 1), SIZE(C_hh_b, 1), &
                          0.0_dp, C_int_h(1), nsp)
               CALL DGEMM('N', 'T', nsp, ncontb, nsp, 1.0_dp, int_soft(1, 1), SIZE(int_soft, 1), &
                          C_ss_b(1, 1, 1), SIZE(C_ss_b, 1), &
                          0.0_dp, C_int_s(1), nsp)
               CALL DGEMM('N', 'N', nconta, ncontb, nsp, factor, C_hh_a(1, 1, 1), SIZE(C_hh_a, 1), &
                          C_int_h(1), nsp, &
                          0.0_dp, a_matrix(1, 1), SIZE(a_matrix, 1))
               CALL DGEMM('N', 'N', nconta, ncontb, nsp, -factor, C_ss_a(1, 1, 1), SIZE(C_ss_a, 1), &
                          C_int_s(1), nsp, &
                          1.0_dp, a_matrix(1, 1), SIZE(a_matrix, 1))
            END IF
            !
            CALL alist_post_align_blk(a_matrix, SIZE(a_matrix, 1), h_block, SIZE(h_block, 1), &
                                      lista, nconta, listb, ncontb)
         ELSE
            IF (dista .AND. distb) THEN
               k = 0
               DO i = 1, nsp
                  DO j = 1, nsp
                     k = k+1
                     coc(k) = int_hard(j, i)-int_soft(j, i)
                  END DO
               END DO
               CALL DGEMM('N', 'T', nsp, nconta, nsp, 1.0_dp, coc, nsp, C_hh_a(1, 1, 1), SIZE(C_hh_a, 1), 0.0_dp, C_int_h(1), nsp)
               CALL DGEMM('N', 'N', ncontb, nconta, nsp, factor, C_hh_b(1, 1, 1), SIZE(C_hh_b, 1), &
                          C_int_h(1), nsp, 0.0_dp, a_matrix(1, 1), SIZE(a_matrix, 1))
            ELSEIF (dista) THEN
               CALL DGEMM('N', 'N', ncontb, nsp, nsp, factor, C_hh_b(1, 1, 1), SIZE(C_hh_b, 1), &
                          int_hard(1, 1), SIZE(int_hard, 1), 0.0_dp, coc, ncontb)
               CALL DGEMM('N', 'N', ncontb, nsp, nsp, -factor, C_ss_b(1, 1, 1), SIZE(C_ss_b, 1), &
                          int_soft(1, 1), SIZE(int_soft, 1), 1.0_dp, coc, ncontb)
               CALL DGEMM('N', 'T', ncontb, nconta, nsp, 1.0_dp, coc, ncontb, &
                          C_hh_a(1, 1, 1), SIZE(C_hh_a, 1), 0.0_dp, a_matrix(1, 1), SIZE(a_matrix, 1))
            ELSEIF (distb) THEN
               CALL DGEMM('N', 'T', nsp, nconta, nsp, 1.0_dp, int_hard(1, 1), SIZE(int_hard, 1), &
                          C_hh_a(1, 1, 1), SIZE(C_hh_a, 1), 0.0_dp, coc(1), nsp)
               CALL DGEMM('N', 'T', nsp, nconta, nsp, -1.0_dp, int_soft(1, 1), SIZE(int_soft, 1), &
                          C_ss_a(1, 1, 1), SIZE(C_ss_a, 1), 1.0_dp, coc(1), nsp)
               CALL DGEMM('N', 'N', ncontb, nconta, nsp, factor, C_hh_b(1, 1, 1), SIZE(C_hh_b, 1), &
                          coc(1), nsp, 0.0_dp, a_matrix(1, 1), SIZE(a_matrix, 1))
            ELSE
               CALL DGEMM('N', 'T', nsp, nconta, nsp, 1.0_dp, int_hard(1, 1), SIZE(int_hard, 1), &
                          C_hh_a(1, 1, 1), SIZE(C_hh_a, 1), &
                          0.0_dp, C_int_h(1), nsp)
               CALL DGEMM('N', 'T', nsp, nconta, nsp, 1.0_dp, int_soft(1, 1), SIZE(int_soft, 1), &
                          C_ss_a(1, 1, 1), SIZE(C_ss_a, 1), &
                          0.0_dp, C_int_s(1), nsp)
               CALL DGEMM('N', 'N', ncontb, nconta, nsp, factor, C_hh_b(1, 1, 1), SIZE(C_hh_b, 1), &
                          C_int_h(1), nsp, &
                          0.0_dp, a_matrix(1, 1), SIZE(a_matrix, 1))
               CALL DGEMM('N', 'N', ncontb, nconta, nsp, -factor, C_ss_b(1, 1, 1), SIZE(C_ss_b, 1), &
                          C_int_s(1), nsp, &
                          1.0_dp, a_matrix(1, 1), SIZE(a_matrix, 1))
            END IF
            !
            CALL alist_post_align_blk(a_matrix, SIZE(a_matrix, 1), h_block, SIZE(h_block, 1), &
                                      listb, ncontb, lista, nconta)
         END IF
      END DO

   END SUBROUTINE add_vhxca_to_ks

! **************************************************************************************************
!> \brief ...
!> \param mat_p ...
!> \param C_hh_a ...
!> \param C_hh_b ...
!> \param C_ss_a ...
!> \param C_ss_b ...
!> \param rho_atom ...
!> \param force ...
!> \param nspins ...
!> \param ia ...
!> \param ja ...
!> \param nsp ...
!> \param lista ...
!> \param nconta ...
!> \param listb ...
!> \param ncontb ...
!> \param dCPC_h ...
!> \param dCPC_s ...
!> \param ldCPC ...
!> \param PC_h ...
!> \param PC_s ...
!> \param p_matrix ...
! **************************************************************************************************
   SUBROUTINE add_vhxca_forces(mat_p, C_hh_a, C_hh_b, C_ss_a, C_ss_b, &
                               rho_atom, force, nspins, ia, ja, nsp, lista, nconta, listb, ncontb, &
                               dCPC_h, dCPC_s, ldCPC, PC_h, PC_s, p_matrix)
      TYPE(cp_2d_r_p_type), DIMENSION(:), INTENT(IN), &
         POINTER                                         :: mat_p
      REAL(KIND=dp), DIMENSION(:, :, :), INTENT(IN)      :: C_hh_a, C_hh_b, C_ss_a, C_ss_b
      TYPE(rho_atom_type), INTENT(IN), POINTER           :: rho_atom
      REAL(dp), DIMENSION(3, 3), INTENT(INOUT)           :: force
      INTEGER, INTENT(IN)                                :: nspins, ia, ja, nsp
      INTEGER, DIMENSION(:), INTENT(IN)                  :: lista
      INTEGER, INTENT(IN)                                :: nconta
      INTEGER, DIMENSION(:), INTENT(IN)                  :: listb
      INTEGER, INTENT(IN)                                :: ncontb
      REAL(KIND=dp), DIMENSION(:, :)                     :: dCPC_h, dCPC_s
      INTEGER, INTENT(IN)                                :: ldCPC
      REAL(KIND=dp), DIMENSION(:, :, :)                  :: PC_h, PC_s
      REAL(KIND=dp), DIMENSION(:, :)                     :: p_matrix

      CHARACTER(len=*), PARAMETER :: routineN = 'add_vhxca_forces', &
         routineP = moduleN//':'//routineN

      INTEGER                                            :: dir, ispin
      REAL(dp), DIMENSION(:, :), POINTER                 :: int_hard, int_soft
      REAL(KIND=dp)                                      :: ieqj, trace
      REAL(KIND=dp), DIMENSION(:, :), POINTER            :: p_block
      TYPE(rho_atom_coeff), DIMENSION(:), POINTER        :: int_local_h, int_local_s

!   if(dista.and.distb) we could also here use ChPCh = CsPCs
!   *** factor 2 because only half of the pairs with ia =/ ja are considered

      ieqj = 2.0_dp
      IF (ia == ja) ieqj = 1.0_dp

      NULLIFY (int_local_h, int_local_s, int_hard, int_soft)
      CALL get_rho_atom(rho_atom=rho_atom, cpc_h=int_local_h, cpc_s=int_local_s)

      DO ispin = 1, nspins
         p_block => mat_p(ispin)%array

         CALL alist_pre_align_blk(p_block, SIZE(p_block, 1), p_matrix, SIZE(p_matrix, 1), &
                                  lista, nconta, listb, ncontb)

         int_hard => int_local_h(ispin)%r_coef
         int_soft => int_local_s(ispin)%r_coef

         CALL DGEMM('N', 'N', nconta, nsp, ncontb, 1.0_dp, p_matrix(1, 1), SIZE(p_matrix, 1), &
                    C_hh_b(1, 1, 1), SIZE(C_hh_b, 1), &
                    0.0_dp, PC_h(1, 1, ispin), SIZE(PC_h, 1))
         CALL DGEMM('N', 'N', nconta, nsp, ncontb, 1.0_dp, p_matrix(1, 1), SIZE(p_matrix, 1), &
                    C_ss_b(1, 1, 1), SIZE(C_ss_b, 1), &
                    0.0_dp, PC_s(1, 1, ispin), SIZE(PC_s, 1))

         DO dir = 2, 4
            CALL DGEMM('T', 'N', nsp, nsp, nconta, 1.0_dp, PC_h(1, 1, ispin), SIZE(PC_h, 1), &
                       C_hh_a(1, 1, dir), SIZE(C_hh_a, 1), &
                       0.0_dp, dCPC_h(1, 1), SIZE(dCPC_h, 1))
            trace = trace_r_AxB(dCPC_h, ldCPC, int_hard, nsp, nsp, nsp)
            force(dir-1, 3) = force(dir-1, 3)+ieqj*trace
            force(dir-1, 1) = force(dir-1, 1)-ieqj*trace

            CALL DGEMM('T', 'N', nsp, nsp, nconta, 1.0_dp, PC_s(1, 1, ispin), SIZE(PC_s, 1), &
                       C_ss_a(1, 1, dir), SIZE(C_ss_a, 1), &
                       0.0_dp, dCPC_s(1, 1), SIZE(dCPC_s, 1))
            trace = trace_r_AxB(dCPC_s, ldCPC, int_soft, nsp, nsp, nsp)
            force(dir-1, 3) = force(dir-1, 3)-ieqj*trace
            force(dir-1, 1) = force(dir-1, 1)+ieqj*trace
         END DO

         ! j-k contributions
         CALL DGEMM('T', 'N', ncontb, nsp, nconta, 1.0_dp, p_matrix(1, 1), SIZE(p_matrix, 1), &
                    C_hh_a(1, 1, 1), SIZE(C_hh_a, 1), &
                    0.0_dp, PC_h(1, 1, ispin), SIZE(PC_h, 1))
         CALL DGEMM('T', 'N', ncontb, nsp, nconta, 1.0_dp, p_matrix(1, 1), SIZE(p_matrix, 1), &
                    C_ss_a(1, 1, 1), SIZE(C_ss_a, 1), &
                    0.0_dp, PC_s(1, 1, ispin), SIZE(PC_s, 1))

         DO dir = 2, 4
            CALL DGEMM('T', 'N', nsp, nsp, ncontb, 1.0_dp, PC_h(1, 1, ispin), SIZE(PC_h, 1), &
                       C_hh_b(1, 1, dir), SIZE(C_hh_b, 1), &
                       0.0_dp, dCPC_h(1, 1), SIZE(dCPC_h, 1))
            trace = trace_r_AxB(dCPC_h, ldCPC, int_hard, nsp, nsp, nsp)
            force(dir-1, 3) = force(dir-1, 3)+ieqj*trace
            force(dir-1, 2) = force(dir-1, 2)-ieqj*trace

            CALL DGEMM('T', 'N', nsp, nsp, ncontb, 1.0_dp, PC_s(1, 1, ispin), SIZE(PC_s, 1), &
                       C_ss_b(1, 1, dir), SIZE(C_ss_b, 1), &
                       0.0_dp, dCPC_s(1, 1), SIZE(dCPC_s, 1))
            trace = trace_r_AxB(dCPC_s, ldCPC, int_soft, nsp, nsp, nsp)
            force(dir-1, 3) = force(dir-1, 3)-ieqj*trace
            force(dir-1, 2) = force(dir-1, 2)+ieqj*trace
         END DO

      END DO !ispin

   END SUBROUTINE add_vhxca_forces

END MODULE qs_ks_atom