File: simple_ort_puri_test.cc

package info (click to toggle)
ergo 3.8-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye
  • size: 17,396 kB
  • sloc: cpp: 94,740; ansic: 17,015; sh: 7,559; makefile: 1,402; yacc: 127; lex: 110; awk: 23
file content (979 lines) | stat: -rw-r--r-- 34,534 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
/* Ergo, version 3.8, a program for linear scaling electronic structure
 * calculations.
 * Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
 * and Anastasia Kruchinina.
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 * 
 * Primary academic reference:
 * Ergo: An open-source program for linear-scaling electronic structure
 * calculations,
 * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
 * Kruchinina,
 * SoftwareX 7, 107 (2018),
 * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
 * 
 * For further information about Ergo, see <http://www.ergoscf.org>.
 */

#include <stdio.h>
#include <unistd.h>
#include <memory>
#include <limits>
#include "matrix_typedefs.h"
#include "matrix_utilities.h"
#include "utilities.h"

/** @file simple_ort_puri_test.cc Performs some simple tests of
    density matrix purification in orthogonal basis using artificialy
    generated input matrices. Written by Elias in Nov 2016. */

//const ergo_real penalty_alpha = 22;

static void report_timing(Util::TimeMeter & tm, const char* s) {
  double secondsTaken = tm.get_wall_seconds() - tm.get_start_time_wall_seconds();
  printf("report_timing for '%s': %f wall seconds\n", s, secondsTaken);
}


#if 0
static void get_tridiagonal_matrix_periodic(symmMatrix & F, int n, const std::vector<int> & perm) {
  // Create tridiagonal matrix with just the off-diagonal elements being nonzero
  std::vector<int> rows(n);
  std::vector<int> cols(n);
  std::vector<ergo_real> values(n);
  for(int i = 0; i < n; i++) {
    int row = i;
    int col = i+1;
    if(col == n)
      col = 0;
    rows[i] = row;
    cols[i] = col;
    values[i] = 1;
  }
  F.assign_from_sparse(rows, cols, values, perm, perm);
}

static void get_tridiagonal_matrix_nonperiodic(symmMatrix & F, int n, const std::vector<int> & perm) {
  // Create tridiagonal matrix with just the off-diagonal elements being nonzero
  int nElements = n-1;
  std::vector<int> rows(nElements);
  std::vector<int> cols(nElements);
  std::vector<ergo_real> values(nElements);
  for(int i = 0; i < nElements; i++) {
    rows[i] = i;
    cols[i] = i+1;
    values[i] = 1;
  }
  F.assign_from_sparse(rows, cols, values, perm, perm);
}
#endif


/*****************************************************************
  Here we construct Huckel-style Hamiltonian matrix for a periodic
  molecular system like this:

  H1          H3          H5          H7
  |           |           |           |
  C1 == C2 -- C3 == C4 -- C5 == C6 -- C7 == C8 -- 
        |           |           |           |
        H2          H4          H6          H8

  The basis functions are ordered like this:
  C1 H1 C2 H2 C3 H3 C4 H4 ...

  The Hamiltonian matrix then looks like this:

    C1 H1 C2 H2 C3 H3 C4 H4

C1  a  c  b
H1     d
C2        a  c  b
H2           d
C3              a  c  b
H3                 d
C4                    a  c
H4                       d

... and so on.

The values of the matrix elements a, b, c, d are as follows:
a = alpha (diagonal entry for C atoms)
b = beta (off-diagonal entry for neighboring C-C atoms)
d = alpha + h_A * beta (diagonal entry for H atoms)
c = k_AB * beta (off-diagonal entry for neighboring C-H atoms)

*****************************************************************/
static void get_Huckel_matrix_periodic(symmMatrix & F, int n, const std::vector<int> & perm) {
  assert(n % 2 == 0);
  assert(n >= 4);
  // FIXME: find out what are some reasonable values for the 4 parameters below. They affect gap and sparsity.
  // Setting alpha and h_A to zero still works, then the diagonal becomes zero and the ratio between beta and k_AB determines the properties of the system.
  const ergo_real alpha = 1.3;
  const ergo_real beta = 0.75;
  const ergo_real h_A = 0.95;
  const ergo_real k_AB = 0.15;
  // The number of matrix elements will be n*2 because we have n
  // elements on the diagonal, plus n off-diagonal elements.
  int nElements = 2*n;
  std::vector<int> rows(nElements);
  std::vector<int> cols(nElements);
  std::vector<ergo_real> values(nElements);
  int nCarbons = n/2;
  int count = 0;
  int row, col;
  ergo_real value;
  for(int i = 0; i < nCarbons; i++) {
    // Now do the row of C_i (3 matrix elements)
    row = i*2;
    col = row;
    value = alpha;
    rows[count] = row; cols[count] = col; values[count] = value; count++;
    col = row+1;
    if(col >= n)
      col -= n;
    value = k_AB * beta;
    rows[count] = row; cols[count] = col; values[count] = value; count++;
    col = row+2;
    if(col >= n)
      col -= n;
    value = beta;
    rows[count] = row; cols[count] = col; values[count] = value; count++;
    // Now do the row of H_i (1 matrix element)
    row++;
    col = row;
    value = alpha + h_A * beta;
    rows[count] = row; cols[count] = col; values[count] = value; count++;
  }
  assert(count == nElements);
  F.assign_from_sparse(rows, cols, values, perm, perm);
}

static void assign_from_full_matrix(symmMatrix & A, const ergo_real* A_full, int n, const std::vector<int> & perm) {
  int nElements = n*n;
  std::vector<int> rows(nElements);
  std::vector<int> cols(nElements);
  std::vector<ergo_real> values(nElements);
  ergo_real tolerance = template_blas_sqrt(mat::getMachineEpsilon<ergo_real>());
  int count = 0;
  for(int i = 0; i < n; i++)
    for(int j = 0; j < n; j++) {
      ergo_real transpose_absdiff = template_blas_fabs(A_full[i*n+j] - A_full[j*n+i]);
      if(transpose_absdiff > tolerance) {
	std::cerr << "Error in assign_from_full_matrix: (transpose_absdiff > tolerance). transpose_absdiff = " << (double)transpose_absdiff << " tolerance = " << (double)tolerance << std::endl;
	throw std::runtime_error("Error in assign_from_full_matrix: (transpose_absdiff > tolerance).");
      }
      rows[count] = i;
      cols[count] = j;
      values[count] = A_full[i*n+j];
      count++;
    }
  assert(count == nElements);
  normalMatrix A2(A);
  A2.assign_from_sparse(rows, cols, values, perm, perm);
  A = A2;
}

static void print_matrix(const symmMatrix & A, const char* A_name, int n, ergo_real scaleFactor = 1.0) {
  printf("print_matrix for matrix '%s', n = %d\n", A_name, n);
  int nTrunc = 20;
  if(n > nTrunc)
    printf("Only showing part of matrix because (n > nTrunc).\n");
  if(scaleFactor != 1.0)
    printf("NOTE: USING scaleFactor = %f\n", (double)scaleFactor);
  normalMatrix A2(A);
  for(int row = 0; row < n; row++) {
    if(row > nTrunc)
      continue;
    std::vector<int> rows(n);
    std::vector<int> cols(n);
    for(int i = 0; i < n; i++) {
      rows[i] = row;
      cols[i] = i;
    }
    std::vector<ergo_real> values(n);
    A2.get_values(rows, cols, values);
    for(int col = 0; col < n; col++) {
      if(col > nTrunc)
	continue;
      printf(" %7.4f", (double)(values[col]*scaleFactor));
    }
    printf("\n");
  } // end for row
}

static void get_all_matrix_elements_nosymm(const normalMatrix & A, int n, std::vector<ergo_real> & result) {
  result.resize(n*n);
  std::vector<int> rows(n*n);
  std::vector<int> cols(n*n);
  int count = 0;
  for(int i = 0; i < n; i++)
    for(int j = 0; j < n; j++) {
      rows[count] = i;
      cols[count] = j;
      count++;
    }
  A.get_values(rows, cols, result);
}

static void get_all_matrix_elements_symm(const symmMatrix & A, int n, std::vector<ergo_real> & result) {
  normalMatrix A2(A);
  get_all_matrix_elements_nosymm(A2, n, result);
}

struct DensMatInfo {
  ergo_real lambda_min;
  ergo_real lambda_max;
  ergo_real lambda_homo;
  ergo_real lambda_lumo;
};

static void get_density_mat_by_diagonalization(const symmMatrix & F,
					       symmMatrix & D,
					       DensMatInfo & info,
					       int n,
					       int n_occ,
					       const std::vector<int> & perm) {
  assert(n >= 1);
  assert(n_occ >= 0 && n_occ <= n);
  // Get full matrix
  std::vector<ergo_real> F_full(n*n);
  get_all_matrix_elements_symm(F, n, F_full);
  // Diagonalize
  int lwork = 3*n*n;
  std::vector<ergo_real> work(lwork);
  std::vector<ergo_real> eigvalList(n);
  std::vector<ergo_real> A(n*n);
  memcpy(&A[0], &F_full[0], n*n*sizeof(ergo_real));
  int syev_info = 0;
  mat::syev("V", "U", &n, &A[0],
	    &n, &eigvalList[0], &work[0], &lwork, 
	    &syev_info);
  if(syev_info != 0)
    throw std::runtime_error("ERROR: mat::syev() failed, gave (syev_info != 0).");
  // Save info about eigenvalues
  info.lambda_min = eigvalList[0];
  info.lambda_max = eigvalList[n-1];
  info.lambda_homo = eigvalList[n_occ-1];
  info.lambda_lumo = eigvalList[n_occ];
  // Put together density matrix using eigenvectors
  std::vector<ergo_real> D_full(n*n);
  for(int i = 0; i < n*n; i++)
    D_full[i] = 0;
  for(int i = 0; i < n_occ; i++) {
    // Add x*xT for cirrent eigenvector x
    const ergo_real* x = &A[i*n];
    for(int a = 0; a < n; a++)
      for(int b = 0; b < n; b++)
	D_full[a*n+b] += x[a]*x[b];
  }
  assign_from_full_matrix(D, &D_full[0], n, perm);
}

static void print_DensMatInfo(const DensMatInfo & info) {
  printf("info.lambda_min = %f\n", (double)info.lambda_min);
  printf("info.lambda_max = %f\n", (double)info.lambda_max);
  printf("info.lambda_homo = %f\n", (double)info.lambda_homo);
  printf("info.lambda_lumo = %f\n", (double)info.lambda_lumo);
}

static ergo_real get_nnz_percentage(int n, const symmMatrix & X) {
  ergo_real nnz = X.nnz();
  ergo_real n2 = (ergo_real)n*n;
  ergo_real nnz_percentage = 100 * nnz / n2;
  return nnz_percentage;
}

static void update_nnz_percentages(int n, const symmMatrix & X, ergo_real & nnz_percentage_min, ergo_real & nnz_percentage_max) {
  ergo_real nnz_percentage = get_nnz_percentage(n, X);
  if(nnz_percentage < nnz_percentage_min)
    nnz_percentage_min = nnz_percentage;
  if(nnz_percentage > nnz_percentage_max)
    nnz_percentage_max = nnz_percentage;
}


#if 0
static void verify_symmetry(int n, const normalMatrix & X) {
  std::vector<ergo_real> fullMat(n*n);
  get_all_matrix_elements_nosymm(X, n, fullMat);
  for(int i = 0; i < n; i++)
    for(int j = i+1; j < n; j++) {
      ergo_real absdiff = template_blas_fabs(fullMat[i*n+j]-fullMat[j*n+i]);
      assert(absdiff < 1e-9);
    }
}

static void get_symm_triple_product_ABA(int n,
					const symmMatrix & A,
					const symmMatrix & B,
					symmMatrix & result_ABA) {
  normalMatrix AB(A);
  AB = A * B;
  normalMatrix ABA(A);
  ABA = AB * A;
  // Check that ABA is really symmetric
  verify_symmetry(n, ABA);
  symmMatrix ABA_symm(ABA);
  result_ABA = ABA_symm;
}

static void get_triple_product_of_symm_mats(int n,
					    const symmMatrix & A,
					    const symmMatrix & B,
					    const symmMatrix & C,
					    normalMatrix & result_ABC) {
  normalMatrix AB(A);
  AB = A * B;
  normalMatrix ABC(A);
  ABC = AB * C;
  result_ABC = ABC;
}

static void get_triple_product_of_normal_mats(int n,
					      const normalMatrix & A,
					      const normalMatrix & B,
					      const normalMatrix & C,
					      normalMatrix & result_ABC) {
  normalMatrix AB(A);
  AB = A * B;
  normalMatrix ABC(A);
  ABC = AB * C;
  result_ABC = ABC;
}

static void get_diagonal_of_matrix(int n,
				   const symmMatrix & A,
				   symmMatrix & result_A_diag) {
  std::vector<int> rowind(n);
  std::vector<int> colind(n);
  for(int i = 0; i < n; i++) {
    rowind[i] = i;
    colind[i] = i;
  }
  std::vector<ergo_real> values(n);
  A.get_values(rowind, colind, values);
  result_A_diag.assign_from_sparse(rowind, colind, values);
}

static void add_to_one_element_symm(int row, int col, ergo_real x, symmMatrix & X, const std::vector<int> & perm) {
  std::vector<int> rowind;
  std::vector<int> colind;
  std::vector<ergo_real> values;
  if(row == col) {
    rowind.resize(1);
    colind.resize(1);
    values.resize(1);
    rowind[0] = row;
    colind[0] = col;
    values[0] = x;
  }
  else {
    // Two off-diagonal elements
    rowind.resize(2);
    colind.resize(2);
    values.resize(2);
    rowind[0] = row;
    colind[0] = col;
    values[0] = x;
    rowind[1] = col;
    colind[1] = row;
    values[1] = x;
  }
  normalMatrix tmp(X);
  tmp.assign_from_sparse(rowind, colind, values, perm, perm);
  symmMatrix tmp2(tmp);
  X += tmp2;
}

static void add_to_one_element_nosymm(int row, int col, ergo_real x, normalMatrix & X, const std::vector<int> & perm) {
  std::vector<int> rowind;
  std::vector<int> colind;
  std::vector<ergo_real> values;
  rowind.resize(1);
  colind.resize(1);
  values.resize(1);
  rowind[0] = row;
  colind[0] = col;
  values[0] = x;
  normalMatrix tmp(X);
  tmp.assign_from_sparse(rowind, colind, values, perm, perm);
  X += tmp;
}

static ergo_real evaluate_f1_symm(int n,
				  const symmMatrix & D,
				  const symmMatrix & E) {
  symmMatrix Po(D);
  symmMatrix Pv(D);
  Pv.add_identity(-1);
  Pv *= -1.0;
  // Get X = Po * E * Pv
  normalMatrix X(D);
  get_triple_product_of_symm_mats(n, Po, E, Pv, X);
  normalMatrix XTX(D);
  XTX = transpose(X) * X;
  return 2*XTX.trace();
}

static ergo_real evaluate_f1_nosymm(int n,
				    const symmMatrix & D,
				    const normalMatrix & E) {
  normalMatrix Po(D);
  normalMatrix Pv(D);
  Pv.add_identity(-1);
  Pv *= -1.0;
  normalMatrix ET(E);
  ET = transpose(E);
  // Get X = Po * E * Pv
  normalMatrix Xov(D);
  get_triple_product_of_normal_mats(n, Po, E, Pv, Xov);
  normalMatrix XovTXov(D);
  XovTXov = transpose(Xov) * Xov;
  normalMatrix Xvo(D);
  get_triple_product_of_normal_mats(n, Pv, E, Po, Xvo); // NOTE: it must be E here, not ET!
  normalMatrix XvoTXvo(D);
  XvoTXvo = transpose(Xvo) * Xvo;
#if 0
  // Check if Xvo is transpose of Xov (it is not!)
  normalMatrix T;
  T = transpose(Xov);
  T += (-1.0)*Xvo;
  std::cout << "T.frob() = " << T.frob() << "    Xvo.frob() = " << Xvo.frob() << "  Xov.frob() = " << Xov.frob() << std::endl;
#endif
  return XovTXov.trace() + XvoTXvo.trace();
}

static void zeroize_elements_inside_pattern(const symmMatrix & patternMat,
					    const normalMatrix & E_in,
					    normalMatrix & E_result,
					    const std::vector<int> & perm) {
  // Get all elements of patternMat in order to get the sparsity pattern
  normalMatrix patternMat_nosymm(patternMat);
  std::vector<int> rowind;
  std::vector<int> colind;
  std::vector<ergo_real> values;
  patternMat_nosymm.get_all_values(rowind, colind, values);
  // Get corresponding elements of E_in
  E_in.get_values(rowind, colind, values);
  normalMatrix tmp(E_in);
  tmp.assign_from_sparse(rowind, colind, values, perm, perm); // Now tmp contains the elements we want to remove.
  E_result = E_in;
  E_result -= tmp;
}

static void get_g_of_E_minus_E0(int n,
				const symmMatrix & E0,
				const symmMatrix & patternMat,
				const symmMatrix & E,
				const std::vector<int> & perm,
				symmMatrix & result_g_of_E_minus_E0) {
  symmMatrix E_minus_E0(E);
  E_minus_E0 -= E0;
  normalMatrix E_minus_E0_normal(E_minus_E0);
  normalMatrix g_of_E_minus_E0(E);
  zeroize_elements_inside_pattern(patternMat, E_minus_E0_normal, g_of_E_minus_E0, perm);
  verify_symmetry(n, g_of_E_minus_E0);
  symmMatrix g_of_E_minus_E0_symm(g_of_E_minus_E0);
  result_g_of_E_minus_E0 = g_of_E_minus_E0_symm;
}

static ergo_real evaluate_f2_nosymm(int n,
				    const symmMatrix & patternMat, // matrix defining sparsity pattern
				    const symmMatrix & E0,
				    const normalMatrix & E,
				    const std::vector<int> & perm) {
  normalMatrix E_minus_E0(E);
  normalMatrix E0_nosymm(E0);
  E_minus_E0 -= E0_nosymm;
  normalMatrix g_of_E_minus_E0(E);
  zeroize_elements_inside_pattern(patternMat, E_minus_E0, g_of_E_minus_E0, perm);
  normalMatrix Y(g_of_E_minus_E0);
  normalMatrix YTY(E);
  YTY = transpose(Y) * Y;
  ergo_real factor = 1;
  ergo_real result = factor * YTY.trace();
  //  printf("evaluate_f2_nosymm returning %12.6g  (Y.frob() = %12.6g)\n", result, Y.frob());
  return result;
}

static ergo_real evaluate_func_nosymm(int n,
				      const symmMatrix & D,
				      const symmMatrix & E0,
				      const symmMatrix & patternMat, // matrix defining sparsity pattern
				      const normalMatrix & E,
				      const std::vector<int> & perm,
				      const std::string & funcName) {
  if(funcName == "f1")
    return evaluate_f1_nosymm(n, D, E);
  else if(funcName == "f2")
    return evaluate_f2_nosymm(n, patternMat, E0, E, perm);
  else
    throw std::runtime_error("ERROR in evaluate_func_nosymm: unknown funcName.");
}

static ergo_real evaluate_f_complete_symm(int n,
					  const symmMatrix & D,
					  const symmMatrix & E0,
					  const symmMatrix & patternMat, // matrix defining sparsity pattern
					  const symmMatrix & E,
					  const std::vector<int> & perm) {
  normalMatrix E_nosymm(E);
  ergo_real f1Value = evaluate_f1_nosymm(n, D, E_nosymm);
  ergo_real f2Value = evaluate_f2_nosymm(n, patternMat, E0, E_nosymm, perm);
  return f1Value + penalty_alpha * f2Value;
}

static void get_numerical_gradient_of_func(int n,
					   const symmMatrix & D,
					   const symmMatrix & E0,
					   const symmMatrix & patternMat, // matrix defining sparsity pattern
					   const symmMatrix & E_in,
					   symmMatrix & result_numerical_gradient_of_f1,
					   const std::vector<int> & perm,
					   const std::string & funcName) {
  normalMatrix E_nosymm(E_in);
  // Get result numerically, as full matrix
  const ergo_real h = 1e-6;
  std::vector<ergo_real> M(n*n);
  for(int i = 0; i < n; i++)
    for(int j = 0; j < n; j++) {
      normalMatrix E1(E_nosymm);
      add_to_one_element_nosymm(i, j, h, E1, perm);
      normalMatrix E2(E_nosymm);
      add_to_one_element_nosymm(i, j, -h, E2, perm);
      ergo_real f1_1 = evaluate_func_nosymm(n, D, E0, patternMat, E1, perm, funcName);
      ergo_real f1_2 = evaluate_func_nosymm(n, D, E0, patternMat, E2, perm, funcName);
      ergo_real gradientValue = (f1_1 - f1_2) / (2*h);
      if(funcName == "f2")
	printf("i j = %2d %2d  f2 gradientValue = %22.11f\n", i, j, (double)gradientValue);
      M[i*n+j] = gradientValue;
    }
  assign_from_full_matrix(result_numerical_gradient_of_f1, &M[0], n, perm);
}
#endif


static void do_truncation(int n,
			  symmMatrix & X,
			  ergo_real truncation_threshold,
			  const symmMatrix & D_in,
			  bool use_alt_trunc,
			  const std::vector<int> & perm) {
  if(use_alt_trunc == false) {
    X.frob_thresh(truncation_threshold);
    return;
  }
  // OK, use alternative truncation approach.

  symmMatrix A(X);
  A.frob_thresh(truncation_threshold);
  if(A.nnz() == X.nnz())
    return;
  symmMatrix zeroMat(A);
  zeroMat.clear();

  bool do_truncation_internally = true;

  // Use truncated D-matrix to make calculations faster.
  // We can truncate it a lot and still get reasonable results, but using only the diagonal will not work since then it commutes with any E-matrix.
  symmMatrix D(D_in);
  if(do_truncation_internally)
    D.frob_thresh(2.5);//truncation_threshold*100); // FIXME change amount of truncation of D here, seems like lots o truncation can be used while still giving OK results.

  // Now A contains the truncated X, that defines the sparsity pattern we want to use.
  // Get all elements of A in order to get the sparsity pattern
  std::vector<int> rowind;
  std::vector<int> colind;
  std::vector<ergo_real> values;
  A.get_all_values(rowind, colind, values);
  symmMatrix E(zeroMat);

  symmMatrix YY(zeroMat);
  YY.clear();

  normalMatrix ED(zeroMat);
  normalMatrix DE(zeroMat);
  normalMatrix E_new_3(zeroMat);
  normalMatrix DED(zeroMat);

  const int nOptSteps = 2; // NOTE: CHANGE nOptSteps HERE!
  for(int optStep = 0; optStep < nOptSteps; optStep++) {
    E = X - A; // this gives E such that X = A + E
    if(do_truncation_internally)
      E.frob_thresh(truncation_threshold*0.01); // FIXME change amount of truncation of E here

    // Compute gradient (?) matrix ED+DE-2*DED
    ED = E * D;
    if(do_truncation_internally)
      ED.frob_thresh(truncation_threshold*0.01); // FIXME choose amount of truncation here
    DED = D * ED;
    symmMatrix DED_symm(DED);
    DE = transpose(ED); // Same as D * E
    E_new_3 = ED + DE;
    symmMatrix E_new_symm(E_new_3);
    E_new_symm += (ergo_real)(-2.0) * DED_symm;
    // Now E_new_symm contains ED+DE-2*DED
    if(0) {
      symmMatrix analytical_gradient_of_f1(zeroMat);
      analytical_gradient_of_f1 = (ergo_real)2.0 * E_new_symm; // NOTE: multiply (ED+DE-2*DED) by factor 2 to get gradient
    }

    values.clear();
    E_new_symm.get_values(rowind, colind, values);
    YY.assign_from_sparse(rowind, colind, values);
    A += (ergo_real)2.0*YY; // FIXME: HOW TO SET FACTOR HERE? ARBITATY 0.3 VALUE!?
  } // end for optStep
  X = A;
  return;
}



#if 0
if(0) {
  symmMatrix analytical_gradient_of_f1(zeroMat);
  analytical_gradient_of_f1 = 2.0 * E_new_symm; // NOTE: multiply (ED+DE-2*DED) by factor 2 to get gradient
  symmMatrix numerical_gradient_of_f1(zeroMat);
  get_numerical_gradient_of_func(n, D, E0, A, E, numerical_gradient_of_f1, perm, "f1");
  print_matrix(numerical_gradient_of_f1, "numerical_gradient_of_f1", n, 1e2);
  print_matrix(analytical_gradient_of_f1, "analytical_gradient_of_f1", n, 1e2);
  ergo_real diff_f1 = symmMatrix::frob_diff(numerical_gradient_of_f1, analytical_gradient_of_f1);
  std::cout << "frobdiff between numerical and analytical gradients of f1: " << diff_f1 << "  (numerical_gradient_of_f1.frob() = " << numerical_gradient_of_f1.frob() << ")." << std::endl;
  symmMatrix g_of_E_minus_E0(zeroMat);
  get_g_of_E_minus_E0(n, E0, A, E, perm, g_of_E_minus_E0);
  symmMatrix analytical_gradient_of_f2(zeroMat);
  analytical_gradient_of_f2 = 2.0 * g_of_E_minus_E0; // NOTE multiply by 2 to get gradient
  symmMatrix numerical_gradient_of_f2(zeroMat);
  get_numerical_gradient_of_func(n, D, E0, A, E, numerical_gradient_of_f2, perm, "f2");
  print_matrix(numerical_gradient_of_f2, "numerical_gradient_of_f2", n, 1e5);
  print_matrix(analytical_gradient_of_f2, "analytical_gradient_of_f2", n, 1e5);
  ergo_real diff_f2 = symmMatrix::frob_diff(numerical_gradient_of_f2, analytical_gradient_of_f2);
  std::cout << "frobdiff between numerical and analytical gradients of f2: " << diff_f2 << "  (numerical_gradient_of_f2.frob() = " << numerical_gradient_of_f2.frob() << ")." << std::endl;
  if(analytical_gradient_of_f2.frob() > 1e-9)
    exit(0);
 }
symmMatrix analytical_gradient_of_f1(zeroMat);
analytical_gradient_of_f1 = 2.0 * E_new_symm; // NOTE: multiply (ED+DE-2*DED) by factor 2 to get gradient
symmMatrix g_of_E_minus_E0(zeroMat);
get_g_of_E_minus_E0(n, E0, A, E, perm, g_of_E_minus_E0);
symmMatrix analytical_gradient_of_f2(zeroMat);
analytical_gradient_of_f2 = 2.0 * g_of_E_minus_E0; // NOTE multiply by 2 to get gradient
symmMatrix gradient(analytical_gradient_of_f1);
gradient += penalty_alpha * analytical_gradient_of_f2;
E -= 0.004 * gradient;
#endif



static void report_subspace_error(int currIterCount, int n, const symmMatrix & X, const symmMatrix & D) {
  normalMatrix XD(X);
  XD = X * D;
  normalMatrix DX(X);
  DX = transpose(XD);
  normalMatrix diff(XD);
  diff -= DX;
  printf("report_subspace_error for currIterCount = %2d: diff.frob() = %g\n", currIterCount, (double)diff.frob());
}


static void report_subspace_error_via_diagonalization(int currIterCount, int n, int n_occ, const symmMatrix & X, const symmMatrix & D_ref, const std::vector<int> & perm) {
  symmMatrix D(X);
  D.clear();
  symmMatrix minusX(X);
  minusX *= -1.0;
  DensMatInfo info;
  get_density_mat_by_diagonalization(minusX, D, info, n, n_occ, perm);
  ergo_real diff = symmMatrix::frob_diff(D, D_ref);
  printf("report_subspace_error_via_diagonalization for currIterCount = %2d: diff = %g\n", currIterCount, (double)diff);
}


static void get_density_mat_by_purification(const symmMatrix & F,
					    symmMatrix & result_D,
					    int n,
					    int n_occ,
					    const std::vector<int> & perm,
					    const DensMatInfo & info,
					    ergo_real truncation_threshold,
					    const symmMatrix & D_ref,
					    bool use_alt_trunc,
					    bool verify_each_step) {
  printf("Starting get_density_mat_by_purification() now, truncation_threshold = %g, use_alt_trunc = %d.\n", (double)truncation_threshold, (int)use_alt_trunc);
  // FIXME: Truncate D_ref here?
  //  symmMatrix D_ref(D_ref_in);
  //  D_ref.frob_thresh(truncation_threshold);
  // Start by transformation to get all eigenvalues in [0,1] in reverse order
  symmMatrix X(F);
  X.add_identity(-info.lambda_max);
  X *= ((ergo_real)(-1.0) / (info.lambda_max - info.lambda_min));
  // Prepare X2, needed later
  symmMatrix X2(F);
  X2.clear();
  // Loop until converged
  const int maxIter = 222;
  int iterCount = 0;
  int extraFinalStepsCounter = 0;
  ergo_real X2_time_total = 0;
  ergo_real truncation_time_total = 0;
  ergo_real X_nnz_percentage_min = 100;
  ergo_real X_nnz_percentage_max = 0;
  ergo_real X2_nnz_percentage_min = 100;
  ergo_real X2_nnz_percentage_max = 0;
  while(true) {
    Util::TimeMeter tm_X2;
    X2 = (ergo_real)1.0 * X * X;
    X2_time_total += tm_X2.get_wall_seconds() - tm_X2.get_start_time_wall_seconds();
    update_nnz_percentages(n, X, X_nnz_percentage_min, X_nnz_percentage_max);
    update_nnz_percentages(n, X2, X2_nnz_percentage_min, X2_nnz_percentage_max);
    printf("Iteration %2d : nnz for X = %5.1f %% nnz for X2 = %5.1f %%\n", iterCount, (double)get_nnz_percentage(n, X), (double)get_nnz_percentage(n, X2));
    ergo_real X_X2_diff = symmMatrix::frob_diff(X, X2);
    if(X_X2_diff < 1e-1)
      extraFinalStepsCounter++;
    if(extraFinalStepsCounter == 7)
      break;
    ergo_real trace = X.trace();
    if(trace > n_occ) {
      // Choose polynomial X2
      X = X2;
    }
    else {
      // Choose polynomial 2*X - X2
      X *= 2.0;
      X += ((ergo_real)-1.0) * X2;
    }
    // Turn off use_alt_trunc in final iterations to allow eigenvalues to converge
    bool use_alt_trunc_effective = use_alt_trunc;
    if(iterCount > 12) // extraFinalStepsCounter > 4) // FIXME CHOOSE WHEN TO STOP USING ALT TRUNC HERE
      use_alt_trunc_effective = false;
    Util::TimeMeter tm_truncation;
    do_truncation(n, X, truncation_threshold, D_ref, use_alt_trunc_effective, perm);
    truncation_time_total += tm_truncation.get_wall_seconds() - tm_truncation.get_start_time_wall_seconds();
    if(verify_each_step) {
      report_subspace_error(iterCount, n, X, D_ref);
      report_subspace_error_via_diagonalization(iterCount, n, n_occ, X, D_ref, perm);
    }
    iterCount++;
    if(iterCount > maxIter)
      throw std::runtime_error("ERROR in get_density_mat_by_purification: (iterCount > maxIter).");
  }
  printf("Purification done, iterCount = %d, X2_time_total = %f seconds, truncation_time_total = %f seconds\n", iterCount, (double)X2_time_total, (double)truncation_time_total);
  printf("NNZ values for X : min %5.1f %% max %5.1f %%\n", (double)X_nnz_percentage_min, (double)X_nnz_percentage_max);
  printf("NNZ values for X2: min %5.1f %% max %5.1f %%\n", (double)X2_nnz_percentage_min, (double)X2_nnz_percentage_max);
  printf("Calling report_subspace_error_via_diagonalization for final result X matrix at end of get_density_mat_by_purification:\n");
  if(verify_each_step)
    report_subspace_error_via_diagonalization(iterCount, n, n_occ, X, D_ref, perm);
  result_D = X;
}

static void verify_idempotency(const symmMatrix & X) {
  symmMatrix X2(X);
  X2 = (ergo_real)1.0 * X * X;
  ergo_real diff = symmMatrix::frob_diff(X, X2);
  printf("verify_idempotency(): diff = %g\n", (double)diff);
  ergo_real tolerance = template_blas_sqrt(mat::getMachineEpsilon<ergo_real>());
  assert(diff < tolerance);
}

static void verify_gap(const DensMatInfo & info) {
  ergo_real gap_abs = info.lambda_lumo - info.lambda_homo;
  ergo_real spectrum_width = info.lambda_max - info.lambda_min;
  ergo_real gap_rel = gap_abs / spectrum_width;
  printf("verify_gap(): gap_abs = %f, gap_rel = %f\n", (double)gap_abs, (double)gap_rel);
  assert(gap_rel > 1e-6);
}


#if 0
static void check_decay_internal(int n,
				 const std::vector<ergo_real> & D_full,
				 int dist_step,
				 int dist_max) {
  // Consider distances from 0 up to dist_max
  for(int dist = 0; dist < dist_max; dist += dist_step) {
    ergo_real maxAbsValue = 0;
    for(int i = 0; i < n; i++)
      for(int j = i; j < n; j++) {
	int ijdiff  = j - i;
	if(ijdiff >= dist && ijdiff < n/2) {
	  ergo_real absValue = template_blas_fabs(D_full[i*n+j]);
	  if(absValue > maxAbsValue)
	    maxAbsValue = absValue;
	}
      } // end for i j
    printf("dist %5d : maxAbsValue = %12.8f\n", dist, (double)maxAbsValue);
  } // end for dist
}

static void check_decay(const symmMatrix & D, const char* name, int n) {
  printf("Starting check_decay() for '%s', n = %d\n", name, n);
  std::vector<ergo_real> D_full(n*n);
  get_all_matrix_elements_symm(D, n, D_full);
  // First part with step 1
  int dist_step = 1;
  int dist_max = 20;
  if(dist_max > n/2)
    dist_max = n/2;
  printf("==========  first part with dist_step = 1  ===============\n");
  check_decay_internal(n, D_full, dist_step, dist_max);
  dist_step *= 2;
  dist_max *= 2;
  printf("==========  first part with dist_step = 2  ===============\n");
  check_decay_internal(n, D_full, dist_step, dist_max);
  dist_step *= 2;
  dist_max *= 2;
  printf("==========  first part with dist_step = 4  ===============\n");
  check_decay_internal(n, D_full, dist_step, dist_max);
  dist_step = n / 40;
  if(dist_step == 0)
    dist_step = 1;
  dist_max = n/2;
  printf("==========  now with dist_step = %d  ===============\n", dist_step);
  check_decay_internal(n, D_full, dist_step, dist_max);
  printf("check_decay() done.\n");
}
#endif


/*
ELIAS NOTE 2016-11-04: the following input seems to work for blockSize = 1 now:
./simple_ort_puri_test 14 1e-2 1e-1 1
*/

int main(int argc, char *argv[])
{
  try {
    Util::TimeMeter tm_everything;

    // OK n values: 10 14 18 22 26 30 50 70 90 110 150 210 290 410 610 1010 1410 2010 2410 3010
    int n = 8;
    ergo_real truncation_threshold = 1e-5;
    ergo_real result_diff_tolerance = 1e-4;
    int use_alt_trunc = 0;
    int verify_each_step = 0;

#ifdef PRECISION_SINGLE
    result_diff_tolerance = 1e-3;
#endif

    if(argc >= 2)
      n = atoi(argv[1]);
    if(argc >= 3)
      truncation_threshold = atof(argv[2]);
    if(argc >= 4)
      result_diff_tolerance = atof(argv[3]);
    if(argc >= 5)
      use_alt_trunc = atoi(argv[4]);
    if(argc >= 6)
      verify_each_step = atoi(argv[5]);

    int n_occ = n/2;
    int blockSize = 8;

    std::cout << "n = " << n << std::endl;
    std::cout << "truncation_threshold = " << (double)truncation_threshold << std::endl;
    std::cout << "result_diff_tolerance = " << (double)result_diff_tolerance << std::endl;
    std::cout << "n_occ = " << n_occ << std::endl;
    std::cout << "blockSize = " << blockSize << std::endl;

    if(n <= 1)
      throw std::runtime_error("ERROR: (n <= 1)");

#ifdef _OPENMP
    int defThreads;
    const char *env = getenv("OMP_NUM_THREADS");
    if ( !(env && (defThreads=atoi(env)) > 0) )
      defThreads = 1;
    mat::Params::setNProcs(defThreads);
    mat::Params::setMatrixParallelLevel(1);
    std::cout<<"OpenMP is used, number of threads set to "
	     <<mat::Params::getNProcs()<<". Matrix parallel level: "
	     <<mat::Params::getMatrixParallelLevel()<<"."<<std::endl;
#endif

    // Prepare stuff needed to use matrix library
    mat::SizesAndBlocks sizeBlockInfo;
    static const int sparseMatrixBlockFactor = 2;
    sizeBlockInfo =
      prepareMatrixSizesAndBlocks(n,
				  blockSize,
				  sparseMatrixBlockFactor,
				  sparseMatrixBlockFactor,
				  sparseMatrixBlockFactor);
    std::vector<int> perm(n);
    for(int i = 0; i < n; i++)
      perm[i] = i;

    // Create artificial effective Hamiltonian matrix F
    symmMatrix F;
    F.resetSizesAndBlocks(sizeBlockInfo, sizeBlockInfo);
    // get_tridiagonal_matrix_periodic(F, n, perm);
    get_Huckel_matrix_periodic(F, n, perm);
    print_matrix(F, "F", n);

    // Get reference density matrix using diagonalization
    symmMatrix D_ref;
    D_ref.resetSizesAndBlocks(sizeBlockInfo, sizeBlockInfo);
    DensMatInfo densMatInfo;
    Util::TimeMeter tm_get_density_mat_by_diagonalization;
    get_density_mat_by_diagonalization(F, D_ref, densMatInfo, n, n_occ, perm);
    report_timing(tm_get_density_mat_by_diagonalization, "tm_get_density_mat_by_diagonalization");
    print_matrix(D_ref, "D_ref", n);
    print_DensMatInfo(densMatInfo);

    // Verify that D_ref is idempotent
    verify_idempotency(D_ref);

    // Verify that we have significant gap
    verify_gap(densMatInfo);

    // Get density matrix using purification
    symmMatrix D_puri;
    D_puri.resetSizesAndBlocks(sizeBlockInfo, sizeBlockInfo);
    Util::TimeMeter tm_get_density_mat_by_purification;
    get_density_mat_by_purification(F, D_puri, n, n_occ, perm, densMatInfo, truncation_threshold, D_ref, use_alt_trunc, verify_each_step);
    report_timing(tm_get_density_mat_by_purification, "tm_get_density_mat_by_purification");

    // Check accuracy of result
    ergo_real diff_D_puri_vs_D_ref = symmMatrix::frob_diff(D_puri, D_ref);
    std::cout << "diff_D_puri_vs_D_ref = " << (double)diff_D_puri_vs_D_ref << std::endl;
    symmMatrix D_ref_truncated(D_ref);
    D_ref_truncated.frob_thresh(truncation_threshold);
    ergo_real diff_D_ref_vs_D_ref_truncated = symmMatrix::frob_diff(D_ref, D_ref_truncated);
    std::cout << "diff_D_ref_vs_D_ref_truncated = " << (double)diff_D_ref_vs_D_ref_truncated << std::endl;
    assert(diff_D_puri_vs_D_ref < result_diff_tolerance);

    // Investigate decay of matrix elements in D
    //    check_decay(D_ref, "D_ref", n);
    //    check_decay(D_puri, "D_puri", n);

    report_timing(tm_everything, "tm_everything");
  }
  catch(std::runtime_error & e) {
    std::cout << "Error: std::runtime_error caught: " << e.what() << std::endl;
    return -1;
  }
  catch(std::exception & e) {
    std::cout << "Error: std::exception caught: " << e.what() << std::endl;
    return -1;
  }
  catch(...) {
    printf("ERROR: exception caught in simple_ort_puri_test main().\n");
    return -1;
  }

  puts("simple_ort_puri_test finished OK.");
  return 0;
}