File: trlib_krylov.c

package info (click to toggle)
python-scipy 1.1.0-7
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 93,828 kB
  • sloc: python: 156,854; ansic: 82,925; fortran: 80,777; cpp: 7,505; makefile: 427; sh: 294
file content (790 lines) | stat: -rw-r--r-- 52,838 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
/* MIT License
 *
 * Copyright (c) 2016--2017 Felix Lenders
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 * 
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include "trlib.h"
#include "trlib_private.h"

#include "_c99compat.h"

trlib_int_t trlib_krylov_min_internal(
    trlib_int_t init, trlib_flt_t radius, trlib_int_t equality, trlib_int_t itmax, trlib_int_t itmax_lanczos,
    trlib_flt_t tol_rel_i, trlib_flt_t tol_abs_i,
    trlib_flt_t tol_rel_b, trlib_flt_t tol_abs_b, trlib_flt_t zero, trlib_flt_t obj_lo,
    trlib_int_t ctl_invariant, trlib_int_t convexify, trlib_int_t earlyterm,
    trlib_flt_t g_dot_g, trlib_flt_t v_dot_g, trlib_flt_t p_dot_Hp,
    trlib_int_t *iwork, trlib_flt_t *fwork, trlib_int_t refine,
    trlib_int_t verbose, trlib_int_t unicode, char *prefix, FILE *fout, trlib_int_t *timing,
    trlib_int_t *action, trlib_int_t *iter, trlib_int_t *ityp,
    trlib_flt_t *flt1, trlib_flt_t *flt2, trlib_flt_t *flt3) {
    /* The algorithm runs by solving the trust region subproblem restricted to a Krylov subspace K(ii)
       The Krylov space K(ii) can be either described by the pCG iterates: (notation iM = M^-1)
         K(ii) = span(p_0, ..., p_ii)
       and in an equivalent way by the Lanczos iterates
         K(ii) = span(q_0, ..., q_ii)

       In one iteration the algorithms performs the following steps
       (a) expand K(ii-1) to K(ii):
           if done via pCG:
             alpha = (g, v)/(p, H p); g+ = g + alpha H p; v+ = iM g; beta = (g+, v+)/(g, v); p+ = -v+ + beta p
           if done via Lanczos:
             y = iM t; gamma = sq (t, y); w = t/gamma; q = y/gamma; delta = (q, H q); t+ = Hq - delta w - gamma w-
           we use pCG as long as it does not break down (alpha ~ 0) and continue with Lanczos in that case,
           note the relationship q = v/sq (g, v) * +-1
       (b) compute minimizer s of problem restricted to sample Krylov space K(ii)
           check if this minimizer is expected to be interior:
             do the pCG iterates satisfy the trust region constraint?
             is H positive definite on K(ii), i.e. are all alphas >= 0?
           if the minimizer is interior, set s = p
           if the minimizer is expected on the boundary, set s = Q*h with Q = [q_0, ..., q_ii]
             and let s solve a tridiagonal trust region subprobem with hessian the tridiagonal matrix
             T_ii from the Lanczos process,
             diag(T_ii) = (delta_0, ..., delta_ii) and offdiag(T_ii) = (gamma_1, ..., gamma_ii)
       (c) test for convergence */

    trlib_int_t *leftmost_timing = NULL;
    #if TRLIB_MEASURE_TIME
        struct timespec verystart, start, end;
        leftmost_timing = timing + 1;
        TRLIB_TIC(verystart)
    #endif
    // sane names for workspace variables
    trlib_int_t *status = iwork;
    trlib_int_t *ii = iwork+1;
    trlib_int_t *pos_def = iwork+2;
    trlib_int_t *interior = iwork+3;
    trlib_int_t *warm_leftmost = iwork+4;
    trlib_int_t *ileftmost = iwork+5;
    trlib_int_t *warm_lam0 = iwork+6;
    trlib_int_t *warm_lam = iwork+7;
    trlib_int_t *lanczos_switch = iwork+8;
    trlib_int_t *exit_tri = iwork+9;
    trlib_int_t *sub_fail_tri = iwork+10;
    trlib_int_t *iter_tri = iwork+11;
    trlib_int_t *iter_last_head = iwork+12;
    trlib_int_t *type_last_head = iwork+13;
    trlib_int_t *nirblk = iwork + 15;
    trlib_int_t *irblk = iwork+16;

    trlib_flt_t *stop_i = fwork;
    trlib_flt_t *stop_b = fwork+1;
    trlib_flt_t *v_g = fwork+2;
    trlib_flt_t *p_Hp = fwork+3;
    trlib_flt_t *cgl = fwork+4;
    trlib_flt_t *cglm = fwork+5;
    trlib_flt_t *lam0 = fwork+6;
    trlib_flt_t *lam = fwork+7;
    trlib_flt_t *obj = fwork+8;
    trlib_flt_t *s_Mp = fwork+9;
    trlib_flt_t *p_Mp = fwork+10;
    trlib_flt_t *s_Ms = fwork+11;
    trlib_flt_t *sigma = fwork+12;
    trlib_flt_t *raymax = fwork+13;
    trlib_flt_t *raymin = fwork+14;
    trlib_flt_t *alpha = fwork+15;
    trlib_flt_t *beta = fwork+15+itmax+1;
    trlib_flt_t *neglin = fwork+15+2*(itmax+1);
    trlib_flt_t *h0 = fwork+15+3*(itmax+1);
    trlib_flt_t *h = fwork+15+4*(itmax+1);
    trlib_flt_t *delta =  fwork+15+5*(itmax+1);
    trlib_flt_t *delta_fac0 = fwork+15+6*(itmax+1);
    trlib_flt_t *delta_fac = fwork+15+7*(itmax+1);
    trlib_flt_t *gamma = fwork+15+8*(itmax+1); // note that this is shifted by 1, so gamma[0] is gamma_1
    trlib_flt_t *gamma_fac0 = fwork+15+8+9*itmax;
    trlib_flt_t *gamma_fac = fwork+15+8+10*itmax;
    trlib_flt_t *ones = fwork+15+8+11*itmax;
    trlib_flt_t *leftmost = fwork+15+9+12*itmax;
    trlib_flt_t *regdiag = fwork+15+10+13*itmax;
    trlib_flt_t *convhist = fwork+15+11+14*itmax;
    trlib_flt_t *fwork_tr = fwork+15+12+15*itmax;

    // local variables
    trlib_int_t returnvalue = TRLIB_CLR_CONTINUE;
    trlib_int_t warm_fac0 = 0; // flag that indicates if you we could successfully update the factorization
    trlib_int_t warm_fac = 0; // flag that indicates if you we could successfully update the factorization
    trlib_int_t inc = 1;
    trlib_flt_t one = 1.0;
    trlib_flt_t minus = -1.0;
    trlib_flt_t sp_Msp = 0.0; // (s+, Ms+)
    trlib_flt_t eta_i = 0.0; // forcing parameter
    trlib_flt_t eta_b = 0.0; // forcing parameter
    trlib_int_t cit = 0;     // loop counter for convergence history

    *iter = *ii;

    if (init == TRLIB_CLS_INIT)       { *status = TRLIB_CLS_INIT; }
    if (init == TRLIB_CLS_HOTSTART)   { *status = TRLIB_CLS_HOTSTART; }
    if (init == TRLIB_CLS_HOTSTART_P) { *status = TRLIB_CLS_HOTSTART_P; }
    if (init == TRLIB_CLS_HOTSTART_G) { *status = TRLIB_CLS_HOTSTART_G; }
    if (init == TRLIB_CLS_HOTSTART_T) { *status = TRLIB_CLS_HOTSTART_T; }
    if (init == TRLIB_CLS_HOTSTART_R) { *status = TRLIB_CLS_HOTSTART_R; }

    while(1) {
        switch( *status ) {
            case TRLIB_CLS_INIT:
                // initialization
                *ii = 0; *iter = *ii;  // iteration counter
                *pos_def = 1;  // empty krylov subspace so far, so H is positive definite there for sure
                *interior = !equality;  // we can have interior solution if we are not asked for equality solution
                *warm_leftmost = 0;  // coldstart, so no warmstart information on leftmost available
                *nirblk = 1;  // at start, there is one irreducible block
                irblk[0] = 0; // start pointer to first irreducible block
                *warm_lam0 = 0;  // coldstart, so no warmstart information on multiplier available
                *warm_lam = 0;  // coldstart, so no warmstart information on multiplier available
                *lanczos_switch = -1; // indicate that no lanczos switch occurred
                *exit_tri = 0;  // set return code from #trlib_tri_factor_min to 0 just to be on the safe side
                *sub_fail_tri = 0;  // set sub_fail from #trlib_tri_factor_min to 0 just to be on the safe side
                *iter_tri = 0;  // set newton iter from #trlib_tri_factor_min to 0 just to be on the safe side
                *iter_last_head = 0;  // indicate that iteration headline should be printed in first iteration
                *type_last_head = 0;  // just a safety initialization for last iteration headline type
                memset(gamma, 0, itmax*sizeof(trlib_flt_t)); // initialize gamma to zero for safety reasons upon hotstart

                // ask the user to initialize the vectors he manages, set internal state to resume with vector initialization
                *ityp = TRLIB_CLT_CG; *status = TRLIB_CLS_VEC_INIT; *action = TRLIB_CLA_INIT;
                break;
            case TRLIB_CLS_VEC_INIT:
                if (v_dot_g <= 0.0 && g_dot_g > 0.0) { *ityp = TRLIB_CLT_CG; *action = TRLIB_CLA_TRIVIAL; returnvalue = TRLIB_CLR_PCINDEF; break; } // exit if M^-1 indefinite
                if( tol_rel_i >= 0.0 ) {
                    eta_i = tol_rel_i;
                }
                if( fabs(tol_rel_i +1.0) < 1e-8 ) {
                    eta_i = fmin(5e-1, sqrt(sqrt(v_dot_g)));
                }
                if( fabs(tol_rel_i +2.0) < 1e-8 ) {
                    eta_i = fmin(5e-1, sqrt(v_dot_g));
                }
                *stop_i = fmax(tol_abs_i, eta_i*sqrt(v_dot_g)); *stop_i = (*stop_i)*(*stop_i); // set interior stopping tolerance, note that this is squared as for efficiency we compare norm^2 <= tol
                if( tol_rel_b >= 0.0 ) { 
                    eta_b = tol_rel_b;
                }
                if( fabs(tol_rel_b +1.0) < 1e-8 || fabs(tol_rel_b +3.0) < 1e-8 ) {
                    eta_b = fmin(5e-1, sqrt(sqrt(v_dot_g)));
                }
                if( fabs(tol_rel_b +2.0) < 1e-8 || fabs(tol_rel_b +4.0) < 1e-8 ) {
                    eta_b = fmin(5e-1, sqrt(v_dot_g));
                }
                if( tol_rel_b < -2.5 ) {
                    eta_b = fmax(1e-6, eta_b);
                }
                *stop_b = fmax(tol_abs_b, eta_b*sqrt(v_dot_g)); // set boundary stopping tolerance, here no square as we directly compare norm <= tol
                *v_g = v_dot_g; // store (v, g)
                *p_Hp = p_dot_Hp; // store (p, Hp)
                neglin[0] = - sqrt(v_dot_g); // set neglin = - gamma_0 e_1 
                *cgl = 1.0; *cglm = 1.0; // ratio between CG and Lanczos vectors is 1 in this and previous iteration
                *sigma = 1.0; // sigma_0 = 1
                *leftmost = 0.0; *lam = 0.0; // assume interior solution
                *obj = 0.0; *s_Mp = 0.0; *p_Mp = 0.0; *s_Ms = 0.0; // safe initialization for scalar values
                *p_Mp = *v_g; // (p0, M p0) = (-v0, -M v0) = (v0, M M^-1 g0); (s, Mp) = (s, Ms) = 0 already properly initialized
                *raymax = (*p_Hp)/(*p_Mp); *raymin = *raymax;
                delta[0] = 0.0; // incremental updates in delta, have to initialize it
                *ityp = TRLIB_CLT_CG; *status = TRLIB_CLS_CG_NEW_ITER; *action = TRLIB_CLA_TRIVIAL; // continue with CG iteration
                break;
            case TRLIB_CLS_CG_NEW_ITER:
                if (fabs(*p_Hp) <= zero) { *action = TRLIB_CLA_TRIVIAL; *status = TRLIB_CLS_LANCZOS_SWT; break; } // (p, Hp) ~ 0 ---> CG breaks down, continue Lanczos
                alpha[*ii] = (*v_g)/(*p_Hp);
                /* update Lanczos tridiagonal
                   diag(i)    = 1/alpha(i) + beta(i-1)/alpha(i-1)
                   offdiag(i) = sqrt( beta(i-1)/abs(alpha(i-1) )
                     terms with index i-1 have been computed in previous iteration, just add 1/alpha(i) to diag(i) */
                delta[*ii] += (*p_Hp)/(*v_g); // delta(i) += (p,Hp)/(v,g)
                // update if hessian possitive definite in current krylov subspace
                *pos_def = *pos_def && (alpha[*ii] > 0.0);

                // update quantities needed to computed || s_trial ||_M and ratio between Lanczos vector q and pCG vector v
                if (*ii > 0) { *sigma = - copysign( 1.0, alpha[*ii-1] ) * (*sigma); }
                *cglm = *cgl; *cgl = *sigma/sqrt(*v_g);
                if (*ii>0) {
                    *s_Mp = beta[*ii-1]*(*s_Mp + alpha[*ii-1]*(*p_Mp));
                    *p_Mp = *v_g + beta[*ii-1]*beta[*ii-1]*(*p_Mp);
                }
                sp_Msp = *s_Ms + alpha[*ii]*(2.0*(*s_Mp)+alpha[*ii]*(*p_Mp));
                *raymax = fmax(*raymax, (*p_Hp)/(*p_Mp)); *raymin = fmin(*raymin, (*p_Hp)/(*p_Mp));
                // update if we can expect interior solution
                *interior = *interior && *pos_def && (sp_Msp < radius*radius);

                // update solution candidate
                if (*interior) {
                    // update (s, Ms) and objective
                    *s_Ms = sp_Msp; *obj = *obj - .5*alpha[*ii]*alpha[*ii]*(*p_Hp);
                    // ask user to update stationary point
                    *ityp = TRLIB_CLT_CG; *status = TRLIB_CLS_CG_UPDATE_S; *flt1 = alpha[*ii]; *action = TRLIB_CLA_UPDATE_STATIO;
                }
                else {
                    /* solution candidate is on boundary
                       solve tridiagonal reduction
                       first try to update factorization if available to start tridiagonal problem warmstarted */
                    warm_fac0 = 0;
                    if (*warm_lam0) {
                        TRLIB_PRINTLN_2("Trying to update factorization")
                        // check if subminor regular, otherwise warmstart impossible
                        warm_fac0 = delta_fac0[*ii-1] != 0.0;
                        if(warm_fac0) { TRLIB_PRINTLN_2("Nonzero pivot. Continue") } else { TRLIB_PRINTLN_2("Zero pivot. Failure!") }
                        if (warm_fac0) {
                            gamma_fac0[*ii-1] = gamma[*ii-1]/delta_fac0[*ii-1];
                            delta_fac0[*ii] = delta[*ii] + *lam0 - gamma[*ii-1]*gamma[*ii-1]/delta_fac0[*ii-1];
                            // check if regularized tridiagonal is still positive definite for warmstart
                            warm_fac0 = delta_fac0[*ii] > 0.0;
                            if(warm_fac0) { TRLIB_PRINTLN_2("Factorization update successful, still pos def!") } else { TRLIB_PRINTLN_2("Factorization update failed, got negative diagonal %e!", delta_fac0[*ii]) }
                        }
                    }
                    /* call trlib_tri_factor_min to solve tridiagonal problem, store solution candidate in h
                       the criterion to specify the maximum number of iterations is weird. it should not be dependent on problem size rather than condition of the hessian... */
                    irblk[*nirblk] = *ii+1;
                    *exit_tri = trlib_tri_factor_min(
                        *nirblk, irblk, delta, gamma, neglin, radius, 100+3*(*ii),
                        TRLIB_EPS, 1e-11, *pos_def, equality,
                        warm_lam0, lam0, warm_lam, lam, warm_leftmost, ileftmost, leftmost,
                        &warm_fac0, delta_fac0, gamma_fac0, &warm_fac, delta_fac, gamma_fac,
                        h0, h, ones, fwork_tr, refine, verbose-1, unicode, " TR ", fout,
                        leftmost_timing, obj, iter_tri, sub_fail_tri);

                    // check for failure, beware: newton break is ok as this means most likely convergence
                    // exit with error and ask the user to get (potentially invalid) solution candidate by backtransformation
                    if (*exit_tri < 0 && *exit_tri != TRLIB_TTR_NEWTON_BREAK) {
                        *ityp = TRLIB_CLT_CG; *action = TRLIB_CLA_RETRANSF; returnvalue = TRLIB_CLR_FAIL_TTR; break;
                    }
                    // also in positive definite case with interior solution
                    if (*exit_tri == TRLIB_TTR_CONV_INTERIOR && *pos_def) {
                        *ityp = TRLIB_CLT_CG; *action = TRLIB_CLA_RETRANSF; returnvalue = TRLIB_CLR_UNEXPECT_INT; break;
                    }

                    // request gradient update from user, skip directly to state TRLIB_CLS_CG_UPDATE_S that does this
                    *ityp = TRLIB_CLT_CG; *status = TRLIB_CLS_CG_UPDATE_S; *action = TRLIB_CLA_TRIVIAL;
                }
                break;
            case TRLIB_CLS_CG_UPDATE_S:
                // request gradient update from user
                *ityp = TRLIB_CLT_CG; *status = TRLIB_CLS_CG_UPDATE_GV; *flt1 = alpha[*ii]; *flt2= *cgl; *action = TRLIB_CLA_UPDATE_GRAD;
                break;
            case TRLIB_CLS_CG_UPDATE_GV:
                // if g == 0: Krylov breakdown or convergence
                // if g != 0 and (v,g) <= 0 ---> preconditioner indefinite
                if(isnan(v_dot_g)) { if (*interior) {*action = TRLIB_CLA_TRIVIAL;} else {*ityp = TRLIB_CLT_CG; *action = TRLIB_CLA_RETRANSF;} returnvalue = TRLIB_CLR_FAIL_NUMERIC; break; } // exit if M^-1 indefinite
                if(g_dot_g > 0.0 && v_dot_g <= 0.0) { if (*interior) {*action = TRLIB_CLA_TRIVIAL;} else {*ityp = TRLIB_CLT_CG; *action = TRLIB_CLA_RETRANSF;} returnvalue = TRLIB_CLR_PCINDEF; break; } // exit if M^-1 indefinite
                if (g_dot_g <= zero) { // Krylov iteration breaks down
                    if ( ctl_invariant <= TRLIB_CLC_NO_EXP_INV ) {
                        if ( *interior ) { *action = TRLIB_CLA_TRIVIAL; } else { *action = TRLIB_CLA_RETRANSF; }
                        *ityp = TRLIB_CLT_CG; returnvalue = TRLIB_CLR_FAIL_HARD; gamma[*ii] = 0.0; break;
                    }
                    else { 
                        /* decide if a new invariant Krylov subspace should be investigated
                           therefore compute actual gradient at current point and test for convergence */
                        if(*interior) { *action = TRLIB_CLA_TRIVIAL; } else { *action = TRLIB_CLA_RETRANSF; }
                        *flt1 = *lam; *ityp = TRLIB_CLT_CG; *status = TRLIB_CLS_CG_IF_IRBLK_P; returnvalue = TRLIB_CLR_CONTINUE; break;
                    }
                }

                beta[*ii] = v_dot_g/(*v_g);
                /* prepare the next Lanczos tridiagonal matrix as far as possible
                   the diagonal term is given by delta(i+1) = 1/alpha(i+1) + beta(i)/alpha(i)
                   here we can compute already beta(i)/alpha(i) = (v+, g+)/(v, g) / (v, g)/(p, Hp)
                   and the complete offdiagonal term gamma(i+1) = sqrt(beta(i))/abs(alpha(i)) */
                delta[*ii+1] = (v_dot_g*(*p_Hp))/((*v_g)*(*v_g));
                gamma[*ii] = fabs( sqrt(v_dot_g)*(*p_Hp)/(sqrt(*v_g)*(*v_g)) );
                *v_g = v_dot_g; // update (v,g)

                // print iteration details
                // first print headline if necessary
                if (((*ii)-(*iter_last_head)) % 20 == 0 || (*interior && *type_last_head != TRLIB_CLT_CG_INT) || (!(*interior) && *type_last_head != TRLIB_CLT_CG_BOUND)) {
                    if(*interior) {
                        if (unicode) { TRLIB_PRINTLN_1("%6s%6s%6s%14s%14s%14s%14s%14s%14s%14s%14s", " iter ", "inewton", " type ", "   objective  ", "   \u2016g\u208a\u2016_M\u207b\u00b9   ", "   leftmost   ", "      \u03bb       ", "      \u03b3       ", "      \u03b4       ", "      \u03b1       ", "      \u03b2       ") }
                        else { TRLIB_PRINTLN_1("%6s%6s%6s%14s%14s%14s%14s%14s%14s%14s%14s", " iter ", "inewton", " type ", "   objective  ", "  ||g+||_M^-1 ", "   leftmost   ", "     lam      ", "    gamma     ", "    delta     ", "    alpha     ", "     beta     ") }
                        *type_last_head = TRLIB_CLT_CG_INT;
                    }
                    else {
                        if (unicode) { TRLIB_PRINTLN_2("%s","") TRLIB_PRINTLN_1("%6s%6s%6s%14s%14s%14s%14s%14s%14s%14s%14s", " iter ", "inewton", " type ", "   objective  ", "   \u03b3\u1d62\u208a\u2081|h\u1d62|   ", "   leftmost   ", "      \u03bb       ", "      \u03b3       ", "      \u03b4       ", "      \u03b1       ", "      \u03b2       ") }
                        else { TRLIB_PRINTLN_2("%s","") TRLIB_PRINTLN_1("%6s%6s%6s%14s%14s%14s%14s%14s%14s%14s%14s", " iter ", "inewton", " type ", "   objective  ", "gam(i+1)|h(i)|", "   leftmost   ", "     lam      ", "    gamma     ", "    delta     ", "    alpha     ", "     beta     ") }
                        *type_last_head = TRLIB_CLT_CG_BOUND;
                    }
                    *iter_last_head = *ii;
                }
                if (*interior) {
                    TRLIB_PRINTLN_1("%6ld%6ld%6s%14e%14e%14e%14e%14e%14e%14e%14e", *ii, *iter_tri, "cg_i", *obj, sqrt(*v_g), *leftmost, *lam, *ii == 0 ? -neglin[0] : gamma[*ii-1], delta[*ii], alpha[*ii], beta[*ii])
                }
                else {
                    TRLIB_PRINTLN_2("%s","") TRLIB_PRINTLN_1("%6ld%6ld%6s%14e%14e%14e%14e%14e%14e%14e%14e", *ii, *iter_tri, "cg_b", *obj, gamma[*ii]*fabs(h[*ii]), *leftmost, *lam, *ii == 0 ? -neglin[0] : gamma[*ii-1], delta[*ii], alpha[*ii], beta[*ii]) TRLIB_PRINTLN_2("%s", "")
                }

                // test for convergence
                // note convergence criterion
                if (*interior) { convhist[*ii] = sqrt(*v_g)/(*stop_i); } else { convhist[*ii] = (gamma[*ii]*fabs(h[*ii]))/(*stop_b); }
                // interior: ||g^+||_{M^-1} = (g+, M^-1 g+) = (g+, v+) small, boundary gamma(i+1)*|h(i)| small
                if (*interior && (*v_g <= *stop_i)) { *ityp = TRLIB_CLT_CG; *action = TRLIB_CLA_TRIVIAL; returnvalue = TRLIB_CLR_CONV_INTERIOR; break; }
                else if (!(*interior) && (gamma[*ii]*fabs(h[*ii]) <= *stop_b) ) { *ityp = TRLIB_CLT_CG; *action = TRLIB_CLA_RETRANSF; returnvalue = TRLIB_CLR_CONV_BOUND; break; }
                // test if convergence is unlikely
                if ( !(*interior) && earlyterm && *ii > 10 && convhist[*ii-10] > 1e-1 * convhist[*ii]) { 
                    trlib_int_t doit = 1;
                    for(cit = *ii-10; cit < *ii; ++cit) { if( convhist[cit+1] > convhist[cit] ) doit = 0; }
                    if(doit) {
                        TRLIB_PRINTLN_2("Early exit as boundary case for the last ten iterations without significant progress")
                        if(*interior) { *ityp = TRLIB_CLT_CG; *action = TRLIB_CLA_TRIVIAL; returnvalue = TRLIB_CLR_UNLIKE_CONV; break; }
                        else  { *ityp = TRLIB_CLT_CG; *action = TRLIB_CLA_RETRANSF; returnvalue = TRLIB_CLR_UNLIKE_CONV; break; }
                    }
                    else { // prepare next iteration
                        *ityp = TRLIB_CLT_CG; *status = TRLIB_CLS_CG_UPDATE_P; *flt1 = -1.0; *flt2 = beta[*ii]; *action = TRLIB_CLA_UPDATE_DIR; break;
                    }
                }
                // test of problem is unbounded
                else if (isnan(*obj) || *obj < obj_lo) { 
                    if(*interior) { *ityp = TRLIB_CLT_CG; *action = TRLIB_CLA_TRIVIAL; returnvalue = TRLIB_CLR_UNBDBEL; break; }
                    else  { *ityp = TRLIB_CLT_CG; *action = TRLIB_CLA_RETRANSF; returnvalue = TRLIB_CLR_UNBDBEL; break; }
                }
                // otherwise prepare next iteration
                else { *ityp = TRLIB_CLT_CG; *status = TRLIB_CLS_CG_UPDATE_P; *flt1 = -1.0; *flt2 = beta[*ii]; *action = TRLIB_CLA_UPDATE_DIR; break; }
                break;
            case TRLIB_CLS_CG_UPDATE_P:
                *p_Hp = p_dot_Hp;
                // prepare next iteration
                *ii += 1;
                // check if we have to boil out due to iteration limit exceeded
                if (*ii >= itmax) { if (*interior) {*action = TRLIB_CLA_TRIVIAL;} else {*action = TRLIB_CLA_RETRANSF;} *ityp = TRLIB_CLT_CG; returnvalue = TRLIB_CLR_ITMAX; break; }
                *ityp = TRLIB_CLT_CG; *status = TRLIB_CLS_CG_NEW_ITER; *action = TRLIB_CLA_TRIVIAL;
                break;
            case TRLIB_CLS_CG_IF_IRBLK_P:
                // compute convergence criterion
                *flt1 = *lam; *ityp = TRLIB_CLT_CG; *action = TRLIB_CLA_CONV_HARD;
                *status = TRLIB_CLS_CG_IF_IRBLK_C; returnvalue = TRLIB_CLR_CONTINUE; break;
            case TRLIB_CLS_CG_IF_IRBLK_C:
                // print iteration details
                // first print headline if necessary
                TRLIB_PRINTLN_2("%s","") TRLIB_PRINTLN_1("%6s%6s%6s%14s%14s%14s%14s", " iter ", "inewton", " type ", "   objective  ", "||g(lam)||_iM", "   leftmost   ", "     lam      ")
                TRLIB_PRINTLN_2("%s","") TRLIB_PRINTLN_1("%6ld%6ld%6s%14e%14e%14e%14e", *ii, *iter_tri, "cg_h", *obj, v_dot_g, *leftmost, *lam) TRLIB_PRINTLN_2("%s", "")
                // check for convergence
                if (ctl_invariant <= TRLIB_CLC_EXP_INV_LOC && v_dot_g <= *stop_b) { *ityp = TRLIB_CLT_CG; *action = TRLIB_CLA_TRIVIAL; returnvalue = TRLIB_CLR_APPROX_HARD; break; }
                // if no convergence or want to force to investigate all invariant subspaces, continue with next invariant Krylov subspace
                if (ctl_invariant <= TRLIB_CLC_EXP_INV_LOC) {
                    TRLIB_PRINTLN_2("No convergence within invariant subspace. Investigate next invariant subspace") TRLIB_PRINTLN_2("%s","")
                }
                else {
                    TRLIB_PRINTLN_2("Investigate next invariant subspace") TRLIB_PRINTLN_2("%s","")
                } 
                *ityp = TRLIB_CLT_CG; *action = TRLIB_CLA_NEW_KRYLOV;
                *status = TRLIB_CLS_CG_IF_IRBLK_N; returnvalue = TRLIB_CLR_CONTINUE; break;
            case TRLIB_CLS_CG_IF_IRBLK_N:
                irblk[*nirblk] = *ii+1;
                (*nirblk)++;
                gamma[*ii] = sqrt(v_dot_g); // do not misinterpret this is as value of tridiagonal matrix, there it is 0
                *lanczos_switch = *ii;
                *ii += 1;
                *ityp = TRLIB_CLT_CG; *action = TRLIB_CLA_TRIVIAL; *status = TRLIB_CLS_L_UPDATE_P;
                returnvalue = TRLIB_CLR_CONTINUE; break;
            case TRLIB_CLS_HOTSTART:
                /* reentry with smaller trust region radius
                   we implement hotstart by not making use of the CG basis but rather the Lanczos basis
                   as this covers both cases: the interior and the boundary cases
                   the additional cost by doing this is negligible since we most likely will just do one iteration */
                // solve the corresponding tridiagonal problem, check for convergence and otherwise continue to iterate
                irblk[*nirblk] = *ii+1;
                *exit_tri = trlib_tri_factor_min(
                    *nirblk, irblk, delta, gamma, neglin, radius, 100+3*(*ii),
                    TRLIB_EPS, 1e-11, *pos_def, equality,
                    warm_lam0, lam0, warm_lam, lam, warm_leftmost, ileftmost, leftmost,
                    &warm_fac0, delta_fac0, gamma_fac0, &warm_fac, delta_fac, gamma_fac,
                    h0, h, ones, fwork_tr, refine, verbose-1, unicode, " TR ", fout,
                    leftmost_timing, obj, iter_tri, sub_fail_tri);

                /* check for failure, beware: newton break is ok as this means most likely convergence
                   exit with error and ask the user to get (potentially invalid) solution candidate by backtransformation */
                if (*exit_tri < 0 && *exit_tri != TRLIB_TTR_NEWTON_BREAK) {
                    // print some information
                    if (unicode) { TRLIB_PRINTLN_2("%s","") TRLIB_PRINTLN_1("%6s%6s%6s%14s%14s%14s%14s%14s%14s", " iter ", "inewton", " type ", "   objective  ", "   \u03b3\u1d62\u208a\u2081|h\u1d62|   ", "   leftmost   ", "      \u03bb       ", "      \u03b3       ", "      \u03b4       ") }
                    else { TRLIB_PRINTLN_2("%s","") TRLIB_PRINTLN_1("%6s%6s%6s%14s%14s%14s%14s%14s%14s", " iter ", "inewton", " type ", "   objective  ", "gam(i+1)|h(i)|", "   leftmost   ", "     lam      ", "    gamma     ", "    delta     ") }
                    *type_last_head = TRLIB_CLT_HOTSTART;
                    *iter_last_head = *ii;
                    TRLIB_PRINTLN_1("%6ld%6ld%6s%14e%14e%14e%14e%14e%14e", *ii, *iter_tri, "hnbk", *obj, gamma[*ii]*fabs(h[*ii]), *leftmost, *lam, *ii == 0 ? neglin[0] : gamma[*ii-1], delta[*ii]) TRLIB_PRINTLN_2("%s", "")

                    *ityp = TRLIB_CLT_CG; *action = TRLIB_CLA_RETRANSF; returnvalue = TRLIB_CLR_FAIL_TTR; break;
                }

                ///* if tridiagonal problem cannot find suitable initial lambda it is most likely best to stop at this point
                // * since this means that there is severe ill-conditioning and the user should better present a
                // * better problem formulation. Continuing means most likely computing on garbage */
                if (*exit_tri == TRLIB_TTR_HARD_INIT_LAM) {
                    // print some information
                    if (unicode) { TRLIB_PRINTLN_2("%s","") TRLIB_PRINTLN_1("%6s%6s%6s%14s%14s%14s%14s%14s%14s", " iter ", "inewton", " type ", "   objective  ", "   \u03b3\u1d62\u208a\u2081|h\u1d62|   ", "   leftmost   ", "      \u03bb       ", "      \u03b3       ", "      \u03b4       ") }
                    else { TRLIB_PRINTLN_2("%s","") TRLIB_PRINTLN_1("%6s%6s%6s%14s%14s%14s%14s%14s%14s", " iter ", "inewton", " type ", "   objective  ", "gam(i+1)|h(i)|", "   leftmost   ", "     lam      ", "    gamma     ", "    delta     ") }
                    *type_last_head = TRLIB_CLT_HOTSTART;
                    *iter_last_head = *ii;
                    TRLIB_PRINTLN_1("%6ld%6ld%6s%14e%14e%14e%14e%14e%14e", *ii, *iter_tri, "hlfl", *obj, gamma[*ii]*fabs(h[*ii]), *leftmost, *lam, *ii == 0 ? neglin[0] : gamma[*ii-1], delta[*ii]) TRLIB_PRINTLN_2("%s", "")

                    *ityp = TRLIB_CLT_CG; *action = TRLIB_CLA_RETRANSF; returnvalue = TRLIB_CLR_HARD_INIT_LAM; break;
                }

                // print some information
                if (unicode) { TRLIB_PRINTLN_2("%s","") TRLIB_PRINTLN_1("%6s%6s%6s%14s%14s%14s%14s%14s%14s", " iter ", "inewton", " type ", "   objective  ", "   \u03b3\u1d62\u208a\u2081|h\u1d62|   ", "   leftmost   ", "      \u03bb       ", "      \u03b3       ", "      \u03b4       ") }
                else { TRLIB_PRINTLN_2("%s","") TRLIB_PRINTLN_1("%6s%6s%6s%14s%14s%14s%14s%14s%14s", " iter ", "inewton", " type ", "   objective  ", "gam(i+1)|h(i)|", "   leftmost   ", "     lam      ", "    gamma     ", "    delta     ") }
                *type_last_head = TRLIB_CLT_HOTSTART;
                *iter_last_head = *ii;

                TRLIB_PRINTLN_1("%6ld%6ld%6s%14e%14e%14e%14e%14e%14e", *ii, *iter_tri, " hot", *obj, gamma[*ii]*fabs(h[*ii]), *leftmost, *lam, *ii == 0 ? neglin[0] : gamma[*ii-1], delta[*ii]) TRLIB_PRINTLN_2("%s", "")

                if ( earlyterm ) { 
                    TRLIB_PRINTLN_2("Early exit as hotstart with early termination on")
                    *ityp = TRLIB_CLT_CG; *action = TRLIB_CLA_RETRANSF; returnvalue = TRLIB_CLR_UNLIKE_CONV; break;
                }
                
                // test for convergence
                if ( (*exit_tri != TRLIB_TTR_CONV_INTERIOR) && gamma[*ii]*fabs(h[*ii]) <= *stop_b) { *ityp = TRLIB_CLT_CG; *action = TRLIB_CLA_RETRANSF; returnvalue = *exit_tri; break; }
                else if ( (*exit_tri == TRLIB_TTR_CONV_INTERIOR) && gamma[*ii]*fabs(h[*ii]) <= sqrt(*stop_i)) { *ityp = TRLIB_CLT_CG; *action = TRLIB_CLA_RETRANSF; returnvalue = *exit_tri; break; }
                else if (isnan(*obj) || *obj < obj_lo) { *ityp = TRLIB_CLT_CG; *action = TRLIB_CLA_RETRANSF; returnvalue = TRLIB_CLR_UNBDBEL; break; }
                else {
                    // prepare next iteration
                    if (lanczos_switch < 0) { returnvalue = TRLIB_CLR_CONTINUE; *ityp = TRLIB_CLT_CG; *status = TRLIB_CLS_CG_UPDATE_P; *flt1 = -1.0; *flt2 = beta[*ii]; *action = TRLIB_CLA_UPDATE_DIR; break; }
                    else { returnvalue = TRLIB_CLR_CONTINUE; *ityp = TRLIB_CLT_L; *action = TRLIB_CLA_TRIVIAL; *status = TRLIB_CLS_L_NEW_ITER; break; }
                }
                break;
            case TRLIB_CLS_HOTSTART_P:
                /* reentry to compute minimizer of problem with convexified hessian */

                // get regularization for diagonal in diag_fac (use this as a temporary)
                trlib_tri_factor_regularize_posdef(irblk[1], delta, gamma, 1e-12, 10.0, regdiag);

                // add regularization to diagonal
                TRLIB_DAXPY(irblk+1, &one, regdiag, &inc, delta, &inc) // delta <- delta + regdiag

                // as we may just consider a submatrix of T, ensure that h is set to 0
                memset(h, 0.0, (*ii+1)*sizeof(trlib_flt_t));

                *warm_lam0 = 0; *warm_lam = 0; *warm_leftmost = 0; warm_fac0 = 0; warm_fac = 0;
                irblk[1] = *ii+1;
                *exit_tri = trlib_tri_factor_min(
                    1, irblk, delta, gamma, neglin, radius, 100+3*(*ii),
                    TRLIB_EPS, 1e-11, *pos_def, equality,
                    warm_lam0, lam0, warm_lam, lam, warm_leftmost, ileftmost, leftmost,
                    &warm_fac0, delta_fac0, gamma_fac0, &warm_fac, delta_fac, gamma_fac,
                    h0, h, ones, fwork_tr, refine, verbose-1, unicode, " TR ", fout,
                    leftmost_timing, obj, iter_tri, sub_fail_tri);

                // restore diagonal
                TRLIB_DAXPY(irblk+1, &minus, regdiag, &inc, delta, &inc) // delta <- delta - regdiag

                *action = TRLIB_CLA_RETRANSF; returnvalue = *exit_tri; break;
            case TRLIB_CLS_HOTSTART_R:
                /* reentry to compute unconstrained minimizer of problem with regularized hessian */
                trlib_tri_factor_regularized_umin(irblk[1], delta, gamma, neglin, radius, h, ones, fwork_tr, refine,
                        verbose-1, unicode, " TR ", fout, leftmost_timing, obj, sub_fail_tri);
                *action = TRLIB_CLA_TRIVIAL; returnvalue = TRLIB_CLR_CONV_INTERIOR; break;
            case TRLIB_CLS_HOTSTART_T:
                /* reentry to compute regularization parameter for TRACE */
                *flt1 = radius;
                trlib_tri_factor_get_regularization(irblk[1], delta, gamma, neglin, flt1,
                        .5*(tol_rel_i + tol_rel_b), tol_rel_i, tol_rel_b, h, ones, fwork_tr, refine,
                        verbose-1, unicode, " TR ", fout, leftmost_timing, obj, sub_fail_tri);
                *action = TRLIB_CLA_TRIVIAL; returnvalue = TRLIB_CLR_CONV_INTERIOR; break;
            case TRLIB_CLS_HOTSTART_G:
                /* reentry with new gradient trust region radius
                   we implement hotstart by not making use of the CG basis but rather the Lanczos basis
                   as this covers both cases: the interior and the boundary cases
                   the additional cost by doing this is negligible since we most likely will just do one iteration */
                // solve the corresponding tridiagonal problem, check for convergence and otherwise continue to iterate
                irblk[*nirblk] = *ii+1;
                *exit_tri = trlib_tri_factor_min(
                    *nirblk, irblk, delta, gamma, neglin, radius, 100+3*(*ii),
                    TRLIB_EPS, 1e-11, *pos_def, equality,
                    warm_lam0, lam0, warm_lam, lam, warm_leftmost, ileftmost, leftmost,
                    &warm_fac0, delta_fac0, gamma_fac0, &warm_fac, delta_fac, gamma_fac,
                    h0, h, ones, fwork_tr, refine, verbose-1, unicode, " TR ", fout,
                    leftmost_timing, obj, iter_tri, sub_fail_tri);

                /* check for failure, beware: newton break is ok as this means most likely convergence
                   exit with error and ask the user to get (potentially invalid) solution candidate by backtransformation */
                if (*exit_tri < 0 && *exit_tri != TRLIB_TTR_NEWTON_BREAK) {
                    *ityp = TRLIB_CLT_CG; *action = TRLIB_CLA_RETRANSF; returnvalue = TRLIB_CLR_FAIL_TTR; break;
                }

                ///* if tridiagonal problem cannot find suitable initial lambda it is most likely best to stop at this point
                // * since this means that there is severe ill-conditioning and the user should better present a
                // * better problem formulation. Continuing means most likely computing on garbage */
                if (*exit_tri == TRLIB_TTR_HARD_INIT_LAM) {
                    *ityp = TRLIB_CLT_CG; *action = TRLIB_CLA_RETRANSF; returnvalue = TRLIB_CLR_HARD_INIT_LAM; break;
                }

                // print some information
                if (unicode) { TRLIB_PRINTLN_2("%s","") TRLIB_PRINTLN_1("%6s%6s%6s%14s%14s%14s%14s%14s%14s", " iter ", "inewton", " type ", "   objective  ", "   \u03b3\u1d62\u208a\u2081|h\u1d62|   ", "   leftmost   ", "      \u03bb       ", "      \u03b3       ", "      \u03b4       ") }
                else { TRLIB_PRINTLN_2("%s","") TRLIB_PRINTLN_1("%6s%6s%6s%14s%14s%14s%14s%14s%14s", " iter ", "inewton", " type ", "   objective  ", "gam(i+1)|h(i)|", "   leftmost   ", "     lam      ", "    gamma     ", "    delta     ") }
                *type_last_head = TRLIB_CLT_HOTSTART;
                *iter_last_head = *ii;

                TRLIB_PRINTLN_1("%6ld%6ld%6s%14e%14e%14e%14e%14e%14e", *ii, *iter_tri, " hot_g", *obj, 0.0, *leftmost, *lam, *ii == 0 ? neglin[0] : gamma[*ii-1], delta[*ii]) TRLIB_PRINTLN_2("%s", "")
                
                // return without convergence check as indicated in API doc
                *ityp = TRLIB_CLT_CG; *action = TRLIB_CLA_RETRANSF; returnvalue = *exit_tri; break;
            case TRLIB_CLS_LANCZOS_SWT:
                /* switch from CG to Lanczos. perform the first iteration by hand, after the that the coefficients match
                   so far pCG has been used, which means there is some g^{CG}(ii), v^{CG}(ii), p^{CG}(ii) and H p^{CG}(ii)
                   furthermore gamma(ii) is correct

                   what we need now is p^L(ii) ~ v^{CG}(ii), g^{L}(ii) ~ g^{CG}(ii) and H p^L(ii) (new)

                   set p^L := sigma/sqrt( (v^CG, g^CG) ); Hp := Hp^L and compute (p^L, Hp^L) */
                *lanczos_switch = *ii;
                if ( *ii > 0 ) { *sigma = - copysign( 1.0, alpha[*ii-1] ) * (*sigma); }
                *ityp = TRLIB_CLT_L; *status = TRLIB_CLS_L_UPDATE_P; *flt1 = (*sigma)/sqrt(*v_g); *flt2 = 0.0; *action = TRLIB_CLA_UPDATE_DIR;
                break;
            case TRLIB_CLS_L_UPDATE_P:
                if ( fabs(p_dot_Hp) <= zero ) { // Krylov iteration breaks down
                    // FIXME: continue with next invariant Krylov subspace
                    if ( ctl_invariant <= TRLIB_CLC_EXP_INV_GLO ) {
                        *ityp = TRLIB_CLT_CG; *action = TRLIB_CLA_RETRANSF;
                        returnvalue = TRLIB_CLR_FAIL_HARD; break;
                    }
                }
                delta[*ii] = p_dot_Hp;
                *raymax = fmax(*raymax, p_dot_Hp); *raymin = fmin(*raymin, p_dot_Hp);
                /* solve tridiagonal reduction
                   first try to update factorization if available to start tridiagonal problem warmstarted */
                if(*nirblk == 1) {
                    warm_fac0 = 0;
                    if (*warm_lam0) {
                        // check if subminor regular, otherwise warmstart impossible
                        warm_fac0 = delta_fac0[*ii-1] != 0.0;
                        if (warm_fac0) {
                            gamma_fac0[*ii-1] = gamma[*ii-1]/delta_fac0[*ii-1];
                            delta_fac0[*ii] = delta[*ii] + *lam0 - gamma[*ii-1]*gamma[*ii-1]/delta_fac0[*ii-1];
                            // check if regularized tridiagonal is still positive definite for warmstart
                            warm_fac0 = delta_fac0[*ii] > 0.0;
                        }
                    }
                }
                else {
                    // FIXME: implement proper warmstart
                    *warm_lam0 = 0; *warm_lam = 0; *warm_leftmost = 0;
                }
                /* call factor_min to solve tridiagonal problem, store solution candidate in h
                   the criterion to specify the maximum number of iterations is weird. it should not be dependent on problem size rather than condition of the hessian... */
                irblk[*nirblk] = *ii+1;
                *exit_tri = trlib_tri_factor_min(
                    *nirblk, irblk, delta, gamma, neglin, radius, 100+3*(*ii),
                    TRLIB_EPS, 1e-11, *pos_def, equality,
                    warm_lam0, lam0, warm_lam, lam, warm_leftmost, ileftmost, leftmost,
                    &warm_fac0, delta_fac0, gamma_fac0, &warm_fac, delta_fac, gamma_fac,
                    h0, h, ones, fwork_tr, refine, verbose-1, unicode, " TR ", fout,
                    leftmost_timing, obj, iter_tri, sub_fail_tri);

                /* check for failure, beware: newton break is ok as this means most likely convergence
                   exit with error and ask the user to get (potentially invalid) solution candidate by backtransformation */
                if (*exit_tri < 0 && *exit_tri != TRLIB_TTR_NEWTON_BREAK) {
                    *ityp = TRLIB_CLT_L; *action = TRLIB_CLA_RETRANSF; returnvalue = TRLIB_CLR_FAIL_TTR; break;
                }

                ///* if tridiagonal problem cannot find suitable initial lambda it is most likely best to stop at this point
                // * since this means that there is severe ill-conditioning and the user should better present a
                // * better problem formulation. Continuing means most likely computing on garbage.
                // * Ill-conditioning is likely since we already are in Lanczos mode. */
                if (*exit_tri == TRLIB_TTR_HARD_INIT_LAM) {
                    *ityp = TRLIB_CLT_LANCZOS; *action = TRLIB_CLA_RETRANSF; returnvalue = TRLIB_CLR_HARD_INIT_LAM; break;
                }

                /* convergence check is logical at this position, *but* requires gamma(ii+1).
                   wait until gradient has been updated */
                // compute g^L(ii+1)
                if(*ii < 2) { 
                    *flt1 = -delta[*ii]/gamma[*ii-1]; *flt2 = -gamma[*ii-1]; *flt3 = 1.0;
                }
                else {
                    *flt1 = -delta[*ii]/gamma[*ii-1]; *flt2 = -gamma[*ii-1]/gamma[*ii-2]; *flt3 = 1.0;
                }
                // in the case that we just switched to Lanczos, we have to use different coefficients
                if (*ii == *lanczos_switch && *nirblk == 1) {
                    *flt1 = -delta[*ii]/sqrt(*v_g)*(*sigma); *flt2 = -gamma[*ii-1]*(*cgl); *flt3 = gamma[*ii-1]/sqrt(*v_g);
                    *cgl = 1.0; *cglm = 1.0;
                }
                // as well in the case that we have a new irreducible block
                if (*ii == irblk[*nirblk-1]) {
                    *flt1 = -delta[*ii]/gamma[*ii-1]; *flt2 = 0; *flt3 = 1.0;
                }
                *ityp = TRLIB_CLT_L;  *action = TRLIB_CLA_UPDATE_GRAD; *status = TRLIB_CLS_L_CMP_CONV;
                break;
            case TRLIB_CLS_L_CMP_CONV:
                if(isnan(v_dot_g)) { if (*interior) {*action = TRLIB_CLA_TRIVIAL;} else {*ityp = TRLIB_CLT_L; *action = TRLIB_CLA_RETRANSF;} returnvalue = TRLIB_CLR_FAIL_NUMERIC; break; } // exit if M^-1 indefinite
                if (v_dot_g <= 0.0 && g_dot_g > 0.0) { if (*interior) {*action = TRLIB_CLA_TRIVIAL;} else {*ityp = TRLIB_CLT_L; *action = TRLIB_CLA_RETRANSF;} returnvalue = TRLIB_CLR_PCINDEF; break; } // exit if M^-1 indefinite
                // FIXME: implement adding further invariant subspaces
                if (g_dot_g <= zero) { if(*interior) {*action = TRLIB_CLA_TRIVIAL; } else {*ityp = TRLIB_CLT_L; *action = TRLIB_CLA_RETRANSF;} returnvalue = TRLIB_CLR_APPROX_HARD; }
                gamma[*ii] = sqrt(v_dot_g);
                // convergence check after new gradient has been computed
                // first get norm of new gradient
                if (*nirblk == 1) {
                    // compute convergence indicator, store it in *v_g
                    *v_g = v_dot_g * h[*ii]*h[*ii];
                    *ityp = TRLIB_CLT_L; *action = TRLIB_CLA_TRIVIAL; *status = TRLIB_CLS_L_CHK_CONV;
                }
                else { *ityp = TRLIB_CLT_L; *action = TRLIB_CLA_RETRANSF; *status = TRLIB_CLS_L_CMP_CONV_RT; }
                break;
            case TRLIB_CLS_L_CMP_CONV_RT:
                *flt1 = *lam; *ityp = TRLIB_CLT_L; *action = TRLIB_CLA_CONV_HARD; *status = TRLIB_CLS_L_CHK_CONV;
                break;
            case TRLIB_CLS_L_CHK_CONV:
                // get convergence indicator in *v_g
                if (*nirblk > 1 ) { *v_g = v_dot_g; }
                // print some information
                // first print headline if necessary
                if (((*ii)-(*iter_last_head)) % 20 == 0 || *type_last_head != TRLIB_CLT_LANCZOS) {
                    if (unicode) { TRLIB_PRINTLN_2("%s","") TRLIB_PRINTLN_1("%6s%6s%6s%14s%14s%14s%14s%14s%14s", " iter ", "inewton", " type ", "   objective  ", "   \u03b3\u1d62\u208a\u2081|h\u1d62|   ", "   leftmost   ", "      \u03bb       ", "      \u03b3       ", "      \u03b4       ") }
                    else { TRLIB_PRINTLN_2("%s","") TRLIB_PRINTLN_1("%6s%6s%6s%14s%14s%14s%14s%14s%14s", " iter ", "inewton", " type ", "   objective  ", "gam(i+1)|h(i)|", "   leftmost   ", "     lam      ", "    gamma     ", "    delta     ") }
                    *type_last_head = TRLIB_CLT_LANCZOS;
                    *iter_last_head = *ii;
                }
                TRLIB_PRINTLN_2("%s","") TRLIB_PRINTLN_1("%6ld%6ld%6s%14e%14e%14e%14e%14e%14e", *ii, *iter_tri, " lcz", *obj, sqrt(*v_g), *leftmost, *lam, *ii == 0 ? neglin[0] : gamma[*ii-1], delta[*ii]) TRLIB_PRINTLN_2("%s", "")

                // test for convergence
                // note convergence criterion
                if (*interior) { convhist[*ii] = sqrt(*v_g)/sqrt(*stop_i); } else { convhist[*ii] = (*v_g)/sqrt(*stop_b); }
                if ( ctl_invariant <= TRLIB_CLC_EXP_INV_LOC && (*exit_tri != TRLIB_TTR_CONV_INTERIOR) && *v_g <= sqrt(*stop_b)) { *ityp = TRLIB_CLT_L; *action = TRLIB_CLA_RETRANSF; returnvalue = *exit_tri; break; }
                else if ( ctl_invariant <= TRLIB_CLC_EXP_INV_LOC && (*exit_tri == TRLIB_TTR_CONV_INTERIOR) && *v_g <= sqrt(*stop_i)) { *ityp = TRLIB_CLT_L; *action = TRLIB_CLA_RETRANSF; returnvalue = *exit_tri; break; }
                // test if convergence is unlikely
                if ( !(*interior) && earlyterm && *ii > 10 && convhist[*ii-10] > 1e-1 * convhist[*ii]) { 
                    trlib_int_t doit = 1;
                    for(cit = *ii-10; cit < *ii; ++cit) { if( convhist[cit+1] > convhist[cit] ) doit = 0; }
                    if(doit) {
                        TRLIB_PRINTLN_2("Early exit as boundary case for the last ten iterations without significant progress")
                        *ityp = TRLIB_CLT_L; *action = TRLIB_CLA_RETRANSF; returnvalue = TRLIB_CLR_UNLIKE_CONV; break;
                    }
                    else { // prepare next iteration
                        *ityp = TRLIB_CLT_L; *action = TRLIB_CLA_TRIVIAL; *status = TRLIB_CLS_L_NEW_ITER; break;
                    }
                }
                // test of problem is unbounded
                else if (isnan(*obj) || *obj < obj_lo) { *ityp = TRLIB_CLT_L; *action = TRLIB_CLA_RETRANSF; returnvalue = TRLIB_CLR_UNBDBEL; break; }
                else {
                    // prepare next iteration
                    *ityp = TRLIB_CLT_L; *action = TRLIB_CLA_TRIVIAL; *status = TRLIB_CLS_L_NEW_ITER; break;
                }
                break;
            case TRLIB_CLS_L_NEW_ITER:
                // prepare next iteration
                *ii += 1;
                // check if we have to boil out due to iteration limit exceeded
                if (*ii >= itmax || (*ii - *lanczos_switch >= itmax_lanczos)) { *action = TRLIB_CLA_RETRANSF; *ityp = TRLIB_CLT_L; returnvalue = TRLIB_CLR_ITMAX; break; }
                *iter = *ii; *ityp = TRLIB_CLT_L; *flt1 = 1.0/gamma[*ii-1]; *flt2= 0.0; *action = TRLIB_CLA_UPDATE_DIR; *status = TRLIB_CLS_L_UPDATE_P;
                break;
            default: *action = TRLIB_CLA_TRIVIAL;
        }
        if (action != TRLIB_CLA_TRIVIAL || returnvalue <= 0) { break; }
    }
    TRLIB_RETURN(returnvalue)
}

trlib_int_t trlib_krylov_min(
    trlib_int_t init, trlib_flt_t radius, trlib_int_t equality, trlib_int_t itmax, trlib_int_t itmax_lanczos,
    trlib_flt_t tol_rel_i, trlib_flt_t tol_abs_i,
    trlib_flt_t tol_rel_b, trlib_flt_t tol_abs_b, trlib_flt_t zero, trlib_flt_t obj_lo,
    trlib_int_t ctl_invariant, trlib_int_t convexify, trlib_int_t earlyterm,
    trlib_flt_t g_dot_g, trlib_flt_t v_dot_g, trlib_flt_t p_dot_Hp,
    trlib_int_t *iwork, trlib_flt_t *fwork, trlib_int_t refine,
    trlib_int_t verbose, trlib_int_t unicode, char *prefix, FILE *fout, trlib_int_t *timing,
    trlib_int_t *action, trlib_int_t *iter, trlib_int_t *ityp,
    trlib_flt_t *flt1, trlib_flt_t *flt2, trlib_flt_t *flt3) {

    trlib_int_t ret = -1000;

    trlib_int_t *outerstatus = iwork+14;
    *iter = *(iwork+1);
    if (init == TRLIB_CLS_INIT || init == TRLIB_CLS_HOTSTART) { *outerstatus = 0; }

    if( *outerstatus < 100 || *outerstatus == 300 ) {
        while(1) {

            ret = trlib_krylov_min_internal(init, radius, equality, itmax, itmax_lanczos,
                    tol_rel_i, tol_abs_i, tol_rel_b, tol_abs_b, zero, obj_lo,
                    ctl_invariant, convexify, earlyterm, g_dot_g, v_dot_g, p_dot_Hp,
                    iwork, fwork, refine, verbose, unicode, prefix, fout, timing,
                    action, iter, ityp, flt1, flt2, flt3);

            if ( init > 0 || ret < 10 || *action != TRLIB_CLA_TRIVIAL ) { break; }
        }
    }

    if( ret >= 0 || ret == -1000 ) {

        if( *outerstatus < 100 && ret < 10 && *action != TRLIB_CLA_TRIVIAL ) { *outerstatus = 100 + ret; return 10; }
        if( *outerstatus >= 100 && *outerstatus < 200 ) { ret = *outerstatus - 100; *outerstatus = 0; *action = TRLIB_CLA_TRIVIAL; }

        if( ret < 10 && *outerstatus < 100 && convexify ) {
            // exit, check if we should convexify
            trlib_flt_t lam = fwork[7];
            trlib_flt_t obj = fwork[8];
            if( lam > 1e-2*fmax(1.0, fwork[13]) && fwork[14] < 0.0 && fabs(fwork[14]) < 1e-8 * fwork[13]) { // do only if it seems to make sense based on eigenvalue estimation
                // ask caller to compute objective value
                *outerstatus = 200 + ret;
                *action = TRLIB_CLA_OBJVAL;
                return 10;
            }
        }
        if( *outerstatus > 200 && *outerstatus < 300 ) {
            trlib_flt_t lam = fwork[7];
            trlib_flt_t obj = fwork[8];
            trlib_flt_t realobj = g_dot_g;
            if( fabs(obj - realobj) > fmax(1e-6, 1e-1*fabs(realobj)) || realobj > 0.0) {
                TRLIB_PRINTLN_2("leftmost: %e lam: %e raymax: %e raymin: %e\n", fwork[24+12*itmax], fwork[7], fwork[13], fwork[14])
                TRLIB_PRINTLN_2("mismatch between objective value as predicted from tridiagonal solution and actually computed: tridiag: %e, actual: %e\n", obj, realobj)
                TRLIB_PRINTLN_2("recomputing with regularized hessian\n");
                init = TRLIB_CLS_HOTSTART_P;
                ret = trlib_krylov_min_internal(init, radius, equality, itmax, itmax_lanczos,
                        tol_rel_i, tol_abs_i, tol_rel_b, tol_abs_b, zero, obj_lo,
                        ctl_invariant, convexify, earlyterm, g_dot_g, v_dot_g, p_dot_Hp,
                        iwork, fwork, refine, verbose, unicode, prefix, fout, timing,
                        action, iter, ityp, flt1, flt2, flt3);
                *outerstatus = 300;
                return ret;
            }
            else {
                ret = *outerstatus - 200;
                *outerstatus = 0;
                return ret;
            }

        }

        if( *outerstatus == 300 && ret < 10 ) { *outerstatus = 0; return ret; }

    }

    return ret;
}


trlib_int_t trlib_krylov_prepare_memory(trlib_int_t itmax, trlib_flt_t *fwork) {
    trlib_int_t jj = 0;
    for(jj = 23+11*itmax; jj<24+12*itmax; ++jj) { *(fwork+jj) = 1.0; } // everything to 1.0 in ones
    memset(fwork+17+2*itmax, 0, itmax*sizeof(trlib_flt_t)); // neglin = - gamma_0 e1, thus set neglin[1:] = 0
    return 0;
}

trlib_int_t trlib_krylov_memory_size(trlib_int_t itmax, trlib_int_t *iwork_size, trlib_int_t *fwork_size, trlib_int_t *h_pointer) {
    *iwork_size = 17+itmax;
    *fwork_size = 27+15*itmax+trlib_tri_factor_memory_size(itmax+1);
    *h_pointer = 19+4*itmax;
    return 0;
}

trlib_int_t trlib_krylov_timing_size() {
#if TRLIB_MEASURE_TIME
    return 1 + trlib_tri_timing_size();
#endif
    return 0;
}

trlib_int_t trlib_krylov_gt(trlib_int_t itmax, trlib_int_t *gt_pointer) {
    *gt_pointer = 17 + 2*itmax;
    return 0;
}