File: trlib_tri_factor.c

package info (click to toggle)
python-scipy 1.1.0-7
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 93,828 kB
  • sloc: python: 156,854; ansic: 82,925; fortran: 80,777; cpp: 7,505; makefile: 427; sh: 294
file content (633 lines) | stat: -rw-r--r-- 37,361 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
/* MIT License
 *
 * Copyright (c) 2016--2017 Felix Lenders
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 * 
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include "trlib.h"
#include "trlib_private.h"

#include "_c99compat.h"

trlib_int_t trlib_tri_factor_min(
    trlib_int_t nirblk, trlib_int_t *irblk, trlib_flt_t *diag, trlib_flt_t *offdiag,
    trlib_flt_t *neglin, trlib_flt_t radius, 
    trlib_int_t itmax, trlib_flt_t tol_rel, trlib_flt_t tol_newton_tiny,
    trlib_int_t pos_def, trlib_int_t equality,
    trlib_int_t *warm0, trlib_flt_t *lam0, trlib_int_t *warm, trlib_flt_t *lam,
    trlib_int_t *warm_leftmost, trlib_int_t *ileftmost, trlib_flt_t *leftmost,
    trlib_int_t *warm_fac0, trlib_flt_t *diag_fac0, trlib_flt_t *offdiag_fac0,
    trlib_int_t *warm_fac, trlib_flt_t *diag_fac, trlib_flt_t *offdiag_fac,
    trlib_flt_t *sol0, trlib_flt_t *sol, trlib_flt_t *ones, trlib_flt_t *fwork,
    trlib_int_t refine,
    trlib_int_t verbose, trlib_int_t unicode, char *prefix, FILE *fout,
    trlib_int_t *timing, trlib_flt_t *obj, trlib_int_t *iter_newton, trlib_int_t *sub_fail) {
    // use notation of Gould paper
    // h = h(lam) denotes solution of (T+lam I) * h = -lin

    trlib_int_t *leftmost_timing = NULL;
    trlib_int_t *eigen_timing = NULL;
    // local variables
    #if TRLIB_MEASURE_TIME
        struct timespec verystart, start, end;
        TRLIB_TIC(verystart)
        leftmost_timing = timing + 1 + TRLIB_SIZE_TIMING_LINALG;
        eigen_timing = timing + 1 + TRLIB_SIZE_TIMING_LINALG + trlib_leftmost_timing_size();
    #endif
    /* this is based on Theorem 5.8 in Gould paper,
     * the data for the first block has a 0 suffix,
     * the data for the \ell block has a l suffix */
    trlib_int_t n0 = irblk[1];                               // dimension of first block
    trlib_int_t nl;                                          // dimension of block corresponding to leftmost
    trlib_int_t nm0 = irblk[1]-1;                            // length of offdiagonal of first block
    trlib_int_t info_fac = 0;                                // factorization information
    trlib_int_t ret = 0;                                     // return code
    trlib_flt_t lam_pert = 0.0;                           // perturbation of leftmost eigenvalue as starting value for lam
    trlib_flt_t norm_sol0 = 0.0;                          // norm of h_0(lam)
    trlib_int_t jj = 0;                                      // local iteration counter
    trlib_flt_t dlam     = 0.0;                           // increment in newton iteration
    trlib_int_t inc = 1;                                     // increment in vector storage
    trlib_flt_t *w = fwork;                               // auxiliary vector to be used in newton iteration
    trlib_flt_t *diag_lam = fwork+(irblk[nirblk]);        // vector that holds diag + lam, could be saved if we would implement iterative refinement ourselves
    trlib_flt_t *work = fwork+2*(irblk[nirblk]);          // workspace for iterative refinement
    trlib_flt_t ferr = 0.0;                               // forward  error bound from iterative refinement
    trlib_flt_t berr = 0.0;                               // backward error bound from iterative refinement
    trlib_flt_t pert_low, pert_up;                        // lower and upper bound on perturbation of lambda
    trlib_flt_t dot = 0.0, dot2 = 0.0;                    // save dot products
    trlib_flt_t invD_norm_w_sq = 0.0;                     // || w ||_{D^-1}^2
    *iter_newton = 0;                                // newton iteration counter

    // FIXME: ensure diverse warmstarts work as expected
    
    // initialization:
    *sub_fail = 0;

    // set sol to 0 as a safeguard
    memset(sol, 0, irblk[nirblk]*sizeof(trlib_flt_t));

    // first make sure that lam0, h_0 is accurate
    TRLIB_PRINTLN_1("Solving trust region problem, radius %e; starting on first irreducible block", radius)
    // if only blocks changed that differ from the first then there is nothing to do
    if (nirblk > 1 && *warm0) {
        TRLIB_DNRM2(norm_sol0, &n0, sol0, &inc)
        if (unicode) { TRLIB_PRINTLN_1("Solution provided via warmstart, \u03bb\u2080 = %e, \u2016h\u2080\u2016 = %e", *lam0, norm_sol0) }
        else { TRLIB_PRINTLN_1("Solution provided via warmstart, lam0 = %e, ||h0|| = %e", *lam0, norm_sol0) }
        if (norm_sol0-radius < 0.0) { 
            if (unicode) { TRLIB_PRINTLN_1("  violates \u2016h\u2080\u2016 - radius \u2265 0, but is %e, switch to coldstart", norm_sol0-radius) }
            else { TRLIB_PRINTLN_1("  violates ||h0|| - radius >= 0, but is %e, switch to coldstart", norm_sol0-radius) }
            *warm0 = 0; 
        }
    }
    if (nirblk == 1 || !*warm0) {
        // seek for lam0, h_0 with (T0+lam0*I) pos def and ||h_0(lam_0)|| = radius

        /* as a first step to initialize the newton iteration,
         *  find such a pair with the loosened requirement ||h_0(lam_0)|| >= radius */
        if(*warm0) {
            if(!*warm_fac0) {
                // factorize T + lam0 I
                TRLIB_DCOPY(&n0, diag, &inc, diag_lam, &inc) // diag_lam <-- diag
                TRLIB_DAXPY(&n0, lam0, ones, &inc, diag_lam, &inc) // diag_lam <-- lam0 + diag_lam
                TRLIB_DCOPY(&n0, diag_lam, &inc, diag_fac0, &inc) // diag_fac0 <-- diag_lam
                TRLIB_DCOPY(&nm0, offdiag, &inc, offdiag_fac0, &inc) // offdiag_fac0 <-- offdiag
                TRLIB_DPTTRF(&n0, diag_fac0, offdiag_fac0, &info_fac) // compute factorization
                if (info_fac != 0) { *warm0 = 0; } // factorization failed, switch to coldastart
                else { *warm_fac0 = 1; }
            }
            if(*warm_fac0) {
                // solve for h0(lam0) and compute norm
                TRLIB_DCOPY(&n0, neglin, &inc, sol0, &inc) // sol0 <-- neglin
                TRLIB_DPTTRS(&n0, &inc, diag_fac0, offdiag_fac0, sol0, &n0, &info_fac) // sol <-- (T+lam0 I)^-1 sol
                if(info_fac!=0) { 
                    if (unicode) { TRLIB_PRINTLN_2("Failure on computing h\u2080 upon initialization") }
                    else { TRLIB_PRINTLN_2("Failure on computing h0 upon initialization") }
                    TRLIB_RETURN(TRLIB_TTR_FAIL_LINSOLVE)
                }
                TRLIB_DNRM2(norm_sol0, &n0, sol0, &inc)
                if (norm_sol0 >= radius) { *warm0 = 1; } else { *warm0 = 0; }
            }
        }
        if(!*warm0) {
            *lam0 = 0.0;
            if (unicode) { TRLIB_PRINTLN_1("Coldstart. Seeking suitable initial \u03bb\u2080, starting with 0") }
            else { TRLIB_PRINTLN_1("Coldstart. Seeking suitable initial lam0, starting with 0") }
            TRLIB_DCOPY(&n0, diag, &inc, diag_fac0, &inc) // diag_fac0 <-- diag0
            TRLIB_DCOPY(&nm0, offdiag, &inc, offdiag_fac0, &inc) // offdiag_fac0 <-- offdiag0
            TRLIB_DCOPY(&n0, neglin, &inc, sol0, &inc) // sol0 <-- neglin0
            TRLIB_DPTTRF(&n0, diag_fac0, offdiag_fac0, &info_fac) // compute factorization
            if (info_fac == 0) {
                // test if lam0 = 0 is suitable
                TRLIB_DCOPY(&n0, neglin, &inc, sol0, &inc) // sol0 <-- neglin
                TRLIB_DPTTRS(&n0, &inc, diag_fac0, offdiag_fac0, sol0, &n0, &info_fac) // sol0 <-- T0^-1 sol0
                if (info_fac != 0) { TRLIB_PRINTLN_2("Failure on computing h\u2080 upon initialization") TRLIB_RETURN(TRLIB_TTR_FAIL_LINSOLVE) }
                TRLIB_DNRM2(norm_sol0, &n0, sol0, &inc)
                if (norm_sol0<radius && equality) { info_fac = 1; } // in equality case we have to find suitable lam
            }
            if (info_fac != 0) { 
                if (unicode) { TRLIB_PRINTLN_1(" \u03bb\u2080 = 0 unsuitable \u2265 get leftmost ev of first block!") }
                else { TRLIB_PRINTLN_1(" lam0 = 0 unsuitable ==> get leftmost ev of first block!") }
                *sub_fail = trlib_leftmost_irreducible(irblk[1], diag, offdiag, *warm_leftmost, *leftmost, 1000, TRLIB_EPS_POW_75, verbose-2, unicode, " LM ", fout, leftmost_timing, leftmost, &jj); // ferr can safely be overwritten by computed leftmost for the moment as can jj with the number of rp iterations
                // if (*sub_fail != 0) { TRLIB_RETURN(TRLIB_TTR_FAIL_LM) } failure of leftmost: may lead to inefficiency, since what we are doing may be slow...
                // T - leftmost*I is singular, so do bisection to catch factorization and find suitable initial lambda
                pert_low = - TRLIB_EPS_POW_5 * fabs(*leftmost); // lower bound on allowed perturbation
                pert_up = 1.0/TRLIB_EPS; // upper bound on allowed perturbation
                jj = 0; // counter on number of tries
                lam_pert = 0.0;
                TRLIB_PRINTLN_1(" ")
                if (unicode) { TRLIB_PRINTLN_1(" perturb \u03bb\u2080 by safeguarded bisection to find suitable initial value") }
                else { TRLIB_PRINTLN_1(" perturb lam0 by safeguarded bisection to find suitable initial value") }
                while( jj < 50 ) {
                    if( jj % 20 == 0 ) {
                        if (unicode) { TRLIB_PRINTLN_2(" %2s%14s%14s%14s%3s%14s", "it", "low     ", "pert    ", "up      ", "pd", "  \u2016h\u2080\u2016 - radius") }
                        else { TRLIB_PRINTLN_2(" %2s%14s%14s%14s%3s%14s", "it", "low     ", "pert    ", "up      ", "pd", "  ||h0|| - radius") }
                    }
                    *lam0 = -(*leftmost) + lam_pert;
                    // factorize T + lam I
                    TRLIB_DCOPY(&n0, diag, &inc, diag_lam, &inc) // diag_lam <-- diag
                    TRLIB_DAXPY(&n0, lam0, ones, &inc, diag_lam, &inc) // diag_lam <-- lam + diag_lam
                    TRLIB_DCOPY(&n0, diag_lam, &inc, diag_fac0, &inc) // diag_fac <-- diag_lam
                    TRLIB_DCOPY(&nm0, offdiag, &inc, offdiag_fac0, &inc) // offdiag_fac <-- offdiag
                    TRLIB_DPTTRF(&n0, diag_fac0, offdiag_fac0, &info_fac) // compute factorization
                    if(info_fac == 0) {
                        pert_up = lam_pert; // as this ensures a factorization, it provides a upper bound
                        TRLIB_DCOPY(&n0, neglin, &inc, sol0, &inc) // sol0 <-- neglin
                        TRLIB_DPTTRS(&n0, &inc, diag_fac0, offdiag_fac0, sol0, &n0, &info_fac) // sol0 <-- T0^-1 sol0
                        if (info_fac != 0) { TRLIB_PRINTLN_2("Failure on computing h\u2080 in safeguarded initialization iteration") TRLIB_RETURN(TRLIB_TTR_FAIL_LINSOLVE) }
                        TRLIB_DNRM2(norm_sol0, &n0, sol0, &inc)
                        TRLIB_PRINTLN_2(" %2ld%14e%14e%14e%3s%14e", jj, pert_low, lam_pert, pert_up, " +", norm_sol0 - radius)
                        if(norm_sol0 >= radius) { 
                            break;
                        }
                        else {
                            // lam_pert has to be decreased since we caught factorization but got solution that was too small
                            lam_pert = .5*(pert_low+pert_up);
                        }
                    }
                    else {
                        TRLIB_PRINTLN_2(" %2ld%14e%14e%14e%3s", jj, pert_low, lam_pert, pert_up, " -")
                        pert_low = lam_pert; // as factorization fails, it provides a upper bound
                        // now increase perturbation, either by bisection if there is a useful upper bound,
                        // otherwise by a small increment
                        if( pert_up == 1.0/TRLIB_EPS ) {
                            if( lam_pert == 0.0 ) { lam_pert = (1.0+fabs(*leftmost))*TRLIB_EPS_POW_75; } else { lam_pert = 2.0*lam_pert; }
                        }
                        else {
                            lam_pert = .5*(pert_low+pert_up);
                        }
                    }
                    ++jj;
                }
                // ensure that we get a factorization and compute solution with it
                if(info_fac != 0) {
                    lam_pert = pert_up;
                    *lam0 = -(*leftmost) + lam_pert;
                    TRLIB_DCOPY(&n0, diag, &inc, diag_lam, &inc) // diag_lam <-- diag
                    TRLIB_DAXPY(&n0, lam0, ones, &inc, diag_lam, &inc) // diag_lam <-- lam + diag_lam
                    TRLIB_DCOPY(&n0, diag_lam, &inc, diag_fac0, &inc) // diag_fac <-- diag_lam
                    TRLIB_DCOPY(&nm0, offdiag, &inc, offdiag_fac0, &inc) // offdiag_fac <-- offdiag
                    TRLIB_DPTTRF(&n0, diag_fac0, offdiag_fac0, &info_fac) // compute factorization
                    if(info_fac != 0) { TRLIB_RETURN(TRLIB_TTR_FAIL_FACTOR) }
                    TRLIB_DPTTRS(&n0, &inc, diag_fac0, offdiag_fac0, sol0, &n0, &info_fac) // sol0 <-- T0^-1 sol0
                    if (info_fac != 0) { 
                        if (unicode) { TRLIB_PRINTLN_2("Failure on computing h\u2080 upon after factorization was ensured") }
                        else { TRLIB_PRINTLN_2("Failure on computing h\u2080 upon after factorization was ensured") } 
                        TRLIB_RETURN(TRLIB_TTR_FAIL_LINSOLVE)
                    }
                    TRLIB_DNRM2(norm_sol0, &n0, sol0, &inc)
                }
            }
        }
    }
    if (norm_sol0 >= radius) { // perform newton iteration if possible
        /* now a suitable pair lam0, h0 has been found.
         * perform a newton iteration on 0 = 1/||h0(lam0)|| - 1/radius */
        if (unicode) { TRLIB_PRINTLN_1("Starting Newton iteration for \u03bb\u2080 with initial choice %e", *lam0) }
        else { TRLIB_PRINTLN_1("Starting Newton iteration for lam0 with initial choice %e", *lam0) }
        while (1) {
            /* compute newton correction to lam, by
                (1) Factoring T0 + lam0 I = LDL^T
                (2) Solving (T0+lam0 I)*h0 = -lin
                (3) L*w = h0/||h0||
                (4) compute increment (||h0||-Delta)/Delta/||w||_{D^-1}^2 */
    
            // steps (1) and (2) have already been performed on initializaton or previous iteration
    
            /* step (3) L*w = h/||h||
               compute ||w||_{D^-1}^2 in same loop */
            ferr = 1.0/norm_sol0; TRLIB_DCOPY(&n0, sol0, &inc, w, &inc) TRLIB_DSCAL(&n0, &ferr, w, &inc) // w <-- sol/||sol||
            invD_norm_w_sq = w[0]*w[0]/diag_fac0[0];
            for( jj = 1; jj < n0; ++jj ) { w[jj] = w[jj] - offdiag_fac0[jj-1]*w[jj-1]; invD_norm_w_sq += w[jj]*w[jj]/diag_fac0[jj]; }
    
            // step (4) compute increment (||h||-Delta)/Delta/||w||_{D^-1}^2
            dlam = (norm_sol0-radius)/(radius*invD_norm_w_sq);
    
            // iteration completed
            *iter_newton += 1;
    
            // test if dlam is not tiny or newton limit exceeded, return eventually
            if ( *lam0 + dlam == *lam0 || fabs(dlam) <= tol_newton_tiny * fmax(1.0, fabs(*lam0)) || *iter_newton > itmax) {
                if (unicode) { TRLIB_PRINTLN_1("%s%e%s%e", "Newton breakdown, d\u03bb = ", dlam, " \u03bb = ", *lam0) }
                else { TRLIB_PRINTLN_1("%s%e%s%e", "Newton breakdown, dlam = ", dlam, " \u03bb = ", *lam0) }
                if(*iter_newton > itmax) { ret = TRLIB_TTR_ITMAX; break; }
                ret = TRLIB_TTR_NEWTON_BREAK; break;
            }
    
            // prepare next iteration
    
            // update lam
            *lam0 += dlam;
    
            // step (1) Factoring T0 + lam0 I = LDL^T
            TRLIB_DCOPY(&n0, diag, &inc, diag_lam, &inc) // diag_lam <-- diag
            TRLIB_DAXPY(&n0, lam0, ones, &inc, diag_lam, &inc) // diag_lam <-- lam + diag_lam
            TRLIB_DCOPY(&n0, diag_lam, &inc, diag_fac0, &inc) // diag_fac <-- diag_lam
            TRLIB_DCOPY(&nm0, offdiag, &inc, offdiag_fac0, &inc) // offdiag_fac <-- offdiag
            TRLIB_DPTTRF(&n0, diag_fac0, offdiag_fac0, &info_fac) // compute factorization
            if (info_fac != 0) { 
                if (unicode) { TRLIB_PRINTLN_2("Fail on factorization, \u03bb = %e, d\u03bb = %e! Exiting Newton Iteration", *lam0, dlam) }
                else {TRLIB_PRINTLN_2("Fail on factorization, lam = %e, dlam = %e! Exiting Newton Iteration", *lam0, dlam) }
                TRLIB_RETURN(TRLIB_TTR_FAIL_FACTOR) 
            }
    
            // step (2) Solving (T+lam I)*h = -lin
            TRLIB_DCOPY(&n0, neglin, &inc, sol0, &inc) // sol <-- neglin
            TRLIB_DPTTRS(&n0, &inc, diag_fac0, offdiag_fac0, sol0, &n0, &info_fac) // sol <-- (T+lam I)^-1 sol
            if (info_fac != 0) {
                if (unicode) { TRLIB_PRINTLN_2("Failure on computing h\u2080 in newton iteration") }
                else { TRLIB_PRINTLN_2("Failure on computing h0 in newton iteration") }
                TRLIB_RETURN(TRLIB_TTR_FAIL_LINSOLVE)
            }
            if (refine) { TRLIB_DPTRFS(&n0, &inc, diag_lam, offdiag, diag_fac0, offdiag_fac0, neglin, &n0, sol0, &n0, &ferr, &berr, work, &info_fac) }
            if (info_fac != 0) {
                if (unicode) { TRLIB_PRINTLN_2("Failure on computing h\u2080 in refinement in newton iteration") }
                else { TRLIB_PRINTLN_2("Failure on computing h\u2080 in refinement in newton iteration") }
                TRLIB_RETURN(TRLIB_TTR_FAIL_LINSOLVE)
            }
            TRLIB_DNRM2(norm_sol0, &n0, sol0, &inc)
    
            if (*iter_newton % 20 == 1) {
                if (unicode) { TRLIB_PRINTLN_1("%6s%14s%14s%14s", " iter ", "       \u03bb      ", "      d\u03bb      ", " \u2016h\u2080(\u03bb)\u2016-radius") }
                else { TRLIB_PRINTLN_1("%6s%14s%14s%14s", " iter ", "     lam      ", "     dlam     ", "  tr resdidual") }
            }
            TRLIB_PRINTLN_1("%6ld%14e%14e%14e", *iter_newton, *lam0, dlam, norm_sol0-radius)
    
            // test for convergence
            if (norm_sol0 - radius <= tol_rel * radius) {
                // what if norm_sol < radius in a significant way?
                // theory tells this should not happen...
                    
                ret = TRLIB_TTR_CONV_BOUND; break;
            }
        }
    }
    *warm0 = 1;

    // test if we trust region problem is solved on first irreducible with sufficient accuracy
    // otherwise build linear combination with eigenvector to leftmost that solves trust region constraint
    if ( fabs(radius - norm_sol0) >= TRLIB_EPS_POW_5*radius ) { 
        if(*lam0 == 0.0 && !equality) { ret = TRLIB_TTR_CONV_INTERIOR; }
        else { 
            if (unicode) { TRLIB_PRINTLN_1(" Found \u03bb\u2080 with tr residual %e! Bail out with h\u2080 + \u03b1 eig", radius - norm_sol0) }
            else { TRLIB_PRINTLN_1(" Found lam0 with tr residual %e! Bail out with h0 + alpha eig", radius - norm_sol0) }
            *sub_fail = trlib_eigen_inverse(n0, diag, offdiag, 
                    *leftmost, 10, TRLIB_EPS_POW_5, ones,
                    diag_fac, offdiag_fac, sol, 
                    verbose-2, unicode, " EI", fout, eigen_timing, &ferr, &berr, &jj); // can safely overwrite ferr, berr, jj with results. only interesting: eigenvector
            if (*sub_fail != 0 && *sub_fail != -1) { TRLIB_PRINTLN_2("Failure in eigenvector computation: %ld", *sub_fail) TRLIB_RETURN(TRLIB_TTR_FAIL_EIG) }
            if (*sub_fail == -1) { TRLIB_PRINTLN_2("In eigenvector computation itmax reached, continue with approximate eigenvector") }
            // compute solution as linear combination of h0 and eigenvector
            // ||h0 + t*eig||^2 = ||h_0||^2 + t * <h0, eig> + t^2 = radius^2
            TRLIB_DDOT(dot, &n0, sol0, &inc, sol, &inc); // dot = <h0, eig>
            trlib_quadratic_zero( norm_sol0*norm_sol0 - radius*radius, 2.0*dot, TRLIB_EPS_POW_75, verbose - 3, unicode, prefix, fout, &ferr, &berr);
            // select solution that corresponds to smaller objective
            // quadratic as a function of t without offset
            // q(t) = 1/2 * leftmost * t^2 + (leftmost * <eig, h0> + <eig, lin>) * t
            TRLIB_DDOT(dot2, &n0, sol, &inc, neglin, &inc) // dot2 = - <eig, lin>
            if( .5*(*leftmost)*ferr*ferr + ((*leftmost)*dot - dot2)*ferr <= .5*(*leftmost)*berr*berr + ((*leftmost)*dot - dot2)*berr) {
                TRLIB_DAXPY(&n0, &ferr, sol, &inc, sol0, &inc)
            }
            else {
                TRLIB_DAXPY(&n0, &berr, sol, &inc, sol0, &inc)
            }
            ret = TRLIB_TTR_HARD_INIT_LAM;
        }
    }


    /* now in a situation were accurate lam0, h_0 exists to first irreducible block
     * invoke Theorem 5.8:
     * (i)  if lam0 >= -leftmost the pair lam0, h_0 solves the problem
     * (ii) if lam0 < -leftmost a solution has to be constructed to lam = -leftmost */

    // quick exit: only one irreducible block
    if (nirblk == 1) {
        *lam = *lam0; *warm = 1;
        TRLIB_DCOPY(&n0, sol0, &inc, sol, &inc) // sol <== sol0
        // compute objective. first store 2*gradient in w, then compute obj = .5*(sol, w)
        TRLIB_DCOPY(&n0, neglin, &inc, w, &inc) ferr = -2.0; TRLIB_DSCAL(&n0, &ferr, w, &inc) ferr = 1.0; // w <-- -2 neglin
        TRLIB_DLAGTM("N", &n0, &inc, &ferr, offdiag, diag, offdiag, sol, &n0, &ferr, w, &n0) // w <-- T*sol + w
        TRLIB_DDOT(dot, &n0, sol, &inc, w, &inc) *obj = 0.5*dot; // obj = .5*(sol, w)
        TRLIB_RETURN(ret)
    }

    // now that we have accurate lam, h_0 invoke Theorem 5.8
    // check if lam <= leftmost --> in that case the first block information describes everything
    if (unicode) { TRLIB_PRINTLN_1("\nCheck if \u03bb\u2080 provides global solution, get leftmost ev for irred blocks") }
    else { TRLIB_PRINTLN_1("\nCheck if lam0 provides global solution, get leftmost ev for irred blocks") }
    if(!*warm_leftmost) {
        *sub_fail = trlib_leftmost(nirblk, irblk, diag, offdiag, 0, leftmost[nirblk-1], 1000, TRLIB_EPS_POW_75, verbose-2, unicode, " LM ", fout, leftmost_timing, ileftmost, leftmost);
        *warm_leftmost = 1;
    }
    TRLIB_PRINTLN_1("    leftmost = %e (block %ld)", leftmost[*ileftmost], *ileftmost)
    if(*lam0 >= -leftmost[*ileftmost]) {
        if (unicode) { TRLIB_PRINTLN_1("  \u03bb\u2080 \u2265 -leftmost \u21d2 \u03bb = \u03bb\u2080, exit: h\u2080(\u03bb\u2080)") }
        else { TRLIB_PRINTLN_1("  lam0 >= -leftmost => lam = lam0, exit: h0(lam0)") }
        *lam = *lam0; *warm = 1;
        TRLIB_DCOPY(&n0, sol0, &inc, sol, &inc) // sol <== sol0
        // compute objective. first store 2*gradient in w, then compute obj = .5*(sol, w)
        TRLIB_DCOPY(&n0, neglin, &inc, w, &inc) ferr = -2.0; TRLIB_DSCAL(&n0, &ferr, w, &inc) ferr = 1.0; // w <-- -2 neglin
        TRLIB_DLAGTM("N", &n0, &inc, &ferr, offdiag, diag, offdiag, sol, &n0, &ferr, w, &n0) // w <-- T*sol + w
        TRLIB_DDOT(dot, &n0, sol, &inc, w, &inc) *obj = 0.5*dot; // obj = .5*(sol, w)
        TRLIB_RETURN(ret)
    }
    else {
        if (unicode) { TRLIB_PRINTLN_1("  -leftmost > \u03bb\u2080 \u21d2 \u03bb = -leftmost, exit: h\u2080(-leftmost) + \u03b1 u") }
        else  { TRLIB_PRINTLN_1("  -leftmost > lam0 => lam = -leftmost, exit: h0(-leftmost) + alpha u") }

        // Compute solution of (T0 - leftmost*I)*h0 = neglin
        *lam = -leftmost[*ileftmost]; *warm = 1;
        TRLIB_DCOPY(&n0, neglin, &inc, sol, &inc) // neglin <-- sol
        if(!*warm_fac){
            TRLIB_DCOPY(&n0, diag, &inc, diag_lam, &inc) // diag_lam <-- diag
            TRLIB_DAXPY(&n0, lam, ones, &inc, diag_lam, &inc) // diag_lam <-- lam + diag_lam
            TRLIB_DCOPY(&n0, diag_lam, &inc, diag_fac, &inc) // diag_fac <-- diag_lam
            TRLIB_DCOPY(&nm0, offdiag, &inc, offdiag_fac, &inc) // offdiag_fac <-- offdiag
            TRLIB_DPTTRF(&n0, diag_fac, offdiag_fac, &info_fac) // compute factorization
            if (info_fac != 0) { TRLIB_RETURN(TRLIB_TTR_FAIL_FACTOR) } 
        }
        *warm_fac = 1;
        TRLIB_DPTTRS(&n0, &inc, diag_fac, offdiag_fac, sol, &n0, &info_fac) // sol <-- (T+lam I)^-1 sol
        if (info_fac != 0) { TRLIB_PRINTLN_2("Failure on computing h") TRLIB_RETURN(TRLIB_TTR_FAIL_LINSOLVE) }
        if (refine) { TRLIB_DPTRFS(&n0, &inc, diag_lam, offdiag, diag_fac, offdiag_fac, neglin, &n0, sol, &n0, &ferr, &berr, work, &info_fac) }
        if (info_fac != 0) { TRLIB_PRINTLN_2("Failure on refining h") TRLIB_RETURN(TRLIB_TTR_FAIL_LINSOLVE) }
        TRLIB_DNRM2(norm_sol0, &n0, sol, &inc)

        // compute normalized eigenvector u corresponding to leftmost of block ileftmost
        nl = irblk[*ileftmost+1]-irblk[*ileftmost];
        *sub_fail = trlib_eigen_inverse(nl, diag+irblk[*ileftmost], offdiag+irblk[*ileftmost], 
                leftmost[*ileftmost], 10, TRLIB_EPS_POW_5, ones,
                diag_fac+irblk[*ileftmost], offdiag_fac+irblk[*ileftmost],
                sol+irblk[*ileftmost], 
                verbose-2, unicode, " EI", fout, eigen_timing, &ferr, &berr, &jj); // can safely overwrite ferr, berr, jj with results. only interesting: eigenvector
        if (*sub_fail != 0) { TRLIB_RETURN(TRLIB_TTR_FAIL_EIG) }

        // solution is of form [h,0,...,0,alpha*u,0,...,0]
        // alpha = sqrt( radius^2 - ||h||^2 )
        ferr = sqrt( radius*radius - norm_sol0*norm_sol0 );
        TRLIB_DSCAL(&nl, &ferr, sol+irblk[*ileftmost], &inc)

        if (unicode) { TRLIB_PRINTLN_1("    with \u2016h\u2080(-leftmost)\u2016 = %e, \u03b1 = %e", norm_sol0, ferr) }
        else { TRLIB_PRINTLN_1("    with ||h0(-leftmost)|| = %e, alpha = %e", norm_sol0, ferr) }

        ret = TRLIB_TTR_HARD;
        
        // compute objective. first store 2*gradient in w, then compute obj = .5*(sol, w)
        *obj = 0.5*leftmost[*ileftmost]*ferr*ferr;
        TRLIB_DCOPY(&n0, neglin, &inc, w, &inc) ferr = -2.0; TRLIB_DSCAL(&n0, &ferr, w, &inc) ferr = 1.0; // w <-- -2 neglin
        TRLIB_DLAGTM("N", &n0, &inc, &ferr, offdiag, diag, offdiag, sol, &n0, &ferr, w, &n0) // w <-- T*sol + w
        TRLIB_DDOT(dot, &n0, sol, &inc, w, &inc) *obj = *obj+0.5*dot; // obj = .5*(sol, w)
        TRLIB_RETURN(ret);
    }
}

trlib_int_t trlib_tri_factor_regularized_umin(
    trlib_int_t n, trlib_flt_t *diag, trlib_flt_t *offdiag,
    trlib_flt_t *neglin, trlib_flt_t lam,
    trlib_flt_t *sol,
    trlib_flt_t *ones, trlib_flt_t *fwork,
    trlib_int_t refine,
    trlib_int_t verbose, trlib_int_t unicode, char *prefix, FILE *fout,
    trlib_int_t *timing, trlib_flt_t *norm_sol, trlib_int_t *sub_fail) {

    // local variables
    #if TRLIB_MEASURE_TIME
        struct timespec verystart, start, end;
        TRLIB_TIC(verystart)
    #endif

    trlib_flt_t *diag_lam = fwork;        // vector that holds diag + lam, could be saved if we would implement iterative refinement ourselves
    trlib_flt_t *diag_fac = fwork+n;      // vector that holds diagonal of factor of diag + lam
    trlib_flt_t *offdiag_fac = fwork+2*n; // vector that holds offdiagonal of factor of diag + lam
    trlib_flt_t *work = fwork+3*n;        // workspace for iterative refinement
    trlib_flt_t ferr = 0.0;               // forward  error bound from iterative refinement
    trlib_flt_t berr = 0.0;               // backward error bound from iterative refinement
    trlib_int_t inc = 1;                  // vector increment
    trlib_int_t info_fac;                 // LAPACK return code
    trlib_int_t nm = n-1;

    // factorize T + lam0 I
    TRLIB_DCOPY(&n, diag, &inc, diag_lam, &inc) // diag_lam <-- diag
    TRLIB_DAXPY(&n, &lam, ones, &inc, diag_lam, &inc) // diag_lam <-- lam0 + diag_lam
    TRLIB_DCOPY(&n, diag_lam, &inc, diag_fac, &inc) // diag_fac <-- diag_lam
    TRLIB_DCOPY(&nm, offdiag, &inc, offdiag_fac, &inc) // offdiag_fac <-- offdiag
    TRLIB_DPTTRF(&n, diag_fac, offdiag_fac, &info_fac) // compute factorization
    if (info_fac != 0) { TRLIB_RETURN(TRLIB_TTR_FAIL_FACTOR); } // factorization failed, switch to coldastart

    TRLIB_DCOPY(&n, neglin, &inc, sol, &inc) // sol <-- neglin
    TRLIB_DPTTRS(&n, &inc, diag_fac, offdiag_fac, sol, &n, &info_fac) // sol <-- (T+lam I)^-1 sol
    if (info_fac != 0) { TRLIB_PRINTLN_2("Failure on backsolve for h") TRLIB_RETURN(TRLIB_TTR_FAIL_LINSOLVE) }
    if (refine) { TRLIB_DPTRFS(&n, &inc, diag_lam, offdiag, diag_fac, offdiag_fac, neglin, &n, sol, &n, &ferr, &berr, work, &info_fac) }
    if (info_fac != 0) { TRLIB_PRINTLN_2("Failure on iterative refinement for h") TRLIB_RETURN(TRLIB_TTR_FAIL_LINSOLVE) }

    TRLIB_DNRM2(*norm_sol, &n, sol, &inc)
    TRLIB_RETURN(TRLIB_TTR_CONV_INTERIOR); 
}

trlib_int_t trlib_tri_factor_get_regularization(
    trlib_int_t n, trlib_flt_t *diag, trlib_flt_t *offdiag,
    trlib_flt_t *neglin, trlib_flt_t *lam,
    trlib_flt_t sigma, trlib_flt_t sigma_l, trlib_flt_t sigma_u,
    trlib_flt_t *sol,
    trlib_flt_t *ones, trlib_flt_t *fwork,
    trlib_int_t refine,
    trlib_int_t verbose, trlib_int_t unicode, char *prefix, FILE *fout,
    trlib_int_t *timing, trlib_flt_t *norm_sol, trlib_int_t *sub_fail) {

    // local variables
    #if TRLIB_MEASURE_TIME
        struct timespec verystart, start, end;
        TRLIB_TIC(verystart)
    #endif

    trlib_flt_t *diag_lam = fwork;        // vector that holds diag + lam, could be saved if we would implement iterative refinement ourselves
    trlib_flt_t *diag_fac = fwork+n;      // vector that holds diagonal of factor of diag + lam
    trlib_flt_t *offdiag_fac = fwork+2*n; // vector that holds offdiagonal of factor of diag + lam
    trlib_flt_t *work = fwork+3*n;        // workspace for iterative refinement
    trlib_flt_t *aux  = fwork+5*n;        // auxiliary vector ds/n
    trlib_flt_t ferr = 0.0;               // forward  error bound from iterative refinement
    trlib_flt_t berr = 0.0;               // backward error bound from iterative refinement
    trlib_int_t inc = 1;                  // vector increment
    trlib_int_t info_fac;                 // LAPACK return code
    trlib_int_t nm = n-1;
    trlib_flt_t lambda_l = 0.0;           // lower bound on lambda
    trlib_flt_t lambda_u = 1e20;          // upper bound on lambda
    trlib_int_t jj = 0;                   // local loop variable
    trlib_flt_t dlam = 0.0;               // step in lambda
    trlib_flt_t dn = 0.0;                 // derivative of norm

    // get suitable lambda for which factorization exists
    info_fac = 1;
    while(info_fac != 0 && jj < 500) {
        // factorize T + lam0 I
        TRLIB_DCOPY(&n, diag, &inc, diag_lam, &inc) // diag_lam <-- diag
        TRLIB_DAXPY(&n, lam, ones, &inc, diag_lam, &inc) // diag_lam <-- lam0 + diag_lam
        TRLIB_DCOPY(&n, diag_lam, &inc, diag_fac, &inc) // diag_fac <-- diag_lam
        TRLIB_DCOPY(&nm, offdiag, &inc, offdiag_fac, &inc) // offdiag_fac <-- offdiag
        TRLIB_DPTTRF(&n, diag_fac, offdiag_fac, &info_fac) // compute factorization
        if(info_fac == 0) { break; }
        if(*lam > lambda_u) { break; }
        lambda_l = *lam;
        //if (*lam == 0.0) { *lam = 1.0; }
        *lam = 2.0 * (*lam); jj++;
    }
    if (info_fac != 0) { TRLIB_RETURN(TRLIB_TTR_FAIL_FACTOR); } // factorization failed
    TRLIB_PRINTLN_1("Initial Regularization Factor that allows Cholesky: %e", *lam);

    TRLIB_DCOPY(&n, neglin, &inc, sol, &inc) // sol <-- neglin
    TRLIB_DPTTRS(&n, &inc, diag_fac, offdiag_fac, sol, &n, &info_fac) // sol <-- (T+lam I)^-1 sol
    if (info_fac != 0) { TRLIB_PRINTLN_2("Failure on backsolve for h") TRLIB_RETURN(TRLIB_TTR_FAIL_LINSOLVE) }
    if (refine) { TRLIB_DPTRFS(&n, &inc, diag_lam, offdiag, diag_fac, offdiag_fac, neglin, &n, sol, &n, &ferr, &berr, work, &info_fac) }
    if (info_fac != 0) { TRLIB_PRINTLN_2("Failure on iterative refinement for h") TRLIB_RETURN(TRLIB_TTR_FAIL_LINSOLVE) }

    TRLIB_DNRM2(*norm_sol, &n, sol, &inc)

    jj = 0;
    TRLIB_PRINTLN_2("%ld\t Reg %e\t Reg/Norm %e\t lb %e ub %e", jj, *lam, *lam/(*norm_sol), sigma_l, sigma_u);

    // check if accetable
    if( *norm_sol * sigma_l <= *lam && *lam <= *norm_sol * sigma_u ) {
        TRLIB_PRINTLN_1("Exit with Regularization Factor %e and Reg/Norm %e", *lam, *lam/(*norm_sol))
        TRLIB_RETURN(TRLIB_TTR_CONV_INTERIOR); 
    }
    else {
        /* do safeguarded newton iteration on f(lam) = lam/n(lam) with n(lam) = ||s(lam)||
         * then dn = 1/n <s, ds> with - (H+lam I) ds = s
         * thus get - f/df = (n*lam - n*n*sigma) / ( lam dn - n) with dn = <s, ds/n> and (H + lam I) ds/n = - s/n
         * note that f is increasing for lam such that (H+lam I) is spd
         */

        jj = 0;

        while(jj < 500) {

            // first get the vector ds/n
            TRLIB_DCOPY(&n, sol, &inc, aux, &inc) // aux <-- sol
            dn = -1.0/(*norm_sol); // scaling for right hand side
            TRLIB_DSCAL(&n, &dn, aux, &inc) // aux <-- -sol/||norm_sol||
            TRLIB_DDOT(dn, &n, sol, &inc, aux, &inc) // dn = <s, ds/n>

            // compute step correction
            dlam = (*lam*(*norm_sol)-*norm_sol*(*norm_sol)*sigma) / (*lam*dn - *norm_sol);

            // check feasibility of step
            if (*lam + dlam <= lambda_u && lambda_l <= *lam + dlam) {
                *lam = *lam + dlam;
            }
            else { *lam = .5*( lambda_l + lambda_u); }

            // compute next function value
            
            // factorize T + lam0 I
            TRLIB_DCOPY(&n, diag, &inc, diag_lam, &inc) // diag_lam <-- diag
            TRLIB_DAXPY(&n, lam, ones, &inc, diag_lam, &inc) // diag_lam <-- lam0 + diag_lam
            TRLIB_DCOPY(&n, diag_lam, &inc, diag_fac, &inc) // diag_fac <-- diag_lam
            TRLIB_DCOPY(&nm, offdiag, &inc, offdiag_fac, &inc) // offdiag_fac <-- offdiag
            TRLIB_DPTTRF(&n, diag_fac, offdiag_fac, &info_fac) // compute factorization
            if (info_fac != 0) { TRLIB_RETURN(TRLIB_TTR_FAIL_FACTOR); } // factorization failed

            TRLIB_DCOPY(&n, neglin, &inc, sol, &inc) // sol <-- neglin
            TRLIB_DPTTRS(&n, &inc, diag_fac, offdiag_fac, sol, &n, &info_fac) // sol <-- (T+lam I)^-1 sol
            if (info_fac != 0) { TRLIB_PRINTLN_2("Failure on backsolve for h") TRLIB_RETURN(TRLIB_TTR_FAIL_LINSOLVE) }
            if (refine) { TRLIB_DPTRFS(&n, &inc, diag_lam, offdiag, diag_fac, offdiag_fac, neglin, &n, sol, &n, &ferr, &berr, work, &info_fac) }
            if (info_fac != 0) { TRLIB_PRINTLN_2("Failure on iterative refinement for h") TRLIB_RETURN(TRLIB_TTR_FAIL_LINSOLVE) }

            TRLIB_DNRM2(*norm_sol, &n, sol, &inc)

            jj++;
            TRLIB_PRINTLN_2("%ld\t Reg %e\t Reg/Norm %e\t lb %e ub %e", jj, *lam, *lam/(*norm_sol), sigma_l, sigma_u);

            // check if accetable
            if( *norm_sol * sigma_l <= *lam && *lam <= *norm_sol * sigma_u ) {
                TRLIB_PRINTLN_1("Exit with Regularization Factor %e and Reg/Norm %e", *lam, *lam/(*norm_sol))
                TRLIB_RETURN(TRLIB_TTR_CONV_INTERIOR); 
            }
            else { // contract bounds
                if(*lam > *norm_sol * sigma_u) { lambda_u = *lam; }
                if(*lam < *norm_sol * sigma_l) { lambda_l = *lam; }
            }

        }
        TRLIB_PRINTLN_1("Failure: no convergence to determine regularaization factor, last iterate %e", *lam) TRLIB_RETURN(TRLIB_TTR_NEWTON_BREAK)
        
    }

}

trlib_int_t trlib_tri_factor_regularize_posdef(
    trlib_int_t n, trlib_flt_t *diag, trlib_flt_t *offdiag,
    trlib_flt_t tol_away, trlib_flt_t security_step, trlib_flt_t *regdiag) {

    /* modify diagonal to be able to factorize
       Cholesky recurrence for diagonal is
       diag_fac[0] = diag[0]
       diag_fac[i+1] = diag[i+1] - offdiag[i]*offdiag[i] / diag_fac[i]
       we have to ensure diag_fac > 0 */

    trlib_flt_t diag_fac = 0.0;
    trlib_int_t pivot = 0;
    
    regdiag[0] = 0.0;
    if (diag[0] <= tol_away) { regdiag[0] = security_step*tol_away; }
    diag_fac = diag[0] + regdiag[0];

    for(pivot = 0; pivot < n-1; ++pivot) {
        regdiag[pivot+1] = 0.0;
        if ( diag[pivot+1] - offdiag[pivot]*offdiag[pivot]/diag_fac <= tol_away * diag_fac ) {
            regdiag[pivot+1] = security_step * fabs(offdiag[pivot]*offdiag[pivot]/diag_fac - diag[pivot+1]);
        }
        diag_fac = diag[pivot+1] + regdiag[pivot+1] - offdiag[pivot]*offdiag[pivot]/diag_fac;
    }

    return 0;
}


trlib_int_t trlib_tri_timing_size() {
#if TRLIB_MEASURE_TIME
    return 1+TRLIB_SIZE_TIMING_LINALG+trlib_leftmost_timing_size()+trlib_eigen_timing_size();
#endif
    return 0;
}

trlib_int_t trlib_tri_factor_memory_size(trlib_int_t n) {
    return 6*n;
}