File: Sanity.pm

package info (click to toggle)
libdemeter-perl 0.9.27%2Bds6-9
  • links: PTS, VCS
  • area: contrib
  • in suites: forky, sid, trixie
  • size: 74,028 kB
  • sloc: perl: 73,233; python: 2,196; makefile: 1,999; ansic: 1,368; lisp: 454; sh: 74
file content (1092 lines) | stat: -rw-r--r-- 27,041 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
package Demeter::Fit::Sanity;

=for Copyright
 .
 Copyright (c) 2006-2019 Bruce Ravel (http://bruceravel.github.io/home).
 All rights reserved.
 .
 This file is free software; you can redistribute it and/or
 modify it under the same terms as Perl itself. See The Perl
 Artistic License.
 .
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

=cut

use Moose::Role;
use Demeter::StrTypes qw( IfeffitFunction IfeffitProgramVar );

use Carp;
use File::Spec;
use List::MoreUtils qw(any true);
use Graph;
use Regexp::Assemble;
use Demeter::Constants qw($NUMBER $NULLFILE);

use Text::Wrap;
$Text::Wrap::columns = 65;



## Check that the file behind each Data object can be found and read.
## Data objects with a true from_athena or from_yaml attribute, with a
## false fit_include attribute, or whose file attribute equals
## $NULLFILE are not examined.  A file that does not exist is marked
## with the '-e' trouble code, one that exists but cannot be read with
## '-r'.  Returns the number of Data objects that were marked.
sub S_data_files_exist {
  my ($self) = @_;
  my $found = 0;
  foreach my $d (@{ $self->data }) {
    next if $d->from_athena;
    next if $d->from_yaml;
    next if not $d->fit_include;
    my $file = $d->file;
    ## a placeholder file name excuses only this data set; returning
    ## here would discard problems already counted for earlier ones
    ## and leave later data sets unchecked
    next if ($file eq $NULLFILE);
    if (not -e $file) {
      ++$found;
      $d->add_trouble('-e');
    } elsif (not -r $file) {
      ++$found;
      $d->add_trouble('-r');
    };
  };
  return $found;
};

## Check the file named by the folder and file attributes of each path.
## Paths with a false include attribute, or whose Data object has a
## false fit_include attribute, are passed over.  A missing file earns
## the '-e' trouble code and an unreadable one '-r'.  Returns the number
## of paths that were marked.
sub S_feff_files_exist {
  my ($self) = @_;
  my $n_bad = 0;
  PATH: foreach my $path (@{ $self->paths }) {
    next PATH unless $path->include;
    next PATH unless $path->data->fit_include;
    my ($dir, $leaf) = $path->get(qw(folder file));
    my $fname = File::Spec->catfile($dir, $leaf);
    ## existence is tested first, readability only for files that exist
    my $code = (not -e $fname) ? '-e'
             : (not -r $fname) ? '-r'
             :                   q{};
    next PATH if ($code eq q{});
    ++$n_bad;
    $path->add_trouble($code);
  };
  return $n_bad;
};

## Check the sp attribute of each path that takes part in the fit.
## Paths with a false include attribute, or whose Data object has a
## false fit_include attribute, are passed over.  A path is also passed
## over when its file attribute is not a whitespace-only string and no
## file of that name exists.  Any remaining path whose sp attribute is
## not a reference to a class with "Path" in its name is marked with
## 'spnotexist'.  Returns the number of paths that were marked.
sub S_sp_exist {
  my ($self) = @_;
  my $n_bad = 0;
  foreach my $path (@{ $self->paths }) {
    next unless $path->include;
    next unless $path->data->fit_include;
    ## could be reading a feffNNNN.dat file directly
    ## NOTE(review): the file attribute is tested without its folder and
    ## the skip fires when the file is absent -- confirm this is intended
    my $fname = $path->file;
    next if (($fname !~ m{\A\s+\z}) and (not -e $fname));
    next if (ref($path->sp) =~ m{Path});
    ++$n_bad;
    $path->add_trouble('spnotexist');
  };
  return $n_bad;
};


## 1. check that all guesses are used in defs and pathparams
## Find guess parameters that no math expression refers to.  A guess
## counts as used when its name (compared in lower case, as a whole
## word) appears in the math expression of a GDS object whose type
## matches def or restrain, or in one of the path parameters of a path
## that takes part in the fit.  Each unused guess is marked with the
## 'notused' trouble code.  Returns the number of guesses marked.
sub S_defined_not_used {
  my ($self) = @_;
  my $found = 0;
  my @gds   = @{ $self->gds };
  my @paths = @{ $self->paths };
  my @pathparams = qw(s02 e0 delr sigma2 ei third fourth dphase);
  GUESS: foreach my $g (@gds) {
    next GUESS if ($g->gds ne 'guess');
    my $name = lc($g->name);
    ## quote the name so that a malformed one (a separate check reports
    ## those) is matched literally rather than compiled as a pattern
    my $name_re = qr{\b\Q$name\E\b};
    my $thisfound = 0;
    foreach my $d (@gds) {
      next if ($d->gds !~ m{(?:def|restrain)});
      next if (lc($d->mathexp) !~ $name_re);
      $thisfound = 1;
      last;
    };
    if (not $thisfound) {
      PATH: foreach my $p (@paths) {
        next PATH if (ref($p) !~ m{Path});
        next PATH if not $p->include;
        next PATH if not $p->data->fit_include;
        foreach my $pp (@pathparams) {
          next if (lc($p->$pp) !~ $name_re);
          $thisfound = 1;
          last PATH;
        };
      };
    };
    next GUESS if $thisfound;
    ++$found;
    ## append like every other check in this file, so that trouble
    ## codes already recorded on this parameter are kept
    $g->add_trouble('notused');
  };
  return $found;
};

## 2. check that defs and path parameters do not use undefined GDS parameters
## Look for tokens in math expressions that are not numbers, Ifeffit
## functions, the constants etok and pi, reff, or the (lower-cased)
## name of a GDS parameter whose type does not match merge or skip.
##
## GDS objects whose type matches guess, merge, or skip are not
## examined.  For the others, an unrecognized token that contains cv
## is marked 'usecv' and any other unrecognized token 'useundef'.
##
## For paths that take part in the fit, a token containing cv is
## accepted; any other unrecognized token is marked with
## "useundef~<path parameter>~<token>".
##
## Returns the number of unrecognized tokens that were marked.
sub S_used_not_defined {
  my ($self) = @_;
  my $found = 0;
  my @gds   = @{ $self->gds };
  my @paths = @{ $self->paths };
  my @all_params = ();
  foreach my $g (@gds) {
    next if ($g->gds =~ m{(?:merge|skip)});
    push @all_params, lc($g->name);
  };
  my $params_regexp = Regexp::Assemble->new()->add(@all_params)->re;
  my $tokenizer_regexp = '(?-xism:(?=[\t\ \(\)\*\+\,\-\/\^])[\-\+\*\^\/\(\)\,\ \t])';
  ## this came from:
  # use Regexp::List;
  # my $opt = Regexp::List->new;
  # print $opt->list2re('-', '+', '*', '^', '/', '(', ')', ',', " ", "\t"), $/;
  my $splitter = qr{$tokenizer_regexp+};

  ## the acceptance tests shared by GDS and path math expressions
  my $is_known = sub {
    my ($token) = @_;
    return 1 if ($token =~ m{\A\s*\z});               # space, ok
    return 1 if ($token =~ m{\A$NUMBER\z});           # number, ok
    return 1 if (is_IfeffitFunction($token));         # function, ok
    return 1 if (lc($token) =~ m{\A(?:etok|pi)\z});   # Ifeffit's defined constants, ok
    return 1 if (lc($token) =~ m{\A$params_regexp\z}); # defined param, ok
    return 1 if (lc($token) eq 'reff');               # reff, ok
    return 0;
  };
  ## deliberately unanchored, exactly as before: any token containing
  ## cv (with or without square brackets) is treated as a cv token
  my $is_cv = sub { return (lc($_[0]) =~ m{\[?cv\]?}) ? 1 : 0 };

  foreach my $g (@gds) {
    next if ($g->gds =~ m{(?:guess|merge|skip)});
    foreach my $token (split($splitter, $g->mathexp)) {
      next if $is_known->($token);
      ++$found;
      $g->add_trouble($is_cv->($token) ? 'usecv' : 'useundef');
    };
  };
  foreach my $p (@paths) {
    next if not defined($p);
    next if not $p->include;
    next if not $p->data->fit_include;
    foreach my $pp (qw(s02 e0 delr sigma2 ei third fourth dphase)) {
      foreach my $token (split($splitter, $p->$pp)) {
        next if $is_known->($token);
        next if $is_cv->($token);                     # cv, ok
        ++$found;
        $p->add_trouble(join('~', 'useundef', $pp, $token));
      };
    };
  };
  return $found;
};

## 3. check that ++ -- // *** ^^ do not appear in math expression
## Look for the operator sequences ++ -- *** // ^^ in math expressions.
## GDS objects whose type matches merge or skip are not examined; a
## hit is marked "binary_x_<operator>".  The path parameters of paths
## that take part in the fit are examined as well; a hit is marked
## "binary~<path parameter>~<operator>".  Only the first offending
## sequence in an expression is reported.  Returns the number of
## expressions that were marked.
sub S_binary_ops {
  my ($self) = @_;
  my $found = 0;
  my @gds   = @{ $self->gds };
  my @paths = @{ $self->paths };
  my $bad_binary_op_regexp = '(?-xism:(?=[\*\+\-\/\^])(?:\+\+|\-\-|\*\*\*|\/\/|\^\^))';
  ##Regexp::Assemble->new()->add('++', '--', '***', '//', '^^')->re;
  ## compiled once here instead of being interpolated at every match
  my $bad_op = qr{($bad_binary_op_regexp)};
  foreach my $g (@gds) {
    next if ($g->gds =~ m{(?:merge|skip)});
    next if ($g->mathexp !~ $bad_op);
    my $which = $1;
    ++$found;
    $g->add_trouble("binary_x_$which");
  };
  foreach my $p (@paths) {
    next if not defined($p);
    next if not $p->include;
    next if not $p->data->fit_include;
    foreach my $pp (qw(s02 e0 delr sigma2 ei third fourth dphase)) {
      next if ($p->$pp !~ $bad_op);
      my $which = $1;
      ++$found;
      $p->add_trouble(join('~', 'binary', $pp, $which));
    };
  };
  return $found;
};

## 4. check that all function() names are valid in math expressions
## Check every word that is followed by an opening parenthesis in a
## math expression against is_IfeffitFunction.  GDS objects whose type
## matches merge or skip are not examined; an unrecognized name is
## marked "function_<name>".  The path parameters of paths that take
## part in the fit are examined as well; an unrecognized name is marked
## "function~<path parameter>~<name>".  Each unrecognized name is
## reported once per expression.  Returns the number of marks made.
sub S_function_names {
  my ($self) = @_;
  my $found = 0;
  my @gds   = @{ $self->gds   };
  my @paths = @{ $self->paths };

  foreach my $g (@gds) {
    next if ($g->gds =~ m{(?:merge|skip)});
    ## match against a lexical copy: //g keeps its position on the
    ## variable, so the scan walks through every call in the expression
    ## rather than stopping after the first one
    my $mathexp = $g->mathexp;
    my %reported = ();
    while ($mathexp =~ m{(\b\w+)\s*\(}g) {
      my $match = $1;
      next if is_IfeffitFunction($match);
      next if $reported{$match}++;
      ++$found;
      $g->add_trouble("function_$match");
    };
  };
  foreach my $p (@paths) {
    next if not defined($p);
    next if not $p->include;
    next if not $p->data->fit_include;
    foreach my $pp (qw(s02 e0 delr sigma2 ei third fourth dphase)) {
      my $mathexp = $p->$pp;
      my %reported = ();
      while ($mathexp =~ m{(\b\w+)\s*\(}g) {
        my $match = $1;
        next if is_IfeffitFunction($match);
        next if $reported{$match}++;
        ++$found;
        $p->add_trouble(join('~', 'function', $pp, $match));
      };
    };
  };
  return $found;
};

## 5. check that all data have unique group names
## 6. check that all paths have unique group names
## Check the names of the Data and Path objects that take part in the
## fit.  Data objects with a false fit_include attribute are ignored,
## as are paths that are undefined, have a false include attribute, or
## belong to such a Data object.
##
##  * A Data object repeating an earlier group, tag, or cv is marked
##    'namenotunique', 'tagnotunique', or 'cvnotunique'.
##  * A path repeating an earlier path's group is marked
##    'namenotunique'.
##  * A Data object and a path sharing a group are both marked
##    'pathdataname'.
##
## Returns the number of repeated data groups, plus repeated tags,
## plus repeated path groups, plus groups shared between one Data
## object and one path.  Repeated cv values are marked but not counted.
sub S_unique_group_names {
  my ($self) = @_;
  my $found = 0;
  my @data  = grep { $_->fit_include } @{ $self->data };
  my @paths = grep { defined($_) and $_->include and $_->data->fit_include }
    @{ $self->paths };

  # check data group names
  my %dseen    = ();
  my %tag_seen = ();
  my %cv_seen  = ();
  foreach my $d (@data) {
    $d->add_trouble('namenotunique') if (++$dseen{$d->group}  > 1);
    $d->add_trouble('tagnotunique')  if (++$tag_seen{$d->tag} > 1);
    $d->add_trouble('cvnotunique')   if (++$cv_seen{$d->cv}   > 1);
  };
  $found += scalar(grep { $_ > 1 } values %dseen);
  $found += scalar(grep { $_ > 1 } values %tag_seen);
  ## repeated cv values are flagged on the object but, as before, do
  ## not add to the count

  # check path group names
  my %pseen = ();
  foreach my $p (@paths) {
    $p->add_trouble('namenotunique') if (++$pseen{$p->group} > 1);
  };
  $found += scalar(grep { $_ > 1 } values %pseen);

  # cross check data and path group names; only objects that take part
  # in the fit are compared, and a name repeated within the data or
  # within the paths is not mistaken for a data/path clash
  foreach my $d (@data) {
    $d->add_trouble('pathdataname') if exists $pseen{$d->group};
  };
  foreach my $p (@paths) {
    $p->add_trouble('pathdataname') if exists $dseen{$p->group};
  };
  foreach my $s (keys %dseen) {
    next if not exists $pseen{$s};
    ## names already counted above as repeated are not counted again
    ++$found if ($dseen{$s} < 2 and $pseen{$s} < 2);
  };
  return $found;
};

## 7. check that all GDS have unique names
## Compare the names of all GDS objects, ignoring case and leaving out
## objects of type skip.  Every object that repeats a name seen earlier
## in the list is marked 'notunique'.  Returns the number of distinct
## names that occur more than once.
sub S_gds_unique_names {
  my ($self) = @_;
  my %count_of = ();
  foreach my $param (grep { $_->gds ne 'skip' } @{ $self->gds }) {
    my $key = lc($param->name);
    $param->add_trouble('notunique') if (++$count_of{$key} > 1);
  };
  return scalar(grep { $_ > 1 } values %count_of);
};

## 8. check that parens match
## Pass math expressions to the check_parens method of the fit object
## and mark those for which it returns a true value.  GDS objects whose
## type matches merge or skip are not examined; a failure is marked
## 'parens'.  The path parameters of paths that take part in the fit
## are examined as well; a failure is marked "parens_<path parameter>".
## Returns the number of expressions that were marked.
sub S_parens_not_match {
  my ($self) = @_;
  my $n_bad = 0;
  my @params = @{ $self->gds   };
  my @routes = @{ $self->paths };
  foreach my $param (@params) {
    next if ($param->gds =~ m{(?:merge|skip)});
    next if not $self->check_parens($param->mathexp);
    ++$n_bad;
    $param->add_trouble('parens');
  };
  foreach my $route (@routes) {
    next unless defined($route);
    next unless $route->include;
    next unless $route->data->fit_include;
    foreach my $pp (qw(s02 e0 delr sigma2 ei third fourth dphase)) {
      next if not $self->check_parens($route->$pp);
      ++$n_bad;
      $route->add_trouble("parens_".$pp);
    };
  };
  return $n_bad;
};

## 9. check that data params make sense
## Check the transform ranges of every Data object with a true
## fit_include attribute.  When fft_kmin is greater than or equal to
## fft_kmax the object is marked 'kminkmax'; when bft_rmin is greater
## than or equal to bft_rmax it is marked 'rminrmax'.  Returns the
## number of marks made.
sub S_data_parameters {
  my ($self) = @_;
  my $n_bad = 0;
  ## each row: lower-bound attribute, upper-bound attribute, trouble code
  my @ranges = (
    [qw(fft_kmin fft_kmax kminkmax)],
    [qw(bft_rmin bft_rmax rminrmax)],
  );
  foreach my $data (@{ $self->data }) {
    next unless $data->fit_include;
    foreach my $range (@ranges) {
      my ($lo_attr, $hi_attr, $code) = @{ $range };
      my ($lo, $hi) = $data->get($lo_attr, $hi_attr);
      next unless ($lo >= $hi);
      ++$n_bad;
      $data->add_trouble($code);
    };
  };
  return $n_bad;
};

## 10. check that number of guesses does not exceed Nidp
## Compare the number of GDS objects of type guess with the sum of the
## nidp attributes of all Data objects that have a true fit_include
## attribute.  When there are more guesses than that sum, the fit
## object itself is marked 'nvarnidp' and 1 is returned; otherwise 0.
sub S_nidp {
  my ($self) = @_;
  my @params  = @{ $self->gds  };
  my @fitted  = grep { $_->fit_include } @{ $self->data };
  my $n_avail = 0;
  $n_avail += $_->nidp foreach @fitted;
  my $n_guess = grep { $_->gds eq 'guess' } @params;
  return 0 unless ($n_guess > $n_avail);
  $self->add_trouble('nvarnidp');
  return 1;
};

## 11. check that rmin is not smaller than rbkg
## Mark every Data object whose bft_rmin is smaller than its bkg_rbkg
## with the 'rminrbkg' trouble code.  Data objects whose datatype is
## 'chi' or whose fit_include attribute is false are not examined.
## Returns the number of Data objects that were marked.
sub S_rmin_rbkg {
  my ($self) = @_;
  my $n_bad = 0;
  foreach my $data (@{ $self->data }) {
    next if ($data->datatype eq 'chi');
    next unless $data->fit_include;
    next unless ($data->bft_rmin < $data->bkg_rbkg);
    ++$n_bad;
    $data->add_trouble('rminrbkg');
  };
  return $n_bad;
};

## 12. check that reff is not far beyond Rmax for any path
## Mark paths whose reff lies beyond the fitting range of their data.
## For every Data object with a true fit_include attribute the limit is
##    0.2 + bft_rmax * (the 'reff_margin' value of the 'warnings'
##                      configuration group)
## and every defined path with a true include attribute that belongs to
## that Data object and has reff above the limit is marked 'reffrmax'.
## Returns the number of paths that were marked.
sub S_reff_rmax {
  my ($self) = @_;
  my $found = 0;
  my @data  = @{ $self->data  };
  my @paths = @{ $self->paths };
  ## the margin does not depend on the data set or path, so it is
  ## looked up once rather than for every path
  my $margin = $self->co->default('warnings', 'reff_margin');
  foreach my $d (@data) {
    next if (not $d->fit_include);
    my $limit = 0.2+$d->bft_rmax*$margin;
    foreach my $p (@paths) {
      next if not defined($p);
      next if (not $p->include);
      next if ($p->data ne $d);
      next if not ($p->reff > $limit);
      ++$found;
      $p->add_trouble('reffrmax');
    };
  };
  return $found;
};

#  &max_scalars   =   65536.000000000
#  &max_arrays    =    8192.000000000
#  &max_strings   =    8192.000000000
#  &max_paths     =    1024.000000000
#  &max_varys     =     128.000000000
#  &max_data_sets =      16.000000000
#  spline knots   =      32
#  restraints     =      10
## 13. check that ifeffit hardwired limits are not exceeded
## Compare the size of the fitting model with limits obtained from the
## fetch_scalar method, marking the fit object for each limit exceeded:
##    guess parameters               vs &max_varys      -> 'nvarys'
##    GDS objects not merge or skip  vs &max_scalars    -> 'nparams'
##    restrain parameters            vs 10              -> 'nrestraints'
##    Data objects in the fit        vs &max_data_sets  -> 'ndatasets'
##    paths in the fit               vs &max_paths      -> 'npaths'
## Nothing is checked, and 0 is returned, when Demeter->is_larch is
## true.  Otherwise returns the number of limits exceeded.
sub S_exceed_ifeffit_limits {
  my ($self) = @_;
  return 0 if Demeter->is_larch;

  my @params = @{ $self->gds   };
  my @sets   = @{ $self->data  };
  my @routes = @{ $self->paths };

  my $n_guess     = grep { $_->gds eq 'guess' }           @params;
  my $n_params    = grep { $_->gds !~ m{(?:merge|skip)} } @params;
  my $n_restraint = grep { $_->gds eq 'restrain' }        @params;
  my $n_data      = grep { $_->fit_include }              @sets;
  my $n_paths     = grep { defined($_) and $_->include and $_->data->fit_include } @routes;

  ## each row: the count, a closure giving its limit, the trouble code;
  ## the limits are only looked up when their row is evaluated
  my @limits = (
    [ $n_guess,     sub { $self->fetch_scalar('&max_varys')     }, 'nvarys'      ],
    [ $n_params,    sub { $self->fetch_scalar('&max_scalars')   }, 'nparams'     ],
    [ $n_restraint, sub { 10                                    }, 'nrestraints' ],
    [ $n_data,      sub { $self->fetch_scalar('&max_data_sets') }, 'ndatasets'   ],
    [ $n_paths,     sub { $self->fetch_scalar('&max_paths')     }, 'npaths'      ],
  );

  my $n_exceeded = 0;
  foreach my $row (@limits) {
    my ($count, $limit_of, $code) = @{ $row };
    next unless ($count > $limit_of->());
    ++$n_exceeded;
    $self->add_trouble($code);
  };
  return $n_exceeded;
};

## 14. check that parameters do not have program variable names
## Mark every GDS object whose lower-cased name satisfies
## is_IfeffitProgramVar with the 'progvar' trouble code.  Returns the
## number of GDS objects that were marked.
sub S_program_var_names {
  my ($self) = @_;
  my @reserved = grep { is_IfeffitProgramVar(lc($_->name)) } @{ $self->gds };
  $_->add_trouble('progvar') foreach @reserved;
  return scalar(@reserved);
};

## 15. check that parameter names are made of allowed characters
##
## A name is acceptable when, ignoring case, it starts with a letter or
## an underscore and continues with letters, digits, or underscores
## only.  Every GDS object with any other name is marked 'badchar'.
## Returns the number of GDS objects that were marked.
sub S_bad_character {
  my ($self) = @_;
  my $n_bad = 0;
  foreach my $param (@{ $self->gds }) {
    next if (lc($param->name) =~ m{\A[a-z_][a-z0-9_]*\z});
    ++$n_bad;
    $param->add_trouble('badchar');
  };
  return $n_bad;
};

## 16. check that all Path objects have either a ScatteringPath or a folder/file defined
## Check that each path taking part in the fit has something to
## compute from.  A path is accepted when its sp attribute is a
## reference to a class whose name contains ScatteringPath, SSPath, or
## FPath, or when its file attribute is true and the file formed from
## its folder and file attributes exists.  Every other path is marked
## 'nocalc'.  Returns the number of paths that were marked.
sub S_path_calculation_exists {
  my ($self) = @_;
  my $n_bad = 0;
  foreach my $path (@{ $self->paths }) {
    next unless $path->include;
    next unless $path->data->fit_include;
    next if (ref($path->sp) =~ m{(?:ScatteringPath|SSPath|FPath)});
    my $feff_file = File::Spec->catfile($path->folder, $path->file);
    next if ((-e $feff_file) and $path->file);
    ++$n_bad;
    $path->add_trouble('nocalc');
  };
  return $n_bad;
};

## 17. check that there are no unresolved merge parameters
## Mark every GDS object of type merge with the 'merge' trouble code.
## Returns the number of GDS objects that were marked.
sub S_notice_merge {
  my ($self) = @_;
  my @unresolved = grep { $_->gds eq 'merge' } @{ $self->gds };
  $_->add_trouble('merge') foreach @unresolved;
  return scalar(@unresolved);
};

## 18. check that no more than one path is flagged as the default
## Count the paths taking part in the fit that have a true default_path
## attribute.  When there is more than one, the fit object is marked
## 'defaultpath' and that count is returned.  With zero or one default
## path nothing is wrong, so 0 is returned, in keeping with the other
## checks in this file, which return the number of problems found.
sub S_default_path {
  my ($self) = @_;
  my $n_default = 0;
  foreach my $p (@{ $self->paths }) {
    next if not $p->include;
    next if not $p->data->fit_include;
    ++$n_default if $p->default_path;
  };
  ## a single default path is the normal, unambiguous situation
  return 0 if ($n_default <= 1);
  $self->add_trouble('defaultpath');
  return $n_default;
};

## 19. check for loops and cycles among the GDS math expressions
## Build a directed graph with an edge from each GDS parameter to every
## token of its lower-cased math expression that is not blank, a
## number, an Ifeffit function, one of the constants etok and pi, or
## reff.  GDS objects whose type matches merge or skip are left out.
##
## Each vertex with an edge to itself is reported on the fit object as
## "loop~x~<name>".  If the graph has a cycle, the one returned by
## find_a_cycle is reported as "cycle~x~<a --> b --> ...>" provided it
## has more than one member; single-member cycles are the loops already
## reported.  Returns the number of reports made.
sub S_cycle_loop {
  my ($self) = @_;
  my $found = 0;
  my @gds   = @{ $self->gds };
  my $tokenizer_regexp = '(?-xism:(?=[\t\ \(\)\*\+\,\-\/\^])[\-\+\*\^\/\(\)\,\ \t])';
  #my $tokenizer_regexp = Regexp::Assemble->new()->add('-', '+', '*', '^', '/', '(', ')', ',', " ", "\t")->re;
  my $graph = Graph->new;

  foreach my $g (@gds) {
    next if ($g->gds =~ m{(?:merge|skip)});
    my $name = lc($g->name);
    ## the expression is lower-cased before splitting, so the tokens
    ## need no further case folding
    foreach my $token (split(/$tokenizer_regexp+/, lc($g->mathexp))) {
      next if ($token =~ m{\A\s*\z});             # space, ok
      next if ($token =~ m{\A$NUMBER\z});         # number, ok
      next if (is_IfeffitFunction($token));       # function, ok
      next if ($token =~ m{\A(?:etok|pi)\z});     # Ifeffit's defined constants, ok
      next if ($token eq 'reff');                 # reff, ok

      $graph -> add_edge($name, $token);
    };
  };

  foreach my $loop ($graph->self_loop_vertices) {
    $self->add_trouble(join('~', 'loop', 'x', $loop));
    ++$found;
  };
  if ($graph->has_a_cycle) {
    my @cycle = $graph->find_a_cycle;
    ## we have already reported on loops; an explicit size test also
    ## keeps an empty list from being reported as a cycle
    if (@cycle > 1) {
      $self->add_trouble(join('~', 'cycle', 'x', join(" --> ", @cycle)));
      ++$found;
    };
  };

  return $found;
};

## 20. check for an obvious data repetition, Data attribute collided set to 1 for any data group
## Mark every Data object with a true fit_include attribute whose group
## is carried by more than one entry of the data list (entries with a
## false fit_include attribute are counted too) with the 'collision'
## trouble code.  Returns the number of Data objects that were marked.
sub S_data_collision {
  my ($self) = @_;
  my @sets = @{ $self->data };
  ## tally every entry of the list by group, whether fitted or not
  my %n_with_group = ();
  ++$n_with_group{$_->group} foreach @sets;
  my $n_bad = 0;
  foreach my $set (@sets) {
    next unless $set->fit_include;
    next unless ($n_with_group{$set->group} > 1);
    ++$n_bad;
    $set->add_trouble('collision');
  };
  return $n_bad;
};

## 21. check that each data set has at least one path associated with it
## Mark every Data object with a true fit_include attribute that is not
## the data attribute of any entry in the path list with the
## 'datanopaths' trouble code.  Returns the number of Data objects that
## were marked.
sub S_data_paths {
  my ($self) = @_;
  my @sets   = @{ $self->data  };
  my @routes = @{ $self->paths };
  my $n_bad = 0;
  foreach my $set (@sets) {
    next unless $set->fit_include;
    my $n_assigned = grep { $_->data eq $set } @routes;
    next unless ($n_assigned == 0);
    ++$n_bad;
    $set->add_trouble('datanopaths');
  };
  return $n_bad;
};

1;


=head1 NAME

Demeter::Fit::Sanity - Sanity checks for EXAFS fitting models

=head1 VERSION

This documentation refers to Demeter version 0.9.26.

=head1 SYNOPSIS

  my $fitobject = Demeter::Fit ->
     new(gds   => \@gds_objects,
	 data  => [$data_object],
	 paths => \@path_objects,
	);
  $command = $fitobject -> fit;

Before the fit method is run, a series of sanity checks is run on the
data contained in the fit object.  The sanity checks all live in
this module.

=head1 DESCRIPTION

This module contains all the sanity checks made on a Fit object before
the fit starts.  This file forms part of the base of the
Demeter::Fit class and serves no independent function.  That
is, using this module directly in a program does nothing useful -- it
is purely a utility module for the Fit object.

The user should never need to call the methods explicitly since they
are called automatically whenever a fit or a sum is performed.
However they are documented here so that the scope of such checks made
is clearly understood.

When problems are found, the fit will exit and a descriptive report
will be made.

=head1 METHODS

The following sanity checks are made on the Fit object:

=over 4

=item *

All data files included in the fit exist.

=item *

No data set is obviously used twice in the fit.

=item *

All F<feffNNNN.dat> files used in the fit exist.

=item *

All guess parameters are used in at least one def parameter or path
parameter.

=item *

No def or path parameters use parameters which have not been defined.

=item *

Binary operators are used correctly, specifically that none of these
strings appear in a math expression:

   ++    --   //   ***   ^^

=item *

All function names (i.e. strings that are followed by an open paren)
are valid Ifeffit functions.

=item *

All data and path objects have unique group names.

=item *

All GDS parameters have unique names.

=item *

All opening parens are matched by closing parens.

=item *

All data parameters make sense, for example that C<fft_kmin> is
smaller than C<fft_kmax>.

=item *

The number of guess parameters does not exceed the number of
independent points.

=item *

The C<bft_rmin> value is not smaller than C<bkg_rbkg>.

=item *

The R_eff of any path is not far beyond C<bft_rmax>.

=item *

Ifeffit's hardwired limits on things like the maximum number of guess
parameters and the maximum number of data sets are not exceeded by the
fitting model.

=item *

No GDS parameters have the names of Ifeffit program variables or other
reserved words.

=item *

No merge parameters remain unresolved.

=back

=head1 TROUBLE REPORTING

The C<trouble> attribute of a Demeter object will be filled with a
pipe-separated list of problem codes.

Some error codes contain additional information to further identify
the problem.  These codes have a keyword separated from the other
information by an underscore or a tilde (C<~>), making these
sufficiently easy to parse on the fly.  Indeed, the
C<translate_trouble> method of the base object (see L<Demeter>) does
just that, so error reporting during a fit is an example of literate
programming.

Here are the explanations:

=head2 Problems with Data objects

=over 4

=item C<-e>

You specified an explicit data file to use in the fit (i.e. not part
of a project file) and that file does not exist.

=item C<-r>

You specified an explicit data file to use in the fit (i.e. not part
of a project file) and that data file cannot be read.

=item C<namenotunique>

The Ifeffit group name of this data group is not unique.

=item C<pathdataname>

This data group has an Ifeffit group name which is also used by a Path object.

=item C<tagnotunique>

The tag of this data group is not unique.

=item C<cvnotunique>

The characteristic value of this data group is not unique.

=item C<kminkmax>

C<kmin> is larger than or equal to C<kmax>.

=item C<rminrmax>

C<rmin> is larger than or equal to C<rmax>.

=item C<rminrbkg>

C<rmin> is smaller than the value of C<rbkg> that was used in the
background removal.

=item C<collision>

This data came from the same source as another data group.  You
seem to be trying to increase your number of independent points by
fitting the same data more than once in a multiple data set fit.

=item C<datanopaths>

This data has no paths associated with it.  You must either assign
paths to it or exclude it from the fit.

=back

=head2 Problems with Path objects

=over 4

=item C<-e>

The path file does not exist (perhaps the Feff calculation was not run).

=item C<-r>

The path file cannot be read.

=item C<spnotexist>

The C<sp> attribute is not defined or not set to a ScatteringPath or
other Path object, and the path is not using a feffNNNN.dat file
directly.

=item C<useundef> + C<$pp> + C<$token>

The math expression for the C<$pp> path parameter contains an undefined
parameter, C<$token>.

=item C<binary> + C<$pp> + C<$token>

The math expression for the C<$pp> path parameter contains an unallowed
binary math operator, C<$token>.

=item C<function> + C<$pp> + C<$token>

The math expression for the C<$pp> path parameter contains a
mathematical function unknown to Ifeffit, C<$token>.

=item C<namenotunique>

The Ifeffit group name for this path is not unique.

=item C<pathdataname>

This path has an Ifeffit/Larch group name which is used by a Data object.

=item C<parens> + C<$pp>

The math expression for the C<$pp> path parameter has unmatched parentheses.

=item C<reffrmax>

The R effective for this path is much larger than the C<rmax> value
chosen for the fit to the data.

=item C<nocalc>

It seems as though the Feff calculation for this path has not been made yet.

=back

=head2 Problems with GDS objects

=over 4

=item C<notused>

This is a guess parameter which is not used in the math expressions
for any def or path parameters.

=item C<usecv>

This is a def parameter which uses the characteristic value (cv).
This is not yet allowed for def parameters.

=item C<useundef>

The math expression for this GDS parameter uses an undefined parameter
name.

=item C<binary> + C<$token>

The math expression for this GDS parameter contains an unallowed
binary math operator, C<$token>.

=item C<function> + C<$token>

The math expression for this GDS parameter contains a mathematical
function unknown to Ifeffit, C<$token>.

=item C<notunique>

The name of this GDS parameter is not unique.

=item C<parens>

The math expression for this GDS parameter has unmatched parentheses.

=item C<progvar>

The name of this GDS parameter is an Ifeffit program variable name.

=item C<badchar>

The name of this GDS parameter contains an unallowed character.
Allowed characters are letters (a-z), numbers (0-9), and underscore
(_).  The first character must not be a number.

=item C<merge>

This is a parameter which has been defined twice, possibly from the
merge of fitting projects or the creation of two or more similar quick
first shell fitting models.

=back

=head2 Problems with Fit objects

=over 4

=item C<gds>

No GDS parameters are defined for this fit

=item C<data>

No data sets are defined for this fit

=item C<paths>

No paths are defined for this fit

=item C<nvarnidp>

This fitting model uses more guess parameters than the available
information content of the data.

=item C<nvarys>

This fitting model uses more than Ifeffit's compiled-in limit of guess
parameters (&max_varys).

=item C<nparams>

This fitting model uses more than Ifeffit's compiled-in limit of
parameters (&max_scalars).

=item C<nrestraints>

This fitting model uses more than Ifeffit's compiled-in limit of
restraints (10).

=item C<ndatasets>

This fitting model uses more than Ifeffit's compiled-in limit of
data sets (&max_data_sets).

=item C<npaths>

This fitting model uses more than Ifeffit's compiled-in limit of
paths (&max_paths).

=item C<defaultpath>

More than one path is flagged as being the default path, making it
unclear how to evaluate the log file.

=item C<loop> + C<$token>

The parameter C<$token> refers to itself in its math expression.

=item C<cycle> + C<$token>

There is a cyclical dependence among a set of parameter math
expressions. This cycle is C<$token>.

=back

=head1 CONFIGURATION AND ENVIRONMENT

See L<Demeter> for a description of the configuration system.

=head1 BUGS AND LIMITATIONS

Missing tests:

=over 4

=item *

Test that every Path is associated with a data set.  (Warn, not fatal.)

=item *

Test that each data in the data array is properly defined.

=item *

Test that every Path points to a real path file

=back

Please report problems to the Ifeffit Mailing List
(L<http://cars9.uchicago.edu/mailman/listinfo/ifeffit/>)

Patches are welcome.

=head1 AUTHOR

Bruce Ravel, L<http://bruceravel.github.io/home>

L<http://bruceravel.github.io/demeter/>

=head1 LICENCE AND COPYRIGHT

Copyright (c) 2006-2019 Bruce Ravel (L<http://bruceravel.github.io/home>). All rights reserved.

This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself. See L<perlgpl>.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

=cut