File: basic.t

package info (click to toggle)
libpdl-stats-perl 0.855-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 468 kB
  • sloc: perl: 1,459; makefile: 3
file content (188 lines) | stat: -rw-r--r-- 7,663 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
use strict;
use warnings;
use Test::More;

use PDL::LiteF;
use PDL::Stats::Basic;
use Test::PDL;

my $a = sequence 5;
is_pdl $a->stdv, pdl( 1.4142135623731 ), "standard deviation of $a";
is_pdl $a->stdv_unbiased, pdl( 1.58113883008419 ), "unbiased standard deviation of $a";
is_pdl $a->var, pdl( 2 ), "variance of $a";
is_pdl $a->var_unbiased, pdl( 2.5 ), "unbiased variance of $a";
is_pdl $a->se, pdl( 0.707106781186548 ), "standard error of $a";
is_pdl $a->ss, pdl( 10 ), "sum of squared deviations from the mean of $a";
is_pdl $a->skew, pdl( 0 ), "sample skewness of $a";
is_pdl $a->skew_unbiased, pdl( 0 ), "unbiased sample skewness of $a";
is_pdl $a->kurt, pdl( -1.3 ), "sample kurtosis of $a";
is_pdl $a->kurt_unbiased, pdl( -1.2 ), "unbiased sample kurtosis of $a";

{
my $x = pdl [(0.001) x 6];
my $var = $x->var;
ok $var >= 0, 'var >= 0' or diag "var = $var";
my $stdv = $x->stdv;
ok $stdv >= 0, 'stdv >= 0' or diag "stdv = $stdv";
}

is_pdl $_->ss, (($_ - $_->avg)**2)->sumover, "ss for $_" for
  pdl('[1 1 1 1 2 3 4 4 4 4 4 4]'),
  pdl('[1 2 2 2 3 3 3 3 4 4 5 5]'),
  pdl('[1 1 1 2 2 3 3 4 4 5 5 5]');

my $a_bad = sequence 6;
$a_bad->setbadat(-1);
is_pdl $a_bad->stdv, pdl( 1.4142135623731 ), "standard deviation of $a_bad";
is_pdl $a_bad->stdv_unbiased, pdl( 1.58113883008419 ), "unbiased standard deviation of $a_bad";
is_pdl $a_bad->var, pdl( 2 ), "variance of $a_bad";
is_pdl $a_bad->var_unbiased, pdl( 2.5 ), "unbiased variance of $a_bad";
is_pdl $a_bad->se, pdl( 0.707106781186548 ), "standard error of $a_bad";
is_pdl $a_bad->ss, pdl( 10 ), "sum of squared deviations from the mean of $a_bad";
is_pdl $a_bad->skew, pdl( 0 ), "sample skewness of $a_bad";
is_pdl $a_bad->skew_unbiased, pdl( 0 ), "unbiased sample skewness of $a_bad";
is_pdl $a_bad->kurt, pdl( -1.3 ), "sample kurtosis of $a_bad";
is_pdl $a_bad->kurt_unbiased, pdl( -1.2 ), "unbiased sample kurtosis of $a_bad";

my $b = pdl '0 0 0 1 1';
is_pdl $a->cov($b), pdl( 0.6 ), "sample covariance of $a and $b";
is_pdl $a->corr($b), pdl( 0.866025403784439 ), "Pearson correlation coefficient of $a and $b";
is_pdl $a->n_pair($b), indx( 5 ), "Number of good pairs between $a and $b";
is_pdl $a->corr($b)->t_corr( 5 ), pdl( 3 ), "t significance test of Pearson correlation coefficient of $a and $b";
is_pdl $a->corr_dev($b), pdl( 0.903696114115064 ), "correlation calculated from dev_m values of $a and $b";

my $b_bad = pdl 'BAD 0 0 1 1 1';
is_pdl $a_bad->cov($b_bad), pdl( 0.5 ), "sample covariance with bad data of $a_bad and $b_bad";
is_pdl $a_bad->corr($b_bad), pdl( 0.894427190999916 ), "Pearson correlation coefficient with bad data of $a_bad and $b_bad";
is_pdl $a_bad->n_pair($b_bad), indx( 4 ), "Number of good pairs between $a_bad and $b_bad with bad values taken into account";
is_pdl $a_bad->corr($b_bad)->t_corr( 4 ), pdl( 2.82842712474619 ), "t signifiance test of Pearson correlation coefficient with bad data of $a_bad and $b_bad";
is_pdl $a_bad->corr_dev($b_bad), pdl( 0.903696114115064 ), "correlation calculated from dev_m values with bad data of $a_bad and $b_bad";

my ($t, $df) = $a->t_test($b);
is_pdl $t, pdl( 2.1380899352994 ), "t-test between $a and $b - 't' output";
is_pdl $df, pdl( 8 ), "t-test between $a and $b - 'df' output";

($t, $df) = $a->t_test_nev($b);
is_pdl $t, pdl( 2.1380899352994 ), "t-test with non-equal variance between $a and $b - 't' output";
is_pdl $df, pdl( 4.94637223974763 ), "t-test with non-equal variance between $a and $b - 'df' output";

($t, $df) = $a->t_test_paired($b);
is_pdl $t, pdl( 3.13785816221094 ), "paired sample t-test between $a and $b - 't' output";
is_pdl $df, pdl( 4 ), "paired sample t-test between $a and $b - 'df' output";

($t, $df) = $a_bad->t_test($b_bad);
is_pdl $t, pdl( 1.87082869338697 ), "t-test with bad values between $a_bad and $b_bad - 't' output";
is_pdl $df, pdl( 8 ), "t-test with bad values between $a_bad and $b_bad - 'd' output";

($t, $df) = $a_bad->t_test_nev($b_bad);
is_pdl $t, pdl( 1.87082869338697 ), "t-test with non-equal variance with bad values between $a_bad and $b_bad - 't' output";
is_pdl $df, pdl( 4.94637223974763 ), "t-test with non-equal variance with bad values between $a_bad and $b_bad - 'df' output";

($t, $df) = $a_bad->t_test_paired($b_bad);
is_pdl $t, pdl( 4.89897948556636 ), "paired sample t-test with bad values between $a_bad and $b_bad - 't' output";
is_pdl $df, pdl( 3 ), "paired sample t-test with bad values between $a_bad and $b_bad - 'df' output";

{
  my ($data, $idv, $ido) = rtable(\*DATA, {V=>0});
  is_pdl $data, pdl '
   [  5 BAD BAD   2 BAD   5 BAD   9   4 BAD BAD BAD   5 BAD]
   [  7 BAD   3   7   0 BAD   0   8 BAD   0   3   0 BAD   0]
   [BAD BAD BAD BAD BAD   1 BAD   1 BAD BAD BAD BAD   1 BAD]
   [BAD BAD BAD BAD BAD   0 BAD   5 BAD BAD BAD BAD   0 BAD]
   [BAD BAD   0 BAD   2 BAD   0 BAD BAD   0   0   2 BAD   0]
  ';
}

{
  my $a = pdl '
  0.045 0.682 0.290 0.024 0.598 0.321 0.772 0.375 0.237 0.811;
  0.356 0.094 0.925 0.139 0.701 0.849 0.689 0.109 0.240 0.847;
  0.822 0.492 0.351 0.860 0.400 0.243 0.313 0.011 0.437 0.480
';
  is_pdl $a->cov_table, $a->cov($a->dummy(1)), 'cov_table';
  $a->setbadat(4,0);
  is_pdl $a->cov_table, $a->cov($a->dummy(1)), 'cov_table bad val';
}

{
  my $a = pdl '
  0.045 0.682 0.290 0.024 0.598 0.321 0.772 0.375 0.237 0.811;
  0.356 0.094 0.925 0.139 0.701 0.849 0.689 0.109 0.240 0.847;
  0.822 0.492 0.351 0.860 0.400 0.243 0.313 0.011 0.437 0.480
';
  is_pdl $a->corr_table, $a->corr($a->dummy(1)), "Square Pearson correlation table";
  $a->setbadat(4,0);
  is_pdl $a->corr_table, $a->corr($a->dummy(1)), "Square Pearson correlation table with bad data";
}

{
  my $a = pdl([0,1,2,3,4], [0,0,0,0,0]);
  $a = $a->setvaltobad(0);
  ok $a->stdv->nbad, "Bad value input to stdv makes the stdv itself bad";
}

SKIP: {
  eval { require PDL::Core; require PDL::GSL::CDF; };
  skip 'no PDL::GSL::CDF', 1 if $@;
  my $x = pdl(1, 2);
  my $n = pdl(2, 10);
  my $p = .5;
  is_pdl binomial_test( $x,$n,$p ), pdl(0.75, 0.9892578125), 'binomial_test';
}

{
    my $a = sequence 10, 2;
    my $factor = sequence(10) > 4;
    my $ans = pdl( [[0..4], [10..14]], [[5..9], [15..19]] );
    my ($a_, $l) = $a->group_by($factor);
    is_pdl $a_, $ans, 'group_by single factor equal n';
    is_deeply $l, [0, 1], 'group_by single factor label';

    $a = sequence 10,2;
    $factor = qsort sequence(10) % 3;
    $ans = pdl( [1.5, 11.5], [5, 15], [8, 18] );
    is_pdl $a->group_by($factor)->average, $ans, 'group_by single factor unequal n';

    $a = sequence 10;
    my @factors = ( [qw( a a a a b b b b b b )], [qw(0 1 0 1 0 1 0 1 0 1)] );
    $ans = pdl '[ 0 2 BAD; 1 3 BAD ], [ 4 6 8; 5 7 9 ]';
    ($a_, $l) = $a->group_by( @factors );
    is_pdl $a_, $ans, 'group_by multiple factors';
    is_deeply $l, [[qw(a_0 a_1)], [qw( b_0 b_1 )]], 'group_by multiple factors label';
}


{
    my @a = qw(a a b b c c);
    my $a = PDL::Stats::Basic::code_ivs( \@a );
    my $ans = pdl( 0,0,1,1,2,2 );
    is_pdl $a, $ans, 'code_ivs';

    $a[-1] = undef;
    my $a_bad = PDL::Stats::Basic::code_ivs( \@a );
    my $ans_bad = pdl '0 0 1 1 2 BAD';
    is_pdl $a_bad, $ans_bad, 'code_ivs with missing value undef correctly coded';

    $a[-1] = 'BAD';
    $a_bad = PDL::Stats::Basic::code_ivs( \@a );
    is_pdl $a_bad, $ans_bad, 'code_ivs with missing value BAD correctly coded';
}

done_testing();

__DATA__
999	90	91	92	93	94	
70	5	7	-999	-999	-999	
711	trying
71	-999	3	-999	-999	0	
72	2	7	-999	-999	-999	
73	-999	0	-999	-999	2	
74	5	-999	1	0	-999	
75	-999	0	-999	-999	0	
76	9	8	1	5	-999	
77	4	-999	-999	-999	-999	
78	-999	0	-999	-999	0	
79	-999	3	-999	-999	0	
80	-999	0	-999	-999	2	
81	5	-999	1	0	-999	
82	-999	0	-999	-999	0