1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188
|
use strict;
use warnings;
use Test::More;
use PDL::LiteF;
use PDL::Stats::Basic;
use Test::PDL;
my $a = sequence 5;
is_pdl $a->stdv, pdl( 1.4142135623731 ), "standard deviation of $a";
is_pdl $a->stdv_unbiased, pdl( 1.58113883008419 ), "unbiased standard deviation of $a";
is_pdl $a->var, pdl( 2 ), "variance of $a";
is_pdl $a->var_unbiased, pdl( 2.5 ), "unbiased variance of $a";
is_pdl $a->se, pdl( 0.707106781186548 ), "standard error of $a";
is_pdl $a->ss, pdl( 10 ), "sum of squared deviations from the mean of $a";
is_pdl $a->skew, pdl( 0 ), "sample skewness of $a";
is_pdl $a->skew_unbiased, pdl( 0 ), "unbiased sample skewness of $a";
is_pdl $a->kurt, pdl( -1.3 ), "sample kurtosis of $a";
is_pdl $a->kurt_unbiased, pdl( -1.2 ), "unbiased sample kurtosis of $a";
{
my $x = pdl [(0.001) x 6];
my $var = $x->var;
ok $var >= 0, 'var >= 0' or diag "var = $var";
my $stdv = $x->stdv;
ok $stdv >= 0, 'stdv >= 0' or diag "stdv = $stdv";
}
is_pdl $_->ss, (($_ - $_->avg)**2)->sumover, "ss for $_" for
pdl('[1 1 1 1 2 3 4 4 4 4 4 4]'),
pdl('[1 2 2 2 3 3 3 3 4 4 5 5]'),
pdl('[1 1 1 2 2 3 3 4 4 5 5 5]');
my $a_bad = sequence 6;
$a_bad->setbadat(-1);
is_pdl $a_bad->stdv, pdl( 1.4142135623731 ), "standard deviation of $a_bad";
is_pdl $a_bad->stdv_unbiased, pdl( 1.58113883008419 ), "unbiased standard deviation of $a_bad";
is_pdl $a_bad->var, pdl( 2 ), "variance of $a_bad";
is_pdl $a_bad->var_unbiased, pdl( 2.5 ), "unbiased variance of $a_bad";
is_pdl $a_bad->se, pdl( 0.707106781186548 ), "standard error of $a_bad";
is_pdl $a_bad->ss, pdl( 10 ), "sum of squared deviations from the mean of $a_bad";
is_pdl $a_bad->skew, pdl( 0 ), "sample skewness of $a_bad";
is_pdl $a_bad->skew_unbiased, pdl( 0 ), "unbiased sample skewness of $a_bad";
is_pdl $a_bad->kurt, pdl( -1.3 ), "sample kurtosis of $a_bad";
is_pdl $a_bad->kurt_unbiased, pdl( -1.2 ), "unbiased sample kurtosis of $a_bad";
my $b = pdl '0 0 0 1 1';
is_pdl $a->cov($b), pdl( 0.6 ), "sample covariance of $a and $b";
is_pdl $a->corr($b), pdl( 0.866025403784439 ), "Pearson correlation coefficient of $a and $b";
is_pdl $a->n_pair($b), indx( 5 ), "Number of good pairs between $a and $b";
is_pdl $a->corr($b)->t_corr( 5 ), pdl( 3 ), "t significance test of Pearson correlation coefficient of $a and $b";
is_pdl $a->corr_dev($b), pdl( 0.903696114115064 ), "correlation calculated from dev_m values of $a and $b";
my $b_bad = pdl 'BAD 0 0 1 1 1';
is_pdl $a_bad->cov($b_bad), pdl( 0.5 ), "sample covariance with bad data of $a_bad and $b_bad";
is_pdl $a_bad->corr($b_bad), pdl( 0.894427190999916 ), "Pearson correlation coefficient with bad data of $a_bad and $b_bad";
is_pdl $a_bad->n_pair($b_bad), indx( 4 ), "Number of good pairs between $a_bad and $b_bad with bad values taken into account";
is_pdl $a_bad->corr($b_bad)->t_corr( 4 ), pdl( 2.82842712474619 ), "t signifiance test of Pearson correlation coefficient with bad data of $a_bad and $b_bad";
is_pdl $a_bad->corr_dev($b_bad), pdl( 0.903696114115064 ), "correlation calculated from dev_m values with bad data of $a_bad and $b_bad";
my ($t, $df) = $a->t_test($b);
is_pdl $t, pdl( 2.1380899352994 ), "t-test between $a and $b - 't' output";
is_pdl $df, pdl( 8 ), "t-test between $a and $b - 'df' output";
($t, $df) = $a->t_test_nev($b);
is_pdl $t, pdl( 2.1380899352994 ), "t-test with non-equal variance between $a and $b - 't' output";
is_pdl $df, pdl( 4.94637223974763 ), "t-test with non-equal variance between $a and $b - 'df' output";
($t, $df) = $a->t_test_paired($b);
is_pdl $t, pdl( 3.13785816221094 ), "paired sample t-test between $a and $b - 't' output";
is_pdl $df, pdl( 4 ), "paired sample t-test between $a and $b - 'df' output";
($t, $df) = $a_bad->t_test($b_bad);
is_pdl $t, pdl( 1.87082869338697 ), "t-test with bad values between $a_bad and $b_bad - 't' output";
is_pdl $df, pdl( 8 ), "t-test with bad values between $a_bad and $b_bad - 'd' output";
($t, $df) = $a_bad->t_test_nev($b_bad);
is_pdl $t, pdl( 1.87082869338697 ), "t-test with non-equal variance with bad values between $a_bad and $b_bad - 't' output";
is_pdl $df, pdl( 4.94637223974763 ), "t-test with non-equal variance with bad values between $a_bad and $b_bad - 'df' output";
($t, $df) = $a_bad->t_test_paired($b_bad);
is_pdl $t, pdl( 4.89897948556636 ), "paired sample t-test with bad values between $a_bad and $b_bad - 't' output";
is_pdl $df, pdl( 3 ), "paired sample t-test with bad values between $a_bad and $b_bad - 'df' output";
{
my ($data, $idv, $ido) = rtable(\*DATA, {V=>0});
is_pdl $data, pdl '
[ 5 BAD BAD 2 BAD 5 BAD 9 4 BAD BAD BAD 5 BAD]
[ 7 BAD 3 7 0 BAD 0 8 BAD 0 3 0 BAD 0]
[BAD BAD BAD BAD BAD 1 BAD 1 BAD BAD BAD BAD 1 BAD]
[BAD BAD BAD BAD BAD 0 BAD 5 BAD BAD BAD BAD 0 BAD]
[BAD BAD 0 BAD 2 BAD 0 BAD BAD 0 0 2 BAD 0]
';
}
{
my $a = pdl '
0.045 0.682 0.290 0.024 0.598 0.321 0.772 0.375 0.237 0.811;
0.356 0.094 0.925 0.139 0.701 0.849 0.689 0.109 0.240 0.847;
0.822 0.492 0.351 0.860 0.400 0.243 0.313 0.011 0.437 0.480
';
is_pdl $a->cov_table, $a->cov($a->dummy(1)), 'cov_table';
$a->setbadat(4,0);
is_pdl $a->cov_table, $a->cov($a->dummy(1)), 'cov_table bad val';
}
{
my $a = pdl '
0.045 0.682 0.290 0.024 0.598 0.321 0.772 0.375 0.237 0.811;
0.356 0.094 0.925 0.139 0.701 0.849 0.689 0.109 0.240 0.847;
0.822 0.492 0.351 0.860 0.400 0.243 0.313 0.011 0.437 0.480
';
is_pdl $a->corr_table, $a->corr($a->dummy(1)), "Square Pearson correlation table";
$a->setbadat(4,0);
is_pdl $a->corr_table, $a->corr($a->dummy(1)), "Square Pearson correlation table with bad data";
}
{
my $a = pdl([0,1,2,3,4], [0,0,0,0,0]);
$a = $a->setvaltobad(0);
ok $a->stdv->nbad, "Bad value input to stdv makes the stdv itself bad";
}
SKIP: {
eval { require PDL::Core; require PDL::GSL::CDF; };
skip 'no PDL::GSL::CDF', 1 if $@;
my $x = pdl(1, 2);
my $n = pdl(2, 10);
my $p = .5;
is_pdl binomial_test( $x,$n,$p ), pdl(0.75, 0.9892578125), 'binomial_test';
}
{
my $a = sequence 10, 2;
my $factor = sequence(10) > 4;
my $ans = pdl( [[0..4], [10..14]], [[5..9], [15..19]] );
my ($a_, $l) = $a->group_by($factor);
is_pdl $a_, $ans, 'group_by single factor equal n';
is_deeply $l, [0, 1], 'group_by single factor label';
$a = sequence 10,2;
$factor = qsort sequence(10) % 3;
$ans = pdl( [1.5, 11.5], [5, 15], [8, 18] );
is_pdl $a->group_by($factor)->average, $ans, 'group_by single factor unequal n';
$a = sequence 10;
my @factors = ( [qw( a a a a b b b b b b )], [qw(0 1 0 1 0 1 0 1 0 1)] );
$ans = pdl '[ 0 2 BAD; 1 3 BAD ], [ 4 6 8; 5 7 9 ]';
($a_, $l) = $a->group_by( @factors );
is_pdl $a_, $ans, 'group_by multiple factors';
is_deeply $l, [[qw(a_0 a_1)], [qw( b_0 b_1 )]], 'group_by multiple factors label';
}
{
my @a = qw(a a b b c c);
my $a = PDL::Stats::Basic::code_ivs( \@a );
my $ans = pdl( 0,0,1,1,2,2 );
is_pdl $a, $ans, 'code_ivs';
$a[-1] = undef;
my $a_bad = PDL::Stats::Basic::code_ivs( \@a );
my $ans_bad = pdl '0 0 1 1 2 BAD';
is_pdl $a_bad, $ans_bad, 'code_ivs with missing value undef correctly coded';
$a[-1] = 'BAD';
$a_bad = PDL::Stats::Basic::code_ivs( \@a );
is_pdl $a_bad, $ans_bad, 'code_ivs with missing value BAD correctly coded';
}
done_testing();
__DATA__
999 90 91 92 93 94
70 5 7 -999 -999 -999
711 trying
71 -999 3 -999 -999 0
72 2 7 -999 -999 -999
73 -999 0 -999 -999 2
74 5 -999 1 0 -999
75 -999 0 -999 -999 0
76 9 8 1 5 -999
77 4 -999 -999 -999 -999
78 -999 0 -999 -999 0
79 -999 3 -999 -999 0
80 -999 0 -999 -999 2
81 5 -999 1 0 -999
82 -999 0 -999 -999 0
|