1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
|
def outputfstats(Enum, Eden, dfnum, dfden, f, prob):
Enum = around(Enum,3)
Eden = around(Eden,3)
dfnum = around(Enum,3)
dfden = around(dfden,3)
f = around(f,3)
prob = around(prob,3)
suffix = '' # for *s after the p-value
if prob < 0.001: suffix = ' ***'
elif prob < 0.01: suffix = ' **'
elif prob < 0.05: suffix = ' *'
title = [['EF/ER','DF','Mean Square','F-value','prob','']]
lofl = title+[[Enum, dfnum, around(Enum/float(dfnum),3), f, prob, suffix],
[Eden, dfden, around(Eden/float(dfden),3),'','','']]
_support.printcc(lofl)
return
def paired(x, y):
"""
Interactively determines the type of data in x and y, and then runs the
appropriated statistic for paired group data.
Returns: appropriate statistic name, value, and probability
"""
x,y = map(asarray, (x,y))
samples = ''
while samples not in ['i','r','I','R','c','C']:
print '\nIndependent or related samples, or correlation (i,r,c): ',
samples = raw_input()
if samples in ['i','I','r','R']:
print '\nComparing variances ...',
# USE O'BRIEN'S TEST FOR HOMOGENEITY OF VARIANCE, Maxwell & delaney, p.112
r = obrientransform(x,y)
f,p = f_oneway(_support.colex(r,0),_support.colex(r,1))
if p<0.05:
vartype='unequal, p='+str(around(p,4))
else:
vartype='equal'
print vartype
if samples in ['i','I']:
if vartype[0]=='e':
t,p = ttest_ind(x,y,None,0)
print '\nIndependent samples t-test: ', around(t,4),around(p,4)
else:
if len(x)>20 or len(y)>20:
z,p = ranksums(x,y)
print '\nRank Sums test (NONparametric, n>20): ', around(z,4),around(p,4)
else:
u,p = mannwhitneyu(x,y)
print '\nMann-Whitney U-test (NONparametric, ns<20): ', around(u,4),around(p,4)
else: # RELATED SAMPLES
if vartype[0]=='e':
t,p = ttest_rel(x,y,0)
print '\nRelated samples t-test: ', around(t,4),around(p,4)
else:
t,p = ranksums(x,y)
print '\nWilcoxon T-test (NONparametric): ', around(t,4),around(p,4)
else: # CORRELATION ANALYSIS
corrtype = ''
while corrtype not in ['c','C','r','R','d','D']:
print '\nIs the data Continuous, Ranked, or Dichotomous (c,r,d): ',
corrtype = raw_input()
if corrtype in ['c','C']:
m,b,r,p,see = linregress(x,y)
print '\nLinear regression for continuous variables ...'
lol = [['Slope','Intercept','r','Prob','SEestimate'],
[around(m,4),around(b,4),around(r,4),around(p,4),around(see,4)]]
_support.printcc(lol)
elif corrtype in ['r','R']:
r,p = spearmanr(x,y)
print '\nCorrelation for ranked variables ...'
print "Spearman's r: ",around(r,4),around(p,4)
else: # DICHOTOMOUS
r,p = pointbiserialr(x,y)
print '\nAssuming x contains a dichotomous variable ...'
print 'Point Biserial r: ',around(r,4),around(p,4)
print '\n\n'
return None
def writecc (listoflists, file, writetype='w', extra=2):
"""
Writes a list of lists to a file in columns, customized by the max
size of items within the columns (max size of items in col, +2 characters)
to specified file. File-overwrite is the default.
Usage: writecc (listoflists,file,writetype='w',extra=2)
Returns: None
"""
if not isinstance(listoflists[0], (list, tuple)):
listoflists = [listoflists]
outfile = open(file,writetype)
rowstokill = []
list2print = copy.deepcopy(listoflists)
for i in range(len(listoflists)):
if listoflists[i] == ['\n'] or listoflists[i]=='\n' or listoflists[i]=='dashes':
rowstokill = rowstokill + [i]
rowstokill.reverse()
for row in rowstokill:
del list2print[row]
maxsize = [0]*len(list2print[0])
for col in range(len(list2print[0])):
items = _support.colex(list2print,col)
items = map(_support.makestr,items)
maxsize[col] = max(map(len,items)) + extra
for row in listoflists:
if row == ['\n'] or row == '\n':
outfile.write('\n')
elif row == ['dashes'] or row == 'dashes':
dashes = [0]*len(maxsize)
for j in range(len(maxsize)):
dashes[j] = '-'*(maxsize[j]-2)
outfile.write(_support.lineincustcols(dashes,maxsize))
else:
outfile.write(_support.lineincustcols(row,maxsize))
outfile.write('\n')
outfile.close()
return None
def outputpairedstats(fname,writemode,name1,n1,m1,se1,min1,max1,name2,n2,m2,se2,min2,max2,statname,stat,prob):
"""
Prints or write to a file stats for two groups, using the name, n,
mean, sterr, min and max for each group, as well as the statistic name,
its value, and the associated p-value.
Usage: outputpairedstats(fname,writemode,
name1,n1,mean1,stderr1,min1,max1,
name2,n2,mean2,stderr2,min2,max2,
statname,stat,prob)
Returns: None
"""
suffix = '' # for *s after the p-value
try:
x = prob.shape
prob = prob[0]
except:
pass
if prob < 0.001: suffix = ' ***'
elif prob < 0.01: suffix = ' **'
elif prob < 0.05: suffix = ' *'
title = [['Name','N','Mean','SD','Min','Max']]
lofl = title+[[name1,n1,round(m1,3),round(math.sqrt(se1),3),min1,max1],
[name2,n2,round(m2,3),round(math.sqrt(se2),3),min2,max2]]
if not isinstance(fname, basestring) or len(fname) == 0:
print
print statname
print
_support.printcc(lofl)
print
try:
if stat.shape == ():
stat = stat[0]
if prob.shape == ():
prob = prob[0]
except:
pass
print 'Test statistic = ',round(stat,3),' p = ',round(prob,3),suffix
print
else:
file = open(fname,writemode)
file.write('\n'+statname+'\n\n')
file.close()
writecc(lofl,fname,'a')
file = open(fname,'a')
try:
if stat.shape == ():
stat = stat[0]
if prob.shape == ():
prob = prob[0]
except:
pass
file.write(_support.list2string(['\nTest statistic = ',round(stat,4),' p = ',round(prob,4),suffix,'\n\n']))
file.close()
return None
|