File: stat_logit.tcl

package info (click to toggle)
tcllib 1.20%2Bdfsg-1
  • links: PTS
  • area: main
  • in suites: bullseye
  • size: 68,064 kB
  • sloc: tcl: 216,842; ansic: 14,250; sh: 2,846; xml: 1,766; yacc: 1,145; pascal: 881; makefile: 107; perl: 84; f90: 84; python: 33; ruby: 13; php: 11
file content (120 lines) | stat: -rw-r--r-- 3,024 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# stat_logit.tcl --
#     Logistic regression functions - part of the statistics package
#
#     Note:
#     The implementation was derived from the Wikipedia page on logistic regression,
#     (https://en.wikipedia.org/wiki/Logistic_regression) as is the test case.
#
#     TODO:
#     - Deviance to evaluate the goodness of fit
#     - Evaluate the probability
#

package require math::optimize

namespace eval ::math::statistics {
     variable xLogit {}
     variable yLogit {}
}

# logistic-model --
#     Fit 1/0 data to a logistic model
#
# Arguments:
#     xdata        Independent variables (list of lists if there are more than one)
#     ydata        Corresponding scores (0 or 1)
#
# Result:
#     Estimate of the parameters for a logistic model
#
# Note:
#     It is expected that the independent variables have roughly the same scale
#
proc ::math::statistics::logistic-model {xdata ydata} {
    variable xLogit
    variable yLogit

    set xLogit {}
    foreach coords $xdata {
        lappend xLogit [concat 1.0 $coords]
    }
    set yLogit $ydata

    #
    # Use a trivial starting point
    #
    set startx [lrepeat [llength [lindex $xLogit 0]] 0.0]

    set result [::math::optimize::nelderMead LogisticML_NM $startx]

    return [dict get $result x]
}


# LogisticML_NM --
#     Calculate the (log) maximum likelihood for the given logistic model
#     using Nelder-Mead
#
# Arguments:
#     args        Vector of the current regression coefficients
#
# Returns:
#     Log maximum likelihood
#
proc ::math::statistics::LogisticML_NM {args} {
    variable xLogit
    variable yLogit

    set loglike 0.0
    foreach coords $xLogit score $yLogit {
        set sum 0.0

        foreach c $coords v $args {
            set sum [expr {$sum + $v * $c}]
        }
        set exp [expr {exp(-$sum)}]

        if { $score == 1 } {
            set loglike [expr {$loglike - log(1.0 + $exp)}]
        } else {
            set loglike [expr {$loglike - $sum - log(1.0 + $exp)}]
        }
    }
    return [expr {-$loglike}]
}

# logistic-probability --
#     Calculate the probability of a positive score (1) given the model
#
# Arguments:
#     coeffs      Coefficients of the logistic model (for instance outcome of model fit)
#     values      Values of the independent variables
#
# Returns:
#     Probability
#
proc ::math::statistics::logistic-probability {coeffs values} {
    set sum 0.0

    foreach c $coeffs v [concat 1.0 $values] {
        set sum [expr {$sum + $c * $v}]
    }

    return [expr {1.0 / (1.0 + exp(-$sum))}]
}

# test case: from Wikipedia
if {0} {
set xdata {0.50 0.75 1.00 1.25 1.50 1.75 1.75 2.00 2.25 2.50 2.75 3.00 3.25 3.50 4.00 4.25 4.50 4.75 5.00 5.50}
set ydata {0    0    0    0    0    0    1    0    1    0    1    0    1    0    1    1    1    1    1    1   }

set coeffs [::math::statistics::logistic-model $xdata $ydata]

puts "Model fit: $coeffs"

puts "Probabilities:"

foreach x {1 2 3 4 5} {
    puts "$x - [::math::statistics::logistic-probability $coeffs $x]"
}
}