File: apop_fix_params.c

package info (click to toggle)
apophenia 1.0%2Bds-8
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye
  • size: 2,968 kB
  • sloc: ansic: 19,483; makefile: 372; awk: 124; sh: 105; sed: 32
file content (279 lines) | stat: -rw-r--r-- 12,424 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
/** \file 
 Set some of the parameters of a model to fixed values.*/

/* There's only one public function here. Its header is in likelihoods.h
 
Copyright (c) 2007, 2009, 2011 by Ben Klemens.  Licensed under the GPLv2; see COPYING.  */

#include "apop_internal.h"
static apop_model *fixed_param_model;

//The model keeps a table of what the blanks should be filled in with.
//This first section does the work for that part.
static double find_nans(double in){ return isnan(in); }

static void addin(apop_data *predict, size_t i, int j, size_t page){
    int len;
    if (!predict->matrix){
        predict->matrix = gsl_matrix_alloc(1, 4); 
        len = 0;
    } else 
        len = predict->matrix->size1;
    apop_matrix_realloc(predict->matrix, len + 1, 4);
    apop_data_set(predict, .row=len, .colname="row", .val=i);
    apop_data_set(predict, .row=len, .colname="col", .val=j);
    apop_data_set(predict, .row=len, .colname="page", .val=page);
}

static void find_missing(const apop_data *data, apop_data *predict, size_t page){
    //generate a list of fixed-parameter positions, and their paramvals.
   apop_data * mask = apop_map((apop_data*)data, find_nans);
    //find out where the NaNs are
    for (size_t i=0; mask->vector && i< mask->vector->size; i++)
        if (apop_data_get(mask, i, -1))
            addin(predict, i, -1, page);
    for (size_t i=0; mask->matrix && i< mask->matrix->size1; i++)
        for (int j=0; j <mask->matrix->size2; j++)
            if (apop_data_get(mask, i, j))
                addin(predict, i, j, page);
    apop_data_free(mask);
}

static apop_data *apop_predict_table_prep(apop_data *in){
    apop_data *out = apop_data_alloc( );
    apop_name_add(out->names, "<fillins>", 'h');
    apop_name_add(out->names, "row", 'c');
    apop_name_add(out->names, "col", 'c');
    apop_name_add(out->names, "page", 'c');
    apop_name_add(out->names, "value", 'c');
    for (int page=0; in; page++){
        find_missing(in, out, page);
        in = in->more;
    }
    return out;
}

/* Take a \c predict table and set the entries in the data set to the given predicted
  value. Functions for prediction and imputation use this internally, and append to your
  data a \c predict table of the right form.  For example, \c apop_ml_impute uses
  this internally.
  
  I assume that the ordering of elements in the \c predict table include everything on the
  first page, then everything on the second, et cetera. 

\param data The data set to be filled in. 
\param predict The set of fillins.
*/
static void apop_data_predict_fill(apop_data *data, apop_data *predict){
    if (!predict) return;
    int this_page_ct = 0;
    apop_data *this_page = data;
    for (int i=0; i < predict->matrix->size1; i++){
        int p = apop_data_get(predict, .row=i, .colname="page");
        while (p != this_page_ct){//entries are in sequential order, but may skip pages.
            this_page_ct++;
            this_page = this_page->more;
        }
        apop_data_set(this_page, .row= apop_data_get(predict, .row=i, .colname="row"),
                                 .col= apop_data_get(predict, .row=i, .colname="col"),
                                 .val= apop_data_get(predict, .row=i, .colname="value"));
    }
}

/////////End predict table machinery.

/** \cond doxy_ignore  Not in the apop.m4.h header --> not public. */
typedef struct {
    apop_model *base_model;
    apop_data *predict;
    int ct;
} apop_fix_params_settings;
/** \endcond */ //End of Doxygen ignore.

static void unpack(apop_data *out, apop_model *m){
    //predict table --> real param set 
   apop_fix_params_settings *mset = Apop_settings_get_group(m, apop_fix_params);
   Apop_col_tv(mset->predict, "value", p_in_tab);
   gsl_vector_memcpy(p_in_tab, m->parameters->vector);
   apop_data_predict_fill(out, mset->predict);
}

static void pack(apop_data *in, apop_model *m){
    //real param set --> predict table 
   apop_fix_params_settings *mset = Apop_settings_get_group(m, apop_fix_params);
   apop_data *predict = mset->predict;
    for(int i =0; i< predict->matrix->size1; i++){
        apop_data_set(predict, .row =i, .colname="value", .val=apop_data_get(in, 
                                                        apop_data_get(predict, .row=i, .colname="row"),
                                                        apop_data_get(predict, .row=i, .colname="col")));
        if (i< mset->ct-1 && apop_data_get(predict, .row= i+1, .colname="page") 
                                != apop_data_get(predict, .row= i, .colname="page"))
            in = in->more;
    }
   Apop_col_tv(mset->predict, "value", p_in_tab);
   if (m->parameters) //empty only during set_starting_point
       gsl_vector_memcpy(m->parameters->vector, p_in_tab);
}

//The macros generating the fixed_param_settings group's init/copy/free functions:
Apop_settings_init(apop_fix_params, 
    Apop_assert(in.base_model, "I can't fix a NULL model's parameters.");
)
Apop_settings_copy(apop_fix_params, )
Apop_settings_free(apop_fix_params, )

static long double fix_params_ll(apop_data *d, apop_model *fixed_model){
    apop_model *base_model = Apop_settings_get(fixed_model, apop_fix_params, base_model);
    unpack(base_model->parameters, fixed_model);
    return apop_log_likelihood(d, base_model);
}

static long double fix_params_p(apop_data *d, apop_model *fixed_model){
    apop_model *base_model = Apop_settings_get(fixed_model, apop_fix_params, base_model);
    unpack(base_model->parameters, fixed_model);
    return apop_p(d, base_model);
}

static long double fix_params_constraint(apop_data *data, apop_model *fixed_model){
    apop_model *base_model = Apop_settings_get(fixed_model, apop_fix_params, base_model);
    unpack(base_model->parameters, fixed_model);
    long double out = base_model->constraint(data, base_model);
    if (out) pack(base_model->parameters, fixed_model);
    return out;
}

static int fix_params_draw(double *out, gsl_rng* r, apop_model *eps){
    apop_model *base_model = Apop_settings_get(eps, apop_fix_params, base_model);
    unpack(base_model->parameters, eps);
    return base_model->draw(out, r, base_model);
}

static void fixed_est(apop_data * data, apop_model *params){
    if (!data) data = params->data;
    apop_maximum_likelihood(data, params);
    apop_model *base_model = Apop_settings_get(params, apop_fix_params, base_model);
    unpack(base_model->parameters, params);
}

static void fixed_param_show(apop_model *m, FILE *out){
    apop_fix_params_settings *mset = Apop_settings_get_group(m, apop_fix_params);
    fprintf(out, "The fill-in table:\n");
    apop_data_print(mset->predict, .output_pipe=out);
    if (!m->parameters) printf("This copy of the model has not yet been estimated.\n");
    else {
        fprintf(out, "The base model, after unpacking:\n");
        unpack(mset->base_model->parameters, m);
    }
    apop_model_print(mset->base_model, out);
}

static void fixed_param_prep(apop_data *data, apop_model *params){
    apop_model_print_vtable_add(fixed_param_show, fixed_param_model);
    apop_model_clear(data, params);
    //apop_model *base_model = Apop_settings_get(params, apop_fix_params, base_model);
    //apop_prep(data, base_model);
}

static apop_model *fixed_param_model = &(apop_model){"Fill me", .estimate=fixed_est, .p = fix_params_p, 
            .log_likelihood=fix_params_ll, .constraint= fix_params_constraint, 
            .draw=fix_params_draw, .prep=fixed_param_prep};


void set_starting_point(apop_data * in_params, apop_model * model_out, double *start, apop_data *predict_tab){
    //reshape the starting_pt to the shape of typical params
    apop_data *param_cp = apop_data_copy(in_params);
    Get_vmsizes(in_params);//tsize
    gsl_vector_view v = gsl_vector_view_array(start, tsize);
    apop_data_unpack(&(v.vector), param_cp);

    pack(param_cp, model_out); //write to the fill tab.
    apop_data_free(param_cp);
    Apop_col_tv(predict_tab, "value", ptv);
    double *new_start = malloc(ptv->size * sizeof(double)); //leak!!
    memcpy(new_start, ptv->data, ptv->size* sizeof(double));
    Apop_settings_add(model_out, apop_mle, starting_pt, new_start);
}

/** Produce a model based on another model, but with some of the parameters fixed at a given value. 
  
You will send me the model whose parameters you want fixed, with the \c parameters element
set as follows. For the fixed parameters, simply give the values to which they will
be fixed. Set the free parameters to \c NaN.

For example, here is a Binomial distribution with a fixed \f$n=30\f$ but \f$p_1\f$ allowed to float freely:

\code
apop_model *bi30 = apop_model_fix_params(apop_model_set_parameters(apop_binomial, 30, NAN));
Apop_model_add_group(bi30, apop_mle, .starting_pt=(double[]){.5}); // The Binomial doesn't like the
                                                                   //  default starting point of 1.
apop_model *out = apop_estimate(your_data, bi30);
\endcode

The output is an \c apop_model that can be estimated, Bayesian updated, et cetera.

\li Rather than using this model, you may simply want a now-filled-in copy of the
    original model. Use \ref apop_model_fix_params_get_base to retrieve the original model's parameters.
\li The \c estimate method always uses an MLE, and it never calls the base model's \c estimate method.
\li If the input model has an \ref apop_mle_settings group attached, I'll use them for the \c
    estimate method. Otherwise, I'll set my own.
\li If the parameter input has non-NaN values at the free parameters, then I'll use
    those as the starting point for any MLE search; the defaults for the variables without
    fixed values starts from <b>1</b> as usual.
\li I do check the \c more pointer of the \c parameters for additional pages and <tt>NaN</tt>s on those pages.

Here is a sample program. It produces a few thousand draws from a Multivariate Normal distribution,
and then tries to recover the means given a var/covar matrix fixed at the correct variance.

\include fix_params.c
  
\param model_in   The base model
\return a model that can be used like any other, with the given params fixed or free.
*/
apop_model * apop_model_fix_params(apop_model *model_in){
    Nullcheck_mp(model_in, NULL)
    apop_model *model_out  = Apop_model_copy_set(fixed_param_model,
                                apop_fix_params, .base_model = model_in);

    apop_data *predict_tab; //Keep the predict tab as a data set and in the settings struct
    predict_tab = apop_predict_table_prep(model_in->parameters);
    Apop_stopif (!predict_tab || !predict_tab->matrix|| !predict_tab->matrix->size2,
        apop_data_free(predict_tab);
        apop_model_free(model_out);
        return apop_model_copy(model_in);
        , 1, "No free parameters (which would be marked with a NaN). "
                "Returning a copy of the input model."
    );
    apop_settings_set(model_out, apop_fix_params, predict, predict_tab);
    model_out->vsize = predict_tab->matrix->size1;
    model_out->dsize = model_in->dsize;

    if (Apop_settings_get_group(model_in, apop_mle)){
        apop_settings_copy_group(model_out, model_in, "apop_mle");
        double *start = Apop_settings_get(model_in, apop_mle, starting_pt);
        if (start) set_starting_point(model_in->parameters, model_out, start, predict_tab);
    }
    else Apop_model_add_group(model_out, apop_mle, .method="PR cg",
                                     .step_size=1, .tolerance=0.2);
    if (Apop_settings_get_group(model_in, apop_parts_wanted))
        apop_settings_copy_group(model_out, model_in, "apop_parts_wanted");


    #define cut_if_missing(method) if (!model_in->method) model_out->method = NULL;
    cut_if_missing(p);
    cut_if_missing(draw);
    cut_if_missing(constraint);
    cut_if_missing(log_likelihood);
    snprintf(model_out->name, 100, "%s, with some params fixed", model_in->name);
    return model_out;
}

/** The \ref apop_model_fix_params function produces a model that has only the non-fixed
  parameters of the model. After estimation of the fixed-parameter model, this function
  fills the \c parameters element of the base model and returns a pointer to the
  base model.
*/
apop_model * apop_model_fix_params_get_base(apop_model *fixed_model){
    apop_model *base_model = Apop_settings_get(fixed_model, apop_fix_params, base_model);
    unpack(base_model->parameters, fixed_model);
    return base_model;
}