File: makedataset.m

package info (click to toggle)
dynare 6.3-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 67,632 kB
  • sloc: cpp: 79,090; ansic: 28,916; objc: 12,430; yacc: 4,528; pascal: 1,993; lex: 1,441; sh: 1,121; python: 634; makefile: 626; lisp: 163; xml: 18
file content (292 lines) | stat: -rw-r--r-- 11,934 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
function [dataset_, dataset_info, newdatainterface] = makedataset(options_, initialconditions, gsa_flag)
%[dataset_, dataset_info, newdatainterface] = makedataset(options_, initialconditions, gsa_flag)
% Initialize a dataset as a dseries object.
%
% The data are read either from a file (options_.datafile or options_.dataset.file)
% or from a dseries object living in the base workspace (options_.dataset.series).
% The observed variables (options_.varobs) are then selected, optionally logged,
% restricted to the requested sample, and described in dataset_info.
%
% INPUTS
% ======
%
%     options_              [struct]    Structure of options built by Dynare's preprocessor.
%     initialconditions     [double]    number of lags for VAR and DSGE_VAR (number of periods kept
%                                       before first_obs in the returned sample). Defaults to 0.
%     gsa_flag              [integer]   1: GSA, 0: other. If 1 and no data source is specified, an
%                                       empty dataset is returned instead of raising an error.
%                                       Defaults to 0.
%
% OUTPUTS
% =======
%
%     dataset_           [dseries]  The dataset.
%     dataset_info       [struct]   Various pieces of information about the dataset (descriptive
%                                   statistics, missing observations and the raw data).
%     newdatainterface   [integer]  1 if the data are specified through options_.dataset (file or
%                                   series), 0 if they are specified through options_.datafile (or if
%                                   no data are specified and gsa_flag is set).
%
% EXAMPLE
% =======
%
%     [dataset_, dataset_info] = makedataset(options_) ;
%
%
% See also dynare_estimation_init

% Copyright © 2014-2023 Dynare Team
%
% This file is part of Dynare.
%
% Dynare is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% Dynare is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with Dynare.  If not, see <https://www.gnu.org/licenses/>.


if nargin<3
    gsa_flag = 0;
end

if nargin<2 || isempty(initialconditions)
    % If the sample is to be used for the estimation of a VAR or DSGE-VAR model
    % the second argument must be a strictly positive integer (the number of lags).
    initialconditions = 0;
end

% No data source at all: only tolerated when called from GSA.
if isempty(options_.datafile) && isempty(options_.dataset.file) && isempty(options_.dataset.series)
    if gsa_flag
        dataset_ = dseries();
        dataset_info = struct('missing', struct('state', 0, 'aindex', [], 'vindex', [], 'number_of_observations', NaN, 'no_more_missing_observations', NaN), ...
                             'descriptive', struct('mean', [], 'covariance', [], 'correlation', [], 'autocovariance', []));
        newdatainterface=0;
        return
    else
        error('makedataset: datafile option is missing!')
    end
end

% Identify the data source and the interface (data command vs. datafile option).
if isempty(options_.datafile) && ~isempty(options_.dataset.file)
    datafile = options_.dataset.file;
    newdatainterface = 1;
elseif isempty(options_.datafile) && ~isempty(options_.dataset.series)
    % The user provided the name of a dseries object that must exist in the base workspace.
    try
        dseriesobjectforuserdataset = evalin('base', options_.dataset.series);
    catch
        error('makedataset: %s is unknown!', options_.dataset.series)
    end
    if ~isdseries(dseriesobjectforuserdataset)
        error('makedataset: %s has to be a dseries object!', options_.dataset.series)
    end
    datafile = [];
    newdatainterface = 1;
elseif ~isempty(options_.datafile) && isempty(options_.dataset.file)
    datafile = options_.datafile;
    newdatainterface = 0;
elseif ~isempty(options_.datafile) && ~isempty(options_.dataset.file)
    error('makedataset: You cannot simultaneously use the data command and the datafile option (in the estimation command)!')
else
    error('makedataset: You have to specify the datafile!')
end

% Check extension. If none is given, look in the designated folder for a unique
% file with the same base name and one of the allowed extensions.
if ~isempty(datafile)
    allowed_extensions = {'m','mat','csv','xls','xlsx'};
    datafile_extension = get_file_extension(datafile);
    if isempty(datafile_extension)
        available_extensions = {};
        [datafilepath, datafilename] = fileparts(datafile);
        if isempty(datafilepath)
            datafilepath = '.';
        end
        dircontent = dir(datafilepath);
        filenames = {dircontent.name};
        for i=1:length(allowed_extensions)
            if any(strcmp([datafilename '.' allowed_extensions{i}], filenames))
                available_extensions{end+1} = allowed_extensions{i};
            end
        end
        if isempty(available_extensions)
            error('makedataset: I can''t find a datafile (with allowed extension m, mat, csv, xls or xlsx)!')
        end
        if length(available_extensions)>1
            error(sprintf(['makedataset: You did not specify an extension for the datafile, but more than one candidate ' ...
                           'is available in the designated folder!\nPlease, add an extension to the datafile ' ...
                           '(m, mat, csv, xls or xlsx are permitted extensions).']));
        end
        datafile = [datafile '.' available_extensions{1}];
    end
end

% Load the data in a dseries object.
if ~isempty(datafile)
    % The legacy loaders are only used for .m and .mat files specified with the
    % datafile option (old interface); everything else is read by the dseries constructor.
    if ~(newdatainterface==0 && ((length(datafile)>2 && strcmp(datafile(end-1:end),'.m')) || (length(datafile)>4 && strcmp(datafile(end-3:end),'.mat'))))
        dataset_ = dseries(datafile);
    else
        if length(datafile)>2 && strcmp(datafile(end-1:end),'.m')
            % Load an m file with the old interface.
            dataset_ = load_m_file_data_legacy(datafile, options_.varobs);
        elseif length(datafile)>4 && strcmp(datafile(end-3:end),'.mat')
            % Load a mat file with the old interface.
            dataset_ = load_mat_file_data_legacy(datafile, options_.varobs);
        end
    end
else
    dataset_ = dseriesobjectforuserdataset;
    clear('dseriesobjectforuserdataset');
end

if size(unique(dataset_.name),1)~=size(dataset_.name,1)
    error('makedataset: the data set must not contain two variables with the same name and must not contain empty/non-named columns.')
end

% Select a subset of the variables.
dataset_ = dataset_{options_.varobs{:}};

% Apply log function if needed.
if options_.loglinear && ~options_.logdata
    dataset_ = dataset_.log();
end

% Test if an initial period (different from its default value) is explicitly defined in the datafile.
if isequal(dataset_.init, dates(1,1))
    dataset_default_initial_period = 1;
else
    dataset_default_initial_period = 0;
end

% Test if an initial period (different from its default value) is explicitly defined in the mod file with the set_time command.
if ~isdates(options_.initial_period) && isnan(options_.initial_period)
    set_time_default_initial_period = 1;
else
    set_time_default_initial_period = 0;
end

if ~set_time_default_initial_period && dataset_default_initial_period
    % Overwrite the initial period in dataset (it was set to default).
    % Note that the updates of freq and time members are auto-magically
    % done by dseries::subsasgn overloaded method.
    dataset_.init = options_.initial_period;
end

if set_time_default_initial_period && ~dataset_default_initial_period
    % Overwrite the initial period defined by set_time (it was set to default).
    % Note that this only updates the local copy of options_ (it is not returned).
    options_.initial_period = dataset_.init;
end

if ~set_time_default_initial_period && ~dataset_default_initial_period
    % Check that options_.initial_period does not precede dataset_.init.
    if options_.initial_period<dataset_.init
        error('makedataset: The date as defined by the set_time command is not consistent with the initial period in the database!')
    end
end

% Set firstobs, lastobs and nobs
if newdatainterface
    if isempty(options_.dataset.firstobs)
        % first_obs option was not used in the data command.
        firstobs = dataset_.init;
    else
        firstobs = options_.dataset.firstobs;
    end
    if isnan(options_.dataset.nobs)
        % nobs option was not used in the data command.
        if isempty(options_.dataset.lastobs)
            % last_obs option was not used in the data command.
            nobs = dataset_.nobs;
            lastobs = dataset_.dates(end);
        else
            lastobs = options_.dataset.lastobs;
            nobs = lastobs-firstobs+1;
        end
    else
        nobs = options_.dataset.nobs;
        if isempty(options_.dataset.lastobs)
            % last_obs option was not used in the data command.
            lastobs = firstobs+(nobs-1);
        else
            % last_obs and nobs were used in the data command. Check that they are consistent (with firstobs).
            % lastobs must be assigned here, otherwise it is undefined in the test below and later on.
            lastobs = options_.dataset.lastobs;
            if ~isequal(lastobs,firstobs+(nobs-1))
                error('makedataset: Options last_obs (%s), first_obs (%s) and nobs (%s) are not consistent!',char(lastobs),char(firstobs),num2str(nobs));
            end
        end
    end
else
    % Old interface: first_obs is an index into the dates of the dataset.
    if isnan(options_.first_obs)
        firstobs = dataset_.init;
    else
        firstobs = dataset_.dates(options_.first_obs);
    end
    if isnan(options_.nobs)
        lastobs = dataset_.dates(end);
        nobs = lastobs-firstobs+1;
    else
        nobs = options_.nobs;
        lastobs = firstobs+(nobs-1);
    end
end

% Add initial conditions if needed
FIRSTOBS = firstobs-initialconditions;

% Check that firstobs belongs to dataset_.dates
if firstobs<dataset_.init
    error('makedataset: first_obs (%s) cannot be less than the first date in the dataset (%s)!',char(firstobs),char(dataset_.init))
end

% Check that FIRSTOBS belongs to dataset_.dates
if initialconditions && FIRSTOBS<dataset_.init
    error('makedataset: first_obs (%s) - %i cannot be less than the first date in the dataset (%s)!\nReduce the number of lags in the VAR model or increase the value of first_obs\nto at least first_obs=%i.', char(firstobs), initialconditions, char(dataset_.init),initialconditions+1);
end

% Check that lastobs belongs to dataset_.dates...
if newdatainterface
    if lastobs>dataset_.dates(end)
        error('makedataset: last_obs (%s) cannot be greater than the last date in the dataset (%s)!',char(lastobs),char(dataset_.dates(end)))
    end
else
    % ...  or check that nobs is smaller than the number of observations in dataset_.
    if FIRSTOBS>dataset_.dates(1)
        if FIRSTOBS+nobs-1>dataset_.dates(end)
            error('makedataset: Given first_obs=%u and %u total observations in the dataset, the current nobs of %s must not be greater than %s!', options_.first_obs, dataset_.nobs, num2str(nobs), num2str(dataset_.nobs-find(dataset_.dates==FIRSTOBS)+1))
        end
    else
        if nobs>dataset_.nobs
            % The second value reported is the number of observations in the dataset (not a date).
            error('makedataset: nobs (%s) cannot be greater than the number of observations in the dataset (%s)!', num2str(nobs), num2str(dataset_.nobs))
        end
    end
end

% Select a subsample (including the periods used as initial conditions, if any).
dataset_ = dataset_(FIRSTOBS:lastobs);

% Initialize dataset_info structure.
dataset_info = struct('missing', struct('state', NaN, 'aindex', [], 'vindex', [], 'number_of_observations', NaN, 'no_more_missing_observations', NaN), ...
                     'descriptive', struct('mean', [], 'covariance', [], 'correlation', [], 'autocovariance', []));

% Fill dataset_info.missing if some observations are missing
dataset_info.missing.state = isanynan(dataset_.data);
if dataset_info.missing.state
    [dataset_info.missing.aindex, dataset_info.missing.number_of_observations, dataset_info.missing.no_more_missing_observations, dataset_info.missing.vindex] = ...
        describe_missing_data(dataset_.data);
else
    % No missing observation: aindex is a 1-by-nobs cell array, each cell holding
    % the column vector (1:vobs)' (all the variables are observed in every period).
    dataset_info.missing.aindex = num2cell(transpose(repmat(1:dataset_.vobs,dataset_.nobs,1)),1);
    dataset_info.missing.no_more_missing_observations = 1;
end

% Compute the empirical mean of the observed variables.
dataset_info.descriptive.mean = nanmean(dataset_.data,1);

% Compute the empirical covariance matrix of the observed variables.
dataset_info.descriptive.covariance = nancovariance(dataset_.data);

% Compute the empirical correlation matrix of the observed variables
% (covariance matrix rescaled by the inverse standard deviations).
normalization_matrix = diag(1./sqrt(diag(dataset_info.descriptive.covariance)));
dataset_info.descriptive.correlation = normalization_matrix*dataset_info.descriptive.covariance*normalization_matrix;

% Compute the autocovariance function (up to options_.ar lags).
dataset_info.descriptive.autocovariance = nanautocovariance(dataset_.data, options_.ar);

% Save raw data (before any prefiltering).
dataset_info.rawdata = dataset_.data;

% Prefilter the data if needed (remove the mean).
if isequal(options_.prefilter, 1)
    dataset_ = dataset_.detrend();
end