File: df_pad.m

package info (click to toggle)
octave-dataframe 1.2.0-5
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 764 kB
  • sloc: makefile: 123
file content (199 lines) | stat: -rw-r--r-- 7,565 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
function df = df_pad(df, dim, n, coltype)
  %# function resu = df_pad(df, dim, n, coltype = [])
  %# Given a dataframe, insert n rows, columns or sheets, and adjust
  %# everything accordingly (names, row indexes, types, counters).
  %#
  %# Arguments:
  %#   df      : the dataframe to extend; its fields x_cnt, x_name, x_over,
  %#             x_ridx, x_data, x_type and x_rep are read and updated.
  %#   dim     : 1 => append rows, 2 => add columns, 3 => add sheets.
  %#   n       : number of elements to add. With dim = 2, n may be a
  %#             two-element vector [count, offset]; the new columns are
  %#             then inserted after column 'offset'.
  %#   coltype : supplemental argument:
  %#     dim = 1 => not used
  %#     dim = 2 => type of the added column(s), a string or a cell array
  %#                of strings (mandatory)
  %#     dim = 3 => index of columns receiving a new sheet (default: all)
  %#
  %# Returns the modified dataframe. Raises an error when dim is not
  %# 1, 2 or 3, when dim = 2 and coltype is empty, when an existing column
  %# would change type, or when the sheet bookkeeping ends up inconsistent.

  %% Copyright (C) 2009-2017 Pascal Dupuis <cdemills@gmail.com>
  %%
  %% This file is part of the dataframe package for Octave.
  %%
  %% This package is free software; you can redistribute it and/or
  %% modify it under the terms of the GNU General Public
  %% License as published by the Free Software Foundation;
  %% either version 2, or (at your option) any later version.
  %%
  %% This package is distributed in the hope that it will be useful,
  %% but WITHOUT ANY WARRANTY; without even the implied
  %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  %% PURPOSE.  See the GNU General Public License for more
  %% details.
  %%
  %% You should have received a copy of the GNU General Public
  %% License along with this package; see the file COPYING.  If not,
  %% see <http://www.gnu.org/licenses/>.
  
  if (nargin < 4), coltype = []; end
  %# use NA as filler when it can be evaluated, otherwise fall back to NaN
  try
    NA = NA;
  catch
    NA = NaN;
  end
  
  switch dim
    case 1
      if (~isempty (df.x_name{1})),
        if (length (df.x_name{1}) < df.x_cnt(1)+n)
          %# generate a name for the new row(s)
          df.x_name{1}(df.x_cnt(1)+(1:n), 1) = {'_'};
          df.x_over{1}(1, df.x_cnt(1)+(1:n), 1) = true;
        end
      end
      %# complete row indexes: by default, row number.
      if (isempty (df.x_ridx))
        dummy = (1:n).';
      else
        %# continue the numbering, replicated over every index column
        dummy = vertcat (df.x_ridx, repmat (size (df.x_ridx, 1)+(1:n).', ...
                                           1, size (df.x_ridx, 2))); 
      end
      df.x_ridx = dummy; 
      %# pad every column
      for indi = (1:min (size (df.x_data, 2), df.x_cnt(2)))
        %# number of rows this column is still missing
        neff = n + df.x_cnt(1) - size (df.x_data{indi}, 1);
        if (neff > 0)
          m = max(1, size (df.x_data{indi}, 2));
          switch df.x_type{indi}
            case {'char'}
              %# there is no 'string NA'
              dummy = {}; dummy(1:neff, 1:m) = 'NA';
              dummy = vertcat (df.x_data{indi}, dummy);
            case { 'double'}
              dummy = vertcat (df.x_data{indi}, repmat (NA, neff, m));
            %# there is no 'NA' with logical values, avoid casting error
            case {'logical'}
              dummy = vertcat (df.x_data{indi}, repmat (false, neff, m));
            otherwise
              dummy = cast (vertcat (df.x_data{indi}, repmat (NA, neff, m)), ...
                            df.x_type{indi});
          end
          df.x_data{indi} = dummy;
          if (isempty (df.x_rep{indi}))
            df.x_rep{indi} = 1;
          end
        end
      end
      df.x_cnt(1) = df.x_cnt(1) + n;

    case 2
      %# create new columns
      if (isempty (coltype))
        error ('df_pad: dim equals 2, and coltype undefined');
      end
      if (length (n) > 1) %#second value is an offset
        indc =  n(2); n = n(1);
        if (indc < df.x_cnt(2)),
          %# shift to the right
          df.x_name{2}(n + (indc+1:end)) =  df.x_name{2}(indc+1:end);
          df.x_over{2}(n + (indc+1:end)) =  df.x_over{2}(indc+1:end);
          %# placeholder names '_<column index>' for the freed slots
          dummy = cstrcat (repmat ('_', n, 1), ...
                           strjust (num2str(indc + (1:n).'), 'left'));
          df.x_name{2}(indc + (1:n)) = cellstr (dummy);   
          df.x_over{2}(indc + (1:n)) = true;
          df.x_type(n+(indc+1:end)) = df.x_type(indc+1:end);
          df.x_type(indc + (1:n)) = NA;
          df.x_data(n + (indc+1:end)) = df.x_data(indc+1:end);
          df.x_rep(n + (indc+1:end)) = df.x_rep(indc+1:end);
          df.x_data(indc + (1:n)) = NA;
          df.x_rep(indc + (1:n)) = 1;
        end
      else
        %# add new values after the last column
        indc = min (size (df.x_data, 2), df.x_cnt(2)); 
      end
      if (~isa (coltype, 'cell')) coltype = {coltype}; end
      %# a single type applies to every new column
      if (isscalar (coltype) && n > 1)
        coltype = repmat (coltype, 1, n);
      end
      for indi = (1:n)
        switch coltype{indi}
          case {'char'}
            %# NOTE(review): this yields a 1x1 cell holding '_' whatever
            %# the row count is -- confirm this is what callers expect
            dummy = {repmat(NA, df.x_cnt(1), 1) }; 
            dummy(:, 1) = '_';
          case { 'double'}
            dummy = repmat (NA, df.x_cnt(1), 1);
          case {'logical'} %# there is no NA in logical type
            dummy = repmat (false, df.x_cnt(1), 1);
          otherwise
            try
              dummy = cast (repmat (NA, df.x_cnt(1), 1), coltype{indi});
            catch
              %# There was an issue -- transfer coltype to data
              if (indc+indi > df.x_cnt(2))
                dummy = {coltype{indi}}; coltype{indi} = 'char';
                if (df.x_cnt(1) < 1)
                  %# nothing defined yet -- pad with one line
                  df.x_type{indc+indi} = coltype{indi};
                  df = df_pad (df, 1, 1);
                end
              else
                %# report the type of the very column named in the message,
                %# and let error do the formatting so that the inserted
                %# values are never re-read as a template
                error ('Trying to change type of column %d, which was %s, to %s', ...
                       indc+indi, df.x_type{indc+indi}, coltype{indi});
              end
            end  
        end
        df.x_data{indc+indi} = dummy;
        df.x_rep{indc+indi} = 1;
        df.x_type{indc+indi} = coltype{indi};
      end
   
      if (size (df.x_data, 2) > df.x_cnt(2)),
        df.x_cnt(2) =  size (df.x_data, 2);
      end
      if (length (df.x_name{2}) < df.x_cnt(2)),
        %# generate a name for the new column(s)
        dummy = cstrcat (repmat ('_', n, 1), ...
                         strjust (num2str (indc + (1:n).'), 'left'));
        df.x_name{2}(indc + (1:n)) = cellstr (dummy);
        df.x_over{2}(1, indc + (1:n)) = true;
      end   
      
    case 3
      if (n <= 0) return; end
      if (isempty (coltype)),
        coltype = 1:df.x_cnt(2);
      end
      %# widen the row index so that it covers the largest sheet count
      dummy = max (n+cellfun (@length, df.x_rep(coltype)));
      if (size (df.x_ridx, 2) < dummy),
        df.x_ridx(:, end+1:dummy) = NA;
      end
      %# force a row vector: 'for' walks over columns, so a column vector
      %# of indexes would otherwise be handled in one single iteration
      for indi = coltype(:).'
        %# expand the stored data following x_rep, then append one filler
        %# column shared by all the new sheets
        switch df.x_type{indi}
          case {'char'}
            dummy = horzcat (df.x_data{indi}(:, df.x_rep{indi}), ...
                             repmat({NA}, df.x_cnt(1), 1));
          case {'double'}
            dummy = horzcat (df.x_data{indi}(:, df.x_rep{indi}), ...
                             repmat (NA, df.x_cnt(1), 1));
          case {'logical'}
            %# there is no logical 'NA' -- fill empty elems with false
            dummy = horzcat (df.x_data{indi}(:, df.x_rep{indi}), ...
                             repmat (false, df.x_cnt(1), 1));
          otherwise
            dummy = cast (horzcat (df.x_data{indi}(:, df.x_rep{indi}), ...
                                   repmat (NA, df.x_cnt(1), 1)), ...
                          df.x_type{indi});
        end
        df.x_data{indi} = dummy;
        %# the n new sheets all refer to the filler column
        df.x_rep{indi} = [df.x_rep{indi} length(df.x_rep{indi})+ones(1, n)];
        %# consistency check: the highest sheet reference must be the last
        %# stored column
        if (size (df.x_data{indi}, 2) ~= max (df.x_rep{indi}))
          error ('df_pad: column %d holds %d data column(s) but its sheet index refers up to %d', ...
                 indi, size (df.x_data{indi}, 2), max (df.x_rep{indi}));
        end
      end
      df =  df_thirddim (df);
    otherwise
      error ('Invalid dimension in df_pad');
  end

end