File: df_name2idx.m

package info (click to toggle)
octave-dataframe 1.2.0-6
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 784 kB
  • sloc: makefile: 126
file content (155 lines) | stat: -rw-r--r-- 5,126 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
function [idx, nelem, subs, mask] = df_name2idx(names, subs, count, dimname, missingOK);

  %# Helper routine translating row or column names into real indices.
  %#
  %# Inputs:
  %#   names     - cell array of strings with the known names along the
  %#               dimension (accessed as names{k}); only its first
  %#               min (length (names), count) entries are searched
  %#   subs      - subscript as produced by subsref and similar: a char
  %#               array (one name/pattern per row), a cell array of
  %#               names/patterns, a logical array, a dataframe, or
  %#               anything else, which is passed through as an index
  %#   count     - number of elements along the dimension; used for ':'
  %#               and for open-ended or 'end' terminated ranges
  %#   dimname   - dimension label used in the error message, either a
  %#               string or 1 (row), 2 (column), 3 (page)
  %#   missingOK - optional, default false; when true no error is raised
  %#               if no name matches
  %#
  %# Names are handled as regular expressions; the shell-like wildcards
  %# '*' and '?' are rewritten to '.*' and '.?'. A name without any
  %# regexp operator must match a whole word. This routine can also
  %# detect ranges, two values separated by ':'; each end is either a
  %# number, a name, or for the upper end an empty string or 'end'.
  %#
  %# Outputs:
  %#   idx   - the resulting indices
  %#   nelem - length (idx)
  %#   subs  - 'sanitised' from names: either the index vector, or the
  %#           input left untouched when it was empty or a single ':'
  %#   mask  - for a cell/char subs, a logical column flagging the
  %#           entries which matched at least one name (range entries
  %#           are not flagged); for a logical subs, subs itself;
  %#           otherwise empty

  %% Copyright (C) 2009-2017 Pascal Dupuis <cdemills@gmail.com>
  %%
  %% This file is part of the dataframe package for Octave.
  %%
  %% This package is free software; you can redistribute it and/or
  %% modify it under the terms of the GNU General Public
  %% License as published by the Free Software Foundation;
  %% either version 2, or (at your option) any later version.
  %%
  %% This package is distributed in the hope that it will be useful,
  %% but WITHOUT ANY WARRANTY; without even the implied
  %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  %% PURPOSE.  See the GNU General Public License for more
  %% details.
  %%
  %% You should have received a copy of the GNU General Public
  %% License along with this package; see the file COPYING.  If not,
  %% see <http://www.gnu.org/licenses/>.
  
  if (nargin < 5) missingOK = false; end

  %# mask is only computed for cell and logical subscripts; give it a
  %# defined value so that asking for four outputs never fails
  mask = [];
  
  %# regexp idea of 'word boundary' changed between 3.6 and 3.7
  persistent wbs wbe;
  
  if (isempty (wbs))
    if (isempty ( regexp ('This is a test', '\<is\>')))
       [wbs, wbe] = deal ('\b');
    else
      wbs = '\<'; wbe = '\>';
    end
  end

  if (isempty (subs))
    %# not caring about rownames ? Avoid generating an error.
    idx = []; nelem = 0; return
  end

  if (~isa (dimname, 'char'))
    switch dimname
      case 1
        dimname = 'row';
      case 2
        dimname = 'column';
      case 3
        dimname = 'page';
      otherwise
        error ('Unknown dimension %d', dimname);
    end
  end

  %# keep a printable copy of the request for the error message
  if (isa (subs, 'char')),
    orig_name = subs;
    if (1 == size (subs, 1))
      if (strcmp(subs, ':')) %# range operator
        idx = 1:count; nelem = count;
        return
      end
    end
    subs = cellstr (subs);
  else
    %# non-vector subscripts are accepted as is
    switch (class (subs))
      case {'cell'}
        orig_name = char (subs);
      case {'dataframe'}
        orig_name = 'elements indexed by a dataframe';
      otherwise
        orig_name = num2str (subs);
    end
  end

  if (isa (subs, 'cell'))
    subs = subs(:); idx = []; mask = false (size (subs, 1), 1);
    %# never look further than the declared number of elements
    nmax = min (length (names), count); %# sanity check
    %# characters whose presence means the user wrote a real regexp
    specials = '[\.\*\+\?\{\}\(\)\[\]\^\$\\]';
    %# translate list of variables to list of indices
    for indi = (1:size (subs, 1))
      %# regexp doesn't like empty patterns
      if (isempty (subs{indi})) continue; end
      %# convert  from standard pattern to regexp pattern
      subs{indi} = regexprep (subs{indi}, '([^\.\\])(\*|\?)', '$1.$2');
      %# quote repetition ops at beginning of line, otherwise the regexp
      %# will stall forever/fail
      subs{indi} = regexprep (subs{indi}, ...
                              '^([\*\+\?\{\}\|])', '\\$1');
      %# detect | followed by EOL 
      subs{indi} = regexprep (subs{indi}, '([^\\])\|$', '$1\\|');
      if (isempty (strfind (subs{indi}, ':')))
        %# if there's no special operator, make match strict
        if (isempty (regexp (subs{indi}, specials)))
          subs{indi}  = [wbs subs{indi} wbe];
        end
        for indj = (1:nmax)
          if (~isempty (regexp (names{indj}, subs{indi})))
            idx = [idx indj]; mask(indi) = true;
          end
        end
      else
        %# range: each end is resolved on its own, never against the
        %# whole 'start:stop' text which no name could match
        parts = strsplit (subs{indi}, ':');
        ind_start = 1;
        if (~isempty (parts{1}))
          ind_start = sscanf (parts{1}, '%d');
          if (isempty (ind_start))
            %# not a number: look for a name, default to the first element
            ind_start = 1;
            pattern = parts{1};
            if (isempty (regexp (pattern, specials)))
              pattern = [wbs pattern wbe];
            end
            for indj = (1:nmax)
              if (~isempty (regexp (names{indj}, pattern))),
                ind_start = indj; break; %# stop at the first match
              end
            end
          end
        end
        
        if (isempty (parts{2}) || strcmp (parts{2}, 'end'))
          ind_stop = count;
        else
          ind_stop = sscanf(parts{2}, '%d');
          if (isempty (ind_stop))
            %# not a number: look for a name, default to the first element
            ind_stop = 1;
            pattern = parts{2};
            if (isempty (regexp (pattern, specials)))
              pattern = [wbs pattern wbe];
            end
            for indj = (nmax:-1:1)
              if (~isempty (regexp (names{indj}, pattern)))
                ind_stop = indj; break; %# stop at the last match
              end
            end
          end
        end
        idx = [idx ind_start:ind_stop];
      end
    end
    if (isempty (idx) && ~missingOK)
      %# the searched name goes in as an argument, not as part of the
      %# template: it may contain '%' or '\'. Several names are listed
      %# one after the other.
      searched = strjoin (cellstr (orig_name), ', ');
      error ('Unknown %s name while searching for %s', ...
             dimname, searched);
    end
  elseif (isa (subs, 'logical'))
    %# keep the linear positions of the true entries
    idx = 1:length (subs(:)); idx = reshape (idx, size (subs));
    idx(~subs) = []; mask = subs;
  elseif (isa (subs, 'dataframe'))
    idx = subsindex (subs, 1);
  else
    %# anything else is passed through as an index
    idx = subs;
  end

  subs = idx;
  nelem = length (idx);
  
end