File: parsecell.m

package info (click to toggle)
octave-io 2.4.12-2
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 1,552 kB
  • sloc: objc: 2,428; cpp: 547; makefile: 134; sh: 23
file content (164 lines) | stat: -rw-r--r-- 5,889 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
## Copyright (C) 2009-2018 Philip Nienhuis <prnienhuis at users.sf.net>
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} [ @var{numarr}, @var{txtarr}, @var{lim} ] = parsecell (@var{rawarr})
## @deftypefnx {Function File} [ @var{numarr}, @var{txtarr}, @var{lim} ] = parsecell (@var{rawarr}, @var{limits})
##
## Divide a heterogeneous 2D cell array into a 2D numeric array and a
## 2D cell array containing only strings. Both returned arrays are
## trimmed from empty outer rows and columns.
## This function is particularly useful for parsing cell arrays returned
## by functions reading spreadsheets (e.g., xlsread, odsread).
##
## Optional return argument @var{lim} is a struct containing two fields,
## numlimits and txtlimits, holding the outer column and row numbers of
## @var{numarr} and @var{txtarr}, respectively, in the original array
## @var{rawarr}.
## Optional input argument @var{limits} can either be the spreadsheet
## data limits returned in the spreadsheet file pointer struct
## (field xls.limits or ods.limits), or the file ptr struct itself.
## If one of these is specified, optional return argument @var{lim}
## will contain the real spreadsheet row & column numbers enclosing
## the origins of the numerical and text data returned in @var{numarr}
## and @var{txtarr}.
##
## Examples:
##
## @example
##   [An, Tn] = parsecell (Rn);
##   (which returns the numeric contents of Rn into array An and the
##    text data into array Tn)
## @end example
##
## @example
##   [An, Tn, lims] = parsecell (Rn, xls.limits);
##   (which returns the numeric contents of Rn into array An, the
##    text data into array Tn, and the spreadsheet column & row limits
##    of both arrays into struct lims.)
## @end example
##
## @seealso {xlsread, odsread, xls2oct, ods2oct}
##
## @end deftypefn

## Author: Philip Nienhuis
## Created: 2009-12-13

function [ numarr, txtarr, lim ] = parsecell (rawarr, arg2=[])

  ## Split a heterogeneous 2D cell array RAWARR into
  ##   numarr - a numeric matrix (text and empty cells become NaN), and
  ##   txtarr - a cell array of strings (non-text cells become ""),
  ## each cropped to the smallest rectangle that encloses its data.
  ##
  ## ARG2 is optional: either a limits matrix or a struct having a field
  ## "limits". Its first column is used as the origin offset to express
  ## the returned limits in lim.numlimits / lim.txtlimits.
  ##
  ## LIM always has fields numlimits and txtlimits, each laid out as
  ## [leftcol, rightcol; toprow, bottomrow] or left empty when there is
  ## no such data. Field rawlimits is added only when RAWARR is non-empty.

  ## Resolve the optional limits argument. Start from "no limits" so that
  ## a struct lacking a "limits" field really is ignored (as the warning
  ## says) rather than leaving rawlimits undefined further down.
  rawlimits = [];
  if (isstruct (arg2))
    ## Assume a file ptr has been supplied
    if (isfield (arg2, "limits"))
      rawlimits = arg2.limits;
    else
      warning ("Invalid file ptr supplied to parsecell() - limits ignored.\n");
    endif
  else
    rawlimits = arg2;
  endif

  lim = struct ("numlimits", [], "txtlimits", []);
  numarr = [];
  txtarr = {};

  ## Nothing to parse: return the empty defaults
  if (isempty (rawarr))
    return;
  endif

  ## Shortcuts: purely numeric input needs no text pass, and a pure cell
  ## string array needs no numeric pass
  no_txt = all (all (cellfun (@isnumeric, rawarr)));
  no_num = (! no_txt) && iscellstr (rawarr);

  ## Logical map of the text entries in the raw data cell array
  txtptr = cellfun ("isclass", rawarr, "char");

  if (! no_txt && any (txtptr(:)))
    ## Placeholder of empty strings with the text cells copied into place
    txtarr = cell (size (rawarr));
    txtarr(:) = {""};
    txtarr(txtptr) = rawarr(txtptr);

    ## Outermost rows & columns that contain at least one text cell
    rowhit = any (txtptr, 2);
    colhit = any (txtptr, 1);
    irowt = find (rowhit, 1, "first");
    irowb = find (rowhit, 1, "last");
    icoll = find (colhit, 1, "first");
    icolr = find (colhit, 1, "last");

    ## Crop text array and save its limits
    txtarr = txtarr(irowt:irowb, icoll:icolr);
    lim.txtlimits = [icoll, icolr; irowt, irowb];
    if (! isempty (rawlimits))
      ## Shift from positions inside rawarr to positions relative to the
      ## origin given in the first column of rawlimits
      offset = rawlimits(:, 1) - [1; 1];
      lim.txtlimits(:, 1) = lim.txtlimits(:, 1) + offset;
      lim.txtlimits(:, 2) = lim.txtlimits(:, 2) + offset;
    endif
  endif

  if (! no_num)
    ## Cells that cannot contribute a number: empty cells and text cells
    emptr = cellfun ("isempty", rawarr) | txtptr;

    if (! all (emptr(:)))
      ## Outermost rows & columns that contain at least one usable value
      rowhit = any (! emptr, 2);
      colhit = any (! emptr, 1);
      irowt = find (rowhit, 1, "first");
      irowb = find (rowhit, 1, "last");
      icoll = find (colhit, 1, "first");
      icolr = find (colhit, 1, "last");

      ## Crop the raw data and the map of unusable cells alike
      rawcrop = rawarr(irowt:irowb, icoll:icolr);
      skip = emptr(irowt:irowb, icoll:icolr);

      ## Build numeric array: NaN everywhere, then drop in the values
      numarr = NaN (irowb - irowt + 1, icolr - icoll + 1);
      numarr(! skip) = cell2mat (rawcrop(! skip));

      ## Save limits
      lim.numlimits = [icoll, icolr; irowt, irowb];
      if (! isempty (rawlimits))
        offset = rawlimits(:, 1) - [1; 1];
        lim.numlimits(:, 1) = lim.numlimits(:, 1) + offset;
        lim.numlimits(:, 2) = lim.numlimits(:, 2) + offset;
      endif
    endif
  endif

  lim.rawlimits = rawlimits;

endfunction