File: xlsopen.m

package info (click to toggle)
octave-io 2.7.0-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,808 kB
  • sloc: objc: 2,092; cpp: 546; python: 438; makefile: 204; xml: 23; sh: 20
file content (466 lines) | stat: -rw-r--r-- 18,803 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
## Copyright (C) 2009-2025 Philip Nienhuis
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Octave; see the file COPYING.  If not, see
## <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} @var{xls} = xlsopen (@var{filename})
## @deftypefnx {Function File} @var{xls} = xlsopen (@var{filename}, @var{readwrite})
## @deftypefnx {Function File} @var{xls} = xlsopen (@var{filename}, @var{readwrite}, @var{reqintf})
## @deftypefnx {Function File} @var{xls} = xlsopen (@var{filename}, @var{readwrite}, @var{reqintf}, @var{verb})
## Get a pointer to a spreadsheet in memory in the form of return argument
## (file pointer struct) @var{xls}.
##
## Calling xlsopen without specifying a return argument is fairly useless
## and considered an error!  After processing the spreadsheet, the file
## pointer must be explicitly closed by calling xlsclose() to release possibly
## large amounts of RAM.
##
## @var{filename} should be a valid spreadsheet file name (including
## extension); see "help xlsread" for an overview of supported spreadsheet file
## formats.
##
## If @var{readwrite} is set to 0 (default value) or omitted, the spreadsheet
## file is opened for reading.  If @var{readwrite} is set to true or 1, a
## spreadsheet file is opened (or created) for reading & writing.
##
## Optional input argument @var{reqintf} can be used to override the
## spreadsheet I/O interface (see below) that otherwise would automatically
## be selected by xlsopen.  In most situations this parameter is unneeded as
## xlsopen automatically selects the most useful interface present, depending
## on installed external support software and requested file type.  A
## user-specified interface selection can be reset to default by entering
## a numeric value of -1.
##
## If a value of 1 or true is entered for @var{verb}, xlsopen returns info about
## the spreadsheet I/O interfaces that were found and/or are requested and
## active.  The default value is false (no info on interfaces is shown).
##
## Spreadsheet I/O Interfaces
## ==========================
## xlsopen works with interfaces, which are links to support software, mostly
## external. @*
## The built-in 'OCT' interface needs no external software and allows
## I/O from/to OOXML (Excel 2007 and up), ODS 1.2 and Gnumeric. @* For all other
## spreadsheet formats, or if you want more speed and/or more flexibility,
## additional external software is required.  See "help xlsread" for more info. @*
## Currently implemented interfaces to external SW are (in order of preference)
## 'COM' (Excel/COM), 'POI' (Java/Apache POI), 'JXL' (Java/JExcelAPI), 'OXS'
## (Java/OpenXLS), 'UNO' (Java/OpenOffice.org - EXPERIMENTAL!), 'OTK'
## (ODF Toolkit), 'JOD' (jOpendocument); see below:
##
## @table @asis
## @item .xls and .xlsx:
## One or more of (1) a Java JRE plus Apache POI >= 3.5, and/or JExcelAPI
## and/or OpenXLS, and/or OpenOffice.org (or clones) installed on your computer
## + proper javaclasspath set, or (2 - Windows only) OF-windows package and
## MS-Excel.  These interfaces are referred to as POI, JXL, OXS, UNO and COM,
## resp., and are preferred in that order by default (depending on presence of
## the pertinent support SW).  Currently the OCT interface has the lowest
## priority. @*
## Excel'95 spreadsheets (BIFF5) can only be read using the JXL (JExcelAPI),
## UNO (Open-/LibreOffice), and  COM (Excel-ActiveX) interfaces.
##
## @item .ods, .sxc:
## A Java JRE plus one or more of (ODFtoolkit (version 0.7.5 or 0.8.6 - 0.8.8)
## & xercesImpl v.2.9.1), jOpenDocument, or OpenOffice.org (or clones)
## installed on your computer + proper javaclasspath set.  These interfaces
## are referred to as OTK, JOD, and UNO resp., and are preferred in that order
## by default (depending on presence of support SW).  The OCT interface has
## the lowest priority. @*
## The old OpenOffice.org .sxc format can be read using the UNO interface and
## older versions of the JOD interface.
##
## @item Other formats:
## By invoking the UNO interface one can read any format that the installed
## LibreOffice version supports; see below. The same goes (on Windows systems)
## for the COM interface (invoking MS-Excel).  However, writing to other file
## formats than .xlsx, .ods and .xls is not implemented for COM.
##
## Depending on the installed LibreOffice release, in addition to .xls, .xlsx,
## .ods and .sxc, the following file formats may be read/written (untested!)
## when using the UNO interface.  The pertinent import/export filters are
## inferred from the filename extension.
##
## @verbatim
##   File format                           filename extension(s)
##   ====================================  =====================
##   "Gnumeric"                            .gnumeric, .gnm
##   "Text CSV"                            .csv
##   "UOF spreadsheet"                     .uos
##   "OpenDocument Spreadsheet Flat XML"   .fods
##   "dBase"                               .dbf
##   "Digital Interchange Format"          .dif
##   "Lotus 1-2-3"                         .wk1 .wk2 .123
##   "WPS Lotus Calc"                      .wk3 .wk4
##   "MS Works Calc"                       .wks, .wdb
##   "ClarisWorks Calc"                    .cwk
##   "Mac Works Calc"                      .wps
##   "Quattro Pro 6.0"                     .wb2
##   "WPS QPro Calc"                       .wb1 .wq1 .wq2
##   "Rich Text Format (StarCalc)"         .rtf
##   "SYLK"                                .slk .sylk
##   "Apple Numbers"                       .numbers
##   "Microsoft Multiplan"                 .mp
## @end verbatim
## @end table
##
## The utility function chk_spreadsheet_support.m can be useful to set the
## javaclasspath for the Java-based interfaces.
##
## Beware: 'zombie' Excel invocations may be left running invisibly in case
## of COM errors or after forgetting to close the file pointer.  Similarly for
## LibreOffice, which may even prevent Octave from being closed (the reason
## the UNO interface is still experimental).
##
## Examples:
##
## @example
##   xls = xlsopen ('test1.xls');
##   (get a pointer for reading from spreadsheet test1.xls)
##
##   xls = xlsopen ('test2.xls', 1, 'POI');
##   (as above, but test2.xls will be written to; in this case Java and the
##    Apache POI interface are requested)
## @end example
##
## @seealso{xlsclose, xlsread, xlswrite, xls2oct, oct2xls, xlsfinfo,
## chk_spreadsheet_support}
##
## @end deftypefn

## Author: Philip Nienhuis <prnienhuis at users.sf.net>
## Created: 2009-11-29

function [ xls ] = xlsopen (filename, xwrite=0, reqinterface=[], verbose=false)

  ## Open (or prepare to create) spreadsheet file FILENAME and return a file
  ## pointer struct XLS for it.
  ##
  ## Inputs:
  ##   filename     - char, spreadsheet file name (suffix optional)
  ##   xwrite       - numeric/logical; nonzero = open for reading & writing
  ##   reqinterface - char or cellstr with requested interface name(s),
  ##                  -1 to reset the interface selection, or empty (default)
  ##   verbose      - if true, print info about the interfaces being tried
  ## Output:
  ##   xls          - file pointer struct, or [] if no interface could open
  ##                  the file (a warning is emitted in that case)
  ## Errors are raised for a missing return argument, invalid argument types,
  ## unknown interface names, a missing file (read mode), a non-existent
  ## target directory or a non-writable file (write mode).

  persistent interfaces; persistent chkintf; persistent lastintf;
  ## interfaces.<intf> = [] (not yet checked),
  ##                      0 (found to be unsupported or unwanted), or
  ##                      1 (checked and OK)

  ## Define preferred order of (default) file extensions for if no extension
  ## was specified
  persistent prefext = {".xls", ".xlsx", ".xlsm", ".ods", ".gnumeric", ...
                        ".gnm", ".csv"};

  if (isempty (chkintf) || (isnumeric (reqinterface) && reqinterface == -1))
    ## Either not yet checked, or selection to be reset to default
    chkintf = 1;
    interfaces = struct ("COM", [], "JXL", [], "JOD", [], "OCT", 1, ...
                         "OTK", [], "OXS", [], "POI", [], "UNO", []);
    if (isnumeric (reqinterface))
      reqinterface = "";
    endif
  endif
  if (isempty (lastintf))
    lastintf = "---";
  endif

  ## Input checks
  if (nargout < 1)
      error (["xlsopen: no return argument specified!\n", ...
              "usage:  XLS = xlsopen (Xlfile [, Rw] [, reqintf])\n"]);
  endif
  if (! (islogical (xwrite) || isnumeric (xwrite)))
      error (["xlsopen: numerical or logical value expected for arg #2 ", ...
              "(readwrite)\n"]);
  endif
  if (ischar (filename))
    [pth, fnam, ext] = fileparts (filename);
    if (isempty (fnam))
      error ("xlsopen: no filename or empty filename specified");
    endif
    if (xwrite && ! isempty (pth))
      apth = make_absolute_filename (pth);
      if (exist (apth) != 7)
        error ("xlsopen: cannot write into non-existent directory:\n'%s'\n", ...
               apth);
      endif
    endif
  else
    error ("xlsopen: filename expected for argument #1");
  endif

  ## Check available interfaces if none was specified now or in earlier call
  if (! isempty (reqinterface))
    intfmsg = "requested";
    if (! (ischar (reqinterface) || iscell (reqinterface)))
      error (["xlsopen: arg. #3 (interface) not recognized - ", ...
              "character value required\n"]);
    endif
    ## Turn arg3 into cell array if needed
    if (! iscell (reqinterface))
      reqinterface = {reqinterface};
    endif
    reqinterface = cellfun (@upper, reqinterface, "uni", 0);
    ## Reject unknown interface names up front.  Assigning to a dynamic
    ## struct field never fails (it silently creates the field), so the
    ## names must be validated explicitly against the known interfaces.
    for ii = 1:numel (reqinterface)
      if (! isfield (interfaces, reqinterface{ii}))
        error (["xlsopen: unknown interface \"%s\" requested.\n", ...
                "Only COM, JOD, JXL, OCT, OTK, OXS, POI or UNO supported\n"], ...
               reqinterface{ii});
      endif
    endfor
    ## Check if previously used interface matches a requested interface
    if (isempty (regexpi (reqinterface, lastintf, "once"){1}) || ...
        ! interfaces.(reqinterface{1}))
      ## New interface requested. Provisionally disable all interfaces
      interfaces.COM = 0; interfaces.JOD = 0; interfaces.JXL = 0;
      interfaces.OCT = 0; interfaces.OTK = 0; interfaces.OXS = 0;
      interfaces.POI = 0; interfaces.UNO = 0;
      ## Mark the requested interface(s) as "not yet checked" so that
      ## getinterfaces() will probe them
      for ii = 1:numel (reqinterface)
        interfaces.(reqinterface{ii}) = [];
      endfor
      if (verbose)
        printf ("\nChecking requested interface(s): ");
      endif
      interfaces = getinterfaces (interfaces, verbose);
      ## Well, is/are the requested interface(s) supported on the system?
      xlsintf_cnt = 0;
      for ii = 1:numel (reqinterface)
        if (! interfaces.(reqinterface{ii}))
          ## No it aint
          if (verbose)
            printf ("%s is not supported.\n", reqinterface{ii});
          endif
        else
          ++xlsintf_cnt;
        endif
      endfor
      ## Reset interface check indicator if no requested support found
      if (! xlsintf_cnt)
        chkintf = [];
        xls = [];
        return
      endif
    endif
  else
    intfmsg = "available";
  endif

  ## Check if spreadsheet file exists. First check (supported) file name suffix:
  [ftype, ~, ext] = __get_ftype__ (filename);
  has_suffix = ! isempty (ext);
  if (has_suffix)
    ext = lower (ext);
  endif

  ## Adapt file open mode for readwrite argument.
  ## Var readwrite is really used to avoid creating files when wanting
  ## to read, or not finding not-yet-existing files when wanting to write
  ## a new one.
  if (xwrite)
    fmode = "r+b";
    if (! has_suffix)
      ## Provisionally add .xlsx suffix to filename (most used format)
      filename = [filename ".xlsx"];
      ext = ".xlsx";
      ftype = 2;
    endif
  else
    fmode = "rb";
    if (! has_suffix)
      ## Try to find an existing file with a recognized file extension.
      ## NOTE(review): ftype and ext are not re-derived from the resolved
      ## file name here - confirm whether that is intended.
      filnm = mtchext (filename, prefext);
      if (! isempty (filnm))
        ## Simply choose the first one
        if (isstruct (filnm))
          filename = filnm(1).name;
        else
          filename = filnm;
        endif
      endif
    endif
  endif
  ## Explore for filename in relevant rw mode. stat() can't see if file is locked
  fid = fopen (filename, fmode);
  if (fid < 0)                      ## File doesn't exist...
    if (! xwrite)                   ## ...which obviously is fatal for reading...
      ## FIXME process open apps (Excel, LibreOffice, etc) before hard error
      ## The file name is passed as an argument (not pre-formatted into the
      ## template) so that any "%" in it is not interpreted by error()
      error ("xlsopen: file %s not found\n", filename);
    else                            ## ...but for writing, we need more info:
      fid = fopen (filename, "rb"); ## Check if it exists at all...
      if (fid < 0)                  ## File didn't exist yet. Simply create it
        xwrite = 3;
      else                          ## File exists, but isn't writable => Error
        fclose (fid);               ## Do not forget to close the handle neatly
        error (["xlsopen: write mode requested but file %s is ", ...
                "not writable\n"], filename);
      endif
    endif
  else
    ## Close file anyway to avoid COM or Java errors
    fclose (fid);
  endif

  ## Check for the various interfaces. No problem if they've already been
  ## checked, getinterfaces (far below) just returns immediately then.
  interfaces = getinterfaces (interfaces, verbose);

  ## If no external interface was detected and no suffix was given, use .xlsx
  if (! has_suffix && ! (interfaces.COM + interfaces.POI + ...
                         interfaces.JXL + interfaces.OXS + ...
                         interfaces.UNO))
    ftype = 2;
  endif

  ## Initialize file ptr struct
  xls = struct ("xtype",    "NONE",
                "app",      [],
                "filename", [],
                "workbook", [],
                "changed",  0,
                "limits",   []);

  ## Keep track of which interface is selected
  xlssupport = 0;

  ## Interface preference order is defined below:
  ## currently COM -> POI -> JXL -> OXS -> OTK -> JOD -> UNO -> OCT
  ## ftype (file type) is conveyed depending on interface capabilities

  if ((! xlssupport) && interfaces.COM && (ftype != 5))
    ## Excel functioning has been tested above & file exists, so we just invoke it.
    if (verbose)
      printf ("   Invoking COM ...");
    endif
    [ xls, xlssupport, lastintf ] = __COM_spsh_open__ (xls, xwrite, filename, xlssupport);

  elseif ((! xlssupport) && ((interfaces.POI >= 2 && ftype <= 2) || ...
                             (interfaces.POI == 1 && ftype == 1)))
    if (verbose)
      printf ("   Invoking POI ...");
    endif
    [ xls, xlssupport, lastintf ] = __POI_spsh_open__ (xls, xwrite, filename, xlssupport, ftype, interfaces);

  elseif ((! xlssupport) && interfaces.JXL && ftype == 1)
    if (verbose)
      printf ("   Invoking JXL ...");
    endif
    [ xls, xlssupport, lastintf ] = __JXL_spsh_open__ (xls, xwrite, filename, xlssupport, ftype);

  elseif ((! xlssupport) && interfaces.OXS && ftype == 1)
    if (verbose)
      printf ("   Invoking OXS ...");
    endif
    [ xls, xlssupport, lastintf ] = __OXS_spsh_open__ (xls, xwrite, filename, xlssupport, ftype);

  elseif (interfaces.OTK && ! xlssupport && ftype == 3)
    if (verbose)
      printf ("   Invoking OTK ...");
    endif
    [ xls, xlssupport, lastintf ] = ...
              __OTK_spsh_open__ (xls, xwrite, filename, xlssupport);

  elseif (interfaces.JOD && ! xlssupport && (ftype == 3 || ftype == 4))
    if (verbose)
      printf ("   Invoking JOD ...");
    endif
    [ xls, xlssupport, lastintf ] = ...
              __JOD_spsh_open__ (xls, xwrite, filename, xlssupport);

  elseif ((! xlssupport) && interfaces.UNO && (ftype != 5))
    if (verbose)
      printf ("   Invoking UNO ...");
    endif
    ## Part 1 of kludge to avoid lengthy delays while LO searches for printers
    ## during aLoader.loadComponentFromURL (..., "_blank", ...) call below, only
    ## req/d for .ods files
    unwind_protect
      if (ftype == 3)
        SDDP = getenv ("SAL_DISABLE_DEFAULTPRINTER");
        setenv ("SAL_DISABLE_DEFAULTPRINTER", "1");
        ## Proceed with the JAVA-UNO bridge stuff
      endif
      [ xls, xlssupport, lastintf ] = __UNO_spsh_open__ (xls, xwrite, filename, xlssupport);
    unwind_protect_cleanup
      ## Part 2: set env.var. to original value (or wipe it if it was empty)
      if (ftype == 3)
        setenv ("SAL_DISABLE_DEFAULTPRINTER", SDDP);
      endif
    end_unwind_protect

  elseif ((! xlssupport) && interfaces.OCT && ...
      (ftype == 2 || ftype == 3 || ftype == 5))
    if (verbose)
      printf ("   Invoking OCT ...");
    endif
    [ xls, xlssupport, lastintf ] = __OCT_spsh_open__ (xls, xwrite, filename, xlssupport, ftype);
  endif

  ## Get Named Ranges, if any.  Best effort: failures are deliberately ignored
  try
    xls.nmranges = getnmranges (xls);
  catch
  end_try_catch

  ## Rounding up. If none of the interfaces is supported we're out of luck.
  if (! xlssupport)
    if (isempty (reqinterface))
      ## If no suitable interface was detected (COM or UNO can read .csv), handle
      ## .csv in xlsread (as that's where Matlab n00bs would expect .csv support)
      if (ftype != 6)
        ## This message is appended after message from getinterfaces()
        if (verbose)
          printf ("None.\n");
        endif
        warning ("xlsopen: no '%s' spreadsheet I/O support with %s interfaces.\n", ...
                 ext, intfmsg);
      endif
    else
      ## No match between file type & interface found.  Join the requested
      ## names so the message is complete for any number of interfaces
      warning ("xlsopen: file type not supported by %s\n", ...
               strjoin (reqinterface, " "));
    endif
    xls = [];
    ## Reset found interfaces for re-testing in the next call. Add interfaces if needed.
    chkintf = [];
  else
    ## From here on xwrite is tracked via xls.changed in the various lower
    ## level r/w routines
    xls.changed = xwrite;

    ## xls.changed = 0 (existing/only read from), 1 (existing/data added), 2 (new,
    ## data added) or 3 (pristine, no data added).
    ## Until something was written to existing files we keep status "unchanged".
    if (xls.changed == 1)
      xls.changed = 0;
    endif
  endif

endfunction


function fname = mtchext (fname, prefext)

  ## Resolve a suffix-less file name to an existing file with a preferred
  ## extension.
  ##
  ## Inputs:
  ##   fname   - char, file name (optionally with path) without extension
  ##   prefext - cellstr of extensions (incl. leading dot) in decreasing
  ##             order of preference
  ## Output:
  ##   fname   - char; name (incl. the original path part) of the existing
  ##             file "<fname>.<ext>" whose extension ranks highest in
  ##             prefext, or, if there is no such file, the input name with
  ##             the first preferred extension appended.

  ## dir() only returns bare names, so remember the directory part
  pth = fileparts (fname);
  flist = {dir([fname ".*"]).name};
  ## Extension of each candidate, kept aligned with flist
  [~, ~, exts] = cellfun (@fileparts, flist, "uni", 0);

  ## Walk the preference list in order; the first extension that occurs
  ## among the candidates wins
  for ii = 1:numel (prefext)
    idx = find (strcmp (exts, prefext{ii}), 1);
    if (! isempty (idx))
      if (isempty (pth))
        fname = flist{idx};
      else
        fname = fullfile (pth, flist{idx});
      endif
      return
    endif
  endfor

  ## No candidate with a preferred extension found: fall back to the default
  fname = [fname prefext{1}];

endfunction