Package: catdoc / 1:0.95-4.1

04-XLS_parsing_improvements.patch Patch series | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
Description: Improve the XLS parsing:
 * Don't stop processing after an EOF which is not followed by a BOF, as there
   are many records that can appear after it (like a graph).
 * On unexpected BOF record, dump already extracted data before complaining and
   freeing memory.
 * Accept different versions of BOF and XF records.
 * Add more #defines for record types.

--- a/src/xlsparse.c
+++ b/src/xlsparse.c
@@ -107,12 +107,13 @@
 			itemsread = catdoc_read(rec, 1, reclen, input);
 			rec[reclen] = '\0';
 		}
+                /*
+		fprintf(stderr,"Rectype 0x%04X reclen=%d\n",rectype, reclen);
 		if(eof_flag) {
-			if (rectype != BOF) {
+			if (rectype != BOF8) {
 				break;
 			}    
-		}
-/* 		fprintf(stderr,"Rectype 0x%04X reclen=%d\n",rectype, reclen); */
+		}*/
 		process_item(rectype,reclen,rec);
 		if (rectype == MSEOF) {
 			eof_flag=1;
@@ -150,7 +151,7 @@
 	case WRITEPROT: 
 		/* File is write protected, but we only read it */
 		break;
-	case 0x42: {
+	case CODEPAGE: {
 		if (source_charset) break;
 		codepage=getshort(rec,0);
 		/*fprintf(stderr,"CODEPAGE %d\n",codepage); */
@@ -274,9 +275,10 @@
 		}	
 		break;
 	}
-	case 0x03:
-	case 0x103:
-	case 0x303:
+	/* These 3 don't seem to make any sense. */
+	case INVALID_03:
+	case SXFORMULA:
+	case INVALID_303:
 	case NUMBER: {
 		int row,col;
 		unsigned char **pcell;
@@ -363,22 +365,31 @@
 		*saved_reference=copy_unicode_string(&src);
 		break;
 	}	    
-	case BOF: {
+	case BOF2:
+	case BOF3:
+	case BOF4:
+	case BOF8: {
 		if (rowptr) {
 			fprintf(stderr,"BOF when current sheet is not flushed\n");
+                        print_sheet();
 			free_sheet();
 		}
 		break;
 	}	  
-	case XF:
-	case 0x43: /*from perl module Spreadsheet::ParseExecel */		  
+	case XF_4P:
+	case XF_4:
+	case XF: /*from perl module Spreadsheet::ParseExecel */
 		{
-			short int formatIndex = getshort(rec,2);
+			short int formatIndex;
+                        if (biff_version == 4)
+                            formatIndex = (short int)rec[1];
+                        else
+                            formatIndex = getshort(rec, 2);
 			/* we are interested only in format index here */ 
 			if (formatTableIndex >= formatTableSize) {
 				formatTable=realloc(formatTable,
-														(formatTableSize+=16)*sizeof(short int));
-					  	  
+                                        (formatTableSize+=16)*sizeof(short int));
+
 				if (!formatTable) {
 					fprintf(stderr,"Out of memory for format table");
 					exit (1);
--- a/src/xltypes.h
+++ b/src/xltypes.h
@@ -20,7 +20,7 @@
 #define AUTOFILTERINFO 		 0x9D
 #define BACKUP 		         0x40
 #define BLANK 		         0x201
-#define BOF 		         0x809
+#define BOF8 		         0x809
 #define BOOKBOOL 		 0xDA
 #define BOOLERR 		 0x205
 #define BOTTOMMARGIN 		 0x29
@@ -149,11 +149,21 @@
 #define WRITEPROT 		 0x86
 #define WSBOOL 		         0x81
 #define XCT 		         0x59
-#define XF 		         0xE0
+#define XF_4P 		         0xE0
 #define SST			 0xFC
 #define CONSTANT_STRING	         0xFD
 #define REFRESHALL		 0x1B7
 #define USESELFS		 0x160
 #define EXTSST		         0xFF
 /* Vitus additions */
-#define INTEGER_CELL 	 0x202
+#define INTEGER_CELL 	         0x202
+/* Tincho addtions */
+#define BOF2 		         0x09
+#define BOF3 		         0x209
+#define BOF4 		         0x409
+#define INVALID_03               0x03
+#define INVALID_303              0x303
+#define MSODRAWING               0xEC
+#define SXFORMULA                0x103
+#define XF                       0x43
+#define XF_4                     0x443