void-packages/srcpkgs/catdoc/patches/001-XLS_parsing_improvements.patch
Đoàn Trần Công Danh c987560802 srcpkgs/c*: convert patches to -Np1
```sh
# Convert the patches of every template matching srcpkgs/$1* that still
# declares patch_args=-Np0 into -Np1 form (a/ and b/ path prefixes), then
# delete the patch_args line from that template.
#   $1 - prefix of the package directory names to process.
git grep -l '^patch_args=-Np0' "srcpkgs/$1*/template" |
while IFS= read -r template; do
	for p in "${template%/template}"/patches/*; do
		# An unmatched glob is left as a literal "*"; skip anything that
		# is not a regular file instead of letting sed fail on it.
		[ -f "$p" ] || continue
		# What the sed program below does, in order:
		#  - leaves "--- /dev/null" and "+++ /dev/null" headers alone;
		#  - leaves context-diff range lines ("*** 1,5 ****") alone;
		#  - prefixes "*** " and "--- " file headers with a/ and drops
		#    ".orig" / ".patched." markers from the "--- " file name;
		#  - remembers the rewritten "--- " line so that a "+++ -"
		#    header can be rebuilt from it as "+++ b/...";
		#  - prefixes every other "+++ " header with b/.
		# Only a literal leading "./" is stripped from the paths: the
		# dot is bracketed so that a one-character directory name such
		# as "x/" is not mistaken for it and silently dropped.
		sed -i '
			\,^[+-][+-][+-] /dev/null,b
			/^[*-]\+ [0-9]\+\(,[0-9]\+\)\? [*-]\+$/b
			s,^[*][*][*] ,&a/,
			/^--- /{
				s,\(^--- \)\([.]/\)*,\1a/,
				s,[.][Oo][Rr][Ii][Gg]\([	/]\),\1,
				s/[.][Oo][Rr][Ii][Gg]$//
				s/[.]patched[.]\([^.]\)/.\1/
				h
			}
			/^+++ -/{
				g
				s/^--- a/+++ b/
				b
			}
			s,\(^+++ \)\([.]/\)*,\1b/,
		' "$p"
	done
	sed -i '/^patch_args=/d' "$template"
done
```
2021-06-20 13:17:29 +07:00

123 lines
3.5 KiB
Diff

Description: Improve the XLS parsing:
* Don't stop processing after an EOF which is not followed by a BOF, as there
are many records that can appear after it (like a graph).
* On unexpected BOF record, dump already extracted data before complaining and
freeing memory.
* Accept different versions of BOF and XF records.
* Add more #defines for record types.
--- a/src/xlsparse.c
+++ b/src/xlsparse.c
@@ -107,12 +107,13 @@
itemsread = catdoc_read(rec, 1, reclen, input);
rec[reclen] = '\0';
}
+ /*
+ fprintf(stderr,"Rectype 0x%04X reclen=%d\n",rectype, reclen);
if(eof_flag) {
- if (rectype != BOF) {
+ if (rectype != BOF8) {
break;
}
- }
-/* fprintf(stderr,"Rectype 0x%04X reclen=%d\n",rectype, reclen); */
+ }*/
process_item(rectype,reclen,rec);
if (rectype == MSEOF) {
eof_flag=1;
@@ -150,7 +151,7 @@
case WRITEPROT:
/* File is write protected, but we only read it */
break;
- case 0x42: {
+ case CODEPAGE: {
if (source_charset) break;
codepage=getshort(rec,0);
/*fprintf(stderr,"CODEPAGE %d\n",codepage); */
@@ -274,9 +275,10 @@
}
break;
}
- case 0x03:
- case 0x103:
- case 0x303:
+ /* These 3 don't seem to make any sense. */
+ case INVALID_03:
+ case SXFORMULA:
+ case INVALID_303:
case NUMBER: {
int row,col;
unsigned char **pcell;
@@ -363,22 +365,31 @@
*saved_reference=copy_unicode_string(&src);
break;
}
- case BOF: {
+ case BOF2:
+ case BOF3:
+ case BOF4:
+ case BOF8: {
if (rowptr) {
fprintf(stderr,"BOF when current sheet is not flushed\n");
+ print_sheet();
free_sheet();
}
break;
}
- case XF:
- case 0x43: /*from perl module Spreadsheet::ParseExecel */
+ case XF_4P:
+ case XF_4:
+ case XF: /*from perl module Spreadsheet::ParseExcel */
{
- short int formatIndex = getshort(rec,2);
+ short int formatIndex;
+ if (biff_version == 4)
+ formatIndex = (short int)rec[1];
+ else
+ formatIndex = getshort(rec, 2);
/* we are interested only in format index here */
if (formatTableIndex >= formatTableSize) {
formatTable=realloc(formatTable,
- (formatTableSize+=16)*sizeof(short int));
-
+ (formatTableSize+=16)*sizeof(short int));
+
if (!formatTable) {
fprintf(stderr,"Out of memory for format table");
exit (1);
--- a/src/xltypes.h
+++ b/src/xltypes.h
@@ -20,7 +20,7 @@
#define AUTOFILTERINFO 0x9D
#define BACKUP 0x40
#define BLANK 0x201
-#define BOF 0x809
+#define BOF8 0x809
#define BOOKBOOL 0xDA
#define BOOLERR 0x205
#define BOTTOMMARGIN 0x29
@@ -149,11 +149,21 @@
#define WRITEPROT 0x86
#define WSBOOL 0x81
#define XCT 0x59
-#define XF 0xE0
+#define XF_4P 0xE0
#define SST 0xFC
#define CONSTANT_STRING 0xFD
#define REFRESHALL 0x1B7
#define USESELFS 0x160
#define EXTSST 0xFF
/* Vitus additions */
-#define INTEGER_CELL 0x202
+#define INTEGER_CELL 0x202
+/* Tincho additions */
+#define BOF2 0x09
+#define BOF3 0x209
+#define BOF4 0x409
+#define INVALID_03 0x03
+#define INVALID_303 0x303
+#define MSODRAWING 0xEC
+#define SXFORMULA 0x103
+#define XF 0x43
+#define XF_4 0x443