1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
|
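Description: Check for partially-matched sscanf() patterns and consume an
 appropriate number of characters.
 When an entity lacks its final semicolon, sscanf() can report a match without
 ever storing the consumed-character count. NConsumed is therefore preset to
 -1 before each call and, if it is still -1 afterwards, recomputed from the
 length of the parsed value or entity name.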
Bug-Debian: http://bugs.debian.org/633704
Author: Benjamin Kaduk <kaduk@mit.edu>
diff -ruN uni2ascii-4.18.orig//ascii2uni.c uni2ascii-4.18/ascii2uni.c
--- uni2ascii-4.18.orig//ascii2uni.c 2011-05-14 22:15:20.000000000 -0400
+++ uni2ascii-4.18/ascii2uni.c 2011-08-23 20:07:29.000000000 -0400
@@ -208,6 +208,7 @@
char aHfmt [8+2+1];
char aDfmt [8+2+1];
char cbuf[5];
+ char fmt_itoa[12];
FILE *infp;
UTF32 num;
@@ -555,45 +556,64 @@
}
else if (FType == CHENT) {
if (AllHTMLP){
+ NConsumed = -1;
if(sscanf(iptr,aHfmt,&num,&NConsumed) > 0) {
- if(*(iptr+NConsumed-1) != ';') {
+ if(NConsumed == -1 || *(iptr+NConsumed-1) != ';') {
MicrosoftStyle++;
+ if (NConsumed == -1) {
+ if (snprintf(fmt_itoa, sizeof(fmt_itoa), "%x", num) > sizeof(fmt_itoa)-1) {
+ fprintf(stderr, "UTF32 codepoint overflowed static buffer\n");
+ exit(BADRECORD);
+ }
+ NConsumed = 3 /* "&#x" */ + strlen(fmt_itoa) + 1 /* ";" */;
+ }
fprintf(stderr,
_("The HTML/HDML entity %1$s at token %2$lu of line %3$lu lacks the requisite final semicolon.\n"),
ExtractSubstring(tmpstr,iptr,iptr+NConsumed-3),TokenNumber,LineNo);
if(StrictP) {putchar(*iptr++); continue;}
- else {putu8(num);iptr+=NConsumed;}
+ else {putu8(num);iptr+=NConsumed-1;}
}
else {putu8(num);iptr+=NConsumed;}
TokenNumber++;
continue;
}
+ NConsumed = -1;
if(sscanf(iptr,aDfmt,&num,&NConsumed) > 0) {
- if(*(iptr+NConsumed-1) != ';') {
+ if(NConsumed == -1 || *(iptr+NConsumed-1) != ';') {
MicrosoftStyle++;
+ if (NConsumed == -1) {
+ if (snprintf(fmt_itoa, sizeof(fmt_itoa), "%u", num) > sizeof(fmt_itoa)-1) {
+ fprintf(stderr, "UTF32 codepoint overflowed static buffer\n");
+ exit(BADRECORD);
+ }
+ NConsumed = 2 /* "&#" */ + strlen(fmt_itoa) + 1 /* ";" */;
+ }
fprintf(stderr,
_("The HTML/HDML entity %1$s at token %2$lu of line %3$lu lacks the requisite final semicolon.\n"),
ExtractSubstring(tmpstr,iptr,iptr+NConsumed-3),TokenNumber,LineNo);
if (StrictP) {putchar(*iptr++); continue;}
- else {putu8(num);iptr+=NConsumed;}
+ else {putu8(num);iptr+=NConsumed-1;}
}
else {putu8(num);iptr+=NConsumed;}
TokenNumber++;
continue;
}
}
+ NConsumed = -1;
if(sscanf(iptr,afmt,&enam,&NConsumed) > 0) {
+ if (NConsumed == -1) NConsumed = 1 /* "&" */ + strlen(enam) + 1 /* ";" */;
if( (num = LookupCodeForEntity(enam))) {
if(*(iptr+NConsumed-1) != ';') {
MicrosoftStyle++;
fprintf(stderr,_("The HTML/HDML entity %1$s at token %2$lu of line %3$lu lacks the requisite final semicolon.\n"),ExtractSubstring(tmpstr,iptr,iptr+NConsumed-3),TokenNumber,LineNo);
if(StrictP) {putchar(*iptr++);continue;}
- else {putu8(num);iptr+=NConsumed;}
+ else {putu8(num);iptr+=NConsumed-1;}
}
else {putu8(num);iptr+=NConsumed;}
TokenNumber++;
}
else {
+ if(*(iptr+NConsumed-1) != ';') NConsumed--;
fprintf(stderr,"ascii2uni: unknown HTML/HDML character entity \"&%s;\" at line %lu\n",
enam,LineNo);
putu8(UNI_REPLACEMENT_CHAR);
|