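Description: Handle partially-matched sscanf() patterns in ascii2uni
 When sscanf() converts the entity value but stops before storing the
 consumed-character count (for example, when the final semicolon is
 missing), NConsumed is left unset. Initialize NConsumed to -1 before each
 call, detect that case, and consume an appropriate number of characters
 computed from the parsed value.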
Bug-Debian: https://bugs.debian.org/633704
Author: Benjamin Kaduk <kaduk@mit.edu>
diff -ruN uni2ascii-4.18.orig//ascii2uni.c uni2ascii-4.18/ascii2uni.c
--- uni2ascii-4.18.orig//ascii2uni.c	2011-05-14 22:15:20.000000000 -0400
+++ uni2ascii-4.18/ascii2uni.c	2011-08-23 20:07:29.000000000 -0400
@@ -208,6 +208,7 @@
   char aHfmt [8+2+1];
   char aDfmt [8+2+1];
   char cbuf[5];
+  char fmt_itoa[12];
   FILE *infp;
 
   UTF32 num;
@@ -555,45 +556,64 @@
        }
        else if (FType == CHENT) {
 	 if (AllHTMLP){
+	   NConsumed = -1;
 	   if(sscanf(iptr,aHfmt,&num,&NConsumed) > 0) {
-	     if(*(iptr+NConsumed-1) != ';') {
+	     if(NConsumed == -1 || *(iptr+NConsumed-1) != ';') {
 	       MicrosoftStyle++;
+	       if (NConsumed == -1) {
+		 if (snprintf(fmt_itoa, sizeof(fmt_itoa), "%x", num) > sizeof(fmt_itoa)-1) {
+		   fprintf(stderr, "UTF32 codepoint overflowed static buffer\n");
+		   exit(BADRECORD);
+		 }
+	         NConsumed = 3 /* "&#x" */ + strlen(fmt_itoa) + 1 /* ";" */;
+	       }
 	       fprintf(stderr,
 		       _("The HTML/HDML entity %1$s at token %2$lu of line %3$lu lacks the requisite final semicolon.\n"),
 		       ExtractSubstring(tmpstr,iptr,iptr+NConsumed-3),TokenNumber,LineNo);
 	       if(StrictP) {putchar(*iptr++); continue;}
-	       else {putu8(num);iptr+=NConsumed;}
+	       else {putu8(num);iptr+=NConsumed-1;}
 	     }
 	     else {putu8(num);iptr+=NConsumed;}
 	     TokenNumber++;
 	     continue;
 	   }
+	   NConsumed = -1;
 	   if(sscanf(iptr,aDfmt,&num,&NConsumed) > 0) {
-	     if(*(iptr+NConsumed-1) != ';') {
+	     if(NConsumed == -1 || *(iptr+NConsumed-1) != ';') {
 	       MicrosoftStyle++;
+	       if (NConsumed == -1) {
+		 if (snprintf(fmt_itoa, sizeof(fmt_itoa), "%u", num) > sizeof(fmt_itoa)-1) {
+		   fprintf(stderr, "UTF32 codepoint overflowed static buffer\n");
+		   exit(BADRECORD);
+		 }
+	         NConsumed = 2 /* "&#" */ + strlen(fmt_itoa) + 1 /* ";" */;
+	       }
 	       fprintf(stderr,
 		       _("The HTML/HDML entity %1$s at token %2$lu of line %3$lu lacks the requisite final semicolon.\n"),
 		       ExtractSubstring(tmpstr,iptr,iptr+NConsumed-3),TokenNumber,LineNo);
 	       if (StrictP) {putchar(*iptr++); continue;}
-	       else {putu8(num);iptr+=NConsumed;}
+	       else {putu8(num);iptr+=NConsumed-1;}
 	     }
 	     else {putu8(num);iptr+=NConsumed;}
 	     TokenNumber++;
 	     continue;
 	   }
 	 }
+	 NConsumed = -1;
 	 if(sscanf(iptr,afmt,&enam,&NConsumed) > 0) {
+	   if (NConsumed == -1) NConsumed = 1 /* "&" */ + strlen(enam) + 1 /* ";" */;
 	   if( (num = LookupCodeForEntity(enam))) {
 	     if(*(iptr+NConsumed-1) != ';') {
 	       MicrosoftStyle++;
 	       fprintf(stderr,_("The HTML/HDML entity %1$s at token %2$lu of line %3$lu lacks the requisite final semicolon.\n"),ExtractSubstring(tmpstr,iptr,iptr+NConsumed-3),TokenNumber,LineNo);
 	       if(StrictP) {putchar(*iptr++);continue;}
-	       else {putu8(num);iptr+=NConsumed;}
+	       else {putu8(num);iptr+=NConsumed-1;}
 	     }
 	     else {putu8(num);iptr+=NConsumed;}
 	     TokenNumber++;
 	   }
 	   else {
+	     if(*(iptr+NConsumed-1) != ';') NConsumed--;
 	     fprintf(stderr,"ascii2uni: unknown HTML/HDML character entity \"&%s;\" at line %lu\n",
 		     enam,LineNo);
 	     putu8(UNI_REPLACEMENT_CHAR);
