File: unicode-repr.dpatch

package info (click to toggle)
python2.3 2.3.5-3sarge2
  • links: PTS
  • area: main
  • in suites: sarge
  • size: 43,908 kB
  • ctags: 81,384
  • sloc: ansic: 266,250; python: 246,028; makefile: 4,194; perl: 3,702; lisp: 3,630; sh: 2,576; xml: 1,601; objc: 740; cpp: 106; sed: 2
file content (96 lines) | stat: -rw-r--r-- 2,760 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#! /bin/sh -e

# All lines beginning with `# DPATCH:' are a description of the patch.
# DP: Fix wide unicode heap overflows (CVE-2006-4980)

dir=
if [ $# -eq 3 -a "$2" = '-d' ]; then
    pdir="-d $3"
    dir="$3/"
elif [ $# -ne 1 ]; then
    echo >&2 "usage: `basename $0`: -patch|-unpatch [-d <srcdir>]"
    exit 1
fi
case "$1" in
    -patch)
        patch $pdir -f --no-backup-if-mismatch -p0 < $0
        #cd ${dir}gcc && autoconf
        ;;
    -unpatch)
        patch $pdir -f --no-backup-if-mismatch -R -p0 < $0
        #rm ${dir}gcc/configure
        ;;
    *)
	echo >&2 "usage: `basename $0`: -patch|-unpatch [-d <srcdir>]"
        exit 1
esac
exit 0

# append the patch here and adjust the -p? flag in the patch calls.
--- Objects/unicodeobject.c.orig	2006-10-02 15:42:31.000000000 -0700
+++ Objects/unicodeobject.c	2006-10-02 15:50:51.058449437 -0700
@@ -1888,7 +1888,28 @@
 
     static const char *hexdigit = "0123456789abcdef";
 
-    repr = PyString_FromStringAndSize(NULL, 2 + 6*size + 1);
+    /* Initial allocation is based on the longest-possible unichr
+       escape.
+
+       In wide (UTF-32) builds '\U00xxxxxx' is 10 chars per source
+       unichr, so in this case it's the longest unichr escape. In
+       narrow (UTF-16) builds this is five chars per source unichr
+       since there are two unichrs in the surrogate pair, so in narrow
+       (UTF-16) builds it's not the longest unichr escape.
+
+       In wide or narrow builds '\uxxxx' is 6 chars per source unichr,
+       so in the narrow (UTF-16) build case it's the longest unichr
+       escape.
+    */
+
+    repr = PyString_FromStringAndSize(NULL,
+        2
+#ifdef Py_UNICODE_WIDE
+        + 10*size
+#else
+        + 6*size
+#endif
+        + 1);
     if (repr == NULL)
         return NULL;
 
@@ -1913,15 +1934,6 @@
 #ifdef Py_UNICODE_WIDE
         /* Map 21-bit characters to '\U00xxxxxx' */
         else if (ch >= 0x10000) {
-	    int offset = p - PyString_AS_STRING(repr);
-	    
-	    /* Resize the string if necessary */
-	    if (offset + 12 > PyString_GET_SIZE(repr)) {
-		if (_PyString_Resize(&repr, PyString_GET_SIZE(repr) + 100))
-		    return NULL;
-		p = PyString_AS_STRING(repr) + offset;
-	    }
-
             *p++ = '\\';
             *p++ = 'U';
             *p++ = hexdigit[(ch >> 28) & 0x0000000F];
@@ -1934,8 +1946,8 @@
             *p++ = hexdigit[ch & 0x0000000F];
 	    continue;
         }
-#endif
-	/* Map UTF-16 surrogate pairs to Unicode \UXXXXXXXX escapes */
+#else
+	/* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
 	else if (ch >= 0xD800 && ch < 0xDC00) {
 	    Py_UNICODE ch2;
 	    Py_UCS4 ucs;
@@ -1960,6 +1972,7 @@
 	    s--;
 	    size++;
 	}
+#endif
 
         /* Map 16-bit characters to '\uxxxx' */
         if (ch >= 256) {