1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
|
/* stringlib: bytes joining implementation */
#if STRINGLIB_IS_UNICODE
#error join.h only compatible with byte-wise strings
#endif
/* Join the items of `iterable`, placing `sep` between consecutive items.
 *
 * sep      - separator object; its data pointer and length are read once,
 *            up front, through STRINGLIB_STR / STRINGLIB_LEN.
 * iterable - converted with PySequence_Fast.  Every item must be either an
 *            exact bytes object or an object for which
 *            PyObject_GetBuffer(..., PyBUF_SIMPLE) succeeds.
 *
 * Returns the object created by STRINGLIB_NEW holding the joined data.  When
 * STRINGLIB_MUTABLE is false and the sequence has exactly one item that
 * passes STRINGLIB_CHECK_EXACT, that item itself is returned with its
 * reference count incremented instead of being copied.
 *
 * Returns NULL on failure.  Errors raised directly by this function:
 *   - TypeError     an item does not provide a buffer
 *   - OverflowError the total length would exceed PY_SSIZE_T_MAX
 *   - RuntimeError  the sequence length changed during the sizing pass
 *   - PyErr_NoMemory() when the Py_buffer array cannot be allocated
 * NULL is also returned when PySequence_Fast or STRINGLIB_NEW returns NULL.
 */
Py_LOCAL_INLINE(PyObject *)
STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable)
{
const char *sepstr = STRINGLIB_STR(sep);
Py_ssize_t seplen = STRINGLIB_LEN(sep);
PyObject *res = NULL;
/* Write cursor into the result during the copy phase. */
char *p;
Py_ssize_t seqlen = 0;
/* Running total of the result size in bytes. */
Py_ssize_t sz = 0;
/* nbufs is the number of entries of `buffers` that must be released. */
Py_ssize_t i, nbufs;
PyObject *seq, *item;
Py_buffer *buffers = NULL;
/* Sequences of at most NB_STATIC_BUFFERS items use the local array below
 * instead of a PyMem allocation.
 */
#define NB_STATIC_BUFFERS 10
Py_buffer static_buffers[NB_STATIC_BUFFERS];
/* Results smaller than GIL_THRESHOLD bytes are copied without releasing
 * the GIL.
 */
#define GIL_THRESHOLD 1048576
/* Starts enabled; cleared below if any item goes through
 * PyObject_GetBuffer or if the result is smaller than GIL_THRESHOLD.
 */
int drop_gil = 1;
PyThreadState *save = NULL;
seq = PySequence_Fast(iterable, "can only join an iterable");
if (seq == NULL) {
return NULL;
}
seqlen = PySequence_Fast_GET_SIZE(seq);
/* Empty sequence: the result is an empty object. */
if (seqlen == 0) {
Py_DECREF(seq);
return STRINGLIB_NEW(NULL, 0);
}
#if !STRINGLIB_MUTABLE
/* Single item of the exact result type: hand it back as-is with a new
 * reference rather than copying it.
 */
if (seqlen == 1) {
item = PySequence_Fast_GET_ITEM(seq, 0);
if (STRINGLIB_CHECK_EXACT(item)) {
Py_INCREF(item);
Py_DECREF(seq);
return item;
}
}
#endif
/* Pick the storage for the per-item Py_buffer records. */
if (seqlen > NB_STATIC_BUFFERS) {
buffers = PyMem_NEW(Py_buffer, seqlen);
if (buffers == NULL) {
Py_DECREF(seq);
PyErr_NoMemory();
return NULL;
}
}
else {
buffers = static_buffers;
}
/* Here is the general case. Do a pre-pass to figure out the total
 * amount of space we'll need (sz), and see whether all arguments are
 * bytes-like.
 */
for (i = 0, nbufs = 0; i < seqlen; i++) {
Py_ssize_t itemlen;
item = PySequence_Fast_GET_ITEM(seq, i);
if (PyBytes_CheckExact(item)) {
/* Fast path. */
/* Only the obj, buf and len fields are filled in by hand; the new
 * reference stored in obj is handed to PyBuffer_Release during
 * cleanup.
 */
buffers[i].obj = Py_NewRef(item);
buffers[i].buf = PyBytes_AS_STRING(item);
buffers[i].len = PyBytes_GET_SIZE(item);
}
else {
if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) {
/* nbufs has not been advanced for this index, so the cleanup
 * code does not release buffers[i].
 */
PyErr_Format(PyExc_TypeError,
"sequence item %zd: expected a bytes-like object, "
"%.80s found",
i, Py_TYPE(item)->tp_name);
goto error;
}
/* If the backing objects are mutable, then dropping the GIL
 * opens up race conditions where another thread tries to modify
 * an object we hold a buffer on. Such code has data races
 * anyway, but this is a conservative approach that avoids
 * changing the behaviour of that data race.
 */
drop_gil = 0;
}
nbufs = i + 1; /* for error cleanup */
itemlen = buffers[i].len;
/* Check before adding so that sz never exceeds PY_SSIZE_T_MAX. */
if (itemlen > PY_SSIZE_T_MAX - sz) {
PyErr_SetString(PyExc_OverflowError,
"join() result is too long");
goto error;
}
sz += itemlen;
/* One separator is accounted for before every item except the first. */
if (i != 0) {
if (seplen > PY_SSIZE_T_MAX - sz) {
PyErr_SetString(PyExc_OverflowError,
"join() result is too long");
goto error;
}
sz += seplen;
}
/* The length is re-read on every iteration and compared with the value
 * captured before the loop.
 * NOTE(review): presumably this guards against code run while acquiring
 * a buffer resizing the sequence -- confirm.
 */
if (seqlen != PySequence_Fast_GET_SIZE(seq)) {
PyErr_SetString(PyExc_RuntimeError,
"sequence changed size during iteration");
goto error;
}
}
/* Allocate result space. */
res = STRINGLIB_NEW(NULL, sz);
if (res == NULL)
goto error;
/* Catenate everything. */
p = STRINGLIB_STR(res);
if (sz < GIL_THRESHOLD) {
drop_gil = 0; /* Benefits are likely outweighed by the overheads */
}
/* Between here and the matching restore below, only memcpy and pointer
 * arithmetic on the already-acquired buffers are performed.
 */
if (drop_gil) {
save = PyEval_SaveThread();
}
if (!seplen) {
/* fast path */
/* Empty separator: copy the items back to back. */
for (i = 0; i < nbufs; i++) {
Py_ssize_t n = buffers[i].len;
char *q = buffers[i].buf;
memcpy(p, q, n);
p += n;
}
}
else {
for (i = 0; i < nbufs; i++) {
Py_ssize_t n;
char *q;
/* Separator goes before every item except the first. */
if (i) {
memcpy(p, sepstr, seplen);
p += seplen;
}
n = buffers[i].len;
q = buffers[i].buf;
memcpy(p, q, n);
p += n;
}
}
if (drop_gil) {
PyEval_RestoreThread(save);
}
/* Success: skip the error label but share the cleanup code. */
goto done;
error:
res = NULL;
done:
/* Common cleanup: drop the sequence, release exactly the nbufs buffers
 * that were acquired, and free the array only if it was heap-allocated.
 */
Py_DECREF(seq);
for (i = 0; i < nbufs; i++)
PyBuffer_Release(&buffers[i]);
if (buffers != static_buffers)
PyMem_Free(buffers);
return res;
}
/* Undefine the helper macros that were defined inside the function body. */
#undef NB_STATIC_BUFFERS
#undef GIL_THRESHOLD
|