1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
|
# An attempt to address the PIL.Image buffer directly without copying it.
#
# This is achieved by extracting the buffer pointer from Image.im.unsafe_ptrs.
# The xsize, ysize, pixelsize and linesize values are extracted as well, to be passed
# to TessBaseAPI.SetImage(buffer, width, height, bytes_per_pixel, bytes_per_line).
#
# This works, but sometimes the output differs from that of the original code. I assume
# this is due to the different image format used in this method.
#
# The performance advantage was not significant based on benchmarks on my machine.
from libc.stdint cimport uintptr_t
# Lookup table keyed by image mode name. Going by the variable name, the values
# are presumably bits per pixel for each mode -- confirm before relying on it.
cdef object _mode_to_bpp = {
    '1': 1,
    'L': 8,
    'P': 8,
    'RGB': 24,
    'RGBA': 32,
    'CMYK': 32,
    'YCbCr': 24,
    'I': 32,
    'F': 32,
}
cdef void _image_buffer2(image, cuchar_t **buff, int *width, int *height,
                         int *bpp, int *bpl) except *:
    """Read image meta data and the pixel buffer address from unsafe pointers.

    Nothing is copied: ``buff[0]`` is set to an address inside the storage
    owned by ``image``, so the caller must keep ``image`` alive (and
    unmodified) for as long as the pointer is in use.

    Args:
        image: object exposing ``load()`` and ``im.unsafe_ptrs`` (a PIL image).
        buff: out, address of the first pixel row.
        width: out, value of the ``xsize`` entry.
        height: out, value of the ``ysize`` entry.
        bpp: out, value of the ``pixelsize`` entry (bytes per pixel).
        bpl: out, value of the ``linesize`` entry (bytes per line).

    Raises:
        Any exception raised while loading the image or reading the pointer
        table. The ``except *`` clause makes it propagate to the caller
        instead of being reported as unraisable and discarded, which would
        leave every output at its initial value.
    """
    cdef uintptr_t buff_ptr

    # get buffer from unsafe pointers without copying it
    image.load()
    # unsafe_ptrs is consumed as (name, value) pairs; index it by name
    ptrs = dict(image.im.unsafe_ptrs)

    width[0] = ptrs['xsize']
    height[0] = ptrs['ysize']
    bpp[0] = ptrs['pixelsize']  # bytes_per_pixel
    bpl[0] = ptrs['linesize']   # bytes_per_line

    # The 'image' entry is treated as the address of an array of row pointers;
    # only the first row pointer is taken.
    # NOTE(review): this assumes the rows are stored contiguously, linesize
    # bytes apart -- confirm this holds for the Pillow versions supported.
    buff_ptr = ptrs['image']
    buff[0] = (<cuchar_t **>buff_ptr)[0]
cdef char *_image_to_text2(const unsigned char *buff, int width, int height, int bpp, int bpl,
                           const char *lang,
                           const PageSegMode pagesegmode, const char *path) nogil except NULL:
    """Run recognition on a raw pixel buffer and return the UTF-8 text.

    Args:
        buff: raw pixel data, forwarded unchanged to ``TessBaseAPI.SetImage``.
        width, height: image dimensions.
        bpp: bytes per pixel.
        bpl: bytes per line.
        lang: language passed to ``TessBaseAPI.Init``.
        pagesegmode: page segmentation mode.
        path: data path passed to ``TessBaseAPI.Init``.

    Returns:
        The buffer returned by ``GetUTF8Text``; never NULL.

    Raises:
        RuntimeError: if the API cannot be initialized or no text is returned.
    """
    cdef:
        TessBaseAPI baseapi
        char *text

    # The function is declared ``except NULL``: a NULL return tells the caller
    # that a Python exception is pending, so one must be set before any
    # failure return.
    if baseapi.Init(path, lang) == -1:
        with gil:
            raise RuntimeError('Failed to initialize the tesseract API.')
    baseapi.SetPageSegMode(pagesegmode)
    baseapi.SetImage(buff, width, height, bpp, bpl)
    text = baseapi.GetUTF8Text()
    baseapi.End()
    if text == NULL:
        with gil:
            raise RuntimeError('Failed to recognize image text.')
    return text
def image_to_text2(image, const char *lang=_DEFAULT_LANG, const PageSegMode pagesegmode=PSM_AUTO,
                   const char *path=_DEFAULT_PATH):
    """Recognize OCR text from an image object, reading its buffer in place.

    The pixel buffer is not copied; its address is taken directly from the
    image and handed to tesseract.

    Args:
        image (:class:`PIL.Image`): image to be processed.

    Kwargs:
        lang (str): An ISO 639-3 language string. Defaults to ``_DEFAULT_LANG``.
        pagesegmode (int): Page segmentation mode. Defaults to `PSM.AUTO`.
            See :class:`~tesserocr.PSM` for all available psm options.
        path (str): The name of the parent directory of tessdata.
            Must end in /. Defaults to ``_DEFAULT_PATH``.

    Returns:
        str: The text extracted from the image.

    Raises:
        RuntimeError: When image fails to be loaded or recognition fails.
    """
    cdef:
        cuchar_t *buff = NULL
        int width = 0
        int height = 0
        int bpp = 0
        int bpl = 0
        char *text

    _image_buffer2(image, &buff, &width, &height, &bpp, &bpl)
    # If the helper could not produce a buffer, buff is still NULL; never pass
    # that on to the native recognizer.
    if buff == NULL:
        raise RuntimeError('Failed to load image buffer.')

    with nogil:
        text = _image_to_text2(buff, width, height, bpp, bpl,
                               lang, pagesegmode, path)
        if text == NULL:
            with gil:
                raise RuntimeError('Failed to recognize image text.')
    return _free_str(text)
cdef Pix *raw_to_pix(cuchar_t *buff, int bpp, int width, int height, int bpl) nogil except NULL:
    """Convert a raw pixel buffer to a newly created Pix.

    Applies the same logic done by tesseract's api.SetImage.

    Args:
        buff: first byte of the raw image data.
        bpp: bytes per pixel; 1, 3 and 4 are supported.
        width, height: image dimensions in pixels.
        bpl: bytes per line of ``buff``.

    Returns:
        The new Pix; never NULL.

    Raises:
        RuntimeError: if ``bpp`` is not supported or the Pix cannot be created.
    """
    cdef:
        int x
        int y
        int wpl
        uint *data
        Pix *pix

    bpp = bpp * 8  # bytes per pixel -> bits per pixel

    # Validate before allocating so that the error path has nothing to release.
    if bpp != 8 and bpp != 24 and bpp != 32:
        with gil:
            raise RuntimeError("Cannot convert RAW image to Pix with bpp = {}".format(bpp))

    # 24-bit input is stored in a 32-bit Pix.
    pix = pixCreate(width, height, 32 if bpp == 24 else bpp)
    if pix == NULL:
        with gil:
            raise RuntimeError("Failed to create a {}x{} Pix".format(width, height))
    wpl = pixGetWpl(pix)
    data = pixGetData(pix)

    if bpp == 8:
        # Greyscale just copies the bytes in the right order.
        for y in xrange(height):
            for x in xrange(width):
                SET_DATA_BYTE(data, x, buff[x])
            data += wpl
            buff += bpl
    elif bpp == 24:
        # Put the colors in the correct places in the line buffer.
        # data advances one word per pixel and is not realigned per row.
        # NOTE(review): this relies on wpl == width for the 32-bit Pix.
        for y in xrange(height):
            for x in xrange(width):
                SET_DATA_BYTE(data, COLOR_RED, buff[3 * x])
                SET_DATA_BYTE(data, COLOR_GREEN, buff[3 * x + 1])
                SET_DATA_BYTE(data, COLOR_BLUE, buff[3 * x + 2])
                data += 1
            buff += bpl
    else:
        # bpp == 32
        # Maintain byte order consistency across different endianness.
        # Each byte is widened to unsigned before shifting so that the
        # 24-bit shift cannot overflow a signed int.
        for y in xrange(height):
            for x in xrange(width):
                data[x] = (((<uint>buff[x * 4]) << 24) |
                           ((<uint>buff[x * 4 + 1]) << 16) |
                           ((<uint>buff[x * 4 + 2]) << 8) |
                           (<uint>buff[x * 4 + 3]))
            data += wpl
            buff += bpl
    return pix
def image_to_text3(image, const char *lang=_DEFAULT_LANG, const PageSegMode psm=PSM_AUTO,
                   const char *path=_DEFAULT_PATH):
    """Recognize OCR text from an image object via an intermediate Pix.

    The image's buffer address is read in place, converted to a Pix with
    ``raw_to_pix`` and the Pix is handed to ``_image_to_text``.

    Args:
        image (:class:`PIL.Image`): image to be processed.

    Kwargs:
        lang (str): language string. Defaults to ``_DEFAULT_LANG``.
        psm (int): Page segmentation mode. Defaults to ``PSM_AUTO``.
        path (str): tessdata parent directory. Defaults to ``_DEFAULT_PATH``.

    Returns:
        The recognized text, as produced by ``_free_str``.

    Raises:
        RuntimeError: When the image buffer is unavailable or recognition fails.
    """
    cdef:
        Pix *pix
        cuchar_t *buff = NULL
        int width = 0
        int height = 0
        int bpp = 0
        int bpl = 0
        char *text

    _image_buffer2(image, &buff, &width, &height, &bpp, &bpl)
    # If the helper could not produce a buffer, buff is still NULL; converting
    # it would read from a NULL pointer.
    if buff == NULL:
        raise RuntimeError('Failed to load image buffer.')

    with nogil:
        pix = raw_to_pix(buff, bpp, width, height, bpl)
        text = _image_to_text(pix, lang, psm, path)
        if text == NULL:
            with gil:
                raise RuntimeError('Failed recognize picture')
    return _free_str(text)
|