1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
|
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ocr.R
\name{pdf_ocr_text}
\alias{pdf_ocr_text}
\alias{pdf_ocr_data}
\title{OCR text extraction}
\usage{
pdf_ocr_text(
pdf,
pages = NULL,
opw = "",
upw = "",
dpi = 600,
language = "eng",
options = NULL
)
pdf_ocr_data(
pdf,
pages = NULL,
opw = "",
upw = "",
dpi = 600,
language = "eng",
options = NULL
)
}
\arguments{
\item{pdf}{file path or raw vector with pdf data}
\item{pages}{which pages of the pdf file to extract}
\item{opw}{string with owner password to open pdf}
\item{upw}{string with user password to open pdf}
\item{dpi}{resolution to render image that is passed to \link{pdf_convert}.}
\item{language}{passed to \link[tesseract:tesseract]{tesseract} to specify the
language of the engine.}
\item{options}{passed to \link[tesseract:tesseract]{tesseract} to specify OCR parameters}
}
\description{
Perform OCR text extraction. This requires that you have the \code{tesseract} package installed.
}
\seealso{
Other pdftools:
\code{\link{pdftools}},
\code{\link{qpdf}},
\code{\link{rendering}}
}
\concept{pdftools}
|