/*
* The ExactImage library's hOCR to PDF command line frontend
* Copyright (C) 2008 - 2023 René Rebe, ExactCODE GmbH Germany
* Copyright (C) 2008 Archivista
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2. A copy of the GNU General
* Public License can be found in the file LICENSE.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANT-
* ABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
* Public License for more details.
*
* Alternatively, commercial licensing options are available from the
* copyright holder ExactCODE GmbH Germany.
*/
#include <string.h>
#include <iostream>
#include <fstream>
#include <iomanip>
#include <cmath>
#include <cctype>
#include <vector>
#include "ArgumentList.hh"
#include "config.h"
#include "Codecs.hh"
#include "pdf.hh"
#include "hocr.hh"
using namespace Utility;
/**
 * Command line entry point of the hOCR to PDF converter.
 *
 * Parses the command line, loads the input image, reads hOCR markup from
 * standard input and writes a single-page PDF to the requested output file.
 * Optionally the recognized text is also written to a separate text file.
 *
 * @param argc number of command line arguments
 * @param argv command line arguments
 * @return 0 on success; 1 if the usage text was shown, the input image could
 *         not be read, or an output file could not be opened or written.
 */
int main(int argc, char* argv[])
{
  ArgumentList arglist(false);

  // setup the argument list
  Argument<bool> arg_help("h", "help",
                          "display this help text and exit");
  arglist.Add(&arg_help);

  Argument<std::string> arg_input("i", "input",
                                  "input image filename",
                                  1, 1, true, true);
  arglist.Add(&arg_input);

  Argument<std::string> arg_output("o", "output",
                                   "output PDF filename",
                                   1, 1, true, true);
  arglist.Add(&arg_output);

  Argument<int> arg_resolution("r", "resolution",
                               "resolution overwrite",
                               0, 1, true, true);
  arglist.Add(&arg_resolution);

  Argument<bool> arg_no_image("n", "no-image",
                              "do not place the image over the text",
                              0, 0, true, true);
  arglist.Add(&arg_no_image);

  Argument<bool> arg_sloppy_text("s", "sloppy-text",
                                 "sloppily place text, group words, do not draw single glyphs",
                                 0, 0, true, true);
  arglist.Add(&arg_sloppy_text);

  Argument<std::string> arg_text("t", "text",
                                 "extract text, including trying to remove hyphens",
                                 0, 1, true, true);
  arglist.Add(&arg_text);

  Argument<int> arg_quality ("", "quality",
                             "quality setting used for writing compressed images\n\t\t"
                             "integer range 0-100, the default is 75",
                             0, 1, true, true);
  arglist.Add(&arg_quality);

  Argument<std::string> arg_compression ("", "compress",
                                         "compression method for writing images e.g. ascii85, hex, flate,\n"
                                         "\t\tjpeg, jpeg2000 ... auto default based on bit-depth",
                                         0, 1, true, true);
  arglist.Add(&arg_compression);

  // parse the specified argument list - and maybe output the Usage
  if (!arglist.Read(argc, argv) || arg_help.Get() == true)
    {
      std::cerr << "ExactImage hOCR to PDF converter, version " VERSION << std::endl
                << "Copyright (C) 2008 - 2023 René Rebe, ExactCODE GmbH" << std::endl
                << "Copyright (C) 2008 Archivista" << std::endl
                << "Usage:" << std::endl;

      arglist.Usage(std::cerr);
      return 1;
    }

  // load the image, if specified and possible
  Image image; image.w = image.h = 0;
  if (arg_input.Size())
    {
      if (!ImageCodec::Read(arg_input.Get(), image)) {
        std::cerr << "Error reading: " << arg_input.Get() << std::endl;
        return 1;
      }
    }

  // an explicit resolution on the command line overrides the image's own
  if (arg_resolution.Size())
    image.setResolution(arg_resolution.Get(), arg_resolution.Get());

  if (image.resolutionX() <= 0 || image.resolutionY() <= 0) {
    std::cerr << "Warning: Image x/y resolution not set, defaulting to: "
              << 300 << std::endl;
    image.setResolution(300, 300);
  }

  // only the horizontal resolution is used for the page geometry below
  unsigned int res = image.resolutionX();
  bool sloppy = arg_sloppy_text.Get();

  // Optional plain text output. The stream object lives on the stack so
  // that no error path below can leak it; hocr2pdf receives a null pointer
  // when no text file was requested.
  std::ofstream txtFile;
  std::ofstream* txtStream = 0;
  if (arg_text.Size()) {
    txtFile.open(arg_text.Get().c_str());
    if (!txtFile) {
      std::cerr << "Error opening text output: " << arg_text.Get() << std::endl;
      return 1;
    }
    txtStream = &txtFile;
  }

  // PDF is a binary format: open in binary mode so that platforms with
  // newline translation do not corrupt compressed image streams.
  std::ofstream s(arg_output.Get().c_str(), std::ios::out | std::ios::binary);
  if (!s) {
    std::cerr << "Error opening output: " << arg_output.Get() << std::endl;
    return 1;
  }

  int quality = 75;
  if (arg_quality.Size())
    quality = arg_quality.Get();

  std::string compression = "";
  if (arg_compression.Size())
    compression = arg_compression.Get();

  // page size in PDF points (1/72 inch)
  const double pageWidth = 72. * image.w / res;
  const double pageHeight = 72. * image.h / res;

  {
    // The PDF context is destroyed at the end of this scope, before the
    // output stream is checked and closed. NOTE(review): presumably the
    // destructor emits the remaining PDF structures - confirm in pdf.hh.
    PDFCodec pdfContext(&s);
    pdfContext.beginPage(pageWidth, pageHeight);
    pdfContext.setFillColor(0, 0, 0);

    // the hOCR markup is read from standard input
    hocr2pdf(std::cin, &pdfContext, res, sloppy, txtStream);

    if (!arg_no_image.Get())
      pdfContext.showImage(image, 0, 0, pageWidth, pageHeight, quality, compression);
  }

  int status = 0;

  if (txtStream) {
    txtFile.close();
    if (txtFile.fail()) {
      std::cerr << "Error writing: " << arg_text.Get() << std::endl;
      status = 1;
    }
  }

  // report write errors (e.g. disk full) instead of exiting successfully
  s.close();
  if (s.fail()) {
    std::cerr << "Error writing: " << arg_output.Get() << std::endl;
    status = 1;
  }

  return status;
}
|