1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347
|
/*
** k2pdfopt.c K2pdfopt optimizes PDF/DJVU files for mobile e-readers
** (e.g. the Kindle) and smartphones. It works well on multi-
** column PDF/DJVU files and can re-flow text even on scanned PDF
** files. It can also be used as a general PDF copying/cropping/
** re-sizing manipulation tool. It can generate native or
** bitmapped PDF output, with an optional OCR layer. There are
** downloads for MS Windows, Mac OSX, and Linux. The MS Windows
** version has an integrated GUI. K2pdfopt is open source.
**
** Copyright (C) 2020 http://willus.com
**
** This program is free software: you can redistribute it and/or modify
** it under the terms of the GNU Affero General Public License as
** published by the Free Software Foundation, either version 3 of the
** License, or (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU Affero General Public License for more details.
**
** You should have received a copy of the GNU Affero General Public License
** along with this program. If not, see <http://www.gnu.org/licenses/>.
**
**
** VERSION HISTORY
**
** See k2version.c.
**
**
** SOURCE CODE FLOW (BRIEF)
**
** Conversion process:
** k2pdfopt_proc_wildarg() (k2file.c) Convert wildcard arg, e.g. *.pdf
** |
** V
** k2pdfopt_proc_arg() (k2file.c) Handle arg if it is a folder
** |
** V
** k2pdfopt_proc_one() (k2file.c) Process a single PDF/DJVU file.
** | (Embarrassingly long function.)
** V
** bmpregion_source_page_add() (k2proc.c) Processes a source page of the file.
** This adds rectangular regions
** (BMPREGION structure) to the
** PAGEREGIONS structure.
**
** Some other key functions:
**
** bmpregion_vertically_break() (k2proc.c) looks for "text rows" in each region,
** segmenting it into consecutive "rows."
**
** bmpregion_add_textrow() (k2proc.c) is called by bmpregion_vertically_break()
** to accumulate the BMPREGIONs row by row.
**
** bmpregion_add() (k2proc.c) processes a "row" or rectangular region of the
** source page. It is fairly well commented.
**
** bmpregion_analyze_justification_and_line_spacing() (k2proc.c) analyzes the
** "rows" and attempts to determine things like if they are regular, uniform
** rows of text, how the text is justified, what the line spacing and font
** size is, and if any lines are indented or terminate a section.
**
** bmpregion_one_row_wrap_and_add() (k2proc.c) parses through one row and looks
** for words. It parses out each word to wrapbmp_add().
**
** wrapbmp_add() (wrapbmp.c) adds a graphical word (as a rectangular bitmap region)
** to the WRAPBMP structure, which stores up a row of text until it is too
** wide for the destination file and then flushes that row of text to the
** destination file.
**
**
*/
#include <k2pdfopt.h>
#if (defined(HAVE_K2GUI) && (defined(WIN32) || defined(WIN64)))
#include <windows.h>
static void k2pdfopt_launch_gui(K2PDFOPT_CONVERSION *k2conv,STRBUF *env,STRBUF *cmdline,int ascii);
#endif
/*
** Common shutdown sequence for main(): closes the k2sys layer, frees the
** string buffers, and closes the conversion structure.
**
** usermenu may be NULL if the caller has already freed that buffer
** (the GUI path frees it before handing control to the GUI).
*/
static void k2pdfopt_main_shutdown(K2PDFOPT_CONVERSION *k2conv,STRBUF *env,
                                   STRBUF *cmdline,STRBUF *usermenu)
{
    k2sys_close(&k2conv->k2settings);
    if (usermenu!=NULL)
        strbuf_free(usermenu);
    strbuf_free(env);
    strbuf_free(cmdline);
    k2pdfopt_conversion_close(k2conv);
}

/*
** Program entry point.
**
** 1. Collects the K2PDFOPT environment variable and the command-line
**    arguments into string buffers (env, cmdline).
** 2. Does a first parse of those (ansi / user interface options only) and,
**    depending on the result, launches the GUI, prints usage, or lists the
**    OCR languages and exits.
** 3. Otherwise parses all options, runs the interactive menu, re-parses,
**    and processes every file in k2conv->k2files.
**
** Always returns 0.
*/
int main(int argc,char *argv[])
{
    int i;
    static K2PDFOPT_CONVERSION _k2conv,*k2conv;
    K2PDFOPT_SETTINGS *k2settings;
    static STRBUF _cmdline,_env,_usermenu;
    STRBUF *cmdline,*env,*usermenu;
    static char *funcname="main";
#if (defined(WIN32) || defined(WIN64))
    int ascii;
    short *clinew;

    clinew=(short *)GetCommandLineW(); /* Get UTF-16 command-line */
    ascii=(clinew==NULL || wide_is_ascii(clinew));
#endif

    k2conv=&_k2conv;
    k2pdfopt_conversion_init(k2conv);
    k2settings=&k2conv->k2settings;
    cmdline=&_cmdline;
    env=&_env;
    usermenu=&_usermenu;
    strbuf_init(cmdline);
    strbuf_init(env);
    strbuf_init(usermenu);

    /*
    ** Environment variable K2PDFOPT
    */
#if (defined(WIN32) || defined(WIN64))
    strbuf_ensure(env,1024);
    wsys_get_envvar_ex("K2PDFOPT",env->s,1023);
#else
    strbuf_cpy(env,getenv("K2PDFOPT"));
#endif

    /*
    ** Command-line arguments (each one appended with quotes to cmdline)
    */
#if (defined(WIN32) || defined(WIN64))
    {
        short **argvw;
        int nargs;

        nargs=0;
        argvw=(clinew==NULL) ? NULL
                             : (short **)CommandLineToArgvW((LPWSTR)clinew,&nargs);
        if (argvw!=NULL)
        {
            /*
            ** Bound the loop by nargs, the length of the array actually
            ** returned, rather than by the C runtime's argc, which is not
            ** guaranteed to be the same count.
            */
            for (i=1;i<nargs;i++)
            {
                char *clineu8;
                int clen;

                /* First call (NULL destination) is used to size the buffer. */
                clen=utf16_to_utf8(NULL,argvw[i],MAXUTF8PATHLEN);
                willus_mem_alloc_warn((void **)&clineu8,clen+1,funcname,10);
                utf16_to_utf8(clineu8,argvw[i],clen);
                strbuf_cat_with_quotes(cmdline,clineu8);
                willus_mem_free((double **)&clineu8,funcname);
            }
            LocalFree(argvw);
        }
        else
        {
            /* No wide argument vector available--fall back to argv[]. */
            for (i=1;i<argc;i++)
                strbuf_cat_with_quotes(cmdline,argv[i]);
        }
    }
#else
    for (i=1;i<argc;i++)
        strbuf_cat_with_quotes(cmdline,argv[i]);
#endif

    k2sys_init();
    k2pdfopt_settings_init(k2settings);
    k2pdfopt_files_clear(&k2conv->k2files);
    /* Only set ansi and user interface */
    parse_cmd_args(k2conv,env,cmdline,usermenu,2,0);

    /*
    ** GUI launch.  k2pdfopt_launch_gui() and the ascii flag only exist in
    ** the MS Windows build, so this section uses the same guard as they do.
    */
#if (defined(HAVE_K2GUI) && (defined(WIN32) || defined(WIN64)))
    if (k2settings->gui>0
          || (k2settings->gui<0
                && (win_has_own_window()
                      || (!win_has_own_window() && argc<2))))
    {
        /* The user menu buffer is released before the GUI takes over. */
        strbuf_free(usermenu);
        k2pdfopt_launch_gui(k2conv,env,cmdline,ascii);
        k2pdfopt_main_shutdown(k2conv,env,cmdline,NULL);
        return(0);
    }
#endif

    /*
    ** Usage requested
    */
    if (k2settings->show_usage[0]!='\0')
    {
        k2sys_header(NULL);
        if (k2settings->query_user==0
#if (defined(WIN32) || defined(WIN64))
              || !win_has_own_window()
#endif
                          )
            k2pdfopt_usage(k2settings->show_usage,0);
        else if (!k2pdfopt_usage(k2settings->show_usage,1))
        {
            /* Zero return from the usage display:  exit without the prompt. */
            k2pdfopt_main_shutdown(k2conv,env,cmdline,usermenu);
            return(0);
        }
        if (k2settings->query_user!=0)
            k2sys_enter_to_exit(k2settings);
        k2pdfopt_main_shutdown(k2conv,env,cmdline,usermenu);
        return(0);
    }

#ifdef HAVE_TESSERACT_LIB
    /*
    ** OCR language of "?" means list the available languages and exit.
    */
    if (k2settings->dst_ocr_lang[0]=='?')
    {
        char *p;
        extern char *ocrtess_langnames[];

        k2sys_header(NULL);
        ocrtess_debug_info(&p,1);
        aprintf("%s",p);
        printf("\nAvailable languages:\n"
               "Code Language\n"
               "---------------------\n");
        /*
        ** The table is walked three entries at a time until an empty string
        ** is found; entries i+1 and i+2 are the ones printed.
        */
        for (i=0;ocrtess_langnames[i][0]!='\0';i+=3)
            printf("%-12s %s\n",ocrtess_langnames[i+1],ocrtess_langnames[i+2]);
        willus_mem_free((double **)&p,funcname);
        if (k2settings->query_user!=0)
            k2sys_enter_to_exit(k2settings);
        k2pdfopt_main_shutdown(k2conv,env,cmdline,usermenu);
        return(0);
    }
#endif

    /*
    ** Decide whether to query the user
    */
#if (defined(WIN32) || defined(WIN64))
    if (win_has_own_window())
        k2settings->query_user=1;
    else
        k2settings->query_user=(k2conv->k2files.n==0);
#else
    k2settings->query_user=1;
#endif

#if (!defined(WIN32) && !defined(WIN64))
    /* Push existing terminal contents up with blank lines. */
    if (k2settings->query_user)
    {
        int tty_rows;

        tty_rows=get_ttyrows();
        for (i=0;i<tty_rows-16;i++)
            aprintf("\n");
    }
#endif
    k2sys_header(NULL);

    /*
    ** Set all options from command-line arguments
    */
    parse_cmd_args(k2conv,env,cmdline,usermenu,1,0);

    /*
    ** Get user input
    */
    if (k2pdfopt_menu(k2conv,env,cmdline,usermenu)==-1)
    {
        k2pdfopt_main_shutdown(k2conv,env,cmdline,usermenu);
        return(0);
    }

    /*
    ** Re-init and then re-parse after all user menu entries applied.
    */
    k2pdfopt_settings_init(k2settings);
    parse_cmd_args(k2conv,env,cmdline,usermenu,1,0);

    /*
    ** Process files
    */
    {
        K2PDFOPT_FILELIST_PROCESS k2listproc;
        double start,stop;

        start=(double)clock()/CLOCKS_PER_SEC;
        for (i=k2listproc.filecount=0;i<k2conv->k2files.n;i++)
        {
            k2listproc.outname=NULL;
            k2listproc.bmp=NULL;
            k2listproc.mode=K2PDFOPT_FILELIST_PROCESS_MODE_CONVERT_FILES;
            k2pdfopt_proc_wildarg(k2settings,k2conv->k2files.file[i],&k2listproc);
            willus_mem_free((double **)&k2listproc.outname,funcname);
        }
        stop=(double)clock()/CLOCKS_PER_SEC;
        k2sys_cpu_update(k2settings,start,stop);
    }

    /*
    ** All done.
    */
    k2sys_enter_to_exit(k2settings);
    k2pdfopt_main_shutdown(k2conv,env,cmdline,usermenu);
    return(0);
}
#if (defined(HAVE_K2GUI) && (defined(WIN32) || defined(WIN64)))
/*
** Start the K2pdfopt GUI.
**
** If k2settings.gui is not 2 and either the program has no window of its
** own or k2settings.guimin is zero, the running executable is launched
** again as a detached process with " -gui+" appended to the original
** command line, and this function returns without running the GUI itself.
**
** Otherwise the console is released and k2gui_main() is called in this
** process.
**
** When (WILLUSDEBUGX & 0x4000) is set, a reminder is printed, the in-process
** branch is always taken, and the console is kept.
**
** k2conv, env, cmdline, and ascii are passed through to k2gui_main().
*/
static void k2pdfopt_launch_gui(K2PDFOPT_CONVERSION *k2conv,STRBUF *env,STRBUF *cmdline,
                                int ascii)
{
#if (WILLUSDEBUGX & 0x4000)
    printf("\n\nNEED TO TURN OFF WILLUSDEBUGX FOR FINAL COMPILE...\n\n");
    if (0)
#else
    if (k2conv->k2settings.gui!=2 && (!win_has_own_window() || !k2conv->k2settings.guimin))
#endif
    {
        short exename[512];
        short *x;
        short *buf;
        int i;
        STARTUPINFOW si;
        PROCESS_INFORMATION pi;
        static char *funcname="k2pdfopt_launch_gui";

        /*
        ** Zero the name buffer first so that it is a terminated string even
        ** if the module path is truncated or cannot be retrieved.
        */
        memset(exename,0,sizeof(exename));
        GetModuleFileNameW(NULL,(WCHAR *)exename,511);

        /*
        ** New command line = original command line + " -gui+".
        ** Seven extra elements are written below (six characters plus the
        ** terminator); eight are reserved.
        */
        x=(short *)GetCommandLineW();
        buf=NULL;
        willus_mem_alloc_warn((void **)&buf,sizeof(short)*(wide_strlen(x)+8),funcname,10);
        /* NOTE(review): presumably unreachable if the allocator exits on failure--confirm. */
        if (buf==NULL)
            return;
        wide_strcpy(buf,x);
        i=wide_strlen(buf);
        buf[i++]=' ';
        buf[i++]='-';
        buf[i++]='g';
        buf[i++]='u';
        buf[i++]='i';
        buf[i++]='+';
        buf[i++]=0;

        memset(&pi,0,sizeof(PROCESS_INFORMATION));
        memset(&si,0,sizeof(STARTUPINFOW));
        si.cb = sizeof(STARTUPINFOW);
        si.dwX = 0; /* Ignored unless si.dwFlags |= STARTF_USEPOSITION */
        si.dwY = 0;
        si.dwXSize = 0; /* Ignored unless si.dwFlags |= STARTF_USESIZE */
        si.dwYSize = 0;
        si.dwFlags = STARTF_USESHOWWINDOW;
        si.wShowWindow = SW_SHOWNORMAL;
        /* Launching from a console will NOT create new console. */
        if (CreateProcessW((LPCWSTR)exename,(LPWSTR)buf,0,0,1,DETACHED_PROCESS,0,NULL,&si,&pi))
        {
            /* The child is not waited on, so its handles are released right away. */
            CloseHandle(pi.hThread);
            CloseHandle(pi.hProcess);
        }
        /* A failed launch is left silent, as before. */
        willus_mem_free((double **)&buf,funcname);
    }
    else
    {
        HINSTANCE hinst;

        /* Free console and launch the GUI window */
        hinst=GetModuleHandle(NULL);
#if (!(WILLUSDEBUGX & 0x4000))
        FreeConsole();
#endif
        k2gui_main(k2conv,hinst,NULL,env,cmdline,ascii);
    }
}
#endif /* Windows GUI */
|