File: any2djvu

package info (click to toggle)
djvulibre 3.5.27.1-7
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 8,976 kB
  • ctags: 8,037
  • sloc: cpp: 65,455; xml: 5,723; ansic: 4,640; sh: 846; makefile: 315
file content (233 lines) | stat: -rwxr-xr-x 7,040 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
#! /bin/bash -f

# Print the authorship and licence banner on stdout, one line per entry.
copyright() {
    printf '%s\n' \
        'Copyright (C) 2002 David Kreil <D.Kreil@IEEE.Org>' \
        'Modified by Barak A. Pearlmutter <bap@debian.org>' \
        '         and Yaroslav Halchenko <debian@onerussian.com>' \
        'Released under the GNU GPL v2, 21-Oct-2002.'
}

# Print the GPL "no warranty" paragraph on stdout.
warranty() {
    local line
    for line in \
        'This program is distributed in the hope that it will be useful,' \
        'but WITHOUT ANY WARRANTY; without even the implied warranty of' \
        'MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the' \
        'GNU General Public License for more details.'
    do
        printf '%s\n' "$line"
    done
}

# Print the disclaimer that applies to use of the any2djvu service.
disclaimer() {
    # Quoted delimiter: the text is emitted literally, nothing is expanded.
    cat <<'EOF'
By using this tool you accept the following disclaimer:
  Because the any2djvu service is free of charge, there is no
  warranty of any kind.  In particular, no effort is made to
  prevent anyone from downloading the files submitted to or
  produced by the any2djvu server.
EOF
}

# TO DO:
#  - error handling

# Default settings.  Option parsing overwrites them when the matching
# switch is given: -u (rurl), -p (rcgi), -r (res), -o (ocr), -f (docformat).

# Where the conversion request is sent: "$rurl/$rcgi".
rurl='http://any2djvu.djvu.org'
rcgi='any2djvu.php'

# Conversion parameters submitted with each document.
docformat=2
res=400
ocr=1

# Print the usage notes (requirements, defaults, fair-use request) on stdout.
warn() {
    # Quoted delimiter: the text is emitted literally, nothing is expanded.
    cat <<'EOF'
Notes:
 - Internet connection is required.
 - Filenames are assumed to require no URL-encoding.
 - Documents must be PostScript (.ps, .ps.gz) or PDF (.pdf).
 - Defaults are: conversion is at 400dpi, with English OCR enabled.
 - This script should not be used for large scale conversions of
   documents, as it may badly affect and hence endanger the free web
   service to the community.
 - This software comes with NO WARRANTY.
EOF
}

# Print the table of input-format codes accepted by the -f option.
format_help() {
    printf '%s\n' \
        'Codes for the formats of the input documents to use with -f' \
        '  1 - DjVu Document (for verification or OCR)' \
        '  2 - PS/PS.GZ/PDF Document (default)' \
        '  3 - Photo/Picture/Icon' \
        '  4 - Scanned Document - B&W - <200 dpi' \
        '  5 - Scanned Document - B&W - 200-400 dpi' \
        '  6 - Scanned Document - B&W - >400 dpi' \
        '  7 - Scanned Document - Color/Mixed - <200 dpi' \
        '  8 - Scanned Document - Color/Mixed - 200-400 dpi' \
        '  9 - Scanned Document - Color/Mixed - >400 dpi'
}

# Print the command-line help on stdout.
# Reads the globals res, ocr, rurl and rcgi to show the current defaults,
# and $0 for the program name.
usage() {
    # The "Usage:" label is followed by a TAB; it is kept in a variable so
    # the separator stays visible in the source.
    local tab=$'\t'
    # Unquoted delimiter: $0 and the default values are expanded.
    cat <<EOF
Convert files from .ps/.ps.gz/.pdf to .djvu
Usage:${tab}$0 [options] [url] {filename(s)}

Options:
  -q          quiet mode: reduce chatter on the screen
  -a          acknowledge internet transmission
  -c          clean after work: remove log file
  -r dpi      resolution in dpi (200/300/400, default: $res)
  -o ocr      perform OCR (0 - no, 1 - yes, default: $ocr)
  -f format   format of input document; invoke -f help for list
  -u url      base URL of server (default: $rurl)
  -p s        CGI script name (default: $rcgi)

If no local file is named by the 1st argument the 1st
argument is taken to be an external web-accessible directory and,
the specified files are uploaded from that location.

Examples:
  cd ~bap/public_html/foo
 # uploads from web-accessible directory
  any2djvu http://barak.pearlmutter.net/papers mesh-preprint.ps.gz
  any2djvu http://www.inference.phy.cam.ac.uk/mackay *.ps.gz bar.pdf
 # uploads from current directory
  any2djvu b*.pdf
  any2djvu -s -c -r 300 -o 0 bp.ps
EOF
}


# Parse the command-line switches.  Each arm either records a setting in a
# global variable or prints help and leaves; an unrecognised switch prints
# the usage text and exits with status 2.
while getopts "hacqsr:o:f:u:p:" opt; do
    case "$opt" in
        h)
            usage
            exit 0
            ;;
        a)
            DJVU_ONLINE_ACK=1
            ;;
        c)
            doclean=1
            ;;
        q)
            silent=1
            ;;
        s)
            # Old spelling of -q, still honoured.
            echo "option -s is deprecated, please use -q"
            silent=1
            ;;
        r)
            res="$OPTARG"
            ;;
        o)
            ocr="$OPTARG"
            ;;
        f)
            docformat="$OPTARG"
            ;;
        u)
            rurl="$OPTARG"
            ;;
        p)
            rcgi="$OPTARG"
            ;;
        *)
            usage
            exit 2
            ;;
    esac
done

# Drop the parsed switches so that $1 is the first non-option argument.
shift "$((OPTIND - 1))"

# Validate the option values before anything is sent to the server.
# Every rejected value prints an error and exits with status 2.

# check resolution: only the literal values 200, 300 and 400 are accepted.
# Matching the quoted value with `case` also rejects an empty string, a bare
# digit such as "2", or a value containing blanks; an unquoted `[ ... ]`
# comparison turns those into a malformed test that lets them through.
case "$res" in
    200 | 300 | 400) ;;
    *)
        echo 'error: -r resolution must be one of: 200, 300, 400'
        exit 2
        ;;
esac

# check OCR option
case "$ocr" in
    0 | 1) ;;
    *)
        echo 'error: -o OCR must be 0 or 1'
        exit 2
        ;;
esac

# check document format: "help" lists the codes and exits successfully,
# a single digit is accepted, anything else (including empty) is an error.
# NOTE(review): "0" passes this check although format_help lists only
# codes 1-9 -- confirm whether the server accepts it.
case "$docformat" in
    help)
        format_help
        exit 0
        ;;
    [0-9]) ;;
    *)
        echo "error: requested document format unknown: $docformat".
        format_help
        exit 2
        ;;
esac

# Decide where the documents come from.  When the first argument names an
# existing local path, files are uploaded from the current directory and
# lurl stays empty; otherwise the first argument is taken as the base URL
# of a web-accessible directory and removed from the argument list.
if [ -e "$1" ]; then
    lurl=''
else
    lurl="$1"
    shift
fi

# Start banner (stderr), suppressed in quiet mode.
log="$(date -u): $(whoami)@$(hostname), pid $$: $0 (cwd $(pwd))"
if [ -z "$silent" ]; then
    echo '/-- Started' "$log" >&2
fi

# First document to convert; the remaining ones stay in "$@".
in="$1"
shift
[ -n "$in" ] || {
    echo "error: no files to convert"
    usage
    exit 1
}

# Show the copyright, disclaimer and usage notes unless quiet mode is on.
# This used to test "$s", a variable nothing in this script assigns, so the
# notices could never be printed; the quiet flag set by -q/-s is "silent".
if [ -z "$silent" ]; then
    copyright
    disclaimer
    warn
fi

# Ask for explicit consent before any document leaves the machine.
# The question is skipped when DJVU_ONLINE_ACK is non-empty (set by -a or
# inherited from the environment).  Any answer other than "yes" (in any
# letter case) prints the notes and exits with status 1.
if [ -z "$DJVU_ONLINE_ACK" ]; then
    cat <<'EOF'
WARNING!

any2djvu uses an external server which is willing to perform the
conversion and requires the document transfer over to that server.
There is a security issue in operating on documents not intended for
widespread distribution, which could be partially although not
completely ameliorated by using a secure web connection.

Do you acknowledge and allow the transmission of the document?
(Type 'yes' to acknowledge. You can define non-empty environment
 variable DJVU_ONLINE_ACK to avoid seeing this dialog, or use -a
 command line parameter to any2djvu).
EOF
    printf '%s' "[yes/no]:"
    read ack
    case "$ack" in
        [yY][eE][sS]) ;;
        *)
            warn
            exit 1
            ;;
    esac
fi

# Conversion loop: for each input document, submit it to "$rurl/$rcgi",
# keep the server's reply in "<basename>-any2djvu.log", pull the link to
# the generated file out of that reply and download it as "<basename>.djvu".

# Extra switches for curl/wget; in quiet mode both tools are silenced.
curlopts=()
wgetopts=()
if [ -n "$silent" ]; then
    curlopts=(-s)
    wgetopts=(-q)
fi

# Copy stdin into the log file $1 and, unless quiet mode is on, also to
# stdout.  The name is passed as a plain argument (no eval), so names that
# contain quotes, blanks or shell metacharacters are used literally.
save_server_reply() {
    if [ -n "$silent" ]; then
        tee -- "$1" >/dev/null
    else
        tee -- "$1"
    fi
}

# Print the server-relative path of the generated DjVu file referenced in
# log file $1: the text between the last "href=" of the matching lines and
# the next ">".  Prints an empty line when the log has no such link.
find_result_path() {
    local hit
    hit=$(grep -E -- 'href=djvu/.*\.djvu' "$1")
    hit=${hit##*href=}
    hit=${hit%%>*}
    printf '%s\n' "$hit"
}

# Query string used when the server fetches the document itself (URL mode).
cgiopts="&docformat=$docformat&resolution=$res&ocr=$ocr&legal=1"
while [ -n "$in" ]; do
    # Output base name: the input name without a .ps.gz/.ps/.pdf/.PDF suffix.
    b="$in"
    b="${b%.ps.gz}"
    b="${b%.ps}"
    b="${b%.pdf}"
    b="${b%.PDF}"
    echo "$(date -u) Processing $b ..."
    log="$b-any2djvu.log"
    if [ -z "$lurl" ]; then
        # Local mode: upload the file itself as a multipart form.
        if [ ! -e "$in" ]; then
            echo "File $in wasn't found. Conversion stopped"
            exit 2
        fi
        curl "${curlopts[@]}" -F "fupload=@$in" -F submit=Submit \
            -F "docformat=$docformat" -F "resolution=$res" \
            -F "ocr=$ocr" -F legal=1 "$rurl/$rcgi" \
            | save_server_reply "$log"
    else
        # URL mode: hand the server the address "$lurl/$in" to fetch.
        wget "${wgetopts[@]}" -O - "$rurl/$rcgi?urlupload=$lurl/$in$cgiopts" \
            | save_server_reply "$log"
    fi
    l=$(find_result_path "$log")
    if [ -z "$l" ]; then
        echo "error: something got wrong. check log file"
        exit 1
    fi
    # Stop before the log is cleaned up if the result cannot be fetched, so
    # the server reply is still available for diagnosis.
    if ! wget "${wgetopts[@]}" -O "$b.djvu" "$rurl/$l"; then
        echo "error: could not download $rurl/$l. check log file"
        exit 1
    fi

    [ -z "$doclean" ] || rm -- "$log"
    if [ -z "$silent" ]; then
        ls -l -- "$b.djvu"
    fi

    # Next document; the loop ends at the first empty/missing argument.
    in="$1"
    shift
done

# Print the closing "Done" banner on stderr unless quiet mode is on.
# In quiet mode it returns 0 explicitly: this is the last command of the
# script, and a bare "[ -z ... ] && echo" list would make a successful
# quiet run exit with status 1.
report_done() {
    [ -z "$silent" ] || return 0
    log="$(date -u): $(whoami)@$(hostname), pid $$: $0 (cwd $(pwd))"
    # Quoted so the banner is printed exactly as built (no word splitting or
    # pathname expansion of the working directory).
    printf '%s %s\n' '\-- Done' "$log" >&2
}

report_done