File: imgsizer

package info (click to toggle)
imgsizer 2.7-2
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 140 kB
  • ctags: 36
  • sloc: python: 179; xml: 145; makefile: 60
file content (309 lines) | stat: -rwxr-xr-x 10,384 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
#!/usr/bin/python
#
# imgsizer -- correct image sizes in WWW pages
# by Eric S. Raymond <esr@thyrsus.com>
# 
# Fix up IMG tags in given documents to contain correct sizes.
# 
# Works with Python 1.5.2
#
# Copy, use, and redistribute freely, but don't take my name off it and
# clearly mark an altered version.  Fixes and enhancements cheerfully 
# accepted.
#
# Changelog:
#
# Originally created by Eric S. Raymond <esr@thyrsus.com> 30 Jul 1996
#
# Modified by Erik Rossen <rossen@planet.ch> 15 May 1999
#
#    Added the --nomagick switch, to use file(1) and rdjpgcom(1)
#    to determine the image size instead of identify(1) from the
#    ImageMagick suite.
#
# Modified by Michael C. Toren <michael@toren.net> 18 Aug 2000
#
#    Fixed bug where the SRC attribute's value needed to be in quotes,
#    improved command line parsing (but it could still use some work),
#    added -q switch to omit quotes when generating tags, and -l switch
#    to generate lowercase tags.  -mct
#
# Modified by Michael C. Toren <michael@toren.net> 19 Aug 2000
#
#    Improved the command line parsing some more, now looks for additional
#    arguments via an IMGSIZER environmental variable, added the -d switch
#    to set the DocumentRoot, -v switch to display version information,
#    and -h switch to display usage information.  -mct
#
# Modified by Michael C. Toren <michael@toren.net> 23 Feb 2001
#
#    Fixed two bugs reported by Jeroen Valcke <jeroen@valcke.com>, one
#    where the -d switch did not function properly if the img src attribute
#    was quoted, and another where the &error sub was incorrectly reporting
#    the line number an error occurred due to the input record separator
#    being set to ">".
#
# Rewritten in Python by Eric S. Raymond <esr@thyrsus.com> 11 July 2001
#
#    Time to get rid of the dependency on httpget.  The -l option is gone, too;
#    instead, we deduce the right case by looking at the leading tag.  -q
#    is gone; we always emit without quotes.  -m is gone too, instead we
#    try commands in least to most expensive order, and when a command
#    turns out not to be installed we notice and do not try it again.
#
# Fixes by ESR, 29 July 2001
#
#    Incorporated fixes by Peter S. Galbraith.
#
# Fixes by ESR, 25 April 2003
#
#    Merged amended versions of Lennart Poettering's fix for Debian bug 139714
#    and Jeroen N. Witmond's fix for Debian bug 168964.  Added regression-test
#    production.
#
# Enhancement by ESR, 14 Nov 2003
#
#    Verify and merge Lucien Saviot's patch to produce XHTML from XHTML input.
#    Also his change to handle spurious line breaks produced by Dave Raggett's
#    tidy(1) utility.
#
# Modified by Andrew Gwozdziewycz <gwozdzie@lucas.cis.temple.edu>, 17 June 2004
#
#    Added support for the Python Imaging Library to determine size in case of
#    failure from file(1), rdjpgcom(1) and identify(1).

import sys, os, getopt, string, re, urllib, commands

# Arrange for both 1.5 and 2.1 compatibility
try:
    import filecmp
    cmp = filecmp
    del filecmp
except ImportError:
    import cmp

# Release identifier; substituted into the %s placeholder of `splash`.
version = "2.7, 05 Aug 2004"

# Banner template; format it with `version` before printing.
splash = """imgsizer version %s, Eric S. Raymond <esr@thyrsus.com>
See <http://www.catb.org/~esr/software.html> for updates."""

# Command-line help text.
usage = """Usage: imgsizer [OPTIONS] [HTML File]

Options:

    -V, --version

        Display version information and exit.

    -h, --help

        Display usage information.

    -d <directory>, --document-root <directory>

        Directory where absolute image filenames (i.e, ones which contain
        a leading "/") may be found.

    -n, --no-overwrite

        Don't overwrite existing width and height tags if both are present.

"""

# Optimization latches -- if an attempt to invoke a command returns 127
# "not found" these will turn off and that command won't be tried again.
magick = 1       # using ImageMagick by default
rdjpgcom = 1     # using rdjpgcom by default
pythonimage = 1  # use python imaging library

def attrformat(xc, dim):
    """Return one size attribute as text, e.g. ' width="640"'.

    xc is the value (anything str() accepts) and dim is the attribute
    name in lowercase.  The name is emitted as given when the global
    `lower` is true and uppercased otherwise.  The result always starts
    with a single space and the value is always double-quoted.
    """
    name = dim
    if not lower:
        name = string.upper(dim)
    return ' %s="%s"' % (name, str(xc))

def sizefix(infp, outfp):
    """Copy HTML from infp to outfp, passing IMG tag attributes to transform().

    infp and outfp are file-like objects; infp is consumed one character
    at a time with read(1), so it does not need to be seekable.  All text
    outside of <img ...> / <IMG ...> tags is copied through unchanged.
    For each such tag, the text between the tag name and the closing
    ">" or "/>" is handed to transform() and the result is written in
    its place, followed by the original terminator.

    Side effect: sets the global `lower` to true when the current tag was
    spelled "img" and false when it was spelled "IMG", so that the size
    attributes match the case of the tag.

    If the input ends in the middle of a tag, whatever has been read is
    written out verbatim rather than dropped.
    """
    global lower
    while 1:
        ch = infp.read(1)
        if ch == '':
            return
        outfp.write(ch)
        if ch != '<':
            continue
        # Within an HTML tag.  Only two characters are read at first so
        # that a single-char tag like <b> is consumed whole without
        # swallowing the "<" of whatever follows it.
        lead = infp.read(2)
        outfp.write(lead)
        if not lead in ("im", "IM"):
            continue
        # Write the third character itself (not lead[-1]): at end of
        # input it is empty and lead[-1] would repeat the "m".
        third = infp.read(1)
        outfp.write(third)
        lead = lead + third
        if not lead in ("img", "IMG"):
            continue
        # Within an image tag
        lower = (lead == 'img')
        attributes = ""
        while 1:
            ch = infp.read(1)
            if ch == '':
                # Unterminated tag at end of input: keep the text as-is.
                outfp.write(attributes)
                return
            if ch == '>':
                break
            if ch == '/':
                # Might be the start of an XHTML-style "/>" terminator;
                # otherwise both characters belong to the attributes.
                ch2 = infp.read(1)
                ch = ch + ch2
                if ch2 == '>':
                    break
            attributes = attributes + ch
        # ch now holds the terminator, either ">" or "/>"
        outfp.write(transform(attributes) + ch)

# Matches a "<width>w * <height>h" pair; imgsize() applies it to the
# output of "rdjpgcom -verbose".  Groups 1 and 2 are width and height.
rdjpg_match = re.compile(r" ([0-9]+)w *\* *([0-9]+)h")

# Matches a "<width> x <height>" pair preceded by a space; imgsize()
# applies it to the output of file(1) and identify(1).
x_match = re.compile(r" ([0-9]+) *x *([0-9]+)")

def _command_missing(status):
    "Return true if a getstatusoutput() status means 'command not found'."
    # commands.getstatusoutput() hands back a wait()-style status, so the
    # shell's exit code 127 has to be extracted before comparing.
    return os.WIFEXITED(status) and os.WEXITSTATUS(status) == 127

def _shquote(text):
    "Return text single-quoted so the shell passes it on as one literal word."
    return "'" + string.replace(text, "'", "'\\''") + "'"

def imgsize(src):
    """Return the image size in pixels for a given image source.

    src is a URL or local path; it is fetched with urllib.urlretrieve().
    The probes are tried from cheapest to most expensive: file(1), with
    rdjpgcom(1) for JPEGs, then identify(1), then the Python Imaging
    Library.

    Returns a (width, height) pair -- digit strings when taken from
    command output, or whatever Image.size holds when PIL is used -- or
    None if the source could not be retrieved or no probe yielded a size.

    Side effects: clears the global latches `rdjpgcom`, `magick` and
    `pythonimage` when the corresponding tool turns out to be missing, so
    it is not tried again for later images; writes diagnostics to stderr.
    """
    global magick, rdjpgcom, pythonimage
    try:
        (filename, headers) = urllib.urlretrieve(src)
    except IOError:
        return None
    # The name can come straight from an HTML attribute, so never hand it
    # to the shell unquoted.
    quoted = _shquote(filename)
    # Now let's see if we can get a size for the retrieved image.
    # Try file(1) first -- cheapest, as it doesn't read the whole image
    (status, output) = commands.getstatusoutput("file " + quoted)
    if status == 0:
        # file(1) works for every common image format other than JPEG
        if string.find(output, "JPEG") == -1:
            sizes = x_match.search(output)
            if sizes:
                return (sizes.group(1), sizes.group(2))
        elif rdjpgcom:
            # Use rdjpgcom(1) to handle JPEGs
            (status, output) = commands.getstatusoutput("rdjpgcom -verbose " + quoted)
            sizes = rdjpg_match.search(output)
            if sizes:
                return (sizes.group(1), sizes.group(2))
            elif _command_missing(status):
                rdjpgcom = 0
    # Next try identify(1), more expensive but bulletproof
    if magick:
        (status, output) = commands.getstatusoutput("identify " + quoted)
        if status == 0:
            sizes = x_match.search(output)
            if sizes:
                return (sizes.group(1), sizes.group(2))
        elif _command_missing(status):
            sys.stderr.write("imgsizer: giving up on ImageMagick\n")
            magick = 0
    # if that fails, try at _LAST_ resort Python Imaging Library
    # open doesn't actually load all the data, so it shouldn't be too expensive
    if pythonimage:
        try:
            import Image
        except ImportError:
            # Only a missing library is a reason to stop trying for good.
            sys.stderr.write("imgsizer: giving up on Python Imaging Library\n")
            pythonimage = 0
        else:
            try:
                return Image.open(filename).size
            except IOError:
                # This one image is unreadable; later images may be fine.
                pass

    # All attempts failed
    sys.stderr.write("imgsizer: couldn't analyze %s\n" % src)
    return None

# Patterns applied to the attribute text of an IMG tag; all of them are
# case-insensitive and written as raw strings so that the backslashes
# reach the regex engine untouched.

# SRC attribute, value optionally double-quoted; group 1 is the value,
# which ends at the first double quote, space, tab or newline.
source  = re.compile(r'SRC\s*=\s*"?([^" \t\n]*)"?', re.I)
# Numeric WIDTH / HEIGHT attribute including any spaces in front of it,
# so that substituting "" removes the attribute cleanly.
awidth  = re.compile(r' *WIDTH\s*=\s*"?[0-9]*"?', re.I)
aheight = re.compile(r' *HEIGHT\s*=\s*"?[0-9]*"?', re.I)
# WIDTH / HEIGHT given as a percentage.
pwidth  = re.compile(r'WIDTH\s*=\s*"?[0-9]*%"?', re.I)
pheight = re.compile(r'HEIGHT\s*=\s*"?[0-9]*%"?', re.I)

def transform(attr):
    """Return the attribute text of an IMG tag with correct size attributes.

    attr is the text between the tag name and the closing ">" or "/>".
    It is returned unchanged when:
      - it has no SRC attribute, or the SRC value is empty;
      - an existing WIDTH or HEIGHT is a percentage;
      - the global `no_overwrite` is set and both WIDTH and HEIGHT are
        already present;
      - imgsize() cannot determine the image's dimensions.
    Otherwise any existing numeric WIDTH/HEIGHT attributes are removed
    and freshly formatted ones are appended.

    A SRC value with a leading "/" is looked up relative to the global
    `root` when that is set.
    """
    src = source.search(attr)
    # Must have a source part and no percents in existing width or height
    if not src or pwidth.search(attr) or pheight.search(attr):
        return attr
    if no_overwrite and awidth.search(attr) and aheight.search(attr):
        return attr
    url = src.group(1)
    # An empty SRC (e.g. src="") names no image; also guards url[0] below.
    if not url:
        return attr
    # Correct the url for documentation root, if present
    if url[0] == '/' and root:
        url = os.path.join(root, url[1:])
    # OK, get the size tuple if possible
    dimensions = imgsize(url)
    if not dimensions:
        return attr
    # Nuke any old size attributes.  This must happen even when
    # no_overwrite is set: getting here with it set means at most one of
    # the two was present, and keeping it would duplicate that attribute.
    attr = re.sub(awidth, "", attr)
    attr = re.sub(aheight, "", attr)
    (xc, yc) = dimensions
    # Plug in the new attributes
    return attr + attrformat(xc, "width") + attrformat(yc, "height")

# Default case for emitted attribute names: 0 means uppercase.  sizefix()
# resets this for every IMG tag to match the case of the tag itself.
lower = 0
# NOTE(review): `quotes` is not referenced anywhere in the visible code;
# attrformat() always emits double quotes.
quotes = 1

# Set the default DocumentRoot to the current working directory.
root = "."

# NOTE(review): `out` and `dir` are not referenced anywhere in the visible
# code; kept in case something outside this view relies on them.
out = "imgsizer-out$$"
dir = "."	# NOTE: if you are doing <yourfile make sure that pwd is correct! 

# Collect options from the environment first, then the command line
options = os.environ.get("IMGSIZER")
if options:
    options = string.split(options)
else:
    options = []
options = options + sys.argv[1:]

# Process options.  The long option is registered under the name the usage
# text documents, --document-root; getopt accepts any unique prefix of a
# long option, so the older spelling --document still works and is
# reported under the full name.
try:
    (options, arguments) = getopt.getopt(options, "Vhd:n",
                                         ('version', 'help', 'usage',
                                          'document-root=', 'no-overwrite'))
except getopt.error:
    # Report bad switches as a one-line message instead of a traceback.
    sys.stderr.write("imgsizer: %s\n" % sys.exc_info()[1])
    sys.stderr.write("Try 'imgsizer --help' for usage information.\n")
    sys.exit(2)
no_overwrite = 0
for (switch, val) in options:
    if switch in ('-V', '--version'):
        sys.stdout.write(splash % version + "\n")
        sys.exit(0)
    elif switch in ('-h', '--help', '--usage'):
        # splash is a template; fill in the version before printing it
        sys.stdout.write(splash % version + "\n\n" + usage + "\n")
        sys.exit(0)
    elif switch in ('-d', '--document-root'):
        root = val
        if not os.path.isdir(root):
            # stderr, because stdout may be carrying the converted HTML
            sys.stderr.write("Document root isn't a directory\n")
            sys.exit(1)
    elif switch in ('-n', '--no-overwrite'):
        no_overwrite = 1

# Main driver.  With no file arguments act as a filter from stdin to
# stdout; otherwise rewrite each named file in place, going through a
# temporary file next to it and leaving the file alone when nothing changed.
if not arguments:
    sizefix(sys.stdin, sys.stdout)
else:
    for path in arguments:
        try:
            infp = open(path)
        except IOError:
            sys.stderr.write("imgsizer: can't open input file %s\n" % path)
            sys.exit(1)
        temppath = path + ".~imgsizer-%d~" % os.getpid()
        try:
            outfp = open(temppath, "w")
        except IOError:
            # open() reports failure with IOError, not OSError
            infp.close()
            sys.stderr.write("imgsizer: can't open tempfile\n")
            sys.exit(1)
        try:
            sizefix(infp, outfp)
        finally:
            # Both files must be closed (and the output thereby flushed)
            # before the comparison and rename below look at them on disk.
            infp.close()
            outfp.close()
        if cmp.cmp(path, temppath):
            # Nothing changed; drop the copy
            os.remove(temppath)
        else:
            try:
                os.rename(temppath, path)
            except OSError:
                sys.stderr.write("imgsizer: couldn't replace " + path + "\n")
                os.remove(temppath)

# End