File: swf_extractor.py

package info (click to toggle)
hachoir 3.1.0%2Bdfsg-5
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 3,364 kB
  • sloc: python: 50,349; makefile: 129; sh: 26
file content (100 lines) | stat: -rwxr-xr-x 3,236 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/usr/bin/env python3
from hachoir.parser import createParser, guessParser
from sys import stderr, exit, argv


class JpegExtractor:
    """Extract JPEG pictures and MP3 sounds embedded in an SWF document.

    Every extracted item is written to the current working directory as
    ``image-NNN.jpg`` or ``sound-NNN.mp3``, numbered from 001.
    """

    def __init__(self):
        # 1-based counters used to build the output file names.  A counter
        # still equal to 1 after a run means nothing of that kind was written.
        self.jpg_index = 1
        self.snd_index = 1
        # When true, print the path of each field as it is extracted.
        self.verbose = False

    def storeJPEG(self, content):
        """Write *content* (bytes) to the next ``image-NNN.jpg`` file."""
        name = "image-%03u.jpg" % self.jpg_index
        print("Write new image: %s" % name)
        with open(name, "wb") as fp:
            fp.write(content)
        self.jpg_index += 1

    def createNewSound(self):
        """Open the next ``sound-NNN.mp3`` file for binary writing.

        Returns the open file object; the caller is responsible for
        closing it.
        """
        name = "sound-%03u.mp3" % self.snd_index
        print("Write new sound: %s" % name)
        self.snd_index += 1
        return open(name, "wb")

    def extractFormat2(self, field):
        """Store the picture held by a ``def_bits_jpeg2``/``jpeg3`` field.

        If the field has a ``jpeg_header`` sub-field larger than 32 bits,
        the header (minus its last two bytes) is placed in front of the
        image data (minus its first two bytes).  Otherwise the image data
        is written unchanged.
        """
        header = None
        if "jpeg_header" in field:
            header_field = field["jpeg_header"]
            # NOTE(review): a header of 32 bits or less presumably holds
            # nothing but JPEG markers and is ignored -- confirm.
            if header_field.size > 32:
                if self.verbose:
                    print("Use JPEG table: %s" % header_field.path)
                # size is divided by 8 to get a byte count, so "- 16" drops
                # the last two bytes of the header (presumably its
                # end-of-image marker -- confirm against the SWF format).
                header = field.root.stream.readBytes(
                    header_field.absolute_address,
                    (header_field.size - 16) // 8)
        content = field["image"].value
        if header:
            # Drop the first two bytes of the image (presumably its
            # start-of-image marker) so header and image form one stream.
            content = header + content[2:]
        if self.verbose:
            print("Extract JPEG from %s" % field.path)
        self.storeJPEG(content)

    @staticmethod
    def _checkSyncByte(data, name):
        """Raise ValueError unless *data* starts with the byte 0xFF.

        A one-byte slice is compared because indexing bytes yields an int
        on Python 3, which never equals a str or bytes literal.
        """
        if data[:1] != b'\xFF':
            raise ValueError(
                "%s: music data does not start with 0xFF" % name)

    def extractSound2(self, parser):
        """Write the sounds found among the top-level fields of *parser*.

        Each ``def_sound[...]`` field starts a new ``sound-NNN.mp3`` file
        with its ``music_data``.  Non-empty ``music_data`` of the
        ``sound_blk...`` fields that follow is appended to that file.
        A ``sound_blk`` met before any ``def_sound`` is skipped with a
        warning on stderr.

        Raises:
            ValueError: if a chunk of music data does not start with 0xFF.
        """
        output = None
        try:
            for field in parser:
                if field.name.startswith("def_sound["):
                    data = field["music_data"].value
                    # Validate before creating the file so a bad chunk
                    # does not leave an empty output file behind.
                    self._checkSyncByte(data, field.name)
                    if output is not None:
                        output.close()
                    output = self.createNewSound()
                    output.write(data)
                elif field.name.startswith("sound_blk") \
                        and "music_data" in field:
                    data = field["music_data"].value
                    if not data:
                        continue
                    self._checkSyncByte(data, field.name)
                    if output is None:
                        print("Skip %s: no sound file is open yet"
                              % field.name, file=stderr)
                        continue
                    output.write(data)
        finally:
            # Always release the last sound file, even on error.
            if output is not None:
                output.close()

    def main(self):
        """Command-line entry point: extract everything from ``argv[1]``.

        Exits with status 1 if the argument count is wrong or if the file
        cannot be parsed.
        """
        if len(argv) != 2:
            print("usage: %s document.swf" % argv[0], file=stderr)
            exit(1)

        filename = argv[1]
        parser = createParser(filename)
        if parser is None:
            print("Unable to parse file: %s" % filename, file=stderr)
            exit(1)

        if parser["signature"].value == "CWS":
            # Compressed SWF: parse the decompressed sub-stream instead.
            deflate_swf = parser["compressed_data"].getSubIStream()
            parser = guessParser(deflate_swf)
            if parser is None:
                print("Unable to parse compressed data of: %s" % filename,
                      file=stderr)
                exit(1)

        if "jpg_table/data" in parser:
            # JPEG pictures with common header: the shared table loses its
            # last two bytes and each image its first two before joining.
            jpeg_header = parser["jpg_table/data"].value[:-2]
            for field in parser.array("def_bits"):
                jpeg_content = field["image"].value[2:]
                if self.verbose:
                    print("Extract JPEG from %s" % field.path)
                self.storeJPEG(jpeg_header + jpeg_content)

        # JPEG in format 2/3
        for field in parser.array("def_bits_jpeg2"):
            self.extractFormat2(field)
        for field in parser.array("def_bits_jpeg3"):
            self.extractFormat2(field)

        # Extract sound
        self.extractSound2(parser)

        # Does it extract anything?
        if self.jpg_index == 1:
            print("No JPEG picture found.")
        if self.snd_index == 1:
            print("No sound found.")


# Run the extractor only when executed as a script, so that importing this
# module does not parse the command line or write files.
if __name__ == "__main__":
    JpegExtractor().main()