File: wallflower

package info (click to toggle)
libdancer-perl 1.3095%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 2,032 kB
  • sloc: perl: 6,803; sh: 48; sql: 5; makefile: 2
file content (240 lines) | stat: -rwxr-xr-x 7,289 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
#!perl
use strict;
use warnings;
use Getopt::Long;
use Pod::Usage;
use Config;
use File::Basename ();
use File::Path 'mkpath';
use Cwd qw( realpath );

# must cleanup the dance floor long before Dancer starts moving
my %option;

BEGIN {

    # some defaults
    %option = (index => 'index.html');

    # command-line parameters
    GetOptions(
        \%option,        'application=s', 'include|I=s', 'environment=s',
        'destination=s', 'index=s',       'log=s',       'vhost=s',
        'help',          'manual'
    ) or pod2usage(-verbose => 1);

    # simple on-line help
    pod2usage(-verbose => 1) if $option{help};
    pod2usage(-verbose => 2) if $option{manual};

    # add include dirs to @INC
    my $path_sep = $Config::Config{path_sep} || ';';
    unshift @INC, split /\Q$path_sep\E/, $option{include}
      if defined $option{include};

    # load the application
    die "no app" if !defined $option{application};
    eval "use $option{application}; 1;"
      or die "Unable to load application '$option{application}': $@";

    # compute the location of the application
    (my $module = $option{application} . '.pm') =~ s<::></>;
    $ENV{DANCER_APPDIR} ||= realpath(
        File::Spec->catdir(File::Basename::dirname($INC{$module}), '..'));
}

# Dancer hits the stage
use Dancer;
use Dancer::Handler;

set environment => $option{environment} || 'production';
set log         => $option{log}         || 'warning';

# some basic verification
my $wwwdocs = $option{destination};
die "Destination not defined" if !defined $wwwdocs;
die "Invalid destination '$wwwdocs'" if !-e $wwwdocs || !-d $wwwdocs;

# I'm just hanging on to my friend's purse
my $log;
while (<>) {

    # ignore blank lines and comments
    next if /^\s*(#|$)/;
    chomp;

    # default values
    my $url = URI->new($_);
    my ($status, $file, $bytes) = (500, '-', '-');
    $log = "$status $url";

    # require an absolute path
    next if $url->path !~ /^\//;

    # fake a minimal environment
    local $ENV{SERVER_NAME} = 'localhost';
    local $ENV{SERVER_PORT} = '80';
    local $ENV{HTTP_HOST} = eval { $url->host } || $option{vhost}
      if defined $option{vhost};

    # create a new request object
    # strip everything but the path
    my $request = Dancer::Request->new_for_request(GET => $url->path);
    $request->headers(
        HTTP::Headers->new(
            Accept => '*/*',
            $url->can('host_port') ? (Host => $url->host_port) : (),
        )
    );

    # obtain a response
    my $response = Dancer::Handler->handle_request($request);
    ($status, my $content) = @{$response}[0, 2];
    $log = "$status $url";

    # save successes to the appropriate file
    if ($status eq '200') {

        # absolute paths have the empty string as their first path_segment
        my (undef, @segments) = $url->path_segments;

        # create a vhost directory if required
        unshift @segments, eval { $url->host } || $option{vhost}
          if defined $option{vhost};

        # assume directory
        push @segments, $option{index} if $segments[-1] !~ /\./;

        # generate target file name
        my $file = File::Spec->catfile($wwwdocs, @segments);
        pop @segments;
        my $dir = File::Spec->catdir($wwwdocs, @segments);

        # ensure the subdirectory exists
        mkpath $dir if !-e $dir;
        open my $fh, '>', $file or die "Can't open $file for writing: $!";

        # copy content to the file
        if (ref $content eq 'ARRAY') {
            print $fh @$content;
        }
        elsif (ref $content eq 'GLOB') {
            print {$fh} <$content>;
        }
        elsif (eval { $content->can('getlines') }) {
            print {$fh} $content->getlines;
        }
        else {
            die "Don't know how to handle $content";
        }

        # finish
        close $fh;
        $bytes = -s $file;
        $log .= " => $file [$bytes]";
    }
}
continue {
    print "$log\n" if $log;
    $log = '';
}

__END__

=head1 NAME

wallflower - Sorry I can't dance, I'm hanging on to my friend's purse

=head1 SYNOPSIS

 wallflower [options]

=head1 OPTIONS

 --application <name>       Name of the Dancer application
 --destination <path>       Destination directory for the files

 --include     <path>       Library paths to include
 --environment <name>       Application environment (default: production)
 --index       <filename>   Default name for index file (default: index.html)
 --log         <level>      Dancer log level (default: warning)
 --vhost       <vhost>      Default vhost, enforce vhost dir creation

 --help                     Print a short online help and exit
 --manual                   Print the full manual page and exit

=head1 DESCRIPTION

B<wallflower> turns your Dancer application into a static web site.

While not suitable for all applications, there are a number of use cases
where this makes sense. Most web sites are in essence static. Without a
way for user to update information on the site (via forms, comments, etc)
the only changes in the web site come from sources that you control
(including the database) and that are accessible in your development
environment.

Using Dancer for a static web site actually makes a lot of sense,
just because if gives you access to all the features of the framework
for that site. Think of it as I<extreme caching>.

So, forms could be processed on your development server
(e.g. to update a local database), and the pages to be I<published>
would be a subset of all the URL that the application supports.

Turning such an application into a real static site (a set of pages
to upload to a static web server) is just a matter of generating all
possible URL for the static site and saving them to files.

B<wallflower> does exactly that. It reads a list of URL, strips them
from their query strings, turn them into C<GET> requests and saves the
body response to a file whose name matches the request pathinfo.

B<wallflower> is not a generic offline browsing tool.

=head1 EXAMPLE

The web site created by C<dancer -a mywebapp> is the perfect example.
The complete list of URL needed to view the site is:

    /
    /css
    /css/error.css
    /css/style.css
    /favicon.ico
    /images/perldancer-bg.jpg
    /images/perldancer.jpg
    /javascripts/jquery.js

Passing this list to B<wallflower> gives the following result:

    $ wallflower -a mywebapp -d /tmp/output urls.txt
    200 / => /tmp/output/index.html [5257]
    200 /css/error.css => /tmp/output/css/error.css [1210]
    200 /css/style.css => /tmp/output/css/style.css [2972]
    200 /favicon.ico => /tmp/output/favicon.ico [1406]
    200 /images/perldancer-bg.jpg => /tmp/output/images/perldancer-bg.jpg [7125]
    200 /images/perldancer.jpg => /tmp/output/images/perldancer.jpg [2240]
    200 /javascripts/jquery.js => /tmp/output/javascripts/jquery.js [72174]

Note that URL with a path ending with a C</> or a name without an extension
will be considered to be a directory, and have the default "index" filename
appended.

Any URL resulting in a status different than 200 will be logged, 
but not saved:

    404 /css
    500 foo/bar

=head1 AUTHOR

Philippe Bruhat (BooK)

=head1 LICENSE

This program is free software and is published under the same
terms as Perl itself.

=cut