1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324
|
package Badger::Codec::TT;
use Badger::Class
version => 0.01,
debug => 0,
base => 'Badger::Codec Badger::Prototype',
import => 'class CLASS',
utils => 'blessed textlike',
constants => 'ARRAY HASH',
messages => {
badref => 'Cannot encode %s reference',
};
our $MATCH_ESCAPED = qr/ \\([\\']) /x;
our $MATCH_WORD = qr/ (\w+) /x;
our $MATCH_QUOTE = qr/ '( (?:\\[\\'] | . | \n)*? )' /sx;
our $MATCH_QUOTED = qr/ \G \s* $MATCH_QUOTE /sx;
our $MATCH_KEY = qr/ \G \s* (?: $MATCH_WORD | $MATCH_QUOTE )/sx;
our $MATCH_NUMBER = qr/ \G \s* ( -? \d+ (?: \.\d+ )? ) /x;
our $MATCH_COMMA = qr/ \G \s* (,\s*)? /x;
our $MATCH_ASSIGN = qr/ \G \s* (:|=>?) \s* /x;
our $MATCH_HASH = qr/ \G \s* \{ /x;
our $MATCH_END_HASH = qr/ \G \s* \} /x;
our $MATCH_LIST = qr/ \G \s* \[ /x;
our $MATCH_END_LIST = qr/ \G \s* (\] | $) /x; # special case
our $MATCH_UNDEF = qr/ \G \s* undef /x;
our $ASSIGN = '=';
our $COMMA = ' ';
sub init {
my ($self, $config) = @_;
$self->{ assign } = $config->{ assign } || $ASSIGN;
$self->{ comma } = $config->{ comma } || $COMMA;
return $self;
}
sub encode {
shift->prototype->_encode(@_);
}
sub decode {
shift->prototype->_decode(@_);
}
sub _encode {
my $self = shift;
my $data = shift;
$self->debug("encoding: $data\n") if DEBUG;
# object may have stringification method
if (blessed $data && textlike $data) {
$data = '' . $data; # drop-through to string handler below
}
if (! defined $data) {
$self->debug("encoding undef") if DEBUG;
return 'undef';
}
elsif (! ref $data) {
if ($data =~ /^ $MATCH_NUMBER $/ox) {
$self->debug("encoding number: $data") if DEBUG;
return $data;
}
else {
# escape any single quotes or backslashes in the value before quoting it
$self->debug("encoding text: $data") if DEBUG;
$data =~ s/(['\\])/\\$1/g;
return "'" . $data . "'";
}
}
elsif (ref $data eq ARRAY) {
$self->debug("encoding list") if DEBUG;
return '[' . join($self->{ comma }, map { _encode($self, $_) } @$data) . ']';
}
elsif (ref $data eq HASH) {
$self->debug("encoding hash") if DEBUG;
my $a = $self->{ assign };
my ($k, $v);
return
'{'
. join($self->{ comma },
map {
$k = $_;
$v = _encode($self, $data->{$k});
if ($k =~ /\W/) {
$k =~ s/(['\\])/\\$1/g;
$k = "'" . $k . "'";
}
$k . $a . $v; # key = value
}
sort keys %$data
)
. '}';
}
else {
return $self->error_msg( bad_ref => ref $data );
}
}
sub _decode {
my $self = shift;
my $text = ref $_[0] ? $_[0] : \$_[0];
my ($key, $value);
if (DEBUG) {
my $pos = pos $$text;
if ($pos) {
$$text =~ /\G(.*)/;
$self->debug("decoding: $1\n");
pos $$text = $pos;
}
else {
$self->debug("decoding: $$text\n");
}
}
if ($$text =~ /$MATCH_HASH/cog) {
$self->debug("matched hash\n") if DEBUG;
$value = { };
while ($$text =~ /$MATCH_KEY/cog) {
if (defined $1) {
$key = $1;
}
else {
$key = $2;
$key =~ s/$MATCH_ESCAPED/$1/og;
}
$$text =~ /$MATCH_ASSIGN/cog
|| return $self->error("Missing value after $key");
$value->{ $key } = _decode($self, $text);
$$text =~ /$MATCH_COMMA/cog;
}
$$text =~ /$MATCH_END_HASH/cog
|| return $self->error("missing } at end of hash definition");
}
elsif ($$text =~ /$MATCH_LIST/cog) {
$self->debug("matched list\n") if DEBUG;
$value = [ ];
while (1) {
if ($$text =~ /$MATCH_END_LIST/cog) {
last if $1 eq ']';
return $self->error("missing ] at end of list definition ($1)");
}
push(@$value, _decode($self, $text));
$$text =~ /$MATCH_COMMA/cog;
}
}
elsif ($$text =~ /$MATCH_QUOTED/cog) {
$self->debug("matched quoted\n") if DEBUG;
$value = $1;
$value =~ s/$MATCH_ESCAPED/$1/og;
$self->debug("found quoted string: $value\n") if DEBUG;
}
elsif ($$text =~ /$MATCH_NUMBER/cog) {
$self->debug("matched number") if DEBUG;
$value = $1;
$self->debug("found number: $value\n") if DEBUG;
}
elsif ($$text =~ /$MATCH_UNDEF/cog) {
$self->debug("matched undef") if DEBUG;
$value = undef;
}
else {
$self->debug("matched other") if DEBUG;
$$text =~ /\G(.*)/;
return $self->error("bad value: $1")
}
return $value;
}
1;
=head1 NAME
Badger::Codec::TT - encode/decode data using TT data syntax
=head1 SYNOPSIS
use Badger::Codec::TT;
my $codec = Badger::Codec::TT->new();
my $encoded = $codec->encode({ msg => "Hello World" });
my $decoded = $codec->decode($encoded);
=head1 DESCRIPTION
This module implements a subclass of L<Badger::Codec> which encodes and
decodes data to and from an extended form of the data definition syntax used
in the Template Toolkit. It mainly exists for testing purposes (so that we
don't require people to install YAML or JSON just to run some of the Badger
tests) and to support some legacy systems that use data encoded in this way
(mostly dating back to the days before YAML and JSON were around). If you're
starting out afresh then you're probably better off using YAML or JSON unless
you have good reason not to.
The syntax is similar to Perl in that it uses single quotes for literal
strings, square brackets for list definitions and curly braces for hash
definitions along with the C<=E<gt>> "fat comma" operator to separate hash
keys and values. Data structures can be nested indefinitely. The unquoted
C<undef> token can be used to explicitly represent the undefined value.
{
message => 'Hello World, this is some text',
things => ['a list', 'of some things'],
stuff => {
pi => 3.14,
foo => [ { nested => 'hash' }, ['nested', 'list' ] ],
nul => undef,
},
}
TT syntax is more liberal than Perl. It allows you to use C<=> instead
of C<=E<gt>> to separate keys and values in hash arrays, and commas between
items are optional.
{
message = 'Hello World, this is some text'
things = ['a list' 'of some things']
stuff = {
pi = 3.14
foo = [ { nested = 'hash' } ['nested' 'list' ] ]
nul = undef
}
}
It will also accept C<:> as a delimiter between hash keys and values,
thus providing an overlap with a useful subset of JSON syntax:
{
message: 'Hello World, this is some text',
things: ['a list' 'of some things'],
stuff: {
pi: 3.14,
foo: [ { nested: 'hash' }, ['nested', 'list' ] ],
nul: undef
}
}
The decoder is very liberal in what it will accept for delimiters. You can mix
and match any of the above styles in the same document if you really want to.
However, you would be utterly batshit insane to do such a thing, let alone
want for it. Just because we'll accept any of the commonly used formats
doesn't mean that you should be using them all at once.
{
perl => 'Perl looks like this',
tt = 'TT looks like this'
json: 'JSON looks like this
}
Note that while the syntax may be more liberal than either Perl or JSON,
the semantics are decidedly stricter. It is not possible to embed arbitrary
Perl code, instantiate Javascript objects, or do anything else outside of
defining vanilla data structures.
The encoder generates TT syntax by default (C<=> for assignment, with a single
space to delimiter items). You can change these options using the C<assign>
and C<comma> configuration options.
my $codec = Badger::Codec::TT->new( assign => '=>', comma => ',' );
print $codec->encode($some_data);
=head1 METHODS
=head2 encode($data)
Encodes the Perl data in C<$data> to a TT string.
$encoded = Badger::Codec::TT->encode($data);
=head2 decode($tt)
Decodes the encoded TT string in C<$tt> back into a Perl data structure.
$decoded = Badger::Codec::TT->decode($encoded);
=head2 encoder()
This method returns a reference to an encoding subroutine.
my $sub = Badger::Codec::TT->encoder;
$encoded = $sub->($data);
=head2 decoder()
This method returns a reference to a decoding subroutine.
my $sub = Badger::Codec::TT->decoder;
$decoded = $sub->($encoded);
=head1 INTERNAL SUBROUTINES
=head2 _encode($data)
This internal subroutine performs the recursive encoding of the data.
=head2 _decode($tt)
This internal subroutine performs the recursive decoding of the data.
=head1 AUTHOR
Andy Wardley L<http://wardley.org/>
=head1 COPYRIGHT
Copyright (C) 2005-2009 Andy Wardley. All rights reserved.
=head1 SEE ALSO
L<Badger::Codecs>, L<Badger::Codec>, L<Template>
=cut
# Local Variables:
# mode: Perl
# perl-indent-level: 4
# indent-tabs-mode: nil
# End:
#
# vim: expandtab shiftwidth=4:
|