File: Util.pm

package info (click to toggle)
libwww-perl 5.36-1.1
  • links: PTS
  • area: main
  • in suites: slink
  • size: 848 kB
  • ctags: 400
  • sloc: perl: 6,366; makefile: 51; sh: 6
file content (180 lines) | stat: -rw-r--r-- 4,413 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
package HTTP::Headers::Util;

use strict;
use vars qw($VERSION @ISA @EXPORT_OK);

$VERSION = sprintf("%d.%02d", q$Revision: 1.7 $ =~ /(\d+)\.(\d+)/);

require Exporter;
@ISA=qw(Exporter);

@EXPORT_OK=qw(split_header_words join_header_words);

=head1 NAME

HTTP::Headers::Util - Header value parsing utility functions

=head1 SYNOPSIS

  use HTTP::Headers::Util qw(split_header_words);
  @values = split_header_words($h->header("Content-Type"));

=head1 DESCRIPTION

This module provide a few functions that helps parsing and
construction of valid HTTP header values.  None of the functions are
exported by default.

The following functions are provided:

=over 4


=item split_header_words( @header_values )

This function will parse the header values given as argument into a
list of anonymous arrays containing key/value pairs.  The function
know how to deal with ",", ";" and "=" as well as quoted values after
"=".  A list of space separated tokens are parsed as if they were
separated by ";".

If the @header_values passed as argument contains multiple values,
then they are treated as if they were a single value separated by
comma ",".

This means that this function is useful to parse header fields that
follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
the requirement for tokens).

  headers           = #header
  header            = (token | parameter) *( [";"] (token | parameter))

  token             = 1*<any CHAR except CTLs or separators>
  separators        = "(" | ")" | "<" | ">" | "@"
                    | "," | ";" | ":" | "\" | <">
                    | "/" | "[" | "]" | "?" | "="
                    | "{" | "}" | SP | HT

  quoted-string     = ( <"> *(qdtext | quoted-pair ) <"> )
  qdtext            = <any TEXT except <">>
  quoted-pair       = "\" CHAR

  parameter         = attribute "=" value
  attribute         = token
  value             = token | quoted-string

Each I<header> is represented by an anonymous array of key/value
pairs.  If a token is recognized then the value will be C<undef>.
Syntactically incorrect headers will not necessary be parsed as you
would want.

This is easier to describe with some examples:

   split_header_words('foo="bar"; port="80,81"; discard, bar=baz')
   split_header_words('text/html; charset="iso-8859-1");
   split_header_words('Basic realm="\"foo\\bar\""');

will return

   [foo=>'bar', port=>'80,81', discard=> undef], [bar=>'baz' ]
   ['text/html' => undef, charset => 'iso-8859-1']
   [Basic => undef, realm => '"foo\bar"']

=cut


sub split_header_words
{
    my(@val) = @_;
    my @res;
    for (@val) {
	my @cur;
	while (length) {
	    if (s/^\s*(=*[^\s=;,]+)//) {  # 'token' or parameter 'attribute'
		push(@cur, $1);
		# a quoted value
		if (s/^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"//) {
		    my $val = $1;
		    $val =~ s/\\(.)/$1/g;
		    push(@cur, $val);
		# some unquoted value
		} elsif (s/^\s*=\s*([^;,\s]*)//) {
		    my $val = $1;
		    $val =~ s/\s+$//;
		    push(@cur, $val);
		# no value, a lone token
		} else {
		    push(@cur, undef);
		}
	    } elsif (s/^\s*,//) {
		push(@res, [@cur]) if @cur;
		@cur = ();
	    } elsif (s/^\s*;// || s/^\s+//) {
		# continue
	    } else {
		die "This should not happen: '$_'";
	    }
	}
	push(@res, \@cur) if @cur;
    }
    @res;
}


=item join_header_words( @arrays )

This will do the opposite convertion of what split_header_words()
does.  It takes a list of anonymous arrays as argument (or a list of
key/value pairs) and produce a single header value.  Attribute values
are quoted if needed.

Example:

   join_header_words(["text/plain" => undef, charset => "iso-8859/1"]);
   join_header_words(""text/plain" => undef, charset => "iso-8859/1");

will both return the string:

   text/plain; charset="iso-8859/1"

=cut

sub join_header_words
{
    @_ = ([@_]) if @_ && !ref($_[0]);
    my @res;
    for (@_) {
	my @cur = @$_;
	my @attr;
	while (@cur) {
	    my $k = shift @cur;
	    my $v = shift @cur;
	    if (defined $v) {
		if ($v =~ /^\w+$/) {
		    $k .= "=$v";
		} else {
		    $v =~ s/([\"\\])/\\$1/g;  # escape " and \
		    $k .= qq(="$v");
		}
	    }
	    push(@attr, $k);
	}
	push(@res, join("; ", @attr)) if @attr;
    }
    join(", ", @res);
}

1;

__END__

=back

=head1 COPYRIGHT

Copyright 1997-1998, Gisle Aas

This library is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=cut