# vim: set ts=2 sts=2 sw=2 expandtab smarttab:
use utf8;
use strict;
use warnings;
use lib 't/lib';
use MarkdownTests;
# Check that characters which could be read as inline html are escaped in
# regular text and left alone inside code spans.
#
#   $desc     - prefix for the test descriptions
#   $pod      - pod fragment to convert
#   $markdown - expected markdown with the default options
#   %opts:
#     verbatim - expected code span content (falls back to $pod)
#     entities - expected markdown with html_encode_chars => 1;
#                that variant only runs when this is given
sub entity_encode_ok {
  my ($desc, $pod, $markdown, %opts) = @_;
  my $verbatim = $opts{verbatim} ? $opts{verbatim} : $pod;

  note(hex_escape($pod));

  # Default options.
  convert_both($pod, $markdown, $verbatim, $desc);

  # A character class that leaves out [&<] must give the same result as none.
  my %non_ascii = (attr => {html_encode_chars => '\x80-\xff'});
  convert_both($pod, $markdown, $verbatim,
    "$desc (html_encode_chars => non ascii)", %non_ascii);

  # Encoding [&<] too will produce more of those chars.
  if ($opts{entities}) {
    my %encode_all = (attr => {html_encode_chars => 1});
    convert_both($pod, $opts{entities}, $verbatim,
      "$desc (html_encode_chars => 1)", %encode_all);
  }
}
# Convert $pod twice: wrapped in B<<< >>> (expecting $markdown) and wrapped
# in C<<< >>> (expecting $verbatim inside backticks).
# Any remaining %opts are handed to convert_ok unchanged.
sub convert_both {
  my ($pod, $markdown, $verbatim, $desc, %opts) = @_;

  my $bold_pod  = "B<<< $pod >>>";
  my $code_pod  = "C<<< $pod >>>";
  my $code_span = '`' . $verbatim . '`';

  convert_ok($bold_pod, $markdown, "$desc: inline html escaped", %opts);
  convert_ok($code_pod, $code_span, "$desc: html not escaped in code span", %opts);
}
# The parser's internal escape sequences, keyed by name.
my %_escape = Pod::Markdown::__escape_sequences();

# The same sequences after entity encoding with html_encode_chars => 1.
my %_e_escape = do {
  my $encoder = Pod::Markdown->new(html_encode_chars => 1);
  my @names   = keys %_escape;
  map { ($_ => $encoder->encode_entities($_escape{$_})) } @names;
};

# NOTE(review): qr/&/ also matches an unencoded ampersand; confirm whether
# the stricter qr/&amp;/ was intended here.
like($_e_escape{amp}, qr/&/, 'entity-encoded escape sanity check');
with_and_without_entities {
  # The bullet is only entity-encoded under html_encode_chars => 1, so $char
  # is used in the "entities" expectation alone: a named entity for a true
  # argument, a numeric one otherwise.
  # NOTE(review): assumes a true argument means HTML::Entities is in use --
  # confirm against with_and_without_entities in MarkdownTests.
  my $char = $_[0] ? '&bull;' : '&#x2022;';

  entity_encode_ok 'sanity check' => (
    q{< & > E<0x2022>},
    # By default nothing here looks like html, so nothing is escaped.
    q{**< & > •**},
    # Encoding everything turns each of [<&>] into an entity.
    entities => "**&lt; &amp; &gt; ${char}**",
    verbatim => q{< & > •},
  );
};
# This was an actual bug report.
# "<filename>" would be read as an html tag, so the "<" has to be escaped.
entity_encode_ok 'command lines args' => (
  q{--file=<filename>},
  # Default: only the tag-opening "<" is escaped.
  q{**--file=&lt;filename>**},
  # html_encode_chars => 1: the ">" is encoded as well.
  entities => q{**--file=&lt;filename&gt;**},
);
# Also from a bug report: an address in angle brackets looks like a tag.
entity_encode_ok 'email address' => (
  q{Email Address E<lt>foo@bar.comE<gt>},
  # Default: only the tag-opening "<" is escaped.
  q{**Email Address &lt;foo@bar.com>**},
  # html_encode_chars => 1: the ">" is encoded as well.
  entities => q{**Email Address &lt;foo@bar.com&gt;**},
  # Code spans keep the literal characters.
  verbatim => q{Email Address <foo@bar.com>},
);
# Use real html tags.
# This is a good example to copy/paste into a markdown processor
# to see how it handles the html.
# For example, github respects "\<" and "\&" but daringfireball does not.
# That's why we use html entity encoding (more portable).
entity_encode_ok 'real html' => (
  # The pod contains the literal text "&nbsp;" and a real tag.
  q{h&nbsp;=<hr>},
  # Default: the entity's "&" and the tag's "<" are escaped.
  q{**h&amp;nbsp;=&lt;hr>**},
  # html_encode_chars => 1: the ">" is encoded as well.
  entities => q{**h&amp;nbsp;=&lt;hr&gt;**},
);
# Test link text.
entity_encode_ok 'html chars in link text' => (
  # The link text contains a literal entity and something that looks like a tag.
  q{L<< Some &amp; <thing>|http://some.where/ >>},
  # Default: the entity's "&" and the tag-opening "<" are escaped.
  q{**[Some &amp;amp; &lt;thing>](http://some.where/)**},
  # html_encode_chars => 1: the ">" is encoded as well.
  entities => q{**[Some &amp;amp; &lt;thing&gt;](http://some.where/)**},
  # Markdown will print this rather than making it a link,
  # but I'm not sure what else to do about it.
  verbatim => q{[Some &amp; <thing>](http://some.where/)},
);
# Html characters in the link target rather than in the link text.
# NOTE(review): the expected url is byte-identical to the input, i.e. the
# "<tag>" in it is not escaped -- confirm that this is the behaviour the
# test is meant to pin down.
entity_encode_ok(
  'html chars in url',
  # This may not be a valid url but let this test demonstrate how it currently works.
  q{L<< Yo|http://some.where?a=&<=<tag> >>},
  q{**[Yo](http://some.where?a=&<=<tag>)**},
  # Inside a code span the link syntax is printed rather than rendered (shrug).
  verbatim => q{[Yo](http://some.where?a=&<=<tag>)},
);
# Test with 'false' values to avoid conditional bugs.
# In this case a bare zero won't trigger the need for an escape.
entity_encode_ok 'false values' => (
  q{<0 &0},
  # Default: "<0" cannot start a tag and "&0" is not an entity.
  q{**<0 &0**},
  # html_encode_chars => 1 encodes every "<" and "&" regardless.
  entities => q{**&lt;0 &amp;0**},
);
# Ensure that two pod "strings" still escape the < and & properly.
# Use S<> since it counts as an event (and therefore creates two separate
# "handle_text" calls) but does not produce boundary characters (the text
# inside and around the S<> will have no characters between them).
entity_encode_ok '< and & are escaped properly even as separate pod strings' => (
  q{the <S<cmp>E<gt> operator and S<&>foobar; (or S<&>fooS<bar>;) and eol &},
  # Default: "<cmp" and both "&foobar;" are escaped even though their parts
  # arrive separately; the trailing bare "&" is left alone.
  q{**the &lt;cmp> operator and &amp;foobar; (or &amp;foobar;) and eol &**},
  # html_encode_chars => 1: the ">" and the trailing "&" are encoded too.
  entities => q{**the &lt;cmp&gt; operator and &amp;foobar; (or &amp;foobar;) and eol &amp;**},
  verbatim => q{the <cmp> operator and &foobar; (or &foobar;) and eol &},
);
# Don't undo it for literal ones that happen to be at the end of strings.
# The pod itself contains entity text, so the ampersand of each entity
# (including the one that ends the string) must come out as "&amp;".
entity_encode_ok 'literal entity from pod at end of string stays amp-escaped' => (
  q{literal &amp; and &lt;},
  q{**literal &amp;amp; and &amp;lt;**},
);
# None of these ampersands start an entity, so by default they pass through
# untouched wherever they sit in the string.
entity_encode_ok 'field splitting: amps at beginning and end and multiple in the middle' => (
  q{& ity &&& and &},
  q{**& ity &&& and &**},
  # html_encode_chars => 1 encodes every single ampersand.
  entities => q{**&amp; ity &amp;&amp;&amp; and &amp;**},
);
# Text that happens to contain the parser's internal escape sequences must
# come through unchanged, while the surrounding text is escaped as usual.
entity_encode_ok 'literal occurrences of internal escape sequences are unaltered' => (
  qq[hi $_escape{amp} ($_escape{amp_code}) & $_escape{lt} ($_escape{lt_code}) < &exclam;],
  # Default: the bare "&" and "<" stay; "&exclam;" looks like an entity.
  qq[**hi $_escape{amp} ($_escape{amp_code}) & $_escape{lt} ($_escape{lt_code}) < &amp;exclam;**],
  # html_encode_chars => 1: the sequences are entity-encoded like any other
  # text, and so is every "&" and "<" around them.
  entities => qq[**hi $_e_escape{amp} ($_e_escape{amp_code}) &amp; $_e_escape{lt} ($_e_escape{lt_code}) &lt; &amp;exclam;**],
);
# Convert the pod from
# http://stackoverflow.com/questions/28496298/escape-angle-brackets-using-podmarkdown
# and return the resulting markdown.
# Any arguments are passed straight to Pod::Markdown->new.
sub so_example {
  my @new_args = @_;

  my $pod = "=head1 OPTIONS\n\n=over 4\n\n=item B<< --file=<filename> >>\n\nFile name \n\n=back\n";

  my $converter = Pod::Markdown->new(@new_args);
  $converter->output_string(\my $output);
  $converter->parse_string_document($pod);

  return $output;
}
# Default escaping only touches the "<" that would otherwise open a tag.
# The paragraph under the list item is indented so it stays part of the item.
eq_or_diff so_example(), "# OPTIONS\n\n- **--file=&lt;filename>**\n\n    File name \n",
  'SO example properly escaped';

# html_encode_chars => 1 additionally encodes the closing ">".
eq_or_diff so_example(html_encode_chars => 1), "# OPTIONS\n\n- **--file=&lt;filename&gt;**\n\n    File name \n",
  'SO example with additional escapes';
# Pod with html-significant characters in a heading, in list item labels,
# and in paragraphs nested under list items.
# Pod paragraphs must be separated by blank lines.
my $list_pod = <<POD;
=head2 hi <there> &you; < &

=over

=item & some < t&e;xt

<paragraph>

<

&

=back

=over

=item 1.

item <text> < &

<para>

=back
POD

# Default options: only a "<" that could open a tag and an "&" that could
# start an entity are escaped; bare ones are left alone.
convert_ok($list_pod, <<MKDN,
## hi &lt;there> &amp;you; < &

- & some < t&amp;e;xt

    &lt;paragraph>

    <

    &

1. item &lt;text> < &

    &lt;para>
MKDN
  'escape entities in lists and items properly',
);

# html_encode_chars => 1: every "<", ">" and "&" is entity-encoded.
convert_ok($list_pod, <<MKDN,
## hi &lt;there&gt; &amp;you; &lt; &amp;

- &amp; some &lt; t&amp;e;xt

    &lt;paragraph&gt;

    &lt;

    &amp;

1. item &lt;text&gt; &lt; &amp;

    &lt;para&gt;
MKDN
  'escape all entities in lists and items',
  attr => { html_encode_chars => 1 }
);

done_testing;