1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136
|
package commonmark_test
import (
"testing"
"github.com/JohannesKaufmann/html-to-markdown/v2/converter"
"github.com/JohannesKaufmann/html-to-markdown/v2/plugin/base"
"github.com/JohannesKaufmann/html-to-markdown/v2/plugin/commonmark"
)
// TestNewCommonmarkPlugin_Italic is a table-driven test that converts HTML
// snippets containing <em> and <i> elements and checks the Markdown output
// produced by a converter configured with the base and commonmark plugins.
//
// The table covers plain emphasis, surrounding whitespace, literal "*"
// characters inside the emphasis, adjacent and nested italics, mixing with
// <strong>, and italics whose content is empty or consists only of
// whitespace-like characters.
func TestNewCommonmarkPlugin_Italic(t *testing.T) {
	const (
		nonBreakingSpace = '\u00A0'
		zeroWidthSpace   = '\u200b'
	)

	runs := []struct {
		desc     string // subtest name
		input    string // HTML handed to the converter
		expected string // exact Markdown the converter must return
	}{
		{
			desc:     "simple",
			input:    `<p><em>Text</em></p>`,
			expected: `*Text*`,
		},
		{
			desc:     "normal text surrounded by italic",
			input:    `<em>Italic</em>Normal<em>Italic</em>`,
			expected: `*Italic*Normal*Italic*`,
		},
		{
			desc:     "italic text surrounded by normal",
			input:    `Normal<em>Italic</em>Normal`,
			expected: `Normal*Italic*Normal`,
		},
		{
			desc:     "with spaces inside",
			input:    `<p><em> Text </em></p>`,
			expected: `*Text*`,
		},
		{
			// Literal asterisks in the content are expected to be escaped.
			desc:     "with delimiter inside",
			input:    `<p><em>*A*B*</em></p>`,
			expected: `*\*A\*B\**`,
		},
		{
			// Directly adjacent italics are expected to be merged into one.
			desc:     "adjacent",
			input:    `<em>A</em><em>B</em> <em>C</em>`,
			expected: `*AB* *C*`,
		},
		{
			desc:     "adjacent and lots of spaces",
			input:    `<em> A </em><em> B </em> <em> C </em>`,
			expected: `*A B* *C*`,
		},
		{
			// An italic nested inside an italic is expected to collapse.
			desc:     "nested",
			input:    `<em>A <em>B</em> C</em>`,
			expected: `*A B C*`,
		},
		{
			desc:     "nested and lots of spaces",
			input:    `<em> A <em> B </em> C </em>`,
			expected: `*A B C*`,
		},
		{
			desc:     "mixed nested 1",
			input:    `<em>A <strong>B</strong> C</em>`,
			expected: `*A **B** C*`,
		},
		{
			desc:     "mixed nested 2",
			input:    `<strong>A <em>B</em> C</strong>`,
			expected: `**A *B* C**`,
		},
		{
			// <i> and <em> are expected to be treated as the same emphasis.
			desc:     "mixed different italic",
			input:    `<i>A<em>B</em>C</i>`,
			expected: `*ABC*`,
		},
		{
			desc: "next to each other in other containers",
			input: `<div>
<em>A</em>
<article><em>B</em></article>
<em>C</em>
</div>`,
			expected: "*A*\n\n*B*\n\n*C*",
		},

		// Edge cases: italics with empty or whitespace-like content.
		{
			desc:     "empty italic #1",
			input:    `before<i></i>after`,
			expected: `beforeafter`,
		},
		{
			desc:     "empty italic #2",
			input:    `before<i> </i>after`,
			expected: `before after`,
		},
		{
			desc:     "empty italic #3",
			input:    `before <i> </i> after`,
			expected: `before after`,
		},
		{
			// A lone non-breaking space is expected to be emitted without
			// emphasis delimiters.
			desc:     "italic with non-breaking-space",
			input:    `before<i>` + string(nonBreakingSpace) + `</i>after`,
			expected: `before` + string(nonBreakingSpace) + `after`,
		},
		{
			// A zero-width space, in contrast, is expected to keep its
			// emphasis delimiters.
			desc:     "italic with zero-width-space",
			input:    `before<i>` + string(zeroWidthSpace) + `</i>after`,
			expected: `before*` + string(zeroWidthSpace) + `*after`,
		},
	}

	for _, run := range runs {
		t.Run(run.desc, func(t *testing.T) {
			conv := converter.NewConverter(
				converter.WithPlugins(
					base.NewBasePlugin(),
					commonmark.NewCommonmarkPlugin(),
				),
			)

			out, err := conv.ConvertString(run.input)
			if err != nil {
				// Stop here: out is meaningless after a conversion error, and
				// comparing it would only add a misleading second failure.
				t.Fatal(err)
			}
			if out != run.expected {
				t.Errorf("expected %q but got %q", run.expected, out)
			}
		})
	}
}
|