defmodule EarmarkParser.Parser.LinkParser do
  @moduledoc false

  import EarmarkParser.Helpers.LeexHelpers, only: [tokenize: 2]
  import EarmarkParser.Helpers.YeccHelpers, only: [parse!: 2]
  import EarmarkParser.Helpers.StringHelpers, only: [behead: 2]

  # Hopefully this will go away in v1.3
  # **********************************
  #
  # For now it needs to parse the url part of a link according to the following grammar
  #
  # url -> ( inner_url )
  # url -> ( inner_url title )
  #
  # inner_url -> ( inner_url )
  # inner_url -> [ inner_url ]
  # inner_url -> url_char*
  #
  # url_char -> . - quote - ( - ) - [ - ]
  #
  # title -> quote .* quote ;; not LALR-k here
  #
  # quote -> "
  # quote -> ' ;; yep, allowing '...." for now
  #
  # non_quote -> . - quote
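  #
  # Illustrative inputs the url grammar accepts (assumption, not taken from a test suite):
  #
  #   (http://example.com)
  #   (http://example.com "a title")
  #   (path/with/(nested)/parens)
  #
  # On success parse_link/2 returns a quintuple
  # {parsed_source, link_text, url, title_or_nil, link_or_img_or_wikilink},
  # otherwise nil (see make_result/4 and make_wikilink/3 below).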
  @doc false
  def parse_link(src, lnb) do
    case parse!(src,
           lexer: :earmark_parser_link_text_lexer,
           parser: :earmark_parser_link_text_parser
         ) do
      {link_or_img, link_text, parsed_text} ->
        beheaded = behead(src, to_string(parsed_text))
        tokens = tokenize(beheaded, with: :earmark_parser_link_text_lexer)

        p_url(tokens, lnb)
        |> make_result(to_string(link_text), to_string(parsed_text), link_or_img)

      _ ->
        nil
    end
  end
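  # The url part must start with an open paren; p_url/2 returns the
  # {parsed, url, title} accumulator on success (see make_result/4), nil otherwise.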
  defp p_url([{:open_paren, _} | ts], lnb) do
    url(ts, {[], [], nil}, [:close_paren], lnb)
  end

  defp p_url(_, _) do
    nil
  end
  # push one level
  defp url([{:open_paren, text} | ts], result, needed, lnb) do
    url(ts, add(result, text), [:close_paren | needed], lnb)
  end

  # pop the outermost level -> we are done
  defp url([{:close_paren, _} | _], result, [:close_paren], _lnb) do
    result
  end

  # pop an inner level
  defp url([{:close_paren, text} | ts], result, [:close_paren | needed], lnb) do
    url(ts, add(result, text), needed, lnb)
  end

  # A quote at nesting level 0 -> bail out to title parsing if a matching closing quote follows
  defp url(ts_all = [{:open_title, text} | ts], result, [:close_paren], lnb) do
    case bail_out_to_title(ts_all, result) do
      nil -> url(ts, add(result, text), [:close_paren], lnb)
      res -> res
    end
  end

  # All of these tokens are simply appended to the url
  defp url([{:open_bracket, text} | ts], result, needed, lnb) do
    url(ts, add(result, text), needed, lnb)
  end

  defp url([{:close_bracket, text} | ts], result, needed, lnb) do
    url(ts, add(result, text), needed, lnb)
  end

  defp url([{:any_quote, text} | ts], result, needed, lnb) do
    url(ts, add(result, text), needed, lnb)
  end

  defp url([{:verbatim, text} | ts], result, needed, lnb) do
    url(ts, add(result, text), needed, lnb)
  end

  defp url([{:ws, text} | ts], result, needed, lnb) do
    url(ts, add(result, text), needed, lnb)
  end

  defp url([{:escaped, text} | ts], result, needed, lnb) do
    url(ts, add(result, text), needed, lnb)
  end

  # Anything else (including running out of tokens) is not a legal url part of a link
  defp url(_, _, _, _) do
    nil
  end
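  # Interpret the remaining tokens, which start at the open quote, as an
  # optional title; returns the result with the title added, or nil if no
  # title can be parsed.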
  defp bail_out_to_title(ts, result) do
    remaining_text = Enum.map_join(ts, &text_of_token/1)

    case title(remaining_text) do
      nil ->
        nil

      {title_text, inner_title} ->
        add_title(result, {title_text, inner_title})
    end
  end
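  # Reconstruct the original source text of a single token; escaped tokens get
  # their backslash back so the title regex sees the source as written.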
  defp text_of_token(token)

  defp text_of_token({:escaped, text}) do
    "\\#{text}"
  end

  defp text_of_token({_, text}) do
    text
  end
  # sic!!! Greedy and not context aware, matching '..." and "...' for backward compatibility
  defp title(remaining_text) do
    title_rgx = ~r{\A\s+(['"])(.*?)\1(?=\))}
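    # e.g. for ~s{ "a title")} this captures ~s{ "a title"} (leading whitespace
    # included) as the parsed part and ~s{a title} as the inner title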
    case Regex.run(title_rgx, remaining_text) do
      nil -> nil
      [parsed, _, inner] -> {parsed, inner}
    end
  end
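  # No url part was found: the whole parsed text may still be a wikilink of the
  # form [[target]] or [[target|link text]]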
  defp make_result(nil, _, parsed_text, :link) do
    wikilink_rgx = ~r{\A\[\[([^\]\|]+)(?:\|([^\]]+))?\]\]\Z}

    case Regex.run(wikilink_rgx, parsed_text) do
      nil -> nil
      [_, wikilink] -> make_wikilink(parsed_text, wikilink, wikilink)
      [_, wikilink, link_text] -> make_wikilink(parsed_text, wikilink, link_text)
    end
  end

  defp make_result(nil, _, _, _) do
    nil
  end

  defp make_result({parsed, url, title}, link_text, parsed_text, link_or_img) do
    {"#{parsed_text}(#{list_to_text(parsed)})", link_text, list_to_text(url), title, link_or_img}
  end
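  # Accumulate a token's text into the result; both the parsed text and the url
  # are built in reverse and joined later by list_to_text/1. add_title/2 keeps
  # the title's source in the parsed text but not in the url.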
  defp add({parsed_text, url_text, nil}, text) do
    {[text | parsed_text], [text | url_text], nil}
  end

  defp add_title({parsed_text, url_text, _}, {parsed, inner}) do
    {[parsed | parsed_text], url_text, inner}
  end

  defp make_wikilink(parsed_text, target, link_text) do
    {parsed_text, String.trim(link_text), String.trim(target), nil, :wikilink}
  end

  defp list_to_text(lst) do
    lst |> Enum.reverse() |> Enum.join("")
  end
end
# SPDX-License-Identifier: Apache-2.0