File: link_parser.ex

package info (click to toggle)
elixir-earmark-parser 1.4.44-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,148 kB
  • sloc: makefile: 9
file content (174 lines) | stat: -rw-r--r-- 4,810 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
defmodule EarmarkParser.Parser.LinkParser do
  @moduledoc false
  import EarmarkParser.Helpers.LeexHelpers, only: [tokenize: 2]
  import EarmarkParser.Helpers.YeccHelpers, only: [parse!: 2]
  import EarmarkParser.Helpers.StringHelpers, only: [behead: 2]

  # Hopefully this will go away in v1.3
  # **********************************
  #
  # Right now it needs to parse the url part of strings according to the following grammar
  #
  #      url -> ( inner_url )
  #      url -> ( inner_url title )
  #
  #      inner_url   -> ( inner_url )
  #      inner_url   -> [ inner_url ]
  #      inner_url   ->  url_char*
  #
  #      url_char -> . - quote - ( - ) - [ - ]
  #
  #      title -> quote .* quote  ;;   not LALR-k here
  #
  #      quote ->  "
  #      quote ->  '              ;;  yep allowing '...." for now
  #
  #      non_quote -> . - quote

  @doc false
  # Parses `src` as a link or image. On success returns the 5-tuple produced
  # by `make_result/4` (or a wikilink tuple); returns `nil` when `src` does
  # not start with a legal link.
  def parse_link(src, lnb) do
    parsed =
      parse!(src,
        lexer: :earmark_parser_link_text_lexer,
        parser: :earmark_parser_link_text_parser
      )

    case parsed do
      {link_or_img, link_text, parsed_text} ->
        parsed_str = to_string(parsed_text)

        # Everything after the link text is a candidate for the `(url title)` part.
        src
        |> behead(parsed_str)
        |> tokenize(with: :earmark_parser_link_text_lexer)
        |> p_url(lnb)
        |> make_result(to_string(link_text), parsed_str, link_or_img)

      _other ->
        nil
    end
  end

  # The URL part must open with a parenthesis; anything else means there is
  # no URL to parse and we answer `nil`.
  defp p_url([{:open_paren, _} | rest], lnb),
    do: url(rest, {[], [], nil}, [:close_paren], lnb)

  defp p_url(_tokens, _lnb), do: nil

  # Descend one nesting level: remember that one more closing paren is owed.
  defp url([{:open_paren, text} | rest], acc, needed, lnb) do
    url(rest, add(acc, text), [:close_paren | needed], lnb)
  end

  # Outermost closing paren: the URL is complete, return the accumulator.
  defp url([{:close_paren, _} | _], acc, [:close_paren], _lnb), do: acc

  # Closing paren of an inner level: consume it and pop one level.
  defp url([{:close_paren, text} | rest], acc, [:close_paren | needed], lnb) do
    url(rest, add(acc, text), needed, lnb)
  end

  # A quote at the outermost level starts a title only if a matching quote
  # follows right before the closing paren; otherwise it is literal URL text.
  defp url([{:open_title, text} | rest] = all, acc, [:close_paren], lnb) do
    case bail_out_to_title(all, acc) do
      nil -> url(rest, add(acc, text), [:close_paren], lnb)
      found -> found
    end
  end

  # Ordinary tokens are copied verbatim into the URL.
  # (NB: :open_title is deliberately absent — at inner nesting levels it has
  # no clause and falls through to the catch-all below, as before.)
  defp url([{kind, text} | rest], acc, needed, lnb)
       when kind in [:open_bracket, :close_bracket, :any_quote, :verbatim, :ws, :escaped] do
    url(rest, add(acc, text), needed, lnb)
  end

  # Anything else (including running out of tokens before all parens close)
  # is not a legal URL part of a link.
  defp url(_tokens, _acc, _needed, _lnb), do: nil

  # Tries to read the remaining tokens as a quoted title followed by the
  # closing paren. Returns the result triple updated via `add_title/2`, or
  # `nil` when no legal title is present.
  #
  # The previous single-clause `with`/plain `<-` could never fail (Credo's
  # RedundantWithClauseResult anti-pattern) and was replaced by a binding.
  defp bail_out_to_title(ts, result) do
    remaining_text = Enum.map_join(ts, &text_of_token/1)

    case title(remaining_text) do
      nil -> nil
      {title_text, inner_title} -> add_title(result, {title_text, inner_title})
    end
  end

  # Reconstructs the source text of a lexer token: escaped tokens get their
  # backslash back, every other token is just its payload.
  defp text_of_token(token)

  defp text_of_token({:escaped, text}), do: "\\" <> to_string(text)
  defp text_of_token({_kind, text}), do: text

  # sic!!! Deliberately greedy and not context aware — kept as-is for
  # backward compatibility. Matches leading whitespace, a quoted title and
  # a lookahead for the closing paren; returns `{parsed, inner}` or `nil`.
  defp title(remaining_text) do
    case Regex.run(~r{\A\s+(['"])(.*?)\1(?=\))}, remaining_text) do
      [parsed, _quote, inner] -> {parsed, inner}
      nil -> nil
    end
  end

  # No URL was parsed: the only remaining legal form for a :link is a
  # wikilink, `[[target]]` or `[[target|text]]`.
  defp make_result(nil, _link_text, parsed_text, :link) do
    case Regex.run(~r{\A\[\[([^\]\|]+)(?:\|([^\]]+))?\]\]\Z}, parsed_text) do
      [_, target] -> make_wikilink(parsed_text, target, target)
      [_, target, text] -> make_wikilink(parsed_text, target, text)
      nil -> nil
    end
  end

  # No URL and not a :link — no result at all.
  defp make_result(nil, _link_text, _parsed_text, _link_or_img), do: nil

  # Regular link or image with a parsed URL (and possibly a title).
  defp make_result({parsed, url, title}, link_text, parsed_text, link_or_img) do
    full_text = "#{parsed_text}(#{list_to_text(parsed)})"
    {full_text, link_text, list_to_text(url), title, link_or_img}
  end

  # Prepends a token's text to both the parsed-text and url accumulators
  # (lists are reversed later by `list_to_text/1`). Only matches while no
  # title has been seen yet (third element still `nil`).
  defp add({parsed_acc, url_acc, nil}, text),
    do: {[text | parsed_acc], [text | url_acc], nil}

  # Records a found title: the raw parsed text joins the parsed-text
  # accumulator, while the inner text becomes the title itself.
  defp add_title({parsed_acc, url_acc, _old_title}, {parsed, inner}),
    do: {[parsed | parsed_acc], url_acc, inner}

  # Builds the result tuple for a wikilink; target and link text are
  # whitespace-trimmed, wikilinks never carry a title.
  defp make_wikilink(parsed_text, target, link_text),
    do: {parsed_text, String.trim(link_text), String.trim(target), nil, :wikilink}

  # Accumulators are built by prepending, so reverse once and join
  # (`Enum.join/1` defaults to the empty joiner).
  defp list_to_text(parts), do: parts |> Enum.reverse() |> Enum.join()
end

# SPDX-License-Identifier: Apache-2.0