File: parser.rb

package info (click to toggle)
ruby-kdl 1.0.3-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 480 kB
  • sloc: ruby: 6,667; yacc: 72; sh: 5; makefile: 4
file content (123 lines) | stat: -rw-r--r-- 3,942 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
module KDL
  module Types
    class IRLReference < Value
      # Splits an IRL reference string into its components and renders it in
      # two forms: an ASCII form (non-ASCII characters percent-encoded, host
      # converted by IDNHostname::Validator) and a Unicode form
      # (percent-escapes decoded where that is safe).
      class Parser
        # Matches the complete input and captures, in order:
        # scheme, auth (with its trailing "@"), domain, path, search, hash.
        # Anchored with \A / \z rather than ^ / $ so that input containing
        # line breaks cannot be matched line-by-line and silently truncated.
        RGX = /\A(?:(?:([a-z][a-z0-9+.\-]+)):\/\/([^@]+@)?([^\/?#]+)?)?(\/?[^?#]*)?(?:\?([^#]*))?(?:#(.*))?\z/i.freeze
        # A single percent-escape such as "%2F" or "%c3".
        PERCENT_RGX = /%[a-f0-9]{2}/i.freeze

        # Characters that are never percent-decoded by .decode.
        RESERVED_URL_CHARS = %w[! # $ & ' ( ) * + , / : ; = ? @ \[ \] %].freeze
        UNRESERVED_URL_CHARS = %w[A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
                                  a b c d e f g h i j k l m n o p q r s t u v w x y z
                                  0 1 2 3 4 5 6 7 8 9 - _ . ~].freeze
        # Every ASCII character accepted inside a URL component.
        URL_CHARS = (RESERVED_URL_CHARS + UNRESERVED_URL_CHARS).freeze

        # @param string [String] the raw IRL reference to parse
        def initialize(string)
          @string = string
        end

        # Parses the string given to the constructor.
        #
        # @return [Array(String, Hash)] the ASCII rendering followed by a hash
        #   with the keys :unicode_value, :unicode_domain, :unicode_path,
        #   :unicode_search and :unicode_hash
        # @raise [ArgumentError] if the input is not a valid IRL reference
        def parse
          scheme, auth, domain, path, search, hash = parse_url

          if @string.ascii_only?
            # Pure ASCII input: the components are already the ASCII form, so
            # only the Unicode form has to be derived.
            unicode_path = Parser.decode(path)
            unicode_search = Parser.decode(search)
            unicode_hash = Parser.decode(hash)
          else
            # Input contains non-ASCII characters: keep the components as the
            # Unicode form and percent-encode them for the ASCII form.
            unicode_path = path
            unicode_search = search
            unicode_hash = hash
            path = Parser.encode(path)
            # .encode only rewrites non-ASCII characters, so "&" and "=" are
            # preserved exactly as written (including bare keys and values
            # that themselves contain "=").
            search = Parser.encode(search)
            hash = Parser.encode(hash)
          end

          if domain
            validator = IDNHostname::Validator.new(domain)
            domain = validator.ascii
            unicode_domain = validator.unicode
          else
            unicode_domain = domain
          end

          unicode_value = Parser.build_uri_string(scheme, auth, unicode_domain, unicode_path, unicode_search, unicode_hash)
          ascii_value = Parser.build_uri_string(scheme, auth, domain, path, search, hash)

          [ascii_value,
           { unicode_value: unicode_value,
             unicode_domain: unicode_domain,
             unicode_path: unicode_path,
             unicode_search: unicode_search,
             unicode_hash: unicode_hash }]
        end

        # Splits the input into its raw components.
        #
        # @return [Array<String, nil>] scheme, auth, domain, path, search, hash
        #   (components that are absent from the input are nil)
        # @raise [ArgumentError] if the input does not match RGX or a
        #   component contains a disallowed ASCII character
        def parse_url
          match = RGX.match(@string)
          raise ArgumentError, "invalid IRL `#{@string}'" if match.nil?

          parts = match.captures
          raise ArgumentError, "invalid IRL `#{@string}'" unless parts.all? { |part| Parser.valid_url_part?(part) }

          parts
        end

        # @param string [String, nil] a single URL component
        # @return [Boolean] true when the component is nil or every ASCII
        #   character in it is listed in URL_CHARS (non-ASCII is always allowed)
        def self.valid_url_part?(string)
          return true unless string

          string.each_char.all? do |char|
            !char.ascii_only? || URL_CHARS.include?(char)
          end
        end

        # Percent-encodes every non-ASCII character; ASCII is left untouched.
        #
        # @param string [String, nil]
        # @return [String, nil] the encoded UTF-8 string, or the argument
        #   itself when it is nil
        def self.encode(string)
          return string unless string

          string.each_char
                .map { |c| c.ascii_only? ? c : percent_encode(c) }
                .join
                .force_encoding('utf-8')
        end

        # Decodes percent-escapes, except those that stand for a character in
        # RESERVED_URL_CHARS, which are kept in their escaped form.
        #
        # @param string [String, nil]
        # @return [String, nil] the decoded string tagged as UTF-8, or the
        #   argument itself when it is nil
        def self.decode(string)
          return string unless string

          # Substitute on a binary copy: each escape decodes to a single raw
          # byte, and mixing such bytes into a string that already holds
          # non-ASCII UTF-8 text would raise Encoding::CompatibilityError.
          string.b.gsub(PERCENT_RGX) do |match|
            char = match[1, 2].to_i(16).chr
            RESERVED_URL_CHARS.include?(char) ? match : char
          end.force_encoding('utf-8')
        end

        # @param c [String] the character(s) to escape
        # @return [String] every byte of +c+ as an upper-case, two-digit
        #   "%XX" escape
        def self.percent_encode(c)
          c.bytes.map { |b| format('%%%02X', b) }.join
        end

        # Concatenates the components back into a single string, skipping the
        # ones that are nil.
        #
        # @param scheme [String, nil] written as "scheme://"
        # @param auth [String, nil] expected to already end in "@"
        # @param domain [String, nil]
        # @param path [String, nil]
        # @param search [String, nil] written as "?search"
        # @param hash [String, nil] written as "#hash"
        # @return [String]
        def self.build_uri_string(scheme, auth, domain, path, search, hash)
          string = +''
          string << "#{scheme}://" if scheme
          string << auth if auth
          string << domain if domain
          string << path if path
          string << "?#{search}" if search
          string << "##{hash}" if hash
          string
        end
      end
    end

    # An IRL that must be absolute, i.e. carry a scheme.
    class IRL < IRLReference
      # Same parsing as IRLReference::Parser, with the additional requirement
      # that the scheme component is present.
      class Parser < IRLReference::Parser
        # @return [Array<String, nil>] the components produced by the parent
        #   implementation, unchanged
        # @raise [ArgumentError] if the parent rejects the input or the
        #   scheme is missing or empty
        def parse_url
          super.tap do |components|
            raise ArgumentError, "invalid IRL `#{@string}'" if components.first.to_s.empty?
          end
        end
      end
    end
  end
end