File: list.rb

package info (click to toggle)
ruby-public-suffix 3.0.3%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, buster
  • size: 284 kB
  • sloc: ruby: 1,431; makefile: 22
file content (243 lines) | stat: -rw-r--r-- 6,770 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
# = Public Suffix
#
# Domain name parser based on the Public Suffix List.
#
# Copyright (c) 2009-2018 Simone Carletti <weppos@weppos.net>

module PublicSuffix

  # A {PublicSuffix::List} is a collection of one
  # or more {PublicSuffix::Rule}.
  #
  # Given a {PublicSuffix::List},
  # you can add or remove {PublicSuffix::Rule},
  # iterate all items in the list or search for the first rule
  # which matches a specific domain name.
  #
  #   # Create a new list
  #   list =  PublicSuffix::List.new
  #
  #   # Push two rules to the list
  #   list << PublicSuffix::Rule.factory("it")
  #   list << PublicSuffix::Rule.factory("com")
  #
  #   # Get the size of the list
  #   list.size
  #   # => 2
  #
  #   # Search for the rule matching given domain
  #   list.find("example.com")
  #   # => #<PublicSuffix::Rule::Normal>
  #   list.find("example.org")
  #   # => nil
  #
  # You can create as many {PublicSuffix::List} instances as you want.
  # The {PublicSuffix::List.default} rule list is used
  # to tokenize and validate a domain.
  #
  class List

    # Absolute path of the list file read by {.default}: "data/list.txt",
    # resolved two directories above the directory containing this file.
    DEFAULT_LIST_PATH = File.expand_path("../../data/list.txt", __dir__)

    # Gets the default rule list.
    #
    # On the first call, reads the file at {DEFAULT_LIST_PATH} and parses it
    # with {.parse}. The result is memoized, so +options+ only take effect
    # on the call that actually builds the list.
    #
    # @param  options [Hash] keyword options forwarded to {.parse}
    # @return [PublicSuffix::List]
    def self.default(**options)
      # Forward as keywords: {.parse} accepts keyword arguments only, so a
      # positional Hash raises ArgumentError on Ruby 3 and later.
      @default ||= parse(File.read(DEFAULT_LIST_PATH), **options)
    end

    # Sets the default rule list to +value+.
    #
    # The value is stored as-is. Assigning +nil+ makes the next call to
    # {.default} build the list again, because that method memoizes with +||=+.
    #
    # @param  value [PublicSuffix::List] the new list
    # @return [PublicSuffix::List]
    def self.default=(value)
      @default = value
    end

    # Builds a list from +input+, interpreted as Public Suffix List text.
    #
    # Every line is stripped of surrounding whitespace. Blank lines and lines
    # starting with "//" are skipped; any other line is turned into a rule.
    # A line containing "===BEGIN PRIVATE DOMAINS===" opens the private section:
    # rules after it are flagged as private, or, when +private_domains+ is false,
    # parsing stops at that line.
    #
    # See http://publicsuffix.org/format/ for more details about input format.
    #
    # @param  input [#each_line] the list to parse
    # @param  private_domains [Boolean] whether to include the private domains section
    # @return [PublicSuffix::List]
    def self.parse(input, private_domains: true)
      comment_token = "//".freeze
      private_token = "===BEGIN PRIVATE DOMAINS===".freeze
      in_private_section = false

      new do |list|
        input.each_line do |raw|
          raw.strip!
          next if raw.empty?

          # The section marker is itself a comment line, so it has to be
          # recognized before the generic comment check.
          if raw.include?(private_token)
            break unless private_domains
            in_private_section = true
          elsif !raw.start_with?(comment_token)
            list.add(Rule.factory(raw, private: in_private_section))
          end
        end
      end
    end


    # Creates a {PublicSuffix::List} with no rules.
    #
    # When a block is given, the new instance is passed to it, which allows
    # the list to be populated as part of its construction.
    #
    # @yield [self] Yields on self.
    # @yieldparam [PublicSuffix::List] self The newly created instance.
    def initialize
      @rules = {}
      return unless block_given?

      yield self
    end


    # Checks whether two lists are equal.
    #
    # List <tt>one</tt> is equal to <tt>two</tt> if <tt>two</tt> is an instance of
    # {PublicSuffix::List} and both hold equal rule tables. The tables are
    # compared with Hash equality, so the order in which the rules were added
    # is not taken into account.
    #
    # @param  other [PublicSuffix::List] the List to compare
    # @return [Boolean]
    def ==(other)
      return false unless other.is_a?(List)
      return true if equal?(other)

      @rules == other.rules
    end
    alias eql? ==

    # Iterates each rule in the list.
    #
    # Rules are rebuilt from the stored entries as they are yielded.
    # Without a block, an Enumerator over the rules is returned.
    def each(&block)
      rules_enum = Enumerator.new do |yielder|
        @rules.each_pair do |value, entry|
          yielder.yield(entry_to_rule(entry, value))
        end
      end
      rules_enum.each(&block)
    end


    # Adds the given rule to the list.
    #
    # Rules are stored keyed by their value, so adding a rule whose value
    # is already present replaces the previous one.
    #
    # @param  rule [PublicSuffix::Rule::*] the rule to add to the list
    # @return [self]
    def add(rule)
      @rules.store(rule.value, rule_to_entry(rule))
      self
    end
    alias << add

    # Gets the number of rules in the list.
    #
    # Rules are stored keyed by their value, so rules added with the same
    # value are counted once.
    #
    # @return [Integer]
    def size
      @rules.size
    end

    # Checks whether the list is empty.
    #
    # @return [Boolean] true when the list holds no rules
    def empty?
      @rules.empty?
    end

    # Removes all rules.
    #
    # The underlying rule table is emptied in place.
    #
    # @return [self]
    def clear
      @rules.clear
      self
    end

    # Finds and returns the rule corresponding to the longest public suffix for the hostname.
    #
    # An exception rule prevails over any other matching rule, whatever its
    # position among the matches. Otherwise the matching rule with the greatest
    # length wins (the later match in case of a tie).
    #
    # @param  name [#to_s] the hostname
    # @param  default [PublicSuffix::Rule::*] the default rule to return in case no rule matches
    # @param  options [Hash] keyword options forwarded to {#select}
    # @return [PublicSuffix::Rule::*]
    def find(name, default: default_rule, **options)
      matches = select(name, **options)

      # Checked separately from the length comparison: a fold without an initial
      # value never yields the first element as its right-hand operand, so an
      # exception rule in first position would otherwise be overlooked.
      exception = matches.find { |rule| rule.instance_of?(Rule::Exception) }
      return exception if exception

      longest = matches.inject { |l, r| l.length > r.length ? l : r }
      longest || default
    end

    # Selects all the rules matching the given hostname.
    #
    # The hostname is split on dots and its suffixes are looked up one by one,
    # from the rightmost label up to the full name. Every suffix found in the
    # list contributes one rule, in that order.
    #
    # If `ignore_private` is set to true, the algorithm will skip the rules that are flagged as
    # private domain. Note that the rules will still be part of the loop.
    # If you frequently need to access lists ignoring the private domains,
    # you should create a list that doesn't include these domains setting the
    # `private_domains: false` option when calling {.parse}.
    #
    # Note that this method is currently private, as you should not rely on it. Instead,
    # the public interface is {#find}. The current internal algorithm is able to return all
    # matching rules, but different data structures may not be able to do it, and instead would
    # return only the match. For this reason, you should rely on {#find}.
    #
    # @param  name [#to_s] the hostname
    # @param  ignore_private [Boolean]
    # @return [Array<PublicSuffix::Rule::*>]
    def select(name, ignore_private: false)
      # Labels still to be prepended, rightmost label first.
      pending = name.to_s.split(DOT).reverse!
      suffix = pending.shift
      matches = []

      loop do
        entry = @rules[suffix]
        unless entry.nil?
          wanted = ignore_private == false || entry.private == false
          matches << entry_to_rule(entry, suffix) if wanted
        end

        break if pending.empty?
        # Grow the candidate by one label: "uk", "co.uk", "example.co.uk", ...
        suffix = pending.shift + DOT + suffix
      end

      matches
    end
    private :select # rubocop:disable Style/AccessModifierDeclarations

    # Gets the default rule.
    #
    # Used by {#find} as the fallback value when no rule in the list matches.
    #
    # @see PublicSuffix::Rule.default
    #
    # @return [PublicSuffix::Rule::*]
    def default_rule
      PublicSuffix::Rule.default
    end


    protected

    # The internal Hash mapping each rule value to its stored entry.
    # Protected so that {#==} can read the rules of another list
    # without making them part of the public interface.
    attr_reader :rules


    private

    # Rebuilds a rule from a stored entry. The value is passed separately
    # because it is the key of the rule table, not part of the entry.
    def entry_to_rule(entry, value)
      rule_class = entry.type
      rule_class.new(value: value, length: entry.length, private: entry.private)
    end

    # Reduces a rule to the entry kept in the rule table: its class, length
    # and private flag. The value is not included, as it is used as the key.
    def rule_to_entry(rule)
      attributes = [rule.class, rule.length, rule.private]
      Rule::Entry.new(*attributes)
    end

  end
end