1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243
|
# = Public Suffix
#
# Domain name parser based on the Public Suffix List.
#
# Copyright (c) 2009-2018 Simone Carletti <weppos@weppos.net>
module PublicSuffix

  # A {PublicSuffix::List} is a collection of one
  # or more {PublicSuffix::Rule}.
  #
  # Given a {PublicSuffix::List},
  # you can add rules, iterate all items in the list
  # or search for the rule which matches a specific domain name.
  #
  #   # Create a new list
  #   list = PublicSuffix::List.new
  #
  #   # Push two rules to the list
  #   list << PublicSuffix::Rule.factory("it")
  #   list << PublicSuffix::Rule.factory("com")
  #
  #   # Get the size of the list
  #   list.size
  #   # => 2
  #
  #   # Search for the rule matching given domain
  #   list.find("example.com")
  #   # => #<PublicSuffix::Rule::Normal>
  #
  # When no rule matches, {#find} returns its +default+ argument, which
  # falls back to {#default_rule}.
  #
  # You can create as many {PublicSuffix::List} as you want.
  # The {PublicSuffix::List.default} rule list is used
  # to tokenize and validate a domain.
  #
  class List

    DEFAULT_LIST_PATH = File.expand_path("../../data/list.txt", __dir__)

    # Gets the default rule list.
    #
    # On first access the list is built by parsing the file at
    # {DEFAULT_LIST_PATH}; the result is memoized, so +options+ only have
    # an effect on the call that actually builds the list.
    #
    # @param options [Hash] keyword options forwarded to {.parse}
    # @return [PublicSuffix::List]
    def self.default(**options)
      # Forward as keywords: {.parse} accepts keyword arguments only, so a
      # positional Hash is rejected once keyword and positional arguments
      # are separated (Ruby 3+).
      @default ||= parse(File.read(DEFAULT_LIST_PATH), **options)
    end

    # Sets the default rule list to +value+.
    #
    # Assigning +nil+ makes the next call to {.default} rebuild the list.
    #
    # @param value [PublicSuffix::List, nil] the new list
    # @return [PublicSuffix::List, nil]
    def self.default=(value)
      @default = value
    end

    # Parse given +input+ treating the content as Public Suffix List.
    #
    # See http://publicsuffix.org/format/ for more details about input format.
    #
    # @param input [#each_line] the list to parse
    # @param private_domains [Boolean] whether to include the private domains
    #   section; when false, parsing stops at the private-domains marker
    # @return [PublicSuffix::List]
    def self.parse(input, private_domains: true)
      comment_token = "//".freeze
      private_token = "===BEGIN PRIVATE DOMAINS===".freeze
      private_section = false

      new do |list|
        input.each_line do |line|
          # Non-mutating strip: the lines handed over by +input+ may be frozen.
          line = line.strip
          next if line.empty?

          # The section marker is itself a comment line, so it has to be
          # detected before comments are discarded.
          if line.include?(private_token)
            break unless private_domains

            private_section = true
          elsif !line.start_with?(comment_token)
            list.add(Rule.factory(line, private: private_section))
          end
        end
      end
    end

    # Initializes an empty {PublicSuffix::List}.
    #
    # @yield [self] Yields on self.
    # @yieldparam [PublicSuffix::List] self The newly created instance.
    def initialize
      # Maps a rule value (String) to its compact Rule::Entry representation.
      @rules = {}
      yield(self) if block_given?
    end

    # Checks whether two lists are equal.
    #
    # List <tt>one</tt> is equal to <tt>two</tt>, if <tt>two</tt> is an instance of
    # {PublicSuffix::List} and both lists hold equal entries under the same rule values.
    #
    # @param other [PublicSuffix::List] the List to compare
    # @return [Boolean]
    def ==(other)
      return false unless other.is_a?(List)

      equal?(other) || @rules == other.rules
    end
    alias eql? ==

    # Iterates each rule in the list.
    #
    # Rules are rebuilt from the stored entries on the fly. Without a block
    # an Enumerator over the rules is returned.
    #
    # @yieldparam rule [PublicSuffix::Rule::*] each rule in the list
    def each(&block)
      Enumerator.new do |y|
        @rules.each do |key, node|
          y << entry_to_rule(node, key)
        end
      end.each(&block)
    end

    # Adds the given rule to the list.
    #
    # Rules are keyed by their value, so adding a rule whose value is
    # already present replaces the previous entry.
    #
    # @param rule [PublicSuffix::Rule::*] the rule to add to the list
    # @return [self]
    def add(rule)
      @rules[rule.value] = rule_to_entry(rule)
      self
    end
    alias << add

    # Gets the number of rules in the list.
    #
    # @return [Integer]
    def size
      @rules.size
    end

    # Checks whether the list is empty.
    #
    # @return [Boolean]
    def empty?
      @rules.empty?
    end

    # Removes all rules.
    #
    # @return [self]
    def clear
      @rules.clear
      self
    end

    # Finds and returns the rule corresponding to the longest public suffix for the hostname.
    #
    # @param name [#to_s] the hostname
    # @param default [PublicSuffix::Rule::*] the default rule to return in case no rule matches
    # @param options [Hash] keyword options forwarded to the rule selection
    #   (currently +ignore_private+)
    # @return [PublicSuffix::Rule::*]
    def find(name, default: default_rule, **options)
      rule = select(name, **options).inject do |l, r|
        # An exception rule prevails as soon as it is encountered.
        return r if r.instance_of?(Rule::Exception)

        # On equal length the later (more specific) match wins.
        l.length > r.length ? l : r
      end
      rule || default
    end

    # Selects all the rules matching given hostname.
    #
    # If `ignore_private` is set to true, the algorithm will skip the rules that are flagged as
    # private domain. Note that the rules will still be part of the loop.
    # If you frequently need to access lists ignoring the private domains,
    # you should create a list that doesn't include these domains setting the
    # `private_domains: false` option when calling {.parse}.
    #
    # Note that this method is currently private, as you should not rely on it. Instead,
    # the public interface is {#find}. The current internal algorithm allows to return all
    # matching rules, but different data structures may not be able to do it, and instead would
    # return only the match. For this reason, you should rely on {#find}.
    #
    # @param name [#to_s] the hostname
    # @param ignore_private [Boolean]
    # @return [Array<PublicSuffix::Rule::*>]
    def select(name, ignore_private: false)
      labels = name.to_s.split(DOT).reverse!
      query = nil

      # Grow the candidate suffix one label at a time, starting from the
      # rightmost label: "com", then "example.com", and so on.
      labels.each_with_object([]) do |label, matches|
        query = query.nil? ? label : label + DOT + query
        entry = @rules[query]
        next if entry.nil?
        next unless ignore_private == false || entry.private == false

        matches << entry_to_rule(entry, query)
      end
    end
    private :select # rubocop:disable Style/AccessModifierDeclarations

    # Gets the default rule.
    #
    # @see PublicSuffix::Rule.default_rule
    #
    # @return [PublicSuffix::Rule::*]
    def default_rule
      PublicSuffix::Rule.default
    end

    protected

    # Exposed to other List instances only, for the comparison in {#==}.
    attr_reader :rules

    private

    # Rebuilds a rule object from a stored entry and the value it is keyed by.
    def entry_to_rule(entry, value)
      entry.type.new(value: value, length: entry.length, private: entry.private)
    end

    # Reduces a rule to the entry kept in the internal hash
    # (rule class, length and private flag).
    def rule_to_entry(rule)
      Rule::Entry.new(rule.class, rule.length, rule.private)
    end

  end

end
|