File: document.rb

package info (click to toggle)
ruby-nokogumbo 2.0.5-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 1,372 kB
  • sloc: ansic: 32,909; ruby: 452; makefile: 7
file content (53 lines) | stat: -rw-r--r-- 2,118 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
module Nokogiri
  module HTML5
    # Document class for the HTML5 parser; every constructor below funnels
    # into the private class method +do_parse+.
    class Document < Nokogiri::HTML::Document
      # Parse a document from either a string-like or an IO-like object.
      #
      # If a block is given, the options hash is yielded to it before parsing
      # so the caller can adjust it in place.
      #
      # @param string_or_io [#read, #to_str, nil] the input; +nil+ is treated
      #   as an empty string
      # @param url passed through to the parser; when not given and the input
      #   responds to both +read+ and +path+, the input's +path+ is used
      # @param encoding passed through to +HTML5.read_and_encode+; when not
      #   given and the input responds to +encoding+ with something other than
      #   ASCII-8BIT, that encoding's name is used
      # @param options [Hash] see {do_parse} for the keys that are consulted
      # @raise [ArgumentError] if the input responds to neither +read+ nor
      #   +to_str+
      # @return the document returned by +do_parse+
      def self.parse(string_or_io, url = nil, encoding = nil, **options, &block)
        yield options if block_given?
        string_or_io = '' unless string_or_io

        # Binary (ASCII-8BIT) input carries no usable encoding information.
        if string_or_io.respond_to?(:encoding) && string_or_io.encoding.name != 'ASCII-8BIT'
          encoding ||= string_or_io.encoding.name
        end

        if string_or_io.respond_to?(:read) && string_or_io.respond_to?(:path)
          url ||= string_or_io.path
        end
        unless string_or_io.respond_to?(:read) || string_or_io.respond_to?(:to_str)
          raise ArgumentError, "not a string or IO object"
        end

        do_parse(string_or_io, url, encoding, options)
      end

      # Parse a document from an IO-like object.
      #
      # @param io [#read] the input
      # @param url passed through to the parser
      # @param encoding passed through to +HTML5.read_and_encode+
      # @param options [Hash] see {do_parse} for the keys that are consulted
      # @raise [ArgumentError] if +io+ does not respond to +read+
      # @return the document returned by +do_parse+
      def self.read_io(io, url = nil, encoding = nil, **options)
        raise ArgumentError, "io object doesn't respond to :read" unless io.respond_to?(:read)

        do_parse(io, url, encoding, options)
      end

      # Parse a document from a string-like object.
      #
      # @param string [#to_str] the input
      # @param url passed through to the parser
      # @param encoding passed through to +HTML5.read_and_encode+
      # @param options [Hash] see {do_parse} for the keys that are consulted
      # @raise [ArgumentError] if +string+ does not respond to +to_str+
      # @return the document returned by +do_parse+
      def self.read_memory(string, url = nil, encoding = nil, **options)
        raise ArgumentError, "string object doesn't respond to :to_str" unless string.respond_to?(:to_str)

        do_parse(string, url, encoding, options)
      end

      # Build a DocumentFragment belonging to this document.
      #
      # @param tags markup handed to +DocumentFragment.new+ (may be +nil+)
      # @return [DocumentFragment]
      def fragment(tags = nil)
        # The document root is passed as the third argument -- presumably the
        # parsing context; confirm against DocumentFragment#initialize.
        DocumentFragment.new(self, tags, self.root)
      end

      # Serialize via XML::Node#to_xml rather than XML::Document#to_xml.
      #
      # @param options [Hash] forwarded unchanged to XML::Node#to_xml
      def to_xml(options = {}, &block)
        # Bypass XML::Document#to_xml which doesn't add
        # XML::Node::SaveOptions::AS_XML like XML::Node#to_xml does.
        XML::Node.instance_method(:to_xml).bind(self).call(options, &block)
      end

      # Shared implementation behind +parse+, +read_io+ and +read_memory+.
      #
      # Option keys consulted (each falls back to the matching Nokogumbo
      # default when absent or +nil+):
      # * +:max_attributes+
      # * +:max_errors+ (or +:max_parse_errors+ when +:max_errors+ is unset)
      # * +:max_tree_depth+
      #
      # @param string_or_io the input handed to +HTML5.read_and_encode+
      # @param url passed to +Nokogumbo.parse+
      # @param encoding passed to +HTML5.read_and_encode+
      # @param options [Hash] parser limits, see above
      # @return the document returned by +Nokogumbo.parse+, with its encoding
      #   set to 'UTF-8'
      def self.do_parse(string_or_io, url, encoding, options)
        string = HTML5.read_and_encode(string_or_io, encoding)
        max_attributes = options[:max_attributes] || Nokogumbo::DEFAULT_MAX_ATTRIBUTES
        max_errors = options[:max_errors] || options[:max_parse_errors] || Nokogumbo::DEFAULT_MAX_ERRORS
        max_depth = options[:max_tree_depth] || Nokogumbo::DEFAULT_MAX_TREE_DEPTH
        doc = Nokogumbo.parse(string, url, max_attributes, max_errors, max_depth)
        doc.encoding = 'UTF-8'
        doc
      end
      # A bare `private` does not apply to methods defined with `def self.`,
      # so the class method has to be hidden explicitly.
      private_class_method :do_parse
    end
  end
end