File: fetcher.rb

package info (click to toggle)
ruby-open-graph-reader 0.6.2%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 9,980 kB
  • ctags: 133
  • sloc: ruby: 1,505; xml: 22; makefile: 2
file content (117 lines) | stat: -rw-r--r-- 3,001 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
require "faraday"

begin
  require "faraday_middleware/response/follow_redirects"
rescue LoadError; end

begin
  require "faraday/cookie_jar"
rescue LoadError; end

require "open_graph_reader/version"

module OpenGraphReader
  # Fetch an URI to retrieve its HTML body, if available.
  #
  # A fetcher remembers the responses of its HEAD and GET requests as well as
  # whether any request failed, so the query methods ({#html?}, {#fetched?},
  # {#body}) only hit the network when nothing has been retrieved yet.
  #
  # @api private
  class Fetcher
    # Request headers sent with every request made by a fetcher.
    HEADERS = {
      "Accept"     => "text/html",
      "User-Agent" => "OpenGraphReader/#{OpenGraphReader::VERSION} (+https://github.com/jhass/open_graph_reader)"
    }.freeze

    # Create a new fetcher.
    #
    # @param [URI] uri the URI to fetch.
    # @raise [ArgumentError] if +uri+ is not an instance of URI.
    def initialize uri
      raise ArgumentError, "url needs to be an instance of URI" unless uri.is_a? URI

      @uri = uri
      @fetch_failed = false
      # Configure a copy of Faraday's default connection instead of the default
      # connection object itself.
      @connection = Faraday.default_connection.dup
      @connection.headers.replace(HEADERS)
      @head_response = nil
      @get_response = nil

      # Both middlewares are optional dependencies (see the guarded requires at
      # the top of the file). Check for the exact constant that is about to be
      # referenced, so a bare namespace without the middleware class cannot
      # lead to a NameError.
      prepend_middleware Faraday::CookieJar if defined? Faraday::CookieJar
      prepend_middleware FaradayMiddleware::FollowRedirects if defined? FaradayMiddleware::FollowRedirects
    end

    # The URL to fetch
    #
    # @return [String]
    def url
      @uri.to_s
    end

    # Fetch the full page.
    #
    # A Faraday error is not propagated; it is recorded and reported through
    # {#fetched?}, {#html?} and {#body} instead.
    #
    # @return [Faraday::Response,nil] the response, or nil if the request failed.
    def fetch
      @get_response = @connection.get(@uri)
    rescue Faraday::Error
      @fetch_failed = true
      nil # honour the documented return type instead of leaking the flag value
    end
    alias_method :fetch_body, :fetch

    # Fetch just the headers
    #
    # A Faraday error is not propagated; it is recorded and reported through
    # {#fetched_headers?}, {#html?} and {#body} instead.
    #
    # @return [Faraday::Response,nil] the response, or nil if the request failed.
    def fetch_headers
      @head_response = @connection.head(@uri)
    rescue Faraday::Error
      @fetch_failed = true
      nil # honour the documented return type instead of leaking the flag value
    end

    # Retrieve the body
    #
    # Performs the GET request first if nothing was fetched yet.
    #
    # @raise [NoOpenGraphDataError] A request failed or the received content
    #   does not seem to be HTML.
    # @return [String]
    def body
      fetch_body unless fetched?
      raise NoOpenGraphDataError, "No response body received for #{@uri}" if fetch_failed?
      raise NoOpenGraphDataError, "Did not receive a HTML site at #{@uri}" unless html?

      @get_response.body
    end

    # Whether the target URI seems to return HTML
    #
    # Issues a HEAD request if neither headers nor body were fetched before.
    # When both responses are available, the GET response is the one inspected.
    #
    # @return [Bool]
    def html?
      fetch_headers unless fetched_headers?
      return false if fetch_failed?

      response = @get_response || @head_response
      return false unless response && response.success?

      content_type = response["content-type"]
      return false unless content_type

      # Media types are case-insensitive, so normalise before matching.
      content_type.to_s.downcase.include? "text/html"
    end

    # Whether the target URI was fetched.
    #
    # A failed request counts as fetched, too, so that it is not retried.
    #
    # @return [Bool]
    def fetched?
      fetch_failed? || !@get_response.nil?
    end
    alias_method :fetched_body?, :fetched?

    # Whether the headers of the target URI were fetched.
    #
    # True after a failed request and after either a HEAD or a GET request
    # produced a response.
    #
    # @return [Bool]
    def fetched_headers?
      fetch_failed? || !@get_response.nil? || !@head_response.nil?
    end

    private

    # Whether any request made by this fetcher raised a Faraday error.
    #
    # @return [Bool]
    def fetch_failed?
      @fetch_failed
    end

    # Put the given middleware at the front of the connection's handler stack,
    # unless the stack already contains it.
    #
    # @param [Class] middleware the middleware class to insert.
    def prepend_middleware middleware
      return if @connection.builder.handlers.include? middleware

      @connection.builder.insert(0, middleware)
    end
  end
end