File: pluggable_parsers.rb

package info (click to toggle)
libwww-mechanize-ruby 1.0.0-1
  • links: PTS, VCS
  • area: main
  • in suites: squeeze
  • size: 956 kB
  • ctags: 883
  • sloc: ruby: 6,621; makefile: 4
file content (101 lines) | stat: -rw-r--r-- 3,099 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
require 'mechanize/file'
require 'mechanize/file_saver'
require 'mechanize/page'

class Mechanize
  # = Synopsis
  # This class is used to register and maintain pluggable parsers for
  # Mechanize to use.
  #
  # A Pluggable Parser is a parser that Mechanize uses for any particular
  # content type.  Mechanize will ask PluggableParser for the class it
  # should initialize given any content type.  This class allows users to
  # register their own pluggable parsers, or modify existing pluggable
  # parsers.
  #
  # PluggableParser returns a Mechanize::File object for content types
  # that it does not know how to handle.  Mechanize::File provides
  # basic functionality for any content type, so it is a good class to
  # extend when building your own parsers.
  # == Example
  # To create your own parser, just create a class that takes four
  # parameters in the constructor.  Here is an example of registering
  # a pluggable parser that handles CSV files:
  #  class CSVParser < Mechanize::File
  #    attr_reader :csv
  #    def initialize(uri=nil, response=nil, body=nil, code=nil)
  #      super(uri, response, body, code)
  #      @csv = CSV.parse(body)
  #    end
  #  end
  #  agent = Mechanize.new
  #  agent.pluggable_parser.csv = CSVParser
  #  agent.get('http://example.com/test.csv')  # => CSVParser
  # Now any page that returns the content type of 'text/csv' will initialize
  # a CSVParser and return that object to the caller.
  #
  # To register a pluggable parser for a content type that pluggable parser
  # does not know about, just use the hash syntax:
  #  agent.pluggable_parser['text/something'] = SomeClass
  #
  # To set the default parser, just use the 'default' method:
  #  agent.pluggable_parser.default = SomeClass
  # Now all unknown content types will be instances of SomeClass.
  class PluggableParser
    # Symbolic names for the content types this class knows about.
    CONTENT_TYPES = {
      :html  => 'text/html',
      :wap   => 'application/vnd.wap.xhtml+xml',
      :xhtml => 'application/xhtml+xml',
      :pdf   => 'application/pdf',
      :csv   => 'text/csv',
      :xml   => 'text/xml',
    }

    # The class returned by #parser when no parser is registered for a
    # content type.
    attr_accessor :default

    # Creates a registry in which the HTML, XHTML and WAP content types are
    # handled by Page and everything else falls back to File.
    def initialize
      @parsers = {
        CONTENT_TYPES[:html]  => Page,
        CONTENT_TYPES[:xhtml] => Page,
        CONTENT_TYPES[:wap]   => Page,
      }
      @default = File
    end

    # Returns the class registered for +content_type+.  A nil or
    # unregistered +content_type+ yields #default.
    def parser(content_type)
      return default if content_type.nil?

      @parsers[content_type] || default
    end

    # Registers +klass+ as the parser for the exact string +content_type+,
    # replacing any previous registration.
    def register_parser(content_type, klass)
      @parsers[content_type] = klass
    end

    # Registers +klass+ for every content type that #initialize treats as
    # HTML: 'text/html', 'application/xhtml+xml' and the WAP XHTML type.
    # Including WAP keeps a custom HTML parser from being bypassed for WAP
    # responses; use #[]= to give a single type a different parser afterwards.
    def html=(klass)
      [:html, :xhtml, :wap].each do |name|
        register_parser(CONTENT_TYPES[name], klass)
      end
    end

    # Registers +klass+ for 'application/xhtml+xml' only.
    def xhtml=(klass)
      register_parser(CONTENT_TYPES[:xhtml], klass)
    end

    # Registers +klass+ for 'application/pdf'.
    def pdf=(klass)
      register_parser(CONTENT_TYPES[:pdf], klass)
    end

    # Registers +klass+ for 'text/csv'.
    def csv=(klass)
      register_parser(CONTENT_TYPES[:csv], klass)
    end

    # Registers +klass+ for 'text/xml'.
    def xml=(klass)
      register_parser(CONTENT_TYPES[:xml], klass)
    end

    # Returns the class registered for +content_type+, or nil when nothing is
    # registered.  Unlike #parser, this does not fall back to #default.
    def [](content_type)
      @parsers[content_type]
    end

    # Hash-style form of #register_parser.
    def []=(content_type, klass)
      register_parser(content_type, klass)
    end
  end
end