File: Html4Document.java

package info (click to toggle)
ruby-nokogiri 1.13.10%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 7,416 kB
  • sloc: ansic: 38,198; xml: 28,086; ruby: 22,271; java: 15,517; cpp: 7,037; yacc: 244; sh: 148; makefile: 136
file content (155 lines) | stat: -rw-r--r-- 4,689 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
package nokogiri;

import org.jruby.Ruby;
import org.jruby.RubyClass;
import org.jruby.anno.JRubyClass;
import org.jruby.anno.JRubyMethod;
import org.jruby.runtime.Helpers;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import nokogiri.internals.HtmlDomParserContext;

import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;

/**
 * Class for Nokogiri::HTML4::Document.
 *
 * <p>Extends {@link XmlDocument} with HTML-specific behaviour: it supplies a
 * default HTML 4.01 internal subset when the document declares none, runs
 * attribute stabilization over the whole tree on initialization, and exposes
 * the {@code read_io} / {@code read_memory} class methods, which parse through
 * {@link HtmlDomParserContext}.
 *
 * @author sergio
 * @author Yoko Harada <yokolet@gmail.com>
 */
@JRubyClass(name = "Nokogiri::HTML4::Document", parent = "Nokogiri::XML::Document")
public class Html4Document extends XmlDocument
{
  // Values used to build the default internal subset; see getInternalSubset().
  private static final String DEFAULT_CONTENT_TYPE = "html";
  private static final String DEFAULT_PUBLIC_ID = "-//W3C//DTD HTML 4.01//EN";
  private static final String DEFAULT_SYSTEM_ID = "http://www.w3.org/TR/html4/strict.dtd";

  /** Encoding recorded through {@link #setParsedEncoding(String)}; {@code null} until set. */
  private String parsedEncoding = null;

  /**
   * Creates a document object that is not yet backed by a DOM document.
   *
   * @param ruby  the JRuby runtime
   * @param klazz the Ruby class of the new object
   */
  public
  Html4Document(Ruby ruby, RubyClass klazz)
  {
    super(ruby, klazz);
  }

  /**
   * Wraps an existing DOM document.
   *
   * @param runtime  the JRuby runtime
   * @param document the DOM document to wrap
   */
  public
  Html4Document(Ruby runtime, Document document)
  {
    // NOTE(review): the Ruby class looked up here is Nokogiri::XML::Document,
    // not Nokogiri::HTML4::Document. Left unchanged because callers may rely on
    // it -- confirm whether this is intentional.
    this(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Document"), document);
  }

  /**
   * Wraps an existing DOM document using an explicit Ruby class.
   *
   * @param ruby  the JRuby runtime
   * @param klazz the Ruby class of the new object
   * @param doc   the DOM document to wrap
   */
  public
  Html4Document(Ruby ruby, RubyClass klazz, Document doc)
  {
    super(ruby, klazz, doc);
  }

  /**
   * Implements the Ruby-level {@code new}: allocates an instance of
   * {@code klazz}, attaches a freshly created DOM document to it and then
   * invokes the Ruby {@code initialize} method with the given arguments.
   *
   * @param context the current thread context
   * @param klazz   the Ruby class being instantiated
   * @param args    arguments forwarded unchanged to {@code initialize}
   * @return the newly created document
   */
  @JRubyMethod(name = "new", meta = true, rest = true, required = 0)
  public static IRubyObject
  rbNew(ThreadContext context, IRubyObject klazz, IRubyObject[] args)
  {
    final Ruby runtime = context.runtime;
    Html4Document htmlDocument;
    try {
      Document docNode = createNewDocument(runtime);
      htmlDocument = (Html4Document) NokogiriService.HTML_DOCUMENT_ALLOCATOR.allocate(runtime, (RubyClass) klazz);
      htmlDocument.setDocumentNode(runtime, docNode);
    } catch (Exception ex) {
      // Any failure while allocating or wiring up the document is converted
      // by asRuntimeError, keeping the original exception as context.
      throw asRuntimeError(runtime, "couldn't create document: ", ex);
    }

    Helpers.invoke(context, htmlDocument, "initialize", args);

    return htmlDocument;
  }

  /**
   * Returns the document's internal subset, creating and storing a default
   * HTML 4.01 one when the superclass reports none.
   *
   * @param context the current thread context
   * @return the existing internal subset, or the newly installed default
   */
  @Override
  public IRubyObject
  getInternalSubset(ThreadContext context)
  {
    IRubyObject internalSubset = super.getInternalSubset(context);

    // html documents are expected to have a default internal subset
    // the default values are the same ones used when the following
    // feature is turned on
    // "http://cyberneko.org/html/features/insert-doctype"
    // the reason we don't turn it on, is because it overrides the document's
    // declared doctype declaration.

    if (internalSubset.isNil()) {
      final Ruby runtime = context.runtime;
      internalSubset = XmlDtd.newEmpty(runtime,
                                       getDocument(),
                                       runtime.newString(DEFAULT_CONTENT_TYPE),
                                       runtime.newString(DEFAULT_PUBLIC_ID),
                                       runtime.newString(DEFAULT_SYSTEM_ID));
      // Store it so later calls return the same object instead of a new one.
      setInternalSubset(internalSubset);
    }

    return internalSubset;
  }

  /**
   * Prepares a DOM document for use: stabilizes its text content, normalizes
   * it, resets the {@code @decorators} instance variable to nil and stabilizes
   * every attribute below the document element, if there is one.
   *
   * @param runtime  the JRuby runtime
   * @param document the DOM document to initialize
   */
  @Override
  void
  init(Ruby runtime, Document document)
  {
    stabilizeTextContent(document);
    document.normalize();
    setInstanceVariable("@decorators", runtime.getNil());
    if (document.getDocumentElement() != null) {
      stabilizeAttrs(document.getDocumentElement());
    }
  }

  /**
   * Applies {@code stabilizeAttr} to every attribute of {@code node} and then
   * recurses into each of its children, in document order.
   *
   * @param node root of the subtree to process
   */
  private static void
  stabilizeAttrs(Node node)
  {
    if (node.hasAttributes()) {
      NamedNodeMap nodeMap = node.getAttributes();
      // The length is re-read on every pass on purpose, in case stabilizeAttr
      // alters the attribute map.
      for (int i = 0; i < nodeMap.getLength(); i++) {
        Node n = nodeMap.item(i);
        if (n instanceof Attr) {
          stabilizeAttr((Attr) n);
        }
      }
    }
    NodeList children = node.getChildNodes();
    for (int i = 0; i < children.getLength(); i++) {
      stabilizeAttrs(children.item(i));
    }
  }

  /**
   * Records the encoding associated with this document.
   *
   * @param encoding the encoding name; may be {@code null}
   */
  public void
  setParsedEncoding(String encoding)
  {
    parsedEncoding = encoding;
  }

  /**
   * Returns the encoding last stored with {@link #setParsedEncoding(String)}.
   *
   * @return the stored encoding name, or {@code null} if none has been set
   */
  public String
  getParsedEncoding()
  {
    return parsedEncoding;
  }

  /**
   * Misspelled legacy name of {@link #getParsedEncoding()}, retained so that
   * existing callers keep compiling. New code should call
   * {@link #getParsedEncoding()}.
   *
   * @return the stored encoding name, or {@code null} if none has been set
   */
  public String
  getPraedEncoding()
  {
    return getParsedEncoding();
  }

  /**
   * Implements the Ruby-level {@code read_io}: parses a document from an IO
   * source.
   *
   * @param context the current thread context
   * @param klass   the Ruby class the parsed document is created as
   * @param args    four values; {@code args[0]} and {@code args[1]} are handed
   *                to {@code setIOInputSource}, {@code args[2]} and
   *                {@code args[3]} to the parser context constructor
   *                (presumably io, url, encoding, options -- verify against
   *                the Ruby caller)
   * @return the result of the parse
   */
  @JRubyMethod(meta = true, required = 4)
  public static IRubyObject
  read_io(ThreadContext context, IRubyObject klass, IRubyObject[] args)
  {
    HtmlDomParserContext ctx = new HtmlDomParserContext(context.runtime, args[2], args[3]);
    ctx.setIOInputSource(context, args[0], args[1]);
    return ctx.parse(context, (RubyClass) klass, args[1]);
  }

  /**
   * Implements the Ruby-level {@code read_memory}: parses a document from a
   * string source.
   *
   * @param context the current thread context
   * @param klass   the Ruby class the parsed document is created as
   * @param args    four values; {@code args[0]} and {@code args[1]} are handed
   *                to {@code setStringInputSource}, {@code args[2]} and
   *                {@code args[3]} to the parser context constructor
   *                (presumably string, url, encoding, options -- verify
   *                against the Ruby caller)
   * @return the result of the parse
   */
  @JRubyMethod(meta = true, required = 4)
  public static IRubyObject
  read_memory(ThreadContext context, IRubyObject klass, IRubyObject[] args)
  {
    HtmlDomParserContext ctx = new HtmlDomParserContext(context.runtime, args[2], args[3]);
    ctx.setStringInputSource(context, args[0], args[1]);
    return ctx.parse(context, (RubyClass) klass, args[1]);
  }
}