1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243
|
require "helper"
class TestMemoryLeak < Nokogiri::TestCase
# Builds the small HTML document (stored in @str) that the SAX and XPath
# leak tests below feed to the parsers.
def setup
  super
  @str = <<~HTML
    <!DOCTYPE HTML>
    <html>
    <body>
    <br />
    </body>
    </html>
  HTML
end
#
# this suite is turned off unless the env var NOKOGIRI_GC is non-nil
#
# to run any of these tests, do something like this on the command line:
#
# $ NOKOGIRI_GC=t ruby -Ilib:test \
# test/test_memory_leak.rb \
# -n /test_leaking_namespace_node_strings/
#
# also see:
#
# https://github.com/sparklemotion/nokogiri/issues/1603
#
# which is an open issue to resurrect these tests and run them as
# part of the CI pipeline.
#
if ENV['NOKOGIRI_GC'] # turning these off by default for now
# Parses the dont_hurt_em_why.xml fixture, then runs two CSS searches and
# reads the first match's text, forcing a GC pass after each round.
def test_dont_hurt_em_why
  # File.read opens, reads and closes the fixture in a single call; the
  # previous File.open(...).read never closed the handle it opened.
  content = File.read("#{File.dirname(__FILE__)}/files/dont_hurt_em_why.xml")
  ndoc = Nokogiri::XML(content)
  2.times do
    ndoc.search('status text').first.inner_text
    ndoc.search('user name').first.inner_text
    GC.start
  end
end
# IO stand-in whose read and write always raise, whatever arguments they get.
# Used below to drive the parser/serializer IO callbacks into their error path.
class BadIO
  # Always raises RuntimeError "hell".
  def read(*)
    raise RuntimeError, 'hell'
  end

  # Always raises RuntimeError "chickens".
  def write(*)
    raise RuntimeError, 'chickens'
  end
end
# Exercises the IO read and write callbacks with an IO object (BadIO) whose
# methods always raise, so the error paths run over and over.
#
# The loop has no exit condition: run it by hand and watch process memory.
def test_for_mem_leak_on_io_callbacks
  # Keep the parsed document; the loop serializes it on every pass.
  # Previously the result was discarded, so `doc` was undefined and the
  # NameError was silently swallowed by `rescue nil`, meaning the write path
  # never ran. The block form of File.open also closes the fixture file.
  doc = File.open(SNUGGLES_FILE) { |io| Nokogiri::XML.parse(io) }

  loop do
    # Errors are expected here (BadIO always raises); they are deliberately ignored.
    Nokogiri::XML.parse(BadIO.new) rescue nil
    # NOTE(review): the original called `doc.write`; write_to is Nokogiri's
    # documented "serialize to an IO" entry point -- confirm this is the
    # callback the test was meant to hit.
    doc.write_to(BadIO.new) rescue nil
  end
end
def test_for_memory_leak
begin
# we don't use Dike in any tests, but requiring it has side effects
# that can create memory leaks, and that's what we're testing for.
require 'rubygems'
require 'dike' # do not remove!
count_start = count_object_space_documents
xml_data = <<-EOS
<test>
<items>
<item>abc</item>
<item>1234</item>
<item>Zzz</item>
<items>
</test>
EOS
20.times do
doc = Nokogiri::XML(xml_data)
doc.xpath("//item")
end
2.times { GC.start }
count_end = count_object_space_documents
assert((count_end - count_start) <= 2, "memory leak detected")
rescue LoadError
puts "\ndike is not installed, skipping memory leak test"
end
end
# Evaluates a namespace-axis XPath against one context over and over.
# The loop has no exit condition; it is meant to be watched, not finished.
def test_node_set_namespace_mem_leak
  document = Nokogiri::XML("<foo></foo>")
  xpath_context = Nokogiri::XML::XPathContext.new(document)
  loop { xpath_context.evaluate("//namespace::*") }
end
# Repeatedly builds a tiny document and replaces the root's first child with
# a freshly created CDATA node. Loops without an exit condition.
def test_leak_on_node_replace
  loop do
    document = Nokogiri.XML("<root><foo /></root>")
    replacement = Nokogiri::XML::CDATA.new(document, "bar")
    document.root.children[0].replace(replacement)
  end
end
# Creates XML and HTML SAX parser contexts over and over, once from the
# string in @str and once from a StringIO wrapping it (rewound after each
# use). Loops without an exit condition.
def test_sax_parser_context
  io = StringIO.new(@str)
  context_classes = [
    Nokogiri::XML::SAX::ParserContext,
    Nokogiri::HTML::SAX::ParserContext
  ]

  loop do
    context_classes.each do |context_class|
      context_class.new(@str)
      context_class.new(io)
      io.rewind
    end
  end
end
# SAX document handler that leaves the parser non-locally: whenever
# start_element is invoked it throws the tag it was constructed with.
class JumpingSaxHandler < Nokogiri::XML::SAX::Document
  # jumptag - the tag handed to Kernel#throw from #start_element.
  def initialize(jumptag)
    @jumptag = jumptag
    super()
  end

  # Ignores its arguments and unwinds to the enclosing catch(@jumptag).
  def start_element(name, attrs = [])
    throw(@jumptag)
  end
end
# Parses @str with a handler that throws :foo out of the parser, catching
# the throw each time. Loops without an exit condition.
def test_jumping_sax_handler
  handler = JumpingSaxHandler.new(:foo)
  loop do
    catch(:foo) { Nokogiri::HTML::SAX::Parser.new(handler).parse(@str) }
  end
end
# Builds a fragment in a fresh document and parses markup in the context of
# the fragment's first child, over and over. Loops without an exit condition.
def test_in_context_parser_leak
  loop do
    owner = Nokogiri::XML::Document.new
    fragment = Nokogiri::XML::DocumentFragment.new(owner, '<foo/>')
    context_node = fragment.children[0]
    context_node.parse('<bar></bar>')
  end
end
# Parses markup in the context of a freshly parsed document's root, over and
# over. Loops without an exit condition.
def test_in_context_parser_leak_ii
  loop do
    root = Nokogiri::XML('<a/>').root
    root.parse('<b/>')
  end
end
# Parses @str once, then evaluates an XPath expression built on the name()
# function against it over and over. Loops without an exit condition.
def test_leak_on_xpath_string_function
  document = Nokogiri::XML(@str)
  loop { document.xpath('name(//node())') }
end
# Builds 20 batches of 10,000 small documents whose elements carry a default
# namespace declaration, printing MemInfo.rss after every batch.
def test_leaking_namespace_node_strings
  # see https://github.com/sparklemotion/nokogiri/issues/1810 for memory leak report
  namespace_attrs = { 'xmlns' => 'http://schemas.xmlsoap.org/soap/envelope/' }

  20.times do
    10_000.times do
      Nokogiri::XML::Builder.new do |xml|
        xml.send('Envelope', namespace_attrs) { xml.send('Foobar', namespace_attrs) }
      end
    end
    puts MemInfo.rss
  end
end
# Builds 20 batches of 10,000 small documents whose elements carry a
# prefixed ("foo") namespace declaration, printing MemInfo.rss after every batch.
def test_leaking_namespace_node_strings_with_prefix
  # see https://github.com/sparklemotion/nokogiri/issues/1810 for memory leak report
  namespace_attrs = { 'xmlns:foo' => 'http://schemas.xmlsoap.org/soap/envelope/' }

  20.times do
    10_000.times do
      Nokogiri::XML::Builder.new do |xml|
        xml.send('Envelope', namespace_attrs) { xml.send('Foobar', namespace_attrs) }
      end
    end
    puts MemInfo.rss
  end
end
# Creates 100,000 HTML documents and removes each one's internal subset,
# printing MemInfo.rss on every 1,000th iteration (starting with the first).
def test_leaking_dtd_nodes_after_internal_subset_removal
  # see https://github.com/sparklemotion/nokogiri/issues/1784
  100_000.times do |iteration|
    document = Nokogiri::HTML::Document.new
    document.internal_subset.remove
    puts MemInfo.rss if (iteration % 1000).zero?
  end
end
# Builds a RelaxNG schema from the address schema file 100,001 times and
# prints MemInfo.rss, plus the change since the previous report, every
# 10,000 iterations so growth can be eyeballed.
describe "#2114 RelaxNG schema parsing has a small memory leak" do
  it "no longer leaks" do
    prev_rss = MemInfo.rss
    100_001.times do |j|
      Nokogiri::XML::RelaxNG.from_document(Nokogiri::XML::Document.parse(File.read(ADDRESS_SCHEMA_FILE)))
      next unless (j % 10_000).zero?

      curr_rss = MemInfo.rss
      printf("\n(iter %d) %d", j, curr_rss)
      # "%+d" emits exactly one sign. The previous format prepended "-" to an
      # already-negative number, so a shrinking RSS was printed as "--123".
      printf(" (%+d)", curr_rss - prev_rss) if j > 0
      prev_rss = curr_rss
    end
    puts
  end
end
end # if NOKOGIRI_GC
# Reports the current process's memory footprint by reading its procfs
# statm file.
module MemInfo
  # from https://stackoverflow.com/questions/7220896/get-current-ruby-process-memory-usage
  # this is only going to work on linux

  # Page size as printed by `getconf PAGESIZE`; 4096 if running it raises.
  PAGE_SIZE = begin
    `getconf PAGESIZE`.chomp.to_i
  rescue StandardError
    4096
  end
  # Both values are computed once, when this module is loaded.
  STATM_PATH = "/proc/#{Process.pid}/statm"
  STATM_FOUND = File.exist?(STATM_PATH)

  # Returns the second statm field multiplied by PAGE_SIZE and divided by
  # 1024, or 0 when the statm file was not present at load time.
  def self.rss
    return 0 unless STATM_FOUND

    statm_fields = File.read(STATM_PATH).split(' ')
    statm_fields[1].to_i * PAGE_SIZE / 1024
  end
end
private
# Returns how many Nokogiri::XML::Document instances (subclass instances
# included) ObjectSpace currently reports.
def count_object_space_documents
  # Let ObjectSpace filter by class instead of yielding every live object and
  # calling #is_a? on it: that call raises NoMethodError for objects that do
  # not define it (e.g. BasicObject instances) and is needlessly slow.
  ObjectSpace.each_object(Nokogiri::XML::Document).count
end
end
|