module REXML
	# Generates Source-s.  USE THIS CLASS.
	class SourceFactory
		# Generates a Source object.
		# @param arg a String, an existing Source, or an IO.  Any object that
		# responds to both read and eof? (for example a StringIO) is treated
		# as an IO; it does not have to inherit from IO.
		# @return a Source wrapping arg (arg itself when it already is a
		# Source), or nil if a bad argument was given
		def SourceFactory::create_from arg
			if arg.kind_of? String
				Source.new(arg)
			elsif arg.kind_of? Source
				# Already wrapped; hand it back rather than rejecting it.
				arg
			elsif arg.respond_to?(:read) and arg.respond_to?(:eof?)
				# Duck-typed so IO look-alikes are accepted as well as real IOs.
				IOSource.new(arg)
			end
			# Falls through to nil for any other argument.
		end
	end

	# A Source wraps a text buffer that can be searched for patterns and,
	# optionally, consumed as matches are found
	class Source
		# The current buffer (what we're going to read next); nil when the
		# Source was constructed from nil
		attr_reader :buffer
		# The line number of the last consumed text.  It is set to 0 by the
		# constructor and not updated by this class; see current_line for a
		# computed position.
		attr_reader :line

		# Constructor
		# @param arg must be a String, and should be a valid XML document.
		# Text that starts with a UTF-16 byte order mark is converted to
		# UTF-8 before it is buffered.
		def initialize arg
			@buffer = arg
			@encoding = encoding( @buffer )
			@buffer = utf8_enc(@buffer) if (@encoding=='UTF16' or @encoding=="UNILE")
			# Keep the complete (already decoded) text so that current_line can
			# compare it with whatever is still unconsumed.
			@orig = @buffer
			@line = 0
		end

		# Scans the buffer for a pattern using String#scan.  Note that this is
		# not your usual scan() method, because the source can be consumed.
		# @param pattern must be a Regexp
		# @param consume if true and something matched, the buffer is replaced
		# by the text that follows the last match
		# @return the String#scan result (an empty Array when nothing
		# matched), or nil if the Source has no buffer
		def scan pattern, consume=false
			return nil if @buffer.nil?
			rv = @buffer.scan(pattern)
			# String#scan leaves the last successful match in $~, so $' is the
			# text after that match.
			@buffer = $' if consume and !rv.empty?
			rv
		end

		# Matches the pattern against the buffer.
		# @param pattern must be a Regexp
		# @param consume if true and the pattern matched, the buffer is
		# replaced by the text that follows the match
		# @return the MatchData, or nil if there was no match or no buffer
		def match pattern, consume=false
			return nil if @buffer.nil?
			md = pattern.match(@buffer)
			@buffer = md.post_match if consume and md
			md
		end

		# @return true if the Source is exhausted, i.e. there is no buffer or
		# only whitespace is left in it
		def empty?
			@buffer.nil? or @buffer.strip.empty?
		end

		# @return the 1-based number of the line on which the unconsumed text
		# starts, or nil if the Source has no text
		def current_line
			return nil if @orig.nil? or @buffer.nil?
			# The buffer is always a tail of the full text, so the newlines
			# missing from it are exactly the ones that have been consumed.
			@orig.count("\n") - @buffer.count("\n") + 1
		end

		# Guesses the encoding of a string from its leading bytes.
		# Based on code contributed by Ernest Ellingson <erne@powernav.com>
		# @param str the text to inspect; may be nil
		# @return 'UTF16' (big endian BOM), 'UNILE' (little endian BOM),
		# 'UTF8' (a lead byte followed by a continuation byte was seen) or
		# 'ASC' otherwise
		def encoding(str)
			return 'ASC' if str.nil?
			# Inspect raw bytes: the byte-oriented (/n) patterns below cannot be
			# applied to a string tagged with a multibyte encoding.
			raw = str.respond_to?(:force_encoding) ? str.dup.force_encoding('BINARY') : str
			if raw =~ /\A\xFE\xFF/n
				'UTF16'  #unicode big endian
			elsif raw =~ /\A\xFF\xFE/n
				'UNILE' #unicode little endian
			elsif raw =~ /[\xC0-\xFF][\x80-\xBF]/n
				#not a guaranteed test; random byte pairs can look like UTF-8
				'UTF8'
			else
				'ASC'
			end
		end

		# Converts UTF-16 text to UTF-8, using the byte order recorded in
		# @encoding.  A leading byte order mark is dropped when present, so
		# the method can also be used on later chunks that have no mark.
		# Surrogate pairs are combined into a single code point and a dangling
		# trailing byte is ignored.
		# Based on code contributed by Ernest Ellingson <erne@powernav.com>
		# @param str the raw UTF-16 bytes
		# @return the UTF-8 text, or "" if @encoding is not a UTF-16 variant
		def utf8_enc(str)
			directive = case @encoding
				when 'UTF16' then 'n*'   # 16-bit big endian
				when 'UNILE' then 'v*'   # 16-bit little endian
			end
			return "" if directive.nil?

			units = str.unpack(directive)
			units.shift if units.first == 0xFEFF

			codepoints = []
			i = 0
			while i < units.size
				unit = units[i]
				low = units[i+1]
				if unit >= 0xD800 and unit <= 0xDBFF and low and low >= 0xDC00 and low <= 0xDFFF
					codepoints << (0x10000 + ((unit - 0xD800) << 10) + (low - 0xDC00))
					i += 2
				else
					codepoints << unit
					i += 1
				end
			end
			codepoints.pack("U*")
		end
	end

	# A Source that wraps an IO.  See the Source class for method
	# documentation
	class IOSource < Source
		# @param arg the IO (or IO-like object) to read from
		# @param block_size the length passed to each read call on the IO
		def initialize arg, block_size=500
			# @er_source is not read by this class any more; it is still
			# assigned in case other code refers to it.
			@er_source = @source = arg
			@block_size = block_size
			super @source.read(@block_size)
			@to_utf = (@encoding == 'UTF16' or @encoding == "UNILE")
			# Number of newlines that have been placed in the buffer so far;
			# current_line derives the position from it.
			@newlines_read = @buffer ? @buffer.count("\n") : 0
		end

		# Like Source#scan, but when nothing is found more blocks are read
		# from the IO until the pattern matches or the input is exhausted.
		# @return the scan result, or nil if the IO never produced any text
		def scan pattern, consume=false
			rv = super
			# This is deliberately a little different from match(): probing
			# the buffer with =~ before scanning again is a touch faster.
			if rv.nil? or rv.empty?
				until (@buffer and @buffer =~ pattern) or @source.nil?
					read_next_block
				end
				rv = super
			end
			rv
		end

		# Like Source#match, but keeps reading blocks from the IO until the
		# pattern matches or the input is exhausted.
		# @return the MatchData, or nil if the pattern never matched
		def match pattern, consume=false
			rv = super
			while !rv and read_next_block
				rv = super
			end
			rv
		end

		# @return true if the buffer is exhausted and nothing more can be
		# read from the IO
		def empty?
			super and ( @source.nil? || @source.eof? )
		end

		# @return the 1-based number of the line on which the unconsumed text
		# starts
		def current_line
			# Computed from counters instead of rewinding and re-reading the
			# IO, so the read position is left alone and streams that cannot
			# be rewound work too.
			pending = @buffer ? @buffer.count("\n") : 0
			@newlines_read - pending + 1
		end

		private

		# Reads one more block from the IO and appends it to the buffer.
		# Once the IO returns nothing, or fails with an I/O error, @source is
		# set to nil and no further reads are attempted.
		# @return true if text was appended, false if the input is exhausted
		def read_next_block
			return false if @source.nil?
			str = @source.read(@block_size)
			if str.nil? or str.empty?
				@source = nil
				return false
			end
			str = utf8_enc(str) if @to_utf
			@newlines_read += str.count("\n")
			if @buffer.nil?
				@buffer = str
			else
				@buffer << str
			end
			true
		rescue IOError, SystemCallError
			# A failing stream is treated as end of input.
			@source = nil
			false
		end
	end
end
