# CSV -- module for generating/parsing CSV data.

# $Id: coverage.txt,v 1.4 2002/11/09 13:03:21 nahi Exp $

# This module is copyrighted free software by NAKAMURA, Hiroshi.
# You can redistribute it and/or modify it under the same term as Ruby.

class CSV

  # DESCRIPTION
  #   CSV::Cell -- Describes 1 cell of CSV.
  #
  class Cell
    # Datum as string.
    attr_accessor :data

    # Is this datum null?
    attr_accessor :isNull

    # SYNOPSIS
    #   aCell = CSV::Cell.new( data = "", isNull = true )
    #
    # ARGS
    #   data: datum as String
    #   isNull: is this datum null?
    #
    # RETURNS
    #   aCell: Created instance.
    #
    # DESCRIPTION
    #   Create instance.  If isNull is true, datum is stored in the instance
    #   created but it should be treated as "NULL".
    #
    def initialize( data = "", isNull = true )
      @data = data
      @isNull = isNull
    end

    # SYNOPSIS
    #   CSV::Cell#match( rhs )
    #
    # ARGS
    #   rhs: an instance of CSV::Cell to be compared.
    #
    # RETURNS
    #   true/false.
    #
    # DESCRIPTION
    #   Compare another cell with me.  Bear in mind Null matches with Null
    #   using this method.  A Null cell never matches a non-Null cell;
    #   two non-Null cells match when their data are ==.  Use CSV::Cell#==
    #   if you want Null never to match anything, including Null.
    #
    def match( rhs )
      return true if @isNull && rhs.isNull
      return false if @isNull || rhs.isNull
      @data == rhs.data
    end

    # SYNOPSIS
    #   CSV::Cell#==( rhs )
    #
    # ARGS
    #   rhs: an instance of CSV::Cell to be compared.
    #
    # RETURNS
    #   true/false.
    #
    # DESCRIPTION
    #   Compare another cell with me.  Bear in mind Null does not match
    #   Null using this method: Null never equals any other datum,
    #   including Null.  Use CSV::Cell#match if you want Null to match Null.
    #
    def ==( rhs )
      return false if @isNull || rhs.isNull
      @data == rhs.data
    end
  end

  # For backward compatibility.
  ColData = Cell

  # DESCRIPTION
  #   CSV::Row -- Describes a row of CSV.  Each element must be a CSV::Cell.
  #
  class Row < Array
    # SYNOPSIS
    #   CSV::Row#to_a
    #
    # RETURNS
    #   An Array of String.
    #
    # DESCRIPTION
    #   Convert each CSV::Cell to its String datum.  Null is converted to
    #   nil.
    #
    def to_a
      self.collect { | cell | cell.isNull ? nil : cell.data }
    end

    # SYNOPSIS
    #   CSV::Row#match( rhs )
    #
    # ARGS
    #   rhs: an Array of cells.  Each cell is an instance of CSV::Cell.
    #
    # RETURNS
    #   true/false.
    #
    # DESCRIPTION
    #   Compare another row with me.  Rows match when they have the same
    #   size and every pair of cells at the same position satisfies
    #   CSV::Cell#match.
    #
    def match( rhs )
      return false if self.size != rhs.size
      each_with_index do | cell, i |
        return false unless cell.match( rhs[ i ] )
      end
      true
    end
  end

  # SYNOPSIS
  #   1. reader = CSV.open( filename, "r" )
  #
  #   2. CSV.open( filename, "r" ) do | row |
  #        ...
  #      end
  #
  #   3. writer = CSV.open( filename, "w" )
  #
  #   4. CSV.open( filename, "w" ) do | writer |
  #        ...
  #      end
  #
  # ARGS
  #   filename: filename to open.
  #   mode: "r" (or "rb") for read (parse)
  #         "w" (or "wb") for write (generate)
  #   colSep: Column separator.  ?, by default.
  #   row: an Array of cells which is a parsed line.
  #   writer: Created writer instance.  See CSV::Writer#<< and
  #     CSV::Writer#addRow to know how to generate CSV string.
  #
  # RETURNS
  #   reader: Created reader instance (no block given).  To get parse
  #     result, see CSV::Reader#each.
  #   writer: Created writer instance (no block given).
  #   nil when a block is given.
  #
  # RAISES
  #   ArgumentError if mode is none of "r", "rb", "w", "wb".
  #
  # DESCRIPTION
  #   Open a CSV formatted file to read or write.  The file is always
  #   opened in binary mode.
  #
  # EXAMPLE 1
  #   reader = CSV.open( "csvfile.csv", "r" )
  #   row1 = reader.shift
  #   row2 = reader.shift
  #   if row2.empty?
  #     p "row2 not found."
  #   end
  #   reader.close
  #
  # EXAMPLE 2
  #   CSV.open( "csvfile.csv", "r" ) do | row |
  #     p row
  #   end
  #
  # EXAMPLE 3
  #   writer = CSV.open( "csvfile.csv", "w" )
  #   writer << [ "r1c1", "r1c2" ] << [ "r2c1", "r2c2" ] << [ nil, nil ]
  #   writer.close
  #
  # EXAMPLE 4
  #   CSV.open( "csvfile.csv", "w" ) do | writer |
  #     writer << [ "r1c1", "r1c2" ]
  #     writer << [ "r2c1", "r2c2" ]
  #     writer << [ nil, nil ]
  #   end
  #
  def CSV.open( filename, mode, colSep = ?,, &block )
    case mode
    when "r", "rb"
      openReader( filename, colSep, &block )
    when "w", "wb"
      openWriter( filename, colSep, &block )
    else
      raise ArgumentError.new( "'mode' must be 'r' or 'w'" )
    end
  end

  # Private class methods.
  class << self
  private

    # Opens filename for reading.  With a block, every parsed row is
    # yielded and the file is closed afterwards (nil is returned).
    # Without a block, a reader which closes the file on #close is
    # returned.
    def openReader( filename, colSep, &block )
      file = File.open( filename, "rb" )
      if block
        begin
          CSV::Reader.parse( file, colSep ) do | row |
            yield( row )
          end
        ensure
          file.close
        end
        nil
      else
        reader = CSV::Reader.create( file, colSep )
        reader.closeOnTerminate
        reader
      end
    end

    # Opens filename for writing.  With a block, the writer is yielded
    # and the file is closed afterwards (nil is returned).  Without a
    # block, a writer which closes the file on #close is returned.
    def openWriter( filename, colSep, &block )
      file = File.open( filename, "wb" )
      if block
        begin
          CSV::Writer.generate( file, colSep ) do | writer |
            yield( writer )
          end
        ensure
          file.close
        end
        nil
      else
        writer = CSV::Writer.create( file, colSep )
        writer.closeOnTerminate
        writer
      end
    end
  end

  # DESCRIPTION
  #   CSV::Reader -- CSV formatted string/stream reader.
  #
  # EXAMPLE
  #   Read CSV lines until the first column is 'stop'.
  #
  #   CSV::Reader.parse( File.open( "bigdata", "rb" )) do | row |
  #     p row
  #     break if !row[ 0 ].isNull && row[ 0 ].data == 'stop'
  #   end
  #
  class Reader
    # SYNOPSIS
    #   CSV::Reader.parse( stringOrReadable ) do | row |
    #     ...
    #   end
    #
    # ARGS
    #   stringOrReadable: a CSV data to be parsed.  A String or an IO.
    #   colSep: Column separator.  ?, by default.
    #   row: a CSV::Row; an Array of a CSV::Cell in a line.
    #
    # RETURNS
    #   nil
    #
    # DESCRIPTION
    #   Parse CSV data and get lines.  Caller block is called for each line
    #   with an argument which is a chunk of cells in a row.
    #
    #   Block value is always nil.  Rows are not cached for performance
    #   reason.
    #
    #   The reader is closed when the block finishes, and also when the
    #   block is left early by 'break' or by an exception.
    #
    def Reader.parse( stringOrReadable, colSep = ?, )
      reader = create( stringOrReadable, colSep )
      begin
        reader.each do | row |
          yield( row )
        end
      ensure
        # Must run on break/raise too; the documented usage breaks out of
        # the block.
        reader.close
      end
      nil
    end

    # SYNOPSIS
    #   reader = CSV::Reader.create( stringOrReadable )
    #
    # ARGS
    #   stringOrReadable: a CSV data to be parsed.  A String or an IO.
    #   colSep: Column separator.  ?, by default.
    #
    # RETURNS
    #   reader: Created instance.  A StringReader for a String, an
    #     IOReader for everything else.
    #
    # DESCRIPTION
    #   Create instance.  To get parse result, see CSV::Reader#each.
    #
    def Reader.create( stringOrReadable, colSep = ?, )
      if stringOrReadable.is_a?( String )
        StringReader.new( stringOrReadable, colSep )
      else
        IOReader.new( stringOrReadable, colSep )
      end
    end

    # SYNOPSIS
    #   CSV::Reader#each do | row |
    #     ...
    #   end
    #
    # ARGS
    #   row: a CSV::Row; an Array of a CSV::Cell in a line.
    #
    # RETURNS
    #   nil
    #
    # DESCRIPTION
    #   Caller block is called for each line with an argument which is a
    #   chunk of cells in a row.  Iteration stops at the first getRow call
    #   which reports 0 cells.
    #
    #   Block value is always nil.  Rows are not cached for performance
    #   reason.
    #
    def each
      while true
        row = Row.new
        break if getRow( row ) == 0
        yield( row )
      end
      nil
    end

    # SYNOPSIS
    #   cell = CSV::Reader#shift
    #
    # RETURNS
    #   cell: a CSV::Row; an Array of a CSV::Cell.  Empty when no cell
    #     could be read.
    #
    # DESCRIPTION
    #   Extract cells of next line.
    #
    def shift
      row = Row.new
      getRow( row )
      row
    end

    # SYNOPSIS
    #   CSV::Reader#close
    #
    # DESCRIPTION
    #   Close this reader.  Delegates to the 'terminate' hook.
    #
    def close
      terminate
    end

  private

    # Abstract class; derived classes define their own initialize.
    def initialize( dev )
      raise RuntimeError.new( 'Do not instanciate this class directly.' )
    end

    # Must fill 'row' with the cells of the next line and return the
    # number of cells read.
    def getRow( row )
      raise NotImplementedError.new( 'Method getRow must be defined in a derived class.' )
    end

    def terminate
      # Define if needed.
    end
  end

  # DESCRIPTION
  #   CSV::StringReader -- CSV formatted string reader.
  #
  # EXAMPLE
  #   Read CSV lines until the first column is 'stop'.
  #
  #   CSV::Reader.parse( "a,b\r\nstop,c\r\nd,e" ) do | row |
  #     p row
  #     break if !row[ 0 ].isNull && row[ 0 ].data == 'stop'
  #   end
  #
  class StringReader < Reader
    # SYNOPSIS
    #   reader = CSV::StringReader.new( string )
    #
    # ARGS
    #   string: a CSV String to be parsed.
    #   colSep: Column separator.  ?, by default.
    #
    # RETURNS
    #   reader: Created instance.
    #
    # DESCRIPTION
    #   Create instance.  To get parse result, see CSV::Reader#each.
    #
    def initialize( string, colSep = ?, )
      @colSep = colSep
      @dev = string
      @idx = 0
      # Skip a leading UTF-8 byte order mark.
      # NOTE(review): assumes String#[] / indexes count bytes (Ruby 1.8);
      # confirm behavior on encoding-aware Rubies.
      if @dev[0, 3] == "\xef\xbb\xbf"
        @idx += 3
      end
    end

  private

    # Parses one line starting at @idx into 'row'.  Raises
    # IllegalFormatError when nothing could be parsed although the string
    # is not exhausted.
    def getRow( row )
      nofCells, nextIdx = CSV.parseLine( @dev, @idx, row, @colSep )
      if nofCells == 0 && nextIdx == 0 && @idx != @dev.size
        raise IllegalFormatError.new
      end
      @idx = nextIdx
      return nofCells
    end
  end

  # DESCRIPTION
  #   CSV::IOReader -- CSV formatted stream reader.
  #
  # EXAMPLE
  #   Read CSV lines until the first column is 'stop'.
  #
  #   CSV::Reader.parse( File.open( "bigdata", "rb" )) do | row |
  #     p row
  #     break if !row[ 0 ].isNull && row[ 0 ].data == 'stop'
  #   end
  #
  class IOReader < Reader
    # SYNOPSIS
    #   reader = CSV::IOReader.new( io )
    #
    # ARGS
    #   io: a CSV data to be parsed.  Must be an IO.  (io#sysread is called.)
    #   colSep: Column separator.  ?, by default.
    #
    # RETURNS
    #   reader: Created instance.
    #
    # DESCRIPTION
    #   Create instance.  To get parse result, see CSV::Reader#each.
    #
    def initialize( io, colSep = ?, )
      @io = io
      @colSep = colSep
      @dev = CSV::IOBuf.new( @io )
      @idx = 0
      # Skip a leading UTF-8 byte order mark.
      # NOTE(review): compares against integer char codes, i.e. assumes
      # the buffer's single-index [] yields Fixnums (Ruby 1.8); confirm
      # behavior on newer Rubies.
      if @dev[0] == 0xef and @dev[1] == 0xbb and @dev[2] == 0xbf
        @idx += 3
      end
      @closeOnTerminate = false
    end

    # SYNOPSIS
    #   CSV::IOReader#closeOnTerminate
    #
    # RETURNS
    #   true
    #
    # DESCRIPTION
    #   Tell this reader to close the IO when terminated (Triggered by
    #   invoking CSV::IOReader#close).
    #
    def closeOnTerminate
      @closeOnTerminate = true
    end

  private

    # Parses one line into 'row', then drops the consumed part from the
    # buffer so that @idx stays relative to the buffer head.
    def getRow( row )
      nofCells, nextIdx = CSV.parseLine( @dev, @idx, row, @colSep )
      if nofCells == 0 && nextIdx == 0 && !@dev.isEOS?
        raise IllegalFormatError.new
      end
      dropped = @dev.drop( nextIdx )
      @idx = nextIdx - dropped
      return nofCells
    end

    def terminate
      if @closeOnTerminate
        @io.close
      end
      if @dev
        @dev.close
      end
    end
  end

  # DESCRIPTION
  #   CSV::Writer -- CSV formatted string/stream writer.
  #
  # EXAMPLE
  #   Write rows to 'csvout' file.
  #
  #   outfile = File.open( 'csvout', 'wb' )
  #   CSV::Writer.generate( outfile ) do | csv |
  #     csv << [ 'c1', nil, '', '"', "\r\n", 'c2' ]
  #     # or
  #     csv.addRow [
  #       CSV::Cell.new( 'c1', false ),
  #       CSV::Cell.new( 'dummy', true ),
  #       CSV::Cell.new( '', false ),
  #       CSV::Cell.new( '"', false ),
  #       CSV::Cell.new( "\r\n", false ),
  #       CSV::Cell.new( "c2", false )
  #     ]
  #     ...
  #   end
  #
  #   outfile.close
  #
  class Writer
    # SYNOPSIS
    #   CSV::Writer.generate( stringOrWritable ) do | writer |
    #     ...
    #   end
    #
    # ARGS
    #   stringOrWritable: device for generated CSV string.  Must respond to
    #     '<<( aString )'.
    #   colSep: Column separator.  ?, by default.
    #   writer: Created writer instance.  See CSV::Writer#<< and
    #     CSV::Writer#addRow to know how to generate CSV string.
    #
    # RETURNS
    #   nil
    #
    # DESCRIPTION
    #   Create writer instance.  Caller block is called with the new
    #   instance.  To add CSV data to generate CSV string, see
    #   CSV::Writer#<< or CSV::Writer#addRow.
    #
    #   The writer is closed when the block finishes, and also when the
    #   block is left early by 'break' or by an exception.
    #
    def Writer.generate( stringOrWritable, colSep = ?, )
      writer = Writer.create( stringOrWritable, colSep )
      begin
        yield( writer )
      ensure
        writer.close
      end
      nil
    end

    # SYNOPSIS
    #   writer = CSV::Writer.create( stringOrWritable )
    #
    # ARGS
    #   stringOrReadable: device for generated CSV string (a writable, in
    #     spite of the parameter name).  Must respond to '<<( aString )'.
    #   colSep: Column separator.  ?, by default.
    #
    # RETURNS
    #   writer: Created instance.
    #
    # DESCRIPTION
    #   Create instance.  To add CSV data to generate CSV string, see
    #   CSV::Writer#<< or CSV::Writer#addRow.
    #
    def Writer.create( stringOrReadable, colSep = ?, )
      BasicWriter.new( stringOrReadable, colSep )
    end

    # SYNOPSIS
    #   CSV::Writer#<<( strArray )
    #
    # ARGS
    #   strArray: an Array of a String.
    #
    # RETURNS
    #   self
    #
    # DESCRIPTION
    #   Dump CSV stream to the device.  Argument is an array of a String
    #   like [ "c1", "c2", "c3" ].  A nil element is written as Null.
    #   The row is terminated with "\r\n".
    #
    def <<( row )
      @dev << CSV.create( row, @colSep ) << "\r\n"
      self
    end

    # SYNOPSIS
    #   CSV::Writer#addRow( cellsArray )
    #
    # ARGS
    #   cellsArray: an Array of a CSV::Cell.
    #
    # RETURNS
    #   self
    #
    # DESCRIPTION
    #   Dump CSV stream to the device.  Argument is an array of a CSV::Cell
    #   like [ CSV::Cell.new( "c1", false ), CSV::Cell.new( "dummy", true ) ].
    #   (Former is "c1" and latter is Null.)
    #
    def addRow( row )
      CSV.createLine( row, row.size, @dev, @colSep )
      self
    end

    # SYNOPSIS
    #   CSV::Writer#close
    #
    # DESCRIPTION
    #   Close this writer.  Delegates to the 'terminate' hook.
    #
    def close
      terminate
    end

  private

    # Abstract class; derived classes define their own initialize.
    def initialize( dev )
      raise RuntimeError.new( 'Do not instanciate this class directly.' )
    end

    def terminate
      # Define if needed.
    end
  end

  # DESCRIPTION
  #   CSV::BasicWriter -- CSV formatted string/stream writer using <<.
  #
  class BasicWriter < Writer
    # SYNOPSIS
    #   writer = CSV::BasicWriter.new( stringOrWritable )
    #
    # ARGS
    #   stringOrWritable: device for generated CSV string.  Must respond to
    #     '<<( aString )'.
    #   colSep: Column separator.  ?, by default.
    #
    # RETURNS
    #   writer: Created instance.
    #
    # DESCRIPTION
    #   Create instance.  To add CSV data to generate CSV string, see
    #   CSV::Writer#<< or CSV::Writer#addRow.
    #
    def initialize( stringOrWritable, colSep = ?, )
      @colSep = colSep
      @dev = stringOrWritable
      @closeOnTerminate = false
    end

    # SYNOPSIS
    #   CSV::BasicWriter#closeOnTerminate
    #
    # RETURNS
    #   true
    #
    # DESCRIPTION
    #   Tell this writer to close the device when terminated (Triggered by
    #   invoking CSV::BasicWriter#close).
    #
    def closeOnTerminate
      @closeOnTerminate = true
    end

  private

    def terminate
      if @closeOnTerminate
        @dev.close
      end
    end
  end

  # SYNOPSIS
  #   cells = CSV.parse( src, colSep = ?, )
  #
  # ARGS
  #   src: a CSV String.
  #   colSep: Column separator.  ?, by default.  If you want to separate
  #     fields with semicolon, give ?; here.
  #
  # RETURNS
  #   cells: an Array of parsed cells in first line.  Each cell is a String
  #     (nil for a Null cell).  Empty when the line is malformed.
  #
  # DESCRIPTION
  #   Parse one line from given string.  Bear in mind it parses ONE LINE.
  #   Rest of the string is ignored for example "a,b\r\nc,d" =>
  #   [ "a", "b" ] and the second line "c,d" is ignored.
  #
  #   If you don't know whether a target string to parse is exactly 1 line
  #   or not, use CSV.parseLine instead of this method.
  #
  def CSV.parse( src, colSep = ?, )
    idx = 0
    resType = :DT_COLSEP
    cells = Row.new
    begin
      while ( resType.equal?( :DT_COLSEP ))
        aCell = Cell.new
        resType, idx = parseBody( src, idx, aCell, colSep )
        cells.push( aCell.isNull ? nil : aCell.data )
      end
    rescue IllegalFormatError
      return Row.new
    end
    cells
  end

  # SYNOPSIS
  #   str = CSV.create( cells, colSep = ?, )
  #
  # ARGS
  #   cells: an Array of cell to be converted to CSV string.  Each cell
  #     must respond to 'to_s'.  nil is written as Null.
  #   colSep: Column separator.  ?, by default.  If you want to separate
  #     fields with semicolon, give ?; here.
  #
  # RETURNS
  #   str: a String of generated CSV string (without a row separator).
  #
  # DESCRIPTION
  #   Create a line from cells.  Each cell is stringified by to_s.
  #
  def CSV.create( cells, colSep = ?, )
    if ( cells.size == 0 )
      return ""
    end
    resStr = ""
    idx = 0
    while true
      cell = if ( cells[ idx ].nil? )
          Cell.new( '', true )
        else
          Cell.new( cells[ idx ].to_s, false )
        end
      createBody( cell, resStr, colSep )
      idx += 1
      if ( idx == cells.size )
        break
      end
      createSeparator( :DT_COLSEP, resStr, colSep )
    end
    resStr
  end

  # SYNOPSIS
  #   nofCell, idx = CSV.parseLine( src, idx, outDev, colSep = ?, )
  #
  # ARGS
  #   src: a CSV data to be parsed.  Must respond '[]( idx )'.
  #     src[]( idx ) must return a char.
  #     src[]( idxOutOfBounds ) must return nil.  A String satisfies this
  #     requirement.
  #   idx: index of parsing location of 'src'.  0 origin.
  #   outDev: buffer for parsed cells.  Must respond '<<( aCSV::Cell )'.
  #   colSep: Column separator.  ?, by default.  If you want to separate
  #     fields with semicolon, give ?; here.
  #
  # RETURNS
  #   nofCell: num of parsed cells.
  #   idx: index of next parsing location of 'src'.
  #   Both are 0 when nothing is left to parse or the line is malformed.
  #
  # DESCRIPTION
  #   Parse a line from string.  To parse lines in CSV string, see EXAMPLE
  #   below.
  #
  # EXAMPLE
  #   src = "a,b\r\nc,d\r\ne,f"
  #   idx = 0
  #   begin
  #     parsed = []
  #     nofCells, idx = CSV.parseLine( src, idx, parsed )
  #     puts "Parsed #{ nofCells } cells."
  #     p parsed
  #   end while nofCells > 0
  #
  def CSV.parseLine( src, idx, outDev, colSep = ?, )
    idxBack = idx
    nofCell = 0
    resType = :DT_COLSEP
    begin
      while ( !resType.equal?( :DT_ROWSEP ))
        aCell = Cell.new
        resType, idx = parseBody( src, idx, aCell, colSep )
        if resType.equal?( :DT_EOS )
          # Nothing was consumed at all: there is no line here.
          if idx == idxBack
            return 0, 0
          end
          # End of stream terminates the last line like a row separator.
          resType = :DT_ROWSEP
        end
        nofCell += 1
        outDev << aCell
      end
    rescue IllegalFormatError
      return 0, 0
    end
    return nofCell, idx
  end

  # SYNOPSIS
  #   nofCells = CSV.createLine( src, cells, outDev, colSep = ?, )
  #
  # ARGS
  #   src: an Array of CSV::Cell to be converted to CSV string.  Must
  #     respond to 'size' and '[]( idx )'.  src[ idx ] must return
  #     CSV::Cell.
  #   cells: num of cells in a line.
  #   outDev: buffer for created CSV string.  Must respond to
  #     '<<( aString )'.
  #   colSep: Column separator.  ?, by default.  If you want to separate
  #     fields with semicolon, give ?; here.
  #
  # RETURNS
  #   nofCells: num of converted cells.
  #
  # DESCRIPTION
  #   Convert a line from cells data to string.  When 'src' holds fewer
  #   than 'cells' cells, a column separator (not a row separator) is
  #   appended after the last cell.  To create multi-row CSV string, see
  #   EXAMPLE below.
  #
  # EXAMPLE
  #   def d( str )
  #     CSV::Cell.new( str, false )
  #   end
  #
  #   row1 = [ d( 'a' ), d( 'b' ) ]
  #   row2 = [ d( 'c' ), d( 'd' ) ]
  #   row3 = [ d( 'e' ), d( 'f' ) ]
  #   src = [ row1, row2, row3 ]
  #   buf = ''
  #   src.each do | row |
  #     nofCells = CSV.createLine( row, 2, buf )
  #     puts "Created #{ nofCells } cells."
  #   end
  #   p buf
  #
  def CSV.createLine( src, cells, outDev, colSep = ?, )
    srcSize = src.size
    if ( srcSize == 0 )
      if cells == 0
        createSeparator( :DT_ROWSEP, outDev, colSep )
      end
      return 0
    end
    nofCells = 0
    createBody( src[ nofCells ], outDev, colSep )
    nofCells += 1
    while (( nofCells < cells ) && ( nofCells != srcSize ))
      createSeparator( :DT_COLSEP, outDev, colSep )
      createBody( src[ nofCells ], outDev, colSep )
      nofCells += 1
    end
    if ( nofCells == cells )
      createSeparator( :DT_ROWSEP, outDev, colSep )
    else
      createSeparator( :DT_COLSEP, outDev, colSep )
    end
    nofCells
  end

  # Raised internally when the parsed data is not well-formed CSV.
  class IllegalFormatError < RuntimeError; end

  # Private class methods.
  class << self
  private

    # Parses one cell of 'src' starting at 'idx' into 'aCell'.
    #
    # Returns [ resultType, nextIdx ] where resultType is :DT_COLSEP (cell
    # ended by a column separator), :DT_ROWSEP (ended by "\n" or "\r\n")
    # or :DT_EOS (source exhausted).  Raises IllegalFormatError on
    # malformed input (stray quote, bare "\r", unterminated quoted cell).
    #
    # States: :ST_START (nothing read yet), :ST_DATA (inside cell data),
    # :ST_QUOTE (just after a '"' seen while quoted; either the closing
    # quote or the first half of an escaped '""'), :ST_END (cell done).
    # 'quoted' is true while inside a quoted cell; 'cr' remembers an
    # unquoted "\r" which must be followed by "\n".
    def parseBody( src, idx, aCell, colSep )
      aCell.isNull = false
      state = :ST_START
      quoted = false
      cr = false
      c = nil
      while ( c = src[ idx ] )
        idx += 1
        resultState = :DT_UNKNOWN
        if ( c == colSep )
          if state.equal?( :ST_DATA )
            if cr
              raise IllegalFormatError.new
            end
            if ( !quoted )
              state = :ST_END
              resultState = :DT_COLSEP
            else
              aCell.data << c.chr
            end
          elsif state.equal?( :ST_QUOTE )
            if cr
              raise IllegalFormatError.new
            end
            state = :ST_END
            resultState = :DT_COLSEP
          else  # :ST_START
            aCell.isNull = true
            state = :ST_END
            resultState = :DT_COLSEP
          end
        elsif ( c == ?" )  # " for editors' syntax highlighting.
          if state.equal?( :ST_DATA )
            if cr
              raise IllegalFormatError.new
            end
            if quoted
              quoted = false
              state = :ST_QUOTE
            else
              raise IllegalFormatError.new
            end
          elsif state.equal?( :ST_QUOTE )
            # '""' inside a quoted cell is an escaped quote.
            aCell.data << c.chr
            quoted = true
            state = :ST_DATA
          else  # :ST_START
            quoted = true
            state = :ST_DATA
          end
        elsif ( c == ?\r )
          if cr
            raise IllegalFormatError.new
          end
          if quoted
            aCell.data << c.chr
            state = :ST_DATA
          else
            cr = true
          end
        elsif ( c == ?\n )
          if state.equal?( :ST_DATA )
            if cr
              state = :ST_END
              resultState = :DT_ROWSEP
              cr = false
            else
              if quoted
                aCell.data << c.chr
                state = :ST_DATA
              else
                state = :ST_END
                resultState = :DT_ROWSEP
              end
            end
          elsif state.equal?( :ST_QUOTE )
            state = :ST_END
            resultState = :DT_ROWSEP
            if cr
              cr = false
            end
          else  # :ST_START
            aCell.isNull = true
            state = :ST_END
            resultState = :DT_ROWSEP
          end
        else
          if state.equal?( :ST_DATA ) || state.equal?( :ST_START )
            if cr
              raise IllegalFormatError.new
            end
            aCell.data << c.chr
            state = :ST_DATA
          else  # :ST_QUOTE
            raise IllegalFormatError.new
          end
        end
        if state.equal?( :ST_END )
          return resultState, idx
        end
      end
      # Source exhausted before a separator was seen.
      if state.equal?( :ST_START )
        aCell.isNull = true
      elsif state.equal?( :ST_QUOTE )
        true  # dummy for coverage; only a data
      elsif quoted
        raise IllegalFormatError.new
      elsif cr
        raise IllegalFormatError.new
      end
      return :DT_EOS, idx
    end

    # Appends the CSV representation of 'cellData' to 'outDev'.  A Null
    # cell appends nothing.  The datum is quoted when it contains a quote
    # (which is doubled), the column separator, CR or LF, or when it is
    # empty (to distinguish it from Null).
    def createBody( cellData, outDev, colSep )
      # Null cells emit nothing, so their datum is never touched.
      return if cellData.isNull
      addData = cellData.data.dup
      if ( addData.gsub!( '"', '""' ) ||
          addData.include?( colSep ) ||
          ( /[\r\n]/ =~ addData ) || ( cellData.data.empty? ))
        outDev << '"' << addData << '"'
      else
        outDev << addData
      end
    end

    # Appends a column separator (:DT_COLSEP) or "\r\n" (:DT_ROWSEP) to
    # 'outDev'.
    def createSeparator( type, outDev, colSep )
      case type
      when :DT_COLSEP
        outDev << colSep.chr
      when :DT_ROWSEP
        outDev << "\r\n"
      end
    end
  end

  # DESCRIPTION
  #   CSV::StreamBuf -- a class for a buffered stream.
  #
  # EXAMPLE 1 -- an IO.
  #   class MyBuf < StreamBuf
  #     # Do initialize myself before a super class.  Super class might
  #     # call my method 'read'.  (Could be awful for C++ user. :-)
  #     def initialize( s )
  #       @s = s
  #       super()
  #     end
  #
  #     # define my own 'read' method.
  #     # CAUTION: Returning a string which size is not equal to 'size'
  #     # means EndOfStream.
  #     def read( size )
  #       @s.sysread( size )
  #     end
  #
  #     # release buffers. in Ruby which has GC, you do not have to call
  #     # this...
  #     def terminate
  #       @s = nil
  #       super()
  #     end
  #   end
  #
  #   aBuf = MyBuf.new( STDIN )
  #   myStr = ""
  #   p aBuf[0, 0]        # => "" ( null string )
  #   p aBuf[0]           # => 97 ( char code of 'a' )
  #   p aBuf[0, 1]        # => "a"
  #   myStr = aBuf[0, 5]
  #   p myStr             # => "abcde" ( 5 chars )
  #   p aBuf[0, 6]        # => "abcde\n" ( 6 chars )
  #   p aBuf[0, 7]        # => "abcde\n" ( 6 chars )
  #   p aBuf.drop( 3 )    # => 3 ( dropped chars )
  #   p aBuf.get(0, 2)    # => "de" ( 2 chars )
  #   p aBuf.isEOS?       # => false ( is not EOS here )
  #   p aBuf.drop( 5 )    # => 3 ( dropped chars )
  #   p aBuf.isEOS?       # => true ( is EOS here )
  #   p aBuf[0]           # => nil ( is EOS here )
  #
  # EXAMPLE 2 -- String.
  #   This is a conceptual example.  No pros with this.
  #
  #   class StrBuf < StreamBuf
  #     def initialize( s )
  #       @str = s
  #       @idx = 0
  #       super()
  #     end
  #
  #     def read( size )
  #       str = @str[ @idx, size ]
  #       @idx += str.size
  #       str
  #     end
  #   end
  #
  class StreamBuf  # pure virtual. (do not instantiate it directly)

    # SYNOPSIS
    #   char/str = CSV::StreamBuf#get( idx, n = nil )
    #   char/str = CSV::StreamBuf#[ idx, n = nil ]
    #
    # ARGS
    #   idx: index of a string to specify a start point of a string to get.
    #     Unlike String instance, idx < 0 returns nil.
    #   n: size of a string to get.
    #
    # RETURNS
    #   char: if n == nil.  A char at idx.
    #   str: if n != nil.  A partial string, from idx to ( idx + size ).  At
    #     EOF, the string size could not equal to arg n.
    #   nil when idx is beyond the end of the stream.
    #
    # DESCRIPTION
    #   Get a char or a partial string from the stream.  More buffers are
    #   read on demand.
    #
    def []( idx, n = nil )
      if idx < 0
        return nil
      end
      if ( idxIsEOS?( idx ))
        if n and ( @offset + idx == @maxIdx )
          # Like a String, "abc"[ 4, 1 ] returns nil and
          # "abc"[ 3, 1 ] returns "", not nil.
          return ""
        else
          return nil
        end
      end
      # Walk forward from the current buffer to the one holding idx,
      # reading new buffers when the tail is reached.
      myBuf = @curBuf
      myOffset = @offset
      nextIdx = idx
      while ( myOffset + nextIdx >= BufSize )
        if ( myBuf == @bufTailIdx )
          unless addBuf
            break
          end
        end
        myBuf += 1
        nextIdx = myOffset + nextIdx - BufSize
        myOffset = 0
      end
      if ( @isEOS && ( myBuf == @bufTailIdx ) &&
          ( myOffset + nextIdx > @maxIdx ))
        return nil
      end
      if !n
        return @bufList[ myBuf ][ myOffset + nextIdx ]     # A char.
      elsif ( myOffset + nextIdx + n - 1 < BufSize )
        return @bufList[ myBuf ][ myOffset + nextIdx, n ]  # String.
      else
        # The requested range crosses a buffer boundary.
        # should do loop instead of (tail) recursive call...
        res = @bufList[ myBuf ][ myOffset + nextIdx..-1 ]
        addSize = BufSize - ( myOffset + nextIdx )
        idx += addSize
        n -= addSize
        ret = self[ idx, n ]
        if ret
          res << ret
        end
        return res
      end
    end
    alias get []

    # SYNOPSIS
    #   dropSize = CSV::StreamBuf#drop( n )
    #
    # ARGS
    #   n: drop size
    #
    # RETURNS
    #   dropSize: dropped size.  At EOF, dropped size might not equal to
    #     arg n.  0 if n <= 0.
    #
    # DESCRIPTION
    #   Drop a string from the stream.  Once you drop the head of the
    #   stream, access to the dropped part via [] or get returns nil.
    #
    def drop( n )
      if isEOS?
        return 0
      end
      dropSize = 0
      while ( n > 0 )
        if ( @isEOS && ( @curBuf == @bufTailIdx ))
          # Inside the last (possibly short) buffer.
          if ( @offset + n < @maxIdx )
            dropSize += n
            @offset += n
            n = 0
          else
            dropSize += ( @maxIdx - @offset )
            n = 0
            @offset = 0
            relBuf
          end
        else
          if ( @offset + n < BufSize )
            dropSize += n
            @offset += n
            n = 0
          else
            # Drop the rest of the current buffer and move to the next
            # one, reading it first if it is not loaded yet.
            dropSize += ( BufSize - @offset )
            n -= ( BufSize - @offset )
            @offset = 0
            unless relBuf
              unless addBuf
                break
              end
              @curBuf = @bufTailIdx
            end
          end
        end
      end
      dropSize
    end

    # SYNOPSIS
    #   eofP = CSV::StreamBuf#isEOS?
    #
    # RETURNS
    #   eofP: true if end of the stream or false.
    #
    # DESCRIPTION
    #   Check EOF or not.
    #
    def isEOS?
      return idxIsEOS?( 0 )
    end

    # SYNOPSIS
    #   N/A
    #
    # DESCRIPTION
    #   Do not instantiate this class directly.  Define your own class
    #   which derives this class and define 'read' instance method.
    #   The first buffer is read here, so 'read' must be usable before
    #   calling super().
    #
    def initialize
      @bufList = []
      @curBuf = @bufTailIdx = -1
      @offset = @maxIdx = 0
      @isEOS = false
      unless addBuf
        raise RuntimeError.new( "Couldn't read any buffer..." )
      end
      @curBuf = @bufTailIdx
    end

  protected

    # Releases every buffer which is still held.
    def terminate
      while ( relBuf ); end
    end

    # protected method 'read' must be defined in derived classes.
    # CAUTION: Returning a string which size is not equal to 'size' means
    # EndOfStream.  When it is not at EOS, you must block the callee, try
    # to read and return the sized string.
    def read( size ) # raise EOFError
      raise NotImplementedError.new( 'Method read must be defined in a derived class.' )
    end

  private

    # Reads the next buffer and appends it to the buffer list.  Returns
    # false (reading nothing) once the end of the stream has been seen.
    # A short read marks the end of the stream; @maxIdx is then the size
    # of the last buffer.
    def addBuf
      if @isEOS
        return false
      end
      begin
        readStr = read( BufSize )
      rescue EOFError
        readStr = ""
      rescue
        terminate
        raise
      end
      rSize = readStr.size
      if ( rSize != BufSize )
        @isEOS = true
        @maxIdx = rSize
      end
      if ( rSize == 0 )
        readStr = ""
      end
      @bufList.push( readStr )
      @bufTailIdx += 1
      true
    end

    # Releases the current buffer and advances to the next one.  Returns
    # false when there is no next buffer (the current index becomes -1).
    def relBuf
      if ( @curBuf < 0 )
        return false
      end
      @bufList[ @curBuf ] = nil
      if ( @curBuf == @bufTailIdx )
        @curBuf = -1
        return false
      else
        @curBuf += 1
        return true
      end
    end

    # True when 'idx' (relative to the current offset) is at or beyond the
    # end of the stream.
    def idxIsEOS?( idx )
      ( @isEOS && (( @curBuf < 0 ) || ( @curBuf == @bufTailIdx ) &&
        ( @offset + idx >= @maxIdx )))
    end

    # Size of one buffer.
    BufSize = 1024 * 100
  end

  # DESCRIPTION
  #   CSV::IOBuf -- a class for a buffered IO.
  #
  # EXAMPLE
  #   # File 'bigdata' could be a giga-byte size one!
  #   # CSV::IOReader wraps the IO in a CSV::IOBuf by itself.
  #   CSV::Reader.create( File.open( "bigdata", "rb" )).each do | row |
  #     p row
  #     break if row[ 0 ].data == "admin"
  #   end
  #
  class IOBuf < StreamBuf
    # Wraps 's', which must respond to 'sysread( size )'.
    def initialize( s )
      @s = s
      super()
    end

    # Releases the buffers.  The wrapped IO itself is not closed here.
    def close
      terminate
    end

  private

    # Reads exactly 'size' from the IO unless the end of the stream is
    # reached first.  A single sysread may legitimately return less than
    # requested (pipes, sockets, IO-like objects); since StreamBuf treats
    # any short read as EndOfStream, keep reading until the buffer is
    # full or EOFError is raised.  EOFError on the very first read is
    # propagated to the caller.
    def read( size )
      str = @s.sysread( size )
      if str.size < size
        str = str.dup
        begin
          while str.size < size
            str << @s.sysread( size - str.size )
          end
        rescue EOFError
          # End of stream: return the short, final chunk.
        end
      end
      str
    end

    def terminate
      super()
    end
  end
end