File: reader_spec.cr

package info (click to toggle)
crystal 1.14.0%2Bdfsg-1
links: PTS, VCS
area: main
in suites: forky, sid, trixie
size: 24,384 kB
sloc: javascript: 6,400; sh: 695; makefile: 269; ansic: 121; python: 105; cpp: 77; xml: 32
file content (291 lines) | stat: -rw-r--r-- 10,162 bytes
require "spec"
require "char/reader"
require "../../support/string"

private def assert_invalid_byte_sequence(bytes, *, file = __FILE__, line = __LINE__)
  reader = Char::Reader.new(String.new bytes)
  reader.current_char.should eq(Char::REPLACEMENT), file: file, line: line
  reader.current_char_width.should eq(1), file: file, line: line
  reader.error.should eq(bytes[0]), file: file, line: line
end

private def assert_reads_at_end(bytes, char, *, file = __FILE__, line = __LINE__)
  str = String.new bytes
  reader = Char::Reader.new(str, pos: bytes.size)
  reader.previous_char.should eq(char), file: file, line: line
  reader.current_char.should eq(char), file: file, line: line
  reader.current_char_width.should eq(bytes.size), file: file, line: line
  reader.pos.should eq(0), file: file, line: line
  reader.error.should be_nil, file: file, line: line
end

private def assert_invalid_byte_sequence_at_end(bytes, *, file = __FILE__, line = __LINE__)
  str = String.new bytes
  reader = Char::Reader.new(str, pos: bytes.size)
  reader.previous_char
  reader.current_char.should eq(Char::REPLACEMENT), file: file, line: line
  reader.current_char_width.should eq(1), file: file, line: line
  reader.pos.should eq(bytes.size - 1), file: file, line: line
  reader.error.should eq(bytes[-1]), file: file, line: line
end

describe "Char::Reader" do
  it "iterates through empty string" do
    reader = Char::Reader.new("")
    reader.pos.should eq(0)
    reader.current_char.ord.should eq(0)
    reader.error.should be_nil
    reader.has_next?.should be_false

    expect_raises IndexError do
      reader.next_char
    end
  end

  it "iterates through string of size one" do
    reader = Char::Reader.new("a")
    reader.pos.should eq(0)
    reader.current_char.should eq('a')
    reader.has_next?.should be_true
    reader.next_char.ord.should eq(0)
    reader.has_next?.should be_false

    expect_raises IndexError do
      reader.next_char
    end
  end

  it "iterates through chars" do
    reader = Char::Reader.new("há日本語")
    reader.pos.should eq(0)
    reader.current_char.ord.should eq(104)
    reader.has_next?.should be_true

    reader.next_char.ord.should eq(225)

    reader.pos.should eq(1)
    reader.current_char.ord.should eq(225)

    reader.next_char.ord.should eq(26085)
    reader.next_char.ord.should eq(26412)
    reader.next_char.ord.should eq(35486)
    reader.has_next?.should be_true

    reader.next_char.ord.should eq(0)
    reader.has_next?.should be_false

    expect_raises IndexError do
      reader.next_char
    end
  end

  it "peeks next char" do
    reader = Char::Reader.new("há日本語")
    reader.peek_next_char.ord.should eq(225)
  end

  it "sets pos" do
    reader = Char::Reader.new("há日本語")
    reader.pos = 1
    reader.pos.should eq(1)
    reader.current_char.ord.should eq(225)
  end

  describe "#each" do
    it "yields chars" do
      reader = Char::Reader.new("abc")
      chars = [] of Char
      reader.each do |char|
        chars << char
      end.should be_nil
      chars.should eq ['a', 'b', 'c']
    end

    it "does not yield if empty" do
      reader = Char::Reader.new("")
      reader.each do |char|
        fail "reader each shouldn't yield on empty string"
      end.should be_nil
    end

    it "checks bounds after block" do
      string = "f"
      reader = Char::Reader.new(string)
      reader.each do |c|
        c.should eq 'f'
        reader.next_char
      end
    end
  end

  it "starts at end" do
    reader = Char::Reader.new(at_end: "")
    reader.pos.should eq(0)
    reader.current_char.ord.should eq(0)
    reader.has_previous?.should be_false
    reader.has_next?.should be_false
  end

  it "gets previous char (ascii)" do
    reader = Char::Reader.new(at_end: "hello")
    reader.pos.should eq(4)
    reader.current_char.should eq('o')
    reader.has_previous?.should be_true
    reader.has_next?.should be_true

    reader.previous_char.should eq('l')
    reader.has_next?.should be_true
    reader.previous_char.should eq('l')
    reader.previous_char.should eq('e')
    reader.previous_char.should eq('h')
    reader.has_previous?.should be_false

    expect_raises IndexError do
      reader.previous_char
    end
  end

  it "gets previous char (unicode)" do
    reader = Char::Reader.new(at_end: "há日本語")
    reader.pos.should eq(9)
    reader.current_char.should eq('語')
    reader.has_previous?.should be_true
    reader.has_next?.should be_true

    reader.previous_char.should eq('本')
    reader.has_next?.should be_true
    reader.previous_char.should eq('日')
    reader.previous_char.should eq('á')
    reader.previous_char.should eq('h')
    reader.has_previous?.should be_false
  end

  it "starts at pos" do
    reader = Char::Reader.new("há日本語", pos: 9)
    reader.pos.should eq(9)
    reader.current_char.should eq('語')
  end

  it "#current_char?" do
    reader = Char::Reader.new("há日本語")
    reader.current_char?.should eq('h')
    reader.next_char
    reader.current_char?.should eq('á')
    reader.next_char
    reader.current_char?.should eq('日')
    reader.next_char
    reader.current_char?.should eq('本')
    reader.next_char
    reader.current_char?.should eq('語')
    reader.next_char
    reader.current_char?.should be_nil
    reader.previous_char
    reader.current_char?.should eq('語')
  end

  it "#next_char?" do
    reader = Char::Reader.new("há日本語")
    reader.next_char?.should eq('á')
    reader.pos.should eq(1)
    reader.next_char?.should eq('日')
    reader.pos.should eq(3)
    reader.next_char?.should eq('本')
    reader.pos.should eq(6)
    reader.next_char?.should eq('語')
    reader.pos.should eq(9)
    reader.next_char?.should be_nil
    reader.pos.should eq(12)
    reader.next_char?.should be_nil
    reader.pos.should eq(12)
  end

  it "#previous_char?" do
    reader = Char::Reader.new("há日本語", pos: 12)
    reader.previous_char?.should eq('語')
    reader.pos.should eq(9)
    reader.previous_char?.should eq('本')
    reader.pos.should eq(6)
    reader.previous_char?.should eq('日')
    reader.pos.should eq(3)
    reader.previous_char?.should eq('á')
    reader.pos.should eq(1)
    reader.previous_char?.should eq('h')
    reader.pos.should eq(0)
    reader.previous_char?.should be_nil
    reader.pos.should eq(0)
  end

  it "errors on invalid UTF-8" do
    {% for bytes in INVALID_UTF8_BYTE_SEQUENCES %}
      assert_invalid_byte_sequence Bytes{{ bytes }}
    {% end %}
  end

  describe "#previous_char" do
    it "reads on valid UTF-8" do
      {% for bytes, char in VALID_UTF8_BYTE_SEQUENCES %}
        assert_reads_at_end Bytes{{ bytes }}, {{ char }}
      {% end %}
    end

    it "errors on invalid UTF-8" do
      assert_invalid_byte_sequence_at_end Bytes[0x80]
      assert_invalid_byte_sequence_at_end Bytes[0xbf]
      assert_invalid_byte_sequence_at_end Bytes[0xc0]
      assert_invalid_byte_sequence_at_end Bytes[0xff]

      assert_invalid_byte_sequence_at_end Bytes[0x00, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0x7f, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0x80, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0x9f, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0xa0, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0xbf, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0xc0, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0xc1, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0xe0, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0xff, 0x80]

      assert_invalid_byte_sequence_at_end Bytes[0x00, 0x80, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0x7f, 0x80, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0x80, 0x80, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0x8f, 0x80, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0x90, 0x80, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0xbf, 0x80, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0xc0, 0x80, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0xc1, 0x80, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0xc2, 0x80, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0xdf, 0x80, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0xe0, 0x80, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0xe0, 0x9f, 0xbf]
      assert_invalid_byte_sequence_at_end Bytes[0xf0, 0x80, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0xff, 0x80, 0x80]

      assert_invalid_byte_sequence_at_end Bytes[0x00, 0xa0, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0x7f, 0xa0, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0x80, 0xa0, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0x8f, 0xa0, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0x90, 0xa0, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0xbf, 0xa0, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0xc0, 0xa0, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0xc1, 0xa0, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0xc2, 0xa0, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0xdf, 0xa0, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0xed, 0xa0, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0xed, 0xbf, 0xbf]
      assert_invalid_byte_sequence_at_end Bytes[0xf0, 0xa0, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0xff, 0xa0, 0x80]

      assert_invalid_byte_sequence_at_end Bytes[0x00, 0x80, 0x80, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0xef, 0x80, 0x80, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0xf0, 0x80, 0x80, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0xf5, 0x80, 0x80, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0xff, 0x80, 0x80, 0x80]

      assert_invalid_byte_sequence_at_end Bytes[0x00, 0x90, 0x80, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0xef, 0x90, 0x80, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0xf4, 0x90, 0x80, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0xf5, 0x90, 0x80, 0x80]
      assert_invalid_byte_sequence_at_end Bytes[0xff, 0x90, 0x80, 0x80]
    end
  end
end