File: basic_latin_test.rb

package info (click to toggle)
ruby-stringex 2.8.6-1
links: PTS, VCS
area: main
in suites: forky, sid
size: 1,220 kB
sloc: ruby: 3,749; makefile: 5
file content (144 lines) | stat: -rw-r--r-- 3,284 bytes
parent folder | download | duplicates (6)
# encoding: UTF-8

require "test/unit"
require "stringex"
# Load the shared helper that sits next to this file; the path is built from
# this file's own directory rather than the current working directory.
require File.join(File.expand_path(File.dirname(__FILE__)), "codepoint_test_helper.rb")
# Mix the helper module in at the top level so its methods can be called from
# the test case below (presumably this is where assert_equal_encoded is
# defined -- confirm in codepoint_test_helper.rb).
include CodepointTestHelper

class BasicLatinTest < Test::Unit::TestCase
  # Regression and debugging checks for how Unicode codepoints are
  # transliterated into Basic Latin (ASCII) characters.
  #
  # Every assertion passes the expected ASCII output first, followed by one
  # hex codepoint (String) or several (Array of Strings).
  # NOTE(review): assert_equal_encoded is not defined in this file; it
  # presumably comes from CodepointTestHelper -- confirm there how the hex
  # strings are decoded and compared.
  #
  # Reference charts:
  #   http://unicode.org/charts/
  #   http://unicode.org/charts/PDF/U0000.pdf
  #
  # NOTE: Control characters are not covered. Packing them to unicode gave
  # weird results, so no way of testing them has been worked out yet.

  def test_spaces
    assert_equal_encoded(" ", %w[0020 00a0])
    # These codepoints are expected to vanish entirely.
    assert_equal_encoded("", %w[200b 2060])
  end

  def test_exclamation_marks
    assert_equal_encoded("!", %w[0021 2762])
    assert_equal_encoded("!!", "203c")
    # 00a1 is expected to be dropped.
    assert_equal_encoded("", "00a1")
    assert_equal_encoded("?!", "203d")
  end

  def test_quotation_marks
    assert_equal_encoded('"', %w[0022 02ba 2033 3003])
  end

  def test_apostrophes
    assert_equal_encoded("'", %w[0027 02b9 02bc 02c8 2032])
  end

  def test_asterisks
    assert_equal_encoded("*", %w[002a 066d 204e 2217 26b9 2731])
  end

  def test_commas
    assert_equal_encoded(",", %w[002c 060c])
  end

  def test_periods
    assert_equal_encoded(".", %w[002e 06d4])
  end

  def test_hyphens
    assert_equal_encoded("-", %w[002d 2010 2011 2012 2212])
  end

  # Dashes grow with their width: two hyphens here, three for the em dash.
  def test_endash
    assert_equal_encoded("--", %w[2013 2015])
  end

  def test_emdash
    assert_equal_encoded("---", ["2014"])
  end

  def test_dotleader
    assert_equal_encoded("..", ["2025"])
  end

  def test_ellipsis
    assert_equal_encoded("...", ["2026"])
  end

  def test_slashes
    # Forward slashes first, then backslashes.
    assert_equal_encoded("/", %w[002f 2044 2215])
    assert_equal_encoded("\\", %w[005c 2216])
  end

  def test_colons
    assert_equal_encoded(":", %w[003a 2236])
  end

  def test_semicolons
    assert_equal_encoded(";", %w[003b 061b])
  end

  def test_less_thans
    assert_equal_encoded("<", %w[003c 2039 2329 27e8 3008])
  end

  def test_equals
    assert_equal_encoded("=", "003d")
  end

  def test_greater_thans
    assert_equal_encoded(">", %w[003e 203a 232a 27e9 3009])
  end

  def test_question_marks
    assert_equal_encoded("?", %w[003f 061f])
    # 00bf is expected to be dropped.
    assert_equal_encoded("", "00bf")
    # Combined marks keep the order of their component characters.
    assert_equal_encoded("?!", %w[203d 2048])
    assert_equal_encoded("!?", "2049")
  end

  def test_circumflexes
    assert_equal_encoded("^", %w[005e 2038 2303])
  end

  def test_underscores
    assert_equal_encoded("_", %w[005f 02cd 2017])
  end

  def test_grave_accents
    # Non-ASCII grave-like marks are expected to become a plain apostrophe...
    assert_equal_encoded("'", %w[02cb 2035])
    # ...while the ASCII grave accent itself must stay untouched.
    assert_equal_encoded("`", "0060")
  end

  def test_bars
    assert_equal_encoded("|", %w[007c 2223 2758])
  end

  def test_tildes
    assert_equal_encoded("~", %w[007e 02dc 2053 223c ff5e])
  end

  # Codepoints that should collapse to a plain capital letter, keyed by the
  # expected letter.
  def test_related_letters
    letterlike_codepoints = {
      "B" => "212c",
      "C" => %w[2102 212d],
      "E" => %w[2107 2130],
      "F" => "2131",
      "H" => %w[210b 210c 210d],
      "I" => %w[0130 0406 04c0 2110 2111 2160],
      "K" => "212a",
      "L" => "2112",
      "M" => "2133",
      "N" => "2115",
      "P" => "2119",
      "Q" => "211a",
      "R" => %w[211b 211c 211d],
      "Z" => %w[2124 2128]
    }

    letterlike_codepoints.each_pair do |ascii_letter, codepoints|
      assert_equal_encoded(ascii_letter, codepoints)
    end
  end
end