1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
|
#! /usr/bin/env crystal
#
# This script generates the file spec/std/string/grapheme_break_spec.cr
# that contains test cases for Unicode grapheme clusters based on the default
# Grapheme_Cluster_Break Test
# http://www.unicode.org/Public/x.y.z/ucd/auxiliary/GraphemeBreakTest.txt
require "http/client"
require "../src/compiler/crystal/formatter"
# Root URL of the Unicode Character Database files for the Unicode version
# given by Unicode::VERSION.
UCD_ROOT = "http://www.unicode.org/Public/#{Unicode::VERSION}/ucd/"
# Location of the grapheme break test data that this script downloads.
url = "#{UCD_ROOT}auxiliary/GraphemeBreakTest.txt"
# Destination of the generated spec, resolved relative to this script's directory.
path = "#{__DIR__}/../spec/std/string/grapheme_break_spec.cr"
# Returns the sole character of *string* when it is exactly one character
# long; any other string (including the empty one) is returned unchanged.
def string_or_char(string)
  return string unless string.size == 1

  string[0]
end
# Download the test data up front: opening the output file with "w" truncates
# it, so a failed request must be detected before the existing spec is touched.
response = HTTP::Client.get(url)
unless response.success?
  raise "Failed to download #{url}: HTTP #{response.status_code}"
end

# Write the spec: a fixed preamble, one `it_iterates_graphemes` call per test
# line of the downloaded data, and the closing `end` of the `describe` block.
File.open(path, "w") do |file|
  # The heredoc body is indented together with its terminator; Crystal strips
  # that common indentation, so the emitted lines start at column 0.
  file.puts <<-CRYSTAL
    # This file was automatically generated by running:
    #
    # scripts/generate_grapheme_break_spec.cr
    #
    # See https://www.unicode.org/license.html for the Unicode license agreement.
    # DO NOT EDIT
    require "./spec_helper"
    describe "String#each_grapheme" do
    CRYSTAL

  response.body.each_line do |line|
    # Skip comment lines, and blank lines, which would otherwise be emitted as
    # a bogus test case for the empty string.
    next if line.blank? || line.starts_with?('#')

    # Everything before the first '#' is the test itself; the remainder is a
    # description that is copied into the generated file as a trailing comment.
    format, _, comment = line.partition('#')

    # TODO: implement grapheme boundary rule GB9c in UAX29
    pending = comment.includes?("[9.3]")

    # Expected grapheme clusters, in order of appearance.
    graphemes = [] of String | Char

    # `string` receives every character of the test case, while `grapheme`
    # collects the characters of the cluster currently being read.
    string = String.build do |io|
      grapheme = String::Builder.new
      # The test is read as (operator, codepoint) pairs.
      format.split.in_groups_of(2) do |ary|
        operator, codepoint = ary
        # An unpaired trailing token has no codepoint and ends the test.
        break if codepoint.nil?
        char = codepoint.to_i(16).chr
        io << char
        case operator
        when "÷"
          # Break: finish the current cluster (if any) and start a new one.
          unless grapheme.empty?
            graphemes << string_or_char(grapheme.to_s)
          end
          grapheme = String::Builder.new
        when "×"
          # No break: the character joins the current cluster.
        else raise "Unexpected operator #{operator.inspect}"
        end
        grapheme << char
      end
      # Flush the last cluster.
      graphemes << string_or_char(grapheme.to_s)
    end
    file.puts " #{%(pending "GB9c" { ) if pending} it_iterates_graphemes #{string.dump}, [#{graphemes.join(", ", &.dump)}] #{" }" if pending} # #{comment}"
  end
  file.puts "end"
end
# Run the formatter on the generated file. The arguments are passed directly
# to the process (no shell), so a checkout path containing spaces or shell
# metacharacters is handled correctly. The formatter's standard output is
# discarded and its standard error is shown; a failing formatter aborts the
# script instead of being silently ignored.
status = Process.run("crystal", ["tool", "format", path], error: Process::Redirect::Inherit)
raise "crystal tool format failed for #{path}" unless status.success?
|