File: generate_grapheme_break_specs.cr

package info (click to toggle)
crystal 1.14.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 24,384 kB
  • sloc: javascript: 6,400; sh: 695; makefile: 269; ansic: 121; python: 105; cpp: 77; xml: 32
file content (74 lines) | stat: -rwxr-xr-x 2,066 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#! /usr/bin/env crystal
#
# This script generates the file spec/std/string/grapheme_break_spec.cr,
# which contains test cases for Unicode grapheme clusters based on the default
# Grapheme_Cluster_Break test data published at
# http://www.unicode.org/Public/x.y.z/ucd/auxiliary/GraphemeBreakTest.txt

require "http/client"
require "../src/compiler/crystal/formatter"

# Base URL of the Unicode Character Database for the Unicode version given by
# `Unicode::VERSION`.
UCD_ROOT = "http://www.unicode.org/Public/#{Unicode::VERSION}/ucd/"

# Location of the grapheme break test data that is downloaded and converted.
url = "#{UCD_ROOT}auxiliary/GraphemeBreakTest.txt"

# Spec file that this script (over)writes, resolved relative to this script's
# directory.
path = "#{__DIR__}/../spec/std/string/grapheme_break_spec.cr"

# Returns the sole character of *string* when it consists of exactly one
# character; any other string (including the empty one) is returned unchanged.
def string_or_char(string)
  string.size == 1 ? string[0] : string
end

# Download the test data *before* opening the output file: opening with "w"
# truncates it, so a network error or a non-successful response must be
# detected first to avoid clobbering the existing spec file.
response = HTTP::Client.get(url)
unless response.success?
  raise "Failed to download #{url}: HTTP #{response.status_code}"
end

File.open(path, "w") do |file|
  # Static header of the generated spec file.
  file.puts <<-CRYSTAL
    # This file was automatically generated by running:
    #
    #   scripts/generate_grapheme_break_spec.cr
    #
    # See https://www.unicode.org/license.html for the Unicode license agreement.
    # DO NOT EDIT

    require "./spec_helper"

    describe "String#each_grapheme" do
    CRYSTAL
  response.body.each_line do |line|
    # Lines that are entirely comments carry no test case.
    next if line.starts_with?('#')

    # Everything before the first '#' is the test description, everything
    # after it is a comment that is copied into the generated spec.
    format, _, comment = line.partition('#')

    # TODO: implement grapheme boundary rule GB9c in UAX29
    pending = comment.includes?("[9.3]")

    # `string` accumulates every character of the test case, `graphemes` the
    # expected clusters (single-character clusters are stored as `Char`).
    graphemes = [] of String | Char
    string = String.build do |io|
      grapheme = String::Builder.new
      # The description is consumed as (operator, hexadecimal code point) pairs.
      format.split.in_groups_of(2) do |ary|
        operator, codepoint = ary
        # A trailing operator without a code point ends the test case.
        break if codepoint.nil?
        char = codepoint.to_i(16).chr
        io << char
        case operator
        when "÷"
          # Break: flush the cluster collected so far and start a new one.
          unless grapheme.empty?
            graphemes << string_or_char(grapheme.to_s)
          end
          grapheme = String::Builder.new
        when "×"
          # No break: the character extends the current cluster.
        else raise "Unexpected operator #{operator.inspect}"
        end
        grapheme << char
      end
      # Flush the last cluster.
      graphemes << string_or_char(grapheme.to_s)
    end

    # Test cases flagged as pending are wrapped in `pending "GB9c" { ... }`.
    file.puts "  #{%(pending "GB9c" { ) if pending} it_iterates_graphemes #{string.dump}, [#{graphemes.join(", ", &.dump)}] #{" }" if pending} # #{comment}"
  end
  file.puts "end"
end

# Run the generated file through the Crystal formatter.
`crystal tool format #{path}`