File: generate_patterns_tk.rb

package info (click to toggle)
texlive-lang 2022.20230122-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 1,447,264 kB
  • sloc: perl: 61,377; xml: 53,781; makefile: 4,525; sh: 4,338; ansic: 2,892; python: 2,861; ruby: 1,031; lisp: 750; awk: 649; java: 159; sed: 142; csh: 25
file content (135 lines) | stat: -rwxr-xr-x 3,592 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#!/usr/bin/env ruby
#
# This script generates hyphenation patterns for Turkmen
#
# This script has been written by Mojca Miklavec <mojca dot miklavec dot lists at gmail dot com>

# Open the output file that receives the generated patterns.
#
# The target lives in the TDS tree. Its relative path is resolved against the
# directory containing this script (not the current working directory), so the
# generator writes to the same file no matter where it is started from.
#
# To write into the current directory instead, use:
# $tr = File.new("hyph-tk.tex", "w")
$tr = File.new(File.expand_path("../../../../../tex/generic/hyph-utf8/patterns/tex/hyph-tk.tex", File.dirname(__FILE__)), "w")

# Write +str+ to the pattern file as TeX comment lines.
#
# "% " is put in front of the text and after every newline in it; wherever
# that leaves a "% " directly followed by a newline (an empty line between
# two lines of text), it is shortened to a bare "%". The result is written
# with +puts+ to the global output stream +$tr+.
def add_comment(str)
	commented = str.gsub("\n", "\n% ").gsub("% \n", "%\n")
	$tr.puts "% #{commented}"
end

# Letter classes used to build the patterns.
# Vowels are kept in two separate groups (front and back) so that
# unnecessary permutations are not generated.
front_vowels = %w[ä e i ö ü]
back_vowels  = %w[a y o u]
consonants   = %w[b ç d f g h j k l m n p r s t w ý z ň ž ş]
# Restricted consonant list for the letter following a consonant cluster;
# it is used to eliminate impossible combinations.
common_suffix_consonants = %w[b ç d g j k l m n p s t ý z ş]


# Begin the output with the file header, emitted as a TeX comment block.
add_comment(<<'HEADER')
Hyphenation patterns for Turkmen (hyph-tk.tex)

Author:  Nazar Annagurban <nazartm at gmail.com>
License: Public domain
Version: 0.1
Date:    16 March 2010

----------------------------------------------------------------------

The file has been auto-generated from generate_patterns_tk.rb
that is part of hyph-utf8.

For more information about UTF-8 hyphenation patterns for TeX and
links to this file see
    http://www.tug.org/tex-hyphen/
HEADER

# The following comment, also used for Basque, applies here:
#
# Some of the patterns below represent combinations that never
# occur in Turkmen. If they did occur, they would be hyphenated
# according to the rules.

# Open the \patterns group; the matching closing brace is written at the end
# of the script.
$tr.puts "\\patterns{"
add_comment("Some suffixes are added through a hyphen. When hyphenating these words, a hyphen is added before the hyphen so that the line ends with a hyphen and the new line starts with a hyphen.")
# Pattern for the explicit hyphen character described in the comment above.
$tr.puts '1-4'

add_comment("Allow hyphen after a vowel if and only if there is a single consonant before next the vowel")
# Vowel + consonant + vowel patterns with the break point after the first
# vowel. Front vowels are combined only with front vowels and back vowels
# only with back vowels; the front group is written first, then the back group.
[front_vowels, back_vowels].each do |vowels|
	vowels.product(consonants, vowels).each do |first, middle, last|
		$tr.puts "#{first}1#{middle}#{last}"
	end
end

add_comment("These combinations occur in words of foreign origin or joined words")
# Vowel pairs that mix the front and back groups. For every consonant one
# pattern per pair is written, in the order listed here.
mixed_vowel_pairs = [
	%w[a i], %w[a e],
	%w[y ä], %w[y i], %w[y e],
	%w[o i], %w[o e],
	%w[u i], %w[u e],
	%w[i a], %w[i o],
	%w[e a], %w[e o],
	%w[ä o], %w[ä a],
	%w[ö a]
]
consonants.each do |c|
	mixed_vowel_pairs.each do |before, after|
		$tr.puts "#{before}1#{c}#{after}"
	end
end

add_comment("Allow hyphen between two consonants (if there is only two of them), except when they are at the begining of the word")
# For every ordered pair of consonants write two patterns: one allowing a
# break between them, and one (anchored with a leading ".") that inhibits the
# break at the start of a word.
consonants.product(consonants).each do |left, right|
	$tr.puts "#{left}1#{right}", ".#{left}2#{right}"
end

add_comment("Patterns for triple consonants. There may be additions to this category, as this list is not exhaustive.")
# Two-consonant clusters that stay together (the "2" between them); a break is
# allowed after the cluster, before each of the common suffix consonants.
cluster_heads = %w[ý2t ý2n ý2d r2t ý2p l2p l2t g2t n2t r2k r2p k2t r2h s2t l2k w2p n2s r2s l2m]
common_suffix_consonants.each do |c|
	cluster_heads.each { |head| $tr.puts "#{head}1#{c}" }
end

add_comment("Exceptions and single word occurence patterns for words of foreign origin i.e. Russian")
# Hand-written patterns, emitted one per line in the order given.
%w[s2k1d l1s2k l1s2t s1t2r n2g1l n1g2r s2k1w].each { |pattern| $tr.puts pattern }

# Close the \patterns group and finish the output file.
$tr.puts "}"
$tr.close