File: ue2gsv.pl

package info (click to toggle)
gtksourceview4 4.8.4-6
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 12,376 kB
  • sloc: ansic: 50,318; xml: 1,432; javascript: 856; perl: 212; sh: 150; php: 48; yacc: 45; ruby: 38; sql: 30; cobol: 20; objc: 19; lisp: 19; fortran: 14; python: 13; makefile: 13; cpp: 8; ml: 3
file content (220 lines) | stat: -rw-r--r-- 5,503 bytes parent folder | download | duplicates (10)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
#!/usr/bin/perl -w

# Convert from UltraEdit syntax definition file format to GtkSourceView's .lang

use strict;

my $language_name = "";
my $string_chars = "";
my $escape_char = "";
my $case_sensitive = 1;
my @line_comments = ();
my $block_comment_on = "";
my $block_comment_off = "";
my %classes;

my $l_seen = 0;
my $this_class_name = "";
my @this_class_keywords = ();

######################################################################
# Parsing

while (<>) {
    my $line = $_;
    
    if ($line =~ /^\/L[0-9]+\"([^\"]+)\"/) {
	$l_seen = 1;
	$language_name = $1;
	&parse_language_line ($line);
	next;
    }

    die "Not a proper UltraEdit syntax file\n" if (!$l_seen);

    # Chop trailing whitespace
    $line =~ s/\s+$//;

    # Skip unhandled control lines and empty lines
    # FIXME: handle "Function String" at the very least and generate pattern items
    next if ($line =~ /^\/(Delimiters|Function String|Indent String|Unindent String)/);
    next if ($line eq "");

    if ($line =~ /^\/C([0-9]+)\s*(.*)/) {
	my $new_class_number = $1;
	my $new_class_name = $2;

	# Save old class first
	if ($this_class_name ne "") {
	    # We need to copy the array, since we're going to store a reference to it
	    my @keywords_copy = @this_class_keywords;
	    $classes{$this_class_name} = \@keywords_copy;
	}
	
	$this_class_name = $new_class_name eq "" ? $new_class_number : $new_class_name;
	# Strip quotes from class name
	$this_class_name =~ s/^\"?(.+)\"$/$1/;
	@this_class_keywords = ();
	next;
    }

    if ($this_class_name eq "") {
	print "I don't have a class to add the keywords to, at line $.\n";
	next;
    }

    # Add keywords to the current class
    my @keys = split /\s+/, $line;
    push @this_class_keywords, @keys;
}


######################################################################
# Output

&xml_reset;

&xml_print ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
&xml_print ("<!-- <!DOCTYPE language SYSTEM \"language.dtd\"> -->\n");
# FIXME: what to do with the mime types?
&xml_print ("<language name=\"$language_name\" ",
	    "version=\"1.0\" section=\"Sources\" ",
	    "mimetypes=\"text/x-whatever\">\n");
&xml_enter;

# Line comments
foreach my $line_comment (@line_comments) {
    &xml_print ("<line-comment name=\"Line Comment\" style=\"Comment\">\n");
    &xml_enter;
    &xml_print ("<start-regex>", &regex_xml_quote ($line_comment), "</start-regex>\n");
    &xml_leave;
    &xml_print ("</line-comment>/\n");
} 

# Block comments
if ($block_comment_on ne "") {
    &xml_print ("<block-comment name=\"Block Comment\" style=\"Comment\">\n");
    &xml_enter;
    &xml_print ("<start-regex>", &regex_xml_quote ($block_comment_on), "</start-regex>\n");
    &xml_print ("<end-regex>", &regex_xml_quote ($block_comment_off), "</end-regex>\n");
    &xml_leave;
    &xml_print ("</block-comment>/\n");
}

# Strings
foreach my $string_delimiter (split / */, $string_chars) {
    &xml_print ("<string name=\"String\" style=\"String\" end-at-line-end=\"TRUE\">\n");
    &xml_enter;
    &xml_print ("<start-regex>", &regex_xml_quote ($string_delimiter), "</start-regex>\n");
    &xml_print ("<end-regex>", &regex_xml_quote ($string_delimiter), "</end-regex>\n");
    &xml_leave;
    &xml_print ("</string>\n");
} 

# Keyword classes
foreach my $class (keys %classes) {
    &xml_print ("<keyword-list name=\"$class\" style=\"Keyword\" case-sensitive=\"",
		$case_sensitive ? "TRUE" : "FALSE", "\">\n");
    &xml_enter;
    foreach my $key (@{$classes{$class}}) {
	&xml_print ("<keyword>", &xml_quote ($key), "</keyword>\n");
    }
    &xml_leave;
    &xml_print ("</keyword-list>\n");
}

# Remaining pattern items 

# FIXME: this is intended to output pattern items with elements from
# classes which can't be expressed as keywords

&xml_leave;
&xml_print ("</language>\n");


######################################################################
# Auxiliary functions

sub parse_language_line
{
    my $line = shift;
    my @parts = split / /, $line;

    while (@parts) {
	my $part = shift @parts;

	# Handle single words first
	if ($part eq "Nocase") {
	    $case_sensitive = 0;
	    next;
	}
	# Handle argument extended phrases
	elsif ($part =~ /Line|Block|File|Escape|String/) {
	    # Eat up @parts until the equal sign
	    while (@parts) {
		my $next_part = shift @parts;
		if ($next_part eq "=") {
		    last;
		}
		$part .= " $next_part";
	    }
	}
	else {
	    next;
	}

	last if ($part eq "File Extensions");

	# Get the argument
	my $argument = shift @parts;
	if ($part =~ /Line Comment|Line Comment Alt/) {
	    push @line_comments, $argument;
	}
	elsif ($part eq "Block Comment On") {
	    $block_comment_on = $argument;
	}
	elsif ($part eq "Block Comment Off") {
	    $block_comment_off = $argument;
	}
	elsif ($part eq "Escape Char") {
	    # Not yet supported in GtkSourceView
	    $escape_char = $argument;
	}
	elsif ($part eq "String Chars") {
	    $string_chars = $argument;
	}
	else {
	    print "Unknown phrase $part\n";
	}
    }
}

my $xml_indent_level;

sub xml_reset  { $xml_indent_level = 0; }
sub xml_enter  { $xml_indent_level += 1; }
sub xml_leave  { $xml_indent_level -= 1; }
sub xml_indent { print "\t" x $xml_indent_level; }
sub xml_print  { &xml_indent; print @_; }

sub xml_quote
{
    $_ = $_[0];
    s/\&/\&amp;/g;
    s/\</\&lt;/g;
    s/\>/\&gt;/g;
    s/\"/\&quot;/g;
    return $_;
}

sub regex_quote
{
    $_ = $_[0];
    s/\*/\\*/g;
    return $_;
}

sub regex_xml_quote
{
    return &xml_quote (&regex_quote ($_[0]));
}