File: ddltrans

package info (click to toggle)
libchado-perl 1.31-6
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, sid
  • size: 44,716 kB
  • sloc: sql: 282,721; xml: 192,553; perl: 25,524; sh: 102; python: 73; makefile: 57
file content (374 lines) | stat: -rwxr-xr-x 15,233 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
#!/usr/bin/env perl

# DDLTRANS: Transform sql schema definitions into various useful formats.
# (DDL=data definition language, the schema defining subset of SQL -- create table, etc.)

# Syntax: ddltrans [-s schemaname] [-f format] ddlfiles
# Writes to stdout.

# Converts ddl files to dtd, xml, perl or html according to format arg. Default is html.

# -s arg is used to define outermost element in XML; required iff format is dtd.

# -dtd: generates a DTD according to translation rules of XORT (XML Object To Relational 
# Translator) system, which specify a fixed mapping from a relational schema to an (optionally 
# hierarchical, i.e. object-like) XML format.
# There is a loader and dumper that will load/dump XORT XML into corresponding PostGreSQL databases.

# -perl: generates a hierarchical Perl data structure (hashes and arrays) containing all the
# metadata extracted from DDL, for use by schema-driven perl applications. Suitable for loading
# with "do".

# -html: generates very simple html presentation of schema, suitable (possibly) for developers.

# -xml: generates straighforward XML representation of schema suitable for schema-driven applications
# in languages other than Perl; e.g. Java.

# WARNING: The SQL parser is not robust; stylistic variations of legal SQL may break it.

use Getopt::Std;
use Data::Dumper; 

my %opts;
getopts('s:f:', \%opts);


# open(STDIN,"/users/stan/sql/sequence.sql") || die "Can't read .sql"; $opts{f}=dtd;

$date=`date`; chop($date);

$schema = {_entity=>set}; # set of tables
$currobj=0;
$output_format = "html"; # default
$known_format{xml}=1;
$known_format{perl}=1;
$known_format{html}=1;
$known_format{dtd}=1;
if(defined($opts{f}))
{ if($known_format{$opts{f}})
  { $output_format = $opts{f}; }
  else { die "Unknown format $opts{f} -- supported formats are perl, xml and html.\n"; }}

if( $output_format eq "dtd"){
  die "Must supply -s schemaname when generating dtd\n" if ! defined($opts{s});
  $schemaname=$opts{s}; 
}


while(<>){
    chop;
    next if $_ !~ /\S/;  # skip blank lines
    next if /^##/;  # skip blank lines
    if( /^\s*(\S.*\S)\s*$/ ){ $_ = $1; } # eliminate leading/trailing blanks
    if(/^([^-]*)\-\-(.*)$/) # handle comments
    { $_=$1; $newcomment=$2;
      if($newcomment !~ /=====*/ & $newcomment !~ /TABLE/ ) # ignore header comments
      { $comment="$comment $newcomment"; # concat comments 
	if($comment=~/^\s+(\S.*\S)\s*$/){ $comment=$1; }} # remove blanks
  }
    next if $_ !~ /\S/;
    if( /^(.*\S)\s+$/ ){ $_ = $1; }
    if(/create table\s+(\S+)\s\((.*)$/) {
	die Dumper($currobj) if $currobj;
	$currobj = { _entity=>table, 
		     name=>$1, column=>{_entity=>list, _order=>[]}};
	if( $output_format eq "xml"){ $currobj->{_order} = [name, comment, column, unique, "index"]; }
	if($comment){ $currobj->{comment}=$comment; $comment=""; }
	$rest=$2;
	if( $rest =~ /\S/ ){ die "Unexpected stuff after ( in create table line"; }
	next;
    } elsif( $currobj->{_entity} eq "table" ){
	if(/^\s*(\S.*),\s*$/){ $_=$1;}      # lose any final comma
	if( /^(.*)\)\s*;\s*/ ){ $_=$1; $eoc=1; } # lose any final );
	if(/foreign\s+key\s+\(\s*(\S+)\s*\)\s+references\s+\s*(\S+)\s*\s+\(\s*(\S+)\s*\)/)
	{ $fkcol=$1;
	  $keytable=$2;
	  $keycol=$3; 
	  $currobj->{column}->{$fkcol}->{fk_table}=$keytable;
	  $currobj->{column}->{$fkcol}->{fk_column}=$keycol;
      }
	elsif(/primary\s+key\s+\(\s*(\S+)\s*\)/)
	{ $currobj->{primarykey}=$1; 
	  die "Can't find primary key $1 of $currobj->{name}" if ! defined($currobj->{column}->{$1});
	  $currobj->{column}->{$1}->{primarykey}="yes";
      }
	elsif(/^unique\s*\(\s*(\S.*)\s*\)$/) # Assumes only one to a table -- not correct?
	{ @ucols=split(/\s*,\s*/,$1);
	  foreach $col (@ucols)
	  { die "Can't find $currobj->{name}.$col in uniqueness constraint" if ! defined($currobj->{column}->{$col});
	    push(@{$currobj->{unique}}, $col);
	    $currobj->{column}->{$col}->{unique}=@ucols+0; # store of # columns in unique key under each col: eg 1 of 3, 1 or 2, etc.
	}
      }
	elsif(/^(\S+)\s+(\S.*)$/)
	{ $colname=$1;
	  $type=$2;
	  if($type=~/^(\S+)\s(\S.*)$/){ $type = $1; $rest=$2; }
	  else { $rest=""; }
	  $col={_entity=>"column", 
		name=>$colname, type=>$type };
	  if( $output_format eq "xml")
	  { $col->{_order} = [name, type, allownull, comment, fk_table, fk_column, foreign_references]; }
	  $col->{allownull}="yes";
	  if( $rest )
	  { if($rest =~ /\s*not\s+null/i ){  
	      $col->{allownull}="no"; }
	    if($rest =~ /default\s+(\S.*)$/ )
	    { $col->{default}=$1; }}
	  if( $comment ){ $col->{comment}=$comment; $comment=""; }
	  $currobj->{column}->{$colname}=$col;
	  push(@{$currobj->{column}->{_order}},$colname);
      }}
    elsif( /^\s*\)\s*;\s*$/ ){
	$eoc=1;
    }
    elsif(/create index\s+(\S+)\s+on\s+(\S+)\s*\(\s*(\S+)\s*\)\s*;$/i) {
	$index=$1; $table=$2; $cols=$3;
	$schema->{$table}->{indexes}->{$index} = { _entity=>"index", name=>$index, columns=>$cols };
	$schema->{$table}->{indexes}->{_entity} = set;
    }
    elsif(/^grant/i)
    { # ignore permissions for now; should annotate tables
    }
    elsif(/^drop/i)
    { # ignore 
    }
    elsif(/^\s*create /i)
    { # ignore 
	if($_ !~ /;$/){ while(<>){ last if /;\s*$/; }}
    }
    else { warn "Unrecognized sql: $_\n"; if($_ !~ /;$/){ while(<>){ last if /;$/; }}}
    if($eoc)
    { die "No name in ", Dumper($currobj) if ! defined($currobj->{name});
      $schema->{$currobj->{name}} = $currobj;
#      print Dumper($currobj), "\n"; 
      $currobj=0;
      $eoc=0;
    }
}


# Create inverse foreign key reference info
foreach $table (keys(%{$schema}))
{ foreach $col (keys(%{$schema->{$table}->{column}}))
  { next if $col =~ /^\_/;
    if(defined($schema->{$table}->{column}->{$col}->{fk_table}))
    { $fk_table=$schema->{$table}->{column}->{$col}->{fk_table};
      $fk_col=$schema->{$table}->{column}->{$col}->{fk_column};
      if(defined($schema->{$fk_table}))
      { if(defined($schema->{$fk_table}->{column}->{$fk_col}))
	{ push(@{$schema->{$fk_table}->{column}->{$fk_col}->{foreign_references}},
	       {table=>$table, column=>$col}); }
	else { warn "Foreign key reference from $table.$col to unknown column $fk_col of $fk_table.\n"; }}
      else  { warn "Foreign key reference from $table.$col to unknown table $fk_table.$fk_col.\n"; }}}}

if( $output_format eq "perl"){ 
  print "# Schema metadata produced by ddltrans on $date\n"; 
  print Data::Dumper->Dump([$schema], [schema]), "\n"; }

elsif( $output_format eq "xml"){ 
  print "<!-- Schema metadata produced by ddltrans on $date>\n\n"; 
  print_xml( {schema=>{schema=>$schema}} ); }

elsif( $output_format eq "html"){
  print "<em>Schema metadata produced by ddltrans on $date</em>\n\n"; 
  print print_html($schema); }
elsif( $output_format eq "dtd"){ print_dtd($schema); }
else { die "Unrecognized output format $output_format"; }

sub print_xml {
    my($x, $indent)=@_;

    if( ! defined($indent)){ $indent = 0; }
#    print ref($x), "\n"; # Dumper($x); 
    if(ref($x) eq "HASH")
    { my($k);
      my(@keys)=keys(%{$x});
      if(defined($x->{_order}))
      { @keys=(); foreach $k (@{$x->{_order}}){ if(defined($x->{$k})){ push(@keys, $k); }}}
      if($x->{_entity} eq "set" | $x->{_entity} eq "list") 
      { foreach $k (@keys)
	{ if($k !~ /^_/){ print_xml( $x->{$k}, $indent); }}}
      else { 
	  if(defined($x->{_entity})){ print "\n", " " x $indent, "<$x->{_entity}>"; $indent += 3; }
	  foreach $k (@keys)
	  { next if $k =~ /^\_/;
	    my $show_ent=1;
	    if( ref($x->{$k}) eq "HASH")
	    { $show_ent=0 if $x->{$k}->{_entity} eq "set" || $x->{$k}->{_entity} eq "list"; }
	    if($show_ent)
	    { print "\n", " " x $indent, "<$k>";
	      print_xml( $x->{$k}, $indent + 3);
	      if( $show_ent & ref($x->{$k}) eq "HASH" | ref($x->{$k}) eq "ARRAY")
	      { print "\n", " " x $indent; } 
	      print "</$k>"; }
	    else { print_xml( $x->{$k}, $indent); }
	}
	  if(defined($x->{_entity})){ $indent -= 3; print "\n", " " x $indent, "<\\$x->{_entity}>";  }}
  }
    elsif(ref($x) eq "ARRAY" )
    { my($i);
      for($i=0;$i<(@{$x});$i++)
      { print "\n", " " x $indent, "<li>";
	print_xml( $x->[$i], $indent + 4 );
	print "<\\li>"; }}
    else { print $x; }
}


sub print_html {
    my($schema, $indent)=@_;
    
    print "<html>\n";
    foreach $table (sort(keys(%{$schema})))
    { next if $table =~ /^_/;
      print "<h2>$table</h2>\n$schema->{$table}->{comment}\n<table border=1><tr><th>Name</th><th>Type</th><th>NULL?</th><th>Comment</th><th>References</th></tr>\n";
      foreach $colname (keys(%{$schema->{$table}->{column}}))
      { next if $colname =~ /^_/;
	$col = $schema->{$table}->{column}->{$colname};
	print "<tr><td>$col->{name}</td><td>$col->{type}</td><td>$col->{allownull}</td><td>$col->{comment}</td><td>";
	if(defined($col->{fk_table}))
	{ print "$col->{fk_table}.$col->{fk_column}"; }
	elsif(defined($col->{foreign_references}))
	{ foreach $fk (@{$col->{foreign_references}})
	  { print "<li>$fk->{table}.$fk->{column}"; }}
	print "</td></tr>\n"; }
      print "</table>\n\n"; 
  }
}

sub print_dtd {
    my($schema, $tables)=@_;
     
    print "<?xml version='1.0' encoding='UTF-8' ?>\n\n";
    print "<!-- **********************  $schemaname XML DTD **************************
  autogenerated on $date by ddltrans
  for use with XORT dumper and loader.

Relational Schema to DTD Conversion rules:
-Outermost element is <$schemaname> -- permits any table elements in any order as children
-Every table has a corresponding element. Table elements have a child element for every
 column except the primary key, in order, followed optionally by all tables
 containing foreign keys to this table, in any order.
-Tables may be nested inside other tables iff the inner table
 has a foreign key column to the primary key of the outer table.
 If the foreign key field is required, it can be omitted, as it
 will be inferred from the nesting context.
-Table elements take an optional id attribute which can be used to define
 a local id for the table row within the file. This id can be any string,
 and it can be used in foreign key fields to refer to the table row.
-Table elements take on optional op attribute which can be insert, update,
 delete, force, or lookup. insert is default. Lookup is used to retrieve
 primary keys of objects in DB.
-The number of instances of a column element within a table element is
 context dependent. In an insert operation, non-null data columns must
 appear once. Required foreign key columns may be omitted if they can
 be inferred from nesting context -- the first outer element of the
 appropriate type will be plugged in. In an update op, unique key columns
 may appear twice if the key is being modified: once to specify the
 object to be updated in terms of its unique key values, and once to
 specify the new values (with op=update).
-Column elements have op=update attribute if the column is being
 updated.
-Foreign key columns may contain any of the following:
    -an instance of the table element for the referenced table --
      typically this will have op=lookup and supply the values of those
      columns that make up a unique key for this table
    -a local id defined previously in the file (def before ref!)
    -a global accession number. This applies only to objects that have
     a primary dbxref in their maintable (e.g. feature, cvterm)
     and the value must match an entry in the dbxref table.
-When op is update or delete, the affected object can be specified by a 
 ref=value attribute, where value is a local id or global accession number.

-->\n\n";
    $tablenames=[];
    foreach $tablename (sort(keys(%{$schema})))
    { next if $tablename =~ /^_/;
      next if $tablename =~ /_audit$/;
      push(@{$tablenames},$tablename); }
    print "<!ELEMENT $schemaname ( ", join(" | ", @{$tablenames}), ")*>\n";
    print "<!ATTLIST $schemaname\n\tdumpspec CDATA #IMPLIED\n\tdata CDATA #IMPLIED>\n\n";
    foreach $tablename (@{$tablenames})
    {  $table=$schema->{$tablename};
       $colnames=$table->{column}->{_order};

       # print documentation comment
       print "\n<!-- ********************* TABLE $tablename *****************************\n";
       for $colname (@{$colnames})
       { $col=$table->{column}->{$colname};
	 print "*\t$col->{name}\t$col->{type}";
	 if($col->{allownull} eq "no"){ print "\tnot null"; }
	 if($col->{default}){ print "\tdefault $col->{default}"; }
	 if($col->{primarykey}){ print "\tprimary key"; }
	 if($col->{fk_table}){ print "\tforeign key to $col->{fk_table}.$col->{fk_column}"; }
	 if($col->{unique}){ print "\tunique($col->{unique})"; }
	 if($col->{comment}){print " - $col->{comment}"; }
	 print "\n"; }
       print "     ************************************************************************* -->\n";
       # print table element
       print "<!ELEMENT $tablename ";

       # column subelements
       if(@{$colnames}){
	 print "(";
	 $dlm="";
	 for $colname (@{$colnames})
	   { next if $colname eq $table->{primarykey};
	     # Cardinality of table fields:
	     # if part of a unique key, may appear twice in an update context: once to refer, once to modify
	     # if a foreign key, may be ommitted due to hierarchy rule
	     # if a non-null field, may still be ommitted in an update or delete context.
	     $card="?";
	     if( defined($table->{column}->{$colname}->{unique}) ) { $card="*"; }
	     print "$dlm$colname"; $dlm = " | "; }
	 print ")* ";
       }
       
       # linking table subelements
       my($joins)=$table->{column}->{$table->{primarykey}}->{foreign_references};
       if(@{$joins}+0)
       { print "\n\t, ( ", $dlm = "";
	 $printed={};
	 foreach $join (@{$joins})
	 { next if $printed->{$join->{table}};
	   $printed->{$join->{table}}=1;
	   print $dlm, $join->{table}; $dlm=" | "; }
	 print ")*\n "; }

       print ">\n";  # end table element
       
       # Table element ttribute list
       print "\n<!ATTLIST $tablename\n\tid CDATA #IMPLIED\n\tref CDATA #IMPLIED\n\top (lookup | insert | update | force | delete) #IMPLIED>\n";
	   
       # column elements
       for $colname (@{$colnames})
       { next if $colname eq $table->{primarykey};
	 $col=$table->{column}->{$colname};
	 if(defined($col->{fk_table})){ $def=$col->{fk_table}; }
	 else { $def=$col->{type}; }
	 if($defined{$colname})
	 { if($def ne $defined{$colname}->{to})
	   { next if canonical_type( $def ) eq canonical_type( $defined{$colname}->{to});
	     warn "Warning: Incompatible definitions of column `$colname':\n\tin table $defined{$colname}->{from} as $defined{$colname}->{to}\n\tin table $tablename as $def\n"; }
	   next; }
	 $defined{$colname}={from=>$tablename, to=>$def};
	 print "<!ELEMENT $colname ";
	 if($col->{fk_table}){ print "( #PCDATA | $col->{fk_table} )"; }
	 else { print "(#PCDATA)"; }
	 print " >"; 
	 print "\t<!ATTLIST $colname op (update) #IMPLIED>";
	 print "\n";
     }
   }
}

sub canonical_type {
# canonicalize datatypes for purposes of checking compatibility of XML entity defs
# eg varchar and text are equivalent, int and integer, etc.
    my($type)=@_;
    if( $type =~ /^int/ ){ return "int"; }
    elsif( $type =~ /^varchar/ ){ return "char"; }
    elsif( $type eq "text" ){ return "char"; }
    else { return $type; }
}