File: cif_schema.sql

package info (click to toggle)
cif-api 0.4.2-6
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,472 kB
  • sloc: ansic: 19,411; lex: 246; makefile: 201; sql: 164; yacc: 115; sh: 37
file content (327 lines) | stat: -rw-r--r-- 12,575 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
--
-- Database schema for arbitrary CIF
--
-- Copyright 2014, 2015 John C. Bollinger
--
--
-- This file is part of the CIF API.
--
-- The CIF API is free software: you can redistribute it and/or modify
-- it under the terms of the GNU Lesser General Public License as published
-- by the Free Software Foundation, either version 3 of the License, or
-- (at your option) any later version.
--
-- The CIF API is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-- GNU Lesser General Public License for more details.
--
-- You should have received a copy of the GNU Lesser General Public License
-- along with the CIF API.  If not, see <http://www.gnu.org/licenses/>.
--

--
-- This schema relies on and represents only the general, underlying data model
-- of CIF.  As a design criterion, it is a closed schema, in the sense that it
-- defines all tables necessary to model any CIF.  In particular, it stores all
-- data values in the same table, rather than in separate per-loop tables,
-- because there is no way to predict which per-loop tables would be needed for
-- arbitrary CIF.
--
-- The database defined by this schema represents one logical CIF, organized at
-- the topmost level as a collection of data blocks.  Separate CIFs require
-- separate databases (but not necessarily separate DBMSs).
--
-- This schema relies to some extent on the characteristics of SQLite, especially
-- dynamic typing.  SQLite does not enforce limits on values' sizes based on their
-- declared data types (e.g. varchar(80)), nor does it automatically coerce values
-- to the nominal type declared for the column in which they are stored.
--
-- The text of this schema is processed at build time into a collection of C
-- string literals.  So that this is feasible without a full-blown SQL parser,
-- the following constraints must be followed:
-- 1. begin ... end blocks may not be nested.
-- 2. "end" keywords terminating blocks must be followed by semicolons on the
--    same line.
-- 3. Only whitespace may appear after "end;" on any line.
-- 4. The character sequence "begin", in any case combination, must not appear
--    on a non-comment line except as the opening "begin" keyword of a block, or
--    as the short form of "begin transaction".  A semicolon must follow on the
--    same line in the latter case, and must NOT follow anywhere on the same
--    line in the former.
-- 5. Only space characters and line terminators may be used as whitespace.
--
-- To make the process a little easier on the build system, the following is also
-- required:
-- 6. Comment lines must not end with semicolons.
--

--
-- Represents a data block or save frame to which data items may belong.  A
-- corresponding row is expected to exist (be created) in either the data_block
-- table or the save_frame table.
--
-- Using an AUTOINCREMENT primary key prevents container IDs from ever being
-- reused in a given database.  That can be important for avoiding confusion
-- if containers are deleted and new ones thereafter created.
-- 
create table container (
  id integer primary key autoincrement,
  next_loop_num integer not null default 0
);

--
-- Represents a container that is a data block, rather than a save frame.  All
-- data blocks in the database must have distinct, nonempty names.
--
create table data_block (
  container_id integer primary key,
  name varchar(80) not null,
  name_orig varchar(80) not null,
  
  foreign key (container_id)
    references container(id)
    on delete cascade,
  -- A UNIQUE constraint will cause an unique index to be created automatically
  unique (name)
);

--
-- Represents a container that is a save frame, rather than a data block.  Each
-- is associated with a parent data block or save frame that contains it.  Save
-- frame names (frame codes) must be distinct within their containers and nonempty.
--
create table save_frame (
  container_id integer primary key,
  parent_id integer not null,
  name varchar(80) not null,
  name_orig varchar(80) not null,
  
  foreign key (container_id)
    references container(id)
    on delete cascade,
  foreign key (parent_id)
    references container(id)
    on delete cascade,
  -- A UNIQUE constraint will cause an unique index to be created automatically
  unique (parent_id, name),
  check (container_id != parent_id)
);

--
-- Represents loops (including implicit ones) present in any container.
-- Loops associate data items into groups and their associated values into
-- packets.  The CIF (or other) category to which the data in the loop are
-- expected to belong can also be recorded here, if it is known.
--
-- loop numbers are required to be non-negative
--
create table loop (
  container_id integer not null,
  loop_num integer not null,
  category varchar(80),
  last_row_num integer default 0,
 
  primary key (container_id, loop_num),
  foreign key (container_id)
    references container(id)
    on delete cascade,
  check (loop_num >= 0)
);

--
-- This index supports queries by category, as well as the next two triggers.
--
create index ix1_loop
  on loop (category, container_id);

--
-- Restrict each container to one scalar loop on insert
--
create trigger tr1_loop
  before insert on loop
  when NEW.category = ''
  begin
    select raise(ABORT, 'duplicate scalar loop')
      from loop
      where container_id = NEW.container_id and category = '';
  end;

--
-- Restrict each container to one scalar loop on update
--
create trigger tr2_loop
  before update of category on loop
  when (NEW.category = '') and (OLD.category is not '')
  begin
    select raise(ABORT, 'duplicate scalar loop')
      from loop
      where container_id = NEW.container_id and category = '';
  end;

--
-- Restrict scalar loops to at most one row on insert
--
-- This provides only an *advisory* restriction, relying on the the database
-- user to draw on loop.last_row_num to choose row numbers for new loop
-- packets.
--
create trigger tr3_loop
  before insert on loop
  when NEW.category = '' and (coalesce(NEW.last_row_num, 0) != 0)
  begin
    select raise(ABORT, 'Attempted to create multiple values for a scalar');
  end;

--
-- Restrict scalar loops to at most one row on update
--
-- This provides only an *advisory* restriction, relying on the the database
-- user to draw on loop.last_row_num to choose row numbers for new loop
-- packets.
--
create trigger tr4_loop
  before update on loop
  when NEW.category = '' and (coalesce(NEW.last_row_num, 0) > 1)
  begin
    select raise(ABORT, 'Attempted to create multiple values for a scalar');
  end;

--
-- This view merely provides a place to hang an "instead of" trigger
-- facilitating automatic generation of loop numbers within each container.
-- If loops are inserted only via this view, then loop numbers are
-- guaranteed to increase monotonically within each container.
--
create view unnumbered_loop as
  select container_id, category
  from loop;

create trigger tr1_unnumbered_loop
  instead of insert on unnumbered_loop
  begin
--  Substitute an insert statement on loop, with a non-null loop number
    insert into loop(container_id, loop_num, category)
      values (NEW.container_id,
--      The following evaluates to NULL if there is no container with the given
--      id; that's no problem, because in that case the insertion was already
--      going to fail
        (select next_loop_num from container where id = NEW.container_id),
        NEW.category);
    update container
      set next_loop_num = next_loop_num + 1
      where id = NEW.container_id;
  end;

--
-- Associates an item with a particular loop in the scope of a given container
--
create table loop_item (
  container_id integer not null,
  name varchar(80) not null,
  name_orig varchar(80) not null,
  loop_num integer not null,
  
  primary key (container_id, name),
  foreign key (container_id, loop_num)
    references loop(container_id, loop_num)
    on delete cascade
);

create index ix1_loop_item
  on loop_item (container_id, loop_num);

--
-- Represents a single data value; particularly, the one for item 'name' in
-- packet number 'row_num' of the appropriate loop in the container
-- identified by 'container_id'.
--
-- The 'kind' column encodes the data type of the value:
-- 0 = CHARACTER
-- 1 = NUMBER
-- 2 = LIST
-- 3 = TABLE
-- 4 = N/A
-- 5 = UNKNOWN
-- Only when the kind is one of the last two may 'val' be null
--
-- val_text records a text representation of CHARACTER and NUMBER values,
-- whereas val records a parsed value specific to the value kind: a copy of
-- the value text for kind 0, a parsed double value for kind 1, or a
-- serialized list or table for kinds 2 and 3, respectively.  Both val and
-- val_text are NULL for kinds 4 and 5.
-- 
-- For kind 1, val_digits and su_digits record decimal digit-string
-- representations of the value and its standard uncertainty, with the
-- rightmost digit of each being interpreted as appearing in the
-- 10^(-scale) position.  These support arbitrary-precision fixed point
-- numbers of any scale expressible in CIF, but not necessarily in the
-- floating-point or integer format in which the DB stores numeric values.
-- su_digits is NULL for exact numbers.  The sign associated with the
-- (digit string, scale) representation of the value is taken from the
-- beginning of value_text; there is no separate field for it.
-- These fields are null for kinds other than 1.
--
-- IMPORTANT: If the precision or scale of a numeric CIF value exceeds that
-- representable in the floating-point format used by the DB engine (8-byte
-- IEEE floating point for SQLite 3 on x86 and x86_64) then val will at best
-- fail to express that value to its correct precision.  In such a case it
-- might also be denormalized, or it might express an infinite or a
-- not-a-number value.  That does not interfere with storage and retrieval
-- fidelity, however, as those rely on the digit-string representations
-- of the value and uncertainty.
--
-- Note also that this schema does not enforce the presence of an explicit value
-- for each item in each loop packet.  Where a loop contains an item (as defined
-- by table loop_item) but no value is given for it in this table for a
-- given row_num, it should be interpreted as if a value existed in that
-- row with kind = 5, except as described next.
--
-- Row / packet numbers are not guaranteed to be consecutive for any given
-- loop.  Where there is a gap between row numbers for a given loop, it
-- means only that there are no packets having the corresponding number.
-- In particular, it does NOT imply the existence of any packets with
-- all-unknown values (though such packets can be represented explicitly).
--
-- Rationale:
-- The structure of this table addresses these design considerations:
-- 1. It must record in full fidelity all values representable via the
--    data types used by the CIF API, including numeric values that cannot
--    be expressed exactly as doubles.
-- 2. It must readily support selection predicates pertaining to the character
--    or numeric values represented by rows.
-- 3. Deletions from this table must suffice to effect loop packet deletions.
--    Updates to this or any other table must not be required for that purpose.
-- 4. Some denormalization is preferrable to requiring common queries to
--    perform extra joins, especially outer joins.
-- 5. Selection predicates pertaining to the contents of list or table values
--    do NOT need to be readily supported by this table.
--
create table item_value (
  container_id integer not null,
  name varchar(80) not null,
  row_num integer not null,
  kind integer(1),
  quoted integer(1),
  -- specific to kinds 0 - 3:
  val numeric,
  -- specific to kinds 0 and 1:
  val_text varchar(80),
  -- specific to kind 1:
  val_digits varchar(15),
  su_digits varchar(15),
  scale integer(4),
  
  primary key (container_id, name, row_num),
  foreign key (container_id, name)
    references loop_item(container_id, name)
    on delete cascade,
  check (row_num > 0),
  check (case when (val is null) then kind in (4, 5) else kind in (0, 1, 2, 3) end),
  check ((val_text is null) = (kind not in (0, 1))),
  check (case when (kind = 1) then (scale is not null)
        and (length(val_digits) > 0) and (val_digits not glob '*[^0-9]*')
        and ((su_digits is null) or ((length(su_digits) > 0) and (su_digits not glob '*[^0-9]*')))
      else (coalesce(val_digits, su_digits, scale) is null) end)
);