File: gb_graph.w

package info (click to toggle)
sgb 1:20030623-3
  • links: PTS
  • area: non-free
  • in suites: sarge
  • size: 1,868 kB
  • ctags: 28
  • sloc: makefile: 197; sh: 15
file content (941 lines) | stat: -rw-r--r-- 38,185 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
% This file is part of the Stanford GraphBase (c) Stanford University 1993
@i boilerplate.w %<< legal stuff: PLEASE READ IT BEFORE MAKING ANY CHANGES!

\def\title{GB\_\,GRAPH}

@* Introduction. This is {\sc GB\_\,GRAPH}, the data-structure module
used by all GraphBase routines to allocate memory. The basic data
types for graph representation are also defined~here.

Many examples of how to use these conventions appear in other GraphBase
modules. The best introduction to such examples can probably be found
in {\sc GB\_\,BASIC}, which contains subroutines for generating and
transforming various classical graphs.

@ The code below is believed to be system-independent; it should
produce equivalent results on all systems, assuming that the standard
|calloc| and |free| functions of \CEE/ are available.

However, a test program helps build confidence that everything does in fact
work as it should. To make such a test, simply compile and run \.{test\_graph}.
This particular test is fairly rudimentary, but it should be passed before
more elaborate routines are tested.

@(test_graph.c@>=
#include "gb_graph.h"  /* all users of {\sc GB\_\,GRAPH} should do this */
@<Declarations of test variables@>@;
@#
int main()
{
  @<Create a small graph@>;
  @<Test some intentional errors@>;
  @<Check that the small graph is still there@>;
  printf("OK, the gb_graph routines seem to work!\n");
  return 0;
}

@ The \CEE/ code for {\sc GB\_\,GRAPH} doesn't have a main routine;
it's just a bunch of subroutines waiting to be incorporated into
programs at a higher level via the system loading routine. Here is
the general outline of \.{gb\_graph.c}:

@p
#ifdef SYSV
#include <string.h>
#else
#include <strings.h>
#endif
#include <stdio.h>
#include <stdlib.h>
@h@#
@<Type declarations@>@;
@<Private declarations@>@;
@<External declarations@>@;
@<External functions@>

@ The type declarations of {\sc GB\_\,GRAPH} appear also in the header file
\.{gb\_graph.h}. For convenience, that header file also incorporates the
standard system headers for input/output and string manipulation.

Some system header files define an unsafe macro called |min|, which will
interfere with GraphBase use of a useful identifier. We scotch that.

@(gb_graph.h@>=
#include <stdio.h>
#include <stdlib.h>
#ifdef SYSV
#include <string.h>
#else
#include <strings.h>
#endif
#undef min
@<Type declarations@>@;

@ GraphBase programs often have a ``verbose'' option, which needs to
be enabled by the setting of an external variable. They also tend to have
a variable called |panic_code|, which helps identify unusual errors.
We might as well declare those variables here.

@<External d...@>=
long verbose=0; /* nonzero if ``verbose'' output is desired */
long panic_code=0; /* set nonzero if graph generator returns null pointer */

@ Every external variable should be declared twice in this \.{CWEB}
file: once for {\sc GB\_\,GRAPH} itself (the ``real'' declaration for
storage allocation purposes) and once in \.{gb\_graph.h} (for
cross-references by {\sc GB\_\,GRAPH} users).

@(gb_graph.h@>=
extern long verbose; /* nonzero if ``verbose'' output is desired */
extern long panic_code; /* set nonzero if graph generator panics */

@ When |panic_code| is assigned a nonzero value, one of the symbolic
names defined here is used to help pinpoint the problem.
Small values indicate memory limitations; values in the 10s and 20s
indicate input/output anomalies; values in the 30s and 40s indicate
errors in the parameters to a subroutine. Some panic codes
stand for cases the author doesn't think will ever arise, although
the program checks for them just to be extra safe. Multiple instances
of the same type of error within a single subroutine are distinguished
by adding an integer; for example, `|syntax_error+1|' and `|syntax_error+2|'
identify two different kinds of syntax error, as an aid in trouble-shooting.
The |early_data_fault| and |late_data_fault| codes are explained further
by the value of |io_errors|.

@(gb_graph.h@>=
#define alloc_fault (-1) /* a previous memory request failed */
#define no_room 1 /* the current memory request failed */
#define early_data_fault 10 /* error detected at beginning of \.{.dat} file */
#define late_data_fault 11 /* error detected at end of \.{.dat} file */
#define syntax_error 20 /* error detected while reading \.{.dat} file */
#define bad_specs 30 /* parameter out of range or otherwise disallowed */
#define very_bad_specs 40 /* parameter far out of range or otherwise stupid */
#define missing_operand 50 /* graph parameter is |NULL| */
#define invalid_operand 60 /* graph parameter doesn't obey assumptions */
#define impossible 90 /* ``this can't happen'' */

@* Representation of graphs. The GraphBase programs employ a simple
and flexible set of data structures to represent and manipulate graphs
in computer memory.  Vertices appear in a sequential array of
\&{Vertex} records, and the arcs emanating from each vertex appear in
a linked list of \&{Arc} records. There is also a \&{Graph} record, to
provide information about the graph as a whole.

The structure layouts for \&{Vertex}, \&{Arc}, and \&{Graph} records
include a number of utility fields that can be used for any purpose by
algorithms that manipulate the graphs. Each utility field is a union
type that can be either a pointer of various kinds or a (long) integer.

Let's begin the formal definition of these data structures by declaring the
union type \&{util}. The suffixes .|V|, .|A|, .|G|, and .|S| on the name
of a utility variable mean that the variable is a pointer to a vertex, arc,
graph, or string, respectively; the suffix .|I| means that the variable is
an integer. (We use one-character names because such names are easy to type
when debugging.)

@<Type dec...@>=
typedef union {
  struct vertex_struct *V; /* pointer to \&{Vertex} */
  struct arc_struct *A; /* pointer to \&{Arc} */
  struct graph_struct *G; /* pointer to \&{Graph} */
  char *S; /* pointer to string */
  long I; /* integer */
} util;

@ Each \&{Vertex} has two standard fields and six utility fields; hence it
occupies 32 bytes on most systems, not counting the memory needed for
supplementary string data. The standard fields are
$$\vcenter{\halign{#,\ \ \hfil&#\hfil\cr
|arcs|&a pointer to an \&{Arc};\cr
|name|&a pointer to a string of characters.\cr}}$$
If |v| points to a \&{Vertex} and |v->arcs| is |NULL|, there are no arcs
emanating from~|v|. But if |v->arcs| is non-|NULL|, it points to an \&{Arc}
record representing an arc from~|v|, and that record has a |next| field that
points in the same way to the representations of all other arcs from~|v|.

The utility fields are called |u|, |v|, |w|, |x|, |y|, |z|. Macros can
be used to give them syntactic sugar in particular applications. They are
typically used to record such things as the in-degree or out-degree, or
whether a vertex is `marked'. Utility fields might also link the vertex
to other vertices or arcs in one or more lists.

@<Type dec...@>=
typedef struct vertex_struct {
  struct arc_struct *arcs; /* linked list of arcs coming out of this vertex */
  char *name; /* string identifying this vertex symbolically */
  util u,v,w,x,y,z; /* multipurpose fields */
} Vertex;

@ Each \&{Arc} has three standard fields and two utility fields. Thus it
occupies 20~bytes on most computer systems. The standard fields are
$$\vcenter{\halign{#,\ \ \hfil&#\hfil\cr
|tip|&a pointer to a |Vertex|;\cr
|next|&a pointer to an \&{Arc};\cr
|len|&a (long) integer.\cr}}$$
If |a| points to an \&{Arc} in the list of arcs from vertex~|v|, it represents
an arc of length |a->len| from |v| to |a->tip|, and the next arc from |v|
in the list is represented by |a->next|.

The utility fields are called |a| and |b|.

@<Type dec...@>=
typedef struct arc_struct {
  struct vertex_struct *tip; /* the arc points to this vertex */
  struct arc_struct *next; /* another arc pointing from the same vertex */
  long len; /* length of this arc */
  util a,b; /* multipurpose fields */
} Arc;

@* Storage allocation. Memory space must be set aside dynamically for
vertices, arcs, and their attributes. The GraphBase routines provided by
{\sc GB\_\,GRAPH} accomplish this task with reasonable ease and efficiency
by using the concept of memory ``areas.'' The user should first declare an
\&{Area} variable by saying, for example,
$$\hbox{\&{Area} |s|;}$$
and if this variable isn't static or otherwise known to be zero, it must be
cleared initially by saying `|init_area(s)|'. Then any number of subroutine
calls of the form  `|gb_alloc(n,s)|' can be given; |gb_alloc|
will return a pointer to a block of |n| consecutive bytes, all cleared to zero.
Finally, the user can issue the command
$$\hbox{|gb_free(s)|;}$$
this statement will return all memory blocks currently allocated to area~|s|,
making them available for future allocation.

The number of bytes |n| specified to |gb_alloc| must be positive, and
it should usually be 1000 or more, since this will reduce the number
of system calls. Other routines are provided below to allocate smaller
amounts of memory, such as the space needed for a single new \&{Arc}.

If no memory of the requested size is presently available, |gb_alloc|
returns the null pointer |NULL|. In such cases |gb_alloc| also sets
the external variable |gb_trouble_code| to a nonzero value. The user
can therefore discover whether any one of an arbitrarily long series
of allocation requests has failed by making a single test, `|if
(gb_trouble_code)|'. The value of |gb_trouble_code| should be cleared to zero
by every graph generation subroutine; therefore it need not be
initialized to zero.

A special macro |gb_typed_alloc(n,t,s)| makes it convenient to allocate
the space for |n| items of type~|t| in area~|s|.

@d gb_typed_alloc(n,t,s) @[(t*)@]gb_alloc((long)((n)*@[sizeof@](t)),s)

@ The implementation of this scheme is almost ridiculously easy. The
value of~|n| is increased by twice the number of bytes in a pointer,
and the resulting number is rounded upwards if necessary so that it's
a multiple of 256. Then memory is allocated using |calloc|.  The extra
bytes will contain two pointers, one to the beginning of the block and
one to the next block associated with the same area variable.

The \&{Area} type is defined to be an array of length 1. This makes it possible
for users to say just `|s|' instead of `|&s|' when using an area
variable as a parameter.

@<Type...@>=
#define init_area(s) @t\quad@> @[*s=NULL@]
struct area_pointers {
  char *first; /* address of the beginning of this block */
  struct area_pointers *next; /* address of area pointers in the previously
        allocated block */
};

typedef struct area_pointers *Area[1];

@ First we round |n| up, if necessary, so that it's a multiple of the
size of a pointer variable. Then we know we can put |area_pointers| into
memory at a position |n| after any address returned by |calloc|. (This
logic should work whenever the number of bytes in a pointer variable
is a divisor of~256.)

The upper limit on |n| here is governed by old \CEE/ conventions in
which the first parameter to |calloc| must be less than~$2^{16}$.
Users who need graphs with more than half a million vertices might
want to raise this limit on their systems, but they would probably
be better off representing large graphs in a more compact way.

{\sl Important Note:\/}
Programs of the Stanford GraphBase implicitly assume that all
memory allocated by |calloc| comes from a single underlying memory array.
Pointer values are compared to each other in many places, even when
the objects pointed to have been allocated at different times. Strictly
speaking, this liberal use of pointer comparisons fails to conform to
the restrictions of ANSI Standard \CEE/, if the comparison involves
a less-than or greater-than relation. Users whose system supports only
the strict standard will need to make several dozen changes.
@^system dependencies@>

@<External fun...@>=
char *gb_alloc(n,s)
  long n; /* number of consecutive bytes desired */
  Area s; /* storage area that will contain the new block */
{@+long m=sizeof(char *); /* |m| is the size of a pointer variable */
  Area t; /* a temporary pointer */
  char *loc; /* the block address */
  if (n<=0 || n>0xffff00-2*m) {
    gb_trouble_code|=2; /* illegal request */
    return NULL;
  }
  n=((n+m-1)/m)*m; /* round up to multiple of |m| */
  loc=(char*)calloc((unsigned)((n+2*m+255)/256),256);
  if (loc) {
    *t=(struct area_pointers*)(loc+n);
    (*t)->first=loc;
    (*t)->next=*s;
    *s=*t;
  }@+else gb_trouble_code|=1;
  return loc;
}

@ @<External d...@>=
long gb_trouble_code=0; /* did |gb_alloc| return |NULL|? */

@ @(gb_graph.h@>=
extern long gb_trouble_code; /* anomalies noted by |gb_alloc| */

@ Notice that |gb_free(s)| can be called twice in a row, because the list
of blocks is cleared out of the area variable~|s|.

@<External fun...@>=
void gb_free(s)
  Area s;
{@+Area t;
  while (*s) {
    *t=(*s)->next;
    free((*s)->first);
    *s=*t;
  }
}

@ The two external procedures we've defined above should be mentioned in
the header file, so let's do that before we forget.

@(gb_graph.h@>=
extern char *gb_alloc(); /* allocate another block for an area */
#define gb_typed_alloc(n,t,s) @[@t\quad@>\
               @[(t*)@]gb_alloc((long)((n)*@[sizeof@](t)),s)@]
extern void gb_free(); /* deallocate all blocks for an area */

@ Here we try to allocate 10 million bytes of memory. If we succeed,
fine; if not, we verify that the error was properly reported.

(An early draft of this program attempted to allocate memory until all
space was exhausted. That tactic provided a more thorough test, but it
was a bad idea because it brought certain large systems to their
knees; it was terribly unfriendly to other users who were innocently
trying to do their own work on the same machine.)

@<Test some intentional errors@>=
if (gb_alloc(0L,s)!=NULL || gb_trouble_code!=2) {
  fprintf(stderr,"Allocation error 2 wasn't reported properly!\n");@+return -2;
}
for (;g->vv.I<100;g->vv.I++)@+ if (gb_alloc(100000L,s)) {
  g->uu.I++;
  printf(".");
  fflush(stdout);
}
if (g->uu.I<100 && gb_trouble_code!=3) {
  fprintf(stderr,"Allocation error 1 wasn't reported properly!\n");@+return -1;
}
if (g->uu.I==0) {
  fprintf(stderr,"I couldn't allocate any memory!\n");@+return -3;
}
gb_free(s); /* we've exhausted memory, let's put some back */
printf("Hey, I allocated %ld00000 bytes successfully. Terrific...\n",g->uu.I);

gb_trouble_code=0;

@ @<Decl...@>=
Area s; /* temporary allocations in the test routine */

@*Growing a graph. Now we're ready to look at the \&{Graph} type. This is
a data structure that can be passed to an algorithm that operates on
graphs---to find minimum spanning trees, or strong components, or whatever.

A \&{Graph} record has seven standard fields and six utility fields. The
standard fields are
$$\vcenter{\halign{#,\ \ \hfil&#\hfil\cr
|vertices|&a pointer to an array of |Vertex| records;\cr
|n|&the total number of vertices;\cr
|m|&the total number of arcs;\cr
|id|&a symbolic identification giving parameters of the GraphBase procedure\cr
\omit& that generated this graph;\cr
|util_types|&a symbolic representation of the data types in utility fields;\cr
|data|&an |Area| used for |Arc| storage and string storage;\cr
|aux_data|&an |Area| used for auxiliary information that some users might\cr
\omit     &want to discard.\cr}}$$
The utility fields are called |uu|, |vv|, |ww|, |xx|, |yy|, and |zz|.

As a consequence of these conventions, we can visit all arcs of a
graph~|g| by using the following program:
$$\vcenter{\halign{#\hfil\cr
|Vertex *v;|\cr
|Arc *a;|\cr
|for (v=g->vertices; v<g->vertices+g->n; v++)|\cr
\quad|for (a=v->arcs; a; a=a->next)|\cr
\qquad\\{visit}|(v,a)|;\cr}}$$

@<Type...@>=
#define ID_FIELD_SIZE 161
typedef struct graph_struct {
  Vertex *vertices; /* beginning of the vertex array */
  long n; /* total number of vertices */
  long m; /* total number of arcs */
  char id[ID_FIELD_SIZE]; /* GraphBase identification */
  char util_types[15]; /* usage of utility fields */
  Area data; /* the main data blocks */
  Area aux_data; /* subsidiary data blocks */
  util uu,vv,ww,xx,yy,zz; /* multipurpose fields */
} Graph;

@ The |util_types| field should always hold a string of length 14, followed
as usual by a null character to terminate that string. The first six
characters of |util_types| specify the usage of utility fields |u|, |v|,
|w|, |x|, |y|, and~|z| in |Vertex| records; the next two characters give the
format of the utility fields in |Arc| records; the last six give the
format of the utility fields in |Graph| records.  Each character
should be either \.I (denoting a |long| integer),
\.S (denoting a pointer to a string),
\.V (denoting a pointer to a |Vertex|), \.A (denoting a pointer to an
|Arc|), \.G (denoting a pointer to a |Graph|), or \.Z (denoting an
unused field that remains zero). The default for |util_types| is
|"ZZZZZZZZZZZZZZ"|, when none of the utility fields is being used.

For example, suppose that a bipartite graph |g| is using field |g->uu.I|
to specify the size of its first part. Suppose further that |g| has a
string in utility field |a| of each |Arc| and uses
utility field |w| of |Vertex| records to point to an |Arc|. If |g|
leaves all other utility fields untouched, its |util_types| should be
|"ZZAZZZSZIZZZZZ"|.

The |util_types| string is presently examined only by the |save_graph| and
|restore_graph| routines, which convert GraphBase graphs from internal
data structures to symbolic external files and vice versa. Therefore
users need not update the |util_types| when they write algorithms to
manipulate graphs, unless they are going to use |save_graph| to output
a graph in symbolic form, or unless they are using some other
GraphBase-related software that might rely on the conventions of
|util_types|.  (Such software is not part of the ``official'' Stanford
GraphBase, but it might conceivably exist some~day.)

@ Some applications of bipartite graphs require all vertices of the first
part to appear at the beginning of the |vertices| array. In such cases,
utility field |uu.I| is traditionally given the symbolic name |n_1|, and
it is set equal to the size of that first part. The size of the other
part is then |g->n - g->n_1|.
@^bipartite graph@>

@d n_1 uu.I /* utility field |uu| may denote size of bipartite first part */

@(gb_graph.h@>=
#define n_1 @t\quad@> uu.I
#define mark_bipartite(g,n1) @[g->n_1=n1,g->util_types[8]='I'@]

@ A new graph is created by calling |gb_new_graph(n)|, which returns a
pointer to a |Graph| record for a graph with |n| vertices and no arcs.
This function also initializes several private variables that are used
by the |gb_new_arc|, |gb_new_edge|, |gb_virgin_arc|, and |gb_save_string|
procedures below.

We actually reserve space for |n+extra_n| vertices, although claiming only~$n$,
because several graph manipulation algorithms like to add a special vertex
or two to the graphs they deal with. 

@<External f...@>=
Graph *gb_new_graph(n)
  long n; /* desired number of vertices */
{
  cur_graph=(Graph*)calloc(1,sizeof(Graph));
  if (cur_graph) {
    cur_graph->vertices=gb_typed_alloc(n+extra_n,Vertex,cur_graph->data);
    if (cur_graph->vertices) {Vertex *p;
      cur_graph->n=n;
      for (p=cur_graph->vertices+n+extra_n-1; p>=cur_graph->vertices; p--)
        p->name=null_string;
      sprintf(cur_graph->id,"gb_new_graph(%ld)",n);
      strcpy(cur_graph->util_types,"ZZZZZZZZZZZZZZ");
    }@+else {
      free((char*)cur_graph);
      cur_graph=NULL;
    }
  }
  next_arc=bad_arc=NULL;
  next_string=bad_string=NULL;
  gb_trouble_code=0;
  return cur_graph;
}

@ The value of |extra_n| is ordinarily~4, and it should probably always be at
least~4.

@<External d...@>=
long extra_n=4; /* the number of shadow vertices allocated by |gb_new_graph| */
char null_string[1]; /* a null string constant */

@ @(gb_graph.h@>=
extern long extra_n;
  /* the number of shadow vertices allocated by |gb_new_graph| */
extern char null_string[]; /* a null string constant */
extern void make_compound_id();
  /* routine to set one |id| field from another */
extern void make_double_compound_id(); /* ditto, but from two others */

@ The |id| field of a graph is sometimes manufactured from the |id| field
of another graph. The following routines do this without allowing the
string to get too long after repeated copying.

@<External f...@>=
void make_compound_id(g,s1,gg,s2) /* |sprintf(g->id,"%s%s%s",s1,gg->id,s2)| */
  Graph *g; /* graph whose |id| is to be set */
  char *s1; /* string for the beginning of the new |id| */
  Graph *gg; /* graph whose |id| is to be copied */
  char *s2; /* string for the end of the new |id| */
{@+int avail=ID_FIELD_SIZE-strlen(s1)-strlen(s2);
  char tmp[ID_FIELD_SIZE];
  strcpy(tmp,gg->id);
  if (strlen(tmp)<avail) sprintf(g->id,"%s%s%s",s1,tmp,s2);
  else sprintf(g->id,"%s%.*s...)%s",s1,avail-5,tmp,s2);
}

@ @<External f...@>=
void make_double_compound_id(g,s1,gg,s2,ggg,s3)
              /* |sprintf(g->id,"%s%s%s%s%s",s1,gg->id,s2,ggg->id,s3)| */
  Graph *g; /* graph whose |id| is to be set */
  char *s1; /* string for the beginning of the new |id| */
  Graph *gg; /* first graph whose |id| is to be copied */
  char *s2; /* string for the middle of the new |id| */
  Graph *ggg; /* second graph whose |id| is to be copied */
  char *s3; /* string for the end of the new |id| */
{@+int avail=ID_FIELD_SIZE-strlen(s1)-strlen(s2)-strlen(s3);
  if (strlen(gg->id)+strlen(ggg->id)<avail)
    sprintf(g->id,"%s%s%s%s%s",s1,gg->id,s2,ggg->id,s3);
  else sprintf(g->id,"%s%.*s...)%s%.*s...)%s",s1,avail/2-5,gg->id,
             s2,(avail-9)/2,ggg->id,s3);
}

@ But how do the arcs get there? That's where the private variables in
|gb_new_graph| come in. If |next_arc| is unequal to |bad_arc|, it points to
an unused |Arc| record in a previously allocated block of |Arc| records.
Similarly, |next_string| and |bad_string| are addresses used to
place strings into a block of memory allocated for that purpose.

@<Private...@>=
static Arc *next_arc; /* the next |Arc| available for allocation */
static Arc *bad_arc; /* but if |next_arc=bad_arc|, that |Arc| isn't there */
static char *next_string; /* the next byte available for storing a string */
static char *bad_string; /* but if |next_string=bad_string|, don't byte */
static Arc dummy_arc[2]; /* an |Arc| record to point to in an emergency */
static Graph dummy_graph; /* a |Graph| record that's normally unused */
static Graph *cur_graph=&dummy_graph; /* the |Graph| most recently created */

@ All new |Arc| records that are created by the automatic |next_arc|/|bad_arc|
scheme originate in a procedure called |gb_virgin_arc|, which returns the
address of a new record having type |Arc|.

When a new block of |Arc| records is needed, we create 102 of them at once.
This strategy causes exactly 2048 bytes to be allocated on most
computer systems---a nice round number. The routine will still work, however,
if 102 is replaced by any positive even number. The new block goes into
the |data| area of |cur_graph|.

Graph-building programs do not usually call |gb_virgin_arc| directly;
they generally invoke one of the higher-level routines |gb_new_arc|
or |gb_new_edge| described below.

If memory space has been exhausted, |gb_virgin_arc| will return a
pointer to |dummy_arc|, so that the calling procedure can safely
refer to fields of the result even though |gb_trouble_code| is nonzero.

@d arcs_per_block 102

@<External f...@>=
Arc *gb_virgin_arc()
{@+register Arc *cur_arc=next_arc;
  if (cur_arc==bad_arc) {
    cur_arc=gb_typed_alloc(arcs_per_block,Arc,cur_graph->data);
    if (cur_arc==NULL)
      cur_arc=dummy_arc;
    else {
      next_arc = cur_arc+1;
      bad_arc = cur_arc+arcs_per_block;
    }
  }
  else next_arc++;
  return cur_arc;
}

@ The routine |gb_new_arc(u,v,len)| creates a new arc of length |len|
from vertex~|u| to vertex~|v|. The arc becomes part of the graph that
was most recently created by |gb_new_graph|---the graph
pointed to by the private variable |cur_graph|. This routine assumes
that |u| and |v| are both vertices in |cur_graph|.

The new arc will be pointed to by |u->arcs|, immediately after
|gb_new_arc(u,v,len)| has acted. If there is no room for the new arc,
|gb_trouble_code| is set nonzero, but |u->arcs| will point to the non-|NULL|
record |dummy_arc|
so that additional information can safely be stored in its utility fields
without risking system crashes before |gb_trouble_code| is tested.
However, the linking structure of arcs is apt to be fouled up in such
cases; programs should make sure that |gb_trouble_code==0| before doing any
extensive computation on a graph.

@<External f...@>=
void gb_new_arc(u,v,len)
  Vertex *u, *v; /* a newly created arc will go from |u| to |v| */
  long len; /* its length */
{@+register Arc *cur_arc=gb_virgin_arc();
  cur_arc->tip=v; @+cur_arc->next=u->arcs; @+cur_arc->len=len;
  u->arcs=cur_arc;
  cur_graph->m++;
}

@ An undirected graph has ``edges'' instead of arcs. We represent an edge
by two arcs, one going each way.
@^undirected graph@>

The fact that |arcs_per_block| is even means that the |gb_new_edge|
routine needs to call |gb_virgin_arc| only once instead of twice.

Caveats: This routine, like |gb_new_arc|, should be used only after
|gb_new_graph| has caused the private variable |cur_graph| to point to
the graph containing the new edge. The routine |gb_new_edge| must
not be used together with |gb_new_arc| or |gb_virgin_arc| when
building a graph, unless |gb_new_arc| and |gb_virgin_arc| have been
called an even number of times before |gb_new_edge| is invoked.

The new edge will be pointed to by |u->arcs| and by |v->arcs| immediately
after |gb_new_edge| has created it, assuming that |u!=v|. The two arcs
appear next to each other in memory; indeed, |gb_new_edge| rigs things so
that |v->arcs| is |u->arcs+1| when |u<v|.

On many computers it turns out that the first |Arc| record of every such
pair of arcs will have an address that is a multiple of~8, and the
second |Arc| record will have an address that is not a multiple of~8 (because
the first |Arc| will be 20 bytes long, and because |calloc| always returns
a multiple of~8). However, it is not safe to assume this when writing
portable code. Algorithms for undirected graphs can still make good use of
the fact that arcs for edges are paired, without needing any mod~8 assumptions,
if all edges have been created and linked into the graph by |gb_new_edge|:
The inverse of an arc~|a| from |u| to~|v| will be arc |a+1| if and only if
|u<v| or |a->next=a+1|; it will be arc |a-1| if and only if |u>=v| and
|a->next!=a+1|. The condition |a->next=a+1| can hold only if |u=v|.

@d gb_new_graph gb_nugraph /* abbreviations for Procrustean linkers */
@d gb_new_arc gb_nuarc
@d gb_new_edge gb_nuedge

@<External f...@>=
void gb_new_edge(u,v,len)
  Vertex *u, *v; /* new arcs will go from |u| to |v| and from |v| to |u| */
  long len; /* their length */
{@+register Arc *cur_arc=gb_virgin_arc();
  if (cur_arc!=dummy_arc) next_arc++;
  if (u<v) {
    cur_arc->tip=v; @+cur_arc->next=u->arcs;
    (cur_arc+1)->tip=u; @+(cur_arc+1)->next=v->arcs;
    u->arcs=cur_arc; v->arcs=cur_arc+1;
  }@+else {
    (cur_arc+1)->tip=v; @+(cur_arc+1)->next=u->arcs;
    u->arcs=cur_arc+1; /* do this now in case |u==v| */
    cur_arc->tip=u; @+cur_arc->next=v->arcs;
    v->arcs=cur_arc;
  }
  cur_arc->len=(cur_arc+1)->len=len;
  cur_graph->m+=2;
}

@ Sometimes (let us hope rarely) we might need to use a dirty trick
hinted at in the previous discussion. On most computers, the mate to
arc~|a| will be |a-1| if and only if |edge_trick&(siz_t)a|
is nonzero.
@^system dependencies@>
@^pointer hacks@>

@<External d...@>=
siz_t edge_trick=sizeof(Arc)-(sizeof(Arc)&(sizeof(Arc)-1));

@ @(gb_graph.h@>=
extern siz_t edge_trick; /* least significant 1 bit in |sizeof(Arc)| */

@ The type |siz_t| just mentioned should be the type returned by \CEE/'s
|sizeof| operation; it's the basic unsigned type for machine
addresses in pointers. ANSI standard \CEE/ calls this type \&{size\_t},
but we cannot safely use \&{size\_t} in all the GraphBase programs
because some older \CEE/
systems mistakenly define \&{size\_t} to be a signed type.
@^system dependencies@>

@f siz_t int

@<Type d...@>=
typedef unsigned long @[siz_t@];
  /* basic machine address, as signless integer */

@ Vertices generally have a symbolic name, and we need a place to put
such names. The |gb_save_string| function is a convenient utility
for this purpose:
Given a null-terminated string of any length, |gb_save_string| stashes
it away in a safe place and returns a pointer to that place. Memory is
conserved by combining strings from the current graph into largish blocks
of a convenient size.

Note that |gb_save_string| should be used only after |gb_new_graph|
has provided suitable initialization, because the private variable
|cur_graph| must point to the graph for which storage is currently
being allocated, and because the private variables |next_string| and
|bad_string| must also have suitable values.

@d string_block_size 1016 /* $1024-8$ is usually efficient */

@<External f...@>=
char *gb_save_string(s)
  register char *s; /* the string to be copied */
{@+register char *p=s;
  register long len;
       /* length of the string and the following null character */
  while (*p++) ; /* advance to the end of the string */
  len=p-s;
  p=next_string;
  if (p+len>bad_string) { /* not enough room in the current block */
    long size=string_block_size;
    if (len>size)
      size=len;
    p=gb_alloc(size,cur_graph->data);
    if (p==NULL)
      return null_string; /* return a pointer to |""| if memory ran out */
    bad_string=p+size;
  }
  while (*s) *p++=*s++; /* copy the non-null bytes of the string */
  *p++='\0'; /* and append a null character */
  next_string=p;
  return p-len;
}

@ The test routine illustrates some of these basic maneuvers.

@<Create a small graph@>=
g=gb_new_graph(2L);
if (g==NULL) {
  fprintf(stderr,"Oops, I couldn't even create a trivial graph!\n");
  return -4;
}
u=g->vertices;@+ v=u+1;
u->name=gb_save_string("vertex 0");
v->name=gb_save_string("vertex 1");

@ @<Decl...@>=
Graph *g;
Vertex *u,*v;

@ If the ``edge trick'' fails, the standard GraphBase routines are
unaffected except for the demonstration program {\sc MILES\_\,SPAN}. (And
that program uses |edge_trick| only when printing verbose comments.)
@^edge trick failure@>

@<Check that the small graph is still there@>=
if (strncmp(u->name,v->name,7)) {
  fprintf(stderr,"Something is fouled up in the string storage machinery!\n");
  return -5;
}
gb_new_edge(v,u,-1L);
gb_new_edge(u,u,1L);
gb_new_arc(v,u,-1L);
if ((edge_trick&(siz_t)(u->arcs))||
    (edge_trick&(siz_t)(u->arcs->next->next))||
    !(edge_trick&(siz_t)(v->arcs->next)))
  printf("Warning: The \"edge trick\" failed!\n");
if (v->name[7]+g->n!=v->arcs->next->tip->name[7]+g->m-2) {
     /* |'1'+2!='0'+5-2| */
  fprintf(stderr,"Sorry, the graph data structures aren't working yet.\n");
  return -6;
}

@ Some applications might need to add arcs to several graphs at a time,
violating the assumptions stated above about |cur_graph| and the other
private variables. The |switch_to_graph| function gets around that
restriction, by using the utility slots |ww|, |xx|, |yy|, and
|zz| of |Graph| records to save and restore the private variables.

Just say |switch_to_graph(g)| in order to make |cur_graph| be~|g| and
to restore the other private variables that are needed by
|gb_new_arc|, |gb_virgin_arc|, |gb_new_edge|, and |gb_save_string|.
Restriction: The graph |g| being switched to must have previously been
switched from; that is, it must have been |cur_graph| when |switch_to_graph|
was called previously. Otherwise its private allocation variables will
not have been saved. To meet this restriction, you should say
|switch_to_graph(NULL)| just before calling |gb_new_graph|, if you
intend to switch back to the current graph later.

(The swap-in-swap-out nature of these conventions may seem inelegant, but
convenience and efficiency are more important than elegance when most
applications do not need the ability to switch between graphs.)

@<External f...@>=
void switch_to_graph(g)
  Graph *g;
{
  cur_graph->ww.A=next_arc; @+cur_graph->xx.A=bad_arc;
  cur_graph->yy.S=next_string; @+cur_graph->zz.S=bad_string;
  cur_graph=(g? g: &dummy_graph);
  next_arc=cur_graph->ww.A; @+bad_arc=cur_graph->xx.A;
  next_string=cur_graph->yy.S; @+bad_string=cur_graph->zz.S;
  cur_graph->ww.A=NULL;
  cur_graph->xx.A=NULL;
  cur_graph->yy.S=NULL;
  cur_graph->zz.S=NULL;
}

@ Finally,
here's a routine that obliterates an entire graph when it is no longer needed:

@<External fun...@>=
void gb_recycle(g)
  Graph *g;
{
  if (g) {
    gb_free(g->data);
    gb_free(g->aux_data);
    free((char*)g); /* the user must not refer to |g| again */
  }
}

@ @(gb_graph.h@>=
#define gb_new_graph gb_nugraph /* abbreviations for external linkage */
#define gb_new_arc gb_nuarc
#define gb_new_edge gb_nuedge
extern Graph*gb_new_graph(); /* create a new graph structure */
extern void gb_new_arc(); /* append an arc to the current graph */
extern Arc*gb_virgin_arc(); /* allocate a new |Arc| record */
extern void gb_new_edge(); /* append an edge (two arcs) to the current graph */
extern char*gb_save_string(); /* store a string in the current graph */
extern void switch_to_graph(); /* save allocation variables, swap in others */
extern void gb_recycle(); /* delete a graph structure */

@* Searching for vertices. We sometimes want to be able to find a vertex, given
its name, and it is nice to do this in a standard way. The following simple
subroutines can be used:

{\narrower
\smallskip|hash_in(v)| puts the name of vertex |v| into the hash table;
\smallskip|hash_out(s)| finds a vertex named |s|, if present in the hash table;
\smallskip|hash_setup(g)| prepares a hash table for all vertices of graph~|g|;
\smallskip|hash_lookup(s,g)| looks up the name |s| in the hash table of |g|.
\smallskip}

\noindent Routines |hash_in| and |hash_out| apply to the current graph being
created, while |hash_setup| and |hash_lookup| apply to arbitrary graphs.

Important: Utility fields |u| and |v| of each vertex are reserved for use by
the search routine when hashing is active. You can crash the system
if you try to fool around with these values yourself, or if you use any
subroutines that change those fields. The first two characters in the
current graph's |util_types| field should be \.{VV} if the hash table
information is to be saved by {\sc GB\_\,SAVE}.

Warning: Users of this hash scheme must preserve the number of
vertices |g->n| in the current graph~|g|. If |g->n| is changed,
the hash table will be worthless, unless |hash_setup| is used to
rehash everything.

@<gb_graph.h@>=
extern void hash_in(); /* input a name to the hash table of current graph */
extern Vertex* hash_out(); /* find a name in hash table of current graph */
extern void hash_setup(); /* create a hash table for a given graph */
extern Vertex* hash_lookup(); /* find a name in a given graph */

@ The lookup scheme is quite simple. We compute a more-or-less random
value |h| based on the vertex name, where |0<=h<n|, assuming that
the graph has |n|~vertices. There is a list of all vertices whose hash
address is~|h|, starting at |(g->vertices+h)->hash_head| and linked
together in the |hash_link| fields, where |hash_head| and |hash_link| are
utility fields |u.V| and |v.V|.

@d hash_link u.V
@d hash_head v.V

@ @<External fun...@>=
void hash_in(v)
  Vertex *v;
{@+  register char *t=v->name;
  register Vertex *u;
  @<Find vertex |u|, whose location is the hash code for string |t|@>;
  v->hash_link=u->hash_head;
  u->hash_head=v;
}

@ The hash code for a string $c_1c_2\ldots c_l$ of length $l$ is a
nonlinear function of the characters; this function appears to produce
reasonably random results between 0 and the number of vertices in the
current graph.  Simpler approaches were noticeably poorer in the
author's tests.

Caution: This hash coding scheme is system-dependent, because it
uses the system's character codes. If you create a graph on a
machine with ASCII code and save it with {\sc GB\_\,SAVE}, and if you
subsequently ship the
resulting text file to some friend whose machine does not use ASCII code,
your friend will have to rebuild the hash structure with |hash_setup|
before being able to use |hash_lookup| successfully.
@^character-set dependencies@>

@d HASH_MULT 314159 /* random multiplier */
@d HASH_PRIME 516595003 /* the 27182818th prime; it's less than $2^{29}$ */

@<Find vertex |u|...@>=
{@+register long h;
  for (h=0;*t;t++) {
    h+=(h^(h>>1))+HASH_MULT*(unsigned char)*t;
    while (h>=HASH_PRIME) h-=HASH_PRIME;
  }
  u=cur_graph->vertices+(h % cur_graph->n);
}

@ If the hash function were truly random, the average number of
string comparisons made would be less than $(e^2+7)/8\approx 1.80$ on
a successful search, and less than $(e^2+1)/4\approx2.10$ on an
unsuccessful search [{\sl Sorting and Searching}, Section 6.4,
Eqs.~(15) and~(16)].

@<External fun...@>=
Vertex* hash_out(s)
  char* s;
{@+register char *t=s;
  register Vertex *u;
  @<Find vertex |u|...@>;
  for (u=u->hash_head;u;u=u->hash_link)
    if (strcmp(s,u->name)==0) return u;
  return NULL; /* not found */
}

@ @<External fun...@>=
void hash_setup(g)
  Graph *g;
{@+Graph *save_cur_graph;
  if (g && g->n>0) {@+register Vertex *v;
    save_cur_graph=cur_graph;
    cur_graph=g;
    for (v=g->vertices;v<g->vertices+g->n;v++) v->hash_head=NULL;
    for (v=g->vertices;v<g->vertices+g->n;v++) hash_in(v);
    g->util_types[0]=g->util_types[1]='V';
         /* indicate usage of |hash_head| and |hash_link| */
    cur_graph=save_cur_graph;
  }
}

@ @<External fun...@>=
Vertex* hash_lookup(s,g)
  char *s;
  Graph *g;
{@+Graph *save_cur_graph;
  if (g && g->n>0) {@+register Vertex *v;
    save_cur_graph=cur_graph;
    cur_graph=g;
    v=hash_out(s);
    cur_graph=save_cur_graph;
    return v;
  }
  else return NULL;
}

@* Index. Here is a list that shows where the identifiers of this program are
defined and used.