1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374
|
#! /usr/bin/env perl
##
## Copyright (C) by Argonne National Laboratory
## See COPYRIGHT in top-level directory
##
# This script scavenges the MPICH source tree looking for "subconfigure.m4"
# files. It then analyzes the dependencies between them and emits a
# "subsys_include.m4" file that "m4_include"s them all in the correct order and
# writes an autoconf macro that is suitable for expanding subsys macros in the
# correct order.
#
# Finding a "dnl MPICH_SUBCFG_BEFORE=BAR" statement inside of
# FOO/subconfigure.m4 means that BAR depends on FOO and that FOO's macros should
# be emitted *before* BAR's macros.
#
# Finding a "dnl MPICH_SUBCFG_AFTER=QUUX" statement inside of
# BAZ/subconfigure.m4 means that BAZ depends on QUUX and that BAZ's macros be emitted
# *after* QUUX's macros.
#
# We have both forms of macros because some subsystems know their consumers
# explicitly and some packages only know what they consume explicitly, and some
# packages are a blend. For example, ch3 depends on the mpid/common/sched code
# in order to support NBC ops so a BEFORE statement is used in the
# ch3/subconfigure.m4 in order to "enable_mpid_common_sched=yes". The nemesis
# netmods all depend on the core nemesis configuration happening first, so they
# use an AFTER statement in each of their subconfigure.m4 files.
################################################################################
use strict;
use warnings;
use Getopt::Long;
use Data::Dumper;
################################################################################
# Default global settings and constants
# unlikely to change, since this name is assumed in lots of other pieces of the
# build system
my $SUBCFG_NAME = "subconfigure.m4";
my $OUTPUT_FILE = "subsys_include.m4";
# if true, add a dependency (ancestor-->child) when $SUBCFG_NAME files are
# encountered and they have an ancestor $SUBCFG_NAME file in an enclosing
# directory
my $USE_IMPLICIT_EDGES = 1;
# the existence of this file means we should stop recursing down
# the enclosing directory tree
my $stop_sentinel = ".no_subcfg_recursion";
# which directories should be recursively searched for "subconfigure.m4" files
my @root_dirs = qw( src );
# coloring constants for the graph algorithms later on
my ($WHITE, $GRAY, $BLACK) = (1, 2, 3);
################################################################################
# Option processing: here's a great place to permit overriding the default
# global settings from above if we ever need to in the future.
my $do_bist = 0;
GetOptions(
"--help" => \&print_usage,
"--bist" => \$do_bist,
) or die "unable to process options, stopped";
sub print_usage {
print <<EOT;
This script builds '$OUTPUT_FILE' from '$SUBCFG_NAME' files.
Usage: $0 [--help] [--bist]
--bist - Run simple self tests on this script.
--help - This message.
EOT
exit 0;
}
if ($do_bist) {
bist();
}
################################################################################
# preorder traverse the root dirs looking for files named $SUBCFG_NAME
# stack for recursion, contains dirs that must yet be visited
my @dirstack = ( @root_dirs );
# Parallel stack that keeps track of the nearest ancestor with a subconfigure.m4
# file. All root dirs have no ancestors.
my @anc_stack = map { '' } @root_dirs;
# keys are the full path to the found file, value is the nearest ancestor (in
# the directory hierarchy sense) subconfigure.m4 file, or '' if none exists.
my %found_files = ();
while (my $dir = pop @dirstack) {
my $anc = pop @anc_stack;
# check for a $SUBCFG_NAME before recursing in order to correctly propagate
# ancestor information for subdirectories
my $fp = "$dir/$SUBCFG_NAME";
if (-e $fp) {
# found a subconfigure.m4 file
$found_files{$fp} = $anc;
# override our parent's ancestor for all of our descendants
$anc = $fp;
}
if (-e "$dir/$stop_sentinel") {
# the existence of this file means we should stop recursing down
# this particular directory tree
next;
}
# now that we've visited the current vertex, push all of our child dirs onto
# the stack to continue the traversal
opendir DH, $dir
or die "unable to open dir='$dir', stopped";
my @contents = sort readdir DH;
foreach my $f (@contents) {
# avoid endless recursion
next if $f eq "." || $f eq "..";
if (-d "$dir/$f") {
push @dirstack, "$dir/$f";
push @anc_stack, $anc;
}
}
closedir DH;
}
################################################################################
# We now have a list of all $SUBCFG_NAME files in @found_files. Process each of
# the files and build a DAG.
# A DAG where the vertices are full path filenames for $SUBCFG_NAME files and the
# edges are dependencies between the $SUBCFG_NAME files (A-->B indicates that
# A should come before B in a topo sort). We concretely express this DAG as an
# adjacency list stored in a hash. Keys of the hash are filenames, values are
# refs to hashes whose keys are outbound edge filenames.
# IOW:
# ( a => {b=>1,c=>1}, b => {c=>1}, c => {} )
# represents the following (crudely drawn) graph:
# a-->b-->c
# \------^
my %dag = ();
# Helper routine, adds a new edge to the given dag hash (passed by reference),
# automatically creating src or dst vertices as necessary. A cute bit of calling
# syntax is that the src and dst args can be separated by "=>" because it's just
# a fancy comma in perl.
sub add_edge {
my $dag_ref = shift;
my $src = shift;
my $dst = shift;
die "\$dag_ref is invalid, stopped" unless ref($dag_ref) eq "HASH";
die "\$src is invalid, stopped" unless $src;
die "\$dst is invalid, stopped" unless $dst;
$dag_ref->{$src} = {} unless exists $dag_ref->{$src};
$dag_ref->{$src}->{$dst} = 1;
}
foreach my $k (keys %found_files) {
# add the vertex to the graph with no edges to start
$dag{$k} = {} unless exists $dag{$k};
my $anc = $found_files{$k};
if ($anc and $USE_IMPLICIT_EDGES) {
# need to add the implicit edge from the ancestor to $k
add_edge(\%dag, $anc => $k);
}
# now process the file and add any explicit edges
open FILE, '<', $k;
while (my $line = <FILE>) {
if ($line =~ m/^\s*dnl +MPICH_SUBCFG_([A-Z_]*)=(.*)\s*$/) {
my $bef_aft = $1;
my $arg = $2;
# users can set GEN_SUBCFG_NO_ERROR=1 in the environment to prevent
# this script from complaining about missing '/' chars
if ($arg !~ m|/| and not $ENV{GEN_SUBCFG_NO_ERROR}) {
print "ERROR: no '/' characters detected in '$arg', possible old-style structured comment still present\n";
exit 1;
}
# normalize the $arg to match our DAG
$arg .= "/$SUBCFG_NAME";
if ($bef_aft eq "BEFORE") {
add_edge(\%dag, $k => $arg);
}
elsif ($bef_aft eq "AFTER") {
add_edge(\%dag, $arg => $k);
}
else {
die "unrecognized structured comment ('MPICH_SUBCFG_${bef_aft}')\n".
"at $k:$., possible typo? Stopped";
}
}
}
close FILE;
}
################################################################################
# We now have a DAG expressing the dependency information between the various
# subconfigure.m4 files. Now we need to topologically sort it.
#
# We use the topo sort algorithm given in "Introduction to Algorithms" (1st
# ed.), page 486 with a small modification to detect cycles in the digraph. We
# perform a DFS on the DAG, coloring vertices as we go. We could compute
# discovery and finishing times, as well as predecessors, but we don't need that
# information for topological sorting or cycle detection. As each vertex is
# finished (colored BLACK) we prepend it to an array. The resulting array is
# sorted in "ascending" topological order (a,b,c in our previous example).
# the output array in which the sorted results will be stored
my @tsorted = ();
topo_sort(\%dag, \@tsorted);
################################################################################
# Now just emit the $OUTPUT_FILE in the correct format.
open OUTFD, '>', $OUTPUT_FILE;
my $datestamp = scalar(localtime);
print OUTFD <<EOT;
dnl generated by $0 at $datestamp
dnl DO NOT EDIT BY HAND!!!
dnl re-run ./maint/updatefiles instead
EOT
foreach my $file (@tsorted) {
print OUTFD "m4_include([$file])\n";
}
print OUTFD <<EOT;
dnl a macro suitable for use as m4_map([some_unary_macro],[PAC_SUBCFG_MODULE_LIST])
m4_define([PAC_SUBCFG_MODULE_LIST],
m4_dquote(
EOT
foreach my $file (@tsorted[0 .. $#tsorted-1]) {
my $mod_name = $file;
$mod_name =~ s+/$SUBCFG_NAME$++;
$mod_name =~ tr+/+_+;
print OUTFD "[$mod_name],\n";
}
my $mod_name = $tsorted[-1];
$mod_name =~ s+/$SUBCFG_NAME$++g;
$mod_name =~ tr+/+_+;
print OUTFD "[$mod_name]dnl <--- this dnl is important\n";
print OUTFD "))\n\n";
close OUTFD;
################################################################################
# SUBROUTINES
################################################################################
# The DFS-Visit(u) algorithm specialized for topo sorting. Currently a
# subroutine to permit recursive invocation, but could be converted to use an
# explicit stack and the subroutine could be eliminated.
#
# takes four arguments: a ref to the DAG hash, a ref to a colors hash, the
# vertex $u, and an output array reference to be populated as vertices are
# finished.
sub dfs_visit {
my $dag_ref = shift;
my $colors_ref = shift;
my $u = shift;
my $out_arr_ref = shift;
$colors_ref->{$u} = $GRAY;
foreach my $v (keys %{$dag_ref->{$u}}) {
# detect cycles in the graph at this point, see ("Classification of edges"
# in CLR)
if ((exists $colors_ref->{$v}) && $colors_ref->{$v} == $GRAY) {
# We are already exploring the tree from $v, so this is a "back edge",
# indicating a cycle is present in the digraph. This is erroneous in
# our usage, since we cannot topologically sort a cyclic graph.
die "A back edge was found in the digraph but a DAG is required.\n".
"The back edge was from\n".
" $u\n".
"to\n".
" $v\n".
"Stopped";
}
elsif ((exists $colors_ref->{$v}) && $colors_ref->{$v} == $WHITE) {
dfs_visit($dag_ref, $colors_ref, $v, $out_arr_ref);
}
}
$colors_ref->{$u} = $BLACK;
# append $u to the output
unshift @$out_arr_ref, $u;
}
# Takes two arguments, a ref to a DAG hash and a reference to an output array.
# Returns in the output array a valid topological sort of the given DAG.
sub topo_sort {
my $dag_ref = shift;
my $out_arr_ref = shift;
# helper hash that is indexed by vertex name in order to avoid building a
# complicated set of nested structures inside the main DAG
my $colors_ref = {}; # values are one of $WHITE, $GRAY, or $BLACK
# a simplified version of the DFS(G) algorithm
foreach my $u (keys %$dag_ref) {
$colors_ref->{$u} = $WHITE;
}
foreach my $u (keys %$dag_ref) {
if ($colors_ref->{$u} == $WHITE) {
dfs_visit($dag_ref, $colors_ref, $u, $out_arr_ref);
}
}
}
################################################################################
# self tests
# run this subroutine to self-test portions of this script
sub bist {
bist_topo_sort();
print "all self-tests PASSED\n";
exit 0;
}
sub bist_topo_sort {
my $dag;
my $out_arr;
my $expected;
$dag = { a => {b=>1,c=>1}, b => {c=>1}, c => {} };
$out_arr = [];
$expected = [ qw(a b c) ];
topo_sort($dag, $out_arr);
cmp_arrays($out_arr, $expected);
$dag = { a => {}, b => {}, c => {} };
$out_arr = [];
topo_sort($dag, $out_arr);
# this DAG has no single expected result, so just check lengths
unless (scalar @$out_arr eq scalar @$expected) {
die "\$out_arr and \$expected differ in length, stopped\n";
}
my $routine = (caller(0))[3];
print "$routine PASSED\n";
}
sub cmp_arrays {
my $out_arr = shift;
my $expected = shift;
#print "out_arr=".Dumper($out_arr)."\n";
#print "expected=".Dumper($expected)."\n";
unless (scalar @$out_arr eq scalar @$expected) {
die "\$out_arr and \$expected differ in length, stopped\n";
}
for (my $i = 0; $i < @$out_arr; ++$i) {
unless ($out_arr->[$i] eq $expected->[$i]) {
die "element $i of \$out_arr differs from the expected value (".
$out_arr->[$i]." ne ".$expected->[$i]."), stopped\n";
}
}
}
|