1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405
|
#! /bin/sh
#!perl -w # --*- Perl -*--
eval 'exec perl -x $0 ${1+"$@"}'
if 0;
#------------------------------------------------------------------------------
#$Author: antanas $
#$Date: 2022-07-31 09:52:44 +0300 (Sun, 31 Jul 2022) $
#$Revision: 9353 $
#$URL: svn+ssh://www.crystallography.net/home/coder/svn-repositories/cod-tools/tags/v3.7.0/scripts/cif_hkl_COD_number $
#------------------------------------------------------------------------------
#*
#* Parse a CIF file and print out the essential information in the COD
#* CIF format
#*
#* USAGE:
#* $0 --options input1.cif input*.cif
#**
use strict;
use warnings;
use File::Basename qw( basename );
use COD::CIF::Data::CODFlags qw( has_hkl );
use COD::CIF::Parser qw( parse_cif );
use COD::CIF::Tags::DictTags;
use COD::CIF::Tags::COD;
use COD::CIF::Tags::Excluded;
use COD::CIF::Tags::Manage qw( exclude_tag exclude_empty_tags set_tag );
use COD::CIF::Tags::Print qw( print_cif fold );
use COD::CIF::Tags::CanonicalNames qw( canonicalize_names );
use COD::SOptions qw( getOptions get_value );
use COD::SUsage qw( usage options );
use COD::ErrorHandler qw( process_warnings
process_errors
process_parser_messages
report_message );
use COD::ToolsVersion qw( get_version_string );
my $die_on_error_level = {
ERROR => 1,
WARNING => 0,
NOTE => 0
};
my $use_parser = 'c';
my $exclude_empty_tags = 0;
my $preserve_loop_order = 0;
my $record_original_filename = 0;
my $exclude_misspelled_tags = 0;
my $fold_long_fields = 0;
my $fold_title = 0;
my $folding_width = 76;
my $extra_tag_file;
my $original_filename;
my $cif_comment_header; # A header with comments to printed at the
# beginning of the output CIF file.
my $cif_header_file; # The name of an external file that holds a CIF
# header.
my $data_block_format = "%07d";
my $data_block_nr; # If defined, specifies that data blocks should be
# numbered in a COD-like fasion.
#* OPTIONS:
#* -h, --add-cif-header header_file.cif
#* Prepend each of the output files with the comments
#* from the beginning of the specified file.
#*
#* --exclude-empty-tags
#* Remove tags that contain only empty values. A value is
#* considered empty if it equal to a single question mark ('?')
#* or a single period ('.').
#* --dont-exclude-empty-tags, --no-exclude-empty-tags
#* Disable the '--exclude-empty-tags' option (default).
#*
#* -x, --extra-tag-list tag-list.lst
#* Add additional tags to the list of recognised CIF tags.
#* These extra tags are presented in a separate file, one
#* tag per line.
#* --exclude-misspelled-tags
#* Remove tags that were not present in the recognised
#* tag list.
#* --dont-exclude-misspelled-tags,
#* --no-exclude-misspelled-tags
#* Disable the '--exclude-misspelled-tags' option.
#*
#* --preserve-loop-order
#* Print loops in the same order they appeared in the
#* original file.
#* --use-internal-loop-order
#* Disregard original loop order while printing the tags
#* (default).
#*
#* --original-filename data_source.cif
#* Use the provided string as the name of the original file.
#* (see --record-original-filename).
#* --clear-original-filename
#* Do not use any previously provided strings as the name
#* of the original file.
#* --record-original-filename
#* Record the original filename and the original data
#* block name for each data block as the '_cod_data_source_*'
#* tag values.
#* --dont-record-original-filename
#* Do not record the original filename and the original
#* data block name (default).
#*
#* -S, --start-data-block-number 1234567
#* Use the provided number as the start number when
#* renaming data blocks (default: '7000001'). Setting
#* this option enables the '-R' option.
#* -d, --data-block-format '%07d'
#* Use the provided format to determine new data block
#* names from the provided data block numbers
#* (default: '%07d').
#* -R, --renumber-data-blocks
#* Rename all data blocks. The new names are constructed
#* by taking a start number (specified by the '-S' option),
#* applying the string format (specified by the '-d' option)
#* and then incrementing the start number for each
#* sequential data block.
#* -R-, --dont-renumber-data-blocks
#* Do not rename data blocks (default). Enabling this
#* option sets the '-S' option to default value.
#*
#* --folding-width 76
#* Specify the length of the longest unfolded line.
#*
#* --fold-title
#* --dont-fold-title
#* Folds the title, if longer than folding width.
#* Default: off.
#*
#* --fold-long-fields
#* --dont-fold-long-fields
#* Fold fields, longer than folding width. Default: off.
#*
#* --use-perl-parser
#* Use Perl parser to parse CIF files.
#* --use-c-parser
#* Use C parser to parse CIF files (default).
#*
#* --help, --usage
#* Output a short usage message (this message) and exit.
#* --version
#* Output version information and exit.
#**
@ARGV = getOptions(
"-h,--add-cif-header" => \$cif_header_file,
"-x,--extra-tag-list" => \$extra_tag_file,
"--exclude-empty-tags" => sub { $exclude_empty_tags = 1; },
"--dont-exclude-empty-tags" => sub { $exclude_empty_tags = 0; },
"--no-exclude-empty-tags" => sub { $exclude_empty_tags = 0; },
"--exclude-misspelled-tags" => sub { $exclude_misspelled_tags = 1; },
"--dont-exclude-misspelled-tags" => sub { $exclude_misspelled_tags = 0; },
"--no-exclude-misspelled-tags" => sub { $exclude_misspelled_tags = 0; },
"--preserve-loop-order" => sub { $preserve_loop_order = 1; },
"--use-internal-loop-order" => sub { $preserve_loop_order = 0; },
"--original-filename" => sub { $original_filename = get_value();
$record_original_filename = 1 },
"--clear-original-filename" => sub { undef $original_filename },
"--record-original-filename" => sub { $record_original_filename = 1; },
"--dont-record-original-filename" => sub { $record_original_filename = 0; },
"-d,--data-block-format" => \$data_block_format,
"-S,--start-data-block-number" => \$data_block_nr,
"-R,--renumber-data-blocks" => sub { $data_block_nr = 7000001 },
"-R-,--dont-renumber-data-blocks" => sub { undef $data_block_nr },
"--folding-width" => \$folding_width,
"--fold-title" => sub{ $fold_title = 1 },
"--dont-fold-title" => sub{ $fold_title = 0 },
"--fold-long-fields" => sub{ $fold_long_fields = 1 },
"--dont-fold-long-fields" => sub{ $fold_long_fields = 0 },
"--use-perl-parser" => sub { $use_parser = "perl" },
"--use-c-parser" => sub { $use_parser = "c" },
'--options' => sub { options; exit },
'--help,--usage' => sub { usage; exit },
'--version' => sub { print get_version_string(), "\n"; exit }
);
my @extra_tags = ();
eval {
if( $extra_tag_file ) {
open( my $extra, '<', "$extra_tag_file" ) or die 'ERROR, '
. 'could not open extra tag list file for reading -- '
. lcfirst($!) . "\n";
@extra_tags = map { s/\s//g; $_ }
map {split}
grep { /^\s*_/ } <$extra>;
close( $extra ) or die 'ERROR, '
. 'error while closing extra tag file after reading -- '
. lcfirst($!) . "\n";
## local $, = "\n"; local $\ = "\n";
print @extra_tags;
}
};
if ($@) {
process_errors( {
'message' => $@,
'program' => $0,
'filename' => $extra_tag_file
}, $die_on_error_level->{'ERROR'} );
};
my %excluded_tags = map { ($_,$_) } @COD::CIF::Tags::Excluded::tag_list;
my @dictionary_tags = ( @COD::CIF::Tags::DictTags::tag_list,
@COD::CIF::Tags::COD::tag_list,
@extra_tags );
my %dictionary_tags = map { $_, $_ } @dictionary_tags;
my %cif_tags_lc = map {(lc($_),$_)} @dictionary_tags;
# Reading the header file
eval {
if( defined $cif_header_file ) {
open( my $header, '<', "$cif_header_file" ) or die 'ERROR, '
. 'could not open CIF header file for reading -- '
. lcfirst($!) . "\n";
local $/; # enable "slurp" mode: read the whole header file.
$cif_comment_header = <$header>;
close( $header ) or die 'ERROR, '
. 'error while closing CIF header file after reading -- '
. lcfirst($!) . "\n";
# The header must not contain CIF 2.0 magic code. For CIF 2.0
# files the magic code is printed explicitly before the header.
$cif_comment_header =~ s/^#\\#CIF_2\.0[ \t]*\n//;
}
};
if ($@) {
process_errors( {
'message' => $@,
'program' => $0,
'filename' => $cif_header_file
}, $die_on_error_level->{'ERROR'} );
};
binmode( STDOUT, ":encoding(UTF-8)" );
binmode( STDERR, ":encoding(UTF-8)" );
my $filename = shift(@ARGV);
my $options = { parser => $use_parser, no_print => 1 };
my ($data, $err_count, $messages) = parse_cif($filename, $options);
process_parser_messages( $messages, $die_on_error_level );
if( !@{$data} ) {
report_message( {
'program' => $0,
'filename' => $filename,
'err_level' => 'WARNING',
'message' => 'file seems to be empty'
}, $die_on_error_level->{'WARNING'} );
}
#------------------------------------------------------------------------------
for my $dataset (@$data) {
my $datablok = $dataset->{values};
my $dataname = 'data_' . $dataset->{'name'};
local $SIG{__WARN__} = sub {
process_warnings( {
'message' => @_,
'program' => $0,
'add_pos' => $dataname,
'filename' => $filename
}, $die_on_error_level )
};
canonicalize_names( $dataset );
next if !has_hkl( $dataset );
eval {
# Remove empty tags, if requested:
if( $exclude_empty_tags ) {
exclude_empty_tags( $dataset );
}
# Fold title if requested:
if( $fold_title ) {
my $cif_title = join( " ", @{$datablok->{_publ_section_title}} );
$cif_title =~ s/\n/ /g;
$datablok->{_publ_section_title} = [
"\n" . join( "\n", map { " " . $_ }
fold( $folding_width - 2,
" +", " ", $cif_title ))
];
}
# Exclude potentially copyrighted and irrelevant tags
# unconditionally:
do {
my @tag_list = @{$dataset->{tags}};
for my $tag (@tag_list) {
if( exists $excluded_tags{$tag} || $tag =~ /^_vrf_/ ) {
exclude_tag( $dataset, $tag );
}
}
};
# Check for misspelled tags:
for my $tag (@{$dataset->{tags}}) {
unless( exists $dictionary_tags{$tag} ) {
warn "WARNING, tag '$tag' is not recognised\n";
}
}
# Exclude the misspelled tags if requested:
if( $exclude_misspelled_tags ) {
my @tag_list = @{$dataset->{tags}};
for my $tag (@tag_list) {
unless( exists $dictionary_tags{$tag} ) {
exclude_tag( $dataset, $tag );
}
}
}
# Add the data source file name, if requested:
if( $record_original_filename ) {
my $basename;
if( defined $original_filename ) {
$basename = basename( $original_filename );
} elsif( defined $filename && $filename ne "-" ) {
$basename = basename( $filename );
} else {
$basename = "?";
}
set_tag( $dataset, "_cod_data_source_file", $basename );
set_tag( $dataset, "_cod_data_source_block", $dataset->{name} );
}
# Clean up the resulting CIF data structure:
for my $excluded_tag (qw( _publ_author_address
_publ_author.address
_publ_author_email
_publ_author.email
_publ_author_footnote
_publ_author.footnote
_publ_author_id_iucr
_publ_author.id_iucr )) {
if( exists $datablok->{$excluded_tag} ) {
exclude_tag( $dataset, $excluded_tag );
}
}
# Print out the CIF header if requested:
if( defined $cif_comment_header ) {
# Ensure that for CIF v2.0 the magic code comes
# before the CIF comment header:
if( exists $dataset->{cifversion} &&
$dataset->{cifversion}{major} == 2 ) {
printf( "#\\#CIF_%d.%d\n",
$dataset->{cifversion}{major},
$dataset->{cifversion}{minor} );
}
print $cif_comment_header;
}
# Calculate the data block name:
if( defined $data_block_nr ) {
$dataset->{name} = sprintf $data_block_format, $data_block_nr;
$data_block_nr ++;
}
# Print out requested tags:
print_cif( $dataset, {
exclude_misspelled_tags => $exclude_misspelled_tags,
preserve_loop_order => $preserve_loop_order,
fold_long_fields => $fold_long_fields,
folding_width => $folding_width,
dictionary_tags => \%dictionary_tags,
dictionary_tag_list => \@dictionary_tags,
} );
};
if ($@) {
process_errors( {
'message' => $@,
'program' => $0,
'filename' => $filename,
'add_pos' => $dataname
}, $die_on_error_level->{'ERROR'} );
};
}
|