File: cif2ref

package info (click to toggle)
cod-tools 3.7.0%2Bdfsg-1
links: PTS, VCS
area: main
in suites: bookworm
size: 154,792 kB
sloc: perl: 57,588; sh: 36,842; ansic: 6,402; xml: 1,982; yacc: 1,117; makefile: 727; python: 166
file content (278 lines) | stat: -rwxr-xr-x 8,475 bytes
#! /bin/sh
#!perl -w # --*- Perl -*--
eval 'exec perl -x $0 ${1+"$@"}'
    if 0;
#------------------------------------------------------------------------------
#$Author: antanas $
#$Date: 2021-04-28 19:35:53 +0300 (Wed, 28 Apr 2021) $
#$Revision: 8738 $
#$URL: svn+ssh://www.crystallography.net/home/coder/svn-repositories/cod-tools/tags/v3.7.0/scripts/cif2ref $
#------------------------------------------------------------------------------
#*
#* Parse a CIF file and print out the essential information in the COD
#* REF format. Accepts optional bibliography file in ref, xrf or plaintext
#* format as the second command line argument.
#*
#* USAGE:
#*    $0 input1.cif optional-bibliography-file.txt
#**

use strict;
use warnings;
use COD::CIF::Parser qw( parse_cif );
use COD::CIF::Unicode2CIF qw( cif2unicode );
use COD::SOptions qw( getOptions );
use COD::SUsage qw( usage );
use COD::ErrorHandler qw( process_errors process_parser_messages );
use COD::ToolsVersion qw( get_version_string );

my $use_parser = 'c';
my $die_on_errors   = 1;
my $die_on_warnings = 0;
my $die_on_notes    = 0;

#* OPTIONS:
#*   -c, --always-continue
#*                     Continue processing and return successful return status
#*                     even if errors are diagnosed.
#*   -c-, --always-die
#*                     Stop and return error status if errors are diagnosed.
#*
#*   --help, --usage
#*                     Output a short usage message (this message) and exit.
#*   --version
#*                     Output version information and exit.
#**
@ARGV = getOptions(
    '-c,--always-continue' => sub { $die_on_errors   = 0;
                                    $die_on_warnings = 0;
                                    $die_on_notes    = 0 },
    '-c-,--always-die'     => sub { $die_on_errors   = 1;
                                    $die_on_warnings = 1;
                                    $die_on_notes    = 1 },

    '--help,--usage' => sub { usage; exit },
    '--version'      => sub { print get_version_string(), "\n"; exit }
);

my $die_on_error_level = {
    ERROR   => $die_on_errors,
    WARNING => $die_on_warnings,
    NOTE    => $die_on_notes
};

my $filename = @ARGV > 0 ? shift(@ARGV) : "-";
my $bib_file = @ARGV > 0 ? shift(@ARGV) : undef;

binmode STDOUT, ':encoding(UTF-8)';
binmode STDERR, ':encoding(UTF-8)';

my $options = { parser => $use_parser, no_print   => 1 };
my ( $data, $err_count, $messages ) = parse_cif( $filename, $options );
process_parser_messages( $messages, $die_on_error_level );

my $n = 0;
for my $dataset (@$data) {
    my $datablok = $dataset->{values};
    next if !defined $datablok->{_atom_site_label};

    print "\n" if $n > 0;

    # Chemical formula:
    my $formula = $datablok->{_chemical_formula_sum}[0];
    if( defined $formula ) {
        $formula =~ s/^\s*|\s*$//g;
    } else {
        $formula = '?';
    }
    $formula =~ s/\n/ /g;
    $formula =~ s/\s+/ /g;

    # Cell constants:
    my $a = $datablok->{_cell_length_a}[0];
    my $b = $datablok->{_cell_length_b}[0];
    my $c = $datablok->{_cell_length_c}[0];
    my $alpha = $datablok->{_cell_angle_alpha}[0];
    my $beta  = $datablok->{_cell_angle_beta}[0];
    my $gamma = $datablok->{_cell_angle_gamma}[0];

    # Spacegroup:
    my $spacegroup;
    for my $tag ('_space_group_name_h-m_alt',
                 '_symmetry_space_group_name_h-m') {
        next if !exists $datablok->{$tag};
        $spacegroup = $datablok->{$tag}[0];
        last;
    }

    # Atom records:

    my $atom = $datablok->{_atom_site_label};

    my $x = $datablok->{_atom_site_fract_x};
    my $y = $datablok->{_atom_site_fract_y};
    my $z = $datablok->{_atom_site_fract_z};

    my $q = $datablok->{_atom_site_occupancy};
    my $U = $datablok->{_atom_site_u_iso_or_equiv};

    if( !defined $U ) {
        $U = $datablok->{_atom_site_uiso_or_biso};
    }

    if( !defined $U && defined $datablok->{_atom_site_b_iso_or_equiv} ) {
        my $B = $datablok->{_atom_site_b_iso_or_equiv};
        my $Pi = 3.14159265358979;
        for my $i ( 0..$#{$B} ) {
            my $b = $B->[$i];
            $b =~ s/\(.*\)$//;
            $datablok->{_atom_site_u_iso_or_equiv}[$i] = $b/(8*$Pi**2);
        }
        $U = $datablok->{_atom_site_u_iso_or_equiv};
    }

    my $multiplicity = $datablok->{_atom_site_symmetry_multiplicity};

    if( !defined $multiplicity ) {
        $multiplicity = $datablok->{_atom_site_symetry_multiplicity};
    }

    my $Wyckoff_sym = $datablok->{_atom_site_wyckoff_symbol};

    # Print it out:

    if( defined $bib_file ) {
        my $reference;
        eval {
            open my $biblio, '<', $bib_file or  die 'ERROR, '
              . 'could not open bibliography file for reading -- '
              . lcfirst($!) . "\n";

            if( $bib_file =~ /\.ref/ ) {
                $reference = <$biblio>; # read the first line
            } elsif( $bib_file =~ /\.xrf/ ) {
                my @reference = grep { !/^\#/ } <$biblio>;
                $reference = $reference[0]; # read the first non-comment line
            } else {
                local $/ = undef; # read the whole file
                $reference = <$biblio>;
            }

            close $biblio or die 'ERROR, '
              . 'error while closing bibliography file after reading -- '
              . lcfirst($!) . "\n";
        };
        if ($@) {
            process_errors( {
              'message'  => $@,
              'program'  => $0,
              'filename' => $bib_file
            }, $die_on_error_level->{'ERROR'} );
        };
        chomp $reference;
        $reference =~ s/\n/ /g;
        print $reference, "\n";
    } else {
        my $text = get_cif_description( $datablok );
        if( defined $text ) {
            print $text, "\n";
        } else {
            print "Bibliography ...\n";
        }
    }

    do {
        local $, = ' ';
        local $\ = "\n";

        print $formula;

        ## $a =~ s/\(.*\)//g;
        ## $b =~ s/\(.*\)//g;
        ## $c =~ s/\(.*\)//g;
        ## $alpha =~ s/\(.*\)//g;
        ## $beta  =~ s/\(.*\)//g;
        ## $gamma =~ s/\(.*\)//g;

        $a = '?' unless defined $a;
        $b = '?' unless defined $b;
        $c = '?' unless defined $c;
        $alpha = '?' unless defined $alpha;
        $beta  = '?' unless defined $beta;
        $gamma = '?' unless defined $gamma;

        $alpha =~ s/^([0-9]+)\.$/$1/;
        $beta  =~ s/^([0-9]+)\.$/$1/;
        $gamma =~ s/^([0-9]+)\.$/$1/;

        print $a, $b, $c, $alpha, $beta, $gamma;

        $spacegroup = '?' unless defined $spacegroup;
        $spacegroup =~ s/[\s()~_]//g;
        print $spacegroup;

        for my $i (0..$#{$x}) {
            $atom->[$i] =~ s/\s//g;
            print
                $atom->[$i],
                defined $multiplicity ? $multiplicity->[$i] : '?',
                defined $Wyckoff_sym ? $Wyckoff_sym->[$i] : '?',
                $x->[$i], $y->[$i], $z->[$i],
                defined $q ? $q->[$i] : '?',
                defined $U ? $U->[$i] : '?';
        }
    };

    $n ++;
}

sub get_cif_description
{
    my $datablok = $_[0];

    my $text;

    my $separator = ' ';

    for my $tag (qw(
                    _chemical_name_mineral
                    _chemical_name_common
                    _chemical_name_systematic
                    _publ_author_name
                    _publ_section_title
                    _journal_name_full
                    _journal_year
                    _journal_volume
                    _journal_issue
                    _journal_page_first
                    _journal_page_last
                )) {
        my $val = '';
        if( exists $datablok->{$tag} ) {
            $val = join( '; ', @{$datablok->{$tag}} );
            $val =~ s/^\s*|\s*$//g;
        }
        if( $tag eq '_journal_issue' && $val ne '' ) {
            $val = '(' . $val . ')';
        }
        if( defined $text ) {
            $text .= $separator . $val;
        } else {
            $text = $val;
        }
        if( $tag eq '_journal_year' || $tag eq '_journal_issue' ) {
            $separator = ' ';
        } elsif( $tag eq '_journal_volume' ) {
            $separator = '';
        } elsif( $tag eq '_chemical_name_common' ) {
            $separator = '|';
        }
    }
    if( defined $text ) {
        $text =~ s/\n/ /g;
        $text =~ s/\s+/ /g;
        $text =~ s/^\s*|\s*$//g;
        $text = cif2unicode( $text );
    }
    return $text;
}