File: cif_reformat_AMCSD_author_names

package info (click to toggle)
cod-tools 3.7.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 154,792 kB
  • sloc: perl: 57,588; sh: 36,842; ansic: 6,402; xml: 1,982; yacc: 1,117; makefile: 727; python: 166
file content (127 lines) | stat: -rwxr-xr-x 4,328 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#! /bin/sh
#!perl -w # --*- Perl -*--
eval 'exec perl -x $0 ${1+"$@"}'
    if 0;
#------------------------------------------------------------------------------
#$Author: antanas $
#$Date: 2021-04-28 19:35:53 +0300 (Wed, 28 Apr 2021) $
#$Revision: 8738 $
#$URL: svn+ssh://www.crystallography.net/home/coder/svn-repositories/cod-tools/tags/v3.7.0/scripts/cif_reformat_AMCSD_author_names $
#------------------------------------------------------------------------------
#*
#* Parse a CIF file, reformat author names in AMCSD convention into COD format.
#*
#* USAGE:
#*    $0 --options input1.cif input*.cif
#**

use strict;
use warnings;
use COD::CIF::Parser qw( parse_cif );
use COD::CIF::Tags::DictTags;
use COD::CIF::Tags::COD;
use COD::CIF::Tags::CanonicalNames qw( canonicalize_all_names );
use COD::CIF::Tags::Print qw( print_cif );
use COD::SOptions qw( getOptions );
use COD::SUsage qw( usage options );
use COD::ErrorHandler qw( process_parser_messages );
use COD::ToolsVersion qw( get_version_string );

# Parser backend name handed to parse_cif() through its 'parser' option;
# switched between 'c' and 'perl' by the command line options below.
my $use_parser = 'c';

# Per-severity flags handed to process_parser_messages() together with the
# parser messages. NOTE(review): presumably a true value makes messages of
# that severity fatal -- confirm against COD::ErrorHandler.
my $die_on_error_level = {
    ERROR   => 1,
    WARNING => 0,
    NOTE    => 0
};

# Passed unchanged to print_cif() as 'keep_tag_order'. It starts out false,
# so unless '--keep-tag-order' is given the tags are NOT kept in their
# original order; the help text below is worded to match this.
my $keep_tag_order = 0;

#* OPTIONS:
#*   --keep-tag-order
#*                     Keep the original tag order in CIF file.
#*   --sort-tags
#*                     Reorder tags in CIF file according to COD (default).
#*
#*   --use-perl-parser
#*                     Use Perl parser to parse CIF files.
#*   --use-c-parser
#*                     Use C parser to parse CIF files (default).
#*
#*   --help, --usage
#*                     Output a short usage message (this message) and exit.
#*   --version
#*                     Output version information and exit.
#**

# getOptions() processes the option table and returns the remaining
# arguments, which are treated as input file names further on.
# '--options' is accepted as well: it calls options() and exits.
@ARGV = getOptions(
    '--keep-tag-order'  => sub { $keep_tag_order = 1 },
    '--sort-tags'       => sub { $keep_tag_order = 0 },
    '--use-perl-parser' => sub { $use_parser = 'perl' },
    '--use-c-parser'    => sub { $use_parser = 'c' },

    '--options'      => sub { options(); exit },
    '--help,--usage' => sub { usage(); exit },
    '--version'      => sub { print get_version_string(), "\n"; exit }
);

# All tag names known from the dictionary tag lists and the COD tag list,
# both as an ordered list and as a lookup hash; print_cif() receives both.
my @dictionary_tags = ( @COD::CIF::Tags::DictTags::tag_list,
                        @COD::CIF::Tags::COD::tag_list );
my %dictionary_tags = map { $_ => $_ } @dictionary_tags;

# With no file names on the command line a single '-' is processed instead.
# NOTE(review): presumably parse_cif() reads standard input for '-' -- confirm.
@ARGV = ( '-' ) unless @ARGV;

# Everything written to STDOUT and STDERR is encoded as UTF-8.
binmode STDOUT, ':encoding(UTF-8)';
binmode STDERR, ':encoding(UTF-8)';

# The regular expression fragments and the helper that uses them share one
# scope, so that both supported name layouts rely on the very same
# definitions of a surname and of initials.
{
    # Surname: a capital letter followed by lowercase letters and/or
    # apostrophes, optionally preceded (without a space) by another
    # capitalised fragment, e.g. "Smith" or "McDonald".
    my $surname_re = qr/(?:[A-Z][a-z]+)?[A-Z][a-z\']+/;

    # Initials: single capital letters separated by whitespace or hyphens,
    # e.g. "J", "J P" or "J-P".
    my $initials_re = qr/(?:[A-Z](?:\s+|-))*[A-Z]/;

    # 'von' prefix: one or more whitespace separated words made of an
    # optional leading letter of either case followed by lowercase letters,
    # e.g. "von", "de la" or "Van der".
    my $von_prefix_re = qr/(?:[A-Za-z][a-z]*\s+)*[A-Za-z]?[a-z]+/;

    # Rewrites one author name from the "Surname I I" layout into
    # "Surname, I. I."; an optional leading 'von' prefix is kept in front
    # of the surname.
    #
    # @param $author
    #       Author name string as stored in the '_publ_author_name' values.
    # @return
    #       The reformatted name, or undef if the name has neither of the
    #       two recognised layouts.
    sub reformat_AMCSD_author_name
    {
        my ( $author ) = @_;

        # Layout 1: surname followed by initials.
        if( my ( $surname, $initials ) =
                $author =~
                    /^
                     \s*($surname_re)    # Surname
                     \s+($initials_re)   # Initials
                     \s*$
                    /x ) {
            # Put a full stop after every initial.
            $initials =~ s/([A-Z])/$1./g;
            return "$surname, $initials";
        }

        # Layout 2: 'von' prefix, surname and initials. A name can not
        # match both layouts: here the surname is never the first word,
        # whereas in layout 1 every word after the first one consists of
        # single capital letters only.
        if( my ( $von_prefix, $surname, $initials ) =
                $author =~
                    /^
                     \s*($von_prefix_re) # 'von' prefix
                     \s+($surname_re)    # Surname
                     \s+($initials_re)   # Initials
                     \s*$
                    /x ) {
            $initials =~ s/([A-Z])/$1./g;
            return "$von_prefix $surname, $initials";
        }

        return;
    }
}

# Process every input file: parse it, rewrite the recognised author names
# in each data block and print the data block back out as CIF.
for my $filename (@ARGV) {
    my $options = { 'parser' => $use_parser, 'no_print' => 1 };
    my ( $data, $err_count, $messages ) = parse_cif( $filename, $options );
    process_parser_messages( $messages, $die_on_error_level );
    # Files that were parsed with errors produce no output.
    next if ( $err_count > 0 );

    canonicalize_all_names( $data );

    for my $dataset (@$data) {

        my $values = $dataset->{values};

        if( exists $values->{_publ_author_name} ) {
            for my $i (0..$#{$values->{_publ_author_name}}) {
                my $reformatted = reformat_AMCSD_author_name(
                    $values->{_publ_author_name}[$i] );
                # Names in an unrecognised layout are left exactly as
                # they were.
                next if !defined $reformatted;
                $values->{_publ_author_name}[$i] = $reformatted;
            }
        }

        print_cif( $dataset, {
            exclude_misspelled_tags => 0,
            preserve_loop_order => 1,
            fold_long_fields => 0,
            dictionary_tags => \%dictionary_tags,
            dictionary_tag_list => \@dictionary_tags,
            keep_tag_order => $keep_tag_order,
        } );
    }
}