File: cif_molecule

package info (click to toggle)
cod-tools 2.3%2Bdfsg-3
links: PTS, VCS
area: main
in suites: buster
size: 114,852 kB
sloc: perl: 53,336; sh: 23,842; ansic: 6,318; xml: 1,982; yacc: 1,112; makefile: 716; python: 158; sql: 73
file content (2218 lines) | stat: -rwxr-xr-x 85,006 bytes
#! /bin/sh
#!perl -w # --*- Perl -*--
eval 'exec perl -x $0 ${1+"$@"}'
    if 0;
#------------------------------------------------------------------------------
#$Author: andrius $
#$Date: 2019-01-21 09:23:55 +0200 (Pr, 21 saus. 2019) $
#$Revision: 6648 $
#$URL: svn://www.crystallography.net/cod-tools/tags/v2.3/scripts/cif_molecule $
#------------------------------------------------------------------------------
#*
#* Restores molecules from a CIF file.
#*
#* USAGE:
#*    $0 --options input1.cif input*.cif
#**

# Note: this script assumes that atoms have unique labels in the input
# CIF file; most often these are labels given by the _atom_site_label
# tag. If the assumption of uniqueness does not hold, the script
# attempts by default to create unique labels itself, appending numeric
# prefixes to the duplicate labels.
#
# The uniqueness of the labels is assumed in checks for atoms at
# special positions, and most importantly in the code removing
# duplicate molecules.
#
# Although there is an option to switch off this diversification of
# labels, the algorithms employed in this script will most probably
# break and give incorrect results (e.g. some atoms, namely ones with
# duplicate labels, will be missing from the output). Thus, use option
# '--dont-uniquify-atoms' with caution.
#
# Atom identification.
# Atoms will be identified within this program using three components:
#
# a) the original label, as found in the input CIF (the "site_label",
# taken from the _atom_site_label data item). This label must be
# unique; if it is not, it will be uniquified by adding a serial
# number upon reading in;
#
# b) a rotation operator (unity operator if no rotation is applied);
# upon any rotation or when atoms are read in, their fractional
# coordinates are truncated modulo 1, i.e. moved to the first octant
# [0..1)x[0..1)x[0..1).
#
# c) a translation vector from the first octant to the actual atom
# position; translation names will use IUCr convention shift +5 (555
# is 0,0,0 translation). For larger translations, ":" character
# separator will be used, e.g. 10:5:-11.
#
# These three components, concatenated with underscores ("_"), will be
# used as unique atom names (the "name" key in the $atom_info hash).

use strict;
use File::Basename qw( basename );
use Clone qw( clone );
use COD::Algebra qw( gcd );
use COD::Algebra::Vector qw( distance vector_sub );
use COD::AtomBricks qw( build_bricks get_atom_index get_search_span );
use COD::AtomNeighbours qw( get_max_covalent_radius make_neighbour_list );
use COD::AtomProperties;
use COD::CIF::Data qw( get_cell get_symmetry_operators );
use COD::CIF::Data::AtomList qw( atom_array_from_cif
                                 atom_groups
                                 atom_is_disordered
                                 atoms_are_alternative
                                 copy_atom
                                 datablock_from_atom_array
                                 generate_cod_molecule_data_block
                                 dump_atoms_as_cif );
use COD::CIF::Data::SymmetryGenerator qw( apply_shifts
                                          atoms_coincide
                                          chemical_formula_sum
                                          symop_apply
                                          symops_apply_modulo1
                                          test_bond
                                          test_bump
                                          translate_atom
                                          translation
                                          trim_polymer );
use COD::CIF::Parser qw( parse_cif );
use COD::CIF::Tags::CanonicalNames qw( canonicalize_all_names );
use COD::CIF::Tags::Manage qw( exclude_tag rename_tags set_loop_tag set_tag );
use COD::CIF::Tags::Merge qw( merge_datablocks );
use COD::CIF::Tags::Print qw( print_cif print_value
                              print_single_tag_and_value );
use COD::ErrorHandler qw( process_errors process_warnings
                          process_parser_messages report_message );
use COD::MorganFingerprints qw( make_morgan_fingerprint );
use COD::Spacegroups::Builder;
use COD::Spacegroups::Symop::Algebra qw( symop_mul symop_invert
                                         symop_is_unity symop_vector_mul
                                         symop_modulo_1 );
use COD::Spacegroups::Symop::Parse qw( symop_from_string
                                       string_from_symop
                                       symop_string_canonical_form
                                       modulo_1 );
use COD::SOptions qw( getOptions );
use COD::SUsage qw( usage options );
use COD::ToolsVersion;

no warnings 'recursion';

my $Id = '$Id: cif_molecule 6648 2019-01-21 07:23:55Z andrius $';

# Debug switches start out undefined and are only set from the command
# line options.
my ( $debug, $symdebug );
my $verbose      = 0;
my $total_nbumps = 0;

# Flag: should molecules be sorted in the output (descending by atom
# number)?
my $sort_molecules = 1;

my $dump_atoms           = 0;
my $format               = '%8.6f';
my $continue_on_errors   = 0;
my $covalent_sensitivity = 0.35;
my $audit                = 1;
my $uniquify_atoms       = 1;

# Atom filtering:
#   - atoms with zero occupancies are not used;
#   - atoms carrying the 'dum' calc flag are not used.
my $exclude_zero_occupancies = 1;
my $exclude_dummy_atoms      = 1;

# When set, occupancies are forcibly set to 1.0.
my $force_unit_occupancies = 0;

# Fraction of covalent bond radii used to decide when atoms are too
# close to each other and are therefore considered a bump.
my $bump_distance_factor = 0.75;

# When set, close atom "bumps" are detected and warned about, but the
# processing is not stopped.
my $ignore_bumps = 0;

# Span, in +/- unit cells, in which polymeric molecules (repeating
# units) will be constructed.
my $max_polymer_span = 4;

# Maximum allowed count of polymer example atoms: more than this amount
# of symmetry (translational) equivalent atoms, for each AU atom, will
# not be written to the output file.
my $max_polymer_atoms = 100;

# File whose leading comments will be prepended to the output.
my $cif_header_file;

# The CIF parser to be used.
my $use_parser = 'c';

# Use Morgan fingerprints to identify duplicated moieties.
my $use_morgan_fingerprints = 0;

# Use COD AtomClassifier to sort atoms for the generation of Morgan
# fingerprints.
my $use_atom_classes = 1;

# Parameters for atom classification via AtomClassifier
# ($max_ring_size is the maximum size of detected rings).
my $flat_planarity       = 0.10;
my $classification_level = 3;
my $max_ring_size        = 7;

# Put all molecules, and all disorder groups, into a single data block
# in the output.
my $use_one_output_datablock = 0;

# Put all alternative conformations into one data block.
my $merge_disorder_groups = 0;

# If true (1), apply symmetry operators from cosets of a point group in
# each molecule to all other molecules, to preserve molecular
# stoichiometry (charge balance, etc.).
my $preserve_stoichiometry = 0;

# Output only the largest molecule, i.e. the one having the greatest
# number of atoms.
my $largest_molecule_only = 0;

# Compute and output the _geom_bond_... data items (bond lengths,
# valencies, etc.).
my $output_geom_bond = 0;

# Do we want a full P1 unit cell that can be used to re-create the
# whole crystal using only the lattice translations?
my $expand_to_p1 = 0;

# Which message severities terminate the program.
my ( $die_on_errors, $die_on_warnings, $die_on_notes ) = ( 1, 0, 0 );

#* OPTIONS:
#*   -1, --one-datablock-output
#*                     Output all moieties to a single output data block.
#*
#*                     However, if the --split-disorder-groups option is
#*                     enabled all generated alternative conformations will
#*                     be put into separate data blocks starting with the
#*                     most likely one (disorder group occupancy wise) and
#*                     ending with the least likely one. In order to retrieve
#*                     only the most likely one, the --largest-molecule-only
#*                     option should be used in combination with the
#*                     --one-datablock-output option.
#*
#*   -1-, --multiple-datablocks-output
#*                     Separate each molecule and each example of an alternative
#*                     conformation into a separate data block (default).
#*
#*   -c, --covalent-sensitivity
#*                     Set a new covalent sensitivity value (default 0.35).
#*
#*   -g, --geom-bond-output
#*                     Output _geom_bond_... data items (bond lengths,
#*                     valencies, etc.).
#*
#*   -g-, --no-geom-bond-output
#*                     Do not output _geom_bond_... information (default).
#*
#*   -h, --add-cif-header input_header.txt
#*                     Comments from the beginning of this file will be
#*                     prepended to the output.
#*
#*   -i, --ignore-bumps
#*                     Detect and warn about close atom "bumps" but do not
#*                     stop processing.
#*
#*   --dont-ignore-bumps, --no-ignore-bumps
#*                     Stop processing immediately if bumps are
#*                     detected (default).
#*
#*   -s, --sort-molecules
#*                     Sort molecules in descending order by their atom count
#*                     and overall occupancy before outputting them. Atom count
#*                     takes precedence over overall occupancy (default).
#*
#*   --dont-sort-molecules, --no-sort-molecules
#*                     Do not sort molecules, print them out in the order they
#*                     are detected.
#*
#*   --expand-to-P1, --P1-expand, --p1-expand
#*                     Expand all atoms to the P1 unit cell, so that the
#*                     translation operators can be used to restore the
#*                     whole crystal.
#*
#*   --dont-expand-to-P1, --no-expand-to-P1
#*   --dont-P1-expand, --no-p1-expand
#*                     Do not expand to P1, output only the minimal molecule
#*                     list (default).
#*
#*   --uniquify-atoms
#*                     Makes unique the labels of atoms (default).
#*
#*   --no-uniquify-atoms, --dont-uniquify-atoms
#*                     Do not make atom labels unique,
#*                     exclude duplicates.
#*
#*   --use-morgan-fingerprints
#*                     Use Morgan fingerprints to identify and skip
#*                     duplicated moieties.
#*
#*   --no-use-morgan-fingerprints, --dont-use-morgan-fingerprints
#*                     Use atom labels to identify and skip duplicated
#*                     moieties. This method is default, however under
#*                     certain circumstances it leaves duplicate moieties,
#*                     as asymmetric unit can initially contain more than
#*                     one copy of a single moiety (default).
#*
#*   --use-atom-classes
#*                     Use COD atom classes, generated by AtomClassifier
#*                     module from 'atomclasses' repository, for the
#*                     generation of Morgan fingerprints. Requires the
#*                     external AtomClassifier module (default).
#*
#*   --no-use-atom-classes, --dont-use-atom-classes
#*                     Use atom chemical types for generation of Morgan
#*                     fingerprints instead of COD atom classes.
#*
#*   --bump-distance-factor 0.75
#*                     A fraction of covalent bond radii sum used to
#*                     determine when atoms are too close and are
#*                     considered a bump (default 0.75).
#*
#*   --continue-on-errors
#*                     Do not stop if errors such as unrecognised atoms are
#*                     encountered; the output may be incorrect and missing
#*                     some atoms if this option is used!
#*
#*   --dont-continue-on-errors, --no-continue-on-errors
#*                     Stop immediately when an error is encountered.
#*
#*   --exclude-zero-occupancies
#*                     Do not use atoms with 0 occupancies in calculations
#*                     (default).
#*
#*   --dont-exclude-zero-occupancies, --no-exclude-zero-occupancies
#*                     Use atoms with 0 occupancies in calculations.
#*
#*   --exclude-dummy-atoms
#*                     Do not use dummy atoms (marked by the 'dum' calc flag)
#*                     in calculations (default).
#*
#*   --dont-exclude-dummy-atoms, --no-exclude-dummy-atoms
#*                     Use dummy atoms (marked by the 'dum' calc flag)
#*                     in calculations. Dummy atoms can be used to mark
#*                     interesting positions within the unit cell
#*                     (e.g. geometric centers of coordinated atom rings),
#*                     but they are not considered as part of the molecule.
#*                     As a result, the occupancies of all output dummy atoms
#*                     are set to '.'. It should also be noted that dummy atoms
#*                     with non-numeric coordinates will still be excluded.
#*
#*   --dont-continue-on-errors, --no-continue-on-errors
#*                     Stop immediately when an error is encountered.
#*
#*   --preserve-stoichiometry
#*                     Apply necessary symmetry operators to preserve molecular
#*                     stoichiometry (charges, etc.)
#*
#*   --dont-preserve-stoichiometry, --no-preserve-stoichiometry
#*                     Do not apply any more symmetry operators than needed to
#*                     reconstruct covalently connected networks; may
#*                     break stoichiometry of salts and complexes (default).
#*
#*   --force-unit-occupancies
#*                     Set occupancies of all output atoms to 1.0. Unit
#*                     occupancies are only set when outputting the atoms
#*                     and do not affect the flow of the algorithm
#*                     (disorder group processing, molecule sorting, etc.).
#*                     Dummy atoms are excluded from the effects of this option
#*                     and are always output with the '.' occupancy.
#*
#*                     Some programs, notably Jumbo converter's cif2cml,
#*                     assume unresolved disorder and do not recognize
#*                     aromatic rings if occupancies are not unities.
#*                     Obviously, this flag only makes sense in combination
#*                     with --split-disorder-groups.
#*
#*   --dont-force-unit-occupancies, --do-not-force-unit-occupancies,
#*   --no-force-unit-occupancies
#*                     Leave occupancies as they are (default).
#*
#*   --dump-atoms
#*                     Dump atoms (including symmetry-equivalent) in CIF
#*                     format for inspection with some graphics program.
#*
#*   --dont-dump-atoms, --no-dump-atoms
#*                     Do not dump atoms (default).
#*
#*   --max-polymer-span 4
#*                     A span, in +/- unit cells, in which polymeric
#*                     molecules (repeating units) will be constructed.
#*
#*   --max-polymer-atoms 100
#*                     A maximum allowed count of polymer example atoms:
#*                     more than this amount of symmetry (translational)
#*                     equivalent atoms, for each AU atom, will not be
#*                     written to the output:
#*
#*                     Using --max-polymer-span=0 --max-polymer-atoms=1
#*                     essentially switches off the polymer detection.
#*
#*   --split-disorder-groups, --dont-merge-disorder-groups
#*                     Put examples of disorder group conformations into
#*                     separate data blocks (default).
#*
#*   --merge-disorder-groups, --dont-split-disorder-groups
#*                     Put all disorder groups into one data block.
#*
#*   --largest, --largest-molecule-only
#*                     Output only the largest molecule. The largest molecule
#*                     is selected based on two criteria in the given order:
#*                     atom count and overall occupancy of the molecule.
#*                     When the combination of the --one-datablock-output and
#*                     --split-disorder-groups options is in effect the
#*                     molecule with the most likely disorder conformation
#*                     (occupancy wise) is returned.
#*
#*                     NOTE: if there is more than one disorder assembly
#*                     and the --split-disorder-groups option is in effect,
#*                     the conformation with the highest atom count might not
#*                     be generated at all. In this case, a molecule that best
#*                     fits the previously defined criteria out of the generated
#*                     conformation subset will be returned.
#*
#*   --all, --all-molecules
#*                     Output all molecules (default).
#*
#*   --use-perl-parser
#*   --use-c-parser
#*                     Specify parser to parse CIF files. C parser is default.
#*
#*   --symdebug
#*                     Print debug output for symmetry reconstruction.
#*   --no-symdebug
#*                     Do not print any symmetry debug output (default).
#*   --debug
#*                     Print some human-readable debug output.
#*   --no-debug
#*                     Suppress any debug output (default).
#*
#*   --format "%8.6f"
#*                     Use the specified format for output coordinate printout.
#*
#*   --audit
#*                     Print audit information to the generated CIF file (default).
#*   --no-audit
#*                     Do not print audit information to the generated CIF file.
#*
#*   --verbose
#*                     Print warning messages in long format.
#*   --no-verbose
#*                     Print warning messages in concise format (default).
#*
#*   --help, --usage
#*                     Output a short usage message (this message) and exit.
#*   --version
#*                     Output version information and exit.
#**
# Command line processing. Whatever getOptions() returns replaces @ARGV
# and is later treated as the list of input file names.
#
# Each key of the table lists one or more option names separated by
# commas; each value is either a reference to the variable that is
# associated with the option or a callback that sets the corresponding
# flag. Option names must be separated by a bare comma with no
# surrounding whitespace, and adjacent string literals that are joined
# with '.' need a comma at the joint -- otherwise two option names get
# fused into a single, unusable one.
@ARGV = getOptions(
    "-1,--one-datablock-output" => sub { $use_one_output_datablock = 1; },
    "-1-,--multiple-datablocks-output" =>
        sub { $use_one_output_datablock = 0; },

    "--expand-to-P1,--P1-expand,--p1-expand" => sub { $expand_to_p1 = 1 },
    "--no-expand-to-P1,--no-P1-expand,--no-p1-expand" =>
        sub { $expand_to_p1 = 0 },
    "--dont-expand-to-P1,--dont-P1-expand,--dont-p1-expand" =>
        sub { $expand_to_p1 = 0 },
    "--do-not-expand-to-P1,--do-not-P1-expand,--do-not-p1-expand" =>
        sub { $expand_to_p1 = 0 },

    "--uniquify-atoms"      => sub { $uniquify_atoms = 1; },
    "--no-uniquify-atoms"   => sub { $uniquify_atoms = 0; },
    "--dont-uniquify-atoms" => sub { $uniquify_atoms = 0; },

    "--use-morgan-fingerprints" =>
        sub { $use_morgan_fingerprints = 1 },
    "--no-use-morgan-fingerprints" =>
        sub { $use_morgan_fingerprints = 0 },
    "--dont-use-morgan-fingerprints" =>
        sub { $use_morgan_fingerprints = 0 },

    "--use-atom-classes" => sub { $use_atom_classes = 1 },
    "--no-use-atom-classes" => sub { $use_atom_classes = 0 },
    "--dont-use-atom-classes" => sub { $use_atom_classes = 0 },

    "-c,--covalent-sensitivity" => \$covalent_sensitivity,

    "-g,--geom-bond-output"     => sub { $output_geom_bond = 1 },
    "-g-,--no-geom-bond-output" => sub { $output_geom_bond = 0 },

    "-h,--add-cif-header" => \$cif_header_file,

    "-i,--ignore-bumps"   => sub{ $ignore_bumps = 1 },
    "--no-ignore-bumps"   => sub{ $ignore_bumps = 0 },
    "--dont-ignore-bumps" => sub{ $ignore_bumps = 0 },

    "-s,--sort-molecules"   => sub{ $sort_molecules = 1 },
    "--no-sort-molecules"   => sub{ $sort_molecules = 0 },
    "--dont-sort-molecules" => sub{ $sort_molecules = 0 },

    "--exclude-zero-occupancies"    => sub { $exclude_zero_occupancies = 1; },
    "--no-exclude-zero-occupancies" => sub { $exclude_zero_occupancies = 0; },
    "--dont-exclude-zero-occupancies" => sub { $exclude_zero_occupancies = 0; },

    "--exclude-dummy-atoms"    => sub { $exclude_dummy_atoms = 1; },
    "--no-exclude-dummy-atoms" => sub { $exclude_dummy_atoms = 0; },
    "--dont-exclude-dummy-atoms" => sub { $exclude_dummy_atoms = 0; },

    "--preserve-stoichiometry" => sub { $preserve_stoichiometry = 1 },
    # No blank after the comma: every other synonym list in this table
    # is written without whitespace, and a leading blank would become
    # part of the second option name.
    "--dont-preserve-stoichiometry,--no-preserve-stoichiometry" =>
        sub { $preserve_stoichiometry = 0 },

    "--bump-distance-factor" => \$bump_distance_factor,

    "--max-polymer-span" => \$max_polymer_span,
    "--max-polymer-atoms" => \$max_polymer_atoms ,

    "--symdebug"    => sub { $symdebug = 1 },
    "--no-symdebug" => sub { $symdebug = 0 },

    "--debug"    => sub { $debug = 1 },
    "--no-debug" => sub { $debug = 0 },

    "--format" => \$format,

    "--force-unit-occupancies" => sub { $force_unit_occupancies = 1 },
    "--no-force-unit-occupancies" => sub { $force_unit_occupancies = 0 },
    "--dont-force-unit-occupancies" => sub { $force_unit_occupancies = 0 },
    "--do-not-force-unit-occupancies" => sub { $force_unit_occupancies = 0 },

    "--dump-atoms"      => sub{ $dump_atoms = 1 },
    "--dont-dump-atoms" => sub{ $dump_atoms = 0 },
    "--no-dump-atoms"   => sub{ $dump_atoms = 0 },

    "--split-disorder-groups,--dont-merge-disorder-groups," .
    "--do-not-merge-disorder-groups,--no-merge-disorder-groups"
        => sub { $merge_disorder_groups = 0 },
    # The first literal must end with a comma; without it the names
    # '--dont-split-disorder-groups' and '--do-not-split-disorder-groups'
    # are concatenated into one bogus option name.
    "--merge-disorder-groups,--dont-split-disorder-groups," .
    "--do-not-split-disorder-groups,--no-split-disorder-groups"
        => sub { $merge_disorder_groups = 1 },

    "--largest,--largest-molecule-only"
        => sub { $largest_molecule_only = 1 },
    "--all,--all-molecules"
        => sub { $largest_molecule_only = 0 },

    "--always-continue"                 => sub { $die_on_errors   = 0;
                                                 $die_on_warnings = 0;
                                                 $die_on_notes    = 0 },
    "-c-,--always-die"                  => sub { $die_on_errors   = 1;
                                                 $die_on_warnings = 1;
                                                 $die_on_notes    = 1 },

    "--continue-on-errors"          => sub { $die_on_errors = 0 },
    "--dont-continue-on-errors"     => sub { $die_on_errors = 1 },
    "--die-on-errors"               => sub { $die_on_errors = 1 },
    "--no-continue-on-errors"       => sub { $die_on_errors = 1 },

    "--continue-on-warnings" => sub { $die_on_warnings = 0 },
    "--die-on-warnings"      => sub { $die_on_warnings = 1 },

    "--continue-on-notes"    => sub { $die_on_notes = 0 },
    "--die-on-notes"         => sub { $die_on_notes = 1 },

    "--use-perl-parser"       => sub{ $use_parser = "perl" },
    "--use-c-parser"          => sub{ $use_parser = "c" },

    "--audit"                   => sub { $audit = 1; },
    "--no-audit"                => sub { $audit = 0; },

    "--verbose"                 => sub { $verbose = 1; },
    "--no-verbose"              => sub { $verbose = 0; },
    "--options"                 => sub { options; exit },
    "--help,--usage"            => sub { usage; exit },
    '--version'                 => sub { print 'cod-tools version ',
                                         $COD::ToolsVersion::Version, "\n";
                                         exit },

# The following options are left only for compatibility with historic
# version of the script:

# The '--remove-duplicate-molecules' is no longer necessary since the
# new algorithm (after changing order of molecule generation and
# disorder group representative generation) never produces duplicate
# molecules:

    "--remove-duplicate-molecules"      => sub { },
    "--no-remove-duplicate-molecules"   => sub { },
    "--dont-remove-duplicate-molecules" => sub { },
);

# Per-severity "die" switches, captured after option processing so that
# they reflect the command line. This hash is what gets handed to
# process_parser_messages() for each parsed input file.
my $die_on_error_level = {
    ERROR   => $die_on_errors,
    WARNING => $die_on_warnings,
    NOTE    => $die_on_notes
};

# Covalent radii taken from Kitaigorodskij 1955, "Organicheskaja
# kristallochimija", p. 11.

#==============================================================================#
# Covalent radii per chemical element. Every element symbol maps to a
# list of records, one record per bond type:
#
#     [ bond order name, bond order, covalent radius in angstroms ]
my %atom_radii = (
    'C' => [
        [ 'single',         1.0, 0.77 ],
        [ 'one-and-a-half', 1.5, 0.70 ],
        [ 'double',         2.0, 0.67 ],
        [ 'triple',         3.0, 0.60 ],
    ],
    'Si' => [
        [ 'single', 1.0, 1.17 ],
        [ 'double', 2.0, 1.07 ],
        [ 'triple', 3.0, 1.00 ],
    ],
    'Ge' => [
        [ 'single', 1.0, 1.17 ],
        [ 'double', 2.0, 1.07 ],
        [ 'triple', 3.0, 1.00 ],
    ],
    'Sn' => [ [ 'single', 1.0, 1.22 ], [ 'double', 2.0, 1.20 ] ],
    'O'  => [ [ 'single', 1.0, 0.66 ], [ 'double', 2.0, 0.55 ] ],
    'S'  => [ [ 'single', 1.0, 1.04 ], [ 'double', 2.0, 0.94 ] ],
    'Se' => [ [ 'single', 1.0, 1.17 ], [ 'double', 2.0, 1.07 ] ],
    'Te' => [ [ 'single', 1.0, 1.37 ], [ 'double', 2.0, 1.27 ] ],
    'B' => [
        [ 'single', 1.0, 0.88 ],
        [ 'double', 2.0, 0.76 ],
        [ 'triple', 3.0, 0.68 ],
    ],
    'N' => [
        [ 'single', 1.0, 0.70 ],
        [ 'double', 2.0, 0.60 ],
        [ 'triple', 3.0, 0.55 ],
    ],
    'P' => [
        [ 'single', 1.0, 1.10 ],
        [ 'double', 2.0, 1.00 ],
        [ 'triple', 3.0, 0.93 ],
    ],
    'As' => [ [ 'single', 1.0, 1.21 ], [ 'double', 2.0, 1.11 ] ],
    'Sb' => [ [ 'single', 1.0, 1.41 ], [ 'double', 2.0, 1.31 ] ],

    # Elements for which only the single bond radius is tabulated:
    'H'  => [ [ 'single', 1.0, 0.30 ] ],
    'F'  => [ [ 'single', 1.0, 0.64 ] ],
    'Cl' => [ [ 'single', 1.0, 1.00 ] ],
    'Br' => [ [ 'single', 1.0, 1.14 ] ],
    'I'  => [ [ 'single', 1.0, 1.33 ] ],
    'Hg' => [ [ 'single', 1.0, 1.50 ] ],
);

#==============================================================================#
# Forward subroutine declarations:

# Prototypes are declared ahead of the subroutine bodies so that calls
# appearing earlier in the file are compiled against them.
sub symgen_atom($$);
sub symgen_all_atoms($$);
sub find_molecules($$$$$$);
sub find_molecule($$$$$$$$$);

# Both standard output streams are written out UTF-8 encoded.
for my $stream ( \*STDOUT, \*STDERR ) {
    binmode( $stream, ':encoding(UTF-8)' );
}

my $cif_header;
eval {
    if( $cif_header_file ) {
        open( my $header, '<',"$cif_header_file" ) or die "ERROR, "
          . "could not open header file for input -- ". lcfirst($!) . "\n";

        $cif_header = "";
        while( <$header> ) {
            last unless /^#/;
            $cif_header .= $_;
        };

        close( $header ) or die "ERROR, "
           . "error while closing header file after reading -- "
           . lcfirst($!) . "\n";

        # The header must not contain CIF 2.0 magic code. For CIF 2.0
        # files the magic code is printed explicitly before the header.
        $cif_header =~ s/^#\\#CIF_2\.0[ \t]*\n//;
    }
};
if ($@) {
    process_errors( {
      'message'       => $@,
      'program'       => $0,
      'filename'      => $cif_header_file,
    }, $die_on_errors )
};

# Main driver: read every input file (standard input if none is given),
# and for each data block restore the molecules and print them as CIF.
@ARGV = ("-") unless @ARGV;

for my $filename (@ARGV) {

    my $options = { 'parser' => $use_parser, 'no_print' => 1 };
    my ( $data, $err_count, $messages ) = parse_cif( $filename, $options );
    process_parser_messages( $messages, $die_on_error_level );

    # Is this line necessary?
    # next if ( $err_count > 0 );

    # Skip inputs that did not yield at least one named data block.
    if( !ref $data ||
        !@$data || !defined $data->[0] || !defined $data->[0]{name} ) {
        report_message( {
                'filename'  => $filename,
                'program'   => $0,
                'err_level' => 'WARNING',
                'message'   => 'file seems to be empty'
            }, $die_on_warnings );
        next;
    }

    canonicalize_all_names( $data );

    if( $cif_header ) {
        # Ensure that for CIF v2.0 the magic code comes
        # before the CIF comment header:
        if( grep { exists $_->{cifversion} &&
                          $_->{cifversion}{major} == 2 } @$data ) {
            print "#\\#CIF_2.0\n";
        }
        print $cif_header;
    }

    for my $dataset (@$data) {

        my $dataname = 'data_' . $dataset->{name};

        # Route all warnings raised while this data block is processed
        # through the common warning handler, tagged with the file and
        # data block names. The handler receives the warning text as its
        # first argument; taking $_[0] explicitly keeps the hash below
        # well-formed regardless of the argument count.
        local $SIG{__WARN__} =  sub { process_warnings( {
                                       'message'       => $_[0],
                                       'program'       => $0,
                                       'filename'      => $filename,
                                       'add_pos'       => $dataname
                                     }, {
                                       WARNING => $die_on_warnings,
                                       NOTE    => $die_on_notes,
                                     } ) };

        my $values = $dataset->{values};
        my $sym_data;
        eval {
            # Extracts symmetry operators.
            # Raises warnings upon unrecognised symmetry information.
            # Raises die if unable to find symmetry information.
            $sym_data = get_symmetry_operators( $dataset );

            my $unity_operator_found = 0;
            for my $symop (@$sym_data) {
                if( symop_is_unity( symop_from_string( $symop ) ) ) {
                    $unity_operator_found = 1;
                    last;
                }
            }
            if( !$unity_operator_found ) {
                warn "WARNING, unity symmetry operator ('x,y,z') is not "
                   . "found in the symmetry operator list -- results may "
                   . "be incorrect\n";
            }
        };
        if ( $@ ) {
            process_errors( {
              'message'       => $@,
              'program'       => $0,
              'filename'      => $filename,
              'add_pos'       => $dataname
            }, $die_on_errors )
        }
        # Nothing can be done for a data block without symmetry operators.
        next if !defined $sym_data || !@{$sym_data};

        my $unique_molecules;
        eval {
            $unique_molecules = get_molecules( $covalent_sensitivity,
                                               $sym_data,
                                               $dataset,
                                               \%COD::AtomProperties::atoms,
                                               $uniquify_atoms );
        };
        if ( $@ ) {
            process_errors( {
              'message'       => $@,
              'program'       => $0,
              'filename'      => $filename,
              'add_pos'       => $dataname
            }, $die_on_errors )
        }
        next if !defined $unique_molecules || !@{$unique_molecules};

        eval {
            if( $preserve_stoichiometry ) {
                # Symmetry group accumulated over all molecules of the
                # data block.
                my $molecular_symmetry = COD::Spacegroups::Builder->new;

                foreach my $molecule (@$unique_molecules) {
                # Build molecule point group here...
                    my $sg = COD::Spacegroups::Builder->new;
                    my %original_atoms = ();
                    for my $atom (@{$molecule->{atoms}}) {
                        my $atom_label = $atom->{site_label};
                        if( exists $atom->{site_symops} ) {
                            $sg->insert_symops( $atom->{site_symops} );
                        }
                        if( !exists $original_atoms{$atom_label} ) {
                            # First atom seen with this site label.
                            $original_atoms{$atom_label} = $atom;
                        } else {
                            # A further atom with the same site label:
                            # insert the operator that relates it to the
                            # first one.
                            my $symop1 = $original_atoms{$atom_label}{symop};
                            my $inverted_symop1 = symop_invert( $symop1 );
                            $sg->insert_symop( symop_mul( $atom->{symop},
                                                          $inverted_symop1 ));
                        }
                    }
                    if( $symdebug ) {
                        print "Molecule symmetry for molecule "
                            . "'$molecule->{chemical_formula_sum}':\n";
                        $sg->print();
                    }
                    $molecule->{symmetry} = $sg;
                    $molecular_symmetry->insert_symops( $sg->all_symops_ref() );
                }
                if( $symdebug ) {
                    print "Molecule cluster symmetry:\n";
                    $molecular_symmetry->print();
                }

                # Every molecule is kept once, plus one symmetry copy for
                # each left coset other than the first one.
                my @stoichiometric_molecules;
                foreach my $molecule (@$unique_molecules) {
                    use COD::Spacegroups::Cosets qw( find_left_cosets
                                                     canonical_string_from_symop );
                    my @cosets = find_left_cosets(
                        $molecular_symmetry->all_symops_ref(),
                        $molecule->{symmetry}->all_symops_ref()
                    );
                    if( $symdebug ) {
                        use COD::Serialise qw( serialiseRef );
                        print "Molecule '$molecule->{chemical_formula_sum}':\n";
                        serialiseRef( $molecule );
                        print "Cosets for '$molecule->{chemical_formula_sum}':\n";
                        serialiseRef( \@cosets );
                    }
                    push( @stoichiometric_molecules, $molecule );
                    for my $coset (@cosets[1..$#cosets]) {
                        ## use COD::Serialise qw( serialiseRef ); serialiseRef( [@cosets[1..$#cosets]] );
                        my $symop = $coset->[0];
                        my $symop_key = canonical_string_from_symop( $symop );
                        my %additional_molecule = (
                            atoms =>
                                symop_apply_to_atoms( $molecule->{atoms},
                                                      $symop ),
                            chemical_formula_sum => 
                                $molecule->{chemical_formula_sum},
                            is_polymer => $molecule->{is_polymer},
                            polymer_dimension => $molecule->{polymer_dimension},
                        );
                        push( @stoichiometric_molecules,
                              \%additional_molecule );
                        ## use COD::Serialise; serialiseRef( \%additional_molecule );
                    }
                }

                # Find molecular Z value:
                # molecules are grouped by a key (Morgan fingerprint or the
                # sorted list of site labels) and Z is the GCD of the
                # group sizes.
                my %molecules;
                for my $molecule (@stoichiometric_molecules) {
                    my $molecule_key;
                    if( $use_morgan_fingerprints ) {
                        my $neighbours =
                            make_neighbour_list(
                                $molecule->{atoms},
                                $covalent_sensitivity,
                                $bump_distance_factor,
                                \%COD::AtomProperties::atoms,
                                1 );
                        $molecule_key =
                            make_morgan_fingerprint(
                                $neighbours,
                                $use_atom_classes,
                                $classification_level,
                                $max_ring_size,
                                $flat_planarity );
                    } else {
                        $molecule_key =
                            join( "\0", sort map {$_->{site_label}}
                                  @{$molecule->{atoms}} );
                    }
                    push( @{$molecules{$molecule_key}}, $molecule );
                }
                my $Z = gcd( map { int(@$_) } values %molecules );

                ## print STDERR ">>> Z = $Z\n";

                # Simplify molecular formula:
                # keep only N/Z representatives of every group.

                if( $Z > 1 ) {
                    @stoichiometric_molecules = ();
                    for my $molecule_key (keys %molecules) {
                        my $N = int(@{$molecules{$molecule_key}});
                        for my $i (0 .. $N/$Z - 1) {
                            push( @stoichiometric_molecules,
                                  $molecules{$molecule_key}[$i] );
                        }
                    }
                }

                $unique_molecules = \@stoichiometric_molecules;
            } # Preserve stoichiometry

            # Z value written to the output; recomputed below only when
            # everything goes into a single output data block.
            my $Z = 1;
            if( $use_one_output_datablock ) {
                my @all_atoms = map { @{$_->{atoms}} } @$unique_molecules;
                if( @all_atoms > 0 ) {
                    # Find molecular Z value, once more:
                    my %moieties;
                    for my $moiety (@$unique_molecules) {
                        my $moiety_key;
                        if( $use_morgan_fingerprints ) {
                            my $neighbours =
                                make_neighbour_list(
                                    $moiety->{atoms},
                                    $covalent_sensitivity,
                                    $bump_distance_factor,
                                    \%COD::AtomProperties::atoms,
                                    1 );
                            $moiety_key =
                                make_morgan_fingerprint(
                                    $neighbours,
                                    $use_atom_classes,
                                    $classification_level,
                                    $max_ring_size,
                                    $flat_planarity );
                        } else {
                            $moiety_key =
                                join( "\0", sort map {$_->{site_label}}
                                      @{$moiety->{atoms}} );
                        }
                        push( @{$moieties{$moiety_key}}, $moiety );
                    }

                    $Z = gcd( map { int(@$_) } values %moieties );
                }
            }

            # Trim polymers
            for my $moiety (@$unique_molecules) {
                next if !$moiety->{is_polymer};
                $moiety->{atoms} = trim_polymer( $moiety->{atoms},
                                                 $max_polymer_span );
            }

            # Merge all molecules to one if requested
            if( $use_one_output_datablock ) {
                my @all_atoms = map { @{$_->{atoms}} } @$unique_molecules;
                if( @all_atoms > 0 ) {
                    # The merged entry carries the largest polymer
                    # dimension found among the moieties.
                    my $max_polymer_dimension;
                    for my $moiety (@$unique_molecules) {
                        next if !$moiety->{polymer_dimension};
                        if( !$max_polymer_dimension ||
                             $max_polymer_dimension < $moiety->{polymer_dimension} ) {
                             $max_polymer_dimension = $moiety->{polymer_dimension};
                        }
                    }
                    $unique_molecules = [{
                        atoms =>
                            \@all_atoms,
                        chemical_formula_sum =>
                            chemical_formula_sum( \@all_atoms, $Z ),
                        is_polymer => ((grep { $_->{is_polymer} == 1 }
                                               @$unique_molecules) > 0),
                        polymer_dimension => $max_polymer_dimension,
                    }];
                }
            }

            ## use COD::Serialise qw( serialiseRef ); serialiseRef( $unique_molecules );
            # Split init atoms into assemblies and groups, if requested
            if( !$merge_disorder_groups ) {
                my @split_molecules;
                my $n = 1;
                for my $molecule (@$unique_molecules) {
                    ## print ">>> molecule No. ", $n++, "\n";
                    my $atom_list = $molecule->{atoms};
                    my $disorder_groups = atom_groups($atom_list);
                    ## print ">>> ngroups = ", int(@$disorder_groups), "\n";
                    ## use COD::Serialise qw( serialiseRef ); serialiseRef( $disorder_groups );
                    for my $disorder_representative (@$disorder_groups) {
                        push( @split_molecules,
                              {
                                  atoms =>
                                      $disorder_representative,
                                  chemical_formula_sum =>
                                      chemical_formula_sum
                                          ( $disorder_representative, $Z ),
                                  is_polymer => $molecule->{is_polymer},
                                  polymer_dimension =>
                                      $molecule->{polymer_dimension},
                              }
                        );
                    }
                }
                $unique_molecules = \@split_molecules;
            }

            # There is no need to sort the molecules if the single data block
            # output is required since:
            # a) there is only one molecule (no disorder);
            # b) there are several disorder configurations, but the
            #    best one (occupancy wise) is already at the beginning
            #    of the array
            if( !$use_one_output_datablock &&
                ( $sort_molecules || $largest_molecule_only ) ) {
                # Sum of the atom occupancies of every molecule; stays 0
                # when the first atom carries no occupancy value.
                my @molecule_sum_occupancy;
                for (my $i = 0; $i < @{$unique_molecules}; $i++ ) {
                    $molecule_sum_occupancy[$i] = 0;
                    my $atoms = $unique_molecules->[$i]{'atoms'};
                    next if ( !defined $atoms->[0]{'atom_site_occupancy'} );
                    for my $atom (@{$atoms}) {
                        my $occupancy = (
                                      $atom->{'atom_site_occupancy'} eq '.' ||
                                      $atom->{'atom_site_occupancy'} eq '?' )
                                      ? 0 : $atom->{'atom_site_occupancy'};
                        $occupancy =~ s/[(]\d+[)]$//; # remove precision
                        $molecule_sum_occupancy[$i] += $occupancy;
                    }
                };

                # Largest molecule first; molecules with the same number
                # of atoms are ordered by descending summed occupancy.
                # Both array dereferences must be closed before '||',
                # otherwise the occupancy comparison ends up inside the
                # dereference and is never used as a tie-breaker.
                my @sorted_indexes = sort {
                    @{$unique_molecules->[$b]{atoms}} <=>
                    @{$unique_molecules->[$a]{atoms}} ||
                    $molecule_sum_occupancy[$b] <=>
                    $molecule_sum_occupancy[$a]
                } 0..$#$unique_molecules;
                @{$unique_molecules} = @{$unique_molecules}[@sorted_indexes];
            }

            my $molecule_id = 0;
            my $dataset_name = $dataset->{name};
            foreach my $molecule (@$unique_molecules) {
                # The molecule number is passed on (and appended to the
                # data block name by print_molecule) unless exactly one
                # data block is going to be printed.
                my $id;
                unless( ($use_one_output_datablock &&
                         $merge_disorder_groups) ||
                         $largest_molecule_only ) {
                    $id = $molecule_id;
                } else {
                    $id = undef;
                }

                if( $output_geom_bond ) {
                    $molecule->{bonds} = atom_bonds( $molecule->{atoms},
                                                     \%COD::AtomProperties::atoms,
                                                     $covalent_sensitivity );
                }

                print_molecule( $id, $audit, $molecule, $Id,
                                $dataset, $dataset_name, $filename,
                                $sym_data, $Z );

                last if $largest_molecule_only;

                $molecule_id++;
            }
        }; # eval block end
        if ( $@ ) {
            process_errors( {
              'message'       => $@,
              'program'       => $0,
              'filename'      => $filename,
              'add_pos'       => $dataname
            }, $die_on_errors )
        }
    }
}

#==============================================================================#
# This is the main function where other functions such as find_molecules are
# called.
# Accepts
#     covalent_sensitivity - a threshold for covalent sensitivity
#     sym_data             - a reference to an array of symmetry operator
#                            strings taken from the CIF file
#     dataset              - a CIF data block; its 'values' hash holds the
#                            data read from the CIF file
#     atom_properties      - a reference to a hash of atom properties
#     uniquify_atoms       - a flag that is passed on to the subroutine
#                            constructing the atom list
#
# Returns
#     unique_molecules     - a reference to an array of hashes
#                     %molecule = (
#                         atoms=>[\%atom_info1, \%atom_info2], #covalent bond
#                         chemical_formula_sum=>"C6 H6",
#                                 );

sub get_molecules
{
    my ( $covalent_sensitivity, $sym_data, $dataset,
         $atom_properties, $uniquify_atoms ) = @_;

    my $values = $dataset->{values};

    # Parsed symmetry operators used for generating the atoms:
    my @sym_operators = map { symop_from_string($_) } @{$sym_data};

    # A separately parsed operator list, together with a lookup table
    # from the canonical operator string to the operator index, is handed
    # to the atom list constructor:
    my @listed_symops = map { symop_from_string($_) } @{$sym_data};
    my %symop_index_of;
    for my $index ( 0 .. $#{$sym_data} ) {
        my $canonical = symop_string_canonical_form( $sym_data->[$index] );
        $symop_index_of{$canonical} = $index;
    }
    my $symop_list = {
        symops    => \@listed_symops,
        symop_ids => \%symop_index_of,
    };

    # Extract atoms fract coordinates
    my $atom_list = atom_array_from_cif(
        $dataset,
        {
            uniquify_atom_names         => 1,
            uniquify_atoms              => $uniquify_atoms,
            exclude_dummy_atoms         => $exclude_dummy_atoms,
            exclude_dummy_coordinates   => 1,
            exclude_unknown_coordinates => 1,
            symop_list                  => $symop_list,
            modulo_1                    => 1,
            atom_properties             => $atom_properties,
            continue_on_errors          => !$die_on_errors,
        }
    );
    return [] unless defined $atom_list;

    # atoms with zero occupancies are not initially filtered in the
    # 'atom_array_from_cif' subroutine due to some dummy atoms
    # potentially containing zero or equivalent ('.', '?') occupancies
    if ( $exclude_zero_occupancies ) {
        my @retained_atoms;
        for my $atom ( @{$atom_list} ) {
            my $is_vacant = 0;
            if ( exists $atom->{'atom_site_occupancy'} ) {
                my $occupancy = $atom->{'atom_site_occupancy'};
                if ( $occupancy eq '?' || $occupancy eq '.' ) {
                    $is_vacant = 1;
                } else {
                    $occupancy =~ s/[(]\d+[)]$//; # remove precision
                    $is_vacant = 1 if $occupancy == 0.0;
                }
            }

            # Vacant sites are dropped unless flagged as dummy atoms.
            if ( $is_vacant ) {
                next if !exists $atom->{'calc_flag'} ||
                        $atom->{'calc_flag'} ne 'dum';
            }

            push @retained_atoms, $atom;
        }
        $atom_list = \@retained_atoms;
    }

    unless( @{$atom_list} ) {
        warn "WARNING, no atoms suitable for processing were found -- "
           . "maybe all occupancies were unknown, zero, or "
           . "all atom types were unrecognised\n";
        return [];
    }

    my $max_covalent_radius = get_max_covalent_radius( $atom_properties );

    my @unique_molecules;
    my %seen_molecules;

    my $unit_cell_atoms = symgen_all_atoms( $atom_list, \@sym_operators );
    my $symmetric_atoms = apply_shifts( $unit_cell_atoms );

    # Starting atoms: either the whole generated unit cell, or only those
    # shifted atoms that carry the name of one of the input atoms.
    my @initial_atoms;
    if( $expand_to_p1 ) {
        @initial_atoms = @{$unit_cell_atoms};
    } else {
        my %atom_list_names = map { $_->{name} => 1 } @{$atom_list};
        @initial_atoms =
            grep { exists $atom_list_names{$_->{name}} } @{$symmetric_atoms};
    }

    if( !$dump_atoms ) {
        my $brick_size = $max_covalent_radius * 2 + $covalent_sensitivity;
        my $bricks = build_bricks( $symmetric_atoms, $brick_size );

        # Finds molecules
        push( @unique_molecules,
              find_molecules( $covalent_sensitivity,
                              $atom_properties,
                              $symmetric_atoms,
                              \@initial_atoms,
                              $bricks,
                              \%seen_molecules ) );
    } else {
        # Only dump the starting atoms; no molecules are searched for.
        dump_atoms_as_cif( 1, \@initial_atoms,
                           [ get_cell( $values ) ] );
    }

    # Calculates chemical formula sum
    for my $found_molecule (@unique_molecules) {
        my $formula = chemical_formula_sum( $found_molecule->{atoms} );
        $found_molecule->{chemical_formula_sum} = $formula;
    }

    return \@unique_molecules;
}

#===============================================================#
# Applies symmetry operator to all atoms in a given list.
#
# The symop_apply_to_atoms subroutine accepts a reference to an array
# of hash references:
#
# $atom_list = [
#                 {
#                    site_label=>"C1",
#                    name=>"C1_2",
#                    chemical_type=>"C",
#                    coordinates_fract=>[1.0, 1.0, 1.0],
#                    unity_matrix_applied=>1
#                 }, # $atom_info hash
#                 $atom2_info, 
#                 $atom3_info, 
#                 $atom4_info
#              ]
#
# and a reference to an array - symmetry operator:
#
# my $symop = [
#     [ r11 r12 r13 t1 ]
#     [ r21 r22 r23 t2 ]
#     [ r31 r32 r33 t3 ]
#     [   0   0   0  1 ]
# ],
#
# Returns a reference to an array of the above-mentioned atom_info hashes.

sub symop_apply_to_atoms
{
    my ( $atoms, $operator ) = @_;

    # One transformed atom is produced for each input atom, in the
    # original order; a fresh option hash is built for every call.
    my @transformed_atoms;
    foreach my $source_atom ( @{$atoms} ) {
        my %apply_options = ( append_symop_to_label => $expand_to_p1 );
        push @transformed_atoms,
             symop_apply( $source_atom, $operator, \%apply_options );
    }

    return \@transformed_atoms;
}

#===============================================================#
# Generate symmetry equivalents of an atom, exclude duplicates
# on special positions

sub symgen_atom($$)
{
    my ( $atom, $sym_operators ) = @_;

    my %symop_options = ( append_symop_to_label => $expand_to_p1 );
    my( $sym_atoms ) = symops_apply_modulo1( $atom, $sym_operators,
                                             \%symop_options );

    # An empty result, or a multiplicity ratio of 1 on the first image,
    # means there is nothing to filter: return the images as they are.
    return @{$sym_atoms}
        if $sym_atoms &&
           ( !@{$sym_atoms} ||
             $sym_atoms->[0]{multiplicity_ratio} == 1 );

    # Otherwise compare every pair of images and mark the later one of
    # each coinciding pair by its name.
    my %is_duplicate;
    for my $first (0 .. $#{$sym_atoms} - 1) {
        my $reference = $sym_atoms->[$first];
        for my $second ($first + 1 .. $#{$sym_atoms}) {
            my $candidate = $sym_atoms->[$second];
            next unless atoms_coincide( $reference, $candidate,
                                        $reference->{f2o} );
            $is_duplicate{$candidate->{name}} = 1;
        }
    }

    my @unique_atoms =
        grep { !defined $is_duplicate{$_->{name}} } @{$sym_atoms};
    return @unique_atoms;
}

#===============================================================#
# Generate symmetry equivalents of all atoms from a list, exclude
# duplicates on special positions. Check the multiplicity values
# provided in the original file.

sub symgen_all_atoms($$)
{
    my ( $atoms, $sym_operators ) = @_;

    # Symmetry equivalents of all input atoms, in input order.
    my @generated_atoms;
    foreach my $source_atom ( @{$atoms} ) {
        push @generated_atoms, symgen_atom( $source_atom, $sym_operators );
    }

    # Compare the multiplicities given in the file with the values stored
    # in the 'multiplicity' and 'multiplicity_ratio' atom fields.
    my $ratio_count = 0;
    foreach my $atom ( @{$atoms} ) {
        next if !exists $atom->{_atom_site_symmetry_multiplicity};

        my $declared = $atom->{_atom_site_symmetry_multiplicity};
        next if $declared eq '?' || $declared eq '.';

        my $multiplicity = $atom->{multiplicity};
        next if $declared == $multiplicity;

        # A value equal to the multiplicity ratio is only counted here
        # and reported once, after the loop.
        if( $declared == $atom->{multiplicity_ratio} ) {
            $ratio_count++;
            next;
        }

        warn "WARNING, given multiplicity value "
           . "'$declared' "
           . "for atom $atom->{name} is different from "
           . "calculated value '$multiplicity' -- "
           . "taking calculated value\n";
    }

    if( $ratio_count > 0 ) {
        warn "WARNING, multiplicity ratios are given instead of "
           . "multiplicities for $ratio_count atoms -- "
           . "taking calculated values\n";
    }

    return \@generated_atoms;
}

#===============================================================#
# Prints molecule to the CIF file.

# Accepts a hash
# %molecule = (
#               atoms=>[\%atom_info1, \%atom_info2], #covalent bond
#               chemical_formula_sum=>"\\'C6 H6\\'",
#             );

sub print_molecule
{
    my( $molecule_id, $audit, $molecule, $Id, $dataset, $dataset_name,
        $filename, $sym_data, $Z ) = @_;

    my $new_dataset = clone( $dataset );

    $new_dataset->{name} = $dataset_name;
    if( defined $molecule_id ) {
        $new_dataset->{name} .= "_molecule_" . $molecule_id;
    }

    my @data2copy = qw(
    _publ_author_name
    _publ_section_title
    _journal_issue
    _journal_name_full
    _journal_page_first
    _journal_page_last
    _journal_volume
    _journal_year

    _cell_length_a
    _cell_length_b
    _cell_length_c
    _cell_angle_alpha
    _cell_angle_beta
    _cell_angle_gamma

    _cell_measurement_pressure
    _cell_measurement.pressure
    _cell_measurement.pressure_esd
    _cell_measurement_pressure_gPa
    _cell_measurement_radiation
    _cell_measurement.radiation
    _cell_measurement.temp
    _cell_measurement_temperature
    _cell_measurement_temperature_C
    _cell_measurement.temp_esd
    _cell_measurement_wavelength
    _cell_measurement.wavelength
    _cell_measurement_wavelength_nm
    _cell_measurement_wavelength_pm

    _diffrn_ambient_environment
    _diffrn.ambient_environment
    _diffrn_ambient_pressure
    _diffrn.ambient_pressure
    _diffrn.ambient_pressure_esd
    _diffrn_ambient_pressure_gPa
    _diffrn_ambient_pressure_gt
    _diffrn.ambient_pressure_gt
    _diffrn_ambient_pressure_lt
    _diffrn.ambient_pressure_lt
    _diffrn.ambient_temp
    _diffrn.ambient_temp_details
    _diffrn_ambient_temperature
    _diffrn_ambient_temperature_C
    _diffrn_ambient_temperature_gt
    _diffrn_ambient_temperature_lt
    _diffrn.ambient_temp_esd
    _diffrn.ambient_temp_gt
    _diffrn.ambient_temp_lt

    _diffrn_radiation_collimation
    _diffrn_radiation.collimation
    _diffrn_radiation_detector
    _diffrn_radiation_detector_dtime
    _diffrn_radiation.diffrn_id
    _diffrn_radiation.div_x_source
    _diffrn_radiation.div_x_y_source
    _diffrn_radiation.div_y_source
    _diffrn_radiation_filter_edge
    _diffrn_radiation.filter_edge
    _diffrn_radiation_filter_edge_nm
    _diffrn_radiation_filter_edge_pm
    _diffrn_radiation_inhomogeneity
    _diffrn_radiation.inhomogeneity
    _diffrn_radiation_monochromator
    _diffrn_radiation.monochromator
    _diffrn_radiation_polarisn_norm
    _diffrn_radiation.polarisn_norm
    _diffrn_radiation_polarisn_ratio
    _diffrn_radiation.polarisn_ratio
    _diffrn_radiation.polarizn_source_norm
    _diffrn_radiation.polarizn_source_ratio
    _diffrn_radiation_probe
    _diffrn_radiation.probe
    _diffrn_radiation_source
    _diffrn_radiation_type
    _diffrn_radiation.type
    _diffrn_radiation_wavelength
    _diffrn_radiation_wavelength_id
    _diffrn_radiation_wavelength.id
    _diffrn_radiation.wavelength_id
    _diffrn_radiation_wavelength_nm
    _diffrn_radiation_wavelength_pm
    _diffrn_radiation_wavelength.wavelength
    _diffrn_radiation_wavelength_wt
    _diffrn_radiation_wavelength.wt
    _diffrn_radiation_xray_symbol
    _diffrn_radiation.xray_symbol

    _diffrn_reflns_theta_full
    _diffrn_reflns_resolution_full
    _diffrn_reflns_theta_max
    _diffrn_reflns_resolution_max
    _reflns_d_resolution_high
    _reflns.d_resolution_high
    _reflns_d_resolution_high_nm
    _reflns_d_resolution_high_pm
    _reflns_d_resolution_low
    _reflns.d_resolution_low
    _reflns_d_resolution_low_nm
    _reflns_d_resolution_low_pm
    _diffrn_reflns_limit_h_max
    _diffrn_reflns.limit_h_max
    _diffrn_reflns_limit_h_min
    _diffrn_reflns.limit_h_min
    _diffrn_reflns_limit_k_max
    _diffrn_reflns.limit_k_max
    _diffrn_reflns_limit_k_min
    _diffrn_reflns.limit_k_min
    _diffrn_reflns_limit_l_max
    _diffrn_reflns.limit_l_max
    _diffrn_reflns_limit_l_min
    _diffrn_reflns.limit_l_min

    _cod_duplicate_entry
    _[local]_cod_duplicate_entry
);

    my @data2rename = qw(
    _chemical_formula_analytical
    _chemical_formula.analytical
    _chemical_formula.entry_id
    _chemical_formula_iupac
    _chemical_formula.iupac
    _chemical_formula_moiety
    _chemical_formula.moiety
    _chemical_formula_structural
    _chemical_formula.structural
    _chemical_formula_sum
    _chemical_formula.sum
    _pd_proc_ls_prof_R_factor
    _pd_proc_ls_prof_wR_factor
    _refine_hist.R_factor_all
    _refine_hist.R_factor_obs
    _refine_hist.R_factor_R_free
    _refine_hist.R_factor_R_work
    _refine_ls_class_R_factor_all
    _refine_ls_class.R_factor_all
    _refine_ls_class_R_factor_gt
    _refine_ls_class.R_factor_gt
    _refine_ls_class_wR_factor_all
    _refine_ls_class.wR_factor_all
    _refine_ls_R_factor_all
    _refine.ls_R_factor_all
    _refine_ls_R_factor_gt
    _refine.ls_R_factor_gt
    _refine_ls_R_factor_obs
    _refine.ls_R_factor_obs
    _refine.ls_R_factor_R_free
    _refine.ls_R_factor_R_free_error
    _refine.ls_R_factor_R_free_error_details
    _refine.ls_R_factor_R_work
    _refine_ls_shell.R_factor_all
    _refine_ls_shell.R_factor_obs
    _refine_ls_shell.R_factor_R_free
    _refine_ls_shell.R_factor_R_free_error
    _refine_ls_shell.R_factor_R_work
    _refine_ls_shell.wR_factor_all
    _refine_ls_shell.wR_factor_obs
    _refine_ls_shell.wR_factor_R_free
    _refine_ls_shell.wR_factor_R_work
    _refine_ls_wR_factor_all
    _refine.ls_wR_factor_all
    _refine_ls_wR_factor_gt
    _refine_ls_wR_factor_obs
    _refine.ls_wR_factor_obs
    _refine_ls_wR_factor_ref
    _refine.ls_wR_factor_R_free
    _refine.ls_wR_factor_R_work
    _reflns_class_R_factor_all
    _reflns_class.R_factor_all
    _reflns_class_R_factor_gt
    _reflns_class.R_factor_gt
    _reflns_class_wR_factor_all
    _reflns_class.wR_factor_all
);

    my %data2copy = map { $_, $_ } @data2copy;

    my @tag_list = @{$new_dataset->{tags}};

    my $atom_site_type_symbol = $new_dataset->{values}{_atom_site_type_symbol};
    my $atom_site_occupancy   = $new_dataset->{values}{_atom_site_occupancy};
    my $atom_site_U_iso_or_equiv =
        $new_dataset->{values}{_atom_site_U_iso_or_equiv};

    my $src_tag_prefix = '_[local]_cod_src';
    my %renamed_tags = rename_tags( $new_dataset,
                                    \@data2rename,
                                    $src_tag_prefix );

    my @tags_to_exclude = grep { !exists $data2copy{$_} &&
                                 !exists $renamed_tags{$_} }
                               @{$new_dataset->{tags}};
    foreach (@tags_to_exclude) {
        exclude_tag( $new_dataset, $_ );
    }

    if( $audit ) {
        my $id_value = $Id;
        $id_value =~ s/\s*\$\s*//g;
        set_tag( $new_dataset, '_audit_creation_method', $id_value );
    }

    set_tag( $new_dataset, '_chemical_formula_sum',
             $molecule->{chemical_formula_sum} );
    set_tag( $new_dataset, '_cod_data_source_file',
             basename( $filename ) );
    set_tag( $new_dataset, '_cod_data_source_block',
             $dataset_name );
    set_tag( $new_dataset, '_cell_formula_units_Z', $Z );
    set_tag( $new_dataset, '_symmetry_space_group_name_H-M', 'P 1' );

    set_loop_tag( $new_dataset, '_symmetry_equiv_pos_as_xyz',
                  undef, [ 'x, y, z' ] );

    if( $molecule->{is_polymer} ) {
        set_tag( $new_dataset, '_cod_molecule_is_polymer', 'yes' );
    }
    if( $molecule->{polymer_dimension} ) {
        set_tag( $new_dataset, '_cod_molecule_polymer_dimension',
                 $molecule->{polymer_dimension} );
    }

    my @atoms = sort {
        length($a->{name}) == length($b->{name}) ?
        $a->{name} cmp $b->{name} :
        length($a->{name}) <=> length($b->{name})
    } @{$molecule->{atoms}};

    my $atoms_datablock = datablock_from_atom_array( \@atoms );
    merge_datablocks( $atoms_datablock, $new_dataset );

    my $cod_molecule_datablock = generate_cod_molecule_data_block( \@atoms );
    merge_datablocks( $cod_molecule_datablock, $new_dataset );

    if( $force_unit_occupancies &&
        exists $new_dataset->{values}{_atom_site_occupancy} ) {
        set_loop_tag( $new_dataset,
                      '_atom_site_occupancy',
                      '_atom_site_label',
                      [ map { exists $_->{calc_flag} && $_->{calc_flag} eq 'dum'
                                ? '.' : '1.0' } @atoms ] );
    }
    if( !$use_one_output_datablock ) {
        exclude_tag( $new_dataset, '_atom_site_disorder_assembly' );
        exclude_tag( $new_dataset, '_atom_site_disorder_group' );
    }

    # Forcing coordinate format
    for my $tag ( qw( _atom_site_fract_x
                      _atom_site_fract_y
                      _atom_site_fract_z ) ) {
        set_loop_tag( $new_dataset,
                      $tag,
                      '_atom_site_label',
                      [ map { $_ = sprintf $format, $_;
                              s/^\s+//; s/\s+$//; $_ }
                            @{$new_dataset->{values}{$tag}} ] );
    }

    # Printing _geom_bond_ output on request
    if( $output_geom_bond ) {
        if( exists $molecule->{bonds} ) {
            set_loop_tag( $new_dataset,
                          '_geom_bond_atom_site_label_1',
                          '_geom_bond_atom_site_label_1',
                          [ map { $_->{atom1}{name} }
                                @{$molecule->{bonds}} ] );
            set_loop_tag( $new_dataset,
                          '_geom_bond_atom_site_label_2',
                          '_geom_bond_atom_site_label_1',
                          [ map { $_->{atom2}{name} }
                                @{$molecule->{bonds}} ] );
            set_loop_tag( $new_dataset,
                          '_geom_bond_distance',
                          '_geom_bond_atom_site_label_1',
                          [ map { sprintf '%.5f', $_->{distance} }
                                @{$molecule->{bonds}} ] );
            set_loop_tag( $new_dataset,
                          '_geom_bond_valence',
                          '_geom_bond_atom_site_label_1',
                          [ map { $_->{order} }
                                @{$molecule->{bonds}} ] );
        } else {
            warn "WARNING, bond data necessary to compute _geom_bond_ "
               . "data items was not calculated\n";
        }
    }

    print_cif( $new_dataset,
                    {
                        preserve_loop_order => 1,
                        keep_tag_order => 1
                    } );

    return;
}

#===============================================================#
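# Finds all possible molecules in the CIF file. If two atoms are connected via
# then the algorithm states that there is no bond between these two atoms.
# NOTE(review): the sentence above is truncated after "connected via" -- the
# condition under which no bond is reported still needs to be restored.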

# The algorithm:
# 1. Takes an initial atom and checks that it has not already been found
#    in another molecule.
# 2. If it has not, starts the search for a new molecule:
# 2.1  Applies modulo_1 to the coordinates of the initial atom, giving
#      atom_modulo_1.
# 2.2  Finds the translation from the initial atom to atom_modulo_1.
# 2.3  Searches for all neighbours of atom_modulo_1.
# 2.4  For each neighbour of atom_modulo_1, repeats steps 2.1 -- 2.4.
# 2.5  Translates atom_modulo_1 and all its neighbours according to the
#      translation vector, so that atom_modulo_1 becomes the initial atom
#      again and the other atoms are shifted accordingly.
# 3. Repeats from step 1 until no initial atoms are left.


# Accepts
#     covalent_sensitivity - a threshold for covalent sensitivity
#     atom_properties(
#           H => {
#                     name => Hydrogen, #(chemical_type)
#                     period => 1,
#                     group => 1,
#                     block => s,
#                     atomic_number => "1",
#                     atomic_weight => 1.008,
#                     covalent_radius => 0.23,
#                     vdw_radius => 1.09,
#                     valency => [1],
#                     },
#          );
# symmetric_atoms and initial_atoms are arrays of
#                                 $atom_info = {
#                                             name=>"C1_2",
#                                             site_label=>"C1",
#                                             chemical_type=>"C",
#                                             coordinates_fract=>[1.0, 1.0,1.0],
#                                             coordinates_ortho=>[1.0, 1.0,1.0],
#                                             unity_matrix_applied=>1
#                                             }
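# Returns an array of references to
# %molecule = (
#               atoms => [
#                   \%atom1_info, \%atom2_info, \%atom3_info, \%atom4_info
#               ],
#               chemical_formula_sum => '', # left empty by this subroutine
#               is_polymer => 1, # true if find_molecule() reported
#                                # polymer atoms for this molecule
#               polymer_dimension => 2, # largest rank of the translation
#                                       # differences between repeated
#                                       # atoms; may be undefined
#             );
# Covalent bonds are not stored by this subroutine; atom_bonds() builds
# the bond list.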

sub find_molecules($$$$$$)
{
    # Arguments, in order:
    #   covalent_sensitivity, atom_properties, symmetric_atoms, bricks --
    #       handed over to find_molecule() unchanged;
    #   initial_atoms  -- reference to the array of atoms used as seeds;
    #   seen_molecules -- accepted for interface compatibility only, it is
    #       not used in this subroutine.
    my( $covalent_sensitivity, $atom_properties, $symmetric_atoms,
        $initial_atoms, $bricks, $seen_molecules ) = @_;

    # Bookkeeping shared by every find_molecule() call started from here.
    my( %used_atoms, %used_originals, %used_uc_atoms, %checked_pairs );

    my @unique_molecules;
    my $nbumps = 0;

    for my $seed_atom (@$initial_atoms) {
        # A seed whose cell label was already visited belongs to a
        # molecule that has been collected before.
        if( exists $used_originals{$seed_atom->{cell_label}} ) {
            next;
        }
        print STDERR ">>>> starting new molecule\n" if $debug;

        ## if( ! $expand_to_p1 &&
        ##     $seed_atom->{cell_label} ne $seed_atom->{site_label} ) {
        ##     print STDERR
        ##         ">>>> site: $seed_atom->{site_label}, " .
        ##         "cell: $seed_atom->{cell_label}\n";
        ## }

        my( $found_atoms, $found_bumps, $found_polymer_atoms ) =
            find_molecule( $covalent_sensitivity,
                           $atom_properties,
                           $symmetric_atoms,
                           \%used_atoms,
                           \%used_originals,
                           \%used_uc_atoms,
                           \%checked_pairs,
                           $seed_atom, $bricks );

        my @molecule_atoms = @$found_atoms;
        $nbumps += $found_bumps;

        unless( @molecule_atoms ) {
            warn "WARNING, found molecule with no atoms -- strange...\n";
            next;
        }

        my $is_polymer = $found_polymer_atoms > 0;

        # Polymer dimension: for every site label / symmetry operator
        # combination that occurs more than once, take the rank of the
        # translation differences relative to the first occurrence and
        # keep the largest rank seen.
        my $polymer_dimension;
        if( $is_polymer ) {
            my %translations;
            for my $atom (@molecule_atoms) {
                push @{ $translations{$atom->{site_label}}
                                     {$atom->{symop_id}} },
                     $atom->{translation};
            }
            for my $per_symop (values %translations) {
                for my $vectors (values %$per_symop) {
                    my( $reference_vector, @other_vectors ) = @$vectors;
                    next unless @other_vectors;
                    my $rank =
                        mat_rank( [ map { vector_sub( $_, $reference_vector ) }
                                        @other_vectors ] );
                    next unless defined $rank;
                    $polymer_dimension = $rank
                        if !defined $polymer_dimension ||
                           $polymer_dimension < $rank;
                }
            }
        }

        push @unique_molecules, {
            atoms => \@molecule_atoms,
            chemical_formula_sum => '',
            is_polymer => $is_polymer,
            polymer_dimension => $polymer_dimension,
        };
    }

    # In verbose mode the bumps are reported one by one elsewhere, so the
    # summary is printed only when not verbose.
    if( $nbumps > 0 && !$verbose ) {
        warn "WARNING, $nbumps pair(s) of atoms are too close to "
           . "each other and are considered as bumps\n";
    }

    return @unique_molecules;
}

# ============================================================================ #

# Recursively collects the atoms that are connected to the given atom by
# bonds, as judged by test_bond().
#
# Accepts
#     covalent_sensitivity - handed to test_bond()
#     atom_properties      - handed to test_bump() and test_bond()
#     symmetric_atoms      - not inspected here, only handed over to the
#                            recursive calls
#     used_atoms           - hash reference of the atoms visited so far,
#                            keyed by site_label / symop_id / translation_id
#     used_originals       - hash reference of the cell labels of the
#                            atoms visited so far
#     used_uc_atoms        - hash reference of visit counters, keyed by
#                            site_label / symop_id
#     checked_pairs        - hash reference of the atom name pairs that
#                            have already been tested for bumps
#     current_atom         - the atom to start from
#     bricks               - structure holding the neighbour candidates in
#                            $bricks->{atoms}[$i][$j][$k]
# Returns a list of three values:
#     a reference to an array with the collected atoms (the current atom
#     first, followed by the atoms collected by the recursive calls),
#     the number of bumps detected,
#     the number of visited atoms whose site_label / symop_id combination
#     had been visited before (counted as polymer atoms).
#
# Besides its arguments, the subroutine reads the settings $debug,
# $verbose, $ignore_bumps, $die_on_errors, $bump_distance_factor,
# $max_polymer_atoms and $max_polymer_span, and increments the counter
# $total_nbumps; none of them is declared inside this subroutine.
sub find_molecule($$$$$$$$$)
{
    my $covalent_sensitivity = shift(@_);
    my $atom_properties      = shift(@_);
    my $symmetric_atoms      = shift(@_);
    my $used_atoms           = shift(@_);
    my $used_originals       = shift(@_);
    my $used_uc_atoms        = shift(@_);
    my $checked_pairs        = shift(@_);
    my $current_atom         = shift(@_);
    my $bricks               = shift(@_);

    # Fractional coordinates of the current atom after applying modulo_1()
    # to each component.
    my @current_coords_fract_modulo_1 =
        map { modulo_1($_) } @{$current_atom->{coordinates_fract}};

    # The same position converted with the atom's own 'f2o' operator
    # (presumably the fractional-to-orthogonal matrix -- TODO confirm).
    my $atom_in_unit_cell_coords_ortho =
        symop_vector_mul( $current_atom->{f2o}, \@current_coords_fract_modulo_1 );

    # Translation relating the original and the modulo_1 coordinates; it is
    # applied below to every bonded neighbour before recursing into it.
    my $current_translation = translation( $current_atom->{coordinates_fract},
                                           \@current_coords_fract_modulo_1 );

    my @neighbors;

    # An atom with the same site label, symmetry operator and translation
    # has been visited already: return an empty atom list.
    do {
        no warnings;
        if( exists $used_atoms->
            {$current_atom->{site_label}}
            {$current_atom->{symop_id}}
            {$current_atom->{translation_id}} ) {
            print STDERR "<<<< atom labeled '$current_atom->{name}' " .
                "is already in some molecule, returning\n"
                if $debug;
            return ( \@neighbors, 0, 0 );
        }

        $used_atoms->{$current_atom->{site_label}}
            {$current_atom->{symop_id}}
            {$current_atom->{translation_id}} = $current_atom;
    }; # end no warnings

    $used_originals->{$current_atom->{cell_label}} =
        $current_atom->{cell_label};

    my $polymer_atoms = 0;

    # The same site label / symmetry operator combination seen again
    # (with a translation that was not visited yet) is counted as a
    # polymer atom; the search is cut short when either polymer limit
    # is exceeded.
    do {
        no warnings;
        if( exists $used_uc_atoms->
            {$current_atom->{site_label}}
            {$current_atom->{symop_id}} ) {
            my $used_uc_atom = $used_uc_atoms->
                    {$current_atom->{site_label}}
                    {$current_atom->{symop_id}};
            print STDERR ">>> !!!! detected a used unit cell " .
                "label $current_atom->{name}/$current_atom->{symop_id}/" .
                "$current_atom->{translation_id} (${used_uc_atom}-th time)\n"
            if $debug;

            $polymer_atoms++;

            # Too many repetitions of this atom: warn and give up on this
            # branch, or die if $die_on_errors is set.
            if( $used_uc_atoms->
                {$current_atom->{site_label}}
                {$current_atom->{symop_id}} > $max_polymer_atoms ) {
                my $message = "the maximum number of polymer atom " .
                    "repetitions $max_polymer_atoms was hit for the " .
                    "atom $current_atom->{site_label} " .
                    "($current_atom->{symop_id}), " .
                    "to get around this limit, please increase " .
                    "--max-polymer-atoms, to say, " .
                    "--max-polymer-atoms=" . (2 * $max_polymer_atoms) . " " .
                    "or decrease --max-polymer-span (e.g. " .
                    "--max-polymer-span=" . int($max_polymer_span/2) . ", " .
                    "but expect increased computation times and " .
                    "memory consumption)";
                if( !$die_on_errors ) {
                    warn "WARNING, $message\n";
                    return ( [], 0, $polymer_atoms );
                } else {
                    die "ERROR, $message\n";
                }
            }

            # The atom lies further than $max_polymer_span translations
            # away along some axis: silently stop following this branch.
            if( abs($current_atom->{translation}[0]) > $max_polymer_span ||
                abs($current_atom->{translation}[1]) > $max_polymer_span ||
                abs($current_atom->{translation}[2]) > $max_polymer_span ) {
                return ( [], 0, $polymer_atoms );
            }
        }

        $used_uc_atoms->
            {$current_atom->{site_label}}
            {$current_atom->{symop_id}} ++;
    }; # end no warnings

    print STDERR
        ">>> considering atom $current_atom->{name} " .
            "(@{$atom_in_unit_cell_coords_ortho}) " .
        "($current_atom->{cell_label}/" .
        "$current_atom->{symop_id}/$current_atom->{translation_id})\n"
        if $debug;

    # The current atom is always the first element of the returned list.
    push( @neighbors, $current_atom );

    # Locate the brick of the current atom and the range of brick indices
    # that has to be scanned for neighbours.
    my ($i_init, $j_init, $k_init) =
        get_atom_index( $bricks, @{$atom_in_unit_cell_coords_ortho} );

    my ( $min_i, $max_i, $min_j, $max_j, $min_k, $max_k );

    eval {
        ( $min_i, $max_i, $min_j, $max_j, $min_k, $max_k ) =
            get_search_span( $bricks, $i_init, $j_init, $k_init );
    };
    # On failure the data that led to it are dumped and the error is
    # raised again. ('use' takes effect at compile time, so its placement
    # inside the block does not make the import conditional.)
    if( $@ ) {
        use COD::Serialise qw( serialiseRef );
        serialiseRef( $atom_in_unit_cell_coords_ortho );
        serialiseRef( [ $i_init, $j_init, $k_init ] );
        serialiseRef( $bricks );
        die( $@ );
    }

    if( $debug ) {
        local $" = ", ";
        print STDERR
            ">>> now scanning its distinct neighbours " .
            "around @{$atom_in_unit_cell_coords_ortho}:\n";
    };

    my $new_label = $current_atom->{name};
    my $nbumps = 0;

    ## foreach my $sym_atom (@$symmetric_atoms)
    for my $i ($min_i .. $max_i) {
    for my $j ($min_j .. $max_j) {
    for my $k ($min_k .. $max_k) {
        for my $sym_atom ( @{$bricks->{atoms}[$i][$j][$k]} ) {
            my $sym_atom_coords_ortho = $sym_atom->{coordinates_ortho};
            my $sym_label = $sym_atom->{name};

            # We have found the same atom, no need to add bond or
            # neighbour
            next if $new_label eq $sym_label;

            # No need to consider alternative atoms
            next if atoms_are_alternative( $current_atom, $sym_atom );

            my $dist = distance( $atom_in_unit_cell_coords_ortho,
                                 $sym_atom_coords_ortho );

            do {
                local $" = ' ';
                print STDERR ">>> checking neighbour $sym_label " .
                    "(@{$sym_atom_coords_ortho}), " .
                    "d = $dist\n";
            } if $debug;

            # Each pair of atom names is tested for a bump only once;
            # a bump is fatal unless $ignore_bumps is set. Individual
            # warnings are printed in verbose mode or while $total_nbumps
            # is below 5.
            if( !exists $checked_pairs->{$sym_label}{$new_label} &&
                test_bump( $atom_properties,
                           $current_atom->{chemical_type},
                           $sym_atom->{chemical_type},
                           $current_atom->{site_label},
                           $sym_atom->{site_label},
                           $dist, $bump_distance_factor ) ) {
                my $message = "atoms \"$current_atom->{name}\" and " .
                    "\"$sym_atom->{name}\" are too close " .
                    "(distance = " .
                    sprintf( "%6.4f", $dist ) .
                    ") and are considered a bump";
                if( !$ignore_bumps ) {
                    die "ERROR, $message -- aborting calculations\n";
                }
                if( $verbose || $total_nbumps < 5 ) {
                    warn "WARNING, $message\n";
                }
                $nbumps++;
                $total_nbumps++;
            }

            # Register the pair in both directions.
            $checked_pairs->{$sym_label}{$new_label} = 1;
            $checked_pairs->{$new_label}{$sym_label} = 1;

            next if !test_bond( $atom_properties,
                                $current_atom->{chemical_type},
                                $sym_atom->{chemical_type},
                                $dist,
                                $covalent_sensitivity );

            do {
                use COD::Serialise qw( serialiseRef );
                local $" = ' ';
                print STDERR ">>> found bond:\n";
                serialiseRef( { "translation" => $current_translation,
                                "original atom" => $current_atom,
                                "sym atom" => $sym_atom } );
            } if $debug;

            # The bonded neighbour is shifted by the translation of the
            # current atom before the search continues from it.
            my $back_shifted_sym_atom =
                translate_atom( $sym_atom, $current_translation );

            # NOTE(review): this and the two debugging messages after the
            # loops are printed without a filehandle, unlike the other
            # debugging messages in this subroutine, which go to STDERR.
            do {
                use COD::Serialise qw( serialiseRef );
                print ">>>> back-shifted atom:\n";
                serialiseRef( { sym_atom => $sym_atom,
                                backshifted => $back_shifted_sym_atom } );
            } if $debug;

            # Continue the search from the bonded neighbour and merge
            # whatever it has collected into the results of this call.
            my( $neighbours, $mol_nbumps, $mol_polymer_atoms ) =
                find_molecule( $covalent_sensitivity,
                               $atom_properties,
                               $symmetric_atoms,
                               $used_atoms,
                               $used_originals,
                               $used_uc_atoms,
                               $checked_pairs,
                               $back_shifted_sym_atom,
                               $bricks );

            push( @neighbors, @$neighbours );
            $nbumps += $mol_nbumps;
            $polymer_atoms += $mol_polymer_atoms;
        }
    }}}

    print ">>> Finished checks;\n" if $debug;

    do {
        use COD::Serialise qw( serialiseRef );
        print ">>> Before translation:";
        serialiseRef( \@neighbors );
    } if $debug;

    return ( \@neighbors, $nbumps, $polymer_atoms );
}

#===========================================================================
# Return a reference to a list of chemical bonds. Each bond is a hash
# holding references to the two %atom_info structures describing the
# bonded atoms ('atom1' and 'atom2'), the bond length ('distance') and
# the guessed bond order ('order').

sub atom_bonds
{
    # Arguments:
    #   atoms                - reference to the array of atoms to examine;
    #   atom_properties      - handed to get_max_covalent_radius(),
    #                          test_bond() and get_bond_order();
    #   covalent_sensitivity - handed to test_bond().
    # Returns a reference to an array of bond hashes with the keys
    # 'atom1', 'atom2', 'distance' and 'order'.
    my ($atoms, $atom_properties, $covalent_sensitivity) = @_;

    # The second build_bricks() argument is twice the largest covalent
    # radius plus the sensitivity (presumably the longest distance that
    # test_bond() can still accept -- TODO confirm).
    my $bricks = build_bricks(
        $atoms,
        2 * get_max_covalent_radius( $atom_properties ) +
            $covalent_sensitivity );

    # Atoms whose bonds have already been collected, keyed by name; this
    # makes every bond appear in the result only once.
    my %visited;
    my @bonds;

    for my $atom (@$atoms) {
        $visited{$atom->{name}} = $atom;

        my $atom_coords = $atom->{coordinates_ortho};

        my( $home_i, $home_j, $home_k ) =
            get_atom_index( $bricks, @$atom_coords );

        my( $i_from, $i_to, $j_from, $j_to, $k_from, $k_to ) =
            eval { get_search_span( $bricks, $home_i, $home_j, $home_k ) };
        if( $@ ) {
            # Dump the data that caused the failure, then raise the
            # error again.
            use COD::Serialise qw( serialiseRef );
            serialiseRef( $atom_coords );
            serialiseRef( [ $home_i, $home_j, $home_k ] );
            serialiseRef( $bricks );
            die( $@ );
        }

        for my $i ($i_from .. $i_to) {
        for my $j ($j_from .. $j_to) {
        for my $k ($k_from .. $k_to) {
            for my $candidate ( @{$bricks->{atoms}[$i][$j][$k]} ) {

                # Skips the atoms processed earlier as well as the current
                # atom itself, which has just been registered above.
                next if exists $visited{$candidate->{name}};

                my $distance = distance( $atom_coords,
                                         $candidate->{coordinates_ortho} );

                next if !test_bond( $atom_properties,
                                    $atom->{chemical_type},
                                    $candidate->{chemical_type},
                                    $distance,
                                    $covalent_sensitivity );

                next if atoms_are_alternative( $atom, $candidate );

                if( $debug ) {
                    use COD::Serialise qw( serialiseRef );
                    local $" = ' ';
                    print STDERR ">>> found bond:\n";
                    serialiseRef( { "original atom" => $atom,
                                    "neighbour atom" => $candidate } );
                }

                my $order = get_bond_order( $distance,
                                            $atom->{chemical_type},
                                            $candidate->{chemical_type},
                                            $atom_properties );

                push @bonds, {
                    atom1 => $atom,
                    atom2 => $candidate,
                    distance => $distance,
                    order => $order,
                };
            }
        }}}
    }

    return \@bonds;
}

#==============================================================================
# Use heuristics to guess bond order from its length:

sub get_bond_order
{
    # Arguments:
    #   distance               - length of the bond;
    #   atom1_type, atom2_type - keys used to look up %atom_radii;
    #   atom_properties        - accepted but not used here.
    # Returns the second field of the shortest matching %atom_radii
    # combination whose summed radius exceeds the distance, or "?" when
    # either type is missing from %atom_radii or nothing matches.
    my( $distance, $atom1_type, $atom2_type, $atom_properties ) = @_;

    return "?" unless exists $atom_radii{$atom1_type} &&
                      exists $atom_radii{$atom2_type};

    my @first_radii  = @{$atom_radii{$atom1_type}};
    my @second_radii = @{$atom_radii{$atom2_type}};

    # Combine the entries of both atoms that share the same first field;
    # the third fields (radii) are added up to give a bond length limit.
    my @limits;
    for my $first (@first_radii) {
        for my $second (@second_radii) {
            next if $first->[0] ne $second->[0];
            push @limits,
                 [ $first->[0], $first->[1], $first->[2] + $second->[2] ];
        }
    }

    # Try the limits from the shortest to the longest; the first one that
    # is longer than the bond determines the answer.
    for my $limit ( sort { $a->[2] <=> $b->[2] } @limits ) {
        return $limit->[1] if $distance < $limit->[2];
    }

    return "?";
}

#==============================================================================
# Calculate the rank of a matrix using Gaussian elimination algorithm.

# Accepts
#     m - a reference to an array of rows, each row being a reference to
#         an array of numbers; all rows are expected to have as many
#         columns as the first one.
# Returns
#     the number of rows that are not all-zero after the elimination,
#     i.e. the rank of the matrix (0 for a matrix without rows).
#
# The rows supplied by the caller are not modified: the elimination is
# carried out on private copies of the rows. Any number of columns is
# supported.
#
# NOTE(review): all tests for zero are exact ('!= 0'); for non-integer
# input rounding residue could be counted as a non-zero value -- confirm
# that callers pass integer vectors.
sub mat_rank
{
    my( $m ) = @_;
    return 0 if @$m == 0;

    # Copy every row, since the elimination below changes rows in place.
    my @m = map { [ @$_ ] } @$m;

    my $ncols = @{ $m[0] };

    my $topmost = 0;
    for( my $j = 0; $j < $ncols; $j++ ) {
        # Sorting lines of the matrix favouring the lowest absolute value
        # of the analysed column, keeping the zeroes in the bottom:

        @m[$topmost..$#m] = sort { ($a->[$j] == 0) - ($b->[$j] == 0) +
                                   ($a->[$j] != 0 &&  $b->[$j] != 0 ) *
                                   (abs( $a->[$j] ) <=> abs( $b->[$j] )) }
                                 @m[$topmost..$#m];

        # Starting from the first non-pegged line, the first line with
        # non-zero value of the analysed column is taken and used to
        # produce zeroes in the analysed column of lines below.
        my $row;
        for my $i ($topmost .. $#m) {
            next if $m[$i]->[$j] == 0;
            if( defined $row ) {
                # A pivot line has been found already: subtract its
                # multiple to produce zero in the analysed column of the
                # current line.
                my $q = $m[$i]->[$j] / $m[$row]->[$j];
                for my $k ($j .. $ncols - 1) {
                    $m[$i]->[$k] -= $q * $m[$row]->[$k];
                }
            } else {
                # No pivot line yet: take this line as the pivot and
                # divide it by the value of the analysed column.
                $row = $i;
                my $q = $m[$row]->[$j];
                for my $k ($j .. $ncols - 1) {
                    $m[$row]->[$k] = $m[$row]->[$k] / $q;
                }
            }
        }
        # Peg the used line in order not to use it once again.
        $topmost = $row + 1 if defined $row;
    }

    # Counting the lines that have at least one non-zero value in any
    # of their columns:
    my $rank = grep { my $line = $_; grep { $_ != 0 } @$line } @m;

    return $rank;
}