File: cif_molecule

package info (click to toggle)
cod-tools 3.11.0%2Bdfsg-1
links: PTS, VCS
area: main
in suites: trixie
size: 159,136 kB
sloc: perl: 58,707; sh: 41,323; ansic: 7,268; xml: 1,982; yacc: 1,117; makefile: 731; python: 166
file content (3605 lines) | stat: -rwxr-xr-x 146,025 bytes
parent folder | download | duplicates (2)
#! /bin/sh
#!perl -w # --*- Perl -*--
eval 'exec perl -x $0 ${1+"$@"}'
    if 0;
#------------------------------------------------------------------------------
#$Author: Yaroslav_Rozdobudko $
#$Date: 2025-04-01 18:05:55 +0300 (Tue, 01 Apr 2025) $
#$Revision: 10560 $
#$URL: svn+ssh://www.crystallography.net/home/coder/svn-repositories/cod-tools/tags/v3.11.0/scripts/cif_molecule $
#------------------------------------------------------------------------------
#*
#* Restore molecules from a CIF file.
#*
#* USAGE:
#*    $0 --options input1.cif input*.cif
#**

# Note: this script assumes that atoms have unique labels in the input
# CIF file; most often these are labels given by the _atom_site_label
# tag. If the assumption of uniqueness does not hold, the script
# attempts by default to create unique labels itself, appending numeric
# suffixes to the duplicate labels.
#
# The uniqueness of the labels is assumed in checks for atoms at
# special positions, and most importantly in the code removing
# duplicate molecules.
#
# Although there is an option to switch off this diversification of
# labels, the algorithms employed in this script will most probably
# break and give incorrect results (e.g. some atoms, namely ones with
# duplicate labels, will be missing from the output). Thus, use option
# '--no-uniquify-atoms' with caution.
#
# Atom identification.
# Atoms will be identified within this program using three components:
#
# a) the original label, as found in the input CIF (the "site_label",
# taken from the _atom_site_label data item). This label must be
# unique; if it is not, it will be uniquified by adding a serial
# number upon reading in.
#
# b) a rotation operator (unity operator if no rotation is applied);
# upon any rotation or when atoms are read in, their fractional
# coordinates are truncated modulo 1, i.e. moved to the first octant
# [0..1)x[0..1)x[0..1).
#
# c) a translation vector from the first octant to the actual atom
# position; translation names will use IUCr convention shift +5 (555
# is 0,0,0 translation). For larger translations, ":" character
# separator will be used, e.g. 10:5:-11.
#
# These three components, concatenated with underscores ("_"), will be
# used as unique atom names (the "name" key in the $atom_info hash).

use strict;
use File::Basename qw( basename );
use Clone qw( clone );
use COD::Algebra qw( gcd );
use COD::Algebra::Vector qw( distance vector_sub );
use COD::AtomBricks qw( build_bricks get_atom_index get_search_span );
use COD::AtomNeighbours qw(
    get_max_covalent_radius
    get_max_vdw_radius
    make_neighbour_list
);
use COD::AtomProperties;
use COD::CIF::Data qw( get_cell
                       get_space_group_number
                       get_symmetry_operators );
use COD::CIF::Data::AtomList qw( atom_array_from_cif
                                 atom_groups
                                 atoms_are_alternative
                                 datablock_from_atom_array
                                 generate_cod_molecule_data_block
                                 dump_atoms_as_cif );
use COD::CIF::Data::SymmetryGenerator qw( apply_shifts
                                          atoms_coincide
                                          chemical_formula_sum
                                          symop_apply
                                          symops_apply_modulo1
                                          test_bond
                                          test_bump
                                          translate_atom
                                          translation
                                          trim_polymer );
use COD::CIF::Parser qw( parse_cif );
use COD::CIF::Tags::CanonicalNames qw( canonicalize_all_names );
use COD::CIF::Tags::Manage qw( contains_data_item
                               exclude_tag
                               rename_tags
                               set_loop_tag
                               set_tag );
use COD::CIF::Tags::Merge qw( merge_datablocks );
use COD::CIF::Tags::Print qw( print_cif );
use COD::ErrorHandler qw( process_errors process_warnings
                          process_parser_messages report_message );
use COD::MorganFingerprints qw( make_morgan_fingerprint );
use COD::Spacegroups::Builder;
use COD::Spacegroups::SimpleBuilder;
use COD::Spacegroups::Lookup qw( make_symop_hash );
use COD::Spacegroups::Lookup::COD;
use COD::Spacegroups::Symop::Algebra qw( symop_mul
                                         symop_invert
                                         symop_is_unity
                                         symop_vector_mul );
use COD::Spacegroups::Symop::Parse qw( symop_from_string
                                       string_from_symop
                                       symop_string_canonical_form
                                       modulo_1 );
use COD::SOptions qw( getOptions get_value );
use COD::SUsage qw( usage options );
use COD::ToolsVersion qw( get_version_string );
## use COD::Algebra::GaussJordan qw( gj_elimination_non_zero_elements );
use COD::Algebra::GaussJordanBigRat qw( gj_elimination_non_zero_elements );

# Silence Perl's "Deep recursion" warnings for the whole script:
no warnings 'recursion';

# Version control identification string of this script:
my $Id = '$Id: cif_molecule 10560 2025-04-01 15:05:55Z Yaroslav_Rozdobudko $';

# Flags switched by the '--debug', '--symdebug' and '--verbose' options:
my $debug;
my $symdebug;
my $verbose = 0;
# NOTE(review): presumably a running count of detected atom bumps; it
# is not updated anywhere in the visible part of the script -- confirm.
my $total_nbumps = 0;

my $sort_molecules = 1; # A flag indicating whether molecules should
                        # be sorted in the output (descending by atom
                        # number)

my $dump_atoms = 0;     # Set by the '--dump-atoms' option.
my $format = "%8.6f";   # Set by the '--format' option (output
                        # coordinate printout format).
# NOTE(review): no option in the getOptions() call of this script sets
# $continue_on_errors; '--continue-on-errors' changes $die_on_errors
# instead. Check whether this variable is still needed.
my $continue_on_errors = 0;
# Tolerance used when deciding whether two atoms are bonded, see the
# description of the '--covalent-sensitivity' option:
my $covalent_sensitivity = 0.35;
my $audit = 1;          # Set by the '--audit'/'--no-audit' options.
my $uniquify_atoms = 1; # Set by the '--uniquify-atoms' family of options.
my $exclude_zero_occupancies = 1; # Do not use atoms with zero occupancies
my $exclude_dummy_atoms = 1;      # Do not use atoms with the 'dum' calc flag

my $force_unit_occupancies = 0; # Forcibly set occupancies to 1.0.

# A fraction of covalent bond radii used to determine when atoms are
# too close and are considered a bump:

my $bump_distance_factor = 0.75;

# A fraction of vdW radii used to determine when atoms are too close
# and are considered as overlapping; used, for instance, to determine
# whether an atom group that is disordered around special position is
# mapped onto itself by a symmetry operator:

my $vdw_distance_factor = 1.2;

my $ignore_bumps = 0; # detect and warn about close atom "bumps"
                      # but do not stop processing.

# A span, in +/- unit cells, in which polymeric molecules (repeating
# units) will be constructed:

my $max_polymer_span = 0;

my $cif_header_file; # Comments from the beginning of this file will be
                     # prepended to the output.

my $use_parser = "c"; # Used CIF parser

my $use_morgan_fingerprints = 0; # Use Morgan fingerprints to identify
                                 # duplicated moieties

my $use_atom_classes = 1; # Use COD AtomClassifier to sort atoms for
                          # generation of Morgan fingerprints

# Used for atom classification via AtomClassifier:
my $flat_planarity = 0.10;
my $classification_level = 3;
my $max_ring_size = 7; # maximum size of detected rings

my $use_one_output_datablock = 0; # Put all molecules, and all
                                  # disorder groups, into a single
                                  # data block in the output.

my $merge_disorder_groups = 0; # Put all alternative conformations
                               # into one data block.

my $preserve_stoichiometry = 0; # If true (1), apply symmetry
                                # operators from cosets of a point
                                # group in each molecule to all other
                                # molecules, to preserve molecular
                                # stoichiometry (charge balance,
                                # etc.).

my $largest_molecule_only = 0; # Output only the largest (having the
                               # greatest number of atoms) molecule.

my $output_geom_bond = 0; # Compute and output the _geom_bond_... data
                          # items (bond lengths, valencies, etc.)

my $expand_to_p1 = 0; # Do we want a full P1 unit cell that can be used
                      # to re-create the whole crystal using only the
                      # lattice translations?

# Random seed to be used for rand() function:

my $random_seed;

# If true, generates symmetry equivalent sites for disorder groups
# with negative indices.
my $use_special_position_disorder = 1;

# Value of the '--special-disorder-operator-set' option (an integer or
# the string "random", according to the usage text):
my $special_position_operator_set = 0;

# The simpler and slower space group builder algorithm
# (COD::Spacegroups::SimpleBuilder) is mostly intended for debugging.
# Ideally, it should give results identical to the space group builder
# algorithm optimised for speed (COD::Spacegroups::Builder). It
# is also expected that the optimised algorithm outperforms the simple one.
my $space_group_builder_type = 'optimised';
# 'optimised' => 'use COD::Spacegroups::Builder'
# 'simple'    => 'use COD::Spacegroups::SimpleBuilder'

# Per-severity "stop processing" flags, adjusted by the
# '--die-on-...', '--continue-on-...', '--always-die' and
# '--always-continue' options:
my $die_on_errors   = 1;
my $die_on_warnings = 0;
my $die_on_notes    = 0;

# Quotient graph output switches ('--print-quotient-graph' and
# '--include-non-polymer-quotient-graph' options):
my $print_quotient_graph  = 1;
my $include_non_polymer_quotient_graph = 0;

# get_machine_epsilon() is defined outside of the visible part of
# this script:
my $machine_epsilon = get_machine_epsilon();

# Symmetry operator lookup hash built from the COD space group table
# and its extra settings:
my %SYMOP_LOOKUP_HASH = make_symop_hash( [
                            \@COD::Spacegroups::Lookup::COD::table,
                            \@COD::Spacegroups::Lookup::COD::extra_settings
                        ] );

#* OPTIONS:
#*   --use-optimised-spacegroup-builder
#*                     Use the space group builder algorithm optimised
#*                     for speed as implemented in the
#*                     COD::Spacegroups::Builder module. Default.
#*   --use-simple-spacegroup-builder
#*                     Use the simpler and slower space group builder
#*                     algorithm as implemented in the
#*                     COD::Spacegroups::SimpleBuilder module.
#*
#*   -1, --one-datablock-output
#*                     Output all moieties to a single output data block.
#*
#*                     However, if the --split-disorder-groups option is
#*                     enabled all generated alternative conformations will
#*                     be put into separate data blocks starting with the
#*                     most likely one (disorder group occupancy wise) and
#*                     ending with the least likely one. In order to retrieve
#*                     only the most likely one, the --largest-molecule-only
#*                     option should be used in combination with the
#*                     --one-datablock-output option.
#*
#*   -1-, --multiple-datablocks-output
#*                     Separate each molecule and each example of an alternative
#*                     conformation into a separate data block. Default.
#*
#*   -c, --covalent-sensitivity
#*                     Set a new covalent sensitivity value. Default: 0.35.
#*
#*                     This value is used as the tolerance parameter when
#*                     evaluating if a chemical bond exists between two atoms.
#*                     Atoms a_1 and a_2 are considered bonded if:
#*
#*                     dist(a_1, a_2) < r_cov(a_1) + r_cov(a_2) + tolerance
#*
#*   -g, --geom-bond-output
#*                     Output _geom_bond_... data items (bond lengths,
#*                     valencies, etc.).
#*
#*   -g-, --no-geom-bond-output
#*                     Do not output _geom_bond_... information. Default.
#*
#*   -h, --add-cif-header input_header.txt
#*                     Prepend comments from the beginning of the given file
#*                     to the output.
#*
#*   -i, --ignore-bumps
#*                     Detect and warn about close atom "bumps" but do not
#*                     stop processing.
#*
#*   --no-ignore-bumps, --dont-ignore-bumps
#*                     Stop processing immediately if bumps are detected.
#*                     Default.
#*
#*   -s, --sort-molecules
#*                     Sort molecules in descending order by their atom count
#*                     and overall occupancy before outputting them. Atom count
#*                     takes precedence over overall occupancy. Default.
#*
#*   --no-sort-molecules, --dont-sort-molecules
#*                     Do not sort molecules, print them out in the order they
#*                     are detected.
#*
#*   --expand-to-P1, --P1-expand, --p1-expand
#*                     Expand all atoms to the P1 unit cell, so that the
#*                     translation operations can be used to restore the whole
#*                     crystal.
#*
#*   --no-expand-to-P1, --no-P1-expand, --no-p1-expand,
#*   --dont-expand-to-P1, --dont-P1-expand
#*                     Do not expand to P1, output only the minimal molecule
#*                     list. Default.
#*
#*   --uniquify-atoms
#*                     Make atom labels unique. Default.
#*
#*   --no-uniquify-atoms, --dont-uniquify-atoms
#*                     Do not make atom labels unique, exclude duplicates.
#*
#*   --use-morgan-fingerprints
#*                     Use Morgan fingerprints to identify and skip
#*                     duplicated moieties.
#*
#*   --no-use-morgan-fingerprints, --dont-use-morgan-fingerprints
#*                     Use atom labels to identify and skip duplicated
#*                     moieties. This method is the default, however under
#*                     certain circumstances it leaves duplicate moieties,
#*                     as the asymmetric unit can initially contain more than
#*                     one copy of a single moiety. Default.
#*
#*   --use-atom-classes
#*                     Use COD atom classes, generated by AtomClassifier
#*                     module from 'atomclasses' repository, for the
#*                     generation of Morgan fingerprints. Requires the
#*                     external AtomClassifier module. Default.
#*
#*   --no-use-atom-classes, --dont-use-atom-classes
#*                     Use atom chemical types for generation of Morgan
#*                     fingerprints instead of COD atom classes.
#*
#*   --bump-distance-factor 0.75
#*                     A fraction of covalent bond radii sum used to
#*                     determine when atoms are too close and are
#*                     considered a bump. Default: 0.75.
#*
#*   --vdw-distance-factor 1.2
#*                     A factor for the vdW radii sum used to
#*                     determine when atoms are too close and are
#*                     considered a vdW clash. Default: 1.2.
#*
#*   --continue-on-errors
#*                     Do not stop if errors such as unrecognised atoms are
#*                     encountered; the output may be incorrect and missing
#*                     some atoms if this option is used!
#*
#*   --no-continue-on-errors, --dont-continue-on-errors
#*                     Stop immediately when an error is encountered.
#*
#*   --exclude-zero-occupancies
#*                     Do not use atoms with 0 occupancies in calculations.
#*                     Default.
#*
#*   --no-exclude-zero-occupancies, --dont-exclude-zero-occupancies
#*                     Use atoms with 0 occupancies in calculations.
#*
#*   --exclude-dummy-atoms
#*                     Do not use dummy atoms (marked by the 'dum' calc flag)
#*                     in calculations. Default.
#*
#*   --no-exclude-dummy-atoms, --dont-exclude-dummy-atoms
#*                     Use dummy atoms (marked by the 'dum' calc flag)
#*                     in calculations. Dummy atoms can be used to mark
#*                     interesting positions within the unit cell
#*                     (e.g. geometric centers of coordinated atom rings),
#*                     but they are not considered a part of the molecule.
#*                     As a result, the occupancies of all output dummy atoms
#*                     are set to '.'. It should also be noted that dummy atoms
#*                     with non-numeric coordinates will still be excluded.
#*
#*   --preserve-stoichiometry
#*                     Apply necessary symmetry operations to preserve molecular
#*                     stoichiometry (charges, etc.).
#*
#*   --no-preserve-stoichiometry, --dont-preserve-stoichiometry
#*                     Do not apply any more symmetry operations than needed to
#*                     reconstruct covalently connected networks; may
#*                     break stoichiometry of salts and complexes. Default.
#*
#*   --force-unit-occupancies
#*                     Set occupancies of all output atoms to 1.0. Unit
#*                     occupancies are only set when outputting the atoms
#*                     and do not affect the flow of the algorithm
#*                     (disorder group processing, molecule sorting, etc.).
#*                     Dummy atoms are excluded from the effects of this option
#*                     and are always output with the '.' occupancy.
#*
#*                     Some programs, notably Jumbo converter's cif2cml,
#*                     assume unresolved disorder and do not recognize
#*                     aromatic rings if occupancies are not unities.
#*                     Obviously, this flag only has sense in combination
#*                     with --split-disorder-groups.
#*
#*   --no-force-unit-occupancies, --dont-force-unit-occupancies,
#*   --do-not-force-unit-occupancies
#*                     Leave occupancies as they are. Default.
#*
#*   --dump-atoms
#*                     Dump atoms (including symmetry-equivalent) in CIF
#*                     format for inspection with some graphics program.
#*
#*   --no-dump-atoms, --dont-dump-atoms
#*                     Do not dump atoms. Default.
#*
#*   --max-polymer-span 0
#*                     A span, in +/- unit cells, in which polymeric
#*                     molecules (repeating units) will be constructed.
#*                     When the value is set to '0', polymeric molecules
#*                     are represented only by the atoms that were included
#*                     in the determination of the corresponding quotient
#*                     graphs. Default: 0.
#*
#*   --max-polymer-atoms 100
#*                     The maximum allowed count of polymer example atoms.
#*                     More than this number of symmetry (translational)
#*                     equivalent atoms, for each atom of the asymmetric
#*                     unit, will not be written to the output. Default: 100.
#*
#*                     This option is considered deprecated and is only retained
#*                     for backwards compatibility. The current implementation
#*                     uses the quotient graph approach to find limits of the
#*                     polymeric molecule. Essentially, there is no need to
#*                     limit the molecule in any way. The '--max-polymer-span'
#*                     option is only used to specify the desired span of the
#*                     monomer repetition, but it is not required since the
#*                     molecule detection is limited by quotient graph.
#*
#*   --split-disorder-groups, --dont-merge-disorder-groups
#*                     Put examples of disorder group conformations into
#*                     separate data blocks. Default.
#*
#*   --merge-disorder-groups, --no-split-disorder-groups,
#*   --dont-split-disorder-groups
#*                     Put all disorder groups into one data block.
#*
#*   --use-special-disorder-symmetry
#*                     Generate symmetry equivalents for disorder groups
#*                     with negative indices. Default.
#*
#*   --random-seed 123456
#*                     Use the provided seed to initialise the random
#*                     number generator. Use "" (empty string) as a seed
#*                     to revert back to the default seed.
#*
#*   --special-disorder-operator-set 0
#*   --special-disorder-operator-set random
#*                     Indicates which operator set to apply to atom groups
#*                     that are disordered around a special position. Can be
#*                     an integer (0, 1, 2, ...) or a special value "random",
#*                     in which case a random operator is selected for each
#*                     special position image.
#*
#*   --no-use-special-disorder-symmetry,
#*   --dont-use-special-disorder-symmetry,
#*   --do-not-use-special-disorder-symmetry
#*                     Do not generate symmetry equivalents for disorder
#*                     groups with negative indices.
#*
#*   --largest, --largest-molecule-only
#*                     Output only the largest molecule. The largest molecule
#*                     is selected based on two criteria in the given order:
#*                     atom count and overall occupancy of the molecule.
#*                     When the combination of the --one-datablock-output and
#*                     --split-disorder-groups options is in effect the
#*                     molecule with the most likely disorder conformation
#*                     (occupancy wise) is returned.
#*
#*                     NOTE: if there is more than one disorder assembly
#*                     and the --split-disorder-groups option is in effect,
#*                     the conformation with the highest atom count might not
#*                     be generated at all. In this case, a molecule that best
#*                     fits the previously defined criteria out of the generated
#*                     conformation subset will be returned.
#*
#*   --all, --all-molecules
#*                     Output all molecules. Default.
#*
#*   --use-perl-parser
#*   --use-c-parser
#*                     Specify parser to parse CIF files. C parser is default.
#*
#*   --print-quotient-graph
#*                     Output the quotient graph information. Default.
#*                     Quotient graphs are output only for polymeric molecules
#*                     unless the '--include-non-polymer-quotient-graph' option
#*                     is also specified.
#*   --no-print-quotient-graph
#*                     Do not output the quotient graph information.
#*
#*   --include-non-polymer-quotient-graph
#*                     Output quotient graph for non polymer molecules.
#*   --no-include-non-polymer-quotient-graph
#*                     Do not output quotient graph for non polymer molecules.
#*                     Default.
#*
#*   --symdebug
#*                     Print debug output for symmetry reconstruction.
#*   --no-symdebug
#*                     Do not print any symmetry debug output. Default.
#*   --debug
#*                     Print some human-readable debug output.
#*   --no-debug
#*                     Suppress any debug output. Default.
#*
#*   --format "%8.6f"
#*                     Use the specified format for output coordinate printout.
#*
#*   --audit
#*                     Print audit information to the generated CIF file. Default.
#*   --no-audit
#*                     Do not print audit information to the generated CIF file.
#*
#*   --verbose
#*                     Print warning messages in long format.
#*   --no-verbose
#*                     Print warning messages in concise format. Default.
#*
#*   --help, --usage
#*                     Output a short usage message (this message) and exit.
#*   --version
#*                     Output version information and exit.
#**
# Process the command line. Every key is a comma-separated list of
# option synonyms (no whitespace is allowed around the commas); every
# value is either a reference to the scalar that receives the option
# argument or a code reference invoked when the option is seen. The
# list returned by getOptions() replaces @ARGV (presumably the
# remaining non-option arguments, i.e. the input file names).
@ARGV = getOptions(
    '--use-simple-spacegroup-builder' =>
        sub { $space_group_builder_type = 'simple' },

    '--use-optimised-spacegroup-builder' =>
        sub { $space_group_builder_type = 'optimised' },

    '-1,--one-datablock-output' => sub { $use_one_output_datablock = 1; },
    '-1-,--multiple-datablocks-output' =>
        sub { $use_one_output_datablock = 0; },

    '--expand-to-P1,--P1-expand,--p1-expand' => sub { $expand_to_p1 = 1 },
    '--no-expand-to-P1,--no-P1-expand,--no-p1-expand' =>
        sub { $expand_to_p1 = 0 },
    '--dont-expand-to-P1,--dont-P1-expand,--dont-p1-expand' =>
        sub { $expand_to_p1 = 0 },
    '--do-not-expand-to-P1,--do-not-P1-expand,--do-not-p1-expand' =>
        sub { $expand_to_p1 = 0 },

    '--uniquify-atoms'      => sub { $uniquify_atoms = 1; },
    '--no-uniquify-atoms'   => sub { $uniquify_atoms = 0; },
    '--dont-uniquify-atoms' => sub { $uniquify_atoms = 0; },

    '--use-morgan-fingerprints' =>
        sub { $use_morgan_fingerprints = 1 },
    '--no-use-morgan-fingerprints' =>
        sub { $use_morgan_fingerprints = 0 },
    '--dont-use-morgan-fingerprints' =>
        sub { $use_morgan_fingerprints = 0 },

    '--use-atom-classes' => sub { $use_atom_classes = 1 },
    '--no-use-atom-classes' => sub { $use_atom_classes = 0 },
    '--dont-use-atom-classes' => sub { $use_atom_classes = 0 },

    '-c,--covalent-sensitivity' => \$covalent_sensitivity,

    '-g,--geom-bond-output'     => sub { $output_geom_bond = 1 },
    '-g-,--no-geom-bond-output' => sub { $output_geom_bond = 0 },

    '-h,--add-cif-header' => \$cif_header_file,

    '-i,--ignore-bumps'   => sub{ $ignore_bumps = 1 },
    '--no-ignore-bumps'   => sub{ $ignore_bumps = 0 },
    '--dont-ignore-bumps' => sub{ $ignore_bumps = 0 },

    '-s,--sort-molecules'   => sub{ $sort_molecules = 1 },
    '--no-sort-molecules'   => sub{ $sort_molecules = 0 },
    '--dont-sort-molecules' => sub{ $sort_molecules = 0 },

    '--exclude-zero-occupancies'    => sub { $exclude_zero_occupancies = 1; },
    '--no-exclude-zero-occupancies' => sub { $exclude_zero_occupancies = 0; },
    '--dont-exclude-zero-occupancies' => sub { $exclude_zero_occupancies = 0; },

    '--exclude-dummy-atoms'    => sub { $exclude_dummy_atoms = 1; },
    '--no-exclude-dummy-atoms' => sub { $exclude_dummy_atoms = 0; },
    '--dont-exclude-dummy-atoms' => sub { $exclude_dummy_atoms = 0; },

    '--preserve-stoichiometry' => sub { $preserve_stoichiometry = 1 },
    # The synonyms are separated by a bare comma, as in every other
    # key of this list; a blank after the comma would become a part
    # of the second option name:
    '--dont-preserve-stoichiometry,--no-preserve-stoichiometry' =>
        sub { $preserve_stoichiometry = 0 },

    '--bump-distance-factor' => \$bump_distance_factor,

    '--vdw-distance-factor' => \$vdw_distance_factor,

    '--max-polymer-span' => \$max_polymer_span,
    # Deprecated option: warn, then fetch its argument with
    # get_value() and discard it.
    '--max-polymer-atoms' => sub {
        warn "$0:: NOTE, the '--max-polymer-atoms' option has been " .
             'deprecated and no longer affects the behaviour of the ' .
             'program -- the deprecated option will be removed in a ' .
             'future major version release.' . "\n";
             get_value();
    },

    '--symdebug'    => sub { $symdebug = 1 },
    '--no-symdebug' => sub { $symdebug = 0 },

    '--debug'    => sub { $debug = 1 },
    '--no-debug' => sub { $debug = 0 },

    '--format' => \$format,

    '--force-unit-occupancies' => sub { $force_unit_occupancies = 1 },
    '--no-force-unit-occupancies' => sub { $force_unit_occupancies = 0 },
    '--dont-force-unit-occupancies' => sub { $force_unit_occupancies = 0 },
    '--do-not-force-unit-occupancies' => sub { $force_unit_occupancies = 0 },

    '--dump-atoms'      => sub{ $dump_atoms = 1 },
    '--dont-dump-atoms' => sub{ $dump_atoms = 0 },
    '--no-dump-atoms'   => sub{ $dump_atoms = 0 },

    # Both keys below are assembled from two string pieces; the first
    # piece must end with a comma so that the concatenation does not
    # glue two option names into one:
    '--split-disorder-groups,--dont-merge-disorder-groups,' .
    '--do-not-merge-disorder-groups,--no-merge-disorder-groups'
        => sub { $merge_disorder_groups = 0 },
    '--merge-disorder-groups,--dont-split-disorder-groups,' .
    '--do-not-split-disorder-groups,--no-split-disorder-groups'
        => sub { $merge_disorder_groups = 1 },

    '--random-seed' => \$random_seed,
    '--special-disorder-operator-set' => \$special_position_operator_set,

    '--use-special-disorder-symmetry'
        => sub { $use_special_position_disorder = 1 },
    '--no-use-special-disorder-symmetry,' .
    '--dont-use-special-disorder-symmetry,' .
    '--do-not-use-special-disorder-symmetry'
        => sub { $use_special_position_disorder = 0 },

    '--largest,--largest-molecule-only'
        => sub { $largest_molecule_only = 1 },
    '--all,--all-molecules'
        => sub { $largest_molecule_only = 0 },

    '--always-continue'                 => sub { $die_on_errors   = 0;
                                                 $die_on_warnings = 0;
                                                 $die_on_notes    = 0 },
    '-c-,--always-die'                  => sub { $die_on_errors   = 1;
                                                 $die_on_warnings = 1;
                                                 $die_on_notes    = 1 },

    '--continue-on-errors'          => sub { $die_on_errors = 0 },
    '--dont-continue-on-errors'     => sub { $die_on_errors = 1 },
    '--die-on-errors'               => sub { $die_on_errors = 1 },
    '--no-continue-on-errors'       => sub { $die_on_errors = 1 },

    '--continue-on-warnings' => sub { $die_on_warnings = 0 },
    '--die-on-warnings'      => sub { $die_on_warnings = 1 },

    '--continue-on-notes'    => sub { $die_on_notes = 0 },
    '--die-on-notes'         => sub { $die_on_notes = 1 },

    '--use-perl-parser'       => sub{ $use_parser = 'perl' },
    '--use-c-parser'          => sub{ $use_parser = 'c' },

    '--print-quotient-graph'  => sub{ $print_quotient_graph = 1 },
    '--no-print-quotient-graph'
                              => sub{ $print_quotient_graph = 0 },

    '--include-non-polymer-quotient-graph'
                              => sub{ $include_non_polymer_quotient_graph = 1 },
    '--no-include-non-polymer-quotient-graph'
                              => sub{ $include_non_polymer_quotient_graph = 0 },

    '--audit'                   => sub { $audit = 1; },
    '--no-audit'                => sub { $audit = 0; },

    '--verbose'                 => sub { $verbose = 1; },
    '--no-verbose'              => sub { $verbose = 0; },

    '--options'      => sub { options; exit },
    '--help,--usage' => sub { usage; exit },
    '--version'      => sub { print get_version_string(), "\n"; exit },

# The following options are left only for compatibility with historic
# version of the script:

# The '--remove-duplicate-molecules' is no longer necessary since the
# new algorithm (after changing order of molecule generation and
# disorder group representative generation) never produces duplicate
# molecules:

    '--remove-duplicate-molecules'      => sub { },
    '--no-remove-duplicate-molecules'   => sub { },
    '--dont-remove-duplicate-molecules' => sub { },
);

# Map every message severity level onto the corresponding "stop
# processing" flag collected from the command line options:
my $die_on_error_level = {
    ERROR   => $die_on_errors,
    WARNING => $die_on_warnings,
    NOTE    => $die_on_notes
};

# Reject negative polymer spans. The scalar ERROR-level flag is passed
# here in the same way as in the other report_message() call of this
# script (the "file seems to be empty" warning): a hash reference is
# always true, so if report_message() treats its second argument as a
# plain boolean, passing $die_on_error_level would terminate the
# script regardless of the '--continue-on-errors' option.
if ($max_polymer_span < 0) {
    report_message(
        {
            'program'   => $0,
            'err_level' => 'ERROR',
            'message'   => 'the \'--max-polymer-span\' option parameter must ' .
                           'be a non-negative integer value',
        }, $die_on_errors
    );
}

# Initialise the Perl random number generator; an undefined or empty
# seed leaves the default Perl seeding in effect:

if( defined $random_seed && $random_seed ne "" ) {
    srand($random_seed);
}

# Covalent radii taken from Kitaigorodskij 1955, "Organicheskaja
# kristallochimija", p. 11.
#
# The hash is keyed by the chemical element symbol; each value is a
# list of [ bond order name, bond order, covalent radius ] triplets,
# one triplet per tabulated bond order of that element.
#
# NOTE(review): %atom_radii is not referenced anywhere in the visible
# part of this script -- check whether it is still used further down.
# NOTE(review): the "Ge" rows are identical to the "Si" rows -- confirm
# the values against the cited table.

#==============================================================================#
my %atom_radii = (
    "C" => [
        # bond order name, bond order, covalent radius in ångströms:
        [ "single",         1.0, 0.77 ],
        [ "one-and-a-half", 1.5, 0.70 ],
        [ "double",         2.0, 0.67 ],
        [ "triple",         3.0, 0.60 ],
    ],
    "Si" => [
        [ "single", 1.0, 1.17 ],
        [ "double", 2.0, 1.07 ],
        [ "triple", 3.0, 1.00 ],
    ],
    "Ge" => [
        [ "single", 1.0, 1.17 ],
        [ "double", 2.0, 1.07 ],
        [ "triple", 3.0, 1.00 ],
    ],
    "Sn" => [
        [ "single", 1.0, 1.22 ],
        [ "double", 2.0, 1.20 ],
    ],
    "O" => [
        [ "single", 1.0, 0.66 ],
        [ "double", 2.0, 0.55 ],
    ],
    "S" => [
        [ "single", 1.0, 1.04 ],
        [ "double", 2.0, 0.94 ],
    ],
    "Se" => [
        [ "single", 1.0, 1.17 ],
        [ "double", 2.0, 1.07 ],
    ],
    "Te" => [
        [ "single", 1.0, 1.37 ],
        [ "double", 2.0, 1.27 ],
    ],
    "B" => [
        [ "single", 1.0, 0.88 ],
        [ "double", 2.0, 0.76 ],
        [ "triple", 3.0, 0.68 ],
    ],
    "N" => [
        [ "single", 1.0, 0.70 ],
        [ "double", 2.0, 0.60 ],
        [ "triple", 3.0, 0.55 ],
    ],
    "P" => [
        [ "single", 1.0, 1.10 ],
        [ "double", 2.0, 1.00 ],
        [ "triple", 3.0, 0.93 ],
    ],
    "As" => [
        [ "single", 1.0, 1.21 ],
        [ "double", 2.0, 1.11 ],
    ],
    "Sb" => [
        [ "single", 1.0, 1.41 ],
        [ "double", 2.0, 1.31 ],
    ],
    "H" => [
        [ "single", 1.0, 0.30 ],
    ],
    "F" => [
        [ "single", 1.0, 0.64 ],
    ],
    "Cl" => [
        [ "single", 1.0, 1.00 ],
    ],
    "Br" => [
        [ "single", 1.0, 1.14 ],
    ],
    "I" => [
        [ "single", 1.0, 1.33 ],
    ],
    "Hg" => [
        [ "single", 1.0, 1.50 ],
    ],
);

#==============================================================================#
# Forward subroutine declarations: the prototypes are announced here,
# ahead of the subroutine bodies (which are not part of the visible
# section of the script), so that calls are parsed with them:

sub symgen_atom( $$ );
sub symgen_all_atoms( $$$ );
sub find_molecules( $$$$$$ );
sub find_molecule( $$$$$$$$$$$$$ );

# Encode everything written to STDOUT and STDERR as UTF-8:
binmode STDOUT, ':encoding(UTF-8)';
binmode STDERR, ':encoding(UTF-8)';

# Load the optional CIF comment header requested with the
# '--add-cif-header' option. Only the leading run of lines that start
# with '#' is taken from the header file. Failures to open or close
# the file are raised with die() inside the eval block and handed over
# to process_errors() afterwards.
my $cif_header;
eval {
    if( $cif_header_file ) {
        open( my $header, '<', $cif_header_file )
            or die 'ERROR, could not open header file for input -- '
                 . lcfirst($!) . "\n";

        $cif_header = '';
        while( <$header> ) {
            # The header ends at the first non-comment line:
            if( !/^#/ ) {
                last;
            }
            $cif_header .= $_;
        }

        close( $header )
            or die 'ERROR, error while closing header file after reading -- '
                 . lcfirst($!) . "\n";

        # Strip the CIF 2.0 magic code from the header: for CIF 2.0
        # files it is printed explicitly in front of the header.
        $cif_header =~ s/^#\\#CIF_2\.0[ \t]*\n//;
    }
};
if( $@ ) {
    process_errors(
        {
            'message'  => $@,
            'program'  => $0,
            'filename' => $cif_header_file,
        },
        $die_on_errors
    );
}

# Without file arguments the input is taken from "-":
@ARGV = ( '-' ) if !@ARGV;

# Instantiate the space group builder class that corresponds to the
# builder type requested in the options.
#
# Parameters:
#   $builder_type -- 'optimised' selects COD::Spacegroups::Builder,
#                    'simple' selects COD::Spacegroups::SimpleBuilder.
# Returns:
#   a newly constructed builder object of the selected class.
# Dies:
#   with an "unknown spacegroup builder type" message for any other
#   builder type name.
sub make_spacegroup_builder
{
    my ($builder_type) = @_;

    if( $builder_type eq 'optimised' ) {
        return COD::Spacegroups::Builder->new;
    }

    if( $builder_type eq 'simple' ) {
        return COD::Spacegroups::SimpleBuilder->new;
    }

    die "unknown spacegroup builder type '$builder_type'" . "\n";
}

# Main processing loop. Every input file is parsed and each of its data
# blocks is processed separately: symmetry operators are extracted,
# molecules are built by get_molecules(), optionally multiplied to
# preserve stoichiometry, merged into a single data block and/or split by
# disorder groups, optionally sorted, and finally printed with
# print_molecule(). Exceptions raised in each stage are caught with eval
# and handed over to process_errors().
for my $filename (@ARGV) {

    my $options = { 'parser' => $use_parser, 'no_print' => 1 };
    my ( $data, $err_count, $messages ) = parse_cif( $filename, $options );
    process_parser_messages( $messages, $die_on_error_level );

    # Is this line necessary?
    # next if ( $err_count > 0 );

    # Files that yield no named data block are reported and skipped.
    if( !ref $data ||
        !@$data || !defined $data->[0] || !defined $data->[0]{name} ) {
        report_message( {
                'filename'  => $filename,
                'program'   => $0,
                'err_level' => 'WARNING',
                'message'   => 'file seems to be empty'
            }, $die_on_warnings );
        next;
    }

    canonicalize_all_names( $data );

    if( $cif_header ) {
        # Ensure that for CIF v2.0 the magic code comes
        # before the CIF comment header:
        if( grep { exists $_->{cifversion} &&
                          $_->{cifversion}{major} == 2 } @$data ) {
            print "#\\#CIF_2.0\n";
        }
        print $cif_header;
    }

    for my $dataset (@$data) {

        my $dataname = 'data_' . $dataset->{name};

        # All warn() calls issued while this data block is processed are
        # routed through process_warnings() with the file name and the
        # data block name attached. The warning text is taken as $_[0]
        # (not as the list @_) so that the key/value pairs of the hash
        # below cannot be misaligned by a list of unexpected length.
        local $SIG{__WARN__} =  sub { process_warnings( {
                                       'message'       => $_[0],
                                       'program'       => $0,
                                       'filename'      => $filename,
                                       'add_pos'       => $dataname
                                     }, {
                                       WARNING => $die_on_warnings,
                                       NOTE    => $die_on_notes,
                                     } ) };

        my $values = $dataset->{values};
        my $sym_data;
        eval {
            # Extracts symmetry operators.
            # Raises warnings upon unrecognised symmetry information.
            # Raises die if unable to find symmetry information.
            $sym_data = get_symmetry_operators( $dataset );

            # Check that the identity operation is present and that it
            # is the first one in the list.
            my $unity_operator_found = 0;
            for my $symop (@$sym_data) {
                if( symop_is_unity( symop_from_string( $symop ) ) ) {
                    $unity_operator_found = 1;
                    last;
                }
            }
            if( !$unity_operator_found ) {
                warn "WARNING, unity symmetry operation ('x,y,z') is not "
                   . "found in the symmetry operation list -- results may "
                   . "be incorrect\n";
            } elsif ( !symop_is_unity( symop_from_string( $sym_data->[0] ) ) ) {
                # TODO: the symmetry operation position is currently only
                # determined from the string position in a CIF loop.
                # Technically, the appropriate looped list key data item
                # (i.e. _space_group_symop_id) should also be examined
                warn "WARNING, unity symmetry operation ('x,y,z') is not "
                   . "the first symmetry operation in the symmetry operation "
                   . "list -- results may be incorrect\n";
            }
        };
        if ( $@ ) {
            process_errors( {
              'message'       => $@,
              'program'       => $0,
              'filename'      => $filename,
              'add_pos'       => $dataname
            }, $die_on_errors )
        }
        # Nothing can be done for a data block without symmetry operators.
        next if !defined $sym_data || !@{$sym_data};

        my $original_sg_number = get_space_group_number(
                                    $sym_data,
                                    \%SYMOP_LOOKUP_HASH,
                                    $dataset
                                 );

        my $unique_molecules;
        my $last_net_id;
        eval {
            ($unique_molecules, $last_net_id) = get_molecules( $covalent_sensitivity,
                                               $sym_data,
                                               $dataset,
                                               \%COD::AtomProperties::atoms,
                                               $uniquify_atoms );
        };

        if ( $@ ) {
            process_errors( {
              'message'       => $@,
              'program'       => $0,
              'filename'      => $filename,
              'add_pos'       => $dataname
            }, $die_on_errors )
        }
        # Data blocks that yield no molecules are skipped.
        next if !defined $unique_molecules || !@{$unique_molecules};

        eval {
            if( $preserve_stoichiometry && $expand_to_p1 ) {
                warn "NOTE, option '--expand-to-P1' implies " .
                    "'--preserve-stoichiometry'" . "\n";
            }
            if( $preserve_stoichiometry && ! $expand_to_p1 ) {
                # Symmetry group accumulated from the symmetry groups of
                # all individual molecules.
                my $molecular_symmetry =
                    make_spacegroup_builder( $space_group_builder_type );

                print STDERR "Start building molecule symmetry groups:\n"
                    if $symdebug;

                foreach my $molecule (@$unique_molecules) {
                # Build molecule point group here...
                    my $sg =
                        make_spacegroup_builder( $space_group_builder_type );
                    print STDERR "\nProcessing molecule "
                        . "'$molecule->{chemical_formula_sum}':\n"
                        if( $symdebug );
                    # First atom seen for each site label.
                    my %original_atoms = ();
                    for my $atom (@{$molecule->{atoms}}) {
                        my $atom_label = $atom->{site_label};
                        if( exists $atom->{site_symops} ) {
                            $sg->insert_symops( $atom->{site_symops} );
                            do {
                                for (@{$atom->{site_symops}}) {
                                    print STDERR "<<<< inserting symop: ", string_from_symop($_),"\n";
                                }
                            } if $symdebug;
                        }
                        if( !exists $original_atoms{$atom_label} ) {
                            $original_atoms{$atom_label} = $atom;
                        } else {
                            # A repeated site label: insert the operator
                            # obtained by multiplying this atom's symop
                            # with the inverse of the first atom's symop.
                            my $symop1 = $original_atoms{$atom_label}{symop};
                            my $inverted_symop1 = symop_invert( $symop1 );
                            $sg->insert_symop( symop_mul( $atom->{symop},
                                                          $inverted_symop1 ));
                            do {
                                print( STDERR "<<<< inserting symop (inversion): ",
                                       string_from_symop(
                                           symop_mul(
                                               $atom->{symop},
                                               $inverted_symop1
                                           )
                                       ), "\n" );
                            } if $symdebug;
                        }
                    }
                    $molecule->{symmetry} = $sg;
                    $molecular_symmetry->insert_symops( $sg->all_symops_ref() );
                    if( $symdebug ) {
                        print STDERR "\nMolecule symmetry for molecule "
                            . "'$molecule->{chemical_formula_sum}':\n";
                        $sg->print( \*STDERR );
                        print STDERR "\nMolecule cluster symmetry after insertion:\n";
                        $molecular_symmetry->print( \*STDERR );
                    }
                }
                if( $symdebug ) {
                    print STDERR "\nMolecule cluster symmetry:\n";
                    $molecular_symmetry->print( \*STDERR );
                    print STDERR "\nFinished building molecule symmetry groups:\n";
                }

                # For every molecule, one additional copy is generated
                # per left coset (other than the first one) of the
                # molecule's symmetry group in the accumulated group.
                my @stoichiometric_molecules;
                foreach my $molecule (@$unique_molecules) {
                    use COD::Spacegroups::Cosets qw( find_left_cosets
                                                     canonical_string_from_symop );

                    if( $symdebug ) {
                        print STDERR "\nMolecule cluster symmetry:\n";
                        $molecular_symmetry->print( \*STDERR );
                        print STDERR "\nMolecule symmetry for molecule "
                            . "'$molecule->{chemical_formula_sum}':\n";
                        $molecule->{symmetry}->print( \*STDERR );
                    }

                    my @cosets = find_left_cosets(
                        $molecular_symmetry->all_symops_ref(),
                        $molecule->{symmetry}->all_symops_ref()
                    );
                    if( $symdebug ) {
                        print STDERR "Cosets for '$molecule->{chemical_formula_sum}':\n";
                        ## serialiseRef( \@cosets, "", \*STDERR );
                        my $indent = "   ";
                        my $n = 1;
                        for my $coset (@cosets) {
                            print STDERR $indent, "Coset ", $n++, ": \n";
                            for my $symop (@$coset) {
                                print( STDERR $indent x 2,
                                       string_from_symop( $symop ), "\n" );
                            }
                        }
                    }
                    push( @stoichiometric_molecules, $molecule );
                    for my $coset (@cosets[1..$#cosets]) {
                        ## use COD::Serialise qw( serialiseRef ); serialiseRef( [@cosets[1..$#cosets]] );
                        my $symop = $coset->[0];
                        my $symop_key = canonical_string_from_symop( $symop );

                        my %additional_molecule = (
                            atoms =>
                                symop_apply_to_atoms( $molecule->{atoms},
                                                      $symop ),
                            chemical_formula_sum =>
                                $molecule->{chemical_formula_sum},
                            is_polymer => $molecule->{is_polymer},
                            polymer_dimension => $molecule->{polymer_dimension},
                            polymer_basis => $molecule->{polymer_basis},
                            original_atoms => $molecule->{atoms}
                        );
                        $additional_molecule{symmetry}{symops} = $coset;
                        push( @stoichiometric_molecules,
                              \%additional_molecule );
                        ## use COD::Serialise; serialiseRef( \%additional_molecule );
                    }
                }

                # Find molecular Z value:
                # molecules are grouped either by their Morgan fingerprint
                # or by the sorted list of their site labels; Z is the
                # greatest common divisor of the group sizes.
                my %molecules;
                for my $molecule (@stoichiometric_molecules) {
                    my $molecule_key;
                    if( $use_morgan_fingerprints ) {
                        my $neighbours =
                            make_neighbour_list(
                                $molecule->{atoms},
                                $covalent_sensitivity,
                                $bump_distance_factor,
                                \%COD::AtomProperties::atoms,
                                1 );
                        $molecule_key =
                            make_morgan_fingerprint(
                                $neighbours,
                                $use_atom_classes,
                                $classification_level,
                                $max_ring_size,
                                $flat_planarity );
                    } else {
                        $molecule_key =
                            join( "\0", sort map {$_->{site_label}}
                                  @{$molecule->{atoms}} );
                    }
                    push( @{$molecules{$molecule_key}}, $molecule );
                }
                my $Z = gcd( map { int(@$_) } values %molecules );

                ## print STDERR ">>> Z = $Z\n";

                # Simplify molecular formula:
                # only the first N/Z molecules of each group are retained.

                if( $Z > 1 ) {
                    @stoichiometric_molecules = ();
                    for my $molecule_key (sort keys %molecules) {
                        my $N = int(@{$molecules{$molecule_key}});
                        for my $i (0 .. $N/$Z - 1) {
                            # Retained molecules that carry no quotient
                            # graph yet get one built from their
                            # symmetry-transformed original atoms.
                            if (!exists $molecules{$molecule_key}[$i]->{molecule_graph} &&
                                ($molecules{$molecule_key}[$i]->{is_polymer} || $include_non_polymer_quotient_graph)) {

                                my %molecule_graph;
                                my $symop = $molecules{$molecule_key}[$i]->{symmetry}{symops}[0];
                                my @sym_operators = map { symop_from_string($_) } @{$sym_data};
                                my $new_atoms = symop_apply_to_atoms( $molecules{$molecule_key}[$i]->{original_atoms}, $symop );
                                my %initial_atom_names = map { $_->{name} => 1 } @$new_atoms;

                                my $unit_cell_atoms = symgen_all_atoms( $new_atoms, \@sym_operators,
                                                                        {
                                                                            print_errors => 1,
                                                                            initial_atom_names =>
                                                                                \%initial_atom_names
                                                                        } );

                                my $symmetric_atoms = apply_shifts( $unit_cell_atoms );

                                my $atom_properties = \%COD::AtomProperties::atoms;

                                my $max_covalent_radius = get_max_covalent_radius( $atom_properties );
                                my $bricks = build_bricks( $symmetric_atoms,
                                                           $max_covalent_radius * 2 +
                                                           $covalent_sensitivity );


                                my %existing_translations;
                                my %added_atoms;
                                my %used_atoms;
                                my %used_originals;
                                my %checked_pairs;
                                my $initial_atom = $new_atoms->[0];

                                my( $molecule_atoms, $mol_nbumps, $mol_polymer_atoms ) =
                                    find_molecule( $covalent_sensitivity,
                                                   $atom_properties,
                                                   $symmetric_atoms,
                                                   \%used_atoms,
                                                   \%used_originals,
                                                   \%checked_pairs,
                                                   $initial_atom,
                                                   $bricks,
                                                   \%molecule_graph,
                                                   \%existing_translations,
                                                   \%added_atoms,
                                                   undef,
                                                   $last_net_id );

                                # NOTE(review): unlike the corresponding
                                # check in the disorder splitting code
                                # below, this condition does not test
                                # $include_non_polymer_quotient_graph, so
                                # the graph of a molecule without polymer
                                # atoms is always emptied here -- confirm
                                # whether this difference is intended.
                                if ( $mol_polymer_atoms == 0 ) {
                                    foreach my $key (keys %molecule_graph) {
                                        delete $molecule_graph{$key};
                                    }
                                } else {
                                    $last_net_id++;
                                }
                                $molecules{$molecule_key}[$i]{molecule_graph} = \%molecule_graph;
                            }
                            push( @stoichiometric_molecules,
                                  $molecules{$molecule_key}[$i] );
                        }
                    }
                }

                $unique_molecules = \@stoichiometric_molecules;
            }

            # Z value that is passed to chemical_formula_sum() and
            # print_molecule(); it is recalculated only for the single
            # data block output.
            my $Z = 1;
            if( $use_one_output_datablock ) {
                my @all_atoms = map { @{$_->{atoms}} } @$unique_molecules;
                if( @all_atoms > 0 ) {
                    # Find molecular Z value, once more:
                    my %moieties;
                    for my $moiety (@$unique_molecules) {
                        my $moiety_key;
                        if( $use_morgan_fingerprints ) {
                            my $neighbours =
                                make_neighbour_list(
                                    $moiety->{atoms},
                                    $covalent_sensitivity,
                                    $bump_distance_factor,
                                    \%COD::AtomProperties::atoms,
                                    1 );
                            $moiety_key =
                                make_morgan_fingerprint(
                                    $neighbours,
                                    $use_atom_classes,
                                    $classification_level,
                                    $max_ring_size,
                                    $flat_planarity );
                        } else {
                            $moiety_key =
                                join( "\0", sort map {$_->{site_label}}
                                      @{$moiety->{atoms}} );
                            ##print STDERR ">>>> \$moiety_key = $moiety_key\n";
                        }
                        push( @{$moieties{$moiety_key}}, $moiety );
                    }

                    $Z = gcd( map { int(@$_) } values %moieties );
                }
            }

            # Merge all molecules to one if requested.
            if( $use_one_output_datablock ) {
                my @all_atoms = map { @{$_->{atoms}} } @$unique_molecules;
                if( @all_atoms > 0 ) {
                    my @all_bases;
                    for my $moiety (@$unique_molecules) {
                        next if !$moiety->{polymer_dimension};
                        push @all_bases,
                            basis_string_to_matrix( $moiety->{polymer_basis} );
                    }
                    # Once @all_bases matrix is full, deref components and
                    # calculate cumulative rank and basis.
                    my $m = [map { @{$_} } @all_bases];
                    my ( $rank, $basis ) = get_rank_and_basis( $m );
                    my @molecule_graphs = map { $_->{molecule_graph} } @$unique_molecules;
                    my %combined_graphs;

                    # The graphs of all molecules are merged vertex by
                    # vertex; a vertex id that occurs in several graphs
                    # keeps the value from the last graph processed.
                    for my $graph (@molecule_graphs) {
                        for my $vertex_id (keys %{$graph}) {
                            $combined_graphs{$vertex_id} = $graph->{$vertex_id};
                        }
                    }
                    $unique_molecules = [{
                        atoms =>
                            \@all_atoms,
                        chemical_formula_sum =>
                            chemical_formula_sum( \@all_atoms, $Z ),
                        is_polymer => ((grep { $_->{is_polymer} == 1 }
                                               @$unique_molecules) > 0),
                        polymer_dimension => $rank,
                        polymer_basis => $basis,
                        molecule_graph => \%combined_graphs,
                    }];
                }
            }

            ## use COD::Serialise qw( serialiseRef ); serialiseRef( $unique_molecules );
            # Split init atoms into assemblies and groups, if requested.
            if( !$merge_disorder_groups ) {
                my @split_molecules;
                my $n = 1;
                for my $molecule (@$unique_molecules) {
                    ## print ">>> molecule No. ", $n++, "\n";
                    my $atom_list = $molecule->{atoms};
                    my $disorder_groups = atom_groups($atom_list);
                    my $number_of_disorder_groups = scalar (@{ $disorder_groups });
                    ## print ">>> ngroups = ", int(@$disorder_groups), "\n";
                    ## use COD::Serialise qw( serialiseRef ); serialiseRef( $disorder_groups );
                    for my $disorder_representative (@$disorder_groups) {
                        my $split_molecule = {
                                  is_polymer => $molecule->{is_polymer},
                                  polymer_dimension =>
                                      $molecule->{polymer_dimension},
                                  polymer_basis => $molecule->{polymer_basis},
                              };
                        # With more than one disorder group the molecule
                        # and its quotient graph are rebuilt from the
                        # atoms of the current group; otherwise the atoms
                        # of the group and the graph of the parent
                        # molecule are used as they are.
                        if ($number_of_disorder_groups > 1 && ($molecule->{is_polymer} ||
                            (!$molecule->{is_polymer} && $include_non_polymer_quotient_graph))) {

                            my @sym_operators = map { symop_from_string($_) } @{$sym_data};
                            my %initial_atom_names = map { $_->{name} => 1 } @$disorder_representative;

                            my $unit_cell_atoms = symgen_all_atoms( $disorder_representative, \@sym_operators,
                                                                    {
                                                                        print_errors => 1,
                                                                        initial_atom_names =>
                                                                            \%initial_atom_names
                                                                    } );

                            my $symmetric_atoms = apply_shifts( $unit_cell_atoms );

                            my $atom_properties = \%COD::AtomProperties::atoms;

                            my $max_covalent_radius = get_max_covalent_radius( $atom_properties );
                            my $bricks = build_bricks( $symmetric_atoms,
                                                       $max_covalent_radius * 2 +
                                                       $covalent_sensitivity );

                            my %existing_translations;
                            my %added_atoms;
                            my %used_atoms;
                            my %used_originals;
                            my %checked_pairs;
                            my $initial_atom = $disorder_representative->[0];
                            my %molecule_graph;

                            my( $molecule_atoms, $mol_nbumps, $mol_polymer_atoms ) =
                                find_molecule( $covalent_sensitivity,
                                               $atom_properties,
                                               $symmetric_atoms,
                                               \%used_atoms,
                                               \%used_originals,
                                               \%checked_pairs,
                                               $initial_atom,
                                               $bricks,
                                               \%molecule_graph,
                                               \%existing_translations,
                                               \%added_atoms,
                                               undef,
                                               $last_net_id );

                            if ( $mol_polymer_atoms == 0 &&
                                 !$include_non_polymer_quotient_graph) {
                                foreach my $key (keys %molecule_graph) {
                                    delete $molecule_graph{$key};
                                }
                            } else {
                                $last_net_id++;
                            }
                            $split_molecule->{atoms} = $molecule_atoms;
                            $split_molecule->{chemical_formula_sum} = chemical_formula_sum ( $molecule_atoms, $Z );
                            $split_molecule->{molecule_graph} = \%molecule_graph;
                        } else {
                            $split_molecule->{atoms} = $disorder_representative;
                            $split_molecule->{chemical_formula_sum} = chemical_formula_sum ( $disorder_representative, $Z );
                            $split_molecule->{molecule_graph} = $molecule->{molecule_graph};
                        }

                        push( @split_molecules, $split_molecule);
                    }
                }
                $unique_molecules = \@split_molecules;
            }

            # There is no need to sort the molecules if the single data block
            # output is required since:
            # a) there is only one molecule (no disorder);
            # b) there are several disorder configurations, but the
            #    best one (occupancy wise) is already at the beginning
            #    of the array
            if( !$use_one_output_datablock &&
                ( $sort_molecules || $largest_molecule_only ) ) {
                # Sum of atom occupancies of each molecule; unknown ('?')
                # and inapplicable ('.') occupancies count as 0. The sum
                # stays 0 when the first atom has no occupancy value.
                my @molecule_sum_occupancy;
                for (my $i = 0; $i < @{$unique_molecules}; $i++ ) {
                    $molecule_sum_occupancy[$i] = 0;
                    my $atoms = $unique_molecules->[$i]{'atoms'};
                    next if !defined $atoms->[0]{'atom_site_occupancy'};
                    for my $atom (@{$atoms}) {
                        my $occupancy = (
                                      $atom->{'atom_site_occupancy'} eq '.' ||
                                      $atom->{'atom_site_occupancy'} eq '?' )
                                      ? 0 : $atom->{'atom_site_occupancy'};
                        $occupancy =~ s/[(][0-9]+[)]$//; # remove precision
                        $molecule_sum_occupancy[$i] += $occupancy;
                    }
                };

                # Molecules are ordered by decreasing number of atoms;
                # molecules with equal numbers of atoms are ordered by
                # decreasing summed occupancy. Each array dereference is
                # closed before the '||' so that the '||' joins the two
                # complete comparisons (with the '||' inside the
                # dereference the occupancy comparison would never be
                # evaluated, since an array reference is always true).
                my @sorted_indexes = sort {
                    scalar( @{$unique_molecules->[$b]{atoms}} ) <=>
                    scalar( @{$unique_molecules->[$a]{atoms}} )
                        ||
                    $molecule_sum_occupancy[$b] <=>
                    $molecule_sum_occupancy[$a]
                } 0..$#$unique_molecules;
                @{$unique_molecules} = @{$unique_molecules}[@sorted_indexes];
            }

            my $molecule_id = 0;
            my $dataset_name = $dataset->{name};
            foreach my $molecule (@$unique_molecules) {
                # No molecule id is passed on when only one molecule can
                # be printed for the data block.
                my $id;
                unless( ($use_one_output_datablock &&
                         $merge_disorder_groups) ||
                         $largest_molecule_only ) {
                    $id = $molecule_id;
                } else {
                    $id = undef;
                }

                if( $output_geom_bond ) {
                    $molecule->{bonds} = atom_bonds( $molecule->{atoms},
                                                     \%COD::AtomProperties::atoms,
                                                     $covalent_sensitivity );
                }

                print_molecule( $id, $audit, $molecule, $Id,
                                $dataset, $dataset_name, $filename,
                                $sym_data, $Z, $original_sg_number,
                                {
                                    'coordinate_format' =>
                                        $format,
                                    'force_unit_occupancies' =>
                                        $force_unit_occupancies,
                                    'include_disorder' =>
                                        $use_one_output_datablock,
                                    'include_geom_bond' =>
                                        $output_geom_bond,
                                    'include_polymer_dimension' =>
                                        ( $molecule->{polymer_dimension} ? 1 : 0 ),
                                    'include_quotient_graph' =>
                                        $print_quotient_graph,
                                } );

                # Only the first molecule is printed in this mode.
                last if $largest_molecule_only;

                $molecule_id++;
            }
        };

        if ( $@ ) {
            process_errors( {
              'message'       => $@,
              'program'       => $0,
              'filename'      => $filename,
              'add_pos'       => $dataname
            }, $die_on_errors )
        }
    }
}

#==============================================================================#
# Tell whether an atom is a member of a group that is disordered around
# a special position.
#
# Parameters:
#   $atom -- reference to an atom description hash, as returned by the
#            atom_array_from_cif() subroutine; a false value is accepted.
# Returns:
#   1 if the atom hash has a 'group' key whose value starts with a minus
#   sign, 0 otherwise.

sub is_disordered_around_special_position($)
{
    my ($atom) = @_;

    # Atoms that are missing or carry no disorder group can not be
    # disordered around a special position.
    return 0 if !$atom;
    return 0 if !exists $atom->{group};

    # "A minus prefix (e.g. "-1") is used to indicate sites disordered
    # about a special position"
    # (https://www.iucr.org/__data/iucr/cifdic_html/1/cif_core.dic/Iatom_site_disorder_group.html,
    # 2019-10-22):
    return ( $atom->{group} =~ /^-/ ) ? 1 : 0;
}

#==============================================================================#
# Compute the distance between two atoms.
#
# Parameters:
#   $atom1, $atom2 -- references to atom description hashes, as returned
#                     by the atom_array_from_cif() subroutine. Both
#                     hashes MUST contain the 'coordinates_ortho' field
#                     (with Cartesian atom coordinates).
# Returns:
#   the value returned by distance() for the two 'coordinates_ortho'
#   fields.

sub atom_distance($$)
{
    my ($atom1, $atom2) = @_;

    my $position1 = $atom1->{coordinates_ortho};
    my $position2 = $atom2->{coordinates_ortho};

    return distance( $position1, $position2 );
}

#==============================================================================#
# Test if two atoms are too close to each other (i.e. if they "bump").
#
# Parameters:
#   $atom1, $atom2   -- references to atom description hashes; the
#                       'chemical_type', 'site_label' and
#                       'coordinates_ortho' fields are read;
#   $atom_properties -- atom property table, handed over to test_bump();
#   $distance_factor -- distance factor, handed over to test_bump().
# Returns:
#   the value returned by test_bump() when called with the "vdw_radius"
#   property name.

sub atoms_bump($$$$)
{
    my ($atom1, $atom2, $atom_properties, $distance_factor) = @_;

    my $distance = atom_distance( $atom1, $atom2 );

    my $type1  = $atom1->{chemical_type};
    my $type2  = $atom2->{chemical_type};
    my $label1 = $atom1->{site_label};
    my $label2 = $atom2->{site_label};

    # Debug trace of the checked atom pair; permanently switched off.
    if( 0 ) {
        local $, = " ";
        local $\ = "\n";
        print STDERR ">>>> checking bump: ",
        $type1, $type2, $label1, $label2, $distance,
        $distance_factor;
    }

    return test_bump( $atom_properties,
                      $type1, $type2,
                      $label1, $label2,
                      $distance, $distance_factor,
                      "vdw_radius" );
}

#==============================================================================#
# Find all atom sets that are disordered around a special position;
# determine the symmetry (sub)group of each such special position and the
# left cosets of each such symmetry group. Store references to space group
# operators, coset operator lists and unique operators that need to be
# applied to disordered atoms into each atom's record (hash).

sub determine_disordered_set_symmetry($$$$)
{
    # Parameters:
    #   $atom_list          - reference to an array of atom hashes;
    #   $symmetry_operators - reference to an array of symmetry operators
    #                         (each is passed to symop_apply());
    #   $atom_properties    - atom property table, forwarded to
    #                         get_max_vdw_radius() and atoms_bump();
    #   $distance_factor    - forwarded unchanged to atoms_bump().
    #
    # The results are not returned: they are written into the atom
    # hashes as the 'permissible_operators' and 'disorder_site_symmetry'
    # entries. The caller visible in this file calls this subroutine
    # in void context.
    my ( $atom_list, $symmetry_operators, $atom_properties,
         $distance_factor ) = @_;

    # Atoms disordered around a special position, keyed by the value
    # of their 'group' entry:
    my %special_disorder_groups;

    for my $atom (@$atom_list) {
        if( is_disordered_around_special_position( $atom ) ) {
            my $disorder_group_key = $atom->{group};
            push( @{$special_disorder_groups{$disorder_group_key}},
                  $atom );
        }
    }

    # Each disorder group is processed independently; the keys are
    # sorted so that the processing order is reproducible.
    for my $group_key (sort keys %special_disorder_groups) {
        my @group_atoms = @{$special_disorder_groups{$group_key}};
        # 4x4 identity matrix, i.e. the unity symmetry operator:
        my $unity_symop =
            [ [ 1, 0, 0, 0 ],
              [ 0, 1, 0, 0 ],
              [ 0, 0, 1, 0 ],
              [ 0, 0, 0, 1 ] ];
        # Run the group atoms through symgen_all_atoms() with the unity
        # operator only and pass the result to apply_shifts(); these
        # atoms are the targets that symmetry images are tested against.
        my $group_symmetric_atoms =
            apply_shifts(
                symgen_all_atoms( \@group_atoms, [ $unity_symop ],
                                  { print_errors => 0 } )
            );

        my $max_vdw_radius = get_max_vdw_radius( $atom_properties );

        # NOTE(review): the second argument is presumably the brick edge
        # length -- confirm against build_bricks().
        my $bricks = build_bricks( $group_symmetric_atoms,
                                   $max_vdw_radius * 2.5 );

        # Dormant debug output (enable by changing 'if 0'):
        do {
            local $\ = "\n";
            local $, = " ";
            print STDERR int(@$group_symmetric_atoms);
            print STDERR "\$group_symmetric_atoms";
            for my $atom (@$group_symmetric_atoms) {
                print( STDERR substr($atom->{site_label}, 0, 1),
                       @{$atom->{coordinates_ortho}} );
            }
        } if 0;

        # Collect every symmetry operator that maps some atom of the
        # group onto (i.e. makes it bump into) an atom of the group.
        # Note that the operator is pushed once per bumping atom pair,
        # so the same operator may appear in this list several times.
        my @symgroup_generators;
        for my $symop (@$symmetry_operators) {
            do {
                local $\ = "\n";
                print STDERR int(@group_atoms);
                print STDERR "Group label ", $group_key;
            } if 0;
            for my $atom (@group_atoms) {
                my $symm_atom = symop_apply( $atom, $symop, {modulo_1 => 1} );

                do {
                    local $, = " ";
                    local $\ = "\n";
                    print STDERR substr($symm_atom->{site_label}, 0, 1),
                        @{$symm_atom->{coordinates_ortho}};
                } if 0;

                # Perform an optimised search of the neighbouring
                # atoms in "bricks":
                my $coordinates = $symm_atom->{coordinates_ortho};

                my ($i_init, $j_init, $k_init) =
                    get_atom_index( $bricks, @$coordinates );

                my ( $min_i, $max_i, $min_j, $max_j, $min_k, $max_k );

                # get_search_span() may die; the exception is caught so
                # that diagnostic data can be serialised in debug mode
                # before the error is re-thrown.
                eval {
                    ( $min_i, $max_i, $min_j, $max_j, $min_k, $max_k ) =
                        get_search_span( $bricks, $i_init, $j_init, $k_init );
                };
                if( $@ ) {
                    if( $debug ) {
                        use COD::Serialise qw( serialiseRef );
                        serialiseRef( $coordinates );
                        serialiseRef( [ $i_init, $j_init, $k_init ] );
                        serialiseRef( $bricks );
                    }
                    die $@ . "\n";
                }

                # Test the symmetry image against all atoms stored in
                # the bricks within the search span:
                for my $i ($min_i .. $max_i) {
                for my $j ($min_j .. $max_j) {
                for my $k ($min_k .. $max_k) {
                    for my $other_atom ( @{$bricks->{atoms}[$i][$j][$k]} ) {
                        do {
                            print STDERR ">>> testing: ",
                                $symm_atom->{name}, " ",
                                $other_atom->{name}, " ",
                                "distance = ",
                                atom_distance($symm_atom, $other_atom ),
                                "\n";
                        } if 0;
                        if( atoms_bump( $symm_atom, $other_atom,
                                        $atom_properties, $distance_factor ) ) {
                            push( @symgroup_generators, $symop );
                            do {
                                print( STDERR ">>> bump: ",
                                       $symm_atom->{'site_label'}, " ",
                                       $other_atom->{'site_label'},
                                       "\n" );
                                print( STDERR ">>> pushing symop ",
                                       string_from_symop($symop), "\n" );
                            } if 0;
                        }
                    }
                }}}
            }
        }

        # Now the @symgroup_generators will contain, if any, those
        # symmetry operators that mapped the currently processed
        # disordered group onto itself. Let's build a subgroup
        # generated by those operators, it will be the symmetry of the
        # site around which the atom group is disordered.
        if( @symgroup_generators ) {
            use COD::Spacegroups::Cosets qw( find_left_cosets );

            my $sg_builder =
                make_spacegroup_builder( $space_group_builder_type );

            $sg_builder->insert_symops( \@symgroup_generators );

            my $subgroup_operators = $sg_builder->all_symops_ref();

            do {
                local $" = "; ";
                local $\ = "\n";
                my @disorder_symops =
                    map { string_from_symop($_) } @$subgroup_operators;
                print STDERR ">>>> disorder symops: @disorder_symops";
            } if 0;

            # Left cosets of the site symmetry subgroup in the full
            # list of symmetry operators:
            my @cosets = find_left_cosets( $symmetry_operators,
                                           $subgroup_operators );

            # "Transpose" the cosets: the i-th operator set collects the
            # i-th member of every coset, so each set holds exactly one
            # operator from each coset.
            my @permissible_operators;
            for my $coset (@cosets) {
                do {
                    local $" = "; ";
                    local $\ = "\n";
                    my @coset_symop_strings = map {string_from_symop($_)} @$coset;
                    print( STDERR ">>> coset ", ": ",
                           "@coset_symop_strings" );
                } if 0;
                for my $i (0..$#$coset) {
                    push( @{$permissible_operators[$i]}, $coset->[$i] );
                }
            }

            do {
                local $" = "; ";
                local $\ = "\n";
                for my $operators (@permissible_operators) {
                    my @operator_strings = map {string_from_symop($_)} @$operators;
                    print( STDERR ">>> operator set ", ": ",
                           "@operator_strings" );
                }
            } if 0;

            # Distribute the found operators into disorder group
            # atoms. Only those symmetry operators listed here should
            # be applied to the atoms that contain them. All atoms of
            # the group share the same array references.
            if( @permissible_operators ) {
                for my $atom (@group_atoms) {
                    die "permissible operators already defined!"
                        if exists $atom->{permissible_operators};
                    $atom->{permissible_operators} = \@permissible_operators;
                    $atom->{disorder_site_symmetry} = $subgroup_operators;
                }
            }
        } else {
            # No operator produced a bump for this group, so no site
            # symmetry can be assigned; the atoms are left unmodified.
            warn "WARNING, disorder group '$group_key' is not mapped " .
                "to itself by any non-unity symmetry operator\n";
        }
    }
}


#==============================================================================#
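# This is the main function where other functions such as find_molecules are
# called.
# Accepts
#     covalent_sensitivity - a threshold for covalent sensitivity
#     sym_data             - a reference to an array of symmetry operator
#                            strings from the CIF file
#     dataset              - a CIF data block (hash reference); its 'values'
#                            hash stores the data from the CIF file
#     atom_properties      - atom property table (used to look up radii)
#     uniquify_atoms       - a flag passed on to atom_array_from_cif() as
#                            its 'uniquify_atoms' option
#
# Returns a list of two values:
#     unique_molecules     - a reference to an array of hashes
#                     %molecule = (
#                         atoms=>[\%atom_info1, \%atom_info2], #covalent bond
#                         chemical_formula_sum=>"C6 H6",
#                                 );
#     last_net_id          - the second value returned by find_molecules()
#                            (undefined if find_molecules() was not called)
# If no atoms suitable for processing are found, a single reference to an
# empty array is returned instead.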

sub get_molecules
{
    my ( $covalent_sensitivity, $sym_data, $dataset,
         $atom_properties, $uniquify_atoms ) = @_;

    my $values = $dataset->{values};

    # Symmetry operators in matrix form, in the order of $sym_data:
    my @sym_operators = map { symop_from_string($_) } @{$sym_data};

    # The symmetry operator list handed over to the atom list builder:
    # a separately parsed set of the operators plus a lookup table
    # from the canonical operator string to its zero-based index.
    my @symops_for_atoms = map { symop_from_string($_) } @{$sym_data};
    my %symop_ids;
    for my $symop_index ( 0 .. $#{$sym_data} ) {
        my $canonical_symop =
            symop_string_canonical_form( $sym_data->[$symop_index] );
        $symop_ids{$canonical_symop} = $symop_index;
    }
    my $symop_list = {
        symops    => \@symops_for_atoms,
        symop_ids => \%symop_ids,
    };

    # Extract atoms with their fractional coordinates:
    my $atom_list = atom_array_from_cif(
        $dataset,
        {
            uniquify_atom_names         => 1,
            uniquify_atoms              => $uniquify_atoms,
            exclude_dummy_atoms         => $exclude_dummy_atoms,
            exclude_dummy_coordinates   => 1,
            exclude_unknown_coordinates => 1,
            symop_list                  => $symop_list,
            modulo_1                    => 1,
            atom_properties             => $atom_properties,
            continue_on_errors          => !$die_on_errors,
        }
    );
    if( !defined $atom_list ) {
        return [];
    }

    # Atoms with zero occupancies can not be dropped inside
    # atom_array_from_cif() since dummy atoms may legitimately carry
    # zero or equivalent ('.', '?') occupancies; they are filtered here.
    if( $exclude_zero_occupancies ) {
        my @retained_atoms;
        for my $atom ( @$atom_list ) {
            my $occupancy_is_zero = 0;
            if( exists $atom->{'atom_site_occupancy'} ) {
                if( $atom->{'atom_site_occupancy'} eq '?' ||
                    $atom->{'atom_site_occupancy'} eq '.' ) {
                    $occupancy_is_zero = 1;
                } else {
                    # Compare the value with its precision stripped:
                    ( my $occupancy = $atom->{'atom_site_occupancy'} )
                        =~ s/[(][0-9]+[)]$//;
                    $occupancy_is_zero = 1 if $occupancy == 0.0;
                }
            }

            # Zero-occupancy atoms survive only if flagged as dummies:
            if( $occupancy_is_zero ) {
                next unless exists $atom->{'calc_flag'} &&
                            $atom->{'calc_flag'} eq 'dum';
            }

            push @retained_atoms, $atom;
        }
        $atom_list = \@retained_atoms;
    }

    unless( @$atom_list ) {
        warn "WARNING, no atoms suitable for processing were found -- "
           . "maybe all occupancies were unknown, zero, or "
           . "all atom types were unrecognised\n";
        return [];
    }

    my $max_covalent_radius = get_max_covalent_radius( $atom_properties );

    my @unique_molecules;
    my $last_net_id;
    my %seen_molecules;

    # For atom sets disordered around a special position, work out
    # their site symmetry subgroups and the cosets of these subgroups
    # (the results are stored in the atom records):
    determine_disordered_set_symmetry( $atom_list, \@sym_operators,
                                       $atom_properties,
                                       $vdw_distance_factor );

    # Generate the symmetry equivalents of all atoms. Atom sets that
    # are disordered around a special position only get operators from
    # their symmetry group cosets, one operator from each coset.
    # symgen_all_atoms() may rename entries of %initial_atom_names.
    my %initial_atom_names;
    $initial_atom_names{$_->{name}} = 1 for @$atom_list;

    my $symgen_options = {
        print_errors       => 1,
        initial_atom_names => \%initial_atom_names,
    };
    my $unit_cell_atoms =
        symgen_all_atoms( $atom_list, \@sym_operators, $symgen_options );

    my $symmetric_atoms = apply_shifts( $unit_cell_atoms );

    # Atoms from which the molecule search is started: either the whole
    # unit cell or only the atoms named in %initial_atom_names.
    my @initial_atoms;
    if( $expand_to_p1 ) {
        @initial_atoms = @$unit_cell_atoms;
    } else {
        do {
            local $" = ", ";
            local $\ = "\n";
            my @atom_names = sort keys %initial_atom_names;
            print STDERR ">>> atom names: @atom_names";
        } if 0;
        foreach my $symmetric_atom ( @$symmetric_atoms ) {
            do {
                local $\ = "\n";
                print STDERR ">>> checking atom: $symmetric_atom->{name}";
            } if 0;
            next unless exists $initial_atom_names{$symmetric_atom->{name}};
            push( @initial_atoms, $symmetric_atom );
        }
    }

    if( $dump_atoms ) {
        # Only dump the starting atoms; no molecules are searched for.
        dump_atoms_as_cif( 1, \@initial_atoms,
                           [ get_cell( $values ) ] );
    } else {
        my $brick_size = $max_covalent_radius * 2 + $covalent_sensitivity;
        my $bricks = build_bricks( $symmetric_atoms, $brick_size );

        # Find the molecules:
        my $current_ordered_molecules;
        ( $current_ordered_molecules, $last_net_id ) =
            find_molecules( $covalent_sensitivity,
                            $atom_properties,
                            $symmetric_atoms,
                            \@initial_atoms,
                            $bricks,
                            \%seen_molecules );

        push( @unique_molecules, @$current_ordered_molecules );
    }

    # Attach the summary chemical formula to every molecule:
    for my $molecule ( @unique_molecules ) {
        $molecule->{chemical_formula_sum} =
            chemical_formula_sum( $molecule->{atoms} );
    }

    return ( \@unique_molecules, $last_net_id );
}

#===============================================================#
# Applies symmetry operator to all atoms in a given list.
#
# The symop_apply_to_atoms subroutine accepts a reference to an array
# of hash references:
#
# $atom_list = [
#                 {
#                    site_label=>"C1",
#                    name=>"C1_2",
#                    chemical_type=>"C",
#                    coordinates_fract=>[1.0, 1.0, 1.0],
#                    unity_matrix_applied=>1
#                 }, # $atom_info hash
#                 $atom2_info,
#                 $atom3_info,
#                 $atom4_info
#              ]
#
# and a reference to an array - symmetry operator:
#
# my $symop = [
#     [ r11 r12 r13 t1 ]
#     [ r21 r22 r23 t2 ]
#     [ r31 r32 r33 t3 ]
#     [   0   0   0  1 ]
# ],
#
# Returns a reference to an array of the above-mentioned atom_info hashes.

sub symop_apply_to_atoms
{
    my ( $atom_list, $symop ) = @_;

    my @transformed_atoms;
    foreach my $source_atom ( @{$atom_list} ) {
        # A fresh option hash is built for every call; the operator is
        # appended to the atom label only when expanding to P1.
        my @images = symop_apply( $source_atom, $symop,
                                  { append_symop_to_label =>
                                    $expand_to_p1 } );
        push @transformed_atoms, @images;
    }

    return \@transformed_atoms;
}

#===============================================================#
# Generate symmetry equivalents of an atom, exclude duplicates
# on special positions

sub symgen_atom($$)
{
    my ( $atom, $sym_operators ) = @_;

    my $symop_options = {
        append_symop_to_label         => $expand_to_p1,
        use_special_position_disorder => $use_special_position_disorder,
    };
    my ( $sym_atoms ) =
        symops_apply_modulo1( $atom, $sym_operators, $symop_options );

    # Nothing to filter when no atoms were generated or when the first
    # generated atom reports a multiplicity ratio of 1.
    my $no_filtering_needed =
        $sym_atoms &&
        ( !@{$sym_atoms} ||
          $sym_atoms->[0]{multiplicity_ratio} == 1 );
    return @$sym_atoms if $no_filtering_needed;

    # Pairwise comparison of the generated atoms: whenever two of them
    # coincide, the name of the later one is marked for removal.
    my %is_duplicate;
    my $last_index = $#$sym_atoms;
    for my $first ( 0 .. $last_index - 1 ) {
        my $kept_atom = $sym_atoms->[$first];
        for my $second ( $first + 1 .. $last_index ) {
            my $candidate = $sym_atoms->[$second];
            next unless atoms_coincide( $kept_atom, $candidate,
                                        $kept_atom->{f2o} );
            $is_duplicate{$candidate->{name}} = 1;
        }
    }

    # Keep the atoms that were not marked, preserving their order:
    my @unique_atoms;
    for my $sym_atom ( @$sym_atoms ) {
        next if defined $is_duplicate{$sym_atom->{name}};
        push( @unique_atoms, $sym_atom );
    }

    return @unique_atoms;
}

#===============================================================#
# Generate symmetry equivalents of all atoms from a list, exclude
# duplicates on special positions. Check the multiplicity values
# provided in the original file.

sub symgen_all_atoms($$$)
{
    # Parameters:
    #   $atoms         - reference to an array of atom hashes;
    #   $sym_operators - reference to an array of symmetry operators,
    #                    used for atoms that carry no
    #                    'permissible_operators' entry;
    #   $options       - reference to an option hash (may be undefined):
    #                      print_errors       - emit multiplicity warnings;
    #                      initial_atom_names - reference to a hash of
    #                                           atom names that is updated
    #                                           in place (see below).
    # Returns a reference to an array of all generated atoms.
    my ( $atoms, $sym_operators, $options ) = @_;

    # The options are read with unconditional declarations: the
    # behaviour of 'my $x = ... if COND;' is documented as undefined
    # (see perlsyn, "Statement Modifiers") and must not be relied upon.
    my $print_errors = ( $options && $options->{print_errors} ) ? 1 : 0;

    my $initial_atom_names =
        $options ? $options->{initial_atom_names} : undef;

    my @sym_atoms = ();

    # Randomised operator sets, shared by all atoms of a disorder group:
    my %disorder_group_operators;

    for my $atom (@{$atoms}) {
        my $atom_symops;
        if( exists $atom->{permissible_operators} ) {
            if( $special_position_operator_set eq "random" ) {
                my $disorder_group = $atom->{group};
                if( exists $disorder_group_operators{$disorder_group} ) {
                    $atom_symops =
                        $disorder_group_operators{$disorder_group}
                } else {
                    # Pre-multiply the symmetry operator with a randomly
                    # selected operator that maps a disordered group into
                    # itself:
                    my @randomised_symops;
                    my $site_symops = $atom->{disorder_site_symmetry};
                    my $nsymops = int(@$site_symops);

                    for my $permissible_symop (
                        @{$atom->{permissible_operators}[0]}
                        ) {
                        my $symop_index = int(rand($nsymops));
                        my $site_symop = $site_symops->[$symop_index];
                        do {
                            use Data::Dumper;
                            print STDERR Dumper( $site_symop, $permissible_symop );
                        } if 0;
                        push( @randomised_symops,
                              symop_mul( $permissible_symop, $site_symop )
                            );
                    }
                    $disorder_group_operators{$disorder_group} =
                        \@randomised_symops;
                    $atom_symops = \@randomised_symops;
                }
            } else {
                # A numeric selector picks one of the operator sets;
                # the modulo keeps the index within the available sets.
                my $operator_set_index =
                    $special_position_operator_set %
                    int(@{$atom->{permissible_operators}});
                $atom_symops =
                    $atom->{permissible_operators}[$operator_set_index];
            }

        } else {
            $atom_symops = $sym_operators;
        }
        die unless defined $atom_symops;
        # If symgen_all_atoms() is called by the code that needs all
        # symmetry equivalents in the cell to reconstruct molecules,
        # it passes a hash of all initial atoms names as an optional
        # parameter. Molecules are reconstructed by the caller of
        # this function starting from atoms that have their names
        # listed in the $initial_atom_names = {} hash. Normally, they
        # are just _atom_site_label values from the CIF since, when a
        # unity operator is applied, the atom name is not changed by
        # symgen_atom(). However, if an atom belongs to a group that is
        # disordered around a special position, and if we choose to
        # apply some of the disorder site operators to this atom, the
        # unity operator will never be used for such atom. In this
        # case, its name in the initial atom list must be replaced so
        # that the molecule reconstruction code finds them:
        if( $initial_atom_names &&
            !symop_is_unity( $atom_symops->[0] ) ) {
            my $symop_string = canonical_string_from_symop( $atom_symops->[0] );
            # FIXME: the code that generates the $symop_id value MUST
            # be exactly the same as in symop_register_applied_symop()
            # of the SymmetryGenerator.pm, line 206 onwards
            # (rev. 7270). The code should be refactored so that the
            # symop_id generation happens just in one place (restore
            # SPOT) (S.G.).
            my $symop_id =
                $atom->{symop_list}{symop_ids}{$symop_string} + 1;

            my $old_atom_name = $atom->{site_label};
            my $new_atom_name = $atom->{site_label} . '_' . $symop_id . '_555';
            delete $initial_atom_names->{$old_atom_name};
            $initial_atom_names->{$new_atom_name} = 1;
        }
        push( @sym_atoms, symgen_atom( $atom, $atom_symops ) );
    }

    # Compare the multiplicities given in the input with the values
    # found in the atom records after the symmetry generation.
    my $nr_multiplicity_ratios_found = 0;

    for my $atom (@{$atoms}) {
        my $multiplicity = $atom->{multiplicity};
        my $multiplicity_ratio = $atom->{multiplicity_ratio};

        if( exists $atom->{_atom_site_symmetry_multiplicity} &&
            $atom->{_atom_site_symmetry_multiplicity} != $multiplicity ) {
            if( $atom->{_atom_site_symmetry_multiplicity} ==
                $multiplicity_ratio ) {
                # The input apparently holds the multiplicity ratio;
                # such cases are only counted and reported in summary.
                $nr_multiplicity_ratios_found++;
            } else {
                if( $print_errors ) {
                    warn 'WARNING, the given multiplicity value of atom ' .
                         "'$atom->{name}' differs from the calculated value " .
                         "('$atom->{_atom_site_symmetry_multiplicity}' vs. " .
                         "'$multiplicity') -- the calculated value will be " .
                         'used' . "\n";
                }
            }
        }
    }

    if( $nr_multiplicity_ratios_found > 0 &&
        $print_errors ) {
        warn "WARNING, multiplicity ratios are given instead of "
           . "multiplicities for $nr_multiplicity_ratios_found atoms -- "
           . "taking calculated values\n";
    }

    return \@sym_atoms;
}

##
# Prints molecule to the CIF file.
#
# ...
# 
# @param $molecule
#       Reference to a molecule data structure of the following form:
#       {
#         # Reference to an array of atoms that make up the molecule.
#           'atoms' => [
#                        \%atom_1_info,
#                        \%atom_2_info,
#                        ...
#                      ],
#         # Reference to an array of covalent bond data structures.
#           'bonds' => [
#                        {
#                          'atom_1' => \%atom_1_info,
#                          'atom_2' => \%atom_2_info,
#                          'distance' => 1.54
#                        },
#                        # ...
#                      ],
#         # Summary chemical formula of the molecule.
#           'chemical_formula_sum' => 'C32 H28 Ag2 N12 O4',
#         # Flag value denoting if the molecule is a polymer.
#           'is_polymer' => 0,
#         # A positive integer representing the polymer dimension (1, 2 or 3).
#         # This value is undefined for non-polymer molecules.
#           'polymer_dimension' => 2
#         # String denoting the polymer basis.
#         # This value is undefined for non-polymer molecules.
#           'polymer_basis'     => '1;0;1 0;1;0'
#       }
# ...
#
# @param $sym_data
#       Reference to an array of symmetry operations as returned
#       by the COD::CIF::Data::get_symmetry_operators() subroutine.
#       Currently not used.
# @param $Z
#       Molecular Z number.
# @param $original_sg_number
#       Space group IT number derived from the input crystal structure
#       (see the get_space_group_number() subroutine). May be undefined.
# @param $options
#       Reference to an option hash. The following options are recognised:
#       {
#         # Use the specified atomic coordinate format in the output.
#         # Default: '%8.6f'. 
#           'coordinate_format' => '%8.6f',
#         # Set occupancies of all output atoms to 1.0.
#         # Default: '0'.
#           'force_unit_occupancies' => 0,
#         # Include disorder group and assembly information in the output.
#         # Default: '0'.
#           'include_disorder' => 0,
#         # Include the GEOM_BOND data loop in the output.
#         # Default: '0'.
#           'include_geom_bond' => 0,
#         # Include polymer dimension and basis information in the output.
#         # Default: '1'
#           'include_polymer_dimension' => 1,
#         # Include the quotient graph information in the output.
#         # Default: '1'
#           'include_quotient_graph' => 1,
#       }
##
sub print_molecule
{
    my( $molecule_id, $audit, $molecule, $Id, $dataset, $dataset_name,
        $filename, $sym_data, $Z, $original_sg_number, $options ) = @_;

    my $coordinate_format =
                        defined $options->{'coordinate_format'} ?
                                $options->{'coordinate_format'} : '%8.6f';
    my $force_unit_occupancies =
                        defined $options->{'force_unit_occupancies'} ?
                                $options->{'force_unit_occupancies'} : 0;
    my $include_disorder =
                        defined $options->{'include_disorder'} ?
                                $options->{'include_disorder'} : 0;
    my $include_geom_bond =
                        defined $options->{'include_geom_bond'} ?
                                $options->{'include_geom_bond'} : 0;
    my $include_polymer_dimension =
                        defined $options->{'include_polymer_dimension'} ?
                                $options->{'include_polymer_dimension'} : 1;
    my $include_quotient_graph =
                        defined $options->{'include_quotient_graph'} ?
                                $options->{'include_quotient_graph'} : 1;

    my $new_dataset = clone( $dataset );

    $new_dataset->{name} = $dataset_name;
    if( defined $molecule_id ) {
        $new_dataset->{name} .= '_molecule_' . $molecule_id;
    }

    my @data2copy = qw(
    _publ_author_name
    _publ_section_title
    _journal_issue
    _journal_name_full
    _journal_page_first
    _journal_page_last
    _journal_volume
    _journal_year

    _cell_length_a
    _cell_length_b
    _cell_length_c
    _cell_angle_alpha
    _cell_angle_beta
    _cell_angle_gamma

    _cell_measurement_pressure
    _cell_measurement.pressure
    _cell_measurement.pressure_esd
    _cell_measurement_pressure_gPa
    _cell_measurement_radiation
    _cell_measurement.radiation
    _cell_measurement.temp
    _cell_measurement_temperature
    _cell_measurement_temperature_C
    _cell_measurement.temp_esd
    _cell_measurement_wavelength
    _cell_measurement.wavelength
    _cell_measurement_wavelength_nm
    _cell_measurement_wavelength_pm

    _diffrn_ambient_environment
    _diffrn.ambient_environment
    _diffrn_ambient_pressure
    _diffrn.ambient_pressure
    _diffrn.ambient_pressure_esd
    _diffrn_ambient_pressure_gPa
    _diffrn_ambient_pressure_gt
    _diffrn.ambient_pressure_gt
    _diffrn_ambient_pressure_lt
    _diffrn.ambient_pressure_lt
    _diffrn.ambient_temp
    _diffrn.ambient_temp_details
    _diffrn_ambient_temperature
    _diffrn_ambient_temperature_C
    _diffrn_ambient_temperature_gt
    _diffrn_ambient_temperature_lt
    _diffrn.ambient_temp_esd
    _diffrn.ambient_temp_gt
    _diffrn.ambient_temp_lt

    _diffrn_radiation_collimation
    _diffrn_radiation.collimation
    _diffrn_radiation_detector
    _diffrn_radiation_detector_dtime
    _diffrn_radiation.diffrn_id
    _diffrn_radiation.div_x_source
    _diffrn_radiation.div_x_y_source
    _diffrn_radiation.div_y_source
    _diffrn_radiation_filter_edge
    _diffrn_radiation.filter_edge
    _diffrn_radiation_filter_edge_nm
    _diffrn_radiation_filter_edge_pm
    _diffrn_radiation_inhomogeneity
    _diffrn_radiation.inhomogeneity
    _diffrn_radiation_monochromator
    _diffrn_radiation.monochromator
    _diffrn_radiation_polarisn_norm
    _diffrn_radiation.polarisn_norm
    _diffrn_radiation_polarisn_ratio
    _diffrn_radiation.polarisn_ratio
    _diffrn_radiation.polarizn_source_norm
    _diffrn_radiation.polarizn_source_ratio
    _diffrn_radiation_probe
    _diffrn_radiation.probe
    _diffrn_radiation_source
    _diffrn_radiation_type
    _diffrn_radiation.type
    _diffrn_radiation_wavelength
    _diffrn_radiation_wavelength_id
    _diffrn_radiation_wavelength.id
    _diffrn_radiation.wavelength_id
    _diffrn_radiation_wavelength_nm
    _diffrn_radiation_wavelength_pm
    _diffrn_radiation_wavelength.wavelength
    _diffrn_radiation_wavelength_wt
    _diffrn_radiation_wavelength.wt
    _diffrn_radiation_xray_symbol
    _diffrn_radiation.xray_symbol

    _diffrn_reflns_theta_full
    _diffrn_reflns_resolution_full
    _diffrn_reflns_theta_max
    _diffrn_reflns_resolution_max
    _reflns_d_resolution_high
    _reflns.d_resolution_high
    _reflns_d_resolution_high_nm
    _reflns_d_resolution_high_pm
    _reflns_d_resolution_low
    _reflns.d_resolution_low
    _reflns_d_resolution_low_nm
    _reflns_d_resolution_low_pm
    _diffrn_reflns_limit_h_max
    _diffrn_reflns.limit_h_max
    _diffrn_reflns_limit_h_min
    _diffrn_reflns.limit_h_min
    _diffrn_reflns_limit_k_max
    _diffrn_reflns.limit_k_max
    _diffrn_reflns_limit_k_min
    _diffrn_reflns.limit_k_min
    _diffrn_reflns_limit_l_max
    _diffrn_reflns.limit_l_max
    _diffrn_reflns_limit_l_min
    _diffrn_reflns.limit_l_min

    _cod_duplicate_entry
    _[local]_cod_duplicate_entry
);

    my @data2rename = qw(
    _chemical_formula_analytical
    _chemical_formula.analytical
    _chemical_formula.entry_id
    _chemical_formula_iupac
    _chemical_formula.iupac
    _chemical_formula_moiety
    _chemical_formula.moiety
    _chemical_formula_structural
    _chemical_formula.structural
    _chemical_formula_sum
    _chemical_formula.sum
    _chemical_name_common
    _chemical_name_systematic
    _chemical_name_mineral
    _pd_proc_ls_prof_R_factor
    _pd_proc_ls_prof_wR_factor
    _refine_hist.R_factor_all
    _refine_hist.R_factor_obs
    _refine_hist.R_factor_R_free
    _refine_hist.R_factor_R_work
    _refine_ls_class_R_factor_all
    _refine_ls_class.R_factor_all
    _refine_ls_class_R_factor_gt
    _refine_ls_class.R_factor_gt
    _refine_ls_class_wR_factor_all
    _refine_ls_class.wR_factor_all
    _refine_ls_R_factor_all
    _refine.ls_R_factor_all
    _refine_ls_R_factor_gt
    _refine.ls_R_factor_gt
    _refine_ls_R_factor_obs
    _refine.ls_R_factor_obs
    _refine.ls_R_factor_R_free
    _refine.ls_R_factor_R_free_error
    _refine.ls_R_factor_R_free_error_details
    _refine.ls_R_factor_R_work
    _refine_ls_shell.R_factor_all
    _refine_ls_shell.R_factor_obs
    _refine_ls_shell.R_factor_R_free
    _refine_ls_shell.R_factor_R_free_error
    _refine_ls_shell.R_factor_R_work
    _refine_ls_shell.wR_factor_all
    _refine_ls_shell.wR_factor_obs
    _refine_ls_shell.wR_factor_R_free
    _refine_ls_shell.wR_factor_R_work
    _refine_ls_wR_factor_all
    _refine.ls_wR_factor_all
    _refine_ls_wR_factor_gt
    _refine_ls_wR_factor_obs
    _refine.ls_wR_factor_obs
    _refine_ls_wR_factor_ref
    _refine.ls_wR_factor_R_free
    _refine.ls_wR_factor_R_work
    _reflns_class_R_factor_all
    _reflns_class.R_factor_all
    _reflns_class_R_factor_gt
    _reflns_class.R_factor_gt
    _reflns_class_wR_factor_all
    _reflns_class.wR_factor_all
);

    # Copy the '_atom_type.symbol' and '_atom_type.oxidation_number'
    # data items only if both are simultaneously provided. Otherwise,
    # the oxidation numbers cannot be mapped to the corresponding atom
    # types or the oxidation numbers are not provided at all.
    if( ( contains_data_item( $new_dataset, '_atom_type_symbol' ) ||
          contains_data_item( $new_dataset, '_atom_type.symbol' ) ) &&
        ( contains_data_item( $new_dataset, '_atom_type_oxidation_number' ) ||
          contains_data_item( $new_dataset, '_atom_type.oxidation_number' ) ) )
    {
        push @data2copy, qw(
                _atom_type.symbol
                _atom_type_symbol
                _atom_type.oxidation_number
                _atom_type_oxidation_number
            )
    }

    my %data2copy = map { $_ => $_ } @data2copy;

    my @tag_list = @{$new_dataset->{tags}};

    my $src_tag_prefix = '_[local]_cod_src';
    my %renamed_tags = rename_tags( $new_dataset,
                                    \@data2rename,
                                    $src_tag_prefix );

    my @tags_to_exclude = grep { !exists $data2copy{$_} &&
                                 !exists $renamed_tags{$_} }
                               @{$new_dataset->{tags}};
    foreach (@tags_to_exclude) {
        exclude_tag( $new_dataset, $_ );
    }

    if( $audit ) {
        my $id_value = $Id;
        $id_value =~ s/\s*\$\s*//g;
        set_tag( $new_dataset, '_audit_creation_method', $id_value );
    }

    set_tag( $new_dataset, '_chemical_formula_sum',
             $molecule->{chemical_formula_sum} );

    set_tag( $new_dataset, '_cod_data_source_file',
             basename( $filename ) );
    set_tag( $new_dataset, '_cod_data_source_block',
             $dataset_name );
    set_tag( $new_dataset, '_cell_formula_units_Z', $Z );
    set_tag( $new_dataset, '_space_group_name_H-M_alt', 'P 1' );

    set_loop_tag( $new_dataset, '_space_group_symop_id', undef, [ '1' ] );
    set_loop_tag( $new_dataset, '_space_group_symop_operation_xyz',
                  '_space_group_symop_id', [ 'x, y, z' ] );

    if( defined $original_sg_number ) {
        set_tag( $new_dataset, '_cod_molecule_space_group_IT_number',
                 $original_sg_number );
    }

    if( $molecule->{is_polymer} ) {
        set_tag( $new_dataset, '_cod_molecule_is_polymer', 'yes' );
    }
    else {
        set_tag( $new_dataset, '_cod_molecule_is_polymer', 'no' );
    }

    if( $include_polymer_dimension && $molecule->{polymer_dimension} ) {
        set_tag( $new_dataset, '_cod_molecule_polymer_dimension',
                 $molecule->{polymer_dimension} );
        set_tag( $new_dataset, '_cod_molecule_polymer_basis',
                 $molecule->{polymer_basis} );
    }

    my @atoms = sort {
        length($a->{name}) == length($b->{name}) ?
        $a->{name} cmp $b->{name} :
        length($a->{name}) <=> length($b->{name})
    } @{$molecule->{atoms}};

    my $atoms_datablock = datablock_from_atom_array( \@atoms );
    merge_datablocks( $atoms_datablock, $new_dataset );

    my $cod_molecule_datablock = generate_cod_molecule_data_block( \@atoms );
    merge_datablocks( $cod_molecule_datablock, $new_dataset );

    if( $force_unit_occupancies &&
        exists $new_dataset->{values}{_atom_site_occupancy} ) {
        set_loop_tag( $new_dataset,
                      '_atom_site_occupancy',
                      '_atom_site_label',
                      [ map { exists $_->{calc_flag} && $_->{calc_flag} eq 'dum'
                                ? '.' : '1.0' } @atoms ] );
    }
    if( !$include_disorder ) {
        exclude_tag( $new_dataset, '_atom_site_disorder_assembly' );
        exclude_tag( $new_dataset, '_atom_site_disorder_group' );
    }

    # Force coordinate format
    for my $tag ( qw( _atom_site_fract_x
                      _atom_site_fract_y
                      _atom_site_fract_z ) ) {
        set_loop_tag( $new_dataset,
                      $tag,
                      '_atom_site_label',
                      [ map { $_ = sprintf $coordinate_format, $_;
                              s/^\s+//; s/\s+$//; $_ }
                            @{$new_dataset->{values}{$tag}} ] );
    }

    # Print _geom_bond_ output on request
    if( $include_geom_bond ) {
        if( exists $molecule->{bonds} ) {

            my @sorted_bonds;
            for my $bond (@{$molecule->{bonds}}) {
                my $atom_1 = $bond->{atom1};
                my $atom_2 = $bond->{atom2};
                if( ( $atom_1->{'name'} gt $atom_2->{'name'} &&
                    # Do not switch asymmetric unit atoms with generated atoms
                    !( $atom_1->{'name'} eq $atom_1->{'site_label'} &&
                       $atom_2->{'name'} ne $atom_2->{'site_label'} ) &&
                    # Do not switch heavy atoms with hydrogen atoms
                    !( $atom_1->{'chemical_type'} ne 'H' &&
                       $atom_2->{'chemical_type'} eq 'H' ) ) ||
                    # Prefer asymmetric unit atoms over generated atoms
                    ( $atom_1->{'name'} ne $atom_1->{'site_label'} &&
                      $atom_2->{'name'} eq $atom_2->{'site_label'} ) ) {
                    $bond->{atom1} = $atom_2;
                    $bond->{atom2} = $atom_1;
                }
                push @sorted_bonds, $bond;
            }
            @sorted_bonds = sort {
                    length($a->{atom1}{name}) <=> length($b->{atom1}{name}) ||
                    $a->{atom1}{name} cmp $b->{atom1}{name} ||
                    length($a->{atom2}{name}) <=> length($b->{atom2}{name}) ||
                    $a->{atom2}{name} cmp $b->{atom2}{name}
               } @{$molecule->{bonds}};

            set_loop_tag( $new_dataset,
                          '_geom_bond_atom_site_label_1',
                          '_geom_bond_atom_site_label_1',
                          [ map { $_->{atom1}{name} } @sorted_bonds ] );
            set_loop_tag( $new_dataset,
                          '_geom_bond_atom_site_label_2',
                          '_geom_bond_atom_site_label_1',
                          [ map { $_->{atom2}{name} } @sorted_bonds ] );
            set_loop_tag( $new_dataset,
                          '_geom_bond_distance',
                          '_geom_bond_atom_site_label_1',
                          [ map { sprintf '%.5f', $_->{distance} }
                                @sorted_bonds ] );
            set_loop_tag( $new_dataset,
                          '_geom_bond_valence',
                          '_geom_bond_atom_site_label_1',
                          [ map { $_->{order} } @sorted_bonds ] );
        } else {
            warn 'WARNING, bond data necessary to compute _geom_bond_ '
               . 'data items was not calculated' . "\n";
        }
    }

    if( $print_quotient_graph && $molecule->{molecule_graph} && keys %{ $molecule->{molecule_graph} } > 0 ) {
        my %molecule_graph = %{ $molecule->{molecule_graph} };

        my %valid_atom_names = map { $_->{name} => 1 } @{ $molecule->{atoms} };

        my %added_labels_node_labels;
        my %node_label_chemical_types;
        my %node_label_net_id;
        my %node_labels_full;
        my %added_labels;
        my %unique_net_ids;

        foreach my $node_label (keys %molecule_graph) {
            my $node_net_id = $molecule_graph{$node_label}{net_id};
            $unique_net_ids{$node_net_id} = 1;
        }

        my @unique_net_ids = sort { $a <=> $b } keys %unique_net_ids;

        set_loop_tag($new_dataset, '_topol_net.id',
                     '_topol_net.id', \@unique_net_ids);

        set_loop_tag($new_dataset, '_topol_net.label',
            '_topol_net.id', [map { "Net$_" } @unique_net_ids]);

        foreach my $node_label (sort keys %molecule_graph) {
            my $node_atom_label = $molecule_graph{$node_label}{atom}->{'name'};
            my $chemical_type = $molecule_graph{$node_label}{atom}->{'chemical_type'};

            my $atom_identifier = extract_atom_identifier($molecule_graph{$node_label}{atom});
            unless ( exists $added_labels{$atom_identifier} ) {
                if (exists $valid_atom_names{$node_atom_label}) {
                    $added_labels_node_labels{$atom_identifier} = $node_atom_label;
                    $node_label_chemical_types{$node_atom_label} = $chemical_type;
                    $node_label_net_id{$atom_identifier} = $molecule_graph{$node_label}{net_id};
                    $added_labels{$atom_identifier} = 1;
                }
            }

            $node_labels_full{$node_atom_label} = 1;
            if (exists $molecule_graph{$node_label}{'edges'}) {
                my %edges = %{ $molecule_graph{$node_label}{'edges'}};
                foreach my $edge_label (sort keys %edges) {
                    my $destination_atom_label = $edges{$edge_label}{atom}->{'name'};
                    my $destination_chemical_type = $edges{$edge_label}{atom}->{'chemical_type'};
                    my $edge_atom_identifier = extract_atom_identifier($edges{$edge_label}{atom});
                    unless ( exists $added_labels{$edge_atom_identifier} ) {
                        if (exists $valid_atom_names{$destination_atom_label} ) {
                            $added_labels_node_labels{$edge_atom_identifier} = $destination_atom_label;
                            $node_label_chemical_types{$destination_atom_label} = $destination_chemical_type;
                            $node_label_net_id{$edge_atom_identifier} = $molecule_graph{$node_label}{net_id};
                            $added_labels{$edge_atom_identifier} = 1;
                        }
                    }
                    $node_labels_full{$destination_atom_label} = 1;
                }
            }
        }

        my @sorted_added_labels = sort keys %added_labels;
        my %node_label_to_id;
        my %node_id_to_label;
        my $node_id_counter = 1;

        foreach my $label (@sorted_added_labels) {
            $node_label_to_id{$label} = $node_id_counter;
            $added_labels{$label} = $node_id_counter;
            $node_id_to_label{$node_id_counter} = $label;
            $node_id_counter++;
        }

        my @topol_node_ids = sort { $a <=> $b } keys %node_id_to_label;

        set_loop_tag($new_dataset, '_topol_node.id',
                     '_topol_node.id', \@topol_node_ids);

        my @assigned_net_ids;
        foreach my $node_id (@topol_node_ids) {
            my $label = $node_id_to_label{$node_id};

            push @assigned_net_ids, $node_label_net_id{$label};
        }

        set_loop_tag($new_dataset, '_topol_node.net_id',
                     '_topol_node.id', \@assigned_net_ids);

        my @topol_atom_labels = map {
            my $atom_label = $node_id_to_label{$_};
            my $node_label = $added_labels_node_labels{$atom_label};
            $node_label;
        } @topol_node_ids;

        my @sorted_node_labels_full = sort keys %node_labels_full;
        my %node_label_to_id_full;
        my $node_id_counter_full = 1;

        foreach my $label (@sorted_node_labels_full) {
            $node_label_to_id_full{$label} = $node_id_counter_full;
            $node_id_to_label{$node_id_counter_full} = $label;
            $node_id_counter_full++;
        }

        my $link_id_counter = 1;
        my @link_ids;
        my @link_net_ids;
        my @node_ids_1;
        my @node_ids_2;
        my @translations_2_x;
        my @translations_2_y;
        my @translations_2_z;
        my %link_id_atoms;

        foreach my $node_label (sort keys %molecule_graph) {
            my $node_net_id = $molecule_graph{$node_label}{net_id};
            my $node_atom_label = $molecule_graph{$node_label}{atom}->{'name'};
            my $atom_identifier = extract_atom_identifier($molecule_graph{$node_label}{atom});
            next unless exists $node_label_to_id_full{$node_atom_label};
            $node_atom_label =~ s/_\d+$//;
            my $node_id_1 = $added_labels{$atom_identifier};
            if (exists $molecule_graph{$node_label}{'edges'}) {
                my %edges = %{ $molecule_graph{$node_label}{'edges'} };
                foreach my $edge_node_label (sort keys %edges) {
                    my $node_edge_label = $edges{$edge_node_label}{atom}->{'name'};
                    my $edge_atom_identifier = extract_atom_identifier($edges{$edge_node_label}{atom});
                    next unless exists $node_label_to_id_full{$node_edge_label};
                    $node_edge_label =~ s/_\d+$//;
                    my $node_id_2 = $added_labels{$edge_atom_identifier};
                    my @edge_values = @{ $edges{$edge_node_label}{'edges'} };
                    foreach my $edge_value (@edge_values) {
                        my @translation = @$edge_value;

                        push @link_ids, $link_id_counter;
                        push @link_net_ids, $node_net_id;
                        $link_id_atoms{$link_id_counter} = [$node_id_1, $node_id_2];
                        push @node_ids_1, $node_id_1;
                        push @node_ids_2, $node_id_2;
                        push @translations_2_x, $translation[0];
                        push @translations_2_y, $translation[1];
                        push @translations_2_z, $translation[2];
                        $link_id_counter++;
                    }
                }
            }
        }

        set_loop_tag($new_dataset, '_topol_link.id',
                     '_topol_link.id', \@link_ids);

        set_loop_tag($new_dataset, '_topol_link.net_id',
                     '_topol_link.id', \@link_net_ids);

        set_loop_tag($new_dataset, '_topol_link.node_id_1',
                     '_topol_link.id', \@node_ids_1);

        set_loop_tag($new_dataset, '_topol_link.node_id_2',
                     '_topol_link.id', \@node_ids_2);

        set_loop_tag($new_dataset, '_topol_link.translation_2_x',
                     '_topol_link.id', \@translations_2_x);
        set_loop_tag($new_dataset, '_topol_link.translation_2_y',
                     '_topol_link.id', \@translations_2_y);
        set_loop_tag($new_dataset, '_topol_link.translation_2_z',
                     '_topol_link.id', \@translations_2_z);

        my @topol_atom_element_symbol;

        foreach my $label (@topol_atom_labels) {
            my $atom_label = $added_labels_node_labels{$label};
            push @topol_atom_element_symbol, $node_label_chemical_types{$label};
        }

        my @topol_atom_node_ids = @topol_node_ids;

        set_loop_tag($new_dataset, '_topol_atom.id',
                     '_topol_atom.id', \@topol_node_ids);

        set_loop_tag($new_dataset, '_topol_atom.atom_label',
                     '_topol_atom.id', \@topol_atom_labels);

        set_loop_tag($new_dataset, '_topol_atom.node_id',
                     '_topol_atom.id', \@topol_atom_node_ids);

        set_loop_tag($new_dataset, '_topol_atom.element_symbol',
                     '_topol_atom.id', \@topol_atom_element_symbol);
    }

    print_cif( $new_dataset,
                    {
                        preserve_loop_order => 1,
                        keep_tag_order => 1
                    } );

    return;
}

#===============================================================#
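# Finds all possible molecules in the CIF file. If two atoms are connected via
# [NOTE(review): the rest of this condition is missing from the original
# comment -- please restore it], then the algorithm states that there is no
# bond between these two atoms.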

# The algorithm:
# 1. Takes an initial atom and tests whether it has already been found in
#    another molecule
# 2. If it has not, begins to search for a new molecule:
# 2.1  Applies modulo_1 to the initial atom
# 2.2  Finds the translation from the initial atom to atom_modulo_1
# 2.3  Searches for all neighbours of atom_modulo_1
# 2.4  For each neighbour of atom_modulo_1 repeats 2.1 -- 2.4
# 2.5  Translates atom_modulo_1 and all its neighbours according to the
#       translation vector. atom_modulo_1 now becomes the initial atom; the
#       others are updated accordingly
# 3. Stops, and repeats from step 1 until no initial atoms are left.


# Accepts
#     covalent_sensitivity - a threshold for covalent sensitivity
#     atom_properties(
#           H => {
#                     name => Hydrogen, #(chemical_type)
#                     period => 1,
#                     group => 1,
#                     block => s,
#                     atomic_number => "1",
#                     atomic_weight => 1.008,
#                     covalent_radius => 0.23,
#                     vdw_radius => 1.09,
#                     valency => [1],
#                     },
#          );
# symmetric_atoms and initial_atoms are arrays of
#                                 $atom_info = {
#                                             name=>"C1_2",
#                                             site_label=>"C1",
#                                             chemical_type=>"C",
#                                             coordinates_fract=>[1.0, 1.0,1.0],
#                                             coordinates_ortho=>[1.0, 1.0,1.0],
#                                             unity_matrix_applied=>1
#                                             }
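# Returns a list of two values: a reference to an array of molecule hashes
# and the value of the net id counter after the last molecule. The general
# shape of a molecule hash is shown below; find_molecules() itself fills in
# the 'atoms', 'chemical_formula_sum', 'is_polymer', 'polymer_dimension',
# 'polymer_basis' and 'molecule_graph' keys: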
# %molecule = (
#               'atoms' => [
#                   \%atom1_info, \%atom2_info, \%atom3_info, \%atom4_info
#               ],
#               'bonds' => [
#                   [ \%atom1_info, \%atom2_info ],
#                   [ \%atom1_info, \%atom3_info ],
#                   [ \%atom4_info, \%atom3_info ],
#               ] # covalent bond description
#               'chemical_formula_sum' => "C6 H6",
#               'molecule_graph' => {
#                   'C4_1' => { # Source atom label
#                       'atom' => \%atom_info,
#                       'edges' => {
#                           'C6_2' => { # Destination atom label
#                               'atom' => \%atom_info,
#                               'edges' => [
#                                   [ '0', '0', '0' ],
#                                   [ '-1', '-1', '0' ],
#                                   [ '0', '-1', '0' ],
#                               ],
#                           },
#                       },
#
#                   },
#               }
#               'net_id' => 1
#             );

# find_molecules( $covalent_sensitivity, $atom_properties, $symmetric_atoms,
#                 $initial_atoms, $bricks, $seen_molecules )
#
# Walks over @{$initial_atoms} and, for every atom whose 'cell_label' has not
# been recorded in %used_originals yet, calls find_molecule() to collect the
# molecule starting at that atom together with its quotient graph.
#
# $covalent_sensitivity, $atom_properties, $symmetric_atoms and $bricks are
# handed to find_molecule() unchanged. $seen_molecules is accepted for
# interface compatibility but is not used inside this subroutine.
#
# Returns the list ( \@unique_molecules, $net_id ), where each element of
# @unique_molecules is a hash reference with the keys 'atoms',
# 'chemical_formula_sum' (always the empty string here), 'is_polymer',
# 'polymer_dimension', 'polymer_basis' and 'molecule_graph', and $net_id is
# the value of the net id counter after the last molecule.
sub find_molecules($$$$$$)
{
    my( $covalent_sensitivity, $atom_properties, $symmetric_atoms,
        $initial_atoms, $bricks, $seen_molecules ) = @_;

    # Bookkeeping shared between all find_molecule() calls.
    my %used_atoms;
    my %used_originals;
    my %checked_pairs;

    my @unique_molecules;
    my $nbumps = 0;
    my $net_id = 1;

    for my $initial_atom ( @{$initial_atoms} ) {
        # An atom whose cell label is already registered does not start
        # a new molecule.
        if( exists $used_originals{$initial_atom->{cell_label}} ) {
            next;
        }
        if( $debug ) {
            print STDERR ">>>> starting new molecule\n";
        }

        ## if( ! $expand_to_p1 &&
        ##     $initial_atom->{cell_label} ne $initial_atom->{site_label} ) {
        ##     print STDERR
        ##         ">>>> site: $initial_atom->{site_label}, " .
        ##         "cell: $initial_atom->{cell_label}\n";
        ## }

        # Per-molecule containers that find_molecule() fills in.
        my %molecule_graph;
        my %existing_translations;
        my %added_atoms;

        my( $molecule_atoms, $mol_nbumps, $mol_polymer_atoms ) =
            find_molecule( $covalent_sensitivity,
                           $atom_properties,
                           $symmetric_atoms,
                           \%used_atoms,
                           \%used_originals,
                           \%checked_pairs,
                           $initial_atom,
                           $bricks,
                           \%molecule_graph,
                           \%existing_translations,
                           \%added_atoms,
                           undef,
                           $net_id );

        # Graph nodes without an 'edges' entry are dropped.
        my @edgeless_nodes =
            grep { !exists $molecule_graph{$_}->{edges} }
                 keys %molecule_graph;
        delete @molecule_graph{@edgeless_nodes};

        my @atoms_found = @{$molecule_atoms};
        $nbumps += $mol_nbumps;

        if( !@atoms_found ) {
            warn "WARNING, found molecule with no atoms -- strange...\n";
            next;
        }

        # Polymer dimension and basis; both stay undefined for molecules
        # that are not polymers.
        my( $polymer_dimension, $polymer_basis );

        if( $mol_polymer_atoms > 0 ) {
            $net_id++;
            # old method to calculate polymer dimensions and basis commented

            # my $polymer_vectors = {};
            # for my $atom ( @molecule_atoms ) {
            #     my $site_label = $atom->{site_label};
            #     my $symop_id = $atom->{symop_id};
            #     if( !exists $polymer_vectors->{$site_label}{$symop_id} ) {
            #         $polymer_vectors->{$site_label}{$symop_id} = [];
            #     }
            #     push( @{$polymer_vectors->{$site_label}{$symop_id}},
            #           $atom->{translation} );
            # }

            # Following (Gao 2020), the dimensionality is derived from the
            # quotient graph: the translation labels of all graph edges are
            # gathered and handed to get_rank_and_basis().
            my @translation_labels;
            for my $node ( values %molecule_graph ) {
                next if !exists $node->{'edges'};
                for my $link ( values %{ $node->{'edges'} } ) {
                    for my $label ( @{ $link->{edges} } ) {
                        push @translation_labels, $label;
                    }
                }
            }

            # The vectors are taken relative to the zero vector; a lone
            # zero vector is used when the graph carries no edge labels.
            my $reference_vector = [0,0,0];

            # get_rank_and_basis() is called with a one-element list that
            # holds a reference to the array of vectors.
            my @rank_input = @translation_labels
                ? ( [ map { vector_sub( $_, $reference_vector ) }
                          @translation_labels ] )
                : ( [ $reference_vector ] );

            ( $polymer_dimension, $polymer_basis ) =
                get_rank_and_basis(@rank_input);
        }
        elsif( $include_non_polymer_quotient_graph ) {
            # The graph of a non-polymer molecule is kept and thus takes up
            # a net id of its own.
            $net_id++;
        }
        else {
            # Quotient graphs of non-polymer molecules were not requested.
            %molecule_graph = ();
        }

        push @unique_molecules, {
            atoms => \@atoms_found,
            chemical_formula_sum => '',
            is_polymer => ( $mol_polymer_atoms > 0 ),
            polymer_dimension => $polymer_dimension,
            polymer_basis => $polymer_basis,
            molecule_graph => \%molecule_graph,
        };
    }

    # The summary of bumps is printed only when not in verbose mode.
    if( $nbumps > 0 && !$verbose ) {
        warn "WARNING, $nbumps pair(s) of atoms are too close to "
           . "each other and are considered as bumps\n";
    }

    if( $debug ) {
        use COD::Serialise qw( serialiseRef );
        local $" = ' ';
        print ">>> reconstructed molecules:\n";
        serialiseRef( \@unique_molecules );
    }

    return ( \@unique_molecules, $net_id );
}

# ============================================================================ #

sub find_molecule($$$$$$$$$$$$$)
{
    my $covalent_sensitivity  = shift(@_);
    my $atom_properties       = shift(@_);
    my $symmetric_atoms       = shift(@_);
    my $used_atoms            = shift(@_);
    my $used_originals        = shift(@_);
    my $checked_pairs         = shift(@_);
    my $start_atom            = shift(@_);
    my $bricks                = shift(@_);
    my $molecule_graph        = shift(@_);
    my $existing_translations = shift(@_);
    my $added_atoms           = shift(@_);
    my $previous_atom         = shift(@_);
    my $net_id                = shift(@_);

    my @queue = ({ atom => $start_atom });
    my @final_neighbors;
    my @neighbors;
    my $mol_polymer_atoms = 0;
    my $nbumps = 0;
    my %processed_atoms;

    while ( my $current_info = shift @queue ) {
        my $current_atom = $current_info->{atom};
        my $previous_atom = $current_info->{previous_atom};
        my $polymer_atoms = 0;
        my @current_coords_fract_modulo_1 =
        map { modulo_1($_) } @{$current_atom->{coordinates_fract}};
        my $current_translation = translation( $current_atom->{coordinates_fract},
                                               \@current_coords_fract_modulo_1 );

        my $atom_in_unit_cell_coords_ortho =
            symop_vector_mul( $current_atom->{f2o},
                              \@current_coords_fract_modulo_1 );

        if( $previous_atom && %{$previous_atom} ) {
            my $current_atom_label = extract_atom_identifier( $current_atom );
            my $previous_atom_label = extract_atom_identifier( $previous_atom );

            if( !exists $added_atoms->{$previous_atom->{cell_label}}
                                      {$previous_atom->{symop_id}}
                                      {$previous_atom->{group}} ) {
                $added_atoms->{$previous_atom->{cell_label}}
                              {$previous_atom->{symop_id}}
                              {$previous_atom->{group}} = $previous_atom;
                push( @final_neighbors, $previous_atom );
            }

            my @edge_label = ();

            if( !exists $added_atoms->{$current_atom->{cell_label}}
                                      {$current_atom->{symop_id}}
                                      {$current_atom->{group}} ) {
                $added_atoms->{$current_atom->{cell_label}}
                              {$current_atom->{symop_id}}
                              {$current_atom->{group}} = $current_atom;
                @edge_label = (0, 0, 0);
                push( @final_neighbors, $current_atom );
            } else {
                my $existing_translation = $added_atoms->
                    {$current_atom->{cell_label}}
                    {$current_atom->{symop_id}}
                    {$current_atom->{group}}->{translation};

                for ( my $i = 0 ; $i < scalar( @{$current_translation} ) ; $i++ ) {
                    push( @edge_label,
                        @{$current_translation}[$i] -
                        @{$existing_translation}[$i] );
                }
                if( $current_atom_label eq $previous_atom_label ) {
                    if( $edge_label[0] == 0 &&
                        $edge_label[1] == 0 &&
                        $edge_label[2] == 0 ) {
                        next;
                    }
                }
            }

            my @opposite_edge_label        = map { $_ * -1 } @edge_label;
            my $edge_string_label          = join( "", @edge_label );
            my $opposite_edge_string_label = join( "", @opposite_edge_label );

            my $is_translation_exists = 0;

            my $translation_id = ( 5+$edge_label[0] ) .
                                 ( 5+$edge_label[1] ) .
                                 ( 5+$edge_label[2] );
            my $opposite_translation_id = ( 5+$opposite_edge_label[0] ) .
                                          ( 5+$opposite_edge_label[1] ) .
                                          ( 5+$opposite_edge_label[2] );

            $is_translation_exists = translation_exists(
                $existing_translations, $previous_atom_label,
                $current_atom_label,    $translation_id,
                \@edge_label,           $is_translation_exists
            );
            $is_translation_exists = translation_exists(
                $existing_translations, $current_atom_label,
                $previous_atom_label,   $opposite_translation_id,
                \@opposite_edge_label,  $is_translation_exists
            );

            if( $is_translation_exists ) {
                $mol_polymer_atoms++;
                next;
            }

            my $original_atom = $added_atoms->
                {$current_atom->{cell_label}}
                {$current_atom->{symop_id}}
                {$current_atom->{group}};

            my $original_atom_label = extract_atom_identifier( $original_atom );

            if( !exists $molecule_graph->{$previous_atom_label} ) {
                $molecule_graph->{$previous_atom_label} = {
                    net_id => $net_id,
                    atom  => $previous_atom,
                    edges => { $original_atom_label => {
                            atom => $original_atom,
                            edges => [ \@edge_label ]
                        }
                    }
                };
                $existing_translations->{$previous_atom_label}
                                        {$original_atom_label}
                                        {$translation_id} = \@edge_label;
            } else {
                my $existing_edges = $molecule_graph->
                                     {$previous_atom_label}->
                                     {edges};

                if( exists $molecule_graph->{$previous_atom_label}->{edges} ) {
                    if( exists $existing_edges->{$current_atom_label} ) {
                        my $add_edge = 1;
                        my $edges = $existing_edges->
                                    {$current_atom_label}->
                                    {edges};
                        for ( my $i = 0 ; $i < scalar( @{$edges} ) ; $i++ ) {
                            if( $edges->[$i][0] == $edge_label[0] &&
                                $edges->[$i][1] == $edge_label[1] &&
                                $edges->[$i][2] == $edge_label[2] )
                            {
                                $add_edge = 0;
                                last;
                            }
                        }
                        if( $add_edge ) {
                            push( @{$existing_edges->
                                  {$current_atom_label}->
                                  {edges}},
                                  \@edge_label );
                            $existing_translations->{$previous_atom_label}
                                                    {$current_atom_label}
                                                    {$translation_id} = \@edge_label;
                        }
                    } else {
                        $existing_edges->{$original_atom_label} = {
                            atom => $original_atom,
                            edges => [ \@edge_label ]
                        };
                        $existing_translations->{$previous_atom_label}
                                                {$original_atom_label}
                                                {$translation_id} = \@edge_label;
                    }
                } else {
                    $molecule_graph->{$previous_atom_label}->{edges} = {
                        $original_atom_label => {
                            atom => $original_atom,
                            edges => [ \@edge_label ]
                        }
                    };
                    $existing_translations->{$previous_atom_label}
                                            {$original_atom_label}
                                            {$translation_id} = \@edge_label;
                }
            }

            if( $edge_label[0] != 0 ||
                $edge_label[1] != 0 ||
                $edge_label[2] != 0 ) {
                $mol_polymer_atoms++;
                if( exists $added_atoms->{$current_atom->{cell_label}}
                                         {$current_atom->{symop_id}}
                                         {$current_atom->{group}} ) {
                    next;
                } else {
                    $added_atoms->{$current_atom->{cell_label}}
                                  {$current_atom->{symop_id}}
                                  {$current_atom->{group}} = $current_atom;
                    push( @final_neighbors, $current_atom );
                    next;
                }
            }
        }

        $used_originals->{$current_atom->{cell_label}} =
            $current_atom->{cell_label};

        print STDERR
            ">>> considering atom $current_atom->{name} " .
                "(@{$atom_in_unit_cell_coords_ortho}) " .
            "($current_atom->{cell_label}/" .
            "$current_atom->{symop_id}/$current_atom->{translation_id})\n"
            if $debug;

        if( !exists $added_atoms->{$current_atom->{cell_label}}
                                  {$current_atom->{symop_id}}
                                  {$current_atom->{group}} ) {
            push( @final_neighbors, $current_atom );
            $added_atoms->{$current_atom->{cell_label}}
                          {$current_atom->{symop_id}}
                          {$current_atom->{group}} = $current_atom;
        }

        my ( $neighbors, $mol_nbumps ) =
            get_neighbours(
                $covalent_sensitivity,
                $atom_properties,
                $checked_pairs,
                $current_atom,
                $bricks);

        foreach my $neighbor_info (@$neighbors) {
            my $atom = $neighbor_info->{atom};
            if( !exists $processed_atoms{$atom->{cell_label}}
                                        {$atom->{symop_id}}
                                        {$atom->{translation_id}}
                                        {$atom->{group}}
                                        {$current_atom->{cell_label}}
                                        {$current_atom->{symop_id}}
                                        {$current_atom->{translation_id}}
                                        {$current_atom->{group}} ) {
                push @queue, $neighbor_info;
                $processed_atoms{$atom->{cell_label}}
                                {$atom->{symop_id}}
                                {$atom->{translation_id}}
                                {$atom->{group}}
                                {$current_atom->{cell_label}}
                                {$current_atom->{symop_id}}
                                {$current_atom->{translation_id}}
                                {$current_atom->{group}} = [$atom, $current_atom];
                $processed_atoms{$current_atom->{cell_label}}
                                {$current_atom->{symop_id}}
                                {$current_atom->{translation_id}}
                                {$current_atom->{group}}
                                {$atom->{cell_label}}
                                {$atom->{symop_id}}
                                {$atom->{translation_id}}
                                {$atom->{group}} = [$current_atom, $atom];
            }
        }

        $nbumps += $mol_nbumps;
    }

    my $polymer_span_processed_counter = 0;
    my %seen_atoms;
    foreach my $atom (@final_neighbors) {
        $seen_atoms{$atom->{name}} = 1;
    }

    while ($polymer_span_processed_counter < $max_polymer_span) {
        my @new_atoms;

        foreach my $atom (@final_neighbors) {
            foreach my $node (keys %{$molecule_graph}) {
                if (exists $molecule_graph->{$node}{'edges'}) {
                    my $edges_ref = $molecule_graph->{$node}{'edges'};
                    foreach my $edge_node (keys %$edges_ref) {
                        foreach my $edge (@{$edges_ref->{$edge_node}{'edges'}}) {
                            my $translated_atom = translate_atom($atom, $edge);

                            unless ( $seen_atoms{$translated_atom->{name}} ) {
                                push @new_atoms, $translated_atom;
                                $seen_atoms{$translated_atom->{name}} = 1;
                            }

                            my @opposite_direction = map { $_ * -1 } @$edge;
                            my $back_translated_atom = translate_atom( $atom,
                                                                      \@opposite_direction );

                            unless ( $seen_atoms{$back_translated_atom->{name}} ) {
                                push @new_atoms, $back_translated_atom;
                                $seen_atoms{$back_translated_atom->{name}} = 1;
                            }
                        }
                    }
                }
            }
        }

        push @final_neighbors, @new_atoms;
        $polymer_span_processed_counter++;
    }

    my %molecule = (
        atoms => \@final_neighbors,
        chemical_formula_sum => '',
        is_polymer => ($mol_polymer_atoms > 0),
        graph => $molecule_graph
    );

    return (\@final_neighbors, $nbumps, $mol_polymer_atoms);
}

# Collect the atoms bonded to $current_atom by scanning the bricks that
# surround its position, and count the bumps (too-close contacts) met
# on the way.
#
# Parameters (positional):
#   $covalent_sensitivity - passed through to test_bond();
#   $atom_properties      - passed through to test_bump() and test_bond();
#   $checked_pairs        - hash ref, updated in place: every examined
#                           pair of atom names is recorded in both
#                           directions so that a bump is reported once;
#   $current_atom         - atom hash ref; the keys read here are 'name',
#                           'coordinates_fract', 'f2o', 'chemical_type'
#                           and 'site_label';
#   $bricks               - brick structure; atoms are read from
#                           $bricks->{atoms}[$i][$j][$k].
#
# Returns a two-element list:
#   - reference to an array of { previous_atom => ..., atom => ... }
#     hashes, one per detected bond;
#   - number of bumps counted during this call.
#
# Dies when a bump is found and $ignore_bumps is false, and re-throws
# any error raised by get_search_span().
#
# Reads the variables $debug, $bump_distance_factor, $ignore_bumps and
# $verbose, and increments $total_nbumps; none of them is declared in
# this subroutine.
sub get_neighbours
{
    my $covalent_sensitivity = shift @_;
    my $atom_properties      = shift @_;
    my $checked_pairs        = shift @_;
    my $current_atom         = shift @_;
    my $bricks               = shift @_;

    # presumably modulo_1() wraps each fractional coordinate into the
    # unit cell -- confirm against its definition
    my @current_coords_fract_modulo_1 =
        map { modulo_1($_) } @{$current_atom->{coordinates_fract}};

    # Orthogonal coordinates of the wrapped position, obtained with the
    # atom's own 'f2o' matrix.
    my $atom_in_unit_cell_coords_ortho =
        symop_vector_mul( $current_atom->{f2o}, \@current_coords_fract_modulo_1 );

    # Translation relating the original and the wrapped fractional
    # coordinates; it is applied below to every bonded neighbour.
    my $current_translation = translation( $current_atom->{coordinates_fract},
                                           \@current_coords_fract_modulo_1 );

    # Brick indices of the wrapped position.
    my ($i_init, $j_init, $k_init) =
        get_atom_index( $bricks, @{$atom_in_unit_cell_coords_ortho} );

    my ( $min_i, $max_i, $min_j, $max_j, $min_k, $max_k );

    # get_search_span() may die; in debug mode dump the relevant data
    # before re-throwing the error.
    eval {
        ( $min_i, $max_i, $min_j, $max_j, $min_k, $max_k ) =
            get_search_span( $bricks, $i_init, $j_init, $k_init );
    };
    if( $@ ) {
        if( $debug ) {
            use COD::Serialise qw( serialiseRef );
            serialiseRef( $atom_in_unit_cell_coords_ortho );
            serialiseRef( [ $i_init, $j_init, $k_init ] );
            serialiseRef( $bricks );
        }
        die $@ . "\n";
    }

    if( $debug ) {
        local $" = ", ";
        print STDERR
            ">>> now scanning its distinct neighbours " .
            "around @{$atom_in_unit_cell_coords_ortho}:\n";
    };

    my @neighbors;
    my $nbumps = 0;

    # Visit every atom stored in the bricks of the search span.
    for my $i ($min_i .. $max_i) {
    for my $j ($min_j .. $max_j) {
    for my $k ($min_k .. $max_k) {
        for my $sym_atom ( @{$bricks->{atoms}[$i][$j][$k]} ) {
            my $new_label = $current_atom->{name};
            my $sym_label = $sym_atom->{name};

            # Same atom found, no need to add bond or neighbour
            next if $current_atom->{name} eq $sym_atom->{name};

            # Skip check between alternative positions of the same atom
            if( atoms_are_alternative( $current_atom, $sym_atom ) ) {
                $checked_pairs->{$sym_label}{$new_label} = 1;
                $checked_pairs->{$new_label}{$sym_label} = 1;
                next;
            }

            # The distance is measured from the wrapped position of the
            # current atom.
            my $dist = distance( $atom_in_unit_cell_coords_ortho,
                                 $sym_atom->{coordinates_ortho} );

            do {
                local $" = ' ';
                print STDERR ">>> checking neighbour $sym_label " .
                    "(@{$sym_atom->{coordinates_ortho}}), " .
                    "d = $dist\n";
            } if $debug;

            my $is_bump = test_bump( $atom_properties,
                                     $current_atom->{chemical_type},
                                     $sym_atom->{chemical_type},
                                     $current_atom->{site_label},
                                     $sym_atom->{site_label},
                                     $dist, $bump_distance_factor );

            if( $is_bump ) {
                # Report a bump only for a pair that has not been
                # examined before.
                if( !exists $checked_pairs->{$sym_label}{$new_label} ) {
                    # The two names are ordered by length, then
                    # alphabetically, so the message does not depend on
                    # which atom of the pair is the current one.
                    my $message = sprintf(
                        "atoms '%s' and '%s' are too close (distance = " .
                        '%6.4f) and are considered a bump',
                        (sort { length($a) <=> length($b) || $a cmp $b }
                            $current_atom->{name}, $sym_atom->{name}),
                        $dist
                    );
                    if( $ignore_bumps ) {
                        # Unless $verbose is set, a warning is printed
                        # only while $total_nbumps is below 5; the bump
                        # is counted in either case.
                        if( $verbose || $total_nbumps < 5 ) {
                            warn "WARNING, $message\n";
                        }
                        $nbumps++;
                        $total_nbumps++;
                    } else {
                        die "ERROR, $message -- aborting calculations\n";
                    }
                }
            }

            # Mark the pair as examined in both directions.
            $checked_pairs->{$sym_label}{$new_label} = 1;
            $checked_pairs->{$new_label}{$sym_label} = 1;

            my $is_bond = test_bond($atom_properties,
                                    $current_atom->{chemical_type},
                                    $sym_atom->{chemical_type},
                                    $dist,
                                    $covalent_sensitivity);

            if( $is_bond ) {
                do {
                    use COD::Serialise qw( serialiseRef );
                    local $" = ' ';
                    print STDERR ">>> found bond:\n";
                    serialiseRef( { "translation" => $current_translation,
                                    "original atom" => $current_atom,
                                    "sym atom" => $sym_atom } );
                } if $debug;

                # Apply the current atom's translation to the neighbour
                # found in the bricks.
                my $back_shifted_sym_atom =
                    translate_atom( $sym_atom, $current_translation );

                # NOTE(review): unlike the other debug banners in this
                # subroutine, the one below is printed to the default
                # output handle and not to STDERR -- confirm that this
                # is intended.
                do {
                    use COD::Serialise qw( serialiseRef );
                    print ">>>> back-shifted atom:\n";
                    serialiseRef( { sym_atom => $sym_atom,
                                    backshifted => $back_shifted_sym_atom } );
                } if $debug;

                my $neighbor_info = {
                    previous_atom => $current_atom,
                    atom => $back_shifted_sym_atom
                };
                push @neighbors, $neighbor_info;
            }
        }
    }}}

    return (\@neighbors, $nbumps);
}

# Build an identifier for an atom from its cell label and symmetry
# operator id, joined by an underscore.
# @param   atom hash reference with the keys 'cell_label' and 'symop_id'
# @retval  string of the form "<cell_label>_<symop_id>"
sub extract_atom_identifier {
    my ($atom) = @_;
    # Return the identifier explicitly instead of relying on the value
    # of a trailing assignment to an otherwise unused variable.
    return "$atom->{cell_label}_$atom->{symop_id}";
}

# Check whether the translation recorded for the ($from, $to, $label)
# triple equals the given edge label.
# @param   existing_translations  hash ref: {from}{to}{label} => [x, y, z]
# @param   from, to, label        keys of the record to look up
# @param   edge_label             array ref with the three components
# @param   translation_exists     value returned when there is no match
# @retval  1 if the recorded translation has the same three components
#          as the edge label, the passed-in value otherwise
sub translation_exists {
    my ( $existing_translations, $from, $to, $label,
         $edge_label, $translation_exists ) = @_;

    # No record for this triple: hand the caller's flag back untouched.
    return $translation_exists
        if !exists $existing_translations->{$from}{$to}{$label};

    my $recorded = $existing_translations->{$from}{$to}{$label};

    # Compare component by component, stopping at the first mismatch.
    for my $axis (0 .. 2) {
        return $translation_exists
            if $recorded->[$axis] != $edge_label->[$axis];
    }

    return 1;
}

#===========================================================================
# Return a reference to a list of chemical bonds. Each bond is a hash
# with the keys 'atom1' and 'atom2' (references to the structures
# describing the two bonded atoms), 'distance' and 'order'.

sub atom_bonds
{
    # Parameters:
    #   $atoms                - array ref of atom hashes; the keys read
    #                           here are 'name', 'coordinates_ortho' and
    #                           'chemical_type';
    #   $atom_properties      - passed to get_max_covalent_radius(),
    #                           test_bond() and get_bond_order();
    #   $covalent_sensitivity - added to the brick size and passed to
    #                           test_bond().
    # Returns an array ref of { atom1, atom2, distance, order } hashes.
    my ($atoms, $atom_properties, $covalent_sensitivity) = @_;

    use COD::Serialise qw( serialiseRef );

    # Bricks are sized as twice the largest covalent radius plus the
    # sensitivity.
    my $largest_radius = get_max_covalent_radius( $atom_properties );
    my $bricks = build_bricks( $atoms,
                               $largest_radius * 2 + $covalent_sensitivity );

    # Atoms that have already served as the first partner, keyed by
    # name; skipping them as neighbours yields each pair only once.
    my %visited;
    my @bond_list;

    for my $atom (@$atoms) {

        $visited{$atom->{name}} = $atom;

        my $atom_xyz = $atom->{coordinates_ortho};

        my ($i0, $j0, $k0) = get_atom_index( $bricks, @$atom_xyz );

        my ( $i_from, $i_to, $j_from, $j_to, $k_from, $k_to );

        # get_search_span() may die; in debug mode dump the relevant
        # data before re-throwing the error.
        eval {
            ( $i_from, $i_to, $j_from, $j_to, $k_from, $k_to ) =
                get_search_span( $bricks, $i0, $j0, $k0 );
        };
        if( $@ ) {
            if( $debug ) {
                serialiseRef( $atom_xyz );
                serialiseRef( [ $i0, $j0, $k0 ] );
                serialiseRef( $bricks );
            }
            die $@ . "\n";
        }

        for my $i ($i_from .. $i_to) {
        for my $j ($j_from .. $j_to) {
        for my $k ($k_from .. $k_to) {
            for my $neighbour ( @{$bricks->{atoms}[$i][$j][$k]} ) {

                # Pair already examined from the other side.
                next if exists $visited{$neighbour->{name}};

                # The very same atom structure.
                next if $atom == $neighbour;

                # Alternative positions of one atom are never bonded.
                next if atoms_are_alternative( $atom, $neighbour );

                my $neighbour_xyz = $neighbour->{coordinates_ortho};
                my $distance = distance( $atom_xyz, $neighbour_xyz );

                my $bonded = test_bond( $atom_properties,
                                        $atom->{chemical_type},
                                        $neighbour->{chemical_type},
                                        $distance,
                                        $covalent_sensitivity );
                next if !$bonded;

                if( $debug ) {
                    local $" = ' ';
                    print STDERR ">>> found bond:\n";
                    serialiseRef( { "original atom" => $atom,
                                    "neighbour atom" => $neighbour } );
                }

                my $order = get_bond_order( $distance,
                                            $atom->{chemical_type},
                                            $neighbour->{chemical_type},
                                            $atom_properties );

                push @bond_list, {
                    atom1    => $atom,
                    atom2    => $neighbour,
                    distance => $distance,
                    order    => $order,
                };
            }
        }}}
    }

    return \@bond_list;
}

#==============================================================================
# Use heuristics to guess bond order from its length:

sub get_bond_order
{
    # Parameters: bond length, the chemical types of the two atoms and
    # the atom property table (the last one is not consulted here).
    # Returns the order stored with the shortest reference length that
    # exceeds $distance, or "?" when no such length is known.
    my( $distance, $atom1_type, $atom2_type, $atom_properties ) = @_;

    # Both atom types need entries in %atom_radii.
    return "?"
        unless exists $atom_radii{$atom1_type} &&
               exists $atom_radii{$atom2_type};

    my @first_radii  = @{$atom_radii{$atom1_type}};
    my @second_radii = @{$atom_radii{$atom2_type}};

    # Pair up the entries whose first fields agree; the reference
    # length of a pair is the sum of the two third fields.
    my @candidates;
    for my $first (@first_radii) {
        for my $second (@second_radii) {
            next if $first->[0] ne $second->[0];
            push @candidates,
                 [ $first->[0], $first->[1], $first->[2] + $second->[2] ];
        }
    }

    # Go through the reference lengths from the shortest upwards.
    for my $candidate (sort { $a->[2] <=> $b->[2] } @candidates) {
        return $candidate->[1] if $distance < $candidate->[2];
    }

    return "?";
}


#==============================================================================
# Calculate the rank and basis of a matrix using Gauss-Jordan elimination.
# @param   matrix
# @retval  rank (integer), matrix basis (string)

sub get_rank_and_basis
{
    my( $m ) = @_;

    # An empty matrix needs no elimination; note that only the rank
    # (and no basis string) is returned in this case.
    return 0 if !@{$m};

    # Disabled debugging aid: print the input matrix row by row.
    if( 0 ) {
        local $\ = "\n";
        print STDERR join ' ', @{$_} for @{$m};
    }

    my $rre_matrix =
        gj_elimination_non_zero_elements( $m, 8 * $machine_epsilon );

    # Render every row as 'a;b;c' using the '%g' number format.
    my @row_strings;
    for my $row (@{$rre_matrix}) {
        push @row_strings, join( ';', map { sprintf '%g', $_ } @{$row} );
    }

    # The basis string lists the rows in descending string order,
    # separated by spaces.
    my $basis = join ' ', sort { $b cmp $a } @row_strings;

    # Disabled debugging aid: print the resulting basis string.
    if( 0 ) {
        print STDERR ">>>> basis: $basis\n";
    }

    return scalar( @{$rre_matrix} ), $basis;
}


#==============================================================================
# Convert single-quoted basis string to matrix.
# @param   string
# @retval  matrix

sub basis_string_to_matrix
{
    # Vectors in the string are separated by whitespace and the
    # components of a vector by semicolons, e.g. '1;0;0 0;1;0'.
    # Returns a reference to an array of array references.
    my( $string ) = @_;
    $string =~ s/'//g; # remove single-quotes

    # Splitting on ' ' (rather than /\s+/) skips leading whitespace, so
    # a string that starts with blanks does not produce a spurious
    # empty first row.
    my @vectors = split ' ', $string;

    my $matrix = [ map { [ split /;/, $_ ] } @vectors ];
    return $matrix;
}


#==============================================================================
# Find machine epsilon.
# @param   void
# @retval  scalar
sub get_machine_epsilon
{
    # Start from 1 and keep halving the candidate until adding it to 1
    # no longer gives a result greater than 1; that value is returned.
    my $candidate = 1.00;
    $candidate /= 2 until $candidate + 1.00 <= 1.00;
    return $candidate;
}