File: xtp_basisset

package info (click to toggle)
votca 2025.1-1
links: PTS, VCS
area: main
in suites: forky
size: 132,424 kB
sloc: xml: 345,964; cpp: 80,067; python: 15,957; sh: 4,580; perl: 2,169; javascript: 202; makefile: 34
file content (234 lines) | stat: -rwxr-xr-x 8,640 bytes
parent folder | download | duplicates (3)
#! /usr/bin/env python3
#
# Copyright 2009-2024 The VOTCA Development Team (http://www.votca.org)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import argparse
import os
import sys
import time
from pathlib import Path

import lxml.etree as lxml

# Version string. The @...@ and #...# tokens look like build-time placeholders
# (presumably substituted when the script is configured/installed -- confirm).
VERSION = '@PROJECT_VERSION@ #VOTCA_GIT_ID#'

# PROGTITLE is embedded in the banner below; PROGDESCR is handed to the
# argument parser as its description.
PROGTITLE = 'THE VOTCA::XTP converter basissetfiles'
PROGDESCR = 'Creates votca xml basissetfiles from NWCHEM basissetfiles'
# Banner text returned by XtpHelpFormatter._format_usage instead of a usage
# line. The @PROJECT_...@ tokens are emitted literally unless something
# replaces them before the script runs.
VOTCAHEADER = f"""
==================================================
========   VOTCA (http://www.votca.org)   ========
==================================================

{PROGTITLE}

please read and cite: @PROJECT_CITATION@
and submit bugs to @PROJECT_CONTACT@
xtp_basisset, version {VERSION}

"""


def okquit(what: str):
    """Print ``what`` and terminate the interpreter with exit status 0.

    Args:
        what: Message written to standard output before exiting.

    Raises:
        SystemExit: Always, with code 0.
    """
    print(what)
    raise SystemExit(0)


def exists(input_file: str) -> Path:
    """Argparse ``type=`` callback: turn an existing path string into a Path.

    Args:
        input_file: Path given on the command line (file or directory).

    Returns:
        ``Path(input_file)`` when that path exists.

    Raises:
        argparse.ArgumentTypeError: If nothing exists at ``input_file``.
    """
    candidate = Path(input_file)
    if candidate.exists():
        return candidate
    raise argparse.ArgumentTypeError(f"{input_file} doesn't exist!")


# =============================================================================
# PROGRAM OPTIONS
# =============================================================================


class XtpHelpFormatter(argparse.HelpFormatter):
    """Help formatter whose usage section is the fixed VOTCA banner."""

    def _format_usage(self, usage, action, group, prefix):
        # Every argument is ignored; the module-level banner is returned
        # unchanged.
        return VOTCAHEADER


def _make_formatter(prog):
    """Create the banner-printing help formatter with a wide option column."""
    return XtpHelpFormatter(prog, max_help_position=70)


# Command-line interface: one required input path, one required output path
# and an optional Turbomole basis-set name.
progargs = argparse.ArgumentParser(
    prog='xtp_basisset',
    formatter_class=_make_formatter,
    description=PROGDESCR,
)

# Input must exist (checked by ``exists``); it may be a file or a directory.
progargs.add_argument(
    '-f', '--input',
    dest='input',
    required=True,
    type=exists,
    help='NWchem file containing the basisset or Turbomole folder with element names.',
)

progargs.add_argument(
    '-o', '--outputvotca',
    dest='outputfile',
    required=True,
    help='Path of votca outputfile',
)

progargs.add_argument(
    '-t', '--turbomolebasisset',
    dest='turbobasis',
    default=None,
    help='For turbomole specify the basisset that is supposed to be extracted from Files, for auxbasis sets the basisset the aux basisset is supposed to be used for.',
)

# Parsed at import time; the rest of the script reads its settings from here.
OPTIONS = progargs.parse_args()


def getelemententry(root, element):
    """Return the child of ``root`` whose ``name`` attribute is ``element``.

    The first matching child wins. When no child matches, a new
    ``<element name=...>`` node is appended to ``root`` and returned.

    Args:
        root: Parent XML node whose direct children are searched.
        element: Value looked for in each child's ``name`` attribute.

    Returns:
        The existing or newly created child node.
    """
    match = next(
        (child for child in root if child.get("name") == element), None)
    # Compare against None explicitly: a childless XML node can be falsy.
    if match is None:
        match = lxml.SubElement(root, "element", name=element)
    return match


def convertofloat(floatstring: str) -> float:
    """Convert a number written with a Fortran or C exponent to ``float``.

    Fortran double-precision literals mark the exponent with ``D`` or ``d``
    (``1.5D+01``, ``1.5d+01``), which ``float`` rejects. Both spellings are
    rewritten to ``E``/``e`` before conversion. No string accepted by
    ``float`` contains the letter ``d``, so the rewrite cannot break a value
    that was already valid.

    Args:
        floatstring: Textual number as read from the basis-set file.

    Returns:
        The parsed value.

    Raises:
        ValueError: If the text is not a number; a diagnostic naming the
            original text is printed before the exception propagates.
    """
    normalized = floatstring.replace("D", "E").replace("d", "e")
    try:
        return float(normalized)
    except ValueError:
        # Report the text exactly as it appeared in the input file.
        print(f"Cannot figure out what '{floatstring}' means")
        raise


def turbomolegetcontributions(element, lines, basissetstringlist, basis, shells) -> None:
    """Append the shells of one basis-set section to the ``basis`` XML node.

    ``lines`` is scanned for a header line whose first token equals
    ``element`` and whose last token is one of the requested basis-set names.
    The next line starting with ``*`` opens the data section and the one
    after that closes it. Inside the section:

    * a two-token line whose second token (upper-cased) is in ``shells``
      starts a new ``<shell>`` child of ``basis``;
    * any other two-token line is read as ``exponent coefficient`` and added
      to the current shell as ``<constant><contractions/></constant>``;
    * a line containing ``->`` names another basis set (its last token);
      these are collected and resolved by a recursive call over the same
      ``lines`` once the scan is finished.

    Lines starting with ``#``, blank or whitespace-only lines, and lines
    containing ``$end`` are ignored.

    Args:
        element: First token of the header line to look for.
        lines: Lines of the basis-set file.
        basissetstringlist: Requested basis-set name(s). A single string is
            treated as a one-element list.
        basis: XML node that receives the ``<shell>`` children.
        shells: Upper-case shell letters that mark a shell header.

    Raises:
        SystemExit: After printing a diagnostic when a line inside the data
            section cannot be interpreted. ``sys.exit()`` is called without
            an argument, so the exit status is 0.
    """
    # A bare string would turn the membership test below into a substring
    # search ("SVP" in "def2-SVP" is True) and select the wrong section.
    if isinstance(basissetstringlist, str):
        basissetstringlist = [basissetstringlist]

    newbasissets = []
    start = False
    basissetfound = False
    # No shell is open until the first shell header has been read.
    shell = None
    shelltype = None

    for line in lines:
        entries = line.split()
        # An empty ``entries`` also covers whitespace-only lines, which
        # would otherwise fail on ``entries[0]`` below.
        if not entries or line.startswith("#") or "$end" in line:
            continue

        if line.startswith("*") and basissetfound:
            if start:
                # Second delimiter after the header: section is complete.
                break
            start = True
            continue

        if entries[0] == element:
            if entries[-1] in basissetstringlist:
                basissetfound = True
            continue

        if not (start and basissetfound):
            continue

        if "->" in line:
            newbasissets.append(entries[-1])
            continue

        if len(entries) != 2:
            print(f"Cannot figure out what line '{line}' means")
            sys.exit()

        if entries[-1].upper() in shells:
            shelltype = entries[-1].upper()
            shell = lxml.SubElement(
                basis, "shell", type=shelltype, scale="1.0")
        elif shell is None:
            # Data row without a preceding shell header.
            print(f"Cannot figure out what line '{line}' means")
            sys.exit()
        else:
            constant = lxml.SubElement(
                shell, "constant", decay="{:1.6e}".format(convertofloat(entries[0])))
            lxml.SubElement(
                constant, "contractions", type=shelltype, factor="{:1.6e}".format(convertofloat(entries[1])))

    if newbasissets:
        turbomolegetcontributions(element, lines, newbasissets, basis, shells)


# =============================================================================
# PARSING INPUT: NWCHEM FILE OR TURBOMOLE FOLDER
# =============================================================================
# The basis set is named after the input path without its final suffix.
basissetname = OPTIONS.input.stem
# Root <basis> node of the output document; element entries are added below it.
basis = lxml.Element("basis", name=basissetname)
# Provenance comment: source file name plus the locale-formatted current time.
basis.append(lxml.Comment(
    f"Basis set created by xtp_basisset from {OPTIONS.input.name} at {time.strftime('%c')}"))

# Element symbols the converter knows. In NWChem mode a line whose first token
# is in this list starts a new shell; in Turbomole mode the lower-cased symbol
# is used as the per-element file name.
elements = ['H', 'He', 'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne', 'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'K', 'Ar', 'Ca', 'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Ni', 'Co', 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr', 'Rb', 'Sr', 'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn', 'Sb', 'I', 'Te', 'Xe', 'Cs', 'Ba', 'La', 'Ce', 'Pr',
            'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Pt', 'Ir', 'Au', 'Hg', 'Tl', 'Pb', 'Bi', 'Po', 'At', 'Rn', 'Fr', 'Ra', 'Ac', 'Pa', 'Th', 'Np', 'U', 'Am', 'Pu', 'Bk', 'Cm', 'Cf', 'Es', 'Fm', 'Md', 'No', 'Rf', 'Lr', 'Db', 'Bh', 'Sg', 'Mt', 'Ds', 'Rg', 'Hs', 'Uut', 'Uub', 'Uup', 'Uuq', 'Uuh', 'Uuo']
# Shell letters handed to turbomolegetcontributions to recognise shell headers.
shells = ["S", "P", "D", "F", "G", "H", "I"]


# Dispatch on the kind of input path: a regular file is read as an NWChem
# basis-set file, a directory as a Turbomole folder holding one file per
# element (named by the lower-cased element symbol).
if OPTIONS.input.is_file():
    print(f"File {OPTIONS.input} seems to be a NWCHEM file")

    keywords = ["BASIS", "end", "basis", "END"]
    # No shell is open until the first "<element> <shelltype>" header line,
    # so a data row seen earlier reaches the "cannot understand" branch
    # instead of failing with NameError.
    shell = None
    shelltype = None

    with open(OPTIONS.input, 'r') as f:
        for line in f:
            if line.startswith("#"):
                continue
            entries = line.split()
            # Blank and whitespace-only lines as well as block keywords carry
            # no basis-set data.
            if not entries or entries[0] in keywords:
                continue
            if entries[0] in elements and len(entries) > 1:
                # Shell header: "<element> <shelltype>".
                element = getelemententry(basis, entries[0])
                shelltype = entries[1]
                shell = lxml.SubElement(
                    element, "shell", type=shelltype, scale="1.0")
            elif len(entries) > 1 and shell is not None:
                # Data row: exponent followed by one coefficient per letter
                # of the shell type (e.g. two coefficients for "SP").
                constant = lxml.SubElement(
                    shell, "constant", decay="{:1.6e}".format(convertofloat(entries[0])))
                for contractionfactor, singleshell in zip(entries[1:], shelltype):
                    lxml.SubElement(constant, "contractions", type=singleshell, factor="{:1.6e}".format(
                        convertofloat(contractionfactor)))
            else:
                okquit(f"\nCannot understand line in file:{line}")

elif OPTIONS.input.is_dir():

    print(
        f"Directory {OPTIONS.input.as_posix()} seems to be a Turbomole folder")
    if OPTIONS.turbobasis is None:
        progargs.print_help()
        okquit(
            "\nQuit here, because: Turbomole basisset not set (option -t/--turbomolebasisset)")
    for ele in elements:
        filename = ele.lower()
        # Look inside the folder given with -f, not in the current working
        # directory.
        elementfile = OPTIONS.input / filename
        if not elementfile.is_file():
            print(
                f"File {filename} belonging to {ele} not found. Skipping element!")
            continue

        print(f"Opening {filename}")
        with open(elementfile, 'r') as f:
            lines = f.readlines()

        element = getelemententry(basis, ele)
        # Pass the name as a list so it is compared by equality; a bare
        # string would be matched as a substring.
        turbomolegetcontributions(
            filename, lines, [OPTIONS.turbobasis], element, shells)

# Report what was converted, then serialise the XML tree to the output path.
summary = (
    f"Imported  new basisset {basissetname} from {OPTIONS.input.as_posix()} "
    f"written to file {OPTIONS.outputfile} with xtp_basisset"
)
print(summary)


# lxml.tostring yields bytes, so the file is opened in binary mode.
with open(OPTIONS.outputfile, mode='wb') as xmlfile:
    xmlfile.write(lxml.tostring(basis, pretty_print=True))