File: codcif2sdf

package info (click to toggle)
cod-tools 3.7.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 154,792 kB
  • sloc: perl: 57,588; sh: 36,842; ansic: 6,402; xml: 1,982; yacc: 1,117; makefile: 727; python: 166
file content (128 lines) | stat: -rwxr-xr-x 3,330 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#! /bin/sh
#------------------------------------------------------------------------------
#$Author: antanas $
#$Date: 2021-08-01 23:10:01 +0300 (Sun, 01 Aug 2021) $
#$Revision: 8845 $
#$URL: svn+ssh://www.crystallography.net/home/coder/svn-repositories/cod-tools/tags/v3.7.0/scripts/codcif2sdf $
#------------------------------------------------------------------------------
#*
#* Convert CIFs with crystal description (similar to those obtained from
#* the COD) to an SDF file with a molecule description.
#* Intended to be suitable for upload to PubChem.
#*
#* USAGE:
#*   $0 --options < file1.cif
#*   $0 --options file1.cif
#*   $0 --options file1.cif file2*.cif
#**

# Start from the caller's TMPDIR, if any. The ':-' default keeps this
# expansion safe whether or not 'set -u' is active.
TMP_DIR="${TMPDIR:-}"

# Abort on unset variables and on unhandled command failures.
set -ue
## set -x

# script ARGS...
#   Print "# ARGS" as a header line, then copy standard input to
#   standard output. (Not called anywhere else in this script.)
script() { echo "# $*"; cat; }

# setvar NAME = VALUE
#   Assign VALUE to the shell variable NAME. The second argument (the
#   "=" sign) is syntactic sugar and is ignored.
#   The value is handed to 'eval' as a reference to the positional
#   parameter instead of being pasted into the evaluated text, so values
#   containing single quotes, spaces or other shell metacharacters are
#   stored verbatim and never executed.
setvar() { eval "$1=\$3"; }

# Space-separated, shell-quoted list of input files collected from the
# command line.
setvar FILES = ""

# Name of this script without the directory part; quoted so that an
# installation path containing spaces does not get word-split.
setvar BASENAME = "$(basename "$0")"

# Extra option forwarded to cif_molecule; empty means "not requested".
setvar IGNORE_BUMPS_OPT = ""

#* OPTIONS:
#*   -i, --ignore-bumps
#*                     Ignore atomic bumps while running cif_molecule.
#*
#*   --tmp-dir /tmp
#*                     Use the specified temporary directory (default: '/tmp').
#*
#*   --help, --usage
#*                     Output a short help message (this message) and exit.
#*   --version
#*                     Output version information and exit.
#**
# shell_quote WORD
#   Print WORD wrapped in single quotes with every embedded single quote
#   escaped, so that the result is read back as exactly one word by a
#   later 'eval'.
shell_quote() {
    printf "'%s'" "$(printf '%s' "$1" | sed "s/'/'\\\\''/g")"
}

# parse_command_line ARGS...
#   Process the command line arguments:
#     - sets IGNORE_BUMPS_OPT to "-i" for -i / --ignore-bumps;
#     - sets TMP_DIR from the value following --tmp-dir;
#     - prints help or version information and exits for --options,
#       --help, --usage and --version;
#     - exits with status 1 on an unknown option or when --tmp-dir has
#       no value;
#     - appends every other argument (including a lone "-", meaning
#       standard input) to FILES as a shell-quoted word.
#   Long options may be abbreviated to any of the prefixes listed in
#   the case patterns below.
parse_command_line() {
    while [ $# -gt 0 ]
    do
        case "$1" in
            -i|--ignore-bumps|--ignore-bump|--ignore-bum|--ignore-bu|--ignore-b|\
            --ignore|--ignor|--igno|--ign|--ig|--i)
                IGNORE_BUMPS_OPT="-i"
                ;;
            --tmp-dir|--tmp-di|--tmp-d|--tmp|--tm|--t)
                # Report a missing value explicitly instead of tripping
                # over an unset positional parameter under 'set -u'.
                if [ $# -lt 2 ]
                then
                    echo "$(basename "$0"):: ERROR, option '$1' requires an argument." >&2
                    exit 1
                fi
                TMP_DIR="$2"
                shift
                ;;
            --options|--option|--optio|--opti|--opt|--op|--o)
                echo "$(basename "$0"):: The '--options' option is a placeholder."
                echo "$(basename "$0"):: It should be replaced by one of the following options:"
                # Print the OPTIONS help block embedded in this script.
                awk '/#\* OPTIONS:/,/#\*\*/ {
                        sub("OPTIONS:", "");
                        sub("^ *#[*]?[*]?", "");
                        gsub("\\$0","'"$0"'");
                        print $0
                    }' "$0"
                exit
                ;;
            --help|--hel|--he|--h|--usage)
                # Print every help block embedded in this script.
                awk '/#\*/,/#\*\*/ {
                        sub("^ *#[*]?[*]?", "");
                        gsub("\\$0","'"$0"'");
                        print $0
                    }' "$0"
                exit
                ;;
            --version)
                # The version helper is looked up next to this script.
                "$(dirname "$0")/cod-tools-version"
                exit
                ;;
            -)
                # A lone dash is an input operand (standard input),
                # not an option.
                FILES="${FILES} '-'"
                ;;
            -*)
                echo "$(basename "$0"):: ERROR, unknown option '$1'." >&2
                exit 1
                ;;
            *)
                # Quote safely: FILES is re-parsed with 'eval' later on.
                FILES="${FILES} $(shell_quote "$1")"
                ;;
        esac
        shift
    done
}

parse_command_line ${1+"$@"}

## echo ${FILES}

# With no file operands on the command line, fall back to "-" so that
# the main loop reads the CIF from standard input, as the USAGE text
# promises. This must happen before the positional parameters are
# rebuilt, because the main loop iterates over "$@", not over FILES.
if [ -z "${FILES}" ]
then
    FILES="'-'"
fi

# Replace the positional parameters with the collected file names.
# FILES holds shell-quoted words, hence the 'eval'.
eval set -- "${FILES}"

# Fall back to /tmp when neither --tmp-dir nor TMPDIR supplied a
# base directory.
if [ -z "${TMP_DIR}" ]
then
    TMP_DIR="/tmp"
fi

# Per-run scratch directory, named after the script and its process ID.
# mkdir fails (and 'set -e' aborts) if the directory already exists.
TMP_DIR="${TMP_DIR}/tmp-${BASENAME}-$$"
mkdir "${TMP_DIR}"

# Remove the scratch directory on every exit path, including aborts
# caused by 'set -e'. Registered only after the mkdir above succeeded,
# so a pre-existing directory owned by someone else is never deleted.
trap 'rm -rf "${TMP_DIR}"' EXIT

TMP_SPLIT_DIR="${TMP_DIR}/split"
mkdir "${TMP_SPLIT_DIR}"

# File that receives the CIF when it is read from standard input.
TMP_CIF="${TMP_DIR}/-"

## set -x

# process_cif CIF
#   Convert one CIF to SDF on standard output by piping the output of
#   cif_molecule into molcif2sdf.
#   CIF is either a file name or "-"; for "-" standard input is first
#   saved to TMP_CIF and that copy is processed instead.
#   A non-existent file is reported on standard error and skipped; the
#   function still returns 0 in that case so that the remaining files
#   are processed.
#   Reads the globals TMP_CIF, TMP_DIR and IGNORE_BUMPS_OPT.
process_cif() {
    cif_path="$1"

    if [ "${cif_path}" = "-" ]
    then
        printf '%s\n' "$0: ${cif_path}: reading CIF from STDIN..." >&2
        cat > "${TMP_CIF}"
        cif_path="${TMP_CIF}"
    elif [ ! -e "${cif_path}" ]
    then
        printf '%s\n' "$0: ${cif_path}: file does not exist." >&2
        return 0
    fi

    # IGNORE_BUMPS_OPT is deliberately left unquoted: when it is empty
    # it must vanish instead of becoming an empty argument.
    # Only the exit status of molcif2sdf (the last pipeline stage) is
    # seen by 'set -e'.
    cif_molecule \
        --preserve-stoichiometry \
        --one-datablock-output \
        --split-disorder-groups \
        --largest-molecule-only \
        ${IGNORE_BUMPS_OPT} \
        "${cif_path}" \
    | molcif2sdf --cod-cif "${cif_path}" --tmp-dir "${TMP_DIR}"
}

for CIF in ${1+"$@"}
do
    process_cif "${CIF}"
done

## set -x
rm -rf "${TMP_DIR}"