File: ccget.py

package info (click to toggle)
cclib-data 1.6.2-2
  • links: PTS, VCS
  • area: non-free
  • in suites: bookworm, bullseye, sid
  • size: 87,912 kB
  • sloc: python: 16,440; sh: 131; makefile: 79; cpp: 31
file content (215 lines) | stat: -rw-r--r-- 7,017 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2017, the cclib development team
#
# This file is part of cclib (http://cclib.github.io) and is distributed under
# the terms of the BSD 3-Clause License.

"""Script for loading data from computational chemistry files."""


from __future__ import print_function

import glob
import logging
import os.path
import sys
from functools import partial
from pprint import pprint

import numpy

from cclib.parser import ccData
from cclib.io import ccread, URL_PATTERN


# Configure output formatting: 120-column lines for both pprint and numpy.
# The `compact` keyword is only passed to pprint on Python 3.4 and newer.
pprint = (
    partial(pprint, width=120, compact=True)
    if sys.version_info >= (3, 4)
    else partial(pprint, width=120)
)
numpy.set_printoptions(linewidth=120)


def ccget():
    """Parse files with cclib based on command line arguments.

    The positional command line arguments are a mix of attribute names
    (members of ``ccData._attrlist``) and logfiles (existing files, strings
    matching ``URL_PATTERN``, or glob patterns that match at least one path).
    Each logfile is read with ``ccread`` and either the attributes it provides
    are listed (``--list``) or the requested attributes are printed.

    Options:
        --list/-l, --json/-j, --multi/-m  mutually exclusive mode switches.
        --verbose/-v  pass ``verbose=True`` and ``logging.INFO`` to ccread.
        --future/-u   pass ``future=True`` to ccread.
        --full/-f     print numpy arrays without truncation.

    Returns None. Calls ``parser.exit(1)`` (raising SystemExit) when the
    arguments are inconsistent, when an argument is neither an attribute nor
    a file, or when ``ccread`` returns None for a file.
    """

    import argparse

    parser = argparse.ArgumentParser()

    parser.add_argument(
        "attribute_or_compchemlogfile", nargs="+",
        help="one or more attributes to be parsed from one or more logfiles",
    )

    group = parser.add_mutually_exclusive_group()

    group.add_argument(
        "--list", "-l",
        action="store_true",
        help="print a list of attributes available in each file",
    )
    group.add_argument(
        "--json", "-j",
        action="store_true",
        help="the given logfile is in CJSON format",
    )
    group.add_argument(
        "--multi", "-m",
        action="store_true",
        help="parse multiple input files as one input stream",
    )

    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="more verbose parsing output (only errors by default)",
    )
    parser.add_argument(
        "--future", "-u",
        action="store_true",
        help="use experimental features (currently optdone_as_list)",
    )
    parser.add_argument(
        "--full", "-f",
        action="store_true",
        help="toggle full print behaviour for attributes",
    )

    args = parser.parse_args()

    arglist = args.attribute_or_compchemlogfile
    showattr = args.list
    cjsonfile = args.json
    multifile = args.multi
    verbose = args.verbose
    future = args.future
    full = args.full

    # Toggle full print behaviour for numpy arrays. The threshold must be a
    # real number: numpy rejects NaN here, and sys.maxsize is the documented
    # way to ask for untruncated output.
    if full:
        numpy.set_printoptions(threshold=sys.maxsize)

    # We need at least one attribute and the filename, so two arguments, or
    # just one filename if we want to list attributes that can be extracted.
    # In multifile mode, we generally want at least two filenames, so one
    # more argument is expected in either case.
    minimum_arguments = 1 if showattr else 2
    if multifile:
        minimum_arguments += 1
    if len(arglist) < minimum_arguments:
        print("The number of arguments does not seem to be correct.")
        parser.print_usage()
        parser.exit(1)

    # Figure out which are the attribute names and which are the filenames or links.
    # Note that in Linux, the shell expands wild cards, but not so in Windows,
    # so try to do that here using glob.
    attrnames = []
    filenames = []
    for arg in arglist:
        if arg in ccData._attrlist:
            attrnames.append(arg)
        elif URL_PATTERN.match(arg) or os.path.isfile(arg):
            filenames.append(arg)
        else:
            wildcardmatches = glob.glob(arg)
            if not wildcardmatches:
                print("%s is neither a filename nor an attribute name." % arg)
                parser.print_usage()
                parser.exit(1)
            filenames.extend(wildcardmatches)

    # Since there is some ambiguity to the correct number of arguments, check
    # that there is at least one filename (or two in multifile mode), and also
    # at least one attribute to parse if the -l option was not passed.
    if not filenames:
        print("No logfiles given")
        parser.exit(1)
    if multifile and len(filenames) == 1:
        print("Expecting at least two logfiles in multifile mode")
        parser.exit(1)
    if not showattr and not attrnames:
        print("No attributes given")
        parser.exit(1)

    # This should be sufficient to correctly handle multiple files, that is to
    # run the loop below only once with all logfiles in the variable `filename`.
    if multifile:
        filenames = [filenames]

    # The keyword dictionary is not used much, but is useful for passing
    # options downstream. For example, --future triggers experimental or
    # alternative behavior (as with optdone). It does not depend on the file
    # being read, so it is built once before the loop.
    kwargs = {
        'verbose': verbose,
        'loglevel': logging.INFO if verbose else logging.ERROR,
    }
    if future:
        kwargs['future'] = True
    if cjsonfile:
        kwargs['cjson'] = True

    # Now parse each file and print out the requested attributes.
    for filename in filenames:

        # In multifile mode `filename` is the whole list of logfiles.
        if multifile:
            name = ", ".join(filename[:-1]) + " and " + filename[-1]
        else:
            name = filename

        print("Attempting to read %s" % name)
        data = ccread(filename, **kwargs)

        if data is None:
            print("Cannot figure out the format of '%s'" % name)
            print("Report this to the cclib development team if you think it is an error.")
            print("\n" + parser.format_usage())
            parser.exit(1)

        if showattr:
            print("cclib can parse the following attributes from %s:" % name)
            if cjsonfile:
                # NOTE: --list and --json are currently mutually exclusive,
                # so this branch is only reachable if that restriction is
                # relaxed. Move on to the next file rather than stopping, so
                # that every file gets listed.
                for key in data:
                    print(key)
                continue
            for attr in data._attrlist:
                if hasattr(data, attr):
                    print("  %s" % attr)
            continue

        invalid = False
        for attr in attrnames:
            if cjsonfile:
                # With the cjson option the data is accessed by key.
                if attr in data:
                    print("%s:\n%s" % (attr, data[attr]))
                    continue
            elif hasattr(data, attr):
                print(attr)
                attr_val = getattr(data, attr)
                # List of attributes to be printed with new lines
                if attr in data._listsofarrays and full:
                    for val in attr_val:
                        pprint(val)
                else:
                    pprint(attr_val)
                continue

            print("Could not parse %s from this file." % attr)
            invalid = True
        if invalid:
            parser.print_help()


# Run the command line interface only when executed as a script.
if __name__ == "__main__":
    ccget()