File: find.py

package info (click to toggle)
python-ase 3.26.0-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 15,484 kB
  • sloc: python: 148,112; xml: 2,728; makefile: 110; javascript: 47
file content (130 lines) | stat: -rw-r--r-- 4,070 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# fmt: off

# Note:
# Try to avoid module level import statements here to reduce
# import time during CLI execution
import sys


class CLICommand:
    """Find files with atoms in them.

    Search through files known to ASE applying a query to filter the results.

    See https://ase-lib.org/ase/db/db.html#querying for more
    information on how to construct the query string.
    """

    @staticmethod
    def add_arguments(parser):
        """Register this command's positional arguments and options.

        ``parser`` only needs to provide ``add_argument`` with the
        ``argparse.ArgumentParser`` semantics.
        """
        parser.add_argument('folder', help='Folder to look in.')
        # nargs='?' makes the query optional; it is None when left out.
        parser.add_argument(
            'query', nargs='?',
            help='Examples: More than 2 hydrogens and no silver: "H>2,Ag=0". '
            'More than 1000 atoms: "natoms>1000". '
            'Slab geometry containing Cu and Ni: "pbc=TTF,Cu,Ni".')
        parser.add_argument('-v', '--verbose', action='store_true',
                            help='More output.')
        parser.add_argument('-l', '--long', action='store_true',
                            help='Show also periodic boundary conditions, '
                            'chemical formula and filetype.')
        # Both filters take one comma-separated string of filename endings.
        parser.add_argument('-i', '--include', help='Include only filenames '
                            'ending with given strings.  Example: '
                            '"-i .xyz,.traj".')
        parser.add_argument('-x', '--exclude', help='Exclude filenames '
                            'ending with given strings.  Example: '
                            '"-x .cif".')

    @staticmethod
    def run(args):
        """Run the search with the parsed command-line arguments."""
        main(args)


def main(args):
    """Print the path of every matching file found below ``args.folder``.

    The query string is parsed once and reused for every candidate file.
    ``args.include`` and ``args.exclude`` are comma-separated strings of
    filename endings (or empty/None).  With ``args.long`` set, a header
    line is printed first and each hit is shown with its periodic boundary
    conditions, chemical formula and file type in front of the path.
    """
    from ase.db.core import parse_selection

    query = parse_selection(args.query)

    include = []
    if args.include:
        include = args.include.split(',')
    exclude = []
    if args.exclude:
        exclude = args.exclude.split(',')

    if args.long:
        print(f"pbc {'formula':10} {'filetype':15} path")

    for path in allpaths(args.folder, include, exclude):
        kind, row = check(path, query, args.verbose)
        if not kind:
            # check() signals "no match" with an empty file type.
            continue
        if not args.long:
            print(path)
            continue
        # One digit per axis: 1 for periodic, 0 for non-periodic.
        pbc = ''.join(map(str, row.pbc.astype(int)))
        print(f'{pbc} {row.formula:10} {kind:15} {path}')

def allpaths(folder, include, exclude):
    """Generate paths of candidate files below ``folder``.

    Walks ``folder`` recursively and yields ``dirpath`` joined with each
    filename that passes the filters:

    * names ending with any string in ``exclude``, or with ``.py`` or
      ``.pyc``, are always skipped;
    * if ``include`` is non-empty, only names ending with one of its
      strings are kept.

    Sub-directories whose name starts with ``.`` or ``_`` are not entered.
    Neither ``include`` nor ``exclude`` is modified.
    """
    import os
    import os.path as op

    # Build fresh tuples rather than extending ``exclude`` in place, so the
    # caller's sequence is left untouched (and may be a list or a tuple).
    excluded = tuple(exclude) + ('.py', '.pyc')
    included = tuple(include)

    for dirpath, dirnames, filenames in os.walk(folder):
        for name in filenames:
            # str.endswith accepts a tuple of alternatives.
            if name.endswith(excluded):
                continue
            if included and not name.endswith(included):
                continue
            yield op.join(dirpath, name)

        # Skip .git, __pycache__ and friends.  The list is edited in place
        # because os.walk only descends into what is left in ``dirnames``.
        dirnames[:] = [name for name in dirnames if name[0] not in '._']


def check(path, query, verbose):
    """Test whether the file at ``path`` contains atoms matching ``query``.

    Returns a ``(filetype, row)`` tuple for the first row selected by the
    query.  Returns ``('', None)`` when the file type cannot be determined,
    the file cannot be read, the selection raises, or nothing matches.
    With ``verbose`` set, read and selection errors are printed to stderr
    prefixed with the path.
    """
    from ase.db import connect
    from ase.db.jsondb import JSONDatabase
    from ase.db.row import atoms2dict
    from ase.io import read
    from ase.io.formats import UnknownFileTypeError, filetype

    no_match = ('', None)

    class SingleAtomsDB(JSONDatabase):
        """JSON database stand-in holding one atoms object under id 1."""

        def __init__(self, atoms):
            # The parent initializer is not called; only the in-memory
            # data is stored.
            self.bigdct = {1: atoms2dict(atoms)}

        def _read_json(self):
            # Presumably (data, ids, next id) as the parent class expects
            # -- confirm against JSONDatabase.
            return self.bigdct, [1], 2

    def report(error):
        """Print ``error`` for this path on stderr when verbose."""
        if verbose:
            print(path + ':', error, file=sys.stderr)

    try:
        fmt = filetype(path, guess=False)
    except (OSError, UnknownFileTypeError):
        return no_match

    if fmt in ('db', 'json'):
        # Already a database file: query it directly.
        database = connect(path)
    else:
        try:
            atoms = read(path, format=fmt)
        except Exception as err:
            report(err)
            return no_match
        # Wrap the atoms so the same selection code can be applied.
        database = SingleAtomsDB(atoms)

    try:
        # Only the first selected row is needed to decide on a match.
        for row in database._select(*query):
            return fmt, row
    except Exception as err:
        report(err)

    return no_match