File: ArchiveTestingDataOnGirder.py

package info (click to toggle)
insighttoolkit4 4.13.3withdata-dfsg1-4
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 489,260 kB
  • sloc: cpp: 557,342; ansic: 146,850; fortran: 34,788; python: 16,572; sh: 2,187; lisp: 2,070; tcl: 993; java: 362; perl: 200; makefile: 129; csh: 81; pascal: 69; xml: 19; ruby: 10
file content (191 lines) | stat: -rwxr-xr-x 8,505 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
#!/usr/bin/env python

import argparse
import girder_client
from girder_client import GirderClient
import os
import fnmatch
import json
import mimetypes
from distutils.version import StrictVersion

# Fail fast at import time when the installed girder_client package reports a
# version older than 2.0.0.
if StrictVersion(girder_client.__version__) < StrictVersion("2.0.0"):
    raise Exception("Girder 2.0.0 or newer is required")

class GirderExternalDataCli(GirderClient):
    """
    A command line Python client for interacting with a Girder instance's
    RESTful api, specifically for uploading the objects referenced by CMake
    ExternalData content links into a Girder instance.
    """

    def __init__(self, apiKey, objectStore):
        """Create the client and authenticate against data.kitware.com.

        :param apiKey: Girder API key passed to ``authenticate``.
        :param objectStore: directory that is joined with the hash value read
            from a content link to locate the file to upload.
        """
        GirderClient.__init__(self,
                              apiUrl='https://data.kitware.com/api/v1')
        self.objectStore = objectStore
        self.authenticate(apiKey=apiKey)

    def content_link_upload(self, localFolder, parentId, ext='.sha512',
            parentType='folder', blacklist=('.git', '.ExternalData'),
            reuseExisting=True, dryRun=False):
        """Upload objects corresponding to CMake ExternalData content links.

        This will recursively walk down the tree and find content links ending
        with the specified extension and create a hierarchy on the server under
        the parentId.

        Only the sub-directories of *localFolder* are processed; files located
        directly in *localFolder*, symlinks and blacklisted entries are
        ignored.

        :param localFolder: path to the local folder to scan.
        :param parentId: id of the parent in Girder or resource path.
        :param ext: Content link file extension.
        :param parentType: one of (collection,folder,user), default of folder.
        :param blacklist: entry names that must not be descended into or
            uploaded.
        :param reuseExisting: bool whether to accept an existing item of
            the same name in the same location, or create a new one instead.
        :param dryRun: Do not actually upload any content.
        """
        parentId = self._checkResourcePath(parentId)
        localFolder = os.path.normpath(localFolder)
        # Sorted so that the processing order is deterministic, like it is in
        # _uploadFolderRecursive.
        for entry in sorted(os.listdir(localFolder)):
            if entry in blacklist:
                print("Ignoring file %s as it is blacklisted" % entry)
                continue
            full_entry = os.path.join(localFolder, entry)
            if os.path.islink(full_entry):
                # Symlinks are never followed nor uploaded
                print("Skipping file %s as it is a symlink" % entry)
                continue
            if os.path.isdir(full_entry):
                self._uploadFolderRecursive(
                    full_entry, parentId, parentType, ext,
                    reuseExisting=reuseExisting, blacklist=blacklist,
                    dryRun=dryRun)

    def _uploadContentLinkItem(self, name, content_link, folder,
            ext='.sha512', parentType='folder', dryRun=False,
            reuseExisting=False):
        """Upload the object corresponding to one CMake ExternalData content
        link.

        This will upload the file with name, *name*, for the content link
        located at *content_link* to the Girder folder, *folder*. Paths that
        are not regular files, or that do not end with *ext*, are silently
        ignored.

        :param name: name given to the uploaded item.
        :param content_link: path to the content link file; its first line is
            read as the hash value naming the file inside the object store.
        :param folder: Girder folder document; only its ``'_id'`` is used.
        :param ext: Content link file extension.
        :param parentType: type of *folder*, one of (collection,folder,user),
            default of folder.
        :param dryRun: Do not actually upload any content.
        :param reuseExisting: bool whether to accept an existing item of
            the same name in the same location, or create a new one instead.
        :raises Exception: if *parentType* is not ``'folder'``.
        """
        content_link = os.path.normpath(content_link)
        is_content_link = os.path.isfile(content_link) and \
            fnmatch.fnmatch(content_link, '*' + ext)
        if not is_content_link:
            return

        if parentType != 'folder':
            raise Exception(('Attempting to upload an item under a %s.'
                            % parentType) +
                            ' Items can only be added to folders.')

        with open(content_link, 'r') as fp:
            hash_value = fp.readline().strip()
        self._uploadAsItem(
            name,
            folder['_id'],
            os.path.join(self.objectStore, hash_value),
            reuseExisting=reuseExisting,
            dryRun=dryRun)

    def _uploadFolderRecursive(self, localFolder, parentId, parentType,
                                 ext='.sha512',
                                 reuseExisting=False,
                                 blacklist=(),
                                 dryRun=False):
        """Function to recursively upload a folder and all of its descendants.

        Folders whose tree does not contain any content link are skipped
        entirely. The registered folder upload callbacks are invoked once per
        uploaded folder, after all of its entries have been processed.

        :param localFolder: full path to local folder to be uploaded
        :param parentId: id of parent in Girder,
            where new folder will be added
        :param parentType: one of (collection, folder, user)
        :param ext: Content link file extension.
        :param reuseExisting: boolean indicating whether to accept an existing
            item of the same name in the same location, or create a new one
            instead
        :param blacklist: entry names that must not be descended into or
            uploaded.
        :param dryRun: Do not actually upload any content.
        """
        localFolder = os.path.normpath(localFolder)
        folder_name = os.path.basename(localFolder)
        if folder_name in blacklist:
            print("Ignoring file %s as it is blacklisted" % folder_name)
            return

        # Do not add the folder if it does not contain any content links.
        # any() stops walking the tree as soon as the first one is found.
        pattern = '*' + ext
        has_content_link = any(
            fnmatch.filter(filenames, pattern)
            for _, _, filenames in os.walk(localFolder))
        if not has_content_link:
            return

        print('Creating Folder from %s' % localFolder)
        if dryRun:
            # create a dryRun placeholder
            folder = {'_id': 'dryRun'}
        elif localFolder == '.':
            folder = {'_id': parentId}
        else:
            folder = self.loadOrCreateFolder(
                folder_name, parentId, parentType)

        # Items are added to `folder`, not to its parent. Unless the parent
        # itself is reused (localFolder == '.'), `folder` is a real folder
        # whatever the type of its parent is.
        if localFolder == '.':
            item_parent_type = parentType
        else:
            item_parent_type = 'folder'

        for entry in sorted(os.listdir(localFolder)):
            if entry in blacklist:
                print("Ignoring file %s as it is blacklisted" % entry)
                continue
            full_entry = os.path.join(localFolder, entry)
            if os.path.islink(full_entry):
                # Symlinks are never followed nor uploaded
                print("Skipping file %s as it is a symlink" % entry)
                continue
            elif os.path.isdir(full_entry):
                # At this point we should have an actual folder, so can
                # pass that as the parentType
                self._uploadFolderRecursive(
                    full_entry, folder['_id'], 'folder',
                    ext, reuseExisting=reuseExisting,
                    blacklist=blacklist, dryRun=dryRun)
            else:
                name = os.path.splitext(entry)[0]
                self._uploadContentLinkItem(name, full_entry, folder,
                        ext=ext, parentType=item_parent_type, dryRun=dryRun,
                        reuseExisting=reuseExisting)

        # Notify once per folder, after all of its entries were handled
        if not dryRun:
            for callback in self._folderUploadCallbacks:
                callback(folder, localFolder)


def main():
    """Parse the command line and upload the content links to Girder.

    The API key and the ExternalData object store are mandatory; the local
    folder defaults to two directories above this script.
    """
    default_local_folder = os.path.join(
        os.path.dirname(__file__), '..', '..')
    # Default is ITK/ITKTestingData/Nightly
    default_parent_id = '57b673388d777f10f269651c'

    cli = argparse.ArgumentParser(
        description='Upload CMake ExternalData content links to Girder')
    cli.add_argument(
        '--dry-run',
        action='store_true',
        help=('will not write anything to Girder, only report on what would '
              'happen'))
    cli.add_argument('--api-key', required=True, default=None)
    cli.add_argument(
        '--local-folder',
        required=False,
        default=default_local_folder,
        help='path to local target folder')
    cli.add_argument(
        '--parent-id',
        required=False,
        default=default_parent_id,
        help='id of Girder parent target')
    cli.add_argument(
        '--object-store',
        required=True,
        help='Path to the CMake ExternalData object store')
    cli.add_argument(
        '--no-reuse',
        action='store_true',
        help="Don't reuse existing items of same name at same location")
    options = cli.parse_args()

    # The objects are stored in the SHA512 sub-directory of the object store
    sha512_store = os.path.join(options.object_store, 'SHA512')
    client = GirderExternalDataCli(options.api_key,
                                   objectStore=sha512_store)
    client.content_link_upload(
        options.local_folder,
        options.parent_id,
        reuseExisting=not options.no_reuse,
        dryRun=options.dry_run)

# Run the uploader only when this file is executed as a script.
if __name__ == '__main__':
    main()