File: mongo_upload.py

package info (click to toggle)
snapd 2.71-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 79,536 kB
  • sloc: ansic: 16,114; sh: 16,105; python: 9,941; makefile: 1,890; exp: 190; awk: 40; xml: 22
file content (65 lines) | stat: -rwxr-xr-x 2,541 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/env python3

import argparse
import datetime
import json
import os
from pymongo import MongoClient, InsertOne

# Names (not values) of the environment variables that carry the MongoDB
# connection settings; the values themselves are read from os.environ when
# an upload is performed.
HOST, PORT = 'MONGO_HOST', 'MONGO_PORT'  # where the server listens
USER, PASSWORD = 'MONGO_USER', 'MONGO_PASSWORD'  # login credentials


def upload_documents(folder, verbose):
    """Insert every ``*.json`` file found in *folder* into ``snapd.features``.

    Connection settings are read from the environment variables named by
    the module constants HOST, PORT, USER and PASSWORD. Each file is parsed
    as JSON, stamped with a ``timestamp`` key (one UTC timestamp shared by
    the whole batch) and queued as a single insert; the document loaded
    from ``all-features.json`` is additionally marked with
    ``all_features = True``. All inserts are sent in one bulk write.

    Args:
        folder: directory whose ``*.json`` files are uploaded (the
            directory is not searched recursively).
        verbose: when true, print how many documents were inserted.

    Raises:
        RuntimeError: if one of the required environment variables is
            missing, or if the port variable is not a valid number.
    """
    # Checked one by one, in this order, so the error names the first
    # variable that is missing.
    required = (
        (HOST, "the host data"),
        (PORT, "the port data"),
        (USER, "the username"),
        (PASSWORD, "the password"),
    )
    for variable, content in required:
        if variable not in os.environ:
            raise RuntimeError(
                "the {} environment variable must be set and contain {}".format(variable, content))

    raw_port = os.environ[PORT]
    try:
        # str.isdigit() also accepts characters (e.g. superscript digits)
        # that int() refuses; route both failures to the same error.
        if not raw_port.isdigit():
            raise ValueError(raw_port)
        port = int(raw_port)
    except ValueError:
        raise RuntimeError(
            "the {} environment variable must contain a valid port number".format(PORT)) from None

    with MongoClient(host=os.environ[HOST], port=port, username=os.environ[USER], password=os.environ[PASSWORD]) as client:
        db = client.snapd
        collection = db.features

        requesting = []
        timestamp = datetime.datetime.now(datetime.timezone.utc)
        # os.listdir() returns entries in arbitrary order; sort so that the
        # insertion order is reproducible between runs.
        for name in sorted(os.listdir(folder)):
            if not name.endswith(".json"):
                continue
            with open(os.path.join(folder, name), 'r', encoding='utf-8') as f:
                document = json.load(f)
            document['timestamp'] = timestamp
            if name == 'all-features.json':
                document['all_features'] = True
            requesting.append(InsertOne(document))

        # PyMongo's bulk_write() rejects an empty request list with
        # InvalidOperation, so a folder without JSON files is reported as
        # zero insertions instead of crashing.
        if requesting:
            inserted = collection.bulk_write(requesting).inserted_count
        else:
            inserted = 0
        if verbose:
            print("inserted {} new documents".format(inserted))


if __name__ == '__main__':
    # Command-line entry point: parse the options, make sure the target
    # directory exists, then upload its JSON documents.
    description = "mongodb document uploader. It assumes the following environment variables are set: {}, {}, {}, and {}".format(
        HOST, PORT, USER, PASSWORD)
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(
        '--dir', required=True, type=str, help='directory containing json files')
    parser.add_argument(
        '--verbose', action='store_true', help='print upload statement')
    args = parser.parse_args()

    if os.path.isdir(args.dir):
        upload_documents(args.dir, args.verbose)
    else:
        raise RuntimeError(
            "the indicated directory {} does not exist.".format(args.dir))