File: test_access.py

package info (click to toggle)
pydataverse 0.3.4-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,168 kB
  • sloc: python: 4,862; sh: 61; makefile: 13
file content (146 lines) | stat: -rw-r--r-- 4,110 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import os
import json
import httpx

from pyDataverse.api import DataAccessApi


class TestDataAccess:
    def test_get_data_by_id(self):
        """Tests getting data file by id."""

        # Arrange
        BASE_URL = os.getenv("BASE_URL").rstrip("/")
        API_TOKEN = os.getenv("API_TOKEN")

        assert BASE_URL is not None, "BASE_URL is not set"
        assert API_TOKEN is not None, "API_TOKEN is not set"

        # Create dataset
        metadata = json.load(open("tests/data/file_upload_ds_minimum.json"))
        pid = self._create_dataset(BASE_URL, API_TOKEN, metadata)
        api = DataAccessApi(BASE_URL, API_TOKEN)

        # Upload a file
        self._upload_datafile(BASE_URL, API_TOKEN, pid)

        # Retrieve the file ID
        file_id = self._get_file_id(BASE_URL, API_TOKEN, pid)

        # Act
        response = api.get_datafile(file_id, is_pid=False)
        response.raise_for_status()
        content = response.content.decode("utf-8")

        # Assert
        expected = open("tests/data/datafile.txt").read()
        assert content == expected, "Data retrieval failed."

    def test_get_data_by_pid(self):
        """Tests getting data file by id.

        Test runs with a PID instead of a file ID from Harvard.
        No PID given if used within local containers

        TODO - Check if possible with containers
        """

        # Arrange
        BASE_URL = "https://dataverse.harvard.edu"
        pid = "doi:10.7910/DVN/26093/IGA4JD"
        api = DataAccessApi(BASE_URL)

        # Act
        response = api.get_datafile(pid, is_pid=True)
        response.raise_for_status()
        content = response.content

        # Assert
        expected = self._get_file_content(BASE_URL, pid)
        assert content == expected, "Data retrieval failed."

    @staticmethod
    def _create_dataset(
        BASE_URL: str,
        API_TOKEN: str,
        metadata: dict,
    ):
        """
        Create a dataset in the Dataverse.

        Args:
            BASE_URL (str): The base URL of the Dataverse instance.
            API_TOKEN (str): The API token for authentication.
            metadata (dict): The metadata for the dataset.

        Returns:
            str: The persistent identifier (PID) of the created dataset.
        """
        url = f"{BASE_URL}/api/dataverses/root/datasets"
        response = httpx.post(
            url=url,
            json=metadata,
            headers={
                "X-Dataverse-key": API_TOKEN,
                "Content-Type": "application/json",
            },
        )

        response.raise_for_status()

        return response.json()["data"]["persistentId"]

    @staticmethod
    def _get_file_id(
        BASE_URL: str,
        API_TOKEN: str,
        pid: str,
    ):
        """Retrieves a file ID for a given persistent identifier (PID) in Dataverse."""

        response = httpx.get(
            url=f"{BASE_URL}/api/datasets/:persistentId/?persistentId={pid}",
            headers={
                "X-Dataverse-key": API_TOKEN,
                "Content-Type": "application/json",
            },
        )

        response.raise_for_status()

        return response.json()["data"]["latestVersion"]["files"][0]["dataFile"]["id"]

    @staticmethod
    def _upload_datafile(
        BASE_URL: str,
        API_TOKEN: str,
        pid: str,
    ):
        """Uploads a file to Dataverse"""

        url = f"{BASE_URL}/api/datasets/:persistentId/add?persistentId={pid}"
        response = httpx.post(
            url=url,
            files={"file": open("tests/data/datafile.txt", "rb")},
            headers={
                "X-Dataverse-key": API_TOKEN,
            },
        )

        response.raise_for_status()

    @staticmethod
    def _get_file_content(
        BASE_URL: str,
        pid: str,
    ):
        """Retrieves the file content for testing purposes."""

        response = httpx.get(
            url=f"{BASE_URL}/api/access/datafile/:persistentId/?persistentId={pid}",
            follow_redirects=True,
        )

        response.raise_for_status()

        return response.content