"""Integration tests for retrieving data files through pyDataverse's DataAccessApi."""

import os
import json
import httpx
from pyDataverse.api import DataAccessApi
class TestDataAccess:
    """Integration tests for ``DataAccessApi.get_datafile``.

    ``test_get_data_by_id`` runs against the Dataverse instance configured
    through the ``BASE_URL`` and ``API_TOKEN`` environment variables and
    reads fixture files from ``tests/data``. ``test_get_data_by_pid`` runs
    against the public Harvard Dataverse.
    """

    def test_get_data_by_id(self):
        """Tests getting data file by id."""

        # Arrange
        BASE_URL = os.getenv("BASE_URL")
        API_TOKEN = os.getenv("API_TOKEN")

        # Validate before using the values: calling ``rstrip`` on a missing
        # variable would raise AttributeError and hide these messages.
        assert BASE_URL is not None, "BASE_URL is not set"
        assert API_TOKEN is not None, "API_TOKEN is not set"

        # Drop trailing slashes so the f-string URLs below do not contain "//".
        BASE_URL = BASE_URL.rstrip("/")

        # Create dataset
        with open(
            "tests/data/file_upload_ds_minimum.json", encoding="utf-8"
        ) as metadata_file:
            metadata = json.load(metadata_file)

        pid = self._create_dataset(BASE_URL, API_TOKEN, metadata)
        api = DataAccessApi(BASE_URL, API_TOKEN)

        # Upload a file
        self._upload_datafile(BASE_URL, API_TOKEN, pid)

        # Retrieve the file ID
        file_id = self._get_file_id(BASE_URL, API_TOKEN, pid)

        # Act
        response = api.get_datafile(file_id, is_pid=False)
        response.raise_for_status()
        content = response.content.decode("utf-8")

        # Assert
        # Read with the same encoding used to decode the response above.
        with open("tests/data/datafile.txt", encoding="utf-8") as expected_file:
            expected = expected_file.read()

        assert content == expected, "Data retrieval failed."

    def test_get_data_by_pid(self):
        """Tests getting data file by PID.

        Test runs with a PID instead of a file ID from Harvard.
        No PID given if used within local containers

        TODO - Check if possible with containers
        """

        # Arrange
        BASE_URL = "https://dataverse.harvard.edu"
        pid = "doi:10.7910/DVN/26093/IGA4JD"
        api = DataAccessApi(BASE_URL)

        # Act
        response = api.get_datafile(pid, is_pid=True)
        response.raise_for_status()
        content = response.content

        # Assert
        # The reference bytes are fetched straight from the access endpoint,
        # bypassing DataAccessApi.
        expected = self._get_file_content(BASE_URL, pid)
        assert content == expected, "Data retrieval failed."

    @staticmethod
    def _create_dataset(
        BASE_URL: str,
        API_TOKEN: str,
        metadata: dict,
    ) -> str:
        """
        Create a dataset in the Dataverse.

        The dataset is posted to the ``root`` collection.

        Args:
            BASE_URL (str): The base URL of the Dataverse instance.
            API_TOKEN (str): The API token for authentication.
            metadata (dict): The metadata for the dataset.

        Returns:
            str: The persistent identifier (PID) of the created dataset.

        Raises:
            httpx.HTTPStatusError: If the server answers with an error status.
        """
        url = f"{BASE_URL}/api/dataverses/root/datasets"
        response = httpx.post(
            url=url,
            json=metadata,
            headers={
                "X-Dataverse-key": API_TOKEN,
                "Content-Type": "application/json",
            },
        )

        response.raise_for_status()

        return response.json()["data"]["persistentId"]

    @staticmethod
    def _get_file_id(
        BASE_URL: str,
        API_TOKEN: str,
        pid: str,
    ):
        """Retrieves a file ID for a given persistent identifier (PID) in Dataverse.

        Args:
            BASE_URL (str): The base URL of the Dataverse instance.
            API_TOKEN (str): The API token for authentication.
            pid (str): The persistent identifier of the dataset.

        Returns:
            The ``id`` of the first file listed in the dataset's latest version.

        Raises:
            httpx.HTTPStatusError: If the server answers with an error status.
        """
        response = httpx.get(
            url=f"{BASE_URL}/api/datasets/:persistentId/?persistentId={pid}",
            headers={
                "X-Dataverse-key": API_TOKEN,
                "Content-Type": "application/json",
            },
        )

        response.raise_for_status()

        return response.json()["data"]["latestVersion"]["files"][0]["dataFile"]["id"]

    @staticmethod
    def _upload_datafile(
        BASE_URL: str,
        API_TOKEN: str,
        pid: str,
    ) -> None:
        """Uploads a file to Dataverse

        Sends ``tests/data/datafile.txt`` to the dataset identified by ``pid``.

        Args:
            BASE_URL (str): The base URL of the Dataverse instance.
            API_TOKEN (str): The API token for authentication.
            pid (str): The persistent identifier of the target dataset.

        Raises:
            httpx.HTTPStatusError: If the server answers with an error status.
        """
        url = f"{BASE_URL}/api/datasets/:persistentId/add?persistentId={pid}"

        # Keep the handle open only for the duration of the request.
        with open("tests/data/datafile.txt", "rb") as datafile:
            response = httpx.post(
                url=url,
                files={"file": datafile},
                headers={
                    "X-Dataverse-key": API_TOKEN,
                },
            )

        response.raise_for_status()

    @staticmethod
    def _get_file_content(
        BASE_URL: str,
        pid: str,
    ) -> bytes:
        """Retrieves the file content for testing purposes.

        Args:
            BASE_URL (str): The base URL of the Dataverse instance.
            pid (str): The persistent identifier of the data file.

        Returns:
            bytes: The raw response body.

        Raises:
            httpx.HTTPStatusError: If the server answers with an error status.
        """
        response = httpx.get(
            url=f"{BASE_URL}/api/access/datafile/:persistentId/?persistentId={pid}",
            follow_redirects=True,
        )

        response.raise_for_status()

        return response.content