# Source code for dcmri.data
import os
import sys
import pickle
import shutil
import zipfile
import csv
from io import TextIOWrapper
import requests
import numpy as np
# filepaths need to be identified with importlib_resources
# rather than __file__ as the latter does not work at runtime
# when the package is installed via pip install
if sys.version_info < (3, 9):
# importlib.resources either doesn't exist or lacks the files()
# function, so use the PyPI version:
import importlib_resources
else:
# importlib.resources has files(), so use that:
import importlib.resources as importlib_resources
# Zenodo record identifiers of the data repositories. These are inserted
# into the download URL and must be updated whenever a new version of a
# repository is published.
DOI = {
    'MRR': "15285017",  # v0.0.3
    'TRISTAN': "15285027",  # v0.0.1
}

# Datasets available via fetch(), keyed by dataset name. Each entry holds
# the Zenodo record identifier ('doi', or None when the dataset is not
# hosted online) and the file extension ('ext') of the data file.
DATASETS = {
    name: {'doi': doi, 'ext': '.dmr.zip'}
    for name, doi in (
        ('KRUK', DOI['MRR']),
        ('tristan_humans_healthy_ciclosporin', DOI['TRISTAN']),
        ('tristan_humans_healthy_controls_leeds', DOI['TRISTAN']),
        ('tristan_humans_healthy_controls_sheffield', DOI['TRISTAN']),
        ('tristan_humans_healthy_metformin', DOI['TRISTAN']),
        ('tristan_humans_healthy_rifampicin', DOI['TRISTAN']),
        ('tristan_humans_patients_rifampicin', DOI['TRISTAN']),
        ('tristan_rats_healthy_multiple_dosing', DOI['TRISTAN']),
        ('tristan_rats_healthy_reproducibility', DOI['TRISTAN']),
        ('tristan_rats_healthy_six_drugs', DOI['TRISTAN']),
        ('minipig_renal_fibrosis', None),
    )
}
# [docs]
def fetch(dataset=None, clear_cache=False, download_all=False) -> "str | None":
    """Fetch a dataset included in dcmri

    Args:
        dataset (str, optional): name of the dataset. See below for options.
        clear_cache (bool, optional): When a dataset is fetched, it is
          downloaded and then stored in a local cache memory for faster access
          next time it is fetched. Set clear_cache=True to delete all data
          in the cache memory. The cache is cleared *before* anything is
          downloaded, so a dataset requested in the same call is still
          available afterwards. Default is False.
        download_all (bool, optional): By default only the dataset that is
          fetched is downloaded. Set download_all=True to download all
          online datasets at once. This will cost some time but then offers
          fast and offline access to all datasets afterwards. This will take
          up around 300 MB of space on your hard drive. Default is False.

    Returns:
        str: Path to the local data file of the requested dataset, or None
        if no dataset was requested.

    Raises:
        ValueError: if `dataset` is not one of the available datasets.

    Notes:

        The following datasets are currently available:

        `Magnetic resonance renography <https://zenodo.org/records/15284968>`_

        - KRUK

        `TRISTAN Gadoxetate kinetics <https://zenodo.org/records/15285027>`_

        - tristan_humans_healthy_rifampicin
        - tristan_humans_healthy_metformin
        - tristan_humans_healthy_ciclosporin
        - tristan_humans_healthy_controls_leeds
        - tristan_humans_healthy_controls_sheffield
        - tristan_rats_healthy_six_drugs
        - tristan_rats_healthy_reproducibility
        - tristan_rats_healthy_multiple_dosing

        Other

        - minipig_renal_fibrosis: Kidney data in a minipig with
          unilateral ureter stenosis. More detail in future versions.

    Example:

    Fetch the **tristan_humans_healthy_rifampicin** dataset and read it:

    .. plot::
        :include-source:
        :context: close-figs

        >>> import dcmri as dc
        >>> import pydmr

        # fetch dmr file
        >>> file = dc.fetch('tristan_humans_healthy_rifampicin')

        # read dmr file
        >>> data = pydmr.read(file)
    """
    # Validate up front so that an unknown name has no side effects.
    if dataset is not None and dataset not in DATASETS:
        raise ValueError(
            f'Dataset {dataset} is unknown. Please choose one of '
            f'{list(DATASETS)}'
        )

    # Clear before downloading: clearing afterwards would delete the very
    # file whose path is returned below.
    if clear_cache:
        _clear_cache()

    if download_all:
        for name, info in DATASETS.items():
            # Entries without a DOI are not hosted online, so there is
            # nothing to download for them.
            if info['doi'] is not None:
                _download(name)

    if dataset is None:
        return None
    return _fetch_dataset(dataset)
def _clear_cache():
    """
    Clear the folder where the data downloaded via fetch are saved.

    Note if you clear the cache the data will need to be downloaded again
    if you need them. Files that cannot be downloaded again are left in
    place: datasets listed in DATASETS without a DOI, and the package
    marker ``__init__.py`` if the folder contains one.
    """
    # Names of files that must survive because they cannot be restored
    # by a later download.
    keep = {'__init__.py'}
    keep.update(
        name + info['ext']
        for name, info in DATASETS.items()
        if info['doi'] is None
    )
    folder = importlib_resources.files('dcmri.datafiles')
    for item in folder.iterdir():
        if item.is_file() and item.name not in keep:
            item.unlink()  # Delete the file
def _fetch_dataset(dataset):
    """Return the local file path of a dataset, downloading it if absent.

    Args:
        dataset (str): key of the dataset in DATASETS.

    Returns:
        str: path of the data file inside the dcmri.datafiles package.
    """
    folder = importlib_resources.files('dcmri.datafiles')
    filename = dataset + DATASETS[dataset]['ext']
    local_path = str(folder.joinpath(filename))
    # Already cached from an earlier call: nothing to download.
    if os.path.exists(local_path):
        return local_path
    # First access: retrieve the file before handing out its path.
    _download(dataset)
    return local_path
def _download(dataset):  # TODO: add version keyword
    """Download a dataset from Zenodo into the dcmri.datafiles folder.

    Does nothing if the data file already exists locally.

    Args:
        dataset (str): key of the dataset in DATASETS.

    Raises:
        ValueError: if the file is not present locally and the dataset has
            no DOI, i.e. it cannot be downloaded.
        requests.exceptions.ConnectionError: if the server cannot be
            reached or does not respond in time.
        requests.exceptions.HTTPError: if the server answers with an
            error status.
    """
    info = DATASETS[dataset]
    folder = importlib_resources.files('dcmri.datafiles')
    datafile = str(folder.joinpath(dataset + info['ext']))
    if os.path.exists(datafile):
        return

    # Dataset repository
    version_doi = info['doi']
    if version_doi is None:
        raise ValueError(
            f'Dataset {dataset} is not online and not stored in dcmri/datafiles.'
        )

    # Dataset download link
    file_url = (
        "https://zenodo.org/records/" + version_doi
        + "/files/" + dataset + info['ext']
    )

    # Make the request and check for connection error. The timeout
    # (connect, read) in seconds prevents the call from blocking forever
    # when the server stops responding.
    try:
        file_response = requests.get(file_url, timeout=(10, 60))
    except (requests.exceptions.ConnectionError,
            requests.exceptions.Timeout) as err:
        raise requests.exceptions.ConnectionError(
            "\n\n"
            "A connection error occurred trying to download the test data \n"
            "from Zenodo. This usually happens if you are offline. The \n"
            "first time a dataset is fetched via dcmri.fetch you need to \n"
            "be online so the data can be downloaded. After the first \n"
            "time they are saved locally so afterwards you can fetch \n"
            "them even if you are offline. \n\n"
            "The detailed error message is here: " + str(err)) from err

    # Check for other errors
    file_response.raise_for_status()

    # Save the file locally. Write to a temporary name first and move it
    # into place afterwards, so an interrupted write never leaves a
    # truncated file that would later be mistaken for a cached dataset.
    partial = datafile + '.part'
    try:
        with open(partial, 'wb') as out:
            out.write(file_response.content)
        os.replace(partial, datafile)
    except BaseException:
        if os.path.exists(partial):
            os.remove(partial)
        raise