Source code for dcmri.data

import os
import sys
import pickle
import shutil
import zipfile
import csv
from io import TextIOWrapper

import requests
import numpy as np

# filepaths need to be identified with importlib_resources
# rather than __file__ as the latter does not work at runtime
# when the package is installed via pip install

if sys.version_info < (3, 9):
    # importlib.resources either doesn't exist or lacks the files()
    # function, so use the PyPI version:
    import importlib_resources
else:
    # importlib.resources has files(), so use that:
    import importlib.resources as importlib_resources


# Zenodo DOI of the repository
# DOIs need to be updated when new versions are created
DOI = {
    'MRR': "15285017",      # v0.0.3
    'TRISTAN': "15285027"   # v0.0.1
}

# Datasets available via fetch()
DATASETS = {
    'KRUK': {'doi': DOI['MRR'], 'ext': '.dmr.zip'},
    'tristan_humans_healthy_ciclosporin': {'doi': DOI['TRISTAN'], 'ext': '.dmr.zip'},
    'tristan_humans_healthy_controls_leeds': {'doi': DOI['TRISTAN'], 'ext': '.dmr.zip'},
    'tristan_humans_healthy_controls_sheffield': {'doi': DOI['TRISTAN'], 'ext': '.dmr.zip'},
    'tristan_humans_healthy_metformin': {'doi': DOI['TRISTAN'], 'ext': '.dmr.zip'},
    'tristan_humans_healthy_rifampicin': {'doi': DOI['TRISTAN'], 'ext': '.dmr.zip'},
    'tristan_humans_patients_rifampicin': {'doi': DOI['TRISTAN'], 'ext': '.dmr.zip'},
    'tristan_rats_healthy_multiple_dosing': {'doi': DOI['TRISTAN'], 'ext': '.dmr.zip'},
    'tristan_rats_healthy_reproducibility': {'doi': DOI['TRISTAN'], 'ext': '.dmr.zip'},
    'tristan_rats_healthy_six_drugs': {'doi': DOI['TRISTAN'], 'ext': '.dmr.zip'},
    'minipig_renal_fibrosis': {'doi': None, 'ext': '.dmr.zip'},
}
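
# Illustrative sketch (not part of the package API): each entry above maps a
# dataset name to the Zenodo record hosting it. The download URL used by
# _download() further down is assembled from the record DOI, the dataset name
# and its file extension. Variable names below are hypothetical:
#
#   >>> meta = DATASETS['KRUK']
#   >>> url = "https://zenodo.org/records/" + meta['doi'] + "/files/" + 'KRUK' + meta['ext']
#   >>> url
#   'https://zenodo.org/records/15285017/files/KRUK.dmr.zip'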


def fetch(dataset=None, clear_cache=False, download_all=False) -> str:
    """Fetch a dataset included in dcmri.

    Args:
        dataset (str, optional): Name of the dataset. See below for options.
        clear_cache (bool, optional): When a dataset is fetched, it is
            downloaded and then stored in a local cache for faster access
            the next time it is fetched. Set clear_cache=True to delete all
            data in the cache. Default is False.
        download_all (bool, optional): By default, only the dataset that is
            fetched is downloaded. Set download_all=True to download all
            datasets at once. This takes some time but then offers fast and
            offline access to all datasets afterwards. It uses around 300 MB
            of space on your hard drive. Default is False.

    Returns:
        str: Path to the .dmr file containing the data, or None if no
        dataset is specified.

    Notes:
        The following datasets are currently available:

        `Magnetic resonance renography <https://zenodo.org/records/15284968>`_

        - KRUK

        `TRISTAN gadoxetate kinetics <https://zenodo.org/records/15285027>`_

        - tristan_humans_healthy_rifampicin
        - tristan_humans_healthy_metformin
        - tristan_humans_healthy_ciclosporin
        - tristan_humans_healthy_controls_leeds
        - tristan_humans_healthy_controls_sheffield
        - tristan_rats_healthy_six_drugs
        - tristan_rats_healthy_reproducibility
        - tristan_rats_healthy_multiple_dosing

        Other

        - minipig_renal_fibrosis: Kidney data in a minipig with unilateral
          ureter stenosis. More detail in future versions.

    Example:
        Fetch the **tristan_humans_healthy_rifampicin** dataset and read it:

    .. plot::
        :include-source:
        :context: close-figs

        >>> import dcmri as dc
        >>> import pydmr

        >>> # fetch dmr file
        >>> file = dc.fetch('tristan_humans_healthy_rifampicin')

        >>> # read dmr file
        >>> data = pydmr.read(file)

    """

    if dataset is None:
        v = None
    elif dataset not in DATASETS:
        raise ValueError(
            f'Dataset {dataset} is unknown. Please choose one of '
            f'{list(DATASETS.keys())}.'
        )
    else:
        v = _fetch_dataset(dataset)

    if clear_cache:
        _clear_cache()

    if download_all:
        for d in DATASETS.keys():
            _download(d)

    return v
def _clear_cache():
    """
    Clear the folder where the data downloaded via fetch are saved.

    Note: if you clear the cache, the data will need to be downloaded
    again the next time they are needed.
    """

    f = importlib_resources.files('dcmri.datafiles')
    for item in f.iterdir():
        if item.is_file():
            item.unlink()  # Delete the file


def _fetch_dataset(dataset):

    f = importlib_resources.files('dcmri.datafiles')
    datafile = str(f.joinpath(dataset + DATASETS[dataset]['ext']))

    # If this is the first time the data are accessed, download them.
    if not os.path.exists(datafile):
        _download(dataset)

    return datafile


def _download(dataset):  # add version keyword

    f = importlib_resources.files('dcmri.datafiles')
    datafile = str(f.joinpath(dataset + DATASETS[dataset]['ext']))

    if os.path.exists(datafile):
        return

    # Dataset repository
    version_doi = DATASETS[dataset]['doi']
    if version_doi is None:
        raise ValueError(
            f'Dataset {dataset} is not online and not stored in '
            'dcmri/datafiles.'
        )

    # Dataset download link
    file_url = (
        "https://zenodo.org/records/" + version_doi + "/files/"
        + dataset + DATASETS[dataset]['ext']
    )

    # Make the request and check for connection error
    try:
        file_response = requests.get(file_url)
    except requests.exceptions.ConnectionError as err:
        raise requests.exceptions.ConnectionError(
            "\n\n"
            "A connection error occurred trying to download the test data \n"
            "from Zenodo. This usually happens if you are offline. The \n"
            "first time a dataset is fetched via dcmri.fetch you need to \n"
            "be online so the data can be downloaded. After the first \n"
            "time they are saved locally so afterwards you can fetch \n"
            "them even if you are offline. \n\n"
            "The detailed error message is here: " + str(err))

    # Check for other errors
    file_response.raise_for_status()

    # Save the file locally
    with open(datafile, 'wb') as f:
        f.write(file_response.content)
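

# Usage sketch (assuming the pydmr package is available to read .dmr files,
# as in the fetch() docstring above):
#
#   >>> import dcmri as dc
#   >>> import pydmr
#   >>> path = dc.fetch('KRUK')          # downloaded on first call, cached afterwards
#   >>> data = pydmr.read(path)          # read the .dmr file
#   >>> dc.fetch(download_all=True)      # optional: pre-download all datasets (~300 MB)
#   >>> dc.fetch(clear_cache=True)       # optional: delete the local cache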