Skip to content

dodola.repository

Objects to read and write stored climate model data.

Functions:

Name Description
read

Read Dataset from Zarr store

read_attrs

Read and deserialize JSON attrs file

write

Write Dataset to Zarr Store

dodola.repository.read

read(url_or_path)

Read Dataset from Zarr store

Parameters:

Name Type Description Default
url_or_path str

Location of Zarr store to read.

required

Returns:

Type Description
Dataset
Source code in dodola/repository.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
def read(url_or_path):
    """Open the Zarr store at the given location as a Dataset.

    Parameters
    ----------
    url_or_path : str
        URL or path of the Zarr store to open.

    Returns
    -------
    xr.Dataset
        The object returned by ``open_zarr`` for ``url_or_path``.
    """
    # Debug line before the open, info line once it has returned.
    logger.debug(f"Reading {url_or_path}")
    dataset = open_zarr(url_or_path)
    logger.info(f"Read {url_or_path}")
    return dataset

dodola.repository.read_attrs

read_attrs(urlpath)

Read and deserialize JSON attrs file

Source code in dodola/repository.py
75
76
77
78
79
80
81
82
83
84
def read_attrs(urlpath):
    """Load a JSON attrs file and return its deserialized content.

    Parameters
    ----------
    urlpath
        Location of the JSON file; passed unchanged to ``fsspec.open``.

    Returns
    -------
    The object produced by ``json.load`` for the file's content.
    """
    logger.debug(f"Reading attrs from {urlpath}")

    with fsspec.open(urlpath) as handle:
        attrs = json.load(handle)
        # Logged while the file is still open, right after a successful parse.
        logger.info(f"Read attrs from {urlpath}")

    logger.debug(f"Read attrs {attrs}")
    return attrs

dodola.repository.write

write(url_or_path, x, region=None)

Write Dataset to Zarr Store

Note, any lazy computations will be evaluated.

Parameters:

Name Type Description Default
url_or_path str

Location to write Zarr store to.

required
x Dataset

Dataset to write to the Zarr store.

required
region dict or None

Optional mapping from dimension names to integer slices along dataset dimensions to indicate the region of existing zarr array(s) in which to write this dataset’s data. Variables not sliced in the region are dropped.

None
Source code in dodola/repository.py
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
def write(url_or_path, x, region=None):
    """Store a Dataset in a Zarr store, optionally into a region of it.

    Lazy computations are evaluated as part of the write, since
    ``to_zarr`` is called with ``compute=True``.

    Parameters
    ----------
    url_or_path : str
        Location to write Zarr store to.
    x : xr.Dataset
        Dataset to write.
    region : dict or None, optional
        Mapping from dimension names to integer slices along dataset
        dimensions, selecting the region of existing zarr array(s) that
        receives this dataset's data. When given and non-empty, every
        variable whose dimensions do not include all of the region's keys
        is dropped and the write uses ``mode="a"``. When falsy, the whole
        dataset is written with ``mode="w"``.
    """
    logger.debug(f"Writing {url_or_path}")
    logger.debug(f"Output Dataset {x=}")

    # Simple case first: no region requested, write the full store.
    if not region:
        x.to_zarr(url_or_path, mode="w", compute=True)
        logger.info(f"Written {url_or_path}")
        return

    # TODO: This behavior needs a better, focused, unit test.
    logger.info(f"Writing to Zarr Store region, {region=}")

    # Only variables spanning every region dimension can be written into the
    # region; collect the names of all the others so they can be dropped.
    # NOTE: the name ``variables_to_drop`` is echoed into the log message by
    # the ``=`` f-string specifier below, so it must not be renamed.
    region_dims = tuple(region)
    variables_to_drop = [
        name
        for name, variable in x.variables.items()
        if not all(dim in variable.dims for dim in region_dims)
    ]

    logger.info(
        f"Dropping variables before Zarr region write: {variables_to_drop=}"
    )
    trimmed = x.drop_vars(variables_to_drop)
    trimmed.to_zarr(url_or_path, region=region, mode="a", compute=True)
    logger.info(f"Written {url_or_path}")