Skip to content

elapid.utils

Backend helper and convenience functions.

check_raster_alignment(raster_paths)

Checks whether the extent, resolution and projection of multiple rasters match exactly.

Parameters:

Name Type Description Default
raster_paths list

a list of raster covariate paths

required

Returns:

Type Description
bool

whether all rasters align

Source code in elapid/utils.py
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
def check_raster_alignment(raster_paths: list) -> bool:
    """Checks whether the extent, resolution and projection of multiple rasters match exactly.

    The first raster is treated as the reference and every other raster is
    compared against it. The comparison stops at the first mismatch.

    Args:
        raster_paths: a list of raster covariate paths

    Returns:
        whether all rasters align. An empty list has nothing to misalign, so it returns True.
    """
    # nothing to compare; avoids an IndexError on the reference lookup below
    if len(raster_paths) == 0:
        return True

    reference_path, *other_paths = raster_paths

    with rio.open(reference_path) as src:
        crs = src.crs
        res = src.res
        bounds = src.bounds
        transform = src.transform

    for path in other_paths:
        with rio.open(path) as src:
            # the projection has to be compared explicitly: two rasters can share
            # identical bounds/resolution/transform values in different coordinate systems
            if src.crs != crs:
                return False
            if src.res != res or src.bounds != bounds or src.transform != transform:
                return False

    return True

count_raster_bands(raster_paths)

Returns the total number of bands from a list of rasters.

Parameters:

Name Type Description Default
raster_paths list

List of raster data file paths.

required

Returns:

Name Type Description
n_bands int

total band count.

Source code in elapid/utils.py
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
def count_raster_bands(raster_paths: list) -> int:
    """Sums the band counts of every raster in a list.

    Args:
        raster_paths: List of raster data file paths.

    Returns:
        n_bands: total band count.
    """
    band_counts = []
    for raster_path in raster_paths:
        # each file is opened only long enough to read its band count
        with rio.open(raster_path) as dataset:
            band_counts.append(dataset.count)

    return sum(band_counts)

create_output_raster_profile(raster_paths, template_idx=0, windowed=True, nodata=None, count=1, compress=None, driver='GTiff', bigtiff=True, dtype='float32')

Gets parameters for windowed reading/writing to output rasters.

Parameters:

Name Type Description Default
raster_paths list

raster paths of covariates to apply the model to

required
template_idx int

index of the raster file to use as a template. template_idx=0 sets the first raster as template

0
windowed bool

perform a block-by-block data read. slower, but reduces memory use

True
nodata Number

output nodata value

None
count int

number of bands in the prediction output

1
driver str

output raster file format (from rasterio.drivers.raster_driver_extensions())

'GTiff'
compress str

compression type to apply to the output file

None
bigtiff bool

specify the output file as a bigtiff (for rasters > 2GB)

True
dtype str

rasterio data type string

'float32'

Returns:

Type Description
windows, profile

an iterable and a dictionary for the window reads and the raster profile

Source code in elapid/utils.py
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
def create_output_raster_profile(
    raster_paths: list,
    template_idx: int = 0,
    windowed: bool = True,
    nodata: Number = None,
    count: int = 1,
    compress: str = None,
    driver: str = "GTiff",
    bigtiff: bool = True,
    dtype: str = "float32",
) -> Tuple[Iterable, Dict]:
    """Builds the read/write windows and the output profile from a template raster.

    Args:
        raster_paths: raster paths of covariates to apply the model to
        template_idx: index of the raster file to use as a template. template_idx=0 sets the first raster as template
        windowed: perform a block-by-block data read. slower, but reduces memory use
        nodata: output nodata value
        count: number of bands in the prediction output
        driver: output raster file format (from rasterio.drivers.raster_driver_extensions())
        compress: compression type to apply to the output file
        bigtiff: specify the output file as a bigtiff (for rasters > 2GB)
        dtype: rasterio data type string

    Returns:
        (windows, profile): an iterable and a dictionary for the window reads and the raster profile
    """
    template_path = raster_paths[template_idx]

    # collect every profile override up front so they can be applied in one step
    overrides = {
        "count": count,
        "dtype": dtype,
        "nodata": nodata,
        "compress": compress,
        "driver": driver,
    }
    # the BIGTIFF creation option is only set for the GTiff driver
    if bigtiff and driver == "GTiff":
        overrides["BIGTIFF"] = "YES"

    with rio.open(template_path) as template:
        if windowed:
            # one window per block of the template raster
            windows = [block for _, block in template.block_windows()]
        else:
            # a single window spanning the full raster
            windows = [rio.windows.Window(0, 0, template.width, template.height)]

        profile = template.profile.copy()

    profile.update(**overrides)

    return windows, profile

download_sample_data(dir, name='ariolimax', quiet=False)

Downloads sample raster and vector files from a web server.

Parameters:

Name Type Description Default
dir str

the directory to download the data to

required
name str

the sample dataset to download. options include: "ariolimax" (Button's banana slug dataset)

'ariolimax'
quiet bool

disable the progress bar

False

Returns:

Type Description
None

None. Downloads files to dir

Source code in elapid/utils.py
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
def download_sample_data(dir: str, name: str = "ariolimax", quiet: bool = False) -> None:
    """Downloads sample raster and vector files from a web server.

    Args:
        dir: the directory to download the data to. created (including any
            missing parent directories) if it does not already exist
        name: the sample dataset to download. options include:
            "ariolimax" Button's banana slug dataset
        quiet: disable the progress bar

    Returns:
        None. Downloads files to `dir`

    Raises:
        ValueError: if `name` is not one of the available sample datasets.
    """
    sample_files = {
        "ariolimax": [
            "ariolimax-ca.gpkg",
            "ca-cloudcover-mean.tif",
            "ca-cloudcover-stdv.tif",
            "ca-leafareaindex-mean.tif",
            "ca-leafareaindex-stdv.tif",
            "ca-surfacetemp-mean.tif",
            "ca-surfacetemp-stdv.tif",
        ],
    }

    # validate before touching the filesystem or the network so that a typo in
    # `name` fails fast with a clear message and leaves no empty directory behind
    name = name.lower()
    if name not in sample_files:
        options = ", ".join(f'"{option}"' for option in sample_files)
        raise ValueError(f'Invalid sample data requested: "{name}". Options include: {options}')

    https = "https://earth-chris.github.io/images/research"

    # unlike os.mkdir, this also creates missing parent directories
    os.makedirs(dir, exist_ok=True)

    tqdm = get_tqdm()
    for fname in tqdm(sample_files[name], disable=quiet, **tqdm_opts):
        request.urlretrieve(f"{https}/{fname}", os.path.join(dir, fname))

format_band_labels(raster_paths, labels=None)

Verify the number of labels matches the band count, create labels if none passed.

Parameters:

Name Type Description Default
raster_paths list

count the total number of bands in these rasters.

required
labels List[str]

a list of band labels.

None

Returns:

Name Type Description
labels

creates default band labels if none are passed.

Source code in elapid/utils.py
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
def format_band_labels(raster_paths: list, labels: List[str] = None):
    """Checks that there is one label per raster band, generating default labels when none are given.

    Args:
        raster_paths: count the total number of bands in these rasters.
        labels: a list of band labels.

    Returns:
        labels: a copy of the passed labels, or default band labels if none are passed.
    """
    n_bands = count_raster_bands(raster_paths)

    band_labels = make_band_labels(n_bands) if labels is None else labels

    n_labels = len(band_labels)
    assert n_labels == n_bands, f"number of band labels ({n_labels}) != n_bands ({n_bands})"

    # return a copy so later edits by the caller do not touch the input list
    return band_labels.copy()

get_raster_band_indexes(raster_paths)

Counts the number of raster bands to index multi-source, multi-band covariates.

Parameters:

Name Type Description Default
raster_paths list

a list of raster paths

required

Returns:

Type Description
nbands, band_idx

int and list of the total number of bands and the 0-based start/stop band index for each path

Source code in elapid/utils.py
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
def get_raster_band_indexes(raster_paths: list) -> Tuple[int, list]:
    """Counts the number of raster bands to index multi-source, multi-band covariates.

    Args:
        raster_paths: a list of raster paths

    Returns:
        (nbands, band_idx): int and list of the total number of bands and the 0-based start/stop
            band index for each path
    """
    # running band offsets: entry k is where raster k starts, entry k + 1 is where it stops
    offsets = [0]
    for raster_path in raster_paths:
        with rio.open(raster_path) as dataset:
            offsets.append(offsets[-1] + dataset.count)

    # the final offset equals the total number of bands across all rasters
    total_bands = offsets[-1]

    return total_bands, offsets

get_tqdm()

Returns a context-appropriate tqdm progress tracking function.

Determines the appropriate tqdm based on the user context, as behavior changes inside/outside of jupyter notebooks.

Returns:

Name Type Description
tqdm Callable

the context-specific tqdm module

Source code in elapid/utils.py
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
def get_tqdm() -> Callable:
    """Selects the tqdm progress tracker that matches the current runtime context.

    The notebook flavor is imported when `in_notebook()` is true,
        and the standard flavor is imported otherwise.

    Returns:
        tqdm: the context-specific tqdm module
    """
    # imports are deferred so that only the flavor actually needed gets loaded
    if in_notebook():
        from tqdm.notebook import tqdm as progress_tracker

        return progress_tracker

    from tqdm import tqdm as progress_tracker

    return progress_tracker

in_notebook()

Evaluate whether the module is currently running in a jupyter notebook.

Source code in elapid/utils.py
240
241
242
def in_notebook() -> bool:
    """Report whether the `ipykernel` module has been loaded, which is taken to mean a jupyter notebook session."""
    loaded_modules = sys.modules
    return "ipykernel" in loaded_modules

load_object(path, compressed=True)

Reads a python object into memory that's been saved to disk.

Parameters:

Name Type Description Default
path str

the file path of the object to load

required
compressed bool

flag to specify whether the file was compressed prior to saving

True

Returns:

Name Type Description
obj Any

the python object that has been saved (e.g., a MaxentModel() instance)

Source code in elapid/utils.py
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
def load_object(path: str, compressed: bool = True) -> Any:
    """Restores a pickled python object from a file on disk.

    Note: the file contents are passed to `pickle.loads`, which can execute
        arbitrary code. Only load files from sources you trust.

    Args:
        path: the file path of the object to load
        compressed: flag to specify whether the file was compressed prior to saving

    Returns:
        obj: the python object that has been saved (e.g., a MaxentModel() instance)
    """
    with open(path, "rb") as handle:
        payload = handle.read()

    # gzip-compressed files are inflated back to the raw pickle bytes first
    pickled = gzip.decompress(payload) if compressed else payload

    return pickle.loads(pickled)

load_sample_data(name='ariolimax', drop_geometry=False)

Loads example species presence/background and covariate data.

Parameters:

Name Type Description Default
name str

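the sample dataset to load. options include: "ariolimax" (Button's banana slug dataset) and "bradypus" (from the R 'maxnet' package)

'ariolimax'
drop_geometry bool

also drop the "geometry" column from the covariate data (only applied to the "ariolimax" dataset)

False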

Returns:

Type Description
x, y

a tuple of dataframes containing covariate and response data, respectively

Source code in elapid/utils.py
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
def load_sample_data(name: str = "ariolimax", drop_geometry: bool = False) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Reads one of the example presence/background datasets stored next to this module.

    Args:
        name: the sample dataset to load. options include:
            "ariolimax" Button's banana slug dataset
            "bradypus" from the R 'maxnet' package
        drop_geometry: also drop the "geometry" column from the covariates.
            only applied to the "ariolimax" dataset

    Returns:
        (x, y): the covariate data and the "presence" response column (cast to int8), respectively
    """
    name = str.lower(name)
    assert name in ["bradypus", "ariolimax"], "Invalid sample data requested"

    # sample files live in a "data" directory beside this module
    module_dir = os.path.dirname(os.path.realpath(__file__))
    data_dir = os.path.join(module_dir, "data")

    if name == "bradypus":
        file_path = os.path.join(data_dir, "bradypus.csv.gz")
        assert os.path.exists(file_path), "sample data missing from install path."
        table = pd.read_csv(file_path, compression="gzip").astype("int64")
        response = table["presence"].astype("int8")
        covariates = table.drop(columns=["presence"])
        # ecoreg is stored as integer codes but is treated as categorical
        covariates = covariates.astype({"ecoreg": "category"})
        return covariates, response

    if name == "ariolimax":
        file_path = os.path.join(data_dir, "ariolimax.gpkg")
        assert os.path.exists(file_path), "sample data missing from install path."
        table = gpd.read_file(file_path)
        dropped = ["presence", "geometry"] if drop_geometry else ["presence"]
        covariates = table.drop(columns=dropped)
        response = table["presence"].astype("int8")
        return covariates, response

make_band_labels(n_bands)

Creates a list of band names to assign as dataframe columns.

Parameters:

Name Type Description Default
n_bands int

total number of raster bands to create labels for.

required

Returns:

Name Type Description
labels list

list of column names.

Source code in elapid/utils.py
296
297
298
299
300
301
302
303
304
305
306
307
308
def make_band_labels(n_bands: int) -> list:
    """Builds zero-padded, 1-based band names ("b1", "b2", ...) to use as dataframe columns.

    Args:
        n_bands: total number of raster bands to create labels for.

    Returns:
        labels: list of column names.
    """
    # pad every band number to the width of the largest one so all labels share a length
    n_zeros = n_digits(n_bands)
    template = "b{band_number:0{n_zeros}d}"

    return [template.format(band_number=band_number, n_zeros=n_zeros) for band_number in range(1, n_bands + 1)]

n_digits(number)

Counts the number of significant integer digits of a number.

Parameters:

Name Type Description Default
number Number

the number to evaluate.

required

Returns:

Name Type Description
order int

number of digits required to represent a number

Source code in elapid/utils.py
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
def n_digits(number: Number) -> int:
    """Counts the number of significant integer digits of a number.

    The sign is ignored, and any value with a magnitude below 1 (including 0)
    counts as a single digit, since its integer part is written as "0".

    Args:
        number: the number to evaluate.

    Returns:
        order: number of digits required to represent the integer part of a number,
            always returned as a builtin int
    """
    magnitude = abs(number)

    # log10 is undefined at 0 and negative for fractions, so handle |number| < 1 directly
    if magnitude < 1:
        return 1

    # integers are counted exactly; going through float log10 loses precision for large values
    if isinstance(number, (int, np.integer)):
        return len(str(int(magnitude)))

    return int(np.floor(np.log10(magnitude))) + 1

repeat_array(x, length=1, axis=0)

Repeats an n-dimensional numpy array along a new axis to an arbitrary length.

Parameters:

Name Type Description Default
x np.array

the n-dimensional array to repeat

required
length int

the number of times to repeat the array

1
axis int

the axis along which to repeat the array (valid values include 0 to n+1)

0

Returns:

Type Description
np.ndarray

An n+1 dimensional numpy array

Source code in elapid/utils.py
26
27
28
29
30
31
32
33
34
35
36
37
def repeat_array(x: np.array, length: int = 1, axis: int = 0) -> np.ndarray:
    """Stacks copies of a numpy array along a newly inserted axis.

    Args:
        x: the n-dimensional array to repeat
        length: the number of times to repeat the array
        axis: the position at which the new axis is inserted and along which the array is repeated

    Returns:
        An n+1 dimensional numpy array
    """
    # insert a length-1 axis first, then tile the data along that axis
    expanded = np.expand_dims(x, axis=axis)
    return np.repeat(expanded, length, axis=axis)

save_object(obj, path, compress=True)

Writes a python object to disk for later access.

Parameters:

Name Type Description Default
obj object

a python object or variable to be saved (e.g., a MaxentModel() instance)

required
path str

the output file path

required
compress bool

gzip-compress the pickled object before writing it to disk

True
Source code in elapid/utils.py
113
114
115
116
117
118
119
120
121
122
123
124
125
126
def save_object(obj: object, path: str, compress: bool = True) -> None:
    """Pickles a python object and writes the bytes to a file for later access.

    Args:
        obj: a python object or variable to be saved (e.g., a MaxentModel() instance)
        path: the output file path
        compress: gzip-compress the pickled bytes before writing them
    """
    pickled = pickle.dumps(obj)
    payload = gzip.compress(pickled) if compress else pickled

    with open(path, "wb") as handle:
        handle.write(payload)

square_factor(n)

Compute a square form-factor to fit n items.

Parameters:

Name Type Description Default
n int

the number of items to fit into a square.

required

Returns:

Type Description
tuple

(x, y) tuple of the square dimensions.

Source code in elapid/utils.py
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
def square_factor(n: int) -> tuple:
    """Compute a square form-factor to fit `n` items.

    Starts from the ceiling of the square root of `n` and walks downward until
    an exact divisor is found, so the two dimensions always multiply to `n`.
    A prime `n` therefore falls back to (1, n).

    Args:
        n: the number of items to fit into a square. must be a positive whole number.

    Returns:
        (x, y) tuple of the square dimensions.

    Raises:
        ValueError: if `n` is not a positive whole number.
    """
    n_items = int(n)
    # non-positive input used to divide by zero and fail with an opaque NaN/inf conversion error
    if n_items != n or n_items < 1:
        raise ValueError(f"n must be a positive whole number, got {n!r}")

    side = int(np.ceil(np.sqrt(n_items)))

    # integer modulo avoids float equality checks; side == 1 always divides, so this terminates
    while n_items % side != 0:
        side -= 1

    return side, n_items // side