elapid.utils¶
Backend helper and convenience functions.
check_raster_alignment(raster_paths)¶

Checks whether the extent, resolution and projection of multiple rasters match exactly.

Parameters:

Name | Type | Description | Default |
---|---|---|---|
raster_paths | list | a list of raster covariate paths | required |

Returns:

Type | Description |
---|---|
bool | whether all rasters align |
Source code in elapid/utils.py
```python
def check_raster_alignment(raster_paths: list) -> bool:
    """Checks whether the extent, resolution and projection of multiple rasters match exactly.

    Args:
        raster_paths: a list of raster covariate paths

    Returns:
        whether all rasters align
    """
    first = raster_paths[0]
    rest = raster_paths[1:]

    with rio.open(first) as src:
        res = src.res
        bounds = src.bounds
        transform = src.transform

    for path in rest:
        with rio.open(path) as src:
            if src.res != res or src.bounds != bounds or src.transform != transform:
                return False

    return True
```
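A minimal usage sketch; the raster paths below are hypothetical placeholders:

```python
from elapid.utils import check_raster_alignment

# hypothetical covariate rasters that should share grid, extent, and projection
raster_paths = ["/data/elevation.tif", "/data/precipitation.tif"]

if not check_raster_alignment(raster_paths):
    raise ValueError("Input rasters do not share the same extent, resolution, and projection")
```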
count_raster_bands(raster_paths)¶

Returns the total number of bands from a list of rasters.

Parameters:

Name | Type | Description | Default |
---|---|---|---|
raster_paths | list | List of raster data file paths. | required |

Returns:

Type | Description |
---|---|
n_bands | total band count. |
Source code in elapid/utils.py
```python
def count_raster_bands(raster_paths: list) -> int:
    """Returns the total number of bands from a list of rasters.

    Args:
        raster_paths: List of raster data file paths.

    Returns:
        n_bands: total band count.
    """
    n_bands = 0
    for path in raster_paths:
        with rio.open(path) as src:
            n_bands += src.count

    return n_bands
```
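A short example; the file paths are hypothetical:

```python
from elapid.utils import count_raster_bands

raster_paths = ["/data/covariates-a.tif", "/data/covariates-b.tif"]  # hypothetical files
n_bands = count_raster_bands(raster_paths)
print(f"Total bands across all rasters: {n_bands}")
```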
create_output_raster_profile(raster_paths, template_idx=0, windowed=True, nodata=None, count=1, compress=None, driver='GTiff', bigtiff=True, dtype='float32')¶

Gets parameters for windowed reading/writing to output rasters.

Parameters:

Name | Type | Description | Default |
---|---|---|---|
raster_paths | list | raster paths of covariates to apply the model to | required |
template_idx | int | index of the raster file to use as a template. template_idx=0 sets the first raster as template | 0 |
windowed | bool | perform a block-by-block data read. slower, but reduces memory use | True |
nodata | Union[int, float] | output nodata value | None |
count | int | number of bands in the prediction output | 1 |
driver | str | output raster file format (from rasterio.drivers.raster_driver_extensions()) | 'GTiff' |
compress | str | compression type to apply to the output file | None |
bigtiff | bool | specify the output file as a bigtiff (for rasters > 2GB) | True |
dtype | str | rasterio data type string | 'float32' |

Returns:

Type | Description |
---|---|
(windows, profile) | an iterable and a dictionary for the window reads and the raster profile |
Source code in elapid/utils.py
```python
def create_output_raster_profile(
    raster_paths: list,
    template_idx: int = 0,
    windowed: bool = True,
    nodata: Number = None,
    count: int = 1,
    compress: str = None,
    driver: str = "GTiff",
    bigtiff: bool = True,
    dtype: str = "float32",
) -> Tuple[Iterable, Dict]:
    """Gets parameters for windowed reading/writing to output rasters.

    Args:
        raster_paths: raster paths of covariates to apply the model to
        template_idx: index of the raster file to use as a template. template_idx=0 sets the first raster as template
        windowed: perform a block-by-block data read. slower, but reduces memory use
        nodata: output nodata value
        count: number of bands in the prediction output
        driver: output raster file format (from rasterio.drivers.raster_driver_extensions())
        compress: compression type to apply to the output file
        bigtiff: specify the output file as a bigtiff (for rasters > 2GB)
        dtype: rasterio data type string

    Returns:
        (windows, profile): an iterable and a dictionary for the window reads and the raster profile
    """
    with rio.open(raster_paths[template_idx]) as src:
        if windowed:
            windows = [window for _, window in src.block_windows()]
        else:
            windows = [rio.windows.Window(0, 0, src.width, src.height)]

        dst_profile = src.profile.copy()
        dst_profile.update(
            count=count,
            dtype=dtype,
            nodata=nodata,
            compress=compress,
            driver=driver,
        )
        if bigtiff and driver == "GTiff":
            dst_profile.update(BIGTIFF="YES")

    return windows, dst_profile
```
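A sketch of how the returned windows and profile might be used to write an output raster. The paths and the per-window block written here are hypothetical placeholders, not the library's prediction logic:

```python
import numpy as np
import rasterio as rio
from elapid.utils import create_output_raster_profile

raster_paths = ["/data/covariates.tif"]  # hypothetical covariate raster
windows, profile = create_output_raster_profile(raster_paths, nodata=-9999, compress="deflate")

with rio.open("/data/prediction.tif", "w", **profile) as dst:  # hypothetical output path
    for window in windows:
        # placeholder: write a constant nodata block for each window
        block = np.full(
            (profile["count"], int(window.height), int(window.width)), -9999, dtype=profile["dtype"]
        )
        dst.write(block, window=window)
```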
format_band_labels(raster_paths, labels=None)¶

Verifies whether a list of band labels matches the band count, or creates labels when none are passed.

Parameters:

Name | Type | Description | Default |
---|---|---|---|
raster_paths | list | a list of raster data file paths | required |
labels | list | an optional list of band labels to verify against the total band count | None |

Returns:

Type | Description |
---|---|
labels | the verified or newly created list of band labels |
Source code in elapid/utils.py
```python
def format_band_labels(raster_paths: list, labels: list = None):
    """Verifies whether a list of band labels matches the band count,
        or creates labels when none are passed.

    Args:
        raster_paths: a list of raster data file paths
        labels: an optional list of band labels to verify

    Returns:
        labels: the verified or newly created list of band labels
    """
    n_bands = count_raster_bands(raster_paths)

    if labels is None:
        labels = make_band_labels(n_bands)

    n_labels = len(labels)
    assert n_labels == n_bands, f"number of band labels ({n_labels}) != n_bands ({n_bands})"

    return labels
```
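A brief sketch; the raster path is a hypothetical single-band file:

```python
from elapid.utils import format_band_labels

raster_paths = ["/data/covariates.tif"]  # hypothetical file
labels = format_band_labels(raster_paths)            # auto-generates labels like "b1", "b2", ...
labels = format_band_labels(raster_paths, ["elev"])  # or verify your own labels against the band count
```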
get_raster_band_indexes(raster_paths)¶

Counts the number of raster bands to index multi-source, multi-band covariates.

Parameters:

Name | Type | Description | Default |
---|---|---|---|
raster_paths | list | a list of raster paths | required |

Returns:

Type | Description |
---|---|
(nbands, band_idx) | int and list of the total number of bands and the 0-based start/stop band index for each path |
Source code in elapid/utils.py
```python
def get_raster_band_indexes(raster_paths: list) -> Tuple[int, list]:
    """Counts the number of raster bands to index multi-source, multi-band covariates.

    Args:
        raster_paths: a list of raster paths

    Returns:
        (nbands, band_idx): int and list of the total number of bands and the 0-based start/stop
            band index for each path
    """
    nbands = 0
    band_idx = [0]
    for i, raster_path in enumerate(raster_paths):
        with rio.open(raster_path) as src:
            nbands += src.count
            band_idx.append(band_idx[i] + src.count)

    return nbands, band_idx
```
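A sketch showing how the band indexes might be used to address a stacked covariate array; the file paths are hypothetical:

```python
from elapid.utils import get_raster_band_indexes

raster_paths = ["/data/covariates-a.tif", "/data/covariates-b.tif"]  # hypothetical files
nbands, band_idx = get_raster_band_indexes(raster_paths)

# bands from raster_paths[i] occupy positions band_idx[i]:band_idx[i + 1] in a stacked array
for i, path in enumerate(raster_paths):
    print(path, "->", (band_idx[i], band_idx[i + 1]))
```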
get_tqdm()¶

Returns a context-appropriate tqdm progress tracking function.

Determines the appropriate tqdm based on the user context, as behavior changes inside/outside of jupyter notebooks.

Returns:

Type | Description |
---|---|
tqdm | the context-specific tqdm module |
Source code in elapid/utils.py
```python
def get_tqdm() -> Callable:
    """Returns a context-appropriate tqdm progress tracking function.

    Determines the appropriate tqdm based on the user context, as
        behavior changes inside/outside of jupyter notebooks.

    Returns:
        tqdm: the context-specific tqdm module
    """
    if in_notebook():
        from tqdm.notebook import tqdm
    else:
        from tqdm import tqdm

    return tqdm
```
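A minimal sketch:

```python
from elapid.utils import get_tqdm

tqdm = get_tqdm()  # resolves to tqdm.notebook.tqdm inside jupyter, plain tqdm otherwise
for _ in tqdm(range(100), desc="processing"):
    pass  # per-iteration work goes here
```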
in_notebook()¶

Evaluate whether the module is currently running in a jupyter notebook.
Source code in elapid/utils.py
```python
def in_notebook() -> bool:
    """Evaluate whether the module is currently running in a jupyter notebook."""
    return "ipykernel" in sys.modules
```
load_object(path, compressed=True)¶

Reads a python object into memory that's been saved to disk.

Parameters:

Name | Type | Description | Default |
---|---|---|---|
path | str | the file path of the object to load | required |
compressed | bool | flag to specify whether the file was compressed prior to saving | True |

Returns:

Type | Description |
---|---|
obj | the python object that has been saved (e.g., a MaxentModel() instance) |
Source code in elapid/utils.py
```python
def load_object(path: str, compressed: bool = True) -> Any:
    """Reads a python object into memory that's been saved to disk.

    Args:
        path: the file path of the object to load
        compressed: flag to specify whether the file was compressed prior to saving

    Returns:
        obj: the python object that has been saved (e.g., a MaxentModel() instance)
    """
    with open(path, "rb") as f:
        obj = f.read()

    if compressed:
        obj = gzip.decompress(obj)

    return pickle.loads(obj)
```
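A minimal sketch; the file path is a hypothetical placeholder and assumes the object was saved with the default compression:

```python
from elapid.utils import load_object

model = load_object("/tmp/maxent-model.ela", compressed=True)  # hypothetical path
```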
load_sample_data(name='bradypus')¶

Loads example species presence/background and covariate data.

Parameters:

Name | Type | Description | Default |
---|---|---|---|
name | str | the sample dataset to load. options currently include ["bradypus"], from the R 'maxnet' package | 'bradypus' |

Returns:

Type | Description |
---|---|
(x, y) | a tuple of dataframes containing covariate and response data, respectively |
Source code in elapid/utils.py
```python
def load_sample_data(name: str = "bradypus") -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Loads example species presence/background and covariate data.

    Args:
        name: the sample dataset to load. options currently include ["bradypus"], from the R 'maxnet' package

    Returns:
        (x, y): a tuple of dataframes containing covariate and response data, respectively
    """
    assert name.lower() in ["bradypus"], "Invalid sample data requested"

    package_path = os.path.realpath(__file__)
    package_dir = os.path.dirname(package_path)

    if name.lower() == "bradypus":
        file_path = os.path.join(package_dir, "data", "bradypus.csv.gz")
        df = pd.read_csv(file_path, compression="gzip").astype("int64")
        y = df["presence"].astype("int8")
        x = df.drop(columns=["presence"]).astype({"ecoreg": "category"})

        return x, y
```
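A quick usage example:

```python
from elapid.utils import load_sample_data

x, y = load_sample_data("bradypus")
print(x.shape)   # covariate dataframe
print(y.mean())  # fraction of presence records among the presence/background labels
```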
make_band_labels(n_bands)¶

Creates a list of band names to assign as dataframe columns.

Parameters:

Name | Type | Description | Default |
---|---|---|---|
n_bands | int | total number of raster bands to create labels for. | required |

Returns:

Type | Description |
---|---|
labels | list of column names. |
Source code in elapid/utils.py
```python
def make_band_labels(n_bands: int) -> list:
    """Creates a list of band names to assign as dataframe columns.

    Args:
        n_bands: total number of raster bands to create labels for.

    Returns:
        labels: list of column names.
    """
    n_zeros = n_digits(n_bands)
    labels = ["b{band_number:0{n_zeros}d}".format(band_number=i + 1, n_zeros=n_zeros) for i in range(n_bands)]

    return labels
```
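A short example showing the zero-padded label format:

```python
from elapid.utils import make_band_labels

print(make_band_labels(3))   # ['b1', 'b2', 'b3']
print(make_band_labels(12))  # ['b01', 'b02', ..., 'b12']
```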
n_digits(number)¶

Counts the number of significant integer digits of a number.

Parameters:

Name | Type | Description | Default |
---|---|---|---|
number | Union[int, float] | the number to evaluate. | required |

Returns:

Type | Description |
---|---|
order | number of digits required to represent a number |
Source code in elapid/utils.py
```python
def n_digits(number: Number) -> int:
    """Counts the number of significant integer digits of a number.

    Args:
        number: the number to evaluate.

    Returns:
        order: number of digits required to represent a number
    """
    if number == 0:
        order = 1
    else:
        order = np.floor(np.log10(number)).astype(int) + 1

    return order
```
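A few illustrative calls:

```python
from elapid.utils import n_digits

n_digits(7)     # 1
n_digits(42)    # 2
n_digits(1000)  # 4
```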
repeat_array(x, length=1, axis=0)¶

Repeats a 1D numpy array along an axis to an arbitrary length.

Parameters:

Name | Type | Description | Default |
---|---|---|---|
x | ndarray | the n-dimensional array to repeat | required |
length | int | the number of times to repeat the array | 1 |
axis | int | the axis along which to repeat the array (valid values include 0 to n+1) | 0 |

Returns:

Type | Description |
---|---|
ndarray | An n+1 dimensional numpy array |
Source code in elapid/utils.py
```python
def repeat_array(x: np.array, length: int = 1, axis: int = 0) -> np.ndarray:
    """Repeats a 1D numpy array along an axis to an arbitrary length

    Args:
        x: the n-dimensional array to repeat
        length: the number of times to repeat the array
        axis: the axis along which to repeat the array (valid values include 0 to n+1)

    Returns:
        An n+1 dimensional numpy array
    """
    return np.expand_dims(x, axis=axis).repeat(length, axis=axis)
```
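A brief sketch of the two axis options for a 1D input:

```python
import numpy as np
from elapid.utils import repeat_array

x = np.array([1, 2, 3])
repeat_array(x, length=2, axis=0)  # shape (2, 3): the array stacked as rows
repeat_array(x, length=2, axis=1)  # shape (3, 2): the array stacked as columns
```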
save_object(obj, path, compress=True)¶

Writes a python object to disk for later access.

Parameters:

Name | Type | Description | Default |
---|---|---|---|
obj | object | a python object or variable to be saved (e.g., a MaxentModel() instance) | required |
path | str | the output file path | required |
compress | bool | flag to specify whether to gzip-compress the object before writing | True |
Source code in elapid/utils.py
```python
def save_object(obj: object, path: str, compress: bool = True) -> None:
    """Writes a python object to disk for later access.

    Args:
        obj: a python object or variable to be saved (e.g., a MaxentModel() instance)
        path: the output file path
        compress: flag to specify whether to gzip-compress the object before writing
    """
    obj = pickle.dumps(obj)
    if compress:
        obj = gzip.compress(obj)

    with open(path, "wb") as f:
        f.write(obj)
```
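A round-trip sketch with load_object; the saved object and path are hypothetical stand-ins:

```python
from elapid.utils import load_object, save_object

model = {"feature_names": ["b1", "b2"]}   # stand-in for e.g. a fitted MaxentModel()
save_object(model, "/tmp/model.ela")      # gzip-compressed by default
restored = load_object("/tmp/model.ela")  # pass compressed=False if saved with compress=False
```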