elapid.models

Classes for training species distribution models.

EnsembleModel

Bases: BaseEstimator, ClassifierMixin

Barebones estimator for ensembling multiple model predictions.

Source code in elapid/models.py
class EnsembleModel(BaseEstimator, ClassifierMixin):
    """Barebones estimator for ensembling multiple model predictions."""

    models: list = None
    reducer: str = None

    def __init__(self, models: List[BaseEstimator], reducer: str = EnsembleConfig.reducer):
        """Create a model ensemble from a set of trained models.

        Args:
            models: iterable of models with `.predict()` and/or `.predict_proba()` methods
            reducer: method for reducing/ensembling each model's predictions.
                select from ['mean', 'median', 'mode']
        """
        self.models = models
        self.reducer = reducer

    def reduce(self, preds: List[np.ndarray]) -> np.ndarray:
        """Reduce multiple model predictions into ensemble prediction/probability scores.

        Args:
            preds: list of model predictions from .predict() or .predict_proba()

        Returns:
            array-like of shape (n_samples, n_classes) with model predictions
        """
        reducer = self.reducer.lower()

        if reducer == "mean":
            reduced = np.nanmean(preds, axis=0)

        elif reducer == "median":
            reduced = np.nanmedian(preds, axis=0)

        elif reducer == "mode":
            try:
                summary = scistats.mode(preds, axis=0, nan_policy="omit", keepdims=False)
                reduced = summary.mode
            except TypeError:
                summary = scistats.mode(preds, axis=0, nan_policy="omit")
                reduced = np.squeeze(summary.mode)

        return reduced

    def predict(self, x: ArrayLike) -> np.ndarray:
        """Applies models to a set of covariates or features. Requires each model has been fit.

        Args:
            x: array-like of shape (n_samples, n_features) with covariate data

        Returns:
            array-like of shape (n_samples,) with model predictions
        """
        preds = [model.predict(x) for model in self.models]
        ensemble = self.reduce(preds)
        return ensemble

    def predict_proba(self, x: ArrayLike) -> np.ndarray:
        """Compute prediction probability scores for each class.

        Args:
            x: array-like of shape (n_samples, n_features) with covariate data

        Returns:
            array-like of shape (n_samples, n_classes) with model predictions
        """
        probas = [model.predict_proba(x) for model in self.models]
        ensemble = self.reduce(probas)
        return ensemble
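
A minimal usage sketch, assuming two fitted elapid estimators (MaxentModel and NicheEnvelopeModel are documented below) and hypothetical random arrays standing in for real covariate/occurrence data:

import numpy as np
from elapid.models import MaxentModel, NicheEnvelopeModel, EnsembleModel

# hypothetical training data: 500 samples, 4 covariates, binary labels
x = np.random.rand(500, 4)
y = np.random.binomial(1, 0.2, 500)

# fit two base models, then average their probability scores
maxent = MaxentModel().fit(x, y)
envelope = NicheEnvelopeModel().fit(x, y)
ensemble = EnsembleModel(models=[maxent, envelope], reducer="mean")
probas = ensemble.predict_proba(x)  # shape (n_samples, 2)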

__init__(models, reducer=EnsembleConfig.reducer)

Create a model ensemble from a set of trained models.

Parameters:

models (List[BaseEstimator], required): iterable of models with .predict() and/or .predict_proba() methods
reducer (str, default EnsembleConfig.reducer): method for reducing/ensembling each model's predictions. select from ['mean', 'median', 'mode']
Source code in elapid/models.py
def __init__(self, models: List[BaseEstimator], reducer: str = EnsembleConfig.reducer):
    """Create a model ensemble from a set of trained models.

    Args:
        models: iterable of models with `.predict()` and/or `.predict_proba()` methods
        reducer: method for reducing/ensembling each model's predictions.
            select from ['mean', 'median', 'mode']
    """
    self.models = models
    self.reducer = reducer

predict(x)

Applies models to a set of covariates or features. Requires that each model has been fit.

Parameters:

x (ArrayLike, required): array-like of shape (n_samples, n_features) with covariate data

Returns:

np.ndarray: array-like of shape (n_samples,) with model predictions

Source code in elapid/models.py
def predict(self, x: ArrayLike) -> np.ndarray:
    """Applies models to a set of covariates or features. Requires each model has been fit.

    Args:
        x: array-like of shape (n_samples, n_features) with covariate data

    Returns:
        array-like of shape (n_samples,) with model predictions
    """
    preds = [model.predict(x) for model in self.models]
    ensemble = self.reduce(preds)
    return ensemble

predict_proba(x)

Compute prediction probability scores for each class.

Parameters:

x (ArrayLike, required): array-like of shape (n_samples, n_features) with covariate data

Returns:

np.ndarray: array-like of shape (n_samples, n_classes) with model predictions

Source code in elapid/models.py
def predict_proba(self, x: ArrayLike) -> np.ndarray:
    """Compute prediction probability scores for each class.

    Args:
        x: array-like of shape (n_samples, n_features) with covariate data

    Returns:
        array-like of shape (n_samples, n_classes) with model predictions
    """
    probas = [model.predict_proba(x) for model in self.models]
    ensemble = self.reduce(probas)
    return ensemble

reduce(preds)

Reduce multiple model predictions into ensemble prediction/probability scores.

Parameters:

preds (List[np.ndarray], required): list of model predictions from .predict() or .predict_proba()

Returns:

np.ndarray: array-like of shape (n_samples, n_classes) with model predictions

Source code in elapid/models.py
def reduce(self, preds: List[np.ndarray]) -> np.ndarray:
    """Reduce multiple model predictions into ensemble prediction/probability scores.

    Args:
        preds: list of model predictions from .predict() or .predict_proba()

    Returns:
        array-like of shape (n_samples, n_classes) with model predictions
    """
    reducer = self.reducer.lower()

    if reducer == "mean":
        reduced = np.nanmean(preds, axis=0)

    elif reducer == "median":
        reduced = np.nanmedian(preds, axis=0)

    elif reducer == "mode":
        try:
            summary = scistats.mode(preds, axis=0, nan_policy="omit", keepdims=False)
            reduced = summary.mode
        except TypeError:
            summary = scistats.mode(preds, axis=0, nan_policy="omit")
            reduced = np.squeeze(summary.mode)

    return reduced
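
To illustrate the reducer semantics on their own, a small sketch with hand-built prediction arrays (values are illustrative; NaNs are ignored by the nan-aware reducers):

import numpy as np

preds = [
    np.array([0.2, 0.8]),
    np.array([0.4, 0.6]),
    np.array([0.9, np.nan]),
]
np.nanmean(preds, axis=0)    # array([0.5, 0.7])
np.nanmedian(preds, axis=0)  # array([0.4, 0.7])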

MaxentModel

Bases: BaseEstimator, ClassifierMixin

Model estimator for Maxent-style species distribution models.

Source code in elapid/models.py
class MaxentModel(BaseEstimator, ClassifierMixin):
    """Model estimator for Maxent-style species distribution models."""

    def __init__(
        self,
        feature_types: Union[list, str] = MaxentConfig.feature_types,
        tau: float = MaxentConfig.tau,
        transform: str = MaxentConfig.transform,
        clamp: bool = MaxentConfig.clamp,
        scorer: str = MaxentConfig.scorer,
        beta_multiplier: float = MaxentConfig.beta_multiplier,
        beta_lqp: float = MaxentConfig.beta_lqp,
        beta_hinge: float = MaxentConfig.beta_hinge,
        beta_threshold: float = MaxentConfig.beta_threshold,
        beta_categorical: float = MaxentConfig.beta_categorical,
        n_hinge_features: int = MaxentConfig.n_hinge_features,
        n_threshold_features: int = MaxentConfig.n_threshold_features,
        convergence_tolerance: float = MaxentConfig.tolerance,
        use_lambdas: str = MaxentConfig.use_lambdas,
        n_lambdas: int = MaxentConfig.n_lambdas,
        class_weights: Union[str, float] = MaxentConfig.class_weights,
        n_cpus: int = NCPUS,
        use_sklearn: bool = FORCE_SKLEARN,
    ):
        """Create a maxent model object.

        Args:
            feature_types: maxent feature types to fit. must be in string "lqphta" or
                list ["linear", "quadratic", "product", "hinge", "threshold", "auto"]
            tau: maxent prevalence value for scaling logistic output
            transform: maxent model transformation type. select from
                ["raw", "logistic", "cloglog"].
            clamp: set features to min/max range from training during prediction
            scorer: sklearn scoring function for model training
            beta_multiplier: scaler for all regularization parameters.
                higher values drop more coefficients
            beta_lqp: linear, quadratic and product feature regularization scaler
            beta_hinge: hinge feature regularization scaler
            beta_threshold: threshold feature regularization scaler
            beta_categorical: categorical feature regularization scaler
            n_hinge_features: the number of hinge features to fit in feature transformation
            n_threshold_features: the number of thresholds to fit in feature transformation
            convergence_tolerance: model convergence tolerance level
            use_lambdas: guide for which model lambdas to select (either "best" or "last")
            n_lambdas: number of lambda values to fit models with
            class_weights: strategy for weighting presence samples.
                pass "balanced" to compute the ratio based on sample frequency,
                or pass a float for the presence:background weight ratio.
                the R `maxnet` package uses a value of 100 as its default.
                set to None to ignore.
            n_cpus: threads to use during model training
            use_sklearn: force using `sklearn` for fitting logistic regression.
                turned off by default to use `glmnet` for fitting.
                this feature was turned on to support Windows users
                who install the package without a fortran compiler.
        """
        self.feature_types = feature_types
        self.tau = tau
        self.transform = transform
        self.clamp = clamp
        self.scorer = scorer
        self.beta_multiplier = beta_multiplier
        self.beta_hinge = beta_hinge
        self.beta_lqp = beta_lqp
        self.beta_threshold = beta_threshold
        self.beta_categorical = beta_categorical
        self.n_hinge_features = n_hinge_features
        self.n_threshold_features = n_threshold_features
        self.convergence_tolerance = convergence_tolerance
        self.n_cpus = n_cpus
        self.use_lambdas = use_lambdas
        self.n_lambdas = n_lambdas
        self.class_weights = class_weights
        self.use_sklearn = use_sklearn

        # computed during model fitting
        self.initialized_ = False
        self.estimator = None
        self.preprocessor = None
        self.transformer = None
        self.regularization_ = None
        self.lambdas_ = None
        self.beta_scores_ = None
        self.entropy_ = 0.0
        self.alpha_ = 0.0

    def fit(
        self,
        x: ArrayLike,
        y: ArrayLike,
        sample_weight: ArrayLike = None,
        categorical: List[int] = None,
        labels: list = None,
        preprocessor: BaseEstimator = None,
    ) -> None:
        """Trains a maxent model using a set of covariates and presence/background points.

        Args:
            x: array of shape (n_samples, n_features) with covariate data
            y: array of shape (n_samples,) with binary presence/background (1/0) values
            sample_weight: array of weights assigned to each sample with shape (n_samples,).
                this is modified by the `class_weights` model parameter unless
                you set `class_weights=None`.
            categorical: indices for which columns are categorical
            labels: covariate labels. ignored if x is a pandas DataFrame
            preprocessor: an `sklearn` transformer with a .transform() and/or
                a .fit_transform() method. Some examples include a PCA() object or a
                RobustScaler().
        """
        # clear state variables
        self.alpha_ = 0.0
        self.entropy_ = 0.0

        # format the input data
        y = format_occurrence_data(y)

        # apply preprocessing
        if preprocessor is not None:
            self.preprocessor = preprocessor
            try:
                x = self.preprocessor.transform(x)
            except NotFittedError:
                x = self.preprocessor.fit_transform(x)

        # fit the feature transformer
        self.feature_types = validate_feature_types(self.feature_types)
        self.transformer = MaxentFeatureTransformer(
            feature_types=self.feature_types,
            clamp=self.clamp,
            n_hinge_features=self.n_hinge_features,
            n_threshold_features=self.n_threshold_features,
        )
        features = self.transformer.fit_transform(x, categorical=categorical, labels=labels)
        feature_labels = self.transformer.feature_names_

        # compute class weights
        if self.class_weights is not None:
            pbr = len(y) / y.sum() if self.class_weights == "balanced" else self.class_weights
            class_weight = compute_weights(y, pbr=pbr)

            # scale the sample weight
            if sample_weight is None:
                sample_weight = class_weight
            else:
                sample_weight *= class_weight

        # model fitting with sklearn
        if self.use_sklearn:
            C = estimate_C_from_betas(self.beta_multiplier)
            self.initialize_sklearn_model(C)
            self.estimator.fit(features, y, sample_weight=sample_weight)
            self.beta_scores_ = self.estimator.coef_[0]

        # model fitting with glmnet
        else:
            # set feature regularization parameters
            self.regularization_ = compute_regularization(
                y,
                features,
                feature_labels=feature_labels,
                beta_multiplier=self.beta_multiplier,
                beta_lqp=self.beta_lqp,
                beta_threshold=self.beta_threshold,
                beta_hinge=self.beta_hinge,
                beta_categorical=self.beta_categorical,
            )

            # get model lambda scores to initialize the glm
            self.lambdas_ = compute_lambdas(y, sample_weight, self.regularization_, n_lambdas=self.n_lambdas)

            # model fitting
            self.initialize_glmnet_model(lambdas=self.lambdas_)
            self.estimator.fit(
                features,
                y,
                sample_weight=sample_weight,
                relative_penalties=self.regularization_,
            )

            # get the beta values based on which lambda selection method to use
            if self.use_lambdas == "last":
                self.beta_scores_ = self.estimator.coef_path_[0, :, -1]
            elif self.use_lambdas == "best":
                self.beta_scores_ = self.estimator.coef_path_[0, :, self.estimator.lambda_max_inx_]

        # store initialization state
        self.initialized_ = True

        # apply maxent-specific transformations
        class_transform = self.get_params()["transform"]
        self.set_params(transform="raw")
        raw = self.predict(x[y == 0])
        self.set_params(transform=class_transform)

        # alpha is a normalizing constant that ensures that f1(z) integrates (sums) to 1
        self.alpha_ = maxent_alpha(raw)

        # the distance from f(z) is the relative entropy of f1(z) WRT f(z)
        self.entropy_ = maxent_entropy(raw)

        return self

    def predict(self, x: ArrayLike) -> ArrayLike:
        """Apply a model to a set of covariates or features. Requires that a model has been fit.

        Args:
            x: array-like of shape (n_samples, n_features) with covariate data

        Returns:
            predictions: array-like of shape (n_samples,) with model predictions
        """
        if not self.initialized_:
            raise NotFittedError("Model must be fit first")

        # feature transformations
        x = x if self.preprocessor is None else self.preprocessor.transform(x)
        features = x if self.transformer is None else self.transformer.transform(x)

        # apply the model
        engma = np.matmul(features, self.beta_scores_) + self.alpha_

        # scale based on the transform type
        if self.transform == "raw":
            return maxent_raw_transform(engma)

        elif self.transform == "logistic":
            return maxent_logistic_transform(engma, self.entropy_, self.tau)

        elif self.transform == "cloglog":
            return maxent_cloglog_transform(engma, self.entropy_)

    def predict_proba(self, x: ArrayLike) -> ArrayLike:
        """Compute prediction probability scores for the 0/1 classes.

        Args:
            x: array-like of shape (n_samples, n_features) with covariate data

        Returns:
            predictions: array-like of shape (n_samples, 2) with model predictions
        """
        ypred = self.predict(x).reshape(-1, 1)
        predictions = np.hstack((1 - ypred, ypred))

        return predictions

    def fit_predict(
        self,
        x: ArrayLike,
        y: ArrayLike,
        categorical: list = None,
        labels: list = None,
        preprocessor: BaseEstimator = None,
    ) -> ArrayLike:
        """Trains and applies a model to x/y data.

        Args:
            x: array-like of shape (n_samples, n_features) with covariate data
            y: array-like of shape (n_samples,) with binary presence/background (1/0) values
            categorical: column indices indicating which columns are categorical
            labels: Covariate labels. Ignored if x is a pandas DataFrame
            preprocessor: an `sklearn` transformer with a .transform() and/or
                a .fit_transform() method. Some examples include a PCA() object or a
                RobustScaler().

        Returns:
            predictions: Array-like of shape (n_samples,) with model predictions
        """
        self.fit(x, y, categorical=categorical, labels=labels, preprocessor=preprocessor)
        predictions = self.predict(x)

        return predictions

    def initialize_glmnet_model(
        self,
        lambdas: np.array,
        alpha: float = 1,
        standardize: bool = False,
        fit_intercept: bool = True,
    ) -> None:
        """Creates the Logistic Regression with elastic net penalty model object.

        Args:
            lambdas: array of model lambda values. get from elapid.features.compute_lambdas()
            alpha: elasticnet mixing parameter. alpha=1 for lasso, alpha=0 for ridge
            standardize: specify coefficient normalization
            fit_intercept: include an intercept parameter
        """
        self.estimator = LogitNet(
            alpha=alpha,
            lambda_path=lambdas,
            standardize=standardize,
            fit_intercept=fit_intercept,
            scoring=self.scorer,
            n_jobs=self.n_cpus,
            tol=self.convergence_tolerance,
        )

    def initialize_sklearn_model(self, C: float, fit_intercept: bool = True) -> None:
        """Creates an sklearn Logisticregression estimator with L1 penalties.

        Args:
            C: the regularization parameter
            fit_intercept: include an intercept parameter
        """
        self.estimator = LogisticRegression(
            C=C,
            fit_intercept=fit_intercept,
            penalty="l1",
            solver="liblinear",
            tol=self.convergence_tolerance,
            max_iter=self.n_lambdas,
        )
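
A minimal end-to-end sketch, with hypothetical random data standing in for real covariates and occurrence labels (the feature_types and transform values come from the parameter docs below):

import numpy as np
from elapid.models import MaxentModel

x = np.random.rand(500, 4)           # covariates
y = np.random.binomial(1, 0.2, 500)  # presence/background (1/0) labels

model = MaxentModel(feature_types="lqh", transform="cloglog", beta_multiplier=1.5)
model.fit(x, y)
suitability = model.predict(x)       # shape (n_samples,), cloglog-scaled scores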

__init__(feature_types=MaxentConfig.feature_types, tau=MaxentConfig.tau, transform=MaxentConfig.transform, clamp=MaxentConfig.clamp, scorer=MaxentConfig.scorer, beta_multiplier=MaxentConfig.beta_multiplier, beta_lqp=MaxentConfig.beta_lqp, beta_hinge=MaxentConfig.beta_hinge, beta_threshold=MaxentConfig.beta_threshold, beta_categorical=MaxentConfig.beta_categorical, n_hinge_features=MaxentConfig.n_hinge_features, n_threshold_features=MaxentConfig.n_threshold_features, convergence_tolerance=MaxentConfig.tolerance, use_lambdas=MaxentConfig.use_lambdas, n_lambdas=MaxentConfig.n_lambdas, class_weights=MaxentConfig.class_weights, n_cpus=NCPUS, use_sklearn=FORCE_SKLEARN)

Create a maxent model object.

Parameters:

feature_types (Union[list, str], default MaxentConfig.feature_types): maxent feature types to fit. must be in string "lqphta" or list ["linear", "quadratic", "product", "hinge", "threshold", "auto"]
tau (float, default MaxentConfig.tau): maxent prevalence value for scaling logistic output
transform (str, default MaxentConfig.transform): maxent model transformation type. select from ["raw", "logistic", "cloglog"]
clamp (bool, default MaxentConfig.clamp): set features to min/max range from training during prediction
scorer (str, default MaxentConfig.scorer): sklearn scoring function for model training
beta_multiplier (float, default MaxentConfig.beta_multiplier): scaler for all regularization parameters. higher values drop more coefficients
beta_lqp (float, default MaxentConfig.beta_lqp): linear, quadratic and product feature regularization scaler
beta_hinge (float, default MaxentConfig.beta_hinge): hinge feature regularization scaler
beta_threshold (float, default MaxentConfig.beta_threshold): threshold feature regularization scaler
beta_categorical (float, default MaxentConfig.beta_categorical): categorical feature regularization scaler
n_hinge_features (int, default MaxentConfig.n_hinge_features): the number of hinge features to fit in feature transformation
n_threshold_features (int, default MaxentConfig.n_threshold_features): the number of thresholds to fit in feature transformation
convergence_tolerance (float, default MaxentConfig.tolerance): model convergence tolerance level
use_lambdas (str, default MaxentConfig.use_lambdas): guide for which model lambdas to select (either "best" or "last")
n_lambdas (int, default MaxentConfig.n_lambdas): number of lambda values to fit models with
class_weights (Union[str, float], default MaxentConfig.class_weights): strategy for weighting presence samples. pass "balanced" to compute the ratio based on sample frequency, or pass a float for the presence:background weight ratio. the R maxnet package uses a value of 100 as its default. set to None to ignore
n_cpus (int, default NCPUS): threads to use during model training
use_sklearn (bool, default FORCE_SKLEARN): force using sklearn for fitting logistic regression. turned off by default to use glmnet for fitting. this option supports Windows users who install the package without a fortran compiler
Source code in elapid/models.py
def __init__(
    self,
    feature_types: Union[list, str] = MaxentConfig.feature_types,
    tau: float = MaxentConfig.tau,
    transform: str = MaxentConfig.transform,
    clamp: bool = MaxentConfig.clamp,
    scorer: str = MaxentConfig.scorer,
    beta_multiplier: float = MaxentConfig.beta_multiplier,
    beta_lqp: float = MaxentConfig.beta_lqp,
    beta_hinge: float = MaxentConfig.beta_hinge,
    beta_threshold: float = MaxentConfig.beta_threshold,
    beta_categorical: float = MaxentConfig.beta_categorical,
    n_hinge_features: int = MaxentConfig.n_hinge_features,
    n_threshold_features: int = MaxentConfig.n_threshold_features,
    convergence_tolerance: float = MaxentConfig.tolerance,
    use_lambdas: str = MaxentConfig.use_lambdas,
    n_lambdas: int = MaxentConfig.n_lambdas,
    class_weights: Union[str, float] = MaxentConfig.class_weights,
    n_cpus: int = NCPUS,
    use_sklearn: bool = FORCE_SKLEARN,
):
    """Create a maxent model object.

    Args:
        feature_types: maxent feature types to fit. must be in string "lqphta" or
            list ["linear", "quadratic", "product", "hinge", "threshold", "auto"]
        tau: maxent prevalence value for scaling logistic output
        transform: maxent model transformation type. select from
            ["raw", "logistic", "cloglog"].
        clamp: set features to min/max range from training during prediction
        scorer: sklearn scoring function for model training
        beta_multiplier: scaler for all regularization parameters.
            higher values drop more coefficients
        beta_lqp: linear, quadratic and product feature regularization scaler
        beta_hinge: hinge feature regularization scaler
        beta_threshold: threshold feature regularization scaler
        beta_categorical: categorical feature regularization scaler
        n_hinge_features: the number of hinge features to fit in feature transformation
        n_threshold_features: the number of thresholds to fit in feature transformation
        convergence_tolerance: model convergence tolerance level
        use_lambdas: guide for which model lambdas to select (either "best" or "last")
        n_lambdas: number of lambda values to fit models with
        class_weights: strategy for weighting presence samples.
            pass "balanced" to compute the ratio based on sample frequency,
            or pass a float for the presence:background weight ratio.
            the R `maxnet` package uses a value of 100 as its default.
            set to None to ignore.
        n_cpus: threads to use during model training
        use_sklearn: force using `sklearn` for fitting logistic regression.
            turned off by default to use `glmnet` for fitting.
            this feature was turned on to support Windows users
            who install the package without a fortran compiler.
    """
    self.feature_types = feature_types
    self.tau = tau
    self.transform = transform
    self.clamp = clamp
    self.scorer = scorer
    self.beta_multiplier = beta_multiplier
    self.beta_hinge = beta_hinge
    self.beta_lqp = beta_lqp
    self.beta_threshold = beta_threshold
    self.beta_categorical = beta_categorical
    self.n_hinge_features = n_hinge_features
    self.n_threshold_features = n_threshold_features
    self.convergence_tolerance = convergence_tolerance
    self.n_cpus = n_cpus
    self.use_lambdas = use_lambdas
    self.n_lambdas = n_lambdas
    self.class_weights = class_weights
    self.use_sklearn = use_sklearn

    # computed during model fitting
    self.initialized_ = False
    self.estimator = None
    self.preprocessor = None
    self.transformer = None
    self.regularization_ = None
    self.lambdas_ = None
    self.beta_scores_ = None
    self.entropy_ = 0.0
    self.alpha_ = 0.0

fit(x, y, sample_weight=None, categorical=None, labels=None, preprocessor=None)

Trains a maxent model using a set of covariates and presence/background points.

Parameters:

x (ArrayLike, required): array of shape (n_samples, n_features) with covariate data
y (ArrayLike, required): array of shape (n_samples,) with binary presence/background (1/0) values
sample_weight (ArrayLike, default None): array of weights assigned to each sample with shape (n_samples,). this is modified by the class_weights model parameter unless you set class_weights=None
categorical (List[int], default None): indices for which columns are categorical
labels (list, default None): covariate labels. ignored if x is a pandas DataFrame
preprocessor (BaseEstimator, default None): an sklearn transformer with a .transform() and/or a .fit_transform() method. Some examples include a PCA() object or a RobustScaler()
Source code in elapid/models.py
def fit(
    self,
    x: ArrayLike,
    y: ArrayLike,
    sample_weight: ArrayLike = None,
    categorical: List[int] = None,
    labels: list = None,
    preprocessor: BaseEstimator = None,
) -> None:
    """Trains a maxent model using a set of covariates and presence/background points.

    Args:
        x: array of shape (n_samples, n_features) with covariate data
        y: array of shape (n_samples,) with binary presence/background (1/0) values
        sample_weight: array of weights assigned to each sample with shape (n_samples,).
            this is modified by the `class_weights` model parameter unless
            you set `class_weights=None`.
        categorical: indices for which columns are categorical
        labels: covariate labels. ignored if x is a pandas DataFrame
        preprocessor: an `sklearn` transformer with a .transform() and/or
            a .fit_transform() method. Some examples include a PCA() object or a
            RobustScaler().
    """
    # clear state variables
    self.alpha_ = 0.0
    self.entropy_ = 0.0

    # format the input data
    y = format_occurrence_data(y)

    # apply preprocessing
    if preprocessor is not None:
        self.preprocessor = preprocessor
        try:
            x = self.preprocessor.transform(x)
        except NotFittedError:
            x = self.preprocessor.fit_transform(x)

    # fit the feature transformer
    self.feature_types = validate_feature_types(self.feature_types)
    self.transformer = MaxentFeatureTransformer(
        feature_types=self.feature_types,
        clamp=self.clamp,
        n_hinge_features=self.n_hinge_features,
        n_threshold_features=self.n_threshold_features,
    )
    features = self.transformer.fit_transform(x, categorical=categorical, labels=labels)
    feature_labels = self.transformer.feature_names_

    # compute class weights
    if self.class_weights is not None:
        pbr = len(y) / y.sum() if self.class_weights == "balanced" else self.class_weights
        class_weight = compute_weights(y, pbr=pbr)

        # scale the sample weight
        if sample_weight is None:
            sample_weight = class_weight
        else:
            sample_weight *= class_weight

    # model fitting with sklearn
    if self.use_sklearn:
        C = estimate_C_from_betas(self.beta_multiplier)
        self.initialize_sklearn_model(C)
        self.estimator.fit(features, y, sample_weight=sample_weight)
        self.beta_scores_ = self.estimator.coef_[0]

    # model fitting with glmnet
    else:
        # set feature regularization parameters
        self.regularization_ = compute_regularization(
            y,
            features,
            feature_labels=feature_labels,
            beta_multiplier=self.beta_multiplier,
            beta_lqp=self.beta_lqp,
            beta_threshold=self.beta_threshold,
            beta_hinge=self.beta_hinge,
            beta_categorical=self.beta_categorical,
        )

        # get model lambda scores to initialize the glm
        self.lambdas_ = compute_lambdas(y, sample_weight, self.regularization_, n_lambdas=self.n_lambdas)

        # model fitting
        self.initialize_glmnet_model(lambdas=self.lambdas_)
        self.estimator.fit(
            features,
            y,
            sample_weight=sample_weight,
            relative_penalties=self.regularization_,
        )

        # get the beta values based on which lambda selection method to use
        if self.use_lambdas == "last":
            self.beta_scores_ = self.estimator.coef_path_[0, :, -1]
        elif self.use_lambdas == "best":
            self.beta_scores_ = self.estimator.coef_path_[0, :, self.estimator.lambda_max_inx_]

    # store initialization state
    self.initialized_ = True

    # apply maxent-specific transformations
    class_transform = self.get_params()["transform"]
    self.set_params(transform="raw")
    raw = self.predict(x[y == 0])
    self.set_params(transform=class_transform)

    # alpha is a normalizing constant that ensures that f1(z) integrates (sums) to 1
    self.alpha_ = maxent_alpha(raw)

    # the distance from f(z) is the relative entropy of f1(z) WRT f(z)
    self.entropy_ = maxent_entropy(raw)

    return self
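
For example, an unfitted sklearn transformer passed as preprocessor is fit on the training covariates via the NotFittedError fallback above, then reused automatically inside predict(). A sketch with hypothetical random data:

import numpy as np
from sklearn.preprocessing import RobustScaler
from elapid.models import MaxentModel

x = np.random.rand(500, 4)
y = np.random.binomial(1, 0.2, 500)

model = MaxentModel()
model.fit(x, y, preprocessor=RobustScaler())  # scaler is fit_transform-ed here
suitability = model.predict(x)                # scaler is reapplied before prediction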

fit_predict(x, y, categorical=None, labels=None, preprocessor=None)

Trains and applies a model to x/y data.

Parameters:

x (ArrayLike, required): array-like of shape (n_samples, n_features) with covariate data
y (ArrayLike, required): array-like of shape (n_samples,) with binary presence/background (1/0) values
categorical (list, default None): column indices indicating which columns are categorical
labels (list, default None): covariate labels. ignored if x is a pandas DataFrame
preprocessor (BaseEstimator, default None): an sklearn transformer with a .transform() and/or a .fit_transform() method. Some examples include a PCA() object or a RobustScaler()

Returns:

predictions (ArrayLike): array-like of shape (n_samples,) with model predictions

Source code in elapid/models.py
def fit_predict(
    self,
    x: ArrayLike,
    y: ArrayLike,
    categorical: list = None,
    labels: list = None,
    preprocessor: BaseEstimator = None,
) -> ArrayLike:
    """Trains and applies a model to x/y data.

    Args:
        x: array-like of shape (n_samples, n_features) with covariate data
        y: array-like of shape (n_samples,) with binary presence/background (1/0) values
        categorical: column indices indicating which columns are categorical
        labels: Covariate labels. Ignored if x is a pandas DataFrame
        preprocessor: an `sklearn` transformer with a .transform() and/or
            a .fit_transform() method. Some examples include a PCA() object or a
            RobustScaler().

    Returns:
        predictions: Array-like of shape (n_samples,) with model predictions
    """
    self.fit(x, y, categorical=categorical, labels=labels, preprocessor=preprocessor)
    predictions = self.predict(x)

    return predictions

initialize_glmnet_model(lambdas, alpha=1, standardize=False, fit_intercept=True)

Creates the Logistic Regression with elastic net penalty model object.

Parameters:

lambdas (np.array, required): array of model lambda values. get from elapid.features.compute_lambdas()
alpha (float, default 1): elasticnet mixing parameter. alpha=1 for lasso, alpha=0 for ridge
standardize (bool, default False): specify coefficient normalization
fit_intercept (bool, default True): include an intercept parameter
Source code in elapid/models.py
def initialize_glmnet_model(
    self,
    lambdas: np.array,
    alpha: float = 1,
    standardize: bool = False,
    fit_intercept: bool = True,
) -> None:
    """Creates the Logistic Regression with elastic net penalty model object.

    Args:
        lambdas: array of model lambda values. get from elapid.features.compute_lambdas()
        alpha: elasticnet mixing parameter. alpha=1 for lasso, alpha=0 for ridge
        standardize: specify coefficient normalization
        fit_intercept: include an intercept parameter
    """
    self.estimator = LogitNet(
        alpha=alpha,
        lambda_path=lambdas,
        standardize=standardize,
        fit_intercept=fit_intercept,
        scoring=self.scorer,
        n_jobs=self.n_cpus,
        tol=self.convergence_tolerance,
    )

initialize_sklearn_model(C, fit_intercept=True)

Creates an sklearn LogisticRegression estimator with L1 penalties.

Parameters:

C (float, required): the regularization parameter
fit_intercept (bool, default True): include an intercept parameter
Source code in elapid/models.py
def initialize_sklearn_model(self, C: float, fit_intercept: bool = True) -> None:
    """Creates an sklearn Logisticregression estimator with L1 penalties.

    Args:
        C: the regularization parameter
        fit_intercept: include an intercept parameter
    """
    self.estimator = LogisticRegression(
        C=C,
        fit_intercept=fit_intercept,
        penalty="l1",
        solver="liblinear",
        tol=self.convergence_tolerance,
        max_iter=self.n_lambdas,
    )

predict(x)

Apply a model to a set of covariates or features. Requires that a model has been fit.

Parameters:

x (ArrayLike, required): array-like of shape (n_samples, n_features) with covariate data

Returns:

predictions (ArrayLike): array-like of shape (n_samples,) with model predictions

Source code in elapid/models.py
def predict(self, x: ArrayLike) -> ArrayLike:
    """Apply a model to a set of covariates or features. Requires that a model has been fit.

    Args:
        x: array-like of shape (n_samples, n_features) with covariate data

    Returns:
        predictions: array-like of shape (n_samples,) with model predictions
    """
    if not self.initialized_:
        raise NotFittedError("Model must be fit first")

    # feature transformations
    x = x if self.preprocessor is None else self.preprocessor.transform(x)
    features = x if self.transformer is None else self.transformer.transform(x)

    # apply the model
    engma = np.matmul(features, self.beta_scores_) + self.alpha_

    # scale based on the transform type
    if self.transform == "raw":
        return maxent_raw_transform(engma)

    elif self.transform == "logistic":
        return maxent_logistic_transform(engma, self.entropy_, self.tau)

    elif self.transform == "cloglog":
        return maxent_cloglog_transform(engma, self.entropy_)
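
Because transform is an __init__ parameter, the output scale of a fitted model can be switched with the standard sklearn set_params() method, the same mechanism fit() uses internally to compute the raw background scores. A sketch, assuming a fitted model and x as in the examples above:

model.set_params(transform="raw")
raw_scores = model.predict(x)       # exponential scores, not rescaled to [0, 1]

model.set_params(transform="logistic")
logistic_scores = model.predict(x)  # tau-scaled scores in [0, 1]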

predict_proba(x)

Compute prediction probability scores for the 0/1 classes.

Parameters:

x (ArrayLike, required): array-like of shape (n_samples, n_features) with covariate data

Returns:

predictions (ArrayLike): array-like of shape (n_samples, 2) with model predictions

Source code in elapid/models.py
def predict_proba(self, x: ArrayLike) -> ArrayLike:
    """Compute prediction probability scores for the 0/1 classes.

    Args:
        x: array-like of shape (n_samples, n_features) with covariate data

    Returns:
        predictions: array-like of shape (n_samples, 2) with model predictions
    """
    ypred = self.predict(x).reshape(-1, 1)
    predictions = np.hstack((1 - ypred, ypred))

    return predictions

NicheEnvelopeModel

Bases: BaseEstimator, ClassifierMixin, FeaturesMixin

Model estimator for niche envelope-style models.

Source code in elapid/models.py
class NicheEnvelopeModel(BaseEstimator, ClassifierMixin, FeaturesMixin):
    """Model estimator for niche envelope-style models."""

    def __init__(
        self,
        percentile_range: Tuple[float, float] = NicheEnvelopeConfig.percentile_range,
        overlay: str = NicheEnvelopeConfig.overlay,
    ):
        """Create a niche envelope model estimator.

        Args:
            percentile_range: covariate values within this range are flagged as suitable habitat.
                using a narrow range like [10, 90] drops more areas from suitability maps,
                while [0, 100] creates an envelope around the full range of observed
                covariates at all y==1 locations.
            overlay: niche envelope overlap type.
                select from ["average", "intersection", "union"]
        """
        self.percentile_range = percentile_range
        self.overlay = overlay
        self.feature_mins_ = None
        self.feature_maxs_ = None
        self.categorical_estimator = None
        self.categorical_ = None
        self.continuous_ = None
        self.categorical_pd_ = None
        self.continuous_pd_ = None
        self.in_categorical_ = None

    def fit(self, x: ArrayLike, y: ArrayLike, categorical: list = None, labels: list = None) -> None:
        """Fits a niche envelope model using a set of covariates and presence/background points.

        Args:
            x: array-like of shape (n_samples, n_features) with covariate data
            y: array-like of shape (n_samples,) with binary presence/background (1/0) values
            categorical: indices for which columns are categorical
            labels: covariate labels. ignored if x is a pandas DataFrame
        """
        # format the input x/y data
        self._format_labels_and_dtypes(x, categorical=categorical, labels=labels)
        con, cat = self._format_covariate_data(x)
        y = format_occurrence_data(y)

        # estimate the feature range of the continuous data
        self.feature_mins_ = np.percentile(con[y == 1], self.percentile_range[0], axis=0)
        self.feature_maxs_ = np.percentile(con[y == 1], self.percentile_range[1], axis=0)

        # one-hot encode the categorical data and flag the classes observed at presence locations
        if cat is not None:
            self.categorical_estimator = CategoricalTransformer()
            ohe = self.categorical_estimator.fit_transform(cat)
            self.in_categorical_ = np.any(ohe[y == 1], axis=0)

        return self

    def predict(self, x: ArrayLike) -> np.ndarray:
        """Applies a model to a set of covariates or features. Requires that a model has been fit.

        Args:
            x: array-like of shape (n_samples, n_features) with covariate data

        Returns:
            array-like of shape (n_samples,) with model predictions
        """
        overlay = self.overlay.lower()
        overlay_options = ["average", "intersection", "union"]
        assert overlay in overlay_options, f"overlay must be one of {', '.join(overlay_options)}"

        # format the input data
        con, cat = self._format_covariate_data(x)
        nrows, ncols = x.shape

        # any value within the transformed range is considered within the envelope
        in_range = (con >= self.feature_mins_) * (con <= self.feature_maxs_)

        # map the class locations where the species has been observed
        if cat is not None:
            ohe = self.categorical_estimator.transform(cat)
            should_be_here = ohe[:, self.in_categorical_].any(axis=1).reshape((nrows, 1))
            shouldnt_be_here = (~ohe[:, ~self.in_categorical_].any(axis=1)).reshape((nrows, 1))
            in_range = np.concatenate((in_range, should_be_here, shouldnt_be_here), axis=1)

        # compute the envelope based on the overlay method
        if overlay == "average":
            ypred = np.mean(in_range, axis=1, dtype="float32")

        elif overlay == "intersection":
            ypred = np.all(in_range, axis=1).astype("uint8")

        elif overlay == "union":
            ypred = np.any(in_range, axis=1).astype("uint8")

        return ypred

    def predict_proba(self, x: ArrayLike) -> ArrayLike:
        """Compute prediction probability scores for the 0/1 classes.

        Args:
            x: array-like of shape (n_samples, n_features) with covariate data

        Returns:
            predictions: array-like of shape (n_samples, 2) with model predictions
        """
        ypred = self.predict(x).reshape(-1, 1)
        predictions = np.hstack((1 - ypred, ypred))

        return predictions

    def fit_predict(self, x: ArrayLike, y: ArrayLike, categorical: list = None, labels: list = None) -> np.ndarray:
        """Trains and applies a model to x/y data.

        Args:
            x: array-like of shape (n_samples, n_features) with covariate data
            y: array-like of shape (n_samples,) with binary presence/background (1/0) values
            categorical: column indices indicating which columns are categorical
            labels: Covariate labels. Ignored if x is a pandas DataFrame

        Returns:
            array-like of shape (n_samples,) with model predictions
        """
        self.fit(x, y, categorical=categorical, labels=labels)
        return self.predict(x)
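
A minimal sketch, using hypothetical random x/y arrays as in the examples above:

import numpy as np
from elapid.models import NicheEnvelopeModel

x = np.random.rand(500, 4)
y = np.random.binomial(1, 0.2, 500)

# flag samples inside the 5th-95th percentile envelope of presence covariates
envelope = NicheEnvelopeModel(percentile_range=[5, 95], overlay="average")
ypred = envelope.fit_predict(x, y)  # fraction of covariates in-range per sample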

__init__(percentile_range=NicheEnvelopeConfig.percentile_range, overlay=NicheEnvelopeConfig.overlay)

Create a niche envelope model estimator.

Parameters:

percentile_range (Tuple[float, float], default NicheEnvelopeConfig.percentile_range): covariate values within this range are flagged as suitable habitat. using a narrow range like [10, 90] drops more areas from suitability maps, while [0, 100] creates an envelope around the full range of observed covariates at all y==1 locations
overlay (str, default NicheEnvelopeConfig.overlay): niche envelope overlap type. select from ["average", "intersection", "union"]
Source code in elapid/models.py
def __init__(
    self,
    percentile_range: Tuple[float, float] = NicheEnvelopeConfig.percentile_range,
    overlay: str = NicheEnvelopeConfig.overlay,
):
    """Create a niche envelope model estimator.

    Args:
        percentile_range: covariate values within this range are flagged as suitable habitat.
            using a narrow range like [10, 90] drops more areas from suitability maps,
            while [0, 100] creates an envelope around the full range of observed
            covariates at all y==1 locations.
        overlay: niche envelope overlap type.
            select from ["average", "intersection", "union"]
    """
    self.percentile_range = percentile_range
    self.overlay = overlay
    self.feature_mins_ = None
    self.feature_maxs_ = None
    self.categorical_estimator = None
    self.categorical_ = None
    self.continuous_ = None
    self.categorical_pd_ = None
    self.continuous_pd_ = None
    self.in_categorical_ = None

fit(x, y, categorical=None, labels=None)

Fits a niche envelope model using a set of covariates and presence/background points.

Parameters:

x (ArrayLike, required): array-like of shape (n_samples, n_features) with covariate data
y (ArrayLike, required): array-like of shape (n_samples,) with binary presence/background (1/0) values
categorical (list, default None): indices for which columns are categorical
labels (list, default None): covariate labels. ignored if x is a pandas DataFrame
Source code in elapid/models.py
def fit(self, x: ArrayLike, y: ArrayLike, categorical: list = None, labels: list = None) -> None:
    """Fits a niche envelope model using a set of covariates and presence/background points.

    Args:
        x: array-like of shape (n_samples, n_features) with covariate data
        y: array-like of shape (n_samples,) with binary presence/background (1/0) values
        categorical: indices for which columns are categorical
        labels: covariate labels. ignored if x is a pandas DataFrame
    """
    # format the input x/y data
    self._format_labels_and_dtypes(x, categorical=categorical, labels=labels)
    con, cat = self._format_covariate_data(x)
    y = format_occurrence_data(y)

    # estimate the feature range of the continuous data
    self.feature_mins_ = np.percentile(con[y == 1], self.percentile_range[0], axis=0)
    self.feature_maxs_ = np.percentile(con[y == 1], self.percentile_range[1], axis=0)

    # one-hot encode the categorical data and flag the classes observed at presence locations
    if cat is not None:
        self.categorical_estimator = CategoricalTransformer()
        ohe = self.categorical_estimator.fit_transform(cat)
        self.in_categorical_ = np.any(ohe[y == 1], axis=0)

    return self

fit_predict(x, y, categorical=None, labels=None)

Trains and applies a model to x/y data.

Parameters:

x (ArrayLike, required): array-like of shape (n_samples, n_features) with covariate data
y (ArrayLike, required): array-like of shape (n_samples,) with binary presence/background (1/0) values
categorical (list, default None): column indices indicating which columns are categorical
labels (list, default None): covariate labels. ignored if x is a pandas DataFrame

Returns:

np.ndarray: array-like of shape (n_samples,) with model predictions

Source code in elapid/models.py
def fit_predict(self, x: ArrayLike, y: ArrayLike, categorical: list = None, labels: list = None) -> np.ndarray:
    """Trains and applies a model to x/y data.

    Args:
        x: array-like of shape (n_samples, n_features) with covariate data
        y: array-like of shape (n_samples,) with binary presence/background (1/0) values
        categorical: column indices indicating which columns are categorical
        labels: Covariate labels. Ignored if x is a pandas DataFrame

    Returns:
        array-like of shape (n_samples,) with model predictions
    """
    self.fit(x, y, categorical=categorical, labels=labels)
    return self.predict(x)

predict(x)

Applies a model to a set of covariates or features. Requires that a model has been fit.

Parameters:

x (ArrayLike, required): array-like of shape (n_samples, n_features) with covariate data

Returns:

np.ndarray: array-like of shape (n_samples,) with model predictions

Source code in elapid/models.py
def predict(self, x: ArrayLike) -> np.ndarray:
    """Applies a model to a set of covariates or features. Requires that a model has been fit.

    Args:
        x: array-like of shape (n_samples, n_features) with covariate data

    Returns:
        array-like of shape (n_samples,) with model predictions
    """
    overlay = self.overlay.lower()
    overlay_options = ["average", "intersection", "union"]
    assert overlay in overlay_options, f"overlay must be one of {', '.join(overlay_options)}"

    # format the input data
    con, cat = self._format_covariate_data(x)
    nrows, ncols = x.shape

    # any value within the transformed range is considered within the envelope
    in_range = (con >= self.feature_mins_) * (con <= self.feature_maxs_)

    # map the class locations where the species has been observed
    if cat is not None:
        ohe = self.categorical_estimator.transform(cat)
        should_be_here = ohe[:, self.in_categorical_].any(axis=1).reshape((nrows, 1))
        shouldnt_be_here = (~ohe[:, ~self.in_categorical_].any(axis=1)).reshape((nrows, 1))
        in_range = np.concatenate((in_range, should_be_here, shouldnt_be_here), axis=1)

    # compute the envelope based on the overlay method
    if overlay == "average":
        ypred = np.mean(in_range, axis=1, dtype="float32")

    elif overlay == "intersection":
        ypred = np.all(in_range, axis=1).astype("uint8")

    elif overlay == "union":
        ypred = np.any(in_range, axis=1).astype("uint8")

    return ypred

predict_proba(x)

Compute prediction probability scores for the 0/1 classes.

Parameters:

x (ArrayLike, required): array-like of shape (n_samples, n_features) with covariate data

Returns:

predictions (ArrayLike): array-like of shape (n_samples, 2) with model predictions

Source code in elapid/models.py
def predict_proba(self, x: ArrayLike) -> ArrayLike:
    """Compute prediction probability scores for the 0/1 classes.

    Args:
        x: array-like of shape (n_samples, n_features) with covariate data

    Returns:
        predictions: array-like of shape (n_samples, 2) with model predictions
    """
    ypred = self.predict(x).reshape(-1, 1)
    predictions = np.hstack((1 - ypred, ypred))

    return predictions

estimate_C_from_betas(beta_multiplier)

Convert the maxent-format beta_multiplier to an sklearn-format C regularization parameter.

Parameters:

beta_multiplier (float, required): the maxent beta regularization scaler

Returns:

float: a C factor approximating the level of regularization passed to glmnet

Source code in elapid/models.py
def estimate_C_from_betas(beta_multiplier: float) -> float:
    """Convert the maxent-format beta_multiplier to an sklearn-format C regularization parameter.

    Args:
        beta_multiplier: the maxent beta regularization scaler

    Returns:
        a C factor approximating the level of regularization passed to glmnet
    """
    return 2 / (1 - np.exp(-beta_multiplier))
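
A worked example of the mapping (values computed from the formula above; note that a larger beta_multiplier yields a smaller C, i.e. a stronger L1 penalty in sklearn):

from elapid.models import estimate_C_from_betas

estimate_C_from_betas(1.0)  # 2 / (1 - e**-1) ~= 3.16
estimate_C_from_betas(2.0)  # 2 / (1 - e**-2) ~= 2.31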

format_occurrence_data(y)

Reads input y data and formats it to consistent 1d array dtypes.

Parameters:

y (ArrayLike, required): array-like of shape (n_samples,) or (n_samples, 1)

Returns:

ArrayLike: formatted uint8 ndarray of shape (n_samples,)

Raises:

np.AxisError: raised when an array with 2 or more columns is passed

Source code in elapid/models.py
def format_occurrence_data(y: ArrayLike) -> ArrayLike:
    """Reads input y data and formats it to consistent 1d array dtypes.

    Args:
        y: array-like of shape (n_samples,) or (n_samples, 1)

    Returns:
        formatted uint8 ndarray of shape (n_samples,)

    Raises:
        np.AxisError: an array with 2 or more columns is passed
    """
    if not isinstance(y, np.ndarray):
        y = np.array(y)

    if y.ndim > 1:
        if y.shape[1] > 1 or y.ndim > 2:
            raise np.AxisError(f"Multi-column y data passed of shape {y.shape}. Must be 1d or 1 column.")
        y = y.flatten()

    return y.astype("uint8")
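
A behavior sketch based on the code above:

import numpy as np
from elapid.models import format_occurrence_data

format_occurrence_data([0, 1, 1])         # array([0, 1, 1], dtype=uint8)
format_occurrence_data(np.ones((3, 1)))   # flattened to shape (3,)
format_occurrence_data(np.zeros((3, 2)))  # raises np.AxisError: multi-column y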

maxent_alpha(raw)

Compute the sum-to-one alpha maxent model parameter.

Parameters:

raw (np.ndarray, required): uncalibrated maxent raw (exponential) model output

Returns:

alpha (float): the output sum-to-one scaling factor

Source code in elapid/models.py
def maxent_alpha(raw: np.ndarray) -> float:
    """Compute the sum-to-one alpha maxent model parameter.

    Args:
        raw: uncalibrated maxent raw (exponential) model output

    Returns:
        alpha: the output sum-to-one scaling factor
    """
    return -np.log(np.sum(raw))

maxent_cloglog_transform(engma, entropy)

Compute maxent's cumulative log-log suitability score

Parameters:

engma (np.ndarray, required): calibrated maxent linear model output
entropy (float, required): the calibrated model entropy score

Returns:

np.ndarray: the cloglog scores for each sample

Source code in elapid/models.py
def maxent_cloglog_transform(engma: np.ndarray, entropy: float) -> np.ndarray:
    """Compute maxent's cumulative log-log suitability score

    Args:
        engma: calibrated maxent linear model output
        entropy: the calibrated model entropy score

    Returns:
        the cloglog scores for each sample
    """
    return 1 - np.exp(-np.exp(engma) * np.exp(entropy))

maxent_entropy(raw)

Compute the maxent model entropy score for scaling the logistic output

Parameters:

raw (np.ndarray, required): uncalibrated maxent raw (exponential) model output

Returns:

entropy (float): background distribution entropy score

Source code in elapid/models.py
def maxent_entropy(raw: np.ndarray) -> float:
    """Compute the maxent model entropy score for scaling the logistic output

    Args:
        raw: uncalibrated maxent raw (exponential) model output

    Returns:
        entropy: background distribution entropy score
    """
    scaled = raw / np.sum(raw)
    return -np.sum(scaled * np.log(scaled))

maxent_logistic_transform(engma, entropy, tau=MaxentConfig.tau)

Compute maxent's logistic suitability score

Parameters:

engma (np.ndarray, required): calibrated maxent linear model output
entropy (float, required): the calibrated model entropy score
tau (float, default MaxentConfig.tau): the prevalence scaler. lower values indicate rarer species

Returns:

np.ndarray: the tau-scaled logistic scores for each sample

Source code in elapid/models.py
def maxent_logistic_transform(engma: np.ndarray, entropy: float, tau: float = MaxentConfig.tau) -> np.ndarray:
    """Compute maxent's logistic suitability score

    Args:
        engma: calibrated maxent linear model output
        entropy: the calibrated model entropy score
        tau: the prevalence scaler. lower values indicate rarer species.

    Returns:
        the tau-scaled logistic scores for each sample
    """
    # maxnet's (tau-free) logistic formulation:
    # return 1 / (1 + np.exp(-entropy - engma))
    # use java's formulation instead
    logratio = np.exp(engma) * np.exp(entropy)
    return (tau * logratio) / ((1 - tau) + (tau * logratio))

maxent_raw_transform(engma)

Compute maxent's raw suitability score

Parameters:

engma (np.ndarray, required): calibrated maxent linear model output

Returns:

np.ndarray: the log-linear raw scores for each sample

Source code in elapid/models.py
def maxent_raw_transform(engma: np.ndarray) -> np.ndarray:
    """Compute maxent's raw suitability score

    Args:
        engma: calibrated maxent linear model output

    Returns:
        the log-linear raw scores for each sample
    """
    return np.exp(engma)
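
In math notation, writing η for the calibrated linear score (engma), H for the entropy, and τ for tau, the helpers above implement:

\alpha = -\log \sum_i \mathrm{raw}_i

H = -\sum_i p_i \log p_i, \quad p_i = \mathrm{raw}_i \Big/ \sum_j \mathrm{raw}_j

f_{\mathrm{raw}}(\eta) = e^{\eta}

f_{\mathrm{logistic}}(\eta) = \frac{\tau\, e^{\eta + H}}{(1 - \tau) + \tau\, e^{\eta + H}}

f_{\mathrm{cloglog}}(\eta) = 1 - \exp\left(-e^{\eta + H}\right)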