Skip to content

elapid.features

Functions to transform covariate data into complex model features.

CategoricalTransformer (BaseEstimator)

Applies one-hot encoding to categorical covariate datasets.

Source code in elapid/features.py
class CategoricalTransformer(BaseEstimator):
    """Applies one-hot encoding to categorical covariate datasets.

    Fits one independent `OneHotEncoder` per input column so each
    categorical covariate expands into its own set of binary features.
    """

    # one fitted OneHotEncoder per input column; populated by `fit`
    estimators_: list = None

    def __init__(self):
        pass

    def fit(self, x: ArrayLike):
        """Fit one one-hot encoder per categorical covariate column.

        Args:
            x: array-like of shape (n_samples, n_features)
                Categorical data used to learn the unique classes of each
                feature. A 1-d array is treated as a single column.

        Returns:
            None. Updates the transformer with feature fitting parameters.
        """
        self.estimators_ = []
        x = np.array(x)
        if x.ndim == 1:
            # encoders require 2-d input, so promote the single column
            # NOTE(review): `sparse=False` was renamed `sparse_output` in
            # scikit-learn 1.2 and removed in 1.4 — confirm the pinned version.
            estimator = OneHotEncoder(dtype=np.uint8, sparse=False)
            self.estimators_.append(estimator.fit(x.reshape(-1, 1)))
        else:
            nrows, ncols = x.shape
            for col in range(ncols):
                # each column gets its own encoder so class sets stay independent
                xsub = x[:, col].reshape(-1, 1)
                estimator = OneHotEncoder(dtype=np.uint8, sparse=False)
                self.estimators_.append(estimator.fit(xsub))

    def transform(self, x: ArrayLike) -> np.ndarray:
        """One-hot encode covariates using the fitted per-column encoders.

        Args:
            x: array-like of shape (n_samples, n_features)
                Input data that will be transformed. Columns must be in the
                same order as the data passed to `fit`.

        Returns:
            ndarray with the per-column one-hot encodings concatenated
                along the feature axis.
        """
        x = np.array(x)
        if x.ndim == 1:
            estimator = self.estimators_[0]
            return estimator.transform(x.reshape(-1, 1))
        else:
            class_data = []
            nrows, ncols = x.shape
            for col in range(ncols):
                xsub = x[:, col].reshape(-1, 1)
                estimator = self.estimators_[col]
                class_data.append(estimator.transform(xsub))
            return np.concatenate(class_data, axis=1)

    def fit_transform(self, x: ArrayLike) -> np.ndarray:
        """Fits the encoders to x and returns one-hot encoded features.

        Args:
            x: array-like of shape (n_samples, n_features)
                Input data to fit the encoders and to transform.

        Returns:
            ndarray with transformed data.
        """
        self.fit(x)
        return self.transform(x)

fit(self, x)

Fit one one-hot encoder per categorical covariate column.

Parameters:

Name Type Description Default
x Union[numpy.ndarray, pandas.core.frame.DataFrame]

array-like of shape (n_samples, n_features) The data used to compute the per-feature minimum and maximum used for later scaling along the features axis.

required

Returns:

Type Description

None. Updates the transformer with feature fitting parameters.

Source code in elapid/features.py
def fit(self, x: ArrayLike):
    """Fit one one-hot encoder per categorical covariate column.

    Args:
        x: array-like of shape (n_samples, n_features)
            Categorical data used to learn the unique classes of each
            feature. A 1-d array is treated as a single column.

    Returns:
        None. Updates the transformer with feature fitting parameters.
    """
    self.estimators_ = []
    x = np.array(x)
    if x.ndim == 1:
        # encoders require 2-d input, so promote the single column
        # NOTE(review): `sparse=False` was renamed `sparse_output` in
        # scikit-learn 1.2 and removed in 1.4 — confirm the pinned version.
        estimator = OneHotEncoder(dtype=np.uint8, sparse=False)
        self.estimators_.append(estimator.fit(x.reshape(-1, 1)))
    else:
        nrows, ncols = x.shape
        for col in range(ncols):
            # one encoder per column so class sets stay independent
            xsub = x[:, col].reshape(-1, 1)
            estimator = OneHotEncoder(dtype=np.uint8, sparse=False)
            self.estimators_.append(estimator.fit(xsub))

fit_transform(self, x)

Fits scaler to x and returns transformed features.

Parameters:

Name Type Description Default
x Union[numpy.ndarray, pandas.core.frame.DataFrame]

array-like of shape (n_samples, n_features) Input data to fit the scaler and to transform.

required

Returns:

Type Description
ndarray

ndarray with transformed data.

Source code in elapid/features.py
def fit_transform(self, x: ArrayLike) -> np.ndarray:
    """Fit the transformer to x, then return the transformed features.

    Args:
        x: array-like of shape (n_samples, n_features)
            Input data to fit the scaler and to transform.

    Returns:
        ndarray with transformed data.
    """
    self.fit(x)
    transformed = self.transform(x)
    return transformed

transform(self, x)

Scale covariates according to the feature range.

Parameters:

Name Type Description Default
x Union[numpy.ndarray, pandas.core.frame.DataFrame]

array-like of shape (n_samples, n_features) Input data that will be transformed.

required

Returns:

Type Description
ndarray

ndarray with transformed data.

Source code in elapid/features.py
def transform(self, x: ArrayLike) -> np.ndarray:
    """One-hot encode covariates using the fitted per-column encoders.

    Args:
        x: array-like of shape (n_samples, n_features)
            Input data that will be transformed.

    Returns:
        ndarray with transformed data.
    """
    x = np.array(x)
    if x.ndim == 1:
        # a 1-d input was fit with a single encoder
        return self.estimators_[0].transform(x.reshape(-1, 1))

    # encode each column with its own fitted encoder, then stack
    encoded = [
        self.estimators_[idx].transform(x[:, idx].reshape(-1, 1))
        for idx in range(x.shape[1])
    ]
    return np.concatenate(encoded, axis=1)

CumulativeTransformer (QuantileTransformer)

Applies a percentile-based transform to estimate cumulative suitability.

Source code in elapid/features.py
class CumulativeTransformer(QuantileTransformer):
    """Applies a percentile-based transform to estimate cumulative suitability.

    Thin wrapper around `QuantileTransformer` preconfigured with 100
    quantiles and a uniform output distribution.
    """

    def __init__(self):
        super().__init__(output_distribution="uniform", n_quantiles=100)

HingeTransformer (BaseEstimator)

Fits hinge transformations to an array of covariates.

Source code in elapid/features.py
class HingeTransformer(BaseEstimator):
    """Fits hinge transformations to an array of covariates.

    Hinge features are piecewise-linear ramps anchored at "knot" locations
    spaced evenly between each covariate's minimum and maximum.
    """

    # number of hinge knots per covariate
    n_hinges_: int = None
    # per-feature minimums computed during fit
    mins_: np.ndarray = None
    # per-feature maximums computed during fit
    maxs_: np.ndarray = None
    # (n_hinges_, n_features) array of evenly-spaced knot locations
    hinge_indices_: np.ndarray = None

    def __init__(self, n_hinges: int = MaxentConfig.n_hinge_features):
        self.n_hinges_ = n_hinges

    def fit(self, x: ArrayLike):
        """Compute per-feature minimums, maximums, and hinge knot locations.

        Args:
            x: array-like of shape (n_samples, n_features)
                The data used to compute the per-feature minimum and maximum
                used for later scaling along the features axis.
        Returns:
            None. Updates the transformer with feature fitting parameters.
        """
        x = np.array(x)
        self.mins_ = x.min(axis=0)
        self.maxs_ = x.max(axis=0)
        # evenly-spaced knots between each feature's min and max
        self.hinge_indices_ = np.linspace(self.mins_, self.maxs_, self.n_hinges_)

    def transform(self, x: ArrayLike) -> np.ndarray:
        """Compute hinge features for each covariate at each fitted knot.

        Args:
            x: array-like of shape (n_samples, n_features)
                Input data that will be transformed.

        Returns:
            ndarray with transformed data.
        """
        x = np.array(x)
        # replicate the input once per hinge interval (n_hinges_ - 1
        # intervals between consecutive knots)
        xarr = repeat_array(x, self.n_hinges_ - 1, axis=-1)
        # knot arrays broadcast against each sample: all knots but the last
        # for the left hinges, all but the first for the right hinges
        # NOTE(review): repeat_array/left_hinge/right_hinge are project
        # helpers — confirm exact semantics in elapid's utils module
        lharr = repeat_array(self.hinge_indices_[:-1].transpose(), len(x), axis=0)
        rharr = repeat_array(self.hinge_indices_[1:].transpose(), len(x), axis=0)
        lh = left_hinge(xarr, lharr, self.maxs_)
        rh = right_hinge(xarr, self.mins_, rharr)
        # flatten to (n_samples, 2 * (n_hinges_ - 1) * n_features)
        return np.concatenate((lh, rh), axis=2).reshape(x.shape[0], -1)

    def fit_transform(self, x: ArrayLike) -> np.ndarray:
        """Fits the transformer to x and returns hinge features.

        Args:
            x: array-like of shape (n_samples, n_features)
                Input data to fit the transformer and to transform.

        Returns:
            ndarray with transformed data.
        """
        self.fit(x)
        return self.transform(x)

fit(self, x)

Compute the minimum and maximum for scaling.

Parameters:

Name Type Description Default
x Union[numpy.ndarray, pandas.core.frame.DataFrame]

array-like of shape (n_samples, n_features) The data used to compute the per-feature minimum and maximum used for later scaling along the features axis.

required

Returns:

Type Description

None. Updates the transformer with feature fitting parameters.

Source code in elapid/features.py
def fit(self, x: ArrayLike):
    """Compute per-feature minimums, maximums, and hinge knot locations.

    Args:
        x: array-like of shape (n_samples, n_features)
            The data used to compute the per-feature minimum and maximum
            used for later scaling along the features axis.

    Returns:
        None. Updates the transformer with feature fitting parameters.
    """
    xarr = np.array(x)
    self.mins_ = xarr.min(axis=0)
    self.maxs_ = xarr.max(axis=0)
    # evenly-spaced knots between each feature's min and max
    self.hinge_indices_ = np.linspace(self.mins_, self.maxs_, self.n_hinges_)

fit_transform(self, x)

Fits scaler to x and returns transformed features.

Parameters:

Name Type Description Default
x Union[numpy.ndarray, pandas.core.frame.DataFrame]

array-like of shape (n_samples, n_features) Input data to fit the scaler and to transform.

required

Returns:

Type Description
ndarray

ndarray with transformed data.

Source code in elapid/features.py
def fit_transform(self, x: ArrayLike) -> np.ndarray:
    """Fit the transformer to x and return the transformed features.

    Args:
        x: array-like of shape (n_samples, n_features)
            Input data to fit the scaler and to transform.

    Returns:
        ndarray with transformed data.
    """
    self.fit(x)
    features = self.transform(x)
    return features

transform(self, x)

Scale covariates according to the feature range.

Parameters:

Name Type Description Default
x Union[numpy.ndarray, pandas.core.frame.DataFrame]

array-like of shape (n_samples, n_features) Input data that will be transformed.

required

Returns:

Type Description
ndarray

ndarray with transformed data.

Source code in elapid/features.py
def transform(self, x: ArrayLike) -> np.ndarray:
    """Compute hinge features for each covariate at each fitted knot.

    Args:
        x: array-like of shape (n_samples, n_features)
            Input data that will be transformed.

    Returns:
        ndarray with transformed data.
    """
    x = np.array(x)
    # replicate the input once per hinge interval (n_hinges_ - 1
    # intervals between consecutive knots)
    xarr = repeat_array(x, self.n_hinges_ - 1, axis=-1)
    # knot arrays broadcast against each sample: all knots but the last
    # for the left hinges, all but the first for the right hinges
    # NOTE(review): repeat_array/left_hinge/right_hinge are project
    # helpers — confirm exact semantics in elapid's utils module
    lharr = repeat_array(self.hinge_indices_[:-1].transpose(), len(x), axis=0)
    rharr = repeat_array(self.hinge_indices_[1:].transpose(), len(x), axis=0)
    lh = left_hinge(xarr, lharr, self.maxs_)
    rh = right_hinge(xarr, self.mins_, rharr)
    # flatten to (n_samples, 2 * (n_hinges_ - 1) * n_features)
    return np.concatenate((lh, rh), axis=2).reshape(x.shape[0], -1)

LinearTransformer (MinMaxScaler)

Applies linear feature transformations to rescale features from 0-1.

Source code in elapid/features.py
class LinearTransformer(MinMaxScaler):
    """Applies linear feature transformations to rescale features from 0-1.

    Thin wrapper around `MinMaxScaler` exposing maxent-style `clamp`
    naming for the underlying `clip` behavior.
    """

    # clip transformed values to the fitted range during prediction
    clamp: bool = None
    # output range for the rescaled features
    # (was annotated `feature_range: None`, a meaningless type annotation)
    feature_range: Tuple[float, float] = None

    def __init__(
        self,
        clamp: bool = MaxentConfig.clamp,
        feature_range: Tuple[float, float] = (0.0, 1.0),
    ):
        """Create a 0-1 linear rescaler.

        Args:
            clamp: set feature values to global mins/maxs during prediction
            feature_range: (min, max) output range for scaled features
        """
        self.clamp = clamp
        self.feature_range = feature_range
        super().__init__(clip=clamp, feature_range=feature_range)

MaxentFeatureTransformer (BaseEstimator)

Transforms covariate data into maxent-format feature data.

Source code in elapid/features.py
class MaxentFeatureTransformer(BaseEstimator):
    """Transforms covariate data into maxent-format feature data."""

    feature_types_: list = None
    clamp_: bool = None
    n_hinge_features_: int = None
    n_threshold_features_: int = None
    categorical_: list = None
    continuous_: list = None
    categorical_pd_: list = None
    continuous_pd_: list = None
    labels_: list = None
    # per-instance registry of fitted transformers, created in __init__
    estimators_: dict = None
    feature_names_: list = None

    def __init__(
        self,
        feature_types: Union[str, list] = MaxentConfig.feature_types,
        clamp: bool = MaxentConfig.clamp,
        n_hinge_features: int = MaxentConfig.n_hinge_features,
        n_threshold_features: int = MaxentConfig.n_threshold_features,
    ):
        """Computes features based on the maxent feature types specified (like linear, quadratic, hinge).

        Implemented using sklearn conventions (with `.fit()` and `.transform()` functions).

        Args:
            feature_types: list of maxent features to generate.
            clamp: set feature values to global mins/maxs during prediction
            n_hinge_features: number of hinge knots to generate
            n_threshold_features: number of threshold features to generate
        """
        self.feature_types_ = validate_feature_types(feature_types)
        self.clamp_ = validate_boolean(clamp)
        self.n_hinge_features_ = validate_numeric_scalar(n_hinge_features)
        self.n_threshold_features_ = validate_numeric_scalar(n_threshold_features)
        # build the estimator registry per instance: the previous class-level
        # dict literal was shared state mutated by fit(), so separate
        # transformer instances silently overwrote each other's estimators
        self.estimators_ = {
            "linear": None,
            "quadratic": None,
            "product": None,
            "threshold": None,
            "hinge": None,
            "categorical": None,
        }

    def _format_covariate_data(self, x: ArrayLike) -> Tuple[np.array, np.array]:
        """Reads input x data and formats it to consistent array dtypes.

        Args:
            x: array-like of shape (n_samples, n_features)

        Returns:
            (continuous, categorical) tuple of ndarrays with continuous and
                categorical covariate data.

        Raises:
            TypeError: if x is neither an ndarray nor a DataFrame.
        """
        if isinstance(x, np.ndarray):
            if self.categorical_ is None:
                # no categorical columns declared: everything is continuous
                con = x
                cat = None
            else:
                con = x[:, self.continuous_]
                cat = x[:, self.categorical_]

        elif isinstance(x, pd.DataFrame):
            # dtype-based split recorded by _format_labels_and_dtypes
            con = x[self.continuous_pd_].to_numpy()
            if len(self.categorical_pd_) > 0:
                cat = x[self.categorical_pd_].to_numpy()
            else:
                cat = None

        else:
            raise TypeError(f"Unsupported x dtype: {type(x)}. Must be pd.DataFrame or np.array")

        return con, cat

    def _format_labels_and_dtypes(self, x: ArrayLike, categorical: list = None, labels: list = None) -> None:
        """Read input x data and lists of categorical data indices and band
            labels to format and store this info for later indexing.

        Args:
            x: array-like of shape (n_samples, n_features)
            categorical: indices indicating which x columns are categorical
            labels: covariate column labels. ignored if x is a pandas DataFrame
        """
        if isinstance(x, np.ndarray):
            nrows, ncols = x.shape
            if categorical is None:
                continuous = list(range(ncols))
            else:
                # continuous columns are whatever isn't flagged categorical
                continuous = list(set(range(ncols)).difference(set(categorical)))
            self.labels_ = labels or make_band_labels(ncols)
            self.categorical_ = categorical
            self.continuous_ = continuous

        elif isinstance(x, pd.DataFrame):
            # drop geometry on a local copy: the previous in-place drop
            # mutated the caller's (Geo)DataFrame as a side effect
            x = x.drop(["geometry"], axis=1, errors="ignore")
            self.labels_ = labels or list(x.columns)

            # store both pandas and numpy indexing of these values
            self.continuous_pd_ = list(x.select_dtypes(exclude="category").columns)
            self.categorical_pd_ = list(x.select_dtypes(include="category").columns)

            all_columns = list(x.columns)
            self.continuous_ = [all_columns.index(item) for item in self.continuous_pd_ if item in all_columns]
            if len(self.categorical_pd_) != 0:
                self.categorical_ = [all_columns.index(item) for item in self.categorical_pd_ if item in all_columns]
            else:
                self.categorical_ = None

    def fit(self, x: ArrayLike, categorical: list = None, labels: list = None) -> None:
        """Fit a transformer for each requested maxent feature type.

        Args:
            x: array-like of shape (n_samples, n_features)
                The data used to fit each feature transformer.
            categorical: indices indicating which x columns are categorical
            labels: covariate column labels. ignored if x is a pandas DataFrame

        Returns:
            None. Updates the transformer with feature fitting parameters.
        """
        self._format_labels_and_dtypes(x, categorical=categorical, labels=labels)
        con, cat = self._format_covariate_data(x)
        nrows, ncols = con.shape

        # track one label per output feature column, in transform() order
        feature_names = []
        if "linear" in self.feature_types_:
            estimator = LinearTransformer(clamp=self.clamp_)
            estimator.fit(con)
            self.estimators_["linear"] = estimator
            feature_names += ["linear"] * estimator.n_features_in_

        if "quadratic" in self.feature_types_:
            estimator = QuadraticTransformer(clamp=self.clamp_)
            estimator.fit(con)
            self.estimators_["quadratic"] = estimator
            feature_names += ["quadratic"] * estimator.estimator.n_features_in_

        if "product" in self.feature_types_:
            estimator = ProductTransformer(clamp=self.clamp_)
            estimator.fit(con)
            self.estimators_["product"] = estimator
            feature_names += ["product"] * estimator.estimator.n_features_in_

        if "threshold" in self.feature_types_:
            estimator = ThresholdTransformer(n_thresholds=self.n_threshold_features_)
            estimator.fit(con)
            self.estimators_["threshold"] = estimator
            feature_names += ["threshold"] * (estimator.n_thresholds_ * ncols)

        if "hinge" in self.feature_types_:
            estimator = HingeTransformer(n_hinges=self.n_hinge_features_)
            estimator.fit(con)
            self.estimators_["hinge"] = estimator
            # each covariate yields left+right hinges per knot interval
            feature_names += ["hinge"] * ((estimator.n_hinges_ - 1) * 2 * ncols)

        if cat is not None:
            estimator = CategoricalTransformer()
            estimator.fit(cat)
            self.estimators_["categorical"] = estimator
            for est in estimator.estimators_:
                feature_names += ["categorical"] * len(est.categories_[0])

        self.feature_names_ = feature_names

    def transform(self, x: ArrayLike) -> np.ndarray:
        """Apply the fitted maxent feature transformations to covariate data.

        Args:
            x: array-like of shape (n_samples, n_features)
                Input data that will be transformed.

        Returns:
            ndarray with transformed data.
        """
        con, cat = self._format_covariate_data(x)
        features = []

        # apply transformers in the same fixed order used during fit so
        # output columns line up with self.feature_names_
        if "linear" in self.feature_types_:
            features.append(self.estimators_["linear"].transform(con))

        if "quadratic" in self.feature_types_:
            features.append(self.estimators_["quadratic"].transform(con))

        if "product" in self.feature_types_:
            features.append(self.estimators_["product"].transform(con))

        if "threshold" in self.feature_types_:
            features.append(self.estimators_["threshold"].transform(con))

        if "hinge" in self.feature_types_:
            features.append(self.estimators_["hinge"].transform(con))

        if cat is not None:
            features.append(self.estimators_["categorical"].transform(cat))

        return np.concatenate(features, axis=1)

    def fit_transform(self, x: ArrayLike, categorical: list = None, labels: list = None) -> np.ndarray:
        """Fits the feature transformers to x and returns transformed features.

        Args:
            x: array-like of shape (n_samples, n_features)
                Input data to fit the transformers and to transform.
            categorical: indices indicating which x columns are categorical
            labels: covariate column labels. ignored if x is a pandas DataFrame

        Returns:
            ndarray with transformed data.
        """
        self.fit(x, categorical=categorical, labels=labels)
        return self.transform(x)

__init__(self, feature_types=['linear', 'hinge', 'product'], clamp=True, n_hinge_features=10, n_threshold_features=10) special

Computes features based on the maxent feature types specified (like linear, quadratic, hinge).

Implemented using sklearn conventions (with .fit() and .transform() functions).

Parameters:

Name Type Description Default
feature_types Union[str, list]

list of maxent features to generate.

['linear', 'hinge', 'product']
clamp bool

set feature values to global mins/maxs during prediction

True
n_hinge_features int

number of hinge knots to generate

10
n_threshold_features int

number of threshold features to generate

10
Source code in elapid/features.py
def __init__(
    self,
    feature_types: Union[str, list] = MaxentConfig.feature_types,
    clamp: bool = MaxentConfig.clamp,
    n_hinge_features: int = MaxentConfig.n_hinge_features,
    n_threshold_features: int = MaxentConfig.n_threshold_features,
):
    """Computes features based on the maxent feature types specified (like linear, quadratic, hinge).

    Implemented using sklearn conventions (with `.fit()` and `.transform()` functions).

    Args:
        feature_types: list of maxent features to generate.
        clamp: set feature values to global mins/maxs during prediction
        n_hinge_features: number of hinge knots to generate
        n_threshold_features: number of threshold features to generate
    """
    self.feature_types_ = validate_feature_types(feature_types)
    self.clamp_ = validate_boolean(clamp)
    self.n_hinge_features_ = validate_numeric_scalar(n_hinge_features)
    self.n_threshold_features_ = validate_numeric_scalar(n_threshold_features)

fit(self, x, categorical=None, labels=None)

Compute the minimum and maximum for scaling.

Parameters:

Name Type Description Default
x Union[numpy.ndarray, pandas.core.frame.DataFrame]

array-like of shape (n_samples, n_features) The data used to compute the per-feature minimum and maximum used for later scaling along the features axis.

required
categorical list

indices indicating which x columns are categorical

None
labels list

covariate column labels. ignored if x is a pandas DataFrame

None

Returns:

Type Description
None

None. Updates the transformer with feature fitting parameters.

Source code in elapid/features.py
def fit(self, x: ArrayLike, categorical: list = None, labels: list = None) -> None:
    """Fit a transformer for each requested maxent feature type.

    Args:
        x: array-like of shape (n_samples, n_features)
            The data used to fit each feature transformer.
        categorical: indices indicating which x columns are categorical
        labels: covariate column labels. ignored if x is a pandas DataFrame

    Returns:
        None. Updates the transformer with feature fitting parameters.
    """
    self._format_labels_and_dtypes(x, categorical=categorical, labels=labels)
    con, cat = self._format_covariate_data(x)
    nrows, ncols = con.shape

    # track one label per output feature column, in transform() order
    # NOTE(review): self.estimators_ appears to be declared as a class-level
    # dict — confirm it is initialized per instance, otherwise instances
    # share (and overwrite) each other's fitted estimators
    feature_names = []
    if "linear" in self.feature_types_:
        estimator = LinearTransformer(clamp=self.clamp_)
        estimator.fit(con)
        self.estimators_["linear"] = estimator
        feature_names += ["linear"] * estimator.n_features_in_

    if "quadratic" in self.feature_types_:
        estimator = QuadraticTransformer(clamp=self.clamp_)
        estimator.fit(con)
        self.estimators_["quadratic"] = estimator
        feature_names += ["quadratic"] * estimator.estimator.n_features_in_

    if "product" in self.feature_types_:
        estimator = ProductTransformer(clamp=self.clamp_)
        estimator.fit(con)
        self.estimators_["product"] = estimator
        feature_names += ["product"] * estimator.estimator.n_features_in_

    if "threshold" in self.feature_types_:
        estimator = ThresholdTransformer(n_thresholds=self.n_threshold_features_)
        estimator.fit(con)
        self.estimators_["threshold"] = estimator
        feature_names += ["threshold"] * (estimator.n_thresholds_ * ncols)

    if "hinge" in self.feature_types_:
        estimator = HingeTransformer(n_hinges=self.n_hinge_features_)
        estimator.fit(con)
        self.estimators_["hinge"] = estimator
        # each covariate yields left+right hinges per knot interval
        feature_names += ["hinge"] * ((estimator.n_hinges_ - 1) * 2 * ncols)

    if cat is not None:
        estimator = CategoricalTransformer()
        estimator.fit(cat)
        self.estimators_["categorical"] = estimator
        for est in estimator.estimators_:
            feature_names += ["categorical"] * len(est.categories_[0])

    self.feature_names_ = feature_names

fit_transform(self, x, categorical=None, labels=None)

Fits scaler to x and returns transformed features.

Parameters:

Name Type Description Default
x Union[numpy.ndarray, pandas.core.frame.DataFrame]

array-like of shape (n_samples, n_features) Input data to fit the scaler and to transform.

required

Returns:

Type Description
ndarray

ndarray with transformed data.

Source code in elapid/features.py
def fit_transform(self, x: ArrayLike, categorical: list = None, labels: list = None) -> np.ndarray:
    """Fit the feature transformers to x and return the transformed features.

    Args:
        x: array-like of shape (n_samples, n_features)
            Input data to fit the transformers and to transform.
        categorical: indices indicating which x columns are categorical
        labels: covariate column labels. ignored if x is a pandas DataFrame

    Returns:
        ndarray with transformed data.
    """
    self.fit(x, categorical=categorical, labels=labels)
    features = self.transform(x)
    return features

transform(self, x)

Scale covariates according to the feature range.

Parameters:

Name Type Description Default
x Union[numpy.ndarray, pandas.core.frame.DataFrame]

array-like of shape (n_samples, n_features) Input data that will be transformed.

required

Returns:

Type Description
ndarray

ndarray with transformed data.

Source code in elapid/features.py
def transform(self, x: ArrayLike) -> np.ndarray:
    """Apply the fitted maxent feature transformations to covariate data.

    Args:
        x: array-like of shape (n_samples, n_features)
            Input data that will be transformed.

    Returns:
        ndarray with transformed data.
    """
    con, cat = self._format_covariate_data(x)

    # apply each fitted transformer in a fixed order so output columns
    # always line up with the feature names assigned during fit
    features = [
        self.estimators_[feature].transform(con)
        for feature in ("linear", "quadratic", "product", "threshold", "hinge")
        if feature in self.feature_types_
    ]
    if cat is not None:
        features.append(self.estimators_["categorical"].transform(cat))

    return np.concatenate(features, axis=1)

ProductTransformer (BaseEstimator)

Computes the column-wise product of an array of input features, rescaling from 0-1.

Source code in elapid/features.py
class ProductTransformer(BaseEstimator):
    """Computes the column-wise product of an array of input features, rescaling from 0-1."""

    # clip transformed values to the fitted range during prediction
    clamp: bool = None
    # output range for the rescaled product features
    feature_range: Tuple[float, float] = None
    # MinMaxScaler fit to the column products
    estimator: BaseEstimator = None

    def __init__(
        self,
        clamp: bool = MaxentConfig.clamp,
        feature_range: Tuple[float, float] = (0.0, 1.0),
    ):
        self.clamp = clamp
        self.feature_range = feature_range
        self.estimator = MinMaxScaler(clip=self.clamp, feature_range=self.feature_range)

    def fit(self, x: ArrayLike):
        """Compute the minimum and maximum of the column products for scaling.

        Args:
            x: array-like of shape (n_samples, n_features)
                The data used to compute the per-feature minimum and maximum
                used for later scaling along the features axis.
        Returns:
            None. Updates the transformer with feature fitting parameters.
        """
        # column_product is a project helper — presumably pairwise products
        # of the input columns; confirm in elapid's utils module
        self.estimator.fit(column_product(np.array(x)))

    def transform(self, x: ArrayLike) -> np.ndarray:
        """Scale column products according to the feature range.

        Args:
            x: array-like of shape (n_samples, n_features)
                Input data that will be transformed.

        Returns:
            ndarray with transformed data.
        """
        return self.estimator.transform(column_product(np.array(x)))

    def fit_transform(self, x: ArrayLike) -> np.ndarray:
        """Fits scaler to x and returns transformed product features.

        Args:
            x: array-like of shape (n_samples, n_features)
                Input data to fit the scaler and to transform.

        Returns:
            ndarray with transformed data.
        """
        self.fit(x)
        return self.transform(x)

fit(self, x)

Compute the minimum and maximum for scaling.

Parameters:

Name Type Description Default
x Union[numpy.ndarray, pandas.core.frame.DataFrame]

array-like of shape (n_samples, n_features) The data used to compute the per-feature minimum and maximum used for later scaling along the features axis.

required

Returns:

Type Description

None. Updates the transformer with feature fitting parameters.

Source code in elapid/features.py
def fit(self, x: ArrayLike):
    """Compute the minimum and maximum of the column products for scaling.

    Args:
        x: array-like of shape (n_samples, n_features)
            The data used to compute the per-feature minimum and maximum
            used for later scaling along the features axis.
    Returns:
        None. Updates the transformer with feature fitting parameters.
    """
    # column_product is a project helper — presumably pairwise products
    # of the input columns; confirm in elapid's utils module
    self.estimator.fit(column_product(np.array(x)))

fit_transform(self, x)

Fits scaler to x and returns transformed features.

Parameters:

Name Type Description Default
x Union[numpy.ndarray, pandas.core.frame.DataFrame]

array-like of shape (n_samples, n_features) Input data to fit the scaler and to transform.

required

Returns:

Type Description
ndarray

ndarray with transformed data.

Source code in elapid/features.py
def fit_transform(self, x: ArrayLike) -> np.ndarray:
    """Fit the scaler on x, then return the rescaled product features.

    Args:
        x: array-like of shape (n_samples, n_features)
            Input data to fit the scaler and to transform.

    Returns:
        ndarray with transformed data.
    """
    self.fit(x)
    scaled = self.transform(x)
    return scaled

transform(self, x)

Scale covariates according to the feature range.

Parameters:

Name Type Description Default
x Union[numpy.ndarray, pandas.core.frame.DataFrame]

array-like of shape (n_samples, n_features) Input data that will be transformed.

required

Returns:

Type Description
ndarray

ndarray with transformed data.

Source code in elapid/features.py
def transform(self, x: ArrayLike) -> np.ndarray:
    """Scale column products according to the feature range.

    Args:
        x: array-like of shape (n_samples, n_features)
            Input data that will be transformed.

    Returns:
        ndarray with transformed data.
    """
    # column_product is a project helper — presumably pairwise products
    # of the input columns; confirm in elapid's utils module
    return self.estimator.transform(column_product(np.array(x)))

QuadraticTransformer (BaseEstimator)

Applies quadratic feature transformations and rescales features from 0-1.

Source code in elapid/features.py
class QuadraticTransformer(BaseEstimator):
    """Applies quadratic feature transformations and rescales features from 0-1."""

    clamp: bool = None
    feature_range: Tuple[float, float] = None
    estimator: BaseEstimator = None

    def __init__(
        self,
        clamp: bool = MaxentConfig.clamp,
        feature_range: Tuple[float, float] = (0.0, 1.0),
    ):
        self.clamp = clamp
        self.feature_range = feature_range
        self.estimator = MinMaxScaler(clip=self.clamp, feature_range=self.feature_range)

    def fit(self, x: ArrayLike) -> None:
        """Compute per-feature scaling parameters from the squared covariates.

        Args:
            x: array-like of shape (n_samples, n_features)
                The data used to compute the per-feature minimum and maximum
                used for later scaling along the features axis.
        Returns:
            None. Updates the transformer with feature fitting parameters.
        """
        squared = np.array(x) ** 2
        self.estimator.fit(squared)

    def transform(self, x: ArrayLike) -> np.ndarray:
        """Square the covariates and scale them to the feature range.

        Args:
            x: array-like of shape (n_samples, n_features)
                Input data that will be transformed.

        Returns:
            ndarray with transformed data.
        """
        squared = np.array(x) ** 2
        return self.estimator.transform(squared)

    def fit_transform(self, x: ArrayLike) -> np.ndarray:
        """Fits scaler to x and returns transformed features.

        Args:
            x: array-like of shape (n_samples, n_features)
                Input data to fit the scaler and to transform.

        Returns:
            ndarray with transformed data.
        """
        self.fit(x)
        return self.transform(x)

    def inverse_transform(self, x: ArrayLike) -> np.ndarray:
        """Revert from transformed features to original covariate values.

        Args:
            x: array-like of shape (n_samples, n_features)
                Transformed feature data to convert to covariate data.

        Returns:
            ndarray with unscaled covariate values.
        """
        unscaled = self.estimator.inverse_transform(np.array(x))
        return unscaled ** 0.5

fit(self, x)

Compute the minimum and maximum for scaling.

Parameters:

Name Type Description Default
x Union[numpy.ndarray, pandas.core.frame.DataFrame]

array-like of shape (n_samples, n_features) The data used to compute the per-feature minimum and maximum used for later scaling along the features axis.

required

Returns:

Type Description
None

None. Updates the transformer with feature fitting parameters.

Source code in elapid/features.py
def fit(self, x: ArrayLike) -> None:
    """Compute per-feature scaling parameters from the squared covariates.

    Args:
        x: array-like of shape (n_samples, n_features)
            The data used to compute the per-feature minimum and maximum
            used for later scaling along the features axis.
    Returns:
        None. Updates the transformer with feature fitting parameters.
    """
    squared = np.array(x) ** 2
    self.estimator.fit(squared)

fit_transform(self, x)

Fits scaler to x and returns transformed features.

Parameters:

Name Type Description Default
x Union[numpy.ndarray, pandas.core.frame.DataFrame]

array-like of shape (n_samples, n_features) Input data to fit the scaler and to transform.

required

Returns:

Type Description
ndarray

ndarray with transformed data.

Source code in elapid/features.py
def fit_transform(self, x: ArrayLike) -> np.ndarray:
    """Fits scaler to x and returns the scaled squared features.

    Args:
        x: array-like of shape (n_samples, n_features)
            Input data to fit the scaler and to transform.

    Returns:
        ndarray with transformed data.
    """
    self.fit(x)
    squared = np.array(x) ** 2
    return self.estimator.transform(squared)

inverse_transform(self, x)

Revert from transformed features to original covariate values.

Parameters:

Name Type Description Default
x Union[numpy.ndarray, pandas.core.frame.DataFrame]

array-like of shape (n_samples, n_features) Transformed feature data to convert to covariate data.

required

Returns:

Type Description
ndarray

ndarray with unscaled covariate values.

Source code in elapid/features.py
def inverse_transform(self, x: ArrayLike) -> np.ndarray:
    """Revert from transformed features to original covariate values.

    Args:
        x: array-like of shape (n_samples, n_features)
            Transformed feature data to convert to covariate data.

    Returns:
        ndarray with unscaled covariate values.
    """
    # undo the min/max scaling first, then undo the squaring
    unscaled = self.estimator.inverse_transform(np.array(x))
    return unscaled ** 0.5

transform(self, x)

Scale covariates according to the feature range.

Parameters:

Name Type Description Default
x Union[numpy.ndarray, pandas.core.frame.DataFrame]

array-like of shape (n_samples, n_features) Input data that will be transformed.

required

Returns:

Type Description
ndarray

ndarray with transformed data.

Source code in elapid/features.py
def transform(self, x: ArrayLike) -> np.ndarray:
    """Square the covariates and scale them to the feature range.

    Args:
        x: array-like of shape (n_samples, n_features)
            Input data that will be transformed.

    Returns:
        ndarray with transformed data.
    """
    squared = np.array(x) ** 2
    return self.estimator.transform(squared)

ThresholdTransformer (BaseEstimator)

Applies binary thresholds to each covariate based on n evenly-spaced thresholds across its min/max range.

Source code in elapid/features.py
class ThresholdTransformer(BaseEstimator):
    """Applies binary thresholds to each covariate based on n evenly-spaced
    thresholds across its min/max range."""

    n_thresholds_: int = None
    mins_: np.ndarray = None
    maxs_: np.ndarray = None
    threshold_indices_: np.ndarray = None

    def __init__(self, n_thresholds: int = MaxentConfig.n_threshold_features):
        self.n_thresholds_ = n_thresholds

    def fit(self, x: ArrayLike):
        """Compute per-feature min/max values and evenly-spaced threshold locations.

        Args:
            x: array-like of shape (n_samples, n_features)
                The data used to compute the per-feature minimum and maximum
                and the threshold locations spaced between them.
        Returns:
            None. Updates the transformer with feature fitting parameters.
        """
        xarr = np.array(x)
        self.mins_ = xarr.min(axis=0)
        self.maxs_ = xarr.max(axis=0)
        self.threshold_indices_ = np.linspace(self.mins_, self.maxs_, self.n_thresholds_)

    def transform(self, x: ArrayLike) -> np.ndarray:
        """Apply the fitted binary thresholds to each covariate.

        Args:
            x: array-like of shape (n_samples, n_features)
                Input data that will be transformed.

        Returns:
            ndarray with transformed 0/1 data.
        """
        xarr = np.array(x)
        n_thresholds = len(self.threshold_indices_)
        # tile the covariates and the thresholds so they compare elementwise
        x_tiled = repeat_array(xarr, n_thresholds, axis=-1)
        t_tiled = repeat_array(self.threshold_indices_.transpose(), len(xarr), axis=0)
        binary = (x_tiled > t_tiled).reshape(xarr.shape[0], -1)
        return binary.astype(np.uint8)

    def fit_transform(self, x: ArrayLike) -> np.ndarray:
        """Fits thresholds to x and returns the binary threshold features.

        Args:
            x: array-like of shape (n_samples, n_features)
                Input data to fit the thresholds and to transform.

        Returns:
            ndarray with transformed data.
        """
        self.fit(x)
        return self.transform(x)

fit(self, x)

Compute the minimum and maximum for scaling.

Parameters:

Name Type Description Default
x Union[numpy.ndarray, pandas.core.frame.DataFrame]

array-like of shape (n_samples, n_features) The data used to compute the per-feature minimum and maximum used for later scaling along the features axis.

required

Returns:

Type Description

None. Updates the transformer with feature fitting parameters.

Source code in elapid/features.py
def fit(self, x: ArrayLike):
    """Compute per-feature min/max values and evenly-spaced threshold locations.

    Args:
        x: array-like of shape (n_samples, n_features)
            The data used to compute the per-feature minimum and maximum
            and the threshold locations spaced between them.
    Returns:
        None. Updates the transformer with feature fitting parameters.
    """
    xarr = np.array(x)
    self.mins_ = xarr.min(axis=0)
    self.maxs_ = xarr.max(axis=0)
    self.threshold_indices_ = np.linspace(self.mins_, self.maxs_, self.n_thresholds_)

fit_transform(self, x)

Fits scaler to x and returns transformed features.

Parameters:

Name Type Description Default
x Union[numpy.ndarray, pandas.core.frame.DataFrame]

array-like of shape (n_samples, n_features) Input data to fit the scaler and to transform.

required

Returns:

Type Description
ndarray

ndarray with transformed data.

Source code in elapid/features.py
def fit_transform(self, x: ArrayLike) -> np.ndarray:
    """Fits the transformer to x and returns the transformed features.

    Args:
        x: array-like of shape (n_samples, n_features)
            Input data to fit the transformer and to transform.

    Returns:
        ndarray with transformed data.
    """
    self.fit(x)
    return self.transform(x)

transform(self, x)

Scale covariates according to the feature range.

Parameters:

Name Type Description Default
x Union[numpy.ndarray, pandas.core.frame.DataFrame]

array-like of shape (n_samples, n_features) Input data that will be transformed.

required

Returns:

Type Description
ndarray

ndarray with transformed data.

Source code in elapid/features.py
def transform(self, x: ArrayLike) -> np.ndarray:
    """Apply the binary thresholds computed in `fit` to each covariate.

    Each covariate column is compared against its fitted threshold values,
    producing one 0/1 output column per (covariate, threshold) pair.

    Args:
        x: array-like of shape (n_samples, n_features)
            Input data that will be transformed.

    Returns:
        ndarray with transformed 0/1 data (dtype uint8).
    """
    x = np.array(x)
    # tile x so each covariate can be compared against every threshold
    # NOTE(review): assumes repeat_array tiles along a new trailing axis — confirm
    xarr = repeat_array(x, len(self.threshold_indices_), axis=-1)
    # tile the fitted thresholds across all samples
    tarr = repeat_array(self.threshold_indices_.transpose(), len(x), axis=0)
    # binary comparison, flattened to (n_samples, n_features * n_thresholds)
    thresh = (xarr > tarr).reshape(x.shape[0], -1)
    return thresh.astype(np.uint8)

column_product(array)

Computes the column-wise product of a 2D array.

Parameters:

Name Type Description Default
array ndarray

array-like of shape (n_samples, n_features)

required

Returns:

Type Description
ndarray

ndarray of shape (n_samples, n_features * (n_features - 1) / 2)

Source code in elapid/features.py
def column_product(array: np.ndarray) -> np.ndarray:
    """Computes the column-wise pairwise products of a 2D array.

    Each output column is the elementwise product of one unique pair of input
    columns, so an input with k columns yields k * (k - 1) / 2 output columns.
    (The original docstring's `factorial(n_features-1)` was incorrect.)

    Args:
        array: array-like of shape (n_samples, n_features)

    Returns:
        ndarray of shape (n_samples, n_features * (n_features - 1) / 2), or
            the input array unchanged when n_features == 1.
    """
    nrows, ncols = array.shape

    # nothing to pair with a single column
    if ncols == 1:
        return array

    # multiply each column against all columns to its right
    products = [
        array[:, col].reshape(nrows, 1) * array[:, col + 1 :]
        for col in range(ncols - 1)
    ]
    return np.concatenate(products, axis=1)

compute_lambdas(y, weights, reg, n_lambdas=100)

Computes lambda parameter values for elastic lasso fits.

Parameters:

Name Type Description Default
y Union[numpy.ndarray, pandas.core.frame.DataFrame]

array-like of shape (n_samples,) with binary presence/background (1/0) values

required
weights Union[numpy.ndarray, pandas.core.frame.DataFrame]

per-sample model weights

required
reg Union[numpy.ndarray, pandas.core.frame.DataFrame]

per-feature regularization coefficients

required
n_lambdas int

number of lambda values to estimate

100

Returns:

Type Description
lambdas

Array of lambda scores of length n_lambdas

Source code in elapid/features.py
def compute_lambdas(
    y: ArrayLike, weights: ArrayLike, reg: ArrayLike, n_lambdas: int = MaxentConfig.n_lambdas
) -> np.ndarray:
    """Computes lambda parameter values for elastic lasso fits.

    Produces a log-spaced sequence of n_lambdas regularization strengths,
    scaled by the mean regularization and the presence/weight ratio.

    Args:
        y: array-like of shape (n_samples,) with binary presence/background (1/0) values
        weights: per-sample model weights
        reg: per-feature regularization coefficients
        n_lambdas: number of lambda values to estimate

    Returns:
        lambdas: array of lambda scores of length n_lambdas
    """
    presence_count = np.sum(y)
    weight_total = np.sum(weights)
    reg_mean = np.mean(reg)
    # exponents decrease from 4 to 0, giving lambdas from large to small
    exponents = np.linspace(4, 0, n_lambdas)
    return (10 ** exponents) * reg_mean * (presence_count / weight_total)

compute_regularization(y, z, feature_labels, beta_multiplier=1.0, beta_lqp=1.0, beta_threshold=1.0, beta_hinge=1.0, beta_categorical=1.0)

Computes variable regularization values for all feature data.

Parameters:

Name Type Description Default
y Union[numpy.ndarray, pandas.core.frame.DataFrame]

array-like of shape (n_samples,) with binary presence/background (1/0) values

required
z ndarray

model features (transformations applied to covariates)

required
feature_labels List[str]

list of length n_features, with labels identifying each column's feature type with options ["linear", "quadratic", "product", "threshold", "hinge", "categorical"]

required
beta_multiplier float

scaler for all regularization parameters. higher values exclude more features

1.0
beta_lqp float

scaler for linear, quadratic and product feature regularization

1.0
beta_threshold float

scaler for threshold feature regularization

1.0
beta_hinge float

scaler for hinge feature regularization

1.0
beta_categorical float

scaler for categorical feature regularization

1.0

Returns:

Type Description
max_reg

Array with per-feature regularization parameters

Source code in elapid/features.py
def compute_regularization(
    y: ArrayLike,
    z: np.ndarray,
    feature_labels: List[str],
    beta_multiplier: float = MaxentConfig.beta_multiplier,
    beta_lqp: float = MaxentConfig.beta_lqp,
    beta_threshold: float = MaxentConfig.beta_threshold,
    beta_hinge: float = MaxentConfig.beta_hinge,
    beta_categorical: float = MaxentConfig.beta_categorical,
) -> np.ndarray:
    """Computes variable regularization values for all feature data.

    Args:
        y: array-like of shape (n_samples,) with binary presence/background (1/0) values
        z: model features (transformations applied to covariates)
        feature_labels: list of length n_features, with labels identifying each column's feature type
            with options ["linear", "quadratic", "product", "threshold", "hinge", "categorical"]
        beta_multiplier: scaler for all regularization parameters. higher values exclude more features
        beta_lqp: scaler for linear, quadratic and product feature regularization
        beta_threshold: scaler for threshold feature regularization
        beta_hinge: scaler for hinge feature regularization
        beta_categorical: scaler for categorical feature regularization
            (note: this previously defaulted to MaxentConfig.beta_hinge by mistake)

    Returns:
        max_reg: Array with per-feature regularization parameters
    """
    # compute regularization based on presence-only locations
    z1 = z[y == 1]
    nrows, ncols = z1.shape
    labels = np.array(feature_labels)
    nlabels = len(feature_labels)

    assert nlabels == ncols, f"number of feature_labels ({nlabels}) must match number of features ({ncols})"

    # create arrays to store the regularization params
    base_regularization = np.zeros(ncols)
    hinge_regularization = np.zeros(ncols)
    threshold_regularization = np.zeros(ncols)

    # linear, quadratic and product features share one regularization table,
    # chosen by the most complex lqp feature type present
    if "product" in labels:
        table_lqp = RegularizationConfig.product
    elif "quadratic" in labels:
        table_lqp = RegularizationConfig.quadratic
    else:
        table_lqp = RegularizationConfig.linear

    # all three lqp feature types use the same table and multiplier
    for label in ("linear", "quadratic", "product"):
        if label in labels:
            lqp_idxs = labels == label
            fr_max, fr_min = table_lqp
            ap = np.interp(nrows, fr_max, fr_min)
            base_regularization[lqp_idxs] = beta_lqp * ap / np.sqrt(nrows)

    if "threshold" in labels:
        threshold_idxs = labels == "threshold"
        fr_max, fr_min = RegularizationConfig.threshold
        ap = np.interp(nrows, fr_max, fr_min)
        base_regularization[threshold_idxs] = beta_threshold * ap / np.sqrt(nrows)

        # increase regularization for uniform threshold values
        all_zeros = np.all(z1 == 0, axis=0)
        all_ones = np.all(z1 == 1, axis=0)
        threshold_regularization[all_zeros] = 1
        threshold_regularization[all_ones] = 1

    if "hinge" in labels:
        hinge_idxs = labels == "hinge"
        fr_max, fr_min = RegularizationConfig.hinge
        ap = np.interp(nrows, fr_max, fr_min)
        base_regularization[hinge_idxs] = beta_hinge * ap / np.sqrt(nrows)

        # increase regularization for extreme hinge values
        hinge_std = np.std(z1[:, hinge_idxs], ddof=1, axis=0)
        hinge_sqrt = np.zeros(len(hinge_std)) + (1 / np.sqrt(nrows))
        std = np.max((hinge_std, hinge_sqrt), axis=0)
        hinge_regularization[hinge_idxs] = (0.5 * std) / np.sqrt(nrows)

    if "categorical" in labels:
        categorical_idxs = labels == "categorical"
        fr_max, fr_min = RegularizationConfig.categorical
        ap = np.interp(nrows, fr_max, fr_min)
        base_regularization[categorical_idxs] = beta_categorical * ap / np.sqrt(nrows)

    # compute the maximum regularization based on a few different approaches
    default_regularization = 0.001 * (np.max(z, axis=0) - np.min(z, axis=0))
    variance_regularization = np.std(z1, ddof=1, axis=0) * base_regularization
    max_regularization = np.max(
        (default_regularization, variance_regularization, hinge_regularization, threshold_regularization), axis=0
    )

    # apply the final scaling factor
    max_regularization *= beta_multiplier

    return max_regularization

compute_weights(y, pbr=100)

Compute Maxent-format per-sample model weights.

Parameters:

Name Type Description Default
y Union[numpy.ndarray, pandas.core.frame.DataFrame]

array-like of shape (n_samples,) with binary presence/background (1/0) values

required
pbr int

presence-to-background weight ratio. pbr=100 assigns each background sample 100x the weight of each presence sample.

100

Returns:

Type Description
weights

array with glmnet-formatted sample weights

Source code in elapid/features.py
def compute_weights(y: ArrayLike, pbr: int = 100) -> np.ndarray:
    """Compute Maxent-format per-sample model weights.

    Presence samples (y == 1) receive a weight of 1 and background samples
    (y == 0) receive a weight of pbr. The original docstring said background
    samples get 1/100 the weight of presence samples, which contradicts the
    formula below.

    Args:
        y: array-like of shape (n_samples,) with binary presence/background (1/0) values
        pbr: presence-to-background weight ratio. pbr=100 assigns each background
            sample 100x the weight of each presence sample.

    Returns:
        weights: array with glmnet-formatted sample weights
    """
    # y=1 -> 1 + 0 = 1; y=0 -> 0 + pbr = pbr
    weights = np.array(y + (1 - y) * pbr)
    return weights

left_hinge(x, mn, mx)

Computes hinge transformation values.

Parameters:

Name Type Description Default
x Union[numpy.ndarray, pandas.core.frame.DataFrame]

Array-like of covariate values

required
mn float

Minimum covariate value to fit hinges to

required
mx float

Maximum covariate value to fit hinges to

required

Returns:

Type Description
ndarray

Array of hinge features

Source code in elapid/features.py
def left_hinge(x: ArrayLike, mn: np.ndarray, mx: float) -> np.ndarray:
    """Computes left-hinge transformation values, clamped to [0, 1].

    Args:
        x: Array-like of covariate values
        mn: Minimum covariate value(s) to fit hinges to. Must be array-like:
            the code reads `mn.shape`, so the original `float` annotation
            was misleading.
        mx: Maximum covariate value(s) to fit hinges to

    Returns:
        Array of hinge features
    """
    # linear ramp (x - mn) / (mx - mn), clamped to [0, 1]
    # NOTE(review): assumes repeat_array tiles mx to match mn's trailing
    # dimension for elementwise division — confirm against repeat_array
    return np.minimum(1, np.maximum(0, (x - mn) / (repeat_array(mx, mn.shape[-1], axis=1) - mn)))

right_hinge(x, mn, mx)

Computes hinge transformation values.

Parameters:

Name Type Description Default
x Union[numpy.ndarray, pandas.core.frame.DataFrame]

Array-like of covariate values

required
mn float

Minimum covariate value to fit hinges to

required
mx float

Maximum covariate value to fit hinges to

required

Returns:

Type Description
ndarray

Array of hinge features

Source code in elapid/features.py
def right_hinge(x: ArrayLike, mn: float, mx: np.ndarray) -> np.ndarray:
    """Computes right-hinge transformation values, clamped to [0, 1].

    Args:
        x: Array-like of covariate values
        mn: Minimum covariate value(s) to fit hinges to
        mx: Maximum covariate value(s) to fit hinges to. Must be array-like:
            the code reads `mx.shape`, so the original `float` annotation
            was misleading.

    Returns:
        Array of hinge features
    """
    # NOTE(review): assumes repeat_array tiles mn to match mx's trailing
    # dimension for elementwise arithmetic — confirm against repeat_array
    mn_broadcast = repeat_array(mn, mx.shape[-1], axis=1)
    # linear ramp (x - mn) / (mx - mn), clamped to [0, 1]
    return np.minimum(1, np.maximum(0, (x - mn_broadcast) / (mx - mn_broadcast)))
Back to top