# Source code for coonfit.helper

"""
Utility functions for the coonfit regression workflow.

This module provides supporting functions used during predictor validation and
data quality assessment. It includes tools for detecting rank-deficient
predictor matrices (which would cause the normal equations to be singular) and
for counting the number of usable pixels within a boolean selector mask.
"""

from __future__ import annotations
import numpy as np


def check_rank_deficiency(
    array: np.ndarray,
    return_by_issue_type: bool = False,
) -> dict[int, str] | dict[str, list[int]]:
    """Check if matrix is rank deficient and identify problematic columns.

    Returns a dictionary with column indices (key) and issue description
    (value). An empty dictionary indicates that no rank deficiency was
    detected.

    Parameters
    ----------
    array : NDArray
        Matrix to check for rank deficiency. Must be two-dimensional
        (rows x columns). Array-likes such as nested lists are converted
        with :func:`numpy.asarray`.
    return_by_issue_type : bool, optional
        If True, returns nested dictionary separating issues by type:
        {"linear_dependent": [...], "all_zero": [...]}

    Returns
    -------
    dict[int, str] or dict[str, list[int]]
        Problematic columns and their issues. When the matrix has full
        column rank an empty dictionary is returned regardless of
        ``return_by_issue_type``.

    Raises
    ------
    ValueError
        If ``array`` is not two-dimensional.

    Notes
    -----
    Uses :func:`numpy.linalg.matrix_rank` to determine the rank of the array.
    A column containing only zeros is reported as "All zero column". Any
    other column is reported as "Linear dependent column" when deleting it
    leaves the rank of the matrix unchanged.

    See Also
    --------
    :func:`~coonfit.parallel.get_XT_X_dependency` : Check predictors for linear dependency.
    """
    matrix = np.asarray(array)
    if matrix.ndim != 2:
        raise ValueError(
            f"array must be two-dimensional, got {matrix.ndim} dimension(s)"
        )

    num_columns = matrix.shape[1]
    rank = np.linalg.matrix_rank(matrix)
    if rank == num_columns:
        # Full column rank: nothing to report.
        return {}

    # One vectorized pass instead of a per-column comparison inside the loop.
    is_all_zero = np.all(matrix == 0, axis=0)

    all_zero_cols: list[int] = []
    linear_dependent_cols: list[int] = []
    for col in range(num_columns):
        if is_all_zero[col]:
            all_zero_cols.append(col)
        # Deleting a column can never raise the rank. If the rank stays the
        # same, the column contributed nothing that the remaining columns do
        # not already span, i.e. it is linearly dependent on them.
        elif np.linalg.matrix_rank(np.delete(matrix, col, axis=1)) == rank:
            linear_dependent_cols.append(col)

    if return_by_issue_type:
        return {
            "linear_dependent": linear_dependent_cols,
            "all_zero": all_zero_cols,
        }

    # Linear-dependent entries first, then all-zero ones (original ordering).
    issues = dict.fromkeys(linear_dependent_cols, "Linear dependent column")
    issues.update(dict.fromkeys(all_zero_cols, "All zero column"))
    return issues
def usable_pixels_info(all_pixels: int, data_pixels: int) -> None:
    """Print the fraction of usable pixels.

    Parameters
    ----------
    all_pixels : int
        Total number of pixels in the dataset
    data_pixels : int
        Number of pixels that contain usable data

    Notes
    -----
    The percentage is rounded to two decimals. If ``all_pixels`` is zero the
    percentage is reported as 0.0 instead of raising
    :class:`ZeroDivisionError`.

    See Also
    --------
    :func:`usable_pixels_count` : Count the number of usable pixels.

    Examples
    --------
    >>> usable_pixels_info(1000, 750)
    Of all_pixels=1000 there are data_pixels=750, i.e. 75.0% are usable
    """
    # An empty dataset must not abort the workflow from inside a purely
    # informational message, so the division is guarded.
    if all_pixels:
        percentage = round(100 * data_pixels / all_pixels, 2)
    else:
        percentage = 0.0
    print(
        f"Of {all_pixels=} there are {data_pixels=}, i.e. "
        f"{percentage}% are usable"
    )
def usable_pixels_count(selector: np.ndarray) -> int:
    """Count the number of usable pixels determined by the selector.

    Parameters
    ----------
    selector : NDArray
        Boolean array where True indicates a usable pixel and False
        indicates a pixel to be excluded. Arrays of any shape are accepted;
        for non-boolean input every non-zero element counts as usable.

    Returns
    -------
    int
        Number of True values in the selector array (count of usable pixels).
        Zero if the selector is empty or contains no True value.

    Notes
    -----
    Uses :func:`numpy.count_nonzero` to count occurrences.

    See Also
    --------
    :func:`usable_pixels_info` : Print the fraction of usable pixels.

    Examples
    --------
    >>> selector = np.array([True, True, False, True, False])
    >>> usable_pixels_count(selector)
    3
    """
    # count_nonzero counts truthy elements directly. This avoids sorting the
    # data and stays correct for 0/1 integer masks, where indexing the counts
    # from np.unique by value would pick the wrong entry.
    return int(np.count_nonzero(selector))