Source code for submarit.core.base

"""Base classes for SUBMARIT core functionality."""

from abc import ABC, abstractmethod
from typing import Any, Dict, Optional, Tuple

import numpy as np
from numpy.typing import NDArray


class BaseEstimator(ABC):
    """Base class for all estimators in SUBMARIT.

    All estimators should specify all the parameters that can be set at the
    class level in their ``__init__`` as explicit keyword arguments.
    """

    def __init__(self):
        """Initialize the base estimator."""

    def get_params(self, deep: bool = True) -> Dict[str, Any]:
        """Get parameters for this estimator.

        A parameter is any attribute listed by ``dir(self)`` whose name does
        not start with an underscore and whose value is not callable.
        Read-only properties (a ``property`` without a setter) are skipped:
        they cannot be assigned, so reporting them would make
        ``est.set_params(**est.get_params())`` raise ``AttributeError``.

        Args:
            deep: Accepted for interface compatibility. This implementation
                does not descend into sub-objects, so the value has no effect.

        Returns:
            Parameter names mapped to their values
        """
        cls = type(self)
        params: Dict[str, Any] = {}
        for key in dir(self):
            if key.startswith('_'):
                continue
            # Look the name up on the class so a property getter is not
            # triggered just to decide whether the attribute is a parameter.
            class_attr = getattr(cls, key, None)
            if isinstance(class_attr, property) and class_attr.fset is None:
                continue
            value = getattr(self, key)
            if not callable(value):
                params[key] = value
        return params

    def set_params(self, **params) -> "BaseEstimator":
        """Set parameters for this estimator.

        Every keyword is assigned with ``setattr``; names are not checked
        against the existing attributes.

        Args:
            **params: Estimator parameters

        Returns:
            Self
        """
        for key, value in params.items():
            setattr(self, key, value)
        return self
class BaseClusterer(BaseEstimator):
    """Shared scaffolding for clustering algorithms.

    Holds the requested cluster count and random seed, and exposes the
    private ``_labels`` / ``_n_iter`` slots through the read-only
    ``labels_`` and ``n_iter_`` properties. Both slots start as ``None``;
    ``predict`` and ``fit_predict`` read ``_labels``, so a concrete ``fit``
    has to store its result there for them to return it.
    """

    def __init__(self, n_clusters: int = 2, random_state: Optional[int] = None):
        """Store the configuration and reset the fitted state.

        Args:
            n_clusters: Number of clusters
            random_state: Random seed for reproducibility
        """
        super().__init__()
        self.n_clusters, self.random_state = n_clusters, random_state
        # Nothing has been fitted yet.
        self._labels = self._n_iter = None

    @abstractmethod
    def fit(self, X: NDArray[np.float64]) -> "BaseClusterer":
        """Fit the clustering model.

        Args:
            X: Input data matrix

        Returns:
            Self
        """

    def predict(self, X: NDArray[np.float64]) -> NDArray[np.int64]:
        """Return the stored cluster labels.

        ``X`` is not inspected here; the labels currently held in
        ``_labels`` are returned as they are.

        Args:
            X: Input data matrix

        Returns:
            Cluster labels

        Raises:
            ValueError: If no labels have been stored yet.
        """
        fitted_labels = self._labels
        if fitted_labels is not None:
            return fitted_labels
        raise ValueError("Model must be fitted before prediction")

    def fit_predict(self, X: NDArray[np.float64]) -> NDArray[np.int64]:
        """Run ``fit`` on ``X`` and return the labels stored afterwards.

        Args:
            X: Input data matrix

        Returns:
            Cluster labels
        """
        self.fit(X)
        labels_after_fit = self._labels
        return labels_after_fit

    @property
    def labels_(self) -> Optional[NDArray[np.int64]]:
        """Cluster labels, or ``None`` while nothing has been stored."""
        return self._labels

    @property
    def n_iter_(self) -> Optional[int]:
        """Number of iterations, or ``None`` while nothing has been stored."""
        return self._n_iter
class BaseEvaluator(BaseEstimator):
    """Interface for cluster evaluation metrics.

    Concrete evaluators must implement ``evaluate``.
    """

    @abstractmethod
    def evaluate(
        self,
        X: NDArray[np.float64],
        labels: NDArray[np.int64],
    ) -> Dict[str, float]:
        """Evaluate clustering results.

        Args:
            X: Input data matrix
            labels: Cluster labels

        Returns:
            Dictionary of evaluation metrics
        """
class BaseValidator(BaseEstimator):
    """Interface for validation methods.

    Concrete validators must implement ``validate``.
    """

    @abstractmethod
    def validate(
        self,
        X: NDArray[np.float64],
        clusterer: BaseClusterer,
        **kwargs,
    ) -> Dict[str, Any]:
        """Validate clustering results.

        Args:
            X: Input data matrix
            clusterer: Clustering algorithm instance
            **kwargs: Additional validation parameters

        Returns:
            Validation results
        """
class ClusteringResult:
    """Container for clustering results."""

    def __init__(
        self,
        labels: NDArray[np.int64],
        log_likelihood: float,
        n_iter: int,
        converged: bool,
        metadata: Optional[Dict[str, Any]] = None,
    ):
        """Initialize clustering result.

        Args:
            labels: Cluster assignments
            log_likelihood: Final log-likelihood value
            n_iter: Number of iterations
            converged: Whether algorithm converged
            metadata: Additional metadata. When omitted (``None``) a new
                empty dict is created for this instance; any dict that is
                passed in, including an empty one, is stored as-is.
        """
        self.labels = labels
        self.log_likelihood = log_likelihood
        self.n_iter = n_iter
        self.converged = converged
        # Test against None rather than truthiness so a caller-supplied
        # empty dict is kept instead of being replaced by a fresh one.
        self.metadata = {} if metadata is None else metadata

    def __repr__(self) -> str:
        """String representation.

        ``n_clusters`` is the number of distinct values in ``labels``.
        """
        return (
            f"ClusteringResult(n_clusters={len(np.unique(self.labels))}, "
            f"log_likelihood={self.log_likelihood:.4f}, "
            f"n_iter={self.n_iter}, converged={self.converged})"
        )
class EvaluationResult:
    """Container for evaluation results."""

    def __init__(
        self,
        log_likelihood: float,
        z_score: float,
        diff_value: float,
        p_value: Optional[float] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ):
        """Initialize evaluation result.

        Args:
            log_likelihood: Log-likelihood value
            z_score: Z-score statistic
            diff_value: Difference metric
            p_value: P-value if available
            metadata: Additional metadata. When omitted (``None``) a new
                empty dict is created for this instance; any dict that is
                passed in, including an empty one, is stored as-is.
        """
        self.log_likelihood = log_likelihood
        self.z_score = z_score
        self.diff_value = diff_value
        self.p_value = p_value
        # Test against None rather than truthiness so a caller-supplied
        # empty dict is kept instead of being replaced by a fresh one.
        self.metadata = {} if metadata is None else metadata

    def __repr__(self) -> str:
        """String representation.

        Shows ``log_likelihood``, ``z_score`` and ``diff_value`` with four
        decimals; ``p_value`` and ``metadata`` are not included.
        """
        return (
            f"EvaluationResult(log_likelihood={self.log_likelihood:.4f}, "
            f"z_score={self.z_score:.4f}, diff_value={self.diff_value:.4f})"
        )