Skip to content
Open
6 changes: 3 additions & 3 deletions chemotools/_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,12 +91,12 @@ def get_model_parameters(estimator: EstimatorType) -> Tuple[int, int, int]:
If *estimator* is neither ``_BasePCA`` nor ``_PLS``.
"""
if isinstance(estimator, _BasePCA):
return estimator.n_features_in_, estimator.n_components_, estimator.n_samples_ # type: ignore[unresolved-attribute] # sklearn fitted attributes
return estimator.n_features_in_, estimator.n_components_, estimator.n_samples_ # type: ignore[ty:unresolved-attribute] # sklearn fitted attributes
if isinstance(estimator, _PLS):
return (
estimator.n_features_in_, # type: ignore[unresolved-attribute] # sklearn fitted attribute
estimator.n_features_in_, # type: ignore[ty:unresolved-attribute] # sklearn fitted attribute
estimator.n_components,
len(estimator.x_scores_), # type: ignore[unresolved-attribute] # sklearn fitted attribute
len(estimator.x_scores_), # type: ignore[ty:unresolved-attribute] # sklearn fitted attribute
)
raise TypeError(
f"Cannot extract parameters from {type(estimator).__name__}. "
Expand Down
7 changes: 3 additions & 4 deletions chemotools/derivative/_norris_william.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# License: MIT

from numbers import Integral
from typing import Literal

import numpy as np
from scipy.ndimage import convolve1d
Expand Down Expand Up @@ -77,9 +78,7 @@ class NorrisWilliams(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
"window_length": [Interval(Integral, 3, None, closed="left")],
"gap_size": [Interval(Integral, 1, None, closed="left")],
"deriv": [Interval(Integral, 1, 2, closed="both")],
"mode": [
StrOptions({"nearest", "constant", "reflect", "wrap", "mirror", "interp"})
],
"mode": [StrOptions({"nearest", "constant", "reflect", "wrap", "mirror"})],
"window_size": [
Interval(Integral, 3, None, closed="left"),
deprecated_parameter_constraint(),
Expand All @@ -95,7 +94,7 @@ def __init__(
window_length: int = 5,
gap_size: int = 3,
deriv: int = 1,
mode="nearest",
mode: Literal["nearest", "constant", "reflect", "wrap", "mirror"] = "nearest",
window_size=DEPRECATED_PARAMETER,
derivative_order=DEPRECATED_PARAMETER,
):
Expand Down
89 changes: 89 additions & 0 deletions chemotools/domain_adaption/_direct_standardization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
"""
The :mod:`chemotools.domain_adaption._direct_standardization`
module implements a :class:`DirectStandardization` transformer.
"""

# Authors: Ruggero Guerrini
# License: MIT

import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin


class DirectStandardization(BaseEstimator, TransformerMixin):
    """
    Direct standardization transformer for calibration transfer.

    ``X_master`` contains reference measurements acquired on the master
    instrument; ``X_slave`` contains the corresponding measurements of the
    same samples acquired on the slave instrument.  The transformer
    estimates a linear mapping from the slave space to the master space.
    After fitting, new slave spectra can be transformed into the master
    space.

    Parameters
    ----------
    None

    Attributes
    ----------
    T : np.ndarray of shape (n_features, n_features)
        The p x p matrix that solves ``X_slave @ T = X_master`` in the
        least-squares sense.

    Examples
    --------
    >>> X_slave = np.random.randn(100, 50)
    >>> X_master = X_slave * 2 + 5
    >>> DS = DirectStandardization().fit(X_slave, X_master)
    >>> X_slave_transf = DS.transform(X_slave)
    """

    def __init__(self):
        # Initialize the fitted attribute so transform() can detect an
        # unfitted model instead of raising AttributeError.
        self.T = None

    def fit(self, X_slave: np.ndarray, X_master: np.ndarray) -> "DirectStandardization":
        """
        Fit the DirectStandardization to the input data.

        Parameters
        ----------
        X_slave : np.ndarray of shape (n_samples, n_features)
            The slave data.
        X_master : np.ndarray of shape (n_samples, n_features)
            The master data.

        Returns
        -------
        self : DirectStandardization
            The fitted model.

        Raises
        ------
        ValueError
            If the master and slave arrays do not have the same shape.
        """
        X_slave = np.asarray(X_slave, dtype=float)
        X_master = np.asarray(X_master, dtype=float)
        if X_master.shape != X_slave.shape:
            raise ValueError("master and slave must have the same dimensions")
        # Least-squares solution of X_slave @ T = X_master via the
        # Moore-Penrose pseudo-inverse.
        self.T = np.linalg.pinv(X_slave) @ X_master
        return self

    def transform(self, X_slave) -> np.ndarray:
        """
        Transform slave data into the master space.

        Parameters
        ----------
        X_slave : np.ndarray of shape (n_samples, n_features)
            The input data to transform.

        Returns
        -------
        X_transf : np.ndarray of shape (n_samples, n_features)
            The transformed data.

        Raises
        ------
        RuntimeError
            If the model has not been fitted yet.
        """
        if self.T is None:
            # The public API is fit(), so the message refers to it
            # (the original message said "train()").
            raise RuntimeError("Model not trained. Call fit() first.")
        return X_slave @ self.T
Comment on lines +87 to +89
Copy link

Copilot AI Apr 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

transform() checks if self.T is None, but self.T is never initialized in __init__, so calling transform() before fit() will raise AttributeError instead of the intended error. Use sklearn.utils.validation.check_is_fitted (and a fitted attribute like T_) or initialize the attribute in __init__ and check with hasattr/check_is_fitted.

Copilot uses AI. Check for mistakes.
2 changes: 1 addition & 1 deletion chemotools/feature_selection/_index_selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def _get_support_mask(self):
check_is_fitted(self)

# Create the mask
mask = np.zeros(self.n_features_in_, dtype=bool) # type: ignore[unresolved-attribute] # sklearn fitted attribute
mask = np.zeros(self.n_features_in_, dtype=bool) # type: ignore[ty:unresolved-attribute] # sklearn fitted attribute
mask[self.features_index_] = True

return mask
2 changes: 1 addition & 1 deletion chemotools/feature_selection/_range_cut.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def _get_support_mask(self):
check_is_fitted(self, ["start_index_", "end_index_"])

# Create the mask
mask = np.zeros(self.n_features_in_, dtype=bool) # type: ignore[unresolved-attribute] # sklearn fitted attribute
mask = np.zeros(self.n_features_in_, dtype=bool) # type: ignore[ty:unresolved-attribute] # sklearn fitted attribute
mask[self.start_index_ : self.end_index_] = True

return mask
4 changes: 2 additions & 2 deletions chemotools/feature_selection/_vip_selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,14 +144,14 @@ def _calculate_features(self, X: np.ndarray) -> np.ndarray:
np.linalg.norm(self.estimator_.y_loadings_, ord=2, axis=0) ** 2
)
sum_of_squares_x_scores = (
np.linalg.norm(self.estimator_.x_scores_, ord=2, axis=0) ** 2 # type: ignore[unresolved-attribute] # sklearn fitted attribute
np.linalg.norm(self.estimator_.x_scores_, ord=2, axis=0) ** 2 # type: ignore[ty:unresolved-attribute] # sklearn fitted attribute
)

# Calculate the sum of squares
sum_of_squares = sum_of_squares_y_loadings * sum_of_squares_x_scores

# Calculate the numerator
numerator = self.estimator_.n_features_in_ * np.sum( # type: ignore[unresolved-attribute] # sklearn fitted attribute
numerator = self.estimator_.n_features_in_ * np.sum( # type: ignore[ty:unresolved-attribute] # sklearn fitted attribute
sum_of_squares * self.estimator_.x_weights_**2,
axis=1,
)
Expand Down
4 changes: 2 additions & 2 deletions chemotools/inspector/_pca_inspector.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ def get_loadings(
loadings : ndarray of shape (n_features, n_components_selected)
PCA loadings (components transposed)
"""
loadings = self.estimator.components_.T # type: ignore[unresolved-attribute] # sklearn fitted attribute
loadings = self.estimator.components_.T # type: ignore[ty:unresolved-attribute] # sklearn fitted attribute
return select_components(loadings, components)

# ------------------------------------------------------------------
Expand All @@ -318,7 +318,7 @@ def get_explained_variance_ratio(self) -> np.ndarray:
explained_variance_ratio : ndarray of shape (n_components,)
Explained variance ratio
"""
return self.estimator.explained_variance_ratio_ # type: ignore[unresolved-attribute] # sklearn fitted attribute
return self.estimator.explained_variance_ratio_ # type: ignore[ty:unresolved-attribute] # sklearn fitted attribute

# ------------------------------------------------------------------
# Main inspection method
Expand Down
2 changes: 1 addition & 1 deletion chemotools/inspector/_preprocessing_inspector.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,7 @@ def inspect(
):
# Apply only this step's transform to the previous cumulative output
for ds_name in datasets:
cumulative[ds_name] = _step_transformer.transform(cumulative[ds_name]) # type: ignore[union-attr]
cumulative[ds_name] = _step_transformer.transform(cumulative[ds_name]) # type: ignore[ty:unresolved-attribute]

# Cumulative step label for the title/key
latest_step_type = type(_step_transformer).__name__
Expand Down
2 changes: 1 addition & 1 deletion chemotools/inspector/core/latent.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def get_latent_loadings(self) -> np.ndarray: # pragma: no cover
# Private methods
# ------------------------------------------------------------------
def _latent_inspector(self) -> "_LatentInspectorProto":
return self # type: ignore[return-value]
return self # type: ignore[ty:invalid-return-type]

def _get_latent_component_label(self) -> str:
return getattr(self, "component_label", "LV")
Expand Down
2 changes: 1 addition & 1 deletion chemotools/inspector/core/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def R2_val(self) -> Optional[float]:
# Private methods
# ------------------------------------------------------------------
def _regression_inspector(self) -> "_RegressionInspectorProto":
return self # type: ignore[return-value]
return self # type: ignore[ty:invalid-return-type]

def _regression_dataset_exists(self, dataset: str) -> bool:
inspector = self._regression_inspector()
Expand Down
2 changes: 1 addition & 1 deletion chemotools/inspector/core/spectra.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def __init_subclass__(cls, **kwargs: object) -> None:
# Private methods
# ------------------------------------------------------------------
def _spectra_inspector(self) -> "_SpectraInspectorProto":
return self # type: ignore[return-value]
return self # type: ignore[ty:invalid-return-type]

# ------------------------------------------------------------------
# Public API
Expand Down
4 changes: 2 additions & 2 deletions chemotools/outliers/_hotelling_t2.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,10 @@ def _fit_residuals(self, X: np.ndarray, y: Optional[np.ndarray]) -> None:
def _compute_residuals(self, X: np.ndarray, y: Optional[np.ndarray]) -> np.ndarray:
"""Calculate Hotelling's T-squared statistics for input data."""
if isinstance(self.estimator_, _BasePCA):
variances = self.estimator_.explained_variance_ # type: ignore[unresolved-attribute]
variances = self.estimator_.explained_variance_ # type: ignore[ty:unresolved-attribute]

if isinstance(self.estimator_, _PLS):
variances = np.var(self.estimator_.x_scores_, axis=0) # type: ignore[unresolved-attribute]
variances = np.var(self.estimator_.x_scores_, axis=0) # type: ignore[ty:unresolved-attribute]

X_transformed = self.estimator_.transform(X)
return np.sum((X_transformed**2) / variances, axis=1)
10 changes: 6 additions & 4 deletions chemotools/smooth/_mean_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# License: MIT

from numbers import Integral
from typing import Literal

import numpy as np
from scipy.ndimage import uniform_filter1d
Expand Down Expand Up @@ -57,17 +58,18 @@ class MeanFilter(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):

_parameter_constraints: dict = {
"window_length": [Interval(Integral, 3, None, closed="left")],
"mode": [
StrOptions({"nearest", "constant", "reflect", "wrap", "mirror", "interp"})
],
"mode": [StrOptions({"nearest", "constant", "reflect", "wrap", "mirror"})],
"window_size": [
Interval(Integral, 3, None, closed="left"),
deprecated_parameter_constraint(),
],
}

def __init__(
self, window_length: int = 3, mode="nearest", window_size=DEPRECATED_PARAMETER
self,
window_length: int = 3,
mode: Literal["nearest", "constant", "reflect", "wrap", "mirror"] = "nearest",
window_size=DEPRECATED_PARAMETER,
Comment on lines 59 to +72
Copy link

Copilot AI Apr 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MeanFilter no longer allows mode="interp" (constraints/type now exclude it), but the docstring for mode still lists "interp" as a supported option. Please update the docstring to match the actual accepted values so users don't pass a mode that will now be rejected.

Copilot uses AI. Check for mistakes.
) -> None:
self.window_length = window_length
self.window_size = window_size
Expand Down
45 changes: 45 additions & 0 deletions tests/domain_adaption/test_direct_standardization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""
Test for DirectStandardization
"""

# Authors: Ruggero Guerrini
# License: MIT
import numpy as np

from chemotools.domain_adaption._direct_standardization import (
DirectStandardization,
)


def data_diff(dataset_ref, dataset_test):
    """Return the norm of the difference between two datasets, relative
    to the norm of the reference dataset."""
    residual_norm = np.linalg.norm(dataset_ref - dataset_test)
    reference_norm = np.linalg.norm(dataset_ref)
    return residual_norm / reference_norm


class Test_Direct_Standardization:
    """
    Test that enhanced Direct Standardization maintains sklearn API compatibility
    """

    def test_shape_consistency_and_improvement(self):
        # Arrange - I create a slave linked to my master
        np.random.seed(17)
        X_master = np.random.rand(100, 50)
        X_slave = 1.2 * X_master + 0.01 * np.random.randn(100, 50)

        # Fit model
        model = DirectStandardization().fit(X_slave, X_master)

        # Act
        X_transformed = model.transform(X_slave)

        # Assert
        assert X_transformed.shape == X_slave.shape
        assert X_transformed.shape == X_master.shape
        assert model.T.shape == (X_slave.shape[1], X_master.shape[1])
        # Test to verify that the difference is smaller with the transfer model
        before = data_diff(X_master, X_slave)
        after = data_diff(X_master, X_transformed)
        assert before > after
Loading