Broad-Learning-System/BLS.py at main · FamALouiz/Broad-Learning-System · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.utils.extmath import softmax

from BLS_config import BLSConfig

def _stable_sigmoid(x):
    """
    Evaluate the logistic function 1 / (1 + exp(-x)) without overflow.

    The naive form calls exp(-x), which overflows (and emits a RuntimeWarning)
    for large negative x. Here the exponential is only ever taken of a
    non-positive number, and the algebraically equivalent form
    exp(x) / (1 + exp(x)) is used on the negative side.

    Args:
        x (array-like): Pre-activation values.

    Returns:
        np.ndarray: Element-wise sigmoid of x, same shape as x.
    """
    x = np.asarray(x)
    # exp(-|x|) lies in (0, 1], so it can never overflow.
    decay = np.exp(-np.abs(x))
    return np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))


# Registry mapping activation names to element-wise callables. The names are
# looked up by BroadLearningSystem.__init__ for both the feature-group and the
# enhancement-group activation.
_ACTS = {
    "identity": lambda x: x,
    "tanh": np.tanh,
    "sigmoid": _stable_sigmoid,
    "relu": lambda x: np.maximum(0.0, x),
}


class BroadLearningSystem:
    """
    Broad Learning System that uses scikit-learn:
      - StandardScaler (optional input standardization)
      - OneHotEncoder (class-label targets)
      - Ridge for closed-form output weights
      - train_test_split helper

    The network is a list of random "feature groups" applied to the
    (optionally standardized) input, followed by a list of random
    "enhancement groups" applied to the concatenated feature outputs. The
    design matrix is ``[features | enhancements | optional bias column]`` and
    only the output weights on top of it are learned.

    Each enhancement group reads the leading feature columns that existed when
    the group was created, so feature groups can be appended later without
    invalidating existing enhancement weights.
    """

    def __init__(self, cfg: "BLSConfig"):
        """
        Initialize the Broad Learning System.

        Args:
            cfg (BLSConfig): Configuration object containing all hyperparameters
                for the BLS model including feature groups, enhancement groups,
                activation functions, and regularization parameters.

        Raises:
            ValueError: If either configured activation name is not a key of
                the module-level activation registry.
        """
        self.cfg = cfg
        self.rng = np.random.default_rng(cfg.random_state)
        self.scaler = StandardScaler(
            with_mean=True, with_std=True) if cfg.standardize else None
        self.enc = None
        self.is_classification = None
        self.classes_ = None
        # Number of input columns seen by fit(); None until fitted.
        self.n_features_in_ = None

        # Random (fixed) weights/biases of the feature and enhancement groups.
        self.Wf, self.bf = [], []
        self.We, self.be = [], []
        # Learned output weights and the Ridge estimator that produced them.
        self.Wout = None
        self.ridge = None

        if cfg.feature_activation not in _ACTS or cfg.enhancement_activation not in _ACTS:
            raise ValueError("Unknown activation name.")
        self.act_f = _ACTS[cfg.feature_activation]
        self.act_e = _ACTS[cfg.enhancement_activation]

    @staticmethod
    def split(X, y, test_size=0.2, random_state=0, stratify=None):
        """
        Split arrays or matrices into random train and test subsets.

        This is a convenience wrapper around sklearn's train_test_split.

        Args:
            X (array-like): Features array of shape (n_samples, n_features).
            y (array-like): Target array of shape (n_samples,) or (n_samples, n_outputs).
            test_size (float, optional): Proportion of dataset to include in test split.
                Defaults to 0.2.
            random_state (int, optional): Random seed for reproducible splits.
                Defaults to 0.
            stratify (array-like, optional): If not None, data is split in a stratified
                fashion using this as class labels. Defaults to None.

        Returns:
            tuple: X_train, X_test, y_train, y_test arrays.
        """
        return train_test_split(X, y, test_size=test_size, random_state=random_state, stratify=stratify)

    # ---------- internal ----------
    def _new_group(self, in_dim, out_dim):
        """
        Draw the random weights and bias of one group.

        The weight matrix is drawn before the bias so that the random stream
        is consumed in the same order everywhere groups are created.

        Args:
            in_dim (int): Number of input columns of the group.
            out_dim (int): Number of nodes in the group.

        Returns:
            tuple: (W, b) with W of shape (in_dim, out_dim) drawn from N(0, 1)
                and b of shape (out_dim,) drawn from N(0, 0.1).
        """
        W = self.rng.normal(0.0, 1.0, size=(in_dim, out_dim))
        b = self.rng.normal(0.0, 0.1, size=(out_dim,))
        return W, b

    def _input_dim(self):
        """
        Return the number of input columns the model was built for.

        Prefers the value recorded by fit(); falls back to the first feature
        group's weight matrix and then to the fitted scaler statistics.

        Returns:
            int or None: The input dimension, or None if it cannot be determined.
        """
        n_in = getattr(self, "n_features_in_", None)
        if n_in is not None:
            return int(n_in)
        if self.Wf:
            return int(self.Wf[0].shape[0])
        mean = getattr(self.scaler, "mean_", None)
        if mean is not None:
            return int(mean.shape[0])
        return None

    def _check_fitted(self):
        """
        Ensure output weights exist.

        Raises:
            RuntimeError: If no output weights have been learned yet.
        """
        if self.Wout is None:
            raise RuntimeError("Model must be fitted before use.")

    def _init_groups(self, in_dim):
        """
        Initialize feature and enhancement groups with random weights and biases.

        Creates random weight matrices and bias vectors for both feature mapping
        groups and enhancement groups. Feature groups map input to feature space,
        while enhancement groups map feature outputs to enhancement space.
        Any previously created groups are discarded.

        Args:
            in_dim (int): Input dimension for feature groups.
        """
        self.Wf, self.bf = [], []
        for _ in range(self.cfg.n_feature_groups):
            W, b = self._new_group(in_dim, self.cfg.feature_group_size)
            self.Wf.append(W)
            self.bf.append(b)

        total_feature_nodes = self.cfg.n_feature_groups * self.cfg.feature_group_size
        self.We, self.be = [], []
        for _ in range(self.cfg.n_enhancement_groups):
            W, b = self._new_group(
                total_feature_nodes, self.cfg.enhancement_group_size)
            self.We.append(W)
            self.be.append(b)

    def _map_features(self, X):
        """
        Map input data through feature groups using random weights and activation functions.

        Args:
            X (np.ndarray): Input data of shape (n_samples, n_features).

        Returns:
            np.ndarray: Feature mappings of shape (n_samples, total feature nodes);
                zero columns when there are no feature groups.
        """
        feats = [self.act_f(X @ W + b) for W, b in zip(self.Wf, self.bf)]
        return np.concatenate(feats, axis=1) if feats else np.empty((X.shape[0], 0))

    def _map_enhancements(self, F):
        """
        Map feature outputs through enhancement groups using random weights and activation functions.

        Each group consumes only the first ``W.shape[0]`` columns of F. New
        feature groups are appended on the right of F, so those leading
        columns are exactly the feature nodes that existed when the
        enhancement group was created; groups created before a call to
        add_feature_groups therefore stay valid.

        Args:
            F (np.ndarray): Feature mappings of shape (n_samples, n_feature_nodes).

        Returns:
            np.ndarray: Enhancement mappings of shape (n_samples, total enhancement nodes);
                zero columns when there are no enhancement groups.
        """
        enh = [self.act_e(F[:, :W.shape[0]] @ W + b)
               for W, b in zip(self.We, self.be)]
        return np.concatenate(enh, axis=1) if enh else np.empty((F.shape[0], 0))

    def _design(self, X):
        """
        Create the design matrix by concatenating feature and enhancement mappings.

        Applies feature mapping, enhancement mapping, and optionally adds bias term
        to create the final design matrix used for output weight learning.

        Args:
            X (np.ndarray): Input data of shape (n_samples, n_features), already
                standardized if a scaler is in use.

        Returns:
            np.ndarray: Design matrix of shape (n_samples, total_nodes + bias).
        """
        F = self._map_features(X)
        E = self._map_enhancements(F)
        # Decide on the column count, not on E.size: a zero-row batch must
        # still yield the same number of columns as a non-empty one.
        H = np.concatenate([F, E], axis=1) if E.shape[1] else F
        if self.cfg.add_bias:
            H = np.concatenate([H, np.ones((H.shape[0], 1))], axis=1)
        return H

    def fit(self, X, y):
        """
        Train the Broad Learning System on the given data.

        Fits the model by standardizing input, initializing random groups,
        preparing target encoding (for classification), creating design matrix,
        and solving for optimal output weights using Ridge regression.

        A 1-D target, or a 2-D target with a single column, is always treated
        as class labels and one-hot encoded; any other 2-D target is treated
        as a multi-output regression target.

        Args:
            X (array-like): Training features of shape (n_samples, n_features).
            y (array-like): Training targets of shape (n_samples,) or (n_samples, n_outputs).

        Returns:
            BroadLearningSystem: Returns self for method chaining.
        """
        X = np.asarray(X, dtype=float)

        if self.scaler is not None:
            Xs = self.scaler.fit_transform(X)
        else:
            Xs = X

        # Remember the input width so groups can be added later even when no
        # scaler is configured.
        self.n_features_in_ = Xs.shape[1]
        self._init_groups(Xs.shape[1])

        y = np.asarray(y)
        if y.ndim == 1 or (y.ndim == 2 and y.shape[1] == 1):
            self.is_classification = True
            y = y.reshape(-1, 1)
            self.enc = OneHotEncoder(
                sparse_output=False, handle_unknown="ignore")
            T = self.enc.fit_transform(y)
            self.classes_ = self.enc.categories_[0]
        else:
            self.is_classification = False
            T = y.astype(float)

        H = self._design(Xs)
        # No intercept in Ridge: the optional bias column of H plays that role.
        self.ridge = Ridge(alpha=self.cfg.lambda_reg,
                           fit_intercept=False, random_state=self.cfg.random_state)
        self.ridge.fit(H, T)
        self.Wout = self.ridge.coef_.T
        return self

    def _forward(self, X):
        """
        Perform forward pass through the trained BLS model.

        Args:
            X (np.ndarray): Input data of shape (n_samples, n_features).

        Returns:
            np.ndarray: Model outputs of shape (n_samples, n_outputs).

        Raises:
            RuntimeError: If the model has no output weights yet.
            ValueError: If groups were added after the output weights were
                last fitted, so the design matrix no longer matches them.
        """
        self._check_fitted()
        if self.scaler is not None:
            X = self.scaler.transform(X)
        H = self._design(X)
        if H.shape[1] != self.Wout.shape[0]:
            raise ValueError(
                "Network was expanded after the output weights were fitted; "
                "call refit_output() before predicting.")
        return H @ self.Wout

    def predict(self, X):
        """
        Make predictions on new data.

        For classification tasks, returns predicted class labels.
        For regression tasks, returns predicted continuous values.

        Args:
            X (array-like): Input features of shape (n_samples, n_features).

        Returns:
            np.ndarray: Predictions of shape (n_samples,) for classification or
                       (n_samples, n_outputs) for regression.
        """
        X = np.asarray(X, dtype=float)
        Y = self._forward(X)
        if self.is_classification:
            idx = np.argmax(Y, axis=1)
            return self.classes_[idx]
        return Y

    def predict_proba(self, X):
        """
        Compute class probabilities for input samples.

        Only available for classification tasks. Applies softmax to the raw
        model outputs to obtain per-row probability distributions.

        Args:
            X (array-like): Input features of shape (n_samples, n_features).

        Returns:
            np.ndarray: Class probabilities of shape (n_samples, n_classes).

        Raises:
            ValueError: If the model is not a fitted classification model.
        """
        if not self.is_classification:
            raise ValueError("predict_proba only for classification.")
        X = np.asarray(X, dtype=float)
        logits = self._forward(X)
        return softmax(logits)

    def add_feature_groups(self, k):
        """
        Add additional feature groups to expand the network breadth.

        New groups are appended after the existing ones. Existing enhancement
        groups keep reading only the feature columns they were created for;
        enhancement groups added afterwards see the new features as well. The
        output weights must be refitted (see refit_output) after expansion.

        Args:
            k (int): Number of feature groups to add.

        Raises:
            RuntimeError: If called before the model is fitted.
        """
        in_dim = self._input_dim()
        if in_dim is None:
            raise RuntimeError("Model must be fitted before adding groups.")
        for _ in range(k):
            W, b = self._new_group(in_dim, self.cfg.feature_group_size)
            self.Wf.append(W)
            self.bf.append(b)

    def add_enhancement_groups(self, k):
        """
        Add additional enhancement groups to expand the network breadth.

        The new groups operate on all feature nodes that exist at the time of
        the call. The output weights must be refitted (see refit_output)
        after expansion.

        Args:
            k (int): Number of enhancement groups to add.
        """
        total_feature_nodes = len(self.Wf) * self.cfg.feature_group_size
        for _ in range(k):
            W, b = self._new_group(
                total_feature_nodes, self.cfg.enhancement_group_size)
            self.We.append(W)
            self.be.append(b)

    def refit_output(self, X, y):
        """
        Refit only the output weights using the current network architecture.

        This method is intended for updating the model after adding new feature
        or enhancement groups: it recomputes the final Ridge regression on the
        current design matrix without reinitializing the random groups, the
        scaler, or the label encoder.

        Args:
            X (array-like): Training features of shape (n_samples, n_features).
            y (array-like): Training targets of shape (n_samples,) or (n_samples, n_outputs).

        Returns:
            BroadLearningSystem: Returns self for method chaining.

        Raises:
            RuntimeError: If called before the model is fitted.
        """
        if self.ridge is None:
            raise RuntimeError("Model must be fitted before refitting output.")

        X = np.asarray(X, dtype=float)
        if self.scaler is not None:
            X = self.scaler.transform(X)

        y = np.asarray(y)
        if self.is_classification:
            T = self.enc.transform(y.reshape(-1, 1))
        else:
            T = y.astype(float)

        H = self._design(X)
        self.ridge.fit(H, T)
        self.Wout = self.ridge.coef_.T
        return self

    def extract_features(self, X):
        """
        Extract the intermediate feature representation from the BLS network.

        Returns the design matrix (concatenated feature and enhancement mappings,
        plus the bias column when configured) without applying the final output
        weights. Useful for feature extraction and as input to other models.

        Args:
            X (array-like): Input features of shape (n_samples, n_features).

        Returns:
            np.ndarray: Feature representation of shape (n_samples, total_nodes + bias).
        """
        X = np.asarray(X, dtype=float)
        if self.scaler is not None:
            X = self.scaler.transform(X)
        return self._design(X)