quant-iota committed on
Commit
190b350
·
verified ·
1 Parent(s): b7399ed

Upload 11 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/MNIST/raw/t10k-images-idx3-ubyte filter=lfs diff=lfs merge=lfs -text
37
+ data/MNIST/raw/train-images-idx3-ubyte filter=lfs diff=lfs merge=lfs -text
2503.13942v1.py ADDED
@@ -0,0 +1,429 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Course 1 - Foundation of SKA
2
+ import torch
3
+ import torch.nn as nn
4
+ import numpy as np
5
+ import matplotlib.pyplot as plt
6
+ import seaborn as sns
7
+ from torchvision import datasets, transforms
8
+ from torch.utils.data import DataLoader, TensorDataset
9
+ import time
10
+ import pandas as pd
11
+
12
+ # Set random seed for reproducibility
13
+ torch.manual_seed(42)
14
+ np.random.seed(42)
15
+
16
+ # Load the pre-saved MNIST subset (100 samples per class)
17
+ mnist_subset = torch.load("mnist_subset_100_per_class.pt")
18
+ images = torch.stack([item[0] for item in mnist_subset]) # Shape: [1000, 1, 28, 28]
19
+ labels = torch.tensor([item[1] for item in mnist_subset])
20
+
21
+ # Prepare the dataset (single batch for SKA forward learning)
22
+ inputs = images # No mini-batches, full dataset used for forward-only updates
23
+
24
+ # Define the SKA model with 4 layers
25
class SKAModel(nn.Module):
    """Sigmoid multilayer network updated by forward-only SKA steps.

    For every layer the model keeps the current and previous knowledge tensor
    ``Z`` (pre-activation) and decision tensor ``D = sigmoid(Z)``, and appends
    per-step metrics to per-layer history lists.  The ``visualize_*`` methods
    plot those histories, save them as PNG files and show them.
    """

    def __init__(self, input_size=784, layer_sizes=None, K=50):
        """Create weights, biases and empty tracking state.

        Args:
            input_size: Number of features per flattened input sample.
            layer_sizes: Output width of each layer; ``None`` selects
                ``[256, 128, 64, 10]``.
            K: Number of forward steps (stored for the training loop).
        """
        super().__init__()
        # ``None`` replaces the former list default, which was one object
        # shared by every instance; the argument is copied for the same reason.
        if layer_sizes is None:
            layer_sizes = [256, 128, 64, 10]
        layer_sizes = list(layer_sizes)

        self.input_size = input_size
        self.layer_sizes = layer_sizes
        self.K = K  # Number of forward steps

        # One (in, out) weight matrix and one bias vector per layer.
        self.weights = nn.ParameterList()
        self.biases = nn.ParameterList()
        prev_size = input_size
        for size in layer_sizes:
            self.weights.append(nn.Parameter(torch.randn(prev_size, size) * 0.01))
            self.biases.append(nn.Parameter(torch.zeros(size)))
            prev_size = size

        n_layers = len(layer_sizes)

        # Per-layer state of the current and previous forward step.
        self.Z = [None] * n_layers        # Knowledge tensors
        self.Z_prev = [None] * n_layers   # Previous knowledge tensors
        self.D = [None] * n_layers        # Decision probability tensors
        self.D_prev = [None] * n_layers   # Previous decisions
        self.delta_D = [None] * n_layers  # Decision shifts per step
        self.entropy = [None] * n_layers  # Latest layer-wise entropy

        # Per-layer metric histories, one entry appended per step.
        self.entropy_history = [[] for _ in range(n_layers)]
        self.cosine_history = [[] for _ in range(n_layers)]
        self.output_history = []  # Mean output distribution per step
        self.frobenius_history = [[] for _ in range(n_layers)]         # ||Z||_F
        self.weight_frobenius_history = [[] for _ in range(n_layers)]  # ||W||_F
        self.net_history = [[] for _ in range(n_layers)]               # Tensor Net
        self.tensor_net_total = [0.0] * n_layers  # Cumulative Tensor Net

    def forward(self, x):
        """Run one forward pass, storing ``Z`` and ``D`` for every layer.

        The input is flattened to ``(batch, -1)``.  The Frobenius norm of each
        layer's ``Z`` is appended to ``frobenius_history``.  Returns the last
        layer's decision tensor.
        """
        batch_size = x.shape[0]
        x = x.view(batch_size, -1)  # Flatten images

        for l in range(len(self.layer_sizes)):
            z = torch.mm(x, self.weights[l]) + self.biases[l]
            self.frobenius_history[l].append(torch.norm(z, p='fro').item())
            d = torch.sigmoid(z)
            self.Z[l] = z
            self.D[l] = d
            x = d  # Output becomes input for the next layer
        return x

    def calculate_entropy(self):
        """Append entropy, cos(theta) and Tensor Net for the current step.

        Layers that lack a current or previous ``Z``/``D`` are skipped.

        Returns:
            Sum of the layer entropies: a 0-dim tensor, or the integer ``0``
            when every layer was skipped.
        """
        neg_inv_ln2 = float(-1 / np.log(2))
        inv_ln2 = float(1 / np.log(2))
        total_entropy = 0

        # These are bookkeeping metrics only; no_grad keeps them (and the
        # returned total) from holding on to the forward pass's autograd graph.
        with torch.no_grad():
            for l in range(len(self.layer_sizes)):
                z, d = self.Z[l], self.D[l]
                z_prev, d_prev = self.Z_prev[l], self.D_prev[l]
                if z is None or d is None or z_prev is None or d_prev is None:
                    continue

                delta_d = d - d_prev
                self.delta_D[l] = delta_d
                delta_z = z - z_prev

                # H = (-1/ln 2) * Z (.) delta_D, summed over all elements.
                layer_entropy = torch.sum(neg_inv_ln2 * (z * delta_d))
                self.entropy[l] = layer_entropy.item()
                self.entropy_history[l].append(layer_entropy.item())

                # cos(theta) between Z and delta_D; 0.0 when either norm is 0.
                dot_product = torch.sum(z * delta_d)
                z_norm = torch.norm(z)
                delta_d_norm = torch.norm(delta_d)
                if z_norm > 0 and delta_d_norm > 0:
                    cos_theta = dot_product / (z_norm * delta_d_norm)
                    self.cosine_history[l].append(cos_theta.item())
                else:
                    self.cosine_history[l].append(0.0)

                total_entropy += layer_entropy

                # Tensor Net step: delta_Z . (D - nabla_z H),
                # with nabla_z H = (1/ln 2) * Z (.) D * (1 - D).
                d_prime = d * (1 - d)
                nabla_z_h = inv_ln2 * z * d_prime
                tensor_net_step = torch.sum(delta_z * (d - nabla_z_h))
                self.net_history[l].append(tensor_net_step.item())
                self.tensor_net_total[l] += tensor_net_step.item()

        return total_entropy

    def ska_update(self, inputs, learning_rate=0.01):
        """Update weights and biases from the stored tensors, without backprop.

        Layers whose ``delta_D`` is still ``None`` are left untouched.

        Args:
            inputs: The batch that produced the stored tensors; flattened and
                used as the first layer's input.
            learning_rate: Step size applied to both weights and biases.
        """
        neg_inv_ln2 = float(-1 / np.log(2))

        # Update the parameters in place under no_grad.  Rebinding
        # ``self.weights[l]`` to the result of tensor arithmetic hands the
        # ParameterList a plain, graph-carrying tensor instead of a Parameter.
        with torch.no_grad():
            for l in range(len(self.layer_sizes)):
                if self.delta_D[l] is None:
                    continue
                if l == 0:
                    prev_output = inputs.view(inputs.shape[0], -1)
                else:
                    prev_output = self.D_prev[l - 1]
                d_prime = self.D[l] * (1 - self.D[l])  # sigmoid derivative
                gradient = neg_inv_ln2 * (self.Z[l] * d_prime + self.delta_D[l])
                dW = torch.matmul(prev_output.t(), gradient) / prev_output.shape[0]
                self.weights[l].sub_(learning_rate * dW)
                self.biases[l].sub_(learning_rate * gradient.mean(dim=0))

    def initialize_tensors(self, batch_size):
        """Clear all per-layer state and histories (``batch_size`` is unused)."""
        for l in range(len(self.layer_sizes)):
            self.Z[l] = None
            self.Z_prev[l] = None
            self.D[l] = None
            self.D_prev[l] = None
            self.delta_D[l] = None
            self.entropy[l] = None
            self.entropy_history[l] = []
            self.cosine_history[l] = []
            self.frobenius_history[l] = []
            self.weight_frobenius_history[l] = []
            self.net_history[l] = []
            self.tensor_net_total[l] = 0.0
        self.output_history = []

    def _layer_labels(self):
        """Return the "Layer N" labels used on axes and legends."""
        return [f"Layer {i+1}" for i in range(len(self.layer_sizes))]

    def _render_heatmap(self, history, cmap, title, filename, vmin=None, vmax=None):
        """Draw, save and show one layers-by-steps heatmap.

        ``vmin``/``vmax`` default to the data minimum/maximum.  Nothing is
        drawn when *history* holds no values yet.
        """
        data = np.array(history)
        if data.size == 0:
            return
        fig = plt.figure(figsize=(12, 8))
        sns.heatmap(data, cmap=cmap,
                    vmin=np.min(data) if vmin is None else vmin,
                    vmax=np.max(data) if vmax is None else vmax,
                    xticklabels=range(1, data.shape[1] + 1),
                    yticklabels=self._layer_labels())
        plt.title(title)
        plt.xlabel("Step Index K")
        plt.ylabel("Network Layers")
        plt.tight_layout()
        plt.savefig(filename)
        plt.show()
        # One figure is created per call; close it so they do not pile up.
        plt.close(fig)

    def visualize_entropy_heatmap(self, step):
        """Heatmap of the entropy history; the colour scale is capped at 0."""
        self._render_heatmap(self.entropy_history, "Blues_r",
                             f"Layer-wise Entropy Heatmap (Step {step})",
                             f"entropy_heatmap_step_{step}.png",
                             vmax=0.0)

    def visualize_cosine_heatmap(self, step):
        """Heatmap of cos(theta) on a fixed diverging scale of [-1, 1]."""
        self._render_heatmap(self.cosine_history, "coolwarm_r",
                             f"Layer-wise Cos(\u03B8) Alignment Heatmap (Step {step})",
                             f"cosine_heatmap_step_{step}.png",
                             vmin=-1.0, vmax=1.0)

    def visualize_frobenius_heatmap(self, step):
        """Heatmap of the Frobenius norm of the knowledge tensor Z."""
        self._render_heatmap(self.frobenius_history, "viridis",
                             f"Layer-wise Frobenius Norm Heatmap (Step {step})",
                             f"knowledge_frobenius_heatmap_step_{step}.png")

    def visualize_weight_frobenius_heatmap(self, step):
        """Heatmap of the Frobenius norm of the weight matrices W."""
        self._render_heatmap(self.weight_frobenius_history, "plasma",
                             f"Layer-wise Weight Frobenius Norm Heatmap (Step {step})",
                             f"weight_frobenius_heatmap_step_{step}.png")

    def visualize_output_distribution(self):
        """Plot the mean activation of each output neuron over the steps."""
        output_data = np.array(self.output_history)  # one row per step
        # Label as many neurons as the data has, not a fixed 10.
        n_neurons = output_data.shape[1] if output_data.ndim == 2 else 0
        fig = plt.figure(figsize=(10, 6))
        plt.plot(output_data)  # Plot each neuron as a line
        plt.title('Output Neuron Activation Evolution Across Steps (Single Pass)')
        plt.xlabel('Step Index K')
        plt.ylabel('Mean Neuron Activation')
        plt.legend([f"Neuron {i}" for i in range(n_neurons)],
                   loc='upper right', bbox_to_anchor=(1.17, 1))
        plt.grid(True)
        plt.tight_layout()
        plt.savefig("output_neuron_activation_single_pass.png")
        plt.show()
        plt.close(fig)

    def visualize_net_heatmap(self, step):
        """Heatmap of the per-step Tensor Net values."""
        self._render_heatmap(self.net_history, "magma",
                             f"Tensor Net Heatmap (Step {step})",
                             f"tensor_net_heatmap_step_{step}.png")

    def visualize_net_history(self):
        """Plot the Tensor Net history with one line per layer."""
        net_data = np.array(self.net_history).T  # steps as rows, layers as columns
        fig = plt.figure(figsize=(8, 6))
        plt.plot(net_data)
        plt.title('Tensor Net Evolution Across Layers')
        plt.xlabel('Step Index K')
        plt.ylabel('Tensor Net')
        plt.legend(self._layer_labels())
        plt.grid(True)
        plt.tight_layout()
        plt.savefig("tensor_net_history_single_pass.png")
        plt.show()
        plt.close(fig)

    def visualize_entropy_vs_frobenius(self, step):
        """Scatter each layer's entropy against the Frobenius norm of its Z."""
        n_layers = len(self.layer_sizes)
        # Two columns; at least the original 2x2 grid, more rows if needed.
        n_rows = max(2, -(-n_layers // 2))
        fig = plt.figure(figsize=(12, 10))

        for l in range(n_layers):
            plt.subplot(n_rows, 2, l + 1)

            if len(self.entropy_history[l]) < 2 or len(self.frobenius_history[l]) < 2:
                plt.title(f"Layer {l+1}: Not enough data")
                continue

            # Entropy has no entry for the first forward step, so drop the
            # first norm and trim both series to a common length.
            entropy_data = self.entropy_history[l]
            frobenius_data = self.frobenius_history[l][1:]
            min_len = min(len(entropy_data), len(frobenius_data))
            entropy_data = entropy_data[:min_len]
            frobenius_data = frobenius_data[:min_len]

            points = plt.scatter(frobenius_data, entropy_data,
                                 c=range(len(entropy_data)),
                                 cmap='Blues_r', s=50, alpha=0.8)
            plt.plot(frobenius_data, entropy_data, 'k-', alpha=0.3)

            # Colour encodes the step index.
            cbar = plt.colorbar(points)
            cbar.set_label('Step')

            plt.xlabel('Frobenius Norm of Knowledge Tensor Z')
            plt.ylabel('Entropy Reduction')
            plt.title(f'Layer {l+1}: Entropy vs. Knowledge Magnitude')
            plt.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.savefig(f"entropy_vs_frobenius_step_{step}.png")
        plt.show()
        plt.close(fig)
307
+
308
+
309
# Training parameters
model = SKAModel()
learning_rate = 0.01

# SKA training over multiple forward steps.
# The running total is a plain float: calculate_entropy() can return a tensor,
# and summing tensors would chain every step's autograd graph together.
total_entropy = 0.0
step_count = 0
start_time = time.time()

# Initialize tensors for first step
model.initialize_tensors(inputs.size(0))

# Process K forward steps (without backpropagation)
for k in range(model.K):
    outputs = model.forward(inputs)
    # Store mean output distribution for the final layer
    model.output_history.append(outputs.mean(dim=0).detach().cpu().numpy())
    if k > 0:  # Entropy needs the previous step's tensors
        batch_entropy = float(model.calculate_entropy())
        model.ska_update(inputs, learning_rate)
        total_entropy += batch_entropy
        step_count += 1
        print(f'Step: {k}, Total Steps: {step_count}, Entropy: {batch_entropy:.4f}')
        model.visualize_entropy_heatmap(step_count)
        model.visualize_cosine_heatmap(step_count)
        model.visualize_frobenius_heatmap(step_count)
        # After weight updates, compute and store weight Frobenius norms
        for l in range(len(model.layer_sizes)):
            weight_norm = torch.norm(model.weights[l], p='fro')
            model.weight_frobenius_history[l].append(weight_norm.item())
        model.visualize_weight_frobenius_heatmap(step_count)
        model.visualize_net_heatmap(step_count)  # Per-step Tensor Net
        model.visualize_entropy_vs_frobenius(step_count)

    # Snapshot this step's tensors as "previous" for the next step
    model.D_prev = [d.clone().detach() if d is not None else None for d in model.D]
    model.Z_prev = [z.clone().detach() if z is not None else None for z in model.Z]

# Final statistics
total_time = time.time() - start_time
avg_entropy = total_entropy / step_count if step_count > 0 else 0
print(f"Training Complete: Avg Entropy={avg_entropy:.4f}, Steps={step_count}, Time={total_time:.2f}s")
print(f"Tensor Net Total per layer: {[f'Layer {i+1}: {tn:.4f}' for i, tn in enumerate(model.tensor_net_total)]}")

# Plot historical evolution for all metrics: (history, title, y-label, file)
history_plots = [
    (model.entropy_history,
     'Entropy Evolution Across Layers (Single Pass)',
     'Entropy',
     "entropy_history_single_pass.png"),
    (model.cosine_history,
     'Cos(\u03B8) Alignment Evolution Across Layers (Single Pass)',
     'Cos(\u03B8)',
     "cosine_history_single_pass.png"),
    (model.frobenius_history,
     'Z Tensor Frobenius Norm Evolution Across Layers (Single Pass)',
     'Z Tensor Frobenius Norm',
     "knowledge_frobenius_history_single_pass.png"),
    (model.weight_frobenius_history,
     'W Tensor Frobenius Norm Evolution Across Layers (Single Pass)',
     'W Tensor Frobenius Norm',
     "weight_frobenius_history_single_pass.png"),
]
for history, title, ylabel, filename in history_plots:
    plt.figure(figsize=(8, 6))
    plt.plot(np.array(history).T)  # one line per layer
    plt.title(title)
    plt.xlabel('Step Index K')
    plt.ylabel(ylabel)
    plt.legend([f"Layer {i+1}" for i in range(len(model.layer_sizes))])
    plt.grid(True)
    plt.savefig(filename)
    plt.show()

model.visualize_output_distribution()  # Output distribution

model.visualize_net_history()  # Tensor Net historical evolution

print("Training complete. Visualizations generated.")
402
+ ### **Function to Save Data as CSV**
403
+ # Define the save_metric_csv function OUTSIDE the class
404
def save_metric_csv(metric_data, filename, layers):
    """Write a per-layer metric history to *filename* as CSV.

    Each inner sequence of *metric_data* becomes one row labelled
    ``Layer 1`` .. ``Layer <layers>``; columns are labelled ``K=1``, ``K=2``, ...

    Rows are trimmed to the shortest history so that histories of different
    lengths still form a rectangular table.

    Args:
        metric_data: One sequence of values per layer.
        filename: Destination CSV path.
        layers: Number of row labels to generate.
    """
    rows = [list(layer) for layer in metric_data]
    actual_steps = min((len(row) for row in rows), default=0)
    # The step count alone is not enough: the rows themselves must be cut to
    # it, otherwise pandas rejects the column labels for ragged input.
    trimmed = [row[:actual_steps] for row in rows]
    df = pd.DataFrame(trimmed,
                      index=[f"Layer {i+1}" for i in range(layers)],
                      columns=[f"K={j+1}" for j in range(actual_steps)])
    df.to_csv(filename)
    print(f"Saved {filename} with {actual_steps} steps")
412
+
413
+
414
layers = len(model.layer_sizes)
steps = model.K

# Per-layer histories, each exported to its own CSV file (in this order).
metric_exports = {
    "entropy_history.csv": model.entropy_history,
    "cosine_history.csv": model.cosine_history,
    "frobenius_history.csv": model.frobenius_history,
    "weight_frobenius_history.csv": model.weight_frobenius_history,
    "tensor_net_history.csv": model.net_history,
}
for csv_name, history in metric_exports.items():
    save_metric_csv(history, csv_name, layers)

# The output history has one row per step and one column per output neuron.
neuron_columns = [f"Neuron {i}" for i in range(10)]
df_output = pd.DataFrame(model.output_history, columns=neuron_columns)
df_output.to_csv("output_neuron_activation.csv", index_label="Step")
print("Saved output_neuron_activation.csv")
print("All metric data saved. You can now use TikZ in LaTeX to rebuild figures.")
429
+
app.py ADDED
@@ -0,0 +1,270 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SKA Interactive Gradio App
2
+ import torch
3
+ import torch.nn as nn
4
+ import numpy as np
5
+ import matplotlib
6
+ matplotlib.use('Agg')
7
+ import matplotlib.pyplot as plt
8
+ from torchvision import datasets, transforms
9
+ import gradio as gr
10
+
11
+ # Load MNIST from local data
12
+ transform = transforms.Compose([transforms.ToTensor()])
13
+ mnist_dataset = datasets.MNIST(root='./data', train=True, download=False, transform=transform)
14
+
15
+
16
class SKAModel(nn.Module):
    """Sigmoid multilayer network updated by forward-only SKA steps.

    For every layer the model keeps the current and previous knowledge tensor
    ``Z`` (pre-activation) and decision tensor ``D = sigmoid(Z)``, and appends
    per-step metrics (entropy, cos(theta), Frobenius norms, Tensor Net) to
    per-layer history lists.
    """

    def __init__(self, input_size=784, layer_sizes=None, K=50):
        """Create weights, biases and empty tracking state.

        Args:
            input_size: Number of features per flattened input sample.
            layer_sizes: Output width of each layer; ``None`` selects
                ``[256, 128, 64, 10]``.
            K: Number of forward steps (stored for the caller's loop).
        """
        super().__init__()
        # ``None`` replaces the former list default, which was one object
        # shared by every instance; the argument is copied for the same reason.
        if layer_sizes is None:
            layer_sizes = [256, 128, 64, 10]
        layer_sizes = list(layer_sizes)

        self.input_size = input_size
        self.layer_sizes = layer_sizes
        self.K = K

        # One (in, out) weight matrix and one bias vector per layer.
        self.weights = nn.ParameterList()
        self.biases = nn.ParameterList()
        prev_size = input_size
        for size in layer_sizes:
            self.weights.append(nn.Parameter(torch.randn(prev_size, size) * 0.01))
            self.biases.append(nn.Parameter(torch.zeros(size)))
            prev_size = size

        n_layers = len(layer_sizes)

        # Per-layer state of the current and previous forward step.
        self.Z = [None] * n_layers
        self.Z_prev = [None] * n_layers
        self.D = [None] * n_layers
        self.D_prev = [None] * n_layers
        self.delta_D = [None] * n_layers
        self.entropy = [None] * n_layers

        # Per-layer metric histories, one entry appended per step.
        self.entropy_history = [[] for _ in range(n_layers)]
        self.cosine_history = [[] for _ in range(n_layers)]
        self.output_history = []
        self.frobenius_history = [[] for _ in range(n_layers)]
        self.weight_frobenius_history = [[] for _ in range(n_layers)]
        self.net_history = [[] for _ in range(n_layers)]
        self.tensor_net_total = [0.0] * n_layers

    def forward(self, x):
        """Run one forward pass, storing ``Z`` and ``D`` for every layer.

        The input is flattened to ``(batch, -1)``.  The Frobenius norm of each
        layer's ``Z`` is appended to ``frobenius_history``.  Returns the last
        layer's decision tensor.
        """
        batch_size = x.shape[0]
        x = x.view(batch_size, -1)
        for l in range(len(self.layer_sizes)):
            z = torch.mm(x, self.weights[l]) + self.biases[l]
            self.frobenius_history[l].append(torch.norm(z, p='fro').item())
            d = torch.sigmoid(z)
            self.Z[l] = z
            self.D[l] = d
            x = d  # Output becomes input for the next layer
        return x

    def calculate_entropy(self):
        """Append entropy, cos(theta) and Tensor Net for the current step.

        Layers that lack a current or previous ``Z``/``D`` are skipped.

        Returns:
            Sum of the layer entropies: a 0-dim tensor, or the integer ``0``
            when every layer was skipped.
        """
        neg_inv_ln2 = float(-1 / np.log(2))
        inv_ln2 = float(1 / np.log(2))
        total_entropy = 0

        # These are bookkeeping metrics only; no_grad keeps them (and the
        # returned total) from holding on to the forward pass's autograd graph.
        with torch.no_grad():
            for l in range(len(self.layer_sizes)):
                z, d = self.Z[l], self.D[l]
                z_prev, d_prev = self.Z_prev[l], self.D_prev[l]
                if z is None or d is None or z_prev is None or d_prev is None:
                    continue

                delta_d = d - d_prev
                self.delta_D[l] = delta_d
                delta_z = z - z_prev

                # H = (-1/ln 2) * Z (.) delta_D, summed over all elements.
                layer_entropy = torch.sum(neg_inv_ln2 * (z * delta_d))
                self.entropy[l] = layer_entropy.item()
                self.entropy_history[l].append(layer_entropy.item())

                # cos(theta) between Z and delta_D; 0.0 when either norm is 0.
                dot_product = torch.sum(z * delta_d)
                z_norm = torch.norm(z)
                delta_d_norm = torch.norm(delta_d)
                if z_norm > 0 and delta_d_norm > 0:
                    cos_theta = dot_product / (z_norm * delta_d_norm)
                    self.cosine_history[l].append(cos_theta.item())
                else:
                    self.cosine_history[l].append(0.0)

                total_entropy += layer_entropy

                # Tensor Net step: delta_Z . (D - nabla_z H),
                # with nabla_z H = (1/ln 2) * Z (.) D * (1 - D).
                d_prime = d * (1 - d)
                nabla_z_h = inv_ln2 * z * d_prime
                tensor_net_step = torch.sum(delta_z * (d - nabla_z_h))
                self.net_history[l].append(tensor_net_step.item())
                self.tensor_net_total[l] += tensor_net_step.item()

        return total_entropy

    def ska_update(self, inputs, learning_rate=0.01):
        """Update weights and biases from the stored tensors, without backprop.

        Layers whose ``delta_D`` is still ``None`` are left untouched.

        Args:
            inputs: The batch that produced the stored tensors; flattened and
                used as the first layer's input.
            learning_rate: Step size applied to both weights and biases.
        """
        neg_inv_ln2 = float(-1 / np.log(2))

        # Update the parameters in place under no_grad.  Rebinding
        # ``self.weights[l]`` to the result of tensor arithmetic hands the
        # ParameterList a plain, graph-carrying tensor instead of a Parameter.
        with torch.no_grad():
            for l in range(len(self.layer_sizes)):
                if self.delta_D[l] is None:
                    continue
                if l == 0:
                    prev_output = inputs.view(inputs.shape[0], -1)
                else:
                    prev_output = self.D_prev[l - 1]
                d_prime = self.D[l] * (1 - self.D[l])  # sigmoid derivative
                gradient = neg_inv_ln2 * (self.Z[l] * d_prime + self.delta_D[l])
                dW = torch.matmul(prev_output.t(), gradient) / prev_output.shape[0]
                self.weights[l].sub_(learning_rate * dW)
                self.biases[l].sub_(learning_rate * gradient.mean(dim=0))

    def initialize_tensors(self, batch_size):
        """Clear all per-layer state and histories (``batch_size`` is unused)."""
        for l in range(len(self.layer_sizes)):
            self.Z[l] = None
            self.Z_prev[l] = None
            self.D[l] = None
            self.D_prev[l] = None
            self.delta_D[l] = None
            self.entropy[l] = None
            self.entropy_history[l] = []
            self.cosine_history[l] = []
            self.frobenius_history[l] = []
            self.weight_frobenius_history[l] = []
            self.net_history[l] = []
            self.tensor_net_total[l] = 0.0
        self.output_history = []
115
+
116
+
117
def get_mnist_subset(samples_per_class):
    """Return the first *samples_per_class* images of each digit, stacked.

    Images are grouped by digit (all 0s first, then 1s, ...) and keep their
    dataset order within each digit.  Labels are not returned.
    """
    targets = mnist_dataset.targets.numpy()
    images_list = []
    for digit in range(10):
        indices = np.where(targets == digit)[0][:samples_per_class]
        # Go through mnist_dataset[idx] rather than its raw storage so each
        # image is returned the way the dataset object yields it.
        images_list.extend(mnist_dataset[idx][0] for idx in indices)
    return torch.stack(images_list)
130
+
131
+
132
def _plot_layer_series(series_per_layer, title, ylabel):
    """Return a figure with one line per layer drawn from *series_per_layer*."""
    fig, ax = plt.subplots(figsize=(8, 5))
    for l, series in enumerate(series_per_layer):
        ax.plot(series, label=f"Layer {l+1}")
    ax.set_title(title)
    ax.set_xlabel('Step Index K')
    ax.set_ylabel(ylabel)
    ax.legend()
    ax.grid(True)
    fig.tight_layout()
    return fig


def run_ska(neurons_str, K, tau, samples_per_class):
    """Run K SKA forward steps and return five matplotlib figures.

    Args:
        neurons_str: Comma-separated layer sizes, e.g. ``"256, 128, 64, 10"``.
        K: Number of forward steps.
        tau: Learning budget; the learning rate is ``tau / K``.
        samples_per_class: Number of MNIST images to use per digit.

    Returns:
        ``(entropy, cosine, output activation, Z Frobenius norm,
        entropy-vs-Frobenius)`` figures, or five ``None`` values when the
        layer sizes cannot be parsed, a size is not positive, or ``K < 1``.
    """
    # The callback is wired to five outputs, so the "nothing to show" result
    # needs five entries as well.
    no_plots = (None,) * 5

    # Parse layer sizes
    try:
        layer_sizes = [int(x.strip()) for x in neurons_str.split(",")]
    except ValueError:
        return no_plots
    if any(size <= 0 for size in layer_sizes):
        return no_plots

    K = int(K)
    samples_per_class = int(samples_per_class)
    if K < 1:  # also protects the division below
        return no_plots
    learning_rate = tau / K

    # Get data
    inputs = get_mnist_subset(samples_per_class)

    # Create model
    torch.manual_seed(42)
    np.random.seed(42)
    model = SKAModel(input_size=784, layer_sizes=layer_sizes, K=K)
    model.initialize_tensors(inputs.size(0))

    # Run SKA.  Nothing is back-propagated, so autograd tracking is disabled.
    with torch.no_grad():
        for k in range(K):
            outputs = model.forward(inputs)
            model.output_history.append(outputs.mean(dim=0).detach().cpu().numpy())
            if k > 0:  # Entropy needs the previous step's tensors
                model.calculate_entropy()
                model.ska_update(inputs, learning_rate)
                for l in range(len(model.layer_sizes)):
                    weight_norm = torch.norm(model.weights[l], p='fro')
                    model.weight_frobenius_history[l].append(weight_norm.item())
            model.D_prev = [d.clone().detach() if d is not None else None for d in model.D]
            model.Z_prev = [z.clone().detach() if z is not None else None for z in model.Z]

    num_layers = len(layer_sizes)

    # Plot 1: Entropy trajectory
    fig1 = _plot_layer_series(model.entropy_history,
                              'Entropy Evolution Across Layers', 'Entropy')

    # Plot 2: Cosine alignment
    fig2 = _plot_layer_series(model.cosine_history,
                              'Cos(θ) Alignment Evolution Across Layers', 'Cos(θ)')

    # Plot 3: Output neuron activation
    fig3, ax3 = plt.subplots(figsize=(8, 5))
    output_data = np.array(model.output_history)
    for i in range(output_data.shape[1]):
        ax3.plot(output_data[:, i], label=f"Neuron {i}")
    ax3.set_title('Output Neuron Activation Evolution')
    ax3.set_xlabel('Step Index K')
    ax3.set_ylabel('Mean Neuron Activation')
    ax3.legend(loc='upper right', bbox_to_anchor=(1.15, 1), fontsize=7)
    ax3.grid(True)
    fig3.tight_layout()

    # Plot 4: Frobenius norm (Z tensor)
    fig4 = _plot_layer_series(model.frobenius_history,
                              'Z Tensor Frobenius Norm Evolution Across Layers',
                              'Frobenius Norm')

    # Plot 5: Entropy vs Frobenius scatter.
    # squeeze=False always yields a 2-D axes array, so flattening works for
    # any number of layers, including a single one.
    fig5, axes5 = plt.subplots(2, (num_layers + 1) // 2, figsize=(12, 8), squeeze=False)
    axes5 = axes5.flatten()
    for l in range(num_layers):
        ax = axes5[l]
        # Entropy has no entry for the first forward step, so drop the first
        # norm and trim both series to a common length.
        entropy_data = model.entropy_history[l]
        frobenius_data = model.frobenius_history[l][1:]
        min_len = min(len(entropy_data), len(frobenius_data))
        if min_len < 2:
            ax.set_title(f"Layer {l+1}: Not enough data")
            continue
        entropy_data = entropy_data[:min_len]
        frobenius_data = frobenius_data[:min_len]
        sc = ax.scatter(frobenius_data, entropy_data, c=range(min_len),
                        cmap='Blues_r', s=50, alpha=0.8)
        ax.plot(frobenius_data, entropy_data, 'k-', alpha=0.3)
        plt.colorbar(sc, ax=ax, label='Step')
        ax.set_xlabel('Frobenius Norm of Z')
        ax.set_ylabel('Entropy')
        ax.set_title(f'Layer {l+1}: Entropy vs. Knowledge Magnitude')
        ax.grid(True, alpha=0.3)
    # Hide grid cells that have no layer to show.
    for l in range(num_layers, len(axes5)):
        axes5[l].set_visible(False)
    fig5.tight_layout()

    figures = (fig1, fig2, fig3, fig4, fig5)
    # Drop pyplot's own references so figures do not accumulate across
    # callbacks; the returned Figure objects themselves stay valid.
    for fig in figures:
        plt.close(fig)
    return figures
238
+
239
+
240
+
241
+ with gr.Blocks(title="SKA - Structured Knowledge Accumulation") as demo:
242
+ gr.Markdown("# SKA - Structured Knowledge Accumulation")
243
+ gr.Markdown("Interactive visualization of the SKA forward learning algorithm on MNIST. Adjust architecture, steps K, and learning budget τ to explore entropy dynamics.")
244
+
245
+ with gr.Row():
246
+ with gr.Column(scale=1):
247
+ neurons_input = gr.Textbox(label="Layer sizes (comma-separated)", value="256, 128, 64, 10")
248
+ k_slider = gr.Slider(1, 200, value=50, step=1, label="K (forward steps)")
249
+ tau_slider = gr.Slider(0.25, 0.75, value=0.5, step=0.01, label="Learning budget τ (τ = η.K)")
250
+ samples_slider = gr.Slider(1, 100, value=100, step=1, label="Samples per class")
251
+ run_btn = gr.Button("Run SKA", variant="primary")
252
+
253
+ gr.Markdown("---")
254
+ gr.Markdown("### Reference Paper")
255
+ gr.Markdown("[arXiv:2503.13942v1](https://arxiv.org/abs/2503.13942v1)")
256
+
257
+ with gr.Column(scale=2):
258
+ plot_entropy = gr.Plot(label="Entropy Trajectory")
259
+ plot_cosine = gr.Plot(label="Cosine Alignment")
260
+ plot_output = gr.Plot(label="Output Neuron Activation")
261
+ plot_frobenius = gr.Plot(label="Z Tensor Frobenius Norm")
262
+ plot_entropy_vs_frob = gr.Plot(label="Entropy vs Frobenius")
263
+
264
+ run_btn.click(
265
+ fn=run_ska,
266
+ inputs=[neurons_input, k_slider, tau_slider, samples_slider],
267
+ outputs=[plot_entropy, plot_cosine, plot_output, plot_frobenius, plot_entropy_vs_frob],
268
+ )
269
+
270
+ demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
data/MNIST/raw/t10k-images-idx3-ubyte ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fa7898d509279e482958e8ce81c8e77db3f2f8254e26661ceb7762c4d494ce7
3
+ size 7840016
data/MNIST/raw/t10k-images-idx3-ubyte.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d422c7b0a1c1c79245a5bcf07fe86e33eeafee792b84584aec276f5a2dbc4e6
3
+ size 1648877
data/MNIST/raw/t10k-labels-idx1-ubyte ADDED
Binary file (10 kB). View file
 
data/MNIST/raw/t10k-labels-idx1-ubyte.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7ae60f92e00ec6debd23a6088c31dbd2371eca3ffa0defaefb259924204aec6
3
+ size 4542
data/MNIST/raw/train-images-idx3-ubyte ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba891046e6505d7aadcbbe25680a0738ad16aec93bde7f9b65e87a2fc25776db
3
+ size 47040016
data/MNIST/raw/train-images-idx3-ubyte.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:440fcabf73cc546fa21475e81ea370265605f56be210a4024d2ca8f203523609
3
+ size 9912422
data/MNIST/raw/train-labels-idx1-ubyte ADDED
Binary file (60 kB). View file
 
data/MNIST/raw/train-labels-idx1-ubyte.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3552534a0a558bbed6aed32b30c495cca23d567ec52cac8be1a0730e8010255c
3
+ size 28881
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ torch
2
+ torchvision
3
+ matplotlib
4
+ seaborn
5
+ numpy