640 lines
26 KiB
Plaintext
640 lines
26 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "6c6e33cb-72f9-42fa-936a-33b5fe338d15",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"torch.Size([1, 1024, 128]) torch.Size([1, 1024, 128])\n",
|
|
"Causality check: gradients should not flow \"from future to past\"\n",
|
|
"tensor(2.1330e-09) tensor(0.2463)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# %load standalone_hyena.py\n",
|
|
"\"\"\"\n",
|
|
"Simplified standalone version of Hyena: https://arxiv.org/abs/2302.10866, designed for quick experimentation.\n",
|
|
"A complete version is available under `src.models.sequence.hyena`.\n",
|
|
"\"\"\"\n",
|
|
"\n",
|
|
"import math\n",
|
|
"import torch\n",
|
|
"import torch.nn as nn\n",
|
|
"import torch.nn.functional as F\n",
|
|
"from einops import rearrange\n",
|
|
"\n",
|
|
"\n",
|
|
"def fftconv(u, k, D):\n",
|
|
" seqlen = u.shape[-1]\n",
|
|
" fft_size = 2 * seqlen\n",
|
|
" \n",
|
|
" k_f = torch.fft.rfft(k, n=fft_size) / fft_size\n",
|
|
" u_f = torch.fft.rfft(u.to(dtype=k.dtype), n=fft_size)\n",
|
|
" \n",
|
|
" if len(u.shape) > 3: k_f = k_f.unsqueeze(1)\n",
|
|
" y = torch.fft.irfft(u_f * k_f, n=fft_size, norm='forward')[..., :seqlen]\n",
|
|
"\n",
|
|
" out = y + u * D.unsqueeze(-1)\n",
|
|
" return out.to(dtype=u.dtype)\n",
|
|
"\n",
|
|
"\n",
|
|
"@torch.jit.script \n",
|
|
"def mul_sum(q, y):\n",
|
|
" return (q * y).sum(dim=1)\n",
|
|
"\n",
|
|
"class OptimModule(nn.Module):\n",
|
|
" \"\"\" Interface for Module that allows registering buffers/parameters with configurable optimizer hyperparameters \"\"\"\n",
|
|
"\n",
|
|
" def register(self, name, tensor, lr=None, wd=0.0):\n",
|
|
" \"\"\"Register a tensor with a configurable learning rate and weight decay (default 0.0); lr == 0.0 registers it as a buffer instead of a parameter\"\"\"\n",
|
|
"\n",
|
|
" if lr == 0.0:\n",
|
|
" self.register_buffer(name, tensor)\n",
|
|
" else:\n",
|
|
" self.register_parameter(name, nn.Parameter(tensor))\n",
|
|
"\n",
|
|
" optim = {}\n",
|
|
" if lr is not None: optim[\"lr\"] = lr\n",
|
|
" if wd is not None: optim[\"weight_decay\"] = wd\n",
|
|
" setattr(getattr(self, name), \"_optim\", optim)\n",
|
|
" \n",
|
|
"\n",
|
|
"class Sin(nn.Module):\n",
|
|
" def __init__(self, dim, w=10, train_freq=True):\n",
|
|
" super().__init__()\n",
|
|
" self.freq = nn.Parameter(w * torch.ones(1, dim)) if train_freq else w * torch.ones(1, dim)\n",
|
|
"\n",
|
|
" def forward(self, x):\n",
|
|
" return torch.sin(self.freq * x)\n",
|
|
" \n",
|
|
" \n",
|
|
"class PositionalEmbedding(OptimModule):\n",
|
|
" def __init__(self, emb_dim: int, seq_len: int, lr_pos_emb: float=1e-5, **kwargs): \n",
|
|
" \"\"\"Complex exponential positional embeddings for Hyena filters.\"\"\" \n",
|
|
" super().__init__()\n",
|
|
" \n",
|
|
" self.seq_len = seq_len\n",
|
|
" # The time embedding fed to the filters is normalized so that t_f = 1\n",
|
|
" t = torch.linspace(0, 1, self.seq_len)[None, :, None] # 1, L, 1\n",
|
|
" \n",
|
|
" if emb_dim > 1:\n",
|
|
" bands = (emb_dim - 1) // 2 \n",
|
|
" # To compute the right embeddings we use the \"proper\" linspace \n",
|
|
" t_rescaled = torch.linspace(0, seq_len - 1, seq_len)[None, :, None]\n",
|
|
" w = 2 * math.pi * t_rescaled / seq_len # 1, L, 1 \n",
|
|
" \n",
|
|
" f = torch.linspace(1e-4, bands - 1, bands)[None, None] \n",
|
|
" z = torch.exp(-1j * f * w)\n",
|
|
" z = torch.cat([t, z.real, z.imag], dim=-1)\n",
|
|
" self.register(\"z\", z, lr=lr_pos_emb) \n",
|
|
" self.register(\"t\", t, lr=0.0)\n",
|
|
" \n",
|
|
" def forward(self, L):\n",
|
|
" return self.z[:, :L], self.t[:, :L]\n",
|
|
" \n",
|
|
"\n",
|
|
"class ExponentialModulation(OptimModule):\n",
|
|
" def __init__(\n",
|
|
" self,\n",
|
|
" d_model,\n",
|
|
" fast_decay_pct=0.3,\n",
|
|
" slow_decay_pct=1.5,\n",
|
|
" target=1e-2,\n",
|
|
" modulation_lr=0.0,\n",
|
|
" modulate: bool=True,\n",
|
|
" shift: float = 0.0,\n",
|
|
" **kwargs\n",
|
|
" ):\n",
|
|
" super().__init__()\n",
|
|
" self.modulate = modulate\n",
|
|
" self.shift = shift\n",
|
|
" max_decay = math.log(target) / fast_decay_pct\n",
|
|
" min_decay = math.log(target) / slow_decay_pct\n",
|
|
" deltas = torch.linspace(min_decay, max_decay, d_model)[None, None]\n",
|
|
" self.register(\"deltas\", deltas, lr=modulation_lr)\n",
|
|
" \n",
|
|
" def forward(self, t, x):\n",
|
|
" if self.modulate:\n",
|
|
" decay = torch.exp(-t * self.deltas.abs()) \n",
|
|
" x = x * (decay + self.shift)\n",
|
|
" return x \n",
|
|
"\n",
|
|
"\n",
|
|
"class HyenaFilter(OptimModule):\n",
|
|
" def __init__(\n",
|
|
" self, \n",
|
|
" d_model,\n",
|
|
" emb_dim=3, # dim of input to MLP, augments with positional encoding\n",
|
|
" order=16, # width of the implicit MLP \n",
|
|
" fused_fft_conv=False,\n",
|
|
" seq_len=1024, \n",
|
|
" lr=1e-3, \n",
|
|
" lr_pos_emb=1e-5,\n",
|
|
" dropout=0.0, \n",
|
|
" w=1, # frequency of periodic activations \n",
|
|
" wd=0, # weight decay of kernel parameters \n",
|
|
" bias=True,\n",
|
|
" num_inner_mlps=2,\n",
|
|
" normalized=False,\n",
|
|
" **kwargs\n",
|
|
" ):\n",
|
|
" \"\"\"\n",
|
|
" Implicit long filter with modulation.\n",
|
|
" \n",
|
|
" Args:\n",
|
|
" d_model: number of channels in the input\n",
|
|
" emb_dim: dimension of the positional encoding (`emb_dim` - 1) // 2 is the number of bands\n",
|
|
" order: width of the FFN\n",
|
|
" num_inner_mlps: number of inner linear layers inside filter MLP\n",
|
|
" \"\"\"\n",
|
|
" super().__init__()\n",
|
|
" self.d_model = d_model\n",
|
|
" self.use_bias = bias\n",
|
|
" self.fused_fft_conv = fused_fft_conv\n",
|
|
" self.bias = nn.Parameter(torch.randn(self.d_model))\n",
|
|
" self.dropout = nn.Dropout(dropout)\n",
|
|
" \n",
|
|
" act = Sin(dim=order, w=w)\n",
|
|
" self.emb_dim = emb_dim\n",
|
|
" assert emb_dim % 2 != 0 and emb_dim >= 3, \"emb_dim must be odd and greater or equal to 3 (time, sine and cosine)\"\n",
|
|
" self.seq_len = seq_len\n",
|
|
" \n",
|
|
" self.pos_emb = PositionalEmbedding(emb_dim, seq_len, lr_pos_emb)\n",
|
|
" \n",
|
|
" self.implicit_filter = nn.Sequential(\n",
|
|
" nn.Linear(emb_dim, order),\n",
|
|
" act,\n",
|
|
" )\n",
|
|
" for i in range(num_inner_mlps):\n",
|
|
" self.implicit_filter.append(nn.Linear(order, order))\n",
|
|
" self.implicit_filter.append(act)\n",
|
|
"\n",
|
|
" self.implicit_filter.append(nn.Linear(order, d_model, bias=False))\n",
|
|
" \n",
|
|
" self.modulation = ExponentialModulation(d_model, **kwargs)\n",
|
|
" \n",
|
|
" self.normalized = normalized\n",
|
|
" for c in self.implicit_filter.children():\n",
|
|
" for name, v in c.state_dict().items(): \n",
|
|
" optim = {\"weight_decay\": wd, \"lr\": lr}\n",
|
|
" setattr(getattr(c, name), \"_optim\", optim)\n",
|
|
"\n",
|
|
" def filter(self, L, *args, **kwargs):\n",
|
|
" z, t = self.pos_emb(L)\n",
|
|
" h = self.implicit_filter(z)\n",
|
|
" h = self.modulation(t, h)\n",
|
|
" return h\n",
|
|
"\n",
|
|
" def forward(self, x, L, k=None, bias=None, *args, **kwargs):\n",
|
|
" if k is None: k = self.filter(L)\n",
|
|
" \n",
|
|
" # Ensure compatibility with filters that return a tuple \n",
|
|
" k = k[0] if type(k) is tuple else k \n",
|
|
"\n",
|
|
" y = fftconv(x, k, bias)\n",
|
|
" return y\n",
|
|
" \n",
|
|
" \n",
|
|
"class HyenaOperator(nn.Module):\n",
|
|
" def __init__(\n",
|
|
" self,\n",
|
|
" d_model,\n",
|
|
" l_max,\n",
|
|
" order=2, \n",
|
|
" filter_order=64,\n",
|
|
" dropout=0.0, \n",
|
|
" filter_dropout=0.0, \n",
|
|
" **filter_args,\n",
|
|
" ):\n",
|
|
" r\"\"\"\n",
|
|
" Hyena operator described in the paper https://arxiv.org/pdf/2302.10866.pdf\n",
|
|
" \n",
|
|
" Args:\n",
|
|
" d_model (int): Dimension of the input and output embeddings (width of the layer)\n",
|
|
" l_max: (int): Maximum input sequence length (required; no default)\n",
|
|
" order: (int): Depth of the Hyena recurrence. Defaults to 2\n",
|
|
" dropout: (float): Dropout probability. Defaults to 0.0\n",
|
|
" filter_dropout: (float): Dropout probability for the filter. Defaults to 0.0\n",
|
|
" \"\"\"\n",
|
|
" super().__init__()\n",
|
|
" self.d_model = d_model\n",
|
|
" self.l_max = l_max\n",
|
|
" self.order = order\n",
|
|
" inner_width = d_model * (order + 1)\n",
|
|
" self.dropout = nn.Dropout(dropout)\n",
|
|
" self.in_proj = nn.Linear(d_model, inner_width)\n",
|
|
" self.out_proj = nn.Linear(d_model, d_model)\n",
|
|
" \n",
|
|
" self.short_filter = nn.Conv1d(\n",
|
|
" inner_width, \n",
|
|
" inner_width, \n",
|
|
" 3,\n",
|
|
" padding=2,\n",
|
|
" groups=inner_width\n",
|
|
" )\n",
|
|
" self.filter_fn = HyenaFilter(\n",
|
|
" d_model * (order - 1), \n",
|
|
" order=filter_order, \n",
|
|
" seq_len=l_max,\n",
|
|
" channels=1, \n",
|
|
" dropout=filter_dropout, \n",
|
|
" **filter_args\n",
|
|
" ) \n",
|
|
"\n",
|
|
" def forward(self, u, *args, **kwargs):\n",
|
|
" l = u.size(-2)\n",
|
|
" l_filter = min(l, self.l_max)\n",
|
|
" u = self.in_proj(u)\n",
|
|
" u = rearrange(u, 'b l d -> b d l')\n",
|
|
" \n",
|
|
" uc = self.short_filter(u)[...,:l_filter] \n",
|
|
" *x, v = uc.split(self.d_model, dim=1)\n",
|
|
" \n",
|
|
" k = self.filter_fn.filter(l_filter)[0]\n",
|
|
" k = rearrange(k, 'l (o d) -> o d l', o=self.order - 1)\n",
|
|
" bias = rearrange(self.filter_fn.bias, '(o d) -> o d', o=self.order - 1)\n",
|
|
" \n",
|
|
" for o, x_i in enumerate(reversed(x[1:])):\n",
|
|
" v = self.dropout(v * x_i)\n",
|
|
" v = self.filter_fn(v, l_filter, k=k[o], bias=bias[o])\n",
|
|
"\n",
|
|
" y = rearrange(v * x[0], 'b d l -> b l d')\n",
|
|
"\n",
|
|
" y = self.out_proj(y)\n",
|
|
" return y\n",
|
|
"\n",
|
|
" \n",
|
|
" \n",
|
|
"if __name__ == \"__main__\":\n",
|
|
" layer = HyenaOperator(\n",
|
|
" \n",
|
|
" d_model=128, \n",
|
|
" l_max=1024, \n",
|
|
" order=2, \n",
|
|
" filter_order=64\n",
|
|
" )\n",
|
|
" x = torch.randn(1, 1024, 128, requires_grad=True)\n",
|
|
" y = layer(x)\n",
|
|
" \n",
|
|
" print(x.shape, y.shape)\n",
|
|
" \n",
|
|
" grad = torch.autograd.grad(y[:, 10, :].sum(), x)[0]\n",
|
|
" print('Causality check: gradients should not flow \"from future to past\"')\n",
|
|
" print(grad[0, 11, :].sum(), grad[0, 9, :].sum())\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "032ef08a-8cc6-491a-9eb8-4a6b3f2d165e",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"torch.Size([1, 1023, 1]) torch.Size([1, 1])\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"class HyenaOperatorAutoregressive1D(nn.Module):\n",
|
|
" def __init__(\n",
|
|
" self,\n",
|
|
" d_model,\n",
|
|
" l_max,\n",
|
|
" order=2, \n",
|
|
" filter_order=64,\n",
|
|
" dropout=0.0, \n",
|
|
" filter_dropout=0.0, \n",
|
|
" **filter_args,\n",
|
|
" ):\n",
|
|
" super().__init__()\n",
|
|
"\n",
|
|
" self.l_max = l_max\n",
|
|
" self.d_model = d_model\n",
|
|
" self.l_max = l_max\n",
|
|
" self.order = order\n",
|
|
" inner_width = d_model * (order + 1)\n",
|
|
"\n",
|
|
" self.dropout = nn.Dropout(dropout)\n",
|
|
" self.in_proj = nn.Linear(d_model, inner_width)\n",
|
|
" self.out_proj = nn.Linear(d_model, d_model)\n",
|
|
" self.fc_before = nn.Linear(1, d_model) # Fully connected layer before the main layer\n",
|
|
" self.fc_after = nn.Linear(d_model, 1) # Fully connected layer after the main layer\n",
|
|
"\n",
|
|
" self.operator = HyenaOperator(\n",
|
|
" d_model=d_model,\n",
|
|
" l_max=l_max,\n",
|
|
" order=order, \n",
|
|
" filter_order=filter_order,\n",
|
|
" dropout=dropout, \n",
|
|
" filter_dropout=filter_dropout, \n",
|
|
" **filter_args,\n",
|
|
" )\n",
|
|
"\n",
|
|
" def forward(self, u, *args, **kwargs):\n",
|
|
" # Increase the channel dimension from 1 to d_model\n",
|
|
" u = self.fc_before(u) \n",
|
|
" # Pass through the operator\n",
|
|
" u = self.operator(u)\n",
|
|
" last_state = u[:,-1,:]\n",
|
|
" # Decrease the channel dimension back to 1\n",
|
|
" y = self.fc_after(last_state)\n",
|
|
" return y,last_state\n",
|
|
"\n",
|
|
"\n",
|
|
"if __name__ == \"__main__\":\n",
|
|
" layer = HyenaOperatorAutoregressive1D(\n",
|
|
" d_model=128, \n",
|
|
" l_max=1024, \n",
|
|
" order=2, \n",
|
|
" filter_order=64\n",
|
|
" )\n",
|
|
"\n",
|
|
" x = torch.randn(1, 1023, 1, requires_grad=True) # 1D time series input\n",
|
|
" y, last_state = layer(x)\n",
|
|
"\n",
|
|
" #import pdb;pdb.set_trace()\n",
|
|
" print(x.shape, y.shape) # x is [1, 1023, 1]; y is [1, 1] (prediction from the last time step only)\n",
|
|
"\n",
|
|
" #grad = torch.autograd.grad(y[:, 10, 0].sum(), x)[0]\n",
|
|
" #print('Causality check: gradients should not flow \"from future to past\"')\n",
|
|
" #print(grad[0, 11, 0].sum(), grad[0, 9, 0].sum())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"id": "80cde67b-992f-4cb0-8824-4a6b7e4984ca",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Train Epoch: 1 [0/640 (0%)]\tLoss: 0.736030\n",
|
|
"Train Epoch: 2 [0/640 (0%)]\tLoss: 0.013385\n",
|
|
"Train Epoch: 3 [0/640 (0%)]\tLoss: 0.019001\n",
|
|
"Train Epoch: 4 [0/640 (0%)]\tLoss: 0.010262\n",
|
|
"Train Epoch: 5 [0/640 (0%)]\tLoss: 0.005347\n",
|
|
"Train Epoch: 6 [0/640 (0%)]\tLoss: 0.006345\n",
|
|
"Train Epoch: 7 [0/640 (0%)]\tLoss: 0.004454\n",
|
|
"Train Epoch: 8 [0/640 (0%)]\tLoss: 0.003857\n",
|
|
"Train Epoch: 9 [0/640 (0%)]\tLoss: 0.003062\n",
|
|
"Train Epoch: 10 [0/640 (0%)]\tLoss: 0.002607\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"import torch\n",
|
|
"import torch.optim as optim\n",
|
|
"import torch.nn.functional as F\n",
|
|
"from torch.utils.data import DataLoader, Dataset\n",
|
|
"import numpy as np\n",
|
|
"\n",
|
|
"def generate_sine_with_noise(n_points, frequency, phase, amplitude, noise_sd):\n",
|
|
" # Generate an array of points from 0 to 2*pi\n",
|
|
" x = np.linspace(0, 2*np.pi, n_points)\n",
|
|
" \n",
|
|
" # Generate the sine wave\n",
|
|
" sine_wave = amplitude * np.sin(frequency * x + phase)\n",
|
|
" \n",
|
|
" # Generate Gaussian noise\n",
|
|
" noise = np.random.normal(scale=noise_sd, size=n_points)\n",
|
|
" \n",
|
|
" # Add the noise to the sine wave\n",
|
|
" sine_wave_noise = sine_wave + noise\n",
|
|
" \n",
|
|
" # Stack the sine wave and the noisy sine wave into a 2D array\n",
|
|
" output = np.column_stack((sine_wave, sine_wave_noise))\n",
|
|
" \n",
|
|
" return output\n",
|
|
" \n",
|
|
" \n",
|
|
"class SineDataset(Dataset):\n",
|
|
" def __init__(self, n_samples, n_points, frequency_range, phase_range, amplitude_range, noise_sd_range):\n",
|
|
" self.n_samples = n_samples\n",
|
|
" self.n_points = n_points\n",
|
|
" self.frequency_range = frequency_range\n",
|
|
" self.phase_range = phase_range\n",
|
|
" self.amplitude_range = amplitude_range\n",
|
|
" self.noise_sd_range = noise_sd_range\n",
|
|
"\n",
|
|
" def __len__(self):\n",
|
|
" return self.n_samples\n",
|
|
"\n",
|
|
" def __getitem__(self, idx):\n",
|
|
" # Generate random attributes\n",
|
|
" frequency = np.random.uniform(*self.frequency_range)\n",
|
|
" phase = np.random.uniform(*self.phase_range)\n",
|
|
" amplitude = np.random.uniform(*self.amplitude_range)\n",
|
|
" noise_sd = np.random.uniform(*self.noise_sd_range)\n",
|
|
"\n",
|
|
" # Generate sine wave with the random attributes\n",
|
|
" sine_wave = generate_sine_with_noise(self.n_points, frequency, phase, amplitude, noise_sd)\n",
|
|
"\n",
|
|
" # Return the noisy series without its last point (input), the clean last point (target), and the parameters\n",
|
|
" return torch.Tensor(sine_wave[:-1, 1, None]), torch.Tensor(sine_wave[-1:, 0]), torch.Tensor([frequency, phase, amplitude, noise_sd])\n",
|
|
"\n",
|
|
"\n",
|
|
"\n",
|
|
"# Usage:\n",
|
|
"dataset = SineDataset(640, 1025, (1, 3), (0, 2*np.pi), (0.5, 1.5), (0.05, 0.15))\n",
|
|
"\n",
|
|
"def train(model, device, train_loader, optimizer, epoch):\n",
|
|
" model.train()\n",
|
|
" for batch_idx, (data, target, params) in enumerate(train_loader):\n",
|
|
" #data = data[...,None]\n",
|
|
" data, target = data.to(device), target.to(device)\n",
|
|
" optimizer.zero_grad()\n",
|
|
" output,last_state = model(data)\n",
|
|
" #import pdb;pdb.set_trace()\n",
|
|
"\n",
|
|
" loss = F.mse_loss(output, target)\n",
|
|
" loss.backward()\n",
|
|
" optimizer.step()\n",
|
|
" if batch_idx % 10 == 0:\n",
|
|
" print('Train Epoch: {} [{}/{} ({:.0f}%)]\\tLoss: {:.6f}'.format(\n",
|
|
" epoch, batch_idx * len(data), len(train_loader.dataset),\n",
|
|
" 100. * batch_idx / len(train_loader), loss.item()))\n",
|
|
"\n",
|
|
"if __name__ == \"__main__\":\n",
|
|
" device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
|
|
"\n",
|
|
" model = HyenaOperatorAutoregressive1D(\n",
|
|
" d_model=128, \n",
|
|
" l_max=1024, \n",
|
|
" order=2, \n",
|
|
" filter_order=64\n",
|
|
" ).to(device)\n",
|
|
"\n",
|
|
" optimizer = optim.Adam(model.parameters())\n",
|
|
"\n",
|
|
" # Reuse the 640-sample `dataset` built above (a 10000-sample variant is left commented out below)\n",
|
|
" #dataset = SineDataset(10000, 1025, 2, 0, 1, 0.1)\n",
|
|
" train_loader = DataLoader(dataset, batch_size=64, shuffle=True)\n",
|
|
"\n",
|
|
" for epoch in range(1, 11): # Train for 10 epochs\n",
|
|
" train(model, device, train_loader, optimizer, epoch)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "cc9f9031-5ee1-49f8-a70f-ad85ca015596",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 17,
|
|
"id": "1b763e03-baab-4b02-bae0-5747461bca7f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def correlate(model, device, data_loader):\n",
|
|
" model.eval()\n",
|
|
" correlations = {key: [] for key in [\"frequency\", \"phase\", \"amplitude\", \"noise_sd\"]}\n",
|
|
" with torch.no_grad():\n",
|
|
" for data, target, params in data_loader:\n",
|
|
"\n",
|
|
" data, target = data.to(device), target.to(device)\n",
|
|
" output, last_state = model(data)\n",
|
|
" last_state_np = last_state.cpu().numpy()\n",
|
|
" params_np = params.cpu().numpy()\n",
|
|
" #import pdb;pdb.set_trace()\n",
|
|
"\n",
|
|
" # Compute correlations between last_state and parameters\n",
|
|
" for i, key in enumerate(correlations.keys()):\n",
|
|
" correlation = np.corrcoef(last_state_np.squeeze(), params_np[:,i])[0,1]\n",
|
|
" correlations[key].append(correlation)\n",
|
|
" \n",
|
|
" # Average correlations over all batches\n",
|
|
" avg_correlations = {key: np.mean(value) for key, value in correlations.items()}\n",
|
|
" return avg_correlations"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 18,
|
|
"id": "f4c78c51-a538-4d24-ab7b-fb6035c78df8",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"> \u001b[0;32m/tmp/ipykernel_9454/3342195123.py\u001b[0m(14)\u001b[0;36mcorrelate\u001b[0;34m()\u001b[0m\n",
|
|
"\u001b[0;32m 12 \u001b[0;31m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
"\u001b[0m\u001b[0;32m 13 \u001b[0;31m \u001b[0;31m# Compute correlations between last_state and parameters\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
"\u001b[0m\u001b[0;32m---> 14 \u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcorrelations\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkeys\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
"\u001b[0m\u001b[0;32m 15 \u001b[0;31m \u001b[0mcorrelation\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcorrcoef\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlast_state_np\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msqueeze\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparams_np\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
"\u001b[0m\u001b[0;32m 16 \u001b[0;31m \u001b[0mcorrelations\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcorrelation\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
"\u001b[0m\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdin",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"ipdb> last_state_np.shape\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"(64, 128)\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdin",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"ipdb> last_state_np[0]\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"array([ 0.13440676, -0.03606963, -0.4140934 , 0.5431792 , 0.36556095,\n",
|
|
" 0.2256596 , -0.4616309 , -0.05567896, 0.17625177, -0.23529659,\n",
|
|
" -0.5208519 , 0.29691923, 0.15615058, 0.31342992, -0.5054718 ,\n",
|
|
" -0.33130994, -0.03956199, 0.31403548, -0.15925817, -0.22006416,\n",
|
|
" 0.00838468, -0.30691615, -0.1828884 , -0.52498204, 0.07198659,\n",
|
|
" 0.38572663, -0.27560705, 0.12110637, -0.17199083, 0.3913066 ,\n",
|
|
" -0.03978934, -0.21167544, -0.43025637, 0.20562531, 0.3000516 ,\n",
|
|
" -0.6784174 , -0.04233613, 0.4706083 , 0.20292807, 0.49932548,\n",
|
|
" 0.00203749, 0.2665777 , -0.16989222, 0.40648764, 0.22203793,\n",
|
|
" -0.44289762, 0.20751204, -0.38801843, -0.001487 , -0.49365598,\n",
|
|
" 0.05991718, -0.10120638, 0.36523518, -0.15450253, 0.11142011,\n",
|
|
" -0.20295474, 0.12229299, 0.09449576, -0.3422598 , 0.18969077,\n",
|
|
" 0.517254 , 0.08046471, 0.02134303, -0.35802346, -0.26192123,\n",
|
|
" 0.26145002, 0.11439252, 0.03314593, -0.15331428, 0.42282102,\n",
|
|
" 0.6026961 , -0.04233361, -0.5652172 , 0.33544067, 0.05744396,\n",
|
|
" 0.43544483, 0.2176097 , 0.22265801, -0.03894311, -0.01405966,\n",
|
|
" 0.23479447, 0.32931918, 0.21597862, 0.40402904, 0.20630498,\n",
|
|
" 0.09036086, -0.16922598, -0.1774486 , -0.14753146, -0.22214624,\n",
|
|
" -0.19101782, -0.09274255, 0.10928088, -0.01354241, -0.3864469 ,\n",
|
|
" 0.46331462, -0.38134843, -0.07766411, 0.750954 , -0.06306303,\n",
|
|
" -0.33691666, -0.1798551 , 0.19826202, -0.13544285, 0.01956506,\n",
|
|
" 0.6431204 , -0.11272874, 0.1345196 , 0.23029736, 0.28865197,\n",
|
|
" 0.70087713, 0.3592593 , 0.30329305, -0.26943353, -0.11942452,\n",
|
|
" -0.21187985, 0.19452253, 0.05659255, -0.00958484, -0.33417243,\n",
|
|
" -0.14836329, 0.28580692, 0.20885246, 0.18010336, 0.56253076,\n",
|
|
" -0.25303417, 0.0189368 , 0.2504725 ], dtype=float32)\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdin",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"ipdb> q\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"correlate(model,\"cpu\",train_loader)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "e1558ffb-4699-4a8c-b05d-c3ac31a3829f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.10.10"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|