1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
|
# Eric Jang originally wrote an implementation of MAML in JAX
# (https://github.com/ericjang/maml-jax).
# We translated his implementation from JAX to PyTorch.
from functorch import grad, vmap, make_functional
import matplotlib.pyplot as plt
import math
import torch
import numpy as np
from torch import nn
from torch.nn import functional as F
import matplotlib as mpl
mpl.use('Agg')
class ThreeLayerNet(nn.Module):
def __init__(self):
super(ThreeLayerNet, self).__init__()
self.fc1 = nn.Linear(1, 40)
self.relu1 = nn.ReLU()
self.fc2 = nn.Linear(40, 40)
self.relu2 = nn.ReLU()
self.fc3 = nn.Linear(40, 1)
def forward(self, x):
x = self.fc1(x)
x = self.relu1(x)
x = self.fc2(x)
x = self.relu2(x)
x = self.fc3(x)
return x
# TODO: Use F.mse_loss
def mse_loss(x, y):
return torch.mean((x - y) ** 2)
net, params = make_functional(ThreeLayerNet())
opt = torch.optim.Adam(params, lr=1e-3)
alpha = 0.1
K = 20
losses = []
num_tasks = 4
def sample_tasks(outer_batch_size, inner_batch_size):
# Select amplitude and phase for the task
As = []
phases = []
for _ in range(outer_batch_size):
As.append(np.random.uniform(low=0.1, high=.5))
phases.append(np.random.uniform(low=0., high=np.pi))
def get_batch():
xs, ys = [], []
for A, phase in zip(As, phases):
x = np.random.uniform(low=-5., high=5., size=(inner_batch_size, 1))
y = A * np.sin(x + phase)
xs.append(x)
ys.append(y)
return torch.tensor(xs, dtype=torch.float), torch.tensor(ys, dtype=torch.float)
x1, y1 = get_batch()
x2, y2 = get_batch()
return x1, y1, x2, y2
for it in range(20000):
loss2 = 0.0
opt.zero_grad()
def get_loss_for_task(x1, y1, x2, y2):
def inner_loss(params, x1, y1):
f = net(params, x1)
loss = mse_loss(f, y1)
return loss
grads = grad(inner_loss)(params, x1, y1)
new_params = [(params[i] - alpha * grads[i]) for i in range(len(params))]
v_f = net(new_params, x2)
return mse_loss(v_f, y2)
task = sample_tasks(num_tasks, K)
inner_losses = vmap(get_loss_for_task)(task[0], task[1], task[2], task[3])
loss2 = sum(inner_losses) / len(inner_losses)
loss2.backward()
opt.step()
if it % 100 == 0:
print('Iteration %d -- Outer Loss: %.4f' % (it, loss2))
losses.append(loss2.detach())
t_A = torch.tensor(0.0).uniform_(0.1, 0.5)
t_b = torch.tensor(0.0).uniform_(0.0, math.pi)
t_x = torch.empty(4, 1).uniform_(-5, 5)
t_y = t_A * torch.sin(t_x + t_b)
opt.zero_grad()
t_params = params
for k in range(5):
t_f = net(t_params, t_x)
t_loss = F.l1_loss(t_f, t_y)
grads = torch.autograd.grad(t_loss, t_params, create_graph=True)
t_params = [(t_params[i] - alpha * grads[i]) for i in range(len(params))]
test_x = torch.arange(-2 * math.pi, 2 * math.pi, step=0.01).unsqueeze(1)
test_y = t_A * torch.sin(test_x + t_b)
test_f = net(t_params, test_x)
plt.plot(test_x.data.numpy(), test_y.data.numpy(), label='sin(x)')
plt.plot(test_x.data.numpy(), test_f.data.numpy(), label='net(x)')
plt.plot(t_x.data.numpy(), t_y.data.numpy(), 'o', label='Examples')
plt.legend()
plt.savefig('maml-sine.png')
plt.figure()
plt.plot(np.convolve(losses, [.05] * 20))
plt.savefig('losses.png')
|