In [None]:
import torch

# Naive Implementation of Feed Forward Layers

## Neural Network

In [None]:
# configuration: RNG seed and problem dimensions
# Seed PyTorch's global RNG so every torch.randn draw in this notebook
# (parameters, inputs, targets) is identical after "Restart Kernel -> Run All".
SEED = 0
_ = torch.manual_seed(SEED)  # bind the returned Generator so the cell shows no output

D = 3  # number of input features
K = 5  # width of every hidden layer

In [None]:
# parameters of the hand-written network
## layer 1: takes the D-dimensional input to K hidden units
W1 = torch.randn(K, D, requires_grad=True)  # weight matrix, shape (K, D)
b1 = torch.randn(K, 1, requires_grad=True)  # bias column, shape (K, 1)

In [None]:
# show the randomly initialised first-layer weight matrix
print(W1)

In [None]:
# show the randomly initialised first-layer bias
print(b1)

In [None]:
## layer 2: K hidden units -> K hidden units
W2 = torch.randn(K, K, requires_grad=True)  # weight matrix, shape (K, K)
b2 = torch.randn(K, 1, requires_grad=True)  # bias column, shape (K, 1)

In [None]:
## layer 3: K hidden units -> K hidden units
W3 = torch.randn(K, K, requires_grad=True)  # weight matrix, shape (K, K)
b3 = torch.randn(K, 1, requires_grad=True)  # bias column, shape (K, 1)

In [None]:
## output layer: K hidden units -> a single value
W4 = torch.randn(1, K, requires_grad=True)  # weight row, shape (1, K)
b4 = torch.randn(1, requires_grad=True)     # bias, shape (1,)

In [None]:
def sigmoid(x):
    """Element-wise logistic function, 1 / (1 + exp(-x)).

    Delegates to ``torch.sigmoid`` instead of spelling the formula out.
    Written literally, ``exp(-x)`` overflows to ``inf`` for strongly
    negative inputs; the forward value is still 0, but back-propagating
    through that ``inf`` multiplies 0 by ``inf`` and yields NaN gradients.

    Args:
        x: tensor of pre-activations (any shape).

    Returns:
        Tensor with the same shape as ``x`` and values in [0, 1].
    """
    return torch.sigmoid(x)

## Forward

In [None]:
# one training example: input column vector, shape (D, 1)
x = torch.randn(D, 1, requires_grad=True)

# regression target for that example, shape (1,)
y_ground = torch.randn(1)

In [None]:
# forward pass
## layer 1: affine map W1 x + b1, then the sigmoid non-linearity
z1 = W1 @ x + b1
a1 = sigmoid(z1)

print(a1)

In [None]:
## layer 2: feeds on the activations of layer 1
z2 = W2 @ a1 + b2
a2 = sigmoid(z2)

print(a2)

In [None]:
## layer 3: feeds on the activations of layer 2
z3 = W3 @ a2 + b3
a3 = sigmoid(z3)

print(a3)

In [None]:
## output layer: its sigmoid activation is the network's prediction
z4 = W4 @ a3 + b4
y_pred = sigmoid(z4)

print(y_pred)

In [None]:
# squared-error loss between the prediction and the target
residual = y_pred - y_ground
J = residual ** 2
print(J)

## Backward

In [None]:
# backward() has not been called yet in run order, so no gradient is
# expected to be stored on W2 / b2 at this point
print('the gradient of W2:', W2.grad, sep='\n')
print('the gradient of b2:', b2.grad, sep='\n')

In [None]:
# backward: back-propagate from the loss J, which stores a gradient in .grad
# for the requires_grad=True tensors that J was computed from
J.backward()

In [None]:
# same inspection as before, now that backward() has run
print('the gradient of W2:', W2.grad, sep='\n')
print('the gradient of b2:', b2.grad, sep='\n')

## Update the Parameter

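Gradient-descent update rule, where $\mathrm{lr}$ is the learning rate and $J$ is the loss:

$$W \leftarrow W - \mathrm{lr} \cdot \frac{\partial J}{\partial W}$$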

In [None]:
# learning rate: the step size in W <- W - lr * W_gradient
lr = 0.1

In [None]:
# W2 as it stands before the optimizer step
print('the weight of W2 before updation:', W2, sep='\n')

In [None]:
# Work out by hand what one gradient-descent step should do to W2, so it can
# be compared with the optimizer's result further down.
# no_grad() keeps this bookkeeping out of the autograd graph; the subtraction
# allocates a new tensor, so the value is a snapshot that a later update of
# W2 does not alter.
with torch.no_grad():
    expected_new_W2 = W2 - lr * W2.grad
print(expected_new_W2)

In [None]:
# plain SGD over every parameter of the hand-written network
trainable = [W1, b1, W2, b2, W3, b3, W4, b4]
optimizer = torch.optim.SGD(trainable, lr=lr)

In [None]:
# apply one update to every parameter handed to the optimizer, using the
# gradients currently stored in their .grad
optimizer.step()

In [None]:
# W2 once the optimizer step has been applied
print('the weight of W2 after updation:', W2, sep='\n')

In [None]:
# Check that the optimizer produced the hand-computed update.
# Compare with a tolerance rather than bit-for-bit: the optimizer may evaluate
# W2 + (-lr) * grad with different rounding than W2 - (lr * grad), so exact
# float equality can report a spurious mismatch in the last bit.
torch.allclose(W2.detach(), expected_new_W2.detach())

# PyTorch Implementation of a Feed-Forward Network (FFN)

## FFN Layer

```
W1 = torch.randn((K, D), requires_grad=True)

b1 = torch.randn((K, 1), requires_grad=True)

def sigmoid(x):
    ...
```

In [None]:
# define
# torch.nn.Linear bundles a weight and (with bias=True) a bias, taking the
# place of the hand-made W1 / b1 pair from the naive implementation
layer1 = torch.nn.Linear(in_features=D, out_features=K, bias=True)

In [None]:
# input: a flat vector with D entries (no trailing column axis this time)
x = torch.randn(D)

```
z1 = torch.matmul(W1, x) + b1
a1 = sigmoid(z1)
```

In [None]:
# forward: the layer applies its affine map, then sigmoid element-wise
z1 = layer1(x)
a1 = torch.sigmoid(z1)

## Neural Network

In [None]:
class neural_network(torch.nn.Module):
    """Four-layer fully connected network with sigmoid activations.

    Layer sizes are D -> K -> K -> K -> 1. A sigmoid follows every linear
    layer, including the last one, so the output lies between 0 and 1.
    """

    def __init__(self, D, K):
        """Create the four linear layers.

        Args:
            D: number of input features.
            K: number of units in each of the three hidden layers.
        """
        super().__init__()
        self.layer1 = torch.nn.Linear(D, K)
        self.layer2 = torch.nn.Linear(K, K)
        self.layer3 = torch.nn.Linear(K, K)
        self.layer4 = torch.nn.Linear(K, 1)

    def forward(self, x):
        """Run ``x`` through all four layers.

        Args:
            x: input tensor whose last dimension has D entries.

        Returns:
            The sigmoid activation of the fourth layer (last dimension 1).
        """
        hidden = x
        # every stage is "linear map, then sigmoid"; chain them in order
        for linear in (self.layer1, self.layer2, self.layer3, self.layer4):
            hidden = torch.sigmoid(linear(hidden))
        return hidden

In [None]:
# build the module-based network and an SGD optimizer over its parameters
# (this rebinds `optimizer`, replacing the one built for the naive network)
network = neural_network(D=D, K=K)

optimizer = torch.optim.SGD(params=network.parameters(), lr=lr)

In [None]:
# one training example: flat input vector with D entries
x = torch.randn(D)

# regression target for that example, shape (1,)
y_ground = torch.randn(1)

In [None]:
# forward: calling the module runs its forward() method on x
y_pred = network(x)

In [None]:
# squared-error loss between the prediction and the target
residual = y_pred - y_ground
J = residual ** 2
print(J)

In [None]:
# backward: back-propagate from the loss J into the network's parameters
J.backward()

In [None]:
# update the parameters using the gradients stored by the backward pass
optimizer.step()

In [None]:
# The standard training-loop pattern, run on a few random (input, target)
# pairs so that the cell executes; swap `toy_dataset` for real data.
N_SAMPLES = 4
toy_dataset = [(torch.randn((D,)), torch.randn((1,))) for _ in range(N_SAMPLES)]

for x, y_ground in toy_dataset:
    y_pred = network(x)          # forward
    J = (y_pred - y_ground) ** 2 # calculate the loss
    optimizer.zero_grad()        # clear the stored gradients!
    J.backward()                 # backward
    optimizer.step()             # update the parameters

In [None]:
# gradients currently stored on each parameter of the network
[param.grad for param in network.parameters()]

In [None]:
# clear the gradients stored on the parameters this optimizer manages
optimizer.zero_grad()

In [None]:
# same listing after zero_grad(); depending on the PyTorch version the
# entries are reset to None or to zero tensors
[param.grad for param in network.parameters()]