import torch
import torchvision as tv

# Use the GPU when one is present; otherwise keep the tensors on the CPU so
# that loading the data does not by itself require CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# MNIST training split (suffix 0) and test split (suffix 1). The files are
# fetched into ../MNIST when they are not already there (download = True).
source0 = tv.datasets.MNIST("../MNIST", train = True, download = True)
source1 = tv.datasets.MNIST("../MNIST", train = False, download = True)

# Each split is held on the device in full. unsqueeze(1) inserts a channel
# axis at dim 1 and .float() converts the stored pixels to float32; the pixel
# values are NOT rescaled or normalised here.
DATA0 = source0.data.unsqueeze(1).float().to(device)
DATA1 = source1.data.unsqueeze(1).float().to(device)
TARGET0 = source0.targets.to(device)
TARGET1 = source1.targets.to(device)

# Number of samples per split, read from the loaded tensors rather than
# hard-coded so the batching loops can never run past (or stop short of) the
# data actually present. For standard MNIST these are 60000 and 10000.
samples0, samples1 = DATA0.size(0), DATA1.size(0)

class Model(torch.nn.Module):
    """Small convolutional classifier producing 10 raw class scores.

    Layout: two (5x5 convolution -> ReLU -> 2x2 max-pool) stages followed by
    three fully connected layers. No softmax is applied to the output.

    The first linear layer expects 16 * 4 * 4 flattened features, which is
    what a 1-channel 28x28 input yields (28 -> 24 -> 12 -> 8 -> 4).
    """

    def __init__(self):
        super().__init__()
        nn = torch.nn

        # Stage 0: 1 -> 8 channels; spatial size 28 -> 24 -> 12.
        self.conv0 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=5)
        self.relu0 = nn.ReLU()
        self.pool0 = nn.MaxPool2d(kernel_size=2)

        # Stage 1: 8 -> 16 channels; spatial size 12 -> 8 -> 4.
        self.conv1 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=5)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        self.flat1 = nn.Flatten()

        # Classifier head: 256 -> 128 -> 64 -> 10.
        self.line2 = nn.Linear(in_features=16 * 4 * 4, out_features=128)
        self.relu2 = nn.ReLU()
        self.line3 = nn.Linear(in_features=128, out_features=64)
        self.relu3 = nn.ReLU()
        self.line4 = nn.Linear(in_features=64, out_features=10)

    def forward(self, SIGNAL):
        """Run the network on a batch and return one row of 10 scores per sample."""
        # Convolutional feature extractor.
        features = self.pool0(self.relu0(self.conv0(SIGNAL)))
        features = self.pool1(self.relu1(self.conv1(features)))

        # Flatten everything but the batch axis, then apply the linear head.
        hidden = self.flat1(features)
        hidden = self.relu2(self.line2(hidden))
        hidden = self.relu3(self.line3(hidden))
        return self.line4(hidden)

# Train the CNN on the training split for a fixed number of epochs, printing
# per-epoch mean loss and accuracy for the training and test splits, then save
# the learned weights to "style.pt".

# Keep the model and the running statistics on whatever device already holds
# the data, instead of hard-coding "cuda".
device = DATA0.device
model = Model().to(device)

batch = 1000
optimizer = torch.optim.Adam(model.parameters())
for epoch in range(100):
    # ---- Training pass: one sweep over the training split in fixed order
    # (no shuffling), `batch` samples at a time. ----
    # Model currently has no mode-dependent layers, so train()/eval() do not
    # change results today; they keep the loop correct if such layers are added.
    model.train()
    LOSS0 = torch.zeros((), device = device)
    ACCURACY0 = torch.zeros((), device = device)
    count0 = 0
    for index in range(0, samples0, batch):
        optimizer.zero_grad()
        DATA = DATA0[index : index + batch]
        TARGET = TARGET0[index : index + batch]
        count = TARGET.size(0)
        ACTIVATION = model(DATA)
        LOSS = torch.nn.functional.cross_entropy(ACTIVATION, TARGET)
        # cross_entropy returns the batch mean; weight it by the batch size so
        # the epoch figure is a true per-sample mean. detach() keeps the
        # running statistic out of the autograd graph - without it LOSS0 would
        # chain every batch's graph together for the whole epoch.
        LOSS0 += LOSS.detach() * count
        VALUE = ACTIVATION.detach().argmax(1)
        ACCURACY0 += torch.eq(VALUE, TARGET).sum()
        count0 += count
        LOSS.backward()
        optimizer.step()
    LOSS0 /= count0
    ACCURACY0 /= count0

    # ---- Evaluation pass over the test split; no gradients are recorded. ----
    model.eval()
    with torch.no_grad():
        LOSS1 = torch.zeros((), device = device)
        ACCURACY1 = torch.zeros((), device = device)
        count1 = 0
        for index in range(0, samples1, batch):
            DATA = DATA1[index : index + batch]
            TARGET = TARGET1[index : index + batch]
            ACTIVATION = model(DATA)
            # reduction = "sum" gives the summed per-sample loss directly, so
            # no batch-size weighting is needed here.
            LOSS1 += torch.nn.functional.cross_entropy(ACTIVATION, TARGET,
                                                       reduction = "sum")
            VALUE = ACTIVATION.argmax(1)
            ACCURACY1 += torch.eq(VALUE, TARGET).sum()
            count1 += TARGET.size(0)
        LOSS1 /= count1
        ACCURACY1 /= count1

    # Columns: epoch, train loss, train accuracy, test loss, test accuracy.
    print("%5d %12.3f %4.3f %12.3f %4.3f" %
          (epoch, LOSS0.item(), ACCURACY0.item(),
           LOSS1.item(), ACCURACY1.item()), flush = True)

torch.save(model.state_dict(), "style.pt")
