import numpy as np
from sklearn import datasets
import torch

# Model dimensions: input columns per sample (before the bias column) and
# number of output classes.
features = 64
classes = 10

# Load the scikit-learn digits data set and pull out samples and labels.
source = datasets.load_digits()
data, target = source.data, source.target

# Prepend a constant column of ones so that row 0 of PARAM acts as the bias.
design = np.insert(data, obj=0, values=1.0, axis=1)

# Boolean indicator matrix: entry (i, k) is True exactly when target[i] == k.
onehot = np.equal(target[:, np.newaxis], np.arange(classes))

# Torch copies of the NumPy arrays used by the training code.
DESIGN = torch.tensor(design, dtype=torch.float32)
TARGET = torch.tensor(target, dtype=torch.int64)
ONEHOT = torch.tensor(onehot)

# Trainable weights, zero-initialised: one row per design column (bias
# included), one column per class.
PARAM = torch.zeros((features + 1, classes), requires_grad=True)

# Full-batch gradient descent on the summed softmax cross-entropy.
optimizer = torch.optim.SGD([PARAM], lr=0.00001)
for epoch in range(1000):
    optimizer.zero_grad()

    # Raw class scores: one row per sample, one column per class.
    ACTIVATION = DESIGN @ PARAM

    # Log-probabilities over the class axis. log_softmax is mathematically
    # log(exp(a) / sum(exp(a))) but is evaluated in a numerically stable way.
    # The explicit exp -> divide -> log chain overflows to inf/inf = NaN for
    # large scores, and yields log(0) = -inf for underflowed probabilities,
    # which the one-hot mask below would turn into NaN (-inf * 0).
    LOG = torch.log_softmax(ACTIVATION, dim=1)

    # Class probabilities (softmax of the scores); not needed for the loss
    # but still bound so later code can inspect them.
    ACTIVITY = torch.exp(LOG)

    # Keep only the negative log-probability of each sample's true class,
    # then sum over all samples and classes to get the scalar loss.
    ENTROPY = -LOG * ONEHOT
    LOSS = torch.sum(ENTROPY)

    LOSS.backward()
    optimizer.step()

    # Flushed on every epoch so progress is visible immediately.
    print(epoch, LOSS.item(), flush = True)
