import numpy as np
from sklearn import datasets

# Softmax (multinomial logistic) regression trained with full-batch gradient
# descent on the data returned by ``datasets.load_digits()``.

features = 64
classes = 10
epochs = 1000
learning_rate = 0.0001


def log_softmax(scores):
    """Return the row-wise log of the softmax of *scores*.

    The row maximum is subtracted before exponentiating, so the largest
    exponent is exp(0) = 1. This keeps ``np.exp`` from overflowing and
    guarantees every returned log-probability is finite.
    """
    shifted = scores - np.max(scores, axis=1, keepdims=True)
    normalizer = np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))
    return shifted - normalizer


source = datasets.load_digits()
data = source.data
target = source.target
# Prepend a column of ones so the first row of `param` acts as the bias term.
design = np.insert(data, 0, 1., axis=1)
# Boolean mask: row n is True only in the column equal to target[n].
onehot = (np.arange(classes) == target[:, None])

param = np.zeros((1 + features, classes))

for epoch in range(epochs):
    activation = design @ param
    log_activity = log_softmax(activation)
    activity = np.exp(log_activity)
    # Cross-entropy summed over samples: pick the log-probability of the true
    # class directly instead of multiplying by the mask, which would yield
    # nan (inf * 0) whenever a wrong-class probability underflowed to zero.
    loss = -np.sum(log_activity[onehot])
    # Gradient of the summed cross-entropy w.r.t. `param`; algebraically the
    # same as summing design[n] * (activity[n] - onehot[n]) over all samples,
    # without materialising a (samples, classes, classes, features) temporary.
    grad = design.T @ (activity - onehot)
    param -= learning_rate * grad
    print(epoch, loss, flush=True)

# Evaluate on the same samples that were used for training.
activation = design @ param
activity = np.exp(log_softmax(activation))
value = np.argmax(activity, 1)
hit = (value == target)
accuracy = np.mean(hit)

print(accuracy)
