---
jupytext:
  text_representation:
    extension: .md
    format_name: myst
    format_version: 0.13
  jupytext_version: 1.16.4
kernelspec:
  display_name: Python 3 (ipykernel)
  language: python
  name: python3
---

Binary Classification Using an SVM and Subsequent Regularization
================================================================

```{code-cell} ipython3
from typing import Tuple

import numpy as np
```

First we create 40 point clouds of a torus and a sphere each. Each point cloud consists of 125 points from the surface and 125 points of random noise. From each point cloud we take 25 subsamples of 30 points each and save all of these subsamples in the NumPy array `point_clouds`. See the `create_point_clouds` module for the source code.

```{code-cell} ipython3
from create_point_clouds import create_point_clouds

point_clouds = create_point_clouds(
    nb_point_clouds = 40,
    nb_coarsened = 25
)[:2]
```

Then we plot one of these newly created point clouds (subsamples to be precise) for each surface.

```{code-cell} ipython3
from tadasets import plot3d

sphere, torus = point_clouds[:,0,0]
plot3d( sphere );
plot3d( torus );
```

Then we compute persistence intervals for each point cloud (subsamples to be precise).

```{code-cell} ipython3
from gudhi_util import (
    point_clouds_to_simplex_trees,
    simplex_trees_to_persistence_intervals_in_dim
)

simplex_trees = point_clouds_to_simplex_trees( point_clouds )
persistence_intervals = [
    simplex_trees_to_persistence_intervals_in_dim(
        simplex_trees = simplex_trees,
        dim = d
    )
    for d in range(3)
]
```

Then we merge all subsamples (persistence intervals thereof) coming from the same point cloud, and we split the result into 10 point clouds (persistence intervals thereof) from each surface for training and 30 point clouds from each surface for testing.

```{code-cell} ipython3
import torch

nb_coarsened = persistence_intervals[0].shape[2]

def preprocess_n_split(array: np.ndarray
                       ) -> Tuple[torch.Tensor, torch.Tensor]:
    return torch.from_numpy(
        np.float32(np.arctan(array))
    ).flatten(
        start_dim = 2,
        end_dim = 3
    ).split(
        (10, 30),
        dim = 1
    )

training_dgms, testing_dgms = zip(
    *map( preprocess_n_split, persistence_intervals )
)
```

Then we create matching labels for training and testing.

```{code-cell} ipython3
labels = torch.Tensor( [[0], [1]] )
training_labels = labels.expand( *training_dgms[0].shape[:2] ).flatten(0, 1)
testing_labels = labels.expand( *testing_dgms[0].shape[:2] ).flatten(0, 1)

training_dgms_flat = [ dgm.flatten(0, 1) for dgm in training_dgms ]
testing_dgms_flat = [ dgm.flatten(0, 1) for dgm in testing_dgms ]
```

Then we compute the Gram matrix with respect to the inner product of Hilbert functions of the associated unravelled relative homology lattices for the training data.

```{code-cell} ipython3
from persunraveltorch.nn import HilbertGram

hilbert_gram = HilbertGram()

# Here we could set
# training_gram = hilbert_gram(training_dgms_flat, training_dgms_flat)
# However, as this would allocate a large amount of RAM at once,
# we compute the Gram matrix one row at a time instead.
training_gram = torch.cat(
    [ hilbert_gram( dgm, training_dgms_flat ) for dgm in zip(*training_dgms_flat) ]
).numpy() / nb_coarsened**2
```

Then we instantiate a support vector classifier and train it with the previously computed Gram matrix.

```{code-cell} ipython3
from scipy.special import expit
from sklearn import svm, metrics

clf = svm.SVC( kernel = 'precomputed' )
clf.fit( training_gram, training_labels.numpy() )
```
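As an optional sanity check, which is not part of the original pipeline, one can also evaluate the classifier on the data it was trained on; with a precomputed kernel, this simply reuses `training_gram`.

```{code-cell} ipython3
# Optional sanity check (not part of the original pipeline):
# with a precomputed kernel, predicting on the training data
# reuses the training Gram matrix directly.
training_accuracy = metrics.accuracy_score(
    training_labels.numpy(),
    clf.predict( training_gram )
)
print(f"Training accuracy: {100 * training_accuracy:>0.1f}%")
```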
Then we compute the Gram matrix for testing and we test the previously trained support vector classifier.

```{code-cell} ipython3
from gram_test import GramTest

support_indices = torch.from_numpy( np.int64(clf.support_) )
gram_test = GramTest(
    gram = hilbert_gram,
    support_indices = support_indices,
    len_train = len( training_dgms_flat[0] ),
    support_vectors = [
        intervals[support_indices] for intervals in training_dgms_flat
    ]
)

testing_gram = torch.cat(
    [ gram_test(dgm) for dgm in zip(*testing_dgms_flat) ]
).numpy() / nb_coarsened**2

print(f"Model accuracy: {100 * metrics.accuracy_score(testing_labels.numpy(), clf.predict(testing_gram)):>0.1f}%")
print(f"Testing cross entropy: {metrics.log_loss(testing_labels.numpy(), expit(clf.decision_function(testing_gram))):.4f}")
print(f"Training cross entropy: {metrics.log_loss(training_labels.numpy(), expit(clf.decision_function(training_gram))):.4f}")
```

Then we render a 2D bitmap corresponding to the normal of the affine hyperplane determined by our support vector classifier.

```{code-cell} ipython3
from persunraveltorch.nn import BiplaneFromIntervals

pixel_columns = 200
padding = 4

biplane_from_intervals = BiplaneFromIntervals(
    pixel_columns = pixel_columns,
    padding = padding,
    max_overhead = 2**31
)

training_bitmap = (
    biplane_from_intervals( training_dgms_flat )[:,0,0] / nb_coarsened
)

normal = torch.einsum(
    'ij,jkl->kl',
    torch.from_numpy( np.float32(clf.dual_coef_) ),
    training_bitmap[clf.support_]
)

testing_bitmap = (
    biplane_from_intervals( testing_dgms_flat )[:,0,0] / nb_coarsened
)
```

Then we describe the decision function of the support vector classifier as a decision function on 2D bitmaps thought of as Hilbert functions, and we test whether we obtain approximately the same accuracy and cross entropy as with the support vector classifier.

```{code-cell} ipython3
import torch.nn as nn
import torch.nn.functional as F

from persunraveltorch.nn import StripAffineFunctional

strip_affine_functional = StripAffineFunctional(
    weight = normal.clone().detach(),
    bias = torch.tensor( clf.intercept_[0] ),
    pixel_area = biplane_from_intervals.pixel_area
)
model = nn.Sequential( strip_affine_functional, nn.Sigmoid() )

model.eval()
with torch.no_grad():
    pred = model(testing_bitmap)
    print(
        f"Reconstructed model accuracy: {100 * pred.round().eq(testing_labels).float().mean():0.1f}%"
    )
    print(
        f"Reconstructed testing cross entropy: {F.binary_cross_entropy(pred, testing_labels):0.4f}"
    )
```
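If the reconstruction is faithful, then beyond matching accuracies, its raw affine values should also approximately agree with the SVM's decision function. The following check is not part of the original pipeline and assumes that `strip_affine_functional` returns one decision value per sample.

```{code-cell} ipython3
# Hypothetical check: compare the affine values of the reconstruction
# with the SVM's decision function on the same test data.
with torch.no_grad():
    reconstructed_values = strip_affine_functional( testing_bitmap ).numpy().ravel()
svm_values = clf.decision_function( testing_gram )
print(f"Maximal absolute deviation: {np.abs(reconstructed_values - svm_values).max():0.4f}")
```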
Then we plot the normal we computed as a pseudocolor image.

```{code-cell} ipython3
import matplotlib.pyplot as plt

from persunraveltorch.nn import Reshear, ReshearMode

reshear = Reshear(shape = normal.shape, mode = ReshearMode.ZERO)

def viz_normal():
    _, ax = plt.subplots( figsize = (7.2, 4.8) )
    ax.axis('off')
    ax.matshow(
        reshear( strip_affine_functional.weight.detach() ).numpy()[:,pixel_columns:]
    )
    #_, axs = plt.subplots( ncols = 2 )
    #axs[0].axis('off')
    #axs[1].axis('off')
    #axs[0].matshow( normal.numpy() )
    #axs[1].matshow( resheared_normal.numpy() )

viz_normal()
```

As the previous plot shows a large degree of variation, we try to regularize it using logistic regression with penalties for the normal and its gradient. Note, however, that in order to obtain the effect seen here, we have to make the penalties so large that the cross entropy has little influence on the optimization. So really, the following logistic regression is no more than a fancy way of blurring an image, and it is shown here merely to explore the design space.

```{code-cell} ipython3
optimizer = torch.optim.SGD( model.parameters(), lr = 0.9 )
epochs = 100

for t in range(epochs):
    model.train()
    penalty = strip_affine_functional.energy()
    variational_penalty = strip_affine_functional.variational_energy() / 100
    cross_entropy = F.binary_cross_entropy(
        model(training_bitmap),
        training_labels
    )
    loss = variational_penalty + penalty + cross_entropy
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    model.eval()
    if t % 10 == 0:
        print(f"Epoch {t+1}\n-------------------------------")
        print( f"Cross entropy: {cross_entropy:0.8f}" )
        print( f"Penalty: {penalty:0.4f}" )
        print( f"Variational penalty: {variational_penalty:0.4f}" )
        with torch.no_grad():
            pred = model( testing_bitmap )
            print(
                f"Testing cross entropy: {F.binary_cross_entropy(pred, testing_labels):0.8f}"
            )
            print(
                f"Testing accuracy: {100 * pred.round().eq(testing_labels).float().mean():0.1f}%\n"
            )
```

```{code-cell} ipython3
viz_normal();
```
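Since the regularization above acts essentially as a blur, one way to put the result into perspective, again not part of the original pipeline, is to smooth the original SVM normal directly with a Gaussian filter. This is a minimal sketch assuming `reshear` accepts `normal` as is; the value of `sigma` is an arbitrary choice.

```{code-cell} ipython3
from scipy.ndimage import gaussian_filter

# Hypothetical comparison: blur the resheared SVM normal directly
# instead of regularizing it; sigma = 3 is an arbitrary choice.
blurred_normal = gaussian_filter( reshear( normal ).detach().numpy(), sigma = 3 )

_, ax = plt.subplots( figsize = (7.2, 4.8) )
ax.axis('off')
ax.matshow( blurred_normal[:,pixel_columns:] );
```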