Classification With Axon And Livebook
2022-12-05
Install Dependencies
Mix.install([
  {:axon, "~> 0.1.0"},
  {:exla, "~> 0.2.2"},
  {:nx, "~> 0.2.1"},
  {:explorer, "~> 0.2.0"},
  {:req, "~> 0.3.0"},
  {:vega_lite, "~> 0.1.5"},
  {:kino_vega_lite, "~> 0.1.1"}
])
Introduction
In this notebook we are going to explore the Glass type dataset. The dataset comes from the UCI repository and was motivated by a criminal case in which identifying a certain type of glass fragment was crucial. You can read more about the dataset here.
First we will load the data into Explorer and take a look. Then we will use Nx to prepare our data for training and testing. Finally we will use Axon to build a neural network and train it with the dataset. Along the way we will plot some charts with VegaLite.
It is a multiclass classification problem with 7 classes, labeled 1 through 7. All of the input variables are numeric.
Load the Dataset
First let’s download the data to our machine and then load it into Explorer. We will prepend a row of column names to the CSV so we can use them in the dataframe.
%{body: body} =
  Req.get!("https://archive.ics.uci.edu/ml/machine-learning-databases/glass/glass.data")
filename = "glass_data.csv"
column_names = "id_number,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,class\n"
File.write!(filename, column_names <> body)
df = Explorer.DataFrame.from_csv!(filename)
Now we can explore the dataset to see what is inside.
Explorer.DataFrame.pull(df, "class")
|> Explorer.Series.distinct()
Looks like there are no instances of class 4! Let’s build a lookup table so we can get the names of the classes from their numbers.
classes = %{
  1 => "building_windows_float_processed",
  2 => "building_windows_non_float_processed",
  3 => "vehicle_windows_float_processed",
  4 => "vehicle_windows_non_float_processed",
  5 => "containers",
  6 => "tableware",
  7 => "headlamps"
}
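For example, we can look up a class name by its number (class 7 picked arbitrarily):

classes[7]
#=> "headlamps"

We can also check how many rows the dataset has: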
Explorer.DataFrame.n_rows(df)
Let’s calculate the mean of every column and plot it out.
defmodule ReduceData do
  # Applies `series_fun` to every column of the dataframe and returns a list
  # of %{"x" => column_name, "y" => value} maps, ready for plotting with VegaLite.
  def reduce_df(data_frame, series_fun) do
    data_frame
    |> Explorer.DataFrame.to_series()
    |> Enum.map(fn {col_name, col} ->
      %{"x" => col_name, "y" => series_fun.(col)}
    end)
  end
end
mean_data =
  ReduceData.reduce_df(
    df |> Explorer.DataFrame.select(["id_number", "class"], :drop),
    &Explorer.Series.mean/1
  )
VegaLite.new(width: 400, height: 400)
|> VegaLite.data_from_values(mean_data)
|> VegaLite.mark(:bar)
|> VegaLite.encode_field(:x, "x", type: :nominal)
|> VegaLite.encode_field(:y, "y", type: :quantitative)
Now let’s group the data by class and calculate the mean, max, and min of each column.
class_grouped_df = Explorer.DataFrame.group_by(df, ["class"])
cols = ["Al", "Ba", "Ca", "Fe", "K", "Mg", "Na", "RI", "Si"]
class_summary_df =
  Explorer.DataFrame.summarise(
    class_grouped_df,
    Enum.reduce(cols, %{}, fn name, acc ->
      Map.put(acc, name, [:min, :max, :mean])
    end)
  )
Nice! Now let’s plot a chart for each group with VegaLite.
vega_concats =
  class_summary_df
  |> Explorer.DataFrame.to_rows()
  |> Enum.map(fn row ->
    data =
      row
      |> Enum.map(fn {key, val} ->
        %{"x" => key, "y" => val}
      end)

    class_row =
      data
      |> Enum.find(fn row ->
        row["x"] == "class"
      end)

    class = classes[class_row["y"]]

    data =
      data
      |> Enum.reject(fn row ->
        row["x"] == "class"
      end)

    VegaLite.new(width: 700, height: 400, title: class)
    |> VegaLite.data_from_values(data)
    |> VegaLite.mark(:bar)
    |> VegaLite.encode_field(:x, "x", type: :nominal)
    |> VegaLite.encode_field(:y, "y", type: :quantitative)
  end)
VegaLite.new(width: 700)
|> VegaLite.concat(vega_concats, :vertical)
Prepare the Data
Now we need to get our data ready for training. First we will normalize the data using the min-max scaling technique, which transforms every value in each column to lie between 0 and 1 (x' = (x - min) / (max - min)) and facilitates the training of our model. Essentially, if different columns have different scales (one ranges between 0 and 1 while another ranges between 40,000 and 10,000,000), then the large-scale data points will have more influence on the training of your model. You can read more about normalization here.
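As a quick illustration with made-up values: for the series [2.0, 4.0, 10.0] the min is 2.0 and the max is 10.0, so the normalized values should come out as [0.0, 0.25, 1.0].

col = Explorer.Series.from_list([2.0, 4.0, 10.0])
min = Explorer.Series.min(col)
max = Explorer.Series.max(col)

col
|> Explorer.Series.subtract(min)
|> Explorer.Series.divide(max - min)
#=> [0.0, 0.25, 1.0]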
defmodule NormalData do
  # Min-max scales the given columns. For each column we return a tuple of
  # {column_name, normalized_series, normalize_fun}, where `normalize_fun`
  # can scale a single future value using the same min and range.
  def normalize(data_frame, col_names) do
    data_frame
    |> Explorer.DataFrame.select(col_names)
    |> Explorer.DataFrame.to_series()
    |> Enum.map(fn {col_name, col} ->
      max = Explorer.Series.max(col)
      min = Explorer.Series.min(col)
      range = max - min

      normalize_fun = fn val ->
        (val - min) / range
      end

      {col_name,
       Explorer.Series.subtract(col, min)
       |> Explorer.Series.cast(:float)
       |> Explorer.Series.divide(range), normalize_fun}
    end)
  end
end
Here we normalize the data and also construct a function to normalize any future data we get before we pass it to our model for a prediction.
normal =
  NormalData.normalize(
    df,
    ["Al", "Ba", "Ca", "Fe", "K", "Mg", "Na", "RI", "Si"]
  )

normal_df =
  normal
  |> Enum.map(fn {col_name, normalized_data, _normalize_fun} ->
    {col_name, normalized_data}
  end)
  |> Explorer.DataFrame.new()
  |> Explorer.DataFrame.mutate(id_number: Explorer.DataFrame.pull(df, "id_number"))
  |> Explorer.DataFrame.mutate(class: Explorer.DataFrame.pull(df, "class"))
normalize_row_fun = fn row ->
  normalize_funs =
    normal
    |> Enum.filter(fn {col_name, _, _} ->
      col_name in ["Al", "Ba", "Ca", "Fe", "K", "Mg", "Na", "RI", "Si"]
    end)
    |> Enum.map(fn {_, _, normalize_fun} ->
      normalize_fun
    end)

  Enum.zip([normalize_funs, Nx.to_flat_list(row)])
  |> Enum.map(fn {normalize_fun, elem} ->
    normalize_fun.(elem)
  end)
  |> Nx.tensor()
end
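For example, a hypothetical new fragment measurement could be normalized before prediction like this (the values below are made up, and must be ordered to match the column order of the tuples in normal):

new_row = Nx.tensor([1.2, 0.0, 8.5, 0.0, 0.5, 3.4, 13.0, 1.52, 72.0])
normalize_row_fun.(new_row)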
Let’s take a look at the means of the normalized data to make sure everything is OK.
mean_normal_data =
  ReduceData.reduce_df(
    normal_df |> Explorer.DataFrame.select(["id_number", "class"], :drop),
    &Explorer.Series.mean/1
  )
VegaLite.new(width: 400, height: 400)
|> VegaLite.data_from_values(mean_normal_data)
|> VegaLite.mark(:bar)
|> VegaLite.encode_field(:x, "x", type: :nominal)
|> VegaLite.encode_field(:y, "y", type: :quantitative)
Now we will split our data into a training set and a testing set. This is a common machine learning technique: if we used all of the data to train the model, we would have no way to evaluate its performance. We could overfit the data and then get poor performance on new data. So we keep a smaller sample aside for testing.
defmodule SplitData do
  # Randomly samples `train_percentage` of the rows (by id_number) for the
  # training set and uses the remaining rows for the test set.
  def train_test_split(data_frame, train_percentage) do
    series = Explorer.DataFrame.pull(data_frame, "id_number")

    train_sample =
      series
      |> Explorer.Series.sample(train_percentage)
      |> Explorer.Series.to_list()

    test_sample_df =
      series
      |> Explorer.Series.to_list()
      |> Kernel.--(train_sample)
      |> Kernel.then(fn list ->
        Explorer.DataFrame.new(id_number: list)
      end)

    train_sample_df = Explorer.DataFrame.new(id_number: train_sample)

    {Explorer.DataFrame.join(train_sample_df, data_frame),
     Explorer.DataFrame.join(test_sample_df, data_frame)}
  end
end
{train_df, test_df} = SplitData.train_test_split(normal_df, 0.75)
Now that our data is split, we need to convert it to something that can be used by an Axon model. For both the training and testing sets we need to further split the data into inputs and outputs. We also need to one-hot encode our outputs: instead of a single class output (e.g. 7) we need a vector that represents that number (e.g. [0, 0, 0, 0, 0, 0, 1]). Our model will output probabilities for each class, and we can then choose the class with the highest probability.
defmodule Convert do
  # Stacks the selected columns into a single {n_rows, n_cols} tensor.
  def to_training_data(df, col_names) do
    col_names
    |> Enum.map(fn name ->
      df[name]
      |> Explorer.Series.to_tensor(names: [name])
      |> Nx.reshape({:auto, 1})
    end)
    |> Nx.concatenate(axis: 1)
  end

  # Broadcasting `Nx.equal/2` against the labels 1..7 turns each class label
  # into a one-hot row: only the position matching the label becomes 1.
  def one_hot_encode(outputs) do
    outputs
    |> Nx.equal(Nx.tensor(Enum.to_list(1..7)))
  end
end
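To see the broadcasting trick in action, here is an illustrative call with two made-up class labels:

Convert.one_hot_encode(Nx.tensor([[3], [7]]))
#=> [[0, 0, 1, 0, 0, 0, 0],
#    [0, 0, 0, 0, 0, 0, 1]]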
train_input_data =
  train_df
  |> Convert.to_training_data(["Al", "Ba", "Ca", "Fe", "K", "Mg", "Na", "RI", "Si"])
  |> Nx.to_batched_list(32)

train_output_data =
  train_df
  |> Convert.to_training_data(["class"])
  |> Convert.one_hot_encode()
  |> Nx.to_batched_list(32)

test_input_data =
  test_df
  |> Convert.to_training_data(["Al", "Ba", "Ca", "Fe", "K", "Mg", "Na", "RI", "Si"])

test_output_data =
  test_df
  |> Convert.to_training_data(["class"])
  |> Convert.one_hot_encode()
Let’s take a quick look at the first batch of training data to make sure it’s all good.
train_input_data
|> List.first()
|> IO.inspect()
|> Nx.to_heatmap()
Model Creation
Now we can create our neural network model. Our input will be batched, so we leave the first dimension as nil in our input layer. Each row in our training input set has 9 data points, so the second dimension is 9. Next we add a 128-neuron dense layer with a relu activation; this function ultimately decides whether a given neuron fires. You can read more about this function here. We then add a dropout layer with a rate of 0.2. This layer randomly drops neurons during training at the rate specified, which helps to prevent overfitting, and you can read more here. We duplicate the above layers, just modifying the dropout rate. Finally we add a dense layer with 7 outputs, representing the one-hot encoded outputs we use for training. We use a softmax activation function since this is a multiclass classification problem: it transforms the output of the model into a vector of probabilities for each class. You can learn more about softmax here.
model =
  Axon.input({nil, 9}, "input")
  |> Axon.dense(128, activation: :relu)
  |> Axon.dropout(rate: 0.2)
  |> Axon.dense(128, activation: :relu)
  |> Axon.dropout(rate: 0.1)
  |> Axon.dense(7, activation: :softmax)
Training and Evaluating the Model
Now that we have our training and testing inputs and outputs, we can run a training loop. Axon provides a really nice API to do this. We create a trainer with a categorical_cross_entropy loss function, which is suited to predicting probabilities across several classes, and the adam optimizer. We will also print the accuracy and precision metrics during training. Finally, we run our model for 2000 epochs.
params =
  model
  |> Axon.Loop.trainer(:categorical_cross_entropy, :adam)
  |> Axon.Loop.metric(:accuracy)
  |> Axon.Loop.metric(:precision)
  |> Axon.Loop.run(Stream.zip(train_input_data, train_output_data), %{},
    compiler: EXLA,
    epochs: 2000
  )
After training we can use the parameters or weights from our training run to make predictions on the test input data.
%{prediction: prediction} = Axon.predict(model, params, test_input_data, mode: :train)
Now we can see the accuracy of our model. When I ran it I got around 0.77 accuracy. Not too bad!
Axon.Metrics.accuracy(test_output_data, prediction)
defmodule Confusion do
  # Counts element-wise true/false positives and negatives over the one-hot
  # tensors and shapes them for a VegaLite heatmap.
  def matrix(output, pred) do
    true_pos = Axon.Metrics.true_positives(output, pred) |> Nx.to_number()
    true_neg = Axon.Metrics.true_negatives(output, pred) |> Nx.to_number()
    false_pos = Axon.Metrics.false_positives(output, pred) |> Nx.to_number()
    false_neg = Axon.Metrics.false_negatives(output, pred) |> Nx.to_number()

    [
      %{"predicted" => "true", "ground truth" => "true", "val" => true_pos},
      %{"predicted" => "false", "ground truth" => "false", "val" => true_neg},
      %{"predicted" => "true", "ground truth" => "false", "val" => false_pos},
      %{"predicted" => "false", "ground truth" => "true", "val" => false_neg}
    ]
  end
end
Now let’s form a confusion matrix.
confusion_matrix_data = Confusion.matrix(test_output_data, prediction)
VegaLite.new(width: 400, height: 400)
|> VegaLite.data_from_values(confusion_matrix_data)
|> VegaLite.encode_field(:x, "predicted", type: :nominal)
|> VegaLite.encode_field(:y, "ground truth", type: :nominal)
|> VegaLite.layers([
  VegaLite.new()
  |> VegaLite.mark(:rect)
  |> VegaLite.encode_field(:color, "val", type: :quantitative),
  VegaLite.new()
  |> VegaLite.mark(:text)
  |> VegaLite.encode_field(:text, "val", type: :quantitative)
])
We can see that our true negatives and true positives are greater than our false negatives and false positives. Now we can calculate the precision and recall of the model. The precision is the ratio of true positives to the sum of true and false positives, TP / (TP + FP); it tells us how reliable the model is at classifying a positive. The recall is the ratio of true positives to the sum of true positives and false negatives, TP / (TP + FN); it measures how many of the actual positives were correctly classified. You can read more about precision, recall, and the confusion matrix here.
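To ground the definitions, here is a quick worked example with made-up counts (not from this model):

true_pos = 40
false_pos = 10
false_neg = 20

true_pos / (true_pos + false_pos)
#=> 0.8 (precision)

true_pos / (true_pos + false_neg)
#=> 0.666... (recall)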
pr_data =
  [
    %{
      "name" => "precision",
      "val" => Axon.Metrics.precision(test_output_data, prediction) |> Nx.to_number()
    },
    %{
      "name" => "recall",
      "val" => Axon.Metrics.recall(test_output_data, prediction) |> Nx.to_number()
    }
  ]
  |> IO.inspect()
VegaLite.new(width: 400, height: 400)
|> VegaLite.data_from_values(pr_data)
|> VegaLite.mark(:bar)
|> VegaLite.encode_field(:x, "name", type: :nominal)
|> VegaLite.encode_field(:y, "val", type: :quantitative)
Let’s also form confusion matrices and precision/recall charts for each category. We can see that the model is more confident in some categories than others.
test_output_data_list =
  test_output_data
  |> Nx.to_batched_list(1)

test_input_data_list =
  test_input_data
  |> Nx.to_batched_list(1)
vega_concats =
  Enum.zip(test_output_data_list, test_input_data_list)
  |> Enum.group_by(fn {output, _input} ->
    output
    |> Nx.argmax()
    |> Nx.to_number()
    |> Kernel.+(1)
  end)
  |> Enum.map(fn {class, data} ->
    glass_type = classes[class]
    {out, inp} = Enum.unzip(data)

    outputs =
      out
      |> Enum.map(&Nx.to_flat_list/1)
      |> Nx.tensor()

    inputs =
      inp
      |> Enum.map(&Nx.to_flat_list/1)
      |> Nx.tensor()

    %{prediction: pred} = Axon.predict(model, params, inputs, mode: :train)
    conf_matrix_data = Confusion.matrix(outputs, pred)

    conf_matrix_chart =
      VegaLite.new(width: 300, height: 400)
      |> VegaLite.data_from_values(conf_matrix_data)
      |> VegaLite.encode_field(:x, "predicted", type: :nominal)
      |> VegaLite.encode_field(:y, "ground truth", type: :nominal)
      |> VegaLite.layers([
        VegaLite.new(title: glass_type)
        |> VegaLite.mark(:rect)
        |> VegaLite.encode_field(:color, "val", type: :quantitative),
        VegaLite.new()
        |> VegaLite.mark(:text)
        |> VegaLite.encode_field(:text, "val", type: :quantitative)
      ])

    pr_data = [
      %{
        "name" => "precision",
        "val" => Axon.Metrics.precision(outputs, pred) |> Nx.to_number()
      },
      %{
        "name" => "recall",
        "val" => Axon.Metrics.recall(outputs, pred) |> Nx.to_number()
      }
    ]

    pr_chart =
      VegaLite.new(width: 300, height: 400)
      |> VegaLite.data_from_values(pr_data)
      |> VegaLite.mark(:bar)
      |> VegaLite.encode_field(:x, "name", type: :nominal)
      |> VegaLite.encode_field(:y, "val", type: :quantitative)

    VegaLite.new(width: 800)
    |> VegaLite.concat([conf_matrix_chart, pr_chart])
  end)
Download this notebook here