From 32cd9b2be1763f872c800b17e1fa63f852fe91c1 Mon Sep 17 00:00:00 2001
From: Thomas Vanbesien
Date: Wed, 1 Apr 2026 17:42:04 +0200
Subject: Import from github.com

---
 module/math.py | 234 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 234 insertions(+)
 create mode 100644 module/math.py

diff --git a/module/math.py b/module/math.py
new file mode 100644
index 0000000..7259d6b
--- /dev/null
+++ b/module/math.py
@@ -0,0 +1,234 @@
+from math import exp, log, sqrt, floor, ceil
+
+
+def get_mean(l):
+    """Returns the mean of a list of numbers."""
+    return sum(l) / len(l)
+
+
+def get_min(l):
+    """Returns the lowest element of a list of numbers."""
+    lowest = l[0]
+    for n in l:
+        if n < lowest:
+            lowest = n
+    return lowest
+
+
+def get_max(l):
+    """Returns the largest element of a list of numbers."""
+    highest = l[0]
+    for n in l:
+        if n > highest:
+            highest = n
+    return highest
+
+
+def get_variance(l):
+    """Returns the variance of a list of numbers."""
+    mean = get_mean(l)
+    deviations = [(n - mean) ** 2 for n in l]
+    variance = get_mean(deviations)
+    return variance
+
+
+def get_std(l):
+    """Returns the standard deviation of a list of numbers."""
+    return sqrt(get_variance(l))
+
+
+def get_quartiles(l):
+    """Returns a tuple of the three quartiles of a list of numbers."""
+    quartile_limits = [len(l) * 0.25, len(l) * 0.5, len(l) * 0.75]
+    sorted_l = sorted(l)
+    quartiles = []
+    for limit in quartile_limits:
+        if int(limit) == limit:  # If limit is a whole number
+            q = sorted_l[int(limit)]
+        else:
+            q = (sorted_l[floor(limit)] + sorted_l[ceil(limit)]) / 2
+        quartiles.append(q)
+    return tuple(quartiles)
+
+
+def get_sigmoid(x):
+    """Returns the sigmoid of a number."""
+    return 1 / (1 + exp(-x))
+
+
+def get_linear_combination(variables, coefficients):
+    """
+    Returns the linear combination of a set of variables and a set of coefficients.
+
+    Parameters:
+        variables (list): A list of variables
+        coefficients (list): A list of coefficients
+    Returns:
+        float: The sum of each variable weighted by each coefficient
+    """
+    result = 0.0
+    for v, c in zip(variables, coefficients):
+        result += v * c
+    return result
+
+
+def get_hypothesis(feature_vector, weight_vector):
+    """
+    Returns the probability that the output is true given a feature vector and its associated\
+    weight vector.
+
+    Parameters:
+        feature_vector (list): A list of numbers representing the features
+
+        weight_vector (list): A list of numbers of size `len(feature_vector) + 1` whose last\
+        element is the bias term
+
+    Returns:
+        float: The probability that the output is true (between 0 and 1)
+    """
+    h = get_linear_combination(feature_vector, weight_vector[:-1])
+    h += weight_vector[-1]  # Bias term
+    return get_sigmoid(h)
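+
+
+# Worked example (hypothetical values): for feature_vector [2.0, 3.0] and
+# weight_vector [0.5, -0.25, 0.1], the weighted sum is
+# 2.0 * 0.5 + 3.0 * -0.25 + 0.1 = 0.35, so get_hypothesis returns
+# get_sigmoid(0.35), roughly 0.587.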
+ """ + h = get_hypothesis(feature_vector, weight_vector) + return -actual_label * log(h) - (1 - actual_label) * log(1 - h) + + +def get_total_cost(feature_matrix, weight_vector, actual_labels): + """ + Returns the logistic regression cost for a set of training examples. + + Parameters: + feature_matrix (list): A list of list of numbers representing the features for each\ + training example. + + weight_vector (list): A list of numbers the same size as each list in `feature_matrix`\ + whose last element is the bias term. Represents the weight of each feature for the\ + hypothesis. + + actual_labels (list): A list of numbers representing the actual labels (1 if true or 0 if\ + false), one for each training example. + + Returns: + float: The log-loss of the predictions across all training examples. + """ + total_cost = 0.0 + for feature_vector, actual_label in zip(feature_matrix, actual_labels): + total_cost += get_cost(feature_vector, weight_vector, actual_label) + training_example_count = len(feature_matrix) + return (1 / training_example_count) * total_cost + + +def get_partial_derivative(hypotheses, actual_labels, column_vector): + """ + Returns the partial derivative of the cost function for a given parameter. + + Parameters: + hypotheses (list): A list of numbers representing the vector of predictions for all\ + training examples. + + actual_labels (list): A list of numbers representing the actual labels (1 if true or 0 if\ + false), one for each training example. + + column_vector (list): A list of numbers representing the value of the parameter (feature)\ + for which the partial derivative is to be computed, one for each training example. + + Returns: + float: The partial derivative of the cost function with respect to the weight associated\ + with the given parameter. + """ + result = 0.0 + for h, l, f in zip(hypotheses, actual_labels, column_vector): + result += (h - l) * f + training_example_count = len(hypotheses) + return (1 / training_example_count) * result + + +def gradient_descent( + feature_matrix, weight_vector, actual_label_vector, learning_rate, iteration_count +): + """ + Apply the gradient descent algorithm to compute the weight vector that minimizes the cost\ + function. + + Additionally this function prints the cost upgrade during the descent's course. + + Parameters: + feature_matrix (list): A list of list of numbers representing the features for each\ + training example. + + weight_vector (list): A list of numbers the same size as each list in `feature_matrix`\ + whose last element is the bias term. Represents the weight of each feature for the\ + hypothesis. + + actual_label_vector (list): A list of numbers representing the actual label vector, one for\ + each training example. + + learning_rate (int): The learning rate of the algorithm. + + iteration_count (int): The count of gradient descent updates the algorithm will perform. + + Returns: + (list): The updated weight vector. 
+ """ + # Copy the weight vector in order not to modify the original + updated_weight_vector = weight_vector[:] + + # Get the column vectors of each feature + feature_count = len(feature_matrix[0]) + column_vectors = [[] for _ in range(feature_count)] + for feature_vector in feature_matrix: + for i in range(len(feature_vector)): + column_vectors[i].append(feature_vector[i]) + # Add a column of features equal to 1 for the bias term partial derivative computation + column_vectors.append([1 for _ in range(feature_count)]) + + # Save initial cost for printing and comparing during the algorithm's course + old_cost = get_total_cost(feature_matrix, weight_vector, actual_label_vector) + + for i in range(iteration_count): + # Get the predictions for every training example + hypotheses = [ + get_hypothesis(feature_vector, updated_weight_vector) + for feature_vector in feature_matrix + ] + # Get the partial derivative of each feature + gradients = [ + get_partial_derivative( + hypotheses, + actual_label_vector, + column_vector, + ) + for column_vector in column_vectors + ] + # Update the weights + updated_weight_vector = [ + w - learning_rate * g for w, g in zip(updated_weight_vector, gradients) + ] + # Print cost update periodically + if (i + 1) % (iteration_count / 10) == 0: + new_cost = get_total_cost( + feature_matrix, updated_weight_vector, actual_label_vector + ) + print( + f"Iteration: {i + 1:>10}, cost: {old_cost:>8.4f} -> {new_cost:>8.4f}, diff: {abs(new_cost - old_cost):>8.6f}" + ) + old_cost = new_cost + + return updated_weight_vector -- cgit v1.2.3