aboutsummaryrefslogtreecommitdiffstats
path: root/module/math.py
blob: 7259d6b8bcfbc1c0e4683c2ed299f41d3f29c480 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
from math import exp, log, sqrt, floor, ceil


def get_mean(l):
    """Returns the arithmetic mean of a list of numbers.

    Raises ZeroDivisionError if the list is empty.
    """
    count = len(l)
    return sum(l) / count


def get_min(l):
    """Returns the lowest element of a list of numbers.

    Uses the builtin `min` instead of a hand-rolled scan.
    Raises ValueError if the list is empty.
    """
    return min(l)


def get_max(l):
    """Returns the biggest element of a list of numbers.

    Uses the builtin `max` instead of a hand-rolled scan.
    Raises ValueError if the list is empty.
    """
    return max(l)


def get_variance(l):
    """Returns the population variance of a list of numbers.

    The variance is the mean of the squared deviations from the mean.
    """
    mean = sum(l) / len(l)
    squared_deviation_total = sum((n - mean) ** 2 for n in l)
    return squared_deviation_total / len(l)


def get_std(l):
    """Returns the population standard deviation of a list of numbers.

    Computed as the square root of the population variance.
    """
    mean = sum(l) / len(l)
    variance = sum((n - mean) ** 2 for n in l) / len(l)
    return sqrt(variance)


def get_quartiles(l):
    """Returns a tuple of the three quartiles of a list of numbers.

    When a quartile limit falls between two positions, the two neighbouring
    sorted elements are averaged. Indices are clamped to the last valid
    position: without the clamp, `ceil(limit)` runs past the end of the list
    for inputs of length 1 to 3 (e.g. length 3 gives limit 2.25 and tries
    `sorted_l[3]`), raising IndexError. Lists of length >= 4 are unaffected.

    Parameters:
        l (list): A non-empty list of numbers.

    Returns:
        tuple: (first quartile, median, third quartile).
    """
    sorted_l = sorted(l)
    last_index = len(sorted_l) - 1
    quartile_limits = [len(l) * 0.25, len(l) * 0.5, len(l) * 0.75]
    quartiles = []
    for limit in quartile_limits:
        if int(limit) == limit:  # If limit is a whole number
            q = sorted_l[int(limit)]
        else:
            lower = sorted_l[min(floor(limit), last_index)]
            upper = sorted_l[min(ceil(limit), last_index)]
            q = (lower + upper) / 2
        quartiles.append(q)
    return tuple(quartiles)


def get_sigmoid(x):
    """Returns the sigmoid of a number.

    Numerically stable: the naive `1 / (1 + exp(-x))` raises OverflowError
    for large negative x (roughly x < -710) because `exp(-x)` overflows.
    For negative x the algebraically equivalent form `exp(x) / (1 + exp(x))`
    is used, whose exponential only ever underflows toward 0.
    """
    if x >= 0:
        return 1 / (1 + exp(-x))
    e = exp(x)
    return e / (1 + e)


def get_linear_combination(variables, coefficients):
    """
    Returns the linear combination of a set of variables and a set of coefficients.

    Pairs are truncated to the shorter of the two inputs (`zip` semantics).

    Parameters:
        variables (list): A list of variables
        coefficients (list): A list of coefficients
    Returns:
        float: The sum of each variable weighted by each coefficient
    """
    # Start the accumulator at 0.0 so an empty input yields a float, matching
    # the summation order of a plain left-to-right loop.
    return sum((variable * coefficient for variable, coefficient in zip(variables, coefficients)), 0.0)


def get_hypothesis(feature_vector, weight_vector):
    """
    Returns the probability that the output is true given a feature vector and its associated\
    weight vector.

    Parameters:
        feature_vector (list): A list of numbers representing the features

        weight_vector (list): A list of numbers of size `len(feature_vector) + 1` whose last\
        element is the bias term

    Returns:
        float: The probability that the output is true (between 0 and 1)
    """
    # Split the trailing bias off the weights, form the affine combination,
    # then squash it through the sigmoid to obtain a probability.
    *feature_weights, bias = weight_vector
    linear = get_linear_combination(feature_vector, feature_weights) + bias
    return get_sigmoid(linear)


def get_cost(feature_vector, weight_vector, actual_label):
    """
    Returns the logistic regression cost for a training example.

    The predicted probability is clipped into [eps, 1 - eps] before taking
    logarithms: when the sigmoid saturates, `get_hypothesis` can return
    exactly 0.0 or 1.0, and `log(0)` raises ValueError. Clipping bounds the
    cost instead of crashing; unclipped predictions are unchanged.

    Parameters:
        feature_vector (list): A list of numbers representing the features of one training example.

        weight_vector (list): A list of numbers of size `len(feature_vector) + 1` whose last\
        element is the bias term. Represents the weight of each feature for the hypothesis.

        actual_label (int): The actual label of the training example (1 if true or 0 if false).

    Returns:
        float: The log-loss of the prediction for a training example.
    """
    eps = 1e-15
    h = get_hypothesis(feature_vector, weight_vector)
    h = min(max(h, eps), 1 - eps)
    return -actual_label * log(h) - (1 - actual_label) * log(1 - h)


def get_total_cost(feature_matrix, weight_vector, actual_labels):
    """
    Returns the logistic regression cost for a set of training examples.

    Parameters:
        feature_matrix (list): A list of list of numbers representing the features for each\
        training example.

        weight_vector (list): A list of numbers the same size as each list in `feature_matrix`\
        whose last element is the bias term. Represents the weight of each feature for the\
        hypothesis.

        actual_labels (list): A list of numbers representing the actual labels (1 if true or 0 if\
        false), one for each training example.

    Returns:
        float: The log-loss of the predictions across all training examples.
    """
    # Per-example losses, averaged over the number of training examples.
    per_example_costs = [
        get_cost(example, weight_vector, label)
        for example, label in zip(feature_matrix, actual_labels)
    ]
    return (1 / len(feature_matrix)) * sum(per_example_costs, 0.0)


def get_partial_derivative(hypotheses, actual_labels, column_vector):
    """
    Returns the partial derivative of the cost function for a given parameter.

    Parameters:
        hypotheses (list): A list of numbers representing the vector of predictions for all\
        training examples.

        actual_labels (list): A list of numbers representing the actual labels (1 if true or 0 if\
        false), one for each training example.

        column_vector (list): A list of numbers representing the value of the parameter (feature)\
        for which the partial derivative is to be computed, one for each training example.

    Returns:
        float: The partial derivative of the cost function with respect to the weight associated\
        with the given parameter.
    """
    # Average of (prediction error * feature value) across the training set.
    error_terms = (
        (prediction - label) * feature
        for prediction, label, feature in zip(hypotheses, actual_labels, column_vector)
    )
    return (1 / len(hypotheses)) * sum(error_terms, 0.0)


def gradient_descent(
    feature_matrix, weight_vector, actual_label_vector, learning_rate, iteration_count
):
    """
    Apply the gradient descent algorithm to compute the weight vector that minimizes the cost\
    function.

    Additionally this function prints the cost upgrade during the descent's course.

    Fixes over the previous version:
      * The bias column must contain one constant 1 per TRAINING EXAMPLE, not
        per feature. The old `range(feature_count)` length made `zip` inside
        `get_partial_derivative` truncate the error terms whenever the number
        of examples differed from the number of features, silently corrupting
        the bias gradient.
      * The progress-print condition used float modulo `iteration_count / 10`,
        which prints every iteration when `iteration_count < 10` and misses
        ticks for counts not divisible by 10. An integer interval of
        `max(1, iteration_count // 10)` is used instead.

    Parameters:
        feature_matrix (list): A list of list of numbers representing the features for each\
        training example.

        weight_vector (list): A list of numbers the same size as each list in `feature_matrix`\
        whose last element is the bias term. Represents the weight of each feature for the\
        hypothesis.

        actual_label_vector (list): A list of numbers representing the actual label vector, one for\
        each training example.

        learning_rate (float): The learning rate of the algorithm.

        iteration_count (int): The count of gradient descent updates the algorithm will perform.

    Returns:
        (list): The updated weight vector.
    """
    # Copy the weight vector in order not to modify the original
    updated_weight_vector = weight_vector[:]

    training_example_count = len(feature_matrix)
    feature_count = len(feature_matrix[0])

    # Transpose the feature matrix into one column vector per feature
    column_vectors = [[] for _ in range(feature_count)]
    for feature_vector in feature_matrix:
        for i, value in enumerate(feature_vector):
            column_vectors[i].append(value)
    # Bias column: a constant 1 for each training example, so the bias partial
    # derivative is averaged over every example
    column_vectors.append([1] * training_example_count)

    # Print roughly 10 progress updates over the run; max(1, ...) guards
    # against a zero interval when iteration_count < 10
    print_interval = max(1, iteration_count // 10)

    # Save initial cost for printing and comparing during the algorithm's course
    old_cost = get_total_cost(feature_matrix, weight_vector, actual_label_vector)

    for i in range(iteration_count):
        # Get the predictions for every training example
        hypotheses = [
            get_hypothesis(feature_vector, updated_weight_vector)
            for feature_vector in feature_matrix
        ]
        # Get the partial derivative of each feature (and of the bias)
        gradients = [
            get_partial_derivative(hypotheses, actual_label_vector, column_vector)
            for column_vector in column_vectors
        ]
        # Update the weights
        updated_weight_vector = [
            w - learning_rate * g for w, g in zip(updated_weight_vector, gradients)
        ]
        # Print cost update periodically
        if (i + 1) % print_interval == 0:
            new_cost = get_total_cost(
                feature_matrix, updated_weight_vector, actual_label_vector
            )
            print(
                f"Iteration: {i + 1:>10}, cost: {old_cost:>8.4f} -> {new_cost:>8.4f}, diff: {abs(new_cost - old_cost):>8.6f}"
            )
            old_cost = new_cost

    return updated_weight_vector