from math import exp, log, sqrt, floor, ceil


def get_mean(l):
    """Returns the mean of a list of numbers."""
    return sum(l) / len(l)


def get_min(l):
    """Returns the lowest element of a list of numbers."""
    # Built-in min() replaces the hand-rolled scan.
    return min(l)


def get_max(l):
    """Returns the biggest element of a list of numbers."""
    # Built-in max() replaces the hand-rolled scan.
    return max(l)


def get_variance(l):
    """Returns the (population) variance of a list of numbers."""
    mean = get_mean(l)
    squared_deviations = [(n - mean) ** 2 for n in l]
    return get_mean(squared_deviations)


def get_std(l):
    """Returns the standard deviation of a list of numbers."""
    return sqrt(get_variance(l))


def get_quartiles(l):
    """
    Returns a tuple of the three quartiles (Q1, Q2, Q3) of a list of numbers.

    NOTE(review): this uses a simple position-based scheme; for very short
    lists (fewer than ~4 elements) `ceil(limit)` can index one past the end
    of the sorted list — callers are expected to pass reasonably sized
    samples. Confirm before relying on it for tiny inputs.
    """
    quartile_limits = [len(l) * 0.25, len(l) * 0.5, len(l) * 0.75]
    sorted_l = sorted(l)
    quartiles = []
    for limit in quartile_limits:
        if int(limit) == limit:
            # The quartile position falls exactly on an element.
            q = sorted_l[int(limit)]
        else:
            # Otherwise average the two neighbouring elements.
            q = (sorted_l[floor(limit)] + sorted_l[ceil(limit)]) / 2
        quartiles.append(q)
    return tuple(quartiles)


def get_sigmoid(x):
    """
    Returns the sigmoid of a number.

    Uses the algebraically equivalent form for negative inputs so that exp()
    never receives a large positive argument: the original `exp(-x)` raised
    OverflowError for x <= ~-710, crashing the cost computation whenever a
    linear combination grew strongly negative during training.
    """
    if x >= 0:
        return 1 / (1 + exp(-x))
    e = exp(x)  # exp of a negative number underflows gracefully to 0.0
    return e / (1 + e)


def get_linear_combination(variables, coefficients):
    """
    Returns the linear combination of a set of variables and a set of
    coefficients.

    Parameters:
        variables (list): A list of variables
        coefficients (list): A list of coefficients

    Returns:
        float: The sum of each variable weighted by each coefficient
    """
    # Start value 0.0 keeps the float return type even for empty inputs.
    return sum((v * c for v, c in zip(variables, coefficients)), 0.0)


def get_hypothesis(feature_vector, weight_vector):
    """
    Returns the probability that the output is true given a feature vector
    and its associated weight vector.

    Parameters:
        feature_vector (list): A list of numbers representing the features
        weight_vector (list): A list of numbers of size
            `len(feature_vector) + 1` whose last element is the bias term

    Returns:
        float: The probability that the output is true (between 0 and 1)
    """
    h = get_linear_combination(feature_vector, weight_vector[:-1])
    h += weight_vector[-1]  # Bias term
    return get_sigmoid(h)


def get_cost(feature_vector, weight_vector, actual_label):
    """
    Returns the logistic regression cost for a training example.

    Parameters:
        feature_vector (list): A list of numbers representing the features of
            one training example.
        weight_vector (list): A list of numbers of size
            `len(feature_vector) + 1` whose last element is the bias term.
            Represents the weight of each feature for the hypothesis.
        actual_label (int): The actual label of the training example (1 if
            true or 0 if false).

    Returns:
        float: The log-loss of the prediction for a training example.
    """
    h = get_hypothesis(feature_vector, weight_vector)
    # Clamp the prediction away from exactly 0 and 1 so log() cannot raise
    # a math domain error when the sigmoid saturates in floating point.
    epsilon = 1e-15
    h = min(max(h, epsilon), 1 - epsilon)
    return -actual_label * log(h) - (1 - actual_label) * log(1 - h)


def get_total_cost(feature_matrix, weight_vector, actual_labels):
    """
    Returns the logistic regression cost for a set of training examples.

    Parameters:
        feature_matrix (list): A list of lists of numbers representing the
            features for each training example.
        weight_vector (list): A list of numbers whose last element is the
            bias term. Represents the weight of each feature for the
            hypothesis.
        actual_labels (list): A list of numbers representing the actual
            labels (1 if true or 0 if false), one for each training example.

    Returns:
        float: The mean log-loss of the predictions across all training
            examples.
    """
    total_cost = sum(
        get_cost(feature_vector, weight_vector, actual_label)
        for feature_vector, actual_label in zip(feature_matrix, actual_labels)
    )
    return total_cost / len(feature_matrix)


def get_partial_derivative(hypotheses, actual_labels, column_vector):
    """
    Returns the partial derivative of the cost function for a given parameter.

    Parameters:
        hypotheses (list): A list of numbers representing the vector of
            predictions for all training examples.
        actual_labels (list): A list of numbers representing the actual
            labels (1 if true or 0 if false), one for each training example.
        column_vector (list): A list of numbers representing the value of the
            parameter (feature) for which the partial derivative is to be
            computed, one for each training example.

    Returns:
        float: The partial derivative of the cost function with respect to
            the weight associated with the given parameter.
    """
    result = sum(
        (h - label) * feature
        for h, label, feature in zip(hypotheses, actual_labels, column_vector)
    )
    return result / len(hypotheses)


def gradient_descent(
    feature_matrix, weight_vector, actual_label_vector, learning_rate, iteration_count
):
    """
    Apply the gradient descent algorithm to compute the weight vector that
    minimizes the cost function. Additionally this function prints the cost
    update periodically during the descent's course.

    Parameters:
        feature_matrix (list): A list of lists of numbers representing the
            features for each training example.
        weight_vector (list): A list of numbers of size
            `len(feature_matrix[0]) + 1` whose last element is the bias term.
            Represents the weight of each feature for the hypothesis.
        actual_label_vector (list): A list of numbers representing the actual
            label vector, one for each training example.
        learning_rate (float): The learning rate of the algorithm.
        iteration_count (int): The count of gradient descent updates the
            algorithm will perform.

    Returns:
        (list): The updated weight vector.
    """
    training_example_count = len(feature_matrix)

    # Copy the weight vector in order not to modify the caller's list.
    updated_weight_vector = weight_vector[:]

    # Transpose the feature matrix to get one column vector per feature.
    column_vectors = [list(column) for column in zip(*feature_matrix)]

    # Add a constant-1 column for the bias term's partial derivative.
    # BUG FIX: this column must contain one entry per TRAINING EXAMPLE (it
    # is zipped against the per-example hypotheses), not one per feature as
    # before — otherwise the bias gradient silently used only a truncated
    # slice of the data whenever the example count differed from the
    # feature count.
    column_vectors.append([1] * training_example_count)

    # Print roughly 10 progress updates over the whole run. Integer period
    # avoids the original float modulo, which printed every iteration for
    # iteration_count < 10 (fractional period never divides an int cleanly).
    print_period = max(1, iteration_count // 10)

    # Save the initial cost for printing/comparison during the run.
    old_cost = get_total_cost(feature_matrix, weight_vector, actual_label_vector)
    for i in range(iteration_count):
        # Predictions for every training example under the current weights.
        hypotheses = [
            get_hypothesis(feature_vector, updated_weight_vector)
            for feature_vector in feature_matrix
        ]
        # Partial derivative of the cost for each weight (bias included).
        gradients = [
            get_partial_derivative(hypotheses, actual_label_vector, column_vector)
            for column_vector in column_vectors
        ]
        # Simultaneous update of all the weights.
        updated_weight_vector = [
            w - learning_rate * g for w, g in zip(updated_weight_vector, gradients)
        ]
        # Print the cost update periodically.
        if (i + 1) % print_period == 0:
            new_cost = get_total_cost(
                feature_matrix, updated_weight_vector, actual_label_vector
            )
            print(
                f"Iteration: {i + 1:>10}, cost: {old_cost:>8.4f} -> {new_cost:>8.4f}, "
                f"diff: {abs(new_cost - old_cost):>8.6f}"
            )
            old_cost = new_cost
    return updated_weight_vector