"""Train a linear regression model for car price prediction.

Fits ``price = theta0 + theta1 * mileage`` by gradient descent on min-max
normalized data, then writes the denormalized thetas to ``THETAS_FILE``.

Usage: python train.py <learning_rate> <iterations>

Provenance: train.py as added by commit
fd5fe70ce5271f09303b51dae34b42acc47f5730 (Thomas Vanbesien,
Mon, 23 Mar 2026 21:17:11 +0100), "Initial commit: linear regression for
car price prediction. Training, prediction, and visualization programs
using gradient descent with min-max normalization."
"""

import csv
import sys

DATASET = "data.csv"
THETAS_FILE = "thetas.csv"


def normalize(data):
    """Min-max scale *data* and report the bounds that were used.

    Returns a tuple ``(scaled, min_val, max_val)`` where every scaled value
    lies in [0, 1]. When all values are equal the range is zero, so every
    value is mapped to 0.0 instead of dividing by zero.

    Raises:
        ValueError: if *data* is empty.
    """
    if not data:
        raise ValueError("cannot normalize an empty dataset")
    min_val = min(data)
    max_val = max(data)
    span = max_val - min_val
    if span == 0:
        # Constant column: there is no spread to scale by.
        return [0.0 for _ in data], min_val, max_val
    return [(x - min_val) / span for x in data], min_val, max_val


def load_data():
    """Read ``DATASET`` and return ``(km, price)`` as two lists of floats.

    The first row is treated as a header and skipped; column 0 is the
    mileage and column 1 the price. Blank rows are ignored.
    """
    km = []
    price = []
    # newline="" lets the csv module do its own newline handling.
    with open(DATASET, newline="") as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header; tolerate a completely empty file
        for row in reader:
            if not row:
                continue
            km.append(float(row[0]))
            price.append(float(row[1]))
    return km, price


def estimate_price(mileage, theta0, theta1):
    """Return the linear model's prediction ``theta0 + theta1 * mileage``."""
    return theta0 + theta1 * mileage


# DV: dependent variable, IV: independent variable
def train_once(learning_rate, DV, IV, theta0, theta1):
    """Compute one gradient-descent step for both thetas.

    Args:
        learning_rate: step size multiplier.
        DV: observed values of the dependent variable (the prices).
        IV: matching values of the independent variable (the mileages).
        theta0: current intercept.
        theta1: current slope.

    Returns:
        ``(tmp0, tmp1)``: the amounts to subtract from theta0 and theta1.
    """
    scale = learning_rate * (1.0 / len(DV))
    # The model predicts the dependent variable from the independent one,
    # so x must come from IV and the target y from DV.
    errors = [estimate_price(x, theta0, theta1) - y for x, y in zip(IV, DV)]
    tmp0 = scale * sum(errors)
    tmp1 = scale * sum(err * x for err, x in zip(errors, IV))
    return tmp0, tmp1


def denormalize_thetas(t0, t1, km_min, km_max, price_min, price_max):
    """Convert thetas fitted on normalized data back to real units.

    Returns ``(real_t0, real_t1)`` such that
    ``real_t0 + real_t1 * km`` yields a price in the original scale.
    When all mileages are equal the slope cannot be determined and is
    reported as 0.0.
    """
    price_range = price_max - price_min
    km_range = km_max - km_min
    real_t1 = t1 * price_range / km_range if km_range != 0 else 0.0
    real_t0 = t0 * price_range + price_min - real_t1 * km_min
    return real_t0, real_t1


def train(learning_rate, iterations):
    """Fit the model on ``DATASET`` and return the denormalized thetas.

    Runs *iterations* gradient-descent steps starting from
    ``theta0 = theta1 = 0`` on min-max normalized mileages and prices.

    Raises:
        ValueError: if the dataset contains no data rows.
    """
    kms, prices = load_data()
    kms_norm, km_min, km_max = normalize(kms)
    prices_norm, price_min, price_max = normalize(prices)
    t0 = 0.0
    t1 = 0.0
    for _ in range(iterations):
        grad0, grad1 = train_once(learning_rate, prices_norm, kms_norm, t0, t1)
        t0 -= grad0
        t1 -= grad1
    return denormalize_thetas(t0, t1, km_min, km_max, price_min, price_max)


def save_thetas(theta0, theta1):
    """Write theta0 and theta1 to ``THETAS_FILE``, one value per line."""
    with open(THETAS_FILE, "w") as f:
        f.write(f"{theta0}\n{theta1}\n")


def main(argv):
    """Command-line entry point; returns the process exit status."""
    if len(argv) < 3:
        print(f"usage: {argv[0]} <learning_rate> <iterations>", file=sys.stderr)
        return 1
    try:
        learning_rate = float(argv[1])
        iterations = int(argv[2])
    except ValueError:
        print(
            "error: learning_rate must be a number and iterations an integer",
            file=sys.stderr,
        )
        return 1
    t0, t1 = train(learning_rate, iterations)
    save_thetas(t0, t1)
    print(f"θ0={t0}, θ1={t1} saved to {THETAS_FILE}")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))