From fd5fe70ce5271f09303b51dae34b42acc47f5730 Mon Sep 17 00:00:00 2001
From: Thomas Vanbesien <tvanbesi@proton.me>
Date: Mon, 23 Mar 2026 21:17:11 +0100
Subject: Initial commit: linear regression for car price prediction

Training, prediction, and visualization programs using gradient descent
with min-max normalization.
---
 train.py | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)
 create mode 100644 train.py

(limited to 'train.py')

diff --git a/train.py b/train.py
new file mode 100644
index 0000000..a9c865b
--- /dev/null
+++ b/train.py
@@ -0,0 +1,75 @@
+import csv
+import sys
+
+DATASET = "data.csv"
+THETAS_FILE = "thetas.csv"
+
+
+def normalize(data):
+    """Rescale *data* linearly onto [0, 1] (min-max normalization).
+
+    Returns:
+        A ``(scaled, min_val, max_val)`` tuple; the bounds are returned so the
+        caller can map results back to the original scale.
+
+    Raises:
+        ValueError: if *data* is empty (raised by ``min``).
+    """
+    min_val = min(data)
+    max_val = max(data)
+    span = max_val - min_val
+    if span == 0:
+        # All values are identical: there is no spread to rescale, and the
+        # plain formula would divide by zero. Map every value to 0.0 instead.
+        return [0.0 for _ in data], min_val, max_val
+    return [(x - min_val) / span for x in data], min_val, max_val
+
+
+def load_data():
+    """Read the mileage/price pairs stored in ``DATASET``.
+
+    The first row is treated as a header and skipped; in every other row the
+    first column is parsed as the mileage and the second as the price.
+
+    Returns:
+        A ``(km, price)`` tuple of two equally long lists of floats.
+
+    Raises:
+        OSError: if ``DATASET`` cannot be opened.
+        ValueError: if a cell is not a valid float.
+        IndexError: if a non-blank row has fewer than two columns.
+    """
+    km = []
+    price = []
+    # newline="" lets the csv module do its own line-ending handling, as the
+    # csv documentation recommends for file objects passed to csv.reader.
+    with open(DATASET, newline="") as f:
+        reader = csv.reader(f)
+        next(reader, None)  # skip the header; tolerate a completely empty file
+        for row in reader:
+            if not row:
+                # csv.reader yields [] for blank lines (e.g. a trailing one).
+                continue
+            km.append(float(row[0]))
+            price.append(float(row[1]))
+    return km, price
+
+
+def estimate_price(mileage, theta0, theta1):
+    """Evaluate the linear hypothesis ``theta0 + theta1 * mileage``."""
+    intercept, slope = theta0, theta1
+    predicted = intercept + slope * mileage
+    return predicted
+
+
+# DV: dependent variable, IV: independent variable
+def train_once(learning_rate, DV, IV, theta0, theta1):
+    """Compute one gradient-descent step for the linear model.
+
+    Args:
+        learning_rate: factor applied to both gradient components.
+        DV: observed values of the dependent variable (the targets).
+        IV: values of the independent variable (the model inputs), paired
+            with ``DV`` by position.
+        theta0: current intercept.
+        theta1: current slope.
+
+    Returns:
+        ``(tmp0, tmp1)``: the amounts the caller should subtract from
+        ``theta0`` and ``theta1`` respectively.
+
+    Raises:
+        ZeroDivisionError: if ``DV`` is empty.
+    """
+    # The model is evaluated on the inputs (IV) and compared with the targets
+    # (DV). Pairing them the other way round would fit IV as a function of DV.
+    errors = [estimate_price(x, theta0, theta1) - y for x, y in zip(IV, DV)]
+    step = learning_rate * (1.0 / len(DV))
+    tmp0 = step * sum(errors)
+    tmp1 = step * sum(error * x for error, x in zip(errors, IV))
+    return tmp0, tmp1
+
+
+def denormalize_thetas(t0, t1, km_min, km_max, price_min, price_max):
+    """Convert thetas fitted on min-max normalized data to the original units.
+
+    With ``km_n = (km - km_min) / km_range`` and
+    ``price_n = (price - price_min) / price_range``, the normalized model
+    ``price_n = t0 + t1 * km_n`` expands to ``price = real_t0 + real_t1 * km``.
+
+    Returns:
+        ``(real_t0, real_t1)``: intercept and slope in the original units.
+    """
+    price_range = price_max - price_min
+    km_range = km_max - km_min
+    if km_range == 0:
+        # Every mileage is identical, so a slope cannot be identified and the
+        # formula below would divide by zero. Fall back to a flat model.
+        return t0 * price_range + price_min, 0.0
+    real_t1 = t1 * price_range / km_range
+    real_t0 = t0 * price_range + price_min - real_t1 * km_min
+    return real_t0, real_t1
+
+
+def train(learning_rate, iterations):
+    """Fit the linear model to the dataset with gradient descent.
+
+    Both columns are min-max normalized before fitting; the thetas that are
+    returned have been converted back to the original units.
+    """
+    mileages, costs = load_data()
+    scaled_km, km_lo, km_hi = normalize(mileages)
+    scaled_price, price_lo, price_hi = normalize(costs)
+    intercept = slope = 0.0
+    for _ in range(iterations):
+        # Both deltas are computed from the same (old) thetas, then applied
+        # together.
+        delta0, delta1 = train_once(
+            learning_rate, scaled_price, scaled_km, intercept, slope
+        )
+        intercept, slope = intercept - delta0, slope - delta1
+    return denormalize_thetas(intercept, slope, km_lo, km_hi, price_lo, price_hi)
+
+
+def save_thetas(theta0, theta1):
+    """Write both thetas to ``THETAS_FILE``, one per line, replacing its content."""
+    lines = [f"{theta0}\n", f"{theta1}\n"]
+    with open(THETAS_FILE, "w") as out:
+        out.writelines(lines)
+
+
+# Command-line entry point: train.py <learning_rate> <iterations>
+if len(sys.argv) < 3:
+    # Fail with a usage line instead of an IndexError traceback.
+    sys.exit(f"usage: {sys.argv[0]} <learning_rate> <iterations>")
+try:
+    learning_rate = float(sys.argv[1])
+    i = int(sys.argv[2])
+except ValueError as err:
+    # Non-numeric arguments: report the parse error and exit with status 1.
+    sys.exit(f"invalid argument: {err}")
+t0, t1 = train(learning_rate, i)
+save_thetas(t0, t1)
+print(f"θ0={t0}, θ1={t1} saved to {THETAS_FILE}")
-- 
cgit v1.2.3