Initial commit: linear regression for car price prediction

Training, prediction, and visualization programs using gradient descent with min-max normalization.
author: Thomas Vanbesien <tvanbesi@proton.me> 2026-03-23 21:17:11 +0100
committer: Thomas Vanbesien <tvanbesi@proton.me> 2026-03-23 21:17:11 +0100
commit: fd5fe70ce5271f09303b51dae34b42acc47f5730 (patch)
tree: 551b9b7c38b5fb8307cea3653e269ae79fb4b639 /train.py
download: ft_linear_regression-fd5fe70ce5271f09303b51dae34b42acc47f5730.tar.gz
ft_linear_regression-fd5fe70ce5271f09303b51dae34b42acc47f5730.zip
1 files changed, 75 insertions, 0 deletions
diff --git a/train.py b/train.py
new file mode 100644
index 0000000..a9c865b
--- /dev/null
+++ b/train.py
@@ -0,0 +1,75 @@
+import csv
+import sys
+
+DATASET = "data.csv"
+THETAS_FILE = "thetas.csv"
+
+
+def normalize(data):
+    """Rescale values to the [0, 1] interval using min-max normalization.
+
+    Args:
+        data: Non-empty iterable of numbers.
+
+    Returns:
+        A tuple ``(normalized, min_val, max_val)`` where ``normalized`` is a
+        list holding ``(x - min_val) / (max_val - min_val)`` for each input.
+        When every input is identical there is no spread to rescale, so each
+        value is mapped to ``0.0``.
+
+    Raises:
+        ValueError: If ``data`` is empty.
+    """
+    values = list(data)
+    if not values:
+        raise ValueError("cannot normalize an empty dataset")
+    min_val = min(values)
+    max_val = max(values)
+    span = max_val - min_val
+    if span == 0:
+        # Constant column: avoid dividing by zero.
+        return [0.0 for _ in values], min_val, max_val
+    return [(x - min_val) / span for x in values], min_val, max_val
+
+
+def load_data():
+    """Read the two numeric columns of ``DATASET`` into parallel lists.
+
+    The first row is treated as a header and skipped. Blank rows are ignored.
+    Column 0 is collected as mileage (km) and column 1 as price, both
+    converted with ``float``.
+
+    Returns:
+        A tuple ``(km, price)`` of two lists of floats, in file order.
+
+    Raises:
+        ValueError: If a cell cannot be converted to ``float``.
+        IndexError: If a non-blank row has fewer than two columns.
+    """
+    km = []
+    price = []
+    # newline="" lets the csv module do its own line-ending handling, as the
+    # csv documentation recommends for file objects passed to csv.reader.
+    with open(DATASET, newline="") as f:
+        reader = csv.reader(f)
+        # Skip the header; the default keeps an empty file from raising.
+        next(reader, None)
+        for row in reader:
+            if not row:
+                # csv.reader yields [] for blank lines (e.g. trailing newline).
+                continue
+            km.append(float(row[0]))
+            price.append(float(row[1]))
+    return km, price
+
+
+def estimate_price(mileage, theta0, theta1):
+    """Evaluate the linear hypothesis ``theta0 + theta1 * mileage``."""
+    slope_term = theta1 * mileage
+    return theta0 + slope_term
+
+
+# DV: dependent variable (observed output), IV: independent variable (input)
+def train_once(learning_rate, DV, IV, theta0, theta1):
+    """Compute one gradient-descent step for both thetas.
+
+    The hypothesis ``estimate_price(x, theta0, theta1)`` is evaluated on each
+    independent value ``x`` and compared with the matching dependent value
+    ``y``.
+
+    Args:
+        learning_rate: Factor applied to both averaged gradients.
+        DV: Observed outputs (dependent variable), one per sample.
+        IV: Inputs (independent variable), aligned with ``DV``.
+        theta0: Current intercept.
+        theta1: Current slope.
+
+    Returns:
+        A tuple ``(tmp0, tmp1)``: the amounts to subtract from ``theta0`` and
+        ``theta1`` respectively.
+    """
+    m = len(DV)
+    # x must come from IV and y from DV: the model predicts DV from IV.
+    errors = [estimate_price(x, theta0, theta1) - y for x, y in zip(IV, DV)]
+    tmp0 = learning_rate * (1.0 / m) * sum(errors)
+    tmp1 = learning_rate * (1.0 / m) * sum(err * x for err, x in zip(errors, IV))
+    return tmp0, tmp1
+
+
+def denormalize_thetas(t0, t1, km_min, km_max, price_min, price_max):
+    """Convert thetas fitted on min-max normalized data back to raw units.
+
+    With both axes rescaled to [0, 1], the slope is multiplied by the price
+    range and divided by the mileage range; the intercept is rescaled by the
+    price range, shifted by ``price_min`` and corrected for ``km_min``.
+
+    Returns:
+        A tuple ``(real_t0, real_t1)``: intercept and slope in raw units.
+    """
+    km_span = km_max - km_min
+    price_span = price_max - price_min
+    slope = t1 * price_span / km_span
+    intercept = t0 * price_span + price_min - slope * km_min
+    return intercept, slope
+
+
+def train(learning_rate, iterations):
+    """Fit the thetas by gradient descent and return them in raw units.
+
+    Loads the dataset, min-max normalizes both columns, starts from
+    ``theta0 = theta1 = 0.0`` and applies ``iterations`` steps of
+    ``train_once`` (normalized prices as DV, normalized mileages as IV).
+    The result is passed through ``denormalize_thetas``.
+
+    Returns:
+        A tuple ``(theta0, theta1)`` as returned by ``denormalize_thetas``.
+    """
+    raw_km, raw_price = load_data()
+    scaled_km, km_min, km_max = normalize(raw_km)
+    scaled_price, price_min, price_max = normalize(raw_price)
+    theta0, theta1 = 0.0, 0.0
+    for _ in range(iterations):
+        step0, step1 = train_once(
+            learning_rate, scaled_price, scaled_km, theta0, theta1
+        )
+        # Both steps were computed from the same thetas before either changes.
+        theta0, theta1 = theta0 - step0, theta1 - step1
+    return denormalize_thetas(
+        theta0, theta1, km_min, km_max, price_min, price_max
+    )
+
+
+def save_thetas(theta0, theta1):
+    """Overwrite ``THETAS_FILE`` with theta0 and theta1, one per line."""
+    with open(THETAS_FILE, "w") as out:
+        payload = f"{theta0}\n{theta1}\n"
+        out.write(payload)
+
+
+def main():
+    """Command-line entry point: ``train.py <learning_rate> <iterations>``.
+
+    Trains the model, saves the thetas to ``THETAS_FILE`` and prints them.
+    Exits with a usage message when fewer than two arguments are given.
+    """
+    if len(sys.argv) < 3:
+        sys.exit(f"usage: {sys.argv[0]} <learning_rate> <iterations>")
+    learning_rate = float(sys.argv[1])
+    iterations = int(sys.argv[2])
+    t0, t1 = train(learning_rate, iterations)
+    save_thetas(t0, t1)
+    print(f"θ0={t0}, θ1={t1} saved to {THETAS_FILE}")
+
+
+# Guarded so importing this module (e.g. to reuse estimate_price) does not
+# start a training run or read sys.argv.
+if __name__ == "__main__":
+    main()
author	Thomas Vanbesien <tvanbesi@proton.me>	2026-03-23 21:17:11 +0100
committer	Thomas Vanbesien <tvanbesi@proton.me>	2026-03-23 21:17:11 +0100
commit	fd5fe70ce5271f09303b51dae34b42acc47f5730 (patch)
tree	551b9b7c38b5fb8307cea3653e269ae79fb4b639 /train.py
download	ft_linear_regression-fd5fe70ce5271f09303b51dae34b42acc47f5730.tar.gz ft_linear_regression-fd5fe70ce5271f09303b51dae34b42acc47f5730.zip