aboutsummaryrefslogtreecommitdiffstats
path: root/train.py
diff options
context:
space:
mode:
author Thomas Vanbesien <tvanbesi@proton.me> 2026-03-23 21:17:11 +0100
committer Thomas Vanbesien <tvanbesi@proton.me> 2026-03-23 21:17:11 +0100
commit fd5fe70ce5271f09303b51dae34b42acc47f5730 (patch)
tree 551b9b7c38b5fb8307cea3653e269ae79fb4b639 /train.py
download ft_linear_regression-fd5fe70ce5271f09303b51dae34b42acc47f5730.tar.gz
ft_linear_regression-fd5fe70ce5271f09303b51dae34b42acc47f5730.zip
Initial commit: linear regression for car price prediction
Training, prediction, and visualization programs using gradient descent with min-max normalization.
Diffstat (limited to 'train.py')
-rw-r--r-- train.py 75
1 files changed, 75 insertions, 0 deletions
diff --git a/train.py b/train.py
new file mode 100644
index 0000000..a9c865b
--- /dev/null
+++ b/train.py
@@ -0,0 +1,75 @@
+import csv
+import sys
+
+DATASET = "data.csv"
+THETAS_FILE = "thetas.csv"
+
+
def normalize(data):
    """Min-max scale ``data`` into the range [0, 1].

    Args:
        data: Non-empty iterable of numbers.

    Returns:
        A tuple ``(scaled, min_val, max_val)`` where ``scaled`` is a list
        holding ``(x - min_val) / (max_val - min_val)`` for every ``x`` in
        ``data``. When every value is identical the range is zero, so each
        value is mapped to ``0.0`` instead of dividing by zero.

    Raises:
        ValueError: If ``data`` is empty.
    """
    # Materialize once so a one-shot iterable can be scanned several times.
    values = list(data)
    if not values:
        raise ValueError("cannot normalize an empty dataset")
    min_val = min(values)
    max_val = max(values)
    span = max_val - min_val
    if span == 0:
        # Constant column: there is no spread to scale by.
        return [0.0 for _ in values], min_val, max_val
    return [(x - min_val) / span for x in values], min_val, max_val
+
+
def load_data(path=None):
    """Read the mileage/price dataset from a CSV file.

    The first row is treated as a header and skipped. Every remaining
    non-blank row must hold the mileage in column 0 and the price in column 1.

    Args:
        path: CSV file to read. Defaults to ``DATASET`` when omitted.

    Returns:
        A tuple ``(km, price)`` of two equally long lists of floats. Both are
        empty when the file has no data rows.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a cell cannot be converted to ``float``.
        IndexError: If a non-blank row has fewer than two columns.
    """
    if path is None:
        path = DATASET
    km = []
    price = []
    # newline="" leaves newline handling to the csv module, as its
    # documentation requires for file objects handed to csv.reader.
    with open(path, newline="") as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header; tolerate a completely empty file
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines (e.g. a trailing one).
                continue
            km.append(float(row[0]))
            price.append(float(row[1]))
    return km, price
+
+
def estimate_price(mileage, theta0, theta1):
    """Evaluate the linear hypothesis for one mileage value.

    Args:
        mileage: Input value the price is estimated for.
        theta0: Intercept of the line.
        theta1: Slope of the line.

    Returns:
        ``theta0 + theta1 * mileage``.
    """
    slope_term = theta1 * mileage
    return theta0 + slope_term
+
+
# DV: dependent variable (targets y), IV: independent variable (inputs x)
def train_once(learning_rate, DV, IV, theta0, theta1):
    """Compute one gradient-descent step for ``y = theta0 + theta1 * x``.

    Args:
        learning_rate: Factor applied to both gradient components.
        DV: Sequence of observed targets ``y`` (dependent variable).
        IV: Sequence of inputs ``x`` (independent variable), same length
            as ``DV``.
        theta0: Current intercept.
        theta1: Current slope.

    Returns:
        A tuple ``(tmp0, tmp1)``: the learning-rate-scaled mean-error gradient
        for ``theta0`` and ``theta1``. The caller subtracts them from the
        current thetas.

    Raises:
        ValueError: If the data is empty or ``DV`` and ``IV`` differ in length.
    """
    m = len(DV)
    if m == 0:
        raise ValueError("cannot train on an empty dataset")
    if len(IV) != m:
        raise ValueError("DV and IV must have the same length")
    # The model predicts the dependent variable from the independent one, so
    # x must come from IV and y from DV. The prediction is the same linear
    # hypothesis that estimate_price() evaluates.
    errors = [(theta0 + theta1 * x) - y for x, y in zip(IV, DV)]
    tmp0 = learning_rate * (1.0 / m) * sum(errors)
    tmp1 = learning_rate * (1.0 / m) * sum(e * x for e, x in zip(errors, IV))
    return tmp0, tmp1
+
+
def denormalize_thetas(t0, t1, km_min, km_max, price_min, price_max):
    """Convert thetas fitted on min-max scaled data back to raw units.

    Args:
        t0: Intercept fitted on the scaled data.
        t1: Slope fitted on the scaled data.
        km_min: Smallest raw mileage.
        km_max: Largest raw mileage.
        price_min: Smallest raw price.
        price_max: Largest raw price.

    Returns:
        A tuple ``(real_t0, real_t1)``: intercept and slope in raw units.
    """
    price_span = price_max - price_min
    km_span = km_max - km_min
    # Rescale the slope first; the intercept is expressed in terms of it.
    slope = t1 * price_span / km_span
    intercept = t0 * price_span + price_min - slope * km_min
    return intercept, slope
+
+
def train(learning_rate, iterations):
    """Fit the thetas on the dataset and return them in raw units.

    Loads the data, min-max normalizes mileages and prices, starts both
    thetas at 0.0, applies ``iterations`` updates obtained from
    ``train_once`` and finally denormalizes the result.

    Args:
        learning_rate: Learning rate forwarded to ``train_once``.
        iterations: Number of update steps to run.

    Returns:
        The ``(theta0, theta1)`` tuple produced by ``denormalize_thetas``.
    """
    raw_km, raw_price = load_data()
    scaled_km, km_lo, km_hi = normalize(raw_km)
    scaled_price, price_lo, price_hi = normalize(raw_price)

    theta0, theta1 = 0.0, 0.0
    for _ in range(iterations):
        # Prices go in as DV, mileages as IV.
        step0, step1 = train_once(
            learning_rate, scaled_price, scaled_km, theta0, theta1
        )
        theta0, theta1 = theta0 - step0, theta1 - step1

    return denormalize_thetas(
        theta0, theta1, km_lo, km_hi, price_lo, price_hi
    )
+
+
def save_thetas(theta0, theta1):
    """Write both thetas to ``THETAS_FILE``, one per line.

    The file is opened in write mode, so any previous content is replaced.

    Args:
        theta0: Intercept to store on the first line.
        theta1: Slope to store on the second line.
    """
    with open(THETAS_FILE, "w") as out:
        payload = f"{theta0}\n{theta1}\n"
        out.write(payload)
+
+
def main(argv=None):
    """Command-line entry point: train the model and save the thetas.

    Args:
        argv: Argument vector including the program name, followed by the
            learning rate and the iteration count. Defaults to ``sys.argv``.

    Returns:
        Process exit status: 0 on success, 2 when the arguments are missing
        or cannot be parsed.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        prog = argv[0] if argv else "train.py"
        print(f"usage: {prog} <learning_rate> <iterations>", file=sys.stderr)
        return 2
    try:
        learning_rate = float(argv[1])
        iterations = int(argv[2])
    except ValueError as err:
        print(f"error: invalid argument: {err}", file=sys.stderr)
        return 2
    t0, t1 = train(learning_rate, iterations)
    save_thetas(t0, t1)
    print(f"θ0={t0}, θ1={t1} saved to {THETAS_FILE}")
    return 0


# Guarded so importing this module does not start a training run.
if __name__ == "__main__":
    sys.exit(main())