"""Train a univariate linear regression (price as a function of mileage).

Usage: ``python <script> <learning_rate> <iterations>``

The script reads ``DATASET``, fits ``price = theta0 + theta1 * mileage`` with
batch gradient descent on min-max normalized data, and writes the resulting
coefficients (expressed in the original units) to ``THETAS_FILE``.
"""

import csv
import sys

DATASET = "data.csv"
THETAS_FILE = "thetas.csv"


def normalize(data):
    """Min-max scale *data* to the range [0, 1].

    Returns a tuple ``(scaled_values, min_val, max_val)`` so the caller can
    later map results back to the original scale.

    Raises:
        ValueError: if *data* is empty (raised by ``min``) or if all values
            are equal, in which case the scaling is undefined.
    """
    min_val = min(data)
    max_val = max(data)
    span = max_val - min_val
    if span == 0:
        # Previously this surfaced as an opaque ZeroDivisionError.
        raise ValueError("cannot normalize constant data: all values are equal")
    return [(x - min_val) / span for x in data], min_val, max_val


def load_data():
    """Read ``DATASET`` and return ``(km, price)`` as two lists of floats.

    The first row is treated as a header and skipped. Column 0 is read as the
    mileage and column 1 as the price. Completely blank rows are ignored.
    """
    km = []
    price = []
    # newline="" is what the csv module recommends for files given to reader.
    with open(DATASET, newline="") as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header; tolerate an empty file
        for row in reader:
            if not row:
                continue  # csv.reader yields [] for blank lines
            km.append(float(row[0]))
            price.append(float(row[1]))
    return km, price


def estimate_price(mileage, theta0, theta1):
    """Return the linear model's prediction ``theta0 + theta1 * mileage``."""
    return theta0 + theta1 * mileage


# DV: dependent variable, IV: independent variable
def train_once(learning_rate, DV, IV, theta0, theta1):
    """Compute one gradient-descent step for the model ``DV ~ theta0 + theta1 * IV``.

    Args:
        learning_rate: step size multiplier.
        DV: observed values of the dependent variable (the targets).
        IV: values of the independent variable (the model inputs).
        theta0: current intercept.
        theta1: current slope.

    Returns:
        ``(step0, step1)``: the learning-rate-scaled mean gradients that the
        caller subtracts from ``theta0`` and ``theta1`` respectively.
    """
    scale = learning_rate * (1.0 / len(DV))
    # The model input must be the independent variable and the target the
    # dependent one. Iterating zip(DV, IV) here would regress IV on DV.
    errors = [estimate_price(x, theta0, theta1) - y for x, y in zip(IV, DV)]
    step0 = scale * sum(errors)
    step1 = scale * sum(err * x for err, x in zip(errors, IV))
    return step0, step1


def denormalize_thetas(t0, t1, km_min, km_max, price_min, price_max):
    """Convert coefficients fitted on normalized data back to original units.

    Given ``price_norm = t0 + t1 * km_norm`` with both variables min-max
    scaled, return ``(real_t0, real_t1)`` such that
    ``price = real_t0 + real_t1 * km``.
    """
    price_range = price_max - price_min
    km_range = km_max - km_min
    real_t1 = t1 * price_range / km_range
    real_t0 = t0 * price_range + price_min - real_t1 * km_min
    return real_t0, real_t1


def train(learning_rate, iterations):
    """Fit the model on ``DATASET`` and return ``(theta0, theta1)``.

    Both columns are min-max normalized before running *iterations* rounds of
    gradient descent starting from zero; the returned coefficients are
    expressed in the original (un-normalized) units.
    """
    kms, prices = load_data()
    kms_norm, km_min, km_max = normalize(kms)
    prices_norm, price_min, price_max = normalize(prices)

    t0 = 0.0
    t1 = 0.0
    for _ in range(iterations):
        step0, step1 = train_once(learning_rate, prices_norm, kms_norm, t0, t1)
        t0 -= step0
        t1 -= step1

    return denormalize_thetas(t0, t1, km_min, km_max, price_min, price_max)


def save_thetas(theta0, theta1):
    """Write *theta0* and *theta1* to ``THETAS_FILE``, one value per line."""
    with open(THETAS_FILE, "w") as f:
        f.write(f"{theta0}\n{theta1}\n")


def main():
    """Parse the command line, train the model and save the coefficients."""
    if len(sys.argv) < 3:
        sys.exit(f"usage: {sys.argv[0]} <learning_rate> <iterations>")
    learning_rate = float(sys.argv[1])
    iterations = int(sys.argv[2])

    t0, t1 = train(learning_rate, iterations)
    save_thetas(t0, t1)
    print(f"θ0={t0}, θ1={t1} saved to {THETAS_FILE}")


# Guarded so that importing this module does not start a training run.
if __name__ == "__main__":
    main()