Work in progress

This commit is contained in:
2025-09-29 08:59:47 +02:00
parent 8a2a9d1064
commit b93df4af0f
26 changed files with 22986 additions and 0 deletions

View File

@ -0,0 +1,72 @@
import pandas as pd
import torch
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
import numpy as np
# Load the Auto MPG dataset; the path is relative to the working directory.
df = pd.read_csv("./RegressionModels/AutoMPG/auto-mpg.csv")

# Unknown horsepower values are stored as the placeholder "?", which leaves
# the column string-typed. Coercing to numeric turns "?" (and any other
# non-numeric entry) into NaN, so one dropna() removes every incomplete row.
df["horsepower"] = pd.to_numeric(df["horsepower"], errors="coerce")
# Cast only after the NaN rows are gone. Going through to_numeric first also
# accepts values written as "130.0", which a direct str -> int cast rejects.
df = df.dropna().astype({"horsepower": int})

# Hold out 20% of the rows for testing. Positional columns 1..7 are the
# features and "mpg" is the target; random_state pins the split.
X_train, X_test, Y_train, Y_test = train_test_split(
    df.iloc[:, 1:8],
    df["mpg"],
    test_size=0.2,
    random_state=42,
)
# Standardise the training features. The fitted scaler is kept because the
# test features are transformed with it further down.
scaler = StandardScaler()
X = torch.tensor(scaler.fit_transform(X_train.values), dtype=torch.float32)

# Standardise the target with its own scaler so predictions can be mapped back
# to the original scale later. The scaler expects a 2-D column, hence the
# reshape to (n, 1).
scaler_Y = StandardScaler()
Y_column = Y_train.values.reshape(-1, 1)
Y = torch.tensor(scaler_Y.fit_transform(Y_column), dtype=torch.float32)
# Seed PyTorch's RNG before the layers are created. The train/test split is
# already pinned with random_state=42, but the initial weights were not, so
# the reported loss and test metrics changed from run to run.
torch.manual_seed(42)

# Small fully connected regressor: 7 inputs -> 10 hidden units (ReLU) -> 1 output.
# Add layers or widen the hidden layer to increase model capacity.
model = torch.nn.Sequential(
    torch.nn.Linear(7, 10),
    torch.nn.ReLU(),
    torch.nn.Linear(10, 1),
)

# Mean squared error loss, minimised with plain SGD over all model parameters.
loss_fn = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
# Full-batch training: each epoch is one forward/backward pass over all of X.
for epoch in range(3000):
    # Clear the gradients accumulated by the previous step before computing new ones.
    optimizer.zero_grad()
    loss = loss_fn(model(X), Y)
    loss.backward()
    optimizer.step()
    # NOTE(review): an interval of 999 logs epochs 0, 999, 1998 and 2997;
    # confirm this is intended rather than 1000.
    if not epoch % 999:
        print(f"Epoch: {epoch}, Loss: {loss.item():.2f}")
# Transform the test features with the scaler that was fitted on the training set.
test_inputs = torch.tensor(scaler.transform(X_test.values), dtype=torch.float32)

# Gradients are not needed for inference; the outputs are still in scaled space.
with torch.no_grad():
    scaled_predictions = model(test_inputs).numpy()

# Undo the target scaling on the predictions, then flatten predictions and
# ground truth to 1-D for the metric computations.
pred_Y_flat = scaler_Y.inverse_transform(scaled_predictions).ravel()
Y_test_unscaled_flat = Y_test.values.reshape(-1, 1).ravel()

mae = mean_absolute_error(Y_test_unscaled_flat, pred_Y_flat)

# Mean absolute percentage error, measured relative to the true values.
relative_errors = (Y_test_unscaled_flat - pred_Y_flat) / Y_test_unscaled_flat
mape = 100 * np.mean(np.abs(relative_errors))

print(f"Test MAE: {mae:.2f}")
print(f"Test MAPE: {mape:.2f}%")