73 lines
2.3 KiB
Python
73 lines
2.3 KiB
Python
import pandas as pd
|
|
import torch
|
|
from sklearn.preprocessing import StandardScaler
|
|
from sklearn.model_selection import train_test_split
|
|
from sklearn.metrics import mean_absolute_error
|
|
import numpy as np
|
|
|
|
# Load the Auto MPG table and keep only rows that are complete and numeric.
df = pd.read_csv("./RegressionModels/AutoMPG/auto-mpg.csv")

# Unknown horsepower is recorded as "?", which leaves the column with object
# dtype. Coercing every non-numeric entry to NaN lets a single dropna() remove
# both empty cells and the placeholder (or any other malformed token) instead
# of crashing in the integer cast below.
df["horsepower"] = pd.to_numeric(df["horsepower"], errors="coerce")

# copy() so the column assignment below writes to an independent frame.
df = df.dropna().copy()

# Restore the integer dtype the rest of the script has always seen.
df["horsepower"] = df["horsepower"].astype(int)
|
|
|
|
# print(df.dtypes)
|
|
|
|
# print(df.iloc[:,1:8])
|
|
|
|
|
|
# Hold out 20% of the rows for testing. Features are taken positionally
# (columns 1 through 7); the target is the "mpg" column. The fixed
# random_state keeps the partition identical between runs.
feature_frame = df.iloc[:, 1:8]
target_series = df["mpg"]
X_train, X_test, Y_train, Y_test = train_test_split(
    feature_frame,
    target_series,
    test_size=0.2,
    random_state=42,
)
|
|
|
|
|
|
# Standardise the training features. `scaler` keeps the fitted statistics so
# the identical transform can be applied to other data later.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_train.to_numpy())
X = torch.tensor(X_scaled, dtype=torch.float32)

# The target gets its own scaler. StandardScaler expects a 2-D array, so the
# 1-D target vector is turned into a single column first.
scaler_Y = StandardScaler()
Y_scaled = scaler_Y.fit_transform(Y_train.to_numpy()[:, None])
Y = torch.tensor(Y_scaled, dtype=torch.float32)
|
|
|
|
# X = torch.tensor(df.iloc[:, 1:8].values, dtype=torch.float32)
|
|
# Y = torch.tensor(df["mpg"].values, dtype=torch.float32)
|
|
|
|
# Seed PyTorch's global RNG so weight initialisation is repeatable from run to
# run, matching the fixed random_state already used for the train/test split.
torch.manual_seed(42)

# Small fully connected regressor: 7 input features -> 10 hidden units (ReLU)
# -> 1 output. Increase complexity by adding more layers or widening the
# hidden layer.
model = torch.nn.Sequential(
    torch.nn.Linear(7, 10),
    torch.nn.ReLU(),
    torch.nn.Linear(10, 1),
)
|
|
|
|
|
|
# Mean-squared-error objective, minimised with plain SGD.
loss_fn = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

# Each epoch is a single forward/backward pass over the whole of X.
for epoch in range(3000):
    # Clear gradients from the previous step before backpropagating.
    optimizer.zero_grad()

    pred_y = model(X)
    loss = loss_fn(pred_y, Y)
    loss.backward()
    optimizer.step()

    # Progress report every 999th epoch, i.e. at epochs 0, 999, 1998 and 2997.
    # The reported loss is the one computed before this epoch's update.
    if not epoch % 999:
        print(f"Epoch: {epoch}, Loss: {loss.item():.2f}")
|
|
|
|
|
|
# Apply the scaler that was fitted on the training features to the test
# features (no re-fitting on test data).
X_test_scaled = scaler.transform(X_test.to_numpy())
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)

# Forward pass with gradient tracking disabled; the outputs are still in the
# standardised target space.
with torch.no_grad():
    pred_Y_scaled = model(X_test_tensor)
pred_Y_scaled = pred_Y_scaled.numpy()
|
|
|
|
|
|
# Only the predictions need mapping back to the original scale; Y_test is
# used exactly as it came out of the split.
Y_test_array = Y_test.to_numpy()[:, None]  # shape (n, 1)
pred_Y = scaler_Y.inverse_transform(pred_Y_scaled)  # shape (n, 1)

# The metrics below work on 1-D vectors.
Y_test_unscaled_flat = Y_test_array.reshape(-1)
pred_Y_flat = pred_Y.reshape(-1)

mae = mean_absolute_error(Y_test_unscaled_flat, pred_Y_flat)

# MAPE: mean of |(truth - prediction) / truth|, expressed as a percentage.
relative_error = (Y_test_unscaled_flat - pred_Y_flat) / Y_test_unscaled_flat
mape = np.abs(relative_error).mean() * 100

print(f"Test MAE: {mae:.2f}")
print(f"Test MAPE: {mape:.2f}%")
|