"""Train a small feed-forward regressor on the auto-mpg CSV and report test error.

Pipeline: load the CSV, drop incomplete rows, split into train/test,
standardise features and target on the training split only, fit a
one-hidden-layer network with full-batch SGD, then report MAE and MAPE on the
held-out split in the target's original units.
"""

import numpy as np
import pandas as pd
import torch
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# ---------------------------------------------------------------------------
# Load and clean the data
# ---------------------------------------------------------------------------
df = pd.read_csv("./RegressionModels/AutoMPG/auto-mpg.csv")

# Drop rows with a missing value in any column.
df = df.dropna()

# Rows with an unknown horsepower are marked with '?'. Take an explicit copy
# of the filtered frame so the column assignment below writes to an
# independent DataFrame instead of triggering pandas' SettingWithCopyWarning.
df = df[df["horsepower"] != "?"].copy()
df["horsepower"] = df["horsepower"].astype(int)  # object -> int

# ---------------------------------------------------------------------------
# Train/test split and scaling
# ---------------------------------------------------------------------------
# Features are the columns at positions 1..7 (seven inputs, matching the
# width of the first Linear layer); the target is the "mpg" column.
X_train, X_test, Y_train, Y_test = train_test_split(
    df.iloc[:, 1:8],
    df["mpg"],
    test_size=0.2,
    random_state=42,
)

# Fit the scalers on the training split only; the test split is transformed
# later with these same statistics.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_train.values)
X = torch.tensor(X_scaled, dtype=torch.float32)

scaler_Y = StandardScaler()
Y_scaled = scaler_Y.fit_transform(Y_train.values.reshape(-1, 1))
Y = torch.tensor(Y_scaled, dtype=torch.float32)

# ---------------------------------------------------------------------------
# Model and training
# ---------------------------------------------------------------------------
# One hidden layer of 10 ReLU units; add more layers to increase capacity.
# NOTE(review): no torch seed is set in this script, so the initial weights
# (and therefore the printed numbers) presumably differ between runs.
model = torch.nn.Sequential(
    torch.nn.Linear(7, 10),
    torch.nn.ReLU(),
    torch.nn.Linear(10, 1),
)

loss_fn = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

# Full-batch gradient descent: every step uses the whole training set.
for epoch in range(3000):
    pred_y = model(X)
    loss = loss_fn(pred_y, Y)  # computed on the standardised target

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # With 3000 epochs this logs at epochs 0, 999, 1998 and 2997.
    if epoch % 999 == 0:
        print(f"Epoch: {epoch}, Loss: {loss.item():.2f}")

# ---------------------------------------------------------------------------
# Evaluation on the held-out split
# ---------------------------------------------------------------------------
# Scale test features using the same scaler as training.
X_test_scaled = scaler.transform(X_test.values)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)

# Predictions come out in the standardised target space.
with torch.no_grad():
    pred_Y_scaled = model(X_test_tensor).numpy()

# Map predictions back to the target's original scale; the true test values
# were never scaled, so they are only reshaped.
Y_test_array = Y_test.values.reshape(-1, 1)  # shape (n, 1)
pred_Y = scaler_Y.inverse_transform(pred_Y_scaled)  # shape (n, 1)

# Flatten to 1-D for the metric computations.
Y_test_unscaled_flat = Y_test_array.ravel()
pred_Y_flat = pred_Y.ravel()

mae = mean_absolute_error(Y_test_unscaled_flat, pred_Y_flat)
# MAPE divides by the true values, so it assumes no true target equals zero.
mape = np.mean(np.abs((Y_test_unscaled_flat - pred_Y_flat) / Y_test_unscaled_flat)) * 100

print(f"Test MAE: {mae:.2f}")
print(f"Test MAPE: {mape:.2f}%")