# Fit a cubic polynomial y = a + b*x + c*x^2 + d*x^3 to sin(x) on [-pi, pi]
# using plain tensors and hand-derived gradients (no autograd).

import math

import torch

# Set up PyTorch data type and device (CPU or GPU).
dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0")  # Uncomment to use GPU if available

# 1. Generate input data (x) in [-pi, pi] and the corresponding target
#    output (y = sin(x)), which is the function we want to approximate.
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# The powers of x never change between iterations, so compute them once
# instead of re-evaluating them in every forward and backward pass.
x_squared = x ** 2
x_cubed = x ** 3

# 2. Randomly initialize the weights (polynomial coefficients) as 0-d tensors.
a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)

learning_rate = 1e-6
for t in range(2000):
    # 3. Forward pass: compute predicted y using the current coefficients.
    #    y_pred = a + b*x + c*x^2 + d*x^3 (a cubic polynomial)
    y_pred = a + b * x + c * x_squared + d * x_cubed
    residual = y_pred - y

    # 4. Report the loss every 100 steps. The loss is the sum of squared
    #    errors (squared differences summed, not averaged). The gradients
    #    below are derived by hand from the residual, so the scalar loss is
    #    only needed for reporting and is not computed on other iterations.
    if t % 100 == 99:
        loss = residual.pow(2).sum().item()
        print(t, loss)

    # 5. Manually compute gradients of the loss for each weight.
    grad_y_pred = 2.0 * residual  # d(loss)/d(y_pred)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x_squared).sum()
    grad_d = (grad_y_pred * x_cubed).sum()

    # 6. Update each weight by taking a small step in the direction opposite
    #    to its gradient (plain gradient descent).
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d

print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')