import torch
import math

# Set up PyTorch data type and device (CPU or GPU)
dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0")  # Uncomment to use GPU if available

# 1. Generate input data (x) in [-π, π] and corresponding target output (y = sin(x))
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)  # This is what we want to approximate!

# 2. Randomly initialize the weights (polynomial coefficients)
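# Note (added comment): torch.randn(()) with an empty shape returns a 0-dimensional
# (scalar) tensor, so each coefficient starts out as a single value drawn from a
# standard normal distribution.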
a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)

learning_rate = 1e-6
for t in range(2000):
    # 3. Forward pass: compute predicted y using the current coefficients
    # y_pred = a + b*x + c*x^2 + d*x^3 (a cubic polynomial)
    y_pred = a + b * x + c * x ** 2 + d * x ** 3

    # 4. Compute loss: sum of squared differences between prediction and true values
    # (This is the familiar mean-squared-error loss, just without dividing by the number of points.)
    loss = (y_pred - y).pow(2).sum().item()
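    # Note (added comment): .item() converts the 0-dim loss tensor into a plain
    # Python float; that is fine here because the loss is only used for logging,
    # and the gradients below are computed by hand rather than by autograd.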
    if t % 100 == 99:
        print(t, loss)

    # 5. Manually compute gradients for each weight
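    # Derivation (added comment): with loss = sum((y_pred - y)^2) and
    # y_pred = a + b*x + c*x^2 + d*x^3, the chain rule gives
    #   dloss/dy_pred = 2 * (y_pred - y)
    #   dloss/da = sum(dloss/dy_pred * 1)
    #   dloss/db = sum(dloss/dy_pred * x)
    #   dloss/dc = sum(dloss/dy_pred * x^2)
    #   dloss/dd = sum(dloss/dy_pred * x^3)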
    grad_y_pred = 2.0 * (y_pred - y)       # Derivative of loss w.r.t. y_pred
    grad_a = grad_y_pred.sum()             # Derivative for a
    grad_b = (grad_y_pred * x).sum()       # Derivative for b
    grad_c = (grad_y_pred * x ** 2).sum()  # Derivative for c
    grad_d = (grad_y_pred * x ** 3).sum()  # Derivative for d

    # 6. Update each weight by taking a small step in the opposite direction of the gradient
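    # (Added comment) This is plain gradient descent, weight <- weight - learning_rate * gradient,
    # applied to each coefficient directly, without any optimizer object.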
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d

print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')
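
# Optional check (an added sketch, not part of the original listing): re-evaluate
# the learned cubic and report its largest absolute error against sin(x) on the
# same grid of points used for training.
y_fit = a + b * x + c * x ** 2 + d * x ** 3
print(f'Max absolute error vs. sin(x): {(y_fit - y).abs().max().item():.4f}')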