Work in progress

2025-09-29 08:59:47 +02:00
parent 8a2a9d1064
commit b93df4af0f
26 changed files with 22986 additions and 0 deletions

@ -0,0 +1,21 @@
import numpy as np
import matplotlib.pyplot as plt
# Real measured data
x_real = np.array([1,2,3,4,5,6,7,8,9,10])
y_real = np.array([6,8,9,11,13,14,15,17,18,20])
# Fit a linear regression
coeffs = np.polyfit(x_real, y_real, 1)  # degree-1 polynomial = straight line; returns the line's coefficients as [slope, intercept]
print(coeffs)
lin_y = coeffs[0]*x_real + coeffs[1]  # evaluate the line at every x_real point: y = m*x + b
# Plot
plt.scatter(x_real, y_real, color='blue', label='Measured points')
plt.plot(x_real, lin_y, color='black', linestyle='--', label='Linear regression')
plt.xlabel('Advertising spend (million HUF)')
plt.ylabel('Tickets sold (thousands)')
plt.title('Measured data and linear regression')
plt.legend()
plt.grid(True)
plt.show()

@ -0,0 +1,29 @@
import numpy as np
import matplotlib.pyplot as plt
# Coefficients
a, b, c, d = 1, -2, 3, -1
# x values
x = np.linspace(-2, 2, 400)
# Cubic polynomial values
y = a + b*x + c*x**2 + d*x**3
# Pick a few points on the polynomial
x_points = np.linspace(-2, 2, 10)
y_points = a + b*x_points + c*x_points**2 + d*x_points**3
# Fit a linear regression to the points
coeffs = np.polyfit(x_points, y_points, 1)  # degree-1 polynomial = straight line
lin_y = coeffs[0]*x + coeffs[1]
# Plot
plt.plot(x, y, label='Cubic polynomial')
plt.scatter(x_points, y_points, color='blue', marker='x', s=80, label='Points on the polynomial')
plt.plot(x, lin_y, color='black', linestyle='--', label='Linear regression')
plt.xlabel('x')
plt.ylabel('y')
plt.title('Polynomial and linear regression')
plt.legend()
plt.grid(True)
plt.show()

@ -0,0 +1,52 @@
import numpy as np
import matplotlib.pyplot as plt
# Coefficients
a, b, c, d = 1, -2, 3, -1
# x values
x = np.linspace(-2, 2, 400)
# Polynomial values
y = a + b*x + c*x**2 + d*x**3
# Roots (where f(x) = 0)
gyokok = np.roots([d, c, b, a])
real_gyokok = gyokok[np.isreal(gyokok)].real
# First derivative: extrema
# f'(x) = b + 2c*x + 3d*x**2
extr_gyokok = np.roots([3*d, 2*c, b])
real_extr = extr_gyokok[np.isreal(extr_gyokok)].real
extr_y = a + b*real_extr + c*real_extr**2 + d*real_extr**3
# Second derivative: inflection point
# f''(x) = 2c + 6d*x
iflex_x = -2*c/(6*d)
iflex_y = a + b*iflex_x + c*iflex_x**2 + d*iflex_x**3
# Evenly spaced sample x points
x_points = np.linspace(-2, 2, 8)
y_points = a + b*x_points + c*x_points**2 + d*x_points**3
# Fit a linear regression
coeffs = np.polyfit(x_points, y_points, 1)  # degree-1 polynomial = straight line
lin_y = coeffs[0]*x + coeffs[1]  # y = slope*x + intercept
plt.plot(x, y, label='Polynomial')
plt.scatter(real_gyokok, np.zeros_like(real_gyokok), color='red', label='Roots')
plt.scatter(real_extr, extr_y, color='green', label='Extrema')
plt.scatter(iflex_x, iflex_y, color='purple', label='Inflection point')
plt.scatter(0, a, color='orange', label='y-intercept')
plt.scatter(x_points, y_points, color='blue', marker='x', s=80, label='Points to fit')
plt.plot(x, lin_y, color='black', linestyle='--', label='Linear regression')
plt.legend()
plt.xlabel('x')
plt.ylabel('f(x)')
plt.title('Polynomial, points, and linear regression')
plt.grid(True)
plt.show()

@ -0,0 +1,40 @@
# -*- coding: utf-8 -*-
import numpy as np
import math
# Create random input and output data
x = np.linspace(-math.pi, math.pi, 2000)  # 2000 evenly spaced points between -pi and pi
y = np.sin(x)
# Randomly initialize weights
a = np.random.randn()
b = np.random.randn()
c = np.random.randn()
d = np.random.randn()
learning_rate = 1e-6
for t in range(2000):
# Forward pass: compute predicted y
# y = a + b x + c x^2 + d x^3
y_pred = a + b * x + c * x ** 2 + d * x ** 3
# Compute and print loss
loss = np.square(y_pred - y).sum()
if t % 100 == 99:
print(t, loss)
# Backprop to compute gradients of a, b, c, d with respect to loss
grad_y_pred = 2.0 * (y_pred - y)
grad_a = grad_y_pred.sum()
grad_b = (grad_y_pred * x).sum()
grad_c = (grad_y_pred * x ** 2).sum()
grad_d = (grad_y_pred * x ** 3).sum()
# Update weights
a -= learning_rate * grad_a
b -= learning_rate * grad_b
c -= learning_rate * grad_c
d -= learning_rate * grad_d
print(f'Result: y = {a} + {b} x + {c} x^2 + {d} x^3')

@ -0,0 +1,37 @@
import numpy as np
import math
import matplotlib.pyplot as plt
# Generate the data
x = np.linspace(-math.pi, math.pi, 2000)
y = np.sin(x)
# Random weights
a = np.random.randn()
b = np.random.randn()
c = np.random.randn()
d = np.random.randn()
learning_rate = 1e-6
for t in range(2000):
y_pred = a + b * x + c * x ** 2 + d * x ** 3
loss = np.square(y_pred - y).sum()
grad_y_pred = 2.0 * (y_pred - y)
grad_a = grad_y_pred.sum()
grad_b = (grad_y_pred * x).sum()
grad_c = (grad_y_pred * x ** 2).sum()
grad_d = (grad_y_pred * x ** 3).sum()
a -= learning_rate * grad_a
b -= learning_rate * grad_b
c -= learning_rate * grad_c
d -= learning_rate * grad_d
# Plot
plt.plot(x, y, label='Sine function')
plt.plot(x, a + b * x + c * x ** 2 + d * x ** 3, label='Learned polynomial', linestyle='--')
plt.xlabel('x')
plt.ylabel('y')
plt.title('Sine function vs. the learned polynomial')
plt.legend()
plt.grid(True)
plt.show()

@ -0,0 +1,140 @@
# Step-by-step explanation of the code snippet
This code implements a **simple polynomial regression** in Python with NumPy, approximating a sine wave with a cubic polynomial. It is not PyTorch yet, but machine learning written "by hand"; the logic is the same as what you would use in PyTorch.
## 1. Generating the data
```python
x = np.linspace(-math.pi, math.pi, 2000)
y = np.sin(x)
```
- **x**: 2000 evenly spaced points between -π and π.
- **y**: the sine value for every x. This is the "true" function we want to approximate.
## 2. Random initialization of the weights (coefficients)
```python
a = np.random.randn()
b = np.random.randn()
c = np.random.randn()
d = np.random.randn()
```
- These are the coefficients of the polynomial: $y = a + b x + c x^2 + d x^3$
- They start as random values; during training they are what actually "learn".
## 3. Setting the learning rate
```python
learning_rate = 1e-6
```
- This controls how much the weights change in each step.
## 4. Training loop (iterations)
```python
for t in range(2000):
# ...
```
- Repeats the learning steps 2000 times.
### 4.1. Forward pass
```python
y_pred = a + b * x + c * x ** 2 + d * x ** 3
```
- Evaluates the polynomial at every x using the current weights.
- This is the "predicted" function that training gradually improves.
### 4.2. Computing the loss
```python
loss = np.square(y_pred - y).sum()
```
- Measures how far the prediction is from the true sine values.
- It sums the squared differences (the same idea as the **Mean Squared Error**, just without the mean); a small worked example follows below.
- Every 100th step it prints the current loss.
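For example, if the prediction errors $(y_{pred,i} - y_i)$ at three points happened to be 0.5, -1, and 2, the loss would be

$$
0.5^2 + (-1)^2 + 2^2 = 0.25 + 1 + 4 = 5.25 .
$$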
### 4.3. Backpropagation: computing the gradients
```python
grad_y_pred = 2.0 * (y_pred - y)
grad_a = grad_y_pred.sum()
grad_b = (grad_y_pred * x).sum()
grad_c = (grad_y_pred * x ** 2).sum()
grad_d = (grad_y_pred * x ** 3).sum()
```
- Computes how the loss changes if each coefficient is nudged slightly (this is the **gradient**); the formulas are written out below.
- This tells us which direction to "push" each coefficient so that the loss decreases.
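Written out, with the loss $L = \sum_i (y_{pred,i} - y_i)^2$, the gradients computed in the code are

$$
\frac{\partial L}{\partial a} = \sum_i 2\,(y_{pred,i} - y_i), \qquad
\frac{\partial L}{\partial b} = \sum_i 2\,(y_{pred,i} - y_i)\,x_i,
$$

$$
\frac{\partial L}{\partial c} = \sum_i 2\,(y_{pred,i} - y_i)\,x_i^2, \qquad
\frac{\partial L}{\partial d} = \sum_i 2\,(y_{pred,i} - y_i)\,x_i^3 .
$$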
### 4.4. Updating the weights (gradient descent)
```python
a -= learning_rate * grad_a
b -= learning_rate * grad_b
c -= learning_rate * grad_c
d -= learning_rate * grad_d
```
- Each coefficient is moved against its gradient (hence the subtraction), scaled by the learning rate.
- This way the polynomial approximates the sine curve better and better.
## 5. Printing the result
```python
print(f'Result: y = {a} + {b} x + {c} x^2 + {d} x^3')
```
- At the end of training it prints the learned polynomial coefficients.
- With these values the polynomial approximates the sine wave as well as it can over the given x range.
***
## What should you see as a result?
- The loss value **keeps decreasing** during training, which means the polynomial approximates the sine wave better and better.
- At the end you get a polynomial whose coefficients (a, b, c, d) have "learned" the shape of the sine wave.
- If you plot the $y$ (sine) and $y_{pred}$ (polynomial) curves, by the end of training the two curves are very similar; see the sketch below.
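A minimal plotting sketch (assuming the script above has already run, so `x`, `y` and the learned `a`, `b`, `c`, `d` are in scope):

```python
import matplotlib.pyplot as plt

# Sketch only: reuses x, y and the learned coefficients from the script above.
y_pred = a + b * x + c * x ** 2 + d * x ** 3
plt.plot(x, y, label='sin(x)')
plt.plot(x, y_pred, linestyle='--', label='learned cubic polynomial')
plt.legend()
plt.show()
```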
***
## Steps in summary
1. **Generate the data**: x and y (sine values)
2. **Initialize the weights**: a, b, c, d
3. **Training loop**:
    - Forward pass: evaluate the polynomial
    - Loss: measure the error
    - Gradient: how to change the weights
    - Weight update: the actual learning
4. **Result**: the learned polynomial coefficients
***
If you like, I can also show how to automate this in PyTorch, or how to plot the result of the training on a chart!

@ -0,0 +1,44 @@
import torch
import math
# Set up PyTorch data type and device (CPU or GPU)
dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0") # Uncomment to use GPU if available
# 1. Generate input data (x) in [-π, π] and corresponding target output (y = sin(x))
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x) # This is what we want to approximate!
# 2. Randomly initialize the weights (polynomial coefficients)
a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)
learning_rate = 1e-6
for t in range(2000):
# 3. Forward pass: compute predicted y using the current coefficients
# y_pred = a + b*x + c*x^2 + d*x^3 (a cubic polynomial)
y_pred = a + b * x + c * x ** 2 + d * x ** 3
# 4. Compute loss: sum of squared differences between prediction and true values
# (This is called the "Mean Squared Error" loss, except without the mean)
loss = (y_pred - y).pow(2).sum().item()
if t % 100 == 99:
print(t, loss)
# 5. Manually compute gradients for each weight
grad_y_pred = 2.0 * (y_pred - y) # Derivative of loss w.r.t. y_pred
grad_a = grad_y_pred.sum() # Derivative for a
grad_b = (grad_y_pred * x).sum() # Derivative for b
grad_c = (grad_y_pred * x ** 2).sum() # Derivative for c
grad_d = (grad_y_pred * x ** 3).sum() # Derivative for d
# 6. Update each weight by taking a small step in the opposite direction of the gradient
a -= learning_rate * grad_a
b -= learning_rate * grad_b
c -= learning_rate * grad_c
d -= learning_rate * grad_d
print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

@ -0,0 +1,61 @@
import torch
import math
# We want to be able to train our model on an `accelerator <https://pytorch.org/docs/stable/torch.html#accelerators>`__
# such as CUDA, MPS, MTIA, or XPU. If the current accelerator is available, we will use it. Otherwise, we use the CPU.
dtype = torch.float
device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")
torch.set_default_device(device)
# Create Tensors to hold input and outputs.
# By default, requires_grad=False, which indicates that we do not need to
# compute gradients with respect to these Tensors during the backward pass.
x = torch.linspace(-math.pi, math.pi, 2000, dtype=dtype)
y = torch.sin(x)
# Create random Tensors for weights. For a third order polynomial, we need
# 4 weights: y = a + b x + c x^2 + d x^3
# Setting requires_grad=True indicates that we want to compute gradients with
# respect to these Tensors during the backward pass.
a = torch.randn((), dtype=dtype, requires_grad=True)
b = torch.randn((), dtype=dtype, requires_grad=True)
c = torch.randn((), dtype=dtype, requires_grad=True)
d = torch.randn((), dtype=dtype, requires_grad=True)
print(f"a = {a.item()}, b = {b.item()}, c = {c.item()}, d = {d.item()}")
learning_rate = 1e-6
for t in range(2000):
# Forward pass: compute predicted y using operations on Tensors.
y_pred = a + b * x + c * x ** 2 + d * x ** 3
# Compute and print loss using operations on Tensors.
    # Now loss is a scalar (zero-dimensional) Tensor.
# loss.item() gets the scalar value held in the loss.
loss = (y_pred - y).pow(2).sum()
if t % 100 == 99:
print(t, loss.item())
# Use autograd to compute the backward pass. This call will compute the
# gradient of loss with respect to all Tensors with requires_grad=True.
    # After this call a.grad, b.grad, c.grad and d.grad will be Tensors holding
# the gradient of the loss with respect to a, b, c, d respectively.
loss.backward()
# Manually update weights using gradient descent. Wrap in torch.no_grad()
# because weights have requires_grad=True, but we don't need to track this
# in autograd.
with torch.no_grad():
a -= learning_rate * a.grad
b -= learning_rate * b.grad
c -= learning_rate * c.grad
d -= learning_rate * d.grad
# Manually zero the gradients after updating weights
a.grad = None
b.grad = None
c.grad = None
d.grad = None
print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

@ -0,0 +1,87 @@
import torch
import math
class LegendrePolynomial3(torch.autograd.Function):
"""
We can implement our own custom autograd Functions by subclassing
torch.autograd.Function and implementing the forward and backward passes
which operate on Tensors.
"""
@staticmethod
def forward(ctx, input):
"""
In the forward pass we receive a Tensor containing the input and return
a Tensor containing the output. ctx is a context object that can be used
to stash information for backward computation. You can cache tensors for
use in the backward pass using the ``ctx.save_for_backward`` method. Other
objects can be stored directly as attributes on the ctx object, such as
``ctx.my_object = my_object``. Check out `Extending torch.autograd <https://docs.pytorch.org/docs/stable/notes/extending.html#extending-torch-autograd>`_
for further details.
"""
ctx.save_for_backward(input)
return 0.5 * (5 * input ** 3 - 3 * input)
@staticmethod
def backward(ctx, grad_output):
"""
In the backward pass we receive a Tensor containing the gradient of the loss
with respect to the output, and we need to compute the gradient of the loss
with respect to the input.
"""
input, = ctx.saved_tensors
return grad_output * 1.5 * (5 * input ** 2 - 1)
dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0") # Uncomment this to run on GPU
# Create Tensors to hold input and outputs.
# By default, requires_grad=False, which indicates that we do not need to
# compute gradients with respect to these Tensors during the backward pass.
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)
# Create random Tensors for weights. For this example, we need
# 4 weights: y = a + b * P3(c + d * x), these weights need to be initialized
# not too far from the correct result to ensure convergence.
# Setting requires_grad=True indicates that we want to compute gradients with
# respect to these Tensors during the backward pass.
a = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
b = torch.full((), -1.0, device=device, dtype=dtype, requires_grad=True)
c = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
d = torch.full((), 0.3, device=device, dtype=dtype, requires_grad=True)
learning_rate = 5e-6
for t in range(2000):
    # To apply our Function, we use the Function.apply method. We alias this as 'P3'.
P3 = LegendrePolynomial3.apply
# Forward pass: compute predicted y using operations; we compute
# P3 using our custom autograd operation.
y_pred = a + b * P3(c + d * x)
# Compute and print loss
loss = (y_pred - y).pow(2).sum()
if t % 100 == 99:
print(t, loss.item())
# Use autograd to compute the backward pass.
loss.backward()
# Update weights using gradient descent
with torch.no_grad():
a -= learning_rate * a.grad
b -= learning_rate * b.grad
c -= learning_rate * c.grad
d -= learning_rate * d.grad
# Manually zero the gradients after updating weights
a.grad = None
b.grad = None
c.grad = None
d.grad = None
print(f'Result: y = {a.item()} + {b.item()} * P3({c.item()} + {d.item()} x)')

@ -0,0 +1,94 @@
# Custom autograd function in PyTorch: Step-by-step explanation
Let's break down what happens in your code, especially how you get the gradient numbers, what they mean, and how PyTorch's autograd system works when you define your own function.
***
## 1. What is a custom autograd function?
- In PyTorch, you can create your own mathematical operation and tell PyTorch **how to compute its gradient** (how it changes with respect to its input).
- You do this by subclassing `torch.autograd.Function` and implementing two methods:
- `forward`: computes the output from the input (normal math)
- `backward`: computes the gradient of the output with respect to the input (how the output changes if you nudge the input)
***
## 2. How do you get the gradient numbers?
- **Forward pass:** You calculate the output for your function. Here, it's the Legendre polynomial:
$$
P_3(x) = \frac{1}{2}(5x^3 - 3x)
$$
- **Backward pass:** You tell PyTorch the formula for the derivative of your function with respect to its input. For Legendre polynomial:
$$
\frac{dP_3}{dx} = \frac{1}{2}(15x^2 - 3) = 1.5(5x^2 - 1)
$$
- When you call `loss.backward()`, PyTorch uses your `backward` method to calculate how much the loss would change if you changed the input a little bit. This is the **gradient**.
- PyTorch automatically chains these gradients through all operations in your model, so you get the gradient of the loss with respect to every parameter (a, b, c, d).
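One practical way to verify that the hand-written `backward` matches the `forward` formula is `torch.autograd.gradcheck`, which compares your analytical gradient against numerical finite differences. A minimal sketch (assuming the `LegendrePolynomial3` class from the file above is in scope; gradcheck wants double-precision inputs with `requires_grad=True`):

```python
import torch
from torch.autograd import gradcheck

# Random double-precision test input for the numerical gradient check.
test_input = torch.randn(20, dtype=torch.double, requires_grad=True)
# Prints True if the custom backward agrees with finite differences.
print(gradcheck(LegendrePolynomial3.apply, (test_input,), eps=1e-6, atol=1e-4))
```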
***
## 3. What do these numbers mean?
- The gradient for each parameter (e.g., `a.grad`, `b.grad`, etc.) tells you **how much the loss would change if you nudged that parameter up or down**.
- If the gradient is large and positive, increasing the parameter will increase the loss (bad), so you want to decrease it.
- If the gradient is large and negative, increasing the parameter will decrease the loss (good), so you want to increase it.
- You use these numbers to update your parameters in the direction that makes the loss smaller (gradient descent).
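A tiny toy example of this (my own illustration, not from the file above): one parameter `w` and a loss $(2w - 3)^2$; the sign of `w.grad` tells you which way to move `w`.

```python
import torch

w = torch.tensor(5.0, requires_grad=True)
loss = (2 * w - 3) ** 2
loss.backward()
print(w.grad)           # tensor(28.) -- positive, so decreasing w lowers the loss
with torch.no_grad():
    w -= 0.01 * w.grad  # one gradient-descent step, w moves toward the optimum 1.5
```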
***
## 4. How does PyTorch get these numbers?
- During the forward pass, PyTorch builds a **computation graph** of all operations.
- When you call `loss.backward()`, PyTorch walks backward through this graph, using the `backward` methods you defined (and built-in ones for standard operations), applying the chain rule to compute all gradients.
- The numbers you see in `.grad` are the result of this process: they are the derivatives of the loss with respect to each parameter.
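As a concrete instance of this chain rule in the model above, where $y_{pred} = a + b \, P_3(c + d x)$ and $u_i = c + d x_i$, the number that ends up in `d.grad` is

$$
\frac{\partial \text{loss}}{\partial d} = \sum_i 2\,(y_{pred,i} - y_i)\cdot b \cdot \underbrace{1.5\,(5u_i^2 - 1)}_{P_3'(u_i)} \cdot x_i .
$$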
***
## 5. Step-by-step in your code
1. **Forward pass:**
- You compute `y_pred = a + b * P3(c + d * x)` using your custom Legendre polynomial function.
2. **Loss calculation:**
- You compute the loss as the sum of squared errors between `y_pred` and the true `y`.
3. **Backward pass:**
- You call `loss.backward()`. PyTorch uses your custom `backward` method to compute the gradients for all parameters.
4. **Update parameters:**
- You use the gradients to update `a`, `b`, `c`, and `d` so the loss gets smaller.
***
## 6. Summary
- **Custom autograd functions** let you define both the math and the gradient for new operations.
- **Gradients** are numbers that tell you how to change each parameter to make the loss smaller.
- **PyTorch computes these automatically** using your `backward` method and the chain rule.
- You use these gradients to update your parameters and train your model.
If you want, I can walk through a concrete example with numbers, or show how the gradients change during training!

@ -0,0 +1,69 @@
# -*- coding: utf-8 -*-
import torch
import math
# Create Tensors to hold input and outputs.
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)
# For this example, the output y is a linear function of (x, x^2, x^3), so
# we can consider it as a linear layer neural network. Let's prepare the
# tensor (x, x^2, x^3).
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)
# In the above code, x.unsqueeze(-1) has shape (2000, 1), and p has shape
# (3,), for this case, broadcasting semantics will apply to obtain a tensor
# of shape (2000, 3)
# Use the nn package to define our model as a sequence of layers. nn.Sequential
# is a Module which contains other Modules, and applies them in sequence to
# produce its output. The Linear Module computes output from input using a
# linear function, and holds internal Tensors for its weight and bias.
# The Flatten layer flattens the output of the linear layer to a 1D tensor,
# to match the shape of `y`.
model = torch.nn.Sequential(
torch.nn.Linear(3, 1),
torch.nn.Flatten(0, 1)
)
# The nn package also contains definitions of popular loss functions; in this
# case we will use Mean Squared Error (MSE) as our loss function.
loss_fn = torch.nn.MSELoss(reduction='sum')
learning_rate = 1e-6
for t in range(2000):
# Forward pass: compute predicted y by passing x to the model. Module objects
# override the __call__ operator so you can call them like functions. When
# doing so you pass a Tensor of input data to the Module and it produces
# a Tensor of output data.
y_pred = model(xx)
# Compute and print loss. We pass Tensors containing the predicted and true
# values of y, and the loss function returns a Tensor containing the
# loss.
loss = loss_fn(y_pred, y)
if t % 100 == 99:
print(t, loss.item())
# Zero the gradients before running the backward pass.
model.zero_grad()
# Backward pass: compute gradient of the loss with respect to all the learnable
# parameters of the model. Internally, the parameters of each Module are stored
# in Tensors with requires_grad=True, so this call will compute gradients for
# all learnable parameters in the model.
loss.backward()
# Update the weights using gradient descent. Each parameter is a Tensor, so
# we can access its gradients like we did before.
with torch.no_grad():
for param in model.parameters():
param -= learning_rate * param.grad
# You can access the first layer of `model` like accessing the first item of a list
linear_layer = model[0]
# For linear layer, its parameters are stored as `weight` and `bias`.
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

@ -0,0 +1,128 @@
# What does this code do? Step-by-step explanation
You're absolutely right in your intuition! This time, you're using a **neural network** to learn the relationship between $x$ and $\sin(x)$ _without giving it the explicit formula_. Instead, you give the network lots of example data (inputs and outputs), and let it figure out the best way to predict $y$ from $x$.
Let's walk through every step of the code and what happens:
***
## 1. **Create input and output data**
```python
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)
```
- **x**: 2000 evenly spaced points from $-\pi$ to $\pi$.
- **y**: The true output, $\sin(x)$, for each input $x$.
- This is your "training data"—pairs of (input, output) for the network to learn from.
***
## 2. **Prepare features for the model**
```python
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)
```
- You create a new tensor $xx$ where each row is $[x, x^2, x^3]$.
- This means the network will get three features for each input: $x$, $x^2$, and $x^3$.
- This helps the network learn more complex (curvy) relationships than just a straight line.
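A quick way to see the shapes involved (just an illustration, not part of the training script):

```python
import math
import torch

x = torch.linspace(-math.pi, math.pi, 2000)
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)    # (2000, 1) broadcast against (3,) -> (2000, 3)
print(x.shape, xx.shape)       # torch.Size([2000]) torch.Size([2000, 3])
print(xx[0])                   # first row: [x_0, x_0**2, x_0**3]
```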
***
## 3. **Define the neural network model**
```python
model = torch.nn.Sequential(
torch.nn.Linear(3, 1),
torch.nn.Flatten(0, 1)
)
```
- **torch.nn.Linear(3, 1)**: A single layer that takes 3 inputs ($x, x^2, x^3$) and outputs 1 value (prediction for $y$).
- **torch.nn.Flatten(0, 1)**: Flattens the output to match the shape of $y$.
- This is a very simple neural network (just one layer), but it's enough for this regression task.
***
## 4. **Define the loss function**
```python
loss_fn = torch.nn.MSELoss(reduction='sum')
```
- **Mean Squared Error (MSE)**: Measures how far off the predictions are from the true values.
- The goal is to make this loss as small as possible during training.
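Concretely, with `reduction='sum'` the value computed here is the sum of squared errors over all 2000 points:

$$
\text{loss} = \sum_{i=1}^{2000} \left(\hat{y}_i - y_i\right)^2 .
$$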
***
## 5. **Training loop**
```python
for t in range(2000):
y_pred = model(xx)
loss = loss_fn(y_pred, y)
if t % 100 == 99:
print(t, loss.item())
model.zero_grad()
loss.backward()
with torch.no_grad():
for param in model.parameters():
param -= learning_rate * param.grad
```
- **Forward pass**: The model predicts $y$ for each input $xx$.
- **Compute loss**: How far off are the predictions from the true $y$?
- **Zero gradients**: Clear old gradients before computing new ones.
- **Backward pass**: Compute gradients (how to change weights to reduce loss).
- **Update weights**: Adjust the model's weights using gradient descent.
- This loop repeats 2000 times, gradually improving the model's predictions.
***
## 6. **Result: Print the learned polynomial**
```python
linear_layer = model[0]
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')
```
- After training, you print the final weights and bias.
- This gives you the best-fit polynomial the network found to approximate $\sin(x)$ using the data.
***
## **Summary: What is happening?**
- You give the network lots of example data ($x$, $\sin(x)$).
- The network tries to find the best weights to predict $y$ from $x$, $x^2$, and $x^3$.
- You don't tell it the formula for $\sin(x)$—it "learns" the best approximation from the data.
- This is the core idea of neural networks: **learn patterns from data, not from explicit formulas**.
If you want to use a more complex neural network (with hidden layers and nonlinear activations), you can! That would let the model learn even more complicated relationships, and is the next step in deep learning.
Let me know if you want to see how to do that, or want to visualize the predictions vs. the true values!
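As a minimal sketch of that next step (my own illustration with assumed layer sizes, not part of the original files): one hidden layer with a `Tanh` nonlinearity, fed the raw $x$ instead of the hand-made $[x, x^2, x^3]$ features.

```python
import math
import torch

x = torch.linspace(-math.pi, math.pi, 2000).unsqueeze(-1)  # shape (2000, 1)
y = torch.sin(x)

model = torch.nn.Sequential(
    torch.nn.Linear(1, 32),   # 1 input feature -> 32 hidden units
    torch.nn.Tanh(),          # nonlinearity lets the network bend the curve
    torch.nn.Linear(32, 1),   # 32 hidden units -> 1 output
)
loss_fn = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)

for t in range(2000):
    loss = loss_fn(model(x), y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(f'final loss: {loss.item():.6f}')
```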

@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
import torch
import math
# Create Tensors to hold input and outputs.
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)
# Prepare the input tensor (x, x^2, x^3).
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)
# Use the nn package to define our model and loss function.
model = torch.nn.Sequential(
torch.nn.Linear(3, 1),
torch.nn.Flatten(0, 1)
)
loss_fn = torch.nn.MSELoss(reduction='sum')
# Use the optim package to define an Optimizer that will update the weights of
# the model for us. Here we will use RMSprop; the optim package contains many other
# optimization algorithms. The first argument to the RMSprop constructor tells the
# optimizer which Tensors it should update.
learning_rate = 1e-3
optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)
for t in range(2000):
# Forward pass: compute predicted y by passing x to the model.
y_pred = model(xx)
# Compute and print loss.
loss = loss_fn(y_pred, y)
if t % 100 == 99:
print(t, loss.item())
# Before the backward pass, use the optimizer object to zero all of the
# gradients for the variables it will update (which are the learnable
# weights of the model). This is because by default, gradients are
    # accumulated in buffers (i.e., not overwritten) whenever .backward()
    # is called. Check out the docs of torch.autograd.backward for more details.
optimizer.zero_grad()
# Backward pass: compute gradient of the loss with respect to model
# parameters
loss.backward()
# Calling the step function on an Optimizer makes an update to its
# parameters
optimizer.step()
linear_layer = model[0]
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

@ -0,0 +1,59 @@
# -*- coding: utf-8 -*-
import torch
import math
class Polynomial3(torch.nn.Module):
def __init__(self):
"""
In the constructor we instantiate four parameters and assign them as
member parameters.
"""
super().__init__()
self.a = torch.nn.Parameter(torch.randn(()))
self.b = torch.nn.Parameter(torch.randn(()))
self.c = torch.nn.Parameter(torch.randn(()))
self.d = torch.nn.Parameter(torch.randn(()))
def forward(self, x):
"""
In the forward function we accept a Tensor of input data and we must return
a Tensor of output data. We can use Modules defined in the constructor as
well as arbitrary operators on Tensors.
"""
return self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3
def string(self):
"""
        Just like any class in Python, you can also define custom methods on PyTorch modules
"""
return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3'
# Create Tensors to hold input and outputs.
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)
# Construct our model by instantiating the class defined above
model = Polynomial3()
# Construct our loss function and an Optimizer. The call to model.parameters()
# in the SGD constructor will contain the learnable parameters (defined
# with torch.nn.Parameter) which are members of the model.
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-6)
for t in range(2000):
# Forward pass: Compute predicted y by passing x to the model
y_pred = model(x)
# Compute and print loss
loss = criterion(y_pred, y)
if t % 100 == 99:
print(t, loss.item())
# Zero gradients, perform a backward pass, and update the weights.
optimizer.zero_grad()
loss.backward()
optimizer.step()
print(f'Result: {model.string()}')

@ -0,0 +1,68 @@
# -*- coding: utf-8 -*-
import random
import torch
import math
class DynamicNet(torch.nn.Module):
def __init__(self):
"""
In the constructor we instantiate five parameters and assign them as members.
"""
super().__init__()
self.a = torch.nn.Parameter(torch.randn(()))
self.b = torch.nn.Parameter(torch.randn(()))
self.c = torch.nn.Parameter(torch.randn(()))
self.d = torch.nn.Parameter(torch.randn(()))
self.e = torch.nn.Parameter(torch.randn(()))
def forward(self, x):
"""
        For the forward pass of the model, we randomly choose the polynomial's order to be 3, 4, or 5
        and reuse the e parameter to compute the contribution of the optional fourth- and fifth-order terms.
Since each forward pass builds a dynamic computation graph, we can use normal
Python control-flow operators like loops or conditional statements when
defining the forward pass of the model.
Here we also see that it is perfectly safe to reuse the same parameter many
times when defining a computational graph.
"""
y = self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3
for exp in range(4, random.randint(4, 6)):
y = y + self.e * x ** exp
return y
def string(self):
"""
        Just like any class in Python, you can also define custom methods on PyTorch modules
"""
return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3 + {self.e.item()} x^4 ? + {self.e.item()} x^5 ?'
# Create Tensors to hold input and outputs.
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)
# Construct our model by instantiating the class defined above
model = DynamicNet()
# Construct our loss function and an Optimizer. Training this strange model with
# vanilla stochastic gradient descent is tough, so we use momentum
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-8, momentum=0.9)
for t in range(30000):
# Forward pass: Compute predicted y by passing x to the model
y_pred = model(x)
# Compute and print loss
loss = criterion(y_pred, y)
if t % 2000 == 1999:
print(t, loss.item())
# Zero gradients, perform a backward pass, and update the weights.
optimizer.zero_grad()
loss.backward()
optimizer.step()
print(f'Result: {model.string()}')