Work in progress
PytorchTutorialCodes/1_linearregression_1.py (Normal file, 21 lines)
@@ -0,0 +1,21 @@
import numpy as np
import matplotlib.pyplot as plt

# Real measured data
x_real = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
y_real = np.array([6, 8, 9, 11, 13, 14, 15, 17, 18, 20])

# Fit a linear regression
coeffs = np.polyfit(x_real, y_real, 1)  # degree-1 polynomial = a straight line; returns the line's coefficients: [slope, intercept]
print(coeffs)
lin_y = coeffs[0]*x_real + coeffs[1]  # Evaluate the line at every x_real point: y = m*x + n

# Plotting
plt.scatter(x_real, y_real, color='blue', label='Measured data points')
plt.plot(x_real, lin_y, color='black', linestyle='--', label='Linear regression')
plt.xlabel('Advertising spend (million HUF)')
plt.ylabel('Tickets sold (thousands)')
plt.title('Measured data and linear regression')
plt.legend()
plt.grid(True)
plt.show()
PytorchTutorialCodes/1_linearregression_2.py (Normal file, 29 lines)
@@ -0,0 +1,29 @@
import numpy as np
import matplotlib.pyplot as plt

# Coefficients
a, b, c, d = 1, -2, 3, -1

# X values
x = np.linspace(-2, 2, 400)
# Cubic polynomial values
y = a + b*x + c*x**2 + d*x**3

# Pick a few points on the polynomial
x_points = np.linspace(-2, 2, 10)
y_points = a + b*x_points + c*x_points**2 + d*x_points**3

# Fit a linear regression to the points
coeffs = np.polyfit(x_points, y_points, 1)  # degree-1 polynomial = a straight line
lin_y = coeffs[0]*x + coeffs[1]

# Plotting
plt.plot(x, y, label='Cubic polynomial')
plt.scatter(x_points, y_points, color='blue', marker='x', s=80, label='Points on the polynomial')
plt.plot(x, lin_y, color='black', linestyle='--', label='Linear regression')
plt.xlabel('x')
plt.ylabel('y')
plt.title('Polynomial and linear regression')
plt.legend()
plt.grid(True)
plt.show()
PytorchTutorialCodes/1_linearregression_3.py (Normal file, 52 lines)
@@ -0,0 +1,52 @@
import numpy as np
import matplotlib.pyplot as plt
from numpy import roots

# Coefficients
a, b, c, d = 1, -2, 3, -1

# X values
x = np.linspace(-2, 2, 400)

# Polynomial values
y = a + b*x + c*x**2 + d*x**3

# Roots (where f(x) = 0)
gyokok = roots([d, c, b, a])
real_gyokok = gyokok[np.isreal(gyokok)].real

# First derivative: extrema
# f'(x) = b + 2c*x + 3d*x**2
extr_gyokok = roots([3*d, 2*c, b])
real_extr = extr_gyokok[np.isreal(extr_gyokok)].real
extr_y = a + b*real_extr + c*real_extr**2 + d*real_extr**3

# Second derivative: inflection point
# f''(x) = 2c + 6d*x
iflex_x = -2*c/(6*d)
iflex_y = a + b*iflex_x + c*iflex_x**2 + d*iflex_x**3

# Sample x points along the curve (evenly spaced)
x_points = np.linspace(-2, 2, 8)
y_points = a + b*x_points + c*x_points**2 + d*x_points**3

# Fit a linear regression
coeffs = np.polyfit(x_points, y_points, 1)  # degree-1 polynomial = a straight line
lin_y = coeffs[0]*x + coeffs[1]  # y = a*x + b


plt.plot(x, y, label='Polynomial')
plt.scatter(real_gyokok, np.zeros_like(real_gyokok), color='red', label='Roots')
plt.scatter(real_extr, extr_y, color='green', label='Extrema')
plt.scatter(iflex_x, iflex_y, color='purple', label='Inflection point')
plt.scatter(0, a, color='orange', label='Y-axis intercept')
plt.scatter(x_points, y_points, color='blue', marker='x', s=80, label='Points to approximate')
plt.plot(x, lin_y, color='black', linestyle='--', label='Linear regression')
plt.legend()
plt.xlabel('x')
plt.ylabel('f(x)')
plt.title('Polynomial, points, and linear regression')
plt.grid(True)
plt.show()
PytorchTutorialCodes/2_pred_with_numpy_.py (Normal file, 40 lines)
@@ -0,0 +1,40 @@
# -*- coding: utf-8 -*-
import numpy as np
import math

# Create random input and output data
x = np.linspace(-math.pi, math.pi, 2000)  # 2000 evenly spaced points between -pi and pi
y = np.sin(x)

# Randomly initialize weights
a = np.random.randn()
b = np.random.randn()
c = np.random.randn()
d = np.random.randn()

learning_rate = 1e-6
for t in range(2000):
    # Forward pass: compute predicted y
    # y = a + b x + c x^2 + d x^3
    y_pred = a + b * x + c * x ** 2 + d * x ** 3

    # Compute and print loss
    loss = np.square(y_pred - y).sum()
    if t % 100 == 99:
        print(t, loss)

    # Backprop to compute gradients of a, b, c, d with respect to loss
    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x ** 2).sum()
    grad_d = (grad_y_pred * x ** 3).sum()

    # Update weights
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d

print(f'Result: y = {a} + {b} x + {c} x^2 + {d} x^3')
PytorchTutorialCodes/2_pred_with_numpy_2.py (Normal file, 37 lines)
@@ -0,0 +1,37 @@
import numpy as np
import math
import matplotlib.pyplot as plt

# Generate the data
x = np.linspace(-math.pi, math.pi, 2000)
y = np.sin(x)

# Random weights
a = np.random.randn()
b = np.random.randn()
c = np.random.randn()
d = np.random.randn()

learning_rate = 1e-6
for t in range(2000):
    y_pred = a + b * x + c * x ** 2 + d * x ** 3
    loss = np.square(y_pred - y).sum()
    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x ** 2).sum()
    grad_d = (grad_y_pred * x ** 3).sum()
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d

# Plotting
plt.plot(x, y, label='Sine function')
plt.plot(x, a + b * x + c * x ** 2 + d * x ** 3, label='Learned polynomial', linestyle='--')
plt.xlabel('x')
plt.ylabel('y')
plt.title('Comparison of the sine curve and the learned polynomial')
plt.legend()
plt.grid(True)
plt.show()
PytorchTutorialCodes/2_pred_with_numpy_explanation.md (Normal file, 140 lines)
@@ -0,0 +1,140 @@
# Step-by-step explanation of the code

This code implements a **simple polynomial regression** in Python using NumPy, approximating a sine wave with a cubic polynomial. It is not PyTorch yet, but machine learning written "by hand"; the logic, however, is the same as what you would use in PyTorch.

## 1. Generating the data

```python
x = np.linspace(-math.pi, math.pi, 2000)
y = np.sin(x)
```

- **x**: 2000 evenly spaced points between -π and π.
- **y**: the sine value computed for each x. This is the "true" function we want to approximate.

## 2. Random initialization of the weights (coefficients)

```python
a = np.random.randn()
b = np.random.randn()
c = np.random.randn()
d = np.random.randn()
```

- These are the coefficients of the polynomial: $y = a + b x + c x^2 + d x^3$
- They start as random values; during training, these are the quantities that "learn".

## 3. Setting the learning rate

```python
learning_rate = 1e-6
```

- This determines how much the weights change at every step.

## 4. Training loop (iterations)

```python
for t in range(2000):
    # ...
```

- The learning steps are repeated 2000 times.

### 4.1. Forward pass

```python
y_pred = a + b * x + c * x ** 2 + d * x ** 3
```

- Evaluates the polynomial at every x using the current weights.
- This is the "predicted" function, which training keeps improving.

### 4.2. Computing the loss

```python
loss = np.square(y_pred - y).sum()
```

- Measures how far the prediction is from the true sine values.
- It sums the squared differences (the logic behind **Mean Squared Error**).
- Every 100th step, the current loss is printed.

### 4.3. Backpropagation: computing the gradients

```python
grad_y_pred = 2.0 * (y_pred - y)
grad_a = grad_y_pred.sum()
grad_b = (grad_y_pred * x).sum()
grad_c = (grad_y_pred * x ** 2).sum()
grad_d = (grad_y_pred * x ** 3).sum()
```

- Computes how the loss changes if each coefficient is nudged a little (this is the **gradient**).
- This tells you in which direction to "push" the coefficients so that the loss decreases.
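Where do these formulas come from? Applying the chain rule to the loss defined above:

$$
L = \sum_i (y_{\text{pred},i} - y_i)^2, \qquad \frac{\partial L}{\partial y_{\text{pred},i}} = 2\,(y_{\text{pred},i} - y_i)
$$

$$
\frac{\partial L}{\partial b} = \sum_i \frac{\partial L}{\partial y_{\text{pred},i}} \cdot \frac{\partial y_{\text{pred},i}}{\partial b} = \sum_i 2\,(y_{\text{pred},i} - y_i)\, x_i
$$

The same pattern gives `grad_a` (multiply by $1$), `grad_c` (multiply by $x_i^2$), and `grad_d` (multiply by $x_i^3$).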
### 4.4. Updating the weights (gradient descent)

```python
a -= learning_rate * grad_a
b -= learning_rate * grad_b
c -= learning_rate * grad_c
d -= learning_rate * grad_d
```

- Each coefficient is moved a small step against its gradient, scaled by the learning rate.
- This way the polynomial approximates the sine curve better and better.

## 5. Printing the result

```python
print(f'Result: y = {a} + {b} x + {c} x^2 + {d} x^3')
```

- At the end of training, the learned polynomial coefficients are printed.
- With these values, the polynomial approximates the sine wave as well as it can on the given x range.

***

## What should you see as a result?

- The loss value **keeps decreasing** during training, which means the polynomial approximates the sine wave better and better.
- At the end you get a polynomial whose coefficients (a, b, c, d) have "learned" the shape of the sine wave.
- If you plot the $y$ (sine) and $y_{pred}$ (polynomial) curves, the two curves will look very similar by the end of training.
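A minimal sketch of that comparison plot, assuming the training loop above has already run and `a`, `b`, `c`, `d` hold the learned values:

```python
import matplotlib.pyplot as plt

y_pred = a + b * x + c * x ** 2 + d * x ** 3  # polynomial with the learned coefficients
plt.plot(x, y, label='sin(x)')
plt.plot(x, y_pred, linestyle='--', label='learned cubic polynomial')
plt.legend()
plt.grid(True)
plt.show()
```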
***
## Summary of the steps

1. **Generate the data**: x and y (sine values)
2. **Initialize the weights**: a, b, c, d
3. **Training loop**:
    - Forward pass: evaluate the polynomial
    - Loss: measure the error
    - Gradients: how to change the weights
    - Weight update: learning
4. **Result**: the learned polynomial coefficients

***

If you like, I can also help with automating this in PyTorch, or with plotting the result of the training!
PytorchTutorialCodes/3_pred_with_pytorch.py (Normal file, 44 lines)
@@ -0,0 +1,44 @@
import torch
import math

# Set up PyTorch data type and device (CPU or GPU)
dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0")  # Uncomment to use GPU if available

# 1. Generate input data (x) in [-π, π] and corresponding target output (y = sin(x))
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)  # This is what we want to approximate!

# 2. Randomly initialize the weights (polynomial coefficients)
a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)

learning_rate = 1e-6
for t in range(2000):
    # 3. Forward pass: compute predicted y using the current coefficients
    # y_pred = a + b*x + c*x^2 + d*x^3 (a cubic polynomial)
    y_pred = a + b * x + c * x ** 2 + d * x ** 3

    # 4. Compute loss: sum of squared differences between prediction and true values
    # (This is called the "Mean Squared Error" loss, except without the mean)
    loss = (y_pred - y).pow(2).sum().item()
    if t % 100 == 99:
        print(t, loss)

    # 5. Manually compute gradients for each weight
    grad_y_pred = 2.0 * (y_pred - y)       # Derivative of loss w.r.t. y_pred
    grad_a = grad_y_pred.sum()             # Derivative for a
    grad_b = (grad_y_pred * x).sum()       # Derivative for b
    grad_c = (grad_y_pred * x ** 2).sum()  # Derivative for c
    grad_d = (grad_y_pred * x ** 3).sum()  # Derivative for d

    # 6. Update each weight by taking a small step in the opposite direction of the gradient
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d

print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')
PytorchTutorialCodes/4_autograd_in_pytorch.py (Normal file, 61 lines)
@@ -0,0 +1,61 @@
import torch
import math

# We want to be able to train our model on an `accelerator <https://pytorch.org/docs/stable/torch.html#accelerators>`__
# such as CUDA, MPS, MTIA, or XPU. If the current accelerator is available, we will use it. Otherwise, we use the CPU.

dtype = torch.float
device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")
torch.set_default_device(device)

# Create Tensors to hold input and outputs.
# By default, requires_grad=False, which indicates that we do not need to
# compute gradients with respect to these Tensors during the backward pass.
x = torch.linspace(-math.pi, math.pi, 2000, dtype=dtype)
y = torch.sin(x)

# Create random Tensors for weights. For a third order polynomial, we need
# 4 weights: y = a + b x + c x^2 + d x^3
# Setting requires_grad=True indicates that we want to compute gradients with
# respect to these Tensors during the backward pass.
a = torch.randn((), dtype=dtype, requires_grad=True)
b = torch.randn((), dtype=dtype, requires_grad=True)
c = torch.randn((), dtype=dtype, requires_grad=True)
d = torch.randn((), dtype=dtype, requires_grad=True)
print(f"a = {a.item()}, b = {b.item()}, c = {c.item()}, d = {d.item()}")

learning_rate = 1e-6
for t in range(2000):
    # Forward pass: compute predicted y using operations on Tensors.
    y_pred = a + b * x + c * x ** 2 + d * x ** 3

    # Compute and print loss using operations on Tensors.
    # Now loss is a Tensor of shape (1,)
    # loss.item() gets the scalar value held in the loss.
    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 99:
        print(t, loss.item())

    # Use autograd to compute the backward pass. This call will compute the
    # gradient of loss with respect to all Tensors with requires_grad=True.
    # After this call a.grad, b.grad, c.grad and d.grad will be Tensors holding
    # the gradient of the loss with respect to a, b, c, d respectively.
    loss.backward()

    # Manually update weights using gradient descent. Wrap in torch.no_grad()
    # because weights have requires_grad=True, but we don't need to track this
    # in autograd.
    with torch.no_grad():
        a -= learning_rate * a.grad
        b -= learning_rate * b.grad
        c -= learning_rate * c.grad
        d -= learning_rate * d.grad

        # Manually zero the gradients after updating weights
        a.grad = None
        b.grad = None
        c.grad = None
        d.grad = None

print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')
PytorchTutorialCodes/5_define_autograd_function.py (Normal file, 87 lines)
@@ -0,0 +1,87 @@
import torch
import math


class LegendrePolynomial3(torch.autograd.Function):
    """
    We can implement our own custom autograd Functions by subclassing
    torch.autograd.Function and implementing the forward and backward passes
    which operate on Tensors.
    """

    @staticmethod
    def forward(ctx, input):
        """
        In the forward pass we receive a Tensor containing the input and return
        a Tensor containing the output. ctx is a context object that can be used
        to stash information for backward computation. You can cache tensors for
        use in the backward pass using the ``ctx.save_for_backward`` method. Other
        objects can be stored directly as attributes on the ctx object, such as
        ``ctx.my_object = my_object``. Check out `Extending torch.autograd <https://docs.pytorch.org/docs/stable/notes/extending.html#extending-torch-autograd>`_
        for further details.
        """
        ctx.save_for_backward(input)
        return 0.5 * (5 * input ** 3 - 3 * input)

    @staticmethod
    def backward(ctx, grad_output):
        """
        In the backward pass we receive a Tensor containing the gradient of the loss
        with respect to the output, and we need to compute the gradient of the loss
        with respect to the input.
        """
        input, = ctx.saved_tensors
        return grad_output * 1.5 * (5 * input ** 2 - 1)


dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0")  # Uncomment this to run on GPU

# Create Tensors to hold input and outputs.
# By default, requires_grad=False, which indicates that we do not need to
# compute gradients with respect to these Tensors during the backward pass.
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# Create random Tensors for weights. For this example, we need
# 4 weights: y = a + b * P3(c + d * x), these weights need to be initialized
# not too far from the correct result to ensure convergence.
# Setting requires_grad=True indicates that we want to compute gradients with
# respect to these Tensors during the backward pass.
a = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
b = torch.full((), -1.0, device=device, dtype=dtype, requires_grad=True)
c = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
d = torch.full((), 0.3, device=device, dtype=dtype, requires_grad=True)

learning_rate = 5e-6
for t in range(2000):
    # To apply our Function, we use Function.apply method. We alias this as 'P3'.
    P3 = LegendrePolynomial3.apply

    # Forward pass: compute predicted y using operations; we compute
    # P3 using our custom autograd operation.
    y_pred = a + b * P3(c + d * x)

    # Compute and print loss
    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 99:
        print(t, loss.item())

    # Use autograd to compute the backward pass.
    loss.backward()

    # Update weights using gradient descent
    with torch.no_grad():
        a -= learning_rate * a.grad
        b -= learning_rate * b.grad
        c -= learning_rate * c.grad
        d -= learning_rate * d.grad

        # Manually zero the gradients after updating weights
        a.grad = None
        b.grad = None
        c.grad = None
        d.grad = None

print(f'Result: y = {a.item()} + {b.item()} * P3({c.item()} + {d.item()} x)')
@@ -0,0 +1,94 @@
# Custom autograd function in PyTorch: Step-by-step explanation

Let's break down what happens in your code, especially how you get the gradient numbers, what they mean, and how PyTorch's autograd system works when you define your own function.

***

## 1. What is a custom autograd function?

- In PyTorch, you can create your own mathematical operation and tell PyTorch **how to compute its gradient** (how it changes with respect to its input).
- You do this by subclassing `torch.autograd.Function` and implementing two methods:
    - `forward`: computes the output from the input (normal math)
    - `backward`: computes the gradient of the output with respect to the input (how the output changes if you nudge the input)

***

## 2. How do you get the gradient numbers?

- **Forward pass:** You calculate the output for your function. Here, it's the Legendre polynomial:

$$
P_3(x) = \frac{1}{2}(5x^3 - 3x)
$$

- **Backward pass:** You tell PyTorch the formula for the derivative of your function with respect to its input. For the Legendre polynomial:

$$
\frac{dP_3}{dx} = \frac{1}{2}(15x^2 - 3) = 1.5(5x^2 - 1)
$$

- When you call `loss.backward()`, PyTorch uses your `backward` method to calculate how much the loss would change if you changed the input a little bit. This is the **gradient**.
- PyTorch automatically chains these gradients through all operations in your model, so you get the gradient of the loss with respect to every parameter (a, b, c, d).
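As a sanity check, `torch.autograd.gradcheck` can numerically verify a hand-written `backward` such as this one. A minimal sketch, assuming the `LegendrePolynomial3` class from `5_define_autograd_function.py` is in scope:

```python
import torch

# gradcheck compares the analytic backward against finite differences;
# it expects double-precision inputs with requires_grad=True.
x = torch.randn(20, dtype=torch.double, requires_grad=True)
print(torch.autograd.gradcheck(LegendrePolynomial3.apply, (x,)))  # True if the custom backward is correct
```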
***
## 3. What do these numbers mean?

- The gradient for each parameter (e.g., `a.grad`, `b.grad`, etc.) tells you **how much the loss would change if you nudged that parameter up or down**.
- If the gradient is large and positive, increasing the parameter will increase the loss (bad), so you want to decrease it.
- If the gradient is large and negative, increasing the parameter will decrease the loss (good), so you want to increase it.
- You use these numbers to update your parameters in the direction that makes the loss smaller (gradient descent).

***

## 4. How does PyTorch get these numbers?

- During the forward pass, PyTorch builds a **computation graph** of all operations.
- When you call `loss.backward()`, PyTorch walks backward through this graph, using the `backward` methods you defined (and built-in ones for standard operations), applying the chain rule to compute all gradients.
- The numbers you see in `.grad` are the result of this process: they are the derivatives of the loss with respect to each parameter.
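A tiny, self-contained sketch of that chain rule in action (an added illustration, not from the tutorial files):

```python
import torch

x = torch.tensor(2.0, requires_grad=True)
y = x ** 3        # forward pass builds the graph: y = x^3
loss = 5 * y      # loss = 5 * x^3
loss.backward()   # chain rule: d(loss)/dx = 5 * 3x^2 = 60 at x = 2
print(x.grad)     # tensor(60.)
```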
***
## 5. Step-by-step in your code

1. **Forward pass:**
    - You compute `y_pred = a + b * P3(c + d * x)` using your custom Legendre polynomial function.
2. **Loss calculation:**
    - You compute the loss as the sum of squared errors between `y_pred` and the true `y`.
3. **Backward pass:**
    - You call `loss.backward()`. PyTorch uses your custom `backward` method to compute the gradients for all parameters.
4. **Update parameters:**
    - You use the gradients to update `a`, `b`, `c`, and `d` so the loss gets smaller.

***

## 6. Summary

- **Custom autograd functions** let you define both the math and the gradient for new operations.
- **Gradients** are numbers that tell you how to change each parameter to make the loss smaller.
- **PyTorch computes these automatically** using your `backward` method and the chain rule.
- You use these gradients to update your parameters and train your model.

If you want, I can walk through a concrete example with numbers, or show how the gradients change during training!
PytorchTutorialCodes/6_pytorch_nn_module.py (Normal file, 69 lines)
@@ -0,0 +1,69 @@
# -*- coding: utf-8 -*-
import torch
import math

# Create Tensors to hold input and outputs.
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# For this example, the output y is a linear function of (x, x^2, x^3), so
# we can consider it as a linear layer neural network. Let's prepare the
# tensor (x, x^2, x^3).
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)

# In the above code, x.unsqueeze(-1) has shape (2000, 1), and p has shape
# (3,), for this case, broadcasting semantics will apply to obtain a tensor
# of shape (2000, 3)

# Use the nn package to define our model as a sequence of layers. nn.Sequential
# is a Module which contains other Modules, and applies them in sequence to
# produce its output. The Linear Module computes output from input using a
# linear function, and holds internal Tensors for its weight and bias.
# The Flatten layer flattens the output of the linear layer to a 1D tensor,
# to match the shape of `y`.
model = torch.nn.Sequential(
    torch.nn.Linear(3, 1),
    torch.nn.Flatten(0, 1)
)

# The nn package also contains definitions of popular loss functions; in this
# case we will use Mean Squared Error (MSE) as our loss function.
loss_fn = torch.nn.MSELoss(reduction='sum')

learning_rate = 1e-6
for t in range(2000):

    # Forward pass: compute predicted y by passing x to the model. Module objects
    # override the __call__ operator so you can call them like functions. When
    # doing so you pass a Tensor of input data to the Module and it produces
    # a Tensor of output data.
    y_pred = model(xx)

    # Compute and print loss. We pass Tensors containing the predicted and true
    # values of y, and the loss function returns a Tensor containing the
    # loss.
    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    # Zero the gradients before running the backward pass.
    model.zero_grad()

    # Backward pass: compute gradient of the loss with respect to all the learnable
    # parameters of the model. Internally, the parameters of each Module are stored
    # in Tensors with requires_grad=True, so this call will compute gradients for
    # all learnable parameters in the model.
    loss.backward()

    # Update the weights using gradient descent. Each parameter is a Tensor, so
    # we can access its gradients like we did before.
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad

# You can access the first layer of `model` like accessing the first item of a list
linear_layer = model[0]

# For the linear layer, its parameters are stored as `weight` and `bias`.
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')
PytorchTutorialCodes/6_pytorch_nn_module_explanation.md (Normal file, 128 lines)
@@ -0,0 +1,128 @@
# What does this code do? Step-by-step explanation

You're absolutely right in your intuition! This time, you're using a **neural network** to learn the relationship between $x$ and $\sin(x)$ _without giving it the explicit formula_. Instead, you give the network lots of example data (inputs and outputs), and let it figure out the best way to predict $y$ from $x$.

Let's walk through every step of the code and what happens:

***

## 1. **Create input and output data**

```python
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)
```

- **x**: 2000 evenly spaced points from $-\pi$ to $\pi$.
- **y**: The true output, $\sin(x)$, for each input $x$.
- This is your "training data": pairs of (input, output) for the network to learn from.

***

## 2. **Prepare features for the model**

```python
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)
```

- You create a new tensor $xx$ where each row is $[x, x^2, x^3]$.
- This means the network will get three features for each input: $x$, $x^2$, and $x^3$.
- This helps the network learn more complex (curvy) relationships than just a straight line.
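To see what that broadcasting produces, here is a quick standalone check (an illustrative snippet, not part of the tutorial file):

```python
import torch, math

x = torch.linspace(-math.pi, math.pi, 2000)
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)

print(x.unsqueeze(-1).shape, p.shape, xx.shape)  # torch.Size([2000, 1]) torch.Size([3]) torch.Size([2000, 3])
print(xx[0])  # first row: [x[0], x[0]**2, x[0]**3]
```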
***
## 3. **Define the neural network model**

```python
model = torch.nn.Sequential(
    torch.nn.Linear(3, 1),
    torch.nn.Flatten(0, 1)
)
```

- **torch.nn.Linear(3, 1)**: A single layer that takes 3 inputs ($x, x^2, x^3$) and outputs 1 value (prediction for $y$).
- **torch.nn.Flatten(0, 1)**: Flattens the output to match the shape of $y$.
- This is a very simple neural network (just one layer), but it's enough for this regression task.

***

## 4. **Define the loss function**

```python
loss_fn = torch.nn.MSELoss(reduction='sum')
```

- **Mean Squared Error (MSE)**: Measures how far off the predictions are from the true values.
- The goal is to make this loss as small as possible during training.

***

## 5. **Training loop**

```python
for t in range(2000):
    y_pred = model(xx)
    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())
    model.zero_grad()
    loss.backward()
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad
```

- **Forward pass**: The model predicts $y$ for each input $xx$.
- **Compute loss**: How far off are the predictions from the true $y$?
- **Zero gradients**: Clear old gradients before computing new ones.
- **Backward pass**: Compute gradients (how to change weights to reduce loss).
- **Update weights**: Adjust the model's weights using gradient descent.
- This loop repeats 2000 times, gradually improving the model's predictions.

***

## 6. **Result: Print the learned polynomial**

```python
linear_layer = model[0]
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')
```

- After training, you print the final weights and bias.
- This gives you the best-fit polynomial the network found to approximate $\sin(x)$ using the data.

***

## **Summary: What is happening?**

- You give the network lots of example data ($x$, $\sin(x)$).
- The network tries to find the best weights to predict $y$ from $x$, $x^2$, and $x^3$.
- You don't tell it the formula for $\sin(x)$: it "learns" the best approximation from the data.
- This is the core idea of neural networks: **learn patterns from data, not from explicit formulas**.

If you want to use a more complex neural network (with hidden layers and nonlinear activations), you can! That would let the model learn even more complicated relationships, and is the next step in deep learning; a minimal sketch follows below.
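For example, a minimal sketch of such a network (illustrative only; the hidden size, activation, optimizer, and learning rate are arbitrary choices, not from the tutorial files):

```python
import torch
import math

x = torch.linspace(-math.pi, math.pi, 2000).unsqueeze(-1)  # shape (2000, 1): raw x is the only feature
y = torch.sin(x)

# Two-layer network: 1 input -> 64 hidden units with tanh -> 1 output
model = torch.nn.Sequential(
    torch.nn.Linear(1, 64),
    torch.nn.Tanh(),
    torch.nn.Linear(64, 1),
)
loss_fn = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

for t in range(2000):
    loss = loss_fn(model(x), y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print('final loss:', loss.item())
```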
Let me know if you want to take that further, or want to visualize the predictions vs. the true values!
PytorchTutorialCodes/7_pytorch_optim.py (Normal file, 53 lines)
@@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
import torch
import math

# Create Tensors to hold input and outputs.
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# Prepare the input tensor (x, x^2, x^3).
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)

# Use the nn package to define our model and loss function.
model = torch.nn.Sequential(
    torch.nn.Linear(3, 1),
    torch.nn.Flatten(0, 1)
)
loss_fn = torch.nn.MSELoss(reduction='sum')

# Use the optim package to define an Optimizer that will update the weights of
# the model for us. Here we will use RMSprop; the optim package contains many other
# optimization algorithms. The first argument to the RMSprop constructor tells the
# optimizer which Tensors it should update.
learning_rate = 1e-3
optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)
for t in range(2000):
    # Forward pass: compute predicted y by passing x to the model.
    y_pred = model(xx)

    # Compute and print loss.
    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    # Before the backward pass, use the optimizer object to zero all of the
    # gradients for the variables it will update (which are the learnable
    # weights of the model). This is because by default, gradients are
    # accumulated in buffers (i.e., not overwritten) whenever .backward()
    # is called. Check out the docs of torch.autograd.backward for more details.
    optimizer.zero_grad()

    # Backward pass: compute gradient of the loss with respect to model
    # parameters
    loss.backward()

    # Calling the step function on an Optimizer makes an update to its
    # parameters
    optimizer.step()


linear_layer = model[0]
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')
PytorchTutorialCodes/8_pytorch_custom_nn_module.py (Normal file, 59 lines)
@@ -0,0 +1,59 @@
# -*- coding: utf-8 -*-
import torch
import math


class Polynomial3(torch.nn.Module):
    def __init__(self):
        """
        In the constructor we instantiate four parameters and assign them as
        member parameters.
        """
        super().__init__()
        self.a = torch.nn.Parameter(torch.randn(()))
        self.b = torch.nn.Parameter(torch.randn(()))
        self.c = torch.nn.Parameter(torch.randn(()))
        self.d = torch.nn.Parameter(torch.randn(()))

    def forward(self, x):
        """
        In the forward function we accept a Tensor of input data and we must return
        a Tensor of output data. We can use Modules defined in the constructor as
        well as arbitrary operators on Tensors.
        """
        return self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3

    def string(self):
        """
        Just like any class in Python, you can also define custom methods on PyTorch modules.
        """
        return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3'


# Create Tensors to hold input and outputs.
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# Construct our model by instantiating the class defined above
model = Polynomial3()

# Construct our loss function and an Optimizer. The call to model.parameters()
# in the SGD constructor will contain the learnable parameters (defined
# with torch.nn.Parameter) which are members of the model.
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-6)
for t in range(2000):
    # Forward pass: Compute predicted y by passing x to the model
    y_pred = model(x)

    # Compute and print loss
    loss = criterion(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(f'Result: {model.string()}')
@@ -0,0 +1,68 @@
# -*- coding: utf-8 -*-
import random
import torch
import math


class DynamicNet(torch.nn.Module):
    def __init__(self):
        """
        In the constructor we instantiate five parameters and assign them as members.
        """
        super().__init__()
        self.a = torch.nn.Parameter(torch.randn(()))
        self.b = torch.nn.Parameter(torch.randn(()))
        self.c = torch.nn.Parameter(torch.randn(()))
        self.d = torch.nn.Parameter(torch.randn(()))
        self.e = torch.nn.Parameter(torch.randn(()))

    def forward(self, x):
        """
        For the forward pass of the model, we randomly choose the polynomial degree
        to be 3, 4, or 5, and reuse the e parameter to compute the contribution of
        the fourth- and fifth-order terms.

        Since each forward pass builds a dynamic computation graph, we can use normal
        Python control-flow operators like loops or conditional statements when
        defining the forward pass of the model.

        Here we also see that it is perfectly safe to reuse the same parameter many
        times when defining a computational graph.
        """
        y = self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3
        for exp in range(4, random.randint(4, 6)):
            y = y + self.e * x ** exp
        return y

    def string(self):
        """
        Just like any class in Python, you can also define custom methods on PyTorch modules.
        """
        return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3 + {self.e.item()} x^4 ? + {self.e.item()} x^5 ?'


# Create Tensors to hold input and outputs.
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# Construct our model by instantiating the class defined above
model = DynamicNet()

# Construct our loss function and an Optimizer. Training this strange model with
# vanilla stochastic gradient descent is tough, so we use momentum
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-8, momentum=0.9)
for t in range(30000):
    # Forward pass: Compute predicted y by passing x to the model
    y_pred = model(x)

    # Compute and print loss
    loss = criterion(y_pred, y)
    if t % 2000 == 1999:
        print(t, loss.item())

    # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(f'Result: {model.string()}')