Work in progress

2025-09-29 08:59:47 +02:00
parent 8a2a9d1064
commit b93df4af0f
26 changed files with 22986 additions and 0 deletions

@ -0,0 +1,21 @@
import numpy as np
import matplotlib.pyplot as plt
# Real measured data
x_real = np.array([1,2,3,4,5,6,7,8,9,10])
y_real = np.array([6,8,9,11,13,14,15,17,18,20])
# Fit a linear regression
coeffs = np.polyfit(x_real, y_real, 1)  # degree-1 polynomial = straight line; returns the line's coefficients as [slope, intercept]
print(coeffs)
lin_y = coeffs[0]*x_real + coeffs[1]  # evaluate the line at every x_real point: y = m*x + b
# Plot
plt.scatter(x_real, y_real, color='blue', label='Measured points')
plt.plot(x_real, lin_y, color='black', linestyle='--', label='Linear regression')
plt.xlabel('Advertising spend (million HUF)')
plt.ylabel('Tickets sold (thousands)')
plt.title('Measured data and linear regression')
plt.legend()
plt.grid(True)
plt.show()

@ -0,0 +1,29 @@
import numpy as np
import matplotlib.pyplot as plt
# Coefficients
a, b, c, d = 1, -2, 3, -1
# x values
x = np.linspace(-2, 2, 400)
# Cubic polynomial values
y = a + b*x + c*x**2 + d*x**3
# Pick a few points on the polynomial
x_points = np.linspace(-2, 2, 10)
y_points = a + b*x_points + c*x_points**2 + d*x_points**3
# Fit a linear regression to the points
coeffs = np.polyfit(x_points, y_points, 1)  # degree-1 polynomial = straight line
lin_y = coeffs[0]*x + coeffs[1]
# Plot
plt.plot(x, y, label='Cubic polynomial')
plt.scatter(x_points, y_points, color='blue', marker='x', s=80, label='Points on the polynomial')
plt.plot(x, lin_y, color='black', linestyle='--', label='Linear regression')
plt.xlabel('x')
plt.ylabel('y')
plt.title('Polynomial and linear regression')
plt.legend()
plt.grid(True)
plt.show()

@ -0,0 +1,52 @@
import numpy as np
import matplotlib.pyplot as plt
# Coefficients
a, b, c, d = 1, -2, 3, -1
# x values
x = np.linspace(-2, 2, 400)
# Polynomial values
y = a + b*x + c*x**2 + d*x**3
# Roots (where f(x) = 0)
gyokok = np.roots([d, c, b, a])
real_gyokok = gyokok[np.isreal(gyokok)].real
# First derivative: extrema
# f'(x) = b + 2c*x + 3d*x**2
extr_gyokok = np.roots([3*d, 2*c, b])
real_extr = extr_gyokok[np.isreal(extr_gyokok)].real
extr_y = a + b*real_extr + c*real_extr**2 + d*real_extr**3
# Second derivative: inflection point
# f''(x) = 2c + 6d*x
iflex_x = -2*c/(6*d)
iflex_y = a + b*iflex_x + c*iflex_x**2 + d*iflex_x**3
# Evenly spaced sample x points
x_points = np.linspace(-2, 2, 8)
y_points = a + b*x_points + c*x_points**2 + d*x_points**3
# Fit a linear regression
coeffs = np.polyfit(x_points, y_points, 1)  # degree-1 polynomial = straight line
lin_y = coeffs[0]*x + coeffs[1]  # y = slope*x + intercept
plt.plot(x, y, label='Polynomial')
plt.scatter(real_gyokok, np.zeros_like(real_gyokok), color='red', label='Roots')
plt.scatter(real_extr, extr_y, color='green', label='Extrema')
plt.scatter(iflex_x, iflex_y, color='purple', label='Inflection point')
plt.scatter(0, a, color='orange', label='y-intercept')
plt.scatter(x_points, y_points, color='blue', marker='x', s=80, label='Points to fit')
plt.plot(x, lin_y, color='black', linestyle='--', label='Linear regression')
plt.legend()
plt.xlabel('x')
plt.ylabel('f(x)')
plt.title('Polynomial, points, and linear regression')
plt.grid(True)
plt.show()

@ -0,0 +1,40 @@
# -*- coding: utf-8 -*-
import numpy as np
import math
# Create random input and output data
x = np.linspace(-math.pi, math.pi, 2000)  # 2000 evenly spaced points between -pi and pi
y = np.sin(x)
# Randomly initialize weights
a = np.random.randn()
b = np.random.randn()
c = np.random.randn()
d = np.random.randn()
learning_rate = 1e-6
for t in range(2000):
# Forward pass: compute predicted y
# y = a + b x + c x^2 + d x^3
y_pred = a + b * x + c * x ** 2 + d * x ** 3
# Compute and print loss
loss = np.square(y_pred - y).sum()
if t % 100 == 99:
print(t, loss)
# Backprop to compute gradients of a, b, c, d with respect to loss
grad_y_pred = 2.0 * (y_pred - y)
grad_a = grad_y_pred.sum()
grad_b = (grad_y_pred * x).sum()
grad_c = (grad_y_pred * x ** 2).sum()
grad_d = (grad_y_pred * x ** 3).sum()
# Update weights
a -= learning_rate * grad_a
b -= learning_rate * grad_b
c -= learning_rate * grad_c
d -= learning_rate * grad_d
print(f'Result: y = {a} + {b} x + {c} x^2 + {d} x^3')

@ -0,0 +1,37 @@
import numpy as np
import math
import matplotlib.pyplot as plt
# Generate the data
x = np.linspace(-math.pi, math.pi, 2000)
y = np.sin(x)
# Random weights
a = np.random.randn()
b = np.random.randn()
c = np.random.randn()
d = np.random.randn()
learning_rate = 1e-6
for t in range(2000):
y_pred = a + b * x + c * x ** 2 + d * x ** 3
loss = np.square(y_pred - y).sum()
grad_y_pred = 2.0 * (y_pred - y)
grad_a = grad_y_pred.sum()
grad_b = (grad_y_pred * x).sum()
grad_c = (grad_y_pred * x ** 2).sum()
grad_d = (grad_y_pred * x ** 3).sum()
a -= learning_rate * grad_a
b -= learning_rate * grad_b
c -= learning_rate * grad_c
d -= learning_rate * grad_d
# Plot
plt.plot(x, y, label='Sine function')
plt.plot(x, a + b * x + c * x ** 2 + d * x ** 3, label='Learned polynomial', linestyle='--')
plt.xlabel('x')
plt.ylabel('y')
plt.title('Sine function vs. the learned polynomial')
plt.legend()
plt.grid(True)
plt.show()

@ -0,0 +1,140 @@
# Step-by-step explanation of the code snippet
This code implements a **simple polynomial regression** in Python with NumPy, approximating a sine wave with a cubic polynomial. It is not PyTorch yet, but machine learning written "by hand"; the logic is the same as what you would use in PyTorch.
## 1. Generating the data
```python
x = np.linspace(-math.pi, math.pi, 2000)
y = np.sin(x)
```
- **x**: 2000 evenly spaced points between -π and π.
- **y**: the sine value for every x. This is the "true" function we want to approximate.
## 2. Random initialization of the weights (coefficients)
```python
a = np.random.randn()
b = np.random.randn()
c = np.random.randn()
d = np.random.randn()
```
- These are the coefficients of the polynomial: $y = a + b x + c x^2 + d x^3$
- They start as random values; during training they are what actually "learn".
## 3. Setting the learning rate
```python
learning_rate = 1e-6
```
- This controls how much the weights change in each step.
## 4. Training loop (iterations)
```python
for t in range(2000):
# ...
```
- Repeats the learning steps 2000 times.
### 4.1. Forward pass
```python
y_pred = a + b * x + c * x ** 2 + d * x ** 3
```
- Evaluates the polynomial at every x using the current weights.
- This is the "predicted" function that training gradually improves.
### 4.2. Computing the loss
```python
loss = np.square(y_pred - y).sum()
```
- Measures how far the prediction is from the true sine values.
- It sums the squared differences (the same idea as the **Mean Squared Error**, just without the mean); a small worked example follows below.
- Every 100th step it prints the current loss.
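For example, if the prediction errors $(y_{pred,i} - y_i)$ at three points happened to be 0.5, -1, and 2, the loss would be

$$
0.5^2 + (-1)^2 + 2^2 = 0.25 + 1 + 4 = 5.25 .
$$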
### 4.3. Backpropagation: computing the gradients
```python
grad_y_pred = 2.0 * (y_pred - y)
grad_a = grad_y_pred.sum()
grad_b = (grad_y_pred * x).sum()
grad_c = (grad_y_pred * x ** 2).sum()
grad_d = (grad_y_pred * x ** 3).sum()
```
- Computes how the loss changes if each coefficient is nudged slightly (this is the **gradient**); the formulas are written out below.
- This tells us which direction to "push" each coefficient so that the loss decreases.
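Written out, with the loss $L = \sum_i (y_{pred,i} - y_i)^2$, the gradients computed in the code are

$$
\frac{\partial L}{\partial a} = \sum_i 2\,(y_{pred,i} - y_i), \qquad
\frac{\partial L}{\partial b} = \sum_i 2\,(y_{pred,i} - y_i)\,x_i,
$$

$$
\frac{\partial L}{\partial c} = \sum_i 2\,(y_{pred,i} - y_i)\,x_i^2, \qquad
\frac{\partial L}{\partial d} = \sum_i 2\,(y_{pred,i} - y_i)\,x_i^3 .
$$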
### 4.4. Updating the weights (gradient descent)
```python
a -= learning_rate * grad_a
b -= learning_rate * grad_b
c -= learning_rate * grad_c
d -= learning_rate * grad_d
```
- Each coefficient is moved against its gradient (hence the subtraction), scaled by the learning rate.
- This way the polynomial approximates the sine curve better and better.
## 5. Printing the result
```python
print(f'Result: y = {a} + {b} x + {c} x^2 + {d} x^3')
```
- At the end of training it prints the learned polynomial coefficients.
- With these values the polynomial approximates the sine wave as well as it can over the given x range.
***
## What should you see as a result?
- The loss value **keeps decreasing** during training, which means the polynomial approximates the sine wave better and better.
- At the end you get a polynomial whose coefficients (a, b, c, d) have "learned" the shape of the sine wave.
- If you plot the $y$ (sine) and $y_{pred}$ (polynomial) curves, by the end of training the two curves are very similar; see the sketch below.
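A minimal plotting sketch (assuming the script above has already run, so `x`, `y` and the learned `a`, `b`, `c`, `d` are in scope):

```python
import matplotlib.pyplot as plt

# Sketch only: reuses x, y and the learned coefficients from the script above.
y_pred = a + b * x + c * x ** 2 + d * x ** 3
plt.plot(x, y, label='sin(x)')
plt.plot(x, y_pred, linestyle='--', label='learned cubic polynomial')
plt.legend()
plt.show()
```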
***
## Steps in summary
1. **Generate the data**: x and y (sine values)
2. **Initialize the weights**: a, b, c, d
3. **Training loop**:
    - Forward pass: evaluate the polynomial
    - Loss: measure the error
    - Gradient: how to change the weights
    - Weight update: the actual learning
4. **Result**: the learned polynomial coefficients
***
If you like, I can also show how to automate this in PyTorch, or how to plot the result of the training on a chart!

@ -0,0 +1,44 @@
import torch
import math
# Set up PyTorch data type and device (CPU or GPU)
dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0") # Uncomment to use GPU if available
# 1. Generate input data (x) in [-π, π] and corresponding target output (y = sin(x))
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x) # This is what we want to approximate!
# 2. Randomly initialize the weights (polynomial coefficients)
a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)
learning_rate = 1e-6
for t in range(2000):
# 3. Forward pass: compute predicted y using the current coefficients
# y_pred = a + b*x + c*x^2 + d*x^3 (a cubic polynomial)
y_pred = a + b * x + c * x ** 2 + d * x ** 3
# 4. Compute loss: sum of squared differences between prediction and true values
# (This is called the "Mean Squared Error" loss, except without the mean)
loss = (y_pred - y).pow(2).sum().item()
if t % 100 == 99:
print(t, loss)
# 5. Manually compute gradients for each weight
grad_y_pred = 2.0 * (y_pred - y) # Derivative of loss w.r.t. y_pred
grad_a = grad_y_pred.sum() # Derivative for a
grad_b = (grad_y_pred * x).sum() # Derivative for b
grad_c = (grad_y_pred * x ** 2).sum() # Derivative for c
grad_d = (grad_y_pred * x ** 3).sum() # Derivative for d
# 6. Update each weight by taking a small step in the opposite direction of the gradient
a -= learning_rate * grad_a
b -= learning_rate * grad_b
c -= learning_rate * grad_c
d -= learning_rate * grad_d
print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

@ -0,0 +1,61 @@
import torch
import math
# We want to be able to train our model on an `accelerator <https://pytorch.org/docs/stable/torch.html#accelerators>`__
# such as CUDA, MPS, MTIA, or XPU. If the current accelerator is available, we will use it. Otherwise, we use the CPU.
dtype = torch.float
device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")
torch.set_default_device(device)
# Create Tensors to hold input and outputs.
# By default, requires_grad=False, which indicates that we do not need to
# compute gradients with respect to these Tensors during the backward pass.
x = torch.linspace(-math.pi, math.pi, 2000, dtype=dtype)
y = torch.sin(x)
# Create random Tensors for weights. For a third order polynomial, we need
# 4 weights: y = a + b x + c x^2 + d x^3
# Setting requires_grad=True indicates that we want to compute gradients with
# respect to these Tensors during the backward pass.
a = torch.randn((), dtype=dtype, requires_grad=True)
b = torch.randn((), dtype=dtype, requires_grad=True)
c = torch.randn((), dtype=dtype, requires_grad=True)
d = torch.randn((), dtype=dtype, requires_grad=True)
print(f"a = {a.item()}, b = {b.item()}, c = {c.item()}, d = {d.item()}")
learning_rate = 1e-6
for t in range(2000):
# Forward pass: compute predicted y using operations on Tensors.
y_pred = a + b * x + c * x ** 2 + d * x ** 3
# Compute and print loss using operations on Tensors.
    # Now loss is a scalar (zero-dimensional) Tensor.
# loss.item() gets the scalar value held in the loss.
loss = (y_pred - y).pow(2).sum()
if t % 100 == 99:
print(t, loss.item())
# Use autograd to compute the backward pass. This call will compute the
# gradient of loss with respect to all Tensors with requires_grad=True.
    # After this call a.grad, b.grad, c.grad and d.grad will be Tensors holding
# the gradient of the loss with respect to a, b, c, d respectively.
loss.backward()
# Manually update weights using gradient descent. Wrap in torch.no_grad()
# because weights have requires_grad=True, but we don't need to track this
# in autograd.
with torch.no_grad():
a -= learning_rate * a.grad
b -= learning_rate * b.grad
c -= learning_rate * c.grad
d -= learning_rate * d.grad
# Manually zero the gradients after updating weights
a.grad = None
b.grad = None
c.grad = None
d.grad = None
print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

@ -0,0 +1,87 @@
import torch
import math
class LegendrePolynomial3(torch.autograd.Function):
"""
We can implement our own custom autograd Functions by subclassing
torch.autograd.Function and implementing the forward and backward passes
which operate on Tensors.
"""
@staticmethod
def forward(ctx, input):
"""
In the forward pass we receive a Tensor containing the input and return
a Tensor containing the output. ctx is a context object that can be used
to stash information for backward computation. You can cache tensors for
use in the backward pass using the ``ctx.save_for_backward`` method. Other
objects can be stored directly as attributes on the ctx object, such as
``ctx.my_object = my_object``. Check out `Extending torch.autograd <https://docs.pytorch.org/docs/stable/notes/extending.html#extending-torch-autograd>`_
for further details.
"""
ctx.save_for_backward(input)
return 0.5 * (5 * input ** 3 - 3 * input)
@staticmethod
def backward(ctx, grad_output):
"""
In the backward pass we receive a Tensor containing the gradient of the loss
with respect to the output, and we need to compute the gradient of the loss
with respect to the input.
"""
input, = ctx.saved_tensors
return grad_output * 1.5 * (5 * input ** 2 - 1)
dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0") # Uncomment this to run on GPU
# Create Tensors to hold input and outputs.
# By default, requires_grad=False, which indicates that we do not need to
# compute gradients with respect to these Tensors during the backward pass.
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)
# Create random Tensors for weights. For this example, we need
# 4 weights: y = a + b * P3(c + d * x), these weights need to be initialized
# not too far from the correct result to ensure convergence.
# Setting requires_grad=True indicates that we want to compute gradients with
# respect to these Tensors during the backward pass.
a = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
b = torch.full((), -1.0, device=device, dtype=dtype, requires_grad=True)
c = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
d = torch.full((), 0.3, device=device, dtype=dtype, requires_grad=True)
learning_rate = 5e-6
for t in range(2000):
    # To apply our Function, we use the Function.apply method. We alias this as 'P3'.
P3 = LegendrePolynomial3.apply
# Forward pass: compute predicted y using operations; we compute
# P3 using our custom autograd operation.
y_pred = a + b * P3(c + d * x)
# Compute and print loss
loss = (y_pred - y).pow(2).sum()
if t % 100 == 99:
print(t, loss.item())
# Use autograd to compute the backward pass.
loss.backward()
# Update weights using gradient descent
with torch.no_grad():
a -= learning_rate * a.grad
b -= learning_rate * b.grad
c -= learning_rate * c.grad
d -= learning_rate * d.grad
# Manually zero the gradients after updating weights
a.grad = None
b.grad = None
c.grad = None
d.grad = None
print(f'Result: y = {a.item()} + {b.item()} * P3({c.item()} + {d.item()} x)')

@ -0,0 +1,94 @@
# Custom autograd function in PyTorch: Step-by-step explanation
Let's break down what happens in your code, especially how you get the gradient numbers, what they mean, and how PyTorch's autograd system works when you define your own function.
***
## 1. What is a custom autograd function?
- In PyTorch, you can create your own mathematical operation and tell PyTorch **how to compute its gradient** (how it changes with respect to its input).
- You do this by subclassing `torch.autograd.Function` and implementing two methods:
- `forward`: computes the output from the input (normal math)
- `backward`: computes the gradient of the output with respect to the input (how the output changes if you nudge the input)
***
## 2. How do you get the gradient numbers?
- **Forward pass:** You calculate the output for your function. Here, it's the Legendre polynomial:
$$
P_3(x) = \frac{1}{2}(5x^3 - 3x)
$$
- **Backward pass:** You tell PyTorch the formula for the derivative of your function with respect to its input. For Legendre polynomial:
$$
\frac{dP_3}{dx} = \frac{1}{2}(15x^2 - 3) = 1.5(5x^2 - 1)
$$
- When you call `loss.backward()`, PyTorch uses your `backward` method to calculate how much the loss would change if you changed the input a little bit. This is the **gradient**.
- PyTorch automatically chains these gradients through all operations in your model, so you get the gradient of the loss with respect to every parameter (a, b, c, d).
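One practical way to verify that the hand-written `backward` matches the `forward` formula is `torch.autograd.gradcheck`, which compares your analytical gradient against numerical finite differences. A minimal sketch (assuming the `LegendrePolynomial3` class from the file above is in scope; gradcheck wants double-precision inputs with `requires_grad=True`):

```python
import torch
from torch.autograd import gradcheck

# Random double-precision test input for the numerical gradient check.
test_input = torch.randn(20, dtype=torch.double, requires_grad=True)
# Prints True if the custom backward agrees with finite differences.
print(gradcheck(LegendrePolynomial3.apply, (test_input,), eps=1e-6, atol=1e-4))
```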
***
## 3. What do these numbers mean?
- The gradient for each parameter (e.g., `a.grad`, `b.grad`, etc.) tells you **how much the loss would change if you nudged that parameter up or down**.
- If the gradient is large and positive, increasing the parameter will increase the loss (bad), so you want to decrease it.
- If the gradient is large and negative, increasing the parameter will decrease the loss (good), so you want to increase it.
- You use these numbers to update your parameters in the direction that makes the loss smaller (gradient descent).
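A tiny toy example of this (my own illustration, not from the file above): one parameter `w` and a loss $(2w - 3)^2$; the sign of `w.grad` tells you which way to move `w`.

```python
import torch

w = torch.tensor(5.0, requires_grad=True)
loss = (2 * w - 3) ** 2
loss.backward()
print(w.grad)           # tensor(28.) -- positive, so decreasing w lowers the loss
with torch.no_grad():
    w -= 0.01 * w.grad  # one gradient-descent step, w moves toward the optimum 1.5
```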
***
## 4. How does PyTorch get these numbers?
- During the forward pass, PyTorch builds a **computation graph** of all operations.
- When you call `loss.backward()`, PyTorch walks backward through this graph, using the `backward` methods you defined (and built-in ones for standard operations), applying the chain rule to compute all gradients.
- The numbers you see in `.grad` are the result of this process: they are the derivatives of the loss with respect to each parameter.
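As a concrete instance of this chain rule in the model above, where $y_{pred} = a + b \, P_3(c + d x)$ and $u_i = c + d x_i$, the number that ends up in `d.grad` is

$$
\frac{\partial \text{loss}}{\partial d} = \sum_i 2\,(y_{pred,i} - y_i)\cdot b \cdot \underbrace{1.5\,(5u_i^2 - 1)}_{P_3'(u_i)} \cdot x_i .
$$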
***
## 5. Step-by-step in your code
1. **Forward pass:**
- You compute `y_pred = a + b * P3(c + d * x)` using your custom Legendre polynomial function.
2. **Loss calculation:**
- You compute the loss as the sum of squared errors between `y_pred` and the true `y`.
3. **Backward pass:**
- You call `loss.backward()`. PyTorch uses your custom `backward` method to compute the gradients for all parameters.
4. **Update parameters:**
- You use the gradients to update `a`, `b`, `c`, and `d` so the loss gets smaller.
***
## 6. Summary
- **Custom autograd functions** let you define both the math and the gradient for new operations.
- **Gradients** are numbers that tell you how to change each parameter to make the loss smaller.
- **PyTorch computes these automatically** using your `backward` method and the chain rule.
- You use these gradients to update your parameters and train your model.
If you want, I can walk through a concrete example with numbers, or show how the gradients change during training!

@ -0,0 +1,69 @@
# -*- coding: utf-8 -*-
import torch
import math
# Create Tensors to hold input and outputs.
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)
# For this example, the output y is a linear function of (x, x^2, x^3), so
# we can consider it as a linear layer neural network. Let's prepare the
# tensor (x, x^2, x^3).
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)
# In the above code, x.unsqueeze(-1) has shape (2000, 1), and p has shape
# (3,), for this case, broadcasting semantics will apply to obtain a tensor
# of shape (2000, 3)
# Use the nn package to define our model as a sequence of layers. nn.Sequential
# is a Module which contains other Modules, and applies them in sequence to
# produce its output. The Linear Module computes output from input using a
# linear function, and holds internal Tensors for its weight and bias.
# The Flatten layer flattens the output of the linear layer to a 1D tensor,
# to match the shape of `y`.
model = torch.nn.Sequential(
torch.nn.Linear(3, 1),
torch.nn.Flatten(0, 1)
)
# The nn package also contains definitions of popular loss functions; in this
# case we will use Mean Squared Error (MSE) as our loss function.
loss_fn = torch.nn.MSELoss(reduction='sum')
learning_rate = 1e-6
for t in range(2000):
# Forward pass: compute predicted y by passing x to the model. Module objects
# override the __call__ operator so you can call them like functions. When
# doing so you pass a Tensor of input data to the Module and it produces
# a Tensor of output data.
y_pred = model(xx)
# Compute and print loss. We pass Tensors containing the predicted and true
# values of y, and the loss function returns a Tensor containing the
# loss.
loss = loss_fn(y_pred, y)
if t % 100 == 99:
print(t, loss.item())
# Zero the gradients before running the backward pass.
model.zero_grad()
# Backward pass: compute gradient of the loss with respect to all the learnable
# parameters of the model. Internally, the parameters of each Module are stored
# in Tensors with requires_grad=True, so this call will compute gradients for
# all learnable parameters in the model.
loss.backward()
# Update the weights using gradient descent. Each parameter is a Tensor, so
# we can access its gradients like we did before.
with torch.no_grad():
for param in model.parameters():
param -= learning_rate * param.grad
# You can access the first layer of `model` like accessing the first item of a list
linear_layer = model[0]
# For linear layer, its parameters are stored as `weight` and `bias`.
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

@ -0,0 +1,128 @@
# What does this code do? Step-by-step explanation
You're absolutely right in your intuition! This time, you're using a **neural network** to learn the relationship between $x$ and $\sin(x)$ _without giving it the explicit formula_. Instead, you give the network lots of example data (inputs and outputs), and let it figure out the best way to predict $y$ from $x$.
Let's walk through every step of the code and what happens:
***
## 1. **Create input and output data**
```python
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)
```
- **x**: 2000 evenly spaced points from $-\pi$ to $\pi$.
- **y**: The true output, $\sin(x)$, for each input $x$.
- This is your "training data"—pairs of (input, output) for the network to learn from.
***
## 2. **Prepare features for the model**
```python
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)
```
- You create a new tensor $xx$ where each row is $[x, x^2, x^3]$.
- This means the network will get three features for each input: $x$, $x^2$, and $x^3$.
- This helps the network learn more complex (curvy) relationships than just a straight line.
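A quick way to see the shapes involved (just an illustration, not part of the training script):

```python
import math
import torch

x = torch.linspace(-math.pi, math.pi, 2000)
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)    # (2000, 1) broadcast against (3,) -> (2000, 3)
print(x.shape, xx.shape)       # torch.Size([2000]) torch.Size([2000, 3])
print(xx[0])                   # first row: [x_0, x_0**2, x_0**3]
```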
***
## 3. **Define the neural network model**
```python
model = torch.nn.Sequential(
torch.nn.Linear(3, 1),
torch.nn.Flatten(0, 1)
)
```
- **torch.nn.Linear(3, 1)**: A single layer that takes 3 inputs ($x, x^2, x^3$) and outputs 1 value (prediction for $y$).
- **torch.nn.Flatten(0, 1)**: Flattens the output to match the shape of $y$.
- This is a very simple neural network (just one layer), but it's enough for this regression task.
***
## 4. **Define the loss function**
```python
loss_fn = torch.nn.MSELoss(reduction='sum')
```
- **Mean Squared Error (MSE)**: Measures how far off the predictions are from the true values.
- The goal is to make this loss as small as possible during training.
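Concretely, with `reduction='sum'` the value computed here is the sum of squared errors over all 2000 points:

$$
\text{loss} = \sum_{i=1}^{2000} \left(\hat{y}_i - y_i\right)^2 .
$$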
***
## 5. **Training loop**
```python
for t in range(2000):
y_pred = model(xx)
loss = loss_fn(y_pred, y)
if t % 100 == 99:
print(t, loss.item())
model.zero_grad()
loss.backward()
with torch.no_grad():
for param in model.parameters():
param -= learning_rate * param.grad
```
- **Forward pass**: The model predicts $y$ for each input $xx$.
- **Compute loss**: How far off are the predictions from the true $y$?
- **Zero gradients**: Clear old gradients before computing new ones.
- **Backward pass**: Compute gradients (how to change weights to reduce loss).
- **Update weights**: Adjust the model's weights using gradient descent.
- This loop repeats 2000 times, gradually improving the model's predictions.
***
## 6. **Result: Print the learned polynomial**
```python
linear_layer = model[0]
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')
```
- After training, you print the final weights and bias.
- This gives you the best-fit polynomial the network found to approximate $\sin(x)$ using the data.
***
## **Summary: What is happening?**
- You give the network lots of example data ($x$, $\sin(x)$).
- The network tries to find the best weights to predict $y$ from $x$, $x^2$, and $x^3$.
- You don't tell it the formula for $\sin(x)$—it "learns" the best approximation from the data.
- This is the core idea of neural networks: **learn patterns from data, not from explicit formulas**.
If you want to use a more complex neural network (with hidden layers and nonlinear activations), you can! That would let the model learn even more complicated relationships, and is the next step in deep learning.
Let me know if you want to see how to do that, or want to visualize the predictions vs. the true values!
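As a minimal sketch of that next step (my own illustration with assumed layer sizes, not part of the original files): one hidden layer with a `Tanh` nonlinearity, fed the raw $x$ instead of the hand-made $[x, x^2, x^3]$ features.

```python
import math
import torch

x = torch.linspace(-math.pi, math.pi, 2000).unsqueeze(-1)  # shape (2000, 1)
y = torch.sin(x)

model = torch.nn.Sequential(
    torch.nn.Linear(1, 32),   # 1 input feature -> 32 hidden units
    torch.nn.Tanh(),          # nonlinearity lets the network bend the curve
    torch.nn.Linear(32, 1),   # 32 hidden units -> 1 output
)
loss_fn = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)

for t in range(2000):
    loss = loss_fn(model(x), y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(f'final loss: {loss.item():.6f}')
```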

@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
import torch
import math
# Create Tensors to hold input and outputs.
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)
# Prepare the input tensor (x, x^2, x^3).
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)
# Use the nn package to define our model and loss function.
model = torch.nn.Sequential(
torch.nn.Linear(3, 1),
torch.nn.Flatten(0, 1)
)
loss_fn = torch.nn.MSELoss(reduction='sum')
# Use the optim package to define an Optimizer that will update the weights of
# the model for us. Here we will use RMSprop; the optim package contains many other
# optimization algorithms. The first argument to the RMSprop constructor tells the
# optimizer which Tensors it should update.
learning_rate = 1e-3
optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)
for t in range(2000):
# Forward pass: compute predicted y by passing x to the model.
y_pred = model(xx)
# Compute and print loss.
loss = loss_fn(y_pred, y)
if t % 100 == 99:
print(t, loss.item())
# Before the backward pass, use the optimizer object to zero all of the
# gradients for the variables it will update (which are the learnable
# weights of the model). This is because by default, gradients are
    # accumulated in buffers (i.e., not overwritten) whenever .backward()
    # is called. Check out the docs of torch.autograd.backward for more details.
optimizer.zero_grad()
# Backward pass: compute gradient of the loss with respect to model
# parameters
loss.backward()
# Calling the step function on an Optimizer makes an update to its
# parameters
optimizer.step()
linear_layer = model[0]
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

@ -0,0 +1,59 @@
# -*- coding: utf-8 -*-
import torch
import math
class Polynomial3(torch.nn.Module):
def __init__(self):
"""
In the constructor we instantiate four parameters and assign them as
member parameters.
"""
super().__init__()
self.a = torch.nn.Parameter(torch.randn(()))
self.b = torch.nn.Parameter(torch.randn(()))
self.c = torch.nn.Parameter(torch.randn(()))
self.d = torch.nn.Parameter(torch.randn(()))
def forward(self, x):
"""
In the forward function we accept a Tensor of input data and we must return
a Tensor of output data. We can use Modules defined in the constructor as
well as arbitrary operators on Tensors.
"""
return self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3
def string(self):
"""
        Just like any class in Python, you can also define custom methods on PyTorch modules
"""
return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3'
# Create Tensors to hold input and outputs.
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)
# Construct our model by instantiating the class defined above
model = Polynomial3()
# Construct our loss function and an Optimizer. The call to model.parameters()
# in the SGD constructor will contain the learnable parameters (defined
# with torch.nn.Parameter) which are members of the model.
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-6)
for t in range(2000):
# Forward pass: Compute predicted y by passing x to the model
y_pred = model(x)
# Compute and print loss
loss = criterion(y_pred, y)
if t % 100 == 99:
print(t, loss.item())
# Zero gradients, perform a backward pass, and update the weights.
optimizer.zero_grad()
loss.backward()
optimizer.step()
print(f'Result: {model.string()}')

@ -0,0 +1,68 @@
# -*- coding: utf-8 -*-
import random
import torch
import math
class DynamicNet(torch.nn.Module):
def __init__(self):
"""
In the constructor we instantiate five parameters and assign them as members.
"""
super().__init__()
self.a = torch.nn.Parameter(torch.randn(()))
self.b = torch.nn.Parameter(torch.randn(()))
self.c = torch.nn.Parameter(torch.randn(()))
self.d = torch.nn.Parameter(torch.randn(()))
self.e = torch.nn.Parameter(torch.randn(()))
def forward(self, x):
"""
        For the forward pass of the model, we randomly choose the polynomial's order to be 3, 4, or 5
        and reuse the e parameter to compute the contribution of the optional fourth- and fifth-order terms.
Since each forward pass builds a dynamic computation graph, we can use normal
Python control-flow operators like loops or conditional statements when
defining the forward pass of the model.
Here we also see that it is perfectly safe to reuse the same parameter many
times when defining a computational graph.
"""
y = self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3
for exp in range(4, random.randint(4, 6)):
y = y + self.e * x ** exp
return y
def string(self):
"""
        Just like any class in Python, you can also define custom methods on PyTorch modules
"""
return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3 + {self.e.item()} x^4 ? + {self.e.item()} x^5 ?'
# Create Tensors to hold input and outputs.
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)
# Construct our model by instantiating the class defined above
model = DynamicNet()
# Construct our loss function and an Optimizer. Training this strange model with
# vanilla stochastic gradient descent is tough, so we use momentum
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-8, momentum=0.9)
for t in range(30000):
# Forward pass: Compute predicted y by passing x to the model
y_pred = model(x)
# Compute and print loss
loss = criterion(y_pred, y)
if t % 2000 == 1999:
print(t, loss.item())
# Zero gradients, perform a backward pass, and update the weights.
optimizer.zero_grad()
loss.backward()
optimizer.step()
print(f'Result: {model.string()}')