Files
2024-09-21 11:20:44 +02:00

2809 lines
65 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## [pandas](https://pandas.pydata.org/)\n",
"\n",
"- A pandas egy NumPy-ra épülő adatfeldolgozó és elemző eszköz. Alapötleteit az R nyelvből vette.\n",
"- Alapvető adattípusa a DataFrame (tábla) és a Series (oszlop). Segítségükkel memóriabeli, oszlopalapú adatbázis kezelés valósítható meg.\n",
"- https://pandas.pydata.org/docs/user_guide/10min.html."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# A pandas modul importálása pd néven.\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'1.5.3'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# A pandas verziószáma.\n",
"pd.__version__"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# DataFrame létrehozása oszlopokból.\n",
"# A bemenet egy szótár, ahol a kulcsok az oszlopnevek, az értékek az oszlopok.\n",
"df1 = pd.DataFrame({'aa': [1, 2, 3], 'bb': ['x', 'y', 'z']})"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>aa</th>\n",
" <th>bb</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>x</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>y</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>z</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" aa bb\n",
"0 1 x\n",
"1 2 y\n",
"2 3 z"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"pandas.core.frame.DataFrame"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Az eredmény típusa.\n",
"type(df1)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['aa', 'bb'], dtype='object')"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Oszlopnevek.\n",
"df1.columns"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"aa\n",
"bb\n"
]
}
],
"source": [
"# Iterálás az oszlopneveken.\n",
"for c in df1:\n",
" print(c)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sorok száma.\n",
"len(df1)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(3, 2)"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# A DataFrame alakja.\n",
"df1.shape"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 3 entries, 0 to 2\n",
"Data columns (total 2 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 aa 3 non-null int64 \n",
" 1 bb 3 non-null object\n",
"dtypes: int64(1), object(1)\n",
"memory usage: 180.0+ bytes\n"
]
}
],
"source": [
"# Összesítő információ.\n",
"df1.info()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>aa</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>3.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>1.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>2.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>3.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" aa\n",
"count 3.0\n",
"mean 2.0\n",
"std 1.0\n",
"min 1.0\n",
"25% 1.5\n",
"50% 2.0\n",
"75% 2.5\n",
"max 3.0"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Alapvető oszlopstatisztikák (a numerikus oszlopokról).\n",
"df1.describe()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# DataFrame létrehozása sorokból.\n",
"# A bemenet szótárak listája, ahol minden szótár egy sort reprezentál.\n",
"data = [\n",
" {'alma': 10, 'körte': 20},\n",
" {'alma': 30},\n",
" {'alma': 40, 'körte': 50}\n",
"]\n",
"df2 = pd.DataFrame(data)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>alma</th>\n",
" <th>körte</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>20.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>30</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>40</td>\n",
" <td>50.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" alma körte\n",
"0 10 20.0\n",
"1 30 NaN\n",
"2 40 50.0"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- A pandas a hiányzó adatokat NaN (\"not a number\") értékkel reprezentálja.\n",
"- Ez hatékony, de vannak veszélyei."
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"np.nan == np.nan"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"RangeIndex(start=0, stop=3, step=1)"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Minden DataFrame-hez (és Series-hez) tartozik index.\n",
"# Alapértelmezés szerint az index 0-tól induló, 1-esével növekedő sorszám.\n",
"df2.index"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>alma</th>\n",
" <th>körte</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>x</th>\n",
" <td>10</td>\n",
" <td>20.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>y</th>\n",
" <td>30</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>z</th>\n",
" <td>40</td>\n",
" <td>50.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" alma körte\n",
"x 10 20.0\n",
"y 30 NaN\n",
"z 40 50.0"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Másfajta indexet is használhatunk.\n",
"df3 = pd.DataFrame(data, ['x', 'y', 'z'])\n",
"df3"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['x', 'y', 'z'], dtype='object')"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df3.index"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 20\n",
"1 30\n",
"2 40\n",
"dtype: int64"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Series létrehozása index megadása nélkül.\n",
"se1 = pd.Series([20, 30, 40])\n",
"se1"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"pandas.core.series.Series"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Az eredmény típusa.\n",
"type(se1)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dtype('int64')"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"se1.dtype"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"aa 20\n",
"bb 30\n",
"cc 40\n",
"dtype: int64"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Series létrehozása index megadásával.\n",
"se2 = pd.Series([20, 30, 40], ['aa', 'bb', 'cc'])\n",
"se2"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>aa</th>\n",
" <th>bb</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>x</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>y</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>z</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" aa bb\n",
"0 1 x\n",
"1 2 y\n",
"2 3 z"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# DataFrame-ből [] operátorral lehet kiválasztani oszlopot.\n",
"df1"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 1\n",
"1 2\n",
"2 3\n",
"Name: aa, dtype: int64"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1['aa']"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"pandas.core.series.Series"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(df1['aa'])"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 1\n",
"1 2\n",
"2 3\n",
"Name: aa, dtype: int64"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# ...illetve ha az oszlop neve érvényes azonosítónév, akkor . operátorral is.\n",
"df1.aa"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>bb</th>\n",
" <th>aa</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>x</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>y</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>z</td>\n",
" <td>3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" bb aa\n",
"0 x 1\n",
"1 y 2\n",
"2 z 3"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Több oszlop kiválasztása.\n",
"df1[['bb', 'aa']]"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>bb</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>x</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>y</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>z</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" bb\n",
"0 x\n",
"1 y\n",
"2 z"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Ha 1 elemű listával indexelünk, akkor az eredmény DataFrame típusú.\n",
"df1[['bb']]"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"pandas.core.frame.DataFrame"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(df1[['bb']])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Megjegyzés\n",
"- Az 1 oszlopos DataFrame nem ugyanaz, mint a Series!"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>alma</th>\n",
" <th>körte</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>x</th>\n",
" <td>10</td>\n",
" <td>20.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>y</th>\n",
" <td>30</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>z</th>\n",
" <td>40</td>\n",
" <td>50.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" alma körte\n",
"x 10 20.0\n",
"y 30 NaN\n",
"z 40 50.0"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sor(ok) kiválasztása DataFrame-ből.\n",
"df3"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"alma 10.0\n",
"körte 20.0\n",
"Name: x, dtype: float64"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Egy sor kiválasztása.\n",
"df3.loc['x']"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>alma</th>\n",
" <th>körte</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>x</th>\n",
" <td>10</td>\n",
" <td>20.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>z</th>\n",
" <td>40</td>\n",
" <td>50.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" alma körte\n",
"x 10 20.0\n",
"z 40 50.0"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Több sor kiválasztása.\n",
"df3.loc[['x', 'z']]"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"alma 10.0\n",
"körte 20.0\n",
"Name: x, dtype: float64"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# ...pozíció alapján is lehet sort kiválasztani.\n",
"df3.iloc[0]"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>alma</th>\n",
" <th>körte</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>x</th>\n",
" <td>10</td>\n",
" <td>20.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>y</th>\n",
" <td>30</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" alma körte\n",
"x 10 20.0\n",
"y 30 NaN"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# ...első néhány elem kiválasztása.\n",
"df3[:2]"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"aa 20\n",
"bb 30\n",
"cc 40\n",
"dtype: int64"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# A se2 tartalma (mert ezt fogjuk használni az alábbi néhány példában).\n",
"se2"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"30"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Egy elem kiválasztása Series-ből.\n",
"se2['bb']"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"cc 40\n",
"bb 30\n",
"dtype: int64"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Több elem kiválasztása Series-ből.\n",
"se2[['cc', 'bb']]"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"array([20, 30, 40], dtype=int64)"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Nyers adattartalom kinyerése Series-ből.\n",
"se2.values"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"numpy.ndarray"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# ...és annak típusa.\n",
"type(se2.values)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[10., 20.],\n",
" [30., nan],\n",
" [40., 50.]])"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Nyers adattartalom kinyerése DataFrame-ből.\n",
"df2.values"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### [Kiválasztás](https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html) (SELECT)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>tanuló</th>\n",
" <th>tantárgy</th>\n",
" <th>osztályzat</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Gipsz Jakab</td>\n",
" <td>Matematika</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Gipsz Jakab</td>\n",
" <td>Testnevelés</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Bank Aranka</td>\n",
" <td>Matematika</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Bank Aranka</td>\n",
" <td>Matematika</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Bank Aranka</td>\n",
" <td>Testnevelés</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Rontó Róbert</td>\n",
" <td>Matematika</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Rontó Róbert</td>\n",
" <td>Matematika</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Rontó Róbert</td>\n",
" <td>Testnevelés</td>\n",
" <td>5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" tanuló tantárgy osztályzat\n",
"0 Gipsz Jakab Matematika 5\n",
"1 Gipsz Jakab Testnevelés 2\n",
"2 Bank Aranka Matematika 3\n",
"3 Bank Aranka Matematika 5\n",
"4 Bank Aranka Testnevelés 4\n",
"5 Rontó Róbert Matematika 1\n",
"6 Rontó Róbert Matematika 2\n",
"7 Rontó Róbert Testnevelés 5"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Hozzunk létre egy példa DataFrame-et!\n",
"df = pd.DataFrame([\n",
" {'tanuló': 'Gipsz Jakab', 'tantárgy': 'Matematika', 'osztályzat': 5},\n",
" {'tanuló': 'Gipsz Jakab', 'tantárgy': 'Testnevelés', 'osztályzat': 2},\n",
" {'tanuló': 'Bank Aranka', 'tantárgy': 'Matematika', 'osztályzat': 3},\n",
" {'tanuló': 'Bank Aranka', 'tantárgy': 'Matematika', 'osztályzat': 5},\n",
" {'tanuló': 'Bank Aranka', 'tantárgy': 'Testnevelés', 'osztályzat': 4},\n",
" {'tanuló': 'Rontó Róbert', 'tantárgy': 'Matematika', 'osztályzat': 1},\n",
" {'tanuló': 'Rontó Róbert', 'tantárgy': 'Matematika', 'osztályzat': 2},\n",
" {'tanuló': 'Rontó Róbert', 'tantárgy': 'Testnevelés', 'osztályzat': 5},\n",
"])\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 True\n",
"1 True\n",
"2 False\n",
"3 False\n",
"4 False\n",
"5 False\n",
"6 False\n",
"7 False\n",
"Name: tanuló, dtype: bool"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Logikai feltétel oszlop.\n",
"df['tanuló'] == 'Gipsz Jakab'"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"pandas.core.series.Series"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# ...aminek a típusa:\n",
"type(df['tanuló'] == 'Gipsz Jakab')"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>tanuló</th>\n",
" <th>tantárgy</th>\n",
" <th>osztályzat</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Gipsz Jakab</td>\n",
" <td>Matematika</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Gipsz Jakab</td>\n",
" <td>Testnevelés</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" tanuló tantárgy osztályzat\n",
"0 Gipsz Jakab Matematika 5\n",
"1 Gipsz Jakab Testnevelés 2"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Gipsz Jakab összes osztályzata.\n",
"df[df['tanuló'] == 'Gipsz Jakab']\n",
"# SQL-ben: SELECT * FROM df WHERE tanuló='Gipsz Jakab'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Műveletek\n",
"A logikai értékű Series adatok műveletei: & (és), | (vagy), ~ (tagadás)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>tanuló</th>\n",
" <th>tantárgy</th>\n",
" <th>osztályzat</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Gipsz Jakab</td>\n",
" <td>Testnevelés</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Bank Aranka</td>\n",
" <td>Matematika</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Bank Aranka</td>\n",
" <td>Testnevelés</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Rontó Róbert</td>\n",
" <td>Matematika</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" tanuló tantárgy osztályzat\n",
"1 Gipsz Jakab Testnevelés 2\n",
"2 Bank Aranka Matematika 3\n",
"4 Bank Aranka Testnevelés 4\n",
"6 Rontó Róbert Matematika 2"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Az 1-esnél jobb és 5-ösnél rosszabb osztályzatok (az és (&) művelettel).\n",
"df[(df['osztályzat'] > 1) & (df['osztályzat'] < 5)]"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>tanuló</th>\n",
" <th>tantárgy</th>\n",
" <th>osztályzat</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Gipsz Jakab</td>\n",
" <td>Testnevelés</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Bank Aranka</td>\n",
" <td>Matematika</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Bank Aranka</td>\n",
" <td>Testnevelés</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Rontó Róbert</td>\n",
" <td>Matematika</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" tanuló tantárgy osztályzat\n",
"1 Gipsz Jakab Testnevelés 2\n",
"2 Bank Aranka Matematika 3\n",
"4 Bank Aranka Testnevelés 4\n",
"6 Rontó Róbert Matematika 2"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# ...mint előbb, a vagy (|) művelettel.\n",
"df[(df['osztályzat'] == 2) | (df['osztályzat'] == 3) | (df['osztályzat'] == 4)]"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>tanuló</th>\n",
" <th>tantárgy</th>\n",
" <th>osztályzat</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Gipsz Jakab</td>\n",
" <td>Testnevelés</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Bank Aranka</td>\n",
" <td>Matematika</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Bank Aranka</td>\n",
" <td>Testnevelés</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Rontó Róbert</td>\n",
" <td>Matematika</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" tanuló tantárgy osztályzat\n",
"1 Gipsz Jakab Testnevelés 2\n",
"2 Bank Aranka Matematika 3\n",
"4 Bank Aranka Testnevelés 4\n",
"6 Rontó Róbert Matematika 2"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# ...mint előbb, a tagadás (~) művelettel.\n",
"df[~((df['osztályzat'] == 1) | (df['osztályzat'] == 5))]"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2.6666666666666665"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rontó Róbert osztályzatainak átlaga.\n",
"df[df['tanuló'] == 'Rontó Róbert']['osztályzat'].mean()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### [Csoportosítás](https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html) (GROUPBY)\n",
"\n",
"Pandas-ban a csoportosítás folyamata az alábbi lépésekből áll:\n",
"\n",
"- Az adatok **felosztása (split)** csoportokra, valamilyen feltétel alapján.\n",
"- Valamely **függvény alkalmazása (apply)** az egyes csoportokra külön-külön.\n",
"- Az eredmények **kombinálása (combine)** egy adatszerkezetbe.\n",
"\n",
"Ezek közül a felosztás a legegyszerűbb. A felosztás kritériuma általában egy oszlopban (vagy oszlop kombinációban) található érték. A függvényalkalmazás lehet aggregáló jellegű (pl. csoportonkénti elemszám, összeg, átlag, minimum, maximum, első rekord, utolsó rekord) vagy egyéb (pl. csoportonkénti standardizálás, adathiány kitöltés vagy szűrés). A kombinálási lépés az aggregáló típusú függvények esetén automatikusan lefut, egyéb esetekben a programozónak kell kezdeményeznie."
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>tanuló</th>\n",
" <th>tantárgy</th>\n",
" <th>osztályzat</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Gipsz Jakab</td>\n",
" <td>Matematika</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Gipsz Jakab</td>\n",
" <td>Testnevelés</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Bank Aranka</td>\n",
" <td>Matematika</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Bank Aranka</td>\n",
" <td>Matematika</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Bank Aranka</td>\n",
" <td>Testnevelés</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Rontó Róbert</td>\n",
" <td>Matematika</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Rontó Róbert</td>\n",
" <td>Matematika</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Rontó Róbert</td>\n",
" <td>Testnevelés</td>\n",
" <td>5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" tanuló tantárgy osztályzat\n",
"0 Gipsz Jakab Matematika 5\n",
"1 Gipsz Jakab Testnevelés 2\n",
"2 Bank Aranka Matematika 3\n",
"3 Bank Aranka Matematika 5\n",
"4 Bank Aranka Testnevelés 4\n",
"5 Rontó Róbert Matematika 1\n",
"6 Rontó Róbert Matematika 2\n",
"7 Rontó Róbert Testnevelés 5"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Használjuk az előző DataFrame-et!\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"# Csoportosítás tantárgyak szerint.\n",
"gb = df.groupby('tantárgy')"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000002B18F945750>"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Az eredmény típusa.\n",
"gb"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tantárgy\n",
"Matematika 5\n",
"Testnevelés 3\n",
"dtype: int64"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Hány rekord tartozik az egyes tantárgyakhoz?\n",
"gb.size()"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tantárgy\n",
"Matematika 5\n",
"Testnevelés 3\n",
"dtype: int64"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Ugyanez, az átmeneti GroupBy objektum használata nélkül.\n",
"df.groupby('tantárgy').size()\n",
"# SQL-ben: SELECT tantárgy, COUNT(*) FROM df GROUP BY tantárgy"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tantárgy\n",
"Matematika 3.200000\n",
"Testnevelés 3.666667\n",
"Name: osztályzat, dtype: float64"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Tantárgyankénti átlag.\n",
"df.groupby('tantárgy')['osztályzat'].mean()"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tantárgy\n",
"Matematika 3.200000\n",
"Testnevelés 3.666667\n",
"Name: osztályzat, dtype: float64"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# ...ugyanez csúnyábban:\n",
"df['osztályzat'].groupby(df['tantárgy']).mean()"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tanuló tantárgy \n",
"Bank Aranka Matematika 4.0\n",
" Testnevelés 4.0\n",
"Gipsz Jakab Matematika 5.0\n",
" Testnevelés 2.0\n",
"Rontó Róbert Matematika 1.5\n",
" Testnevelés 5.0\n",
"Name: osztályzat, dtype: float64"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Tantárgyankénti átlag minden tanulóhoz.\n",
"df.groupby(['tanuló', 'tantárgy'])['osztályzat'].mean()"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>tanuló</th>\n",
" <th>tantárgy</th>\n",
" <th>osztályzat</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Bank Aranka</td>\n",
" <td>Matematika</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Bank Aranka</td>\n",
" <td>Testnevelés</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Gipsz Jakab</td>\n",
" <td>Matematika</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Gipsz Jakab</td>\n",
" <td>Testnevelés</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Rontó Róbert</td>\n",
" <td>Matematika</td>\n",
" <td>1.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Rontó Róbert</td>\n",
" <td>Testnevelés</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" tanuló tantárgy osztályzat\n",
"0 Bank Aranka Matematika 4.0\n",
"1 Bank Aranka Testnevelés 4.0\n",
"2 Gipsz Jakab Matematika 5.0\n",
"3 Gipsz Jakab Testnevelés 2.0\n",
"4 Rontó Róbert Matematika 1.5\n",
"5 Rontó Róbert Testnevelés 5.0"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Az index átalakítása két hagyományos oszloppá.\n",
"df.groupby(['tanuló', 'tantárgy'])['osztályzat'].mean().reset_index()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Rendezés, minimum, maximum"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>12</td>\n",
" <td>29</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>8</td>\n",
" <td>21</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>15</td>\n",
" <td>19</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b\n",
"0 10 20\n",
"1 12 29\n",
"2 8 21\n",
"3 15 19"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Példa tábla a rendezéshez.\n",
"df4 = pd.DataFrame([\n",
" {'a': 10, 'b': 20},\n",
" {'a': 12, 'b': 29},\n",
" {'a': 8, 'b': 21},\n",
" {'a': 15, 'b': 19}\n",
"])\n",
"df4"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>8</td>\n",
" <td>21</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>12</td>\n",
" <td>29</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>15</td>\n",
" <td>19</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b\n",
"2 8 21\n",
"0 10 20\n",
"1 12 29\n",
"3 15 19"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rendezés 'a' szerint.\n",
"df4.sort_values('a')"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>15</td>\n",
" <td>19</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>12</td>\n",
" <td>29</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>8</td>\n",
" <td>21</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b\n",
"3 15 19\n",
"1 12 29\n",
"0 10 20\n",
"2 8 21"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rendezés csökkenő sorrendbe.\n",
"df4.sort_values('a', ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1 29\n",
"2 21\n",
"0 20\n",
"Name: b, dtype: int64"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Egy oszlop elemeinek rendezése (a három legnagyobb elem meghatározásához).\n",
"df4['b'].sort_values(ascending=False)[:3]"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"29"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# A 'b' oszlop maximuma.\n",
"df4['b'].max()"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Mely indexnél található a 'b' oszlop maximuma?\n",
"df4['b'].idxmax()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Módosítás"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>20</td>\n",
" <td>30</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>12</td>\n",
" <td>29</td>\n",
" <td>41</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>8</td>\n",
" <td>21</td>\n",
" <td>29</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>15</td>\n",
" <td>19</td>\n",
" <td>34</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b c\n",
"0 10 20 30\n",
"1 12 29 41\n",
"2 8 21 29\n",
"3 15 19 34"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Új oszlop felvétele.\n",
"df4['c'] = df4['a'] + df4['b']\n",
"df4"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"41"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Egy elem 'elérése'.\n",
"df4['c'][1]"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"41"
]
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# ...ugyanez másképp:\n",
"df4.loc[1]['c']"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"41"
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# ...ugyanez másképp:\n",
"df4.iloc[1]['c']"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>20</td>\n",
" <td>30</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>12</td>\n",
" <td>29</td>\n",
" <td>100</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>8</td>\n",
" <td>21</td>\n",
" <td>29</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>15</td>\n",
" <td>19</td>\n",
" <td>34</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b c\n",
"0 10 20 30\n",
"1 12 29 100\n",
"2 8 21 29\n",
"3 15 19 34"
]
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Egy elem módosítása.\n",
"df4['c'][1] = 100\n",
"df4"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>12</td>\n",
" <td>29</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>8</td>\n",
" <td>21</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>15</td>\n",
" <td>19</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b\n",
"0 10 20\n",
"1 12 29\n",
"2 8 21\n",
"3 15 19"
]
},
"execution_count": 68,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Oszlop törlése.\n",
"del df4['c']\n",
"df4"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}