Pandas feladatok
This commit is contained in:
@ -23,7 +23,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 57,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -68,7 +68,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -96,7 +96,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 97,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -145,6 +145,118 @@
|
||||
"- Cégkategóriánként hány dollárt fektettek be összesen?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0 LifeLock\n",
|
||||
"1 LifeLock\n",
|
||||
"2 LifeLock\n",
|
||||
"3 MyCityFaces\n",
|
||||
"4 Flypaper\n",
|
||||
"Name: company, dtype: object\n",
|
||||
"The head: company numEmps category city state fundedDate raisedAmt \\\n",
|
||||
"0 LifeLock NaN web Tempe AZ 1-May-07 6850000 \n",
|
||||
"1 LifeLock NaN web Tempe AZ 1-Oct-06 6000000 \n",
|
||||
"2 LifeLock NaN web Tempe AZ 1-Jan-08 25000000 \n",
|
||||
"3 MyCityFaces 7.0 web Scottsdale AZ 1-Jan-08 50000 \n",
|
||||
"4 Flypaper NaN web Phoenix AZ 1-Feb-08 3000000 \n",
|
||||
"\n",
|
||||
" raisedCurrency round \n",
|
||||
"0 USD b \n",
|
||||
"1 USD a \n",
|
||||
"2 USD c \n",
|
||||
"3 USD seed \n",
|
||||
"4 USD a \n",
|
||||
"1434\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"frame = pd.read_csv(\"investments.txt\", sep=\"|\")\n",
|
||||
"\n",
|
||||
"print(frame[\"company\"].iloc[:5])\n",
|
||||
"\n",
|
||||
"print(\"The head:\", frame.head())\n",
|
||||
"\n",
|
||||
"framecount = frame[\"company\"].count()\n",
|
||||
"print(framecount)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"company\n",
|
||||
"Facebook 7\n",
|
||||
"Juice Wireless 5\n",
|
||||
"Viv’simo 5\n",
|
||||
"Glam Media 5\n",
|
||||
"Brightcove 5\n",
|
||||
" ..\n",
|
||||
"AdReady 1\n",
|
||||
"AdMob 1\n",
|
||||
"Acquia 1\n",
|
||||
"x+1 1\n",
|
||||
"vbs tv 1\n",
|
||||
"Name: category, Length: 891, dtype: int64\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"grouped = frame.groupby(\"company\").count()\n",
|
||||
"\n",
|
||||
"grouped = frame[\"company\"].value_counts()\n",
|
||||
"\n",
|
||||
"grouped = frame.groupby(\"company\")[\"category\"].count().sort_values(ascending=False)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"print(grouped)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 97,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"category\n",
|
||||
"web 11753474750\n",
|
||||
"software 1017942000\n",
|
||||
"hardware 824500000\n",
|
||||
"mobile 323020000\n",
|
||||
"cleantech 258900000\n",
|
||||
"other 119850000\n",
|
||||
"biotech 77250000\n",
|
||||
"consulting 32135000\n",
|
||||
"Name: raisedAmt, dtype: int64\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"categ = frame.groupby(\"category\")[\"raisedAmt\"].sum().sort_values(ascending=False)\n",
|
||||
"\n",
|
||||
"print(categ)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@ -169,7 +281,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.6"
|
||||
"version": "3.13.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -137,17 +137,108 @@
|
||||
"- Hány ponttal magasabb a medencével (`Pool`) rendelkező szállodák átlagos értékelése (`Score`) a többi szálloda átlagos értékelésénél?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" Hotel name Nr. rooms Pool Gym Tennis court \\\n",
|
||||
"0 Circus Circus Hotel & Casino Las Vegas 3773 NO YES NO \n",
|
||||
"1 Circus Circus Hotel & Casino Las Vegas 3773 NO YES NO \n",
|
||||
"2 Circus Circus Hotel & Casino Las Vegas 3773 NO YES NO \n",
|
||||
"3 Circus Circus Hotel & Casino Las Vegas 3773 NO YES NO \n",
|
||||
"4 Circus Circus Hotel & Casino Las Vegas 3773 NO YES NO \n",
|
||||
"\n",
|
||||
" Spa Casino Traveler type Period of stay Score \n",
|
||||
"0 NO YES Friends Dec-Feb 5 \n",
|
||||
"1 NO YES Business Dec-Feb 3 \n",
|
||||
"2 NO YES Families Mar-May 5 \n",
|
||||
"3 NO YES Friends Mar-May 4 \n",
|
||||
"4 NO YES Solo Mar-May 4 \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"frame = pd.read_csv(\"hotels.txt\", sep=';')\n",
|
||||
"print(frame[:5])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Hotel name\n",
|
||||
"The Venetian Las Vegas Hotel 96648\n",
|
||||
"Excalibur Hotel & Casino 95544\n",
|
||||
"Bellagio Las Vegas 94392\n",
|
||||
"Circus Circus Hotel & Casino Las Vegas 90552\n",
|
||||
"Caesars Palace 80352\n",
|
||||
"Name: Nr. rooms, dtype: int64\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"legtobb = frame.groupby(\"Hotel name\")[\"Nr. rooms\"].sum().sort_values(ascending=False)[:5]\n",
|
||||
"\n",
|
||||
"print(legtobb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Hotel name\n",
|
||||
"Wynn Las Vegas 4.625\n",
|
||||
"Name: Score, dtype: float64\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"scored = frame.groupby(\"Hotel name\")[\"Score\"].mean().sort_values(ascending=False)[:1]\n",
|
||||
"\n",
|
||||
"print(scored)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.9604166666666667\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"withspas = frame[frame[\"Pool\"] == \"YES\"][\"Score\"].mean()\n",
|
||||
"withoutspas = frame[frame[\"Pool\"] == \"NO\"][\"Score\"].mean()\n",
|
||||
"\n",
|
||||
"print(withspas - withoutspas)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@ -161,7 +252,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.5"
|
||||
"version": "3.13.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -168,16 +168,100 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"source": [
|
||||
"### 3. feladat [10p]\n",
|
||||
"\n",
|
||||
"Az [unicef.txt](unicef.txt) szövegfájl a világ 5 év alatti népességének élelmezési helyzetéről tartalmaz adatokat. Az egyes sorok felméréseknek felelnek meg, a felmérések országonként időbeli sorrendben vannak felsorolva. Töltsük be az adatokat, határozzuk meg és írjuk ki az alábbi statisztikákat!\n",
|
||||
"- Hány felmérés készült és hány országot érintett?\n",
|
||||
"- Az alábbi statisztikákat csak azon felmérések alapján készítsük el, amelyeknél mind a három érintett indikátor (`Severe Wasting`, `Underweight`, `Overweight`) definiált (azaz ezek pozitív adatok). Ha egy országra több ilyen felmérés is van, akkor a legutóbbit vegyük figyelembe!\n",
|
||||
" - Mely 5 országban a legmagasabb a `Severe Wasting` indikátor?\n",
|
||||
" - Az országok hányadrészében magasabb az `Underweight` indikátor az `Overweight` indikátornál?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" Country United Nations Region United Nations Sub-Region \\\n",
|
||||
"0 AFGHANISTAN Asia Southern Asia \n",
|
||||
"1 AFGHANISTAN Asia Southern Asia \n",
|
||||
"2 AFGHANISTAN Asia Southern Asia \n",
|
||||
"3 ALBANIA Europe Southern Europe \n",
|
||||
"4 ALBANIA Europe Southern Europe \n",
|
||||
"\n",
|
||||
" World Bank Income Classification Survey Year Survey Sample (N) \\\n",
|
||||
"0 Low Income 1997 4846.0 \n",
|
||||
"1 Low Income 2004 946.0 \n",
|
||||
"2 Low Income 2013 21922.0 \n",
|
||||
"3 Upper Middle Income 1996-98 7642.0 \n",
|
||||
"4 Upper Middle Income 2000 1382.0 \n",
|
||||
"\n",
|
||||
" Severe Wasting Wasting Stunting Underweight Overweight \\\n",
|
||||
"0 NaN 18,2 53,2 44,9 6,5 \n",
|
||||
"1 3,5 8,6 59,3 32,9 4,6 \n",
|
||||
"2 4,0 9,5 40,9 25,0 5,4 \n",
|
||||
"3 NaN 8,1 20,4 7,1 9,5 \n",
|
||||
"4 6,2 12,2 39,2 17,0 30,0 \n",
|
||||
"\n",
|
||||
" Source Notes \\\n",
|
||||
"0 Afghanistan 1997 multiple indicator baseline ... Converted estimates \n",
|
||||
"1 Summary report of the national nutrition surve... NaN \n",
|
||||
"2 Afghanistan National Nutrition Survey 2013. (pending reanalysis) \n",
|
||||
"3 National study on nutrition in Albania. Instit... Converted estimates \n",
|
||||
"4 Multiple indicator cluster survey report Alban... NaN \n",
|
||||
"\n",
|
||||
" U5 Population ('000s) \n",
|
||||
"0 3637,632 \n",
|
||||
"1 4667,487 \n",
|
||||
"2 5235,867 \n",
|
||||
"3 307,887 \n",
|
||||
"4 278,753 \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"frame = pd.read_csv(\"unicef.txt\", sep=\"|\")\n",
|
||||
"print(frame[:5])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"keszult: 854\n",
|
||||
"Orszag: 152\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"orszag = frame[\"Country\"].count()\n",
|
||||
"\n",
|
||||
"print(\"keszult: \",orszag)\n",
|
||||
"\n",
|
||||
"city = frame.groupby(\"Country\")[\"Survey Year\"].count().count()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"print(\"Orszag: \",city)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@ -191,7 +275,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.5"
|
||||
"version": "3.13.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
Reference in New Issue
Block a user