Pandas feladatok

2024-12-09 19:41:42 +01:00
parent b99346376f
commit 32d805ecf1
3 changed files with 301 additions and 14 deletions
--- a/SajatMegoldasok/vizsga_01_minta.ipynb
+++ b/SajatMegoldasok/vizsga_01_minta.ipynb
@ -23,7 +23,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 57,
+   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
@ -68,7 +68,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
@ -96,7 +96,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 97,
+   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
@ -145,6 +145,118 @@
    "- Cégkategóriánként hány dollárt fektettek be összesen?"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0       LifeLock\n",
+      "1       LifeLock\n",
+      "2       LifeLock\n",
+      "3    MyCityFaces\n",
+      "4       Flypaper\n",
+      "Name: company, dtype: object\n",
+      "The head:        company  numEmps category        city state fundedDate  raisedAmt  \\\n",
+      "0     LifeLock      NaN      web       Tempe    AZ   1-May-07    6850000   \n",
+      "1     LifeLock      NaN      web       Tempe    AZ   1-Oct-06    6000000   \n",
+      "2     LifeLock      NaN      web       Tempe    AZ   1-Jan-08   25000000   \n",
+      "3  MyCityFaces      7.0      web  Scottsdale    AZ   1-Jan-08      50000   \n",
+      "4     Flypaper      NaN      web     Phoenix    AZ   1-Feb-08    3000000   \n",
+      "\n",
+      "  raisedCurrency round  \n",
+      "0            USD     b  \n",
+      "1            USD     a  \n",
+      "2            USD     c  \n",
+      "3            USD  seed  \n",
+      "4            USD     a  \n",
+      "1434\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "# Load the pipe-separated investments table.\n",
+    "frame = pd.read_csv(\"investments.txt\", sep=\"|\")\n",
+    "\n",
+    "# Preview the first five company names, then the first five full rows.\n",
+    "first_companies = frame[\"company\"].head()\n",
+    "print(first_companies)\n",
+    "\n",
+    "print(\"The head:\", frame.iloc[:5])\n",
+    "\n",
+    "# Number of rows that have a non-null company value.\n",
+    "company_count = frame[\"company\"].count()\n",
+    "print(company_count)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "company\n",
+      "Facebook          7\n",
+      "Juice Wireless    5\n",
+      "Viv’simo          5\n",
+      "Glam Media        5\n",
+      "Brightcove        5\n",
+      "                 ..\n",
+      "AdReady           1\n",
+      "AdMob             1\n",
+      "Acquia            1\n",
+      "x+1               1\n",
+      "vbs tv            1\n",
+      "Name: category, Length: 891, dtype: int64\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Count the non-null `category` entries per company, largest count first.\n",
+    "# NOTE(review): a row whose category is missing is not counted here; use\n",
+    "# .size() instead if every row should count -- confirm the intent.\n",
+    "grouped = (\n",
+    "    frame.groupby(\"company\")[\"category\"]\n",
+    "    .count()\n",
+    "    .sort_values(ascending=False)\n",
+    ")\n",
+    "\n",
+    "print(grouped)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 97,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "category\n",
+      "web           11753474750\n",
+      "software       1017942000\n",
+      "hardware        824500000\n",
+      "mobile          323020000\n",
+      "cleantech       258900000\n",
+      "other           119850000\n",
+      "biotech          77250000\n",
+      "consulting       32135000\n",
+      "Name: raisedAmt, dtype: int64\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Total raisedAmt per category, largest total first.\n",
+    "# NOTE(review): amounts are added up regardless of raisedCurrency --\n",
+    "# confirm every row is in USD before reading these as dollar totals.\n",
+    "raised_by_category = frame.groupby(\"category\")[\"raisedAmt\"].sum()\n",
+    "categ = raised_by_category.sort_values(ascending=False)\n",
+    "\n",
+    "print(categ)"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@ -169,7 +281,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.12.6"
+   "version": "3.13.0"
  }
 },
 "nbformat": 4,
--- a/SajatMegoldasok/vizsga_02_minta.ipynb
+++ b/SajatMegoldasok/vizsga_02_minta.ipynb
@ -137,17 +137,108 @@
    "- Hány ponttal magasabb a medencével (`Pool`) rendelkező szállodák átlagos értékelése (`Score`) a többi szálloda átlagos értékelésénél?"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "                               Hotel name  Nr. rooms Pool  Gym Tennis court  \\\n",
+      "0  Circus Circus Hotel & Casino Las Vegas       3773   NO  YES           NO   \n",
+      "1  Circus Circus Hotel & Casino Las Vegas       3773   NO  YES           NO   \n",
+      "2  Circus Circus Hotel & Casino Las Vegas       3773   NO  YES           NO   \n",
+      "3  Circus Circus Hotel & Casino Las Vegas       3773   NO  YES           NO   \n",
+      "4  Circus Circus Hotel & Casino Las Vegas       3773   NO  YES           NO   \n",
+      "\n",
+      "  Spa Casino Traveler type Period of stay  Score  \n",
+      "0  NO    YES       Friends        Dec-Feb      5  \n",
+      "1  NO    YES      Business        Dec-Feb      3  \n",
+      "2  NO    YES      Families        Mar-May      5  \n",
+      "3  NO    YES       Friends        Mar-May      4  \n",
+      "4  NO    YES          Solo        Mar-May      4  \n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "# Load the semicolon-separated hotel table and preview its first five rows.\n",
+    "frame = pd.read_csv(\"hotels.txt\", sep=\";\")\n",
+    "print(frame.head())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# A hotel appears on many rows (the first five rows all belong to one hotel\n",
+    "# and repeat its room count), so take one room count per hotel instead of\n",
+    "# adding the repeated values together.\n",
+    "# NOTE(review): max() assumes `Nr. rooms` is constant within a hotel -- confirm.\n",
+    "rooms_per_hotel = frame.groupby(\"Hotel name\")[\"Nr. rooms\"].max()\n",
+    "\n",
+    "# Five hotels with the most rooms, largest first.\n",
+    "legtobb = rooms_per_hotel.sort_values(ascending=False).head(5)\n",
+    "\n",
+    "print(legtobb)"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Hotel name\n",
+      "Wynn Las Vegas    4.625\n",
+      "Name: Score, dtype: float64\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Mean Score per hotel, sorted so that the best-rated hotel is the one row kept.\n",
+    "mean_scores = frame.groupby(\"Hotel name\")[\"Score\"].mean()\n",
+    "scored = mean_scores.sort_values(ascending=False).head(1)\n",
+    "\n",
+    "print(scored)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.9604166666666667\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Mean Score over the rows marked Pool == \"YES\" minus the mean Score over\n",
+    "# the rows marked Pool == \"NO\".\n",
+    "scores = frame[\"Score\"]\n",
+    "pool_flag = frame[\"Pool\"]\n",
+    "with_pool = scores[pool_flag == \"YES\"].mean()\n",
+    "without_pool = scores[pool_flag == \"NO\"].mean()\n",
+    "\n",
+    "print(with_pool - without_pool)"
+   ]
  }
 ],
 "metadata": {
  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
@ -161,7 +252,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.12.5"
+   "version": "3.13.0"
  }
 },
 "nbformat": 4,
--- a/SajatMegoldasok/vizsga_03_minta.ipynb
+++ b/SajatMegoldasok/vizsga_03_minta.ipynb
@ -168,16 +168,100 @@
   ]
  },
  {
-   "cell_type": "code",
-   "execution_count": null,
+   "cell_type": "markdown",
   "metadata": {},
-   "outputs": [],
-   "source": []
+   "source": [
+    "### 3. feladat [10p]\n",
+    "\n",
+    "Az [unicef.txt](unicef.txt) szövegfájl a világ 5 év alatti népességének élelmezési helyzetéről tartalmaz adatokat. Az egyes sorok felméréseknek felelnek meg, a felmérések országonként időbeli sorrendben vannak felsorolva. Töltsük be az adatokat, határozzuk meg és írjuk ki az alábbi statisztikákat!\n",
+    "- Hány felmérés készült és hány országot érintett?\n",
+    "- Az alábbi statisztikákat csak azon felmérések alapján készítsük el, amelyeknél mind a három érintett indikátor (`Severe Wasting`, `Underweight`, `Overweight`) definiált (azaz ezek pozitív adatok). Ha egy országra több ilyen felmérés is van, akkor a legutóbbit vegyük figyelembe!\n",
+    "    - Mely 5 országban a legmagasabb a `Severe Wasting` indikátor?\n",
+    "    - Az országok hányadrészében magasabb az `Underweight` indikátor az `Overweight` indikátornál?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "       Country United Nations Region United Nations Sub-Region  \\\n",
+      "0  AFGHANISTAN                  Asia             Southern Asia   \n",
+      "1  AFGHANISTAN                  Asia             Southern Asia   \n",
+      "2  AFGHANISTAN                  Asia             Southern Asia   \n",
+      "3      ALBANIA                Europe           Southern Europe   \n",
+      "4      ALBANIA                Europe           Southern Europe   \n",
+      "\n",
+      "  World Bank Income Classification Survey Year  Survey Sample (N)  \\\n",
+      "0                       Low Income        1997             4846.0   \n",
+      "1                       Low Income        2004              946.0   \n",
+      "2                       Low Income        2013            21922.0   \n",
+      "3              Upper Middle Income     1996-98             7642.0   \n",
+      "4              Upper Middle Income        2000             1382.0   \n",
+      "\n",
+      "  Severe Wasting Wasting Stunting Underweight Overweight  \\\n",
+      "0            NaN    18,2     53,2        44,9        6,5   \n",
+      "1            3,5     8,6     59,3        32,9        4,6   \n",
+      "2            4,0     9,5     40,9        25,0        5,4   \n",
+      "3            NaN     8,1     20,4         7,1        9,5   \n",
+      "4            6,2    12,2     39,2        17,0       30,0   \n",
+      "\n",
+      "                                              Source                 Notes  \\\n",
+      "0   Afghanistan 1997 multiple indicator baseline ...   Converted estimates   \n",
+      "1  Summary report of the national nutrition surve...                   NaN   \n",
+      "2        Afghanistan National Nutrition Survey 2013.  (pending reanalysis)   \n",
+      "3  National study on nutrition in Albania. Instit...   Converted estimates   \n",
+      "4  Multiple indicator cluster survey report Alban...                   NaN   \n",
+      "\n",
+      "  U5 Population ('000s)  \n",
+      "0              3637,632  \n",
+      "1              4667,487  \n",
+      "2              5235,867  \n",
+      "3               307,887  \n",
+      "4               278,753  \n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "# Load the pipe-separated UNICEF survey table and preview its first five rows.\n",
+    "# NOTE(review): the indicator columns print with decimal commas (e.g. 18,2),\n",
+    "# so they are presumably loaded as text; read_csv's decimal=\",\" option may be\n",
+    "# needed before any numeric comparison -- confirm.\n",
+    "frame = pd.read_csv(\"unicef.txt\", sep=\"|\")\n",
+    "print(frame.head())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "keszult:  854\n",
+      "Orszag:  152\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Each row is one survey, so the number of rows with a non-null Country\n",
+    "# value is the number of surveys.\n",
+    "survey_count = frame[\"Country\"].count()\n",
+    "print(\"keszult: \", survey_count)\n",
+    "\n",
+    "# Number of distinct countries covered by the surveys.\n",
+    "country_count = frame[\"Country\"].nunique()\n",
+    "print(\"Orszag: \", country_count)"
+   ]
  }
 ],
 "metadata": {
  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
@ -191,7 +275,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.12.5"
+   "version": "3.13.0"
  }
 },
 "nbformat": 4,