Pandas feladatok

This commit is contained in:
2024-12-09 19:41:42 +01:00
parent b99346376f
commit 32d805ecf1
3 changed files with 301 additions and 14 deletions

View File

@ -23,7 +23,7 @@
},
{
"cell_type": "code",
"execution_count": 57,
"execution_count": 4,
"metadata": {},
"outputs": [
{
@ -68,7 +68,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@ -96,7 +96,7 @@
},
{
"cell_type": "code",
"execution_count": 97,
"execution_count": 6,
"metadata": {},
"outputs": [
{
@ -145,6 +145,118 @@
"- Cégkategóriánként hány dollárt fektettek be összesen?"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 LifeLock\n",
"1 LifeLock\n",
"2 LifeLock\n",
"3 MyCityFaces\n",
"4 Flypaper\n",
"Name: company, dtype: object\n",
"The head: company numEmps category city state fundedDate raisedAmt \\\n",
"0 LifeLock NaN web Tempe AZ 1-May-07 6850000 \n",
"1 LifeLock NaN web Tempe AZ 1-Oct-06 6000000 \n",
"2 LifeLock NaN web Tempe AZ 1-Jan-08 25000000 \n",
"3 MyCityFaces 7.0 web Scottsdale AZ 1-Jan-08 50000 \n",
"4 Flypaper NaN web Phoenix AZ 1-Feb-08 3000000 \n",
"\n",
" raisedCurrency round \n",
"0 USD b \n",
"1 USD a \n",
"2 USD c \n",
"3 USD seed \n",
"4 USD a \n",
"1434\n"
]
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Load the pipe-delimited investments table; later cells reuse `frame`.\n",
"frame = pd.read_csv(\"investments.txt\", sep=\"|\")\n",
"\n",
"# Preview the first five company names.\n",
"print(frame[\"company\"].head(5))\n",
"\n",
"# NOTE(review): the label is printed on the same line as the frame's header row,\n",
"# which shifts the first header line in the output.\n",
"print(\"The head:\", frame.head())\n",
"\n",
"# count() tallies the non-null entries of the `company` column.\n",
"framecount = frame[\"company\"].count()\n",
"print(framecount)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"company\n",
"Facebook 7\n",
"Juice Wireless 5\n",
"Vivsimo 5\n",
"Glam Media 5\n",
"Brightcove 5\n",
" ..\n",
"AdReady 1\n",
"AdMob 1\n",
"Acquia 1\n",
"x+1 1\n",
"vbs tv 1\n",
"Name: category, Length: 891, dtype: int64\n"
]
}
],
"source": [
"# Number of investment rows per company, largest count first.\n",
"# count() tallies the non-null `category` values inside each company group.\n",
"# Earlier throwaway assignments to `grouped` were removed: they were overwritten\n",
"# before being used, so only this statement ever determined the printed result.\n",
"grouped = (\n",
"    frame.groupby(\"company\")[\"category\"]\n",
"    .count()\n",
"    .sort_values(ascending=False)\n",
")\n",
"\n",
"print(grouped)"
]
},
{
"cell_type": "code",
"execution_count": 97,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"category\n",
"web 11753474750\n",
"software 1017942000\n",
"hardware 824500000\n",
"mobile 323020000\n",
"cleantech 258900000\n",
"other 119850000\n",
"biotech 77250000\n",
"consulting 32135000\n",
"Name: raisedAmt, dtype: int64\n"
]
}
],
"source": [
"# Total `raisedAmt` per company category, largest total first.\n",
"# NOTE(review): every row is summed regardless of `raisedCurrency`; confirm that\n",
"# all rows are USD before reading these totals as dollars.\n",
"categ = (\n",
"    frame.groupby(\"category\")[\"raisedAmt\"]\n",
"    .sum()\n",
"    .sort_values(ascending=False)\n",
")\n",
"\n",
"print(categ)"
]
},
{
"cell_type": "code",
"execution_count": null,
@ -169,7 +281,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.6"
"version": "3.13.0"
}
},
"nbformat": 4,