aboutsummaryrefslogtreecommitdiff
path: root/Fundamentals_of_Accelerated_Data_Science/1-05_grouping.ipynb
diff options
context:
space:
mode:
authorleshe4ka46 <alex9102naid1@ya.ru>2025-11-11 11:34:38 +0300
committerleshe4ka46 <alex9102naid1@ya.ru>2025-11-11 11:34:38 +0300
commit5aaff9711387ce1ea1ec8ee5c5b4ecd9e1ea3dd1 (patch)
treef52bf0453ac0c8c93c4928f3395dcbbb761f1303 /Fundamentals_of_Accelerated_Data_Science/1-05_grouping.ipynb
parent910a222fa60ce6ea0831f2956470b8a0b9f62670 (diff)
upd
Diffstat (limited to 'Fundamentals_of_Accelerated_Data_Science/1-05_grouping.ipynb')
-rw-r--r--Fundamentals_of_Accelerated_Data_Science/1-05_grouping.ipynb8
1 files changed, 6 insertions, 2 deletions
diff --git a/Fundamentals_of_Accelerated_Data_Science/1-05_grouping.ipynb b/Fundamentals_of_Accelerated_Data_Science/1-05_grouping.ipynb
index 0595f20..9444045 100644
--- a/Fundamentals_of_Accelerated_Data_Science/1-05_grouping.ipynb
+++ b/Fundamentals_of_Accelerated_Data_Science/1-05_grouping.ipynb
@@ -1292,8 +1292,12 @@
"%%cudf.pandas.line_profile\n",
"# DO NOT CHANGE THIS CELL\n",
"\n",
- "pvt_tbl=df[['county', 'sex', 'name']].pivot_table(index=['county'], columns=['sex'], values='name', aggfunc='count')\n",
- "pvt_tbl=pvt_tbl.apply(lambda x: x/sum(x), axis=1)\n",
+ "dd = df.groupby(['county','age','sex']).size().rename('n').reset_index()\n",
+ "dd['tot'] = dd.groupby(['county','age'])['n'].transform('sum')\n",
+ "dd['p'] = dd['n'] / dd['tot']\n",
+ "\n",
+ "pvt_tbl=dd.pivot_table(index=['county','age'], columns='sex', values='p', aggfunc='sum')\n",
+ "\n",
"display(pvt_tbl)"
]
},