about summary refs log tree commit diff
path: root/Fundamentals_of_Accelerated_Data_Science/1-05_grouping.ipynb
diff options
context:
space:
mode:
Diffstat (limited to 'Fundamentals_of_Accelerated_Data_Science/1-05_grouping.ipynb')
-rw-r--r-- Fundamentals_of_Accelerated_Data_Science/1-05_grouping.ipynb 8
1 files changed, 6 insertions, 2 deletions
diff --git a/Fundamentals_of_Accelerated_Data_Science/1-05_grouping.ipynb b/Fundamentals_of_Accelerated_Data_Science/1-05_grouping.ipynb
index 0595f20..9444045 100644
--- a/Fundamentals_of_Accelerated_Data_Science/1-05_grouping.ipynb
+++ b/Fundamentals_of_Accelerated_Data_Science/1-05_grouping.ipynb
@@ -1292,8 +1292,12 @@
"%%cudf.pandas.line_profile\n",
"# DO NOT CHANGE THIS CELL\n",
"\n",
- "pvt_tbl=df[['county', 'sex', 'name']].pivot_table(index=['county'], columns=['sex'], values='name', aggfunc='count')\n",
- "pvt_tbl=pvt_tbl.apply(lambda x: x/sum(x), axis=1)\n",
+ "dd = df.groupby(['county','age','sex']).size().rename('n').reset_index()\n",
+ "dd['tot'] = dd.groupby(['county','age'])['n'].transform('sum')\n",
+ "dd['p'] = dd['n'] / dd['tot']\n",
+ "\n",
+ "pvt_tbl=dd.pivot_table(index=['county','age'], columns='sex', values='p', aggfunc='sum')\n",
+ "\n",
"display(pvt_tbl)"
]
},