diff options
Diffstat (limited to 'Fundamentals_of_Accelerated_Data_Science/1-05_grouping.ipynb')
| -rw-r--r-- | Fundamentals_of_Accelerated_Data_Science/1-05_grouping.ipynb | 8 |
1 files changed, 6 insertions, 2 deletions
diff --git a/Fundamentals_of_Accelerated_Data_Science/1-05_grouping.ipynb b/Fundamentals_of_Accelerated_Data_Science/1-05_grouping.ipynb index 0595f20..9444045 100644 --- a/Fundamentals_of_Accelerated_Data_Science/1-05_grouping.ipynb +++ b/Fundamentals_of_Accelerated_Data_Science/1-05_grouping.ipynb @@ -1292,8 +1292,12 @@ "%%cudf.pandas.line_profile\n", "# DO NOT CHANGE THIS CELL\n", "\n", - "pvt_tbl=df[['county', 'sex', 'name']].pivot_table(index=['county'], columns=['sex'], values='name', aggfunc='count')\n", - "pvt_tbl=pvt_tbl.apply(lambda x: x/sum(x), axis=1)\n", + "dd = df.groupby(['county','age','sex']).size().rename('n').reset_index()\n", + "dd['tot'] = dd.groupby(['county','age'])['n'].transform('sum')\n", + "dd['p'] = dd['n'] / dd['tot']\n", + "\n", + "pvt_tbl=dd.pivot_table(index=['county','age'], columns='sex', values='p', aggfunc='sum')\n", + "\n", "display(pvt_tbl)" ] }, |
