diff options
| author | leshe4ka46 <alex9102naid1@ya.ru> | 2025-11-11 11:34:38 +0300 |
|---|---|---|
| committer | leshe4ka46 <alex9102naid1@ya.ru> | 2025-11-11 11:34:38 +0300 |
| commit | 5aaff9711387ce1ea1ec8ee5c5b4ecd9e1ea3dd1 (patch) | |
| tree | f52bf0453ac0c8c93c4928f3395dcbbb761f1303 /Fundamentals_of_Accelerated_Data_Science/1-05_grouping.ipynb | |
| parent | 910a222fa60ce6ea0831f2956470b8a0b9f62670 (diff) | |
upd
Diffstat (limited to 'Fundamentals_of_Accelerated_Data_Science/1-05_grouping.ipynb')
| -rw-r--r-- | Fundamentals_of_Accelerated_Data_Science/1-05_grouping.ipynb | 8 |
1 file changed, 6 insertions, 2 deletions
```diff
diff --git a/Fundamentals_of_Accelerated_Data_Science/1-05_grouping.ipynb b/Fundamentals_of_Accelerated_Data_Science/1-05_grouping.ipynb
index 0595f20..9444045 100644
--- a/Fundamentals_of_Accelerated_Data_Science/1-05_grouping.ipynb
+++ b/Fundamentals_of_Accelerated_Data_Science/1-05_grouping.ipynb
@@ -1292,8 +1292,12 @@
  "%%cudf.pandas.line_profile\n",
  "# DO NOT CHANGE THIS CELL\n",
  "\n",
- "pvt_tbl=df[['county', 'sex', 'name']].pivot_table(index=['county'], columns=['sex'], values='name', aggfunc='count')\n",
- "pvt_tbl=pvt_tbl.apply(lambda x: x/sum(x), axis=1)\n",
+ "dd = df.groupby(['county','age','sex']).size().rename('n').reset_index()\n",
+ "dd['tot'] = dd.groupby(['county','age'])['n'].transform('sum')\n",
+ "dd['p'] = dd['n'] / dd['tot']\n",
+ "\n",
+ "pvt_tbl=dd.pivot_table(index=['county','age'], columns='sex', values='p', aggfunc='sum')\n",
+ "\n",
  "display(pvt_tbl)"
  ]
  },
```
