pythonbook/实验 探索Chipotle快餐数据/3.探索酒类消费数据.ipynb

618 lines
18 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"# -- 将数据框命名为drinks\n",
"# -- 哪个大陆(continent)平均消耗的啤酒(beer)更多?\n",
"# -- 打印出每个大陆(continent)的红酒消耗(wine_servings)的描述性统计值\n",
"# -- 打印出每个大陆每种酒类别的消耗平均值\n",
"# -- 打印出每个大陆每种酒类别的消耗中位数\n",
"# -- 打印出每个大陆对spirit饮品消耗的平均值最大值和最小值"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"import pandas as pd\n",
"#将数据框命名为drinks\n",
"drinks = pd.read_csv('data/drinks.csv')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>beer_servings</th>\n",
" </tr>\n",
" <tr>\n",
" <th>continent</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>EU</th>\n",
" <td>193.777778</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" beer_servings\n",
"continent \n",
"EU 193.777778"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#哪个大陆(continent)平均消耗的啤酒(beer)更多?\n",
"drinks[['beer_servings','continent']].groupby('continent').mean().sort_values('beer_servings',ascending=False).head(1)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" <th>min</th>\n",
" <th>25%</th>\n",
" <th>50%</th>\n",
" <th>75%</th>\n",
" <th>max</th>\n",
" </tr>\n",
" <tr>\n",
" <th>continent</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>AF</th>\n",
" <td>53.0</td>\n",
" <td>16.264151</td>\n",
" <td>38.846419</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>2.0</td>\n",
" <td>13.00</td>\n",
" <td>233.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>AS</th>\n",
" <td>44.0</td>\n",
" <td>9.068182</td>\n",
" <td>21.667034</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>8.00</td>\n",
" <td>123.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>EU</th>\n",
" <td>45.0</td>\n",
" <td>142.222222</td>\n",
" <td>97.421738</td>\n",
" <td>0.0</td>\n",
" <td>59.0</td>\n",
" <td>128.0</td>\n",
" <td>195.00</td>\n",
" <td>370.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>OC</th>\n",
" <td>16.0</td>\n",
" <td>35.625000</td>\n",
" <td>64.555790</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>8.5</td>\n",
" <td>23.25</td>\n",
" <td>212.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SA</th>\n",
" <td>12.0</td>\n",
" <td>62.416667</td>\n",
" <td>88.620189</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>12.0</td>\n",
" <td>98.50</td>\n",
" <td>221.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" count mean std min 25% 50% 75% max\n",
"continent \n",
"AF 53.0 16.264151 38.846419 0.0 1.0 2.0 13.00 233.0\n",
"AS 44.0 9.068182 21.667034 0.0 0.0 1.0 8.00 123.0\n",
"EU 45.0 142.222222 97.421738 0.0 59.0 128.0 195.00 370.0\n",
"OC 16.0 35.625000 64.555790 0.0 1.0 8.5 23.25 212.0\n",
"SA 12.0 62.416667 88.620189 1.0 3.0 12.0 98.50 221.0"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# -- 打印出每个大陆(continent)的红酒消耗(wine_servings)的描述性统计值\n",
"# drinks[['wine_servings','continent']].groupby('continent').sum()\n",
"drinks.groupby('continent').wine_servings.describe()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>beer_servings</th>\n",
" <th>spirit_servings</th>\n",
" <th>wine_servings</th>\n",
" <th>total_litres_of_pure_alcohol</th>\n",
" </tr>\n",
" <tr>\n",
" <th>continent</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>AF</th>\n",
" <td>61.471698</td>\n",
" <td>16.339623</td>\n",
" <td>16.264151</td>\n",
" <td>3.007547</td>\n",
" </tr>\n",
" <tr>\n",
" <th>AS</th>\n",
" <td>37.045455</td>\n",
" <td>60.840909</td>\n",
" <td>9.068182</td>\n",
" <td>2.170455</td>\n",
" </tr>\n",
" <tr>\n",
" <th>EU</th>\n",
" <td>193.777778</td>\n",
" <td>132.555556</td>\n",
" <td>142.222222</td>\n",
" <td>8.617778</td>\n",
" </tr>\n",
" <tr>\n",
" <th>OC</th>\n",
" <td>89.687500</td>\n",
" <td>58.437500</td>\n",
" <td>35.625000</td>\n",
" <td>3.381250</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SA</th>\n",
" <td>175.083333</td>\n",
" <td>114.750000</td>\n",
" <td>62.416667</td>\n",
" <td>6.308333</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" beer_servings spirit_servings wine_servings \\\n",
"continent \n",
"AF 61.471698 16.339623 16.264151 \n",
"AS 37.045455 60.840909 9.068182 \n",
"EU 193.777778 132.555556 142.222222 \n",
"OC 89.687500 58.437500 35.625000 \n",
"SA 175.083333 114.750000 62.416667 \n",
"\n",
" total_litres_of_pure_alcohol \n",
"continent \n",
"AF 3.007547 \n",
"AS 2.170455 \n",
"EU 8.617778 \n",
"OC 3.381250 \n",
"SA 6.308333 "
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# -- 打印出每个大陆每种酒类别的消耗平均值\n",
"# drinks\n",
"# drinks[['beer_servings','spirit_servings','wine_servings','continent']].groupby('continent').mean()\n",
"drinks.groupby('continent').mean()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>beer_servings</th>\n",
" <th>spirit_servings</th>\n",
" <th>wine_servings</th>\n",
" <th>total_litres_of_pure_alcohol</th>\n",
" </tr>\n",
" <tr>\n",
" <th>continent</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>AF</th>\n",
" <td>32.0</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>2.30</td>\n",
" </tr>\n",
" <tr>\n",
" <th>AS</th>\n",
" <td>17.5</td>\n",
" <td>16.0</td>\n",
" <td>1.0</td>\n",
" <td>1.20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>EU</th>\n",
" <td>219.0</td>\n",
" <td>122.0</td>\n",
" <td>128.0</td>\n",
" <td>10.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>OC</th>\n",
" <td>52.5</td>\n",
" <td>37.0</td>\n",
" <td>8.5</td>\n",
" <td>1.75</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SA</th>\n",
" <td>162.5</td>\n",
" <td>108.5</td>\n",
" <td>12.0</td>\n",
" <td>6.85</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" beer_servings spirit_servings wine_servings \\\n",
"continent \n",
"AF 32.0 3.0 2.0 \n",
"AS 17.5 16.0 1.0 \n",
"EU 219.0 122.0 128.0 \n",
"OC 52.5 37.0 8.5 \n",
"SA 162.5 108.5 12.0 \n",
"\n",
" total_litres_of_pure_alcohol \n",
"continent \n",
"AF 2.30 \n",
"AS 1.20 \n",
"EU 10.00 \n",
"OC 1.75 \n",
"SA 6.85 "
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# -- 打印出每个大陆每种酒类别的消耗中位数\n",
"drinks.groupby('continent').median()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" <th>min</th>\n",
" <th>25%</th>\n",
" <th>50%</th>\n",
" <th>75%</th>\n",
" <th>max</th>\n",
" </tr>\n",
" <tr>\n",
" <th>continent</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>AF</th>\n",
" <td>53.0</td>\n",
" <td>16.339623</td>\n",
" <td>28.102794</td>\n",
" <td>0.0</td>\n",
" <td>1.00</td>\n",
" <td>3.0</td>\n",
" <td>19.00</td>\n",
" <td>152.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>AS</th>\n",
" <td>44.0</td>\n",
" <td>60.840909</td>\n",
" <td>84.362160</td>\n",
" <td>0.0</td>\n",
" <td>1.00</td>\n",
" <td>16.0</td>\n",
" <td>98.00</td>\n",
" <td>326.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>EU</th>\n",
" <td>45.0</td>\n",
" <td>132.555556</td>\n",
" <td>77.589115</td>\n",
" <td>0.0</td>\n",
" <td>81.00</td>\n",
" <td>122.0</td>\n",
" <td>173.00</td>\n",
" <td>373.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>OC</th>\n",
" <td>16.0</td>\n",
" <td>58.437500</td>\n",
" <td>70.504817</td>\n",
" <td>0.0</td>\n",
" <td>18.00</td>\n",
" <td>37.0</td>\n",
" <td>65.25</td>\n",
" <td>254.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SA</th>\n",
" <td>12.0</td>\n",
" <td>114.750000</td>\n",
" <td>77.077440</td>\n",
" <td>25.0</td>\n",
" <td>65.75</td>\n",
" <td>108.5</td>\n",
" <td>148.75</td>\n",
" <td>302.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" count mean std min 25% 50% 75% max\n",
"continent \n",
"AF 53.0 16.339623 28.102794 0.0 1.00 3.0 19.00 152.0\n",
"AS 44.0 60.840909 84.362160 0.0 1.00 16.0 98.00 326.0\n",
"EU 45.0 132.555556 77.589115 0.0 81.00 122.0 173.00 373.0\n",
"OC 16.0 58.437500 70.504817 0.0 18.00 37.0 65.25 254.0\n",
"SA 12.0 114.750000 77.077440 25.0 65.75 108.5 148.75 302.0"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#打印出每个大陆对spirit饮品消耗的平均值最大值和最小值\n",
"drinks.groupby('continent').spirit_servings.describe()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}