pythonbook/实验 探索Chipotle快餐数据/6.探索风速数据.ipynb

1289 lines
40 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# -- 将数据作存储并且设置前三列为合适的索引\n",
"# -- 2061年?我们真的有这一年的数据吗?创建一个函数并用它去修复这个bug\n",
"# -- 将日期设为索引注意数据类型应该是datetime64[ns]\n",
"# -- 对应每一个location一共有多少数据值缺失\n",
"# -- 对应每一个location一共有多少完整的数据值\n",
"# -- 对于全体数据,计算风速的平均值\n",
"# -- 创建一个名为loc_stats的数据框去计算并存储每个location的风速最小值最大值平均值和标准差\n",
"# -- 创建一个名为day_stats的数据框去计算并存储每一天所有location的风速最小值最大值平均值和标准差\n",
"# -- 对于每一个location计算一月份的平均风速\n",
"# -- 对于数据记录按照年为频率取样\n",
"# -- 对于数据记录按照月为频率取样"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Yr_Mo_Dy</th>\n",
" <th>RPT</th>\n",
" <th>VAL</th>\n",
" <th>ROS</th>\n",
" <th>KIL</th>\n",
" <th>SHA</th>\n",
" <th>BIR</th>\n",
" <th>DUB</th>\n",
" <th>CLA</th>\n",
" <th>MUL</th>\n",
" <th>CLO</th>\n",
" <th>BEL</th>\n",
" <th>MAL</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2061-01-01</td>\n",
" <td>15.04</td>\n",
" <td>14.96</td>\n",
" <td>13.17</td>\n",
" <td>9.29</td>\n",
" <td>NaN</td>\n",
" <td>9.87</td>\n",
" <td>13.67</td>\n",
" <td>10.25</td>\n",
" <td>10.83</td>\n",
" <td>12.58</td>\n",
" <td>18.50</td>\n",
" <td>15.04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2061-01-02</td>\n",
" <td>14.71</td>\n",
" <td>NaN</td>\n",
" <td>10.83</td>\n",
" <td>6.50</td>\n",
" <td>12.62</td>\n",
" <td>7.67</td>\n",
" <td>11.50</td>\n",
" <td>10.04</td>\n",
" <td>9.79</td>\n",
" <td>9.67</td>\n",
" <td>17.54</td>\n",
" <td>13.83</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2061-01-03</td>\n",
" <td>18.50</td>\n",
" <td>16.88</td>\n",
" <td>12.33</td>\n",
" <td>10.13</td>\n",
" <td>11.17</td>\n",
" <td>6.17</td>\n",
" <td>11.25</td>\n",
" <td>NaN</td>\n",
" <td>8.50</td>\n",
" <td>7.67</td>\n",
" <td>12.75</td>\n",
" <td>12.71</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2061-01-04</td>\n",
" <td>10.58</td>\n",
" <td>6.63</td>\n",
" <td>11.75</td>\n",
" <td>4.58</td>\n",
" <td>4.54</td>\n",
" <td>2.88</td>\n",
" <td>8.63</td>\n",
" <td>1.79</td>\n",
" <td>5.83</td>\n",
" <td>5.88</td>\n",
" <td>5.46</td>\n",
" <td>10.88</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2061-01-05</td>\n",
" <td>13.33</td>\n",
" <td>13.25</td>\n",
" <td>11.42</td>\n",
" <td>6.17</td>\n",
" <td>10.71</td>\n",
" <td>8.21</td>\n",
" <td>11.92</td>\n",
" <td>6.54</td>\n",
" <td>10.92</td>\n",
" <td>10.34</td>\n",
" <td>12.92</td>\n",
" <td>11.83</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Yr_Mo_Dy RPT VAL ROS KIL SHA BIR DUB CLA MUL \\\n",
"0 2061-01-01 15.04 14.96 13.17 9.29 NaN 9.87 13.67 10.25 10.83 \n",
"1 2061-01-02 14.71 NaN 10.83 6.50 12.62 7.67 11.50 10.04 9.79 \n",
"2 2061-01-03 18.50 16.88 12.33 10.13 11.17 6.17 11.25 NaN 8.50 \n",
"3 2061-01-04 10.58 6.63 11.75 4.58 4.54 2.88 8.63 1.79 5.83 \n",
"4 2061-01-05 13.33 13.25 11.42 6.17 10.71 8.21 11.92 6.54 10.92 \n",
"\n",
" CLO BEL MAL \n",
"0 12.58 18.50 15.04 \n",
"1 9.67 17.54 13.83 \n",
"2 7.67 12.75 12.71 \n",
"3 5.88 5.46 10.88 \n",
"4 10.34 12.92 11.83 "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import datetime\n",
"\n",
"# Load the whitespace-delimited wind data; the first three columns hold the\n",
"# two-digit year, the month and the day.\n",
"raw = pd.read_csv('data/wind.csv', sep=r'\\s+')\n",
"\n",
"# Combine the three date columns explicitly instead of relying on the nested-list\n",
"# form parse_dates=[[0, 1, 2]], which newer pandas releases deprecate.\n",
"# %y maps 61..68 to 2061..2068, so the first rows still show the wrong-century\n",
"# dates that the fix_year cell repairs.\n",
"yr_col, mo_col, dy_col = raw.columns[:3]\n",
"date_text = raw[yr_col].astype(str) + '-' + raw[mo_col].astype(str) + '-' + raw[dy_col].astype(str)\n",
"df = raw.drop([yr_col, mo_col, dy_col], axis=1)\n",
"# Put the combined date first, under the same name the later cells use.\n",
"df.insert(0, 'Yr_Mo_Dy', pd.to_datetime(date_text, format='%y-%m-%d'))\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Yr_Mo_Dy</th>\n",
" <th>RPT</th>\n",
" <th>VAL</th>\n",
" <th>ROS</th>\n",
" <th>KIL</th>\n",
" <th>SHA</th>\n",
" <th>BIR</th>\n",
" <th>DUB</th>\n",
" <th>CLA</th>\n",
" <th>MUL</th>\n",
" <th>CLO</th>\n",
" <th>BEL</th>\n",
" <th>MAL</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1961-01-01</td>\n",
" <td>15.04</td>\n",
" <td>14.96</td>\n",
" <td>13.17</td>\n",
" <td>9.29</td>\n",
" <td>NaN</td>\n",
" <td>9.87</td>\n",
" <td>13.67</td>\n",
" <td>10.25</td>\n",
" <td>10.83</td>\n",
" <td>12.58</td>\n",
" <td>18.50</td>\n",
" <td>15.04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1961-01-02</td>\n",
" <td>14.71</td>\n",
" <td>NaN</td>\n",
" <td>10.83</td>\n",
" <td>6.50</td>\n",
" <td>12.62</td>\n",
" <td>7.67</td>\n",
" <td>11.50</td>\n",
" <td>10.04</td>\n",
" <td>9.79</td>\n",
" <td>9.67</td>\n",
" <td>17.54</td>\n",
" <td>13.83</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1961-01-03</td>\n",
" <td>18.50</td>\n",
" <td>16.88</td>\n",
" <td>12.33</td>\n",
" <td>10.13</td>\n",
" <td>11.17</td>\n",
" <td>6.17</td>\n",
" <td>11.25</td>\n",
" <td>NaN</td>\n",
" <td>8.50</td>\n",
" <td>7.67</td>\n",
" <td>12.75</td>\n",
" <td>12.71</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1961-01-04</td>\n",
" <td>10.58</td>\n",
" <td>6.63</td>\n",
" <td>11.75</td>\n",
" <td>4.58</td>\n",
" <td>4.54</td>\n",
" <td>2.88</td>\n",
" <td>8.63</td>\n",
" <td>1.79</td>\n",
" <td>5.83</td>\n",
" <td>5.88</td>\n",
" <td>5.46</td>\n",
" <td>10.88</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1961-01-05</td>\n",
" <td>13.33</td>\n",
" <td>13.25</td>\n",
" <td>11.42</td>\n",
" <td>6.17</td>\n",
" <td>10.71</td>\n",
" <td>8.21</td>\n",
" <td>11.92</td>\n",
" <td>6.54</td>\n",
" <td>10.92</td>\n",
" <td>10.34</td>\n",
" <td>12.92</td>\n",
" <td>11.83</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Yr_Mo_Dy RPT VAL ROS KIL SHA BIR DUB CLA MUL \\\n",
"0 1961-01-01 15.04 14.96 13.17 9.29 NaN 9.87 13.67 10.25 10.83 \n",
"1 1961-01-02 14.71 NaN 10.83 6.50 12.62 7.67 11.50 10.04 9.79 \n",
"2 1961-01-03 18.50 16.88 12.33 10.13 11.17 6.17 11.25 NaN 8.50 \n",
"3 1961-01-04 10.58 6.63 11.75 4.58 4.54 2.88 8.63 1.79 5.83 \n",
"4 1961-01-05 13.33 13.25 11.42 6.17 10.71 8.21 11.92 6.54 10.92 \n",
"\n",
" CLO BEL MAL \n",
"0 12.58 18.50 15.04 \n",
"1 9.67 17.54 13.83 \n",
"2 7.67 12.75 12.71 \n",
"3 5.88 5.46 10.88 \n",
"4 10.34 12.92 11.83 "
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Two-digit years were read into the wrong century (e.g. 2061 instead of 1961);\n",
"# define a helper that moves such dates back by 100 years and apply it.\n",
"import datetime\n",
"def fix_year(x, cutoff=1999):\n",
"    \"\"\"Return x shifted back 100 years when its year is greater than cutoff.\n",
"\n",
"    x needs a year attribute and a replace(year=...) method, which pandas\n",
"    Timestamp and datetime.date/datetime provide. The result keeps the type\n",
"    of x, so a datetime64 column stays datetime64 after apply().\n",
"    \"\"\"\n",
"    if x.year > cutoff:\n",
"        return x.replace(year=x.year - 100)\n",
"    return x\n",
"\n",
"df['Yr_Mo_Dy'] = df['Yr_Mo_Dy'].apply(fix_year)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>RPT</th>\n",
" <th>VAL</th>\n",
" <th>ROS</th>\n",
" <th>KIL</th>\n",
" <th>SHA</th>\n",
" <th>BIR</th>\n",
" <th>DUB</th>\n",
" <th>CLA</th>\n",
" <th>MUL</th>\n",
" <th>CLO</th>\n",
" <th>BEL</th>\n",
" <th>MAL</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Yr_Mo_Dy</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1961-01-01</th>\n",
" <td>15.04</td>\n",
" <td>14.96</td>\n",
" <td>13.17</td>\n",
" <td>9.29</td>\n",
" <td>NaN</td>\n",
" <td>9.87</td>\n",
" <td>13.67</td>\n",
" <td>10.25</td>\n",
" <td>10.83</td>\n",
" <td>12.58</td>\n",
" <td>18.50</td>\n",
" <td>15.04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1961-01-02</th>\n",
" <td>14.71</td>\n",
" <td>NaN</td>\n",
" <td>10.83</td>\n",
" <td>6.50</td>\n",
" <td>12.62</td>\n",
" <td>7.67</td>\n",
" <td>11.50</td>\n",
" <td>10.04</td>\n",
" <td>9.79</td>\n",
" <td>9.67</td>\n",
" <td>17.54</td>\n",
" <td>13.83</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1961-01-03</th>\n",
" <td>18.50</td>\n",
" <td>16.88</td>\n",
" <td>12.33</td>\n",
" <td>10.13</td>\n",
" <td>11.17</td>\n",
" <td>6.17</td>\n",
" <td>11.25</td>\n",
" <td>NaN</td>\n",
" <td>8.50</td>\n",
" <td>7.67</td>\n",
" <td>12.75</td>\n",
" <td>12.71</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1961-01-04</th>\n",
" <td>10.58</td>\n",
" <td>6.63</td>\n",
" <td>11.75</td>\n",
" <td>4.58</td>\n",
" <td>4.54</td>\n",
" <td>2.88</td>\n",
" <td>8.63</td>\n",
" <td>1.79</td>\n",
" <td>5.83</td>\n",
" <td>5.88</td>\n",
" <td>5.46</td>\n",
" <td>10.88</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1961-01-05</th>\n",
" <td>13.33</td>\n",
" <td>13.25</td>\n",
" <td>11.42</td>\n",
" <td>6.17</td>\n",
" <td>10.71</td>\n",
" <td>8.21</td>\n",
" <td>11.92</td>\n",
" <td>6.54</td>\n",
" <td>10.92</td>\n",
" <td>10.34</td>\n",
" <td>12.92</td>\n",
" <td>11.83</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" RPT VAL ROS KIL SHA BIR DUB CLA MUL \\\n",
"Yr_Mo_Dy \n",
"1961-01-01 15.04 14.96 13.17 9.29 NaN 9.87 13.67 10.25 10.83 \n",
"1961-01-02 14.71 NaN 10.83 6.50 12.62 7.67 11.50 10.04 9.79 \n",
"1961-01-03 18.50 16.88 12.33 10.13 11.17 6.17 11.25 NaN 8.50 \n",
"1961-01-04 10.58 6.63 11.75 4.58 4.54 2.88 8.63 1.79 5.83 \n",
"1961-01-05 13.33 13.25 11.42 6.17 10.71 8.21 11.92 6.54 10.92 \n",
"\n",
" CLO BEL MAL \n",
"Yr_Mo_Dy \n",
"1961-01-01 12.58 18.50 15.04 \n",
"1961-01-02 9.67 17.54 13.83 \n",
"1961-01-03 7.67 12.75 12.71 \n",
"1961-01-04 5.88 5.46 10.88 \n",
"1961-01-05 10.34 12.92 11.83 "
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Use the date as the index; to_datetime makes sure its dtype is datetime64[ns].\n",
"df = df.assign(Yr_Mo_Dy=pd.to_datetime(df['Yr_Mo_Dy'])).set_index('Yr_Mo_Dy')\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"RPT 6\n",
"VAL 3\n",
"ROS 2\n",
"KIL 5\n",
"SHA 2\n",
"BIR 0\n",
"DUB 3\n",
"CLA 2\n",
"MUL 3\n",
"CLO 1\n",
"BEL 0\n",
"MAL 4\n",
"dtype: int64"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of missing values for each location (one count per column).\n",
"df.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"10.227982360836924"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Mean wind speed over every individual observation.\n",
"# Averaging the per-location means would weight observations unequally because\n",
"# the locations have different numbers of missing values, so divide the total of\n",
"# all readings by the number of non-missing readings instead.\n",
"wind = df.loc[:, 'RPT':'MAL']\n",
"wind.sum().sum() / wind.count().sum()"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>min</th>\n",
" <th>max</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>RPT</th>\n",
" <td>0.67</td>\n",
" <td>35.80</td>\n",
" <td>12.362987</td>\n",
" <td>5.618413</td>\n",
" </tr>\n",
" <tr>\n",
" <th>VAL</th>\n",
" <td>0.21</td>\n",
" <td>33.37</td>\n",
" <td>10.644314</td>\n",
" <td>5.267356</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ROS</th>\n",
" <td>1.50</td>\n",
" <td>33.84</td>\n",
" <td>11.660526</td>\n",
" <td>5.008450</td>\n",
" </tr>\n",
" <tr>\n",
" <th>KIL</th>\n",
" <td>0.00</td>\n",
" <td>28.46</td>\n",
" <td>6.306468</td>\n",
" <td>3.605811</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SHA</th>\n",
" <td>0.13</td>\n",
" <td>37.54</td>\n",
" <td>10.455834</td>\n",
" <td>4.936125</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" min max mean std\n",
"RPT 0.67 35.80 12.362987 5.618413\n",
"VAL 0.21 33.37 10.644314 5.267356\n",
"ROS 1.50 33.84 11.660526 5.008450\n",
"KIL 0.00 28.46 6.306468 3.605811\n",
"SHA 0.13 37.54 10.455834 4.936125"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# loc_stats: minimum, maximum, mean and standard deviation of wind speed per location.\n",
"# Only the location columns RPT..MAL are used, so the result does not change if\n",
"# helper columns have been added to df by the time this cell is (re-)run.\n",
"wind = df.loc[:, 'RPT':'MAL']\n",
"loc_stats = wind.agg(['min', 'max', 'mean', 'std']).T\n",
"loc_stats.head()"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>min</th>\n",
" <th>max</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Yr_Mo_Dy</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1961-01-01</th>\n",
" <td>1.0</td>\n",
" <td>1961.0</td>\n",
" <td>150.442857</td>\n",
" <td>521.138056</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1961-01-02</th>\n",
" <td>1.0</td>\n",
" <td>1961.0</td>\n",
" <td>149.192857</td>\n",
" <td>521.493581</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1961-01-03</th>\n",
" <td>1.0</td>\n",
" <td>1961.0</td>\n",
" <td>149.504286</td>\n",
" <td>521.406085</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1961-01-04</th>\n",
" <td>1.0</td>\n",
" <td>1961.0</td>\n",
" <td>136.362000</td>\n",
" <td>504.781236</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1961-01-05</th>\n",
" <td>1.0</td>\n",
" <td>1961.0</td>\n",
" <td>139.637333</td>\n",
" <td>503.877109</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" min max mean std\n",
"Yr_Mo_Dy \n",
"1961-01-01 1.0 1961.0 150.442857 521.138056\n",
"1961-01-02 1.0 1961.0 149.192857 521.493581\n",
"1961-01-03 1.0 1961.0 149.504286 521.406085\n",
"1961-01-04 1.0 1961.0 136.362000 504.781236\n",
"1961-01-05 1.0 1961.0 139.637333 503.877109"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# day_stats: minimum, maximum, mean and standard deviation of wind speed for each\n",
"# day, taken across all locations.\n",
"# Restrict to the location columns RPT..MAL: when helper columns such as\n",
"# year/month/day are present in df they would otherwise enter the row-wise\n",
"# statistics (e.g. a daily max of 1961).\n",
"wind = df.loc[:, 'RPT':'MAL']\n",
"day_stats = pd.concat(\n",
"    [wind.min(axis=1), wind.max(axis=1), wind.mean(axis=1), wind.std(axis=1)],\n",
"    axis=1,\n",
"    keys=['min', 'max', 'mean', 'std'],\n",
")\n",
"day_stats.head()"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"RPT 14.847325\n",
"VAL 12.914560\n",
"ROS 13.299624\n",
"KIL 7.199498\n",
"SHA 11.667734\n",
"BIR 8.054839\n",
"DUB 11.819355\n",
"CLA 9.512047\n",
"MUL 9.543208\n",
"CLO 10.053566\n",
"BEL 14.550520\n",
"MAL 18.028763\n",
"dtype: float64"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Mean January wind speed for each location.\n",
"# The calendar parts are read straight from the DatetimeIndex instead of via\n",
"# per-row lambdas; the helper columns are kept for cells that filter on them.\n",
"df['date'] = df.index\n",
"df['year'] = df.index.year\n",
"df['month'] = df.index.month\n",
"df['day'] = df.index.day\n",
"january_winds = df[df['month'] == 1]\n",
"january_winds.loc[:, 'RPT':'MAL'].mean()"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>RPT</th>\n",
" <th>VAL</th>\n",
" <th>ROS</th>\n",
" <th>KIL</th>\n",
" <th>SHA</th>\n",
" <th>BIR</th>\n",
" <th>DUB</th>\n",
" <th>CLA</th>\n",
" <th>MUL</th>\n",
" <th>CLO</th>\n",
" <th>BEL</th>\n",
" <th>MAL</th>\n",
" <th>date</th>\n",
" <th>year</th>\n",
" <th>month</th>\n",
" <th>day</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Yr_Mo_Dy</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1961-01-01</th>\n",
" <td>15.04</td>\n",
" <td>14.96</td>\n",
" <td>13.17</td>\n",
" <td>9.29</td>\n",
" <td>NaN</td>\n",
" <td>9.87</td>\n",
" <td>13.67</td>\n",
" <td>10.25</td>\n",
" <td>10.83</td>\n",
" <td>12.58</td>\n",
" <td>18.50</td>\n",
" <td>15.04</td>\n",
" <td>1961-01-01</td>\n",
" <td>1961</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1962-01-01</th>\n",
" <td>9.29</td>\n",
" <td>3.42</td>\n",
" <td>11.54</td>\n",
" <td>3.50</td>\n",
" <td>2.21</td>\n",
" <td>1.96</td>\n",
" <td>10.41</td>\n",
" <td>2.79</td>\n",
" <td>3.54</td>\n",
" <td>5.17</td>\n",
" <td>4.38</td>\n",
" <td>7.92</td>\n",
" <td>1962-01-01</td>\n",
" <td>1962</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1963-01-01</th>\n",
" <td>15.59</td>\n",
" <td>13.62</td>\n",
" <td>19.79</td>\n",
" <td>8.38</td>\n",
" <td>12.25</td>\n",
" <td>10.00</td>\n",
" <td>23.45</td>\n",
" <td>15.71</td>\n",
" <td>13.59</td>\n",
" <td>14.37</td>\n",
" <td>17.58</td>\n",
" <td>34.13</td>\n",
" <td>1963-01-01</td>\n",
" <td>1963</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1964-01-01</th>\n",
" <td>25.80</td>\n",
" <td>22.13</td>\n",
" <td>18.21</td>\n",
" <td>13.25</td>\n",
" <td>21.29</td>\n",
" <td>14.79</td>\n",
" <td>14.12</td>\n",
" <td>19.58</td>\n",
" <td>13.25</td>\n",
" <td>16.75</td>\n",
" <td>28.96</td>\n",
" <td>21.00</td>\n",
" <td>1964-01-01</td>\n",
" <td>1964</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1965-01-01</th>\n",
" <td>9.54</td>\n",
" <td>11.92</td>\n",
" <td>9.00</td>\n",
" <td>4.38</td>\n",
" <td>6.08</td>\n",
" <td>5.21</td>\n",
" <td>10.25</td>\n",
" <td>6.08</td>\n",
" <td>5.71</td>\n",
" <td>8.63</td>\n",
" <td>12.04</td>\n",
" <td>17.41</td>\n",
" <td>1965-01-01</td>\n",
" <td>1965</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1966-01-01</th>\n",
" <td>22.04</td>\n",
" <td>21.50</td>\n",
" <td>17.08</td>\n",
" <td>12.75</td>\n",
" <td>22.17</td>\n",
" <td>15.59</td>\n",
" <td>21.79</td>\n",
" <td>18.12</td>\n",
" <td>16.66</td>\n",
" <td>17.83</td>\n",
" <td>28.33</td>\n",
" <td>23.79</td>\n",
" <td>1966-01-01</td>\n",
" <td>1966</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1967-01-01</th>\n",
" <td>6.46</td>\n",
" <td>4.46</td>\n",
" <td>6.50</td>\n",
" <td>3.21</td>\n",
" <td>6.67</td>\n",
" <td>3.79</td>\n",
" <td>11.38</td>\n",
" <td>3.83</td>\n",
" <td>7.71</td>\n",
" <td>9.08</td>\n",
" <td>10.67</td>\n",
" <td>20.91</td>\n",
" <td>1967-01-01</td>\n",
" <td>1967</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1968-01-01</th>\n",
" <td>30.04</td>\n",
" <td>17.88</td>\n",
" <td>16.25</td>\n",
" <td>16.25</td>\n",
" <td>21.79</td>\n",
" <td>12.54</td>\n",
" <td>18.16</td>\n",
" <td>16.62</td>\n",
" <td>18.75</td>\n",
" <td>17.62</td>\n",
" <td>22.25</td>\n",
" <td>27.29</td>\n",
" <td>1968-01-01</td>\n",
" <td>1968</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1969-01-01</th>\n",
" <td>6.13</td>\n",
" <td>1.63</td>\n",
" <td>5.41</td>\n",
" <td>1.08</td>\n",
" <td>2.54</td>\n",
" <td>1.00</td>\n",
" <td>8.50</td>\n",
" <td>2.42</td>\n",
" <td>4.58</td>\n",
" <td>6.34</td>\n",
" <td>9.17</td>\n",
" <td>16.71</td>\n",
" <td>1969-01-01</td>\n",
" <td>1969</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1970-01-01</th>\n",
" <td>9.59</td>\n",
" <td>2.96</td>\n",
" <td>11.79</td>\n",
" <td>3.42</td>\n",
" <td>6.13</td>\n",
" <td>4.08</td>\n",
" <td>9.00</td>\n",
" <td>4.46</td>\n",
" <td>7.29</td>\n",
" <td>3.50</td>\n",
" <td>7.33</td>\n",
" <td>13.00</td>\n",
" <td>1970-01-01</td>\n",
" <td>1970</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1971-01-01</th>\n",
" <td>3.71</td>\n",
" <td>0.79</td>\n",
" <td>4.71</td>\n",
" <td>0.17</td>\n",
" <td>1.42</td>\n",
" <td>1.04</td>\n",
" <td>4.63</td>\n",
" <td>0.75</td>\n",
" <td>1.54</td>\n",
" <td>1.08</td>\n",
" <td>4.21</td>\n",
" <td>9.54</td>\n",
" <td>1971-01-01</td>\n",
" <td>1971</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1972-01-01</th>\n",
" <td>9.29</td>\n",
" <td>3.63</td>\n",
" <td>14.54</td>\n",
" <td>4.25</td>\n",
" <td>6.75</td>\n",
" <td>4.42</td>\n",
" <td>13.00</td>\n",
" <td>5.33</td>\n",
" <td>10.04</td>\n",
" <td>8.54</td>\n",
" <td>8.71</td>\n",
" <td>19.17</td>\n",
" <td>1972-01-01</td>\n",
" <td>1972</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1973-01-01</th>\n",
" <td>16.50</td>\n",
" <td>15.92</td>\n",
" <td>14.62</td>\n",
" <td>7.41</td>\n",
" <td>8.29</td>\n",
" <td>11.21</td>\n",
" <td>13.54</td>\n",
" <td>7.79</td>\n",
" <td>10.46</td>\n",
" <td>10.79</td>\n",
" <td>13.37</td>\n",
" <td>9.71</td>\n",
" <td>1973-01-01</td>\n",
" <td>1973</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1974-01-01</th>\n",
" <td>23.21</td>\n",
" <td>16.54</td>\n",
" <td>16.08</td>\n",
" <td>9.75</td>\n",
" <td>15.83</td>\n",
" <td>11.46</td>\n",
" <td>9.54</td>\n",
" <td>13.54</td>\n",
" <td>13.83</td>\n",
" <td>16.66</td>\n",
" <td>17.21</td>\n",
" <td>25.29</td>\n",
" <td>1974-01-01</td>\n",
" <td>1974</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1975-01-01</th>\n",
" <td>14.04</td>\n",
" <td>13.54</td>\n",
" <td>11.29</td>\n",
" <td>5.46</td>\n",
" <td>12.58</td>\n",
" <td>5.58</td>\n",
" <td>8.12</td>\n",
" <td>8.96</td>\n",
" <td>9.29</td>\n",
" <td>5.17</td>\n",
" <td>7.71</td>\n",
" <td>11.63</td>\n",
" <td>1975-01-01</td>\n",
" <td>1975</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1976-01-01</th>\n",
" <td>18.34</td>\n",
" <td>17.67</td>\n",
" <td>14.83</td>\n",
" <td>8.00</td>\n",
" <td>16.62</td>\n",
" <td>10.13</td>\n",
" <td>13.17</td>\n",
" <td>9.04</td>\n",
" <td>13.13</td>\n",
" <td>5.75</td>\n",
" <td>11.38</td>\n",
" <td>14.96</td>\n",
" <td>1976-01-01</td>\n",
" <td>1976</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1977-01-01</th>\n",
" <td>20.04</td>\n",
" <td>11.92</td>\n",
" <td>20.25</td>\n",
" <td>9.13</td>\n",
" <td>9.29</td>\n",
" <td>8.04</td>\n",
" <td>10.75</td>\n",
" <td>5.88</td>\n",
" <td>9.00</td>\n",
" <td>9.00</td>\n",
" <td>14.88</td>\n",
" <td>25.70</td>\n",
" <td>1977-01-01</td>\n",
" <td>1977</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1978-01-01</th>\n",
" <td>8.33</td>\n",
" <td>7.12</td>\n",
" <td>7.71</td>\n",
" <td>3.54</td>\n",
" <td>8.50</td>\n",
" <td>7.50</td>\n",
" <td>14.71</td>\n",
" <td>10.00</td>\n",
" <td>11.83</td>\n",
" <td>10.00</td>\n",
" <td>15.09</td>\n",
" <td>20.46</td>\n",
" <td>1978-01-01</td>\n",
" <td>1978</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" RPT VAL ROS KIL SHA BIR DUB CLA MUL \\\n",
"Yr_Mo_Dy \n",
"1961-01-01 15.04 14.96 13.17 9.29 NaN 9.87 13.67 10.25 10.83 \n",
"1962-01-01 9.29 3.42 11.54 3.50 2.21 1.96 10.41 2.79 3.54 \n",
"1963-01-01 15.59 13.62 19.79 8.38 12.25 10.00 23.45 15.71 13.59 \n",
"1964-01-01 25.80 22.13 18.21 13.25 21.29 14.79 14.12 19.58 13.25 \n",
"1965-01-01 9.54 11.92 9.00 4.38 6.08 5.21 10.25 6.08 5.71 \n",
"1966-01-01 22.04 21.50 17.08 12.75 22.17 15.59 21.79 18.12 16.66 \n",
"1967-01-01 6.46 4.46 6.50 3.21 6.67 3.79 11.38 3.83 7.71 \n",
"1968-01-01 30.04 17.88 16.25 16.25 21.79 12.54 18.16 16.62 18.75 \n",
"1969-01-01 6.13 1.63 5.41 1.08 2.54 1.00 8.50 2.42 4.58 \n",
"1970-01-01 9.59 2.96 11.79 3.42 6.13 4.08 9.00 4.46 7.29 \n",
"1971-01-01 3.71 0.79 4.71 0.17 1.42 1.04 4.63 0.75 1.54 \n",
"1972-01-01 9.29 3.63 14.54 4.25 6.75 4.42 13.00 5.33 10.04 \n",
"1973-01-01 16.50 15.92 14.62 7.41 8.29 11.21 13.54 7.79 10.46 \n",
"1974-01-01 23.21 16.54 16.08 9.75 15.83 11.46 9.54 13.54 13.83 \n",
"1975-01-01 14.04 13.54 11.29 5.46 12.58 5.58 8.12 8.96 9.29 \n",
"1976-01-01 18.34 17.67 14.83 8.00 16.62 10.13 13.17 9.04 13.13 \n",
"1977-01-01 20.04 11.92 20.25 9.13 9.29 8.04 10.75 5.88 9.00 \n",
"1978-01-01 8.33 7.12 7.71 3.54 8.50 7.50 14.71 10.00 11.83 \n",
"\n",
" CLO BEL MAL date year month day \n",
"Yr_Mo_Dy \n",
"1961-01-01 12.58 18.50 15.04 1961-01-01 1961 1 1 \n",
"1962-01-01 5.17 4.38 7.92 1962-01-01 1962 1 1 \n",
"1963-01-01 14.37 17.58 34.13 1963-01-01 1963 1 1 \n",
"1964-01-01 16.75 28.96 21.00 1964-01-01 1964 1 1 \n",
"1965-01-01 8.63 12.04 17.41 1965-01-01 1965 1 1 \n",
"1966-01-01 17.83 28.33 23.79 1966-01-01 1966 1 1 \n",
"1967-01-01 9.08 10.67 20.91 1967-01-01 1967 1 1 \n",
"1968-01-01 17.62 22.25 27.29 1968-01-01 1968 1 1 \n",
"1969-01-01 6.34 9.17 16.71 1969-01-01 1969 1 1 \n",
"1970-01-01 3.50 7.33 13.00 1970-01-01 1970 1 1 \n",
"1971-01-01 1.08 4.21 9.54 1971-01-01 1971 1 1 \n",
"1972-01-01 8.54 8.71 19.17 1972-01-01 1972 1 1 \n",
"1973-01-01 10.79 13.37 9.71 1973-01-01 1973 1 1 \n",
"1974-01-01 16.66 17.21 25.29 1974-01-01 1974 1 1 \n",
"1975-01-01 5.17 7.71 11.63 1975-01-01 1975 1 1 \n",
"1976-01-01 5.75 11.38 14.96 1976-01-01 1976 1 1 \n",
"1977-01-01 9.00 14.88 25.70 1977-01-01 1977 1 1 \n",
"1978-01-01 10.00 15.09 20.46 1978-01-01 1978 1 1 "
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Take one record per year: the rows dated 1 January.\n",
"# The mask is built from the DatetimeIndex, so it does not depend on helper\n",
"# month/day columns; element-wise masks are combined with &, not `and`.\n",
"is_jan_first = (df.index.month == 1) & (df.index.day == 1)\n",
"df[is_jan_first]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}