pythonbook/实验 探索Chipotle快餐数据/8.探索Apple公司股价数据.ipynb

767 lines
21 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"# -- 读取数据并存为一个名叫apple的数据框\n",
"# -- 查看每一列的数据类型\n",
"# -- 将Date这个列转换为datetime类型\n",
"# -- 将Date设置为索引\n",
"# -- 有重复的日期吗?\n",
"# -- 将index设置为升序\n",
"# -- 找到每个月的最后一个交易日(business day)\n",
"# -- 数据集中最早的日期和最晚的日期相差多少天?\n",
"# -- 在数据中一共有多少个月?\n",
"# -- 按照时间顺序可视化Adj Close值"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# Load the Apple stock-price CSV into a DataFrame named `apple`.\n",
"apple = pd.read_csv(filepath_or_buffer='data/appl_1980_2014.csv')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Date object\n",
"Open float64\n",
"High float64\n",
"Low float64\n",
"Close float64\n",
"Volume int64\n",
"Adj Close float64\n",
"dtype: object"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Inspect the data type of every column.\n",
"apple.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Date</th>\n",
" <th>Open</th>\n",
" <th>High</th>\n",
" <th>Low</th>\n",
" <th>Close</th>\n",
" <th>Volume</th>\n",
" <th>Adj Close</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2014-07-08</td>\n",
" <td>96.27</td>\n",
" <td>96.80</td>\n",
" <td>93.92</td>\n",
" <td>95.35</td>\n",
" <td>65130000</td>\n",
" <td>95.35</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2014-07-07</td>\n",
" <td>94.14</td>\n",
" <td>95.99</td>\n",
" <td>94.10</td>\n",
" <td>95.97</td>\n",
" <td>56305400</td>\n",
" <td>95.97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2014-07-03</td>\n",
" <td>93.67</td>\n",
" <td>94.10</td>\n",
" <td>93.20</td>\n",
" <td>94.03</td>\n",
" <td>22891800</td>\n",
" <td>94.03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2014-07-02</td>\n",
" <td>93.87</td>\n",
" <td>94.06</td>\n",
" <td>93.09</td>\n",
" <td>93.48</td>\n",
" <td>28420900</td>\n",
" <td>93.48</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2014-07-01</td>\n",
" <td>93.52</td>\n",
" <td>94.07</td>\n",
" <td>93.13</td>\n",
" <td>93.52</td>\n",
" <td>38170200</td>\n",
" <td>93.52</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Date Open High Low Close Volume Adj Close\n",
"0 2014-07-08 96.27 96.80 93.92 95.35 65130000 95.35\n",
"1 2014-07-07 94.14 95.99 94.10 95.97 56305400 95.97\n",
"2 2014-07-03 93.67 94.10 93.20 94.03 22891800 94.03\n",
"3 2014-07-02 93.87 94.06 93.09 93.48 28420900 93.48\n",
"4 2014-07-01 93.52 94.07 93.13 93.52 38170200 93.52"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first rows of the frame (head() shows five by default).\n",
"apple.head()"
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Date</th>\n",
" <th>Open</th>\n",
" <th>High</th>\n",
" <th>Low</th>\n",
" <th>Close</th>\n",
" <th>Volume</th>\n",
" <th>Adj Close</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2014-07-08</td>\n",
" <td>96.27</td>\n",
" <td>96.80</td>\n",
" <td>93.92</td>\n",
" <td>95.35</td>\n",
" <td>65130000</td>\n",
" <td>95.35</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2014-07-07</td>\n",
" <td>94.14</td>\n",
" <td>95.99</td>\n",
" <td>94.10</td>\n",
" <td>95.97</td>\n",
" <td>56305400</td>\n",
" <td>95.97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2014-07-03</td>\n",
" <td>93.67</td>\n",
" <td>94.10</td>\n",
" <td>93.20</td>\n",
" <td>94.03</td>\n",
" <td>22891800</td>\n",
" <td>94.03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2014-07-02</td>\n",
" <td>93.87</td>\n",
" <td>94.06</td>\n",
" <td>93.09</td>\n",
" <td>93.48</td>\n",
" <td>28420900</td>\n",
" <td>93.48</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2014-07-01</td>\n",
" <td>93.52</td>\n",
" <td>94.07</td>\n",
" <td>93.13</td>\n",
" <td>93.52</td>\n",
" <td>38170200</td>\n",
" <td>93.52</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Date Open High Low Close Volume Adj Close\n",
"0 2014-07-08 96.27 96.80 93.92 95.35 65130000 95.35\n",
"1 2014-07-07 94.14 95.99 94.10 95.97 56305400 95.97\n",
"2 2014-07-03 93.67 94.10 93.20 94.03 22891800 94.03\n",
"3 2014-07-02 93.87 94.06 93.09 93.48 28420900 93.48\n",
"4 2014-07-01 93.52 94.07 93.13 93.52 38170200 93.52"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Convert the Date column from strings (object dtype) to datetime values.\n",
"apple['Date'] = pd.to_datetime(apple['Date'])\n",
"apple.head()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Open</th>\n",
" <th>High</th>\n",
" <th>Low</th>\n",
" <th>Close</th>\n",
" <th>Volume</th>\n",
" <th>Adj Close</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Date</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2014-07-08</th>\n",
" <td>96.27</td>\n",
" <td>96.80</td>\n",
" <td>93.92</td>\n",
" <td>95.35</td>\n",
" <td>65130000</td>\n",
" <td>95.35</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2014-07-07</th>\n",
" <td>94.14</td>\n",
" <td>95.99</td>\n",
" <td>94.10</td>\n",
" <td>95.97</td>\n",
" <td>56305400</td>\n",
" <td>95.97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2014-07-03</th>\n",
" <td>93.67</td>\n",
" <td>94.10</td>\n",
" <td>93.20</td>\n",
" <td>94.03</td>\n",
" <td>22891800</td>\n",
" <td>94.03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2014-07-02</th>\n",
" <td>93.87</td>\n",
" <td>94.06</td>\n",
" <td>93.09</td>\n",
" <td>93.48</td>\n",
" <td>28420900</td>\n",
" <td>93.48</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2014-07-01</th>\n",
" <td>93.52</td>\n",
" <td>94.07</td>\n",
" <td>93.13</td>\n",
" <td>93.52</td>\n",
" <td>38170200</td>\n",
" <td>93.52</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Open High Low Close Volume Adj Close\n",
"Date \n",
"2014-07-08 96.27 96.80 93.92 95.35 65130000 95.35\n",
"2014-07-07 94.14 95.99 94.10 95.97 56305400 95.97\n",
"2014-07-03 93.67 94.10 93.20 94.03 22891800 94.03\n",
"2014-07-02 93.87 94.06 93.09 93.48 28420900 93.48\n",
"2014-07-01 93.52 94.07 93.13 93.52 38170200 93.52"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Use the Date column as the index.\n",
"# NOTE: `apple` is rebound here, so re-running this cell without re-running the\n",
"# cells above raises KeyError, because Date is no longer a column.\n",
"apple = apple.set_index('Date')\n",
"apple.head()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Are there any duplicate dates?\n",
"# is_unique is True when every index value occurs exactly once,\n",
"# so True here means there are NO duplicate dates.\n",
"apple.index.is_unique"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Open</th>\n",
" <th>High</th>\n",
" <th>Low</th>\n",
" <th>Close</th>\n",
" <th>Volume</th>\n",
" <th>Adj Close</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Date</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1980-12-12</th>\n",
" <td>28.75</td>\n",
" <td>28.87</td>\n",
" <td>28.75</td>\n",
" <td>28.75</td>\n",
" <td>117258400</td>\n",
" <td>0.45</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1980-12-15</th>\n",
" <td>27.38</td>\n",
" <td>27.38</td>\n",
" <td>27.25</td>\n",
" <td>27.25</td>\n",
" <td>43971200</td>\n",
" <td>0.42</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1980-12-16</th>\n",
" <td>25.37</td>\n",
" <td>25.37</td>\n",
" <td>25.25</td>\n",
" <td>25.25</td>\n",
" <td>26432000</td>\n",
" <td>0.39</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1980-12-17</th>\n",
" <td>25.87</td>\n",
" <td>26.00</td>\n",
" <td>25.87</td>\n",
" <td>25.87</td>\n",
" <td>21610400</td>\n",
" <td>0.40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1980-12-18</th>\n",
" <td>26.63</td>\n",
" <td>26.75</td>\n",
" <td>26.63</td>\n",
" <td>26.63</td>\n",
" <td>18362400</td>\n",
" <td>0.41</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Open High Low Close Volume Adj Close\n",
"Date \n",
"1980-12-12 28.75 28.87 28.75 28.75 117258400 0.45\n",
"1980-12-15 27.38 27.38 27.25 27.25 43971200 0.42\n",
"1980-12-16 25.37 25.37 25.25 25.25 26432000 0.39\n",
"1980-12-17 25.87 26.00 25.87 25.87 21610400 0.40\n",
"1980-12-18 26.63 26.75 26.63 26.63 18362400 0.41"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Order the rows chronologically, oldest first (sort_index sorts ascending by default).\n",
"apple = apple.sort_index()\n",
"apple.head()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Open</th>\n",
" <th>High</th>\n",
" <th>Low</th>\n",
" <th>Close</th>\n",
" <th>Volume</th>\n",
" <th>Adj Close</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Date</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1980-12-31</th>\n",
" <td>30.481538</td>\n",
" <td>30.567692</td>\n",
" <td>30.443077</td>\n",
" <td>30.443077</td>\n",
" <td>2.586252e+07</td>\n",
" <td>0.473077</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1981-01-30</th>\n",
" <td>31.754762</td>\n",
" <td>31.826667</td>\n",
" <td>31.654762</td>\n",
" <td>31.654762</td>\n",
" <td>7.249867e+06</td>\n",
" <td>0.493810</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1981-02-27</th>\n",
" <td>26.480000</td>\n",
" <td>26.572105</td>\n",
" <td>26.407895</td>\n",
" <td>26.407895</td>\n",
" <td>4.231832e+06</td>\n",
" <td>0.411053</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1981-03-31</th>\n",
" <td>24.937727</td>\n",
" <td>25.016818</td>\n",
" <td>24.836364</td>\n",
" <td>24.836364</td>\n",
" <td>7.962691e+06</td>\n",
" <td>0.387727</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1981-04-30</th>\n",
" <td>27.286667</td>\n",
" <td>27.368095</td>\n",
" <td>27.227143</td>\n",
" <td>27.227143</td>\n",
" <td>6.392000e+06</td>\n",
" <td>0.423333</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Open High Low Close Volume \\\n",
"Date \n",
"1980-12-31 30.481538 30.567692 30.443077 30.443077 2.586252e+07 \n",
"1981-01-30 31.754762 31.826667 31.654762 31.654762 7.249867e+06 \n",
"1981-02-27 26.480000 26.572105 26.407895 26.407895 4.231832e+06 \n",
"1981-03-31 24.937727 25.016818 24.836364 24.836364 7.962691e+06 \n",
"1981-04-30 27.286667 27.368095 27.227143 27.227143 6.392000e+06 \n",
"\n",
" Adj Close \n",
"Date \n",
"1980-12-31 0.473077 \n",
"1981-01-30 0.493810 \n",
"1981-02-27 0.411053 \n",
"1981-03-31 0.387727 \n",
"1981-04-30 0.423333 "
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Resample to business-month-end frequency: each row is labelled with the last\n",
"# business day of its month and holds the mean of that month's daily values.\n",
"# The offset object is used instead of the 'BM' string alias, which pandas 2.2\n",
"# deprecated in favour of 'BME'; the object form works on old and new versions.\n",
"# resample parameter guide: https://www.jianshu.com/p/5367ef7453ce\n",
"apple_month = apple.resample(pd.offsets.BMonthEnd()).mean()\n",
"apple_month.head()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"12261"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# How many days lie between the earliest and the latest date in the data set?\n",
"date_span = apple.index.max() - apple.index.min()\n",
"date_span.days"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"404"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# How many months does the data cover? Count the rows of the monthly frame.\n",
"apple_month.shape[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"# Visualise the Adj Close values in chronological order.\n",
"# The series is plotted against its Date index, which was sorted ascending above.\n",
"# Series.plot needs matplotlib installed (pandas' default plotting backend).\n",
"ax = apple['Adj Close'].plot(title='Apple Adj Close', figsize=(13.5, 9))\n",
"ax.set(xlabel='Date', ylabel='Adj Close');"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}