pythonbook/实验 探索Chipotle快餐数据/4.探索1960 - 2014 美国犯罪数据.ipynb

616 lines
20 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# -- 将数据框命名为crime\n",
"# -- 每一列(column)的数据类型是什么样的?\n",
"# -- 将Year的数据类型转换为 datetime64\n",
"# -- 将列Year设置为数据框的索引\n",
"# -- 删除名为Total的列\n",
"# -- 按照Year每十年对数据框进行分组并求和\n",
"# -- 何时是美国历史上生存最危险的年代?"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"#将数据框命名为drinks\n",
"crime = pd.read_csv('data/US_Crime_Rates_1960_2014.csv',index_col=0)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Int64Index: 55 entries, 0 to 54\n",
"Data columns (total 12 columns):\n",
"Year 55 non-null int64\n",
"Population 55 non-null int64\n",
"Total 55 non-null int64\n",
"Violent 55 non-null int64\n",
"Property 55 non-null int64\n",
"Murder 55 non-null int64\n",
"Forcible_Rape 55 non-null int64\n",
"Robbery 55 non-null int64\n",
"Aggravated_assault 55 non-null int64\n",
"Burglary 55 non-null int64\n",
"Larceny_Theft 55 non-null int64\n",
"Vehicle_Theft 55 non-null int64\n",
"dtypes: int64(12)\n",
"memory usage: 5.6 KB\n"
]
}
],
"source": [
"#每一列(column)的数据类型是什么样的?\n",
"crime.info()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Population</th>\n",
" <th>Total</th>\n",
" <th>Violent</th>\n",
" <th>Property</th>\n",
" <th>Murder</th>\n",
" <th>Forcible_Rape</th>\n",
" <th>Robbery</th>\n",
" <th>Aggravated_assault</th>\n",
" <th>Burglary</th>\n",
" <th>Larceny_Theft</th>\n",
" <th>Vehicle_Theft</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Year</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1960-01-01</th>\n",
" <td>179323175</td>\n",
" <td>3384200</td>\n",
" <td>288460</td>\n",
" <td>3095700</td>\n",
" <td>9110</td>\n",
" <td>17190</td>\n",
" <td>107840</td>\n",
" <td>154320</td>\n",
" <td>912100</td>\n",
" <td>1855400</td>\n",
" <td>328200</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1961-01-01</th>\n",
" <td>182992000</td>\n",
" <td>3488000</td>\n",
" <td>289390</td>\n",
" <td>3198600</td>\n",
" <td>8740</td>\n",
" <td>17220</td>\n",
" <td>106670</td>\n",
" <td>156760</td>\n",
" <td>949600</td>\n",
" <td>1913000</td>\n",
" <td>336000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1962-01-01</th>\n",
" <td>185771000</td>\n",
" <td>3752200</td>\n",
" <td>301510</td>\n",
" <td>3450700</td>\n",
" <td>8530</td>\n",
" <td>17550</td>\n",
" <td>110860</td>\n",
" <td>164570</td>\n",
" <td>994300</td>\n",
" <td>2089600</td>\n",
" <td>366800</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1963-01-01</th>\n",
" <td>188483000</td>\n",
" <td>4109500</td>\n",
" <td>316970</td>\n",
" <td>3792500</td>\n",
" <td>8640</td>\n",
" <td>17650</td>\n",
" <td>116470</td>\n",
" <td>174210</td>\n",
" <td>1086400</td>\n",
" <td>2297800</td>\n",
" <td>408300</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1964-01-01</th>\n",
" <td>191141000</td>\n",
" <td>4564600</td>\n",
" <td>364220</td>\n",
" <td>4200400</td>\n",
" <td>9360</td>\n",
" <td>21420</td>\n",
" <td>130390</td>\n",
" <td>203050</td>\n",
" <td>1213200</td>\n",
" <td>2514400</td>\n",
" <td>472800</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Population Total Violent Property Murder Forcible_Rape \\\n",
"Year \n",
"1960-01-01 179323175 3384200 288460 3095700 9110 17190 \n",
"1961-01-01 182992000 3488000 289390 3198600 8740 17220 \n",
"1962-01-01 185771000 3752200 301510 3450700 8530 17550 \n",
"1963-01-01 188483000 4109500 316970 3792500 8640 17650 \n",
"1964-01-01 191141000 4564600 364220 4200400 9360 21420 \n",
"\n",
" Robbery Aggravated_assault Burglary Larceny_Theft \\\n",
"Year \n",
"1960-01-01 107840 154320 912100 1855400 \n",
"1961-01-01 106670 156760 949600 1913000 \n",
"1962-01-01 110860 164570 994300 2089600 \n",
"1963-01-01 116470 174210 1086400 2297800 \n",
"1964-01-01 130390 203050 1213200 2514400 \n",
"\n",
" Vehicle_Theft \n",
"Year \n",
"1960-01-01 328200 \n",
"1961-01-01 336000 \n",
"1962-01-01 366800 \n",
"1963-01-01 408300 \n",
"1964-01-01 472800 "
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#将Year的数据类型转换为 datetime64\n",
"crime.Year = pd.to_datetime(crime.Year,format='%Y')\n",
"#将列Year设置为数据框的索引\n",
"crime = crime.set_index('Year',drop=True)\n",
"crime.head()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Population</th>\n",
" <th>Violent</th>\n",
" <th>Property</th>\n",
" <th>Murder</th>\n",
" <th>Forcible_Rape</th>\n",
" <th>Robbery</th>\n",
" <th>Aggravated_assault</th>\n",
" <th>Burglary</th>\n",
" <th>Larceny_Theft</th>\n",
" <th>Vehicle_Theft</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Year</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1960-01-01</th>\n",
" <td>179323175</td>\n",
" <td>288460</td>\n",
" <td>3095700</td>\n",
" <td>9110</td>\n",
" <td>17190</td>\n",
" <td>107840</td>\n",
" <td>154320</td>\n",
" <td>912100</td>\n",
" <td>1855400</td>\n",
" <td>328200</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1961-01-01</th>\n",
" <td>182992000</td>\n",
" <td>289390</td>\n",
" <td>3198600</td>\n",
" <td>8740</td>\n",
" <td>17220</td>\n",
" <td>106670</td>\n",
" <td>156760</td>\n",
" <td>949600</td>\n",
" <td>1913000</td>\n",
" <td>336000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1962-01-01</th>\n",
" <td>185771000</td>\n",
" <td>301510</td>\n",
" <td>3450700</td>\n",
" <td>8530</td>\n",
" <td>17550</td>\n",
" <td>110860</td>\n",
" <td>164570</td>\n",
" <td>994300</td>\n",
" <td>2089600</td>\n",
" <td>366800</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1963-01-01</th>\n",
" <td>188483000</td>\n",
" <td>316970</td>\n",
" <td>3792500</td>\n",
" <td>8640</td>\n",
" <td>17650</td>\n",
" <td>116470</td>\n",
" <td>174210</td>\n",
" <td>1086400</td>\n",
" <td>2297800</td>\n",
" <td>408300</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1964-01-01</th>\n",
" <td>191141000</td>\n",
" <td>364220</td>\n",
" <td>4200400</td>\n",
" <td>9360</td>\n",
" <td>21420</td>\n",
" <td>130390</td>\n",
" <td>203050</td>\n",
" <td>1213200</td>\n",
" <td>2514400</td>\n",
" <td>472800</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Population Violent Property Murder Forcible_Rape Robbery \\\n",
"Year \n",
"1960-01-01 179323175 288460 3095700 9110 17190 107840 \n",
"1961-01-01 182992000 289390 3198600 8740 17220 106670 \n",
"1962-01-01 185771000 301510 3450700 8530 17550 110860 \n",
"1963-01-01 188483000 316970 3792500 8640 17650 116470 \n",
"1964-01-01 191141000 364220 4200400 9360 21420 130390 \n",
"\n",
" Aggravated_assault Burglary Larceny_Theft Vehicle_Theft \n",
"Year \n",
"1960-01-01 154320 912100 1855400 328200 \n",
"1961-01-01 156760 949600 1913000 336000 \n",
"1962-01-01 164570 994300 2089600 366800 \n",
"1963-01-01 174210 1086400 2297800 408300 \n",
"1964-01-01 203050 1213200 2514400 472800 "
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#删除名为Total的列\n",
"del crime['Total']\n",
"crime.head()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Population</th>\n",
" <th>Violent</th>\n",
" <th>Property</th>\n",
" <th>Murder</th>\n",
" <th>Forcible_Rape</th>\n",
" <th>Robbery</th>\n",
" <th>Aggravated_assault</th>\n",
" <th>Burglary</th>\n",
" <th>Larceny_Theft</th>\n",
" <th>Vehicle_Theft</th>\n",
" <th>population</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Year</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1960-01-01</th>\n",
" <td>1915053175</td>\n",
" <td>4134930</td>\n",
" <td>45160900</td>\n",
" <td>106180</td>\n",
" <td>236720</td>\n",
" <td>1633510</td>\n",
" <td>2158520</td>\n",
" <td>13321100</td>\n",
" <td>26547700</td>\n",
" <td>5292100</td>\n",
" <td>&lt;bound method f of DatetimeIndexResampler [fre...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1970-01-01</th>\n",
" <td>2121193298</td>\n",
" <td>9607930</td>\n",
" <td>91383800</td>\n",
" <td>192230</td>\n",
" <td>554570</td>\n",
" <td>4159020</td>\n",
" <td>4702120</td>\n",
" <td>28486000</td>\n",
" <td>53157800</td>\n",
" <td>9739900</td>\n",
" <td>&lt;bound method f of DatetimeIndexResampler [fre...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1980-01-01</th>\n",
" <td>2371370069</td>\n",
" <td>14074328</td>\n",
" <td>117048900</td>\n",
" <td>206439</td>\n",
" <td>865639</td>\n",
" <td>5383109</td>\n",
" <td>7619130</td>\n",
" <td>33073494</td>\n",
" <td>72040253</td>\n",
" <td>11935411</td>\n",
" <td>&lt;bound method f of DatetimeIndexResampler [fre...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1990-01-01</th>\n",
" <td>2612825258</td>\n",
" <td>17527048</td>\n",
" <td>119053499</td>\n",
" <td>211664</td>\n",
" <td>998827</td>\n",
" <td>5748930</td>\n",
" <td>10568963</td>\n",
" <td>26750015</td>\n",
" <td>77679366</td>\n",
" <td>14624418</td>\n",
" <td>&lt;bound method f of DatetimeIndexResampler [fre...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2000-01-01</th>\n",
" <td>2947969117</td>\n",
" <td>13968056</td>\n",
" <td>100944369</td>\n",
" <td>163068</td>\n",
" <td>922499</td>\n",
" <td>4230366</td>\n",
" <td>8652124</td>\n",
" <td>21565176</td>\n",
" <td>67970291</td>\n",
" <td>11412834</td>\n",
" <td>&lt;bound method f of DatetimeIndexResampler [fre...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Population Violent Property Murder Forcible_Rape Robbery \\\n",
"Year \n",
"1960-01-01 1915053175 4134930 45160900 106180 236720 1633510 \n",
"1970-01-01 2121193298 9607930 91383800 192230 554570 4159020 \n",
"1980-01-01 2371370069 14074328 117048900 206439 865639 5383109 \n",
"1990-01-01 2612825258 17527048 119053499 211664 998827 5748930 \n",
"2000-01-01 2947969117 13968056 100944369 163068 922499 4230366 \n",
"\n",
" Aggravated_assault Burglary Larceny_Theft Vehicle_Theft \\\n",
"Year \n",
"1960-01-01 2158520 13321100 26547700 5292100 \n",
"1970-01-01 4702120 28486000 53157800 9739900 \n",
"1980-01-01 7619130 33073494 72040253 11935411 \n",
"1990-01-01 10568963 26750015 77679366 14624418 \n",
"2000-01-01 8652124 21565176 67970291 11412834 \n",
"\n",
" population \n",
"Year \n",
"1960-01-01 <bound method f of DatetimeIndexResampler [fre... \n",
"1970-01-01 <bound method f of DatetimeIndexResampler [fre... \n",
"1980-01-01 <bound method f of DatetimeIndexResampler [fre... \n",
"1990-01-01 <bound method f of DatetimeIndexResampler [fre... \n",
"2000-01-01 <bound method f of DatetimeIndexResampler [fre... "
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#按照Year每十年对数据框进行分组并求和\n",
"#按时间聚合参考博客https://www.jb51.net/article/177325.htm\n",
"crimes = crime.resample('10AS').sum()\n",
"#人口是累计数,不能直接求和\n",
"crimes['population'] = crime.resample('10AS').max\n",
"crimes.head()"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Population 2014-01-01\n",
"Violent 1992-01-01\n",
"Property 1991-01-01\n",
"Murder 1991-01-01\n",
"Forcible_Rape 1992-01-01\n",
"Robbery 1991-01-01\n",
"Aggravated_assault 1993-01-01\n",
"Burglary 1980-01-01\n",
"Larceny_Theft 1991-01-01\n",
"Vehicle_Theft 1991-01-01\n",
"dtype: datetime64[ns]"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#何时是美国历史上生存最危险的年代?\n",
"crime.idxmax()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}