{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# -- 创建DataFrame\n",
"# -- 将上述的DataFrame分别命名为data1, data2, data3\n",
"# -- 将data1和data2两个数据框按照行的维度进行合并,命名为all_data\n",
"# -- 将data1和data2两个数据框按照列的维度进行合并,命名为all_data_col\n",
"# -- 打印data3\n",
"# -- 按照subject_id的值对all_data和data3作合并\n",
"# -- 对data1和data2按照subject_id作连接\n",
"# -- 找到 data1 和 data2 合并之后的所有匹配结果"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"raw_data_1 = {\n",
" 'subject_id': ['1', '2', '3', '4', '5'],\n",
" 'first_name': ['Alex', 'Amy', 'Allen', 'Alice', 'Ayoung'], \n",
" 'last_name': ['Anderson', 'Ackerman', 'Ali', 'Aoni', 'Atiches']}\n",
"\n",
"raw_data_2 = {\n",
" 'subject_id': ['4', '5', '6', '7', '8'],\n",
" 'first_name': ['Billy', 'Brian', 'Bran', 'Bryce', 'Betty'], \n",
" 'last_name': ['Bonder', 'Black', 'Balwner', 'Brice', 'Btisan']}\n",
"\n",
"raw_data_3 = {\n",
" 'subject_id': ['1', '2', '3', '4', '5', '7', '8', '9', '10', '11'],\n",
" 'test_id': [51, 15, 15, 61, 16, 14, 15, 1, 61, 16]}"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" subject_id | \n",
" first_name | \n",
" last_name | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" Alex | \n",
" Anderson | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" Amy | \n",
" Ackerman | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" Allen | \n",
" Ali | \n",
"
\n",
" \n",
" 3 | \n",
" 4 | \n",
" Alice | \n",
" Aoni | \n",
"
\n",
" \n",
" 4 | \n",
" 5 | \n",
" Ayoung | \n",
" Atiches | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" subject_id first_name last_name\n",
"0 1 Alex Anderson\n",
"1 2 Amy Ackerman\n",
"2 3 Allen Ali\n",
"3 4 Alice Aoni\n",
"4 5 Ayoung Atiches"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#将上述的DataFrame分别命名为data1, data2, data3\n",
"data1 = pd.DataFrame(raw_data_1)\n",
"data2 = pd.DataFrame(raw_data_2)\n",
"data3 = pd.DataFrame(raw_data_3)\n",
"data1"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" subject_id | \n",
" first_name | \n",
" last_name | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" Alex | \n",
" Anderson | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" Amy | \n",
" Ackerman | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" Allen | \n",
" Ali | \n",
"
\n",
" \n",
" 3 | \n",
" 4 | \n",
" Alice | \n",
" Aoni | \n",
"
\n",
" \n",
" 4 | \n",
" 5 | \n",
" Ayoung | \n",
" Atiches | \n",
"
\n",
" \n",
" 0 | \n",
" 4 | \n",
" Billy | \n",
" Bonder | \n",
"
\n",
" \n",
" 1 | \n",
" 5 | \n",
" Brian | \n",
" Black | \n",
"
\n",
" \n",
" 2 | \n",
" 6 | \n",
" Bran | \n",
" Balwner | \n",
"
\n",
" \n",
" 3 | \n",
" 7 | \n",
" Bryce | \n",
" Brice | \n",
"
\n",
" \n",
" 4 | \n",
" 8 | \n",
" Betty | \n",
" Btisan | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" subject_id first_name last_name\n",
"0 1 Alex Anderson\n",
"1 2 Amy Ackerman\n",
"2 3 Allen Ali\n",
"3 4 Alice Aoni\n",
"4 5 Ayoung Atiches\n",
"0 4 Billy Bonder\n",
"1 5 Brian Black\n",
"2 6 Bran Balwner\n",
"3 7 Bryce Brice\n",
"4 8 Betty Btisan"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#将data1和data2两个数据框按照行的维度进行合并,命名为all_data\n",
"all_data = pd.concat([data1,data2],axis=0)\n",
"all_data"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" subject_id | \n",
" first_name | \n",
" last_name | \n",
" subject_id | \n",
" first_name | \n",
" last_name | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" Alex | \n",
" Anderson | \n",
" 4 | \n",
" Billy | \n",
" Bonder | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" Amy | \n",
" Ackerman | \n",
" 5 | \n",
" Brian | \n",
" Black | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" Allen | \n",
" Ali | \n",
" 6 | \n",
" Bran | \n",
" Balwner | \n",
"
\n",
" \n",
" 3 | \n",
" 4 | \n",
" Alice | \n",
" Aoni | \n",
" 7 | \n",
" Bryce | \n",
" Brice | \n",
"
\n",
" \n",
" 4 | \n",
" 5 | \n",
" Ayoung | \n",
" Atiches | \n",
" 8 | \n",
" Betty | \n",
" Btisan | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" subject_id first_name last_name subject_id first_name last_name\n",
"0 1 Alex Anderson 4 Billy Bonder\n",
"1 2 Amy Ackerman 5 Brian Black\n",
"2 3 Allen Ali 6 Bran Balwner\n",
"3 4 Alice Aoni 7 Bryce Brice\n",
"4 5 Ayoung Atiches 8 Betty Btisan"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#将data1和data2两个数据框按照列的维度进行合并,命名为all_data_col\n",
"all_data_col = pd.concat([data1,data2],axis=1)\n",
"all_data_col"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" subject_id | \n",
" test_id | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 51 | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" 15 | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" 15 | \n",
"
\n",
" \n",
" 3 | \n",
" 4 | \n",
" 61 | \n",
"
\n",
" \n",
" 4 | \n",
" 5 | \n",
" 16 | \n",
"
\n",
" \n",
" 5 | \n",
" 7 | \n",
" 14 | \n",
"
\n",
" \n",
" 6 | \n",
" 8 | \n",
" 15 | \n",
"
\n",
" \n",
" 7 | \n",
" 9 | \n",
" 1 | \n",
"
\n",
" \n",
" 8 | \n",
" 10 | \n",
" 61 | \n",
"
\n",
" \n",
" 9 | \n",
" 11 | \n",
" 16 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" subject_id test_id\n",
"0 1 51\n",
"1 2 15\n",
"2 3 15\n",
"3 4 61\n",
"4 5 16\n",
"5 7 14\n",
"6 8 15\n",
"7 9 1\n",
"8 10 61\n",
"9 11 16"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data3"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" subject_id | \n",
" first_name | \n",
" last_name | \n",
" test_id | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" Alex | \n",
" Anderson | \n",
" 51 | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" Amy | \n",
" Ackerman | \n",
" 15 | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" Allen | \n",
" Ali | \n",
" 15 | \n",
"
\n",
" \n",
" 3 | \n",
" 4 | \n",
" Alice | \n",
" Aoni | \n",
" 61 | \n",
"
\n",
" \n",
" 4 | \n",
" 4 | \n",
" Billy | \n",
" Bonder | \n",
" 61 | \n",
"
\n",
" \n",
" 5 | \n",
" 5 | \n",
" Ayoung | \n",
" Atiches | \n",
" 16 | \n",
"
\n",
" \n",
" 6 | \n",
" 5 | \n",
" Brian | \n",
" Black | \n",
" 16 | \n",
"
\n",
" \n",
" 7 | \n",
" 7 | \n",
" Bryce | \n",
" Brice | \n",
" 14 | \n",
"
\n",
" \n",
" 8 | \n",
" 8 | \n",
" Betty | \n",
" Btisan | \n",
" 15 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" subject_id first_name last_name test_id\n",
"0 1 Alex Anderson 51\n",
"1 2 Amy Ackerman 15\n",
"2 3 Allen Ali 15\n",
"3 4 Alice Aoni 61\n",
"4 4 Billy Bonder 61\n",
"5 5 Ayoung Atiches 16\n",
"6 5 Brian Black 16\n",
"7 7 Bryce Brice 14\n",
"8 8 Betty Btisan 15"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#按照subject_id的值对all_data和data3作合并\n",
"pd.merge(all_data,data3,on='subject_id')"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" subject_id | \n",
" first_name_x | \n",
" last_name_x | \n",
" first_name_y | \n",
" last_name_y | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 4 | \n",
" Alice | \n",
" Aoni | \n",
" Billy | \n",
" Bonder | \n",
"
\n",
" \n",
" 1 | \n",
" 5 | \n",
" Ayoung | \n",
" Atiches | \n",
" Brian | \n",
" Black | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" subject_id first_name_x last_name_x first_name_y last_name_y\n",
"0 4 Alice Aoni Billy Bonder\n",
"1 5 Ayoung Atiches Brian Black"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#对data1和data2按照subject_id作内连接\n",
"pd.merge(data1,data2,on='subject_id',how='inner')"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" subject_id | \n",
" first_name_data1 | \n",
" last_name_data1 | \n",
" first_name_data2 | \n",
" last_name_data2 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" Alex | \n",
" Anderson | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" Amy | \n",
" Ackerman | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" Allen | \n",
" Ali | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 3 | \n",
" 4 | \n",
" Alice | \n",
" Aoni | \n",
" Billy | \n",
" Bonder | \n",
"
\n",
" \n",
" 4 | \n",
" 5 | \n",
" Ayoung | \n",
" Atiches | \n",
" Brian | \n",
" Black | \n",
"
\n",
" \n",
" 5 | \n",
" 6 | \n",
" NaN | \n",
" NaN | \n",
" Bran | \n",
" Balwner | \n",
"
\n",
" \n",
" 6 | \n",
" 7 | \n",
" NaN | \n",
" NaN | \n",
" Bryce | \n",
" Brice | \n",
"
\n",
" \n",
" 7 | \n",
" 8 | \n",
" NaN | \n",
" NaN | \n",
" Betty | \n",
" Btisan | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" subject_id first_name_data1 last_name_data1 first_name_data2 last_name_data2\n",
"0 1 Alex Anderson NaN NaN\n",
"1 2 Amy Ackerman NaN NaN\n",
"2 3 Allen Ali NaN NaN\n",
"3 4 Alice Aoni Billy Bonder\n",
"4 5 Ayoung Atiches Brian Black\n",
"5 6 NaN NaN Bran Balwner\n",
"6 7 NaN NaN Bryce Brice\n",
"7 8 NaN NaN Betty Btisan"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#找到 data1 和 data2 合并之后的所有匹配结果\n",
"pd.merge(data1,data2,on='subject_id',how='outer',suffixes=('_data1', '_data2'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}